#include "mat_mul.h" //#include #include #include #include #ifndef max #define max(a,b) (((a) > (b)) ? (a) : (b)) #endif #ifndef min #define min(a,b) (((a) < (b)) ? (a) : (b)) #endif static float *A, *B, *C; static int M, N, K; static int num_threads; struct threadinfo { int NumofThread; int idx; }; int portion; static void* mat_mul_thread(void *data) { // TODO: parallelize & optimize matrix multiplication threadinfo *thInfo=(threadinfo*)data; //printf("%s %d %d %d created \n",__func__,__LINE__,thInfo->NumofThread,thInfo->idx); int totalthread=thInfo->NumofThread; int idx=thInfo->idx; int pid = thInfo->idx; int slice = M / num_threads; int start = pid * slice; int end = pid == num_threads - 1 ? M : (pid + 1) * slice; float Aik; int bs = 32; for (int kk = 0; kk < K; kk += bs) { // for (int jj = 0; jj < N; jj += bs) { for (int i = start; i < end; ++i) { for (int k = kk; k < min(kk + bs, K); ++k) {//blocking // for(int k=0; kNumofThread = num_threads; } pthread_t thread[81]; if(M % num_threads==0) { portion=M/num_threads; } else{ portion=M/num_threads+1; } for(int i=0;iidx =i; pthread_create(&thread[i], NULL, mat_mul_thread, (void *)thInfo[i]); } for(int i=0;i