#include "mat_mul.h"

/*
 * NOTE(review): the three system #include directives had lost their header
 * names in the reviewed text. <pthread.h> is required by the pthread_t use
 * below and <stdint.h> by intptr_t in mat_mul_thread; <stdio.h> and
 * <stdlib.h> are presumed -- confirm against the original file.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Operands and dimensions shared by all worker threads; set by mat_mul(). */
static float *A, *B, *C;
static int M, N, K;
static int num_threads;

/*
 * Worker routine for one thread of the blocked matrix multiply.
 *
 * Accumulates A * B into C for every row i with i % num_threads equal to this
 * thread's index, so distinct threads write disjoint rows of C. The matrices
 * are indexed as A[i*K + k], B[k*N + j] and C[i*N + j]. C is accumulated into
 * (+=), never cleared here, so its initial contents are part of the result.
 *
 * data: the thread index, carried by value in the pointer itself (it is not
 *       dereferenced).
 *
 * Returns NULL.
 */
static void *mat_mul_thread(void *data)
{
    /* Tile extents for the i, j and k loops. */
    enum { TILE_I = 4096, TILE_J = 4096, TILE_K = 64 };

    /*
     * Convert the pointer value to an integer instead of reading the
     * pointer's storage through an int lvalue: the latter breaks aliasing
     * rules and picks up the wrong bytes on big-endian 64-bit targets.
     */
    const int thread_num = (int)(intptr_t)data;

    for (int ii = 0; ii < M; ii += TILE_I) {
        const int i_end = (ii + TILE_I < M) ? ii + TILE_I : M;

        for (int jj = 0; jj < N; jj += TILE_J) {
            const int j_end = (jj + TILE_J < N) ? jj + TILE_J : N;

            for (int kk = 0; kk < K; kk += TILE_K) {
                const int k_end = (kk + TILE_K < K) ? kk + TILE_K : K;

                for (int i = ii; i < i_end; i++) {
                    /* Rows are dealt out to threads round-robin. */
                    if (i % num_threads != thread_num) {
                        continue;
                    }

                    /* size_t offsets: i*N and i*K can exceed INT_MAX. */
                    float *c_row = C + (size_t)i * (size_t)N;
                    const float *a_row = A + (size_t)i * (size_t)K;

                    for (int k = kk; k < k_end; k++) {
                        const float *b_row = B + (size_t)k * (size_t)N;
                        const float a_ik = a_row[k];

                        for (int j = jj; j < j_end; j++) {
                            c_row[j] += a_ik * b_row[j];
                        }
                    }
                }
            }
        }
    }

    return NULL;
}

/*
 * NOTE(review): the remainder of mat_mul is not visible in the reviewed text;
 * the fragment below is carried over unchanged and was not reviewed.
 */
void mat_mul(float *_A, float *_B, float *_C, int _M, int _N, int _K, int _num_threads) {
  A = _A, B = _B, C = _C;
  M = _M, N = _N, K = _K;
  num_threads = _num_threads;

  // TODO: create '_num_threads' pthreads
  pthread_t thread[80]; //maximum number of threads is 40
  int new_threads;
  int idx;
  new_threads = num_threads;
  for(idx=0; idx