// 64 lines, 1.6 KiB, C++
#include "mat_mul.h"

#include <cstdio>
#include <cstdlib>

#include <pthread.h>
#include <stdint.h>
|
|
|
|
// State shared between mat_mul() and its worker threads. mat_mul() fills
// these in before any worker is started; the workers only read them.
//
// Layout, as implied by the indexing in mat_mul_thread():
//   A is M x K, B is K x N, C is M x N, all stored row-major.
static const float *A;   // left operand, only ever read
static const float *B;   // right operand, only ever read
static float *C;         // output, accumulated into with +=
static int M;            // rows of A and C
static int N;            // columns of B and C
static int K;            // columns of A / rows of B
static int num_threads;  // number of row slices the work is divided into
|
|
|
|
|
|
static void* mat_mul_thread(void *data) {
|
|
int thread_num = *((int*)(&data));
|
|
int size_i = 4096;
|
|
int size_j = 4096;
|
|
int size_k = 64;
|
|
int ii,jj,kk,i,j,k;
|
|
for (ii = 0; ii < M; ii += size_i) {
|
|
int min_ii;
|
|
if(ii+size_i < M) min_ii = ii+size_i; else min_ii = M;
|
|
for (jj = 0; jj < N; jj += size_j) {
|
|
int min_jj;
|
|
if(jj+size_j < N) min_jj = jj+size_j; else min_jj = N;
|
|
for (kk = 0; kk < K; kk += size_k) {
|
|
int min_kk;
|
|
if(kk+size_k < K) min_kk = kk+size_k; else min_kk = K;
|
|
for (i = ii; i < min_ii; i++) {
|
|
int iN = i*N;
|
|
int iK = i*K;
|
|
if( (i%num_threads) == thread_num) {
|
|
for (k = kk; k < min_kk; k++) {
|
|
int kN = k*N;
|
|
for (j = jj; j < min_jj; j++) {
|
|
C[iN + j + 0] += A[iK+ k] * B[kN+ j + 0];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
void mat_mul(float *_A, float *_B, float *_C, int _M, int _N, int _K, int _num_threads) {
|
|
A = _A, B = _B, C = _C;
|
|
M = _M, N = _N, K = _K;
|
|
num_threads = _num_threads;
|
|
|
|
// TODO: create '_num_threads' pthreads
|
|
pthread_t thread[80]; //maximum number of threads is 40
|
|
int new_threads;
|
|
int idx;
|
|
new_threads = num_threads;
|
|
for(idx=0; idx<new_threads; idx++) {
|
|
pthread_create(&thread[idx], NULL, mat_mul_thread, (void *)idx);
|
|
}
|
|
for(idx=0; idx<new_threads; idx++) {
|
|
pthread_join(thread[idx], NULL);
|
|
}
|
|
}
|