#include "mat_mul.h" #include #include #include #include static float *A, *B, *C; static int M, N, K; static int num_threads; static void* mat_mul_thread(void *data) { int pid = *(int *) data; int slice = M / num_threads; int start = pid * slice; int end = pid == num_threads - 1 ? M : (pid+1)*slice; float Aik; int bs = 32; for (int kk=0; kk