#include "mat_mul.h" #include #include #include #include #include "util.h" static float *A, *B, *C; static int M, N, K; static int num_threads; static int mpi_rank, mpi_world_size; // define #define NRA M // number of rows in Matrix A #define NCA K // number of cols in Matrix A #define NCB N // number of cols in Matrix B #define MASTER 0 // presumably the rank of the master process (confirm against callers) #define FROM_MASTER 1 // presumably a message tag for master-to-worker traffic (confirm) #define FROM_WORKER 2 // presumably a message tag for worker-to-master traffic (confirm) #define TM 32 #define TK 16 #define TN 2048 static int rows[4] = {0,}; // rows of mat A assigned to each worker static int offset[4] = {0,}; // presumably the starting row of mat A for each worker (confirm) static void mat_mul_omp() { int start = 0; int end = rows[mpi_rank]; #pragma omp parallel for num_threads(num_threads) schedule(dynamic) for(int ii=start; ii