// OpenCL host code for a matrix multiply that keeps one queue, kernel and
// buffer set per device (arrays sized by DEV_NUM).
//
// NOTE(review): the text below is damaged and will not build as-is. The whole
// file sits on one physical line, the four #include directives after
// "mat_mul.h" have no target, and every `for (int i=0; i` stops mid-condition
// and runs into unrelated text. Presumably an extraction step removed
// everything between a '<' and the next '>' -- TODO restore from the original
// file before making any code change here.
//
// What the surviving text shows:
//  - CHECK_ERROR(err): when err != CL_SUCCESS, prints the file, line and error
//    code with printf and calls exit(EXIT_FAILURE).
//  - TS, WPT, DEV_NUM: constants 32, 8 and 4. Only DEV_NUM is used in the
//    visible text (as the length of the per-device arrays); TS and WPT are
//    presumably used by code that was lost -- confirm.
//  - mat_mul(_A, _B, _C, _M, _N, _K): stores its arguments in the file-level
//    statics A, B, C, M, N, K and, when _DEBUG is defined, opens "dump.out" on
//    fout. The rest of its body is missing.
//  - A fragment whose function header is missing: for device i, a blocking
//    (CL_TRUE) write of rows * K floats starting at &A[start * K] into a_d[i],
//    a blocking write of all K * N floats of B into b_d[i], then
//    clFinish(queue[i]), which the original comment says is needed for time
//    measurement. `rows` and `start` are defined in the lost text.
//  - mat_mul_final(A, B, C, M, N, K): its parameters shadow the file-level
//    statics of the same names. The visible part does a blocking read of
//    rows * N floats from c_d[i] into &C[start * N]; the loop header and the
//    tail of the function (a second per-device loop) are missing.
#include "mat_mul.h" #include #include #include #include #define CHECK_ERROR(err) \ if (err != CL_SUCCESS) { \ printf("[%s:%d] OpenCL error %d\n", __FILE__, __LINE__, err); \ exit(EXIT_FAILURE); \ } #define TS 32 #define WPT 8 #define DEV_NUM 4 static cl_int err; static cl_platform_id platform; static cl_device_id device[DEV_NUM]; static cl_context context; static cl_command_queue queue[DEV_NUM]; static cl_program program; static cl_kernel kernel[DEV_NUM]; static cl_mem a_d[DEV_NUM], b_d[DEV_NUM], c_d[DEV_NUM]; static float *A, *B, *C; static int M, N, K; static int size[DEV_NUM]; static int ndev; static std::ofstream fout; void mat_mul(float *_A, float *_B, float *_C, int _M, int _N, int _K) { A = _A, B = _B, C = _C; M = _M, N = _N, K = _K; #ifdef _DEBUG fout.open("dump.out"); #endif for (int i=0; i a_d (gpu), B (cpu) -> b_d (gpu) err = clEnqueueWriteBuffer(queue[i], a_d[i], CL_TRUE, 0, rows * K * sizeof(float), &A[start * K], 0, NULL, NULL); CHECK_ERROR(err); err = clEnqueueWriteBuffer(queue[i], b_d[i], CL_TRUE, 0, K * N * sizeof(float), B, 0, NULL, NULL); CHECK_ERROR(err); // DO NOT REMOVE; NEEDED FOR TIME MEASURE err = clFinish(queue[i]); CHECK_ERROR(err); } } void mat_mul_final(float *A, float *B, float *C, int M, int N, int K) { // memset(C, -1, M * N * sizeof(float)); for (int i=0; i C (cpu) err = clEnqueueReadBuffer(queue[i], c_d[i], CL_TRUE, 0, rows * N * sizeof(float), &C[start * N], 0, NULL, NULL); CHECK_ERROR(err); } for (int i=0; i