14 lines
422 B
Common Lisp
14 lines
422 B
Common Lisp
// Super super slow (naive, unoptimized) sgemm kernel by heehoon
// Naive single-precision matrix multiply: C = A * B.
//
// Each work-item computes exactly one element of C. The buffers are indexed
// as row-major arrays: A as M x K, B as K x N, and C as M x N.
//
//   A, B  input matrices; only read by this kernel
//   C     output matrix; element (row, col) is overwritten
//   M, N  number of rows / columns of C
//   K     length of each dot product (columns of A, rows of B)
//
// Work-items whose global id lies outside the M x N output return without
// touching memory, so a global work size larger than the matrix is harmless.
// The buffers are restrict-qualified: C must not overlap A or B.
__kernel void sgemm(__global const float *restrict A,
                    __global const float *restrict B,
                    __global float *restrict C,
                    int M, int N, int K) {
    const int row = get_global_id(0);  // row index of C
    const int col = get_global_id(1);  // column index of C

    // Boundary check: skip work-items outside the output matrix.
    if (row >= M || col >= N) {
        return;
    }

    // Widen to size_t before multiplying so element offsets cannot overflow
    // int when the matrices hold more than INT_MAX elements.
    const size_t a_row = (size_t)row * (size_t)K;
    const size_t c_idx = (size_t)row * (size_t)N + (size_t)col;

    // Accumulate the dot product of row `row` of A with column `col` of B.
    float acc = 0.0f;
    for (int k = 0; k < K; k++) {
        acc += A[a_row + (size_t)k] * B[(size_t)k * (size_t)N + (size_t)col];
    }

    C[c_idx] = acc;
}
|