27 lines
1.3 KiB
C
27 lines
1.3 KiB
C
#pragma once
|
|
|
|
#include <cstddef>
|
|
|
|
// Declaration of the "cpu" matmul variant: three float buffers (A, B, C)
// followed by three size_t extents (M, N, K); returns nothing.
// NOTE(review): presumably computes C = A * B with A as M x K, B as K x N and
// C as M x N — confirm the dimension order and memory layout in the definition.
void matmul_cpu(float *A, float *B, float *C, size_t M, size_t N, size_t K);
|
|
// Declaration of the "naive" matmul variant; same parameter list as the other
// matmul_* entry points (three float buffers, then extents M, N, K).
// NOTE(review): name suggests an unoptimized baseline implementation —
// confirm where it runs and which buffers it writes in the definition.
void matmul_naive(float *A, float *B, float *C, size_t M, size_t N, size_t K);
|
|
// Declaration of the "buffering" matmul variant: three float buffers (A, B, C)
// followed by three size_t extents (M, N, K); returns nothing.
// NOTE(review): presumably overlaps data transfer with computation using
// multiple buffers — confirm against the definition.
void matmul_buffering(float *A, float *B, float *C, size_t M, size_t N,
                      size_t K);
|
|
// Declaration of the "multigpu" matmul variant: three float buffers (A, B, C)
// followed by three size_t extents (M, N, K); returns nothing.
// NOTE(review): name suggests the work is split across several GPUs — confirm
// the partitioning scheme and device requirements in the definition.
void matmul_multigpu(float *A, float *B, float *C, size_t M, size_t N,
                     size_t K);
|
|
// Declaration of the "cublas" matmul variant; same parameter list as the other
// matmul_* entry points (three float buffers, then extents M, N, K).
// NOTE(review): name suggests it delegates to a cuBLAS GEMM routine — confirm
// which call and which storage order the definition uses.
void matmul_cublas(float *A, float *B, float *C, size_t M, size_t N, size_t K);
|
|
// Declaration of the "tiling" matmul variant; same parameter list as the other
// matmul_* entry points (three float buffers, then extents M, N, K).
// NOTE(review): name suggests a blocked/tiled algorithm — confirm tile size
// and any divisibility requirements on M, N, K in the definition.
void matmul_tiling(float *A, float *B, float *C, size_t M, size_t N, size_t K);
|
|
|
|
// Setup hook paired by name with matmul_cpu; receives only the extents
// M, N, K and returns nothing.
// NOTE(review): presumably called before matmul_cpu — confirm what, if
// anything, it prepares in the definition.
void matmul_cpu_initialize(size_t M, size_t N, size_t K);
|
|
// Setup hook paired by name with matmul_naive; receives only the extents
// M, N, K and returns nothing.
// NOTE(review): presumably allocates resources sized from M, N, K before the
// first matmul_naive call — confirm in the definition.
void matmul_naive_initialize(size_t M, size_t N, size_t K);
|
|
// Setup hook paired by name with matmul_buffering; receives only the extents
// M, N, K and returns nothing.
// NOTE(review): presumably creates the buffers the "buffering" variant
// relies on — confirm in the definition.
void matmul_buffering_initialize(size_t M, size_t N, size_t K);
|
|
// Setup hook paired by name with matmul_multigpu; receives only the extents
// M, N, K and returns nothing.
// NOTE(review): presumably sets up per-device state for every GPU used —
// confirm device discovery and allocation in the definition.
void matmul_multigpu_initialize(size_t M, size_t N, size_t K);
|
|
// Setup hook paired by name with matmul_cublas; receives only the extents
// M, N, K and returns nothing.
// NOTE(review): presumably creates the library handle and device buffers —
// confirm in the definition.
void matmul_cublas_initialize(size_t M, size_t N, size_t K);
|
|
// Setup hook paired by name with matmul_tiling; receives only the extents
// M, N, K and returns nothing.
// NOTE(review): presumably called before matmul_tiling — confirm what it
// prepares in the definition.
void matmul_tiling_initialize(size_t M, size_t N, size_t K);
|
|
|
|
// Teardown hook paired by name with matmul_cpu; receives only the extents
// M, N, K and returns nothing.
// NOTE(review): presumably undoes matmul_cpu_initialize — confirm what it
// releases in the definition.
void matmul_cpu_finalize(size_t M, size_t N, size_t K);
|
|
// Teardown hook paired by name with matmul_naive; receives only the extents
// M, N, K and returns nothing.
// NOTE(review): presumably releases whatever matmul_naive_initialize set up —
// confirm in the definition.
void matmul_naive_finalize(size_t M, size_t N, size_t K);
|
|
// Teardown hook paired by name with matmul_buffering; receives only the
// extents M, N, K and returns nothing.
// NOTE(review): presumably releases whatever matmul_buffering_initialize set
// up — confirm in the definition.
void matmul_buffering_finalize(size_t M, size_t N, size_t K);
|
|
// Teardown hook paired by name with matmul_multigpu; receives only the
// extents M, N, K and returns nothing.
// NOTE(review): presumably releases per-device state created by
// matmul_multigpu_initialize — confirm in the definition.
void matmul_multigpu_finalize(size_t M, size_t N, size_t K);
|
|
// Teardown hook paired by name with matmul_cublas; receives only the extents
// M, N, K and returns nothing.
// NOTE(review): presumably destroys the library handle and frees device
// buffers — confirm in the definition.
void matmul_cublas_finalize(size_t M, size_t N, size_t K);
|
|
// Teardown hook paired by name with matmul_tiling; receives only the extents
// M, N, K and returns nothing.
// NOTE(review): presumably releases whatever matmul_tiling_initialize set up —
// confirm in the definition.
void matmul_tiling_finalize(size_t M, size_t N, size_t K);
|