#pragma once
/*
 * matmul - prototype only; the definition lives in another translation unit.
 *
 * A, B  Pointers to const float: the callee cannot write through them.
 * C     Pointer to non-const float; presumably the result buffer
 *       (NOTE(review): confirm against the definition).
 * M, N, K
 *       Integer sizes, presumably the matrix dimensions. Which operand each
 *       one describes and the storage layout are not visible from this
 *       header -- TODO confirm against the definition.
 * threads_per_process
 *       Presumably the number of worker threads each process may use
 *       (inferred from the name; verify).
 * mpi_rank, mpi_world_size
 *       Presumably the caller's MPI rank and the total number of ranks
 *       (inferred from the names; verify against the caller).
 *
 * Returns nothing.
 */
void matmul(const float *A,
            const float *B,
            float *C,
            int M, int N, int K,
            int threads_per_process,
            int mpi_rank,
            int mpi_world_size);