/*
 * Convolution kernel split across MPI ranks (see the "seperating for nodes"
 * and "merging output" section markers in the code below).
 *
 * NOTE(review): this text looks damaged by extraction and cannot compile
 * as laid out. Restore it from the original source before changing code.
 *   - The whole file sits on one physical line, so the first "//" (in
 *     front of the commented-out, unnamed "static int (int x, int y)"
 *     helper) turns everything after it into a comment.
 *   - Three #include directives have no header name.
 *   - Two loop headers break off in the middle of their condition
 *     ("for (int i=0; i" and "for(int i=1; i"); whatever stood between
 *     each break and the next visible text is missing. That presumably
 *     covers the work partitioning, the data distribution, the outer loop
 *     nest, the end of convolution() and the start of the function that
 *     owns the trailing free() calls -- TODO confirm against the original.
 *
 * What the visible text establishes:
 *   - convolution() stores _input, _output and _filter in the file-scope
 *     pointers input, output and filter.
 *   - OH and OW are computed from the file-scope H, W, R, S, pad, dilation
 *     and stride. The underscore-prefixed size parameters are not
 *     referenced anywhere in the visible text; the file-scope values are
 *     presumably assigned elsewhere -- verify.
 *   - The function returns early when mpi_rank >= mpi_world_size.
 *   - When mpi_rank != 0, input, filter and output are replaced by buffers
 *     from aligned_alloc(32, ...) sized N*C*H*W, K*C*R*S and N*K*OH*OW
 *     floats, overwriting the pointers taken from the arguments. The
 *     results are not checked for NULL in the visible text.
 *   - ns and ne are variable-length arrays of mpi_world_size ints; the
 *     code that fills and uses them is missing.
 *   - The surviving innermost loop body skips taps whose h or w falls
 *     outside [0, H) or [0, W), accumulates input * filter into o, and
 *     stores o at output[n*K*OH*OW + k*OH*OW + oh*OW + ow].
 *   - The trailing fragment frees input, filter and output when
 *     mpi_rank != 0.
 *
 * NOTE(review): depending on the C standard revision, aligned_alloc may
 * require the size to be a multiple of the alignment (32 here) -- confirm
 * for the target toolchain.
 */
#include "convolution.h" #include #include #include #define MAX_NUM_THREADS 100 #define NTILESIZE 32 #define KTILESIZE 4 //static int (int x, int y) { return (x < y)? x : y; } static float *input, *output, *filter; static int N, C, H, W; static int K, R, S; static int OH, OW; static int pad; static int dilation; static int stride; static int mpi_rank, mpi_world_size; void convolution( float *_input, float *_output, float *_filter, int _N, int _C, int _H, int _W, int _K, int _R, int _S, int _pad, int _dilation, int _stride) { input = _input; output = _output; filter = _filter; OH = (H + 2 * pad - dilation * (R - 1) - 1) / stride + 1; OW = (W + 2 * pad - dilation * (S - 1) - 1) / stride + 1; /*---------- seperating for nodes ----------*/ //MPI_Status status; //MPI_Request request = MPI_REQUEST_NULL; if (mpi_rank >= mpi_world_size) return; if (mpi_rank != 0){ input = (float *) aligned_alloc(32, sizeof(float) * N*C*H*W); filter = (float *) aligned_alloc(32, sizeof(float) * K*C*R*S); output = (float *) aligned_alloc(32, sizeof(float) * N*K*OH*OW); } int ns[mpi_world_size], ne[mpi_world_size]; for (int i=0; i= H || w < 0 || w >= W) continue; float i = input[n * C * H * W + c * H * W + h * W + w]; float f = filter[k * C * R * S + c * R * S + r * S + s]; o += i * f; } } } output[n * K * OH * OW + k * OH * OW + oh * OW + ow] = o; } } } } /*---------- merging output ----------*/ if (mpi_rank == 0){ for(int i=1; i= mpi_world_size) return; if (mpi_rank != 0){ free(input); free(filter); free(output); } }