#include "uNet.h"
#include "util.h"
#include "tensor.h"

// NOTE(review): this file originally listed five system headers whose names
// were lost (only bare `#include` tokens survived). The list below is a best
// guess based on the names this translation unit uses (sqrt, size_t, NULL);
// confirm against version control.
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <vector>

// Parameters
// Naming follows the checkpoint layout: in each "double_conv" group, index 0/3
// are convolution weights and index 1/4 are the batch-norm scale (weight) and
// shift (bias) that follow them.

// inc
Tensor *inc_double_conv_0_weight;
Tensor *inc_double_conv_1_weight;
Tensor *inc_double_conv_1_bias;
Tensor *inc_double_conv_3_weight;
Tensor *inc_double_conv_4_weight;
Tensor *inc_double_conv_4_bias;

// down1
Tensor *down1_maxpool_conv_1_double_conv_0_weight;
Tensor *down1_maxpool_conv_1_double_conv_1_weight;
Tensor *down1_maxpool_conv_1_double_conv_1_bias;
Tensor *down1_maxpool_conv_1_double_conv_3_weight;
Tensor *down1_maxpool_conv_1_double_conv_4_weight;
Tensor *down1_maxpool_conv_1_double_conv_4_bias;

// down2
Tensor *down2_maxpool_conv_1_double_conv_0_weight;
Tensor *down2_maxpool_conv_1_double_conv_1_weight;
Tensor *down2_maxpool_conv_1_double_conv_1_bias;
Tensor *down2_maxpool_conv_1_double_conv_3_weight;
Tensor *down2_maxpool_conv_1_double_conv_4_weight;
Tensor *down2_maxpool_conv_1_double_conv_4_bias;

// down3
Tensor *down3_maxpool_conv_1_double_conv_0_weight;
Tensor *down3_maxpool_conv_1_double_conv_1_weight;
Tensor *down3_maxpool_conv_1_double_conv_1_bias;
Tensor *down3_maxpool_conv_1_double_conv_3_weight;
Tensor *down3_maxpool_conv_1_double_conv_4_weight;
Tensor *down3_maxpool_conv_1_double_conv_4_bias;

// down4
Tensor *down4_maxpool_conv_1_double_conv_0_weight;
Tensor *down4_maxpool_conv_1_double_conv_1_weight;
Tensor *down4_maxpool_conv_1_double_conv_1_bias;
Tensor *down4_maxpool_conv_1_double_conv_3_weight;
Tensor *down4_maxpool_conv_1_double_conv_4_weight;
Tensor *down4_maxpool_conv_1_double_conv_4_bias;

// up1
Tensor *up1_up_weight;
Tensor *up1_up_bias;
Tensor *up1_conv_double_conv_0_weight;
Tensor *up1_conv_double_conv_1_weight;
Tensor *up1_conv_double_conv_1_bias;
Tensor *up1_conv_double_conv_3_weight;
Tensor *up1_conv_double_conv_4_weight;
Tensor *up1_conv_double_conv_4_bias;

// up2
Tensor *up2_up_weight;
Tensor *up2_up_bias;
Tensor *up2_conv_double_conv_0_weight;
Tensor *up2_conv_double_conv_1_weight;
Tensor *up2_conv_double_conv_1_bias;
Tensor *up2_conv_double_conv_3_weight;
Tensor *up2_conv_double_conv_4_weight; Tensor *up2_conv_double_conv_4_bias; Tensor *up3_up_weight; Tensor *up3_up_bias; Tensor *up3_conv_double_conv_0_weight; Tensor *up3_conv_double_conv_1_weight; Tensor *up3_conv_double_conv_1_bias; Tensor *up3_conv_double_conv_3_weight; Tensor *up3_conv_double_conv_4_weight; Tensor *up3_conv_double_conv_4_bias; Tensor *up4_up_weight; Tensor *up4_up_bias; Tensor *up4_conv_double_conv_0_weight; Tensor *up4_conv_double_conv_1_weight; Tensor *up4_conv_double_conv_1_bias; Tensor *up4_conv_double_conv_3_weight; Tensor *up4_conv_double_conv_4_weight; Tensor *up4_conv_double_conv_4_bias; Tensor *outc_conv_weight; Tensor *outc_conv_bias; Tensor *inc_batchnorm_0_running_mean; Tensor *inc_batchnorm_0_running_var; Tensor *down1_batchnorm_0_running_mean; Tensor *down1_batchnorm_0_running_var; Tensor *down2_batchnorm_0_running_mean; Tensor *down2_batchnorm_0_running_var; Tensor *down3_batchnorm_0_running_mean; Tensor *down3_batchnorm_0_running_var; Tensor *down4_batchnorm_0_running_mean; Tensor *down4_batchnorm_0_running_var; Tensor *up1_batchnorm_0_running_mean; Tensor *up1_batchnorm_0_running_var; Tensor *up2_batchnorm_0_running_mean; Tensor *up2_batchnorm_0_running_var; Tensor *up3_batchnorm_0_running_mean; Tensor *up3_batchnorm_0_running_var; Tensor *up4_batchnorm_0_running_mean; Tensor *up4_batchnorm_0_running_var; Tensor *inc_batchnorm_1_running_mean; Tensor *inc_batchnorm_1_running_var; Tensor *down1_batchnorm_1_running_mean; Tensor *down1_batchnorm_1_running_var; Tensor *down2_batchnorm_1_running_mean; Tensor *down2_batchnorm_1_running_var; Tensor *down3_batchnorm_1_running_mean; Tensor *down3_batchnorm_1_running_var; Tensor *down4_batchnorm_1_running_mean; Tensor *down4_batchnorm_1_running_var; Tensor *up1_batchnorm_1_running_mean; Tensor *up1_batchnorm_1_running_var; Tensor *up2_batchnorm_1_running_mean; Tensor *up2_batchnorm_1_running_var; Tensor *up3_batchnorm_1_running_mean; Tensor *up3_batchnorm_1_running_var; Tensor 
*up4_batchnorm_1_running_mean; Tensor *up4_batchnorm_1_running_var; // Activations Tensor *inc_conv_0_output; Tensor *inc_batchnorm_0_output; Tensor *inc_conv_1_output; Tensor *inc_batchnorm_1_output; Tensor *down1_maxpool2d_0_output; Tensor *down1_conv_0_output; Tensor *down1_batchnorm_0_output; Tensor *down1_conv_1_output; Tensor *down1_batchnorm_1_output; Tensor *down2_maxpool2d_0_output; Tensor *down2_conv_0_output; Tensor *down2_batchnorm_0_output; Tensor *down2_conv_1_output; Tensor *down2_batchnorm_1_output; Tensor *down3_maxpool2d_0_output; Tensor *down3_conv_0_output; Tensor *down3_batchnorm_0_output; Tensor *down3_conv_1_output; Tensor *down3_batchnorm_1_output; Tensor *down4_maxpool2d_0_output; Tensor *down4_conv_0_output; Tensor *down4_batchnorm_0_output; Tensor *down4_conv_1_output; Tensor *down4_batchnorm_1_output; Tensor *up1_convt_0_output; Tensor *up1_concat_0_output; Tensor *up1_conv_0_output; Tensor *up1_batchnorm_0_output; Tensor *up1_conv_1_output; Tensor *up1_batchnorm_1_output; Tensor *up2_convt_0_output; Tensor *up2_concat_0_output; Tensor *up2_conv_0_output; Tensor *up2_batchnorm_0_output; Tensor *up2_conv_1_output; Tensor *up2_batchnorm_1_output; Tensor *up3_convt_0_output; Tensor *up3_concat_0_output; Tensor *up3_conv_0_output; Tensor *up3_batchnorm_0_output; Tensor *up3_conv_1_output; Tensor *up3_batchnorm_1_output; Tensor *up4_convt_0_output; Tensor *up4_concat_0_output; Tensor *up4_conv_0_output; Tensor *up4_batchnorm_0_output; Tensor *up4_conv_1_output; Tensor *up4_batchnorm_1_output; Tensor *outc_conv_0_output; // forward declaration, prototype void Conv2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, int stride, int pad, int dilation, bool has_bias); void ReLU(Tensor *inout); void BatchNorm2d(Tensor *input, Tensor *gamma, Tensor *beta, Tensor *running_mean, Tensor *running_var, Tensor *output, const float eps, const float momentum); void ConvTranspose2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, int 
stride, int pad); void MaxPool2d(Tensor *input, Tensor *output); void Concat(Tensor *input1, Tensor *input2, Tensor *output); void uNet_initialize(int, int, char*); void uNet(Tensor*, Tensor*); void uNet_finalize(); /* * uNet */ void uNet(Tensor *input, Tensor *output) { // inc(n_channels, 64) Conv2d(input, inc_double_conv_0_weight, NULL, inc_conv_0_output, 1, 1, 1, false); BatchNorm2d(inc_conv_0_output, inc_double_conv_1_weight, inc_double_conv_1_bias, inc_batchnorm_0_running_mean, inc_batchnorm_0_running_var, inc_batchnorm_0_output, 1e-5, 0.1); ReLU(inc_batchnorm_0_output); Conv2d(inc_batchnorm_0_output, inc_double_conv_3_weight, NULL, inc_conv_1_output, 1, 1, 1, false); BatchNorm2d(inc_conv_1_output, inc_double_conv_4_weight, inc_double_conv_4_bias, inc_batchnorm_1_running_mean, inc_batchnorm_1_running_var, inc_batchnorm_1_output, 1e-5, 0.1); ReLU(inc_batchnorm_1_output); // down1(64, 128) MaxPool2d(inc_batchnorm_1_output, down1_maxpool2d_0_output); Conv2d(down1_maxpool2d_0_output, down1_maxpool_conv_1_double_conv_0_weight, NULL, down1_conv_0_output, 1, 1, 1, false); BatchNorm2d(down1_conv_0_output, down1_maxpool_conv_1_double_conv_1_weight, down1_maxpool_conv_1_double_conv_1_bias, down1_batchnorm_0_running_mean, down1_batchnorm_0_running_var, down1_batchnorm_0_output, 1e-5, 0.1); ReLU(down1_batchnorm_0_output); Conv2d(down1_batchnorm_0_output, down1_maxpool_conv_1_double_conv_3_weight, NULL, down1_conv_1_output, 1, 1, 1, false); BatchNorm2d(down1_conv_1_output, down1_maxpool_conv_1_double_conv_4_weight, down1_maxpool_conv_1_double_conv_4_bias, down1_batchnorm_1_running_mean, down1_batchnorm_1_running_var, down1_batchnorm_1_output, 1e-5, 0.1); ReLU(down1_batchnorm_1_output); // down2(128, 256) MaxPool2d(down1_batchnorm_1_output, down2_maxpool2d_0_output); Conv2d(down2_maxpool2d_0_output, down2_maxpool_conv_1_double_conv_0_weight, NULL, down2_conv_0_output, 1, 1, 1, false); BatchNorm2d(down2_conv_0_output, down2_maxpool_conv_1_double_conv_1_weight, 
down2_maxpool_conv_1_double_conv_1_bias, down2_batchnorm_0_running_mean, down2_batchnorm_0_running_var, down2_batchnorm_0_output, 1e-5, 0.1); ReLU(down2_batchnorm_0_output); Conv2d(down2_batchnorm_0_output, down2_maxpool_conv_1_double_conv_3_weight, NULL, down2_conv_1_output, 1, 1, 1, false); BatchNorm2d(down2_conv_1_output, down2_maxpool_conv_1_double_conv_4_weight, down2_maxpool_conv_1_double_conv_4_bias, down2_batchnorm_1_running_mean, down2_batchnorm_1_running_var, down2_batchnorm_1_output, 1e-5, 0.1); ReLU(down2_batchnorm_1_output); // down3(256, 512) MaxPool2d(down2_batchnorm_1_output, down3_maxpool2d_0_output); Conv2d(down3_maxpool2d_0_output, down3_maxpool_conv_1_double_conv_0_weight, NULL, down3_conv_0_output, 1, 1, 1, false); BatchNorm2d(down3_conv_0_output, down3_maxpool_conv_1_double_conv_1_weight, down3_maxpool_conv_1_double_conv_1_bias, down3_batchnorm_0_running_mean, down3_batchnorm_0_running_var, down3_batchnorm_0_output, 1e-5, 0.1); ReLU(down3_batchnorm_0_output); Conv2d(down3_batchnorm_0_output, down3_maxpool_conv_1_double_conv_3_weight, NULL, down3_conv_1_output, 1, 1, 1, false); BatchNorm2d(down3_conv_1_output, down3_maxpool_conv_1_double_conv_4_weight, down3_maxpool_conv_1_double_conv_4_bias, down3_batchnorm_1_running_mean, down3_batchnorm_1_running_var, down3_batchnorm_1_output, 1e-5, 0.1); ReLU(down3_batchnorm_1_output); // down4(512, 1024) MaxPool2d(down3_batchnorm_1_output, down4_maxpool2d_0_output); Conv2d(down4_maxpool2d_0_output, down4_maxpool_conv_1_double_conv_0_weight, NULL, down4_conv_0_output, 1, 1, 1, false); BatchNorm2d(down4_conv_0_output, down4_maxpool_conv_1_double_conv_1_weight, down4_maxpool_conv_1_double_conv_1_bias, down4_batchnorm_0_running_mean, down4_batchnorm_0_running_var, down4_batchnorm_0_output, 1e-5, 0.1); ReLU(down4_batchnorm_0_output); Conv2d(down4_batchnorm_0_output, down4_maxpool_conv_1_double_conv_3_weight, NULL, down4_conv_1_output, 1, 1, 1, false); BatchNorm2d(down4_conv_1_output, 
down4_maxpool_conv_1_double_conv_4_weight, down4_maxpool_conv_1_double_conv_4_bias, down4_batchnorm_1_running_mean, down4_batchnorm_1_running_var, down4_batchnorm_1_output, 1e-5, 0.1); ReLU(down4_batchnorm_1_output); // up1(1024, 512), (down4_batchnorm_1_output, down3_batchnorm_1_output) ConvTranspose2d(down4_batchnorm_1_output, up1_up_weight, up1_up_bias, up1_convt_0_output, 2, 0); Concat(up1_convt_0_output, down3_batchnorm_1_output, up1_concat_0_output); Conv2d(up1_concat_0_output, up1_conv_double_conv_0_weight, NULL, up1_conv_0_output, 1, 1, 1, false); BatchNorm2d(up1_conv_0_output, up1_conv_double_conv_1_weight, up1_conv_double_conv_1_bias, up1_batchnorm_0_running_mean, up1_batchnorm_0_running_var, up1_batchnorm_0_output, 1e-5, 0.1); ReLU(up1_batchnorm_0_output); Conv2d(up1_batchnorm_0_output, up1_conv_double_conv_3_weight, NULL, up1_conv_1_output, 1, 1, 1, false); BatchNorm2d(up1_conv_1_output, up1_conv_double_conv_4_weight, up1_conv_double_conv_4_bias, up1_batchnorm_1_running_mean, up1_batchnorm_1_running_var, up1_batchnorm_1_output, 1e-5, 0.1); ReLU(up1_batchnorm_1_output); // up2(512, 256), (up1_concat_0_output, down2_batchnorm_1_output) ConvTranspose2d(up1_batchnorm_1_output, up2_up_weight, up2_up_bias, up2_convt_0_output, 2, 0); Concat(up2_convt_0_output, down2_batchnorm_1_output, up2_concat_0_output); Conv2d(up2_concat_0_output, up2_conv_double_conv_0_weight, NULL, up2_conv_0_output, 1, 1, 1, false); BatchNorm2d(up2_conv_0_output, up2_conv_double_conv_1_weight, up2_conv_double_conv_1_bias, up2_batchnorm_0_running_mean, up2_batchnorm_0_running_var, up2_batchnorm_0_output, 1e-5, 0.1); ReLU(up2_batchnorm_0_output); Conv2d(up2_batchnorm_0_output, up2_conv_double_conv_3_weight, NULL, up2_conv_1_output, 1, 1, 1, false); BatchNorm2d(up2_conv_1_output, up2_conv_double_conv_4_weight, up2_conv_double_conv_4_bias, up2_batchnorm_1_running_mean, up2_batchnorm_1_running_var, up2_batchnorm_1_output, 1e-5, 0.1); ReLU(up2_batchnorm_1_output); // up3(256, 128), 
(up2_concat_0_output, down1_batchnorm_1_output) ConvTranspose2d(up2_batchnorm_1_output, up3_up_weight, up3_up_bias, up3_convt_0_output, 2, 0); Concat(up3_convt_0_output, down1_batchnorm_1_output, up3_concat_0_output); Conv2d(up3_concat_0_output, up3_conv_double_conv_0_weight, NULL, up3_conv_0_output, 1, 1, 1, false); BatchNorm2d(up3_conv_0_output, up3_conv_double_conv_1_weight, up3_conv_double_conv_1_bias, up3_batchnorm_0_running_mean, up3_batchnorm_0_running_var, up3_batchnorm_0_output, 1e-5, 0.1); ReLU(up3_batchnorm_0_output); Conv2d(up3_batchnorm_0_output, up3_conv_double_conv_3_weight, NULL, up3_conv_1_output, 1, 1, 1, false); BatchNorm2d(up3_conv_1_output, up3_conv_double_conv_4_weight, up3_conv_double_conv_4_bias, up3_batchnorm_1_running_mean, up3_batchnorm_1_running_var, up3_batchnorm_1_output, 1e-5, 0.1); ReLU(up3_batchnorm_1_output); // up4(128, 64), (up3_concat_0_output, inc_batchnorm_1_output) ConvTranspose2d(up3_batchnorm_1_output, up4_up_weight, up4_up_bias, up4_convt_0_output, 2, 0); Concat(up4_convt_0_output, inc_batchnorm_1_output, up4_concat_0_output); Conv2d(up4_concat_0_output, up4_conv_double_conv_0_weight, NULL, up4_conv_0_output, 1, 1, 1, false); BatchNorm2d(up4_conv_0_output, up4_conv_double_conv_1_weight, up4_conv_double_conv_1_bias, up4_batchnorm_0_running_mean, up4_batchnorm_0_running_var, up4_batchnorm_0_output, 1e-5, 0.1); ReLU(up4_batchnorm_0_output); Conv2d(up4_batchnorm_0_output, up4_conv_double_conv_3_weight, NULL, up4_conv_1_output, 1, 1, 1, false); BatchNorm2d(up4_conv_1_output, up4_conv_double_conv_4_weight, up4_conv_double_conv_4_bias, up4_batchnorm_1_running_mean, up4_batchnorm_1_running_var, up4_batchnorm_1_output, 1e-5, 0.1); ReLU(up4_batchnorm_1_output); // outc(64, n_classes) Conv2d(up4_batchnorm_1_output, outc_conv_weight, outc_conv_bias, output, 1, 0, 1, true); } /* * Convolution * input shape = (N, C, H, W) * weight shape = (K, C, R, S) * bias shape = (K) * output shape = (N, K, OH, OW) * where OH = (H + 2 * pad - 
dilation * (R - 1) - 1) / stride + 1, * OW = (W + 2 * pad - dilation * (S - 1) - 1) / stride + 1 */ void Conv2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, int stride, int pad, int dilation, bool has_bias) { int C = input->shape[1], H = input->shape[2], W = input->shape[3]; int K = weight->shape[0], R = weight->shape[2], S = weight->shape[3]; int OH = output->shape[2], OW = output->shape[3]; CHECK_ERROR(OH == (H + 2 * pad - dilation * (R - 1) - 1) / stride + 1, "[Conv2d] Output height mismatch"); CHECK_ERROR(OW == (W + 2 * pad - dilation * (S - 1) - 1) / stride + 1, "[Conv2d] Output width mismatch"); CHECK_ERROR(weight->shape[1] == C && (!has_bias || bias->shape[0] == K) && output->shape[1] == K, "[Conv2d] Channel size mismatch"); #pragma omp parallel for for (int k = 0; k < K; ++k) { for (int oh = 0; oh < OH; ++oh) { for (int ow = 0; ow < OW; ++ow) { float o = has_bias ? bias->buf[k] : 0; for (int c = 0; c < C; ++c) { for (int r = 0; r < R; ++r) { for (int s = 0; s < S; ++s) { int h = oh * stride - pad + r * dilation; int w = ow * stride - pad + s * dilation; if (h < 0 || h >= H || w < 0 || w >= W) continue; float i = input->buf[c * H * W + h * W + w]; float f = weight->buf[k * C * R * S + c * R * S + r * S + s]; o += i * f; } } } output->buf[k * OH * OW + oh * OW + ow] = o; } } } } /* * ReLU * input shape = (N, C, H, W) * output shape = (N, C, H, W) * Formula: y = max(x, 0) */ void ReLU(Tensor *inout) { int C = inout->shape[1], H = inout->shape[2], W = inout->shape[3]; #pragma omp parallel for for (int c = 0; c < C; ++c) { for (int h = 0; h < H; ++h) { for (int w = 0; w < W; ++w) { int idx = c * H * W + h * W + w; inout->buf[idx] = inout->buf[idx] > 0 ? 
inout->buf[idx] : 0; } } } } /* * Batch Normaliztion * input shape = (N, C, H, W) * gamma shape = (C) * beta shape = (C) * output shape = (N, C, H, W) */ void BatchNorm2d(Tensor *input, Tensor *gamma, Tensor *beta, Tensor *running_mean, Tensor *running_var, Tensor *output, const float eps, const float momentum) { int N = input->shape[0], C = input->shape[1], H = input->shape[2], W = input->shape[3]; CHECK_ERROR(gamma->shape[0] == C && beta->shape[0] == C, "[BatchNorm2d] gamma, beta shape mismatch"); CHECK_ERROR(output->shape[1] == C && output->shape[2] == H && output->shape[3] == W, "[BatchNorm2d] Output shape mismatch"); #pragma omp parallel for for (int c=0; cbuf[c]; float variance = running_var->buf[c]; float x = input->buf[n * C * H * W + c * H * W + h * W + w]; float x_hat = (x - mean) / sqrt(variance + eps); output->buf[n * C * H * W + c * H * W + h * W + w] = gamma->buf[c] * x_hat + beta->buf[c]; } } } } } /* * Transposed convolution * input shape = (N, C, H, W) * weight shape = (C, K, R, S) * bias shape = (K) * output shape = (N, K, OH, OW) * where OH = (H - 1) * stride - 2 * pad + R * OW = (W - 1) * stride - 2 * pad + S */ void ConvTranspose2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, int stride, int pad) { int C = input->shape[1], H = input->shape[2], W = input->shape[3]; int K = weight->shape[1], R = weight->shape[2], S = weight->shape[3]; int OH = output->shape[2], OW = output->shape[3]; CHECK_ERROR(OH == (H - 1) * stride - 2 * pad + R, "[ConvT2d] Output height mismatch"); CHECK_ERROR(OW == (W - 1) * stride - 2 * pad + S, "[ConvT2d] Output width mismatch"); CHECK_ERROR(weight->shape[0] == C && bias->shape[0] == K && output->shape[1] == K, "[ConvT2d] Channel size mismatch"); #pragma omp parallel for for (int k = 0; k < K; ++k) { for (int oh = 0; oh < OH; ++oh) { for (int ow = 0; ow < OW; ++ow) { float o = bias->buf[k]; for (int c = 0; c < C; ++c) { for (int r = 0; r < R; ++r) { for (int s = 0; s < S; ++s) { if ((oh + pad - r) % stride 
!= 0) continue; if ((ow + pad - s) % stride != 0) continue; int h = (oh + pad - r) / stride; int w = (ow + pad - s) / stride; if (h < 0 || h >= H || w < 0 || w >= W) continue; float i = input->buf[c * H * W + h * W + w]; float f = weight->buf[c * K * R * S + k * R * S + r * S + s]; o += i * f; } } } output->buf[k * OH * OW + oh * OW + ow] = o; } } } } float max4(float in0, float in1, float in2, float in3){ float max = in0; if (in1 > max) max = in1; if (in2 > max) max = in2; if (in3 > max) max = in3; return max; } /* * MaxPool2d * input shape = (N, C, H, W) * output shape = (N, OC, OH, OW) * where OH = H / 2 * OW = W / 2 */ void MaxPool2d(Tensor *input, Tensor *output){ int C = input->shape[1], H = input->shape[2], W = input->shape[3]; int OC = output->shape[1], OH = output->shape[2], OW = output->shape[3]; CHECK_ERROR(OW == W / 2, "[MaxPool2d] Output width mismatch"); CHECK_ERROR(OH == H / 2, "[MaxPool2d] Output height mismatch"); CHECK_ERROR(OC== C, "[MaxPool2d] Output channel mismatch"); #pragma omp parallel for for (int oc=0; ocbuf[oc * H * W + 2 * oh * W + 2 * ow]; float in1 = input->buf[oc * H * W + 2 * oh * W + 2 * ow + 1]; float in2 = input->buf[oc * H * W + (2 * oh + 1) * W + 2 * ow]; float in3 = input->buf[oc * H * W + (2 * oh + 1) * W + 2 * ow + 1]; output->buf[oc * OH * OW + oh * OW + ow] = max4(in0, in1, in2, in3); } } } } /* * Concat * input1 shape = (N, C1, H1, W1) * input2 shape = (N, C2, H2, W2) * output shape = (N, OC, OH, OW) * where OH = H2, H1 * OW = W2 = W1 + 1 */ void Concat(Tensor *input1, Tensor *input2, Tensor *output){ int C1 = input1->shape[1], H1 = input1->shape[2], W1 = input1->shape[3]; int C2 = input2->shape[1], H2 = input2->shape[2], W2 = input2->shape[3]; int OC = output->shape[1], OH = output->shape[2], OW = output->shape[3]; CHECK_ERROR(OC == C1 * 2 && OC == C2 * 2, "[Concat] Output channel mismatch"); CHECK_ERROR(OW == W1 + 1 && OW == W2, "[Concat] Output width mismatch"); CHECK_ERROR(OH == H1 && OH == H2, "[Concat] Output height 
mismatch"); #pragma omp parallel for for (int oc=0; ocbuf[oc * OH * OW + oh * OW + ow] = input2->buf[oc * OH * OW + oh * OW + ow]; } } } #pragma omp parallel for for (int oc=OC/2; ocbuf[oc * OH * OW + oh * OW + ow] = 0.0; // zero padding else output->buf[oc * OH * OW + oh * OW + ow] = input1->buf[(oc-OC/2) * H1 * W1 + oh * W1 + ow]; } } } } /* * uNet_initialize * Initialize the model. Do input-independent job here. */ void uNet_initialize(int N, int random_seed, char *parameter_fname) { size_t parameter_binary_size = 0; float *parameter = (float *)read_binary(parameter_fname, ¶meter_binary_size); inc_double_conv_0_weight = new Tensor({64,3,3,3}, parameter + OFFSET0); inc_double_conv_1_weight = new Tensor({64}, parameter + OFFSET1); inc_double_conv_1_bias = new Tensor({64}, parameter + OFFSET2); inc_double_conv_3_weight = new Tensor({64,64,3,3}, parameter + OFFSET3); inc_double_conv_4_weight = new Tensor({64}, parameter + OFFSET4); inc_double_conv_4_bias = new Tensor({64}, parameter + OFFSET5); down1_maxpool_conv_1_double_conv_0_weight = new Tensor({128,64,3,3}, parameter + OFFSET6); down1_maxpool_conv_1_double_conv_1_weight = new Tensor({128}, parameter + OFFSET7); down1_maxpool_conv_1_double_conv_1_bias = new Tensor({128}, parameter + OFFSET8); down1_maxpool_conv_1_double_conv_3_weight = new Tensor({128,128,3,3}, parameter + OFFSET9); down1_maxpool_conv_1_double_conv_4_weight = new Tensor({128}, parameter + OFFSET10); down1_maxpool_conv_1_double_conv_4_bias = new Tensor({128}, parameter + OFFSET11); down2_maxpool_conv_1_double_conv_0_weight = new Tensor({256,128,3,3}, parameter + OFFSET12); down2_maxpool_conv_1_double_conv_1_weight = new Tensor({256}, parameter + OFFSET13); down2_maxpool_conv_1_double_conv_1_bias = new Tensor({256}, parameter + OFFSET14); down2_maxpool_conv_1_double_conv_3_weight = new Tensor({256,256,3,3}, parameter + OFFSET15); down2_maxpool_conv_1_double_conv_4_weight = new Tensor({256}, parameter + OFFSET16); 
down2_maxpool_conv_1_double_conv_4_bias = new Tensor({256}, parameter + OFFSET17); down3_maxpool_conv_1_double_conv_0_weight = new Tensor({512,256,3,3}, parameter + OFFSET18); down3_maxpool_conv_1_double_conv_1_weight = new Tensor({512}, parameter + OFFSET19); down3_maxpool_conv_1_double_conv_1_bias = new Tensor({512}, parameter + OFFSET20); down3_maxpool_conv_1_double_conv_3_weight = new Tensor({512,512,3,3}, parameter + OFFSET21); down3_maxpool_conv_1_double_conv_4_weight = new Tensor({512}, parameter + OFFSET22); down3_maxpool_conv_1_double_conv_4_bias = new Tensor({512}, parameter + OFFSET23); down4_maxpool_conv_1_double_conv_0_weight = new Tensor({1024,512,3,3}, parameter + OFFSET24); down4_maxpool_conv_1_double_conv_1_weight = new Tensor({1024}, parameter + OFFSET25); down4_maxpool_conv_1_double_conv_1_bias = new Tensor({1024}, parameter + OFFSET26); down4_maxpool_conv_1_double_conv_3_weight = new Tensor({1024,1024,3,3}, parameter + OFFSET27); down4_maxpool_conv_1_double_conv_4_weight = new Tensor({1024}, parameter + OFFSET28); down4_maxpool_conv_1_double_conv_4_bias = new Tensor({1024}, parameter + OFFSET29); up1_up_weight = new Tensor({1024,512,2,2}, parameter + OFFSET30); up1_up_bias = new Tensor({512}, parameter + OFFSET31); up1_conv_double_conv_0_weight = new Tensor({512,1024,3,3}, parameter + OFFSET32); up1_conv_double_conv_1_weight = new Tensor({512}, parameter + OFFSET33); up1_conv_double_conv_1_bias = new Tensor({512}, parameter + OFFSET34); up1_conv_double_conv_3_weight = new Tensor({512,512,3,3}, parameter + OFFSET35); up1_conv_double_conv_4_weight = new Tensor({512}, parameter + OFFSET36); up1_conv_double_conv_4_bias = new Tensor({512}, parameter + OFFSET37); up2_up_weight = new Tensor({512,256,2,2}, parameter + OFFSET38); up2_up_bias = new Tensor({256}, parameter + OFFSET39); up2_conv_double_conv_0_weight = new Tensor({256,512,3,3}, parameter + OFFSET40); up2_conv_double_conv_1_weight = new Tensor({256}, parameter + OFFSET41); 
up2_conv_double_conv_1_bias = new Tensor({256}, parameter + OFFSET42); up2_conv_double_conv_3_weight = new Tensor({256,256,3,3}, parameter + OFFSET43); up2_conv_double_conv_4_weight = new Tensor({256}, parameter + OFFSET44); up2_conv_double_conv_4_bias = new Tensor({256}, parameter + OFFSET45); up3_up_weight = new Tensor({256,128,2,2}, parameter + OFFSET46); up3_up_bias = new Tensor({128}, parameter + OFFSET47); up3_conv_double_conv_0_weight = new Tensor({128,256,3,3}, parameter + OFFSET48); up3_conv_double_conv_1_weight = new Tensor({128}, parameter + OFFSET49); up3_conv_double_conv_1_bias = new Tensor({128}, parameter + OFFSET50); up3_conv_double_conv_3_weight = new Tensor({128,128,3,3}, parameter + OFFSET51); up3_conv_double_conv_4_weight = new Tensor({128}, parameter + OFFSET52); up3_conv_double_conv_4_bias = new Tensor({128}, parameter + OFFSET53); up4_up_weight = new Tensor({128,64,2,2}, parameter + OFFSET54); up4_up_bias = new Tensor({64}, parameter + OFFSET55); up4_conv_double_conv_0_weight = new Tensor({64,128,3,3}, parameter + OFFSET56); up4_conv_double_conv_1_weight = new Tensor({64}, parameter + OFFSET57); up4_conv_double_conv_1_bias = new Tensor({64}, parameter + OFFSET58); up4_conv_double_conv_3_weight = new Tensor({64,64,3,3}, parameter + OFFSET59); up4_conv_double_conv_4_weight = new Tensor({64}, parameter + OFFSET60); up4_conv_double_conv_4_bias = new Tensor({64}, parameter + OFFSET61); outc_conv_weight = new Tensor({2,64,1,1}, parameter + OFFSET62); outc_conv_bias = new Tensor({2}, parameter + OFFSET63); inc_batchnorm_0_running_mean = new Tensor({64}, parameter + OFFSET64); inc_batchnorm_0_running_var = new Tensor({64}, parameter + OFFSET65); inc_batchnorm_1_running_mean = new Tensor({64}, parameter + OFFSET66); inc_batchnorm_1_running_var = new Tensor({64}, parameter + OFFSET67); down1_batchnorm_0_running_mean = new Tensor({128}, parameter + OFFSET68); down1_batchnorm_0_running_var = new Tensor({128}, parameter + OFFSET69); 
down1_batchnorm_1_running_mean = new Tensor({128}, parameter + OFFSET70); down1_batchnorm_1_running_var = new Tensor({128}, parameter + OFFSET71); down2_batchnorm_0_running_mean = new Tensor({256}, parameter + OFFSET72); down2_batchnorm_0_running_var = new Tensor({256}, parameter + OFFSET73); down2_batchnorm_1_running_mean = new Tensor({256}, parameter + OFFSET74); down2_batchnorm_1_running_var = new Tensor({256}, parameter + OFFSET75); down3_batchnorm_0_running_mean = new Tensor({512}, parameter + OFFSET76); down3_batchnorm_0_running_var = new Tensor({512}, parameter + OFFSET77); down3_batchnorm_1_running_mean = new Tensor({512}, parameter + OFFSET78); down3_batchnorm_1_running_var = new Tensor({512}, parameter + OFFSET79); down4_batchnorm_0_running_mean = new Tensor({1024}, parameter + OFFSET80); down4_batchnorm_0_running_var = new Tensor({1024}, parameter + OFFSET81); down4_batchnorm_1_running_mean = new Tensor({1024}, parameter + OFFSET82); down4_batchnorm_1_running_var = new Tensor({1024}, parameter + OFFSET83); up1_batchnorm_0_running_mean = new Tensor({512}, parameter + OFFSET84); up1_batchnorm_0_running_var = new Tensor({512}, parameter + OFFSET85); up1_batchnorm_1_running_mean = new Tensor({512}, parameter + OFFSET86); up1_batchnorm_1_running_var = new Tensor({512}, parameter + OFFSET87); up2_batchnorm_0_running_mean = new Tensor({256}, parameter + OFFSET88); up2_batchnorm_0_running_var = new Tensor({256}, parameter + OFFSET89); up2_batchnorm_1_running_mean = new Tensor({256}, parameter + OFFSET90); up2_batchnorm_1_running_var = new Tensor({256}, parameter + OFFSET91); up3_batchnorm_0_running_mean = new Tensor({128}, parameter + OFFSET92); up3_batchnorm_0_running_var = new Tensor({128}, parameter + OFFSET93); up3_batchnorm_1_running_mean = new Tensor({128}, parameter + OFFSET94); up3_batchnorm_1_running_var = new Tensor({128}, parameter + OFFSET95); up4_batchnorm_0_running_mean = new Tensor({64}, parameter + OFFSET96); up4_batchnorm_0_running_var = new 
Tensor({64}, parameter + OFFSET97); up4_batchnorm_1_running_mean = new Tensor({64}, parameter + OFFSET98); up4_batchnorm_1_running_var = new Tensor({64}, parameter + OFFSET99); inc_conv_0_output = new Tensor({1, 64, 640, 959}); inc_batchnorm_0_output = new Tensor({1, 64, 640, 959}); inc_conv_1_output = new Tensor({1, 64, 640, 959}); inc_batchnorm_1_output = new Tensor({1, 64, 640, 959}); down1_maxpool2d_0_output = new Tensor({1, 64, 320, 479}); down1_conv_0_output = new Tensor({1, 128, 320, 479}); down1_batchnorm_0_output = new Tensor({1, 128, 320, 479}); down1_conv_1_output = new Tensor({1, 128, 320, 479}); down1_batchnorm_1_output = new Tensor({1, 128, 320, 479}); down2_maxpool2d_0_output = new Tensor({1, 128, 160, 239}); down2_conv_0_output = new Tensor({1, 256, 160, 239}); down2_batchnorm_0_output = new Tensor({1, 256, 160, 239}); down2_conv_1_output = new Tensor({1, 256, 160, 239}); down2_batchnorm_1_output = new Tensor({1, 256, 160, 239}); down3_maxpool2d_0_output = new Tensor({1, 256, 80, 119}); down3_conv_0_output = new Tensor({1, 512, 80, 119}); down3_batchnorm_0_output = new Tensor({1, 512, 80, 119}); down3_conv_1_output = new Tensor({1, 512, 80, 119}); down3_batchnorm_1_output = new Tensor({1, 512, 80, 119}); down4_maxpool2d_0_output = new Tensor({1, 512, 40, 59}); down4_conv_0_output = new Tensor({1, 1024, 40, 59}); down4_batchnorm_0_output = new Tensor({1, 1024, 40, 59}); down4_conv_1_output = new Tensor({1, 1024, 40, 59}); down4_batchnorm_1_output = new Tensor({1, 1024, 40, 59}); up1_convt_0_output = new Tensor({1, 512, 80, 118}); up1_concat_0_output = new Tensor({1, 1024, 80, 119}); up1_conv_0_output = new Tensor({1, 512, 80, 119}); up1_batchnorm_0_output = new Tensor({1, 512, 80, 119}); up1_conv_1_output = new Tensor({1, 512, 80, 119}); up1_batchnorm_1_output = new Tensor({1, 512, 80, 119}); up2_convt_0_output = new Tensor({1, 256, 160, 238}); up2_concat_0_output = new Tensor({1, 512, 160, 239}); up2_conv_0_output = new Tensor({1, 256, 160, 239}); 
up2_batchnorm_0_output = new Tensor({1, 256, 160, 239}); up2_conv_1_output = new Tensor({1, 256, 160, 239}); up2_batchnorm_1_output = new Tensor({1, 256, 160, 239}); up3_convt_0_output = new Tensor({1, 128, 320, 478}); up3_concat_0_output = new Tensor({1, 256, 320, 479}); up3_conv_0_output = new Tensor({1, 128, 320, 479}); up3_batchnorm_0_output = new Tensor({1, 128, 320, 479}); up3_conv_1_output = new Tensor({1, 128, 320, 479}); up3_batchnorm_1_output = new Tensor({1, 128, 320, 479}); up4_convt_0_output = new Tensor({1, 64, 640, 958}); up4_concat_0_output = new Tensor({1, 128, 640, 959}); up4_conv_0_output = new Tensor({1, 64, 640, 959}); up4_batchnorm_0_output = new Tensor({1, 64, 640, 959}); up4_conv_1_output = new Tensor({1, 64, 640, 959}); up4_batchnorm_1_output = new Tensor({1, 64, 640, 959}); outc_conv_0_output = new Tensor({1, 2, 640, 959}); } /* * uNet_finalize * Finalize the model. */ void uNet_finalize() { // delete parameters delete inc_double_conv_0_weight; delete inc_double_conv_1_weight; delete inc_double_conv_1_bias; delete inc_double_conv_3_weight; delete inc_double_conv_4_weight; delete inc_double_conv_4_bias; delete down1_maxpool_conv_1_double_conv_0_weight; delete down1_maxpool_conv_1_double_conv_1_weight; delete down1_maxpool_conv_1_double_conv_1_bias; delete down1_maxpool_conv_1_double_conv_3_weight; delete down1_maxpool_conv_1_double_conv_4_weight; delete down1_maxpool_conv_1_double_conv_4_bias; delete down2_maxpool_conv_1_double_conv_0_weight; delete down2_maxpool_conv_1_double_conv_1_weight; delete down2_maxpool_conv_1_double_conv_1_bias; delete down2_maxpool_conv_1_double_conv_3_weight; delete down2_maxpool_conv_1_double_conv_4_weight; delete down2_maxpool_conv_1_double_conv_4_bias; delete down3_maxpool_conv_1_double_conv_0_weight; delete down3_maxpool_conv_1_double_conv_1_weight; delete down3_maxpool_conv_1_double_conv_1_bias; delete down3_maxpool_conv_1_double_conv_3_weight; delete down3_maxpool_conv_1_double_conv_4_weight; delete 
down3_maxpool_conv_1_double_conv_4_bias; delete down4_maxpool_conv_1_double_conv_0_weight; delete down4_maxpool_conv_1_double_conv_1_weight; delete down4_maxpool_conv_1_double_conv_1_bias; delete down4_maxpool_conv_1_double_conv_3_weight; delete down4_maxpool_conv_1_double_conv_4_weight; delete down4_maxpool_conv_1_double_conv_4_bias; delete up1_up_weight; delete up1_up_bias; delete up1_conv_double_conv_0_weight; delete up1_conv_double_conv_1_weight; delete up1_conv_double_conv_1_bias ; delete up1_conv_double_conv_3_weight; delete up1_conv_double_conv_4_weight; delete up1_conv_double_conv_4_bias ; delete up2_up_weight; delete up2_up_bias; delete up2_conv_double_conv_0_weight; delete up2_conv_double_conv_1_weight; delete up2_conv_double_conv_1_bias; delete up2_conv_double_conv_3_weight; delete up2_conv_double_conv_4_weight; delete up2_conv_double_conv_4_bias; delete up3_up_weight; delete up3_up_bias; delete up3_conv_double_conv_0_weight; delete up3_conv_double_conv_1_weight; delete up3_conv_double_conv_1_bias; delete up3_conv_double_conv_3_weight; delete up3_conv_double_conv_4_weight; delete up3_conv_double_conv_4_bias; delete up4_up_weight; delete up4_up_bias; delete up4_conv_double_conv_0_weight; delete up4_conv_double_conv_1_weight; delete up4_conv_double_conv_1_bias; delete up4_conv_double_conv_3_weight; delete up4_conv_double_conv_4_weight; delete up4_conv_double_conv_4_bias; delete outc_conv_weight; delete outc_conv_bias; delete inc_batchnorm_0_running_mean; delete inc_batchnorm_0_running_var; delete down1_batchnorm_0_running_mean; delete down1_batchnorm_0_running_var; delete down2_batchnorm_0_running_mean; delete down2_batchnorm_0_running_var; delete down3_batchnorm_0_running_mean; delete down3_batchnorm_0_running_var; delete down4_batchnorm_0_running_mean; delete down4_batchnorm_0_running_var; delete up1_batchnorm_0_running_mean; delete up1_batchnorm_0_running_var; delete up2_batchnorm_0_running_mean; delete up2_batchnorm_0_running_var; delete 
up3_batchnorm_0_running_mean; delete up3_batchnorm_0_running_var; delete up4_batchnorm_0_running_mean; delete up4_batchnorm_0_running_var; delete inc_batchnorm_1_running_mean; delete inc_batchnorm_1_running_var; delete down1_batchnorm_1_running_mean; delete down1_batchnorm_1_running_var; delete down2_batchnorm_1_running_mean; delete down2_batchnorm_1_running_var; delete down3_batchnorm_1_running_mean; delete down3_batchnorm_1_running_var; delete down4_batchnorm_1_running_mean; delete down4_batchnorm_1_running_var; delete up1_batchnorm_1_running_mean; delete up1_batchnorm_1_running_var; delete up2_batchnorm_1_running_mean; delete up2_batchnorm_1_running_var; delete up3_batchnorm_1_running_mean; delete up3_batchnorm_1_running_var; delete up4_batchnorm_1_running_mean; delete up4_batchnorm_1_running_var; // delete activations delete inc_conv_0_output; delete inc_batchnorm_0_output; delete inc_conv_1_output; delete inc_batchnorm_1_output; delete down1_maxpool2d_0_output; delete down1_conv_0_output; delete down1_batchnorm_0_output; delete down1_conv_1_output; delete down1_batchnorm_1_output; delete down2_maxpool2d_0_output; delete down2_conv_0_output; delete down2_batchnorm_0_output; delete down2_conv_1_output; delete down2_batchnorm_1_output; delete down3_maxpool2d_0_output; delete down3_conv_0_output; delete down3_batchnorm_0_output; delete down3_conv_1_output; delete down3_batchnorm_1_output; delete down4_maxpool2d_0_output; delete down4_conv_0_output; delete down4_batchnorm_0_output; delete down4_conv_1_output; delete down4_batchnorm_1_output; delete up1_convt_0_output; delete up1_concat_0_output; delete up2_convt_0_output; delete up2_concat_0_output; delete up3_convt_0_output; delete up3_concat_0_output; delete up4_convt_0_output; delete up4_concat_0_output; delete outc_conv_0_output; delete up1_conv_0_output; delete up1_batchnorm_0_output; delete up1_conv_1_output; delete up1_batchnorm_1_output; delete up2_conv_0_output; delete up2_batchnorm_0_output; delete 
up2_conv_1_output; delete up2_batchnorm_1_output; delete up3_conv_0_output; delete up3_batchnorm_0_output; delete up3_conv_1_output; delete up3_batchnorm_1_output; delete up4_conv_0_output; delete up4_batchnorm_0_output; delete up4_conv_1_output; delete up4_batchnorm_1_output; }