diff --git a/APWS23/project/main.cpp b/APWS23/project/main.cpp index b821eaa..2b15dc5 100644 --- a/APWS23/project/main.cpp +++ b/APWS23/project/main.cpp @@ -15,9 +15,8 @@ char *parameter_fname; char *output_fname; char *input_fname; - int main(int argc, char **argv) { - + check_and_parse_args(argc, argv); print_model(); @@ -26,16 +25,16 @@ int main(int argc, char **argv) { Tensor *input = new Tensor({N, 3, 640, 959}); Tensor *output = new Tensor({N, 2, 640, 959}); - + size_t input_size = 0; - read_binary((void*)input->buf, input_fname, &input_size); + read_binary((void *)input->buf, input_fname, &input_size); printf(" process %d image(s)...", N); - fflush(stdout); + fflush(stdout); // warm_up printf("\nWarmimg up."); - for(int i = 0 ; i < WARM_UP ; ++i){ + for (int i = 0; i < WARM_UP; ++i) { uNet(input, output, N); printf("."); } @@ -45,25 +44,23 @@ int main(int argc, char **argv) { // run uNet and measure time printf("\nProcess."); double uNet_st = get_time(); - for(int j = 0 ; j < MEASURE ; ++j){ + for (int j = 0; j < MEASURE; ++j) { uNet(input, output, N); printf("."); } cudaDeviceSynchronize(); - double uNet_en = get_time(); + double uNet_en = get_time(); printf("\n"); - double elapsed_time = uNet_en - uNet_st; - printf("Done! (%lf img/sec)\n", N/elapsed_time/MEASURE); + double elapsed_time = uNet_en - uNet_st; + printf("Done! (%lf img/sec)\n", N / elapsed_time / MEASURE); - write_binary((void*)output->buf, output_fname, (size_t)(N * 2 * 640 * 959)); + write_binary((void *)output->buf, output_fname, (size_t)(N * 2 * 640 * 959)); printf(" Writing to %s ...", output_fname); fflush(stdout); - + printf("Done!\n\n"); // Finalize program uNet_finalize(); } - - diff --git a/APWS23/project/tensor.h b/APWS23/project/tensor.h index 29c54f5..9895cd0 100644 --- a/APWS23/project/tensor.h +++ b/APWS23/project/tensor.h @@ -6,32 +6,32 @@ // You can modify the data structure as you want struct Tensor { - // Alloc memory + // Alloc memory Tensor(std::vector shape_) { ndim = shape_.size(); for (int i = 0; i < ndim; i++) { shape[i] = shape_[i]; } int n = num_elem(); - buf = (float*)malloc(n * sizeof(float)); + buf = (float *)malloc(n * sizeof(float)); } - - // Alloc memory and copy + + // Alloc memory and copy Tensor(std::vector shape_, float *buf_) { ndim = shape_.size(); for (int i = 0; i < ndim; i++) { shape[i] = shape_[i]; } int n = num_elem(); - buf = (float*)malloc(n * sizeof(float)); - for (int i=0; i -#include -#include -#include #include +#include +#include +#include +#include // Parameters for U-Net Tensor *inc_double_conv_0_weight; @@ -43,10 +43,10 @@ Tensor *up1_up_weight; Tensor *up1_up_bias; Tensor *up1_conv_double_conv_0_weight; Tensor *up1_conv_double_conv_1_weight; -Tensor *up1_conv_double_conv_1_bias ; +Tensor *up1_conv_double_conv_1_bias; Tensor *up1_conv_double_conv_3_weight; Tensor *up1_conv_double_conv_4_weight; -Tensor *up1_conv_double_conv_4_bias ; +Tensor *up1_conv_double_conv_4_bias; Tensor *up2_up_weight; Tensor *up2_up_bias; Tensor *up2_conv_double_conv_0_weight; @@ -149,7 +149,7 @@ Tensor *up2_conv_1_output; Tensor *up2_batchnorm_1_output; Tensor *up3_convt_0_output; Tensor *up3_concat_0_output; -Tensor *up3_conv_0_output; +Tensor *up3_conv_0_output; Tensor *up3_batchnorm_0_output; Tensor *up3_conv_1_output; Tensor *up3_batchnorm_1_output; @@ -162,18 +162,22 @@ Tensor *up4_batchnorm_1_output; Tensor *outc_conv_0_output; // forward declaration, prototype -void Conv2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, int stride, int pad, int dilation, bool has_bias); +void Conv2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, + int stride, int pad, int dilation, bool has_bias); void ReLU(Tensor *inout); -void BatchNorm2d(Tensor *input, Tensor *gamma, Tensor *beta, Tensor *running_mean, Tensor *running_var, Tensor *output, const float eps, const float momentum); -void ConvTranspose2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, int stride, int pad); +void BatchNorm2d(Tensor *input, Tensor *gamma, Tensor *beta, + Tensor *running_mean, Tensor *running_var, Tensor *output, + const float eps, const float momentum); +void ConvTranspose2d(Tensor *input, Tensor *weight, Tensor *bias, + Tensor *output, int stride, int pad); void MaxPool2d(Tensor *input, Tensor *output); void Concat(Tensor *input1, Tensor *input2, Tensor *output); -void uNet_initialize(int, int, char*); -void uNet(Tensor*, Tensor*); +void uNet_initialize(int, int, char *); +void uNet(Tensor *, Tensor *); void uNet_finalize(); -/* - * uNet +/* + * uNet * This model identifies the boundaries of the cars in an image file (input.bin) * and removes the background. */ @@ -182,98 +186,167 @@ void uNet(Tensor *inputN, Tensor *outputN, int N) { Tensor *input = new Tensor({1, 3, 640, 959}); Tensor *output = new Tensor({1, 2, 640, 959}); - for (int idx = 0 ; idx < N ; ++idx){ + for (int idx = 0; idx < N; ++idx) { - memcpy(input->buf, inputN->buf + (idx * 1 * 3 * 640 * 959), sizeof(float) * 1 * 3 * 640 * 959); + memcpy(input->buf, inputN->buf + (idx * 1 * 3 * 640 * 959), + sizeof(float) * 1 * 3 * 640 * 959); // inc(n_channels, 64) - Conv2d(input, inc_double_conv_0_weight, NULL, inc_conv_0_output, 1, 1, 1, false); - BatchNorm2d(inc_conv_0_output, inc_double_conv_1_weight, inc_double_conv_1_bias, inc_batchnorm_0_running_mean, inc_batchnorm_0_running_var, inc_batchnorm_0_output, 1e-5, 0.1); + Conv2d(input, inc_double_conv_0_weight, NULL, inc_conv_0_output, 1, 1, 1, + false); + BatchNorm2d(inc_conv_0_output, inc_double_conv_1_weight, + inc_double_conv_1_bias, inc_batchnorm_0_running_mean, + inc_batchnorm_0_running_var, inc_batchnorm_0_output, 1e-5, 0.1); ReLU(inc_batchnorm_0_output); - Conv2d(inc_batchnorm_0_output, inc_double_conv_3_weight, NULL, inc_conv_1_output, 1, 1, 1, false); - BatchNorm2d(inc_conv_1_output, inc_double_conv_4_weight, inc_double_conv_4_bias, inc_batchnorm_1_running_mean, inc_batchnorm_1_running_var, inc_batchnorm_1_output, 1e-5, 0.1); + Conv2d(inc_batchnorm_0_output, inc_double_conv_3_weight, NULL, + inc_conv_1_output, 1, 1, 1, false); + BatchNorm2d(inc_conv_1_output, inc_double_conv_4_weight, + inc_double_conv_4_bias, inc_batchnorm_1_running_mean, + inc_batchnorm_1_running_var, inc_batchnorm_1_output, 1e-5, 0.1); ReLU(inc_batchnorm_1_output); - + // down1(64, 128) MaxPool2d(inc_batchnorm_1_output, down1_maxpool2d_0_output); - Conv2d(down1_maxpool2d_0_output, down1_maxpool_conv_1_double_conv_0_weight, NULL, down1_conv_0_output, 1, 1, 1, false); - BatchNorm2d(down1_conv_0_output, down1_maxpool_conv_1_double_conv_1_weight, down1_maxpool_conv_1_double_conv_1_bias, down1_batchnorm_0_running_mean, down1_batchnorm_0_running_var, down1_batchnorm_0_output, 1e-5, 0.1); + Conv2d(down1_maxpool2d_0_output, down1_maxpool_conv_1_double_conv_0_weight, + NULL, down1_conv_0_output, 1, 1, 1, false); + BatchNorm2d(down1_conv_0_output, down1_maxpool_conv_1_double_conv_1_weight, + down1_maxpool_conv_1_double_conv_1_bias, + down1_batchnorm_0_running_mean, down1_batchnorm_0_running_var, + down1_batchnorm_0_output, 1e-5, 0.1); ReLU(down1_batchnorm_0_output); - Conv2d(down1_batchnorm_0_output, down1_maxpool_conv_1_double_conv_3_weight, NULL, down1_conv_1_output, 1, 1, 1, false); - BatchNorm2d(down1_conv_1_output, down1_maxpool_conv_1_double_conv_4_weight, down1_maxpool_conv_1_double_conv_4_bias, down1_batchnorm_1_running_mean, down1_batchnorm_1_running_var, down1_batchnorm_1_output, 1e-5, 0.1); + Conv2d(down1_batchnorm_0_output, down1_maxpool_conv_1_double_conv_3_weight, + NULL, down1_conv_1_output, 1, 1, 1, false); + BatchNorm2d(down1_conv_1_output, down1_maxpool_conv_1_double_conv_4_weight, + down1_maxpool_conv_1_double_conv_4_bias, + down1_batchnorm_1_running_mean, down1_batchnorm_1_running_var, + down1_batchnorm_1_output, 1e-5, 0.1); ReLU(down1_batchnorm_1_output); - + // down2(128, 256) MaxPool2d(down1_batchnorm_1_output, down2_maxpool2d_0_output); - Conv2d(down2_maxpool2d_0_output, down2_maxpool_conv_1_double_conv_0_weight, NULL, down2_conv_0_output, 1, 1, 1, false); - BatchNorm2d(down2_conv_0_output, down2_maxpool_conv_1_double_conv_1_weight, down2_maxpool_conv_1_double_conv_1_bias, down2_batchnorm_0_running_mean, down2_batchnorm_0_running_var, down2_batchnorm_0_output, 1e-5, 0.1); + Conv2d(down2_maxpool2d_0_output, down2_maxpool_conv_1_double_conv_0_weight, + NULL, down2_conv_0_output, 1, 1, 1, false); + BatchNorm2d(down2_conv_0_output, down2_maxpool_conv_1_double_conv_1_weight, + down2_maxpool_conv_1_double_conv_1_bias, + down2_batchnorm_0_running_mean, down2_batchnorm_0_running_var, + down2_batchnorm_0_output, 1e-5, 0.1); ReLU(down2_batchnorm_0_output); - Conv2d(down2_batchnorm_0_output, down2_maxpool_conv_1_double_conv_3_weight, NULL, down2_conv_1_output, 1, 1, 1, false); - BatchNorm2d(down2_conv_1_output, down2_maxpool_conv_1_double_conv_4_weight, down2_maxpool_conv_1_double_conv_4_bias, down2_batchnorm_1_running_mean, down2_batchnorm_1_running_var, down2_batchnorm_1_output, 1e-5, 0.1); + Conv2d(down2_batchnorm_0_output, down2_maxpool_conv_1_double_conv_3_weight, + NULL, down2_conv_1_output, 1, 1, 1, false); + BatchNorm2d(down2_conv_1_output, down2_maxpool_conv_1_double_conv_4_weight, + down2_maxpool_conv_1_double_conv_4_bias, + down2_batchnorm_1_running_mean, down2_batchnorm_1_running_var, + down2_batchnorm_1_output, 1e-5, 0.1); ReLU(down2_batchnorm_1_output); // down3(256, 512) MaxPool2d(down2_batchnorm_1_output, down3_maxpool2d_0_output); - Conv2d(down3_maxpool2d_0_output, down3_maxpool_conv_1_double_conv_0_weight, NULL, down3_conv_0_output, 1, 1, 1, false); - BatchNorm2d(down3_conv_0_output, down3_maxpool_conv_1_double_conv_1_weight, down3_maxpool_conv_1_double_conv_1_bias, down3_batchnorm_0_running_mean, down3_batchnorm_0_running_var, down3_batchnorm_0_output, 1e-5, 0.1); + Conv2d(down3_maxpool2d_0_output, down3_maxpool_conv_1_double_conv_0_weight, + NULL, down3_conv_0_output, 1, 1, 1, false); + BatchNorm2d(down3_conv_0_output, down3_maxpool_conv_1_double_conv_1_weight, + down3_maxpool_conv_1_double_conv_1_bias, + down3_batchnorm_0_running_mean, down3_batchnorm_0_running_var, + down3_batchnorm_0_output, 1e-5, 0.1); ReLU(down3_batchnorm_0_output); - Conv2d(down3_batchnorm_0_output, down3_maxpool_conv_1_double_conv_3_weight, NULL, down3_conv_1_output, 1, 1, 1, false); - BatchNorm2d(down3_conv_1_output, down3_maxpool_conv_1_double_conv_4_weight, down3_maxpool_conv_1_double_conv_4_bias, down3_batchnorm_1_running_mean, down3_batchnorm_1_running_var, down3_batchnorm_1_output, 1e-5, 0.1); + Conv2d(down3_batchnorm_0_output, down3_maxpool_conv_1_double_conv_3_weight, + NULL, down3_conv_1_output, 1, 1, 1, false); + BatchNorm2d(down3_conv_1_output, down3_maxpool_conv_1_double_conv_4_weight, + down3_maxpool_conv_1_double_conv_4_bias, + down3_batchnorm_1_running_mean, down3_batchnorm_1_running_var, + down3_batchnorm_1_output, 1e-5, 0.1); ReLU(down3_batchnorm_1_output); // down4(512, 1024) MaxPool2d(down3_batchnorm_1_output, down4_maxpool2d_0_output); - Conv2d(down4_maxpool2d_0_output, down4_maxpool_conv_1_double_conv_0_weight, NULL, down4_conv_0_output, 1, 1, 1, false); - BatchNorm2d(down4_conv_0_output, down4_maxpool_conv_1_double_conv_1_weight, down4_maxpool_conv_1_double_conv_1_bias, down4_batchnorm_0_running_mean, down4_batchnorm_0_running_var, down4_batchnorm_0_output, 1e-5, 0.1); + Conv2d(down4_maxpool2d_0_output, down4_maxpool_conv_1_double_conv_0_weight, + NULL, down4_conv_0_output, 1, 1, 1, false); + BatchNorm2d(down4_conv_0_output, down4_maxpool_conv_1_double_conv_1_weight, + down4_maxpool_conv_1_double_conv_1_bias, + down4_batchnorm_0_running_mean, down4_batchnorm_0_running_var, + down4_batchnorm_0_output, 1e-5, 0.1); ReLU(down4_batchnorm_0_output); - Conv2d(down4_batchnorm_0_output, down4_maxpool_conv_1_double_conv_3_weight, NULL, down4_conv_1_output, 1, 1, 1, false); - BatchNorm2d(down4_conv_1_output, down4_maxpool_conv_1_double_conv_4_weight, down4_maxpool_conv_1_double_conv_4_bias, down4_batchnorm_1_running_mean, down4_batchnorm_1_running_var, down4_batchnorm_1_output, 1e-5, 0.1); + Conv2d(down4_batchnorm_0_output, down4_maxpool_conv_1_double_conv_3_weight, + NULL, down4_conv_1_output, 1, 1, 1, false); + BatchNorm2d(down4_conv_1_output, down4_maxpool_conv_1_double_conv_4_weight, + down4_maxpool_conv_1_double_conv_4_bias, + down4_batchnorm_1_running_mean, down4_batchnorm_1_running_var, + down4_batchnorm_1_output, 1e-5, 0.1); ReLU(down4_batchnorm_1_output); // up1(1024, 512), (down4_batchnorm_1_output, down3_batchnorm_1_output) - ConvTranspose2d(down4_batchnorm_1_output, up1_up_weight, up1_up_bias, up1_convt_0_output, 2, 0); + ConvTranspose2d(down4_batchnorm_1_output, up1_up_weight, up1_up_bias, + up1_convt_0_output, 2, 0); Concat(up1_convt_0_output, down3_batchnorm_1_output, up1_concat_0_output); - Conv2d(up1_concat_0_output, up1_conv_double_conv_0_weight, NULL, up1_conv_0_output, 1, 1, 1, false); - BatchNorm2d(up1_conv_0_output, up1_conv_double_conv_1_weight, up1_conv_double_conv_1_bias, up1_batchnorm_0_running_mean, up1_batchnorm_0_running_var, up1_batchnorm_0_output, 1e-5, 0.1); + Conv2d(up1_concat_0_output, up1_conv_double_conv_0_weight, NULL, + up1_conv_0_output, 1, 1, 1, false); + BatchNorm2d(up1_conv_0_output, up1_conv_double_conv_1_weight, + up1_conv_double_conv_1_bias, up1_batchnorm_0_running_mean, + up1_batchnorm_0_running_var, up1_batchnorm_0_output, 1e-5, 0.1); ReLU(up1_batchnorm_0_output); - Conv2d(up1_batchnorm_0_output, up1_conv_double_conv_3_weight, NULL, up1_conv_1_output, 1, 1, 1, false); - BatchNorm2d(up1_conv_1_output, up1_conv_double_conv_4_weight, up1_conv_double_conv_4_bias, up1_batchnorm_1_running_mean, up1_batchnorm_1_running_var, up1_batchnorm_1_output, 1e-5, 0.1); + Conv2d(up1_batchnorm_0_output, up1_conv_double_conv_3_weight, NULL, + up1_conv_1_output, 1, 1, 1, false); + BatchNorm2d(up1_conv_1_output, up1_conv_double_conv_4_weight, + up1_conv_double_conv_4_bias, up1_batchnorm_1_running_mean, + up1_batchnorm_1_running_var, up1_batchnorm_1_output, 1e-5, 0.1); ReLU(up1_batchnorm_1_output); // up2(512, 256), (up1_concat_0_output, down2_batchnorm_1_output) - ConvTranspose2d(up1_batchnorm_1_output, up2_up_weight, up2_up_bias, up2_convt_0_output, 2, 0); + ConvTranspose2d(up1_batchnorm_1_output, up2_up_weight, up2_up_bias, + up2_convt_0_output, 2, 0); Concat(up2_convt_0_output, down2_batchnorm_1_output, up2_concat_0_output); - Conv2d(up2_concat_0_output, up2_conv_double_conv_0_weight, NULL, up2_conv_0_output, 1, 1, 1, false); - BatchNorm2d(up2_conv_0_output, up2_conv_double_conv_1_weight, up2_conv_double_conv_1_bias, up2_batchnorm_0_running_mean, up2_batchnorm_0_running_var, up2_batchnorm_0_output, 1e-5, 0.1); + Conv2d(up2_concat_0_output, up2_conv_double_conv_0_weight, NULL, + up2_conv_0_output, 1, 1, 1, false); + BatchNorm2d(up2_conv_0_output, up2_conv_double_conv_1_weight, + up2_conv_double_conv_1_bias, up2_batchnorm_0_running_mean, + up2_batchnorm_0_running_var, up2_batchnorm_0_output, 1e-5, 0.1); ReLU(up2_batchnorm_0_output); - Conv2d(up2_batchnorm_0_output, up2_conv_double_conv_3_weight, NULL, up2_conv_1_output, 1, 1, 1, false); - BatchNorm2d(up2_conv_1_output, up2_conv_double_conv_4_weight, up2_conv_double_conv_4_bias, up2_batchnorm_1_running_mean, up2_batchnorm_1_running_var, up2_batchnorm_1_output, 1e-5, 0.1); + Conv2d(up2_batchnorm_0_output, up2_conv_double_conv_3_weight, NULL, + up2_conv_1_output, 1, 1, 1, false); + BatchNorm2d(up2_conv_1_output, up2_conv_double_conv_4_weight, + up2_conv_double_conv_4_bias, up2_batchnorm_1_running_mean, + up2_batchnorm_1_running_var, up2_batchnorm_1_output, 1e-5, 0.1); ReLU(up2_batchnorm_1_output); // up3(256, 128), (up2_concat_0_output, down1_batchnorm_1_output) - ConvTranspose2d(up2_batchnorm_1_output, up3_up_weight, up3_up_bias, up3_convt_0_output, 2, 0); + ConvTranspose2d(up2_batchnorm_1_output, up3_up_weight, up3_up_bias, + up3_convt_0_output, 2, 0); Concat(up3_convt_0_output, down1_batchnorm_1_output, up3_concat_0_output); - Conv2d(up3_concat_0_output, up3_conv_double_conv_0_weight, NULL, up3_conv_0_output, 1, 1, 1, false); - BatchNorm2d(up3_conv_0_output, up3_conv_double_conv_1_weight, up3_conv_double_conv_1_bias, up3_batchnorm_0_running_mean, up3_batchnorm_0_running_var, up3_batchnorm_0_output, 1e-5, 0.1); + Conv2d(up3_concat_0_output, up3_conv_double_conv_0_weight, NULL, + up3_conv_0_output, 1, 1, 1, false); + BatchNorm2d(up3_conv_0_output, up3_conv_double_conv_1_weight, + up3_conv_double_conv_1_bias, up3_batchnorm_0_running_mean, + up3_batchnorm_0_running_var, up3_batchnorm_0_output, 1e-5, 0.1); ReLU(up3_batchnorm_0_output); - Conv2d(up3_batchnorm_0_output, up3_conv_double_conv_3_weight, NULL, up3_conv_1_output, 1, 1, 1, false); - BatchNorm2d(up3_conv_1_output, up3_conv_double_conv_4_weight, up3_conv_double_conv_4_bias, up3_batchnorm_1_running_mean, up3_batchnorm_1_running_var, up3_batchnorm_1_output, 1e-5, 0.1); + Conv2d(up3_batchnorm_0_output, up3_conv_double_conv_3_weight, NULL, + up3_conv_1_output, 1, 1, 1, false); + BatchNorm2d(up3_conv_1_output, up3_conv_double_conv_4_weight, + up3_conv_double_conv_4_bias, up3_batchnorm_1_running_mean, + up3_batchnorm_1_running_var, up3_batchnorm_1_output, 1e-5, 0.1); ReLU(up3_batchnorm_1_output); // up4(128, 64), (up3_concat_0_output, inc_batchnorm_1_output) - ConvTranspose2d(up3_batchnorm_1_output, up4_up_weight, up4_up_bias, up4_convt_0_output, 2, 0); + ConvTranspose2d(up3_batchnorm_1_output, up4_up_weight, up4_up_bias, + up4_convt_0_output, 2, 0); Concat(up4_convt_0_output, inc_batchnorm_1_output, up4_concat_0_output); - Conv2d(up4_concat_0_output, up4_conv_double_conv_0_weight, NULL, up4_conv_0_output, 1, 1, 1, false); - BatchNorm2d(up4_conv_0_output, up4_conv_double_conv_1_weight, up4_conv_double_conv_1_bias, up4_batchnorm_0_running_mean, up4_batchnorm_0_running_var, up4_batchnorm_0_output, 1e-5, 0.1); + Conv2d(up4_concat_0_output, up4_conv_double_conv_0_weight, NULL, + up4_conv_0_output, 1, 1, 1, false); + BatchNorm2d(up4_conv_0_output, up4_conv_double_conv_1_weight, + up4_conv_double_conv_1_bias, up4_batchnorm_0_running_mean, + up4_batchnorm_0_running_var, up4_batchnorm_0_output, 1e-5, 0.1); ReLU(up4_batchnorm_0_output); - Conv2d(up4_batchnorm_0_output, up4_conv_double_conv_3_weight, NULL, up4_conv_1_output, 1, 1, 1, false); - BatchNorm2d(up4_conv_1_output, up4_conv_double_conv_4_weight, up4_conv_double_conv_4_bias, up4_batchnorm_1_running_mean, up4_batchnorm_1_running_var, up4_batchnorm_1_output, 1e-5, 0.1); + Conv2d(up4_batchnorm_0_output, up4_conv_double_conv_3_weight, NULL, + up4_conv_1_output, 1, 1, 1, false); + BatchNorm2d(up4_conv_1_output, up4_conv_double_conv_4_weight, + up4_conv_double_conv_4_bias, up4_batchnorm_1_running_mean, + up4_batchnorm_1_running_var, up4_batchnorm_1_output, 1e-5, 0.1); ReLU(up4_batchnorm_1_output); // outc(64, 2) - Conv2d(up4_batchnorm_1_output, outc_conv_weight, outc_conv_bias, output, 1, 0, 1, true); + Conv2d(up4_batchnorm_1_output, outc_conv_weight, outc_conv_bias, output, 1, + 0, 1, true); - memcpy(outputN->buf + (idx * 1 * 2 * 640 * 959), output->buf, sizeof(float) * (1 * 2 * 640 * 959)); + memcpy(outputN->buf + (idx * 1 * 2 * 640 * 959), output->buf, + sizeof(float) * (1 * 2 * 640 * 959)); } } @@ -288,17 +361,22 @@ void uNet(Tensor *inputN, Tensor *outputN, int N) { * where OH = (H + 2 * pad - dilation * (R - 1) - 1) / stride + 1, * OW = (W + 2 * pad - dilation * (S - 1) - 1) / stride + 1 */ - void Conv2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, int stride, int pad, int dilation, bool has_bias) { - - int C = input->shape[1], H = input->shape[2], W = input->shape[3]; +void Conv2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, + int stride, int pad, int dilation, bool has_bias) { + + int C = input->shape[1], H = input->shape[2], W = input->shape[3]; int K = weight->shape[0], R = weight->shape[2], S = weight->shape[3]; int OH = output->shape[2], OW = output->shape[3]; - - CHECK_ERROR(OH == (H + 2 * pad - dilation * (R - 1) - 1) / stride + 1, "[Conv2d] Output height mismatch"); - CHECK_ERROR(OW == (W + 2 * pad - dilation * (S - 1) - 1) / stride + 1, "[Conv2d] Output width mismatch"); - CHECK_ERROR(weight->shape[1] == C && (!has_bias || bias->shape[0] == K) && output->shape[1] == K, "[Conv2d] Channel size mismatch"); - #pragma omp parallel for + CHECK_ERROR(OH == (H + 2 * pad - dilation * (R - 1) - 1) / stride + 1, + "[Conv2d] Output height mismatch"); + CHECK_ERROR(OW == (W + 2 * pad - dilation * (S - 1) - 1) / stride + 1, + "[Conv2d] Output width mismatch"); + CHECK_ERROR(weight->shape[1] == C && (!has_bias || bias->shape[0] == K) && + output->shape[1] == K, + "[Conv2d] Channel size mismatch"); + +#pragma omp parallel for for (int k = 0; k < K; ++k) { for (int oh = 0; oh < OH; ++oh) { for (int ow = 0; ow < OW; ++ow) { @@ -308,7 +386,8 @@ void uNet(Tensor *inputN, Tensor *outputN, int N) { for (int s = 0; s < S; ++s) { int h = oh * stride - pad + r * dilation; int w = ow * stride - pad + s * dilation; - if (h < 0 || h >= H || w < 0 || w >= W) continue; + if (h < 0 || h >= H || w < 0 || w >= W) + continue; float i = input->buf[c * H * W + h * W + w]; float f = weight->buf[k * C * R * S + c * R * S + r * S + s]; o += i * f; @@ -328,10 +407,10 @@ void uNet(Tensor *inputN, Tensor *outputN, int N) { * Formula: y = max(x, 0) */ void ReLU(Tensor *inout) { - + int C = inout->shape[1], H = inout->shape[2], W = inout->shape[3]; - - #pragma omp parallel for + +#pragma omp parallel for for (int c = 0; c < C; ++c) { for (int h = 0; h < H; ++h) { for (int w = 0; w < W; ++w) { @@ -349,31 +428,36 @@ void ReLU(Tensor *inout) { * beta shape = (C) * output shape = (N, C, H, W) */ -void BatchNorm2d(Tensor *input, Tensor *gamma, Tensor *beta, Tensor *running_mean, Tensor *running_var, Tensor *output, const float eps, const float momentum) { - - int N = input->shape[0], C = input->shape[1], H = input->shape[2], W = input->shape[3]; - - CHECK_ERROR(gamma->shape[0] == C && beta->shape[0] == C, "[BatchNorm2d] gamma, beta shape mismatch"); - CHECK_ERROR(output->shape[1] == C && output->shape[2] == H && output->shape[3] == W, "[BatchNorm2d] Output shape mismatch"); - - #pragma omp parallel for - for (int c=0; cshape[0], C = input->shape[1], H = input->shape[2], + W = input->shape[3]; + + CHECK_ERROR(gamma->shape[0] == C && beta->shape[0] == C, + "[BatchNorm2d] gamma, beta shape mismatch"); + CHECK_ERROR(output->shape[1] == C && output->shape[2] == H && + output->shape[3] == W, + "[BatchNorm2d] Output shape mismatch"); + +#pragma omp parallel for + for (int c = 0; c < C; ++c) { + for (int n = 0; n < N; ++n) { + for (int h = 0; h < H; ++h) { + for (int w = 0; w < W; ++w) { float mean = running_mean->buf[c]; float variance = running_var->buf[c]; float x = input->buf[n * C * H * W + c * H * W + h * W + w]; float x_hat = (x - mean) / sqrt(variance + eps); - output->buf[n * C * H * W + c * H * W + h * W + w] = gamma->buf[c] * x_hat + beta->buf[c]; + output->buf[n * C * H * W + c * H * W + h * W + w] = + gamma->buf[c] * x_hat + beta->buf[c]; } } } } - } - /* * Transposed convolution * input shape = (N, C, H, W) @@ -383,17 +467,22 @@ void BatchNorm2d(Tensor *input, Tensor *gamma, Tensor *beta, Tensor *running_mea * where OH = (H - 1) * stride - 2 * pad + R * OW = (W - 1) * stride - 2 * pad + S */ -void ConvTranspose2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, int stride, int pad) { - +void ConvTranspose2d(Tensor *input, Tensor *weight, Tensor *bias, + Tensor *output, int stride, int pad) { + int C = input->shape[1], H = input->shape[2], W = input->shape[3]; int K = weight->shape[1], R = weight->shape[2], S = weight->shape[3]; int OH = output->shape[2], OW = output->shape[3]; - CHECK_ERROR(OH == (H - 1) * stride - 2 * pad + R, "[ConvT2d] Output height mismatch"); - CHECK_ERROR(OW == (W - 1) * stride - 2 * pad + S, "[ConvT2d] Output width mismatch"); - CHECK_ERROR(weight->shape[0] == C && bias->shape[0] == K && output->shape[1] == K, "[ConvT2d] Channel size mismatch"); + CHECK_ERROR(OH == (H - 1) * stride - 2 * pad + R, + "[ConvT2d] Output height mismatch"); + CHECK_ERROR(OW == (W - 1) * stride - 2 * pad + S, + "[ConvT2d] Output width mismatch"); + CHECK_ERROR(weight->shape[0] == C && bias->shape[0] == K && + output->shape[1] == K, + "[ConvT2d] Channel size mismatch"); - #pragma omp parallel for +#pragma omp parallel for for (int k = 0; k < K; ++k) { for (int oh = 0; oh < OH; ++oh) { for (int ow = 0; ow < OW; ++ow) { @@ -401,11 +490,14 @@ void ConvTranspose2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output for (int c = 0; c < C; ++c) { for (int r = 0; r < R; ++r) { for (int s = 0; s < S; ++s) { - if ((oh + pad - r) % stride != 0) continue; - if ((ow + pad - s) % stride != 0) continue; + if ((oh + pad - r) % stride != 0) + continue; + if ((ow + pad - s) % stride != 0) + continue; int h = (oh + pad - r) / stride; int w = (ow + pad - s) / stride; - if (h < 0 || h >= H || w < 0 || w >= W) continue; + if (h < 0 || h >= H || w < 0 || w >= W) + continue; float i = input->buf[c * H * W + h * W + w]; float f = weight->buf[c * K * R * S + k * R * S + r * S + s]; o += i * f; @@ -418,14 +510,16 @@ void ConvTranspose2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output } } +float max4(float in0, float in1, float in2, float in3) { -float max4(float in0, float in1, float in2, float in3){ - float max = in0; - - if (in1 > max) max = in1; - if (in2 > max) max = in2; - if (in3 > max) max = in3; + + if (in1 > max) + max = in1; + if (in2 > max) + max = in2; + if (in3 > max) + max = in3; return max; } @@ -436,28 +530,27 @@ float max4(float in0, float in1, float in2, float in3){ * where OH = H / 2 * OW = W / 2 */ -void MaxPool2d(Tensor *input, Tensor *output){ - +void MaxPool2d(Tensor *input, Tensor *output) { + int C = input->shape[1], H = input->shape[2], W = input->shape[3]; int OC = output->shape[1], OH = output->shape[2], OW = output->shape[3]; CHECK_ERROR(OW == W / 2, "[MaxPool2d] Output width mismatch"); CHECK_ERROR(OH == H / 2, "[MaxPool2d] Output height mismatch"); - CHECK_ERROR(OC== C, "[MaxPool2d] Output channel mismatch"); + CHECK_ERROR(OC == C, "[MaxPool2d] Output channel mismatch"); - #pragma omp parallel for - for (int oc=0; ocbuf[oc * H * W + 2 * oh * W + 2 * ow]; - float in1 = input->buf[oc * H * W + 2 * oh * W + 2 * ow + 1]; - float in2 = input->buf[oc * H * W + (2 * oh + 1) * W + 2 * ow]; - float in3 = input->buf[oc * H * W + (2 * oh + 1) * W + 2 * ow + 1]; +#pragma omp parallel for + for (int oc = 0; oc < OC; ++oc) { + for (int oh = 0; oh < OH; ++oh) { + for (int ow = 0; ow < OW; ++ow) { + float in0 = input->buf[oc * H * W + 2 * oh * W + 2 * ow]; + float in1 = input->buf[oc * H * W + 2 * oh * W + 2 * ow + 1]; + float in2 = input->buf[oc * H * W + (2 * oh + 1) * W + 2 * ow]; + float in3 = input->buf[oc * H * W + (2 * oh + 1) * W + 2 * ow + 1]; output->buf[oc * OH * OW + oh * OW + ow] = max4(in0, in1, in2, in3); } } } - } /* @@ -468,35 +561,38 @@ void MaxPool2d(Tensor *input, Tensor *output){ * where OH = H2, H1 * OW = W2 = W1 + 1 */ -void Concat(Tensor *input1, Tensor *input2, Tensor *output){ - +void Concat(Tensor *input1, Tensor *input2, Tensor *output) { + int C1 = input1->shape[1], H1 = input1->shape[2], W1 = input1->shape[3]; int C2 = input2->shape[1], H2 = input2->shape[2], W2 = input2->shape[3]; int OC = output->shape[1], OH = output->shape[2], OW = output->shape[3]; - + CHECK_ERROR(OC == C1 * 2 && OC == C2 * 2, "[Concat] Output channel mismatch"); CHECK_ERROR(OW == W1 + 1 && OW == W2, "[Concat] Output width mismatch"); CHECK_ERROR(OH == H1 && OH == H2, "[Concat] Output height mismatch"); - #pragma omp parallel for - for (int oc=0; ocbuf[oc * OH * OW + oh * OW + ow] = input2->buf[oc * OH * OW + oh * OW + ow]; - } - } - } - - #pragma omp parallel for - for (int oc=OC/2; ocbuf[oc * OH * OW + oh * OW + ow] = 0.0; // zero padding - else output->buf[oc * OH * OW + oh * OW + ow] = input1->buf[(oc-OC/2) * H1 * W1 + oh * W1 + ow]; +#pragma omp parallel for + for (int oc = 0; oc < OC / 2; ++oc) { + for (int oh = 0; oh < OH; ++oh) { + for (int ow = 0; ow < OW; ++ow) { + output->buf[oc * OH * OW + oh * OW + ow] = + input2->buf[oc * OH * OW + oh * OW + ow]; } } } +#pragma omp parallel for + for (int oc = OC / 2; oc < OC; ++oc) { + for (int oh = 0; oh < OH; ++oh) { + for (int ow = 0; ow < OW; ++ow) { + if (ow == OW - 1) + output->buf[oc * OH * OW + oh * OW + ow] = 0.0; // zero padding + else + output->buf[oc * OH * OW + oh * OW + ow] = + input1->buf[(oc - OC / 2) * H1 * W1 + oh * W1 + ow]; + } + } + } } // /* @@ -508,15 +604,18 @@ void Concat(Tensor *input1, Tensor *input2, Tensor *output){ // * where OH = (H + 2 * pad - dilation * (R - 1) - 1) / stride + 1, // * OW = (W + 2 * pad - dilation * (S - 1) - 1) / stride + 1 // */ -// void Conv2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, int stride, int pad, int dilation, bool has_bias) { - +// void Conv2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, int +// stride, int pad, int dilation, bool has_bias) { + // int C = input->shape[1], H = input->shape[2], W = input->shape[3]; // int K = weight->shape[0], R = weight->shape[2], S = weight->shape[3]; // int OH = output->shape[2], OW = output->shape[3]; - -// CHECK_ERROR(OH == (H + 2 * pad - dilation * (R - 1) - 1) / stride + 1, "[Conv2d] Output height mismatch"); -// CHECK_ERROR(OW == (W + 2 * pad - dilation * (S - 1) - 1) / stride + 1, "[Conv2d] Output width mismatch"); -// CHECK_ERROR(weight->shape[1] == C && (!has_bias || bias->shape[0] == K) && output->shape[1] == K, "[Conv2d] Channel size mismatch"); + +// CHECK_ERROR(OH == (H + 2 * pad - dilation * (R - 1) - 1) / stride + 1, +// "[Conv2d] Output height mismatch"); CHECK_ERROR(OW == (W + 2 * pad - +// dilation * (S - 1) - 1) / stride + 1, "[Conv2d] Output width mismatch"); +// CHECK_ERROR(weight->shape[1] == C && (!has_bias || bias->shape[0] == K) && +// output->shape[1] == K, "[Conv2d] Channel size mismatch"); // for (int k = 0; k < K; ++k) { // for (int oh = 0; oh < OH; ++oh) { @@ -547,9 +646,9 @@ void Concat(Tensor *input1, Tensor *input2, Tensor *output){ // * Formula: y = max(x, 0) // */ // void ReLU(Tensor *inout) { - + // int C = inout->shape[1], H = inout->shape[2], W = inout->shape[3]; - + // for (int c = 0; c < C; ++c) { // for (int h = 0; h < H; ++h) { // for (int w = 0; w < W; ++w) { @@ -567,13 +666,18 @@ void Concat(Tensor *input1, Tensor *input2, Tensor *output){ // * beta shape = (C) // * output shape = (N, C, H, W) // */ -// void BatchNorm2d(Tensor *input, Tensor *gamma, Tensor *beta, Tensor *running_mean, Tensor *running_var, Tensor *output, const float eps, const float momentum) { - -// int N = input->shape[0], C = input->shape[1], H = input->shape[2], W = input->shape[3]; - -// CHECK_ERROR(gamma->shape[0] == C && beta->shape[0] == C, "[BatchNorm2d] gamma, beta shape mismatch"); -// CHECK_ERROR(output->shape[1] == C && output->shape[2] == H && output->shape[3] == W, "[BatchNorm2d] Output shape mismatch"); - +// void BatchNorm2d(Tensor *input, Tensor *gamma, Tensor *beta, Tensor +// *running_mean, Tensor *running_var, Tensor *output, const float eps, const +// float momentum) { + +// int N = input->shape[0], C = input->shape[1], H = input->shape[2], W = +// input->shape[3]; + +// CHECK_ERROR(gamma->shape[0] == C && beta->shape[0] == C, "[BatchNorm2d] +// gamma, beta shape mismatch"); CHECK_ERROR(output->shape[1] == C && +// output->shape[2] == H && output->shape[3] == W, "[BatchNorm2d] Output shape +// mismatch"); + // for (int c=0; cbuf[c]; // float x = input->buf[n * C * H * W + c * H * W + h * W + w]; // float x_hat = (x - mean) / sqrt(variance + eps); -// output->buf[n * C * H * W + c * H * W + h * W + w] = gamma->buf[c] * x_hat + beta->buf[c]; +// output->buf[n * C * H * W + c * H * W + h * W + w] = gamma->buf[c] +// * x_hat + beta->buf[c]; // } // } // } @@ -590,7 +695,6 @@ void Concat(Tensor *input1, Tensor *input2, Tensor *output){ // } - // /* // * Transposed convolution // * input shape = (N, C, H, W) @@ -600,15 +704,18 @@ void Concat(Tensor *input1, Tensor *input2, Tensor *output){ // * where OH = (H - 1) * stride - 2 * pad + R // * OW = (W - 1) * stride - 2 * pad + S // */ -// void ConvTranspose2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, int stride, int pad) { - +// void ConvTranspose2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor +// *output, int stride, int pad) { + // int C = input->shape[1], H = input->shape[2], W = input->shape[3]; // int K = weight->shape[1], R = weight->shape[2], S = weight->shape[3]; // int OH = output->shape[2], OW = output->shape[3]; -// CHECK_ERROR(OH == (H - 1) * stride - 2 * pad + R, "[ConvT2d] Output height mismatch"); -// CHECK_ERROR(OW == (W - 1) * stride - 2 * pad + S, "[ConvT2d] Output width mismatch"); -// CHECK_ERROR(weight->shape[0] == C && bias->shape[0] == K && output->shape[1] == K, "[ConvT2d] Channel size mismatch"); +// CHECK_ERROR(OH == (H - 1) * stride - 2 * pad + R, "[ConvT2d] Output height +// mismatch"); CHECK_ERROR(OW == (W - 1) * stride - 2 * pad + S, "[ConvT2d] +// Output width mismatch"); CHECK_ERROR(weight->shape[0] == C && +// bias->shape[0] == K && output->shape[1] == K, "[ConvT2d] Channel size +// mismatch"); // for (int k = 0; k < K; ++k) { // for (int oh = 0; oh < OH; ++oh) { @@ -634,11 +741,10 @@ void Concat(Tensor *input1, Tensor *input2, Tensor *output){ // } // } - // float max4(float in0, float in1, float in2, float in3){ - + // float max = in0; - + // if (in1 > max) max = in1; // if (in2 > max) max = in2; // if (in3 > max) max = in3; @@ -653,7 +759,7 @@ void Concat(Tensor *input1, Tensor *input2, Tensor *output){ // * OW = W / 2 // */ // void MaxPool2d(Tensor *input, Tensor *output){ - + // int C = input->shape[1], H = input->shape[2], W = input->shape[3]; // int OC = output->shape[1], OH = output->shape[2], OW = output->shape[3]; @@ -664,10 +770,10 @@ void Concat(Tensor *input1, Tensor *input2, Tensor *output){ // for (int oc=0; ocbuf[oc * H * W + 2 * oh * W + 2 * ow]; -// float in1 = input->buf[oc * H * W + 2 * oh * W + 2 * ow + 1]; -// float in2 = input->buf[oc * H * W + (2 * oh + 1) * W + 2 * ow]; -// float in3 = input->buf[oc * H * W + (2 * oh + 1) * W + 2 * ow + 1]; +// float in0 = input->buf[oc * H * W + 2 * oh * W + 2 * ow]; +// float in1 = input->buf[oc * H * W + 2 * oh * W + 2 * ow + 1]; +// float in2 = input->buf[oc * H * W + (2 * oh + 1) * W + 2 * ow]; +// float in3 = input->buf[oc * H * W + (2 * oh + 1) * W + 2 * ow + 1]; // output->buf[oc * OH * OW + oh * OW + ow] = max4(in0, in1, in2, in3); // } // } @@ -684,108 +790,143 @@ void Concat(Tensor *input1, Tensor *input2, Tensor *output){ // * OW = W2 = W1 + 1 // */ // void Concat(Tensor *input1, Tensor *input2, Tensor *output){ - + // int C1 = input1->shape[1], H1 = input1->shape[2], W1 = input1->shape[3]; // int C2 = input2->shape[1], H2 = input2->shape[2], W2 = input2->shape[3]; // int OC = output->shape[1], OH = output->shape[2], OW = output->shape[3]; - -// CHECK_ERROR(OC == C1 * 2 && OC == C2 * 2, "[Concat] Output channel mismatch"); -// CHECK_ERROR(OW == W1 + 1 && OW == W2, "[Concat] Output width mismatch"); -// CHECK_ERROR(OH == H1 && OH == H2, "[Concat] Output height mismatch"); + +// CHECK_ERROR(OC == C1 * 2 && OC == C2 * 2, "[Concat] Output channel +// mismatch"); CHECK_ERROR(OW == W1 + 1 && OW == W2, "[Concat] Output width +// mismatch"); CHECK_ERROR(OH == H1 && OH == H2, "[Concat] Output height +// mismatch"); // for (int oc=0; ocbuf[oc * OH * OW + oh * OW + ow] = input2->buf[oc * OH * OW + oh * OW + ow]; +// output->buf[oc * OH * OW + oh * OW + ow] = input2->buf[oc * OH * OW + +// oh * OW + ow]; // } // } // } - + // for (int oc=OC/2; ocbuf[oc * OH * OW + oh * OW + ow] = 0.0; // zero padding -// else output->buf[oc * OH * OW + oh * OW + ow] = input1->buf[(oc-OC/2) * H1 * W1 + oh * W1 + ow]; +// if (ow == OW-1) output->buf[oc * OH * OW + oh * OW + ow] = 0.0; // +// zero padding else output->buf[oc * OH * OW + oh * OW + ow] = +// input1->buf[(oc-OC/2) * H1 * W1 + oh * W1 + ow]; // } // } // } // } - /* * uNet_initialize * Initialize the model. Do input-independent job here. */ void uNet_initialize(int N, char *parameter_fname) { - size_t parameter_binary_size = 0; - float *parameter = (float *)read_binary(parameter_fname, ¶meter_binary_size); - + size_t parameter_binary_size = 0; + float *parameter = + (float *)read_binary(parameter_fname, ¶meter_binary_size); + // Parameters - inc_double_conv_0_weight = new Tensor({64,3,3,3}, parameter + OFFSET0); + inc_double_conv_0_weight = new Tensor({64, 3, 3, 3}, parameter + OFFSET0); inc_double_conv_1_weight = new Tensor({64}, parameter + OFFSET1); inc_double_conv_1_bias = new Tensor({64}, parameter + OFFSET2); - inc_double_conv_3_weight = new Tensor({64,64,3,3}, parameter + OFFSET3); + inc_double_conv_3_weight = new Tensor({64, 64, 3, 3}, parameter + OFFSET3); inc_double_conv_4_weight = new Tensor({64}, parameter + OFFSET4); inc_double_conv_4_bias = new Tensor({64}, parameter + OFFSET5); - down1_maxpool_conv_1_double_conv_0_weight = new Tensor({128,64,3,3}, parameter + OFFSET6); - down1_maxpool_conv_1_double_conv_1_weight = new Tensor({128}, parameter + OFFSET7); - down1_maxpool_conv_1_double_conv_1_bias = new Tensor({128}, parameter + OFFSET8); - down1_maxpool_conv_1_double_conv_3_weight = new Tensor({128,128,3,3}, parameter + OFFSET9); - down1_maxpool_conv_1_double_conv_4_weight = new Tensor({128}, parameter + OFFSET10); - down1_maxpool_conv_1_double_conv_4_bias = new Tensor({128}, parameter + OFFSET11); - down2_maxpool_conv_1_double_conv_0_weight = new Tensor({256,128,3,3}, parameter + OFFSET12); - down2_maxpool_conv_1_double_conv_1_weight = new Tensor({256}, parameter + OFFSET13); - down2_maxpool_conv_1_double_conv_1_bias = new Tensor({256}, parameter + OFFSET14); - down2_maxpool_conv_1_double_conv_3_weight = new Tensor({256,256,3,3}, parameter + OFFSET15); - down2_maxpool_conv_1_double_conv_4_weight = new Tensor({256}, parameter + OFFSET16); - down2_maxpool_conv_1_double_conv_4_bias = new Tensor({256}, parameter + OFFSET17); - down3_maxpool_conv_1_double_conv_0_weight = new Tensor({512,256,3,3}, parameter + OFFSET18); - down3_maxpool_conv_1_double_conv_1_weight = new Tensor({512}, parameter + OFFSET19); - down3_maxpool_conv_1_double_conv_1_bias = new Tensor({512}, parameter + OFFSET20); - down3_maxpool_conv_1_double_conv_3_weight = new Tensor({512,512,3,3}, parameter + OFFSET21); - down3_maxpool_conv_1_double_conv_4_weight = new Tensor({512}, parameter + OFFSET22); - down3_maxpool_conv_1_double_conv_4_bias = new Tensor({512}, parameter + OFFSET23); - down4_maxpool_conv_1_double_conv_0_weight = new Tensor({1024,512,3,3}, parameter + OFFSET24); - down4_maxpool_conv_1_double_conv_1_weight = new Tensor({1024}, parameter + OFFSET25); - down4_maxpool_conv_1_double_conv_1_bias = new Tensor({1024}, parameter + OFFSET26); - down4_maxpool_conv_1_double_conv_3_weight = new Tensor({1024,1024,3,3}, parameter + OFFSET27); - down4_maxpool_conv_1_double_conv_4_weight = new Tensor({1024}, parameter + OFFSET28); - down4_maxpool_conv_1_double_conv_4_bias = new Tensor({1024}, parameter + OFFSET29); - up1_up_weight = new Tensor({1024,512,2,2}, parameter + OFFSET30); + down1_maxpool_conv_1_double_conv_0_weight = + new Tensor({128, 64, 3, 3}, parameter + OFFSET6); + down1_maxpool_conv_1_double_conv_1_weight = + new Tensor({128}, parameter + OFFSET7); + down1_maxpool_conv_1_double_conv_1_bias = + new Tensor({128}, parameter + OFFSET8); + down1_maxpool_conv_1_double_conv_3_weight = + new Tensor({128, 128, 3, 3}, parameter + OFFSET9); + down1_maxpool_conv_1_double_conv_4_weight = + new Tensor({128}, parameter + OFFSET10); + down1_maxpool_conv_1_double_conv_4_bias = + new Tensor({128}, parameter + OFFSET11); + down2_maxpool_conv_1_double_conv_0_weight = + new Tensor({256, 128, 3, 3}, parameter + OFFSET12); + down2_maxpool_conv_1_double_conv_1_weight = + new Tensor({256}, parameter + OFFSET13); + down2_maxpool_conv_1_double_conv_1_bias = + new Tensor({256}, parameter + OFFSET14); + down2_maxpool_conv_1_double_conv_3_weight = + new Tensor({256, 256, 3, 3}, parameter + OFFSET15); + down2_maxpool_conv_1_double_conv_4_weight = + new Tensor({256}, parameter + OFFSET16); + down2_maxpool_conv_1_double_conv_4_bias = + new Tensor({256}, parameter + OFFSET17); + down3_maxpool_conv_1_double_conv_0_weight = + new Tensor({512, 256, 3, 3}, parameter + OFFSET18); + down3_maxpool_conv_1_double_conv_1_weight = + new Tensor({512}, parameter + OFFSET19); + down3_maxpool_conv_1_double_conv_1_bias = + new Tensor({512}, parameter + OFFSET20); + down3_maxpool_conv_1_double_conv_3_weight = + new Tensor({512, 512, 3, 3}, parameter + OFFSET21); + down3_maxpool_conv_1_double_conv_4_weight = + new Tensor({512}, parameter + OFFSET22); + down3_maxpool_conv_1_double_conv_4_bias = + new Tensor({512}, parameter + OFFSET23); + down4_maxpool_conv_1_double_conv_0_weight = + new Tensor({1024, 512, 3, 3}, parameter + OFFSET24); + down4_maxpool_conv_1_double_conv_1_weight = + new Tensor({1024}, parameter + OFFSET25); + down4_maxpool_conv_1_double_conv_1_bias = + new Tensor({1024}, parameter + OFFSET26); + down4_maxpool_conv_1_double_conv_3_weight = + new Tensor({1024, 1024, 3, 3}, parameter + OFFSET27); + down4_maxpool_conv_1_double_conv_4_weight = + new Tensor({1024}, parameter + OFFSET28); + down4_maxpool_conv_1_double_conv_4_bias = + new Tensor({1024}, parameter + OFFSET29); + up1_up_weight = new Tensor({1024, 512, 2, 2}, parameter + OFFSET30); up1_up_bias = new Tensor({512}, parameter + OFFSET31); - up1_conv_double_conv_0_weight = new Tensor({512,1024,3,3}, parameter + OFFSET32); + up1_conv_double_conv_0_weight = + new Tensor({512, 1024, 3, 3}, parameter + OFFSET32); up1_conv_double_conv_1_weight = new Tensor({512}, parameter + OFFSET33); - up1_conv_double_conv_1_bias = new Tensor({512}, parameter + OFFSET34); - up1_conv_double_conv_3_weight = new Tensor({512,512,3,3}, parameter + OFFSET35); + up1_conv_double_conv_1_bias = new Tensor({512}, parameter + OFFSET34); + up1_conv_double_conv_3_weight = + new Tensor({512, 512, 3, 3}, parameter + OFFSET35); up1_conv_double_conv_4_weight = new Tensor({512}, parameter + OFFSET36); - up1_conv_double_conv_4_bias = new Tensor({512}, parameter + OFFSET37); - up2_up_weight = new Tensor({512,256,2,2}, parameter + OFFSET38); + up1_conv_double_conv_4_bias = new Tensor({512}, parameter + OFFSET37); + up2_up_weight = new Tensor({512, 256, 2, 2}, parameter + OFFSET38); up2_up_bias = new Tensor({256}, parameter + OFFSET39); - up2_conv_double_conv_0_weight = new Tensor({256,512,3,3}, parameter + OFFSET40); + up2_conv_double_conv_0_weight = + new Tensor({256, 512, 3, 3}, parameter + OFFSET40); up2_conv_double_conv_1_weight = new Tensor({256}, parameter + OFFSET41); up2_conv_double_conv_1_bias = new Tensor({256}, parameter + OFFSET42); - up2_conv_double_conv_3_weight = new Tensor({256,256,3,3}, parameter + OFFSET43); + up2_conv_double_conv_3_weight = + new Tensor({256, 256, 3, 3}, parameter + OFFSET43); up2_conv_double_conv_4_weight = new Tensor({256}, parameter + OFFSET44); up2_conv_double_conv_4_bias = new Tensor({256}, parameter + OFFSET45); - up3_up_weight = new Tensor({256,128,2,2}, parameter + OFFSET46); + up3_up_weight = new Tensor({256, 128, 2, 2}, parameter + OFFSET46); up3_up_bias = new Tensor({128}, parameter + OFFSET47); - up3_conv_double_conv_0_weight = new Tensor({128,256,3,3}, parameter + OFFSET48); + up3_conv_double_conv_0_weight = + new Tensor({128, 256, 3, 3}, parameter + OFFSET48); up3_conv_double_conv_1_weight = new Tensor({128}, parameter + OFFSET49); up3_conv_double_conv_1_bias = new Tensor({128}, parameter + OFFSET50); - up3_conv_double_conv_3_weight = new Tensor({128,128,3,3}, parameter + OFFSET51); + up3_conv_double_conv_3_weight = + new Tensor({128, 128, 3, 3}, parameter + OFFSET51); up3_conv_double_conv_4_weight = new Tensor({128}, parameter + OFFSET52); up3_conv_double_conv_4_bias = new Tensor({128}, parameter + OFFSET53); - up4_up_weight = new Tensor({128,64,2,2}, parameter + OFFSET54); + up4_up_weight = new Tensor({128, 64, 2, 2}, parameter + OFFSET54); up4_up_bias = new Tensor({64}, parameter + OFFSET55); - up4_conv_double_conv_0_weight = new Tensor({64,128,3,3}, parameter + OFFSET56); + up4_conv_double_conv_0_weight = + new Tensor({64, 128, 3, 3}, parameter + OFFSET56); up4_conv_double_conv_1_weight = new Tensor({64}, parameter + OFFSET57); up4_conv_double_conv_1_bias = new Tensor({64}, parameter + OFFSET58); - up4_conv_double_conv_3_weight = new Tensor({64,64,3,3}, parameter + OFFSET59); + up4_conv_double_conv_3_weight = + new Tensor({64, 64, 3, 3}, parameter + OFFSET59); up4_conv_double_conv_4_weight = new Tensor({64}, parameter + OFFSET60); up4_conv_double_conv_4_bias = new Tensor({64}, parameter + OFFSET61); - outc_conv_weight = new Tensor({2,64,1,1}, parameter + OFFSET62); + outc_conv_weight = new Tensor({2, 64, 1, 1}, parameter + OFFSET62); outc_conv_bias = new Tensor({2}, parameter + OFFSET63); inc_batchnorm_0_running_mean = new Tensor({64}, parameter + OFFSET64); inc_batchnorm_0_running_var = new Tensor({64}, parameter + OFFSET65); @@ -852,18 +993,18 @@ void uNet_initialize(int N, char *parameter_fname) { up1_convt_0_output = new Tensor({1, 512, 80, 118}); up1_concat_0_output = new Tensor({1, 1024, 80, 119}); up1_conv_0_output = new Tensor({1, 512, 80, 119}); - up1_batchnorm_0_output = new Tensor({1, 512, 80, 119}); - up1_conv_1_output = new Tensor({1, 512, 80, 119}); - up1_batchnorm_1_output = new Tensor({1, 512, 80, 119}); + up1_batchnorm_0_output = new Tensor({1, 512, 80, 119}); + up1_conv_1_output = new Tensor({1, 512, 80, 119}); + up1_batchnorm_1_output = new Tensor({1, 512, 80, 119}); up2_convt_0_output = new Tensor({1, 256, 160, 238}); up2_concat_0_output = new Tensor({1, 512, 160, 239}); - up2_conv_0_output = new Tensor({1, 256, 160, 239}); + up2_conv_0_output = new Tensor({1, 256, 160, 239}); up2_batchnorm_0_output = new Tensor({1, 256, 160, 239}); up2_conv_1_output = new Tensor({1, 256, 160, 239}); up2_batchnorm_1_output = new Tensor({1, 256, 160, 239}); up3_convt_0_output = new Tensor({1, 128, 320, 478}); up3_concat_0_output = new Tensor({1, 256, 320, 479}); - up3_conv_0_output = new Tensor({1, 128, 320, 479}); + up3_conv_0_output = new Tensor({1, 128, 320, 479}); up3_batchnorm_0_output = new Tensor({1, 128, 320, 479}); up3_conv_1_output = new Tensor({1, 128, 320, 479}); up3_batchnorm_1_output = new Tensor({1, 128, 320, 479}); @@ -881,71 +1022,71 @@ void uNet_initialize(int N, char *parameter_fname) { * Finalize the model. */ void uNet_finalize() { - // delete parameters + // delete parameters delete inc_double_conv_0_weight; - delete inc_double_conv_1_weight; - delete inc_double_conv_1_bias; - delete inc_double_conv_3_weight; - delete inc_double_conv_4_weight; - delete inc_double_conv_4_bias; - delete down1_maxpool_conv_1_double_conv_0_weight; - delete down1_maxpool_conv_1_double_conv_1_weight; - delete down1_maxpool_conv_1_double_conv_1_bias; - delete down1_maxpool_conv_1_double_conv_3_weight; - delete down1_maxpool_conv_1_double_conv_4_weight; - delete down1_maxpool_conv_1_double_conv_4_bias; - delete down2_maxpool_conv_1_double_conv_0_weight; - delete down2_maxpool_conv_1_double_conv_1_weight; - delete down2_maxpool_conv_1_double_conv_1_bias; - delete down2_maxpool_conv_1_double_conv_3_weight; - delete down2_maxpool_conv_1_double_conv_4_weight; - delete down2_maxpool_conv_1_double_conv_4_bias; - delete down3_maxpool_conv_1_double_conv_0_weight; - delete down3_maxpool_conv_1_double_conv_1_weight; - delete down3_maxpool_conv_1_double_conv_1_bias; - delete down3_maxpool_conv_1_double_conv_3_weight; - delete down3_maxpool_conv_1_double_conv_4_weight; - delete down3_maxpool_conv_1_double_conv_4_bias; - delete down4_maxpool_conv_1_double_conv_0_weight; - delete down4_maxpool_conv_1_double_conv_1_weight; - delete down4_maxpool_conv_1_double_conv_1_bias; - delete down4_maxpool_conv_1_double_conv_3_weight; - delete down4_maxpool_conv_1_double_conv_4_weight; - delete down4_maxpool_conv_1_double_conv_4_bias; - delete up1_up_weight; - delete up1_up_bias; - delete up1_conv_double_conv_0_weight; - delete up1_conv_double_conv_1_weight; - delete up1_conv_double_conv_1_bias ; - delete up1_conv_double_conv_3_weight; - delete up1_conv_double_conv_4_weight; - delete up1_conv_double_conv_4_bias ; - delete up2_up_weight; - delete up2_up_bias; - delete up2_conv_double_conv_0_weight; - delete up2_conv_double_conv_1_weight; - delete up2_conv_double_conv_1_bias; - delete up2_conv_double_conv_3_weight; - delete up2_conv_double_conv_4_weight; - delete up2_conv_double_conv_4_bias; - delete up3_up_weight; - delete up3_up_bias; - delete up3_conv_double_conv_0_weight; - delete up3_conv_double_conv_1_weight; - delete up3_conv_double_conv_1_bias; - delete up3_conv_double_conv_3_weight; - delete up3_conv_double_conv_4_weight; - delete up3_conv_double_conv_4_bias; - delete up4_up_weight; - delete up4_up_bias; - delete up4_conv_double_conv_0_weight; - delete up4_conv_double_conv_1_weight; - delete up4_conv_double_conv_1_bias; - delete up4_conv_double_conv_3_weight; - delete up4_conv_double_conv_4_weight; - delete up4_conv_double_conv_4_bias; - delete outc_conv_weight; - delete outc_conv_bias; + delete inc_double_conv_1_weight; + delete inc_double_conv_1_bias; + delete inc_double_conv_3_weight; + delete inc_double_conv_4_weight; + delete inc_double_conv_4_bias; + delete down1_maxpool_conv_1_double_conv_0_weight; + delete down1_maxpool_conv_1_double_conv_1_weight; + delete down1_maxpool_conv_1_double_conv_1_bias; + delete down1_maxpool_conv_1_double_conv_3_weight; + delete down1_maxpool_conv_1_double_conv_4_weight; + delete down1_maxpool_conv_1_double_conv_4_bias; + delete down2_maxpool_conv_1_double_conv_0_weight; + delete down2_maxpool_conv_1_double_conv_1_weight; + delete down2_maxpool_conv_1_double_conv_1_bias; + delete down2_maxpool_conv_1_double_conv_3_weight; + delete down2_maxpool_conv_1_double_conv_4_weight; + delete down2_maxpool_conv_1_double_conv_4_bias; + delete down3_maxpool_conv_1_double_conv_0_weight; + delete down3_maxpool_conv_1_double_conv_1_weight; + delete down3_maxpool_conv_1_double_conv_1_bias; + delete down3_maxpool_conv_1_double_conv_3_weight; + delete down3_maxpool_conv_1_double_conv_4_weight; + delete down3_maxpool_conv_1_double_conv_4_bias; + delete down4_maxpool_conv_1_double_conv_0_weight; + delete down4_maxpool_conv_1_double_conv_1_weight; + delete down4_maxpool_conv_1_double_conv_1_bias; + delete down4_maxpool_conv_1_double_conv_3_weight; + delete down4_maxpool_conv_1_double_conv_4_weight; + delete down4_maxpool_conv_1_double_conv_4_bias; + delete up1_up_weight; + delete up1_up_bias; + delete up1_conv_double_conv_0_weight; + delete up1_conv_double_conv_1_weight; + delete up1_conv_double_conv_1_bias; + delete up1_conv_double_conv_3_weight; + delete up1_conv_double_conv_4_weight; + delete up1_conv_double_conv_4_bias; + delete up2_up_weight; + delete up2_up_bias; + delete up2_conv_double_conv_0_weight; + delete up2_conv_double_conv_1_weight; + delete up2_conv_double_conv_1_bias; + delete up2_conv_double_conv_3_weight; + delete up2_conv_double_conv_4_weight; + delete up2_conv_double_conv_4_bias; + delete up3_up_weight; + delete up3_up_bias; + delete up3_conv_double_conv_0_weight; + delete up3_conv_double_conv_1_weight; + delete up3_conv_double_conv_1_bias; + delete up3_conv_double_conv_3_weight; + delete up3_conv_double_conv_4_weight; + delete up3_conv_double_conv_4_bias; + delete up4_up_weight; + delete up4_up_bias; + delete up4_conv_double_conv_0_weight; + delete up4_conv_double_conv_1_weight; + delete up4_conv_double_conv_1_bias; + delete up4_conv_double_conv_3_weight; + delete up4_conv_double_conv_4_weight; + delete up4_conv_double_conv_4_bias; + delete outc_conv_weight; + delete outc_conv_bias; delete inc_batchnorm_0_running_mean; delete inc_batchnorm_0_running_var; delete down1_batchnorm_0_running_mean; @@ -985,38 +1126,38 @@ void uNet_finalize() { // delete activations delete inc_conv_0_output; - delete inc_batchnorm_0_output; - delete inc_conv_1_output; - delete inc_batchnorm_1_output; - delete down1_maxpool2d_0_output; - delete down1_conv_0_output; - delete down1_batchnorm_0_output; - delete down1_conv_1_output; - delete down1_batchnorm_1_output; - delete down2_maxpool2d_0_output; - delete down2_conv_0_output; - delete down2_batchnorm_0_output; - delete down2_conv_1_output; - delete down2_batchnorm_1_output; - delete down3_maxpool2d_0_output; - delete down3_conv_0_output; - delete down3_batchnorm_0_output; - delete down3_conv_1_output; - delete down3_batchnorm_1_output; - delete down4_maxpool2d_0_output; - delete down4_conv_0_output; - delete down4_batchnorm_0_output; - delete down4_conv_1_output; - delete down4_batchnorm_1_output; - delete up1_convt_0_output; - delete up1_concat_0_output; - delete up2_convt_0_output; - delete up2_concat_0_output; - delete up3_convt_0_output; - delete up3_concat_0_output; - delete up4_convt_0_output; - delete up4_concat_0_output; - delete outc_conv_0_output; + delete inc_batchnorm_0_output; + delete inc_conv_1_output; + delete inc_batchnorm_1_output; + delete down1_maxpool2d_0_output; + delete down1_conv_0_output; + delete down1_batchnorm_0_output; + delete down1_conv_1_output; + delete down1_batchnorm_1_output; + delete down2_maxpool2d_0_output; + delete down2_conv_0_output; + delete down2_batchnorm_0_output; + delete down2_conv_1_output; + delete down2_batchnorm_1_output; + delete down3_maxpool2d_0_output; + delete down3_conv_0_output; + delete down3_batchnorm_0_output; + delete down3_conv_1_output; + delete down3_batchnorm_1_output; + delete down4_maxpool2d_0_output; + delete down4_conv_0_output; + delete down4_batchnorm_0_output; + delete down4_conv_1_output; + delete down4_batchnorm_1_output; + delete up1_convt_0_output; + delete up1_concat_0_output; + delete up2_convt_0_output; + delete up2_concat_0_output; + delete up3_convt_0_output; + delete up3_concat_0_output; + delete up4_convt_0_output; + delete up4_concat_0_output; + delete outc_conv_0_output; delete up1_conv_0_output; delete up1_batchnorm_0_output; delete up1_conv_1_output; @@ -1025,12 +1166,12 @@ void uNet_finalize() { delete up2_batchnorm_0_output; delete up2_conv_1_output; delete up2_batchnorm_1_output; - delete up3_conv_0_output; + delete up3_conv_0_output; delete up3_batchnorm_0_output; delete up3_conv_1_output; delete up3_batchnorm_1_output; delete up4_conv_0_output; delete up4_batchnorm_0_output; - delete up4_conv_1_output; + delete up4_conv_1_output; delete up4_batchnorm_1_output; } diff --git a/APWS23/project/uNet.h b/APWS23/project/uNet.h index 3ccf90e..62590a4 100644 --- a/APWS23/project/uNet.h +++ b/APWS23/project/uNet.h @@ -7,115 +7,118 @@ #define NUM_IMAGES 256 #define OFFSET0 0 -#define OFFSET1 (OFFSET0 + 64*3*3*3) +#define OFFSET1 (OFFSET0 + 64 * 3 * 3 * 3) #define OFFSET2 (OFFSET1 + 64) #define OFFSET3 (OFFSET2 + 64) -#define OFFSET4 (OFFSET3 + 64*64*3*3) +#define OFFSET4 (OFFSET3 + 64 * 64 * 3 * 3) #define OFFSET5 (OFFSET4 + 64) #define OFFSET6 (OFFSET5 + 64) -#define OFFSET7 (OFFSET6 + 128*64*3*3) +#define OFFSET7 (OFFSET6 + 128 * 64 * 3 * 3) #define OFFSET8 (OFFSET7 + 128) #define OFFSET9 (OFFSET8 + 128) -#define OFFSET10 (OFFSET9 + 128*128*3*3) +#define OFFSET10 (OFFSET9 + 128 * 128 * 3 * 3) #define OFFSET11 (OFFSET10 + 128) #define OFFSET12 (OFFSET11 + 128) -#define OFFSET13 (OFFSET12 + 256*128*3*3) +#define OFFSET13 (OFFSET12 + 256 * 128 * 3 * 3) #define OFFSET14 (OFFSET13 + 256) #define OFFSET15 (OFFSET14 + 256) -#define OFFSET16 (OFFSET15 + 256*256*3*3) +#define OFFSET16 (OFFSET15 + 256 * 256 * 3 * 3) #define OFFSET17 (OFFSET16 + 256) #define OFFSET18 (OFFSET17 + 256) -#define OFFSET19 (OFFSET18 + 512*256*3*3) +#define OFFSET19 (OFFSET18 + 512 * 256 * 3 * 3) #define OFFSET20 (OFFSET19 + 512) #define OFFSET21 (OFFSET20 + 512) -#define OFFSET22 (OFFSET21 + 512*512*3*3) +#define OFFSET22 (OFFSET21 + 512 * 512 * 3 * 3) #define OFFSET23 (OFFSET22 + 512) #define OFFSET24 (OFFSET23 + 512) -#define OFFSET25 (OFFSET24 + 1024*512*3*3) +#define OFFSET25 (OFFSET24 + 1024 * 512 * 3 * 3) #define OFFSET26 (OFFSET25 + 1024) #define OFFSET27 (OFFSET26 + 1024) -#define OFFSET28 (OFFSET27 + 1024*1024*3*3) +#define OFFSET28 (OFFSET27 + 1024 * 1024 * 3 * 3) #define OFFSET29 (OFFSET28 + 1024) #define OFFSET30 (OFFSET29 + 1024) -#define OFFSET31 (OFFSET30 + 1024*512*2*2) -#define OFFSET32 (OFFSET31 + 512) -#define OFFSET33 (OFFSET32 + 512*1024*3*3) -#define OFFSET34 (OFFSET33 + 512) -#define OFFSET35 (OFFSET34 + 512) -#define OFFSET36 (OFFSET35 + 512*512*3*3) -#define OFFSET37 (OFFSET36 + 512) +#define OFFSET31 (OFFSET30 + 1024 * 512 * 2 * 2) +#define OFFSET32 (OFFSET31 + 512) +#define OFFSET33 (OFFSET32 + 512 * 1024 * 3 * 3) +#define OFFSET34 (OFFSET33 + 512) +#define OFFSET35 (OFFSET34 + 512) +#define OFFSET36 (OFFSET35 + 512 * 512 * 3 * 3) +#define OFFSET37 (OFFSET36 + 512) #define OFFSET38 (OFFSET37 + 512) -#define OFFSET39 (OFFSET38 + 512*256*2*2) -#define OFFSET40 (OFFSET39 + 256) -#define OFFSET41 (OFFSET40 + 256*512*3*3) -#define OFFSET42 (OFFSET41 + 256) -#define OFFSET43 (OFFSET42 + 256) -#define OFFSET44 (OFFSET43 + 256*256*3*3) -#define OFFSET45 (OFFSET44 + 256) -#define OFFSET46 (OFFSET45 + 256) -#define OFFSET47 (OFFSET46 + 256*128*2*2) -#define OFFSET48 (OFFSET47 + 128) -#define OFFSET49 (OFFSET48 + 128*256*3*3) -#define OFFSET50 (OFFSET49 + 128) -#define OFFSET51 (OFFSET50 + 128) -#define OFFSET52 (OFFSET51 + 128*128*3*3) -#define OFFSET53 (OFFSET52 + 128) -#define OFFSET54 (OFFSET53 + 128) -#define OFFSET55 (OFFSET54 + 128*64*2*2) -#define OFFSET56 (OFFSET55 + 64) -#define OFFSET57 (OFFSET56 + 64*128*3*3) -#define OFFSET58 (OFFSET57 + 64) -#define OFFSET59 (OFFSET58 + 64) -#define OFFSET60 (OFFSET59 + 64*64*3*3) -#define OFFSET61 (OFFSET60 + 64) +#define OFFSET39 (OFFSET38 + 512 * 256 * 2 * 2) +#define OFFSET40 (OFFSET39 + 256) +#define OFFSET41 (OFFSET40 + 256 * 512 * 3 * 3) +#define OFFSET42 (OFFSET41 + 256) +#define OFFSET43 (OFFSET42 + 256) +#define OFFSET44 (OFFSET43 + 256 * 256 * 3 * 3) +#define OFFSET45 (OFFSET44 + 256) +#define OFFSET46 (OFFSET45 + 256) +#define OFFSET47 (OFFSET46 + 256 * 128 * 2 * 2) +#define OFFSET48 (OFFSET47 + 128) +#define OFFSET49 (OFFSET48 + 128 * 256 * 3 * 3) +#define OFFSET50 (OFFSET49 + 128) +#define OFFSET51 (OFFSET50 + 128) +#define OFFSET52 (OFFSET51 + 128 * 128 * 3 * 3) +#define OFFSET53 (OFFSET52 + 128) +#define OFFSET54 (OFFSET53 + 128) +#define OFFSET55 (OFFSET54 + 128 * 64 * 2 * 2) +#define OFFSET56 (OFFSET55 + 64) +#define OFFSET57 (OFFSET56 + 64 * 128 * 3 * 3) +#define OFFSET58 (OFFSET57 + 64) +#define OFFSET59 (OFFSET58 + 64) +#define OFFSET60 (OFFSET59 + 64 * 64 * 3 * 3) +#define OFFSET61 (OFFSET60 + 64) #define OFFSET62 (OFFSET61 + 64) -#define OFFSET63 (OFFSET62 + 2*64*1*1) -#define OFFSET64 (OFFSET63 + 2) -#define OFFSET65 (OFFSET64 + 64) -#define OFFSET66 (OFFSET65 + 64) -#define OFFSET67 (OFFSET66 + 64) +#define OFFSET63 (OFFSET62 + 2 * 64 * 1 * 1) +#define OFFSET64 (OFFSET63 + 2) +#define OFFSET65 (OFFSET64 + 64) +#define OFFSET66 (OFFSET65 + 64) +#define OFFSET67 (OFFSET66 + 64) #define OFFSET68 (OFFSET67 + 64) -#define OFFSET69 (OFFSET68 + 128) -#define OFFSET70 (OFFSET69 + 128) -#define OFFSET71 (OFFSET70 + 128) -#define OFFSET72 (OFFSET71 + 128) -#define OFFSET73 (OFFSET72 + 256) -#define OFFSET74 (OFFSET73 + 256) -#define OFFSET75 (OFFSET74 + 256) -#define OFFSET76 (OFFSET75 + 256) -#define OFFSET77 (OFFSET76 + 512) -#define OFFSET78 (OFFSET77 + 512) -#define OFFSET79 (OFFSET78 + 512) -#define OFFSET80 (OFFSET79 + 512) -#define OFFSET81 (OFFSET80 + 1024) -#define OFFSET82 (OFFSET81 + 1024) -#define OFFSET83 (OFFSET82 + 1024) -#define OFFSET84 (OFFSET83 + 1024) -#define OFFSET85 (OFFSET84 + 512) -#define OFFSET86 (OFFSET85 + 512) -#define OFFSET87 (OFFSET86 + 512) -#define OFFSET88 (OFFSET87 + 512) -#define OFFSET89 (OFFSET88 + 256) -#define OFFSET90 (OFFSET89 + 256) -#define OFFSET91 (OFFSET90 + 256) -#define OFFSET92 (OFFSET91 + 256) -#define OFFSET93 (OFFSET92 + 128) -#define OFFSET94 (OFFSET93 + 128) -#define OFFSET95 (OFFSET94 + 128) -#define OFFSET96 (OFFSET95 + 128) -#define OFFSET97 (OFFSET96 + 64) -#define OFFSET98 (OFFSET97 + 64) -#define OFFSET99 (OFFSET98 + 64) +#define OFFSET69 (OFFSET68 + 128) +#define OFFSET70 (OFFSET69 + 128) +#define OFFSET71 (OFFSET70 + 128) +#define OFFSET72 (OFFSET71 + 128) +#define OFFSET73 (OFFSET72 + 256) +#define OFFSET74 (OFFSET73 + 256) +#define OFFSET75 (OFFSET74 + 256) +#define OFFSET76 (OFFSET75 + 256) +#define OFFSET77 (OFFSET76 + 512) +#define OFFSET78 (OFFSET77 + 512) +#define OFFSET79 (OFFSET78 + 512) +#define OFFSET80 (OFFSET79 + 512) +#define OFFSET81 (OFFSET80 + 1024) +#define OFFSET82 (OFFSET81 + 1024) +#define OFFSET83 (OFFSET82 + 1024) +#define OFFSET84 (OFFSET83 + 1024) +#define OFFSET85 (OFFSET84 + 512) +#define OFFSET86 (OFFSET85 + 512) +#define OFFSET87 (OFFSET86 + 512) +#define OFFSET88 (OFFSET87 + 512) +#define OFFSET89 (OFFSET88 + 256) +#define OFFSET90 (OFFSET89 + 256) +#define OFFSET91 (OFFSET90 + 256) +#define OFFSET92 (OFFSET91 + 256) +#define OFFSET93 (OFFSET92 + 128) +#define OFFSET94 (OFFSET93 + 128) +#define OFFSET95 (OFFSET94 + 128) +#define OFFSET96 (OFFSET95 + 128) +#define OFFSET97 (OFFSET96 + 64) +#define OFFSET98 (OFFSET97 + 64) +#define OFFSET99 (OFFSET98 + 64) #define OFFSET100 (OFFSET99 + 64) - -void Conv2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, int stride, int pad, int dilation, bool has_bias); +void Conv2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, + int stride, int pad, int dilation, bool has_bias); void ReLU(Tensor *inout); -void BatchNorm2d(Tensor *input, Tensor *gamma, Tensor *beta, Tensor *running_mean, Tensor *running_var, Tensor *output, const float eps, const float momentum); -void ConvTranspose2d(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output, int stride, int pad); +void BatchNorm2d(Tensor *input, Tensor *gamma, Tensor *beta, + Tensor *running_mean, Tensor *running_var, Tensor *output, + const float eps, const float momentum); +void ConvTranspose2d(Tensor *input, Tensor *weight, Tensor *bias, + Tensor *output, int stride, int pad); float max4(float, float, float, float); void MaxPool2d(Tensor *input, Tensor *output); void Concat(Tensor *input1, Tensor *input2, Tensor *output); -void uNet_initialize(int, char*); -void uNet(Tensor*, Tensor*, int); +void uNet_initialize(int, char *); +void uNet(Tensor *, Tensor *, int); void uNet_finalize(); diff --git a/APWS23/project/util.cpp b/APWS23/project/util.cpp index 23b96fb..4a4c73a 100644 --- a/APWS23/project/util.cpp +++ b/APWS23/project/util.cpp @@ -2,9 +2,8 @@ #include #include -#include #include - +#include // Defined in main.cpp extern int N; @@ -12,7 +11,7 @@ extern char *parameter_fname; extern char *output_fname; extern char *input_fname; -void read_binary(void* dst, const char *filename, size_t *size) { +void read_binary(void *dst, const char *filename, size_t *size) { size_t size_; FILE *f = fopen(filename, "rb"); CHECK_ERROR(f != NULL, "Failed to read %s", filename); @@ -27,7 +26,7 @@ void read_binary(void* dst, const char *filename, size_t *size) { *size = (size_t)(size_ / 4); // float } -void* read_binary(const char *filename, size_t *size) { +void *read_binary(const char *filename, size_t *size) { size_t size_; FILE *f = fopen(filename, "rb"); CHECK_ERROR(f != NULL, "Failed to read %s", filename); @@ -44,7 +43,7 @@ void* read_binary(const char *filename, size_t *size) { return buf; } -void write_binary(void* dst, const char *filename, size_t size){ +void write_binary(void *dst, const char *filename, size_t size) { FILE *output_fp = (FILE *)fopen(output_fname, "wb"); fwrite(dst, sizeof(float), size, output_fp); fclose(output_fp); @@ -57,11 +56,11 @@ double get_time() { } void print_usage_exit(int argc, char **argv) { - printf("Usage %s [parameter bin] [input bin] [output] [N]\n", argv[0]); - printf(" parameter bin: File containing DNN parameters\n"); + printf("Usage %s [parameter bin] [input bin] [output] [N]\n", argv[0]); + printf(" parameter bin: File containing DNN parameters\n"); printf(" input bin : File containing input images\n"); - printf(" output: File to write results\n"); - printf(" N: Number of images to mask\n"); + printf(" output: File to write results\n"); + printf(" N: Number of images to mask\n"); EXIT(0); } @@ -85,14 +84,17 @@ void check_and_parse_args(int argc, char **argv) { N = atoi(argv[4]); } -void print_model(){ +void print_model() { printf("\n Model : U-Net\n"); - printf("------------------------------------------------------------------\n"); + printf( + "------------------------------------------------------------------\n"); printf(" Automatically identify the boundaries of the images in input.bin\n"); - printf("==================================================================\n"); + printf( + "==================================================================\n"); printf(" Number of input images : %d\n", N); printf(" Parameter file : %s\n", parameter_fname); printf(" Input file : %s\n", input_fname); printf(" Output file to write results : %s\n", output_fname); - printf("==================================================================\n"); + printf( + "==================================================================\n"); } diff --git a/APWS23/project/util.h b/APWS23/project/util.h index 0e13cb0..23fce6d 100644 --- a/APWS23/project/util.h +++ b/APWS23/project/util.h @@ -2,8 +2,8 @@ #include #include -#include #include +#include /* Useful macros */ #define EXIT(status) \ @@ -13,18 +13,17 @@ #define CHECK_ERROR(cond, fmt, ...) \ do { \ - if (!(cond)) {\ - printf(fmt "\n", ##__VA_ARGS__); \ + if (!(cond)) { \ + printf(fmt "\n", ##__VA_ARGS__); \ EXIT(EXIT_FAILURE); \ } \ } while (false) - void print_usage_exit(int argc, char **argv); void check_and_parse_args(int argc, char **argv); double get_time(); -void read_binary(void* dst, const char *filename, size_t *size); +void read_binary(void *dst, const char *filename, size_t *size); void *read_binary(const char *filename, size_t *size); -void write_binary(void* dst, const char *filename, size_t size); +void write_binary(void *dst, const char *filename, size_t size); void print_first_few_result(float *output, int print_max, double elapsed_time); void print_model();