chundoong-lab-ta/APWS23/project/main.cpp

#include <cuda_runtime.h>
#include <unistd.h>

#include <cstdio>
#include <cstdlib>

#include "uNet.h"
#include "util.h"

// Number of untimed uNet runs executed before measurement starts.
// Defaults to 0; main() overrides it from argv[5] when optional arguments are given.
int WARMUP = 0;
// Number of timed uNet runs; main() divides the total elapsed time by this value.
// Defaults to 1; main() overrides it from argv[6] when optional arguments are given.
int MEASURE = 1;

// Global variables
// NOTE(review): none of the variables below are assigned in this file —
// presumably check_and_parse_args() fills them in from argv; confirm in util.

// Batch size: used as the leading dimension of the input/output tensors and
// passed to uNet_initialize() and uNet() in main().
int N = 1;
// Path handed to uNet_initialize() in main().
char *parameter_fname;
// Path handed to write_binary() in main().
char *output_fname;
// Path handed to read_binary() in main().
char *input_fname;

/*
Command-line arguments
  argv[1] = path to the uNet model parameters
  argv[2] = path to the input image data
  argv[3] = path where the output is saved
  argv[4] = number of images to run inference on

  // optional
  argv[5] = warm-up iteration count (default 0)
  argv[6] = performance-measurement iteration count (default 1)

*/
// Blocks until cudaDeviceSynchronize() returns and terminates the program if
// it reports a failure. `phase` only labels the error message.
static void sync_device_or_exit(const char *phase) {
  // The status is compared numerically: cudaSuccess is 0 in the CUDA runtime
  // API, any other value is an error code.
  const int status = static_cast<int>(cudaDeviceSynchronize());
  if (status != 0) {
    fprintf(stderr, "\nCUDA error %d while synchronizing after %s\n", status,
            phase);
    exit(EXIT_FAILURE);
  }
}

// Program entry point.
//
// Initializes the model, reads the input batch, runs uNet WARMUP times
// untimed and MEASURE times timed, prints the average time per run and the
// resulting images/sec, then writes the output buffer to output_fname.
//
// Returns 0 on success; exits with EXIT_FAILURE if the device reports an
// error at either synchronization point.
int main(int argc, char **argv) {
  check_and_parse_args(argc, argv);
  print_model();

  // Initialize model
  uNet_initialize(N, parameter_fname);

  // NOTE(review): these tensors are never deleted; they live until process
  // exit. Left as-is because Tensor's ownership rules are not visible here.
  Tensor *input = new Tensor({N, 3, 128, 191});
  Tensor *output = new Tensor({N, 2, 128, 191});

  size_t input_size = 0;
  read_binary((void *) input->buf, input_fname, &input_size);

  // Optional arguments are read independently: argv[argc] is NULL, so
  // argv[6] may only be touched when it was actually supplied.
  if (argc > 5) {
    WARMUP = atoi(argv[5]);
  }
  if (argc > 6) {
    MEASURE = atoi(argv[6]);
  }
  if (MEASURE < 1) {
    // A non-positive count would skip inference entirely and make the
    // per-run average a division by zero (or negative).
    fprintf(stderr, "Invalid performance measure count %d, using 1\n",
            MEASURE);
    MEASURE = 1;
  }

  printf(" process %d image(s)...", N);
  printf(" Warm up [%d], Performance measure [%d]\n", WARMUP, MEASURE);
  fflush(stdout);

  // warming up
  printf("\nWarmimg up.");
  fflush(stdout);
  for (int i = 0; i < WARMUP; ++i) {
    uNet(input, output, N);
    // Print first, then flush, so each progress dot shows up immediately.
    printf(".");
    fflush(stdout);
  }
  sync_device_or_exit("warm-up");
  printf("\n");

  // performance measure
  printf("\nProcess.");
  fflush(stdout);  // keep the banner flush out of the timed region
  double uNet_st = get_time();
  for (int j = 0; j < MEASURE; ++j) {
    uNet(input, output, N);
    printf(".");
  }
  // Synchronize before stopping the clock so pending device work is included.
  sync_device_or_exit("measurement");
  double uNet_en = get_time();
  printf("\n");
  double elapsed_time = (uNet_en - uNet_st) / MEASURE;
  printf("%lfsec (%lf img/sec)\n", elapsed_time, N / elapsed_time);

  // Announce the write before performing it.
  printf("Writing final result to %s ...", output_fname);
  fflush(stdout);

  // Widen N before multiplying so the count cannot overflow int.
  const size_t output_count = (size_t) N * 2 * 128 * 191;
  write_binary((void *) output->buf, output_fname, output_count);

  printf("Done!\n\n");

  // Finalize program
  uNet_finalize();
  return 0;
}
Large Update 2023-02-15 01:33:28 +09:00			`#include <cuda_runtime.h>`
			`#include <unistd.h>`

implement skeleton code 2023-02-01 22:30:00 +09:00			`#include <cstdio>`
			`#include <cstdlib>`

change name + add relu, tconv 2023-02-02 19:02:36 +09:00			`#include "uNet.h"`
implement skeleton code 2023-02-01 22:30:00 +09:00			`#include "util.h"`
apply batched process 2023-02-06 15:07:20 +09:00
change project dir name project-> project_full, project_release -> project 2023-02-20 17:05:39 +09:00			`int WARMUP = 0;`
			`int MEASURE = 1;`
implement skeleton code 2023-02-01 22:30:00 +09:00
			`// Global variables`
			`int N = 1;`
			`char *parameter_fname;`
			`char *output_fname;`
add reference pytorch code + check writing input & parameters is working correctly 2023-02-04 17:43:29 +09:00			`char *input_fname;`
implement skeleton code 2023-02-01 22:30:00 +09:00
change project dir name project-> project_full, project_release -> project 2023-02-20 17:05:39 +09:00			`/*`
			`Execute Parameters`
			`argv[1] = unet model parameters path`
			`argv[2] = input image path`
			`argv[3] = output save path`
			`argv[4] = the number of images to inference`

			`// optional`
			`argv[5] = warming up count`
			`argv[6] = performance measuring count`

			`*/`
apply batched process 2023-02-06 15:07:20 +09:00			`int main(int argc, char **argv) {`
implement skeleton code 2023-02-01 22:30:00 +09:00			`check_and_parse_args(argc, argv);`
implement naive version with openmp 2023-02-06 01:38:42 +09:00			`print_model();`
implement skeleton code 2023-02-01 22:30:00 +09:00
			`// Initialize model`
implement naive version with openmp 2023-02-06 01:38:42 +09:00			`uNet_initialize(N, parameter_fname);`
implement skeleton code 2023-02-01 22:30:00 +09:00
reduce input image size for code execution time;21.9s 2023-02-12 03:06:22 +09:00			`Tensor *input = new Tensor({N, 3, 128, 191});`
			`Tensor *output = new Tensor({N, 2, 128, 191});`
Large Update 2023-02-15 01:33:28 +09:00
implement naive version with openmp 2023-02-06 01:38:42 +09:00			`size_t input_size = 0;`
Large Update 2023-02-15 01:33:28 +09:00			`read_binary((void *) input->buf, input_fname, &input_size);`
implement layers 2023-02-03 01:25:19 +09:00
change project dir name project-> project_full, project_release -> project 2023-02-20 17:05:39 +09:00			`if (argc > 5){`
			`WARMUP = atoi(argv[5]);`
			`MEASURE = atoi(argv[6]);`
			`}`

apply batched process 2023-02-06 15:07:20 +09:00			`printf(" process %d image(s)...", N);`
change project dir name project-> project_full, project_release -> project 2023-02-20 17:05:39 +09:00			`printf(" Warm up [%d], Performance measure [%d]\n",WARMUP, MEASURE);`
Large Update 2023-02-15 01:33:28 +09:00			`fflush(stdout);`
implement skeleton code 2023-02-01 22:30:00 +09:00
change project dir name project-> project_full, project_release -> project 2023-02-20 17:05:39 +09:00			`// warming up`
add warm-up 2023-02-06 15:21:16 +09:00			`printf("\nWarmimg up.");`
fix branch 2023-02-14 17:45:28 +09:00			`fflush(stdout);`
change project dir name project-> project_full, project_release -> project 2023-02-20 17:05:39 +09:00			`for (int i = 0; i < WARMUP; ++i) {`
apply batched process 2023-02-06 15:07:20 +09:00			`uNet(input, output, N);`
fix branch 2023-02-14 17:45:28 +09:00			`fflush(stdout);`
add warm-up 2023-02-06 15:21:16 +09:00			`printf(".");`
			`}`
			`cudaDeviceSynchronize();`
			`printf("\n");`
apply batched process 2023-02-06 15:07:20 +09:00
change project dir name project-> project_full, project_release -> project 2023-02-20 17:05:39 +09:00			`// performance measure`
add warm-up 2023-02-06 15:21:16 +09:00			`printf("\nProcess.");`
revised error while reading parameters 2023-02-02 23:37:40 +09:00			`double uNet_st = get_time();`
Large Update 2023-02-15 01:33:28 +09:00			`for (int j = 0; j < MEASURE; ++j) {`
add warm-up 2023-02-06 15:21:16 +09:00			`uNet(input, output, N);`
			`printf(".");`
			`}`
apply batched process 2023-02-06 15:07:20 +09:00			`cudaDeviceSynchronize();`
Large Update 2023-02-15 01:33:28 +09:00			`double uNet_en = get_time();`
add warm-up 2023-02-06 15:21:16 +09:00			`printf("\n");`
Large Update 2023-02-15 01:33:28 +09:00			`double elapsed_time = uNet_en - uNet_st;`
			`elapsed_time = elapsed_time / MEASURE;`
			`printf("%lfsec (%lf img/sec)\n", elapsed_time, N / elapsed_time);`
apply batched process 2023-02-06 15:07:20 +09:00
Large Update 2023-02-15 01:33:28 +09:00			`write_binary((void *) output->buf, output_fname, (size_t)(N * 2 * 128 * 191));`
implement naive version with openmp 2023-02-06 01:38:42 +09:00
change project dir name project-> project_full, project_release -> project 2023-02-20 17:05:39 +09:00			`printf("Writing final result to %s ...", output_fname);`
implement naive version with openmp 2023-02-06 01:38:42 +09:00			`fflush(stdout);`
Large Update 2023-02-15 01:33:28 +09:00
implement naive version with openmp 2023-02-06 01:38:42 +09:00			`printf("Done!\n\n");`
implement skeleton code 2023-02-01 22:30:00 +09:00
			`// Finalize program`
revised error while reading parameters 2023-02-02 23:37:40 +09:00			`uNet_finalize();`
implement skeleton code 2023-02-01 22:30:00 +09:00			`}`