chundoong-lab-ta/SamsungDS22/submissions/final/ss1.eom/B/util.cpp

#include "util.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <math.h>
#include <sys/time.h>
#include <omp.h>

/* Wall-clock timestamp (seconds) stored by the latest timer_start() call for
 * each stopwatch slot. */
static double start_time[8];

/* Returns the current wall-clock time in seconds, as reported by
 * gettimeofday() (microsecond resolution). */
static double get_time(void) {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return (double)tv.tv_sec + (double)tv.tv_usec * 1e-6;
}

/* Tells whether i addresses one of the slots of start_time. */
static bool timer_slot_ok(int i) {
    return i >= 0 && (size_t)i < sizeof(start_time) / sizeof(start_time[0]);
}

/*
 * Starts (or restarts) stopwatch slot i by recording the current time.
 * An index outside the slot array is ignored instead of writing out of
 * bounds.
 */
void timer_start(int i) {
    if (!timer_slot_ok(i)) {
        return;
    }
    start_time[i] = get_time();
}

/*
 * Returns the seconds elapsed since the last timer_start(i). The slot keeps
 * its start time, so the function can be called repeatedly.
 * Returns 0.0 for an index outside the slot array.
 */
double timer_stop(int i) {
    if (!timer_slot_ok(i)) {
        return 0.0;
    }
    return get_time() - start_time[i];
}

/*
 * Validates a convolution result against a straightforward reference
 * implementation and prints the verdict to stdout.
 *
 * Memory layouts implied by the indexing below (contiguous, last index
 * fastest):
 *   input  : N x C x H x W
 *   filter : K x C x R x S
 *   output : N x K x OH x OW
 * where OH and OW are derived from H, W, pad, dilation and stride.
 *
 * An element is accepted when it equals the reference exactly, or when its
 * absolute error or its relative error is at most 1e-3. A NaN on only one
 * side is always a mismatch. The first 10 mismatches are printed, followed by
 * "Result: VALID" or "Result: INVALID".
 */
void check_convolution(float *input, float *output, float *filter,
    int N, int C, int H, int W, int K, int R, int S,
    int pad, int dilation, int stride) {
  printf("Validating...\n");

  int OH = (H + 2 * pad - dilation * (R - 1) - 1) / stride + 1;
  int OW = (W + 2 * pad - dilation * (S - 1) - 1) / stride + 1;

  float *O_ans;
  alloc_tensor(&O_ans, N, K, OH, OW);
  zero_tensor(O_ans, N, K, OH, OW);

  /* Plane sizes are kept in size_t so flattened offsets cannot overflow int
   * on large tensors. */
  const size_t in_plane = (size_t)H * (size_t)W;
  const size_t filter_plane = (size_t)R * (size_t)S;
  const size_t out_plane = (size_t)OH * (size_t)OW;

#pragma omp parallel for collapse(2) num_threads(20)
  for (int n = 0; n < N; ++n) {
    for (int k = 0; k < K; ++k) {
      const float *in_n = input + (size_t)n * (size_t)C * in_plane;
      const float *filter_k = filter + (size_t)k * (size_t)C * filter_plane;
      float *ans_nk = O_ans + ((size_t)n * (size_t)K + (size_t)k) * out_plane;

      for (int oh = 0; oh < OH; ++oh) {
        for (int ow = 0; ow < OW; ++ow) {
          float o = 0.f;
          for (int c = 0; c < C; ++c) {
            for (int r = 0; r < R; ++r) {
              /* The input row depends only on r; a row inside the padding
               * contributes nothing for any s. */
              int h = oh * stride - pad + r * dilation;
              if (h < 0 || h >= H) continue;
              for (int s = 0; s < S; ++s) {
                int w = ow * stride - pad + s * dilation;
                if (w < 0 || w >= W) continue;
                float i = in_n[(size_t)c * in_plane + (size_t)h * (size_t)W + (size_t)w];
                float f = filter_k[(size_t)c * filter_plane + (size_t)r * (size_t)S + (size_t)s];
                o += i * f;
              }
            }
          }
          ans_nk[(size_t)oh * (size_t)OW + (size_t)ow] = o;
        }
      }
    }
  }

  bool is_valid = true;
  int cnt = 0, thr = 10;
  float eps = 1e-3;
  for (int n = 0; n < N; ++n) {
    for (int k = 0; k < K; ++k) {
      for (int oh = 0; oh < OH; ++oh) {
        for (int ow = 0; ow < OW; ++ow) {
          size_t idx = ((size_t)n * (size_t)K + (size_t)k) * out_plane
                     + (size_t)oh * (size_t)OW + (size_t)ow;
          float o = output[idx];
          float o_ans = O_ans[idx];

          /* Written as "matches" rather than "differs": every comparison
           * with NaN is false, so a NaN on one side only can never satisfy
           * a tolerance test and is reported as a mismatch. */
          bool matches = (isnan(o) && isnan(o_ans))
                      || o == o_ans
                      || fabsf(o - o_ans) <= eps
                      || (o_ans != 0 && fabsf((o - o_ans) / o_ans) <= eps);
          if (!matches) {
            ++cnt;
            if (cnt <= thr)
              printf("output[%d][%d][%d][%d] : correct_value = %f, your_value = %f\n", n, k, oh, ow, o_ans, o);
            if (cnt == thr + 1)
              printf("Too many error, only first %d values are printed.\n", thr);
            is_valid = false;
          }
        }
      }
    }
  }

  /* The reference buffer is private to this function; release it so repeated
   * validation does not leak. */
  free(O_ans);

  if (is_valid) {
    printf("Result: VALID\n");
  } else {
    printf("Result: INVALID\n");
  }
}

/*
 * Allocates an uninitialized, 32-byte aligned buffer for a
 * D0 x D1 x D2 x D3 float tensor and stores its address in *t.
 * Release the buffer with free().
 *
 * The byte count is rounded up to a multiple of the alignment, as C11
 * requires for aligned_alloc(), and is never zero, so a successful call
 * always yields a non-NULL pointer. A negative dimension, a size that does
 * not fit in size_t, or an allocation failure prints a message to stderr and
 * terminates the process with a failure status.
 */
void alloc_tensor(float **t, int D0, int D1, int D2, int D3) {
  enum { TENSOR_ALIGNMENT = 32 };
  const size_t size_max = (size_t)-1;
  const int dims[4] = {D0, D1, D2, D3};

  size_t bytes = sizeof(float);
  bool representable = true;
  for (int d = 0; d < 4 && representable; ++d) {
    if (dims[d] < 0) {
      representable = false;
    } else if (dims[d] > 0 && bytes > size_max / (size_t)dims[d]) {
      representable = false;
    } else {
      bytes *= (size_t)dims[d];
    }
  }
  if (representable && bytes > size_max - TENSOR_ALIGNMENT) {
    representable = false;
  }

  *t = NULL;
  if (representable) {
    size_t padded = (bytes + TENSOR_ALIGNMENT - 1) / TENSOR_ALIGNMENT * TENSOR_ALIGNMENT;
    if (padded == 0) {
      padded = TENSOR_ALIGNMENT;
    }
    /* The cast is kept so the file also builds as C++. */
    *t = (float *) aligned_alloc(TENSOR_ALIGNMENT, padded);
  }

  if (*t == NULL) {
    fprintf(stderr, "Failed to allocate memory for matrix.\n");
    exit(EXIT_FAILURE);
  }
}

/*
 * Fills the D0 x D1 x D2 x D3 tensor stored contiguously at m.
 *
 * NOTE(review): despite the name, every element is set to the constant 1.0.
 * The random initializer ((float) rand() / RAND_MAX - 0.5) was commented out
 * in the original code; confirm whether that is intentional.
 * Nothing is written when any dimension is zero or negative.
 */
void rand_tensor(float *m, int D0, int D1, int D2, int D3) {
  /* The nested loops visit elements in storage order, so a running pointer
   * lands on the same locations as the explicit 4-D offset. */
  float *cursor = m;
  for (int d0 = 0; d0 < D0; ++d0) {
    for (int d1 = 0; d1 < D1; ++d1) {
      for (int d2 = 0; d2 < D2; ++d2) {
        for (int d3 = 0; d3 < D3; ++d3) {
          *cursor = 1.0;
          ++cursor;
        }
      }
    }
  }
}


/*
 * Clears a D0 x D1 x D2 x D3 float tensor by setting all of its bytes to
 * zero.
 */
void zero_tensor(float *t, int D0, int D1, int D2, int D3) {
  size_t byte_count = sizeof(float);
  byte_count *= (size_t)D0;
  byte_count *= (size_t)D1;
  byte_count *= (size_t)D2;
  byte_count *= (size_t)D3;
  memset(t, 0, byte_count);
}
. 2022-09-29 18:01:45 +09:00			`#include "util.h"`

			`#include <stdio.h>`
			`#include <stdlib.h>`
			`#include <string.h>`
			`#include <stdbool.h>`
			`#include <math.h>`
			`#include <sys/time.h>`
			`#include <omp.h>`

			`static double start_time[8];`

			`static double get_time() {`
			`struct timeval tv;`
			`gettimeofday(&tv, 0);`
			`return tv.tv_sec + tv.tv_usec * 1e-6;`
			`}`

			`void timer_start(int i) {`
			`start_time[i] = get_time();`
			`}`

			`double timer_stop(int i) {`
			`return get_time() - start_time[i];`
			`}`

			`void check_convolution(float *input, float *output, float *filter,`
			`int N, int C, int H, int W, int K, int R, int S,`
			`int pad, int dilation, int stride) {`
			`printf("Validating...\n");`

			`int OH = (H + 2 * pad - dilation * (R - 1) - 1) / stride + 1;`
			`int OW = (W + 2 * pad - dilation * (S - 1) - 1) / stride + 1;`

			`float *O_ans;`
			`alloc_tensor(&O_ans, N, K, OH, OW);`
			`zero_tensor(O_ans, N, K, OH, OW);`

			`#pragma omp parallel for collapse(2) num_threads(20)`
			`for (int n = 0; n < N; ++n) {`
			`for (int k = 0; k < K; ++k) {`
			`for (int oh = 0; oh < OH; ++oh) {`
			`for (int ow = 0; ow < OW; ++ow) {`
			`float o = 0.f;`
			`for (int c = 0; c < C; ++c) {`
			`for (int r = 0; r < R; ++r) {`
			`for (int s = 0; s < S; ++s) {`
			`int h = oh * stride - pad + r * dilation;`
			`int w = ow * stride - pad + s * dilation;`
			`if (h < 0 \|\| h >= H \|\| w < 0 \|\| w >= W) continue;`
			`float i = input[n * C * H * W + c * H * W + h * W + w];`
			`float f = filter[k * C * R * S + c * R * S + r * S + s];`
			`o += i * f;`
			`}`
			`}`
			`}`
			`O_ans[n * K * OH * OW + k * OH * OW + oh * OW + ow] = o;`
			`}`
			`}`
			`}`
			`}`

			`bool is_valid = true;`
			`int cnt = 0, thr = 10;`
			`float eps = 1e-3;`
			`for (int n = 0; n < N; ++n) {`
			`for (int k = 0; k < K; ++k) {`
			`for (int oh = 0; oh < OH; ++oh) {`
			`for (int ow = 0; ow < OW; ++ow) {`
			`float o = output[n * K * OH * OW + k * OH * OW + oh * OW + ow];`
			`float o_ans = O_ans[n * K * OH * OW + k * OH * OW + oh * OW + ow];`
			`if (fabsf(o - o_ans) > eps && (o_ans == 0 \|\| fabsf((o - o_ans) / o_ans) > eps)) {`
			`++cnt;`
			`if (cnt <= thr)`
			`printf("output[%d][%d][%d][%d] : correct_value = %f, your_value = %f\n", n, k, oh, ow, o_ans, o);`
			`if (cnt == thr + 1)`
			`printf("Too many error, only first %d values are printed.\n", thr);`
			`is_valid = false;`
			`}`
			`}`
			`}`
			`}`
			`}`


			`if (is_valid) {`
			`printf("Result: VALID\n");`
			`} else {`
			`printf("Result: INVALID\n");`
			`}`
			`}`

			`void alloc_tensor(float **t, int D0, int D1, int D2, int D3) {`
			`*t = (float *) aligned_alloc(32, sizeof(float) * D0 * D1 * D2 * D3);`
			`if (*t == NULL) {`
			`printf("Failed to allocate memory for matrix.\n");`
			`exit(0);`
			`}`
			`}`

			`void rand_tensor(float *m, int D0, int D1, int D2, int D3) {`
			`for (int i = 0; i < D0; i++) {`
			`for (int j = 0; j < D1; j++) {`
			`for (int k = 0; k < D2; k++) {`
			`for (int l = 0; l < D3; l++) {`
			`m[i * D1 * D2 * D3`
			`+ j * D2 * D3`
			`+ k * D3`
			`//+ l] = (float) rand() / RAND_MAX - 0.5;`
			`+ l] = 1.0;`
			`}`
			`}`
			`}`
			`}`
			`}`


			`void zero_tensor(float *t, int D0, int D1, int D2, int D3) {`
			`memset(t, 0, sizeof(float) * D0 * D1 * D2 * D3);`
			`}`