chundoong-lab-ta/SamsungDS22/submissions/final/changju1.lee/B/util.cpp

120 lines
3.2 KiB
C++
Raw Normal View History

2022-09-29 18:01:45 +09:00
#include "util.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <math.h>
#include <sys/time.h>
#include <omp.h>
// Start timestamps (seconds) for the independent timer slots.
static double start_time[8];

// Current wall-clock time in seconds, with microsecond resolution.
static double get_time(void) {
  struct timeval tv;
  gettimeofday(&tv, NULL);
  return tv.tv_sec + tv.tv_usec * 1e-6;
}

// Returns non-zero when i names an existing slot of start_time.
static int timer_slot_valid(int i) {
  return i >= 0 && (size_t) i < sizeof start_time / sizeof start_time[0];
}

// Record the current time as the start of timer slot i.
// An out-of-range i is ignored instead of writing outside start_time.
void timer_start(int i) {
  if (!timer_slot_valid(i)) {
    return;
  }
  start_time[i] = get_time();
}

// Seconds elapsed since the last timer_start(i).
// Returns -1.0 for an out-of-range i instead of reading outside start_time.
// The slot is not reset, so repeated calls keep measuring from the same start.
double timer_stop(int i) {
  if (!timer_slot_valid(i)) {
    return -1.0;
  }
  return get_time() - start_time[i];
}
// Validate a convolution result against a direct reference computation.
//
// input  : N x C x H x W values, read as a flat row-major array
// output : N x K x OH x OW values to be checked
// filter : K x C x R x S values
// pad / dilation / stride : convolution parameters used to derive OH and OW
//
// Prints up to 10 mismatching elements and then "Result: VALID" or
// "Result: INVALID". An element matches when either its absolute error or
// its relative error (when the reference is non-zero) is within 1e-3.
void check_convolution(float *input, float *output, float *filter,
                       int N, int C, int H, int W, int K, int R, int S,
                       int pad, int dilation, int stride) {
  printf("Validating...\n");

  int OH = (H + 2 * pad - dilation * (R - 1) - 1) / stride + 1;
  int OW = (W + 2 * pad - dilation * (S - 1) - 1) / stride + 1;

  float *O_ans;
  alloc_tensor(&O_ans, N, K, OH, OW);
  zero_tensor(O_ans, N, K, OH, OW);

  // Reference convolution. Flat indices are built in size_t so that large
  // tensors do not overflow int arithmetic.
#pragma omp parallel for collapse(2) num_threads(20)
  for (int n = 0; n < N; ++n) {
    for (int k = 0; k < K; ++k) {
      for (int oh = 0; oh < OH; ++oh) {
        for (int ow = 0; ow < OW; ++ow) {
          float o = 0.f;
          for (int c = 0; c < C; ++c) {
            for (int r = 0; r < R; ++r) {
              for (int s = 0; s < S; ++s) {
                int h = oh * stride - pad + r * dilation;
                int w = ow * stride - pad + s * dilation;
                // Taps that land in the padding region contribute zero.
                if (h < 0 || h >= H || w < 0 || w >= W) continue;
                size_t in_idx = (((size_t) n * C + c) * H + h) * W + w;
                size_t f_idx = (((size_t) k * C + c) * R + r) * S + s;
                o += input[in_idx] * filter[f_idx];
              }
            }
          }
          size_t out_idx = (((size_t) n * K + k) * OH + oh) * OW + ow;
          O_ans[out_idx] = o;
        }
      }
    }
  }

  bool is_valid = true;
  int cnt = 0, thr = 10;
  float eps = 1e-3;
  for (int n = 0; n < N; ++n) {
    for (int k = 0; k < K; ++k) {
      for (int oh = 0; oh < OH; ++oh) {
        for (int ow = 0; ow < OW; ++ow) {
          size_t idx = (((size_t) n * K + k) * OH + oh) * OW + ow;
          float o = output[idx];
          float o_ans = O_ans[idx];
          // Phrased as "within tolerance" and then negated so that a NaN in
          // the output (every comparison false) is counted as a mismatch
          // rather than silently accepted.
          bool abs_ok = fabsf(o - o_ans) <= eps;
          bool rel_ok = o_ans != 0 && fabsf((o - o_ans) / o_ans) <= eps;
          if (!(abs_ok || rel_ok)) {
            ++cnt;
            if (cnt <= thr)
              printf("output[%d][%d][%d][%d] : correct_value = %f, your_value = %f\n", n, k, oh, ow, o_ans, o);
            if (cnt == thr + 1)
              printf("Too many error, only first %d values are printed.\n", thr);
            is_valid = false;
          }
        }
      }
    }
  }

  if (is_valid) {
    printf("Result: VALID\n");
  } else {
    printf("Result: INVALID\n");
  }

  // The reference buffer is only needed for this comparison.
  free(O_ans);
}
// Allocate a 32-byte-aligned buffer large enough for D0*D1*D2*D3 floats and
// store its address in *t. The buffer is uninitialized; release it with
// free(). On failure a message is written to stderr and the process exits
// with EXIT_FAILURE.
void alloc_tensor(float **t, int D0, int D1, int D2, int D3) {
  const size_t alignment = 32;
  size_t bytes = sizeof(float) * D0 * D1 * D2 * D3;

  // C11 aligned_alloc requires the size to be a multiple of the alignment,
  // so round the request up to the next multiple of 32.
  size_t rem = bytes % alignment;
  size_t padded = (rem == 0) ? bytes : bytes + (alignment - rem);

  *t = NULL;
  if (padded >= bytes) {  // false only if rounding up wrapped around
    if (padded == 0) {
      // Empty tensor: still hand back a valid, freeable pointer.
      padded = alignment;
    }
    *t = (float *) aligned_alloc(alignment, padded);
  }

  if (*t == NULL) {
    fprintf(stderr, "Failed to allocate memory for matrix.\n");
    exit(EXIT_FAILURE);
  }
}
// Fill a D0 x D1 x D2 x D3 tensor, stored contiguously in row-major order,
// with values drawn from rand() and shifted into the range [-0.5, 0.5].
// One rand() call is made per element, in storage order.
void rand_tensor(float *m, int D0, int D1, int D2, int D3) {
  // Walking the four dimensions in nesting order visits the elements in
  // exactly their storage order, so a running write pointer replaces the
  // explicit index arithmetic.
  float *cursor = m;
  for (int a = 0; a < D0; ++a) {
    for (int b = 0; b < D1; ++b) {
      for (int c = 0; c < D2; ++c) {
        for (int d = 0; d < D3; ++d) {
          *cursor++ = (float) rand() / RAND_MAX - 0.5;
        }
      }
    }
  }
}
// Set every byte of a D0 x D1 x D2 x D3 float tensor to zero.
void zero_tensor(float *t, int D0, int D1, int D2, int D3) {
  // Build the byte count step by step in size_t.
  size_t n_bytes = sizeof(float);
  n_bytes *= D0;
  n_bytes *= D1;
  n_bytes *= D2;
  n_bytes *= D3;
  memset(t, 0, n_bytes);
}