add project by chaewon

bhko 2023-08-13 11:04:11 +00:00
parent 346aaa097a
commit 71c77a785e
5 changed files with 29 additions and 45 deletions

View File

@@ -84,10 +84,9 @@ void naive_cpu_convolution_im2col(half *_I, half *_F, half *_O, half *_BUF1,
   reshape(BUF2, O, N, K, OH, OW);
 }
 
-void convolution(half *_I, half *_F, half *_O, half *_BUF1, half *_BUF2,
-                 int N, int C, int H, int W, int K, int R, int S, int pad_h,
-                 int pad_w, int stride_h, int stride_w, int dilation_h,
-                 int dilation_w) {
+void convolution(half *_I, half *_F, half *_O, half *_BUF1, half *_BUF2, int N,
+                 int C, int H, int W, int K, int R, int S, int pad_h, int pad_w,
+                 int stride_h, int stride_w, int dilation_h, int dilation_w) {
   // Remove this line after you complete the convolution on GPU
   naive_cpu_convolution_im2col(_I, _F, _O, _BUF1, _BUF2, N, C, H, W, K, R, S,
                                pad_h, pad_w, stride_h, stride_w, dilation_h,
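For reference, the output dimensions that these parameters imply follow the standard padded/strided/dilated convolution formula. A minimal sketch of that arithmetic (not part of the commit; the concrete sizes below are made up):

#include <cstdio>

int main() {
  // Hypothetical sizes; only the formula matters here.
  int H = 224, W = 224, R = 3, S = 3;
  int pad_h = 1, pad_w = 1, stride_h = 1, stride_w = 1;
  int dilation_h = 1, dilation_w = 1;

  // Dilation widens the effective filter; padding extends both borders.
  int OH = (H + 2 * pad_h - dilation_h * (R - 1) - 1) / stride_h + 1;
  int OW = (W + 2 * pad_w - dilation_w * (S - 1) - 1) / stride_w + 1;

  std::printf("OH = %d, OW = %d\n", OH, OW);  // prints OH = 224, OW = 224
  return 0;
}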

View File

@@ -2342,8 +2342,7 @@ unsigned int gamma(unsigned int arg) {
     s = p[0]; for(unsigned int i=0; i<5; ++i) s += p[i+1] / (arg+i); return
     std::log(s) + (arg-0.5)*std::log(t) - t;
   */
-  static const f31 pi(0xC90FDAA2, 1),
-                   lbe(0xB8AA3B29, 0);
+  static const f31 pi(0xC90FDAA2, 1), lbe(0xB8AA3B29, 0);
   unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
   bool bsign = sign != 0;
   f31 z(abs),
@@ -2456,7 +2455,7 @@ struct half_caster;
 /// alignment requirements, it is a reasonable assumption that the data of a
 /// half is just comprised of the 2 bytes of the underlying IEEE representation.
 class half {
- public:
+ public:
   /// \name Construction and assignment
   /// \{
@@ -2577,7 +2576,7 @@ class half {
   }
   /// \}
- private:
+ private:
   /// Rounding mode to use
   static const std::float_round_style round_style =
       (std::float_round_style)(HALF_ROUND_STYLE);
@@ -2725,7 +2724,7 @@ struct half_caster<half, U, R> {
   static half cast(U arg) { return cast_impl(arg, is_float<U>()); };
- private:
+ private:
   static half cast_impl(U arg, true_type) {
     return half(binary, float2half<R>(arg));
   }
@@ -2742,7 +2741,7 @@ struct half_caster<T, half, R> {
   static T cast(half arg) { return cast_impl(arg, is_float<T>()); }
- private:
+ private:
   static T cast_impl(half arg, true_type) { return half2float<T>(arg.data_); }
   static T cast_impl(half arg, false_type) {
     return half2int<R, true, true, T>(arg.data_);
@@ -2762,7 +2761,7 @@ namespace std {
 /// [std::numeric_limits](https://en.cppreference.com/w/cpp/types/numeric_limits)
 template <>
 class numeric_limits<half_float::half> {
- public:
+ public:
   /// Is template specialization.
   static HALF_CONSTEXPR_CONST bool is_specialized = true;
@@ -3185,7 +3184,7 @@ inline half operator/(half x, half y) {
 
 /// Output operator.
 /// This uses the built-in functionality for streaming out floating-point
-///numbers.
+/// numbers.
 /// \param out output stream to write into
 /// \param arg half expression to write
 /// \return reference to output stream
@@ -3201,7 +3200,7 @@ std::basic_ostream<charT, traits> &operator<<(
 
 /// Input operator.
 /// This uses the built-in functionality for streaming in floating-point
-///numbers, specifically double precision floating
+/// numbers, specifically double precision floating
 /// point numbers (unless overridden with [HALF_ARITHMETIC_TYPE](\ref
 /// HALF_ARITHMETIC_TYPE)). So the input string is first rounded to double
 /// precision using the underlying platform's current floating-point rounding
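The two reformatted doc comments above describe the stream operators: output reuses the built-in floating-point formatting, and input parses a double (by default) and then rounds it to half. A small usage sketch, assuming half.hpp is on the include path (not part of the commit):

#include <iostream>
#include <sstream>

#include "half.hpp"

using half_float::half;

int main() {
  std::istringstream in("0.1");
  half h;
  in >> h;                 // parsed as double, then rounded to half precision
  std::cout << h << '\n';  // streamed out with the built-in float formatting
  return 0;
}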
@@ -3652,14 +3651,10 @@ inline half log10(half arg) {
     return (abs == 0x7C00) ? arg
                            : half(detail::binary, detail::signal(arg.data_));
   switch (abs) {
-    case 0x4900:
-      return half(detail::binary, 0x3C00);
-    case 0x5640:
-      return half(detail::binary, 0x4000);
-    case 0x63D0:
-      return half(detail::binary, 0x4200);
-    case 0x70E2:
-      return half(detail::binary, 0x4400);
+    case 0x4900: return half(detail::binary, 0x3C00);
+    case 0x5640: return half(detail::binary, 0x4000);
+    case 0x63D0: return half(detail::binary, 0x4200);
+    case 0x70E2: return half(detail::binary, 0x4400);
   }
   for (; abs < 0x400; abs <<= 1, --exp)
     ;
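The four bit patterns special-cased above are the half encodings of 10, 100, 1000, and 10000, and the returned constants are 1.0, 2.0, 3.0, and 4.0, so log10 is exact for the powers of ten that half precision can represent. A quick check, assuming half.hpp is on the include path (not part of the commit):

#include <cstdio>

#include "half.hpp"

using half_float::half;

int main() {
  const float powers[] = {10.0f, 100.0f, 1000.0f, 10000.0f};
  for (float x : powers) {
    half h(x);  // each of these values is exactly representable in half
    std::printf("log10(%g) = %g\n", x, (float) half_float::log10(h));
  }
  return 0;
}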
@@ -4099,14 +4094,10 @@ inline half pow(half x, half y) {
       return half(detail::binary, detail::invalid());
   if (x.data_ == 0xBC00) return half(detail::binary, sign | 0x3C00);
   switch (y.data_) {
-    case 0x3800:
-      return sqrt(x);
-    case 0x3C00:
-      return half(detail::binary, detail::check_underflow(x.data_));
-    case 0x4000:
-      return x * x;
-    case 0xBC00:
-      return half(detail::binary, 0x3C00) / x;
+    case 0x3800: return sqrt(x);
+    case 0x3C00: return half(detail::binary, detail::check_underflow(x.data_));
+    case 0x4000: return x * x;
+    case 0xBC00: return half(detail::binary, 0x3C00) / x;
   }
   for (; absx < 0x400; absx <<= 1, --exp)
     ;
@@ -4148,7 +4139,7 @@ inline half pow(half x, half y) {
 
 /// Compute sine and cosine simultaneously.
 /// This returns the same results as sin() and cos() but is faster than
-///calling each function individually.
+/// calling each function individually.
 ///
 /// This function is exact to rounding for all rounding modes.
 /// \param arg function argument
@@ -4211,15 +4202,9 @@ inline void sincos(half arg, half *sin, half *cos) {
   std::pair<detail::uint32, detail::uint32> sc =
       detail::sincos(detail::angle_arg(abs, k), 28);
   switch (k & 3) {
-    case 1:
-      sc = std::make_pair(sc.second, -sc.first);
-      break;
-    case 2:
-      sc = std::make_pair(-sc.first, -sc.second);
-      break;
-    case 3:
-      sc = std::make_pair(-sc.second, sc.first);
-      break;
+    case 1: sc = std::make_pair(sc.second, -sc.first); break;
+    case 2: sc = std::make_pair(-sc.first, -sc.second); break;
+    case 3: sc = std::make_pair(-sc.second, sc.first); break;
   }
   *sin = half(detail::binary,
               detail::fixed2half<half::round_style, 30, true, true, true>(
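The sincos hunk above only compacts the quadrant fix-up; the function itself computes both results from a single argument reduction, which is why it is documented as faster than separate sin() and cos() calls. A usage sketch, assuming half.hpp is on the include path (not part of the commit):

#include <cstdio>

#include "half.hpp"

using half_float::half;
using namespace half_float::literal;  // enables the _h half-precision literal

int main() {
  half s, c;
  half_float::sincos(0.5_h, &s, &c);  // fills both outputs in one call
  std::printf("sin(0.5) = %f, cos(0.5) = %f\n", (float) s, (float) c);
  return 0;
}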

View File

@@ -5,8 +5,8 @@
 #include <string.h>
 
 #include "convolution.cuh"
-#include "util.h"
 #include "half.hpp"
+#include "util.h"
 
 using half_float::half;
 using namespace half_float::literal;

View File

@@ -15,7 +15,7 @@ double get_time() {
   return tv.tv_sec + tv.tv_usec * 1e-6;
 }
 
-half* alloc_tensor( int N, int C, int H, int W) {
+half *alloc_tensor(int N, int C, int H, int W) {
   half *m = (half *) aligned_alloc(32, N * C * H * W * sizeof(half));
   return m;
 }
@@ -27,7 +27,7 @@ void rand_tensor(half *m, int N, int C, int H, int W) {
 
 void zero_tensor(half *m, int N, int C, int H, int W) {
   int L = N * C * H * W;
-  memset((void*)m, 0, sizeof(half) * L);
+  memset((void *) m, 0, sizeof(half) * L);
 }
 
 void print_tensor(half *m, int N, int C, int H, int W) {
@@ -36,7 +36,7 @@ void print_tensor(half *m, int N, int C, int H, int W) {
       printf("Batch %d, Channel %d\n", n, c);
       for (int h = 0; h < H; ++h) {
         for (int w = 0; w < W; ++w) {
-          printf("%+.3f ", (float)(m[((n * C + c) * H + h) * W + w]));
+          printf("%+.3f ", (float) (m[((n * C + c) * H + h) * W + w]));
         }
         printf("\n");
       }
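print_tensor (and check_convolution below) flatten the N x C x H x W tensor with the usual NCHW row-major formula. A minimal sketch of that indexing (not part of the commit; nchw_index is a made-up helper name):

#include <cstdio>

// Element (n, c, h, w) of an N x C x H x W tensor stored contiguously in NCHW order.
static int nchw_index(int n, int c, int h, int w, int C, int H, int W) {
  return ((n * C + c) * H + h) * W + w;  // same expression as in print_tensor
}

int main() {
  // Hypothetical 2 x 3 x 4 x 5 tensor: 120 elements, valid indices 0..119.
  std::printf("%d\n", nchw_index(1, 2, 3, 4, 3, 4, 5));  // prints 119, the last element
  return 0;
}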
@@ -95,7 +95,7 @@ void check_convolution(half *I, half *F, half *O, int N, int C, int H, int W,
           if (cnt <= thr)
             printf(
                 "O[%d][%d][%d][%d] : correct_value = %f, your_value = %f\n",
-                on, oc, oh, ow, (float)o_ans, (float)o);
+                on, oc, oh, ow, (float) o_ans, (float) o);
           if (cnt == thr + 1)
             printf("Too many error, only first %d values are printed.\n",
                    thr);

View File

@@ -7,7 +7,7 @@ using namespace half_float::literal;
 
 double get_time();
-half* alloc_tensor(int N, int C, int H, int W);
+half *alloc_tensor(int N, int C, int H, int W);
 void rand_tensor(half *m, int N, int C, int H, int W);