add project by chaewon

bhko 2023-08-13 11:04:11 +00:00
parent 346aaa097a
commit 71c77a785e
5 changed files with 29 additions and 45 deletions

View File

@@ -84,10 +84,9 @@ void naive_cpu_convolution_im2col(half *_I, half *_F, half *_O, half *_BUF1,
   reshape(BUF2, O, N, K, OH, OW);
 }
 
-void convolution(half *_I, half *_F, half *_O, half *_BUF1, half *_BUF2,
-                 int N, int C, int H, int W, int K, int R, int S, int pad_h,
-                 int pad_w, int stride_h, int stride_w, int dilation_h,
-                 int dilation_w) {
+void convolution(half *_I, half *_F, half *_O, half *_BUF1, half *_BUF2, int N,
+                 int C, int H, int W, int K, int R, int S, int pad_h, int pad_w,
+                 int stride_h, int stride_w, int dilation_h, int dilation_w) {
   // Remove this line after you complete the convolution on GPU
   naive_cpu_convolution_im2col(_I, _F, _O, _BUF1, _BUF2, N, C, H, W, K, R, S,
                                pad_h, pad_w, stride_h, stride_w, dilation_h,
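For reference, the output dimensions that these parameters imply follow the standard padded/strided/dilated convolution formula. A minimal sketch of that arithmetic (not part of the commit; the concrete sizes below are made up):

#include <cstdio>

int main() {
  // Hypothetical sizes; only the formula matters here.
  int H = 224, W = 224, R = 3, S = 3;
  int pad_h = 1, pad_w = 1, stride_h = 1, stride_w = 1;
  int dilation_h = 1, dilation_w = 1;

  // Dilation widens the effective filter; padding extends both borders.
  int OH = (H + 2 * pad_h - dilation_h * (R - 1) - 1) / stride_h + 1;
  int OW = (W + 2 * pad_w - dilation_w * (S - 1) - 1) / stride_w + 1;

  std::printf("OH = %d, OW = %d\n", OH, OW);  // prints OH = 224, OW = 224
  return 0;
}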

View File

@@ -2342,8 +2342,7 @@ unsigned int gamma(unsigned int arg) {
     s = p[0]; for(unsigned int i=0; i<5; ++i) s += p[i+1] / (arg+i); return
     std::log(s) + (arg-0.5)*std::log(t) - t;
   */
-  static const f31 pi(0xC90FDAA2, 1),
-                   lbe(0xB8AA3B29, 0);
+  static const f31 pi(0xC90FDAA2, 1), lbe(0xB8AA3B29, 0);
   unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
   bool bsign = sign != 0;
   f31 z(abs),
@@ -2456,7 +2455,7 @@ struct half_caster;
 /// alignment requirements, it is a reasonable assumption that the data of a
 /// half is just comprised of the 2 bytes of the underlying IEEE representation.
 class half {
- public:
+ public:
   /// \name Construction and assignment
   /// \{
@@ -2577,7 +2576,7 @@ class half {
   }
   /// \}
- private:
+ private:
   /// Rounding mode to use
   static const std::float_round_style round_style =
       (std::float_round_style)(HALF_ROUND_STYLE);
@@ -2725,7 +2724,7 @@ struct half_caster<half, U, R> {
   static half cast(U arg) { return cast_impl(arg, is_float<U>()); };
- private:
+ private:
   static half cast_impl(U arg, true_type) {
     return half(binary, float2half<R>(arg));
   }
@@ -2742,7 +2741,7 @@ struct half_caster<T, half, R> {
   static T cast(half arg) { return cast_impl(arg, is_float<T>()); }
- private:
+ private:
   static T cast_impl(half arg, true_type) { return half2float<T>(arg.data_); }
   static T cast_impl(half arg, false_type) {
     return half2int<R, true, true, T>(arg.data_);
@@ -2762,7 +2761,7 @@ namespace std {
 /// [std::numeric_limits](https://en.cppreference.com/w/cpp/types/numeric_limits)
 template <>
 class numeric_limits<half_float::half> {
- public:
+ public:
   /// Is template specialization.
   static HALF_CONSTEXPR_CONST bool is_specialized = true;
@@ -3185,7 +3184,7 @@ inline half operator/(half x, half y) {
 
 /// Output operator.
 /// This uses the built-in functionality for streaming out floating-point
-///numbers.
+/// numbers.
 /// \param out output stream to write into
 /// \param arg half expression to write
 /// \return reference to output stream
@@ -3201,7 +3200,7 @@ std::basic_ostream<charT, traits> &operator<<(
 
 /// Input operator.
 /// This uses the built-in functionality for streaming in floating-point
-///numbers, specifically double precision floating
+/// numbers, specifically double precision floating
 /// point numbers (unless overridden with [HALF_ARITHMETIC_TYPE](\ref
 /// HALF_ARITHMETIC_TYPE)). So the input string is first rounded to double
 /// precision using the underlying platform's current floating-point rounding
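The two reformatted doc comments above describe the stream operators: output reuses the built-in floating-point formatting, and input parses a double (by default) and then rounds it to half. A small usage sketch, assuming half.hpp is on the include path (not part of the commit):

#include <iostream>
#include <sstream>

#include "half.hpp"

using half_float::half;

int main() {
  std::istringstream in("0.1");
  half h;
  in >> h;                 // parsed as double, then rounded to half precision
  std::cout << h << '\n';  // streamed out with the built-in float formatting
  return 0;
}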
@@ -3652,14 +3651,10 @@ inline half log10(half arg) {
     return (abs == 0x7C00) ? arg
                            : half(detail::binary, detail::signal(arg.data_));
   switch (abs) {
-    case 0x4900:
-      return half(detail::binary, 0x3C00);
-    case 0x5640:
-      return half(detail::binary, 0x4000);
-    case 0x63D0:
-      return half(detail::binary, 0x4200);
-    case 0x70E2:
-      return half(detail::binary, 0x4400);
+    case 0x4900: return half(detail::binary, 0x3C00);
+    case 0x5640: return half(detail::binary, 0x4000);
+    case 0x63D0: return half(detail::binary, 0x4200);
+    case 0x70E2: return half(detail::binary, 0x4400);
   }
   for (; abs < 0x400; abs <<= 1, --exp)
     ;
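The four bit patterns special-cased above are the half encodings of 10, 100, 1000, and 10000, and the returned constants are 1.0, 2.0, 3.0, and 4.0, so log10 is exact for the powers of ten that half precision can represent. A quick check, assuming half.hpp is on the include path (not part of the commit):

#include <cstdio>

#include "half.hpp"

using half_float::half;

int main() {
  const float powers[] = {10.0f, 100.0f, 1000.0f, 10000.0f};
  for (float x : powers) {
    half h(x);  // each of these values is exactly representable in half
    std::printf("log10(%g) = %g\n", x, (float) half_float::log10(h));
  }
  return 0;
}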
@@ -4099,14 +4094,10 @@ inline half pow(half x, half y) {
       return half(detail::binary, detail::invalid());
   if (x.data_ == 0xBC00) return half(detail::binary, sign | 0x3C00);
   switch (y.data_) {
-    case 0x3800:
-      return sqrt(x);
-    case 0x3C00:
-      return half(detail::binary, detail::check_underflow(x.data_));
-    case 0x4000:
-      return x * x;
-    case 0xBC00:
-      return half(detail::binary, 0x3C00) / x;
+    case 0x3800: return sqrt(x);
+    case 0x3C00: return half(detail::binary, detail::check_underflow(x.data_));
+    case 0x4000: return x * x;
+    case 0xBC00: return half(detail::binary, 0x3C00) / x;
   }
   for (; absx < 0x400; absx <<= 1, --exp)
     ;
@@ -4148,7 +4139,7 @@ inline half pow(half x, half y) {
 
 /// Compute sine and cosine simultaneously.
 /// This returns the same results as sin() and cos() but is faster than
-///calling each function individually.
+/// calling each function individually.
 ///
 /// This function is exact to rounding for all rounding modes.
 /// \param arg function argument
@@ -4211,15 +4202,9 @@ inline void sincos(half arg, half *sin, half *cos) {
   std::pair<detail::uint32, detail::uint32> sc =
       detail::sincos(detail::angle_arg(abs, k), 28);
   switch (k & 3) {
-    case 1:
-      sc = std::make_pair(sc.second, -sc.first);
-      break;
-    case 2:
-      sc = std::make_pair(-sc.first, -sc.second);
-      break;
-    case 3:
-      sc = std::make_pair(-sc.second, sc.first);
-      break;
+    case 1: sc = std::make_pair(sc.second, -sc.first); break;
+    case 2: sc = std::make_pair(-sc.first, -sc.second); break;
+    case 3: sc = std::make_pair(-sc.second, sc.first); break;
   }
   *sin = half(detail::binary,
               detail::fixed2half<half::round_style, 30, true, true, true>(
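The sincos hunk above only compacts the quadrant fix-up; the function itself computes both results from a single argument reduction, which is why it is documented as faster than separate sin() and cos() calls. A usage sketch, assuming half.hpp is on the include path (not part of the commit):

#include <cstdio>

#include "half.hpp"

using half_float::half;
using namespace half_float::literal;  // enables the _h half-precision literal

int main() {
  half s, c;
  half_float::sincos(0.5_h, &s, &c);  // fills both outputs in one call
  std::printf("sin(0.5) = %f, cos(0.5) = %f\n", (float) s, (float) c);
  return 0;
}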

View File

@@ -5,8 +5,8 @@
 #include <string.h>
 
 #include "convolution.cuh"
-#include "util.h"
 #include "half.hpp"
+#include "util.h"
 
 using half_float::half;
 using namespace half_float::literal;

View File

@@ -15,7 +15,7 @@ double get_time() {
   return tv.tv_sec + tv.tv_usec * 1e-6;
 }
 
-half* alloc_tensor( int N, int C, int H, int W) {
+half *alloc_tensor(int N, int C, int H, int W) {
   half *m = (half *) aligned_alloc(32, N * C * H * W * sizeof(half));
   return m;
 }
@@ -27,7 +27,7 @@ void rand_tensor(half *m, int N, int C, int H, int W) {
 
 void zero_tensor(half *m, int N, int C, int H, int W) {
   int L = N * C * H * W;
-  memset((void*)m, 0, sizeof(half) * L);
+  memset((void *) m, 0, sizeof(half) * L);
 }
 
 void print_tensor(half *m, int N, int C, int H, int W) {
@@ -36,7 +36,7 @@ void print_tensor(half *m, int N, int C, int H, int W) {
       printf("Batch %d, Channel %d\n", n, c);
       for (int h = 0; h < H; ++h) {
         for (int w = 0; w < W; ++w) {
-          printf("%+.3f ", (float)(m[((n * C + c) * H + h) * W + w]));
+          printf("%+.3f ", (float) (m[((n * C + c) * H + h) * W + w]));
         }
         printf("\n");
       }
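print_tensor (and check_convolution below) flatten the N x C x H x W tensor with the usual NCHW row-major formula. A minimal sketch of that indexing (not part of the commit; nchw_index is a made-up helper name):

#include <cstdio>

// Element (n, c, h, w) of an N x C x H x W tensor stored contiguously in NCHW order.
static int nchw_index(int n, int c, int h, int w, int C, int H, int W) {
  return ((n * C + c) * H + h) * W + w;  // same expression as in print_tensor
}

int main() {
  // Hypothetical 2 x 3 x 4 x 5 tensor: 120 elements, valid indices 0..119.
  std::printf("%d\n", nchw_index(1, 2, 3, 4, 3, 4, 5));  // prints 119, the last element
  return 0;
}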
@@ -95,7 +95,7 @@ void check_convolution(half *I, half *F, half *O, int N, int C, int H, int W,
           if (cnt <= thr)
             printf(
                 "O[%d][%d][%d][%d] : correct_value = %f, your_value = %f\n",
-                on, oc, oh, ow, (float)o_ans, (float)o);
+                on, oc, oh, ow, (float) o_ans, (float) o);
           if (cnt == thr + 1)
             printf("Too many error, only first %d values are printed.\n",
                    thr);

View File

@@ -7,7 +7,7 @@ using namespace half_float::literal;
 
 double get_time();
-half* alloc_tensor(int N, int C, int H, int W);
+half *alloc_tensor(int N, int C, int H, int W);
 void rand_tensor(half *m, int N, int C, int H, int W);