add project by chaewon
This commit is contained in:
parent
346aaa097a
commit
71c77a785e
|
@ -84,10 +84,9 @@ void naive_cpu_convolution_im2col(half *_I, half *_F, half *_O, half *_BUF1,
|
|||
reshape(BUF2, O, N, K, OH, OW);
|
||||
}
|
||||
|
||||
void convolution(half *_I, half *_F, half *_O, half *_BUF1, half *_BUF2,
|
||||
int N, int C, int H, int W, int K, int R, int S, int pad_h,
|
||||
int pad_w, int stride_h, int stride_w, int dilation_h,
|
||||
int dilation_w) {
|
||||
void convolution(half *_I, half *_F, half *_O, half *_BUF1, half *_BUF2, int N,
|
||||
int C, int H, int W, int K, int R, int S, int pad_h, int pad_w,
|
||||
int stride_h, int stride_w, int dilation_h, int dilation_w) {
|
||||
// Remove this line after you complete the convolution on GPU
|
||||
naive_cpu_convolution_im2col(_I, _F, _O, _BUF1, _BUF2, N, C, H, W, K, R, S,
|
||||
pad_h, pad_w, stride_h, stride_w, dilation_h,
|
||||
|
|
|
@ -2342,8 +2342,7 @@ unsigned int gamma(unsigned int arg) {
|
|||
s = p[0]; for(unsigned int i=0; i<5; ++i) s += p[i+1] / (arg+i); return
|
||||
std::log(s) + (arg-0.5)*std::log(t) - t;
|
||||
*/
|
||||
static const f31 pi(0xC90FDAA2, 1),
|
||||
lbe(0xB8AA3B29, 0);
|
||||
static const f31 pi(0xC90FDAA2, 1), lbe(0xB8AA3B29, 0);
|
||||
unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
|
||||
bool bsign = sign != 0;
|
||||
f31 z(abs),
|
||||
|
@ -2456,7 +2455,7 @@ struct half_caster;
|
|||
/// alignment requirements, it is a reasonable assumption that the data of a
|
||||
/// half is just comprised of the 2 bytes of the underlying IEEE representation.
|
||||
class half {
|
||||
public:
|
||||
public:
|
||||
/// \name Construction and assignment
|
||||
/// \{
|
||||
|
||||
|
@ -2577,7 +2576,7 @@ class half {
|
|||
}
|
||||
/// \}
|
||||
|
||||
private:
|
||||
private:
|
||||
/// Rounding mode to use
|
||||
static const std::float_round_style round_style =
|
||||
(std::float_round_style)(HALF_ROUND_STYLE);
|
||||
|
@ -2725,7 +2724,7 @@ struct half_caster<half, U, R> {
|
|||
|
||||
static half cast(U arg) { return cast_impl(arg, is_float<U>()); };
|
||||
|
||||
private:
|
||||
private:
|
||||
static half cast_impl(U arg, true_type) {
|
||||
return half(binary, float2half<R>(arg));
|
||||
}
|
||||
|
@ -2742,7 +2741,7 @@ struct half_caster<T, half, R> {
|
|||
|
||||
static T cast(half arg) { return cast_impl(arg, is_float<T>()); }
|
||||
|
||||
private:
|
||||
private:
|
||||
static T cast_impl(half arg, true_type) { return half2float<T>(arg.data_); }
|
||||
static T cast_impl(half arg, false_type) {
|
||||
return half2int<R, true, true, T>(arg.data_);
|
||||
|
@ -2762,7 +2761,7 @@ namespace std {
|
|||
/// [std::numeric_limits](https://en.cppreference.com/w/cpp/types/numeric_limits)
|
||||
template <>
|
||||
class numeric_limits<half_float::half> {
|
||||
public:
|
||||
public:
|
||||
/// Is template specialization.
|
||||
static HALF_CONSTEXPR_CONST bool is_specialized = true;
|
||||
|
||||
|
@ -3185,7 +3184,7 @@ inline half operator/(half x, half y) {
|
|||
|
||||
/// Output operator.
|
||||
/// This uses the built-in functionality for streaming out floating-point
|
||||
///numbers.
|
||||
/// numbers.
|
||||
/// \param out output stream to write into
|
||||
/// \param arg half expression to write
|
||||
/// \return reference to output stream
|
||||
|
@ -3201,7 +3200,7 @@ std::basic_ostream<charT, traits> &operator<<(
|
|||
|
||||
/// Input operator.
|
||||
/// This uses the built-in functionality for streaming in floating-point
|
||||
///numbers, specifically double precision floating
|
||||
/// numbers, specifically double precision floating
|
||||
/// point numbers (unless overridden with [HALF_ARITHMETIC_TYPE](\ref
|
||||
/// HALF_ARITHMETIC_TYPE)). So the input string is first rounded to double
|
||||
/// precision using the underlying platform's current floating-point rounding
|
||||
|
@ -3652,14 +3651,10 @@ inline half log10(half arg) {
|
|||
return (abs == 0x7C00) ? arg
|
||||
: half(detail::binary, detail::signal(arg.data_));
|
||||
switch (abs) {
|
||||
case 0x4900:
|
||||
return half(detail::binary, 0x3C00);
|
||||
case 0x5640:
|
||||
return half(detail::binary, 0x4000);
|
||||
case 0x63D0:
|
||||
return half(detail::binary, 0x4200);
|
||||
case 0x70E2:
|
||||
return half(detail::binary, 0x4400);
|
||||
case 0x4900: return half(detail::binary, 0x3C00);
|
||||
case 0x5640: return half(detail::binary, 0x4000);
|
||||
case 0x63D0: return half(detail::binary, 0x4200);
|
||||
case 0x70E2: return half(detail::binary, 0x4400);
|
||||
}
|
||||
for (; abs < 0x400; abs <<= 1, --exp)
|
||||
;
|
||||
|
@ -4099,14 +4094,10 @@ inline half pow(half x, half y) {
|
|||
return half(detail::binary, detail::invalid());
|
||||
if (x.data_ == 0xBC00) return half(detail::binary, sign | 0x3C00);
|
||||
switch (y.data_) {
|
||||
case 0x3800:
|
||||
return sqrt(x);
|
||||
case 0x3C00:
|
||||
return half(detail::binary, detail::check_underflow(x.data_));
|
||||
case 0x4000:
|
||||
return x * x;
|
||||
case 0xBC00:
|
||||
return half(detail::binary, 0x3C00) / x;
|
||||
case 0x3800: return sqrt(x);
|
||||
case 0x3C00: return half(detail::binary, detail::check_underflow(x.data_));
|
||||
case 0x4000: return x * x;
|
||||
case 0xBC00: return half(detail::binary, 0x3C00) / x;
|
||||
}
|
||||
for (; absx < 0x400; absx <<= 1, --exp)
|
||||
;
|
||||
|
@ -4148,7 +4139,7 @@ inline half pow(half x, half y) {
|
|||
|
||||
/// Compute sine and cosine simultaneously.
|
||||
/// This returns the same results as sin() and cos() but is faster than
|
||||
///calling each function individually.
|
||||
/// calling each function individually.
|
||||
///
|
||||
/// This function is exact to rounding for all rounding modes.
|
||||
/// \param arg function argument
|
||||
|
@ -4211,15 +4202,9 @@ inline void sincos(half arg, half *sin, half *cos) {
|
|||
std::pair<detail::uint32, detail::uint32> sc =
|
||||
detail::sincos(detail::angle_arg(abs, k), 28);
|
||||
switch (k & 3) {
|
||||
case 1:
|
||||
sc = std::make_pair(sc.second, -sc.first);
|
||||
break;
|
||||
case 2:
|
||||
sc = std::make_pair(-sc.first, -sc.second);
|
||||
break;
|
||||
case 3:
|
||||
sc = std::make_pair(-sc.second, sc.first);
|
||||
break;
|
||||
case 1: sc = std::make_pair(sc.second, -sc.first); break;
|
||||
case 2: sc = std::make_pair(-sc.first, -sc.second); break;
|
||||
case 3: sc = std::make_pair(-sc.second, sc.first); break;
|
||||
}
|
||||
*sin = half(detail::binary,
|
||||
detail::fixed2half<half::round_style, 30, true, true, true>(
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
#include <string.h>
|
||||
|
||||
#include "convolution.cuh"
|
||||
#include "util.h"
|
||||
#include "half.hpp"
|
||||
#include "util.h"
|
||||
|
||||
using half_float::half;
|
||||
using namespace half_float::literal;
|
||||
|
|
|
@ -15,7 +15,7 @@ double get_time() {
|
|||
return tv.tv_sec + tv.tv_usec * 1e-6;
|
||||
}
|
||||
|
||||
half* alloc_tensor( int N, int C, int H, int W) {
|
||||
half *alloc_tensor(int N, int C, int H, int W) {
|
||||
half *m = (half *) aligned_alloc(32, N * C * H * W * sizeof(half));
|
||||
return m;
|
||||
}
|
||||
|
@ -27,7 +27,7 @@ void rand_tensor(half *m, int N, int C, int H, int W) {
|
|||
|
||||
void zero_tensor(half *m, int N, int C, int H, int W) {
|
||||
int L = N * C * H * W;
|
||||
memset((void*)m, 0, sizeof(half) * L);
|
||||
memset((void *) m, 0, sizeof(half) * L);
|
||||
}
|
||||
|
||||
void print_tensor(half *m, int N, int C, int H, int W) {
|
||||
|
@ -36,7 +36,7 @@ void print_tensor(half *m, int N, int C, int H, int W) {
|
|||
printf("Batch %d, Channel %d\n", n, c);
|
||||
for (int h = 0; h < H; ++h) {
|
||||
for (int w = 0; w < W; ++w) {
|
||||
printf("%+.3f ", (float)(m[((n * C + c) * H + h) * W + w]));
|
||||
printf("%+.3f ", (float) (m[((n * C + c) * H + h) * W + w]));
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
@ -95,7 +95,7 @@ void check_convolution(half *I, half *F, half *O, int N, int C, int H, int W,
|
|||
if (cnt <= thr)
|
||||
printf(
|
||||
"O[%d][%d][%d][%d] : correct_value = %f, your_value = %f\n",
|
||||
on, oc, oh, ow, (float)o_ans, (float)o);
|
||||
on, oc, oh, ow, (float) o_ans, (float) o);
|
||||
if (cnt == thr + 1)
|
||||
printf("Too many error, only first %d values are printed.\n",
|
||||
thr);
|
||||
|
|
|
@ -7,7 +7,7 @@ using namespace half_float::literal;
|
|||
|
||||
double get_time();
|
||||
|
||||
half* alloc_tensor(int N, int C, int H, int W);
|
||||
half *alloc_tensor(int N, int C, int H, int W);
|
||||
|
||||
void rand_tensor(half *m, int N, int C, int H, int W);
|
||||
|
||||
|
|
Loading…
Reference in New Issue