26 lines
745 B
C
26 lines
745 B
C
|
#pragma once
|
||
|
|
||
|
/*
 * Declaration of the convolution entry point.
 *
 * Only the prototype is visible in this header; the parameter notes below
 * are inferred from the names and from the print_input()/print_filter()
 * prototypes declared alongside it -- TODO confirm against the definition.
 *
 *   input     float buffer; presumably the N x C x H x W input tensor
 *   output    float buffer; its size and layout are not visible here
 *   weight    float buffer; presumably the K x C x R x S filter bank
 *   N, C, H, W   presumably batch size, channels, height and width of input
 *   K, R, S      presumably filter count, filter height and filter width
 *   pad, dilation, stride
 *             presumably the padding, dilation and stride of the convolution
 *
 * Whether the pointers refer to host or device memory cannot be told from
 * this declaration.
 *
 * Parameter names deliberately avoid the underscore-plus-uppercase form
 * (_N, _C, ...), which is reserved for the implementation in C and C++ and
 * is defined as a macro by some <ctype.h> implementations.
 */
void convolution(float *input, float *output, float *weight,
                 int N, int C, int H, int W,
                 int K, int R, int S,
                 int pad, int dilation, int stride);
|
||
|
|
||
|
/*
 * Declaration of convolution_init.
 *
 * Takes the same ten shape/hyper-parameter integers as convolution() but no
 * data pointers. Presumably performs set-up for a convolution of the given
 * shape -- the definition is not visible here, TODO confirm.
 *
 *   N, C, H, W   presumably batch size, channels, height and width of input
 *   K, R, S      presumably filter count, filter height and filter width
 *   pad, dilation, stride
 *             presumably the padding, dilation and stride of the convolution
 *
 * Parameter names deliberately avoid the underscore-plus-uppercase form
 * (_N, _C, ...), which is reserved for the implementation in C and C++.
 */
void convolution_init(int N, int C, int H, int W,
                      int K, int R, int S,
                      int pad, int dilation, int stride);
|
||
|
|
||
|
/*
 * Declaration of convolution_final.
 *
 * Mirrors the parameter list of convolution_init(). Presumably performs the
 * matching tear-down for a convolution of the given shape -- the definition
 * is not visible here, TODO confirm.
 *
 *   N, C, H, W   presumably batch size, channels, height and width of input
 *   K, R, S      presumably filter count, filter height and filter width
 *   pad, dilation, stride
 *             presumably the padding, dilation and stride of the convolution
 *
 * Parameter names deliberately avoid the underscore-plus-uppercase form
 * (_N, _C, ...), which is reserved for the implementation in C and C++.
 */
void convolution_final(int N, int C, int H, int W,
                       int K, int R, int S,
                       int pad, int dilation, int stride);
|
||
|
|
||
|
/*
 * Declaration of cuda_device_init: takes no arguments and returns nothing.
 * Presumably selects or initialises the CUDA device -- the definition is not
 * visible here, TODO confirm.
 */
void cuda_device_init(void);
|
||
|
/*
 * Declaration of cuda_device_malloc: takes no arguments and returns nothing,
 * so whatever it allocates must be tracked outside its parameter list.
 * Presumably allocates the device-side buffers -- TODO confirm against the
 * definition.
 */
void cuda_device_malloc(void);
|
||
|
/*
 * Declaration of cuda_memcpy_host_to_device: takes no arguments and returns
 * nothing. Presumably copies data from host buffers to device buffers; which
 * buffers are involved is not visible here -- TODO confirm.
 */
void cuda_memcpy_host_to_device(void);
|
||
|
/*
 * Declaration of cuda_memcpy_device_to_host: takes no arguments and returns
 * nothing. Presumably copies results from device buffers back to host
 * buffers; which buffers are involved is not visible here -- TODO confirm.
 */
void cuda_memcpy_device_to_host(void);
|
||
|
/*
 * Declaration of cuda_kernel_call: takes no arguments and returns nothing.
 * Presumably launches the convolution kernel; launch configuration and
 * kernel arguments are not visible here -- TODO confirm.
 */
void cuda_kernel_call(void);
|
||
|
|
||
|
/*
 * Declaration of print_filter.
 *
 *   filter      float buffer to print; presumably K x C x R x S elements
 *               -- TODO confirm layout against the definition
 *   K, C, R, S  the four integer dimensions passed with the buffer
 *
 * The output destination and format are not visible here.
 */
void print_filter(float *filter, int K, int C, int R, int S);
|
||
|
/*
 * Declaration of print_input.
 *
 *   input       float buffer to print; presumably N x C x H x W elements
 *               -- TODO confirm layout against the definition
 *   N, C, H, W  the four integer dimensions passed with the buffer
 *
 * The output destination and format are not visible here.
 */
void print_input(float *input, int N, int C, int H, int W);
|