// chundoong-lab-ta/APWS23/vec-add-skeleton/vec_add.cu
//
// 45 lines
// 1.4 KiB
// Plaintext

#include <cstdio>
#include <cstdlib>
// CHECK_CUDA(call): evaluates `call`, an expression yielding a cudaError_t.
// If the result is not cudaSuccess, prints the source file and line together
// with the error name and error string to stderr, then terminates the process
// with EXIT_FAILURE.
//
// The argument is parenthesized so any expression can be passed without
// precedence surprises, and the do { ... } while (0) wrapper makes the macro
// usable as a single statement (e.g. inside an unbraced if/else).
#define CHECK_CUDA(call)                                                   \
  do {                                                                     \
    cudaError_t status_ = (call);                                          \
    if (status_ != cudaSuccess) {                                          \
      fprintf(stderr, "CUDA error (%s:%d): %s:%s\n", __FILE__, __LINE__,   \
              cudaGetErrorName(status_), cudaGetErrorString(status_));     \
      exit(EXIT_FAILURE);                                                  \
    }                                                                      \
  } while (0)
// Element-wise integer vector addition: C[k] = A[k] + B[k].
//
// Each thread derives one flat 1D index from blockIdx.x, blockDim.x and
// threadIdx.x and handles at most that single element. Threads whose index
// is N or greater return without touching memory, so the launch may contain
// more threads than there are elements.
__global__ void vec_add_kernel(const int *A, const int *B, int *C, int N) {
  const int gid = threadIdx.x + blockDim.x * blockIdx.x;
  if (gid >= N) {
    return;  // thread falls past the end of the vectors
  }
  C[gid] = A[gid] + B[gid];
}
// Host driver for the vector-addition example.
//
// Steps:
//   1. Allocate host vectors A, B, C and the CPU reference C_ans.
//   2. Fill A and B with pseudo-random values in [0, 1000) and compute
//      C_ans = A + B on the CPU.
//   3. Copy A and B to the device, run vec_add_kernel, copy the result to C.
//   4. Compare C against C_ans and print every mismatching index.
//
// Returns 0 after validation; any CUDA failure aborts through CHECK_CUDA and
// a failed host allocation exits with EXIT_FAILURE.
int main() {
  const int N = 16384;
  const size_t bytes = N * sizeof(int);

  int *A = (int *) malloc(bytes);
  int *B = (int *) malloc(bytes);
  int *C = (int *) malloc(bytes);
  int *C_ans = (int *) malloc(bytes);
  if (A == NULL || B == NULL || C == NULL || C_ans == NULL) {
    fprintf(stderr, "Host memory allocation failed\n");
    exit(EXIT_FAILURE);
  }

  for (int i = 0; i < N; i++) {
    A[i] = rand() % 1000;
    B[i] = rand() % 1000;
    C_ans[i] = A[i] + B[i];
  }

  // Run vector addition on the GPU and save the result in C.
  int *A_gpu = NULL;
  int *B_gpu = NULL;
  int *C_gpu = NULL;
  CHECK_CUDA(cudaMalloc((void **) &A_gpu, bytes));
  CHECK_CUDA(cudaMalloc((void **) &B_gpu, bytes));
  CHECK_CUDA(cudaMalloc((void **) &C_gpu, bytes));

  CHECK_CUDA(cudaMemcpy(A_gpu, A, bytes, cudaMemcpyHostToDevice));
  CHECK_CUDA(cudaMemcpy(B_gpu, B, bytes, cudaMemcpyHostToDevice));

  // Ceil-divide so every element gets a thread even when N is not a multiple
  // of the block size; the kernel's bounds check discards the surplus threads.
  const int threads_per_block = 256;
  const int num_blocks = (N + threads_per_block - 1) / threads_per_block;
  vec_add_kernel<<<num_blocks, threads_per_block>>>(A_gpu, B_gpu, C_gpu, N);
  CHECK_CUDA(cudaGetLastError());       // reports invalid launch configuration
  CHECK_CUDA(cudaDeviceSynchronize());  // reports errors raised during execution

  CHECK_CUDA(cudaMemcpy(C, C_gpu, bytes, cudaMemcpyDeviceToHost));

  CHECK_CUDA(cudaFree(A_gpu));
  CHECK_CUDA(cudaFree(B_gpu));
  CHECK_CUDA(cudaFree(C_gpu));

  // Validate the GPU result against the CPU reference.
  for (int i = 0; i < N; i++) {
    if (C[i] != C_ans[i]) {
      printf("Result differ at %d: %d vs %d\n", i, C[i], C_ans[i]);
    }
  }
  printf("Validation done.\n");

  free(A);
  free(B);
  free(C);
  free(C_ans);
  return 0;
}