chundoong-lab-ta/SHPC2022/hw3_answer/prefix_sum/prefix_sum.c

50 lines
945 B
C
Raw Normal View History

2022-10-18 20:27:08 +09:00
#include <immintrin.h>
#include <math.h>
/*
 * Sequential inclusive prefix sum.
 *
 * Writes out[i] = in[0] + in[1] + ... + in[i] for every 0 <= i < N.
 *
 * out: destination buffer with room for at least N doubles.
 * in:  source buffer holding at least N doubles.
 * N:   number of elements; when N <= 0 the function does nothing.
 */
void prefix_sum_sequential(double *out, const double *in, int N) {
  /* Nothing to do (and nothing safe to touch) for an empty range. */
  if (N <= 0) {
    return;
  }

  out[0] = in[0];
  for (int i = 1; i < N; ++i) {
    /* Each element extends the running total stored just before it. */
    out[i] = in[i] + out[i - 1];
  }
}
/*
 * Parallel inclusive prefix sum (tree-based, in place in `out`).
 *
 * Writes out[i] = in[0] + in[1] + ... + in[i] for every 0 <= i < N.
 * Works for any N, not only powers of two, and never touches memory
 * outside out[0..N-1].
 *
 * out: destination buffer with room for at least N doubles.
 * in:  source buffer holding at least N doubles; it is only read during
 *      the initial copy into `out`.
 * N:   number of elements; when N <= 0 the function does nothing.
 *
 * Algorithm:
 *   1. Copy `in` to `out`.
 *   2. Up-sweep: for st = 1, 2, 4, ... every index i with
 *      (i + 1) % (2 * st) == 0 absorbs the partial sum st positions to
 *      its left. Afterwards out[i] holds the sum of the aligned block of
 *      size 2^k ending at i, where 2^k is the largest power of two
 *      dividing i + 1. In particular, indices 2^k - 1 already hold their
 *      final prefix sum.
 *   3. Down-sweep: for st = ..., 4, 2, 1 every index i with
 *      (i + 1) % (2 * st) == 0 already holds its final prefix sum and
 *      pushes it st positions to the right, completing out[i + st].
 *
 * Within one level the set of indices read and the set of indices written
 * are disjoint, so each level's loop iterations are independent.
 * Indices use long long so that 2 * st cannot overflow for large N.
 */
void prefix_sum_parallel(double *out, const double *in, int N) {
  if (N <= 0) {
    return;
  }

  const long long n = N;

#pragma omp parallel for
  for (long long i = 0; i < n; ++i) {
    out[i] = in[i];
  }

  /* Up-sweep: build partial sums over aligned power-of-two blocks. */
  long long st = 1;
  for (; st < n; st *= 2) {
    const long long step = 2 * st;
#pragma omp parallel for
    for (long long i = step - 1; i < n; i += step) {
      out[i] = out[i - st] + out[i];
    }
  }

  /*
   * Down-sweep: st is now the smallest power of two >= n. Starting from
   * half of that, propagate finished prefixes into the block to their
   * right. The bound i < n - st keeps the target index i + st in range.
   */
  for (st /= 2; st > 0; st /= 2) {
    const long long step = 2 * st;
#pragma omp parallel for
    for (long long i = step - 1; i < n - st; i += step) {
      out[i + st] = out[i] + out[i + st];
    }
  }
}