chundoong-lab-ta/SHPC2022/hw3_answer/prefix_sum/prefix_sum.c

50 lines
945 B
C

#include <immintrin.h>
#include <math.h>
/*
 * Sequential inclusive prefix sum.
 *
 * Writes out[i] = in[0] + in[1] + ... + in[i] for every 0 <= i < N,
 * accumulating strictly left to right.
 *
 * out: destination array with room for at least N doubles.
 * in:  source array holding at least N doubles.
 * N:   number of elements to scan; when N <= 0 neither array is touched.
 */
void prefix_sum_sequential(double *out, const double *in, int N) {
  if (N <= 0) {
    // Nothing to scan: avoid reading in[0] / writing out[0] of an empty range.
    return;
  }

  out[0] = in[0];
  for (int i = 1; i < N; ++i) {
    // Each element extends the running total stored in its predecessor.
    out[i] = in[i] + out[i - 1];
  }
}
/*
 * Inclusive prefix sum computed with an in-place, two-phase tree scan whose
 * per-level loops carry OpenMP `parallel for` annotations.
 *
 * Writes out[i] = in[0] + in[1] + ... + in[i] for every 0 <= i < N.
 * Works for any N, not only powers of two, and never accesses an index
 * outside [0, N).
 *
 * The additions are associated along a binary tree rather than left to
 * right, so for general floating-point input the result may differ from
 * prefix_sum_sequential in the last bits.
 *
 * out: destination array with room for at least N doubles.
 * in:  source array holding at least N doubles.
 * N:   number of elements to scan; when N <= 0 neither array is touched.
 */
void prefix_sum_parallel(double *out, const double *in, int N) {
  if (N <= 0) {
    return;
  }

#pragma omp parallel for
  for (int i = 0; i < N; ++i) {
    out[i] = in[i];
  }

  // Up-sweep: after the level with stride st, every index i with
  // (i + 1) % (2 * st) == 0 holds the sum of the 2 * st inputs ending at i.
  // Strides are 64-bit so that 2 * st cannot overflow for large N.
  long long st = 1;
  while (st < N) {
    const long long span = 2 * st;
    const long long count = N / span;  // nodes k * span - 1 that lie below N
#pragma omp parallel for
    for (long long k = 1; k <= count; ++k) {
      const long long i = k * span - 1;
      out[i] = out[i - st] + out[i];
    }
    st = span;
  }

  // Down-sweep: st is now the first power of two >= N. Going from coarse to
  // fine strides, out[k * span - 1] already holds a complete prefix, so
  // adding it to the partial sum st positions to its right completes that
  // element too. `count` keeps i + st below N; N - st is positive at every
  // level because st starts below N here. Within one level the indices read
  // and the indices written are disjoint, so iterations are independent.
  for (st /= 2; st > 0; st /= 2) {
    const long long span = 2 * st;
    const long long count = (N - st) / span;
#pragma omp parallel for
    for (long long k = 1; k <= count; ++k) {
      const long long i = k * span - 1;
      out[i + st] = out[i] + out[i + st];
    }
  }
}