Update project skeleton.

Jinpyo Kim 2022-11-18 20:31:57 +09:00
parent c6d9b62306
commit a34a6b8b04
8 changed files with 222 additions and 89 deletions

View File

@@ -5,10 +5,10 @@ CFLAGS=-std=c++14 -O3 -Wall -march=native -mavx2 -mfma -mno-avx512f -fopenmp -I/
 CUDA_CFLAGS:=$(foreach option, $(CFLAGS),-Xcompiler=$(option))
 LDFLAGS=-pthread -L/usr/local/cuda/lib64
-LDLIBS=-lmpi_cxx -lmpi -lstdc++ -lcuda -lcudart -lm
+LDLIBS=-lmpi_cxx -lmpi -lstdc++ -lcudart -lm
 CXX=g++
-CUX=nvcc
+CUX=/usr/local/cuda/bin/nvcc
 all: $(TARGET)
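
Note: the $(foreach ...) line wraps each host compiler flag in -Xcompiler= so that nvcc forwards it to the host compiler. As a reduced illustration (not taken from this Makefile), with CFLAGS = -O3 -fopenmp the variable expands to:

    CUDA_CFLAGS := -Xcompiler=-O3 -Xcompiler=-fopenmp

which is how the OpenMP and optimization flags still reach g++ during CUDA compilation.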

View File

@@ -11,25 +11,25 @@ int mpi_rank = 0, mpi_size = 1;
 /* Global arguments */
 int rng_seed = 4155;
 int N = 1;
-int L = 10;
 static char *parameter_fname;
 static char *output_fname;
+const int print_max = 8;

 void print_usage_exit(int argc, char **argv) {
   if (mpi_rank == 0) {
-    printf("Usage %s [parameter bin] [output] [N] [L] [seed] \n", argv[0]);
+    printf("Usage %s [parameter bin] [output] [N] [seed] \n", argv[0]);
     printf(" parameter bin: File containing DNN parameters\n");
     printf(" output: File to write namegen results\n");
     printf(" N: Number of names to generate\n");
-    printf(" L: Maximum length of a name\n");
     printf(" seed: An integer RNG seed\n");
   }
   EXIT(0);
 }

 void check_and_parse_args(int argc, char **argv) {
-  if (argc != 6)
+  if (argc != 5)
     print_usage_exit(argc, argv);
   int c;
@@ -45,17 +45,7 @@ void check_and_parse_args(int argc, char **argv) {
   parameter_fname = argv[1];
   output_fname = argv[2];
   N = atoi(argv[3]);
-  L = atoi(argv[4]);
-  rng_seed = atoi(argv[5]);
-
-  if (mpi_rank == 0) {
-    printf("Options\n");
-    printf(" parameter bin: %s\n", parameter_fname);
-    printf(" output: %s\n", output_fname);
-    printf(" N: %d\n", N);
-    printf(" L: %d\n", L);
-    printf(" seed: %d\n", rng_seed);
-  }
+  rng_seed = atoi(argv[4]);
 }

 int main(int argc, char **argv) {
@@ -69,29 +59,62 @@ int main(int argc, char **argv) {
   check_and_parse_args(argc, argv);

   /* Initialize model */
-  namegen_init(N, L, rng_seed, parameter_fname);
+  namegen_initialize(N, rng_seed, parameter_fname);

   float *random_floats = nullptr;
   char *output = nullptr;

-  /* Initialize input and output */
   if (mpi_rank == 0) {
-    random_floats = (float *)malloc(N * L * sizeof(float));
-    output = (char *)malloc(N * (L + 1) * sizeof(char));
+    random_floats = (float *)malloc(N * MAX_LEN * sizeof(float));
+    output = (char *)malloc(N * (MAX_LEN + 1) * sizeof(char));
     srand(rng_seed);
-    for (int i = 0; i < N * L; i++) {
+    for (int i = 0; i < N * MAX_LEN; i++) {
       random_floats[i] = ((float)rand()) / ((float)RAND_MAX);
     }
   }

-  /* Run model inference */
-  namegen(N, L, random_floats, output);
-
-  /* Print output */
   if (mpi_rank == 0) {
-    for (int i = 0; i < N; i++) {
-      printf("%s\n", output + i * (L + 1));
-    }
+    printf("Generating %d names...", N);
+    fflush(stdout);
   }

+  /* Generate names and measure time */
+  MPI_Barrier(MPI_COMM_WORLD);
+  double namegen_st = get_time();
+  namegen(N, random_floats, output);
+  MPI_Barrier(MPI_COMM_WORLD);
+  double namegen_en = get_time();
+
+  if (mpi_rank == 0) {
+    double elapsed_time = namegen_en - namegen_st;
+    printf("Done!\n");
+
+    /* Print first few results */
+    int print_cnt = N < print_max ? N : print_max;
+    printf("First %d results are:", print_cnt);
+    for (int i = 0; i < print_cnt; i++) {
+      printf(" %s%c", output + i * (MAX_LEN + 1),
+             i == (print_cnt - 1) ? '\n' : ',');
+    }
+
+    /* Write the results to file */
+    printf("Writing to %s ...", output_fname);
+    fflush(stdout);
+    FILE *output_fp = (FILE *)fopen(output_fname, "w");
+    for (int i = 0; i < N; i++) {
+      fprintf(output_fp, "%s\n", output + i * (MAX_LEN + 1));
+    }
+    fclose(output_fp);
+    printf("Done!\n");
+
+    printf("Elapsed time: %.6f seconds\n", elapsed_time);
+    printf("Throughput: %.3f names/sec\n", (double)N / elapsed_time);
+  }
+
+  /* Finalize program */
   namegen_finalize();
+  MPI_Finalize();
 }
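
Note: the maximum name length is now the compile-time constant MAX_LEN, so the binary takes one argument fewer. A sample invocation (file names are illustrative, not from this commit):

    ./main model.bin names.txt 64 4155

generates 64 names with RNG seed 4155, prints the first print_max (8) of them, and writes all 64 to names.txt, one per line.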

View File

@@ -11,24 +11,24 @@ extern int mpi_rank, mpi_size;

 // You can modify the data structure as you want
 struct Tensor {
+  /* Alloc memory */
   Tensor(std::vector<int> shape_) {
     ndim = shape_.size();
     for (size_t i = 0; i < ndim; i++) {
       shape[i] = shape_[i];
     }
-    /* Alloc memory */
     size_t n = num_elem();
     buf = (float *)malloc(n * sizeof(float));
   }

+  /* Alloc memory and copy */
   Tensor(std::vector<int> shape_, float *buf_) {
     ndim = shape_.size();
     for (size_t i = 0; i < ndim; i++) {
       shape[i] = shape_[i];
     }
-    /* Alloc memory and copy */
     size_t n = num_elem();
     buf = (float *)malloc(n * sizeof(float));
     memcpy(buf, buf_, n * sizeof(float));
@@ -52,15 +52,9 @@ struct Tensor {
     return sz;
   }

-  void print() {
-    for (int i = 0; i < 5; i++) {
-      printf("%.3e ", buf[i]);
-    }
-    printf("\n");
-  }
-
   // Pointer to data
   float *buf = nullptr;
   // Shape of tensor, from outermost dimension to innermost dimension.
   // e.g., {{1.0, -0.5, 2.3}, {4.3, 5.6, -7.8}} => shape = {2, 3}
   size_t ndim = 0;
@@ -76,7 +70,7 @@ Tensor *b_hr0, *b_hz0, *b_hn0, *b_hr1, *b_hz1, *b_hn1;
 Tensor *W_fc, *b_fc;
 Tensor *rfloats;

-/* Input, activations, output */
+/* input, activations, output */
 Tensor *input, *emb_out;
 Tensor *hidden0, *hidden1;
 Tensor *r0, *r1, *z0, *z1, *n0, *n1, *f, *char_prob;
@@ -90,6 +84,14 @@ Tensor *htmp00, *htmp01, *htmp02;
 Tensor *htmp10, *htmp11, *htmp12;
 Tensor *ftmp0;

+/* Operations */
+
+/*
+ * Embedding
+ * input: [1] (scalar)
+ * weight: [NUM_CHAR x EMBEDDING_DIM]
+ * output: [EMBEDDING_DIM]
+ */
 void embedding(Tensor *input, Tensor *weight, Tensor *output) {
   size_t n = weight->shape[1];
   for (size_t i = 0; i < n; i++) {
@@ -98,6 +100,12 @@ void embedding(Tensor *input, Tensor *weight, Tensor *output) {
   }
 }

+/*
+ * Elementwise addition
+ * input1: [*]
+ * input2: [*] (same shape as input1)
+ * output: [*] (same shape as input1)
+ */
 void elemwise_add(Tensor *input1, Tensor *input2, Tensor *output) {
   size_t sn = input1->num_elem();
   for (size_t i = 0; i < sn; i++) {
@@ -105,6 +113,11 @@ void elemwise_add(Tensor *input1, Tensor *input2, Tensor *output) {
   }
 }

+/*
+ * Elementwise (1-x)
+ * input: [*]
+ * output: [*] (same shape as input)
+ */
 void elemwise_oneminus(Tensor *input, Tensor *output) {
   size_t n = input->num_elem();
   for (size_t i = 0; i < n; i++) {
@@ -113,6 +126,12 @@ void elemwise_oneminus(Tensor *input, Tensor *output) {
   }
 }

+/*
+ * Elementwise multiplication
+ * input1: [*]
+ * input2: [*] (same shape as input1)
+ * output: [*] (same shape as input1)
+ */
 void elemwise_mul(Tensor *input1, Tensor *input2, Tensor *output) {
   size_t sn = input1->num_elem();
   for (size_t i = 0; i < sn; i++) {
@@ -120,6 +139,11 @@ void elemwise_mul(Tensor *input1, Tensor *input2, Tensor *output) {
   }
 }

+/*
+ * Elementwise tanh(x)
+ * input: [*]
+ * output: [*] (same shape as input)
+ */
 void elemwise_tanh(Tensor *input, Tensor *output) {
   size_t n = input->num_elem();
   for (size_t i = 0; i < n; i++) {
@@ -128,6 +152,11 @@ void elemwise_tanh(Tensor *input, Tensor *output) {
   }
 }

+/*
+ * Elementwise sigmoid 1 / (1 + exp(-x))
+ * input: [*]
+ * output: [*] (same shape as input)
+ */
 void elemwise_sigmoid(Tensor *input, Tensor *output) {
   size_t n = input->num_elem();
   for (size_t i = 0; i < n; i++) {
@@ -136,19 +165,52 @@ void elemwise_sigmoid(Tensor *input, Tensor *output) {
   }
 }

-int random_select(Tensor *input, Tensor *rng_seq, int rng_offset) {
-  float r = rng_seq->buf[rng_offset];
-  size_t n = input->num_elem();
-  float psum = 0.0;
-  for (size_t i = 0; i < n; i++) {
-    psum += input->buf[i];
-    if (psum > r) {
-      return i;
-    }
-  }
-  return n - 1;
-}
+/*
+ * SGEMV
+ * input1: [N x K]
+ * input2: [K]
+ * output: [N]
+ */
+void matvec(Tensor *input1, Tensor *input2, Tensor *output) {
+  size_t N_ = input1->shape[0];
+  size_t K_ = input1->shape[1];
+  for (size_t i = 0; i < N_; i++) {
+    float c = 0.0;
+    for (size_t j = 0; j < K_; j++) {
+      c += input1->buf[i * K_ + j] * input2->buf[j];
+    }
+    output->buf[i] = c;
+  }
+}
+
+/*
+ * SGEMM
+ * input1: [M x K]
+ * input2: [K x N]
+ * output: [M x N]
+ */
+void matmul(Tensor *input1, Tensor *input2, Tensor *output) {
+  size_t M_ = input1->shape[0];
+  size_t K_ = input1->shape[1];
+  size_t N_ = input2->shape[1];
+  for (size_t i = 0; i < M_; i++) {
+    for (size_t j = 0; j < N_; j++) {
+      float c = 0.0;
+      for (size_t k = 0; k < K_; k++) {
+        c += input1->buf[i * K_ + k] * input2->buf[k * N_ + j];
+      }
+      output->buf[i * N_ + j] = c;
+    }
+  }
+}
+
+/*
+ * Softmax
+ * Normalize the input elements according to their exp values.
+ * The result can be interpreted as a probability distribution.
+ * input: [*]
+ * output: [*] (same shape as input)
+ */
 void softmax(Tensor *input, Tensor *output) {
   size_t n = input->num_elem();
   float sum = 0.0;
@@ -162,42 +224,37 @@ void softmax(Tensor *input, Tensor *output) {
   }
 }

-void matvec(Tensor *input1, Tensor *input2, Tensor *output) {
-  size_t N_ = input1->shape[0];
-  size_t K_ = input1->shape[1];
-  for (size_t i = 0; i < N_; i++) {
-    float c = 0.0;
-    for (size_t j = 0; j < K_; j++) {
-      c += input1->buf[i * K_ + j] * input2->buf[j];
-    }
-    output->buf[i] = c;
-  }
-}
-
-void matmul(Tensor *input1, Tensor *input2, Tensor *output) {
-  size_t N_ = input1->shape[0];
-  size_t K_ = input1->shape[1];
-  size_t M_ = input2->shape[1];
-  for (size_t i = 0; i < N_; i++) {
-    for (size_t j = 0; j < M_; j++) {
-      float c = 0.0;
-      for (size_t k = 0; k < K_; k++) {
-        c += input1->buf[i * K_ + k] * input2->buf[k * M_ + j];
-      }
-      output->buf[i * M_ + j] = c;
-    }
-  }
-}
+/*
+ * Sample a random index according to the given probability distribution.
+ * This function is called at most N*MAX_LEN times. Each call uses a
+ * random float in [0,1] to sample an index from the given distribution.
+ * input: [NUM_CHAR], probability distribution of the characters
+ * rng_seq: [N*MAX_LEN], sequence of random floats in [0,1]
+ */
+int random_select(Tensor *input, Tensor *rng_seq, int rng_offset) {
+  float r = rng_seq->buf[rng_offset];
+  size_t n = input->num_elem();
+  float psum = 0.0;
+  for (size_t i = 0; i < n; i++) {
+    psum += input->buf[i];
+    if (psum > r) {
+      return i;
+    }
+  }
+  return n - 1;
+}

-void namegen_init(int N, int L, int rng_seed, char *parameter_fname) {
+/*
+ * Initialize the model.
+ * Do input-independent work here.
+ */
+void namegen_initialize(int N, int rng_seed, char *parameter_fname) {
   /* Only the root process reads the parameter */
   if (mpi_rank == 0) {
     size_t parameter_binary_size = 0;
-    void *parameter_binary =
-        read_binary(parameter_fname, &parameter_binary_size);
-    assert(parameter_binary_size == PARAMETER_FILE_SIZE);
-    float *parameter = (float *)parameter_binary;
+    float *parameter =
+        (float *)read_binary(parameter_fname, &parameter_binary_size);

     /* Network parameters */
     character_embedding =
@@ -234,9 +291,7 @@ void namegen_init(int N, int L, int rng_seed, char *parameter_fname) {
     W_fc = new Tensor({NUM_CHAR, HIDDEN_DIM}, parameter + OFFSET25);
     b_fc = new Tensor({NUM_CHAR}, parameter + OFFSET26);

-    rfloats = new Tensor({N * L});
-
-    /* Input, activations, output */
+    /* input, activations, output, etc. */
     input = new Tensor({1});
     emb_out = new Tensor({EMBEDDING_DIM});
@@ -293,6 +348,7 @@ void namegen_init(int N, int L, int rng_seed, char *parameter_fname) {
     htmp11 = new Tensor({HIDDEN_DIM});
     htmp12 = new Tensor({HIDDEN_DIM});

+    rfloats = new Tensor({N * MAX_LEN});
     ftmp0 = new Tensor({NUM_CHAR});
     char_prob = new Tensor({NUM_CHAR});
   } else {
@@ -300,18 +356,22 @@ void namegen_init(int N, int L, int rng_seed, char *parameter_fname) {
   }
 }

 /*
+ * Generate names.
+ * Any input-dependent computation/communication must be done here.
  * N: # of names to generate
- * L: Maximum length of a name
- * output: 2D-array of size N*(L+1), allocated in main.cpp
+ * random_floats: N*MAX_LEN sequence of random floats in [0,1].
+ * output: 2D-array of size N x (MAX_LEN+1), allocated in main.cpp
  */
-void namegen(int N, int L, float *random_floats, char *output) {
+void namegen(int N, float *random_floats, char *output) {
+  /* Only the root process does the job, for now... */
   if (mpi_rank != 0)
     return;

-  memcpy(rfloats->buf, random_floats, N * L * sizeof(float));
-  memset(output, 0, N * (L + 1) * sizeof(char));
+  memcpy(rfloats->buf, random_floats, N * MAX_LEN * sizeof(float));
+  memset(output, 0, N * (MAX_LEN + 1) * sizeof(char));

+  /* Generate N names */
   for (int n = 0; n < N; n++) {
     /* Initialize input and hidden vector. */
     /* One hidden vector for each GRU layer */
@@ -319,7 +379,7 @@ void namegen(int N, int L, float *random_floats, char *output) {
     hidden0->set_zero();
     hidden1->set_zero();

-    for (int l = 0; l < L; l++) {
+    for (int l = 0; l < MAX_LEN; l++) {
       /* Embedding */
       embedding(input, character_embedding, emb_out);
@@ -393,9 +453,9 @@ void namegen(int N, int L, float *random_floats, char *output) {
       softmax(f, char_prob);

       /* Random select */
-      int selected_char = random_select(char_prob, rfloats, n * L + l);
-      output[n * (L + 1) + l] = selected_char;
+      int selected_char = random_select(char_prob, rfloats, n * MAX_LEN + l);
+      output[n * (MAX_LEN + 1) + l] = selected_char;
       input->buf[0] = selected_char;

       if (selected_char == EOS)
@@ -404,6 +464,11 @@ void namegen(int N, int L, float *random_floats, char *output) {
   }
 }

+/*
+ * Finalize the model.
+ * Although it is not necessary, we recommend deallocating and destroying
+ * everything you made in namegen_initialize() and namegen().
+ */
 void namegen_finalize() {
   if (mpi_rank == 0) {
     delete character_embedding;
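
Note: the r*/z*/n* and htmp* globals above back the two GRU layers. For orientation, here is a sketch of how one step of a single GRU layer can be composed from this file's primitives, following the standard GRU formulation; it is illustrative only (not code from this commit), and every Tensor parameter is assumed preallocated with a matching shape:

    /* Illustrative sketch: one GRU-layer step built from the primitives above. */
    void gru_step(Tensor *x, Tensor *h, /* input and hidden state (h updated in place) */
                  Tensor *W_ir, Tensor *W_iz, Tensor *W_in, /* input-to-hidden weights */
                  Tensor *W_hr, Tensor *W_hz, Tensor *W_hn, /* hidden-to-hidden weights */
                  Tensor *b_ir, Tensor *b_iz, Tensor *b_in,
                  Tensor *b_hr, Tensor *b_hz, Tensor *b_hn,
                  Tensor *r, Tensor *z, Tensor *n, /* gate buffers */
                  Tensor *t0, Tensor *t1) {        /* scratch buffers */
      /* r = sigmoid(W_ir x + b_ir + W_hr h + b_hr) */
      matvec(W_ir, x, t0); elemwise_add(t0, b_ir, t0);
      matvec(W_hr, h, t1); elemwise_add(t1, b_hr, t1);
      elemwise_add(t0, t1, r); elemwise_sigmoid(r, r);
      /* z = sigmoid(W_iz x + b_iz + W_hz h + b_hz) */
      matvec(W_iz, x, t0); elemwise_add(t0, b_iz, t0);
      matvec(W_hz, h, t1); elemwise_add(t1, b_hz, t1);
      elemwise_add(t0, t1, z); elemwise_sigmoid(z, z);
      /* n = tanh(W_in x + b_in + r * (W_hn h + b_hn)) */
      matvec(W_in, x, t0); elemwise_add(t0, b_in, t0);
      matvec(W_hn, h, t1); elemwise_add(t1, b_hn, t1);
      elemwise_mul(r, t1, t1);
      elemwise_add(t0, t1, n); elemwise_tanh(n, n);
      /* h' = (1 - z) * n + z * h */
      elemwise_oneminus(z, t0); elemwise_mul(t0, n, t0);
      elemwise_mul(z, h, t1);
      elemwise_add(t0, t1, h);
    }

After the final-layer matvec with W_fc and softmax, random_select performs inverse-CDF sampling: it walks the prefix sum of the probabilities until it exceeds the supplied random float. For example, with probabilities {0.2, 0.5, 0.3} and r = 0.6, index 1 is selected (0.2 < 0.6, but 0.2 + 0.5 > 0.6).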

View File

@@ -1,5 +1,7 @@
 #pragma once

+#define MAX_LEN 10
+
 // Model parameters
 #define PARAMETER_FILE_SIZE 45663232
 #define NUM_CHAR 256

@@ -40,6 +42,6 @@
 #define SOS 1
 #define PAD 2

-void namegen_init(int N, int L, int rng_seed, char *network_fname);
-void namegen(int N, int L, float *random_floats, char *output);
+void namegen_initialize(int N, int rng_seed, char *network_fname);
+void namegen(int N, float *random_floats, char *output);
 void namegen_finalize();

View File

@@ -0,0 +1,30 @@
+Karlen
+Elisah
+Devonda
+Stephen
+Christiano
+Mikelle
+Madaline
+Benuel
+Crespin
+Kolette
+Librada
+Yaminah
+Dezmariah
+Daria
+Kelso
+Shavar
+Muriel
+Lanna
+Italo
+Ritchaen
+Raeanna
+Geneal
+Duace
+Chiffon
+Jazmin
+Kennith
+Leonid
+Synthious
+Jocquita
+Ramira

View File

@@ -0,0 +1,7 @@
+#!/bin/bash
+
+: ${NODES:=1}
+srun -N $NODES --partition shpc22 --exclusive \
+  mpirun --bind-to none -mca btl ^openib -npernode 1 \
+  numactl --physcpubind 0-63 \
+  ./main $@
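
Note: run.sh forwards its arguments unchanged to ./main, so a cluster launch might look like this (arguments illustrative, not from this commit):

    NODES=2 ./run.sh model.bin names.txt 1024 4155

The : ${NODES:=1} expansion makes NODES default to 1 when the caller does not set it.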

View File

@@ -31,3 +31,9 @@ void WriteFile(const char *filename, size_t size, void *buf) {
   fclose(f);
   CHECK_ERROR(size == ret, "Failed to write %ld bytes to %s", size, filename);
 }
+
+double get_time() {
+  struct timespec tv;
+  clock_gettime(CLOCK_MONOTONIC, &tv);
+  return tv.tv_sec + tv.tv_nsec * 1e-9;
+}
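
Note: get_time() reads CLOCK_MONOTONIC rather than CLOCK_REALTIME, so measurements are unaffected by discontinuous wall-clock changes (e.g., settimeofday or NTP step corrections). A minimal usage sketch, where do_work() is a hypothetical stand-in for the timed region:

    double st = get_time();
    do_work(); /* hypothetical workload */
    printf("%.6f s\n", get_time() - st);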

View File

@@ -30,5 +30,5 @@
   } \
 } while (false)

-double gettime();
+double get_time();
 void *read_binary(const char *filename, size_t *size);