Update project skeleton.
This commit is contained in:
parent
c6d9b62306
commit
a34a6b8b04
|
@ -5,10 +5,10 @@ CFLAGS=-std=c++14 -O3 -Wall -march=native -mavx2 -mfma -mno-avx512f -fopenmp -I/
|
||||||
CUDA_CFLAGS:=$(foreach option, $(CFLAGS),-Xcompiler=$(option))
|
CUDA_CFLAGS:=$(foreach option, $(CFLAGS),-Xcompiler=$(option))
|
||||||
|
|
||||||
LDFLAGS=-pthread -L/usr/local/cuda/lib64
|
LDFLAGS=-pthread -L/usr/local/cuda/lib64
|
||||||
LDLIBS=-lmpi_cxx -lmpi -lstdc++ -lcuda -lcudart -lm
|
LDLIBS=-lmpi_cxx -lmpi -lstdc++ -lcudart -lm
|
||||||
|
|
||||||
CXX=g++
|
CXX=g++
|
||||||
CUX=nvcc
|
CUX=/usr/local/cuda/bin/nvcc
|
||||||
|
|
||||||
all: $(TARGET)
|
all: $(TARGET)
|
||||||
|
|
||||||
|
|
|
@ -11,25 +11,25 @@ int mpi_rank = 0, mpi_size = 1;
|
||||||
/* Global arguments */
|
/* Global arguments */
|
||||||
int rng_seed = 4155;
|
int rng_seed = 4155;
|
||||||
int N = 1;
|
int N = 1;
|
||||||
int L = 10;
|
|
||||||
|
|
||||||
static char *parameter_fname;
|
static char *parameter_fname;
|
||||||
static char *output_fname;
|
static char *output_fname;
|
||||||
|
|
||||||
|
const int print_max = 8;
|
||||||
|
|
||||||
void print_usage_exit(int argc, char **argv) {
|
void print_usage_exit(int argc, char **argv) {
|
||||||
if (mpi_rank == 0) {
|
if (mpi_rank == 0) {
|
||||||
printf("Usage %s [parameter bin] [output] [N] [L] [seed] \n", argv[0]);
|
printf("Usage %s [parameter bin] [output] [N] [seed] \n", argv[0]);
|
||||||
printf(" parameter bin: File conatining DNN parameters\n");
|
printf(" parameter bin: File conatining DNN parameters\n");
|
||||||
printf(" output: File to write namegen results\n");
|
printf(" output: File to write namegen results\n");
|
||||||
printf(" N: Number of names to generate\n");
|
printf(" N: Number of names to generate\n");
|
||||||
printf(" L: Maximum length of a name\n");
|
|
||||||
printf(" seed: An integer RNG seed\n");
|
printf(" seed: An integer RNG seed\n");
|
||||||
}
|
}
|
||||||
EXIT(0);
|
EXIT(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void check_and_parse_args(int argc, char **argv) {
|
void check_and_parse_args(int argc, char **argv) {
|
||||||
if (argc != 6)
|
if (argc != 5)
|
||||||
print_usage_exit(argc, argv);
|
print_usage_exit(argc, argv);
|
||||||
|
|
||||||
int c;
|
int c;
|
||||||
|
@ -45,17 +45,7 @@ void check_and_parse_args(int argc, char **argv) {
|
||||||
parameter_fname = argv[1];
|
parameter_fname = argv[1];
|
||||||
output_fname = argv[2];
|
output_fname = argv[2];
|
||||||
N = atoi(argv[3]);
|
N = atoi(argv[3]);
|
||||||
L = atoi(argv[4]);
|
rng_seed = atoi(argv[4]);
|
||||||
rng_seed = atoi(argv[5]);
|
|
||||||
|
|
||||||
if (mpi_rank == 0) {
|
|
||||||
printf("Options\n");
|
|
||||||
printf(" parameter bin: %s\n", parameter_fname);
|
|
||||||
printf(" output: %s\n", output_fname);
|
|
||||||
printf(" N: %d\n", N);
|
|
||||||
printf(" L: %d\n", L);
|
|
||||||
printf(" seed: %d\n", rng_seed);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
|
@ -69,29 +59,62 @@ int main(int argc, char **argv) {
|
||||||
check_and_parse_args(argc, argv);
|
check_and_parse_args(argc, argv);
|
||||||
|
|
||||||
/* Initialize model */
|
/* Initialize model */
|
||||||
namegen_init(N, L, rng_seed, parameter_fname);
|
namegen_initialize(N, rng_seed, parameter_fname);
|
||||||
|
|
||||||
float *random_floats = nullptr;
|
float *random_floats = nullptr;
|
||||||
char *output = nullptr;
|
char *output = nullptr;
|
||||||
|
|
||||||
|
/* Initialize input and output */
|
||||||
if (mpi_rank == 0) {
|
if (mpi_rank == 0) {
|
||||||
random_floats = (float *)malloc(N * L * sizeof(float));
|
random_floats = (float *)malloc(N * MAX_LEN * sizeof(float));
|
||||||
output = (char *)malloc(N * (L + 1) * sizeof(char));
|
output = (char *)malloc(N * (MAX_LEN + 1) * sizeof(char));
|
||||||
srand(rng_seed);
|
srand(rng_seed);
|
||||||
for (int i = 0; i < N * L; i++) {
|
for (int i = 0; i < N * MAX_LEN; i++) {
|
||||||
random_floats[i] = ((float)rand()) / ((float)RAND_MAX);
|
random_floats[i] = ((float)rand()) / ((float)RAND_MAX);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Run model inference */
|
|
||||||
namegen(N, L, random_floats, output);
|
|
||||||
|
|
||||||
/* Print output */
|
|
||||||
if (mpi_rank == 0) {
|
if (mpi_rank == 0) {
|
||||||
for (int i = 0; i < N; i++) {
|
printf("Generating %d names...", N);
|
||||||
printf("%s\n", output + i * (L + 1));
|
fflush(stdout);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Generate names and measure time */
|
||||||
|
MPI_Barrier(MPI_COMM_WORLD);
|
||||||
|
double namegen_st = get_time();
|
||||||
|
|
||||||
|
namegen(N, random_floats, output);
|
||||||
|
|
||||||
|
MPI_Barrier(MPI_COMM_WORLD);
|
||||||
|
double namegen_en = get_time();
|
||||||
|
|
||||||
|
if (mpi_rank == 0) {
|
||||||
|
double elapsed_time = namegen_en - namegen_st;
|
||||||
|
printf("Done!\n");
|
||||||
|
|
||||||
|
/* Print the first few results */
|
||||||
|
int print_cnt = N < print_max ? N : print_max;
|
||||||
|
printf("First %d results are:", print_cnt);
|
||||||
|
for (int i = 0; i < print_cnt; i++) {
|
||||||
|
printf(" %s%c", output + i * (MAX_LEN + 1),
|
||||||
|
i == (print_cnt - 1) ? '\n' : ',');
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write the results to file */
|
||||||
|
printf("Writing to %s ...", output_fname);
|
||||||
|
fflush(stdout);
|
||||||
|
FILE *output_fp = (FILE *)fopen(output_fname, "w");
|
||||||
|
for (int i = 0; i < N; i++) {
|
||||||
|
fprintf(output_fp, "%s\n", output + i * (MAX_LEN + 1));
|
||||||
|
}
|
||||||
|
fclose(output_fp);
|
||||||
|
printf("Done!\n");
|
||||||
|
|
||||||
|
printf("Elapsed time: %.6f seconds\n", elapsed_time);
|
||||||
|
printf("Throughput: %.3f names/sec\n", (double)N / elapsed_time);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finalize program */
|
||||||
namegen_finalize();
|
namegen_finalize();
|
||||||
|
MPI_Finalize();
|
||||||
}
|
}
|
|
@ -11,24 +11,24 @@ extern int mpi_rank, mpi_size;
|
||||||
// You can modify the data structure as you want
|
// You can modify the data structure as you want
|
||||||
struct Tensor {
|
struct Tensor {
|
||||||
|
|
||||||
|
/* Alloc memory */
|
||||||
Tensor(std::vector<int> shape_) {
|
Tensor(std::vector<int> shape_) {
|
||||||
ndim = shape_.size();
|
ndim = shape_.size();
|
||||||
for (size_t i = 0; i < ndim; i++) {
|
for (size_t i = 0; i < ndim; i++) {
|
||||||
shape[i] = shape_[i];
|
shape[i] = shape_[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Alloc memory */
|
|
||||||
size_t n = num_elem();
|
size_t n = num_elem();
|
||||||
buf = (float *)malloc(n * sizeof(float));
|
buf = (float *)malloc(n * sizeof(float));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Alloc memory and copy */
|
||||||
Tensor(std::vector<int> shape_, float *buf_) {
|
Tensor(std::vector<int> shape_, float *buf_) {
|
||||||
ndim = shape_.size();
|
ndim = shape_.size();
|
||||||
for (size_t i = 0; i < ndim; i++) {
|
for (size_t i = 0; i < ndim; i++) {
|
||||||
shape[i] = shape_[i];
|
shape[i] = shape_[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Alloc memory and copy */
|
|
||||||
size_t n = num_elem();
|
size_t n = num_elem();
|
||||||
buf = (float *)malloc(n * sizeof(float));
|
buf = (float *)malloc(n * sizeof(float));
|
||||||
memcpy(buf, buf_, n * sizeof(float));
|
memcpy(buf, buf_, n * sizeof(float));
|
||||||
|
@ -52,15 +52,9 @@ struct Tensor {
|
||||||
return sz;
|
return sz;
|
||||||
}
|
}
|
||||||
|
|
||||||
void print() {
|
|
||||||
for (int i = 0; i < 5; i++) {
|
|
||||||
printf("%.3e ", buf[i]);
|
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pointer to data
|
// Pointer to data
|
||||||
float *buf = nullptr;
|
float *buf = nullptr;
|
||||||
|
|
||||||
// Shape of tensor, from outermost dimension to innermost dimension.
|
// Shape of tensor, from outermost dimension to innermost dimension.
|
||||||
// e.g., {{1.0, -0.5, 2.3}, {4.3, 5.6, -7.8}} => shape = {2, 3}
|
// e.g., {{1.0, -0.5, 2.3}, {4.3, 5.6, -7.8}} => shape = {2, 3}
|
||||||
size_t ndim = 0;
|
size_t ndim = 0;
|
||||||
|
@ -76,7 +70,7 @@ Tensor *b_hr0, *b_hz0, *b_hn0, *b_hr1, *b_hz1, *b_hn1;
|
||||||
Tensor *W_fc, *b_fc;
|
Tensor *W_fc, *b_fc;
|
||||||
Tensor *rfloats;
|
Tensor *rfloats;
|
||||||
|
|
||||||
/* Input, activations, output */
|
/* input, activations, output */
|
||||||
Tensor *input, *emb_out;
|
Tensor *input, *emb_out;
|
||||||
Tensor *hidden0, *hidden1;
|
Tensor *hidden0, *hidden1;
|
||||||
Tensor *r0, *r1, *z0, *z1, *n0, *n1, *f, *char_prob;
|
Tensor *r0, *r1, *z0, *z1, *n0, *n1, *f, *char_prob;
|
||||||
|
@ -90,6 +84,14 @@ Tensor *htmp00, *htmp01, *htmp02;
|
||||||
Tensor *htmp10, *htmp11, *htmp12;
|
Tensor *htmp10, *htmp11, *htmp12;
|
||||||
Tensor *ftmp0;
|
Tensor *ftmp0;
|
||||||
|
|
||||||
|
/* Operations */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Embedding
|
||||||
|
* input: [1] (scalar)
|
||||||
|
* weight: [NUM_CHAR x EMBEDDING_DIM]
|
||||||
|
* output: [EMBEDDING_DIM]
|
||||||
|
*/
|
||||||
void embedding(Tensor *input, Tensor *weight, Tensor *output) {
|
void embedding(Tensor *input, Tensor *weight, Tensor *output) {
|
||||||
size_t n = weight->shape[1];
|
size_t n = weight->shape[1];
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
|
@ -98,6 +100,12 @@ void embedding(Tensor *input, Tensor *weight, Tensor *output) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Elementwise addition
|
||||||
|
* input1: [*]
|
||||||
|
* input2: [*] (same shape as input1)
|
||||||
|
* output: [*] (same shape as input1)
|
||||||
|
*/
|
||||||
void elemwise_add(Tensor *input1, Tensor *input2, Tensor *output) {
|
void elemwise_add(Tensor *input1, Tensor *input2, Tensor *output) {
|
||||||
size_t sn = input1->num_elem();
|
size_t sn = input1->num_elem();
|
||||||
for (size_t i = 0; i < sn; i++) {
|
for (size_t i = 0; i < sn; i++) {
|
||||||
|
@ -105,6 +113,11 @@ void elemwise_add(Tensor *input1, Tensor *input2, Tensor *output) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Elementwise (1-x)
|
||||||
|
* input: [*]
|
||||||
|
* output: [*] (same shape as input)
|
||||||
|
*/
|
||||||
void elemwise_oneminus(Tensor *input, Tensor *output) {
|
void elemwise_oneminus(Tensor *input, Tensor *output) {
|
||||||
size_t n = input->num_elem();
|
size_t n = input->num_elem();
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
|
@ -113,6 +126,12 @@ void elemwise_oneminus(Tensor *input, Tensor *output) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Elementwise multiplication
|
||||||
|
* input1: [*]
|
||||||
|
* input2: [*] (same shape as input1)
|
||||||
|
* output: [*] (same shape as input1)
|
||||||
|
*/
|
||||||
void elemwise_mul(Tensor *input1, Tensor *input2, Tensor *output) {
|
void elemwise_mul(Tensor *input1, Tensor *input2, Tensor *output) {
|
||||||
size_t sn = input1->num_elem();
|
size_t sn = input1->num_elem();
|
||||||
for (size_t i = 0; i < sn; i++) {
|
for (size_t i = 0; i < sn; i++) {
|
||||||
|
@ -120,6 +139,11 @@ void elemwise_mul(Tensor *input1, Tensor *input2, Tensor *output) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Elementwise tanh(x)
|
||||||
|
* input: [*]
|
||||||
|
* output: [*] (same shape as input)
|
||||||
|
*/
|
||||||
void elemwise_tanh(Tensor *input, Tensor *output) {
|
void elemwise_tanh(Tensor *input, Tensor *output) {
|
||||||
size_t n = input->num_elem();
|
size_t n = input->num_elem();
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
|
@ -128,6 +152,11 @@ void elemwise_tanh(Tensor *input, Tensor *output) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Elementwise Sigmoid 1 / (1 + exp(-x))
|
||||||
|
* input: [*]
|
||||||
|
* output: [*] (same shape as input)
|
||||||
|
*/
|
||||||
void elemwise_sigmoid(Tensor *input, Tensor *output) {
|
void elemwise_sigmoid(Tensor *input, Tensor *output) {
|
||||||
size_t n = input->num_elem();
|
size_t n = input->num_elem();
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
|
@ -136,19 +165,52 @@ void elemwise_sigmoid(Tensor *input, Tensor *output) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int random_select(Tensor *input, Tensor *rng_seq, int rng_offset) {
|
/*
|
||||||
float r = rng_seq->buf[rng_offset];
|
* SGEMV
|
||||||
size_t n = input->num_elem();
|
* input1: [N x K]
|
||||||
float psum = 0.0;
|
* input2: [K]
|
||||||
for (size_t i = 0; i < n; i++) {
|
* output: [N]
|
||||||
psum += input->buf[i];
|
*/
|
||||||
if (psum > r) {
|
void matvec(Tensor *input1, Tensor *input2, Tensor *output) {
|
||||||
return i;
|
size_t N_ = input1->shape[0];
|
||||||
|
size_t K_ = input1->shape[1];
|
||||||
|
for (size_t i = 0; i < N_; i++) {
|
||||||
|
float c = 0.0;
|
||||||
|
for (size_t j = 0; j < K_; j++) {
|
||||||
|
c += input1->buf[i * K_ + j] * input2->buf[j];
|
||||||
}
|
}
|
||||||
|
output->buf[i] = c;
|
||||||
}
|
}
|
||||||
return n - 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SGEMM
|
||||||
|
* input1: [M x K]
|
||||||
|
* input2: [K x N]
|
||||||
|
* output: [M x N]
|
||||||
|
*/
|
||||||
|
void matmul(Tensor *input1, Tensor *input2, Tensor *output) {
|
||||||
|
size_t M_ = input1->shape[0];
|
||||||
|
size_t K_ = input1->shape[1];
|
||||||
|
size_t N_ = input2->shape[1];
|
||||||
|
for (size_t i = 0; i < M_; i++) {
|
||||||
|
for (size_t j = 0; j < N_; j++) {
|
||||||
|
float c = 0.0;
|
||||||
|
for (size_t k = 0; k < K_; k++) {
|
||||||
|
c += input1->buf[i * K_ + k] * input2->buf[k * N_ + j];
|
||||||
|
}
|
||||||
|
output->buf[i * N_ + j] = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Softmax
|
||||||
|
* Normalize the input elements according to their exp values.
|
||||||
|
* The result can be interpreted as a probability distribution.
|
||||||
|
* input: [*]
|
||||||
|
* output: [*], (same shape as input)
|
||||||
|
*/
|
||||||
void softmax(Tensor *input, Tensor *output) {
|
void softmax(Tensor *input, Tensor *output) {
|
||||||
size_t n = input->num_elem();
|
size_t n = input->num_elem();
|
||||||
float sum = 0.0;
|
float sum = 0.0;
|
||||||
|
@ -162,42 +224,37 @@ void softmax(Tensor *input, Tensor *output) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void matvec(Tensor *input1, Tensor *input2, Tensor *output) {
|
/*
|
||||||
size_t N_ = input1->shape[0];
|
* Sample a random index according to the given probability distribution
|
||||||
size_t K_ = input1->shape[1];
|
* This function is called at most N*MAX_LEN times. Each call uses a
|
||||||
for (size_t i = 0; i < N_; i++) {
|
* random float in [0,1] to sample an index from the given distribution.
|
||||||
float c = 0.0;
|
* input: [NUM_CHAR], probability distribution of the characters
|
||||||
for (size_t j = 0; j < K_; j++) {
|
* rng_seq: [N*MAX_LEN], sequence of random floats in [0,1]; rng_offset selects the one used by this call
|
||||||
c += input1->buf[i * K_ + j] * input2->buf[j];
|
*/
|
||||||
}
|
int random_select(Tensor *input, Tensor *rng_seq, int rng_offset) {
|
||||||
output->buf[i] = c;
|
float r = rng_seq->buf[rng_offset];
|
||||||
}
|
size_t n = input->num_elem();
|
||||||
}
|
float psum = 0.0;
|
||||||
|
for (size_t i = 0; i < n; i++) {
|
||||||
void matmul(Tensor *input1, Tensor *input2, Tensor *output) {
|
psum += input->buf[i];
|
||||||
size_t N_ = input1->shape[0];
|
if (psum > r) {
|
||||||
size_t K_ = input1->shape[1];
|
return i;
|
||||||
size_t M_ = input2->shape[1];
|
|
||||||
for (size_t i = 0; i < N_; i++) {
|
|
||||||
for (size_t j = 0; j < M_; j++) {
|
|
||||||
float c = 0.0;
|
|
||||||
for (size_t k = 0; k < K_; k++) {
|
|
||||||
c += input1->buf[i * K_ + k] * input2->buf[k * M_ + j];
|
|
||||||
}
|
|
||||||
output->buf[i * M_ + j] = c;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return n - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void namegen_init(int N, int L, int rng_seed, char *parameter_fname) {
|
/*
|
||||||
|
* Initialize the model.
|
||||||
|
* Do input-independent job here.
|
||||||
|
*/
|
||||||
|
void namegen_initialize(int N, int rng_seed, char *parameter_fname) {
|
||||||
|
|
||||||
/* Only the root process reads the parameter */
|
/* Only the root process reads the parameter */
|
||||||
if (mpi_rank == 0) {
|
if (mpi_rank == 0) {
|
||||||
size_t parameter_binary_size = 0;
|
size_t parameter_binary_size = 0;
|
||||||
void *parameter_binary =
|
float *parameter =
|
||||||
read_binary(parameter_fname, ¶meter_binary_size);
|
(float *)read_binary(parameter_fname, ¶meter_binary_size);
|
||||||
assert(parameter_binary_size == PARAMETER_FILE_SIZE);
|
|
||||||
float *parameter = (float *)parameter_binary;
|
|
||||||
|
|
||||||
/* Network parameters */
|
/* Network parameters */
|
||||||
character_embedding =
|
character_embedding =
|
||||||
|
@ -234,9 +291,7 @@ void namegen_init(int N, int L, int rng_seed, char *parameter_fname) {
|
||||||
W_fc = new Tensor({NUM_CHAR, HIDDEN_DIM}, parameter + OFFSET25);
|
W_fc = new Tensor({NUM_CHAR, HIDDEN_DIM}, parameter + OFFSET25);
|
||||||
b_fc = new Tensor({NUM_CHAR}, parameter + OFFSET26);
|
b_fc = new Tensor({NUM_CHAR}, parameter + OFFSET26);
|
||||||
|
|
||||||
rfloats = new Tensor({N * L});
|
/* input, activations, output, etc. */
|
||||||
|
|
||||||
/* Input, activations, output */
|
|
||||||
input = new Tensor({1});
|
input = new Tensor({1});
|
||||||
emb_out = new Tensor({EMBEDDING_DIM});
|
emb_out = new Tensor({EMBEDDING_DIM});
|
||||||
|
|
||||||
|
@ -293,6 +348,7 @@ void namegen_init(int N, int L, int rng_seed, char *parameter_fname) {
|
||||||
htmp11 = new Tensor({HIDDEN_DIM});
|
htmp11 = new Tensor({HIDDEN_DIM});
|
||||||
htmp12 = new Tensor({HIDDEN_DIM});
|
htmp12 = new Tensor({HIDDEN_DIM});
|
||||||
|
|
||||||
|
rfloats = new Tensor({N * MAX_LEN});
|
||||||
ftmp0 = new Tensor({NUM_CHAR});
|
ftmp0 = new Tensor({NUM_CHAR});
|
||||||
char_prob = new Tensor({NUM_CHAR});
|
char_prob = new Tensor({NUM_CHAR});
|
||||||
} else {
|
} else {
|
||||||
|
@ -300,18 +356,22 @@ void namegen_init(int N, int L, int rng_seed, char *parameter_fname) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
* Generate names.
|
||||||
|
* Any input-dependent computation/communication must be done here.
|
||||||
* N: # of names to generate
|
* N: # of names to generate
|
||||||
* L: Maximum length of a name
|
* random_floats: N*MAX_LEN sequence of random floats in [0,1].
|
||||||
* output: 2D-array of size N*(L+1), allocated in main.cpp
|
* output: 2D-array of size N x (MAX_LEN+1), allocated in main.cpp
|
||||||
*/
|
*/
|
||||||
void namegen(int N, int L, float *random_floats, char *output) {
|
void namegen(int N, float *random_floats, char *output) {
|
||||||
|
|
||||||
|
/* Only root process does the job, for now... */
|
||||||
if (mpi_rank != 0)
|
if (mpi_rank != 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
memcpy(rfloats->buf, random_floats, N * L * sizeof(float));
|
memcpy(rfloats->buf, random_floats, N * MAX_LEN * sizeof(float));
|
||||||
memset(output, 0, N * (L + 1) * sizeof(char));
|
memset(output, 0, N * (MAX_LEN + 1) * sizeof(char));
|
||||||
|
|
||||||
|
/* Generate N names */
|
||||||
for (int n = 0; n < N; n++) {
|
for (int n = 0; n < N; n++) {
|
||||||
/* Initialize input and hidden vector. */
|
/* Initialize input and hidden vector. */
|
||||||
/* One hidden vector for each GRU layer */
|
/* One hidden vector for each GRU layer */
|
||||||
|
@ -319,7 +379,7 @@ void namegen(int N, int L, float *random_floats, char *output) {
|
||||||
hidden0->set_zero();
|
hidden0->set_zero();
|
||||||
hidden1->set_zero();
|
hidden1->set_zero();
|
||||||
|
|
||||||
for (int l = 0; l < L; l++) {
|
for (int l = 0; l < MAX_LEN; l++) {
|
||||||
/* Embedding */
|
/* Embedding */
|
||||||
embedding(input, character_embedding, emb_out);
|
embedding(input, character_embedding, emb_out);
|
||||||
|
|
||||||
|
@ -393,9 +453,9 @@ void namegen(int N, int L, float *random_floats, char *output) {
|
||||||
softmax(f, char_prob);
|
softmax(f, char_prob);
|
||||||
|
|
||||||
/* Random select */
|
/* Random select */
|
||||||
int selected_char = random_select(char_prob, rfloats, n * L + l);
|
int selected_char = random_select(char_prob, rfloats, n * MAX_LEN + l);
|
||||||
|
|
||||||
output[n * (L + 1) + l] = selected_char;
|
output[n * (MAX_LEN + 1) + l] = selected_char;
|
||||||
input->buf[0] = selected_char;
|
input->buf[0] = selected_char;
|
||||||
|
|
||||||
if (selected_char == EOS)
|
if (selected_char == EOS)
|
||||||
|
@ -404,6 +464,11 @@ void namegen(int N, int L, float *random_floats, char *output) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Finalize the model.
|
||||||
|
* Although it is not necessary, we recommend deallocating and destructing
|
||||||
|
* everything you made in namegen_initialize() and namegen().
|
||||||
|
*/
|
||||||
void namegen_finalize() {
|
void namegen_finalize() {
|
||||||
if (mpi_rank == 0) {
|
if (mpi_rank == 0) {
|
||||||
delete character_embedding;
|
delete character_embedding;
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#define MAX_LEN 10
|
||||||
|
|
||||||
// Model parameters
|
// Model parameters
|
||||||
#define PARAMETER_FILE_SIZE 45663232
|
#define PARAMETER_FILE_SIZE 45663232
|
||||||
#define NUM_CHAR 256
|
#define NUM_CHAR 256
|
||||||
|
@ -40,6 +42,6 @@
|
||||||
#define SOS 1
|
#define SOS 1
|
||||||
#define PAD 2
|
#define PAD 2
|
||||||
|
|
||||||
void namegen_init(int N, int L, int rng_seed, char *network_fname);
|
void namegen_initialize(int N, int rng_seed, char *network_fname);
|
||||||
void namegen(int N, int L, float *random_floats, char *output);
|
void namegen(int N, float *random_floats, char *output);
|
||||||
void namegen_finalize();
|
void namegen_finalize();
|
|
@ -0,0 +1,30 @@
|
||||||
|
Karlen
|
||||||
|
Elisah
|
||||||
|
Devonda
|
||||||
|
Stephen
|
||||||
|
Christiano
|
||||||
|
Mikelle
|
||||||
|
Madaline
|
||||||
|
Benuel
|
||||||
|
Crespin
|
||||||
|
Kolette
|
||||||
|
Librada
|
||||||
|
Yaminah
|
||||||
|
Dezmariah
|
||||||
|
Daria
|
||||||
|
Kelso
|
||||||
|
Shavar
|
||||||
|
Muriel
|
||||||
|
Lanna
|
||||||
|
Italo
|
||||||
|
Ritchaen
|
||||||
|
Raeanna
|
||||||
|
Geneal
|
||||||
|
Duace
|
||||||
|
Chiffon
|
||||||
|
Jazmin
|
||||||
|
Kennith
|
||||||
|
Leonid
|
||||||
|
Synthious
|
||||||
|
Jocquita
|
||||||
|
Ramira
|
|
@ -0,0 +1,7 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
: ${NODES:=1}
|
||||||
|
srun -N $NODES --partition shpc22 --exclusive \
|
||||||
|
mpirun --bind-to none -mca btl ^openib -npernode 1 \
|
||||||
|
numactl --physcpubind 0-63 \
|
||||||
|
./main $@
|
|
@ -31,3 +31,9 @@ void WriteFile(const char *filename, size_t size, void *buf) {
|
||||||
fclose(f);
|
fclose(f);
|
||||||
CHECK_ERROR(size == ret, "Failed to write %ld bytes to %s", size, filename);
|
CHECK_ERROR(size == ret, "Failed to write %ld bytes to %s", size, filename);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double get_time() {
|
||||||
|
struct timespec tv;
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &tv);
|
||||||
|
return tv.tv_sec + tv.tv_nsec * 1e-9;
|
||||||
|
}
|
||||||
|
|
|
@ -30,5 +30,5 @@
|
||||||
} \
|
} \
|
||||||
} while (false)
|
} while (false)
|
||||||
|
|
||||||
double gettime();
|
double get_time();
|
||||||
void *read_binary(const char *filename, size_t *size);
|
void *read_binary(const char *filename, size_t *size);
|
Loading…
Reference in New Issue