implement skeleton code

This commit is contained in:
sota-junsik 2023-02-01 13:30:00 +00:00
parent 1124f4dbcf
commit c9d6ea507a
13 changed files with 484 additions and 0 deletions

26
APWS23/project/Makefile Normal file
View File

@ -0,0 +1,26 @@
TARGET=main
OBJECTS=main.o util.o styler.o tensor.o

# Host compiler flags; CUDA_CFLAGS wraps each flag in -Xcompiler= so nvcc
# forwards them to the host compiler.
CFLAGS=-std=c++14 -O3 -Wall -march=native -mavx2 -mfma -mno-avx512f -fopenmp -I/usr/local/cuda/include
CUDA_CFLAGS:=$(foreach option, $(CFLAGS),-Xcompiler=$(option))
LDFLAGS=-pthread -L/usr/local/cuda/lib64
LDLIBS= -lstdc++ -lcudart -lm
CXX=g++
CUX=/usr/local/cuda/bin/nvcc

all: $(TARGET)

# Link with $(CXX): the original recipe used $(CC), which is never set here
# and defaults to `cc`.
$(TARGET): $(OBJECTS)
	$(CXX) $(CFLAGS) -o $(TARGET) $(OBJECTS) $(LDFLAGS) $(LDLIBS)

# Use $< (first prerequisite) instead of $^ so that adding header
# dependencies to these rules never passes headers to the compiler.
%.o: %.cpp
	$(CXX) $(CFLAGS) -c -o $@ $<

%.o: %.cu
	$(CUX) $(CUDA_CFLAGS) -c -o $@ $<

clean:
	rm -rf $(TARGET) $(OBJECTS)

2
APWS23/project/README.md Normal file
View File

@ -0,0 +1,2 @@
# 2023-winter-school-project
2023년 2월 겨울학교 프로젝트

BIN
APWS23/project/main Executable file

Binary file not shown.

55
APWS23/project/main.cpp Normal file
View File

@ -0,0 +1,55 @@
#include <cstdio>
#include <cstdlib>
#include <unistd.h>
#include "styler.h"
#include "util.h"
// Global variables
int N = 1;              // number of images to style (overwritten from argv[3])
int random_seed = 1;    // seed for srand() input generation (argv[4])
int print_max = 8;      // max number of results echoed to stdout
int MAX_LEN = 10;       // floats generated per image; each output string is MAX_LEN chars + NUL
char *parameter_fname;  // path to the DNN parameter binary (argv[1])
char *output_fname;     // path the results are written to (argv[2])
// Entry point: parse args, run the styler over N random inputs, report
// results/timing, then release everything that was allocated.
int main(int argc, char **argv) {
  // Fills the globals above from [parameter bin] [output] [N] [seed].
  check_and_parse_args(argc, argv);

  // Initialize model
  styler_initialize(N, random_seed, parameter_fname);

  // Initialize input and output buffers. Mallocs were previously unchecked.
  float *random_floats = (float *)malloc(N * MAX_LEN * sizeof(float));
  char *output = (char *)malloc(N * (MAX_LEN + 1) * sizeof(char));
  if (random_floats == NULL || output == NULL) {
    fprintf(stderr, "Failed to allocate input/output buffers\n");
    return EXIT_FAILURE;
  }

  // Deterministic pseudo-random inputs in [0, 1].
  srand(random_seed);
  for (int i = 0; i < N * MAX_LEN; i++) {
    random_floats[i] = ((float)rand()) / ((float)RAND_MAX);
  }

  printf("Styling %d images...", N);
  fflush(stdout);

  // Styling images and measure time
  double styler_st = get_time();
  styler(N, random_floats, output);
  double styler_en = get_time();
  double elapsed_time = styler_en - styler_st;
  printf("Done!\n");

  // Print first few result
  print_first_few_result(output, print_max, elapsed_time);

  // Finalize program and free the buffers (they were previously leaked).
  styler_finalize();
  free(random_floats);
  free(output);
  return 0;
}

BIN
APWS23/project/model.bin Normal file

Binary file not shown.

View File

@ -0,0 +1 @@

5
APWS23/project/run.sh Executable file
View File

@ -0,0 +1,5 @@
#!/bin/bash
# Launch ./main on one exclusive Slurm node with a single GPU, pinned to
# CPUs 0-63 via numactl; all script arguments are forwarded to ./main.
# NOTE(review): "--partition ???" is a placeholder — set the real Slurm
# partition before running. Also consider quoting "$@" so arguments with
# spaces survive word splitting.
salloc -N 1 --partition ??? --exclusive --gres=gpu:1 \
numactl --physcpubind 0-63 \
./main $@

183
APWS23/project/styler.cu Normal file
View File

@ -0,0 +1,183 @@
#include "styler.h"
#include "util.h"
#include "tensor.h"
#include <stdlib.h>
#include <cstdint>
#include <cstdlib>
#include <cstdio>
#include <algorithm>
// Parameters, Activations
// All four are created by styler_initialize() from the parameter binary and
// destroyed by styler_finalize().
Tensor *input;       // model input tensor {1, C, H, W}
Tensor *weight;      // network weights
Tensor *bias;        // network biases
Tensor *activation;  // activation buffer {1, C, H, W}
// Operations
// 2D convolution (single image).
// Assumes input is laid out CHW and kernel OIHW — TODO confirm against the
// parameter file layout.
//   input:  {IC, IH, IW}, kernel: {OC, IC, KH, KW}, bias: per output channel,
//   output: {OC, OH, OW} with the standard output-size formula below.
// Fixes vs. the skeleton: the kernel index now includes the oc/ic offsets
// (it previously always read the first channel's filter), the input index
// now honors `padding` and `dilation` (it hard-coded kh-1/kw-1), out-of-range
// reads above the input extent are skipped, and the bias is applied
// per-channel instead of being indexed per output element.
void conv(Tensor *input, Tensor *kernel, Tensor *bias, int stride, int padding, int dilation, Tensor *output){
  int ic_ = input->shape[0];
  int ih_ = input->shape[1];
  int iw_ = input->shape[2];
  int kn_ = kernel->shape[0];
  int kh_ = kernel->shape[2];
  int kw_ = kernel->shape[3];
  int oc_ = kn_;
  int oh_ = (ih_ + 2 * padding - dilation * (kh_ - 1) - 1) / stride + 1;
  int ow_ = (iw_ + 2 * padding - dilation * (kw_ - 1) - 1) / stride + 1;
  for (int oc = 0; oc < oc_; ++oc) {
    for (int oh = 0; oh < oh_; ++oh) {
      for (int ow = 0; ow < ow_; ++ow) {
        float sum = 0.0f;
        for (int ic = 0; ic < ic_; ++ic) {
          for (int kh = 0; kh < kh_; ++kh) {
            for (int kw = 0; kw < kw_; ++kw) {
              // Map output position back to the (possibly padded/dilated)
              // input position; skip taps that fall in the padding region.
              int ih = oh * stride - padding + kh * dilation;
              int iw = ow * stride - padding + kw * dilation;
              if (ih < 0 || ih >= ih_ || iw < 0 || iw >= iw_) continue;
              sum += kernel->buf[oc * ic_ * kh_ * kw_ + ic * kh_ * kw_ + kh * kw_ + kw] *
                     input->buf[ic * ih_ * iw_ + ih * iw_ + iw];
            }
          }
        }
        output->buf[oc * oh_ * ow_ + oh * ow_ + ow] = sum + bias->buf[oc];
      }
    }
  }
}
// 2x2 max pooling with stride 2 over a CHW tensor. Output is
// {C, IH/2, IW/2}; odd trailing rows/columns are dropped by the integer
// division.
void maxpool(Tensor *input, Tensor *output){
  const int pool_size = 2;
  int channels = input->shape[0];
  int in_h = input->shape[1];
  int in_w = input->shape[2];
  int out_h = in_h / pool_size;
  int out_w = in_w / pool_size;
  for (int c = 0; c < channels; ++c) {
    const float *plane = input->buf + c * in_h * in_w;
    float *out_plane = output->buf + c * out_h * out_w;
    for (int oh = 0; oh < out_h; ++oh) {
      for (int ow = 0; ow < out_w; ++ow) {
        // Top-left corner of the 2x2 window in the input plane.
        int h0 = oh * pool_size;
        int w0 = ow * pool_size;
        float best = plane[h0 * in_w + w0];
        best = std::max(best, plane[h0 * in_w + w0 + 1]);
        best = std::max(best, plane[(h0 + 1) * in_w + w0]);
        best = std::max(best, plane[(h0 + 1) * in_w + w0 + 1]);
        out_plane[oh * out_w + ow] = best;
      }
    }
  }
}
// Fully-connected layer: output[o] = sum_i input[i] * weight[i][o] + bias[o].
// Weight is stored input-major ({I, O}); the product terms are accumulated
// first and the bias added last, matching the original summation order.
void fc(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output){
  const int in_dim = input->shape[0];
  const int out_dim = output->shape[0];
  for (int o = 0; o < out_dim; ++o) {
    float acc = 0.0f;
    for (int i = 0; i < in_dim; ++i) {
      acc += input->buf[i] * weight->buf[i * out_dim + o];
    }
    acc += bias->buf[o];
    output->buf[o] = acc;
  }
}
// Softmax over a 1-D tensor: output[o] = exp(input[o]) / sum_i exp(input[i]).
// Fixes vs. the skeleton: the original computed exp(input[o] / sum), i.e. it
// divided *inside* the exponential, which is not softmax. The maximum is
// subtracted before exponentiating for numerical stability (mathematically
// identical result).
void softmax(Tensor *input, Tensor *output){
  int i_ = input->shape[0];
  int o_ = output->shape[0];
  if (i_ <= 0) return;  // nothing to normalize
  float max_val = input->buf[0];
  for (int i = 1; i < i_; ++i) {
    max_val = std::max(max_val, input->buf[i]);
  }
  float sum = 0.0f;
  for (int i = 0; i < i_; ++i) {
    sum += exp(input->buf[i] - max_val);
  }
  for (int o = 0; o < o_; ++o) {
    output->buf[o] = exp(input->buf[o] - max_val) / sum;
  }
}
// Per-channel batch normalization over an {N, C, H, W} tensor, using
// statistics computed from this batch; a single scalar gamma/beta pair is
// applied to every channel.
// Fixes vs. the skeleton: pow(x, 2) replaced by diff * diff (pow is a general
// transcendental and far slower for squaring), and 1/sqrt(var + eps) is
// hoisted out of the normalization loops.
void batchNorm(Tensor *input, float gamma, float beta, float eps, Tensor *output){
  int in_ = input->shape[0];
  int ic_ = input->shape[1];
  int ih_ = input->shape[2];
  int iw_ = input->shape[3];
  for (int ic = 0; ic < ic_; ++ic) {
    // mini-batch mean over N, H, W for this channel
    float mean = 0.0f;
    for (int in = 0; in < in_; ++in) {
      for (int ih = 0; ih < ih_; ++ih) {
        for (int iw = 0; iw < iw_; ++iw) {
          mean += input->buf[in * ic_ * ih_ * iw_ + ic * ih_ * iw_ + ih * iw_ + iw];
        }
      }
    }
    mean /= in_ * ih_ * iw_;
    // mini-batch variance (biased, divides by the element count)
    float var = 0.0f;
    for (int in = 0; in < in_; ++in) {
      for (int ih = 0; ih < ih_; ++ih) {
        for (int iw = 0; iw < iw_; ++iw) {
          float diff = input->buf[in * ic_ * ih_ * iw_ + ic * ih_ * iw_ + ih * iw_ + iw] - mean;
          var += diff * diff;
        }
      }
    }
    var /= in_ * ih_ * iw_;
    // normalize, scale and shift
    float inv_std = 1.0f / sqrt(var + eps);
    for (int in = 0; in < in_; ++in) {
      for (int ih = 0; ih < ih_; ++ih) {
        for (int iw = 0; iw < iw_; ++iw) {
          int idx = in * ic_ * ih_ * iw_ + ic * ih_ * iw_ + ih * iw_ + iw;
          output->buf[idx] = gamma * (input->buf[idx] - mean) * inv_std + beta;
        }
      }
    }
  }
}
// Initialize the model. Do input-independent job here.
// Loads the parameter binary and creates the global tensors.
// NOTE(review): `input` and `weight` both point at parameter + OFFSET0, and
// the OFFSET* constants (styler.h) step by NUM_IMAGES while these tensors
// hold C*H*W elements — these look like skeleton placeholders; confirm the
// real parameter layout before relying on them.
// NOTE(review): the buffer returned by read_binary() is malloc'd and never
// freed here — verify whether Tensor takes ownership of it.
void styler_initialize(int N, int random_seed, char *parameter_fname) {
  size_t parameter_binary_size = 0;
  float *parameter = (float *)read_binary(parameter_fname, &parameter_binary_size);
  // Network Parameters
  input = new Tensor({1, C, H, W}, parameter + OFFSET0);
  weight = new Tensor({C, H, W}, parameter + OFFSET0);
  bias = new Tensor({H, W}, parameter + OFFSET1);
  activation = new Tensor({1, C, H, W}, parameter + OFFSET2);
}
// Styler model
// Runs the styling pipeline for N images. `random_floats` holds N * MAX_LEN
// uniform samples generated in main(); results must be written into `output`
// as N NUL-terminated strings of up to MAX_LEN characters each (main.cpp
// allocates N * (MAX_LEN + 1) bytes and prints each with %s).
// TODO: skeleton — the forward pass is not implemented yet.
void styler(int N, float *random_floats, char *output) {
}
// Finalize the model.
void styler_finalize() {
delete weight;
delete bias;
delete activation;
}

26
APWS23/project/styler.h Normal file
View File

@ -0,0 +1,26 @@
#pragma once

#include "tensor.h"

// Model parameters
#define PARAMETER_FILE_SIZE 45663232
#define NUM_IMAGES 256
#define C 512
#define H 1024
#define W 1024
// Offsets (in floats) into the parameter binary.
// NOTE(review): the offsets step by NUM_IMAGES, which looks like a skeleton
// placeholder — confirm against the real parameter layout.
#define OFFSET0 0
#define OFFSET1 (OFFSET0 + NUM_IMAGES)
#define OFFSET2 (OFFSET1 + NUM_IMAGES)
#define OFFSET3 (OFFSET2 + NUM_IMAGES)

// Layer operations implemented in styler.cu.
// Fixed: conv's prototype now matches its definition
// (input, kernel, bias, stride, padding, dilation, output); the previous
// four-tensor declaration did not correspond to any implementation.
void conv(Tensor *input, Tensor *kernel, Tensor *bias, int stride, int padding, int dilation, Tensor *output);
void maxpool(Tensor *input, Tensor *output);
void fc(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output);
void softmax(Tensor *input, Tensor *output);
void batchNorm(Tensor *input, float gamma, float beta, float eps, Tensor *output);

// Model lifecycle.
void styler_initialize(int N, int random_seed, char *parameter_fname);
void styler(int N, float *random_floats, char *output);
void styler_finalize();

View File

@ -0,0 +1 @@
#include "tensor.h"

57
APWS23/project/tensor.h Normal file
View File

@ -0,0 +1,57 @@
#pragma once
#include <cstdlib>
#include <vector>
// You can modify the data structure as you want
struct Tensor {
  // Alloc memory for the given shape; contents are uninitialized.
  // NOTE: at most 4 dimensions are supported (fixed-size `shape` array).
  Tensor(std::vector<int> shape_) {
    ndim = shape_.size();
    for (size_t i = 0; i < ndim; i++) {
      shape[i] = shape_[i];
    }
    size_t n = num_elem();
    buf = (float*)malloc(n * sizeof(float));
  }
  // Alloc memory and copy `n` floats from buf_.
  // Fixed: the skeleton allocated the buffer but never copied buf_ into it,
  // leaving the tensor uninitialized.
  Tensor(std::vector<int> shape_, float *buf_) {
    ndim = shape_.size();
    for (size_t i = 0; i < ndim; i++) {
      shape[i] = shape_[i];
    }
    size_t n = num_elem();
    buf = (float*)malloc(n * sizeof(float));
    for (size_t i = 0; i < n; i++) {
      buf[i] = buf_[i];
    }
  }
  // Deep copy (added): the implicit shallow copy would make two tensors share
  // one buffer and double-free it in the destructor.
  Tensor(const Tensor &other) {
    ndim = other.ndim;
    for (size_t i = 0; i < ndim; i++) {
      shape[i] = other.shape[i];
    }
    size_t n = num_elem();
    buf = (float*)malloc(n * sizeof(float));
    for (size_t i = 0; i < n; i++) {
      buf[i] = other.buf[i];
    }
  }
  Tensor &operator=(const Tensor &other) {
    if (this != &other) {
      free(buf);
      ndim = other.ndim;
      for (size_t i = 0; i < ndim; i++) {
        shape[i] = other.shape[i];
      }
      size_t n = num_elem();
      buf = (float*)malloc(n * sizeof(float));
      for (size_t i = 0; i < n; i++) {
        buf[i] = other.buf[i];
      }
    }
    return *this;
  }
  ~Tensor() {
    if (buf != nullptr)
      free(buf);
  }
  // Zero every element in place.
  // Fixed: the skeleton re-malloc'd the buffer here, leaking the previous
  // allocation on every call.
  void set_zero() {
    size_t n = num_elem();
    for (size_t i = 0; i < n; i++){
      buf[i] = 0.0;
    }
  }
  // Total number of elements (product of the shape dimensions).
  size_t num_elem() {
    size_t sz = 1;
    for (size_t i = 0; i < ndim; i++)
      sz *= shape[i];
    return sz;
  }
  // Pointer to data
  float *buf = nullptr;
  // Shape of tensor, from outermost dimension to innermost dimension.
  // e.g., {{1.0, -0.5, 2.3}, {4.3, 5.6, -7.8}} => shape = {2, 3}
  size_t ndim = 0;
  size_t shape[4];
};

101
APWS23/project/util.cpp Normal file
View File

@ -0,0 +1,101 @@
#include "util.h"
#include <cstdio>
#include <cstdlib>
#include <unistd.h>
#include <time.h>
// Defined in main.cpp
extern int random_seed;       // RNG seed (argv[4])
extern int N;                 // number of images to style (argv[3])
extern int print_max;         // max results echoed to stdout
extern int MAX_LEN;           // chars per output string, excluding the NUL
extern char *parameter_fname; // parameter binary path (argv[1])
extern char *output_fname;    // result file path (argv[2])
// Read the whole of `filename` into a freshly malloc'd buffer.
// On success returns the buffer and, if `size` is non-NULL, stores the file
// size in bytes into *size. Any failure prints a message and exits via
// CHECK_ERROR. The caller owns (and must free()) the returned buffer.
void *read_binary(const char *filename, size_t *size) {
  size_t size_;
  FILE *f = fopen(filename, "rb");
  CHECK_ERROR(f != NULL, "Failed to read %s", filename);
  fseek(f, 0, SEEK_END);
  size_ = ftell(f);
  rewind(f);
  void *buf = malloc(size_);
  // Was unchecked: a failed allocation would crash inside fread.
  CHECK_ERROR(buf != NULL, "Failed to allocate %ld bytes for %s", size_,
              filename);
  size_t ret = fread(buf, 1, size_, f);
  fclose(f);
  CHECK_ERROR(size_ == ret, "Failed to read %ld bytes from %s", size_,
              filename);
  if (size != NULL)
    *size = size_;
  return buf;
}
// Write `size` bytes from `buf` to `filename`, aborting via CHECK_ERROR on
// any failure (open or short write).
void WriteFile(const char *filename, size_t size, void *buf) {
  FILE *fp = fopen(filename, "wb");
  CHECK_ERROR(fp != NULL, "Failed to write %s", filename);
  const size_t written = fwrite(buf, 1, size, fp);
  fclose(fp);
  CHECK_ERROR(size == written, "Failed to write %ld bytes to %s", size, filename);
}
// Monotonic wall-clock time in seconds, for measuring elapsed intervals.
double get_time() {
  struct timespec now;
  clock_gettime(CLOCK_MONOTONIC, &now);
  double seconds = now.tv_sec;
  double fraction = now.tv_nsec * 1e-9;
  return seconds + fraction;
}
// Print command-line usage to stdout and terminate with status 0.
void print_usage_exit(int argc, char **argv) {
  printf("Usage %s [parameter bin] [output] [N] [seed] \n", argv[0]);
  // Fixed typo in the user-facing text: "conatining" -> "containing".
  printf(" parameter bin: File containing DNN parameters\n");
  printf(" output: File to write results\n");
  printf(" N: Number of images to style\n");
  printf(" seed: Random seed\n");
  EXIT(0);
}
// Parse the four positional arguments into the globals declared above;
// any other argument count prints usage and exits.
void check_and_parse_args(int argc, char **argv) {
  if (argc != 5)
    print_usage_exit(argc, argv);
  int c;
  // NOTE(review): this getopt loop only recognizes "-h", whose case does
  // nothing, and the arguments are still read positionally below — so a
  // leading "-h" would be consumed as parameter_fname anyway. Looks like
  // leftover scaffolding; confirm intent before removing.
  while ((c = getopt(argc, argv, "h")) != -1) {
    switch (c) {
      case 'h':
        break;
      default:
        print_usage_exit(argc, argv);
    }
  }
  // Positional arguments: [parameter bin] [output] [N] [seed].
  parameter_fname = argv[1];
  output_fname = argv[2];
  N = atoi(argv[3]);
  random_seed = atoi(argv[4]);
}
void print_first_few_result(char *output, int print_max, double elapsed_time){
// Print first few results
int print_cnt = N < print_max ? N : print_max;
printf("First %d results are:", print_cnt);
for (int i = 0; i < print_cnt; i++) {
printf(" %s%c", output + i * (MAX_LEN + 1),
i == (print_cnt - 1) ? '\n' : ',');
}
// Write the results to file
printf("Writing to %s ...", output_fname);
fflush(stdout);
FILE *output_fp = (FILE *)fopen(output_fname, "w");
for (int i = 0; i < N; i++) {
fprintf(output_fp, "%s\n", output + i * (MAX_LEN + 1));
}
fclose(output_fp);
printf("Done!\n");
// Print elapsed time
printf("Elapsed time: %.6f seconds\n", elapsed_time);
printf("Throughput: %.3f images/sec\n", (double)N / elapsed_time);
}

27
APWS23/project/util.h Normal file
View File

@ -0,0 +1,27 @@
#pragma once
#include <cstdio>
#include <cstdlib>
#include <unistd.h>
#include <time.h>
/* Useful macros */
// Terminate the process with the given status (statement-shaped macro).
#define EXIT(status) \
  do { \
    exit(status); \
  } while (0)
// If `cond` is false, print the printf-style message and exit with failure.
#define CHECK_ERROR(cond, fmt, ...) \
  do { \
    if (!(cond)) {\
      printf(fmt "\n", ##__VA_ARGS__); \
      EXIT(EXIT_FAILURE); \
    } \
  } while (false)
// Print usage text and exit (util.cpp).
void print_usage_exit(int argc, char **argv);
// Parse [parameter bin] [output] [N] [seed] into main.cpp's globals.
void check_and_parse_args(int argc, char **argv);
// Monotonic wall-clock time in seconds.
double get_time();
// Read a whole file into a malloc'd buffer; byte count stored in *size.
void *read_binary(const char *filename, size_t *size);
// Echo the first few results, write all to output_fname, and print timing.
void print_first_few_result(char *output, int print_max, double elapsed_time);