implement skeleton code
This commit is contained in:
parent
1124f4dbcf
commit
c9d6ea507a
@@ -0,0 +1,26 @@
TARGET=main
OBJECTS=main.o util.o styler.o tensor.o

CFLAGS=-std=c++14 -O3 -Wall -march=native -mavx2 -mfma -mno-avx512f -fopenmp -I/usr/local/cuda/include
CUDA_CFLAGS:=$(foreach option, $(CFLAGS),-Xcompiler=$(option))

LDFLAGS=-pthread -L/usr/local/cuda/lib64
LDLIBS=-lstdc++ -lcudart -lm

CXX=g++
CUX=/usr/local/cuda/bin/nvcc

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CXX) $(CFLAGS) -o $(TARGET) $(OBJECTS) $(LDFLAGS) $(LDLIBS)

%.o: %.cpp
	$(CXX) $(CFLAGS) -c -o $@ $<

%.o: %.cu
	$(CUX) $(CUDA_CFLAGS) -c -o $@ $<

clean:
	rm -rf $(TARGET) $(OBJECTS)
@@ -0,0 +1,2 @@
# 2023-winter-school-project
February 2023 Winter School project
Binary file not shown.
@@ -0,0 +1,55 @@
#include <cstdio>
#include <cstdlib>
#include <unistd.h>

#include "styler.h"
#include "util.h"

// Global variables
int N = 1;
int random_seed = 1;
int print_max = 8;
int MAX_LEN = 10;
char *parameter_fname;
char *output_fname;

int main(int argc, char **argv) {
  check_and_parse_args(argc, argv);

  // Initialize model
  styler_initialize(N, random_seed, parameter_fname);

  float *random_floats = nullptr;
  char *output = nullptr;

  // Initialize input and output
  random_floats = (float *)malloc(N * MAX_LEN * sizeof(float));
  output = (char *)malloc(N * (MAX_LEN + 1) * sizeof(char));
  srand(random_seed);
  for (int i = 0; i < N * MAX_LEN; i++) {
    random_floats[i] = ((float)rand()) / ((float)RAND_MAX);
  }

  printf("Styling %d images...", N);
  fflush(stdout);

  // Style the images and measure the elapsed time
  double styler_st = get_time();
  styler(N, random_floats, output);
  double styler_en = get_time();
  double elapsed_time = styler_en - styler_st;
  printf("Done!\n");

  // Print the first few results
  print_first_few_result(output, print_max, elapsed_time);

  // Finalize program
  styler_finalize();

  // Release input/output buffers
  free(random_floats);
  free(output);

  return 0;
}
Binary file not shown.
@@ -0,0 +1 @@

@@ -0,0 +1,5 @@
#!/bin/bash

salloc -N 1 --partition ??? --exclusive --gres=gpu:1 \
  numactl --physcpubind 0-63 \
  ./main "$@"
@@ -0,0 +1,183 @@
#include "styler.h"
#include "util.h"
#include "tensor.h"

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <algorithm>

// Parameters, Activations
Tensor *input;
Tensor *weight;
Tensor *bias;
Tensor *activation;

// Operations
void conv(Tensor *input, Tensor *kernel, Tensor *bias, int stride, int padding, int dilation, Tensor *output) {

  int ic_ = input->shape[0];
  int ih_ = input->shape[1];
  int iw_ = input->shape[2];
  int kn_ = kernel->shape[0];
  int kh_ = kernel->shape[2];
  int kw_ = kernel->shape[3];
  int oc_ = kn_;
  // Standard output size: floor((in + 2*padding - dilation*(kernel-1) - 1) / stride) + 1
  int oh_ = (ih_ + 2 * padding - dilation * (kh_ - 1) - 1) / stride + 1;
  int ow_ = (iw_ + 2 * padding - dilation * (kw_ - 1) - 1) / stride + 1;

  for (int oc = 0; oc < oc_; ++oc) {
    for (int oh = 0; oh < oh_; ++oh) {
      for (int ow = 0; ow < ow_; ++ow) {
        float sum = 0.0f;
        for (int ic = 0; ic < ic_; ++ic) {
          for (int kh = 0; kh < kh_; ++kh) {
            for (int kw = 0; kw < kw_; ++kw) {
              // Map the output position back to the input, honoring stride,
              // padding and dilation, and skip taps that fall outside the input.
              int ih = oh * stride - padding + kh * dilation;
              int iw = ow * stride - padding + kw * dilation;
              if (ih < 0 || ih >= ih_ || iw < 0 || iw >= iw_) continue;
              sum += kernel->buf[oc * ic_ * kh_ * kw_ + ic * kh_ * kw_ + kh * kw_ + kw] *
                     input->buf[ic * ih_ * iw_ + ih * iw_ + iw];
            }
          }
        }
        // One bias term per output channel
        output->buf[oc * oh_ * ow_ + oh * ow_ + ow] = sum + bias->buf[oc];
      }
    }
  }
}
void maxpool(Tensor *input, Tensor *output) {

  const int pool_size = 2;
  int ic_ = input->shape[0];
  int ih_ = input->shape[1];
  int iw_ = input->shape[2];
  int th_ = ih_ / pool_size;
  int tw_ = iw_ / pool_size;

  for (int c = 0; c < ic_; ++c) {
    for (int th = 0; th < th_; ++th) {
      for (int tw = 0; tw < tw_; ++tw) {
        int start_w_idx = tw * pool_size;
        int start_h_idx = th * pool_size;
        // Take the maximum over the 2x2 window
        float val[4] = {0.0};
        val[0 * 2 + 0] = input->buf[c * ih_ * iw_ + start_h_idx * iw_ + start_w_idx];
        val[0 * 2 + 1] = input->buf[c * ih_ * iw_ + start_h_idx * iw_ + start_w_idx + 1];
        val[1 * 2 + 0] = input->buf[c * ih_ * iw_ + (start_h_idx + 1) * iw_ + start_w_idx];
        val[1 * 2 + 1] = input->buf[c * ih_ * iw_ + (start_h_idx + 1) * iw_ + start_w_idx + 1];

        output->buf[c * th_ * tw_ + th * tw_ + tw] = *std::max_element(val, val + 4);
      }
    }
  }
}
void fc(Tensor *input, Tensor *weight, Tensor *bias, Tensor *output) {

  int i_ = input->shape[0];
  int o_ = output->shape[0];

  for (int o = 0; o < o_; ++o) {
    float sum = 0.0f;
    for (int i = 0; i < i_; ++i) {
      sum += input->buf[i] * weight->buf[i * o_ + o];
    }
    sum += bias->buf[o];
    output->buf[o] = sum;
  }
}
void softmax(Tensor *input, Tensor *output) {

  int i_ = input->shape[0];
  int o_ = output->shape[0];

  // Sum of exponentials over the input
  float sum = 0.0f;
  for (int i = 0; i < i_; ++i) {
    sum += exp(input->buf[i]);
  }

  // Normalize each exponential by the sum
  for (int o = 0; o < o_; ++o) {
    output->buf[o] = exp(input->buf[o]) / sum;
  }
}
void batchNorm(Tensor *input, float gamma, float beta, float eps, Tensor *output) {

  int in_ = input->shape[0];
  int ic_ = input->shape[1];
  int ih_ = input->shape[2];
  int iw_ = input->shape[3];

  for (int ic = 0; ic < ic_; ++ic) {

    // mini-batch mean
    float mean = 0.0f;
    for (int in = 0; in < in_; ++in) {
      for (int ih = 0; ih < ih_; ++ih) {
        for (int iw = 0; iw < iw_; ++iw) {
          mean += input->buf[in * ic_ * ih_ * iw_ + ic * ih_ * iw_ + ih * iw_ + iw];
        }
      }
    }
    mean /= in_ * ih_ * iw_;

    // mini-batch variance
    float var = 0.0f;
    for (int in = 0; in < in_; ++in) {
      for (int ih = 0; ih < ih_; ++ih) {
        for (int iw = 0; iw < iw_; ++iw) {
          var += pow(input->buf[in * ic_ * ih_ * iw_ + ic * ih_ * iw_ + ih * iw_ + iw] - mean, 2);
        }
      }
    }
    var /= in_ * ih_ * iw_;

    // normalize, then scale and shift
    for (int in = 0; in < in_; ++in) {
      for (int ih = 0; ih < ih_; ++ih) {
        for (int iw = 0; iw < iw_; ++iw) {
          float xi = input->buf[in * ic_ * ih_ * iw_ + ic * ih_ * iw_ + ih * iw_ + iw];
          float xhat = (xi - mean) / sqrt(var + eps);
          output->buf[in * ic_ * ih_ * iw_ + ic * ih_ * iw_ + ih * iw_ + iw] = gamma * xhat + beta;
        }
      }
    }
  }
}
// Initialize the model. Do input-independent work here.
void styler_initialize(int N, int random_seed, char *parameter_fname) {

  size_t parameter_binary_size = 0;
  float *parameter = (float *)read_binary(parameter_fname, &parameter_binary_size);

  // Network parameters
  input = new Tensor({1, C, H, W}, parameter + OFFSET0);
  weight = new Tensor({C, H, W}, parameter + OFFSET0);
  bias = new Tensor({H, W}, parameter + OFFSET1);
  activation = new Tensor({1, C, H, W}, parameter + OFFSET2);
}

// Styler model
void styler(int N, float *random_floats, char *output) {

}

// Finalize the model.
void styler_finalize() {
  delete input;
  delete weight;
  delete bias;
  delete activation;
}
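Note: styler() above is intentionally left empty in this skeleton. Purely as an illustrative sketch (not the actual network), the operations defined in this file could be chained roughly as follows; every shape, the layer order, and the helper name styler_example are assumptions invented for the example:

// Illustrative sketch only: wires the skeleton's operators together on
// hypothetical tensors. Shapes and layer order are assumptions, not the model.
static void styler_example() {
  Tensor in({3, 64, 64});          // hypothetical 3-channel 64x64 input
  Tensor kernel({8, 3, 3, 3});     // 8 output channels, 3x3 kernels
  Tensor conv_bias({8});           // one bias value per output channel
  Tensor conv_out({8, 64, 64});    // stride 1, padding 1 keeps 64x64
  Tensor pooled({8, 32, 32});      // 2x2 max pooling halves each spatial dim
  Tensor fc_w({8 * 32 * 32, 10});  // flattened features -> 10 scores
  Tensor fc_b({10});
  Tensor scores({10}), probs({10});

  in.set_zero();                   // a real model would load data/parameters here
  kernel.set_zero();
  conv_bias.set_zero();
  fc_w.set_zero();
  fc_b.set_zero();

  conv(&in, &kernel, &conv_bias, /*stride=*/1, /*padding=*/1, /*dilation=*/1, &conv_out);
  maxpool(&conv_out, &pooled);
  Tensor flat({8 * 32 * 32}, pooled.buf);  // copy pooled data into a 1-D tensor for fc()
  fc(&flat, &fc_w, &fc_b, &scores);
  softmax(&scores, &probs);
  // batchNorm() expects a 4-D (N, C, H, W) tensor, so it is omitted from this sketch.
}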
@@ -0,0 +1,26 @@
#pragma once

#include "tensor.h"

// Model parameters
#define PARAMETER_FILE_SIZE 45663232
#define NUM_IMAGES 256
#define C 512
#define H 1024
#define W 1024

#define OFFSET0 0
#define OFFSET1 (OFFSET0 + NUM_IMAGES)
#define OFFSET2 (OFFSET1 + NUM_IMAGES)
#define OFFSET3 (OFFSET2 + NUM_IMAGES)

void conv(Tensor*, Tensor*, Tensor*, int, int, int, Tensor*);
void maxpool(Tensor*, Tensor*);
void fc(Tensor*, Tensor*, Tensor*, Tensor*);
void softmax(Tensor*, Tensor*);
void batchNorm(Tensor*, float, float, float, Tensor*);

void styler_initialize(int, int, char*);
void styler(int, float*, char*);
void styler_finalize();
@@ -0,0 +1 @@
#include "tensor.h"
@@ -0,0 +1,57 @@
#pragma once

#include <cstdlib>
#include <cstring>
#include <vector>

// You can modify the data structure as you want
struct Tensor {

  // Allocate memory
  Tensor(std::vector<int> shape_) {
    ndim = shape_.size();
    for (size_t i = 0; i < ndim; i++) {
      shape[i] = shape_[i];
    }
    size_t n = num_elem();
    buf = (float *)malloc(n * sizeof(float));
  }

  // Allocate memory and copy the given data into it
  Tensor(std::vector<int> shape_, float *buf_) {
    ndim = shape_.size();
    for (size_t i = 0; i < ndim; i++) {
      shape[i] = shape_[i];
    }
    size_t n = num_elem();
    buf = (float *)malloc(n * sizeof(float));
    memcpy(buf, buf_, n * sizeof(float));
  }

  ~Tensor() {
    if (buf != nullptr)
      free(buf);
  }

  // Fill the existing buffer with zeros
  void set_zero() {
    size_t n = num_elem();
    for (size_t i = 0; i < n; i++) {
      buf[i] = 0.0f;
    }
  }

  size_t num_elem() {
    size_t sz = 1;
    for (size_t i = 0; i < ndim; i++)
      sz *= shape[i];
    return sz;
  }

  // Pointer to data
  float *buf = nullptr;

  // Shape of tensor, from outermost dimension to innermost dimension.
  // e.g., {{1.0, -0.5, 2.3}, {4.3, 5.6, -7.8}} => shape = {2, 3}
  size_t ndim = 0;
  size_t shape[4];
};
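For orientation, a small usage sketch of the Tensor struct above; the 2x3 shape and the stored value are made up for illustration:

#include "tensor.h"
#include <cstdio>

int main() {
  Tensor t({2, 3});          // 2 rows, 3 columns, stored row-major in t.buf
  t.set_zero();
  t.buf[0 * 3 + 2] = 1.5f;   // element at row 0, column 2
  printf("%zu elements\n", t.num_elem());  // prints: 6 elements
  return 0;
}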
@@ -0,0 +1,101 @@
#include "util.h"

#include <cstdio>
#include <cstdlib>
#include <unistd.h>
#include <time.h>

// Defined in main.cpp
extern int random_seed;
extern int N;
extern int print_max;
extern int MAX_LEN;
extern char *parameter_fname;
extern char *output_fname;

void *read_binary(const char *filename, size_t *size) {
  size_t size_;
  FILE *f = fopen(filename, "rb");
  CHECK_ERROR(f != NULL, "Failed to read %s", filename);
  fseek(f, 0, SEEK_END);
  size_ = ftell(f);
  rewind(f);
  void *buf = malloc(size_);
  size_t ret = fread(buf, 1, size_, f);
  fclose(f);
  CHECK_ERROR(size_ == ret, "Failed to read %zu bytes from %s", size_, filename);
  if (size != NULL)
    *size = size_;
  return buf;
}

void WriteFile(const char *filename, size_t size, void *buf) {
  FILE *f = fopen(filename, "wb");
  CHECK_ERROR(f != NULL, "Failed to write %s", filename);
  size_t ret = fwrite(buf, 1, size, f);
  fclose(f);
  CHECK_ERROR(size == ret, "Failed to write %zu bytes to %s", size, filename);
}

double get_time() {
  struct timespec tv;
  clock_gettime(CLOCK_MONOTONIC, &tv);
  return tv.tv_sec + tv.tv_nsec * 1e-9;
}

void print_usage_exit(int argc, char **argv) {
  printf("Usage: %s [parameter bin] [output] [N] [seed]\n", argv[0]);
  printf("  parameter bin: File containing DNN parameters\n");
  printf("  output: File to write results\n");
  printf("  N: Number of images to style\n");
  printf("  seed: Random seed\n");
  EXIT(0);
}

void check_and_parse_args(int argc, char **argv) {
  if (argc != 5)
    print_usage_exit(argc, argv);

  int c;
  while ((c = getopt(argc, argv, "h")) != -1) {
    switch (c) {
      case 'h':
        break;
      default:
        print_usage_exit(argc, argv);
    }
  }

  parameter_fname = argv[1];
  output_fname = argv[2];
  N = atoi(argv[3]);
  random_seed = atoi(argv[4]);
}

void print_first_few_result(char *output, int print_max, double elapsed_time) {

  // Print the first few results
  int print_cnt = N < print_max ? N : print_max;
  printf("First %d results are:", print_cnt);
  for (int i = 0; i < print_cnt; i++) {
    printf(" %s%c", output + i * (MAX_LEN + 1),
           i == (print_cnt - 1) ? '\n' : ',');
  }

  // Write the results to a file
  printf("Writing to %s ...", output_fname);
  fflush(stdout);
  FILE *output_fp = fopen(output_fname, "w");
  CHECK_ERROR(output_fp != NULL, "Failed to open %s for writing", output_fname);
  for (int i = 0; i < N; i++) {
    fprintf(output_fp, "%s\n", output + i * (MAX_LEN + 1));
  }
  fclose(output_fp);
  printf("Done!\n");

  // Print elapsed time
  printf("Elapsed time: %.6f seconds\n", elapsed_time);
  printf("Throughput: %.3f images/sec\n", (double)N / elapsed_time);
}
|
@ -0,0 +1,27 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
|
||||
/* Useful macros */
|
||||
#define EXIT(status) \
|
||||
do { \
|
||||
exit(status); \
|
||||
} while (0)
|
||||
|
||||
#define CHECK_ERROR(cond, fmt, ...) \
|
||||
do { \
|
||||
if (!(cond)) {\
|
||||
printf(fmt "\n", ##__VA_ARGS__); \
|
||||
EXIT(EXIT_FAILURE); \
|
||||
} \
|
||||
} while (false)
|
||||
|
||||
|
||||
void print_usage_exit(int argc, char **argv);
|
||||
void check_and_parse_args(int argc, char **argv);
|
||||
double get_time();
|
||||
void *read_binary(const char *filename, size_t *size);
|
||||
void print_first_few_result(char *output, int print_max, double elapsed_time);
|
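As a minimal, hypothetical example of the helpers declared above ("params.bin" is a placeholder filename, not a file shipped with the project):

#include "util.h"

int main() {
  size_t size = 0;
  // read_binary() aborts via CHECK_ERROR internally if the file cannot be read.
  float *params = (float *)read_binary("params.bin", &size);
  CHECK_ERROR(size % sizeof(float) == 0, "Unexpected parameter file size: %zu", size);

  double t0 = get_time();
  // ... do some work with params ...
  printf("Took %.6f seconds\n", get_time() - t0);

  free(params);
  return 0;
}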