# chundoong-lab-ta/SHPC2022/final-project/inference.py
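"""Inference script for the character-level GRU password model.

Loads a trained checkpoint, optionally dumps its weights as one flat
binary blob for the accompanying skeleton code (see convert() and the
commented-out call writing skeleton/model.bin), and samples a password
token-by-token starting from SOS_ID.
"""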

import torch
import torch.nn as nn
import arguments
import dataset
from constants import *
from dataset import NaiveTokenizer, PasswordDataset


def tensor_to_bytes(t):
    """Serialize a tensor as raw bytes (native dtype and byte order)."""
    data = t.detach().numpy().tobytes()
    print(len(data))  # debug: byte count of each dumped tensor
    return data


def print_tensor(t):
    """Debug helper: print the first five elements of a flattened tensor."""
    t = torch.flatten(t)
    for i in range(5):
        print(f"{t[i].item():.4e} ", end='')
    print()


class LMModel(nn.Module):
    def __init__(self, hidden_size, vocab_size, num_layers=2):
        super(LMModel, self).__init__()
        self.args = arguments.parse_args()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = torch.nn.Embedding(vocab_size, self.args.embedding_dim)
        self.backbone = torch.nn.GRU(input_size=self.args.embedding_dim,
                                     hidden_size=hidden_size,
                                     num_layers=num_layers,
                                     batch_first=True, dropout=0.1)
        self.linear = torch.nn.Linear(in_features=hidden_size,
                                      out_features=vocab_size)

    def forward(self, input, hidden=None):
        x = self.embedding(input)
        # Debug prints, kept from the original, for comparing intermediate
        # values against the skeleton implementation.
        print_tensor(self.embedding.weight)
        print_tensor(x)
        x, h_n = self.backbone(x, hidden)
        x = self.linear(x)
        return x, h_n

    def initHidden(self, batch_size):
        # One zero state per GRU layer: (num_layers, batch, hidden_size).
        return torch.zeros((self.num_layers, batch_size, self.hidden_size))

    def convert(self, fname):
        """Dump all parameters as one flat binary blob, in a fixed order."""
        # Shape report, in the same order as the writes below.
        for p in (self.embedding.weight,
                  self.backbone.weight_ih_l0, self.backbone.weight_ih_l1,
                  self.backbone.weight_hh_l0, self.backbone.weight_hh_l1,
                  self.backbone.bias_ih_l0, self.backbone.bias_ih_l1,
                  self.backbone.bias_hh_l0, self.backbone.bias_hh_l1):
            print(p.shape)
        with open(fname, 'wb') as f:
            f.write(tensor_to_bytes(self.embedding.weight))
            f.write(tensor_to_bytes(self.backbone.weight_ih_l0))
            f.write(tensor_to_bytes(self.backbone.weight_ih_l1))
            f.write(tensor_to_bytes(self.backbone.weight_hh_l0))
            f.write(tensor_to_bytes(self.backbone.weight_hh_l1))
            f.write(tensor_to_bytes(self.backbone.bias_ih_l0))
            f.write(tensor_to_bytes(self.backbone.bias_ih_l1))
            f.write(tensor_to_bytes(self.backbone.bias_hh_l0))
            f.write(tensor_to_bytes(self.backbone.bias_hh_l1))
            f.write(tensor_to_bytes(self.linear.weight))
            f.write(tensor_to_bytes(self.linear.bias))
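

# A minimal read-back sketch (not part of the original script; the function
# name and layout constants are mine) for sanity-checking a convert() dump.
# It assumes float32 parameters and the exact write order used in convert().
def read_weights(fname, vocab_size, embedding_dim, hidden_size):
    import numpy as np
    buf = np.fromfile(fname, dtype=np.float32)
    # PyTorch GRU shapes: weight_ih_l0 is (3*hidden, input), all later
    # layers take the previous layer's hidden state as input.
    shapes = [
        ("embedding", (vocab_size, embedding_dim)),
        ("W_ih_l0", (3 * hidden_size, embedding_dim)),
        ("W_ih_l1", (3 * hidden_size, hidden_size)),
        ("W_hh_l0", (3 * hidden_size, hidden_size)),
        ("W_hh_l1", (3 * hidden_size, hidden_size)),
        ("b_ih_l0", (3 * hidden_size,)),
        ("b_ih_l1", (3 * hidden_size,)),
        ("b_hh_l0", (3 * hidden_size,)),
        ("b_hh_l1", (3 * hidden_size,)),
        ("linear_w", (vocab_size, hidden_size)),
        ("linear_b", (vocab_size,)),
    ]
    out, offset = {}, 0
    for name, shape in shapes:
        n = int(np.prod(shape))
        out[name] = buf[offset:offset + n].reshape(shape)
        offset += n
    assert offset == buf.size, "dump size does not match the expected layout"
    return out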


def idtensor_to_string(id_tensor2d):
    """Decode a (batch, seq_len) tensor of token ids into strings."""
    ret = []
    for i in range(id_tensor2d.shape[0]):
        s = []
        for j in range(id_tensor2d.shape[1]):
            o = id_tensor2d[i][j].item()
            if o == SOS_ID:
                s.append(SOS)
            elif o == EOS_ID:
                s.append(EOS)
            elif o == PAD_ID:
                s.append(PAD)
            else:
                s.append(chr(o))  # any other id is a raw character code
        ret.append(''.join(s))
    return ret
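

# For example, a row [SOS_ID, 97, 98, EOS_ID] decodes to SOS + "ab" + EOS,
# where SOS/EOS are whatever marker strings constants.py defines for them.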

NUM_CHAR = 256  # vocabulary size: one id per possible byte value

args = arguments.parse_args()
model = LMModel(args.hidden_dim, NUM_CHAR)
model.load_state_dict(torch.load('model-0.0750.pt'))
model.eval()
# model.convert("skeleton/model.bin")

# Sample one password autoregressively, starting from the SOS token.
num_gen = 1
input = torch.tensor([[SOS_ID] for _ in range(num_gen)])
hidden = model.initHidden(num_gen)
ret = input.clone().detach()
for i in range(args.password_length):
    output, hidden = model(input, hidden)
    # (batch, 1, vocab) -> (batch, vocab), then sample from the softmax.
    output = output.view((output.size(0), output.size(2)))
    output = nn.functional.softmax(output, dim=1)
    choices = torch.multinomial(output, num_samples=1)
    ret = torch.cat((ret, choices), dim=1)
    input = choices
print(idtensor_to_string(ret))
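

# A reusable sketch of the sampling loop above (the helper name and signature
# are mine, not from the original script): every op in the loop is already
# batched along dim 0, so drawing n passwords at once is just a larger batch.
def generate(model, n, max_len):
    inp = torch.full((n, 1), SOS_ID, dtype=torch.long)
    hid = model.initHidden(n)
    out_ids = inp.clone()
    with torch.no_grad():  # inference only, no autograd bookkeeping
        for _ in range(max_len):
            logits, hid = model(inp, hid)
            # Take the last time step and sample one id per sequence.
            probs = nn.functional.softmax(logits[:, -1, :], dim=1)
            inp = torch.multinomial(probs, num_samples=1)
            out_ids = torch.cat((out_ids, inp), dim=1)
    return idtensor_to_string(out_ids)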