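# Character-level GRU language model for generating passwords.  The script
# loads a trained checkpoint ('model-0.0750.pt'), samples passwords one
# character at a time, and can optionally export the weights to a flat
# binary file via LMModel.convert().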
import torch
import torch.nn as nn

import arguments
import dataset
from constants import *
from dataset import NaiveTokenizer, PasswordDataset


def tensor_to_bytes(t):
    # Serialize a tensor's data to raw bytes (numpy memory layout);
    # the printed byte count is a sanity check for the exported file.
    b = t.detach().numpy().tobytes()
    print(len(b))
    return b


def print_tensor(t):
    # Debug helper: print the first five elements of a tensor in
    # scientific notation.
    t = torch.flatten(t)
    for i in range(5):
        print(f"{t[i].item():.4e} ", end='')
    print()


class LMModel(nn.Module):
    # Character-level language model: embedding -> multi-layer GRU ->
    # linear projection to per-character logits.
    def __init__(self, hidden_size, vocab_size, num_layers=2):
        super(LMModel, self).__init__()
        self.args = arguments.parse_args()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = torch.nn.Embedding(
            vocab_size, self.args.embedding_dim)

        self.backbone = torch.nn.GRU(input_size=self.args.embedding_dim,
                                     hidden_size=hidden_size,
                                     num_layers=num_layers,
                                     batch_first=True, dropout=0.1)

        self.linear = torch.nn.Linear(
            in_features=hidden_size, out_features=vocab_size)

    def forward(self, input, hidden=None):
        x = self.embedding(input)
        # Debug output: inspect the embedding table and the embedded batch.
        print_tensor(self.embedding.weight)
        print_tensor(x)
        x, h_n = self.backbone(x, hidden)
        x = self.linear(x)
        return x, h_n

    def initHidden(self, batch_size):
        return torch.zeros((self.num_layers, batch_size, self.hidden_size))

    def convert(self, fname):
        # Export the model weights as one flat binary file.  The shapes are
        # printed so the consumer of the file can verify its layout.
        print(self.embedding.weight.shape)
        print(self.backbone.weight_ih_l0.shape)
        print(self.backbone.weight_ih_l1.shape)
        print(self.backbone.weight_hh_l0.shape)
        print(self.backbone.weight_hh_l1.shape)
        print(self.backbone.bias_ih_l0.shape)
        print(self.backbone.bias_ih_l1.shape)
        print(self.backbone.bias_hh_l0.shape)
        print(self.backbone.bias_hh_l1.shape)

        with open(fname, 'wb') as f:
            f.write(tensor_to_bytes(self.embedding.weight))
            f.write(tensor_to_bytes(self.backbone.weight_ih_l0))
            f.write(tensor_to_bytes(self.backbone.weight_ih_l1))
            f.write(tensor_to_bytes(self.backbone.weight_hh_l0))
            f.write(tensor_to_bytes(self.backbone.weight_hh_l1))
            f.write(tensor_to_bytes(self.backbone.bias_ih_l0))
            f.write(tensor_to_bytes(self.backbone.bias_ih_l1))
            f.write(tensor_to_bytes(self.backbone.bias_hh_l0))
            f.write(tensor_to_bytes(self.backbone.bias_hh_l1))

            f.write(tensor_to_bytes(self.linear.weight))
            f.write(tensor_to_bytes(self.linear.bias))


def idtensor_to_string(id_tensor2d):
    # Convert a 2-D tensor of token ids back into strings, mapping the
    # special SOS/EOS/PAD ids to their markers and every other id to chr().
    ret = []
    for i in range(id_tensor2d.shape[0]):
        s = []
        for j in range(id_tensor2d.shape[1]):
            o = id_tensor2d[i][j].item()
            if o == SOS_ID:
                s.append(SOS)
            elif o == EOS_ID:
                s.append(EOS)
            elif o == PAD_ID:
                s.append(PAD)
            else:
                s.append(chr(o))
        ret.append(''.join(s))
    return ret


NUM_CHAR = 256
args = arguments.parse_args()

model = LMModel(args.hidden_dim, NUM_CHAR)
model.load_state_dict(torch.load('model-0.0750.pt'))
model.eval()

# Optionally dump the trained weights to a flat binary file:
# model.convert("skeleton/model.bin")

num_gen = 1

# Start every sequence with the SOS token and sample one character at a
# time, feeding each sampled id back in as the next input.
input = torch.tensor([[SOS_ID] for _ in range(num_gen)])
hidden = model.initHidden(num_gen)
ret = input.clone().detach()

for i in range(args.password_length):
    output, hidden = model(input, hidden)
    output = output.view((output.size(0), output.size(2)))
    output = nn.functional.softmax(output, dim=1)

    # Sample the next token id from the predicted distribution.
    choices = torch.multinomial(output, num_samples=1)
    ret = torch.cat((ret, choices), dim=1)
    input = choices

print(idtensor_to_string(ret))