import torch
import torch.nn as nn

import arguments
from constants import *


def tensor_to_bytes(t):
    # Serialize a tensor as raw bytes (NumPy's default layout; float32 here).
    data = t.detach().numpy().tobytes()
    print(len(data))
    return data


def print_tensor(t):
    # Debug helper: print the first few elements of a flattened tensor.
    t = torch.flatten(t)
    for i in range(min(5, t.numel())):
        print(f"{t[i].item():.4e} ", end='')
    print()


class LMModel(nn.Module):
    def __init__(self, hidden_size, vocab_size, num_layers=2):
        super().__init__()
        self.args = arguments.parse_args()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(vocab_size, self.args.embedding_dim)
        self.backbone = nn.GRU(input_size=self.args.embedding_dim,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               batch_first=True,
                               dropout=0.1)
        self.linear = nn.Linear(in_features=hidden_size,
                                out_features=vocab_size)

    def forward(self, input, hidden=None):
        x = self.embedding(input)
        # print_tensor(self.embedding.weight)  # debug: inspect embedding weights
        # print_tensor(x)                      # debug: inspect activations
        x, h_n = self.backbone(x, hidden)
        x = self.linear(x)
        return x, h_n

    def initHidden(self, batch_size):
        return torch.zeros((self.num_layers, batch_size, self.hidden_size))

    def convert(self, fname):
        # Dump every parameter as raw float32 bytes, back to back, in a fixed
        # order so a non-PyTorch consumer can read the file sequentially.
        tensors = [
            self.embedding.weight,
            self.backbone.weight_ih_l0, self.backbone.weight_ih_l1,
            self.backbone.weight_hh_l0, self.backbone.weight_hh_l1,
            self.backbone.bias_ih_l0, self.backbone.bias_ih_l1,
            self.backbone.bias_hh_l0, self.backbone.bias_hh_l1,
            self.linear.weight, self.linear.bias,
        ]
        for t in tensors:
            print(t.shape)
        with open(fname, 'wb') as f:
            for t in tensors:
                f.write(tensor_to_bytes(t))


def idtensor_to_string(id_tensor2d):
    # Decode a (batch, length) tensor of token ids back into strings,
    # mapping the special ids to their marker strings from constants.
    ret = []
    for i in range(id_tensor2d.shape[0]):
        s = []
        for j in range(id_tensor2d.shape[1]):
            o = id_tensor2d[i][j].item()
            if o == SOS_ID:
                s.append(SOS)
            elif o == EOS_ID:
                s.append(EOS)
            elif o == PAD_ID:
                s.append(PAD)
            else:
                s.append(chr(o))
        ret.append(''.join(s))
    return ret


NUM_CHAR = 256  # byte-level vocabulary

args = arguments.parse_args()
model = LMModel(args.hidden_dim, NUM_CHAR)
model.load_state_dict(torch.load('model-0.0750.pt', map_location='cpu'))
model.eval()
# model.convert("skeleton/model.bin")

# Autoregressive sampling: start each sequence with SOS and repeatedly feed
# the sampled character back in as the next input.
num_gen = 1
inp = torch.tensor([[SOS_ID] for _ in range(num_gen)])
hidden = model.initHidden(num_gen)
ret = inp.clone().detach()
with torch.no_grad():
    for _ in range(args.password_length):
        output, hidden = model(inp, hidden)
        # Sequence length is 1: reshape (batch, 1, vocab) -> (batch, vocab).
        output = output.view((output.size(0), output.size(2)))
        probs = nn.functional.softmax(output, dim=1)
        choices = torch.multinomial(probs, num_samples=1)
        ret = torch.cat((ret, choices), dim=1)
        inp = choices

print(idtensor_to_string(ret))
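
# --- Illustrative sketch (assumption, not part of the original pipeline) ---
# convert() above concatenates each tensor's raw float32 bytes in a fixed
# order. A consumer that knows embedding_dim and hidden_dim (the values from
# `arguments` at export time) could read the file back as below. The function
# name and parameters here are hypothetical; the shapes mirror PyTorch's
# documented GRU parameter layout (weight_ih_l0: (3*hidden, input),
# weight_ih_l1 and weight_hh_l*: (3*hidden, hidden), biases: (3*hidden,)).
def load_exported_weights(fname, embedding_dim, hidden_dim, vocab_size=NUM_CHAR):
    import numpy as np
    shapes = [
        (vocab_size, embedding_dim),           # embedding.weight
        (3 * hidden_dim, embedding_dim),       # backbone.weight_ih_l0
        (3 * hidden_dim, hidden_dim),          # backbone.weight_ih_l1
        (3 * hidden_dim, hidden_dim),          # backbone.weight_hh_l0
        (3 * hidden_dim, hidden_dim),          # backbone.weight_hh_l1
        (3 * hidden_dim,), (3 * hidden_dim,),  # backbone.bias_ih_l0/_l1
        (3 * hidden_dim,), (3 * hidden_dim,),  # backbone.bias_hh_l0/_l1
        (vocab_size, hidden_dim),              # linear.weight
        (vocab_size,),                         # linear.bias
    ]
    weights = []
    with open(fname, 'rb') as f:
        for shape in shapes:
            n = int(np.prod(shape))
            buf = f.read(4 * n)  # float32 => 4 bytes per element
            weights.append(np.frombuffer(buf, dtype=np.float32).reshape(shape))
    return weights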