def __init__(self):
    super(NUMEncoder, self).__init__()
    self.vocab_size = len(NUMSentence())
    self.embedding_dim = 300
    self.embedding = nn.Embedding(self.vocab_size,
                                  embedding_dim=self.embedding_dim,
                                  padding_idx=NUMSentence().PAD)
    self.gru = nn.GRU(input_size=self.embedding_dim,
                      hidden_size=32,
                      num_layers=1,
                      batch_first=True)
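# The encoder and decoder rely on a NUMSentence vocabulary helper (len(), PAD,
# SOS, transform, reverse_transform) that is imported from num_sentence but not
# shown in this excerpt. Below is a minimal sketch of what such a class could
# look like; the exact token values and implementation details are assumptions
# and the real num_sentence.py may differ.
class NUMSentence:
    PAD_TAG, UNK_TAG, SOS_TAG, EOS_TAG = "PAD", "UNK", "SOS", "EOS"
    PAD, UNK, SOS, EOS = 0, 1, 2, 3

    def __init__(self):
        # Vocabulary: 4 special tokens plus the digits 0-9
        self.dict = {self.PAD_TAG: self.PAD, self.UNK_TAG: self.UNK,
                     self.SOS_TAG: self.SOS, self.EOS_TAG: self.EOS}
        for i in range(10):
            self.dict[str(i)] = len(self.dict)
        self.inverse_dict = {v: k for k, v in self.dict.items()}

    def __len__(self):
        return len(self.dict)

    def transform(self, sentence, max_len, add_eos=False):
        # Map each character to its index, optionally append EOS, then pad to max_len
        chars = list(sentence)
        chars = chars[:max_len - 1] if add_eos else chars[:max_len]
        indices = [self.dict.get(ch, self.UNK) for ch in chars]
        if add_eos:
            indices.append(self.EOS)
        indices += [self.PAD] * (max_len - len(indices))
        return indices

    def reverse_transform(self, indices):
        # Map indices back to characters, stopping at EOS and skipping PAD
        chars = []
        for idx in indices:
            if idx == self.EOS:
                break
            if idx != self.PAD:
                chars.append(self.inverse_dict.get(idx, self.UNK_TAG))
        return "".join(chars)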
def __init__(self):
    super(NUMDecoder, self).__init__()
    self.max_seq_len = config.MAX_LEN
    self.vocab_size = len(NUMSentence())
    self.embedding_dim = 300
    self.embedding = nn.Embedding(self.vocab_size,
                                  embedding_dim=self.embedding_dim,
                                  padding_idx=NUMSentence().PAD)
    self.gru = nn.GRU(self.embedding_dim,
                      hidden_size=32,
                      num_layers=1,
                      batch_first=True)
    # LogSoftmax over the vocabulary dimension (explicit dim avoids the
    # implicit-dim deprecation warning and wrong-axis results)
    self.logsoftmax = nn.LogSoftmax(dim=-1)
    self.lr = nn.Linear(32, self.vocab_size)
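# config.MAX_LEN and config.BATCH_SIZE come from a config module that is not
# part of this excerpt; the values below are illustrative assumptions only
# (an 8-digit input plus the appended '0' and an EOS token fits in 10 steps).
# config.py (sketch)
MAX_LEN = 10
BATCH_SIZE = 128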
def forward(self, encoder_hidden, target):
    # encoder_hidden: [1, batch_size, hidden_size]
    # target: [batch_size, max_len]
    # Build the first decoder input as a [batch_size, 1] tensor of SOS tokens,
    # and use the encoder's final hidden state as the decoder's initial hidden
    # state, signalling the decoder to start generating.
    decoder_input = torch.LongTensor([[NUMSentence().SOS]] * config.BATCH_SIZE)
    # Container for the decoder outputs at every time step
    decoder_output = torch.zeros(
        [config.BATCH_SIZE, config.MAX_LEN, self.vocab_size])
    # Initial decoder hidden state
    decoder_hidden = encoder_hidden
    # Decode one token per time step
    for t in range(config.MAX_LEN):
        decoder_out, decoder_hidden = self.forward_step(
            decoder_input, decoder_hidden)
        # Store this time step's output
        decoder_output[:, t, :] = decoder_out
        # Use teacher forcing during training with probability 0.5
        use_teacher_forcing = random.random() > 0.5
        if use_teacher_forcing:
            # Feed the ground-truth token as the next input
            decoder_input = target[:, t].unsqueeze(1)  # [batch_size, 1]
        else:
            # Feed the prediction: topk with k=1 picks the index of the
            # largest value along the last (vocabulary) dimension
            value, index = torch.topk(decoder_out, 1)  # index: [batch_size, 1]
            decoder_input = index
    return decoder_output, decoder_hidden
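# Both forward() above and evalute() below call self.forward_step(), which is
# not included in this excerpt. A minimal sketch of what a single decoding step
# presumably looks like, given the layers defined in __init__ (embedding -> GRU
# -> linear -> log-softmax); the actual implementation may differ.
def forward_step(self, decoder_input, decoder_hidden):
    # decoder_input: [batch_size, 1], decoder_hidden: [1, batch_size, hidden_size]
    embeded = self.embedding(decoder_input)            # [batch_size, 1, embedding_dim]
    out, decoder_hidden = self.gru(embeded, decoder_hidden)  # [batch_size, 1, hidden_size]
    out = out.squeeze(1)                               # [batch_size, hidden_size]
    out = self.logsoftmax(self.lr(out))                # [batch_size, vocab_size]
    return out, decoder_hidden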
def evalute():
    model.eval()
    model.load_state_dict(torch.load("./model/seq2seq_num_model.pkl"))
    # Ten random numbers as test inputs, sorted by length in descending order
    # (pack_padded_sequence in the encoder expects sorted lengths)
    data = [str(i) for i in np.random.randint(0, 100000000, [10])]
    data = sorted(data, key=lambda x: len(x), reverse=True)
    print(data)
    data_len = torch.LongTensor([len(i) for i in data])
    model_input = torch.LongTensor(
        [NUMSentence().transform(i, max_len=config.MAX_LEN) for i in data])
    result = model.evalute(model_input, data_len)
    # Greedy readout: take the most probable token at every position
    value, index = result.topk(k=1)
    index = index.squeeze()
    array = index.detach().numpy()
    for i, line in enumerate(array):
        sen = NUMSentence().reverse_transform(list(line))
        print('input sequence: {}, predicted sequence: {}'.format(data[i], sen))
def forward(self, data, data_len=None):
    """
    :param data: padded batch of input token indices
    :param data_len: true (unpadded) length of each sequence in the batch
    """
    embeded = self.embedding(data)
    # Pack the padded batch so the GRU skips padded positions, which speeds up
    # the LSTM/GRU computation
    embeded = nn.utils.rnn.pack_padded_sequence(embeded,
                                                lengths=data_len,
                                                batch_first=True)
    out, hidden = self.gru(embeded)
    # Unpack (re-pad) the packed output
    out, out_length = nn.utils.rnn.pad_packed_sequence(
        out, batch_first=True, padding_value=NUMSentence().PAD)
    return out, hidden
def evalute(self, encoder_hidden):
    """
    Greedy decoding for inference (no teacher forcing).
    """
    batch_size = encoder_hidden.size()[1]
    # Initial input: a batch of SOS tokens
    decoder_input = torch.LongTensor([[NUMSentence().SOS]] * batch_size)
    decoder_hidden = encoder_hidden
    # Container for the decoder outputs at every time step
    decoder_output = torch.zeros(
        [batch_size, config.MAX_LEN, self.vocab_size])
    for t in range(config.MAX_LEN):
        out, decoder_hidden = self.forward_step(decoder_input, decoder_hidden)
        decoder_output[:, t, :] = out
        # Greedily pick the most probable token as the next input
        value, index = out.topk(k=1)  # index: [batch_size, 1]
        decoder_input = index
    return decoder_output
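# NUMModel (imported from num_model in the training script below) is not part
# of this excerpt. A minimal sketch of how it presumably wires NUMEncoder and
# NUMDecoder together, matching the calls model(input, target, input_length)
# in train() and model.evalute(model_input, data_len) in evalute(); the real
# num_model.py may differ.
from torch import nn


class NUMModel(nn.Module):
    def __init__(self):
        super(NUMModel, self).__init__()
        self.encoder = NUMEncoder()
        self.decoder = NUMDecoder()

    def forward(self, input, target, input_length):
        # Encode the padded input, then decode with teacher forcing
        encoder_out, encoder_hidden = self.encoder(input, input_length)
        decoder_outputs, decoder_hidden = self.decoder(encoder_hidden, target)
        return decoder_outputs, decoder_hidden

    def evalute(self, input, input_length):
        # Encode, then decode greedily for inference
        encoder_out, encoder_hidden = self.encoder(input, input_length)
        return self.decoder.evalute(encoder_hidden)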
# Dataset: 500,000 random integers generated once with a fixed seed
self.total_data_size = 500000
np.random.seed(10)
self.data = np.random.randint(1, 100000000, size=[self.total_data_size])

def __getitem__(self, index):
    content = str(self.data[index])
    # The target is the input number with a '0' appended
    return content, content + '0', len(content), len(content) + 1

def __len__(self):
    return self.total_data_size


num_sequence = NUMSentence()


def collate_fn(batch):
    # Sort the batch by target length in descending order
    # (pack_padded_sequence expects lengths sorted this way)
    batch = sorted(batch, key=lambda x: x[3], reverse=True)
    data, label, data_lengths, label_lengths = list(zip(*batch))
    input = torch.LongTensor(
        [num_sequence.transform(i, max_len=config.MAX_LEN) for i in data])
    target = torch.LongTensor([
        num_sequence.transform(i, max_len=config.MAX_LEN, add_eos=True)
        for i in label
    ])
    input_length = torch.LongTensor(data_lengths)
    # The training loop unpacks four values per batch, so collate_fn also
    # returns the target lengths (completed here to match that usage)
    target_length = torch.LongTensor(label_lengths)
    return input, target, input_length, target_length
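# The data_loader imported by the training script below is assumed to be built
# from the dataset and collate_fn above, roughly as follows (a sketch; the
# class name NumDataset is a placeholder, since the dataset's class header is
# not included in this excerpt). drop_last=True keeps every batch at exactly
# config.BATCH_SIZE, which the decoder's forward() relies on.
from torch.utils.data import DataLoader

data_loader = DataLoader(NumDataset(),
                         batch_size=config.BATCH_SIZE,
                         shuffle=True,
                         collate_fn=collate_fn,
                         drop_last=True)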
from num_model import NUMModel
import torch
from seq2seq_datsets import data_loader
from torch.optim import Adam
from torch.nn import NLLLoss
from num_sentence import NUMSentence
import config
import numpy as np

model = NUMModel()
optimizer = Adam(model.parameters())
# Ignore PAD positions when computing the loss
criterion = NLLLoss(ignore_index=NUMSentence().PAD, reduction="mean")


def get_loss(decoder_outputs, target):
    # If a tensor has been transposed or otherwise re-strided, calling view()
    # directly to reshape it often fails; contiguous() would be needed first:
    # target = target.contiguous().view(-1)  # [batch_size * max_len]
    target = target.view(-1)  # [batch_size * max_len]
    decoder_outputs = decoder_outputs.view(config.BATCH_SIZE * config.MAX_LEN, -1)
    return criterion(decoder_outputs, target)


def train(epoch):
    for idx, (input, target, input_length, target_len) in enumerate(data_loader):
        optimizer.zero_grad()
        decoder_outputs, decoder_hidden = model(input, target, input_length)
        loss = get_loss(decoder_outputs, target)
        loss.backward()
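        # The excerpt ends at loss.backward(); a typical completion of the
        # training step (assumed, not shown above) is the optimizer update and
        # periodic checkpointing to the path the evaluation script loads from:
        optimizer.step()
        if idx % 100 == 0:
            print("epoch: {}, idx: {}, loss: {:.4f}".format(epoch, idx, loss.item()))
            torch.save(model.state_dict(), "./model/seq2seq_num_model.pkl")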