def evaluate(encoder, decoder, sentence, dictionary, max_length=MAX_LENGTH):
    """Greedily decode `sentence` with a trained encoder/decoder pair.

    Returns a list of decoded tokens (terminated by '<EOS>' when the
    decoder emits the EOS token) and the attention weights for the
    emitted decoding steps.
    """
    with torch.no_grad():
        source = tensorFromSentence(dictionary, sentence)
        source_len = source.size(0)

        # Run the encoder one token at a time, collecting per-step
        # outputs so the attention decoder can attend over them.
        hidden = encoder.initHidden()
        enc_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
        for pos in range(source_len):
            step_out, hidden = encoder(source[pos], hidden)
            enc_outputs[pos] += step_out[0, 0]

        # Greedy decoding, seeded with the SOS token.
        token = torch.tensor([[SOS_token]], device=device)  # SOS
        words = []
        attentions = torch.zeros(max_length, max_length)
        for step in range(max_length):
            logits, hidden, attn = decoder(token, hidden, enc_outputs)
            attentions[step] = attn.data
            _, best = logits.data.topk(1)
            if best.item() == EOS_token:
                words.append('<EOS>')
                break
            words.append(dictionary.index2token[best.item()])
            token = best.squeeze().detach()

        return words, attentions[:step + 1]
def embed_input_sentence(input_pair, encoder, max_length=MAX_LENGTH):
    """Embeds the input sentence using a trained encoder model."""
    with torch.no_grad():
        if not encoder.trainable_model:
            # Pretrained encoder: look up and normalize the sentence
            # embedding directly; no per-step encoder outputs exist.
            target_tensor = utils.tensorFromSentence(vocab_index, input_pair[1])
            hidden = layer_normalize(encoder.sentence_embedding(input_pair[0]))
            return hidden, target_tensor, None

        # Trainable encoder: run it token by token over the input,
        # keeping each step's output (extra row for attention padding).
        input_tensor, target_tensor = utils.tensorsFromPair(input_pair)
        hidden = encoder.initHidden()
        outputs = torch.zeros(max_length + 1, encoder.hidden_size, device=DEVICE)
        for pos in range(input_tensor.size(0)):
            step_out, hidden = encoder(input_tensor[pos], hidden)
            outputs[pos] += step_out[0, 0]
        return hidden, target_tensor, outputs
def evaluate(encoder, decoder, sentence, training_ans, input_lang, output_lang,
             max_length=utils.MAX_LENGTH, rl=True):
    """Greedily decode `sentence` with the encoder/decoder pair.

    When `rl` is true and `training_ans` is given, returns the reward of
    the decoded query against `training_ans`; otherwise returns the
    decoded sentence itself.
    """
    with torch.no_grad():
        input_tensor = utils.tensorFromSentence(input_lang, sentence, device)
        input_length = input_tensor.size(0)
        print(" evaluation input_length: ", input_length)

        # The encoder consumes the whole sentence in one call and returns
        # a single summary vector used to seed the decoder.
        decoder_hidden = encoder(input_tensor).unsqueeze(0)
        decoder_input = torch.tensor([[utils.SOS_token]], device=device)

        # Greedy decoding (no teacher forcing): feed back each prediction.
        decoded_words = []
        for di in range(max_length):
            # Keep only the first batch element and restore the
            # (1, 1, hidden) shape the decoder expects. `-1` infers the
            # hidden size instead of hard-coding it (was 256).
            decoder_hidden = decoder_hidden[:, 0, :].view(1, 1, -1)
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            if topi.item() == utils.EOS_token:
                decoded_words.append('<EOS>')
                break
            decoded_words.append(output_lang.index2word[topi.item()])
            decoder_input = topi.squeeze().detach()  # detach from history as input

        decoded_sentence = " ".join(decoded_words)
        print("\n --query--> ", decoded_sentence, "\n ")

        if (not rl) or (training_ans is None):
            return decoded_sentence
        rewrd = reward.get_reward(decoded_sentence, training_ans)
        print("\n --reward--> ", rewrd)
        return rewrd
def evaluate(encoder, decoder, sentence, input_lang, output_lang, max_length, device):
    """Greedy decode with attention; supports recurrent and positional encoders."""
    with torch.no_grad():
        src = tensorFromSentence(input_lang, sentence)
        src_len = src.size(0)
        enc_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        enc_kind = type(encoder).__name__
        if enc_kind in ('EncoderGRU', 'EncoderLSTM'):
            # Recurrent encoders consume the sentence one token at a time.
            enc_hidden = encoder.initHidden()
            for pos in range(src_len):
                step_out, enc_hidden = encoder(src[pos], enc_hidden)
                enc_outputs[pos] += step_out[0, 0]
        elif enc_kind in ('EncoderPositional', 'EncoderPositional_AIAYN'):
            # Positional encoders process the whole sentence in one call.
            enc_outputs, enc_hidden = encoder(src)

        dec_input = torch.tensor([[0]], device=device)  # SOS token index
        dec_hidden = enc_hidden
        if enc_kind == 'EncoderLSTM':
            dec_hidden = dec_hidden[0]  # keep the hidden state, drop the cell state

        words = []
        attentions = torch.zeros(max_length, max_length)
        for di in range(max_length):
            # TODO encoder_outputs --> encoder_hidden?
            dec_output, dec_hidden, dec_attn = decoder(dec_input, dec_hidden, enc_outputs)
            attentions[di] = dec_attn.data
            _, best = dec_output.data.topk(1)
            if best.item() == 1:  # EOS token index
                words.append('<EOS>')
                break
            words.append(output_lang.index2word[best.item()])
            dec_input = best.squeeze().detach()

        return words, attentions[:di + 1]
def tensorsFromPair(pair):
    """Convert a (source, target) sentence pair into a pair of tensors."""
    return (tensorFromSentence(input_lang, pair[0]),
            tensorFromSentence(output_lang, pair[1]))
def train(input_pair, encoder, decoder, encoder_optimizer, decoder_optimizer,
          criterion, teacher_forcing_ratio, max_length=MAX_LENGTH):
    """Train the models on one sentence pair.

    Encodes the input (with the trainable encoder or a pretrained
    embedding), randomly applies teacher forcing, accumulates the
    decoder loss over the target sequence, backpropagates, and steps
    the optimizers. Returns the per-token loss.
    """
    if encoder.trainable_model:
        # Encode sentences with the trainable encoder model.
        input_tensor, target_tensor = utils.tensorsFromPair(input_pair)
        decoder_hidden, encoder_outputs, encoder_optimizer = train_encoder(
            input_tensor, encoder, encoder_optimizer, max_length)
    else:
        # Pretrained encoder: embed the input sentence directly and normalize.
        target_tensor = utils.tensorFromSentence(vocab_index, input_pair[1])
        decoder_hidden = layer_normalize(encoder.sentence_embedding(input_pair[0]))

    # Clear the gradients from the decoder optimizer.
    decoder_optimizer.zero_grad()

    target_length = target_tensor.size(0)
    decoder_input = torch.tensor([[SOS_token]], device=DEVICE)
    loss = 0

    # Randomly decide whether this example uses teacher forcing.
    use_teacher_forcing = random.random() < teacher_forcing_ratio

    for di in range(target_length):
        if decoder.uses_attention:
            decoder_output, decoder_hidden, _ = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
        else:
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        loss += criterion(decoder_output, target_tensor[di])

        if use_teacher_forcing:
            # Teacher forcing: the ground-truth token is the next input.
            decoder_input = target_tensor[di]
        else:
            # Free running: feed back the model's own best guess.
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input
            if decoder_input.item() == EOS_token:
                break

    # Backpropagate the accumulated error through the network.
    loss.backward()
    if encoder.trainable_model:
        encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length