def outputVar(l, voc):
    """Build the padded target tensor, its padding mask, and the longest length.

    Args:
        l: Iterable of target sentences.
        voc: Vocabulary object handed to ``indexesFromSentence``.

    Returns:
        A tuple ``(padVar, mask, max_target_len)`` where ``padVar`` is a
        ``LongTensor`` built from the ``zeroPadding`` output, ``mask`` is a
        ``BoolTensor`` built from ``binaryMatrix`` applied to that same padded
        data, and ``max_target_len`` is the length of the longest index
        sequence before padding.
    """
    encoded = [indexesFromSentence(voc, text) for text in l]
    # Longest un-padded sequence; raises ValueError when ``l`` is empty.
    max_target_len = max(map(len, encoded))
    padded = zeroPadding(encoded, PAD_token)
    # binaryMatrix presumably flags non-PAD positions -- confirm against its
    # definition; here it is only converted to a boolean tensor.
    mask = torch.BoolTensor(binaryMatrix(padded, PAD_token))
    padVar = torch.LongTensor(padded)
    return padVar, mask, max_target_len
def evaluate(encoder, decoder, searcher, voc, sentence, max_length, device):
    """Decode a single sentence with ``searcher`` and return the output words.

    Args:
        encoder: Not referenced in this function's body.
        decoder: Not referenced in this function's body.
        searcher: Callable invoked as ``searcher(input_batch, lengths,
            max_length)``; must return a ``(tokens, scores)`` pair.
        voc: Vocabulary providing ``index2word`` and consumed by
            ``indexesFromSentence``.
        sentence: Input sentence to convert to indexes.
        max_length: Maximum decode length forwarded to ``searcher``.
        device: Device the input batch is moved to.

    Returns:
        List of words looked up in ``voc.index2word`` for every token that
        ``searcher`` returned.
    """
    # Wrap the single sentence as a batch of one: words -> indexes.
    token_ids = indexesFromSentence(voc, sentence)
    batch_ids = [token_ids]

    # One length entry per batch element.
    seq_lengths = torch.tensor([len(ids) for ids in batch_ids])

    # Swap the first two dimensions, then move the batch to ``device``.
    model_input = torch.LongTensor(batch_ids).transpose(0, 1).to(device)

    # Lengths are deliberately pinned to the CPU as int64 rather than moved to
    # ``device`` (presumably required by sequence packing inside the model --
    # confirm against the encoder).
    seq_lengths = torch.as_tensor(seq_lengths, dtype=torch.int64, device='cpu')

    # The scores returned by the searcher are not used here.
    out_tokens, _scores = searcher(model_input, seq_lengths, max_length)

    # indexes -> words
    return [voc.index2word[tok.item()] for tok in out_tokens]
def evaluate(sentence, max_length=MAX_LENGTH):
    """Generate a reply for ``sentence`` and report its score and latency.

    The sentence is passed through ``normalizeString`` and NFD Unicode
    normalization, converted to vocabulary indexes, and decoded by the
    module-level ``searcher``. Uses the module-level ``voc``, ``searcher`` and
    ``device``.

    Args:
        sentence: Raw input text.
        max_length: Maximum decode length forwarded to ``searcher``.

    Returns:
        A tuple ``(result, avg_score, time_pred)``:

        * ``result`` -- decoded tokens concatenated without a separator, up
          to (not including) the first ``'EOS'`` token.
        * ``avg_score`` -- tensor holding the sum of the searcher's scores
          divided by ``len(result)`` (divisor clamped to at least 1).
        * ``time_pred`` -- elapsed wall-clock seconds for the whole call.
    """
    time_start = time.time()

    sentence = normalizeString(sentence)
    sentence = unicodedata.normalize('NFD', sentence)

    indexes_batch = [indexesFromSentence(voc, sentence)]
    # Keep the lengths on the CPU as int64: torch's pack_padded_sequence
    # rejects lengths tensors that live on an accelerator. (Assumes the
    # searcher's encoder packs its input -- TODO confirm.)
    lengths = torch.tensor(
        [len(indexes) for indexes in indexes_batch],
        dtype=torch.int64,
        device='cpu',
    )
    input_batch = torch.LongTensor(indexes_batch).transpose(0, 1).to(device)

    tokens, score = searcher(input_batch, lengths, max_length)
    decoded_words = [voc.index2word[token.item()] for token in tokens]

    # Collect tokens up to the first end-of-sequence marker, then join once
    # instead of growing a string inside the loop.
    pieces = []
    for word in decoded_words:
        if word == 'EOS':
            break
        pieces.append(word)
    result = ''.join(pieces)

    time_pred = time.time() - time_start
    # Clamp the divisor so an empty reply (EOS decoded first) does not produce
    # an inf/nan score through division by zero.
    avg_score = torch.sum(score) / max(len(result), 1)
    return result, avg_score, time_pred
def inputVar(l, voc):
    """Build the padded input tensor and the per-sentence lengths tensor.

    Args:
        l: Iterable of input sentences.
        voc: Vocabulary object handed to ``indexesFromSentence``.

    Returns:
        A tuple ``(padVar, lengths)`` where ``padVar`` is a ``LongTensor``
        built from the ``zeroPadding`` output and ``lengths`` is a tensor of
        the un-padded index-sequence lengths, in input order.
    """
    encoded = [indexesFromSentence(voc, text) for text in l]
    # Lengths are taken before padding so they reflect the real sequence sizes.
    lengths = torch.tensor(list(map(len, encoded)))
    padded = zeroPadding(encoded, PAD_token)
    return torch.LongTensor(padded), lengths