reviews.append( sentence2tensor(words, w2i, padding_idx, sent_length)) if len(words) > sent_length: lengths.append(sent_length) else: lengths.append(len(words)) labels.append(int(label)) if count % 100000 == 0: print('Encoded reviews: ', count) return reviews, labels, lengths if __name__ == '__main__': train_file, validation_file = '../Data/train.csv', '../Data/validation.csv' w2i = pp.obtainW2i(train=train_file, validate=validation_file) print('Loaded vocabulary') w2i['<PAD>'] = 0 vocab_size = len(w2i) padding_idx = 0 sent_length = 80 translator = str.maketrans('', '', string.punctuation) embedding_size = 50 embedding = nn.Embedding(vocab_size, embedding_size, padding_idx=padding_idx) print('Embeddings calculated')
encoder_hidden = encoder.initHidden() input_length = sentence_tensor.size(0) for ei in range(input_length): output, encoder_hidden = encoder(sentence_tensor[ei], encoder_hidden) output = torch.round(output) if torch.equal(output, label_tensor): accuracy += 1 return accuracy / len(test_sentences) if __name__ == '__main__': # file name for male reviews and female reviews vocabulary, w2i, sentences_m, sentences_f = pp.obtainW2i( "../Data/sample_male", "../Data/sample_female") train_senetences, train_labels, test_sentences, test_labels = pp.testTrainSplit( sentences_m, sentences_f) hidden_size = 20 input_size = len(w2i) output_size = 1 encoder = Encoder(input_size, hidden_size, output_size) encoder = encoder.to(device) batch_train(encoder, train_senetences, train_labels, 3, w2i) #accuracy = evaluate(encoder,test_sentences, test_labels,w2i) #print(accuracy)