def main(train_data_path: str, model_path: str):
    TEXT, vocab_size, word_embeddings, train_iter, valid_iter, test_iter = load_data.load_dataset(
        train_data_path)

    batch_size = 32
    output_size = 2
    hidden_size = 256
    embedding_length = 300

    # TODO: try other types of learning algorithms
    model = LSTMClassifier(batch_size, output_size, hidden_size, vocab_size,
                           embedding_length, word_embeddings)

    for epoch in range(10):
        train_loss, train_acc = train_model(model, train_iter, epoch)
        val_loss, val_acc = eval_model(model, valid_iter)
        print(
            f'Epoch: {epoch + 1:02}, Train Loss: {train_loss:.3f}, Train Acc: {train_acc:.2f}%, Val. Loss: {val_loss:.3f}, Val. Acc: {val_acc:.2f}%'
        )

    test_loss, test_acc = eval_model(model, test_iter)
    print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.2f}%')

    '''
    Predict the sentiment of a single sentence, just for testing purposes.
    '''
    test_sen1 = "This is one of the best creation of Nolan. I can say, it's his magnum opus. Loved the soundtrack and especially those creative dialogues."

    # Tokenize and map tokens to vocabulary indices, then wrap as a batch of size 1.
    test_sen1 = TEXT.preprocess(test_sen1)
    test_sen1 = [[TEXT.vocab.stoi[x] for x in test_sen1]]

    test_sen = np.asarray(test_sen1)
    test_sen = torch.from_numpy(test_sen)
    if torch.cuda.is_available():
        test_sen = test_sen.cuda()

    model.eval()
    output = model(test_sen, 1)
    out = F.softmax(output, 1)
    if torch.argmax(out[0]) == 1:
        print("Sentiment: Positive")
    else:
        print("Sentiment: Negative")

    # save the model
    torch.save(model.state_dict(), model_path)
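

# --- Hedged sketch, not part of the original script ---------------------------------
# Shows how the state_dict saved by main() above could be restored later for inference.
# The helper name `load_trained_model` and the idea of rebuilding the vocabulary via
# load_data.load_dataset are assumptions; torch.load / load_state_dict / eval() are
# standard PyTorch calls.
def load_trained_model(train_data_path: str, model_path: str):
    # Rebuild the same fields/embeddings so the tensor shapes match the checkpoint.
    TEXT, vocab_size, word_embeddings, _, _, _ = load_data.load_dataset(train_data_path)
    model = LSTMClassifier(32, 2, 256, vocab_size, 300, word_embeddings)
    model.load_state_dict(torch.load(model_path))
    model.eval()  # disable dropout etc. for inference
    return model, TEXT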
def main(args):
    TEXT, LABEL, vocab_size, word_embeddings, train_iter, valid_iter = load_data.load_dataset(
        args)

    #learning_rate = 2e-5
    learning_rate = 0.0001
    batch_size = BATCH_SIZE
    output_size = 2
    hidden_size = 256
    #hidden_size = 64
    embedding_length = 300

    model = LSTMClassifier(batch_size, output_size, hidden_size, vocab_size,
                           embedding_length, word_embeddings)
    #model = AttentionModel(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings)
    #model = SelfAttention(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings)

    #loss_fn = F.cross_entropy
    print(LABEL.vocab.stoi)
    print(LABEL.vocab.freqs)
    # Weight label 1 twice as heavily as label 0 in the loss.
    label_weights = torch.FloatTensor(np.asarray([1.0, 2.0]))
    label_weights_tensor = label_weights.cuda()  # Variable(..., volatile=True) is deprecated; a plain tensor suffices
    loss_fn = torch.nn.CrossEntropyLoss(weight=label_weights_tensor)

    for epoch in range(10):
        train_loss, train_acc = train_model(model, loss_fn, train_iter, epoch)
        val_loss, val_acc = eval_model(model, loss_fn, valid_iter)
        print(
            f'Epoch: {epoch + 1:02}, Train Loss: {train_loss:.3f}, Train Acc: {train_acc:.2f}%, Val. Loss: {val_loss:.3f}, Val. Acc: {val_acc:.2f}%'
        )
        evaluate(model, TEXT, LABEL, args, epoch)
        torch.save(model.state_dict(), args.save_model_file + '.epoch' + str(epoch + 1))

    # NOTE: this load_dataset variant does not return a test_iter, so the final test
    # evaluation is left commented out until a test iterator is available.
    #test_loss, test_acc = eval_model(model, loss_fn, test_iter)
    #print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.2f}%')
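

# --- Hedged illustration, not part of the original ----------------------------------
# Demonstrates what the class weights above do: samples whose gold label is 1 are
# weighted twice as heavily as label-0 samples. Shapes follow torch.nn.CrossEntropyLoss:
# logits are (batch, num_classes), integer targets are (batch,). The function name and
# toy values are assumptions for illustration only.
def _weighted_ce_example():
    logits = torch.tensor([[2.0, 0.5], [0.2, 1.5]])   # toy raw model outputs
    targets = torch.tensor([0, 1])                     # toy gold labels
    weights = torch.tensor([1.0, 2.0])
    weighted_loss = torch.nn.CrossEntropyLoss(weight=weights)(logits, targets)
    plain_loss = torch.nn.CrossEntropyLoss()(logits, targets)
    print(weighted_loss.item(), plain_loss.item())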
def classifier():
    #################################################################################
    # Write the output data into the infer data for the classifier
    """
    #path = "/home/yunzhu/Headline/FASum/FASRL/save_decode_result/BM25/test"
    #path = "/home/yunzhu/Headline/FASum/FASRL/save_decode_result/PREFIX/test"
    #path = "/home/yunzhu/Headline/FASum/FASRL/save_decode_result/random/test"
    #path = "/home/yunzhu/Headline/FASum/FASRL/save_decode_result/seq2seq/withatt/test"
    #path = "/data1/home2/Headline/PointerSumm/log/decode_model_95000_1555784722/test"
    #path = "/home/yunzhu/Headline/FASum/FASRL/save_decode_result/exp_0223/test"
    #path = "/home/yunzhu/Headline/FASum/PORLHG_v3/save_decode_result/exp_0907/extractor/test"
    #path = "/data1/home2/Headline/Dataset/CNNDM/finished_files_cleaned_single_m2/refs/test"
    #path = "/home/yunzhu/Headline/FASum/PORLHG_v3/save_decode_result/exp_0912/rl/test"
    #path = "/home/yunzhu/Headline/FASum/PORLHG_v3/save_decode_result/exp_0823_v4/test"
    path = "/home/yunzhu/Headline/FASum/PORLHG_v3/save_decode_result/exp_0823/rl_3/test"

    #path_in = path
    path_in = os.path.join(path, "output")
    print('We are testing:{}'.format(path))

    filename = "temp.tsv"
    path_out = "/home/yunzhu/Headline/FASum/FASRL/model/classifier/cls_data/{}".format(filename)
    write_file(path_in, path_out)

    TEXT, vocab_size, word_embeddings, _, _, test_iter = load_data.load_dataset(corpusdir, batch_size, filename=path_out)
    model = LSTMClassifier(batch_size, output_size, hidden_size, vocab_size,
                           embedding_length, word_embeddings, conv_hidden, 0.0)
    print('Loading the pretrained model: {}'.format(save_path.split('/')[-1]))
    state_dict = torch.load(save_path)
    model.load_state_dict(state_dict)

    loss_fn = F.cross_entropy
    test_loss, test_acc, test_uar = eval_model(model, test_iter, loss_fn)
    print('Inference popularity predictor for: {}'.format(path_in))
    print('Test Loss: {:.2f}, Test Acc: {:.2f}%, Test Uar: {:.2f}'.format(test_loss, test_acc, test_uar))
    print('There are {:.2f}% are classified as positive'.format(100 - test_acc))

    with open(os.path.join(path, "popularity.txt"), 'w') as f:
        f.write("Inference by: {}".format(save_path))
        f.write("model: {}".format(path))
        f.write("score: {}".format(100 - test_acc))
    """

    #########################################################
    """
    TEXT, vocab_size, word_embeddings, train_iter, valid_iter, test_iter = load_data.load_dataset()
    loss_fn = F.cross_entropy

    with open('TEXT.Field', 'rb') as f:
        TEXT = dill.load(f)

    #path = "/home/yunzhu/Headline/FASum/FASRL/save_decode_result/exp_0224/test/output"
    path = "/home/yunzhu/Headline/Datasets/CNNDM/finished_files_cleaned/refs/test"
    num = len(os.listdir(path))
    total_score = 0
    for i in range(num):
        sentence = read_data(path, i, '.ref')
        score = do_inference(sentence, TEXT, vocab_size, word_embeddings)
        total_score += score
        print("{}/{} finished, score:{}".format(i, num, score))
    print("total_score: {}".format(total_score))
    print("avg score: {}".format(total_score / num))
    """

    #####################################################
    TEXT, vocab_size, word_embeddings, train_iter, valid_iter, test_iter = load_data.load_dataset(corpusdir, batch_size)
    loss_fn = F.binary_cross_entropy_with_logits
    model = LSTMClassifier(batch_size, output_size, hidden_size, vocab_size,
                           embedding_length, word_embeddings, conv_hidden, 0.1)
    val_acc_best = 0.
    optim = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))  ## move the optim from train() to here
    scheduler = ReduceLROnPlateau(optim, 'min', verbose=True, patience=2)

    for epoch in range(10):
        train_loss, train_acc = train_model(model, train_iter, epoch, loss_fn, optim)
        val_loss, val_acc, _ = eval_model(model, valid_iter, loss_fn)
        scheduler.step(val_loss)

        if val_acc_best < val_acc:
            # New best validation accuracy: checkpoint the model and report test metrics.
            torch.save(model.state_dict(), save_path)
            test_loss, test_acc, _ = eval_model(model, test_iter, loss_fn)
            print('[info] Epoch{} Test Loss: {:.2f}, Test Acc: {:.2f}%'.format(epoch, test_loss, test_acc))
            val_acc_best = val_acc

        print('Epoch: {}, Train Loss: {:.2f}, Train Acc: {:.2f}%, Val Loss: {:.2f}, Val Acc: {:.2f}%'.format(
            epoch + 1, train_loss, train_acc, val_loss, val_acc))

    #test_loss, test_acc = eval_model(model, test_iter, loss_fn)
    #print('Test Loss: {}, Test Acc: {}'.format(test_loss, test_acc))
    ##################################################################
    return TEXT, vocab_size, word_embeddings
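

# --- Hedged sketch, not part of the original ----------------------------------------
# F.binary_cross_entropy_with_logits (the loss_fn chosen in classifier() above) expects
# float targets with the same shape as the raw logits, so train_model/eval_model are
# presumably converting the integer labels accordingly; the toy shapes and the helper
# name below are assumptions for illustration only.
def _bce_with_logits_example():
    logits = torch.tensor([[1.2, -0.8]])    # raw two-way outputs for one example
    targets = torch.tensor([[1.0, 0.0]])    # float targets, same shape as logits
    loss = F.binary_cross_entropy_with_logits(logits, targets)
    print(loss.item())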