import os

import torch
import torch.nn as nn
from torchtext import data
from torchtext.vocab import GloVe

# m.Model lives in the repo's model module; the import name here is assumed.
import model as m
# Note: eval() (the project's evaluation helper, which shadows the Python
# builtin) and writeResults() are assumed to be defined elsewhere in this repo.


def main():
    # 0 when a GPU is available, -1 otherwise (old torchtext device convention).
    cuda = int(torch.cuda.is_available()) - 1

    TEXT = data.Field(lower=True, init_token="<start>", eos_token="<end>")
    LABELS = data.Field(sequential=False)

    train, val, test = data.TabularDataset.splits(
        path='../tencent/data/',
        train='train_0.8.tsv',
        validation='train_dev_0.8_common.tsv',
        test='train_dev_0.8_uncommon.tsv',
        format='tsv',
        fields=[('text', TEXT), ('label', LABELS)])

    TEXT.build_vocab(train)
    LABELS.build_vocab(train)

    train_iter, val_iter, test_iter = data.BucketIterator.splits(
        (train, val, test),
        batch_sizes=(8, 8, 8),
        sort_key=lambda x: len(x.text),
        repeat=False)

    model = torch.load(
        '../tencent/models/common_0.8/net-lstm_e25_bs8_opt-adam_ly1_hs300_dr2'
        '_ed200_fembFalse_ptembFalse_drp0.3/acc47.60_e6.pt')

    # Evaluate the loaded checkpoint on the common and uncommon dev splits.
    (avg_loss, accuracy, corrects, size,
     t5_acc, t5_corrects, mrr) = eval(val_iter, model, TEXT, 300)
    print('COMMON ACCURACY:', accuracy)

    (avg_loss, accuracy, corrects, size,
     t5_acc, t5_corrects, mrr) = eval(test_iter, model, TEXT, 300)
    print('UNCOMMON ACCURACY:', accuracy)
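# Both main() and train() unpack a 7-tuple from the repo's eval() helper.
# The stub below is only a sketch of the contract those call sites assume --
# the (iterator, model, TEXT, emb_dim) signature and the returned tuple --
# not the repo's actual implementation. The name eval_sketch is hypothetical;
# TEXT and emb_dim are accepted only for signature compatibility.
def eval_sketch(data_iter, model, TEXT, emb_dim):
    """Illustrative only: score `model` on `data_iter` and return the
    (avg_loss, accuracy, corrects, size, t5_acc, t5_corrects, mrr) tuple."""
    model.eval()
    criterion = nn.CrossEntropyLoss(reduction='sum')
    tot_loss, corrects, t5_corrects, mrr_sum, size = 0.0, 0, 0, 0.0, 0
    with torch.no_grad():
        for batch in data_iter:
            preds = model(batch.text.t())
            tot_loss += criterion(preds, batch.label).item()
            # Top-1 and top-5 hits.
            top5 = preds.topk(5, dim=1).indices
            corrects += (top5[:, 0] == batch.label).sum().item()
            t5_corrects += (top5 == batch.label.unsqueeze(1)).any(1).sum().item()
            # Reciprocal rank of the gold label in the sorted predictions.
            ranks = (preds.argsort(dim=1, descending=True)
                     == batch.label.unsqueeze(1)).nonzero()[:, 1] + 1
            mrr_sum += (1.0 / ranks.float()).sum().item()
            size += batch.label.size(0)
    model.train()
    return (tot_loss / size, 100.0 * corrects / size, corrects, size,
            100.0 * t5_corrects / size, t5_corrects, mrr_sum / size)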
def train(args):
    ###########################################################################
    # Load data
    ###########################################################################
    # 0 when a GPU is available, -1 otherwise (old torchtext device convention).
    cuda = int(torch.cuda.is_available()) - 1

    TEXT = data.Field(lower=True, init_token="<start>", eos_token="<end>")
    LABELS = data.Field(sequential=False)

    train, val, test = data.TabularDataset.splits(
        path=args.data_path,
        train=args.train_path,
        validation=args.dev_path,
        test=args.test_path,
        format='tsv',
        fields=[('text', TEXT), ('label', LABELS)])
    # Our data: path='../new_data/kdata', train='_train.tsv',
    # validation='_dev.tsv', test='_test.tsv'

    prevecs = None
    if args.pretr_emb:
        # Build the vocab with pretrained glove.6B vectors of the requested dim.
        TEXT.build_vocab(train,
                         vectors=GloVe(name='6B', dim=args.emb_dim),
                         min_freq=args.mf)
        prevecs = TEXT.vocab.vectors
    else:
        TEXT.build_vocab(train)
    LABELS.build_vocab(train)

    train_iter, val_iter, test_iter = data.BucketIterator.splits(
        (train, val, test),
        batch_sizes=(args.batch_size, args.batch_size, args.batch_size),
        sort_key=lambda x: len(x.text),
        repeat=False)

    num_classes = len(LABELS.vocab)
    input_size = len(TEXT.vocab)

    ###########################################################################
    # Build the model
    ###########################################################################
    model = m.Model(input_size=input_size,
                    hidden_size=args.hidden_sz,
                    num_classes=num_classes,
                    prevecs=prevecs,
                    num_layers=args.num_layers,
                    num_dir=args.num_dir,
                    batch_size=args.batch_size,
                    emb_dim=args.emb_dim,
                    embfix=args.embfix,
                    dropout=args.dropout,
                    net_type=args.net_type)

    criterion = nn.CrossEntropyLoss()

    # Select optimizer.
    if args.opt == 'adamax':
        optimizer = torch.optim.Adamax(model.parameters())
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters())
    elif args.opt == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.5)
    else:
        # Optimizer unknown; default to Adamax.
        optimizer = torch.optim.Adamax(model.parameters())

    ###########################################################################
    # Training the Model
    ###########################################################################
    if cuda == 0:
        model = model.cuda()

    highest_t1_acc = 0
    highest_t1_acc_metrics = ''
    highest_t1_acc_params = ''
    results = ''
    for epoch in range(args.epochs):
        tot_loss = 0
        for batch_count, batch in enumerate(train_iter):
            model.zero_grad()
            inp = batch.text.t()  # (batch, seq_len)
            preds = model(inp)
            loss = criterion(preds, batch.label)
            loss.backward()
            optimizer.step()
            tot_loss += loss.item()

        (avg_loss, accuracy, corrects, size,
         t5_acc, t5_corrects, mrr) = eval(val_iter, model, TEXT, args.emb_dim)

        # Checkpoint any epoch whose validation accuracy clears the threshold.
        if accuracy > args.acc_thresh:
            save_path = '{}/acc{:.2f}_e{}.pt'.format(
                args.save_path_full, accuracy, epoch)
            if not os.path.isdir(args.save_path_full):
                os.makedirs(args.save_path_full)
            if args.save:
                torch.save(model, save_path)

        # Track the best top-1 accuracy seen so far.
        if highest_t1_acc < accuracy:
            highest_t1_acc = accuracy
            highest_t1_acc_metrics = (
                'acc: {:6.4f}%({:3d}/{}) EPOCH{:2d} - loss: {:.4f} '
                't5_acc: {:6.4f}%({:3d}/{}) MRR: {:.6f}'.format(
                    accuracy, corrects, size, epoch, avg_loss,
                    t5_acc, t5_corrects, size, mrr))
            highest_t1_acc_params = (
                'PARAMETERS:'
                'net-%s_e%i_bs%i_opt-%s_ly%i_hs%i_dr%i_ed%i'
                '_femb%s_ptemb%s_drp%.1f_mf%d\n'
                % (args.net_type, args.epochs, args.batch_size, args.opt,
                   args.num_layers, args.hidden_sz, args.num_dir,
                   args.emb_dim, args.embfix, args.pretr_emb,
                   args.dropout, args.mf))

        results += ('\nEPOCH{:2d} - loss: {:.4f} acc: {:6.4f}%({:3d}/{}) '
                    't5_acc: {:6.4f}%({:3d}/{}) MRR: {:.6f}'.format(
                        epoch, avg_loss, accuracy, corrects, size,
                        t5_acc, t5_corrects, size, mrr))
        print(highest_t1_acc_metrics + '\n')

    writeResults(args, results, highest_t1_acc,
                 highest_t1_acc_metrics, highest_t1_acc_params)
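# A minimal, hypothetical entry point showing how train(args) could be driven
# from the command line. The flag names mirror the args.* attributes read
# above; the defaults are illustrative assumptions (loosely based on the
# checkpoint name in main()), not the repo's actual settings.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Train a text classifier.')
    parser.add_argument('--data_path', default='../tencent/data/')
    parser.add_argument('--train_path', default='train_0.8.tsv')
    parser.add_argument('--dev_path', default='train_dev_0.8_common.tsv')
    parser.add_argument('--test_path', default='train_dev_0.8_uncommon.tsv')
    parser.add_argument('--net_type', default='lstm')
    parser.add_argument('--epochs', type=int, default=25)
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--opt', default='adam',
                        choices=['adam', 'adamax', 'sgd'])
    parser.add_argument('--num_layers', type=int, default=1)
    parser.add_argument('--num_dir', type=int, default=2)
    parser.add_argument('--hidden_sz', type=int, default=300)
    parser.add_argument('--emb_dim', type=int, default=200)
    parser.add_argument('--embfix', action='store_true')
    parser.add_argument('--pretr_emb', action='store_true')
    parser.add_argument('--dropout', type=float, default=0.3)
    parser.add_argument('--mf', type=int, default=1,
                        help='min_freq for the vocabulary')
    parser.add_argument('--acc_thresh', type=float, default=40.0)
    parser.add_argument('--save', action='store_true')
    parser.add_argument('--save_path_full', default='../tencent/models/run')
    train(parser.parse_args())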