def load_model(args, data):
    model = BIMPM(args, data)
    model.load_state_dict(torch.load(args.model_path))

    if args.gpu > -1:
        model.cuda(args.gpu)
    return model
def load_model(args, data):
    if args.use_my_model:
        model = CBIMPM(args, data)
    elif args.use_only_conv:
        model = CONV(args, data)
    else:
        model = BIMPM(args, data)
    model.load_state_dict(torch.load(args.model_path))

    if args.gpu > -1:
        model.cuda(args.gpu)
    return model
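# A minimal usage sketch for the loaders above. The checkpoint path and flag
# values are hypothetical, and `data` stands for the dataset wrapper that the
# rest of this repo constructs; SimpleNamespace stands in for the usual
# argparse result.
from types import SimpleNamespace

args = SimpleNamespace(model_path='saved_models/BIMPM_best.pt', gpu=-1,
                       use_my_model=False, use_only_conv=False)
model = load_model(args, data)  # data: the repo's dataset wrapper (not shown here)
model.eval()  # disable dropout before running inference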
def train(args, data):
    model = BIMPM(args, data)

    # initialize the visdom windows for the loss, accuracy, and AUC-PR curves
    viz.line(X=np.array([0]), Y=np.array([0]), win=args.loss_curve,
             name='train-%s' % args.line_suffix, opts={'title': args.title})
    viz.line(X=np.array([0]), Y=np.array([0]), win=args.loss_curve,
             name='dev-%s' % args.line_suffix, update='append')
    viz.line(X=np.array([0]), Y=np.array([0]), win=args.loss_curve,
             name='test-%s' % args.line_suffix, update='append')
    viz.line(X=np.array([0]), Y=np.array([0]), win=args.acc_curve,
             name='test-%s' % args.line_suffix, opts={'title': args.title})
    viz.line(X=np.array([0]), Y=np.array([0]), win=args.acc_curve,
             name='dev-%s' % args.line_suffix, update='append')
    viz.line(X=np.array([0]), Y=np.array([0]), win=args.auc_curve,
             name='auc-test-%s' % args.line_suffix, opts={'title': args.title})
    viz.line(X=np.array([0]), Y=np.array([0]), win=args.auc_curve,
             name='auc-dev-%s' % args.line_suffix, update='append')

    if args.gpu > -1:
        model.cuda(args.gpu)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    model.train()
    loss, last_epoch = 0, -1
    max_dev_auc, max_test_auc = 0, 0

    iterator = data.train_iter
    for i, batch in enumerate(iterator):
        present_epoch = int(iterator.epoch)
        if present_epoch == args.epoch:
            break
        if present_epoch > last_epoch:
            logger.info('epoch: %s' % (present_epoch + 1))
        last_epoch = present_epoch

        if args.data_type == 'SNLI':
            s1, s2 = 'premise', 'hypothesis'
        else:
            s1, s2 = 'q1', 'q2'
        s1, s2 = getattr(batch, s1), getattr(batch, s2)

        # limit the lengths of input sentences up to max_sent_len
        if args.max_sent_len >= 0:
            if s1.size()[1] > args.max_sent_len:
                s1 = s1[:, :args.max_sent_len]
            if s2.size()[1] > args.max_sent_len:
                s2 = s2[:, :args.max_sent_len]

        kwargs = {'p': s1, 'h': s2}

        if args.use_char_emb:
            # Variable is deprecated since PyTorch 0.4; plain tensors suffice
            char_p = torch.LongTensor(data.characterize(s1))
            char_h = torch.LongTensor(data.characterize(s2))
            if args.gpu > -1:
                char_p = char_p.cuda(args.gpu)
                char_h = char_h.cuda(args.gpu)
            kwargs['char_p'] = char_p
            kwargs['char_h'] = char_h

        pred = model(**kwargs)
        optimizer.zero_grad()
        batch_loss = criterion(pred, batch.label)
        loss += batch_loss.item()  # .item() replaces the deprecated .data[0]
        batch_loss.backward()
        optimizer.step()

        if (i + 1) % args.print_freq == 0:
            dev_loss, dev_acc, dev_auc_pr = test(model, args, data, mode='dev')
            test_loss, test_acc, test_auc_pr = test(model, args, data)
            c = (i + 1) // args.print_freq

            viz.line(X=np.array([c]), Y=np.array([loss]), win=args.loss_curve,
                     name='train-%s' % args.line_suffix, update='append')
            viz.line(X=np.array([c]), Y=np.array([dev_loss]), win=args.loss_curve,
                     name='dev-%s' % args.line_suffix, update='append')
            viz.line(X=np.array([c]), Y=np.array([test_loss]), win=args.loss_curve,
                     name='test-%s' % args.line_suffix, update='append')
            viz.line(X=np.array([c]), Y=np.array([dev_acc]), win=args.acc_curve,
                     name='dev-%s' % args.line_suffix, update='append')
            viz.line(X=np.array([c]), Y=np.array([test_acc]), win=args.acc_curve,
                     name='test-%s' % args.line_suffix, update='append')
            viz.line(X=np.array([c]), Y=np.array([dev_auc_pr]), win=args.auc_curve,
                     name='auc-dev-%s' % args.line_suffix, update='append')
            viz.line(X=np.array([c]), Y=np.array([test_auc_pr]), win=args.auc_curve,
                     name='auc-test-%s' % args.line_suffix, update='append')

            logger.info('train loss: %.3f / dev loss: %.3f / test loss: %.3f' %
                        (loss, dev_loss, test_loss))
            logger.info('dev acc: %.3f / test acc: %.3f' % (dev_acc, test_acc))
            logger.info('dev auc-pr: %.3f / test auc-pr: %.3f' % (dev_auc_pr, test_auc_pr))

            if dev_auc_pr > max_dev_auc:
                max_dev_auc = dev_auc_pr
                max_test_auc = test_auc_pr
                best_model = copy.deepcopy(model)
                torch.save(best_model.state_dict(),
                           'saved_models/BIBPM_%s_%s.pt' % (args.data_type, args.model_time))

            loss = 0
            model.train()

    logger.info('max dev auc: %.3f / max test auc: %.3f' % (max_dev_auc, max_test_auc))
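# The dev/test AUC-PR values logged above come from this repo's test() helper,
# which is not shown in this section. A sketch of how such a score is commonly
# computed for a two-class matcher (an assumption, not the repo's exact code):
import torch.nn.functional as F
from sklearn.metrics import average_precision_score

def auc_pr(logits, labels):
    # probability assigned to the positive class by 2-way logits
    pos_prob = F.softmax(logits, dim=1)[:, 1]
    # average precision approximates the area under the precision-recall curve
    return average_precision_score(labels.cpu().numpy(),
                                   pos_prob.detach().cpu().numpy())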
class_size = len(data.LABEL.vocab)
print(f"Creating model with class_size: {class_size}, word_vocab_size: {word_vocab_size}, char_vocab_size: {char_vocab_size}")
print(f"pretrained_word_embedding: {pretrained_word_embedding}")
print(f"pretrained_char_embedding: {pretrained_char_embedding}")

model = BIMPM(class_size, word_vocab_size, char_vocab_size,
              pretrained_word_embedding=pretrained_word_embedding,
              pretrained_char_embedding=pretrained_char_embedding,
              word_dim=args.word_dim,
              char_dim=args.char_dim,
              num_perspective=args.num_perspective,
              use_char_emb=(not args.wo_char),
              context_lstm_dim=args.context_lstm_dim,
              context_layer_num=args.context_layer_num,
              aggregation_lstm_dim=args.aggregation_lstm_dim,
              aggregation_layer_num=args.aggregation_layer_num,
              char_lstm_dim=args.char_lstm_dim,
              dropout=args.dropout,
              wo_full_match=args.wo_full_match,
              wo_maxpool_match=args.wo_maxpool_match,
              wo_attentive_match=args.wo_attentive_match,
              wo_max_attentive_match=args.wo_max_attentive_match)

if args.gpu >= 0:
    model.cuda(args.gpu)

print(model)
print('Training start!')
def train(args, data):
    if args.use_my_model:
        model = CBIMPM(args, data)
    elif args.use_only_conv:
        model = CONV(args, data)
    else:
        model = BIMPM(args, data)

    if args.gpu > -1:
        model.cuda(args.gpu)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()
    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_acc, max_test_acc = 0, 0

    iterator = data.train_iter
    savenow = False
    for i, batch in enumerate(iterator):
        present_epoch = int(iterator.epoch)
        if present_epoch == args.epoch:
            break
        if present_epoch > last_epoch:
            savenow = True
            with codecs.open('saved_models/' + args.model_time + '/acc.txt', 'a+', 'utf-8') as output:
                output.write('\nEpoch: ' + str(present_epoch + 1))
            print('Epoch: ' + str(present_epoch + 1))
        last_epoch = present_epoch

        if args.data_type == 'SNLI':
            s1, s2 = 'premise', 'hypothesis'
        else:
            s1, s2 = 'q1', 'q2'
        s1, s2 = getattr(batch, s1), getattr(batch, s2)

        # limit the lengths of input sentences up to max_sent_len
        if args.max_sent_len >= 0:
            if s1.size()[1] > args.max_sent_len:
                s1 = s1[:, :args.max_sent_len]
            if s2.size()[1] > args.max_sent_len:
                s2 = s2[:, :args.max_sent_len]

        kwargs = {'p': s1, 'h': s2}

        if args.use_char_emb:
            # Variable is deprecated since PyTorch 0.4; plain tensors suffice
            char_p = torch.LongTensor(data.characterize(s1))
            char_h = torch.LongTensor(data.characterize(s2))
            if args.gpu > -1:
                char_p = char_p.cuda(args.gpu)
                char_h = char_h.cuda(args.gpu)
            kwargs['char_p'] = char_p
            kwargs['char_h'] = char_h

        pred = model(**kwargs)
        optimizer.zero_grad()
        batch_loss = criterion(pred, batch.label)
        loss += batch_loss.item()  # .item() replaces the deprecated .data[0]
        batch_loss.backward()
        optimizer.step()

        if (i + 1) % args.print_freq == 0:
            dev_loss, dev_acc = test(model, args, data, mode='dev')
            test_loss, test_acc = test(model, args, data)
            c = (i + 1) // args.print_freq
            writer.add_scalar('loss/train', loss, c)
            writer.add_scalar('loss/dev', dev_loss, c)
            writer.add_scalar('acc/dev', dev_acc, c)
            writer.add_scalar('loss/test', test_loss, c)
            writer.add_scalar('acc/test', test_acc, c)

            # build the report once and use it for both the console and the log file
            report = ('[' + str(i) + '][loss] train: ' + '{:.3f}'.format(loss)
                      + ' dev: ' + '{:.3f}'.format(dev_loss)
                      + ' test: ' + '{:.3f}'.format(test_loss)
                      + '\n[' + str(i) + '][acc] dev: ' + '{:.3f}'.format(dev_acc)
                      + ' test: ' + '{:.3f}'.format(test_acc))
            print(report)
            with codecs.open('saved_models/' + args.model_time + '/acc.txt', 'a+', 'utf-8') as output:
                output.write('\n' + report)

            if dev_acc > max_dev_acc:
                max_dev_acc = dev_acc  # track the best dev accuracy for the final summary
            if test_acc > max_test_acc:
                max_test_acc = test_acc
                best_model = copy.deepcopy(model)

            if savenow:
                print('Saving model...', present_epoch)
                torch.save(best_model.state_dict(),
                           'saved_models/' + args.model_time + '/Epoch_' + str(present_epoch)
                           + '_' + '{:.5f}'.format(max_test_acc) + '_' + str(args.model_time))
                savenow = False

            loss = 0
            model.train()

    writer.close()
    print('max dev acc: ' + str(max_dev_acc) + ' max test acc: ' + str(max_test_acc))
    return best_model
def train(args, data):
    model = BIMPM(args, data)
    if args.gpu > -1:
        model.cuda(args.gpu)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, 0
    max_dev_acc, max_test_acc = 0, 0

    iterator = data.train_iter
    # count epochs explicitly instead of relying on iterator.epoch
    while last_epoch < args.epoch:
        for i, batch in enumerate(iterator):
            if args.data_type == 'SNLI':
                s1, s2 = 'premise', 'hypothesis'
            else:
                s1, s2 = 'q1', 'q2'
            s1, s2 = getattr(batch, s1), getattr(batch, s2)

            # limit the lengths of input sentences up to max_sent_len
            if args.max_sent_len >= 0:
                if s1.size()[1] > args.max_sent_len:
                    s1 = s1[:, :args.max_sent_len]
                if s2.size()[1] > args.max_sent_len:
                    s2 = s2[:, :args.max_sent_len]

            kwargs = {'p': s1, 'h': s2}

            if args.use_char_emb:
                char_p = torch.LongTensor(data.characterize(s1))
                char_h = torch.LongTensor(data.characterize(s2))
                if args.gpu > -1:
                    char_p = char_p.cuda(args.gpu)
                    char_h = char_h.cuda(args.gpu)
                kwargs['char_p'] = char_p
                kwargs['char_h'] = char_h

            pred = model(**kwargs)
            optimizer.zero_grad()
            batch_loss = criterion(pred, batch.label)
            loss += batch_loss.item()
            batch_loss.backward()
            optimizer.step()

            if (i + 1) % args.print_freq == 0:
                dev_loss, dev_acc = test(model, args, data, mode='dev')
                test_loss, test_acc = test(model, args, data)
                c = (i + 1) // args.print_freq
                writer.add_scalar('loss/train', loss, c)
                writer.add_scalar('loss/dev', dev_loss, c)
                writer.add_scalar('acc/dev', dev_acc, c)
                writer.add_scalar('loss/test', test_loss, c)
                writer.add_scalar('acc/test', test_acc, c)

                print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f} / test loss: {test_loss:.3f}'
                      f' / dev acc: {dev_acc:.3f} / test acc: {test_acc:.3f}')

                if dev_acc > max_dev_acc:
                    max_dev_acc = dev_acc
                    max_test_acc = test_acc
                    best_model = copy.deepcopy(model)
                    # save the args alongside the weights so the model can be rebuilt
                    to_save = {'model': model.state_dict(), 'args': args}
                    torch.save(to_save, 'saved_models/BIMPM_best.pt')

                loss = 0
                model.train()

        iterator.init_epoch()
        last_epoch += 1

    writer.close()
    print(f'max dev acc: {max_dev_acc:.3f} / max test acc: {max_test_acc:.3f}')
    return best_model
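# Each train() variant in this section relies on a test() helper that the
# section does not show. Based only on the call sites (it returns an average
# loss and an accuracy, and accepts mode='dev'), a plausible sketch follows —
# an assumption, not the repo's actual implementation:
def test(model, args, data, mode='test'):
    iterator = data.dev_iter if mode == 'dev' else data.test_iter
    criterion = nn.CrossEntropyLoss()
    model.eval()
    loss, correct, total = 0, 0, 0
    with torch.no_grad():
        for batch in iterator:
            if args.data_type == 'SNLI':
                s1, s2 = batch.premise, batch.hypothesis
            else:
                s1, s2 = batch.q1, batch.q2
            pred = model(p=s1, h=s2)  # char features omitted in this sketch
            loss += criterion(pred, batch.label).item()
            correct += (pred.argmax(dim=1) == batch.label).sum().item()
            total += batch.label.size(0)
    # callers switch back to training mode with model.train() afterwards
    return loss / len(iterator), correct / total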
def train(args, data):
    model = BIMPM(args, data)
    if args.gpu > -1:
        model.cuda(args.gpu)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_acc, max_test_acc = 0, 0

    iterator = data.train_iter
    for i, batch in enumerate(iterator):
        present_epoch = int(iterator.epoch)
        if present_epoch == args.epoch:
            break
        if present_epoch > last_epoch:
            print('epoch:', present_epoch + 1)
        last_epoch = present_epoch

        if args.data_type == 'SNLI':
            s1, s2 = 'premise', 'hypothesis'
        else:
            s1, s2 = 'q1', 'q2'
        s1, s2 = getattr(batch, s1), getattr(batch, s2)

        # limit the lengths of input sentences up to max_sent_len
        if args.max_sent_len >= 0:
            if s1.size()[1] > args.max_sent_len:
                s1 = s1[:, :args.max_sent_len]
            if s2.size()[1] > args.max_sent_len:
                s2 = s2[:, :args.max_sent_len]

        kwargs = {'p': s1, 'h': s2}

        if args.use_char_emb:
            # Variable is deprecated since PyTorch 0.4; plain tensors suffice
            char_p = torch.LongTensor(data.characterize(s1))
            char_h = torch.LongTensor(data.characterize(s2))
            if args.gpu > -1:
                char_p = char_p.cuda(args.gpu)
                char_h = char_h.cuda(args.gpu)
            kwargs['char_p'] = char_p
            kwargs['char_h'] = char_h

        pred = model(**kwargs)
        optimizer.zero_grad()
        batch_loss = criterion(pred, batch.label)
        loss += batch_loss.item()  # .item() replaces the deprecated .data[0]
        batch_loss.backward()
        optimizer.step()

        # drop the graph-holding references before evaluation to free memory
        del pred
        del batch_loss

        if (i + 1) % args.print_freq == 0:
            dev_loss, dev_acc = test(model, args, data, mode='dev')
            test_loss, test_acc = test(model, args, data)
            c = (i + 1) // args.print_freq
            writer.add_scalar('loss/train', loss, c)
            writer.add_scalar('loss/dev', dev_loss, c)
            writer.add_scalar('acc/dev', dev_acc, c)
            writer.add_scalar('loss/test', test_loss, c)
            writer.add_scalar('acc/test', test_acc, c)

            print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f} / test loss: {test_loss:.3f}'
                  f' / dev acc: {dev_acc:.3f} / test acc: {test_acc:.3f}')

            if dev_acc > max_dev_acc:
                max_dev_acc = dev_acc
                max_test_acc = test_acc
                best_model = copy.deepcopy(model)

            loss = 0
            model.train()

    writer.close()
    print(f'max dev acc: {max_dev_acc:.3f} / max test acc: {max_test_acc:.3f}')
    return best_model
def train(args, data):
    model = BIMPM(args, data)
    if args.gpu > -1:
        model.cuda(args.gpu)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=args.learning_rate)
    criterion = nn.BCEWithLogitsLoss()

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_acc, max_test_acc = 0, 0

    accum_steps = 3  # accumulate gradients over this many batches per optimizer step
    iterator = data.train_iter
    for _ in range(args.epoch):
        for i, batch in enumerate(iterator):
            present_epoch = int(iterator.epoch)
            if present_epoch > last_epoch:
                print('epoch:', present_epoch + 1)
            last_epoch = present_epoch

            if args.data_type == 'SNLI':
                s1, s2 = 'premise', 'hypothesis'
            else:
                s1, s2 = 'q1', 'q2'
            s1, s2 = getattr(batch, s1), getattr(batch, s2)

            # limit the lengths of input sentences up to max_sent_len
            if args.max_sent_len >= 0:
                if s1.shape[1] > args.max_sent_len:
                    s1 = s1[:, :args.max_sent_len]
                if s2.shape[1] > args.max_sent_len:
                    s2 = s2[:, :args.max_sent_len]

            kwargs = {'p': s1, 'h': s2}

            if args.use_char_emb:
                char_p = torch.LongTensor(data.characterize(s1))
                char_h = torch.LongTensor(data.characterize(s2))
                if args.gpu > -1:
                    char_p = char_p.cuda(args.gpu)
                    char_h = char_h.cuda(args.gpu)
                kwargs['char_p'] = char_p
                kwargs['char_h'] = char_h

            pred = model(**kwargs)
            # BCEWithLogitsLoss expects float targets with the same shape as the logits
            batch_loss = criterion(pred, batch.label.view(pred.shape[0], -1).float())
            loss += batch_loss.item()
            batch_loss.backward()

            # gradient accumulation: apply and reset gradients once every accum_steps
            # batches, so intermediate backward passes actually contribute to the step
            if (i + 1) % accum_steps == 0:
                optimizer.step()
                optimizer.zero_grad()

            if i % args.print_freq == 0:
                dev_loss, dev_acc = test(model, args, data, mode='dev')
                test_loss, test_acc = test(model, args, data)
                c = (i + 1) // args.print_freq
                writer.add_scalar('loss/train', loss, c)
                writer.add_scalar('loss/dev', dev_loss, c)
                writer.add_scalar('acc/dev', dev_acc, c)
                writer.add_scalar('loss/test', test_loss, c)
                writer.add_scalar('acc/test', test_acc, c)

                print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f} / test loss: {test_loss:.3f}'
                      f' / dev acc: {dev_acc:.3f} / test acc: {test_acc:.3f}')

                # keep the checkpoint with the best dev accuracy, not simply the latest
                if dev_acc > max_dev_acc:
                    max_dev_acc = dev_acc
                    max_test_acc = test_acc
                    best_model = copy.deepcopy(model)
                    if not os.path.exists('saved_models'):
                        os.makedirs('saved_models')
                    torch.save(best_model.state_dict(),
                               f'saved_models/BIBPM_{args.data_type}_{args.model_time}.pt')

                loss = 0
                model.train()

    writer.close()
    print(f'max dev acc: {max_dev_acc:.3f} / max test acc: {max_test_acc:.3f}')
    return best_model
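# Generic form of the gradient-accumulation pattern used in the train() above.
# The division by accum_steps is an optional refinement (not in the original)
# that makes the accumulated gradient match a single batch of accum_steps times
# the size; model, criterion, optimizer, and loader are placeholder names.
accum_steps = 3
optimizer.zero_grad()
for i, (inputs, targets) in enumerate(loader):
    out = model(inputs)
    (criterion(out, targets) / accum_steps).backward()  # gradients accumulate
    if (i + 1) % accum_steps == 0:
        optimizer.step()       # apply the accumulated gradient
        optimizer.zero_grad()  # reset for the next accumulation window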
def train(args, data):
    model = BIMPM(args, data)
    if args.cuda:
        model = model.cuda()

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=args.learning_rate)

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_acc, max_test_acc = 0, 0

    for epoch in range(args.epoch):
        print('Training epoch %s' % str(epoch + 1))
        iterator = data.train_iter
        for i, batch in enumerate(iterator):
            present_epoch = int(iterator.epoch)
            if present_epoch == args.epoch:
                break
            if present_epoch > last_epoch:
                print('epoch:', present_epoch + 1)
            last_epoch = present_epoch

            s1, s2, label = batch.q1, batch.q2, batch.label

            # limit the lengths of input sentences up to max_sent_len
            if args.max_sent_len >= 0:
                if s1.size()[1] > args.max_sent_len:
                    s1 = s1[:, :args.max_sent_len]
                if s2.size()[1] > args.max_sent_len:
                    s2 = s2[:, :args.max_sent_len]

            if args.cuda:
                s1, s2, label = s1.cuda(), s2.cuda(), label.cuda()

            kwargs = {'p': s1, 'h': s2}

            if args.use_char_emb:
                # Variable is deprecated since PyTorch 0.4; plain tensors suffice
                char_p = torch.LongTensor(data.characterize(s1))
                char_h = torch.LongTensor(data.characterize(s2))
                if args.cuda:
                    char_p = char_p.cuda()
                    char_h = char_h.cuda()
                kwargs['char_p'] = char_p
                kwargs['char_h'] = char_h

            pred = model(**kwargs)
            optimizer.zero_grad()
            # keep the batch loss separate from the running total so the total
            # is not overwritten on every iteration
            batch_loss = F.cross_entropy(pred, label)
            loss += batch_loss.item()
            batch_loss.backward()
            optimizer.step()

            if (i + 1) % args.print_freq == 0:
                dev_loss, dev_acc = test(model, args, data, mode='dev')
                test_loss, test_acc = test(model, args, data)
                c = (i + 1) // args.print_freq
                writer.add_scalar('loss/train', loss, c)
                writer.add_scalar('loss/dev', dev_loss, c)
                writer.add_scalar('acc/dev', dev_acc, c)
                writer.add_scalar('loss/test', test_loss, c)
                writer.add_scalar('acc/test', test_acc, c)

                print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f} / test loss: {test_loss:.3f}'
                      f' / dev acc: {dev_acc:.3f} / test acc: {test_acc:.3f}')

                if dev_acc > max_dev_acc:
                    max_dev_acc = dev_acc
                    max_test_acc = test_acc
                    best_model = copy.deepcopy(model)

                loss = 0  # reset the running loss after each report
                model.train()

    writer.close()
    print(f'max dev acc: {max_dev_acc:.3f} / max test acc: {max_test_acc:.3f}')
    return best_model
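# A hypothetical driver for the train() variants above; parse_args() and the
# Quora dataset wrapper are assumptions standing in for this repo's own entry
# point, and the output file name is illustrative.
if __name__ == '__main__':
    args = parse_args()   # assumed CLI parser producing the flags used above
    data = Quora(args)    # assumed dataset wrapper exposing train/dev/test iterators
    best_model = train(args, data)
    torch.save(best_model.state_dict(), 'saved_models/BIMPM_final.pt')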