def main():
    data_holder, task2id, id2task, num_feat, num_voc, num_char, tgt_dict, embeddings = Dataloader_elmo1.multitask_dataloader(
        pkl_path, num_task=num_task, batch_size=BATCH_SIZE)
    para = model_para
    task2label = {"conll2000": "chunk", "unidep": "POS", "conll2003": "NER"}
    #task2label = {"conll2000": "chunk", "wsjpos": "POS", "conll2003": "NER"}
    #logger = Logger('./logs/'+str(args.gpu))
    para["id2task"] = id2task
    para["n_feats"] = num_feat
    para["n_vocs"] = num_voc
    para["n_tasks"] = num_task
    # One output size per task: the number of target labels in that task's tag set.
    para["out_size"] = [len(tgt_dict[task2label[id2task[ids]]]) for ids in range(num_task)]
    para["n_chars"] = num_char
    model = Model_s.build_model_cnn(para)
    model.Word_embeddings.apply_weights(embeddings)
    # Only trainable parameters go to the optimizer.
    params = [p for p in model.parameters() if p.requires_grad]
    num_params = sum(p.numel() for p in model.parameters())
    print(model)
    print("Num of paras:", num_params)
    print(model.concat_flag)

    def lr_decay(optimizer, epoch, decay_rate=0.05, init_lr=0.015):
        # Inverse-time decay: lr_t = lr_0 / (1 + decay_rate * epoch).
        lr = init_lr / (1 + decay_rate * epoch)
        print(" Learning rate is set as:", lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        return optimizer

    def exp_lr_decay(optimizer, epoch, decay_rate=0.05, init_lr=0.015):
        # Exponential decay: lr_t = lr_0 * decay_rate ** epoch.
        lr = init_lr * decay_rate ** epoch
        print(" Learning rate is set as:", lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        return optimizer

    if args.optim == "noam":
        model_optim = optim_custorm.NoamOpt(
            para["d_hid"], 1, 1000,
            torch.optim.Adam(params, lr=0.0015, betas=(0.9, 0.98), eps=1e-9, weight_decay=L2))
        args.decay = None
    elif args.optim == "sgd":
        model_optim = optim.SGD(params, lr=0.015, momentum=args.momentum, weight_decay=1e-8)
    elif args.optim == "adam":
        model_optim = optim.Adam(params, lr=0.0, betas=(0.9, 0.98), eps=1e-9, weight_decay=1e-8)

    if args.mode == "train":
        best_F1 = 0
        if not para["crf"]:
            calculate_loss = nn.NLLLoss()
        else:
            calculate_loss = None
            #calculate_loss = [CRFLoss_vb(len(tgt_dict[task2label[id2task[idx]]])+2, len(tgt_dict[task2label[id2task[idx]]]), len(tgt_dict[task2label[id2task[idx]]])+1) for idx in range(num_task)]
            #if USE_CUDA:
            #    for x in calculate_loss:
            #        x = x.cuda()
        print("Start training...")
        print('-' * 60)
        KLLoss = None  #nn.KLDivLoss()
        start_point = time.time()
        for epoch_idx in range(NUM_EPOCH):
            # Plain SGD uses an explicit learning-rate schedule; Noam handles its own.
            if args.optim == "sgd":
                if args.decay == "exp":
                    model_optim = exp_lr_decay(model_optim, epoch_idx)
                elif args.decay == "normal":
                    model_optim = lr_decay(model_optim, epoch_idx)
            Pre, Rec, F1, loss_list = run_epoch(model, data_holder, model_optim, calculate_loss, KLLoss, para, epoch_idx, id2task)
            use_time = time.time() - start_point
            print("Time used: %f mins" % (use_time / 60))
            # Keep only the checkpoint with the best average dev F1.
            if not best_F1 or best_F1 < F1:
                best_F1 = F1
                Model_s.save_model(model_path, model, para)
                print('*' * 60)
                print("Saved model with average Pre: %f, Rec: %f, F1: %f on dev set." % (Pre, Rec, F1))
                save_idx = epoch_idx
                print('*' * 60)
                print("Saved model at epoch:", save_idx)
    else:
        # Evaluation mode: restore hyper-parameters and weights, then run inference on the test set.
        para_path = os.path.join(path, 'para.pkl')
        with open(para_path, "rb") as f:
            para_save = pickle.load(f)
        model = Model_s.build_model(para_save)
        model = Model_s.read_model(model_path, model)
        prec_list, rec_list, f1_list = infer(model, data_holder, "test")
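# NOTE: optim_custorm.NoamOpt is not shown in this file. The class below is an assumed
# sketch of such a wrapper, following the "Noam" schedule from "Attention Is All You Need"
# (as popularized by The Annotated Transformer); the actual implementation in
# optim_custorm may differ in details.
class NoamOpt:
    """Optimizer wrapper applying lr = factor * d_model^-0.5 * min(step^-0.5, step * warmup^-1.5)."""

    def __init__(self, model_size, factor, warmup, optimizer, step=0):
        self.optimizer = optimizer
        self.model_size = model_size
        self.factor = factor
        self.warmup = warmup
        self._step = step
        self._rate = 0

    def rate(self, step=None):
        # Learning rate grows linearly during warmup, then decays as step^-0.5.
        if step is None:
            step = self._step
        return self.factor * (self.model_size ** (-0.5) *
                              min(step ** (-0.5), step * self.warmup ** (-1.5)))

    def step(self):
        # Refresh the learning rate of every parameter group, then take an optimizer step.
        self._step += 1
        rate = self.rate()
        for p in self.optimizer.param_groups:
            p['lr'] = rate
        self._rate = rate
        self.optimizer.step()

    def zero_grad(self):
        self.optimizer.zero_grad()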
def main():
    data_holder, task2id, id2task, num_feat, num_voc, num_char, tgt_dict, embeddings = DataLoader.multitask_dataloader(
        pkl_path, num_task=num_task, batch_size=BATCH_SIZE)
    para = model_para
    task2label = {"conll2000": "chunk", "unidep": "POS", "conll2003": "NER"}
    #task2label = {"conll2000": "chunk", "wsjpos": "POS", "conll2003": "NER"}
    #logger = Logger('./logs/'+str(args.gpu))
    para["id2task"] = id2task
    para["n_feats"] = num_feat
    para["n_vocs"] = num_voc
    para["n_tasks"] = num_task
    # One output size per task: the number of target labels in that task's tag set.
    para["out_size"] = [len(tgt_dict[task2label[id2task[ids]]]) for ids in range(num_task)]
    model = Model.build_model(para)
    model.Word_embeddings.apply_weights(embeddings)
    params = model.parameters()
    num_params = sum(p.numel() for p in model.parameters())
    print(model)
    print("Num of paras:", num_params)
    print(model.concat_flag)
    model_optim = optim_custorm.NoamOpt(
        para["d_hid"], 1, 1000,
        torch.optim.Adam(params, lr=0.0, betas=(0.9, 0.98), eps=1e-9, weight_decay=L2))
    #model_optim = optim_custorm.NoamOpt(para["d_hid"], 1, 1000, torch.optim.SGD(params, lr=0.001, momentum=0.9))

    if args.mode == "train":
        best_F1 = 0
        if not para["crf"]:
            calculate_loss = nn.NLLLoss()
        else:
            calculate_loss = None
        print("Start training...")
        print('-' * 60)
        KLLoss = nn.KLDivLoss()
        start_point = time.time()
        for epoch_idx in range(NUM_EPOCH):
            Pre, Rec, F1 = run_epoch(model, data_holder, model_optim, calculate_loss, KLLoss, para, epoch_idx, id2task)
            use_time = time.time() - start_point
            print("Time used: %f mins" % (use_time / 60))
            # Keep only the checkpoint with the best average dev F1.
            if not best_F1 or best_F1 < F1:
                best_F1 = F1
                Model.save_model(model_path, model, para)
                print('*' * 60)
                print("Saved model with average Pre: %f, Rec: %f, F1: %f on dev set." % (Pre, Rec, F1))
                save_idx = epoch_idx
                print('*' * 60)
                print("Saved model at epoch:", save_idx)
    else:
        # Evaluation mode: restore hyper-parameters and weights, then run inference on the test set.
        para_path = os.path.join(path, 'para.pkl')
        with open(para_path, "rb") as f:
            para_save = pickle.load(f)
        model = Model.build_model(para_save)
        model = Model.read_model(model_path, model)
        prec_list, rec_list, f1_list = infer(model, data_holder, "test")
def main():
    critorion = loss_custorm.loss_fuc(nn.NLLLoss, ignore_index=0)
    model = Model.build_san_model(args.d_emb, args.d_hid, args.n_layers, args.dropout, n_voc, args.beam_num)
    #logger = Logger('./logs/'+args.gpu)
    if args.mode == "train":
        params = model.parameters()
        model_optim = optim_custorm.NoamOpt(
            args.d_hid, args.factor, args.warm,
            torch.optim.Adam(params, lr=0, betas=(0.9, 0.98), eps=1e-9, weight_decay=args.L2))
        model.embeddings.apply_weights(weight)
        print("Begin training...")
        start_time = time.time()
        best_loss = 0
        for epoch_idx in range(args.max_epoch):
            val_loss, train_loss, step = run_epoch(model, critorion, model_optim, epoch_idx)  #, logger)
            print('-' * 70)
            print('| val_loss: %4.4f | train_loss: %4.4f' % (val_loss, train_loss))
            print('-' * 70)
            # Checkpoint whenever the validation loss improves.
            if not best_loss or best_loss > val_loss:
                best_loss = val_loss
                is_best = True
                #Model.save_model(args.model_path+args.gpu+"/", model)
                save_checkpoint({
                    'epoch': epoch_idx + 1,
                    'state_dict': model.state_dict(),
                    'best_prec1': val_loss,
                    'optimizer': model_optim.optimizer.state_dict(),
                    'step': step
                }, is_best)
            #if epoch_idx % 5 == 0 or not epoch_idx:
            predict(model, epoch_idx)  #, logger)
    elif args.mode == "resume":
        path_save = args.model_path + args.gpu + "/" + "checkpoint.pth.tar"
        if os.path.isfile(path_save):
            print("=> loading checkpoint '{}'".format(path_save))
            checkpoint = torch.load(path_save)
            start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Rebuild the wrapped Adam optimizer (mirroring the train branch) before restoring its state.
            optimizer = torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9, weight_decay=args.L2)
            optimizer.load_state_dict(checkpoint['optimizer'])
            step = checkpoint['step']
            print("=> loaded checkpoint '{}' (epoch {})".format(path_save, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(path_save))
        print("Continue training from epoch %d." % start_epoch)
        model_optim = optim_custorm.NoamOpt(args.d_hid, args.factor, args.warm, optimizer, step=step)
        for epoch_idx in range(start_epoch, args.max_epoch):
            val_loss, train_loss, step = run_epoch(model, critorion, model_optim, epoch_idx)  #, logger)
            print('-' * 70)
            print('| val_loss: %4.4f | train_loss: %4.4f' % (val_loss, train_loss))
            print('-' * 70)
            # Same best-checkpoint logic as the train branch.
            if not best_loss or best_loss > val_loss:
                best_loss = val_loss
                is_best = True
                #Model.save_model(args.model_path+args.gpu+"/", model)
                save_checkpoint({
                    'epoch': epoch_idx + 1,
                    'state_dict': model.state_dict(),
                    'best_prec1': val_loss,
                    'optimizer': model_optim.optimizer.state_dict(),
                    'step': step
                }, is_best)
            #if epoch_idx % 5 == 0 or not epoch_idx:
            predict(model, epoch_idx)  #, logger)
    else:
        model = Model.read_model(args.model_path, model)
        save_hyp = pridict(model)
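# NOTE: save_checkpoint is not defined in this file. The helper below is an assumed sketch
# following the common PyTorch ImageNet-example pattern (torch.save plus a copy of the best
# checkpoint). The real helper presumably writes under args.model_path + args.gpu so that
# the resume branch above can find "checkpoint.pth.tar"; the default dirname here is only
# illustrative. os and torch are already imported at module level; shutil is added here.
import shutil

def save_checkpoint(state, is_best, dirname="./", filename="checkpoint.pth.tar"):
    """Persist the latest training state and keep a separate copy of the best one."""
    path = os.path.join(dirname, filename)
    torch.save(state, path)
    if is_best:
        # Keep the best-performing checkpoint under a fixed name for later evaluation.
        shutil.copyfile(path, os.path.join(dirname, "model_best.pth.tar"))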