def preprocess() -> argparse.Namespace:
    """
    preprocess of training
    :return: config args
    """
    print('preprocessing starts...\n')
    # ====== parse arguments ====== #
    args = parse_args()
    # ====== set random seed ====== #
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    # ====== save path ====== #
    now_time = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    args.save_path = os.path.join('./logs/', 'my_log-' + now_time)
    if not os.path.exists(args.save_path) and not args.debug:
        os.makedirs(args.save_path)
    # ====== fitlog init ====== #
    fitlog.commit(__file__)
    fitlog.debug(args.debug)
    fitlog.add_hyper(args)
    # ====== tb VisualLogger init ====== #
    args.visual_logger = VisualLogger(args.save_path) if not args.debug else None
    # ====== cuda enable ====== #
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpuid)
    args.device = torch.device('cuda') if args.cuda and torch.cuda.is_available() else torch.device('cpu')
    # ====== others ====== #
    os.environ['TOKENIZERS_PARALLELISM'] = 'false'
    torch.set_num_threads(6)
    print(args, end='\n\n')
    return args
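# A minimal, hypothetical sketch of how preprocess() might be driven from a
# training entry point. run_training() is a made-up placeholder (not part of
# the original code); fitlog.finish() closes the fitlog record, as the other
# snippets in this collection do.
def run_training(args: argparse.Namespace) -> None:
    """Hypothetical placeholder for the real training loop."""
    pass


if __name__ == '__main__':
    args = preprocess()
    try:
        run_training(args)
    finally:
        fitlog.finish()  # close the fitlog record even if training fails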
def record_hyper_params(hyper_dict: dict):
    for k, v in hyper_dict.items():
        if k not in ('model_dir', 'record_dir', 'data_path'):
            v = v if v is not None else '-'
            fitlog.add_hyper(value=str(v), name=str(k))
    if 'dilate' not in hyper_dict.keys():
        fitlog.add_hyper(value='-', name='dilate')
    fitlog.add_other(value=platform.system(), name='platform')
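# A minimal, hypothetical call to record_hyper_params() illustrating the
# behaviour above: path-like keys are skipped, None values fall back to '-',
# and a missing 'dilate' key would also be logged as '-'. The dict contents
# are made up for illustration only.
record_hyper_params({
    'lr': 1e-3,           # logged via fitlog.add_hyper as "0.001"
    'dilate': None,       # present but None, so logged as "-"
    'model_dir': './m',   # path-like key, deliberately not logged
})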
def main():
    from config import get_config
    C, logger = get_config()

    #----- prepare data and some global variables -----
    data_train, data_test, data_valid, relations, rel_weights = load_data(C, logger)
    n_rel_typs, loss_func, generator = initialize(C, logger, relations, rel_weights)

    #----- train & test -----
    trained_models = []
    for i in range(C.ensemble_size):
        model, best_valid = train(
            C, logger,
            data_train, data_valid,
            loss_func, generator, n_rel_typs,
            run_name=str(i), test_data=data_test,
        )
        if hasattr(model, "module"):  # dataparallel
            model = model.module
        model = model.cpu()
        trained_models.append(model)

    #----- ensemble test -----
    micro_f1, macro_f1, loss = test(
        C, logger,
        data_test, trained_models,
        loss_func, generator,
        mode="test", epoch_id=C.epoch_numb, run_name='final',
    )
    fitlog.add_hyper("t%.4f v%.4f" % (macro_f1, best_valid), name="result")

    #----- save ensembled model -----
    if C.model_save:
        with open(C.model_save, "wb") as fil:
            pickle.dump(trained_models, fil)
        logger.log("final model saved at %s" % C.model_save)

    #----- finish -----
    fitlog.finish()
def train(model, train_data, test_data):
    train_iter = DataSetIter(train_data, batch_size=C.batch_size)
    test_iter = DataSetIter(test_data, batch_size=C.batch_size)

    loss_func = nn.CrossEntropyLoss(ignore_index=0)
    optim = tc.optim.Adam(params=model.parameters(), lr=C.lr, weight_decay=C.weight_decay)
    scheduler = get_cosine_schedule_with_warmup(
        optim,
        num_warmup_steps=C.warmup,
        num_training_steps=train_iter.num_batches * C.epoch_number,
    )

    best_test_loss = -1
    best_test_epoch = -1
    best_step = -1
    try:
        for epoch_n in range(C.epoch_number):
            tra_loss = run(model, train_iter, loss_func, epoch_n, optim, scheduler, True)
            tes_loss = run(model, test_iter, loss_func, epoch_n, None, None, False)

            logger.log("Epoch %d ended. Train loss = %.4f , Valid loss = %.4f" % (
                epoch_n, tra_loss, tes_loss,
            ))
            fitlog.add_metric(tes_loss, step=train_iter.num_batches * (epoch_n + 1),
                              epoch=epoch_n, name="valid loss")

            if best_test_epoch < 0 or tes_loss < best_test_loss:
                best_test_loss = tes_loss
                best_test_epoch = epoch_n
                best_step = fitlog_loss_step["train loss"]
                fitlog.add_best_metric(best_test_loss, name="loss")

                with open(C.model_save, "wb") as fil:  # save the best model so far
                    pickle.dump(model, fil)
                fitlog.add_hyper(name="best_step", value="%d / %d" % (
                    best_step, train_iter.num_batches * C.epoch_number,
                ))
    except KeyboardInterrupt:  # allow manual early stopping
        pass

    logger.log("Train end.")
    logger.log("Got best valid loss %.4f in epoch %d" % (best_test_loss, best_test_epoch))

    return model
def after_parse_t2g(C, need_logger=False):
    #----- make logger -----
    logger = Logger(C.log_file)
    logger.log = logger.log_print_w_time
    if C.no_log:
        logger.log = logger.nolog

    C.tmp_file_name = random_tmp_name()

    #----- other stuff -----
    if C.auto_hyperparam:
        auto_hyperparam(C)
        logger.log("Hyper parameters autoset.")

    if C.no_fitlog:
        fitlog.debug()
    fitlog.set_log_dir("logs")
    fitlog.add_hyper(C)

    logger.log("------------------------------------------------------")
    logger.log(pformat(C.__dict__))
    logger.log("------------------------------------------------------")

    C.gpus = list(range(tc.cuda.device_count()))

    #----- initialize -----
    if C.t2g_seed > 0:
        random.seed(C.t2g_seed)
        tc.manual_seed(C.t2g_seed)
        np.random.seed(C.t2g_seed)
        tc.cuda.manual_seed_all(C.t2g_seed)
        tc.backends.cudnn.deterministic = True
        tc.backends.cudnn.benchmark = False
        logger.log("Seed set. %d" % (C.t2g_seed))

    tc.cuda.set_device(C.gpus[0])
    C.device = C.gpus[0]

    if need_logger:
        return C, logger
    return C
def train():
    args = parse_args()
    if args.debug:
        fitlog.debug()
        args.save_model = False

    # ================= define =================
    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    word_mask_index = tokenizer.mask_token_id
    word_vocab_size = len(tokenizer)

    if get_local_rank() == 0:
        fitlog.set_log_dir(args.log_dir)
        fitlog.commit(__file__, fit_msg=args.name)
        fitlog.add_hyper_in_file(__file__)
        fitlog.add_hyper(args)

    # ================= load data =================
    dist.init_process_group('nccl')
    init_logger_dist()

    n_proc = dist.get_world_size()
    bsz = args.batch_size // args.grad_accumulation // n_proc
    args.local_rank = get_local_rank()
    args.save_dir = os.path.join(args.save_dir, args.name) if args.save_model else None
    if args.save_dir is not None and os.path.exists(args.save_dir):
        raise RuntimeError('save_dir has already existed.')
    logger.info('save directory: {}'.format('None' if args.save_dir is None else args.save_dir))
    devices = list(range(torch.cuda.device_count()))
    NUM_WORKERS = 4

    ent_vocab, rel_vocab = load_ent_rel_vocabs()
    logger.info('# entities: {}'.format(len(ent_vocab)))
    logger.info('# relations: {}'.format(len(rel_vocab)))
    ent_freq = get_ent_freq()
    assert len(ent_vocab) == len(ent_freq), '{} {}'.format(len(ent_vocab), len(ent_freq))

    #####
    root = args.data_dir
    dirs = os.listdir(root)
    drop_files = []
    for dir in dirs:
        path = os.path.join(root, dir)
        max_idx = 0
        for file_name in os.listdir(path):
            if 'large' in file_name:
                continue
            max_idx = int(file_name) if int(file_name) > max_idx else max_idx
        drop_files.append(os.path.join(path, str(max_idx)))
    #####

    file_list = []
    for path, _, filenames in os.walk(args.data_dir):
        for filename in filenames:
            file = os.path.join(path, filename)
            if 'large' in file or file in drop_files:
                continue
            file_list.append(file)
    logger.info('used {} files in {}.'.format(len(file_list), args.data_dir))

    if args.data_prop > 1:
        used_files = file_list[:int(args.data_prop)]
    else:
        used_files = file_list[:round(args.data_prop * len(file_list))]

    data = GraphOTFDataSet(used_files, n_proc, args.local_rank, word_mask_index,
                           word_vocab_size, args.n_negs, ent_vocab, rel_vocab, ent_freq)
    dev_data = GraphDataSet(used_files[0], word_mask_index, word_vocab_size,
                            args.n_negs, ent_vocab, rel_vocab, ent_freq)

    sampler = OTFDistributedSampler(used_files, n_proc, get_local_rank())
    train_data_iter = TorchLoaderIter(dataset=data, batch_size=bsz, sampler=sampler,
                                      num_workers=NUM_WORKERS, collate_fn=data.collate_fn)
    dev_data_iter = TorchLoaderIter(dataset=dev_data, batch_size=bsz, sampler=RandomSampler(),
                                    num_workers=NUM_WORKERS, collate_fn=dev_data.collate_fn)
    if args.test_data is not None:
        test_data = FewRelDevDataSet(path=args.test_data, label_vocab=rel_vocab, ent_vocab=ent_vocab)
        test_data_iter = TorchLoaderIter(dataset=test_data, batch_size=32, sampler=RandomSampler(),
                                         num_workers=NUM_WORKERS, collate_fn=test_data.collate_fn)

    if args.local_rank == 0:
        print('full wiki files: {}'.format(len(file_list)))
        print('used wiki files: {}'.format(len(used_files)))
        print('# of trained samples: {}'.format(len(data) * n_proc))
        print('# of trained entities: {}'.format(len(ent_vocab)))
        print('# of trained relations: {}'.format(len(rel_vocab)))

    # ================= prepare model =================
    logger.info('model init')
    if args.rel_emb is not None:  # load pretrained relation embeddings
        rel_emb = np.load(args.rel_emb)
        # add_embs = np.random.randn(3, rel_emb.shape[1])  # add <pad>, <mask>, <unk>
        # rel_emb = np.r_[add_embs, rel_emb]
        rel_emb = torch.from_numpy(rel_emb).float()
        assert rel_emb.shape[0] == len(rel_vocab), '{} {}'.format(rel_emb.shape[0], len(rel_vocab))
        # assert rel_emb.shape[1] == args.rel_dim
        logger.info('loaded pretrained relation embeddings. dim: {}'.format(rel_emb.shape[1]))
    else:
        rel_emb = None

    if args.model_name is not None:
        logger.info('further pre-train.')
        config = RobertaConfig.from_pretrained('roberta-base', type_vocab_size=3)
        model = CoLAKE(config=config,
                       num_ent=len(ent_vocab),
                       num_rel=len(rel_vocab),
                       ent_dim=args.ent_dim,
                       rel_dim=args.rel_dim,
                       ent_lr=args.ent_lr,
                       ip_config=args.ip_config,
                       rel_emb=None,
                       emb_name=args.emb_name)
        states_dict = torch.load(args.model_name)
        model.load_state_dict(states_dict, strict=True)
    else:
        model = CoLAKE.from_pretrained('roberta-base',
                                       num_ent=len(ent_vocab),
                                       num_rel=len(rel_vocab),
                                       ent_lr=args.ent_lr,
                                       ip_config=args.ip_config,
                                       rel_emb=rel_emb,
                                       emb_name=args.emb_name,
                                       cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'dist_{}'.format(args.local_rank))
        model.extend_type_embedding(token_type=3)
    # if args.local_rank == 0:
    #     for name, param in model.named_parameters():
    #         if param.requires_grad is True:
    #             print('{}: {}'.format(name, param.shape))

    # ================= train model =================
    # lr=1e-4 for peak value, lr=5e-5 for initial value
    logger.info('trainer init')
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight', 'layer_norm.bias', 'layer_norm.weight']
    param_optimizer = list(model.named_parameters())
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]

    word_acc = WordMLMAccuracy(pred='word_pred', target='masked_lm_labels', seq_len='word_seq_len')
    ent_acc = EntityMLMAccuracy(pred='entity_pred', target='ent_masked_lm_labels', seq_len='ent_seq_len')
    rel_acc = RelationMLMAccuracy(pred='relation_pred', target='rel_masked_lm_labels', seq_len='rel_seq_len')
    metrics = [word_acc, ent_acc, rel_acc]

    if args.test_data is not None:
        test_metric = [rel_acc]
        tester = Tester(data=test_data_iter, model=model, metrics=test_metric,
                        device=list(range(torch.cuda.device_count())))
        # tester.test()
    else:
        tester = None

    optimizer = optim.AdamW(optimizer_grouped_parameters, lr=args.lr, betas=(0.9, args.beta), eps=1e-6)
    # warmup_callback = WarmupCallback(warmup=args.warm_up, schedule='linear')
    fitlog_callback = MyFitlogCallback(tester=tester, log_loss_every=100, verbose=1)
    gradient_clip_callback = GradientClipCallback(clip_value=1, clip_type='norm')
    emb_callback = EmbUpdateCallback(model.ent_embeddings)
    all_callbacks = [gradient_clip_callback, emb_callback]
    if args.save_dir is None:
        master_callbacks = [fitlog_callback]
    else:
        save_callback = SaveModelCallback(args.save_dir, model.ent_embeddings, only_params=True)
        master_callbacks = [fitlog_callback, save_callback]

    if args.do_test:
        states_dict = torch.load(os.path.join(args.save_dir, args.model_name)).state_dict()
        model.load_state_dict(states_dict)
        data_iter = TorchLoaderIter(dataset=data, batch_size=args.batch_size, sampler=RandomSampler(),
                                    num_workers=NUM_WORKERS, collate_fn=data.collate_fn)
        tester = Tester(data=data_iter, model=model, metrics=metrics, device=devices)
        tester.test()
    else:
        trainer = DistTrainer(train_data=train_data_iter,
                              dev_data=dev_data_iter,
                              model=model,
                              optimizer=optimizer,
                              loss=LossInForward(),
                              batch_size_per_gpu=bsz,
                              update_every=args.grad_accumulation,
                              n_epochs=args.epoch,
                              metrics=metrics,
                              callbacks_master=master_callbacks,
                              callbacks_all=all_callbacks,
                              validate_every=5000,
                              use_tqdm=True,
                              fp16='O1' if args.fp16 else '')
        trainer.train(load_best_model=False)
config.mode = args.mode
config.setting = args.setting

# save model
if not os.path.exists(config.model_path):
    if config.model_path.__contains__("/"):
        os.makedirs(config.model_path, 0o777)
    else:
        os.mkdir(config.model_path)

# fitlog dir
logger.info(f"set fitlog dir to {args.fitlog_dir}")
if not os.path.exists(args.fitlog_dir):
    os.mkdir(args.fitlog_dir)
fitlog.set_log_dir(args.fitlog_dir)
fitlog.add_hyper(args)

if not os.path.exists(config.model_path):
    os.mkdir(config.model_path)

if args.visible_gpu != -1:
    config.use_gpu = True
    torch.cuda.set_device(args.visible_gpu)
    device = torch.device(args.visible_gpu)
else:
    config.use_gpu = False

mode = args.mode
logger.info("------start mode train------")
run_train()
fitlog.commit(__file__)  # auto commit your codes

if __name__ == '__main__':
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--config_file', default='cnn.cfg')
    argparser.add_argument('--w', default='cnn', help='word encoder')
    argparser.add_argument('--s', default='lstm', help='sent encoder')
    argparser.add_argument('--seed', default=888, type=int, help='seed')
    argparser.add_argument('--gpu', default=0, type=int, help='gpu id')
    argparser.add_argument('--fold', default=9, type=int, help='fold for test')
    args = argparser.parse_args()

    config = Config(args)
    torch.set_num_threads(config.threads)
    fitlog.add_hyper({'model': args.w, 'fold': args.fold})

    # set cuda
    config.use_cuda = args.gpu >= 0 and torch.cuda.is_available()
    if config.use_cuda:
        torch.cuda.set_device(args.gpu)
        config.device = torch.device("cuda", args.gpu)
    else:
        config.device = torch.device("cpu")
    logging.info("Use cuda: %s, gpu id: %d.", config.use_cuda, args.gpu)

    # vocab
    cache_name = "./save/vocab/" + str(args.fold) + ".pickle"
    if Path(cache_name).exists():
        vocab_file = open(cache_name, 'rb')
        vocab = pickle.load(vocab_file)
def _train(self, criterion, optimizer, train_data_loader, val_data_loader, test_data_loader):
    fitlog.add_hyper({
        "model_name": self.opt.model_name,
        "dataset": self.opt.dataset,
        'resplit': self.opt.resplit,
        "domain": self.opt.domain,
        "aug": self.opt.aug,
        "adv": self.opt.adv,
        "aux": self.opt.aux,
        "adv_aux": self.opt.adv_aux,
        'chg': self.opt.chg
    })
    max_val_acc = 0
    max_val_f1 = 0
    global_step = 0
    last_model_path = None
    # model_path = None
    path = None
    pgd = PGD(self.model)
    k = 3
    for epoch in range(self.opt.num_epoch):
        logger.info('>' * 100)
        logger.info('epoch: {}'.format(epoch))
        n_correct, n_total, loss_total = 0, 0, 0
        # switch model to training mode
        self.model.train()
        for i_batch, sample_batched in enumerate(train_data_loader):
            global_step += 1
            # clear gradient accumulators
            optimizer.zero_grad()
            inputs = [sample_batched[col].to(self.opt.device) for col in self.opt.inputs_cols]
            if self.opt.model_name == 'bert_multi_target':
                targets = sample_batched['polarity'].to(self.opt.device)
            else:
                targets = sample_batched['polarity'].to(self.opt.device)
            if self.opt.model_name in reg_list:
                aux_cls_logits, outputs, reg_can_loss, reg_aux_loss, bert_word_output, reg_chg_loss = self.model(inputs, None)
            else:
                outputs = self.model(inputs)
                reg_can_loss = 0
                reg_aux_loss = 0
                reg_chg_loss = 0
            # print('outputs', outputs.shape)
            # print('targets', targets.shape)
            # print(outputs, 'outputs')
            # print(targets, 'polarity')
            loss_1 = criterion(outputs, targets)
            loss_2 = reg_can_loss
            loss_3 = reg_aux_loss
            loss_4 = reg_chg_loss
            weighted_loss_2 = loss_2 * self.opt.can
            weighted_loss_3 = loss_3 * self.opt.aux
            weighted_loss_4 = loss_4 * self.opt.chg
            loss = 1 * loss_1 + weighted_loss_2 + weighted_loss_3 + weighted_loss_4
            if self.opt.adv > 0:
                # print(inputs.shape)
                if self.opt.adv_aux == 1:
                    loss_adv = self._loss_adv(weighted_loss_3, bert_word_output, criterion,
                                              inputs, targets, p_mult=self.opt.adv)
                else:
                    loss_adv = self._loss_adv(loss, bert_word_output, criterion,
                                              inputs, targets, p_mult=self.opt.adv)
                loss += loss_adv
            else:
                loss_adv = 0
            loss.backward()
            # pgd.backup_grad()
            # for t in range(K):
            #     pgd.attack(is_first_attack=(t == 0))  # add adversarial perturbation on the embedding; back up param.data on the first attack
            #     if t != K - 1:
            #         model.zero_grad()
            #     else:
            #         pgd.restore_grad()
            #     loss_adv = model(batch_input, batch_label)
            #     loss_adv.backward()  # backprop, accumulating the adversarial gradient on top of the normal gradient
            # pgd.restore()  # restore the embedding parameters
            optimizer.step()

            n_correct += (torch.argmax(outputs, -1) == targets).sum().item()
            # print(outputs.shape)
            # n_correct += (torch.argmax(aux_cls_logits, -1) == 4 * targets).sum().item()
            n_total += len(outputs)
            loss_total += loss.item() * len(outputs)
            if global_step % self.opt.log_step == 0:
                train_acc = n_correct / n_total
                train_loss = loss_total / n_total
                logger.info(
                    'loss_total: {:.4f}, acc: {:.4f}, loss_main: {:.4f}, reg_can_loss: {:.4f}, loss_adv: {:.4f}, reg_aux_loss {:.4f}, reg_chg_loss {:.4f}'
                    .format(train_loss, train_acc, loss_1, weighted_loss_2, loss_adv,
                            weighted_loss_3, weighted_loss_4))
                fitlog.add_metric(
                    {"Train": {
                        'loss_total: {:.4f}, acc: {:.4f}, loss_main: {:.4f}, reg_can_loss: {:.4f}, loss_adv: {:.4f}, reg_aux_loss {:.4f}, reg_chg_loss {:.4f}'
                        .format(train_loss, train_acc, loss_1, weighted_loss_2, loss_adv,
                                weighted_loss_3, weighted_loss_4)
                    }},
                    step=global_step)

        val_acc, val_f1 = self._evaluate_acc_f1(val_data_loader)
        test_acc, test_f1 = self._evaluate_acc_f1(test_data_loader)
        logger.info('> val_acc: {:.4f}, val_f1: {:.4f}'.format(val_acc, val_f1))
        logger.info('> test_acc: {:.4f}, test_f1: {:.4f}'.format(test_acc, test_f1))
        if val_acc > max_val_acc:
            max_val_acc = val_acc
            if not os.path.exists('state_dict'):
                os.mkdir('state_dict')
            model_path = 'state_dict/{0}_{1}_domain-{2}_can{3}_aug{4}_adv{5}_aux{6}_val_acc{7}_resplit{8}'.format(
                self.opt.model_name, self.opt.dataset, self.opt.domain, self.opt.can,
                self.opt.aug, self.opt.adv, self.opt.aux, round(val_acc, 4), self.opt.resplit)
            bert_path = 'state_dict/{0}_{1}_domain-{2}_can{3}_aug{4}_adv{5}_aux{6}_val_acc{7}_resplit{8}_bert'.format(
                self.opt.model_name, self.opt.dataset, self.opt.domain, self.opt.can,
                self.opt.aug, self.opt.adv, self.opt.aux, round(val_acc, 4), self.opt.resplit)
            # fitlog.add_hyper({"model_name": self.opt.model_name, "dataset": self.opt.dataset, 'resplit': self.opt.resplit, "domain": self.opt.domain, "aug": self.opt.aug, "adv": self.opt.adv, "aux": self.opt.aux})
            fitlog.add_metric({"val": {"val_acc": val_acc, "val_f1": val_f1}}, step=global_step)
            fitlog.add_metric({"test": {"test_acc": test_acc, "test_f1": test_f1}}, step=global_step)
            fitlog.add_best_metric({"val": {"val_acc": val_acc, "val_f1": val_f1}})
            fitlog.add_best_metric({"test": {"test_acc": test_acc, "test_f1": test_f1}})
            if last_model_path is not None:
                os.remove(last_model_path)
                if self.opt.model_name not in ['lcf_bert']:
                    os.remove(last_bert_path)
            last_model_path = model_path
            last_bert_path = bert_path
            torch.save(self.model.state_dict(), model_path)
            if self.opt.model_name not in ['lcf_bert']:
                torch.save(self.model.bert.state_dict(), bert_path)
            logger.info('>> saved: {}'.format(model_path))
            # max_val_f1 = val_f1
        if val_f1 > max_val_f1:
            max_val_f1 = val_f1
    # fitlog.add_metric(acc, name="Acc", step=step)
    return model_path
def main():
    args = parse_args()
    if args.debug:
        fitlog.debug()

    fitlog.set_log_dir(args.log_dir)
    fitlog.commit(__file__)
    fitlog.add_hyper_in_file(__file__)
    fitlog.add_hyper(args)
    if args.gpu != 'all':
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    train_set, dev_set, test_set, temp_ent_vocab = load_fewrel_graph_data(data_dir=args.data_dir)

    print('data directory: {}'.format(args.data_dir))
    print('# of train samples: {}'.format(len(train_set)))
    print('# of dev samples: {}'.format(len(dev_set)))
    print('# of test samples: {}'.format(len(test_set)))

    ent_vocab, rel_vocab = load_ent_rel_vocabs(path='../')

    # load entity embeddings
    ent_index = []
    for k, v in temp_ent_vocab.items():
        ent_index.append(ent_vocab[k])
    ent_index = torch.tensor(ent_index)
    ent_emb = np.load(os.path.join(args.model_path, 'entities.npy'))
    ent_embedding = nn.Embedding.from_pretrained(torch.from_numpy(ent_emb))
    ent_emb = ent_embedding(ent_index.view(1, -1)).squeeze().detach()

    # load CoLAKE parameters
    config = RobertaConfig.from_pretrained('roberta-base', type_vocab_size=3)
    model = CoLAKEForRE(config, num_types=len(train_set.label_vocab), ent_emb=ent_emb)
    states_dict = torch.load(os.path.join(args.model_path, 'model.bin'))
    model.load_state_dict(states_dict, strict=False)
    print('parameters below are randomly initialized:')
    for name, param in model.named_parameters():
        if name not in states_dict:
            print(name)

    # tie relation classification head
    rel_index = []
    for k, v in train_set.label_vocab.items():
        rel_index.append(rel_vocab[k])
    rel_index = torch.LongTensor(rel_index)
    rel_embeddings = nn.Embedding.from_pretrained(states_dict['rel_embeddings.weight'])
    rel_index = rel_index.cuda()
    rel_cls_weight = rel_embeddings(rel_index.view(1, -1)).squeeze()
    model.tie_rel_weights(rel_cls_weight)

    model.rel_head.dense.weight.data = states_dict['rel_lm_head.dense.weight']
    model.rel_head.dense.bias.data = states_dict['rel_lm_head.dense.bias']
    model.rel_head.layer_norm.weight.data = states_dict['rel_lm_head.layer_norm.weight']
    model.rel_head.layer_norm.bias.data = states_dict['rel_lm_head.layer_norm.bias']

    model.resize_token_embeddings(len(RobertaTokenizer.from_pretrained('roberta-base')) + 4)
    print('parameters of CoLAKE have been loaded.')

    # fine-tune
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight', 'embedding']
    param_optimizer = list(model.named_parameters())
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': args.weight_decay},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]
    optimizer = optim.AdamW(optimizer_grouped_parameters, lr=args.lr, betas=(0.9, args.beta), eps=1e-6)

    metrics = [MacroMetric(pred='pred', target='target')]

    test_data_iter = TorchLoaderIter(dataset=test_set, batch_size=args.batch_size, sampler=RandomSampler(),
                                     num_workers=4, collate_fn=test_set.collate_fn)
    devices = list(range(torch.cuda.device_count()))
    tester = Tester(data=test_data_iter, model=model, metrics=metrics, device=devices)
    # tester.test()

    fitlog_callback = FitlogCallback(tester=tester, log_loss_every=100, verbose=1)
    gradient_clip_callback = GradientClipCallback(clip_value=1, clip_type='norm')
    warmup_callback = WarmupCallback(warmup=args.warm_up, schedule='linear')

    bsz = args.batch_size // args.grad_accumulation

    train_data_iter = TorchLoaderIter(dataset=train_set, batch_size=bsz, sampler=RandomSampler(),
                                      num_workers=4, collate_fn=train_set.collate_fn)
    dev_data_iter = TorchLoaderIter(dataset=dev_set, batch_size=bsz, sampler=RandomSampler(),
                                    num_workers=4, collate_fn=dev_set.collate_fn)

    trainer = Trainer(train_data=train_data_iter,
                      dev_data=dev_data_iter,
                      model=model,
                      optimizer=optimizer,
                      loss=LossInForward(),
                      batch_size=bsz,
                      update_every=args.grad_accumulation,
                      n_epochs=args.epoch,
                      metrics=metrics,
                      callbacks=[fitlog_callback, gradient_clip_callback, warmup_callback],
                      device=devices,
                      use_tqdm=True)

    trainer.train(load_best_model=False)
) parser.add_argument("--learning_rate", default=0.001, type=float) parser.add_argument("--l2reg", default=0.00001, type=float) parser.add_argument("--num_epoch", default=100, type=int) parser.add_argument("--batch_size", default=32, type=int) parser.add_argument("--embed_dim", default=300, type=int) parser.add_argument("--hidden_dim", default=300, type=int) parser.add_argument("--dropout", default=0.7, type=float) opt = parser.parse_args() # opt--->all args if opt.dataset.endswith("/"): opt.dataset = opt.dataset[:-1] ################fitlog code#################### fitlog.set_log_dir("logs") fitlog.set_rng_seed() fitlog.add_hyper(opt) fitlog.add_hyper(value="ASGCN", name="model") ################fitlog code#################### opt.polarities_dim = 3 opt.initializer = "xavier_uniform_" opt.optimizer = "adam" opt.model_name = "asgcn" opt.log_step = 20 opt.l2reg = 1e-5 opt.early_stop = 25 if "/" in opt.dataset: pre_model_name, layer, dataset = opt.dataset.split("/")[-3:] else: pre_model_name, dataset = "None", opt.dataset layer = "0"
def parse_args():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument("--dataset_name", type=str, default="rest",
                        help="Choose absa dataset.")
    parser.add_argument("--refresh", type=int, default=0,
                        help="Generate data again")

    # Model parameters
    parser.add_argument("--glove_dir", type=str,
                        help="Directory storing glove embeddings")
    parser.add_argument("--highway", action="store_true",
                        help="Use highway embed.")
    parser.add_argument("--num_layers", type=int, default=2,
                        help="Number of layers of bilstm or highway or elmo.")
    parser.add_argument("--max_hop", type=int, default=4,
                        help="max number of hops")
    parser.add_argument("--num_heads", type=int, default=6,
                        help="Number of heads for gat.")
    parser.add_argument("--dropout", type=float, default=0.7,
                        help="Dropout rate for embedding.")
    parser.add_argument("--num_gcn_layers", type=int, default=1,
                        help="Number of GCN layers.")
    parser.add_argument("--gcn_mem_dim", type=int, default=300,
                        help="Dimension of the W in GCN.")
    parser.add_argument("--gcn_dropout", type=float, default=0.2,
                        help="Dropout rate for GCN.")
    # GAT
    parser.add_argument("--gat_attention_type", type=str,
                        choices=["linear", "dotprod", "gcn"], default="dotprod",
                        help="The attention used for gat")
    parser.add_argument("--embedding_type", type=str, default="glove",
                        choices=["glove", "bert", "roberta"])
    parser.add_argument("--embedding_dim", type=int, default=300,
                        help="Dimension of glove embeddings")
    parser.add_argument("--dep_relation_embed_dim", type=int, default=300,
                        help="Dimension for dependency relation embeddings.")
    parser.add_argument("--hidden_size", type=int, default=300,
                        help="Hidden size of bilstm, in early stage.")
    parser.add_argument("--final_hidden_size", type=int, default=300,
                        help="Hidden size of bilstm, in early stage.")
    parser.add_argument("--num_mlps", type=int, default=2,
                        help="Number of mlps in the last of model.")

    # Training parameters
    parser.add_argument("--batch_size", default=32, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--learning_rate", default=1e-3, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs", default=25, type=int,
                        help="Total number of training epochs to perform.")

    args = parser.parse_args()
    if args.dataset_name.endswith("/"):
        args.dataset_name = args.dataset_name[:-1]
    fitlog.add_hyper(args)

    if "/" in args.dataset_name:
        data = os.path.basename(args.dataset_name)
        output_dir = f"data/{data}"
    else:
        output_dir = f"data/{args.dataset_name}"
    args.output_dir = output_dir

    args.lower = 1
    args.logging_steps = 30
    args.max_steps = -1
    args.max_grad_norm = 10
    args.adam_epsilon = 1e-8
    args.weight_decay = 0
    args.gradient_accumulation_steps = 1
    args.per_gpu_train_batch_size = args.batch_size
    args.per_gpu_eval_batch_size = args.batch_size * 2
    args.add_non_connect = 1
    args.multi_hop = True
    args.num_classes = 3
    args.cuda_id = "0"
    # args.bert_model_dir = "/data1/SHENWZH/models/bert_base"
    args.pure_bert = False
    args.gat_our = True
    args.gat_roberta = False
    args.gat = False
    args.gat_bert = False
    return args
def main():
    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )

    # Parse args
    args = parse_args()
    if args.dataset_name.endswith("/"):
        args.dataset_name = args.dataset_name[:-1]
    dataset_name = args.dataset_name  # e.g. ~/rgat/bert/11/Restaurants
    if "/" in dataset_name:
        pre_model_name, layer, dataset = dataset_name.split("/")[-3:]
    else:
        pre_model_name, dataset = "None", dataset_name
        layer = "-1"
    fitlog.add_hyper(value=pre_model_name, name="model_name")
    fitlog.add_hyper(value=dataset, name="dataset")
    fitlog.add_hyper(value=layer, name="pre_layer")
    fitlog.add_hyper(value="RGAT", name="model")
    # if 'Laptop' in args.dataset_name:
    #     assert args.lower == 0
    check_args(args)

    # Setup CUDA, GPU training
    # os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda_id
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device
    logger.info("Device is %s", args.device)

    # Bert, load pretrained model and tokenizer, check if necessary to put bert here
    if args.embedding_type == "bert":
        tokenizer = BertTokenizer.from_pretrained(args.bert_model_dir)
        args.tokenizer = tokenizer
    elif args.embedding_type == "roberta":
        tokenizer = RobertaTokenizer.from_pretrained(args.bert_model_dir)
        args.tokenizer = tokenizer

    # Load datasets and vocabs
    (
        train_dataset,
        test_dataset,
        word_vocab,
        dep_tag_vocab,
        pos_tag_vocab,
    ) = load_datasets_and_vocabs(args)

    # Build Model
    # model = Aspect_Text_Multi_Syntax_Encoding(args, dep_tag_vocab['len'], pos_tag_vocab['len'])
    if args.pure_bert:
        model = Pure_Bert(args)
    elif args.gat_roberta:
        model = Aspect_Roberta_GAT(args, dep_tag_vocab["len"], pos_tag_vocab["len"])
    elif args.gat_bert:
        model = Aspect_Bert_GAT(args, dep_tag_vocab["len"], pos_tag_vocab["len"])  # R-GAT + Bert
    elif args.gat_our:
        model = Aspect_Text_GAT_ours(args, dep_tag_vocab["len"], pos_tag_vocab["len"])  # R-GAT with reshaped tree
    else:
        model = Aspect_Text_GAT_only(args, dep_tag_vocab["len"], pos_tag_vocab["len"])  # original GAT with reshaped tree
    model.to(args.device)

    # Train
    _, _, all_eval_results = train(args, train_dataset, model, test_dataset)

    print("\n\nBest Results:")
    if len(all_eval_results):
        best_eval_result = max(all_eval_results, key=lambda x: x["acc"])
        step = [i for i, result in enumerate(all_eval_results) if result == best_eval_result][0]
        logger.info("Achieve at step {}/{}".format(step, len(all_eval_results)))
        for key in sorted(best_eval_result.keys()):
            logger.info("  %s = %s", key, str(best_eval_result[key]))
        # fitlog.add_best_metric(value=best_eval_result['acc'], name='acc')
        # fitlog.add_best_metric(value=best_eval_result['f1'], name='f1')
    fitlog.finish()
C = _par.parse_args()

if not C.name:
    C.name = autoname()
os.makedirs("./model_save", exist_ok=True)
C.model_save = "./model_save/model_%s" % C.name

now_time = time.localtime(time.time())
C.time = "%d-%d-%d %d:%d" % (
    (now_time.tm_year) % 100,
    now_time.tm_mon,
    now_time.tm_mday,
    now_time.tm_hour,
    now_time.tm_min,
)

fitlog.add_hyper(C)

def listize(s):
    return [int(x) for x in s.strip().split(",")]
C.gpus = listize(C.gpus)

if C.seed >= 0:
    fitlog.set_rng_seed(C.seed)
else:
    fitlog.set_rng_seed()
#----- get data & model ----- vocab , data = load_data(C.data_path , C.force_reprocess , C.data_save) train_data , valid_data = data[:-1000] , data[-1000:] Model = models[C.model] model = Model( vocab = vocab , logger = logger , d_model = C.d_model , num_layers = C.num_layers , d_hid = C.d_hid , h = C.h , dropout = C.dropout , ) model = model.cuda(C.gpus[0]) if len(C.gpus) > 1: #tc.distributed.init_process_group(backend = "nccl") model = nn.DataParallel(model , C.gpus) #----- train ----- start_time = time.time() model = train(model , train_data , valid_data) end_time = time.time() fitlog.add_hyper(name = "training time" , value = "%.3f" % (end_time - start_time)) #----- save model ----- logger.log("model saved.") logger.log("my name is %s." % C.name) #----- finish ----- fitlog.finish()
import fitlog

use_fitlog = False
if not use_fitlog:
    fitlog.debug()
fitlog.set_log_dir('logs')
load_dataset_seed = 100
fitlog.add_hyper(load_dataset_seed, 'load_dataset_seed')
fitlog.set_rng_seed(load_dataset_seed)

import sys
sys.path.append('../')
import argparse

from fastNLP.core import Trainer
from fastNLP.core import Callback
from fastNLP import LossInForward
from fastNLP.core.metrics import SpanFPreRecMetric, AccuracyMetric
from fastNLP.core.callback import WarmupCallback, GradientClipCallback, EarlyStopCallback, FitlogCallback
from fastNLP import LRScheduler
from fastNLP import logger
import torch
import torch.optim as optim
import torch.nn as nn
from torch.optim.lr_scheduler import LambdaLR
import collections

from load_data import *
fitlog.commit(__file__)
fitlog.add_hyper_in_file(__file__)

# hypers
model_name = 'skip_lstm'
task = 'text_classification'
hidden_units = 128
num_layers = 1
batch_size = 32
learning_rate = 1e-3
# hypers

fitlog.add_hyper({
    'model_name': model_name,
    'task': task,
    'hidden_units': hidden_units,
    'num_layers': num_layers,
    'batch_size': batch_size,
    'learning_rate': learning_rate
})


class TextModel(nn.Module):
    def __init__(self, cells, model, embed_num, embed_dim, hidden_dim, output_dim, pre_weight=None):
        super(TextModel, self).__init__()