Example #1
def preprocess() -> argparse.Namespace:
    """
    Preprocessing before training: parse arguments, set random seeds, create
    the save path, and initialize fitlog and the visual logger.
    :return: parsed config args
    """
    print('preprocessing starts...\n')
    # ====== parse arguments ====== #
    args = parse_args()
    # ====== set random seed ====== #
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    # ====== save path ====== #
    now_time = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    args.save_path = os.path.join('./logs/', 'my_log-' + now_time)
    if not os.path.exists(args.save_path) and not args.debug:
        os.makedirs(args.save_path)
    # ====== fitlog init ====== #
    fitlog.commit(__file__)
    fitlog.debug(args.debug)
    fitlog.add_hyper(args)
    # ====== tb VisualLogger init ====== #
    args.visual_logger = VisualLogger(
        args.save_path) if not args.debug else None
    # ====== cuda enable ====== #
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpuid)
    args.device = torch.device(
        'cuda') if args.cuda and torch.cuda.is_available() else torch.device(
            'cpu')
    # ====== others ====== #
    os.environ['TOKENIZERS_PARALLELISM'] = 'false'
    torch.set_num_threads(6)
    print(args, end='\n\n')
    return args
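All of the examples on this page follow the same fitlog workflow: optionally switch fitlog into no-op debug mode, point it at a log directory, record hyper-parameters, log metrics during training, and call fitlog.finish() at the end. The condensed sketch below only uses the public fitlog API seen in these examples; the 'logs' directory, the seed, the hyper-parameter values and the dummy loss are placeholders, not taken from any of the projects above.

import os
import fitlog


def minimal_fitlog_run(debug: bool = False, seed: int = 42):
    if debug:
        fitlog.debug()                     # turns every fitlog call into a no-op
    os.makedirs('logs', exist_ok=True)     # placeholder log directory
    fitlog.set_log_dir('logs')
    fitlog.set_rng_seed(seed)              # seeds python / numpy / torch in one call
    fitlog.add_hyper({'lr': 1e-3, 'batch_size': 32})  # placeholder hypers

    best_loss = float('inf')
    for step in range(3):                  # stand-in for a real training loop
        loss = 1.0 / (step + 1)            # dummy validation loss
        fitlog.add_metric(loss, step=step, name='valid loss')
        if loss < best_loss:
            best_loss = loss
            fitlog.add_best_metric(best_loss, name='loss')
    fitlog.finish()


if __name__ == '__main__':
    minimal_fitlog_run(debug=True)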
Example #2
def record_hyper_params(hyper_dict: dict):
    for k, v in hyper_dict.items():
        if k not in ('model_dir', 'record_dir', 'data_path'):
            v = v if v is not None else '-'
            fitlog.add_hyper(value=str(v), name=str(k))
    if 'dilate' not in hyper_dict.keys():
        fitlog.add_hyper(value='-', name='dilate')
    fitlog.add_other(value=platform.system(), name='platform')
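A hedged usage sketch for record_hyper_params: it assumes the function defined above is in scope and that fitlog has already been initialized (here fitlog is put into debug mode so nothing is written). The dictionary contents are illustrative placeholders, not values from the original project.

import fitlog

fitlog.debug()                        # make the fitlog calls below no-ops for this demo
hyper = {
    'model_dir': './models',          # skipped: path-like keys are never logged
    'data_path': './data/train.txt',  # skipped
    'lr': 1e-3,                       # logged via fitlog.add_hyper as a string
    'hidden_size': None,              # logged as '-' because it is None
}
record_hyper_params(hyper)            # also logs 'dilate' as '-' and the OS name via add_other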
Example #3
def main():
    from config import get_config

    C, logger = get_config()

    #----- prepare data and some global variables -----
    data_train, data_test, data_valid, relations, rel_weights = load_data(
        C, logger)

    n_rel_typs, loss_func, generator = initialize(C, logger, relations,
                                                  rel_weights)
    #----- train & test -----
    trained_models = []
    for i in range(C.ensemble_size):
        model, best_valid = train(
            C,
            logger,
            data_train,
            data_valid,
            loss_func,
            generator,
            n_rel_typs,
            run_name=str(i),
            test_data=data_test,
        )

        if hasattr(model, "module"):  #dataparallel
            model = model.module

        model = model.cpu()
        trained_models.append(model)

    #----- ensemble test -----
    micro_f1, macro_f1, loss = test(
        C,
        logger,
        data_test,
        trained_models,
        loss_func,
        generator,
        mode="test",
        epoch_id=C.epoch_numb,
        run_name='final',
    )
    fitlog.add_hyper("t%.4f v%.4f" % (macro_f1, best_valid), name="result")

    #----- save ensembled model -----
    if C.model_save:
        with open(C.model_save, "wb") as fil:
            pickle.dump(trained_models, fil)
    logger.log("final model saved at %s" % C.model_save)

    #----- finish -----
    fitlog.finish()
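The ensemble above is persisted with pickle, so loading it back is just the reverse of the save block. A minimal sketch, with the file name standing in for C.model_save:

import pickle

# 'ensemble.pkl' is a placeholder for C.model_save from the example above.
with open('ensemble.pkl', 'rb') as fil:
    trained_models = pickle.load(fil)   # list of CPU copies of the trained models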
Example #4
File: train.py Project: FFTYYY/Poem
def train(model , train_data , test_data):
	train_iter = DataSetIter(train_data , batch_size = C.batch_size)
	test_iter  = DataSetIter(test_data  , batch_size = C.batch_size)

	loss_func = nn.CrossEntropyLoss(ignore_index = 0)
	optim = tc.optim.Adam(params = model.parameters() , lr = C.lr , weight_decay = C.weight_decay)	
	scheduler = get_cosine_schedule_with_warmup(
		optim , 
		num_warmup_steps = C.warmup ,
		num_training_steps = train_iter.num_batches * C.epoch_number , 
	)

	best_test_loss 	= -1
	best_test_epoch = -1
	best_step 		= -1
	try:
		for epoch_n in range(C.epoch_number):
			tra_loss = run(model , train_iter , loss_func , epoch_n , optim , scheduler , True)
			tes_loss = run(model , test_iter , loss_func , epoch_n , None , None , False)

			logger.log ("Epoch %d ended. Train loss = %.4f , Valid loss = %.4f" % (
				epoch_n , tra_loss , tes_loss ,
			))
			fitlog.add_metric(
				tes_loss , 
				step = train_iter.num_batches * (epoch_n + 1) , 
				epoch = epoch_n , 
				name = "valid loss"
			)

			if best_test_epoch < 0 or tes_loss < best_test_loss:
				best_test_loss = tes_loss
				best_test_epoch = epoch_n
				best_step = fitlog_loss_step["train loss"]

				fitlog.add_best_metric(best_test_loss , name = "loss")
				with open(C.model_save , "wb") as fil:  # temporarily save the best model so far
					pickle.dump(model , fil)
				fitlog.add_hyper(name = "best_step" , value =  "%d / %d" % (
					best_step ,
					train_iter.num_batches * C.epoch_number , 
				))

	except KeyboardInterrupt:  # manual early stop
		pass

	logger.log ("Train end.")
	logger.log ("Got best valid loss %.4f in epoch %d" % (best_test_loss , best_test_epoch))

	return model
def after_parse_t2g(C , need_logger = False):

	#----- make logger -----

	logger = Logger(C.log_file)
	logger.log = logger.log_print_w_time
	if C.no_log:
		logger.log = logger.nolog

	C.tmp_file_name = random_tmp_name()

	#----- other stuff -----

	if C.auto_hyperparam:
		auto_hyperparam(C)
		logger.log("Hyper parameters autoset.")

	if C.no_fitlog:
		fitlog.debug()

	fitlog.set_log_dir("logs")
	fitlog.add_hyper(C)

	logger.log ("------------------------------------------------------")
	logger.log (pformat(C.__dict__))
	logger.log ("------------------------------------------------------")

	C.gpus = list(range(tc.cuda.device_count()))


	#----- initialize -----

	if C.t2g_seed > 0:
		random.seed(C.t2g_seed)
		tc.manual_seed(C.t2g_seed)
		np.random.seed(C.t2g_seed)
		tc.cuda.manual_seed_all(C.t2g_seed)
		tc.backends.cudnn.deterministic = True
		tc.backends.cudnn.benchmark = False

		logger.log ("Seed set. %d" % (C.t2g_seed))

	tc.cuda.set_device(C.gpus[0])
	C.device = C.gpus[0]

	if need_logger:
		return C , logger

	return C
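The manual seeding block above (random, numpy, torch, cuDNN) can also be written with fitlog's own helper, as Example #14 does. A minimal sketch, with the seed value as a placeholder:

import fitlog
import torch as tc

t2g_seed = 123                     # placeholder seed
if t2g_seed > 0:
    fitlog.set_rng_seed(t2g_seed)  # seeds python, numpy and torch in one call
    tc.backends.cudnn.deterministic = True   # kept explicit, matching the block above
    tc.backends.cudnn.benchmark = False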
Example #6
def train():
    args = parse_args()
    if args.debug:
        fitlog.debug()
        args.save_model = False
    # ================= define =================
    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    word_mask_index = tokenizer.mask_token_id
    word_vocab_size = len(tokenizer)

    if get_local_rank() == 0:
        fitlog.set_log_dir(args.log_dir)
        fitlog.commit(__file__, fit_msg=args.name)
        fitlog.add_hyper_in_file(__file__)
        fitlog.add_hyper(args)

    # ================= load data =================
    dist.init_process_group('nccl')
    init_logger_dist()

    n_proc = dist.get_world_size()
    bsz = args.batch_size // args.grad_accumulation // n_proc
    args.local_rank = get_local_rank()
    args.save_dir = os.path.join(args.save_dir,
                                 args.name) if args.save_model else None
    if args.save_dir is not None and os.path.exists(args.save_dir):
        raise RuntimeError('save_dir already exists.')
    logger.info('save directory: {}'.format(
        'None' if args.save_dir is None else args.save_dir))
    devices = list(range(torch.cuda.device_count()))
    NUM_WORKERS = 4

    ent_vocab, rel_vocab = load_ent_rel_vocabs()
    logger.info('# entities: {}'.format(len(ent_vocab)))
    logger.info('# relations: {}'.format(len(rel_vocab)))
    ent_freq = get_ent_freq()
    assert len(ent_vocab) == len(ent_freq), '{} {}'.format(
        len(ent_vocab), len(ent_freq))

    #####
    root = args.data_dir
    dirs = os.listdir(root)
    drop_files = []
    for dir in dirs:
        path = os.path.join(root, dir)
        max_idx = 0
        for file_name in os.listdir(path):
            if 'large' in file_name:
                continue
            max_idx = int(file_name) if int(file_name) > max_idx else max_idx
        drop_files.append(os.path.join(path, str(max_idx)))
    #####

    file_list = []
    for path, _, filenames in os.walk(args.data_dir):
        for filename in filenames:
            file = os.path.join(path, filename)
            if 'large' in file or file in drop_files:
                continue
            file_list.append(file)
    logger.info('used {} files in {}.'.format(len(file_list), args.data_dir))
    if args.data_prop > 1:
        used_files = file_list[:int(args.data_prop)]
    else:
        used_files = file_list[:round(args.data_prop * len(file_list))]

    data = GraphOTFDataSet(used_files, n_proc, args.local_rank,
                           word_mask_index, word_vocab_size, args.n_negs,
                           ent_vocab, rel_vocab, ent_freq)
    dev_data = GraphDataSet(used_files[0], word_mask_index, word_vocab_size,
                            args.n_negs, ent_vocab, rel_vocab, ent_freq)

    sampler = OTFDistributedSampler(used_files, n_proc, get_local_rank())
    train_data_iter = TorchLoaderIter(dataset=data,
                                      batch_size=bsz,
                                      sampler=sampler,
                                      num_workers=NUM_WORKERS,
                                      collate_fn=data.collate_fn)
    dev_data_iter = TorchLoaderIter(dataset=dev_data,
                                    batch_size=bsz,
                                    sampler=RandomSampler(),
                                    num_workers=NUM_WORKERS,
                                    collate_fn=dev_data.collate_fn)
    if args.test_data is not None:
        test_data = FewRelDevDataSet(path=args.test_data,
                                     label_vocab=rel_vocab,
                                     ent_vocab=ent_vocab)
        test_data_iter = TorchLoaderIter(dataset=test_data,
                                         batch_size=32,
                                         sampler=RandomSampler(),
                                         num_workers=NUM_WORKERS,
                                         collate_fn=test_data.collate_fn)

    if args.local_rank == 0:
        print('full wiki files: {}'.format(len(file_list)))
        print('used wiki files: {}'.format(len(used_files)))
        print('# of trained samples: {}'.format(len(data) * n_proc))
        print('# of trained entities: {}'.format(len(ent_vocab)))
        print('# of trained relations: {}'.format(len(rel_vocab)))

    # ================= prepare model =================
    logger.info('model init')
    if args.rel_emb is not None:  # load pretrained relation embeddings
        rel_emb = np.load(args.rel_emb)
        # add_embs = np.random.randn(3, rel_emb.shape[1])  # add <pad>, <mask>, <unk>
        # rel_emb = np.r_[add_embs, rel_emb]
        rel_emb = torch.from_numpy(rel_emb).float()
        assert rel_emb.shape[0] == len(rel_vocab), '{} {}'.format(
            rel_emb.shape[0], len(rel_vocab))
        # assert rel_emb.shape[1] == args.rel_dim
        logger.info('loaded pretrained relation embeddings. dim: {}'.format(
            rel_emb.shape[1]))
    else:
        rel_emb = None
    if args.model_name is not None:
        logger.info('further pre-train.')
        config = RobertaConfig.from_pretrained('roberta-base',
                                               type_vocab_size=3)
        model = CoLAKE(config=config,
                       num_ent=len(ent_vocab),
                       num_rel=len(rel_vocab),
                       ent_dim=args.ent_dim,
                       rel_dim=args.rel_dim,
                       ent_lr=args.ent_lr,
                       ip_config=args.ip_config,
                       rel_emb=None,
                       emb_name=args.emb_name)
        states_dict = torch.load(args.model_name)
        model.load_state_dict(states_dict, strict=True)
    else:
        model = CoLAKE.from_pretrained(
            'roberta-base',
            num_ent=len(ent_vocab),
            num_rel=len(rel_vocab),
            ent_lr=args.ent_lr,
            ip_config=args.ip_config,
            rel_emb=rel_emb,
            emb_name=args.emb_name,
            cache_dir=PYTORCH_PRETRAINED_BERT_CACHE /
            'dist_{}'.format(args.local_rank))
        model.extend_type_embedding(token_type=3)
    # if args.local_rank == 0:
    #     for name, param in model.named_parameters():
    #         if param.requires_grad is True:
    #             print('{}: {}'.format(name, param.shape))

    # ================= train model =================
    # lr=1e-4 for peak value, lr=5e-5 for initial value
    logger.info('trainer init')
    no_decay = [
        'bias', 'LayerNorm.bias', 'LayerNorm.weight', 'layer_norm.bias',
        'layer_norm.weight'
    ]
    param_optimizer = list(model.named_parameters())
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]
    word_acc = WordMLMAccuracy(pred='word_pred',
                               target='masked_lm_labels',
                               seq_len='word_seq_len')
    ent_acc = EntityMLMAccuracy(pred='entity_pred',
                                target='ent_masked_lm_labels',
                                seq_len='ent_seq_len')
    rel_acc = RelationMLMAccuracy(pred='relation_pred',
                                  target='rel_masked_lm_labels',
                                  seq_len='rel_seq_len')
    metrics = [word_acc, ent_acc, rel_acc]

    if args.test_data is not None:
        test_metric = [rel_acc]
        tester = Tester(data=test_data_iter,
                        model=model,
                        metrics=test_metric,
                        device=list(range(torch.cuda.device_count())))
        # tester.test()
    else:
        tester = None

    optimizer = optim.AdamW(optimizer_grouped_parameters,
                            lr=args.lr,
                            betas=(0.9, args.beta),
                            eps=1e-6)
    # warmup_callback = WarmupCallback(warmup=args.warm_up, schedule='linear')
    fitlog_callback = MyFitlogCallback(tester=tester,
                                       log_loss_every=100,
                                       verbose=1)
    gradient_clip_callback = GradientClipCallback(clip_value=1,
                                                  clip_type='norm')
    emb_callback = EmbUpdateCallback(model.ent_embeddings)
    all_callbacks = [gradient_clip_callback, emb_callback]
    if args.save_dir is None:
        master_callbacks = [fitlog_callback]
    else:
        save_callback = SaveModelCallback(args.save_dir,
                                          model.ent_embeddings,
                                          only_params=True)
        master_callbacks = [fitlog_callback, save_callback]

    if args.do_test:
        states_dict = torch.load(os.path.join(args.save_dir,
                                              args.model_name)).state_dict()
        model.load_state_dict(states_dict)
        data_iter = TorchLoaderIter(dataset=data,
                                    batch_size=args.batch_size,
                                    sampler=RandomSampler(),
                                    num_workers=NUM_WORKERS,
                                    collate_fn=data.collate_fn)
        tester = Tester(data=data_iter,
                        model=model,
                        metrics=metrics,
                        device=devices)
        tester.test()
    else:
        trainer = DistTrainer(train_data=train_data_iter,
                              dev_data=dev_data_iter,
                              model=model,
                              optimizer=optimizer,
                              loss=LossInForward(),
                              batch_size_per_gpu=bsz,
                              update_every=args.grad_accumulation,
                              n_epochs=args.epoch,
                              metrics=metrics,
                              callbacks_master=master_callbacks,
                              callbacks_all=all_callbacks,
                              validate_every=5000,
                              use_tqdm=True,
                              fp16='O1' if args.fp16 else '')
        trainer.train(load_best_model=False)
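In the distributed setup above only rank 0 initializes fitlog, and only the master's callbacks log to it. One hedged way to make that pattern explicit is to put every non-master rank into fitlog's debug (no-op) mode; the helper below is a sketch, not code from the example, and it reads the rank from the LOCAL_RANK environment variable that torch.distributed launchers set.

import os
import fitlog


def init_fitlog_for_rank(log_dir: str) -> int:
    # Sketch: the master rank writes real fitlog records, every other rank
    # gets no-ops, so stray fitlog calls on workers cannot duplicate the logs.
    local_rank = int(os.environ.get('LOCAL_RANK', '0'))
    if local_rank == 0:
        os.makedirs(log_dir, exist_ok=True)
        fitlog.set_log_dir(log_dir)
    else:
        fitlog.debug()
    return local_rank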
Example #7
    config.mode = args.mode
    config.setting = args.setting

    # save model
    if not os.path.exists(config.model_path):
        if "/" in config.model_path:
            os.makedirs(config.model_path, 0o777)
        else:
            os.mkdir(config.model_path)

    # fitlog dir
    logger.info(f"set fitlog dir to {args.fitlog_dir}")
    if not os.path.exists(args.fitlog_dir):
        os.mkdir(args.fitlog_dir)
    fitlog.set_log_dir(args.fitlog_dir)
    fitlog.add_hyper(args)

    if not os.path.exists(config.model_path):
        os.mkdir(config.model_path)

    if args.visible_gpu != -1:
        config.use_gpu = True
        torch.cuda.set_device(args.visible_gpu)
        device = torch.device(args.visible_gpu)
    else:
        config.use_gpu = False

    mode = args.mode
    logger.info("------start mode train------")
    run_train()
fitlog.commit(__file__)  # auto-commit your code

if __name__ == '__main__':
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--config_file', default='cnn.cfg')
    argparser.add_argument('--w', default='cnn', help='word encoder')
    argparser.add_argument('--s', default='lstm', help='sent encoder')
    argparser.add_argument('--seed', default=888, type=int, help='seed')
    argparser.add_argument('--gpu', default=0, type=int, help='gpu id')
    argparser.add_argument('--fold', default=9, type=int, help='fold for test')
    args = argparser.parse_args()

    config = Config(args)
    torch.set_num_threads(config.threads)

    fitlog.add_hyper({'model': args.w, 'fold': args.fold})

    # set cuda
    config.use_cuda = args.gpu >= 0 and torch.cuda.is_available()
    if config.use_cuda:
        torch.cuda.set_device(args.gpu)
        config.device = torch.device("cuda", args.gpu)
    else:
        config.device = torch.device("cpu")
    logging.info("Use cuda: %s, gpu id: %d.", config.use_cuda, args.gpu)

    # vocab
    cache_name = "./save/vocab/" + str(args.fold) + ".pickle"
    if Path(cache_name).exists():
        vocab_file = open(cache_name, 'rb')
        vocab = pickle.load(vocab_file)
Example #9
    def _train(self, criterion, optimizer, train_data_loader, val_data_loader,
               test_data_loader):
        fitlog.add_hyper({
            "model_name": self.opt.model_name,
            "dataset": self.opt.dataset,
            'resplit': self.opt.resplit,
            "domain": self.opt.domain,
            "aug": self.opt.aug,
            "adv": self.opt.adv,
            "aux": self.opt.aux,
            "adv_aux": self.opt.adv_aux,
            'chg': self.opt.chg
        })

        max_val_acc = 0
        max_val_f1 = 0
        global_step = 0
        last_model_path = None
        # model_path =None
        path = None

        pgd = PGD(self.model)
        k = 3
        for epoch in range(self.opt.num_epoch):
            logger.info('>' * 100)
            logger.info('epoch: {}'.format(epoch))
            n_correct, n_total, loss_total = 0, 0, 0
            # switch model to training mode
            self.model.train()
            for i_batch, sample_batched in enumerate(train_data_loader):
                global_step += 1
                # clear gradient accumulators
                optimizer.zero_grad()

                inputs = [
                    sample_batched[col].to(self.opt.device)
                    for col in self.opt.inputs_cols
                ]
                # identical for 'bert_multi_target' and all other models
                targets = sample_batched['polarity'].to(self.opt.device)

                if self.opt.model_name in reg_list:
                    aux_cls_logeits, outputs, reg_can_loss, reg_aux_loss, bert_word_output, reg_chg_loss = self.model(
                        inputs, None)
                else:
                    outputs = self.model(inputs)
                    reg_can_loss = 0
                    reg_aux_loss = 0
                    reg_chg_loss = 0
                # print('outputs',outputs.shape)
                # print('targets',targets.shape)

                # print(outputs,'outputs')
                # print(targets,'polarity')

                loss_1 = criterion(outputs, targets)
                loss_2 = reg_can_loss
                loss_3 = reg_aux_loss
                loss_4 = reg_chg_loss

                weighted_loss_2 = loss_2 * self.opt.can
                weighted_loss_3 = loss_3 * self.opt.aux
                weighted_loss_4 = loss_4 * self.opt.chg

                loss = 1 * loss_1 + weighted_loss_2 + weighted_loss_3 + weighted_loss_4

                if self.opt.adv > 0:
                    # print(inputs.shape)
                    if self.opt.adv_aux == 1:
                        loss_adv = self._loss_adv(weighted_loss_3,
                                                  bert_word_output,
                                                  criterion,
                                                  inputs,
                                                  targets,
                                                  p_mult=self.opt.adv)
                    else:
                        loss_adv = self._loss_adv(loss,
                                                  bert_word_output,
                                                  criterion,
                                                  inputs,
                                                  targets,
                                                  p_mult=self.opt.adv)
                    loss += loss_adv
                else:
                    loss_adv = 0
                loss.backward()

                # pgd.backup_grad()
                #     for t in range(K):
                #         pgd.attack(is_first_attack=(t==0)) # add adversarial perturbation to the embeddings; back up param.data on the first attack
                #         if t != K-1:
                #             model.zero_grad()
                #         else:
                #             pgd.restore_grad()
                #         loss_adv = model(batch_input, batch_label)
                #         loss_adv.backward() # back-propagate, accumulating the adversarial gradient on top of the normal grad
                #     pgd.restore() # restore the embedding parameters

                optimizer.step()

                n_correct += (torch.argmax(outputs,
                                           -1) == targets).sum().item()
                # print(outputs.shape)
                # n_correct += (torch.argmax(aux_cls_logeits, -1) == 4*targets).sum().item()
                n_total += len(outputs)
                loss_total += loss.item() * len(outputs)
                if global_step % self.opt.log_step == 0:
                    train_acc = n_correct / n_total
                    train_loss = loss_total / n_total
                    logger.info(
                        'loss_total: {:.4f}, acc: {:.4f},loss_main: {:.4f},reg_can_loss: {:.4f},loss_adv: {:.4f},reg_aux_loss {:.4f},reg_chg_loss {:.4f}'
                        .format(train_loss, train_acc, loss_1, weighted_loss_2,
                                loss_adv, weighted_loss_3, weighted_loss_4))
                    fitlog.add_metric(
                        {
                            "Train": {
                                'loss_total: {:.4f}, acc: {:.4f},loss_main: {:.4f},reg_can_loss: {:.4f},loss_adv: {:.4f},reg_aux_loss {:.4f},reg_chg_loss {:.4f}'
                                .format(train_loss, train_acc, loss_1,
                                        weighted_loss_2, loss_adv,
                                        weighted_loss_3, weighted_loss_4)
                            }
                        },
                        step=global_step)
            val_acc, val_f1 = self._evaluate_acc_f1(val_data_loader)
            test_acc, test_f1 = self._evaluate_acc_f1(test_data_loader)

            logger.info('> val_acc: {:.4f}, val_f1: {:.4f}'.format(
                val_acc, val_f1))
            logger.info('> test_acc: {:.4f}, test_f1: {:.4f}'.format(
                test_acc, test_f1))

            if val_acc > max_val_acc:
                max_val_acc = val_acc
                if not os.path.exists('state_dict'):
                    os.mkdir('state_dict')
                model_path = 'state_dict/{0}_{1}_domain-{2}_can{3}_aug{4}_adv{5}_aux{6}_val_acc{7}_resplit{8}'.format(
                    self.opt.model_name, self.opt.dataset, self.opt.domain,
                    self.opt.can, self.opt.aug, self.opt.adv, self.opt.aux,
                    round(val_acc, 4), self.opt.resplit)
                bert_path = 'state_dict/{0}_{1}_domain-{2}_can{3}_aug{4}_adv{5}_aux{6}_val_acc{7}_resplit{8}_bert'.format(
                    self.opt.model_name, self.opt.dataset, self.opt.domain,
                    self.opt.can, self.opt.aug, self.opt.adv, self.opt.aux,
                    round(val_acc, 4), self.opt.resplit)

                # fitlog.add_hyper({"model_name":self.opt.model_name,"dataset":self.opt.dataset,'resplit':self.opt.resplit,"domain":self.opt.domain,"aug":self.opt.aug,"adv":self.opt.adv,"aux":self.opt.aux})

                fitlog.add_metric(
                    {"val": {
                        "val_acc": val_acc,
                        "val_f1": val_f1
                    }},
                    step=global_step)
                fitlog.add_metric(
                    {"test": {
                        "test_acc": test_acc,
                        "test_f1": test_f1
                    }},
                    step=global_step)

                fitlog.add_best_metric(
                    {"val": {
                        "val_acc": val_acc,
                        "val_f1": val_f1
                    }})
                fitlog.add_best_metric(
                    {"test": {
                        "test_acc": test_acc,
                        "test_f1": test_f1
                    }})

                if last_model_path != None:
                    os.remove(last_model_path)
                    if self.opt.model_name not in ['lcf_bert']:
                        os.remove(last_bert_path)
                last_model_path = model_path
                last_bert_path = bert_path
                torch.save(self.model.state_dict(), model_path)
                if self.opt.model_name not in ['lcf_bert']:
                    torch.save(self.model.bert.state_dict(), bert_path)
                logger.info('>> saved: {}'.format(model_path))

                # max_val_f1 = val_f1
            if val_f1 > max_val_f1:
                max_val_f1 = val_f1
                # fitlog.add_metric(acc,name="Acc",step=step)

        return model_path
Example #10
def main():
    args = parse_args()

    if args.debug:
        fitlog.debug()

    fitlog.set_log_dir(args.log_dir)
    fitlog.commit(__file__)
    fitlog.add_hyper_in_file(__file__)
    fitlog.add_hyper(args)
    if args.gpu != 'all':
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    train_set, dev_set, test_set, temp_ent_vocab = load_fewrel_graph_data(
        data_dir=args.data_dir)

    print('data directory: {}'.format(args.data_dir))
    print('# of train samples: {}'.format(len(train_set)))
    print('# of dev samples: {}'.format(len(dev_set)))
    print('# of test samples: {}'.format(len(test_set)))

    ent_vocab, rel_vocab = load_ent_rel_vocabs(path='../')

    # load entity embeddings
    ent_index = []
    for k, v in temp_ent_vocab.items():
        ent_index.append(ent_vocab[k])
    ent_index = torch.tensor(ent_index)
    ent_emb = np.load(os.path.join(args.model_path, 'entities.npy'))
    ent_embedding = nn.Embedding.from_pretrained(torch.from_numpy(ent_emb))
    ent_emb = ent_embedding(ent_index.view(1, -1)).squeeze().detach()

    # load CoLAKE parameters
    config = RobertaConfig.from_pretrained('roberta-base', type_vocab_size=3)
    model = CoLAKEForRE(config,
                        num_types=len(train_set.label_vocab),
                        ent_emb=ent_emb)
    states_dict = torch.load(os.path.join(args.model_path, 'model.bin'))
    model.load_state_dict(states_dict, strict=False)
    print('parameters below are randomly initialized:')
    for name, param in model.named_parameters():
        if name not in states_dict:
            print(name)

    # tie relation classification head
    rel_index = []
    for k, v in train_set.label_vocab.items():
        rel_index.append(rel_vocab[k])
    rel_index = torch.LongTensor(rel_index)
    rel_embeddings = nn.Embedding.from_pretrained(
        states_dict['rel_embeddings.weight'])
    rel_index = rel_index.cuda()
    rel_cls_weight = rel_embeddings(rel_index.view(1, -1)).squeeze()
    model.tie_rel_weights(rel_cls_weight)

    model.rel_head.dense.weight.data = states_dict['rel_lm_head.dense.weight']
    model.rel_head.dense.bias.data = states_dict['rel_lm_head.dense.bias']
    model.rel_head.layer_norm.weight.data = states_dict[
        'rel_lm_head.layer_norm.weight']
    model.rel_head.layer_norm.bias.data = states_dict[
        'rel_lm_head.layer_norm.bias']

    model.resize_token_embeddings(
        len(RobertaTokenizer.from_pretrained('roberta-base')) + 4)
    print('parameters of CoLAKE have been loaded.')

    # fine-tune
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight', 'embedding']
    param_optimizer = list(model.named_parameters())
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        args.weight_decay
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]
    optimizer = optim.AdamW(optimizer_grouped_parameters,
                            lr=args.lr,
                            betas=(0.9, args.beta),
                            eps=1e-6)

    metrics = [MacroMetric(pred='pred', target='target')]

    test_data_iter = TorchLoaderIter(dataset=test_set,
                                     batch_size=args.batch_size,
                                     sampler=RandomSampler(),
                                     num_workers=4,
                                     collate_fn=test_set.collate_fn)
    devices = list(range(torch.cuda.device_count()))
    tester = Tester(data=test_data_iter,
                    model=model,
                    metrics=metrics,
                    device=devices)
    # tester.test()

    fitlog_callback = FitlogCallback(tester=tester,
                                     log_loss_every=100,
                                     verbose=1)
    gradient_clip_callback = GradientClipCallback(clip_value=1,
                                                  clip_type='norm')
    warmup_callback = WarmupCallback(warmup=args.warm_up, schedule='linear')

    bsz = args.batch_size // args.grad_accumulation

    train_data_iter = TorchLoaderIter(dataset=train_set,
                                      batch_size=bsz,
                                      sampler=RandomSampler(),
                                      num_workers=4,
                                      collate_fn=train_set.collate_fn)
    dev_data_iter = TorchLoaderIter(dataset=dev_set,
                                    batch_size=bsz,
                                    sampler=RandomSampler(),
                                    num_workers=4,
                                    collate_fn=dev_set.collate_fn)

    trainer = Trainer(
        train_data=train_data_iter,
        dev_data=dev_data_iter,
        model=model,
        optimizer=optimizer,
        loss=LossInForward(),
        batch_size=bsz,
        update_every=args.grad_accumulation,
        n_epochs=args.epoch,
        metrics=metrics,
        callbacks=[fitlog_callback, gradient_clip_callback, warmup_callback],
        device=devices,
        use_tqdm=True)

    trainer.train(load_best_model=False)
Example #11
    )
    parser.add_argument("--learning_rate", default=0.001, type=float)
    parser.add_argument("--l2reg", default=0.00001, type=float)
    parser.add_argument("--num_epoch", default=100, type=int)
    parser.add_argument("--batch_size", default=32, type=int)
    parser.add_argument("--embed_dim", default=300, type=int)
    parser.add_argument("--hidden_dim", default=300, type=int)
    parser.add_argument("--dropout", default=0.7, type=float)

    opt = parser.parse_args()  # opt--->all args
    if opt.dataset.endswith("/"):
        opt.dataset = opt.dataset[:-1]
    ################fitlog code####################
    fitlog.set_log_dir("logs")
    fitlog.set_rng_seed()
    fitlog.add_hyper(opt)
    fitlog.add_hyper(value="ASGCN", name="model")
    ################fitlog code####################
    opt.polarities_dim = 3
    opt.initializer = "xavier_uniform_"
    opt.optimizer = "adam"
    opt.model_name = "asgcn"
    opt.log_step = 20
    opt.l2reg = 1e-5
    opt.early_stop = 25

    if "/" in opt.dataset:
        pre_model_name, layer, dataset = opt.dataset.split("/")[-3:]
    else:
        pre_model_name, dataset = "None", opt.dataset
        layer = "0"
Example #12
def parse_args():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument("--dataset_name",
                        type=str,
                        default="rest",
                        help="Choose absa dataset.")
    parser.add_argument("--refresh",
                        type=int,
                        default=0,
                        help="Generate data again")

    # Model parameters
    parser.add_argument(
        "--glove_dir",
        type=str,
        help="Directory storing glove embeddings",
    )
    parser.add_argument("--highway",
                        action="store_true",
                        help="Use highway embed.")
    parser.add_argument(
        "--num_layers",
        type=int,
        default=2,
        help="Number of layers of bilstm or highway or elmo.",
    )
    parser.add_argument("--max_hop",
                        type=int,
                        default=4,
                        help="max number of hops")
    parser.add_argument("--num_heads",
                        type=int,
                        default=6,
                        help="Number of heads for gat.")
    parser.add_argument("--dropout",
                        type=float,
                        default=0.7,
                        help="Dropout rate for embedding.")
    parser.add_argument("--num_gcn_layers",
                        type=int,
                        default=1,
                        help="Number of GCN layers.")
    parser.add_argument("--gcn_mem_dim",
                        type=int,
                        default=300,
                        help="Dimension of the W in GCN.")
    parser.add_argument("--gcn_dropout",
                        type=float,
                        default=0.2,
                        help="Dropout rate for GCN.")
    # GAT
    parser.add_argument(
        "--gat_attention_type",
        type=str,
        choices=["linear", "dotprod", "gcn"],
        default="dotprod",
        help="The attention used for gat",
    )

    parser.add_argument(
        "--embedding_type",
        type=str,
        default="glove",
        choices=["glove", "bert", "roberta"],
    )
    parser.add_argument("--embedding_dim",
                        type=int,
                        default=300,
                        help="Dimension of glove embeddings")
    parser.add_argument(
        "--dep_relation_embed_dim",
        type=int,
        default=300,
        help="Dimension for dependency relation embeddings.",
    )

    parser.add_argument(
        "--hidden_size",
        type=int,
        default=300,
        help="Hidden size of bilstm, in early stage.",
    )
    parser.add_argument(
        "--final_hidden_size",
        type=int,
        default=300,
        help="Hidden size of bilstm, in early stage.",
    )
    parser.add_argument("--num_mlps",
                        type=int,
                        default=2,
                        help="Number of mlps in the last of model.")

    # Training parameters
    parser.add_argument(
        "--batch_size",
        default=32,
        type=int,
        help="Batch size per GPU/CPU for training.",
    )
    parser.add_argument(
        "--learning_rate",
        default=1e-3,
        type=float,
        help="The initial learning rate for Adam.",
    )

    parser.add_argument(
        "--num_train_epochs",
        default=25,
        type=int,
        help="Total number of training epochs to perform.",
    )

    args = parser.parse_args()
    if args.dataset_name.endswith("/"):
        args.dataset_name = args.dataset_name[:-1]
    fitlog.add_hyper(args)

    if "/" in args.dataset_name:
        data = os.path.basename(args.dataset_name)
        output_dir = f"data/{data}"
    else:
        output_dir = f"data/{args.dataset_name}"

    args.output_dir = output_dir

    args.lower = 1
    args.logging_steps = 30
    args.max_steps = -1
    args.max_grad_norm = 10
    args.adam_epsilon = 1e-8
    args.weight_decay = 0
    args.gradient_accumulation_steps = 1
    args.per_gpu_train_batch_size = args.batch_size
    args.per_gpu_eval_batch_size = args.batch_size * 2
    args.add_non_connect = 1
    args.multi_hop = True
    args.num_classes = 3
    args.cuda_id = "0"
    # args.bert_model_dir = "/data1/SHENWZH/models/bert_base"
    args.pure_bert = False
    args.gat_our = True
    args.gat_roberta = False
    args.gat = False
    args.gat_bert = False

    return args
Example #13
def main():
    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )

    # Parse args
    args = parse_args()
    if args.dataset_name.endswith("/"):
        args.dataset_name = args.dataset_name[:-1]
    dataset_name = args.dataset_name
    # of the form ~/rgat/bert/11/Restaurants
    if "/" in dataset_name:
        pre_model_name, layer, dataset = dataset_name.split("/")[-3:]
    else:
        pre_model_name, dataset = "None", dataset_name
        layer = "-1"
    fitlog.add_hyper(value=pre_model_name, name="model_name")
    fitlog.add_hyper(value=dataset, name="dataset")
    fitlog.add_hyper(value=layer, name="pre_layer")
    fitlog.add_hyper(value="RGAT", name="model")

    # if 'Laptop' in args.dataset_name:
    #     assert args.lower == 0
    check_args(args)

    # Setup CUDA, GPU training
    # os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda_id
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device
    logger.info("Device is %s", args.device)

    # BERT: load the pretrained model and tokenizer; check whether it is necessary to keep BERT here
    if args.embedding_type == "bert":
        tokenizer = BertTokenizer.from_pretrained(args.bert_model_dir)
        args.tokenizer = tokenizer
    elif args.embedding_type == "roberta":
        tokenizer = RobertaTokenizer.from_pretrained(args.bert_model_dir)
        args.tokenizer = tokenizer

    # Load datasets and vocabs
    (
        train_dataset,
        test_dataset,
        word_vocab,
        dep_tag_vocab,
        pos_tag_vocab,
    ) = load_datasets_and_vocabs(args)

    # Build Model
    # model = Aspect_Text_Multi_Syntax_Encoding(args, dep_tag_vocab['len'], pos_tag_vocab['len'])
    if args.pure_bert:
        model = Pure_Bert(args)
    elif args.gat_roberta:
        model = Aspect_Roberta_GAT(args, dep_tag_vocab["len"],
                                   pos_tag_vocab["len"])
    elif args.gat_bert:
        model = Aspect_Bert_GAT(args, dep_tag_vocab["len"],
                                pos_tag_vocab["len"])  # R-GAT + Bert
    elif args.gat_our:
        model = Aspect_Text_GAT_ours(
            args, dep_tag_vocab["len"],
            pos_tag_vocab["len"])  # R-GAT with reshaped tree
    else:
        model = Aspect_Text_GAT_only(
            args, dep_tag_vocab["len"],
            pos_tag_vocab["len"])  # original GAT with reshaped tree

    model.to(args.device)
    # Train
    _, _, all_eval_results = train(args, train_dataset, model, test_dataset)

    print("\n\nBest Results:")
    if len(all_eval_results):
        best_eval_result = max(all_eval_results, key=lambda x: x["acc"])
        step = [
            i for i, result in enumerate(all_eval_results)
            if result == best_eval_result
        ][0]
        logger.info("Achieve at step {}/{}".format(step,
                                                   len(all_eval_results)))
        for key in sorted(best_eval_result.keys()):
            logger.info("  %s = %s", key, str(best_eval_result[key]))
        # fitlog.add_best_metric(value=best_eval_result['acc'], name='acc')
        # fitlog.add_best_metric(value=best_eval_result['f1'], name='f1')
    fitlog.finish()
Example #14
C = _par.parse_args()

if not C.name:
    C.name = autoname()
os.makedirs("./model_save", exist_ok=True)
C.model_save = "./model_save/model_%s" % C.name

now_time = time.localtime(time.time())
C.time = "%d-%d-%d %d:%d" % (
    (now_time.tm_year) % 100,
    now_time.tm_mon,
    now_time.tm_mday,
    now_time.tm_hour,
    now_time.tm_min,
)

fitlog.add_hyper(C)


def listize(s):
    return [int(x) for x in s.strip().split(",")]


C.gpus = listize(C.gpus)

if C.seed >= 0:
    fitlog.set_rng_seed(C.seed)
else:
    fitlog.set_rng_seed()
Example #15
File: train.py Project: FFTYYY/Poem
	#----- get data & model -----

	vocab , data = load_data(C.data_path , C.force_reprocess , C.data_save)
	train_data , valid_data = data[:-1000] , data[-1000:]

	Model = models[C.model]
	model = Model(
		vocab = vocab , logger = logger ,
		d_model = C.d_model , num_layers = C.num_layers , d_hid = C.d_hid , h = C.h ,
		dropout = C.dropout ,
	 )
	model = model.cuda(C.gpus[0])
	if len(C.gpus) > 1:
		#tc.distributed.init_process_group(backend = "nccl")
		model = nn.DataParallel(model , C.gpus)

	#----- train -----

	start_time = time.time()
	model = train(model , train_data , valid_data)
	end_time = time.time()
	fitlog.add_hyper(name = "training time" , value = "%.3f" % (end_time - start_time))


	#----- save model -----
	logger.log("model saved.")
	logger.log("my name is %s." % C.name)

	#----- finish -----

	fitlog.finish()
import fitlog

use_fitlog = False
if not use_fitlog:
    fitlog.debug()
fitlog.set_log_dir('logs')
load_dataset_seed = 100
fitlog.add_hyper(load_dataset_seed, 'load_dataset_seed')
fitlog.set_rng_seed(load_dataset_seed)

import sys

sys.path.append('../')

import argparse
from fastNLP.core import Trainer
from fastNLP.core import Callback
from fastNLP import LossInForward
from fastNLP.core.metrics import SpanFPreRecMetric, AccuracyMetric
from fastNLP.core.callback import WarmupCallback, GradientClipCallback, EarlyStopCallback, FitlogCallback
from fastNLP import LRScheduler
from fastNLP import logger

import torch
import torch.optim as optim
import torch.nn as nn
from torch.optim.lr_scheduler import LambdaLR

import collections

from load_data import *
Example #17
fitlog.commit(__file__)
fitlog.add_hyper_in_file(__file__)

# hypers
model_name = 'skip_lstm'
task = 'text_classification'
hidden_units = 128
num_layers = 1
batch_size = 32
learning_rate = 1e-3
# hypers

fitlog.add_hyper({
    'model_name': model_name,
    'task': task,
    'hidden_units': hidden_units,
    'num_layers': num_layers,
    'batch_size': batch_size,
    'learning_rate': learning_rate
})


class TextModel(nn.Module):
    def __init__(self,
                 cells,
                 model,
                 embed_num,
                 embed_dim,
                 hidden_dim,
                 output_dim,
                 pre_weight=None):
        super(TextModel, self).__init__()