def train(path):
    # test saving pipeline
    save_pipe(path)

    # Trainer
    trainer = Trainer(model=model, train_data=train_data, dev_data=dev_data,
                      loss=ParserLoss(), metrics=ParserMetric(), metric_key='UAS',
                      **train_args.data,
                      optimizer=fastNLP.Adam(**optim_args.data),
                      save_path=path)

    # model.word_embedding = torch.nn.Embedding.from_pretrained(embed, freeze=False)
    model.word_embedding.padding_idx = word_v.padding_idx
    model.word_embedding.weight.data[word_v.padding_idx].fill_(0)
    model.pos_embedding.padding_idx = pos_v.padding_idx
    model.pos_embedding.weight.data[pos_v.padding_idx].fill_(0)

    # try:
    #     ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
    #     print('model parameter loaded!')
    # except Exception as _:
    #     print("No saved model. Continue.")
    #     pass

    # Start training
    trainer.train()
    print("Training finished!")

    # save pipeline
    save_pipe(path)
    print('pipe saved')

def test_trainer_suggestion3(self):
    # Check that the error message gives the user a correct hint.
    # The data needed by forward() is provided, but forward() does not return a 'loss' key.
    dataset = prepare_fake_dataset2('x1', 'x2')
    dataset.set_input('x1', 'x2', 'y', flag=True)

    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(5, 4)

        def forward(self, x1, x2, y):
            x1 = self.fc(x1)
            x2 = self.fc(x2)
            x = x1 + x2
            loss = F.cross_entropy(x, y)
            return {'wrong_loss_key': loss}

    model = Model()
    with self.assertRaises(NameError):
        trainer = Trainer(train_data=dataset, model=model, use_tqdm=False, print_every=2)
        trainer.train()

def test_trainer_suggestion2(self):
    # Check that the error message gives the user a correct hint.
    # The data needed by forward() is provided; training should run.
    dataset = prepare_fake_dataset2('x1', 'x2')
    dataset.set_input('x1', 'x2', 'y', flag=True)

    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(5, 4)

        def forward(self, x1, x2, y):
            x1 = self.fc(x1)
            x2 = self.fc(x2)
            x = x1 + x2
            loss = F.cross_entropy(x, y)
            return {'loss': loss}

    model = Model()
    trainer = Trainer(train_data=dataset, model=model, use_tqdm=False, print_every=2)
    trainer.train()

def test_case(self):
    def prepare_fake_dataset():
        mean = np.array([-3, -3])
        cov = np.array([[1, 0], [0, 1]])
        class_A = np.random.multivariate_normal(mean, cov, size=(1000,))

        mean = np.array([3, 3])
        cov = np.array([[1, 0], [0, 1]])
        class_B = np.random.multivariate_normal(mean, cov, size=(1000,))

        data_set = DataSet([Instance(x=[float(item[0]), float(item[1])], y=[0.0]) for item in class_A] +
                           [Instance(x=[float(item[0]), float(item[1])], y=[1.0]) for item in class_B])
        return data_set

    data_set = prepare_fake_dataset()
    data_set.set_input("x")
    data_set.set_target("y")
    model = NaiveClassifier(2, 1)
    trainer = Trainer(data_set, model, loss=BCELoss(pred="predict", target="y"),
                      n_epochs=1, batch_size=32, print_every=50, optimizer=SGD(lr=0.1),
                      check_code_level=2, use_tqdm=False, callbacks=[EchoCallback()])
    trainer.train()

def train(path):
    # Trainer
    trainer = Trainer(**train_args.data)

    def _define_optim(obj):
        lr = optim_args.data['lr']
        embed_params = set(obj._model.word_embedding.parameters())
        decay_params = set(obj._model.arc_predictor.parameters()) | set(obj._model.label_predictor.parameters())
        params = [p for p in obj._model.parameters() if p not in decay_params and p not in embed_params]
        obj._optimizer = torch.optim.Adam([
            {'params': list(embed_params), 'lr': lr * 0.1},
            {'params': list(decay_params), **optim_args.data},
            {'params': params}
        ], lr=lr, betas=(0.9, 0.9))
        obj._scheduler = torch.optim.lr_scheduler.LambdaLR(obj._optimizer,
                                                           lambda ep: max(.75 ** (ep / 5e4), 0.05))

    def _update(obj):
        # torch.nn.utils.clip_grad_norm_(obj._model.parameters(), 5.0)
        obj._scheduler.step()
        obj._optimizer.step()

    trainer.define_optimizer = lambda: _define_optim(trainer)
    trainer.update = lambda: _update(trainer)
    trainer.set_validator(Tester(**test_args.data, evaluator=ParserEvaluator(ignore_label)))

    model.word_embedding = torch.nn.Embedding.from_pretrained(embed, freeze=False)
    model.word_embedding.padding_idx = word_v.padding_idx
    model.word_embedding.weight.data[word_v.padding_idx].fill_(0)
    model.pos_embedding.padding_idx = pos_v.padding_idx
    model.pos_embedding.weight.data[pos_v.padding_idx].fill_(0)

    # try:
    #     ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
    #     print('model parameter loaded!')
    # except Exception as _:
    #     print("No saved model. Continue.")
    #     pass

    # Start training
    trainer.train(model, train_data, dev_data)
    print("Training finished!")

    # Saver
    saver = ModelSaver("./save/saved_model.pkl")
    saver.save_pytorch(model)
    print("Model saved!")

def train():
    loader = LMDataSetLoader()
    train_data = loader.load()

    pre = Preprocessor(label_is_seq=True, share_vocab=True)
    train_set = pre.run(train_data, pickle_path=PICKLE)

    model = CharLM(50, 50, pre.vocab_size, pre.char_vocab_size)

    trainer = Trainer(task="language_model", loss=Loss("cross_entropy"))
    trainer.train(model, train_set)

def train(path):
    # test saving pipeline
    save_pipe(path)
    embed = EmbedLoader.fast_load_embedding(model_args['word_emb_dim'], emb_file_name, word_v)
    embed = torch.tensor(embed, dtype=torch.float32)

    # embed = EmbedLoader.fast_load_embedding(emb_dim=model_args['word_emb_dim'], emb_file=emb_file_name, vocab=word_v)
    # embed = torch.tensor(embed, dtype=torch.float32)
    # model.word_embedding = torch.nn.Embedding.from_pretrained(embed, freeze=True)
    model.word_embedding.padding_idx = word_v.padding_idx
    model.word_embedding.weight.data[word_v.padding_idx].fill_(0)
    model.pos_embedding.padding_idx = pos_v.padding_idx
    model.pos_embedding.weight.data[pos_v.padding_idx].fill_(0)

    class MyCallback(Callback):
        def on_step_end(self, optimizer):
            step = self.trainer.step
            # learning rate decay
            if step > 0 and step % 1000 == 0:
                for pg in optimizer.param_groups:
                    pg['lr'] *= 0.93
                print('decay lr to {}'.format([pg['lr'] for pg in optimizer.param_groups]))

            if step == 3000:
                # start training embedding
                print('start training embedding at {}'.format(step))
                model = self.trainer.model
                for m in model.modules():
                    if isinstance(m, torch.nn.Embedding):
                        m.weight.requires_grad = True

    # Trainer
    trainer = Trainer(model=model, train_data=train_data, dev_data=dev_data,
                      loss=ParserLoss(), metrics=ParserMetric(), metric_key='UAS',
                      **train_args.data,
                      optimizer=fastNLP.Adam(**optim_args.data),
                      save_path=path,
                      callbacks=[MyCallback()])

    # Start training
    try:
        trainer.train()
        print("Training finished!")
    finally:
        # save pipeline
        save_pipe(path)
        print('pipe saved')

def test_KeyBoardInterrupt(self):
    data_set, model = prepare_env()
    trainer = Trainer(data_set, model, loss=BCELoss(pred="predict", target="y"),
                      n_epochs=5, batch_size=32, print_every=50, optimizer=SGD(lr=0.1),
                      check_code_level=2, use_tqdm=False, callbacks=[ControlC(False)])
    trainer.train()

def test_echo_callback(self):
    data_set, model = prepare_env()
    trainer = Trainer(data_set, model, loss=BCELoss(pred="predict", target="y"),
                      n_epochs=2, batch_size=32, print_every=50, optimizer=SGD(lr=0.1),
                      check_code_level=2, use_tqdm=False, callbacks=[EchoCallback()])
    trainer.train()

def train_model(args):
    # check if the data_path and save_path exists
    data_paths = get_data_path(args.mode, args.label_type)
    for name in data_paths:
        assert exists(data_paths[name])
    if not exists(args.save_path):
        os.makedirs(args.save_path)

    # load summarization datasets
    datasets = BertSumLoader().process(data_paths)
    print('Information of dataset is:')
    print(datasets)
    train_set = datasets.datasets['train']
    valid_set = datasets.datasets['val']

    # configure training
    devices, train_params = configure_training(args)
    with open(join(args.save_path, 'params.json'), 'w') as f:
        json.dump(train_params, f, indent=4)
    print('Devices is:')
    print(devices)

    # configure model
    model = BertSum()
    optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0)
    callbacks = [MyCallback(args), SaveModelCallback(args.save_path)]
    criterion = MyBCELoss()
    val_metric = [LossMetric()]
    # sampler = BucketSampler(num_buckets=32, batch_size=args.batch_size)
    trainer = Trainer(train_data=train_set, model=model, optimizer=optimizer,
                      loss=criterion, batch_size=args.batch_size,
                      # sampler=sampler,
                      update_every=args.accum_count, n_epochs=args.n_epochs,
                      print_every=100, dev_data=valid_set, metrics=val_metric,
                      metric_key='-loss', validate_every=args.valid_steps,
                      save_path=args.save_path, device=devices, callbacks=callbacks)

    print('Start training with the following hyper-parameters:')
    print(train_params)
    trainer.train()

def train():
    # Trainer
    trainer = Trainer(**train_args.data)

    def _define_optim(obj):
        obj._optimizer = torch.optim.Adam(obj._model.parameters(), **optim_args.data)
        obj._scheduler = torch.optim.lr_scheduler.LambdaLR(obj._optimizer, lambda ep: .75 ** (ep / 5e4))

    def _update(obj):
        obj._scheduler.step()
        obj._optimizer.step()

    trainer.define_optimizer = lambda: _define_optim(trainer)
    trainer.update = lambda: _update(trainer)
    trainer.get_loss = lambda predict, truth: trainer._loss_func(**predict, **truth)
    trainer._create_validator = lambda x: MyTester(**test_args.data)

    # Model
    model = BiaffineParser(**model_args.data)

    # use pretrain embedding
    embed, _ = EmbedLoader.load_embedding(model_args['word_emb_dim'], emb_file_name, 'glove', word_v,
                                          os.path.join(processed_datadir, 'word_emb.pkl'))
    model.word_embedding = torch.nn.Embedding.from_pretrained(embed, freeze=False)
    model.word_embedding.padding_idx = word_v.padding_idx
    model.word_embedding.weight.data[word_v.padding_idx].fill_(0)
    model.pos_embedding.padding_idx = pos_v.padding_idx
    model.pos_embedding.weight.data[pos_v.padding_idx].fill_(0)

    try:
        ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
        print('model parameter loaded!')
    except Exception as _:
        print("No saved model. Continue.")
        pass

    # Start training
    trainer.train(model, train_data, dev_data)
    print("Training finished!")

    # Saver
    saver = ModelSaver("./save/saved_model.pkl")
    saver.save_pytorch(model)
    print("Model saved!")

def test_early_stop(self):
    data_set, model = prepare_env()
    trainer = Trainer(data_set, model, loss=BCELoss(pred="predict", target="y"),
                      n_epochs=20, batch_size=32, print_every=50, optimizer=SGD(lr=0.01),
                      check_code_level=2, use_tqdm=False, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"),
                      callbacks=[EarlyStopCallback(5)])
    trainer.train()

def test_TensorboardCallback(self):
    data_set, model = prepare_env()
    trainer = Trainer(data_set, model, loss=BCELoss(pred="predict", target="y"),
                      n_epochs=5, batch_size=32, print_every=50, optimizer=SGD(lr=0.1),
                      check_code_level=2, use_tqdm=False, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"),
                      callbacks=[TensorboardCallback("loss", "metric")])
    trainer.train()

def test_gradient_clip(self):
    data_set, model = prepare_env()
    trainer = Trainer(data_set, model, loss=BCELoss(pred="predict", target="y"),
                      n_epochs=20, batch_size=32, print_every=50, optimizer=SGD(lr=0.1),
                      check_code_level=2, use_tqdm=False, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"),
                      callbacks=[GradientClipCallback(model.parameters(), clip_value=2)])
    trainer.train()

def run_training(model, train_loader, valid_loader, hps):
    """Repeatedly runs training iterations, logging loss to screen and writing summaries"""
    logger.info("[INFO] Starting run_training")

    train_dir = os.path.join(hps.save_root, "train")
    if not os.path.exists(train_dir):
        os.makedirs(train_dir)
    eval_dir = os.path.join(hps.save_root, "eval")  # make a subdir of the root dir for eval data
    if not os.path.exists(eval_dir):
        os.makedirs(eval_dir)

    lr = hps.lr
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
    criterion = MyCrossEntropyLoss(pred="p_sent", target=Const.TARGET, mask=Const.INPUT_LEN, reduce='none')
    # criterion = torch.nn.CrossEntropyLoss(reduce="none")

    trainer = Trainer(model=model, train_data=train_loader, optimizer=optimizer, loss=criterion,
                      n_epochs=hps.n_epochs, print_every=100, dev_data=valid_loader,
                      metrics=[LabelFMetric(pred="prediction"), FastRougeMetric(hps, pred="prediction")],
                      metric_key="f", validate_every=-1, save_path=eval_dir,
                      callbacks=[TrainCallback(hps, patience=5)], use_tqdm=False)

    train_info = trainer.train(load_best_model=True)
    logger.info(' | end of Train | time: {:5.2f}s | '.format(train_info["seconds"]))
    logger.info('[INFO] best eval model in epoch %d and iter %d',
                train_info["best_epoch"], train_info["best_step"])
    logger.info(train_info["best_eval"])

    bestmodel_save_path = os.path.join(eval_dir, 'bestmodel.pkl')  # this is where checkpoints of best models are saved
    saver = ModelSaver(bestmodel_save_path)
    saver.save_pytorch(model)
    logger.info('[INFO] Saving eval best model to %s', bestmodel_save_path)

def train(model, datainfo, loss, metrics, optimizer, num_epochs=100):
    trainer = Trainer(datainfo.datasets['train'], model, optimizer=optimizer,
                      loss=loss(target='target'), metrics=[metrics(target='target')],
                      dev_data=datainfo.datasets['test'], device=0,
                      check_code_level=-1, n_epochs=num_epochs)
    print(trainer.train())

def test_lr_scheduler(self):
    data_set, model = prepare_env()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    trainer = Trainer(data_set, model, loss=BCELoss(pred="predict", target="y"),
                      n_epochs=5, batch_size=32, print_every=50, optimizer=optimizer,
                      check_code_level=2, use_tqdm=False, dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"),
                      callbacks=[LRScheduler(torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1))])
    trainer.train()

def train(model, datainfo, loss, metrics, optimizer, num_epochs=100):
    trainer = Trainer(datainfo.datasets['train'], model, optimizer=optimizer,
                      loss=loss(target='target'), batch_size=ops.batch_size,
                      metrics=[metrics(target='target')], dev_data=datainfo.datasets['test'],
                      device=[0, 1, 2], check_code_level=-1, n_epochs=num_epochs,
                      callbacks=callbacks)
    print(trainer.train())

def test_case(self):
    data_set = prepare_fake_dataset()
    data_set.set_input("x", flag=True)
    data_set.set_target("y", flag=True)

    train_set, dev_set = data_set.split(0.3)

    model = NaiveClassifier(2, 1)

    trainer = Trainer(train_set, model, loss=BCELoss(pred="predict", target="y"),
                      metrics=AccuracyMetric(pred="predict", target="y"),
                      n_epochs=10, batch_size=32, print_every=50, validate_every=-1,
                      dev_data=dev_set, optimizer=SGD(lr=0.1), check_code_level=2,
                      use_tqdm=True, save_path=None)
    trainer.train()

def train(model, datainfo, loss, metrics, optimizer, num_epochs=ops.train_epoch):
    trainer = Trainer(datainfo.datasets['train'], model, optimizer=optimizer, loss=loss,
                      metrics=[metrics], dev_data=datainfo.datasets['test'], device=device,
                      check_code_level=-1, batch_size=ops.batch_size, callbacks=callbacks,
                      n_epochs=num_epochs)
    print(trainer.train())

def train(checkpoint=None):
    # load config
    train_param = ConfigSection()
    model_param = ConfigSection()
    ConfigLoader().load_config(cfgfile, {"train": train_param, "model": model_param})
    print("config loaded")

    # Data Loader
    dataset = ZhConllPOSReader().load("/home/hyan/train.conllx")
    print(dataset)
    print("dataset transformed")

    dataset.rename_field("tag", "truth")

    vocab_proc = VocabIndexerProcessor("words", new_added_filed_name="word_seq")
    tag_proc = VocabIndexerProcessor("truth")
    seq_len_proc = SeqLenProcessor(field_name="word_seq", new_added_field_name="word_seq_origin_len", is_input=True)

    vocab_proc(dataset)
    tag_proc(dataset)
    seq_len_proc(dataset)

    dataset.set_input("word_seq", "word_seq_origin_len", "truth")
    dataset.set_target("truth", "word_seq_origin_len")

    print("processors defined")

    # dataset.set_is_target(tag_ids=True)
    model_param["vocab_size"] = vocab_proc.get_vocab_size()
    model_param["num_classes"] = tag_proc.get_vocab_size()
    print("vocab_size={} num_classes={}".format(model_param["vocab_size"], model_param["num_classes"]))

    # define a model
    if checkpoint is None:
        # pre_trained = load_tencent_embed("/home/zyfeng/data/char_tencent_embedding.pkl", vocab_proc.vocab.word2idx)
        pre_trained = None
        model = AdvSeqLabel(model_param, id2words=tag_proc.vocab.idx2word, emb=pre_trained)
        print(model)
    else:
        model = torch.load(checkpoint)

    # call trainer to train
    trainer = Trainer(dataset, model, loss=None,
                      metrics=SpanFPreRecMetric(tag_proc.vocab, pred="predict",
                                                target="truth", seq_lens="word_seq_origin_len"),
                      dev_data=dataset, metric_key="f",
                      use_tqdm=True, use_cuda=True, print_every=5, n_epochs=6, save_path="./save")
    trainer.train(load_best_model=True)

    # save model & pipeline
    model_proc = ModelProcessor(model, seq_len_field_name="word_seq_origin_len")
    id2tag = Index2WordProcessor(tag_proc.vocab, "predict", "tag")

    pp = Pipeline([vocab_proc, seq_len_proc, model_proc, id2tag])
    save_dict = {"pipeline": pp, "model": model, "tag_vocab": tag_proc.vocab}
    torch.save(save_dict, "model_pp.pkl")
    print("pipeline saved")

    torch.save(model, "./save/best_model.pkl")

def train_model(args):
    # check if the data_path and save_path exists
    data_paths = get_data_path(args.mode, args.encoder)
    for name in data_paths:
        assert exists(data_paths[name])
    if not exists(args.save_path):
        os.makedirs(args.save_path)

    # load summarization datasets
    datasets = MatchSumPipe(args.candidate_num, args.encoder).process_from_file(data_paths)
    print('Information of dataset is:')
    print(datasets)
    train_set = datasets.datasets['train']
    valid_set = datasets.datasets['val']

    # configure training
    devices, train_params = configure_training(args)
    with open(join(args.save_path, 'params.json'), 'w') as f:
        json.dump(train_params, f, indent=4)
    print('Devices is:')
    print(devices)

    # configure model
    model = MatchSum(args.candidate_num, args.encoder)
    optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0)
    callbacks = [MyCallback(args), SaveModelCallback(save_dir=args.save_path, top=5)]
    criterion = MarginRankingLoss(args.margin)
    val_metric = [ValidMetric(save_path=args.save_path, data=read_jsonl(data_paths['val']))]

    assert args.batch_size % len(devices) == 0

    trainer = Trainer(train_data=train_set, model=model, optimizer=optimizer,
                      loss=criterion, batch_size=args.batch_size,
                      update_every=args.accum_count, n_epochs=args.n_epochs,
                      print_every=10, dev_data=valid_set, metrics=val_metric,
                      metric_key='ROUGE', validate_every=args.valid_steps,
                      save_path=args.save_path, device=devices, callbacks=callbacks)

    print('Start training with the following hyper-parameters:')
    print(train_params)
    trainer.train()

def fit(self, train_data, dev_data=None, **train_args):
    trainer = Trainer(**train_args)
    trainer.train(self, train_data, dev_data)

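# A minimal sketch (not from the source) of how a fit() wrapper like the one above
# might be invoked: the hypothetical MyModel mixes the wrapper in, passes itself to
# Trainer.train() as the model, and forwards any keyword arguments straight to the
# Trainer constructor. The exact keyword names accepted depend on the fastNLP
# Trainer version in use.
#
#     clf = MyModel()
#     clf.fit(train_data, dev_data, n_epochs=10, batch_size=32)
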
                lr=ops.lr, momentum=0.9, weight_decay=ops.weight_decay)

callbacks = []
callbacks.append(LRScheduler(CosineAnnealingLR(optimizer, 5)))
# callbacks.append(
#     LRScheduler(LambdaLR(optimizer, lambda epoch: ops.lr if epoch <
#                          ops.train_epoch * 0.8 else ops.lr * 0.1))
# )
# callbacks.append(
#     FitlogCallback(data=datainfo.datasets, verbose=1)
# )

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print(device)

# 4. define the train method
trainer = Trainer(datainfo.datasets['train'], model, optimizer=optimizer, loss=loss,
                  sampler=BucketSampler(num_buckets=50, batch_size=ops.batch_size),
                  metrics=[metric], dev_data=datainfo.datasets['test'], device=device,
                  check_code_level=-1, batch_size=ops.batch_size, callbacks=callbacks,
                  n_epochs=ops.train_epoch, num_workers=4)


if __name__ == "__main__":
    print(trainer.train())

model = Model(data_bundle.get_vocab(Const.INPUTS(0)), config)
print(model)

loss = SoftmaxLoss()

metric = CRMetric()

optim = Adam(model.parameters(), lr=config.lr)

lr_decay_callback = LRCallback(optim.param_groups, config.lr_decay)

trainer = Trainer(model=model, train_data=data_bundle.datasets["train"],
                  dev_data=data_bundle.datasets["dev"],
                  loss=loss, metrics=metric, check_code_level=-1,
                  sampler=None, batch_size=1,
                  device=torch.device("cuda:" + config.cuda) if torch.cuda.is_available() else None,
                  metric_key='f', n_epochs=config.epoch,
                  optimizer=optim, save_path=None,
                  callbacks=[lr_decay_callback, GradientClipCallback(clip_value=5)])
print()

trainer.train()

def train(train_data_path, dev_data_path, checkpoint=None, save=None):
    # load config
    train_param = ConfigSection()
    model_param = ConfigSection()
    ConfigLoader().load_config(cfgfile, {"train": train_param, "model": model_param})
    print("config loaded")

    # Data Loader
    print("loading training set...")
    dataset = ConllxDataLoader().load(train_data_path, return_dataset=True)
    print("loading dev set...")
    dev_data = ConllxDataLoader().load(dev_data_path, return_dataset=True)
    print(dataset)
    print("================= dataset ready =====================")

    dataset.rename_field("tag", "truth")
    dev_data.rename_field("tag", "truth")

    vocab_proc = VocabIndexerProcessor("words", new_added_filed_name="word_seq")
    tag_proc = VocabIndexerProcessor("truth", is_input=True)
    seq_len_proc = SeqLenProcessor(field_name="word_seq", new_added_field_name="word_seq_origin_len", is_input=True)
    set_input_proc = SetInputProcessor("word_seq", "word_seq_origin_len")

    vocab_proc(dataset)
    tag_proc(dataset)
    seq_len_proc(dataset)

    # index dev set
    word_vocab, tag_vocab = vocab_proc.vocab, tag_proc.vocab
    dev_data.apply(lambda ins: [word_vocab.to_index(w) for w in ins["words"]], new_field_name="word_seq")
    dev_data.apply(lambda ins: [tag_vocab.to_index(w) for w in ins["truth"]], new_field_name="truth")
    dev_data.apply(lambda ins: len(ins["word_seq"]), new_field_name="word_seq_origin_len")

    # set input & target
    dataset.set_input("word_seq", "word_seq_origin_len", "truth")
    dev_data.set_input("word_seq", "word_seq_origin_len", "truth")
    dataset.set_target("truth", "word_seq_origin_len")
    dev_data.set_target("truth", "word_seq_origin_len")

    # dataset.set_is_target(tag_ids=True)
    model_param["vocab_size"] = vocab_proc.get_vocab_size()
    model_param["num_classes"] = tag_proc.get_vocab_size()
    print("vocab_size={} num_classes={}".format(model_param["vocab_size"], model_param["num_classes"]))

    # define a model
    if checkpoint is None:
        # pre_trained = load_tencent_embed("/home/zyfeng/data/char_tencent_embedding.pkl", vocab_proc.vocab.word2idx)
        pre_trained = None
        model = AdvSeqLabel(model_param, id2words=None, emb=pre_trained)
        print(model)
    else:
        model = torch.load(checkpoint)

    # call trainer to train
    trainer = Trainer(dataset, model, loss=None,
                      metrics=SpanFPreRecMetric(tag_proc.vocab, pred="predict",
                                                target="truth", seq_lens="word_seq_origin_len"),
                      dev_data=dev_data, metric_key="f",
                      use_tqdm=True, use_cuda=True, print_every=10, n_epochs=20, save_path=save)
    trainer.train(load_best_model=True)

    # save model & pipeline
    model_proc = ModelProcessor(model, seq_len_field_name="word_seq_origin_len")
    id2tag = Index2WordProcessor(tag_proc.vocab, "predict", "tag")

    pp = Pipeline([vocab_proc, seq_len_proc, set_input_proc, model_proc, id2tag])
    save_dict = {"pipeline": pp, "model": model, "tag_vocab": tag_proc.vocab}
    torch.save(save_dict, os.path.join(save, "model_pp.pkl"))
    print("pipeline saved")