import os

from torch.optim.lr_scheduler import LambdaLR
from fastNLP.core.callback import (EarlyStopCallback, GradientClipCallback,
                                   LRScheduler, SaveModelCallback,
                                   WarmupCallback)


def create_cb():
    # Relies on module-level objects: args, model, optimizer, root_path,
    # bert_embedding, and the project-local Unfreeze_Callback.
    # Decay the learning rate by a factor of 1 / (1 + 0.05 * epoch).
    lrschedule_callback = LRScheduler(
        lr_scheduler=LambdaLR(optimizer, lambda ep: 1 / (1 + 0.05 * ep)))
    # Clip every gradient entry to the range [-2, 2].
    clip_callback = GradientClipCallback(clip_type='value', clip_value=2)
    save_dir = os.path.join(root_path, f'model/{args.data_type}',
                            f'fold{args.fold}')
    # Keep only the single best checkpoint on the dev metric.
    save_callback = SaveModelCallback(top=1, save_dir=save_dir)

    # The callback list is the same with or without cross-validation.
    callbacks = [
        lrschedule_callback,
        clip_callback,
        save_callback,
    ]
    # callbacks.append(Unfreeze_Callback(embedding_param, args.fix_embed_epoch))

    if args.use_bert:
        if args.fix_bert_epoch != 0:
            # Keep the BERT lattice embedding frozen for the first
            # fix_bert_epoch epochs, then let it train.
            callbacks.append(
                Unfreeze_Callback(model.lattice_embed, args.fix_bert_epoch))
        else:
            bert_embedding.requires_grad = True

    callbacks.append(EarlyStopCallback(args.early_stop))

    if args.warmup > 0 and args.model == 'transformer':
        callbacks.append(WarmupCallback(warmup=args.warmup))

    return callbacks
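# Unfreeze_Callback above is project-local and not shown here. A minimal
# sketch of such a callback, assuming fastNLP's standard Callback hooks and
# the requires_grad property that fastNLP embeddings expose; the class and
# argument names below are illustrative, not the project's.
from fastNLP import Callback

class UnfreezeSketch(Callback):
    def __init__(self, embedding, fix_until_epoch):
        super().__init__()
        self.embedding = embedding
        self.fix_until_epoch = fix_until_epoch

    def on_epoch_begin(self):
        # self.epoch (1-based) is maintained by the fastNLP Trainer.
        self.embedding.requires_grad = self.epoch > self.fix_until_epoch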
def test_warmup_callback(self):
    data_set, model = prepare_env()
    warmup_callback = WarmupCallback()  # defaults: warmup=0.1, schedule='constant'
    trainer = Trainer(data_set, model,
                      optimizer=SGD(lr=0.1),
                      loss=BCELoss(pred="predict", target="y"),
                      batch_size=32,
                      n_epochs=5,
                      print_every=50,
                      dev_data=data_set,
                      metrics=AccuracyMetric(pred="predict", target="y"),
                      use_tqdm=True,
                      callbacks=warmup_callback,
                      check_code_level=2)
    trainer.train()
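# For reference, a small standalone illustration (not fastNLP source) of the
# per-step multiplier the two WarmupCallback schedules apply to the base
# learning rate: ramp up over the first `warmup` fraction of steps, then
# either hold ('constant') or decay linearly to zero ('linear').
def warmup_factor(step, total_steps, warmup=0.1, schedule='constant'):
    progress = step / total_steps
    if progress < warmup:
        return progress / warmup          # linear ramp-up phase
    if schedule == 'constant':
        return 1.0                        # hold at the base learning rate
    return max((progress - 1.0) / (warmup - 1.0), 0.0)  # linear decay to 0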
    # Tail of a truncated embedding constructor (include_cls_sep/layer_num
    # suggest a BertEmbedding):
    dropout=0.1, include_cls_sep=False, layer_num=8)

model = CharModel(embed=embed, label_vocab=label_vocab, pos_idx=pos_idx,
                  Parsing_rnn_layers=rnn_layers,
                  Parsing_arc_mlp_size=arc_mlp_size,
                  Parsing_label_mlp_size=label_mlp_size,
                  encoding_type='bmeso')
optimizer = AdamW(model.parameters(), lr=2e-5)
device = 0 if torch.cuda.is_available() else 'cpu'
callbacks = [WarmupCallback(warmup=0.1, schedule='linear')]

# One metric per sub-task: joint segmentation+parsing F1, CWS, and POS span F1.
metric1 = SegAppCharParseF1Metric(label_vocab['Parsing']['APP'])
metric2 = CWSMetric(label_vocab['Parsing']['APP'])
metric3 = SpanFPreRecMetric(tag_vocab=label_vocab['POS'])
metrics = [metric1, metric2, metric3]

# Pull all CWS instances out of each split into a dedicated DataSet.
for target in ['train', 'test', 'dev']:
    CWS_dataset = DataSet()
    for key in task_list:
        if key.startswith('CWS'):
            for ins in all_data[target][key]:
                CWS_dataset.append(ins)
            del all_data[target][key]
    CWS_dataset.set_input('chars', 'target', 'seq_len', 'task_class')
    CWS_dataset.set_target('target', 'seq_len')
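# A minimal, self-contained example (toy data, assumed field names) of
# building a fastNLP DataSet and marking fields as model input / target,
# mirroring what is done for CWS_dataset above.
from fastNLP import DataSet, Instance

ds = DataSet()
ds.append(Instance(chars=[1, 2, 3], target=[0, 1, 0], seq_len=3, task_class=0))
ds.set_input('chars', 'seq_len')   # fields fed to the model's forward()
ds.set_target('target')            # fields used by the loss and metrics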
        # Inside a weight-inspection callback (surrounding method truncated):
        print('parameter weight:', flush=True)
        print(self.model_.state_dict()['encoder.layer_0.attn.w_q.weight'],
              flush=True)

callbacks = [
    evaluate_callback,
    lrschedule_callback,
    clip_callback,
    # CheckWeightCallback(model)
]

print('parameter weight:')
print(model.state_dict()['encoder.layer_0.attn.w_q.weight'])

if args.warmup > 0 and args.model == 'transformer':
    callbacks.append(WarmupCallback(warmup=args.warmup))


class record_best_test_callback(Callback):
    def __init__(self, trainer, result_dict):
        super().__init__()
        # Stored under a non-clashing name: fastNLP's Callback base class
        # already defines a read-only `trainer` property.
        self.trainer222 = trainer
        self.result_dict = result_dict

    def on_valid_end(self, eval_result, metric_key, optimizer, better_result):
        # Log the test-set span F1 after every validation pass.
        print(eval_result['data_test']['SpanFPreRecMetric']['f'])


if args.status == 'train':
    trainer = Trainer(datasets['train'], model,
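# The nesting indexed in on_valid_end above follows from the lookup itself:
# results are keyed first by the extra dataset's name, then by metric class,
# then by metric field (the values below are made up for illustration).
example_eval_result = {
    'data_test': {
        'SpanFPreRecMetric': {'f': 0.91, 'pre': 0.92, 'rec': 0.90},
    },
}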
print(data_bundle)  # print the datasets and vocabs in the bundle

# load embedding
embed = BertEmbedding(data_bundle.vocabs[Const.INPUT],
                      model_dir_or_name=arg.bert_model_dir_or_name)

# define model
model = BertForSentenceMatching(
    embed, num_labels=len(data_bundle.vocabs[Const.TARGET]))

# define optimizer and callback
optimizer = AdamW(lr=arg.lr, params=model.parameters())
callbacks = [
    WarmupCallback(warmup=arg.warm_up_rate, schedule='linear'),
]

if arg.task in ['snli']:
    # evaluate the test set every epoch if the task is snli
    callbacks.append(
        EvaluateCallback(data=data_bundle.datasets[arg.test_dataset_name]))

# define trainer
trainer = Trainer(train_data=data_bundle.get_dataset(arg.train_dataset_name),
                  model=model,
                  optimizer=optimizer,
                  batch_size=torch.cuda.device_count() * arg.batch_size_per_gpu,
                  n_epochs=arg.n_epochs,
                  print_every=-1,
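# Note: torch.cuda.device_count() is 0 on a CPU-only machine, which would
# zero out the batch_size computed above. A defensive variant (sketch):
n_devices = max(1, torch.cuda.device_count())
batch_size = n_devices * arg.batch_size_per_gpu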