)
else:
    raise RuntimeError(f'task {arg.task} is not supported yet!')

# define model
model = BertForNLI(class_num=len(data_info.vocabs[Const.TARGET]),
                   bert_dir=arg.bert_dir)

# define trainer
trainer = Trainer(train_data=data_info.datasets[arg.train_dataset_name],
                  model=model,
                  optimizer=Adam(lr=arg.lr, model_params=model.parameters()),
                  batch_size=torch.cuda.device_count() * arg.batch_size_per_gpu,
                  n_epochs=arg.n_epochs,
                  print_every=-1,
                  dev_data=data_info.datasets[arg.dev_dataset_name],
                  metrics=AccuracyMetric(),
                  metric_key='acc',
                  device=list(range(torch.cuda.device_count())),
                  check_code_level=-1,
                  save_path=arg.save_path)

# train model
trainer.train(load_best_model=True)

# define tester
tester = Tester(
    data=data_info.datasets[arg.test_dataset_name],
    model=model,
    metrics=AccuracyMetric(),
    output += line

# dump the decoded validation predictions for manual inspection
print('Visualizing validation predictions...')
with open('../visualize.txt', 'w', encoding='utf-8') as f:
    f.write(output)

print('label num:', len(vocabs['label']))

if args.status == 'train':
    trainer = Trainer(datasets['train'], model, optimizer, loss, args.batch,
                      n_epochs=args.epoch,
                      dev_data=datasets['dev'],
                      metrics=metrics,
                      device=device,
                      callbacks=create_cb(),
                      dev_batch_size=args.test_batch,
                      test_use_tqdm=False,
                      check_code_level=-1,
                      update_every=args.update_every)
    trainer.train()

    print('Evaluating...')
    with torch.no_grad():
        # wrap the trained model in a Predictor instead of shadowing `model`
        predictor = Predictor(model)
        pred = predictor.predict(datasets['dev'],
                                 seq_len_field_name='seq_len')['pred']
        # map predicted label ids back to label strings
        pred = [[vocabs['label'].to_word(ele) for ele in arr] for arr in pred]
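        # NOTE (sketch, not from the original script): Predictor output is
        # typically padded to each batch's max length, so the decoded
        # sequences are usually trimmed back to their true lengths before
        # inspection. Reading the lengths from the 'seq_len' field like this
        # is an assumption about the dataset, not code from this repo.
        seq_lens = list(datasets['dev']['seq_len'])
        pred = [arr[:length] for arr, length in zip(pred, seq_lens)]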
    for n, p in model.named_parameters():
        print_info('{}:{}'.format(n, p.size()))
    print_info('see_param mode: finish')
    if not args.debug:
        exit(1208)

if args.see_convergence:
    print_info('see_convergence = True')
    print_info('so just test train acc|f1')
    # overfit a small slice of the training set as a sanity check
    datasets['train'] = datasets['train'][:100]
    if args.optim == 'adam':
        optimizer = optim.AdamW(model.parameters(), lr=args.lr,
                                weight_decay=args.weight_decay)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=args.lr,
                              momentum=args.momentum)
    # evaluate on the training data itself to check convergence
    trainer = Trainer(datasets['train'], model, optimizer, loss, args.batch,
                      n_epochs=args.epoch,
                      dev_data=datasets['train'],
                      metrics=metrics,
                      device=device,
                      dev_batch_size=args.test_batch)
    trainer.train()
    exit(1208)

# if args.warmup and args.model == 'transformer':
#     ## warm up start
#     if args.optim == 'adam':
#         warmup_optimizer = optim.AdamW(model.parameters(), lr=args.warmup_lr,
#                                        weight_decay=args.weight_decay)
#     elif args.optim == 'sgd':
#         warmup_optimizer = optim.SGD(model.parameters(), lr=args.warmup_lr,
#                                      momentum=args.momentum)
#
#     warmup_lr_schedule = LRScheduler(
#         lr_scheduler=LambdaLR(warmup_optimizer, lambda ep: 1 * (1 + 0.05) ** ep))
#     warmup_callbacks = [
#         warmup_lr_schedule,
]

if arg.task in ['snli']:
    # evaluate on the test set after every epoch when the task is SNLI
    callbacks.append(
        EvaluateCallback(data=data_bundle.datasets[arg.test_dataset_name]))

# define trainer
trainer = Trainer(train_data=data_bundle.datasets[arg.train_dataset_name],
                  model=model,
                  optimizer=optimizer,
                  loss=CrossEntropyLoss(),
                  batch_size=torch.cuda.device_count() * arg.batch_size_per_gpu,
                  n_epochs=arg.n_epochs,
                  print_every=-1,
                  dev_data=data_bundle.datasets[arg.dev_dataset_name],
                  metrics=AccuracyMetric(),
                  metric_key='acc',
                  device=list(range(torch.cuda.device_count())),
                  check_code_level=-1,
                  save_path=arg.save_path,
                  callbacks=callbacks)

# train model
trainer.train(load_best_model=True)

# define tester
tester = Tester(
    data=data_bundle.datasets[arg.test_dataset_name],
    model=model,
model = CNTNModel(embedding,
                  ns=arg.cntn_ns,
                  k_top=arg.cntn_k_top,
                  num_labels=num_labels,
                  depth=arg.cntn_depth,
                  r=arg.cntn_r)
print(model)

# define trainer
trainer = Trainer(train_data=data_bundle.datasets['train'],
                  model=model,
                  optimizer=Adam(lr=arg.lr, model_params=model.parameters()),
                  loss=CrossEntropyLoss(),
                  batch_size=torch.cuda.device_count() * arg.batch_size_per_gpu,
                  n_epochs=arg.n_epochs,
                  print_every=-1,
                  dev_data=data_bundle.datasets[dev_dict[arg.dataset]],
                  metrics=AccuracyMetric(),
                  metric_key='acc',
                  device=list(range(torch.cuda.device_count())),
                  check_code_level=-1)

# train model
trainer.train(load_best_model=True)

# define tester
tester = Tester(data=data_bundle.datasets[test_dict[arg.dataset]],
                model=model,
                metrics=AccuracyMetric(),
                batch_size=torch.cuda.device_count() * arg.batch_size_per_gpu,
# separate embedding parameters from the rest of the model
bigram_embedding_param = list(model.bigram_embed.parameters())
embedding_param = bigram_embedding_param
if args.lattice:
    # lattice (gaz) embeddings only exist in lattice mode
    gaz_embedding_param = list(model.lattice_embed.parameters())
    embedding_param = embedding_param + gaz_embedding_param
embedding_param_ids = list(map(id, embedding_param))
non_embedding_param = [p for p in model.parameters()
                       if id(p) not in embedding_param_ids]
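# NOTE (sketch, not from the original script): the usual reason for this
# split is to train the embedding tables with their own learning rate via
# optimizer parameter groups. `embed_lr_scale` is a hypothetical multiplier,
# not a flag defined by this script.
embed_lr_scale = 0.5
optimizer = optim.SGD(
    [{'params': non_embedding_param},
     {'params': embedding_param, 'lr': args.lr * embed_lr_scale}],
    lr=args.lr, momentum=args.momentum)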
elif arg.task == 'mnli':
    callbacks.append(
        EvaluateCallback(data={
            'dev_matched': data_bundle.datasets['dev_matched'],
            'dev_mismatched': data_bundle.datasets['dev_mismatched'],
        }))

trainer = Trainer(train_data=data_bundle.datasets['train'],
                  model=model,
                  optimizer=optimizer,
                  num_workers=0,
                  batch_size=arg.batch_size,
                  n_epochs=arg.n_epochs,
                  print_every=-1,
                  dev_data=data_bundle.datasets[arg.devset_name],
                  metrics=AccuracyMetric(pred="pred", target="target"),
                  metric_key='acc',
                  device=list(range(torch.cuda.device_count())),
                  check_code_level=-1,
                  callbacks=callbacks,
                  loss=CrossEntropyLoss(pred="pred", target="target"))
trainer.train(load_best_model=True)

tester = Tester(
    data=data_bundle.datasets[arg.testset_name],
    model=model,
    metrics=AccuracyMetric(),
    batch_size=arg.batch_size,
    device=list(range(torch.cuda.device_count())),
)
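# NOTE (sketch, not from the original script): in fastNLP the tester is
# usually run like this; Tester.test() returns a nested dict keyed by
# metric class name, e.g. {'AccuracyMetric': {'acc': 0.87}}.
eval_results = tester.test()
print(eval_results)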