        user_iter_callback=lambda x, y: eval_iter_callback(x, y),
        user_epochs_done_callback=lambda x: eval_epochs_done_callback(
            x, label_ids, f'{nf.work_dir}/graphs'),
        tb_writer=nf.tb_writer,
        eval_step=(args.eval_step_freq if args.eval_step_freq > 0
                   else steps_per_epoch),
    )
    callbacks.append(eval_callback)

ckpt_callback = nemo.core.CheckpointCallback(
    folder=nf.checkpoint_dir,
    epoch_freq=args.save_epoch_freq,
    step_freq=args.save_step_freq)
callbacks.append(ckpt_callback)

lr_policy_fn = get_lr_policy(
    args.lr_policy,
    total_steps=args.num_epochs * steps_per_epoch,
    warmup_ratio=args.lr_warmup_proportion)

nf.train(
    tensors_to_optimize=[train_loss],
    callbacks=callbacks,
    lr_policy=lr_policy_fn,
    batches_per_step=args.batches_per_step,
    optimizer=args.optimizer_kind,
    optimization_params={
        "num_epochs": args.num_epochs,
        "lr": args.lr,
        "weight_decay": args.weight_decay,
    },
)
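# For reference: a minimal sketch (not the NeMo implementation) of how a
# warmup-then-anneal policy like the one returned by get_lr_policy typically
# maps a training step to a learning rate, assuming linear warmup over
# warmup_ratio * total_steps steps followed by linear decay to zero.
def warmup_annealing_sketch(step, total_steps, warmup_ratio, base_lr):
    warmup_steps = int(warmup_ratio * total_steps)
    if warmup_steps > 0 and step < warmup_steps:
        # ramp the learning rate up linearly during warmup
        return base_lr * (step + 1) / warmup_steps
    # then decay linearly from base_lr down to zero at total_steps
    remaining = max(total_steps - warmup_steps, 1)
    return base_lr * max(0.0, (total_steps - step) / remaining)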
    def test_squad_v1(self):
        version_2_with_negative = False
        pretrained_bert_model = 'bert-base-uncased'
        batch_size = 3
        data_dir = os.path.abspath(
            os.path.join(os.path.dirname(__file__), 'data/nlp/squad/v1.1'))
        max_query_length = 64
        max_seq_length = 384
        doc_stride = 128
        max_steps = 100
        lr_warmup_proportion = 0
        eval_step_freq = 50
        lr = 3e-6
        do_lower_case = True
        n_best_size = 5
        max_answer_length = 20
        null_score_diff_threshold = 0.0

        tokenizer = nemo_nlp.NemoBertTokenizer(pretrained_bert_model)
        neural_factory = nemo.core.NeuralModuleFactory(
            backend=nemo.core.Backend.PyTorch,
            local_rank=None,
            create_tb_writer=False)
        model = nemo_nlp.huggingface.BERT(
            pretrained_model_name=pretrained_bert_model)
        hidden_size = model.local_parameters["hidden_size"]
        qa_head = nemo_nlp.TokenClassifier(
            hidden_size=hidden_size,
            num_classes=2,
            num_layers=1,
            log_softmax=False)
        squad_loss = nemo_nlp.QuestionAnsweringLoss()

        data_layer = nemo_nlp.BertQuestionAnsweringDataLayer(
            mode='train',
            version_2_with_negative=version_2_with_negative,
            batch_size=batch_size,
            tokenizer=tokenizer,
            data_dir=data_dir,
            max_query_length=max_query_length,
            max_seq_length=max_seq_length,
            doc_stride=doc_stride)
        input_ids, input_type_ids, input_mask, \
            start_positions, end_positions, _ = data_layer()
        hidden_states = model(
            input_ids=input_ids,
            token_type_ids=input_type_ids,
            attention_mask=input_mask)
        qa_output = qa_head(hidden_states=hidden_states)
        loss, _, _ = squad_loss(
            logits=qa_output,
            start_positions=start_positions,
            end_positions=end_positions)

        data_layer_eval = nemo_nlp.BertQuestionAnsweringDataLayer(
            mode='dev',
            version_2_with_negative=version_2_with_negative,
            batch_size=batch_size,
            tokenizer=tokenizer,
            data_dir=data_dir,
            max_query_length=max_query_length,
            max_seq_length=max_seq_length,
            doc_stride=doc_stride)
        input_ids_eval, input_type_ids_eval, input_mask_eval, \
            start_positions_eval, end_positions_eval, unique_ids_eval \
            = data_layer_eval()
        hidden_states_eval = model(
            input_ids=input_ids_eval,
            token_type_ids=input_type_ids_eval,
            attention_mask=input_mask_eval)
        qa_output_eval = qa_head(hidden_states=hidden_states_eval)
        _, start_logits_eval, end_logits_eval = squad_loss(
            logits=qa_output_eval,
            start_positions=start_positions_eval,
            end_positions=end_positions_eval)
        eval_output = [start_logits_eval, end_logits_eval, unique_ids_eval]

        callback_train = nemo.core.SimpleLossLoggerCallback(
            tensors=[loss],
            print_func=lambda x: print("Loss: {:.3f}".format(x[0].item())),
            get_tb_values=lambda x: [["loss", x[0]]],
            step_freq=10,
            tb_writer=neural_factory.tb_writer)
        callbacks_eval = nemo.core.EvaluatorCallback(
            eval_tensors=eval_output,
            user_iter_callback=lambda x, y: eval_iter_callback(x, y),
            user_epochs_done_callback=lambda x: eval_epochs_done_callback(
                x,
                eval_data_layer=data_layer_eval,
                do_lower_case=do_lower_case,
                n_best_size=n_best_size,
                max_answer_length=max_answer_length,
                version_2_with_negative=version_2_with_negative,
                null_score_diff_threshold=null_score_diff_threshold),
            tb_writer=neural_factory.tb_writer,
            eval_step=eval_step_freq)

        lr_policy_fn = get_lr_policy(
            'WarmupAnnealing',
            total_steps=max_steps,
            warmup_ratio=lr_warmup_proportion)
        neural_factory.train(
            tensors_to_optimize=[loss],
            callbacks=[callback_train, callbacks_eval],
            lr_policy=lr_policy_fn,
            optimizer='adam_w',
            optimization_params={
                "max_steps": max_steps,
                "lr": lr,
            })
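        # With eval_step_freq = 50 and max_steps = 100, evaluation runs at
        # steps 50 and 100; lr_warmup_proportion = 0 means the WarmupAnnealing
        # policy applies no warmup and only anneals the learning rate.
        # (This test also assumes the SQuAD v1.1 JSON files already exist
        # under data/nlp/squad/v1.1 relative to this test file.)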
            eval_step=args.eval_step_freq,
        )
        callbacks.append(eval_callback)

    optimization_params = {
        "lr": args.lr,
        "weight_decay": args.weight_decay,
    }
    if args.max_steps < 0:
        total_steps = args.num_epochs * train_steps_per_epoch
        optimization_params['num_epochs'] = args.num_epochs
    else:
        total_steps = args.max_steps
        optimization_params['max_steps'] = args.max_steps

    lr_policy_fn = get_lr_policy(
        args.lr_policy,
        total_steps=total_steps,
        warmup_ratio=args.lr_warmup_proportion)

    if args.grad_norm_clip >= 0:
        optimization_params['grad_norm_clip'] = args.grad_norm_clip

    nf.train(
        tensors_to_optimize=[train_loss],
        callbacks=callbacks,
        lr_policy=lr_policy_fn,
        optimizer=args.optimizer,
        batches_per_step=args.batches_per_step,
        optimization_params=optimization_params,
    )
else:
    load_from_folder = None
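# For orientation: with batches_per_step > 1, nf.train accumulates gradients
# over that many forward/backward passes before each optimizer step, so the
# effective batch size grows accordingly. A sketch of the arithmetic (the
# world_size name is illustrative, not a variable in this script):
#     effective_batch = args.batch_size * args.batches_per_step * world_size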
        user_epochs_done_callback=lambda x: eval_epochs_done_callback(
            x, validation_dataset=eval_dataset_tgt),
        eval_step=args.eval_freq,
        tb_writer=nf.tb_writer)

    # callback which saves checkpoints once in a while
    ckpt_dir = nf.checkpoint_dir if not args.interactive \
        else args.restore_checkpoint_from
    ckpt_callback = nemo.core.CheckpointCallback(
        folder=ckpt_dir,
        epoch_freq=args.save_epoch_freq,
        step_freq=args.save_step_freq,
        checkpoints_to_keep=1)

    # define learning rate decay policy
    lr_policy_fn = get_lr_policy(
        args.lr_policy,
        total_steps=args.max_steps,
        warmup_steps=args.warmup_steps)

    if args.max_steps is not None and args.num_epochs is not None:
        raise ValueError(
            "Please specify either max_steps or num_epochs, not both.")

    if not args.interactive:
        if args.max_steps is not None:
            stop_training_condition = {"max_steps": args.max_steps}
        else:
            stop_training_condition = {"num_epochs": args.num_epochs}
        nf.train(
            tensors_to_optimize=[train_loss],
            callbacks=[train_callback, eval_callback, ckpt_callback],
            optimizer=args.optimizer,
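            # note: this script configures warmup in absolute steps; for the
            # ratio-based policies used in the other scripts here, the two
            # are related by (a sketch of the relationship, not a NeMo API):
            #     warmup_steps = int(warmup_ratio * total_steps)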
def sentence_classification(args):
    # TODO: construct name of experiment based on args
    """
    name = construct_name(
        args.exp_name,
        args.lr,
        args.batch_size,
        args.num_epochs,
        args.weight_decay,
        args.optimizer)
    work_dir = name
    if args.work_dir:
        work_dir = os.path.join(args.work_dir, name)
    """
    # Instantiate neural modules
    nf = NeuralModuleFactory(
        backend=nemo.core.Backend.PyTorch,
        local_rank=args.local_rank,
        optimization_level=args.amp_opt_level,
        log_dir=args.work_dir,
        create_tb_writer=True,
        files_to_copy=[__file__],
        add_time_to_log_dir=True)

    # Pre-trained BERT
    tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model)
    if args.bert_checkpoint is None:
        bert = nemo_nlp.BERT(pretrained_model_name=args.pretrained_bert_model)
        # save bert config for inference after fine-tuning
        bert_config = bert.config.to_dict()
        config_path = os.path.join(
            args.work_dir, args.pretrained_bert_model + '_config.json')
        with open(config_path, 'w+') as json_file:
            json.dump(bert_config, json_file)
    else:
        if args.bert_config is not None:
            with open(args.bert_config) as json_file:
                bert_config = json.load(json_file)
        bert = nemo_nlp.BERT(**bert_config)
        bert.restore_from(args.bert_checkpoint)

    # MLP
    bert_hidden_size = bert.local_parameters['hidden_size']
    mlp = nemo_nlp.SequenceClassifier(
        hidden_size=bert_hidden_size,
        num_classes=args.num_classes,
        num_layers=args.num_layers,
        log_softmax=False,
        dropout=args.dropout)

    # TODO: save mlp/all model configs (bake into Neural Module?)
    if args.mlp_checkpoint:
        mlp.restore_from(args.mlp_checkpoint)

    # Loss function for classification
    loss_fn = CrossEntropyLoss()

    # Data layers, pipelines, and callbacks
    callbacks = []  # callbacks depend on files present

    if args.train_file:
        if args.preproc:
            train_data_layer = \
                preproc_data_layer.PreprocBertSentenceClassificationDataLayer(
                    input_file=args.train_file,
                    shuffle=True,
                    num_samples=args.num_samples,  # lower for dev, -1 for all
                    batch_size=args.batch_size,
                    num_workers=0,
                    local_rank=args.local_rank)
        else:
            train_data_layer = nemo_nlp.BertSentenceClassificationDataLayer(
                input_file=args.train_file,
                tokenizer=tokenizer,
                max_seq_length=args.max_seq_length,
                shuffle=True,
                num_samples=args.num_samples,  # lower for dev, -1 for all
                batch_size=args.batch_size,
                num_workers=0,
                local_rank=args.local_rank)

        train_logits, train_loss, steps_per_epoch, train_labels = \
            create_pipeline(nf, train_data_layer, bert, mlp, loss_fn)

        train_callback = nemo.core.SimpleLossLoggerCallback(
            tensors=[train_loss, train_logits],
            print_func=lambda x: nf.logger.info(
                f'Train loss: {np.round(x[0].item(), 3)}'),
            tb_writer=nf.tb_writer,
            get_tb_values=lambda x: [["train_loss", x[0]]],
            step_freq=steps_per_epoch)
        callbacks.append(train_callback)

        if args.num_checkpoints != 0:
            ckpt_callback = nemo.core.CheckpointCallback(
                folder=nf.checkpoint_dir,
                epoch_freq=args.save_epoch_freq,
                step_freq=args.save_step_freq,
                checkpoints_to_keep=args.num_checkpoints)
            callbacks.append(ckpt_callback)

    if args.eval_file:
        if args.preproc:
            eval_data_layer = \
                preproc_data_layer.PreprocBertSentenceClassificationDataLayer(
                    input_file=args.eval_file,
                    shuffle=False,
                    num_samples=args.num_samples,
                    batch_size=args.batch_size,
                    num_workers=0,
                    local_rank=args.local_rank)
        else:
            eval_data_layer = nemo_nlp.BertSentenceClassificationDataLayer(
                input_file=args.eval_file,
                tokenizer=tokenizer,
                max_seq_length=args.max_seq_length,
                shuffle=False,
                num_samples=args.num_samples,
                batch_size=args.batch_size,
                num_workers=0,
                local_rank=args.local_rank)

        eval_logits, eval_loss, _, eval_labels = create_pipeline(
            nf, eval_data_layer, bert, mlp, loss_fn)
        eval_callback = nemo.core.EvaluatorCallback(
            eval_tensors=[eval_logits, eval_labels],
            user_iter_callback=lambda x, y: eval_iter_callback(
                x, y, eval_data_layer),
            user_epochs_done_callback=lambda x: eval_epochs_done_callback(
                x, f'{nf.work_dir}/graphs'),
            tb_writer=nf.tb_writer,
            eval_step=steps_per_epoch)
        callbacks.append(eval_callback)

    if args.inference_file:
        if args.preproc:
            inference_data_layer = \
                preproc_data_layer.PreprocBertSentenceClassificationDataLayer(
                    input_file=args.inference_file,
                    shuffle=False,
                    num_samples=args.num_samples,
                    batch_size=args.batch_size,
                    num_workers=0,
                    local_rank=args.local_rank)
        else:
            inference_data_layer = \
                nemo_nlp.BertSentenceClassificationDataLayer(
                    input_file=args.inference_file,
                    tokenizer=tokenizer,
                    max_seq_length=args.max_seq_length,
                    shuffle=False,
                    num_samples=args.num_samples,
                    batch_size=args.batch_size,
                    num_workers=0,
                    local_rank=args.local_rank)

        # TODO: Finish inference
        inference_callback = None

    # Training, eval and inference
    if args.train_file:
        lr_policy_fn = get_lr_policy(
            args.lr_policy,
            total_steps=args.num_epochs * steps_per_epoch,
            warmup_ratio=args.lr_warmup_proportion)

        nf.train(
            tensors_to_optimize=[train_loss],
            callbacks=callbacks,
            lr_policy=lr_policy_fn,
            optimizer=args.optimizer_kind,
            optimization_params={
                'num_epochs': args.num_epochs,
                'lr': args.lr,
            })
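# For reference only: create_pipeline is defined elsewhere in this script.
# Based on how it is called above, a minimal sketch of what such a helper
# might look like (an assumption, not the actual implementation; the
# data-layer outputs and batch_size attribute are illustrative):
def create_pipeline_sketch(nf, data_layer, bert, mlp, loss_fn):
    # wire data layer -> BERT -> classifier head -> loss
    input_ids, input_type_ids, input_mask, labels = data_layer()
    hidden_states = bert(input_ids=input_ids,
                         token_type_ids=input_type_ids,
                         attention_mask=input_mask)
    logits = mlp(hidden_states=hidden_states)
    loss = loss_fn(logits=logits, labels=labels)
    # one epoch corresponds to dataset size / batch size optimizer steps
    steps_per_epoch = len(data_layer) // data_layer.batch_size
    return logits, loss, steps_per_epoch, labels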
    get_tb_values=lambda x: [["train_loss", x[0]]],
    step_freq=steps_per_epoch)

eval_callback = nemo.core.EvaluatorCallback(
    eval_tensors=[val_logits, val_labels],
    user_iter_callback=lambda x, y: eval_iter_callback(x, y, val_data),
    user_epochs_done_callback=lambda x: eval_epochs_done_callback(
        x, f'{nf.work_dir}/graphs'),
    tb_writer=nf.tb_writer,
    eval_step=steps_per_epoch)

ckpt_callback = nemo.core.CheckpointCallback(
    folder=nf.checkpoint_dir,
    epoch_freq=1)

lr_policy_fn = get_lr_policy(
    'WarmupAnnealing',
    total_steps=NUM_EPOCHS * steps_per_epoch,
    warmup_ratio=0.1)

nf.train(
    tensors_to_optimize=[train_loss],
    callbacks=[train_callback, eval_callback, ckpt_callback],
    lr_policy=lr_policy_fn,
    optimizer=OPTIMIZER,
    optimization_params={
        "num_epochs": NUM_EPOCHS,
        "lr": LEARNING_RATE,
        "weight_decay": WEIGHT_DECAY,
    },
)

# TODO: decide whether an inference file should be created from the test set
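# A hedged sketch of that open inference step, assuming the NeMo 0.x
# NeuralModuleFactory.infer API; `test_logits` stands in for a logits tensor
# wired from a test-set data layer (hypothetical name, not defined above).
import torch

def run_inference_sketch(nf, test_logits):
    # infer() executes the sub-graph that produces the requested tensors
    # and returns, for each tensor, a list of per-batch results
    evaluated = nf.infer(tensors=[test_logits],
                         checkpoint_dir=nf.checkpoint_dir)
    # concatenate batch results and take the most likely class per example
    return torch.argmax(torch.cat(evaluated[0]), dim=-1)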