# Assumes `import math` and that tokenizer, bert_model, mlm_classifier,
# mlm_loss_fn, nsp_classifier, nsp_loss_fn, bert_loss and args are defined
# earlier in the script.
def create_pipeline(data_file,
                    batch_size,
                    preprocessed_data=False,
                    batches_per_step=1,
                    **kwargs):
    if not preprocessed_data:
        # On-the-fly pipeline: masking is applied while reading raw text.
        max_seq_length, mask_probability, short_seq_prob = \
            kwargs['max_seq_length'], kwargs['mask_probability'], \
            kwargs['short_seq_prob']
        data_layer = nemo_nlp.BertPretrainingDataLayer(
            tokenizer,
            data_file,
            max_seq_length,
            mask_probability,
            short_seq_prob,
            batch_size=batch_size)
    else:
        # Preprocessed pipeline: masked examples were created offline.
        training, max_predictions_per_seq = \
            kwargs['training'], kwargs['max_predictions_per_seq']
        data_layer = nemo_nlp.BertPretrainingPreprocessedDataLayer(
            data_file,
            max_predictions_per_seq,
            batch_size=batch_size,
            training=training)

    steps_per_epoch = math.ceil(
        len(data_layer) / (batch_size * args.num_gpus * batches_per_step))

    input_ids, input_type_ids, input_mask, \
        output_ids, output_mask, nsp_labels = data_layer()

    hidden_states = bert_model(input_ids=input_ids,
                               token_type_ids=input_type_ids,
                               attention_mask=input_mask)

    # Masked language model head and loss.
    mlm_logits = mlm_classifier(hidden_states=hidden_states)
    mlm_loss = mlm_loss_fn(logits=mlm_logits,
                           output_ids=output_ids,
                           output_mask=output_mask)

    # Optional next-sentence-prediction head; the two losses are aggregated.
    if not args.only_mlm_loss:
        nsp_logits = nsp_classifier(hidden_states=hidden_states)
        nsp_loss = nsp_loss_fn(logits=nsp_logits, labels=nsp_labels)
        loss = bert_loss(loss_1=mlm_loss, loss_2=nsp_loss)
    else:
        loss = mlm_loss
        nsp_loss = None

    return loss, mlm_loss, nsp_loss, steps_per_epoch
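# Usage sketch (illustrative, not from the original script): how create_pipeline
# above might be invoked for training and evaluation with preprocessed data.
# The args attributes used here (train_data, eval_data, batches_per_step,
# max_predictions_per_seq, eval_batch_size) are assumed names.
train_loss, train_mlm_loss, train_nsp_loss, steps_per_epoch = create_pipeline(
    data_file=args.train_data,
    batch_size=args.batch_size,
    preprocessed_data=True,
    batches_per_step=args.batches_per_step,
    training=True,
    max_predictions_per_seq=args.max_predictions_per_seq)
eval_loss, eval_mlm_loss, eval_nsp_loss, _ = create_pipeline(
    data_file=args.eval_data,
    batch_size=args.eval_batch_size,
    preprocessed_data=True,
    batches_per_step=args.batches_per_step,
    training=False,
    max_predictions_per_seq=args.max_predictions_per_seq)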
# Simpler variant of create_pipeline: on-the-fly masked LM data only.
def create_pipeline(data_file, max_seq_length, mask_probability, batch_size):
    data_layer = nemo_nlp.BertPretrainingDataLayer(
        tokenizer,
        data_file,
        max_seq_length,
        mask_probability,
        batch_size=batch_size)
    steps_per_epoch = len(data_layer) // (batch_size * args.num_gpus)

    input_ids, input_type_ids, input_mask, \
        output_ids, output_mask, nsp_labels = data_layer()

    hidden_states = bert_model(input_ids=input_ids,
                               token_type_ids=input_type_ids,
                               attention_mask=input_mask)

    mlm_logits = mlm_classifier(hidden_states=hidden_states)
    mlm_loss = mlm_loss_fn(logits=mlm_logits,
                           output_ids=output_ids,
                           output_mask=output_mask)

    nsp_logits = nsp_classifier(hidden_states=hidden_states)
    nsp_loss = nsp_loss_fn(logits=nsp_logits, labels=nsp_labels)

    loss = bert_loss(loss_1=mlm_loss, loss_2=nsp_loss)
    return loss, [mlm_loss, nsp_loss], steps_per_epoch
nsp_log_softmax = nemo_nlp.SentenceClassificationLogSoftmaxNM(
    d_model=args.d_model, num_classes=2, factory=neural_factory)
nsp_loss = nemo_nlp.NextSentencePredictionLossNM(factory=neural_factory)
bert_loss = nemo_nlp.LossAggregatorNM(num_inputs=2, factory=neural_factory)

# tie weights of MLM softmax layer and embedding layer of the encoder
mlm_log_softmax.log_softmax.dense.weight = \
    bert_model.bert.embeddings.word_embeddings.weight

train_data_layer = nemo_nlp.BertPretrainingDataLayer(
    tokenizer=tokenizer,
    dataset=args.dataset_dir,
    name="train",
    sentence_indices_filename=args.train_sentence_indices_filename,
    max_seq_length=args.max_sequence_length,
    mask_probability=args.mask_probability,
    batch_size=args.batch_size,
    factory=neural_factory)

dev_data_layer = nemo_nlp.BertPretrainingDataLayer(
    tokenizer=tokenizer,
    dataset=args.dev_dataset_dir,
    name="dev",
    sentence_indices_filename=args.dev_sentence_indices_filename,
    max_seq_length=args.max_sequence_length,
    mask_probability=args.mask_probability,
    batch_size=args.eval_batch_size,
    factory=neural_factory)
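# Sketch (assumed, not from the original script): wiring train_data_layer and the
# modules above into a training DAG, by analogy with create_pipeline earlier.
# The keyword port names (hidden_states, log_probs, labels, loss_1/loss_2) and the
# mlm_log_probs / mlm_loss_fn pieces are assumptions; actual NeMo port names may differ.
input_ids, input_type_ids, input_mask, \
    output_ids, output_mask, nsp_labels = train_data_layer()

hidden_states = bert_model(input_ids=input_ids,
                           token_type_ids=input_type_ids,
                           attention_mask=input_mask)

mlm_log_probs = mlm_log_softmax(hidden_states=hidden_states)
train_mlm_loss = mlm_loss_fn(log_probs=mlm_log_probs,  # mlm_loss_fn assumed defined elsewhere
                             output_ids=output_ids,
                             output_mask=output_mask)

nsp_log_probs = nsp_log_softmax(hidden_states=hidden_states)
train_nsp_loss = nsp_loss(log_probs=nsp_log_probs, labels=nsp_labels)

train_loss = bert_loss(loss_1=train_mlm_loss, loss_2=train_nsp_loss)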