bert_model = nemo_nlp.huggingface.BERT(
    vocab_size=tokenizer.vocab_size,
    num_layers=args.num_layers,
    d_model=args.d_model,
    num_heads=args.num_heads,
    d_inner=args.d_inner,
    max_seq_length=args.max_seq_length,
    hidden_act="gelu")

""" create necessary modules for the whole training pipeline, namely
data layers, BERT encoder, and MLM and NSP loss functions """
mlm_classifier = nemo_nlp.TokenClassifier(args.d_model,
                                          num_classes=tokenizer.vocab_size,
                                          num_layers=1,
                                          log_softmax=True)
mlm_loss_fn = nemo_nlp.MaskedLanguageModelingLossNM()

nsp_classifier = nemo_nlp.SequenceClassifier(args.d_model,
                                             num_classes=2,
                                             num_layers=2,
                                             log_softmax=True)
nsp_loss_fn = nemo.backends.pytorch.common.CrossEntropyLoss()

bert_loss = nemo_nlp.LossAggregatorNM(num_inputs=2)

# tie weights of MLM softmax layer and embedding layer of the encoder
mlm_classifier.mlp.last_linear_layer.weight = \
    bert_model.bert.embeddings.word_embeddings.weight

def create_pipeline(data_file, max_seq_length, mask_probability,
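The create_pipeline signature above is cut off in the source. As a rough illustration only, the sketch below shows how such a helper typically wires the data layer, BERT encoder, classification heads, and loss modules into a single training graph in the NeMo v0.x call-as-you-wire style; the helper name, the data-layer output names, the batch_size argument, and the exact keyword arguments are assumptions, not taken from the source.

# Hedged sketch only: the port and keyword names below follow the usual
# NeMo v0.x pattern of calling neural modules to build a DAG, but they are
# assumptions rather than the tutorial's actual create_pipeline body.
def build_pretraining_graph(data_file, max_seq_length, mask_probability):
    # hypothetical data layer for this illustration; the batch size is
    # assumed to come from args
    data_layer = nemo_nlp.BertPretrainingDataLayer(
        tokenizer=tokenizer,
        dataset=data_file,
        max_seq_length=max_seq_length,
        mask_probability=mask_probability,
        batch_size=args.batch_size)

    # calling the data layer yields the tensors that feed the rest of the graph
    (input_ids, input_type_ids, input_mask,
     output_ids, output_mask, nsp_labels) = data_layer()

    # the BERT encoder produces hidden states shared by both objectives
    hidden_states = bert_model(input_ids=input_ids,
                               token_type_ids=input_type_ids,
                               attention_mask=input_mask)

    # masked language modeling head and loss
    mlm_logits = mlm_classifier(hidden_states=hidden_states)
    mlm_loss = mlm_loss_fn(logits=mlm_logits,
                           output_ids=output_ids,
                           output_mask=output_mask)

    # next sentence prediction head and loss
    nsp_logits = nsp_classifier(hidden_states=hidden_states)
    nsp_loss = nsp_loss_fn(logits=nsp_logits, labels=nsp_labels)

    # aggregate both objectives into one loss to optimize
    loss = bert_loss(loss_1=mlm_loss, loss_2=nsp_loss)
    return loss, mlm_loss, nsp_loss

In the v0.x API, the aggregated loss tensor returned by such a helper would typically be the tensor handed to the neural factory's training call for optimization.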
bert_model = nemo_nlp.huggingface.BERT(
    vocab_size=vocab_size,
    num_layers=args.num_layers,
    d_model=args.d_model,
    num_heads=args.num_heads,
    d_inner=args.d_inner,
    max_seq_length=args.max_sequence_length,
    hidden_act="gelu",
    factory=neural_factory)

# instantiate necessary modules for the whole training pipeline, namely
# data layers, BERT encoder, and MLM and NSP loss functions
mlm_log_softmax = nemo_nlp.TransformerLogSoftmaxNM(vocab_size=vocab_size,
                                                   d_model=args.d_model,
                                                   factory=neural_factory)
mlm_loss = nemo_nlp.MaskedLanguageModelingLossNM(factory=neural_factory)

nsp_log_softmax = nemo_nlp.SentenceClassificationLogSoftmaxNM(
    d_model=args.d_model, num_classes=2, factory=neural_factory)
nsp_loss = nemo_nlp.NextSentencePredictionLossNM(factory=neural_factory)

bert_loss = nemo_nlp.LossAggregatorNM(num_inputs=2, factory=neural_factory)

# tie weights of MLM softmax layer and embedding layer of the encoder
mlm_log_softmax.log_softmax.dense.weight = \
    bert_model.bert.embeddings.word_embeddings.weight

train_data_layer = nemo_nlp.BertPretrainingDataLayer(
    tokenizer=tokenizer,
    dataset=args.dataset_dir,
    name="train",