def create_and_check_for_masked_lm(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    sequence_labels,
    token_labels,
    choice_labels,
    fake_token_labels,
):
    model = FunnelForMaskedLM(config=config)
    model.to(torch_device)
    model.eval()
    result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
    # The MLM head should return one score per vocabulary entry for every input position.
    self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
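# For orientation only: the training code below builds a Funnel MLM from scratch, but the same
# model class can be sanity-checked in isolation with a public pretrained checkpoint. This is a
# minimal sketch; the checkpoint name "funnel-transformer/small" and the example sentence are
# illustrative assumptions, not part of the original script.
import torch
from transformers import FunnelTokenizer, FunnelForMaskedLM

tokenizer = FunnelTokenizer.from_pretrained("funnel-transformer/small")
model = FunnelForMaskedLM.from_pretrained("funnel-transformer/small")
model.eval()

text = f"Paris is the capital of {tokenizer.mask_token}."
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits  # shape: (batch_size, seq_length, vocab_size)

# Pick the most likely token at each masked position and decode it.
mask_positions = (inputs["input_ids"] == tokenizer.mask_token_id).nonzero(as_tuple=True)[1]
predicted_ids = logits[0, mask_positions].argmax(dim=-1)
print(tokenizer.decode(predicted_ids))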
dataset_gen_val = LineByLineTextDataset(tokenizer=bpe_tokenizer, file_path=input_path_val, block_size=block_size)

# Collator that randomly masks tokens with probability mlm_probability for the MLM objective.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=bpe_tokenizer, mlm=True, mlm_probability=mlm_probability
)

# create model
config = FunnelConfig(
    vocab_size=bpe_tokenizer.vocab_size,
    max_position_embeddings=max_len + 10,
    n_head=num_attention_heads,
    block_sizes=block_sizes,
    type_vocab_size=1,
)
model = FunnelForMaskedLM(config=config)
_pretty_print(f"Number of model parameters : {model.num_parameters()}")

model_path = os.path.join(output_path, "lm")
training_args = TrainingArguments(
    output_dir=model_path,
    overwrite_output_dir=True,
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=val_batch_size,
    evaluation_strategy="steps",
    logging_steps=eval_steps,
    eval_steps=eval_steps,
    save_total_limit=1,
    load_best_model_at_end=True,