def create_and_check_electra_for_question_answering(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    sequence_labels,
    token_labels,
    choice_labels,
    fake_token_labels,
):
    # Instantiate the QA model from the shared test config and run a
    # forward pass in eval mode on the prepared inputs.
    model = ElectraForQuestionAnswering(config=config)
    model.to(torch_device)
    model.eval()
    result = model(
        input_ids,
        attention_mask=input_mask,
        token_type_ids=token_type_ids,
        start_positions=sequence_labels,
        end_positions=sequence_labels,
    )
    # The QA head should emit one start and one end logit per token.
    self.parent.assertEqual(result.start_logits.shape,
                            (self.batch_size, self.seq_length))
    self.parent.assertEqual(result.end_logits.shape,
                            (self.batch_size, self.seq_length))
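# For context: in the transformers test suite, a checker like the one above is
# usually driven from a test_* method on the model test class. A minimal
# sketch, assuming the conventional `model_tester` attribute and its
# `prepare_config_and_inputs` helper (neither is shown in this excerpt):
def test_electra_for_question_answering(self):
    config_and_inputs = self.model_tester.prepare_config_and_inputs()
    self.model_tester.create_and_check_electra_for_question_answering(
        *config_and_inputs)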
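# The hyper-parameter sweep below walks a `configurations` iterable of
# (learning_rate, hidden_size, num_hidden_layers) triples, matching the
# conf[0] / conf[1] / conf[2] indexing used in the loop. A minimal sketch of
# how it might be built; the grid values here are illustrative assumptions,
# not taken from the original:
from itertools import product

LEARNING_RATES = [5e-5, 3e-5]     # assumed candidate learning rates
HIDDEN_SIZES = [256, 512]         # assumed candidate hidden sizes
NUM_HIDDEN_LAYERS = [4, 8]        # assumed candidate layer counts

configurations = list(product(LEARNING_RATES, HIDDEN_SIZES, NUM_HIDDEN_LAYERS))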
for j, conf in enumerate(configurations):

    conf_id = '{}_config_{}_{}_{}'.format(j, *conf)
    print(
        'Run [{}] --learning rate: {}, --hidden size: {}, --hidden layer: {}'.
        format(j, *conf))

    # Initializing an ELECTRA electra-base-uncased style configuration
    electra_conf = ElectraConfig(hidden_size=conf[1],
                                 num_hidden_layers=conf[2])
    # Initializing a model from the electra-base-uncased style configuration
    model = ElectraForQuestionAnswering(electra_conf)
    configuration = model.config  # keep a handle on this run's final configuration

    model.to(device)
    model.train()

    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True)
    learning_rate = conf[0]
    optim = AdamW(model.parameters(), lr=learning_rate)
    loss_records = []
    bi = 0  # batch counter for the early-exit check in the inner loop

    for epoch in tqdm(range(EPOCHS)):
        for batch in train_loader:
            if bi > 3:  # quick trial: stop after a few batches per configuration
                break
            optim.zero_grad()