Example #1
 def create_and_check_bert_for_pretraining(self, config, input_ids,
                                           token_type_ids, input_mask,
                                           sequence_labels,
                                           token_labels, choice_labels):
     model = BertForPreTraining(config=config)
     model.to(torch_device)
     model.eval()
     loss, prediction_scores, seq_relationship_score = model(
         input_ids,
         attention_mask=input_mask,
         token_type_ids=token_type_ids,
         masked_lm_labels=token_labels,
         next_sentence_label=sequence_labels)
     result = {
         "loss": loss,
         "prediction_scores": prediction_scores,
         "seq_relationship_score": seq_relationship_score,
     }
     self.parent.assertListEqual(
         list(result["prediction_scores"].size()),
         [self.batch_size, self.seq_length, self.vocab_size])
     self.parent.assertListEqual(
         list(result["seq_relationship_score"].size()),
         [self.batch_size, 2])
     self.check_loss_output(result)
 def create_and_check_for_pretraining(
     self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
 ):
     model = BertForPreTraining(config=config)
     model.to(torch_device)
     model.eval()
     result = model(
         input_ids,
         attention_mask=input_mask,
         token_type_ids=token_type_ids,
         labels=token_labels,
         next_sentence_label=sequence_labels,
     )
     self.parent.assertEqual(result.prediction_logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
     self.parent.assertEqual(result.seq_relationship_logits.shape, (self.batch_size, 2))
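For reference, here is a minimal self-contained sketch of the same pre-training forward pass with the current transformers API. The tiny config, batch size, and sequence length are arbitrary choices for illustration; this is not part of the original test suite.

import torch
from transformers import BertConfig, BertForPreTraining

# Tiny randomly initialized config so the sketch runs quickly on CPU.
config = BertConfig(vocab_size=100, hidden_size=32, num_hidden_layers=2,
                    num_attention_heads=2, intermediate_size=64)
model = BertForPreTraining(config)
model.eval()

batch_size, seq_length = 2, 8
input_ids = torch.randint(0, config.vocab_size, (batch_size, seq_length))
attention_mask = torch.ones(batch_size, seq_length, dtype=torch.long)
token_type_ids = torch.zeros(batch_size, seq_length, dtype=torch.long)
labels = torch.randint(0, config.vocab_size, (batch_size, seq_length))
next_sentence_label = torch.randint(0, 2, (batch_size,))

with torch.no_grad():
    output = model(input_ids,
                   attention_mask=attention_mask,
                   token_type_ids=token_type_ids,
                   labels=labels,
                   next_sentence_label=next_sentence_label)

print(output.loss)                           # combined MLM + NSP loss
print(output.prediction_logits.shape)        # (batch_size, seq_length, vocab_size)
print(output.seq_relationship_logits.shape)  # (batch_size, 2)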
Example #3
from transformers import (BertConfig, BertForPreTraining, BertModel,
                          BertTokenizer, RobertaModel, RobertaTokenizer)


def load_bert(model_name):
    """Load a Dutch BERT variant and its matching tokenizer by name."""
    if model_name == 'robbert':
        tokenizer = RobertaTokenizer.from_pretrained("pdelobelle/robBERT-base")
        model = RobertaModel.from_pretrained("pdelobelle/robBERT-base")
    elif model_name == 'bertje':
        tokenizer = BertTokenizer.from_pretrained(
            "wietsedv/bert-base-dutch-cased")
        model = BertModel.from_pretrained("wietsedv/bert-base-dutch-cased")
    elif model_name == 'bert-nl':
        # Build the model from a local config and keep only the base encoder
        # (.bert), dropping the MLM/NSP pre-training heads.
        tokenizer = BertTokenizer.from_pretrained("data/bert-nl")
        config = BertConfig.from_json_file("data/bert-nl/config.json")
        model = BertForPreTraining(config).bert
    else:
        raise ValueError('invalid model name')
    model.eval()
    return tokenizer, model
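A possible usage sketch for load_bert (assuming the hub checkpoints above are reachable and transformers v4+, where the encoder returns a ModelOutput; the Dutch sample sentence is only illustrative):

import torch

tokenizer, model = load_bert('bertje')
inputs = tokenizer("Dit is een voorbeeldzin.", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
print(outputs.last_hidden_state.shape)  # (1, num_tokens, hidden_size)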
Example #4
        def create_and_check_bert_for_pretraining(self, config, input_ids,
                                                  token_type_ids, input_mask,
                                                  sequence_labels,
                                                  token_labels, choice_labels):
            seed = 42
            random.seed(seed)
            np.random.seed(seed)
            torch.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
            onnxruntime.set_seed(seed)

            model = BertForPreTraining(config=config)
            model.eval()
            loss, prediction_scores, seq_relationship_score = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                masked_lm_labels=token_labels,
                next_sentence_label=sequence_labels)
            model_desc = ModelDescription([
                self.input_ids_desc, self.attention_mask_desc,
                self.token_type_ids_desc, self.masked_lm_labels_desc,
                self.next_sentence_label_desc
            ], [
                self.loss_desc, self.prediction_scores_desc,
                self.seq_relationship_scores_desc
            ])

            from collections import namedtuple
            MyArgs = namedtuple(
                "MyArgs",
                "local_rank world_size max_steps learning_rate warmup_proportion batch_size seq_len"
            )
            args = MyArgs(local_rank=0,
                          world_size=1,
                          max_steps=100,
                          learning_rate=0.00001,
                          warmup_proportion=0.01,
                          batch_size=13,
                          seq_len=7)

            def get_lr_this_step(global_step):
                return get_lr(args, global_step)

            loss_scaler = LossScaler('loss_scale_input_name',
                                     True,
                                     up_scale_window=2000)

            # It would be better to test both with and without mixed precision and allreduce_post_accumulation.
            # However, a stress test of all 4 cases is not stable, at least on the test machine.
            # Therefore we only test mixed precision with allreduce_post_accumulation, since that is the most useful case.
            option_fp16 = [True]
            option_allreduce_post_accumulation = [True]
            option_gradient_accumulation_steps = [1, 8]
            option_use_internal_get_lr_this_step = [True, False]
            option_use_internal_loss_scaler = [True, False]
            option_split_batch = [BatchArgsOption.ListAndDict]

            for fp16 in option_fp16:
                for allreduce_post_accumulation in option_allreduce_post_accumulation:
                    for gradient_accumulation_steps in option_gradient_accumulation_steps:
                        for use_internal_get_lr_this_step in option_use_internal_get_lr_this_step:
                            for use_internal_loss_scaler in option_use_internal_loss_scaler:
                                for split_batch in option_split_batch:
                                    print("gradient_accumulation_steps:",
                                          gradient_accumulation_steps)
                                    print("use_internal_loss_scaler:",
                                          use_internal_loss_scaler)
                                    loss_ort, prediction_scores_ort, seq_relationship_score_ort =\
                                        run_test(model, model_desc, self.device, args, gradient_accumulation_steps, fp16,
                                                allreduce_post_accumulation,
                                                get_lr_this_step, use_internal_get_lr_this_step,
                                                loss_scaler, use_internal_loss_scaler,
                                                split_batch)

                                    print(loss_ort)
                                    print(prediction_scores_ort)
                                    print(seq_relationship_score_ort)
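As a side note on the nested option loops above, the same sweep can be written more compactly with itertools.product. A standard-library-only sketch of the iteration pattern (BatchArgsOption.ListAndDict is replaced by a placeholder string, and the ORT-specific run_test call is omitted):

import itertools

option_fp16 = [True]
option_allreduce_post_accumulation = [True]
option_gradient_accumulation_steps = [1, 8]
option_use_internal_get_lr_this_step = [True, False]
option_use_internal_loss_scaler = [True, False]
option_split_batch = ["ListAndDict"]  # placeholder for BatchArgsOption.ListAndDict

for (fp16, allreduce_post_accumulation, gradient_accumulation_steps,
     use_internal_get_lr_this_step, use_internal_loss_scaler,
     split_batch) in itertools.product(
         option_fp16, option_allreduce_post_accumulation,
         option_gradient_accumulation_steps,
         option_use_internal_get_lr_this_step,
         option_use_internal_loss_scaler, option_split_batch):
    print("gradient_accumulation_steps:", gradient_accumulation_steps)
    print("use_internal_loss_scaler:", use_internal_loss_scaler)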
Example #5
        def create_and_check_bert_for_pretraining(
            self,
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
            option_fp16,
            option_allreduce_post_accumulation,
            option_gradient_accumulation_steps,
            option_split_batch,
            option_use_internal_get_lr_this_step=[True],
            option_use_internal_loss_scaler=[True],
        ):
            seed = 42
            random.seed(seed)
            np.random.seed(seed)
            torch.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
            onnxruntime.set_seed(seed)

            model = BertForPreTraining(config=config)
            model.eval()
            loss, prediction_scores, seq_relationship_score = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                masked_lm_labels=token_labels,
                next_sentence_label=sequence_labels,
            )
            model_desc = ModelDescription(
                [
                    self.input_ids_desc,
                    self.attention_mask_desc,
                    self.token_type_ids_desc,
                    self.masked_lm_labels_desc,
                    self.next_sentence_label_desc,
                ],
                [
                    self.loss_desc, self.prediction_scores_desc,
                    self.seq_relationship_scores_desc
                ],
            )

            from collections import namedtuple

            MyArgs = namedtuple(
                "MyArgs",
                "local_rank world_size max_steps learning_rate warmup_proportion batch_size seq_len"
            )

            dataset_len = 100
            epochs = 8
            max_steps = epochs * dataset_len
            args = MyArgs(
                local_rank=0,
                world_size=1,
                max_steps=max_steps,
                learning_rate=0.00001,
                warmup_proportion=0.01,
                batch_size=13,
                seq_len=7,
            )

            def get_lr_this_step(global_step):
                return get_lr(args, global_step)

            loss_scaler = LossScaler("loss_scale_input_name",
                                     True,
                                     up_scale_window=2000)

            for fp16 in option_fp16:
                for allreduce_post_accumulation in option_allreduce_post_accumulation:
                    for gradient_accumulation_steps in option_gradient_accumulation_steps:
                        for use_internal_get_lr_this_step in option_use_internal_get_lr_this_step:
                            for use_internal_loss_scaler in option_use_internal_loss_scaler:
                                for split_batch in option_split_batch:
                                    print("gradient_accumulation_steps:",
                                          gradient_accumulation_steps)
                                    print("split_batch:", split_batch)

                                    seed = 42
                                    random.seed(seed)
                                    np.random.seed(seed)
                                    torch.manual_seed(seed)
                                    torch.cuda.manual_seed_all(seed)
                                    onnxruntime.set_seed(seed)

                                    (
                                        old_api_loss_ort,
                                        old_api_prediction_scores_ort,
                                        old_api_seq_relationship_score_ort,
                                    ) = run_test(
                                        model,
                                        model_desc,
                                        self.device,
                                        args,
                                        gradient_accumulation_steps,
                                        fp16,
                                        allreduce_post_accumulation,
                                        get_lr_this_step,
                                        use_internal_get_lr_this_step,
                                        loss_scaler,
                                        use_internal_loss_scaler,
                                        split_batch,
                                        dataset_len,
                                        epochs,
                                        use_new_api=False,
                                    )

                                    random.seed(seed)
                                    np.random.seed(seed)
                                    torch.manual_seed(seed)
                                    torch.cuda.manual_seed_all(seed)
                                    onnxruntime.set_seed(seed)
                                    if use_internal_get_lr_this_step and use_internal_loss_scaler:
                                        (
                                            new_api_loss_ort,
                                            new_api_prediction_scores_ort,
                                            new_api_seq_relationship_score_ort,
                                        ) = run_test(
                                            model,
                                            model_desc,
                                            self.device,
                                            args,
                                            gradient_accumulation_steps,
                                            fp16,
                                            allreduce_post_accumulation,
                                            get_lr_this_step,
                                            use_internal_get_lr_this_step,
                                            loss_scaler,
                                            use_internal_loss_scaler,
                                            split_batch,
                                            dataset_len,
                                            epochs,
                                            use_new_api=True,
                                        )

                                        assert_allclose(
                                            old_api_loss_ort, new_api_loss_ort)
                                        assert_allclose(
                                            old_api_prediction_scores_ort,
                                            new_api_prediction_scores_ort)
                                        assert_allclose(
                                            old_api_seq_relationship_score_ort,
                                            new_api_seq_relationship_score_ort)
        def create_and_check_bert_for_pretraining(self, config, input_ids,
                                                  token_type_ids, input_mask,
                                                  sequence_labels,
                                                  token_labels, choice_labels):
            model = BertForPreTraining(config=config)
            model.eval()
            loss, prediction_scores, seq_relationship_score = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                masked_lm_labels=token_labels,
                next_sentence_label=sequence_labels)
            model_desc = ModelDescription([
                self.input_ids_desc, self.attention_mask_desc,
                self.token_type_ids_desc, self.masked_lm_labels_desc,
                self.next_sentence_label_desc
            ], [
                self.loss_desc, self.prediction_scores_desc,
                self.seq_relationship_scores_desc
            ])

            import argparse
            args_ = argparse.Namespace(fp16=True, amp_opt_level='O1')

            from collections import namedtuple
            MyArgs = namedtuple(
                "MyArgs",
                "local_rank world_size max_steps learning_rate warmup_proportion batch_size seq_len"
            )
            args = MyArgs(local_rank=0,
                          world_size=1,
                          max_steps=100,
                          learning_rate=0.00001,
                          warmup_proportion=0.01,
                          batch_size=13,
                          seq_len=7)

            from train_with_ort_trainer import get_lr

            def get_lr_this_step(global_step):
                return get_lr(args, global_step)

            loss_scaler = LossScaler('loss_scale_input_name',
                                     True,
                                     up_scale_window=2000)

            option_gradient_accumulation_steps = [8]
            option_fp16 = [True, False]
            option_allreduce_post_accumulation = True
            option_use_internal_get_lr_this_step = False
            option_use_internal_loss_scaler = False
            # TODO: test with fetches

            for gradient_accumulation_steps in option_gradient_accumulation_steps:
                for fp16 in option_fp16:
                    for option_split_batch in BatchArgsOption:
                        loss_ort, prediction_scores_ort, seq_relationship_score_ort =\
                            run_test(model, model_desc, self.device, args, gradient_accumulation_steps, fp16,
                                     option_allreduce_post_accumulation,
                                     get_lr_this_step, option_use_internal_get_lr_this_step,
                                     loss_scaler, option_use_internal_loss_scaler,
                                     option_split_batch)

                        print(loss_ort)
                        print(prediction_scores_ort)
                        print(seq_relationship_score_ort)