Example #1
        def test_pt_tf_model_equivalence(self):
            if not is_torch_available():
                return

            import os
            from tempfile import TemporaryDirectory

            import numpy as np
            import torch
            import transformers

            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
            )

            for model_class in self.all_model_classes:
                pt_model_class_name = model_class.__name__[
                    2:]  # Skip the "TF" at the beginning
                pt_model_class = getattr(transformers, pt_model_class_name)

                config.output_hidden_states = True
                tf_model = model_class(config)
                pt_model = pt_model_class(config)

                # Check we can load pt model in tf and vice-versa with model => model functions
                tf_model = transformers.load_pytorch_model_in_tf2_model(
                    tf_model, pt_model, tf_inputs=inputs_dict)
                pt_model = transformers.load_tf2_model_in_pytorch_model(
                    pt_model, tf_model)

                # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
                pt_model.eval()
                pt_inputs_dict = dict(
                    (name, torch.from_numpy(key.numpy()).to(torch.long))
                    for name, key in inputs_dict.items())
                with torch.no_grad():
                    pto = pt_model(**pt_inputs_dict)
                tfo = tf_model(inputs_dict)
                max_diff = np.amax(np.abs(tfo[0].numpy() - pto[0].numpy()))
                self.assertLessEqual(max_diff, 2e-2)

                # Check we can load pt model in tf and vice-versa with checkpoint => model functions
                with TemporaryDirectory() as tmpdirname:
                    pt_checkpoint_path = os.path.join(tmpdirname,
                                                      'pt_model.bin')
                    torch.save(pt_model.state_dict(), pt_checkpoint_path)
                    tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(
                        tf_model, pt_checkpoint_path)

                    tf_checkpoint_path = os.path.join(tmpdirname,
                                                      'tf_model.h5')
                    tf_model.save_weights(tf_checkpoint_path)
                    pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(
                        pt_model, tf_checkpoint_path)

                # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
                pt_model.eval()
                pt_inputs_dict = dict(
                    (name, torch.from_numpy(key.numpy()).to(torch.long))
                    for name, key in inputs_dict.items())
                with torch.no_grad():
                    pto = pt_model(**pt_inputs_dict)
                tfo = tf_model(inputs_dict)
                max_diff = np.amax(np.abs(tfo[0].numpy() - pto[0].numpy()))
                self.assertLessEqual(max_diff, 2e-2)
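
The round trip exercised above can be reduced to a short standalone check. The following sketch assumes the same public weight-conversion helpers used in the test (load_pytorch_model_in_tf2_model and load_tf2_model_in_pytorch_model) and uses BertConfig/BertModel/TFBertModel purely as illustrative classes:

import numpy as np
import tensorflow as tf
import torch
import transformers
from transformers import BertConfig, BertModel, TFBertModel

config = BertConfig(hidden_size=32, num_hidden_layers=2,
                    num_attention_heads=2, intermediate_size=64)
tf_model = TFBertModel(config)
pt_model = BertModel(config)

tf_inputs = {"input_ids": tf.constant([[1, 2, 3, 4]])}
pt_inputs = {"input_ids": torch.tensor([[1, 2, 3, 4]])}

# Copy the PyTorch weights into the TF model, then copy them back.
tf_model = transformers.load_pytorch_model_in_tf2_model(
    tf_model, pt_model, tf_inputs=tf_inputs)
pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model)

pt_model.eval()
with torch.no_grad():
    pt_out = pt_model(**pt_inputs)[0].numpy()
tf_out = tf_model(tf_inputs)[0].numpy()
print(np.amax(np.abs(tf_out - pt_out)))  # expected to stay within ~2e-2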
Example #2
def convert_pt_checkpoint_to_tf(model_type,
                                pytorch_checkpoint_path,
                                config_file,
                                tf_dump_path,
                                compare_with_pt_model=False,
                                use_cached_models=True):
    if model_type not in MODEL_CLASSES:
        raise ValueError(
            "Unrecognized model type, should be one of {}.".format(
                list(MODEL_CLASSES.keys())))

    config_class, model_class, pt_model_class, aws_config_map = MODEL_CLASSES[
        model_type]

    # Initialise TF model
    if config_file in aws_config_map:
        config_file = cached_path(aws_config_map[config_file],
                                  force_download=not use_cached_models)
    config = config_class.from_json_file(config_file)
    config.output_hidden_states = True
    config.output_attentions = True
    print("Building TensorFlow model from configuration: {}".format(
        str(config)))
    tf_model = model_class(config)

    # Load weights from the PyTorch checkpoint
    if pytorch_checkpoint_path in aws_config_map.keys():
        pytorch_checkpoint_url = hf_bucket_url(pytorch_checkpoint_path,
                                               filename=WEIGHTS_NAME)
        pytorch_checkpoint_path = cached_path(
            pytorch_checkpoint_url, force_download=not use_cached_models)
    # Load PyTorch checkpoint in tf2 model:
    tf_model = load_pytorch_checkpoint_in_tf2_model(tf_model,
                                                    pytorch_checkpoint_path)

    if compare_with_pt_model:
        tfo = tf_model(tf_model.dummy_inputs,
                       training=False)  # build the network

        state_dict = torch.load(pytorch_checkpoint_path, map_location="cpu")
        pt_model = pt_model_class.from_pretrained(
            pretrained_model_name_or_path=None,
            config=config,
            state_dict=state_dict)

        with torch.no_grad():
            pto = pt_model(**pt_model.dummy_inputs)

        np_pt = pto[0].numpy()
        np_tf = tfo[0].numpy()
        diff = np.amax(np.abs(np_pt - np_tf))
        print("Max absolute difference between models outputs {}".format(diff))
        assert diff <= 2e-2, "Error, model absolute difference is >2e-2: {}".format(
            diff)

    # Save the TensorFlow model
    print("Save TensorFlow model to {}".format(tf_dump_path))
    tf_model.save_weights(tf_dump_path, save_format="h5")
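
A hypothetical invocation of the converter above. The accepted model_type keys and the shortcut-name resolution depend on the MODEL_CLASSES mapping defined in the surrounding conversion script, so "bert" and the file paths below are illustrative only:

convert_pt_checkpoint_to_tf(model_type="bert",
                            pytorch_checkpoint_path="./bert/pytorch_model.bin",
                            config_file="./bert/config.json",
                            tf_dump_path="./bert/tf_model.h5",
                            compare_with_pt_model=True)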
Example #3
def convert_pt_checkpoint_to_tf(model_type,
                                pytorch_checkpoint_path,
                                config_file,
                                tf_dump_path,
                                compare_with_pt_model=False,
                                use_cached_models=True):
    if model_type not in MODEL_CLASSES:
        raise ValueError(
            "Unrecognized model type, should be one of {}.".format(
                list(MODEL_CLASSES.keys())))

    config_class, model_class, pt_model_class, aws_model_maps, aws_config_map = MODEL_CLASSES[
        model_type]

    # Initialise TF model
    if config_file in aws_config_map:
        config_file = cached_path(aws_config_map[config_file],
                                  force_download=not use_cached_models)
    config = config_class.from_json_file(config_file)
    config.output_hidden_states = True
    config.output_attentions = True
    print("Building TensorFlow model from configuration: {}".format(
        str(config)))
    tf_model = model_class(config)

    # Load weights from the PyTorch checkpoint
    if pytorch_checkpoint_path in aws_model_maps:
        pytorch_checkpoint_path = cached_path(
            aws_model_maps[pytorch_checkpoint_path],
            force_download=not use_cached_models)
    # Load PyTorch checkpoint in tf2 model:
    tf_model = load_pytorch_checkpoint_in_tf2_model(tf_model,
                                                    pytorch_checkpoint_path)

    if compare_with_pt_model:
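        # probe both networks with a small fixed batch of token ids (trailing zeros act as padding)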
        inputs_list = [[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]]
        tf_inputs = tf.constant(inputs_list)
        tfo = tf_model(tf_inputs, training=False)  # build the network

        pt_model = pt_model_class.from_pretrained(None,
                                                  config=config,
                                                  state_dict=torch.load(
                                                      pytorch_checkpoint_path,
                                                      map_location='cpu'))
        pt_inputs = torch.tensor(inputs_list)
        with torch.no_grad():
            pto = pt_model(pt_inputs)

        np_pt = pto[0].detach().numpy()
        np_tf = tfo[0].numpy()
        diff = np.amax(np.abs(np_pt - np_tf))
        print("Max absolute difference between models outputs {}".format(diff))
        assert diff <= 2e-2, "Error, model absolute difference is >2e-2"

    # Save the TensorFlow model
    print("Save TensorFlow model to {}".format(tf_dump_path))
    tf_model.save_weights(tf_dump_path, save_format='h5')
Example #4
    def test_pt_tf_model_equivalence(self):
        import os
        import tempfile

        import numpy as np
        import tensorflow as tf
        import torch

        import transformers
        from transformers.testing_utils import torch_device

        for model_class in self.all_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
                return_obj_labels="PreTraining" in model_class.__name__)

            tf_model_class_name = "TF" + model_class.__name__  # Add the "TF" at the beginning

            if not hasattr(transformers, tf_model_class_name):
                # transformers does not have a TF version yet
                return

            tf_model_class = getattr(transformers, tf_model_class_name)

            config.output_hidden_states = True
            config.task_obj_predict = False

            pt_model = model_class(config)
            tf_model = tf_model_class(config)

            # Check we can load pt model in tf and vice-versa with model => model functions
            pt_inputs = self._prepare_for_class(inputs_dict, model_class)

            def recursive_numpy_convert(iterable):
                return_dict = {}
                for key, value in iterable.items():
                    if type(value) == bool:
                        return_dict[key] = value
                    elif isinstance(value, dict):
                        return_dict[key] = recursive_numpy_convert(value)
                    else:
                        if isinstance(value, (list, tuple)):
                            return_dict[key] = (tf.convert_to_tensor(
                                iter_value.cpu().numpy(), dtype=tf.int32)
                                                for iter_value in value)
                        else:
                            return_dict[key] = tf.convert_to_tensor(
                                value.cpu().numpy(), dtype=tf.int32)
                return return_dict

            tf_inputs_dict = recursive_numpy_convert(pt_inputs)

            tf_model = transformers.load_pytorch_model_in_tf2_model(
                tf_model, pt_model, tf_inputs=tf_inputs_dict)
            pt_model = transformers.load_tf2_model_in_pytorch_model(
                pt_model, tf_model).to(torch_device)

            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()

            # Delete obj labels as we want to compute the hidden states and not the loss

            if "obj_labels" in inputs_dict:
                del inputs_dict["obj_labels"]

            pt_inputs = self._prepare_for_class(inputs_dict, model_class)
            tf_inputs_dict = recursive_numpy_convert(pt_inputs)

            with torch.no_grad():
                pto = pt_model(**pt_inputs)
            tfo = tf_model(tf_inputs_dict, training=False)
            tf_hidden_states = tfo[0].numpy()
            pt_hidden_states = pto[0].cpu().numpy()

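            # zero out positions that are NaN in either framework so they do not dominate the max-diff comparison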
            tf_nans = np.copy(np.isnan(tf_hidden_states))
            pt_nans = np.copy(np.isnan(pt_hidden_states))

            pt_hidden_states[tf_nans] = 0
            tf_hidden_states[tf_nans] = 0
            pt_hidden_states[pt_nans] = 0
            tf_hidden_states[pt_nans] = 0

            max_diff = np.amax(np.abs(tf_hidden_states - pt_hidden_states))
            # Debug info (remove when fixed)
            if max_diff >= 2e-2:
                print("===")
                print(model_class)
                print(config)
                print(inputs_dict)
                print(pt_inputs)
            self.assertLessEqual(max_diff, 6e-2)

            # Check we can load pt model in tf and vice-versa with checkpoint => model functions
            with tempfile.TemporaryDirectory() as tmpdirname:
                pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
                torch.save(pt_model.state_dict(), pt_checkpoint_path)
                tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(
                    tf_model, pt_checkpoint_path)

                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
                tf_model.save_weights(tf_checkpoint_path)
                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(
                    pt_model, tf_checkpoint_path)

            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()

            for key, value in pt_inputs.items():
                if key in ("visual_feats", "visual_pos"):
                    pt_inputs[key] = value.to(torch.float32)
                else:
                    pt_inputs[key] = value.to(torch.long)

            with torch.no_grad():
                pto = pt_model(**pt_inputs)

            tfo = tf_model(tf_inputs_dict)
            tfo = tfo[0].numpy()
            pto = pto[0].cpu().numpy()
            tf_nans = np.copy(np.isnan(tfo))
            pt_nans = np.copy(np.isnan(pto))

            pto[tf_nans] = 0
            tfo[tf_nans] = 0
            pto[pt_nans] = 0
            tfo[pt_nans] = 0

            max_diff = np.amax(np.abs(tfo - pto))
            self.assertLessEqual(max_diff, 6e-2)
Example #5
    def test_pt_tf_model_equivalence(self):
        if not is_torch_available():
            return

        import os
        import tempfile

        import numpy as np
        import torch
        import transformers

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            pt_model_class_name = model_class.__name__[
                2:]  # Skip the "TF" at the beginning
            pt_model_class = getattr(transformers, pt_model_class_name)

            config.output_hidden_states = True

            tf_model = model_class(config)
            pt_model = pt_model_class(config)

            # Check we can load pt model in tf and vice-versa with model => model functions

            tf_model = transformers.load_pytorch_model_in_tf2_model(
                tf_model,
                pt_model,
                tf_inputs=self._prepare_for_class(inputs_dict, model_class))
            pt_model = transformers.load_tf2_model_in_pytorch_model(
                pt_model, tf_model)

            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()
            pt_inputs_dict = dict(
                (name, torch.from_numpy(key.numpy()).to(torch.long))
                for name, key in self._prepare_for_class(
                    inputs_dict, model_class).items())
            # need to rename encoder-decoder "inputs" for PyTorch
            if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
                pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")

            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(self._prepare_for_class(inputs_dict, model_class),
                           training=False)
            tf_hidden_states = tfo[0].numpy()
            pt_hidden_states = pto[0].numpy()

            tf_nans = np.copy(np.isnan(tf_hidden_states))
            pt_nans = np.copy(np.isnan(pt_hidden_states))

            pt_hidden_states[tf_nans] = 0
            tf_hidden_states[tf_nans] = 0
            pt_hidden_states[pt_nans] = 0
            tf_hidden_states[pt_nans] = 0

            max_diff = np.amax(np.abs(tf_hidden_states - pt_hidden_states))
            # Debug info (remove when fixed)
            if max_diff >= 2e-2:
                print("===")
                print(model_class)
                print(config)
                print(inputs_dict)
                print(pt_inputs_dict)
            self.assertLessEqual(max_diff, 2e-2)

            # Check we can load pt model in tf and vice-versa with checkpoint => model functions
            with tempfile.TemporaryDirectory() as tmpdirname:
                pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
                torch.save(pt_model.state_dict(), pt_checkpoint_path)
                tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(
                    tf_model, pt_checkpoint_path)

                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
                tf_model.save_weights(tf_checkpoint_path)
                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(
                    pt_model, tf_checkpoint_path)

            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()
            pt_inputs_dict = dict(
                (name, torch.from_numpy(key.numpy()).to(torch.long))
                for name, key in self._prepare_for_class(
                    inputs_dict, model_class).items())
            # need to rename encoder-decoder "inputs" for PyTorch
            if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
                pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")

            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(self._prepare_for_class(inputs_dict, model_class))
            tfo = tfo[0].numpy()
            pto = pto[0].numpy()
            tf_nans = np.copy(np.isnan(tfo))
            pt_nans = np.copy(np.isnan(pto))

            pto[tf_nans] = 0
            tfo[tf_nans] = 0
            pto[pt_nans] = 0
            tfo[pt_nans] = 0

            max_diff = np.amax(np.abs(tfo - pto))
            self.assertLessEqual(max_diff, 2e-2)
Example #6
    def test_pt_tf_model_equivalence(self):
        import inspect
        import os
        import tempfile

        import numpy as np
        import tensorflow as tf
        import torch

        import transformers

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            tf_model_class_name = "TF" + model_class.__name__  # Add the "TF" at the beginning

            if not hasattr(transformers, tf_model_class_name):
                # transformers does not have a TF version yet
                return

            tf_model_class = getattr(transformers, tf_model_class_name)

            config.output_hidden_states = True

            tf_model = tf_model_class(config)
            pt_model = model_class(config)

            # forward only those inputs that actually exist in the TF model's call signature
            tf_input_keys = set(
                inspect.signature(tf_model.call).parameters.keys())

            # remove all head masks
            tf_input_keys.discard("head_mask")
            tf_input_keys.discard("cross_attn_head_mask")
            tf_input_keys.discard("decoder_head_mask")

            pt_inputs = self._prepare_for_class(inputs_dict, model_class)
            pt_inputs = {
                k: v
                for k, v in pt_inputs.items() if k in tf_input_keys
            }

            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()
            tf_inputs_dict = {}
            for key, tensor in pt_inputs.items():
                # booleans pass through unchanged; tensors are converted to TF dtypes below
                if type(tensor) == bool:
                    tf_inputs_dict[key] = tensor
                elif key == "input_values":
                    tf_inputs_dict[key] = tf.convert_to_tensor(
                        tensor.numpy(), dtype=tf.float32)
                elif key == "pixel_values":
                    tf_inputs_dict[key] = tf.convert_to_tensor(
                        tensor.numpy(), dtype=tf.float32)
                else:
                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(),
                                                               dtype=tf.int32)

            # Check we can load pt model in tf and vice-versa with model => model functions
            tf_model = transformers.load_pytorch_model_in_tf2_model(
                tf_model, pt_model, tf_inputs=tf_inputs_dict)
            pt_model = transformers.load_tf2_model_in_pytorch_model(
                pt_model, tf_model)

            # need to rename encoder-decoder "inputs" for PyTorch
            #            if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
            #                pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")

            with torch.no_grad():
                pto = pt_model(**pt_inputs)
            tfo = tf_model(tf_inputs_dict, training=False)

            self.assertEqual(len(tfo), len(pto),
                             "Output lengths differ between TF and PyTorch")
            for tf_output, pt_output in zip(tfo.to_tuple(), pto.to_tuple()):

                if not (isinstance(tf_output, tf.Tensor)
                        and isinstance(pt_output, torch.Tensor)):
                    continue

                tf_out = tf_output.numpy()
                pt_out = pt_output.numpy()

                self.assertEqual(
                    tf_out.shape, pt_out.shape,
                    "Output component shapes differ between TF and PyTorch")

                if len(tf_out.shape) > 0:

                    tf_nans = np.copy(np.isnan(tf_out))
                    pt_nans = np.copy(np.isnan(pt_out))

                    pt_out[tf_nans] = 0
                    tf_out[tf_nans] = 0
                    pt_out[pt_nans] = 0
                    tf_out[pt_nans] = 0

                max_diff = np.amax(np.abs(tf_out - pt_out))
                self.assertLessEqual(max_diff, 4e-2)

            # Check we can load pt model in tf and vice-versa with checkpoint => model functions
            with tempfile.TemporaryDirectory() as tmpdirname:
                pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
                torch.save(pt_model.state_dict(), pt_checkpoint_path)
                tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(
                    tf_model, pt_checkpoint_path)

                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
                tf_model.save_weights(tf_checkpoint_path)
                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(
                    pt_model, tf_checkpoint_path)

            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()
            tf_inputs_dict = {}
            for key, tensor in pt_inputs.items():
                # convert everything to TF tensors: bools and ids become int32, float inputs stay float32
                if type(tensor) == bool:
                    tensor = np.array(tensor, dtype=bool)
                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor,
                                                               dtype=tf.int32)
                elif key == "input_values":
                    tf_inputs_dict[key] = tf.convert_to_tensor(
                        tensor.numpy(), dtype=tf.float32)
                elif key == "pixel_values":
                    tf_inputs_dict[key] = tf.convert_to_tensor(
                        tensor.numpy(), dtype=tf.float32)
                else:
                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(),
                                                               dtype=tf.int32)

            # need to rename encoder-decoder "inputs" for PyTorch
            #            if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
            #                pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")

            with torch.no_grad():
                pto = pt_model(**pt_inputs)

            tfo = tf_model(tf_inputs_dict)

            self.assertEqual(len(tfo), len(pto),
                             "Output lengths differ between TF and PyTorch")
            for tf_output, pt_output in zip(tfo.to_tuple(), pto.to_tuple()):

                if not (isinstance(tf_output, tf.Tensor)
                        and isinstance(pt_output, torch.Tensor)):
                    continue

                tf_out = tf_output.numpy()
                pt_out = pt_output.numpy()

                self.assertEqual(
                    tf_out.shape, pt_out.shape,
                    "Output component shapes differ between TF and PyTorch")

                if len(tf_out.shape) > 0:
                    tf_nans = np.copy(np.isnan(tf_out))
                    pt_nans = np.copy(np.isnan(pt_out))

                    pt_out[tf_nans] = 0
                    tf_out[tf_nans] = 0
                    pt_out[pt_nans] = 0
                    tf_out[pt_nans] = 0

                max_diff = np.amax(np.abs(tf_out - pt_out))
                self.assertLessEqual(max_diff, 4e-2)
Example #7
    def test_pt_tf_model_equivalence(self):
        from transformers import is_torch_available

        if not is_torch_available():
            return

        import os
        import tempfile

        import numpy as np
        import torch

        import transformers

        for model_class in self.all_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
                return_obj_labels="PreTraining" in model_class.__name__
            )

            pt_model_class_name = model_class.__name__[2:]  # Skip the "TF" at the beginning
            pt_model_class = getattr(transformers, pt_model_class_name)

            config.output_hidden_states = True
            config.task_obj_predict = False

            tf_model = model_class(config)
            pt_model = pt_model_class(config)

            # Check we can load pt model in tf and vice-versa with model => model functions

            tf_model = transformers.load_pytorch_model_in_tf2_model(
                tf_model, pt_model, tf_inputs=self._prepare_for_class(inputs_dict, model_class)
            )
            pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model)

            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()

            # Delete obj labels as we want to compute the hidden states and not the loss

            if "obj_labels" in inputs_dict:
                del inputs_dict["obj_labels"]

            def torch_type(key):
                if key in ("visual_feats", "visual_pos"):
                    return torch.float32
                else:
                    return torch.long

            def recursive_numpy_convert(iterable):
                return_dict = {}
                for key, value in iterable.items():
                    if isinstance(value, dict):
                        return_dict[key] = recursive_numpy_convert(value)
                    else:
                        if isinstance(value, (list, tuple)):
                            return_dict[key] = (
                                torch.from_numpy(iter_value.numpy()).to(torch_type(key)) for iter_value in value
                            )
                        else:
                            return_dict[key] = torch.from_numpy(value.numpy()).to(torch_type(key))
                return return_dict

            pt_inputs_dict = recursive_numpy_convert(self._prepare_for_class(inputs_dict, model_class))

            # need to rename encoder-decoder "inputs" for PyTorch
            if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
                pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")

            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(self._prepare_for_class(inputs_dict, model_class), training=False)
            tf_hidden_states = tfo[0].numpy()
            pt_hidden_states = pto[0].numpy()

            tf_nans = np.copy(np.isnan(tf_hidden_states))
            pt_nans = np.copy(np.isnan(pt_hidden_states))

            pt_hidden_states[tf_nans] = 0
            tf_hidden_states[tf_nans] = 0
            pt_hidden_states[pt_nans] = 0
            tf_hidden_states[pt_nans] = 0

            max_diff = np.amax(np.abs(tf_hidden_states - pt_hidden_states))
            # Debug info (remove when fixed)
            if max_diff >= 2e-2:
                print("===")
                print(model_class)
                print(config)
                print(inputs_dict)
                print(pt_inputs_dict)
            self.assertLessEqual(max_diff, 6e-2)

            # Check we can load pt model in tf and vice-versa with checkpoint => model functions
            with tempfile.TemporaryDirectory() as tmpdirname:
                pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
                torch.save(pt_model.state_dict(), pt_checkpoint_path)
                tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path)

                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
                tf_model.save_weights(tf_checkpoint_path)
                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)

            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()
            pt_inputs_dict = dict(
                (name, torch.from_numpy(key.numpy()).to(torch.long))
                for name, key in self._prepare_for_class(inputs_dict, model_class).items()
            )

            for key, value in pt_inputs_dict.items():
                if key in ("visual_feats", "visual_pos"):
                    pt_inputs_dict[key] = value.to(torch.float32)
                else:
                    pt_inputs_dict[key] = value.to(torch.long)

            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(self._prepare_for_class(inputs_dict, model_class))
            tfo = tfo[0].numpy()
            pto = pto[0].numpy()
            tf_nans = np.copy(np.isnan(tfo))
            pt_nans = np.copy(np.isnan(pto))

            pto[tf_nans] = 0
            tfo[tf_nans] = 0
            pto[pt_nans] = 0
            tfo[pt_nans] = 0

            max_diff = np.amax(np.abs(tfo - pto))
            self.assertLessEqual(max_diff, 6e-2)
Example #8
    def test_pt_tf_model_equivalence(self):
        import os
        import tempfile

        import numpy as np
        import tensorflow as tf
        import torch

        import transformers

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            pt_model_class_name = model_class.__name__[2:]  # Skip the "TF" at the beginning
            pt_model_class = getattr(transformers, pt_model_class_name)

            config.output_hidden_states = True

            tf_model = model_class(config)
            pt_model = pt_model_class(config)

            # Check we can load pt model in tf and vice-versa with model => model functions

            tf_model = transformers.load_pytorch_model_in_tf2_model(
                tf_model, pt_model, tf_inputs=self._prepare_for_class(inputs_dict, model_class)
            )
            pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model)

            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()
            pt_inputs_dict = {}
            for name, key in self._prepare_for_class(inputs_dict, model_class).items():
                if type(key) == bool:
                    pt_inputs_dict[name] = key
                elif name == "input_values":
                    pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.float32)
                elif name == "pixel_values":
                    pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.float32)
                else:
                    pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.long)

            # need to rename encoder-decoder "inputs" for PyTorch
            if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
                pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")

            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(self._prepare_for_class(inputs_dict, model_class), training=False)

            self.assertEqual(len(tfo), len(pto), "Output lengths differ between TF and PyTorch")
            for tf_output, pt_output in zip(tfo.to_tuple(), pto.to_tuple()):

                if not (isinstance(tf_output, tf.Tensor) and isinstance(pt_output, torch.Tensor)):
                    continue

                tf_out = tf_output.numpy()
                pt_out = pt_output.numpy()

                self.assertEqual(tf_out.shape, pt_out.shape, "Output component shapes differ between TF and PyTorch")

                if len(tf_out.shape) > 0:

                    tf_nans = np.copy(np.isnan(tf_out))
                    pt_nans = np.copy(np.isnan(pt_out))

                    pt_out[tf_nans] = 0
                    tf_out[tf_nans] = 0
                    pt_out[pt_nans] = 0
                    tf_out[pt_nans] = 0

                max_diff = np.amax(np.abs(tf_out - pt_out))
                self.assertLessEqual(max_diff, 4e-2)

            # Check we can load pt model in tf and vice-versa with checkpoint => model functions
            with tempfile.TemporaryDirectory() as tmpdirname:
                pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
                torch.save(pt_model.state_dict(), pt_checkpoint_path)
                tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path)

                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
                tf_model.save_weights(tf_checkpoint_path)
                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)

            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()
            pt_inputs_dict = {}
            for name, key in self._prepare_for_class(inputs_dict, model_class).items():
                if type(key) == bool:
                    key = np.array(key, dtype=bool)
                    pt_inputs_dict[name] = torch.from_numpy(key).to(torch.long)
                elif name == "input_values":
                    pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.float32)
                elif name == "pixel_values":
                    pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.float32)
                else:
                    pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.long)
            # need to rename encoder-decoder "inputs" for PyTorch
            if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
                pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")

            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(self._prepare_for_class(inputs_dict, model_class))

            self.assertEqual(len(tfo), len(pto), "Output lengths differ between TF and PyTorch")
            for tf_output, pt_output in zip(tfo.to_tuple(), pto.to_tuple()):

                if not (isinstance(tf_output, tf.Tensor) and isinstance(pt_output, torch.Tensor)):
                    continue

                tf_out = tf_output.numpy()
                pt_out = pt_output.numpy()

                self.assertEqual(tf_out.shape, pt_out.shape, "Output component shapes differ between TF and PyTorch")

                if len(tf_out.shape) > 0:
                    tf_nans = np.copy(np.isnan(tf_out))
                    pt_nans = np.copy(np.isnan(pt_out))

                    pt_out[tf_nans] = 0
                    tf_out[tf_nans] = 0
                    pt_out[pt_nans] = 0
                    tf_out[pt_nans] = 0

                max_diff = np.amax(np.abs(tf_out - pt_out))
                self.assertLessEqual(max_diff, 4e-2)
Example #9
    def test_pt_tf_model_equivalence(self):
        import inspect
        import os
        import tempfile

        import numpy as np
        import tensorflow as tf
        import torch

        import transformers
        from transformers.testing_utils import torch_device

        # make masks reproducible
        np.random.seed(2)

        config, _ = self.model_tester.prepare_config_and_inputs_for_common()
        num_patches = int((config.image_size // config.patch_size)**2)
        noise = np.random.uniform(size=(self.model_tester.batch_size,
                                        num_patches))
        pt_noise = torch.from_numpy(noise).to(device=torch_device)
        tf_noise = tf.constant(noise)
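        # the same noise is fed to both models so that ViT MAE's random patch masking matches across frameworks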

        def prepare_tf_inputs_from_pt_inputs(pt_inputs_dict):

            tf_inputs_dict = {}
            for key, tensor in pt_inputs_dict.items():
                tf_inputs_dict[key] = tf.convert_to_tensor(
                    tensor.cpu().numpy(), dtype=tf.float32)

            return tf_inputs_dict

        def check_outputs(tf_outputs, pt_outputs, model_class, names):
            """
            Args:
                model_class: The class of the model that is currently being tested. For example, `TFBertModel`,
                    `TFBertForMaskedLM`, `TFBertForSequenceClassification`, etc. Currently unused, but it could make
                    debugging easier and faster.

                names: A string, or a tuple of strings. These specify what tf_outputs/pt_outputs represent in the model outputs.
                    Currently unused, but in the future, we could use this information to make the error message clearer
                    by giving the name(s) of the output tensor(s) with large difference(s) between PT and TF.
            """

            # Allow `list` because `(TF)TransfoXLModelOutput.mems` is a list of tensors.
            if type(tf_outputs) in [tuple, list]:
                self.assertEqual(type(tf_outputs), type(pt_outputs))
                self.assertEqual(len(tf_outputs), len(pt_outputs))
                if type(names) == tuple:
                    for tf_output, pt_output, name in zip(
                            tf_outputs, pt_outputs, names):
                        check_outputs(tf_output,
                                      pt_output,
                                      model_class,
                                      names=name)
                elif type(names) == str:
                    for idx, (tf_output, pt_output) in enumerate(
                            zip(tf_outputs, pt_outputs)):
                        check_outputs(tf_output,
                                      pt_output,
                                      model_class,
                                      names=f"{names}_{idx}")
                else:
                    raise ValueError(
                        f"`names` should be a `tuple` or a string. Got {type(names)} instead."
                    )
            elif isinstance(tf_outputs, tf.Tensor):
                self.assertTrue(isinstance(pt_outputs, torch.Tensor))

                tf_outputs = tf_outputs.numpy()
                if isinstance(tf_outputs, np.float32):
                    tf_outputs = np.array(tf_outputs, dtype=np.float32)
                pt_outputs = pt_outputs.detach().to("cpu").numpy()

                tf_nans = np.isnan(tf_outputs)
                pt_nans = np.isnan(pt_outputs)

                pt_outputs[tf_nans] = 0
                tf_outputs[tf_nans] = 0
                pt_outputs[pt_nans] = 0
                tf_outputs[pt_nans] = 0

                max_diff = np.amax(np.abs(tf_outputs - pt_outputs))
                self.assertLessEqual(max_diff, 1e-5)
            else:
                raise ValueError(
                    f"`tf_outputs` should be a `tuple` or an instance of `tf.Tensor`. Got {type(tf_outputs)} instead."
                )

        def check_pt_tf_models(tf_model, pt_model, pt_inputs_dict):
            # we are not preparing a model with labels because of how the
            # ViT MAE model is constructed

            # send pytorch model to the correct device
            pt_model.to(torch_device)

            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()

            tf_inputs_dict = prepare_tf_inputs_from_pt_inputs(pt_inputs_dict)

            # send pytorch inputs to the correct device
            pt_inputs_dict = {
                k:
                v.to(device=torch_device) if isinstance(v, torch.Tensor) else v
                for k, v in pt_inputs_dict.items()
            }

            # Original test: check without `labels`
            with torch.no_grad():
                pt_outputs = pt_model(**pt_inputs_dict, noise=pt_noise)
            tf_outputs = tf_model(tf_inputs_dict, noise=tf_noise)

            tf_keys = tuple(
                [k for k, v in tf_outputs.items() if v is not None])
            pt_keys = tuple(
                [k for k, v in pt_outputs.items() if v is not None])

            self.assertEqual(tf_keys, pt_keys)
            check_outputs(tf_outputs.to_tuple(),
                          pt_outputs.to_tuple(),
                          model_class,
                          names=tf_keys)

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            tf_model_class_name = "TF" + model_class.__name__  # Add the "TF" at the beginning

            # Output all for aggressive testing
            config.output_hidden_states = True
            config.output_attentions = self.has_attentions

            tf_model_class = getattr(transformers, tf_model_class_name)

            tf_model = tf_model_class(config)
            pt_model = model_class(config)

            # forward only those inputs that actually exist in the TF model's call signature
            tf_input_keys = set(
                inspect.signature(tf_model.call).parameters.keys())

            # remove all head masks
            tf_input_keys.discard("head_mask")
            tf_input_keys.discard("cross_attn_head_mask")
            tf_input_keys.discard("decoder_head_mask")

            pt_inputs_dict = self._prepare_for_class(inputs_dict, model_class)

            pt_inputs_dict = {
                k: v
                for k, v in pt_inputs_dict.items() if k in tf_input_keys
            }

            # Check we can load pt model in tf and vice-versa with model => model functions
            tf_inputs_dict = prepare_tf_inputs_from_pt_inputs(pt_inputs_dict)
            tf_model = transformers.load_pytorch_model_in_tf2_model(
                tf_model, pt_model, tf_inputs=tf_inputs_dict)
            pt_model = transformers.load_tf2_model_in_pytorch_model(
                pt_model, tf_model)

            check_pt_tf_models(tf_model, pt_model, pt_inputs_dict)

            # Check we can load pt model in tf and vice-versa with checkpoint => model functions
            with tempfile.TemporaryDirectory() as tmpdirname:
                pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
                torch.save(pt_model.state_dict(), pt_checkpoint_path)
                tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(
                    tf_model, pt_checkpoint_path)

                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
                tf_model.save_weights(tf_checkpoint_path)
                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(
                    pt_model, tf_checkpoint_path)
                pt_model = pt_model.to(torch_device)

            check_pt_tf_models(tf_model, pt_model, pt_inputs_dict)