Example #1
    def test_trainable_variables(self):
        r"""Tests the functionality of automatically collecting trainable
        variables.
        """

        def get_variable_num(n_layers: int) -> int:
            # Word embedding + position embedding + the per-block variables
            # + the final layer norm's weight and bias.
            return 1 + 1 + n_layers * 16 + 2

        # case 1: GPT2 117M
        encoder = GPT2Encoder()
        self.assertEqual(len(encoder.trainable_variables), get_variable_num(12))
        _ = encoder(self.inputs)

        # case 2: GPT2 345M
        hparams = {
            "pretrained_model_name": "345M",
        }
        encoder = GPT2Encoder(hparams=hparams)
        self.assertEqual(len(encoder.trainable_variables), get_variable_num(24))
        _ = encoder(self.inputs)

        # case 3: self-designed GPT2
        hparams = {
            "pretrained_model_name": None,
            "num_blocks": 6,
        }
        encoder = GPT2Encoder(hparams=hparams)
        self.assertEqual(len(encoder.trainable_variables), get_variable_num(6))
        _ = encoder(self.inputs)
Example #2
    def test_hparams(self):
        r"""Tests the priority of the encoder arch parameter.
        """
        # case 1: set "pretrained_mode_name" by constructor argument
        hparams = {
            "pretrained_model_name": "345M",
        }
        encoder = GPT2Encoder(pretrained_model_name="117M",
                              hparams=hparams)
        self.assertEqual(encoder.hparams.num_blocks, 12)
        _ = encoder(self.inputs)

        # case 2: set "pretrained_mode_name" by hparams
        hparams = {
            "pretrained_model_name": "117M",
            "num_blocks": 6,
        }
        encoder = GPT2Encoder(hparams=hparams)
        self.assertEqual(encoder.hparams.num_blocks, 12)
        _ = encoder(self.inputs)

        # case 3: set to None in both hparams and constructor argument
        hparams = {
            "pretrained_model_name": None,
            "num_blocks": 6,
        }
        encoder = GPT2Encoder(hparams=hparams)
        self.assertEqual(encoder.hparams.num_blocks, 6)
        _ = encoder(self.inputs)

        # case 4: using default hparams
        encoder = GPT2Encoder()
        self.assertEqual(encoder.hparams.num_blocks, 12)
        _ = encoder(self.inputs)
Example #3
    def test_trainable_variables(self):
        r"""Tests the functionality of automatically collecting trainable
        variables.
        """
        inputs = torch.zeros(32, 16, dtype=torch.int64)

        # case 1: GPT2 117M
        encoder = GPT2Encoder()
        _ = encoder(inputs)
        self.assertEqual(len(encoder.trainable_variables), 1 + 1 + 12 * 26 + 2)

        # case 2: GPT2 345M
        hparams = {"pretrained_model_name": "345M"}
        encoder = GPT2Encoder(hparams=hparams)
        _ = encoder(inputs)
        self.assertEqual(len(encoder.trainable_variables), 1 + 1 + 24 * 26 + 2)

        # case 3: self-designed GPT2
        hparams = {
            "decoder": {
                "num_blocks": 6,
            },
            "pretrained_model_name": None
        }
        encoder = GPT2Encoder(hparams=hparams)
        _ = encoder(inputs)
        self.assertEqual(len(encoder.trainable_variables), 1 + 1 + 6 * 26 + 2)
Example #4
    def test_encode(self):
        r"""Tests encoding.
        """
        # case 1: GPT2 117M
        hparams = {
            "pretrained_model_name": None,
        }
        encoder = GPT2Encoder(hparams=hparams)

        inputs = torch.randint(30521, (self.batch_size, self.max_length))
        outputs = encoder(inputs)

        self.assertEqual(
            outputs.shape,
            torch.Size([self.batch_size, self.max_length, encoder.output_size]))

        # case 2: self-designed GPT2
        hparams = {
            'pretrained_model_name': None,
            'embed': {
                'dim': 96,
            },
            'position_embed': {
                'dim': 96,
            },

            'dim': 96,
            'multihead_attention': {
                'num_units': 96,
                'output_dim': 96,
            },
            'poswise_feedforward': {
                'layers': [
                    {
                        'kwargs': {
                            'in_features': 96,
                            'out_features': 96 * 4,
                            'bias': True,
                        },
                        'type': 'Linear',
                    },
                    {"type": "GPTGELU"},
                    {
                        'kwargs': {
                            'in_features': 96 * 4,
                            'out_features': 96,
                            'bias': True,
                        },
                        'type': 'Linear',
                    }
                ]
            },
        }
        encoder = GPT2Encoder(hparams=hparams)

        outputs = encoder(inputs)
        self.assertEqual(
            outputs.shape,
            torch.Size([self.batch_size, self.max_length, encoder.output_size]))
Example #5
    def test_model_loading(self):
        r"""Tests model loading functionality."""
        # case 1
        encoder = GPT2Encoder(pretrained_model_name="117M")
        _ = encoder(self.inputs)

        # case 2
        encoder = GPT2Encoder(pretrained_model_name="345M")
        _ = encoder(self.inputs)
Example #6
    def __init__(self,
                 pretrained_model_name: Optional[str] = None,
                 cache_dir: Optional[str] = None,
                 hparams=None):

        super().__init__(hparams=hparams)

        # Create the underlying encoder
        encoder_hparams = dict_fetch(hparams, GPT2Encoder.default_hparams())

        self._encoder = GPT2Encoder(
            pretrained_model_name=pretrained_model_name,
            cache_dir=cache_dir,
            hparams=encoder_hparams)

        # Create a dropout layer
        self._dropout_layer = nn.Dropout(self._hparams.dropout)

        # Create an additional classification layer if needed
        self.num_classes = self._hparams.num_classes
        if self.num_classes <= 0:
            self._logits_layer = None
        else:
            logit_kwargs = self._hparams.logit_layer_kwargs
            if logit_kwargs is None:
                logit_kwargs = {}
            elif not isinstance(logit_kwargs, HParams):
                raise ValueError("hparams['logit_layer_kwargs'] "
                                 "must be a dict.")
            else:
                logit_kwargs = logit_kwargs.todict()

            if self._hparams.clas_strategy == 'all_time':
                self._logits_layer = nn.Linear(
                    self._encoder.output_size * self._hparams.max_seq_length,
                    self.num_classes, **logit_kwargs)
            else:
                self._logits_layer = nn.Linear(self._encoder.output_size,
                                               self.num_classes,
                                               **logit_kwargs)

        if self._hparams.initializer:
            initialize = get_initializer(self._hparams.initializer)
            assert initialize is not None
            if self._logits_layer is not None:
                initialize(self._logits_layer.weight)
                if self._logits_layer.bias is not None:
                    initialize(self._logits_layer.bias)

        self.is_binary = (self.num_classes == 1) or \
                         (self.num_classes <= 0 and
                          self._hparams.dim == 1)
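
The branch above sizes the logits layer according to `clas_strategy`: with `all_time`, the encoder outputs of every time step are flattened into a single feature vector, so the layer takes `output_size * max_seq_length` inputs, while `cls_time` and `time_wise` feed it a single step's `output_size` features. A minimal standalone sketch of that difference, using illustrative sizes (the 768 hidden units, length 128, and batch size are assumptions, not values from the snippet above):

import torch
from torch import nn

# Illustrative sizes only (assumed, not taken from the snippet above).
output_size = 768       # hidden size of the underlying GPT2 encoder
max_seq_length = 128    # maximum input length
num_classes = 2
batch_size = 4

# clas_strategy == 'all_time': all time steps are flattened into one vector.
all_time_logits = nn.Linear(output_size * max_seq_length, num_classes)

# clas_strategy == 'cls_time' or 'time_wise': one step's output at a time.
per_step_logits = nn.Linear(output_size, num_classes)

flattened = torch.zeros(batch_size, output_size * max_seq_length)
single_step = torch.zeros(batch_size, output_size)

print(all_time_logits(flattened).shape)    # torch.Size([4, 2])
print(per_step_logits(single_step).shape)  # torch.Size([4, 2])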
Example #7
    def test_hparams(self):
        r"""Tests the priority of the encoder arch parameter.
        """
        inputs = torch.zeros(32, 16, dtype=torch.int64)

        # case 1: set "pretrained_mode_name" by constructor argument
        hparams = {
            "pretrained_model_name": "345M",
        }
        encoder = GPT2Encoder(pretrained_model_name="117M", hparams=hparams)
        _ = encoder(inputs)
        self.assertEqual(encoder.hparams.decoder.num_blocks, 12)

        # case 2: set "pretrained_mode_name" by hparams
        hparams = {
            "pretrained_model_name": "117M",
            "decoder": {
                "num_blocks": 6
            }
        }
        encoder = GPT2Encoder(hparams=hparams)
        _ = encoder(inputs)
        self.assertEqual(encoder.hparams.decoder.num_blocks, 12)

        # case 3: set to None in both hparams and constructor argument
        hparams = {
            "pretrained_model_name": None,
            "decoder": {
                "num_blocks": 6
            },
        }
        encoder = GPT2Encoder(hparams=hparams)
        _ = encoder(inputs)
        self.assertEqual(encoder.hparams.decoder.num_blocks, 6)

        # case 4: using default hparams
        encoder = GPT2Encoder()
        _ = encoder(inputs)
        self.assertEqual(encoder.hparams.decoder.num_blocks, 12)
Example #8
    def default_hparams():
        r"""Returns a dictionary of hyperparameters with default values.

        .. code-block:: python

            {
                # (1) Same hyperparameters as in GPT2Encoder
                ...
                # (2) Additional hyperparameters
                "num_classes": 2,
                "logit_layer_kwargs": None,
                "clas_strategy": `cls_time`,
                "max_seq_length": None,
                "dropout": 0.1,
                "name": `gpt2_classifier`
            }

        Here:

        1. Same hyperparameters as in
           :class:`~texar.modules.GPT2Encoder`.
           See the :meth:`~texar.modules.GPT2Encoder.default_hparams`.
           An instance of GPT2Encoder is created for feature extraction.

        2. Additional hyperparameters:

            `"num_classes"`: int
                Number of classes:

                - If **> 0**, an additional `Linear`
                  layer is appended to the encoder to compute the logits over
                  classes.
                - If **<= 0**, no dense layer is appended. The number of
                  classes is assumed to be the final dense layer size of the
                  encoder.

            `"logit_layer_kwargs"`: dict
                Keyword arguments for the logit ``Linear`` layer constructor,
                except for the output dimension, which is set to
                `num_classes`. Ignored if no extra logit layer is appended.

            `"clas_strategy"`: str
                The classification strategy, one of:

                - **cls_time**: Sequence-level classification based on the
                  output of the last time step. Each sequence has a class.
                - **all_time**: Sequence-level classification based on
                  the output of all time steps. Each sequence has a class.
                - **time_wise**: Step-wise classification, i.e., make
                  classification for each time step based on its output.

            `"max_seq_length"`: int, optional
                Maximum possible length of input sequences. Required if
                `clas_strategy` is `all_time`.

            `"dropout"`: float
                The dropout rate of the GPT2 encoder output.

            `"name"`: str
                Name of the classifier.
        """

        hparams = GPT2Encoder.default_hparams()
        hparams.update({
            "num_classes": 2,
            "logit_layer_kwargs": None,
            "clas_strategy": "cls_time",
            "max_seq_length": None,
            "dropout": 0.1,
            "name": "gpt2_classifier"
        })
        return hparams
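
As a usage note, the sketch below builds the classifier with a few of the hyperparameters documented above overridden. It is a minimal sketch: the `GPT2Classifier` name and its import path are assumptions inferred from the `gpt2_classifier` default name (only `default_hparams` is shown above), and the hyperparameter values are illustrative.

from texar.torch.modules import GPT2Classifier  # assumed import path

hparams = {
    "num_classes": 3,             # append a Linear layer producing 3 logits
    "clas_strategy": "all_time",  # classify from the outputs of all time steps
    "max_seq_length": 128,        # required when clas_strategy is 'all_time'
    "dropout": 0.2,               # dropout on the GPT2 encoder output
}
classifier = GPT2Classifier(hparams=hparams)
print(classifier.hparams.clas_strategy)  # 'all_time'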
Example #9
    def test_encode(self):
        r"""Tests encoding.
        """
        # case 1: GPT2 117M
        encoder = GPT2Encoder()

        max_time = 8
        batch_size = 16
        inputs = torch.randint(30521, (batch_size, max_time),
                               dtype=torch.int64)
        outputs = encoder(inputs)

        outputs_dim = encoder.hparams.decoder.dim

        self.assertEqual(outputs.shape,
                         torch.Size([batch_size, max_time, outputs_dim]))

        # case 2: self-designed GPT2
        hparams = {
            'pretrained_model_name': None,
            'embed': {
                'dim': 96
            },
            'position_embed': {
                'dim': 96
            },
            'decoder': {
                'dim': 96,
                'multihead_attention': {
                    'num_units': 96,
                    'output_dim': 96,
                },
                'poswise_feedforward': {
                    'layers': [{
                        'kwargs': {
                            'in_features': 96,
                            'out_features': 96 * 4,
                            'bias': True
                        },
                        'type': 'Linear'
                    }, {
                        "type": "GPTGELU"
                    }, {
                        'kwargs': {
                            'in_features': 96 * 4,
                            'out_features': 96,
                            'bias': True
                        },
                        'type': 'Linear'
                    }]
                },
            }
        }
        encoder = GPT2Encoder(hparams=hparams)

        max_time = 8
        batch_size = 16
        inputs = torch.randint(30521, (batch_size, max_time),
                               dtype=torch.int64)
        outputs = encoder(inputs)

        outputs_dim = encoder.hparams.decoder.dim

        self.assertEqual(outputs.shape,
                         torch.Size([batch_size, max_time, outputs_dim]))