def __init__(self,
             pretrained_model_name: Optional[str] = None,
             cache_dir: Optional[str] = None,
             hparams=None):
    super().__init__(hparams=hparams)

    # Create the underlying encoder
    encoder_hparams = dict_fetch(hparams, GPT2Encoder.default_hparams())
    self._encoder = GPT2Encoder(
        pretrained_model_name=pretrained_model_name,
        cache_dir=cache_dir,
        hparams=encoder_hparams)

    # Create a dropout layer
    self._dropout_layer = nn.Dropout(self._hparams.dropout)

    # Create an additional classification layer if needed
    self.num_classes = self._hparams.num_classes
    if self.num_classes <= 0:
        self._logits_layer = None
    else:
        logit_kwargs = self._hparams.logit_layer_kwargs
        if logit_kwargs is None:
            logit_kwargs = {}
        elif not isinstance(logit_kwargs, HParams):
            raise ValueError("hparams['logit_layer_kwargs'] "
                             "must be a dict.")
        else:
            logit_kwargs = logit_kwargs.todict()

        if self._hparams.clas_strategy == 'all_time':
            self._logits_layer = nn.Linear(
                self._encoder.output_size * self._hparams.max_seq_length,
                self.num_classes,
                **logit_kwargs)
        else:
            self._logits_layer = nn.Linear(
                self._encoder.output_size,
                self.num_classes,
                **logit_kwargs)

    if self._hparams.initializer:
        initialize = get_initializer(self._hparams.initializer)
        assert initialize is not None
        if self._logits_layer is not None:
            initialize(self._logits_layer.weight)
            if self._logits_layer.bias is not None:
                initialize(self._logits_layer.bias)

    self.is_binary = (self.num_classes == 1) or \
                     (self.num_classes <= 0 and self._hparams.dim == 1)
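
# Sketch of the logits layer produced by the constructor above for each
# classification strategy (illustrative values only; `output_size` is the
# encoder's output dimension):
#
#     clas_strategy="all_time", max_seq_length=128, num_classes=2
#         -> nn.Linear(output_size * 128, 2)
#     clas_strategy="cls_time" or "time_wise", num_classes=2
#         -> nn.Linear(output_size, 2)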
def default_hparams():
    r"""Returns a dictionary of hyperparameters with default values.

    .. code-block:: python

        {
            # (1) Same hyperparameters as in GPT2Encoder
            ...
            # (2) Additional hyperparameters
            "num_classes": 2,
            "logit_layer_kwargs": None,
            "clas_strategy": "cls_time",
            "max_seq_length": None,
            "dropout": 0.1,
            "name": "gpt2_classifier"
        }

    Here:

    1. Same hyperparameters as in
       :class:`~texar.torch.modules.GPT2Encoder`.
       See :meth:`~texar.torch.modules.GPT2Encoder.default_hparams`.
       An instance of GPT2Encoder is created for feature extraction.

    2. Additional hyperparameters:

        `"num_classes"`: int
            Number of classes:

            - If **> 0**, an additional `Linear` layer is appended to
              the encoder to compute the logits over classes.
            - If **<= 0**, no dense layer is appended. The number of
              classes is assumed to be the final dense layer size of
              the encoder.

        `"logit_layer_kwargs"`: dict
            Keyword arguments for the logit `Linear` layer constructor,
            except for the output size, which is set to `num_classes`.
            Ignored if no extra logit layer is appended.

        `"clas_strategy"`: str
            The classification strategy, one of:

            - **cls_time**: Sequence-level classification based on the
              output of the last time step. Each sequence has a class.
            - **all_time**: Sequence-level classification based on the
              output of all time steps. Each sequence has a class.
            - **time_wise**: Step-wise classification, i.e., make a
              classification for each time step based on its output.

        `"max_seq_length"`: int, optional
            Maximum possible length of input sequences. Required if
            `clas_strategy` is `all_time`.

        `"dropout"`: float
            The dropout rate of the GPT2 encoder output.

        `"name"`: str
            Name of the classifier.
    """
    hparams = GPT2Encoder.default_hparams()
    hparams.update({
        "num_classes": 2,
        "logit_layer_kwargs": None,
        "clas_strategy": "cls_time",
        "max_seq_length": None,
        "dropout": 0.1,
        "name": "gpt2_classifier"
    })
    return hparams
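
# A minimal usage sketch, assuming these methods belong to the
# `GPT2Classifier` class of texar-torch. The import path, the pretrained
# model name "gpt2-small", and the forward-call return values (logits,
# predictions) are assumptions for illustration, not defined in this file.
#
#     import torch
#     from texar.torch.modules import GPT2Classifier
#
#     clf = GPT2Classifier(
#         pretrained_model_name="gpt2-small",
#         hparams={
#             "num_classes": 3,             # appends nn.Linear(output_size, 3)
#             "clas_strategy": "cls_time",  # classify from the last time step
#             "dropout": 0.1,
#         })
#     token_ids = torch.randint(0, 50257, (2, 16))   # [batch_size, seq_len]
#     logits, preds = clf(token_ids)                 # assumed return values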