def test_trainable_variables(self):
    r"""Tests the functionality of automatically collecting trainable
    variables.
    """
    # case 1: RoBERTa base
    encoder = RoBERTaEncoder()
    self.assertEqual(len(encoder.trainable_variables), 2 + 2 + 12 * 16 + 2)
    _, _ = encoder(self.inputs)

    # case 2: RoBERTa large
    hparams = {
        "pretrained_model_name": "roberta-large",
    }
    encoder = RoBERTaEncoder(hparams=hparams)
    self.assertEqual(len(encoder.trainable_variables), 2 + 2 + 24 * 16 + 2)
    _, _ = encoder(self.inputs)

    # case 3: self-designed RoBERTa
    hparams = {
        "encoder": {
            "num_blocks": 6,
        },
        "pretrained_model_name": None,
    }
    encoder = RoBERTaEncoder(hparams=hparams)
    self.assertEqual(len(encoder.trainable_variables), 2 + 2 + 6 * 16 + 2)
    _, _ = encoder(self.inputs)
def test_hparams(self):
    r"""Tests the priority of the encoder architecture parameters.
    """
    # case 1: set "pretrained_model_name" by constructor argument
    hparams = {
        "pretrained_model_name": "roberta-large",
    }
    encoder = RoBERTaEncoder(pretrained_model_name="roberta-base",
                             hparams=hparams)
    self.assertEqual(encoder.hparams.encoder.num_blocks, 12)
    _, _ = encoder(self.inputs)

    # case 2: set "pretrained_model_name" by hparams
    hparams = {
        "pretrained_model_name": "roberta-large",
        "encoder": {
            "num_blocks": 6,
        },
    }
    encoder = RoBERTaEncoder(hparams=hparams)
    self.assertEqual(encoder.hparams.encoder.num_blocks, 24)
    _, _ = encoder(self.inputs)

    # case 3: set to None in both hparams and constructor argument
    hparams = {
        "pretrained_model_name": None,
        "encoder": {
            "num_blocks": 6,
        },
    }
    encoder = RoBERTaEncoder(hparams=hparams)
    self.assertEqual(encoder.hparams.encoder.num_blocks, 6)
    _, _ = encoder(self.inputs)

    # case 4: using default hparams
    encoder = RoBERTaEncoder()
    self.assertEqual(encoder.hparams.encoder.num_blocks, 12)
    _, _ = encoder(self.inputs)
def test_soft_ids(self):
    r"""Tests soft ids.
    """
    hparams = {
        "pretrained_model_name": None,
    }
    encoder = RoBERTaEncoder(hparams=hparams)

    inputs = torch.rand(self.batch_size, self.max_length, 50265)
    outputs, pooled_output = encoder(inputs)

    outputs_dim = encoder.hparams.encoder.dim
    self.assertEqual(
        outputs.shape,
        torch.Size([self.batch_size, self.max_length, outputs_dim]))
    self.assertEqual(
        pooled_output.shape,
        torch.Size([self.batch_size, encoder.output_size]))
def default_hparams():
    r"""Returns a dictionary of hyperparameters with default values.

    .. code-block:: python

        {
            # (1) Same hyperparameters as in RoBERTaEncoder
            ...
            # (2) Additional hyperparameters
            "num_classes": 2,
            "logit_layer_kwargs": None,
            "clas_strategy": "cls_time",
            "max_seq_length": None,
            "dropout": 0.1,
            "name": "roberta_classifier"
        }

    Here:

    1. Same hyperparameters as in
       :class:`~texar.torch.modules.RoBERTaEncoder`.
       See the :meth:`~texar.torch.modules.RoBERTaEncoder.default_hparams`.
       An instance of RoBERTaEncoder is created for feature extraction.

    2. Additional hyperparameters:

        `"num_classes"`: int
            Number of classes:

            - If **> 0**, an additional `Linear` layer is appended to
              the encoder to compute the logits over classes.
            - If **<= 0**, no dense layer is appended. The number of
              classes is assumed to be the final dense layer size of
              the encoder.

        `"logit_layer_kwargs"`: dict
            Keyword arguments for the logit Dense layer constructor,
            except for argument "units" which is set to `num_classes`.
            Ignored if no extra logit layer is appended.

        `"clas_strategy"`: str
            The classification strategy, one of:

            - **cls_time**: Sequence-level classification based on the
              output of the first time step (which is the `CLS` token).
              Each sequence has a class.
            - **all_time**: Sequence-level classification based on the
              output of all time steps. Each sequence has a class.
            - **time_wise**: Step-wise classification, i.e., make
              classification for each time step based on its output.

        `"max_seq_length"`: int, optional
            Maximum possible length of input sequences. Required if
            `clas_strategy` is `all_time`.

        `"dropout"`: float
            The dropout rate of the RoBERTa encoder output.

        `"name"`: str
            Name of the classifier.
    """
    hparams = RoBERTaEncoder.default_hparams()
    hparams.update({
        "num_classes": 2,
        "logit_layer_kwargs": None,
        "clas_strategy": "cls_time",
        "max_seq_length": None,
        "dropout": 0.1,
        "name": "roberta_classifier",
    })
    return hparams
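
# A minimal usage sketch (an illustration, not part of the original module):
# it shows how the hyperparameters documented in ``default_hparams`` above
# might be overridden when constructing a classifier. The class
# ``texar.torch.modules.RoBERTaClassifier`` and its ``(logits, preds)``
# return values mirror the other Texar-PyTorch classifiers; the specific
# settings below are assumptions for illustration, not recommended values.
def _example_classifier_usage():
    import torch
    from texar.torch.modules import RoBERTaClassifier

    hparams = {
        "pretrained_model_name": None,  # randomly initialized encoder, no download
        "num_classes": 5,               # append a 5-way logit layer
        "clas_strategy": "cls_time",    # classify from the first (CLS) time step
        "dropout": 0.2,
    }
    classifier = RoBERTaClassifier(hparams=hparams)

    # (batch_size, seq_length) token ids drawn from RoBERTa's 50265-id vocab
    input_ids = torch.randint(50265, (8, 16))
    logits, preds = classifier(input_ids)
    # logits: (8, 5); preds: (8,) holding the argmax class per sequence
    return logits, preds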
def test_encode(self):
    r"""Tests encoding.
    """
    # case 1: RoBERTa base
    hparams = {
        "pretrained_model_name": None,
    }
    encoder = RoBERTaEncoder(hparams=hparams)

    inputs = torch.randint(30521, (self.batch_size, self.max_length))
    outputs, pooled_output = encoder(inputs)

    outputs_dim = encoder.hparams.encoder.dim
    self.assertEqual(
        outputs.shape,
        torch.Size([self.batch_size, self.max_length, outputs_dim]))
    self.assertEqual(
        pooled_output.shape,
        torch.Size([self.batch_size, encoder.output_size]))

    # case 2: self-designed RoBERTa
    hparams = {
        'pretrained_model_name': None,
        'embed': {
            'dim': 96,
        },
        'position_embed': {
            'dim': 96,
        },
        'encoder': {
            'dim': 96,
            'multihead_attention': {
                'num_units': 96,
                'output_dim': 96,
            },
            'poswise_feedforward': {
                'layers': [
                    {
                        'type': 'Linear',
                        'kwargs': {
                            'in_features': 96,
                            'out_features': 96 * 4,
                            'bias': True,
                        },
                    },
                    {
                        'type': 'BertGELU',
                    },
                    {
                        'type': 'Linear',
                        'kwargs': {
                            'in_features': 96 * 4,
                            'out_features': 96,
                            'bias': True,
                        },
                    },
                ],
            },
        },
        'hidden_size': 96,
    }
    encoder = RoBERTaEncoder(hparams=hparams)

    outputs, pooled_output = encoder(inputs)

    outputs_dim = encoder.hparams.encoder.dim
    self.assertEqual(
        outputs.shape,
        torch.Size([self.batch_size, self.max_length, outputs_dim]))
    self.assertEqual(
        pooled_output.shape,
        torch.Size([self.batch_size, encoder.output_size]))
def test_model_loading(self):
    r"""Tests model loading functionality."""
    for pretrained_model_name in RoBERTaEncoder.available_checkpoints():
        encoder = RoBERTaEncoder(
            pretrained_model_name=pretrained_model_name)
        _, _ = encoder(self.inputs)