class TFGPT2ModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, unittest.TestCase): all_model_classes = ((TFGPT2Model, TFGPT2LMHeadModel, TFGPT2ForSequenceClassification, TFGPT2DoubleHeadsModel) if is_tf_available() else ()) all_generative_model_classes = ( TFGPT2LMHeadModel, ) if is_tf_available() else () test_head_masking = False test_onnx = True onnx_min_opset = 10 def setUp(self): self.model_tester = TFGPT2ModelTester(self) self.config_tester = ConfigTester(self, config_class=GPT2Config, n_embd=37) def test_config(self): self.config_tester.run_common_tests() def test_gpt2_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_gpt2_model(*config_and_inputs) def test_gpt2_model_past(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_gpt2_model_past(*config_and_inputs) def test_gpt2_model_att_mask_past(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_gpt2_model_attention_mask_past( *config_and_inputs) def test_gpt2_model_past_large_inputs(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_gpt2_model_past_large_inputs( *config_and_inputs) def test_gpt2_lm_head(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_gpt2_lm_head(*config_and_inputs) def test_gpt2_double_head(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_gpt2_double_head(*config_and_inputs) def test_model_common_attributes(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) for model_class in self.all_model_classes: model = model_class(config) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) if model_class in self.all_generative_model_classes: x = model.get_output_embeddings() assert isinstance(x, tf.keras.layers.Layer) name = model.get_bias() assert name is None else: x = model.get_output_embeddings() assert x is None name = model.get_bias() assert name is None def test_gpt2_sequence_classification_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_gpt2_for_sequence_classification( *config_and_inputs) @slow def test_model_from_pretrained(self): for model_name in TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TFGPT2Model.from_pretrained(model_name) self.assertIsNotNone(model) # overwrite from common since ONNX runtime optimization doesn't work with tf.gather() when the argument # `batch_dims` > 0" @require_tf2onnx @slow def test_onnx_runtime_optimize(self): if not self.test_onnx: return import onnxruntime import tf2onnx config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) for model_class in self.all_model_classes: # Skip these 2 classes which uses `tf.gather` with `batch_dims=1` if model_class in [ TFGPT2ForSequenceClassification, TFGPT2DoubleHeadsModel ]: continue model = model_class(config) model(model.dummy_inputs) onnx_model_proto, _ = tf2onnx.convert.from_keras( model, opset=self.onnx_min_opset) onnxruntime.InferenceSession(onnx_model_proto.SerializeToString())
class TFBertModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = (( TFBertModel, TFBertForMaskedLM, TFBertLMHeadModel, TFBertForNextSentencePrediction, TFBertForPreTraining, TFBertForQuestionAnswering, TFBertForSequenceClassification, TFBertForTokenClassification, TFBertForMultipleChoice, ) if is_tf_available() else ()) def setUp(self): self.model_tester = TFBertModelTester(self) self.config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() def test_bert_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_model(*config_and_inputs) def test_for_masked_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_for_masked_lm( *config_and_inputs) def test_for_causal_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_lm_head(*config_and_inputs) def test_for_multiple_choice(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_for_multiple_choice( *config_and_inputs) def test_for_next_sequence_prediction(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_for_next_sequence_prediction( *config_and_inputs) def test_for_pretraining(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_for_pretraining( *config_and_inputs) def test_for_question_answering(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_for_question_answering( *config_and_inputs) def test_for_sequence_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_for_sequence_classification( *config_and_inputs) def test_for_token_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_for_token_classification( *config_and_inputs) @slow def test_model_from_pretrained(self): # for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: for model_name in ["bert-base-uncased"]: model = TFBertModel.from_pretrained(model_name) self.assertIsNotNone(model)
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function import unittest from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .configuration_common_test import ConfigTester from .utils import require_tf, slow from transformers import DistilBertConfig, is_tf_available if is_tf_available(): import tensorflow as tf from transformers.modeling_tf_distilbert import (TFDistilBertModel, TFDistilBertForMaskedLM, TFDistilBertForQuestionAnswering, TFDistilBertForSequenceClassification) @require_tf class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester): all_model_classes = (TFDistilBertModel, TFDistilBertForMaskedLM, TFDistilBertForQuestionAnswering, TFDistilBertForSequenceClassification) if is_tf_available() else None test_pruning = True test_torchscript = True test_resize_embeddings = True
class TFBertModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = (( TFBertModel, TFBertForMaskedLM, TFBertForNextSentencePrediction, TFBertForPreTraining, TFBertForQuestionAnswering, TFBertForSequenceClassification, TFBertForTokenClassification, ) if is_tf_available() else ()) class TFBertModelTester(object): def __init__( self, parent, batch_size=13, seq_length=7, is_training=True, use_input_mask=True, use_token_type_ids=True, use_labels=True, vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None, ): self.parent = parent self.batch_size = batch_size self.seq_length = seq_length self.is_training = is_training self.use_input_mask = use_input_mask self.use_token_type_ids = use_token_type_ids self.use_labels = use_labels self.vocab_size = vocab_size self.hidden_size = hidden_size self.num_hidden_layers = num_hidden_layers self.num_attention_heads = num_attention_heads self.intermediate_size = intermediate_size self.hidden_act = hidden_act self.hidden_dropout_prob = hidden_dropout_prob self.attention_probs_dropout_prob = attention_probs_dropout_prob self.max_position_embeddings = max_position_embeddings self.type_vocab_size = type_vocab_size self.type_sequence_label_size = type_sequence_label_size self.initializer_range = initializer_range self.num_labels = num_labels self.num_choices = num_choices self.scope = scope def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_mask = None if self.use_input_mask: input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) token_type_ids = None if self.use_token_type_ids: token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) sequence_labels = None token_labels = None choice_labels = None if self.use_labels: sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) config = BertConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads, intermediate_size=self.intermediate_size, hidden_act=self.hidden_act, hidden_dropout_prob=self.hidden_dropout_prob, attention_probs_dropout_prob=self.attention_probs_dropout_prob, max_position_embeddings=self.max_position_embeddings, type_vocab_size=self.type_vocab_size, initializer_range=self.initializer_range, ) return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels def create_and_check_bert_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): model = TFBertModel(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } sequence_output, pooled_output = model(inputs) inputs = [input_ids, input_mask] sequence_output, pooled_output = model(inputs) sequence_output, pooled_output = model(input_ids) result = { "sequence_output": sequence_output.numpy(), "pooled_output": pooled_output.numpy(), } self.parent.assertListEqual( list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]) self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size]) def create_and_check_bert_for_masked_lm(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): model = TFBertForMaskedLM(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } (prediction_scores, ) = model(inputs) result = { "prediction_scores": prediction_scores.numpy(), } self.parent.assertListEqual( list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]) def create_and_check_bert_for_next_sequence_prediction( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): model = TFBertForNextSentencePrediction(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } (seq_relationship_score, ) = model(inputs) result = { "seq_relationship_score": seq_relationship_score.numpy(), } self.parent.assertListEqual( list(result["seq_relationship_score"].shape), [self.batch_size, 2]) def create_and_check_bert_for_pretraining(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): model = TFBertForPreTraining(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } prediction_scores, seq_relationship_score = model(inputs) result = { "prediction_scores": prediction_scores.numpy(), "seq_relationship_score": seq_relationship_score.numpy(), } self.parent.assertListEqual( list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]) self.parent.assertListEqual( list(result["seq_relationship_score"].shape), [self.batch_size, 2]) def create_and_check_bert_for_sequence_classification( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): config.num_labels = self.num_labels model = TFBertForSequenceClassification(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } (logits, ) = model(inputs) result = { "logits": logits.numpy(), } self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels]) def create_and_check_bert_for_multiple_choice( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): config.num_choices = self.num_choices model = TFBertForMultipleChoice(config=config) multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1)) multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1)) multiple_choice_token_type_ids = tf.tile( tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1)) inputs = { "input_ids": multiple_choice_inputs_ids, "attention_mask": multiple_choice_input_mask, "token_type_ids": multiple_choice_token_type_ids, } (logits, ) = model(inputs) result = { "logits": logits.numpy(), } self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices]) def create_and_check_bert_for_token_classification( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): config.num_labels = self.num_labels model = TFBertForTokenClassification(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } (logits, ) = model(inputs) result = { "logits": logits.numpy(), } self.parent.assertListEqual( list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]) def create_and_check_bert_for_question_answering( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): model = TFBertForQuestionAnswering(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } start_logits, end_logits = model(inputs) result = { "start_logits": start_logits.numpy(), "end_logits": end_logits.numpy(), } self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length]) self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length]) def prepare_config_and_inputs_for_common(self): config_and_inputs = self.prepare_config_and_inputs() ( config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels, ) = config_and_inputs inputs_dict = { "input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask } return config, inputs_dict def setUp(self): self.model_tester = TFBertModelTest.TFBertModelTester(self) self.config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() def test_bert_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_model(*config_and_inputs) def test_for_masked_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_for_masked_lm( *config_and_inputs) def test_for_multiple_choice(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_for_multiple_choice( *config_and_inputs) def test_for_next_sequence_prediction(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_for_next_sequence_prediction( *config_and_inputs) def test_for_pretraining(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_for_pretraining( *config_and_inputs) def test_for_question_answering(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_for_question_answering( *config_and_inputs) def test_for_sequence_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_for_sequence_classification( *config_and_inputs) def test_for_token_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_bert_for_token_classification( *config_and_inputs) @slow def test_model_from_pretrained(self): # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: for model_name in ["bert-base-uncased"]: model = TFBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR) self.assertIsNotNone(model)
class TFBlenderbotModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = (TFBlenderbotForConditionalGeneration, TFBlenderbotModel) if is_tf_available() else () all_generative_model_classes = ( TFBlenderbotForConditionalGeneration, ) if is_tf_available() else () is_encoder_decoder = True test_pruning = False test_onnx = False def setUp(self): self.model_tester = TFBlenderbotModelTester(self) self.config_tester = ConfigTester(self, config_class=BlenderbotConfig) def test_config(self): self.config_tester.run_common_tests() def test_decoder_model_past_large_inputs(self): config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common( ) self.model_tester.check_decoder_model_past_large_inputs( *config_and_inputs) def test_model_common_attributes(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) for model_class in self.all_model_classes: model = model_class(config) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) if model_class in self.all_generative_model_classes: x = model.get_output_embeddings() assert isinstance(x, tf.keras.layers.Layer) name = model.get_bias() assert isinstance(name, dict) for k, v in name.items(): assert isinstance(v, tf.Variable) else: x = model.get_output_embeddings() assert x is None name = model.get_bias() assert name is None def test_saved_model_creation(self): # This test is too long (>30sec) and makes fail the CI pass def test_resize_token_embeddings(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) def _get_word_embedding_weight(model, embedding_layer): if hasattr(embedding_layer, "weight"): return embedding_layer.weight else: # Here we build the word embeddings weights if not exists. # And then we retry to get the attribute once built. model(model.dummy_inputs) if hasattr(embedding_layer, "weight"): return embedding_layer.weight else: return None for model_class in self.all_model_classes: for size in [config.vocab_size - 10, config.vocab_size + 10, None]: # build the embeddings model = model_class(config=config) old_input_embeddings = _get_word_embedding_weight( model, model.get_input_embeddings()) old_output_embeddings = _get_word_embedding_weight( model, model.get_output_embeddings()) old_final_logits_bias = model.get_bias() # reshape the embeddings model.resize_token_embeddings(size) new_input_embeddings = _get_word_embedding_weight( model, model.get_input_embeddings()) new_output_embeddings = _get_word_embedding_weight( model, model.get_output_embeddings()) new_final_logits_bias = model.get_bias() # check that the resized embeddings size matches the desired size. assert_size = size if size is not None else config.vocab_size self.assertEqual(new_input_embeddings.shape[0], assert_size) # check that weights remain the same after resizing models_equal = True for p1, p2 in zip(old_input_embeddings.value(), new_input_embeddings.value()): if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0: models_equal = False self.assertTrue(models_equal) if old_output_embeddings is not None and new_output_embeddings is not None: self.assertEqual(new_output_embeddings.shape[0], assert_size) models_equal = True for p1, p2 in zip(old_output_embeddings.value(), new_output_embeddings.value()): if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0: models_equal = False self.assertTrue(models_equal) if old_final_logits_bias is not None and new_final_logits_bias is not None: old_final_logits_bias = old_final_logits_bias[ "final_logits_bias"] new_final_logits_bias = new_final_logits_bias[ "final_logits_bias"] self.assertEqual(new_final_logits_bias.shape[0], 1) self.assertEqual(new_final_logits_bias.shape[1], assert_size) models_equal = True for old, new in zip(old_final_logits_bias.value(), new_final_logits_bias.value()): for p1, p2 in zip(old, new): if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0: models_equal = False self.assertTrue(models_equal)
def _model_is_tf(model_cls): return is_tf_available() and issubclass(model_cls, tf.keras.layers.Layer)
class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = (( TFAlbertModel, TFAlbertForPreTraining, TFAlbertForMaskedLM, TFAlbertForSequenceClassification, TFAlbertForQuestionAnswering, TFAlbertForTokenClassification, TFAlbertForMultipleChoice, ) if is_tf_available() else ()) test_head_masking = False def setUp(self): self.model_tester = TFAlbertModelTester(self) self.config_tester = ConfigTester(self, config_class=AlbertConfig, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() def test_albert_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_albert_model(*config_and_inputs) def test_for_pretraining(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_albert_for_pretraining( *config_and_inputs) def test_for_masked_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_albert_for_masked_lm( *config_and_inputs) def test_for_multiple_choice(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_albert_for_multiple_choice( *config_and_inputs) def test_for_sequence_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_albert_for_sequence_classification( *config_and_inputs) def test_for_question_answering(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_albert_for_question_answering( *config_and_inputs) def test_model_common_attributes(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) list_lm_models = [TFAlbertForPreTraining, TFAlbertForMaskedLM] for model_class in self.all_model_classes: model = model_class(config) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) if model_class in list_lm_models: x = model.get_output_embeddings() assert isinstance(x, tf.keras.layers.Layer) name = model.get_bias() assert isinstance(name, dict) for k, v in name.items(): assert isinstance(v, tf.Variable) else: x = model.get_output_embeddings() assert x is None name = model.get_bias() assert name is None def test_mixed_precision(self): # TODO JP: Make ALBERT float16 compliant pass @slow def test_model_from_pretrained(self): for model_name in TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TFAlbertModel.from_pretrained(model_name) self.assertIsNotNone(model)
from __future__ import absolute_import, division, print_function, unicode_literals import csv import json import os from abc import ABC, abstractmethod from contextlib import contextmanager from itertools import groupby from typing import Union, Optional, Tuple, List, Dict import numpy as np from transformers import AutoTokenizer, PreTrainedTokenizer, PretrainedConfig, \ SquadExample, squad_convert_examples_to_features, is_tf_available, is_torch_available, logger if is_tf_available(): from transformers import TFAutoModel, TFAutoModelForSequenceClassification, \ TFAutoModelForQuestionAnswering, TFAutoModelForTokenClassification if is_torch_available(): import torch from transformers import AutoModel, AutoModelForSequenceClassification, \ AutoModelForQuestionAnswering, AutoModelForTokenClassification class ArgumentHandler(ABC): """ Base interface for handling varargs for each Pipeline """ @abstractmethod def __call__(self, *args, **kwargs):
def __call__(self, *texts, **kwargs): """ Args: We support multiple use-cases, the following are exclusive: X: sequence of SquadExample data: sequence of SquadExample question: (str, List[str]), batch of question(s) to map along with context context: (str, List[str]), batch of context(s) associated with the provided question keyword argument Returns: dict: {'answer': str, 'score": float, 'start": int, "end": int} answer: the textual answer in the intial context score: the score the current answer scored for the model start: the character index in the original string corresponding to the beginning of the answer' span end: the character index in the original string corresponding to the ending of the answer' span """ # Set defaults values kwargs.setdefault('topk', 1) kwargs.setdefault('doc_stride', 128) kwargs.setdefault('max_answer_len', 15) kwargs.setdefault('max_seq_len', 384) kwargs.setdefault('max_question_len', 64) if kwargs['topk'] < 1: raise ValueError('topk parameter should be >= 1 (got {})'.format( kwargs['topk'])) if kwargs['max_answer_len'] < 1: raise ValueError( 'max_answer_len parameter should be >= 1 (got {})'.format( kwargs['max_answer_len'])) # Convert inputs to features examples = self._args_parser(*texts, **kwargs) features = squad_convert_examples_to_features( examples, self.tokenizer, kwargs['max_seq_len'], kwargs['doc_stride'], kwargs['max_question_len'], False) fw_args = self.inputs_for_model(features) # Manage tensor allocation on correct device with self.device_placement(): if is_tf_available(): import tensorflow as tf fw_args = {k: tf.constant(v) for (k, v) in fw_args.items()} start, end = self.model(fw_args) start, end = start.numpy(), end.numpy() else: import torch with torch.no_grad(): # Retrieve the score for the context tokens only (removing question tokens) fw_args = { k: torch.tensor(v) for (k, v) in fw_args.items() } start, end = self.model(**fw_args) start, end = start.cpu().numpy(), end.cpu().numpy() answers = [] for (example, feature, start_, end_) in zip(examples, features, start, end): # Normalize logits and spans to retrieve the answer start_ = np.exp(start_) / np.sum(np.exp(start_)) end_ = np.exp(end_) / np.sum(np.exp(end_)) # Mask padding and question start_, end_ = start_ * np.abs(np.array(feature.p_mask) - 1), end_ * np.abs( np.array(feature.p_mask) - 1) # TODO : What happens if not possible # Mask CLS start_[0] = end_[0] = 0 starts, ends, scores = self.decode(start_, end_, kwargs['topk'], kwargs['max_answer_len']) char_to_word = np.array(example.char_to_word_offset) # Convert the answer (tokens) back to the original text answers += [{ 'score': score.item(), 'start': np.where( char_to_word == feature.token_to_orig_map[s])[0][0].item(), 'end': np.where(char_to_word == feature.token_to_orig_map[e])[0] [-1].item(), 'answer': ' '.join( example.doc_tokens[feature.token_to_orig_map[s]:feature. token_to_orig_map[e] + 1]) } for s, e, score in zip(starts, ends, scores)] if len(answers) == 1: return answers[0] return answers
class TFConvBertModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = (( TFConvBertModel, TFConvBertForMaskedLM, TFConvBertForQuestionAnswering, TFConvBertForSequenceClassification, TFConvBertForTokenClassification, TFConvBertForMultipleChoice, ) if is_tf_available() else ()) test_pruning = False test_head_masking = False def setUp(self): self.model_tester = TFConvBertModelTester(self) self.config_tester = ConfigTester(self, config_class=ConvBertConfig, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_for_masked_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_masked_lm(*config_and_inputs) def test_for_multiple_choice(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_multiple_choice( *config_and_inputs) def test_for_question_answering(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_question_answering( *config_and_inputs) def test_for_sequence_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_sequence_classification( *config_and_inputs) def test_for_token_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_token_classification( *config_and_inputs) @slow def test_saved_model_with_attentions_output(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) config.output_attentions = True config.output_hidden_states = False if hasattr(config, "use_cache"): config.use_cache = False encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", self.model_tester.seq_length) encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length) for model_class in self.all_model_classes: class_inputs_dict = self._prepare_for_class( inputs_dict, model_class) model = model_class(config) num_out = len(model(class_inputs_dict)) with tempfile.TemporaryDirectory() as tmpdirname: model.save_pretrained(tmpdirname, saved_model=True) model = tf.keras.models.load_model( os.path.join(tmpdirname, "saved_model", "1")) outputs = model(class_inputs_dict) output = outputs["attentions"] self.assertEqual(len(outputs), num_out) self.assertEqual(len(output), self.model_tester.num_hidden_layers) self.assertListEqual( list(output[0].shape[-3:]), [ self.model_tester.num_attention_heads / 2, encoder_seq_length, encoder_key_length ], ) def test_xla_mode(self): # TODO JP: Make ConvBert XLA compliant pass @slow def test_model_from_pretrained(self): model = TFConvBertModel.from_pretrained("YituTech/conv-bert-base") self.assertIsNotNone(model) def test_attention_outputs(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) config.return_dict = True decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", self.model_tester.seq_length) encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", self.model_tester.seq_length) decoder_key_length = getattr(self.model_tester, "key_length", decoder_seq_length) encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length) def check_decoder_attentions_output(outputs): out_len = len(outputs) self.assertEqual(out_len % 2, 0) decoder_attentions = outputs.decoder_attentions self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers) self.assertListEqual( list(decoder_attentions[0].shape[-3:]), [ self.model_tester.num_attention_heads / 2, decoder_seq_length, decoder_key_length ], ) def check_encoder_attentions_output(outputs): attentions = [ t.numpy() for t in (outputs.encoder_attentions if config. is_encoder_decoder else outputs.attentions) ] self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) self.assertListEqual( list(attentions[0].shape[-3:]), [ self.model_tester.num_attention_heads / 2, encoder_seq_length, encoder_key_length ], ) for model_class in self.all_model_classes: inputs_dict["output_attentions"] = True inputs_dict["use_cache"] = False config.output_hidden_states = False model = model_class(config) outputs = model(self._prepare_for_class(inputs_dict, model_class)) out_len = len(outputs) self.assertEqual(config.output_hidden_states, False) check_encoder_attentions_output(outputs) if self.is_encoder_decoder: model = model_class(config) outputs = model( self._prepare_for_class(inputs_dict, model_class)) self.assertEqual(config.output_hidden_states, False) check_decoder_attentions_output(outputs) # Check that output attentions can also be changed via the config del inputs_dict["output_attentions"] config.output_attentions = True model = model_class(config) outputs = model(self._prepare_for_class(inputs_dict, model_class)) self.assertEqual(config.output_hidden_states, False) check_encoder_attentions_output(outputs) # Check attention is always last and order is fine inputs_dict["output_attentions"] = True config.output_hidden_states = True model = model_class(config) outputs = model(self._prepare_for_class(inputs_dict, model_class)) self.assertEqual(out_len + (2 if self.is_encoder_decoder else 1), len(outputs)) self.assertEqual(model.config.output_hidden_states, True) check_encoder_attentions_output(outputs)
class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = ((TFTransfoXLModel, TFTransfoXLLMHeadModel, TFTransfoXLForSequenceClassification) if is_tf_available() else ()) all_generative_model_classes = () if is_tf_available() else () # TODO: add this test when TFTransfoXLLMHead has a linear output layer implemented test_resize_embeddings = False test_head_masking = False test_onnx = False test_mismatched_shapes = False def setUp(self): self.model_tester = TFTransfoXLModelTester(self) self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37) def test_config(self): self.config_tester.run_common_tests() def test_transfo_xl_model(self): self.model_tester.set_seed() config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_transfo_xl_model(*config_and_inputs) def test_transfo_xl_lm_head(self): self.model_tester.set_seed() config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_transfo_xl_lm_head( *config_and_inputs) def test_transfo_xl_sequence_classification_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_transfo_xl_for_sequence_classification( *config_and_inputs) def test_model_common_attributes(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) list_other_models_with_output_ebd = [ TFTransfoXLForSequenceClassification ] for model_class in self.all_model_classes: model = model_class(config) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) if model_class in list_other_models_with_output_ebd: x = model.get_output_embeddings() assert isinstance(x, tf.keras.layers.Layer) name = model.get_bias() assert name is None else: x = model.get_output_embeddings() assert x is None name = model.get_bias() assert name is None def test_xla_mode(self): # TODO JP: Make TransfoXL XLA compliant pass @slow def test_model_from_pretrained(self): for model_name in TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TFTransfoXLModel.from_pretrained(model_name) self.assertIsNotNone(model)
class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester): all_model_classes = (TFCTRLModel, TFCTRLLMHeadModel) if is_tf_available() else () class TFCTRLModelTester(object): def __init__( self, parent, batch_size=13, seq_length=7, is_training=True, use_token_type_ids=True, use_input_mask=True, use_labels=True, use_mc_token_ids=True, vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None, ): self.parent = parent self.batch_size = batch_size self.seq_length = seq_length self.is_training = is_training self.use_token_type_ids = use_token_type_ids self.use_input_mask = use_input_mask self.use_labels = use_labels self.use_mc_token_ids = use_mc_token_ids self.vocab_size = vocab_size self.hidden_size = hidden_size self.num_hidden_layers = num_hidden_layers self.num_attention_heads = num_attention_heads self.intermediate_size = intermediate_size self.hidden_act = hidden_act self.hidden_dropout_prob = hidden_dropout_prob self.attention_probs_dropout_prob = attention_probs_dropout_prob self.max_position_embeddings = max_position_embeddings self.type_vocab_size = type_vocab_size self.type_sequence_label_size = type_sequence_label_size self.initializer_range = initializer_range self.num_labels = num_labels self.num_choices = num_choices self.scope = scope def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_mask = None if self.use_input_mask: input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) token_type_ids = None if self.use_token_type_ids: token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) mc_token_ids = None if self.use_mc_token_ids: mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length) sequence_labels = None token_labels = None choice_labels = None if self.use_labels: sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) config = CTRLConfig( vocab_size_or_config_json_file=self.vocab_size, n_embd=self.hidden_size, n_layer=self.num_hidden_layers, n_head=self.num_attention_heads, # intermediate_size=self.intermediate_size, # hidden_act=self.hidden_act, # hidden_dropout_prob=self.hidden_dropout_prob, # attention_probs_dropout_prob=self.attention_probs_dropout_prob, n_positions=self.max_position_embeddings, n_ctx=self.max_position_embeddings # type_vocab_size=self.type_vocab_size, # initializer_range=self.initializer_range ) head_mask = ids_tensor( [self.num_hidden_layers, self.num_attention_heads], 2) return config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, token_labels, choice_labels def create_and_check_ctrl_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): model = TFCTRLModel(config=config) inputs = { 'input_ids': input_ids, 'attention_mask': input_mask, 'token_type_ids': token_type_ids } sequence_output = model(inputs)[0] inputs = [input_ids, None, input_mask] # None is the data for 'past' sequence_output = model(inputs)[0] sequence_output = model(input_ids)[0] result = { "sequence_output": sequence_output.numpy(), } self.parent.assertListEqual( list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]) def create_and_check_ctrl_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): model = TFCTRLLMHeadModel(config=config) inputs = { 'input_ids': input_ids, 'attention_mask': input_mask, 'token_type_ids': token_type_ids } prediction_scores = model(inputs)[0] result = { "prediction_scores": prediction_scores.numpy(), } self.parent.assertListEqual( list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]) def prepare_config_and_inputs_for_common(self): config_and_inputs = self.prepare_config_and_inputs() (config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, token_labels, choice_labels) = config_and_inputs inputs_dict = { 'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask } return config, inputs_dict def setUp(self): self.model_tester = TFCTRLModelTest.TFCTRLModelTester(self) self.config_tester = ConfigTester(self, config_class=CTRLConfig, n_embd=37) def test_config(self): self.config_tester.run_common_tests() def test_ctrl_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_ctrl_model(*config_and_inputs) def test_ctrl_lm_head(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_ctrl_lm_head(*config_and_inputs) @slow def test_model_from_pretrained(self): cache_dir = "/tmp/transformers_test/" for model_name in list( TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: model = TFCTRLModel.from_pretrained(model_name, cache_dir=cache_dir) shutil.rmtree(cache_dir) self.assertIsNotNone(model)
def custom_encode_plus(sentence, tokenizer, return_tensors=None): # {'input_ids': [2, 10841, 10966, 10832, 10541, 21509, 27660, 18, 3], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0]} words = sentence.split() tokens = [] tokens_mask = [] for word in words: word_tokens = tokenizer.tokenize(word) if not word_tokens: word_tokens = [tokenizer.unk_token ] # For handling the bad-encoded word tokens.extend(word_tokens) tokens_mask.extend([1] + [0] * (len(word_tokens) - 1)) ids = tokenizer.convert_tokens_to_ids(tokens) len_ids = len(ids) total_len = len_ids + tokenizer.num_special_tokens_to_add() if tokenizer.max_len and total_len > tokenizer.max_len: ids, _, _ = tokenizer.truncate_sequences( ids, pair_ids=None, num_tokens_to_remove=total_len - tokenizer.max_len, truncation_strategy="longest_first", stride=0, ) sequence = tokenizer.build_inputs_with_special_tokens(ids) token_type_ids = tokenizer.create_token_type_ids_from_sequences(ids) # HARD-CODED: As I know, most of the transformers architecture will be `[CLS] + text + [SEP]`` # Only way to safely cover all the cases is to integrate `token mask builder` in internal library. tokens_mask = [1] + tokens_mask + [1] words = [tokenizer.cls_token] + words + [tokenizer.sep_token] encoded_inputs = {} encoded_inputs["input_ids"] = sequence encoded_inputs["token_type_ids"] = token_type_ids if return_tensors == "tf" and is_tf_available(): encoded_inputs["input_ids"] = tf.constant( [encoded_inputs["input_ids"]]) if "token_type_ids" in encoded_inputs: encoded_inputs["token_type_ids"] = tf.constant( [encoded_inputs["token_type_ids"]]) if "attention_mask" in encoded_inputs: encoded_inputs["attention_mask"] = tf.constant( [encoded_inputs["attention_mask"]]) elif return_tensors == "pt" and is_torch_available(): encoded_inputs["input_ids"] = torch.tensor( [encoded_inputs["input_ids"]]) if "token_type_ids" in encoded_inputs: encoded_inputs["token_type_ids"] = torch.tensor( [encoded_inputs["token_type_ids"]]) if "attention_mask" in encoded_inputs: encoded_inputs["attention_mask"] = torch.tensor( [encoded_inputs["attention_mask"]]) elif return_tensors is not None: logger.warning( "Unable to convert output to tensors format {}, PyTorch or TensorFlow is not available." .format(return_tensors)) return encoded_inputs, words, tokens_mask
class TFLongformerModelTest(TFModelTesterMixin, unittest.TestCase): test_pruning = False # pruning is not supported test_headmasking = False # head masking is not supported test_torchscript = False all_model_classes = ( (TFLongformerModel, TFLongformerForMaskedLM, TFLongformerForQuestionAnswering,) if is_tf_available() else () ) def setUp(self): self.model_tester = TFLongformerModelTester(self) self.config_tester = ConfigTester(self, config_class=LongformerConfig, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() def test_longformer_model_attention_mask_determinism(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_attention_mask_determinism(*config_and_inputs) def test_longformer_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_longformer_model(*config_and_inputs) def test_longformer_model_global_attention_mask(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_longformer_model_with_global_attention_mask(*config_and_inputs) def test_longformer_for_masked_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_longformer_for_masked_lm(*config_and_inputs) def test_longformer_for_question_answering(self): config_and_inputs = self.model_tester.prepare_config_and_inputs_for_question_answering() self.model_tester.create_and_check_longformer_for_question_answering(*config_and_inputs)
class TFBenchmarkTest(unittest.TestCase): def check_results_dict_not_empty(self, results): for model_result in results.values(): for batch_size, sequence_length in zip(model_result["bs"], model_result["ss"]): result = model_result["result"][batch_size][sequence_length] self.assertIsNotNone(result) def test_inference_no_configs_eager(self): MODEL_ID = "sshleifer/tiny-gpt2" benchmark_args = TensorFlowBenchmarkArguments( models=[MODEL_ID], training=False, inference=True, sequence_lengths=[8], batch_sizes=[1], eager_mode=True, multi_process=False, ) benchmark = TensorFlowBenchmark(benchmark_args) results = benchmark.run() self.check_results_dict_not_empty(results.time_inference_result) self.check_results_dict_not_empty(results.memory_inference_result) def test_inference_no_configs_only_pretrain(self): MODEL_ID = "sgugger/tiny-distilbert-classification" benchmark_args = TensorFlowBenchmarkArguments( models=[MODEL_ID], training=False, inference=True, sequence_lengths=[8], batch_sizes=[1], multi_process=False, only_pretrain_model=True, ) benchmark = TensorFlowBenchmark(benchmark_args) results = benchmark.run() self.check_results_dict_not_empty(results.time_inference_result) self.check_results_dict_not_empty(results.memory_inference_result) def test_inference_no_configs_graph(self): MODEL_ID = "sshleifer/tiny-gpt2" benchmark_args = TensorFlowBenchmarkArguments( models=[MODEL_ID], training=False, inference=True, sequence_lengths=[8], batch_sizes=[1], multi_process=False, ) benchmark = TensorFlowBenchmark(benchmark_args) results = benchmark.run() self.check_results_dict_not_empty(results.time_inference_result) self.check_results_dict_not_empty(results.memory_inference_result) def test_inference_with_configs_eager(self): MODEL_ID = "sshleifer/tiny-gpt2" config = AutoConfig.from_pretrained(MODEL_ID) benchmark_args = TensorFlowBenchmarkArguments( models=[MODEL_ID], training=False, inference=True, sequence_lengths=[8], batch_sizes=[1], eager_mode=True, multi_process=False, ) benchmark = TensorFlowBenchmark(benchmark_args, [config]) results = benchmark.run() self.check_results_dict_not_empty(results.time_inference_result) self.check_results_dict_not_empty(results.memory_inference_result) def test_inference_with_configs_graph(self): MODEL_ID = "sshleifer/tiny-gpt2" config = AutoConfig.from_pretrained(MODEL_ID) benchmark_args = TensorFlowBenchmarkArguments( models=[MODEL_ID], training=False, inference=True, sequence_lengths=[8], batch_sizes=[1], multi_process=False, ) benchmark = TensorFlowBenchmark(benchmark_args, [config]) results = benchmark.run() self.check_results_dict_not_empty(results.time_inference_result) self.check_results_dict_not_empty(results.memory_inference_result) def test_train_no_configs(self): MODEL_ID = "sshleifer/tiny-gpt2" benchmark_args = TensorFlowBenchmarkArguments( models=[MODEL_ID], training=True, inference=False, sequence_lengths=[8], batch_sizes=[1], multi_process=False, ) benchmark = TensorFlowBenchmark(benchmark_args) results = benchmark.run() self.check_results_dict_not_empty(results.time_train_result) self.check_results_dict_not_empty(results.memory_train_result) def test_train_with_configs(self): MODEL_ID = "sshleifer/tiny-gpt2" config = AutoConfig.from_pretrained(MODEL_ID) benchmark_args = TensorFlowBenchmarkArguments( models=[MODEL_ID], training=True, inference=False, sequence_lengths=[8], batch_sizes=[1], multi_process=False, ) benchmark = TensorFlowBenchmark(benchmark_args, [config]) results = benchmark.run() self.check_results_dict_not_empty(results.time_train_result) self.check_results_dict_not_empty(results.memory_train_result) def test_inference_encoder_decoder_with_configs(self): MODEL_ID = "patrickvonplaten/t5-tiny-random" config = AutoConfig.from_pretrained(MODEL_ID) benchmark_args = TensorFlowBenchmarkArguments( models=[MODEL_ID], training=False, inference=True, sequence_lengths=[8], batch_sizes=[1], multi_process=False, ) benchmark = TensorFlowBenchmark(benchmark_args, configs=[config]) results = benchmark.run() self.check_results_dict_not_empty(results.time_inference_result) self.check_results_dict_not_empty(results.memory_inference_result) @unittest.skipIf(is_tf_available() and len(tf.config.list_physical_devices("GPU")) == 0, "Cannot do xla on CPU.") def test_inference_no_configs_xla(self): MODEL_ID = "sshleifer/tiny-gpt2" benchmark_args = TensorFlowBenchmarkArguments( models=[MODEL_ID], training=False, inference=True, sequence_lengths=[8], batch_sizes=[1], use_xla=True, multi_process=False, ) benchmark = TensorFlowBenchmark(benchmark_args) results = benchmark.run() self.check_results_dict_not_empty(results.time_inference_result) self.check_results_dict_not_empty(results.memory_inference_result) def test_save_csv_files(self): MODEL_ID = "sshleifer/tiny-gpt2" with tempfile.TemporaryDirectory() as tmp_dir: benchmark_args = TensorFlowBenchmarkArguments( models=[MODEL_ID], inference=True, save_to_csv=True, sequence_lengths=[8], batch_sizes=[1], inference_time_csv_file=os.path.join(tmp_dir, "inf_time.csv"), inference_memory_csv_file=os.path.join(tmp_dir, "inf_mem.csv"), env_info_csv_file=os.path.join(tmp_dir, "env.csv"), multi_process=False, ) benchmark = TensorFlowBenchmark(benchmark_args) benchmark.run() self.assertTrue( Path(os.path.join(tmp_dir, "inf_time.csv")).exists()) self.assertTrue( Path(os.path.join(tmp_dir, "inf_mem.csv")).exists()) self.assertTrue(Path(os.path.join(tmp_dir, "env.csv")).exists()) def test_trace_memory(self): MODEL_ID = "sshleifer/tiny-gpt2" def _check_summary_is_not_empty(summary): self.assertTrue(hasattr(summary, "sequential")) self.assertTrue(hasattr(summary, "cumulative")) self.assertTrue(hasattr(summary, "current")) self.assertTrue(hasattr(summary, "total")) with tempfile.TemporaryDirectory() as tmp_dir: benchmark_args = TensorFlowBenchmarkArguments( models=[MODEL_ID], inference=True, sequence_lengths=[8], batch_sizes=[1], log_filename=os.path.join(tmp_dir, "log.txt"), log_print=True, trace_memory_line_by_line=True, eager_mode=True, multi_process=False, ) benchmark = TensorFlowBenchmark(benchmark_args) result = benchmark.run() _check_summary_is_not_empty(result.inference_summary) self.assertTrue(Path(os.path.join(tmp_dir, "log.txt")).exists())
class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = (( TFXLNetModel, TFXLNetLMHeadModel, TFXLNetForSequenceClassification, TFXLNetForTokenClassification, TFXLNetForQuestionAnsweringSimple, TFXLNetForMultipleChoice, ) if is_tf_available() else ()) all_generative_model_classes = ( (TFXLNetLMHeadModel, ) if is_tf_available() else () ) # TODO (PVP): Check other models whether language generation is also applicable test_head_masking = False test_onnx = False def setUp(self): self.model_tester = TFXLNetModelTester(self) self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37) def test_config(self): self.config_tester.run_common_tests() def test_xlnet_base_model(self): self.model_tester.set_seed() config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_xlnet_base_model(*config_and_inputs) def test_xlnet_lm_head(self): self.model_tester.set_seed() config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_xlnet_lm_head(*config_and_inputs) def test_xlnet_sequence_classif(self): self.model_tester.set_seed() config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_xlnet_sequence_classif( *config_and_inputs) def test_xlnet_token_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_xlnet_for_token_classification( *config_and_inputs) def test_xlnet_qa(self): self.model_tester.set_seed() config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_xlnet_qa(*config_and_inputs) def test_xlnet_for_multiple_choice(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_xlnet_for_multiple_choice( *config_and_inputs) @slow def test_model_from_pretrained(self): for model_name in TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TFXLNetModel.from_pretrained(model_name) self.assertIsNotNone(model) # overwrite since `TFXLNetLMHeadModel` doesn't cut logits/labels def test_loss_computation(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) for model_class in self.all_model_classes: model = model_class(config) if getattr(model, "hf_compute_loss", None): # The number of elements in the loss should be the same as the number of elements in the label prepared_for_class = self._prepare_for_class( inputs_dict.copy(), model_class, return_labels=True) added_label = prepared_for_class[sorted( list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)[0]] loss_size = tf.size(added_label) # `TFXLNetLMHeadModel` doesn't cut logits/labels # if model.__class__ in get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING): # # if loss is causal lm loss, labels are shift, so that one label per batch # # is cut # loss_size = loss_size - self.model_tester.batch_size # Test that model correctly compute the loss with kwargs prepared_for_class = self._prepare_for_class( inputs_dict.copy(), model_class, return_labels=True) input_name = "input_ids" if "input_ids" in prepared_for_class else "pixel_values" input_ids = prepared_for_class.pop(input_name) loss = model(input_ids, **prepared_for_class)[0] self.assertEqual(loss.shape, [loss_size]) # Test that model correctly compute the loss with a dict prepared_for_class = self._prepare_for_class( inputs_dict.copy(), model_class, return_labels=True) loss = model(prepared_for_class)[0] self.assertEqual(loss.shape, [loss_size]) # Test that model correctly compute the loss with a tuple prepared_for_class = self._prepare_for_class( inputs_dict.copy(), model_class, return_labels=True) # Get keys that were added with the _prepare_for_class function label_keys = prepared_for_class.keys() - inputs_dict.keys() signature = inspect.signature(model.call).parameters signature_names = list(signature.keys()) # Create a dictionary holding the location of the tensors in the tuple tuple_index_mapping = {0: input_name} for label_key in label_keys: label_key_index = signature_names.index(label_key) tuple_index_mapping[label_key_index] = label_key sorted_tuple_index_mapping = sorted( tuple_index_mapping.items()) # Initialize a list with their default values, update the values and convert to a tuple list_input = [] for name in signature_names: if name != "kwargs": list_input.append(signature[name].default) for index, value in sorted_tuple_index_mapping: list_input[index] = prepared_for_class[value] tuple_input = tuple(list_input) # Send to model loss = model(tuple_input[:-1])[0] self.assertEqual(loss.shape, [loss_size])
class PerformerAttentionTest(unittest.TestCase): # Check that setting attention_type='performer' actually makes the model use (TF)PerformerAttention def test_performer_models(self): def _model_is_tf(model_cls): return is_tf_available() and issubclass(model_cls, tf.keras.layers.Layer) for model_class, model_config in performer_supporting_models_and_configs( ): try: model = model_class(model_config(attention_type='performer')) # The TapasModel requires the torch-scatter library, and we shouldn't fail this test just because # the user doesn't have that library installed except ImportError: pass else: with self.subTest(model=model_class): self.assertIsNotNone(model) # It turns out that it's very non-trivial to do this type of recursive iteration of sublayers in # TensorFlow, so we just don't bother to do the check for those models if not _model_is_tf(model_class): self.assertTrue( any((isinstance(module, PerformerAttention) for module in model.modules()))) @require_torch def test_output_shape_pytorch(self): self._test_output_shape_for_library('pt') @require_tf def test_output_shape_tensorflow(self): self._test_output_shape_for_library('tf') @unittest.skipUnless( _run_nondeterministic_tests, "This can fail randomly if we draw an 'unlucky' set of features.") def test_softmax_noncausal_attention_output_pytorch(self): self._test_softmax_noncausal_attention_output_for_library('pt') @unittest.skipUnless( _run_nondeterministic_tests, "This can fail randomly if we draw an 'unlucky' set of features.") def test_softmax_noncausal_attention_output_tensorflow(self): self._test_softmax_noncausal_attention_output_for_library('tf') @unittest.skipUnless(is_torch_available() and is_tf_available(), "Both PyTorch and TensorFlow must be available") @torch.no_grad() def test_pytorch_tensorflow_parity(self): for config, batch, seq_len in self._iterate_config_options(): # This option leads to random test failures due to the TFPerformerAttention object randomly redrawing # features right after we set its features to be equal to those of the PyTorch object, so we just skip it if config.redraw_stochastically: continue try: pt_attention = PerformerAttention(config) except AssertionError: continue try: tf_attention = TFPerformerAttention(config) except AssertionError: continue # Copy the weights from the PyTorch object to the TensorFlow one for name, param in pt_attention.named_parameters(): pt_value = param.data.numpy() # Get the corresponding param (tf.Variable) in the TensorFlow object obj = tf_attention for key in name.split('.'): if key.isnumeric(): obj = obj[int(key)] elif key == "weight": # Note that we have to transpose the weights when converting to TF to get the same output obj.kernel_initializer = tf.constant_initializer( pt_value.T) elif key == "bias": obj.bias_initializer = tf.constant_initializer( pt_value) else: obj = getattr(obj, key) # Test that the two modules produce the same output, within numerical error with self.subTest(**config.to_dict()): q, k, v = (torch.randn(batch, seq_len, config.d_model) for _ in range(3)) tf_q, tf_k, tf_v = (tf.constant(x.numpy()) for x in (q, k, v)) pt_output = pt_attention(q, k, v)[0] tf_attention.random_features = tf.constant( pt_attention.random_features.numpy()) tf_output = tf_attention(tf_q, tf_k, tf_v)[0] self.assertTrue( np.allclose(pt_output.numpy(), tf_output.numpy(), atol=2e-4)) self.assertListEqual(list(pt_output.shape), [batch, seq_len, config.d_model]) # Exhaustive grid search of possible config options (and a random search of batch sizes and seq lengths) @staticmethod def _iterate_config_options( ) -> Iterator[Tuple[PerformerAttentionConfig, int, int]]: param_names = ['kernel_type', 'orthogonal_feature_algorithm'] legal_values = [PerformerKernel, OrthogonalFeatureAlgorithm ] # Enum classes are iterable # Get all boolean config options for x in fields(PerformerAttentionConfig): if x.type == bool: legal_values.append((False, True)) param_names.append(x.name) for values in product(*legal_values): kwargs = dict(zip(param_names, values)) d_model = random.randint(2, 10) batch_size = random.randint(1, 4) num_heads = random.choice([ i for i in range(1, d_model) if not d_model % i ]) # Factors of d_model length = 1 if kwargs.get( 'use_recurrent_decoding') else random.randint(1, 10) yield PerformerAttentionConfig(d_model=d_model, num_heads=num_heads, **kwargs), batch_size, length @torch.no_grad() def _test_output_shape_for_library(self, library: str = 'pt'): for config, batch_size, length in self._iterate_config_options(): d_model = config.d_model # PyTorch specific stuff if library == 'pt': attn_class = PerformerAttention def rand_tensor_func(): return torch.randn(batch_size, length, d_model) # TensorFlow specific stuff else: attn_class = TFPerformerAttention def rand_tensor_func(): return tf.random.normal((batch_size, length, d_model)) try: attention = attn_class(config) except AssertionError: # Skip illegal kwargs combinations pass else: with self.subTest(**config.to_dict()): q, k, v = [rand_tensor_func() for _ in range(3)] output = attention(q, k, v)[0] self.assertListEqual(list(output.shape), [batch_size, length, d_model]) def _test_softmax_noncausal_attention_output_for_library( self, library: str = 'pt'): batch_size = 1 length = 10 num_heads = 1 dim = 10 config = PerformerAttentionConfig(d_model=dim, num_heads=num_heads, kernel_type='exp', num_random_features=30000, use_linear_layers=False) # PyTorch-specific stuff if library == 'pt': pt_attention = PerformerAttention(config) q, k, v = [torch.randn(batch_size, length, dim) for _ in range(3)] performer_attention_output = pt_attention(q, k, v)[0] attention_scores = q @ k.transpose(-2, -1) / math.sqrt(float(dim)) attention_scores = torch.nn.functional.softmax(attention_scores, dim=1) # TensorFlow-specific stuff else: tf_attention = TFPerformerAttention(config) q, k, v = [ tf.random.normal((batch_size, length, dim)) for _ in range(3) ] performer_attention_output = tf_attention(q, k, v)[0] attention_scores = q @ tf.linalg.matrix_transpose(k) / math.sqrt( float(dim)) attention_scores = tf.nn.softmax(attention_scores, axis=1) softmax_output = (attention_scores @ v).numpy() errors = softmax_output - performer_attention_output.numpy() mse = np.mean(errors**2) bias = np.mean(errors) self.assertLess(mse, 0.1) self.assertLess(np.abs(bias), 0.025)
class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = (( TFMobileBertModel, TFMobileBertForMaskedLM, TFMobileBertForNextSentencePrediction, TFMobileBertForPreTraining, TFMobileBertForQuestionAnswering, TFMobileBertForSequenceClassification, TFMobileBertForTokenClassification, TFMobileBertForMultipleChoice, ) if is_tf_available() else ()) test_head_masking = False test_onnx = False class TFMobileBertModelTester(object): def __init__( self, parent, batch_size=13, seq_length=7, is_training=True, use_input_mask=True, use_token_type_ids=True, use_labels=True, vocab_size=99, hidden_size=32, embedding_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None, ): self.parent = parent self.batch_size = batch_size self.seq_length = seq_length self.is_training = is_training self.use_input_mask = use_input_mask self.use_token_type_ids = use_token_type_ids self.use_labels = use_labels self.vocab_size = vocab_size self.hidden_size = hidden_size self.num_hidden_layers = num_hidden_layers self.num_attention_heads = num_attention_heads self.intermediate_size = intermediate_size self.hidden_act = hidden_act self.hidden_dropout_prob = hidden_dropout_prob self.attention_probs_dropout_prob = attention_probs_dropout_prob self.max_position_embeddings = max_position_embeddings self.type_vocab_size = type_vocab_size self.type_sequence_label_size = type_sequence_label_size self.initializer_range = initializer_range self.num_labels = num_labels self.num_choices = num_choices self.scope = scope self.embedding_size = embedding_size def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_mask = None if self.use_input_mask: input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) token_type_ids = None if self.use_token_type_ids: token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) sequence_labels = None token_labels = None choice_labels = None if self.use_labels: sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) config = MobileBertConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads, intermediate_size=self.intermediate_size, hidden_act=self.hidden_act, hidden_dropout_prob=self.hidden_dropout_prob, attention_probs_dropout_prob=self.attention_probs_dropout_prob, max_position_embeddings=self.max_position_embeddings, type_vocab_size=self.type_vocab_size, initializer_range=self.initializer_range, embedding_size=self.embedding_size, ) return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels def create_and_check_mobilebert_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): model = TFMobileBertModel(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } result = model(inputs) inputs = [input_ids, input_mask] result = model(inputs) result = model(input_ids) self.parent.assertEqual( result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size)) def create_and_check_mobilebert_for_masked_lm( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): model = TFMobileBertForMaskedLM(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } result = model(inputs) self.parent.assertEqual( result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) def create_and_check_mobilebert_for_next_sequence_prediction( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): model = TFMobileBertForNextSentencePrediction(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } result = model(inputs) self.parent.assertEqual(result.logits.shape, (self.batch_size, 2)) def create_and_check_mobilebert_for_pretraining( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): model = TFMobileBertForPreTraining(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } result = model(inputs) self.parent.assertEqual( result.prediction_logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) self.parent.assertEqual(result.seq_relationship_logits.shape, (self.batch_size, 2)) def create_and_check_mobilebert_for_sequence_classification( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): config.num_labels = self.num_labels model = TFMobileBertForSequenceClassification(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } result = model(inputs) self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels)) def create_and_check_mobilebert_for_multiple_choice( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): config.num_choices = self.num_choices model = TFMobileBertForMultipleChoice(config=config) multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1)) multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1)) multiple_choice_token_type_ids = tf.tile( tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1)) inputs = { "input_ids": multiple_choice_inputs_ids, "attention_mask": multiple_choice_input_mask, "token_type_ids": multiple_choice_token_type_ids, } result = model(inputs) self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices)) def create_and_check_mobilebert_for_token_classification( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): config.num_labels = self.num_labels model = TFMobileBertForTokenClassification(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } result = model(inputs) self.parent.assertEqual( result.logits.shape, (self.batch_size, self.seq_length, self.num_labels)) def create_and_check_mobilebert_for_question_answering( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): model = TFMobileBertForQuestionAnswering(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } result = model(inputs) self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) def prepare_config_and_inputs_for_common(self): config_and_inputs = self.prepare_config_and_inputs() ( config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels, ) = config_and_inputs inputs_dict = { "input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask } return config, inputs_dict def setUp(self): self.model_tester = TFMobileBertModelTest.TFMobileBertModelTester(self) self.config_tester = ConfigTester(self, config_class=MobileBertConfig, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() def test_mobilebert_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_mobilebert_model(*config_and_inputs) def test_for_masked_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_mobilebert_for_masked_lm( *config_and_inputs) def test_for_multiple_choice(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_mobilebert_for_multiple_choice( *config_and_inputs) def test_for_next_sequence_prediction(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_mobilebert_for_next_sequence_prediction( *config_and_inputs) def test_for_pretraining(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_mobilebert_for_pretraining( *config_and_inputs) def test_for_question_answering(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_mobilebert_for_question_answering( *config_and_inputs) def test_for_sequence_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_mobilebert_for_sequence_classification( *config_and_inputs) def test_for_token_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_mobilebert_for_token_classification( *config_and_inputs) def test_model_common_attributes(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) list_lm_models = [TFMobileBertForMaskedLM, TFMobileBertForPreTraining] for model_class in self.all_model_classes: model = model_class(config) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) if model_class in list_lm_models: x = model.get_output_embeddings() assert isinstance(x, tf.keras.layers.Layer) name = model.get_bias() assert isinstance(name, dict) for k, v in name.items(): assert isinstance(v, tf.Variable) else: x = model.get_output_embeddings() assert x is None name = model.get_bias() assert name is None def test_saved_model_creation(self): # This test is too long (>30sec) and makes fail the CI pass def test_mixed_precision(self): # TODO JP: Make MobileBert float16 compliant pass @slow def test_model_from_pretrained(self): # for model_name in TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: for model_name in ["google/mobilebert-uncased"]: model = TFMobileBertModel.from_pretrained(model_name) self.assertIsNotNone(model)
class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = ((TFRobertaModel, TFRobertaForMaskedLM, TFRobertaForSequenceClassification) if is_tf_available() else ()) class TFRobertaModelTester(object): def __init__( self, parent, batch_size=13, seq_length=7, is_training=True, use_input_mask=True, use_token_type_ids=True, use_labels=True, vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None, ): self.parent = parent self.batch_size = batch_size self.seq_length = seq_length self.is_training = is_training self.use_input_mask = use_input_mask self.use_token_type_ids = use_token_type_ids self.use_labels = use_labels self.vocab_size = vocab_size self.hidden_size = hidden_size self.num_hidden_layers = num_hidden_layers self.num_attention_heads = num_attention_heads self.intermediate_size = intermediate_size self.hidden_act = hidden_act self.hidden_dropout_prob = hidden_dropout_prob self.attention_probs_dropout_prob = attention_probs_dropout_prob self.max_position_embeddings = max_position_embeddings self.type_vocab_size = type_vocab_size self.type_sequence_label_size = type_sequence_label_size self.initializer_range = initializer_range self.num_labels = num_labels self.num_choices = num_choices self.scope = scope def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_mask = None if self.use_input_mask: input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) token_type_ids = None if self.use_token_type_ids: token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) sequence_labels = None token_labels = None choice_labels = None if self.use_labels: sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) config = RobertaConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads, intermediate_size=self.intermediate_size, hidden_act=self.hidden_act, hidden_dropout_prob=self.hidden_dropout_prob, attention_probs_dropout_prob=self.attention_probs_dropout_prob, max_position_embeddings=self.max_position_embeddings, type_vocab_size=self.type_vocab_size, initializer_range=self.initializer_range, ) return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels def create_and_check_roberta_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): model = TFRobertaModel(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } sequence_output = model(inputs)[0] inputs = [input_ids, input_mask] sequence_output = model(inputs)[0] sequence_output = model(input_ids)[0] result = { "sequence_output": sequence_output.numpy(), } self.parent.assertListEqual( list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]) def create_and_check_roberta_for_masked_lm(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): model = TFRobertaForMaskedLM(config=config) prediction_scores = model([input_ids, input_mask, token_type_ids])[0] result = { "prediction_scores": prediction_scores.numpy(), } self.parent.assertListEqual( list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]) def create_and_check_roberta_for_token_classification( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): config.num_labels = self.num_labels model = TFRobertaForTokenClassification(config=config) inputs = { "input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids } (logits, ) = model(inputs) result = { "logits": logits.numpy(), } self.parent.assertListEqual( list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]) def prepare_config_and_inputs_for_common(self): config_and_inputs = self.prepare_config_and_inputs() ( config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels, ) = config_and_inputs inputs_dict = { "input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask } return config, inputs_dict def setUp(self): self.model_tester = TFRobertaModelTest.TFRobertaModelTester(self) self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() def test_roberta_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_roberta_model(*config_and_inputs) def test_for_masked_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_roberta_for_masked_lm( *config_and_inputs) def test_for_token_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_roberta_for_token_classification( *config_and_inputs) @slow def test_model_from_pretrained(self): for model_name in list( TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: model = TFRobertaModel.from_pretrained(model_name, cache_dir=CACHE_DIR) self.assertIsNotNone(model)
class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = (( TFRobertaModel, TFRobertaForCausalLM, TFRobertaForMaskedLM, TFRobertaForSequenceClassification, TFRobertaForTokenClassification, TFRobertaForQuestionAnswering, ) if is_tf_available() else ()) test_head_masking = False test_onnx = False def setUp(self): self.model_tester = TFRobertaModelTester(self) self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() def test_model(self): """Test the base model""" config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_causal_lm_base_model(self): """Test the base model of the causal LM model is_deocder=True, no cross_attention, no encoder outputs """ config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_causal_lm_base_model( *config_and_inputs) def test_model_as_decoder(self): """Test the base model as a decoder (of an encoder-decoder architecture) is_deocder=True + cross_attention + pass encoder outputs """ config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder( ) self.model_tester.create_and_check_model_as_decoder(*config_and_inputs) def test_for_masked_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_masked_lm(*config_and_inputs) def test_for_causal_lm(self): """Test the causal LM model""" config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_causal_lm_model(*config_and_inputs) def test_causal_lm_model_as_decoder(self): """Test the causal LM model as a decoder""" config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder( ) self.model_tester.create_and_check_causal_lm_model_as_decoder( *config_and_inputs) def test_causal_lm_model_past(self): """Test causal LM model with `past_key_values`""" config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_causal_lm_model_past( *config_and_inputs) def test_causal_lm_model_past_with_attn_mask(self): """Test the causal LM model with `past_key_values` and `attention_mask`""" config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_causal_lm_model_past_with_attn_mask( *config_and_inputs) def test_causal_lm_model_past_with_large_inputs(self): """Test the causal LM model with `past_key_values` and a longer decoder sequence length""" config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_causal_lm_model_past_large_inputs( *config_and_inputs) def test_decoder_model_past_with_large_inputs(self): """Similar to `test_causal_lm_model_past_with_large_inputs` but with cross-attention""" config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder( ) self.model_tester.create_and_check_decoder_model_past_large_inputs( *config_and_inputs) def test_for_token_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_token_classification( *config_and_inputs) def test_for_question_answering(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_question_answering( *config_and_inputs) def test_for_multiple_choice(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_multiple_choice( *config_and_inputs) @slow def test_model_from_pretrained(self): for model_name in TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TFRobertaModel.from_pretrained(model_name) self.assertIsNotNone(model)
class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = ( (TFGPT2Model, TFGPT2LMHeadModel, TFGPT2ForSequenceClassification, TFGPT2DoubleHeadsModel) if is_tf_available() else () ) all_generative_model_classes = (TFGPT2LMHeadModel,) if is_tf_available() else () test_head_masking = False test_onnx = True onnx_min_opset = 10 def setUp(self): self.model_tester = TFGPT2ModelTester(self) self.config_tester = ConfigTester(self, config_class=GPT2Config, n_embd=37) def test_config(self): self.config_tester.run_common_tests() def test_gpt2_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_gpt2_model(*config_and_inputs) def test_gpt2_model_past(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_gpt2_model_past(*config_and_inputs) def test_gpt2_model_att_mask_past(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_gpt2_model_attention_mask_past(*config_and_inputs) def test_gpt2_model_past_large_inputs(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_gpt2_model_past_large_inputs(*config_and_inputs) def test_gpt2_lm_head(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_gpt2_lm_head(*config_and_inputs) def test_gpt2_double_head(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_gpt2_double_head(*config_and_inputs) def test_model_common_attributes(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) if model_class in self.all_generative_model_classes: x = model.get_output_embeddings() assert isinstance(x, tf.keras.layers.Layer) name = model.get_bias() assert name is None else: x = model.get_output_embeddings() assert x is None name = model.get_bias() assert name is None def test_gpt2_sequence_classification_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_gpt2_for_sequence_classification(*config_and_inputs) def test_mixed_precision(self): # TODO JP: Make GPT2 float16 compliant pass def test_xla_mode(self): # TODO JP: Make GPT2 XLA compliant pass @slow def test_model_from_pretrained(self): for model_name in TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TFGPT2Model.from_pretrained(model_name) self.assertIsNotNone(model)
def __call__(self, text=None, images=None, padding="max_length", return_tensors="np", **kwargs): """ Main method to prepare for the model one or several text(s) and image(s). This method forwards the `text` and `kwargs` arguments to CLIPTokenizerFast's [`~CLIPTokenizerFast.__call__`] if `text` is not `None` to encode: the text. To prepare the image(s), this method forwards the `images` and `kwrags` arguments to CLIPFeatureExtractor's [`~CLIPFeatureExtractor.__call__`] if `images` is not `None`. Please refer to the doctsring of the above two methods for more information. Args: text (`str`, `List[str]`, `List[List[str]]`): The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set `is_split_into_words=True` (to lift the ambiguity with a batch of sequences). images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`): The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a number of channels, H and W are image height and width. return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors of a particular framework. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. - `'pt'`: Return PyTorch `torch.Tensor` objects. - `'np'`: Return NumPy `np.ndarray` objects. - `'jax'`: Return JAX `jnp.ndarray` objects. Returns: [`BatchEncoding`]: A [`BatchEncoding`] with the following fields: - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`. - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not `None`). - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`. """ if text is None and images is None: raise ValueError( "You have to specify at least one text or image. Both cannot be none." ) if text is not None: if isinstance(text, str) or (isinstance(text, List) and not isinstance(text[0], List)): encodings = [ self.tokenizer(text, padding=padding, return_tensors=return_tensors, **kwargs) ] elif isinstance(text, List) and isinstance(text[0], List): encodings = [] # Maximum number of queries across batch max_num_queries = max([len(t) for t in text]) # Pad all batch samples to max number of text queries for t in text: if len(t) != max_num_queries: t = t + [" "] * (max_num_queries - len(t)) encoding = self.tokenizer(t, padding=padding, return_tensors=return_tensors, **kwargs) encodings.append(encoding) else: raise TypeError( "Input text should be a string, a list of strings or a nested list of strings" ) if return_tensors == "np": input_ids = np.concatenate( [encoding["input_ids"] for encoding in encodings], axis=0) attention_mask = np.concatenate( [encoding["attention_mask"] for encoding in encodings], axis=0) elif return_tensors == "jax" and is_flax_available(): import jax.numpy as jnp input_ids = jnp.concatenate( [encoding["input_ids"] for encoding in encodings], axis=0) attention_mask = jnp.concatenate( [encoding["attention_mask"] for encoding in encodings], axis=0) elif return_tensors == "pt" and is_torch_available(): import torch input_ids = torch.cat( [encoding["input_ids"] for encoding in encodings], dim=0) attention_mask = torch.cat( [encoding["attention_mask"] for encoding in encodings], dim=0) elif return_tensors == "tf" and is_tf_available(): import tensorflow as tf input_ids = tf.stack( [encoding["input_ids"] for encoding in encodings], axis=0) attention_mask = tf.stack( [encoding["attention_mask"] for encoding in encodings], axis=0) else: raise ValueError( "Target return tensor type could not be returned") encoding = BatchEncoding() encoding["input_ids"] = input_ids encoding["attention_mask"] = attention_mask if images is not None: image_features = self.feature_extractor( images, return_tensors=return_tensors, **kwargs) if text is not None and images is not None: encoding["pixel_values"] = image_features.pixel_values return encoding elif text is not None: return encoding else: return BatchEncoding(data=dict(**image_features), tensor_type=return_tensors)
class TFLongformerModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = (( TFLongformerModel, TFLongformerForMaskedLM, TFLongformerForQuestionAnswering, TFLongformerForSequenceClassification, TFLongformerForMultipleChoice, TFLongformerForTokenClassification, ) if is_tf_available() else ()) def setUp(self): self.model_tester = TFLongformerModelTester(self) self.config_tester = ConfigTester(self, config_class=LongformerConfig, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() def test_model_attention_mask_determinism(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_attention_mask_determinism( *config_and_inputs) def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_model_global_attention_mask(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model_with_global_attention_mask( *config_and_inputs) def test_for_masked_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_masked_lm(*config_and_inputs) def test_for_question_answering(self): config_and_inputs = self.model_tester.prepare_config_and_inputs_for_question_answering( ) self.model_tester.create_and_check_for_question_answering( *config_and_inputs) def test_for_sequence_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_sequence_classification( *config_and_inputs) def test_for_token_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_token_classification( *config_and_inputs) def test_for_multiple_choice(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_multiple_choice( *config_and_inputs) @slow def test_saved_model_with_attentions_output(self): # longformer has special attentions which are not # compatible in graph mode pass def test_saved_model_creation(self): # This test is too long (>30sec) and makes fail the CI pass
class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = ( (TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TFOpenAIGPTDoubleHeadsModel) if is_tf_available() else () ) all_generative_model_classes = ( (TFOpenAIGPTLMHeadModel,) if is_tf_available() else () ) # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly class TFOpenAIGPTModelTester(object): def __init__( self, parent, batch_size=13, seq_length=7, is_training=True, use_token_type_ids=True, use_input_mask=True, use_labels=True, use_mc_token_ids=True, vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None, ): self.parent = parent self.batch_size = batch_size self.seq_length = seq_length self.is_training = is_training self.use_token_type_ids = use_token_type_ids self.use_input_mask = use_input_mask self.use_labels = use_labels self.use_mc_token_ids = use_mc_token_ids self.vocab_size = vocab_size self.hidden_size = hidden_size self.num_hidden_layers = num_hidden_layers self.num_attention_heads = num_attention_heads self.intermediate_size = intermediate_size self.hidden_act = hidden_act self.hidden_dropout_prob = hidden_dropout_prob self.attention_probs_dropout_prob = attention_probs_dropout_prob self.max_position_embeddings = max_position_embeddings self.type_vocab_size = type_vocab_size self.type_sequence_label_size = type_sequence_label_size self.initializer_range = initializer_range self.num_labels = num_labels self.num_choices = num_choices self.scope = scope def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_mask = None if self.use_input_mask: input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) token_type_ids = None if self.use_token_type_ids: token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) mc_token_ids = None if self.use_mc_token_ids: mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length) sequence_labels = None token_labels = None choice_labels = None if self.use_labels: sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) config = OpenAIGPTConfig( vocab_size=self.vocab_size, n_embd=self.hidden_size, n_layer=self.num_hidden_layers, n_head=self.num_attention_heads, # intermediate_size=self.intermediate_size, # hidden_act=self.hidden_act, # hidden_dropout_prob=self.hidden_dropout_prob, # attention_probs_dropout_prob=self.attention_probs_dropout_prob, n_positions=self.max_position_embeddings, n_ctx=self.max_position_embeddings # type_vocab_size=self.type_vocab_size, # initializer_range=self.initializer_range ) head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) return ( config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, token_labels, choice_labels, ) def create_and_check_openai_gpt_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): model = TFOpenAIGPTModel(config=config) inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} sequence_output = model(inputs)[0] inputs = [input_ids, input_mask] sequence_output = model(inputs)[0] sequence_output = model(input_ids)[0] result = { "sequence_output": sequence_output.numpy(), } self.parent.assertListEqual( list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size] ) def create_and_check_openai_gpt_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): model = TFOpenAIGPTLMHeadModel(config=config) inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids} prediction_scores = model(inputs)[0] result = { "prediction_scores": prediction_scores.numpy(), } self.parent.assertListEqual( list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size] ) def create_and_check_openai_gpt_double_head( self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args ): model = TFOpenAIGPTDoubleHeadsModel(config=config) multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1)) multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1)) multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1)) inputs = { "input_ids": multiple_choice_inputs_ids, "mc_token_ids": mc_token_ids, "attention_mask": multiple_choice_input_mask, "token_type_ids": multiple_choice_token_type_ids, } lm_logits, mc_logits = model(inputs)[:2] result = {"lm_logits": lm_logits.numpy(), "mc_logits": mc_logits.numpy()} self.parent.assertListEqual( list(result["lm_logits"].shape), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size] ) self.parent.assertListEqual(list(result["mc_logits"].shape), [self.batch_size, self.num_choices]) def prepare_config_and_inputs_for_common(self): config_and_inputs = self.prepare_config_and_inputs() ( config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, token_labels, choice_labels, ) = config_and_inputs inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} return config, inputs_dict def setUp(self): self.model_tester = TFOpenAIGPTModelTest.TFOpenAIGPTModelTester(self) self.config_tester = ConfigTester(self, config_class=OpenAIGPTConfig, n_embd=37) def test_config(self): self.config_tester.run_common_tests() def test_openai_gpt_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_openai_gpt_model(*config_and_inputs) def test_openai_gpt_lm_head(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_openai_gpt_lm_head(*config_and_inputs) def test_openai_gpt_double_head(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_openai_gpt_double_head(*config_and_inputs) @slow def test_model_from_pretrained(self): for model_name in list(TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: model = TFOpenAIGPTModel.from_pretrained(model_name, cache_dir=CACHE_DIR) self.assertIsNotNone(model)
class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = ((TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TFOpenAIGPTDoubleHeadsModel, TFOpenAIGPTForSequenceClassification) if is_tf_available() else ()) all_generative_model_classes = ( (TFOpenAIGPTLMHeadModel, ) if is_tf_available() else () ) # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly test_head_masking = False def setUp(self): self.model_tester = TFOpenAIGPTModelTester(self) self.config_tester = ConfigTester(self, config_class=OpenAIGPTConfig, n_embd=37) def test_config(self): self.config_tester.run_common_tests() def test_openai_gpt_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_openai_gpt_model(*config_and_inputs) def test_openai_gpt_lm_head(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_openai_gpt_lm_head( *config_and_inputs) def test_openai_gpt_double_head(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_openai_gpt_double_head( *config_and_inputs) def test_model_common_attributes(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) for model_class in self.all_model_classes: model = model_class(config) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) if model_class in self.all_generative_model_classes: x = model.get_output_embeddings() assert isinstance(x, tf.keras.layers.Layer) name = model.get_bias() assert name is None else: x = model.get_output_embeddings() assert x is None name = model.get_bias() assert name is None def test_openai_gpt_sequence_classification_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_openai_gpt_for_sequence_classification( *config_and_inputs) def test_mixed_precision(self): # TODO JP: Make OpenAIGPT float16 compliant pass def test_xla_mode(self): # TODO JP: Make OpenAIGPT XLA compliant pass @slow def test_model_from_pretrained(self): for model_name in TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TFOpenAIGPTModel.from_pretrained(model_name) self.assertIsNotNone(model)
class TFLEDModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = (TFLEDForConditionalGeneration, TFLEDModel) if is_tf_available() else () all_generative_model_classes = ( TFLEDForConditionalGeneration, ) if is_tf_available() else () is_encoder_decoder = True test_pruning = False test_head_masking = False test_onnx = False def setUp(self): self.model_tester = TFLEDModelTester(self) self.config_tester = ConfigTester(self, config_class=LEDConfig) def test_config(self): self.config_tester.run_common_tests() def test_decoder_model_past_large_inputs(self): config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common( ) self.model_tester.check_decoder_model_past_large_inputs( *config_and_inputs) def test_model_common_attributes(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) for model_class in self.all_model_classes: model = model_class(config) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) if model_class in self.all_generative_model_classes: x = model.get_output_embeddings() assert isinstance(x, tf.keras.layers.Layer) name = model.get_bias() assert isinstance(name, dict) for k, v in name.items(): assert isinstance(v, tf.Variable) else: x = model.get_output_embeddings() assert x is None name = model.get_bias() assert name is None def test_resize_token_embeddings(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) def _get_word_embedding_weight(model, embedding_layer): if hasattr(embedding_layer, "weight"): return embedding_layer.weight else: # Here we build the word embeddings weights if not exists. # And then we retry to get the attribute once built. model(model.dummy_inputs) if hasattr(embedding_layer, "weight"): return embedding_layer.weight else: return None for model_class in self.all_model_classes: for size in [config.vocab_size - 10, config.vocab_size + 10, None]: # build the embeddings model = model_class(config=config) old_input_embeddings = _get_word_embedding_weight( model, model.get_input_embeddings()) old_output_embeddings = _get_word_embedding_weight( model, model.get_output_embeddings()) old_final_logits_bias = model.get_bias() # reshape the embeddings model.resize_token_embeddings(size) new_input_embeddings = _get_word_embedding_weight( model, model.get_input_embeddings()) new_output_embeddings = _get_word_embedding_weight( model, model.get_output_embeddings()) new_final_logits_bias = model.get_bias() # check that the resized embeddings size matches the desired size. assert_size = size if size is not None else config.vocab_size self.assertEqual(new_input_embeddings.shape[0], assert_size) # check that weights remain the same after resizing models_equal = True for p1, p2 in zip(old_input_embeddings.value(), new_input_embeddings.value()): if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0: models_equal = False self.assertTrue(models_equal) if old_output_embeddings is not None and new_output_embeddings is not None: self.assertEqual(new_output_embeddings.shape[0], assert_size) models_equal = True for p1, p2 in zip(old_output_embeddings.value(), new_output_embeddings.value()): if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0: models_equal = False self.assertTrue(models_equal) if old_final_logits_bias is not None and new_final_logits_bias is not None: old_final_logits_bias = old_final_logits_bias[ "final_logits_bias"] new_final_logits_bias = new_final_logits_bias[ "final_logits_bias"] self.assertEqual(new_final_logits_bias.shape[0], 1) self.assertEqual(new_final_logits_bias.shape[1], assert_size) models_equal = True for old, new in zip(old_final_logits_bias.value(), new_final_logits_bias.value()): for p1, p2 in zip(old, new): if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0: models_equal = False self.assertTrue(models_equal) def test_attention_outputs(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) inputs_dict["global_attention_mask"] = tf.zeros_like( inputs_dict["attention_mask"]) num_global_attn_indices = 2 inputs_dict["global_attention_mask"] = tf.where( tf.range(self.model_tester.seq_length)[None, :] < num_global_attn_indices, 1, inputs_dict["global_attention_mask"], ) config.return_dict = True seq_length = self.model_tester.seq_length encoder_seq_length = self.model_tester.encoder_seq_length def check_decoder_attentions_output(outputs): decoder_attentions = outputs.decoder_attentions self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers) self.assertListEqual( list(decoder_attentions[0].shape[-3:]), [ self.model_tester.num_attention_heads, seq_length, seq_length ], ) def check_encoder_attentions_output(outputs): attentions = [t.numpy() for t in outputs.encoder_attentions] global_attentions = [ t.numpy() for t in outputs.encoder_global_attentions ] self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) self.assertEqual(len(global_attentions), self.model_tester.num_hidden_layers) self.assertListEqual( list(attentions[0].shape[-3:]), [ self.model_tester.num_attention_heads, encoder_seq_length, seq_length ], ) self.assertListEqual( list(global_attentions[0].shape[-3:]), [ self.model_tester.num_attention_heads, encoder_seq_length, num_global_attn_indices ], ) for model_class in self.all_model_classes: inputs_dict["output_attentions"] = True inputs_dict["use_cache"] = False config.output_hidden_states = False model = model_class(config) outputs = model(self._prepare_for_class(inputs_dict, model_class)) out_len = len(outputs) self.assertEqual(config.output_hidden_states, False) check_encoder_attentions_output(outputs) if self.is_encoder_decoder: model = model_class(config) outputs = model( self._prepare_for_class(inputs_dict, model_class)) self.assertEqual(config.output_hidden_states, False) check_decoder_attentions_output(outputs) # Check that output attentions can also be changed via the config del inputs_dict["output_attentions"] config.output_attentions = True model = model_class(config) outputs = model(self._prepare_for_class(inputs_dict, model_class)) self.assertEqual(config.output_hidden_states, False) check_encoder_attentions_output(outputs) # Check attention is always last and order is fine inputs_dict["output_attentions"] = True config.output_hidden_states = True model = model_class(config) outputs = model(self._prepare_for_class(inputs_dict, model_class)) self.assertEqual(out_len + (2 if self.is_encoder_decoder else 1), len(outputs)) self.assertEqual(model.config.output_hidden_states, True) check_encoder_attentions_output(outputs) def test_xla_mode(self): # TODO JP: Make LED XLA compliant pass def test_saved_model_creation(self): # This test is too long (>30sec) and makes fail the CI pass
class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase): is_encoder_decoder = True all_model_classes = ( TFT5Model, TFT5ForConditionalGeneration) if is_tf_available() else () all_generative_model_classes = ( TFT5ForConditionalGeneration, ) if is_tf_available() else () test_onnx = False def setUp(self): self.model_tester = TFT5ModelTester(self) self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37) def test_config(self): self.config_tester.run_common_tests() def test_t5_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_t5_model(*config_and_inputs) def test_t5_model_v1_1(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() config = config_and_inputs[0] config.tie_word_embeddings = False config.feed_forward_proj = "gated-gelu" self.model_tester.create_and_check_t5_model(config, *config_and_inputs[1:]) def test_with_lm_head(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_t5_with_lm_head(*config_and_inputs) def test_t5_decoder_model_past(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_t5_decoder_model_past( *config_and_inputs) def test_t5_decoder_model_past_with_attn_mask(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_t5_decoder_model_attention_mask_past( *config_and_inputs) def test_t5_decoder_model_past_large_inputs(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_t5_decoder_model_past_large_inputs( *config_and_inputs) def test_model_common_attributes(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) for model_class in self.all_model_classes: model = model_class(config) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) if model_class in self.all_generative_model_classes: x = model.get_output_embeddings() assert isinstance(x, tf.keras.layers.Layer) name = model.get_bias() assert name is None else: x = model.get_output_embeddings() assert x is None name = model.get_bias() assert name is None def test_saved_model_creation(self): # This test is too long (>30sec) and makes fail the CI pass def test_mixed_precision(self): # TODO JP: Make T5 float16 compliant pass def test_xla_mode(self): # TODO JP: Make T5 XLA compliant pass @slow def test_model_from_pretrained(self): model = TFT5Model.from_pretrained("t5-small") self.assertIsNotNone(model)
class TestTFMarianCommon(TFModelTesterMixin, unittest.TestCase): all_model_classes = (TFMarianMTModel, ) if is_tf_available() else () all_generative_model_classes = ( TFMarianMTModel, ) if is_tf_available() else () model_tester_cls = ModelTester is_encoder_decoder = True test_pruning = False def setUp(self): self.model_tester = self.model_tester_cls(self) self.config_tester = ConfigTester(self, config_class=MarianConfig) def test_config(self): self.config_tester.run_common_tests() def test_inputs_embeds(self): # inputs_embeds not supported pass def test_saved_model_with_hidden_states_output(self): # Should be uncommented during patrick TF refactor pass def test_saved_model_with_attentions_output(self): pass def test_compile_tf_model(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0) loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy") model_class = self.all_generative_model_classes[0] input_ids = { "decoder_input_ids": tf.keras.Input(batch_shape=(2, 2000), name="decoder_input_ids", dtype="int32"), "input_ids": tf.keras.Input(batch_shape=(2, 2000), name="input_ids", dtype="int32"), } # Prepare our model model = model_class(config) model(self._prepare_for_class( inputs_dict, model_class)) # Model must be called before saving. # Let's load it from the disk to be sure we can use pre-trained weights with tempfile.TemporaryDirectory() as tmpdirname: model.save_pretrained(tmpdirname) model = model_class.from_pretrained(tmpdirname) outputs_dict = model(input_ids) hidden_states = outputs_dict[0] # Add a dense layer on top to test integration with other keras modules outputs = tf.keras.layers.Dense(2, activation="softmax", name="outputs")(hidden_states) # Compile extended model extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs]) extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric]) def test_model_common_attributes(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) for model_class in self.all_model_classes: model = model_class(config) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) x = model.get_output_layer_with_bias() assert x is None name = model.get_prefix_bias_name() assert name is None
class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester): all_model_classes = (TFDistilBertModel, TFDistilBertForMaskedLM, TFDistilBertForQuestionAnswering, TFDistilBertForSequenceClassification) if is_tf_available() else None test_pruning = True test_torchscript = True test_resize_embeddings = True test_head_masking = True class TFDistilBertModelTester(object): def __init__(self, parent, batch_size=13, seq_length=7, is_training=True, use_input_mask=True, use_token_type_ids=False, use_labels=True, vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None, ): self.parent = parent self.batch_size = batch_size self.seq_length = seq_length self.is_training = is_training self.use_input_mask = use_input_mask self.use_token_type_ids = use_token_type_ids self.use_labels = use_labels self.vocab_size = vocab_size self.hidden_size = hidden_size self.num_hidden_layers = num_hidden_layers self.num_attention_heads = num_attention_heads self.intermediate_size = intermediate_size self.hidden_act = hidden_act self.hidden_dropout_prob = hidden_dropout_prob self.attention_probs_dropout_prob = attention_probs_dropout_prob self.max_position_embeddings = max_position_embeddings self.type_vocab_size = type_vocab_size self.type_sequence_label_size = type_sequence_label_size self.initializer_range = initializer_range self.num_labels = num_labels self.num_choices = num_choices self.scope = scope def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_mask = None if self.use_input_mask: input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) sequence_labels = None token_labels = None choice_labels = None if self.use_labels: sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) config = DistilBertConfig( vocab_size=self.vocab_size, dim=self.hidden_size, n_layers=self.num_hidden_layers, n_heads=self.num_attention_heads, hidden_dim=self.intermediate_size, hidden_act=self.hidden_act, dropout=self.hidden_dropout_prob, attention_dropout=self.attention_probs_dropout_prob, max_position_embeddings=self.max_position_embeddings, initializer_range=self.initializer_range) return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels def create_and_check_distilbert_model(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): model = TFDistilBertModel(config=config) inputs = {'input_ids': input_ids, 'attention_mask': input_mask} outputs = model(inputs) sequence_output = outputs[0] inputs = [input_ids, input_mask] (sequence_output,) = model(inputs) result = { "sequence_output": sequence_output.numpy(), } self.parent.assertListEqual( list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]) def create_and_check_distilbert_for_masked_lm(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): model = TFDistilBertForMaskedLM(config=config) inputs = {'input_ids': input_ids, 'attention_mask': input_mask} (prediction_scores,) = model(inputs) result = { "prediction_scores": prediction_scores.numpy(), } self.parent.assertListEqual( list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]) def create_and_check_distilbert_for_question_answering(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): model = TFDistilBertForQuestionAnswering(config=config) inputs = {'input_ids': input_ids, 'attention_mask': input_mask} start_logits, end_logits = model(inputs) result = { "start_logits": start_logits.numpy(), "end_logits": end_logits.numpy(), } self.parent.assertListEqual( list(result["start_logits"].shape), [self.batch_size, self.seq_length]) self.parent.assertListEqual( list(result["end_logits"].shape), [self.batch_size, self.seq_length]) def create_and_check_distilbert_for_sequence_classification(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): config.num_labels = self.num_labels model = TFDistilBertForSequenceClassification(config) inputs = {'input_ids': input_ids, 'attention_mask': input_mask} (logits,) = model(inputs) result = { "logits": logits.numpy(), } self.parent.assertListEqual( list(result["logits"].shape), [self.batch_size, self.num_labels]) def prepare_config_and_inputs_for_common(self): config_and_inputs = self.prepare_config_and_inputs() (config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs inputs_dict = {'input_ids': input_ids, 'attention_mask': input_mask} return config, inputs_dict def setUp(self): self.model_tester = TFDistilBertModelTest.TFDistilBertModelTester(self) self.config_tester = ConfigTester(self, config_class=DistilBertConfig, dim=37) def test_config(self): self.config_tester.run_common_tests() def test_distilbert_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_distilbert_model(*config_and_inputs) def test_for_masked_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_distilbert_for_masked_lm(*config_and_inputs) def test_for_question_answering(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_distilbert_for_question_answering(*config_and_inputs) def test_for_sequence_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_distilbert_for_sequence_classification(*config_and_inputs)
class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = (( TFElectraModel, TFElectraForMaskedLM, TFElectraForPreTraining, TFElectraForTokenClassification, TFElectraForMultipleChoice, TFElectraForSequenceClassification, TFElectraForQuestionAnswering, ) if is_tf_available() else ()) def setUp(self): self.model_tester = TFElectraModelTester(self) self.config_tester = ConfigTester(self, config_class=ElectraConfig, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() def test_electra_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_electra_model(*config_and_inputs) def test_for_masked_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_electra_for_masked_lm( *config_and_inputs) def test_for_pretraining(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_electra_for_pretraining( *config_and_inputs) def test_for_question_answering(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_electra_for_question_answering( *config_and_inputs) def test_for_sequence_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_electra_for_sequence_classification( *config_and_inputs) def test_for_multiple_choice(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_electra_for_multiple_choice( *config_and_inputs) def test_for_token_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_electra_for_token_classification( *config_and_inputs) @slow def test_model_from_pretrained(self): # for model_name in TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: for model_name in ["google/electra-small-discriminator"]: model = TFElectraModel.from_pretrained(model_name) self.assertIsNotNone(model)