class DetrModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
    """Common-model test suite specialised for the DETR model classes.

    The suite is empty (``all_model_classes == ()``) when timm is not
    available, because the DETR classes are only imported in that case.
    """

    all_model_classes = (
        (
            DetrModel,
            DetrForObjectDetection,
            DetrForSegmentation,
        )
        if is_timm_available()
        else ()
    )
    is_encoder_decoder = True
    test_torchscript = False
    test_pruning = False
    test_head_masking = False
    test_missing_keys = False

    # special case for head models
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
        """Extend the common inputs with DETR-style labels for the head models.

        When ``return_labels`` is true and ``model_class`` is one of the
        detection / segmentation heads, ``inputs_dict["labels"]`` is set to a
        list with one target dict per batch element, each holding all-ones
        ``class_labels``, ``boxes`` and ``masks`` tensors.
        """
        inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)

        if return_labels:
            if model_class.__name__ in ["DetrForObjectDetection", "DetrForSegmentation"]:
                labels = []
                for _ in range(self.model_tester.batch_size):
                    target = {}
                    target["class_labels"] = torch.ones(
                        size=(self.model_tester.n_targets,), device=torch_device, dtype=torch.long
                    )
                    target["boxes"] = torch.ones(
                        self.model_tester.n_targets, 4, device=torch_device, dtype=torch.float
                    )
                    target["masks"] = torch.ones(
                        self.model_tester.n_targets,
                        self.model_tester.min_size,
                        self.model_tester.max_size,
                        device=torch_device,
                        dtype=torch.float,
                    )
                    labels.append(target)
                inputs_dict["labels"] = labels

        return inputs_dict

    def setUp(self):
        """Create the model tester and the config tester used by the tests."""
        self.model_tester = DetrModelTester(self)
        self.config_tester = ConfigTester(self, config_class=DetrConfig, has_text_modality=False)

    def test_config(self):
        """Run the shared configuration checks for ``DetrConfig``."""
        self.config_tester.run_common_tests()

    def test_detr_model(self):
        """Delegate the base-model check to the model tester."""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_detr_model(*config_and_inputs)

    def test_detr_object_detection_head_model(self):
        """Delegate the object-detection head check to the model tester."""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_detr_object_detection_head_model(*config_and_inputs)

    @unittest.skip(reason="DETR does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

    @unittest.skip(reason="DETR does not have a get_input_embeddings method")
    def test_model_common_attributes(self):
        pass

    @unittest.skip(reason="DETR is not a generative model")
    def test_generate_without_input_ids(self):
        pass

    @unittest.skip(reason="DETR does not use token embeddings")
    def test_resize_tokens_embeddings(self):
        pass

    @slow
    def test_model_outputs_equivalence(self):
        # TODO Niels: fix me!
        pass

    def test_attention_outputs(self):
        """Check the number, shapes and ordering of the returned attentions.

        Attentions are requested first through the ``output_attentions``
        input, then through the config, and finally together with hidden
        states to verify the total number of outputs.
        """
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True

        decoder_seq_length = self.model_tester.decoder_seq_length
        encoder_seq_length = self.model_tester.encoder_seq_length
        decoder_key_length = self.model_tester.decoder_seq_length
        encoder_key_length = self.model_tester.encoder_seq_length

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = False
            config.return_dict = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)

            # check that output_attentions also work using config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)

            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
            )
            out_len = len(outputs)

            if self.is_encoder_decoder:
                correct_outlen = 5

                # loss is at first position
                if "labels" in inputs_dict:
                    correct_outlen += 1  # loss is added to beginning
                # Object Detection model returns pred_logits and pred_boxes
                if model_class.__name__ == "DetrForObjectDetection":
                    correct_outlen += 2
                # Panoptic Segmentation model returns pred_logits, pred_boxes, pred_masks
                if model_class.__name__ == "DetrForSegmentation":
                    correct_outlen += 3
                if "past_key_values" in outputs:
                    correct_outlen += 1  # past_key_values have been returned

                self.assertEqual(out_len, correct_outlen)

                # decoder attentions
                decoder_attentions = outputs.decoder_attentions
                self.assertIsInstance(decoder_attentions, (list, tuple))
                self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(decoder_attentions[0].shape[-3:]),
                    [self.model_tester.num_attention_heads, decoder_seq_length, decoder_key_length],
                )

                # cross attentions
                cross_attentions = outputs.cross_attentions
                self.assertIsInstance(cross_attentions, (list, tuple))
                self.assertEqual(len(cross_attentions), self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(cross_attentions[0].shape[-3:]),
                    [
                        self.model_tester.num_attention_heads,
                        decoder_seq_length,
                        encoder_key_length,
                    ],
                )

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(**self._prepare_for_class(inputs_dict, model_class))

            if hasattr(self.model_tester, "num_hidden_states_types"):
                added_hidden_states = self.model_tester.num_hidden_states_types
            elif self.is_encoder_decoder:
                added_hidden_states = 2
            else:
                added_hidden_states = 1
            self.assertEqual(out_len + added_hidden_states, len(outputs))

            self_attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions

            self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(self_attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
            )

    def test_retain_grad_hidden_states_attentions(self):
        """Check that gradients reach the retained hidden states and attentions."""
        # removed retain_grad and grad on decoder_hidden_states, as queries don't require grad

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.output_hidden_states = True
        config.output_attentions = True

        # no need to test all models as different heads yield the same functionality
        model_class = self.all_model_classes[0]
        model = model_class(config)
        model.to(torch_device)

        inputs = self._prepare_for_class(inputs_dict, model_class)

        outputs = model(**inputs)

        output = outputs[0]

        encoder_hidden_states = outputs.encoder_hidden_states[0]
        encoder_attentions = outputs.encoder_attentions[0]
        encoder_hidden_states.retain_grad()
        encoder_attentions.retain_grad()

        decoder_attentions = outputs.decoder_attentions[0]
        decoder_attentions.retain_grad()

        cross_attentions = outputs.cross_attentions[0]
        cross_attentions.retain_grad()

        output.flatten()[0].backward(retain_graph=True)

        self.assertIsNotNone(encoder_hidden_states.grad)
        self.assertIsNotNone(encoder_attentions.grad)
        self.assertIsNotNone(decoder_attentions.grad)
        self.assertIsNotNone(cross_attentions.grad)

    def test_forward_signature(self):
        """Check that ``forward`` starts with the expected argument names."""
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values", "pixel_mask"]
            if model.config.is_encoder_decoder:
                # Both names must be tested explicitly: `"head_mask" and x in names`
                # only evaluates `x in names`, since a non-empty string is truthy.
                if "head_mask" in arg_names and "decoder_head_mask" in arg_names:
                    expected_arg_names.extend(["head_mask", "decoder_head_mask", "encoder_outputs"])

            # Slice to the expected length so both lists can actually be equal.
            self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names)

    def test_different_timm_backbone(self):
        """Run every model class with a non-default timm backbone."""
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        # let's pick a random timm backbone
        config.backbone = "tf_mobilenetv3_small_075"

        for model_class in self.all_model_classes:
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(**self._prepare_for_class(inputs_dict, model_class))

            if model_class.__name__ == "DetrForObjectDetection":
                expected_shape = (
                    self.model_tester.batch_size,
                    self.model_tester.num_queries,
                    self.model_tester.num_labels + 1,
                )
                self.assertEqual(outputs.logits.shape, expected_shape)

            self.assertTrue(outputs)

    def test_initialization(self):
        """Check parameter initialization under a zero-init config.

        ``init_xavier_std`` is set to a very large value so that the
        ``bbox_attention`` weights are expected to have a large maximum, while
        every other trainable parameter must have a mean of exactly 0.0 or 1.0
        (after rounding to nine decimals).
        """
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        configs_no_init = _config_zero_init(config)
        configs_no_init.init_xavier_std = 1e9

        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            for name, param in model.named_parameters():
                if param.requires_grad:
                    if "bbox_attention" in name and "bias" not in name:
                        self.assertLess(
                            100000,
                            abs(param.data.max().item()),
                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                        )
                    else:
                        self.assertIn(
                            ((param.data.mean() * 1e9).round() / 1e9).item(),
                            [0.0, 1.0],
                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                        )
# limitations under the License. """ Testing suite for the PyTorch DETR model. """ import inspect import math import unittest from transformers import DetrConfig, is_timm_available, is_vision_available from transformers.testing_utils import require_timm, require_vision, slow, torch_device from transformers.utils import cached_property from ...generation.test_generation_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor if is_timm_available(): import torch from transformers import DetrForObjectDetection, DetrForSegmentation, DetrModel if is_vision_available(): from PIL import Image from transformers import DetrFeatureExtractor class DetrModelTester: def __init__( self, parent, batch_size=8,