def setUp(self):
    """Build ViLBERT models for both pretraining and classification heads.

    Side effects: sets ``self.vision_feature_size``, ``self.vision_target_size``,
    ``self.pretrain_model`` and ``self.finetune_model``.
    """
    test_utils.setup_proxy()
    setup_imports()
    model_name = "vilbert"
    args = test_utils.dummy_args(model=model_name)
    configuration = Configuration(args)
    config = configuration.get_config()
    model_class = registry.get_model_class(model_name)
    self.vision_feature_size = 1024
    self.vision_target_size = 1279
    # Alias the model's config node once instead of re-indexing it for every
    # key (matches the style of the other setUp methods in this file). The
    # alias refers to the same mutable node, so behavior is unchanged.
    model_config = config.model_config[model_name]
    model_config["training_head_type"] = "pretraining"
    model_config["visual_embedding_dim"] = self.vision_feature_size
    model_config["v_feature_size"] = self.vision_feature_size
    model_config["v_target_size"] = self.vision_target_size
    model_config["dynamic_attention"] = False
    self.pretrain_model = model_class(model_config)
    self.pretrain_model.build()
    # Reuse the same config node, switched to a classification head.
    model_config["training_head_type"] = "classification"
    model_config["num_labels"] = 2
    self.finetune_model = model_class(model_config)
    self.finetune_model.build()
def setUp(self):
    """Prepare VinVL classification and pretraining configs plus a sample batch.

    Side effects: sets ``self.classification_config``, ``self.pretraining_config``
    and ``self.sample_list``.
    """
    test_utils.setup_proxy()
    setup_imports()
    model_name = "vinvl"
    args = test_utils.dummy_args(model=model_name, dataset="test")
    configuration = Configuration(args)
    config = configuration.get_config()
    base_config = config.model_config[model_name]
    base_config.model = model_name
    base_config.do_pretraining = False
    # Overlay head-specific settings on top of the shared base config.
    classification_overrides = {
        "do_pretraining": False,
        "heads": {"mlp": {"num_labels": 3129}},
        "ce_loss": {"ignore_index": -1},
    }
    self.classification_config = OmegaConf.create(
        {**base_config, **classification_overrides}
    )
    pretraining_overrides = {
        "do_pretraining": True,
        "heads": {"mlm": {"hidden_size": 768}},
    }
    self.pretraining_config = OmegaConf.create(
        {**base_config, **pretraining_overrides}
    )
    self.sample_list = self._get_sample_list()
def setUp(self):
    """Load the mmf_transformer dummy config and point it at its model name."""
    test_utils.setup_proxy()
    setup_imports()
    self.model_name = "mmf_transformer"
    dummy_args = test_utils.dummy_args(model=self.model_name)
    self.config = Configuration(dummy_args).get_config()
    self.config.model_config[self.model_name].model = self.model_name
def setUp(self):
    """Build the fine-tune model for the multimodelity_transformer tests."""
    test_utils.setup_proxy()
    setup_imports()
    # NOTE(review): "multimodelity_transformer" looks like a possible typo for
    # "multimodality"; it must match the registry key exactly, so confirm
    # against the model registration before renaming.
    self.model_name = "multimodelity_transformer"
    dummy_args = test_utils.dummy_args(model=self.model_name)
    self.config = Configuration(dummy_args).get_config()
    model_config = self.config.model_config[self.model_name]
    model_config.model = self.model_name
    self.finetune_model = build_model(model_config)
def setUp(self):
    """Instantiate the ViLT model used by these tests."""
    test_utils.setup_proxy()
    setup_imports()
    name = "vilt"
    dummy_args = test_utils.dummy_args(model=name, dataset="test")
    config = Configuration(dummy_args).get_config()
    vilt_config = config.model_config[name]
    vilt_config.model = name
    self.pretrain_model = build_model(vilt_config)
def setUp(self):
    """Build the VisualBERT model with JIT-compatible modules swapped in."""
    test_utils.setup_proxy()
    setup_imports()
    # Must run before the model is constructed so the swapped modules are used.
    replace_with_jit()
    name = "visual_bert"
    dummy_args = test_utils.dummy_args(model=name)
    config = Configuration(dummy_args).get_config()
    bert_config = config.model_config[name]
    bert_config.model = name
    self.pretrain_model = build_model(bert_config)
def setUp(self):
    """Construct and build the mmf_transformer model via its registry class."""
    test_utils.setup_proxy()
    setup_imports()
    self.model_name = "mmf_transformer"
    dummy_args = test_utils.dummy_args(model=self.model_name)
    self.config = Configuration(dummy_args).get_config()
    self.model_class = registry.get_model_class(self.model_name)
    model_config = self.config.model_config[self.model_name]
    self.finetune_model = self.model_class(model_config)
    self.finetune_model.build()
def setUp(self):
    """Build an MMBT model configured with a two-class classification head."""
    test_utils.setup_proxy()
    setup_imports()
    name = "mmbt"
    dummy_args = test_utils.dummy_args(model=name)
    config = Configuration(dummy_args).get_config()
    mmbt_config = config.model_config[name]
    mmbt_config["training_head_type"] = "classification"
    mmbt_config["num_labels"] = 2
    mmbt_config.model = name
    self.finetune_model = build_model(mmbt_config)
def setUp(self):
    """Create a small 2-layer ViT model (no patch embeddings, no pooler)."""
    import transformers.models.vit.modeling_vit as vit

    setup_proxy()
    vit_params = {
        "layer_norm_eps": 0.0001,
        "hidden_size": 768,
        "num_hidden_layers": 2,
        "do_patch_embeddings": False,
        "add_pooling_layer": False,
        "return_dict": True,
    }
    hf_config = vit.ViTConfig(**vit_params)
    self.model = ViTModel(hf_config)
def setUp(self):
    """Build UNITER models for VQA2 classification and WRA pretraining.

    Side effects: sets ``self.model_for_classification`` and
    ``self.model_for_pretraining``.
    """
    test_utils.setup_proxy()
    setup_imports()
    name = "uniter"
    dummy_args = test_utils.dummy_args(model=name, dataset="vqa2")
    config = Configuration(dummy_args).get_config()
    base_config = config.model_config[name]
    base_config.model = name
    base_config.losses = {"vqa2": "logit_bce"}
    base_config.do_pretraining = False
    base_config.tasks = "vqa2"
    # Classification variant: MLP head over the VQA2 answer space.
    classification_overrides = {
        "do_pretraining": False,
        "tasks": "vqa2",
        "heads": {"vqa2": {"type": "mlp", "num_labels": 3129}},
        "losses": {"vqa2": "logit_bce"},
    }
    classification_config = OmegaConf.create(
        {**base_config, **classification_overrides}
    )
    # Pretraining variant: word-region alignment ("wra") head.
    pretraining_overrides = {
        "do_pretraining": True,
        "tasks": "wra",
        "heads": {"wra": {"type": "wra"}},
    }
    pretraining_config = OmegaConf.create(
        {**base_config, **pretraining_overrides}
    )
    self.model_for_classification = build_model(classification_config)
    self.model_for_pretraining = build_model(pretraining_config)
def setUp(self):
    """Define identity-encoder image and text modality configs for the tests."""
    test_utils.setup_proxy()
    setup_imports()
    image_kwargs = dict(
        type="image",
        key="image",
        embedding_dim=256,
        position_dim=1,
        segment_id=0,
        encoder=ImageEncoderFactory.Config(type=ImageEncoderTypes.identity),
    )
    text_kwargs = dict(
        type="text",
        key="text",
        embedding_dim=756,
        position_dim=128,
        segment_id=1,
        encoder=TextEncoderFactory.Config(type=TextEncoderTypes.identity),
    )
    self._image_modality_config = MMFTransformerModalityConfig(**image_kwargs)
    self._text_modality_config = MMFTransformerModalityConfig(**text_kwargs)
def setUp(self):
    """Prepare a ViLBERT classification config with vision sizes filled in.

    Side effects: sets ``self.vision_feature_size``,
    ``self.vision_target_size`` and ``self.model_config``.
    """
    test_utils.setup_proxy()
    setup_imports()
    name = "vilbert"
    dummy_args = test_utils.dummy_args(model=name)
    config = Configuration(dummy_args).get_config()
    self.vision_feature_size = 1024
    self.vision_target_size = 1279
    vilbert_config = config.model_config[name]
    vilbert_config["training_head_type"] = "pretraining"
    vilbert_config["visual_embedding_dim"] = self.vision_feature_size
    vilbert_config["v_feature_size"] = self.vision_feature_size
    vilbert_config["v_target_size"] = self.vision_target_size
    vilbert_config["dynamic_attention"] = False
    vilbert_config.model = name
    # The second assignment wins: the stored config ends up in
    # classification mode (the "pretraining" write above is overwritten).
    vilbert_config["training_head_type"] = "classification"
    vilbert_config["num_labels"] = 2
    self.model_config = vilbert_config
def test_bert_tokenizer(self):
    """BertTokenizer should return padded input ids, segment ids and input
    masks (length 128) for normal, empty, overlong, paired, and masked
    captions."""
    from mmf.datasets.processors.bert_processors import BertTokenizer

    test_utils.setup_proxy()
    processor = BertTokenizer(self.config)
    # Test normal caption
    arg = {"text": "This will be a test of tokens?"}
    results = processor(arg)
    # 101 = [CLS], 102 = [SEP]; remainder of the 128 slots is zero padding.
    expected_input_ids = torch.zeros(128, dtype=torch.long)
    expected_input_ids[:11] = torch.tensor(
        [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102],
        dtype=torch.long,
    )
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    expected_masks = torch.zeros(128, dtype=torch.long)
    expected_masks[:11] = 1
    self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
    self.assertTrue(torch.equal(results["input_mask"], expected_masks))
    # Test empty caption
    arg = {"text": ""}
    results = processor(arg)
    expected_input_ids = torch.zeros(128, dtype=torch.long)
    expected_input_ids[:2] = torch.tensor([101, 102], dtype=torch.long)
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    expected_masks = torch.zeros(128, dtype=torch.long)
    expected_masks[:2] = 1
    self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
    self.assertTrue(torch.equal(results["input_mask"], expected_masks))
    # Test long caption
    arg = {"text": "I am working for facebook " * 100}  # make a long sentence
    results = processor(arg)
    # Overlong input must be truncated to 128 with [SEP] forced at the end.
    expected_input_ids = [1045, 2572, 2551, 2005, 9130] * 100
    expected_input_ids.insert(0, 101)  # [CLS]
    expected_input_ids = expected_input_ids[:128]
    expected_input_ids[-1] = 102  # [SEP]
    expected_input_ids = torch.tensor(expected_input_ids, dtype=torch.long)
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    expected_masks = torch.ones(128, dtype=torch.long)
    self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
    self.assertTrue(torch.equal(results["input_mask"], expected_masks))
    # Test two captions
    arg = {
        "text_a": "This will be a test of tokens?",
        "text_b": "I am working for facebook",
    }
    results = processor(arg)
    expected_input_ids = torch.zeros(128, dtype=torch.long)
    expected_input_ids[:17] = torch.tensor(
        [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102]
        + [1045, 2572, 2551, 2005, 9130, 102],
        dtype=torch.long,
    )
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    # Second sentence (positions 11..16) gets segment id 1.
    expected_segment_ids[11:17] = 1
    expected_masks = torch.zeros(128, dtype=torch.long)
    expected_masks[:17] = 1
    self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
    self.assertTrue(torch.equal(results["input_mask"], expected_masks))
    # Test masked caption
    # Force masking of every eligible token so input_ids must differ.
    processor._probability = 1.0
    arg = {"text": "This will be a test of tokens?"}
    results = processor(arg)
    expected_input_ids = torch.zeros(128, dtype=torch.long)
    expected_input_ids[:11] = torch.tensor(
        [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102],
        dtype=torch.long,
    )
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    self.assertFalse(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
    # Test [MASK] token is present (103 = [MASK] in bert-base-uncased)
    self.assertTrue(103 in results["input_ids"])
def test_mmbt_pretrained(self):
    """MMBT.from_params should return a non-None model instance."""
    test_utils.setup_proxy()
    pretrained_model = MMBT.from_params()
    self.assertIsNotNone(pretrained_model)
def setUp(self):
    """Configure the download proxy helper before each test.

    NOTE(review): presumably needed because the tests fetch remote
    resources (e.g. pretrained weights) — confirm against setup_proxy.
    """
    setup_proxy()
def test_uniter_tokenizer(self):
    """UNITERTextTokenizer should produce the same padded fields as the base
    BERT tokenizer plus an ``input_ids_masked`` tensor of identical shape."""
    from mmf.datasets.processors.bert_processors import UNITERTextTokenizer

    test_utils.setup_proxy()
    config = OmegaConf.create(
        {
            "tokenizer_config": {
                "type": "bert-base-uncased",
                "params": {"do_lower_case": True},
            },
            "mask_probability": 0.5,
            "max_seq_length": 128,
        }
    )
    processor = UNITERTextTokenizer(config)
    # Test normal caption
    arg = {"text": "This will be a test of tokens?"}
    results = processor(arg)
    # 101 = [CLS], 102 = [SEP]; remainder of the 128 slots is zero padding.
    expected_input_ids = torch.zeros(128, dtype=torch.long)
    expected_input_ids[:11] = torch.tensor(
        [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102],
        dtype=torch.long,
    )
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    expected_masks = torch.zeros(128, dtype=torch.long)
    expected_masks[:11] = 1
    self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
    self.assertTrue(torch.equal(results["input_mask"], expected_masks))
    self.assertTrue("input_ids_masked" in results)
    self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
    # Test empty caption
    arg = {"text": ""}
    results = processor(arg)
    expected_input_ids = torch.zeros(128, dtype=torch.long)
    expected_input_ids[:2] = torch.tensor([101, 102], dtype=torch.long)
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    expected_masks = torch.zeros(128, dtype=torch.long)
    expected_masks[:2] = 1
    self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
    self.assertTrue(torch.equal(results["input_mask"], expected_masks))
    self.assertTrue("input_ids_masked" in results)
    self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
    # Test long caption
    arg = {"text": "I am working for facebook " * 100}  # make a long sentence
    results = processor(arg)
    # Overlong input must be truncated to 128 with [SEP] forced at the end.
    expected_input_ids = [1045, 2572, 2551, 2005, 9130] * 100
    expected_input_ids.insert(0, 101)  # [CLS]
    expected_input_ids = expected_input_ids[:128]
    expected_input_ids[-1] = 102  # [SEP]
    expected_input_ids = torch.tensor(expected_input_ids, dtype=torch.long)
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    expected_masks = torch.ones(128, dtype=torch.long)
    self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
    self.assertTrue(torch.equal(results["input_mask"], expected_masks))
    self.assertTrue("input_ids_masked" in results)
    self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
    # Test two captions
    arg = {
        "text_a": "This will be a test of tokens?",
        "text_b": "I am working for facebook",
    }
    results = processor(arg)
    expected_input_ids = torch.zeros(128, dtype=torch.long)
    expected_input_ids[:17] = torch.tensor(
        [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102]
        + [1045, 2572, 2551, 2005, 9130, 102],
        dtype=torch.long,
    )
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    # Second sentence (positions 11..16) gets segment id 1.
    expected_segment_ids[11:17] = 1
    expected_masks = torch.zeros(128, dtype=torch.long)
    expected_masks[:17] = 1
    self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
    self.assertTrue(torch.equal(results["input_mask"], expected_masks))
    self.assertTrue("input_ids_masked" in results)
    self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
    # Test masked caption
    # Force masking; unlike BertTokenizer, the plain input_ids stay unmasked
    # and only input_ids_masked carries the [MASK] tokens.
    processor._probability = 1.0
    arg = {"text": "This will be a test of tokens?"}
    results = processor(arg)
    expected_input_ids = torch.zeros(128, dtype=torch.long)
    expected_input_ids[:11] = torch.tensor(
        [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102],
        dtype=torch.long,
    )
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
    self.assertTrue("input_ids_masked" in results)
    self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
    # Test [MASK] token is present (103 = [MASK] in bert-base-uncased)
    self.assertTrue(103 in results["input_ids_masked"])
def test_vinvl_tokenizer(self):
    """VinVLTextTokenizer should behave like the UNITER tokenizer and, only
    when corruption is enabled, also emit ``input_ids_corrupt`` built from
    the provided random captions/labels."""
    from mmf.datasets.processors.bert_processors import VinVLTextTokenizer

    test_utils.setup_proxy()
    config = OmegaConf.create(
        {
            "tokenizer_config": {
                "type": "bert-base-uncased",
                "params": {"do_lower_case": True},
            },
            "mask_probability": 0.5,
            "max_seq_length": 128,
            # Corruption disabled for all cases except the final one below.
            "corrupt_probability": 0,
        }
    )
    processor = VinVLTextTokenizer(config)
    # Test normal caption
    arg = {"text": "This will be a test of tokens?"}
    results = processor(arg)
    # 101 = [CLS], 102 = [SEP]; remainder of the 128 slots is zero padding.
    expected_input_ids = torch.zeros(128, dtype=torch.long)
    expected_input_ids[:11] = torch.tensor(
        [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102],
        dtype=torch.long,
    )
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    expected_masks = torch.zeros(128, dtype=torch.long)
    expected_masks[:11] = 1
    self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
    self.assertTrue(torch.equal(results["input_mask"], expected_masks))
    self.assertTrue("input_ids_masked" in results)
    self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
    self.assertTrue("input_ids_corrupt" not in results)
    # Test empty caption
    arg = {"text": ""}
    results = processor(arg)
    expected_input_ids = torch.zeros(128, dtype=torch.long)
    expected_input_ids[:2] = torch.tensor([101, 102], dtype=torch.long)
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    expected_masks = torch.zeros(128, dtype=torch.long)
    expected_masks[:2] = 1
    self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
    self.assertTrue(torch.equal(results["input_mask"], expected_masks))
    self.assertTrue("input_ids_masked" in results)
    self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
    self.assertTrue("input_ids_corrupt" not in results)
    # Test long caption
    arg = {"text": "I am working for facebook " * 100}  # make a long sentence
    results = processor(arg)
    # Overlong input must be truncated to 128 with [SEP] forced at the end.
    expected_input_ids = [1045, 2572, 2551, 2005, 9130] * 100
    expected_input_ids.insert(0, 101)  # [CLS]
    expected_input_ids = expected_input_ids[:128]
    expected_input_ids[-1] = 102  # [SEP]
    expected_input_ids = torch.tensor(expected_input_ids, dtype=torch.long)
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    expected_masks = torch.ones(128, dtype=torch.long)
    self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
    self.assertTrue(torch.equal(results["input_mask"], expected_masks))
    self.assertTrue("input_ids_masked" in results)
    self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
    self.assertTrue("input_ids_corrupt" not in results)
    # Test two captions
    arg = {
        "text_a": "This will be a test of tokens?",
        "text_b": "I am working for facebook",
    }
    results = processor(arg)
    expected_input_ids = torch.zeros(128, dtype=torch.long)
    expected_input_ids[:17] = torch.tensor(
        [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102]
        + [1045, 2572, 2551, 2005, 9130, 102],
        dtype=torch.long,
    )
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    # Second sentence (positions 11..16) gets segment id 1.
    expected_segment_ids[11:17] = 1
    expected_masks = torch.zeros(128, dtype=torch.long)
    expected_masks[:17] = 1
    self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
    self.assertTrue(torch.equal(results["input_mask"], expected_masks))
    self.assertTrue("input_ids_masked" in results)
    self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
    self.assertTrue("input_ids_corrupt" not in results)
    # Test masked caption
    # Force masking; input_ids stay unmasked, only input_ids_masked changes.
    processor._probability = 1.0
    arg = {"text": "This will be a test of tokens?"}
    results = processor(arg)
    expected_input_ids = torch.zeros(128, dtype=torch.long)
    expected_input_ids[:11] = torch.tensor(
        [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102],
        dtype=torch.long,
    )
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
    self.assertTrue("input_ids_masked" in results)
    self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
    # Test [MASK] token is present (103 = [MASK] in bert-base-uncased)
    self.assertTrue(103 in results["input_ids_masked"])
    self.assertTrue("input_ids_corrupt" not in results)
    # Test corrupt tokens
    processor._probability = 0.5
    processor._corrupt_prob = 1.0
    arg = {
        "text": "This will be a test of tokens?",
        "text_b": "test tokens",
        "random_captions": ["Something unexpected"],
        "random_labels": ["cat dog icecream"],
    }
    results = processor(arg)
    expected_input_ids = torch.zeros(128, dtype=torch.long)
    expected_input_ids[:15] = torch.tensor(
        [
            101,
            2023,
            2097,
            2022,
            1037,
            3231,
            1997,
            19204,
            2015,
            1029,
            102,
            3231,
            19204,
            2015,
            102,
        ],
        dtype=torch.long,
    )
    expected_segment_ids = torch.zeros(128, dtype=torch.long)
    self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
    self.assertTrue("input_ids_masked" in results)
    self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
    self.assertTrue("input_ids_corrupt" in results)
    # The corrupt ids replace either the caption (with the random caption)
    # or the label text (with the random labels); either result is accepted.
    expected_swapped_caption = torch.zeros(128, dtype=torch.long)
    expected_swapped_caption[:8] = torch.tensor(
        [101, 2242, 9223, 102, 3231, 19204, 2015, 102],
        dtype=torch.long,
    )
    expected_swapped_labels = torch.zeros(128, dtype=torch.long)
    expected_swapped_labels[:17] = torch.tensor(
        [
            101,
            2023,
            2097,
            2022,
            1037,
            3231,
            1997,
            19204,
            2015,
            1029,
            102,
            4937,
            3899,
            3256,
            16748,
            3286,
            102,
        ],
        dtype=torch.long,
    )
    self.assertTrue(
        torch.equal(results["input_ids_corrupt"], expected_swapped_caption)
        or torch.equal(results["input_ids_corrupt"], expected_swapped_labels)
    )