def __init__(self, device, NUM_FRAMES_PER_STEP=5, DETECTIONS_PER_FRAME=20):
    super(ModelFC, self).__init__()
    self.device = device
    self.NUM_FRAMES_PER_STEP = NUM_FRAMES_PER_STEP
    self.DETECTIONS_PER_FRAME = DETECTIONS_PER_FRAME
    self.DETECTIONS_PER_STEP = self.NUM_FRAMES_PER_STEP * self.DETECTIONS_PER_FRAME

    self.lxmert_tokenizer = LxmertTokenizer.from_pretrained(
        "unc-nlp/lxmert-base-uncased")
    # Register the grounding markers as special tokens so they are never
    # split into word pieces, then cache their IDs.
    self.lxmert_tokenizer.add_special_tokens({
        "additional_special_tokens": [self.NULL, self.ENTITY, self.ACTION]
    })
    self.NULL_TOKEN = self.lxmert_tokenizer.convert_tokens_to_ids(self.NULL)
    self.ENTITY_TOKEN = self.lxmert_tokenizer.convert_tokens_to_ids(self.ENTITY)
    self.ACTION_TOKEN = self.lxmert_tokenizer.convert_tokens_to_ids(self.ACTION)

    self.lxmert = LxmertModel.from_pretrained("unc-nlp/lxmert-base-uncased")
    # The three added tokens grow the vocabulary past the pretrained embedding
    # matrix, so resize it before their IDs are ever fed to the model.
    self.lxmert.resize_token_embeddings(len(self.lxmert_tokenizer))
    self.lxmert.to(device)

    self.VG = LxmertVGHead(self.lxmert.config, self.DETECTIONS_PER_STEP)
    self.VG.to(device)
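A minimal usage sketch (not from the source) of the special-token registration above, assuming the class constants are string literals such as "[NULL]", "[ENTITY]", and "[ACTION]": once registered, each marker tokenizes to a single ID instead of being split into word pieces.

from transformers import LxmertTokenizer

tokenizer = LxmertTokenizer.from_pretrained("unc-nlp/lxmert-base-uncased")
tokenizer.add_special_tokens(
    {"additional_special_tokens": ["[NULL]", "[ENTITY]", "[ACTION]"]})
enc = tokenizer("[ENTITY] picks up the cup", return_tensors="pt")
entity_id = tokenizer.convert_tokens_to_ids("[ENTITY]")
# The marker survives tokenization as a single ID.
assert entity_id in enc["input_ids"][0].tolist()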
def __init__(self, device, NUM_FRAMES_PER_STEP=5, MAX_DETECTIONS=20):
    super(Model, self).__init__()
    self.device = device
    self.NUM_FRAMES_PER_STEP = NUM_FRAMES_PER_STEP
    self.MAX_DETECTIONS = MAX_DETECTIONS
    self.CANDIDATES = self.NUM_FRAMES_PER_STEP * self.MAX_DETECTIONS

    self.lxmert_tokenizer = LxmertTokenizer.from_pretrained(
        "unc-nlp/lxmert-base-uncased")
    self.lxmert = LxmertModel.from_pretrained("unc-nlp/lxmert-base-uncased")
    self.lxmert = nn.DataParallel(self.lxmert)
    self.lxmert.to(device)
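For context, a self-contained sketch (not from the source) of a forward pass through the LXMERT backbone built above; a DataParallel-wrapped module is called the same way. The sentence and the random visual features are placeholders, and num_boxes mirrors CANDIDATES = NUM_FRAMES_PER_STEP * MAX_DETECTIONS.

import torch
from transformers import LxmertModel, LxmertTokenizer

tokenizer = LxmertTokenizer.from_pretrained("unc-nlp/lxmert-base-uncased")
model = LxmertModel.from_pretrained("unc-nlp/lxmert-base-uncased")

inputs = tokenizer("a person opens the fridge", return_tensors="pt")
num_boxes = 5 * 20  # NUM_FRAMES_PER_STEP * MAX_DETECTIONS
visual_feats = torch.randn(1, num_boxes, model.config.visual_feat_dim)  # RoI features (2048-d)
visual_pos = torch.rand(1, num_boxes, 4)  # normalized box coordinates

out = model(**inputs, visual_feats=visual_feats, visual_pos=visual_pos)
# Cross-modal outputs: per-token, per-box, and pooled [CLS] representations.
print(out.language_output.shape, out.vision_output.shape, out.pooled_output.shape)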
def test_inference_no_head_absolute_embedding(self):
    model = LxmertModel.from_pretrained(LXMERT_PRETRAINED_MODEL_ARCHIVE_LIST[0])
    input_ids = torch.tensor([[101, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 102]])
    num_visual_features = 10
    # Reseed before each draw so both tensors are reproducible and match the
    # hard-coded expected values below.
    np.random.seed(0)
    visual_feats = np.random.rand(1, num_visual_features, model.config.visual_feat_dim)
    np.random.seed(0)
    visual_pos = np.random.rand(1, num_visual_features, 4)
    visual_feats = torch.as_tensor(visual_feats, dtype=torch.float32)
    visual_pos = torch.as_tensor(visual_pos, dtype=torch.float32)
    output = model(input_ids, visual_feats=visual_feats, visual_pos=visual_pos)[0]
    expected_shape = torch.Size([1, 11, 768])
    self.assertEqual(expected_shape, output.shape)
    expected_slice = torch.tensor(
        [[[0.2417, -0.9807, 0.1480], [1.2541, -0.8320, 0.5112], [1.4070, -1.1052, 0.6990]]]
    )
    self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
def __init__(self, NUM_FRAMES_PER_STEP=5, MAX_DETECTIONS=20,
             max_epochs=100, lr=1e-4, batch_size=4):
    super().__init__()
    self.NUM_FRAMES_PER_STEP = NUM_FRAMES_PER_STEP
    self.MAX_DETECTIONS = MAX_DETECTIONS
    self.CANDIDATES = self.NUM_FRAMES_PER_STEP * self.MAX_DETECTIONS
    self.lxmert_tokenizer = LxmertTokenizer.from_pretrained(
        "unc-nlp/lxmert-base-uncased")
    self.lxmert = LxmertModel.from_pretrained("unc-nlp/lxmert-base-uncased")
    # Store every constructor argument (max_epochs, lr, batch_size, ...) in
    # self.hparams so they are logged and restored with checkpoints.
    self.save_hyperparameters()
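A short sketch of what save_hyperparameters() buys here, assuming this __init__ lives in a pl.LightningModule subclass called LxmertGrounder (the class name is a hypothetical stand-in): the constructor arguments land in self.hparams and are restored automatically from checkpoints.

model = LxmertGrounder(NUM_FRAMES_PER_STEP=5, MAX_DETECTIONS=20,
                       max_epochs=100, lr=1e-4, batch_size=4)
print(model.hparams.lr)          # 0.0001
print(model.hparams.batch_size)  # 4
# Derived values like CANDIDATES are plain attributes, not hparams:
print(model.CANDIDATES)          # 100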
def __init__(self, opt):
    super(Model, self).__init__()
    self.opt = opt
    self.model = LxmertModel.from_pretrained(
        'unc-nlp/lxmert-base-uncased', return_dict=True)
    self.model = nn.DataParallel(self.model)
    self.candi_ans_num = opt.train_candi_ans_num
    self.batchsize = opt.batch_size
    self.Linear_layer = nn.Linear(768, 1)
    norm = opt.norm
    activation = opt.activation
    dropC = opt.dropC
    self.debias_loss_fn = LearnedMixin(0.36)
    self.classifier = SimpleClassifier(
        in_dim=768, hid_dim=2 * 768, out_dim=1,
        dropout=dropC, norm=norm, act=activation)
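The forward pass is not shown in the source; as a hedged sketch of the scoring path these attributes imply (the shapes and the candidate count are assumptions), each (question, candidate-answer) pair yields one pooled 768-d LXMERT vector, and a Linear(768, 1) head maps it to a single logit per candidate.

import torch
import torch.nn as nn

batch_size, candi_ans_num = 4, 18                        # illustrative values
linear = nn.Linear(768, 1)                               # mirrors self.Linear_layer
pooled = torch.randn(batch_size * candi_ans_num, 768)    # one pooled vector per pair
scores = linear(pooled).view(batch_size, candi_ans_num)  # one logit per candidate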
def create_and_check_lxmert_model(
    self,
    config,
    input_ids,
    visual_feats,
    bounding_boxes,
    token_type_ids,
    input_mask,
    obj_labels,
    masked_lm_labels,
    matched_label,
    ans,
    output_attentions,
):
    model = LxmertModel(config=config)
    model.to(torch_device)
    model.eval()
    result = model(
        input_ids,
        visual_feats,
        bounding_boxes,
        token_type_ids=token_type_ids,
        attention_mask=input_mask,
        output_attentions=output_attentions,
    )
    result = model(
        input_ids,
        visual_feats,
        bounding_boxes,
        token_type_ids=token_type_ids,
        attention_mask=input_mask,
        output_attentions=not output_attentions,
    )
    result = model(input_ids, visual_feats, bounding_boxes, return_dict=False)
    result = model(input_ids, visual_feats, bounding_boxes, return_dict=True)
    self.parent.assertEqual(
        result.language_output.shape,
        (self.batch_size, self.seq_length, self.hidden_size))
    self.parent.assertEqual(
        result.vision_output.shape,
        (self.batch_size, self.num_visual_features, self.hidden_size))
    self.parent.assertEqual(
        result.pooled_output.shape, (self.batch_size, self.hidden_size))
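The same shape contract can be exercised standalone with a small randomly initialized config; this sketch is not part of the test suite above, and the tiny hyperparameter values are arbitrary.

import torch
from transformers import LxmertConfig, LxmertModel

config = LxmertConfig(hidden_size=64, num_attention_heads=4,
                      l_layers=1, x_layers=1, r_layers=1, visual_feat_dim=16)
model = LxmertModel(config)
model.eval()

input_ids = torch.randint(0, config.vocab_size, (2, 7))
visual_feats = torch.randn(2, 3, config.visual_feat_dim)
visual_pos = torch.rand(2, 3, 4)
result = model(input_ids, visual_feats, visual_pos, return_dict=True)
assert result.language_output.shape == (2, 7, config.hidden_size)
assert result.vision_output.shape == (2, 3, config.hidden_size)
assert result.pooled_output.shape == (2, config.hidden_size)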
def test_model_from_pretrained(self):
    for model_name in LXMERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
        model = LxmertModel.from_pretrained(model_name)
        model.to(torch_device)
        self.assertIsNotNone(model)