示例#1
0
    def __init__(self, image_model, fusion_method, config):
        """Create the fusion layer, the image+gaze encoder and the text encoder.

        Args:
            image_model: Image backbone selector. ``'vgg'`` and ``'resnet'``
                are the values handled here; any other value leaves
                ``self.image_gaze_encoder`` unset.
            fusion_method: ``'concat'`` creates the linear layer ``self.fc``;
                ``'mcb'``, ``'mlb'``, ``'mutan'`` and ``'block'`` create
                ``self.fusion`` from the ``fusions`` module. Any other value
                creates neither attribute.
            config: Object exposing ``hidden_size``; it is also handed
                unchanged to ``BertEncoder``.
        """
        super(TextImageGazeBertEncoder, self).__init__()

        hidden = config.hidden_size
        self.hidden_size = hidden
        self.fusion_method = fusion_method

        # Option value -> class name inside ``fusions``. The class itself is
        # resolved lazily so only the selected one is ever looked up.
        bilinear_names = {
            'mcb': 'MCB',
            'mlb': 'MLB',
            'mutan': 'Mutan',
            'block': 'Block',
        }
        if fusion_method == 'concat':
            # Projects the doubled width back down to ``hidden``.
            self.fc = nn.Linear(hidden * 2, hidden)
        elif fusion_method in bilinear_names:
            fusion_cls = getattr(fusions, bilinear_names[fusion_method])
            self.fusion = fusion_cls([hidden, hidden], hidden)

        # The backbone module is imported only when it is actually requested.
        encoder_cls = None
        if image_model == 'vgg':
            from model.vgg import VggEncoder as encoder_cls
        elif image_model == 'resnet':
            from model.resnet import ResNetEncoder as encoder_cls
        if encoder_cls is not None:
            self.image_gaze_encoder = encoder_cls(hidden, gaze=True)

        from model.bert import BertEncoder
        self.text_encoder = BertEncoder(config)

        # Learnable square (hidden x hidden) matrix with Xavier-normal init.
        # NOTE(review): its role is defined by code outside this method.
        weight = torch.FloatTensor(hidden, hidden)
        init.xavier_normal_(weight)
        self.M = nn.Parameter(weight, requires_grad=True)
示例#2
0
    def __init__(self, image_model, fusion_method, id_to_vec, emb_size, vocab_size, config, device='cuda:0'):
        """Create the fusion layer, the image encoder and two text encoders.

        Args:
            image_model: Image backbone selector. ``'vgg'`` and ``'resnet'``
                are the values handled here; any other value leaves
                ``self.image_encoder`` unset.
            fusion_method: ``'concat'`` creates the linear layer ``self.fc``;
                ``'mcb'``, ``'mlb'``, ``'mutan'`` and ``'block'`` create
                ``self.fusion`` from the ``fusions`` module. Any other value
                creates neither attribute.
            id_to_vec: Forwarded as-is to both ``TransformerEncoder`` instances.
            emb_size: Forwarded as-is to both ``TransformerEncoder`` instances.
            vocab_size: Forwarded as-is to both ``TransformerEncoder`` instances.
            config: Object exposing ``hidden_size``; also forwarded to both
                ``TransformerEncoder`` instances.
            device: Forwarded as-is to both ``TransformerEncoder`` instances.
        """
        super(TextImageTransformerEncoder, self).__init__()

        hidden = config.hidden_size
        self.hidden_size = hidden
        self.fusion_method = fusion_method

        # Option value -> class name inside ``fusions``. The class itself is
        # resolved lazily so only the selected one is ever looked up.
        bilinear_names = {
            'mcb': 'MCB',
            'mlb': 'MLB',
            'mutan': 'Mutan',
            'block': 'Block',
        }
        if fusion_method == 'concat':
            # Projects the doubled width back down to ``hidden``.
            self.fc = nn.Linear(hidden * 2, hidden)
        elif fusion_method in bilinear_names:
            fusion_cls = getattr(fusions, bilinear_names[fusion_method])
            self.fusion = fusion_cls([hidden, hidden], hidden)

        # The backbone module is imported only when it is actually requested.
        encoder_cls = None
        if image_model == 'vgg':
            from model.vgg import VggEncoder as encoder_cls
        elif image_model == 'resnet':
            from model.resnet import ResNetEncoder as encoder_cls
        if encoder_cls is not None:
            self.image_encoder = encoder_cls(hidden)

        from model.transformer import TransformerEncoder
        # Two independently constructed encoders built from identical arguments.
        text_encoder_args = (id_to_vec, emb_size, vocab_size, config, device)
        self.context_encoder = TransformerEncoder(*text_encoder_args)
        self.response_encoder = TransformerEncoder(*text_encoder_args)

        # Learnable square (hidden x hidden) matrix with Xavier-normal init.
        # NOTE(review): its role is defined by code outside this method.
        weight = torch.FloatTensor(hidden, hidden)
        init.xavier_normal_(weight)
        self.M = nn.Parameter(weight, requires_grad=True)
    def __init__(self, cfg):
        """Build the embedding, reasoning, fusion and classifier sub-modules.

        Args:
            cfg: Mapping indexed by string keys. Keys read here:
                ``rnn_dim``, ``rnn_bidirection``, ``rnn_layer``, ``rnn_type``,
                ``n_vocab``, ``word_embedding_dim``, ``word_dic_file``,
                ``embedding_file``, ``v_dim``, ``rel_dim``,
                ``node_att_hid_dim``, ``gat_att_hid_dim``, ``gat_n_att``,
                ``ques_self_att_enable``, ``node_att_enable``, ``gat_enable``,
                ``spatial_feature_enable``, ``recurrent``, ``dropout``, ``wn``,
                ``fuse_type``, ``fused_dim``, ``classifier_hid_dim`` and
                ``classes``. ``fuse_type`` values handled are ``'LinearSum'``,
                ``'MFB'``, ``'MLB'`` and ``'BLOCK'``; any other value leaves
                ``self.fuse_net`` unset.
        """
        super(RelationVKG, self).__init__()

        # The question feature width doubles when the RNN is bidirectional.
        if cfg['rnn_bidirection']:
            q_dim = cfg['rnn_dim'] * 2
        else:
            q_dim = cfg['rnn_dim']

        word_dim = cfg['word_embedding_dim']
        self.w_emb = WordEmbedding(cfg['n_vocab'], word_dim)
        self.w_emb.init_embedding(cfg['word_dic_file'], cfg['embedding_file'])
        self.q_emb = QuestionEmbedding(
            word_dim,
            cfg['rnn_dim'],
            cfg['rnn_layer'],
            cfg['rnn_type'],
            keep_seq=True,
            bidirectional=cfg['rnn_bidirection'],
        )

        v_dim = cfg['v_dim']
        dropout = cfg['dropout']
        reasoning_options = {
            'gat_att_hid_dim': cfg['gat_att_hid_dim'],
            'gat_out_dim': v_dim,
            'gat_n_att': cfg['gat_n_att'],
            'gat_multi_head_type': "concat",
            'que_self_att_enable': cfg['ques_self_att_enable'],
            'node_att_enable': cfg['node_att_enable'],
            'gat_enable': cfg['gat_enable'],
            'spatial_feature_enable': cfg['spatial_feature_enable'],
            'recurrent': cfg['recurrent'],
            'dropout': dropout,
            'wn': cfg['wn'],
        }
        self.reasoning_net = ReasoningUnit(
            v_dim, q_dim, cfg['rel_dim'], cfg['node_att_hid_dim'],
            **reasoning_options
        )

        fuse_type = cfg['fuse_type']
        fused_dim = cfg['fused_dim']
        fuse_inputs = [v_dim, q_dim]
        if fuse_type == 'LinearSum':
            self.fuse_net = fusions.LinearSum(fuse_inputs, fused_dim,
                                              dropout_input=dropout)
        elif fuse_type == 'MFB':
            self.fuse_net = fusions.MFB(fuse_inputs, fused_dim,
                                        dropout_input=dropout)
        elif fuse_type in ('MLB', 'BLOCK'):
            # Both variants take an intermediate width of twice ``fused_dim``.
            fuse_cls = fusions.MLB if fuse_type == 'MLB' else fusions.Block
            self.fuse_net = fuse_cls(fuse_inputs, fused_dim,
                                     mm_dim=2 * fused_dim,
                                     dropout_input=dropout)

        self.classifier = SimpleClassifier(fused_dim,
                                           cfg['classifier_hid_dim'],
                                           cfg['classes'],
                                           0.5)
示例#4
0
 def __init__(self, cfg):
     """Build the embedding, attention, fusion and classifier sub-modules.

     Args:
         cfg: Mapping indexed by string keys; it is also stored on the
             instance as ``self.cfg``. Keys read here: ``rnn_dim``,
             ``rnn_bidirection``, ``rnn_layer``, ``rnn_type``, ``n_vocab``,
             ``word_embedding_dim``, ``word_dic_file``, ``embedding_file``,
             ``v_dim``, ``fused_dim``, ``fuse_type``, ``classifier_hid_dim``
             and ``classes``; ``dropout`` is read only for the ``fusions``
             based fuse types. ``fuse_type`` values handled are
             ``'LinearSum'``, ``'MFB'``, ``'MLB'``, ``'MFH'`` and ``'MCB'``;
             any other value leaves ``self.fuse_net`` unset.
     """
     super(BottomUp, self).__init__()
     self.cfg = cfg

     # The question feature width doubles when the RNN is bidirectional.
     if cfg['rnn_bidirection']:
         q_dim = cfg['rnn_dim'] * 2
     else:
         q_dim = cfg['rnn_dim']

     word_dim = cfg['word_embedding_dim']
     self.w_emb = WordEmbedding(cfg['n_vocab'], word_dim)
     self.w_emb.init_embedding(cfg['word_dic_file'], cfg['embedding_file'])
     self.q_emb = QuestionEmbedding(word_dim, cfg['rnn_dim'], cfg['rnn_layer'],
                                    cfg['rnn_type'], keep_seq=False,
                                    bidirectional=cfg['rnn_bidirection'])

     v_dim = cfg['v_dim']
     fused_dim = cfg['fused_dim']
     self.v_att = NewAttention(v_dim, q_dim, fused_dim)

     fuse_type = cfg['fuse_type']
     if fuse_type == 'MCB':
         # MCB uses the standalone compact-bilinear-pooling package, imported
         # only on demand. The original source kept a commented-out variant:
         # fusions.MCB([v_dim, q_dim], fused_dim, dropout_output=cfg['dropout'])
         from compact_bilinear_pooling import CompactBilinearPooling
         self.fuse_net = CompactBilinearPooling(v_dim, q_dim, fused_dim)
     else:
         # Per-type keyword arguments on top of the shared ``dropout_input``.
         extra_kwargs = None
         if fuse_type == 'LinearSum':
             extra_kwargs = {}
         elif fuse_type in ('MFB', 'MFH'):
             extra_kwargs = {'mm_dim': 1000, 'factor': 5}
         elif fuse_type == 'MLB':
             extra_kwargs = {'mm_dim': 2 * fused_dim}
         if extra_kwargs is not None:
             # For these types the option value equals the class name.
             fuse_cls = getattr(fusions, fuse_type)
             self.fuse_net = fuse_cls([v_dim, q_dim], fused_dim,
                                      dropout_input=cfg['dropout'],
                                      **extra_kwargs)

     self.classifier = SimpleClassifier(fused_dim, cfg['classifier_hid_dim'],
                                        cfg['classes'], 0.5)