Example #1
    def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
        super(AbstractAtt, self).__init__()
        self.opt = opt
        self.vocab_words = vocab_words
        self.vocab_answers = vocab_answers
        self.num_classes = len(self.vocab_answers)
        # Modules
        self.seq2vec = seq2vec.factory(
            self.vocab_words, self.opt['seq2vec'])  # seq2vec = skipthoughts

        # Modules for attention
        self.conv_v_att = nn.Conv2d(
            self.opt['dim_v'],  # Conv2d args: (in_channels, out_channels, kernel_size, stride)
            self.opt['attention']['dim_v'],
            1,
            1)  # (2048,2048,1,1)
        self.linear_q_att = nn.Linear(
            self.opt['dim_q'],
            self.opt['attention']['dim_q'])  # (2400,2048, 310)
        self.conv_att = nn.Conv2d(self.opt['attention']['dim_mm'],
                                  self.opt['attention']['nb_glimpses'], 1,
                                  1)  # (510,2,1,1)
        # Modules for classification
        self.list_linear_v_fusion = None
        self.linear_q_fusion = None
        self.linear_classif = None
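These attention modules only declare shapes; the forward pass combines them into glimpse maps. Below is a minimal, hypothetical sketch of how such glimpse attention is typically applied, using the dimensions from the comments above (dim_v=2048, dim_mm=510, nb_glimpses=2); the fused image/question tensor x_mm is a stand-in, since the actual fusion lives elsewhere in the model.

import torch
import torch.nn as nn
import torch.nn.functional as F

# Hypothetical dimensions copied from the comments above; real values come from opt.
dim_v, dim_mm, nb_glimpses = 2048, 510, 2
conv_att = nn.Conv2d(dim_mm, nb_glimpses, 1, 1)    # mirrors self.conv_att

v = torch.randn(4, dim_v, 14, 14)        # image feature map: (batch, dim_v, H, W)
x_mm = torch.randn(4, dim_mm, 14, 14)    # stand-in for fused image/question features

att = conv_att(x_mm)                      # (batch, nb_glimpses, H, W)
att = F.softmax(att.flatten(2), dim=2)    # normalise over the H*W positions
att = att.view(4, nb_glimpses, 14, 14)

# one attended visual vector per glimpse: (batch, nb_glimpses, dim_v)
v_att = torch.einsum('bghw,bdhw->bgd', att, v)
print(v_att.shape)                        # torch.Size([4, 2, 2048])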
Example #2
    def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
        super(Dual_Learning_Model_Abstract, self).__init__()
        self.opt = opt
        self.attention = getattr(attention_modules, opt['attention']['arch'])(opt)
        self.vocab_answers = vocab_answers
        self.vocab_words = vocab_words
        self.num_classes = len(self.vocab_answers)

        # VQA Modules
        self.seq2vec = seq2vec.factory(self.vocab_words, self.opt['seq2vec'])
        self.vqa_module = getattr(vqa_modules, self.opt['vqa']['arch'])(self.opt, self.vocab_answers)
        if self.opt['vqg']['arch'] == 'VQA_Dual':
            self.vqg_module = vqg_modules.VQA_Dual(self.vqa_module, self.seq2vec, self.opt, self.vocab_words, self.vocab_answers)
        else:
            self.vqg_module = getattr(vqg_modules, self.opt['vqg']['arch'])(self.opt, self.vocab_words, self.vocab_answers)

        self.answer_embeddings = nn.Embedding(self.vqa_module.linear_classif.out_features, self.vqa_module.linear_classif.in_features)
        self.answer_embeddings.weight = self.vqa_module.linear_classif.weight

        self.shared_conv_layer = None
        self.is_testing = False
        self.sample_num = 5
        self.use_same_attention = opt['attention'].get('use_same_attention', True)
        

        # To fuse different glimpses
        if self.opt['vqa']['arch'] == 'Mutan':
            dim_h = int(self.opt['vqa']['fusion']['dim_hv'] / opt['attention']['nb_glimpses'])
        else: # MLB
            dim_h = self.opt['vqa']['fusion']['dim_h']

        self.list_linear_v_fusion = nn.ModuleList([
            nn.Linear(self.opt['dim_v'], dim_h)
            for i in range(self.opt['attention']['nb_glimpses'])])
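The assignment self.answer_embeddings.weight = self.vqa_module.linear_classif.weight above ties the answer embedding table to the classifier: nn.Linear stores its weight as (out_features, in_features), which is exactly the (num_embeddings, embedding_dim) layout nn.Embedding expects, so a single Parameter can serve both roles. A standalone sketch with made-up sizes:

import torch
import torch.nn as nn

# Made-up sizes; in the model they come from the answer vocabulary and the fusion dims.
num_answers, dim_mm = 2000, 510
linear_classif = nn.Linear(dim_mm, num_answers)
answer_embeddings = nn.Embedding(num_answers, dim_mm)

# Tie E to W: both modules now hold the same Parameter object.
answer_embeddings.weight = linear_classif.weight

idx = torch.tensor([3])
assert torch.equal(answer_embeddings(idx)[0], linear_classif.weight[3])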
Example #3
    def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
        super(Dual_Model, self).__init__()
        self.opt = opt
        
        # To fuse different glimpses
        dim_h = int(self.opt['vqa']['fusion']['dim_hv'] / opt['attention']['nb_glimpses'])
        
        self.vocab_answers = vocab_answers
        self.vocab_words = vocab_words
        self.num_classes = len(self.vocab_answers)

        # VQA Modules
        self.seq2vec = seq2vec.factory(self.vocab_words, self.opt['seq2vec'])
        # Modules for classification
        self.linear_q_fusion = nn.Linear(self.opt['dim_q'],
                                         self.opt['vqa']['fusion']['dim_hq'])
        self.linear_classif = nn.Linear(self.opt['vqa']['fusion']['dim_mm'],
                                        self.num_classes)
        self.fusion_classif_vqa = fusion.MutanFusion(self.opt['vqa']['fusion'],
                                                 visual_embedding=False,
                                                 question_embedding=False)
        self.attention_vqa = getattr(attention_modules, opt['attention']['arch'])(opt, use_linear=False)
        self.linear_q_att = nn.Linear(self.opt['dim_q'],
                                        self.opt['attention']['dim_q'])
        self.list_linear_v_fusion_vqa = nn.ModuleList([
            nn.Linear(self.opt['dim_v'], dim_h)
            for i in range(self.opt['attention']['nb_glimpses'])])

        # share W and E
        self.answer_embeddings = nn.Embedding(self.linear_classif.out_features, self.linear_classif.in_features)
        # VQG modules
        self.linear_va_transform = nn.Linear(self.linear_classif.in_features, 
                        self.opt['vqg']['vec2seq']['dim_embedding'])
        self.linear_a_att = nn.Linear(self.opt['dim_a'],
                                        self.opt['attention']['dim_q'])

        self.linear_a_fusion = nn.Linear(self.opt['vqa']['fusion']['dim_mm'], self.opt['vqa']['fusion']['dim_hq'])
        # Modules for Question Generation
        self.question_generation = getattr(vec2seq, opt['vqg']['vec2seq']['arch'])(vocab_words, opt['vqg']['vec2seq'])

        # Sharable modules
        if self.opt.get('share_modules', True):
            print('Sharing Modules: [Attention] and [Fusion]')
            self.fusion_classif_vqg = self.fusion_classif_vqa
            self.attention_vqg = self.attention_vqa
            self.list_linear_v_fusion_vqg = self.list_linear_v_fusion_vqa
        else:
            print('Disable Module Sharing')
            self.fusion_classif_vqg = fusion.MutanFusion(self.opt['vqa']['fusion'],
                                                     visual_embedding=False,
                                                     question_embedding=False)
            self.attention_vqg = getattr(attention_modules, opt['attention']['arch'])(opt, use_linear=False)
            self.list_linear_v_fusion_vqg = nn.ModuleList([
                nn.Linear(self.opt['dim_v'], dim_h)
                for i in range(self.opt['attention']['nb_glimpses'])])

        self.is_testing = False
        self.sample_num = 5
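dim_h above is the per-glimpse slice of the fused visual dimension: dim_hv is divided by nb_glimpses so that concatenating the outputs of list_linear_v_fusion_vqa restores the full dim_hv width. A hypothetical sketch with illustrative sizes (dim_v=2048, dim_hv=620, nb_glimpses=2):

import torch
import torch.nn as nn

dim_v, dim_hv, nb_glimpses = 2048, 620, 2   # illustrative values only
dim_h = dim_hv // nb_glimpses               # 310 per glimpse

list_linear_v_fusion = nn.ModuleList(
    [nn.Linear(dim_v, dim_h) for _ in range(nb_glimpses)])

v_att = torch.randn(4, nb_glimpses, dim_v)  # one attended vector per glimpse
v_fused = torch.cat(
    [linear(v_att[:, g]) for g, linear in enumerate(list_linear_v_fusion)], dim=1)
print(v_fused.shape)                        # torch.Size([4, 620]) == (batch, dim_hv)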
Example #4
    def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
        super(AbstractNoAtt, self).__init__()
        self.opt = opt
        self.vocab_words = vocab_words
        self.vocab_answers = vocab_answers
        self.num_classes = len(self.vocab_answers)
        # Modules
        self.seq2vec = seq2vec.factory(self.vocab_words, self.opt['seq2vec'])
        self.linear_classif = nn.Linear(self.opt['fusion']['dim_h'], self.num_classes)
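In this no-attention variant the classifier maps the fused representation of size opt['fusion']['dim_h'] straight to answer logits. A hypothetical training-style snippet with illustrative sizes:

import torch
import torch.nn as nn
import torch.nn.functional as F

dim_h, num_classes = 1200, 2000              # illustrative: opt['fusion']['dim_h'], len(vocab_answers)
linear_classif = nn.Linear(dim_h, num_classes)

x = torch.randn(8, dim_h)                    # fused image/question representation
logits = linear_classif(x)                   # (batch, num_classes)
target = torch.randint(0, num_classes, (8,)) # ground-truth answer indices
loss = F.cross_entropy(logits, target)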
Example #5
    def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
        super(AbstractAtt, self).__init__()
        self.opt = opt
        self.vocab_words = vocab_words
        self.vocab_answers = vocab_answers
        self.num_classes = len(self.vocab_answers)
        # Modules
        self.seq2vec = seq2vec.factory(self.vocab_words, self.opt['seq2vec'])
        # Modules for attention
        self.conv_v_att = nn.Conv2d(self.opt['dim_v'],
                                    self.opt['attention']['dim_v'], 1, 1)
        self.linear_q_att = nn.Linear(self.opt['dim_q'],
                                      self.opt['attention']['dim_q'])
        self.conv_att = nn.Conv2d(self.opt['attention']['dim_mm'],
                                  self.opt['attention']['nb_glimpses'], 1, 1)
        # Modules for classification
        self.list_linear_v_fusion = None
        self.linear_q_fusion = None
        self.linear_classif = None
Example #6
    def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
        super(AbstractAtt, self).__init__()
        self.opt = opt
        self.vocab_words = vocab_words
        self.vocab_answers = vocab_answers
        self.num_classes = len(self.vocab_answers)
        # Modules
        if self.opt['seq2vec']['arch'] == "bert":
            self.seq2vec = return_self
        else:
            self.seq2vec = seq2vec.factory(self.vocab_words,
                                           self.opt['seq2vec'])
        # Modules for attention
        self.conv_v_att = nn.Conv2d(self.opt['dim_v'],
                                    self.opt['attention']['dim_v'], 1, 1)
        self.linear_q_att = nn.Linear(self.opt['dim_q'],
                                      self.opt['attention']['dim_q'])
        self.conv_att = nn.Conv2d(self.opt['attention']['dim_mm'],
                                  self.opt['attention']['nb_glimpses'], 1, 1)
        # Modules for batch norm
        self.batchnorm_conv_v_att = nn.BatchNorm2d(
            self.opt['attention']['dim_v'])
        self.batchnorm_linear_q_att = nn.BatchNorm1d(
            self.opt['attention']['dim_q'])
        self.batchnorm_conv_att = nn.BatchNorm2d(
            self.opt['attention']['nb_glimpses'])
        self.batchnorm_fusion_att = nn.BatchNorm1d(
            self.opt['attention']['dim_mm'])
        self.batchnorm_list_linear_v_fusion = nn.BatchNorm1d(
            self.opt['attention']['dim_mm'])
        self.batchnorm_list_linear_q_fusion = nn.BatchNorm1d(
            self.opt['attention']['dim_mm'] *
            self.opt['attention']['nb_glimpses'])
        self.batchnorm_fusion_classif = nn.BatchNorm1d(
            self.opt['attention']['dim_mm'] *
            self.opt['attention']['nb_glimpses'])

        # Modules for classification
        self.list_linear_v_fusion = None
        self.linear_q_fusion = None
        self.linear_classif = None
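When opt['seq2vec']['arch'] == 'bert' this snippet assigns return_self instead of a recurrent encoder. The helper itself is not shown, but a plausible definition, assuming the question input is already a precomputed BERT embedding, is a plain identity:

def return_self(x):
    # Assumed identity: the question tensor is already a fixed BERT embedding,
    # so there is nothing left for seq2vec to encode.
    return x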
Example #7
    def __init__(self, opt={}, vocab_words=[], vocab_answers=[]):
        super(Dual_Model, self).__init__()
        self.opt = opt

        # To fuse different glimpses
        dim_h = int(self.opt["vqa"]["fusion"]["dim_hv"] /
                    opt["attention"]["nb_glimpses"])

        self.vocab_answers = vocab_answers
        self.vocab_words = vocab_words
        self.num_classes = len(self.vocab_answers) + 1  # for [UNK]

        # VQA Modules
        self.seq2vec = seq2vec.factory(self.vocab_words, self.opt["seq2vec"])

        # Modules for classification
        self.linear_q_fusion = nn.Linear(self.opt["dim_q"],
                                         self.opt["vqa"]["fusion"]["dim_hq"])
        self.linear_classif = nn.Linear(self.opt["vqa"]["fusion"]["dim_mm"],
                                        self.num_classes)
        self.fusion_classif_vqa = fusion.MutanFusion(
            self.opt["vqa"]["fusion"],
            visual_embedding=False,
            question_embedding=False,
        )
        self.attention_vqa = getattr(
            attention_modules, opt["attention"]["arch"])(opt, use_linear=False)
        self.linear_q_att = nn.Linear(self.opt["dim_q"],
                                      self.opt["attention"]["dim_q"])
        self.list_linear_v_fusion_vqa = nn.ModuleList([
            nn.Linear(self.opt["dim_v"], dim_h)
            for i in range(self.opt["attention"]["nb_glimpses"])
        ])

        # share W and E
        self.answer_embeddings = nn.Embedding(self.linear_classif.out_features,
                                              self.linear_classif.in_features)
        # VQG modules
        self.linear_va_transform = nn.Linear(
            self.linear_classif.in_features,
            self.opt["vqg"]["vec2seq"]["dim_embedding"],
        )
        self.linear_a_att = nn.Linear(self.opt["dim_a"],
                                      self.opt["attention"]["dim_q"])

        self.linear_a_fusion = nn.Linear(
            self.opt["vqa"]["fusion"]["dim_mm"],
            self.opt["vqa"]["fusion"]["dim_hq"],
        )
        # Modules for Question Generation
        self.question_generation = getattr(
            vec2seq, opt["vqg"]["vec2seq"]["arch"])(vocab_words,
                                                    opt["vqg"]["vec2seq"])

        # Sharable modules
        if self.opt.get("share_modules", True):
            print("Sharing Modules: [Attention] and [Fusion]")
            self.fusion_classif_vqg = self.fusion_classif_vqa
            self.attention_vqg = self.attention_vqa
            self.list_linear_v_fusion_vqg = self.list_linear_v_fusion_vqa
        else:
            print("Disable Module Sharing")
            self.fusion_classif_vqg = fusion.MutanFusion(
                self.opt["vqa"]["fusion"],
                visual_embedding=False,
                question_embedding=False,
            )
            self.attention_vqg = getattr(
                attention_modules, opt["attention"]["arch"])(opt,
                                                             use_linear=False)
            self.list_linear_v_fusion_vqg = nn.ModuleList([
                nn.Linear(self.opt["dim_v"], dim_h)
                for i in range(self.opt["attention"]["nb_glimpses"])
            ])

        self.is_testing = False
        self.sample_num = 5
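The + 1 on num_classes reserves an extra class for answers outside the vocabulary ([UNK]). The mapping itself is not shown in the snippet; a small illustration of how targets could be built under that convention:

vocab_answers = ['yes', 'no', '2']                        # toy vocabulary
ans_to_idx = {a: i for i, a in enumerate(vocab_answers)}
UNK = len(vocab_answers)                                  # the reserved extra class index

target = ans_to_idx.get('blue', UNK)                      # unseen answer -> [UNK] class
num_classes = len(vocab_answers) + 1                      # matches num_classes above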