Пример #1
0
    def __init__(self, __C, pretrained_emb, token_size, answer_size):
        """Build the MCAN-style VQA network: word embedding + LSTM for the
        question, a linear projection for image features, an MCA_ED backbone,
        and per-modality attention-flatten / norm / projection heads."""
        super(Net, self).__init__()

        # Question-word embedding table.
        self.embedding = nn.Embedding(
            num_embeddings=token_size,
            embedding_dim=__C.WORD_EMBED_SIZE,  # 300
        )
        # Seed the embedding with pretrained GloVe vectors when configured.
        if __C.USE_GLOVE:
            self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

        # Single-layer LSTM encoder over the embedded question tokens.
        self.lstm = nn.LSTM(
            input_size=__C.WORD_EMBED_SIZE,
            hidden_size=__C.HIDDEN_SIZE,
            num_layers=1,
            batch_first=True,
        )

        # Project Faster R-CNN 2048-d region features into the hidden size.
        self.img_feat_linear = nn.Linear(
            in_features=__C.IMG_FEAT_SIZE,
            out_features=__C.HIDDEN_SIZE,
        )

        self.backbone = MCA_ED(__C)

        self.attflat_img = AttFlat(__C)
        self.attflat_lang = AttFlat(__C)

        # Separate normalisation and answer-projection heads per modality.
        self.proj_norm_img = LayerNorm(__C.FLAT_OUT_SIZE)
        self.proj_norm_lang = LayerNorm(__C.FLAT_OUT_SIZE)
        self.proj_img = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)
        self.proj_lang = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)
Пример #2
0
    def __init__(self, __C):
        """Self-attention block: multi-head attention followed by a
        feed-forward network, each sub-layer paired with dropout + LayerNorm."""
        super(SA, self).__init__()

        self.mhatt = MHAtt(__C)
        self.ffn = FFN(__C)

        # One dropout/norm pair per sub-layer (attention first, then FFN).
        self.dropout1 = nn.Dropout(p=__C.DROPOUT_R)
        self.norm1 = LayerNorm(__C.HIDDEN_SIZE)
        self.dropout2 = nn.Dropout(p=__C.DROPOUT_R)
        self.norm2 = LayerNorm(__C.HIDDEN_SIZE)
    def __init__(self, __C, gen_func=torch.softmax):
        """Guided-attention block: two multi-head attention modules plus a
        feed-forward network, each sub-layer paired with dropout + LayerNorm.

        gen_func selects the attention normalisation for the first
        multi-head attention only (torch.softmax by default).
        """
        super(SGA, self).__init__()

        self.mhatt1 = MHAtt(__C, gen_func=gen_func)
        self.mhatt2 = MHAtt(__C)
        self.ffn = FFN(__C)

        # One dropout/norm pair per sub-layer.
        self.dropout1 = nn.Dropout(p=__C.DROPOUT_R)
        self.norm1 = LayerNorm(__C.HIDDEN_SIZE)
        self.dropout2 = nn.Dropout(p=__C.DROPOUT_R)
        self.norm2 = LayerNorm(__C.HIDDEN_SIZE)
        self.dropout3 = nn.Dropout(p=__C.DROPOUT_R)
        self.norm3 = LayerNorm(__C.HIDDEN_SIZE)
Пример #4
0
    def __init__(self, __C, answer_size):
        """BERT-encoded variant: question features come from a pretrained
        BERT model, re-encoded by an LSTM, then fused with projected image
        features through the MCA_ED backbone."""
        super(Net, self).__init__()

        self.__C = __C
        # Pretrained BERT encoder loaded from the configured checkpoint path.
        self.bert = BertModel.from_pretrained(self.__C.PRETRAINED_PATH)

        # Record the available device as a string (no tensors moved here).
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Re-encode BERT hidden states with a single-layer LSTM.
        self.lstm = nn.LSTM(
            input_size=self.__C.PRETRAINED_HIDDEN,
            hidden_size=__C.HIDDEN_SIZE,
            num_layers=1,
            batch_first=True,
        )

        # Project image region features into the shared hidden size.
        self.img_feat_linear = nn.Linear(
            in_features=__C.IMG_FEAT_SIZE,
            out_features=__C.HIDDEN_SIZE,
        )

        self.backbone = MCA_ED(__C)

        self.attflat_img = AttFlat(__C)
        self.attflat_lang = AttFlat(__C)

        self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
        self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)
Пример #5
0
    def __init__(self, __C, q_emb, token_size, answer_size):
        """Question-only head: attention-flattens language features and
        projects them to answer logits.

        q_emb and token_size are accepted for signature compatibility with
        sibling networks but are not used here.
        """
        super(QNet, self).__init__()

        self.attflat_lang = AttFlat(__C)
        self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
        self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)
Пример #6
0
    def __init__(self, __C, vocab_size=30000):
        """Detector + BERT variant with a second co-attention stack and a
        vocabulary-sized output projection.

        Fix: removed the unused local `output_dir` (it was referenced only
        by commented-out dead code, which is also deleted).

        :param __C: configuration object
        :param vocab_size: size of the output vocabulary projection
        """
        super(Net, self).__init__()

        # Visual features from a pretrained detector with average pooling.
        self.detector = SimpleDetector(pretrained=True,
                                       average_pool=True,
                                       final_dim=2048)
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        self.img_feat_linear = nn.Linear(__C.IMG_FEAT_SIZE, __C.HIDDEN_SIZE)

        # Re-encode the 768-d BERT token states with a single-layer LSTM.
        self.lstm = nn.LSTM(input_size=768,
                            hidden_size=__C.HIDDEN_SIZE,
                            num_layers=1,
                            batch_first=True)

        self.backbone = MCA_ED(__C)
        self.attflat_img = AttFlat(__C)
        self.attflat_lang = AttFlat(__C)
        self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)

        self.output_feat_linear = nn.Linear(768, __C.FLAT_OUT_SIZE)

        # Second co-attention stack applied after the first fusion.
        self.backbone2 = MCA_ED(__C)

        # Per-step projection onto the vocabulary; presumably used for token
        # prediction — confirm against forward().
        self.output_proj_linear = nn.Linear(__C.HIDDEN_SIZE, vocab_size)

        self.softmax = nn.LogSoftmax(dim=1)
Пример #7
0
    def __init__(self, __C, answer_size):
        """RoBERTa-encoded variant: question features come straight from a
        pretrained RoBERTa model (the old embedding/LSTM stage was dead,
        commented-out code and has been removed), fused with projected image
        features through the MCA_ED backbone.

        :param __C: configuration object
        :param answer_size: number of candidate answers
        """
        super(Net, self).__init__()

        # Pretrained RoBERTa encoder, pinned to the 'main' revision.
        self.roberta_layer = RobertaModel.from_pretrained(
            __C.PRETRAINED_NAME, revision='main')

        # Project image region features into the shared hidden size.
        self.img_feat_linear = nn.Linear(
            __C.IMG_FEAT_SIZE,
            __C.HIDDEN_SIZE
        )

        self.backbone = MCA_ED(__C)

        self.attflat_img = AttFlat(__C)
        self.attflat_lang = AttFlat(__C)

        self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
        self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)
Пример #8
0
    def __init__(self, __C, q_emb, token_size, answer_size):
        """Question-only head: a stack (currently of depth 1) of linear
        layers over the flattened language features, then LayerNorm and an
        answer projection.

        Fixes: unused loop variable `i` renamed to `_`; commented-out dead
        code removed.

        q_emb and token_size are accepted for signature compatibility with
        sibling networks but are not used here.
        """
        super(QNet, self).__init__()

        # One FLAT_OUT_SIZE -> FLAT_OUT_SIZE layer; range(1) is kept so the
        # stack depth is trivial to change.
        self.nnList = nn.ModuleList(
            nn.Linear(__C.FLAT_OUT_SIZE, __C.FLAT_OUT_SIZE) for _ in range(1))

        self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
        self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)
Пример #9
0
    def __init__(self, __C):
        """Block with four multi-head attention modules and two feed-forward
        networks; six dropout + LayerNorm pairs, one per sub-layer."""
        super(SGSGA, self).__init__()

        self.mhatt1 = MHAtt(__C)
        self.mhatt2 = MHAtt(__C)
        self.mhatt3 = MHAtt(__C)
        self.mhatt4 = MHAtt(__C)
        self.ffn1 = FFN(__C)
        self.ffn2 = FFN(__C)

        # Register dropout1..6 / norm1..6 in the same interleaved order the
        # explicit assignments would produce.
        for idx in range(1, 7):
            setattr(self, 'dropout' + str(idx), nn.Dropout(__C.DROPOUT_R))
            setattr(self, 'norm' + str(idx), LayerNorm(__C.HIDDEN_SIZE))
Пример #10
0
    def __init__(self, __C, q_emb, token_size, answer_size):
        """Question-only head built on an MLP classifier, followed by a
        LayerNorm and an answer-sized re-projection.

        q_emb and token_size are accepted for signature compatibility with
        sibling networks but are not used here.
        """
        super(QNet, self).__init__()

        # MLP maps the flattened language features (FLAT_OUT_SIZE, e.g. 1024)
        # to answer logits.
        self.mlp = MLP(in_size=__C.FLAT_OUT_SIZE,
                       mid_size=__C.FLAT_OUT_SIZE,
                       out_size=answer_size,
                       dropout_r=__C.DROPOUT_R,
                       use_relu=True)

        # Normalise and re-project the answer-sized output.
        self.proj_norm = LayerNorm(answer_size)
        self.proj = nn.Linear(answer_size, answer_size)
Пример #11
0
    def __init__(self, __C, pretrained_emb, token_size, answer_size):
        """MCAN network extended with BiAttention and AGAttention modules on
        top of the standard embedding / LSTM / MCA_ED pipeline."""
        super(Net, self).__init__()

        self.__C = __C

        # Question-word embedding table, optionally seeded from GloVe.
        self.embedding = nn.Embedding(
            num_embeddings=token_size,
            embedding_dim=__C.WORD_EMBED_SIZE,
        )
        if __C.USE_GLOVE:
            self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

        # Single-layer LSTM encoder over the embedded question.
        self.lstm = nn.LSTM(
            input_size=__C.WORD_EMBED_SIZE,
            hidden_size=__C.HIDDEN_SIZE,
            num_layers=1,
            batch_first=True,
        )

        # Project image region features into the shared hidden size.
        self.img_feat_linear = nn.Linear(
            in_features=__C.IMG_FEAT_SIZE,
            out_features=__C.HIDDEN_SIZE,
        )

        self.backbone = MCA_ED(__C)

        self.attflat_img = AttFlat(__C)
        self.attflat_lang = AttFlat(__C)

        self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
        self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)

        # Bi-attention over the two sequences; receives the hidden size and
        # the padded question/image sequence lengths.
        self.bi_attention = BiAttention(
            __C,
            __C.HIDDEN_SIZE,
            __C.MAX_TOKEN,
            __C.IMG_FEAT_PAD_SIZE,
        )
        # Hidden -> flat-size projection.
        self.h_fh = nn.Linear(__C.HIDDEN_SIZE, __C.FLAT_OUT_SIZE)

        self.ag_attention = AGAttention(__C)

        # Flat-size -> hidden projection (inverse direction of h_fh).
        self.fh_h = nn.Linear(__C.FLAT_OUT_SIZE, __C.HIDDEN_SIZE)
Пример #12
0
    def __init__(self, __C, pretrained_emb, token_size, answer_size):
        """Dense co-attention (DCN) variant: the standard embedding / LSTM /
        MCA_ED pipeline plus a DCNLayer and PredictLayer head; all weights
        are Xavier-initialised at the end.

        Fix: removed the unused local `copy_data = __C`.

        :param __C: configuration object
        :param pretrained_emb: pretrained word-embedding matrix (numpy)
        :param token_size: vocabulary size
        :param answer_size: number of candidate answers
        """
        super(Net, self).__init__()

        self.embedding = nn.Embedding(
            num_embeddings=token_size,
            embedding_dim=__C.WORD_EMBED_SIZE
        )

        self.mlp = MLP(
            in_size=__C.HIDDEN_SIZE,
            mid_size=__C.FLAT_MLP_SIZE,
            out_size=__C.FLAT_GLIMPSES,
            dropout_r=__C.DROPOUT_R,
            use_relu=True
        )

        # Loading the GloVe embedding weights
        if __C.USE_GLOVE:
            self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

        self.lstm = nn.LSTM(
            input_size=__C.WORD_EMBED_SIZE,
            hidden_size=__C.HIDDEN_SIZE,
            num_layers=1,
            batch_first=True
        )

        # NOTE(review): output dim is hard-coded to 2048 rather than
        # __C.HIDDEN_SIZE — presumably to match the DCNLayer below; confirm.
        self.img_feat_linear = nn.Linear(
            __C.IMG_FEAT_SIZE,
            2048
        )

        self.backbone = MCA_ED(__C)

        self.attflat_img = AttFlat(__C)
        self.attflat_lang = AttFlat(__C)

        # NOTE(review): 1024 looks like it should equal __C.FLAT_OUT_SIZE —
        # confirm before changing the config.
        self.proj_norm = LayerNorm(1024)
        self.proj = nn.Linear(1024, answer_size)

        # Dense co-attention stack and prediction head. 3129 is hard-coded
        # and independent of answer_size — presumably the VQA v2 answer
        # vocabulary size; verify against the dataset.
        self.dense_coattn = DCNLayer(2048, 1024, 4, 3, 5, 0.3)
        self.predict = PredictLayer(2048, 1024, 4, 3129, 0.3)

        # Xavier-normal initialisation applied recursively to all submodules.
        self.apply(Initializer.xavier_normal)
Пример #13
0
    def __init__(self,
                 __C,
                 pretrained_emb,
                 token_size,
                 answer_size,
                 gen_func=torch.softmax):
        """MCAN network with a configurable attention-normalisation function
        (gen_func, softmax by default) and an image attention-flatten head
        switched by __C.attention ('discrete' vs. continuous)."""
        super(Net, self).__init__()

        # Question-word embedding, optionally seeded from GloVe.
        self.embedding = nn.Embedding(num_embeddings=token_size,
                                      embedding_dim=__C.WORD_EMBED_SIZE)
        if __C.USE_GLOVE:
            self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

        # Which attention-flatten head to use for the image branch.
        self.attention = __C.attention

        # Single-layer LSTM encoder over the embedded question.
        self.lstm = nn.LSTM(input_size=__C.WORD_EMBED_SIZE,
                            hidden_size=__C.HIDDEN_SIZE,
                            num_layers=1,
                            batch_first=True)

        self.img_feat_linear = nn.Linear(__C.IMG_FEAT_SIZE, __C.HIDDEN_SIZE)

        self.gen_func = gen_func
        self.backbone = MCA_ED(__C, gen_func)

        if self.attention == 'discrete':
            self.attflat_img = AttFlatText(__C, self.gen_func)
        else:
            # Continuous attention.
            self.attflat_img = AttFlat(__C, self.gen_func)

        self.attflat_lang = AttFlatText(__C)

        self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
        self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)
Пример #14
0
    def __init__(self, __C, pretrained_emb, token_size, answer_size,
                 ix_to_token):
        """MCAN network with an additional ELMo question encoder alongside
        the GloVe embedding + LSTM pipeline.

        Fix: an unsupported __C.ELMO_FEAT_SIZE previously left
        `options_file`/`weight_file` undefined and crashed later with a
        NameError at the Elmo(...) call; it now raises ValueError up front.

        :param __C: configuration object
        :param pretrained_emb: pretrained word-embedding matrix (numpy)
        :param token_size: vocabulary size
        :param answer_size: number of candidate answers
        :param ix_to_token: index -> token mapping, stored on the instance
        :raises ValueError: if __C.ELMO_FEAT_SIZE is neither 1024 nor 512
        """
        super(Net, self).__init__()

        self.embedding = nn.Embedding(num_embeddings=token_size,
                                      embedding_dim=__C.WORD_EMBED_SIZE)

        self.USE_GLOVE = __C.USE_GLOVE
        self.USE_ELMO = __C.USE_ELMO
        # Loading the GloVe embedding weights
        if self.USE_GLOVE:
            self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

        self.lstm = nn.LSTM(input_size=__C.WORD_EMBED_SIZE,
                            hidden_size=__C.HIDDEN_SIZE,
                            num_layers=1,
                            batch_first=True)

        # Select the ELMo options/weights matching the configured size.
        if __C.ELMO_FEAT_SIZE == 1024:
            options_file = __C.ELMO_CONF_PATH + "elmo_2x4096_512_2048cnn_2xhighway_options.json"
            weight_file = __C.ELMO_CONF_PATH + "elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        elif __C.ELMO_FEAT_SIZE == 512:
            options_file = __C.ELMO_CONF_PATH + "elmo_2x2048_256_2048cnn_1xhighway_options.json"
            weight_file = __C.ELMO_CONF_PATH + "elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5"
        else:
            raise ValueError(
                'Unsupported ELMO_FEAT_SIZE: %r (expected 1024 or 512)'
                % (__C.ELMO_FEAT_SIZE,))
        # One ELMo output representation, no dropout.
        self.elmo = Elmo(options_file, weight_file, 1, dropout=0)

        # LSTM encoder over the ELMo question features.
        self.qus_feat_lstm = nn.LSTM(input_size=__C.ELMO_FEAT_SIZE,
                                     hidden_size=__C.HIDDEN_SIZE,
                                     num_layers=1,
                                     batch_first=True)

        self.img_feat_linear = nn.Linear(__C.IMG_FEAT_SIZE, __C.HIDDEN_SIZE)

        self.backbone = MCA_ED(__C)

        self.attflat_img = AttFlat(__C)
        self.attflat_lang = AttFlat(__C)

        self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
        self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)

        # Presumably used to map predicted indices back to token strings —
        # confirm in the forward/decoding path.
        self.ix_to_token = ix_to_token
    def __init__(self, __C, pretrained_emb, token_size, answer_size):
        '''
        :param __C: configuration object
        :param pretrained_emb: pretrained word-embedding vectors
        :param token_size: vocabulary size (18405 tokens)
        :param answer_size: number of candidate answers
        '''
        super(Net, self).__init__()
        # Embed question tokens with nn.Embedding.
        self.embedding = nn.Embedding(
            num_embeddings=token_size,  # 18405
            embedding_dim=__C.WORD_EMBED_SIZE  # 96
        )

        # Load the pretrained GloVe weights into the embedding table.
        if __C.USE_GLOVE:
            self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))
        # LSTM network for question feature extraction.
        self.lstm = nn.LSTM(
            input_size=__C.WORD_EMBED_SIZE,  # 96
            hidden_size=__C.HIDDEN_SIZE,  # 256
            num_layers=1,  # single layer
            batch_first=True)
        # Linear transform of the image features.
        self.img_feat_linear = nn.Linear(
            __C.IMG_FEAT_SIZE,  # 2048
            __C.HIDDEN_SIZE  # 256
        )
        # Backbone network.
        self.backbone = MCA_ED(__C)

        self.attflat_img = AttFlat(__C)  # attention-flatten over image features
        self.attflat_lang = AttFlat(__C)  # attention-flatten over text features

        self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)  # normalises the fused output
        self.proj = nn.Linear(__C.FLAT_OUT_SIZE,
                              answer_size)  # projects fused features to answer logits