def __init__(self, __C, pretrained_emb, token_size, answer_size):
    super(Net, self).__init__()

    self.embedding = nn.Embedding(
        num_embeddings=token_size,
        embedding_dim=__C.WORD_EMBED_SIZE  # 300
    )

    # Loading the GloVe embedding weights
    if __C.USE_GLOVE:
        self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    self.lstm = nn.LSTM(
        input_size=__C.WORD_EMBED_SIZE,
        hidden_size=__C.HIDDEN_SIZE,
        num_layers=1,
        batch_first=True
    )

    self.img_feat_linear = nn.Linear(
        __C.IMG_FEAT_SIZE,  # Faster R-CNN 2048-D features
        __C.HIDDEN_SIZE
    )

    self.backbone = MCA_ED(__C)

    self.attflat_img = AttFlat(__C)
    self.attflat_lang = AttFlat(__C)

    # Separate normalization and projection heads for the image and
    # language branches
    self.proj_norm_img = LayerNorm(__C.FLAT_OUT_SIZE)
    self.proj_norm_lang = LayerNorm(__C.FLAT_OUT_SIZE)
    self.proj_img = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)
    self.proj_lang = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)

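# Hedged example: AttFlat-style pooling. AttFlat is used throughout this file
# but defined elsewhere; the self-contained sketch below shows the
# attention-flatten pooling it presumably performs (MCAN style): score each
# timestep, softmax over the sequence, weighted-sum per glimpse, project to
# FLAT_OUT_SIZE. All sizes here are illustrative assumptions, not the real
# config values.
import torch
import torch.nn as nn

class TinyAttFlat(nn.Module):
    def __init__(self, hidden=8, glimpses=1, flat_out=16):
        super().__init__()
        self.att = nn.Linear(hidden, glimpses)             # per-token glimpse scores
        self.merge = nn.Linear(hidden * glimpses, flat_out)
        self.glimpses = glimpses

    def forward(self, x, mask=None):                       # x: (B, T, hidden)
        scores = self.att(x)                               # (B, T, glimpses)
        if mask is not None:                               # mask: (B, T), True at padding
            scores = scores.masked_fill(mask.unsqueeze(-1), -1e9)
        w = torch.softmax(scores, dim=1)                   # attention over the sequence
        pooled = torch.cat(
            [(w[..., g:g + 1] * x).sum(dim=1) for g in range(self.glimpses)],
            dim=-1
        )
        return self.merge(pooled)                          # (B, flat_out)

pooled = TinyAttFlat()(torch.randn(2, 5, 8))               # -> torch.Size([2, 16])
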
def __init__(self, __C):
    super(SA, self).__init__()

    self.mhatt = MHAtt(__C)
    self.ffn = FFN(__C)

    self.dropout1 = nn.Dropout(__C.DROPOUT_R)
    self.norm1 = LayerNorm(__C.HIDDEN_SIZE)

    self.dropout2 = nn.Dropout(__C.DROPOUT_R)
    self.norm2 = LayerNorm(__C.HIDDEN_SIZE)

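# Hedged example: the residual pattern SA presumably implements. A minimal,
# self-contained analogue of this self-attention block using plain torch.nn
# modules (the real MHAtt/FFN/LayerNorm live elsewhere in the repo).
# Residual + dropout + norm wraps both sub-layers, following the standard
# MCAN/Transformer encoder pattern; sizes are illustrative.
import torch
import torch.nn as nn

class TinySA(nn.Module):
    def __init__(self, hidden=8, heads=2, dropout=0.1):
        super().__init__()
        self.mhatt = nn.MultiheadAttention(hidden, heads, dropout=dropout, batch_first=True)
        self.ffn = nn.Sequential(nn.Linear(hidden, 4 * hidden), nn.ReLU(), nn.Linear(4 * hidden, hidden))
        self.dropout1 = nn.Dropout(dropout)
        self.norm1 = nn.LayerNorm(hidden)
        self.dropout2 = nn.Dropout(dropout)
        self.norm2 = nn.LayerNorm(hidden)

    def forward(self, x, key_padding_mask=None):
        att, _ = self.mhatt(x, x, x, key_padding_mask=key_padding_mask)
        x = self.norm1(x + self.dropout1(att))             # residual around attention
        x = self.norm2(x + self.dropout2(self.ffn(x)))     # residual around FFN
        return x

y = TinySA()(torch.randn(2, 5, 8))                         # -> torch.Size([2, 5, 8])
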
def __init__(self, __C, gen_func=torch.softmax):
    super(SGA, self).__init__()

    self.mhatt1 = MHAtt(__C, gen_func=gen_func)
    self.mhatt2 = MHAtt(__C)
    self.ffn = FFN(__C)

    self.dropout1 = nn.Dropout(__C.DROPOUT_R)
    self.norm1 = LayerNorm(__C.HIDDEN_SIZE)

    self.dropout2 = nn.Dropout(__C.DROPOUT_R)
    self.norm2 = LayerNorm(__C.HIDDEN_SIZE)

    self.dropout3 = nn.Dropout(__C.DROPOUT_R)
    self.norm3 = LayerNorm(__C.HIDDEN_SIZE)

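# Hedged example: SGA's self- then guided-attention ordering. SGA adds a
# second attention stage: mhatt1 attends within x (the image sequence),
# mhatt2 lets x attend over a guiding sequence y (the question). This
# self-contained sketch assumes the standard MCAN SGA ordering; the exact
# masking and gen_func handling live in MHAtt, which is not shown here.
import torch
import torch.nn as nn

class TinySGA(nn.Module):
    def __init__(self, hidden=8, heads=2, dropout=0.1):
        super().__init__()
        self.mhatt1 = nn.MultiheadAttention(hidden, heads, dropout=dropout, batch_first=True)
        self.mhatt2 = nn.MultiheadAttention(hidden, heads, dropout=dropout, batch_first=True)
        self.ffn = nn.Sequential(nn.Linear(hidden, 4 * hidden), nn.ReLU(), nn.Linear(4 * hidden, hidden))
        self.drops = nn.ModuleList([nn.Dropout(dropout) for _ in range(3)])
        self.norms = nn.ModuleList([nn.LayerNorm(hidden) for _ in range(3)])

    def forward(self, x, y):
        att1, _ = self.mhatt1(x, x, x)                     # self-attention over x
        x = self.norms[0](x + self.drops[0](att1))
        att2, _ = self.mhatt2(x, y, y)                     # x queries the guiding sequence y
        x = self.norms[1](x + self.drops[1](att2))
        x = self.norms[2](x + self.drops[2](self.ffn(x)))  # position-wise FFN
        return x

out = TinySGA()(torch.randn(2, 7, 8), torch.randn(2, 5, 8))  # -> torch.Size([2, 7, 8])
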
def __init__(self, __C, answer_size):
    super(Net, self).__init__()
    self.__C = __C

    self.bert = BertModel.from_pretrained(self.__C.PRETRAINED_PATH)
    self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

    self.lstm = nn.LSTM(
        input_size=self.__C.PRETRAINED_HIDDEN,
        hidden_size=__C.HIDDEN_SIZE,
        num_layers=1,
        batch_first=True
    )

    self.img_feat_linear = nn.Linear(
        __C.IMG_FEAT_SIZE,
        __C.HIDDEN_SIZE
    )

    self.backbone = MCA_ED(__C)

    self.attflat_img = AttFlat(__C)
    self.attflat_lang = AttFlat(__C)

    self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
    self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)

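# Hedged example: feeding BERT hidden states into the LSTM above. A sketch of
# how the question presumably flows through self.bert and self.lstm.
# 'bert-base-uncased' stands in for __C.PRETRAINED_PATH, and 768/512 stand in
# for PRETRAINED_HIDDEN/HIDDEN_SIZE; all are assumptions.
import torch
from transformers import BertModel, BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert = BertModel.from_pretrained('bert-base-uncased')
enc = tokenizer(["what color is the cat?"], return_tensors='pt')
hidden = bert(**enc).last_hidden_state             # (1, seq_len, 768)
lstm = torch.nn.LSTM(input_size=768, hidden_size=512, num_layers=1, batch_first=True)
lang_feat, _ = lstm(hidden)                        # (1, seq_len, 512)
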
def __init__(self, __C, q_emb, token_size, answer_size):
    super(QNet, self).__init__()

    self.attflat_lang = AttFlat(__C)
    self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
    self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)

def __init__(self, __C, vocab_size=30000):
    super(Net, self).__init__()

    output_dir = './core/bert'
    self.detector = SimpleDetector(pretrained=True, average_pool=True, final_dim=2048)
    # self.bert = BertForSequenceClassification.from_pretrained(output_dir)
    self.bert = BertModel.from_pretrained('bert-base-uncased')

    self.img_feat_linear = nn.Linear(__C.IMG_FEAT_SIZE, __C.HIDDEN_SIZE)
    self.lstm = nn.LSTM(
        input_size=768,
        hidden_size=__C.HIDDEN_SIZE,
        num_layers=1,
        batch_first=True
    )

    self.backbone = MCA_ED(__C)

    self.attflat_img = AttFlat(__C)
    self.attflat_lang = AttFlat(__C)
    self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)

    self.output_feat_linear = nn.Linear(768, __C.FLAT_OUT_SIZE)
    self.backbone2 = MCA_ED(__C)
    self.output_proj_linear = nn.Linear(__C.HIDDEN_SIZE, vocab_size)
    self.softmax = nn.LogSoftmax(dim=1)

def __init__(self, __C, answer_size):
    super(Net, self).__init__()

    self.roberta_layer = RobertaModel.from_pretrained(__C.PRETRAINED_NAME, revision='main')

    # self.embedding = nn.Embedding(
    #     num_embeddings=token_size,
    #     embedding_dim=__C.WORD_EMBED_SIZE
    # )
    #
    # # Loading the GloVe embedding weights
    # if __C.USE_GLOVE:
    #     self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))
    #
    # self.lstm = nn.LSTM(
    #     input_size=__C.WORD_EMBED_SIZE,
    #     hidden_size=__C.HIDDEN_SIZE,
    #     num_layers=1,
    #     batch_first=True
    # )

    self.img_feat_linear = nn.Linear(
        __C.IMG_FEAT_SIZE,
        __C.HIDDEN_SIZE
    )

    self.backbone = MCA_ED(__C)

    self.attflat_img = AttFlat(__C)
    self.attflat_lang = AttFlat(__C)

    self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
    self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)

def __init__(self, __C, q_emb, token_size, answer_size):
    super(QNet, self).__init__()

    self.nnList = nn.ModuleList(
        [nn.Linear(__C.FLAT_OUT_SIZE, __C.FLAT_OUT_SIZE) for _ in range(1)]
    )
    # self.nnList = nn.ModuleList([FFN(__C) for _ in range(1)])
    # self.attflat_lang = AttFlat(__C)

    self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
    self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)

def __init__(self, __C):
    super(SGSGA, self).__init__()

    self.mhatt1 = MHAtt(__C)
    self.mhatt2 = MHAtt(__C)
    self.mhatt3 = MHAtt(__C)
    self.mhatt4 = MHAtt(__C)
    self.ffn1 = FFN(__C)
    self.ffn2 = FFN(__C)

    self.dropout1 = nn.Dropout(__C.DROPOUT_R)
    self.norm1 = LayerNorm(__C.HIDDEN_SIZE)

    self.dropout2 = nn.Dropout(__C.DROPOUT_R)
    self.norm2 = LayerNorm(__C.HIDDEN_SIZE)

    self.dropout3 = nn.Dropout(__C.DROPOUT_R)
    self.norm3 = LayerNorm(__C.HIDDEN_SIZE)

    self.dropout4 = nn.Dropout(__C.DROPOUT_R)
    self.norm4 = LayerNorm(__C.HIDDEN_SIZE)

    self.dropout5 = nn.Dropout(__C.DROPOUT_R)
    self.norm5 = LayerNorm(__C.HIDDEN_SIZE)

    self.dropout6 = nn.Dropout(__C.DROPOUT_R)
    self.norm6 = LayerNorm(__C.HIDDEN_SIZE)

def __init__(self, __C, q_emb, token_size, answer_size):
    super(QNet, self).__init__()

    # self.attflat_lang = AttFlat(__C)
    self.mlp = MLP(
        in_size=__C.FLAT_OUT_SIZE,   # 1024
        mid_size=__C.FLAT_OUT_SIZE,  # 1024
        out_size=answer_size,
        dropout_r=__C.DROPOUT_R,
        use_relu=True
    )
    self.proj_norm = LayerNorm(answer_size)
    self.proj = nn.Linear(answer_size, answer_size)

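# Hedged example: the likely flow through this question-only head. The
# ordering below (MLP -> LayerNorm -> Linear over answer logits) is an
# assumption inferred from the module sizes; the real forward() lives
# elsewhere. 1024 and 3129 are the usual FLAT_OUT_SIZE / VQA answer-vocabulary
# sizes, used purely for illustration.
import torch
import torch.nn as nn

mlp = nn.Sequential(
    nn.Linear(1024, 1024), nn.ReLU(inplace=True), nn.Dropout(0.1),
    nn.Linear(1024, 3129),
)
proj_norm = nn.LayerNorm(3129)
proj = nn.Linear(3129, 3129)

flat_q = torch.randn(4, 1024)                      # pooled question feature
logits = proj(proj_norm(mlp(flat_q)))              # (4, answer_size)
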
def __init__(self, __C, pretrained_emb, token_size, answer_size):
    super(Net, self).__init__()
    self.__C = __C

    self.embedding = nn.Embedding(
        num_embeddings=token_size,
        embedding_dim=__C.WORD_EMBED_SIZE
    )

    # Loading the GloVe embedding weights
    if __C.USE_GLOVE:
        self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    self.lstm = nn.LSTM(
        input_size=__C.WORD_EMBED_SIZE,
        hidden_size=__C.HIDDEN_SIZE,
        num_layers=1,
        batch_first=True
    )

    self.img_feat_linear = nn.Linear(
        __C.IMG_FEAT_SIZE,
        __C.HIDDEN_SIZE
    )

    self.backbone = MCA_ED(__C)

    self.attflat_img = AttFlat(__C)
    self.attflat_lang = AttFlat(__C)

    self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
    self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)

    self.bi_attention = BiAttention(
        __C,
        __C.HIDDEN_SIZE,
        __C.MAX_TOKEN,
        __C.IMG_FEAT_PAD_SIZE
    )
    self.h_fh = nn.Linear(__C.HIDDEN_SIZE, __C.FLAT_OUT_SIZE)
    self.ag_attention = AGAttention(__C)
    self.fh_h = nn.Linear(__C.FLAT_OUT_SIZE, __C.HIDDEN_SIZE)

def __init__(self, __C, pretrained_emb, token_size, answer_size):
    super(Net, self).__init__()
    copy_data = __C

    self.embedding = nn.Embedding(
        num_embeddings=token_size,
        embedding_dim=__C.WORD_EMBED_SIZE
    )

    self.mlp = MLP(
        in_size=__C.HIDDEN_SIZE,
        mid_size=__C.FLAT_MLP_SIZE,
        out_size=__C.FLAT_GLIMPSES,
        dropout_r=__C.DROPOUT_R,
        use_relu=True
    )

    # Loading the GloVe embedding weights
    if __C.USE_GLOVE:
        self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    self.lstm = nn.LSTM(
        input_size=__C.WORD_EMBED_SIZE,
        hidden_size=__C.HIDDEN_SIZE,
        num_layers=1,
        batch_first=True
    )

    self.img_feat_linear = nn.Linear(
        __C.IMG_FEAT_SIZE,
        2048
    )

    self.backbone = MCA_ED(__C)

    self.attflat_img = AttFlat(__C)
    self.attflat_lang = AttFlat(__C)

    self.proj_norm = LayerNorm(1024)
    self.proj = nn.Linear(1024, answer_size)

    self.dense_coattn = DCNLayer(2048, 1024, 4, 3, 5, 0.3)
    self.predict = PredictLayer(2048, 1024, 4, 3129, 0.3)
    self.apply(Initializer.xavier_normal)

def __init__(self, __C, pretrained_emb, token_size, answer_size, gen_func=torch.softmax):
    super(Net, self).__init__()

    self.embedding = nn.Embedding(
        num_embeddings=token_size,
        embedding_dim=__C.WORD_EMBED_SIZE
    )

    # Loading the GloVe embedding weights
    if __C.USE_GLOVE:
        self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    self.attention = __C.attention  # added this

    # if __C.USE_IMG_POS_EMBEDDINGS:
    #     self.img_pos_x_embeddings = nn.Embedding(num_embeddings=14, embedding_dim=int(__C.HIDDEN_SIZE / 2))
    #     torch.nn.init.xavier_uniform_(self.img_pos_x_embeddings.weight)
    #     self.img_pos_y_embeddings = nn.Embedding(num_embeddings=14, embedding_dim=int(__C.HIDDEN_SIZE / 2))
    #     torch.nn.init.xavier_uniform_(self.img_pos_y_embeddings.weight)
    # self.use_img_pos_embeddings = __C.USE_IMG_POS_EMBEDDINGS

    self.lstm = nn.LSTM(
        input_size=__C.WORD_EMBED_SIZE,
        hidden_size=__C.HIDDEN_SIZE,
        num_layers=1,
        batch_first=True
    )

    self.img_feat_linear = nn.Linear(__C.IMG_FEAT_SIZE, __C.HIDDEN_SIZE)

    self.gen_func = gen_func
    self.backbone = MCA_ED(__C, gen_func)

    if self.attention == 'discrete':
        self.attflat_img = AttFlatText(__C, self.gen_func)
    else:  # use continuous attention
        self.attflat_img = AttFlat(__C, self.gen_func)
    self.attflat_lang = AttFlatText(__C)

    self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
    self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)

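# Hedged example: what a pluggable gen_func buys you. gen_func replaces
# softmax when turning attention scores into weights; the helper below is an
# illustrative assumption about how MHAtt consumes it, not the repo's MHAtt.
# Any callable with a softmax-like (scores, dim) signature (e.g. a sparse
# alternative such as sparsemax) can be dropped in.
import math
import torch

def scaled_dot_attention(q, k, v, gen_func=torch.softmax):
    # q, k, v: (batch, heads, seq, d_k); gen_func maps scores -> weights along dim=-1
    scores = q @ k.transpose(-2, -1) / math.sqrt(q.size(-1))
    return gen_func(scores, dim=-1) @ v

q = k = v = torch.randn(2, 4, 5, 8)
out = scaled_dot_attention(q, k, v)                # default softmax attention
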
def __init__(self, __C, pretrained_emb, token_size, answer_size, ix_to_token):
    super(Net, self).__init__()

    self.embedding = nn.Embedding(
        num_embeddings=token_size,
        embedding_dim=__C.WORD_EMBED_SIZE
    )
    self.USE_GLOVE = __C.USE_GLOVE
    self.USE_ELMO = __C.USE_ELMO

    # Loading the GloVe embedding weights
    if self.USE_GLOVE:
        self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    self.lstm = nn.LSTM(
        input_size=__C.WORD_EMBED_SIZE,
        hidden_size=__C.HIDDEN_SIZE,
        num_layers=1,
        batch_first=True
    )

    # Load the ELMo model matching the configured feature size
    if __C.ELMO_FEAT_SIZE == 1024:
        options_file = __C.ELMO_CONF_PATH + "elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = __C.ELMO_CONF_PATH + "elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
    elif __C.ELMO_FEAT_SIZE == 512:
        options_file = __C.ELMO_CONF_PATH + "elmo_2x2048_256_2048cnn_1xhighway_options.json"
        weight_file = __C.ELMO_CONF_PATH + "elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5"
    self.elmo = Elmo(options_file, weight_file, 1, dropout=0)
    self.qus_feat_lstm = nn.LSTM(
        input_size=__C.ELMO_FEAT_SIZE,
        hidden_size=__C.HIDDEN_SIZE,
        num_layers=1,
        batch_first=True
    )

    self.img_feat_linear = nn.Linear(__C.IMG_FEAT_SIZE, __C.HIDDEN_SIZE)

    self.backbone = MCA_ED(__C)

    self.attflat_img = AttFlat(__C)
    self.attflat_lang = AttFlat(__C)

    self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
    self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)
    self.ix_to_token = ix_to_token

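# Hedged example: querying the Elmo module above via the allennlp API.
# Sentences go through batch_to_ids to character ids, and the module returns
# a dict of representations. The exact call site in this model's forward()
# is an assumption; the file paths below are placeholders matching the names
# configured above.
from allennlp.modules.elmo import Elmo, batch_to_ids

options_file = "elmo_2x4096_512_2048cnn_2xhighway_options.json"
weight_file = "elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"

elmo = Elmo(options_file, weight_file, num_output_representations=1, dropout=0)
char_ids = batch_to_ids([["what", "color", "is", "the", "cat"]])
qus_feat = elmo(char_ids)["elmo_representations"][0]   # (1, seq_len, ELMO_FEAT_SIZE)
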
def __init__(self, __C, pretrained_emb, token_size, answer_size):
    '''
    :param __C: configuration object
    :param pretrained_emb: pretrained word-embedding vectors
    :param token_size: vocabulary size (18,405 tokens)
    :param answer_size: number of candidate answers
    '''
    super(Net, self).__init__()

    # Embed tokens with nn.Embedding
    self.embedding = nn.Embedding(
        num_embeddings=token_size,         # 18405
        embedding_dim=__C.WORD_EMBED_SIZE  # 96
    )

    # Loading the GloVe embedding weights
    if __C.USE_GLOVE:
        self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    # LSTM for question feature extraction
    self.lstm = nn.LSTM(
        input_size=__C.WORD_EMBED_SIZE,  # 96
        hidden_size=__C.HIDDEN_SIZE,     # 256
        num_layers=1,
        batch_first=True
    )

    # Linear projection of image features
    self.img_feat_linear = nn.Linear(
        __C.IMG_FEAT_SIZE,  # 2048
        __C.HIDDEN_SIZE     # 256
    )

    # Backbone network
    self.backbone = MCA_ED(__C)

    self.attflat_img = AttFlat(__C)   # attention-flatten pooling of image features
    self.attflat_lang = AttFlat(__C)  # attention-flatten pooling of question features

    self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)          # normalize the fused feature before projection
    self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)  # linear head producing answer scores