def __init__(self, word_vectors, hidden_size, char_vectors, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob,
                                char_vectors=char_vectors)  # added last line
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    ### start our code:
    self.selfattention = layers.SelfAttention(input_size=8 * hidden_size,
                                              hidden_size=hidden_size, dropout=0.2)
    ### end our code
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.1):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                hidden_size=hidden_size, drop_prob=drop_prob)
    # self.enc = layers.RNNEncoder(input_size=hidden_size,
    #                              hidden_size=hidden_size,
    #                              num_layers=1,
    #                              drop_prob=drop_prob)
    self.emb_encoder = layers.EmbeddingEncoder(d_model=hidden_size, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=4 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    # self.model_encoder = layers.ModelEncoder(d_model=hidden_size, drop_prob=drop_prob)
    # self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, hidden_size, use_pos, use_ner, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.use_pos = use_pos
    self.use_ner = use_ner
    rnn_input_size = hidden_size
    if use_pos:
        rnn_input_size += 1
    if use_ner:
        rnn_input_size += 1
    self.enc = layers.RNNEncoder(input_size=rnn_input_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, num_heads=8, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                hidden_size=hidden_size, drop_prob=drop_prob)
    hidden_size *= 2  # update hidden size for other layers due to char embeddings
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, pos_vectors, ner_vectors, iob_vectors,
             hidden_size, drop_prob=0.):
    super(BiDAF_CharTag, self).__init__()
    self.hidden_size = hidden_size
    self.emb = layers.EmbeddingCharTag(word_vectors=word_vectors, char_vectors=char_vectors,
                                       pos_vectors=pos_vectors, ner_vectors=ner_vectors,
                                       iob_vectors=iob_vectors, hidden_size=hidden_size,
                                       drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=self.hidden_size, hidden_size=self.hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * self.hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * self.hidden_size, hidden_size=self.hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=self.hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, args):
    super(BiDAFExtra, self).__init__()
    # Fall back to no dropout if args does not define drop_prob.
    drop_prob = args.drop_prob if hasattr(args, 'drop_prob') else 0.
    self.c_emb = layers.EmbeddingExtra(word_vectors=word_vectors, args=args, aux_feat=True)
    self.q_emb = layers.EmbeddingExtra(word_vectors=word_vectors, args=args, aux_feat=False)
    self.c_enc = layers.RNNEncoder(input_size=args.hidden_size + args.num_features,
                                   hidden_size=args.hidden_size, num_layers=1,
                                   drop_prob=drop_prob)
    self.q_enc = layers.RNNEncoder(input_size=args.hidden_size, hidden_size=args.hidden_size,
                                   num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * args.hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * args.hidden_size, hidden_size=args.hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=args.hidden_size, drop_prob=drop_prob)
    self.args = args
def __init__(self, word_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
    self.proj_bert_down = nn.Linear(in_features=768, out_features=hidden_size, bias=True)
    nn.init.xavier_uniform_(self.proj_bert_down.weight, gain=1)
    self.proj_glove_down = nn.Linear(in_features=300, out_features=hidden_size, bias=True)
    nn.init.xavier_uniform_(self.proj_glove_down.weight, gain=1)
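# A minimal shape check for the two projection layers above, assuming the model receives
# pre-computed 768-d BERT and 300-d GloVe vectors per token. How the two projected streams
# are combined downstream is not shown in this constructor, so the sketch stops at the
# projections themselves; the batch/sequence sizes below are illustrative only.
import torch
import torch.nn as nn

hidden_size = 100  # illustrative value
proj_bert_down = nn.Linear(in_features=768, out_features=hidden_size, bias=True)
proj_glove_down = nn.Linear(in_features=300, out_features=hidden_size, bias=True)

bert_vecs = torch.randn(8, 64, 768)       # (batch_size, seq_len, bert_dim)
glove_vecs = torch.randn(8, 64, 300)      # (batch_size, seq_len, glove_dim)
bert_down = proj_bert_down(bert_vecs)     # (8, 64, hidden_size)
glove_down = proj_glove_down(glove_vecs)  # (8, 64, hidden_size)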
def __init__(self, word_vectors, char_vec, word_len, hidden_size, emb_size=500, drop_prob=0.):
    super(BiDAFChar2, self).__init__()
    self.emb = layers.EmbeddingWithChar(word_vectors=word_vectors, hidden_size=emb_size,
                                        char_vec=char_vec, word_len=word_len,
                                        drop_prob=drop_prob, char_prop=0.4,
                                        hwy_drop=drop_prob, char_dim=200, act='gelu')
    self.resize = nn.Linear(emb_size, hidden_size)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, hidden_size, char_vectors, drop_prob=0.):
    super(SelfAttention_and_global, self).__init__()
    self.hidden_size = hidden_size
    self.emb = layers.Char_Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                     hidden_size=hidden_size, drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)  # replace this with yours
    self.self_att = layers.SelfAttention(hidden_size=8 * hidden_size, drop_prob=drop_prob)
    self.second_mod = layers.RNNEncoder(input_size=16 * hidden_size, hidden_size=hidden_size,
                                        num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, att_size=16 * hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF_charCNN, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.char_emb = layers.CharEmbedding(char_vectors=char_vectors, hidden_size=hidden_size,
                                         drop_prob=drop_prob)
    self.hwy = layers.HighwayEncoder(2, 2 * hidden_size)
    self.enc = layers.RNNEncoder(input_size=2 * hidden_size, hidden_size=2 * hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * 2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * 2 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, pos_vectors, ner_vectors, hidden_size,
             drop_prob=0., freeze_tag=True):
    super(BiDAF_tag_ext, self).__init__()
    self.emb = layers.Embedding_Tag_Ext(word_vectors=word_vectors, char_vectors=char_vectors,
                                        pos_vectors=pos_vectors, ner_vectors=ner_vectors,
                                        hidden_size=hidden_size, drop_prob=drop_prob,
                                        freeze_tag=freeze_tag)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    # print("vectors: ", word_vectors)
    self.emb = layers.Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                hidden_size=hidden_size, drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.self_att = layers.SelfAttention(hidden_size=8 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
    self.batch_size = 64
    self.hidden_size = hidden_size
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.hidden_size = 2 * hidden_size  # as we are concatenating word vectors and char vectors
    self.drop_prob = drop_prob  # used by the transformer encoders and the output layer below
    self.emb = layers.Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                hidden_size=hidden_size, drop_prob=drop_prob)
    self.enc = Encoder(dim=self.hidden_size, depth=1, heads=3, ff_glu=True,
                       ff_dropout=self.drop_prob, attn_dropout=self.drop_prob,
                       use_scalenorm=True, position_infused_attn=True)
    self.att = layers.TBiDAFAttention(hidden_size=self.hidden_size, drop_prob=drop_prob)
    self.mod = Encoder(dim=2 * self.hidden_size, depth=3, heads=6, ff_glu=True,
                       ff_dropout=self.drop_prob, attn_dropout=self.drop_prob,
                       use_scalenorm=True, position_infused_attn=True)
    self.out = layers.BiDAFOutput(hidden_size=self.hidden_size, drop_prob=self.drop_prob)
def __init__(self, word_vectors, word_vectors_char, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, word_vectors_char=word_vectors_char,
                                hidden_size=hidden_size, drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, hidden_size, char_vectors, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.hidden_size = hidden_size
    self.emb = layers.Embedding(word_vectors=word_vectors, hidden_size=hidden_size,
                                drop_prob=drop_prob,
                                char_vectors=char_vectors)  # added character vectors
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    ### start our code:
    self.selfattention = layers.SelfAttention(input_size=8 * hidden_size,
                                              hidden_size=hidden_size, dropout=0.2)
    ### end our code
    self.linear = nn.Linear(in_features=8 * self.hidden_size,
                            out_features=2 * self.hidden_size, bias=True)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=4, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, hidden_size, char_dict_size, char_emb_size,
             conv_kernel_size, conv_depth1, conv_output_hidden_size, drop_prob=0.):
    super(BiDAF_CBE, self).__init__()
    self.emb = layers.EmbeddingWithCharLevel(word_vectors=word_vectors,
                                             hidden_size=hidden_size,
                                             drop_prob=drop_prob,
                                             char_dict_size=char_dict_size,
                                             char_emb_size=char_emb_size,
                                             conv_kernel_size=conv_kernel_size,
                                             conv_depth1=conv_depth1,
                                             conv_output_hidden_size=conv_output_hidden_size)
    self.enc = layers.RNNEncoder(input_size=2 * hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                hidden_size=hidden_size, drop_prob=drop_prob)
    # self.enc = layers.RNNEncoder(input_size=hidden_size,
    #                              hidden_size=hidden_size,
    #                              num_layers=1,
    #                              drop_prob=drop_prob)
    # self.transformer = make_model(word_vectors, drop_prob, hidden_size)
    self.emb_enc = EncoderBlock(conv_num=4, ch_num=64, k=7)
    self.att = layers.BiDAFAttention(hidden_size=hidden_size, drop_prob=drop_prob)
    # TODO
    self.mod = layers.RNNEncoder(input_size=4 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0., twist_embeddings=False):
    super(BiDAF_charCNN_BERTEnc_BERTMod, self).__init__()
    ###
    self.twist_embeddings = twist_embeddings
    idx_list = []
    for i in range(hidden_size):
        idx_list.append(i)
        idx_list.append(hidden_size + i)
    self.register_buffer('idx_twist', torch.tensor(idx_list))
    ###
    self.emb = layers.Embedding(word_vectors=word_vectors, hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.char_emb = layers.CharEmbedding(char_vectors=char_vectors, hidden_size=hidden_size,
                                         drop_prob=drop_prob)
    self.hwy = layers.HighwayEncoder(2, 2 * hidden_size)
    self.enc = bert_layers.BertEncoder(n_layers=3,  # n_layers=4,
                                       d_feature=2 * hidden_size,
                                       n_heads=8,
                                       out_size=2 * hidden_size,
                                       # d_ff=2048,
                                       d_ff=2 * hidden_size,
                                       dropout_prob=0.1,  # dropout_prob=drop_prob,
                                       ff_activation=F.relu)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = bert_layers.BertEncoder(n_layers=3,  # n_layers=3,
                                       d_feature=8 * hidden_size,
                                       n_heads=8,
                                       out_size=2 * hidden_size,
                                       # d_ff=2048,
                                       d_ff=2 * hidden_size,
                                       dropout_prob=0.1,  # dropout_prob=drop_prob,
                                       ff_activation=F.relu)
    # self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
    #                              hidden_size=hidden_size,
    #                              num_layers=2,
    #                              drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, vectors, hidden_size, char_limit, use_transformer, use_GRU,
             drop_prob=.1, **kwargs):
    super(BiDAF, self).__init__()
    self.use_transformer = use_transformer
    self.use_GRU = use_GRU
    self.hidden_size = hidden_size
    self.emb = layers.Embedding(vectors=vectors, c2w_size=kwargs['c2w_size'],
                                hidden_size=hidden_size, drop_prob=drop_prob,
                                char_limit=char_limit)
    if not use_transformer:
        self.enc = layers.RNNEncoder(input_size=hidden_size,
                                     hidden_size=hidden_size,  # output = 2*hidden_size
                                     num_layers=1, drop_prob=drop_prob, use_GRU=use_GRU)
        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,  # output = 2*hidden_size
                                     num_layers=2, drop_prob=drop_prob, use_GRU=use_GRU)
        self.out = layers.BiDAFOutput(hidden_size=2 * hidden_size, drop_prob=drop_prob,
                                      use_transformer=use_transformer)
    else:
        self.heads = kwargs['heads']
        self.inter_size = kwargs['inter_size']
        self.enc = layers.TransformerEncoderStack(N=kwargs['enc_blocks'], heads=self.heads,
                                                  input_size=hidden_size,
                                                  output_size=hidden_size,
                                                  inter_size=self.inter_size,
                                                  num_conv=kwargs['enc_convs'],
                                                  drop_prob=drop_prob, p_sdd=kwargs['p_sdd'])
        self.squeeze = layers.InitializedLayer(4 * hidden_size, hidden_size, bias=False)
        self.mod = layers.TransformerEncoderStack(N=kwargs['mod_blocks'], heads=self.heads,
                                                  input_size=hidden_size,
                                                  output_size=hidden_size,
                                                  inter_size=self.inter_size,
                                                  num_conv=kwargs['mod_convs'],
                                                  drop_prob=drop_prob, p_sdd=kwargs['p_sdd'])
        self.out = layers.QAOutput(2 * hidden_size)
    self.att = layers.BiDAFAttention(
        hidden_size=(1 if self.use_transformer else 2) * hidden_size,
        drop_prob=drop_prob)  # (batch_size, seq_len, 4*input_hidden_size)
def __init__(self, word_vectors, hidden_size, char_vectors, drop_prob=0.):
    super(Final_Model, self).__init__()
    self.hidden_size = hidden_size
    self.emb = layers.Char_Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                     hidden_size=hidden_size, drop_prob=drop_prob)
    self.pointnetGlobal = layers.PointNet(hidden_size=hidden_size, kernel_size=1)
    self.WordCNN = layers.WordCNN(hidden_size=hidden_size, kernel_size=5, padding=2)
    self.enc_global = layers.RNNEncoder(input_size=2 * hidden_size, hidden_size=hidden_size,
                                        num_layers=1, drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    # self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
    #                                  drop_prob=drop_prob)  # replace this with yours
    self.global_att = layers.GlobalBiDAFAttention(hidden_size=2 * hidden_size,
                                                  drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=10 * hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.self_att = layers.SelfAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.second_mod = layers.RNNEncoder(input_size=4 * hidden_size, hidden_size=hidden_size,
                                        num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, att_size=4 * hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, weights_matrix, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(weights_matrix=weights_matrix, hidden_size=hidden_size)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size)
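# A minimal sketch of the forward pass this kind of baseline constructor is usually paired
# with, following the standard BiDAF layout (embed, encode, attend, model, predict). The
# layers.* call signatures, the padding index of 0, and the mask/length bookkeeping are
# assumptions rather than code taken from this file; the variants above that add
# self-attention or extra features would insert their layers between att and mod.
def forward(self, cw_idxs, qw_idxs):
    c_mask = torch.zeros_like(cw_idxs) != cw_idxs   # True for real (non-pad) tokens
    q_mask = torch.zeros_like(qw_idxs) != qw_idxs
    c_len, q_len = c_mask.sum(-1), q_mask.sum(-1)

    c_emb = self.emb(cw_idxs)                       # (batch_size, c_len, hidden_size)
    q_emb = self.emb(qw_idxs)                       # (batch_size, q_len, hidden_size)

    c_enc = self.enc(c_emb, c_len)                  # (batch_size, c_len, 2 * hidden_size)
    q_enc = self.enc(q_emb, q_len)                  # (batch_size, q_len, 2 * hidden_size)

    att = self.att(c_enc, q_enc, c_mask, q_mask)    # (batch_size, c_len, 8 * hidden_size)
    mod = self.mod(att, c_len)                      # (batch_size, c_len, 2 * hidden_size)

    return self.out(att, mod, c_mask)               # log-probabilities for start and end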
def __init__(self, word_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.TPRRNN(word_emb_size=(8 * hidden_size), n_symbols=100, d_symbols=10,
                             n_roles=20, d_roles=10, hidden_size=hidden_size)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.,
             enable_EM=True, enable_posner=True, enable_selfatt=True):
    super(BiDAF, self).__init__()
    self.embd_size = hidden_size
    self.d = self.embd_size * 2  # word_embedding + char_embedding
    self.enable_EM = enable_EM
    if enable_EM:
        self.d += 2  # word_feature
    if enable_posner:
        self.d += 10  # word_feature
    self.emb = layers.Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                hidden_size=self.embd_size, drop_prob=drop_prob,
                                enable_posner=enable_posner)
    self.enc = layers.RNNEncoder(input_size=self.d, hidden_size=self.d,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * self.d, drop_prob=drop_prob)
    self.enable_selfatt = enable_selfatt
    if enable_selfatt:
        # self.selfMatch = layers.SelfMatcher(in_size=8 * self.d,
        #                                     drop_prob=drop_prob)
        self.selfMatch = layers.StaticDotAttention(memory_size=2 * self.d,
                                                   input_size=2 * self.d,
                                                   attention_size=2 * self.d,
                                                   drop_prob=drop_prob)
        self.mod = layers.RNNEncoder(input_size=4 * self.d, hidden_size=self.d,
                                     num_layers=2, drop_prob=drop_prob)
    else:
        self.mod = layers.RNNEncoder(input_size=2 * self.d, hidden_size=self.d,
                                     num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=self.d, drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.hidden_size = hidden_size
    self.word_emb = layers.WordEmbedding(word_vectors, hidden_size)
    self.char_emb = layers.CharEmbedding(char_vectors, hidden_size)
    # assert hidden_size * 2 == (char_channel_size + word_dim)
    # highway network
    self.hwy = layers.HighwayEncoder(2, hidden_size * 2)
    # highway network
    # for i in range(2):
    #     setattr(self, f'highway_linear{i}', nn.Sequential(
    #         nn.Linear(hidden_size * 2, hidden_size * 2), nn.ReLU()))
    #     setattr(self, f'hightway_gate{i}', nn.Sequential(
    #         nn.Linear(hidden_size * 2, hidden_size * 2), nn.Sigmoid()))
    # self.emb = layers.Embedding(word_vectors=word_vectors,
    #                             hidden_size=hidden_size,
    #                             drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size * 2, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
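# Self-contained sketch of the concat-then-highway pattern that word_emb / char_emb / hwy
# above (and the commented-out highway code) imply: word and char embeddings are
# concatenated to width 2 * hidden_size, then refined by a 2-layer highway network.
# The Highway class and the random tensors are generic stand-ins, not the repo's
# layers.HighwayEncoder / WordEmbedding / CharEmbedding.
import torch
import torch.nn as nn
import torch.nn.functional as F

class Highway(nn.Module):
    def __init__(self, num_layers, dim):
        super().__init__()
        self.transforms = nn.ModuleList(nn.Linear(dim, dim) for _ in range(num_layers))
        self.gates = nn.ModuleList(nn.Linear(dim, dim) for _ in range(num_layers))

    def forward(self, x):
        for transform, gate in zip(self.transforms, self.gates):
            g = torch.sigmoid(gate(x))                  # carry/transform gate
            x = g * F.relu(transform(x)) + (1 - g) * x  # gated mix of transformed and original
        return x

batch_size, seq_len, hidden_size = 4, 20, 100
word_emb = torch.randn(batch_size, seq_len, hidden_size)  # stand-in for WordEmbedding output
char_emb = torch.randn(batch_size, seq_len, hidden_size)  # stand-in for CharEmbedding output
emb = Highway(2, 2 * hidden_size)(torch.cat([word_emb, char_emb], dim=-1))
print(emb.shape)  # torch.Size([4, 20, 200])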
def __init__(self, word_vectors, char_vocab_size, char_dim, hidden_size, drop_prob=0.,
             kernel_size=5, padding=1):
    super(BiDAF_Char, self).__init__()
    self.char_emb = layers.Char_Embedding(char_vocab_size=char_vocab_size, char_dim=char_dim,
                                          drop_prob=drop_prob, hidden_size=hidden_size,
                                          kernel_size=kernel_size, padding=padding)
    self.word_emb = layers.Word_Embedding(word_vectors=word_vectors, hidden_size=hidden_size,
                                          drop_prob=drop_prob)
    self.hwy = layers.HighwayEncoder(num_layers=2, hidden_size=hidden_size)
    # 08/09: note that input_size changed here, because after the highway layer the
    # concatenation of char_emb and word_emb has shape (bs, seq_len, 2*h).
    self.enc = layers.RNNEncoder(input_size=2 * hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_mat, w_embedding_size, c_embeding_size, c_vocab_size, hidden_size,
             num_head=1, drop_prob=0.2):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_mat, w_embedding_size, c_embeding_size, c_vocab_size,
                                hidden_size, drop_prob)
    self.enc = layers.RNNEncoder(input_size=w_embedding_size + hidden_size,
                                 hidden_size=hidden_size, num_layers=1, drop_prob=drop_prob)
    self.var_dropout = layers.VariationalDropout(drop_prob, batch_first=True)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.linear_trans = nn.Sequential(nn.Linear(8 * hidden_size, 2 * hidden_size), nn.ReLU())
    self.attn_mod = layers.RNNEncoder(hidden_size * 2, hidden_size, num_layers=1,
                                      drop_prob=drop_prob)
    self.self_attn = layers.BiDAFSelfAttention(num_head, 2 * hidden_size)
    self.linear_attn = nn.Sequential(nn.Linear(2 * hidden_size, 2 * hidden_size), nn.ReLU())
    self.mod = layers.RNNEncoder(input_size=2 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, ch_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    torch.cuda.empty_cache()
    self.emb = layers.Embedding(word_vectors=word_vectors, ch_vectors=ch_vectors,
                                hidden_size=hidden_size, drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.chunk = layers.ChunkLayer(hidden_size=hidden_size, max_ans_len=10)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, max_ans_len=10)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF_RNet, self).__init__()
    self.emb = layers.WordCharEmbedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                        cnn_size=16, hidden_size=hidden_size,
                                        drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.selfatt = layers.SelfMatchingAttention(input_size=8 * hidden_size,
                                                hidden_size=4 * hidden_size,
                                                num_layers=3, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.hidden_size = hidden_size * 2  # adding the char embedding doubles the hidden size
    self.emb = layers.Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                hidden_size=hidden_size, drop_prob=drop_prob)
    # input_size=self.hidden_size + 2 because we add two extra features (avg_attention) to
    # both the char embedding and the word embedding to boost performance. avg_attention
    # uses the attention mechanism to let the model itself learn a weighted average of the
    # vectors.
    self.enc = layers.RNNEncoder(input_size=self.hidden_size + 2,
                                 hidden_size=self.hidden_size,
                                 # num_layers=2,  # the number of layers can be changed, but with little or no improvement
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * self.hidden_size, drop_prob=drop_prob)
    # Extra layer of self-attention based on the paper 'Simple and Effective Multi-Paragraph
    # Reading Comprehension': https://arxiv.org/pdf/1710.10723.pdf
    self.self_att = layers.SelfAtt(hidden_size=2 * self.hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * self.hidden_size, hidden_size=self.hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    # Planned to use cosine similarity or TF-IDF to add extra features to the embedding,
    # but a proper implementation needs more thought.
    # self.sim = nn.CosineSimilarity(dim=1, eps=1e-6)
    self.out = layers.BiDAFOutput(hidden_size=self.hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, hidden_size, char_vocab_size, drop_prob=0., bidaf_layers=2):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, hidden_size=hidden_size,
                                char_vocab_size=char_vocab_size, char_embedding_size=100,
                                kernel_size=5, drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=2 * hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.encs_att = nn.ModuleList([layers.RNNEncoder(input_size=8 * hidden_size,
                                                     hidden_size=hidden_size,
                                                     num_layers=1, drop_prob=drop_prob)
                                   for _ in range(bidaf_layers)])
    self.atts = nn.ModuleList([layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                                     drop_prob=drop_prob)
                               for _ in range(bidaf_layers)])
    self.gates = nn.ModuleList([nn.Linear(8 * hidden_size, 8 * hidden_size)
                                for _ in range(bidaf_layers)])
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
    self.drop_out = nn.Dropout(drop_prob)
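# One plausible way the stacked encs_att / atts / gates defined above could be applied inside
# forward: each hop re-encodes the 8 * hidden_size attention output, re-attends against the
# question encoding, and mixes the old and new attention maps with a sigmoid gate. This
# wiring is purely an assumption for illustration; the constructor above only defines the
# modules, not how they are composed.
def _multi_hop(self, att, q_enc, c_mask, q_mask, c_len):
    for enc_att, hop_att, gate in zip(self.encs_att, self.atts, self.gates):
        c_hop = enc_att(att, c_len)                      # (batch, c_len, 2 * hidden_size)
        new_att = hop_att(c_hop, q_enc, c_mask, q_mask)  # (batch, c_len, 8 * hidden_size)
        g = torch.sigmoid(gate(att))                     # per-dimension mixing weights
        att = g * new_att + (1 - g) * att                # gated residual update
    return self.drop_out(att)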