def __init__(self, word_vectors, char_vectors, emb_size, hidden_size, drop_prob=0.):
    super(QANet, self).__init__()
    self.num_model_blocks = 7     # as suggested by QANet paper
    self.encoder_conv_layers = 4  # as suggested by QANet paper
    self.model_conv_layers = 2    # as suggested by QANet paper
    self.emb = layers.charEmbedding(word_vectors=word_vectors,
                                    char_vectors=char_vectors,
                                    emb_size=emb_size,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)
    self.enc = layers.QAEncoder(input_size=hidden_size,
                                hidden_size=hidden_size,
                                num_layers=self.encoder_conv_layers,
                                drop_prob=drop_prob)
    self.att = layers.ContextQueryAttention(hidden_size=hidden_size,
                                            drop_prob=drop_prob)
    # Use nn.ModuleList rather than a plain Python list so the blocks'
    # parameters are registered with this module (and seen by the optimizer).
    self.model_blocks = nn.ModuleList()
    for i in range(self.num_model_blocks):
        self.model_blocks.append(layers.QAEncoder(input_size=4 * hidden_size,
                                                  hidden_size=4 * hidden_size,
                                                  num_layers=self.model_conv_layers,
                                                  drop_prob=drop_prob))
    # Caution: may have to write a new output block in layers.py
    self.out = layers.QAOutput(hidden_size=8 * hidden_size)
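# Rationale for the nn.ModuleList above: submodules kept in a plain Python list are not
# registered with the parent nn.Module, so their parameters never reach .parameters(),
# .to(device), or the optimizer. Minimal standalone check; the _ListHolder class and
# layer sizes below are illustrative only, not part of this repo.
import torch.nn as nn

class _ListHolder(nn.Module):
    def __init__(self):
        super().__init__()
        self.plain = [nn.Linear(4, 4)]                   # NOT registered
        self.listed = nn.ModuleList([nn.Linear(4, 4)])   # registered

# Only the ModuleList's Linear (4*4 weights + 4 biases = 20 params) is counted.
assert sum(p.numel() for p in _ListHolder().parameters()) == 20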
def __init__(self, vectors, hidden_size, char_limit, use_transformer, use_GRU, drop_prob=.1, **kwargs):
    super(BiDAF, self).__init__()
    self.use_transformer = use_transformer
    self.use_GRU = use_GRU
    self.hidden_size = hidden_size
    self.emb = layers.Embedding(vectors=vectors,
                                c2w_size=kwargs['c2w_size'],
                                hidden_size=hidden_size,
                                drop_prob=drop_prob,
                                char_limit=char_limit)
    if not use_transformer:
        # Recurrent variant: bidirectional RNN (LSTM or GRU) encoder and modeling layers.
        self.enc = layers.RNNEncoder(input_size=hidden_size,
                                     hidden_size=hidden_size,  # output = 2*hidden_size
                                     num_layers=1,
                                     drop_prob=drop_prob,
                                     use_GRU=use_GRU)
        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,  # output = 2*hidden_size
                                     num_layers=2,
                                     drop_prob=drop_prob,
                                     use_GRU=use_GRU)
        self.out = layers.BiDAFOutput(hidden_size=2 * hidden_size,
                                      drop_prob=drop_prob,
                                      use_transformer=use_transformer)
    else:
        # Transformer variant: convolution + self-attention encoder stacks.
        self.heads = kwargs['heads']
        self.inter_size = kwargs['inter_size']
        self.enc = layers.TransformerEncoderStack(N=kwargs['enc_blocks'],
                                                  heads=self.heads,
                                                  input_size=hidden_size,
                                                  output_size=hidden_size,
                                                  inter_size=self.inter_size,
                                                  num_conv=kwargs['enc_convs'],
                                                  drop_prob=drop_prob,
                                                  p_sdd=kwargs['p_sdd'])
        # Project the 4*hidden_size attention output back down to hidden_size.
        self.squeeze = layers.InitializedLayer(4 * hidden_size, hidden_size, bias=False)
        self.mod = layers.TransformerEncoderStack(N=kwargs['mod_blocks'],
                                                  heads=self.heads,
                                                  input_size=hidden_size,
                                                  output_size=hidden_size,
                                                  inter_size=self.inter_size,
                                                  num_conv=kwargs['mod_convs'],
                                                  drop_prob=drop_prob,
                                                  p_sdd=kwargs['p_sdd'])
        self.out = layers.QAOutput(2 * hidden_size)
    # Attention input is 1*hidden_size for the transformer encoders and 2*hidden_size
    # for the bidirectional RNN encoders; its output is (batch_size, seq_len, 4*input_hidden_size).
    self.att = layers.BiDAFAttention(hidden_size=(1 if self.use_transformer else 2) * hidden_size,
                                     drop_prob=drop_prob)
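# Hedged usage sketch for the constructor above: when use_transformer=True it reads
# 'heads', 'inter_size', 'enc_blocks', 'enc_convs', 'mod_blocks', 'mod_convs', and
# 'p_sdd' (plus the always-required 'c2w_size') from **kwargs. This assumes the BiDAF
# class and layers.py from this repo are importable; the tensor shape and hyperparameter
# values below are illustrative only.
import torch

dummy_word_vectors = torch.randn(100, 300)  # (vocab_size, embed_dim), placeholder values
model = BiDAF(vectors=dummy_word_vectors,
              hidden_size=128,
              char_limit=16,
              use_transformer=True,
              use_GRU=False,
              drop_prob=0.1,
              c2w_size=64,
              heads=8,
              inter_size=512,
              enc_blocks=1,
              enc_convs=4,
              mod_blocks=7,
              mod_convs=2,
              p_sdd=0.1)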
def __init__(self, word_vectors, char_vectors, context_max_len, query_max_len,
             d_model, train_cemb=False, pad=0, dropout=0.1, num_head=8):
    """QANet model: embedding, embedding encoder, context-query attention,
    a stack of model encoder blocks, and a span-prediction output layer."""
    super(QANet, self).__init__()
    if train_cemb:
        self.char_emb = nn.Embedding.from_pretrained(char_vectors, freeze=False)
        print("Training char_embeddings")
    else:
        self.char_emb = nn.Embedding.from_pretrained(char_vectors)
    self.word_emb = nn.Embedding.from_pretrained(word_vectors)

    self.LC = context_max_len
    self.LQ = query_max_len
    self.num_head = num_head
    self.pad = pad
    self.dropout = dropout
    wemb_dim = word_vectors.size()[1]
    cemb_dim = char_vectors.size()[1]
    # print("Word vector dim-%d, Char vector dim-%d" % (wemb_dim, cemb_dim))

    # Layer declarations
    self.emb = layers.Embedding(wemb_dim, cemb_dim, d_model)
    self.emb_enc = layers.Encoder(num_conv=4, d_model=d_model, num_head=num_head,
                                  k=7, dropout=0.1)
    self.cq_att = layers.CQAttention(d_model=d_model)
    # Forward layer to reduce the 4*d_model cq_att output back to d_model.
    self.cq_resizer = layers.Initialized_Conv1d(d_model * 4, d_model)
    self.model_enc_blks = nn.ModuleList([
        layers.Encoder(num_conv=2, d_model=d_model, num_head=num_head, k=5, dropout=0.1)
        for _ in range(7)
    ])
    self.out = layers.QAOutput(d_model)
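# Note on the train_cemb flag above: nn.Embedding.from_pretrained freezes the weights by
# default (freeze=True), so only the freeze=False branch lets gradients update the
# character vectors. Standalone illustration with a tiny random table (the values and
# sizes are placeholders, not the project's pretrained vectors):
import torch
import torch.nn as nn

pretrained = torch.randn(5, 3)                                # (num_chars, char_dim), dummy values
frozen = nn.Embedding.from_pretrained(pretrained)             # default: freeze=True
trainable = nn.Embedding.from_pretrained(pretrained, freeze=False)
assert frozen.weight.requires_grad is False
assert trainable.weight.requires_grad is True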
def __init__(self, word_vectors, char_vectors, context_max_len, query_max_len,
             d_model, d_head, mem_len=0, same_length=False, clamp_len=-1,
             train_cemb=False, pad=0, dropout=0.1, num_head=8):
    """QANet variant whose encoder blocks take a per-head dimension (d_head) and
    recurrence-memory settings (mem_len, same_length, clamp_len)."""
    super(QANet, self).__init__()
    if train_cemb:
        self.char_emb = nn.Embedding.from_pretrained(char_vectors, freeze=False)
    else:
        self.char_emb = nn.Embedding.from_pretrained(char_vectors)
    self.word_emb = nn.Embedding.from_pretrained(word_vectors)

    self.LC = context_max_len
    self.LQ = query_max_len
    self.num_head = num_head
    self.pad = pad
    self.dropout = dropout
    self.mem_len = mem_len
    self.d_head = d_head
    self.d_model = d_model
    self.same_length = same_length
    self.clamp_len = clamp_len
    self.ext_len = 0
    wemb_dim = word_vectors.size()[1]
    cemb_dim = char_vectors.size()[1]

    # Layer declarations
    self.emb = layers.Embedding(wemb_dim, cemb_dim, d_model)
    self.emb_enc = layers.Encoder(4, num_head, d_model, d_head,
                                  d_inner=d_model * 4, k=7, dropout=0.1)  # hard-coded
    self.cq_att = layers.CQAttention(d_model=d_model)
    # Forward layer to reduce the 4*d_model cq_att output back to d_model.
    self.cq_resizer = layers.Initialized_Conv1d(d_model * 4, d_model)
    self.model_enc_blks = nn.ModuleList([
        layers.Encoder(2, num_head, d_model, d_head, d_inner=d_model * 4, k=5, dropout=0.1)
        for _ in range(7)
    ])
    self.out = layers.QAOutput(d_model)
    self.drop = nn.Dropout(dropout)
    self._create_parameters()
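# Shape bookkeeping for cq_resizer above: context-query attention yields 4*d_model
# features per position, and the resizer maps them back to d_model before the model
# encoder blocks. Initialized_Conv1d is presumably a kernel-size-1 nn.Conv1d with custom
# initialization (an assumption); a plain Conv1d reproduces the same shape change. The
# (batch, channels, length) layout and the sizes below are illustrative only.
import torch
import torch.nn as nn

d_model, batch, seq_len = 96, 2, 400
resizer = nn.Conv1d(d_model * 4, d_model, kernel_size=1)
att_out = torch.randn(batch, d_model * 4, seq_len)   # stand-in for the cq_att output
assert resizer(att_out).shape == (batch, d_model, seq_len)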