Example #1
    def __init__(self, word_vectors, char_vectors, emb_size, hidden_size, drop_prob=0.):
        super(QANet, self).__init__()
        self.num_model_blocks = 7       # as suggested by QANet paper
        self.encoder_conv_layers = 4    # as suggested by QANet paper
        self.model_conv_layers = 2      # as suggested by QANet paper

        self.emb = layers.charEmbedding(word_vectors=word_vectors,
                                        char_vectors=char_vectors,
                                        emb_size=emb_size,
                                        hidden_size=hidden_size,
                                        drop_prob=drop_prob)

        self.enc = layers.QAEncoder(input_size=hidden_size,
                                    hidden_size=hidden_size,
                                    num_layers=self.encoder_conv_layers,
                                    drop_prob=drop_prob)

        self.att = layers.ContextQueryAttention(hidden_size=hidden_size,
                                                drop_prob=drop_prob)

        # Register the stacked encoder blocks in an nn.ModuleList; a plain Python
        # list would hide their parameters from model.parameters() and .to(device).
        self.model_blocks = nn.ModuleList([
            layers.QAEncoder(input_size=4*hidden_size,
                             hidden_size=4*hidden_size,
                             num_layers=self.model_conv_layers,
                             drop_prob=drop_prob)
            for _ in range(self.num_model_blocks)
        ])

        # Caution: may have to write new output block in layers.py
        self.out = layers.QAOutput(hidden_size=8*hidden_size)
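A hypothetical instantiation sketch for the constructor above, assuming the full QANet class and its layers module are importable; the tensor shapes and sizes below are illustrative placeholders, not values from the source.

# Hypothetical usage sketch (not part of the original code).
import torch

word_vectors = torch.randn(50000, 300)   # dummy stand-in for a pretrained word embedding matrix
char_vectors = torch.randn(1000, 64)     # dummy stand-in for a pretrained char embedding matrix

model = QANet(word_vectors=word_vectors,
              char_vectors=char_vectors,
              emb_size=300,
              hidden_size=128,
              drop_prob=0.1)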
Example #2
    def __init__(self, vectors, hidden_size, char_limit, use_transformer, use_GRU, drop_prob=.1, **kwargs):
        super(BiDAF, self).__init__()
        self.use_transformer = use_transformer
        self.use_GRU = use_GRU
        self.hidden_size = hidden_size

        self.emb = layers.Embedding(vectors=vectors,
                                    c2w_size=kwargs['c2w_size'],
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob,
                                    char_limit=char_limit)
        if not use_transformer:
            self.enc = layers.RNNEncoder(input_size=hidden_size,
                                         hidden_size=hidden_size,  # output = 2*hidden_size
                                         num_layers=1,
                                         drop_prob=drop_prob,
                                         use_GRU=use_GRU)
            self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                         hidden_size=hidden_size,  # output = 2*hidden_size
                                         num_layers=2,
                                         drop_prob=drop_prob,
                                         use_GRU=use_GRU)
            self.out = layers.BiDAFOutput(hidden_size=2 * hidden_size, drop_prob=drop_prob,
                                          use_transformer=use_transformer)
        else:
            self.heads = kwargs['heads']
            self.inter_size = kwargs['inter_size']
            self.enc = layers.TransformerEncoderStack(
                N=kwargs['enc_blocks'],
                heads=self.heads,
                input_size=hidden_size,
                output_size=hidden_size,
                inter_size=self.inter_size,
                num_conv=kwargs['enc_convs'],
                drop_prob=drop_prob,
                p_sdd=kwargs['p_sdd']
                )
            self.squeeze = layers.InitializedLayer(4*hidden_size, hidden_size, bias=False)
            self.mod = layers.TransformerEncoderStack(
                N=kwargs['mod_blocks'],
                heads=self.heads,
                input_size=hidden_size,
                output_size=hidden_size,
                inter_size=self.inter_size,
                num_conv=kwargs['mod_convs'],
                drop_prob=drop_prob,
                p_sdd=kwargs['p_sdd']
                )
            self.out = layers.QAOutput(2*hidden_size)

        self.att = layers.BiDAFAttention(hidden_size=(1 if self.use_transformer else 2)*hidden_size,
                                         drop_prob=drop_prob)  # (batch_size, seq_len, 4*input_hidden_size)
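A hypothetical configuration for the transformer branch above: the keyword names are the kwargs keys the constructor reads, but every value, the dummy vectors tensor, and the meaning noted for each key are illustrative assumptions rather than values taken from the source.

# Hypothetical usage sketch; all values are placeholders.
import torch

vectors = torch.randn(50000, 300)   # dummy stand-in, assumed to be a pretrained word-vector matrix

model = BiDAF(vectors=vectors,
              hidden_size=128,
              char_limit=16,
              use_transformer=True,
              use_GRU=False,
              drop_prob=0.1,
              c2w_size=64,       # assumed: char-to-word embedding size for layers.Embedding
              heads=8,           # attention heads per encoder block
              inter_size=512,    # feed-forward inner size
              enc_blocks=1,      # blocks in the embedding encoder stack
              enc_convs=4,       # convolutions per embedding-encoder block
              mod_blocks=7,      # blocks in the model encoder stack
              mod_convs=2,       # convolutions per model-encoder block
              p_sdd=0.1)         # assumed: stochastic-depth drop probability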
Example #3
    def __init__(self,
                 word_vectors,
                 char_vectors,
                 context_max_len,
                 query_max_len,
                 d_model,
                 train_cemb=False,
                 pad=0,
                 dropout=0.1,
                 num_head=8):
        """
        """
        super(QANet, self).__init__()
        if train_cemb:
            self.char_emb = nn.Embedding.from_pretrained(char_vectors,
                                                         freeze=False)
            print("Training char_embeddings")
        else:
            self.char_emb = nn.Embedding.from_pretrained(char_vectors)

        self.word_emb = nn.Embedding.from_pretrained(word_vectors)
        self.LC = context_max_len
        self.LQ = query_max_len
        self.num_head = num_head
        self.pad = pad
        self.dropout = dropout

        wemb_dim = word_vectors.size()[1]
        cemb_dim = char_vectors.size()[1]
        #print("Word vector dim-%d, Char vector dim-%d" % (wemb_dim, cemb_dim))

        #Layer Declarations
        self.emb = layers.Embedding(wemb_dim, cemb_dim, d_model)
        self.emb_enc = layers.Encoder(num_conv=4,
                                      d_model=d_model,
                                      num_head=num_head,
                                      k=7,
                                      dropout=dropout)  # use the configured dropout instead of a hard-coded 0.1
        self.cq_att = layers.CQAttention(d_model=d_model)
        self.cq_resizer = layers.Initialized_Conv1d(
            d_model * 4, d_model
        )  # Forward layer to project the 4*d_model CQAttention output back to d_model
        self.model_enc_blks = nn.ModuleList([
            layers.Encoder(num_conv=2,
                           d_model=d_model,
                           num_head=num_head,
                           k=5,
                           dropout=dropout) for _ in range(7)
        ])
        self.out = layers.QAOutput(d_model)
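As with the first example, a hypothetical instantiation sketch (same assumptions about the class and its layers module; all sizes and lengths are placeholders). One point worth noting from the constructor: nn.Embedding.from_pretrained freezes its weights by default, so the character embeddings are only fine-tuned when train_cemb=True.

# Hypothetical usage sketch; sizes are placeholders.
import torch

word_vectors = torch.randn(50000, 300)   # dummy pretrained word vectors (stay frozen)
char_vectors = torch.randn(1000, 64)     # dummy pretrained char vectors

model = QANet(word_vectors,
              char_vectors,
              context_max_len=400,
              query_max_len=50,
              d_model=128,
              train_cemb=True,    # passes freeze=False, so char embeddings are trained
              num_head=8)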
Example #4
    def __init__(self,
                 word_vectors,
                 char_vectors,
                 context_max_len,
                 query_max_len,
                 d_model,
                 d_head,
                 mem_len=0,
                 same_length=False,
                 clamp_len=-1,
                 train_cemb=False,
                 pad=0,
                 dropout=0.1,
                 num_head=8):
        """
        """
        super(QANet, self).__init__()
        if train_cemb:
            self.char_emb = nn.Embedding.from_pretrained(char_vectors,
                                                         freeze=False)
        else:
            self.char_emb = nn.Embedding.from_pretrained(char_vectors)

        self.word_emb = nn.Embedding.from_pretrained(word_vectors)
        self.LC = context_max_len
        self.LQ = query_max_len
        self.num_head = num_head
        self.pad = pad
        self.dropout = dropout
        self.mem_len = mem_len
        self.d_head = d_head
        self.d_model = d_model
        self.same_length = same_length
        self.clamp_len = clamp_len
        self.ext_len = 0

        wemb_dim = word_vectors.size()[1]
        cemb_dim = char_vectors.size()[1]

        #Layer Declarations
        self.emb = layers.Embedding(wemb_dim, cemb_dim, d_model)
        self.emb_enc = layers.Encoder(4,
                                      num_head,
                                      d_model,
                                      d_head,
                                      d_inner=d_model * 4,
                                      k=7,
                                      dropout=0.1)  #Hard coded
        self.cq_att = layers.CQAttention(d_model=d_model)
        self.cq_resizer = layers.Initialized_Conv1d(
            d_model * 4, d_model
        )  # Forward layer to project the 4*d_model CQAttention output back to d_model
        self.model_enc_blks = nn.ModuleList([
            layers.Encoder(2,
                           num_head,
                           d_model,
                           d_head,
                           d_inner=d_model * 4,
                           k=5,
                           dropout=0.1) for _ in range(7)
        ])
        self.out = layers.QAOutput(d_model)
        self.drop = nn.Dropout(dropout)

        self._create_parameters()