Example #1
    def build(self):
        seq1 = Input(name='seq1', shape=[self.config['seq1_maxlen']])
        seq2 = Input(name='seq2', shape=[self.config['seq2_maxlen']])
        # Shared embedding layer, initialized from the pretrained matrix in config['embed']
        embedding = Embedding(self.config['vocab_size'], self.config['embed_size'], weights=[self.config['embed']], trainable=self.config['embed_trainable'])

        seq1_embed = embedding(seq1)
        seq1_embed = Dropout(0.5)(seq1_embed)
        seq2_embed = embedding(seq2)
        seq2_embed = Dropout(0.5)(seq2_embed)

        # One bidirectional LSTM encodes both sequences, so the weights are shared
        lstm = Bidirectional(LSTM(self.config['hidden_size'], return_sequences=True, dropout=self.config['dropout_rate']))
        seq1_rep = lstm(seq1_embed)
        seq2_rep = lstm(seq2_embed)

        # Pairwise interaction between every position of seq1 and seq2, flattened per sample
        cross = Match(match_type='concat')([seq1_rep, seq2_rep])
        cross_reshape = Reshape((-1, ))(cross)

        # k-max pooling: keep only the 100 strongest interaction signals per sample
        mm_k = Lambda(lambda x: K.tf.nn.top_k(x, k=100, sorted=True)[0])(cross_reshape)

        pool_flat_drop = Dropout(rate=self.config['dropout_rate'])(mm_k)

        if self.config['target_mode'] == 'classification':
            out = Dense(2, activation='softmax')(pool_flat_drop)
        elif self.config['target_mode'] in ['regression', 'ranking']:
            out = Dense(1)(pool_flat_drop)

        model = Model(inputs=[seq1, seq2], outputs=out)
        return model
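These build() methods appear to follow the MatchZoo 1.x (Keras) style: they assume a self.config dictionary, the project's Match interaction layer, and a show_layer_info logging helper, none of which are standard Keras. A minimal, hypothetical sketch of the configuration Example #1 reads and how the returned model could be compiled (the wrapper class name and all values are illustrative only):

    import numpy as np

    # Only the keys Example #1 actually reads; every value here is a placeholder.
    config = {
        'seq1_maxlen': 20,
        'seq2_maxlen': 100,
        'vocab_size': 30000,
        'embed_size': 300,
        'embed': np.random.rand(30000, 300),   # pretrained embedding matrix
        'embed_trainable': False,
        'hidden_size': 128,
        'dropout_rate': 0.5,
        'target_mode': 'ranking',
    }

    model = MyMatchingModel(config).build()        # hypothetical wrapper class
    model.compile(optimizer='adam', loss='mse')    # e.g. a pointwise regression/ranking loss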
Example #2
    def build_model(self):
        encoding_layer1 = Bidirectional(GRU(300,
                                            return_sequences=True,
                                            dropout=0.2))
        encoded_sentence_1 = encoding_layer1(self.Q1_emb)  # (?, len, 600)
        encoded_sentence_2 = encoding_layer1(self.Q2_emb)  # (?, len, 600)

        q_conv1 = Conv1D(32, 3, padding='same')(encoded_sentence_1)
        show_layer_info('Conv1D', q_conv1)
        d_conv1 = Conv1D(32, 3, padding='same')(encoded_sentence_2)
        show_layer_info('Conv1D', d_conv1)

        cross = Match(match_type='plus')([q_conv1, d_conv1])
        show_layer_info('Match-plus', cross)

        z = Reshape((self.word_max_len, self.word_max_len, -1))(cross)
        show_layer_info('Reshape', z)

        for i in range(2):
            z = Conv2D(filters=self.kernel_counts_2d[i], kernel_size=self.kernel_sizes_2d[i], padding='same', activation='relu')(z)
            show_layer_info('Conv2D', z)
            z = MaxPooling2D(pool_size=(self.mpool_sizes_2d[i][0], self.mpool_sizes_2d[i][1]))(z)
            show_layer_info('MaxPooling2D', z)

        pool1_flat = Flatten()(z)
        show_layer_info('Flatten', pool1_flat)
        pool1_flat_drop = Dropout(rate=0.2)(pool1_flat)
        show_layer_info('Dropout', pool1_flat_drop)

        out_ = Dense(1, activation='sigmoid')(pool1_flat_drop)
        show_layer_info('Dense', out_)

        return out_
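Match(match_type='plus') pairs every position of the first sequence with every position of the second and combines the two feature vectors by element-wise addition, which is why the result can be reshaped into an image-like (len, len, channels) tensor and processed with Conv2D/MaxPooling2D. A rough NumPy sketch of that interaction for a single sample, assuming this is what the project's Match layer computes:

    import numpy as np

    def match_plus(q, d):
        # q: (len1, channels), d: (len2, channels) -> (len1, len2, channels);
        # every (query position, doc position) pair is combined by addition.
        return q[:, None, :] + d[None, :, :]

    cross = match_plus(np.random.rand(30, 32), np.random.rand(30, 32))
    print(cross.shape)  # (30, 30, 32), i.e. the tensor reshaped above to (word_max_len, word_max_len, -1)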
Example #3
    def build(self):
        query = Input(name='query', shape=(self.config['text1_maxlen'], ))
        show_layer_info('Input', query)
        doc = Input(name='doc', shape=(self.config['text2_maxlen'], ))
        show_layer_info('Input', doc)

        embedding = Embedding(self.config['vocab_size'],
                              self.config['embed_size'],
                              weights=[self.config['embed']],
                              trainable=self.embed_trainable)
        q_embed = embedding(query)
        show_layer_info('Embedding', q_embed)
        d_embed = embedding(doc)
        show_layer_info('Embedding', d_embed)

        q_rep = Bidirectional(
            LSTM(self.config['hidden_size'],
                 return_sequences=True,
                 dropout=self.config['dropout_rate']))(q_embed)
        show_layer_info('Bidirectional-LSTM', q_rep)
        d_rep = Bidirectional(
            LSTM(self.config['hidden_size'],
                 return_sequences=True,
                 dropout=self.config['dropout_rate']))(d_embed)
        show_layer_info('Bidirectional-LSTM', d_rep)

        cross = Match(match_type='dot')([q_rep, d_rep])
        #cross = Dot(axes=[2, 2])([q_embed, d_embed])
        show_layer_info('Match-dot', cross)

        cross_reshape = Reshape((-1, ))(cross)
        show_layer_info('Reshape', cross_reshape)

        mm_k = Lambda(lambda x: K.tf.nn.top_k(
            x, k=self.config['topk'], sorted=True)[0])(cross_reshape)
        show_layer_info('Lambda-topk', mm_k)

        pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(mm_k)
        show_layer_info('Dropout', pool1_flat_drop)

        if self.config['target_mode'] == 'classification':
            out_ = Dense(2, activation='softmax')(pool1_flat_drop)
        elif self.config['target_mode'] in ['regression', 'ranking']:
            out_ = Dense(1)(pool1_flat_drop)
        show_layer_info('Dense', out_)

        #model = Model(inputs=[query, doc, dpool_index], outputs=out_)
        model = Model(inputs=[query, doc], outputs=out_)
        model.summary()
        return model
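The Lambda over K.tf.nn.top_k implements k-max pooling: the dot-product match matrix is flattened per sample and only the config['topk'] strongest matching signals are kept as features for the final Dense layer. An equivalent NumPy sketch for a single sample (shapes are illustrative):

    import numpy as np

    cross = np.random.rand(10, 40)     # (text1_maxlen, text2_maxlen) match scores
    flat = cross.reshape(-1)           # flatten all query/doc position pairs
    topk = np.sort(flat)[::-1][:100]   # keep the strongest signals, like tf.nn.top_k(..., sorted=True)[0]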
Example #4
    def build(self):
        def conv2d_work(input_dim):
            seq = Sequential()
            assert self.config['num_conv2d_layers'] > 0
            for i in range(self.config['num_conv2d_layers']):
                seq.add(Conv2D(filters=self.config['2d_kernel_counts'][i], kernel_size=self.config['2d_kernel_sizes'][i], padding='same', activation='relu'))
                seq.add(MaxPooling2D(pool_size=(self.config['2d_mpool_sizes'][i][0], self.config['2d_mpool_sizes'][i][1])))
            return seq
        query = Input(name='query', shape=(self.config['text1_maxlen'],))
        show_layer_info('Input', query)
        doc = Input(name='doc', shape=(self.config['text2_maxlen'],))
        show_layer_info('Input', doc)

        embedding = Embedding(self.config['vocab_size'], self.config['embed_size'], weights=[self.config['embed']], trainable=self.embed_trainable)
        q_embed = embedding(query)
        show_layer_info('Embedding', q_embed)
        d_embed = embedding(doc)
        show_layer_info('Embedding', d_embed)

        q_conv1 = Conv1D(self.config['1d_kernel_count'], self.config['1d_kernel_size'], padding='same')(q_embed)
        show_layer_info('Conv1D', q_conv1)
        d_conv1 = Conv1D(self.config['1d_kernel_count'], self.config['1d_kernel_size'], padding='same')(d_embed)
        show_layer_info('Conv1D', d_conv1)

        cross = Match(match_type='plus')([q_conv1, d_conv1])
        show_layer_info('Match-plus', cross)

        z = Reshape((self.config['text1_maxlen'], self.config['text2_maxlen'], -1))(cross)
        show_layer_info('Reshape', z)

        for i in range(self.config['num_conv2d_layers']):
            z = Conv2D(filters=self.config['2d_kernel_counts'][i], kernel_size=self.config['2d_kernel_sizes'][i], padding='same', activation='relu')(z)
            show_layer_info('Conv2D', z)
            z = MaxPooling2D(pool_size=(self.config['2d_mpool_sizes'][i][0], self.config['2d_mpool_sizes'][i][1]))(z)
            show_layer_info('MaxPooling2D', z)

        #dpool = DynamicMaxPooling(self.config['dpool_size'][0], self.config['dpool_size'][1])([conv2d, dpool_index])
        pool1_flat = Flatten()(z)
        show_layer_info('Flatten', pool1_flat)
        pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(pool1_flat)
        show_layer_info('Dropout', pool1_flat_drop)

        if self.config['target_mode'] == 'classification':
            out_ = Dense(2, activation='softmax')(pool1_flat_drop)
        elif self.config['target_mode'] in ['regression', 'ranking']:
            out_ = Dense(1)(pool1_flat_drop)
        show_layer_info('Dense', out_)

        model = Model(inputs=[query, doc], outputs=out_)
        return model
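For reference, a hypothetical config fragment covering the convolution-related keys that Example #4 (and Example #5) read; the values are illustrative and not taken from the original project:

    conv_config = {
        '1d_kernel_count': 32,                 # filters of the per-text Conv1D
        '1d_kernel_size': 3,
        'num_conv2d_layers': 2,                # how many Conv2D + MaxPooling2D blocks
        '2d_kernel_counts': [64, 64],          # one entry per Conv2D layer
        '2d_kernel_sizes': [[3, 3], [3, 3]],
        '2d_mpool_sizes': [[2, 2], [2, 2]],    # pooling window per block
    }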
Example #5
    def build(self):
        def conv2d_work(input_dim):
            seq = Sequential()
            assert self.config['num_conv2d_layers'] > 0
            for i in range(self.config['num_conv2d_layers']):
                seq.add(
                    Conv2D(filters=self.config['2d_kernel_counts'][i],
                           kernel_size=self.config['2d_kernel_sizes'][i],
                           padding='same',
                           activation='relu'))
                seq.add(
                    MaxPooling2D(
                        pool_size=(self.config['2d_mpool_sizes'][i][0],
                                   self.config['2d_mpool_sizes'][i][1])))
            return seq

        query = Input(name='query', shape=(self.config['text1_maxlen'], ))
        show_layer_info('Input', query)
        doc = Input(name='doc', shape=(self.config['text2_maxlen'], ))
        show_layer_info('Input', doc)

        embedding = Embedding(self.config['vocab_size'],
                              self.config['embed_size'],
                              weights=[self.config['embed']],
                              trainable=self.embed_trainable)
        q_embed = embedding(query)
        show_layer_info('Embedding', q_embed)
        d_embed = embedding(doc)
        show_layer_info('Embedding', d_embed)

        # ########## compute attention weights for the query words: better than mvlstm alone
        if self.config["text1_attention"]:
            q_w = Dense(1,
                        kernel_initializer=self.initializer_gate,
                        use_bias=False)(
                            q_embed)  # use_bias=False for a simple linear combination
            show_layer_info('Dense', q_w)
            q_w = Lambda(lambda x: softmax(x, axis=1),
                         output_shape=(self.config['text1_maxlen'], ),
                         name="q_w")(q_w)
            show_layer_info('Lambda-softmax', q_w)
            # ########## add attention weights for Q_words
            q_w_layer = Lambda(lambda x: K.repeat_elements(
                q_w, rep=self.config['embed_size'], axis=2))(q_w)
            show_layer_info('repeat', q_w_layer)
            q_embed = Multiply()([q_w_layer, q_embed])
            show_layer_info('Dot-qw', q_embed)
        # ####################### attention text1

        # ########## compute attention weights for the document words:
        if self.config['text2_attention']:
            d_w = Dense(1,
                        kernel_initializer=self.initializer_gate,
                        use_bias=False)(d_embed)
            show_layer_info('Dense', d_w)
            d_w = Lambda(lambda x: softmax(x, axis=1),
                         output_shape=(self.config['text2_maxlen'], ),
                         name="d_w")(d_w)
            show_layer_info('Lambda-softmax', d_w)
            # ########## add attention weights for D_words
            d_w_layer = Lambda(lambda x: K.repeat_elements(
                d_w, rep=self.config['embed_size'], axis=2))(d_w)
            d_embed = Multiply()([d_w_layer, d_embed])
            show_layer_info('Dot-dw', d_embed)
        # ####################### attention text2

        q_conv1 = Conv1D(self.config['1d_kernel_count'],
                         self.config['1d_kernel_size'],
                         padding='same')(q_embed)
        show_layer_info('Conv1D', q_conv1)
        d_conv1 = Conv1D(self.config['1d_kernel_count'],
                         self.config['1d_kernel_size'],
                         padding='same')(d_embed)
        show_layer_info('Conv1D', d_conv1)

        cross = Match(match_type='plus')([q_conv1, d_conv1])
        show_layer_info('Match-plus', cross)

        z = Reshape((self.config['text1_maxlen'], self.config['text2_maxlen'],
                     -1))(cross)
        show_layer_info('Reshape', z)

        # add the passages attention
        if self.config["passage_attention"]:
            # ########################## compute the passages attention weights
            p_cross = Permute((2, 1, 3))(z)
            show_layer_info('p_cross', p_cross)
            starts = [
                i for i in range(0, self.config['text2_maxlen'],
                                 self.config['context_len'])
            ]
            slice_layer = [
                crop(1, start, start + self.config['context_len'])
                for start in starts
            ]
            slices = [slice_layer_i(p_cross) for slice_layer_i in slice_layer]
            attention_ws = []
            for slice in slices:
                s_dw = Dense(1, use_bias=False)(slice)
                s_dw = Lambda(lambda x: softmax(x, axis=1))(s_dw)
                attention_ws.append(s_dw)
            d_w = concatenate(attention_ws, 1)
            show_layer_info('attW', d_w)
            z = Multiply()([d_w, p_cross])
            show_layer_info('Multiply', z)
            # ########################## passages attention

        for i in range(self.config['num_conv2d_layers']):
            z = Conv2D(filters=self.config['2d_kernel_counts'][i],
                       kernel_size=self.config['2d_kernel_sizes'][i],
                       padding='same',
                       activation='relu')(z)
            show_layer_info('Conv2D', z)
            z = MaxPooling2D(
                pool_size=(self.config['2d_mpool_sizes'][i][0],
                           self.config['2d_mpool_sizes'][i][1]))(z)
            show_layer_info('MaxPooling2D', z)

        #dpool = DynamicMaxPooling(self.config['dpool_size'][0], self.config['dpool_size'][1])([conv2d, dpool_index])
        pool1_flat = Flatten()(z)
        show_layer_info('Flatten', pool1_flat)
        pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(pool1_flat)
        show_layer_info('Dropout', pool1_flat_drop)

        if self.config['target_mode'] == 'classification':
            out_ = Dense(2, activation='softmax')(pool1_flat_drop)
        elif self.config['target_mode'] in ['regression', 'ranking']:
            out_ = Dense(1)(pool1_flat_drop)
        show_layer_info('Dense', out_)

        model = Model(inputs=[query, doc], outputs=out_)
        return model
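The passage-attention block above slices the match tensor into fixed-length contexts with a crop helper that is not shown in the excerpt. A plausible reconstruction of such a helper, as a Lambda that slices one axis (this is an assumption, not the original implementation):

    from keras.layers import Lambda

    def crop(dimension, start, end):
        # Build a Lambda layer that keeps [start, end) along `dimension` of its input.
        def slicer(x):
            if dimension == 1:
                return x[:, start:end]
            if dimension == 2:
                return x[:, :, start:end]
            return x[..., start:end]
        return Lambda(slicer)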
Example #6
    def build(self):
        query = Input(name="query", batch_shape=[None, None], dtype='int32')
        show_layer_info('Input', query)
        doc = Input(name="doc", batch_shape=[None, None], dtype='int32')
        show_layer_info('Input', doc)

        # Both branches of the original conditional were identical, so the input dimension
        # is simply the vocabulary size regardless of mask_zero.
        input_embed = self.config['vocab_size']
        embedding = Embedding(input_embed, self.config['embed_size'], weights=[self.config['embed']],
                              trainable=self.embed_trainable, name="embeddings",
                              mask_zero=self.config['mask_zero'])
        q_embed = embedding(query)
        show_layer_info('Embedding', q_embed)
        d_embed = embedding(doc)
        show_layer_info('Embedding', d_embed)
        q_lstm_layer = Bidirectional(LSTM(self.config["number_q_lstm_units"],
                                          dropout=self.config["q_lstm_dropout"],
                                          recurrent_dropout=self.config["q_lstm_dropout"],
                                          return_sequences=True),
                                     name="q_lstm")
        d_lstm_layer = Bidirectional(LSTM(self.config["number_d_lstm_units"],
                                          dropout=self.config["d_lstm_dropout"],
                                          recurrent_dropout=self.config["d_lstm_dropout"],
                                          return_sequences=True),
                                     name="d_lstm")
        q_mat = q_lstm_layer(q_embed)
        show_layer_info('Bidirectional-LSTM', q_mat)
        d_mat = d_lstm_layer(d_embed)
        show_layer_info('Bidirectional-LSTM', d_mat)
        input_mat = Match(normalize=True)([q_mat, d_mat])  # the result is cosine similarity matrix
        show_layer_info('Match', input_mat)
        #input_mat = BatchNormalization()(input_mat)
        #input_mat = Dropout(self.config["dropout_rate"])(input_mat)
        input_mat = Reshape((self.config["text1_maxlen"], self.config["text2_maxlen"]))(input_mat)
        show_layer_info('Reshape', input_mat)
        merged = Conv1D(self.config['filters'], self.config['kernel_size'],
                        activation=self.config['conv_activation'], name="conv1", padding='same')(input_mat)
        merged = BatchNormalization()(merged)
        merged = Dropout(self.config["conv_dropout"])(merged)
        show_layer_info('Conv1D', merged)
        merged = MaxPooling1D(pool_size=self.config['pool_size'], name="maxPool1")(merged)
        show_layer_info('MaxPooling1D', merged)
        # Feed the output of the first conv/pool block into the second convolution
        # (the original applied conv2 to input_mat, discarding the first block entirely).
        merged = Conv1D(self.config['filters'], self.config['kernel_size'],
                        activation=self.config['conv_activation'], name="conv2", padding='same')(merged)
        show_layer_info('Conv1D', merged)
        merged = BatchNormalization()(merged)
        merged = Dropout(self.config["conv_dropout"])(merged)
        merged = MaxPooling1D(pool_size=self.config['pool_size'], name="maxPool2")(merged)
        show_layer_info('MaxPooling1D', merged)
        """
        merged = Conv1D(self.config['filters'], self.config['kernel_size'],
                        activation=self.config['conv_activation'], name="conv3", padding='same')(input_mat)
        show_layer_info('Conv1D', merged)
        merged = BatchNormalization()(merged)
        merged = Dropout(self.config["conv_dropout"])(merged)
        merged = MaxPooling1D(pool_size=self.config['pool_size'], name="maxPool3")(merged)
        """
        show_layer_info('MaxPooling1D', merged)
        merged = Flatten()(merged)

        dense = Dense(self.config["hidden_sizes"][0], activation=self.config['hidden_activation'],
                      name="MLP_combine_0")(merged)
        show_layer_info('Dense', dense)
        for i in range(self.config["num_layers"] - 1):
            dense = BatchNormalization()(dense)
            dense = Dropout(self.config["dropout_rate"])(dense)
            dense = Dense(self.config["hidden_sizes"][i + 1], activation=self.config['hidden_activation'],
                          name="MLP_combine_" + str(i + 1))(dense)
            show_layer_info('Dense', dense)
        dense = BatchNormalization()(dense)
        dense = Dropout(self.config["dropout_rate"])(dense)
        # out_ = Dense(1, activation=self.config['output_activation'], name="MLP_out")(dense)
        if self.config['target_mode'] == 'classification':
            out_ = Dense(2, activation=self.config['output_activation'], name="MLP_out")(dense)
        elif self.config['target_mode'] in ['regression', 'ranking']:
            out_ = Dense(1, activation=self.config['output_activation'], name="MLP_out")(dense)
        show_layer_info('Output', out_)

        model = Model(inputs=[query, doc], outputs=[out_])
        plot_model(model, to_file='../conv_wc_model_plot.png', show_shapes=True, show_layer_names=True)
        return model
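Match(normalize=True) L2-normalizes both sequence representations before taking the dot product, so input_mat is a cosine-similarity matrix between query and document positions, as the inline comment says. A NumPy sketch of that interaction for one sample (a sketch of the idea, not the layer itself):

    import numpy as np

    def cosine_match(q, d):
        # q: (len1, dim), d: (len2, dim) -> (len1, len2) cosine similarities
        q = q / np.linalg.norm(q, axis=-1, keepdims=True)
        d = d / np.linalg.norm(d, axis=-1, keepdims=True)
        return q @ d.T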
Example #7
    def build(self):
        query = Input(name='query', shape=(self.config['text1_maxlen'], ))
        show_layer_info('Input', query)
        doc = Input(name='doc', shape=(self.config['text2_maxlen'], ))
        show_layer_info('Input', doc)

        embedding = Embedding(self.config['vocab_size'],
                              self.config['embed_size'],
                              weights=[self.config['embed']],
                              trainable=self.embed_trainable)
        q_embed = embedding(query)
        show_layer_info('Embedding_q', q_embed)

        # ########## compute attention weights for the query words: better than mvlstm alone
        if self.config["text1_attention"]:
            q_w = Dense(1,
                        kernel_initializer=self.initializer_gate,
                        use_bias=False)(
                            q_embed)  # use_bias=False for a simple linear combination
            show_layer_info('Dense', q_w)
            q_w = Lambda(lambda x: softmax(x, axis=1),
                         output_shape=(self.config['text1_maxlen'], ))(q_w)
            show_layer_info('Lambda-softmax', q_w)
            # ########## add attention weights for Q_words
            q_w_layer = Lambda(lambda x: K.repeat_elements(
                q_w, rep=self.config['embed_size'], axis=2))(q_w)
            show_layer_info('repeat', q_w_layer)
            q_embed = Multiply()([q_w_layer, q_embed])
            show_layer_info('Dot-qw', q_embed)
        # ####################### attention

        d_embed = embedding(doc)
        show_layer_info('Embedding_d', d_embed)

        # ########## compute attention weights for the document words:
        if self.config['text2_attention']:
            d_w = Dense(1,
                        kernel_initializer=self.initializer_gate,
                        use_bias=False)(d_embed)
            show_layer_info('Dense', d_w)
            d_w = Lambda(lambda x: softmax(x, axis=1),
                         output_shape=(self.config['text2_maxlen'], ))(d_w)
            show_layer_info('Lambda-softmax', d_w)
            # ########## add attention weights for D_words
            d_w_layer = Lambda(lambda x: K.repeat_elements(
                d_w, rep=self.config['embed_size'], axis=2))(d_w)
            d_embed = Multiply()([d_w_layer, d_embed])
            show_layer_info('Dot-dw', d_embed)
        # ####################### attention

        q_rep = Bidirectional(
            LSTM(self.config['hidden_size'],
                 return_sequences=True,
                 dropout=self.config['dropout_rate']))(q_embed)
        show_layer_info('Bidirectional-LSTM_q', q_rep)
        q_rep = BatchNormalization()(q_rep)
        q_rep = Dropout(self.config["dropout_lstm"])(q_rep)

        d_rep = Bidirectional(
            LSTM(self.config['hidden_size'],
                 return_sequences=True,
                 dropout=self.config['dropout_rate']))(d_embed)
        show_layer_info('Bidirectional-LSTM_d', d_rep)
        d_rep = BatchNormalization()(d_rep)
        d_rep = Dropout(self.config["dropout_lstm"])(d_rep)

        cross = Match(match_type='dot')([q_rep, d_rep])
        show_layer_info('Match-dot', cross)

        # ####################### convolutions
        cov1 = Conv2D(self.config['filters'],
                      self.config['kernel_size'],
                      activation='relu',
                      name="conv1",
                      padding='same')(cross)
        cov1 = BatchNormalization()(cov1)
        cov1 = Dropout(self.config["dropout_rate"])(cov1)
        show_layer_info('Conv1', cov1)

        cov1 = MaxPooling2D(pool_size=3, name="maxPool")(cov1)
        show_layer_info('MaxPooling2D', cov1)

        cov2 = Conv2D(self.config['filters'],
                      self.config['kernel_size'],
                      activation='relu',
                      name="conv2",
                      padding='same')(cov1)
        cov2 = BatchNormalization()(cov2)
        cross = Dropout(self.config["dropout_rate"])(cov2)
        show_layer_info('Conv2', cov2)
        # ###################### convolutions

        cross_reshape = Reshape((-1, ))(cross)
        show_layer_info('Reshape', cross_reshape)

        mm_k = Lambda(lambda x: K.tf.nn.top_k(
            x, k=self.config['topk'], sorted=True)[0])(cross_reshape)
        show_layer_info('Lambda-topk', mm_k)

        pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(mm_k)
        show_layer_info('Dropout', pool1_flat_drop)

        if self.config['target_mode'] == 'classification':
            out_ = Dense(2, activation='softmax')(pool1_flat_drop)
        elif self.config['target_mode'] in ['regression', 'ranking']:
            out_ = Dense(1)(pool1_flat_drop)
        show_layer_info('Dense', out_)

        model = Model(inputs=[query, doc], outputs=out_)
        plot_model(model,
                   to_file='../amvlstm_conv.png',
                   show_shapes=True,
                   show_layer_names=True)
        return model
Example #8
    def build(self):
        query = Input(name='query', shape=(self.config['text1_maxlen'], ))
        show_layer_info('Input', query)
        doc = Input(name='doc', shape=(self.config['text2_maxlen'], ))
        show_layer_info('Input', doc)

        embedding = Embedding(self.config['vocab_size'],
                              self.config['embed_size'],
                              weights=[self.config['embed']],
                              trainable=self.embed_trainable)
        q_embed = embedding(query)
        show_layer_info('Embedding_q', q_embed)

        # ########## compute attention weights for the query words: better than mvlstm alone
        if self.config["text1_attention"]:
            q_w = Dense(1,
                        kernel_initializer=self.initializer_gate,
                        use_bias=False)(
                            q_embed)  # use_bias=False for a simple linear combination
            show_layer_info('Dense', q_w)
            q_w = Lambda(lambda x: softmax(x, axis=1),
                         output_shape=(self.config['text1_maxlen'], ),
                         name="q_w")(q_w)
            show_layer_info('Lambda-softmax', q_w)
            # ########## add attention weights for Q_words
            q_w_layer = Lambda(lambda x: K.repeat_elements(
                q_w, rep=self.config['embed_size'], axis=2))(q_w)
            show_layer_info('repeat', q_w_layer)
            q_embed = Multiply()([q_w_layer, q_embed])
            show_layer_info('Dot-qw', q_embed)
        # ####################### attention

        d_embed = embedding(doc)
        show_layer_info('Embedding_d', d_embed)

        # ########## compute attention weights for the document words:
        if self.config['text2_attention']:
            d_w = Dense(1,
                        kernel_initializer=self.initializer_gate,
                        use_bias=False)(d_embed)
            show_layer_info('Dense', d_w)
            d_w = Lambda(lambda x: softmax(x, axis=1),
                         output_shape=(self.config['text2_maxlen'], ))(d_w)
            show_layer_info('Lambda-softmax', d_w)
            # ########## add attention weights for D_words
            d_w_layer = Lambda(lambda x: K.repeat_elements(
                d_w, rep=self.config['embed_size'], axis=2))(d_w)
            d_embed = Multiply()([d_w_layer, d_embed])
            show_layer_info('Dot-dw', d_embed)
        # ####################### attention

        q_rep = Bidirectional(
            LSTM(self.config['hidden_size'],
                 return_sequences=True,
                 dropout=self.config['dropout_rate']))(q_embed)
        show_layer_info('Bidirectional-LSTM_q', q_rep)

        # ################# add attention for query positions:
        if self.config["position_att_text1"]:
            pos_w = Dense(1, activation='tanh')(
                q_rep)  # TimeDistributed(Dense(1, activation='tanh'))(q_rep)
            pos_w = Flatten()(pos_w)
            pos_w = Activation('softmax')(pos_w)
            pos_w = RepeatVector(self.config['hidden_size'] * 2)(pos_w)
            pos_w = Permute([2, 1])(pos_w)
            q_rep = Multiply()([q_rep,
                                pos_w])  # merge([q_rep, pos_w], mode='mul')

        d_rep = Bidirectional(
            LSTM(self.config['hidden_size'],
                 return_sequences=True,
                 dropout=self.config['dropout_rate']))(d_embed)
        show_layer_info('Bidirectional-LSTM_d', d_rep)

        # ################# add attention for document positions:
        if self.config["position_att_text2"]:
            # https://machinelearningmastery.com/timedistributed-layer-for-long-short-term-memory-networks-in-python/
            # TimeDistributed repeats the wrapped layer for every time step produced by the bi-LSTM,
            # so the same weights are applied at all time steps; without it, different weights are learned.
            pos_w = Dense(1, activation='tanh')(
                d_rep)  # TimeDistributed(Dense(1, activation='tanh'))(d_rep)
            pos_w = Flatten()(pos_w)
            pos_w = Activation('softmax')(pos_w)
            pos_w = RepeatVector(self.config['hidden_size'] * 2)(pos_w)
            pos_w = Permute([2, 1])(pos_w)
            d_rep = Multiply()([d_rep,
                                pos_w])  # merge([d_rep, pos_w], mode='mul')  #

        cross = Match(match_type='dot')([q_rep, d_rep])
        show_layer_info('Match-dot', cross)

        cross_reshape = Reshape((-1, ))(cross)
        show_layer_info('Reshape', cross_reshape)

        mm_k = Lambda(lambda x: K.tf.nn.top_k(
            x, k=self.config['topk'], sorted=True)[0])(cross_reshape)
        show_layer_info('Lambda-topk', mm_k)

        pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(mm_k)
        show_layer_info('Dropout', pool1_flat_drop)

        if self.config['target_mode'] == 'classification':
            out_ = Dense(2, activation='softmax')(pool1_flat_drop)
        elif self.config['target_mode'] in ['regression', 'ranking']:
            out_ = Dense(1)(pool1_flat_drop)
        show_layer_info('Dense', out_)

        model = Model(inputs=[query, doc], outputs=out_)
        plot_model(model,
                   to_file='../amvlstm.png',
                   show_shapes=True,
                   show_layer_names=True)
        return model
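The position-attention blocks in Example #8 score each time step with Dense(1, activation='tanh'), softmax the scores over the sequence, then broadcast the weights back across the feature dimension via RepeatVector + Permute before the element-wise Multiply. A NumPy sketch of that re-weighting for one sample (the scoring weights here are random placeholders):

    import numpy as np

    def position_attention(rep, w):
        # rep: (timesteps, 2 * hidden_size) bi-LSTM output, w: (2 * hidden_size, 1) scoring weights
        scores = np.tanh(rep @ w).ravel()                # Dense(1, tanh) per time step, then Flatten
        weights = np.exp(scores) / np.exp(scores).sum()  # softmax over time steps
        return rep * weights[:, None]                    # RepeatVector + Permute + Multiply

    out = position_attention(np.random.rand(30, 256), np.random.rand(256, 1))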
Example #9
    def build(self):
        query = Input(name='query', shape=(self.config['text1_maxlen'], ))
        show_layer_info('Input', query)
        doc = Input(name='doc', shape=(self.config['text2_maxlen'], ))
        show_layer_info('Input', doc)

        embedding = Embedding(self.config['vocab_size'],
                              self.config['embed_size'],
                              weights=[self.config['embed']],
                              trainable=self.embed_trainable)
        q_embed = embedding(
            Masking(mask_value=self.config['vocab_size'] - 1)(query))
        show_layer_info('Embedding', q_embed)
        d_embed = embedding(
            Masking(mask_value=self.config['vocab_size'] - 1)(doc))
        show_layer_info('Embedding', d_embed)

        q_rep = Bidirectional(
            LSTM(self.config['hidden_size'],
                 return_sequences=True,
                 dropout=self.config['dropout_rate']))(q_embed)
        show_layer_info('Bidirectional-LSTM', q_rep)
        d_rep = Bidirectional(
            LSTM(self.config['hidden_size'],
                 return_sequences=True,
                 dropout=self.config['dropout_rate']))(d_embed)
        show_layer_info('Bidirectional-LSTM', d_rep)
        # Output shape: (samples, timesteps, 2 * hidden_size)
        cross = Match(match_type=self.config['match_type'],
                      embedding_size=2 * self.config['hidden_size'],
                      r=5)([q_rep, d_rep])
        # cross = Dot(axes=[2, 2])([q_embed, d_embed])
        show_layer_info('Match', cross)

        if self.config['match_type'] != 'tensor2':
            cross_reshape = Reshape((-1, ))(cross)
            show_layer_info('Reshape', cross_reshape)

            mm_k = Lambda(lambda x: K.tf.nn.top_k(
                x, k=self.config['topk'], sorted=True)[0])(cross_reshape)
            show_layer_info('Lambda-topk', mm_k)

            pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(mm_k)
            show_layer_info('Dropout', pool1_flat_drop)
        else:
            act_cross = Activation('relu')(cross)
            pool1_flat_drop = Lambda(lambda x: K.tf.reshape(
                K.tf.nn.top_k(K.tf.transpose(
                    K.tf.reshape(x, (-1, x.shape[1] * x.shape[2], x.shape[3])),
                    [0, 2, 1]),
                              k=self.config['topk'],
                              sorted=True)[0],
                (-1, K.tf.Dimension(self.config['topk'] * x.shape[3].value))))(
                    act_cross)

        if self.config['target_mode'] == 'classification':
            out_ = Dense(2, activation='softmax')(pool1_flat_drop)
        elif self.config['target_mode'] in ['regression', 'ranking']:
            out_ = Dense(1)(pool1_flat_drop)
        show_layer_info('Dense', out_)

        # model = Model(inputs=[query, doc, dpool_index], outputs=out_)
        model = Model(inputs=[query, doc], outputs=out_)
        return model