def get_batch_generator(self):
    """Yield (inputs, labels) batches.

    Training mode loops forever over random batches; evaluation mode walks
    the relation list sequentially (dropping a final partial batch) and also
    exposes the (query, doc) ID pairs under the 'ID' key.
    """
    if self.is_train:
        while True:
            X1, X1_len, X2, X2_len, Y, ID_pairs = self.get_batch()
            feed = {'query': X1, 'query_len': X1_len,
                    'doc': X2, 'doc_len': X2_len}
            if self.config['use_dpool']:
                feed['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                    X1_len, X2_len,
                    self.config['text1_maxlen'], self.config['text2_maxlen'])
            yield (feed, Y)
    else:
        while self.point + self.batch_size <= self.total_rel_num:
            X1, X1_len, X2, X2_len, Y, ID_pairs = self.get_batch(randomly=False)
            feed = {'query': X1, 'query_len': X1_len,
                    'doc': X2, 'doc_len': X2_len}
            if self.config['use_dpool']:
                feed['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                    X1_len, X2_len,
                    self.config['text1_maxlen'], self.config['text2_maxlen'])
            feed['ID'] = ID_pairs
            yield (feed, Y)
def get_batch_generator(self):
    """Yield one (inputs, labels) pair per list-wise batch from get_batch().

    Each feed dict carries the padded texts, their lengths, the (query, doc)
    ID pairs, and per-query list sizes under 'list_counts'.
    """
    for X1, X1_len, X2, X2_len, Y, ID_pairs, list_counts in self.get_batch():
        feed = {'query': X1, 'query_len': X1_len,
                'doc': X2, 'doc_len': X2_len}
        if self.config['use_dpool']:
            feed['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                X1_len, X2_len,
                self.config['text1_maxlen'], self.config['text2_maxlen'])
        feed['ID'] = ID_pairs
        feed['list_counts'] = list_counts
        yield (feed, Y)
def get_batch_generator(self):
    """Endlessly yield (inputs, labels) training batches from get_batch()."""
    while True:
        X1, X1_len, X2, X2_len, Y = self.get_batch()
        feed = {'query': X1, 'query_len': X1_len,
                'doc': X2, 'doc_len': X2_len}
        if self.config['use_dpool']:
            feed['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                X1_len, X2_len,
                self.config['text1_maxlen'], self.config['text2_maxlen'])
        yield (feed, Y)
def get_batch_generator(self):
    """Yield (inputs, labels) batches until get_batch() returns a falsy sample."""
    while True:
        sample = self.get_batch()
        if not sample:
            break
        X1, X1_len, X2, X2_len, Y, ID_pairs = sample
        feed = {'query': X1, 'query_len': X1_len,
                'doc': X2, 'doc_len': X2_len}
        if self.config['use_dpool']:
            feed['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                X1_len, X2_len,
                self.config['text1_maxlen'], self.config['text2_maxlen'])
        feed['ID'] = ID_pairs
        yield (feed, Y)
def get_dpool_index(self, _len1, _len2):
    """Dynamic-pooling index for one (text1, text2) length pair.

    Wraps the two scalar lengths in single-element lists and delegates to
    DynamicMaxPooling.dynamic_pooling_index with the configured max lengths.

    @param _len1: int, number of text1 terms
    @param _len2: int, number of text2 terms
    @return: np.array index produced by dynamic_pooling_index
    """
    return DynamicMaxPooling.dynamic_pooling_index(
        [_len1], [_len2],
        self.config['text1_maxlen'], self.config['text2_maxlen'])
def get_batch_generator(self):
    """Endlessly yield (inputs, labels) batches carrying word and POS streams.

    When dynamic pooling is enabled, separate pooling indices are emitted for
    the word channel and the POS channel, each with its own max lengths.
    """
    while True:
        (X1, XP1, X1_len, XP1_len,
         X2, XP2, X2_len, XP2_len, Y) = self.get_batch()
        feed = {'query': X1, 'query_pos': XP1,
                'query_len': X1_len, 'query_pos_len': XP1_len,
                'doc': X2, 'doc_pos': XP2,
                'doc_len': X2_len, 'doc_pos_len': XP2_len}
        if self.config['use_dpool']:
            feed['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                X1_len, X2_len,
                self.config['text1_maxlen'], self.config['text2_maxlen'])
            feed['dpool_pos_index'] = DynamicMaxPooling.dynamic_pooling_index(
                XP1_len, XP2_len,
                self.config['pos1_maxlen'], self.config['pos2_maxlen'])
        yield (feed, Y)
def build(self):
    """Assemble the MatchPyramid graph.

    Pipeline: shared embedding -> word-by-word dot-product match matrix ->
    Conv2D -> dynamic max pooling (driven by the 'dpool_index' input) ->
    flatten/dropout -> dense output sized by target_mode.
    """
    cfg = self.config
    query = Input(name='query', shape=(cfg['text1_maxlen'],))
    show_layer_info('Input', query)
    doc = Input(name='doc', shape=(cfg['text2_maxlen'],))
    show_layer_info('Input', doc)
    dpool_index = Input(name='dpool_index',
                        shape=[cfg['text1_maxlen'], cfg['text2_maxlen'], 3],
                        dtype='int32')
    show_layer_info('Input', dpool_index)

    # One embedding shared by both texts.
    embedding = Embedding(cfg['vocab_size'], cfg['embed_size'],
                          weights=[cfg['embed']],
                          trainable=self.embed_trainable)
    q_embed = embedding(query)
    show_layer_info('Embedding', q_embed)
    d_embed = embedding(doc)
    show_layer_info('Embedding', d_embed)

    # Interaction (match) matrix between every query/doc word pair.
    cross = Dot(axes=[2, 2], normalize=False)([q_embed, d_embed])
    show_layer_info('Dot', cross)
    cross_reshape = Reshape((cfg['text1_maxlen'], cfg['text2_maxlen'], 1))(cross)
    show_layer_info('Reshape', cross_reshape)

    conv1 = Conv2D(cfg['kernel_count'], cfg['kernel_size'],
                   padding='same', activation='relu')(cross_reshape)
    show_layer_info('Conv2D', conv1)
    pool1 = DynamicMaxPooling(cfg['dpool_size'][0],
                              cfg['dpool_size'][1])([conv1, dpool_index])
    show_layer_info('DynamicMaxPooling', pool1)
    pool1_flat = Flatten()(pool1)
    show_layer_info('Flatten', pool1_flat)
    pool1_flat_drop = Dropout(rate=cfg['dropout_rate'])(pool1_flat)
    show_layer_info('Dropout', pool1_flat_drop)

    if cfg['target_mode'] == 'classification':
        out_ = Dense(2, activation='softmax')(pool1_flat_drop)
    elif cfg['target_mode'] in ['regression', 'ranking']:
        out_ = Dense(1)(pool1_flat_drop)
    show_layer_info('Dense', out_)

    return Model(inputs=[query, doc, dpool_index], outputs=out_)
def build(self):
    """MatchPyramid with optional per-word attention gates on both texts.

    When 'text1_attention'/'text2_attention' are set, each word embedding is
    re-weighted by a scalar gate (Dense(1) without bias, softmax-normalised
    over the word axis) before the match matrix is computed. The rest is the
    standard pipeline: dot-product match matrix -> Conv2D -> dynamic max
    pooling -> flatten/dropout -> dense output sized by target_mode.

    Returns: keras Model with inputs [query, doc, dpool_index].
    """
    query = Input(name='query', shape=(self.config['text1_maxlen'],))
    show_layer_info('Input', query)
    doc = Input(name='doc', shape=(self.config['text2_maxlen'],))
    show_layer_info('Input', doc)
    dpool_index = Input(name='dpool_index',
                        shape=[self.config['text1_maxlen'],
                               self.config['text2_maxlen'], 3],
                        dtype='int32')
    show_layer_info('Input', dpool_index)

    embedding = Embedding(self.config['vocab_size'], self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=self.embed_trainable)
    q_embed = embedding(query)
    show_layer_info('Embedding', q_embed)
    d_embed = embedding(doc)
    show_layer_info('Embedding', d_embed)

    # ########## attention weights for the query words
    if self.config["text1_attention"]:
        q_w = Dense(1, kernel_initializer=self.initializer_gate,
                    use_bias=False)(q_embed)  # use_bias=False to simple combination
        show_layer_info('Dense', q_w)
        q_w = Lambda(lambda x: softmax(x, axis=1),
                     output_shape=(self.config['text1_maxlen'],),
                     name="q_w")(q_w)
        show_layer_info('Lambda-softmax', q_w)
        # BUGFIX: the repeat Lambda previously closed over the outer tensor
        # `q_w` instead of using its own input `x`. It worked only because
        # the captured tensor happened to be the layer's input, but such a
        # closure breaks model serialization/cloning; use `x`.
        q_w_layer = Lambda(lambda x: K.repeat_elements(
            x, rep=self.config['embed_size'], axis=2))(q_w)
        show_layer_info('repeat', q_w_layer)
        q_embed = Multiply()([q_w_layer, q_embed])
        show_layer_info('Dot-qw', q_embed)
    # ####################### attention text1

    # ########## attention weights for the document words
    if self.config['text2_attention']:
        d_w = Dense(1, kernel_initializer=self.initializer_gate,
                    use_bias=False)(d_embed)
        show_layer_info('Dense', d_w)
        d_w = Lambda(lambda x: softmax(x, axis=1),
                     output_shape=(self.config['text2_maxlen'],))(d_w)
        show_layer_info('Lambda-softmax', d_w)
        # BUGFIX: same closure issue as the query branch — use `x`, not `d_w`.
        d_w_layer = Lambda(lambda x: K.repeat_elements(
            x, rep=self.config['embed_size'], axis=2))(d_w)
        d_embed = Multiply()([d_w_layer, d_embed])
        show_layer_info('Dot-qw', d_embed)
    # ####################### attention text2

    cross = Dot(axes=[2, 2], normalize=False)([q_embed, d_embed])
    show_layer_info('Dot', cross)
    cross_reshape = Reshape((self.config['text1_maxlen'],
                             self.config['text2_maxlen'], 1))(cross)
    show_layer_info('Reshape', cross_reshape)

    conv2d = Conv2D(self.config['kernel_count'], self.config['kernel_size'],
                    padding='same', activation='relu')
    dpool = DynamicMaxPooling(self.config['dpool_size'][0],
                              self.config['dpool_size'][1])
    conv1 = conv2d(cross_reshape)
    show_layer_info('Conv2D', conv1)
    pool1 = dpool([conv1, dpool_index])
    show_layer_info('DynamicMaxPooling', pool1)
    pool1_flat = Flatten()(pool1)
    show_layer_info('Flatten', pool1_flat)
    pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(pool1_flat)
    show_layer_info('Dropout', pool1_flat_drop)

    if self.config['target_mode'] == 'classification':
        out_ = Dense(2, activation='softmax')(pool1_flat_drop)
    elif self.config['target_mode'] in ['regression', 'ranking']:
        out_ = Dense(1)(pool1_flat_drop)
    show_layer_info('Dense', out_)

    model = Model(inputs=[query, doc, dpool_index], outputs=out_)
    return model
def get_batch_generator(self):
    """Yield (inputs, labels) for each list-wise batch produced by get_batch()."""
    for X1, X1_len, X2, X2_len, Y, ID_pairs, list_counts in self.get_batch():
        batch = {'query': X1, 'query_len': X1_len,
                 'doc': X2, 'doc_len': X2_len}
        if self.config['use_dpool']:
            batch['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                X1_len, X2_len,
                self.config['text1_maxlen'], self.config['text2_maxlen'])
        batch['ID'] = ID_pairs
        batch['list_counts'] = list_counts
        yield (batch, Y)
def build(self):
    """Build the MatchPyramid model and print its summary.

    Pipeline: shared embedding -> dot-product match matrix -> Conv2D ->
    dynamic max pooling -> flatten/dropout -> dense output sized by
    target_mode. Calls model.summary() before returning.
    """
    cfg = self.config
    query = Input(name='query', shape=(cfg['text1_maxlen'],))
    show_layer_info('Input', query)
    doc = Input(name='doc', shape=(cfg['text2_maxlen'],))
    show_layer_info('Input', doc)
    dpool_index = Input(name='dpool_index',
                        shape=[cfg['text1_maxlen'], cfg['text2_maxlen'], 3],
                        dtype='int32')
    show_layer_info('Input', dpool_index)

    embedding = Embedding(cfg['vocab_size'], cfg['embed_size'],
                          weights=[cfg['embed']],
                          trainable=self.embed_trainable)
    q_embed = embedding(query)
    show_layer_info('Embedding', q_embed)
    d_embed = embedding(doc)
    show_layer_info('Embedding', d_embed)

    # Word-by-word interaction matrix between the two texts.
    cross = Dot(axes=[2, 2], normalize=False)([q_embed, d_embed])
    show_layer_info('Dot', cross)
    cross_reshape = Reshape((cfg['text1_maxlen'], cfg['text2_maxlen'], 1))(cross)
    show_layer_info('Reshape', cross_reshape)

    conv1 = Conv2D(cfg['kernel_count'], cfg['kernel_size'],
                   padding='same', activation='relu')(cross_reshape)
    show_layer_info('Conv2D', conv1)
    pool1 = DynamicMaxPooling(cfg['dpool_size'][0],
                              cfg['dpool_size'][1])([conv1, dpool_index])
    show_layer_info('DynamicMaxPooling', pool1)
    pool1_flat = Flatten()(pool1)
    show_layer_info('Flatten', pool1_flat)
    pool1_flat_drop = Dropout(rate=cfg['dropout_rate'])(pool1_flat)
    show_layer_info('Dropout', pool1_flat_drop)

    if cfg['target_mode'] == 'classification':
        out_ = Dense(2, activation='softmax')(pool1_flat_drop)
    elif cfg['target_mode'] in ['regression', 'ranking']:
        out_ = Dense(1)(pool1_flat_drop)
    show_layer_info('Dense', out_)

    model = Model(inputs=[query, doc, dpool_index], outputs=out_)
    model.summary()
    return model
def get_batch_generator(self):
    """Yield (inputs, labels) batches; stops on the first falsy sample."""
    sample = self.get_batch()
    while sample:
        X1, X1_len, X2, X2_len, Y, ID_pairs = sample
        inputs = {'query': X1, 'query_len': X1_len,
                  'doc': X2, 'doc_len': X2_len}
        if self.config['use_dpool']:
            inputs['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                X1_len, X2_len,
                self.config['text1_maxlen'], self.config['text2_maxlen'])
        inputs['ID'] = ID_pairs
        yield (inputs, Y)
        sample = self.get_batch()
def get_batch_generator(self):
    """Endlessly yield (inputs, labels) batches for training."""
    while True:
        X1, X1_len, X2, X2_len, Y = self.get_batch()
        inputs = {'query': X1, 'query_len': X1_len,
                  'doc': X2, 'doc_len': X2_len}
        if self.config['use_dpool']:
            inputs['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                X1_len, X2_len,
                self.config['text1_maxlen'], self.config['text2_maxlen'])
        yield (inputs, Y)
def get_batch_generator(self):
    """Yield (inputs, labels): endless random batches when training,
    one sequential pass (partial final batch dropped, with 'ID') otherwise."""
    if self.is_train:
        while True:
            X1, X1_len, X2, X2_len, Y, ID_pairs = self.get_batch()
            inputs = dict(query=X1, query_len=X1_len, doc=X2, doc_len=X2_len)
            if self.config['use_dpool']:
                inputs['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                    X1_len, X2_len,
                    self.config['text1_maxlen'], self.config['text2_maxlen'])
            yield (inputs, Y)
    else:
        while self.point + self.batch_size <= self.total_rel_num:
            X1, X1_len, X2, X2_len, Y, ID_pairs = self.get_batch(randomly=False)
            inputs = dict(query=X1, query_len=X1_len, doc=X2, doc_len=X2_len)
            if self.config['use_dpool']:
                inputs['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                    X1_len, X2_len,
                    self.config['text1_maxlen'], self.config['text2_maxlen'])
            inputs['ID'] = ID_pairs
            yield (inputs, Y)