def Stack_CNN_fuse(seqs, mask, filter_sizes=(3, 3, 3), num_units=None, gate=False, residual=True, scope=None, reuse=None, **kwargs):
    """Stack CNN layers (optionally gated and/or residual) over `seqs`.

    Each layer's output is fused against `mask` with ``Pool_fuse``; only the
    fusion of the LAST layer is returned (intermediate fusions are still
    built, so their graph variables are created either way).

    :param seqs: input sequence tensor, last dim is the feature width
    :param mask: sequence mask forwarded to ``Pool_fuse``
    :param filter_sizes: one conv filter size per stacked layer
    :param num_units: conv output width; forced to the input width when
        None or when `residual` is on (residual add needs matching dims)
    :param gate: if True, use a GLU-style gate (conv to 2*num_units, split,
        multiply by sigmoid of the second half)
    :param residual: if True, add each layer's input to its output
    :param kwargs: forwarded to ``Pool_fuse``
    :return: pooled representation of the final layer
    """
    # Residual addition requires the conv output width to match the input
    # width, hence the override when residual is enabled.
    if num_units is None or residual:
        num_units = seqs.get_shape().as_list()[-1]
    fused_per_layer = []
    with tf.variable_scope(scope or 'Deep_CNN_Residual_Block', reuse=reuse):
        current = seqs
        for layer_idx, width in enumerate(filter_sizes):
            with tf.variable_scope("conv_pool_%s" % layer_idx):
                if gate:
                    # GLU-style gate: double-width conv, split into a linear
                    # half and a sigmoid gate half, multiply elementwise.
                    doubled = CNN_encode(current, width, 2 * num_units)
                    linear_half, gate_half = tf.split(doubled, num_or_size_splits=2, axis=2)
                    layer_out = linear_half * tf.nn.sigmoid(gate_half)
                else:
                    layer_out = CNN_encode(current, width, num_units)
                if residual:
                    layer_out += current
                fused_per_layer.append(Pool_fuse(layer_out, mask, **kwargs))
                current = layer_out
    return fused_per_layer[-1]
def AttentiveCNN_match(context, query, context_mask, query_mask, scope='AttentiveCNN_Block', residual=False, normalize_output=False, reuse=None, **kwargs):
    """Attentive-CNN matching of `context` against `query`.

    NOTE(review): this function is redefined later in this file with the
    same name (a `causality`-aware variant); at import time the later
    definition shadows this one — confirm which version callers intend.

    Combines a width-3 conv of the raw context with a width-1 conv of the
    attention-aligned context, through a tanh.

    :param context: context sequence tensor (N, T_q, C)
    :param query: query sequence tensor attended over
    :param context_mask: mask for `context`, passed to ``Attentive_match``
    :param query_mask: mask for `query`, passed to ``Attentive_match``
    :param residual: if True, add `context` back onto the output
    :param normalize_output: if True, layer-normalize the output
    :return: matched representation, shape (N, T_q, C)
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Conv over the raw context (no activation: tanh is applied on the sum).
        conv_plain = CNN_encode(context, filter_size=3, direction='none', act_fn=None)
        aligned, _ = Attentive_match(context, query, context_mask, query_mask)
        # Pointwise conv over the attention-aligned context.
        conv_aligned = CNN_encode(aligned, filter_size=1, direction='none', act_fn=None)
        output = tf.nn.tanh(conv_plain + conv_aligned)
        if residual:
            # Residual connection
            output += context
        if normalize_output:
            # Normalize
            output = layer_norm(output)  # (N, T_q, C)
    return output
def AttentiveCNN_match(context, query, context_mask, query_mask, causality=False, scope='AttentiveCNN_Block', reuse=None, **kwargs):
    """Attentive-CNN matching with optional causal (forward-only) convs.

    Sums a width-3 conv of the raw context and a width-1 conv of the
    attention-aligned context, applies tanh, then the project's shared
    dropout + residual + layer-norm wrapper.

    :param causality: if True, convs run 'forward' only and attention is
        masked causally, so position t never sees positions > t
    :param kwargs: forwarded to ``dropout_res_layernorm``
    :return: matched representation with residual/norm applied
    """
    with tf.variable_scope(scope, reuse=reuse):
        conv_direction = 'forward' if causality else 'none'
        # No activation on either conv; tanh is applied to their sum.
        conv_plain = CNN_encode(context, filter_size=3, direction=conv_direction, act_fn=None)
        aligned, _ = Attentive_match(context, query, context_mask, query_mask, causality=causality)
        conv_aligned = CNN_encode(aligned, filter_size=1, direction=conv_direction, act_fn=None)
        combined = tf.nn.tanh(conv_plain + conv_aligned)
        return dropout_res_layernorm(context, combined, **kwargs)
def CNN_pool(seqs, mask, filter_size, num_units=None, scope=None, reuse=None, **kwargs):
    """Single conv layer followed by SWEM pooling.

    :param seqs: input sequence tensor, last dim is the feature width
    :param mask: sequence mask forwarded to ``SWEM_pool``
    :param filter_size: conv filter width
    :param num_units: conv output width; defaults to the input width
    :param kwargs: forwarded to ``SWEM_pool``
    :return: pooled representation
    """
    if num_units is None:
        num_units = seqs.get_shape().as_list()[-1]
    with tf.variable_scope(scope or 'CNN_Pool_Block', reuse=reuse):
        encoded = CNN_encode(seqs, filter_size, num_units)
        return SWEM_pool(encoded, mask, **kwargs)
def CNN_fuse(seqs, mask, filter_size, num_units=None, scope=None, reuse=None, **kwargs):
    """Simple CNN fusion: one conv layer followed by ``Pool_fuse``.

    :param seqs: input sequence tensor, last dim is the feature width
    :param mask: sequence mask forwarded to ``Pool_fuse``
    :param filter_size: conv filter width
    :param num_units: conv output width; defaults to the input width
    :param kwargs: forwarded to ``Pool_fuse``
    :return: fused/pooled representation
    """
    if num_units is None:
        num_units = seqs.get_shape().as_list()[-1]
    with tf.variable_scope(scope or 'CNN_Fuse_Block', reuse=reuse):
        encoded = CNN_encode(seqs, filter_size, num_units)
        return Pool_fuse(encoded, mask, **kwargs)
def emb_ff(ids):
    """Embed token ids as word-embedding + char-CNN embedding, then highway.

    NOTE(review): this is a closure — it reads ``self`` (batch_size,
    vocab_size, data_io, tokenid_2_charsid_map, word_embeddings, char_emb,
    args) and ``scope`` from an enclosing scope not visible in this chunk;
    confirm against the enclosing method before moving it.

    :param ids: shape of ids is [batch] or [batch,L]
    :return: embedding [batch, D] or [batch, L, D]
    """
    num_of_dim = ids.get_shape().ndims
    if num_of_dim == 1:
        # Promote rank-1 input to [batch, 1] so the body handles one shape.
        ids = tf.reshape(ids, [self.batch_size, 1])
    # Map any out-of-vocabulary id (>= vocab_size) to the UNK id.
    condition = tf.less(ids, self.vocab_size)
    ids = tf.where(condition, ids, tf.ones_like(ids) * self.data_io.unk_id)
    max_axis1_len = tf.shape(ids)[-1]
    char_ids = tf.nn.embedding_lookup(self.tokenid_2_charsid_map, ids)  # B,L,max_token_len
    max_axis2_len = tf.shape(char_ids)[-1]
    token_emb = tf.nn.embedding_lookup(self.word_embeddings, ids)
    # Flatten batch and token axes so the char CNN sees [B*L, max_token_len, D_char].
    char_emb = tf.reshape(
        tf.nn.embedding_lookup(
            self.char_emb,
            char_ids),  # B,L,max_token_len,D_char
        [
            self.batch_size * max_axis1_len, max_axis2_len,
            self.args.char_embed_size
        ])
    char_emb = CNN_encode(char_emb,
                          filter_size=self.args.embed_filter_size,
                          num_filters=self.args.char_embed_size,
                          scope=scope,
                          reuse=tf.AUTO_REUSE)
    # Max-pool over the char axis, then restore the [B, L, D_char] shape.
    char_emb = tf.reshape(tf.reduce_max(char_emb, axis=1), [
        self.batch_size, max_axis1_len, self.args.char_embed_size
    ])
    concat_emb = tf.concat([token_emb, char_emb], axis=-1)
    # Project the concatenated embedding to the configured output width.
    concat_emb = linear_logit(concat_emb,
                              self.args.embedding_output_dim,
                              scope=scope,
                              reuse=tf.AUTO_REUSE)
    highway_out = highway_network(concat_emb,
                                  self.args.highway_layer_num,
                                  scope=scope,
                                  reuse=tf.AUTO_REUSE)
    if num_of_dim == 1:
        # Undo the rank promotion done at the top.
        return tf.squeeze(highway_out, axis=1)  # B,D
    else:
        return highway_out  # B,L,D
def Stack_CNN_pool(seqs, mask, filter_sizes=(3, 4, 5), num_units=None, residual=True, concat_output=True, scope=None, reuse=None, **kwargs):
    """Stack gated CNN layers with residual skips and SWEM-pool each layer.

    Every layer uses a GLU-style gate (double-width conv, split, sigmoid
    gate). The residual skip is applied from the SECOND layer onward (the
    first layer has no previous representation to add).

    :param seqs: input sequence tensor, last dim is the feature width
    :param mask: sequence mask forwarded to ``SWEM_pool``
    :param filter_sizes: one conv filter size per stacked layer
    :param num_units: conv output width; forced to the input width when
        None or when `residual` is on
    :param residual: if True, add the previous layer's output to the current
    :param concat_output: if True return all per-layer pools concatenated
        on axis 1, otherwise return only the last layer's pool
    :param kwargs: forwarded to ``SWEM_pool``
    :return: concatenated per-layer pools, or the last pool
    """
    if num_units is None or residual:
        num_units = seqs.get_shape().as_list()[-1]
    layer_pools = []
    with tf.variable_scope(scope or 'Deep_CNN_Residual_Block', reuse=reuse):
        previous = None
        current = seqs
        for width in filter_sizes:
            with tf.variable_scope("conv_pool_%s" % width):
                # GLU-style gate on a double-width conv.
                doubled = CNN_encode(current, width, 2 * num_units)
                linear_half, gate_half = tf.split(doubled, num_or_size_splits=2, axis=2)
                current = linear_half * tf.nn.sigmoid(gate_half)
                # Residual skip only once a previous layer output exists.
                if residual and previous is not None:
                    current = current + previous
                previous = current
                layer_pools.append(SWEM_pool(current, mask, **kwargs))
    if concat_output:
        return tf.concat(layer_pools, axis=1)
    return layer_pools[-1]