def conv1d(x, filter_size, in_channel, out_channel, strides=[1, 1, 1, 1], padding="VALID", name="cnn",
           activation_fn=tf.tanh, batch_norm=fu.create_BNParams()):
    '''
    Convolution with a [1, filter_size] kernel implemented through tf.nn.conv2d.
    When batch_norm.apply is set, batch normalization replaces the bias term.
    '''
    with tf.variable_scope(name) as vs:
        filter_shape = [1, filter_size, in_channel, out_channel]
        W = tf.get_variable('kernel', shape=filter_shape)
        if not batch_norm.apply:
            b = tf.get_variable('bias', shape=[out_channel], initializer=tf.constant_initializer(0.))

        x_filtered = tf.nn.conv2d(x, W, strides, padding)

        if batch_norm.apply:
            activation = tf.contrib.layers.batch_norm(x_filtered,
                                                      center=batch_norm.center,
                                                      scale=batch_norm.scale,
                                                      is_training=batch_norm.phase,
                                                      scope=vs.name + '_bn')
        else:
            activation = tf.add(x_filtered, b)

        if activation_fn:
            activation = activation_fn(activation)

        tf.summary.histogram(vs.name + '_filter', W)
        if not batch_norm.apply:
            tf.summary.histogram(vs.name + '_biases_filter', b)
        s._norm_summary(W, vs.name)

        return activation
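# --- usage sketch (illustrative only, not called anywhere in the module) ---
# A minimal example of conv1d as a 1x1 "down-sampling" convolution, mirroring how dw_cnn_rnn
# below uses it. The placeholder shapes and the scope name are hypothetical; fu.create_BNParams()
# is assumed to disable batch norm by default, so the bias path is taken.
def _conv1d_usage_sketch():
    batch, time_steps, n_features, n_channels = 32, 20, 8, 3
    inputs = tf.placeholder(tf.float32, [batch, time_steps, n_features, n_channels])
    # collapse the n_channels channels of every feature back into a single channel
    out = conv1d(inputs,
                 filter_size=1,
                 in_channel=n_channels,
                 out_channel=1,
                 name="cnn_down_sample_example",
                 activation_fn=tf.nn.elu,
                 batch_norm=fu.create_BNParams())
    return tf.squeeze(out, axis=-1)  # -> [batch, time_steps, n_features]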
def gated_res_net_layer_ot(x, in_size, out_size, sequence_length, scope_name,
                           activation_fn=tf.nn.elu, batch_norm=fu.create_BNParams()):
    '''
    Residual block built from a dense layer followed by a gated dense layer, both applied over time.
    The input is added back to the output, so in_size is expected to equal out_size.
    '''
    orig_x = x
    with tf.variable_scope(scope_name) as vs:
        x = dense_layer_ot(x, in_size, out_size, sequence_length, 'sub_1', activation_fn, batch_norm)
        x = gated_dense_layer_ot(x, in_size, out_size, sequence_length, 'sub_2', activation_fn, batch_norm)

        with tf.variable_scope('sub_add'):
            x += orig_x

        return x
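# --- usage sketch (illustrative only) ---
# Stacking two residual gated blocks; because the input is added back, in_size and out_size
# must be equal so the shape is preserved. Shapes and scope names are hypothetical.
def _gated_res_net_stack_sketch():
    batch, time_steps, n_features = 32, 20, 16
    x = tf.placeholder(tf.float32, [batch, time_steps, n_features])
    for idx in range(2):
        x = gated_res_net_layer_ot(x,
                                   in_size=n_features,
                                   out_size=n_features,
                                   sequence_length=time_steps,
                                   scope_name='res_block_{}'.format(idx))
    return x  # shape preserved: [batch, time_steps, n_features]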
def highway_dense_layer_ot(x, in_size, out_size, sequence_length, scope_name,
                           activation_fn=tf.nn.elu, batch_norm=fu.create_BNParams()):
    '''
    Highway block: a dense layer over time followed by a gated dense layer in highway mode,
    where the gate interpolates between the transformed output and the carried input.
    '''
    with tf.variable_scope(scope_name) as vs:
        x = dense_layer_ot(x, in_size, out_size, sequence_length, 'sub_1', activation_fn, batch_norm)
        x = gated_dense_layer_ot(x, in_size, out_size, sequence_length, 'sub_2', activation_fn, batch_norm,
                                 is_highway=True)
        return x
def gated_dense_layer_ot(x, in_size, out_size, sequence_length, scope_name,
                         activation_fn=tf.nn.elu, batch_norm=fu.create_BNParams(), is_highway=False):
    '''
    Apply a gated linear layer to the input: activation_fn(matmul(x, W)) * sigmoid(matmul(x, W_t)).
    The gate W_t should learn how much to filter the input x.
    :param x: input mini-batch
    :param in_size: input size, i.e. number of features
    :param out_size: output size (must equal in_size when is_highway=True, since the carry term adds the untransformed input)
    :param sequence_length: number of timesteps
    :param scope_name: name of the variable scope of this layer
    :param activation_fn: activation function to apply
    :param batch_norm: apply batch norm before computing the activation of both W and W_t
    :param is_highway: if True, combine the transformed output and the original input with a carry gate
    '''
    layers_output = []
    with tf.variable_scope(scope_name) as vs:
        W = tf.get_variable('weight_filter',
                            shape=[in_size, out_size],
                            initializer=tf.contrib.layers.xavier_initializer(),
                            collections=[GraphKeys.WEIGHTS, GraphKeys.GLOBAL_VARIABLES],
                            trainable=True)

        W_t = tf.get_variable('weight_gate',
                              shape=[in_size, out_size],
                              initializer=tf.contrib.layers.xavier_initializer(),
                              collections=[GraphKeys.WEIGHTS, GraphKeys.GLOBAL_VARIABLES],
                              trainable=True)

        if not batch_norm.apply:
            b_t = tf.get_variable('bias_gate',
                                  shape=[out_size],
                                  initializer=tf.constant_initializer(0.),
                                  collections=[GraphKeys.BIASES, GraphKeys.GLOBAL_VARIABLES])

            b = tf.get_variable('bias_filter',
                                shape=[out_size],
                                initializer=tf.constant_initializer(0.),
                                collections=[GraphKeys.BIASES, GraphKeys.GLOBAL_VARIABLES])

        # Iterate over the timesteps
        for t in range(0, sequence_length):
            H_linear = tf.matmul(x[:, t, :], W)
            T_linear = tf.matmul(x[:, t, :], W_t)

            if batch_norm.apply:
                # reuse the batch-norm variables after the first timestep
                H_norm = tf.contrib.layers.batch_norm(H_linear,
                                                      center=batch_norm.center,
                                                      scale=batch_norm.scale,
                                                      is_training=batch_norm.phase,
                                                      reuse=(t > 0),
                                                      scope=vs.name + '_filter_bn')
                H = activation_fn(H_norm, name="activation")

                T_norm = tf.contrib.layers.batch_norm(T_linear,
                                                      center=batch_norm.center,
                                                      scale=batch_norm.scale,
                                                      is_training=batch_norm.phase,
                                                      reuse=(t > 0),
                                                      scope=vs.name + '_gate_bn')
                T = tf.sigmoid(T_norm, name="transit_gate")
            else:
                H = activation_fn(tf.add(H_linear, b), name="activation")
                T = tf.sigmoid(tf.add(T_linear, b_t), name="transit_gate")

            if is_highway:
                # carry gate: how much of the original input passes through unchanged
                C = 1 - T
                layer_output = tf.multiply(H, T) + (C * x[:, t, :])
            else:
                layer_output = tf.multiply(H, T)

            # apply dropout
            # if h_params.dropout is not None and mode == tf.contrib.learn.ModeKeys.TRAIN:
            #     layer_output = tf.nn.dropout(layer_output, keep_prob=1 - h_params.dropout)

            # add back the timestep dimension to allow concatenation
            layers_output.append(tf.expand_dims(layer_output, 1))

        # the weights are shared across timesteps, so summarise them once
        s.add_hidden_layer_summary(layers_output[-1], vs.name)
        tf.summary.histogram(vs.name + "_weight_filter", W)
        tf.summary.histogram(vs.name + '_weight_gate', W_t)
        if not batch_norm.apply:
            tf.summary.histogram(vs.name + '_bias_filter', b)
            tf.summary.histogram(vs.name + '_bias_gate', b_t)
        s._norm_summary(W, vs.name + '_filter')
        s._norm_summary(W_t, vs.name + '_gate')

        return tf.concat(layers_output, axis=1)
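# --- usage sketch (illustrative only) ---
# Minimal example of the gated dense layer over time on a [batch, time, features] tensor.
# Shapes and the scope name are hypothetical; the default BNParams are assumed to leave
# batch norm disabled, so the bias path is exercised.
def _gated_dense_layer_ot_usage_sketch():
    batch, time_steps, n_features = 32, 20, 16
    inputs = tf.placeholder(tf.float32, [batch, time_steps, n_features])
    gated = gated_dense_layer_ot(inputs,
                                 in_size=n_features,
                                 out_size=n_features,
                                 sequence_length=time_steps,
                                 scope_name='gated_dense_example',
                                 activation_fn=tf.nn.elu,
                                 batch_norm=fu.create_BNParams())
    return gated  # -> [batch, time_steps, n_features]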
def dense_layer_ot(x, in_size, out_size, sequence_length, scope_name,
                   activation_fn=tf.nn.elu, batch_norm=fu.create_BNParams()):
    '''
    Apply a dense layer over every timestep. This is used for filtering the time series.
    :param x: input data
    :param in_size: input size, i.e. number of features
    :param out_size: output size
    :param sequence_length: length of the sequence, i.e. number of timesteps to iterate over
    :param scope_name: scope name of this transformation
    :param activation_fn: activation function
    :param batch_norm: named tuple indicating whether to apply batch normalization and the phase (True if training, False if testing)
    :return: the transformed sequence, with the timestep dimension restored
    '''
    layers_output = []
    with tf.variable_scope(scope_name) as vs:
        W = tf.get_variable('weight_filter',
                            shape=[in_size, out_size],
                            initializer=tf.contrib.layers.xavier_initializer(),
                            collections=[GraphKeys.WEIGHTS, GraphKeys.GLOBAL_VARIABLES],
                            trainable=True)

        if not batch_norm.apply:
            b = tf.get_variable('bias_filter',
                                shape=[out_size],
                                initializer=tf.constant_initializer(0.),
                                collections=[GraphKeys.BIASES, GraphKeys.GLOBAL_VARIABLES],
                                trainable=True)

        for t in range(0, sequence_length):
            layer_output = standard_ops.matmul(x[:, t, :], W)

            if batch_norm.apply:
                # reuse the batch-norm variables after the first timestep
                layer_output = tf.contrib.layers.batch_norm(layer_output,
                                                            center=batch_norm.center,
                                                            scale=batch_norm.scale,
                                                            is_training=batch_norm.phase,
                                                            reuse=(t > 0),
                                                            scope=vs.name + '_bn')
            else:
                # no batch norm: add the bias instead
                layer_output = standard_ops.add(layer_output, b)

            if activation_fn:
                layer_output = activation_fn(layer_output)

            # add back the timestep dimension to allow concatenation
            layers_output.append(tf.expand_dims(layer_output, 1))

        # the weights are shared across timesteps, so summarise them once
        s.add_hidden_layer_summary(layers_output[-1], vs.name, weight=W)
        if not batch_norm.apply:
            tf.summary.histogram(vs.name + '_bias', b)

        return tf.concat(layers_output, axis=1)
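# --- usage sketch (illustrative only) ---
# Projecting the feature dimension with dense_layer_ot and then refining it with a highway
# layer of matching size; sizes and scope names are hypothetical.
def _dense_then_highway_sketch():
    batch, time_steps, n_in, n_hidden = 32, 20, 8, 16
    x = tf.placeholder(tf.float32, [batch, time_steps, n_in])
    x = dense_layer_ot(x, in_size=n_in, out_size=n_hidden,
                       sequence_length=time_steps, scope_name='project')
    x = highway_dense_layer_ot(x, in_size=n_hidden, out_size=n_hidden,
                               sequence_length=time_steps, scope_name='highway_1')
    return x  # -> [batch, time_steps, n_hidden]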
def deep_rnn(h_params, mode, features_map, target):
    features = features_map['features']
    sequence_length = features_map['length']
    # resolve the hidden-layer constructor from its name in the hyper parameters
    hidden_layer = eval(h_params.hidden_layer_type)

    # apply a non-linear transformation to the input features
    in_size = h_params.input_size
    filtered = features
    batch_norm_data = fu.create_BNParams(apply=True, phase=fu.is_training(mode))
    for layer_idx, h_layer_dim in enumerate(h_params.h_layer_size[:-1]):
        filtered = hidden_layer(filtered, in_size, h_layer_dim,
                                sequence_length=h_params.sequence_length,
                                scope_name='gated_dense_{}'.format(layer_idx),
                                activation_fn=leaky_relu,
                                batch_norm=batch_norm_data)
        in_size = h_layer_dim

    with tf.variable_scope('rnn') as vs:
        # Define a GRU cell with dropout on the output
        cell = tf.contrib.rnn.GRUCell(h_params.h_layer_size[-1], activation=tf.nn.tanh)
        cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=h_params.dropout)

        # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input) and run the RNN
        outputs, states = tf.contrib.rnn.static_rnn(cell,
                                                    tf.unstack(filtered, axis=1),
                                                    sequence_length=sequence_length,
                                                    dtype=tf.float32)

        s.add_hidden_layer_summary(activation=outputs[-1], name=vs.name + "_output")
        if isinstance(states, (list, tuple)):
            s.add_hidden_layer_summary(states.h, vs.name + "_state")
        else:
            s.add_hidden_layer_summary(states, vs.name + "_state")

    with tf.variable_scope('logits') as vs:
        logits = dense_layer(x=outputs[-1],
                             in_size=h_params.h_layer_size[-1],
                             out_size=h_params.num_class[h_params.e_type],
                             scope=vs,
                             activation_fn=None)
        s.add_hidden_layer_summary(logits, vs.name)

    predictions, losses = output_layer.losses(logits, target, mode=mode, h_params=h_params)

    if mode == tf.contrib.learn.ModeKeys.INFER:
        return predictions, None

    mean_loss = tf.reduce_mean(losses, name='mean_loss')
    return predictions, mean_loss
def dw_cnn_rnn(h_params, mode, features_map, target):
    features = features_map['features']
    sequence_length = features_map['length']
    batch_norm_data = fu.create_BNParams(apply=True, phase=fu.is_training(mode))
    channel_multiply = 3

    # add a channel dimension so every feature becomes an input channel of the convolution
    features = tf.expand_dims(features, -2)
    filtered = conv_layer.depthwise_gated_conv1d(features,
                                                 filter_size=1,
                                                 in_channel=h_params.input_size,
                                                 channel_multiply=channel_multiply,
                                                 name="deepwise_gated_cnn",
                                                 activation_fn=tf.nn.elu,
                                                 batch_norm=batch_norm_data)

    # reshape the data back to a standard form -> move the input channels to the feature space
    filtered = tf.reshape(filtered,
                          shape=[tf.shape(filtered)[0],  # the last batch may not be full
                                 h_params.sequence_length,
                                 h_params.input_size,
                                 channel_multiply])

    filtered = conv_layer.conv1d(filtered,
                                 filter_size=1,
                                 in_channel=channel_multiply,
                                 out_channel=1,
                                 name="cnn_down_sample",
                                 activation_fn=tf.nn.elu,
                                 batch_norm=batch_norm_data)
    filtered = tf.squeeze(filtered, axis=-1)

    with tf.variable_scope('rnn') as vs:
        # Define a GRU cell with dropout on the output
        cell = tf.contrib.rnn.GRUCell(h_params.h_layer_size[-1], activation=tf.nn.tanh)
        cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=h_params.dropout)

        # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input) and run the RNN
        outputs, states = tf.contrib.rnn.static_rnn(cell,
                                                    tf.unstack(filtered, axis=1),
                                                    sequence_length=sequence_length,
                                                    dtype=tf.float32)

        s.add_hidden_layer_summary(activation=outputs[-1], name=vs.name + "_output")
        if isinstance(states, (list, tuple)):
            s.add_hidden_layer_summary(states.h, vs.name + "_state")
        else:
            s.add_hidden_layer_summary(states, vs.name + "_state")

    with tf.variable_scope('logits') as vs:
        logits = tf.contrib.layers.fully_connected(inputs=outputs[-1],
                                                   num_outputs=h_params.num_class[h_params.e_type],
                                                   activation_fn=None,
                                                   scope=vs)
        s.add_hidden_layer_summary(logits, vs.name)

    predictions, losses = output_layer.losses(logits, target, mode=mode, h_params=h_params)

    if mode == tf.contrib.learn.ModeKeys.INFER:
        return predictions, None

    mean_loss = tf.reduce_mean(losses, name='mean_loss')
    return predictions, mean_loss
def depthwise_gated_conv1d(x, filter_size, in_channel, channel_multiply, strides=[1, 1, 1, 1], padding="VALID",
                           name="deepwise_gated_cnn", activation_fn=tf.nn.elu, batch_norm=fu.create_BNParams()):
    '''
    Compute a depthwise gated convolution, applying a different filter to every input channel.
    :param x: input data -> [mini_batch, time_stamp, 1, feature], so that every feature is filtered with a different filter
    :param filter_size: filter size in time
    :param in_channel: number of input channels
    :param channel_multiply: how many filters to apply to each feature
    :param strides: strides
    :param padding: zero padding
    :param name: scope name
    :param activation_fn: activation function to use
    :param batch_norm: apply batch norm before computing the activation of both W and W_t
    :return: the gated convolution output, H * T
    '''
    with tf.variable_scope(name) as vs:
        filter_shape = [1, filter_size, in_channel, channel_multiply]

        # variable definition
        W = tf.get_variable('weight_filter',
                            shape=filter_shape,
                            initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                            regularizer=None)
        W_t = tf.get_variable('weight_gate',
                              shape=filter_shape,
                              initializer=tf.contrib.layers.xavier_initializer_conv2d())
        b_t = tf.get_variable('bias_gate',
                              shape=[in_channel * channel_multiply],
                              initializer=tf.constant_initializer(0.))
        if not batch_norm.apply:
            b = tf.get_variable('bias_filter',
                                shape=[in_channel * channel_multiply],
                                initializer=tf.constant_initializer(0.))

        # convolution
        conv_filter = tf.nn.depthwise_conv2d(x, W, strides, padding)
        conv_gate = tf.nn.depthwise_conv2d(x, W_t, strides, padding)

        if batch_norm.apply:
            conv_filter_norm = tf.contrib.layers.batch_norm(conv_filter,
                                                            center=batch_norm.center,
                                                            scale=batch_norm.scale,
                                                            is_training=batch_norm.phase,
                                                            scope=vs.name + '_bn')
            H = activation_fn(conv_filter_norm, name='activation')
        else:
            conv_filter_linear = tf.add(conv_filter, b)
            H = activation_fn(conv_filter_linear, name='activation')

        conv_gate = tf.add(conv_gate, b_t)
        T = tf.sigmoid(conv_gate, name='transform_gate')

        # debugging
        tf.summary.histogram(vs.name + "_weight_filter", W)
        tf.summary.histogram(vs.name + '_weight_gate', W_t)
        tf.summary.histogram(vs.name + '_bias_gate', b_t)
        if not batch_norm.apply:
            tf.summary.histogram(vs.name + '_bias_filter', b)
        s._norm_summary(W, vs.name + '_filter')
        s._norm_summary(W_t, vs.name + '_gate')

        return tf.multiply(H, T)
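# --- usage sketch (illustrative only) ---
# Mirrors how dw_cnn_rnn above feeds the depthwise gated convolution: every feature is moved
# to the channel axis and filtered independently. Shapes and the scope name are hypothetical.
def _depthwise_gated_conv1d_usage_sketch():
    batch, time_steps, n_features, multiply = 32, 20, 8, 3
    features = tf.placeholder(tf.float32, [batch, time_steps, n_features])
    features = tf.expand_dims(features, -2)  # -> [batch, time_steps, 1, n_features]
    gated = depthwise_gated_conv1d(features,
                                   filter_size=1,
                                   in_channel=n_features,
                                   channel_multiply=multiply,
                                   name="deepwise_gated_cnn_example",
                                   batch_norm=fu.create_BNParams())
    # output: [batch, time_steps, 1, n_features * multiply]
    return gated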