def mlp(h_params, mode, features_map, target):
    layers_output = []
    features = features_map['features']
    # features = features_map

    for layer_idx, h_layer_dim in enumerate(h_params.h_layer_size):
        if layer_idx == 0:
            layer_input = features
        else:
            layer_input = layers_output[-1]

        # AUTO_REUSE lets the tf.get_variable("weights") lookup below reuse the
        # variables created by fully_connected in this scope.
        with tf.variable_scope('ml_{}'.format(layer_idx), reuse=tf.AUTO_REUSE) as vs:
            layer_output = tf.contrib.layers.fully_connected(
                inputs=layer_input,
                num_outputs=h_layer_dim,
                activation_fn=leaky_relu,
                # weights_initializer=tf.truncated_normal_initializer(mean=0.5, stddev=0.5),
                weights_initializer=tf.contrib.layers.xavier_initializer(),
                weights_regularizer=tf.contrib.layers.l2_regularizer(h_params.l2_reg),
                # normalizer_fn=tf.contrib.layers.layer_norm,
                scope=vs)

            if h_params.dropout is not None and mode == tf.contrib.learn.ModeKeys.TRAIN:
                layer_output = tf.nn.dropout(layer_output, keep_prob=1 - h_params.dropout)

            s.add_hidden_layer_summary(activation=layer_output,
                                       weight=tf.get_variable("weights"),
                                       name=vs.name)
            layers_output.append(layer_output)

    with tf.variable_scope('logits') as vs:
        logits = tf.contrib.layers.fully_connected(
            inputs=layers_output[-1],
            num_outputs=h_params.num_class[h_params.e_type],
            activation_fn=None,
            scope=vs)
        s.add_hidden_layer_summary(activation=logits, name=vs.name)

    predictions = tf.argmax(tf.nn.softmax(logits), 1)

    if mode == tf.contrib.learn.ModeKeys.INFER:
        return predictions, None
    elif mode == tf.contrib.learn.ModeKeys.TRAIN:
        t_accuracy = tf.contrib.metrics.streaming_accuracy(predictions, target)
        tf.summary.scalar('train_accuracy', tf.reduce_mean(t_accuracy))

    # Calculate the sparse softmax cross-entropy loss
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                            labels=target,
                                                            name='entropy')
    mean_loss = tf.reduce_mean(losses, name='mean_loss')
    return predictions, mean_loss
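
# Minimal usage sketch for mlp (illustrative only, not part of the training pipeline).
# It assumes an h_params object with the fields read above (h_layer_size, dropout,
# l2_reg, num_class, e_type) and relies on this module's existing helpers (leaky_relu,
# the summary module s). All `example_*` names are hypothetical.
def _example_mlp_usage():
    from collections import namedtuple
    ExampleHParams = namedtuple('ExampleHParams',
                                ['h_layer_size', 'dropout', 'l2_reg', 'num_class', 'e_type'])
    example_h_params = ExampleHParams(h_layer_size=[64, 32],
                                      dropout=0.2,
                                      l2_reg=1e-4,
                                      num_class={'binary': 2},
                                      e_type='binary')
    example_features_map = {'features': tf.placeholder(tf.float32, [None, 20])}
    example_target = tf.placeholder(tf.int64, [None])
    # mlp returns (predictions, mean_loss); in INFER mode the loss is None
    example_preds, example_loss = mlp(example_h_params,
                                      tf.contrib.learn.ModeKeys.TRAIN,
                                      example_features_map,
                                      example_target)
    return example_preds, example_loss
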
def simple_rnn(h_params, mode, features_map, target):
    features = features_map['features']
    sequence_length = features_map['length']

    # Prepare data shape to match the `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
    # feature = tf.unstack(feature, h_params.sequence_length, 1)

    with tf.variable_scope('rnn') as vs:
        # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        # Define a GRU cell with tensorflow
        cell = tf.contrib.rnn.GRUCell(h_params.h_layer_size[-1], activation=tf.nn.tanh)
        cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=h_params.dropout)

        # Get cell output
        outputs, states = tf.contrib.rnn.static_rnn(
            cell,
            tf.unstack(features, axis=1),
            sequence_length=sequence_length,
            dtype=tf.float32)

        # for num_step, output in enumerate(outputs):
        #     _add_hidden_layer_summary(output, vs.name + "_{}".format(num_step))
        s.add_hidden_layers_summary(outputs, vs.name + "_output")

        if isinstance(states, (list, tuple)):
            s.add_hidden_layer_summary(states.h, vs.name + "_state")
        else:
            s.add_hidden_layer_summary(states, vs.name + "_state")

    with tf.variable_scope('logits') as vs:
        logits = tf.contrib.layers.fully_connected(
            inputs=outputs[-1],
            num_outputs=h_params.num_class[h_params.e_type],
            activation_fn=None,
            scope=vs)
        s.add_hidden_layer_summary(logits, vs.name)

    predictions = tf.argmax(tf.nn.softmax(logits), 1)

    if mode == tf.contrib.learn.ModeKeys.INFER:
        return predictions, None
    elif mode == tf.contrib.learn.ModeKeys.TRAIN:
        t_accuracy = tf.contrib.metrics.streaming_accuracy(predictions, target)
        tf.summary.scalar('train_accuracy', tf.reduce_mean(t_accuracy))

    # Calculate the sparse softmax cross-entropy loss
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                            labels=target,
                                                            name='entropy')
    mean_loss = tf.reduce_mean(losses, name='mean_loss')
    return predictions, mean_loss
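
# The reshaping requirement noted in simple_rnn above, shown in isolation:
# static_rnn expects a Python list of per-timestep tensors, which tf.unstack
# produces from the (batch_size, n_steps, n_input) layout. Dimensions here are
# made up for illustration.
def _example_unstack_for_static_rnn():
    batch_features = tf.placeholder(tf.float32, [None, 5, 8])  # (batch, n_steps=5, n_input=8)
    step_inputs = tf.unstack(batch_features, axis=1)           # list of 5 tensors, each (batch, 8)
    assert len(step_inputs) == 5
    return step_inputs
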
def gated_dense_layer_ot(x, in_size, out_size, sequence_length, scope_name,
                         activation_fn=tf.nn.elu, batch_norm=fu.create_BNParams(),
                         is_highway=False):
    '''
    Apply a gated linear layer to the input:
        activation_fn(mul(x, W)) * sigmoid(mul(x, W_t))
    The gate W_t should learn how much of the input x to let through.
    :param x: input mini-batch
    :param in_size: input size, i.e. the number of features
    :param out_size: output size (if is_highway is True it must equal in_size so the carry C * x can be added)
    :param sequence_length: number of timesteps
    :param scope_name: name of the scope of this layer
    :param activation_fn: activation function to apply
    :param batch_norm: apply batch norm before computing the activation of both W and W_t
    :param is_highway: if True, add the highway carry term (1 - T) * x to the gated output
    '''
    layers_output = []
    with tf.variable_scope(scope_name) as vs:
        W = tf.get_variable('weight_filter',
                            shape=[in_size, out_size],
                            initializer=tf.contrib.layers.xavier_initializer(),
                            collections=[GraphKeys.WEIGHTS, GraphKeys.GLOBAL_VARIABLES],
                            trainable=True)

        W_t = tf.get_variable('weight_gate',
                              shape=[in_size, out_size],
                              initializer=tf.contrib.layers.xavier_initializer(),
                              collections=[GraphKeys.WEIGHTS, GraphKeys.GLOBAL_VARIABLES],
                              trainable=True)

        if not batch_norm.apply:
            b_t = tf.get_variable('bias_gate',
                                  shape=[out_size],
                                  initializer=tf.constant_initializer(0.),
                                  collections=[tf.GraphKeys.BIASES, GraphKeys.GLOBAL_VARIABLES])

            b = tf.get_variable('bias_filter',
                                shape=[out_size],
                                initializer=tf.constant_initializer(0.),
                                collections=[tf.GraphKeys.BIASES, GraphKeys.GLOBAL_VARIABLES])

        # Iterate over the timesteps
        for t in range(0, sequence_length):
            H_linear = tf.matmul(x[:, t, :], W)
            T_linear = tf.matmul(x[:, t, :], W_t)

            if batch_norm.apply:
                # share the batch-norm variables across timesteps
                H_norm = tf.contrib.layers.batch_norm(H_linear,
                                                      center=batch_norm.center,
                                                      scale=batch_norm.scale,
                                                      is_training=batch_norm.phase,
                                                      reuse=tf.AUTO_REUSE,
                                                      scope=vs.name + '_filter_bn')
                H = activation_fn(H_norm, name="activation")

                # normalise the gate pre-activation (T_linear, not H_linear)
                T_norm = tf.contrib.layers.batch_norm(T_linear,
                                                      center=batch_norm.center,
                                                      scale=batch_norm.scale,
                                                      is_training=batch_norm.phase,
                                                      reuse=tf.AUTO_REUSE,
                                                      scope=vs.name + '_gate_bn')
                T = tf.sigmoid(T_norm, name="transit_gate")
            else:
                H = activation_fn(tf.add(H_linear, b), name="activation")
                T = tf.sigmoid(tf.add(T_linear, b_t), name="transit_gate")

            if is_highway:
                C = 1 - T
                layer_output = tf.multiply(H, T) + (C * x[:, t, :])
            else:
                layer_output = tf.multiply(H, T)

            # apply dropout
            # if h_params.dropout is not None and mode == tf.contrib.learn.ModeKeys.TRAIN:
            #     layer_output = tf.nn.dropout(layer_output, keep_prob=1 - h_params.dropout)

            # add back the timestep dimension to allow concatenation
            layers_output.append(tf.expand_dims(layer_output, 1))

        # proved to be the same weights
        s.add_hidden_layer_summary(layers_output[-1], vs.name)
        tf.summary.histogram(vs.name + "_weight_filter", W)
        tf.summary.histogram(vs.name + '_weight_gate', W_t)
        if not batch_norm.apply:
            tf.summary.histogram(vs.name + '_bias_filter', b)
            tf.summary.histogram(vs.name + '_bias_gate', b_t)

        s._norm_summary(W, vs.name + '_filter')
        s._norm_summary(W_t, vs.name + '_gate')

    return tf.concat(layers_output, axis=1)
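
# Usage sketch for gated_dense_layer_ot (illustrative only): a (batch, time, feature)
# input is filtered per timestep with the shared W / W_t, so the output keeps the
# (batch, time, out_size) layout. The bias path is exercised here, assuming the default
# fu.create_BNParams() leaves batch norm off (apply=False); all example names are hypothetical.
def _example_gated_dense_usage():
    example_series = tf.placeholder(tf.float32, [None, 10, 6])  # (batch, time=10, features=6)
    gated_out = gated_dense_layer_ot(example_series,
                                     in_size=6,
                                     out_size=4,
                                     sequence_length=10,
                                     scope_name='example_gated_dense',
                                     activation_fn=tf.nn.elu)
    # gated_out has shape (batch, 10, 4); with is_highway=True, out_size must equal
    # in_size so the carry term (1 - T) * x can be added element-wise.
    return gated_out
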
def dense_layer_ot(x, in_size, out_size, sequence_length, scope_name,
                   activation_fn=tf.nn.elu, batch_norm=fu.create_BNParams()):
    '''
    Apply a dense layer over all the timesteps. This is used to filter the time series.
    :param x: input data
    :param in_size: input size, i.e. the number of features
    :param out_size: output size
    :param sequence_length: length of the sequence, i.e. the number of timesteps to iterate over
    :param scope_name: scope name of this transformation
    :param activation_fn: activation function
    :param batch_norm: namedtuple indicating whether to apply batch normalization and the phase (True if training, False if testing)
    :return: the filtered series of shape [batch_size, sequence_length, out_size]
    '''
    layers_output = []
    with tf.variable_scope(scope_name) as vs:
        W = tf.get_variable('weight_filter',
                            shape=[in_size, out_size],
                            initializer=tf.contrib.layers.xavier_initializer(),
                            collections=[GraphKeys.WEIGHTS, GraphKeys.GLOBAL_VARIABLES],
                            trainable=True)

        if not batch_norm.apply:
            b = tf.get_variable('bias_filter',
                                shape=[out_size],
                                initializer=tf.constant_initializer(0.),
                                collections=[GraphKeys.BIASES, GraphKeys.GLOBAL_VARIABLES],
                                trainable=True)

        for t in range(0, sequence_length):
            layer_output = standard_ops.matmul(x[:, t, :], W)

            if batch_norm.apply:
                # apply batch norm, sharing its variables across timesteps
                layer_output = tf.contrib.layers.batch_norm(layer_output,
                                                            center=batch_norm.center,
                                                            scale=batch_norm.scale,
                                                            is_training=batch_norm.phase,
                                                            reuse=tf.AUTO_REUSE,
                                                            scope=vs.name + '_bn')
            else:
                # apply the bias
                layer_output = standard_ops.add(layer_output, b)

            if activation_fn:
                layer_output = activation_fn(layer_output)

            # add back the timestep dimension to allow concatenation
            layers_output.append(tf.expand_dims(layer_output, 1))

        # proved to be the same weights
        s.add_hidden_layer_summary(layers_output[-1], vs.name, weight=W)
        if not batch_norm.apply:
            tf.summary.histogram(vs.name + '_bias', b)

    return tf.concat(layers_output, axis=1)
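
# Usage sketch for dense_layer_ot (illustrative only): the same per-timestep weight
# sharing as the gated variant, but a plain affine map. The batch-norm branch is driven
# by the project's fu.create_BNParams namedtuple, built the same way deep_rnn builds it
# below; the placeholder names are hypothetical.
def _example_dense_layer_ot_usage(is_training_phase):
    example_series = tf.placeholder(tf.float32, [None, 10, 6])  # (batch, time=10, features=6)
    bn_params = fu.create_BNParams(apply=True, phase=is_training_phase)
    dense_out = dense_layer_ot(example_series,
                               in_size=6,
                               out_size=4,
                               sequence_length=10,
                               scope_name='example_dense_ot',
                               activation_fn=tf.nn.elu,
                               batch_norm=bn_params)
    return dense_out  # shape (batch, 10, 4)
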
def deep_rnn(h_params, mode, features_map, target):
    features = features_map['features']
    sequence_length = features_map['length']
    hidden_layer = eval(h_params.hidden_layer_type)

    # apply a non-linear transformation to the input at every timestep
    in_size = h_params.input_size
    filtered = features
    batch_norm_data = fu.create_BNParams(apply=True, phase=fu.is_training(mode))

    for layer_idx, h_layer_dim in enumerate(h_params.h_layer_size[:-1]):
        filtered = hidden_layer(filtered,
                                in_size,
                                h_layer_dim,
                                sequence_length=h_params.sequence_length,
                                scope_name='gated_dense_{}'.format(layer_idx),
                                activation_fn=leaky_relu,
                                batch_norm=batch_norm_data)
        in_size = h_layer_dim

    with tf.variable_scope('rnn') as vs:
        # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        # Define a GRU cell with tensorflow
        cell = tf.contrib.rnn.GRUCell(h_params.h_layer_size[-1], activation=tf.nn.tanh)
        cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=h_params.dropout)

        # Get cell output
        outputs, states = tf.contrib.rnn.static_rnn(
            cell,
            tf.unstack(filtered, axis=1),
            sequence_length=sequence_length,
            dtype=tf.float32)

        s.add_hidden_layer_summary(activation=outputs[-1], name=vs.name + "_output")
        if isinstance(states, (list, tuple)):
            s.add_hidden_layer_summary(states.h, vs.name + "_state")
        else:
            s.add_hidden_layer_summary(states, vs.name + "_state")

    with tf.variable_scope('logits') as vs:
        logits = dense_layer(x=outputs[-1],
                             in_size=h_params.h_layer_size[-1],
                             out_size=h_params.num_class[h_params.e_type],
                             scope=vs,
                             activation_fn=None)
        s.add_hidden_layer_summary(logits, vs.name)

    predictions, losses = output_layer.losses(logits, target, mode=mode, h_params=h_params)

    if mode == tf.contrib.learn.ModeKeys.INFER:
        return predictions, None

    mean_loss = tf.reduce_mean(losses, name='mean_loss')
    return predictions, mean_loss
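
# deep_rnn above resolves its per-timestep layer with eval(h_params.hidden_layer_type),
# so that hyper-parameter is expected to hold the name of one of the *_ot functions
# defined earlier (e.g. 'dense_layer_ot' or 'gated_dense_layer_ot'). The mapping is
# spelled out here as a sketch of an eval-free alternative; deep_rnn itself keeps the
# eval-based lookup.
_EXAMPLE_HIDDEN_LAYERS = {
    'dense_layer_ot': dense_layer_ot,
    'gated_dense_layer_ot': gated_dense_layer_ot,
}
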
def dw_cnn_rnn(h_params, mode, features_map, target):
    features = features_map['features']
    sequence_length = features_map['length']
    batch_norm_data = fu.create_BNParams(apply=True, phase=fu.is_training(mode))
    channel_multiply = 3

    features = tf.expand_dims(features, -2)  # add the channel dimension
    filtered = conv_layer.depthwise_gated_conv1d(features,
                                                 filter_size=1,
                                                 in_channel=h_params.input_size,
                                                 channel_multiply=channel_multiply,
                                                 name="deepwise_gated_cnn",
                                                 activation_fn=tf.nn.elu,
                                                 batch_norm=batch_norm_data)

    # reshape the data to a normal form -> move the input channel to the feature space
    filtered = tf.reshape(filtered,
                          shape=[tf.shape(filtered)[0],  # last batch may not be full
                                 h_params.sequence_length,
                                 h_params.input_size,
                                 channel_multiply])

    filtered = conv_layer.conv1d(filtered,
                                 filter_size=1,
                                 in_channel=channel_multiply,
                                 out_channel=1,
                                 name="cnn_down_sample",
                                 activation_fn=tf.nn.elu,
                                 batch_norm=batch_norm_data)
    filtered = tf.squeeze(filtered, axis=-1)

    with tf.variable_scope('rnn') as vs:
        # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        # Define a GRU cell with tensorflow
        cell = tf.contrib.rnn.GRUCell(h_params.h_layer_size[-1], activation=tf.nn.tanh)
        cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=h_params.dropout)

        # Get cell output
        outputs, states = tf.contrib.rnn.static_rnn(
            cell,
            tf.unstack(filtered, axis=1),
            sequence_length=sequence_length,
            dtype=tf.float32)

        s.add_hidden_layer_summary(activation=outputs[-1], name=vs.name + "_output")
        if isinstance(states, (list, tuple)):
            s.add_hidden_layer_summary(states.h, vs.name + "_state")
        else:
            s.add_hidden_layer_summary(states, vs.name + "_state")

    with tf.variable_scope('logits') as vs:
        logits = tf.contrib.layers.fully_connected(
            inputs=outputs[-1],
            num_outputs=h_params.num_class[h_params.e_type],
            activation_fn=None,
            scope=vs)
        s.add_hidden_layer_summary(logits, vs.name)

    predictions, losses = output_layer.losses(logits, target, mode=mode, h_params=h_params)

    if mode == tf.contrib.learn.ModeKeys.INFER:
        return predictions, None

    mean_loss = tf.reduce_mean(losses, name='mean_loss')
    return predictions, mean_loss
def cnn_rnn(h_params, mode, features_map, target):
    features = features_map['features']
    sequence_length = features_map['length']
    in_channel = 1

    features = tf.expand_dims(features, -1)  # add channel dim

    # apply conv filtering
    # filtered_one = conv_layer.highway_conv2d(features,
    #                                          filter_size=1,
    #                                          in_channel=in_channel,
    #                                          out_channel=h_params.one_by_one_out_filters,
    #                                          name="highway_cnn_one")
    filtered_one = conv_layer.gated_conv1d(features,
                                           filter_size=1,
                                           in_channel=in_channel,
                                           out_channel=h_params.one_by_one_out_filters,
                                           name="gated_cnn")

    filtered = conv_layer.conv1d(filtered_one,
                                 filter_size=1,
                                 in_channel=h_params.one_by_one_out_filters,
                                 out_channel=1,
                                 name="cnn_down_sample",
                                 activation_fn=leaky_relu)
    # filtered = tf.add(filtered, features)  # skip connection
    filtered = tf.squeeze(filtered, axis=-1)

    filtered = tf.contrib.layers.batch_norm(filtered,
                                            center=True,
                                            scale=False,
                                            is_training=fu.is_training(mode),
                                            scope='bn')

    # Concatenate the different filtered time series
    # filtered = tf.unstack(filtered_one, axis=3)
    # filtered.extend(tf.unstack(filtered_all, axis=3))
    # filtered = tf.concat(filtered, axis=2)

    with tf.variable_scope('rnn') as vs:
        # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        # Define a GRU cell with tensorflow (GRUCell has no forget_bias parameter)
        cell = tf.contrib.rnn.GRUCell(h_params.h_layer_size[-1], activation=tf.nn.tanh)
        # cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=h_params.dropout)

        # Get cell output
        outputs, states = tf.contrib.rnn.static_rnn(
            cell,
            tf.unstack(filtered, axis=1),
            sequence_length=sequence_length,
            dtype=tf.float32)

        s.add_hidden_layer_summary(activation=outputs[-1], name=vs.name + "_output")
        if isinstance(states, (list, tuple)):
            s.add_hidden_layer_summary(states.h, vs.name + "_state")
        else:
            s.add_hidden_layer_summary(states, vs.name + "_state")

    with tf.variable_scope('logits') as vs:
        logits = tf.contrib.layers.fully_connected(
            inputs=outputs[-1],
            num_outputs=h_params.num_class[h_params.e_type],
            activation_fn=None,
            scope=vs)
        s.add_hidden_layer_summary(logits, vs.name)

    predictions, losses = output_layer.losses(logits, target, mode=mode, h_params=h_params)

    if mode == tf.contrib.learn.ModeKeys.INFER:
        return predictions, None

    mean_loss = tf.reduce_mean(losses, name='mean_loss')
    return predictions, mean_loss
def h_cnn_rnn(h_params, mode, features_map, target):
    features = features_map['features']
    sequence_length = features_map['length']
    in_channel = 1

    features = tf.expand_dims(features, -1)  # add channel dim

    # apply conv filtering
    layers_output = []
    # -2 because the last two entries are the output layer and the linear projection
    for idx, dim in enumerate(h_params.h_layer_size[:-2]):
        if idx == 0:
            input_layer = features
            in_channel = 1
        else:
            input_layer = layers_output[-1]
            in_channel = h_params.h_layer_size[idx - 1][1]

        with tf.variable_scope('cnn_{}'.format(idx)) as vs:
            W = tf.get_variable('kernel', shape=[1, dim[0], in_channel, dim[1]])
            b = tf.get_variable('bias', shape=[dim[1]])
            layers_output.append(tf.nn.tanh(conv2d(input_layer, W) + b))
            s.add_kernel_summary(W, vs.name)

    # Concatenate the different filtered time series
    features = tf.unstack(layers_output[-1], axis=3)
    features = tf.concat(features, axis=2)

    # apply linear transformation
    for layer_idx, h_layer_dim in enumerate(h_params.h_layer_size[2:-1]):
        layers_output = []
        # AUTO_REUSE shares the weights across the timestep loop and allows the
        # tf.get_variable("weights") lookup in the summary call below.
        with tf.variable_scope('ml_{}'.format(layer_idx), reuse=tf.AUTO_REUSE) as vs:
            # Iterate over the timesteps
            for t in range(0, h_params.sequence_length):
                layer_output = tf.contrib.layers.fully_connected(
                    inputs=features[:, t, :],
                    num_outputs=h_layer_dim,
                    activation_fn=leaky_relu,
                    weights_initializer=initializers.xavier_initializer(),
                    # normalizer_fn=tf.contrib.layers.layer_norm,
                    scope=vs)
                # if h_params.dropout is not None and mode == tf.contrib.learn.ModeKeys.TRAIN:
                #     layer_output = tf.nn.dropout(layer_output, keep_prob=1 - h_params.dropout)

                # add back the timestep dimension to allow concatenation
                layers_output.append(tf.expand_dims(layer_output, 1))

            # proved to be the same weights
            s.add_hidden_layers_summary(layers_output, vs.name, weight=tf.get_variable("weights"))

        features = tf.concat(layers_output, axis=1)  # concat the different timesteps

    # TODO: try an attention transformation
    # TODO: try a highway network

    with tf.variable_scope('rnn') as vs:
        # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        # Define an LSTM cell with tensorflow
        cell = tf.contrib.rnn.LSTMCell(h_params.h_layer_size[-1],
                                       forget_bias=1.0,
                                       activation=tf.nn.tanh)
        # cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=h_params.dropout)

        # Get LSTM cell output
        outputs, states = tf.contrib.rnn.static_rnn(
            cell,
            tf.unstack(features, axis=1),
            sequence_length=sequence_length,
            dtype=tf.float32)

        s.add_hidden_layers_summary(tensors=outputs, name=vs.name + "_output")
        s.add_hidden_layers_summary(tensors=states, name=vs.name + "_state")

    with tf.variable_scope('logits') as vs:
        logits = tf.contrib.layers.fully_connected(
            inputs=outputs[-1],
            num_outputs=h_params.num_class[h_params.e_type],
            activation_fn=None,
            scope=vs)
        s.add_hidden_layer_summary(logits, vs.name)

    predictions, losses = output_layer.losses(logits, target, mode=mode, h_params=h_params)

    if mode == tf.contrib.learn.ModeKeys.INFER:
        return predictions, None

    mean_loss = tf.reduce_mean(losses, name='mean_loss')
    return predictions, mean_loss