def _ln_rnn(x, gamma, beta):
    r"""Layer-normalizes `x` over its last axis, then scales and shifts it.

    Args:
      x: A `Tensor` whose rank is statically known (its static shape is
        measured with `len`).
      gamma: Scale multiplied into the normalized tensor.
      beta: Offset added after scaling.

    Returns:
      `gamma * x_normalized + beta`.
    """
    # Moments are taken over the final axis only; keep_dims leaves a size-1
    # axis in place so they broadcast against `x`.
    last_axis = len(x.get_shape()) - 1
    mean, variance = tf.nn.moments(x, axes=[last_axis], keep_dims=True)

    # tf.sg_eps is added to the variance before taking the square root.
    normalized = (x - mean) / tf.sqrt(variance + tf.sg_eps)

    return gamma * normalized + beta
def ln(xx, opt):
    r"""Applies layer normalization to `xx` when `opt.ln` is set.

    `gamma` and `beta` are not parameters of this function; they are
    resolved from the enclosing scope.

    Args:
      xx: A `Tensor` whose rank is statically known.
      opt: An option object; only `opt.ln` is read here.

    Returns:
      `gamma * xx_normalized + beta` when `opt.ln` is truthy.

    NOTE(review): when `opt.ln` is falsy no `return` is reached, so the
    result is `None`. This structure was reconstructed from flattened
    source; confirm against the caller that this is intended.
    """
    if opt.ln:
        # calc layer mean, variance for final axis. keep_dims leaves a
        # size-1 trailing axis so the moments broadcast against `xx` for any
        # rank. Reshaping them to [-1, 1, ..., 1] instead would fold every
        # leading axis into axis 0, which only lines up for rank <= 2.
        mean, variance = tf.nn.moments(
            xx, axes=[len(xx.get_shape()) - 1], keep_dims=True)

        # apply layer normalization
        xx = (xx - mean) / tf.sqrt(variance + tf.sg_eps)

        # apply parameter
        return gamma * xx + beta
def _ln_rnn(x, gamma, beta):
    r"""Applies layer normalization.

    Standardizes `x` along its last dimension using that dimension's mean
    and variance, then returns `gamma * x_standardized + beta`.
    """
    rank = len(x.get_shape())

    # Statistics of the final axis, kept as a size-1 axis for broadcasting.
    mu, var = tf.nn.moments(x, axes=[rank - 1], keep_dims=True)

    # tf.sg_eps is added to the variance before the square root.
    x_hat = (x - mu) / tf.sqrt(var + tf.sg_eps)

    return gamma * x_hat + beta
def _ln_rnn(x, gamma, beta):
    r"""Applies layer normalization over the last dimension of `x`.

    Args:
      x: A `Tensor` with a statically known rank.
      gamma: A `Tensor`. Scale applied to the normalized values.
      beta: A `Tensor`. Offset added after scaling.

    Returns:
      A `Tensor` computed as `gamma * x_normalized + beta`.
    """
    final_axis = len(x.get_shape()) - 1

    # Per-position mean and variance of the final axis (size-1 axis kept).
    mean, variance = tf.nn.moments(x, axes=[final_axis], keep_dims=True)

    # Center first, then divide by the eps-stabilized standard deviation.
    centered = x - mean
    stddev = tf.sqrt(variance + tf.sg_eps)

    # Scale and shift.
    return gamma * (centered / stddev) + beta
def get_gram_mat(tensor):
    '''
    Builds a Gram matrix from a 4-D tensor.

    Arg:
      tensor: 4-D tensor with a fully defined static shape. The original
        author documents that the first dimension must be 1; that is not
        checked here.

    Returns:
      gram matrix of shape (D, D), where D is the size of the last dimension
      of `tensor` (e.g. 512 by 512 for a (1, 7, 7, 512) input).
      Read `https://en.wikipedia.org/wiki/Gramian_matrix` for details.
    '''
    assert tensor.get_shape().ndims == 4, "The tensor must be 4 dimensions."

    # Collapse the three leading axes into one, e.g. (1*7*7, 512).
    d0, d1, d2, d_last = tensor.get_shape().as_list()
    flat = tensor.sg_reshape(shape=[d0 * d1 * d2, d_last])

    # Standardize with the moments taken over both axes of the flattened
    # tensor; the original author notes the raw gram values would otherwise
    # be too large.
    mean, variance = tf.nn.moments(flat, [0, 1])
    flat = (flat - mean) / tf.sqrt(variance + tf.sg_eps)

    # (D, rows) x (rows, D) -> (D, D)
    flat_t = flat.sg_transpose(perm=[1, 0])
    return tf.matmul(flat_t, flat)
def wrapper(tensor, **kwargs):
    r"""Manages arguments of `tf.sg_opt`.

    Runs the wrapped layer function `func` on `tensor` inside its own
    variable scope and then applies, in this order, the optional batch or
    layer normalization, activation and dropout selected through `kwargs`.
    `func`, `_context`, `_phase`, `sg_reuse` and `types` are not defined
    here; they are resolved from the enclosing scope.

    Args:
      tensor: A `tensor` (automatically passed by decorator).
      kwargs:
        shape: A list of integers. The shape of `tensor`. Inferred if not
          specified.
        in_dim: An integer. The size of input dimension, which is set to the
          last one by default.
        dim: An integer. The size of output dimension. Has the same value as
          in_dim by default.
        bn: Boolean. If True, batch normalization is applied.
        ln: Boolean. If True, layer normalization is applied.
        dout: A float. Dropout rate; when `_phase` is true the output goes
          through `tf.nn.dropout` with keep probability `1 - dout`. Set to 0
          by default.
        bias: Boolean. Defaults to True unless `bn` or `ln` is on.
        name: A name for the layer. By default the function name (without
          the 'sg_' prefix) plus a running number is assigned.
        act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
        reuse: Passed as `reuse` to `tf.variable_scope` for this layer.

    Returns:
      The layer output, renamed to 'out' inside the layer scope, carrying a
      `_sugar` attribute with the call information and an `sg_reuse` method.

    Raises:
      AssertionError: If both `bn` and `ln` are requested.
    """
    from . import sg_initializer as init
    from . import sg_activation

    # kwargs parsing
    opt = tf.sg_opt(kwargs) + _context

    # set default argument
    shape = tensor.get_shape().as_list()
    # batch normalization off, layer normalization off, dropout off
    opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1],
                     bn=False, ln=False, dout=0)
    assert not (opt.bn and opt.ln), \
        'one of batch normalization and layer normalization is available.'
    # disable bias when normalization on
    opt += tf.sg_opt(bias=not (opt.bn or opt.ln))

    # automatic layer naming
    if opt.name is None:
        # layer function name will be used as layer name
        opt.name = func.__name__.replace('sg_', '')

        # find existing layer names (the scope prefix does not change while
        # scanning, so it is looked up once)
        scope_name = tf.get_variable_scope().name
        prefix = scope_name + '/' if scope_name else ''
        target = prefix + opt.name
        exist_layers = set()
        for t in tf.global_variables():
            i = t.name.rfind(target)
            if i >= 0:
                exist_layers.add(t.name[i:].split('/')[-2])

        # layer name numbering: one past the highest existing suffix
        if exist_layers:
            last_index = max(int(n.split('_')[-1]) for n in exist_layers)
            opt.name += '_%d' % (last_index + 1)
        else:
            opt.name += '_1'

    with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:
        # call layer function
        out = func(tensor, opt)

        # apply batch normalization
        if opt.bn:
            # offset, scale parameter
            beta = init.constant('beta', opt.dim, summary=False)
            gamma = init.constant('gamma', opt.dim, value=1, summary=False)

            # running mean, variance (used when `_phase` is false)
            mean_running = init.constant('mean', opt.dim, summary=False)
            variance_running = init.constant('variance', opt.dim, value=1,
                                             summary=False)

            # calc batch mean, variance over every axis but the last.
            # The axes are materialized as a list: under Python 3 a bare
            # `range` object is not accepted as a reduction-axes value.
            mean, variance = tf.nn.moments(
                out, axes=list(range(len(out.get_shape()) - 1)))

            # update running mean, variance
            def update_running_stat():
                decay = 0.99
                update_op = [
                    mean_running.assign(
                        mean_running * decay + mean * (1 - decay)),
                    variance_running.assign(
                        variance_running * decay + variance * (1 - decay)),
                ]
                # the returned batch statistics depend on the update ops
                with tf.control_dependencies(update_op):
                    return tf.identity(mean), tf.identity(variance)

            # select mean, variance by training phase
            m, v = tf.cond(
                _phase,
                update_running_stat,  # updated running stat and batch mean, variance
                lambda: (mean_running, variance_running))  # saved mean, variance

            # apply batch normalization
            out = tf.nn.batch_normalization(out, m, v, beta, gamma, tf.sg_eps)

        # apply layer normalization
        if opt.ln:
            # offset, scale parameter
            beta = init.constant('beta', opt.dim, summary=False)
            gamma = init.constant('gamma', opt.dim, value=1, summary=False)

            # calc layer mean, variance for final axis
            mean, variance = tf.nn.moments(
                out, axes=[len(out.get_shape()) - 1], keep_dims=True)

            # apply normalization
            out = (out - mean) / tf.sqrt(variance + tf.sg_eps)

            # apply parameter
            out = gamma * out + beta

        # apply activation
        if opt.act:
            out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

        # apply dropout
        if opt.dout:
            out = tf.cond(_phase,
                          lambda: tf.nn.dropout(out, 1 - opt.dout),
                          lambda: out)

        # rename tensor
        out = tf.identity(out, 'out')

        # add final output summary
        if not scope.reuse:
            tf.sg_summary_activation(out)

        # save node info for reuse
        out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs) + _context,
                               prev=tensor, is_layer=True, name=opt.name)
        # inject reuse function
        out.sg_reuse = types.MethodType(sg_reuse, out)

    return out
def wrapper(tensor, **kwargs):
    r"""Runs the wrapped layer function and applies the common post-steps.

    Calls `func(tensor, opt)` inside the nested variable scopes
    'layers/<name>' and then applies, in this order, the optional batch or
    layer normalization, activation and dropout selected through `kwargs`.
    `func`, `_context`, `_phase`, `sg_reuse` and `types` are not defined
    here; they are resolved from the enclosing scope.

    Args:
      tensor: The input tensor handed to `func`.
      kwargs: Layer options. The ones read here are `bn`, `ln`, `dout`,
        `dim`, `name`, `act` and `reuse`; `shape`, `in_dim`, `dim` default
        from the static shape of `tensor`, `bn`/`ln` default to False,
        `dout` to 0, and `bias` defaults to True unless `bn` or `ln` is on.

    Returns:
      The layer output, renamed to 'out' inside the layer scope, carrying a
      `_sugar` attribute with the call information and an `sg_reuse` method.

    Raises:
      AssertionError: If both `bn` and `ln` are requested.
    """
    # NOTE(review): these are not explicit relative imports; presumably they
    # rely on Python 2 implicit relative imports or on sys.path. Confirm.
    import sg_initializer as init
    import sg_activation

    # kwargs parsing
    opt = tf.sg_opt(kwargs) + _context

    # set default argument
    shape = tensor.get_shape().as_list()
    # batch normalization off, layer normalization off, dropout off
    opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1],
                     bn=False, ln=False, dout=0)
    assert not (opt.bn and opt.ln), \
        'one of batch normalization and layer normalization is available.'
    # disable bias when normalization on
    opt += tf.sg_opt(bias=not (opt.bn or opt.ln))

    # automatic layer naming
    if opt.name is None:
        # layer function name will be used as layer name
        opt.name = func.__name__.replace('sg_', '')

        # find existing layer names under '<current scope>/layers/'
        # (the prefix does not change while scanning, so it is built once)
        scope_name = tf.get_variable_scope().name
        prefix = scope_name + '/' if scope_name else ''
        target = prefix + 'layers/' + opt.name
        exist_layers = set()
        for t in tf.get_collection(tf.GraphKeys.VARIABLES):
            i = t.name.rfind(target)
            if i >= 0:
                exist_layers.add(t.name[i:].split('/')[-2])

        # layer name numbering: one past the highest existing suffix
        if exist_layers:
            last_index = max(int(n.split('_')[-1]) for n in exist_layers)
            opt.name += '_%d' % (last_index + 1)
        else:
            opt.name += '_1'

    # all layer variables start with 'layers/' prefix
    with tf.variable_scope('layers', reuse=opt.reuse), \
            tf.variable_scope(opt.name):
        # call layer function
        out = func(tensor, opt)

        # apply batch normalization
        if opt.bn:
            # offset, scale parameter
            beta = init.constant('beta', opt.dim)
            gamma = init.constant('gamma', opt.dim, value=1)

            # running mean, variance (used when `_phase` is false)
            mean_running = init.constant('mean', opt.dim)
            variance_running = init.constant('variance', opt.dim, value=1)

            # calc batch mean, variance over every axis but the last.
            # The axes are materialized as a list: under Python 3 a bare
            # `range` object is not accepted as a reduction-axes value
            # (under Python 2 this is a no-op copy).
            mean, variance = tf.nn.moments(
                out, axes=list(range(len(out.get_shape()) - 1)))

            # update running mean, variance
            def update_running_stat():
                decay = 0.99
                update_op = [
                    mean_running.assign(
                        mean_running * decay + mean * (1 - decay)),
                    variance_running.assign(
                        variance_running * decay + variance * (1 - decay)),
                ]
                # the returned batch statistics depend on the update ops
                with tf.control_dependencies(update_op):
                    return tf.identity(mean), tf.identity(variance)

            # select mean, variance by training phase
            m, v = tf.cond(
                _phase,
                update_running_stat,  # updated running stat and batch mean, variance
                lambda: (mean_running, variance_running))  # saved mean, variance

            # apply batch normalization
            out = tf.nn.batch_normalization(out, m, v, beta, gamma, tf.sg_eps)

        # apply layer normalization
        if opt.ln:
            # offset, scale parameter
            beta = init.constant('beta', opt.dim)
            gamma = init.constant('gamma', opt.dim, value=1)

            # calc layer mean, variance for final axis
            mean, variance = tf.nn.moments(
                out, axes=[len(out.get_shape()) - 1], keep_dims=True)

            # apply normalization
            out = (out - mean) / tf.sqrt(variance + tf.sg_eps)

            # apply parameter
            out = gamma * out + beta

        # apply activation
        if opt.act:
            out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

        # apply dropout
        if opt.dout:
            out = tf.cond(_phase,
                          lambda: tf.nn.dropout(out, 1 - opt.dout),
                          lambda: out)

        # rename tensor
        out = tf.identity(out, 'out')

        # add final output summary (skipped when reusing; `None` counts as
        # not reusing)
        if not opt.reuse:
            tf.sg_summary_activation(out)

        # save node info for reuse
        out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs) + _context,
                               prev=tensor, is_layer=True, name=opt.name)
        # inject reuse function
        out.sg_reuse = types.MethodType(sg_reuse, out)

    return out
def wrapper(tensor, **kwargs):
    r"""Manages arguments of `tf.sg_opt`.

    Runs the wrapped layer function `func` on `tensor` inside its own
    variable scope, then applies the optional batch or layer normalization,
    activation and dropout selected through `kwargs`. `func`, `_phase`,
    `sg_get_context`, `sg_reuse` and `types` are resolved from the enclosing
    scope.

    Args:
      tensor: A `tensor` (automatically passed by decorator).
      kwargs:
        shape: A list of integers. The shape of `tensor`. Inferred if not
          specified.
        in_dim: An integer. Size of the input dimension; defaults to the
          last dimension of `tensor`.
        dim: An integer. Size of the output dimension; defaults to the same
          value as `in_dim`.
        bn: Boolean. If True, batch normalization is applied.
        ln: Boolean. If True, layer normalization is applied.
        scale: Boolean, default True. With `bn` it is passed as the
          `trainable` flag of gamma; with `ln` it decides whether gamma is
          created and multiplied in at all.
        dout: A float, default 0. Dropout rate; when `_phase` is true the
          output goes through `tf.nn.dropout` with keep probability
          `1 - dout`.
        bias: Boolean. Defaults to True unless `bn` or `ln` is on.
        name: A name for the layer. By default the function name (without
          the 'sg_' prefix) plus a running number is used.
        act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
        reuse: Passed as `reuse` to `tf.variable_scope` for this layer.
        regularizer: 'l1' or 'l2' select a built-in penalty function; any
          other value is replaced by None.
        summary: Boolean, default True. If True, summaries are added.

    Returns:
      The layer output, renamed to 'out' inside the layer scope, carrying a
      `_sugar` attribute with the call information and an `sg_reuse` method.
    """
    from . import sg_initializer as init
    from . import sg_activation

    # Explicit kwargs are combined with the current context options.
    opt = tf.sg_opt(kwargs) + sg_get_context()

    # Defaults: normalization off, dropout off, summaries and scaling on.
    shape = tensor.get_shape().as_list()
    opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1],
                     bn=False, ln=False, dout=0, summary=True, scale=True)

    # Turn the regularizer name into a callable (or None).
    if opt.regularizer == 'l1':
        opt.regularizer = lambda x: tf.reduce_mean(tf.abs(x))
    elif opt.regularizer == 'l2':
        opt.regularizer = lambda x: tf.square(tf.reduce_mean(tf.square(x)))
    else:
        opt.regularizer = None

    assert not (opt.bn and opt.ln), \
        'one of batch normalization and layer normalization is available.'

    # Bias defaults to off as soon as either normalization is on.
    opt += tf.sg_opt(bias=not opt.bn and not opt.ln)

    # Automatic layer naming.
    if opt.name is None:
        # Start from the layer function name without its 'sg_' prefix.
        opt.name = func.__name__.replace('sg_', '')

        # Collect the scope names of existing variables that match it.
        current_scope = tf.get_variable_scope().name
        prefix = current_scope + '/' if current_scope else ''
        wanted = prefix + opt.name
        taken = set()
        for var in tf.global_variables():
            pos = var.name.rfind(wanted)
            if pos >= 0:
                taken.add(var.name[pos:].split('/')[-2])

        # Number the layer one past the highest suffix already in use.
        if taken:
            highest = max([int(name.split('_')[-1]) for name in taken])
            opt.name += '_%d' % (highest + 1)
        else:
            opt.name += '_1'

    with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:
        # Run the layer itself.
        out = func(tensor, opt)
        out_shape = out.get_shape()
        rank = out_shape.ndims

        # ---- batch normalization ----
        if opt.bn:
            beta = init.constant('beta', opt.dim, summary=opt.summary)
            gamma = init.constant('gamma', opt.dim, value=1,
                                  summary=opt.summary, trainable=opt.scale)

            # Non-trainable running statistics (used when `_phase` is false).
            mean_running = init.constant('mean', opt.dim, trainable=False,
                                         summary=opt.summary)
            variance_running = init.constant('variance', opt.dim, value=1,
                                             trainable=False,
                                             summary=opt.summary)

            if rank in (2, 3, 4):
                # Fused batch norm path. The original author notes that
                # fused_batch_norm requires an NHWC shape, so size-1 axes
                # are inserted for rank-2 and rank-3 outputs.
                if rank == 2:
                    out = tf.expand_dims(out, axis=1)
                    out = tf.expand_dims(out, axis=2)
                elif rank == 3:
                    out = tf.expand_dims(out, axis=2)

                # Epsilon is floored at 1e-5 for the fused op.
                fused_eps = tf.sg_eps if tf.sg_eps > 1e-5 else 1e-5

                def _fused_with_batch_stats():
                    return tf.nn.fused_batch_norm(
                        out, gamma, beta, epsilon=fused_eps)

                def _fused_with_running_stats():
                    return tf.nn.fused_batch_norm(
                        out, gamma, beta,
                        mean=mean_running, variance=variance_running,
                        epsilon=fused_eps, is_training=False)

                out, mean, variance = tf.cond(
                    _phase, _fused_with_batch_stats, _fused_with_running_stats)

                # Drop the axes that were inserted above.
                if rank == 2:
                    out = tf.squeeze(out, axis=[1, 2])
                elif rank == 3:
                    out = tf.squeeze(out, axis=2)
            else:
                # Fallback: plain batch norm with moments over every axis
                # but the last.
                reduce_axes = list(range(len(out.get_shape()) - 1))
                mean, variance = tf.nn.moments(out, axes=reduce_axes)

                def _plain_with_batch_stats():
                    return tf.nn.batch_normalization(
                        out, mean, variance, beta, gamma, tf.sg_eps)

                def _plain_with_running_stats():
                    return tf.nn.batch_normalization(
                        out, mean_running, variance_running,
                        beta, gamma, tf.sg_eps)

                out = tf.cond(
                    _phase, _plain_with_batch_stats, _plain_with_running_stats)

            # Register the moving-average updates of the running statistics
            # in the UPDATE_OPS collection.
            decay = 0.99
            mean_update = mean_running.assign(
                mean_running * decay + mean * (1 - decay))
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, mean_update)
            variance_update = variance_running.assign(
                variance_running * decay + variance * (1 - decay))
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, variance_update)

        # ---- layer normalization ----
        if opt.ln:
            # Offset always exists; scale only when requested.
            beta = init.constant('beta', opt.dim, summary=opt.summary)
            if opt.scale:
                gamma = init.constant('gamma', opt.dim, value=1,
                                      summary=opt.summary)

            # Mean and variance of the final axis.
            last_axis = len(out.get_shape()) - 1
            mean, variance = tf.nn.moments(out, axes=[last_axis],
                                           keep_dims=True)

            # Normalize.
            out = (out - mean) / tf.sqrt(variance + tf.sg_eps)

            # Scale (optional) and shift.
            if opt.scale:
                out = gamma * out + beta
            else:
                out = out + beta

        # ---- activation ----
        if opt.act:
            activation = getattr(sg_activation, 'sg_' + opt.act.lower())
            out = activation(out)

        # ---- dropout (only on the `_phase` branch) ----
        if opt.dout:
            out = tf.cond(_phase,
                          lambda: tf.nn.dropout(out, 1 - opt.dout),
                          lambda: out)

        # Give the final tensor a fixed name.
        out = tf.identity(out, 'out')

        # Final output summary.
        if opt.summary:
            tf.sg_summary_activation(out)

        # Record how this node was built so it can be reused later.
        call_args = tf.sg_opt(kwargs) + sg_get_context()
        out._sugar = tf.sg_opt(func=func, arg=call_args, prev=tensor,
                               is_layer=True, name=opt.name)
        # Attach the reuse function as a bound method.
        out.sg_reuse = types.MethodType(sg_reuse, out)

    return out
def train(self):
    """Build the loss and optimizer, then run the training loop.

    The graph is produced by `self.LSTM('lstm', self.x)`. The total loss is
    the sum of three terms:

    * a coordinate loss over target columns 0-3 (columns 2 and 3, named w
      and h in the code, are compared through their square roots), scaled by
      `self.coord_scale`;
    * an object confidence loss (predicted confidence against the IOU with
      the target) on rows that have a target, scaled by `self.object_scale`;
    * the same confidence loss on the remaining rows, scaled by
      `self.noobject_scale`.

    The loop runs `self.n_iters` Adam steps on batches from the training
    loader, prints diagnostics every `self.display_step` iterations, saves a
    checkpoint to `self.rolo_current_save` whenever
    `iter_id % self.save_step == 1`, and, when validation is enabled, calls
    `self.test` every `self.validate_step` iterations, saving to
    `self.rolo_weights_file` when the validation loss does not get worse.

    Side effects: sets `self.final_state`, `self.saver` and `self.iter_id`,
    writes checkpoints and TensorBoard events under 'summary/test', and
    prints progress to stdout.

    Returns:
      None.
    """
    # ---- Network ----
    batch_pred_feats, batch_pred_coords, batch_pred_confs, self.final_state = \
        self.LSTM('lstm', self.x)

    iou_predict_truth, _ = self.iou(batch_pred_coords, self.y[:, 0:4])

    # 1.0 on rows whose first four target values sum to more than zero.
    has_object = tf.cast(
        tf.reduce_sum(self.y[:, 0:4], axis=1) > 0., tf.float32)
    no_object = tf.ones_like(has_object, dtype=tf.float32) - has_object

    object_loss = tf.nn.l2_loss(
        has_object * (batch_pred_confs - iou_predict_truth)) * self.object_scale
    noobject_loss = tf.nn.l2_loss(
        no_object * (batch_pred_confs - iou_predict_truth)) * self.noobject_scale

    # Predictions are clipped to [0, 1] before the square root.
    p_sqrt_w = tf.sqrt(
        tf.minimum(1.0, tf.maximum(0.0, batch_pred_coords[:, 2])))
    p_sqrt_h = tf.sqrt(
        tf.minimum(1.0, tf.maximum(0.0, batch_pred_coords[:, 3])))

    sqrt_w = tf.sqrt(tf.abs(self.y[:, 2]))
    sqrt_h = tf.sqrt(tf.abs(self.y[:, 3]))

    loss = (tf.nn.l2_loss(has_object * (batch_pred_coords[:, 0] - self.y[:, 0])) +
            tf.nn.l2_loss(has_object * (batch_pred_coords[:, 1] - self.y[:, 1])) +
            tf.nn.l2_loss(has_object * (p_sqrt_w - sqrt_w)) +
            tf.nn.l2_loss(has_object * (p_sqrt_h - sqrt_h))) * self.coord_scale

    total_loss = loss + object_loss + noobject_loss

    # ---- Optimizer ----
    optimizer = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(total_loss)

    # ---- Summary for tensorboard analysis ----
    dataset_loss = -1
    dataset_loss_best = 100
    test_writer = tf.summary.FileWriter('summary/test')
    # NOTE(review): `dataset_loss` is the Python constant -1 at this point,
    # so this summary records that constant, not the validation losses
    # assigned to `dataset_loss` later in the loop. Feeding the value through
    # a placeholder would be needed to chart it.
    tf.summary.scalar('dataset_loss', dataset_loss)
    summary_op = tf.summary.merge_all()

    # ---- Saver ----
    self.saver = tf.train.Saver()

    # TODO: make this a command line argument, etc.
    # training set loader
    batch_loader = BatchLoader(
        "./DATA/TRAINING/",
        seq_len=self.nsteps,
        batch_size=self.batchsize,
        step_size=1,
        folders_to_use=[
            "GOPR0005", "GOPR0006", "GOPR0008", "GOPR0008_2", "GOPR0009",
            "GOPR0009_2", "GOPR0010", "GOPR0011", "GOPR0012", "GOPR0013",
            "GOPR0014", "GOPR0015", "GOPR0016", "MVI_8607", "MVI_8609",
            "MVI_8610", "MVI_8612", "MVI_8614", "MVI_8615", "MVI_8616",
        ])
    validation_set_loader = BatchLoader(
        "./DATA/VALID/",
        seq_len=self.nsteps,
        batch_size=self.batchsize,
        step_size=1,
        folders_to_use=[
            "bbd_2017__2017-01-09-21-40-02_cam_flimage_raw",
            "bbd_2017__2017-01-09-21-44-31_cam_flimage_raw",
            "bbd_2017__2017-01-09-21-48-46_cam_flimage_raw",
            "bbd_2017__2017-01-10-16-07-49_cam_flimage_raw",
            "bbd_2017__2017-01-10-16-21-01_cam_flimage_raw",
            "bbd_2017__2017-01-10-16-31-57_cam_flimage_raw",
            "bbd_2017__2017-01-10-21-43-03_cam_flimage_raw",
            "bbd_2017__2017-01-11-20-21-32_cam_flimage_raw",
            "bbd_2017__2017-01-11-21-02-37_cam_flimage_raw",
        ])

    print("%d available training batches" % len(batch_loader.batches))
    print("%d available validation batches" %
          len(validation_set_loader.batches))

    # Built once, outside the loop: creating this op at every display step
    # would add a new node to the graph each time.
    mean_iou = tf.reduce_mean(iou_predict_truth)

    # ---- Launch the graph ----
    try:
        with tf.Session() as sess:
            # Variables are initialized in both cases; a restore then
            # overwrites them with the saved values.
            tf.sg_init(sess)
            if self.restore_weights == True and os.path.isfile(
                    self.rolo_current_save + ".index"):
                self.saver.restore(sess, self.rolo_current_save)
                print("Weight loaded, finetuning")
            else:
                print("Training from scratch")

            epoch_loss = []
            for self.iter_id in range(self.n_iters):
                # Load training data & ground truth
                batch_id = self.iter_id - self.batch_offset
                batch_xs, batch_ys, _ = batch_loader.load_batch(batch_id)
                feed = {self.x: batch_xs, self.y: batch_ys}

                # Update weights by back-propagation
                sess.run(optimizer, feed_dict=feed)

                if self.iter_id % self.display_step == 0:
                    # Total batch loss
                    batch_loss = sess.run(total_loss, feed_dict=feed)
                    epoch_loss.append(batch_loss)
                    print("Total Batch loss for iteration %d: %.9f" %
                          (self.iter_id, batch_loss))

                    # Coordinate loss only
                    batch_loss = sess.run(loss, feed_dict=feed)
                    print("Bounding box coord error loss for iteration %d: %.9f"
                          % (self.iter_id, batch_loss))

                    if self.display_object_loss:
                        batch_o_loss = sess.run(object_loss, feed_dict=feed)
                        print("Object loss for iteration %d: %.9f" %
                              (self.iter_id, batch_o_loss))

                        batch_noo_loss = sess.run(noobject_loss,
                                                  feed_dict=feed)
                        print("No Object loss for iteration %d: %.9f" %
                              (self.iter_id, batch_noo_loss))

                    if self.iou_with_ground_truth:
                        batch_iou = sess.run(mean_iou, feed_dict=feed)
                        print("Average IOU with ground for iteration %d: %.9f"
                              % (self.iter_id, batch_iou))

                    if self.display_coords is True:
                        # Predicted coordinates of the first batch element
                        coords_predict = sess.run(batch_pred_coords,
                                                  feed_dict=feed)
                        print("predicted coords:" + str(coords_predict[0]))
                        print("ground truth coords:" + str(batch_ys[0]))

                # Save model
                if self.iter_id % self.save_step == 1:
                    self.saver.save(sess, self.rolo_current_save)
                    print("\n Model saved in file: %s" %
                          self.rolo_current_save)

                # Validation
                if (self.validate == True
                        and self.iter_id % self.validate_step == 0
                        and self.iter_id > 0):
                    # Run validation set
                    dataset_loss = self.test(
                        sess, total_loss, validation_set_loader,
                        batch_pred_feats, batch_pred_coords,
                        batch_pred_confs, self.final_state)

                    # Early-stop regularization: keep the weights whenever
                    # the validation loss is no worse than the best so far.
                    if dataset_loss <= dataset_loss_best:
                        dataset_loss_best = dataset_loss
                        self.saver.save(sess, self.rolo_weights_file)
                        print("\n Better Model saved in file: %s" %
                              self.rolo_weights_file)

                    # Write summary for tensorboard
                    summary = sess.run(summary_op, feed_dict=feed)
                    test_writer.add_summary(summary, self.iter_id)

            print("Average total loss %f" % np.mean(epoch_loss))
    finally:
        # Flush pending events and release the event file even if training
        # is interrupted.
        test_writer.close()