def train_ops():
    """Assemble the training graph and hand back the ops to run.

    Everything is configured through ``FLAGS`` (``data_dir``, ``batch_size``,
    ``learning_rate`` and ``model``). The graph is built in this order:
    global step counter, training inputs, model forward pass, loss and
    accuracy, scalar summaries, a gradient-descent step, an exponential
    moving average over the trainable variables, and finally a no-op named
    'train' that depends on all of the update ops.

    Returns:
        A tuple ``(train_op, loss, accuracy, summary_op)``.
    """
    # Training parameters are taken from the flags up front.
    input_dir = FLAGS.data_dir
    batch_size = FLAGS.batch_size  # read here, not referenced further below
    step_size = FLAGS.learning_rate

    # Counter incremented by the optimizer on every step.
    step_counter = tf.Variable(0, name='global_step', trainable=False)

    # Image and label tensors for training.
    images, labels = data.train_inputs(data_dir=input_dir)

    # Look up the model selected on the command line.
    net = select.by_name(FLAGS.model)

    # Named pass-through node so the graph input can be retrieved by name.
    net_input = tf.identity(images, 'inputs')

    # Forward pass producing the logits.
    logits = net.inference(net_input)

    # Likewise, a named pass-through node marking the graph output.
    tf.identity(logits, 'predictions')

    # Training loss and accuracy on the current batch.
    total_loss = net.loss(logits, labels)
    batch_accuracy = net.accuracy(logits, labels)

    # Only the total loss and the batch accuracy get scalar summaries; the
    # entries of the 'losses' collection could be summarized as well when
    # debugging.
    tf.summary.scalar('loss', total_loss)
    tf.summary.scalar('batch accuracy', batch_accuracy)

    # Plain gradient descent on the total loss.
    descent_step = tf.train.GradientDescentOptimizer(step_size).minimize(
        total_loss, global_step=step_counter)

    # Maintain moving averages of the trainable variables after each step;
    # per the original notes these smoothed values are what evaluation loads
    # in place of the raw trained ones.
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,
                                            step_counter)
    ema_step = ema.apply(tf.trainable_variables())

    # Extra ops registered in the UPDATE_OPS collection (needed for batch
    # normalization according to the original comment).
    extra_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # A single no-op that forces the SGD step, the moving-average update and
    # the extra update ops to run.
    prerequisites = [descent_step, ema_step]
    prerequisites.extend(extra_updates)
    with tf.control_dependencies(prerequisites):
        train_op = tf.no_op(name='train')

    # Merge every summary registered so far into one fetchable op.
    summary_op = tf.summary.merge_all()

    return train_op, total_loss, batch_accuracy, summary_op
def save_weights():
    """Export the trainable variables of the latest checkpoint as text files.

    Rebuilds the training-mode model graph, restores the most recent
    checkpoint found under ``<run_dir>/train`` and writes every variable in
    the ``TRAINABLE_VARIABLES`` collection, flattened to 1-D, with
    ``np.savetxt``. The target path of each variable comes from
    ``model._name_to_filename(var.name)``; missing parent directories are
    created.

    If no checkpoint is found, diagnostic information is printed and the
    function returns without writing anything.

    Side effects:
        Sets ``FLAGS.resume = True`` and prints progress to stdout.
    """
    FLAGS.resume = True  # Get saved weights, not new ones
    print(FLAGS.save_dir)
    run_dir = get_run_dir(FLAGS.save_dir, FLAGS.model)
    print('run_dir', run_dir)
    checkpoint_dir = os.path.join(run_dir, 'train')

    with tf.Graph().as_default():
        # Only the images are needed to build the inference graph; the
        # labels returned by the input pipeline are not used here.
        images, _ = data.train_inputs(data_dir=FLAGS.data_dir)
        model = select.by_name(FLAGS.model, FLAGS, training=True)

        # Build the inference graph so the model's variables exist before
        # the Saver is created; the logits themselves are not needed.
        model.inference(images)
        print('Multiplicative depth', model.mult_depth())

        saver = tf.train.Saver()

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('### ERROR No checkpoint file found###')
                print('ckpt_dir', checkpoint_dir)
                # ckpt is None when no checkpoint state exists, so the path
                # must be read defensively or the error report itself
                # raises AttributeError.
                print('ckpt.model_checkpoint_path',
                      getattr(ckpt, 'model_checkpoint_path', None))
                print('ckpt', ckpt)
                return

            # Restore the variables from the checkpoint.
            saver.restore(sess, ckpt.model_checkpoint_path)

            # Save variables, one text file per trainable variable.
            for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
                weight = sess.run(var).flatten().tolist()
                filename = model._name_to_filename(var.name)
                # Create the parent directory only when the path has one;
                # a bare filename must not be turned into a directory.
                dir_name = os.path.dirname(filename)
                if dir_name:
                    os.makedirs(dir_name, exist_ok=True)
                print("saving", filename)
                np.savetxt(str(filename), weight)
def train_ops(learning_rate=0.1, clip_value=0.25):
    """Build the training graph with optional gradient clipping.

    The model is chosen via ``FLAGS.model`` and built in training mode on
    the inputs from ``data.train_inputs``. Scalar summaries are attached to
    the total loss, the batch accuracy and every tensor in the 'losses'
    collection. When ``FLAGS.clip_grads`` is set, gradients are clipped
    element-wise to ``[-clip_value, clip_value]`` before being applied.

    Args:
        learning_rate: Step size for the gradient-descent optimizer.
            Defaults to 0.1, the value that was previously hard-coded.
        clip_value: Symmetric clipping bound used when ``FLAGS.clip_grads``
            is true. Defaults to 0.25, the previously hard-coded bound.

    Returns:
        A tuple ``(train_op, loss, accuracy)``.
    """
    # Create global step counter
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Image and label tensors for training
    images, labels = data.train_inputs(data_dir=FLAGS.data_dir)

    # Instantiate the model in training mode
    model = select.by_name(FLAGS.model, FLAGS, training=True)

    # Create a 'virtual' graph node based on images that represents the
    # input node to be used for graph retrieval
    inputs = tf.identity(images, 'XXX')

    # Build a graph that computes the logits predictions from the
    # inference model
    logits = model.inference(inputs)
    print('Multiplicative depth', model.mult_depth())

    # In the same way, create a 'virtual' node for outputs; only the named
    # graph node matters, so the returned tensor is not kept
    tf.identity(logits, 'YYY')

    # Calculate loss and training accuracy
    loss = model.loss(logits, labels)
    accuracy = model.accuracy(logits, labels)

    # Summaries for the total loss and the batch accuracy
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('batch accuracy', accuracy)

    # For debugging purposes, also track every individual loss term
    for loss_term in tf.get_collection('losses'):
        tf.summary.scalar(loss_term.op.name, loss_term)

    optimizer = tf.train.GradientDescentOptimizer(learning_rate)

    if FLAGS.clip_grads:
        print("Clipping gradients to [{}, {}]".format(-clip_value,
                                                      clip_value))
        # Variables without a gradient (grad is None) are skipped.
        capped_gvs = [
            (tf.clip_by_value(grad, -clip_value, clip_value), var)
            for grad, var in optimizer.compute_gradients(loss)
            if grad is not None
        ]
        sgd_op = optimizer.apply_gradients(capped_gvs,
                                           global_step=global_step)
    else:
        print("Not clipping gradients")
        sgd_op = optimizer.minimize(loss, global_step=global_step)

    # Extra ops registered in the UPDATE_OPS collection
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # Single no-op that forces the SGD step and the extra update ops to run
    with tf.control_dependencies([sgd_op] + update_ops):
        train_op = tf.no_op(name='train')

    return (train_op, loss, accuracy)