def main(unused_argv):
    """Train the Show-and-Tell model as configured by the command-line FLAGS."""
    assert FLAGS.input_file_pattern, "--input_file_pattern is required"
    assert FLAGS.train_dir, "--train_dir is required"

    # Hyper-parameters come from the configuration module; only the input
    # pattern and the Inception checkpoint path are flag-driven.
    model_cfg = configuration.ModelConfig()
    model_cfg.input_file_pattern = FLAGS.input_file_pattern
    model_cfg.inception_checkpoint_file = FLAGS.inception_checkpoint_file
    train_cfg = configuration.TrainingConfig()

    # Make sure the checkpoint/summary directory exists.
    train_dir = FLAGS.train_dir
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)

    graph = tf.Graph()
    with graph.as_default():
        model = show_and_tell_model.ShowAndTellModel(
            model_cfg, mode="train", train_inception=FLAGS.train_inception)
        model.build()

        # Fine-tuning Inception uses a fixed (smaller) learning rate; the
        # initial phase uses staircase exponential decay on the base rate.
        learning_rate_decay_fn = None
        if FLAGS.train_inception:
            learning_rate = tf.constant(train_cfg.train_inception_learning_rate)
        else:
            learning_rate = tf.constant(train_cfg.initial_learning_rate)
            if train_cfg.learning_rate_decay_factor > 0:
                batches_per_epoch = (train_cfg.num_examples_per_epoch /
                                     model_cfg.batch_size)
                decay_steps = int(batches_per_epoch *
                                  train_cfg.num_epochs_per_decay)

                def _decay_fn(learning_rate, global_step):
                    # Decays once every `decay_steps` batches (staircase).
                    return tf.train.exponential_decay(
                        learning_rate,
                        global_step,
                        decay_steps=decay_steps,
                        decay_rate=train_cfg.learning_rate_decay_factor,
                        staircase=True)

                learning_rate_decay_fn = _decay_fn

        # Set up the training op (loss minimization with gradient clipping).
        train_op = tf.contrib.layers.optimize_loss(
            loss=model.total_loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=train_cfg.optimizer,
            clip_gradients=train_cfg.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Saver used by slim's training loop for periodic checkpoints.
        saver = tf.train.Saver(max_to_keep=train_cfg.max_checkpoints_to_keep)

    # Run training; model.init_fn restores the Inception checkpoint before
    # the training loop starts.
    tf.contrib.slim.learning.train(
        train_op,
        train_dir,
        log_every_n_steps=FLAGS.log_every_n_steps,
        graph=graph,
        global_step=model.global_step,
        number_of_steps=FLAGS.number_of_steps,
        init_fn=model.init_fn,
        saver=saver)
# NOTE(review): truncated fragment kept verbatim — this line begins in the
# middle of a backslash-continued string template (the assignment that opens
# it is not visible here) and ends at a `for` header whose loop body is also
# not visible. It appears to build a per-epoch eval/train command line from
# FLAGS, but it cannot be safely reformatted without the missing context.
'--eval_dir={eval_dir} ' \ '--eval_interval_secs={eval_interval_secs} ' \ '--num_eval_examples={num_eval_examples} ' \ '--min_global_step={min_global_step} ' \ '--CNN_name={CNN_name} ' \ '--batch_size={batch_size}' if __name__ == '__main__': FLAGS, unparsed = parse_args() print('current working dir [{0}]'.format(os.getcwd())) w_d = os.path.dirname(os.path.abspath(__file__)) print('change wording dir to [{0}]'.format(w_d)) os.chdir(w_d) model_config = configuration.ModelConfig() training_config = configuration.TrainingConfig() training_config.update_data_params(FLAGS.dataset_name) step_per_epoch = training_config.num_examples_per_epoch // model_config.batch_size epoch_num = FLAGS.number_of_steps // step_per_epoch print("Number of examples per epoch is", training_config.num_examples_per_epoch) print("Number of step per epoch is", step_per_epoch) print("To run", FLAGS.number_of_steps, "steps,run epoch number is", epoch_num) if FLAGS.pretrained_model_checkpoint_file: ckpt = ' --inception_checkpoint_file=' + FLAGS.pretrained_model_checkpoint_file else: ckpt = '' for i in range(epoch_num):
def main(unused_argv):
    """Train the Show-and-Tell model, optionally with blocked images or two input queues."""
    assert FLAGS.input_file_pattern, "--input_file_pattern is required"
    assert FLAGS.train_dir, "--train_dir is required"
    model_config = configuration.ModelConfig()
    model_config.input_file_pattern = FLAGS.input_file_pattern
    model_config.image_keys = [model_config.image_feature_name]
    # Make sure we have the right batch size for the chosen training phase.
    if FLAGS.train_inception:
        assert FLAGS.batch_size == 8
    else:
        assert FLAGS.batch_size == 32
    # With two input queues, each queue supplies half of the overall batch.
    if FLAGS.two_input_queues:
        FLAGS.batch_size = int(FLAGS.batch_size / 2)
    model_config.batch_size = FLAGS.batch_size
    # Set config fields if you are training with blocked images.
    if FLAGS.blocked_image:
        assert FLAGS.blocked_input_file_pattern, "--blocked_input_file_pattern is required if you would like to train with blocked images"
        model_config.blocked_input_file_pattern = FLAGS.blocked_input_file_pattern
        model_config.image_keys.append(model_config.blocked_image_feature_name)
    # NOTE(review): the second queue's pattern is stored in the same
    # `blocked_input_file_pattern` field as the blocked-image pattern —
    # presumably the model treats it as a generic second input queue;
    # confirm against ShowAndTellModel before relying on this.
    if FLAGS.two_input_queues:
        assert FLAGS.input_file_pattern2, "--input_file_pattern2 is required if you would like to train with two input queues"
        model_config.blocked_input_file_pattern = FLAGS.input_file_pattern2
        model_config.image_keys.append(model_config.image_feature_name)
    model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file
    training_config = configuration.TrainingConfig()
    # Create training directory.
    train_dir = FLAGS.train_dir
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)
    print('graph')
    # Go from FLAGS to a plain dict before handing it to the model.
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        if not isinstance(
                FLAGS.__flags['init_from'], str
        ):
            # TensorFlow likes to change random things for different releases.
            # One random thing it likes to change is FLAGS: newer releases
            # store Flag objects in __flags (actual value behind `.value`)
            # instead of raw values. This code takes care of that *sigh*.
            flag_dict = {}
            for key in FLAGS.__flags.keys():
                flag_dict[key] = FLAGS.__flags[key].value
        else:
            flag_dict = FLAGS.__flags
        model = show_and_tell_model.ShowAndTellModel(
            model_config,
            mode="train",
            train_inception=FLAGS.train_inception,
            flags=flag_dict
        )  # let's just pass in all the flags bc this is going to get annoying
        model.build()

        # Set up the learning rate: fixed small rate when fine-tuning
        # Inception; otherwise staircase exponential decay on the base rate.
        learning_rate_decay_fn = None
        if FLAGS.train_inception:
            learning_rate = tf.constant(
                training_config.train_inception_learning_rate)
        else:
            learning_rate = tf.constant(training_config.initial_learning_rate)
            if training_config.learning_rate_decay_factor > 0:
                num_batches_per_epoch = (
                    training_config.num_examples_per_epoch / model_config.batch_size)
                decay_steps = int(num_batches_per_epoch *
                                  training_config.num_epochs_per_decay)

                def _learning_rate_decay_fn(learning_rate, global_step):
                    # Decays once every `decay_steps` batches (staircase).
                    return tf.train.exponential_decay(
                        learning_rate,
                        global_step,
                        decay_steps=decay_steps,
                        decay_rate=training_config.learning_rate_decay_factor,
                        staircase=True)

                learning_rate_decay_fn = _learning_rate_decay_fn

        # Set up the training ops.
        train_op = tf.contrib.layers.optimize_loss(
            loss=model.total_loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)

        # When resuming from --init_from: first run the Inception restore,
        # then overwrite all variables from the full checkpoint.
        if FLAGS.init_from:
            inception_restore = model.init_fn

            def restore_full_model(sess):
                print("restoring full model")
                inception_restore(sess)
                saver.restore(sess, FLAGS.init_from)

            model.init_fn = restore_full_model
    print('train')
    # Run training.
    if FLAGS.debug:
        # Wrap the session in the TFDBG interactive CLI debugger.
        tf.contrib.slim.learning.train(
            train_op,
            train_dir,
            log_every_n_steps=FLAGS.log_every_n_steps,
            graph=g,
            global_step=model.global_step,
            number_of_steps=FLAGS.number_of_steps,
            init_fn=model.init_fn,
            saver=saver,
            session_wrapper=tf_debug.LocalCLIDebugWrapperSession)
    else:
        tf.contrib.slim.learning.train(
            train_op,
            train_dir,
            log_every_n_steps=FLAGS.log_every_n_steps,
            graph=g,
            global_step=model.global_step,
            number_of_steps=FLAGS.number_of_steps,
            init_fn=model.init_fn,
            saver=saver)
def train(number_of_steps):
    """Build the Show-and-Tell training graph and train it.

    Args:
      number_of_steps: Total number of global steps after which the
        slim training loop stops.
    """
    model_config = configuration.ModelConfig()
    model_config.input_file_pattern = FLAGS.input_file_pattern
    training_config = configuration.TrainingConfig()
    model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file

    # The training directory is assumed to already exist; creating it here
    # was deliberately disabled in this variant.
    train_dir = FLAGS.train_dir

    # Build the TensorFlow graph.
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = show_and_tell_model.ShowAndTellModel(
            model_config, mode="train", train_inception=FLAGS.train_inception)
        model.build()

        # Set up the learning rate: fixed small rate when fine-tuning
        # Inception; otherwise staircase exponential decay on the base rate.
        learning_rate_decay_fn = None
        if FLAGS.train_inception:
            print(
                "The inception weights are fine-tuned together with weights in the LSTM units and word embeddings."
            )
            learning_rate = tf.constant(
                training_config.train_inception_learning_rate)
        else:
            # Bug fix: the two adjacent literals previously printed
            # "randomlyinitialized" (missing separator space).
            print(
                "The inception weights are frozen. Only weights in the LSTMs and word embeddings are randomly "
                "initialized and trained.")
            learning_rate = tf.constant(training_config.initial_learning_rate)
            if training_config.learning_rate_decay_factor > 0:
                num_batches_per_epoch = (
                    training_config.num_examples_per_epoch / model_config.batch_size)
                decay_steps = int(num_batches_per_epoch *
                                  training_config.num_epochs_per_decay)

                def _learning_rate_decay_fn(learning_rate, global_step):
                    # Decays once every `decay_steps` batches (staircase).
                    return tf.train.exponential_decay(
                        learning_rate,
                        global_step,
                        decay_steps=decay_steps,
                        decay_rate=training_config.learning_rate_decay_factor,
                        staircase=True)

                learning_rate_decay_fn = _learning_rate_decay_fn

        # Set up the training ops.
        train_op = tf.contrib.layers.optimize_loss(
            loss=model.total_loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Keep an extra checkpoint every half hour in addition to the most
        # recent ones (chosen here instead of max_to_keep from the config).
        saver = tf.train.Saver(keep_checkpoint_every_n_hours=0.5)

    # Run training; model.init_fn restores the Inception checkpoint before
    # the training loop starts.
    tf.contrib.slim.learning.train(
        train_op,
        train_dir,
        log_every_n_steps=FLAGS.log_every_n_steps,
        graph=g,
        global_step=model.global_step,
        number_of_steps=number_of_steps,
        init_fn=model.init_fn,
        saver=saver)