def train_and_eval(input_fn, end_step):
    """Alternate between training and evaluation until ``end_step`` is reached.

    The loop resumes from the global step stored in the checkpoint directory
    ``PARAMS['model_dir']``. Each round trains up to the next evaluation
    boundary (``STEPS_PER_EVAL`` steps ahead, capped at ``end_step``) and then
    evaluates and logs the results.

    Args:
        input_fn: Input function passed to ``estimator.train``.
        end_step: Global step at which the train/evaluate loop stops.

    Returns:
        None. Evaluation results are only written to the log.

    Note:
        Relies on module-level names defined elsewhere: ``estimator``,
        ``eval_input_fn``, ``PARAMS``, ``STEPS_PER_EVAL``,
        ``VAL_DATASET_SIZE``, ``VALIDATION_BATCH_SIZE``,
        ``RestoreMovingAverageHook``,
        ``load_global_step_from_checkpoint_dir`` and ``tf``.
    """
    # Pick up where the most recent checkpoint left off.
    step = load_global_step_from_checkpoint_dir(PARAMS['model_dir'])

    while step < end_step:
        # Train to the next evaluation boundary, never past ``end_step``.
        target_step = min(step + STEPS_PER_EVAL, end_step)
        estimator.train(input_fn=input_fn, max_steps=target_step)
        step = target_step

        tf.logging.info('Starting to evaluate.')
        # Evaluation always reads from the module-level ``eval_input_fn``;
        # a new restore hook is constructed for every evaluation round.
        eval_steps = VAL_DATASET_SIZE // VALIDATION_BATCH_SIZE
        restore_hook = RestoreMovingAverageHook(PARAMS['model_dir'])
        metrics = estimator.evaluate(
            input_fn=eval_input_fn,
            steps=eval_steps,
            hooks=[restore_hook],
        )
        tf.logging.info('Eval results at step %d: %s', target_step, metrics)
# NOTE(review): the two `return` statements below are the tail of a definition
# whose header lies above this view -- presumably an inner `input_fn` nested in
# `get_input_fn` (which is called further down); TODO confirm. Their
# indentation is reconstructed on that assumption.
        return pipeline.dataset
    return input_fn


# ---- Session configuration ------------------------------------------------
# Earlier spelling of the next statement, left disabled:
#session_config = tf.ConfigProto(allow_soft_placement=True)
session_config = tf.compat.v1.ConfigProto(allow_soft_placement=True)
# Device list comes from GPU_TO_USE, which is defined outside this view.
session_config.gpu_options.visible_device_list = GPU_TO_USE
# NOTE(review): per the TensorFlow GPUOptions docs, allow_growth makes GPU
# memory be claimed on demand rather than up front -- confirm against the
# TensorFlow version in use.
session_config.gpu_options.allow_growth = True
# Disabled alternative that would set a fixed memory fraction instead:
#session_config.gpu_options.per_process_gpu_memory_fraction=0.5

# ---- Estimator run configuration ------------------------------------------
# Start from a default RunConfig, then override the checkpoint directory, the
# session config, and the summary / checkpoint / step-logging cadences.
run_config = tf.estimator.RunConfig()
run_config = run_config.replace(model_dir=PARAMS['model_dir'],
                                session_config=session_config,
                                save_summary_steps=500,
                                save_checkpoints_secs=600,
                                log_step_count_steps=500)

# One input function per mode; both come from the same factory.
train_input_fn = get_input_fn(is_training=True)
val_input_fn = get_input_fn(is_training=False)

# model_fn and PARAMS are defined outside this view.
estimator = tf.estimator.Estimator(model_fn, params=PARAMS, config=run_config)

# Training is capped at NUM_STEPS global steps.
train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=NUM_STEPS)
# steps=None: per the tf.estimator.EvalSpec docs, evaluation then runs until
# the input function is exhausted -- presumably the validation dataset is
# finite; TODO confirm. The hook is constructed with the same model_dir that
# the estimator writes its checkpoints to.
eval_spec = tf.estimator.EvalSpec(
    val_input_fn, steps=None, start_delay_secs=120, throttle_secs=600,
    hooks=[RestoreMovingAverageHook(PARAMS['model_dir'])])

# Runs at import time (module-level call); the return value is kept in
# `evaluate_results`.
evaluate_results = tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
# NOTE(review): the statements up to `return input_fn` are the tail of a
# function whose `def` header lies above this view -- presumably
# `get_input_fn(is_training)`, given the calls further down; TODO confirm.
# Their indentation is reconstructed on that assumption.
    # Sort the names, then join each one onto dataset_path.
    filenames = [os.path.join(dataset_path, n) for n in sorted(filenames)]

    def input_fn():
        """Build a Pipeline from the captured arguments and return its dataset."""
        pipeline = Pipeline(filenames, is_training, params)
        return pipeline.dataset

    return input_fn


# ---- Session configuration ------------------------------------------------
# NOTE(review): `tf.ConfigProto` is the TensorFlow 1.x spelling; under
# TensorFlow 2.x the same class is reached via `tf.compat.v1.ConfigProto` --
# confirm which TensorFlow version this script targets.
session_config = tf.ConfigProto(allow_soft_placement=True)
# Device list comes from GPU_TO_USE, which is defined outside this view.
session_config.gpu_options.visible_device_list = GPU_TO_USE

# ---- Estimator run configuration ------------------------------------------
# Start from a default RunConfig, then override the checkpoint directory, the
# session config, and the summary / checkpoint / step-logging cadences.
run_config = tf.estimator.RunConfig()
run_config = run_config.replace(
    model_dir=params['model_dir'],
    session_config=session_config,
    save_summary_steps=1000,
    save_checkpoints_secs=1800,
    log_step_count_steps=1000
)

# One input function per mode; both come from the same factory.
train_input_fn = get_input_fn(is_training=True)
val_input_fn = get_input_fn(is_training=False)

# model_fn and params are defined outside this view.
estimator = tf.estimator.Estimator(model_fn, params=params, config=run_config)

# Training is capped at params['num_steps'] global steps.
train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=params['num_steps'])
# 3600 * 3 == 10800, i.e. three hours for both the start delay and the
# throttle. steps=None: per the tf.estimator.EvalSpec docs, evaluation then
# runs until the input function is exhausted -- presumably the validation
# dataset is finite; TODO confirm.
eval_spec = tf.estimator.EvalSpec(
    val_input_fn, steps=None,
    start_delay_secs=3600 * 3, throttle_secs=3600 * 3,
    hooks=[RestoreMovingAverageHook(params['model_dir'])]
)

# Runs at import time (module-level call); the return value is discarded.
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)