def test_create_train_and_eval_specs(self):
    """Checks the `TrainSpec` and `EvalSpec` objects built for the test model.

    Builds the estimator inputs for `MODEL_NAME_FOR_TEST`, converts them into
    specs with evaluation on training data enabled, and verifies the step
    counts, the spec names and the exporter name of the result.
    """
    run_config = tf.estimator.RunConfig()
    hparams = model_hparams.create_hparams(
        hparams_overrides='load_pretrained=false')
    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)

    inputs = model_lib.create_estimator_and_inputs(
        run_config, hparams, pipeline_config_path, train_steps=20)
    expected_train_steps = inputs['train_steps']

    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        inputs['train_input_fn'],
        inputs['eval_input_fns'],
        inputs['eval_on_train_input_fn'],
        inputs['predict_input_fn'],
        expected_train_steps,
        eval_on_train_data=True,
        final_exporter_name='exporter',
        eval_spec_names=['holdout'])

    self.assertEqual(expected_train_steps, train_spec.max_steps)
    self.assertEqual(2, len(eval_specs))

    # First spec: the named holdout set, carrying the final exporter.
    holdout_spec = eval_specs[0]
    self.assertEqual(None, holdout_spec.steps)
    self.assertEqual('holdout', holdout_spec.name)
    self.assertEqual('exporter_holdout', holdout_spec.exporters[0].name)

    # Second spec: evaluation on the training data.
    on_train_spec = eval_specs[1]
    self.assertEqual(None, on_train_spec.steps)
    self.assertEqual('eval_on_train', on_train_spec.name)
def main(unused_argv):
    """Trains and evaluates the model described by the command-line flags.

    Args:
        unused_argv: Positional command-line arguments; ignored.
    """
    for required_flag in ('model_dir', 'pipeline_config_path'):
        flags.mark_flag_as_required(required_flag)

    bundle = model_lib.create_estimator_and_inputs(
        run_config=tf.estimator.RunConfig(model_dir=FLAGS.model_dir),
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        eval_steps=FLAGS.num_eval_steps)
    estimator = bundle['estimator']

    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        bundle['train_input_fn'],
        bundle['eval_input_fn'],
        bundle['eval_on_train_input_fn'],
        bundle['predict_input_fn'],
        bundle['train_steps'],
        bundle['eval_steps'],
        eval_on_train_data=False)

    # Only the first eval spec is passed on to train_and_evaluate.
    first_eval_spec = eval_specs[0]
    tf.estimator.train_and_evaluate(estimator, train_spec, first_eval_spec)
def main(unused_argv):
    """Trains or continuously evaluates a model through a TPU estimator.

    `FLAGS.mode` selects the action: 'train' runs training up to the resolved
    number of train steps, 'eval' runs continuous evaluation against
    `FLAGS.model_dir`.

    Args:
        unused_argv: Positional command-line arguments; ignored.
    """
    for required_flag in ('model_dir', 'pipeline_config_path'):
        flags.mark_flag_as_required(required_flag)

    resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        tpu=[FLAGS.tpu_name],
        zone=FLAGS.tpu_zone,
        project=FLAGS.gcp_project)
    tpu_grpc_url = resolver.get_master()

    tpu_settings = tf.contrib.tpu.TPUConfig(
        iterations_per_loop=FLAGS.iterations_per_loop,
        num_shards=FLAGS.num_shards)
    config = tf.contrib.tpu.RunConfig(
        master=tpu_grpc_url,
        evaluation_master=tpu_grpc_url,
        model_dir=FLAGS.model_dir,
        tpu_config=tpu_settings)

    # Forward a batch size override only when the flag is set.
    extra_kwargs = {}
    if FLAGS.train_batch_size:
        extra_kwargs['batch_size'] = FLAGS.train_batch_size

    bundle = model_lib.create_estimator_and_inputs(
        run_config=config,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples),
        use_tpu_estimator=True,
        use_tpu=FLAGS.use_tpu,
        num_shards=FLAGS.num_shards,
        save_final_config=FLAGS.mode == 'train',
        **extra_kwargs)
    estimator = bundle['estimator']
    train_input_fn = bundle['train_input_fn']
    eval_input_fns = bundle['eval_input_fns']
    eval_on_train_input_fn = bundle['eval_on_train_input_fn']
    train_steps = bundle['train_steps']

    if FLAGS.mode == 'train':
        estimator.train(input_fn=train_input_fn, max_steps=train_steps)

    # Continuous evaluation mode.
    if FLAGS.mode == 'eval':
        if FLAGS.eval_training_data:
            name, input_fn = 'training_data', eval_on_train_input_fn
        else:
            # Only the first eval input is used.
            name, input_fn = 'validation_data', eval_input_fns[0]
        model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn,
                                  train_steps, name)
def test_create_estimator_with_default_train_eval_steps(self):
    """Checks that the train step count falls back to the pipeline config.

    No `train_steps` override is passed, so the value returned by
    `create_estimator_and_inputs` must equal `train_config.num_steps`.
    """
    run_config = tf.estimator.RunConfig()
    hparams = model_hparams.create_hparams(
        hparams_overrides='load_pretrained=false')
    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)

    # Step count as written in the pipeline file.
    pipeline_configs = config_util.get_configs_from_pipeline_file(
        pipeline_config_path)
    expected_train_steps = pipeline_configs['train_config'].num_steps

    result = model_lib.create_estimator_and_inputs(
        run_config, hparams, pipeline_config_path)

    self.assertIsInstance(result['estimator'], tf.estimator.Estimator)
    self.assertEqual(expected_train_steps, result['train_steps'])
def main(unused_argv):
    """Trains and evaluates, or only evaluates, according to the flags.

    When `FLAGS.checkpoint_dir` is set, the function evaluates checkpoints
    from that directory (once, or continuously). Otherwise it runs
    `tf.estimator.train_and_evaluate`.

    Args:
        unused_argv: Positional command-line arguments; ignored.
    """
    for required_flag in ('model_dir', 'pipeline_config_path'):
        flags.mark_flag_as_required(required_flag)

    bundle = model_lib.create_estimator_and_inputs(
        run_config=tf.estimator.RunConfig(model_dir=FLAGS.model_dir),
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = bundle['estimator']
    train_input_fn = bundle['train_input_fn']
    eval_input_fns = bundle['eval_input_fns']
    eval_on_train_input_fn = bundle['eval_on_train_input_fn']
    predict_input_fn = bundle['predict_input_fn']
    train_steps = bundle['train_steps']

    if not FLAGS.checkpoint_dir:
        # Training path: build the specs and hand the first eval spec over.
        train_spec, eval_specs = model_lib.create_train_and_eval_specs(
            train_input_fn,
            eval_input_fns,
            eval_on_train_input_fn,
            predict_input_fn,
            train_steps,
            eval_on_train_data=False)
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
        return

    # Evaluation-only path.
    if FLAGS.eval_training_data:
        name, input_fn = 'training_data', eval_on_train_input_fn
    else:
        # Only the first eval input is evaluated.
        name, input_fn = 'validation_data', eval_input_fns[0]

    if FLAGS.run_once:
        latest_checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        estimator.evaluate(input_fn,
                           steps=None,
                           checkpoint_path=latest_checkpoint)
    else:
        model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                  train_steps, name)
def test_create_estimator_with_default_train_eval_steps(self):
    """Checks that train and eval step counts fall back to the pipeline config.

    No step overrides are passed, so the returned `train_steps` must equal
    `train_config.num_steps` and the returned `eval_steps` must equal
    `eval_config.num_examples`.
    """
    run_config = tf.estimator.RunConfig()
    hparams = model_hparams.create_hparams(
        hparams_overrides='load_pretrained=false')
    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)

    # Values as written in the pipeline file.
    pipeline_configs = config_util.get_configs_from_pipeline_file(
        pipeline_config_path)
    expected_train_steps = pipeline_configs['train_config'].num_steps
    expected_eval_steps = pipeline_configs['eval_config'].num_examples

    result = model_lib.create_estimator_and_inputs(
        run_config, hparams, pipeline_config_path)
    built_estimator = result['estimator']
    actual_train_steps = result['train_steps']
    actual_eval_steps = result['eval_steps']

    self.assertIsInstance(built_estimator, tf.estimator.Estimator)
    self.assertEqual(expected_train_steps, actual_train_steps)
    self.assertEqual(expected_eval_steps, actual_eval_steps)
def test_create_tpu_estimator_and_inputs(self):
    """Checks that requesting a TPU estimator yields a `TPUEstimator`.

    Also verifies that the explicit `train_steps` override (20) is the value
    reported back in the returned dictionary.
    """
    run_config = tf.estimator.tpu.RunConfig()
    hparams = model_hparams.create_hparams(
        hparams_overrides='load_pretrained=false')
    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)

    result = model_lib.create_estimator_and_inputs(
        run_config,
        hparams,
        pipeline_config_path,
        train_steps=20,
        use_tpu_estimator=True)

    self.assertIsInstance(result['estimator'],
                          tf.estimator.tpu.TPUEstimator)
    self.assertEqual(20, result['train_steps'])
def test_create_tpu_estimator_and_inputs(self):
    """Checks that requesting a TPU estimator yields a `TPUEstimator`.

    Also verifies that the explicit `train_steps` override (20) is the value
    reported back in the returned dictionary.
    """
    run_config = tpu_config.RunConfig()
    hparams = model_hparams.create_hparams(
        hparams_overrides='load_pretrained=false')
    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)

    result = model_lib.create_estimator_and_inputs(
        run_config,
        hparams,
        pipeline_config_path,
        train_steps=20,
        use_tpu_estimator=True)

    self.assertIsInstance(result['estimator'], tpu_estimator.TPUEstimator)
    self.assertEqual(20, result['train_steps'])
def test_create_estimator_and_inputs(self):
    """Checks the estimator and input functions returned for the test model.

    Verifies the estimator type, that the `train_steps` override (20) is
    reported back, and that the train / eval / eval-on-train input function
    entries are present in the returned dictionary.
    """
    run_config = tf.estimator.RunConfig()
    hparams = model_hparams.create_hparams(
        hparams_overrides='load_pretrained=false')
    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)

    result = model_lib.create_estimator_and_inputs(
        run_config, hparams, pipeline_config_path, train_steps=20)

    self.assertIsInstance(result['estimator'], tf.estimator.Estimator)
    self.assertEqual(20, result['train_steps'])
    for expected_key in ('train_input_fn', 'eval_input_fns',
                         'eval_on_train_input_fn'):
        self.assertIn(expected_key, result)
def test_create_estimator_and_inputs(self):
    """Checks the estimator and input functions returned for the test model.

    Verifies the estimator type, that the `train_steps` override (20) is
    reported back, and that the train / eval / eval-on-train input function
    entries are present in the returned dictionary.
    """
    run_config = tf.estimator.RunConfig()
    hparams = model_hparams.create_hparams(
        hparams_overrides='load_pretrained=false')
    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)

    requested_steps = 20
    outputs = model_lib.create_estimator_and_inputs(
        run_config, hparams, pipeline_config_path,
        train_steps=requested_steps)
    built_estimator = outputs['estimator']
    reported_steps = outputs['train_steps']

    self.assertIsInstance(built_estimator, tf.estimator.Estimator)
    self.assertEqual(20, reported_steps)
    self.assertIn('train_input_fn', outputs)
    self.assertIn('eval_input_fns', outputs)
    self.assertIn('eval_on_train_input_fn', outputs)
def main(unused_argv):
    """Runs detection on a video or image directory and writes results to text.

    Builds an estimator from `FLAGS.pipeline_config_path` with checkpoints in
    `FLAGS.ckpt_dir`, feeds it batches produced by `video_generator` or
    `image_generator` (chosen by `FLAGS.input_type`), and writes one
    ``scores:...;classes:...`` line per prediction to `FLAGS.txt_output`.
    Progress is printed every 500 predictions and the elapsed time at the end.

    Args:
        unused_argv: Positional command-line arguments; ignored.
    """
    flags.mark_flag_as_required('ckpt_dir')
    flags.mark_flag_as_required('pipeline_config_path')

    # Class ids corresponding to each scene.
    # NOTE(review): the result is not used below; the call is kept so a bad
    # label map path still fails early, as before.
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    config = tf.estimator.RunConfig(model_dir=FLAGS.ckpt_dir)
    train_and_eval_dict = model_lib.create_estimator_and_inputs(
        run_config=config,
        hparams=model_hparams.create_hparams(None),
        sample_1_of_n_eval_examples=1,
        pipeline_config_path=FLAGS.pipeline_config_path)
    estimator = train_and_eval_dict['estimator']

    def input_fn():
        """Builds the prediction dataset from the configured input source."""
        # The two input types differ only in the generator and its source path.
        if FLAGS.input_type == 'video':
            generator, source = video_generator, FLAGS.video_path
        else:
            generator, source = image_generator, FLAGS.image_dir
        return tf.data.Dataset.from_generator(
            generator,
            {"image": tf.float32, "true_image_shape": tf.int32},
            output_shapes={"image": tf.TensorShape([None, 300, 300, 3]),
                           "true_image_shape": tf.TensorShape([None, 3])},
            args=[source, FLAGS.batch_size])

    rs = estimator.predict(input_fn)
    start = datetime.datetime.now()

    # Write the prediction results to the output file.
    with codecs.open(FLAGS.txt_output, mode='w', encoding='utf-8') as w:
        for i, r in enumerate(rs):
            scores = r['detection_scores']
            classes = r['detection_classes'].astype(np.uint8)
            # `np.str` was removed in NumPy 1.24; the builtin `str` is the
            # same dtype and works on every NumPy version.
            w.write("scores:{};classes:{}\n".format(
                ','.join(scores.astype(str)),
                ','.join(classes.astype(str))))
            if i % 500 == 0:
                print("============={}==============={:06d}=============".format(
                    datetime.datetime.now(), i))
    print(datetime.datetime.now() - start)
def train_odm(model_dir, pipeline_config_path, num_train_steps,
              num_eval_steps, hparams):
    """Trains and evaluates a TensorFlow Object Detection model.

    Args:
        model_dir: (str) Directory given to the estimator's `RunConfig`.
        pipeline_config_path: (str) Path to the pipeline configuration file.
        num_train_steps: (int) Forwarded as `train_steps`.
        num_eval_steps: (int) Forwarded as `eval_steps`.
        hparams: (str) Hyperparameter overrides passed to
            `model_hparams.create_hparams`.

    Returns:
        0 once `tf.estimator.train_and_evaluate` has returned.
    """
    # The estimator stores its output under model_dir.
    run_config = tf.estimator.RunConfig(model_dir=model_dir)
    print(run_config)

    bundle = model_lib.create_estimator_and_inputs(
        run_config=run_config,
        hparams=model_hparams.create_hparams(hparams),
        pipeline_config_path=pipeline_config_path,
        train_steps=num_train_steps,
        eval_steps=num_eval_steps)

    # Pull out the pieces needed to build the train / eval specs.
    estimator = bundle['estimator']
    spec_inputs = [
        bundle[key]
        for key in ('train_input_fn', 'eval_input_fns',
                    'eval_on_train_input_fn', 'predict_input_fn',
                    'train_steps')
    ]
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        *spec_inputs, eval_on_train_data=False)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
    return 0
def test_create_train_and_eval_specs(self):
    """Checks the `TrainSpec` and `EvalSpec` objects built for the test model.

    Uses explicit train, eval and eval-on-train step counts and verifies that
    they, the spec names and the exporter name appear on the created specs.
    """
    run_config = tf.estimator.RunConfig()
    hparams = model_hparams.create_hparams(
        hparams_overrides='load_pretrained=false')
    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
    eval_on_train_steps = 15

    inputs = model_lib.create_estimator_and_inputs(
        run_config,
        hparams,
        pipeline_config_path,
        train_steps=20,
        eval_steps=10)
    train_steps = inputs['train_steps']
    eval_steps = inputs['eval_steps']

    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        inputs['train_input_fn'],
        inputs['eval_input_fn'],
        inputs['eval_on_train_input_fn'],
        inputs['predict_input_fn'],
        train_steps,
        eval_steps,
        eval_on_train_data=True,
        eval_on_train_steps=eval_on_train_steps,
        final_exporter_name='exporter',
        eval_spec_name='holdout')

    self.assertEqual(train_steps, train_spec.max_steps)
    self.assertEqual(2, len(eval_specs))

    # First spec: the named holdout set, carrying the final exporter.
    holdout_spec = eval_specs[0]
    self.assertEqual(eval_steps, holdout_spec.steps)
    self.assertEqual('holdout', holdout_spec.name)
    self.assertEqual('exporter', holdout_spec.exporters[0].name)

    # Second spec: evaluation on the training data.
    on_train_spec = eval_specs[1]
    self.assertEqual(eval_on_train_steps, on_train_spec.steps)
    self.assertEqual('eval_on_train', on_train_spec.name)
def main(unused_argv):
    """Trains and evaluates, or only evaluates, according to the flags.

    When `FLAGS.checkpoint_dir` is set, the function evaluates checkpoints
    from that directory (once, or continuously with `FLAGS.max_eval_retries`).
    Otherwise it runs `tf.estimator.train_and_evaluate`.

    Args:
        unused_argv: Positional command-line arguments; ignored.
    """
    for required_flag in ('model_dir', 'pipeline_config_path'):
        flags.mark_flag_as_required(required_flag)

    bundle = model_lib.create_estimator_and_inputs(
        run_config=tf.estimator.RunConfig(model_dir=FLAGS.model_dir),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = bundle['estimator']
    train_input_fn = bundle['train_input_fn']
    eval_input_fns = bundle['eval_input_fns']
    eval_on_train_input_fn = bundle['eval_on_train_input_fn']
    predict_input_fn = bundle['predict_input_fn']
    train_steps = bundle['train_steps']

    if not FLAGS.checkpoint_dir:
        # Training path: only the first eval spec is used.
        train_spec, eval_specs = model_lib.create_train_and_eval_specs(
            train_input_fn,
            eval_input_fns,
            eval_on_train_input_fn,
            predict_input_fn,
            train_steps,
            eval_on_train_data=False)
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
        return

    # Evaluation-only path.
    if FLAGS.eval_training_data:
        name, input_fn = 'training_data', eval_on_train_input_fn
    else:
        name, input_fn = 'validation_data', eval_input_fns[0]

    if FLAGS.run_once:
        latest_checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        estimator.evaluate(input_fn,
                           steps=None,
                           checkpoint_path=latest_checkpoint)
    else:
        model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                  train_steps, name, FLAGS.max_eval_retries)
def main(unused_argv):
    """Evaluates one or all checkpoints found in `FLAGS.checkpoint_dir`.

    The estimator writes its output under `FLAGS.output_dir`. With
    `FLAGS.run_once` only the latest checkpoint is evaluated; otherwise every
    checkpoint recorded in the directory's checkpoint state is evaluated,
    iterating the recorded list in reverse order.

    Args:
        unused_argv: Positional command-line arguments; ignored.

    Raises:
        ValueError: If `FLAGS.checkpoint_dir` holds no checkpoint state when
            evaluating all checkpoints.
    """
    flags.mark_flag_as_required('output_dir')
    flags.mark_flag_as_required('checkpoint_dir')
    flags.mark_flag_as_required('pipeline_config_path')

    # The run config points at output_dir rather than the checkpoint dir.
    config = tf.estimator.RunConfig(model_dir=FLAGS.output_dir)

    train_and_eval_dict = model_lib.create_estimator_and_inputs(
        run_config=config,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = train_and_eval_dict['estimator']
    eval_input_fns = train_and_eval_dict['eval_input_fns']
    eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']

    if FLAGS.eval_training_data:
        name = 'training_data'
        input_fn = eval_on_train_input_fn
    else:
        name = 'validation_data'
        # The first eval input will be evaluated.
        input_fn = eval_input_fns[0]

    if FLAGS.run_once:
        estimator.evaluate(input_fn,
                           checkpoint_path=tf.train.latest_checkpoint(
                               FLAGS.checkpoint_dir))
        return

    checkpoint_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if checkpoint_state is None:
        # get_checkpoint_state returns None when the directory has no
        # checkpoint state; fail with a clear message instead of an
        # AttributeError on the attribute access below.
        raise ValueError(
            'No checkpoint state found in {}'.format(FLAGS.checkpoint_dir))

    for checkpoint_path in reversed(
            checkpoint_state.all_model_checkpoint_paths):
        tf.logging.warning(
            'Evaluating checkpoint path: {}'.format(checkpoint_path))
        estimator.evaluate(input_fn, checkpoint_path=checkpoint_path)
def __init__(self, *args):
    """Builds the object-detection estimator inputs and train/eval specs.

    Stores the dictionary returned by `create_estimator_and_inputs` on
    `self.train_and_eval_dict` and the two values returned by
    `create_train_and_eval_specs` on `self.training_spec` and
    `self.validation_spec`.

    Args:
        *args: Forwarded unchanged to the parent initializer.
    """
    super().__init__(*args)

    detect_kwargs = getObjectDetectConfig()
    # The trial seed is used as the TensorFlow random seed.
    seeded_run_config = tf.estimator.RunConfig(
        tf_random_seed=get_trial_seed())
    self.train_and_eval_dict = model_lib.create_estimator_and_inputs(
        run_config=seeded_run_config, **detect_kwargs)

    inputs = self.train_and_eval_dict
    specs = model_lib.create_train_and_eval_specs(
        inputs['train_input_fn'],
        inputs['eval_input_fns'],
        inputs['eval_on_train_input_fn'],
        inputs['predict_input_fn'],
        inputs['train_steps'],
        eval_on_train_data=False)
    self.training_spec, self.validation_spec = specs
def main(unused_argv):
    """Trains and evaluates the model configured by module-level settings.

    Reads `save_path` (model directory) and `pipeline_config` (pipeline file
    path) from the enclosing scope.

    Args:
        unused_argv: Positional command-line arguments; ignored.
    """
    run_config = tf.estimator.RunConfig(model_dir=save_path)
    bundle = model_lib.create_estimator_and_inputs(
        run_config=run_config,
        hparams=model_hparams.create_hparams(None),
        pipeline_config_path=pipeline_config,
        train_steps=None,
        sample_1_of_n_eval_examples=1,
        sample_1_of_n_eval_on_train_examples=5)
    estimator = bundle['estimator']

    spec_inputs = [
        bundle[key]
        for key in ('train_input_fn', 'eval_input_fns',
                    'eval_on_train_input_fn', 'predict_input_fn',
                    'train_steps')
    ]
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        *spec_inputs, eval_on_train_data=False)

    # Only the first eval spec is used.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
    """Evaluates the latest checkpoint, or trains and evaluates the model.

    When `FLAGS.checkpoint_dir` is set, the latest checkpoint in that
    directory is evaluated for the resolved number of eval steps. Otherwise
    `tf.estimator.train_and_evaluate` is run.

    Args:
        unused_argv: Positional command-line arguments; ignored.
    """
    for required_flag in ('model_dir', 'pipeline_config_path'):
        flags.mark_flag_as_required(required_flag)

    bundle = model_lib.create_estimator_and_inputs(
        run_config=tf.estimator.RunConfig(model_dir=FLAGS.model_dir),
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        eval_steps=FLAGS.num_eval_steps)
    estimator = bundle['estimator']
    train_input_fn = bundle['train_input_fn']
    eval_input_fn = bundle['eval_input_fn']
    eval_on_train_input_fn = bundle['eval_on_train_input_fn']
    predict_input_fn = bundle['predict_input_fn']
    train_steps = bundle['train_steps']
    eval_steps = bundle['eval_steps']

    if FLAGS.checkpoint_dir:
        latest_checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        estimator.evaluate(eval_input_fn,
                           eval_steps,
                           checkpoint_path=latest_checkpoint)
        return

    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fn,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_steps,
        eval_on_train_data=False)

    # Only the first eval spec is used.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def train_pet():
    """Trains and evaluates the pets Faster R-CNN ResNet-101 pipeline.

    Output goes to ``$HOME/workspace/cv_models/obj_det/pet``; the pipeline
    config `faster_rcnn_resnet101_pets.config` is read from the directory
    that contains this file.
    """
    # Locations: model output under $HOME, pipeline config next to this file.
    user_home = os.getenv('HOME')
    script_dir = os.path.dirname(os.path.realpath(__file__))
    output_dir = os.path.join(user_home, 'workspace/cv_models/obj_det/pet')

    gpu_options = tf.GPUOptions(allow_growth=True)
    # NOTE(review): the strategy is constructed but not passed to RunConfig
    # (the train_distribute argument is disabled below).
    train_distribute = tf.distribute.MirroredStrategy(
        devices=["/device:GPU:0", "/device:GPU:1"])
    session_config = tf.ConfigProto(gpu_options=gpu_options)
    run_config = tf.estimator.RunConfig(
        model_dir=output_dir,
        session_config=session_config,
        # train_distribute=train_distribute,
    )

    # Build the estimator and its input functions.
    pipeline_path = os.path.join(script_dir,
                                 'faster_rcnn_resnet101_pets.config')
    bundle = model_lib.create_estimator_and_inputs(
        run_config=run_config,
        hparams=model_hparams.create_hparams(None),
        pipeline_config_path=pipeline_path,
    )
    estimator = bundle['estimator']

    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        bundle['train_input_fn'],
        bundle['eval_input_fns'],
        bundle['eval_on_train_input_fn'],
        bundle['predict_input_fn'],
        bundle['train_steps'],
        eval_on_train_data=False)

    # Only the first eval spec is used.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
config = tf.estimator.RunConfig(model_dir=out_dir, session_config=config_proto, log_step_count_steps=1, save_checkpoints_steps=args.checkpoint_steps, keep_checkpoint_max=args.max_checkpoints) pipeline_overrides = None latest_checkpoint = find_latest_checkpoint(log_dir, modelId) if latest_checkpoint: pipeline_overrides = pipeline_pb2.TrainEvalPipelineConfig() pipeline_overrides.train_config.fine_tune_checkpoint = latest_checkpoint train_and_eval_dict = model_lib.create_estimator_and_inputs( run_config=config, pipeline_config_path=args.pipeline_config, config_override=pipeline_overrides, train_steps=args.num_train_steps, sample_1_of_n_eval_examples=args.sample_1_of_n_eval_examples, hparams=model_hparams.create_hparams(None), save_final_config=True) estimator = train_and_eval_dict['estimator'] train_input_fn = train_and_eval_dict['train_input_fn'] eval_input_fns = train_and_eval_dict['eval_input_fns'] eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn'] predict_input_fn = train_and_eval_dict['predict_input_fn'] train_steps = train_and_eval_dict['train_steps'] train_spec, eval_specs = model_lib.create_train_and_eval_specs( train_input_fn, eval_input_fns, eval_on_train_input_fn,
def main(unused_argv):
    """Trains or evaluates a detection model with Horovod.

    When `FLAGS.checkpoint_dir` is set, checkpoints from that directory are
    evaluated (once, or continuously). Otherwise training is split into
    `FLAGS.eval_count` rounds; after each round the process with Horovod
    rank 0 evaluates the first eval input.

    Args:
        unused_argv: Positional command-line arguments; ignored.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    hvd.init()

    for required_flag in ('model_dir', 'pipeline_config_path'):
        flags.mark_flag_as_required(required_flag)

    # Each process sees only the GPU matching its local Horovod rank.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    session_config.gpu_options.visible_device_list = str(hvd.local_rank())
    if FLAGS.allow_xla:
        session_config.graph_options.optimizer_options.global_jit_level = (
            tf.OptimizerOptions.ON_1)

    # Only rank 0 is given a model_dir; the other ranks pass None.
    model_dir = None
    if hvd.rank() == 0:
        model_dir = FLAGS.model_dir
    run_config = tf.estimator.RunConfig(model_dir=model_dir,
                                        session_config=session_config)

    bundle = model_lib.create_estimator_and_inputs(
        run_config=run_config,
        eval_count=FLAGS.eval_count,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = bundle['estimator']
    train_input_fn = bundle['train_input_fn']
    eval_input_fns = bundle['eval_input_fns']
    eval_on_train_input_fn = bundle['eval_on_train_input_fn']
    predict_input_fn = bundle['predict_input_fn']
    train_steps = bundle['train_steps']

    if FLAGS.checkpoint_dir:
        if FLAGS.eval_training_data:
            name, input_fn = 'training_data', eval_on_train_input_fn
        else:
            # Only the first eval input is evaluated.
            name, input_fn = 'validation_data', eval_input_fns[0]

        if FLAGS.run_once:
            latest_checkpoint = tf.train.latest_checkpoint(
                FLAGS.checkpoint_dir)
            estimator.evaluate(input_fn,
                               steps=None,
                               checkpoint_path=latest_checkpoint)
        else:
            model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir,
                                      input_fn, train_steps, name)
        return

    # The specs are built as before, but the loop below drives the estimator.
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)

    train_hooks = [hvd.BroadcastGlobalVariablesHook(0)]
    eval_hooks = []

    for _ in range(FLAGS.eval_count):
        estimator.train(train_input_fn,
                        hooks=train_hooks,
                        steps=train_steps // FLAGS.eval_count)

        if hvd.rank() == 0:
            estimator.evaluate(eval_input_fns[0],
                               steps=None,
                               hooks=eval_hooks)
def main(unused_argv):
    """Runs repeated select / train cycles over a growing set of labeled frames.

    Appears to be an active-learning driver (judging by `active_set`, the
    `sel.select_*` helpers and the 'AL/tfrecords/' paths). When
    `FLAGS.checkpoint_dir` is falsy, for each cycle after
    `FLAGS.restart_from_cycle` it:

      1. unless the experiment name contains 'Rnd' or this is the last cycle,
         loads cached detections or prepares an evaluation run on the
         unlabeled frames;
      2. selects new frame indices with the strategy encoded in `FLAGS.name`
         and appends them to the active set;
      3. writes a tfrecord for the active set, trains and evaluates, saves
         `active_set.txt` in the cycle's model directory, and deletes the
         tfrecord.

    When `FLAGS.checkpoint_dir` is truthy nothing is executed: the former
    evaluation code is kept only inside a string literal.

    NOTE(review): this function contains `pdb.set_trace()` breakpoints and
    refers to `groundtruth_boxes`, which is not assigned anywhere in the
    visible code -- it looks like work in progress.

    Args:
        unused_argv: Positional command-line arguments; ignored.
    """
    flags.mark_flag_as_required('model_dir')
    flags.mark_flag_as_required('pipeline_config_path')
    #config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)

    # Get info about full dataset
    dataset,videos = get_dataset(data_info)

    # Get experiment information from FLAGS
    name = FLAGS.name
    num_cycles = int(FLAGS.cycles)
    run_num = int(FLAGS.run)
    #num_steps = str(train_config.num_steps)
    epochs = int(FLAGS.epochs)
    restart_cycle = int(FLAGS.restart_from_cycle)

    if FLAGS.checkpoint_dir:
        # Disabled evaluation path: the string below is a no-op expression.
        """
        if FLAGS.eval_training_data:
          name = 'training_data'
          input_fn = eval_on_train_input_fn
        else:
          name = 'validation_data'
          # The first eval input will be evaluated.
          input_fn = eval_input_fns[0]
        if FLAGS.run_once:
          estimator.evaluate(input_fn,
                             num_eval_steps=None,
                             checkpoint_path=tf.train.latest_checkpoint(
                                 FLAGS.checkpoint_dir))
        else:
          model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                    train_steps, name)
        """
    else:
        # Load active set from cycle 0 and point to right model
        if restart_cycle==0:
            model_dir = FLAGS.model_dir + 'R' + str(run_num) + 'cycle0/'
            #train_config.fine_tune_checkpoint = model_dir + 'model.ckpt'
        else:
            model_dir = FLAGS.model_dir + name + 'R' + str(run_num) + 'cycle' + str(restart_cycle) + '/'
            # Get actual checkpoint model
            #with open(model_dir+'checkpoint','r') as cfile:
            #    line = cfile.readlines()
            #    train_config.fine_tune_checkpoint = line[0].split(' ')[1][1:-2]

        active_set = []
        unlabeled_set=[]

        # active_set.txt holds one integer frame index per line.
        with open(model_dir + 'active_set.txt', 'r') as f:
            for line in f:
                active_set.append(int(line))

        for cycle in range(restart_cycle+1,num_cycles+1):

            #### Evaluation of trained model on unlabeled set to obtain data for selection

            if 'Rnd' not in name and cycle < num_cycles:

                # NOTE(review): model_dir already ends with a run/cycle
                # component here, so this path nests name/run/cycle under it
                # -- confirm this is the intended location.
                eval_train_dir = model_dir + name + 'R' + str(run_num) + 'cycle' + str(cycle) + 'eval_train/'

                if os.path.exists(eval_train_dir + 'detections.dat'):
                    # Reuse detections cached by an earlier run.
                    with open(eval_train_dir + 'detections.dat','rb') as infile:
                        ###### pdb remove latinq
                        detected_boxes = pickle.load(infile)
                        #detected_boxes = pickle.load(infile,encoding='latin1')
                else:

                    # Get unlabeled set
                    data_info['output_path'] = FLAGS.data_dir + 'AL/tfrecords/' + name + 'R' + str(run_num) + 'cycle' + str(cycle) + '_unlabeled.record'

                    # Do not evaluate labeled samples, their neighbors or unverified frames
                    aug_active_set = sel.augment_active_set(dataset,videos,active_set,num_neighbors=5)

                    unlabeled_set = [f['idx'] for f in dataset if f['idx'] not in aug_active_set and f['verified']]

                    # For TCFP, we need to get detections for pretty much every frame,
                    # since non-candidate frames may be used to support candidates
                    if ('TCFP' in name):
                        unlabeled_set = [i for i in range(len(dataset))]

                    print('Unlabeled frames in the dataset: {}'.format(len(unlabeled_set)))

                    save_tf_record(data_info,unlabeled_set)

                    # Disabled code kept as a no-op string literal.
                    """

                    configs = config_util.get_configs_from_pipeline_file(pipeline_config_path=FLAGS.pipeline_config_path,
                                                      config_override=None)
                    eval_input_configs = configs['eval_input_configs']
                    eval_config = configs['eval_config']
                    model_config = configs['model']
                    eval_input_configs = configs['eval_input_configs']

                    MODEL_BUILD_UTIL_MAP = {'create_eval_input_fn': inputs.create_eval_input_fn}
                    create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn']

                    eval_input_fns = [create_eval_input_fn(
                            eval_config=eval_config,
                            eval_input_config=eval_input_config,
                            model_config=model_config) for eval_input_config in eval_input_configs]
                    """

                    # Set number of eval images to number of unlabeled samples and point to tfrecord
                    #eval_input_config.tf_record_input_reader.input_path[0] = data_info['output_path']
                    #eval_config.num_examples = len(unlabeled_set)

                    model_dir = FLAGS.model_dir + name + 'R' + str(run_num) + 'cycle' + str(cycle) + '/'
                    config = tf.estimator.RunConfig(model_dir=model_dir)

                    # The non-standard keyword arguments (Unlabeled_set_length,
                    # Active_set_length, epochs, data_info, FLAGS,
                    # restart_cycle) are passed through to model_lib;
                    # presumably a locally modified version -- TODO confirm.
                    train_and_eval_dict = model_lib.create_estimator_and_inputs(
                        Unlabeled_set_length=len(unlabeled_set),
                        Active_set_length=len(active_set),
                        epochs=epochs,
                        data_info=data_info,
                        FLAGS=FLAGS,
                        restart_cycle=restart_cycle,
                        run_config=config,
                        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
                        pipeline_config_path=FLAGS.pipeline_config_path,
                        train_steps=FLAGS.num_train_steps,
                        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
                        sample_1_of_n_eval_on_train_examples=(FLAGS.sample_1_of_n_eval_on_train_examples))

                    estimator = train_and_eval_dict['estimator']
                    train_input_fn = train_and_eval_dict['train_input_fn']
                    eval_input_fns = train_and_eval_dict['eval_input_fns']
                    eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
                    predict_input_fn = train_and_eval_dict['predict_input_fn']
                    train_steps = train_and_eval_dict['train_steps']

                    # Disabled code kept as a no-op string literal.
                    """
                    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
                        train_input_fn,
                        eval_input_fns,
                        eval_on_train_input_fn,
                        predict_input_fn,
                        train_steps,
                        eval_on_train_data=False)

                    def get_next_eval_train(config):
                       return dataset_builder.make_initializable_iterator(
                            dataset_builder.build(config)).get_next()

                    # Initialize input dict again (necessary?)
                    #create_eval_train_input_dict_fn = functools.partial(get_next_eval_train, eval_input_config)

                    graph_rewriter_fn = None
                    if 'graph_rewriter_config' in configs:
                        graph_rewriter_fn = graph_rewriter_builder.build(
                            configs['graph_rewriter_config'], is_training=False)

                    # Need to reset graph for evaluation
                    tf.reset_default_graph()

                    #if FLAGS.eval_training_data:
                    #name = 'evaluation_of_training_data'
                    #input_fn = eval_on_train_input_fn
                    #else:
                    #   name = 'validation_data'
                    #   # The first eval input will be evaluated.
                    """

                    input_fn = eval_input_fns[0]

                    if FLAGS.run_once:
                        predictions=estimator.evaluate(input_fn,
                                           checkpoint_path=tf.train.latest_checkpoint(eval_train_dir))
                    else:
                        # Interactive breakpoint left in the code.
                        pdb.set_trace()
                        model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                                  train_steps, name)

                    # Interactive breakpoint left in the code.
                    pdb.set_trace()

                    #visualize_detections(dataset, unlabeled_set, detected_boxes, groundtruth_boxes)
                    # NOTE(review): detected_boxes is not assigned in this
                    # branch (the evaluate result is bound to `predictions`),
                    # so this dump relies on a value from an earlier cycle or
                    # raises NameError -- confirm intent.
                    with open(eval_train_dir + 'detections.dat','wb') as outfile:
                        pickle.dump(detected_boxes,outfile, protocol=pickle.HIGHEST_PROTOCOL)

                    print('Done computing detections in training set')

                    # Remove tfrecord used for training
                    if os.path.exists(data_info['output_path']):
                        os.remove(data_info['output_path'])

            #### Training of current cycle
            model_dir = FLAGS.model_dir + name + 'R' + str(run_num) + 'cycle' + str(cycle) + '/'
            config = tf.estimator.RunConfig(model_dir=model_dir)

            # Budget for each cycle is the number of videos (0.5% of train set)
            # The selection strategy is chosen by substring match on the
            # experiment name.
            if ('Rnd' in name):
                #indices = select_random_video(dataset,videos,active_set)
                #indices = sel.select_random(dataset,videos,active_set,budget=num_videos)
                indices = sel.select_random_video(dataset,videos,active_set)
            else:
                # NOTE(review): if no branch below matches, `indices` is not
                # assigned in this cycle; groundtruth_boxes is not defined in
                # the visible code.
                if ('Ent' in name):
                    indices = sel.select_entropy_video(dataset,videos,FLAGS.data_dir,active_set,detected_boxes)
                elif ('Lst' in name):
                    indices = sel.select_least_confident_video(dataset,videos,active_set,detected_boxes)
                elif ('TCFP' in name):
                    indices = sel.select_TCFP_per_video(dataset,videos,FLAGS.data_dir,active_set,detected_boxes)
                elif ('FP_gt' in name):
                    indices = sel.selectFpPerVideo(dataset,videos,active_set,detected_boxes,groundtruth_boxes,cycle)
                elif ('FN_gt' in name):
                    indices = sel.selectFnPerVideo(dataset,videos,active_set,detected_boxes,groundtruth_boxes,cycle)
                elif ('FPN' in name):
                    indices = sel.select_FPN_PerVideo(dataset,videos,active_set,detected_boxes,groundtruth_boxes,cycle)

            active_set.extend(indices)

            data_info['output_path'] = FLAGS.data_dir + 'AL/tfrecords/' + name + 'R' + str(run_num) + 'cycle' + str(cycle) + '.record'
            save_tf_record(data_info,active_set)

            # Interactive breakpoint left in the code.
            pdb.set_trace()

            train_and_eval_dict = model_lib.create_estimator_and_inputs(
                Unlabeled_set_length=len(unlabeled_set),
                Active_set_length=len(active_set),
                epochs=epochs,
                data_info=data_info,
                FLAGS=FLAGS,
                restart_cycle=restart_cycle,
                run_config=config,
                hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
                pipeline_config_path=FLAGS.pipeline_config_path,
                train_steps=FLAGS.num_train_steps,
                sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
                sample_1_of_n_eval_on_train_examples=(FLAGS.sample_1_of_n_eval_on_train_examples))

            estimator = train_and_eval_dict['estimator']
            train_input_fn = train_and_eval_dict['train_input_fn']
            eval_input_fns = train_and_eval_dict['eval_input_fns']
            eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
            predict_input_fn = train_and_eval_dict['predict_input_fn']
            train_steps = train_and_eval_dict['train_steps']

            train_spec, eval_specs = model_lib.create_train_and_eval_specs(
                train_input_fn,
                eval_input_fns,
                eval_on_train_input_fn,
                predict_input_fn,
                train_steps,
                eval_on_train_data=False)

            print('-----------------train and evaluation-------------------------')

            # Currently only a single Eval Spec is allowed.
            tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])

            #Save active_set in train dir in case we want to restart training
            with open(model_dir + 'active_set.txt', 'w') as f:
                for item in active_set:
                    f.write('{}\n'.format(item))

            # Remove tfrecord used for training
            if os.path.exists(data_info['output_path']):
                os.remove(data_info['output_path'])
def main(unused_argv):
    """Evaluate from a checkpoint directory, or train and evaluate.

    When `FLAGS.checkpoint_dir` is set, the estimator is only evaluated
    (once if `FLAGS.run_once`, otherwise continuously). When it is not set,
    training and evaluation run through `tf.estimator.train_and_evaluate`.

    Args:
      unused_argv: Positional command-line arguments; ignored.
    """
    for required_flag in ('model_dir', 'pipeline_config_path'):
        flags.mark_flag_as_required(required_flag)

    # Soft placement allows placing on CPU ops without GPU implementation.
    gpu_session = tf.ConfigProto(
        allow_soft_placement=True, log_device_placement=False)
    gpu_session.gpu_options.per_process_gpu_memory_fraction = 0.95
    # Hard-coded device list: only the device listed as "1" is visible.
    gpu_session.gpu_options.visible_device_list = "1"

    tf.logging.set_verbosity(tf.logging.INFO)
    run_config = tf.estimator.RunConfig(
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=25000,
        session_config=gpu_session,
        log_step_count_steps=10)

    bundle = model_lib.create_estimator_and_inputs(
        run_config=run_config,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = bundle['estimator']
    train_input_fn = bundle['train_input_fn']
    eval_input_fns = bundle['eval_input_fns']
    eval_on_train_input_fn = bundle['eval_on_train_input_fn']
    predict_input_fn = bundle['predict_input_fn']
    train_steps = bundle['train_steps']

    if not FLAGS.checkpoint_dir:
        # Training path. Only the first EvalSpec is handed to the estimator.
        train_spec, eval_specs = model_lib.create_train_and_eval_specs(
            train_input_fn,
            eval_input_fns,
            eval_on_train_input_fn,
            predict_input_fn,
            train_steps,
            eval_on_train_data=False)
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
        return

    # Evaluation-only path: pick the data split, then evaluate.
    if FLAGS.eval_training_data:
        name, input_fn = 'training_data', eval_on_train_input_fn
    else:
        # The first eval input will be evaluated.
        name, input_fn = 'validation_data', eval_input_fns[0]

    if FLAGS.run_once:
        latest = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        estimator.evaluate(input_fn, checkpoint_path=latest)
    else:
        model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                  train_steps, name)
def main(unused_argv):
    """Evaluate from a checkpoint directory, or train with a throttled eval.

    With `FLAGS.checkpoint_dir` set, the estimator is evaluated once or
    continuously. Otherwise training runs with an `EvalSpec` copied from the
    first generated spec but with a custom start delay and throttle interval.

    Args:
      unused_argv: Positional command-line arguments; ignored.
    """
    for required_flag in ('model_dir', 'pipeline_config_path'):
        flags.mark_flag_as_required(required_flag)

    run_config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)

    bundle = model_lib.create_estimator_and_inputs(
        run_config=run_config,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = bundle['estimator']
    train_input_fn = bundle['train_input_fn']
    eval_input_fns = bundle['eval_input_fns']
    eval_on_train_input_fn = bundle['eval_on_train_input_fn']
    predict_input_fn = bundle['predict_input_fn']
    train_steps = bundle['train_steps']

    if FLAGS.checkpoint_dir:
        if FLAGS.eval_training_data:
            name, input_fn = 'training_data', eval_on_train_input_fn
        else:
            # The first eval input will be evaluated.
            name, input_fn = 'validation_data', eval_input_fns[0]

        if FLAGS.run_once:
            latest = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
            estimator.evaluate(input_fn, steps=None, checkpoint_path=latest)
        else:
            model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir,
                                      input_fn, train_steps, name)
        return

    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)

    # Only the first generated EvalSpec is used as the template.
    base_spec = eval_specs[0]
    my_eval_spec = tf.estimator.EvalSpec(
        name=base_spec.name,
        input_fn=base_spec.input_fn,
        steps=None,
        exporters=base_spec.exporters,
        # 30 minutes - the original author noted this does not seem to be
        # respected.
        start_delay_secs=1800,
        # throttle_secs is not documented in eval.proto; it roughly replaces
        # eval_interval_secs. Evaluate at most every 60 minutes.
        throttle_secs=60 * 60)

    print('=========== my_eval_spec')
    print(my_eval_spec)
    print('=========================')

    tf.estimator.train_and_evaluate(estimator, train_spec, my_eval_spec)
def main(unused_argv):
    """Evaluate from a checkpoint directory, or train, evaluate and export.

    With `FLAGS.checkpoint_dir` set, the estimator is evaluated once or
    continuously. Otherwise the model is trained and evaluated, an inference
    graph is exported from the latest checkpoint in `FLAGS.model_dir`, and the
    resulting saved model is moved into a versioned `<model_dir>/1` folder.

    Args:
      unused_argv: Positional command-line arguments; ignored.
    """
    flags.mark_flag_as_required('pipeline_config_path')
    flags.mark_flag_as_required('model_dir')
    config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)

    train_and_eval_dict = model_lib.create_estimator_and_inputs(
        run_config=config,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = train_and_eval_dict['estimator']
    train_input_fn = train_and_eval_dict['train_input_fn']
    eval_input_fns = train_and_eval_dict['eval_input_fns']
    eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
    predict_input_fn = train_and_eval_dict['predict_input_fn']
    train_steps = train_and_eval_dict['train_steps']

    if FLAGS.checkpoint_dir:
        if FLAGS.eval_training_data:
            name = 'training_data'
            input_fn = eval_on_train_input_fn
        else:
            name = 'validation_data'
            # The first eval input will be evaluated.
            input_fn = eval_input_fns[0]
        if FLAGS.run_once:
            # `Estimator.evaluate` takes `steps`; the previous
            # `num_eval_steps` keyword is not part of its signature and
            # raised TypeError.
            estimator.evaluate(input_fn,
                               steps=None,
                               checkpoint_path=tf.train.latest_checkpoint(
                                   FLAGS.checkpoint_dir))
        else:
            model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir,
                                      input_fn, train_steps, name)
    else:
        train_spec, eval_specs = model_lib.create_train_and_eval_specs(
            train_input_fn,
            eval_input_fns,
            eval_on_train_input_fn,
            predict_input_fn,
            train_steps,
            eval_on_train_data=False)

        # Currently only a single Eval Spec is allowed.
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])

        # Export the model from the most recent checkpoint.
        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
            text_format.Merge(f.read(), pipeline_config)

        input_shape = None
        input_type = 'image_tensor'
        export_dir = FLAGS.model_dir + '/inference'
        trained_checkpoint_prefix = tf.train.latest_checkpoint(
            FLAGS.model_dir, latest_filename=None)
        exporter.export_inference_graph(input_type,
                                        pipeline_config,
                                        trained_checkpoint_prefix,
                                        export_dir,
                                        input_shape=input_shape,
                                        write_inference_graph=False)

        # TF Object Detection saves the model without a version, but
        # TensorFlow model server needs versioned saved models for serving.
        # So move the saved model into a version folder.
        tf.gfile.Rename(FLAGS.model_dir + '/inference/saved_model',
                        FLAGS.model_dir + '/1')
        tf.gfile.DeleteRecursively(FLAGS.model_dir + '/export')
        with tf.gfile.GFile(FLAGS.model_dir + '/1/variables/Dummy',
                            'w') as file:
            file.write("dummy file")
def start_training(self,
                   package: str = '1111111',
                   num_steps: int = 1000,
                   selected_model: str = 'ssd_mobilenet_v2',
                   batch_size: int = 8):
    """Fine-tune a detection model on an uploaded package and export it.

    Extracts `/deepmicroscopy/<package>.zip`, prepares the dataset, rewrites
    the selected model's pipeline config, trains and evaluates the model,
    exports a frozen inference graph from the newest checkpoint and writes a
    TensorRT-converted graph next to it.

    Args:
      package: Name of the package (zip file stem and working directory).
      num_steps: Number of training steps written into the pipeline config
        and passed to the estimator.
      selected_model: Key into the built-in model table.
      batch_size: NOTE(review): currently ignored; the per-model batch size
        from the model table always overrides it. Confirm whether callers
        expect this argument to take effect.

    Returns:
      A dict with `status`, `train_spec` and `eval_specs`.

    Raises:
      KeyError: If `selected_model` is not in the model table.
    """
    os.makedirs(f'/deepmicroscopy/{package}', exist_ok=True)
    with zipfile.ZipFile(f'/deepmicroscopy/{package}.zip', 'r') as zip_ref:
        zip_ref.extractall(f'/deepmicroscopy/{package}')

    v = Via(f'/deepmicroscopy/{package}/data/project.json')
    v.prepare()

    test_record_fname = f'/deepmicroscopy/{package}/data/test.record'
    train_record_fname = f'/deepmicroscopy/{package}/data/train.record'
    label_map_pbtxt_fname = f'/deepmicroscopy/{package}/data/label_map.pbtxt'

    MODELS_CONFIG = {
        'ssd_mobilenet_v2': {
            'model_name': 'ssd_mobilenet_v2_coco_2018_03_29',
            'pipeline_file': 'ssd_mobilenet_v2_coco.config',
            'batch_size': 8
        },
        'faster_rcnn_inception_v2': {
            'model_name': 'faster_rcnn_inception_v2_coco_2018_01_28',
            'pipeline_file': 'faster_rcnn_inception_v2_pets.config',
            'batch_size': 12
        },
        'rfcn_resnet101': {
            'model_name': 'rfcn_resnet101_coco_2018_01_28',
            'pipeline_file': 'rfcn_resnet101_pets.config',
            'batch_size': 8
        },
        'ssd_mobilenet_v1_coco': {
            'model_name': 'ssd_mobilenet_v1_coco_2018_01_28',
            'pipeline_file': 'ssd_mobilenet_v1_coco.config',
            'batch_size': 12
        },
    }

    # Name of the pipeline file in the TensorFlow object detection API.
    pipeline_file = MODELS_CONFIG[selected_model]['pipeline_file']
    # The per-model batch size replaces whatever the caller passed in.
    batch_size = MODELS_CONFIG[selected_model]['batch_size']

    DEST_DIR = '/server/pretrained_model'
    fine_tune_checkpoint = os.path.join(DEST_DIR, "model.ckpt")

    pipeline_fname = os.path.join(
        '/server/object_detection/samples/configs/', pipeline_file)
    pipeline_fname_new = os.path.join(f'/deepmicroscopy/{package}',
                                      pipeline_file)

    num_classes = self.get_num_classes(label_map_pbtxt_fname)

    with open(pipeline_fname) as f:
        s = f.read()

    # Apply every substitution before opening the output file, so a failure
    # here cannot leave a truncated config behind.
    # fine_tune_checkpoint
    s = re.sub('fine_tune_checkpoint: ".*?"',
               'fine_tune_checkpoint: "{}"'.format(fine_tune_checkpoint), s)
    # tfrecord files train and test.
    s = re.sub('(input_path: ".*?)(train.record)(.*?")',
               'input_path: "{}"'.format(train_record_fname), s)
    s = re.sub('(input_path: ".*?)(test.record)(.*?")',
               'input_path: "{}"'.format(test_record_fname), s)
    # label_map_path
    s = re.sub('label_map_path: ".*?"',
               'label_map_path: "{}"'.format(label_map_pbtxt_fname), s)
    # Set training batch_size.
    s = re.sub('batch_size: [0-9]+', 'batch_size: {}'.format(batch_size), s)
    # Set training steps, num_steps
    s = re.sub('num_steps: [0-9]+', 'num_steps: {}'.format(num_steps), s)
    # Set number of classes num_classes.
    s = re.sub('num_classes: [0-9]+', 'num_classes: {}'.format(num_classes),
               s)

    with open(pipeline_fname_new, 'w') as f:
        f.write(s)

    model_dir = f'/deepmicroscopy/{package}/training/'
    os.makedirs(model_dir, exist_ok=True)

    config = tf.estimator.RunConfig(model_dir=model_dir)

    train_and_eval_dict = model_lib.create_estimator_and_inputs(
        run_config=config,
        hparams=model_hparams.create_hparams(None),
        pipeline_config_path=pipeline_fname_new,
        train_steps=num_steps,
        sample_1_of_n_eval_examples=1,
        sample_1_of_n_eval_on_train_examples=(5))
    estimator = train_and_eval_dict['estimator']
    train_input_fn = train_and_eval_dict['train_input_fn']
    eval_input_fns = train_and_eval_dict['eval_input_fns']
    eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
    predict_input_fn = train_and_eval_dict['predict_input_fn']
    train_steps = train_and_eval_dict['train_steps']

    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)

    result = tf.estimator.train_and_evaluate(estimator, train_spec,
                                             eval_specs[0])
    logging.info(result)

    output_directory = f'/deepmicroscopy/{package}/training'

    # Pick the checkpoint with the highest step number among the
    # `model.ckpt-<step>.meta` files written during training.
    meta_files = [
        fname for fname in os.listdir(model_dir)
        if 'model.ckpt-' in fname and '.meta' in fname
    ]
    steps = np.array(
        [int(re.findall(r'\d+', fname)[0]) for fname in meta_files])
    last_model = meta_files[steps.argmax()].replace('.meta', '')
    last_model_path = os.path.join(model_dir, last_model)

    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    with tf.gfile.GFile(pipeline_fname_new, 'r') as f:
        text_format.Merge(f.read(), pipeline_config)
    # Empty config override; presumably kept to mirror the export script.
    text_format.Merge('', pipeline_config)

    input_shape = None
    input_type = 'image_tensor'
    exporter.export_inference_graph(input_type,
                                    pipeline_config,
                                    last_model_path,
                                    output_directory,
                                    input_shape=input_shape,
                                    write_inference_graph=False)

    GRAPH_PB_PATH = f'/deepmicroscopy/{package}/training/frozen_inference_graph.pb'
    with tf.Session() as sess:
        print("load graph")
        with gfile.FastGFile(GRAPH_PB_PATH, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        sess.graph.as_default()
        tf.import_graph_def(graph_def, name='')
        names = [node.name for node in graph_def.node]
        print(names)

    output_names = [
        'detection_boxes', 'detection_scores', 'detection_multiclass_scores',
        'detection_classes', 'num_detections', 'raw_detection_boxes',
        'raw_detection_scores'
    ]

    trt_graph = trt.create_inference_graph(
        input_graph_def=graph_def,
        outputs=output_names,
        max_batch_size=1,
        max_workspace_size_bytes=1 << 25,
        precision_mode='FP16',
        minimum_segment_size=50)

    with open(f'/deepmicroscopy/{package}/training/trt_graph.pb', 'wb') as f:
        f.write(trt_graph.SerializeToString())

    return {
        'status': 'done',
        'train_spec': train_spec,
        'eval_specs': eval_specs
    }
def main(unused_argv):
    """Evaluate from a checkpoint directory, or train with optional pruning.

    Pruning is enabled as soon as any of `FLAGS.sparsity`,
    `FLAGS.pruning_start_step` or `FLAGS.pruning_end_step` is set; in that
    case a `ModelPruningHook` is passed to the train/eval spec factory.

    Args:
      unused_argv: Positional command-line arguments; ignored.
    """
    for required_flag in ('model_dir', 'pipeline_config_path'):
        flags.mark_flag_as_required(required_flag)
    run_config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)

    # Any single pruning flag being set switches pruning on.
    pruning = (FLAGS.sparsity is not None
               or FLAGS.pruning_start_step is not None
               or FLAGS.pruning_end_step is not None)

    bundle = model_lib.create_estimator_and_inputs(
        run_config=run_config,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples),
    )
    estimator = bundle['estimator']
    train_input_fn = bundle['train_input_fn']
    eval_input_fns = bundle['eval_input_fns']
    eval_on_train_input_fn = bundle['eval_on_train_input_fn']
    predict_input_fn = bundle['predict_input_fn']
    train_steps = bundle['train_steps']

    if FLAGS.checkpoint_dir:
        if FLAGS.eval_training_data:
            name, input_fn = 'training_data', eval_on_train_input_fn
        else:
            # The first eval input will be evaluated.
            name, input_fn = 'validation_data', eval_input_fns[0]

        if FLAGS.run_once:
            latest = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
            estimator.evaluate(input_fn, steps=None, checkpoint_path=latest)
        else:
            model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir,
                                      input_fn, train_steps, name)
        return

    # The `hooks` keyword is only supplied when pruning is active, so the
    # factory's own default applies otherwise.
    spec_kwargs = {}
    if pruning:
        spec_kwargs['hooks'] = [
            train_hooks.ModelPruningHook(
                target_sparsity=FLAGS.sparsity,
                start_step=FLAGS.pruning_start_step,
                end_step=FLAGS.pruning_end_step)
        ]

    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False,
        **spec_kwargs)

    # Only the first EvalSpec is handed to the estimator.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
    """Evaluate from a checkpoint directory, or train with eval listeners.

    With `FLAGS.checkpoint_dir` set, the estimator is evaluated once or
    continuously. Otherwise the estimator is trained directly, with one
    `EvalCheckpointSaverListener` per available eval spec (at most two)
    attached as saving listeners.

    Args:
      unused_argv: Positional command-line arguments; ignored.
    """
    for required_flag in ('model_dir', 'pipeline_config_path'):
        flags.mark_flag_as_required(required_flag)

    if FLAGS.gpu_device is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_device)

    growth_session = tf.ConfigProto()
    growth_session.gpu_options.allow_growth = True

    run_config = tf.estimator.RunConfig(
        model_dir=FLAGS.model_dir,
        session_config=growth_session,
        save_checkpoints_secs=FLAGS.save_checkpoints_secs)

    bundle = model_lib.create_estimator_and_inputs(
        run_config=run_config,
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = bundle['estimator']
    train_input_fn = bundle['train_input_fn']
    eval_input_fns = bundle['eval_input_fns']
    eval_on_train_input_fn = bundle['eval_on_train_input_fn']
    predict_input_fn = bundle['predict_input_fn']
    train_steps = bundle['train_steps']

    if FLAGS.checkpoint_dir:
        if FLAGS.eval_training_data:
            name, input_fn = 'training_data', eval_on_train_input_fn
        else:
            # The first eval input will be evaluated.
            name, input_fn = 'validation_data', eval_input_fns[0]

        if FLAGS.run_once:
            latest = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
            estimator.evaluate(input_fn, steps=None, checkpoint_path=latest)
        else:
            model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir,
                                      input_fn, train_steps, name,
                                      FLAGS.max_eval_retries)
        return

    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)

    # Multiple Eval Specs allowed.
    # TODO: Fix name of saving_listeners
    validation_listener = EvalCheckpointSaverListener(
        estimator, eval_specs[0].input_fn, 'validation')
    saving_listeners = [validation_listener]
    if len(eval_specs) > 1:
        training_listener = EvalCheckpointSaverListener(
            estimator, eval_specs[1].input_fn, 'training')
        saving_listeners.append(training_listener)

    estimator.train(input_fn=train_spec.input_fn,
                    max_steps=train_spec.max_steps,
                    saving_listeners=saving_listeners)
def main(unused_argv):
    """Evaluate from a checkpoint directory, or train with Horovod.

    Seeds NumPy and TensorFlow, initialises Horovod and pins each process to
    the GPU matching its local rank. With `FLAGS.checkpoint_dir` set, the
    estimator is evaluated; otherwise training is split into
    `FLAGS.eval_count` rounds, each followed by an evaluation on rank 0
    unless `FLAGS.train_only` is set.

    Args:
      unused_argv: Positional command-line arguments; ignored.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    os.environ["TF_ENABLE_AUTO_MIXED_PRECISION"] = "1" if FLAGS.amp else "0"

    # Set seed to reduce randomness
    np.random.seed(FLAGS.seed)
    tf.set_random_seed(FLAGS.seed)

    hvd.init()

    for required_flag in ('model_dir', 'pipeline_config_path'):
        flags.mark_flag_as_required(required_flag)

    worker_session = tf.ConfigProto()
    worker_session.gpu_options.per_process_gpu_memory_fraction = 0.9
    worker_session.gpu_options.visible_device_list = str(hvd.local_rank())
    if FLAGS.allow_xla:
        jit_options = worker_session.graph_options.optimizer_options
        jit_options.global_jit_level = tf.OptimizerOptions.ON_1

    # Only rank 0 gets a model directory.
    if hvd.rank() == 0:
        model_dir = FLAGS.model_dir
    else:
        model_dir = None
    run_config = tf.estimator.RunConfig(
        tf_random_seed=(FLAGS.seed + hvd.rank()),
        model_dir=model_dir,
        session_config=worker_session)

    train_and_eval_dict = model_lib.create_estimator_and_inputs(
        run_config=run_config,
        eval_count=FLAGS.eval_count,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = train_and_eval_dict['estimator']
    train_input_fn = train_and_eval_dict['train_input_fn']
    eval_input_fns = train_and_eval_dict['eval_input_fns']
    eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
    predict_input_fn = train_and_eval_dict['predict_input_fn']
    train_steps = train_and_eval_dict['train_steps']

    if FLAGS.checkpoint_dir:
        if FLAGS.eval_training_data:
            name, input_fn = 'training_data', eval_on_train_input_fn
        else:
            # The first eval input will be evaluated.
            name, input_fn = 'validation_data', eval_input_fns[0]

        if FLAGS.run_once:
            latest = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
            estimator.evaluate(input_fn, steps=None, checkpoint_path=latest)
        else:
            model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir,
                                      input_fn, train_steps, name)
        return

    # The specs are still built here although the loop below drives the
    # estimator directly.
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)

    global_batch = hvd.size() * train_and_eval_dict['train_batch_size']
    train_hooks = [
        hvd.BroadcastGlobalVariablesHook(0),
        DLLoggerHook(global_batch, hvd.rank()),
    ]
    eval_hooks = []

    for _ in range(FLAGS.eval_count):
        # The per-round step count is computed inside the loop so that an
        # eval_count of 0 never reaches the division.
        estimator.train(train_input_fn,
                        hooks=train_hooks,
                        steps=train_steps // FLAGS.eval_count)

        if hvd.rank() == 0 and not FLAGS.train_only:
            estimator.evaluate(eval_input_fns[0],
                               steps=None,
                               hooks=eval_hooks)
def main(unused_argv):
    """Evaluate from a checkpoint directory, or train and evaluate.

    Uses a `RunConfig` that checkpoints every 1000 steps with time-based
    saving disabled.

    Args:
      unused_argv: Positional command-line arguments; ignored.
    """
    for required_flag in ('model_dir', 'pipeline_config_path'):
        flags.mark_flag_as_required(required_flag)

    run_config = tf.estimator.RunConfig(
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=1000,
        save_checkpoints_secs=None,
        # The original author noted this setting is useless because
        # max_to_keep on the tf.train.Saver in model_lib.py takes effect.
        keep_checkpoint_max=None,
        keep_checkpoint_every_n_hours=10000)

    bundle = model_lib.create_estimator_and_inputs(
        run_config=run_config,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = bundle['estimator']
    train_input_fn = bundle['train_input_fn']
    eval_input_fns = bundle['eval_input_fns']
    eval_on_train_input_fn = bundle['eval_on_train_input_fn']
    predict_input_fn = bundle['predict_input_fn']
    train_steps = bundle['train_steps']

    if not FLAGS.checkpoint_dir:
        train_spec, eval_specs = model_lib.create_train_and_eval_specs(
            train_input_fn,
            eval_input_fns,
            eval_on_train_input_fn,
            predict_input_fn,
            train_steps,
            eval_on_train_data=False)
        # Only the first EvalSpec is handed to the estimator.
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
        return

    if FLAGS.eval_training_data:
        name, input_fn = 'training_data', eval_on_train_input_fn
    else:
        # The first eval input will be evaluated.
        name, input_fn = 'validation_data', eval_input_fns[0]

    if FLAGS.run_once:
        latest = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        estimator.evaluate(input_fn, steps=None, checkpoint_path=latest)
    else:
        model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                  train_steps, name)
def main(unused_argv):
    """Evaluate from a checkpoint directory, or train and evaluate.

    The checkpoint cadence comes from flags: `save_checkpoints_secs` wins
    when set (a warning is logged if `save_checkpoints_steps` is set too),
    then `save_checkpoints_steps`, and otherwise a checkpoint is saved every
    600 seconds.

    Args:
      unused_argv: Positional command-line arguments; ignored.
    """
    for required_flag in ('model_dir', 'pipeline_config_path'):
        flags.mark_flag_as_required(required_flag)

    # Work out which single cadence option to hand to RunConfig.
    if FLAGS.save_checkpoints_secs is not None:
        if FLAGS.save_checkpoints_steps is not None:
            tf.logging.warning(
                'set both save_checkpoints_secs and save_checkpoints_steps, the latter will be ignored'
            )
        cadence = {'save_checkpoints_secs': FLAGS.save_checkpoints_secs}
    elif FLAGS.save_checkpoints_steps is not None:
        cadence = {'save_checkpoints_steps': FLAGS.save_checkpoints_steps}
    else:
        cadence = {'save_checkpoints_secs': 600}

    run_config = tf.estimator.RunConfig(
        model_dir=FLAGS.model_dir,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
        **cadence)

    bundle = model_lib.create_estimator_and_inputs(
        run_config=run_config,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = bundle['estimator']
    train_input_fn = bundle['train_input_fn']
    eval_input_fns = bundle['eval_input_fns']
    eval_on_train_input_fn = bundle['eval_on_train_input_fn']
    predict_input_fn = bundle['predict_input_fn']
    train_steps = bundle['train_steps']

    if not FLAGS.checkpoint_dir:
        train_spec, eval_specs = model_lib.create_train_and_eval_specs(
            train_input_fn,
            eval_input_fns,
            eval_on_train_input_fn,
            predict_input_fn,
            train_steps,
            eval_on_train_data=False)
        # Only the first EvalSpec is handed to the estimator.
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
        return

    if FLAGS.eval_training_data:
        name, input_fn = 'training_data', eval_on_train_input_fn
    else:
        # The first eval input will be evaluated.
        name, input_fn = 'validation_data', eval_input_fns[0]

    if FLAGS.run_once:
        latest = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        estimator.evaluate(input_fn, checkpoint_path=latest)
    else:
        model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                  train_steps, name)
def main(unused_argv):
    """Evaluate or train a detection model with file, CSV and console logs.

    Configures the `tensorflow` logger (CSV formatter on the root logger's
    first handler, a file handler writing `log.txt`, and a filtered console
    handler), runs either evaluation or training, and finally logs the start
    time, end time and duration of the run.

    Args:
      unused_argv: Positional command-line arguments; ignored.
    """
    for required_flag in ('model_dir', 'pipeline_config_path'):
        flags.mark_flag_as_required(required_flag)

    started_at = datetime.now(tz_SG).strftime('%d-%m-%Y %H:%M:%S')
    t_begin = time.time()

    # Make the directory that receives the log files.
    log_directory = os.path.join(os.getcwd(), FLAGS.model_dir)
    if not os.path.exists(log_directory):
        os.makedirs(log_directory)
    print("Logging will be found in {}".format(log_directory))
    log_file = os.path.join(log_directory, 'log.txt')

    log = logging.getLogger('tensorflow')
    csv_path = os.path.join(log_directory, 'log.csv')
    log.root.handlers[0].setFormatter(CsvFormatter(output_csv=csv_path))
    log.addFilter(StepLossFilter())
    log.setLevel(logging.INFO)

    message_only = logging.Formatter('%(message)s')

    # FileHandler sends the log entries to a file.
    file_handler = logging.FileHandler(log_file)
    print("File handler: {}".format(file_handler))
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(message_only)
    log.addHandler(file_handler)

    # StreamHandler sends the log entries to the console.
    console_handler = logging.StreamHandler()
    console_handler.addFilter(StepLossFilter())
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(message_only)
    log.addHandler(console_handler)

    growth_session = tf.ConfigProto()
    growth_session.gpu_options.allow_growth = True

    # Log the estimator steps.
    run_config = tf.estimator.RunConfig(
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=500,
        log_step_count_steps=100,
        session_config=growth_session)

    bundle = model_lib.create_estimator_and_inputs(
        run_config=run_config,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = bundle['estimator']
    train_input_fn = bundle['train_input_fn']
    eval_input_fns = bundle['eval_input_fns']
    eval_on_train_input_fn = bundle['eval_on_train_input_fn']
    predict_input_fn = bundle['predict_input_fn']
    train_steps = bundle['train_steps']

    if FLAGS.checkpoint_dir:
        if FLAGS.eval_training_data:
            name, input_fn = 'training_data', eval_on_train_input_fn
        else:
            # The first eval input will be evaluated.
            name, input_fn = 'validation_data', eval_input_fns[0]

        if FLAGS.run_once:
            latest = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
            estimator.evaluate(input_fn, steps=None, checkpoint_path=latest)
        else:
            model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir,
                                      input_fn, train_steps, name)
    else:
        train_spec, eval_specs = model_lib.create_train_and_eval_specs(
            train_input_fn,
            eval_input_fns,
            eval_on_train_input_fn,
            predict_input_fn,
            train_steps,
            eval_on_train_data=False)
        # Only the first EvalSpec is handed to the estimator.
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])

    t_finish = time.time()
    finished_at = datetime.now(tz_SG).strftime('%d-%m-%Y %H:%M:%S')
    log.info("Started: {}".format(started_at))
    log.info("Ended: {}".format(finished_at))
    log.info("Duration: {} secs".format(round(t_finish - t_begin, 0)))
def main(unused_argv):
    """Print the run configuration, then evaluate or train a detection model.

    Dumps the relevant flags and the working-directory listing for debugging,
    checks that the input data named in the pipeline config exists, enables
    eager execution, and then runs evaluation (if `FLAGS.checkpoint_dir` is
    set) or training plus evaluation.

    Args:
      unused_argv: Positional command-line arguments; ignored.
    """
    print("*** train.py/main()")
    # The required-flag checks are intentionally left disabled here.

    print('*** FLAGS ***')
    print("pipeline_config_path:", FLAGS.pipeline_config_path)

    # Verification / debug output.
    print("config exists:", os.path.exists(FLAGS.pipeline_config_path))
    for item in listdir("."):
        print("file:", item)

    print("model_dir:", FLAGS.model_dir)
    print("train:", FLAGS.train)
    print("val:", FLAGS.val)
    print("sample_1_of_n_eval_examples:", FLAGS.sample_1_of_n_eval_examples)
    print("hparams_overrides:", FLAGS.hparams_overrides)
    print("checkpoint_dir:", FLAGS.checkpoint_dir)

    # Check the pipeline config parameters: input data.
    pipeline_config_dict = config_util.get_configs_from_pipeline_file(
        FLAGS.pipeline_config_path)
    check_input_data_existance(pipeline_config_dict)

    print(" - - - - - - - - -")

    run_config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)

    tf.enable_eager_execution()
    tf.set_random_seed(0)
    tf.logging.set_verbosity(tf.logging.ERROR)

    # Creates `Estimator`, input functions, and steps.
    bundle = model_lib.create_estimator_and_inputs(
        run_config=run_config,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))

    # Unpack the outputs that were returned in a dict.
    estimator = bundle['estimator']
    train_input_fn = bundle['train_input_fn']
    eval_input_fns = bundle['eval_input_fns']
    eval_on_train_input_fn = bundle['eval_on_train_input_fn']
    predict_input_fn = bundle['predict_input_fn']
    train_steps = bundle['train_steps']

    if not FLAGS.checkpoint_dir:
        train_spec, eval_specs = model_lib.create_train_and_eval_specs(
            train_input_fn,
            eval_input_fns,
            eval_on_train_input_fn,
            predict_input_fn,
            train_steps,
            eval_on_train_data=False)
        # Only the first EvalSpec is handed to the estimator.
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
        return

    if FLAGS.eval_training_data:
        name, input_fn = 'training_data', eval_on_train_input_fn
    else:
        # The first eval input will be evaluated.
        name, input_fn = 'validation_data', eval_input_fns[0]

    if FLAGS.run_once:
        latest = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        estimator.evaluate(input_fn, steps=None, checkpoint_path=latest)
    else:
        model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                  train_steps, name)
def main(unused_argv):
    """Train and/or evaluate a detection model on TPU.

    Behaviour depends on `FLAGS.mode`:
      * 'train': train up to the configured number of steps.
      * 'eval': continuously evaluate checkpoints in `FLAGS.model_dir`.
      * 'train_and_eval': train in chunks of `FLAGS.eval_every_n_iters`
        steps and, after each chunk, launch a background `model_main.py`
        process that evaluates the latest checkpoint once.

    Args:
      unused_argv: Positional command-line arguments; ignored.
    """
    import os  # Local import: needed to build the child-process environment.

    flags.mark_flag_as_required('model_dir')
    flags.mark_flag_as_required('pipeline_config_path')

    tpu_cluster_resolver = (tf.contrib.cluster_resolver.TPUClusterResolver(
        tpu=FLAGS.tpu_name.split(','),
        zone=FLAGS.tpu_zone,
        project=FLAGS.gcp_project))
    tpu_grpc_url = tpu_cluster_resolver.get_master()

    config = tf.contrib.tpu.RunConfig(
        master=tpu_grpc_url,
        evaluation_master=tpu_grpc_url,
        model_dir=FLAGS.model_dir,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_shards),
        save_checkpoints_steps=FLAGS.eval_every_n_iters)

    # Only forward a batch size when one was given on the command line.
    kwargs = {}
    if FLAGS.train_batch_size:
        kwargs['batch_size'] = FLAGS.train_batch_size

    train_and_eval_dict = model_lib.create_estimator_and_inputs(
        run_config=config,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples),
        use_tpu_estimator=True,
        use_tpu=FLAGS.use_tpu,
        num_shards=FLAGS.num_shards,
        **kwargs)
    estimator = train_and_eval_dict['estimator']
    train_input_fn = train_and_eval_dict['train_input_fn']
    eval_input_fns = train_and_eval_dict['eval_input_fns']
    eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
    train_steps = train_and_eval_dict['train_steps']

    if FLAGS.mode == 'train':
        estimator.train(input_fn=train_input_fn, max_steps=train_steps)

    # Continuously evaluating.
    if FLAGS.mode == 'eval':
        if FLAGS.eval_training_data:
            name = 'training_data'
            input_fn = eval_on_train_input_fn
        else:
            name = 'validation_data'
            # Currently only a single eval input is allowed.
            input_fn = eval_input_fns[0]

        model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn,
                                  train_steps, name)

    if FLAGS.mode == 'train_and_eval':
        # Load the last checkpoint and start from there.
        current_step = load_global_step_from_checkpoint_dir(FLAGS.model_dir)

        while current_step < train_steps:
            # Train for up to eval_every_n_iters steps. At the end of
            # training, a checkpoint will be written to --model_dir.
            next_checkpoint = min(current_step + FLAGS.eval_every_n_iters,
                                  train_steps)
            estimator.train(input_fn=train_input_fn, max_steps=next_checkpoint)
            current_step = next_checkpoint

            # Evaluate the most recent model in --model_dir. Since
            # evaluation happens in batches of --eval_batch_size, some images
            # may be excluded modulo the batch size. As long as the batch
            # size is consistent, the evaluated images are also consistent.
            tf.logging.info('Starting to evaluate at step %d', next_checkpoint)

            # Background evaluation process. The command is passed as an
            # argument list (no shell) so flag values containing spaces or
            # shell metacharacters reach the child process unchanged.
            eval_cmd = [
                'python', 'object_detection/model_main.py',
                '--pipeline_config_path', FLAGS.pipeline_config_path,
                '--model_dir', FLAGS.model_dir,
                '--checkpoint_dir', FLAGS.model_dir,
                '--sample_1_of_n_eval_examples',
                str(FLAGS.sample_1_of_n_eval_examples),
                '--gpu_memory_fraction', str(0.2),
                '--run_once', str(True)
            ]
            # Equivalent of the shell prefix `PYTHONPATH=$PYTHONPATH:slim`.
            eval_env = dict(os.environ)
            eval_env['PYTHONPATH'] = eval_env.get('PYTHONPATH', '') + ':slim'

            print(' '.join(['PYTHONPATH=$PYTHONPATH:slim'] + eval_cmd))
            # Deliberately not waited on: evaluation runs in the background
            # while the next training chunk starts.
            subprocess.Popen(eval_cmd, env=eval_env, stderr=subprocess.STDOUT)
def main(unused_argv):
  """Rewrites the pipeline config for Azure ML mounts, then evaluates or trains.

  The pipeline config at --pipeline_config_path is overwritten in place with
  its `$AZUREML_DATAREFERENCE_*` placeholders replaced by the module-level
  mount paths. If --checkpoint_dir is set the model is evaluated (once with
  --run_once, otherwise continuously); if not, `train_and_evaluate` runs with
  a custom EvalSpec built from the first eval spec.

  Args:
    unused_argv: Ignored.
  """
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')

  # Substitute the AML data reference mount points for the relevant parts of
  # the pipeline config and overwrite the file.
  with open(FLAGS.pipeline_config_path) as config_in:
    pipeline_text = config_in.read()
  pipeline_text = pipeline_text.replace(
      '$AZUREML_DATAREFERENCE_tfrecords', actual_path_tfrecords)
  pipeline_text = pipeline_text.replace(
      '$AZUREML_DATAREFERENCE_artifacts', actual_path_artifacts)
  with open(FLAGS.pipeline_config_path, 'w') as config_out:
    config_out.write(pipeline_text)
  print('model_main.py, main(), finished substituting mount points.')

  # Save less often than default - 1/5 of an epoch (per the original author).
  run_config = tf.estimator.RunConfig(
      model_dir=FLAGS.model_dir, save_checkpoints_steps=104012)

  outputs = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator = outputs['estimator']
  train_input_fn = outputs['train_input_fn']
  eval_input_fns = outputs['eval_input_fns']
  eval_on_train_input_fn = outputs['eval_on_train_input_fn']
  predict_input_fn = outputs['predict_input_fn']
  train_steps = outputs['train_steps']

  # Evaluation-only path.
  if FLAGS.checkpoint_dir:
    if FLAGS.eval_training_data:
      name, input_fn = 'training_data', eval_on_train_input_fn
    else:
      # The first eval input will be evaluated.
      name, input_fn = 'validation_data', eval_input_fns[0]

    if FLAGS.run_once:
      estimator.evaluate(
          input_fn,
          steps=None,
          checkpoint_path=tf.train.latest_checkpoint(FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name)
    return

  # Train-and-evaluate path.
  train_spec, eval_specs = model_lib.create_train_and_eval_specs(
      train_input_fn,
      eval_input_fns,
      eval_on_train_input_fn,
      predict_input_fn,
      train_steps,
      eval_on_train_data=False)

  # Currently only a single Eval Spec is allowed.
  base_spec = eval_specs[0]
  # throttle_secs is not documented in eval.proto; the original author uses it
  # as a rough replacement for eval_interval_secs.
  hourly_eval_spec = tf.estimator.EvalSpec(
      name=base_spec.name,
      input_fn=base_spec.input_fn,
      steps=None,
      exporters=base_spec.exporters,
      # 30 minutes - original author notes this does not seem to be respected.
      start_delay_secs=1800,
      throttle_secs=60 * 60)  # every 60 min

  print('=========== my_eval_spec')
  print(hourly_eval_spec)
  print('=========================')

  tf.estimator.train_and_evaluate(estimator, train_spec, hourly_eval_spec)
def main():
  """Parses the role flags and runs evaluation or training for Faster R-CNN.

  Exactly one of --worker / --evaluator must be given. The contents of
  `faster_rcnn.config` are published as an HTML document through
  `client.Client().update_task_info`, then:
    * with --evaluator, `continuous_eval` runs on the first eval input;
    * otherwise, if the TF_CONFIG environment variable is non-empty,
      `tf.estimator.train_and_evaluate` runs with the first eval spec;
    * otherwise plain `estimator.train` runs.
  """
  targs = build_config()

  parser = ArgumentParser()
  role = parser.add_mutually_exclusive_group(required=True)
  role.set_defaults(worker=False, evaluator=False)
  role.add_argument(
      '--worker', dest='worker', action='store_true', help='Training')
  role.add_argument(
      '--evaluator', dest='evaluator', action='store_true',
      help='Continuously evaluate model')
  parser.add_argument('--training_dir')
  parser.add_argument('--research_dir')
  parser.add_argument('--build_id')
  parser.add_argument('--only_train', default='False')
  parser.add_argument('--export', type=str_bool, help='Export model')
  parser.add_argument('--model_name')
  parser.add_argument('--model_version')
  args, _ = parser.parse_known_args()

  # Publish the raw pipeline config wrapped in a minimal HTML page.
  with open('faster_rcnn.config', 'r') as config_file:
    config_text = config_file.read()
  html_template = (
      '<html><head></head><body>'
      '<pre style="word-wrap: break-word; white-space: pre-wrap;">{}</pre>'
      '</body></html>')
  client.Client().update_task_info(
      {'#documents.config.html': html_template.format(config_text)})

  sys.path.append(args.research_dir)

  model_dir = '{}/{}'.format(args.training_dir, args.build_id)
  outputs = model_lib.create_estimator_and_inputs(
      run_config=tf.estimator.RunConfig(model_dir=model_dir),
      hparams=model_hparams.create_hparams(None),
      pipeline_config_path='faster_rcnn.config',
      train_steps=targs['num_steps'],
      sample_1_of_n_eval_examples=1,
      sample_1_of_n_eval_on_train_examples=5)
  estimator = outputs['estimator']
  train_input_fn = outputs['train_input_fn']
  train_steps = outputs['train_steps']
  eval_input_fns = outputs['eval_input_fns']

  if args.evaluator:
    tf.logging.info('Starting Evaluation.')
    # The export name/version are only forwarded when --export is truthy.
    model_name = args.model_name if args.export else None
    model_version = args.model_version if args.export else None
    continuous_eval(estimator, model_dir, eval_input_fns[0],
                    'validation_data', args, model_name, model_version)
    return

  if os.environ.get('TF_CONFIG'):
    tf.logging.info('Starting Distributed.')
    eval_on_train_input_fn = outputs['eval_on_train_input_fn']
    predict_input_fn = outputs['predict_input_fn']
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
    return

  tf.logging.info('Starting Training.')
  estimator.train(input_fn=train_input_fn, max_steps=train_steps)
def main(unused_argv):
  """Evaluates from --checkpoint_dir, or trains with an early-stopping hook.

  If --checkpoint_dir is set, the model is evaluated on either the
  eval-on-train input (--eval_training_data) or the first eval input: once
  against the latest checkpoint when --run_once is set, otherwise
  continuously. Without --checkpoint_dir, `tf.estimator.train_and_evaluate`
  runs with a TrainSpec that carries an `EarlyStoppingHook` monitoring
  'total_loss'.

  Args:
    unused_argv: Ignored.
  """
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')

  # log_step_count_steps=1 is set explicitly on top of the model directory.
  config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir,
                                  log_step_count_steps=1)

  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fns = train_and_eval_dict['eval_input_fns']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']

  if FLAGS.checkpoint_dir:
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      # The first eval input will be evaluated.
      input_fn = eval_input_fns[0]
    if FLAGS.run_once:
      # `Estimator.evaluate` takes `steps`, not `num_eval_steps`; the old
      # keyword raised TypeError as soon as --run_once was used.
      estimator.evaluate(input_fn,
                         steps=None,
                         checkpoint_path=tf.train.latest_checkpoint(
                             FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name)
  else:
    # https://www.tensorflow.org/api_docs/python/tf/contrib/estimator/stop_if_no_decrease_hook
    # NOTE(review): the three tf.contrib hooks below are constructed but never
    # passed to the TrainSpec; only `early_stopping_hook` is active. Confirm
    # whether they can be deleted.
    early_stopping1 = tf.contrib.estimator.stop_if_no_decrease_hook(
        estimator,
        metric_name='loss_1',
        max_steps_without_decrease=5,
        min_steps=0)

    early_stopping2 = tf.contrib.estimator.stop_if_no_decrease_hook(
        estimator,
        metric_name='loss_2',
        max_steps_without_decrease=5,
        min_steps=0)

    early_stopping3 = tf.contrib.estimator.stop_if_lower_hook(
        estimator,
        metric_name='loss_1',
        threshold=10,
        eval_dir=None,
        min_steps=0,
        run_every_secs=60,
        run_every_steps=None)

    early_stopping_hook = EarlyStoppingHook(monitor='total_loss', patience=300)

    train_spec = tf.estimator.TrainSpec(
        input_fn=train_input_fn,
        max_steps=train_steps,
        hooks=[early_stopping_hook])

    # NOTE(review): this call omits train_input_fn/train_steps and uses the
    # result as a sequence of eval specs; presumably this file's `model_lib`
    # has a modified `create_train_and_eval_specs` - verify its signature and
    # return value.
    eval_specs = model_lib.create_train_and_eval_specs(
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        eval_on_train_data=False)

    # Currently only a single Eval Spec is allowed.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])