def main(unused_argv):
    """Evaluate a detection model as configured by the command-line flags.

    Creates `FLAGS.eval_dir`, loads the configs (from a single pipeline file
    when `FLAGS.pipeline_config_path` is set, otherwise from the separate
    model/eval/input config files), copies the config file(s) that were used
    into the eval directory, and then calls `evaluator.evaluate`.

    Args:
        unused_argv: Ignored.

    Raises:
        AssertionError: If `FLAGS.checkpoint_dir` or `FLAGS.eval_dir` is unset.
    """
    assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
    assert FLAGS.eval_dir, '`eval_dir` is missing.'
    tf.gfile.MakeDirs(FLAGS.eval_dir)

    if not FLAGS.pipeline_config_path:
        # Separate config files: load them, then snapshot each one.
        configs = config_util.get_configs_from_multiple_files(
            model_config_path=FLAGS.model_config_path,
            eval_config_path=FLAGS.eval_config_path,
            eval_input_config_path=FLAGS.input_config_path)
        snapshots = (
            ('model.config', FLAGS.model_config_path),
            ('eval.config', FLAGS.eval_config_path),
            ('input.config', FLAGS.input_config_path),
        )
        for target_name, source_path in snapshots:
            tf.gfile.Copy(
                source_path,
                os.path.join(FLAGS.eval_dir, target_name),
                overwrite=True)
    else:
        # Single pipeline file: load it, then snapshot it.
        configs = config_util.get_configs_from_pipeline_file(
            FLAGS.pipeline_config_path)
        tf.gfile.Copy(
            FLAGS.pipeline_config_path,
            os.path.join(FLAGS.eval_dir, 'pipeline.config'),
            overwrite=True)

    model_config = configs['model']
    eval_config = configs['eval_config']
    input_config = configs['eval_input_config']
    if FLAGS.eval_training_data:
        # Evaluate on the training input instead of the eval input.
        input_config = configs['train_input_config']

    model_fn = functools.partial(
        model_builder.build,
        model_config=model_config,
        is_training=False)

    def next_input_dict(reader_config):
        """Build the dataset for `reader_config` and return its next element."""
        dataset = dataset_builder.build(reader_config)
        iterator = dataset_builder.make_initializable_iterator(dataset)
        return iterator.get_next()

    create_input_dict_fn = functools.partial(next_input_dict, input_config)

    categories = label_map_util.create_categories_from_labelmap(
        input_config.label_map_path)

    if FLAGS.run_once:
        eval_config.max_evals = 1

    if 'graph_rewriter_config' in configs:
        graph_rewriter_fn = graph_rewriter_builder.build(
            configs['graph_rewriter_config'], is_training=False)
    else:
        graph_rewriter_fn = None

    evaluator.evaluate(
        create_input_dict_fn,
        model_fn,
        eval_config,
        categories,
        FLAGS.checkpoint_dir,
        FLAGS.eval_dir,
        graph_hook_fn=graph_rewriter_fn)
def test_get_configs_from_multiple_files(self):
    """Tests that proto configs can be read from multiple files.

    Writes one config file per component (model, train, train input, eval,
    eval input) into a temp directory, loads them back through
    `config_util.get_configs_from_multiple_files`, and checks that each
    returned proto equals the one that was written.
    """
    temp_dir = self.get_temp_dir()

    # Write model config file.
    model_config_path = os.path.join(temp_dir, "model.config")
    model = model_pb2.DetectionModel()
    model.faster_rcnn.num_classes = 10
    _write_config(model, model_config_path)

    # Write train config file.
    train_config_path = os.path.join(temp_dir, "train.config")
    train_config = train_pb2.TrainConfig()
    train_config.batch_size = 32
    _write_config(train_config, train_config_path)

    # Write train input config file.
    train_input_config_path = os.path.join(temp_dir, "train_input.config")
    train_input_config = input_reader_pb2.InputReader()
    train_input_config.label_map_path = "path/to/label_map"
    _write_config(train_input_config, train_input_config_path)

    # Write eval config file.
    eval_config_path = os.path.join(temp_dir, "eval.config")
    eval_config = eval_pb2.EvalConfig()
    eval_config.num_examples = 20
    _write_config(eval_config, eval_config_path)

    # Write eval input config file. The label map path differs from the
    # train input's, so the two input readers are distinguishable.
    eval_input_config_path = os.path.join(temp_dir, "eval_input.config")
    eval_input_config = input_reader_pb2.InputReader()
    eval_input_config.label_map_path = "path/to/another/label_map"
    _write_config(eval_input_config, eval_input_config_path)

    configs = config_util.get_configs_from_multiple_files(
        model_config_path=model_config_path,
        train_config_path=train_config_path,
        train_input_config_path=train_input_config_path,
        eval_config_path=eval_config_path,
        eval_input_config_path=eval_input_config_path)

    self.assertProtoEquals(model, configs["model"])
    self.assertProtoEquals(train_config, configs["train_config"])
    self.assertProtoEquals(train_input_config, configs["train_input_config"])
    self.assertProtoEquals(eval_config, configs["eval_config"])
    self.assertProtoEquals(eval_input_config, configs["eval_input_config"])
def main(argv):
    """Compute evaluation metrics from the configured input and save them.

    Loads the eval and eval-input configs named by the flags, passes them to
    `read_data_and_evaluate`, and hands the resulting metrics to
    `write_metrics` together with `FLAGS.eval_dir`.

    Args:
        argv: Ignored.

    Raises:
        ValueError: If any of --input_config_path, --eval_config_path or
            --eval_dir is unset.
    """
    del argv

    for required in ('input_config_path', 'eval_config_path', 'eval_dir'):
        if getattr(FLAGS, required):
            continue
        raise ValueError('Flag --{} is required'.format(required))

    configs = config_util.get_configs_from_multiple_files(
        eval_input_config_path=FLAGS.input_config_path,
        eval_config_path=FLAGS.eval_config_path)

    eval_config = configs['eval_config']
    input_config = configs['eval_input_config']

    # Save metrics
    write_metrics(
        read_data_and_evaluate(input_config, eval_config),
        FLAGS.eval_dir)
def main(_):
    """Train a detection model as configured by flags and `TF_CONFIG`.

    Loads the configs (from a single pipeline file when
    `FLAGS.pipeline_config_path` is set, otherwise from separate
    model/train/input files). When `FLAGS.task` is 0 it also creates
    `FLAGS.train_dir` and copies the config file(s) into it. The cluster
    layout is read from the JSON in the `TF_CONFIG` environment variable.
    When that layout contains parameter servers, a `tf.train.Server` is
    started; a task of type 'ps' calls `server.join()` and returns without
    training. Otherwise `trainer.train` is called.

    Raises:
        AssertionError: If `FLAGS.train_dir` is unset.
        ValueError: If there is more than one worker replica but no ps task.
    """
    assert FLAGS.train_dir, '`train_dir` is missing.'

    # Only task 0 writes to the training directory.
    if FLAGS.task == 0:
        tf.gfile.MakeDirs(FLAGS.train_dir)

    if FLAGS.pipeline_config_path:
        configs = config_util.get_configs_from_pipeline_file(
            FLAGS.pipeline_config_path)
        if FLAGS.task == 0:
            tf.gfile.Copy(
                FLAGS.pipeline_config_path,
                os.path.join(FLAGS.train_dir, 'pipeline.config'),
                overwrite=True)
    else:
        configs = config_util.get_configs_from_multiple_files(
            model_config_path=FLAGS.model_config_path,
            train_config_path=FLAGS.train_config_path,
            train_input_config_path=FLAGS.input_config_path)
        if FLAGS.task == 0:
            snapshots = (
                ('model.config', FLAGS.model_config_path),
                ('train.config', FLAGS.train_config_path),
                ('input.config', FLAGS.input_config_path),
            )
            for target_name, source_path in snapshots:
                tf.gfile.Copy(
                    source_path,
                    os.path.join(FLAGS.train_dir, target_name),
                    overwrite=True)

    model_config = configs['model']
    train_config = configs['train_config']
    input_config = configs['train_input_config']

    model_fn = functools.partial(
        model_builder.build,
        model_config=model_config,
        is_training=True)

    def next_input_dict(reader_config):
        """Build the dataset for `reader_config` and return its next element."""
        dataset = dataset_builder.build(reader_config)
        iterator = dataset_util.make_initializable_iterator(dataset)
        return iterator.get_next()

    create_input_dict_fn = functools.partial(next_input_dict, input_config)

    # Cluster/task description comes from the TF_CONFIG environment variable.
    env = json.loads(os.environ.get('TF_CONFIG', '{}'))
    cluster_data = env.get('cluster')
    if cluster_data:
        cluster = tf.train.ClusterSpec(cluster_data)
    else:
        cluster = None
    task_data = env.get('task') or {'type': 'master', 'index': 0}
    # Expose the task dict's keys as attributes (task_info.type / .index).
    task_info = type('TaskSpec', (object,), task_data)

    # Parameters for a single worker.
    ps_tasks = 0
    worker_replicas = 1
    worker_job_name = 'lonely_worker'
    task = 0
    is_chief = True
    master = ''

    if cluster_data:
        if 'worker' in cluster_data:
            # Number of total worker replicas include "worker"s and the
            # "master".
            worker_replicas = len(cluster_data['worker']) + 1
        if 'ps' in cluster_data:
            ps_tasks = len(cluster_data['ps'])

    if worker_replicas > 1 and ps_tasks < 1:
        raise ValueError(
            'At least 1 ps task is needed for distributed training.')

    if worker_replicas >= 1 and ps_tasks > 0:
        # Set up distributed training.
        server = tf.train.Server(
            tf.train.ClusterSpec(cluster),
            protocol='grpc',
            job_name=task_info.type,
            task_index=task_info.index)
        if task_info.type == 'ps':
            server.join()
            return

        worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
        task = task_info.index
        is_chief = (task_info.type == 'master')
        master = server.target

    trainer.train(
        create_input_dict_fn,
        model_fn,
        train_config,
        master,
        task,
        FLAGS.num_clones,
        worker_replicas,
        FLAGS.clone_on_cpu,
        ps_tasks,
        worker_job_name,
        is_chief,
        FLAGS.train_dir)