Exemplo n.º 1
0
    def testNewFocalLossParameters(self):
        """Tests that the loss weight ratio is updated appropriately."""
        original_alpha = 1.0
        original_gamma = 1.0
        new_alpha = 0.3
        new_gamma = 2.0
        hparams = tf.contrib.training.HParams(focal_loss_alpha=new_alpha,
                                              focal_loss_gamma=new_gamma)
        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")

        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        classification_loss = pipeline_config.model.ssd.loss.classification_loss
        classification_loss.weighted_sigmoid_focal.alpha = original_alpha
        classification_loss.weighted_sigmoid_focal.gamma = original_gamma
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs, hparams)
        classification_loss = configs["model"].ssd.loss.classification_loss
        self.assertAlmostEqual(
            new_alpha, classification_loss.weighted_sigmoid_focal.alpha)
        self.assertAlmostEqual(
            new_gamma, classification_loss.weighted_sigmoid_focal.gamma)
Exemplo n.º 2
0
def main(unused_argv):
  assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
  assert FLAGS.eval_dir, '`eval_dir` is missing.'
  tf.gfile.MakeDirs(FLAGS.eval_dir)
  if FLAGS.pipeline_config_path:
    configs = config_util.get_configs_from_pipeline_file(
        FLAGS.pipeline_config_path)
    tf.gfile.Copy(FLAGS.pipeline_config_path,
                  os.path.join(FLAGS.eval_dir, 'pipeline.config'),
                  overwrite=True)
  else:
    configs = config_util.get_configs_from_multiple_files(
        model_config_path=FLAGS.model_config_path,
        eval_config_path=FLAGS.eval_config_path,
        eval_input_config_path=FLAGS.input_config_path)
    for name, config in [('model.config', FLAGS.model_config_path),
                         ('eval.config', FLAGS.eval_config_path),
                         ('input.config', FLAGS.input_config_path)]:
      tf.gfile.Copy(config,
                    os.path.join(FLAGS.eval_dir, name),
                    overwrite=True)

  model_config = configs['model']
  eval_config = configs['eval_config']
  input_config = configs['eval_input_config']
  if FLAGS.eval_training_data:
    input_config = configs['train_input_config']

  model_fn = functools.partial(
      model_builder.build,
      model_config=model_config,
      is_training=False)

  def get_next(config):
    return dataset_builder.make_initializable_iterator(
        dataset_builder.build(config)).get_next()

  create_input_dict_fn = functools.partial(get_next, input_config)

  label_map = label_map_util.load_labelmap(input_config.label_map_path)
  max_num_classes = max([item.id for item in label_map.item])
  categories = label_map_util.convert_label_map_to_categories(
      label_map, max_num_classes)

  if FLAGS.run_once:
    eval_config.max_evals = 1

  graph_rewriter_fn = None
  if 'graph_rewriter_config' in configs:
    graph_rewriter_fn = graph_rewriter_builder.build(
        configs['graph_rewriter_config'], is_training=False)

  evaluator.evaluate(
      create_input_dict_fn,
      model_fn,
      eval_config,
      categories,
      FLAGS.checkpoint_dir,
      FLAGS.eval_dir,
      graph_hook_fn=graph_rewriter_fn)
Exemplo n.º 3
0
    def test_get_configs_from_pipeline_file(self):
        """Test that proto configs can be read from pipeline config file."""
        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")

        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        pipeline_config.model.faster_rcnn.num_classes = 10
        pipeline_config.train_config.batch_size = 32
        pipeline_config.train_input_reader.label_map_path = "path/to/label_map"
        pipeline_config.eval_config.num_examples = 20
        pipeline_config.eval_input_reader.queue_capacity = 100

        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        self.assertProtoEquals(pipeline_config.model, configs["model"])
        self.assertProtoEquals(pipeline_config.train_config,
                               configs["train_config"])
        self.assertProtoEquals(pipeline_config.train_input_reader,
                               configs["train_input_config"])
        self.assertProtoEquals(pipeline_config.eval_config,
                               configs["eval_config"])
        self.assertProtoEquals(pipeline_config.eval_input_reader,
                               configs["eval_input_config"])
Exemplo n.º 4
0
    def testGetNumberOfClasses(self):
        """Tests that number of classes can be retrieved."""
        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")
        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        pipeline_config.model.faster_rcnn.num_classes = 20
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        number_of_classes = config_util.get_number_of_classes(configs["model"])
        self.assertEqual(20, number_of_classes)
Exemplo n.º 5
0
def _get_configs_for_model(model_name):
  """Returns configurations for model."""
  filename = get_pipeline_config_path(model_name)
  data_path = _get_data_path()
  label_map_path = _get_labelmap_path()
  configs = config_util.get_configs_from_pipeline_file(filename)
  configs = config_util.merge_external_params_with_configs(
      configs,
      train_input_path=data_path,
      eval_input_path=data_path,
      label_map_path=label_map_path)
  return configs
Exemplo n.º 6
0
    def testUseMovingAverageForEval(self):
        use_moving_averages_orig = False
        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")

        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        pipeline_config.eval_config.use_moving_averages = use_moving_averages_orig
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs, eval_with_moving_averages=True)
        self.assertEqual(True, configs["eval_config"].use_moving_averages)
Exemplo n.º 7
0
def _get_configs_for_model(model_name):
    """Returns configurations for model."""
    fname = os.path.join(tf.resource_loader.get_data_files_path(),
                         'samples/configs/' + model_name + '.config')
    label_map_path = os.path.join(tf.resource_loader.get_data_files_path(),
                                  'data/pet_label_map.pbtxt')
    data_path = os.path.join(tf.resource_loader.get_data_files_path(),
                             'test_data/pets_examples.record')
    configs = config_util.get_configs_from_pipeline_file(fname)
    return config_util.merge_external_params_with_configs(
        configs,
        train_input_path=data_path,
        eval_input_path=data_path,
        label_map_path=label_map_path)
Exemplo n.º 8
0
    def test_save_pipeline_config(self):
        """Tests that the pipeline config is properly saved to disk."""
        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        pipeline_config.model.faster_rcnn.num_classes = 10
        pipeline_config.train_config.batch_size = 32
        pipeline_config.train_input_reader.label_map_path = "path/to/label_map"
        pipeline_config.eval_config.num_examples = 20
        pipeline_config.eval_input_reader.queue_capacity = 100

        config_util.save_pipeline_config(pipeline_config, self.get_temp_dir())
        configs = config_util.get_configs_from_pipeline_file(
            os.path.join(self.get_temp_dir(), "pipeline.config"))
        pipeline_config_reconstructed = (
            config_util.create_pipeline_proto_from_configs(configs))

        self.assertEqual(pipeline_config, pipeline_config_reconstructed)
Exemplo n.º 9
0
    def testTrainShuffle(self):
        """Tests that `train_shuffle` keyword arguments are applied correctly."""
        original_shuffle = True
        desired_shuffle = False

        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")
        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        pipeline_config.train_input_reader.shuffle = original_shuffle
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs, train_shuffle=desired_shuffle)
        train_shuffle = configs["train_input_config"].shuffle
        self.assertEqual(desired_shuffle, train_shuffle)
Exemplo n.º 10
0
    def testNewBatchSizeWithClipping(self):
        """Tests that batch size is clipped to 1 from below."""
        original_batch_size = 2
        hparams = tf.contrib.training.HParams(batch_size=0.5)
        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")

        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        pipeline_config.train_config.batch_size = original_batch_size
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs, hparams)
        new_batch_size = configs["train_config"].batch_size
        self.assertEqual(1, new_batch_size)  # Clipped to 1.0.
Exemplo n.º 11
0
    def testNewBatchSize(self):
        """Tests that batch size is updated appropriately."""
        original_batch_size = 2
        hparams = tf.contrib.training.HParams(batch_size=16)
        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")

        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        pipeline_config.train_config.batch_size = original_batch_size
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs, hparams)
        new_batch_size = configs["train_config"].batch_size
        self.assertEqual(16, new_batch_size)
Exemplo n.º 12
0
    def test_create_pipeline_proto_from_configs(self):
        """Tests that proto can be reconstructed from configs dictionary."""
        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")

        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        pipeline_config.model.faster_rcnn.num_classes = 10
        pipeline_config.train_config.batch_size = 32
        pipeline_config.train_input_reader.label_map_path = "path/to/label_map"
        pipeline_config.eval_config.num_examples = 20
        pipeline_config.eval_input_reader.queue_capacity = 100
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        pipeline_config_reconstructed = (
            config_util.create_pipeline_proto_from_configs(configs))
        self.assertEqual(pipeline_config, pipeline_config_reconstructed)
Exemplo n.º 13
0
  def test_create_estimator_with_default_train_eval_steps(self):
    """Tests that number of train/eval defaults to config values."""
    run_config = tf.estimator.RunConfig()
    hparams = model_hparams.create_hparams(
        hparams_overrides='load_pretrained=false')
    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
    configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
    config_train_steps = configs['train_config'].num_steps
    config_eval_steps = configs['eval_config'].num_examples
    train_and_eval_dict = model_lib.create_estimator_and_inputs(
        run_config, hparams, pipeline_config_path)
    estimator = train_and_eval_dict['estimator']
    train_steps = train_and_eval_dict['train_steps']
    eval_steps = train_and_eval_dict['eval_steps']

    self.assertIsInstance(estimator, tf.estimator.Estimator)
    self.assertEqual(config_train_steps, train_steps)
    self.assertEqual(config_eval_steps, eval_steps)
Exemplo n.º 14
0
    def testNewTrainInputPathList(self):
        """Tests that train input path can be overwritten with multiple files."""
        original_train_path = ["path/to/data"]
        new_train_path = ["another/path/to/data", "yet/another/path/to/data"]
        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")

        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        reader_config = pipeline_config.train_input_reader.tf_record_input_reader
        reader_config.input_path.extend(original_train_path)
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs, train_input_path=new_train_path)
        reader_config = configs["train_input_config"].tf_record_input_reader
        final_path = reader_config.input_path
        self.assertEqual(new_train_path, final_path)
Exemplo n.º 15
0
    def testNewMomentumOptimizerValue(self):
        """Tests that new momentum value is updated appropriately."""
        original_momentum_value = 0.4
        hparams = tf.contrib.training.HParams(momentum_optimizer_value=1.1)
        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")

        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        optimizer_config = pipeline_config.train_config.optimizer.rms_prop_optimizer
        optimizer_config.momentum_optimizer_value = original_momentum_value
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs, hparams)
        optimizer_config = configs["train_config"].optimizer.rms_prop_optimizer
        new_momentum_value = optimizer_config.momentum_optimizer_value
        self.assertAlmostEqual(1.0, new_momentum_value)  # Clipped to 1.0.
Exemplo n.º 16
0
    def testOverWriteRetainOriginalImages(self):
        """Tests that `train_shuffle` keyword arguments are applied correctly."""
        original_retain_original_images = True
        desired_retain_original_images = False

        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")
        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        pipeline_config.eval_config.retain_original_images = (
            original_retain_original_images)
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs,
            retain_original_images_in_eval=desired_retain_original_images)
        retain_original_images = configs["eval_config"].retain_original_images
        self.assertEqual(desired_retain_original_images,
                         retain_original_images)
Exemplo n.º 17
0
    def testNewMaskType(self):
        """Tests that mask type can be overwritten in input readers."""
        original_mask_type = input_reader_pb2.NUMERICAL_MASKS
        new_mask_type = input_reader_pb2.PNG_MASKS
        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")

        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        train_input_reader = pipeline_config.train_input_reader
        train_input_reader.mask_type = original_mask_type
        eval_input_reader = pipeline_config.eval_input_reader
        eval_input_reader.mask_type = original_mask_type
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs, mask_type=new_mask_type)
        self.assertEqual(new_mask_type,
                         configs["train_input_config"].mask_type)
        self.assertEqual(new_mask_type, configs["eval_input_config"].mask_type)
Exemplo n.º 18
0
    def testDontOverwriteEmptyLabelMapPath(self):
        """Tests that label map path will not by overwritten with empty string."""
        original_label_map_path = "path/to/original/label_map"
        new_label_map_path = ""
        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")

        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        train_input_reader = pipeline_config.train_input_reader
        train_input_reader.label_map_path = original_label_map_path
        eval_input_reader = pipeline_config.eval_input_reader
        eval_input_reader.label_map_path = original_label_map_path
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs, label_map_path=new_label_map_path)
        self.assertEqual(original_label_map_path,
                         configs["train_input_config"].label_map_path)
        self.assertEqual(original_label_map_path,
                         configs["eval_input_config"].label_map_path)
Exemplo n.º 19
0
    def testNewClassificationLocalizationWeightRatio(self):
        """Tests that the loss weight ratio is updated appropriately."""
        original_localization_weight = 0.1
        original_classification_weight = 0.2
        new_weight_ratio = 5.0
        hparams = tf.contrib.training.HParams(
            classification_localization_weight_ratio=new_weight_ratio)
        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")

        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        pipeline_config.model.ssd.loss.localization_weight = (
            original_localization_weight)
        pipeline_config.model.ssd.loss.classification_weight = (
            original_classification_weight)
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs, hparams)
        loss = configs["model"].ssd.loss
        self.assertAlmostEqual(1.0, loss.localization_weight)
        self.assertAlmostEqual(new_weight_ratio, loss.classification_weight)
Exemplo n.º 20
0
    def testMergingKeywordArguments(self):
        """Tests that keyword arguments get merged as do hyperparameters."""
        original_num_train_steps = 100
        original_num_eval_steps = 5
        desired_num_train_steps = 10
        desired_num_eval_steps = 1
        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")

        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        pipeline_config.train_config.num_steps = original_num_train_steps
        pipeline_config.eval_config.num_examples = original_num_eval_steps
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs,
            train_steps=desired_num_train_steps,
            eval_steps=desired_num_eval_steps)
        train_steps = configs["train_config"].num_steps
        eval_steps = configs["eval_config"].num_examples
        self.assertEqual(desired_num_train_steps, train_steps)
        self.assertEqual(desired_num_eval_steps, eval_steps)
Exemplo n.º 21
0
    def _assertOptimizerWithNewLearningRate(self, optimizer_name):
        """Asserts successful updating of all learning rate schemes."""
        original_learning_rate = 0.7
        learning_rate_scaling = 0.1
        warmup_learning_rate = 0.07
        hparams = tf.contrib.training.HParams(learning_rate=0.15)
        pipeline_config_path = os.path.join(self.get_temp_dir(),
                                            "pipeline.config")

        # Constant learning rate.
        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        optimizer = getattr(pipeline_config.train_config.optimizer,
                            optimizer_name)
        _update_optimizer_with_constant_learning_rate(optimizer,
                                                      original_learning_rate)
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs, hparams)
        optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
        constant_lr = optimizer.learning_rate.constant_learning_rate
        self.assertAlmostEqual(hparams.learning_rate,
                               constant_lr.learning_rate)

        # Exponential decay learning rate.
        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        optimizer = getattr(pipeline_config.train_config.optimizer,
                            optimizer_name)
        _update_optimizer_with_exponential_decay_learning_rate(
            optimizer, original_learning_rate)
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs, hparams)
        optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
        exponential_lr = optimizer.learning_rate.exponential_decay_learning_rate
        self.assertAlmostEqual(hparams.learning_rate,
                               exponential_lr.initial_learning_rate)

        # Manual step learning rate.
        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        optimizer = getattr(pipeline_config.train_config.optimizer,
                            optimizer_name)
        _update_optimizer_with_manual_step_learning_rate(
            optimizer, original_learning_rate, learning_rate_scaling)
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs, hparams)
        optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
        manual_lr = optimizer.learning_rate.manual_step_learning_rate
        self.assertAlmostEqual(hparams.learning_rate,
                               manual_lr.initial_learning_rate)
        for i, schedule in enumerate(manual_lr.schedule):
            self.assertAlmostEqual(
                hparams.learning_rate * learning_rate_scaling**i,
                schedule.learning_rate)

        # Cosine decay learning rate.
        pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
        optimizer = getattr(pipeline_config.train_config.optimizer,
                            optimizer_name)
        _update_optimizer_with_cosine_decay_learning_rate(
            optimizer, original_learning_rate, warmup_learning_rate)
        _write_config(pipeline_config, pipeline_config_path)

        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path)
        configs = config_util.merge_external_params_with_configs(
            configs, hparams)
        optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
        cosine_lr = optimizer.learning_rate.cosine_decay_learning_rate

        self.assertAlmostEqual(hparams.learning_rate,
                               cosine_lr.learning_rate_base)
        warmup_scale_factor = warmup_learning_rate / original_learning_rate
        self.assertAlmostEqual(hparams.learning_rate * warmup_scale_factor,
                               cosine_lr.warmup_learning_rate)
Exemplo n.º 22
0
def main(_):
    assert FLAGS.train_dir, '`train_dir` is missing.'
    if FLAGS.task == 0: tf.gfile.MakeDirs(FLAGS.train_dir)
    if FLAGS.pipeline_config_path:
        configs = config_util.get_configs_from_pipeline_file(
            FLAGS.pipeline_config_path)
        if FLAGS.task == 0:
            tf.gfile.Copy(FLAGS.pipeline_config_path,
                          os.path.join(FLAGS.train_dir, 'pipeline.config'),
                          overwrite=True)
    else:
        configs = config_util.get_configs_from_multiple_files(
            model_config_path=FLAGS.model_config_path,
            train_config_path=FLAGS.train_config_path,
            train_input_config_path=FLAGS.input_config_path)
        if FLAGS.task == 0:
            for name, config in [('model.config', FLAGS.model_config_path),
                                 ('train.config', FLAGS.train_config_path),
                                 ('input.config', FLAGS.input_config_path)]:
                tf.gfile.Copy(config,
                              os.path.join(FLAGS.train_dir, name),
                              overwrite=True)

    model_config = configs['model']
    train_config = configs['train_config']
    input_config = configs['train_input_config']

    model_fn = functools.partial(model_builder.build,
                                 model_config=model_config,
                                 is_training=True)

    def get_next(config):
        return dataset_builder.make_initializable_iterator(
            dataset_builder.build(config)).get_next()

    create_input_dict_fn = functools.partial(get_next, input_config)

    env = json.loads(os.environ.get('TF_CONFIG', '{}'))
    cluster_data = env.get('cluster', None)
    cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
    task_data = env.get('task', None) or {'type': 'master', 'index': 0}
    task_info = type('TaskSpec', (object, ), task_data)

    # Parameters for a single worker.
    ps_tasks = 0
    worker_replicas = 1
    worker_job_name = 'lonely_worker'
    task = 0
    is_chief = True
    master = ''

    if cluster_data and 'worker' in cluster_data:
        # Number of total worker replicas include "worker"s and the "master".
        worker_replicas = len(cluster_data['worker']) + 1
    if cluster_data and 'ps' in cluster_data:
        ps_tasks = len(cluster_data['ps'])

    if worker_replicas > 1 and ps_tasks < 1:
        raise ValueError(
            'At least 1 ps task is needed for distributed training.')

    if worker_replicas >= 1 and ps_tasks > 0:
        # Set up distributed training.
        server = tf.train.Server(tf.train.ClusterSpec(cluster),
                                 protocol='grpc',
                                 job_name=task_info.type,
                                 task_index=task_info.index)
        if task_info.type == 'ps':
            server.join()
            return

        worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
        task = task_info.index
        is_chief = (task_info.type == 'master')
        master = server.target

    graph_rewriter_fn = None
    if 'graph_rewriter_config' in configs:
        graph_rewriter_fn = graph_rewriter_builder.build(
            configs['graph_rewriter_config'], is_training=True)

    trainer.train(create_input_dict_fn,
                  model_fn,
                  train_config,
                  master,
                  task,
                  FLAGS.num_clones,
                  worker_replicas,
                  FLAGS.clone_on_cpu,
                  ps_tasks,
                  worker_job_name,
                  is_chief,
                  FLAGS.train_dir,
                  graph_hook_fn=graph_rewriter_fn)
Exemplo n.º 23
0
 def _create_and_load_test_configs(self, pipeline_config):
     pipeline_config_path = os.path.join(self.get_temp_dir(),
                                         "pipeline.config")
     _write_config(pipeline_config, pipeline_config_path)
     return config_util.get_configs_from_pipeline_file(pipeline_config_path)