Example #1
def main(argv):
    del argv  # Unused.

    params = params_dict.ParamsDict(unet_config.UNET_CONFIG,
                                    unet_config.UNET_RESTRICTIONS)
    params = params_dict.override_params_dict(params,
                                              FLAGS.config_file,
                                              is_strict=False)

    if FLAGS.training_file_pattern:
        params.override({'training_file_pattern': FLAGS.training_file_pattern},
                        is_strict=True)

    if FLAGS.eval_file_pattern:
        params.override({'eval_file_pattern': FLAGS.eval_file_pattern},
                        is_strict=True)

    train_epoch_steps = params.train_item_count // params.train_batch_size
    eval_epoch_steps = params.eval_item_count // params.eval_batch_size

    params.override(
        {
            'model_dir': FLAGS.model_dir,
            'min_eval_interval': FLAGS.min_eval_interval,
            'eval_timeout': FLAGS.eval_timeout,
            'tpu_config': tpu_executor.get_tpu_flags(),
            'lr_decay_steps': train_epoch_steps,
            'train_steps': params.train_epochs * train_epoch_steps,
            'eval_steps': eval_epoch_steps,
        },
        is_strict=False)

    params = params_dict.override_params_dict(params,
                                              FLAGS.params_override,
                                              is_strict=True)

    params.validate()
    params.lock()

    train_input_fn = None
    eval_input_fn = None
    train_input_shapes = None
    eval_input_shapes = None
    if FLAGS.mode in ('train', 'train_and_eval'):
        train_input_fn = input_reader.LiverInputFn(
            params.training_file_pattern,
            params,
            mode=tf.estimator.ModeKeys.TRAIN)
        train_input_shapes = train_input_fn.get_input_shapes(params)
    if FLAGS.mode in ('eval', 'train_and_eval'):
        eval_input_fn = input_reader.LiverInputFn(
            params.eval_file_pattern, params, mode=tf.estimator.ModeKeys.EVAL)
        eval_input_shapes = eval_input_fn.get_input_shapes(params)

    assert train_input_shapes is not None or eval_input_shapes is not None
    run_executer(params,
                 train_input_shapes=train_input_shapes,
                 eval_input_shapes=eval_input_shapes,
                 train_input_fn=train_input_fn,
                 eval_input_fn=eval_input_fn)
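
The pattern above (build a ParamsDict from defaults, layer overrides on top, then validate and lock) recurs throughout these examples. A minimal sketch of that lifecycle, assuming the hyperparameters/params_dict.py module from the tensorflow/tpu models repository is importable; the default values below are made up:

from hyperparameters import params_dict

defaults = {'train_batch_size': 64, 'model_dir': ''}
params = params_dict.ParamsDict(defaults)

# is_strict=True rejects keys that are absent from the defaults;
# is_strict=False allows new keys to be added.
params.override({'train_batch_size': 128}, is_strict=True)
params.override({'eval_batch_size': 8}, is_strict=False)

params.validate()  # checks any registered restrictions
params.lock()      # subsequent overrides now raise an error
print(params.as_dict())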
Example #2
    def _serving_model_fn(features, labels, mode, params):
        """Builds the serving model_fn."""
        del labels  # unused.
        if mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError('To build the serving model_fn, set '
                             'mode = `tf.estimator.ModeKeys.PREDICT`')

        model_params = params_dict.ParamsDict(params)
        serving_model_graph = serving_model_graph_builder(
            output_image_info, output_normalized_coordinates,
            cast_num_detections_to_float)
        model_outputs = serving_model_graph(features, model_params)

        predictions = {
            'num_detections':
            tf.identity(model_outputs['num_detections'], 'NumDetections'),
            'detection_boxes':
            tf.identity(model_outputs['detection_boxes'], 'DetectionBoxes'),
            'detection_classes':
            tf.identity(model_outputs['detection_classes'],
                        'DetectionClasses'),
            'detection_scores':
            tf.identity(model_outputs['detection_scores'], 'DetectionScores'),
        }
        if output_image_info:
            predictions['image_info'] = tf.identity(
                model_outputs['image_info'], 'ImageInfo')

        if export_tpu_model:
            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   predictions=predictions)
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
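
The tf.identity wrappers above exist only to give the exported prediction tensors stable, human-readable names in the serving graph. A tiny sketch of that effect, using TF2's graph mode and a toy tensor:

import tensorflow as tf

# Build a graph so the op name is meaningful, as it would be in an exported model.
with tf.Graph().as_default():
    num_detections = tf.constant([3.0])
    named = tf.identity(num_detections, name='NumDetections')
    print(named.name)  # NumDetections:0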
Example #3
def main(_):
  config = params_dict.ParamsDict(mask_rcnn_config.MASK_RCNN_CFG,
                                  mask_rcnn_config.MASK_RCNN_RESTRICTIONS)
  config = params_dict.override_params_dict(
      config, FLAGS.config, is_strict=True)
  config.is_training_bn = False
  config.train_batch_size = FLAGS.batch_size
  config.eval_batch_size = FLAGS.batch_size

  config.validate()
  config.lock()

  model_params = dict(
      list(config.as_dict().items()),
      use_tpu=FLAGS.use_tpu,
      mode=tf.estimator.ModeKeys.PREDICT,
      transpose_input=False)

  print(' - Setting up TPUEstimator...')
  estimator = tf.estimator.tpu.TPUEstimator(
      model_fn=serving.serving_model_fn_builder(
          FLAGS.output_source_id, FLAGS.output_image_info,
          FLAGS.output_box_features, FLAGS.output_normalized_coordinates,
          FLAGS.cast_num_detections_to_float),
      model_dir=FLAGS.model_dir,
      config=tpu_config.RunConfig(
          tpu_config=tpu_config.TPUConfig(
              iterations_per_loop=FLAGS.iterations_per_loop),
          master='local',
          evaluation_master='local'),
      params=model_params,
      use_tpu=FLAGS.use_tpu,
      train_batch_size=FLAGS.batch_size,
      predict_batch_size=FLAGS.batch_size,
      export_to_tpu=FLAGS.use_tpu,
      export_to_cpu=True)

  print(' - Exporting the model...')
  input_type = FLAGS.input_type
  export_path = estimator.export_saved_model(
      export_dir_base=FLAGS.export_dir,
      serving_input_receiver_fn=functools.partial(
          serving.serving_input_fn,
          batch_size=FLAGS.batch_size,
          desired_image_size=config.image_size,
          padding_stride=(2**config.max_level),
          input_type=input_type,
          input_name=FLAGS.input_name),
      checkpoint_path=FLAGS.checkpoint_path)

  if FLAGS.add_warmup_requests and input_type == 'image_bytes':
    inference_warmup.write_warmup_requests(
        export_path,
        FLAGS.model_name,
        config.image_size,
        batch_sizes=[FLAGS.batch_size],
        image_format='JPEG',
        input_signature=FLAGS.input_name)
  print(' - Done! path: %s' % export_path)
Example #4
def config_generator(model):
    """Model function generator."""
    if model == 'retinanet':
        default_config = retinanet_config.RETINANET_CFG
        restrictions = retinanet_config.RETINANET_RESTRICTIONS
    else:
        raise ValueError('Model %s is not supported.' % model)

    return params_dict.ParamsDict(default_config, restrictions)
Example #5
def config_generator(model):
    """Model function generator."""
    if model == 'attribute_mask_rcnn':
        default_config = model_config.CFG
        restrictions = model_config.RESTRICTIONS
    else:
        raise ValueError('Model %s is not supported.' % model)

    return params_dict.ParamsDict(default_config, restrictions)
Example #6
def main(argv):
    del argv  # Unused.

    params = params_dict.ParamsDict(retinanet_config.RETINANET_CFG,
                                    retinanet_config.RETINANET_RESTRICTIONS)
    params = params_dict.override_params_dict(params,
                                              FLAGS.params_overrides,
                                              is_strict=True)
    params.validate()
    params.lock()

    model_params = dict(params.as_dict(),
                        use_tpu=FLAGS.use_tpu,
                        mode=tf.estimator.ModeKeys.PREDICT,
                        transpose_input=False)

    print(' - Setting up TPUEstimator...')
    estimator = tf.contrib.tpu.TPUEstimator(
        model_fn=serving.serving_model_fn_builder(
            FLAGS.use_tpu, FLAGS.output_image_info,
            FLAGS.output_normalized_coordinates,
            FLAGS.cast_num_detections_to_float),
        model_dir=None,
        config=tpu_config.RunConfig(
            tpu_config=tpu_config.TPUConfig(iterations_per_loop=1),
            master='local',
            evaluation_master='local'),
        params=model_params,
        use_tpu=FLAGS.use_tpu,
        train_batch_size=FLAGS.batch_size,
        predict_batch_size=FLAGS.batch_size,
        export_to_tpu=FLAGS.use_tpu,
        export_to_cpu=True)

    print(' - Exporting the model...')
    input_type = FLAGS.input_type
    image_size = [int(x) for x in FLAGS.input_image_size.split(',')]
    export_path = estimator.export_saved_model(
        export_dir_base=FLAGS.export_dir,
        serving_input_receiver_fn=functools.partial(
            serving.serving_input_fn,
            batch_size=FLAGS.batch_size,
            desired_image_size=image_size,
            stride=(2**params.anchor.max_level),
            input_type=input_type,
            input_name=FLAGS.input_name),
        checkpoint_path=FLAGS.checkpoint_path)

    print(' - Done! path: %s' % export_path)
Example #7
 def prepare_evaluation(self):
     """Preapre for evaluation."""
     val_json_file = os.path.join(self._params.model_dir,
                                  'eval_annotation_file.json')
     if self._params.eval.val_json_file:
         tf.gfile.Copy(self._params.eval.val_json_file, val_json_file)
     else:
         coco_utils.scan_and_generator_annotation_file(
             self._params.eval.eval_file_pattern,
             self._params.eval.eval_samples,
             include_mask=False,
             annotation_file=val_json_file)
     eval_params = params_dict.ParamsDict(self._params.eval)
     eval_params.override({'val_json_file': val_json_file})
     self._evaluator = factory.evaluator_generator(eval_params)
Example #8
def config_generator(model):
    """Model function generator."""
    if model == 'retinanet':
        default_config = retinanet_config.RETINANET_CFG
        restrictions = retinanet_config.RETINANET_RESTRICTIONS
    elif model == 'shapemask':
        default_config = shapemask_config.SHAPEMASK_CFG
        restrictions = shapemask_config.SHAPEMASK_RESTRICTIONS
    elif model == 'segmentation':
        default_config = segmentation_config.SEGMENTATION_CFG
        restrictions = segmentation_config.SEGMENTATION_RESTRICTIONS
    else:
        raise ValueError('Model %s is not supported.' % model)

    return params_dict.ParamsDict(default_config, restrictions)
Example #9
def main(argv):
    del argv  # Unused.

    # Configure parameters.
    params = params_dict.ParamsDict(mask_rcnn_config.MASK_RCNN_CFG,
                                    mask_rcnn_config.MASK_RCNN_RESTRICTIONS)
    params = params_dict.override_params_dict(params,
                                              FLAGS.config_file,
                                              is_strict=True)
    params = params_dict.override_params_dict(params,
                                              FLAGS.params_override,
                                              is_strict=True)
    params = flags_to_params.override_params_from_input_flags(params, FLAGS)

    params.validate()
    params.lock()

    # Check data path
    train_input_fn = None
    eval_input_fn = None
    if (FLAGS.mode in ('train', 'train_and_eval')
            and not params.training_file_pattern):
        raise RuntimeError(
            'You must specify `training_file_pattern` for training.')
    if FLAGS.mode in ('eval', 'train_and_eval'):
        if not params.validation_file_pattern:
            raise RuntimeError('You must specify `validation_file_pattern` '
                               'for evaluation.')
        if not params.val_json_file and not params.include_groundtruth_in_features:
            raise RuntimeError(
                'You must specify `val_json_file` or '
                'include_groundtruth_in_features=True for evaluation.')

    if FLAGS.mode in ('train', 'train_and_eval'):
        train_input_fn = dataloader.InputReader(
            params.training_file_pattern,
            mode=tf.estimator.ModeKeys.TRAIN,
            use_fake_data=FLAGS.use_fake_data,
            use_instance_mask=params.include_mask)
    if (FLAGS.mode in ('eval', 'train_and_eval')
            or (FLAGS.mode == 'train' and FLAGS.eval_after_training)):
        eval_input_fn = dataloader.InputReader(
            params.validation_file_pattern,
            mode=tf.estimator.ModeKeys.PREDICT,
            num_examples=params.eval_samples,
            use_instance_mask=params.include_mask)

    run_executer(params, train_input_fn, eval_input_fn)
Example #10
    def _serving_model_fn(features, labels, mode, params):
        """Builds the serving model_fn."""
        del labels  # unused.
        if mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError('To build the serving model_fn, set '
                             'mode = `tf.estimator.ModeKeys.PREDICT`')

        model_params = params_dict.ParamsDict(params)
        serving_model_graph = serving_model_graph_builder(
            output_image_info, output_normalized_coordinates,
            cast_num_detections_to_float)
        predictions = serving_model_graph(features, model_params)

        if export_tpu_model:
            return tf.estimator.tpu.TPUEstimatorSpec(mode=mode,
                                                     predictions=predictions)
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
Example #11
def config_generator(model):
    """Model function generator."""
    if model == 'classification':
        default_config = classification_config.CLASSIFICATION_CFG
        restrictions = classification_config.CLASSIFICATION_RESTRICTIONS
    elif model == 'retinanet':
        default_config = retinanet_config.RETINANET_CFG
        restrictions = retinanet_config.RETINANET_RESTRICTIONS
    elif model == 'mask_rcnn':
        default_config = maskrcnn_config.MASKRCNN_CFG
        restrictions = maskrcnn_config.MASKRCNN_RESTRICTIONS
    elif model == 'shapemask':
        default_config = shapemask_config.SHAPEMASK_CFG
        restrictions = shapemask_config.SHAPEMASK_RESTRICTIONS
    else:
        raise ValueError('Model %s is not supported.' % model)

    return params_dict.ParamsDict(default_config, restrictions)
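
These config_generator variants all dispatch through an if/elif chain that grows with each new model. A flatter alternative is a dict registry; this is a sketch only, and it assumes the same config modules and params_dict import as the example above:

_CONFIGS = {
    'classification': (classification_config.CLASSIFICATION_CFG,
                       classification_config.CLASSIFICATION_RESTRICTIONS),
    'retinanet': (retinanet_config.RETINANET_CFG,
                  retinanet_config.RETINANET_RESTRICTIONS),
    'mask_rcnn': (maskrcnn_config.MASKRCNN_CFG,
                  maskrcnn_config.MASKRCNN_RESTRICTIONS),
    'shapemask': (shapemask_config.SHAPEMASK_CFG,
                  shapemask_config.SHAPEMASK_RESTRICTIONS),
}


def config_generator(model):
    """Returns a ParamsDict for `model`, looked up in the registry."""
    if model not in _CONFIGS:
        raise ValueError('Model %s is not supported.' % model)
    default_config, restrictions = _CONFIGS[model]
    return params_dict.ParamsDict(default_config, restrictions)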
Example #12
def main(_):
    params = params_dict.ParamsDict(unet_config.UNET_CONFIG,
                                    unet_config.UNET_RESTRICTIONS)
    params = params_dict.override_params_dict(params,
                                              FLAGS.config_file,
                                              is_strict=False)
    params.train_batch_size = FLAGS.batch_size
    params.eval_batch_size = FLAGS.batch_size
    params.use_bfloat16 = False

    model_params = dict(params.as_dict(),
                        use_tpu=FLAGS.use_tpu,
                        mode=tf.estimator.ModeKeys.PREDICT,
                        transpose_input=False)

    print(' - Setting up TPUEstimator...')
    estimator = tf.estimator.tpu.TPUEstimator(
        model_fn=serving_model_fn,
        model_dir=FLAGS.model_dir,
        config=tf.estimator.tpu.RunConfig(
            tpu_config=tf.estimator.tpu.TPUConfig(
                iterations_per_loop=FLAGS.iterations_per_loop),
            master='local',
            evaluation_master='local'),
        params=model_params,
        use_tpu=FLAGS.use_tpu,
        train_batch_size=FLAGS.batch_size,
        predict_batch_size=FLAGS.batch_size,
        export_to_tpu=FLAGS.use_tpu,
        export_to_cpu=True)

    print(' - Exporting the model...')
    input_type = FLAGS.input_type
    export_path = estimator.export_saved_model(
        export_dir_base=FLAGS.export_dir,
        serving_input_receiver_fn=functools.partial(
            serving_input_fn,
            batch_size=FLAGS.batch_size,
            input_type=input_type,
            params=params,
            input_name=FLAGS.input_name),
        checkpoint_path=FLAGS.checkpoint_path)

    print(' - Done! path: %s' % export_path)
Example #13
def filter_unused_blocks(params):
    """Filters unused architecture params blocks."""
    filtered_params = params_dict.ParamsDict(params)
    if 'parser' in params.architecture.as_dict().keys():
        for parser in _PARSERS:
            if (parser in params.as_dict().keys()
                    and parser != params.architecture.parser):
                delattr(filtered_params, parser)
    if 'backbone' in params.architecture.as_dict().keys():
        for backbone in _BACKBONES:
            if (backbone in params.as_dict().keys()
                    and backbone != params.architecture.backbone):
                delattr(filtered_params, backbone)
    if 'multilevel_features' in params.architecture.as_dict().keys():
        for features in _MULTILEVEL_FEATURES:
            if (features in params.as_dict().keys()
                    and features != params.architecture.multilevel_features):
                delattr(filtered_params, features)
    return filtered_params
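
filter_unused_blocks relies on two ParamsDict behaviors seen elsewhere on this page: constructing a ParamsDict from another one yields a copy, and delattr removes a top-level block. A small sketch with made-up keys, again assuming the tensorflow/tpu params_dict module is importable:

from hyperparameters import params_dict

params = params_dict.ParamsDict({
    'architecture': {'backbone': 'resnet'},
    'resnet': {'resnet_depth': 50},
    'spinenet': {'model_id': '49'},
})

# Copy first so the caller's params stay intact, then drop the unused block.
filtered = params_dict.ParamsDict(params)
if params.architecture.backbone != 'spinenet':
    delattr(filtered, 'spinenet')
print(sorted(filtered.as_dict().keys()))  # ['architecture', 'resnet']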
Example #14
 def prepare_evaluation(self):
     """Preapre for evaluation."""
     eval_params = params_dict.ParamsDict(self._params.eval)
     if self._params.eval.use_json_file:
         val_json_file = os.path.join(
             self._params.model_dir, "eval_annotation_file.json"
         )
         if self._params.eval.val_json_file:
             tf.io.gfile.copy(
                 self._params.eval.val_json_file, val_json_file, overwrite=True
             )
         else:
             coco_utils.scan_and_generator_annotation_file(
                 self._params.eval.eval_file_pattern,
                 self._params.eval.eval_samples,
                 include_mask=False,
                 annotation_file=val_json_file,
                 dataset_type=self._params.eval.eval_dataset_type,
             )
         eval_params.override({"val_json_file": val_json_file})
     self._evaluator = factory.evaluator_generator(eval_params)
Example #15
 def prepare_evaluation(self):
   """Preapre for evaluation."""
   eval_params = params_dict.ParamsDict(self._params.eval)
   if self._params.eval.type == 'box_and_mask':
     if (not self._params.eval.use_json_file or
         not self._params.eval.val_json_file):
       raise ValueError('If `eval.type` == `box_and_mask`, '
                        '`eval.val_json_file` is required.')
   if self._params.eval.use_json_file:
     val_json_file = os.path.join(self._params.model_dir,
                                  'eval_annotation_file.json')
     if self._params.eval.val_json_file:
       tf.gfile.Copy(
           self._params.eval.val_json_file, val_json_file, overwrite=True)
     else:
       coco_utils.scan_and_generator_annotation_file(
           self._params.eval.eval_file_pattern,
           self._params.eval.eval_samples,
           include_mask=False,
           annotation_file=val_json_file,
           dataset_type=self._params.eval.eval_dataset_type)
     eval_params.override({'val_json_file': val_json_file})
   self._evaluator = factory.evaluator_generator(eval_params)
Example #16
    def apply_pre_parser(dataset, mode):
        """Parses per-parser data and zips the parsed output to the input dataset.

    This method can be used to pre-process some data to pass additional
    parsed data to the main parser. It is mainly helpful when we want to combine
    multiple images. The data path and parsing method can be
    set via config.train.pre_parser_dataset.file_pattern and
    config.architecture.pre_parser. Fer example, for Copy-Paste augmentation the
    pre_parser should be set to 'extract_objects_parser' to parse pasting
    objects and then these data will be passed to the main parser of
    'maskrcnn_parser_with_copy_paste'.
    Args:
      dataset: a tf.data.Dataset dataset.
      mode: Training mode string.
    Returns:
      tf.data.Dataset dataset.
    """

        config_params_ = params_dict.ParamsDict(config_params)
        config_params_.architecture.parser = config_params.architecture.pre_parser
        dataset_p, pre_parser_fn = get_dataset(
            config_params_,
            config_params.train.pre_parser_dataset.file_pattern,
            config_params.train.pre_parser_dataset.dataset_type, mode)

        dataset_p = dataset_p.map(
            pre_parser_fn,
            num_parallel_calls=tf.data.experimental.AUTOTUNE,
            deterministic=False)

        dataset_p = dataset_p.prefetch(tf.data.experimental.AUTOTUNE)
        dataset_p = dataset_p.filter(
            lambda data: tf.greater(data['num_groundtrtuhs'], 0))
        dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
        dataset = tf.data.Dataset.zip((dataset, dataset_p))
        return dataset
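
The core tf.data pattern here is to filter the pre-parsed dataset down to usable examples and then zip it against the main pipeline, so every main example arrives paired with a pasting source. The same structure with toy data (TF2; all names below are made up):

import tensorflow as tf

main_ds = tf.data.Dataset.from_tensor_slices({'image_id': tf.range(5)})
paste_ds = tf.data.Dataset.from_tensor_slices({'num_groundtruths': [0, 2, 1, 0, 3]})

# Drop paste candidates that contain no objects, mirroring the filter above.
paste_ds = paste_ds.filter(lambda d: tf.greater(d['num_groundtruths'], 0))

# zip stops at the shorter dataset, so each main example gets one paste example.
zipped = tf.data.Dataset.zip((main_ds, paste_ds))
for main_ex, paste_ex in zipped.as_numpy_iterator():
    print(main_ex, paste_ex)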
Example #17
"""Config template to train ShapeMask."""

from configs import detection_config
from hyperparameters import params_dict

# pylint: disable=line-too-long

SHAPEMASK_RESNET_FROZEN_VAR_PREFIX = r'(resnet\d+/)conv2d(|_([1-9]|10))\/'

SHAPEMASK_CFG = params_dict.ParamsDict(detection_config.DETECTION_CFG)
SHAPEMASK_CFG.override(
    {
        'type': 'shapemask',
        'architecture': {
            'parser': 'shapemask_parser',
            'backbone': 'resnet',
            'multilevel_features': 'fpn',
            'outer_box_scale': 1.25,
        },
        'train': {
            'total_steps': 45000,
            'learning_rate': {
                'learning_rate_steps': [30000, 40000],
            },
            'frozen_variable_prefix': SHAPEMASK_RESNET_FROZEN_VAR_PREFIX,
Example #18
def main(unused_argv):
    params = params_dict.ParamsDict(mnasnet_config.MNASNET_CFG,
                                    mnasnet_config.MNASNET_RESTRICTIONS)
    params = params_dict.override_params_dict(params,
                                              FLAGS.config_file,
                                              is_strict=True)
    params = params_dict.override_params_dict(params,
                                              FLAGS.params_override,
                                              is_strict=True)

    params = flags_to_params.override_params_from_input_flags(params, FLAGS)

    additional_params = {
        'steps_per_epoch': params.num_train_images / params.train_batch_size,
        'quantized_training': FLAGS.quantized_training,
    }

    params = params_dict.override_params_dict(params,
                                              additional_params,
                                              is_strict=False)

    params.validate()
    params.lock()

    if FLAGS.tpu or params.use_tpu:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    else:
        tpu_cluster_resolver = None

    if params.use_async_checkpointing:
        save_checkpoints_steps = None
    else:
        save_checkpoints_steps = max(100, params.iterations_per_loop)
    config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=save_checkpoints_steps,
        log_step_count_steps=FLAGS.log_step_count_steps,
        session_config=tf.ConfigProto(
            graph_options=tf.GraphOptions(
                rewrite_options=rewriter_config_pb2.RewriterConfig(
                    disable_meta_optimizer=True))),
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=params.iterations_per_loop,
            per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig
            .PER_HOST_V2))  # pylint: disable=line-too-long

    # Validates Flags.
    if params.precision == 'bfloat16' and params.use_keras:
        raise ValueError(
            'Keras layers do not fully support bfloat16 activation training.'
            ' You have set precision as %s and use_keras as %s' %
            (params.precision, params.use_keras))

    # Initializes model parameters.
    mnasnet_est = tf.contrib.tpu.TPUEstimator(
        use_tpu=params.use_tpu,
        model_fn=mnasnet_model_fn,
        config=config,
        train_batch_size=params.train_batch_size,
        eval_batch_size=params.eval_batch_size,
        export_to_tpu=FLAGS.export_to_tpu,
        params=params.as_dict())

    if FLAGS.mode == 'export_only':
        export(mnasnet_est, FLAGS.export_dir, params, FLAGS.post_quantize)
        return

    # Input pipelines are slightly different (with regards to shuffling and
    # preprocessing) between training and evaluation.
    if FLAGS.bigtable_instance:
        tf.logging.info('Using Bigtable dataset, table %s',
                        FLAGS.bigtable_table)
        select_train, select_eval = _select_tables_from_flags()
        imagenet_train, imagenet_eval = [
            imagenet_input.ImageNetBigtableInput(
                is_training=is_training,
                use_bfloat16=False,
                transpose_input=params.transpose_input,
                selection=selection)
            for (is_training,
                 selection) in [(True, select_train), (False, select_eval)]
        ]
    else:
        if FLAGS.data_dir == FAKE_DATA_DIR:
            tf.logging.info('Using fake dataset.')
        else:
            tf.logging.info('Using dataset: %s', FLAGS.data_dir)
        imagenet_train, imagenet_eval = [
            imagenet_input.ImageNetInput(
                is_training=is_training,
                data_dir=FLAGS.data_dir,
                transpose_input=params.transpose_input,
                cache=params.use_cache and is_training,
                image_size=params.input_image_size,
                num_parallel_calls=params.num_parallel_calls,
                use_bfloat16=(params.precision == 'bfloat16'))
            for is_training in [True, False]
        ]

    if FLAGS.mode == 'eval':
        eval_steps = params.num_eval_images // params.eval_batch_size
        # Run evaluation when there's a new checkpoint
        for ckpt in evaluation.checkpoints_iterator(
                FLAGS.model_dir, timeout=FLAGS.eval_timeout):
            tf.logging.info('Starting to evaluate.')
            try:
                start_timestamp = time.time()  # This time will include compilation time.
                eval_results = mnasnet_est.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=eval_steps,
                    checkpoint_path=ckpt)
                elapsed_time = int(time.time() - start_timestamp)
                tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                                eval_results, elapsed_time)
                utils.archive_ckpt(eval_results,
                                   eval_results['top_1_accuracy'], ckpt)

                # Terminate eval job when final checkpoint is reached
                current_step = int(os.path.basename(ckpt).split('-')[1])
                if current_step >= params.train_steps:
                    tf.logging.info(
                        'Evaluation finished after training step %d',
                        current_step)
                    break

            except tf.errors.NotFoundError:
                # Since the coordinator is on a different job than the TPU worker,
                # sometimes the TPU worker does not finish initializing until long after
                # the CPU job tells it to start evaluating. In this case, the checkpoint
                # file could have been deleted already.
                tf.logging.info(
                    'Checkpoint %s no longer exists, skipping checkpoint',
                    ckpt)

        if FLAGS.export_dir:
            export(mnasnet_est, FLAGS.export_dir, params, FLAGS.post_quantize)
    else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
        current_step = estimator._load_global_step_from_checkpoint_dir(  # pylint: disable=protected-access
            FLAGS.model_dir)

        tf.logging.info(
            'Training for %d steps (%.2f epochs in total). Current'
            ' step %d.', params.train_steps,
            params.train_steps / params.steps_per_epoch, current_step)

        start_timestamp = time.time()  # This time will include compilation time.

        if FLAGS.mode == 'train':
            hooks = []
            if params.use_async_checkpointing:
                hooks.append(
                    async_checkpoint.AsyncCheckpointSaverHook(
                        checkpoint_dir=FLAGS.model_dir,
                        save_steps=max(100, params.iterations_per_loop)))
            mnasnet_est.train(input_fn=imagenet_train.input_fn,
                              max_steps=params.train_steps,
                              hooks=hooks)

        else:
            assert FLAGS.mode == 'train_and_eval'
            while current_step < params.train_steps:
                # Train for up to steps_per_eval number of steps.
                # At the end of training, a checkpoint will be written to --model_dir.
                next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                                      params.train_steps)
                mnasnet_est.train(input_fn=imagenet_train.input_fn,
                                  max_steps=next_checkpoint)
                current_step = next_checkpoint

                tf.logging.info(
                    'Finished training up to step %d. Elapsed seconds %d.',
                    next_checkpoint, int(time.time() - start_timestamp))

                # Evaluate the model on the most recent model in --model_dir.
                # Since evaluation happens in batches of --eval_batch_size, some images
                # may be excluded modulo the batch size. As long as the batch size is
                # consistent, the evaluated images are also consistent.
                tf.logging.info('Starting to evaluate.')
                eval_results = mnasnet_est.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=params.num_eval_images // params.eval_batch_size)
                tf.logging.info('Eval results at step %d: %s', next_checkpoint,
                                eval_results)
                ckpt = tf.train.latest_checkpoint(FLAGS.model_dir)
                utils.archive_ckpt(eval_results,
                                   eval_results['top_1_accuracy'], ckpt)

            elapsed_time = int(time.time() - start_timestamp)
            tf.logging.info(
                'Finished training up to step %d. Elapsed seconds %d.',
                params.train_steps, elapsed_time)
            if FLAGS.export_dir:
                export(mnasnet_est, FLAGS.export_dir, params,
                       FLAGS.post_quantize)
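
The eval branch above polls for new checkpoints with checkpoints_iterator. A minimal sketch of that polling loop (the directory is made up; tf.train.checkpoints_iterator blocks until a new checkpoint appears, and stops iterating once the timeout elapses with no new checkpoint):

import tensorflow as tf

for ckpt in tf.train.checkpoints_iterator('/tmp/model_dir', timeout=60):
    print('New checkpoint:', ckpt)
    # evaluate(checkpoint_path=ckpt) would go here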
Example #19
"""Config template to train Mask R-CNN."""

from configs import base_config
from hyperparameters import params_dict

# pylint: disable=line-too-long
MASKRCNN_CFG = params_dict.ParamsDict(base_config.BASE_CFG)
MASKRCNN_CFG.override({
    'type': 'mask_rcnn',
    'eval': {
        'type': 'box_and_mask',
    },
    'architecture': {
        'parser': 'maskrcnn_parser',
        'backbone': 'resnet',
        'multilevel_features': 'fpn',
        'use_bfloat16': True,
        'include_mask': False,
    },
    'maskrcnn_parser': {
        'use_bfloat16': True,
        'output_size': [1024, 1024],
Example #20
def main(argv):
    del argv  # Unused.

    params = params_dict.ParamsDict(retinanet_config.RETINANET_CFG,
                                    retinanet_config.RETINANET_RESTRICTIONS)
    params = params_dict.override_params_dict(params,
                                              FLAGS.params_overrides,
                                              is_strict=True)
    params.override(
        {
            'platform': {
                'eval_master': FLAGS.eval_master,
                'tpu': FLAGS.tpu,
                'tpu_zone': FLAGS.tpu_zone,
                'gcp_project': FLAGS.gcp_project,
            },
            'use_tpu': FLAGS.use_tpu,
            'model_dir': FLAGS.model_dir,
            'train': {
                'num_shards': FLAGS.num_cores,
            },
        },
        is_strict=False)
    params.validate()
    params.lock()
    pp = pprint.PrettyPrinter()
    params_str = pp.pformat(params.as_dict())
    tf.logging.info('Model Parameters: {}'.format(params_str))

    # Builds detection model on TPUs.
    model_fn = model_builder.ModelFn(params)
    executor = tpu_executor.TpuExecutor(model_fn, params)

    # Prepares input functions for train and eval.
    train_input_fn = input_reader.InputFn(params.train.train_file_pattern,
                                          params,
                                          mode=ModeKeys.TRAIN)
    eval_input_fn = input_reader.InputFn(params.eval.eval_file_pattern,
                                         params,
                                         mode=ModeKeys.PREDICT_WITH_GT)

    # Runs the model.
    if FLAGS.mode == 'train':
        save_config(params, params.model_dir)
        executor.train(train_input_fn, params.train.total_steps)
        if FLAGS.eval_after_training:
            executor.evaluate(
                eval_input_fn,
                params.eval.eval_samples // params.predict.predict_batch_size)

    elif FLAGS.mode == 'eval':

        def terminate_eval():
            tf.logging.info(
                'Terminating eval after %d seconds of no checkpoints' %
                params.eval.eval_timeout)
            return True

        # Runs evaluation when there's a new checkpoint.
        for ckpt in tf.contrib.training.checkpoints_iterator(
                params.model_dir,
                min_interval_secs=params.eval.min_eval_interval,
                timeout=params.eval.eval_timeout,
                timeout_fn=terminate_eval):
            # Terminates eval job when final checkpoint is reached.
            current_step = int(os.path.basename(ckpt).split('-')[1])

            tf.logging.info('Starting to evaluate.')
            try:
                executor.evaluate(
                    eval_input_fn, params.eval.eval_samples //
                    params.predict.predict_batch_size, ckpt)

                if current_step >= params.train.total_steps:
                    tf.logging.info(
                        'Evaluation finished after training step %d' %
                        current_step)
                    break
            except tf.errors.NotFoundError:
                # Since the coordinator is on a different job than the TPU worker,
                # sometimes the TPU worker does not finish initializing until long after
                # the CPU job tells it to start evaluating. In this case, the checkpoint
                # file could have been deleted already.
                tf.logging.info(
                    'Checkpoint %s no longer exists, skipping checkpoint' %
                    ckpt)

    elif FLAGS.mode == 'train_and_eval':
        save_config(params, params.model_dir)
        num_cycles = int(params.train.total_steps /
                         params.eval.num_steps_per_eval)
        for cycle in range(num_cycles):
            tf.logging.info('Start training cycle %d.' % cycle)
            current_cycle_last_train_step = ((cycle + 1) *
                                             params.eval.num_steps_per_eval)
            executor.train(train_input_fn, current_cycle_last_train_step)
            executor.evaluate(
                eval_input_fn,
                params.eval.eval_samples // params.predict.predict_batch_size)
    else:
        tf.logging.info('Mode not found.')
Example #21
"""Config template to train Segmentation."""

from configs import base_config
from hyperparameters import params_dict

# pylint: disable=line-too-long
RESNET_FROZEN_VAR_PREFIX = r'(resnet\d+)\/(conv2d(|_([1-9]|10))|batch_normalization(|_([1-9]|10)))\/'

SEGMENTATION_CFG = params_dict.ParamsDict(base_config.BASE_CFG)
SEGMENTATION_CFG.override(
    {
        'type': 'segmentation',
        'architecture': {
            'parser': 'segmentation_parser',
            'backbone': 'resnet',
            'multilevel_features': 'fpn',
            'use_aspp': False,
            'use_pyramid_fusion': False,
            'num_classes': 21,  # Include background class 0.
        },
        'train': {
            'train_batch_size': 64,
            'total_steps': 10000,
            'learning_rate': {
Example #22
from configs import base_config
from hyperparameters import params_dict

# pylint: disable=line-too-long

# For ResNet, this freezes the variables of the first conv1 and conv2_x
# layers [1], which leads to higher training speed and slightly better testing
# accuracy. The intuition is that the low-level architecture (e.g., ResNet-50)
# is able to capture low-level features such as edges; therefore, it does not
# need to be fine-tuned for the detection task.
# Note that we need the trailing `/` to avoid an incorrect match.
# [1]: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/config.py#L198
RESNET_FROZEN_VAR_PREFIX = r'(resnet\d+)\/(conv2d(|_([1-9]|10))|batch_normalization(|_([1-9]|10)))\/'

DETECTION_CFG = params_dict.ParamsDict(base_config.BASE_CFG)
DETECTION_CFG.override({
    'architecture': {
        # Note that `num_classes` is the total number of classes including
        # one background class whose index is 0.
        'num_classes': 91
    },
    'eval': {
        'type': 'box',
        # Setting `eval_samples` = None will exhaust all the samples in the eval
        # dataset once. This only works if `type` != customized.
        'eval_samples': None,
        'use_json_file': True,
        'val_json_file': '',
        'per_category_metrics': False,
    },
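
The frozen-variable prefix regex above is easiest to understand by testing it against concrete variable names: it matches the unsuffixed conv2d/batch_normalization ops and suffixes _1 through _10, but not deeper layers. A standard-library sketch with illustrative names:

import re

RESNET_FROZEN_VAR_PREFIX = r'(resnet\d+)\/(conv2d(|_([1-9]|10))|batch_normalization(|_([1-9]|10)))\/'

for name in ['resnet50/conv2d/kernel',                 # conv1: frozen
             'resnet50/conv2d_4/kernel',               # conv2_x: frozen
             'resnet50/conv2d_11/kernel',              # deeper layer: not frozen
             'resnet50/batch_normalization_3/gamma']:  # frozen
    frozen = re.match(RESNET_FROZEN_VAR_PREFIX, name) is not None
    print('%-40s frozen=%s' % (name, frozen))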
Example #23
  def _serving_model_fn(features, labels, mode, params):
    """Builds the serving model_fn."""
    del labels  # unused.
    if mode != tf.estimator.ModeKeys.PREDICT:
      raise ValueError('To build the serving model_fn, set '
                       'mode = `tf.estimator.ModeKeys.PREDICT`')

    model_params = params_dict.ParamsDict(params)

    images = features['images']
    _, height, width, _ = images.get_shape().as_list()

    model_fn = factory.model_generator(model_params)
    outputs = model_fn.build_outputs(
        features['images'], labels=None, mode=mode_keys.PREDICT)

    logits = tf.image.resize_bilinear(
        outputs['logits'], tf.shape(images)[1:3], align_corners=False)

    original_image_size = tf.squeeze(features['image_info'][:, 0:1, :])
    height = original_image_size[0]
    width = original_image_size[1]
    offset_height = tf.zeros_like(height, dtype=tf.int32)
    offset_width = tf.zeros_like(width, dtype=tf.int32)

    # Clip the predictions to original image size.
    logits = tf.image.crop_to_bounding_box(logits, offset_height, offset_width,
                                           tf.cast(height, dtype=tf.int32),
                                           tf.cast(width, dtype=tf.int32))
    probabilities = tf.nn.softmax(logits)

    score_threshold_placeholder = features['score_thresholds']
    key_placeholder = features['key']

    score_threshold_pred_expanded = score_threshold_placeholder
    for _ in range(0, logits.shape.ndims - 1):
      score_threshold_pred_expanded = tf.expand_dims(
          score_threshold_pred_expanded, -1)

    scores = tf.where(probabilities > score_threshold_pred_expanded,
                      probabilities, tf.zeros_like(probabilities))
    scores = tf.reduce_max(scores, 3)
    scores = tf.expand_dims(scores, -1)
    scores = tf.cast(tf.minimum(scores * 255.0, 255), tf.uint8)
    categories = tf.to_int32(tf.expand_dims(tf.argmax(probabilities, 3), -1))

    # Generate images for scores and categories.
    score_bytes = tf.map_fn(
        tf.image.encode_png, scores, back_prop=False, dtype=tf.string)
    category_bytes = tf.map_fn(
        tf.image.encode_png,
        tf.cast(categories, tf.uint8),
        back_prop=False,
        dtype=tf.string)

    predictions = {}

    predictions['category_bytes'] = tf.identity(
        category_bytes, name='category_bytes')
    predictions['score_bytes'] = tf.identity(score_bytes, name='score_bytes')
    predictions['key'] = tf.identity(key_placeholder, name='key')
    if output_image_info:
      predictions['image_info'] = tf.identity(
          features['image_info'], name='image_info')

    if export_tpu_model:
      return tf.estimator.tpu.TPUEstimatorSpec(
          mode=mode, predictions=predictions)
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
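
The expand_dims loop above turns the per-example scalar threshold into a rank-4 tensor so it broadcasts against [batch, height, width, classes] probabilities. The same trick in NumPy, with toy shapes:

import numpy as np

probabilities = np.random.rand(2, 4, 4, 3).astype(np.float32)
score_thresholds = np.array([0.5, 0.8], dtype=np.float32)  # one per example

expanded = score_thresholds
for _ in range(probabilities.ndim - 1):
    expanded = np.expand_dims(expanded, -1)  # final shape: (2, 1, 1, 1)

scores = np.where(probabilities > expanded, probabilities, 0.0)
print(expanded.shape, scores.shape)  # (2, 1, 1, 1) (2, 4, 4, 3)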
Example #24
def main(unused_argv):
  params = params_dict.ParamsDict(
      resnet_config.RESNET_CFG, resnet_config.RESNET_RESTRICTIONS)
  params = params_dict.override_params_dict(
      params, FLAGS.config_file, is_strict=True)
  params = params_dict.override_params_dict(
      params, FLAGS.params_override, is_strict=True)

  params = flags_to_params.override_params_from_input_flags(params, FLAGS)

  params.validate()
  params.lock()

  tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
      FLAGS.tpu if (FLAGS.tpu or params.use_tpu) else '',
      zone=FLAGS.tpu_zone,
      project=FLAGS.gcp_project)

  if params.use_async_checkpointing:
    save_checkpoints_steps = None
  else:
    save_checkpoints_steps = max(5000, params.iterations_per_loop)
  config = tf.estimator.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=FLAGS.model_dir,
      save_checkpoints_steps=save_checkpoints_steps,
      log_step_count_steps=FLAGS.log_step_count_steps,
      session_config=tf.ConfigProto(
          graph_options=tf.GraphOptions(
              rewrite_options=rewriter_config_pb2.RewriterConfig(
                  disable_meta_optimizer=True))),
      tpu_config=tf.estimator.tpu.TPUConfig(
          iterations_per_loop=params.iterations_per_loop,
          num_shards=params.num_cores,
          per_host_input_for_training=tf.estimator.tpu.InputPipelineConfig
          .PER_HOST_V2))  # pylint: disable=line-too-long

  resnet_classifier = tf.estimator.tpu.TPUEstimator(
      use_tpu=params.use_tpu,
      model_fn=resnet_model_fn,
      config=config,
      params=params.as_dict(),
      train_batch_size=params.train_batch_size,
      eval_batch_size=params.eval_batch_size,
      export_to_tpu=FLAGS.export_to_tpu)

  assert (params.precision == 'bfloat16' or
          params.precision == 'float32'), (
              'Invalid value for precision parameter; '
              'must be bfloat16 or float32.')
  tf.logging.info('Precision: %s', params.precision)
  use_bfloat16 = params.precision == 'bfloat16'

  # Input pipelines are slightly different (with regards to shuffling and
  # preprocessing) between training and evaluation.
  if FLAGS.bigtable_instance:
    tf.logging.info('Using Bigtable dataset, table %s', FLAGS.bigtable_table)
    select_train, select_eval = _select_tables_from_flags()
    imagenet_train, imagenet_eval = [
        imagenet_input.ImageNetBigtableInput(  # pylint: disable=g-complex-comprehension
            is_training=is_training,
            use_bfloat16=use_bfloat16,
            transpose_input=params.transpose_input,
            selection=selection,
            augment_name=FLAGS.augment_name,
            randaug_num_layers=FLAGS.randaug_num_layers,
            randaug_magnitude=FLAGS.randaug_magnitude)
        for (is_training, selection) in [(True,
                                          select_train), (False, select_eval)]
    ]
  else:
    if FLAGS.data_dir == FAKE_DATA_DIR:
      tf.logging.info('Using fake dataset.')
    else:
      tf.logging.info('Using dataset: %s', FLAGS.data_dir)
    imagenet_train, imagenet_eval = [
        imagenet_input.ImageNetInput(  # pylint: disable=g-complex-comprehension
            is_training=is_training,
            data_dir=FLAGS.data_dir,
            transpose_input=params.transpose_input,
            cache=params.use_cache and is_training,
            image_size=params.image_size,
            num_parallel_calls=params.num_parallel_calls,
            include_background_label=(params.num_label_classes == 1001),
            use_bfloat16=use_bfloat16,
            augment_name=FLAGS.augment_name,
            randaug_num_layers=FLAGS.randaug_num_layers,
            randaug_magnitude=FLAGS.randaug_magnitude)
        for is_training in [True, False]
    ]

  steps_per_epoch = params.num_train_images // params.train_batch_size
  eval_steps = params.num_eval_images // params.eval_batch_size

  if FLAGS.mode == 'eval':

    # Run evaluation when there's a new checkpoint
    for ckpt in tf.train.checkpoints_iterator(
        FLAGS.model_dir, timeout=FLAGS.eval_timeout):
      tf.logging.info('Starting to evaluate.')
      try:
        start_timestamp = time.time()  # This time will include compilation time
        eval_results = resnet_classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt)
        elapsed_time = int(time.time() - start_timestamp)
        tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                        eval_results, elapsed_time)

        # Terminate eval job when final checkpoint is reached
        current_step = int(os.path.basename(ckpt).split('-')[1])
        if current_step >= params.train_steps:
          tf.logging.info(
              'Evaluation finished after training step %d', current_step)
          break

      except tf.errors.NotFoundError:
        # Since the coordinator is on a different job than the TPU worker,
        # sometimes the TPU worker does not finish initializing until long after
        # the CPU job tells it to start evaluating. In this case, the checkpoint
        # file could have been deleted already.
        tf.logging.info(
            'Checkpoint %s no longer exists, skipping checkpoint', ckpt)

  else:   # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
    try:
      current_step = tf.train.load_variable(FLAGS.model_dir,
                                            tf.GraphKeys.GLOBAL_STEP)
    except (TypeError, ValueError, tf.errors.NotFoundError):
      current_step = 0
    steps_per_epoch = params.num_train_images // params.train_batch_size
    tf.logging.info('Training for %d steps (%.2f epochs in total). Current'
                    ' step %d.',
                    params.train_steps,
                    params.train_steps / steps_per_epoch,
                    current_step)

    start_timestamp = time.time()  # This time will include compilation time

    if FLAGS.mode == 'train':
      hooks = []
      if params.use_async_checkpointing:
        try:
          from tensorflow.contrib.tpu.python.tpu import async_checkpoint  # pylint: disable=g-import-not-at-top
        except ImportError as e:
          logging.exception(
              'Async checkpointing is not supported in TensorFlow 2.x')
          raise e

        hooks.append(
            async_checkpoint.AsyncCheckpointSaverHook(
                checkpoint_dir=FLAGS.model_dir,
                save_steps=max(5000, params.iterations_per_loop)))
      if FLAGS.profile_every_n_steps > 0:
        hooks.append(
            tpu_profiler_hook.TPUProfilerHook(
                save_steps=FLAGS.profile_every_n_steps,
                output_dir=FLAGS.model_dir, tpu=FLAGS.tpu)
            )
      resnet_classifier.train(
          input_fn=imagenet_train.input_fn,
          max_steps=params.train_steps,
          hooks=hooks)

    else:
      assert FLAGS.mode == 'train_and_eval'
      while current_step < params.train_steps:
        # Train for up to steps_per_eval number of steps.
        # At the end of training, a checkpoint will be written to --model_dir.
        next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                              params.train_steps)
        resnet_classifier.train(
            input_fn=imagenet_train.input_fn, max_steps=next_checkpoint)
        current_step = next_checkpoint

        tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                        next_checkpoint, int(time.time() - start_timestamp))

        # Evaluate the model on the most recent model in --model_dir.
        # Since evaluation happens in batches of --eval_batch_size, some images
        # may be excluded modulo the batch size. As long as the batch size is
        # consistent, the evaluated images are also consistent.
        tf.logging.info('Starting to evaluate.')
        eval_results = resnet_classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=params.num_eval_images // params.eval_batch_size)
        tf.logging.info('Eval results at step %d: %s',
                        next_checkpoint, eval_results)

      elapsed_time = int(time.time() - start_timestamp)
      tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                      params.train_steps, elapsed_time)

    if FLAGS.export_dir is not None:
      # The guide to serving an exported TensorFlow model is at:
      #    https://www.tensorflow.org/serving/serving_basic
      tf.logging.info('Starting to export model.')
      export_path = resnet_classifier.export_saved_model(
          export_dir_base=FLAGS.export_dir,
          serving_input_receiver_fn=imagenet_input.image_serving_input_fn)
      if FLAGS.add_warmup_requests:
        inference_warmup.write_warmup_requests(
            export_path,
            FLAGS.model_name,
            params.image_size,
            batch_sizes=FLAGS.inference_batch_sizes,
            image_format='JPEG')
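
Several of the eval loops on this page recover the global step by parsing the checkpoint filename. The idiom in isolation (the path below is made up):

import os

ckpt = '/tmp/model_dir/model.ckpt-112603'
current_step = int(os.path.basename(ckpt).split('-')[1])
print(current_step)  # 112603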
Example #25
def main(unused_argv):
    params = params_dict.ParamsDict(squeezenet_config.SQUEEZENET_CFG,
                                    squeezenet_config.SQUEEZENET_RESTRICTIONS)
    params = params_dict.override_params_dict(params,
                                              FLAGS.config_file,
                                              is_strict=True)
    params = params_dict.override_params_dict(params,
                                              FLAGS.params_override,
                                              is_strict=True)

    params = flags_to_params.override_params_from_input_flags(params, FLAGS)

    total_steps = (
        (params.train.num_epochs * params.train.num_examples_per_epoch) //
        params.train.train_batch_size)
    params.override(
        {
            "train": {
                "total_steps": total_steps
            },
            "eval": {
                "num_steps_per_eval": (total_steps // params.eval.num_evals)
            },
        },
        is_strict=False)

    params.validate()
    params.lock()

    tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
        FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    if params.use_async_checkpointing:
        save_checkpoints_steps = None
    else:
        save_checkpoints_steps = max(5000, params.train.iterations_per_loop)

    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=params.model_dir,
        save_checkpoints_steps=save_checkpoints_steps,
        session_config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False),
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=params.train.iterations_per_loop,
            num_shards=params.train.num_cores_per_replica,
        ),
    )

    estimator = contrib_tpu.TPUEstimator(
        model_fn=squeezenet_model.model_fn,
        use_tpu=params.use_tpu,
        config=run_config,
        train_batch_size=params.train.train_batch_size,
        eval_batch_size=params.eval.eval_batch_size,
        params=params.as_dict(),
    )

    for eval_cycle in range(params.eval.num_evals):
        current_cycle_last_train_step = ((eval_cycle + 1) *
                                         params.eval.num_steps_per_eval)
        estimator.train(input_fn=data_pipeline.InputReader(FLAGS.data_dir,
                                                           is_training=True),
                        steps=current_cycle_last_train_step)

        tf.logging.info("Running evaluation")
        tf.logging.info(
            "%s",
            estimator.evaluate(input_fn=data_pipeline.InputReader(
                FLAGS.data_dir, is_training=False),
                               steps=(params.eval.num_eval_examples //
                                      params.eval.eval_batch_size)))
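
The step bookkeeping above is plain integer arithmetic. A worked example with made-up numbers:

num_epochs = 10
num_examples_per_epoch = 50000
train_batch_size = 128
num_evals = 5

total_steps = (num_epochs * num_examples_per_epoch) // train_batch_size
num_steps_per_eval = total_steps // num_evals
print(total_steps, num_steps_per_eval)  # 3906 781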
Example #26
def main(argv):
    del argv  # Unused.

    params = factory.config_generator(FLAGS.model)

    if FLAGS.config_file:
        params = params_dict.override_params_dict(params,
                                                  FLAGS.config_file,
                                                  is_strict=True)

    params = params_dict.override_params_dict(params,
                                              FLAGS.params_override,
                                              is_strict=True)
    params.override({
        'use_tpu': FLAGS.use_tpu,
        'model_dir': FLAGS.model_dir,
    },
                    is_strict=True)
    if not FLAGS.use_tpu:
        params.override(
            {
                'architecture': {
                    'use_bfloat16': False,
                },
                'batch_norm_activation': {
                    'use_sync_bn': False,
                },
            },
            is_strict=True)
    # Only run spatial partitioning in training mode.
    if FLAGS.mode != 'train':
        params.train.input_partition_dims = None
        params.train.num_cores_per_replica = None
    params_to_save = params_dict.ParamsDict(params)
    params.override(
        {
            'platform': {
                'eval_master': FLAGS.eval_master,
                'tpu': FLAGS.tpu,
                'tpu_zone': FLAGS.tpu_zone,
                'gcp_project': FLAGS.gcp_project,
            },
            'tpu_job_name': FLAGS.tpu_job_name,
            'train': {
                'num_shards': FLAGS.num_cores,
            },
        },
        is_strict=False)

    params.validate()
    params.lock()
    pp = pprint.PrettyPrinter()
    params_str = pp.pformat(params.as_dict())
    logging.info('Model Parameters: %s', params_str)

    # Builds detection model on TPUs.
    model_fn = model_builder.ModelFn(params)
    executor = tpu_executor.TpuExecutor(model_fn, params)

    # Prepares input functions for train and eval.
    train_input_fn = input_reader.InputFn(
        params.train.train_file_pattern,
        params,
        mode=ModeKeys.TRAIN,
        dataset_type=params.train.train_dataset_type)
    if params.eval.type == 'customized':
        eval_input_fn = input_reader.InputFn(
            params.eval.eval_file_pattern,
            params,
            mode=ModeKeys.EVAL,
            dataset_type=params.eval.eval_dataset_type)
    else:
        eval_input_fn = input_reader.InputFn(
            params.eval.eval_file_pattern,
            params,
            mode=ModeKeys.PREDICT_WITH_GT,
            dataset_type=params.eval.eval_dataset_type)

    if params.eval.eval_samples:
        eval_times = params.eval.eval_samples // params.eval.eval_batch_size
    else:
        eval_times = None

    # Runs the model.
    if FLAGS.mode == 'train':
        config_utils.save_config(params_to_save, params.model_dir)
        executor.train(train_input_fn, params.train.total_steps)
        if FLAGS.eval_after_training:
            executor.evaluate(eval_input_fn, eval_times)

    elif FLAGS.mode == 'eval':

        def terminate_eval():
            logging.info('Terminating eval after %d seconds of no checkpoints',
                         params.eval.eval_timeout)
            return True

        # Runs evaluation when there's a new checkpoint.
        for ckpt in tf.train.checkpoints_iterator(
                params.model_dir,
                min_interval_secs=params.eval.min_eval_interval,
                timeout=params.eval.eval_timeout,
                timeout_fn=terminate_eval):
            # Terminates eval job when final checkpoint is reached.
            current_step = int(
                six.ensure_str(os.path.basename(ckpt)).split('-')[1])

            logging.info('Starting to evaluate.')
            try:
                executor.evaluate(eval_input_fn, eval_times, ckpt)

                if current_step >= params.train.total_steps:
                    logging.info('Evaluation finished after training step %d',
                                 current_step)
                    break
            except tf.errors.NotFoundError as e:
                logging.info(
                    'Error occurred during evaluation: NotFoundError: %s', e)

    elif FLAGS.mode == 'train_and_eval':
        config_utils.save_config(params_to_save, params.model_dir)
        num_cycles = int(params.train.total_steps /
                         params.eval.num_steps_per_eval)
        for cycle in range(num_cycles):
            logging.info('Start training cycle %d.', cycle)
            current_cycle_last_train_step = ((cycle + 1) *
                                             params.eval.num_steps_per_eval)
            executor.train(train_input_fn, current_cycle_last_train_step)
            executor.evaluate(eval_input_fn, eval_times)
    else:
        logging.info('Mode %s not found.', FLAGS.mode)
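
The eval branch above is a standard TPU pattern: block on
tf.train.checkpoints_iterator until a new checkpoint appears, evaluate it,
and stop once the checkpoint for the final training step has been evaluated
or the timeout fires. A minimal, self-contained sketch of the same loop
(evaluate_fn, total_steps, and the interval defaults are stand-ins, not part
of the original example):

import logging
import os

import tensorflow.compat.v1 as tf


def eval_until_final_checkpoint(model_dir, evaluate_fn, total_steps,
                                min_interval_secs=180, timeout_secs=3600):
    """Evaluates each new checkpoint in model_dir until training finishes."""

    def timeout_fn():
        logging.info('No new checkpoint for %d seconds; stopping eval.',
                     timeout_secs)
        return True  # Returning True terminates checkpoints_iterator.

    for ckpt in tf.train.checkpoints_iterator(
            model_dir,
            min_interval_secs=min_interval_secs,
            timeout=timeout_secs,
            timeout_fn=timeout_fn):
        # Checkpoint basenames end in the global step, e.g. model.ckpt-12345.
        step = int(os.path.basename(ckpt).split('-')[-1])
        try:
            evaluate_fn(ckpt)
        except tf.errors.NotFoundError:
            # The checkpoint can be garbage-collected while eval is running.
            continue
        if step >= total_steps:
            break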
Example No. 27
"""Config template to train ShapeMask."""

from configs import base_config
from hyperparameters import params_dict

# pylint: disable=line-too-long

SHAPEMASK_RESNET_FROZEN_VAR_PREFIX = r'(resnet\d+/)conv2d(|_([1-9]|10))\/'

SHAPEMASK_CFG = params_dict.ParamsDict(base_config.BASE_CFG)
SHAPEMASK_CFG.override(
    {
        'type': 'shapemask',
        'train': {
            'total_steps': 45000,
            'learning_rate': {
                'learning_rate_steps': [30000, 40000],
            },
            'frozen_variable_prefix': SHAPEMASK_RESNET_FROZEN_VAR_PREFIX,
            'regularization_variable_regex': None,
        },
        'eval': {
            'type': 'shapemask_box_and_mask',
            'mask_eval_class': 'all',  # 'all', 'voc', or 'nonvoc'.
        },
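
The config template above builds on base_config.BASE_CFG through
params_dict.ParamsDict inheritance, overriding only the nested keys that
differ. A toy illustration of the override semantics (the keys here are
invented; only the API calls come from the examples):

from hyperparameters import params_dict

base = params_dict.ParamsDict({'train': {'total_steps': 100}})

# is_strict=True only accepts keys that already exist in the ParamsDict.
base.override({'train': {'total_steps': 200}}, is_strict=True)

# is_strict=False may introduce new keys.
base.override({'eval_samples': 5000}, is_strict=False)

# base.override({'typo_key': 1}, is_strict=True)  # Would raise KeyError.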
Example No. 28
"""Config template to train Retinanet."""

from configs import base_config
from hyperparameters import params_dict

# pylint: disable=line-too-long
RETINANET_CFG = params_dict.ParamsDict(base_config.BASE_CFG)
RETINANET_CFG.override(
    {
        'type': 'retinanet',
        'architecture': {
            'parser': 'retinanet_parser',
            'backbone': 'resnet',
            'multilevel_features': 'fpn',
            'use_bfloat16': True,
        },
        'retinanet_parser': {
            'use_bfloat16': True,
            'output_size': [640, 640],
            'match_threshold': 0.5,
            'unmatched_threshold': 0.5,
            'aug_rand_hflip': True,
Example No. 29
def main(unused_argv):
    del unused_argv  # Unused

    params = params_dict.ParamsDict({},
                                    mobilenet_config.MOBILENET_RESTRICTIONS)
    params = flags_to_params.override_params_from_input_flags(params, FLAGS)
    params = params_dict.override_params_dict(params,
                                              mobilenet_config.MOBILENET_CFG,
                                              is_strict=False)
    params = params_dict.override_params_dict(params,
                                              FLAGS.config_file,
                                              is_strict=True)
    params = params_dict.override_params_dict(params,
                                              FLAGS.params_override,
                                              is_strict=True)

    input_perm = [0, 1, 2, 3]
    output_perm = [0, 1, 2, 3]

    batch_axis = 0
    batch_size_per_shard = params.train_batch_size // params.num_cores
    if params.transpose_enabled:
        if batch_size_per_shard >= 64:
            input_perm = [3, 0, 1, 2]
            output_perm = [1, 2, 3, 0]
            batch_axis = 3
        else:
            input_perm = [2, 0, 1, 3]
            output_perm = [1, 2, 0, 3]
            batch_axis = 2
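
    # These permutations appear to implement the TPU host-side transpose
    # optimization: with large per-shard batches the batch dimension is
    # moved to the last axis before infeed (faster host-to-device transfer),
    # and output_perm restores the original layout on the way back.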

    additional_params = {
        'input_perm': input_perm,
        'output_perm': output_perm,
    }
    params = params_dict.override_params_dict(params,
                                              additional_params,
                                              is_strict=False)

    params.validate()
    params.lock()

    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu if (FLAGS.tpu or params.use_tpu) else '',
        zone=FLAGS.tpu_zone,
        project=FLAGS.gcp_project)

    if params.eval_total_size > 0:
        eval_size = params.eval_total_size
    else:
        eval_size = params.num_eval_images
    eval_steps = eval_size // params.eval_batch_size

    iterations = (eval_steps
                  if FLAGS.mode == 'eval' else params.iterations_per_loop)

    eval_batch_size = (None
                       if FLAGS.mode == 'train' else params.eval_batch_size)

    per_host_input_for_training = (params.num_cores <= 8
                                   if FLAGS.mode == 'train' else True)
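    # per_host_input_for_training=True runs one input pipeline per TPU host;
    # training on more than 8 cores (a multi-host pod) disables it, falling
    # back to per-core input function calls.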

    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=FLAGS.model_dir,
        save_checkpoints_secs=FLAGS.save_checkpoints_secs,
        save_summary_steps=FLAGS.save_summary_steps,
        session_config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement),
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=iterations,
            per_host_input_for_training=per_host_input_for_training))

    inception_classifier = tf.contrib.tpu.TPUEstimator(
        model_fn=model_fn,
        use_tpu=params.use_tpu,
        config=run_config,
        params=params.as_dict(),
        train_batch_size=params.train_batch_size,
        eval_batch_size=eval_batch_size,
        batch_axis=(batch_axis, 0))

    # Input pipelines are slightly different (with regards to shuffling and
    # preprocessing) between training and evaluation.
    imagenet_train = supervised_images.InputPipeline(is_training=True,
                                                     data_dir=FLAGS.data_dir)
    imagenet_eval = supervised_images.InputPipeline(is_training=False,
                                                    data_dir=FLAGS.data_dir)

    if params.moving_average:
        eval_hooks = [LoadEMAHook(FLAGS.model_dir)]
    else:
        eval_hooks = []

    if FLAGS.mode == 'eval':

        def terminate_eval():
            tf.logging.info('%d seconds without new checkpoints have elapsed '
                            '... terminating eval' % FLAGS.eval_timeout)
            return True

        def get_next_checkpoint():
            return evaluation.checkpoints_iterator(
                FLAGS.model_dir,
                min_interval_secs=params.min_eval_interval,
                timeout=FLAGS.eval_timeout,
                timeout_fn=terminate_eval)

        for checkpoint in get_next_checkpoint():
            tf.logging.info('Starting to evaluate.')
            try:
                eval_results = inception_classifier.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=eval_steps,
                    hooks=eval_hooks,
                    checkpoint_path=checkpoint)
                tf.logging.info('Evaluation results: %s' % eval_results)
            except tf.errors.NotFoundError:
                # Skip the checkpoint if it was deleted before evaluation ran.
                tf.logging.info('Checkpoint %s no longer exists ... skipping',
                                checkpoint)

    elif FLAGS.mode == 'train_and_eval':
        for cycle in range(params.train_steps // params.train_steps_per_eval):
            tf.logging.info('Starting training cycle %d.' % cycle)
            inception_classifier.train(input_fn=imagenet_train.input_fn,
                                       steps=params.train_steps_per_eval)

            tf.logging.info('Starting evaluation cycle %d.' % cycle)
            eval_results = inception_classifier.evaluate(
                input_fn=imagenet_eval.input_fn,
                steps=eval_steps,
                hooks=eval_hooks)
            tf.logging.info('Evaluation results: %s' % eval_results)

    else:
        tf.logging.info('Starting training ...')
        inception_classifier.train(input_fn=imagenet_train.input_fn,
                                   steps=params.train_steps)

    if FLAGS.export_dir:
        tf.logging.info('Starting to export model with image input.')
        inception_classifier.export_saved_model(
            export_dir_base=FLAGS.export_dir,
            serving_input_receiver_fn=image_serving_input_fn)

    if FLAGS.tflite_export_dir:
        tf.logging.info('Starting to export default TensorFlow model.')
        savedmodel_dir = inception_classifier.export_saved_model(
            export_dir_base=FLAGS.tflite_export_dir,
            serving_input_receiver_fn=functools.partial(tensor_serving_input_fn, params))  # pylint: disable=line-too-long
        tf.logging.info('Starting to export TFLite.')
        converter = tf.lite.TFLiteConverter.from_saved_model(
            savedmodel_dir, output_arrays=['softmax_tensor'])
        tflite_file_name = 'mobilenet.tflite'
        if params.post_quantize:
            converter.post_training_quantize = True
            tflite_file_name = 'quantized_' + tflite_file_name
        tflite_file = os.path.join(savedmodel_dir, tflite_file_name)
        tflite_model = converter.convert()
        tf.gfile.GFile(tflite_file, 'wb').write(tflite_model)
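
As a quick sanity check of the TFLite export above, the converted model can
be run with the TFLite interpreter. A small sketch (the model path and the
zero-valued input are placeholders):

import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='mobilenet.tflite')
interpreter.allocate_tensors()

inp = interpreter.get_input_details()[0]
out = interpreter.get_output_details()[0]

# Feed an all-zero batch just to exercise the graph end to end.
dummy = np.zeros(inp['shape'], dtype=inp['dtype'])
interpreter.set_tensor(inp['index'], dummy)
interpreter.invoke()
scores = interpreter.get_tensor(out['index'])  # Softmax probabilities.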
Example No. 30
"""Config template to train Retinanet."""

from configs import detection_config
from hyperparameters import params_dict

# pylint: disable=line-too-long
RETINANET_CFG = params_dict.ParamsDict(detection_config.DETECTION_CFG)
RETINANET_CFG.override(
    {
        'type': 'retinanet',
        'architecture': {
            'parser': 'retinanet_parser',
            'backbone': 'resnet',
            'multilevel_features': 'fpn',
            'output_flat_fpn_features': False,
        },
        'retinanet_parser': {
            'output_size': [640, 640],
            'match_threshold': 0.5,
            'unmatched_threshold': 0.5,
            'aug_rand_hflip': True,
            'aug_scale_min': 1.0,