Example #1
    def train(self,
              train_input_fn,
              run_eval_after_train=False,
              eval_input_fn=None):
        """Run distributed training on Mask RCNN model."""

        self._save_config()
        train_run_config = self.build_strategy_configuration('train')
        train_params = self.build_model_parameters('train')
        train_estimator = self.build_mask_rcnn_estimator(
            train_params, train_run_config, 'train')

        with dump_callback():
            train_estimator.train(
                input_fn=train_input_fn,
                max_steps=self._runtime_config.total_steps,
                hooks=get_training_hooks(
                    mode="train",
                    model_dir=self._runtime_config.model_dir,
                    checkpoint_path=self._runtime_config.checkpoint,
                    skip_checkpoint_variables=self._runtime_config.skip_checkpoint_variables,
                    batch_size=train_params['batch_size'],
                    save_summary_steps=self._runtime_config.save_summary_steps,
                ))

        if not run_eval_after_train:
            return None

        if eval_input_fn is None:
            raise ValueError(
                'Eval input_fn must be passed to conduct evaluation after training.'
            )

        eval_run_config = self.build_strategy_configuration('eval')
        eval_params = self.build_model_parameters('eval')
        eval_estimator = self.build_mask_rcnn_estimator(
            eval_params, eval_run_config, 'eval')

        last_ckpt = self.get_last_checkpoint_path()
        logging.info("Restoring parameters from %s\n" % last_ckpt)

        eval_results, predictions = evaluation.evaluate(
            eval_estimator,
            eval_input_fn,
            self._runtime_config.eval_samples,
            self._runtime_config.eval_batch_size,
            self._runtime_config.include_mask,
            self._runtime_config.val_json_file,
            report_frequency=self._runtime_config.report_frequency,
            checkpoint_path=last_ckpt)

        output_dir = os.path.join(self._runtime_config.model_dir, 'eval')
        tf.io.gfile.makedirs(output_dir)

        # Summary writer writes out eval metrics.
        self._write_summary(output_dir, eval_results, predictions,
                            self._runtime_config.total_steps)

        return eval_results
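
A hedged driver sketch for the trainer above (the executer class and argument
names are assumed from Examples #6 and #9, not part of the original source):

    executer = EstimatorExecuter(runtime_config, model_fn)
    eval_results = executer.train(train_input_fn,
                                  run_eval_after_train=True,
                                  eval_input_fn=eval_input_fn)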
Example #2
    def _save(self, session, step):
        """Saves the latest checkpoint, returns should_stop."""
        logging.info("Saving checkpoints for %d into %s.", step,
                     self._save_path)

        self._saver.save(session, self._save_path, global_step=step)
        self._summary_writer.add_session_log(
            tf.compat.v1.SessionLog(status=tf.compat.v1.SessionLog.CHECKPOINT,
                                    checkpoint_path=self._save_path), step)
Example #3
    def log_parameters(self):
        """Log the hyperparameters value along with the source of those values.
    """
        params_log = ''

        for k in self._params:
            params_log += k + ': \t' + str(self._params[k])
            params_log += ' \t[' + self._params_source[k] + ']\n'

        logging.info('\nModel hyperparameters [source]:\n%s', params_log)
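
A hedged sketch of the resulting output, assuming `_params` and
`_params_source` are plain dicts keyed by hyperparameter name (values below
are illustrative):

    # _params        = {'learning_rate': 0.01}
    # _params_source = {'learning_rate': 'command line'}
    # produces a log entry like:
    #   Model hyperparameters [source]:
    #   learning_rate: 	0.01 	[command line]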
Example #4
    def loadRes(self, detection_results, include_mask, is_image_mask=False):
        """Load result file and return a result api object.

    Args:
      detection_results: a dictionary containing predictions results.
      include_mask: a boolean, whether to include mask in detection results.
      is_image_mask: a boolean, whether the predicted mask is a whole-image mask.

    Returns:
      res: result MaskCOCO api object
    """
        res = MaskCOCO()
        res.dataset['images'] = [img for img in self.dataset['images']]
        logging.info('Loading and preparing results...')
        predictions = self.load_predictions(detection_results,
                                            include_mask=include_mask,
                                            is_image_mask=is_image_mask)
        assert isinstance(predictions,
                          list), 'results is not an array of objects'
        if predictions:
            image_ids = [pred['image_id'] for pred in predictions]
            assert set(image_ids) == (set(image_ids) & set(self.getImgIds())), \
                   'Results do not correspond to current coco set'

            if (predictions and 'bbox' in predictions[0]
                    and predictions[0]['bbox']):
                res.dataset['categories'] = copy.deepcopy(
                    self.dataset['categories'])
                for idx, pred in enumerate(predictions):
                    bb = pred['bbox']
                    x1, x2, y1, y2 = [
                        bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]
                    ]
                    if 'segmentation' not in pred:
                        pred['segmentation'] = [[
                            x1, y1, x1, y2, x2, y2, x2, y1
                        ]]
                    pred['area'] = bb[2] * bb[3]
                    pred['id'] = idx + 1
                    pred['iscrowd'] = 0
            elif 'segmentation' in predictions[0]:
                res.dataset['categories'] = copy.deepcopy(
                    self.dataset['categories'])
                for idx, pred in enumerate(predictions):
                    # currently only compressed RLE is supported as the segmentation format
                    pred['area'] = maskUtils.area(pred['segmentation'])
                    if 'bbox' not in pred:
                        pred['bbox'] = maskUtils.toBbox(pred['segmentation'])
                    pred['id'] = idx + 1
                    pred['iscrowd'] = 0

            res.dataset['annotations'] = predictions

        res.createIndex()
        return res
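
A hedged sketch of driving loadRes; the detection_results schema mirrors the
fields consumed by load_predictions in Example #18 (shapes, ids, and values
are illustrative, and coco_gt is assumed to be a MaskCOCO already loaded with
groundtruth containing the referenced image id):

    import numpy as np

    detection_results = {
        'source_id': np.array([42]),
        'num_detections': np.array([1]),
        'detection_boxes': np.array([[[10., 20., 30., 40.]]]),  # COCO [x, y, w, h]
        'detection_scores': np.array([[0.9]]),
        'detection_classes': np.array([[1]]),
        'image_info': np.array([[1024., 1024., 1., 800., 600.]]),
    }
    coco_dt = coco_gt.loadRes(detection_results, include_mask=False)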
Example #5
def compute_model_statistics(batch_size, is_training=True):
    """Compute number of parameters and FLOPS."""
    options = tf.compat.v1.profiler.ProfileOptionBuilder.float_operation()
    options['output'] = 'none'

    from tensorflow.python.keras.backend import get_graph
    flops = tf.compat.v1.profiler.profile(get_graph(),
                                          options=options).total_float_ops
    flops_per_image = flops / batch_size

    logging.info(
        '[%s Compute Statistics] %.1f GFLOPS/image' %
        ("Training" if is_training else "Inference", flops_per_image / 1e9))
Example #6
  def __init__(self, runtime_config, model_fn):
    super(EstimatorExecuter, self).__init__(runtime_config, model_fn)

    if MPI_is_distributed():
      os.environ['HOROVOD_GPU_ALLREDUCE'] = 'NCCL'
      os.environ['HOROVOD_NUM_NCCL_STREAMS'] = '1'
      # os.environ['HOROVOD_AUTOTUNE'] = '2'

      logging.info("SageMaker Distributed Data Parallel successfully initialized ...")

    os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
    os.environ['TF_GPU_THREAD_COUNT'] = '1' if not MPI_is_distributed() else str(hvd.size())

    os.environ['TF_SYNC_ON_FINISH'] = '0'
Example #7
    def _save_config(self):
        """Save parameters to config files if model_dir is defined."""

        model_dir = self._runtime_config.model_dir

        if model_dir is not None:
            if not tf.io.gfile.exists(model_dir):
                tf.io.gfile.makedirs(model_dir)

            params_io.save_hparams_to_yaml(self._runtime_config,
                                           model_dir + '/params.yaml')

            try:
                from TensorFlow.common.tb_utils import write_hparams_v1
                write_hparams_v1(model_dir, self._runtime_config.values())
                # Prevent performance degradation by creating empty Session.
                with tf.compat.v1.Session():
                    pass
            except Exception:
                logging.info('Could not save hparams to tfevent file')
Example #8
    def __init__(self, checkpoint_dir, checkpoint_basename="model.ckpt"):
        """Initializes a `CheckpointSaverHook`.

        Args:
          checkpoint_dir: `str`, base directory for the checkpoint files.
          checkpoint_basename: `str`, base name for the checkpoint files.
        """
        logging.info("Create CheckpointSaverHook.")

        self._saver = None
        self._checkpoint_dir = checkpoint_dir
        self._save_path = os.path.join(checkpoint_dir, checkpoint_basename)

        self._steps_per_run = 1

        self._is_initialized = False

        self._global_step_tensor = None
        self._summary_writer = None
Example #9
    def __init__(self, runtime_config, model_fn):
        super(EstimatorExecuter, self).__init__(runtime_config, model_fn)

        # Handle recipe cache. Skip if externally set or empty.
        recipe_cache = runtime_config.recipe_cache
        if 'TF_RECIPE_CACHE_PATH' not in os.environ.keys() and recipe_cache:
            os.environ['TF_RECIPE_CACHE_PATH'] = recipe_cache

            # Clear previous recipe cache.
            if not MPI_is_distributed() or MPI_rank() == 0:
                if os.path.exists(recipe_cache) and os.path.isdir(
                        recipe_cache):
                    shutil.rmtree(recipe_cache)

        if MPI_is_distributed():
            os.environ['HOROVOD_GPU_ALLREDUCE'] = 'NCCL'
            os.environ['HOROVOD_NUM_NCCL_STREAMS'] = '1'
            # os.environ['HOROVOD_AUTOTUNE'] = '2'

            if runtime_config.device == "HPU":
                from TensorFlow.common.horovod_helpers import hvd_init, Framework
                hvd = hvd_init(framework=Framework.TENSORFLOW)
            else:
                hvd.init()

            # Other ranks should wait for recipe cache to be removed.
            # This operation can't be done before hvd_init.
            from mpi4py import MPI
            MPI.COMM_WORLD.Barrier()

            logging.info("Horovod successfully initialized ...")

        os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
        os.environ['TF_GPU_THREAD_COUNT'] = '1' if not MPI_is_distributed() else str(hvd.size())

        os.environ['TF_SYNC_ON_FINISH'] = '0'
Example #10
    def eval(self, eval_input_fn):
        """Run distributed eval on Mask RCNN model."""

        output_dir = os.path.join(self._runtime_config.model_dir, 'eval')
        tf.io.gfile.makedirs(output_dir)

        run_config = self.build_strategy_configuration('eval')
        eval_params = self.build_model_parameters('eval')
        eval_estimator = self.build_mask_rcnn_estimator(
            eval_params, run_config, 'eval')

        logging.info('Starting to evaluate.')

        last_ckpt = self.get_last_checkpoint_path()

        if last_ckpt is not None:
            logging.info("Restoring parameters from %s\n" % last_ckpt)
            current_step = int(os.path.basename(last_ckpt).split('-')[1])

        else:
            logging.warning(
                "Could not find a trained model in model_dir: `%s`; "
                "running prediction from freshly initialized weights\n" %
                self._runtime_config.model_dir)
            current_step = 0

        eval_results, predictions = evaluation.evaluate(
            eval_estimator,
            eval_input_fn,
            self._runtime_config.eval_samples,
            self._runtime_config.eval_batch_size,
            self._runtime_config.include_mask,
            self._runtime_config.val_json_file,
            checkpoint_path=last_ckpt)

        self._write_summary(output_dir, eval_results, predictions,
                            current_step)

        if current_step >= self._runtime_config.total_steps:
            logging.info('Evaluation finished after training step %d' %
                         current_step)

        return eval_results
            FLAGS.data_dir, "train*.tfrecord"),
                                    mode=tf.estimator.ModeKeys.TRAIN,
                                    use_fake_data=FLAGS.use_synthetic_data,
                                    use_instance_mask=True,
                                    seed=FLAGS.seed)

    else:
        input_dataset = InputReader(file_pattern=os.path.join(
            FLAGS.data_dir, "val*.tfrecord"),
                                    mode=tf.estimator.ModeKeys.PREDICT,
                                    num_examples=5000,
                                    use_fake_data=FLAGS.use_synthetic_data,
                                    use_instance_mask=True,
                                    seed=FLAGS.seed)

    logging.info("[*] Executing Benchmark in %s mode" %
                 ("training" if FLAGS.training else "inference"))
    logging.info("[*] Benchmark using %s data" %
                 ("synthetic" if FLAGS.use_synthetic_data else "real"))

    time.sleep(1)

    # Build the data input
    dataset = input_dataset(
        params={
            "anchor_scale": 8.0,
            "aspect_ratios": [[1.0, 1.0], [1.4, 0.7], [0.7, 1.4]],
            "batch_size": FLAGS.batch_size,
            "gt_mask_size": 112,
            "image_size": [1024, 1024],
            "include_groundtruth_in_features": False,
            "augment_input_data": True,
Example #12
  def train_and_eval(self, train_input_fn, eval_input_fn):
    """Run distributed train and eval on Mask RCNN model."""

    self._save_config()
    output_dir = os.path.join(self._runtime_config.model_dir, 'eval')
    tf.io.gfile.makedirs(output_dir)

    train_run_config = self.build_strategy_configuration('train')
    train_params = self.build_model_parameters('train')
    train_estimator = self.build_mask_rcnn_estimator(train_params, train_run_config, 'train')

    eval_estimator = None
    eval_results = None

    num_cycles = math.ceil(self._runtime_config.total_steps / self._runtime_config.num_steps_per_eval)

    training_hooks = get_training_hooks(
        mode="train",
        model_dir=self._runtime_config.model_dir,
        checkpoint_path=self._runtime_config.checkpoint,
        skip_checkpoint_variables=self._runtime_config.skip_checkpoint_variables
    )

    for cycle in range(1, num_cycles + 1):

      if not MPI_is_distributed() or MPI_rank() == 0:

        print()  # Visual Spacing
        logging.info("=================================")
        logging.info('     Start training cycle %02d' % cycle)
        logging.info("=================================\n")

      max_cycle_step = min(int(cycle * self._runtime_config.num_steps_per_eval), self._runtime_config.total_steps)

      PROFILER_ENABLED = False

      if (not MPI_is_distributed() or MPI_rank() == 0) and PROFILER_ENABLED:
          profiler_context_manager = tf.contrib.tfprof.ProfileContext

      else:
          from contextlib import suppress
          profiler_context_manager = lambda *args, **kwargs: suppress()  # No-Op context manager

      with profiler_context_manager(
              '/workspace/profiling/',
              trace_steps=range(100, 200, 3),
              dump_steps=[200]
      ) as pctx:

          if (not MPI_is_distributed() or MPI_rank() == 0) and PROFILER_ENABLED:
            opts = tf.compat.v1.profiler.ProfileOptionBuilder.time_and_memory()
            pctx.add_auto_profiling('op', opts, [150, 200])

          train_estimator.train(
              input_fn=train_input_fn,
              max_steps=max_cycle_step,
              hooks=training_hooks,
          )

      if not MPI_is_distributed() or MPI_rank() == 0:

          print()  # Visual Spacing
          logging.info("=================================")
          logging.info('    Start evaluation cycle %02d' % cycle)
          logging.info("=================================\n")

          if eval_estimator is None:
              eval_run_config = self.build_strategy_configuration('eval')
              eval_params = self.build_model_parameters('eval')
              eval_estimator = self.build_mask_rcnn_estimator(eval_params, eval_run_config, 'eval')

          last_ckpt = tf.train.latest_checkpoint(self._runtime_config.model_dir, latest_filename=None)
          logging.info("Restoring parameters from %s\n" % last_ckpt)

          eval_results, predictions = evaluation.evaluate(
              eval_estimator,
              eval_input_fn,
              self._runtime_config.eval_samples,
              self._runtime_config.eval_batch_size,
              self._runtime_config.include_mask,
              self._runtime_config.val_json_file,
              report_frequency=self._runtime_config.report_frequency
          )

          self._write_summary(output_dir, eval_results, predictions, max_cycle_step)

      if MPI_is_distributed():
          from mpi4py import MPI
          comm = hvd.get_worker_comm()
          comm.Barrier()  # Waiting for all MPI processes to sync

    return eval_results
Example #13
  def _get_session_config(mode, use_xla, use_amp, use_tf_distributed=False, allow_xla_at_inference=False):

      assert mode in ('train', 'eval')

      rewrite_options = rewriter_config_pb2.RewriterConfig(
          # arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          # arithmetic_optimization=rewriter_config_pb2.RewriterConfig.ON,

          # constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
          # constant_folding=rewriter_config_pb2.RewriterConfig.ON,           # TO TEST

          # debug_stripper=rewriter_config_pb2.RewriterConfig.OFF,
          # debug_stripper=rewriter_config_pb2.RewriterConfig.ON,           # TO TEST

          # dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          # dependency_optimization=rewriter_config_pb2.RewriterConfig.ON,           # TO TEST

          # disable_model_pruning=False,           # INCOMPATIBLE with AMP
          # function_optimization=True,
          # implementation_selector=True,

          # loop_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          # loop_optimization=rewriter_config_pb2.RewriterConfig.ON,           # TO TEST

          # The default setting (SCHEDULING and SWAPPING HEURISTICS only)
          # memory_optimization=rewriter_config_pb2.RewriterConfig.DEFAULT_MEM_OPT,

          # Disabled in the meta-optimizer.
          # memory_optimization=rewriter_config_pb2.RewriterConfig.NO_MEM_OPT,

          # Driven by manual op-level annotations.
          # memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL,

          # Swapping heuristic will move a tensor from the GPU to the CPU and move it
          # back when needed to reduce peak memory usage.
          # memory_optimization=rewriter_config_pb2.RewriterConfig.SWAPPING_HEURISTICS,

          # Recomputation heuristics will recompute ops (such as Relu activation)
          # during backprop instead of storing them, reducing peak memory usage.
          # memory_optimization=rewriter_config_pb2.RewriterConfig.RECOMPUTATION_HEURISTICS,

          # Scheduling will split big ops such as AddN and try to enforce a schedule of
          # the new computations that decreases peak memory usage.
          # memory_optimization=rewriter_config_pb2.RewriterConfig.SCHEDULING_HEURISTICS,

          # Use any combination of swapping and recomputation heuristics.
          # memory_optimization=rewriter_config_pb2.RewriterConfig.HEURISTICS,

          meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.TWO,
          # meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE,
          # meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.DEFAULT_NUM_ITERS,

          # pin_to_host_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          # pin_to_host_optimization=rewriter_config_pb2.RewriterConfig.ON,         # TO TEST
          #
          # remapping=rewriter_config_pb2.RewriterConfig.OFF,
          # remapping=rewriter_config_pb2.RewriterConfig.ON,                   # TO TEST

          # scoped_allocator_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          # scoped_allocator_optimization=rewriter_config_pb2.RewriterConfig.ON,  # TO TEST

          # shape_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          # shape_optimization=rewriter_config_pb2.RewriterConfig.ON,           # TO TEST
      )

      if use_amp:
          logging.info("[%s] AMP is activated - Experiment Feature" % mode)
          rewrite_options.auto_mixed_precision = True

      config = tf.compat.v1.ConfigProto(
          allow_soft_placement=True,
          log_device_placement=False,
          graph_options=tf.compat.v1.GraphOptions(
              rewrite_options=rewrite_options,
              # infer_shapes=True  # Heavily drops throughput by 30%
          )
      )

      if use_tf_distributed:
        config.gpu_options.force_gpu_compatible = False

      else:
        config.gpu_options.force_gpu_compatible = True  # Force pinned memory

        if MPI_is_distributed():
            config.gpu_options.visible_device_list = str(MPI_local_rank())

      if use_xla and (mode == "train" or allow_xla_at_inference):
          logging.info("[%s] XLA is activated - Experiment Feature" % mode)
          config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
          # config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_2

      if mode == 'train':
          config.intra_op_parallelism_threads = 1  # Avoid pool of Eigen threads

          if MPI_is_distributed():
              config.inter_op_parallelism_threads = max(2, multiprocessing.cpu_count() // hvd.local_size())

          elif not use_tf_distributed:
              config.inter_op_parallelism_threads = 4

      return config
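
A hedged usage sketch: the returned ConfigProto is typically wired into an
Estimator run config (TF1-style flow, matching the surrounding examples):

    session_config = _get_session_config(mode='train', use_xla=False, use_amp=True)
    run_config = tf.estimator.RunConfig(session_config=session_config)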
Example #14
    def after_create_session(self, session, coord=None):
        if not self._is_initialized:
            session.run(self._init_op, feed_dict=self._init_feed_dict)
            logging.info("Pretrained weights loaded with success...\n")

            self._is_initialized = True
Example #15
def compute_coco_eval_metric(predictor,
                             num_batches=-1,
                             include_mask=True,
                             annotation_json_file="",
                             eval_batch_size=-1,
                             report_frequency=None):
    """Compute COCO eval metric given a prediction generator.

    Args:
    predictor: a generator that iteratively pops a dictionary of predictions
      with the format compatible with COCO eval tool.
    num_batches: the number of batches to be aggregated in eval, i.e. how many
      times the predictor gets pulled; a negative value means "until the
      predictor is exhausted".
    include_mask: a boolean that indicates whether to include the mask eval.
    annotation_json_file: the annotation json file of the eval dataset; if
      empty, groundtruth is extracted from the predictions themselves.
    eval_batch_size: the (per-rank) batch size of the predictor, used for
      throughput reporting.
    report_frequency: if set, report intermediate eval results every
      `report_frequency` batches.

    Returns:
    eval_results: the aggregated COCO metric eval results.
    predictions: the aggregated predictions dictionary.
    """

    if annotation_json_file == "":
        annotation_json_file = None

    use_groundtruth_from_json = (annotation_json_file is not None)

    predictions = dict()
    batch_idx = 0

    if use_groundtruth_from_json:
        eval_metric = coco_metric.EvaluationMetric(annotation_json_file,
                                                   include_mask=include_mask)

    else:
        eval_metric = coco_metric.EvaluationMetric(filename=None,
                                                   include_mask=include_mask)

    def evaluation_preds(preds):

        # Essential to avoid modifying the source dict
        _preds = copy.deepcopy(preds)

        for k, v in six.iteritems(_preds):
            _preds[k] = np.concatenate(_preds[k], axis=0)

        if 'orig_images' in _preds and _preds['orig_images'].shape[0] > 10:
            # Only samples a few images for visualization.
            _preds['orig_images'] = _preds['orig_images'][:10]

        if use_groundtruth_from_json:
            eval_results = eval_metric.predict_metric_fn(_preds)

        else:
            images, annotations = coco_utils.extract_coco_groundtruth(
                _preds, include_mask)
            coco_dataset = coco_utils.create_coco_format_dataset(
                images, annotations)
            eval_results = eval_metric.predict_metric_fn(
                _preds, groundtruth_data=coco_dataset)

        return eval_results

    # Take into account cuDNN & Tensorflow warmup
    # Drop N first steps for avg throughput calculation
    BURNIN_STEPS = 100
    model_throughput_list = list()
    inference_time_list = list()

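    # In distributed mode each rank pulls num_batches / MPI_size() batches, so
    # the effective global batch size is eval_batch_size * MPI_size().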
    if MPI_is_distributed():
        eval_batch_size *= MPI_size()
        num_batches /= MPI_size()

    while num_batches < 0 or batch_idx < num_batches:

        try:
            step_t0 = time.time()
            step_predictions = six.next(predictor)
            if MPI_is_distributed():
                from mpi4py import MPI
                MPI.COMM_WORLD.Barrier()  # need the time for all predictors at this batch_idx
            batch_time = time.time() - step_t0

            throughput = eval_batch_size / batch_time
            model_throughput_list.append(throughput)
            inference_time_list.append(batch_time)

            logging.info(
                'Running inference on batch %03d/%03d... - Step Time: %.4fs - Throughput: %.1f imgs/s'
                % (batch_idx + 1, num_batches, batch_time, throughput))

        except StopIteration:
            logging.info('Get StopIteration at %d batch.' % (batch_idx + 1))
            break

        step_predictions = process_prediction_for_eval(step_predictions)

        for k, v in step_predictions.items():

            if k not in predictions:
                predictions[k] = [v]

            else:
                predictions[k].append(v)

        batch_idx = batch_idx + 1

        # Report intermediate eval results every `report_frequency` batches;
        # each report thus covers eval_batch_size * report_frequency samples.
        if report_frequency and batch_idx % report_frequency == 0:
            eval_results = evaluation_preds(preds=predictions)
            logging.info('Eval results: %s' %
                         pprint.pformat(eval_results, indent=4))

    if MPI_is_distributed():
        from mpi4py import MPI
        all_predictions = MPI.COMM_WORLD.gather(predictions, root=0)
        MPI.COMM_WORLD.Barrier()  # FIXME: first gather is calling MPI_FINALIZE causing crash
        if MPI_rank() == 0:
            predictions.clear()
            for pred in all_predictions:
                for k in pred.keys():
                    if k not in predictions:
                        predictions[k] = pred[k]
                    else:
                        predictions[k].extend(pred[k])

    inference_time_list.sort()
    eval_results = evaluation_preds(preds=predictions)

    if not MPI_is_distributed() or MPI_rank() == 0:

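        # inference_time_list is sorted ascending, so the max over the first p%
        # of entries gives the p-th percentile latency.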
        average_time = np.mean(inference_time_list)
        latency_50 = max(
            inference_time_list[:int(len(inference_time_list) * 0.5)])
        latency_90 = max(
            inference_time_list[:int(len(inference_time_list) * 0.90)])
        latency_95 = max(
            inference_time_list[:int(len(inference_time_list) * 0.95)])
        latency_99 = max(
            inference_time_list[:int(len(inference_time_list) * 0.99)])
        latency_100 = max(
            inference_time_list[:int(len(inference_time_list) * 1)])

        print()  # Visual Spacing
        logging.info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")
        logging.info("         Evaluation Performance Summary          ")
        logging.info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")

        total_processing_hours, rem = divmod(np.sum(model_throughput_list),
                                             3600)
        total_processing_minutes, total_processing_seconds = divmod(rem, 60)

        if len(model_throughput_list) > BURNIN_STEPS:
            # Take into account cuDNN & Tensorflow warmup
            # Drop N first steps for avg throughput calculation
            # Also drop last step which may have a different batch size
            avg_throughput = np.mean(model_throughput_list[BURNIN_STEPS:-1])
        else:
            avg_throughput = -1.

        print()  # Visual Spacing
        logging.info("Average throughput: {throughput:.1f} samples/sec".format(
            throughput=avg_throughput))
        logging.info("Inference Latency Average (s) = {avg:.4f}".format(
            avg=average_time))
        logging.info(
            "Inference Latency 50% (s) = {cf_50:.4f}".format(cf_50=latency_50))
        logging.info("Inference Latency 90%  (s) = {cf_90:.4f}".format(
            cf_90=latency_90))
        logging.info("Inference Latency 95%  (s) = {cf_95:.4f}".format(
            cf_95=latency_95))
        logging.info("Inference Latency 99%  (s) = {cf_99:.4f}".format(
            cf_99=latency_99))
        logging.info("Inference Latency 100%  (s) = {cf_100:.4f}".format(
            cf_100=latency_100))
        logging.info("Total processed steps: {total_steps}".format(
            total_steps=len(model_throughput_list)))
        logging.info(
            "Total processing time: {hours}h {minutes:02d}m {seconds:02d}s".
            format(hours=total_processing_hours,
                   minutes=int(total_processing_minutes),
                   seconds=int(total_processing_seconds)))
        dllogger.log(step=(),
                     data={"avg_inference_throughput": avg_throughput},
                     verbosity=Verbosity.DEFAULT)
        avg_inference_time = float(total_processing_hours * 3600 +
                                   int(total_processing_minutes) * 60 +
                                   int(total_processing_seconds))
        dllogger.log(step=(),
                     data={"avg_inference_time": avg_inference_time},
                     verbosity=Verbosity.DEFAULT)
        logging.info("==================== Metrics ====================")

        # logging.info('Eval Epoch results: %s' % pprint.pformat(eval_results, indent=4))
        for key, value in sorted(eval_results.items(),
                                 key=operator.itemgetter(0)):
            logging.info("%s: %.9f" % (key, value))
        print()  # Visual Spacing

    return eval_results, predictions
Example #16
def multilevel_propose_rois(scores_outputs,
                            box_outputs,
                            all_anchors,
                            image_info,
                            rpn_pre_nms_topn,
                            rpn_post_nms_topn,
                            rpn_nms_threshold,
                            rpn_min_size,
                            bbox_reg_weights,
                            use_batched_nms=False):
    """Proposes RoIs given a group of candidates from different FPN levels.

    Args:
    scores_outputs: an OrderedDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderedDict with keys representing levels and values
      representing box regression targets in
      [batch_size, height, width, num_anchors * 4].
    all_anchors: an Anchors object that contains all of the anchors.
    image_info: a tensor of shape [batch_size, 5] whose five columns encode the
      input image's [height, width, scale, original_height, original_width].
      Height and width are those of the network input, not the original image;
      scale is the factor used to scale the network input size up to the
      original image size. See dataloader.DetectionInputProcessor for details.
    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
      before applying NMS. This is *per FPN level* (not total).
    rpn_post_nms_topn: an integer number of top scoring RPN proposals to keep
      after applying NMS. This is the total number of RPN proposals produced.
    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
      used on RPN proposals.
    rpn_min_size: an integer, the minimum proposal height and width; both need
      to be greater than this number. Note that this number is at the original
      image scale, not the scale used during training or inference.
    bbox_reg_weights: None or a list of four integers specifying the weights
      used when decoding the box.
    use_batched_nms: whether to use batched NMS. Batched NMS uses
      tf.combined_non_max_suppression, which is only available on CPU/GPU.

    Returns:
    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
      representing the scores of the proposals.
    rois: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
      representing the boxes of the proposals. The boxes are in normalized
      coordinates with a form of [ymin, xmin, ymax, xmax].
    """
    with tf.name_scope('multilevel_propose_rois'):

        levels = scores_outputs.keys()
        scores = []
        rois = []
        anchor_boxes = all_anchors.get_unpacked_boxes()

        height = tf.expand_dims(image_info[:, 0:1], axis=-1)
        width = tf.expand_dims(image_info[:, 1:2], axis=-1)
        scale = tf.expand_dims(image_info[:, 2:3], axis=-1)

        for level in levels:

            with tf.name_scope('level_%d' % level) as scope:

                batch_size, feature_h, feature_w, num_anchors_per_location = scores_outputs[
                    level].get_shape().as_list()
                num_boxes = feature_h * feature_w * num_anchors_per_location

                this_level_scores = tf.reshape(scores_outputs[level],
                                               [batch_size, num_boxes])
                this_level_scores = tf.sigmoid(this_level_scores)
                this_level_boxes = tf.reshape(box_outputs[level],
                                              [batch_size, num_boxes, 4])

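                # Broadcast the per-level anchor grid across the batch:
                # multiplying by ones([batch_size, 1, 1, 1]) tiles it batch_size times.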
                this_level_anchors = tf.cast(tf.reshape(
                    tf.expand_dims(anchor_boxes[level], axis=0) *
                    tf.ones([batch_size, 1, 1, 1]),
                    [batch_size, num_boxes, 4]),
                                             dtype=this_level_scores.dtype)

                # TODO: Remove when Batched NMS stops leading to eval metrics being all 0
                # commented out because scope no longer exists
                if use_batched_nms:
                    logging.info("[ROI OPs] Using Batched NMS... Scope: %s" %
                                 scope)
                    propose_rois_fn = _propose_rois_gpu

                else:
                    logging.debug(
                        "[ROI OPs] Not Using Batched NMS... Scope: %s" % scope)
                    propose_rois_fn = _propose_rois_tpu

                this_level_scores, this_level_boxes = propose_rois_fn(
                    this_level_scores, this_level_boxes, this_level_anchors,
                    height, width, scale, rpn_pre_nms_topn, rpn_post_nms_topn,
                    rpn_nms_threshold, rpn_min_size, bbox_reg_weights)

                scores.append(this_level_scores)
                rois.append(this_level_boxes)

    scores = tf.concat(scores, axis=1)
    rois = tf.concat(rois, axis=1)

    with tf.name_scope('roi_post_nms_topk'):

        post_nms_num_anchors = scores.shape[1]
        post_nms_topk_limit = min(post_nms_num_anchors, rpn_post_nms_topn)

        top_k_scores, top_k_rois = box_utils.top_k(scores,
                                                   k=post_nms_topk_limit,
                                                   boxes_list=[rois])

        top_k_rois = top_k_rois[0]

    return top_k_scores, top_k_rois
Example #17
    def call(self, inputs, **kwargs):
        """
        Returns:
        mask_outputs: a tensor with a shape of
          [batch_size, num_masks, mask_height, mask_width],
          representing the mask predictions.
        fg_gather_indices: a tensor with a shape of [batch_size, num_masks, 2],
          representing the fg mask targets.
        Raises:
        ValueError: If boxes is not a rank-3 tensor or the last dimension of
          boxes is not 4.
        """

        batch_size, num_rois, height, width, filters = inputs.get_shape().as_list()

        net = tf.reshape(inputs, [-1, height, width, filters])

        for conv_id in range(4):
            net = self._conv_stage1[conv_id](net)

        net = self._conv_stage2(net)

        mask_outputs = self._conv_stage3(net)

        # [SW-34925] Workaround to offload ScatterNd Op to CPU
        if self._offload_post_proc:
            logging.info("[Mask Head] Running post processing on CPU")
            with tf.device('CPU:0'):
                mask_outputs = tf.reshape(mask_outputs, [
                    -1, num_rois, self._mrcnn_resolution,
                    self._mrcnn_resolution, self._num_classes
                ])

                with tf.name_scope('masks_post_processing'):

                    mask_outputs = tf.transpose(a=mask_outputs,
                                                perm=[0, 1, 4, 2, 3])

                    indices_dtype = tf.float32 if self._is_gpu_inference else tf.int32

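                    # Select each RoI's mask at its predicted class: flatten the
                    # (roi, class) pair into a single index roi * num_classes + class.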
                    if batch_size == 1:
                        indices = tf.reshape(
                            tf.reshape(tf.range(num_rois, dtype=indices_dtype),
                                       [batch_size, num_rois, 1]) *
                            self._num_classes +
                            tf.expand_dims(self._class_indices, axis=-1),
                            [batch_size, -1])
                        indices = tf.cast(indices, tf.int32)

                        mask_outputs = tf.gather(tf.reshape(
                            mask_outputs, [
                                batch_size, -1, self._mrcnn_resolution,
                                self._mrcnn_resolution
                            ]),
                                                 indices,
                                                 axis=1)

                        mask_outputs = tf.squeeze(mask_outputs, axis=1)
                        mask_outputs = tf.reshape(mask_outputs, [
                            batch_size, num_rois, self._mrcnn_resolution,
                            self._mrcnn_resolution
                        ])

                    else:
                        batch_indices = (
                            tf.expand_dims(tf.range(batch_size,
                                                    dtype=indices_dtype),
                                           axis=1) *
                            tf.ones([1, num_rois], dtype=indices_dtype))

                        mask_indices = (tf.expand_dims(
                            tf.range(num_rois, dtype=indices_dtype), axis=0) *
                                        tf.ones([batch_size, 1],
                                                dtype=indices_dtype))

                        gather_indices = tf.stack(
                            [batch_indices, mask_indices, self._class_indices],
                            axis=2)

                        if self._is_gpu_inference:
                            gather_indices = tf.cast(gather_indices,
                                                     dtype=tf.int32)

                        mask_outputs = tf.gather_nd(mask_outputs,
                                                    gather_indices)

                return mask_outputs
        else:
            mask_outputs = tf.reshape(mask_outputs, [
                -1, num_rois, self._mrcnn_resolution, self._mrcnn_resolution,
                self._num_classes
            ])

            with tf.name_scope('masks_post_processing'):

                mask_outputs = tf.transpose(a=mask_outputs,
                                            perm=[0, 1, 4, 2, 3])

                indices_dtype = tf.float32 if self._is_gpu_inference else tf.int32

                if batch_size == 1:
                    indices = tf.reshape(
                        tf.reshape(tf.range(num_rois, dtype=indices_dtype),
                                   [batch_size, num_rois, 1]) *
                        self._num_classes +
                        tf.expand_dims(self._class_indices, axis=-1),
                        [batch_size, -1])
                    indices = tf.cast(indices, tf.int32)

                    mask_outputs = tf.gather(tf.reshape(
                        mask_outputs, [
                            batch_size, -1, self._mrcnn_resolution,
                            self._mrcnn_resolution
                        ]),
                                             indices,
                                             axis=1)

                    mask_outputs = tf.squeeze(mask_outputs, axis=1)
                    mask_outputs = tf.reshape(mask_outputs, [
                        batch_size, num_rois, self._mrcnn_resolution,
                        self._mrcnn_resolution
                    ])

                else:
                    batch_indices = (tf.expand_dims(
                        tf.range(batch_size, dtype=indices_dtype),
                        axis=1) * tf.ones([1, num_rois], dtype=indices_dtype))

                    mask_indices = (
                        tf.expand_dims(tf.range(num_rois, dtype=indices_dtype),
                                       axis=0) *
                        tf.ones([batch_size, 1], dtype=indices_dtype))

                    mask_indices = batch_indices * num_rois + mask_indices
                    gather_indices = tf.stack(
                        [mask_indices, self._class_indices], axis=2)
                    gather_indices = tf.reshape(gather_indices,
                                                [batch_size * num_rois, 2])

                    if self._is_gpu_inference:
                        gather_indices = tf.cast(gather_indices,
                                                 dtype=tf.int32)

                    mask_outputs = tf.reshape(mask_outputs, [
                        batch_size * num_rois, self._num_classes,
                        self._mrcnn_resolution, self._mrcnn_resolution
                    ])
                    mask_outputs = tf.gather_nd(mask_outputs, gather_indices)
                    mask_outputs = tf.reshape(mask_outputs, [
                        batch_size, num_rois, self._mrcnn_resolution,
                        self._mrcnn_resolution
                    ])

            return mask_outputs
Example #18
    def load_predictions(self,
                         detection_results,
                         include_mask,
                         is_image_mask=False):
        """Create prediction dictionary list from detection and mask results.

    Args:
      detection_results: a dictionary containing numpy arrays which corresponds
        to prediction results.
      include_mask: a boolean, whether to include mask in detection results.
      is_image_mask: a boolean, whether the predicted mask is a whole-image mask.

    Returns:
      a list of dictionary including different prediction results from the model
        in numpy form.
    """
        predictions = []
        num_detections = detection_results['detection_scores'].size
        current_index = 0
        for i, image_id in enumerate(detection_results['source_id']):

            if include_mask:
                box_coordinates_in_image = detection_results[
                    'detection_boxes'][i]
                segments = generate_segmentation_from_masks(
                    detection_results['detection_masks'][i],
                    box_coordinates_in_image,
                    int(detection_results['image_info'][i][3]),
                    int(detection_results['image_info'][i][4]),
                    is_image_mask=is_image_mask)

                # Convert the mask to uint8 and then to fortranarray for RLE encoder.
                encoded_masks = [
                    maskUtils.encode(
                        np.asfortranarray(instance_mask.astype(np.uint8)))
                    for instance_mask in segments
                ]

            for box_index in range(int(
                    detection_results['num_detections'][i])):
                if current_index % 1000 == 0:
                    logging.info('{}/{}'.format(current_index, num_detections))

                current_index += 1

                prediction = {
                    'image_id':
                    int(image_id),
                    'bbox':
                    detection_results['detection_boxes'][i]
                    [box_index].tolist(),
                    'score':
                    detection_results['detection_scores'][i][box_index],
                    'category_id':
                    int(detection_results['detection_classes'][i][box_index]),
                }

                if include_mask:
                    prediction['segmentation'] = encoded_masks[box_index]

                predictions.append(prediction)

        return predictions
Example #19
    def __call__(self, params, input_context=None):

        batch_size = params['batch_size'] if 'batch_size' in params else 1

        try:
            seed = params['seed'] if not MPI_is_distributed() else params['seed'] * MPI_rank()
        except (KeyError, TypeError):
            seed = None

        if MPI_is_distributed():
            n_gpus = MPI_size()

        elif input_context is not None:
            n_gpus = input_context.num_input_pipelines

        else:
            n_gpus = 1

        ##################################################

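        # List files deterministically (shuffle=False) so every worker sees the
        # same ordering and shard() below yields disjoint partitions.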
        dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)

        if self._mode == tf.estimator.ModeKeys.TRAIN:

            if input_context is not None:
                logging.info("Using Dataset Sharding with TF Distributed")
                _num_shards = input_context.num_input_pipelines
                _shard_idx = input_context.input_pipeline_id

            elif MPI_is_distributed():
                logging.info("Using Dataset Sharding with Horovod")
                _shard_idx, _num_shards = MPI_rank_and_size()

            try:
                dataset = dataset.shard(num_shards=_num_shards,
                                        index=_shard_idx)
                dataset = dataset.shuffle(math.ceil(256 / _num_shards))

            except NameError:  # Not a distributed training setup
                pass

        def _prefetch_dataset(filename):
            return tf.data.TFRecordDataset(filename).prefetch(1)

        dataset = dataset.interleave(
            map_func=_prefetch_dataset,
            cycle_length=32,
            block_length=64,
            num_parallel_calls=tf.data.experimental.AUTOTUNE,
        )

        if self._num_examples is not None and self._num_examples > 0:
            logging.info("[*] Limiting the amount of sample to: %d" %
                         self._num_examples)
            dataset = dataset.take(self._num_examples)

        dataset = dataset.cache()

        if self._mode == tf.estimator.ModeKeys.TRAIN:
            dataset = dataset.shuffle(buffer_size=4096,
                                      reshuffle_each_iteration=True,
                                      seed=seed)

            dataset = dataset.repeat()

        # Parse the fetched records to input tensors for model function.
        dataset = dataset.map(
            map_func=self._create_dataset_parser_fn(params),
            num_parallel_calls=16,
        )

        dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)

        if self._use_fake_data:
            # Turn this dataset into a semi-fake dataset which always loops over
            # the first batch. This reduces variance in performance and is useful
            # for testing.
            logging.info("Using Fake Dataset Loop...")
            dataset = dataset.take(1).cache().repeat()

            if self._mode != tf.estimator.ModeKeys.TRAIN:
                dataset = dataset.take(int(5000 / batch_size))

        dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE, )

        if not tf.distribute.has_strategy():
            dataset = dataset.apply(
                tf.data.experimental.prefetch_to_device(
                    '/gpu:0',  # With Horovod the local GPU is always 0
                    buffer_size=1,
                ))

        data_options = tf.data.Options()

        data_options.experimental_deterministic = seed is not None
        if LooseVersion(tf.__version__) <= LooseVersion("2.0.0"):
            data_options.experimental_distribute.auto_shard = False
        else:
            data_options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
        # data_options.experimental_distribute.auto_shard = False
        data_options.experimental_slack = True

        data_options.experimental_threading.max_intra_op_parallelism = 1
        # data_options.experimental_threading.private_threadpool_size = int(multiprocessing.cpu_count() / n_gpus) * 2

        # ================= experimental_optimization ================= #

        data_options.experimental_optimization.apply_default_optimizations = False

        # data_options.experimental_optimization.autotune = True
        data_options.experimental_optimization.filter_fusion = True
        data_options.experimental_optimization.map_and_batch_fusion = True
        data_options.experimental_optimization.map_and_filter_fusion = True
        data_options.experimental_optimization.map_fusion = True
        data_options.experimental_optimization.map_parallelization = True

        map_vectorization_options = tf.data.experimental.MapVectorizationOptions()
        map_vectorization_options.enabled = True
        map_vectorization_options.use_choose_fastest = True

        data_options.experimental_optimization.map_vectorization = map_vectorization_options

        data_options.experimental_optimization.noop_elimination = True
        data_options.experimental_optimization.parallel_batch = True
        data_options.experimental_optimization.shuffle_and_repeat_fusion = True

        # ========== Stats on TF Data =============
        # aggregator = tf.data.experimental.StatsAggregator()
        # data_options.experimental_stats.aggregator = aggregator
        # data_options.experimental_stats.latency_all_edges = True

        dataset = dataset.with_options(data_options)

        return dataset
Example #20
def get_image_summary(predictions, current_step, max_images=10):
    """Write out image and prediction for summary."""

    if 'orig_images' not in predictions:
        logging.info('Missing orig_images in predictions: %s',
                     predictions.keys())
        return

    max_images = min(
        len(predictions['orig_images']) *
        predictions['orig_images'][0].shape[0], max_images)

    _detection_boxes = np.concatenate(predictions['detection_boxes'], axis=0)
    _detection_scores = np.concatenate(predictions['detection_scores'], axis=0)
    _detection_classes = np.concatenate(predictions['detection_classes'],
                                        axis=0)
    _image_info = np.concatenate(predictions['image_info'], axis=0)
    _num_detections = np.concatenate(predictions['num_detections'], axis=0)
    _orig_images = np.concatenate(predictions['orig_images'], axis=0)

    if 'detection_masks' in predictions:
        _detection_masks = np.concatenate(predictions['detection_masks'],
                                          axis=0)
    else:
        _detection_masks = None

    if 'groundtruth_boxes' in predictions:
        _groundtruth_boxes = np.concatenate(predictions['groundtruth_boxes'],
                                            axis=0)
    else:
        _groundtruth_boxes = None

    _orig_images = _orig_images * 255
    _orig_images = _orig_images.astype(np.uint8)

    image_previews = []

    for i in range(max_images):
        num_detections = min(len(_detection_boxes[i]), int(_num_detections[i]))

        detection_boxes = _detection_boxes[i][:num_detections]
        detection_scores = _detection_scores[i][:num_detections]
        detection_classes = _detection_classes[i][:num_detections]

        image = _orig_images[i]
        image_height = image.shape[0]
        image_width = image.shape[1]

        # Rescale the box to fit the visualization image.
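        # image_info columns are [height, width, scale, orig_height, orig_width];
        # columns 3:5 hold the original image size.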
        h, w = _image_info[i][3:5]
        detection_boxes = detection_boxes / np.array([w, h, w, h])
        detection_boxes = detection_boxes * np.array(
            [image_width, image_height, image_width, image_height])

        if _groundtruth_boxes is not None:
            gt_boxes = _groundtruth_boxes[i]
            gt_boxes = gt_boxes * np.array(
                [image_height, image_width, image_height, image_width])
        else:
            gt_boxes = None

        if _detection_masks is not None:
            instance_masks = _detection_masks[i][0:num_detections]
            segmentations = coco_metric.generate_segmentation_from_masks(
                instance_masks, detection_boxes, image_height, image_width)
        else:
            segmentations = None

        # Convert boxes from [x, y, w, h] to [x1, y1, x2, y2]. Note that
        # process_prediction_for_eval() emitted corners in [x, y] order, so they
        # must be swapped back to [y, x] order for visualization.
        xmin, ymin, w, h = np.split(detection_boxes, 4, axis=-1)
        xmax = xmin + w
        ymax = ymin + h

        boxes_to_visualize = np.concatenate([ymin, xmin, ymax, xmax], axis=-1)

        image_preview = generate_image_preview(
            image,
            boxes=boxes_to_visualize,
            scores=detection_scores,
            classes=detection_classes.astype(np.int32),
            gt_boxes=gt_boxes,
            segmentations=segmentations)
        image_previews.append(image_preview)

    try:
        summaries = []

        for i, image_preview in enumerate(image_previews):
            image_buffer = generate_image_buffer(image_preview)
            image_summary = tf.compat.v1.Summary.Image(
                encoded_image_string=image_buffer)
            image_value = tf.compat.v1.Summary.Value(tag='%d_input' % i,
                                                     image=image_summary)

            summaries.append(image_value)

    except AttributeError:
        image_previews = np.array(image_previews)
        summaries = tf.summary.image(name='image_summary',
                                     data=image_previews,
                                     step=current_step,
                                     max_outputs=max_images)

    return summaries
Example #21
def _propose_rois(scores, boxes, anchor_boxes, height, width, scale,
                  rpn_pre_nms_topn, rpn_post_nms_topn, rpn_nms_threshold,
                  rpn_min_size, bbox_reg_weights, topk_before_nms, nms_on_hpu):
    """Proposes RoIs giva group of candidates.

    Args:
    scores: a tensor with a shape of [batch_size, num_boxes].
    boxes: a tensor with a shape of [batch_size, num_boxes, 4],
      in the encoded form.
    anchor_boxes: a tensor with a shape of [batch_size, num_boxes, 4]
      containing the anchor boxes corresponding to `boxes`.
    height: a tensor of shape [batch_size, 1, 1] representing the image height.
    width: a tensor of shape [batch_size, 1, 1] representing the image width.
    scale: a tensor of shape [batch_size, 1, 1] representing the image scale.
    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
      before applying NMS. This is *per FPN level* (not total).
    rpn_post_nms_topn: an integer number of top scoring RPN proposals to keep
      after applying NMS. This is the total number of RPN proposals produced.
    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
      used on RPN proposals.
    rpn_min_size: an integer, the minimum proposal height and width; both need
      to be greater than this number. Note that this number is at the original
      image scale, not the scale used during training or inference.
    bbox_reg_weights: None or a list of four integers specifying the weights
      used when decoding the box.
    topk_before_nms: a boolean indicating if topk is done before NMS
    nms_on_hpu: a boolean indicating if NMS is to be run on HPU.
      If False will run on CPU.

    Returns:
    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
      representing the scores of the proposals. It has same dtype as input
      scores.
    boxes: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
      representing the boxes of the proposals. The boxes are in normalized
      coordinates with a form of [ymin, xmin, ymax, xmax]. It has same dtype as
      input boxes.
    """
    batch_size, num_boxes = scores.get_shape().as_list()

    topk_limit = min(num_boxes, rpn_pre_nms_topn)

    if topk_before_nms:
        scores, boxes_list = box_utils.top_k(scores,
                                             k=topk_limit,
                                             boxes_list=[boxes, anchor_boxes])
        boxes = boxes_list[0]
        anchor_boxes = boxes_list[1]
        num_boxes = topk_limit

    boxes = box_utils.decode_boxes(boxes, anchor_boxes, bbox_reg_weights)

    boxes = box_utils.clip_boxes(boxes, height, width)

    if rpn_min_size > 0.0:
        boxes, scores = box_utils.filter_boxes(boxes,
                                               tf.expand_dims(scores, axis=-1),
                                               rpn_min_size, height, width,
                                               scale)

        scores = tf.squeeze(scores, axis=-1)

    post_nms_topk_limit = topk_limit if topk_limit < rpn_post_nms_topn else rpn_post_nms_topn

    if rpn_nms_threshold > 0:
        # Normalize coordinates as combined_non_max_suppression currently
        # only support normalized coordinates.
        pre_nms_boxes = box_utils.to_normalized_coordinates(
            boxes, height, width)
        pre_nms_boxes = tf.reshape(pre_nms_boxes,
                                   [batch_size, num_boxes, 1, 4])
        pre_nms_scores = tf.reshape(scores, [batch_size, num_boxes, 1])
        if not nms_on_hpu:
            logging.info(
                "[ROI OPs] Using Batched NMS on CPU/GPU with %d input boxes" %
                num_boxes)
            with tf.device('CPU:0'):
                boxes, scores, _, _ = tf.image.combined_non_max_suppression(
                    pre_nms_boxes,
                    pre_nms_scores,
                    max_output_size_per_class=topk_limit,
                    max_total_size=post_nms_topk_limit,
                    iou_threshold=rpn_nms_threshold,
                    score_threshold=0.0,
                    pad_per_class=False,
                )
        else:
            logging.info(
                "[ROI OPs] Using Batched NMS on HPU with %d input boxes" %
                num_boxes)
            boxes, scores, _, _ = tf.image.combined_non_max_suppression(
                pre_nms_boxes,
                pre_nms_scores,
                max_output_size_per_class=topk_limit,
                max_total_size=post_nms_topk_limit,
                iou_threshold=rpn_nms_threshold,
                score_threshold=0.0,
                pad_per_class=False,
            )
        boxes = box_utils.to_absolute_coordinates(boxes, height, width)
    else:
        scores, boxes = box_utils.top_k(scores,
                                        k=post_nms_topk_limit,
                                        boxes_list=[boxes])
        boxes = boxes[0]

    return scores, boxes
Example #22
    def log_info(self, message):
        logging.info("%s%s" % (self.LOGGING_PREFIX, message))