Example #1
    def testKeepalive(self):
        count = Count()
        shared_handle = shared.Shared()
        other_shared_handle = shared.Shared()

        def dummy_acquire_fn():
            return None

        def acquire_fn():
            return Marker(count)

        p1 = shared_handle.acquire(acquire_fn)
        self.assertEqual(1, count.get_total())
        self.assertEqual(1, count.get_active())
        del p1
        gc.collect()
        # Won't be garbage collected because of the keep-alive.
        self.assertEqual(1, count.get_active())

        # Reacquire.
        p2 = shared_handle.acquire(acquire_fn)
        self.assertEqual(1, count.get_total())  # No reinitialisation.
        self.assertEqual(1, count.get_active())

        # Get rid of the keep-alive.
        other_shared_handle.acquire(dummy_acquire_fn)
        del p2
        gc.collect()
        self.assertEqual(0, count.get_active())
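
The keep-alive behaviour exercised by this test can be reduced to a few lines. A minimal sketch, assuming apache_beam.utils.shared is importable; expensive_load is a hypothetical stand-in for any costly constructor:

from apache_beam.utils import shared

def expensive_load():
    # Hypothetical stand-in for a costly constructor (e.g. loading a model).
    return object()

handle = shared.Shared()
obj = handle.acquire(expensive_load)  # First call runs expensive_load.
obj = handle.acquire(expensive_load)  # Later calls return the cached object.
# Even after every reference to obj is dropped, the handle keeps the last
# value alive until a different Shared instance performs an acquire.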
Example #2
  def _benchmarkRunMetaGraphDoFnManualActuationCommon(self, force_tf_compat_v1):
    """Common implementation to benchmark RunMetaGraphDoFn "manually"."""
    common_variables = _get_common_variables(self._dataset, force_tf_compat_v1)
    batch_size, batched_records = _get_batched_records(self._dataset,
                                                       force_tf_compat_v1,
                                                       self._max_num_examples())
    fn = tft_beam_impl._RunMetaGraphDoFn(  # pylint: disable=protected-access
        tf_config=None,
        shared_graph_state_handle=shared.Shared(),
        passthrough_keys=set(),
        exclude_outputs=None,
        use_tf_compat_v1=force_tf_compat_v1,
        input_tensor_adapter_config=(
            common_variables.tfxio.TensorAdapterConfig()))
    fn.setup()

    start = time.time()
    for batch in batched_records:
      _ = list(
          fn.process(
              batch,
              saved_model_dir=self._dataset.tft_saved_model_path(
                  force_tf_compat_v1)))
    end = time.time()
    delta = end - start
    self.report_benchmark(
        iters=1,
        wall_time=delta,
        extras={
            "batch_size": batch_size,
            "num_examples": self._dataset.num_examples(
                limit=self._max_num_examples())
        })
Example #3
def expand(self, pcoll: beam.PCollection) -> beam.PCollection:
    return (pcoll
            | beam.BatchElements()
            | beam.ParDo(
                _RunInferenceDoFn(shared.Shared(), self._model_loader,
                                  self._clock))
            | beam.FlatMap(_unbatch))
Example #4
  def testTagReturnsCached(self):
    sequence = Sequence()
    handle = shared.Shared()

    f1 = handle.acquire(sequence.make_acquire_fn(), tag='1')
    self.assertEqual('sequence1', f1.get_name())

    # Should return the cached object.
    f1 = handle.acquire(sequence.make_acquire_fn(), tag='1')
    self.assertEqual('sequence1', f1.get_name())
Example #5
    def testConcurrentCallsDeduped(self):
        # Test that only one among many calls to acquire will actually run the
        # initialisation function.

        count = Count()
        shared_handle = shared.Shared()
        other_shared_handle = shared.Shared()

        refs = []
        ref_lock = threading.Lock()

        def dummy_acquire_fn():
            return None

        def acquire_fn():
            time.sleep(1)
            return Marker(count)

        def thread_fn():
            p = shared_handle.acquire(acquire_fn)
            with ref_lock:
                refs.append(p)

        threads = []
        for _ in range(100):
            t = threading.Thread(target=thread_fn)
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

        self.assertEqual(1, count.get_total())
        self.assertEqual(1, count.get_active())

        # Get rid of the keep-alive.
        other_shared_handle.acquire(dummy_acquire_fn)

        with ref_lock:
            del refs[:]
        gc.collect()

        self.assertEqual(0, count.get_active())
Example #6
def __init__(self, model_loader: ModelLoader, clock=None):
    self._model_loader = model_loader
    self._inference_runner = model_loader.get_inference_runner()
    self._shared_model_handle = shared.Shared()
    self._metrics_collector = _MetricsCollector(
        self._inference_runner.get_metrics_namespace())
    self._clock = clock
    if not clock:
        self._clock = _ClockFactory.make_clock()
    self._model = None
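
Examples #6 and #15 only show constructors; the sketch below shows how such a DoFn typically uses the stored handle at runtime. It assumes apache_beam.utils.shared; PredictDoFn and load_model are illustrative names, not the actual classes behind these examples:

import apache_beam as beam
from apache_beam.utils import shared

class PredictDoFn(beam.DoFn):
    def __init__(self, shared_handle):
        # The handle is created at pipeline-construction time so that every
        # deserialised instance of the DoFn refers to the same handle.
        self._shared_handle = shared_handle
        self._model = None

    def setup(self):
        def load_model():
            return {"weights": [0.0]}  # Stand-in for an expensive model load.
        # acquire() runs load_model at most once per process; concurrent
        # bundles in the same process receive the cached object.
        self._model = self._shared_handle.acquire(load_model)

    def process(self, element):
        yield element, self._model["weights"]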
Example #7
def _MultiInference(  # pylint: disable=invalid-name
    pcoll: beam.pvalue.PCollection,
    inference_spec_type: model_spec_pb2.InferenceSpecType):
  """Performs multi inference PTransform."""
  if _using_in_process_inference(inference_spec_type):
    return (pcoll
            | 'MultiInference' >> beam.ParDo(
                _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared())))
  else:
    raise NotImplementedError
Example #8
    def testMultiple(self):
        count = Count()
        shared_handle = shared.Shared()
        other_shared_handle = shared.Shared()

        def dummy_acquire_fn():
            return None

        def acquire_fn():
            return Marker(count)

        p = shared_handle.acquire(acquire_fn)
        # Get rid of the keep-alive.
        other_shared_handle.acquire(dummy_acquire_fn)
        self.assertEqual(1, count.get_total())
        self.assertEqual(1, count.get_active())
        del p
        gc.collect()
        # The shared value should now be garbage collected.
        self.assertEqual(0, count.get_active())

        # Acquiring multiple times only results in one initialisation
        p1 = shared_handle.acquire(acquire_fn)
        # Since shared value was released, expect a reinitialisation.
        self.assertEqual(2, count.get_total())
        self.assertEqual(1, count.get_active())
        p2 = shared_handle.acquire(acquire_fn)
        self.assertEqual(2, count.get_total())
        self.assertEqual(1, count.get_active())

        # Get rid of the keep-alive.
        other_shared_handle.acquire(dummy_acquire_fn)

        # Check that shared object isn't destroyed if there's still a reference to
        # it.
        del p2
        gc.collect()
        self.assertEqual(1, count.get_active())

        del p1
        gc.collect()
        self.assertEqual(0, count.get_active())
Example #9
def _PTransformFn(raw_records_pcoll: beam.pvalue.PCollection):
  return (
      raw_records_pcoll
      | "BatchElements" >> beam.BatchElements(
          **batch_util.GetBatchElementsKwargs(batch_size))
      | "Decode" >> beam.ParDo(_RecordsToRecordBatch(
          self._saved_decoder_path,
          self.telemetry_descriptors,
          shared.Shared() if self._use_singleton_decoder else None,
          self.raw_record_column_name,
          self._record_index_column_name)))
Example #10
    def testTagCacheEviction(self):
        shared1 = shared.Shared()
        shared2 = shared.Shared()

        def acquire_fn_1():
            return NamedObject('obj_1')

        def acquire_fn_2():
            return NamedObject('obj_2')

        # With no tag, the shared handle does not know when to evict the
        # cached object, so the second acquire returns the first object.
        p1 = shared1.acquire(acquire_fn_1)
        assert p1.get_name() == 'obj_1'
        p2 = shared1.acquire(acquire_fn_2)
        assert p2.get_name() == 'obj_1'

        # Cache eviction can be forced by specifying a different tag.
        p1 = shared2.acquire(acquire_fn_1, tag='1')
        assert p1.get_name() == 'obj_1'
        p2 = shared2.acquire(acquire_fn_2, tag='2')
        assert p2.get_name() == 'obj_2'
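
A common use of the tag mechanism demonstrated above is forcing a reload when a resource version changes. A minimal sketch, reusing the NamedObject helper from this test:

handle = shared.Shared()

def make_acquire_fn(version):
    def acquire_fn():
        return NamedObject('obj_%s' % version)
    return acquire_fn

obj = handle.acquire(make_acquire_fn('v1'), tag='v1')  # Loads obj_v1.
obj = handle.acquire(make_acquire_fn('v1'), tag='v1')  # Returns cached obj_v1.
obj = handle.acquire(make_acquire_fn('v2'), tag='v2')  # New tag forces a reload.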
Example #11
def __init__(
        self, model_agnostic_config: agnostic_predict.ModelAgnosticConfig
) -> None:
    self._model_agnostic_config = model_agnostic_config
    # TODO(b/140805724): It's odd that shared_handle is not passed as an
    # argument to the constructor. Logically, it seems to have a 1-1
    # correspondence with the model_agnostic_config, so it should be passed
    # with it.
    self._shared_handle = shared.Shared()
    self._model_agnostic_wrapper = None
    self._model_load_seconds = None
    self._model_load_seconds_distribution = beam.metrics.Metrics.distribution(
        constants.METRICS_NAMESPACE, 'model_load_seconds')
Example #12
def __init__(self):
    self._shared_handle = shared.Shared()
    self._right_lookup_construction_seconds_distribution = (
        beam.metrics.Metrics.distribution(
            constants.METRICS_NAMESPACE,
            'right_lookup_construction_seconds'))
    # These should be gauges, but not all runners support gauges so they are
    # made distributions, which are equivalent.
    # TODO(b/130840752): support gauges in the internal runner.
    self._right_lookup_num_keys = (beam.metrics.Metrics.distribution(
        constants.METRICS_NAMESPACE, 'right_lookup_num_keys'))
    self._right_lookup_num_values = (beam.metrics.Metrics.distribution(
        constants.METRICS_NAMESPACE, 'right_lookup_num_values'))
Example #13
def _Predict(  # pylint: disable=invalid-name
    pcoll: beam.pvalue.PCollection,
    inference_spec_type: model_spec_pb2.InferenceSpecType):
  """Performs predict PTransform."""
  if _using_in_process_inference(inference_spec_type):
    return (pcoll
            | 'Predict' >> beam.ParDo(
                _BatchPredictDoFn(inference_spec_type, shared.Shared())))
  else:
    return (
        pcoll
        | 'RemotePredict' >> beam.ParDo(
            _BatchRemotePredictDoFn(
                inference_spec_type, pcoll.pipeline.options)))
Example #14
    def testDifferentObjects(self):
        sequence = Sequence()

        def dummy_acquire_fn():
            return None

        first_handle = shared.Shared()
        second_handle = shared.Shared()
        dummy_handle = shared.Shared()

        f1 = first_handle.acquire(sequence.make_acquire_fn())
        s1 = second_handle.acquire(sequence.make_acquire_fn())

        self.assertEqual('sequence1', f1.get_name())
        self.assertEqual('sequence2', s1.get_name())

        f2 = first_handle.acquire(sequence.make_acquire_fn())
        s2 = second_handle.acquire(sequence.make_acquire_fn())

        # Check that the repeated acquisitions return the earlier objects
        self.assertEqual('sequence1', f2.get_name())
        self.assertEqual('sequence2', s2.get_name())

        # Release all references and force garbage-collection
        del f1
        del f2
        del s1
        del s2
        dummy_handle.acquire(dummy_acquire_fn)  # Get rid of the keep-alive.
        gc.collect()

        # Check that acquiring again after they're released gives new objects
        f3 = first_handle.acquire(sequence.make_acquire_fn())
        s3 = second_handle.acquire(sequence.make_acquire_fn())
        self.assertEqual('sequence3', f3.get_name())
        self.assertEqual('sequence4', s3.get_name())
Example #15
def __init__(self, model_handler: ModelHandler[ExampleT, PredictionT, Any],
             clock):
    self._model_handler = model_handler
    self._shared_model_handle = shared.Shared()
    self._clock = clock
    self._model = None
Example #16
    pipeline_options = PipelineOptions(save_main_session=True)
    question_list = [
        ("""TensorRT is a high performance deep learning inference platform
                                that delivers low latency and high throughput for apps such as
                                recommenders, speech and image/video on NVIDIA GPUs. It includes
                                parsers to import models, and plugins to support novel ops and
                                layers before applying optimizations for inference. Today NVIDIA
                                is open-sourcing parsers and plugins in TensorRT so that the deep
                                learning community can customize and extend these components to
                                take advantage of powerful TensorRT optimizations for your apps.""",
         [
             "What is TensorRT?", "Is TensorRT open sourced?",
             "Who is open sourcing TensorRT?", "What does TensorRT deliver?"
         ] * 4)
    ] * 40000
    engine_path = "/workspace/trt_beam/bert_large_seq384_bs16_trt2011.engine"

    start_time = time.time()
    with beam.Pipeline(options=pipeline_options) as p:
        shared_handle = shared.Shared()
        _ = (p | beam.Create(question_list)
             | beam.ParDo(
                 DoManualInference(shared_handle=shared_handle,
                                   engine_path=engine_path,
                                   batch_size=16))
             | beam.Map(print))
    logging.info(f"--- {time.time() - start_time} seconds ---")
    logging.info(
        f"--- {len(question_list) * 16.0 // (time.time() - start_time)} questions/second ---"
    )
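
DoManualInference is not defined in this snippet; below is a skeletal sketch of what such a DoFn might look like, following the setup()/acquire pattern from the other examples. The engine loading and inference details are hypothetical placeholders:

import apache_beam as beam
from apache_beam.utils import shared

class DoManualInference(beam.DoFn):
    # Hypothetical skeleton; the real DoFn is not shown in the example.
    def __init__(self, shared_handle, engine_path, batch_size):
        self._shared_handle = shared_handle
        self._engine_path = engine_path
        self._batch_size = batch_size

    def setup(self):
        def load_engine():
            # A real implementation would deserialise the TensorRT engine at
            # self._engine_path; a placeholder dict is returned here.
            return {"engine_path": self._engine_path}
        # Loaded once per process and shared across bundles and threads.
        self._engine = self._shared_handle.acquire(load_engine)

    def process(self, element):
        context, questions = element
        # Batched inference against self._engine would go here; the
        # placeholder just reports how many questions arrived.
        yield len(questions)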
Example #17
def __init__(self,
             construct_fn: Callable[[], Any],
             tags: Optional[List[Text]] = None):
    self.construct_fn = construct_fn
    self.tags = tags
    self._shared_handle = shared.Shared()
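
The class above stores the handle, but the snippet never shows it being used. One plausible, purely illustrative way such a wrapper could hand its construct_fn to the handle, leaning on the tag behaviour from Example #10 so that changing the tags forces reconstruction:

def instance(self):
    # Illustrative only; not the actual method of the class above.
    tag = ','.join(self.tags) if self.tags else None
    return self._shared_handle.acquire(self.construct_fn, tag=tag)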