def testKeepalive(self):
    """Checks that a released object stays alive until the keep-alive is displaced.

    The test acquires a `Marker` through one handle, drops the reference, and
    asserts the marker is still active. It then acquires through a second,
    unrelated handle and asserts the marker is finally released.
    """
    counter = Count()
    handle = shared.Shared()
    unrelated_handle = shared.Shared()

    def make_marker():
        return Marker(counter)

    def make_nothing():
        return None

    first_ref = handle.acquire(make_marker)
    self.assertEqual(1, counter.get_total())
    self.assertEqual(1, counter.get_active())

    # Dropping our only reference must not free the marker: the keep-alive
    # is expected to still hold it.
    del first_ref
    gc.collect()
    self.assertEqual(1, counter.get_active())

    # A second acquire should hand back the same object, so the total number
    # of initialisations stays at one.
    second_ref = handle.acquire(make_marker)
    self.assertEqual(1, counter.get_total())
    self.assertEqual(1, counter.get_active())

    # Acquiring through another handle gets rid of the keep-alive; once our
    # reference is gone too, the marker should be collected.
    unrelated_handle.acquire(make_nothing)
    del second_ref
    gc.collect()
    self.assertEqual(0, counter.get_active())
def _benchmarkRunMetaGraphDoFnManualActuationCommon(self, force_tf_compat_v1):
    """Shared body for benchmarks that drive RunMetaGraphDoFn by hand.

    Builds a `_RunMetaGraphDoFn`, calls `setup()` once, then times a loop that
    calls `process()` on every batch and drains its output. The elapsed wall
    time is reported through `report_benchmark`.

    Args:
      force_tf_compat_v1: Forwarded to the dataset helpers and to the DoFn's
        `use_tf_compat_v1` argument.
    """
    common_variables = _get_common_variables(self._dataset, force_tf_compat_v1)
    batch_size, batched_records = _get_batched_records(
        self._dataset, force_tf_compat_v1, self._max_num_examples())

    do_fn = tft_beam_impl._RunMetaGraphDoFn(  # pylint: disable=protected-access
        tf_config=None,
        shared_graph_state_handle=shared.Shared(),
        passthrough_keys=set(),
        exclude_outputs=None,
        use_tf_compat_v1=force_tf_compat_v1,
        input_tensor_adapter_config=(
            common_variables.tfxio.TensorAdapterConfig()))
    do_fn.setup()

    # Timed region: only the process() calls (including the per-batch saved
    # model path lookup) are measured; setup() above is excluded.
    started_at = time.time()
    for record_batch in batched_records:
        # list() forces the output of process() to be fully consumed.
        list(
            do_fn.process(
                record_batch,
                saved_model_dir=self._dataset.tft_saved_model_path(
                    force_tf_compat_v1)))
    elapsed = time.time() - started_at

    self.report_benchmark(
        iters=1,
        wall_time=elapsed,
        extras={
            "batch_size": batch_size,
            "num_examples": self._dataset.num_examples(
                limit=self._max_num_examples()),
        })
def expand(self, pcoll: beam.PCollection) -> beam.PCollection:
    """Applies batching, the inference DoFn and un-batching to `pcoll`.

    Args:
      pcoll: Input PCollection.

    Returns:
      The PCollection produced by `beam.BatchElements()`, then a `ParDo` of
      `_RunInferenceDoFn`, then `beam.FlatMap(_unbatch)`.
    """
    batched = pcoll | beam.BatchElements()
    # A fresh shared handle is created for every expansion of this transform.
    inference_fn = _RunInferenceDoFn(
        shared.Shared(), self._model_loader, self._clock)
    inferred = batched | beam.ParDo(inference_fn)
    return inferred | beam.FlatMap(_unbatch)
def testTagReturnsCached(self):
    """Checks that acquiring twice with the same tag yields the first object."""
    name_sequence = Sequence()
    shared_handle = shared.Shared()

    acquired = shared_handle.acquire(name_sequence.make_acquire_fn(), tag='1')
    self.assertEqual('sequence1', acquired.get_name())

    # Same tag as before: the cached object is expected, so the name must not
    # advance to the next one in the sequence.
    acquired = shared_handle.acquire(name_sequence.make_acquire_fn(), tag='1')
    self.assertEqual('sequence1', acquired.get_name())
def testConcurrentCallsDeduped(self):
    """Checks that many concurrent acquire calls trigger a single initialisation.

    One hundred threads call `acquire` on the same handle with a slow
    initialisation function; the test asserts that exactly one `Marker` was
    ever created.
    """
    counter = Count()
    handle = shared.Shared()
    unrelated_handle = shared.Shared()
    acquired = []
    acquired_lock = threading.Lock()

    def make_nothing():
        return None

    def slow_make_marker():
        # The delay gives the other threads time to call acquire while the
        # first initialisation is still running.
        time.sleep(1)
        return Marker(counter)

    def worker():
        obj = handle.acquire(slow_make_marker)
        with acquired_lock:
            acquired.append(obj)

    workers = [threading.Thread(target=worker) for _ in range(100)]
    for thread in workers:
        thread.start()
    for thread in workers:
        thread.join()

    self.assertEqual(1, counter.get_total())
    self.assertEqual(1, counter.get_active())

    # Get rid of the keepalive, then drop every reference we collected.
    unrelated_handle.acquire(make_nothing)
    with acquired_lock:
        acquired.clear()
    gc.collect()
    self.assertEqual(0, counter.get_active())
def __init__(self, model_loader: ModelLoader, clock=None):
    """Stores the model loader and prepares metrics, clock and shared handle.

    Args:
      model_loader: Loader whose `get_inference_runner()` result is kept on
        the instance.
      clock: Optional clock. Any falsy value (including the default `None`)
        is replaced by `_ClockFactory.make_clock()`.
    """
    self._model_loader = model_loader
    self._inference_runner = model_loader.get_inference_runner()
    self._shared_model_handle = shared.Shared()

    metrics_namespace = self._inference_runner.get_metrics_namespace()
    self._metrics_collector = _MetricsCollector(metrics_namespace)

    self._clock = clock if clock else _ClockFactory.make_clock()
    # No model is held at construction time.
    self._model = None
def _MultiInference(  # pylint: disable=invalid-name
    pcoll: beam.pvalue.PCollection,
    inference_spec_type: model_spec_pb2.InferenceSpecType):
    """Applies the in-process multi-inference DoFn to `pcoll`.

    Args:
      pcoll: Input PCollection.
      inference_spec_type: Spec passed to `_using_in_process_inference` and to
        `_BatchMultiInferenceDoFn`.

    Returns:
      The result of applying the 'MultiInference' ParDo to `pcoll`.

    Raises:
      NotImplementedError: If `_using_in_process_inference` returns a falsy
        value for `inference_spec_type`.
    """
    if not _using_in_process_inference(inference_spec_type):
        raise NotImplementedError

    multi_inference_fn = _BatchMultiInferenceDoFn(
        inference_spec_type, shared.Shared())
    return pcoll | 'MultiInference' >> beam.ParDo(multi_inference_fn)
def testMultiple(self):
    """Checks release, re-initialisation and reference sharing for one handle.

    The test walks through three phases: acquire and fully release an object,
    acquire again (expecting a new initialisation), and finally hold two
    references to the same object and release them one at a time.
    """
    counter = Count()
    handle = shared.Shared()
    unrelated_handle = shared.Shared()

    def make_marker():
        return Marker(counter)

    def make_nothing():
        return None

    # Phase 1: acquire once, displace the keepalive, then release.
    only_ref = handle.acquire(make_marker)
    unrelated_handle.acquire(make_nothing)  # Get rid of the keepalive
    self.assertEqual(1, counter.get_total())
    self.assertEqual(1, counter.get_active())
    del only_ref
    gc.collect()
    # Shared value should be garbage collected.
    self.assertEqual(0, counter.get_active())

    # Phase 2: the value was released, so this acquire is expected to
    # initialise a second object...
    ref_a = handle.acquire(make_marker)
    self.assertEqual(2, counter.get_total())
    self.assertEqual(1, counter.get_active())
    # ...while a further acquire reuses it without another initialisation.
    ref_b = handle.acquire(make_marker)
    self.assertEqual(2, counter.get_total())
    self.assertEqual(1, counter.get_active())
    unrelated_handle.acquire(make_nothing)  # Get rid of the keepalive

    # Phase 3: the object must survive as long as one reference remains.
    del ref_b
    gc.collect()
    self.assertEqual(1, counter.get_active())
    del ref_a
    gc.collect()
    self.assertEqual(0, counter.get_active())
def _PTransformFn(raw_records_pcoll: beam.pvalue.PCollection):
    """Batches raw records and decodes each batch with `_RecordsToRecordBatch`.

    Reads `self` and `batch_size` from the enclosing scope.

    Args:
      raw_records_pcoll: Input PCollection of raw records.

    Returns:
      The PCollection produced by the "Decode" ParDo.
    """
    batched_records = (
        raw_records_pcoll
        | "BatchElements" >> beam.BatchElements(
            **batch_util.GetBatchElementsKwargs(batch_size)))

    decode_fn = _RecordsToRecordBatch(
        self._saved_decoder_path,
        self.telemetry_descriptors,
        # A shared handle is only supplied when the singleton decoder is
        # enabled; otherwise the DoFn receives None.
        shared.Shared() if self._use_singleton_decoder else None,
        self.raw_record_column_name,
        self._record_index_column_name)
    return batched_records | "Decode" >> beam.ParDo(decode_fn)
def testTagCacheEviction(self):
    """Checks that a different tag makes `acquire` return a new object.

    Without tags, a second acquire with a different function still returns
    the first object. With distinct tags, the second acquire returns the
    object built by the second function.
    """
    untagged_handle = shared.Shared()
    tagged_handle = shared.Shared()

    def make_obj_1():
        return NamedObject('obj_1')

    def make_obj_2():
        return NamedObject('obj_2')

    # With no tag, the shared handle does not know when to evict objects, so
    # the second acquire still yields 'obj_1'.
    first = untagged_handle.acquire(make_obj_1)
    assert first.get_name() == 'obj_1'
    second = untagged_handle.acquire(make_obj_2)
    assert second.get_name() == 'obj_1'

    # Cache eviction can be forced by specifying different tags.
    first = tagged_handle.acquire(make_obj_1, tag='1')
    assert first.get_name() == 'obj_1'
    second = tagged_handle.acquire(make_obj_2, tag='2')
    assert second.get_name() == 'obj_2'
def __init__(
    self, model_agnostic_config: agnostic_predict.ModelAgnosticConfig
) -> None:
    """Stores the config and sets up the shared handle and load-time metric.

    Args:
      model_agnostic_config: Configuration kept on the instance.
    """
    self._model_agnostic_config = model_agnostic_config

    # TODO(b/140805724): It's odd that shared_handle is not passed as an
    # argument to the constructor. Logically, it seems to have a 1-1
    # correspondence with the model_agnostic_config, so it should be passed
    # with it.
    self._shared_handle = shared.Shared()

    # Both start out unset; nothing is loaded in the constructor.
    self._model_agnostic_wrapper = None
    self._model_load_seconds = None

    load_seconds_distribution = beam.metrics.Metrics.distribution(
        constants.METRICS_NAMESPACE, 'model_load_seconds')
    self._model_load_seconds_distribution = load_seconds_distribution
def __init__(self):
    """Creates the shared handle and the three right-lookup distributions."""
    self._shared_handle = shared.Shared()

    make_distribution = beam.metrics.Metrics.distribution
    namespace = constants.METRICS_NAMESPACE

    # NOTE(review): the attribute name below is spelled "contruction";
    # presumably other code reads it under this spelling, so it is kept.
    self._right_lookup_contruction_seconds_distribution = make_distribution(
        namespace, 'right_lookup_construction_seconds')

    # These should be gauges, but not all runners support gauges so they are
    # made distributions, which are equivalent.
    # TODO(b/130840752): support gauges in the internal runner.
    self._right_lookup_num_keys = make_distribution(
        namespace, 'right_lookup_num_keys')
    self._right_lookup_num_values = make_distribution(
        namespace, 'right_lookup_num_values')
def _Predict(  # pylint: disable=invalid-name
    pcoll: beam.pvalue.PCollection,
    inference_spec_type: model_spec_pb2.InferenceSpecType):
    """Applies either the in-process or the remote predict DoFn to `pcoll`.

    Args:
      pcoll: Input PCollection.
      inference_spec_type: Spec used to choose the DoFn and passed to it.

    Returns:
      The result of the 'Predict' ParDo when `_using_in_process_inference`
      is truthy for the spec, otherwise the result of the 'RemotePredict'
      ParDo.
    """
    if not _using_in_process_inference(inference_spec_type):
        # The remote DoFn additionally receives the pipeline's options.
        remote_fn = _BatchRemotePredictDoFn(
            inference_spec_type, pcoll.pipeline.options)
        return pcoll | 'RemotePredict' >> beam.ParDo(remote_fn)

    in_process_fn = _BatchPredictDoFn(inference_spec_type, shared.Shared())
    return pcoll | 'Predict' >> beam.ParDo(in_process_fn)
def testDifferentObjects(self):
    """Checks that separate handles hold separate objects.

    Two handles each acquire their own object, repeated acquisitions return
    those same objects, and after everything is released a further acquire
    on each handle produces a new object.
    """
    name_sequence = Sequence()

    def make_nothing():
        return None

    handle_a = shared.Shared()
    handle_b = shared.Shared()
    keepalive_breaker = shared.Shared()

    a_first = handle_a.acquire(name_sequence.make_acquire_fn())
    b_first = handle_b.acquire(name_sequence.make_acquire_fn())
    self.assertEqual('sequence1', a_first.get_name())
    self.assertEqual('sequence2', b_first.get_name())

    # Repeated acquisitions must return the earlier objects, not new ones.
    a_again = handle_a.acquire(name_sequence.make_acquire_fn())
    b_again = handle_b.acquire(name_sequence.make_acquire_fn())
    self.assertEqual('sequence1', a_again.get_name())
    self.assertEqual('sequence2', b_again.get_name())

    # Release all references and force garbage-collection.
    del a_first
    del a_again
    del b_first
    del b_again
    keepalive_breaker.acquire(make_nothing)  # Get rid of the keepalive
    gc.collect()

    # Acquiring after the release is expected to give new objects, so the
    # names continue from where the sequence left off.
    a_fresh = handle_a.acquire(name_sequence.make_acquire_fn())
    b_fresh = handle_b.acquire(name_sequence.make_acquire_fn())
    self.assertEqual('sequence3', a_fresh.get_name())
    self.assertEqual('sequence4', b_fresh.get_name())
def __init__(self,
             model_handler: ModelHandler[ExampleT, PredictionT, Any],
             clock):
    """Stores the model handler and clock and creates a shared model handle.

    Args:
      model_handler: Handler kept on the instance as `_model_handler`.
      clock: Clock object kept on the instance as `_clock`.
    """
    self._model_handler = model_handler
    self._clock = clock
    self._shared_model_handle = shared.Shared()
    # No model is held at construction time.
    self._model = None
pipeline_options = PipelineOptions(save_main_session=True)

# Each element is a (context, questions) pair. The same pair is repeated
# 40000 times to build the workload.
question_list = [
    ("""TensorRT is a high performance deep learning inference platform that delivers low latency and high throughput for apps such as recommenders, speech and image/video on NVIDIA GPUs. It includes parsers to import models, and plugins to support novel ops and layers before applying optimizations for inference. Today NVIDIA is open-sourcing parsers and plugins in TensorRT so that the deep learning community can customize and extend these components to take advantage of powerful TensorRT optimizations for your apps.""",
     [
         "What is TensorRT?",
         "Is TensorRT open sourced?",
         "Who is open sourcing TensorRT?",
         "What does TensorRT deliver?"
     ] * 4)
] * 40000

engine_path = "/workspace/trt_beam/bert_large_seq384_bs16_trt2011.engine"

start_time = time.time()
with beam.Pipeline(options=pipeline_options) as p:
    shared_handle = shared.Shared()
    _ = (
        p
        | beam.Create(question_list)
        | beam.ParDo(
            DoManualInference(
                shared_handle=shared_handle,
                engine_path=engine_path,
                batch_size=16))
        | beam.Map(print))

# Sample the clock once so that both log lines describe the same interval;
# reading it separately per line made the reported duration and the
# throughput disagree.
elapsed_seconds = time.time() - start_time
# Count the questions from the data instead of hard-coding the number of
# questions per context, so the figure stays right if the list changes.
total_questions = sum(len(questions) for _context, questions in question_list)

logging.info(f"--- {elapsed_seconds} seconds ---")
logging.info(
    f"--- {total_questions // elapsed_seconds} questions/seconds ---"
)
def __init__(self,
             construct_fn: Callable[[], Any],
             tags: Optional[List[Text]] = None):
    """Stores the construction callable and tags and creates a shared handle.

    Args:
      construct_fn: Zero-argument callable kept as `construct_fn`.
      tags: Optional list of tags kept as `tags`; defaults to `None`.
    """
    self._shared_handle = shared.Shared()
    self.construct_fn = construct_fn
    self.tags = tags