Exemplo n.º 1
0
    def test_cancel_stops_recording(self):
        """Checks that cancelling a RecordingManager stops background caching.

        Builds a streaming pipeline fed by a TestStream, starts a recording
        of the squared elements, and asserts that the background caching job
        is not done before ``rm.cancel()`` and is done after it.
        """
        # Add the TestStream so that it can be cached.
        ib.options.capturable_sources.add(TestStream)

        p = beam.Pipeline(InteractiveRunner(),
                          options=PipelineOptions(streaming=True))
        # The stream sets the watermark to 0 and emits the integers 0..9
        # between two processing-time advances of 1.
        elems = (p
                 | TestStream().advance_watermark_to(
                     0).advance_processing_time(1).add_elements(list(
                         range(10))).advance_processing_time(1))
        squares = elems | beam.Map(lambda x: x**2)

        # Watch the local scope for Interactive Beam so that referenced PCollections
        # will be cached.
        # NOTE(review): locals() is passed as-is, so the local names defined
        # above (p, elems, squares) are what Interactive Beam sees; keep this
        # call after those assignments.
        ib.watch(locals())

        # This is normally done in the interactive_utils when a transform is
        # applied but needs an IPython environment. So we manually run this here.
        ie.current_env().track_user_pipelines()

        # Get the recording then the BackgroundCachingJob.
        # NOTE(review): max_n / max_duration presumably bound the number of
        # recorded elements and the recording time -- confirm against
        # RecordingManager.record.
        rm = RecordingManager(p)
        recording = rm.record([squares], max_n=10, max_duration=30)

        # The BackgroundCachingJob is still waiting for more elements, so it isn't
        # done yet.
        bcj = ie.current_env().get_background_caching_job(p)
        self.assertFalse(bcj.is_done())

        # Assert that something was read and that the BackgroundCachingJob was
        # successfully stopped.
        self.assertTrue(list(recording.stream(squares).read()))
        rm.cancel()
        self.assertTrue(bcj.is_done())
Exemplo n.º 2
0
  def _mark_all_computed(self):
    # type: () -> None

    """Marks all the PCollections upon a successful pipeline run.

    Polls the pipeline result about every 0.1 seconds until it reaches a
    terminal state. On each poll, while holding ``self._result_lock``:

    * if the user pipeline's background caching job exists and is done, waits
      for the result to finish;
    * otherwise, if ``self._duration_secs`` have elapsed since ``self._start``
      or every stream in ``self._streams`` is done, cancels the result and
      then waits for it to finish.

    Once the state is terminal, the PCollections in ``self._pcolls`` are
    marked as computed only when the final state equals ``PipelineState.DONE``
    and ``self._set_computed`` is truthy. Returns immediately when there is no
    result to wait on.
    """
    if not self._result:
      return

    while not PipelineState.is_terminal(self._result.state):
      with self._result_lock:
        bcj = ie.current_env().get_background_caching_job(self._user_pipeline)
        if bcj and bcj.is_done():
          self._result.wait_until_finish()

        elif (time.time() - self._start >= self._duration_secs or
              all(s.is_done() for s in self._streams.values())):
          # Either the time budget is spent or every stream has finished; both
          # cases stop the pipeline in the same way. The time check is
          # evaluated first, so the streams are only inspected when there is
          # still time left.
          self._result.cancel()
          self._result.wait_until_finish()

      # The sleep happens after the lock has been released, so the lock is not
      # held between polls.
      time.sleep(0.1)

    # Mark the PCollection as computed so that Interactive Beam wouldn't need to
    # re-compute. The state is compared by value rather than identity: an
    # equal state that is not the very same object as the constant (e.g. an
    # equal string built at runtime) must still count as DONE.
    if self._result.state == PipelineState.DONE and self._set_computed:
      ie.current_env().mark_pcollection_computed(self._pcolls)