def test_cancel_stops_recording(self):
  """Tests that cancelling a RecordingManager stops the BackgroundCachingJob.

  Builds a streaming pipeline from a TestStream, starts a recording, and
  asserts that ``rm.cancel()`` transitions the background caching job to the
  done state.
  """
  # Add the TestStream so that it can be cached.
  ib.options.capturable_sources.add(TestStream)

  p = beam.Pipeline(
      InteractiveRunner(), options=PipelineOptions(streaming=True))
  elems = (
      p
      | TestStream().advance_watermark_to(0).advance_processing_time(
          1).add_elements(list(range(10))).advance_processing_time(1))
  squares = elems | beam.Map(lambda x: x**2)

  # Watch the local scope for Interactive Beam so that referenced PCollections
  # will be cached. NOTE: caching is keyed on these local variable names, so
  # the names above are significant.
  ib.watch(locals())

  # This is normally done in the interactive_utils when a transform is
  # applied but needs an IPython environment. So we manually run this here.
  ie.current_env().track_user_pipelines()

  # Get the recording then the BackgroundCachingJob.
  rm = RecordingManager(p)
  recording = rm.record([squares], max_n=10, max_duration=30)

  # The BackgroundCachingJob is still waiting for more elements, so it isn't
  # done yet.
  bcj = ie.current_env().get_background_caching_job(p)
  self.assertFalse(bcj.is_done())

  # Assert that something was read and that the BackgroundCachingJob was
  # successfully stopped.
  self.assertTrue(list(recording.stream(squares).read()))
  rm.cancel()
  self.assertTrue(bcj.is_done())
def _mark_all_computed(self):
  # type: () -> None

  """Marks all the PCollections upon a successful pipeline run."""
  if not self._result:
    return

  # Poll until the underlying pipeline result reaches a terminal state. We
  # finish it ourselves when the background caching job is done, and cancel
  # it when either the recording duration has elapsed or every stream has
  # been fully consumed.
  while not PipelineState.is_terminal(self._result.state):
    with self._result_lock:
      caching_job = ie.current_env().get_background_caching_job(
          self._user_pipeline)
      if caching_job and caching_job.is_done():
        self._result.wait_until_finish()
      elif (time.time() - self._start >= self._duration_secs or
            all(stream.is_done() for stream in self._streams.values())):
        self._result.cancel()
        self._result.wait_until_finish()

    time.sleep(0.1)

  # Mark the PCollections as computed so that Interactive Beam wouldn't need
  # to re-compute them.
  if self._result.state is PipelineState.DONE and self._set_computed:
    ie.current_env().mark_pcollection_computed(self._pcolls)