def test_cacheables(self):
  """Check the cacheables built for the PCollections defined in this test.

  Three PCollections are bound to local variables, the locals are registered
  through ``ib.watch``, and the entries of ``pipeline_instrument.cacheables``
  that belong to this test's pipeline are compared against hand-built
  ``instr.Cacheable`` objects.
  """
  p = beam.Pipeline(interactive_runner.InteractiveRunner())
  ie.current_env().set_cache_manager(InMemoryCache(), p)
  # pylint: disable=range-builtin-not-iterating
  init_pcoll = p | 'Init Create' >> beam.Create(range(10))
  squares = init_pcoll | 'Square' >> beam.Map(lambda x: x * x)
  cubes = init_pcoll | 'Cube' >> beam.Map(lambda x: x**3)
  # The expected `var` values asserted below match these local variable
  # names, so keep the names in sync with the expectations.
  ib.watch(locals())

  pipeline_instrument = instr.build_pipeline_instrument(p)

  # TODO(BEAM-7760): The PipelineInstrument cacheables maintains a global list
  # of cacheable PCollections across all pipelines. Comparing the whole
  # mapping would make this test depend on whatever other pipelines have
  # registered, so only the cacheables of this test's pipeline are checked.
  cacheables = {
      key: cacheable
      for key, cacheable in pipeline_instrument.cacheables.items()
      if cacheable.pcoll.pipeline is p
  }

  self.assertEqual(
      cacheables,
      {
          pipeline_instrument._cacheable_key(init_pcoll): instr.Cacheable(
              var='init_pcoll',
              version=str(id(init_pcoll)),
              pcoll_id='ref_PCollection_PCollection_8',
              producer_version=str(id(init_pcoll.producer)),
              pcoll=init_pcoll),
          pipeline_instrument._cacheable_key(squares): instr.Cacheable(
              var='squares',
              version=str(id(squares)),
              pcoll_id='ref_PCollection_PCollection_9',
              producer_version=str(id(squares.producer)),
              pcoll=squares),
          pipeline_instrument._cacheable_key(cubes): instr.Cacheable(
              var='cubes',
              version=str(id(cubes)),
              pcoll_id='ref_PCollection_PCollection_10',
              producer_version=str(id(cubes.producer)),
              pcoll=cubes),
      })
def test_cacheables(self):
  """Compare this pipeline's cacheables against hand-built expectations.

  Builds a small pipeline with three PCollections held in local variables,
  registers the locals via ``ib.watch``, builds a pipeline instrument and
  asserts that the cacheables belonging to this pipeline equal one
  ``instr.Cacheable`` per PCollection.
  """
  p = beam.Pipeline(interactive_runner.InteractiveRunner())
  ie.current_env().set_cache_manager(InMemoryCache(), p)
  # pylint: disable=range-builtin-not-iterating
  init_pcoll = p | 'Init Create' >> beam.Create(range(10))
  squares = init_pcoll | 'Square' >> beam.Map(lambda x: x * x)
  cubes = init_pcoll | 'Cube' >> beam.Map(lambda x: x**3)
  # The local names above are the `var` values expected further down.
  ib.watch(locals())

  instrument = instr.build_pipeline_instrument(p)

  # TODO(BEAM-7760): The PipelineInstrument cacheables maintains a global list
  # of cacheable PCollections across all pipelines. Here we take the subset of
  # cacheables that only pertain to this test's pipeline.
  own_cacheables = dict(
      item for item in instrument.cacheables.items()
      if item[1].pcoll.pipeline is p)

  def expected_entry(var, pcoll, pcoll_id):
    # One (key, Cacheable) pair as it is expected to appear in the cacheables.
    return (
        instrument._cacheable_key(pcoll),
        instr.Cacheable(
            var=var,
            version=str(id(pcoll)),
            pcoll_id=pcoll_id,
            producer_version=str(id(pcoll.producer)),
            pcoll=pcoll))

  expected = dict([
      expected_entry('init_pcoll', init_pcoll, 'ref_PCollection_PCollection_8'),
      expected_entry('squares', squares, 'ref_PCollection_PCollection_9'),
      expected_entry('cubes', cubes, 'ref_PCollection_PCollection_10'),
  ])

  self.assertEqual(own_cacheables, expected)