def expand(self, pcoll):
  """Build the write pipeline: initialize once, write bundles, finalize once.

  A single-element 'DoOnce' collection drives both sink initialization and
  finalization; the write results are fed to finalization as a side input.
  When the sink declares ``num_shards``, elements are keyed (round-robin for
  more than one shard) and grouped so each shard is written as one bundle;
  otherwise bundles are written free-form and their results regrouped.
  """
  trigger = pcoll.pipeline | 'DoOnce' >> core.Create([None])
  # One-time sink initialization; its result is shared via side inputs below.
  init_results = trigger | 'InitializeWrite' >> core.Map(
      lambda _, sink: sink.initialize_write(), self.sink)
  requested_shards = getattr(self.sink, 'num_shards', 0)
  if requested_shards:
    shard_count = requested_shards
    if shard_count == 1:
      # Single shard: every element shares one key.
      keyed = pcoll | core.Map(lambda x: (None, x))
    else:
      # Spread elements across shard_count keys.
      keyed = pcoll | core.ParDo(_RoundRobinKeyFn(shard_count))
    bundle_results = (
        keyed
        | core.WindowInto(window.GlobalWindows())
        | core.GroupByKey()
        | 'WriteBundles' >> core.ParDo(
            _WriteKeyedBundleDoFn(self.sink), AsSingleton(init_results)))
  else:
    shard_count = 1
    # Write runner-chosen bundles, then funnel all results through one key
    # so finalization sees the complete set.
    bundle_results = (
        pcoll
        | 'WriteBundles' >> core.ParDo(
            _WriteBundleDoFn(self.sink), AsSingleton(init_results))
        | 'Pair' >> core.Map(lambda x: (None, x))
        | core.WindowInto(window.GlobalWindows())
        | core.GroupByKey()
        | 'Extract' >> core.FlatMap(lambda x: x[1]))
  return trigger | 'FinalizeWrite' >> core.FlatMap(
      _finalize_write, self.sink, AsSingleton(init_results),
      AsIter(bundle_results), shard_count)
def expand(self, pcoll):
  """Deduplicate a PCollection by element value.

  Each element becomes its own key (paired with None), duplicates are
  dropped per key within the configured processing/event-time durations,
  and the surviving keys are emitted as the output values.
  """
  keyed = pcoll | 'Use Value as Key' >> core.Map(lambda elem: (elem, None))
  deduped = keyed | 'DeduplicatePerKey' >> DeduplicatePerKey(
      processing_time_duration=self.processing_time_duration,
      event_time_duration=self.event_time_duration)
  return deduped | 'Output Value' >> core.Map(lambda pair: pair[0])
def expand(self, pcoll):
  """Count occurrences of each distinct element in the input PCollection.

  Pairs each element with None (so elements act as keys), then combines
  per key with CountCombineFn to produce (element, count) pairs.
  """
  paired_with_void_type = KV[pcoll.element_type, Any]
  return (pcoll
          # Label the transform with the '>>' operator instead of passing it
          # as the first positional argument to core.Map — the positional
          # label form is a removed legacy API, and this matches the
          # labeling style used by the sibling expand() implementation.
          | '%s:PairWithVoid' % self.label >> core.Map(
              lambda x: (x, None)).with_output_types(paired_with_void_type)
          | core.CombinePerKey(CountCombineFn()))
def expand(self, pcoll):
  """Count occurrences of each distinct element in the input PCollection.

  Emits (element, count) pairs: every element is paired with None to act
  as a key, then CountCombineFn tallies the values per key.
  """
  keyed_hint = typehints.Tuple[pcoll.element_type, Any]
  counted_hint = typehints.KV[pcoll.element_type, int]
  paired = (
      pcoll
      | '%s:PairWithVoid' % self.label >> core.Map(
          lambda elem: (elem, None)).with_output_types(keyed_hint))
  return paired | core.CombinePerKey(
      CountCombineFn()).with_output_types(counted_hint)