def _get_transform_overrides(): # A list of PTransformOverride objects to be applied before running a pipeline # using DirectRunner. # Currently this only works for overrides where the input and output types do # not change. # For internal use only; no backwards-compatibility guarantees. # Importing following locally to avoid a circular dependency. from apache_beam.runners.sdf_common import SplittableParDoOverride from apache_beam.runners.direct.sdf_direct_runner import ProcessKeyedElementsViaKeyedWorkItemsOverride return [SplittableParDoOverride(), ProcessKeyedElementsViaKeyedWorkItemsOverride()]
def _get_transform_overrides(pipeline_options): # A list of PTransformOverride objects to be applied before running a pipeline # using DirectRunner. # Currently this only works for overrides where the input and output types do # not change. # For internal use only; no backwards-compatibility guarantees. # Importing following locally to avoid a circular dependency. from apache_beam.pipeline import PTransformOverride from apache_beam.runners.sdf_common import SplittableParDoOverride from apache_beam.runners.direct.helper_transforms import LiftedCombinePerKey from apache_beam.runners.direct.sdf_direct_runner import ProcessKeyedElementsViaKeyedWorkItemsOverride class CombinePerKeyOverride(PTransformOverride): def matches(self, applied_ptransform): if isinstance(applied_ptransform.transform, CombinePerKey): return applied_ptransform.inputs[0].windowing.is_default() def get_replacement_transform(self, transform): # TODO: Move imports to top. Pipeline <-> Runner dependency cause problems # with resolving imports when they are at top. # pylint: disable=wrong-import-position try: return LiftedCombinePerKey(transform.fn, transform.args, transform.kwargs) except NotImplementedError: return transform class StreamingGroupByKeyOverride(PTransformOverride): def matches(self, applied_ptransform): # Note: we match the exact class, since we replace it with a subclass. return applied_ptransform.transform.__class__ == _GroupByKeyOnly def get_replacement_transform(self, transform): # Use specialized streaming implementation. transform = _StreamingGroupByKeyOnly() return transform class StreamingGroupAlsoByWindowOverride(PTransformOverride): def matches(self, applied_ptransform): # Note: we match the exact class, since we replace it with a subclass. transform = applied_ptransform.transform return (isinstance(applied_ptransform.transform, ParDo) and isinstance(transform.dofn, _GroupAlsoByWindowDoFn) and transform.__class__ != _StreamingGroupAlsoByWindow) def get_replacement_transform(self, transform): # Use specialized streaming implementation. transform = _StreamingGroupAlsoByWindow(transform.dofn.windowing) return transform overrides = [SplittableParDoOverride(), ProcessKeyedElementsViaKeyedWorkItemsOverride(), CombinePerKeyOverride()] # Add streaming overrides, if necessary. if pipeline_options.view_as(StandardOptions).streaming: overrides.append(StreamingGroupByKeyOverride()) overrides.append(StreamingGroupAlsoByWindowOverride()) # Add PubSub overrides, if PubSub is available. try: from apache_beam.io.gcp import pubsub as unused_pubsub overrides += _get_pubsub_transform_overrides(pipeline_options) except ImportError: pass return overrides