Exemplo n.º 1
0
def create_operation(operation_name,
                     spec,
                     counter_factory,
                     step_name,
                     state_sampler,
                     test_shuffle_source=None,
                     test_shuffle_sink=None,
                     is_streaming=False):
    """Build the Operation object that corresponds to the type of ``spec``.

    The ``operation_specs.Worker*`` checks are tried in a fixed order and the
    first match decides which operation class is instantiated.  Classes that
    live in ``dataflow_worker`` are imported only inside the branch that
    needs them.

    Args:
      operation_name: First positional argument given to the operation
        constructor.
      spec: The operation specification; its type selects the operation
        class and it is passed as the second constructor argument.
      counter_factory: Third positional constructor argument.
      step_name: Stored on the result as ``step_name`` and used to build the
        ``MetricsContainer`` attached to the result.
      state_sampler: Fourth positional constructor argument.
      test_shuffle_source: Forwarded as ``shuffle_source`` to the grouped and
        ungrouped shuffle read operations; not used for other spec types.
      test_shuffle_sink: Forwarded as ``shuffle_sink`` to the shuffle write
        operation; not used for other spec types.
      is_streaming: For ``WorkerMergeWindows`` specs, a truthy value selects
        the streaming group-also-by-windows operation and a falsy value the
        batch one; not used for other spec types.

    Returns:
      The created operation, with ``step_name``, ``metrics_container`` and
      ``scoped_metrics_container`` set on it.

    Raises:
      TypeError: If ``spec`` matches none of the handled spec types.
    """
    # All operation constructors below share these four leading positionals.
    ctor_args = (operation_name, spec, counter_factory, state_sampler)

    if isinstance(spec, operation_specs.WorkerRead):
        if isinstance(spec.source, iobase.SourceBundle):
            operation = ReadOperation(*ctor_args)
        else:
            from dataflow_worker.native_operations import NativeReadOperation
            operation = NativeReadOperation(*ctor_args)
    elif isinstance(spec, operation_specs.WorkerWrite):
        from dataflow_worker.native_operations import NativeWriteOperation
        operation = NativeWriteOperation(*ctor_args)
    elif isinstance(spec, operation_specs.WorkerCombineFn):
        operation = CombineOperation(*ctor_args)
    elif isinstance(spec, operation_specs.WorkerPartialGroupByKey):
        operation = create_pgbk_op(*ctor_args)
    elif isinstance(spec, operation_specs.WorkerDoFn):
        operation = DoOperation(*ctor_args)
    elif isinstance(spec, operation_specs.WorkerGroupingShuffleRead):
        from dataflow_worker.shuffle_operations import (
            GroupedShuffleReadOperation)
        operation = GroupedShuffleReadOperation(
            *ctor_args, shuffle_source=test_shuffle_source)
    elif isinstance(spec, operation_specs.WorkerUngroupedShuffleRead):
        from dataflow_worker.shuffle_operations import (
            UngroupedShuffleReadOperation)
        operation = UngroupedShuffleReadOperation(
            *ctor_args, shuffle_source=test_shuffle_source)
    elif isinstance(spec, operation_specs.WorkerInMemoryWrite):
        operation = InMemoryWriteOperation(*ctor_args)
    elif isinstance(spec, operation_specs.WorkerShuffleWrite):
        from dataflow_worker.shuffle_operations import ShuffleWriteOperation
        operation = ShuffleWriteOperation(
            *ctor_args, shuffle_sink=test_shuffle_sink)
    elif isinstance(spec, operation_specs.WorkerFlatten):
        operation = FlattenOperation(*ctor_args)
    elif isinstance(spec, operation_specs.WorkerMergeWindows):
        # Both variants are imported before the streaming flag is consulted.
        from dataflow_worker.shuffle_operations import (
            BatchGroupAlsoByWindowsOperation,
            StreamingGroupAlsoByWindowsOperation)
        gabw_cls = (StreamingGroupAlsoByWindowsOperation
                    if is_streaming else BatchGroupAlsoByWindowsOperation)
        operation = gabw_cls(*ctor_args)
    elif isinstance(spec, operation_specs.WorkerReifyTimestampAndWindows):
        from dataflow_worker.shuffle_operations import (
            ReifyTimestampAndWindowsOperation)
        operation = ReifyTimestampAndWindowsOperation(*ctor_args)
    else:
        message = ('Expected an instance of operation_specs.Worker* class '
                   'instead of %s' % (spec, ))
        raise TypeError(message)

    # Attach the step name and per-step metrics containers to the result.
    operation.step_name = step_name
    operation.metrics_container = MetricsContainer(step_name)
    operation.scoped_metrics_container = ScopedMetricsContainer(
        operation.metrics_container)
    return operation
Exemplo n.º 2
0
def create_operation(name_context,
                     spec,
                     counter_factory,
                     step_name,
                     state_sampler,
                     test_shuffle_source=None,
                     test_shuffle_sink=None,
                     is_streaming=False):
    """Build the Operation object that corresponds to the type of ``spec``.

    The ``operation_specs.Worker*`` checks are tried in a fixed order and the
    first match decides which operation class is instantiated and returned.
    Classes that live in ``dataflow_worker`` are imported only inside the
    branch that needs them.

    Args:
      name_context: Either a ``common.NameContext`` or some other value.  A
        value that is not a ``common.NameContext`` is wrapped in a
        ``common.DataflowNameContext`` as its ``step_name``.  The resulting
        context is the first positional argument of the operation
        constructor.
      spec: The operation specification; its type selects the operation
        class and it is passed as the second constructor argument.
      counter_factory: Third positional constructor argument.
      step_name: Used only as ``user_name`` when ``name_context`` has to be
        wrapped.
      state_sampler: Fourth positional constructor argument.
      test_shuffle_source: Forwarded as ``shuffle_source`` to the grouped and
        ungrouped shuffle read operations; not used for other spec types.
      test_shuffle_sink: Forwarded as ``shuffle_sink`` to the shuffle write
        operation; not used for other spec types.
      is_streaming: For ``WorkerMergeWindows`` specs, a truthy value selects
        the streaming group-also-by-windows operation and a falsy value the
        batch one; not used for other spec types.

    Returns:
      The created operation.

    Raises:
      TypeError: If ``spec`` matches none of the handled spec types.
    """
    if not isinstance(name_context, common.NameContext):
        # TODO(BEAM-4028): Remove ad-hoc NameContext once all has been migrated.
        name_context = common.DataflowNameContext(
            step_name=name_context, user_name=step_name, system_name=None)

    # All operation constructors below share these four leading positionals.
    ctor_args = (name_context, spec, counter_factory, state_sampler)

    if isinstance(spec, operation_specs.WorkerRead):
        if isinstance(spec.source, iobase.SourceBundle):
            return ReadOperation(*ctor_args)
        from dataflow_worker.native_operations import NativeReadOperation
        return NativeReadOperation(*ctor_args)

    if isinstance(spec, operation_specs.WorkerWrite):
        from dataflow_worker.native_operations import NativeWriteOperation
        return NativeWriteOperation(*ctor_args)

    if isinstance(spec, operation_specs.WorkerCombineFn):
        return CombineOperation(*ctor_args)

    if isinstance(spec, operation_specs.WorkerPartialGroupByKey):
        return create_pgbk_op(*ctor_args)

    if isinstance(spec, operation_specs.WorkerDoFn):
        return DoOperation(*ctor_args)

    if isinstance(spec, operation_specs.WorkerGroupingShuffleRead):
        from dataflow_worker.shuffle_operations import (
            GroupedShuffleReadOperation)
        return GroupedShuffleReadOperation(
            *ctor_args, shuffle_source=test_shuffle_source)

    if isinstance(spec, operation_specs.WorkerUngroupedShuffleRead):
        from dataflow_worker.shuffle_operations import (
            UngroupedShuffleReadOperation)
        return UngroupedShuffleReadOperation(
            *ctor_args, shuffle_source=test_shuffle_source)

    if isinstance(spec, operation_specs.WorkerInMemoryWrite):
        return InMemoryWriteOperation(*ctor_args)

    if isinstance(spec, operation_specs.WorkerShuffleWrite):
        from dataflow_worker.shuffle_operations import ShuffleWriteOperation
        return ShuffleWriteOperation(
            *ctor_args, shuffle_sink=test_shuffle_sink)

    if isinstance(spec, operation_specs.WorkerFlatten):
        return FlattenOperation(*ctor_args)

    if isinstance(spec, operation_specs.WorkerMergeWindows):
        # Both variants are imported before the streaming flag is consulted.
        from dataflow_worker.shuffle_operations import (
            BatchGroupAlsoByWindowsOperation,
            StreamingGroupAlsoByWindowsOperation)
        gabw_cls = (StreamingGroupAlsoByWindowsOperation
                    if is_streaming else BatchGroupAlsoByWindowsOperation)
        return gabw_cls(*ctor_args)

    if isinstance(spec, operation_specs.WorkerReifyTimestampAndWindows):
        from dataflow_worker.shuffle_operations import (
            ReifyTimestampAndWindowsOperation)
        return ReifyTimestampAndWindowsOperation(*ctor_args)

    message = ('Expected an instance of operation_specs.Worker* class '
               'instead of %s' % (spec, ))
    raise TypeError(message)
Exemplo n.º 3
0
def create_operation(name_context,
                     spec,
                     counter_factory,
                     step_name=None,
                     state_sampler=None,
                     test_shuffle_source=None,
                     test_shuffle_sink=None,
                     is_streaming=False):
    # type: (...) -> Operation
    """Build the Operation object that corresponds to the type of ``spec``.

    The ``operation_specs.Worker*`` checks are tried in a fixed order and the
    first match decides which operation class is instantiated and returned.
    Classes that live in ``dataflow_worker`` are imported only inside the
    branch that needs them.

    Args:
      name_context: Either a ``common.NameContext`` or some other value.  A
        value that is not a ``common.NameContext`` is wrapped as
        ``common.NameContext(step_name=name_context)``.  The resulting
        context is the first positional argument of the operation
        constructor.
      spec: The operation specification; its type selects the operation
        class and it is passed as the second constructor argument.
      counter_factory: Third positional constructor argument.
      step_name: Accepted but not referenced anywhere in this function body.
      state_sampler: Fourth positional constructor argument.
      test_shuffle_source: Forwarded as ``shuffle_source`` to the grouped and
        ungrouped shuffle read operations; not used for other spec types.
      test_shuffle_sink: Forwarded as ``shuffle_sink`` to the shuffle write
        operation; not used for other spec types.
      is_streaming: For ``WorkerMergeWindows`` specs, a truthy value selects
        the streaming group-also-by-windows operation and a falsy value the
        batch one; not used for other spec types.

    Returns:
      The created operation.

    Raises:
      TypeError: If ``spec`` matches none of the handled spec types.
    """
    if not isinstance(name_context, common.NameContext):
        # Anything else is taken to be the step name itself.
        name_context = common.NameContext(step_name=name_context)

    # All operation constructors below share these four leading positionals.
    ctor_args = (name_context, spec, counter_factory, state_sampler)

    if isinstance(spec, operation_specs.WorkerRead):
        if isinstance(spec.source, iobase.SourceBundle):
            return ReadOperation(*ctor_args)
        from dataflow_worker.native_operations import NativeReadOperation
        return NativeReadOperation(*ctor_args)

    if isinstance(spec, operation_specs.WorkerWrite):
        from dataflow_worker.native_operations import NativeWriteOperation
        return NativeWriteOperation(*ctor_args)

    if isinstance(spec, operation_specs.WorkerCombineFn):
        return CombineOperation(*ctor_args)

    if isinstance(spec, operation_specs.WorkerPartialGroupByKey):
        return create_pgbk_op(*ctor_args)

    if isinstance(spec, operation_specs.WorkerDoFn):
        return DoOperation(*ctor_args)

    if isinstance(spec, operation_specs.WorkerGroupingShuffleRead):
        from dataflow_worker.shuffle_operations import (
            GroupedShuffleReadOperation)
        return GroupedShuffleReadOperation(
            *ctor_args, shuffle_source=test_shuffle_source)

    if isinstance(spec, operation_specs.WorkerUngroupedShuffleRead):
        from dataflow_worker.shuffle_operations import (
            UngroupedShuffleReadOperation)
        return UngroupedShuffleReadOperation(
            *ctor_args, shuffle_source=test_shuffle_source)

    if isinstance(spec, operation_specs.WorkerInMemoryWrite):
        return InMemoryWriteOperation(*ctor_args)

    if isinstance(spec, operation_specs.WorkerShuffleWrite):
        from dataflow_worker.shuffle_operations import ShuffleWriteOperation
        return ShuffleWriteOperation(
            *ctor_args, shuffle_sink=test_shuffle_sink)

    if isinstance(spec, operation_specs.WorkerFlatten):
        return FlattenOperation(*ctor_args)

    if isinstance(spec, operation_specs.WorkerMergeWindows):
        # Both variants are imported before the streaming flag is consulted.
        from dataflow_worker.shuffle_operations import (
            BatchGroupAlsoByWindowsOperation,
            StreamingGroupAlsoByWindowsOperation)
        gabw_cls = (StreamingGroupAlsoByWindowsOperation
                    if is_streaming else BatchGroupAlsoByWindowsOperation)
        return gabw_cls(*ctor_args)

    if isinstance(spec, operation_specs.WorkerReifyTimestampAndWindows):
        from dataflow_worker.shuffle_operations import (
            ReifyTimestampAndWindowsOperation)
        return ReifyTimestampAndWindowsOperation(*ctor_args)

    message = ('Expected an instance of operation_specs.Worker* class '
               'instead of %s' % (spec, ))
    raise TypeError(message)