Пример #1
0
 def test_hash_two_objects(self):
     # Names built from identical fields must hash alike, while a name whose
     # step differs ('step_nam') must not share that hash.
     reference = CounterName('counter_name', 'stage_name', 'step_name')
     identical = CounterName('counter_name', 'stage_name', 'step_name')
     other_step = CounterName('counter_name', 'stage_name', 'step_nam')

     self.assertEqual(hash(reference), hash(identical))
     self.assertNotEqual(hash(reference), hash(other_step))
Пример #2
0
    def test_equal_objects(self):
        # Names built from identical fields compare equal; a different step
        # name breaks equality.
        same_fields = ('counter_name', 'stage_name', 'step_name')
        self.assertEqual(CounterName(*same_fields), CounterName(*same_fields))
        self.assertNotEqual(
            CounterName(*same_fields),
            CounterName('counter_name', 'stage_name', 'step_nam'))

        def with_io_target(target_arg):
            # Builds a fresh name (and a fresh side-input id) on every call so
            # the comparisons below are between distinct objects.
            return CounterName('counter_name',
                               'stage_name',
                               'step_name',
                               io_target=counters.side_input_id(
                                   1, target_arg))

        # Testing objects with an IOTarget.
        self.assertEqual(with_io_target('s9'), with_io_target('s9'))
        self.assertNotEqual(with_io_target('s'), with_io_target('s9'))
Пример #3
0
  def test_mean_counter(self):
    # Feed the integers 0..99 into a MEAN counter and check its value.
    name = CounterName('mean', 'stage_foo', 'step_bar')
    mean_counter = self.counter_factory.get_counter(
        name, counters.Counter.MEAN)
    for sample in range(100):
      mean_counter.update(sample)

    self.assertEqual(49, mean_counter.value())
Пример #4
0
    def scoped_state(self,
                     name_context,
                     state_name,
                     io_target=None,
                     metrics_container=None):
        """Returns the ScopedState for a step and state, creating it if needed.

        States are cached in self._states_by_name, keyed by the CounterName
        built from the arguments; a call that produces an equal CounterName
        gets the cached object back.

        Args:
          name_context: common.NameContext. It is the step name information.
            Any other value is wrapped with common.NameContext(name_context).
          state_name: str. It is the state name (e.g. process / start /
            finish). The counter is named state_name + '-msecs'.
          io_target: forwarded unchanged as the io_target of the CounterName.
          metrics_container: MetricsContainer. The step's metrics container.

        Returns:
          A ScopedState that keeps the execution context and is able to switch
          it for the execution thread.
        """
        if not isinstance(name_context, common.NameContext):
            name_context = common.NameContext(name_context)

        counter_name = CounterName(state_name + '-msecs',
                                   stage_name=self._prefix,
                                   step_name=name_context.metrics_name(),
                                   io_target=io_target)

        # Only build the SUM counter and the state on a cache miss.
        if counter_name not in self._states_by_name:
            msecs_counter = self._counter_factory.get_counter(
                counter_name, Counter.SUM)
            self._states_by_name[counter_name] = super(
                StateSampler, self)._scoped_state(counter_name, name_context,
                                                  msecs_counter,
                                                  metrics_container)
        return self._states_by_name[counter_name]
Пример #5
0
  def test_sum_counter(self):
    # Feed the integers 0..99 into a SUM counter and check the total.
    name = CounterName('sum', 'stage_foo', 'step_bar')
    sum_counter = self.counter_factory.get_counter(name, counters.Counter.SUM)
    for sample in range(100):
      sum_counter.update(sample)

    expected_total = 99 * 50
    self.assertEqual(expected_total, sum_counter.value())
Пример #6
0
  def test_distribution_counter(self):
    # Feed the integers 0..99 into a BEAM_DISTRIBUTION counter and compare the
    # reported tuple (presumably mean, sum, count, min, max -- confirm against
    # the combiner) with the expected one.
    name = CounterName('distribution', 'stage_foo', 'step_bar')
    distribution_counter = self.counter_factory.get_counter(
        name, counters.Counter.BEAM_DISTRIBUTION)
    for sample in range(100):
      distribution_counter.update(sample)

    expected = (49, 4950, 100, 0, 99)
    self.assertEqual(expected, distribution_counter.value())
Пример #7
0
 def _update_counters_for_requesting_step(self, step_name):
     """Re-points the scoped state and byte counter at the given step.

     Both objects are registered under self.declaring_step; step_name only
     contributes to the io_target, which is built from it and
     self.input_index via counters.side_input_id.

     Args:
       step_name: first argument passed to counters.side_input_id.
     """
     io_target = counters.side_input_id(step_name, self.input_index)

     self.scoped_state = self._state_sampler.scoped_state(
         self.declaring_step, 'read-sideinput', io_target=io_target)

     byte_count_name = CounterName('read-sideinput-byte-count',
                                   step_name=self.declaring_step,
                                   io_target=io_target)
     self.bytes_read_counter = self._counter_factory.get_counter(
         byte_count_name, Counter.SUM)
Пример #8
0
    def test_name_string_representation(self):
        counter_name = CounterName('counter_name', 'stage_name', 'step_name')

        # This string representation is utilized by the worker to report
        # progress. Change only if the worker code has also been changed.
        expected_str = 'stage_name-step_name-counter_name'
        self.assertEqual(expected_str, str(counter_name))

        # repr() wraps the same text and continues with an 'at 0x...' suffix,
        # so only the fixed prefix is checked.
        expected_repr_prefix = (
            '<CounterName<stage_name-step_name-counter_name> at 0x')
        self.assertIn(expected_repr_prefix, repr(counter_name))
Пример #9
0
    def __init__(self,
                 fn,
                 args,
                 kwargs,
                 side_inputs,
                 windowing,
                 tagged_receivers=None,
                 step_name=None,
                 logging_context=None,
                 state=None,
                 scoped_metrics_container=None,
                 operation_name=None):
        """Initializes a DoFnRunner.

        Args:
          fn: user DoFn to invoke
          args: positional side input arguments (static and placeholder), if
            any
          kwargs: keyword side input arguments (static and placeholder), if
            any
          side_inputs: list of sideinput.SideInputMaps for deferred side
            inputs
          windowing: windowing properties of the output PCollection(s); only
            its windowfn attribute is read here
          tagged_receivers: a dict of tag name to Receiver objects
          step_name: the name of this step
          logging_context: DEPRECATED [BEAM-4728]
          state: handle for accessing DoFn state
          scoped_metrics_container: DEPRECATED
          operation_name: The system name assigned by the runner for this
            operation.
        """
        # Need to support multiple iterations.
        side_inputs = list(side_inputs)

        self.step_name = step_name
        self.context = DoFnContext(step_name, state=state)

        signature = DoFnSignature(fn)

        # Optimize for the common case.
        # NOTE(review): tagged_receivers is subscripted unconditionally, so
        # leaving it at its None default raises TypeError on this line.
        untagged_receivers = tagged_receivers[None]

        # TODO(BEAM-3937): Remove if block after output counter released.
        element_output_counter = None
        if 'outputs_per_element_counter' in RuntimeValueProvider.experiments:
            # TODO(BEAM-3955): Make step_name and operation_name less confused.
            counter_name = CounterName('per-element-output-count',
                                       step_name=operation_name)
            distribution = state._counter_factory.get_counter(
                counter_name, Counter.DATAFLOW_DISTRIBUTION)
            element_output_counter = distribution.accumulator

        output_processor = _OutputProcessor(windowing.windowfn,
                                            untagged_receivers,
                                            tagged_receivers,
                                            element_output_counter)

        self.do_fn_invoker = DoFnInvoker.create_invoker(
            signature, output_processor, self.context, side_inputs, args,
            kwargs)
Пример #10
0
 def scoped_state(self, step_name, state_name, io_target=None):
   """Returns the scoped state for a step and state, creating it if needed.

   States are cached in self._states_by_name, keyed by a CounterName named
   state_name + '-msecs' with self._prefix as its stage name.

   Args:
     step_name: step name stored in the CounterName.
     state_name: str. Prefix of the '-msecs' counter name.
     io_target: forwarded unchanged as the io_target of the CounterName.

   Returns:
     The cached state for the CounterName, built on first use through the
     parent class's _scoped_state with a SUM counter.
   """
   counter_name = CounterName(state_name + '-msecs',
                              stage_name=self._prefix,
                              step_name=step_name,
                              io_target=io_target)

   # Only build the counter and the state on a cache miss.
   if counter_name not in self._states_by_name:
     msecs_counter = self._counter_factory.get_counter(counter_name,
                                                       Counter.SUM)
     self._states_by_name[counter_name] = super(
         StateSampler, self)._scoped_state(counter_name, msecs_counter)
   return self._states_by_name[counter_name]
Пример #11
0
  def test_reset(self):
    # A counter that is reset and then fed the same values again must report
    # the same value it did before the reset.
    name = CounterName(self.combiner.default_label, 'stage_foo', 'reset')
    counter = self.counter_factory.get_counter(name, self.combiner)

    def feed_values():
      for value in range(100):
        counter.update(value)

    feed_values()
    expected = counter.value()

    counter.reset()
    feed_values()

    self.assertEqual(expected, counter.value())
Пример #12
0
  def test_basic_sampler(self):
    # Set up state sampler.
    counter_factory = CounterFactory()
    sampler = statesampler.StateSampler(
        'basic', counter_factory, sampling_period_ms=1)

    def basic_step1(counter):
      # Every counter in this test shares the same step and stage.
      return CounterName(counter, step_name='step1', stage_name='basic')

    # Duration of the fastest state. Total test duration is 6 times longer.
    state_duration_ms = 1000
    margin_of_error = 0.25
    unit_sleep = state_duration_ms / 1000

    # Run basic workload transitioning between 3 states.
    sampler.start()
    with sampler.scoped_state('step1', 'statea'):
      time.sleep(unit_sleep)
      self.assertEqual(
          sampler.current_state().name, basic_step1('statea-msecs'))
      with sampler.scoped_state('step1', 'stateb'):
        time.sleep(unit_sleep)
        self.assertEqual(
            sampler.current_state().name, basic_step1('stateb-msecs'))
        with sampler.scoped_state('step1', 'statec'):
          time.sleep(3 * unit_sleep)
          self.assertEqual(
              sampler.current_state().name, basic_step1('statec-msecs'))
        # Second half of the time spent directly in stateb.
        time.sleep(unit_sleep)

    sampler.stop()
    sampler.commit_counters()

    if not statesampler.FAST_SAMPLER:
      # The slow sampler does not implement sampling, so we won't test it.
      return

    # Test that sampled state timings are close to their expected values.
    # yapf: disable
    expected_counter_values = {
        basic_step1('statea-msecs'): state_duration_ms,
        basic_step1('stateb-msecs'): 2 * state_duration_ms,
        basic_step1('statec-msecs'): 3 * state_duration_ms,
    }
    # yapf: enable
    lower_factor = 1.0 - margin_of_error
    upper_factor = 1.0 + margin_of_error
    for counter in counter_factory.get_counters():
      self.assertIn(counter.name, expected_counter_values)
      expected_value = expected_counter_values[counter.name]
      actual_value = counter.value()
      deviation = float(abs(actual_value - expected_value)) / expected_value
      _LOGGER.info('Sampling deviation from expectation: %f', deviation)
      self.assertGreater(actual_value, expected_value * lower_factor)
      self.assertLess(actual_value, expected_value * upper_factor)
Пример #13
0
  def test_basic_sampler(self):
    # Set up state sampler.
    counter_factory = CounterFactory()
    sampler = statesampler.StateSampler(
        'basic', counter_factory, sampling_period_ms=1)

    def basic_step1(counter):
      # Every counter in this test shares the same step and stage.
      return CounterName(counter, step_name='step1', stage_name='basic')

    # Run basic workload transitioning between 3 states.
    sampler.start()
    with sampler.scoped_state('step1', 'statea'):
      time.sleep(0.1)
      self.assertEqual(
          sampler.current_state().name, basic_step1('statea-msecs'))
      with sampler.scoped_state('step1', 'stateb'):
        # stateb's own time is split in two halves around statec.
        time.sleep(0.2 / 2)
        self.assertEqual(
            sampler.current_state().name, basic_step1('stateb-msecs'))
        with sampler.scoped_state('step1', 'statec'):
          time.sleep(0.3)
          self.assertEqual(
              sampler.current_state().name, basic_step1('statec-msecs'))
        time.sleep(0.2 / 2)

    sampler.stop()
    sampler.commit_counters()

    if not statesampler.FAST_SAMPLER:
      # The slow sampler does not implement sampling, so we won't test it.
      return

    # Test that sampled state timings are close to their expected values.
    expected_counter_values = {
        basic_step1('statea-msecs'): 100,
        basic_step1('stateb-msecs'): 200,
        basic_step1('statec-msecs'): 300,
    }
    for counter in counter_factory.get_counters():
      self.assertIn(counter.name, expected_counter_values)
      expected_value = expected_counter_values[counter.name]
      actual_value = counter.value()
      deviation = float(abs(actual_value - expected_value)) / expected_value
      logging.info('Sampling deviation from expectation: %f', deviation)
      # Accept anything strictly within 25% of the expected value.
      self.assertGreater(actual_value, expected_value * 0.75)
      self.assertLess(actual_value, expected_value * 1.25)
Пример #14
0
  def test_update_n(self):
    # update_n(value, n) must leave the counter with the same value as n
    # separate update(value) calls.
    name = CounterName(self.combiner.default_label, 'stage_foo', 'update_n')
    counter = self.counter_factory.get_counter(name, self.combiner)

    # Reference run: value i is applied (100 - i) times, one call at a time.
    for value in range(100):
      for _ in range(100 - value):
        counter.update(value)
    expected = counter.value()

    counter.reset()

    # Same workload expressed through the batched call.
    for value in range(100):
      counter.update_n(value, 100 - value)

    self.assertEqual(expected, counter.value())
Пример #15
0
    def test_log_lull_in_bundle_processor(self):
        # Checks what _log_lull_sampler_info passes to Logger.warning and when
        # it triggers a full thread dump, with time.time() pinned by a mock.
        bundle_processor_cache = mock.MagicMock()
        worker = SdkWorker(bundle_processor_cache)

        # The third argument is presumably the lull duration in nanoseconds
        # (400 s) -- confirm against StateSamplerInfo.
        sampler_info = statesampler.StateSamplerInfo(
            CounterName('progress-msecs', 'stage_name', 'step_name'), 1,
            400000000000, threading.current_thread())

        now = time.time()
        log_full_thread_dump_fn_name = \
            'apache_beam.runners.worker.sdk_worker.SdkWorker._log_full_thread_dump'
        with mock.patch('logging.Logger.warning') as warn_mock:
            with mock.patch(
                    log_full_thread_dump_fn_name) as log_full_thread_dump:
                with mock.patch('time.time') as time_mock:
                    time_mock.return_value = now
                    worker._log_lull_sampler_info(sampler_info)

                    # Positional arguments of the last warning() call; index 0
                    # (presumably the format string) is not inspected.
                    warning_args = warn_mock.call_args[0]
                    processing_template = warning_args[1]
                    step_name_template = warning_args[2]
                    # Plain assignment: the previous chained form
                    # `traceback = warn_mock.call_args = ...` also overwrote
                    # the mock's call_args with this string.
                    traceback = warning_args[3]

                    self.assertIn('progress-msecs', processing_template)
                    self.assertIn('step_name', step_name_template)
                    self.assertIn('test_log_lull_in_bundle_processor',
                                  traceback)

                    log_full_thread_dump.assert_called_once_with()

        # Six minutes later no new thread dump is expected (presumably dumps
        # are rate-limited inside SdkWorker -- the interval is defined there).
        with mock.patch(log_full_thread_dump_fn_name) as log_full_thread_dump:
            with mock.patch('time.time') as time_mock:
                time_mock.return_value = now + 6 * 60  # 6 minutes
                worker._log_lull_sampler_info(sampler_info)
                self.assertFalse(log_full_thread_dump.called,
                                 'log_full_thread_dump should not be called.')

        # Twenty-one minutes after the first call a dump is expected again.
        with mock.patch(log_full_thread_dump_fn_name) as log_full_thread_dump:
            with mock.patch('time.time') as time_mock:
                time_mock.return_value = now + 21 * 60  # 21 minutes
                worker._log_lull_sampler_info(sampler_info)
                log_full_thread_dump.assert_called_once_with()
Пример #16
0
  def update_current_step(self):
    """Update the current running step.

    Due to the fusion optimization, user code may choose to emit the data
    structure that holds side inputs (Iterable, Dict, or others). This call
    updates the current step, to attribute the data consumption to the step
    that is responsible for actual consumption.

    CounterName uses the io_target field for information pertinent to the
    consumption of side inputs.

    The step is read from the state sampler's current state; both the scoped
    state and the byte counter stay registered under self.declaring_step and
    differ from earlier ones only in their io_target.
    """
    current_state = self._state_sampler.current_state()
    operation_name = current_state.name.step_name

    # Built once so the scoped state and the byte counter are guaranteed to
    # carry the same io_target.
    io_target = counters.side_input_id(operation_name, self.input_index)

    self.scoped_state = self._state_sampler.scoped_state(
        self.declaring_step, 'read-sideinput', io_target=io_target)
    self.bytes_read_counter = self._counter_factory.get_counter(
        CounterName(
            'read-sideinput-byte-count',
            step_name=self.declaring_step,
            io_target=io_target),
        Counter.SUM)
Пример #17
0
    def __init__(
            self,
            fn,  # type: core.DoFn
            args,
            kwargs,
            side_inputs,  # type: Iterable[sideinputs.SideInputMap]
            windowing,
            tagged_receivers,  # type: Mapping[Optional[str], Receiver]
            step_name=None,  # type: Optional[str]
            logging_context=None,
            state=None,
            scoped_metrics_container=None,
            operation_name=None,
            user_state_context=None  # type: Optional[userstate.UserStateContext]
    ):
        """Initializes a DoFnRunner.

        Args:
          fn: user DoFn to invoke
          args: positional side input arguments (static and placeholder), if
            any
          kwargs: keyword side input arguments (static and placeholder), if
            any
          side_inputs: list of sideinput.SideInputMaps for deferred side
            inputs
          windowing: windowing properties of the output PCollection(s); only
            its windowfn attribute is read here
          tagged_receivers: a dict of tag name to Receiver objects; the None
            key must be present
          step_name: the name of this step
          logging_context: DEPRECATED [BEAM-4728]
          state: handle for accessing DoFn state
          scoped_metrics_container: DEPRECATED
          operation_name: The system name assigned by the runner for this
            operation.
          user_state_context: The UserStateContext instance for the current
            Stateful DoFn.

        Raises:
          Exception: if fn is a stateful DoFn and no user_state_context was
            supplied.
        """
        # Need to support multiple iterations.
        side_inputs = list(side_inputs)

        self.step_name = step_name
        self.context = DoFnContext(step_name, state=state)
        self.bundle_finalizer_param = DoFn.BundleFinalizerParam()

        signature = DoFnSignature(fn)

        # Optimize for the common case.
        untagged_receivers = tagged_receivers[None]

        # TODO(BEAM-3937): Remove if block after output counter released.
        element_output_counter = None
        if 'outputs_per_element_counter' in RuntimeValueProvider.experiments:
            # TODO(BEAM-3955): Make step_name and operation_name less confused.
            counter_name = CounterName('per-element-output-count',
                                       step_name=operation_name)
            distribution = state._counter_factory.get_counter(
                counter_name, Counter.DATAFLOW_DISTRIBUTION)
            element_output_counter = distribution.accumulator

        output_processor = _OutputProcessor(windowing.windowfn,
                                            untagged_receivers,
                                            tagged_receivers,
                                            element_output_counter)

        # A stateful DoFn cannot run without somewhere to keep its state.
        needs_user_state = signature.is_stateful_dofn()
        if needs_user_state and not user_state_context:
            raise Exception(
                'Requested execution of a stateful DoFn, but no user state context '
                'is available. This likely means that the current runner does not '
                'support the execution of stateful DoFns.')

        self.do_fn_invoker = DoFnInvoker.create_invoker(
            signature,
            output_processor,
            self.context,
            side_inputs,
            args,
            kwargs,
            user_state_context=user_state_context,
            bundle_finalizer_param=self.bundle_finalizer_param)
Пример #18
0
 def _get_state_sampler_info_for_lull(self, lull_duration_s):
     """Builds a StateSamplerInfo fixture for a lull of the given length.

     Args:
       lull_duration_s: lull length in seconds; it is multiplied by 1e9
         before being handed to StateSamplerInfo.

     Returns:
       A statesampler.StateSamplerInfo for a fixed 'progress-msecs' counter
       name and the calling thread. The second positional argument is the
       literal 1; its meaning is defined by StateSamplerInfo.
     """
     counter_name = CounterName('progress-msecs', 'stage_name', 'step_name')
     scaled_duration = lull_duration_s * 1e9
     return statesampler.StateSamplerInfo(
         counter_name, 1, scaled_duration, threading.current_thread())