Exemplo n.º 1
0
    def test(self):
        """Assemble the pipeline: source, start timer, fanned-out Top combines.

        The source is a ``StatefulLoadGenerator`` (followed by timestamp
        assignment and ``FixedWindows(20)``) when the
        ``use_stateful_load_generator`` option is set, otherwise a
        ``SyntheticSource`` read. Each of the ``self.fanout`` branches applies
        a global ``TopCombineFn(self.top_count)`` combine without defaults, a
        consuming ParDo and a closing ``MeasureTime`` step.
        """
        stateful_source = self.get_option_or_default(
            'use_stateful_load_generator', False)
        root = self.pipeline

        if stateful_source:
            generated = root | 'LoadGenerator' >> StatefulLoadGenerator(
                self.input_options)
            stamped = generated | beam.ParDo(AssignTimestamps())
            source = stamped | beam.WindowInto(window.FixedWindows(20))
        else:
            source = root | 'Read synthetic' >> beam.io.Read(
                SyntheticSource(self.parse_synthetic_source_options()))

        start_timer = beam.ParDo(MeasureTime(self.metrics_namespace))
        pc = source | 'Measure time: Start' >> start_timer

        # Every branch hangs off the same timed collection; the branch index
        # is baked into each label so the transform names stay unique.
        for index in range(self.fanout):
            top_elements = pc | ('Combine with Top %i' % index) >> (
                beam.CombineGlobally(
                    beam.combiners.TopCombineFn(self.top_count)
                ).without_defaults())
            consumed = top_elements | ('Consume %i' % index) >> beam.ParDo(
                self._GetElement())
            # pylint: disable=expression-not-assigned
            consumed | ('Measure time: End %i' % index) >> beam.ParDo(
                MeasureTime(self.metrics_namespace))
Exemplo n.º 2
0
  def test(self):
    """Assemble the pipeline: source, timing, chained counter steps, metrics.

    The source is a ``StatefulLoadGenerator`` when the ``streaming`` option
    is set, otherwise a ``SyntheticSource`` read. After the start timer and
    timestamp assignment, ``self.iterations`` ``CounterOperation`` ParDos are
    chained one after another, followed by the latency and end-time steps.
    """
    class CounterOperation(beam.DoFn):
      """DoFn that bumps a set of counters repeatedly for each element."""
      def __init__(self, number_of_counters, number_of_operations):
        self.number_of_operations = number_of_operations
        self.counters = [
            Metrics.counter('do-not-publish', 'name-{}'.format(index))
            for index in range(number_of_counters)
        ]

      # ``self`` here is the enclosing test instance (a free variable of the
      # class body), not a CounterOperation instance.
      if self.stateful:
        state_param = beam.DoFn.StateParam(
            userstate.CombiningValueStateSpec(
                'count',
                beam.coders.IterableCoder(beam.coders.VarIntCoder()),
                sum))
      else:
        state_param = None

      def process(self, element, state=state_param):
        for _ in range(self.number_of_operations):
          for metric in self.counters:
            metric.inc()
          if not state:
            continue
          state.add(1)
        yield element

    is_streaming = self.get_option_or_default('streaming', False)
    root = self.pipeline

    if is_streaming:
      source = root | 'LoadGenerator' >> StatefulLoadGenerator(
          self.input_options)
    else:
      source = root | 'Read synthetic' >> beam.io.Read(
          SyntheticSource(self.parse_synthetic_source_options()))

    timed = source | 'Measure time: Start' >> beam.ParDo(
        MeasureTime(self.metrics_namespace))
    pc = timed | 'Assign timestamps' >> beam.ParDo(AssignTimestamps())

    # Chain the steps sequentially: each iteration consumes the previous
    # step's output.
    for step in range(self.iterations):
      counter_fn = CounterOperation(
          self.number_of_counters, self.number_of_operations)
      pc = pc | ('Step: %d' % step) >> beam.ParDo(counter_fn)

    with_latency = pc | 'Measure latency' >> beam.ParDo(
        MeasureLatency(self.metrics_namespace))
    # pylint: disable=expression-not-assigned
    with_latency | 'Measure time: End' >> beam.ParDo(
        MeasureTime(self.metrics_namespace))