Пример #1
0
def run_coder_benchmarks(num_runs, input_size, seed, verbose):
    random.seed(seed)

    # TODO(BEAM-4441): Pick coders using type hints, for example:
    # tuple_coder = typecoders.registry.get_coder(typehints.Tuple[int, ...])
    benchmarks = [
        coder_benchmark_factory(coders.FastPrimitivesCoder(), small_int),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), large_int),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), small_string),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), large_string),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), small_list),
        coder_benchmark_factory(
            coders.IterableCoder(coders.FastPrimitivesCoder()), small_list),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), large_list),
        coder_benchmark_factory(
            coders.IterableCoder(coders.FastPrimitivesCoder()), large_list),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), small_tuple),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), large_tuple),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), small_dict),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), large_dict),
        coder_benchmark_factory(
            coders.WindowedValueCoder(coders.FastPrimitivesCoder()),
            wv_with_one_window),
        coder_benchmark_factory(
            coders.WindowedValueCoder(coders.FastPrimitivesCoder()),
            wv_with_multiple_windows),
    ]

    suite = [
        utils.BenchmarkConfig(b, input_size, num_runs) for b in benchmarks
    ]
    utils.run_benchmarks(suite, verbose=verbose)
Пример #2
0
def run_coder_benchmarks(num_runs,
                         input_size,
                         seed,
                         verbose,
                         filter_regex='.*'):
    random.seed(seed)

    # TODO(BEAM-4441): Pick coders using type hints, for example:
    # tuple_coder = typecoders.registry.get_coder(typing.Tuple[int, ...])
    benchmarks = [
        coder_benchmark_factory(coders.FastPrimitivesCoder(), small_int),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), large_int),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), small_string),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), large_string),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), small_list),
        coder_benchmark_factory(
            coders.IterableCoder(coders.FastPrimitivesCoder()), small_list),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), large_list),
        coder_benchmark_factory(
            coders.IterableCoder(coders.FastPrimitivesCoder()), large_list),
        coder_benchmark_factory(
            coders.IterableCoder(coders.FastPrimitivesCoder()),
            large_iterable),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), small_tuple),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), large_tuple),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), small_dict),
        coder_benchmark_factory(coders.FastPrimitivesCoder(), large_dict),
        coder_benchmark_factory(coders.ProtoCoder(test_message.MessageWithMap),
                                small_message_with_map),
        coder_benchmark_factory(coders.ProtoCoder(test_message.MessageWithMap),
                                large_message_with_map),
        coder_benchmark_factory(
            coders.DeterministicProtoCoder(test_message.MessageWithMap),
            small_message_with_map),
        coder_benchmark_factory(
            coders.DeterministicProtoCoder(test_message.MessageWithMap),
            large_message_with_map),
        coder_benchmark_factory(
            coders.WindowedValueCoder(coders.FastPrimitivesCoder()),
            wv_with_one_window),
        coder_benchmark_factory(
            coders.WindowedValueCoder(coders.FastPrimitivesCoder(),
                                      coders.IntervalWindowCoder()),
            wv_with_multiple_windows),
        coder_benchmark_factory(
            coders.WindowedValueCoder(coders.FastPrimitivesCoder(),
                                      coders.GlobalWindowCoder()),
            globally_windowed_value),
        coder_benchmark_factory(
            coders.LengthPrefixCoder(coders.FastPrimitivesCoder()), small_int)
    ]

    suite = [
        utils.BenchmarkConfig(b, input_size, num_runs) for b in benchmarks
        if re.search(filter_regex, b.__name__, flags=re.I)
    ]
    utils.run_benchmarks(suite, verbose=verbose)
Пример #3
0
  def test_windowedvalue_coder_paneinfo(self):
    coder = coders.WindowedValueCoder(coders.VarIntCoder(),
                                      coders.GlobalWindowCoder())
    test_paneinfo_values = [
        windowed_value.PANE_INFO_UNKNOWN,
        windowed_value.PaneInfo(
            True, True, windowed_value.PaneInfoTiming.EARLY, 0, -1),
        windowed_value.PaneInfo(
            True, False, windowed_value.PaneInfoTiming.ON_TIME, 0, 0),
        windowed_value.PaneInfo(
            True, False, windowed_value.PaneInfoTiming.ON_TIME, 10, 0),
        windowed_value.PaneInfo(
            False, True, windowed_value.PaneInfoTiming.ON_TIME, 0, 23),
        windowed_value.PaneInfo(
            False, True, windowed_value.PaneInfoTiming.ON_TIME, 12, 23),
        windowed_value.PaneInfo(
            False, False, windowed_value.PaneInfoTiming.LATE, 0, 123),]

    test_values = [windowed_value.WindowedValue(123, 234, (GlobalWindow(),), p)
                   for p in test_paneinfo_values]

    # Test unnested.
    self.check_coder(coder, windowed_value.WindowedValue(
        123, 234, (GlobalWindow(),), windowed_value.PANE_INFO_UNKNOWN))
    for value in test_values:
      self.check_coder(coder, value)

    # Test nested.
    for value1 in test_values:
      for value2 in test_values:
        self.check_coder(coders.TupleCoder((coder, coder)), (value1, value2))
Пример #4
0
  def test_windowed_value_coder(self):
    coder = coders.WindowedValueCoder(
        coders.VarIntCoder(), coders.GlobalWindowCoder())
    # Verify cloud object representation
    self.assertEqual({
        '@type': 'kind:windowed_value',
        'is_wrapper': True,
        'component_encodings': [
            coders.VarIntCoder().as_cloud_object(),
            coders.GlobalWindowCoder().as_cloud_object(),
        ],
    },
                     coder.as_cloud_object())
    # Test binary representation
    self.assertEqual(
        b'\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01',
        coder.encode(window.GlobalWindows.windowed_value(1)))

    # Test decoding large timestamp
    self.assertEqual(
        coder.decode(b'\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00'),
        windowed_value.create(0, MIN_TIMESTAMP.micros, (GlobalWindow(), )))

    # Test unnested
    self.check_coder(
        coders.WindowedValueCoder(coders.VarIntCoder()),
        windowed_value.WindowedValue(3, -100, ()),
        windowed_value.WindowedValue(-1, 100, (1, 2, 3)))

    # Test Global Window
    self.check_coder(
        coders.WindowedValueCoder(
            coders.VarIntCoder(), coders.GlobalWindowCoder()),
        window.GlobalWindows.windowed_value(1))

    # Test nested
    self.check_coder(
        coders.TupleCoder((
            coders.WindowedValueCoder(coders.FloatCoder()),
            coders.WindowedValueCoder(coders.StrUtf8Coder()))),
        (
            windowed_value.WindowedValue(1.5, 0, ()),
            windowed_value.WindowedValue("abc", 10, ('window', ))))
Пример #5
0
  def test_param_windowed_value_coder(self):
    from apache_beam.transforms.window import IntervalWindow
    from apache_beam.utils.windowed_value import PaneInfo
    wv = windowed_value.create(
        b'',
        # Milliseconds to microseconds
        1000 * 1000,
        (IntervalWindow(11, 21),),
        PaneInfo(True, False, 1, 2, 3))
    windowed_value_coder = coders.WindowedValueCoder(
        coders.BytesCoder(), coders.IntervalWindowCoder())
    payload = windowed_value_coder.encode(wv)
    coder = coders.ParamWindowedValueCoder(
        payload, [coders.VarIntCoder(), coders.IntervalWindowCoder()])

    # Test binary representation
    self.assertEqual(b'\x01',
                     coder.encode(window.GlobalWindows.windowed_value(1)))

    # Test unnested
    self.check_coder(
        coders.ParamWindowedValueCoder(
            payload, [coders.VarIntCoder(), coders.IntervalWindowCoder()]),
        windowed_value.WindowedValue(
            3,
            1,
            (window.IntervalWindow(11, 21),),
            PaneInfo(True, False, 1, 2, 3)),
        windowed_value.WindowedValue(
            1,
            1,
            (window.IntervalWindow(11, 21),),
            PaneInfo(True, False, 1, 2, 3)))

    # Test nested
    self.check_coder(
        coders.TupleCoder((
            coders.ParamWindowedValueCoder(
                payload, [
                    coders.FloatCoder(),
                    coders.IntervalWindowCoder()]),
            coders.ParamWindowedValueCoder(
                payload, [
                    coders.StrUtf8Coder(),
                    coders.IntervalWindowCoder()]))),
        (windowed_value.WindowedValue(
            1.5,
            1,
            (window.IntervalWindow(11, 21),),
            PaneInfo(True, False, 1, 2, 3)),
         windowed_value.WindowedValue(
             "abc",
             1,
             (window.IntervalWindow(11, 21),),
             PaneInfo(True, False, 1, 2, 3))))
Пример #6
0
    def test_nested_observables(self):
        class FakeObservableIterator(observable.ObservableMixin):
            def __iter__(self):
                return iter([1, 2, 3])

        # Coder for elements from the observable iterator.
        elem_coder = coders.VarIntCoder()
        iter_coder = coders.TupleSequenceCoder(elem_coder)

        # Test nested WindowedValue observable.
        coder = coders.WindowedValueCoder(iter_coder)
        observ = FakeObservableIterator()
        value = windowed_value.WindowedValue(observ, 0, ())
        self.assertEqual(
            coder.get_impl().get_estimated_size_and_observables(value)[1],
            [(observ, elem_coder.get_impl())])

        # Test nested tuple observable.
        coder = coders.TupleCoder((coders.StrUtf8Coder(), iter_coder))
        value = (u'123', observ)
        self.assertEqual(
            coder.get_impl().get_estimated_size_and_observables(value)[1],
            [(observ, elem_coder.get_impl())])
class StandardCodersTest(unittest.TestCase):

  _urn_to_coder_class = {
      'urn:beam:coders:bytes:0.1': coders.BytesCoder,
      'urn:beam:coders:varint:0.1': coders.VarIntCoder,
      'urn:beam:coders:kv:0.1': lambda k, v: coders.TupleCoder((k, v)),
      'urn:beam:coders:interval_window:0.1': coders.IntervalWindowCoder,
      'urn:beam:coders:stream:0.1': lambda t: coders.IterableCoder(t),
      'urn:beam:coders:global_window:0.1': coders.GlobalWindowCoder,
      'urn:beam:coders:windowed_value:0.1':
          lambda v, w: coders.WindowedValueCoder(v, w)
  }

  _urn_to_json_value_parser = {
      'urn:beam:coders:bytes:0.1': lambda x: x,
      'urn:beam:coders:varint:0.1': lambda x: x,
      'urn:beam:coders:kv:0.1':
          lambda x, key_parser, value_parser: (key_parser(x['key']),
                                               value_parser(x['value'])),
      'urn:beam:coders:interval_window:0.1':
          lambda x: IntervalWindow(
              start=Timestamp(micros=(x['end'] - x['span']) * 1000),
              end=Timestamp(micros=x['end'] * 1000)),
      'urn:beam:coders:stream:0.1': lambda x, parser: map(parser, x),
      'urn:beam:coders:global_window:0.1': lambda x: window.GlobalWindow(),
      'urn:beam:coders:windowed_value:0.1':
          lambda x, value_parser, window_parser: windowed_value.create(
              value_parser(x['value']), x['timestamp'] * 1000,
              tuple([window_parser(w) for w in x['windows']]))
  }

  def test_standard_coders(self):
    for name, spec in _load_test_cases(STANDARD_CODERS_YAML):
      logging.info('Executing %s test.', name)
      self._run_standard_coder(name, spec)

  def _run_standard_coder(self, name, spec):
    coder = self.parse_coder(spec['coder'])
    parse_value = self.json_value_parser(spec['coder'])
    nested_list = [spec['nested']] if 'nested' in spec else [True, False]
    for nested in nested_list:
      for expected_encoded, json_value in spec['examples'].items():
        value = parse_value(json_value)
        expected_encoded = expected_encoded.encode('latin1')
        if not spec['coder'].get('non_deterministic', False):
          actual_encoded = encode_nested(coder, value, nested)
          if self.fix and actual_encoded != expected_encoded:
            self.to_fix[spec['index'], expected_encoded] = actual_encoded
          else:
            self.assertEqual(expected_encoded, actual_encoded)
            self.assertEqual(decode_nested(coder, expected_encoded, nested),
                             value)
        else:
          # Only verify decoding for a non-deterministic coder
          self.assertEqual(decode_nested(coder, expected_encoded, nested),
                           value)

  def parse_coder(self, spec):
    return self._urn_to_coder_class[spec['urn']](
        *[self.parse_coder(c) for c in spec.get('components', ())])

  def json_value_parser(self, coder_spec):
    component_parsers = [
        self.json_value_parser(c) for c in coder_spec.get('components', ())]
    return lambda x: self._urn_to_json_value_parser[coder_spec['urn']](
        x, *component_parsers)

  # Used when --fix is passed.

  fix = False
  to_fix = {}

  @classmethod
  def tearDownClass(cls):
    if cls.fix and cls.to_fix:
      print "FIXING", len(cls.to_fix), "TESTS"
      doc_sep = '\n---\n'
      docs = open(STANDARD_CODERS_YAML).read().split(doc_sep)

      def quote(s):
        return json.dumps(s.decode('latin1')).replace(r'\u0000', r'\0')
      for (doc_ix, expected_encoded), actual_encoded in cls.to_fix.items():
        print quote(expected_encoded), "->", quote(actual_encoded)
        docs[doc_ix] = docs[doc_ix].replace(
            quote(expected_encoded) + ':', quote(actual_encoded) + ':')
      open(STANDARD_CODERS_YAML, 'w').write(doc_sep.join(docs))