示例#1
0
 def test_interval_window_coder(self):
     """Exercise IntervalWindowCoder through check_coder.

     The coder is checked on its own against a grid of IntervalWindow
     values, and then as the single component of a TupleCoder.
     """
     interval_coder = coders.IntervalWindowCoder()
     # Three start values, each combined with every end value in [-100, 100).
     starts = [-2**52, 0, 2**52]
     samples = [
         window.IntervalWindow(start, end)
         for start in starts
         for end in range(-100, 100)
     ]
     self.check_coder(interval_coder, *samples)

     # Same window coder, this time nested inside a one-element tuple coder.
     nested_coder = coders.TupleCoder((coders.IntervalWindowCoder(), ))
     nested_value = (window.IntervalWindow(0, 10), )
     self.check_coder(nested_coder, nested_value)
示例#2
0
  def test_param_windowed_value_coder(self):
    """Exercise ParamWindowedValueCoder directly and through check_coder.

    The coder's payload is an encoded windowed value produced with a
    WindowedValueCoder. The test asserts the encoded bytes for a single
    value, then runs check_coder on the coder by itself and nested inside
    a TupleCoder.
    """
    from apache_beam.transforms.window import IntervalWindow
    from apache_beam.utils.windowed_value import PaneInfo

    # Windowed value whose encoding is used as the payload below.
    template = windowed_value.create(
        b'',
        # Milliseconds to microseconds
        1000 * 1000,
        (IntervalWindow(11, 21),),
        PaneInfo(True, False, 1, 2, 3))
    payload_coder = coders.WindowedValueCoder(
        coders.BytesCoder(), coders.IntervalWindowCoder())
    payload = payload_coder.encode(template)

    def param_coder(value_coder):
      # Coder under test: shared payload, caller-chosen value coder.
      return coders.ParamWindowedValueCoder(
          payload, [value_coder, coders.IntervalWindowCoder()])

    def sample(value):
      # Test element carrying the given value; everything else is fixed.
      return windowed_value.WindowedValue(
          value,
          1,
          (window.IntervalWindow(11, 21),),
          PaneInfo(True, False, 1, 2, 3))

    # Test binary representation
    varint_param_coder = param_coder(coders.VarIntCoder())
    encoded = varint_param_coder.encode(
        window.GlobalWindows.windowed_value(1))
    self.assertEqual(b'\x01', encoded)

    # Test unnested
    self.check_coder(
        param_coder(coders.VarIntCoder()), sample(3), sample(1))

    # Test nested
    nested_coder = coders.TupleCoder((
        param_coder(coders.FloatCoder()),
        param_coder(coders.StrUtf8Coder())))
    self.check_coder(nested_coder, (sample(1.5), sample("abc")))
示例#3
0
 def test_timestamp_prefixing_window_coder(self):
     """Exercise TimestampPrefixingWindowCoder through check_coder.

     The coder wraps an IntervalWindowCoder. It is checked on its own
     against a grid of IntervalWindow values, and then as the single
     component of a TupleCoder.
     """
     prefixing_coder = coders.TimestampPrefixingWindowCoder(
         coders.IntervalWindowCoder())

     # Start-major ordering: each start is paired with ends -100..99.
     samples = []
     for start in [-2**52, 0, 2**52]:
         for end in range(-100, 100):
             samples.append(window.IntervalWindow(start, end))
     self.check_coder(prefixing_coder, *samples)

     # Same wrapped coder nested inside a one-element tuple coder.
     inner_coder = coders.TimestampPrefixingWindowCoder(
         coders.IntervalWindowCoder())
     nested_coder = coders.TupleCoder((inner_coder, ))
     nested_value = (window.IntervalWindow(0, 10), )
     self.check_coder(nested_coder, nested_value)
示例#4
0
  def test_window_preserved(self):
    """Assert the same windowed elements before and after _IdentityWindowFn.

    Every element is emitted with a fixed timestamp and a fixed interval
    window. The same expectation is asserted on the collection before and
    after applying WindowInto with _IdentityWindowFn.
    """
    ts = timestamp.Timestamp(5)
    win = window.IntervalWindow(1.0, 2.0)

    class AddWindowDoFn(beam.DoFn):
      def process(self, element):
        # Re-emit the element with the fixed timestamp and window.
        yield WindowedValue(element, ts, [win])

    pipeline = TestPipeline()
    data = [(1, 1), (2, 1), (3, 1), (1, 2), (2, 2), (1, 4)]

    # One expected windowed value per input pair, all sharing ts and win.
    expected_windows = []
    for kv in data:
      expected_windows.append(TestWindowedValue(kv, ts, [win]))

    created = pipeline | 'start' >> beam.Create(data)
    before_identity = created | 'add_windows' >> beam.ParDo(AddWindowDoFn())
    assert_that(
        before_identity,
        equal_to(expected_windows),
        label='before_identity',
        reify_windows=True)

    identity_fn = beam.transforms.util._IdentityWindowFn(
        coders.IntervalWindowCoder())
    after_identity = before_identity | 'window' >> beam.WindowInto(identity_fn)
    assert_that(
        after_identity,
        equal_to(expected_windows),
        label='after_identity',
        reify_windows=True)

    pipeline.run()
示例#5
0
def run_coder_benchmarks(num_runs,
                         input_size,
                         seed,
                         verbose,
                         filter_regex='.*'):
    """Build the coder benchmark suite and run the benchmarks that match.

    Args:
        num_runs: Passed to each utils.BenchmarkConfig.
        input_size: Passed to each utils.BenchmarkConfig.
        seed: Value used to seed the ``random`` module before the suite is
            built.
        verbose: Forwarded to utils.run_benchmarks.
        filter_regex: Pattern searched (case-insensitively) in each
            benchmark's ``__name__``; only matching benchmarks are run.
    """
    random.seed(seed)

    # TODO(BEAM-4441): Pick coders using type hints, for example:
    # tuple_coder = typecoders.registry.get_coder(typing.Tuple[int, ...])
    # Each entry pairs a freshly constructed coder with the data generator
    # handed to coder_benchmark_factory; list order is the benchmark order.
    coder_and_generator = [
        (coders.FastPrimitivesCoder(), small_int),
        (coders.FastPrimitivesCoder(), large_int),
        (coders.FastPrimitivesCoder(), small_string),
        (coders.FastPrimitivesCoder(), large_string),
        (coders.FastPrimitivesCoder(), small_list),
        (coders.IterableCoder(coders.FastPrimitivesCoder()), small_list),
        (coders.FastPrimitivesCoder(), large_list),
        (coders.IterableCoder(coders.FastPrimitivesCoder()), large_list),
        (coders.IterableCoder(coders.FastPrimitivesCoder()), large_iterable),
        (coders.FastPrimitivesCoder(), small_tuple),
        (coders.FastPrimitivesCoder(), large_tuple),
        (coders.FastPrimitivesCoder(), small_dict),
        (coders.FastPrimitivesCoder(), large_dict),
        (coders.ProtoCoder(test_message.MessageWithMap),
         small_message_with_map),
        (coders.ProtoCoder(test_message.MessageWithMap),
         large_message_with_map),
        (coders.DeterministicProtoCoder(test_message.MessageWithMap),
         small_message_with_map),
        (coders.DeterministicProtoCoder(test_message.MessageWithMap),
         large_message_with_map),
        (coders.WindowedValueCoder(coders.FastPrimitivesCoder()),
         wv_with_one_window),
        (coders.WindowedValueCoder(coders.FastPrimitivesCoder(),
                                   coders.IntervalWindowCoder()),
         wv_with_multiple_windows),
        (coders.WindowedValueCoder(coders.FastPrimitivesCoder(),
                                   coders.GlobalWindowCoder()),
         globally_windowed_value),
        (coders.LengthPrefixCoder(coders.FastPrimitivesCoder()), small_int),
    ]
    benchmarks = [
        coder_benchmark_factory(coder, generator)
        for coder, generator in coder_and_generator
    ]

    # Keep only the benchmarks whose name matches the filter.
    suite = []
    for benchmark in benchmarks:
        if re.search(filter_regex, benchmark.__name__, flags=re.I):
            suite.append(
                utils.BenchmarkConfig(benchmark, input_size, num_runs))

    utils.run_benchmarks(suite, verbose=verbose)
示例#6
0
 def get_window_coder(self):
     """Return a newly constructed IntervalWindowCoder."""
     window_coder = coders.IntervalWindowCoder()
     return window_coder
示例#7
0
 def get_window_coder(self):
   # type: () -> coders.IntervalWindowCoder
   """Return a newly constructed IntervalWindowCoder."""
   interval_coder = coders.IntervalWindowCoder()
   return interval_coder