def test_interval_window_coder(self):
  """Run check_coder on IntervalWindowCoder, bare and inside a TupleCoder.

  The bare coder is checked against IntervalWindow(x, y) for every x in
  {-2**52, 0, 2**52} and every y in range(-100, 100).
  """
  interval_coder = coders.IntervalWindowCoder()
  sample_windows = []
  for start in [-2**52, 0, 2**52]:
    for end in range(-100, 100):
      sample_windows.append(window.IntervalWindow(start, end))
  self.check_coder(interval_coder, *sample_windows)

  # Same coder, this time as the single component of a TupleCoder.
  tuple_coder = coders.TupleCoder((coders.IntervalWindowCoder(), ))
  tuple_value = (window.IntervalWindow(0, 10), )
  self.check_coder(tuple_coder, tuple_value)
def test_param_windowed_value_coder(self):
  """Check ParamWindowedValueCoder's encoding and run check_coder on it.

  A payload is produced by encoding a WindowedValue with a
  WindowedValueCoder; that payload then parameterizes every
  ParamWindowedValueCoder built in this test.
  """
  from apache_beam.transforms.window import IntervalWindow
  from apache_beam.utils.windowed_value import PaneInfo

  def make_param_coder(value_coder):
    # ParamWindowedValueCoder over the shared payload with the given
    # value coder followed by an IntervalWindowCoder.
    return coders.ParamWindowedValueCoder(
        payload, [value_coder, coders.IntervalWindowCoder()])

  def make_windowed_value(value):
    # Fresh WindowedValue carrying `value` with the fixed window and pane.
    return windowed_value.WindowedValue(
        value,
        1, (window.IntervalWindow(11, 21), ),
        PaneInfo(True, False, 1, 2, 3))

  source_value = windowed_value.create(
      b'',
      # Milliseconds to microseconds
      1000 * 1000,
      (IntervalWindow(11, 21), ),
      PaneInfo(True, False, 1, 2, 3))
  payload_coder = coders.WindowedValueCoder(
      coders.BytesCoder(), coders.IntervalWindowCoder())
  payload = payload_coder.encode(source_value)
  coder = make_param_coder(coders.VarIntCoder())

  # Test binary representation
  encoded = coder.encode(window.GlobalWindows.windowed_value(1))
  self.assertEqual(b'\x01', encoded)

  # Test unnested
  unnested_coder = make_param_coder(coders.VarIntCoder())
  self.check_coder(
      unnested_coder, make_windowed_value(3), make_windowed_value(1))

  # Test nested
  nested_coder = coders.TupleCoder((
      make_param_coder(coders.FloatCoder()),
      make_param_coder(coders.StrUtf8Coder())))
  nested_value = (make_windowed_value(1.5), make_windowed_value("abc"))
  self.check_coder(nested_coder, nested_value)
def test_timestamp_prefixing_window_coder(self):
  """Run check_coder on TimestampPrefixingWindowCoder wrapping
  IntervalWindowCoder, both bare and inside a TupleCoder.
  """
  prefixing_coder = coders.TimestampPrefixingWindowCoder(
      coders.IntervalWindowCoder())
  sample_windows = [
      window.IntervalWindow(start, end)
      for start in [-2**52, 0, 2**52]
      for end in range(-100, 100)
  ]
  self.check_coder(prefixing_coder, *sample_windows)

  # Same wrapped coder as the single component of a TupleCoder.
  tuple_coder = coders.TupleCoder(
      (coders.TimestampPrefixingWindowCoder(coders.IntervalWindowCoder()), ))
  tuple_value = (window.IntervalWindow(0, 10), )
  self.check_coder(tuple_coder, tuple_value)
def test_window_preserved(self):
  """Assert elements keep their timestamp and window through WindowInto
  with _IdentityWindowFn.

  The same expected windowed values are asserted before and after the
  identity windowing step, with reify_windows=True.
  """
  expected_timestamp = timestamp.Timestamp(5)
  expected_window = window.IntervalWindow(1.0, 2.0)

  class AddWindowDoFn(beam.DoFn):
    def process(self, element):
      # Attach the fixed timestamp and window to every element.
      yield WindowedValue(element, expected_timestamp, [expected_window])

  pipeline = TestPipeline()
  data = [(1, 1), (2, 1), (3, 1), (1, 2), (2, 2), (1, 4)]
  expected_windows = []
  for kv in data:
    expected_windows.append(
        TestWindowedValue(kv, expected_timestamp, [expected_window]))

  created = pipeline | 'start' >> beam.Create(data)
  before_identity = created | 'add_windows' >> beam.ParDo(AddWindowDoFn())
  assert_that(
      before_identity,
      equal_to(expected_windows),
      label='before_identity',
      reify_windows=True)

  identity_fn = beam.transforms.util._IdentityWindowFn(
      coders.IntervalWindowCoder())
  after_identity = before_identity | 'window' >> beam.WindowInto(identity_fn)
  assert_that(
      after_identity,
      equal_to(expected_windows),
      label='after_identity',
      reify_windows=True)

  pipeline.run()
def run_coder_benchmarks(
    num_runs, input_size, seed, verbose, filter_regex='.*'):
  """Build the coder benchmarks and run those whose name matches a filter.

  Args:
    num_runs: passed to utils.BenchmarkConfig for every selected benchmark.
    input_size: passed to utils.BenchmarkConfig for every selected benchmark.
    seed: value handed to random.seed before the benchmarks are built.
    verbose: forwarded to utils.run_benchmarks.
    filter_regex: pattern searched, ignoring case, in each benchmark's
      __name__; benchmarks without a match are left out.
  """
  random.seed(seed)

  # Short local aliases for the constructors used repeatedly below.
  make_benchmark = coder_benchmark_factory
  fast_primitives = coders.FastPrimitivesCoder
  message_type = test_message.MessageWithMap

  def fast_iterable():
    # IterableCoder over a new FastPrimitivesCoder.
    return coders.IterableCoder(coders.FastPrimitivesCoder())

  # TODO(BEAM-4441): Pick coders using type hints, for example:
  # tuple_coder = typecoders.registry.get_coder(typing.Tuple[int, ...])
  benchmarks = [
      make_benchmark(fast_primitives(), small_int),
      make_benchmark(fast_primitives(), large_int),
      make_benchmark(fast_primitives(), small_string),
      make_benchmark(fast_primitives(), large_string),
      make_benchmark(fast_primitives(), small_list),
      make_benchmark(fast_iterable(), small_list),
      make_benchmark(fast_primitives(), large_list),
      make_benchmark(fast_iterable(), large_list),
      make_benchmark(fast_iterable(), large_iterable),
      make_benchmark(fast_primitives(), small_tuple),
      make_benchmark(fast_primitives(), large_tuple),
      make_benchmark(fast_primitives(), small_dict),
      make_benchmark(fast_primitives(), large_dict),
      make_benchmark(coders.ProtoCoder(message_type), small_message_with_map),
      make_benchmark(coders.ProtoCoder(message_type), large_message_with_map),
      make_benchmark(
          coders.DeterministicProtoCoder(message_type),
          small_message_with_map),
      make_benchmark(
          coders.DeterministicProtoCoder(message_type),
          large_message_with_map),
      make_benchmark(
          coders.WindowedValueCoder(fast_primitives()), wv_with_one_window),
      make_benchmark(
          coders.WindowedValueCoder(
              fast_primitives(), coders.IntervalWindowCoder()),
          wv_with_multiple_windows),
      make_benchmark(
          coders.WindowedValueCoder(
              fast_primitives(), coders.GlobalWindowCoder()),
          globally_windowed_value),
      make_benchmark(coders.LengthPrefixCoder(fast_primitives()), small_int),
  ]

  # Keep only the benchmarks whose name matches the filter.
  suite = []
  for benchmark in benchmarks:
    if re.search(filter_regex, benchmark.__name__, flags=re.I):
      suite.append(utils.BenchmarkConfig(benchmark, input_size, num_runs))

  utils.run_benchmarks(suite, verbose=verbose)
def get_window_coder(self):
  """Return a newly constructed coders.IntervalWindowCoder."""
  window_coder = coders.IntervalWindowCoder()
  return window_coder
def get_window_coder(self):
  # type: () -> coders.IntervalWindowCoder
  """Return a newly constructed coders.IntervalWindowCoder."""
  window_coder = coders.IntervalWindowCoder()
  return window_coder