Exemplo n.º 1
0
def load(events, metadata=None, pipeline_options=None):
  """Assemble the query10 transform chain on top of ``events``.

  The chain applies ``ShardEventsDoFn``, windows the result with a custom
  trigger, groups by key, applies ``WriteEventDoFn``, re-windows, groups by
  key again and finally applies ``WriteIndexDoFn``.

  Args:
    events: the collection the first step is piped from.
    metadata: mapping queried with ``.get`` for ``'window_size_sec'`` and
      ``'max_log_events'``. It defaults to ``None`` but is dereferenced
      unconditionally, so callers have to pass a real mapping.
    pipeline_options: forwarded as an extra argument to both writing
      ``ParDo`` steps.

  Returns:
    The output of the last step, ``'query10_write_index'``.
  """
  sharded = events | 'query10_shard_events' >> beam.ParDo(ShardEventsDoFn())

  window_size_sec = metadata.get('window_size_sec')
  event_window = window.FixedWindows(window_size_sec)
  max_log_events = metadata.get('max_log_events')

  # The trigger is a sequence of two sub-triggers, executed in order.
  # First: fire repeatedly once at least max_log_events elements are in the
  # pane, or finally when the watermark passes the end of the window.
  until_watermark = trigger.OrFinally(
      trigger.Repeatedly(trigger.AfterCount(max_log_events)),
      trigger.AfterWatermark())
  # Second: fire repeatedly once at least max_log_events elements are in the
  # pane, or once processing time passes the first element in the pane plus
  # the delay.
  after_watermark = trigger.Repeatedly(
      trigger.AfterAny(
          trigger.AfterCount(max_log_events),
          trigger.AfterProcessingTime(LATE_BATCHING_PERIOD)))
  log_trigger = trigger.AfterEach(until_watermark, after_watermark)

  # Use a 1 day allowed lateness so that any forgotten hold will stall the
  # pipeline for that period and be very noticeable.
  one_day = Duration.of(1 * 24 * 60 * 60)

  windowed = sharded | 'query10_fix_window' >> beam.WindowInto(
      event_window,
      trigger=log_trigger,
      accumulation_mode=trigger.AccumulationMode.DISCARDING,
      allowed_lateness=one_day)

  written_events = (
      windowed
      | 'query10_gbk' >> beam.GroupByKey()
      | 'query10_write_event' >> beam.ParDo(WriteEventDoFn(), pipeline_options))

  return (
      written_events
      | 'query10_window_log_files' >> beam.WindowInto(
          window.FixedWindows(window_size_sec),
          accumulation_mode=trigger.AccumulationMode.DISCARDING,
          allowed_lateness=one_day)
      | 'query10_gbk_2' >> beam.GroupByKey()
      | 'query10_write_index' >> beam.ParDo(WriteIndexDoFn(), pipeline_options))
Exemplo n.º 2
0
 def test_trigger_encoding(self):
   """Check that each sample trigger equals its runner-API round trip."""
   sample_triggers = (
       DefaultTrigger(),
       AfterAll(AfterCount(1), AfterCount(10)),
       AfterAny(AfterCount(10), AfterCount(100)),
       AfterWatermark(early=AfterCount(1000)),
       AfterWatermark(early=AfterCount(1000), late=AfterCount(1)),
       Repeatedly(AfterCount(100)),
       trigger.OrFinally(AfterCount(3), AfterCount(10)),
   )
   for original in sample_triggers:
     # A new context is created for every trigger and shared by the
     # encode and decode halves of that trigger's round trip.
     context = pipeline_context.PipelineContext()
     encoded = original.to_runner_api(context)
     decoded = TriggerFn.from_runner_api(encoded, context)
     self.assertEqual(original, decoded)