  def test_clear(self):
    cache = StreamingCache(cache_dir=None)
    self.assertFalse(cache.exists('my_label'))
    cache.sink(['my_label'], is_capture=True)
    cache.write([TestStreamFileRecord()], 'my_label')
    self.assertTrue(cache.exists('my_label'))
    self.assertEqual(cache.capture_keys, set(['my_label']))
    self.assertTrue(cache.clear('my_label'))
    self.assertFalse(cache.exists('my_label'))
    self.assertFalse(cache.capture_keys)
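
The snippets on this page assume the imports used by Beam's interactive streaming-cache tests. A minimal set is sketched below for convenience; it is reconstructed rather than copied from the original test module, and the CacheKey import path has moved between Beam releases.

import unittest

from apache_beam import coders
from apache_beam.options.pipeline_options import DebugOptions
from apache_beam.options.pipeline_options import StandardOptions
from apache_beam.portability.api.beam_interactive_api_pb2 import TestStreamFileRecord
from apache_beam.portability.api.beam_runner_api_pb2 import TestStreamPayload
from apache_beam.runners.interactive import interactive_beam as ib
from apache_beam.runners.interactive import interactive_environment as ie
from apache_beam.runners.interactive.cache_manager import SafeFastPrimitivesCoder
from apache_beam.runners.interactive.caching.streaming_cache import StreamingCache
from apache_beam.runners.interactive.options import capture_limiters
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.test_stream import TestStream
from apache_beam.transforms.window import TimestampedValue
# CacheKey has lived in different modules across Beam versions; in recent
# releases it is importable from apache_beam.runners.interactive.caching.cacheable.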
Example #2
 def test_capture_size_limit_not_reached_when_file_size_under_limit(self):
     ib.options.capture_size_limit = 100
     cache = StreamingCache(cache_dir=None)
     # Build a sink object to track the label as a capture in the test.
     cache.sink(['my_label'], is_capture=True)
     cache.write([TestStreamFileRecord()], 'my_label')
     self.assertTrue(cache.exists('my_label'))
     ie.current_env().set_cache_manager(cache)
     self.assertFalse(ie.current_env().options.capture_control.
                      is_capture_size_limit_reached())
Example #3
  def test_capture_size_limit_not_reached_when_file_size_under_limit(self):
    ib.options.capture_size_limit = 100
    cache = StreamingCache(cache_dir=None)
    # Build a sink object to track the label as a capture in the test.
    cache.sink(['my_label'], is_capture=True)
    cache.write([beam_interactive_api_pb2.TestStreamFileRecord()], 'my_label')
    self.assertTrue(cache.exists('my_label'))
    ie.current_env().set_cache_manager(cache, 'dummy pipeline')

    limiter = capture_limiters.SizeLimiter(ib.options.capture_size_limit)
    self.assertFalse(limiter.is_triggered())
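
Example #3 exercises the newer capture_limiters.SizeLimiter API. Roughly, the limiter totals the capture size reported by each cache manager registered for a tracked user pipeline and triggers once that total reaches the limit. The sketch below paraphrases that idea for illustration only; it is not the Beam implementation, and the attribute names are assumptions.

class SizeLimiterSketch(object):
  """Paraphrase of the capture-size check, not Beam's SizeLimiter."""
  def __init__(self, size_limit):
    self._size_limit = size_limit

  def is_triggered(self):
    total_capture_size = 0
    for user_pipeline in ie.current_env().tracked_user_pipelines:
      cache_manager = ie.current_env().get_cache_manager(user_pipeline)
      # A StreamingCache reports how many bytes its capture keys have written.
      if hasattr(cache_manager, 'capture_size'):
        total_capture_size += cache_manager.capture_size
    return total_capture_size >= self._size_limit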
Example #4
 def test_capture_size_limit_reached_when_file_size_above_limit(self):
     ib.options.capture_size_limit = 1
     cache = StreamingCache(cache_dir=None)
     cache.sink(['my_label'], is_capture=True)
     cache.write([
         TestStreamFileRecord(recorded_event=TestStreamPayload.Event(
             element_event=TestStreamPayload.Event.AddElements(elements=[
                 TestStreamPayload.TimestampedElement(
                     encoded_element=coders.FastPrimitivesCoder().encode(
                         'a'),
                     timestamp=0)
             ])))
     ], 'my_label')
     self.assertTrue(cache.exists('my_label'))
     ie.current_env().set_cache_manager(cache)
     self.assertTrue(ie.current_env().options.capture_control.
                     is_capture_size_limit_reached())
Example #5
    def test_capture_size_limit_reached_when_file_size_above_limit(self):
        ib.options.capture_size_limit = 1
        cache = StreamingCache(cache_dir=None)
        cache.sink(['my_label'], is_capture=True)
        cache.write([
            TestStreamFileRecord(recorded_event=TestStreamPayload.Event(
                element_event=TestStreamPayload.Event.AddElements(elements=[
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coders.FastPrimitivesCoder().encode(
                            'a'),
                        timestamp=0)
                ])))
        ], 'my_label')
        self.assertTrue(cache.exists('my_label'))
        p = _build_an_empty_streaming_pipeline()
        ie.current_env().set_cache_manager(cache, p)

        limiter = capture_limiters.SizeLimiter(1)
        self.assertTrue(limiter.is_triggered())
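
Example #5 calls _build_an_empty_streaming_pipeline(), a module-level helper in the original test file that is not shown on this page. A plausible minimal version follows; the ib.watch call is an assumption about how the pipeline gets registered as a user pipeline.

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import StandardOptions


def _build_an_empty_streaming_pipeline():
  pipeline_options = PipelineOptions()
  pipeline_options.view_as(StandardOptions).streaming = True
  p = beam.Pipeline(options=pipeline_options)
  # Let the interactive environment watch this pipeline as a user pipeline.
  ib.watch({'pipeline': p})
  return p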
Example #6
  def test_read_and_write_multiple_outputs(self):
    """An integration test between the Sink and Source with multiple outputs.

    This tests that the StreamingCache reads from multiple files and combines
    them into a single, sorted output.
    """
    LETTERS_TAG = repr(CacheKey('letters', '', '', ''))
    NUMBERS_TAG = repr(CacheKey('numbers', '', '', ''))

    # Units here are in seconds.
    test_stream = (TestStream()
                   .advance_watermark_to(0, tag=LETTERS_TAG)
                   .advance_processing_time(5)
                   .add_elements(['a', 'b', 'c'], tag=LETTERS_TAG)
                   .advance_watermark_to(10, tag=NUMBERS_TAG)
                   .advance_processing_time(1)
                   .add_elements(
                       [
                           TimestampedValue('1', 15),
                           TimestampedValue('2', 15),
                           TimestampedValue('3', 15)
                       ],
                       tag=NUMBERS_TAG)) # yapf: disable

    cache = StreamingCache(cache_dir=None, sample_resolution_sec=1.0)

    coder = SafeFastPrimitivesCoder()

    options = StandardOptions(streaming=True)
    with TestPipeline(options=options) as p:
      # pylint: disable=expression-not-assigned
      events = p | test_stream
      events[LETTERS_TAG] | 'Letters sink' >> cache.sink([LETTERS_TAG])
      events[NUMBERS_TAG] | 'Numbers sink' >> cache.sink([NUMBERS_TAG])

    reader = cache.read_multiple([[LETTERS_TAG], [NUMBERS_TAG]])
    actual_events = list(reader)

    # Units here are in microseconds.
    expected_events = [
        TestStreamPayload.Event(
            processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime(
                advance_duration=5 * 10**6)),
        TestStreamPayload.Event(
            watermark_event=TestStreamPayload.Event.AdvanceWatermark(
                new_watermark=0, tag=LETTERS_TAG)),
        TestStreamPayload.Event(
            element_event=TestStreamPayload.Event.AddElements(
                elements=[
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode('a'), timestamp=0),
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode('b'), timestamp=0),
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode('c'), timestamp=0),
                ],
                tag=LETTERS_TAG)),
        TestStreamPayload.Event(
            processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime(
                advance_duration=1 * 10**6)),
        TestStreamPayload.Event(
            watermark_event=TestStreamPayload.Event.AdvanceWatermark(
                new_watermark=10 * 10**6, tag=NUMBERS_TAG)),
        TestStreamPayload.Event(
            watermark_event=TestStreamPayload.Event.AdvanceWatermark(
                new_watermark=0, tag=LETTERS_TAG)),
        TestStreamPayload.Event(
            element_event=TestStreamPayload.Event.AddElements(
                elements=[
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode('1'), timestamp=15 *
                        10**6),
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode('2'), timestamp=15 *
                        10**6),
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode('3'), timestamp=15 *
                        10**6),
                ],
                tag=NUMBERS_TAG)),
    ]

    self.assertListEqual(actual_events, expected_events)
Example #7
  def test_read_and_write(self):
    """An integration test between the Sink and Source.

    This ensures that the sink and source speak the same language in terms of
    coders, protos, order, and units.
    """
    CACHED_RECORDS = repr(CacheKey('records', '', '', ''))

    # Units here are in seconds.
    test_stream = (
        TestStream(output_tags=(CACHED_RECORDS))
                   .advance_watermark_to(0, tag=CACHED_RECORDS)
                   .advance_processing_time(5)
                   .add_elements(['a', 'b', 'c'], tag=CACHED_RECORDS)
                   .advance_watermark_to(10, tag=CACHED_RECORDS)
                   .advance_processing_time(1)
                   .add_elements(
                       [
                           TimestampedValue('1', 15),
                           TimestampedValue('2', 15),
                           TimestampedValue('3', 15)
                       ],
                       tag=CACHED_RECORDS)) # yapf: disable

    coder = SafeFastPrimitivesCoder()
    cache = StreamingCache(cache_dir=None, sample_resolution_sec=1.0)

    # Assert that there are no capture keys at first.
    self.assertEqual(cache.capture_keys, set())

    options = StandardOptions(streaming=True)
    with TestPipeline(options=options) as p:
      records = (p | test_stream)[CACHED_RECORDS]

      # pylint: disable=expression-not-assigned
      records | cache.sink([CACHED_RECORDS], is_capture=True)

    reader, _ = cache.read(CACHED_RECORDS)
    actual_events = list(reader)

    # Assert that the capture keys are forwarded correctly.
    self.assertEqual(cache.capture_keys, set([CACHED_RECORDS]))

    # Units here are in microseconds.
    expected_events = [
        TestStreamPayload.Event(
            processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime(
                advance_duration=5 * 10**6)),
        TestStreamPayload.Event(
            watermark_event=TestStreamPayload.Event.AdvanceWatermark(
                new_watermark=0, tag=CACHED_RECORDS)),
        TestStreamPayload.Event(
            element_event=TestStreamPayload.Event.AddElements(
                elements=[
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode('a'), timestamp=0),
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode('b'), timestamp=0),
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode('c'), timestamp=0),
                ],
                tag=CACHED_RECORDS)),
        TestStreamPayload.Event(
            processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime(
                advance_duration=1 * 10**6)),
        TestStreamPayload.Event(
            watermark_event=TestStreamPayload.Event.AdvanceWatermark(
                new_watermark=10 * 10**6, tag=CACHED_RECORDS)),
        TestStreamPayload.Event(
            element_event=TestStreamPayload.Event.AddElements(
                elements=[
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode('1'), timestamp=15 *
                        10**6),
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode('2'), timestamp=15 *
                        10**6),
                    TestStreamPayload.TimestampedElement(
                        encoded_element=coder.encode('3'), timestamp=15 *
                        10**6),
                ],
                tag=CACHED_RECORDS)),
    ]
    self.assertEqual(actual_events, expected_events)
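
Because the cached events are TestStreamPayload.Event protos, the capture written in example #7 can be inspected directly. A small sketch, reusing the cache, coder, and CACHED_RECORDS names from the test above:

reader, _ = cache.read(CACHED_RECORDS)
for event in reader:
  if event.HasField('element_event'):
    for element in event.element_event.elements:
      # Timestamps inside the protos are in microseconds.
      print(coder.decode(element.encoded_element), element.timestamp / 10**6)
  elif event.HasField('watermark_event'):
    print('watermark advanced to', event.watermark_event.new_watermark)
  elif event.HasField('processing_time_event'):
    print('processing time advanced by',
          event.processing_time_event.advance_duration / 10**6, 'seconds')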
Example #8
    def test_read_and_write(self):
        """An integration test between the Sink and Source.

        This ensures that the sink and source speak the same language in terms
        of coders, protos, order, and units.
        """

        # Units here are in seconds.
        test_stream = (TestStream()
                       .advance_watermark_to(0, tag='records')
                       .advance_processing_time(5)
                       .add_elements(['a', 'b', 'c'], tag='records')
                       .advance_watermark_to(10, tag='records')
                       .advance_processing_time(1)
                       .add_elements(
                           [
                               TimestampedValue('1', 15),
                               TimestampedValue('2', 15),
                               TimestampedValue('3', 15)
                           ],
                           tag='records')) # yapf: disable

        coder = SafeFastPrimitivesCoder()
        cache = StreamingCache(cache_dir=None, sample_resolution_sec=1.0)

        options = StandardOptions(streaming=True)
        options.view_as(DebugOptions).add_experiment(
            'passthrough_pcollection_output_ids')
        with TestPipeline(options=options) as p:
            # pylint: disable=expression-not-assigned
            p | test_stream | cache.sink(['records'])

        reader, _ = cache.read('records')
        actual_events = list(reader)

        # Units here are in microseconds.
        expected_events = [
            TestStreamPayload.Event(processing_time_event=TestStreamPayload.
                                    Event.AdvanceProcessingTime(
                                        advance_duration=5 * 10**6)),
            TestStreamPayload.Event(
                watermark_event=TestStreamPayload.Event.AdvanceWatermark(
                    new_watermark=0, tag='records')),
            TestStreamPayload.Event(
                element_event=TestStreamPayload.Event.AddElements(
                    elements=[
                        TestStreamPayload.TimestampedElement(
                            encoded_element=coder.encode('a'), timestamp=0),
                        TestStreamPayload.TimestampedElement(
                            encoded_element=coder.encode('b'), timestamp=0),
                        TestStreamPayload.TimestampedElement(
                            encoded_element=coder.encode('c'), timestamp=0),
                    ],
                    tag='records')),
            TestStreamPayload.Event(processing_time_event=TestStreamPayload.
                                    Event.AdvanceProcessingTime(
                                        advance_duration=1 * 10**6)),
            TestStreamPayload.Event(
                watermark_event=TestStreamPayload.Event.AdvanceWatermark(
                    new_watermark=10 * 10**6, tag='records')),
            TestStreamPayload.Event(element_event=TestStreamPayload.Event.
                                    AddElements(elements=[
                                        TestStreamPayload.TimestampedElement(
                                            encoded_element=coder.encode('1'),
                                            timestamp=15 * 10**6),
                                        TestStreamPayload.TimestampedElement(
                                            encoded_element=coder.encode('2'),
                                            timestamp=15 * 10**6),
                                        TestStreamPayload.TimestampedElement(
                                            encoded_element=coder.encode('3'),
                                            timestamp=15 * 10**6),
                                    ],
                                                tag='records')),
        ]
        self.assertEqual(actual_events, expected_events)