示例#1
0
  def test_label_detection_with_video_context(self):
    """Run label detection with an explicit VideoContext (integration test).

    Pairs ``self.VIDEO_PATH`` with a ``VideoContext`` that requests
    ``SHOT_MODE`` label detection using the ``'builtin/latest'`` model,
    annotates it with ``AnnotateVideoWithContext`` and collects everything
    emitted by ``extract_entities_descriptions`` into a single list.
    """
    with TestPipeline(is_integration_test=True) as p:
      label_config = types.LabelDetectionConfig(
          label_detection_mode=enums.LabelDetectionMode.SHOT_MODE,
          model='builtin/latest')
      video_context = types.VideoContext(label_detection_config=label_config)
      requested_features = [enums.Feature.LABEL_DETECTION]

      output = (
          p
          | beam.Create([(self.VIDEO_PATH, video_context)])
          | AnnotateVideoWithContext(features=requested_features)
          | beam.ParDo(extract_entities_descriptions)
          | beam.combiners.ToList())

      # Passes when at least one collected item contains 'bicycle'.
      mentions_bicycle = hc.has_item(hc.contains_string('bicycle'))
      assert_that(output, matches_all([mentions_bicycle]))
示例#2
0
    def test_label_detection_with_video_context(self):
        """Run label detection with an explicit VideoContext (integration).

        Pairs ``self.VIDEO_PATH`` with a ``VideoContext`` that requests
        ``SHOT_MODE`` label detection, annotates it with
        ``AnnotateVideoWithContext`` and collects everything emitted by
        ``extract_entities_descriptions`` into a single list, which must
        include both 'bicycle' and 'dinosaur'.
        """
        with TestPipeline(is_integration_test=True) as p:
            label_config = types.LabelDetectionConfig(
                label_detection_mode=enums.LabelDetectionMode.SHOT_MODE)
            video_context = types.VideoContext(
                label_detection_config=label_config)
            requested_features = [enums.Feature.LABEL_DETECTION]

            output = (
                p
                | beam.Create([(self.VIDEO_PATH, video_context)])
                | AnnotateVideoWithContext(features=requested_features)
                | beam.ParDo(extract_entities_descriptions)
                | beam.combiners.ToList())

            # Both labels have to appear among the collected items.
            expected_labels = hc.has_items('bicycle', 'dinosaur')
            assert_that(output, matches_all([expected_labels]))
示例#3
0
    def test_streaming_different_file_types(self):
        """Write a windowed TestStream to per-destination files; check names.

        Records from ``WriteFilesTest.SIMPLE_COLLECTION`` are fed two at a
        time, with the watermark advanced to 10, 20, 30 and 40 after each
        pair. They are windowed with ``FixedWindows(10)`` and written with
        ``WriteToFiles``, routed by each record's ``'foundation'`` value:
        the ``'apache'`` destination gets ``WriteFilesTest.CsvSink``, any
        other destination gets ``WriteFilesTest.JsonSink``.

        A second pipeline matches the ``cncf*`` and ``apache*`` files in the
        output directory and checks their paths against one regex per
        expected window.
        """
        # Named so that the ``dir`` and ``input`` builtins are not shadowed.
        output_dir = self._new_tempdir()
        records = iter(WriteFilesTest.SIMPLE_COLLECTION)

        # Two records, then a watermark advance, repeated for each step.
        ts = TestStream().advance_watermark_to(0)
        for watermark in (10, 20, 30, 40):
            ts = ts.add_elements([next(records), next(records)])
            ts = ts.advance_watermark_to(watermark)
        ts = ts.advance_watermark_to_infinity()

        def no_colon_file_naming(*args):
            # Same as the destination-prefix naming, with ':' swapped for '_'.
            file_name = fileio.destination_prefix_naming()(*args)
            return file_name.replace(':', '_')

        def window_name_matchers(destination):
            # One regex matcher per expected window [start, start + 10).
            return [
                stringmatches.matches_regexp(
                    '.*%s-1970-01-01T00_00_%02d-1970-01-01T00_00_%02d--.*' %
                    (destination, start, start + 10))
                for start in (0, 10, 20, 30)
            ]

        # Pipeline under test: write the stream to files.
        with TestPipeline() as p:
            _ = (p
                 | ts
                 | beam.WindowInto(FixedWindows(10))
                 | beam.io.fileio.WriteToFiles(
                     path=output_dir,
                     destination=lambda record: record['foundation'],
                     sink=lambda dest:
                     (WriteFilesTest.CsvSink(WriteFilesTest.CSV_HEADERS)
                      if dest == 'apache' else WriteFilesTest.JsonSink()),
                     file_naming=no_colon_file_naming,
                     max_writers_per_bundle=0,
                 ))

        # Verification pipeline: list the written files per destination.
        with TestPipeline() as p:
            cncf_files = (p
                          | fileio.MatchFiles(
                              FileSystems.join(output_dir, 'cncf*'))
                          | "CncfFileNames" >> beam.Map(lambda fm: fm.path))

            apache_files = (p
                            | "MatchApache" >> fileio.MatchFiles(
                                FileSystems.join(output_dir, 'apache*'))
                            |
                            "ApacheFileNames" >> beam.Map(lambda fm: fm.path))

            assert_that(
                cncf_files,
                matches_all(window_name_matchers('cncf')),
                label='verifyCNCFFiles')

            assert_that(
                apache_files,
                matches_all(window_name_matchers('apache')),
                label='verifyApacheFiles')
示例#4
0
    def test_streaming_complex_timing(self):
        """Stream keyed records through WriteToFiles and verify the output.

        Every element of ``WriteFilesTest.LARGER_COLLECTION`` is added to a
        ``TestStream`` under the key ``'key'``. Whenever the element's
        integer value is a non-zero multiple of 5, processing time is
        advanced by 5 and the watermark is moved to that value. The stream
        is windowed, grouped by key, flattened back to values and written
        with ``WriteToFiles``; the path of each written file is passed to
        ``self.record_dofn()``.

        A second pipeline matches everything in the output directory and
        checks the file paths against ``WriteFilesTest.all_records`` and
        the stripped file lines against ``LARGER_COLLECTION``.
        """
        # Use state on the TestCase class, since other references would be pickled
        # into a closure and not have the desired side effects.
        #
        # TODO(BEAM-5295): Use assert_that after it works for the cases here in
        # streaming mode.
        WriteFilesTest.all_records = []

        # Named so that the ``dir`` builtin is not shadowed. The trailing
        # separator is part of the path handed to WriteToFiles.
        output_dir = '%s%s' % (self._new_tempdir(), os.sep)

        # Setting up the input (TestStream)
        ts = TestStream().advance_watermark_to(0)
        for elm in WriteFilesTest.LARGER_COLLECTION:
            timestamp = int(elm)

            ts.add_elements([('key', '%s' % elm)])
            if timestamp % 5 == 0 and timestamp != 0:
                # TODO(BEAM-3759): Add many firings per window after getting PaneInfo.
                ts.advance_processing_time(5)
                ts.advance_watermark_to(timestamp)
        ts.advance_watermark_to_infinity()

        def no_colon_file_naming(*args):
            # Same as the destination-prefix naming, with ':' swapped for '_'.
            file_name = fileio.destination_prefix_naming()(*args)
            return file_name.replace(':', '_')

        # The pipeline that we are testing
        options = PipelineOptions()
        options.view_as(StandardOptions).streaming = True
        with TestPipeline(options=options) as p:
            # The only trigger configured here is AfterWatermark(), with
            # DISCARDING accumulation. No separate processing-time trigger
            # is set up, even though the TestStream advances processing
            # time (see TODO(BEAM-3759) above).
            res = (p
                   | ts
                   | beam.WindowInto(
                       FixedWindows(10),
                       trigger=trigger.AfterWatermark(),
                       accumulation_mode=trigger.AccumulationMode.DISCARDING)
                   | beam.GroupByKey()
                   | beam.FlatMap(lambda x: x[1]))

            # NOTE(review): record_dofn() presumably appends each path to
            # WriteFilesTest.all_records -- confirm against its definition.
            _ = (res
                 | beam.io.fileio.WriteToFiles(
                     path=output_dir,
                     file_naming=no_colon_file_naming,
                     max_writers_per_bundle=0)
                 | beam.Map(
                     lambda fr: FileSystems.join(output_dir, fr.file_name))
                 | beam.ParDo(self.record_dofn()))

        # Verification pipeline
        with TestPipeline() as p:
            files = (
                p
                | beam.io.fileio.MatchFiles(FileSystems.join(output_dir, '*')))

            file_names = (files | beam.Map(lambda fm: fm.path))

            # (path, [lines]) for every matched file.
            file_contents = (
                files
                | beam.io.fileio.ReadMatches()
                | beam.Map(lambda rf: (
                    rf.metadata.path, rf.read_utf8().strip().split('\n'))))

            content = (file_contents
                       | beam.FlatMap(lambda fc: [ln.strip() for ln in fc[1]]))

            assert_that(file_names,
                        equal_to(WriteFilesTest.all_records),
                        label='AssertFilesMatch')
            assert_that(content,
                        matches_all(WriteFilesTest.LARGER_COLLECTION),
                        label='AssertContentsMatch')