Example #1
  def test_run_example_with_setup_file(self):
    pipeline = TestPipeline(is_integration_test=True)
    coordinate_output = FileSystems.join(
        pipeline.get_option('output'),
        'juliaset-{}'.format(str(uuid.uuid4())),
        'coordinates.txt')
    extra_args = {
        'coordinate_output': coordinate_output,
        'grid_size': self.GRID_SIZE,
        'setup_file': os.path.normpath(
            os.path.join(os.path.dirname(__file__), '..', 'setup.py')),
        'on_success_matcher': all_of(PipelineStateMatcher(PipelineState.DONE)),
    }
    args = pipeline.get_full_options_as_args(**extra_args)

    juliaset.run(args)
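These snippets are method bodies lifted from larger test modules, so their imports are elided. As a rough sketch, an Apache Beam integration test like the one above typically assumes imports along these lines (module paths are from Beam's and PyHamcrest's public layout; verify against your installed versions):

from hamcrest.core.core.allof import all_of
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.pipeline_verifiers import PipelineStateMatcher
from apache_beam.runners.runner import PipelineState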
Example #2
  def test_hourly_team_score_it(self):
    state_verifier = PipelineStateMatcher(PipelineState.DONE)
    query = ('SELECT COUNT(*) FROM [%s:%s.%s]' % (self.project,
                                                  self.dataset.name,
                                                  self.OUTPUT_TABLE))

    bigquery_verifier = BigqueryMatcher(self.project,
                                        query,
                                        self.DEFAULT_EXPECTED_CHECKSUM)

    extra_opts = {'input': self.DEFAULT_INPUT_FILE,
                  'dataset': self.dataset.name,
                  'window_duration': 1,
                  'on_success_matcher': all_of(state_verifier,
                                               bigquery_verifier)}

    # Register clean up before pipeline execution
    # Note that actual execution happens in reverse order.
    self.addCleanup(self._cleanup_dataset)
    self.addCleanup(utils.delete_bq_table, self.project,
                    self.dataset.name, self.OUTPUT_TABLE)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    hourly_team_score.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts))
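The BigqueryMatcher used above, and the Pub/Sub and BigQuery result matchers used in later examples, come from Beam's GCP test utilities. The usual import paths (again, verify against your Beam version) are:

from apache_beam.io.gcp.tests.bigquery_matcher import BigqueryMatcher
from apache_beam.io.gcp.tests.bigquery_matcher import BigqueryFullResultMatcher
from apache_beam.io.gcp.tests.pubsub_matcher import PubSubMessageMatcher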
Example #3
def step_with(name, start, stop, status):
    return has_properties(
        name=name,
        title=name,
        attrib=all_of(
            has_entry('start', has_float(greater_than_or_equal_to(start))),
            has_entry('stop', has_float(less_than_or_equal_to(stop))),
            has_entry('status', status)))
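step_with composes has_properties with all_of so that every attribute of a report step is checked by one matcher. has_float is not a stock PyHamcrest matcher; a minimal sketch of what such a matcher could look like, built on PyHamcrest's BaseMatcher (the class name and exact behavior are assumptions):

from hamcrest.core.base_matcher import BaseMatcher

class HasFloat(BaseMatcher):
    """Assumed behavior: parse the item as a float, then delegate to an
    inner matcher such as greater_than_or_equal_to(start)."""

    def __init__(self, value_matcher):
        self.value_matcher = value_matcher

    def _matches(self, item):
        try:
            return self.value_matcher.matches(float(item))
        except (TypeError, ValueError):
            return False

    def describe_to(self, description):
        description.append_text('a value parseable as a float matching ')
        self.value_matcher.describe_to(description)

def has_float(value_matcher):
    return HasFloat(value_matcher)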
Example #4
  def test_multiple_destinations_transform(self):
    output_table_1 = '%s%s' % (self.output_table, 1)
    output_table_2 = '%s%s' % (self.output_table, 2)
    output_table_3 = '%s%s' % (self.output_table, 3)
    output_table_4 = '%s%s' % (self.output_table, 4)
    pipeline_verifiers = [
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT * FROM %s" % output_table_1,
            data=[(d['name'], d['language'])
                  for d in _ELEMENTS
                  if 'language' in d]),
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT * FROM %s" % output_table_2,
            data=[(d['name'], d['foundation'])
                  for d in _ELEMENTS
                  if 'foundation' in d]),
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT * FROM %s" % output_table_3,
            data=[(d['name'], d['language'])
                  for d in _ELEMENTS
                  if 'language' in d]),
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT * FROM %s" % output_table_4,
            data=[(d['name'], d['foundation'])
                  for d in _ELEMENTS
                  if 'foundation' in d])]

    args = self.test_pipeline.get_full_options_as_args(
        on_success_matcher=all_of(*pipeline_verifiers))

    with beam.Pipeline(argv=args) as p:
      input = p | beam.Create(_ELEMENTS)

      # Get all input in same machine
      input = (input
               | beam.Map(lambda x: (None, x))
               | beam.GroupByKey()
               | beam.FlatMap(lambda elm: elm[1]))

      _ = (input |
           "WriteWithMultipleDestsFreely" >> bigquery.WriteToBigQuery(
               table=lambda x: (output_table_1
                                if 'language' in x
                                else output_table_2),
               create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
               write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY))

      _ = (input |
           "WriteWithMultipleDests" >> bigquery.WriteToBigQuery(
               table=lambda x: (output_table_3
                                if 'language' in x
                                else output_table_4),
               create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
               write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY,
               max_file_size=20,
               max_files_per_bundle=-1))
Example #5
  def run_bigquery_io_read_pipeline(self, input_size):
    test_pipeline = TestPipeline(is_integration_test=True)
    pipeline_verifiers = [PipelineStateMatcher()]
    extra_opts = {'input_table': self.DEFAULT_DATASET + "." +
                                 self.DEFAULT_TABLE_PREFIX + input_size,
                  'num_records': self.NUM_RECORDS[input_size],
                  'on_success_matcher': all_of(*pipeline_verifiers)}
    bigquery_io_read_pipeline.run(test_pipeline.get_full_options_as_args(
        **extra_opts))
Example #6
def test_many_elements():
    Box = xmlfied('box', foos=Many(Element(name='foo')))

    box = Box(foos=['a', 'b', 'c'])

    assert_that(
        etree.tostring(box.toxml()),
        all_of(
            string_contains_in_order('<box>', '<foos>', '<foo>', 'a', '</foo>',
                                     '</foos>', '</box>'),
            string_contains_in_order('<box>', '<foos>', '<foo>', 'b', '</foo>',
                                     '</foos>', '</box>'),
            string_contains_in_order('<box>', '<foos>', '<foo>', 'c', '</foo>',
                                     '</foos>', '</box>')))
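For reference, string_contains_in_order succeeds only if all of its substrings occur in the given order, and all_of requires every such matcher to pass. A self-contained check with plain PyHamcrest (no xmlfied involved):

from hamcrest import assert_that, all_of, string_contains_in_order

# Each matcher scans the whole string independently; all must succeed.
assert_that('<box><foo>a</foo><foo>b</foo></box>',
            all_of(string_contains_in_order('<box>', '<foo>', 'a', '</foo>'),
                   string_contains_in_order('<foo>', 'b', '</foo>', '</box>')))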
Example #7
def test_step_attach(timed_report_for):
    report, start, stop = timed_report_for("""
    import pytest
    def test_ololo_pewpew():
        with pytest.allure.step(title='withattach'):
            pytest.allure.attach('myattach', 'abcdef')
    """)

    assert_that(
        report.findall('.//test-case/steps/step'),
        contains(
            all_of(
                step_with('withattach', start, stop, Status.PASSED),
                has_property(
                    'attachments',
                    has_property('attachment',
                                 has_entry('title', 'myattach'))))))
Example #8
    def test_one_job_fails_all_jobs_fail(self):

        # If one of the import jobs fails, then other jobs must not be performed.
        # This is to avoid reinsertion of some records when a pipeline fails and
        # is rerun.
        output_table_1 = '%s%s' % (self.output_table, 1)
        output_table_2 = '%s%s' % (self.output_table, 2)

        self.bigquery_client.get_or_create_table(
            self.project, self.dataset_id,
            output_table_1.split('.')[1],
            bigquery_tools.parse_table_schema_from_json(self.BIG_QUERY_SCHEMA),
            None, None)
        self.bigquery_client.get_or_create_table(
            self.project, self.dataset_id,
            output_table_2.split('.')[1],
            bigquery_tools.parse_table_schema_from_json(
                self.BIG_QUERY_SCHEMA_2), None, None)

        pipeline_verifiers = [
            BigqueryFullResultMatcher(project=self.project,
                                      query="SELECT name, language FROM %s" %
                                      output_table_1,
                                      data=[]),
            BigqueryFullResultMatcher(project=self.project,
                                      query="SELECT name, foundation FROM %s" %
                                      output_table_2,
                                      data=[])
        ]

        args = self.test_pipeline.get_full_options_as_args(
            experiments='use_beam_bq_sink')

        with self.assertRaises(Exception):
            with beam.Pipeline(argv=args) as p:
                input = p | beam.Create(_ELEMENTS)
                input2 = p | "Broken record" >> beam.Create(
                    ['language_broken_record'])

                input = (input, input2) | beam.Flatten()

                _ = (input
                     | "WriteWithMultipleDests" >> bigquery.WriteToBigQuery(
                         table=lambda x:
                         (output_table_1
                          if 'language' in x else output_table_2),
                         create_disposition=(
                             beam.io.BigQueryDisposition.CREATE_IF_NEEDED),
                         write_disposition=beam.io.BigQueryDisposition.
                         WRITE_APPEND))

        hamcrest_assert(p, all_of(*pipeline_verifiers))
Example #9
    def test_leader_board_it(self):
        state_verifier = PipelineStateMatcher(PipelineState.RUNNING)

        success_condition = 'total_score=5000 LIMIT 1'
        users_query = ('SELECT total_score FROM [%s:%s.%s] '
                       'WHERE %s' %
                       (self.project, self.dataset.name,
                        self.OUTPUT_TABLE_USERS, success_condition))
        bq_users_verifier = BigqueryMatcher(self.project, users_query,
                                            self.DEFAULT_EXPECTED_CHECKSUM)

        teams_query = ('SELECT total_score FROM [%s:%s.%s] '
                       'WHERE %s' %
                       (self.project, self.dataset.name,
                        self.OUTPUT_TABLE_TEAMS, success_condition))
        bq_teams_verifier = BigqueryMatcher(self.project, teams_query,
                                            self.DEFAULT_EXPECTED_CHECKSUM)

        extra_opts = {
            'subscription':
            self.input_sub.full_name,
            'dataset':
            self.dataset.name,
            'topic':
            self.input_topic.full_name,
            'team_window_duration':
            1,
            'wait_until_finish_duration':
            self.WAIT_UNTIL_FINISH_DURATION,
            'on_success_matcher':
            all_of(state_verifier, bq_users_verifier, bq_teams_verifier)
        }

        # Register cleanup before pipeline execution.
        # Note that actual execution happens in reverse order.
        self.addCleanup(self._cleanup_pubsub)
        self.addCleanup(self._cleanup_dataset)
        self.addCleanup(utils.delete_bq_table, self.project, self.dataset.name,
                        self.OUTPUT_TABLE_USERS)
        self.addCleanup(utils.delete_bq_table, self.project, self.dataset.name,
                        self.OUTPUT_TABLE_TEAMS)

        # Generate input data and inject to PubSub.
        test_utils.wait_for_subscriptions_created(
            [self.input_topic, self.input_sub])
        self._inject_pubsub_game_events(self.input_topic,
                                        self.DEFAULT_INPUT_COUNT)

        # Get pipeline options from command argument: --test-pipeline-options,
        # and start pipeline job by calling pipeline main function.
        leader_board.run(
            self.test_pipeline.get_full_options_as_args(**extra_opts))
Example #10
 def testNoMatchIfArgumentFailsToSatisfyAllOfManyOtherMatchers(self):
     self.assert_does_not_match(
         "matcher in the middle",
         all_of(
             equal_to("good"),
             equal_to("good"),
             equal_to("good"),
             equal_to("bad"),
             equal_to("good"),
             equal_to("good"),
         ),
         "good",
     )
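A self-contained illustration of the behavior this test pins down: all_of is a logical AND over its component matchers, and the assertion error reports which component failed.

from hamcrest import assert_that, all_of, equal_to

assert_that("good", all_of(equal_to("good"), equal_to("good")))  # passes

try:
    assert_that("good", all_of(equal_to("good"), equal_to("bad")))
except AssertionError as e:
    print(e)  # mismatch description points at equal_to("bad")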
Example #11
def test_cyrillic_exc(report_for):
    report = report_for(u"""
    # -*- coding: utf-8 -*-
    def test_foo():
        raise Exception(u'русские буквы')
    """)

    assert_that(report.findall('.//failure'), contains(
        all_of(has_property('message', u'Exception: русские буквы'),
               has_property('stack-trace', u'''def test_foo():
>       raise Exception(u'русские буквы')
E       Exception: русские буквы

test_cyrillic_exc.py:3: Exception'''))))
Example #12
  def run_datastore_write(self, limit=None):
    test_pipeline = TestPipeline(is_integration_test=True)
    current_time = datetime.now().strftime("%m%d%H%M%S")
    seed = random.randint(0, 100000)
    kind = 'testkind%s%d' % (current_time, seed)
    pipeline_verifiers = [PipelineStateMatcher()]
    extra_opts = {'kind': kind,
                  'num_entities': self.NUM_ENTITIES,
                  'on_success_matcher': all_of(*pipeline_verifiers)}
    if limit is not None:
      extra_opts['limit'] = limit

    datastore_write_it_pipeline.run(test_pipeline.get_full_options_as_args(
        **extra_opts))
Example #13
def test_unicode_labels(report_for):
    report = report_for(u"""
    # -*- coding: utf-8 -*-
    import allure

    @allure.feature(u'русские буквы')
    @allure.story(u'еще русские буквы')
    def test_a():
        pass
    """)

    assert_that(report, all_of(
        has_label('test_a', 'feature', u'русские буквы'),
        has_label('test_a', 'story', u'еще русские буквы')))
Example #15
  def test_big_query_standard_sql(self):
    verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
    expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)
    pipeline_verifiers = [PipelineStateMatcher(), BigqueryMatcher(
        project=self.project,
        query=verify_query,
        checksum=expected_checksum)]
    extra_opts = {'query': STANDARD_QUERY,
                  'output': self.output_table,
                  'output_schema': DIALECT_OUTPUT_SCHEMA,
                  'use_standard_sql': True,
                  'on_success_matcher': all_of(*pipeline_verifiers)}
    options = self.test_pipeline.get_full_options_as_args(**extra_opts)
    big_query_query_to_table_pipeline.run_bq_pipeline(options)
Example #16
def test_nested_steps(timed_report_for):
    report, start, stop = timed_report_for("""
    import pytest
    def test_ololo_pewpew():
        with pytest.allure.step(title='outer'):
            with pytest.allure.step(title='inner'):
                assert False

    """)

    assert_that(
        report.findall('.//test-case/steps/step'),
        contains(
            all_of(
                step_with('outer', start, stop, Status.FAILED),
                has_property(
                    'steps',
                    has_property(
                        'step',
                        step_with('inner', start, stop, Status.FAILED))))))
Example #18
def test_add_environment_in_testcase(report_for, result, environment_xml):
    report_for("""
    import pytest
    import allure

    def test_dummy():
        allure.environment(foo='bar')
        assert %s
    """ % result)

    assert_that(
        environment_xml().findall('.//parameter')[0],
        all_of(has_property('name', 'foo'), has_property('key', 'foo'),
               has_property('value', 'bar')))
Example #20
    def _test_streaming(self, with_attributes):
        """Runs IT pipeline with message verifier.

    Args:
      with_attributes: False - Reads and writes message data only.
        True - Reads and writes message data and attributes. Also verifies
        id_label and timestamp_attribute features.
    """
        # Set on_success_matcher to verify pipeline state and pubsub output. These
        # verifications run on a (remote) worker.

        # Expect the state to be RUNNING since a streaming pipeline is usually
        # never DONE. The test runner will cancel the pipeline after verification.
        state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
        expected_messages = self.EXPECTED_OUTPUT_MESSAGES[self.runner_name]
        if not with_attributes:
            expected_messages = [
                pubsub_msg.data.decode('utf-8')
                for pubsub_msg in expected_messages
            ]
        if self.runner_name == 'TestDirectRunner':
            strip_attributes = None
        else:
            strip_attributes = [self.ID_LABEL, self.TIMESTAMP_ATTRIBUTE]
        pubsub_msg_verifier = PubSubMessageMatcher(
            self.project,
            self.output_sub.name,
            expected_messages,
            timeout=MESSAGE_MATCHER_TIMEOUT_S,
            with_attributes=with_attributes,
            strip_attributes=strip_attributes)
        extra_opts = {
            'input_subscription': self.input_sub.name,
            'output_topic': self.output_topic.name,
            'wait_until_finish_duration': TEST_PIPELINE_DURATION_MS,
            'on_success_matcher': all_of(state_verifier, pubsub_msg_verifier)
        }

        # Generate input data and inject to PubSub.
        for msg in self.INPUT_MESSAGES[self.runner_name]:
            self.pub_client.publish(self.input_topic.name, msg.data,
                                    **msg.attributes)

        # Get pipeline options from command argument: --test-pipeline-options,
        # and start pipeline job by calling pipeline main function.
        pubsub_it_pipeline.run_pipeline(
            argv=self.test_pipeline.get_full_options_as_args(**extra_opts),
            with_attributes=with_attributes,
            id_label=self.ID_LABEL,
            timestamp_attribute=self.TIMESTAMP_ATTRIBUTE)
Example #21
  def test_leader_board_it(self):
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)

    success_condition = 'total_score=5000 LIMIT 1'
    users_query = ('SELECT total_score FROM [%s:%s.%s] '
                   'WHERE %s' % (self.project,
                                 self.dataset.name,
                                 self.OUTPUT_TABLE_USERS,
                                 success_condition))
    bq_users_verifier = BigqueryMatcher(self.project,
                                        users_query,
                                        self.DEFAULT_EXPECTED_CHECKSUM)

    teams_query = ('SELECT total_score FROM [%s:%s.%s] '
                   'WHERE %s' % (self.project,
                                 self.dataset.name,
                                 self.OUTPUT_TABLE_TEAMS,
                                 success_condition))
    bq_teams_verifier = BigqueryMatcher(self.project,
                                        teams_query,
                                        self.DEFAULT_EXPECTED_CHECKSUM)

    extra_opts = {'subscription': self.input_sub.full_name,
                  'dataset': self.dataset.name,
                  'topic': self.input_topic.full_name,
                  'team_window_duration': 1,
                  'wait_until_finish_duration':
                      self.WAIT_UNTIL_FINISH_DURATION,
                  'on_success_matcher': all_of(state_verifier,
                                               bq_users_verifier,
                                               bq_teams_verifier)}

    # Register cleanup before pipeline execution.
    # Note that actual execution happens in reverse order.
    self.addCleanup(self._cleanup_pubsub)
    self.addCleanup(self._cleanup_dataset)
    self.addCleanup(utils.delete_bq_table, self.project,
                    self.dataset.name, self.OUTPUT_TABLE_USERS)
    self.addCleanup(utils.delete_bq_table, self.project,
                    self.dataset.name, self.OUTPUT_TABLE_TEAMS)

    # Generate input data and inject to PubSub.
    test_utils.wait_for_subscriptions_created([self.input_topic,
                                               self.input_sub])
    self._inject_pubsub_game_events(self.input_topic, self.DEFAULT_INPUT_COUNT)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    leader_board.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts))
Example #22
def test_many_elements():
    Box = xmlfied('box', foos=WrappedMany(Element(name='foo')))

    box = Box(foos=['a', 'b', 'c'])

    assert_that(
        etree.tostring(box.toxml()),
        all_of(
            string_contains_in_order('<box>', '<foos>', '<foo>', 'a', '</foo>',
                                     '</foos>', '</box>'),
            string_contains_in_order('<box>', '<foos>', '<foo>', 'b', '</foo>',
                                     '</foos>', '</box>'),
            string_contains_in_order('<box>', '<foos>', '<foo>', 'c', '</foo>',
                                     '</foos>', '</box>')))
Example #23
def test_cyrillic_exc(report_for):
    report = report_for(u"""
    # -*- coding: utf-8 -*-
    def test_foo():
        raise Exception(u'русские буквы')
    """)

    assert_that(report.findall('.//failure'), contains(
        all_of(has_property('message', u'Exception: русские буквы'),
               has_property('stack-trace', u'''def test_foo():
>       raise Exception(u'русские буквы')
E       Exception: русские буквы

test_cyrillic_exc.py:3: Exception'''))))
Example #24
    def _run_wordcount_it(self, run_wordcount, **opts):
        test_pipeline = TestPipeline(is_integration_test=True)
        extra_opts = {}

        # Set extra options to the pipeline for test purpose
        test_output = '/'.join([
            test_pipeline.get_option('output'),
            str(int(time.time() * 1000)), 'results'
        ])
        extra_opts['output'] = test_output

        test_input = test_pipeline.get_option('input')
        if test_input:
            extra_opts['input'] = test_input

        arg_sleep_secs = test_pipeline.get_option('sleep_secs')
        sleep_secs = int(
            arg_sleep_secs) if arg_sleep_secs is not None else None
        expect_checksum = (test_pipeline.get_option('expect_checksum')
                           or self.DEFAULT_CHECKSUM)
        pipeline_verifiers = [
            PipelineStateMatcher(),
            FileChecksumMatcher(test_output + '*-of-*', expect_checksum,
                                sleep_secs)
        ]
        extra_opts['on_success_matcher'] = all_of(*pipeline_verifiers)
        extra_opts.update(opts)

        # Register clean up before pipeline execution
        self.addCleanup(delete_files, [test_output + '*'])

        publish_to_bq = bool(
            test_pipeline.get_option('publish_to_big_query') or False)

        # Start measure time for performance test
        start_time = time.time()

        # Get pipeline options from command argument: --test-pipeline-options,
        # and start pipeline job by calling pipeline main function.
        run_wordcount(
            test_pipeline.get_full_options_as_args(**extra_opts),
            save_main_session=False,
        )

        end_time = time.time()
        run_time = end_time - start_time

        if publish_to_bq:
            self._publish_metrics(test_pipeline, run_time)
Example #25
  def test_streaming_wordcount_it(self):
    # Set extra options to the pipeline for test purpose
    pipeline_verifiers = [PipelineStateMatcher(PipelineState.RUNNING)]
    extra_opts = {'input_sub': self.input_sub.full_name,
                  'output_topic': self.output_topic.full_name,
                  'on_success_matcher': all_of(*pipeline_verifiers)}

    # Generate input data and inject to PubSub.
    test_utils.wait_for_subscriptions_created([self.input_sub])
    self._inject_numbers(self.input_topic, DEFAULT_INPUT_NUMBERS)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    streaming_wordcount.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts))
Example #26
  def test_big_query_new_types(self):
    expected_checksum = test_utils.compute_hash(NEW_TYPES_OUTPUT_EXPECTED)
    verify_query = NEW_TYPES_OUTPUT_VERIFY_QUERY % self.output_table
    pipeline_verifiers = [PipelineStateMatcher(), BigqueryMatcher(
        project=self.project,
        query=verify_query,
        checksum=expected_checksum)]
    self._setup_new_types_env()
    extra_opts = {
        'query': NEW_TYPES_QUERY % (self.dataset_id, NEW_TYPES_INPUT_TABLE),
        'output': self.output_table,
        'output_schema': NEW_TYPES_OUTPUT_SCHEMA,
        'use_standard_sql': False,
        'on_success_matcher': all_of(*pipeline_verifiers)}
    options = self.test_pipeline.get_full_options_as_args(**extra_opts)
    big_query_query_to_table_pipeline.run_bq_pipeline(options)
Example #27
 def run_bigquery_io_read_pipeline(self, input_size):
     test_pipeline = TestPipeline(is_integration_test=True)
     pipeline_verifiers = [
         PipelineStateMatcher(),
     ]
     extra_opts = {
         'input_table':
         self.DEFAULT_DATASET + "." + self.DEFAULT_TABLE_PREFIX +
         input_size,
         'num_records':
         self.NUM_RECORDS[input_size],
         'on_success_matcher':
         all_of(*pipeline_verifiers)
     }
     bigquery_io_read_pipeline.run(
         test_pipeline.get_full_options_as_args(**extra_opts))
Example #28
  def test_big_query_legacy_sql(self):
    verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
    expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)
    pipeline_verifiers = [PipelineStateMatcher(), BigqueryMatcher(
        project=self.project,
        query=verify_query,
        checksum=expected_checksum)]

    extra_opts = {'query': LEGACY_QUERY,
                  'output': self.output_table,
                  'output_schema': DIALECT_OUTPUT_SCHEMA,
                  'use_standard_sql': False,
                  'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION_MS,
                  'on_success_matcher': all_of(*pipeline_verifiers)}
    options = self.test_pipeline.get_full_options_as_args(**extra_opts)
    big_query_query_to_table_pipeline.run_bq_pipeline(options)
Example #30
def test_many_nested():
    Item = xmlfied('item', value=Element())
    Box = xmlfied('box', items=WrappedMany(Nested()))

    box = Box(items=[])
    box.items.append(Item('a'))
    box.items.append(Item('a'))
    box.items.append(Item('a'))

    assert_that(
        etree.tostring(box.toxml()),
        all_of(
            string_contains_in_order('<box>', '<items>', '<item>', 'a',
                                     '</item>', '<item>', 'a', '</item>',
                                     '<item>', 'a', '</item>', '</items>',
                                     '</box>')))
Example #31
  def test_wordcount_it(self):
    test_pipeline = TestPipeline(is_integration_test=True)

    # Set extra options to the pipeline for test purpose
    output = '/'.join([test_pipeline.get_option('output'),
                       test_pipeline.get_option('job_name'),
                       'results'])
    pipeline_verifiers = [PipelineStateMatcher(),
                          FileChecksumMatcher(output + '*-of-*',
                                              self.DEFAULT_CHECKSUM)]
    extra_opts = {'output': output,
                  'on_success_matcher': all_of(*pipeline_verifiers)}

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    wordcount.run(test_pipeline.get_full_options_as_args(**extra_opts))
Example #32
  def test_one_job_fails_all_jobs_fail(self):

    # If one of the import jobs fails, then other jobs must not be performed.
    # This is to avoid reinsertion of some records when a pipeline fails and
    # is rerun.
    output_table_1 = '%s%s' % (self.output_table, 1)
    output_table_2 = '%s%s' % (self.output_table, 2)

    self.bigquery_client.get_or_create_table(
        self.project, self.dataset_id, output_table_1.split('.')[1],
        bigquery_tools.parse_table_schema_from_json(self.BIG_QUERY_SCHEMA),
        None, None)
    self.bigquery_client.get_or_create_table(
        self.project, self.dataset_id, output_table_2.split('.')[1],
        bigquery_tools.parse_table_schema_from_json(self.BIG_QUERY_SCHEMA_2),
        None, None)

    pipeline_verifiers = [
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT * FROM %s" % output_table_1,
            data=[]),
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT * FROM %s" % output_table_2,
            data=[])]

    args = self.test_pipeline.get_full_options_as_args(
        experiments='use_beam_bq_sink')

    with self.assertRaises(Exception):
      with beam.Pipeline(argv=args) as p:
        input = p | beam.Create(_ELEMENTS)
        input2 = p | "Broken record" >> beam.Create(['language_broken_record'])

        input = (input, input2) | beam.Flatten()

        _ = (input |
             "WriteWithMultipleDests" >> bigquery.WriteToBigQuery(
                 table=lambda x: (output_table_1
                                  if 'language' in x
                                  else output_table_2),
                 create_disposition=(
                     beam.io.BigQueryDisposition.CREATE_IF_NEEDED),
                 write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND))

    hamcrest_assert(p, all_of(*pipeline_verifiers))
Example #33
    def test_bigquery_side_input_it(self):
        state_verifier = PipelineStateMatcher(PipelineState.DONE)
        NUM_GROUPS = 3

        extra_opts = {
            'output': self.output,
            'num_groups': str(NUM_GROUPS),
            'on_success_matcher': all_of(state_verifier)
        }

        # Register clean up before pipeline execution
        self.addCleanup(delete_files, [self.output + '*'])

        # Get pipeline options from command argument: --test-pipeline-options,
        # and start pipeline job by calling pipeline main function.
        bigquery_side_input.run(
            self.test_pipeline.get_full_options_as_args(**extra_opts))
Example #34
def test_step_attach(timed_report_for):
    report, start, stop = timed_report_for("""
    import pytest
    def test_ololo_pewpew():
        with pytest.allure.step(title='withattach'):
            pytest.allure.attach('myattach', 'abcdef')
    """)

    assert_that(
        report.findall('.//test-case/steps/step'),
        contains(
            all_of(
                step_with('withattach', start, stop, Status.PASSED),
                has_property(
                    'attachments',
                    has_property('attachment', has_entry('title',
                                                         'myattach'))))))
Example #35
  def _test_streaming(self, with_attributes):
    """Runs IT pipeline with message verifier.

    Args:
      with_attributes: False - Reads and writes message data only.
        True - Reads and writes message data and attributes. Also verifies
        id_label and timestamp_attribute features.
    """
    # Set on_success_matcher to verify pipeline state and pubsub output. These
    # verifications run on a (remote) worker.

    # Expect the state to be RUNNING since a streaming pipeline is usually
    # never DONE. The test runner will cancel the pipeline after verification.
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
    expected_messages = self.EXPECTED_OUTPUT_MESSAGES[self.runner_name]
    if not with_attributes:
      expected_messages = [pubsub_msg.data.decode('utf-8')
                           for pubsub_msg in expected_messages]
    if self.runner_name == 'TestDirectRunner':
      strip_attributes = None
    else:
      strip_attributes = [self.ID_LABEL, self.TIMESTAMP_ATTRIBUTE]
    pubsub_msg_verifier = PubSubMessageMatcher(
        self.project,
        self.output_sub.name,
        expected_messages,
        timeout=MESSAGE_MATCHER_TIMEOUT_S,
        with_attributes=with_attributes,
        strip_attributes=strip_attributes)
    extra_opts = {'input_subscription': self.input_sub.name,
                  'output_topic': self.output_topic.name,
                  'wait_until_finish_duration': TEST_PIPELINE_DURATION_MS,
                  'on_success_matcher': all_of(state_verifier,
                                               pubsub_msg_verifier)}

    # Generate input data and inject to PubSub.
    for msg in self.INPUT_MESSAGES[self.runner_name]:
      self.pub_client.publish(self.input_topic.name, msg.data, **msg.attributes)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    pubsub_it_pipeline.run_pipeline(
        argv=self.test_pipeline.get_full_options_as_args(**extra_opts),
        with_attributes=with_attributes,
        id_label=self.ID_LABEL,
        timestamp_attribute=self.TIMESTAMP_ATTRIBUTE)
Example #36
  def test_big_query_legacy_sql(self):
    verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
    expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)
    pipeline_verifiers = [PipelineStateMatcher(), BigqueryMatcher(
        project=self.project,
        query=verify_query,
        checksum=expected_checksum)]

    gs_location = 'gs://temp-storage-for-upload-tests/%s' % self.output_table
    extra_opts = {'query': LEGACY_QUERY,
                  'output': self.output_table,
                  'bq_temp_location': gs_location,
                  'output_schema': DIALECT_OUTPUT_SCHEMA,
                  'use_standard_sql': False,
                  'on_success_matcher': all_of(*pipeline_verifiers)}
    options = self.test_pipeline.get_full_options_as_args(**extra_opts)
    big_query_query_to_table_pipeline.run_bq_pipeline(options)
Example #37
  def test_user_score_it(self):

    state_verifier = PipelineStateMatcher(PipelineState.DONE)
    file_verifier = FileChecksumMatcher(self.output + '*-of-*',
                                        self.DEFAULT_EXPECTED_CHECKSUM)

    extra_opts = {'input': self.DEFAULT_INPUT_FILE,
                  'output': self.output + '/user-score',
                  'on_success_matcher': all_of(state_verifier,
                                               file_verifier)}

    # Register clean up before pipeline execution
    self.addCleanup(delete_files, [self.output + '*'])

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    user_score.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts))
Example #38
def test_many_nested():
    Item = xmlfied('item', value=Element())
    Box = xmlfied('box', items=Many(Nested()))

    box = Box(items=[])
    box.items.append(Item('a'))
    box.items.append(Item('a'))
    box.items.append(Item('a'))

    assert_that(etree.tostring(box.toxml()), all_of(
        string_contains_in_order('<box>',
                                 '<items>',
                                 '<item>', 'a', '</item>',
                                 '<item>', 'a', '</item>',
                                 '<item>', 'a', '</item>',
                                 '</items>',
                                 '</box>')))
Example #39
  def test_bigquery_tornadoes_it(self):
    test_pipeline = TestPipeline(is_integration_test=True)

    # Set extra options to the pipeline for test purpose
    output_table = ('BigQueryTornadoesIT'
                    '.monthly_tornadoes_%s' % int(round(time.time() * 1000)))
    query = 'SELECT month, tornado_count FROM [%s]' % output_table
    pipeline_verifiers = [PipelineStateMatcher(),
                          BigqueryMatcher(
                              project=test_pipeline.get_option('project'),
                              query=query,
                              checksum=self.DEFAULT_CHECKSUM)]
    extra_opts = {'output': output_table,
                  'on_success_matcher': all_of(*pipeline_verifiers)}

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    bigquery_tornadoes.run(
        test_pipeline.get_full_options_as_args(**extra_opts))
Example #41
  def run_pipeline(self):
    # Waits for messages to appear in output topic.
    expected_msg = [msg.encode('utf-8') for msg in MESSAGES_TO_PUBLISH]
    pubsub_msg_verifier = PubSubMessageMatcher(
        self.project, self.output_sub.name, expected_msg, timeout=600)

    # Checks that pipeline initializes to RUNNING state.
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)

    extra_opts = {
        'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION,
        'on_success_matcher': all_of(state_verifier, pubsub_msg_verifier),
        'experiment': 'beam_fn_api',
        'input_subscription': self.input_sub.name,
        'output_topic': self.output_topic.name,
    }

    argv = self.test_pipeline.get_full_options_as_args(**extra_opts)
    return dataflow_exercise_streaming_metrics_pipeline.run(argv)
Example #42
    def test_user_score_it(self):

        state_verifier = PipelineStateMatcher(PipelineState.DONE)
        file_verifier = FileChecksumMatcher(self.output + '*-of-*',
                                            self.DEFAULT_EXPECTED_CHECKSUM)

        extra_opts = {
            'input': self.DEFAULT_INPUT_FILE,
            'output': self.output + '/user-score',
            'on_success_matcher': all_of(state_verifier, file_verifier)
        }

        # Register clean up before pipeline execution
        self.addCleanup(delete_files, [self.output + '*'])

        # Get pipeline options from command argument: --test-pipeline-options,
        # and start pipeline job by calling pipeline main function.
        user_score.run(
            self.test_pipeline.get_full_options_as_args(**extra_opts))
Example #43
  def test_bqfl_streaming(self):
    if isinstance(self.test_pipeline.runner, TestDataflowRunner):
      self.skipTest("TestStream is not supported on TestDataflowRunner")
    output_table = '%s_%s' % (self.output_table, 'ints')
    _SIZE = 100
    schema = self.BIG_QUERY_STREAMING_SCHEMA
    l = [{'Integr': i} for i in range(_SIZE)]

    state_matcher = PipelineStateMatcher(PipelineState.RUNNING)
    bq_matcher = BigqueryFullResultStreamingMatcher(
        project=self.project,
        query="SELECT Integr FROM %s"
        % output_table,
        data=[(i,) for i in range(100)])

    args = self.test_pipeline.get_full_options_as_args(
        on_success_matcher=all_of(state_matcher, bq_matcher),
        experiments='use_beam_bq_sink',
        streaming=True)
    with beam.Pipeline(argv=args) as p:
      stream_source = (TestStream()
                       .advance_watermark_to(0)
                       .advance_processing_time(100)
                       .add_elements(l[:_SIZE//4])
                       .advance_processing_time(100)
                       .advance_watermark_to(100)
                       .add_elements(l[_SIZE//4:2*_SIZE//4])
                       .advance_processing_time(100)
                       .advance_watermark_to(200)
                       .add_elements(l[2*_SIZE//4:3*_SIZE//4])
                       .advance_processing_time(100)
                       .advance_watermark_to(300)
                       .add_elements(l[3*_SIZE//4:])
                       .advance_processing_time(100)
                       .advance_watermark_to_infinity())
      _ = (p
           | stream_source
           | bigquery.WriteToBigQuery(output_table,
                                      schema=schema,
                                      method=bigquery.WriteToBigQuery.Method.FILE_LOADS,
                                      triggering_frequency=100))
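TestStream (from apache_beam.testing.test_stream) replays scripted watermark and processing-time advances deterministically, which is what lets this test exercise triggering_frequency. A minimal sketch of a two-element stream under the same assumptions:

from apache_beam.testing.test_stream import TestStream

stream_source = (TestStream()
                 .advance_watermark_to(0)
                 .add_elements([{'Integr': 1}, {'Integr': 2}])
                 .advance_watermark_to_infinity())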
Example #44
    def _test_streaming(self, with_attributes):
        """Runs IT pipeline with message verifier.

    Args:
      with_attributes: False - Reads and writes message data only.
        True - Reads and writes message data and attributes. Also verifies
        id_label and timestamp_attribute features.
    """
        # Build expected dataset.
        # Set extra options to the pipeline for test purpose
        state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
        expected_messages = self.EXPECTED_OUTPUT_MESSAGES
        if not with_attributes:
            expected_messages = [
                pubsub_msg.data for pubsub_msg in expected_messages
            ]
        pubsub_msg_verifier = PubSubMessageMatcher(
            self.project,
            OUTPUT_SUB + self.uuid,
            expected_messages,
            timeout=MESSAGE_MATCHER_TIMEOUT_S,
            with_attributes=with_attributes,
            strip_attributes=[self.ID_LABEL, self.TIMESTAMP_ATTRIBUTE])
        extra_opts = {
            'input_subscription': self.input_sub.full_name,
            'output_topic': self.output_topic.full_name,
            'wait_until_finish_duration': TEST_PIPELINE_DURATION_MS,
            'on_success_matcher': all_of(state_verifier, pubsub_msg_verifier)
        }

        # Generate input data and inject to PubSub.
        test_utils.wait_for_subscriptions_created([self.input_sub])
        for msg in self.INPUT_MESSAGES:
            self.input_topic.publish(msg.data, **msg.attributes)

        # Get pipeline options from command argument: --test-pipeline-options,
        # and start pipeline job by calling pipeline main function.
        pubsub_it_pipeline.run_pipeline(
            argv=self.test_pipeline.get_full_options_as_args(**extra_opts),
            with_attributes=with_attributes,
            id_label=self.ID_LABEL,
            timestamp_attribute=self.TIMESTAMP_ATTRIBUTE)
Example #45
    def test_wordcount_it(self):
        test_pipeline = TestPipeline(is_integration_test=True)

        # Set extra options to the pipeline for test purpose
        output = '/'.join([
            test_pipeline.get_option('output'),
            test_pipeline.get_option('job_name'), 'results'
        ])
        pipeline_verifiers = [
            PipelineStateMatcher(),
            FileChecksumMatcher(output + '*-of-*', self.DEFAULT_CHECKSUM)
        ]
        extra_opts = {
            'output': output,
            'on_success_matcher': all_of(*pipeline_verifiers)
        }

        # Get pipeline options from command argument: --test-pipeline-options,
        # and start pipeline job by calling pipeline main function.
        wordcount.run(test_pipeline.get_full_options_as_args(**extra_opts))
Example #46
    def setUp(self):
        super(GroupByKeyStreamingTest, self).setUp()
        self.topic_short_name = self.pipeline.get_option('pubsub_topic_name')
        self.setup_pubsub()

        timeout = self.pipeline.get_option('timeout') or DEFAULT_TIMEOUT
        expected_num_of_records = self.pipeline.get_option('num_of_records')
        pubsub_msg_verifier = PubSubMessageMatcher(
            self.project_id,
            self.output_sub.name,
            expected_msg_len=int(expected_num_of_records),
            timeout=int(timeout))

        self.extra_opts = {
            'input_subscription': self.input_sub.name,
            'output_topic': self.output_topic.name,
            'metrics_namespace': self.metrics_namespace,
            'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION,
            'on_success_matcher': all_of(pubsub_msg_verifier)
        }
Example #47
  def test_big_query_standard_sql_kms_key(self):
    verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
    expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)
    pipeline_verifiers = [PipelineStateMatcher(), BigqueryMatcher(
        project=self.project,
        query=verify_query,
        checksum=expected_checksum)]
    extra_opts = {'query': STANDARD_QUERY,
                  'output': self.output_table,
                  'output_schema': DIALECT_OUTPUT_SCHEMA,
                  'use_standard_sql': True,
                  'on_success_matcher': all_of(*pipeline_verifiers),
                  'kms_key': KMS_KEY
                 }
    options = self.test_pipeline.get_full_options_as_args(**extra_opts)
    big_query_query_to_table_pipeline.run_bq_pipeline(options)

    table = self.bigquery_client.get_table(
        self.project, self.dataset_id, 'output_table')
    self.assertEqual(KMS_KEY, table.encryptionConfiguration.kmsKeyName)
Example #49
  def test_game_stats_it(self):
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)

    success_condition = 'mean_duration=300 LIMIT 1'
    sessions_query = ('SELECT mean_duration FROM [%s:%s.%s] '
                      'WHERE %s' % (self.project,
                                    self.dataset.name,
                                    self.OUTPUT_TABLE_SESSIONS,
                                    success_condition))
    bq_sessions_verifier = BigqueryMatcher(self.project,
                                           sessions_query,
                                           self.DEFAULT_EXPECTED_CHECKSUM)

    # TODO(mariagh): Add teams table verifier once game_stats.py is fixed.

    extra_opts = {'subscription': self.input_sub.name,
                  'dataset': self.dataset.name,
                  'topic': self.input_topic.name,
                  'fixed_window_duration': 1,
                  'user_activity_window_duration': 1,
                  'wait_until_finish_duration':
                      self.WAIT_UNTIL_FINISH_DURATION,
                  'on_success_matcher': all_of(state_verifier,
                                               bq_sessions_verifier)}

    # Register cleanup before pipeline execution.
    # Note that actual execution happens in reverse order.
    self.addCleanup(self._cleanup_pubsub)
    self.addCleanup(self._cleanup_dataset)
    self.addCleanup(utils.delete_bq_table, self.project,
                    self.dataset.name, self.OUTPUT_TABLE_SESSIONS)
    self.addCleanup(utils.delete_bq_table, self.project,
                    self.dataset.name, self.OUTPUT_TABLE_TEAMS)

    # Generate input data and inject to PubSub.
    self._inject_pubsub_game_events(self.input_topic, self.DEFAULT_INPUT_COUNT)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    game_stats.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts))
Example #50
    def test_big_query_legacy_sql(self):
        verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
        expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)
        pipeline_verifiers = [
            PipelineStateMatcher(),
            BigqueryMatcher(project=self.project,
                            query=verify_query,
                            checksum=expected_checksum)
        ]

        gs_location = 'gs://temp-storage-for-upload-tests/%s' % self.output_table
        extra_opts = {
            'query': LEGACY_QUERY,
            'output': self.output_table,
            'bq_temp_location': gs_location,
            'output_schema': DIALECT_OUTPUT_SCHEMA,
            'use_standard_sql': False,
            'on_success_matcher': all_of(*pipeline_verifiers)
        }
        options = self.test_pipeline.get_full_options_as_args(**extra_opts)
        big_query_query_to_table_pipeline.run_bq_pipeline(options)
Example #51
  def test_streaming_wordcount_debugging_it(self):

    # Set extra options to the pipeline for test purpose
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
    pubsub_msg_verifier = PubSubMessageMatcher(
        self.project, self.output_sub.name, EXPECTED_MESSAGE, timeout=400)
    extra_opts = {
        'input_subscription': self.input_sub.name,
        'output_topic': self.output_topic.name,
        'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION,
        'on_success_matcher': all_of(state_verifier, pubsub_msg_verifier)
    }

    # Generate input data and inject to PubSub.
    self._inject_data(self.input_topic, SAMPLE_MESSAGES)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    streaming_wordcount_debugging.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts),
        save_main_session=False)
Example #52
  def test_wordcount_it(self):
    test_pipeline = TestPipeline(is_integration_test=True)

    # Set extra options to the pipeline for test purpose
    output = '/'.join([test_pipeline.get_option('output'),
                       str(int(time.time())),
                       'results'])
    arg_sleep_secs = test_pipeline.get_option('sleep_secs')
    sleep_secs = int(arg_sleep_secs) if arg_sleep_secs is not None else None
    pipeline_verifiers = [PipelineStateMatcher(),
                          FileChecksumMatcher(output + '*-of-*',
                                              self.DEFAULT_CHECKSUM,
                                              sleep_secs)]
    extra_opts = {'output': output,
                  'on_success_matcher': all_of(*pipeline_verifiers)}

    # Register clean up before pipeline execution
    self.addCleanup(delete_files, [output + '*'])

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    wordcount.run(test_pipeline.get_full_options_as_args(**extra_opts))
Example #53
  def test_datastore_wordcount_it(self):
    test_pipeline = TestPipeline(is_integration_test=True)
    dataset = test_pipeline.get_option("project")
    kind = self.DATASTORE_WORDCOUNT_KIND
    output = '/'.join([test_pipeline.get_option('output'),
                       str(int(time.time() * 1000)),
                       'datastore_wordcount_results'])

    arg_sleep_secs = test_pipeline.get_option('sleep_secs')
    sleep_secs = int(arg_sleep_secs) if arg_sleep_secs is not None else None
    pipeline_verifiers = [PipelineStateMatcher(),
                          FileChecksumMatcher(output + '*-of-*',
                                              self.EXPECTED_CHECKSUM,
                                              sleep_secs)]
    extra_opts = {'dataset': dataset,
                  'kind': kind,
                  'output': output,
                  'read_only': True,
                  'on_success_matcher': all_of(*pipeline_verifiers)}

    datastore_wordcount.run(test_pipeline.get_full_options_as_args(
        **extra_opts))
Example #54
  def _test_streaming(self, with_attributes):
    """Runs IT pipeline with message verifier.

    Args:
      with_attributes: False - Reads and writes message data only.
        True - Reads and writes message data and attributes. Also verifies
        id_label and timestamp_attribute features.
    """
    # Build expected dataset.
    # Set extra options to the pipeline for test purpose
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
    expected_messages = self.EXPECTED_OUTPUT_MESSAGES
    if not with_attributes:
      expected_messages = [pubsub_msg.data for pubsub_msg in expected_messages]
    pubsub_msg_verifier = PubSubMessageMatcher(
        self.project,
        OUTPUT_SUB + self.uuid,
        expected_messages,
        timeout=MESSAGE_MATCHER_TIMEOUT_S,
        with_attributes=with_attributes,
        strip_attributes=[self.ID_LABEL, self.TIMESTAMP_ATTRIBUTE])
    extra_opts = {'input_subscription': self.input_sub.full_name,
                  'output_topic': self.output_topic.full_name,
                  'wait_until_finish_duration': TEST_PIPELINE_DURATION_MS,
                  'on_success_matcher': all_of(state_verifier,
                                               pubsub_msg_verifier)}

    # Generate input data and inject to PubSub.
    test_utils.wait_for_subscriptions_created([self.input_sub])
    for msg in self.INPUT_MESSAGES:
      self.input_topic.publish(msg.data, **msg.attributes)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    pubsub_it_pipeline.run_pipeline(
        argv=self.test_pipeline.get_full_options_as_args(**extra_opts),
        with_attributes=with_attributes,
        id_label=self.ID_LABEL,
        timestamp_attribute=self.TIMESTAMP_ATTRIBUTE)
Example #55
  def test_streaming_wordcount_it(self):
    # Build expected dataset.
    expected_msg = [('%d: 1' % num) for num in range(DEFAULT_INPUT_NUMBERS)]

    # Set extra options to the pipeline for test purpose
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
    pubsub_msg_verifier = PubSubMessageMatcher(self.project,
                                               self.output_sub.name,
                                               expected_msg,
                                               timeout=400)
    extra_opts = {'input_subscription': self.input_sub.name,
                  'output_topic': self.output_topic.name,
                  'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION,
                  'on_success_matcher': all_of(state_verifier,
                                               pubsub_msg_verifier)}

    # Generate input data and inject to PubSub.
    self._inject_numbers(self.input_topic, DEFAULT_INPUT_NUMBERS)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    streaming_wordcount.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts))
Example #56
 def test_return_next_cell_in_valid_range(self):
     for x in range(999):
         assert_that(
             self.next_cell_calculator.next(),
             all_of(greater_than_or_equal_to(0),
                    less_than_or_equal_to(100)))
Example #57
 def __init__(self, *element_matchers):
     delegates = [has_item(e) for e in element_matchers]
     self.matcher = all_of(*delegates)
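This constructor builds a composite "contains all of these elements" matcher: each expected element is wrapped in has_item and the wrappers are AND-ed together with all_of. The same composition by hand:

from hamcrest import assert_that, all_of, has_item

# Passes because the sequence contains both 1 and 3, in any order.
assert_that([1, 2, 3], all_of(*[has_item(e) for e in (1, 3)]))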
Example #58
def has_properties(*keys_valuematchers, **kv_args):
    """Matches if an object has properties satisfying all of a dictionary
    of string property names and corresponding value matchers.

    :param matcher_dict: A dictionary mapping keys to associated value matchers,
        or to expected values for
        :py:func:`~hamcrest.core.core.isequal.equal_to` matching.

    Note that the keys must be actual keys, not matchers. Any value argument
    that is not a matcher is implicitly wrapped in an
    :py:func:`~hamcrest.core.core.isequal.equal_to` matcher to check for
    equality.

    Examples::

        has_properties({'foo':equal_to(1), 'bar':equal_to(2)})
        has_properties({'foo':1, 'bar':2})

    ``has_properties`` also accepts a list of keyword arguments:

    .. function:: has_properties(keyword1=value_matcher1[, keyword2=value_matcher2[, ...]])

    :param keyword1: A keyword to look up.
    :param valueMatcher1: The matcher to satisfy for the value, or an expected
        value for :py:func:`~hamcrest.core.core.isequal.equal_to` matching.

    Examples::

        has_properties(foo=equal_to(1), bar=equal_to(2))
        has_properties(foo=1, bar=2)

    Finally, ``has_properties`` also accepts a list of alternating keys and their
    value matchers:

    .. function:: has_properties(key1, value_matcher1[, ...])

    :param key1: A key (not a matcher) to look up.
    :param valueMatcher1: The matcher to satisfy for the value, or an expected
        value for :py:func:`~hamcrest.core.core.isequal.equal_to` matching.

    Examples::

        has_properties('foo', equal_to(1), 'bar', equal_to(2))
        has_properties('foo', 1, 'bar', 2)

    """
    if len(keys_valuematchers) == 1:
        try:
            base_dict = keys_valuematchers[0].copy()
            for key in base_dict:
                base_dict[key] = wrap_shortcut(base_dict[key])
        except AttributeError:
            raise ValueError('single-argument calls to has_properties must pass a dict as the argument')
    else:
        if len(keys_valuematchers) % 2:
            raise ValueError('has_properties requires key-value pairs')
        base_dict = {}
        for index in range(int(len(keys_valuematchers) / 2)):
            base_dict[keys_valuematchers[2 * index]] = wrap_shortcut(keys_valuematchers[2 * index + 1])

    for key, value in kv_args.items():
        base_dict[key] = wrap_shortcut(value)

    return all_of(*[has_property(property_name, property_value_matcher)
                    for property_name, property_value_matcher in base_dict.items()])
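A quick check of the three calling conventions against a plain object, using the library's has_properties (the Point class is illustrative only):

from hamcrest import assert_that, has_properties, equal_to

class Point(object):
    def __init__(self, x, y):
        self.x, self.y = x, y

p = Point(1, 2)
assert_that(p, has_properties({'x': equal_to(1), 'y': 2}))
assert_that(p, has_properties(x=1, y=equal_to(2)))
assert_that(p, has_properties('x', 1, 'y', 2))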
Example #59
  def test_bigquery_tornadoes_it(self):
    test_pipeline = TestPipeline(is_integration_test=True)

    # Set extra options to the pipeline for test purpose
    project = test_pipeline.get_option('project')

    dataset = 'BigQueryTornadoesIT'
    table = 'monthly_tornadoes_%s' % int(round(time.time() * 1000))
    output_table = '.'.join([dataset, table])
    query = 'SELECT month, tornado_count FROM `%s`' % output_table

    pipeline_verifiers = [PipelineStateMatcher(),
                          BigqueryMatcher(
                              project=project,
                              query=query,
                              checksum=self.DEFAULT_CHECKSUM)]
    extra_opts = {'output': output_table,
                  'on_success_matcher': all_of(*pipeline_verifiers)}

    # Register cleanup before pipeline execution.
    # Note that actual execution happens in reverse order.
    self.addCleanup(utils.delete_bq_table, project, dataset, table)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    bigquery_tornadoes.run(
        test_pipeline.get_full_options_as_args(**extra_opts))