Example #1
0
    def _run_wordcount_it(self, **opts):
        """Run the wordcount pipeline as an integration test.

        Builds a timestamped output location, attaches pipeline-state and
        output-checksum verifiers, merges caller-supplied options, and
        launches the wordcount pipeline.
        """
        pipeline = TestPipeline(is_integration_test=True)

        # Timestamped output path so concurrent runs don't collide.
        out_path = '/'.join([
            pipeline.get_option('output'),
            str(int(time.time() * 1000)),
            'results',
        ])

        raw_sleep = pipeline.get_option('sleep_secs')
        wait_secs = None if raw_sleep is None else int(raw_sleep)

        success_matcher = all_of(
            PipelineStateMatcher(),
            FileChecksumMatcher(out_path + '*-of-*', self.DEFAULT_CHECKSUM,
                                wait_secs))

        run_opts = {'output': out_path, 'on_success_matcher': success_matcher}
        run_opts.update(opts)

        # Register clean up before pipeline execution so output files are
        # removed even if the run fails.
        self.addCleanup(delete_files, [out_path + '*'])

        # Combine options from --test-pipeline-options with our extras and
        # start the pipeline job by calling the pipeline main function.
        wordcount.run(pipeline.get_full_options_as_args(**run_opts))
    def _run_wordcount_it(self, run_wordcount, **opts):
        """Drive a wordcount pipeline entry point as an integration test.

        Resolves input/output locations and verifiers from the test
        pipeline options, merges caller overrides from *opts*, and invokes
        *run_wordcount* with save_main_session disabled.
        """
        pipeline = TestPipeline(is_integration_test=True)

        # Timestamped output path so concurrent runs don't collide.
        out_path = '/'.join([
            pipeline.get_option('output'),
            str(int(time.time() * 1000)),
            'results',
        ])
        run_opts = {'output': out_path}

        # Optional input override from the test pipeline options.
        in_path = pipeline.get_option('input')
        if in_path:
            run_opts['input'] = in_path

        raw_sleep = pipeline.get_option('sleep_secs')
        wait_secs = None if raw_sleep is None else int(raw_sleep)
        checksum = (pipeline.get_option('expect_checksum')
                    or self.DEFAULT_CHECKSUM)
        run_opts['on_success_matcher'] = all_of(
            PipelineStateMatcher(),
            FileChecksumMatcher(out_path + '*-of-*', checksum, wait_secs))
        run_opts.update(opts)

        # Register clean up before pipeline execution so output files are
        # removed even if the run fails.
        self.addCleanup(delete_files, [out_path + '*'])

        # Combine options from --test-pipeline-options with our extras and
        # start the pipeline job by calling the pipeline main function.
        run_wordcount(pipeline.get_full_options_as_args(**run_opts),
                      save_main_session=False)
    def test_datastore_wordcount_it(self):
        """Run the Datastore wordcount example end to end and verify the
        pipeline state and output file checksum."""
        pipeline = TestPipeline(is_integration_test=True)
        dataset = pipeline.get_option('project')
        # Timestamped output path so concurrent runs don't collide.
        out_path = '/'.join([
            pipeline.get_option('output'),
            str(int(time.time() * 1000)),
            'datastore_wordcount_results',
        ])

        raw_sleep = pipeline.get_option('sleep_secs')
        wait_secs = None if raw_sleep is None else int(raw_sleep)
        success_matcher = all_of(
            PipelineStateMatcher(),
            FileChecksumMatcher(out_path + '*-of-*', self.EXPECTED_CHECKSUM,
                                wait_secs))

        datastore_wordcount.run(
            pipeline.get_full_options_as_args(
                dataset=dataset,
                kind=self.DATASTORE_WORDCOUNT_KIND,
                output=out_path,
                read_only=True,
                on_success_matcher=success_matcher))
Example #4
0
    def test_datastore_wordcount_it(self):
        """Run the Datastore wordcount example end to end and verify the
        pipeline state and output file checksum."""
        pipeline = TestPipeline(is_integration_test=True)
        # Timestamped output path so concurrent runs don't collide.
        out_path = '/'.join([
            pipeline.get_option('output'),
            str(int(time.time() * 1000)),
            'datastore_wordcount_results',
        ])

        raw_sleep = pipeline.get_option('sleep_secs')
        wait_secs = None if raw_sleep is None else int(raw_sleep)
        success_matcher = all_of(
            PipelineStateMatcher(),
            FileChecksumMatcher(out_path + '*-of-*', self.EXPECTED_CHECKSUM,
                                wait_secs))

        # read_only=True reuses existing Datastore entities. Drop it to
        # regenerate input data on Datastore (delete existing data first
        # using the bulk delete Dataflow template).
        datastore_wordcount.run(
            pipeline.get_full_options_as_args(
                kind=self.DATASTORE_WORDCOUNT_KIND,
                output=out_path,
                read_only=True,
                on_success_matcher=success_matcher))
Example #5
0
    def test_user_score_it(self):
        """Run the user_score example and verify the pipeline reaches DONE
        and the output files match the expected checksum."""
        success_matcher = all_of(
            PipelineStateMatcher(PipelineState.DONE),
            FileChecksumMatcher(self.output + '*-of-*',
                                self.DEFAULT_EXPECTED_CHECKSUM))

        # Register clean up before pipeline execution so output files are
        # removed even if the run fails.
        self.addCleanup(delete_files, [self.output + '*'])

        # Combine options from --test-pipeline-options with our extras and
        # start the pipeline job by calling the pipeline main function.
        user_score.run(
            self.test_pipeline.get_full_options_as_args(
                input=self.DEFAULT_INPUT_FILE,
                output=self.output + '/user-score',
                on_success_matcher=success_matcher))
Example #6
0
    def test_userscore_output_checksum_on_small_input(self):
        """Run user_score on a small fixed dataset and verify the output
        checksum against a known-good value."""
        # Small dataset to prevent Out of Memory when running in local runners
        small_input = 'gs://apache-beam-samples/game/small/gaming_data.csv'
        expected_checksum = '5b1bc0e8080e3c0f162809ac4c0f49acab23854e'

        raw_sleep = self.test_pipeline.get_option('sleep_secs')
        wait_secs = None if raw_sleep is None else int(raw_sleep)
        success_matcher = all_of(
            PipelineStateMatcher(PipelineState.DONE),
            FileChecksumMatcher(self.output + '/*-of-*', expected_checksum,
                                wait_secs))

        # Register clean up before pipeline execution so output files are
        # removed even if the run fails.
        self.addCleanup(delete_files, [self.output + '*'])

        # Combine options from --test-pipeline-options with our extras and
        # start the pipeline job by calling the pipeline main function.
        user_score.run(
            self.test_pipeline.get_full_options_as_args(
                input=small_input,
                output=self.output + '/user-score',
                on_success_matcher=success_matcher))