def test_hourly_team_score_output_checksum_on_small_input(self):
  # Small dataset to prevent Out of Memory when running in local runners
  INPUT_FILE = 'gs://apache-beam-samples/game/small/gaming_data.csv'
  EXPECTED_CHECKSUM = '91143e81622aa391eb62eaa3f3a5123401edb07d'
  state_verifier = PipelineStateMatcher(PipelineState.DONE)
  query = (
      'SELECT COUNT(*) FROM `%s.%s.%s`' %
      (self.project, self.dataset_ref.dataset_id, self.OUTPUT_TABLE))
  bigquery_verifier = BigqueryMatcher(self.project, query, EXPECTED_CHECKSUM)
  extra_opts = {
      'input': INPUT_FILE,
      'dataset': self.dataset_ref.dataset_id,
      'window_duration': 1,
      'on_success_matcher': all_of(state_verifier, bigquery_verifier)
  }

  # Register clean up before pipeline execution.
  # Note that actual execution happens in reverse order.
  self.addCleanup(utils.delete_bq_dataset, self.project, self.dataset_ref)

  # Get pipeline options from command argument: --test-pipeline-options,
  # and start pipeline job by calling pipeline main function.
  hourly_team_score.run(
      self.test_pipeline.get_full_options_as_args(**extra_opts))
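# Aside: self.addCleanup registers callbacks that unittest runs in LIFO
# order after the test finishes, which is what the "reverse order" comment
# above refers to. A minimal, self-contained sketch of that behavior
# (standard library only; the class and names here are illustrative):
import unittest


class CleanupOrderSketch(unittest.TestCase):
  def test_cleanups_run_in_reverse_registration_order(self):
    calls = []
    self.addCleanup(calls.append, 'registered first')
    self.addCleanup(calls.append, 'registered second')
    self.doCleanups()  # run pending cleanups immediately, LIFO
    self.assertEqual(calls, ['registered second', 'registered first'])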
def test_hourly_team_score_it(self):
  state_verifier = PipelineStateMatcher(PipelineState.DONE)
  query = (
      'SELECT COUNT(*) FROM `%s.%s.%s`' %
      (self.project, self.dataset_ref.dataset_id, self.OUTPUT_TABLE))
  bigquery_verifier = BigqueryMatcher(
      self.project, query, self.DEFAULT_EXPECTED_CHECKSUM)
  extra_opts = {
      'input': self.DEFAULT_INPUT_FILE,
      'dataset': self.dataset_ref.dataset_id,
      'window_duration': 1,
      'on_success_matcher': all_of(state_verifier, bigquery_verifier)
  }

  # Register clean up before pipeline execution.
  # Note that actual execution happens in reverse order.
  self.addCleanup(utils.delete_bq_dataset, self.project, self.dataset_ref)

  # Get pipeline options from command argument: --test-pipeline-options,
  # and start pipeline job by calling pipeline main function.
  hourly_team_score.run(
      self.test_pipeline.get_full_options_as_args(**extra_opts))
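# For context: the BigqueryMatcher used above verifies pipeline output by
# running the query and comparing a checksum of the results (the expected
# value is a 40-hex-digit SHA-1) against a known-good constant. A minimal
# sketch of that checksum idea; `rows_to_checksum` is an illustrative
# helper, not Beam's actual implementation:
import hashlib


def rows_to_checksum(rows):
  # Hash a sorted, canonical string form of each row so the digest is
  # stable regardless of the order in which BigQuery returns results.
  digest = hashlib.sha1()
  for row in sorted(repr(row) for row in rows):
    digest.update(row.encode('utf-8'))
  return digest.hexdigest()


# Usage sketch: compare against the known-good constant, e.g.
#   assert rows_to_checksum(query_results) == EXPECTED_CHECKSUM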