def run(in_args=None):
    """Build the pre-processing pipeline from ``in_args`` and start it.

    Args:
        in_args: Object whose attributes (read through ``vars``) become the
            pipeline options; the same object is also handed to
            ``configure_pipeline``. Presumably an argparse namespace --
            TODO confirm against the caller.
            NOTE(review): with the ``None`` default, ``vars(in_args)``
            raises ``TypeError``, so callers seem to be expected to always
            pass a value -- confirm.
    """
    options_dict = vars(in_args)
    pipeline = beam.Pipeline(
        options=PipelineOptions.from_dictionary(options_dict))
    configure_pipeline(pipeline, in_args)
    pipeline.run()
def test_option_with_space(self):
    """Check that an option containing spaces keeps its name and value.

    The option is read back twice: once from options built from flags, and
    once from options rebuilt through ``PipelineOptions.from_dictionary``.
    """
    option_name = 'option with space'
    expected_value = ' value with space'
    mock_view = PipelineOptionsTest.MockOptions

    from_flags = PipelineOptions(
        flags=['--option with space= value with space'])
    self.assertEqual(
        getattr(from_flags.view_as(mock_view), option_name),
        expected_value)

    # Round-trip through a dictionary and read the same option again.
    rebuilt = PipelineOptions.from_dictionary(from_flags.get_all_options())
    self.assertEqual(
        getattr(rebuilt.view_as(mock_view), option_name),
        expected_value)
def test_option_with_space(self):
    """Verify an option with spaces in its name and value is preserved.

    The value is asserted on options parsed from flags and again on options
    reconstructed with ``PipelineOptions.from_dictionary``.
    """

    def read_spaced_option(pipeline_options):
        # The attribute name contains spaces, so it needs getattr.
        view = pipeline_options.view_as(PipelineOptionsTest.MockOptions)
        return getattr(view, 'option with space')

    parsed = PipelineOptions(flags=['--option with space= value with space'])
    self.assertEqual(read_spaced_option(parsed), ' value with space')

    round_tripped = PipelineOptions.from_dictionary(parsed.get_all_options())
    self.assertEqual(read_spaced_option(round_tripped), ' value with space')
def test_from_dictionary(self):
    """Check options rebuilt via ``from_dictionary`` for every test case.

    For each entry of ``PipelineOptionsTest.TEST_CASES`` the options are
    built from the case's flags, dumped with ``get_all_options`` and
    rebuilt with ``PipelineOptions.from_dictionary``.
    """
    for case in PipelineOptionsTest.TEST_CASES:
        from_flags = PipelineOptions(flags=case['flags'])
        rebuilt = PipelineOptions.from_dictionary(
            from_flags.get_all_options())

        rebuilt_view = rebuilt.view_as(PipelineOptionsTest.MockOptions)
        self.assertEqual(
            rebuilt_view.mock_flag, case['expected']['mock_flag'])

        # NOTE(review): this assertion reads the flag-built options, not the
        # rebuilt ones -- confirm whether that is intentional.
        flags_view = from_flags.view_as(PipelineOptionsTest.MockOptions)
        self.assertEqual(
            flags_view.mock_option, case['expected']['mock_option'])
def test_from_dictionary(self):
    """Exercise ``PipelineOptions.from_dictionary`` over all test cases.

    Each case's flags are parsed into options, exported as a dictionary and
    turned back into options; ``mock_flag`` is asserted on the rebuilt
    options and ``mock_option`` on the originally parsed ones.
    """
    for test_case in PipelineOptionsTest.TEST_CASES:
        parsed = PipelineOptions(flags=test_case['flags'])
        exported = parsed.get_all_options()
        round_tripped = PipelineOptions.from_dictionary(exported)

        actual_flag = round_tripped.view_as(
            PipelineOptionsTest.MockOptions).mock_flag
        self.assertEqual(actual_flag, test_case['expected']['mock_flag'])

        # NOTE(review): mock_option is taken from `parsed`, not from
        # `round_tripped` -- verify this is the intended object.
        actual_option = parsed.view_as(
            PipelineOptionsTest.MockOptions).mock_option
        self.assertEqual(actual_option, test_case['expected']['mock_option'])
def launch():
    """Launch the Dataflow pipeline.

    The request is only served when its ``X-Appengine-Cron`` header is
    present and truthy; otherwise nothing is launched.

    Returns:
        ``'Blocked.'`` when the cron header check fails, ``'Done.'`` after
        the pipeline has been handed to ``pipe.process_datastore_tweets``.
    """
    from_cron = request.headers.get('X-Appengine-Cron', False)
    logging.info("is_cron is %s", from_cron)

    # Comment out the following test to allow non cron-initiated requests.
    if not from_cron:
        return 'Blocked.'

    bucket_root = 'gs://' + BUCKET
    dataflow_settings = dict(
        project=PROJECT,
        staging_location=bucket_root + '/staging',
        runner='DataflowRunner',
        setup_file='./setup.py',
        job_name=PROJECT + '-twcount',
        max_num_workers=10,
        temp_location=bucket_root + '/temp',
    )

    # define and launch the pipeline (non-blocking).
    pipe.process_datastore_tweets(
        PROJECT,
        DATASET,
        PipelineOptions.from_dictionary(dataflow_settings))
    return 'Done.'