def test_run_preconfigured_job_without_parameters_substitution(self):
    """Run a preconfigured Pig job whose script embeds its paths literally.

    The input and output locations are formatted directly into the Pig
    commands, so the script contains no parameter placeholders even though
    ``input_dir``/``output_dir`` parameter values are also stored in the
    job configuration. The test passes when the job result does not raise
    and the output location exists according to ``HDFS(...).exists()``.

    NOTE(review): another definition with this exact name appears in this
    file; if both live in the same class, only the later one is collected.
    """
    _test_id = str(uuid.uuid4())
    _job_name = "TEST_PIG_{}".format(_test_id)
    _input_dir = self.copy_file_from_local(
        self.temp_file("hello,world,world", ".txt"))
    _output_dir = "/tmp/data_{}".format(_test_id)
    # The middle statement is a raw string: "\$" is not a valid Python
    # escape sequence, so the non-raw form triggers an invalid-escape
    # warning. The runtime text (backslash followed by "$0") is unchanged.
    # Presumably the backslash protects "$0" from shell expansion -- confirm.
    _commands = "".join([
        "A = load '{}' using PigStorage(',');".format(_input_dir),
        r"B = foreach A generate \$0 as id;",
        "STORE B into '{}';".format(_output_dir),
    ])
    # create job configuration. can also be loaded from .ini file
    _config = Configuration.create()
    _config.set(_job_name,
                TaskOptions.CONFIG_KEY_COMMANDS_STRING,
                _commands)
    _config.set(_job_name,
                TaskOptions.CONFIG_KEY_LOG_BRIEF,
                'enabled')
    _config.set(_job_name,
                TaskOptions.CONFIG_KEY_PARAMETER_VALUE,
                'input_dir={}\noutput_dir={}'.format(_input_dir, _output_dir))
    try:
        _pig = Pig.load_preconfigured_job(config=_config, job_name=_job_name)
        _result = _pig.run()
        # Message names this test so a failure is attributed correctly.
        _result.if_failed_raise(AssertionError(
            "test_run_preconfigured_job_without_parameters_substitution failed"))
        self.assertTrue(HDFS(_output_dir).exists(), "Cannot find job output")
    finally:
        # Clean up both locations whether or not the job succeeded.
        self.delete_file_in_hdfs(_input_dir)
        self.delete_file_in_hdfs(_output_dir)
def test_run_preconfigured_job_without_parameters_substitution(self):
    """Execute a Pig job built from an in-memory configuration section.

    A three-statement Pig script (load, project first column, store) is
    assembled with the concrete input/output paths already formatted in,
    stored under a unique job name together with the brief-log option and
    ``input_dir``/``output_dir`` parameter values, then loaded through
    ``Pig.load_preconfigured_job`` and run. Success means the result does
    not raise and ``HDFS(output).exists()`` is true.

    NOTE(review): this method name is defined more than once in this file;
    if the definitions share a class, the last one silently replaces the
    others -- confirm whether the duplicate is intentional.
    """
    run_id = str(uuid.uuid4())
    job_name = "TEST_PIG_{}".format(run_id)
    source_path = self.copy_file_from_local(self.temp_file("hello,world,world", ".txt"))
    target_path = "/tmp/data_{}".format(run_id)

    # "\\$0" spells out the backslash explicitly; the previous "\$0" relied
    # on an invalid escape sequence, which newer interpreters warn about.
    # The resulting text is identical.
    statements = (
        "A = load '{}' using PigStorage(',');".format(source_path),
        "B = foreach A generate \\$0 as id;",
        "STORE B into '{}';".format(target_path),
    )
    script = "".join(statements)
    parameters = "input_dir={}\noutput_dir={}".format(source_path, target_path)

    # create job configuration. can also be loaded from .ini file
    config = Configuration.create()
    config.set(job_name, TaskOptions.CONFIG_KEY_COMMANDS_STRING, script)
    config.set(job_name, TaskOptions.CONFIG_KEY_LOG_BRIEF, "enabled")
    config.set(job_name, TaskOptions.CONFIG_KEY_PARAMETER_VALUE, parameters)

    try:
        outcome = Pig.load_preconfigured_job(config=config, job_name=job_name).run()
    except Exception:
        # Mirror the original try/finally: always clean up, then propagate.
        self.delete_file_in_hdfs(source_path)
        self.delete_file_in_hdfs(target_path)
        raise
    else:
        try:
            # The failure text now carries this test's own name.
            outcome.if_failed_raise(
                AssertionError(
                    "test_run_preconfigured_job_without_parameters_substitution failed"
                )
            )
            self.assertTrue(HDFS(target_path).exists(), "Cannot find job output")
        finally:
            self.delete_file_in_hdfs(source_path)
            self.delete_file_in_hdfs(target_path)
def test_load_preconfigured_job(self):
    """Load the 'pig test' job from pig.ini and run it on a mock executor.

    The configuration is read from ``resources/pig/pig.ini`` next to this
    file. ``without_split_filter()`` is applied before running, and the
    mock executor is created with the full expected command line
    (presumably it verifies the command it receives -- confirm against
    ``mock_executor``).
    """
    expected_cmd = 'pig -brief -optimizer_off SplitFilter -optimizer_off ColumnMapKeyPrune -e "ls /"'

    # Resolve the .ini file relative to this test module.
    module_dir = os.path.dirname(__file__)
    ini_store = IniFileMetaStore(
        file=os.path.join(module_dir, 'resources/pig/pig.ini'))

    job_config = Configuration.load(
        metastore=ini_store,
        readonly=False,
        accepts_nulls=True)
    executor = mock_executor(expected_command=expected_cmd)

    job = Pig.load_preconfigured_job(
        job_name='pig test',
        config=job_config,
        command_executor=executor)
    job.without_split_filter().run()
def test_load_preconfigured_job(self):
    """Check the command produced by a Pig job loaded from an .ini file.

    Reads the 'pig test' section via an ``IniFileMetaStore`` pointing at
    ``resources/pig/pig.ini`` (relative to this module), disables the split
    filter, and runs the job with a mock executor that was given the
    expected command string.

    NOTE(review): a method with this name is defined more than once in this
    file; if they share a class, only the last definition takes effect.
    """
    cmd = 'pig -brief -optimizer_off SplitFilter -optimizer_off ColumnMapKeyPrune -e "ls /"'
    ini_path = os.path.join(os.path.dirname(__file__), 'resources/pig/pig.ini')

    # Build the configuration first, then hand everything to the loader and
    # run the resulting job in one chained expression.
    configuration = Configuration.load(
        metastore=IniFileMetaStore(file=ini_path),
        readonly=False,
        accepts_nulls=True,
    )
    Pig.load_preconfigured_job(
        job_name='pig test',
        config=configuration,
        command_executor=mock_executor(expected_command=cmd),
    ).without_split_filter().run()