Пример #1
0
    def test_run_preconfigured_job_without_parameters_substitution(self):
        """Run an inline Pig script through a preconfigured job and check its output.

        A small comma-separated temp file is passed to
        ``self.copy_file_from_local`` and the returned path is used as the Pig
        input.  A three-statement script (load, generate ``$0``, store) is put
        into a fresh ``Configuration`` under a unique job name, the job is
        loaded with ``Pig.load_preconfigured_job`` and run.

        The test fails if the run result reports failure or if the output
        path does not exist according to ``HDFS(...).exists()``.  Both the
        input and the output paths are deleted afterwards, pass or fail.
        """
        _test_id = str(uuid.uuid4())
        _job_name = "TEST_PIG_{}".format(_test_id)
        _input_dir = self.copy_file_from_local(
            self.temp_file("hello,world,world", ".txt"))
        _output_dir = "/tmp/data_{}".format(_test_id)

        # The paths are embedded directly in the script text; it contains no
        # parameter placeholders.
        _commands = "".join([
            "A = load '{}' using PigStorage(',');".format(_input_dir),
            # Raw string: the script must contain a literal backslash before
            # `$0` (presumably to protect it from shell expansion -- confirm).
            # Written as "\$" in a normal literal this is an invalid escape
            # sequence, which newer Python versions warn about.
            r"B = foreach A generate \$0 as id;",
            "STORE B into '{}';".format(_output_dir),
        ])

        # create job configuration. can also be loaded from .ini file
        _config = Configuration.create()
        _config.set(_job_name, TaskOptions.CONFIG_KEY_COMMANDS_STRING,
                    _commands)
        _config.set(_job_name, TaskOptions.CONFIG_KEY_LOG_BRIEF, 'enabled')
        # Two name=value pairs separated by a real newline character.
        _config.set(
            _job_name, TaskOptions.CONFIG_KEY_PARAMETER_VALUE,
            'input_dir={}\noutput_dir={}'.format(_input_dir, _output_dir))
        try:
            _pig = Pig.load_preconfigured_job(config=_config,
                                              job_name=_job_name)
            _result = _pig.run()
            _result.if_failed_raise(
                AssertionError("test_run_preconfigured_job failed"))
            self.assertTrue(
                HDFS(_output_dir).exists(), "Cannot find job output")
        finally:
            # Always clean up what the test created.
            self.delete_file_in_hdfs(_input_dir)
            self.delete_file_in_hdfs(_output_dir)
Пример #2
0
    def test_run_preconfigured_job_without_parameters_substitution(self):
        """Run a preconfigured Pig job whose script has its paths inlined.

        Steps:
          1. Pass a temp file containing ``hello,world,world`` to
             ``self.copy_file_from_local`` and use the returned path as input.
          2. Build a script that loads the input with ``PigStorage(',')``,
             generates ``$0`` as ``id`` and stores the result under a unique
             ``/tmp/data_<uuid>`` path.
          3. Register the script, brief logging and two parameter values in a
             new ``Configuration`` under a unique job name.
          4. Load the job via ``Pig.load_preconfigured_job`` and run it.

        Fails when the result reports failure or when
        ``HDFS(_output_dir).exists()`` is false.  Input and output paths are
        deleted in a ``finally`` clause.
        """
        _test_id = str(uuid.uuid4())
        _job_name = "TEST_PIG_{}".format(_test_id)
        _input_dir = self.copy_file_from_local(self.temp_file("hello,world,world", ".txt"))
        _output_dir = "/tmp/data_{}".format(_test_id)

        _commands = "A = load '{}' using PigStorage(',');".format(_input_dir)
        # Raw string keeps the literal backslash before `$0` without relying
        # on Python passing through the invalid escape sequence "\$".
        # NOTE(review): the backslash presumably shields `$0` from the shell
        # when the script is passed on the command line -- confirm.
        _commands += r"B = foreach A generate \$0 as id;"
        _commands += "STORE B into '{}';".format(_output_dir)

        # create job configuration. can also be loaded from .ini file
        _config = Configuration.create()
        _config.set(_job_name, TaskOptions.CONFIG_KEY_COMMANDS_STRING, _commands)
        _config.set(_job_name, TaskOptions.CONFIG_KEY_LOG_BRIEF, "enabled")
        # The script above contains no parameter placeholders; the values are
        # still registered, one name=value pair per line.
        _config.set(
            _job_name,
            TaskOptions.CONFIG_KEY_PARAMETER_VALUE,
            "input_dir={}\noutput_dir={}".format(_input_dir, _output_dir),
        )
        try:
            _pig = Pig.load_preconfigured_job(config=_config, job_name=_job_name)
            _result = _pig.run()
            _result.if_failed_raise(AssertionError("test_run_preconfigured_job failed"))
            self.assertTrue(HDFS(_output_dir).exists(), "Cannot find job output")
        finally:
            # Remove test artifacts regardless of the outcome.
            self.delete_file_in_hdfs(_input_dir)
            self.delete_file_in_hdfs(_output_dir)
Пример #3
0
 def test_load_preconfigured_job(self):
     """Load the 'pig test' job from resources/pig/pig.ini and run it.

     The configuration is read through an ``IniFileMetaStore`` pointing at
     the ini file next to this module.  The job is given a mock executor
     built from the expected command line (presumably the mock checks the
     generated command against it -- confirm in ``mock_executor``), then
     run after ``without_split_filter()`` is applied.
     """
     expected_cmd = 'pig -brief -optimizer_off SplitFilter -optimizer_off ColumnMapKeyPrune -e "ls /"'

     # Resolve the ini file relative to this test module.
     here = os.path.dirname(__file__)
     ini_store = IniFileMetaStore(file=os.path.join(here, 'resources/pig/pig.ini'))

     job_config = Configuration.load(metastore=ini_store,
                                     readonly=False, accepts_nulls=True)
     executor = mock_executor(expected_command=expected_cmd)

     job = Pig.load_preconfigured_job(job_name='pig test',
                                      config=job_config,
                                      command_executor=executor)
     job.without_split_filter().run()
Пример #4
0
 def test_load_preconfigured_job(self):
     """Run the ini-defined 'pig test' job against a mock command executor.

     Reads ``resources/pig/pig.ini`` (located relative to this file) via
     ``IniFileMetaStore`` and ``Configuration.load``, loads the job named
     'pig test', calls ``without_split_filter()`` on it and runs it.  The
     mock executor receives the expected ``pig`` command line; how it uses
     that value is defined in ``mock_executor`` (not visible here).
     """
     ini_file = os.path.join(os.path.dirname(__file__), 'resources/pig/pig.ini')
     store = IniFileMetaStore(file=ini_file)

     loaded_config = Configuration.load(
         metastore=store,
         readonly=False,
         accepts_nulls=True,
     )
     fake_executor = mock_executor(
         expected_command='pig -brief -optimizer_off SplitFilter -optimizer_off ColumnMapKeyPrune -e "ls /"',
     )

     preconfigured = Pig.load_preconfigured_job(
         job_name='pig test',
         config=loaded_config,
         command_executor=fake_executor,
     )
     without_filter = preconfigured.without_split_filter()
     without_filter.run()