def test_configure_logging(self):
    """Build a Pig job with logfile, debug and brief logging options and run it.

    The mock executor is handed the command line
    ``pig -logfile pig.log -brief -debug -f "wordcount.pig"``; the actual
    comparison presumably happens inside ``mock_executor`` (defined elsewhere).
    """
    expected_command = ('pig '
                        '-logfile pig.log -brief -debug '
                        '-f "wordcount.pig"')
    executor = mock_executor(expected_command)
    pig = Pig.load_commands_from_file(path='wordcount.pig',
                                      command_executor=executor)
    configured = pig.log_config(logfile="pig.log", debug=True, brief=True)
    configured.run()
def test_with_property_file(self):
    """Build a Pig job with a property file and the default execution mode.

    The mock executor is handed
    ``pig -propertyFile pig.properties -x mapreduce -f "wordcount.pig"``;
    ``using_mode()`` is called without arguments, so the ``mapreduce`` value
    in that string comes from the library's default.
    """
    expected_command = ('pig '
                        '-propertyFile pig.properties '
                        '-x mapreduce '
                        '-f "wordcount.pig"')
    executor = mock_executor(expected_command)
    pig = Pig.load_commands_from_file(path='wordcount.pig',
                                      command_executor=executor)
    pig = pig.with_property_file("pig.properties")
    pig.using_mode().run()
def test_log4j_configs_injections(self):
    """Build a Pig job with a log4j configuration file and run it.

    The mock executor is handed
    ``pig -log4jconf ~/log4j.properties -f "wordcount.pig"``.
    """
    expected_command = ('pig '
                        '-log4jconf ~/log4j.properties '
                        '-f "wordcount.pig"')
    executor = mock_executor(expected_command)
    pig = Pig.load_commands_from_file(path='wordcount.pig',
                                      command_executor=executor)
    pig.log4j_config("~/log4j.properties").run()
def test_with_param_query(self):
    """Build a Pig job whose parameters come from a parameter file and run it.

    The mock executor is handed
    ``pig -param_file params.properties -f "wordcount.pig"``.

    NOTE(review): despite the name, this test exercises
    ``load_parameters_from_file``, not individual ``-param`` values.
    """
    expected_command = ('pig '
                        '-param_file params.properties '
                        '-f "wordcount.pig"')
    executor = mock_executor(expected_command)
    pig = Pig.load_commands_from_file(path='wordcount.pig',
                                      command_executor=executor)
    pig.load_parameters_from_file("params.properties").run()
def test_with_param_file(self):
    """Build a Pig job with two inline parameters and the default mode, then run it.

    The mock executor is handed
    ``pig -param param001=value001 -param param002=value002 -x mapreduce
    -f "wordcount.pig"``.

    NOTE(review): despite the name, this test exercises ``with_parameter``
    (inline ``-param`` values), not a parameter file.
    """
    expected_command = ('pig '
                        '-param param001=value001 '
                        '-param param002=value002 '
                        '-x mapreduce '
                        '-f "wordcount.pig"')
    executor = mock_executor(expected_command)
    pig = Pig.load_commands_from_file(path='wordcount.pig',
                                      command_executor=executor)
    # Same call order as before: first parameter, mode, second parameter.
    pig = pig.with_parameter("param001", "value001")
    pig = pig.using_mode()
    pig = pig.with_parameter("param002", "value002")
    pig.run()
def test_logging_configuration(self):
    """Run a Pig script with a client-side log file configured.

    A small comma-separated input is copied via ``copy_file_from_local``, a
    three-statement Pig script is written to a temp file, and the job is run
    with ``log_config(logfile=...)`` pointing inside a local log directory.
    The assertion compares ``os.path.exists(path)`` with the job's
    ``is_ok()`` result.

    All fixtures (log directory, HDFS paths, local script) are removed in
    ``finally`` regardless of the outcome.
    """
    import os
    import shutil

    files = self.copy_file_from_local(self.temp_file("hello,world,world", ".txt"))
    path = "/tmp/pig_log"
    # The middle statement is a raw literal: "\$" is not a valid Python escape
    # sequence and triggers a warning in a normal string. The resulting text
    # (backslash followed by "$0") is unchanged.
    commands = (
        "A = load '$input_dir' using PigStorage(',');"
        r"B = foreach A generate \$0 as id;"
        "STORE B into '$output_dir';"
    )
    files_s = self.temp_file(commands)
    try:
        os.makedirs(path)
        pig = (
            Pig.load_commands_from_file(files_s)
            .with_parameter("input_dir", files)
            .with_parameter("output_dir", "/tmp/data")
        )
        # Join with a separator so the log file lives inside `path` (and is
        # removed with it) instead of at the sibling path "/tmp/pig_logpig".
        pig.log_config(logfile=os.path.join(path, "pig"))
        self.assertEqual(os.path.exists(path), pig.run().is_ok())
    finally:
        # ignore_errors: a failed removal (e.g. makedirs never succeeded) must
        # not mask the real failure or skip the remaining cleanup steps.
        shutil.rmtree(path, ignore_errors=True)
        self.delete_file_in_hdfs()
        self.delete_file_in_hdfs(files)
        self.delete_local(files_s)
def merge_snapshot_with_updates(context):
    """Run the Pig script that combines the active snapshot with new updates.

    Stores today's date (``YYYYMMDD``) in ``context["partition"]`` and passes
    it to the script as the ``date`` parameter, together with the
    ``active_snapshot``, ``data_updates`` and ``output`` parameters taken from
    module-level settings.

    :param context: mutable mapping; its ``"partition"`` key is overwritten.
    """
    partition = datetime.now().strftime('%Y%m%d')
    context["partition"] = partition

    pig_job = (
        Pig.load_commands_from_file(_pig_script)
        .with_parameter("active_snapshot", _scd_active_snapshot)
        .with_parameter(
            "data_updates",
            # Updates file name re-rooted under the HDFS temp directory.
            os.path.join(_hdfs_tmpdir.path, os.path.basename(_scd_updates)),
        )
        .with_parameter('output', _hdfs_job_output)
        .with_parameter("date", partition)
    )
    pig_job.run()
def test_run_commands_from_file(self):
    """Run a Pig script loaded from a file and check that output is produced.

    A small comma-separated input is copied via ``copy_file_from_local``, a
    three-statement Pig script is written to a temp file, and the job is run
    with ``input_dir`` / ``output_dir`` parameters. The test asserts that the
    run result ``is_ok()`` and that the uuid-suffixed output directory exists
    in HDFS.
    """
    _test_id = str(uuid.uuid4())
    _inputs = self.copy_file_from_local(
        self.temp_file("hello,world,world", ".txt"))
    # The middle statement is a raw literal: "\$" is not a valid Python escape
    # sequence and triggers a warning in a normal string. The resulting text
    # (backslash followed by "$0") is unchanged.
    commands = (
        "A = load '$input_dir' using PigStorage(',');"
        r"B = foreach A generate \$0 as id;"
        "STORE B into '$output_dir';"
    )
    files_s = self.temp_file(commands)
    try:
        _output_dir = "/tmp/data_{}".format(_test_id)
        pig = (
            Pig.load_commands_from_file(files_s)
            .with_parameter("input_dir", _inputs)
            .with_parameter("output_dir", _output_dir)
        )
        self.assertTrue(pig.run().is_ok())
        self.assertTrue(HDFS(_output_dir).exists())
    finally:
        self.delete_local(files_s)
        # NOTE(review): the no-argument call presumably targets a default
        # path rather than _output_dir -- confirm the uuid-named output
        # directory is actually cleaned up.
        self.delete_file_in_hdfs()
        self.delete_file_in_hdfs(_inputs)
def test_run_commands_from_file(self):
    """Run a Pig script loaded from a file and check that output is produced.

    A small comma-separated input is copied via ``copy_file_from_local``, a
    three-statement Pig script is written to a temp file, and the job is run
    with ``input_dir`` / ``output_dir`` parameters. The test asserts that the
    run result ``is_ok()`` and that the uuid-suffixed output directory exists
    in HDFS.
    """
    _test_id = str(uuid.uuid4())
    _inputs = self.copy_file_from_local(self.temp_file("hello,world,world", ".txt"))
    # The middle statement is a raw literal: "\$" is not a valid Python escape
    # sequence and triggers a warning in a normal string. The resulting text
    # (backslash followed by "$0") is unchanged.
    commands = (
        "A = load '$input_dir' using PigStorage(',');"
        r"B = foreach A generate \$0 as id;"
        "STORE B into '$output_dir';"
    )
    files_s = self.temp_file(commands)
    try:
        _output_dir = "/tmp/data_{}".format(_test_id)
        pig = (
            Pig.load_commands_from_file(files_s)
            .with_parameter("input_dir", _inputs)
            .with_parameter("output_dir", _output_dir)
        )
        self.assertTrue(pig.run().is_ok())
        self.assertTrue(HDFS(_output_dir).exists())
    finally:
        self.delete_local(files_s)
        # NOTE(review): the no-argument call presumably targets a default
        # path rather than _output_dir -- confirm the uuid-named output
        # directory is actually cleaned up.
        self.delete_file_in_hdfs()
        self.delete_file_in_hdfs(_inputs)
def test_logging_configuration(self):
    """Run a Pig script with a client-side log file configured.

    A small comma-separated input is copied via ``copy_file_from_local``, a
    three-statement Pig script is written to a temp file, and the job is run
    with ``log_config(logfile=...)`` pointing inside a local log directory.
    The assertion compares ``os.path.exists(path)`` with the job's
    ``is_ok()`` result.

    All fixtures (log directory, HDFS paths, local script) are removed in
    ``finally`` regardless of the outcome.
    """
    import os
    import shutil

    files = self.copy_file_from_local(
        self.temp_file("hello,world,world", ".txt"))
    path = "/tmp/pig_log"
    # The middle statement is a raw literal: "\$" is not a valid Python escape
    # sequence and triggers a warning in a normal string. The resulting text
    # (backslash followed by "$0") is unchanged.
    commands = (
        "A = load '$input_dir' using PigStorage(',');"
        r"B = foreach A generate \$0 as id;"
        "STORE B into '$output_dir';"
    )
    files_s = self.temp_file(commands)
    try:
        os.makedirs(path)
        pig = (
            Pig.load_commands_from_file(files_s)
            .with_parameter("input_dir", files)
            .with_parameter("output_dir", "/tmp/data")
        )
        # Join with a separator so the log file lives inside `path` (and is
        # removed with it) instead of at the sibling path "/tmp/pig_logpig".
        pig.log_config(logfile=os.path.join(path, "pig"))
        self.assertEqual(os.path.exists(path), pig.run().is_ok())
    finally:
        # ignore_errors: a failed removal (e.g. makedirs never succeeded) must
        # not mask the real failure or skip the remaining cleanup steps.
        shutil.rmtree(path, ignore_errors=True)
        self.delete_file_in_hdfs()
        self.delete_file_in_hdfs(files)
        self.delete_local(files_s)
def test_optimization_disabling(self):
    """Exercise every optimizer-disabling switch of the Pig builder.

    Each single-switch case pairs the command line handed to the mock
    executor with the builder method that is invoked before ``run()``.
    Two combined cases follow: two switches together, and two switches plus
    ``tez`` mode and ``-no_multiquery``.
    """

    def load(expected_command):
        # Fresh job per case, wired to a mock executor for that command line.
        return Pig.load_commands_from_file(
            path='wordcount.pig',
            command_executor=mock_executor(expected_command))

    single_switch_cases = (
        ('pig -optimizer_off SplitFilter -f "wordcount.pig"',
         'without_split_filter'),
        ('pig -optimizer_off PushUpFilter -f "wordcount.pig"',
         'without_pushup_filter'),
        ('pig -optimizer_off MergeFilter -f "wordcount.pig"',
         'without_merge_filter'),
        ('pig -optimizer_off PushDownForeachFlatten -f "wordcount.pig"',
         'without_push_down_foreach_flatten'),
        ('pig -optimizer_off LimitOptimizer -f "wordcount.pig"',
         'without_limit_optimizer'),
        ('pig -optimizer_off ColumnMapKeyPrune -f "wordcount.pig"',
         'without_column_map_key_prune'),
        ('pig -optimizer_off AddForEach -f "wordcount.pig"',
         'without_add_foreach'),
        ('pig -optimizer_off MergeForEach -f "wordcount.pig"',
         'without_merge_foreach'),
        ('pig -optimizer_off GroupByConstParallelSetter -f "wordcount.pig"',
         'without_groupby_const_parallel_setter'),
        ('pig -optimizer_off All -f "wordcount.pig"',
         'disable_all_optimizations'),
    )
    for expected_command, switch_name in single_switch_cases:
        job = load(expected_command)
        getattr(job, switch_name)().run()

    # Two switches combined; the builder calls are made in the opposite order
    # to how the flags appear in the command string.
    two_switches = load('pig '
                        '-optimizer_off LimitOptimizer '
                        '-optimizer_off AddForEach '
                        '-f "wordcount.pig"')
    two_switches.without_add_foreach().without_limit_optimizer().run()

    # Switches mixed with an explicit execution mode and -no_multiquery.
    tez_job = load('pig '
                   '-x tez '
                   '-optimizer_off LimitOptimizer '
                   '-optimizer_off AddForEach '
                   '-no_multiquery '
                   '-f "wordcount.pig"')
    tez_job = tez_job.without_add_foreach().using_mode(type="tez")
    tez_job = tez_job.without_limit_optimizer()
    tez_job.without_multiquery().run()
def test_run_script_from_file_verbose(self):
    """Call ``debug()`` on a Pig job loaded from a script file.

    The mock executor is handed ``pig -verbose -f "wordcount.pig"``.

    NOTE(review): ``run()`` is never called here -- presumably ``debug()``
    triggers execution itself; confirm against the Pig builder.
    """
    executor = mock_executor('pig -verbose -f "wordcount.pig"')
    pig = Pig.load_commands_from_file(path='wordcount.pig',
                                      command_executor=executor)
    pig.debug()