def test_hadoop_extra_args_comes_first(self):
    """Args passed via --hadoop-arg must precede every other generated arg."""
    job = MRWordCount([
        '--cmdenv', 'FOO=bar',
        '--hadoop-arg', '-libjar',
        '--hadoop-arg', 'qux.jar',
        '--jobconf', 'baz=qux',
        '--partitioner', 'java.lang.Object',
    ])
    job.HADOOP_INPUT_FORMAT = 'FooInputFormat'
    job.HADOOP_OUTPUT_FORMAT = 'BarOutputFormat'
    with job.make_runner() as runner:
        hadoop_args = runner._hadoop_args_for_step(0)
        # the extra args (-libjar qux.jar) should lead the arg list
        self.assertEqual(hadoop_args[:2], ['-libjar', 'qux.jar'])
        # sanity check that the remaining args were not dropped
        self.assertEqual(len(hadoop_args), 12)
def test_hadoop_extra_args_comes_first(self):
    """--hadoop-arg values must be placed ahead of all other hadoop args."""
    switches = [
        '--cmdenv', 'FOO=bar',
        '--hadoop-arg', '-libjar',
        '--hadoop-arg', 'qux.jar',
        '--jobconf', 'baz=qux',
        '--partitioner', 'java.lang.Object',
    ]
    job = MRWordCount(switches)
    job.HADOOP_INPUT_FORMAT = 'FooInputFormat'
    job.HADOOP_OUTPUT_FORMAT = 'BarOutputFormat'
    with job.make_runner() as runner:
        args = runner._hadoop_args_for_step(0)
        # extra args come first, in the order given
        self.assertEqual(args[:2], ['-libjar', 'qux.jar'])
        # nothing else should have been lost
        self.assertEqual(len(args), 12)
def test_hadoop_input_format(self):
    """-inputformat should be emitted for the first step only."""
    input_format = "org.apache.hadoop.mapred.SequenceFileInputFormat"
    # one-step job; HADOOP_INPUT_FORMAT is a class attribute (part of
    # job semantics), so there is no command-line switch for it
    job1 = MRWordCount()
    job1.HADOOP_INPUT_FORMAT = input_format
    with job1.make_runner() as runner1:
        self.assertEqual(
            runner1._hadoop_args_for_step(0),
            ["-inputformat", input_format])
    # multi-step job: -inputformat applies only to the first step
    job2 = MRTwoStepJob()
    job2.HADOOP_INPUT_FORMAT = input_format
    with job2.make_runner() as runner2:
        self.assertEqual(
            runner2._hadoop_args_for_step(0),
            ["-inputformat", input_format])
        self.assertEqual(runner2._hadoop_args_for_step(1), [])
def test_hadoop_input_format(self):
    """Only the first step of a job should get the -inputformat arg."""
    fmt = 'org.apache.hadoop.mapred.SequenceFileInputFormat'
    # single-step job; set via class attribute because the input format
    # is part of the job's semantics (no command-line switch exists)
    one_step = MRWordCount()
    one_step.HADOOP_INPUT_FORMAT = fmt
    with one_step.make_runner() as runner1:
        self.assertEqual(
            runner1._hadoop_args_for_step(0), ['-inputformat', fmt])
    # multi-step job: later steps must not repeat -inputformat
    two_step = MRTwoStepJob()
    two_step.HADOOP_INPUT_FORMAT = fmt
    with two_step.make_runner() as runner2:
        self.assertEqual(
            runner2._hadoop_args_for_step(0), ['-inputformat', fmt])
        self.assertEqual(runner2._hadoop_args_for_step(1), [])
def test_hadoop_extra_args_comes_first(self):
    """Verify that --hadoop-arg values lead the generated hadoop args."""
    job = MRWordCount(
        [
            "--cmdenv", "FOO=bar",
            "--hadoop-arg", "-libjar",
            "--hadoop-arg", "qux.jar",
            "--jobconf", "baz=qux",
            "--partitioner", "java.lang.Object",
        ]
    )
    job.HADOOP_INPUT_FORMAT = "FooInputFormat"
    job.HADOOP_OUTPUT_FORMAT = "BarOutputFormat"
    with job.make_runner() as runner:
        step_args = runner._hadoop_args_for_step(0)
        # the -libjar pair passed via --hadoop-arg must appear first
        self.assertEqual(step_args[:2], ["-libjar", "qux.jar"])
        # and the full set of args should still be present
        self.assertEqual(len(step_args), 12)