class MRBossTestCase(MockBotoTestCase):
    """Tests that mrboss's ``_run_on_all_nodes()`` collects per-node output
    from a mocked EMR cluster into a local directory tree."""

    def setUp(self):
        super(MRBossTestCase, self).setUp()
        self.make_runner()

    def tearDown(self):
        self.cleanup_runner()
        super(MRBossTestCase, self).tearDown()

    def make_runner(self):
        """Build the runner, mock S3 bucket, SSH mocks, and a temp dir
        that receives the per-node command output."""
        self.add_mock_s3_data({'walrus': {}})
        # NOTE(review): the original built a throwaway
        # EMRJobRunner(conf_paths=[]) here and immediately replaced it;
        # that dead construction has been removed
        self.runner = EMRJobRunner(s3_sync_wait_time=0,
                                   s3_tmp_dir='s3://walrus/tmp',
                                   conf_paths=[])
        self.runner._s3_log_dir_uri = BUCKET_URI + LOG_DIR
        self.prepare_runner_for_ssh(self.runner)
        self.output_dir = tempfile.mkdtemp(prefix='mrboss_wd')

    def cleanup_runner(self):
        """This method assumes ``prepare_runner_for_ssh()`` was called. That
        method isn't a "proper" setup method because it requires different
        arguments for different tests.
        """
        shutil.rmtree(self.output_dir)
        self.runner.cleanup()

    def test_one_node(self):
        mock_ssh_file('testmaster', 'some_file', b'file contents')

        _run_on_all_nodes(self.runner, self.output_dir, ['cat', 'some_file'],
                          print_stderr=False)

        with open(os.path.join(self.output_dir, 'master', 'stdout'),
                  'r') as f:
            self.assertEqual(f.read().rstrip(), 'file contents')

        # only the master should have produced an output subdirectory
        self.assertEqual(os.listdir(self.output_dir), ['master'])

    def test_two_nodes(self):
        self.add_slave()
        self.runner._opts['num_ec2_instances'] = 2

        mock_ssh_file('testmaster', 'some_file', b'file contents 1')
        mock_ssh_file('testmaster!testslave0', 'some_file',
                      b'file contents 2')

        self.runner.fs  # force initialization of _ssh_fs

        _run_on_all_nodes(self.runner, self.output_dir, ['cat', 'some_file'],
                          print_stderr=False)

        with open(os.path.join(self.output_dir, 'master', 'stdout'),
                  'r') as f:
            self.assertEqual(f.read().rstrip(), 'file contents 1')

        with open(os.path.join(self.output_dir, 'slave testslave0', 'stdout'),
                  'r') as f:
            # was .strip(); use .rstrip() to match the master assertion above
            # (equivalent here: the mock contents have no leading whitespace)
            self.assertEqual(f.read().rstrip(), 'file contents 2')

        self.assertEqual(sorted(os.listdir(self.output_dir)),
                         ['master', 'slave testslave0'])
class MRBossTestCase(MockBotoTestCase):
    """Tests that mrboss's ``run_on_all_nodes()`` collects per-node output
    from a mocked EMR cluster into a local directory tree."""

    def setUp(self):
        super(MRBossTestCase, self).setUp()
        self.make_runner()

    def tearDown(self):
        self.cleanup_runner()
        super(MRBossTestCase, self).tearDown()

    def make_runner(self):
        """Build the runner, mock S3 bucket, SSH mocks, and a temp dir
        that receives the per-node command output."""
        self.add_mock_s3_data({'walrus': {}})
        # NOTE(review): the original built a throwaway
        # EMRJobRunner(conf_paths=[]) here and immediately replaced it;
        # that dead construction has been removed
        self.runner = EMRJobRunner(s3_sync_wait_time=0,
                                   s3_tmp_dir='s3://walrus/tmp',
                                   conf_paths=[])
        self.runner._s3_job_log_uri = BUCKET_URI + LOG_DIR
        self.prepare_runner_for_ssh(self.runner)
        self.output_dir = tempfile.mkdtemp(prefix='mrboss_wd')

    def cleanup_runner(self):
        """This method assumes ``prepare_runner_for_ssh()`` was called. That
        method isn't a "proper" setup method because it requires different
        arguments for different tests.
        """
        shutil.rmtree(self.output_dir)
        self.runner.cleanup()

    def test_one_node(self):
        mock_ssh_file('testmaster', 'some_file', b'file contents')

        run_on_all_nodes(self.runner, self.output_dir, ['cat', 'some_file'],
                         print_stderr=False)

        with open(os.path.join(self.output_dir, 'master', 'stdout'),
                  'r') as f:
            self.assertEqual(f.read().rstrip(), 'file contents')

        # only the master should have produced an output subdirectory
        self.assertEqual(os.listdir(self.output_dir), ['master'])

    def test_two_nodes(self):
        self.add_slave()
        self.runner._opts['num_ec2_instances'] = 2

        mock_ssh_file('testmaster', 'some_file', b'file contents 1')
        mock_ssh_file('testmaster!testslave0', 'some_file',
                      b'file contents 2')

        self.runner.fs  # force initialization of _ssh_fs

        run_on_all_nodes(self.runner, self.output_dir, ['cat', 'some_file'],
                         print_stderr=False)

        with open(os.path.join(self.output_dir, 'master', 'stdout'),
                  'r') as f:
            self.assertEqual(f.read().rstrip(), 'file contents 1')

        with open(os.path.join(self.output_dir, 'slave testslave0', 'stdout'),
                  'r') as f:
            # was .strip(); use .rstrip() to match the master assertion above
            # (equivalent here: the mock contents have no leading whitespace)
            self.assertEqual(f.read().rstrip(), 'file contents 2')

        self.assertEqual(sorted(os.listdir(self.output_dir)),
                         ['master', 'slave testslave0'])
def test_spark_script_step_without_mr_job_script(self):
    """A Spark-script-only step should run end-to-end without a job script."""
    script_path = self.makefile('a_spark_script.py')
    step_descriptions = MRSparkScript(['--script', script_path])._steps_desc()

    runner = EMRJobRunner(steps=step_descriptions, stdin=BytesIO())
    runner.run()
    runner.cleanup()
def test_spark_jar_step_without_mr_job_script(self):
    """A Spark-jar-only step should run end-to-end without a job script."""
    jar_path = self.makefile('fireflies.jar')
    step_descriptions = MRSparkJar(['--jar', jar_path])._steps_desc()

    runner = EMRJobRunner(steps=step_descriptions, stdin=BytesIO())
    runner.run()
    runner.cleanup()
def test_jar_step_without_mr_job_script(self):
    """A plain jar step should run end-to-end without a job script."""
    jar_path = self.makefile('dora.jar')
    step_descriptions = MRJustAJar(['--jar', jar_path])._steps_desc()

    runner = EMRJobRunner(steps=step_descriptions, stdin=BytesIO(b'backpack'))
    runner.run()
    runner.cleanup()
class MRBossTestCase(MockEMRAndS3TestCase):
    """Legacy-style tests that ``run_on_all_nodes()`` collects per-node
    output from a mocked EMR cluster into a local directory tree."""

    @setup
    def make_runner(self):
        """Build the runner, mock S3 bucket, SSH mocks (including slave
        access), and a temp dir that receives the per-node output."""
        self.add_mock_s3_data({'walrus': {}})
        # NOTE(review): the original built a throwaway
        # EMRJobRunner(conf_path=False) here and immediately replaced it;
        # that dead construction has been removed
        self.runner = EMRJobRunner(s3_sync_wait_time=0,
                                   s3_scratch_uri='s3://walrus/tmp',
                                   conf_path=False)
        self.runner._s3_job_log_uri = BUCKET_URI + LOG_DIR
        self.prepare_runner_for_ssh(self.runner)
        self.runner._enable_slave_ssh_access()
        self.output_dir = tempfile.mkdtemp(prefix='mrboss_wd')

    @teardown
    def cleanup_runner(self):
        """This method assumes ``prepare_runner_for_ssh()`` was called. That
        method isn't a "proper" setup method because it requires different
        arguments for different tests.
        """
        shutil.rmtree(self.output_dir)
        self.runner.cleanup()
        self.teardown_ssh()

    def test_one_node(self):
        mock_ssh_file('testmaster', 'some_file', 'file contents')

        run_on_all_nodes(self.runner, self.output_dir, ['cat', 'some_file'],
                         print_stderr=False)

        with open(os.path.join(self.output_dir, 'master', 'stdout'),
                  'r') as f:
            assert_equal(f.read(), 'file contents\n')

        # only the master should have produced an output subdirectory
        assert_equal(os.listdir(self.output_dir), ['master'])

    def test_two_nodes(self):
        self.add_slave()
        self.runner._opts['num_ec2_instances'] = 2

        mock_ssh_file('testmaster', 'some_file', 'file contents 1')
        mock_ssh_file('testmaster!testslave0', 'some_file',
                      'file contents 2')

        run_on_all_nodes(self.runner, self.output_dir, ['cat', 'some_file'],
                         print_stderr=False)

        with open(os.path.join(self.output_dir, 'master', 'stdout'),
                  'r') as f:
            assert_equal(f.read(), 'file contents 1\n')

        with open(os.path.join(self.output_dir, 'slave testslave0', 'stdout'),
                  'r') as f:
            assert_equal(f.read(), 'file contents 2\n')

        assert_equal(sorted(os.listdir(self.output_dir)),
                     ['master', 'slave testslave0'])
class FindProbableCauseOfFailureTestCase(MockEMRAndS3TestCase):
    # We're mostly concerned here that the right log files are read in the
    # right order. parsing of the logs is handled by tests.parse_test

    @setup
    def make_runner(self):
        # build a runner pointed at the mock 'walrus' bucket; each test
        # seeds its own log files via add_mock_s3_data()
        self.runner = EMRJobRunner(s3_sync_wait_time=0,
                                   s3_scratch_uri='s3://walrus/tmp',
                                   conf_path=False)
        self.runner._s3_job_log_uri = BUCKET_URI + LOG_DIR

    @teardown
    def cleanup_runner(self):
        self.runner.cleanup()

    def test_empty(self):
        # no log files at all -> no probable cause found
        self.add_mock_s3_data({'walrus': {}})
        assert_equal(self.runner._find_probable_cause_of_failure([1]), None)

    def test_python_exception(self):
        # a Python traceback in stderr wins; input URI is taken from syslog
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr':
                GARBAGE + TRACEBACK_START + PY_EXCEPTION + GARBAGE,
            ATTEMPT_0_DIR + 'syslog':
                make_input_uri_line(BUCKET_URI + 'input.gz'),
        }})
        assert_equal(
            self.runner._find_probable_cause_of_failure([1]),
            {'lines': list(StringIO(PY_EXCEPTION)),
             's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'stderr',
             'input_uri': BUCKET_URI + 'input.gz'})

    def test_python_exception_without_input_uri(self):
        # same as above, but no syslog -> input_uri comes back as None
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr': (
                GARBAGE + TRACEBACK_START + PY_EXCEPTION + GARBAGE),
        }})
        assert_equal(
            self.runner._find_probable_cause_of_failure([1]),
            {'lines': list(StringIO(PY_EXCEPTION)),
             's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'stderr',
             'input_uri': None})

    def test_java_exception(self):
        # no Python traceback in stderr -> fall back to the Java stack
        # trace found in syslog
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr': GARBAGE + GARBAGE,
            ATTEMPT_0_DIR + 'syslog':
                make_input_uri_line(BUCKET_URI + 'input.gz') +
                GARBAGE +
                CHILD_ERR_LINE +
                JAVA_STACK_TRACE +
                GARBAGE,
        }})
        assert_equal(
            self.runner._find_probable_cause_of_failure([1]),
            {'lines': list(StringIO(JAVA_STACK_TRACE)),
             's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'syslog',
             'input_uri': BUCKET_URI + 'input.gz'})

    def test_java_exception_without_input_uri(self):
        # Java stack trace with no input URI line -> input_uri is None
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'syslog':
                CHILD_ERR_LINE + JAVA_STACK_TRACE + GARBAGE,
        }})
        assert_equal(
            self.runner._find_probable_cause_of_failure([1]),
            {'lines': list(StringIO(JAVA_STACK_TRACE)),
             's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'syslog',
             'input_uri': None})

    def test_hadoop_streaming_error(self):
        # we should look only at step 2 since the errors in the other
        # steps are boring
        #
        # we include input.gz just to test that we DON'T check for it
        self.add_mock_s3_data({'walrus': {
            LOG_DIR + 'steps/1/syslog':
                GARBAGE +
                HADOOP_ERR_LINE_PREFIX + BORING_HADOOP_ERROR + '\n',
            LOG_DIR + 'steps/2/syslog':
                GARBAGE +
                make_input_uri_line(BUCKET_URI + 'input.gz') +
                HADOOP_ERR_LINE_PREFIX + USEFUL_HADOOP_ERROR + '\n',
            LOG_DIR + 'steps/3/syslog':
                HADOOP_ERR_LINE_PREFIX + BORING_HADOOP_ERROR + '\n',
        }})
        assert_equal(
            self.runner._find_probable_cause_of_failure([1, 2, 3]),
            {'lines': [USEFUL_HADOOP_ERROR + '\n'],
             's3_log_file_uri': BUCKET_URI + LOG_DIR + 'steps/2/syslog',
             'input_uri': None})

    def test_later_task_attempt_steps_win(self):
        # should look at later steps first
        self.add_mock_s3_data({'walrus': {
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0001_r_000126_3/stderr':
                TRACEBACK_START + PY_EXCEPTION,
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0002_m_000004_0/syslog':
                CHILD_ERR_LINE + JAVA_STACK_TRACE,
        }})
        failure = self.runner._find_probable_cause_of_failure([1, 2])
        assert_equal(failure['s3_log_file_uri'],
                     BUCKET_URI + TASK_ATTEMPTS_DIR +
                     'attempt_201007271720_0002_m_000004_0/syslog')

    def test_later_step_logs_win(self):
        # identical errors in steps 1 and 2 -> step 2's log is reported
        self.add_mock_s3_data({'walrus': {
            LOG_DIR + 'steps/1/syslog':
                HADOOP_ERR_LINE_PREFIX + USEFUL_HADOOP_ERROR + '\n',
            LOG_DIR + 'steps/2/syslog':
                HADOOP_ERR_LINE_PREFIX + USEFUL_HADOOP_ERROR + '\n',
        }})
        failure = self.runner._find_probable_cause_of_failure([1, 2])
        assert_equal(failure['s3_log_file_uri'],
                     BUCKET_URI + LOG_DIR + 'steps/2/syslog')

    def test_reducer_beats_mapper(self):
        # should look at reducers over mappers
        self.add_mock_s3_data({'walrus': {
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0001_m_000126_3/stderr':
                TRACEBACK_START + PY_EXCEPTION,
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0001_r_000126_3/syslog':
                CHILD_ERR_LINE + JAVA_STACK_TRACE,
        }})
        failure = self.runner._find_probable_cause_of_failure([1])
        assert_equal(failure['s3_log_file_uri'],
                     BUCKET_URI + TASK_ATTEMPTS_DIR +
                     'attempt_201007271720_0001_r_000126_3/syslog')

    def test_more_attempts_win(self):
        # look at fourth attempt before looking at first attempt
        self.add_mock_s3_data({'walrus': {
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0001_m_000126_0/stderr':
                TRACEBACK_START + PY_EXCEPTION,
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0001_m_000004_3/syslog':
                CHILD_ERR_LINE + JAVA_STACK_TRACE,
        }})
        failure = self.runner._find_probable_cause_of_failure([1])
        assert_equal(failure['s3_log_file_uri'],
                     BUCKET_URI + TASK_ATTEMPTS_DIR +
                     'attempt_201007271720_0001_m_000004_3/syslog')

    def test_py_exception_beats_java_stack_trace(self):
        # Python traceback in stderr outranks Java stack trace in syslog
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr': TRACEBACK_START + PY_EXCEPTION,
            ATTEMPT_0_DIR + 'syslog': CHILD_ERR_LINE + JAVA_STACK_TRACE,
        }})
        failure = self.runner._find_probable_cause_of_failure([1])
        assert_equal(failure['s3_log_file_uri'],
                     BUCKET_URI + ATTEMPT_0_DIR + 'stderr')

    def test_exception_beats_hadoop_error(self):
        # task-attempt exceptions outrank step-level Hadoop errors
        self.add_mock_s3_data({'walrus': {
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0002_m_000126_0/stderr':
                TRACEBACK_START + PY_EXCEPTION,
            LOG_DIR + 'steps/1/syslog':
                HADOOP_ERR_LINE_PREFIX + USEFUL_HADOOP_ERROR + '\n',
        }})
        failure = self.runner._find_probable_cause_of_failure([1, 2])
        assert_equal(failure['s3_log_file_uri'],
                     BUCKET_URI + TASK_ATTEMPTS_DIR +
                     'attempt_201007271720_0002_m_000126_0/stderr')

    def test_step_filtering(self):
        # same as previous test, but step 2 is filtered out
        self.add_mock_s3_data({'walrus': {
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0002_m_000126_0/stderr':
                TRACEBACK_START + PY_EXCEPTION,
            LOG_DIR + 'steps/1/syslog':
                HADOOP_ERR_LINE_PREFIX + USEFUL_HADOOP_ERROR + '\n',
        }})
        failure = self.runner._find_probable_cause_of_failure([1])
        assert_equal(failure['s3_log_file_uri'],
                     BUCKET_URI + LOG_DIR + 'steps/1/syslog')

    def test_ignore_errors_from_steps_that_later_succeeded(self):
        # This tests the fix for Issue #31
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr':
                GARBAGE + TRACEBACK_START + PY_EXCEPTION + GARBAGE,
            ATTEMPT_0_DIR + 'syslog':
                make_input_uri_line(BUCKET_URI + 'input.gz'),
            ATTEMPT_1_DIR + 'stderr': '',
            ATTEMPT_1_DIR + 'syslog':
                make_input_uri_line(BUCKET_URI + 'input.gz'),
        }})
        assert_equal(self.runner._find_probable_cause_of_failure([1]), None)
class FindProbableCauseOfFailureTestCase(MockEMRAndS3TestCase):
    # We're mostly concerned here that the right log files are read in the
    # right order. parsing of the logs is handled by tests.parse_test

    @setup
    def make_runner(self):
        # build a runner pointed at the mock 'walrus' bucket; each test
        # seeds its own log files via add_mock_s3_data()
        self.runner = EMRJobRunner(s3_sync_wait_time=0,
                                   s3_scratch_uri='s3://walrus/tmp',
                                   conf_path=False)
        self.runner._s3_job_log_uri = BUCKET_URI + LOG_DIR

    @teardown
    def cleanup_runner(self):
        self.runner.cleanup()

    def test_empty(self):
        # no log files at all -> no probable cause found
        self.add_mock_s3_data({'walrus': {}})
        assert_equal(self.runner._find_probable_cause_of_failure([1]), None)

    def test_python_exception(self):
        # a Python traceback in stderr wins; input URI is taken from syslog
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr':
                GARBAGE + TRACEBACK_START + PY_EXCEPTION + GARBAGE,
            ATTEMPT_0_DIR + 'syslog':
                make_input_uri_line(BUCKET_URI + 'input.gz'),
        }})
        assert_equal(
            self.runner._find_probable_cause_of_failure([1]),
            {'lines': list(StringIO(PY_EXCEPTION)),
             's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'stderr',
             'input_uri': BUCKET_URI + 'input.gz'})

    def test_python_exception_without_input_uri(self):
        # same as above, but no syslog -> input_uri comes back as None
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr': (
                GARBAGE + TRACEBACK_START + PY_EXCEPTION + GARBAGE),
        }})
        assert_equal(
            self.runner._find_probable_cause_of_failure([1]),
            {'lines': list(StringIO(PY_EXCEPTION)),
             's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'stderr',
             'input_uri': None})

    def test_java_exception(self):
        # no Python traceback in stderr -> fall back to the Java stack
        # trace found in syslog
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr': GARBAGE + GARBAGE,
            ATTEMPT_0_DIR + 'syslog':
                make_input_uri_line(BUCKET_URI + 'input.gz') +
                GARBAGE +
                CHILD_ERR_LINE +
                JAVA_STACK_TRACE +
                GARBAGE,
        }})
        assert_equal(
            self.runner._find_probable_cause_of_failure([1]),
            {'lines': list(StringIO(JAVA_STACK_TRACE)),
             's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'syslog',
             'input_uri': BUCKET_URI + 'input.gz'})

    def test_java_exception_without_input_uri(self):
        # Java stack trace with no input URI line -> input_uri is None
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'syslog':
                CHILD_ERR_LINE + JAVA_STACK_TRACE + GARBAGE,
        }})
        assert_equal(
            self.runner._find_probable_cause_of_failure([1]),
            {'lines': list(StringIO(JAVA_STACK_TRACE)),
             's3_log_file_uri': BUCKET_URI + ATTEMPT_0_DIR + 'syslog',
             'input_uri': None})

    def test_hadoop_streaming_error(self):
        # we should look only at step 2 since the errors in the other
        # steps are boring
        #
        # we include input.gz just to test that we DON'T check for it
        self.add_mock_s3_data({'walrus': {
            LOG_DIR + 'steps/1/syslog':
                GARBAGE +
                HADOOP_ERR_LINE_PREFIX + BORING_HADOOP_ERROR + '\n',
            LOG_DIR + 'steps/2/syslog':
                GARBAGE +
                make_input_uri_line(BUCKET_URI + 'input.gz') +
                HADOOP_ERR_LINE_PREFIX + USEFUL_HADOOP_ERROR + '\n',
            LOG_DIR + 'steps/3/syslog':
                HADOOP_ERR_LINE_PREFIX + BORING_HADOOP_ERROR + '\n',
        }})
        assert_equal(
            self.runner._find_probable_cause_of_failure([1, 2, 3]),
            {'lines': [USEFUL_HADOOP_ERROR + '\n'],
             's3_log_file_uri': BUCKET_URI + LOG_DIR + 'steps/2/syslog',
             'input_uri': None})

    def test_later_task_attempt_steps_win(self):
        # should look at later steps first
        self.add_mock_s3_data({'walrus': {
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0001_r_000126_3/stderr':
                TRACEBACK_START + PY_EXCEPTION,
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0002_m_000004_0/syslog':
                CHILD_ERR_LINE + JAVA_STACK_TRACE,
        }})
        failure = self.runner._find_probable_cause_of_failure([1, 2])
        assert_equal(failure['s3_log_file_uri'],
                     BUCKET_URI + TASK_ATTEMPTS_DIR +
                     'attempt_201007271720_0002_m_000004_0/syslog')

    def test_later_step_logs_win(self):
        # identical errors in steps 1 and 2 -> step 2's log is reported
        self.add_mock_s3_data({'walrus': {
            LOG_DIR + 'steps/1/syslog':
                HADOOP_ERR_LINE_PREFIX + USEFUL_HADOOP_ERROR + '\n',
            LOG_DIR + 'steps/2/syslog':
                HADOOP_ERR_LINE_PREFIX + USEFUL_HADOOP_ERROR + '\n',
        }})
        failure = self.runner._find_probable_cause_of_failure([1, 2])
        assert_equal(failure['s3_log_file_uri'],
                     BUCKET_URI + LOG_DIR + 'steps/2/syslog')

    def test_reducer_beats_mapper(self):
        # should look at reducers over mappers
        self.add_mock_s3_data({'walrus': {
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0001_m_000126_3/stderr':
                TRACEBACK_START + PY_EXCEPTION,
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0001_r_000126_3/syslog':
                CHILD_ERR_LINE + JAVA_STACK_TRACE,
        }})
        failure = self.runner._find_probable_cause_of_failure([1])
        assert_equal(failure['s3_log_file_uri'],
                     BUCKET_URI + TASK_ATTEMPTS_DIR +
                     'attempt_201007271720_0001_r_000126_3/syslog')

    def test_more_attempts_win(self):
        # look at fourth attempt before looking at first attempt
        self.add_mock_s3_data({'walrus': {
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0001_m_000126_0/stderr':
                TRACEBACK_START + PY_EXCEPTION,
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0001_m_000004_3/syslog':
                CHILD_ERR_LINE + JAVA_STACK_TRACE,
        }})
        failure = self.runner._find_probable_cause_of_failure([1])
        assert_equal(failure['s3_log_file_uri'],
                     BUCKET_URI + TASK_ATTEMPTS_DIR +
                     'attempt_201007271720_0001_m_000004_3/syslog')

    def test_py_exception_beats_java_stack_trace(self):
        # Python traceback in stderr outranks Java stack trace in syslog
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr': TRACEBACK_START + PY_EXCEPTION,
            ATTEMPT_0_DIR + 'syslog': CHILD_ERR_LINE + JAVA_STACK_TRACE,
        }})
        failure = self.runner._find_probable_cause_of_failure([1])
        assert_equal(failure['s3_log_file_uri'],
                     BUCKET_URI + ATTEMPT_0_DIR + 'stderr')

    def test_exception_beats_hadoop_error(self):
        # task-attempt exceptions outrank step-level Hadoop errors
        self.add_mock_s3_data({'walrus': {
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0002_m_000126_0/stderr':
                TRACEBACK_START + PY_EXCEPTION,
            LOG_DIR + 'steps/1/syslog':
                HADOOP_ERR_LINE_PREFIX + USEFUL_HADOOP_ERROR + '\n',
        }})
        failure = self.runner._find_probable_cause_of_failure([1, 2])
        assert_equal(failure['s3_log_file_uri'],
                     BUCKET_URI + TASK_ATTEMPTS_DIR +
                     'attempt_201007271720_0002_m_000126_0/stderr')

    def test_step_filtering(self):
        # same as previous test, but step 2 is filtered out
        self.add_mock_s3_data({'walrus': {
            TASK_ATTEMPTS_DIR +
                'attempt_201007271720_0002_m_000126_0/stderr':
                TRACEBACK_START + PY_EXCEPTION,
            LOG_DIR + 'steps/1/syslog':
                HADOOP_ERR_LINE_PREFIX + USEFUL_HADOOP_ERROR + '\n',
        }})
        failure = self.runner._find_probable_cause_of_failure([1])
        assert_equal(failure['s3_log_file_uri'],
                     BUCKET_URI + LOG_DIR + 'steps/1/syslog')

    def test_ignore_errors_from_steps_that_later_succeeded(self):
        # This tests the fix for Issue #31
        self.add_mock_s3_data({'walrus': {
            ATTEMPT_0_DIR + 'stderr':
                GARBAGE + TRACEBACK_START + PY_EXCEPTION + GARBAGE,
            ATTEMPT_0_DIR + 'syslog':
                make_input_uri_line(BUCKET_URI + 'input.gz'),
            ATTEMPT_1_DIR + 'stderr': '',
            ATTEMPT_1_DIR + 'syslog':
                make_input_uri_line(BUCKET_URI + 'input.gz'),
        }})
        assert_equal(self.runner._find_probable_cause_of_failure([1]), None)