Example #1
    def setUp(self):
        # if save_sys_std() *doesn't* work, don't mess up other tests
        super(SaveSysStdTestCase, self).setUp()

        self.stdin = self.start(patch('sys.stdin'))
        self.stdout = self.start(patch('sys.stdout'))
        self.stderr = self.start(patch('sys.stderr'))
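A note on the recurring self.start(...) idiom: these examples appear to come from mrjob's test suite, where start() evidently begins a patcher and registers its stop() as a cleanup, so patches are undone even when a test fails. A minimal sketch of such a helper, assuming a unittest.TestCase subclass (the helper name matches the calls in these examples; the body is an assumption):

from unittest import TestCase

class PatcherTestCase(TestCase):
    def start(self, patcher):
        # begin the patch and undo it automatically at test teardown,
        # even if the test body raises
        mock = patcher.start()
        self.addCleanup(patcher.stop)
        return mock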
Example #2
    def test_logging_stderr_in_cleanup(self):

        def mock_Popen(*args, **kwargs):
            mock_proc = MagicMock()

            mock_proc.stdout = MagicMock()
            mock_proc.stdout.__iter__.return_value = [
                b'line1\n', b'line2\n']

            mock_proc.stderr = MagicMock()
            mock_proc.stderr.__iter__.return_value = [
                b'Emergency, everybody to get from street\n']

            mock_proc.wait.return_value = 0

            return mock_proc

        self.start(patch('mrjob.fs.hadoop.Popen', mock_Popen))

        mock_log = self.start(patch('mrjob.fs.hadoop.log'))

        fs = HadoopFilesystem()

        data = b''.join(fs._cat_file('/some/path'))
        self.assertEqual(data, b'line1\nline2\n')

        mock_log.error.assert_called_once_with(
            'STDERR: Emergency, everybody to get from street')
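Example #2 leans on a MagicMock feature: assigning a plain list to __iter__.return_value makes the mock iterable, because mock passes the configured value through iter(). A standalone check:

from unittest.mock import MagicMock

proc = MagicMock()
proc.stdout.__iter__.return_value = [b'line1\n', b'line2\n']
assert list(proc.stdout) == [b'line1\n', b'line2\n']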
Example #3
    def setUp(self):
        super(InterpretEMRStepStderrTestCase, self).setUp()

        # instead of mocking out contents of files, just mock out
        # what _parse_task_stderr() should return, and have
        # _cat_log() just pass through the path
        self.mock_paths = []
        self.path_to_mock_result = {}

        self.mock_paths_catted = []

        def mock_cat_log(fs, path):
            if path in self.mock_paths:
                self.mock_paths_catted.append(path)
            return path

        def mock_parse_task_stderr(path_from_mock_cat_log):
            return self.path_to_mock_result.get(path_from_mock_cat_log)

        def mock_exists(path):
            return path in self.mock_paths

        # need to mock ls so that _ls_task_syslogs() can work
        def mock_ls(log_dir):
            return self.mock_paths

        self.mock_fs = Mock()
        self.mock_fs.exists = Mock(side_effect=mock_exists)
        self.mock_fs.ls = Mock(side_effect=mock_ls)

        self.mock_cat_log = self.start(
            patch('mrjob.logs.step._cat_log', side_effect=mock_cat_log))

        self.start(
            patch('mrjob.logs.step._parse_task_stderr',
                  side_effect=mock_parse_task_stderr))
Example #4
    def setUp(self):
        super(AuditUsageTestCase, self).setUp()

        self.repeat_sleep = self.start(patch('time.sleep'))
        # this is called once per cluster (no pagination), so we can
        # test quantity as well as whether it was called
        self.describe_cluster_sleep = self.start(
            patch('mrjob.tools.emr.audit_usage.sleep'))
Example #5
    def setUp(self):
        super(StepPickingTestCase, self).setUp()

        self.pick_error = self.start(
            patch('mrjob.emr.EMRJobRunner._pick_error',
                  side_effect=StopIteration))

        self.log = self.start(
            patch('mrjob.tools.diagnose.log'))
Example #6
    def setUp(self):
        super(InterpretTaskLogsTestCase, self).setUp()

        self.runner._ls_task_logs = Mock()
        self._interpret_task_logs = (
            self.start(patch('mrjob.logs.mixin._interpret_task_logs')))
        self._interpret_spark_task_logs = (
            self.start(patch('mrjob.logs.mixin._interpret_spark_task_logs')))
        self.runner.get_hadoop_version = Mock(return_value='2.7.1')
Example #7
    def setUp(self):
        super(LsTaskLogsTestCase, self).setUp()

        self._ls_task_logs = self.start(patch(
            'mrjob.logs.mixin._ls_task_logs'))
        self._ls_spark_task_logs = self.start(patch(
            'mrjob.logs.mixin._ls_spark_task_logs'))

        self.runner._stream_task_log_dirs = Mock()
Example #8
    def setUp(self):
        super(JoinTestCase, self).setUp()

        # os.path.join() and posixpath.join() do the same thing on UNIX
        # and OS X, so track which one we called
        self.start(patch('os.path.join', wraps=os.path.join))
        self.start(patch('posixpath.join', wraps=posixpath.join))

        self.fs = Filesystem()
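The wraps= keyword used in Example #8 (and again in Example #11) keeps the real function's behavior while recording calls, which is how the test can tell os.path.join() and posixpath.join() apart even though they return identical results. A standalone illustration (assumes POSIX path semantics):

import os
from unittest.mock import patch

with patch('os.path.join', wraps=os.path.join) as join:
    assert os.path.join('a', 'b') == 'a/b'  # real behavior preserved
    join.assert_called_once_with('a', 'b')  # ...and the call was recorded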
Example #9
    def setUp(self):
        super(DeprecatedSwitchesTestCase, self).setUp()

        self._maybe_terminate_clusters = self.start(patch(
            'mrjob.tools.emr.terminate_idle_clusters.'
            '_maybe_terminate_clusters'))

        self.log = self.start(
            patch('mrjob.tools.emr.terminate_idle_clusters.log'))
Example #10
    def setUp(self):
        super(InterpretEMRBootstrapStderrTestCase, self).setUp()

        self.mock_fs = Mock()

        self.mock_parse_task_stderr = self.start(
            patch('mrjob.logs.bootstrap._parse_task_stderr',
                  return_value=dict(message='BOOM!\n')))

        self.mock_cat_log = self.start(patch('mrjob.logs.bootstrap._cat_log'))
Example #11
    def setUp(self):
        super(SortBinTestCase, self).setUp()

        # these patches are only okay because they don't raise exceptions;
        # raising would hand an un-pickleable traceback to multiprocessing
        self.check_call = self.start(patch(
            'mrjob.local.check_call', wraps=check_call))

        self._sort_lines_in_memory = self.start(patch(
            'mrjob.local._sort_lines_in_memory',
            wraps=_sort_lines_in_memory))
Example #12
    def setUp(self):
        super(StepPickingTestCase, self).setUp()

        self.pick_error = self.start(
            patch('mrjob.emr.EMRJobRunner._pick_error',
                  side_effect=StopIteration))

        self.log = self.start(
            patch('mrjob.tools.diagnose.log'))

        # don't print logging messages when we start the diagnose tool
        self.log_to_stream = self.start(
            patch('mrjob.launch.log_to_stream'))
Example #13
    def setUp(self):
        super(WrapAWSClientTestCase, self).setUp()

        # don't actually wait between retries
        self.sleep = self.start(patch('time.sleep'))

        self.log = self.start(patch('mrjob.retry.log'))

        self.list_buckets = self.start(patch(
            'tests.mock_boto3.s3.MockS3Client.list_buckets',
            side_effect=[dict(Buckets=[])]))

        self.client = self.client('s3')
        self.wrapped_client = _wrap_aws_client(self.client)
Example #14
    def setUp(self):
        super(SparkPyFilesTestCase, self).setUp()

        # don't bother actually running spark
        self.start(patch(
            'mrjob.spark.runner.SparkMRJobRunner._run_spark_submit',
            return_value=0))
Example #15
    def test_get_location_is_forbidden(self):
        self.add_mock_s3_data({'walrus': {}}, location='us-west-2')

        fs = S3Filesystem()

        access_denied_error = ClientError(
            dict(
                Error=dict(
                    Code='AccessDenied',
                    Message='Access Denied',
                ),
                ResponseMetadata=dict(
                    HTTPStatusCode=403
                ),
            ),
            'GetBucketLocation')

        with patch(
                'tests.mock_boto3.s3.MockS3Client.get_bucket_location',
                side_effect=access_denied_error):

            bucket = fs.get_bucket('walrus')

        self.assertEqual(bucket.meta.client.meta.endpoint_url,
                         'https://s3.amazonaws.com')
        self.assertEqual(bucket.meta.client.meta.region_name, 'us-east-1')
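The two-argument ClientError in Example #15 follows botocore's signature, ClientError(error_response, operation_name), and the error dict is exposed afterwards as .response. A quick standalone check (requires botocore):

from botocore.exceptions import ClientError

err = ClientError(
    dict(Error=dict(Code='AccessDenied', Message='Access Denied')),
    'GetBucketLocation')
assert err.response['Error']['Code'] == 'AccessDenied'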
Example #16
    def setUp(self):
        self._dataproc_client = MockDataprocClient(self)
        self._gcs_client = MockGCSClient(self)
        self._gcs_fs = self._gcs_client._fs

        self.start(patch.object(
            DataprocJobRunner, 'api_client', self._dataproc_client))

        self.gcs_patch_api_client = patch.object(
            GCSFilesystem, 'api_client', self._gcs_client)
        self.gcs_patch_download_io = patch.object(
            GCSFilesystem, '_download_io', self._gcs_client.download_io)
        self.gcs_patch_upload_io = patch.object(
            GCSFilesystem, '_upload_io', self._gcs_client.upload_io)
        self.start(self.gcs_patch_api_client)
        self.start(self.gcs_patch_download_io)
        self.start(self.gcs_patch_upload_io)

        self.start(patch('mrjob.dataproc._read_gcloud_config',
                         lambda: _GCLOUD_CONFIG))

        super(MockGoogleAPITestCase, self).setUp()

        # patch slow things
        def fake_create_mrjob_tar_gz(mocked_self, *args, **kwargs):
            mocked_self._mrjob_tar_gz_path = self.fake_mrjob_tgz_path
            return self.fake_mrjob_tgz_path

        self.start(patch.object(
            DataprocJobRunner, '_create_mrjob_tar_gz',
            fake_create_mrjob_tar_gz))

        self.start(patch.object(time, 'sleep'))
Example #17
    def setUp(self):
        super(LsLogsTestCase, self).setUp()

        self.mock_fs = Mock()
        self.mock_paths = []

        def mock_fs_ls(log_dir):
            prefix = log_dir.rstrip('/') + '/'

            exists = False

            for p in self.mock_paths:
                if isinstance(p, Exception):
                    raise p
                elif p.startswith(prefix):
                    yield p
                    exists = True

            if not exists:
                raise IOError

        def mock_fs_exists(log_dir):
            return any(mock_fs_ls(log_dir))

        self.mock_fs.ls = Mock(side_effect=mock_fs_ls)
        self.mock_fs.exists = Mock(side_effect=mock_fs_exists)

        # a matcher that cheerfully passes through kwargs
        def mock_matcher(path, **kwargs):
            return dict(**kwargs)

        self.mock_matcher = Mock(side_effect=mock_matcher)

        self.log = self.start(patch('mrjob.logs.wrap.log'))
Example #18
    def test_infer_from_hadoop_bin_realpath(self):
        with patch('posixpath.realpath', return_value='/ha/do/op/bin'):
            self.runner = HadoopJobRunner(hadoop_bin=['/usr/bin/hadoop'])
            self.mock_paths.append('/ha/do/op/hadoop-streaming.jar')

            self.assertEqual(self.runner._find_hadoop_streaming_jar(),
                             '/ha/do/op/hadoop-streaming.jar')
Example #19
    def setUp(self):
        super(InterpretTaskLogsTestCase, self).setUp()

        # instead of mocking out contents of files, just mock out
        # what _parse_task_{syslog,stderr}() should return, and have
        # _cat_log_lines() just pass through the path
        self.mock_paths = []
        self.path_to_mock_result = {}

        self.mock_log_callback = Mock()

        self.mock_paths_catted = []

        def mock_cat_log_lines(fs, path):
            if path in self.mock_paths:
                self.mock_paths_catted.append(path)
            return path

        # (the actual log-parsing functions take lines from the log)
        def mock_parse_task_syslog(path_from_mock_cat_log_lines):
            # default is {}
            return self.path_to_mock_result.get(
                path_from_mock_cat_log_lines, {})

        def mock_parse_task_stderr(path_from_mock_cat_log_lines):
            # default is None
            return self.path_to_mock_result.get(path_from_mock_cat_log_lines)

        def mock_exists(path):
            return path in self.mock_paths or path == 'MOCK_LOG_DIR'

        # need to mock ls so that _ls_task_logs() can work
        def mock_ls(log_dir):
            return self.mock_paths

        self.mock_fs = Mock()
        self.mock_fs.exists = Mock(side_effect=mock_exists)
        self.mock_fs.ls = Mock(side_effect=mock_ls)

        self.mock_cat_log_lines = self.start(
            patch('mrjob.logs.task._cat_log_lines',
                  side_effect=mock_cat_log_lines))

        self.start(patch('mrjob.logs.task._parse_task_syslog',
                         side_effect=mock_parse_task_syslog))
        self.start(patch('mrjob.logs.task._parse_task_stderr',
                         side_effect=mock_parse_task_stderr))
Example #20
    def setUp(self):
        super(MRBossTestCase, self).setUp()

        self.ssh_worker_hosts = self.start(patch(
            'mrjob.emr.EMRJobRunner._ssh_worker_hosts',
            return_value=[]))

        self.make_runner()
Example #21
    def test_option_debug_printout(self):
        log = self.start(patch('mrjob.runner.log'))

        InlineMRJobRunner(owner='dave')

        debug = ''.join(a[0] + '\n' for a, kw in log.debug.call_args_list)

        self.assertIn("'owner'", debug)
        self.assertIn("'dave'", debug)
Example #22
    def setUp(self):
        super(PickErrorsTestCase, self).setUp()

        self.runner._interpret_history_log = Mock()
        self.runner._interpret_step_logs = Mock()
        self.runner._interpret_task_logs = Mock()

        self._pick_error = self.start(
            patch('mrjob.logs.mixin._pick_error'))
Example #23
    def setUp(self):
        super(CatLogsTestCase, self).setUp()

        self.mock_data = None

        self.mock_fs = Mock()
        self.mock_fs.cat = Mock(return_value=())
        self.mock_fs.exists = Mock(return_value=True)

        self.mock_log = self.start(patch('mrjob.logs.wrap.log'))
Example #24
    def setUp(self):
        super(InterpretHistoryLogTestCase, self).setUp()

        self.mock_fs = Mock()

        # don't include errors in return value, as they get patched
        mock_return_value = dict(
            counters={'foo': {'bar': 42}},
            errors=[])

        self.mock_parse_yarn_history_log = self.start(
            patch('mrjob.logs.history._parse_yarn_history_log',
                  return_value=mock_return_value))

        self.mock_parse_pre_yarn_history_log = self.start(
            patch('mrjob.logs.history._parse_pre_yarn_history_log',
                  return_value=mock_return_value))

        self.mock_cat_log = self.start(patch('mrjob.logs.history._cat_log'))
Example #25
    def test_disable_check_input_paths(self):
        missing_data = os.path.join(self.tmp_dir, 'data')

        job = MRWordCount(['--no-check-input-paths', missing_data])

        self.start(patch('mrjob.inline.InlineMRJobRunner._run',
                   side_effect=StopIteration))

        with job.make_runner() as runner:
            self.assertRaises(StopIteration, runner.run)
Example #26
    def setUp(self):
        super(StreamingLogDirsTestCase, self).setUp()

        self.log = self.start(patch('mrjob.hadoop.log'))

        self.runner = HadoopJobRunner()
        self.runner._hadoop_log_dirs = Mock(return_value=[])
        self.runner.fs.exists = Mock(return_value=True)

        self.log.reset_mock()  # ignore logging from HadoopJobRunner init
Example #27
    def test_hadoop_home_regression(self):
        # kill $HADOOP_HOME if it exists
        try:
            del os.environ['HADOOP_HOME']
        except KeyError:
            pass

        with patch('mrjob.hadoop.find_hadoop_streaming_jar',
                   return_value='some.jar'):
            HadoopJobRunner(hadoop_home=self.tmp_dir, conf_paths=[])
Example #28
    def test_explicit_spark_tmp_dir_path(self):
        # posixpath.join() and os.path.join() are the same on UNIX
        self.start(patch('os.path.join', lambda *paths: '/./'.join(paths)))

        runner = SparkMRJobRunner(spark_tmp_dir='/path/to/tmp')

        self.assertTrue(runner._spark_tmp_dir.startswith('/path/to/tmp/./'))
        self.assertGreater(len(runner._spark_tmp_dir), len('/path/to/tmp/./'))

        self.assertIsNone(runner._upload_mgr)
Example #29
    def test_get_location_other_error(self):
        self.add_mock_s3_data({'walrus': {}}, location='us-west-2')

        fs = S3Filesystem()

        with patch(
                'tests.mockboto.MockBucket.get_location',
                side_effect=boto.exception.S3ResponseError(404, 'Not Found')):

            self.assertRaises(boto.exception.S3ResponseError,
                              fs.get_bucket, 'walrus')
Example #30
    def setUp(self):
        super(FindHadoopBinTestCase, self).setUp()

        # track calls to which()
        self.which = self.start(patch('mrjob.fs.hadoop.which', wraps=which))

        # keep which() from searching in /bin, etc.
        os.environ['PATH'] = self.tmp_dir

        # create basic HadoopFilesystem (okay to overwrite)
        self.fs = HadoopFilesystem()
Example #31
    def setUp(self):
        super(ParseOutputLine, self).setUp()

        self.log = self.start(patch('mrjob.job.log'))
Example #32
    def setUp(self):
        super(LsHistoryLogsTestCase, self).setUp()

        self._ls_history_logs = self.start(
            patch('mrjob.logs.mixin._ls_history_logs'))
        self.runner._stream_history_log_dirs = Mock()
Example #33
    def test_empty(self):
        with patch('getpass.getuser') as getuser:
            getuser.return_value = 'dave'
            self.assertEqual(fully_qualify_hdfs_path(''), 'hdfs:///user/dave/')
Example #34
    def setUp(self):
        super(DeprecatedReadFileTestCase, self).setUp()
        self.start(patch('mrjob.util.log'))
Example #35
    def setUp(self):
        super(GCSFilesystemInitTestCase, self).setUp()

        self.log = self.start(patch('mrjob.fs.gcs.log'))

        self.Client = self.start(patch('google.cloud.storage.client.Client'))
Example #36
    def setUp(self):
        super(TranslateJobConfDictTestCase, self).setUp()
        self.log = self.start(patch('mrjob.compat.log'))
Example #37
    def setUp(self):
        super(UnexpectedOptsWarningTestCase, self).setUp()

        self.log = self.start(patch('mrjob.runner.log'))
Example #38
    def setUp(self):
        super(SparkSubmitLocallyTestCase, self).setUp()

        # don't set up logging
        self.set_up_logging = self.start(
            patch('mrjob.job.MRJob.set_up_logging'))
Example #39
    def setUp(self):
        super(DeprecatedOptionHooksTestCase, self).setUp()

        self.start(patch('mrjob.launch.log'))
Example #40
    def setUp(self):
        super(PrintHelpTestCase, self).setUp()

        self.exit = self.start(patch('sys.exit'))
        self.stdout = self.start(patch.object(sys, 'stdout', StringIO()))
Example #41
    def setUp(self):
        super(S3FSTestCase, self).setUp()
        self.fs = S3Filesystem()

        self.TransferConfig = self.start(
            patch('boto3.s3.transfer.TransferConfig'))
Example #42
    def setUp(self):
        super(SingleSparkContextTestCase, self).setUp()

        self.start(
            patch('pyspark.SparkContext', return_value=self.spark_context))
Example #43
    def setUp(self):
        super(ParseIndentedCountersTestCase, self).setUp()

        self.log = self.start(patch('mrjob.logs.step.log'))
Example #44
    def monkey_patch_argv(self, *args):
        p = patch('sys.argv', [sys.argv[0]] + list(args))
        self.addCleanup(p.stop)
        p.start()
Example #45
    def monkey_patch_stderr(self):
        p = patch('sys.stderr', mock_stdout_or_stderr())
        self.addCleanup(p.stop)
        p.start()
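Examples #44 and #45 register p.stop with addCleanup() so the patch is removed at teardown no matter how the test exits. A hypothetical test case built around the argv helper (every name other than the helper itself is invented for illustration):

import sys
from unittest import TestCase
from unittest.mock import patch

class ArgvExampleTestCase(TestCase):
    def monkey_patch_argv(self, *args):
        p = patch('sys.argv', [sys.argv[0]] + list(args))
        self.addCleanup(p.stop)
        p.start()

    def test_argv_is_patched(self):
        self.monkey_patch_argv('-v', '--runner', 'local')
        self.assertEqual(sys.argv[1:], ['-v', '--runner', 'local'])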
Example #46
    def setUp(self):
        super(LsTaskSyslogsTestCase, self).setUp()

        self._ls_task_syslogs = self.start(
            patch('mrjob.logs.mixin._ls_task_syslogs'))
        self.runner._stream_task_log_dirs = Mock()
Example #47
    def setUp(self):
        super(RegionAndZoneOptsTestCase, self).setUp()
        self.log = self.start(patch('mrjob.dataproc.log'))
Example #48
    def setUp(self):
        super(PassStepsToRunnerTestCase, self).setUp()
        self.log = self.start(patch('mrjob.runner.log'))
Example #49
    def test_relative_path(self):
        with patch('getpass.getuser') as getuser:
            getuser.return_value = 'dave'
            self.assertEqual(fully_qualify_hdfs_path('path/to/chocolate'),
                             'hdfs:///user/dave/path/to/chocolate')
Example #50
    def setUp(self):
        super(SparkTmpDirTestCase, self).setUp()

        self.log = self.start(patch('mrjob.spark.runner.log'))
Example #51
    def setUp(self):
        self.runner = self.MockRunner()
        self.log = self.start(patch('mrjob.logs.mixin.log'))
Example #52
    def setUp(self):
        super(GroupStepsTestCase, self).setUp()

        self.run_step_on_spark = self.start(patch(
            'mrjob.spark.runner.SparkMRJobRunner._run_step_on_spark'))
Example #53
def patch_fs_s3():
    m_boto = MagicMock()
    m_s3 = m_boto.connect_s3()
    m_s3.get_all_buckets.__name__ = 'get_all_buckets'
    return patch('mrjob.fs.s3.boto', m_boto)
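patch_fs_s3() in Example #53 returns an unstarted patcher, so callers can use it as a context manager or call start()/stop() themselves; the __name__ assignment presumably satisfies wrapper code that introspects the mocked method. A hypothetical usage, assuming an mrjob version whose mrjob.fs.s3 module still imports boto:

with patch_fs_s3() as m_boto:
    import mrjob.fs.s3
    assert mrjob.fs.s3.boto is m_boto  # module-level boto is now the mock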
Example #54
    def setUp(self):
        super(InterpretHistoryLogTestCase, self).setUp()

        self.runner._ls_history_logs = Mock()
        self._interpret_history_log = (self.start(
            patch('mrjob.logs.mixin._interpret_history_log')))
Example #55
    def setUp(self):
        super(NumCoresTestCase, self).setUp()

        self.pool = self.start(patch('mrjob.local.Pool', wraps=Pool))
Example #56
    def setUp(self):
        super(LocalMRJobRunnerEndToEndTestCase, self).setUp()
        self.start(patch('os.symlink', side_effect=OSError))
Example #57
    def setUp(self):
        super(MultipleConfigFilesMachineryTestCase, self).setUp()
        self.log = self.start(patch('mrjob.conf.log'))
Example #58
    def setUp(self):
        super(MockFilesystemsTestCase, self).setUp()

        self.log = self.start(patch('mrjob.spark.runner.log'))
Example #59
    def setUp(self):
        super(MRJobConfNoYAMLTestCase, self).setUp()
        self.start(patch('mrjob.conf.yaml', None))
Example #60
    def setUp(self):
        super(DeprecatedFileUploadArgsTestCase, self).setUp()

        self.log = self.start(patch('mrjob.runner.log'))