Exemplo n.º 1
0
    def test_stdin_bypasses_wrapper_script(self):
        # A --setup command that reads stdin (`cat > stdin.txt`) must not
        # swallow the job's input: stdin.txt should end up with size 0 and
        # the input line should still appear in the job's output.
        job = MROSWalkJob([
            '-r', 'local',
            '--setup', 'cat > stdin.txt',
        ])
        job.sandbox(stdin=BytesIO(b'some input\n'))

        # local mode doesn't currently pipe input into stdin
        # (see issue #567), so this test would hang if it failed
        def alarm_handler(*args, **kwargs):
            raise Exception('Setup script stalled on stdin')

        # Install the handler *before* entering the try block, so the
        # finally clause can never run without a saved handler to restore
        # (which would mask the real error with an AttributeError).
        previous_handler = signal.signal(signal.SIGALRM, alarm_handler)
        # still stored on the instance, as before, in case anything reads it
        self._old_alarm_handler = previous_handler

        try:
            signal.alarm(10)

            with job.make_runner() as r:
                r.run()

                path_to_size = dict(job.parse_output_line(line)
                                    for line in r.stream_output())

                self.assertEqual(path_to_size.get('./stdin.txt'), 0)
                # input gets passed through by identity mapper
                self.assertEqual(path_to_size.get(None), 'some input')

        finally:
            # cancel any pending alarm, then put the old handler back
            signal.alarm(0)
            signal.signal(signal.SIGALRM, previous_handler)
Exemplo n.º 2
0
    def test_bad_setup_command(self):
        # Three --setup commands are given in order; the middle one
        # ("false") fails, so run() should raise after the first touch
        # has happened and before the last one is attempted.
        first_marker = os.path.join(self.tmp_dir, "bar")
        last_marker = os.path.join(self.tmp_dir, "baz")

        args = ["-r", "local"]
        args += ["--setup", "touch %s" % first_marker]
        args += ["--setup", "false"]  # always "fails"
        args += ["--setup", "touch %s" % last_marker]
        args += ["--cleanup-on-failure=ALL"]

        job = MROSWalkJob(args)
        job.sandbox()

        with job.make_runner() as runner:
            with self.assertRaises(Exception):
                runner.run()

            # the command before the failure ran; the one after it did not
            self.assertTrue(os.path.exists(first_marker))
            self.assertFalse(os.path.exists(last_marker))
Exemplo n.º 3
0
    def test_setup_command(self):
        # `touch bar` is run as a setup command, so ./bar should be among
        # the paths in the job's output.
        args = ["-r", "local", "--setup", "touch bar"]
        job = MROSWalkJob(args)
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            path_to_size = {}
            for line in runner.stream_output():
                path, size = job.parse_output_line(line)
                path_to_size[path] = size

        self.assertIn("./bar", path_to_size)
Exemplo n.º 4
0
    def test_setup_script(self):
        # The setup entry is foo.sh's path with a trailing "#". Afterwards
        # the output should list ./foo.sh at its known size, plus the
        # ./foo.sh-made-this file.
        setup_entry = self.foo_sh + "#"
        job = MROSWalkJob(["-r", "local", "--setup", setup_entry])
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            parsed = map(job.parse_output_line, runner.stream_output())
            path_to_size = dict(parsed)

            foo_sh_size = path_to_size.get("./foo.sh")
            self.assertEqual(foo_sh_size, self.foo_sh_size)
            self.assertIn("./foo.sh-made-this", path_to_size)
Exemplo n.º 5
0
    def test_file_upload(self):
        # foo.sh is passed via --file twice: once as-is and once with a
        # "#bar.sh" suffix. Both ./foo.sh and ./bar.sh should be reported
        # with foo.sh's size.
        args = [
            "-r", "local",
            "--file", self.foo_sh,
            "--file", self.foo_sh + "#bar.sh",
        ]
        job = MROSWalkJob(args)
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            path_to_size = {
                path: size
                for path, size in map(job.parse_output_line,
                                      runner.stream_output())
            }

        for uploaded in ("./foo.sh", "./bar.sh"):
            self.assertEqual(path_to_size.get(uploaded), self.foo_sh_size)
Exemplo n.º 6
0
    def test_archive_upload(self):
        # foo.tar.gz is passed via --archive twice: once as-is and once
        # with a "#foo" suffix. foo.py should be reported under both
        # ./foo.tar.gz/ and ./foo/ with its known size.
        args = [
            "-r", "local",
            "--archive", self.foo_tar_gz,
            "--archive", self.foo_tar_gz + "#foo",
        ]
        job = MROSWalkJob(args)
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            path_to_size = {}
            for line in runner.stream_output():
                path, size = job.parse_output_line(line)
                path_to_size[path] = size

        size_in_default_dir = path_to_size.get("./foo.tar.gz/foo.py")
        self.assertEqual(size_in_default_dir, self.foo_py_size)

        size_in_named_dir = path_to_size.get("./foo/foo.py")
        self.assertEqual(size_in_named_dir, self.foo_py_size)
Exemplo n.º 7
0
    def test_python_archive(self):
        # A setup command prepends "<foo.tar.gz>#/" to PYTHONPATH.
        export_cmd = "export PYTHONPATH=%s#/:$PYTHONPATH" % self.foo_tar_gz
        job = MROSWalkJob(["-r", "local", "--setup", export_cmd])
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            parsed = map(job.parse_output_line, runner.stream_output())
            path_to_size = dict(parsed)

        # foo.py should be listed, and getsize() should be patched so that
        # it reports twice the real number of bytes
        doubled_size = self.foo_py_size * 2
        self.assertEqual(path_to_size.get("./foo.tar.gz/foo.py"),
                         doubled_size)
Exemplo n.º 8
0
    def test_setup_command(self):
        # With `touch bar` as the setup command, the job's output should
        # contain an entry for ./bar.
        args = ['-r', 'local']
        args += ['--setup', 'touch bar']

        job = MROSWalkJob(args)
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            path_to_size = {
                path: size
                for path, size in map(job.parse_output_line,
                                      runner.stream_output())
            }

        self.assertIn('./bar', path_to_size)
Exemplo n.º 9
0
    def test_setup_command(self):
        # Same check as for other runners, but on the spark runner with
        # the master set to _LOCAL_CLUSTER_MASTER: `touch bar` should
        # make ./bar appear in the output.
        args = [
            '-r', 'spark',
            '--spark-master', _LOCAL_CLUSTER_MASTER,
            '--setup', 'touch bar',
        ]
        job = MROSWalkJob(args)
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            raw_output = runner.cat_output()
            path_to_size = {
                path: size for path, size in job.parse_output(raw_output)
            }

        self.assertIn('./bar', path_to_size)
Exemplo n.º 10
0
    def test_setup_script(self):
        # foo.sh's path followed by '#' is used as the setup command; the
        # output should then contain ./foo.sh (at its known size) and
        # ./foo.sh-made-this.
        setup_entry = self.foo_sh + '#'

        job = MROSWalkJob(['-r', 'local', '--setup', setup_entry])
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            path_to_size = {}
            for line in runner.stream_output():
                path, size = job.parse_output_line(line)
                path_to_size[path] = size

            self.assertEqual(path_to_size.get('./foo.sh'),
                             self.foo_sh_size)
            self.assertIn('./foo.sh-made-this', path_to_size)
Exemplo n.º 11
0
    def test_file_upload(self):
        # Upload foo.sh twice with --file, the second time with a
        # '#bar.sh' suffix; both names should be reported with foo.sh's
        # size.
        renamed_upload = self.foo_sh + '#bar.sh'
        args = ['-r', 'local']
        args += ['--file', self.foo_sh]
        args += ['--file', renamed_upload]

        job = MROSWalkJob(args)
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            parsed = map(job.parse_output_line, runner.stream_output())
            path_to_size = dict(parsed)

        expected = self.foo_sh_size
        self.assertEqual(path_to_size.get('./foo.sh'), expected)
        self.assertEqual(path_to_size.get('./bar.sh'), expected)
Exemplo n.º 12
0
    def test_deprecated_python_archive_option(self):
        # Passes foo.tar.gz through the --python-archive switch.
        args = ['-r', 'local', '--python-archive', self.foo_tar_gz]
        job = MROSWalkJob(args)
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            path_to_size = {
                path: size
                for path, size in map(job.parse_output_line,
                                      runner.stream_output())
            }

        # foo.py should be listed, and getsize() should be patched so that
        # it reports twice the real number of bytes
        reported_size = path_to_size.get('./foo.tar.gz/foo.py')
        self.assertEqual(reported_size, self.foo_py_size * 2)
Exemplo n.º 13
0
    def test_wrapper_script_only_writes_to_stderr(self):
        # A setup command that echoes text must not pollute the job's
        # output; the text should show up in the "mrjob.local" log instead.
        args = ["-r", "local", "--setup", "echo stray output"]
        job = MROSWalkJob(args)
        job.sandbox()

        captured_log = StringIO()

        with no_handlers_for_logger("mrjob.local"):
            log_to_stream("mrjob.local", captured_log, debug=True)

            with job.make_runner() as runner:
                runner.run()

                job_output = b"".join(runner.stream_output())

                # stray output should be in stderr, not the job's output
                self.assertIn("stray output", captured_log.getvalue())
                self.assertNotIn(b"stray output", job_output)
Exemplo n.º 14
0
    def test_archive_upload(self):
        # Upload foo.tar.gz twice with --archive (the second time with a
        # '#foo' suffix), with --no-bootstrap-mrjob set. foo.py should be
        # reported under both ./foo.tar.gz/ and ./foo/ at its known size.
        args = ['--runner=local', '--no-bootstrap-mrjob']
        args += ['--archive', self.foo_tar_gz]
        args += ['--archive', self.foo_tar_gz + '#foo']

        job = MROSWalkJob(args)
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            path_to_size = {}
            for line in runner.stream_output():
                path, size = job.parse_output_line(line)
                path_to_size[path] = size

        for unpacked in ('./foo.tar.gz/foo.py', './foo/foo.py'):
            self.assertEqual(path_to_size.get(unpacked), self.foo_py_size)
Exemplo n.º 15
0
    def test_bad_setup_command(self):
        # The second of three setup commands ('false') fails: run() should
        # raise, the first touch should have happened, and the third
        # should not.
        tmp_dir = self.tmp_dir
        first_marker = os.path.join(tmp_dir, 'bar')
        last_marker = os.path.join(tmp_dir, 'baz')

        setup_cmds = [
            'touch %s' % first_marker,
            'false',  # always "fails"
            'touch %s' % last_marker,
        ]

        args = ['-r', 'local']
        for cmd in setup_cmds:
            args.extend(['--setup', cmd])

        job = MROSWalkJob(args)
        job.sandbox()

        with job.make_runner() as runner:
            with self.assertRaises(Exception):
                runner.run()

            # the command before the failure ran; the one after it did not
            self.assertTrue(os.path.exists(first_marker))
            self.assertFalse(os.path.exists(last_marker))
Exemplo n.º 16
0
    def test_wrapper_script_only_writes_to_stderr(self):
        # Text echoed by a setup command should land in the 'mrjob.local'
        # log (captured here with debug=True), never in the job's output.
        args = ['-r', 'local']
        args += ['--setup', 'echo stray output']

        job = MROSWalkJob(args)
        job.sandbox()

        log_buffer = StringIO()

        with no_handlers_for_logger('mrjob.local'):
            log_to_stream('mrjob.local', log_buffer, debug=True)

            with job.make_runner() as runner:
                runner.run()

                chunks = runner.stream_output()
                job_output = b''.join(chunks)

                # stray output should be in stderr, not the job's output
                self.assertIn('stray output', log_buffer.getvalue())
                self.assertNotIn(b'stray output', job_output)
Exemplo n.º 17
0
    def test_wrapper_script_only_writes_to_stderr(self):
        # Text echoed by a setup command should land in the 'mrjob.local'
        # log, never in the job's output.
        job = MROSWalkJob(['-r', 'local', '--setup', 'echo stray output'])
        job.sandbox()

        log_buffer = StringIO()

        with no_handlers_for_logger('mrjob.local'):
            log_to_stream('mrjob.local', log_buffer)

            with job.make_runner() as runner:
                runner.run()

                job_output = b''.join(runner.stream_output())
                logged_text = log_buffer.getvalue()

                # stray output should be in stderr, not the job's output
                self.assertIn('stray output', logged_text)
                self.assertNotIn(b'stray output', job_output)
Exemplo n.º 18
0
    def test_stdin_bypasses_wrapper_script(self):
        # A --setup command that reads stdin (`cat > stdin.txt`) must not
        # swallow the job's input: stdin.txt should end up with size 0 and
        # the input line should still appear in the job's output.
        job = MROSWalkJob([
            '-r',
            'local',
            '--setup',
            'cat > stdin.txt',
        ])
        job.sandbox(stdin=StringIO('some input\n'))

        # local mode doesn't currently pipe input into stdin
        # (see issue #567), so this test would hang if it failed
        def alarm_handler(*args, **kwargs):
            raise Exception('Setup script stalled on stdin')

        # Install the handler *before* entering the try block, so the
        # finally clause can never run without a saved handler to restore
        # (which would mask the real error with an AttributeError).
        previous_handler = signal.signal(signal.SIGALRM, alarm_handler)
        # still stored on the instance, as before, in case anything reads it
        self._old_alarm_handler = previous_handler

        try:
            signal.alarm(2)

            with job.make_runner() as r:
                r.run()

                path_to_size = dict(
                    job.parse_output_line(line) for line in r.stream_output())

                self.assertEqual(path_to_size.get('./stdin.txt'), 0)
                # input gets passed through by identity mapper
                self.assertEqual(path_to_size.get(None), 'some input')

        finally:
            # cancel any pending alarm, then put the old handler back
            signal.alarm(0)
            signal.signal(signal.SIGALRM, previous_handler)
Exemplo n.º 19
0
    def test_setup_command(self):
        # After running with `touch bar` as a setup command, ./bar should
        # be one of the reported paths.
        job = MROSWalkJob(['-r', 'local', '--setup', 'touch bar'])
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            parsed = map(job.parse_output_line, runner.stream_output())
            path_to_size = dict(parsed)

        self.assertIn('./bar', path_to_size)
Exemplo n.º 20
0
    def test_setup_script(self):
        # foo.sh's path plus a trailing '#' is the setup command; the
        # output should list ./foo.sh at its known size as well as
        # ./foo.sh-made-this.
        args = ['-r', 'local']
        args += ['--setup', self.foo_sh + '#']

        job = MROSWalkJob(args)
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            path_to_size = {
                path: size
                for path, size in map(job.parse_output_line,
                                      runner.stream_output())
            }

            reported_size = path_to_size.get('./foo.sh')
            self.assertEqual(reported_size, self.foo_sh_size)
            self.assertIn('./foo.sh-made-this', path_to_size)
Exemplo n.º 21
0
    def test_setup_command(self):
        # On the spark runner (master set to _LOCAL_CLUSTER_MASTER), the
        # `touch bar` setup command should make ./bar appear in the output.
        args = ['-r', 'spark']
        args += ['--spark-master', _LOCAL_CLUSTER_MASTER]
        args += ['--setup', 'touch bar']

        job = MROSWalkJob(args)
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            path_to_size = {}
            for path, size in job.parse_output(runner.cat_output()):
                path_to_size[path] = size

        self.assertIn('./bar', path_to_size)
Exemplo n.º 22
0
    def test_file_uris(self):
        # --files takes a comma-separated list; here one entry is a plain
        # path and the other a file:// URI. Both files should be reported
        # with the byte lengths of their contents.
        f1_path = self.makefile('f1', b'contents')
        f2_path = self.makefile('f2', b'stuff')
        f2_uri = 'file://' + f2_path

        files_arg = '%s,%s' % (f1_path, f2_uri)
        job = MROSWalkJob(['--files', files_arg])
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            raw_output = runner.cat_output()
            path_to_size = {
                path: size for path, size in job.parse_output(raw_output)
            }

            self.assertEqual(path_to_size.get('./f1'), 8)
            self.assertEqual(path_to_size.get('./f2'), 5)
Exemplo n.º 23
0
    def test_deprecated_python_archive_option(self):
        # Passes foo.tar.gz through the --python-archive switch.
        args = ['-r', 'local']
        args += ['--python-archive', self.foo_tar_gz]

        job = MROSWalkJob(args)
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            path_to_size = {}
            for line in runner.stream_output():
                path, size = job.parse_output_line(line)
                path_to_size[path] = size

        # foo.py should be listed, and getsize() should be patched so that
        # it reports twice the real number of bytes
        doubled_size = self.foo_py_size * 2
        self.assertEqual(path_to_size.get('./foo.tar.gz/foo.py'),
                         doubled_size)
Exemplo n.º 24
0
    def test_file_upload(self):
        # foo.sh goes up twice via --file, once under its own name and
        # once with a '#bar.sh' suffix; both ./foo.sh and ./bar.sh should
        # be reported with foo.sh's size.
        args = [
            '-r', 'local',
            '--file', self.foo_sh,
            '--file', self.foo_sh + '#bar.sh',
        ]
        job = MROSWalkJob(args)
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            path_to_size = {}
            for line in runner.stream_output():
                path, size = job.parse_output_line(line)
                path_to_size[path] = size

        for uploaded in ('./foo.sh', './bar.sh'):
            self.assertEqual(path_to_size.get(uploaded), self.foo_sh_size)
Exemplo n.º 25
0
    def test_archive_upload(self):
        # foo.tar.gz goes up twice via --archive, once as-is and once
        # with a '#foo' suffix; foo.py should be reported under both
        # ./foo.tar.gz/ and ./foo/ at its known size.
        named_archive = self.foo_tar_gz + '#foo'
        args = [
            '-r', 'local',
            '--archive', self.foo_tar_gz,
            '--archive', named_archive,
        ]
        job = MROSWalkJob(args)
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            parsed = map(job.parse_output_line, runner.stream_output())
            path_to_size = dict(parsed)

        expected = self.foo_py_size
        self.assertEqual(path_to_size.get('./foo.tar.gz/foo.py'), expected)
        self.assertEqual(path_to_size.get('./foo/foo.py'), expected)
Exemplo n.º 26
0
    def test_archive_uris(self):
        # Build qux.tar.gz containing a 3-byte file named bar, then pass
        # it to --archives twice: as a plain path (#qux) and as a file://
        # URI (#qux2). bar should be reported under both directories.
        qux_dir = self.makedirs('qux')
        self.makefile(join(qux_dir, 'bar'), b'baz')

        archive_base = join(self.tmp_dir, 'qux')
        qux_tar_gz = make_archive(archive_base, 'gztar', qux_dir)
        qux_tar_gz_uri = 'file://' + qux_tar_gz

        archives_arg = '%s#qux,%s#qux2' % (qux_tar_gz, qux_tar_gz_uri)
        job = MROSWalkJob(['--archives', archives_arg])
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            path_to_size = {}
            for path, size in job.parse_output(runner.cat_output()):
                path_to_size[path] = size

            self.assertEqual(path_to_size.get('./qux/bar'), 3)
            self.assertEqual(path_to_size.get('./qux2/bar'), 3)