示例#1
0
    def test_mapper_pre_filter(self):
        # Run MRFilterJob on the local runner with `cat -e` as the mapper
        # pre-filter, and check both the step description and the output.
        data = b'x\ny\nz\n'
        job = MRFilterJob(['--mapper-filter', 'cat -e', '--runner=local'])
        job.sandbox(stdin=BytesIO(data))
        with job.make_runner() as r:
            # the pre-filter command should be recorded on the mapper step
            self.assertEqual(r._get_steps(), [{
                'type': 'streaming',
                'mapper': {
                    'type': 'script',
                    'pre_filter': 'cat -e'
                }
            }])

            r.run()

            # iterate the output stream directly; there is no need to
            # materialize it into a temporary list first
            lines = [line.strip() for line in r.stream_output()]
            # every input line comes back with the '$' added by `cat -e`
            self.assertEqual(sorted(lines), [b'x$', b'y$', b'z$'])
示例#2
0
    def test_mapper_pre_filter(self):
        # Run MRFilterJob on the local runner with `cat -e` as the mapper
        # pre-filter; verify the step description, then the job output.
        expected_steps = [{
            'type': 'streaming',
            'mapper': {
                'type': 'script',
                'pre_filter': 'cat -e',
            },
        }]

        job = MRFilterJob(['--mapper-filter', 'cat -e', '--runner=local'])
        job.sandbox(stdin=BytesIO(b'x\ny\nz\n'))

        with job.make_runner() as runner:
            self.assertEqual(runner._get_steps(), expected_steps)

            runner.run()

            # each input line should come back with a '$' appended
            stripped = (
                line.strip() for line in to_lines(runner.cat_output()))
            self.assertEqual(sorted(stripped), [b'x$', b'y$', b'z$'])
示例#3
0
    def test_pre_filter_failure(self):
        # regression test for #1524
        #
        # A pre-filter that exits non-zero must not break the job: the run
        # should complete and simply produce no output.

        data = b'x\ny\nz\n'
        # grep will return exit code 1 because there are no matches
        job = MRFilterJob(['--mapper-filter', 'grep w', '--runner=local'])
        job.sandbox(stdin=BytesIO(data))
        with job.make_runner() as r:
            # the pre-filter command should be recorded on the mapper step
            self.assertEqual(r._get_steps(), [{
                'type': 'streaming',
                'mapper': {
                    'type': 'script',
                    'pre_filter': 'grep w'
                }
            }])

            r.run()

            # iterate the output stream directly; there is no need to
            # materialize it into a temporary list first
            lines = [line.strip() for line in r.stream_output()]
            self.assertEqual(sorted(lines), [])
示例#4
0
    def test_pre_filter_failure(self):
        # regression test for #1524
        #
        # The pre-filter below matches nothing, so the job is expected to
        # finish with empty output.
        expected_steps = [{
            'type': 'streaming',
            'mapper': {
                'type': 'script',
                'pre_filter': 'grep w',
            },
        }]

        # grep will return exit code 1 because there are no matches
        job = MRFilterJob(['--mapper-filter', 'grep w', '--runner=local'])
        job.sandbox(stdin=BytesIO(b'x\ny\nz\n'))

        with job.make_runner() as runner:
            self.assertEqual(runner._get_steps(), expected_steps)

            runner.run()

            stripped = (
                line.strip() for line in to_lines(runner.cat_output()))
            self.assertEqual(sorted(stripped), [])
示例#5
0
    def test_pre_filter_on_compressed_data(self):
        # regression test for #1061
        #
        # Feed a gzipped input file to a job with a mapper pre-filter and
        # check that the filter sees the decompressed lines.
        input_gz_path = self.makefile('data.gz')
        # use a context manager so the gzip stream is flushed and closed
        # even if write() raises
        with gzip.GzipFile(input_gz_path, 'wb') as input_gz:
            input_gz.write(b'x\ny\nz\n')

        job = MRFilterJob(
            ['--mapper-filter', 'cat -e', '--runner=local', input_gz_path])
        with job.make_runner() as r:
            # the pre-filter command should be recorded on the mapper step
            self.assertEqual(r._get_steps(), [{
                'type': 'streaming',
                'mapper': {
                    'type': 'script',
                    'pre_filter': 'cat -e'
                }
            }])

            r.run()

            # iterate the output stream directly; there is no need to
            # materialize it into a temporary list first
            lines = [line.strip() for line in r.stream_output()]
            # plain-text lines with '$' appended show the filter received
            # decompressed data
            self.assertEqual(sorted(lines), [b'x$', b'y$', b'z$'])
示例#6
0
    def test_pre_filter_on_compressed_data(self):
        # regression test for #1061
        #
        # Feed a gzipped input file to a job with a mapper pre-filter and
        # check that the filter sees the decompressed lines.
        input_gz_path = self.makefile('data.gz')
        # use a context manager so the gzip stream is flushed and closed
        # even if write() raises
        with gzip.GzipFile(input_gz_path, 'wb') as input_gz:
            input_gz.write(b'x\ny\nz\n')

        job = MRFilterJob([
            '--mapper-filter', 'cat -e', '--runner=local', input_gz_path])
        job.sandbox()

        with job.make_runner() as r:
            # the pre-filter command should be recorded on the mapper step
            self.assertEqual(
                r._get_steps(),
                [{
                    'type': 'streaming',
                    'mapper': {
                        'type': 'script',
                        'pre_filter': 'cat -e'}}])

            r.run()

            lines = [line.strip() for line in to_lines(r.cat_output())]
            # plain-text lines with '$' appended show the filter received
            # decompressed data
            self.assertEqual(sorted(lines), [b'x$', b'y$', b'z$'])
示例#7
0
文件: test_inline.py 项目: qui/mrjob
    def test_no_pre_filters(self):
        # Asking for a mapper pre-filter together with the inline runner
        # is expected to fail with NotImplementedError at make_runner().
        args = ['-r', 'inline', '--mapper-filter', 'grep foo']
        job = MRFilterJob(args)
        job.sandbox()

        with self.assertRaises(NotImplementedError):
            job.make_runner()
示例#8
0
文件: test_inline.py 项目: Yelp/mrjob
    def test_no_pre_filters(self):
        # Asking for a mapper pre-filter together with the inline runner
        # is expected to fail with NotImplementedError at make_runner().
        args = ['-r', 'inline', '--mapper-filter', 'grep foo']
        job = MRFilterJob(args)
        job.sandbox()

        with self.assertRaises(NotImplementedError):
            job.make_runner()