示例#1
0
    def test_empty(self):
        # Runs on the local runner rather than inline: on the inline
        # runner this doesn't work because Spark doesn't have a working
        # dir to upload stop_words.txt to. See below for what does and
        # doesn't work in the inline runner.
        wordcount_job = MRSparkScriptWordcount(['-r', 'local'])
        # sandboxed without supplying any stdin
        wordcount_job.sandbox()

        with wordcount_job.make_runner() as local_runner:
            local_runner.run()

            # the job should produce no output lines at all
            output_lines = list(to_lines(local_runner.cat_output()))
            output_lines.sort()
            self.assertEqual(output_lines, [])
示例#2
0
    def test_spark_script_mrjob(self):
        # feed four short lines to the word-count job on the spark runner
        raw_input = b'one fish\ntwo fish\nred fish\nblue fish\n'

        wordcount_job = MRSparkScriptWordcount(['-r', 'spark'])
        wordcount_job.sandbox(stdin=BytesIO(raw_input))

        with wordcount_job.make_runner() as spark_runner:
            spark_runner.run()

            # every output line is evaluated and unpacked as a
            # (word, count) pair; a repeated word keeps its last count
            counts = {
                word: count
                for word, count in map(
                    safeeval, to_lines(spark_runner.cat_output()))
            }

        expected_counts = {'blue': 1, 'fish': 4, 'one': 1, 'red': 1, 'two': 1}
        self.assertEqual(counts, expected_counts)
示例#3
0
    def test_spark_script_mrjob(self):
        # same word-count check as a plain job, but on the local runner
        expected_counts = {
            'blue': 1,
            'fish': 4,
            'one': 1,
            'red': 1,
            'two': 1,
        }
        stdin = BytesIO(b'one fish\ntwo fish\nred fish\nblue fish\n')

        wordcount_job = MRSparkScriptWordcount(['-r', 'local'])
        wordcount_job.sandbox(stdin=stdin)

        word_counts = {}

        with wordcount_job.make_runner() as local_runner:
            local_runner.run()

            for output_line in to_lines(local_runner.cat_output()):
                # each line is evaluated and unpacked as (word, count)
                word, count = safeeval(output_line)
                word_counts[word] = count

        self.assertEqual(word_counts, expected_counts)
示例#4
0
    def test_count_words(self):
        # three lines of input; the last one has no trailing newline
        raw_input = b'Mary had a little lamb\nlittle lamb\nlittle lamb'

        wordcount_job = MRSparkScriptWordcount(['-r', 'local'])
        wordcount_job.sandbox(stdin=BytesIO(raw_input))

        with wordcount_job.make_runner() as local_runner:
            local_runner.run()

            # evaluate every output line, then order the results so the
            # comparison doesn't depend on output order
            word_counts = [
                safeeval(output_line)
                for output_line in to_lines(local_runner.cat_output())
            ]
            word_counts.sort()

            # note that "Mary" is expected back lowercased
            expected = [
                ('a', 1),
                ('had', 1),
                ('lamb', 3),
                ('little', 3),
                ('mary', 1),
            ]
            self.assertEqual(word_counts, expected)
示例#5
0
文件: test_inline.py 项目: qui/mrjob
    def test_no_spark_script_steps(self):
        # sanity check only (_STEP_TYPES is tested in a lot of other
        # ways): making an inline runner for this job must fail
        wordcount_job = MRSparkScriptWordcount(['-r', 'inline'])
        wordcount_job.sandbox()

        with self.assertRaises(NotImplementedError):
            wordcount_job.make_runner()
示例#6
0
文件: test_inline.py 项目: Yelp/mrjob
    def test_no_spark_script_steps(self):
        # Quick sanity check (_STEP_TYPES is tested in a lot of ways
        # already): make_runner() is expected to raise
        # NotImplementedError when the job is set up with "-r inline".
        runner_args = ['-r', 'inline']
        inline_job = MRSparkScriptWordcount(runner_args)
        inline_job.sandbox()

        expected_error = NotImplementedError
        runner_factory = inline_job.make_runner
        self.assertRaises(expected_error, runner_factory)