def test_no_mapper(self):
    # read from STDIN, a local file, and a remote file
    stdin = BytesIO(b'foo\nbar\n')

    local_input_path = os.path.join(self.tmp_dir, 'input')
    with open(local_input_path, 'wb') as local_input_file:
        local_input_file.write(b'one fish two fish\nred fish blue fish\n')

    remote_input_path = 'gs://walrus/data/foo'
    self.put_gcs_multi({remote_input_path: b'foo\n'})

    mr_job = MRNoMapper(['-r', 'dataproc', '-v',
                         '-', local_input_path, remote_input_path])
    mr_job.sandbox(stdin=stdin)

    with mr_job.make_runner() as runner:
        runner.run()

        # setup fake output
        self.put_job_output_parts(runner, [
            b'1\t["blue", "one", "red", "two"]\n',
            b'4\t["fish"]\n'])

        # read output through cat_output()/parse_output() rather than the
        # deprecated line-based stream_output()/parse_output_line() pair
        results = list(mr_job.parse_output(runner.cat_output()))

    self.assertEqual(sorted(results),
                     [(1, ['blue', 'one', 'red', 'two']),
                      (4, ['fish'])])
def test_no_mapper(self):
    # read from STDIN, a local file, and a remote file
    stdin = BytesIO(b"foo\nbar\n")

    local_input_path = os.path.join(self.tmp_dir, "input")
    with open(local_input_path, "wb") as local_input_file:
        local_input_file.write(b"one fish two fish\nred fish blue fish\n")

    remote_input_path = "gs://walrus/data/foo"
    self.put_gcs_multi({remote_input_path: b"foo\n"})

    mr_job = MRNoMapper(
        ["-r", "dataproc", "-v", "-", local_input_path, remote_input_path]
    )
    mr_job.sandbox(stdin=stdin)

    with mr_job.make_runner() as runner:
        runner.run()

        # setup fake output
        self.put_job_output_parts(
            runner,
            [b'1\t["blue", "one", "red", "two"]\n', b'4\t["fish"]\n'],
        )

        # cat_output() yields raw output chunks and parse_output() decodes
        # them into (key, value) pairs; this replaces the deprecated
        # per-line stream_output()/parse_output_line() loop
        results = list(mr_job.parse_output(runner.cat_output()))

    self.assertEqual(
        sorted(results),
        [(1, ["blue", "one", "red", "two"]), (4, ["fish"])],
    )
def test_step_with_no_mapper(self):
    # run MRNoMapper on two lines of stdin with the runner under test and
    # compare the sorted, parsed output against the expected pairs
    job_input = b'one fish two fish\nred fish blue fish\n'
    expected = [(1, ['blue', 'one', 'red', 'two']), (4, ['fish'])]

    mr_job = MRNoMapper(['-r', self.RUNNER])
    mr_job.sandbox(stdin=BytesIO(job_input))

    with mr_job.make_runner() as runner:
        runner.run()

        # output is read while still inside the runner's context
        pairs = list(mr_job.parse_output(runner.cat_output()))
        pairs.sort()

        self.assertEqual(pairs, expected)
def test_no_mapper(self):
    # the job reads from three sources: STDIN, a local file, and a
    # remote file
    stdin = BytesIO(b'foo\nbar\n')

    local_input_path = os.path.join(self.tmp_dir, 'input')
    with open(local_input_path, 'wb') as f:
        f.write(b'one fish two fish\nred fish blue fish\n')

    remote_input_path = 'gs://walrus/data/foo'
    self.put_gcs_multi({remote_input_path: b'foo\n'})

    job_args = ['-r', 'dataproc', '-v',
                '-', local_input_path, remote_input_path]
    mr_job = MRNoMapper(job_args)
    mr_job.sandbox(stdin=stdin)

    with mr_job.make_runner() as runner:
        runner.run()

        # setup fake output
        fake_parts = [
            b'1\t["blue", "one", "red", "two"]\n',
            b'4\t["fish"]\n',
        ]
        self.put_job_output_parts(runner, fake_parts)

        # materialize the parsed pairs before leaving the runner's context
        results = list(mr_job.parse_output(runner.cat_output()))

    expected = [(1, ['blue', 'one', 'red', 'two']), (4, ['fish'])]
    self.assertEqual(sorted(results), expected)
def test_step_with_no_mapper(self):
    # feed two lines through stdin to MRNoMapper on self.RUNNER, then
    # check the parsed output pairs (order-insensitive via sorted())
    mr_job = MRNoMapper(['-r', self.RUNNER])

    stdin = BytesIO(b'one fish two fish\nred fish blue fish\n')
    mr_job.sandbox(stdin=stdin)

    with mr_job.make_runner() as runner:
        runner.run()

        output_chunks = runner.cat_output()
        parsed_pairs = mr_job.parse_output(output_chunks)

        self.assertEqual(
            sorted(parsed_pairs),
            [(1, ['blue', 'one', 'red', 'two']), (4, ['fish'])])
def test_step_with_no_mapper(self):
    # run MRNoMapper on two lines of stdin using the runner under test
    mr_job = MRNoMapper(['-r', self.RUNNER])
    mr_job.sandbox(stdin=BytesIO(
        b'one fish two fish\nred fish blue fish\n'))

    with mr_job.make_runner() as runner:
        runner.run()

        # parse_output(cat_output()) replaces the deprecated per-line
        # parse_output_line()/stream_output() pair
        results = list(mr_job.parse_output(runner.cat_output()))

        self.assertEqual(sorted(results),
                         [(1, ['blue', 'one', 'red', 'two']),
                          (4, ['fish'])])