Exemplo n.º 1
0
 def _step_output_uri(self, step_num):
     """Return the URI that step *step_num* should write its output to.

     The last step writes to ``self._output_dir``; every other step
     writes to the directory returned by
     ``self._intermediate_output_dir(step_num)``.
     """
     final_step_num = len(self._get_steps()) - 1

     # anything that isn't the last step gets an intermediate dir
     if step_num != final_step_num:
         return to_uri(self._intermediate_output_dir(step_num))

     return to_uri(self._output_dir)
Exemplo n.º 2
0
Arquivo: runner.py Projeto: stug/mrjob
 def _step_output_uri(self, step_num):
     """Return the output URI for step number *step_num*.

     The final step uses ``self._output_dir``; earlier steps use
     ``self._intermediate_output_dir(step_num)``. Either way, the
     chosen directory is passed through ``to_uri()``.
     """
     is_final_step = (step_num == len(self._get_steps()) - 1)

     # pick the directory first so there is a single to_uri() call
     if is_final_step:
         chosen_dir = self._output_dir
     else:
         chosen_dir = self._intermediate_output_dir(step_num)

     return to_uri(chosen_dir)
Exemplo n.º 3
0
 def _step_input_uris(self, step_num):
     """Return the list of input URIs for step number *step_num*.

     Step 0 gets one URI per path from ``self._get_input_paths()``
     (taken from ``self._upload_mgr`` when it is set, otherwise built
     with ``to_uri()``). Every other step gets a one-item list: the
     intermediate output directory of the previous step.
     """
     if step_num != 0:
         previous_output_dir = self._intermediate_output_dir(step_num - 1)
         return [to_uri(previous_output_dir)]

     input_uris = []
     for path in self._get_input_paths():
         if self._upload_mgr:
             input_uris.append(self._upload_mgr.uri(path))
         else:
             input_uris.append(to_uri(path))

     return input_uris
Exemplo n.º 4
0
Arquivo: runner.py Projeto: stug/mrjob
 def _step_input_uris(self, step_num):
     """Return the URIs that step *step_num* reads its input from.

     For step 0 this is one URI per input path; for any other step it
     is a single-item list holding the previous step's intermediate
     output directory.
     """
     def _uri_for_input(path):
         # prefer the upload manager's URI when there is one
         if self._upload_mgr:
             return self._upload_mgr.uri(path)
         return to_uri(path)

     if step_num == 0:
         return [_uri_for_input(path)
                 for path in self._get_input_paths()]

     return [to_uri(self._intermediate_output_dir(step_num - 1))]
Exemplo n.º 5
0
    def test_relative_path_to_uri(self):
        # to_uri() on a relative path should yield a file:/// URI rooted
        # at the current working directory
        tmp_dir = realpath(gettempdir())
        expected_uri = 'file://' + join(pathname2url(tmp_dir), 'foo.db')

        with save_cwd():
            chdir(tmp_dir)

            actual_uri = to_uri('foo.db')

            self.assertEqual(actual_uri[:8], 'file:///')
            self.assertEqual(actual_uri, expected_uri)
Exemplo n.º 6
0
    def _check_spark_tmp_dir_opt(self):
        """Log a warning when ``spark_tmp_dir`` and the Spark master
        disagree about being local (see #2062).

        The tmp dir counts as local if its URI starts with ``file://``;
        the master counts as local if it starts with ``local``.
        """
        tmp_dir_uri = to_uri(self._opts['spark_tmp_dir'])
        local_tmp_dir = tmp_dir_uri.startswith('file://')
        local_master = self._spark_master().startswith('local')

        # both local or both remote: nothing to warn about
        if local_tmp_dir == local_master:
            return

        message = (
            'Warning: executors on Spark master %s may not be able to'
            ' access spark_tmp_dir %s' %
            (self._spark_master(), self._opts['spark_tmp_dir']))
        log.warning(message)
Exemplo n.º 7
0
    def test_local_output_dir_and_step_output_dir(self):
        # check that the local runner wires step inputs/outputs through
        # --step-output-dir, and the last step's output to --output-dir
        first_input = self.makefile('input1')
        second_input = self.makefile('input2')

        final_dir = self.makedirs('output')
        intermediate_dir = self.makedirs('step_output')

        # this job has three steps, which lets us test step numbering
        job_args = [
            '-r', 'local',
            '--output-dir', final_dir,
            '--step-output-dir', intermediate_dir,
            first_input, second_input,
        ]
        job = MRCountingJob(job_args)
        job.sandbox()

        with job.make_runner() as runner:
            self.assertEqual(runner._num_steps(), 3)

            # step 0: reads the two input files, writes to .../0000
            step0_inputs = runner._step_input_uris(0)
            self.assertEqual(
                [os.path.basename(uri) for uri in step0_inputs],
                ['input1', 'input2'])
            self.assertEqual(
                [uri[:8] for uri in step0_inputs],
                ['file:///', 'file:///'])

            step0_output = runner._step_output_uri(0)
            self.assertEqual(
                step0_output,
                to_uri(os.path.join(intermediate_dir, '0000')))

            # step 1: reads step 0's output, writes to .../0001
            step1_inputs = runner._step_input_uris(1)
            self.assertEqual(step1_inputs, [step0_output])

            step1_output = runner._step_output_uri(1)
            self.assertEqual(
                step1_output,
                to_uri(os.path.join(intermediate_dir, '0001')))

            # step 2: reads step 1's output, writes to the final dir
            step2_inputs = runner._step_input_uris(2)
            self.assertEqual(step2_inputs, [step1_output])

            step2_output = runner._step_output_uri(2)
            self.assertEqual(step2_output, to_uri(final_dir))
Exemplo n.º 8
0
    def test_local_output_dir_and_step_output_dir(self):
        # each step should read the previous step's output; intermediate
        # output goes under --step-output-dir, final output to --output-dir
        path_a = self.makefile('input1')
        path_b = self.makefile('input2')

        out_dir = self.makedirs('output')
        step_out_dir = self.makedirs('step_output')

        # three steps, so we exercise first, middle, and last numbering
        job = MRCountingJob(
            ['-r', 'local'] +
            ['--output-dir', out_dir] +
            ['--step-output-dir', step_out_dir] +
            [path_a, path_b])
        job.sandbox()

        with job.make_runner() as runner:
            self.assertEqual(runner._num_steps(), 3)

            uris_in_0 = runner._step_input_uris(0)
            basenames = [os.path.basename(uri) for uri in uris_in_0]
            self.assertEqual(basenames, ['input1', 'input2'])
            prefixes = [uri[:8] for uri in uris_in_0]
            self.assertEqual(prefixes, ['file:///', 'file:///'])

            uri_out_0 = runner._step_output_uri(0)
            expected_out_0 = to_uri(os.path.join(step_out_dir, '0000'))
            self.assertEqual(uri_out_0, expected_out_0)

            uris_in_1 = runner._step_input_uris(1)
            self.assertEqual(uris_in_1, [uri_out_0])

            uri_out_1 = runner._step_output_uri(1)
            expected_out_1 = to_uri(os.path.join(step_out_dir, '0001'))
            self.assertEqual(uri_out_1, expected_out_1)

            uris_in_2 = runner._step_input_uris(2)
            self.assertEqual(uris_in_2, [uri_out_1])

            uri_out_2 = runner._step_output_uri(2)
            expected_out_2 = to_uri(out_dir)
            self.assertEqual(uri_out_2, expected_out_2)
Exemplo n.º 9
0
 def test_to_uri(self):
     # an absolute local path becomes a file:// URI; something that is
     # already a URI comes back unchanged
     cases = [
         ('/path/to/file', 'file:///path/to/file'),
         ('s3://a/uri', 's3://a/uri'),
     ]

     for path_or_uri, expected_uri in cases:
         self.assertEqual(to_uri(path_or_uri), expected_uri)