Exemplo n.º 1
0
def run_controller(use_gloo, gloo_run, use_mpi, mpi_run, use_jsrun, js_run, verbosity):
    # keep logic in sync with is_gloo_used(...)
    verbose = verbosity is not None and verbosity >= 2
    if use_gloo:
        if not gloo_built(verbose=verbose):
            raise ValueError('Gloo support has not been built.  If this is not expected, ensure CMake is installed '
                             'and reinstall Horovod with HOROVOD_WITH_GLOO=1 to debug the build error.')
        gloo_run()
    elif use_mpi:
        if not mpi_built(verbose=verbose):
            raise ValueError('MPI support has not been built.  If this is not expected, ensure MPI is installed '
                             'and reinstall Horovod with HOROVOD_WITH_MPI=1 to debug the build error.')
        mpi_run()
    elif use_jsrun:
        if not mpi_built(verbose=verbose):
            raise ValueError('MPI support has not been built.  If this is not expected, ensure MPI is installed '
                             'and reinstall Horovod with HOROVOD_WITH_MPI=1 to debug the build error.')
        if not lsf.LSFUtils.using_lsf():
            raise ValueError(
                'Horovod did not detect an LSF job.  The jsrun launcher can only be used in that environment. '
                'Please, pick a different launcher for other environments.')
        js_run()
    else:
        if mpi_built(verbose=verbose):
            if lsf.LSFUtils.using_lsf() and is_jsrun_installed():
                js_run()
            else:
                mpi_run()
        elif gloo_built(verbose=verbose):
            gloo_run()
        else:
            raise ValueError('Neither MPI nor Gloo support has been built. Try reinstalling Horovod ensuring that '
                             'either MPI is installed (MPI) or CMake is installed (Gloo).')
Exemplo n.º 2
0
    def test_js_run(self):
        if _get_mpi_implementation_flags(False)[0] is None:
            self.skipTest("MPI is not available")

        cmd = ['cmd', 'arg1', 'arg2']
        env = {'env1': 'val1', 'env2': 'val2'}
        stdout = '<stdout>'
        stderr = '<stderr>'
        settings = hvd_settings.Settings(
            verbose=0,
            extra_mpi_args='>mpi-extra args go here<',
            num_proc=4,
            hosts='localhost:2,127.0.0.1:2',
            output_filename='>output filename goes here<',
            run_func_mode=True
        )

        def mpi_impl_flags(tcp, env=None):
            return ["--mock-mpi-impl-flags"], []

        with mock.patch("horovod.runner.js_run._get_mpi_implementation_flags", side_effect=mpi_impl_flags):
            with mock.patch("horovod.runner.js_run.safe_shell_exec.execute", return_value=0) as execute:
                js_run(settings, None, env, cmd, stdout=stdout, stderr=stderr)

                # call the mocked _get_mpi_implementation_flags method
                mpi_flags, _ = horovod.runner.js_run._get_mpi_implementation_flags(False)
                self.assertIsNotNone(mpi_flags)
                expected_command = ('jsrun '
                                    '--erf_input /tmp/rankfile '
                                    '--stdio_stderr >output filename goes here< '
                                    '--stdio_stdout >output filename goes here< '
                                    '--smpiargs \'{mpi_args} >mpi-extra args go here<\' '
                                    'cmd arg1 arg2').format(mpi_args=' '.join(mpi_flags))
                expected_env = {'env1': 'val1', 'env2': 'val2'}
                execute.assert_called_once_with(expected_command, env=expected_env, stdout=stdout, stderr=stderr)
Exemplo n.º 3
0
 def js_run_fn():
     js_run(settings, nics, env, command)