def _launch_job(args, remote_host_names, settings, nics, command):
    """Launch ``command`` through the controller selected by ``args``.

    A copy of the current process environment is taken and passed to
    ``config_parser.set_env_from_args`` together with ``args``; that
    environment is then handed to whichever launcher ends up being used.

    Selection order:

    1. ``args.use_gloo``  -> ``gloo_run`` (requires ``gloo_built``).
    2. ``args.use_mpi``   -> ``mpi_run`` (requires ``mpi_built``).
    3. ``args.use_jsrun`` -> ``js_run`` (requires ``mpi_built`` and
       ``lsf.LSFUtils.using_lsf()``).
    4. Nothing requested: if MPI is built, ``js_run`` when an LSF job is
       detected and ``is_jsrun_installed()`` holds, otherwise ``mpi_run``;
       if only Gloo is built, ``gloo_run``.

    :raises ValueError: if the requested controller is not available, if
        jsrun is requested outside of LSF, or if neither MPI nor Gloo is
        built.
    """
    env = os.environ.copy()
    config_parser.set_env_from_args(env, args)

    # The availability probes are wrapped so that ``settings.verbose`` is
    # only read at the point where a probe is actually needed.
    def gloo_available():
        return gloo_built(verbose=(settings.verbose >= 2))

    def mpi_available():
        return mpi_built(verbose=(settings.verbose >= 2))

    def launch_gloo():
        driver_ip = network._get_driver_ip(nics)
        gloo_run(settings, remote_host_names, nics, env, driver_ip, command)

    mpi_missing = (
        'MPI support has not been built.  If this is not expected, ensure MPI is installed '
        'and reinstall Horovod with HOROVOD_WITH_MPI=1 to debug the build error.'
    )

    # --- explicitly requested controllers ---------------------------------
    if args.use_gloo:
        if not gloo_available():
            raise ValueError(
                'Gloo support has not been built.  If this is not expected, ensure CMake is installed '
                'and reinstall Horovod with HOROVOD_WITH_GLOO=1 to debug the build error.'
            )
        launch_gloo()
        return

    if args.use_mpi:
        if not mpi_available():
            raise ValueError(mpi_missing)
        mpi_run(settings, nics, env, command)
        return

    if args.use_jsrun:
        if not mpi_available():
            raise ValueError(mpi_missing)
        if not lsf.LSFUtils.using_lsf():
            raise ValueError(
                'Horovod did not detect an LSF job.  The jsrun launcher can only be used in that environment. '
                'Please, pick a different launcher for other environments.')
        js_run(settings, nics, env, command)
        return

    # --- nothing requested: pick whatever is available, MPI first ---------
    if mpi_available():
        if lsf.LSFUtils.using_lsf() and is_jsrun_installed():
            js_run(settings, nics, env, command)
        else:
            mpi_run(settings, nics, env, command)
        return

    if gloo_available():
        launch_gloo()
        return

    raise ValueError(
        'Neither MPI nor Gloo support has been built. Try reinstalling Horovod ensuring that '
        'either MPI is installed (MPI) or CMake is installed (Gloo).')
def test_js_run(self):
    # Verifies the command line and environment that js_run passes to
    # horovod.run.js_run.safe_shell_exec.execute, with the MPI
    # implementation flags replaced by a fixed fake value.

    # The real MPI probe decides whether the test can run at all.
    if _get_mpi_implementation_flags(False)[0] is None:
        self.skipTest("MPI is not available")

    command = ['cmd', 'arg1', 'arg2']
    env = {'env1': 'val1', 'env2': 'val2'}
    stdout, stderr = '<stdout>', '<stderr>'
    settings = hvd_settings.Settings(
        verbose=0,
        extra_mpi_args='>mpi-extra args go here<',
        num_hosts=2,
        num_proc=4,
        hosts='>host names go here<',
        output_filename='>output filename goes here<',
        run_func_mode=True,
    )

    def fake_mpi_impl_flags(tcp):
        return ["--mock-mpi-impl-flags"], []

    flags_patch = mock.patch(
        "horovod.run.js_run._get_mpi_implementation_flags",
        side_effect=fake_mpi_impl_flags)
    execute_patch = mock.patch(
        "horovod.run.js_run.safe_shell_exec.execute",
        return_value=0)

    # Patches are entered left to right, same as the nested form.
    with flags_patch, execute_patch as execute:
        js_run(settings, None, env, command, stdout=stdout, stderr=stderr)

        # call the mocked _get_mpi_implementation_flags method
        mpi_flags, _ = horovod.run.js_run._get_mpi_implementation_flags(False)
        self.assertIsNotNone(mpi_flags)

        joined_flags = ' '.join(mpi_flags)
        expected_command = (
            'jsrun '
            '--erf_input /tmp/rankfile '
            '--stdio_stderr >output filename goes here< '
            '--stdio_stdout >output filename goes here< '
            '--smpiargs \'{mpi_args} >mpi-extra args go here<\' '
            'cmd arg1 arg2').format(mpi_args=joined_flags)
        # Fresh literal rather than ``env`` so the comparison does not
        # share the object that was handed to js_run.
        expected_env = {'env1': 'val1', 'env2': 'val2'}

        execute.assert_called_once_with(
            expected_command,
            env=expected_env,
            stdout=stdout,
            stderr=stderr)
def run_controller(use_gloo, gloo_run, use_mpi, mpi_run, use_jsrun, js_run, verbosity):
    """Invoke exactly one of the given launcher callables.

    :param use_gloo: truthy to force the Gloo controller.
    :param gloo_run: zero-argument callable that starts a Gloo run.
    :param use_mpi: truthy to force the MPI controller.
    :param mpi_run: zero-argument callable that starts an MPI run.
    :param use_jsrun: truthy to force the jsrun controller.
    :param js_run: zero-argument callable that starts a jsrun run.
    :param verbosity: verbosity level or ``None``; the build checks are
        called with ``verbose=True`` only when it is at least 2.
    :raises ValueError: if the forced controller is not built, if jsrun is
        forced but no LSF job is detected, or if neither MPI nor Gloo is
        built when nothing is forced.

    The flags are examined in the order gloo, mpi, jsrun. When none is set,
    MPI is preferred if built (via ``js_run`` when an LSF job is detected
    and ``is_jsrun_installed()`` holds, otherwise ``mpi_run``), then Gloo.
    """
    # keep logic in sync with is_gloo_used(...)
    verbose = False if verbosity is None else verbosity >= 2

    mpi_missing = (
        'MPI support has not been built.  If this is not expected, ensure MPI is installed '
        'and reinstall Horovod with HOROVOD_WITH_MPI=1 to debug the build error.'
    )

    if use_gloo:
        if not gloo_built(verbose=verbose):
            raise ValueError(
                'Gloo support has not been built.  If this is not expected, ensure CMake is installed '
                'and reinstall Horovod with HOROVOD_WITH_GLOO=1 to debug the build error.'
            )
        gloo_run()
        return

    if use_mpi:
        if not mpi_built(verbose=verbose):
            raise ValueError(mpi_missing)
        mpi_run()
        return

    if use_jsrun:
        if not mpi_built(verbose=verbose):
            raise ValueError(mpi_missing)
        if not lsf.LSFUtils.using_lsf():
            raise ValueError(
                'Horovod did not detect an LSF job.  The jsrun launcher can only be used in that environment. '
                'Please, pick a different launcher for other environments.')
        js_run()
        return

    # Nothing was forced: fall back to whatever has been built, MPI first.
    if mpi_built(verbose=verbose):
        if lsf.LSFUtils.using_lsf() and is_jsrun_installed():
            js_run()
        else:
            mpi_run()
        return

    if gloo_built(verbose=verbose):
        gloo_run()
        return

    raise ValueError(
        'Neither MPI nor Gloo support has been built. Try reinstalling Horovod ensuring that '
        'either MPI is installed (MPI) or CMake is installed (Gloo).')
def test_js_run(self):
    # Verifies the command line and environment that js_run forwards to
    # the injected ``run_func`` callable.

    # Nothing to check when no MPI implementation can be detected.
    if _get_mpi_implementation_flags(False)[0] is None:
        self.skipTest("MPI is not available")

    command = ['cmd', 'arg1', 'arg2']
    env = {'env1': 'val1', 'env2': 'val2'}
    stdout, stderr = '<stdout>', '<stderr>'
    settings = hvd_settings.Settings(
        verbose=0,
        extra_mpi_args='>mpi-extra args go here<',
        num_hosts=2,
        num_proc=4,
        hosts='>host names go here<',
        output_filename='>output filename goes here<',
        run_func_mode=True,
    )

    # Stand-in for the function that would actually execute the command.
    fake_runner = MagicMock(return_value=0)
    js_run(settings, None, env, command,
           stdout=stdout, stderr=stderr, run_func=fake_runner)

    # The expected command embeds the flags of the detected MPI
    # implementation, so they are queried again here.
    mpi_flags, _ = _get_mpi_implementation_flags(False)
    self.assertIsNotNone(mpi_flags)

    joined_flags = ' '.join(mpi_flags)
    expected_command = (
        'jsrun '
        '--erf_input /tmp/rankfile '
        '--stdio_stderr >output filename goes here< '
        '--stdio_stdout >output filename goes here< '
        '--smpiargs \'{mpi_args} >mpi-extra args go here<\' '
        'cmd arg1 arg2').format(mpi_args=joined_flags)
    expected_env = {'env1': 'val1', 'env2': 'val2'}

    fake_runner.assert_called_once_with(
        command=expected_command,
        env=expected_env,
        stdout=stdout,
        stderr=stderr)
def js_run_fn():
    """Zero-argument adapter around ``js_run``.

    Forwards ``settings``, ``nics``, ``env`` and ``command`` as they are
    bound in the surrounding scope at call time; the result of ``js_run``
    is discarded.
    """
    js_run(settings, nics, env, command)