def run_controller(use_gloo, gloo_run, use_mpi, mpi_run, use_jsrun, js_run, verbosity):
    """Select a launcher and invoke its zero-argument run callback.

    An explicit request is honoured first, checked in the order ``use_gloo``,
    ``use_mpi``, ``use_jsrun``. With no explicit request, MPI is tried before
    Gloo; within MPI, ``js_run`` is chosen when an LSF job is detected and
    jsrun is installed, otherwise ``mpi_run``.

    Args:
        use_gloo: truthy to request the Gloo launcher.
        gloo_run: callable invoked (without arguments) to launch via Gloo.
        use_mpi: truthy to request the MPI launcher.
        mpi_run: callable invoked (without arguments) to launch via MPI.
        use_jsrun: truthy to request the jsrun launcher.
        js_run: callable invoked (without arguments) to launch via jsrun.
        verbosity: verbosity level or ``None``; forwarded to the build checks
            as ``verbose=True`` only when it is at least 2.

    Raises:
        ValueError: if the requested launcher's support has not been built,
            if jsrun is requested outside an LSF job, or if neither MPI nor
            Gloo support is available when nothing was requested explicitly.
    """
    # keep logic in sync with is_gloo_used(...)
    verbose = False if verbosity is None else verbosity >= 2

    gloo_missing = ('Gloo support has not been built. If this is not expected, ensure CMake is installed '
                    'and reinstall Horovod with HOROVOD_WITH_GLOO=1 to debug the build error.')
    # Shared by the explicit MPI and jsrun requests; both depend on MPI support.
    mpi_missing = ('MPI support has not been built. If this is not expected, ensure MPI is installed '
                   'and reinstall Horovod with HOROVOD_WITH_MPI=1 to debug the build error.')

    if use_gloo:
        if not gloo_built(verbose=verbose):
            raise ValueError(gloo_missing)
        gloo_run()
        return

    if use_mpi:
        if not mpi_built(verbose=verbose):
            raise ValueError(mpi_missing)
        mpi_run()
        return

    if use_jsrun:
        if not mpi_built(verbose=verbose):
            raise ValueError(mpi_missing)
        if not lsf.LSFUtils.using_lsf():
            raise ValueError(
                'Horovod did not detect an LSF job. The jsrun launcher can only be used in that environment. '
                'Please, pick a different launcher for other environments.')
        js_run()
        return

    # Nothing was requested explicitly: fall back to whatever has been built.
    if mpi_built(verbose=verbose):
        launcher = js_run if lsf.LSFUtils.using_lsf() and is_jsrun_installed() else mpi_run
        launcher()
        return

    if gloo_built(verbose=verbose):
        gloo_run()
        return

    raise ValueError('Neither MPI nor Gloo support has been built. Try reinstalling Horovod ensuring that '
                     'either MPI is installed (MPI) or CMake is installed (Gloo).')
def test_mpi_run_minimal(self):
    """Check the command and env handed to execute when mpi_run gets minimal settings."""
    if not mpi_available():
        self.skipTest("MPI is not available")

    command = ['cmd']
    minimal = self.minimal_settings

    def fake_impl_flags(tcp, env=None):
        return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]

    with mock.patch("horovod.runner.mpi_run._get_mpi_implementation_flags", side_effect=fake_impl_flags),\
            mock.patch("horovod.runner.mpi_run.safe_shell_exec.execute", return_value=0) as execute:
        mpi_run(minimal, None, {}, command)

        # ask the mocked _get_mpi_implementation_flags for the flags it hands out
        flags, binding = horovod.runner.mpi_run._get_mpi_implementation_flags(False)
        self.assertIsNotNone(flags)
        template = ('mpirun '
                    '--allow-run-as-root --tag-output '
                    '-np 2 -H localhost:2 '
                    '{binding_args} '
                    '{mpi_flags} '
                    'cmd')
        expected_cmd = template.format(binding_args=' '.join(binding), mpi_flags=' '.join(flags))

        # drop PYTHONPATH from the env given to execute:
        # its exact value cannot be known here, and its handling
        # is covered by the test_mpi_run_*pythonpath* tests
        self.assertIn('env', execute.call_args.kwargs)
        launched_env = execute.call_args.kwargs['env']
        launched_env.pop('PYTHONPATH', None)
        expected_env = dict(PATH=os.environ.get('PATH'))

        execute.assert_called_once_with(expected_cmd, env=expected_env, stdout=None, stderr=None)
def test_mpi_run_full(self):
    """Check the command and env handed to execute when mpi_run gets fully populated settings."""
    if not mpi_available():
        self.skipTest("MPI is not available")

    command = ['cmd', 'arg1', 'arg2']
    nics = ['eth0', 'eth1']
    env = {'env1': 'val1', 'env2': 'val2'}
    out_marker = '<stdout>'
    err_marker = '<stderr>'
    start_timeout = timeout.Timeout(5, message='Timed out waiting for something.')
    settings = hvd_settings.Settings(
        verbose=0,
        ssh_port=1022,
        extra_mpi_args='>mpi-extra args go here<',
        binding_args='>binding args go here<',
        key=secret.make_secret_key(),
        start_timeout=start_timeout,
        num_proc=1,
        hosts='localhost:1',
        output_filename='>output filename goes here<',
        run_func_mode=True
    )

    def fake_impl_flags(tcp, env=None):
        return ["--mock-mpi-impl-flags"], []

    with mock.patch("horovod.runner.mpi_run._get_mpi_implementation_flags", side_effect=fake_impl_flags) as impl,\
            mock.patch("horovod.runner.mpi_run.safe_shell_exec.execute", return_value=0) as execute:
        mpi_run(settings, nics, env, command, stdout=out_marker, stderr=err_marker)

        # mpi_run must have queried _get_mpi_implementation_flags exactly once
        impl.assert_called_once_with(None, env=env)

        # query the mocked _get_mpi_implementation_flags ourselves
        flags, _ = horovod.runner.mpi_run._get_mpi_implementation_flags(False)
        self.assertIsNotNone(flags)
        template = ('mpirun '
                    '--allow-run-as-root --tag-output '
                    '-np 1 -H {hosts} '
                    '>binding args go here< '
                    '{mpi_flags} '
                    '-mca plm_rsh_args "-p 1022" '
                    '-mca btl_tcp_if_include eth0,eth1 -x NCCL_SOCKET_IFNAME=eth0,eth1 '
                    '--output-filename >output filename goes here< '
                    '-x env1 -x env2 '
                    '>mpi-extra args go here< '
                    'cmd arg1 arg2')
        expected_command = template.format(hosts=settings.hosts, mpi_flags=' '.join(flags))

        # drop PYTHONPATH from the env given to execute:
        # its exact value cannot be known here, and its handling
        # is covered by the test_mpi_run_*pythonpath* tests
        self.assertIn('env', execute.call_args.kwargs)
        launched_env = execute.call_args.kwargs['env']
        launched_env.pop('PYTHONPATH', None)
        expected_env = {'PATH': os.environ.get('PATH'), 'env1': 'val1', 'env2': 'val2'}

        execute.assert_called_once_with(expected_command, env=expected_env, stdout=out_marker, stderr=err_marker)
def test_mpi_run_with_os_environ(self):
    """Passing os.environ itself as env must make mpi_run raise the 'must be a dict' error."""
    if not mpi_available():
        self.skipTest("MPI is not available")

    command = ['cmd']
    minimal = self.minimal_settings

    def fake_impl_flags(tcp, env=None):
        return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]

    with mock.patch("horovod.runner.mpi_run._get_mpi_implementation_flags", side_effect=fake_impl_flags),\
            mock.patch("horovod.runner.mpi_run.safe_shell_exec.execute", return_value=0),\
            pytest.raises(Exception, match="^env argument must be a dict, not <class 'os._Environ'>: "):
        mpi_run(minimal, None, os.environ, command)
def test_mpi_run_with_non_zero_exit(self):
    """An exit code of 1 from the mocked execute must surface as a RuntimeError from mpi_run."""
    if not mpi_available():
        self.skipTest("MPI is not available")

    command = ['cmd']
    minimal = self.minimal_settings

    def no_impl_flags(tcp, env=None):
        return [], []

    with mock.patch("horovod.runner.mpi_run._get_mpi_implementation_flags", side_effect=no_impl_flags),\
            mock.patch("horovod.runner.mpi_run.safe_shell_exec.execute", return_value=1),\
            pytest.raises(RuntimeError, match="^mpirun failed with exit code 1$"):
        mpi_run(minimal, None, {}, command)
def do_test_mpi_run_env_override(self, sysenv, argenv, env_var, expected):
    """Run mpi_run under override_env(sysenv) and check one variable of the resulting env.

    Args:
        sysenv: value handed to the override_env context manager.
        argenv: env argument passed to mpi_run.
        env_var: name of the variable to look up in the env given to execute.
        expected: value that variable must have (compared via ``.get``, so
            ``None`` matches an absent variable).
    """
    if not mpi_available():
        self.skipTest("MPI is not available")

    command = ['cmd']
    minimal = self.minimal_settings

    def fake_impl_flags(tcp, env=None):
        return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]

    with mock.patch("horovod.runner.mpi_run._get_mpi_implementation_flags", side_effect=fake_impl_flags),\
            mock.patch("horovod.runner.mpi_run.safe_shell_exec.execute", return_value=0) as execute,\
            override_env(sysenv):
        mpi_run(minimal, None, argenv, command)

        # inspect the env that reached execute
        call_kwargs = execute.call_args.kwargs
        self.assertIn('env', call_kwargs)
        launched_env = execute.call_args.kwargs['env']
        self.assertEqual(launched_env.get(env_var), expected)
def mpi_run_fn():
    """Call mpi_run with settings, nics, env and command, discarding its result.

    None of the four arguments is defined in this function; they are resolved
    from the enclosing or module scope at call time.
    NOTE(review): presumably this is the zero-argument ``mpi_run`` callback
    handed to run_controller — confirm against the caller.
    """
    mpi_run(settings, nics, env, command)