def _run_elastic(args):
    # construct host discovery component
    if args.host_discovery_script:
        discover_hosts = discovery.HostDiscoveryScript(args.host_discovery_script, args.slots)
    elif args.hosts:
        _, available_host_slots = hosts.parse_hosts_and_slots(args.hosts)
        if len(available_host_slots) < 2:
            raise ValueError('Cannot run in fault tolerance mode with fewer than 2 hosts.')
        discover_hosts = discovery.FixedHosts(available_host_slots)
    else:
        raise ValueError('One of --host-discovery-script, --hosts, or --hostnames must be provided')

    # horovodrun has to finish all the checks before this timeout runs out.
    if args.start_timeout:
        start_timeout = args.start_timeout
    else:
        # Lookup default timeout from the environment variable.
        start_timeout = int(os.getenv('HOROVOD_START_TIMEOUT', '30'))

    tmout = timeout.Timeout(start_timeout,
                            message='Timed out waiting for {activity}. Please '
                                    'check connectivity between servers. You '
                                    'may need to increase the --start-timeout '
                                    'parameter if you have too many servers.')
    settings = elastic_settings.ElasticSettings(discovery=discover_hosts,
                                                min_num_proc=args.min_num_proc or args.num_proc,
                                                max_num_proc=args.max_num_proc,
                                                elastic_timeout=args.elastic_timeout,
                                                reset_limit=args.reset_limit,
                                                cooldown_range=args.cooldown_range,
                                                num_proc=args.num_proc,
                                                verbose=2 if args.verbose else 0,
                                                ssh_port=args.ssh_port,
                                                ssh_identity_file=args.ssh_identity_file,
                                                extra_mpi_args=args.mpi_args,
                                                key=secret.make_secret_key(),
                                                start_timeout=tmout,
                                                output_filename=args.output_filename,
                                                run_func_mode=args.run_func is not None,
                                                nics=args.nics,
                                                prefix_output_with_timestamp=args.prefix_output_with_timestamp)

    if not gloo_built(verbose=(settings.verbose >= 2)):
        raise ValueError('Gloo support is required to use elastic training, but has not been built. Ensure CMake is '
                         'installed and reinstall Horovod with HOROVOD_WITH_GLOO=1 to debug the build error.')

    env = os.environ.copy()
    config_parser.set_env_from_args(env, args)

    executable = args.executable or sys.executable
    return gloo_run_elastic(settings, env, args.run_func if args.run_func else args.command, executable)
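
# Example (illustrative, not part of this module): an elastic run driven by a host
# discovery script is typically started through the horovodrun CLI, roughly:
#
#   horovodrun -np 2 --host-discovery-script ./discover_hosts.sh python train.py
#
# where discover_hosts.sh prints one "<hostname>:<slots>" entry per line. Exact flag
# spellings vary across Horovod versions; this sketch only assumes the
# --host-discovery-script flag referenced in the error message above.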
def mpi_run(settings, nics, env, command, stdout=None, stderr=None):
    """
    Runs mpi_run.

    Args:
        settings: Settings for running MPI.
                  Note: settings.num_proc and settings.hosts must not be None.
        nics: Interfaces to include by MPI.
        env: Environment dictionary to use for running command.
        command: Command and arguments to run as a list of string.
        stdout: Stdout of the mpi process.
                Only used when settings.run_func_mode is True.
        stderr: Stderr of the mpi process.
                Only used when settings.run_func_mode is True.
    """
    if env is not None and not isinstance(env, dict):
        raise Exception('env argument must be a dict, not {type}: {env}'
                        .format(type=type(env), env=env))

    mpi_impl_flags, impl_binding_args, mpi = _get_mpi_implementation_flags(settings.tcp_flag, env=env)
    if mpi_impl_flags is None:
        raise Exception(_MPI_NOT_FOUND_ERROR_MSG)

    impi = _IMPI_IMPL == mpi

    ssh_args = []
    if settings.ssh_port:
        ssh_args += [f'-p {settings.ssh_port}']
    if settings.ssh_identity_file:
        ssh_args += [f'-i {settings.ssh_identity_file}']

    mpi_ssh_args = ''
    if ssh_args:
        joined_ssh_args = ' '.join(ssh_args)
        mpi_ssh_args = f'-bootstrap=ssh -bootstrap-exec-args \"{joined_ssh_args}\"' if impi \
            else f'-mca plm_rsh_args \"{joined_ssh_args}\"'

    tcp_intf_arg = '-mca btl_tcp_if_include {nics}'.format(
        nics=','.join(nics)) if nics and not impi else ''
    nccl_socket_intf_arg = '-{opt} NCCL_SOCKET_IFNAME={nics}'.format(
        opt='genv' if impi else 'x',
        nics=','.join(nics)) if nics else ''

    # On large cluster runs (e.g. Summit), we need extra settings to work around OpenMPI issues
    host_names, host_to_slots = hosts.parse_hosts_and_slots(settings.hosts)
    if not impi and host_names and len(host_names) >= _LARGE_CLUSTER_THRESHOLD:
        mpi_impl_flags.append('-mca plm_rsh_no_tree_spawn true')
        mpi_impl_flags.append('-mca plm_rsh_num_concurrent {}'.format(len(host_names)))

    # if user does not specify any hosts, mpirun by default uses local host.
    # There is no need to specify localhost.
    hosts_arg = '-{opt} {hosts}'.format(opt='hosts' if impi else 'H',
                                        hosts=','.join(host_names) if host_names and impi else settings.hosts)

    ppn_arg = ' '
    if host_to_slots and impi:
        ppn = host_to_slots[host_names[0]]
        for h_name in host_names[1:]:
            if ppn != host_to_slots[h_name]:
                raise Exception('''Different slots in -hosts parameter are not supported in Intel(R) MPI.
                                Use -machinefile <machine_file> for this purpose.''')
        ppn_arg = ' -ppn {} '.format(ppn)

    if settings.prefix_output_with_timestamp and not impi:
        mpi_impl_flags.append('--timestamp-output')

    binding_args = settings.binding_args if settings.binding_args and not impi else ' '.join(impl_binding_args)

    basic_args = '-l' if impi else '--allow-run-as-root --tag-output'

    output = []
    if settings.output_filename:
        output.append('-outfile-pattern' if impi else '--output-filename')
        output.append(settings.output_filename)

    env_list = '' if impi else ' '.join(
        '-x %s' % key for key in sorted(env.keys()) if env_util.is_exportable(key))

    # Pass all the env variables to the mpirun command.
    mpirun_command = (
        'mpirun {basic_args} '
        '-np {num_proc}{ppn_arg}{hosts_arg} '
        '{binding_args} '
        '{mpi_args} '
        '{mpi_ssh_args} '
        '{tcp_intf_arg} '
        '{nccl_socket_intf_arg} '
        '{output_filename_arg} '
        '{env} {extra_mpi_args} {command}'  # expect a lot of environment variables
        .format(basic_args=basic_args,
                num_proc=settings.num_proc,
                ppn_arg=ppn_arg,
                hosts_arg=hosts_arg,
                binding_args=binding_args,
                mpi_args=' '.join(mpi_impl_flags),
                tcp_intf_arg=tcp_intf_arg,
                nccl_socket_intf_arg=nccl_socket_intf_arg,
                mpi_ssh_args=mpi_ssh_args,
                output_filename_arg=' '.join(output),
                env=env_list,
                extra_mpi_args=settings.extra_mpi_args if settings.extra_mpi_args else '',
                command=' '.join(quote(par) for par in command))
    )

    if settings.verbose >= 2:
        print(mpirun_command)

    # we need the driver's PATH and PYTHONPATH in env to run mpirun,
    # env for mpirun is different to env encoded in mpirun_command
    for var in ['PATH', 'PYTHONPATH']:
        if var not in env and var in os.environ:
            # copy env so we do not leak env modifications
            env = copy.copy(env)
            # copy var over from os.environ
            env[var] = os.environ[var]

    # Execute the mpirun command.
    if settings.run_func_mode:
        exit_code = safe_shell_exec.execute(mpirun_command, env=env, stdout=stdout, stderr=stderr)
        if exit_code != 0:
            raise RuntimeError("mpirun failed with exit code {exit_code}".format(exit_code=exit_code))
    else:
        os.execve('/bin/sh', ['/bin/sh', '-c', mpirun_command], env)
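
# For reference, with an Open MPI build (impi is False) the composed command has
# roughly this shape (hosts, slot counts, ports and interface names below are
# hypothetical placeholders, not values produced by this module):
#
#   mpirun --allow-run-as-root --tag-output -np 4 -H host-1:2,host-2:2 \
#       <binding args> <mpi impl flags> \
#       -mca plm_rsh_args "-p 2222" -mca btl_tcp_if_include eth0 \
#       -x NCCL_SOCKET_IFNAME=eth0 --output-filename <dir> \
#       -x PATH -x PYTHONPATH ... python train.py
#
# Each piece maps onto a format field above; an Intel MPI build swaps in its own
# spellings (-l, -hosts/-ppn, -bootstrap=ssh, -genv, -outfile-pattern).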
def _run_static(args):
    nics_set = set(args.nics.split(',')) if args.nics else None

    # horovodrun has to finish all the checks before this timeout runs out.
    if args.start_timeout:
        start_timeout = args.start_timeout
    else:
        # Lookup default timeout from the environment variable.
        start_timeout = int(os.getenv('HOROVOD_START_TIMEOUT', '30'))

    tmout = timeout.Timeout(start_timeout,
                            message='Timed out waiting for {activity}. Please '
                                    'check connectivity between servers. You '
                                    'may need to increase the --start-timeout '
                                    'parameter if you have too many servers.')
    settings = hvd_settings.Settings(verbose=2 if args.verbose else 0,
                                     ssh_port=args.ssh_port,
                                     ssh_identity_file=args.ssh_identity_file,
                                     extra_mpi_args=args.mpi_args,
                                     tcp_flag=args.tcp_flag,
                                     binding_args=args.binding_args,
                                     key=secret.make_secret_key(),
                                     start_timeout=tmout,
                                     num_proc=args.np,
                                     hosts=args.hosts,
                                     output_filename=args.output_filename,
                                     run_func_mode=args.run_func is not None,
                                     nics=nics_set)

    # This cache stores the results of checks performed by horovod
    # during the initialization step. It can be disabled by setting
    # --disable-cache flag.
    fn_cache = None
    if not args.disable_cache:
        params = ''
        if args.np:
            params += str(args.np) + ' '
        if args.hosts:
            params += str(args.hosts) + ' '
        if args.ssh_port:
            params += str(args.ssh_port)
        if args.ssh_identity_file:
            params += args.ssh_identity_file
        parameters_hash = hashlib.md5(params.encode('utf-8')).hexdigest()
        fn_cache = cache.Cache(CACHE_FOLDER, CACHE_STALENESS_THRESHOLD_MINUTES,
                               parameters_hash)

    all_host_names, _ = hosts.parse_hosts_and_slots(args.hosts)
    if settings.verbose >= 2:
        print('Filtering local host names.')
    remote_host_names = network.filter_local_addresses(all_host_names)
    if settings.verbose >= 2:
        print('Remote host found: ' + ' '.join(remote_host_names))

    if len(remote_host_names) > 0:
        if settings.verbose >= 2:
            print('Checking ssh on all remote hosts.')
        # Check if we can ssh into all remote hosts successfully.
        if not _check_all_hosts_ssh_successful(remote_host_names,
                                               args.ssh_port,
                                               args.ssh_identity_file,
                                               fn_cache=fn_cache):
            raise RuntimeError('could not connect to some hosts via ssh')
        if settings.verbose >= 2:
            print('SSH was successful into all the remote hosts.')

    nics = driver_service.get_common_interfaces(settings, all_host_names,
                                                remote_host_names, fn_cache)

    if args.run_func:
        # get the driver IPv4 address
        driver_ip = network.get_driver_ip(nics)
        run_func_server = KVStoreServer(verbose=settings.verbose)
        run_func_server_port = run_func_server.start_server()
        put_data_into_kvstore(driver_ip, run_func_server_port,
                              'runfunc', 'func', args.run_func)

        command = [sys.executable, '-m', 'horovod.runner.run_task',
                   str(driver_ip), str(run_func_server_port)]

        try:
            _launch_job(args, settings, nics, command)
            results = [None] * args.np
            # TODO: make it parallel to improve performance
            for i in range(args.np):
                results[i] = read_data_from_kvstore(driver_ip, run_func_server_port,
                                                    'runfunc_result', str(i))
            return results
        finally:
            run_func_server.shutdown_server()
    else:
        command = args.command
        _launch_job(args, settings, nics, command)
        return None
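
# Example (illustrative): a static run of the kind handled above is commonly started as
#
#   horovodrun -np 4 -H server1:2,server2:2 python train.py
#
# which populates args.np and args.hosts. When a Python callable is passed through the
# run API instead, args.run_func is set and per-rank results are read back from the
# in-process KV store under ('runfunc_result', str(rank)) as shown in the try block
# above. Flag spellings here are the common ones and may differ between Horovod versions.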
def mpi_run(settings, nics, env, command, stdout=None, stderr=None):
    """
    Runs mpi_run.

    Args:
        settings: Settings for running MPI.
                  Note: settings.num_proc and settings.hosts must not be None.
        nics: Interfaces to include by MPI.
        env: Environment dictionary to use for running command.
        command: Command and arguments to run as a list of string.
        stdout: Stdout of the mpi process.
                Only used when settings.run_func_mode is True.
        stderr: Stderr of the mpi process.
                Only used when settings.run_func_mode is True.
    """
    if env is not None and not isinstance(env, dict):
        raise Exception('env argument must be a dict, not {type}: {env}'.format(
            type=type(env), env=env))

    mpi_impl_flags, impl_binding_args = _get_mpi_implementation_flags(settings.tcp_flag, env=env)
    if mpi_impl_flags is None:
        raise Exception(_MPI_NOT_FOUND_ERROR_MSG)

    ssh_port_arg = '-mca plm_rsh_args \"-p {ssh_port}\"'.format(
        ssh_port=settings.ssh_port) if settings.ssh_port else ''

    # if user does not specify any hosts, mpirun by default uses local host.
    # There is no need to specify localhost.
    hosts_arg = '-H {hosts}'.format(hosts=settings.hosts)

    tcp_intf_arg = '-mca btl_tcp_if_include {nics}'.format(
        nics=','.join(nics)) if nics else ''
    nccl_socket_intf_arg = '-x NCCL_SOCKET_IFNAME={nics}'.format(
        nics=','.join(nics)) if nics else ''

    # On large cluster runs (e.g. Summit), we need extra settings to work around OpenMPI issues
    host_names, _ = hosts.parse_hosts_and_slots(settings.hosts)
    if host_names and len(host_names) >= _LARGE_CLUSTER_THRESHOLD:
        mpi_impl_flags.append('-mca plm_rsh_no_tree_spawn true')
        mpi_impl_flags.append('-mca plm_rsh_num_concurrent {}'.format(len(host_names)))

    binding_args = settings.binding_args if settings.binding_args else ' '.join(impl_binding_args)

    # Pass all the env variables to the mpirun command.
    mpirun_command = (
        'mpirun --allow-run-as-root --tag-output '
        '-np {num_proc} {hosts_arg} '
        '{binding_args} '
        '{mpi_args} '
        '{ssh_port_arg} '
        '{tcp_intf_arg} '
        '{nccl_socket_intf_arg} '
        '{output_filename_arg} '
        '{env} {extra_mpi_args} {command}'  # expect a lot of environment variables
        .format(num_proc=settings.num_proc,
                hosts_arg=hosts_arg,
                binding_args=binding_args,
                mpi_args=' '.join(mpi_impl_flags),
                tcp_intf_arg=tcp_intf_arg,
                nccl_socket_intf_arg=nccl_socket_intf_arg,
                ssh_port_arg=ssh_port_arg,
                output_filename_arg='--output-filename ' + settings.output_filename
                                    if settings.output_filename else '',
                env=' '.join('-x %s' % key for key in sorted(env.keys())
                             if env_util.is_exportable(key)),
                extra_mpi_args=settings.extra_mpi_args if settings.extra_mpi_args else '',
                command=' '.join(quote(par) for par in command)))

    if settings.verbose >= 2:
        print(mpirun_command)

    # we need the driver's PATH and PYTHONPATH in env to run mpirun,
    # env for mpirun is different to env encoded in mpirun_command
    for var in ['PATH', 'PYTHONPATH']:
        if var not in env and var in os.environ:
            # copy env so we do not leak env modifications
            env = copy.copy(env)
            # copy var over from os.environ
            env[var] = os.environ[var]

    # Execute the mpirun command.
    if settings.run_func_mode:
        exit_code = safe_shell_exec.execute(mpirun_command, env=env, stdout=stdout, stderr=stderr)
        if exit_code != 0:
            raise RuntimeError(
                "mpirun failed with exit code {exit_code}".format(exit_code=exit_code))
    else:
        os.execve('/bin/sh', ['/bin/sh', '-c', mpirun_command], env)