def rlaunch():
    """Command-line entry point: launch one or more Rockets.

    Parses CLI arguments, resolves launchpad/fworker config files (falling
    back to ``my_launchpad.yaml`` / ``my_fworker.yaml`` in the config dir),
    then dispatches to ``rapidfire`` or ``launch_rocket``.
    """
    m_description = 'This program launches one or more Rockets. A Rocket grabs a job from the central database and ' \
                    'runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot', help='launch a single Rocket')
    rapid_parser = subparsers.add_parser('rapidfire',
                                         help='launch multiple Rockets (loop until all FireWorks complete)')

    # sub-command specific options
    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run', default=None, type=int)
    single_parser.add_argument('--offline', help='run in offline mode (FW.json required)', action='store_true')

    rapid_parser.add_argument('--nlaunches', help='num_launches (int or "infinite"; default 0 is all jobs in DB)',
                              default=0)
    rapid_parser.add_argument('--sleep', help='sleep time between loops (secs)', default=None, type=int)

    # options shared by all sub-commands
    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file', default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file', default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir',
                        help='path to a directory containing the config file (used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages', action='store_true')

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    # fall back to config-dir files when -l / -w were not given
    if not args.launchpad_file and os.path.exists(os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    if not args.fworker_file and os.path.exists(os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    # -s/--silencer overrides any explicit --loglvl
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    # offline singleshot mode runs without a LaunchPad connection
    if args.command == 'singleshot' and args.offline:
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file else LaunchPad(
            strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker, None, args.nlaunches, -1, args.sleep, args.loglvl)
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl)
def __init__(self, state, launch_dir, fworker=None, host=None, ip=None, trackers=None,
             action=None, state_history=None, launch_id=None, fw_id=None):
    """
    :param state: (str) state of this Launch; must be a key of Firework.STATE_RANKS
    :param launch_dir: (str) directory in which the Launch runs
    :param fworker: (FWorker) the FireWorker executing the Launch (a default FWorker if None)
    :param host: (str) hostname of the launch machine (auto-detected if None)
    :param ip: (str) IP address of the launch machine (auto-detected if None)
    :param trackers: ([Tracker]) file Trackers attached to this Launch
    :param action: (FWAction) output produced by the Launch
    :param state_history: ([dict]) record of every state this Launch has passed through
    :param launch_id: (int) identifier assigned by the LaunchPad
    :param fw_id: (int) id of the Firework being run
    """
    # reject states that the Firework state machine does not know about
    if state not in Firework.STATE_RANKS:
        raise ValueError("Invalid launch state: {}".format(state))

    self.state = state
    self.launch_dir = launch_dir
    self.launch_id = launch_id
    self.fw_id = fw_id

    # fill in defaults for anything the caller left out
    self.fworker = fworker if fworker else FWorker()
    self.host = host if host else get_my_host()
    self.ip = ip if ip else get_my_ip()
    self.trackers = trackers or []
    self.action = action or None
    self.state_history = state_history or []
def __init__(self, state, launch_dir, fworker=None, host=None, ip=None, trackers=None,
             action=None, state_history=None, launch_id=None, fw_id=None):
    """
    Args:
        state (str): current Launch state; validated against Firework.STATE_RANKS
        launch_dir (str): directory the Launch executes in
        fworker (FWorker): FireWorker running the Launch; defaults to a plain FWorker
        host (str): hostname of the launch machine; looked up automatically when omitted
        ip (str): IP address of the launch machine; looked up automatically when omitted
        trackers ([Tracker]): file Trackers for this Launch
        action (FWAction): the Launch's output
        state_history ([dict]): chronological record of the Launch's states
        launch_id (int): identifier assigned by the LaunchPad
        fw_id (int): id of the Firework being executed
    """
    # unknown states would corrupt downstream state-rank comparisons
    if state not in Firework.STATE_RANKS:
        raise ValueError("Invalid launch state: {}".format(state))

    # optional collaborators: substitute defaults when not supplied
    self.fworker = fworker if fworker else FWorker()
    self.host = host if host else get_my_host()
    self.ip = ip if ip else get_my_ip()

    # optional containers: empty when not supplied
    self.trackers = trackers or []
    self.state_history = state_history or []
    self.action = action or None

    # plain pass-through attributes
    self.launch_dir = launch_dir
    self.state = state
    self.launch_id = launch_id
    self.fw_id = fw_id
def __init__(self, state, launch_dir, fworker=None, host=None, ip=None, action=None,
             state_history=None, launch_id=None, fw_id=None):
    """
    :param state: (str) state of this Launch; must be a key of FireWork.STATE_RANKS
    :param launch_dir: (str) directory in which the Launch runs
    :param fworker: (FWorker) the FireWorker executing the Launch (may stay None)
    :param host: (str) hostname of the launch machine (auto-detected if None)
    :param ip: (str) IP address of the launch machine (auto-detected if None)
    :param action: (FWAction) output produced by the Launch
    :param state_history: (list) record of every state this Launch has passed through
    :param launch_id: (int) identifier assigned by the LaunchPad
    :param fw_id: (int) id of the FireWork being run
    """
    # reject states the FireWork state machine does not recognize
    if state not in FireWork.STATE_RANKS:
        raise ValueError("Invalid launch state: {}".format(state))

    self.state = state
    self.launch_dir = launch_dir
    self.launch_id = launch_id
    self.fw_id = fw_id
    self.fworker = fworker  # unlike host/ip, no default is substituted here

    # auto-detect network identity when the caller did not provide it
    self.host = host or get_my_host()
    self.ip = ip or get_my_ip()

    self.action = action or None
    self.state_history = state_history or []
def launch_multiprocess(launchpad, fworker, loglvl, nlaunches, num_jobs, sleep_time,
                        total_node_list=None, ppn=1, timeout=None, exclude_current_node=False,
                        local_redirect=False):
    """
    Launch the jobs in the job packing mode.

    Args:
        launchpad (LaunchPad)
        fworker (FWorker)
        loglvl (str): level at which to output logs
        nlaunches (int): 0 means 'until completion', -1 or "infinite" means to loop forever
        num_jobs(int): number of sub jobs
        sleep_time (int): secs to sleep between rapidfire loop iterations
        total_node_list ([str]): contents of NODEFILE (doesn't affect execution)
        ppn (int): processors per node (doesn't affect execution)
        timeout (int): # of seconds after which to stop the rapidfire process
        exclude_current_node: Don't use the script launching node as a compute node
        local_redirect (bool): redirect standard input and output to local file
    """
    # parse node file contents
    if exclude_current_node:
        host = get_my_host()
        l_dir = launchpad.get_logdir() if launchpad else None
        l_logger = get_fw_logger('rocket.launcher', l_dir=l_dir, stream_level=loglvl)
        if host in total_node_list:
            log_multi(l_logger, "Remove the current node \"{}\" from compute node".format(host))
            total_node_list.remove(host)
        else:
            log_multi(l_logger, "The current node is not in the node list, keep the node list as is")
    # NOTE(review): assumes total_node_list is not None when exclude_current_node is set — confirm callers
    node_lists, sub_nproc_list = split_node_lists(num_jobs, total_node_list, ppn)

    # create shared dataserver so sub-processes can talk to one LaunchPad connection
    ds = DataServer.setup(launchpad)
    port = ds.address[1]

    # manager dicts are shared across the rapidfire sub-processes
    manager = Manager()
    running_ids_dict = manager.dict()
    firing_state_dict = manager.dict()

    # launch rapidfire processes
    processes = start_rockets(fworker, nlaunches, sleep_time, loglvl, port, node_lists,
                              sub_nproc_list, timeout=timeout, running_ids_dict=running_ids_dict,
                              local_redirect=local_redirect, firing_state_dict=firing_state_dict)
    FWData().Running_IDs = running_ids_dict
    FWData().FiringState = firing_state_dict

    # start pinging service in a background thread; stopped via the Event below
    ping_stop = threading.Event()
    ping_thread = threading.Thread(target=ping_multilaunch, args=(port, ping_stop))
    ping_thread.start()

    # wait for completion, then tear down the pinger and the shared dataserver
    for p in processes:
        p.join()
    ping_stop.set()
    ping_thread.join()
    ds.shutdown()
def launch_multiprocess(launchpad, fworker, loglvl, nlaunches, num_jobs, sleep_time, total_node_list=None, ppn=1, timeout=None, exclude_current_node=False): """ Launch the jobs in the job packing mode. Args: launchpad (LaunchPad) fworker (FWorker) loglvl (str): level at which to output logs nlaunches (int): 0 means 'until completion', -1 or "infinite" means to loop forever num_jobs(int): number of sub jobs sleep_time (int): secs to sleep between rapidfire loop iterations total_node_list ([str]): contents of NODEFILE (doesn't affect execution) ppn (int): processors per node (doesn't affect execution) timeout (int): # of seconds after which to stop the rapidfire process exclude_current_node: Don't use the script launching node as a compute node """ # parse node file contents if exclude_current_node: host = get_my_host() l_dir = launchpad.get_logdir() if launchpad else None l_logger = get_fw_logger('rocket.launcher', l_dir=l_dir, stream_level=loglvl) if host in total_node_list: log_multi(l_logger, "Remove the current node \"{}\" from compute node".format(host)) total_node_list.remove(host) else: log_multi(l_logger, "The current node is not in the node list, keep the node list as is") node_lists, sub_nproc_list = split_node_lists(num_jobs, total_node_list, ppn) # create shared dataserver ds = DataServer.setup(launchpad) port = ds.address[1] manager = Manager() running_ids_dict = manager.dict() # launch rapidfire processes processes = start_rockets(fworker, nlaunches, sleep_time, loglvl, port, node_lists, sub_nproc_list, timeout=timeout, running_ids_dict=running_ids_dict) FWData().Running_IDs = running_ids_dict # start pinging service ping_stop = threading.Event() ping_thread = threading.Thread(target=ping_multilaunch, args=(port, ping_stop)) ping_thread.start() # wait for completion for p in processes: p.join() ping_stop.set() ping_thread.join() ds.shutdown()
def __init__(
    self,
    state,
    launch_dir,
    fworker=None,
    host=None,
    ip=None,
    trackers=None,
    action=None,
    state_history=None,
    launch_id=None,
    fw_id=None,
):
    """
    Args:
        state (str): state of this Launch; must be a key of Firework.STATE_RANKS
        launch_dir (str): directory the Launch executes in
        fworker (FWorker): FireWorker running the Launch; a default FWorker when omitted
        host (str): hostname of the launch machine; auto-detected when omitted
        ip (str): IP address of the launch machine; auto-detected when omitted
        trackers ([Tracker]): file Trackers for this Launch
        action (FWAction): the Launch's output
        state_history ([dict]): chronological record of the Launch's states
        launch_id (int): identifier assigned by the LaunchPad
        fw_id (int): id of the Firework being executed
    """
    # validate up front so no attributes are set for a bad state
    if state not in Firework.STATE_RANKS:
        raise ValueError("Invalid launch state: {}".format(state))

    # identity / bookkeeping attributes
    self.fw_id = fw_id
    self.launch_id = launch_id
    self.state = state
    self.launch_dir = launch_dir

    # collaborators, auto-filled when the caller passed nothing truthy
    self.fworker = fworker if fworker else FWorker()
    self.host = host if host else get_my_host()
    self.ip = ip if ip else get_my_ip()

    # optional payloads
    self.trackers = trackers or []
    self.action = action or None
    self.state_history = state_history or []
def arlaunch():
    """
    Function rapid-fire job launching.

    CLI entry point: parses arguments for the singleshot / rapidfire / multi
    sub-commands, builds a LaunchPad and an AiiDAFWorker, then dispatches to
    the matching launcher function.
    """
    m_description = 'This program launches one or more Rockets. A Rocket retrieves a job from the ' \
                    'central database and runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'
    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot', help='launch a single Rocket')
    rapid_parser = subparsers.add_parser(
        'rapidfire', help='launch multiple Rockets (loop until all FireWorks complete)')
    multi_parser = subparsers.add_parser(
        'multi', help='launches multiple Rockets simultaneously')

    # singleshot options
    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run', default=None, type=int)
    single_parser.add_argument('--offline', help='run in offline mode (FW.json required)', action='store_true')
    single_parser.add_argument('--pdb', help='shortcut to invoke debugger on error', action='store_true')

    # rapidfire options
    rapid_parser.add_argument('--nlaunches', help='num_launches (int or "infinite"; '
                              'default 0 is all jobs in DB)', default=0)
    rapid_parser.add_argument(
        '--timeout', help='timeout (secs) after which to quit (default None)', default=None, type=int)
    rapid_parser.add_argument(
        '--max_loops', help='after this many sleep loops, quit even in '
        'infinite nlaunches mode (default -1 is infinite loops)', default=-1, type=int)
    rapid_parser.add_argument('--sleep', help='sleep time between loops (secs)', default=None, type=int)
    rapid_parser.add_argument(
        '--local_redirect', help="Redirect stdout and stderr to the launch directory", action="store_true")

    # multi (job-packing) options
    multi_parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    multi_parser.add_argument('--nlaunches', help='number of FireWorks to run in series per '
                              'parallel job (int or "infinite"; default 0 is '
                              'all jobs in DB)', default=0)
    multi_parser.add_argument(
        '--sleep', help='sleep time between loops in infinite launch mode' '(secs)', default=None, type=int)
    multi_parser.add_argument(
        '--timeout', help='timeout (secs) after which to quit (default None)', default=None, type=int)
    multi_parser.add_argument(
        '--nodefile', help='nodefile name or environment variable name '
        'containing the node file name (for populating'
        ' FWData only)', default=None, type=str)
    multi_parser.add_argument(
        '--ppn', help='processors per node (for populating FWData only)', default=1, type=int)
    multi_parser.add_argument('--exclude_current_node', help="Don't use the script launching node"
                              "as compute node", action="store_true")
    multi_parser.add_argument(
        '--local_redirect', help="Redirect stdout and stderr to the launch directory", action="store_true")

    # options shared by all sub-commands; note -w is mandatory here
    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file')
    parser.add_argument('-w', '--fworker_file', required=True, help='path to fworker file')
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file '
                        '(used if -l, -w unspecified)', default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages', action='store_true')

    try:
        import argcomplete
        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        # eval "$(register-python-argcomplete rlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    # resolve launchpad file: config-dir file first, then global default
    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    elif not args.launchpad_file:
        args.launchpad_file = LAUNCHPAD_LOC

    # -s/--silencer overrides any explicit --loglvl
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    # offline singleshot mode runs without a LaunchPad connection
    if args.command == 'singleshot' and args.offline:
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(
            args.launchpad_file) if args.launchpad_file else LaunchPad(
                strm_lvl=args.loglvl)

    fworker = AiiDAFWorker.from_file(args.fworker_file)

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad,
                  fworker=fworker,
                  m_dir=None,
                  nlaunches=args.nlaunches,
                  max_loops=args.max_loops,
                  sleep_time=args.sleep,
                  strm_lvl=args.loglvl,
                  timeout=args.timeout,
                  local_redirect=args.local_redirect)
    elif args.command == 'multi':
        total_node_list = None
        if args.nodefile:
            # --nodefile may name an environment variable holding the real path
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile, 'r') as fhandle:
                total_node_list = [
                    line.strip() for line in fhandle.readlines()
                ]
        launch_multiprocess(launchpad,
                            fworker,
                            args.loglvl,
                            args.nlaunches,
                            args.num_jobs,
                            args.sleep,
                            total_node_list,
                            args.ppn,
                            timeout=args.timeout,
                            exclude_current_node=args.exclude_current_node,
                            local_redirect=args.local_redirect)
    else:
        launch_rocket(launchpad,
                      fworker,
                      args.fw_id,
                      args.loglvl,
                      pdb_on_exception=args.pdb)
def rlaunch():
    """Command-line entry point: launch one or more Rockets.

    Supports three sub-commands — singleshot, rapidfire, and multi (job
    packing) — and resolves launchpad/fworker config files before
    dispatching to the matching launcher function.
    """
    m_description = 'This program launches one or more Rockets. A Rocket grabs a job from the ' \
                    'central database and runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot', help='launch a single Rocket')
    rapid_parser = subparsers.add_parser('rapidfire',
                                         help='launch multiple Rockets (loop until all FireWorks complete)')
    multi_parser = subparsers.add_parser('multi', help='launches multiple Rockets simultaneously')

    # singleshot options
    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run', default=None, type=int)
    single_parser.add_argument('--offline', help='run in offline mode (FW.json required)', action='store_true')

    # rapidfire options
    rapid_parser.add_argument('--nlaunches', help='num_launches (int or "infinite"; '
                              'default 0 is all jobs in DB)', default=0)
    rapid_parser.add_argument('--timeout', help='timeout (secs) after which to quit (default None)',
                              default=None, type=int)
    rapid_parser.add_argument('--max_loops', help='after this many sleep loops, quit even in '
                              'infinite nlaunches mode (default -1 is infinite loops)',
                              default=-1, type=int)
    rapid_parser.add_argument('--sleep', help='sleep time between loops (secs)', default=None, type=int)

    # multi (job-packing) options
    multi_parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    multi_parser.add_argument('--nlaunches', help='number of FireWorks to run in series per '
                              'parallel job (int or "infinite"; default 0 is '
                              'all jobs in DB)', default=0)
    multi_parser.add_argument('--sleep', help='sleep time between loops in infinite launch mode'
                              '(secs)', default=None, type=int)
    multi_parser.add_argument('--timeout', help='timeout (secs) after which to quit (default None)',
                              default=None, type=int)
    multi_parser.add_argument('--nodefile', help='nodefile name or environment variable name '
                              'containing the node file name (for populating'
                              ' FWData only)', default=None, type=str)
    multi_parser.add_argument('--ppn', help='processors per node (for populating FWData only)',
                              default=1, type=int)
    multi_parser.add_argument('--exclude_current_node', help="Don't use the script launching node"
                              "as compute node", action="store_true")

    # options shared by all sub-commands
    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file', default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file', default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file '
                        '(used if -l, -w unspecified)', default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages', action='store_true')

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    # fall back to config-dir files when -l / -w were not given
    if not args.launchpad_file and os.path.exists(os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    if not args.fworker_file and os.path.exists(os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    # -s/--silencer overrides any explicit --loglvl
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    # offline singleshot mode runs without a LaunchPad connection
    if args.command == 'singleshot' and args.offline:
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file else LaunchPad(
            strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker=fworker, m_dir=None, nlaunches=args.nlaunches,
                  max_loops=args.max_loops, sleep_time=args.sleep, strm_lvl=args.loglvl,
                  timeout=args.timeout)
    elif args.command == 'multi':
        total_node_list = None
        if args.nodefile:
            # --nodefile may name an environment variable holding the real path
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile, 'r') as f:
                total_node_list = [line.strip() for line in f.readlines()]
        launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches, args.num_jobs,
                            args.sleep, total_node_list, args.ppn, timeout=args.timeout,
                            exclude_current_node=args.exclude_current_node)
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl)
def rlaunch():
    """Command-line entry point: launch one or more Rockets.

    Supports three sub-commands — singleshot, rapidfire, and multi (job
    packing) — resolves launchpad/fworker config files, then dispatches to
    the matching launcher function.
    """
    m_description = (
        "This program launches one or more Rockets. A Rocket retrieves a job from the "
        'central database and runs it. The "single-shot" option launches a single Rocket, '
        'whereas the "rapidfire" option loops until all FireWorks are completed.'
    )

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help="command", dest="command")
    single_parser = subparsers.add_parser("singleshot", help="launch a single Rocket")
    rapid_parser = subparsers.add_parser(
        "rapidfire", help="launch multiple Rockets (loop until all FireWorks complete)")
    multi_parser = subparsers.add_parser(
        "multi", help="launches multiple Rockets simultaneously")

    # singleshot options
    single_parser.add_argument("-f", "--fw_id", help="specific fw_id to run", default=None, type=int)
    single_parser.add_argument("--offline", help="run in offline mode (FW.json required)", action="store_true")
    single_parser.add_argument("--pdb", help="shortcut to invoke debugger on error", action="store_true")

    # rapidfire options
    rapid_parser.add_argument("--nlaunches", help='num_launches (int or "infinite"; '
                              "default 0 is all jobs in DB)", default=0)
    rapid_parser.add_argument(
        "--timeout", help="timeout (secs) after which to quit (default None)", default=None, type=int)
    rapid_parser.add_argument(
        "--max_loops",
        help=
        "after this many sleep loops, quit even in infinite nlaunches mode (default -1 is infinite loops)",
        default=-1,
        type=int,
    )
    rapid_parser.add_argument("--sleep", help="sleep time between loops (secs)", default=None, type=int)
    rapid_parser.add_argument(
        "--local_redirect", help="Redirect stdout and stderr to the launch directory", action="store_true")

    # multi (job-packing) options
    multi_parser.add_argument("num_jobs", help="the number of jobs to run in parallel", type=int)
    multi_parser.add_argument(
        "--nlaunches",
        help="number of FireWorks to run in series per "
        'parallel job (int or "infinite"; default 0 is '
        "all jobs in DB)",
        default=0,
    )
    multi_parser.add_argument(
        "--sleep", help="sleep time between loops in infinite launch mode (secs)", default=None, type=int)
    multi_parser.add_argument(
        "--timeout", help="timeout (secs) after which to quit (default None)", default=None, type=int)
    multi_parser.add_argument(
        "--nodefile",
        help="nodefile name or environment variable name "
        "containing the node file name (for populating"
        " FWData only)",
        default=None,
        type=str,
    )
    multi_parser.add_argument(
        "--ppn", help="processors per node (for populating FWData only)", default=1, type=int)
    multi_parser.add_argument(
        "--exclude_current_node", help="Don't use the script launching node as compute node",
        action="store_true")
    multi_parser.add_argument(
        "--local_redirect", help="Redirect stdout and stderr to the launch directory", action="store_true")

    # options shared by all sub-commands
    parser.add_argument("-l", "--launchpad_file", help="path to launchpad file")
    parser.add_argument("-w", "--fworker_file", help="path to fworker file")
    parser.add_argument(
        "-c",
        "--config_dir",
        help=
        "path to a directory containing the config file (used if -l, -w unspecified)",
        default=CONFIG_FILE_DIR,
    )
    parser.add_argument("--loglvl", help="level to print log messages", default="INFO")
    parser.add_argument("-s", "--silencer", help="shortcut to mute log messages", action="store_true")

    try:
        import argcomplete
        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        # eval "$(register-python-argcomplete rlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    # resolve launchpad file: config-dir file first, then global default
    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, "my_launchpad.yaml")):
        args.launchpad_file = os.path.join(args.config_dir, "my_launchpad.yaml")
    elif not args.launchpad_file:
        args.launchpad_file = LAUNCHPAD_LOC

    # resolve fworker file the same way
    if not args.fworker_file and os.path.exists(
            os.path.join(args.config_dir, "my_fworker.yaml")):
        args.fworker_file = os.path.join(args.config_dir, "my_fworker.yaml")
    elif not args.fworker_file:
        args.fworker_file = FWORKER_LOC

    # -s/--silencer overrides any explicit --loglvl
    args.loglvl = "CRITICAL" if args.silencer else args.loglvl

    # offline singleshot mode runs without a LaunchPad connection
    if args.command == "singleshot" and args.offline:
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(
            args.launchpad_file) if args.launchpad_file else LaunchPad(
                strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == "rapidfire":
        rapidfire(
            launchpad,
            fworker=fworker,
            m_dir=None,
            nlaunches=args.nlaunches,
            max_loops=args.max_loops,
            sleep_time=args.sleep,
            strm_lvl=args.loglvl,
            timeout=args.timeout,
            local_redirect=args.local_redirect,
        )
    elif args.command == "multi":
        total_node_list = None
        if args.nodefile:
            # --nodefile may name an environment variable holding the real path
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile) as f:
                total_node_list = [line.strip() for line in f.readlines()]
        launch_multiprocess(
            launchpad,
            fworker,
            args.loglvl,
            args.nlaunches,
            args.num_jobs,
            args.sleep,
            total_node_list,
            args.ppn,
            timeout=args.timeout,
            exclude_current_node=args.exclude_current_node,
            local_redirect=args.local_redirect,
        )
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl, pdb_on_exception=args.pdb)