def do_launch(args):
    """Resolve the config files, build the launch objects and dispatch.

    For each of the launchpad, fworker and queue-adapter files that was not
    supplied on ``args``, the matching ``my_*.yaml`` file inside
    ``args.config_dir`` is used when it exists.  ``args`` is updated in place
    with the chosen paths and with the effective log level.

    Args:
        args: parsed command-line namespace.  Reads ``launchpad_file``,
            ``fworker_file``, ``queueadapter_file``, ``config_dir``,
            ``loglvl``, ``silencer``, ``command``, ``launch_dir`` and
            ``reserve``; for the ``rapidfire`` command also ``nlaunches``,
            ``maxjobs_queue``, ``maxjobs_block`` and ``sleep``.
    """
    config_defaults = (
        ('launchpad_file', 'my_launchpad.yaml'),
        ('fworker_file', 'my_fworker.yaml'),
        ('queueadapter_file', 'my_qadapter.yaml'),
    )
    for attr, filename in config_defaults:
        if getattr(args, attr):
            continue  # an explicitly given path always wins
        candidate = os.path.join(args.config_dir, filename)
        if os.path.exists(candidate):
            setattr(args, attr, candidate)

    # Apply the silencer *before* building anything, so that a default
    # LaunchPad is created with the muted stream level as well.
    if args.silencer:
        args.loglvl = 'CRITICAL'

    if args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    queueadapter = load_object_from_file(args.queueadapter_file)

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker, queueadapter, args.launch_dir,
                  args.nlaunches, args.maxjobs_queue, args.maxjobs_block,
                  args.sleep, args.reserve, args.loglvl)
    else:
        launch_rocket_to_queue(launchpad, fworker, queueadapter,
                               args.launch_dir, args.reserve, args.loglvl,
                               False)
def _resolve_config_file(explicit, config_dir, filename, fallback):
    """Return ``explicit`` if set, else ``config_dir/filename`` if it exists, else ``fallback``."""
    if explicit:
        return explicit
    candidate = os.path.join(config_dir, filename)
    return candidate if os.path.exists(candidate) else fallback


def do_launch(args):
    """Resolve the config files, build the launch objects and dispatch.

    Each of the launchpad, fworker and queue-adapter paths is taken, in order
    of preference, from the value already on ``args``, from the matching
    ``my_*.yaml`` file in ``args.config_dir`` (when that file exists), or from
    the module-level ``*_LOC`` default.  ``args`` is updated in place with the
    chosen paths and with the effective log level.

    Args:
        args: parsed command-line namespace.  Reads ``launchpad_file``,
            ``fworker_file``, ``queueadapter_file``, ``config_dir``,
            ``loglvl``, ``silencer``, ``command``, ``launch_dir``,
            ``reserve`` and ``fill_mode``; for the ``rapidfire`` command also
            ``nlaunches``, ``maxjobs_queue``, ``maxjobs_block``, ``sleep``
            and ``timeout``.
    """
    args.launchpad_file = _resolve_config_file(
        args.launchpad_file, args.config_dir, 'my_launchpad.yaml',
        LAUNCHPAD_LOC)
    args.fworker_file = _resolve_config_file(
        args.fworker_file, args.config_dir, 'my_fworker.yaml', FWORKER_LOC)
    args.queueadapter_file = _resolve_config_file(
        args.queueadapter_file, args.config_dir, 'my_qadapter.yaml',
        QUEUEADAPTER_LOC)

    # Apply the silencer *before* building anything, so that a default
    # LaunchPad is created with the muted stream level as well.
    if args.silencer:
        args.loglvl = 'CRITICAL'

    if args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    queueadapter = load_object_from_file(args.queueadapter_file)

    if args.command == 'rapidfire':
        rapidfire(launchpad,
                  fworker=fworker,
                  qadapter=queueadapter,
                  launch_dir=args.launch_dir,
                  nlaunches=args.nlaunches,
                  njobs_queue=args.maxjobs_queue,
                  njobs_block=args.maxjobs_block,
                  sleep_time=args.sleep,
                  reserve=args.reserve,
                  strm_lvl=args.loglvl,
                  timeout=args.timeout,
                  fill_mode=args.fill_mode)
    else:
        launch_rocket_to_queue(launchpad, fworker, queueadapter,
                               args.launch_dir, args.reserve, args.loglvl,
                               False, args.fill_mode)
def recover_offline(args):
    """Recover all pending offline runs and log the outcome.

    Iterates over the offline-run records that are neither completed nor
    deprecated, optionally restricted to launches belonging to the FWorker
    described by ``args.fworker_file``, and calls ``lp.recover_offline`` on
    each.  A truthy return value from that call is treated as a failure.

    Args:
        args: parsed command-line namespace.  Reads ``fworker_file``,
            ``ignore_errors`` and ``print_errors`` (plus whatever ``get_lp``
            needs to build the LaunchPad).
    """
    lp = get_lp(args)

    fworker_name = None
    if args.fworker_file:
        fworker_name = FWorker.from_file(args.fworker_file).name

    pending_runs = lp.offline_runs.find(
        {"completed": False, "deprecated": False},
        {"launch_id": 1, "fw_id": 1})

    failed_fws = []
    recovered_fws = []
    for run in pending_runs:
        if fworker_name:
            # Collection.count() is deprecated and removed in PyMongo 4;
            # count_documents() is its replacement (PyMongo >= 3.7).
            n_owned = lp.launches.count_documents({
                "launch_id": run["launch_id"],
                "fworker.name": fworker_name,
            })
            if n_owned == 0:
                continue  # launch belongs to a different FWorker

        failure = lp.recover_offline(run['launch_id'], args.ignore_errors,
                                     args.print_errors)
        if failure:
            failed_fws.append(run['fw_id'])
        else:
            recovered_fws.append(run['fw_id'])

    lp.m_logger.info(
        "FINISHED recovering offline runs. {} job(s) recovered: {}".format(
            len(recovered_fws), recovered_fws))
    if failed_fws:
        lp.m_logger.info(
            "FAILED to recover offline fw_ids: {}".format(failed_fws))
def do_launch(args):
    """Resolve the config files, build the launch objects and dispatch.

    Each of the launchpad, fworker and queue-adapter paths is taken, in order
    of preference, from the value already on ``args``, from the matching
    ``my_*.yaml`` file in ``args.config_dir`` (when that file exists), or from
    the module-level ``*_LOC`` default.  ``args`` is updated in place with the
    chosen paths and with the effective log level.

    Args:
        args: parsed command-line namespace.  Reads ``launchpad_file``,
            ``fworker_file``, ``queueadapter_file``, ``config_dir``,
            ``loglvl``, ``silencer``, ``command``, ``launch_dir``,
            ``reserve`` and ``fill_mode``; for the ``rapidfire`` command also
            ``nlaunches``, ``maxjobs_queue``, ``maxjobs_block``, ``sleep``
            and ``timeout``; otherwise also ``fw_id``.
    """
    config_sources = (
        ('launchpad_file', 'my_launchpad.yaml', LAUNCHPAD_LOC),
        ('fworker_file', 'my_fworker.yaml', FWORKER_LOC),
        ('queueadapter_file', 'my_qadapter.yaml', QUEUEADAPTER_LOC),
    )
    for attr, filename, fallback in config_sources:
        if getattr(args, attr):
            continue  # an explicitly given path always wins
        candidate = os.path.join(args.config_dir, filename)
        setattr(args, attr,
                candidate if os.path.exists(candidate) else fallback)

    # Apply the silencer *before* building anything, so that a default
    # LaunchPad is created with the muted stream level as well.
    if args.silencer:
        args.loglvl = 'CRITICAL'

    if args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    queueadapter = load_object_from_file(args.queueadapter_file)

    if args.command == 'rapidfire':
        rapidfire(launchpad,
                  fworker=fworker,
                  qadapter=queueadapter,
                  launch_dir=args.launch_dir,
                  nlaunches=args.nlaunches,
                  njobs_queue=args.maxjobs_queue,
                  njobs_block=args.maxjobs_block,
                  sleep_time=args.sleep,
                  reserve=args.reserve,
                  strm_lvl=args.loglvl,
                  timeout=args.timeout,
                  fill_mode=args.fill_mode)
    else:
        launch_rocket_to_queue(launchpad, fworker, queueadapter,
                               args.launch_dir, args.reserve, args.loglvl,
                               False, args.fill_mode, args.fw_id)
def rlaunch():
    """Command-line entry point: launch a single Rocket or loop in rapidfire.

    Parses ``sys.argv``, resolves the launchpad/fworker config files (falling
    back to ``my_launchpad.yaml`` / ``my_fworker.yaml`` in the config
    directory when no path is set), builds the LaunchPad and FWorker and then
    calls ``rapidfire`` or ``launch_rocket`` depending on the sub-command.

    Exits with a usage error when no sub-command is given.
    """
    m_description = 'This program launches one or more Rockets. A Rocket grabs a job from the central database and ' \
                    'runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot',
                                          help='launch a single Rocket')
    rapid_parser = subparsers.add_parser(
        'rapidfire',
        help='launch multiple Rockets (loop until all FireWorks complete)')

    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run',
                               default=None, type=int)
    single_parser.add_argument('--offline',
                               help='run in offline mode (FW.json required)',
                               action='store_true')

    rapid_parser.add_argument(
        '--nlaunches',
        help='num_launches (int or "infinite"; default 0 is all jobs in DB)',
        default=0)
    rapid_parser.add_argument('--sleep',
                              help='sleep time between loops (secs)',
                              default=None, type=int)

    parser.add_argument('-l', '--launchpad_file',
                        help='path to launchpad file', default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file',
                        default=FWORKER_LOC)
    parser.add_argument(
        '-c', '--config_dir',
        help='path to a directory containing the config file (used if -l, -w unspecified)',
        default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages',
                        default='INFO')
    parser.add_argument('-s', '--silencer',
                        help='shortcut to mute log messages',
                        action='store_true')

    args = parser.parse_args()

    # On Python 3 the sub-command is optional; without one the singleshot
    # options (e.g. fw_id) are never set and the dispatch below would fail
    # with an AttributeError, so reject the invocation up front.
    if not args.command:
        parser.error('a command is required: singleshot or rapidfire')

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir,
                                           'my_launchpad.yaml')

    if not args.fworker_file and os.path.exists(
            os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'singleshot' and args.offline:
        # Offline mode runs without a LaunchPad.
        launchpad = None
    elif args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker, None, args.nlaunches, -1, args.sleep,
                  args.loglvl)
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl)
def get_fworker(fworker):
    """Return the FWorker to use, falling back to the configured defaults.

    Args:
        fworker: a worker supplied by the caller; returned unchanged when
            truthy.

    Returns:
        ``fworker`` itself if truthy; otherwise a worker loaded from
        ``FWORKER_LOC`` when that location is set; otherwise a default
        ``FWorker()``.
    """
    if fworker:
        return fworker
    if FWORKER_LOC:
        return FWorker.from_file(FWORKER_LOC)
    return FWorker()
def mlaunch():
    """Command-line entry point: launch several Rockets in parallel.

    Parses ``sys.argv``, fills in the launchpad/fworker files from the config
    directory when they are unset, builds the LaunchPad and FWorker, reads
    the optional node file and hands everything to ``launch_multiprocess``.
    """
    parser = ArgumentParser(
        description='This program launches multiple Rockets simultaneously')

    parser.add_argument('num_jobs', type=int,
                        help='the number of jobs to run in parallel')
    parser.add_argument(
        '--nlaunches', default=0,
        help='number of FireWorks to run in series per parallel job (int or "infinite"; default 0 is all jobs in DB)')
    parser.add_argument(
        '--sleep', default=None, type=int,
        help='sleep time between loops in infinite launch mode (secs)')
    parser.add_argument(
        '--timeout', default=None, type=int,
        help='timeout (secs) after which to quit (default None)')

    parser.add_argument('-l', '--launchpad_file', default=LAUNCHPAD_LOC,
                        help='path to launchpad file')
    parser.add_argument('-w', '--fworker_file', default=FWORKER_LOC,
                        help='path to fworker file')
    parser.add_argument(
        '-c', '--config_dir', default=CONFIG_FILE_DIR,
        help='path to a directory containing the config file (used if -l, -w unspecified)')

    parser.add_argument('--loglvl', default='INFO',
                        help='level to print log messages')
    parser.add_argument('-s', '--silencer', action='store_true',
                        help='shortcut to mute log messages')

    parser.add_argument(
        '--nodefile', default=None, type=str,
        help='nodefile name or environment variable name containing the node file name (for populating FWData only)')
    parser.add_argument(
        '--ppn', default=1, type=int,
        help='processors per node (for populating FWData only)')

    args = parser.parse_args()

    # Fall back to the files in the config directory for unset paths.
    if not args.launchpad_file and args.config_dir:
        lp_candidate = os.path.join(args.config_dir, 'my_launchpad.yaml')
        if os.path.exists(lp_candidate):
            args.launchpad_file = lp_candidate

    if not args.fworker_file and args.config_dir:
        fw_candidate = os.path.join(args.config_dir, 'my_fworker.yaml')
        if os.path.exists(fw_candidate):
            args.fworker_file = fw_candidate

    if args.silencer:
        args.loglvl = 'CRITICAL'

    if args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    fworker = (FWorker.from_file(args.fworker_file)
               if args.fworker_file else FWorker())

    total_node_list = None
    if args.nodefile:
        # --nodefile may name an environment variable that holds the path.
        args.nodefile = os.environ.get(args.nodefile, args.nodefile)
        with open(args.nodefile, 'r') as node_fh:
            total_node_list = [entry.strip() for entry in node_fh]

    launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches,
                        args.num_jobs, args.sleep, total_node_list, args.ppn,
                        timeout=args.timeout)
def mlaunch():
    """Command-line entry point: launch several Rockets in parallel.

    Parses ``sys.argv``, fills in the launchpad/fworker files from the config
    directory when they are unset, builds the LaunchPad and FWorker, reads
    the optional node file and hands everything to ``launch_multiprocess``.
    """
    # (flags, keyword options) for every command-line argument, in the order
    # they are registered with the parser.
    argument_table = [
        (('num_jobs',),
         dict(help='the number of jobs to run in parallel', type=int)),
        (('--nlaunches',),
         dict(help='number of FireWorks to run in series per parallel job (int or "infinite"; default 0 is all jobs in DB)',
              default=0)),
        (('--sleep',),
         dict(help='sleep time between loops in infinite launch mode (secs)',
              default=None, type=int)),
        (('-l', '--launchpad_file'),
         dict(help='path to launchpad file', default=LAUNCHPAD_LOC)),
        (('-w', '--fworker_file'),
         dict(help='path to fworker file', default=FWORKER_LOC)),
        (('-c', '--config_dir'),
         dict(help='path to a directory containing the config file (used if -l, -w unspecified)',
              default=CONFIG_FILE_DIR)),
        (('--loglvl',),
         dict(help='level to print log messages', default='INFO')),
        (('-s', '--silencer'),
         dict(help='shortcut to mute log messages', action='store_true')),
        (('--nodefile',),
         dict(help='nodefile name or environment variable name containing the node file name (for populating FWData only)',
              default=None, type=str)),
        (('--ppn',),
         dict(help='processors per node (for populating FWData only)',
              default=1, type=int)),
    ]

    parser = ArgumentParser(
        description='This program launches multiple Rockets simultaneously')
    for flags, options in argument_table:
        parser.add_argument(*flags, **options)
    args = parser.parse_args()

    # Fall back to the files in the config directory for unset paths.
    for attr, filename in (('launchpad_file', 'my_launchpad.yaml'),
                           ('fworker_file', 'my_fworker.yaml')):
        if getattr(args, attr) or not args.config_dir:
            continue
        candidate = os.path.join(args.config_dir, filename)
        if os.path.exists(candidate):
            setattr(args, attr, candidate)

    if args.silencer:
        args.loglvl = 'CRITICAL'

    if args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    fworker = (FWorker.from_file(args.fworker_file)
               if args.fworker_file else FWorker())

    total_node_list = None
    if args.nodefile:
        # --nodefile may name an environment variable that holds the path.
        if args.nodefile in os.environ:
            args.nodefile = os.environ[args.nodefile]
        with open(args.nodefile, 'r') as node_fh:
            total_node_list = [entry.strip() for entry in node_fh]

    launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches,
                        args.num_jobs, args.sleep, total_node_list, args.ppn)
def recover_offline(args):
    """Recover all pending offline runs and log the outcome.

    Walks the offline-run records that are neither completed nor deprecated.
    When ``args.fworker_file`` is set, only launches recorded for that
    FWorker's name are processed.  Each remaining launch is passed to
    ``lp.recover_offline``; a truthy return value is treated as a failure.

    Args:
        args: parsed command-line namespace.  Reads ``fworker_file``,
            ``ignore_errors`` and ``print_errors`` (plus whatever ``get_lp``
            needs to build the LaunchPad).
    """
    lp = get_lp(args)
    if args.fworker_file:
        fworker_name = FWorker.from_file(args.fworker_file).name
    else:
        fworker_name = None

    query = {"completed": False, "deprecated": False}
    fields = {"launch_id": 1, "fw_id": 1}

    failed_fws, recovered_fws = [], []
    for offline_run in lp.offline_runs.find(query, fields):
        launch_id = offline_run["launch_id"]

        # Skip launches that were not made by the requested FWorker.
        # count_documents() replaces Collection.count(), which is deprecated
        # and removed in PyMongo 4 (requires PyMongo >= 3.7).
        if fworker_name and lp.launches.count_documents(
                {"launch_id": launch_id, "fworker.name": fworker_name}) == 0:
            continue

        failure = lp.recover_offline(offline_run['launch_id'],
                                     args.ignore_errors, args.print_errors)
        target = failed_fws if failure else recovered_fws
        target.append(offline_run['fw_id'])

    lp.m_logger.info("FINISHED recovering offline runs. {} job(s) recovered: {}".format(
        len(recovered_fws), recovered_fws))
    if failed_fws:
        lp.m_logger.info("FAILED to recover offline fw_ids: {}".format(failed_fws))
def mlaunch():
    """Command-line entry point: launch several Rockets in parallel.

    Parses ``sys.argv`` (with optional argcomplete support), fills in the
    launchpad/fworker files from the config directory when they are unset,
    builds the LaunchPad and FWorker, reads the optional node file and hands
    everything to ``launch_multiprocess``.
    """
    parser = ArgumentParser(
        description='This program launches multiple Rockets simultaneously')

    add = parser.add_argument
    add('num_jobs', help='the number of jobs to run in parallel', type=int)
    add('--nlaunches',
        help='number of FireWorks to run in series per parallel job '
             '(int or "infinite"; default 0 is all jobs in DB)',
        default=0)
    add('--sleep',
        help='sleep time between loops in infinite launch mode (secs)',
        default=None, type=int)
    add('--timeout',
        help='timeout (secs) after which to quit (default None)',
        default=None, type=int)
    add('-l', '--launchpad_file', help='path to launchpad file',
        default=LAUNCHPAD_LOC)
    add('-w', '--fworker_file', help='path to fworker file',
        default=FWORKER_LOC)
    add('-c', '--config_dir',
        help='path to a directory containing the config file '
             '(used if -l, -w unspecified)',
        default=CONFIG_FILE_DIR)
    add('--loglvl', help='level to print log messages', default='INFO')
    add('-s', '--silencer', help='shortcut to mute log messages',
        action='store_true')
    add('--nodefile',
        help='nodefile name or environment variable name containing '
             'the node file name (for populating FWData only)',
        default=None, type=str)
    add('--ppn', help='processors per node (for populating FWData only)',
        default=1, type=int)
    add('--exclude_current_node',
        help="Don't use the script launching node as compute node",
        action="store_true")

    # Optional bash autocompletion. To enable it, pip install argcomplete
    # and either activate global completion or add
    #     eval "$(register-python-argcomplete mlaunch)"
    # to your .bash_profile or .bashrc.
    try:
        import argcomplete
        argcomplete.autocomplete(parser)
    except ImportError:
        pass

    args = parser.parse_args()

    # Fall back to the files in the config directory for unset paths.
    for attr, filename in (('launchpad_file', 'my_launchpad.yaml'),
                           ('fworker_file', 'my_fworker.yaml')):
        if getattr(args, attr) or not args.config_dir:
            continue
        candidate = os.path.join(args.config_dir, filename)
        if os.path.exists(candidate):
            setattr(args, attr, candidate)

    if args.silencer:
        args.loglvl = 'CRITICAL'

    if args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    fworker = (FWorker.from_file(args.fworker_file)
               if args.fworker_file else FWorker())

    total_node_list = None
    if args.nodefile:
        # --nodefile may name an environment variable that holds the path.
        args.nodefile = os.environ.get(args.nodefile, args.nodefile)
        with open(args.nodefile, 'r') as node_fh:
            total_node_list = [entry.strip() for entry in node_fh]

    launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches,
                        args.num_jobs, args.sleep, total_node_list, args.ppn,
                        timeout=args.timeout,
                        exclude_current_node=args.exclude_current_node)
# Script setup: load the LaunchPad and FWorker from the config directory and
# prepare the query, sort order and projection for READY FireWorks.
import os
import json
from pymongo import DESCENDING, ASCENDING
from fireworks.fw_config import CONFIG_FILE_DIR, SORT_FWS
from fireworks.core.fworker import FWorker
from fireworks.core.launchpad import LaunchPad
from pymongo import ReturnDocument

launchpad = LaunchPad.from_file(
    os.path.join(CONFIG_FILE_DIR, 'my_launchpad.yaml'))
fworker = FWorker.from_file(os.path.join(CONFIG_FILE_DIR, 'my_fworker.yaml'))

# debug aid: print launchpad._get_a_fw_to_run(query=fworker.query, checkout=False)

# Copy of the worker's query, restricted to FireWorks in the READY state.
m_query = dict(fworker.query, state='READY')

# Highest priority first; SORT_FWS optionally adds a creation-time tiebreak.
sortby = [("spec._priority", DESCENDING)]
if SORT_FWS.upper() == "FIFO":
    sortby.append(("created_on", ASCENDING))
elif SORT_FWS.upper() == "FILO":
    sortby.append(("created_on", DESCENDING))

# debug aid: print json.dumps(m_query, indent=4)

# Fields to fetch: drop _id, include everything listed below.
projection = {'_id': 0}
projection.update(dict.fromkeys((
    'fw_id',
    'spec._fworker',
    'spec.task_type',
    'spec._queueadapter',
    'spec.mpsnl.about.remarks',
    'spec.snl.about.remarks',
    'spec.prev_vasp_dir',
    'updated_on',
    'state',
), 1))
parser.add_argument('-w', '--fworker_file', help='path to fworker file', default=None) parser.add_argument('--logdir', help='path to a directory for logging', default=None) parser.add_argument('--loglvl', help='level to print log messages', default='INFO') parser.add_argument('--silencer', help='shortcut to mute log messages', action='store_true') args = parser.parse_args() if not args.launchpad_file and os.path.exists('my_launchpad.yaml'): args.launchpad_file = 'my_launchpad.yaml' if not args.fworker_file and os.path.exists('my_fworker.yaml'): args.fworker_file = 'my_fworker.yaml' args.loglvl = 'CRITICAL' if args.silencer else args.loglvl if args.launchpad_file: launchpad = LaunchPad.from_file(args.launchpad_file) else: launchpad = LaunchPad(logdir=args.logdir, strm_lvl=args.loglvl) if args.fworker_file: fworker = FWorker.from_file(args.fworker_file) else: fworker = FWorker() if args.command == 'rapidfire': rapidfire(launchpad, fworker, None, args.logdir, args.loglvl, args.nlaunches, args.sleep) else: launch_rocket(launchpad, fworker, args.logdir, args.loglvl, args.fw_id)
import os, json from pymongo import DESCENDING, ASCENDING from fireworks.fw_config import CONFIG_FILE_DIR, SORT_FWS from fireworks.core.fworker import FWorker from fireworks.core.launchpad import LaunchPad from pymongo import ReturnDocument launchpad = LaunchPad.from_file(os.path.join(CONFIG_FILE_DIR, 'my_launchpad.yaml')) fworker = FWorker.from_file(os.path.join(CONFIG_FILE_DIR, 'my_fworker.yaml')) #print launchpad._get_a_fw_to_run(query=fworker.query, checkout=False) m_query = dict(fworker.query) m_query['state'] = 'READY' sortby = [("spec._priority", DESCENDING)] if SORT_FWS.upper() == "FIFO": sortby.append(("created_on", ASCENDING)) elif SORT_FWS.upper() == "FILO": sortby.append(("created_on", DESCENDING)) #print json.dumps(m_query, indent=4) projection = { '_id': 0, 'fw_id': 1, 'spec._fworker': 1, 'spec.task_type': 1, 'spec._queueadapter': 1, 'spec.mpsnl.about.remarks': 1, 'spec.snl.about.remarks': 1, 'spec.prev_vasp_dir': 1, 'updated_on': 1, 'state': 1 } fw_ids = [] for idoc, doc in enumerate(launchpad.fireworks.find(m_query, projection=projection, sort=sortby).limit(100)): #print doc if 'walltime' in doc['spec']['_queueadapter']: walltime = doc['spec']['_queueadapter']['walltime'] if int(walltime.split(':')[0]) > 48: launchpad.fireworks.find_one_and_update(
def rlaunch():
    """Command-line entry point: launch Rockets (single, rapidfire or multi).

    Parses ``sys.argv`` (with optional argcomplete support), resolves the
    launchpad/fworker config files (explicit option, then the ``my_*.yaml``
    file in the config directory, then the module-level ``*_LOC`` default),
    builds the LaunchPad and FWorker and dispatches on the sub-command:
    ``rapidfire`` -> ``rapidfire``, ``multi`` -> ``launch_multiprocess``,
    ``singleshot`` -> ``launch_rocket``.

    Exits with a usage error when no sub-command is given.
    """
    m_description = (
        "This program launches one or more Rockets. A Rocket retrieves a job from the "
        'central database and runs it. The "single-shot" option launches a single Rocket, '
        'whereas the "rapidfire" option loops until all FireWorks are completed.'
    )

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help="command", dest="command")
    single_parser = subparsers.add_parser("singleshot", help="launch a single Rocket")
    rapid_parser = subparsers.add_parser(
        "rapidfire", help="launch multiple Rockets (loop until all FireWorks complete)")
    multi_parser = subparsers.add_parser(
        "multi", help="launches multiple Rockets simultaneously")

    single_parser.add_argument("-f", "--fw_id", help="specific fw_id to run",
                               default=None, type=int)
    single_parser.add_argument("--offline", help="run in offline mode (FW.json required)",
                               action="store_true")
    single_parser.add_argument("--pdb", help="shortcut to invoke debugger on error",
                               action="store_true")

    rapid_parser.add_argument("--nlaunches",
                              help='num_launches (int or "infinite"; '
                              "default 0 is all jobs in DB)",
                              default=0)
    rapid_parser.add_argument(
        "--timeout", help="timeout (secs) after which to quit (default None)",
        default=None, type=int)
    rapid_parser.add_argument(
        "--max_loops",
        help="after this many sleep loops, quit even in infinite nlaunches mode (default -1 is infinite loops)",
        default=-1,
        type=int,
    )
    rapid_parser.add_argument("--sleep", help="sleep time between loops (secs)",
                              default=None, type=int)
    rapid_parser.add_argument(
        "--local_redirect", help="Redirect stdout and stderr to the launch directory",
        action="store_true")

    multi_parser.add_argument("num_jobs", help="the number of jobs to run in parallel",
                              type=int)
    multi_parser.add_argument(
        "--nlaunches",
        help="number of FireWorks to run in series per "
        'parallel job (int or "infinite"; default 0 is '
        "all jobs in DB)",
        default=0,
    )
    multi_parser.add_argument(
        "--sleep", help="sleep time between loops in infinite launch mode (secs)",
        default=None, type=int)
    multi_parser.add_argument(
        "--timeout", help="timeout (secs) after which to quit (default None)",
        default=None, type=int)
    multi_parser.add_argument(
        "--nodefile",
        help="nodefile name or environment variable name "
        "containing the node file name (for populating"
        " FWData only)",
        default=None,
        type=str,
    )
    multi_parser.add_argument(
        "--ppn", help="processors per node (for populating FWData only)",
        default=1, type=int)
    multi_parser.add_argument(
        "--exclude_current_node",
        help="Don't use the script launching node as compute node",
        action="store_true")
    multi_parser.add_argument(
        "--local_redirect", help="Redirect stdout and stderr to the launch directory",
        action="store_true")

    parser.add_argument("-l", "--launchpad_file", help="path to launchpad file")
    parser.add_argument("-w", "--fworker_file", help="path to fworker file")
    parser.add_argument(
        "-c",
        "--config_dir",
        help="path to a directory containing the config file (used if -l, -w unspecified)",
        default=CONFIG_FILE_DIR,
    )
    parser.add_argument("--loglvl", help="level to print log messages", default="INFO")
    parser.add_argument("-s", "--silencer", help="shortcut to mute log messages",
                        action="store_true")

    try:
        import argcomplete

        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        #      eval "$(register-python-argcomplete rlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    # On Python 3 the sub-command is optional; without one the singleshot
    # options (fw_id, pdb, ...) are never set and the dispatch below would
    # fail with an AttributeError, so reject the invocation up front.
    if not args.command:
        parser.error("a command is required: singleshot, rapidfire or multi")

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, "my_launchpad.yaml")):
        args.launchpad_file = os.path.join(args.config_dir, "my_launchpad.yaml")
    elif not args.launchpad_file:
        args.launchpad_file = LAUNCHPAD_LOC

    if not args.fworker_file and os.path.exists(
            os.path.join(args.config_dir, "my_fworker.yaml")):
        args.fworker_file = os.path.join(args.config_dir, "my_fworker.yaml")
    elif not args.fworker_file:
        args.fworker_file = FWORKER_LOC

    args.loglvl = "CRITICAL" if args.silencer else args.loglvl

    if args.command == "singleshot" and args.offline:
        # Offline mode runs without a LaunchPad.
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(
            args.launchpad_file) if args.launchpad_file else LaunchPad(
                strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == "rapidfire":
        rapidfire(
            launchpad,
            fworker=fworker,
            m_dir=None,
            nlaunches=args.nlaunches,
            max_loops=args.max_loops,
            sleep_time=args.sleep,
            strm_lvl=args.loglvl,
            timeout=args.timeout,
            local_redirect=args.local_redirect,
        )
    elif args.command == "multi":
        total_node_list = None
        if args.nodefile:
            # --nodefile may name an environment variable that holds the path.
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile) as f:
                total_node_list = [line.strip() for line in f.readlines()]
        launch_multiprocess(
            launchpad,
            fworker,
            args.loglvl,
            args.nlaunches,
            args.num_jobs,
            args.sleep,
            total_node_list,
            args.ppn,
            timeout=args.timeout,
            exclude_current_node=args.exclude_current_node,
            local_redirect=args.local_redirect,
        )
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl,
                      pdb_on_exception=args.pdb)
def rlaunch():
    """Command-line entry point: launch Rockets (single, rapidfire or multi).

    Parses ``sys.argv`` (with optional argcomplete support), fills in the
    launchpad/fworker files from the config directory when they are unset,
    builds the LaunchPad and FWorker and dispatches on the sub-command:
    ``rapidfire`` -> ``rapidfire``, ``multi`` -> ``launch_multiprocess``,
    ``singleshot`` -> ``launch_rocket``.

    Exits with a usage error when no sub-command is given.
    """
    m_description = 'This program launches one or more Rockets. A Rocket retrieves a job from the ' \
                    'central database and runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot',
                                          help='launch a single Rocket')
    rapid_parser = subparsers.add_parser(
        'rapidfire',
        help='launch multiple Rockets (loop until all FireWorks complete)')
    multi_parser = subparsers.add_parser(
        'multi', help='launches multiple Rockets simultaneously')

    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run',
                               default=None, type=int)
    single_parser.add_argument('--offline',
                               help='run in offline mode (FW.json required)',
                               action='store_true')

    rapid_parser.add_argument('--nlaunches',
                              help='num_launches (int or "infinite"; '
                                   'default 0 is all jobs in DB)',
                              default=0)
    rapid_parser.add_argument(
        '--timeout',
        help='timeout (secs) after which to quit (default None)',
        default=None, type=int)
    rapid_parser.add_argument(
        '--max_loops',
        help='after this many sleep loops, quit even in '
             'infinite nlaunches mode (default -1 is infinite loops)',
        default=-1, type=int)
    rapid_parser.add_argument('--sleep',
                              help='sleep time between loops (secs)',
                              default=None, type=int)
    rapid_parser.add_argument(
        '--local_redirect',
        help="Redirect stdout and stderr to the launch directory",
        action="store_true")

    multi_parser.add_argument('num_jobs',
                              help='the number of jobs to run in parallel',
                              type=int)
    multi_parser.add_argument('--nlaunches',
                              help='number of FireWorks to run in series per '
                                   'parallel job (int or "infinite"; default 0 is '
                                   'all jobs in DB)',
                              default=0)
    # The two help strings below were missing a space where the literals
    # are concatenated ("mode(secs)", "nodeas compute node").
    multi_parser.add_argument(
        '--sleep',
        help='sleep time between loops in infinite launch mode '
             '(secs)',
        default=None, type=int)
    multi_parser.add_argument(
        '--timeout',
        help='timeout (secs) after which to quit (default None)',
        default=None, type=int)
    multi_parser.add_argument(
        '--nodefile',
        help='nodefile name or environment variable name '
             'containing the node file name (for populating'
             ' FWData only)',
        default=None, type=str)
    multi_parser.add_argument(
        '--ppn', help='processors per node (for populating FWData only)',
        default=1, type=int)
    multi_parser.add_argument('--exclude_current_node',
                              help="Don't use the script launching node "
                                   "as compute node",
                              action="store_true")
    multi_parser.add_argument(
        '--local_redirect',
        help="Redirect stdout and stderr to the launch directory",
        action="store_true")

    parser.add_argument('-l', '--launchpad_file',
                        help='path to launchpad file', default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file',
                        default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir',
                        help='path to a directory containing the config file '
                             '(used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages',
                        default='INFO')
    parser.add_argument('-s', '--silencer',
                        help='shortcut to mute log messages',
                        action='store_true')

    try:
        import argcomplete
        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        #      eval "$(register-python-argcomplete rlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    # On Python 3 the sub-command is optional; without one the singleshot
    # options (e.g. fw_id) are never set and the dispatch below would fail
    # with an AttributeError, so reject the invocation up front.
    if not args.command:
        parser.error('a command is required: singleshot, rapidfire or multi')

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir,
                                           'my_launchpad.yaml')

    if not args.fworker_file and os.path.exists(
            os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'singleshot' and args.offline:
        # Offline mode runs without a LaunchPad.
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(
            args.launchpad_file) if args.launchpad_file else LaunchPad(
                strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker=fworker, m_dir=None,
                  nlaunches=args.nlaunches, max_loops=args.max_loops,
                  sleep_time=args.sleep, strm_lvl=args.loglvl,
                  timeout=args.timeout,
                  local_redirect=args.local_redirect)
    elif args.command == 'multi':
        total_node_list = None
        if args.nodefile:
            # --nodefile may name an environment variable that holds the path.
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile, 'r') as f:
                total_node_list = [line.strip() for line in f.readlines()]
        launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches,
                            args.num_jobs, args.sleep, total_node_list,
                            args.ppn, timeout=args.timeout,
                            exclude_current_node=args.exclude_current_node,
                            local_redirect=args.local_redirect)
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl)
rapid_parser.add_argument('--nlaunches', help='num_launches (int or "infinite")') rapid_parser.add_argument('--sleep', help='sleep time between loops', default=60, type=int) args = parser.parse_args() if not args.launchpad_file and os.path.exists('my_launchpad.yaml'): args.launchpad_file = 'my_launchpad.yaml' if not args.fworker_file and os.path.exists('my_fworker.yaml'): args.fworker_file = 'my_fworker.yaml' launchpad = LaunchPad.from_file( args.launchpad_file) if args.launchpad_file else None fworker = FWorker.from_file( args.fworker_file) if args.fworker_file else FWorker() rocket_params = QueueParams.from_file(args.queue_params_file) args.loglvl = 'CRITICAL' if args.silencer else args.loglvl # TODO: the number of arguments here is crazy! if args.command == 'rapidfire': rapidfire(rocket_params, args.launch_dir, args.njobs_queue, args.njobs_block, args.loglvl, args.nlaunches, args.sleep, launchpad, fworker, args.reserve) else: launch_rocket_to_queue(rocket_params, args.launch_dir, args.loglvl, launchpad, fworker, args.reserve)
default='INFO') parser.add_argument('--silencer', help='shortcut to mute log messages', action='store_true') args = parser.parse_args() if not args.launchpad_file and os.path.exists('my_launchpad.yaml'): args.launchpad_file = 'my_launchpad.yaml' if not args.fworker_file and os.path.exists('my_fworker.yaml'): args.fworker_file = 'my_fworker.yaml' args.loglvl = 'CRITICAL' if args.silencer else args.loglvl if args.launchpad_file: launchpad = LaunchPad.from_file(args.launchpad_file) else: launchpad = LaunchPad(logdir=args.logdir, strm_lvl=args.loglvl) if args.fworker_file: fworker = FWorker.from_file(args.fworker_file) else: fworker = FWorker() if args.command == 'rapidfire': rapidfire(launchpad, fworker, None, args.logdir, args.loglvl, args.nlaunches, args.sleep) else: launch_rocket(launchpad, fworker, args.logdir, args.loglvl, args.fw_id)
def mlaunch():
    """Command-line entry point that launches several Rockets in parallel.

    Parses the command line, resolves the LaunchPad and FWorker
    configuration (explicit path first, then the conventional file names
    inside ``--config_dir``), optionally reads a node file, and hands
    everything to ``launch_multiprocess``.
    """
    description = 'This program launches multiple Rockets simultaneously'
    parser = ArgumentParser(description=description)

    parser.add_argument('num_jobs', type=int,
                        help='the number of jobs to run in parallel')
    parser.add_argument('--nlaunches', default=0,
                        help='number of FireWorks to run in series per '
                             'parallel job (int or "infinite"; default 0 '
                             'is all jobs in DB)')
    parser.add_argument('--sleep', default=None, type=int,
                        help='sleep time between loops in infinite launch '
                             'mode (secs)')
    parser.add_argument('--timeout', default=None, type=int,
                        help='timeout (secs) after which to quit '
                             '(default None)')
    parser.add_argument('-l', '--launchpad_file', default=LAUNCHPAD_LOC,
                        help='path to launchpad file')
    parser.add_argument('-w', '--fworker_file', default=FWORKER_LOC,
                        help='path to fworker file')
    parser.add_argument('-c', '--config_dir', default=CONFIG_FILE_DIR,
                        help='path to a directory containing the config '
                             'file (used if -l, -w unspecified)')
    parser.add_argument('--loglvl', default='INFO',
                        help='level to print log messages')
    parser.add_argument('-s', '--silencer', action='store_true',
                        help='shortcut to mute log messages')
    parser.add_argument('--nodefile', default=None, type=str,
                        help='nodefile name or environment variable name '
                             'containing the node file name (for '
                             'populating FWData only)')
    parser.add_argument('--ppn', default=1, type=int,
                        help='processors per node (for populating FWData '
                             'only)')
    parser.add_argument('--exclude_current_node', action="store_true",
                        help="Don't use the script launching node as "
                             "compute node")

    # Optional bash autocompletion. To enable it, pip install argcomplete,
    # activate global completion, or add
    #      eval "$(register-python-argcomplete mlaunch)"
    # to your .bash_profile or .bashrc. Without argcomplete installed the
    # ImportError is deliberately ignored.
    try:
        import argcomplete
        argcomplete.autocomplete(parser)
    except ImportError:
        pass

    args = parser.parse_args()

    # For every config file not given explicitly, fall back to its
    # conventional name inside config_dir, provided that file exists.
    conventional_names = (
        ('launchpad_file', 'my_launchpad.yaml'),
        ('fworker_file', 'my_fworker.yaml'),
    )
    for option, default_name in conventional_names:
        if getattr(args, option) or not args.config_dir:
            continue
        candidate = os.path.join(args.config_dir, default_name)
        if os.path.exists(candidate):
            setattr(args, option, candidate)

    # --silencer overrides whatever --loglvl was requested.
    if args.silencer:
        args.loglvl = 'CRITICAL'

    if args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    fworker = (FWorker.from_file(args.fworker_file)
               if args.fworker_file else FWorker())

    node_names = None
    if args.nodefile:
        # --nodefile may name an environment variable that holds the real
        # path; if no such variable is set, the value is used as the path.
        args.nodefile = os.environ.get(args.nodefile, args.nodefile)
        with open(args.nodefile, 'r') as handle:
            node_names = [entry.strip() for entry in handle]

    launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches,
                        args.num_jobs, args.sleep, node_names, args.ppn,
                        timeout=args.timeout,
                        exclude_current_node=args.exclude_current_node)
def rlaunch():
    """Command-line entry point that launches one or more Rockets.

    Three sub-commands are offered:

    * ``singleshot`` -- run a single Rocket through ``launch_rocket``
      (with ``--offline`` no LaunchPad is created at all);
    * ``rapidfire`` -- loop through ``rapidfire``;
    * ``multi`` -- run several Rockets in parallel through
      ``launch_multiprocess``.

    The LaunchPad and FWorker configuration is taken from ``-l`` / ``-w``
    when given; otherwise ``my_launchpad.yaml`` / ``my_fworker.yaml`` inside
    ``--config_dir`` are used if they exist; otherwise default objects are
    constructed.
    """
    m_description = (
        'This program launches one or more Rockets. A Rocket grabs a job '
        'from the central database and runs it. The "single-shot" option '
        'launches a single Rocket, whereas the "rapidfire" option loops '
        'until all FireWorks are completed.')

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser(
        'singleshot', help='launch a single Rocket')
    rapid_parser = subparsers.add_parser(
        'rapidfire',
        help='launch multiple Rockets (loop until all FireWorks complete)')
    multi_parser = subparsers.add_parser(
        'multi', help='launches multiple Rockets simultaneously')

    # --- singleshot options ---
    single_parser.add_argument('-f', '--fw_id',
                               help='specific fw_id to run',
                               default=None, type=int)
    single_parser.add_argument('--offline',
                               help='run in offline mode (FW.json required)',
                               action='store_true')

    # --- rapidfire options ---
    rapid_parser.add_argument('--nlaunches',
                              help='num_launches (int or "infinite"; '
                                   'default 0 is all jobs in DB)',
                              default=0)
    rapid_parser.add_argument('--timeout',
                              help='timeout (secs) after which to quit '
                                   '(default None)',
                              default=None, type=int)
    rapid_parser.add_argument('--max_loops',
                              help='after this many sleep loops, quit even '
                                   'in infinite nlaunches mode (default -1 '
                                   'is infinite loops)',
                              default=-1, type=int)
    rapid_parser.add_argument('--sleep',
                              help='sleep time between loops (secs)',
                              default=None, type=int)

    # --- multi options ---
    multi_parser.add_argument('num_jobs',
                              help='the number of jobs to run in parallel',
                              type=int)
    multi_parser.add_argument('--nlaunches',
                              help='number of FireWorks to run in series '
                                   'per parallel job (int or "infinite"; '
                                   'default 0 is all jobs in DB)',
                              default=0)
    # Fixed: the adjacent literals used to join as "...launch mode(secs)".
    multi_parser.add_argument('--sleep',
                              help='sleep time between loops in infinite '
                                   'launch mode (secs)',
                              default=None, type=int)
    multi_parser.add_argument('--timeout',
                              help='timeout (secs) after which to quit '
                                   '(default None)',
                              default=None, type=int)
    multi_parser.add_argument('--nodefile',
                              help='nodefile name or environment variable '
                                   'name containing the node file name '
                                   '(for populating FWData only)',
                              default=None, type=str)
    multi_parser.add_argument('--ppn',
                              help='processors per node (for populating '
                                   'FWData only)',
                              default=1, type=int)
    # Fixed: the adjacent literals used to join as "...nodeas compute node".
    multi_parser.add_argument('--exclude_current_node',
                              help="Don't use the script launching node "
                                   "as compute node",
                              action="store_true")

    # --- options shared by every sub-command ---
    parser.add_argument('-l', '--launchpad_file',
                        help='path to launchpad file',
                        default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file',
                        help='path to fworker file',
                        default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir',
                        help='path to a directory containing the config '
                             'file (used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages',
                        default='INFO')
    parser.add_argument('-s', '--silencer',
                        help='shortcut to mute log messages',
                        action='store_true')

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    # Fall back to the conventional file names inside config_dir when no
    # explicit path was given. The default, CONFIG_FILE_DIR, is defined
    # elsewhere and may be None; os.path.join() raises TypeError on None,
    # so the fallback is skipped in that case.
    if args.config_dir is not None:
        if not args.launchpad_file:
            candidate = os.path.join(args.config_dir, 'my_launchpad.yaml')
            if os.path.exists(candidate):
                args.launchpad_file = candidate
        if not args.fworker_file:
            candidate = os.path.join(args.config_dir, 'my_fworker.yaml')
            if os.path.exists(candidate):
                args.fworker_file = candidate

    # --silencer overrides whatever --loglvl was requested.
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    # `offline` exists only on the singleshot sub-parser, so the command
    # must be tested first to keep the attribute access safe.
    if args.command == 'singleshot' and args.offline:
        launchpad = None
    elif args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker=fworker, m_dir=None,
                  nlaunches=args.nlaunches, max_loops=args.max_loops,
                  sleep_time=args.sleep, strm_lvl=args.loglvl,
                  timeout=args.timeout)
    elif args.command == 'multi':
        total_node_list = None
        if args.nodefile:
            # --nodefile may name an environment variable that holds the
            # real path; if no such variable is set, it is used as the path.
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile, 'r') as f:
                total_node_list = [line.strip() for line in f.readlines()]
        launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches,
                            args.num_jobs, args.sleep, total_node_list,
                            args.ppn, timeout=args.timeout,
                            exclude_current_node=args.exclude_current_node)
    else:
        # singleshot.
        # NOTE(review): this branch is also reached when no sub-command was
        # supplied and args.command is None -- confirm whether the
        # sub-command should be made mandatory.
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl)
parser.add_argument('--logdir', help='path to a directory for logging', default=None) parser.add_argument('--loglvl', help='level to print log messages', default='INFO') parser.add_argument('--silencer', help='shortcut to mute log messages', action='store_true') parser.add_argument('-r', '--reserve', help='reserve a fw', action='store_true') parser.add_argument('-l', '--launchpad_file', help='path to launchpad file', default=None) parser.add_argument('-w', '--fworker_file', help='path to fworker file', default=None) rapid_parser.add_argument('-q', '--njobs_queue', help='maximum jobs to keep in queue for this user', default=10, type=int) rapid_parser.add_argument('-b', '--njobs_block', help='maximum jobs to put in a block', default=500, type=int) rapid_parser.add_argument('--nlaunches', help='num_launches (int or "infinite")') rapid_parser.add_argument('--sleep', help='sleep time between loops', default=60, type=int) args = parser.parse_args() if not args.launchpad_file and os.path.exists('my_launchpad.yaml'): args.launchpad_file = 'my_launchpad.yaml' if not args.fworker_file and os.path.exists('my_fworker.yaml'): args.fworker_file = 'my_fworker.yaml' launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file else None fworker = FWorker.from_file(args.fworker_file) if args.fworker_file else FWorker() rocket_params = QueueParams.from_file(args.queue_params_file) args.loglvl = 'CRITICAL' if args.silencer else args.loglvl # TODO: the number of arguments here is crazy! if args.command == 'rapidfire': rapidfire(rocket_params, args.launch_dir, args.njobs_queue, args.njobs_block, args.loglvl, args.nlaunches, args.sleep, launchpad, fworker, args.reserve) else: launch_rocket_to_queue(rocket_params, args.launch_dir, args.loglvl, launchpad, fworker, args.reserve)