def test_early_exit(self):
    """Two Fireworks launched in parallel must run in distinct launch dirs.

    fw2 and fw3 both run the same script, which echoes the parent PID; if
    the two parallel jobs were mixed up, their task.out contents would match.
    """
    os.chdir(MODULE_DIR)
    shell = "echo hello from process $PPID; sleep 2"
    fireworks = [
        Firework(
            ScriptTask.from_str(shell_cmd=shell,
                                parameters={"stdout_file": "task.out"}),
            fw_id=idx,
        )
        for idx in range(1, 5)
    ]
    # Diamond workflow: 1 -> (2, 3) -> 4, so 2 and 3 run concurrently.
    wf = Workflow(fireworks, {1: [2, 3], 2: [4], 3: [4]})
    self.lp.add_wf(wf)
    launch_multiprocess(self.lp, FWorker(), 'DEBUG', 0, 2, sleep_time=0.5)
    outputs = []
    for fw_id in (2, 3):
        fw = self.lp.get_fw_by_id(fw_id)
        with open(os.path.join(fw.launches[0].launch_dir, "task.out")) as fh:
            outputs.append(fh.read())
    # Different parent PIDs prove the two Rockets ran in separate processes.
    self.assertNotEqual(outputs[0], outputs[1])
def test_tracker_mlaunch(self):
    """Test the tracker for mlaunch.

    Builds two workflows of echo tasks, runs them with two parallel
    processes, and checks each tracker captured the last lines written
    to its output file.
    """
    self._teardown([self.dest1, self.dest2])
    try:
        def add_wf(j, dest, tracker, name):
            # Two chained Fireworks of 25 echo tasks each, appending the
            # integers j..j+49 to `dest`, with `tracker` watching the file.
            fts = []
            for i in range(j, j + 25):
                ft = ScriptTask.from_str(
                    'echo "' + str(i) + '" >> ' + dest, {'store_stdout': True})
                fts.append(ft)
            fw1 = Firework(fts, spec={'_trackers': [tracker]}, fw_id=j + 1, name=name + '1')
            fts = []
            for i in range(j + 25, j + 50):
                ft = ScriptTask.from_str(
                    'echo "' + str(i) + '" >> ' + dest, {'store_stdout': True})
                fts.append(ft)
            fw2 = Firework(fts, spec={'_trackers': [tracker]}, fw_id=j + 2, name=name + '2')
            wf = Workflow([fw1, fw2], links_dict={fw1: [fw2]})
            self.lp.add_wf(wf)

        add_wf(0, self.dest1, self.tracker1, 'a_test')
        add_wf(50, self.dest2, self.tracker2, 'b_test')
        try:
            launch_multiprocess(self.lp, self.fworker, 'ERROR', 0, 2, 0, ppn=2)
        except Exception:
            # Best-effort: the launch may fail in the test environment, but
            # the trackers should still have recorded the output-file tails.
            # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
            pass
        self.assertEqual('48\n49', self.tracker1.track_file())
        self.assertEqual('98\n99', self.tracker2.track_file())
    finally:
        # Always remove the destination files and any launcher_* dirs.
        self._teardown([self.dest1, self.dest2])
        pwd = os.getcwd()
        for ldir in glob.glob(os.path.join(pwd, 'launcher_*')):
            shutil.rmtree(ldir)
def mlaunch():
    """CLI entry point: launch multiple Rockets simultaneously."""
    m_description = 'This program launches multiple Rockets simultaneously'
    parser = ArgumentParser(description=m_description)
    parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    # NOTE: --nlaunches has no type= so it stays a string and can be "infinite".
    parser.add_argument('--nlaunches', help='number of FireWorks to run in series per parallel job (int or "infinite"; default 0 is all jobs in DB)', default=0)
    parser.add_argument('--sleep', help='sleep time between loops in infinite launch mode (secs)', default=None, type=int)
    parser.add_argument('--timeout', help='timeout (secs) after which to quit (default None)', default=None, type=int)
    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file', default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file', default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file (used if -l, -w unspecified)', default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages', action='store_true')
    parser.add_argument('--nodefile', help='nodefile name or environment variable name containing the node file name (for populating FWData only)', default=None, type=str)
    parser.add_argument('--ppn', help='processors per node (for populating FWData only)', default=1, type=int)

    args = parser.parse_args()

    # Fall back to files in config_dir when -l / -w were not given explicitly.
    if not args.launchpad_file and args.config_dir and os.path.exists(os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    if not args.fworker_file and args.config_dir and os.path.exists(os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    # -s/--silencer overrides any --loglvl value.
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file else LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # Optionally read the node list (for FWData); the --nodefile value may
    # itself be the name of an environment variable holding the real path.
    total_node_list = None
    if args.nodefile:
        if args.nodefile in os.environ:
            args.nodefile = os.environ[args.nodefile]
        with open(args.nodefile, 'r') as f:
            total_node_list = [line.strip() for line in f.readlines()]

    launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches, args.num_jobs,
                        args.sleep, total_node_list, args.ppn, timeout=args.timeout)
def mlaunch():
    """CLI entry point: launch multiple Rockets simultaneously (no timeout option in this variant)."""
    m_description = 'This program launches multiple Rockets simultaneously'
    parser = ArgumentParser(description=m_description)
    parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    # NOTE: --nlaunches has no type= so it stays a string and can be "infinite".
    parser.add_argument('--nlaunches', help='number of FireWorks to run in series per parallel job (int or "infinite"; default 0 is all jobs in DB)', default=0)
    parser.add_argument('--sleep', help='sleep time between loops in infinite launch mode (secs)', default=None, type=int)
    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file', default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file', default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file (used if -l, -w unspecified)', default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages', action='store_true')
    parser.add_argument('--nodefile', help='nodefile name or environment variable name containing the node file name (for populating FWData only)', default=None, type=str)
    parser.add_argument('--ppn', help='processors per node (for populating FWData only)', default=1, type=int)

    args = parser.parse_args()

    # Fall back to files in config_dir when -l / -w were not given explicitly.
    if not args.launchpad_file and args.config_dir and os.path.exists(os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    if not args.fworker_file and args.config_dir and os.path.exists(os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    # -s/--silencer overrides any --loglvl value.
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file else LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # Optionally read the node list (for FWData); the --nodefile value may
    # itself be the name of an environment variable holding the real path.
    total_node_list = None
    if args.nodefile:
        if args.nodefile in os.environ:
            args.nodefile = os.environ[args.nodefile]
        with open(args.nodefile, 'r') as f:
            total_node_list = [line.strip() for line in f.readlines()]

    launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches, args.num_jobs,
                        args.sleep, total_node_list, args.ppn)
def test_tracker_mlaunch(self):
    """Test the tracker for mlaunch.

    Builds two workflows of echo tasks, runs them with two parallel
    processes, and checks each tracker captured the last lines written
    to its output file.
    """
    self._teardown([self.dest1, self.dest2])
    try:
        def add_wf(j, dest, tracker, name):
            # Two chained Fireworks of 25 echo tasks each, appending the
            # integers j..j+49 to `dest`, with `tracker` watching the file.
            fts = []
            for i in range(j, j + 25):
                ft = ScriptTask.from_str('echo "' + str(i) + '" >> '+ dest,
                                         {'store_stdout': True})
                fts.append(ft)
            fw1 = Firework(fts, spec={'_trackers': [tracker]}, fw_id=j + 1, name=name + '1')
            fts = []
            for i in range(j + 25, j + 50):
                ft = ScriptTask.from_str('echo "' + str(i) + '" >> ' + dest,
                                         {'store_stdout': True})
                fts.append(ft)
            fw2 = Firework(fts, spec={'_trackers': [tracker]}, fw_id=j + 2, name=name + '2')
            wf = Workflow([fw1, fw2], links_dict={fw1: [fw2]})
            self.lp.add_wf(wf)

        add_wf(0, self.dest1, self.tracker1, 'a_test')
        add_wf(50, self.dest2, self.tracker2, 'b_test')
        try:
            launch_multiprocess(self.lp, self.fworker, 'ERROR', 0, 2, 0, ppn=2)
        except Exception:
            # Best-effort: the launch may fail in the test environment, but
            # the trackers should still have recorded the output-file tails.
            # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
            pass
        self.assertEqual('48\n49', self.tracker1.track_file())
        self.assertEqual('98\n99', self.tracker2.track_file())
    finally:
        # Always remove the destination files and any launcher_* dirs.
        self._teardown([self.dest1, self.dest2])
        pwd = os.getcwd()
        for ldir in glob.glob(os.path.join(pwd, 'launcher_*')):
            shutil.rmtree(ldir)
def test_checkout_fw(self):
    """Two independent Fireworks run to COMPLETED and each writes its own task.out."""
    os.chdir(MODULE_DIR)
    # Queue two one-task workflows, each echoing a distinct greeting.
    for fw_id, cmd in ((1, 'echo "hello 1"'), (2, 'echo "hello 2"')):
        task = ScriptTask.from_str(shell_cmd=cmd,
                                   parameters={"stdout_file": "task.out"})
        self.lp.add_wf(Firework(task, fw_id=fw_id))
    launch_multiprocess(self.lp, FWorker(), 'DEBUG', 0, 2, 10)
    expected = {1: ['hello 1\n'], 2: ['hello 2\n']}
    for fw_id, want in expected.items():
        fw = self.lp.get_fw_by_id(fw_id)
        self.assertEqual(fw.launches[0].state_history[-1]["state"], "COMPLETED")
        with open(os.path.join(fw.launches[0].launch_dir, "task.out")) as fh:
            self.assertEqual(fh.readlines(), want)
def arlaunch():
    """
    Function rapid-fire job launching.

    CLI entry point with three subcommands: ``singleshot`` (one Rocket),
    ``rapidfire`` (loop until all FireWorks complete) and ``multi``
    (several Rockets in parallel). Uses an AiiDAFWorker (fworker file is
    required via -w).
    """
    m_description = 'This program launches one or more Rockets. A Rocket retrieves a job from the ' \
                    'central database and runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'
    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot', help='launch a single Rocket')
    rapid_parser = subparsers.add_parser(
        'rapidfire', help='launch multiple Rockets (loop until all FireWorks complete)')
    multi_parser = subparsers.add_parser(
        'multi', help='launches multiple Rockets simultaneously')

    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run', default=None, type=int)
    single_parser.add_argument('--offline', help='run in offline mode (FW.json required)', action='store_true')
    single_parser.add_argument('--pdb', help='shortcut to invoke debugger on error', action='store_true')

    rapid_parser.add_argument('--nlaunches', help='num_launches (int or "infinite"; '
                              'default 0 is all jobs in DB)', default=0)
    rapid_parser.add_argument(
        '--timeout', help='timeout (secs) after which to quit (default None)', default=None, type=int)
    rapid_parser.add_argument(
        '--max_loops', help='after this many sleep loops, quit even in '
        'infinite nlaunches mode (default -1 is infinite loops)', default=-1, type=int)
    rapid_parser.add_argument('--sleep', help='sleep time between loops (secs)', default=None, type=int)
    rapid_parser.add_argument(
        '--local_redirect', help="Redirect stdout and stderr to the launch directory", action="store_true")

    multi_parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    multi_parser.add_argument('--nlaunches', help='number of FireWorks to run in series per '
                              'parallel job (int or "infinite"; default 0 is '
                              'all jobs in DB)', default=0)
    multi_parser.add_argument(
        '--sleep', help='sleep time between loops in infinite launch mode' '(secs)', default=None, type=int)
    multi_parser.add_argument(
        '--timeout', help='timeout (secs) after which to quit (default None)', default=None, type=int)
    multi_parser.add_argument(
        '--nodefile', help='nodefile name or environment variable name '
        'containing the node file name (for populating'
        ' FWData only)', default=None, type=str)
    multi_parser.add_argument(
        '--ppn', help='processors per node (for populating FWData only)', default=1, type=int)
    multi_parser.add_argument('--exclude_current_node', help="Don't use the script launching node"
                              "as compute node", action="store_true")
    multi_parser.add_argument(
        '--local_redirect', help="Redirect stdout and stderr to the launch directory", action="store_true")

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file')
    parser.add_argument('-w', '--fworker_file', required=True, help='path to fworker file')
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file '
                        '(used if -l, -w unspecified)', default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages', action='store_true')

    try:
        import argcomplete
        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        # eval "$(register-python-argcomplete rlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    # Fall back to config_dir, then the global LAUNCHPAD_LOC default.
    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    elif not args.launchpad_file:
        args.launchpad_file = LAUNCHPAD_LOC

    # -s/--silencer overrides any --loglvl value.
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    # Offline singleshot runs from a local FW.json; no LaunchPad connection needed.
    if args.command == 'singleshot' and args.offline:
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(
            args.launchpad_file) if args.launchpad_file else LaunchPad(
                strm_lvl=args.loglvl)

    fworker = AiiDAFWorker.from_file(args.fworker_file)

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker=fworker, m_dir=None, nlaunches=args.nlaunches,
                  max_loops=args.max_loops, sleep_time=args.sleep, strm_lvl=args.loglvl,
                  timeout=args.timeout, local_redirect=args.local_redirect)
    elif args.command == 'multi':
        # Optionally read the node list; --nodefile may itself name an
        # environment variable holding the real path.
        total_node_list = None
        if args.nodefile:
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile, 'r') as fhandle:
                total_node_list = [
                    line.strip() for line in fhandle.readlines()
                ]
        launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches,
                            args.num_jobs, args.sleep, total_node_list, args.ppn,
                            timeout=args.timeout,
                            exclude_current_node=args.exclude_current_node,
                            local_redirect=args.local_redirect)
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl,
                      pdb_on_exception=args.pdb)
def rlaunch():
    """CLI entry point: run Rockets via singleshot, rapidfire, or multi subcommands."""
    m_description = 'This program launches one or more Rockets. A Rocket grabs a job from the ' \
                    'central database and runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'
    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot', help='launch a single Rocket')
    rapid_parser = subparsers.add_parser('rapidfire',
                                         help='launch multiple Rockets (loop until all FireWorks complete)')
    multi_parser = subparsers.add_parser('multi', help='launches multiple Rockets simultaneously')

    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run', default=None, type=int)
    single_parser.add_argument('--offline', help='run in offline mode (FW.json required)', action='store_true')

    rapid_parser.add_argument('--nlaunches', help='num_launches (int or "infinite"; '
                              'default 0 is all jobs in DB)', default=0)
    rapid_parser.add_argument('--timeout', help='timeout (secs) after which to quit (default None)',
                              default=None, type=int)
    rapid_parser.add_argument('--max_loops', help='after this many sleep loops, quit even in '
                              'infinite nlaunches mode (default -1 is infinite loops)',
                              default=-1, type=int)
    rapid_parser.add_argument('--sleep', help='sleep time between loops (secs)', default=None, type=int)

    multi_parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    multi_parser.add_argument('--nlaunches', help='number of FireWorks to run in series per '
                              'parallel job (int or "infinite"; default 0 is '
                              'all jobs in DB)', default=0)
    multi_parser.add_argument('--sleep', help='sleep time between loops in infinite launch mode'
                              '(secs)', default=None, type=int)
    multi_parser.add_argument('--timeout', help='timeout (secs) after which to quit (default None)',
                              default=None, type=int)
    multi_parser.add_argument('--nodefile', help='nodefile name or environment variable name '
                              'containing the node file name (for populating'
                              ' FWData only)', default=None, type=str)
    multi_parser.add_argument('--ppn', help='processors per node (for populating FWData only)',
                              default=1, type=int)
    multi_parser.add_argument('--exclude_current_node', help="Don't use the script launching node"
                              "as compute node", action="store_true")

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file', default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file', default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file '
                        '(used if -l, -w unspecified)', default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages', action='store_true')

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    # Fall back to files in config_dir when -l / -w were not given explicitly.
    if not args.launchpad_file and os.path.exists(os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    if not args.fworker_file and os.path.exists(os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    # -s/--silencer overrides any --loglvl value.
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    # Offline singleshot runs from a local FW.json; no LaunchPad connection needed.
    if args.command == 'singleshot' and args.offline:
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file else LaunchPad(
            strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker=fworker, m_dir=None, nlaunches=args.nlaunches,
                  max_loops=args.max_loops, sleep_time=args.sleep, strm_lvl=args.loglvl,
                  timeout=args.timeout)
    elif args.command == 'multi':
        # Optionally read the node list; --nodefile may itself name an
        # environment variable holding the real path.
        total_node_list = None
        if args.nodefile:
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile, 'r') as f:
                total_node_list = [line.strip() for line in f.readlines()]
        launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches, args.num_jobs,
                            args.sleep, total_node_list, args.ppn, timeout=args.timeout,
                            exclude_current_node=args.exclude_current_node)
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl)
def runParallel(self):
    """Launch two jobs simultaneously (2 processes, one per core), 10 s sleep between loops."""
    launch_multiprocess(self.launchpad, self.worker, 'INFO', 0, 2, 10)
def mlaunch():
    """CLI entry point: launch multiple Rockets simultaneously (supports --exclude_current_node)."""
    m_description = 'This program launches multiple Rockets simultaneously'
    parser = ArgumentParser(description=m_description)
    parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    # NOTE: --nlaunches has no type= so it stays a string and can be "infinite".
    parser.add_argument(
        '--nlaunches', help='number of FireWorks to run in series per parallel job '
        '(int or "infinite"; default 0 is all jobs in DB)', default=0)
    parser.add_argument(
        '--sleep', help='sleep time between loops in infinite launch mode (secs)',
        default=None, type=int)
    parser.add_argument(
        '--timeout', help='timeout (secs) after which to quit (default None)',
        default=None, type=int)
    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file',
                        default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file',
                        default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file '
                        '(used if -l, -w unspecified)', default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages',
                        action='store_true')
    parser.add_argument(
        '--nodefile', help='nodefile name or environment variable name containing '
        'the node file name (for populating FWData only)', default=None, type=str)
    parser.add_argument(
        '--ppn', help='processors per node (for populating FWData only)', default=1, type=int)
    parser.add_argument(
        '--exclude_current_node', help="Don't use the script launching node as compute node",
        action="store_true")

    try:
        import argcomplete
        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        # eval "$(register-python-argcomplete mlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    # Fall back to files in config_dir when -l / -w were not given explicitly.
    if not args.launchpad_file and args.config_dir and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    if not args.fworker_file and args.config_dir and os.path.exists(
            os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    # -s/--silencer overrides any --loglvl value.
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    launchpad = LaunchPad.from_file(
        args.launchpad_file) if args.launchpad_file else LaunchPad(
            strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # Optionally read the node list (for FWData); the --nodefile value may
    # itself be the name of an environment variable holding the real path.
    total_node_list = None
    if args.nodefile:
        if args.nodefile in os.environ:
            args.nodefile = os.environ[args.nodefile]
        with open(args.nodefile, 'r') as f:
            total_node_list = [line.strip() for line in f.readlines()]

    launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches, args.num_jobs,
                        args.sleep, total_node_list, args.ppn, timeout=args.timeout,
                        exclude_current_node=args.exclude_current_node)
def mlaunch():
    """CLI entry point: launch multiple Rockets simultaneously (supports --exclude_current_node)."""
    m_description = 'This program launches multiple Rockets simultaneously'
    parser = ArgumentParser(description=m_description)
    parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    # NOTE: --nlaunches has no type= so it stays a string and can be "infinite".
    parser.add_argument('--nlaunches', help='number of FireWorks to run in series per parallel job '
                        '(int or "infinite"; default 0 is all jobs in DB)', default=0)
    parser.add_argument('--sleep', help='sleep time between loops in infinite launch mode (secs)',
                        default=None, type=int)
    parser.add_argument('--timeout', help='timeout (secs) after which to quit (default None)',
                        default=None, type=int)
    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file', default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file', default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file '
                        '(used if -l, -w unspecified)', default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages', action='store_true')
    parser.add_argument('--nodefile', help='nodefile name or environment variable name containing '
                        'the node file name (for populating FWData only)', default=None, type=str)
    parser.add_argument('--ppn', help='processors per node (for populating FWData only)',
                        default=1, type=int)
    parser.add_argument('--exclude_current_node', help="Don't use the script launching node as compute node",
                        action="store_true")

    try:
        import argcomplete
        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        # eval "$(register-python-argcomplete mlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    # Fall back to files in config_dir when -l / -w were not given explicitly.
    if not args.launchpad_file and args.config_dir and os.path.exists(os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    if not args.fworker_file and args.config_dir and os.path.exists(os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    # -s/--silencer overrides any --loglvl value.
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file else LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # Optionally read the node list (for FWData); the --nodefile value may
    # itself be the name of an environment variable holding the real path.
    total_node_list = None
    if args.nodefile:
        if args.nodefile in os.environ:
            args.nodefile = os.environ[args.nodefile]
        with open(args.nodefile, 'r') as f:
            total_node_list = [line.strip() for line in f.readlines()]

    launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches, args.num_jobs,
                        args.sleep, total_node_list, args.ppn, timeout=args.timeout,
                        exclude_current_node=args.exclude_current_node)
def rlaunch():
    """CLI entry point: run Rockets via singleshot, rapidfire, or multi subcommands."""
    m_description = (
        "This program launches one or more Rockets. A Rocket retrieves a job from the "
        'central database and runs it. The "single-shot" option launches a single Rocket, '
        'whereas the "rapidfire" option loops until all FireWorks are completed.'
    )
    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help="command", dest="command")
    single_parser = subparsers.add_parser("singleshot", help="launch a single Rocket")
    rapid_parser = subparsers.add_parser(
        "rapidfire", help="launch multiple Rockets (loop until all FireWorks complete)")
    multi_parser = subparsers.add_parser(
        "multi", help="launches multiple Rockets simultaneously")

    single_parser.add_argument("-f", "--fw_id", help="specific fw_id to run", default=None, type=int)
    single_parser.add_argument("--offline", help="run in offline mode (FW.json required)", action="store_true")
    single_parser.add_argument("--pdb", help="shortcut to invoke debugger on error", action="store_true")

    rapid_parser.add_argument("--nlaunches", help='num_launches (int or "infinite"; '
                              "default 0 is all jobs in DB)", default=0)
    rapid_parser.add_argument(
        "--timeout", help="timeout (secs) after which to quit (default None)", default=None, type=int)
    rapid_parser.add_argument(
        "--max_loops",
        help=
        "after this many sleep loops, quit even in infinite nlaunches mode (default -1 is infinite loops)",
        default=-1,
        type=int,
    )
    rapid_parser.add_argument("--sleep", help="sleep time between loops (secs)", default=None, type=int)
    rapid_parser.add_argument(
        "--local_redirect", help="Redirect stdout and stderr to the launch directory", action="store_true")

    multi_parser.add_argument("num_jobs", help="the number of jobs to run in parallel", type=int)
    multi_parser.add_argument(
        "--nlaunches",
        help="number of FireWorks to run in series per "
        'parallel job (int or "infinite"; default 0 is '
        "all jobs in DB)",
        default=0,
    )
    multi_parser.add_argument(
        "--sleep", help="sleep time between loops in infinite launch mode (secs)",
        default=None, type=int)
    multi_parser.add_argument(
        "--timeout", help="timeout (secs) after which to quit (default None)", default=None, type=int)
    multi_parser.add_argument(
        "--nodefile",
        help="nodefile name or environment variable name "
        "containing the node file name (for populating"
        " FWData only)",
        default=None,
        type=str,
    )
    multi_parser.add_argument(
        "--ppn", help="processors per node (for populating FWData only)", default=1, type=int)
    multi_parser.add_argument(
        "--exclude_current_node", help="Don't use the script launching node as compute node",
        action="store_true")
    multi_parser.add_argument(
        "--local_redirect", help="Redirect stdout and stderr to the launch directory", action="store_true")

    parser.add_argument("-l", "--launchpad_file", help="path to launchpad file")
    parser.add_argument("-w", "--fworker_file", help="path to fworker file")
    parser.add_argument(
        "-c",
        "--config_dir",
        help=
        "path to a directory containing the config file (used if -l, -w unspecified)",
        default=CONFIG_FILE_DIR,
    )
    parser.add_argument("--loglvl", help="level to print log messages", default="INFO")
    parser.add_argument("-s", "--silencer", help="shortcut to mute log messages", action="store_true")

    try:
        import argcomplete
        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        # eval "$(register-python-argcomplete rlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    # Fall back to config_dir files, then the global *_LOC defaults.
    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, "my_launchpad.yaml")):
        args.launchpad_file = os.path.join(args.config_dir, "my_launchpad.yaml")
    elif not args.launchpad_file:
        args.launchpad_file = LAUNCHPAD_LOC
    if not args.fworker_file and os.path.exists(
            os.path.join(args.config_dir, "my_fworker.yaml")):
        args.fworker_file = os.path.join(args.config_dir, "my_fworker.yaml")
    elif not args.fworker_file:
        args.fworker_file = FWORKER_LOC

    # -s/--silencer overrides any --loglvl value.
    args.loglvl = "CRITICAL" if args.silencer else args.loglvl

    # Offline singleshot runs from a local FW.json; no LaunchPad connection needed.
    if args.command == "singleshot" and args.offline:
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(
            args.launchpad_file) if args.launchpad_file else LaunchPad(
                strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == "rapidfire":
        rapidfire(
            launchpad,
            fworker=fworker,
            m_dir=None,
            nlaunches=args.nlaunches,
            max_loops=args.max_loops,
            sleep_time=args.sleep,
            strm_lvl=args.loglvl,
            timeout=args.timeout,
            local_redirect=args.local_redirect,
        )
    elif args.command == "multi":
        # Optionally read the node list; --nodefile may itself name an
        # environment variable holding the real path.
        total_node_list = None
        if args.nodefile:
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile) as f:
                total_node_list = [line.strip() for line in f.readlines()]
        launch_multiprocess(
            launchpad,
            fworker,
            args.loglvl,
            args.nlaunches,
            args.num_jobs,
            args.sleep,
            total_node_list,
            args.ppn,
            timeout=args.timeout,
            exclude_current_node=args.exclude_current_node,
            local_redirect=args.local_redirect,
        )
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl, pdb_on_exception=args.pdb)