def cmd_help(client, *args):
    """Print the list of available CLI commands with color markup.

    :param client: unused; kept to match the command-handler signature
    :param args: unused extra arguments
    """
    # One entry per command; the leading "\n\t\" separator is added uniformly below.
    entries = (
        "<cmd>help</cmd> - show this message",
        "<cmd>quit</cmd> - exit from client",
        "<cmd>version</cmd> - show parstream's version information",
        "<cmd>timing</cmd> - toggle query timing",
        "<cmd>pretty</cmd> - toggle pretty output",
        "<cmd>users</cmd> - show parstream users",
        "<cmd>process</cmd> - show parstream process information",
        "<cmd>cluster</cmd> - show parstream cluster nodes information",
        "<cmd>tables</cmd> <arg>[table]</arg> - print a table's list or column's list if table given",
        "<cmd>settings</cmd> <arg>[key1 key2]</arg> - prints settings' list of a given keys or all",
        "<cmd>format</cmd> <arg>fmt</arg> - change parstream output format for current session",
        "<cmd>file</cmd> <arg>filename</arg> - executes statements from given file",
        "<cmd>partitions</cmd> <arg>table</arg> - show partitions of given table",
        "<cmd>disc</cmd> <arg>[partition]</arg> - show disk usage or usage of partition LIKE if given",
    )
    text = "The following are the cli commands:" + "".join(
        "\n\t\\" + entry for entry in entries)
    palette = Style.from_dict({
        'cmd': '#ff0066',
        'arg': '#44ff00',
    })
    fprint(HTML(text), style=palette)
def detect_site():
    """Detect the cluster site from environment variables.

    Probes a small table of (env var, expected value, site name) entries.
    Returns the single detected site name, or 'generic' (with a warning)
    when nothing matches. Raises via raise_exception when more than one
    site matches, since the choice would be ambiguous.

    :return: site name string ('niagara', 'cori', or 'generic')
    """
    # Known site signatures: environment variable, value that identifies it,
    # and the site name to report.
    known = (
        ('CLUSTER', 'niagara', 'niagara'),
        ('NERSC_HOST', 'cori', 'cori'),
    )
    sites = [name for var, value, name in known
             if os.environ.get(var, None) == value]
    if not sites:
        warnings.warn(
            'No site specified through --site and did not automatically detect any sites. Using generic SLURM template.'
        )
        sites.append('generic')
    elif len(sites) == 1:
        fprint(
            HTML(
                f'<ansiyellow>No site specified through --site; detected <b>{sites[0]}</b> automatically.</ansiyellow>'
            ))
    else:
        # Fixed typo in the user-facing message ("specificy" -> "specify").
        raise_exception(
            f"More than one site detected through environment variables: {sites}. Please specify explicitly through the --site argument."
        )
    return sites[0]
def get_license_info(path_to_license):
    """Load license metadata from a TOML file.

    Gets information about license, such as:
    * name
    * license text
    * description
    * is fsf/osi approved
    * is gpl compatible
    * linking (static, dynamic)
    * for what is intended
    * permissions
    * conditions
    * limitations

    :param path_to_license: path to the license .toml file
    :return: parsed TOML content as a dict
    """
    try:
        with open(path_to_license) as file:
            return toml.load(file)
    except FileNotFoundError as err:
        fprint(HTML(f"<ansired><b>Error: License not found</b></ansired>\n{err}"))
        # Bug fix: the original called the bare exit() builtin, which exits
        # with status 0 and is only guaranteed to exist via the site module.
        # This is an error path, so exit with a non-zero status instead.
        sys.exit(1)
def color_print(text: str, color: str, extra: str = ''):
    """Print *text* wrapped in the given prompt_toolkit color tag.

    :param text: message to display
    :param color: tag name used as the opening/closing markup (e.g. 'ansired')
    :param extra: raw markup appended after the closing tag
    """
    opening = f'<{color}>'
    closing = f'</{color}>'
    fprint(HTML(opening + text + closing + extra))
def table_print(input: list):
    """Render a list of row dicts as a simple space-separated table.

    The underlined header comes from the keys of the first row; each row's
    values are then printed on their own line, followed by a blank line.
    All rows are assumed to share the first row's keys.

    :param input: list of dicts (parameter name kept for backward
        compatibility even though it shadows the builtin ``input``)
    """
    # Bug fix: the original raised IndexError on an empty list; an empty
    # table simply prints nothing now.
    if not input:
        return
    header = input[0].keys()
    fprint(HTML(f"<u>{' '.join(header)}</u>"))
    for line in input:
        fprint(' '.join(str(x) for x in line.values()))
    print()
def main():
    """Entry point for the pipeline plumbing tool.

    Parses CLI arguments and a YAML config, unrolls looped stages, resolves
    stage dependencies, decides which stages can be reused from previous
    runs, then submits each remaining stage either via SLURM sbatch or by
    running it locally.
    """
    # Fast path: print the site template path and quit before argparse runs.
    if '--show-site-path' in sys.argv:
        print(get_site_path())
        sys.exit(0)
    parser = argparse.ArgumentParser(
        description='A pipeline script plumbing tool.')
    parser.add_argument('project', help='Name of project.')
    parser.add_argument('config_yaml', help='Path to the configuration file.')
    parser.add_argument(
        "--site", type=str, default=None,
        help="Name of a pre-defined cluster site. "
        "If not specified, will attempt to detect automatically.")
    parser.add_argument("--dry-run", action='store_true',
                        help='Only show submissions.')
    parser.add_argument(
        "--no-reuse", action='store_true',
        help=
        'Do not reuse any stages (unless implicitly requested through the --skip argument).'
    )
    parser.add_argument(
        "--ignore-git", action='store_true',
        help='Ignore git differences when deciding whether to reuse a stage.')
    parser.add_argument("--force-local", action='store_true',
                        help='Force local run.')
    parser.add_argument(
        "--force-slurm", action='store_true',
        help="Force SLURM. "
        "If SLURM is not detected and --dry-run is not enabled, this will fail."
    )
    parser.add_argument(
        '--skip', nargs='+',
        help=
        'List of stages to skip, separated by space. These stages will be skipped even if others depend on them.'
    )
    parser.add_argument(
        "-A", "--account", type=str, default=None,
        help=
        'sbatch account argument. e.g. on cori, use this to select the account that is charged.'
    )
    parser.add_argument(
        "-q", "--qos", type=str, default=None,
        help=
        'sbatch QOS argument. e.g. on cori, the default is debug, which only provides 30 minutes, so you should explicitly use "--qos regular" on cori.'
    )
    args = parser.parse_args()
    # Forcing both execution modes is contradictory.
    if args.force_local and args.force_slurm:
        raise_exception("You can\'t force both local and SLURM.")
    # Load config file
    with open(args.config_yaml, 'r') as stream:
        config = yaml.safe_load(stream)
    # Root directory
    root_dir = config['root_dir']
    # Check if we have SLURM
    have_slurm = check_slurm()
    if not (have_slurm):
        print("No SLURM detected. We will be locally "
              "executing commands serially.")
    if args.dry_run:
        print("We are doing a dry run, so we will just print to "
              "screen.")
    # Do git checks
    pkg_gitdict = gitcheck(config, 'gitcheck_pkgs', package=True)
    pth_gitdict = gitcheck(config, 'gitcheck_paths', package=False)
    # Get global values if any
    global_vals = config.get('globals', {})
    # Get stages in order of dependencies
    cstages = config['stages']
    stage_names = list(cstages.keys())
    # Unroll loops from cstages into ostages
    cstages = config['stages']
    ostages = {}  # Unrolled stage dictionary
    unroll_map = {
    }  # dict mapping parent stage name to list of unrolled iterated stage names
    for stage in stage_names:
        # Check if this is a looped stage
        if ('arg' in cstages[stage]):
            if (type(cstages[stage]['arg'])) in [list, tuple]:
                # A list/tuple 'arg' means one copy of the stage per element.
                unroll_map[stage] = []
                for k, arg in enumerate(cstages[stage]['arg']):
                    # Each unrolled argument must be a single shell token.
                    if len(shlex.split(arg)) > 1:
                        raise Exception(
                            "Argument should not be interpretable as multiple arguments."
                        )
                    new_stage_name = f'{stage}_{arg}'
                    if new_stage_name in stage_names:
                        # NOTE(review): only the first fragment below is an
                        # f-string, so {stage} and {k} in the later fragments
                        # are printed literally — looks like a bug; confirm.
                        raise_exception(
                            f"Internal loop stage name "
                            "clashes with {stage}_!loop_iteration_{k}. "
                            "Please use a different stage name.")
                    ostages[new_stage_name] = copy.deepcopy(cstages[stage])
                    del ostages[new_stage_name]['arg']
                    ostages[new_stage_name]['arg'] = cstages[stage]['arg'][k]
                    unroll_map[stage].append(new_stage_name)
            else:
                # Scalar 'arg': keep the stage as-is after validating the token.
                if len(shlex.split(cstages[stage]['arg'])) > 1:
                    raise Exception(
                        "Argument should not be interpretable as multiple arguments."
                    )
                ostages[stage] = copy.deepcopy(cstages[stage])
        else:
            ostages[stage] = copy.deepcopy(cstages[stage])
    stage_names = list(ostages.keys())
    # Map dependencies
    deps = {}  # Mapping from stage to its dependencies
    depended = []  # List of stages that are depended on
    for sname in stage_names:
        ds = ostages[sname].get('depends')
        if ds is None: continue
        my_deps = []
        for d in ds:
            if not (d in stage_names) and not (d in unroll_map):
                raise_exception(
                    f"Stage {d} required by {sname} not found in configuration file."
                )
            # A dependency on a looped parent expands to all its unrolled copies.
            if d in unroll_map:
                my_deps = my_deps + unroll_map[d]
            else:
                my_deps.append(d)
        depended = depended + my_deps
        deps[sname] = my_deps
    if has_loop(deps): raise_exception("Circular dependency detected.")
    stages = flatten(deps)  # Ordered by dependency
    # The above only contains stages that depend on something or something also depends on
    # Let's just append any others and warn about them
    for ostage in stage_names:
        if ostage not in stages:
            fprint(
                HTML(
                    f"<ansiyellow>WARNING: stage {ostage} does not depend on anything, and nothing depends on it. Adding to queue anyway...</ansiyellow>"
                ))
            stages.append(ostage)
    if set(stages) != set(stage_names):
        raise_exception(
            "Internal error in arranging stages. Please report this bug.")
    # Check sanity of skipped stages list
    if args.skip is None: args.skip = []
    for skipstage in args.skip:
        if not (skipstage in stages):
            raise_exception(
                "Asked to skip a stage that is not in the list of stages.")
    # Parse arguments and prepare SLURM scripts
    if have_slurm or args.force_slurm:
        site = detect_site() if args.site is None else args.site
        sbatch_config = load_template(site)
    else:
        site = 'local'
    ###########################
    reuse_stages = []
    if not (args.no_reuse):
        # We decide which ones to resume here
        last_time = 0
        last_time_local = 0
        last_job = None
        last_job_local = None
        for stage in stages:
            # We check if the last submitted job (if it exists) was completed
            # TODO: add check for local runs, not just sbatch
            root = get_out_file_root(root_dir, stage, args.project, site) + "_"
            suffix = ".txt"
            fs = glob.glob(root + "*" + suffix)
            if len(fs) == 0:
                completed = False
            else:
                # Highest job id among matching output files is the latest run.
                # NOTE(review): 'root' is a filesystem path used unescaped in
                # the regex; regex metacharacters in the path would break the
                # match — confirm paths are always regex-safe here.
                last_job = max([
                    sint(re.search(rf'{root}(.*?){suffix}', f).group(1))
                    for f in fs
                ])
                # Query SLURM accounting for the state of that job.
                output = run_local([
                    'sacct', '-j', str(last_job), '--format=State',
                    '--parsable2'
                ], verbose=False).split('\n')
                completed = True
                for i, line in enumerate(output):
                    if i == 0:
                        # First line must be the 'State' header.
                        if line.strip() != 'State':
                            raise_exception("Unexpected output from sacct.")
                    elif i == (len(output) - 1):
                        # Last line must be the trailing empty line.
                        if line.strip() != '':
                            raise_exception("Unexpected output from sacct.")
                    else:
                        # Every step of the job must report COMPLETED.
                        if line.strip() != 'COMPLETED':
                            completed = False
                            break
            if completed:
                # Get time, to compare with possible completed local run
                with open(
                        get_stage_config_filename(root_dir, stage,
                                                  args.project, last_job),
                        'r') as stream:
                    last_time = yaml.safe_load(stream)['stage']['time']
            # Also check for local run completed outputs
            root = get_local_out_file(root_dir, stage, args.project) + "_"
            suffix = ".txt"
            fs = glob.glob(root + "*" + suffix)
            completed_local = False
            if len(fs) != 0:
                last_job_local = max([
                    sint(re.search(rf'{root}(.*?){suffix}', f).group(1))
                    for f in fs
                ])
                with open(
                        get_local_out_file(root_dir, stage, args.project) +
                        f"_{last_job_local}.txt", 'r') as f:
                    cstatus = f.read().strip()
                if cstatus == 'COMPLETED':
                    completed_local = True
                    with open(
                            get_stage_config_filename(root_dir, stage,
                                                      args.project,
                                                      last_job_local),
                            'r') as stream:
                        last_time_local = yaml.safe_load(
                            stream)['stage']['time']
            if completed or completed_local:
                # Prefer whichever run (sbatch vs local) finished most recently.
                last_job = [last_job, last_job_local
                            ][argmax([last_time, last_time_local])]
                if last_job is None:
                    raise_exception(
                        "Error in last completed job detection. Report bug.")
            else:
                continue
            # Next check if dictionaries match
            try:
                with open(
                        get_stage_config_filename(root_dir, stage,
                                                  args.project, last_job),
                        'r') as stream:
                    saved_config = yaml.safe_load(stream)
            # NOTE(review): bare except also hides YAML/permission errors,
            # not just a missing file — consider narrowing.
            except:
                fprint(
                    HTML(
                        f"<ansiyellow>Could not find saved configuration for {stage} even though completed job was detected. Will not re-use this stage.</ansiyellow>"
                    ))
                continue
            # We don't need to make sure the parallel options are the same
            saved_config['stage'][stage].pop('parallel', None)
            comp_dict = copy.deepcopy(ostages[stage])
            comp_dict.pop('parallel', None)
            # TODO: make sure this comparison of nested dictionaries is sufficient
            if saved_config['stage'][stage] != comp_dict: continue
            # Next we check if there are git differences
            if not (args.ignore_git):
                if saved_config['stage']['pkg_gitdict'] != pkg_gitdict: continue
                if saved_config['stage']['pth_gitdict'] != pth_gitdict: continue
            # We made it this far, which means this stage can be reused
            reuse_stages.append(stage)
    ###########################
    is_sbatch = (have_slurm or args.force_slurm) and not (args.force_local)
    is_local = not (have_slurm) or args.force_local
    # Exactly one of the two execution modes must be active.
    if sum([int(x) for x in [is_sbatch, is_local]]) != 1:
        raise_exception("Inconsistency in submission vs. local. Report bug.")
    # Check if any reused stages have dependencies that are not reused
    # If so we will not reuse those stages
    # Algorithm is linear since `stages` is already sorted
    for stage in stages:
        if stage in reuse_stages:
            if not (stage in deps): continue
            redo = False
            for d in deps[stage]:
                if not (d in reuse_stages) and not (d in args.skip):
                    redo = True
            if redo: reuse_stages.remove(stage)
    # A summary and a prompt
    print(f"SUMMARY FOR SUBMISSION OF PROJECT {args.project}")
    for stage in stages:
        if stage in reuse_stages:
            sumtxt = '<red><b>[REUSE]</b></red>'
        elif stage in args.skip:
            sumtxt = '<red><b>[SKIP]</b></red>'
        else:
            sumtxt = '<green>[SUBMIT]</green>'
        fprint(HTML(stage + '\t\t' + sumtxt))
    reply = query_yes_no("Proceed with this?")
    if not (reply): sys.exit(0)
    # Make project directory
    proj_dir = get_project_dir(root_dir, args.project)
    os.makedirs(proj_dir, exist_ok=True)
    jobids = {}  # stage name -> job id (sbatch id or local timestamp)
    for stage in stages:
        if stage in args.skip:
            fprint(
                HTML(
                    f"<ansiyellow>Skipping stage {stage} as requested.</ansiyellow>"
                ))
            if stage in depended:
                fprint(
                    HTML(
                        f"<ansiyellow>WARNING: skipped stage {stage} is depended on by others.</ansiyellow>"
                    ))
            continue
        if stage in reuse_stages:
            fprint(
                HTML(
                    f"<ansiyellow>Reusing stage {stage} as requested.</ansiyellow>"
                ))
            if stage in depended:
                fprint(
                    HTML(
                        f"<ansiyellow>WARNING: reused stage {stage} is depended on by others.</ansiyellow>"
                    ))
            continue
        # Get command
        execution, script, pargs = get_command(global_vals,
                                               copy.deepcopy(ostages), stage)
        # Make output directory
        output_dir = get_output_dir(root_dir, stage, args.project)
        os.makedirs(output_dir, exist_ok=True)
        # Construct dependency string (only for deps that actually ran)
        now_deps = deps.get(stage, [])
        if len(now_deps) >= 1:
            jlist = []
            for s in now_deps:
                if not (s in args.skip) and not (s in reuse_stages):
                    jlist.append(jobids[s])
            if len(jlist) >= 1:
                depstr = ':'.join(jlist)
                depstr = "--dependency=afterok:" + depstr
            else:
                depstr = None
        else:
            depstr = None
        if is_sbatch:
            jobid = submit_slurm(stage,
                                 sbatch_config,
                                 copy.deepcopy(ostages[stage]).get(
                                     'parallel', None),
                                 execution,
                                 script,
                                 pargs,
                                 dry_run=args.dry_run,
                                 output_dir=output_dir,
                                 project=args.project,
                                 site=site,
                                 root_dir=root_dir,
                                 depstr=depstr,
                                 account=args.account,
                                 qos=args.qos)
        if is_local:
            if pargs == '':
                cmds = [execution, script, '--output-dir', output_dir]
            else:
                cmds = [execution, script, pargs, '--output-dir', output_dir]
            run_local(cmds, dry_run=args.dry_run)
            # Get current time in Unix milliseconds and use that as jobid
            jobid = str(int(time.time() * 1e3))
            # Save job completion confirmation
            # NOTE(review): this COMPLETED marker is written even when
            # --dry-run is set — confirm that is intended.
            with open(
                    get_local_out_file(root_dir, stage, args.project) +
                    f"_{jobid}.txt", 'w') as f:
                f.write('COMPLETED')
        if not (args.dry_run):
            # Persist the stage config + git state so future runs can decide
            # whether this stage is reusable.
            out_dict = {}
            out_dict['stage'] = {stage: copy.deepcopy(ostages[stage])}
            out_dict['stage']['pkg_gitdict'] = pkg_gitdict
            out_dict['stage']['pth_gitdict'] = pth_gitdict
            init_time_ms = int(time.time() * 1e3)  # Save time when it was saved
            out_dict['stage']['time'] = init_time_ms
            with open(
                    get_stage_config_filename(root_dir, stage, args.project,
                                              jobid), 'w') as f:
                yaml.dump(out_dict, f, default_flow_style=False)
        jobids[stage] = jobid
def raise_exception(message):
    """Print *message* in red and raise an Exception carrying it.

    :param message: error text shown to the user and attached to the exception
    :raises Exception: always
    """
    fprint(HTML(f"<red>{message}</red>"))
    # Bug fix: the original did `raise Exception` (bare class), so the
    # message was lost from the traceback; attach it to the exception.
    raise Exception(message)
def submit_slurm(stage,
                 sbatch_config,
                 parallel_config,
                 execution,
                 script,
                 pargs,
                 dry_run,
                 output_dir,
                 site,
                 project,
                 root_dir,
                 depstr=None,
                 account=None,
                 qos=None):
    """Fill in the site's sbatch template for a stage and submit it.

    Derives the node/thread layout from the stage's 'parallel' config
    (falling back to defaults with warnings), substitutes the !PLACEHOLDER
    tokens in the template, writes the script to disk (unless dry_run),
    and submits it with sbatch.

    :param stage: stage name
    :param sbatch_config: site config dict; must contain 'cores_per_node'
        and 'template' (and 'memory_per_node_gb' when memory_gb is used)
    :param parallel_config: stage's 'parallel' dict or None; may contain
        'nproc', 'threads' or 'memory_gb'+'min_threads', and 'walltime'
    :param execution: executable to run (e.g. interpreter)
    :param script: script path passed to the executable
    :param pargs: extra argument string for the script
    :param dry_run: if True, print the filled template and do not write/submit
    :param output_dir: directory passed via --output-dir
    :param site: site name used to build output file roots
    :param project: project name used in the job name
    :param root_dir: pipeline root directory
    :param depstr: optional sbatch --dependency argument string
    :param account: optional sbatch --account value
    :param qos: optional sbatch --qos value
    :return: the job id string reported by sbatch --parsable
    """
    cpn = sbatch_config['cores_per_node']
    template = sbatch_config['template']
    # TypeError covers parallel_config=None; KeyError covers a missing key.
    try:
        nproc = parallel_config['nproc']
    except (TypeError, KeyError) as e:
        nproc = 1
        fprint(
            HTML(
                f"<ansiyellow>No stage['parallel']['nproc'] found for {stage}. Assuming number of MPI processes nproc=1.</ansiyellow>"
            ))
    try:
        memory_gb = parallel_config['memory_gb']
        # memory_gb and threads are mutually exclusive ways to size a task.
        if 'threads' in list(parallel_config.keys()):
            raise_exception(
                "Both memory_gb and threads should not be specified.")
        if not ('memory_per_node_gb' in list(sbatch_config.keys())):
            raise_exception(
                "Using memory_gb but no memory_per_node_gb in site configuration."
            )
        if not ('min_threads' in list(parallel_config.keys())):
            raise_exception("Need min_threads if using memory_gb.")
        # Maximum number of processes per node: convert the memory request
        # into a thread count (cores reserved per process), floored at
        # min_threads.
        threads = max(
            math.ceil(1. * cpn / sbatch_config['memory_per_node_gb'] *
                      parallel_config['memory_gb']),
            parallel_config['min_threads'])
        # Round up to an even thread count.
        threads = threads + (threads % 2)
        fprint(
            HTML(
                f"<ansiyellow>Converted memory {memory_gb} GB to number of threads {threads}.</ansiyellow>"
            ))
    except (TypeError, KeyError) as e:
        # No memory_gb given; fall through to explicit 'threads' handling.
        threads = None
    if threads is None:
        try:
            threads = parallel_config['threads']
        except (TypeError, KeyError) as e:
            threads = cpn
            fprint(
                HTML(
                    f"<ansiyellow>No stage['parallel']['threads'] found for {stage}. Assuming number of OpenMP threads={cpn}.</ansiyellow>"
                ))
    try:
        walltime = parallel_config['walltime']
    except (TypeError, KeyError) as e:
        walltime = "00:15:00"
        fprint(
            HTML(
                f"<ansiyellow>No stage['parallel']['walltime'] found for <b>{stage}</b>. Assuming <b>walltime of {walltime}</b>.</ansiyellow>"
            ))
    # Total cores requested, nodes needed to fit them, and resulting layout.
    num_cores = nproc * threads
    num_nodes = int(math.ceil(num_cores / cpn))
    totcores = num_nodes * cpn
    tasks_per_node = int(nproc * 1. / num_nodes)
    percent_used = num_cores * 100. / float(totcores)
    if percent_used < 90.:
        fprint(
            HTML(
                f"<ansiyellow>Stage {stage}: with {nproc} MPI process(es) and {threads} thread(s) and {num_nodes} nodes in the request, this means a node will have less than 90% of its cores utilized. Reconsider the way you choose your thread count or number of processes.</ansiyellow>"
            ))
    # Substitute placeholders in the site template.
    template = template.replace('!JOBNAME', f'{stage}_{project}')
    template = template.replace('!NODES', str(num_nodes))
    template = template.replace('!WALL', walltime)
    template = template.replace('!TASKSPERNODE', str(tasks_per_node))
    # must come below !TASKSPERNODE (otherwise '!TASKS' would match inside
    # '!TASKSPERNODE')
    template = template.replace('!TASKS', str(nproc))
    template = template.replace('!THREADS', str(threads))
    cmd = ' '.join([execution, script, pargs]) + f' --output-dir {output_dir}'
    template = template.replace('!CMD', cmd)
    template = template.replace(
        '!OUT', get_out_file_root(root_dir, stage, project, site))
    if dry_run:
        fprint(HTML(f'<skyblue><b>{stage}</b></skyblue>'))
        fprint(HTML(f'<skyblue><b>{"".join(["="]*len(stage))}</b></skyblue>'))
        fprint(HTML(f'<skyblue>{template}</skyblue>'))
    # Get current time in Unix milliseconds to define log directory
    init_time_ms = int(time.time() * 1e3)
    fname = get_sbatch_script_filename(output_dir, project, stage, site,
                                       init_time_ms)
    if not (dry_run):
        with open(fname, 'w') as f:
            f.write(template)
    # Assemble the sbatch command line; --parsable makes sbatch print only
    # the job id.
    cmds = []
    cmds.append('sbatch')
    cmds.append(f'--parsable')
    if not (account is None):
        cmds.append(f'--account={account}')
    if not (qos is None):
        cmds.append(f'--qos={qos}')
    if depstr is not None:
        cmds.append(f'{depstr}')
    cmds.append(fname)
    jobid = run_local(cmds, dry_run).strip()
    print(f"Submitted and obtained jobid {jobid}")
    return jobid
def main():
    """Main function, mostly managing cli arguments.

    Dispatches on the parsed options: print a license text (--license),
    save it to a file (--save), show license metadata (--info), identify
    a license from a file or stdin (--what), or print the version
    (--version). With no arguments, prints a usage hint.
    """
    args = options.get_args(get_git_info("name"), get_git_info("email"))
    if args.license is not None:  # --license, -l
        # Print the rendered license text without an extra trailing newline.
        print(
            gen_license_text(
                get_license_info(f"{script_dir}/license/{args.license.upper()}.toml")[
                    "license"
                ],
                args.year,
                args.fullname,
                args.email,
                args.projectname,
            ),
            end="",
        )
    if args.save is not None:  # --save, -s
        # --save only makes sense together with --license.
        if args.license is not None:
            license = get_license_info(
                f"{script_dir}/license/{args.license.upper()}.toml"
            )["license"]
            save_license_to_file(
                license,
                args.save,
                args.email,
                args.year,
                args.fullname,
                args.projectname,
            )
        else:
            fprint(HTML(f"<ansired><b>Error: Set --license/-l flag</b></ansired>"))
    if args.info is not None:  # --info , -i
        print_license_data(args.info.upper())
    if args.what is not None:  # --what, -w
        license = ""
        if args.what is False:  # if no argument passed
            if sys.stdin.isatty():  # if nothing piped
                # Fall back to a file named LICENSE in the current directory.
                args.what = "LICENSE"
            else:
                # Read the license text from the pipe instead of a file.
                for line in sys.stdin:
                    license = license + "\n" + line
        # Try file-based detection first; fall back to the piped/empty text.
        try:
            license_name = licensename.from_file(args.what)
        except FileNotFoundError:
            license_name = licensename.from_text(license)
        if license_name is not None:
            print_license_data(license_name.upper())
        else:
            fprint(HTML("<ansired><b>Error: Unknown license</b></ansired>"))
    if args.version is not None:  # --version -v
        # I was bored, but rainbow looks nice
        fprint(
            HTML(
                "<ansired>p</ansired>"
                "<ansiyellow>o</ansiyellow>"
                "<ansigreen>l</ansigreen>"
                "<ansicyan>i</ansicyan>"
                "<ansiblue>c</ansiblue>"
                "<ansimagenta>e</ansimagenta>"
                "<ansiblue>n</ansiblue>"
                "<ansicyan>s</ansicyan>"
                "<ansigreen>e</ansigreen>"
                f" {__version__}"
            )
        )
    if len(sys.argv) == 1:  # No arguments passed
        print(f"Type: 'policense -h' to get some help.")
def print_license_data(license_name):
    """Print a colorized summary of a license.

    Displays, in order:
    * name and wrapped description (with reference links if present)
    * FSF/OSI approval
    * GPL compatibility
    * copyleft status
    * static/dynamic linking rules
    * intended use
    * permission, condition and limitation lists

    :param license_name: basename (without extension) of the license TOML
        file under {script_dir}/license/
    """
    data = get_license_info(f"{script_dir}/license/{license_name}.toml")

    # Title, wrapped description, and optional numbered reference links.
    fprint(HTML(f"<reverse><b>{data['name']}</b></reverse>"))
    for chunk in textwrap.wrap(data["description"], width=70):
        fprint(HTML(f"<i>{chunk}</i>"))
    if "references" in data:
        for idx, url in enumerate(data["references"]):
            fprint(HTML(f"<ansiblue>[{idx+1}] {url}</ansiblue>"))

    # FSF / OSI approval flags.
    if data["fsf"]:
        fprint(HTML("<ansicyan>FSF Approved ✔</ansicyan>"))
    else:
        fprint(HTML("<ansired>FSF Approved ✖</ansired>"))
    if data["osi"]:
        fprint(HTML("<ansicyan>OSI Approved ✔</ansicyan>"))
    else:
        fprint(HTML("<ansired>OSI Approved ✖</ansired>"))

    # GPL compatibility is tri-state: True, False, or an explanatory value
    # which is shown inline.
    gpl = data["gpl_compatible"]
    if gpl is True:
        fprint(HTML("<ansicyan>GPL Compatible ✔</ansicyan>"))
    elif gpl is False:
        fprint(HTML("<ansired>GPL Compatible ✖</ansired>"))
    else:
        fprint(
            HTML(
                f"<ansicyan>GPL Compatible ✔<i>{data['gpl_compatible']}</i></ansicyan>"
            )
        )

    # Copyleft: any truthy value is shown inline next to the check mark.
    if data["copyleft"]:
        fprint(HTML(f"<ansicyan>Copyleft ✔<i>{data['copyleft']}</i></ansicyan>"))
    else:
        fprint(HTML("<ansired>Copyleft ✖</ansired>"))

    # Linking rules; any non-True value is shown inline as the reason.
    if data["static_linking"] is True:
        fprint(HTML("<ansicyan>Static Linking ✔</ansicyan>"))
    else:
        fprint(
            HTML(
                f"<ansired>Static Linking ✖<i>{data['static_linking']}</i></ansired>"
            )
        )
    if data["dynamic_linking"] is True:
        fprint(HTML("<ansicyan>Dynamic Linking ✔</ansicyan>"))
    else:
        fprint(
            HTML(
                f"<ansired>Dynamic Linking ✖<i>{data['dynamic_linking']}</i></ansired>"
            )
        )

    # Intended use.
    fprint(HTML(f"<ansimagenta>Intended for {data['intended_for']}</ansimagenta>"))

    # Permission / condition / limitation sections, each with a colored header.
    for key, header in (
        ("permissions", "\n<ansigreen>Permissions</ansigreen>"),
        ("conditions", "\n<ansiblue>Conditions</ansiblue>"),
        ("limitations", "\n<ansired>Limitations</ansired>"),
    ):
        if key in data:
            fprint(HTML(header))
            for item in data[key]:
                print(item)