def get_janis_from_module_spec(spec, include_commandtools=False):
    """
    Get all the Janis.Workflow's that are defined in the file (__module__ == 'module.name')
    :return: List of all the subclasses of a workflow
    """
    if include_commandtools:
        Logger.log("Expanded search to commandtools in " + str(spec))

    potentials = []
    for k, ptype in spec.__dict__.items():
        if isinstance(ptype, Tool):
            potentials.append((k, ptype))
            continue
        if not callable(ptype):
            continue
        if isabstract(ptype):
            continue
        if not isclass(ptype):
            continue
        if ptype.__module__ != "module.name":
            continue
        if ptype == Workflow:
            continue
        if issubclass(ptype, Workflow):
            potentials.append((k, ptype()))
            # skip the Tool check below, otherwise a Workflow (which is also a
            # Tool subclass) would be added twice when include_commandtools is set
            continue
        if include_commandtools and issubclass(ptype, Tool):
            potentials.append((k, ptype()))

    return potentials
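# Usage sketch (assumption: the caller loads the user's script under the fixed
# module name "module.name", which is what the ptype.__module__ check above
# filters on; this helper is illustrative, not part of the codebase):
import importlib.util

def load_spec_for_search(path: str):
    spec = importlib.util.spec_from_file_location("module.name", path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module

# potentials = get_janis_from_module_spec(load_spec_for_search("my_workflow.py"))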
def send_email(subject: str, body: str):

    nots = JanisConfiguration.manager().notifications
    mail_program = nots.mail_program
    if not mail_program:
        return Logger.log("Skipping email send as no mail program is configured")

    if not nots.email or nots.email.lower() == "none":
        Logger.log("Skipping notify status change as no email")
        return

    emails: List[str] = (
        nots.email if isinstance(nots.email, list) else nots.email.split(",")
    )

    email_template = f"""\
Content-Type: text/html
To: {"; ".join(emails)}
From: [email protected]
Subject: {subject}

{body}"""

    command = f"echo '{email_template}' | {mail_program}"
    Logger.log("Sending email with command: " + str(command.replace("\n", "\\n")))
    try:
        subprocess.call(command, shell=True)
    except Exception as e:
        Logger.critical(f"Couldn't send email '{subject}' to {emails}: {e}")
def start_or_submit(self, run_in_background, watch=False):
    # check container environment is loaded
    metadb = self.database.workflowmetadata

    jc = metadb.configuration
    metadb.containertype = jc.container.__name__
    metadb.containerversion = jc.container.test_available_by_getting_version()

    # this happens for all workflows no matter what type
    self.set_status(TaskStatus.QUEUED)

    wid = metadb.wid

    # resubmit the engine
    if not run_in_background:
        return self.resume()

    loglevel = LogLevel.get_str(Logger.CONSOLE_LEVEL)
    command = ["janis", "--logLevel", loglevel, "resume", "--foreground", wid]
    scriptdir = self.get_path_for_component(self.WorkflowManagerPath.configuration)
    logdir = self.get_path_for_component(self.WorkflowManagerPath.logs)
    # note: 'detatched' is the spelling the template API uses for this method
    jc.template.template.submit_detatched_resume(
        wid=wid, command=command, scriptdir=scriptdir, logsdir=logdir, config=jc
    )
    Logger.info("Submitted detached engine")

    if watch:
        Logger.log("Watching submitted workflow")
        self.show_status_screen()
def get_token(self, info: ContainerInfo) -> Optional[str]:

    req = self.build_token_request(info)
    if req is None:
        return None

    Logger.log(f"Requesting auth token for {info}: " + req.full_url)
    response = request.urlopen(req)
    data = response.read()
    res = json.loads(data.decode(response.info().get_content_charset("utf-8")))
    return res.get("token")
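# Note (assumption about the registry protocol): Docker-registry-style token
# endpoints respond with JSON like {"token": "...", "expires_in": 300, ...};
# only the "token" field is consumed here, so extra fields are ignored.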
def populate_cache(self):
    self._id_cache = set()
    idkeys = set(self.get_id_keys())
    idkeys_ordered = list(idkeys)
    prows = f"SELECT {', '.join(idkeys_ordered)} FROM {self._tablename}"

    with self.with_cursor() as cursor:
        Logger.log("Running query: " + str(prows))
        rows = cursor.execute(prows).fetchall()
        for row in rows:
            self._id_cache.add(row)
def from_template(name, options) -> EnvironmentTemplate:
    template = get_template(name)
    if not template:
        raise Exception(f"Couldn't find Configuration template with name: '{name}'")
    Logger.log(f"Got template '{template.__name__}' from id = {name}")
    validate_template_params(template, options)
    newoptions = {**options}
    # newoptions.pop("template")
    return template(**newoptions)
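# Usage sketch (the template id and option key below are illustrative; real ids
# come from the registry that get_template() consults):
#
#   template = from_template(
#       "slurm_singularity",
#       {"container_dir": "/shared/containers"},
#   )
#
# validate_template_params checks the options against the template's __init__
# signature, so a mistyped config key should fail fast here rather than at runtime.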
def __init__(self, db_path: Optional[str], readonly=False):
    self.readonly = readonly
    if not db_path:
        config_dir = EnvVariables.config_dir.resolve(True)
        Logger.log(
            f"db_path wasn't provided to config manager, using config_dir: '{config_dir}/janis.db'"
        )
        db_path = fully_qualify_filename(os.path.join(config_dir, "janis.db"))
    self.db_path = db_path
    self.is_new = not os.path.exists(db_path)

    cp = os.path.dirname(db_path)
    os.makedirs(cp, exist_ok=True)

    self._connection: Optional[sqlite3.Connection] = None
    self._taskDB: Optional[TasksDbProvider] = None
def send_email(subject: str, body: str):
    import tempfile, os

    nots = PreparedJob.instance().notifications
    mail_program = nots.mail_program
    if not mail_program:
        return Logger.debug("Skipping email send as no mail program is configured")

    if not nots.email or nots.email.lower() == "none":
        Logger.debug("Skipping notify status change as no email")
        return

    emails: List[str] = (
        nots.email if isinstance(nots.email, list) else nots.email.split(",")
    )
    Logger.debug(f"Sending email with subject {subject} to {emails}")

    email_template = f"""\
Content-Type: text/html
To: {"; ".join(emails)}
From: {nots.from_email}
Subject: {subject}

{body}"""

    # 2020-08-24 mfranklin: Write to disk and cat, because some emails are just too big
    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp:
            # write the full message to the temp file
            tmp.write(email_template)

        command = f"cat '{path}' | {mail_program}"
        Logger.log("Sending email with command: " + str(command.replace("\n", "\\n")))
        try:
            subprocess.call(command, shell=True)
            Logger.debug("Sent email successfully")
        except Exception as e:
            Logger.critical(f"Couldn't send email '{subject}' to {emails}: {e}")
    finally:
        os.remove(path)
def __init__(
    self,
    mail_program: str = None,
    container_dir: str = None,
    load_instructions=None,
    build_instructions=f"singularity pull $image docker://${{docker}}",
    max_cores=None,
    max_ram=None,
    max_duration=None,
    can_run_in_foreground=True,
    run_in_background=False,
):
    super().__init__(
        mail_program=mail_program,
        containertype=Singularity,
        max_cores=max_cores,
        max_ram=max_ram,
        max_duration=max_duration,
        can_run_in_foreground=can_run_in_foreground,
        run_in_background=run_in_background,
    )
    self.singularity_load_instructions = load_instructions
    self.singularity_build_instructions = build_instructions
    # resolves a default if container_dir isn't specified
    self.singularity_container_dir = self.process_container_dir(container_dir)

    invalid_paths = self.validate_paths(
        {"Singularity Container Directory": self.singularity_container_dir}
    )
    if len(invalid_paths) > 0:
        raise Exception(f"Expected absolute paths for {', '.join(invalid_paths)}")

    Logger.log(
        f"Setting Singularity: containerdir={container_dir}, loadinstructions={load_instructions}"
    )
    Singularity.containerdir = container_dir
    Singularity.loadinstructions = load_instructions
    Singularity.buildinstructions = build_instructions
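# Note (assumption about the container machinery): the $image and ${docker}
# placeholders in build_instructions are substituted later when Singularity
# builds a container; the f-string above only escapes the braces so the
# literal "${docker}" survives into the default value.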
def filter_updates(
    self, jobs: List[RunJobModel], add_inserts_to_cache=True
) -> Tuple[List[RunJobModel], List[RunJobModel]]:
    # don't call super, it'll break because of the cache
    updates = []
    inserts = []
    if len(jobs) == 0:
        return updates, inserts

    self.populate_cache_if_required()

    idkeys = set(self.get_id_keys())
    idkeys_ordered = list(idkeys)
    dbalias_map = {t.dbalias: t.name for t in self._base.keymap()}

    skipped = 0
    for job in jobs:
        el_idkey = tuple(getattr(job, dbalias_map[_k]) for _k in idkeys_ordered)

        jstatus = self._cache_completed_ids.get(el_idkey)
        if jstatus is None:
            inserts.append(job)
        elif job.status.value != jstatus:
            updates.append(job)
        elif jstatus:
            skipped += 1
        self._cache_completed_ids[el_idkey] = job.status.value

    if skipped:
        Logger.log(
            f"Skipped updating {skipped} jobs as those jobs were already in a final state"
        )

    # getsizeof returns bytes, so // 1024 gives kilobytes (note: this measures
    # the dict object itself, not the keys and values it holds)
    memory = getsizeof(self._cache_completed_ids) // 1024
    if (
        self.job_cache_last_idx < len(self.job_cache_warnings)
        and memory > self.job_cache_warnings[self.job_cache_last_idx]
    ):
        Logger.warn(f"Job cache is using {memory} kB")
        self.job_cache_last_idx += 1

    return updates, inserts
def __init__(
    self,
    mail_program: str,
    container_dir: str,
    load_instructions=None,
    build_instructions=f"singularity pull $image docker://${{docker}}",
    max_cores=None,
    max_ram=None,
    can_run_in_foreground=True,
    run_in_background=False,
):
    super().__init__(
        mail_program=mail_program,
        containertype=Singularity,
        max_cores=max_cores,
        max_ram=max_ram,
        can_run_in_foreground=can_run_in_foreground,
        run_in_background=run_in_background,
    )
    self.singularity_load_instructions = load_instructions
    self.singularity_container_dir = container_dir
    self.singularity_build_instructions = build_instructions

    Logger.log(
        f"Setting Singularity: containerdir={container_dir}, loadinstructions={load_instructions}"
    )

    invalid_paths = self.validate_paths({"Container Dir": container_dir})
    if len(invalid_paths) > 0:
        raise Exception(f"Expected absolute paths for {', '.join(invalid_paths)}")

    # little bit hacky: configure the Singularity class globally
    Singularity.containerdir = container_dir
    Singularity.loadinstructions = load_instructions
    Singularity.buildinstructions = build_instructions
def try_get_outputs_for(self, inpid, wf, inputs, output_dir, description):

    from janis_assistant.main import WorkflowManager, run_with_outputs

    if os.path.exists(output_dir):
        try:
            wm = WorkflowManager.from_path_get_latest_manager(
                output_dir, readonly=True
            )
            outs_raw = wm.database.outputsDB.get()
            outs = {
                o.id_: o.value or o.new_path
                for o in outs_raw
                if o.value or o.new_path
            }
            if len(outs) > 0:
                out_val = first_value(outs)
                Logger.info(
                    f"Using cached value of transformation ({description}) for {inpid}: {out_val}"
                )
                return out_val
            Logger.log(
                f"Didn't get any outputs from previous workflow manager when deriving input {inpid} ({description})"
            )

        except Exception as e:
            Logger.debug(
                f"Couldn't get outputs from existing output_path for {inpid}, '{output_dir}' ({description}): {e}"
            )

    outs = run_with_outputs(wf, inputs=inputs, output_dir=output_dir)
    if not outs or len(outs) < 1:
        Logger.critical(
            f"Couldn't get outputs from transformation ({description}) for '{inpid}'"
        )
        return None

    return first_value(outs)
def submit_workflow_if_required(self):
    if self.database.progressDB.has(ProgressKeys.submitWorkflow):
        return Logger.log(
            f"Workflow '{self.wid}' has already been submitted, skipping"
        )

    fn_wf = self.database.workflowmetadata.submission_workflow
    fn_inp = self.database.workflowmetadata.submission_inputs
    fn_deps = self.database.workflowmetadata.submission_resources

    engine = self.get_engine()

    Logger.debug(f"Submitting task '{self.wid}' to '{engine.id()}'")
    self._engine_wid = engine.start_from_paths(self.wid, fn_wf, fn_inp, fn_deps)
    self.database.workflowmetadata.engine_wid = self._engine_wid

    Logger.info(
        f"Submitted workflow ({self.wid}), got engine id = '{self.get_engine_wid()}'"
    )
    self.database.progressDB.set(ProgressKeys.submitWorkflow)
def prepare_all_tools():
    JanisShed.hydrate(modules=[janis_unix, janis_bioinformatics])

    data_types = JanisShed.get_all_datatypes()
    tools = {
        ts[0].id(): {t.version(): t for t in ts} for ts in JanisShed.get_all_tools()
    }

    Logger.info(f"Preparing documentation for {len(tools)} tools")
    Logger.info(f"Preparing documentation for {len(data_types)} data_types")

    tool_module_index = {}
    dt_module_index = {}
    ROOT_KEY = "root"

    for toolname, toolsbyversion in tools.items():
        # tool = tool_vs[0][0]()
        tool_versions = sort_tool_versions(list(toolsbyversion.keys()))
        default_version = tool_versions[0]
        Logger.log(
            f"Preparing {toolname}, found {len(tool_versions)} version[s] ({','.join(tool_versions)})"
        )

        defaulttool = toolsbyversion[default_version]
        if isclass(defaulttool):
            defaulttool = defaulttool()

        try:
            tool_path_components = list(
                filter(
                    lambda a: bool(a),
                    [defaulttool.tool_module(), defaulttool.tool_provider()],
                )
            )
        except Exception as e:
            Logger.critical(f"Failed to generate docs for {toolname}: {e}")
            continue

        # (toolURL, tool, isPrimary)
        toolurl_to_tool = [(toolname.lower(), defaulttool, True)] + [
            (get_tool_url(toolname, v), toolsbyversion[v], False)
            for v in tool_versions
        ]

        path_components = "/".join(tool_path_components)
        output_dir = f"{tools_dir}/{path_components}/".lower()

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        for (toolurl, tool, isprimary) in toolurl_to_tool:
            output_str = prepare_tool(tool, tool_versions, not isprimary)
            output_filename = output_dir + toolurl + ".rst"
            with open(output_filename, "w+") as tool_file:
                tool_file.write(output_str)

        nested_keys_append_with_root(
            tool_module_index, tool_path_components, toolname, root_key=ROOT_KEY
        )

        Logger.log("Prepared " + toolname)

    for d in data_types:
        # tool = tool_vs[0][0]()
        if issubclass(d, Array):
            Logger.info("Skipping Array DataType")
            continue
        try:
            dt = d()
        except:
            print(d.__name__ + " failed to instantiate")
            continue
        did = dt.name().lower()
        Logger.log("Preparing " + dt.name())
        output_str = prepare_data_type(dt)

        dt_path_components = []
        # dt_path_components = list(filter(
        #     lambda a: bool(a),
        #     [, tool.tool_provider()]
        # ))

        path_components = "/".join(dt_path_components)
        output_dir = f"{dt_dir}{path_components}/"
        output_filename = output_dir + did + ".rst"

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        nested_keys_append_with_root(
            dt_module_index, dt_path_components, did, root_key=ROOT_KEY
        )

        with open(output_filename, "w+") as dt_file:
            dt_file.write(output_str)

        Logger.log("Prepared " + did)

    def prepare_modules_in_index(contents, title, dir, max_depth=1):
        module_filename = dir + "/index.rst"
        module_tools = sorted(set(contents[ROOT_KEY] if ROOT_KEY in contents else []))
        submodule_keys = sorted(m for m in contents.keys() if m != ROOT_KEY)
        indexed_submodules_tools = [m.lower() for m in submodule_keys]

        with open(module_filename, "w+") as module_file:
            module_file.write(
                get_tool_toc(
                    alltoolsmap=tools,
                    title=title,
                    intro_text=f"Automatically generated index page for {title}:",
                    subpages=indexed_submodules_tools,
                    tools=module_tools,
                    max_depth=max_depth,
                )
            )

        for submodule in submodule_keys:
            prepare_modules_in_index(
                contents=contents[submodule],
                title=submodule,
                dir=f"{dir}/{submodule}/",
            )

    def prepare_dtmodules_in_index(contents, title, dir, max_depth=1):
        module_filename = dir + "/index.rst"
        module_tools = sorted(set(contents[ROOT_KEY] if ROOT_KEY in contents else []))
        submodule_keys = sorted(m for m in contents.keys() if m != ROOT_KEY)
        indexed_submodules_tools = [m.lower() + "/index" for m in submodule_keys]

        with open(module_filename, "w+") as module_file:
            module_file.write(
                get_toc(
                    title=title,
                    intro_text=f"Automatically generated index page for {title}:",
                    subpages=indexed_submodules_tools + module_tools,
                    max_depth=max_depth,
                )
            )

        for submodule in submodule_keys:
            prepare_modules_in_index(
                contents=contents[submodule],
                title=submodule,
                dir=f"{dir}/{submodule}/",
            )

    prepare_modules_in_index(tool_module_index, title="Tools", dir=tools_dir)
    prepare_dtmodules_in_index(
        dt_module_index, title="Data Types", dir=dt_dir, max_depth=1
    )
def save_changes(self):
    rows = self.metadata.get_encoded_changes()
    Logger.log(f"Updating workflow fields: {rows}")
    self.save_encoded_rows(rows)
    self.metadata.discard_changes()
def get_file_from_searchname(name, cwd):
    if cwd == ".":
        cwd = os.getcwd()
    Logger.log(f"Searching for a file called '{name}'")
    resolved = os.path.expanduser(name)
    if os.path.exists(resolved) and os.path.isfile(resolved):
        Logger.log(f"Found file called '{name}'")
        return resolved

    Logger.log(f"Searching for file '{name}' in the cwd, '{cwd}'")
    with Path(cwd):
        if os.path.exists(name) and os.path.isfile(name):
            Logger.log(f"Found file in '{cwd}' called '{name}'")
            return os.path.join(cwd, name)

    Logger.log(
        "Attempting to get search path $JANIS_SEARCHPATH from environment variables"
    )
    search_path = os.getenv("JANIS_SEARCHPATH")
    if search_path:
        Logger.log(
            f"Got value for env JANIS_SEARCHPATH '{search_path}', searching for file '{name}' here."
        )
        if os.path.exists(search_path):
            with Path(search_path):
                if os.path.exists(name) and os.path.isfile(name):
                    Logger.log(f"Found file in '{search_path}' called '{name}'")
                    return os.path.join(search_path, name)
        else:
            Logger.warn(
                f"Search path '{search_path}' (obtained from $JANIS_SEARCHPATH) does not exist"
            )
    else:
        Logger.log("Couldn't find JANIS_SEARCHPATH in environment variables, skipping")

    Logger.log(
        f"Couldn't find a file with filename '{name}' in any of the following: "
        f"full path, current working directory ({cwd}) or the search path."
    )
    return None
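# Resolution order, as implemented above: home-expanded path first, then the
# cwd, then $JANIS_SEARCHPATH; None means not found anywhere.
# Hypothetical call ("hello.wdl" is illustrative):
#
#   path = get_file_from_searchname("hello.wdl", cwd=".")
#   if path is None:
#       raise FileNotFoundError("hello.wdl")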
def insert_or_update_many(self, els: List[T]):
    if len(els) == 0:
        return

    queries: Dict[str, List[List[any]]] = {}
    update_separator = ",\n"
    tab = "\t"

    idkeys = set(self.get_id_keys())
    idkeys_ordered = list(idkeys)
    pkeys_ordered = self.get_primary_keys()
    existing_keys = set()  # (*pkeys_ordered)  # get all primary keys

    dbalias_map: Dict[str, DatabaseObjectField] = {
        t.dbalias: t for t in self._base.keymap()
    }

    updates, inserts = self.filter_updates(els)

    def add_query(query, values):
        if query in queries:
            queries[query].append(values)
        else:
            queries[query] = [values]

    for job in updates:
        keys, values = job.prepare_insert()
        # el_pkeys = [getattr(job, dbalias_map[_k]) for _k in idkeys_ordered]

        keys_np, values_np = [], []
        for k, v in zip(keys, values):
            if k in idkeys:
                continue
            keys_np.append(k)
            values_np.append(v)

        # problem is we want to update matching on some fields when they are NULL, our WHERE statement
        # should be something like:
        # WHERE id1 = ? AND id2 = ? AND id3 is null AND id4 is null

        id_keyvalues = {
            pkey: prep_object_for_db(
                getattr(job, dbalias_map[pkey].name),
                encode=dbalias_map[pkey].encode,
            )
            for pkey in idkeys_ordered
        }
        id_withvalues_keyvalue_ordered = [
            (idkey, idvalue)
            for idkey, idvalue in id_keyvalues.items()
            if idvalue is not None
        ]
        id_withvalues_updater_keys = [
            f"{idkey} = ?" for idkey, _ in id_withvalues_keyvalue_ordered
        ]
        id_withvalues_updater_values = [
            idvalue for _, idvalue in id_withvalues_keyvalue_ordered
        ]
        id_novalues_updater_keys = [
            f"{idkey} is NULL"
            for idkey, idvalue in id_keyvalues.items()
            if idvalue is None
        ]

        prepared_statement = f"""
        UPDATE {self._tablename}
            SET {', '.join(f'{k} = ?' for k in keys_np)}
        WHERE
            {" AND ".join([*id_withvalues_updater_keys, *id_novalues_updater_keys])}
        """
        vtuple = (
            *values_np,
            *id_withvalues_updater_values,
        )

        add_query(prepared_statement, vtuple)

    for job in inserts:
        keys, values = job.prepare_insert()
        # el_pkeys = [getattr(job, dbalias_map[_k]) for _k in idkeys_ordered]
        prepared_statement = f"""
        INSERT INTO {self._tablename}
            ({', '.join(keys)})
        VALUES
            ({', '.join('?' for _ in keys)});
        """
        add_query(prepared_statement, values)

    Logger.log(
        f"DB {self._tablename}: Inserting {len(inserts)} and updating {len(updates)} rows"
    )
    with self.with_cursor() as cursor:
        start = DateUtil.now()
        if len(inserts) + len(updates) > 300:
            Logger.warn(
                f"DB '{self._tablename}' is inserting {len(inserts)} and updating {len(updates)} rows, this might take a while"
            )
        for query, vvalues in queries.items():
            try:
                Logger.log(f"Running query: {query}\n\t: values: {vvalues}")
                cursor.executemany(query, vvalues)
            except OperationalError as e:
                Logger.log_ex(e)

    seconds = (DateUtil.now() - start).total_seconds()
    if seconds > 2:
        Logger.warn(
            f"DB '{self._tablename}' took {second_formatter(seconds)} to insert {len(inserts)} and update {len(updates)} rows"
        )

    return True
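# Illustration (hypothetical table and columns): for an updated row whose id
# columns are (wid='w1', jid=None), the UPDATE built above comes out as:
#
#   UPDATE jobs
#       SET status = ?, finish = ?
#   WHERE
#       wid = ? AND jid is NULL
#
# NULL id columns have to be matched with "is NULL" rather than "= ?", because
# "col = NULL" never evaluates true in SQL.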