def get_janis_from_module_spec(spec, include_commandtools=False):
    """
    Get all the Janis.Workflow's that are defined in the file (__module__ == 'module.name')
    :return: List of all the subclasses of a workflow
    """
    if include_commandtools:
        Logger.log("Expanded search to commandtools in " + str(spec))

    potentials = []
    for k, ptype in spec.__dict__.items():
        if isinstance(ptype, Workflow):
            potentials.append((k, ptype))
            continue
        if not callable(ptype):
            continue
        if isabstract(ptype):
            continue
        if not isclass(ptype):
            continue
        if ptype.__module__ != "module.name":
            continue
        if ptype is Workflow:
            continue
        if issubclass(ptype, Workflow):
            potentials.append((k, ptype()))
        if include_commandtools and issubclass(ptype, CommandTool):
            potentials.append((k, ptype()))

    return potentials
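# Usage sketch (not from the codebase): the '__module__ == "module.name"'
# check above matches classes from a module loaded under that exact spec
# name, e.g. via importlib:
#
#   import importlib.util
#   spec = importlib.util.spec_from_file_location("module.name", "tools.py")
#   module = importlib.util.module_from_spec(spec)
#   spec.loader.exec_module(module)
#   potentials = get_janis_from_module_spec(module, include_commandtools=True)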
def prepare_tool(
    tool: Tool,
    toolversions: List[str],
    isorphan: bool,
    is_published_pipeline: bool = False,
):
    # Stuff to list on the documentation page:
    #   - Versions of tools
    #   - Generated command
    #   - Cool if it grouped the tools by vendor
    if not tool:
        return None
    try:
        if is_published_pipeline:
            return ""
        if tool.type() == ToolType.CommandTool:
            return prepare_commandtool_page(tool, toolversions)
        elif tool.type() == ToolType.Workflow:
            return prepare_workflow_page(tool, toolversions)
        elif tool.type() == ToolType.CodeTool:
            return prepare_code_tool_page(tool, toolversions)
    except Exception as e:
        traceback.print_exc()
        Logger.critical(
            "Couldn't generate documentation for " + tool.id() + " " + str(e)
        )
def query_tasks(self, status, name) -> Dict[str, RunModel]:
    rows: List[TaskRow] = self.get_lazy_db_connection().get_all_tasks()
    failed = []
    relevant = {}

    for row in rows:
        if not os.path.exists(row.execution_dir):
            failed.append(row.submission_id)
            continue
        try:
            metadb = WorkflowManager.has(
                row.execution_dir,
                submission_id=row.submission_id,
                name=name,
                status=status,
            )
            if metadb:
                model = metadb
                model.outdir = row.output_dir
                relevant[row.submission_id] = model
        except Exception as e:
            Logger.critical(f"Couldn't check workflow '{row.submission_id}': {e}")
            failed.append(row.submission_id)

    if failed:
        failedstr = ", ".join(failed)
        Logger.warn(
            f"Couldn't get information for tasks: {failedstr}, run "
            f"'janis cleanup' to clean up your tasks."
        )
    return relevant
def pickle_obj(obj):
    if obj is None:
        return None
    try:
        return pickle.dumps(obj, protocol=2)
    except Exception as ex:
        Logger.warn(f"Couldn't pickle {repr(obj)} as encountered {repr(ex)}")
        return None
def unpickle_obj(obj):
    if obj is None:
        return None
    try:
        return pickle.loads(obj)
    except Exception as ex:
        Logger.warn(f"Couldn't unpickle {repr(obj)} as encountered {repr(ex)}")
        return None
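# Round-trip sketch for the pair above: pickle_obj produces bytes suitable
# for a BLOB column (protocol 2, readable by older picklers), and
# unpickle_obj restores the object; both return None instead of raising.
#
#   blob = pickle_obj({"wid": "abc123"})
#   unpickle_obj(blob)             # -> {"wid": "abc123"}
#   unpickle_obj(b"not a pickle")  # -> None (warning logged)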
def __init__(self, host, repository, image, tag, chash: str):
    self.host = host
    self.repository = repository
    self.image = image
    self.tag = tag
    self.chash = chash

    if image is None:
        Logger.warn(f"{str(self)} didn't have an image, so defaulting to 'ubuntu'")
        self.image = "ubuntu"
def __init__(self, dblocation, tablename, readonly=False):
    sqlitedict.logger.disabled = True
    ro = "r" if readonly else "c"
    Logger.debug(f"Opening connection to {dblocation}/{tablename} with mode {ro}")
    self.kvdb = sqlitedict.SqliteDict(
        dblocation, tablename=tablename, autocommit=True, flag=ro
    )
def get_token(self, info: ContainerInfo) -> Optional[str]:
    req = self.build_token_request(info)
    if req is None:
        return None

    Logger.log(f"Requesting auth token for {info}: " + req.full_url)
    response = request.urlopen(req)
    data = response.read()
    res = json.loads(data.decode(response.info().get_content_charset("utf-8")))
    return res.get("token")
def populate_cache(self):
    self._id_cache = set()
    idkeys = set(self.get_id_keys())
    idkeys_ordered = list(idkeys)
    prows = f"SELECT {', '.join(idkeys_ordered)} FROM {self._tablename}"
    with self.with_cursor() as cursor:
        Logger.log("Running query: " + str(prows))
        rows = cursor.execute(prows).fetchall()
        for row in rows:
            self._id_cache.add(row)
def exec_command(self, command):
    cmd = ["singularity", "run", "instance://" + self.instancename]
    if isinstance(command, list):
        cmd.extend(command)
    else:
        cmd.append(command)
    try:
        val = subprocess.check_output(cmd)
        return (val.strip().decode() if val else val), 0
    except subprocess.CalledProcessError as e:
        Logger.critical("Singularity exec_command failed")
        return str(e), e.returncode
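# Usage sketch (hypothetical instance): the command may be a single string
# or an argv-style list; both are appended to
# "singularity run instance://<instancename>".
#
#   out, rc = container.exec_command(["mysqladmin", "ping"])
#   out, rc = container.exec_command("whoami")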
def __setattr__(self, name, value):
    if name in self.attributes_to_persist:
        try:
            self.kvdb[name] = value
        except Exception as e:
            Logger.critical(f"Failed to write {name}={value} due to: {e}")
    return super().__setattr__(name, value)
def deserialize_inner(val):
    if val is None:
        return None
    try:
        return json.loads(val)
    except Exception as ex:
        # tbh, sometimes the mysql database converts '"myvalue"' -> 'myvalue'
        # (dropping the quotes), so we'll fall back to the string representation
        Logger.debug(
            f"Couldn't deserialize value, using string representation instead (value: {repr(val)}): {repr(ex)}"
        )
        return str(val)
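# Examples of the fallback described above:
#
#   deserialize_inner('{"a": 1}')  # -> {"a": 1}
#   deserialize_inner('[1, 2]')    # -> [1, 2]
#   deserialize_inner("myvalue")   # -> "myvalue" (invalid JSON, falls back to str)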
def stop_container(self):
    try:
        Logger.info(f"Stopping mysql container '{self.instancename}'")
        if self.run_logger:
            self.run_logger.terminate()
        cmd = ["singularity", "instance", "stop", self.instancename]
        return subprocess.check_output(cmd)
    except subprocess.CalledProcessError as e:
        Logger.critical(
            f"Couldn't stop singularity instance '{self.instancename}': {e}"
        )
def mark_paused(self):
    try:
        self.database.workflowmetadata.please_pause = True
        Logger.info(
            "Marked workflow as paused, this may take some time to pause properly"
        )
        self.database.workflowmetadata.commit()
        return True
    except Exception as e:
        Logger.critical("Couldn't mark paused: " + str(e))
        return False
def set_status(self, status: TaskStatus, force_notification=False):
    prev = self.database.workflowmetadata.status
    if prev == status and not force_notification:
        return

    Logger.info("Status changed to: " + str(status))
    self.database.workflowmetadata.status = status
    self.database.commit()
    # send an email here
    NotificationManager.notify_status_change(status, self.database.get_metadata())
def from_template(name, options) -> EnvironmentTemplate:
    template = get_template(name)
    if not template:
        raise Exception(f"Couldn't find Configuration template with name: '{name}'")
    Logger.log(f"Got template '{template.__name__}' from id = {name}")
    validate_template_params(template, options)
    newoptions = {**options}
    # newoptions.pop("template")
    return template(**newoptions)
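# Usage sketch (template name and options are illustrative; what's available
# depends on the registered and installed templates):
#
#   template = from_template("slurm_singularity", {"container_dir": "/containers"})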
def get_tag_and_cleanup_prefix(
    prefix,
) -> Optional[Tuple[str, str, bool, Optional[DataType]]]:
    """
    :param prefix:
    :return: (raw_element, tag, has_equals, potential_type)
    """
    # cases:
    #   -a ADAPTER
    #   --adapter=ADAPTER
    #   --quality-cutoff=[5'CUTOFF,]3'CUTOFF
    el = prefix.lstrip()
    has_equals = False
    potential_type = None

    # if the prefix is split by ':' or '='
    if ":" in el or "=" in el:
        parts = None
        if ":" in el:
            parts = el.split(":")
        elif "=" in el:
            parts = el.split("=")
            has_equals = True

        if len(parts) > 2:
            Logger.warn(
                f"Unexpected number of components in the tag '{el}' to guess the type, "
                f"using '{parts[0]}' and skipping type inference"
            )
            el = parts[0]
        else:
            el = parts[0]
            pt = guess_type(parts[1])
            if pt:
                potential_type = pt

    if " " in el:
        el = el.split(" ")[0]

    title_components = [c.strip().lower() for c in el.split("-") if c]
    if len(title_components) == 0:
        Logger.critical(f"Title components for tag '{prefix}' do not have a component")
        return None
    tag = "_".join(title_components)

    if tag.lower() in common_replacements:
        tag = common_replacements[tag.lower()]
    if tag.lower() == "outputfilename":
        potential_type = Filename

    return el, tag, has_equals, potential_type
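# Worked examples for the cases listed above (type guesses depend on the
# datatypes registered with JanisShed, and assume no common_replacements
# entry applies):
#
#   get_tag_and_cleanup_prefix("-a ADAPTER")
#   # -> ("-a", "a", False, None)
#   get_tag_and_cleanup_prefix("--adapter=ADAPTER")
#   # -> ("--adapter", "adapter", True, <guessed type or None>)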
def get_list_of_contigs_from_bed(bedfile: str) -> Set[str]:
    try:
        contigs = set()
        with open_potentially_compressed_file(bedfile) as fp:
            for line in fp:
                contig: str = line.split("\t")[0]
                if contig:
                    contigs.add(contig.strip())
        return contigs
    except Exception as e:
        Logger.critical(f"Couldn't get contigs from bedfile {bedfile}: {str(e)}")
        return set()
def get_list_of_contigs_from_fastafai(fai_idx: str) -> Set[str]:
    # Structure: contig, size, location, basesPerLine and bytesPerLine
    try:
        contigs = set()
        with open_potentially_compressed_file(fai_idx) as f:
            for line in f:
                contigs.add(line.split("\t")[0])
        return contigs
    except Exception as e:
        Logger.critical(f"Couldn't get contigs from reference {fai_idx}: {str(e)}")
        return set()
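# Example .fai line (tab-separated: contig, size, offset, basesPerLine,
# bytesPerLine); only the first column is kept:
#
#   "chr1\t248956422\t112\t70\t71"  -> "chr1"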
def get_digest(self, info: ContainerInfo) -> Optional[str]:
    try:
        token = self.get_token(info)
    except Exception as e:
        Logger.critical(
            f"Couldn't get digest for container (couldn't get token): '{str(info)}': {str(e)}"
        )
        return None

    if token:
        Logger.debug(f"Got token for '{info}': {token[: min(5, len(token) - 1)]}...")

    try:
        req = self.build_request(info, token)
        Logger.debug(f"Requesting digest from: {req.full_url}")
        with request.urlopen(req) as response:
            rheaders = response.headers
            digest = rheaders.get("etag", rheaders.get("Docker-Content-Digest"))
            if digest is not None:
                digest = digest.replace("'", "").replace('"', "")
            return digest
    except Exception as e:
        Logger.critical(f"Couldn't get digest for container '{str(info)}': {str(e)}")
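# The digest comes from the registry's "etag" / "Docker-Content-Digest"
# response header and, once quotes are stripped, typically looks like
# "sha256:<64 hex chars>" (illustrative shape, not a real value).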
def remove_exec_dir(self):
    status = self.database.workflowmetadata.status
    keep_intermediate = self.database.workflowmetadata.keepexecutiondir
    if not keep_intermediate and status is not None and status == TaskStatus.COMPLETED:
        execdir = self.get_path_for_component(self.WorkflowManagerPath.execution)
        if execdir and execdir != "None":
            Logger.info("Cleaning up execution directory")
            self.environment.filescheme.rm_dir(execdir)
            self.database.progressDB.set(ProgressKeys.cleanedUp)
def guess_type(potential_type: str):
    if not potential_type:
        return None
    lowered = potential_type.lower()
    hopeful_type = JanisShed.get_datatype(lowered)
    if not hopeful_type and "st" in lowered:
        hopeful_type = String
    if hopeful_type:
        Logger.info(f"Found type {hopeful_type.__name__} from tag: {potential_type}")
    return hopeful_type
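# Examples (sketch; the first lookup depends on what JanisShed has
# registered, and the "st" substring check is only a loose heuristic):
#
#   guess_type("")        # -> None
#   guess_type("string")  # -> String (via the registry or the "st" heuristic)
#   guess_type("xyz")     # -> None, unless "xyz" is a registered datatype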
def load_templates_if_required():
    import importlib_metadata

    global additional_templates
    if additional_templates is None:
        additional_templates = {}
        eps = importlib_metadata.entry_points().get(TEMPLATE_EP, [])
        for entrypoint in eps:  # pkg_resources.iter_entry_points(group=TEMPLATE_EP)
            try:
                additional_templates[entrypoint.name] = entrypoint.load()
            except ImportError as e:
                Logger.critical(
                    f"Couldn't import janis template '{entrypoint.name}': {e}"
                )
                continue
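# Plugins register templates through this entry-point group; a sketch of the
# corresponding setup.py stanza (assuming TEMPLATE_EP == "janis.templates";
# names are illustrative):
#
#   entry_points={
#       "janis.templates": ["mytemplate = mypkg.templates:MyTemplate"],
#   }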
def get_by_id(
    self, submission_id, allow_operational_errors=True
) -> Optional[SubmissionModel]:
    s = self.get(
        where=("id = ?", [submission_id]),
        allow_operational_errors=allow_operational_errors,
    )
    if s is None:
        return None
    if len(s) != 1:
        Logger.warn(
            f"Couldn't get submission with id={submission_id}, query returned {len(s)} results."
        )
        return None
    return s[0]
def run_test_case(
    tool_id: str,
    test_case: str,
    engine: EngineType,
    output: Optional[Dict] = None,
    config: str = None,
) -> Dict[str, Any]:
    tool = test_helpers.get_one_tool(tool_id)
    if not tool:
        raise Exception(f"Tool {tool_id} not found")

    runner = ToolTestSuiteRunner(tool, config=config)
    tests_to_run = [tc for tc in tool.tests() if tc.name.lower() == test_case.lower()]
    if not tests_to_run:
        raise Exception(f"Test case {test_case} not found")
    if len(tests_to_run) > 1:
        raise Exception(
            f"There is more than one test case with the same name {test_case}"
        )

    if output is not None:
        Logger.info(
            "Dryrun: validating test using provided output data without running the workflow"
        )

    failed = set()
    succeeded = set()
    execution_error = ""
    try:
        failed, succeeded, output = runner.run_one_test_case(
            t=tests_to_run[0], engine=engine, output=output
        )
    except Exception as e:
        execution_error = str(e)
    except SystemExit as e:
        execution_error = f"Workflow execution failed (exit code: {e.code})"

    return {
        "failed": list(failed),
        "succeeded": list(succeeded),
        "output": output,
        "execution_error": execution_error,
    }
def send_slack_notification(result: Dict, option: NotificationOption):
    Logger.info("Sending notification to Slack")

    if len(result["failed"]) == 0 and not result["execution_error"]:
        failed = False
        status = "Test Succeeded"
        icon = ":white_check_mark:"
    else:
        failed = True
        status = "Test Failed"
        icon = ":x:"

    test_description = ""
    if option.test_id:
        test_description = f" *{option.test_id}*"

    summary_block = {
        "type": "section",
        "text": {
            "type": "mrkdwn",
            "text": f"{icon} {status}{test_description}: {option.tool_name} - {option.test_case}",
        },
    }
    blocks = [summary_block]

    if failed and result["failed"]:
        failed_expected_output = [
            f":black_small_square: {f}" for f in result["failed"]
        ]
        failed_block = {
            "type": "section",
            "text": {"type": "mrkdwn", "text": "\n".join(failed_expected_output)},
        }
        blocks.append(failed_block)

    if result["execution_error"]:
        execution_error_block = {
            "type": "section",
            "text": {"type": "mrkdwn", "text": result["execution_error"]},
        }
        blocks.append(execution_error_block)

    request = {"blocks": blocks}
    resp = requests.post(url=option.url, json=request)

    if resp.status_code == requests.codes.ok:
        Logger.info("Notification sent")
    else:
        Logger.warn("Failed to send slack notification")
        Logger.warn(f"{resp.status_code}: {resp.text}")

    return resp.status_code, resp.text
def get(
    self,
    keys: Union[str, List[str]] = "*",
    where: Tuple[str, List[Any]] = None,
    allow_operational_errors=True,
) -> Optional[List[T]]:
    jkeys = ", ".join(keys) if isinstance(keys, list) else keys
    if jkeys == "*":
        keys = [t.dbalias for t in self._base.keymap()]
        jkeys = ", ".join(keys)

    values = []
    whereclauses = []
    if self._scopes:
        scopes = self._scopes.items()
        whereclauses.extend(f"{k} = ?" for k, _ in scopes)
        values.extend(v for _, v in scopes)
    if where:
        whereclauses.append(where[0])
        values.extend(where[1])

    query = f"SELECT {jkeys} FROM {self._tablename}"
    if whereclauses:
        query += f" WHERE {' AND '.join(whereclauses)}"

    with self.with_cursor() as cursor:
        try:
            rows = cursor.execute(query, values).fetchall()
        except OperationalError as e:
            if not allow_operational_errors:
                raise e
            if "readonly database" in str(e):
                # mfranklin: idk, this sometimes happens. We're doing a select
                # query, idk sqlite3 driver...
                Logger.debug(
                    f"Got readonly error when running query: '{query}', skipping for now"
                )
                return None
            elif "locked" in str(e):
                Logger.debug(
                    f"We hit the janis database.{self._tablename} at the same time "
                    f"the janis process wrote to it, we'll skip for now"
                )
                return None
            raise

        parsedrows = [self._base.deserialize(keys, r) for r in rows]
        return parsedrows
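# Sketch of the SQL this builds, for a hypothetical _tablename "submissions",
# scope {"submission_id": "abc"}, keys=["id", "status"], and
# where=("status = ?", ["completed"]):
#
#   SELECT id, status FROM submissions WHERE submission_id = ? AND status = ?
#
# with values ["abc", "completed"] bound positionally.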
def start(self):
    self.prepare_mysql_dirs()
    self.container.ensure_downloaded()

    # Before we start, we want to create a database for Cromwell. We can do this by
    # binding a directory of scripts to /docker-entrypoint-initdb.d (runs *.sh, *.sql, *.sql.gz)
    # Source: https://hub.docker.com/_/mysql#initializing-a-fresh-instance
    self.container.bindpoints = {
        "/var/lib/mysql": self.datadirectory,
        "/var/run/mysqld": self.mysqldoverride,
        "/etc/mysql/": self.sqlconfdir,
        "/docker-entrypoint-initdb.d": self.startupscriptsdir,
    }
    if isinstance(self.container, Docker):
        self.container.exposedports = {self.forwardedport: None}

    self.container.environment_variables["MYSQL_ALLOW_EMPTY_PASSWORD"] = 1
    self.container.environment_variables["MYSQL_INITDB_SKIP_TZINFO"] = 1

    self.container.start_container()
    # Have to wait for it to initialise
    sleep(10)
    cmd = [
        "mysqladmin",
        "ping",
        "-h",
        "127.0.0.1",
        "-P",
        str(self.forwardedport),
        "-u",
        "root",
        "--wait=60",
    ]
    while True:
        (response, rc) = self.container.exec_command(cmd)
        if response == "mysqld is alive":
            return
        if response:
            Logger.critical("MySQL error: " + str(response))
            raise Exception(response)
        else:
            Logger.critical(rc)
def do_fromwdl(args):
    from janis_core import WdlParser, Logger

    Logger.info(f"Loading WDL file: {args.wdlfile}")
    tool = WdlParser.from_doc(args.wdlfile)
    Logger.info(f"Loaded {tool.type()}: {tool.versioned_id()}")

    translated = tool.translate(
        args.translation,
        to_console=args.output is None,
        to_disk=args.output is not None,
        export_path=args.output,
    )
    return translated
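# Invocation sketch (assumes this handler is wired to an argparse subcommand
# whose arguments match the attributes used above; exact flag names may
# differ):
#
#   janis fromwdl workflow.wdl janis --output ./translated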
def save_metadata_if_required(self):
    if self.database.progressDB.has(ProgressKeys.savedMetadata):
        Logger.debug(f"Workflow '{self.wid}' has saved metadata, skipping")
        return

    engine = self.get_engine()
    metadir = self.get_path_for_component(self.WorkflowManagerPath.metadata)
    if isinstance(engine, Cromwell):
        import json

        meta = engine.raw_metadata(self.get_engine_wid()).meta
        with open(os.path.join(metadir, "metadata.json"), "w+") as fp:
            json.dump(meta, fp)
    elif isinstance(engine, CWLTool):
        import json

        meta = engine.metadata(self.wid)
        self.set_status(meta.status)
        with open(os.path.join(metadir, "metadata.json"), "w+") as fp:
            json.dump(meta.outputs, fp)
    else:
        raise Exception(f"Don't know how to save metadata for engine '{engine.id()}'")

    self.database.progressDB.set(ProgressKeys.savedMetadata)