def process_potential_out(key, out):
    if isinstance(out, list):
        outs = [CWLTool.process_potential_out(key, o) for o in out]
        ups = {}
        for o in outs:
            for k, v in o.items():
                if k not in ups:
                    ups[k] = []
                ups[k].append(v)
        return ups

    updates = {}
    if isinstance(out, str):
        updates[key] = WorkflowOutputModel(
            tag=key,
            original_path=None,
            iscopyable=False,
            timestamp=DateUtil.now(),
            value=out,
            new_path=None,
            output_folder=None,
            output_name=None,
            secondaries=None,
            extension=None,
        )
    elif "path" in out:
        updates[key] = WorkflowOutputModel(
            tag=key,
            iscopyable=True,
            original_path=out["path"],
            timestamp=DateUtil.now(),
            new_path=None,
            output_folder=None,
            output_name=None,
            secondaries=None,
            extension=None,
        )
        for s in out.get("secondaryFiles", []):
            path = s["path"]
            ext = path.rpartition(".")[-1]
            newk = f"{key}_{ext}"
            updates[newk] = WorkflowOutputModel(
                tag=newk,
                original_path=path,
                iscopyable=True,
                timestamp=DateUtil.now(),
                new_path=None,
                output_folder=None,
                output_name=None,
                secondaries=None,
                extension=None,
            )

    return updates
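# Hedged usage sketch for process_potential_out (assumes this module's imports;
# the output name "out1" and the .bam/.bai paths are hypothetical). It shows the
# two things the method does for a File output: record the primary path under
# the original key, and register each secondary file under "<key>_<ext>".
def _example_process_potential_out():
    out = {
        "path": "/exec/out1.bam",
        "secondaryFiles": [{"path": "/exec/out1.bam.bai"}],
    }
    updates = CWLTool.process_potential_out("out1", out)
    # "bai" is everything after the last "." of the secondary path
    assert set(updates) == {"out1", "out1_bai"}
    assert updates["out1_bai"].original_path == "/exec/out1.bam.bai"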
def set(self, key: ProgressKeys):
    if self.has(key):
        return

    with self.with_cursor() as cursor:
        cursor.execute(
            self._insert_statement, (self.wid, key.value, str(DateUtil.now()))
        )
def parse_output(key, value):
    newkey = "".join(key.split(".")[1:])

    fileloc = value
    if isinstance(value, dict):
        fileloc = value["location"]

    if isinstance(fileloc, list):
        return newkey, [Cromwell.parse_output(key, f)[1] for f in fileloc]

    # It's hard to know whether the value we get is a File or just a plain value,
    # so we write it to both original_path and value and let Janis figure it out later
    return (
        newkey,
        WorkflowOutputModel(
            tag=newkey,
            original_path=fileloc,
            timestamp=DateUtil.now(),
            new_path=None,
            output_folder=None,
            output_name=None,
            secondaries=None,
            extension=None,
            value=fileloc,
            iscopyable=True,
        ),
    )
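# Hedged sketch of parse_output (assumes this module's imports). Cromwell
# namespaces outputs as "<workflow>.<output>"; the key and location below are
# made-up illustrations of the prefix-stripping and the double write-through.
def _example_parse_output():
    key, model = Cromwell.parse_output(
        "wf.sorted_bam", {"location": "/cromwell-executions/wf/abc/sorted.bam"}
    )
    assert key == "sorted_bam"  # the "wf." prefix is stripped
    # the location lands in both fields, per the comment above
    assert model.original_path == model.value == "/cromwell-executions/wf/abc/sorted.bam"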
def update_paths(self, tag: str, original_path: str, new_path: str):
    with self.with_cursor() as cursor:
        cursor.execute(
            """\
            UPDATE outputs SET
                original_path=?,
                new_path=?,
                timestamp=?
            WHERE wid = ? AND tag = ?
            """,
            (original_path, new_path, DateUtil.now(), self.wid, tag),
        )
    self.commit()
def format(self, **kwargs):
    tb = "    "
    nl = "\n"

    fin = self.finish if self.finish else DateUtil.now()
    duration = round((fin - self.start).total_seconds()) if self.start else 0

    updated_text = "Unknown"
    if self.last_updated:
        secs_ago = int((DateUtil.now() - self.last_updated).total_seconds())
        if secs_ago > 2:
            updated_text = second_formatter(secs_ago) + " ago"
        else:
            updated_text = "Just now"
        updated_text += f" ({self.last_updated.replace(microsecond=0).isoformat()})"

    return f"""\
WID:        {self.wid}
EngId:      {self.engine_wid}
Name:       {self.name}
Engine:     {self.engine}

Task Dir:   {self.outdir}
Exec Dir:   {self.execution_dir}

Status:     {self.status}
Duration:   {second_formatter(duration)}
Start:      {self.start.isoformat() if self.start else 'N/A'}
Finish:     {self.finish.isoformat() if self.finish else "N/A"}
Updated:    {updated_text}

Jobs:
{nl.join(j.format(tb, **kwargs) for j in sorted(self.jobs, key=lambda j: j.start or DateUtil.now()))}

{("Outputs:" + "".join(nl + tb + o.format() for o in self.outputs) if self.outputs else '')}
{("Error: " + self.error) if self.error else ''}
""".strip()
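# Self-contained sketch of the duration fallback above: while the workflow is
# still running (finish is None), "now" stands in for the finish time, so the
# reported duration keeps growing. The datetimes here are hypothetical.
def _example_duration_fallback():
    from datetime import datetime

    start = datetime(2020, 1, 1, 12, 0, 0)
    finish = None  # still running
    now = datetime(2020, 1, 1, 12, 1, 30)  # stand-in for DateUtil.now()
    fin = finish if finish else now
    assert round((fin - start).total_seconds()) == 90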
def start_from_paths(self, wid, source_path: str, input_path: str, deps_path: str):
    from janis_assistant.management.configuration import JanisConfiguration

    jc = JanisConfiguration.manager()

    self.taskmeta = {
        "start": DateUtil.now(),
        "status": TaskStatus.PROCESSING,
        "jobs": {},
    }
    config: CWLToolConfiguration = self.config

    if Logger.CONSOLE_LEVEL == LogLevel.VERBOSE:
        config.debug = True
    config.disable_color = True

    # more options
    if not config.tmpdir_prefix:
        config.outdir = self.execution_dir + "/"
        config.tmpdir_prefix = self.execution_dir + "/"
        config.leave_tmpdir = True

    if jc.call_caching_enabled:
        config.cachedir = os.path.join(self.execution_dir, "cached/")

    cmd = config.build_command_line(source_path, input_path)

    Logger.debug("Running command: '" + " ".join(cmd) + "'")

    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, preexec_fn=os.setsid, stderr=subprocess.PIPE
    )
    self.taskmeta["status"] = TaskStatus.RUNNING
    Logger.info("CWLTool has started with pid=" + str(process.pid))
    self.process_id = process.pid

    self._logger = CWLToolLogger(
        wid,
        process,
        logfp=open(self.logfile, "a+"),
        metadata_callback=self.task_did_update,
        exit_function=self.task_did_exit,
    )

    return wid
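# For orientation, the configuration above produces a cwltool invocation along
# these lines (a sketch, not the literal output of build_command_line; the
# exact flag list depends on CWLToolConfiguration):
#
#   cwltool --outdir <exec_dir>/ --tmpdir-prefix <exec_dir>/ --leave-tmpdir \
#       --disable-color [--debug] [--cachedir <exec_dir>/cached/] \
#       source.cwl inputs.yml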
def task_did_exit(self, logger: CWLToolLogger, status: TaskStatus):
    Logger.debug("CWLTool fired 'did exit'")
    self.taskmeta["status"] = status
    self.taskmeta["finish"] = DateUtil.now()
    self.taskmeta["outputs"] = logger.outputs

    if status != TaskStatus.COMPLETED:
        js: Dict[str, WorkflowJobModel] = self.taskmeta.get("jobs")
        for j in js.values():
            if j.status != TaskStatus.COMPLETED:
                j.status = status

    if logger.error:
        self.taskmeta["error"] = logger.error

    for callback in self.progress_callbacks.get(logger.wid, []):
        callback(self.metadata(logger.wid))
def save_metadata(self, metadata: WorkflowModel):
    # mfranklin: DO NOT UPDATE THE STATUS HERE!
    # Let's just say the actual workflow status has to be updated separately
    alljobs = self.flatten_jobs(metadata.jobs or [])
    self.jobsDB.update_or_insert_many(alljobs)

    self.workflowmetadata.last_updated = DateUtil.now()
    if metadata.error:
        self.workflowmetadata.error = metadata.error
    if metadata.execution_dir:
        self.workflowmetadata.execution_dir = metadata.execution_dir

    if metadata.finish:
        self.workflowmetadata.finish = metadata.finish

    return
def from_janis(
    wid: str,
    outdir: str,
    tool: Tool,
    environment: Environment,
    hints: Dict[str, str],
    validation_requirements: Optional[ValidationRequirements],
    batchrun_requirements: Optional[BatchRunRequirements],
    inputs_dict: dict = None,
    dryrun=False,
    watch=True,
    max_cores=None,
    max_memory=None,
    keep_intermediate_files=False,
    run_in_background=True,
    dbconfig=None,
    allow_empty_container=False,
    container_override: dict = None,
    check_files=True,
):
    jc = JanisConfiguration.manager()

    # output directory has been created
    environment.identifier += "_" + wid

    tm = WorkflowManager(wid=wid, outdir=outdir, environment=environment)

    tm.database.runs.insert(wid)

    tm.database.workflowmetadata.wid = wid
    tm.database.workflowmetadata.engine = environment.engine
    tm.database.workflowmetadata.filescheme = environment.filescheme
    tm.database.workflowmetadata.environment = environment.id()
    tm.database.workflowmetadata.name = tool.id()
    tm.database.workflowmetadata.start = DateUtil.now()
    tm.database.workflowmetadata.executiondir = None
    tm.database.workflowmetadata.keepexecutiondir = keep_intermediate_files
    tm.database.workflowmetadata.configuration = jc
    tm.database.workflowmetadata.dbconfig = dbconfig

    # This is the only time we're allowed to skip the tm.set_status
    # This is a temporary stop gap until "notification on status" is implemented.
    # tm.set_status(TaskStatus.PROCESSING)
    tm.database.workflowmetadata.status = TaskStatus.PROCESSING

    tm.database.commit()

    spec = get_ideal_specification_for_engine(environment.engine)
    spec_translator = get_translator(spec)
    tool_evaluate = tm.prepare_and_output_workflow_to_evaluate_if_required(
        tool=tool,
        translator=spec_translator,
        validation=validation_requirements,
        batchrun=batchrun_requirements,
        hints=hints,
        additional_inputs=inputs_dict,
        max_cores=max_cores or jc.environment.max_cores,
        max_memory=max_memory or jc.environment.max_ram,
        allow_empty_container=allow_empty_container,
        container_override=container_override,
        check_files=check_files,
    )

    outdir_workflow = tm.get_path_for_component(
        WorkflowManager.WorkflowManagerPath.workflow
    )

    tm.database.workflowmetadata.submission_workflow = os.path.join(
        outdir_workflow, spec_translator.filename(tool_evaluate)
    )
    tm.database.workflowmetadata.submission_inputs = os.path.join(
        outdir_workflow, spec_translator.inputs_filename(tool_evaluate)
    )
    tm.database.workflowmetadata.submission_resources = os.path.join(
        outdir_workflow, spec_translator.dependencies_filename(tool_evaluate)
    )

    tm.database.commit()

    if not dryrun:
        if (
            not run_in_background
            and jc.template
            and jc.template.template
            and jc.template.template.can_run_in_foreground is False
        ):
            raise Exception(
                f"Your template '{jc.template.template.__class__.__name__}' is not allowed to run "
                f"in the foreground, try adding the '--background' argument"
            )
        tm.start_or_submit(run_in_background=run_in_background, watch=watch)
    else:
        tm.set_status(TaskStatus.DRY_RUN)

    tm.database.commit()
    return tm
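# Hedged usage sketch: the janis CLI normally assembles these arguments, but a
# direct call looks roughly like this ("wf" and "env" are placeholders for a
# janis Tool and an Environment, not values from the source):
#
#   tm = WorkflowManager.from_janis(
#       wid="d4e8ab",
#       outdir="/janis/execution/hello/d4e8ab/",
#       tool=wf,
#       environment=env,
#       hints={},
#       validation_requirements=None,
#       batchrun_requirements=None,
#       dryrun=True,  # translate and persist, but don't submit
#   )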
def format(self, pre, monochrome=False, brief=False, **kwargs):
    tb = "    "
    fin = self.finish if self.finish else DateUtil.now()
    time = round(DateUtil.secs_difference(self.start, fin)) if self.start else None
    # percentage = (
    #     (round(1000 * time / self.supertime) / 10)
    #     if (self.start and self.supertime)
    #     else None
    # )
    status = self.status or (
        sorted(self.events, key=lambda e: e.timestamp)[-1].status
        if self.events
        else TaskStatus.PROCESSING
    )

    name = self.name
    if self.shard is not None and self.shard >= 0:
        name += f"_shard-{self.shard}"
    if self.attempt and self.attempt > 1:
        name += f"_attempt-{self.attempt}"

    standard = pre + f"[{status.symbol()}] {name} ({second_formatter(time)})"

    col = ""
    uncol = ""

    if not monochrome:
        if status == TaskStatus.FAILED:
            col = _bcolors.FAIL
        elif status == TaskStatus.COMPLETED:
            col = _bcolors.OKGREEN
        # else:
        #     col = _bcolors.UNDERLINE
        uncol = _bcolors.ENDC

    if status != TaskStatus.COMPLETED or not brief:
        if self.jobs:
            ppre = pre + tb
            subs: List[WorkflowJobModel] = sorted(
                self.jobs,
                key=lambda j: j.start if j.start else DateUtil.now(),
                reverse=False,
            )
            return (
                col
                + standard
                + "".join(
                    "\n" + j.format(ppre, monochrome, brief, **kwargs) for j in subs
                )
                + uncol
            )

    fields: List[Tuple[str, str]] = []

    if status == TaskStatus.COMPLETED:
        if not self.finish:
            raise Exception(f"Finish was null for completed task: {self.name}")
        if self.cached:
            fields.append(("from cache", str(self.cached)))
    elif status == TaskStatus.RUNNING:
        fields.extend([("batchid", self.batchid), ("backend", self.backend)])
    elif status == TaskStatus.FAILED:
        fields.extend([("stdout", self.stdout), ("stderr", self.stderr)])
    elif status == TaskStatus.PROCESSING:
        pass
    elif status == TaskStatus.QUEUED:
        pass
    else:
        return (
            standard
            + f" :: Unimplemented status: '{status}' for task: '{self.name}'"
        )

    ppre = "\n" + " " * len(pre) + 2 * tb
    retval = standard + "".join(f"{ppre}{f[0]}: {f[1]}" for f in fields if f[1])

    return col + retval + uncol
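# Roughly the shape of the tree this renders (status symbols, names, and times
# are illustrative; failed jobs get their stdout/stderr paths appended):
#
#   [✓] hello (4s)
#       [✓] hello_shard-0 (2s)
#       [!] hello_shard-1_attempt-2 (2s)
#           stdout: /exec/.../stdout
#           stderr: /exec/.../stderr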
def insert(self, wid: str):
    with self.with_cursor() as cursor:
        cursor.execute(self._insert_statement, (wid, str(DateUtil.now())))
def process_metadataupdate_if_match(self, line):
    match = self.statusupdateregex.match(line)
    if not match:
        return

    name, action = match.groups()
    s = name.split(" ")
    if len(s) == 0 or len(s) > 2:
        return Logger.critical("Unsure how to handle metadata update: " + str(line))

    component = s[0]
    stepname = s[1] if len(s) > 1 else None
    status = None

    parentid = "_".join(self.workflow_scope) or None

    if component == "workflow":
        if action == "start":
            if stepname:  # stepname is empty for the root workflow
                self.workflow_scope.append(stepname)
                stepname = None
            status = TaskStatus.RUNNING
        elif action == "completed success":
            if len(self.workflow_scope) > 0:
                self.workflow_scope.pop(0)
            status = TaskStatus.COMPLETED

    elif component == "step":
        if action == "start":
            status = TaskStatus.RUNNING
        elif action == "completed success":
            status = TaskStatus.COMPLETED

    if not status:
        return
    if not stepname:
        # return WorkflowModel
        return

    jid = f"{parentid}_{stepname}" if parentid else stepname

    start = DateUtil.now() if status == TaskStatus.RUNNING else None
    finish = DateUtil.now() if status == TaskStatus.COMPLETED else None

    job = WorkflowJobModel(
        jid=jid,
        parentjid=parentid,
        name=stepname,
        status=status,
        attempt=None,
        shard=None,
        start=start,
        finish=finish,
        backend="local",
        batchid="",
        cached=False,
        container=None,
        stderr=self.logfp.name,
        stdout=None,
    )
    self.metadata_callback(self, job)
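# The status-update regex is expected to match cwltool log lines of roughly
# this shape (illustrative; "nested_wf" and "hello" are hypothetical names):
#
#   [workflow ] start               -> root workflow, nothing pushed
#   [workflow nested_wf] start      -> pushes "nested_wf" onto workflow_scope
#   [step hello] start              -> job "nested_wf_hello" marked RUNNING
#   [step hello] completed success  -> job "nested_wf_hello" marked COMPLETED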