Example #1
def get_janis_from_module_spec(spec, include_commandtools=False):
    """
    Get all the janis Workflow subclasses that are defined in the file (__module__ == 'module.name')
    :return: List of all the subclasses of a workflow
    """

    if include_commandtools:
        Logger.log("Expanded search to commandtools in " + str(spec))

    potentials = []
    for k, ptype in spec.__dict__.items():
        if isinstance(ptype, Tool):
            potentials.append((k, ptype))
            continue
        if not callable(ptype):
            continue
        if isabstract(ptype):
            continue
        if not isclass(ptype):
            continue
        if ptype.__module__ != "module.name":
            continue
        if ptype == Workflow:
            continue
        if issubclass(ptype, Workflow):
            potentials.append((k, ptype()))
            # continue so a Workflow subclass isn't appended a second time below
            continue
        if include_commandtools and issubclass(ptype, Tool):
            potentials.append((k, ptype()))

    return potentials
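A minimal usage sketch for the function above (the file name "my_workflow.py" is hypothetical; the module is loaded under the name "module.name" so the __module__ check passes):

import importlib.util

spec = importlib.util.spec_from_file_location("module.name", "my_workflow.py")
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)

# returns (attribute name, instantiated Workflow/Tool) pairs
for key, tool in get_janis_from_module_spec(module, include_commandtools=True):
    print(key, type(tool).__name__)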
Example #2
    def send_email(subject: str, body: str):

        nots = JanisConfiguration.manager().notifications

        mail_program = nots.mail_program

        if not mail_program:
            return Logger.log("Skipping email send as no mail program is configured")

        if not nots.email or nots.email.lower() == "none":
            Logger.log("Skipping notify status change as no email")
            return

        emails: List[str] = nots.email if isinstance(
            nots.email, list
        ) else nots.email.split(",")

        email_template = f"""\
Content-Type: text/html
To: {"; ".join(emails)}
From: [email protected]
Subject: {subject}

{body}"""

        command = f"echo '{email_template}' | {mail_program}"
        Logger.log("Sending email with command: " + str(command.replace("\n", "\\n")))
        try:
            subprocess.call(command, shell=True)
        except Exception as e:
            Logger.critical(f"Couldn't send email '{subject}' to {emails}: {e}")
Example #3
    def start_or_submit(self, run_in_background, watch=False):
        # check container environment is loaded
        metadb = self.database.workflowmetadata

        jc = metadb.configuration
        metadb.containertype = jc.container.__name__
        metadb.containerversion = jc.container.test_available_by_getting_version()

        # this happens for all workflows no matter what type
        self.set_status(TaskStatus.QUEUED)

        wid = metadb.wid

        # resubmit the engine
        if not run_in_background:
            return self.resume()

        loglevel = LogLevel.get_str(Logger.CONSOLE_LEVEL)
        command = ["janis", "--logLevel", loglevel, "resume", "--foreground", wid]
        scriptdir = self.get_path_for_component(self.WorkflowManagerPath.configuration)
        logdir = self.get_path_for_component(self.WorkflowManagerPath.logs)
        jc.template.template.submit_detatched_resume(
            wid=wid, command=command, scriptdir=scriptdir, logsdir=logdir, config=jc
        )

        Logger.info("Submitted detatched engine")

        if watch:
            Logger.log("Watching submitted workflow")
            self.show_status_screen()
Example #4
    def get_token(self, info: ContainerInfo) -> Optional[str]:
        req = self.build_token_request(info)
        if req is None:
            return None
        Logger.log(f"Requesting auth token for {info}: " + req.full_url)
        response = request.urlopen(req)
        data = response.read()
        res = json.loads(
            data.decode(response.info().get_content_charset("utf-8")))
        return res.get("token")
Example #5
    def populate_cache(self):
        self._id_cache = set()
        idkeys = set(self.get_id_keys())
        idkeys_ordered = list(idkeys)
        prows = f"SELECT {', '.join(idkeys_ordered)} FROM {self._tablename}"
        with self.with_cursor() as cursor:
            Logger.log("Running query: " + str(prows))
            rows = cursor.execute(prows).fetchall()
            for row in rows:
                self._id_cache.add(row)
Example #6
def from_template(name, options) -> EnvironmentTemplate:
    template = get_template(name)
    if not template:
        raise Exception(
            f"Couldn't find Configuration template with name: '{name}'")

    Logger.log(f"Got template '{template.__name__}' from id = {name}")

    validate_template_params(template, options)
    newoptions = {**options}
    # newoptions.pop("template")

    return template(**newoptions)
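A usage sketch, assuming a Singularity-based template is registered under the name "slurm_singularity" and that container_dir is one of its accepted options (actual template names and options depend on the installed janis templates):

options = {"container_dir": "/shared/containers"}      # hypothetical path
template = from_template("slurm_singularity", options)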
Example #7
    def __init__(self, db_path: Optional[str], readonly=False):

        self.readonly = readonly
        if not db_path:
            config_dir = EnvVariables.config_dir.resolve(True)
            Logger.log(
                f"db_path wasn't provided to config manager, using config_dir: '{config_dir}/janis.db'"
            )
            db_path = fully_qualify_filename(os.path.join(config_dir, "janis.db"))
        self.db_path = db_path
        self.is_new = not os.path.exists(db_path)

        cp = os.path.dirname(db_path)
        os.makedirs(cp, exist_ok=True)

        self._connection: Optional[sqlite3.Connection] = None
        self._taskDB: Optional[TasksDbProvider] = None
Example #8
    def send_email(subject: str, body: str):
        import tempfile, os

        nots = PreparedJob.instance().notifications

        mail_program = nots.mail_program

        if not mail_program:
            return Logger.debug(
                "Skipping email send as no mail program is configured")

        if not nots.email or nots.email.lower() == "none":
            Logger.debug("Skipping notify status change as no email")
            return

        emails: List[str] = (nots.email if isinstance(nots.email, list) else
                             nots.email.split(","))
        Logger.debug(f"Sending email with subject {subject} to {emails}")

        email_template = f"""\
Content-Type: text/html
To: {"; ".join(emails)}
From: {nots.from_email}
Subject: {subject}

{body}"""

        # 2020-08-24 mfranklin: Write to disk and cat, because some emails are just too big
        fd, path = tempfile.mkstemp()
        try:
            with os.fdopen(fd, "w") as tmp:
                # do stuff with temp file
                tmp.write(email_template)

            command = f"cat '{path}' | {mail_program}"
            Logger.log("Sending email with command: " +
                       str(command.replace("\n", "\\n")))
            try:
                subprocess.call(command, shell=True)
                Logger.debug("Sent email successfully")
            except Exception as e:
                Logger.critical(
                    f"Couldn't send email '{subject}' to {emails}: {e}")
        finally:
            os.remove(path)
Example #9
    def __init__(
        self,
        mail_program: str = None,
        container_dir: str = None,
        load_instructions=None,
        build_instructions=f"singularity pull $image docker://${{docker}}",
        max_cores=None,
        max_ram=None,
        max_duration=None,
        can_run_in_foreground=True,
        run_in_background=False,
    ):
        super().__init__(
            mail_program=mail_program,
            containertype=Singularity,
            max_cores=max_cores,
            max_ram=max_ram,
            max_duration=max_duration,
            can_run_in_foreground=can_run_in_foreground,
            run_in_background=run_in_background,
        )
        self.singularity_load_instructions = load_instructions
        self.singularity_build_instructions = build_instructions
        self.singularity_container_dir = self.process_container_dir(container_dir)

        # if container_dir isn't specified

        invalid_paths = self.validate_paths(
            {"Singularity Container Directory": self.singularity_container_dir}
        )
        if len(invalid_paths) > 0:
            raise Exception(
                f"Expected an absolute paths for {', '.join(invalid_paths)}"
            )

        Logger.log(
            f"Setting Singularity: containerdir={container_dir}, loadinstructions={load_instructions}"
        )

        Singularity.containerdir = container_dir
        Singularity.loadinstructions = load_instructions
        Singularity.buildinstructions = build_instructions

Example #10
    def filter_updates(self,
                       jobs: List[RunJobModel],
                       add_inserts_to_cache=True
                       ) -> Tuple[List[RunJobModel], List[RunJobModel]]:
        # don't call super, it'll break because of the cache

        updates = []
        inserts = []

        if len(jobs) == 0:
            return updates, inserts

        self.populate_cache_if_required()

        idkeys = set(self.get_id_keys())
        idkeys_ordered = list(idkeys)
        dbalias_map = {t.dbalias: t.name for t in self._base.keymap()}
        skipped = 0

        for job in jobs:
            el_idkey = tuple(
                [getattr(job, dbalias_map[_k]) for _k in idkeys_ordered])

            jstatus = self._cache_completed_ids.get(el_idkey)
            if jstatus is None:
                inserts.append(job)
            elif job.status.value != jstatus:
                updates.append(job)
            elif jstatus:
                skipped += 1
            self._cache_completed_ids[el_idkey] = job.status.value

        if skipped:
            Logger.log(
                f"Skipped updating {skipped} jobs as those jobs were already in a final state"
            )
        # getsizeof reports the dict object itself in bytes, so this value is in kB (not MB)
        memory = getsizeof(self._cache_completed_ids) // 1024
        if (self.job_cache_last_idx < len(self.job_cache_warnings)
                and memory > self.job_cache_warnings[self.job_cache_last_idx]):
            Logger.warn(f"Job cache is using {memory} kB")
            self.job_cache_last_idx += 1

        return updates, inserts
Example #11
    def __init__(
        self,
        mail_program: str,
        container_dir: str,
        load_instructions=None,
        build_instructions=f"singularity pull $image docker://${{docker}}",
        max_cores=None,
        max_ram=None,
        can_run_in_foreground=True,
        run_in_background=False,
    ):
        super().__init__(
            mail_program=mail_program,
            containertype=Singularity,
            max_cores=max_cores,
            max_ram=max_ram,
            can_run_in_foreground=can_run_in_foreground,
            run_in_background=run_in_background,
        )
        self.singularity_load_instructions = load_instructions
        self.singularity_container_dir = container_dir
        self.singularity_build_instructions = build_instructions

        Logger.log(
            f"Setting Singularity: containerdir={container_dir}, loadinstructions={load_instructions}"
        )

        invalid_paths = self.validate_paths({"Container Dir": container_dir})

        if len(invalid_paths) > 0:
            raise Exception(
                f"Expected an absolute paths for {', '.join(invalid_paths)}")

        # little bit hacky
        Singularity.containerdir = container_dir
        Singularity.loadinstructions = load_instructions
        Singularity.buildinstructions = build_instructions

Example #12
    def try_get_outputs_for(self, inpid, wf, inputs, output_dir, description):

        from janis_assistant.main import WorkflowManager, run_with_outputs

        if os.path.exists(output_dir):
            try:
                wm = WorkflowManager.from_path_get_latest_manager(
                    output_dir, readonly=True
                )
                outs_raw = wm.database.outputsDB.get()
                outs = {
                    o.id_: o.value or o.new_path
                    for o in outs_raw
                    if o.value or o.new_path
                }
                if len(outs) > 0:
                    out_val = first_value(outs)
                    Logger.info(
                        f"Using cached value of transformation ({description}) for {inpid}: {out_val}"
                    )
                    return out_val
                Logger.log(
                    f"Didn't get any outputs from previous workflow manager when deriving input {inpid} ({description})"
                )
            except Exception as e:
                Logger.debug(
                    f"Couldn't get outputs from existing output_path for {inpid}, '{output_dir}' ({description}): {e}"
                )

        outs = run_with_outputs(wf, inputs=inputs, output_dir=output_dir)
        if not outs or len(outs) < 1:
            Logger.critical(
                f"Couldn't get outputs from transformation ({description}) for '{inpid}'"
            )
            return None

        return first_value(outs)
Example #13
    def submit_workflow_if_required(self):
        if self.database.progressDB.has(ProgressKeys.submitWorkflow):
            return Logger.log(f"Workflow '{self.wid}' has submitted, skipping")

        fn_wf = self.database.workflowmetadata.submission_workflow
        fn_inp = self.database.workflowmetadata.submission_inputs
        fn_deps = self.database.workflowmetadata.submission_resources

        engine = self.get_engine()

        Logger.debug(f"Submitting task '{self.wid}' to '{engine.id()}'")
        self._engine_wid = engine.start_from_paths(self.wid, fn_wf, fn_inp, fn_deps)
        self.database.workflowmetadata.engine_wid = self._engine_wid

        Logger.info(
            f"Submitted workflow ({self.wid}), got engine id = '{self.get_engine_wid()}'"
        )
        self.database.progressDB.set(ProgressKeys.submitWorkflow)
Example #14
def prepare_all_tools():
    JanisShed.hydrate(modules=[janis_unix, janis_bioinformatics])

    data_types = JanisShed.get_all_datatypes()
    tools = {
        ts[0].id(): {t.version(): t
                     for t in ts}
        for ts in JanisShed.get_all_tools()
    }

    Logger.info(f"Preparing documentation for {len(tools)} tools")
    Logger.info(f"Preparing documentation for {len(data_types)} data_types")

    tool_module_index = {}
    dt_module_index = {}
    ROOT_KEY = "root"

    for toolname, toolsbyversion in tools.items():
        # tool = tool_vs[0][0]()
        tool_versions = sort_tool_versions(list(toolsbyversion.keys()))
        default_version = tool_versions[0]
        Logger.log(
            f"Preparing {toolname}, found {len(tool_versions)} version[s] ({','.join(tool_versions)})"
        )

        defaulttool = toolsbyversion[default_version]
        if isclass(defaulttool):
            defaulttool = defaulttool()
        try:
            tool_path_components = list(
                filter(
                    lambda a: bool(a),
                    [defaulttool.tool_module(),
                     defaulttool.tool_provider()],
                ))
        except Exception as e:
            Logger.critical(f"Failed to generate docs for {toolname}: {e}")
            continue

        # (toolURL, tool, isPrimary)
        toolurl_to_tool = [(toolname.lower(), defaulttool, True)] + [
            (get_tool_url(toolname, v), toolsbyversion[v], False)
            for v in tool_versions
        ]

        path_components = "/".join(tool_path_components)
        output_dir = f"{tools_dir}/{path_components}/".lower()
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        for (toolurl, tool, isprimary) in toolurl_to_tool:
            output_str = prepare_tool(tool, tool_versions, not isprimary)
            output_filename = output_dir + toolurl + ".rst"
            with open(output_filename, "w+") as tool_file:
                tool_file.write(output_str)

        nested_keys_append_with_root(tool_module_index,
                                     tool_path_components,
                                     toolname,
                                     root_key=ROOT_KEY)

        Logger.log("Prepared " + toolname)

    for d in data_types:
        # tool = tool_vs[0][0]()
        if issubclass(d, Array):
            Logger.info("Skipping Array DataType")
            continue
        try:
            dt = d()
        except Exception:
            print(d.__name__ + " failed to instantiate")
            continue
        did = dt.name().lower()
        Logger.log("Preparing " + dt.name())
        output_str = prepare_data_type(dt)

        dt_path_components = []
        # dt_path_components = list(filter(
        #     lambda a: bool(a),
        #     [, tool.tool_provider()]
        # ))

        path_components = "/".join(dt_path_components)
        output_dir = f"{dt_dir}{path_components}/"
        output_filename = output_dir + did + ".rst"

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        nested_keys_append_with_root(dt_module_index,
                                     dt_path_components,
                                     did,
                                     root_key=ROOT_KEY)

        with open(output_filename, "w+") as dt_file:
            dt_file.write(output_str)

        Logger.log("Prepared " + did)

    def prepare_modules_in_index(contents, title, dir, max_depth=1):
        module_filename = dir + "/index.rst"
        module_tools = sorted(
            set(contents[ROOT_KEY] if ROOT_KEY in contents else []))
        submodule_keys = sorted(m for m in contents.keys() if m != ROOT_KEY)
        indexed_submodules_tools = [m.lower() for m in submodule_keys]

        with open(module_filename, "w+") as module_file:
            module_file.write(
                get_tool_toc(
                    alltoolsmap=tools,
                    title=title,
                    intro_text=
                    f"Automatically generated index page for {title}:",
                    subpages=indexed_submodules_tools,
                    tools=module_tools,
                    max_depth=max_depth,
                ))

        for submodule in submodule_keys:
            prepare_modules_in_index(contents=contents[submodule],
                                     title=submodule,
                                     dir=f"{dir}/{submodule}/")

    def prepare_dtmodules_in_index(contents, title, dir, max_depth=1):
        module_filename = dir + "/index.rst"
        module_tools = sorted(
            set(contents[ROOT_KEY] if ROOT_KEY in contents else []))
        submodule_keys = sorted(m for m in contents.keys() if m != ROOT_KEY)
        indexed_submodules_tools = [
            m.lower() + "/index" for m in submodule_keys
        ]

        with open(module_filename, "w+") as module_file:
            module_file.write(
                get_toc(
                    title=title,
                    intro_text=
                    f"Automatically generated index page for {title}:",
                    subpages=indexed_submodules_tools + module_tools,
                    max_depth=max_depth,
                ))

        for submodule in submodule_keys:
            prepare_modules_in_index(contents=contents[submodule],
                                     title=submodule,
                                     dir=f"{dir}/{submodule}/")

    prepare_modules_in_index(tool_module_index, title="Tools", dir=tools_dir)
    prepare_dtmodules_in_index(dt_module_index,
                               title="Data Types",
                               dir=dt_dir,
                               max_depth=1)
Example #15
    def save_changes(self):
        rows = self.metadata.get_encoded_changes()
        Logger.log(f"Updating workflow fields: {rows}")
        self.save_encoded_rows(rows)
        self.metadata.discard_changes()
Example #16
def get_file_from_searchname(name, cwd):
    if cwd == ".":
        cwd = os.getcwd()
    Logger.log(f"Searching for a file called '{name}'")
    resolved = os.path.expanduser(name)
    if os.path.exists(resolved) and os.path.isfile(resolved):
        Logger.log(f"Found file called '{name}'")
        return resolved

    Logger.log(f"Searching for file '{name}' in the cwd, '{cwd}'")
    with Path(cwd):
        if os.path.exists(name) and os.path.isfile(resolved):
            Logger.log(f"Found file in '{cwd}' called '{name}'")
            return os.path.join(cwd, name)

    Logger.log(
        f"Attempting to get search path $JANIS_SEARCHPATH from environment variables"
    )
    search_path = os.getenv("JANIS_SEARCHPATH")
    if search_path:
        Logger.log(
            f"Got value for env JANIS_SEARCHPATH '{search_path}', searching for file '{name}' here."
        )
        if os.path.exists(search_path):
            with Path(search_path):
                if os.path.exists(name) and os.path.isfile(resolved):
                    Logger.log(
                        f"Found file in '{search_path}' called '{name}'")
                    return os.path.join(search_path, name)
        else:
            Logger.warn(
                f"Search path '{search_path}' (obtained from $JANIS_SEARCHPATH) does not exist "
            )
    else:
        Logger.log(
            "Couldn't find JANIS_SEARCHPATH in environment variables, skipping"
        )

    Logger.log(
        f"Couldn't find a file with filename '{name}' in any of the following: "
        f"full path, current working directory ({cwd}) or the search path.")
    return None
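Usage sketch (the filename is hypothetical). The lookup order is: the expanded path itself, the current working directory, then $JANIS_SEARCHPATH, with None returned when nothing matches.

path = get_file_from_searchname("hello.wdl", ".")
if path is None:
    raise FileNotFoundError("hello.wdl was not found on any search path")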
Example #17
    def insert_or_update_many(self, els: List[T]):
        if len(els) == 0:
            return
        queries: Dict[str, List[List[Any]]] = {}
        update_separator = ",\n"
        tab = "\t"

        idkeys = set(self.get_id_keys())
        idkeys_ordered = list(idkeys)
        pkeys_ordered = self.get_primary_keys()
        existing_keys = set()  # (*pkeys_ordered)

        # get all primary keys

        dbalias_map: Dict[str, DatabaseObjectField] = {
            t.dbalias: t
            for t in self._base.keymap()
        }

        updates, inserts = self.filter_updates(els)

        def add_query(query, values):
            if query in queries:
                queries[query].append(values)
            else:
                queries[query] = [values]

        for job in updates:
            keys, values = job.prepare_insert()
            # el_pkeys = [getattr(job, dbalias_map[_k]) for _k in idkeys_ordered]

            keys_np, values_np = [], []
            for k, v in zip(keys, values):
                if k in idkeys:
                    continue

                keys_np.append(k)
                values_np.append(v)

            # problem is we want to update matching on some fields when they are NULL, our WHERE statement
            # should be something like:
            #   WHERE id1 = ? AND id2 = ? AND id3 is null AND id4 is null

            id_keyvalues = {
                pkey: prep_object_for_db(
                    getattr(job, dbalias_map[pkey].name),
                    encode=dbalias_map[pkey].encode,
                )
                for pkey in idkeys_ordered
            }
            id_withvalues_keyvalue_ordered = [
                (idkey, idvalue) for idkey, idvalue in id_keyvalues.items()
                if idvalue is not None
            ]
            id_withvalues_updater_keys = [
                f"{idkey} = ?" for idkey, _ in id_withvalues_keyvalue_ordered
            ]
            id_withvalues_updater_values = [
                idvalue for _, idvalue in id_withvalues_keyvalue_ordered
            ]
            id_novalues_updater_keys = [
                f"{idkey} is NULL" for idkey, idvalue in id_keyvalues.items()
                if idvalue is None
            ]

            prepared_statement = f"""
            UPDATE {self._tablename}
                SET {', '.join(f'{k} = ?' for k in keys_np)}
            WHERE
                {" AND ".join([*id_withvalues_updater_keys, *id_novalues_updater_keys])}
            """
            vtuple = (
                *values_np,
                *id_withvalues_updater_values,
            )

            add_query(prepared_statement, vtuple)

        for job in inserts:
            keys, values = job.prepare_insert()
            # el_pkeys = [getattr(job, dbalias_map[_k]) for _k in idkeys_ordered]
            prepared_statement = f"""
            INSERT INTO {self._tablename}
                ({', '.join(keys)})
            VALUES
                ({', '.join(f'?' for _ in keys)});
            """
            add_query(prepared_statement, values)

        Logger.log(
            f"DB {self._tablename}: Inserting {len(inserts)} and updating {len(updates)} rows"
        )
        with self.with_cursor() as cursor:
            start = DateUtil.now()
            if len(inserts) + len(updates) > 300:
                Logger.warn(
                    f"DB '{self._tablename}' is inserting {len(inserts)} and updating {len(updates)} rows, this might take a while"
                )
            for query, vvalues in queries.items():
                try:
                    Logger.log(
                        f"Running query: {query}\n\t: values: {vvalues}")
                    cursor.executemany(query, vvalues)
                except OperationalError as e:
                    Logger.log_ex(e)
            seconds = (DateUtil.now() - start).total_seconds()
            if seconds > 2:
                Logger.warn(
                    f"DB '{self._tablename}' took {second_formatter(seconds)} to insert {len(inserts)} and update {len(updates)} rows"
                )

        return True
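A standalone sketch of the NULL-aware WHERE pattern built above: in SQLite, "col = ?" never matches a NULL value, so id columns whose value is None have to be compared with IS NULL instead (table and column names below are illustrative):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE jobs (wid TEXT, parentjid TEXT, status TEXT)")
conn.execute("INSERT INTO jobs VALUES ('w1', NULL, 'running')")

# "parentjid = ?" bound to None would match nothing; IS NULL matches the row
conn.execute(
    "UPDATE jobs SET status = ? WHERE wid = ? AND parentjid IS NULL",
    ("completed", "w1"),
)
print(conn.execute("SELECT status FROM jobs").fetchone())   # ('completed',)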