Exemplo n.º 1
0
 def concretize_param(p, wildcards, is_from_callable):
     """Resolve wildcards in a parameter value.

     When ``is_from_callable`` is truthy the value is returned untouched.
     Otherwise a string is passed through ``apply_wildcards``; a list is
     copied with each string element passed through ``apply_wildcards``
     and every other element kept as is. Any other value is returned
     unchanged.
     """
     if is_from_callable:
         return p
     if isinstance(p, str):
         return apply_wildcards(p, wildcards)
     if not isinstance(p, list):
         return p

     expanded = []
     for entry in p:
         if isinstance(entry, str):
             entry = apply_wildcards(entry, wildcards)
         expanded.append(entry)
     return expanded
Exemplo n.º 2
0
 def get_outputfiles(self, job: Job):
     """Yield ``(path, suffix)`` pairs for the output files of ``job``.

     If the first output of the job's rule is flagged ``is_multiext``,
     every file in ``job.output`` is yielded together with the part of
     its path that follows the wildcard-resolved multiext prefix.
     Otherwise only the first output file is yielded, paired with an
     empty suffix.
     """
     first_declared = job.rule.output[0]
     if not first_declared.is_multiext:
         yield (job.output[0], "")
         return

     resolved_prefix = apply_wildcards(first_declared.multiext_prefix,
                                       job.wildcards)
     # Everything after the shared prefix is the per-file extension.
     offset = len(resolved_prefix)
     for path in job.output:
         yield (path, path[offset:])
Exemplo n.º 3
0
 def expand_group(self, wildcards):
     """Resolve ``self.group`` for the given wildcards.

     A callable group is evaluated through ``self.apply_input_function``;
     a string group has its wildcards applied (with ``DYNAMIC_FILL`` as
     the dynamic fill value); any other value is returned unchanged.
     """
     group = self.group
     if callable(group):
         return self.apply_input_function(group, wildcards)
     if isinstance(group, str):
         return apply_wildcards(group, wildcards, dynamic_fill=DYNAMIC_FILL)
     return group
Exemplo n.º 4
0
 def get_outputfiles(self, job: Job):
     """Yield ``(path, suffix)`` pairs for the output files of ``job``.

     If the first output of the job's rule is flagged ``is_multiext``,
     every file in ``job.output`` is yielded together with the part of
     its path that follows the wildcard-resolved multiext prefix.
     Otherwise the job must have exactly one output file, which is
     yielded with an empty suffix.
     """
     first_declared = job.rule.output[0]
     if first_declared.is_multiext:
         resolved_prefix = apply_wildcards(first_declared.multiext_prefix,
                                           job.wildcards)
         # Everything after the shared prefix is the per-file extension.
         offset = len(resolved_prefix)
         for path in job.output:
             yield (path, path[offset:])
         return

     # Internal invariant: without multiext there can only be one output.
     assert (
         len(job.output) == 1
     ), "bug: multiple output files in cacheable job but multiext not used for declaring them"
     yield (job.output[0], "")
Exemplo n.º 5
0
 def __init__(self, name, wildcards, job):
     """Set ``name``, ``is_other`` and a hash-based ``id`` on the instance.

     A ``name`` of ``None`` is replaced by ``"Other"`` and marks the
     instance with ``is_other = True``. Any other name has its wildcards
     applied; an ``AttributeError`` raised while doing so is re-raised as
     a ``WorkflowError`` that references ``job.rule``. ``id`` is the
     SHA-256 hex digest of the final name.
     """
     self.is_other = name is None
     if self.is_other:
         name = "Other"
     else:
         try:
             name = apply_wildcards(name, wildcards)
         except AttributeError as err:
             raise WorkflowError("Failed to resolve wildcards.", err, rule=job.rule)
     self.name = name
     self.id = hashlib.sha256(name.encode()).hexdigest()
Exemplo n.º 6
0
 def __init__(self, name, wildcards, job):
     """Set ``name``, ``is_other`` and a URL-quoted ``id`` on the instance.

     A ``name`` of ``None`` is replaced by ``"Other"`` and marks the
     instance with ``is_other = True``. Any other name has its wildcards
     applied; an ``AttributeError`` raised while doing so is re-raised as
     a ``WorkflowError`` that references ``job.rule``. ``id`` is the
     final name, URL-quoted and prefixed with ``results-``.
     """
     if name is not None:
         self.is_other = False
         try:
             name = apply_wildcards(name, wildcards)
         except AttributeError as err:
             raise WorkflowError("Failed to resolve wildcards.",
                                 err,
                                 rule=job.rule)
     else:
         name = "Other"
         self.is_other = True
     self.name = name
     quoted_name = urllib.parse.quote(name)
     self.id = "results-{name}".format(name=quoted_name)
Exemplo n.º 7
0
def expand_report_argument(item, wildcards, job):
    """Resolve a report argument to its concrete value.

    A callable ``item`` is first invoked with ``wildcards`` plus the
    auxiliary keyword arguments selected by
    ``get_input_function_aux_params`` (only ``params`` is offered); any
    exception it raises is wrapped in ``InputFunctionException``. If the
    resulting value is a string, its wildcards are applied, and an
    ``AttributeError`` from that step is re-raised as ``WorkflowError``.
    Non-string values are returned as they are.
    """
    if is_callable(item):
        extra_kwargs = get_input_function_aux_params(item,
                                                     {"params": job.params})
        try:
            item = item(wildcards, **extra_kwargs)
        except Exception as err:
            raise InputFunctionException(err, rule=job.rule, wildcards=wildcards)

    if not isinstance(item, str):
        return item

    try:
        return apply_wildcards(item, wildcards)
    except AttributeError as err:
        raise WorkflowError("Failed to resolve wildcards.",
                            err,
                            rule=job.rule)
Exemplo n.º 8
0
def auto_report(dag, path, stylesheet=None):
    """Render a report for the jobs of ``dag`` and write it to ``path``.

    Files flagged with ``report`` among the outputs and inputs of the DAG's
    jobs are collected into categories and subcategories. Per-output
    metadata from the workflow persistence is turned into runtime, timeline
    and per-rule records, and everything is rendered through the
    ``report.html.jinja2`` template.

    Args:
        dag: The workflow DAG; ``dag.jobs`` and ``dag.workflow`` are read.
        path: Target file. A ``.html`` suffix writes a single HTML file; a
            ``.zip`` suffix writes an archive containing the HTML, the
            result files, image thumbnails and auxiliary files.
        stylesheet: Optional path of a custom stylesheet whose content is
            handed to the template.

    Raises:
        WorkflowError: If jinja2 or pygments is missing, ``path`` has an
            unsupported suffix, the stylesheet cannot be read, a flagged
            file does not exist, or a flagged directory is misconfigured
            (no/invalid patterns, missing htmlindex, htmlindex requested
            for a plain HTML report).
    """
    try:
        from jinja2 import Environment, PackageLoader
    except ImportError:
        raise WorkflowError(
            "Python package jinja2 must be installed to create reports."
        )

    mode_embedded = True
    if path.endswith(".zip"):
        mode_embedded = False
    elif not path.endswith(".html"):
        raise WorkflowError("Report file does not end with .html or .zip")

    custom_stylesheet = None
    if stylesheet is not None:
        try:
            with open(stylesheet) as s:
                custom_stylesheet = s.read()
        # Only ordinary errors are converted; KeyboardInterrupt and
        # SystemExit must be allowed to propagate.
        except Exception as e:
            raise WorkflowError("Unable to read custom report stylesheet.", e)

    logger.info("Creating report...")

    env = Environment(
        loader=PackageLoader("snakemake", "report"),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env.filters["get_resource_as_string"] = get_resource_as_string

    def get_time(rectime, metatime, sel_func):
        # Keep the recorded time when the metadata carries no value.
        if metatime is None:
            return rectime
        return sel_func(metatime, rectime)

    persistence = dag.workflow.persistence
    results = defaultdict(lambda: defaultdict(list))
    records = defaultdict(JobRecord)
    recorded_files = set()
    for job in dag.jobs:
        for f in itertools.chain(job.expanded_output, job.input):
            if is_flagged(f, "report") and f not in recorded_files:
                if not f.exists:
                    raise WorkflowError(
                        "File {} marked for report but does " "not exist.".format(f)
                    )
                report_obj = get_flag_value(f, "report")

                # Defined per flagged file and called immediately below, so
                # the captured ``job`` and ``report_obj`` are always current.
                def register_file(
                    f, wildcards_overwrite=None, aux_files=None, name_overwrite=None
                ):
                    wildcards = wildcards_overwrite or job.wildcards
                    category = Category(
                        report_obj.category, wildcards=wildcards, job=job
                    )
                    subcategory = Category(
                        report_obj.subcategory, wildcards=wildcards, job=job
                    )

                    results[category][subcategory].append(
                        FileRecord(
                            f,
                            job,
                            report_obj.caption,
                            env,
                            category,
                            dag.workflow,
                            wildcards_overwrite=wildcards_overwrite,
                            mode_embedded=mode_embedded,
                            aux_files=aux_files,
                            name_overwrite=name_overwrite,
                        )
                    )
                    recorded_files.add(f)

                if os.path.isfile(f):
                    register_file(f)
                elif os.path.isdir(f):
                    if report_obj.htmlindex:
                        if mode_embedded:
                            raise WorkflowError(
                                "Directory marked for report specifies htmlindex. "
                                "This is unsupported when requesting a pure HTML report. "
                                "Please use store as zip instead (--report report.zip)."
                            )
                        # Every file except the index itself (and the
                        # timestamp marker) is shipped as an auxiliary file.
                        aux_files = []
                        index_found = False
                        for root, _dirs, files in os.walk(f):
                            for name in files:
                                if name != ".snakemake_timestamp":
                                    filepath = os.path.join(root, name)
                                    if (
                                        os.path.relpath(filepath, f)
                                        != report_obj.htmlindex
                                    ):
                                        aux_files.append(filepath)
                                    else:
                                        index_found = True
                        if not index_found:
                            raise WorkflowError(
                                "Given htmlindex {} not found in directory "
                                "marked for report".format(report_obj.htmlindex)
                            )
                        register_file(
                            os.path.join(f, report_obj.htmlindex),
                            aux_files=aux_files,
                            name_overwrite="{}.html".format(os.path.basename(f)),
                        )
                    elif report_obj.patterns:
                        if not isinstance(report_obj.patterns, list):
                            raise WorkflowError(
                                "Invalid patterns given for report. Must be list.",
                                rule=job.rule,
                            )

                        for pattern in report_obj.patterns:
                            pattern = os.path.join(f, pattern)
                            wildcards = glob_wildcards(pattern)._asdict()
                            names = wildcards.keys()
                            for w in zip(*wildcards.values()):
                                w = dict(zip(names, w))
                                w.update(job.wildcards_dict)
                                w = Wildcards(fromdict=w)
                                # Use a separate name: rebinding ``f`` here
                                # would make the next pattern be joined onto
                                # a matched file instead of the directory.
                                matched_file = apply_wildcards(pattern, w)
                                register_file(matched_file, wildcards_overwrite=w)
                    else:
                        raise WorkflowError(
                            "Directory marked for report but neither file patterns "
                            "given via patterns=[...], nor htmlindex given. "
                            "See report documentation.",
                            rule=job.rule,
                        )

        for f in job.expanded_output:
            meta = persistence.metadata(f)
            if not meta:
                logger.warning(
                    "Missing metadata for file {}. Maybe metadata "
                    "was deleted or it was created using an older "
                    "version of Snakemake. This is a non critical "
                    "warning.".format(f)
                )
                continue

            try:
                job_hash = meta["job_hash"]
                rule = meta["rule"]
                rec = records[(job_hash, rule)]
                rec.rule = rule
                rec.job = job
                rec.starttime = get_time(rec.starttime, meta["starttime"], min)
                rec.endtime = get_time(rec.endtime, meta["endtime"], max)
                rec.conda_env_file = None
                rec.conda_env = meta["conda_env"]
                rec.container_img_url = meta["container_img_url"]
                rec.output.append(f)
            except KeyError as e:
                # Report the missing key through the logger rather than
                # printing it to stdout.
                logger.warning(
                    "Metadata for file {} was created with a too "
                    "old Snakemake version (missing key: {}).".format(f, e)
                )

    for subcats in results.values():
        for catresults in subcats.values():
            catresults.sort(key=lambda res: res.name)

    # prepare runtimes
    runtimes = [
        {"rule": rec.rule, "runtime": rec.endtime - rec.starttime}
        for rec in sorted(records.values(), key=lambda rec: rec.rule)
    ]

    def get_datetime(rectime):
        try:
            return datetime.datetime.fromtimestamp(rectime).isoformat()
        except OSError:
            return None

    # prepare end times
    timeline = [
        {
            "rule": rec.rule,
            "starttime": get_datetime(rec.starttime),
            "endtime": get_datetime(rec.endtime),
        }
        for rec in sorted(records.values(), key=lambda rec: rec.rule)
    ]

    # prepare per-rule information: records comparing equal to an existing
    # RuleRecord of the same rule are merged into it.
    rules = defaultdict(list)
    for rec in records.values():
        rule = RuleRecord(rec.job, rec)
        if rec.rule not in rules:
            rules[rec.rule].append(rule)
        else:
            merged = False
            for other in rules[rec.rule]:
                if rule == other:
                    other.add(rec)
                    merged = True
                    break
            if not merged:
                rules[rec.rule].append(rule)

    # rulegraph
    rulegraph, xmax, ymax = rulegraph_d3_spec(dag)

    # configfiles
    configfiles = [ConfigfileRecord(f) for f in dag.workflow.configfiles]

    # Flatten all results, keeping only the first record per target.
    seen = set()
    files = [
        seen.add(res.target) or res
        for cat in results.values()
        for subcat in cat.values()
        for res in subcat
        if res.target not in seen
    ]

    rst_links = textwrap.dedent(
        """

    .. _Workflow: javascript:show_panel('workflow')
    .. _Statistics: javascript:show_panel('statistics')
    {% for cat, catresults in categories|dictsort %}
    .. _{{ cat.name }}: javascript:show_panel("{{ cat.id }}")
    {% endfor %}
    {% for res in files %}
    .. _{{ res.target }}: javascript:show_panel("{{ res.category.id }}")
    {% endfor %}
    """
    )
    for cat, subcats in results.items():
        for subcat, catresults in subcats.items():
            for res in catresults:
                res.render(env, rst_links, results, files)

    # global description
    text = ""
    if dag.workflow.report_text:
        with dag.workflow.sourcecache.open(dag.workflow.report_text) as f:

            class Snakemake:
                config = dag.workflow.config

            text = f.read() + rst_links
            text = publish_parts(
                env.from_string(text).render(
                    snakemake=Snakemake, categories=results, files=files
                ),
                writer_name="html",
            )["body"]

    # record time
    now = "{} {}".format(datetime.datetime.now().ctime(), time.tzname[0])
    results_size = sum(
        res.size
        for cat in results.values()
        for subcat in cat.values()
        for res in subcat
    )

    try:
        from pygments.formatters import HtmlFormatter
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports."
        )

    template = env.get_template("report.html.jinja2")

    logger.info("Downloading resources and rendering HTML.")

    rendered = template.render(
        results=results,
        results_size=results_size,
        configfiles=configfiles,
        text=text,
        rulegraph_nodes=rulegraph["nodes"],
        rulegraph_links=rulegraph["links"],
        rulegraph_width=xmax + 20,
        rulegraph_height=ymax + 20,
        runtimes=runtimes,
        timeline=timeline,
        rules=[rec for recs in rules.values() for rec in recs],
        version=__version__,
        now=now,
        pygments_css=HtmlFormatter(style="trac").get_style_defs(".source"),
        custom_stylesheet=custom_stylesheet,
        mode_embedded=mode_embedded,
    )

    # TODO look into supporting .WARC format, also see (https://webrecorder.io)

    if not mode_embedded:
        with ZipFile(path, compression=ZIP_DEFLATED, mode="w") as zipout:
            # All archive members live below a folder named after the zip.
            folder = Path(Path(path).stem)
            # store results in data folder
            for subcats in results.values():
                for catresults in subcats.values():
                    for result in catresults:
                        # write raw data
                        zipout.write(result.path, str(folder.joinpath(result.data_uri)))
                        # write thumbnail
                        if result.is_img and result.png_content:
                            zipout.writestr(
                                str(folder.joinpath(result.png_uri)), result.png_content
                            )
                        # write aux files, keeping their position relative
                        # to the result file
                        parent = folder.joinpath(result.data_uri).parent
                        for aux_path in result.aux_files:
                            zipout.write(
                                aux_path,
                                str(
                                    parent.joinpath(
                                        os.path.relpath(
                                            aux_path, os.path.dirname(result.path)
                                        )
                                    )
                                ),
                            )

            # write report html
            zipout.writestr(str(folder.joinpath("report.html")), rendered)
    else:
        with open(path, "w", encoding="utf-8") as htmlout:
            htmlout.write(rendered)

    logger.info("Report created: {}.".format(path))
Exemplo n.º 9
0
 def format(self, item, *args, **kwargs):
     """Return ``item`` with wildcards applied when a ``wc`` keyword is given.

     Without a ``wc`` keyword argument ``item`` is returned unchanged.
     Positional extras and all other keyword arguments are ignored.
     """
     if 'wc' not in kwargs:
         return item
     return apply_wildcards(item, kwargs['wc'])
Exemplo n.º 10
0
 def apply_wildcards(self, wildcards, _):
     """Return a new ``CondaEnvNameSpec`` for ``self.name`` with wildcards applied.

     The second positional argument is accepted but not used.
     """
     resolved_name = apply_wildcards(self.name, wildcards)
     return CondaEnvNameSpec(resolved_name)
Exemplo n.º 11
0
 def concretize_param(p, wildcards):
     """Apply ``wildcards`` to ``p`` if it is a string; return other values as is."""
     if not isinstance(p, str):
         return p
     return apply_wildcards(p, wildcards)
Exemplo n.º 12
0
def auto_report(dag, path):
    """Render an HTML report for the jobs of ``dag`` and write it to ``path``.

    Files flagged with ``report`` among the outputs and inputs of the DAG's
    jobs are collected by category. Per-output metadata from the workflow
    persistence is turned into runtime, timeline and per-rule records, and
    everything is rendered through the ``report.html`` template.

    Args:
        dag: The workflow DAG; ``dag.jobs`` and ``dag.workflow`` are read.
        path: Target file; must end with ``.html``.

    Raises:
        WorkflowError: If jinja2 or pygments is missing, ``path`` does not
            end with ``.html``, a flagged file does not exist, or a flagged
            directory has missing or invalid patterns.
    """
    try:
        from jinja2 import Environment, PackageLoader
    except ImportError:
        raise WorkflowError(
            "Python package jinja2 must be installed to create reports.")

    if not path.endswith(".html"):
        raise WorkflowError("Report file does not end with .html")

    logger.info("Creating report...")

    env = Environment(
        loader=PackageLoader("snakemake", "report"),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env.filters["get_resource_as_string"] = get_resource_as_string

    def merge_time(rectime, metatime, sel_func):
        # Metadata may carry no time; keep the recorded value in that case
        # instead of comparing against None.
        if metatime is None:
            return rectime
        return sel_func(rectime, metatime)

    persistence = dag.workflow.persistence
    results = defaultdict(list)
    records = defaultdict(JobRecord)
    recorded_files = set()
    for job in dag.jobs:
        for f in itertools.chain(job.expanded_output, job.input):
            if is_flagged(f, "report") and f not in recorded_files:
                if not f.exists:
                    raise WorkflowError("File {} marked for report but does "
                                        "not exist.".format(f))
                report_obj = get_flag_value(f, "report")
                category = Category(report_obj.category)

                # Defined per flagged file and called immediately below, so
                # the captured ``job``, ``report_obj`` and ``category`` are
                # always current.
                def register_file(f, wildcards_overwrite=None):
                    results[category].append(
                        FileRecord(
                            f,
                            job,
                            report_obj.caption,
                            env,
                            category,
                            wildcards_overwrite=wildcards_overwrite,
                        ))
                    recorded_files.add(f)

                if os.path.isfile(f):
                    register_file(f)
                if os.path.isdir(f):
                    if not isinstance(report_obj.patterns, list):
                        raise WorkflowError(
                            "Invalid patterns given for report. Must be list.",
                            rule=job.rule,
                        )
                    if not report_obj.patterns:
                        raise WorkflowError(
                            "Directory marked for report but no file patterns given via patterns=[...]. "
                            "See report documentation.",
                            rule=job.rule,
                        )
                    for pattern in report_obj.patterns:
                        pattern = os.path.join(f, pattern)
                        wildcards = glob_wildcards(pattern)._asdict()
                        names = wildcards.keys()
                        for w in zip(*wildcards.values()):
                            w = dict(zip(names, w))
                            w.update(job.wildcards_dict)
                            w = Wildcards(fromdict=w)
                            # Use a separate name: rebinding ``f`` here would
                            # make the next pattern be joined onto a matched
                            # file instead of the directory.
                            matched_file = apply_wildcards(pattern, w)
                            register_file(matched_file, wildcards_overwrite=w)

        for f in job.expanded_output:
            meta = persistence.metadata(f)
            if not meta:
                logger.warning("Missing metadata for file {}. Maybe metadata "
                               "was deleted or it was created using an older "
                               "version of Snakemake. This is a non critical "
                               "warning.".format(f))
                continue
            try:
                job_hash = meta["job_hash"]
                rule = meta["rule"]
                rec = records[(job_hash, rule)]
                rec.rule = rule
                rec.job = job
                rec.starttime = merge_time(rec.starttime, meta["starttime"],
                                           min)
                rec.endtime = merge_time(rec.endtime, meta["endtime"], max)
                rec.conda_env_file = None
                rec.conda_env = meta["conda_env"]
                rec.container_img_url = meta["container_img_url"]
                rec.output.append(f)
            except KeyError as e:
                # Report the missing key through the logger rather than
                # printing it to stdout.
                logger.warning(
                    "Metadata for file {} was created with a too "
                    "old Snakemake version (missing key: {}).".format(f, e))

    for catresults in results.values():
        catresults.sort(key=lambda res: res.name)

    # prepare runtimes
    runtimes = [{
        "rule": rec.rule,
        "runtime": rec.endtime - rec.starttime
    } for rec in sorted(records.values(), key=lambda rec: rec.rule)]

    # prepare end times
    timeline = [{
        "rule":
        rec.rule,
        "starttime":
        datetime.datetime.fromtimestamp(rec.starttime).isoformat(),
        "endtime":
        datetime.datetime.fromtimestamp(rec.endtime).isoformat(),
    } for rec in sorted(records.values(), key=lambda rec: rec.rule)]

    # prepare per-rule information: records comparing equal to an existing
    # RuleRecord of the same rule are merged into it.
    rules = defaultdict(list)
    for rec in records.values():
        rule = RuleRecord(rec.job, rec)
        if rec.rule not in rules:
            rules[rec.rule].append(rule)
        else:
            merged = False
            for other in rules[rec.rule]:
                if rule == other:
                    other.add(rec)
                    merged = True
                    break
            if not merged:
                rules[rec.rule].append(rule)

    # rulegraph
    rulegraph, xmax, ymax = rulegraph_d3_spec(dag)

    # configfiles
    configfiles = [ConfigfileRecord(f) for f in dag.workflow.configfiles]

    # Flatten all results, keeping only the first record per target.
    seen = set()
    files = [
        seen.add(res.target) or res for cat in results.values() for res in cat
        if res.target not in seen
    ]

    rst_links = textwrap.dedent("""

    .. _Results: #results
    .. _Rules: #rules
    .. _Statistics: #stats
    {% for cat, catresults in categories|dictsort %}
    .. _{{ cat.name }}: #{{ cat.id }}
    {% for res in files %}
    .. _{{ res.target }}: #{{ res.id }}
    {% endfor %}
    {% endfor %}
    .. _
    """)
    for cat, catresults in results.items():
        for res in catresults:
            res.render(env, rst_links, results, files)

    # global description
    text = ""
    if dag.workflow.report_text:
        with open(dag.workflow.report_text) as f:

            class Snakemake:
                config = dag.workflow.config

            text = f.read() + rst_links
            text = publish_parts(
                env.from_string(text).render(snakemake=Snakemake,
                                             categories=results,
                                             files=files),
                writer_name="html",
            )["body"]

    # record time
    now = "{} {}".format(datetime.datetime.now().ctime(), time.tzname[0])
    results_size = sum(res.size for cat in results.values() for res in cat)

    try:
        from pygments.formatters import HtmlFormatter
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports.")

    # render HTML before opening the target, so a rendering failure does
    # not leave a truncated report file behind
    template = env.get_template("report.html")
    rendered = template.render(
        results=results,
        results_size=results_size,
        configfiles=configfiles,
        text=text,
        rulegraph_nodes=rulegraph["nodes"],
        rulegraph_links=rulegraph["links"],
        rulegraph_width=xmax + 20,
        rulegraph_height=ymax + 20,
        runtimes=runtimes,
        timeline=timeline,
        rules=[rec for recs in rules.values() for rec in recs],
        version=__version__,
        now=now,
        pygments_css=HtmlFormatter(style="trac").get_style_defs(".source"),
    )
    with open(path, "w", encoding="utf-8") as out:
        out.write(rendered)
    logger.info("Report created.")
Exemplo n.º 13
0
 def concretize_param(p, wildcards, is_from_callable):
     """Apply ``wildcards`` to a string parameter that is not from a callable.

     Values flagged by a truthy ``is_from_callable`` and all non-string
     values are returned unchanged.
     """
     if is_from_callable or not isinstance(p, str):
         return p
     return apply_wildcards(p, wildcards)
Exemplo n.º 14
0
 def concretize_param(p, wildcards):
     """Return ``p`` with ``wildcards`` applied when it is a string, else ``p`` itself."""
     is_text = isinstance(p, str)
     return apply_wildcards(p, wildcards) if is_text else p