Exemplo n.º 1
0
 def benchmark_repeats(self):
     if self.benchmark is not None:
         return get_flag_value(self.benchmark, "repeat") or 1
Exemplo n.º 2
0
def auto_report(dag, path, stylesheet=None):
    try:
        from jinja2 import Template, Environment, PackageLoader, UndefinedError
    except ImportError as e:
        raise WorkflowError(
            "Python package jinja2 must be installed to create reports.")

    mode_embedded = True
    if path.endswith(".zip"):
        mode_embedded = False
    elif not path.endswith(".html"):
        raise WorkflowError("Report file does not end with .html or .zip")

    custom_stylesheet = None
    if stylesheet is not None:
        try:
            with open(stylesheet) as s:
                custom_stylesheet = s.read()
        except (Exception, BaseException) as e:
            raise WorkflowError("Unable to read custom report stylesheet.", e)

    logger.info("Creating report...")

    env = Environment(
        loader=PackageLoader("snakemake", "report/template"),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env.filters["get_resource_as_string"] = get_resource_as_string

    persistence = dag.workflow.persistence
    results = defaultdict(lambda: defaultdict(list))
    records = defaultdict(JobRecord)
    recorded_files = set()
    for job in dag.jobs:
        for f in itertools.chain(job.expanded_output, job.input):
            if is_flagged(f, "report") and f not in recorded_files:
                if not f.exists:
                    raise WorkflowError("File {} marked for report but does "
                                        "not exist.".format(f))
                report_obj = get_flag_value(f, "report")

                def register_file(f,
                                  wildcards_overwrite=None,
                                  aux_files=None,
                                  name_overwrite=None):
                    wildcards = wildcards_overwrite or job.wildcards
                    category = Category(report_obj.category,
                                        wildcards=wildcards,
                                        job=job)
                    subcategory = Category(report_obj.subcategory,
                                           wildcards=wildcards,
                                           job=job)
                    labels = expand_labels(report_obj.labels, wildcards, job)

                    results[category][subcategory].append(
                        FileRecord(
                            f,
                            job,
                            report_obj.caption,
                            env,
                            category,
                            dag.workflow,
                            wildcards_overwrite=wildcards_overwrite,
                            mode_embedded=mode_embedded,
                            aux_files=aux_files,
                            name_overwrite=name_overwrite,
                            labels=labels,
                        ))
                    recorded_files.add(f)

                if os.path.isfile(f):
                    register_file(f)
                elif os.path.isdir(f):
                    if report_obj.htmlindex:
                        if mode_embedded:
                            raise WorkflowError(
                                "Directory marked for report specifies htmlindex. "
                                "This is unsupported when requesting a pure HTML report. "
                                "Please use store as zip instead (--report report.zip)."
                            )
                        aux_files = []
                        index_found = False
                        for root, dirs, files in os.walk(f):
                            for name in files:
                                if name != ".snakemake_timestamp":
                                    filepath = os.path.join(root, name)
                                    if (os.path.relpath(filepath, f) !=
                                            report_obj.htmlindex):
                                        aux_files.append(filepath)
                                    else:
                                        index_found = True
                        if not index_found:
                            raise WorkflowError(
                                "Given htmlindex {} not found in directory "
                                "marked for report".format(
                                    report_obj.htmlindex))
                        register_file(
                            os.path.join(f, report_obj.htmlindex),
                            aux_files=aux_files,
                            name_overwrite="{}.html".format(
                                os.path.basename(f)),
                        )
                    elif report_obj.patterns:
                        if not isinstance(report_obj.patterns, list):
                            raise WorkflowError(
                                "Invalid patterns given for report. Must be list.",
                                rule=job.rule,
                            )

                        for pattern in report_obj.patterns:
                            pattern = os.path.join(f, pattern)
                            wildcards = glob_wildcards(pattern)._asdict()
                            names = wildcards.keys()
                            for w in zip(*wildcards.values()):
                                w = dict(zip(names, w))
                                w.update(job.wildcards_dict)
                                w = Wildcards(fromdict=w)
                                f = apply_wildcards(pattern, w)
                                register_file(f, wildcards_overwrite=w)
                    else:
                        raise WorkflowError(
                            "Directory marked for report but neither file patterns "
                            "given via patterns=[...], nor htmlindex given. "
                            "See report documentation.",
                            rule=job.rule,
                        )

        for f in job.expanded_output:
            meta = persistence.metadata(f)
            if not meta:
                logger.warning("Missing metadata for file {}. Maybe metadata "
                               "was deleted or it was created using an older "
                               "version of Snakemake. This is a non critical "
                               "warning.".format(f))
                continue

            def get_time(rectime, metatime, sel_func):
                if metatime is None:
                    return rectime
                return sel_func(metatime, rectime)

            try:
                job_hash = meta["job_hash"]
                rule = meta["rule"]
                rec = records[(job_hash, rule)]
                rec.rule = rule
                rec.job = job
                rec.starttime = get_time(rec.starttime, meta["starttime"], min)
                rec.endtime = get_time(rec.endtime, meta["endtime"], max)
                rec.conda_env_file = None
                rec.conda_env = meta["conda_env"]
                rec.container_img_url = meta["container_img_url"]
                rec.output.append(f)
            except KeyError as e:
                logger.warning("Metadata for file {} was created with a too "
                               "old Snakemake version.".format(f))

    for subcats in results.values():
        for catresults in subcats.values():
            catresults.sort(key=lambda res: res.name)

    # prepare runtimes
    runtimes = [{
        "rule": rec.rule,
        "runtime": rec.endtime - rec.starttime
    } for rec in sorted(records.values(), key=lambda rec: rec.rule)]

    def get_datetime(rectime):
        try:
            return datetime.datetime.fromtimestamp(rectime).isoformat()
        except OSError:
            return None

    # prepare end times
    timeline = [{
        "rule": rec.rule,
        "starttime": get_datetime(rec.starttime),
        "endtime": get_datetime(rec.endtime),
    } for rec in sorted(records.values(), key=lambda rec: rec.rule)]

    # prepare per-rule information
    rules = defaultdict(list)
    for rec in records.values():
        rule = RuleRecord(rec.job, rec)
        if rec.rule not in rules:
            rules[rec.rule].append(rule)
        else:
            merged = False
            for other in rules[rec.rule]:
                if rule == other:
                    other.add(rec)
                    merged = True
                    break
            if not merged:
                rules[rec.rule].append(rule)
    # In theory there could be more than one rule with the same name kept from above.
    # For now, we just keep the first.
    rules = {rulename: items[0] for rulename, items in rules.items()}

    # rulegraph
    rulegraph, xmax, ymax = rulegraph_spec(dag)

    # configfiles
    configfiles = [ConfigfileRecord(f) for f in dag.workflow.configfiles]

    seen = set()
    files = [
        seen.add(res.target) or res for cat in results.values()
        for subcat in cat.values() for res in subcat if res.target not in seen
    ]

    rst_links = textwrap.dedent("""

    .. _Workflow: javascript:show_panel('workflow')
    .. _Statistics: javascript:show_panel('statistics')
    {% for cat, catresults in categories|dictsort %}
    .. _{{ cat.name }}: javascript:app.showCategory('{{ cat.name|urlencode }}')
    {% endfor %}
    {% for res in files %}
    .. _{{ res.target }}: javascript:app.showResultInfo('{{ res.path|urlencode }}')
    {% endfor %}
    """)
    for cat, subcats in results.items():
        for subcat, catresults in subcats.items():
            for res in catresults:
                res.render(env, rst_links, results, files)

    # global description
    text = ""
    if dag.workflow.report_text:
        with dag.workflow.sourcecache.open(dag.workflow.report_text) as f:

            class Snakemake:
                config = dag.workflow.config

            text = f.read() + rst_links

            try:
                text = publish_parts(
                    env.from_string(text).render(snakemake=Snakemake,
                                                 categories=results,
                                                 files=files),
                    writer_name="html",
                )["body"]
            except UndefinedError as e:
                raise WorkflowError(
                    "Error rendering global report caption {}:".format(
                        dag.workflow.report_text.get_path_or_uri()),
                    e,
                )

    # record time
    now = "{} {}".format(datetime.datetime.now().ctime(), time.tzname[0])
    results_size = sum(res.size for cat in results.values()
                       for subcat in cat.values() for res in subcat)

    try:
        from pygments.formatters import HtmlFormatter
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports.")

    categories = data.render_categories(results)
    rendered_results = data.render_results(results)
    rulegraph = data.render_rulegraph(rulegraph["nodes"], rulegraph["links"],
                                      rulegraph["links_direct"])
    rules = data.render_rules(rules)
    runtimes = data.render_runtimes(runtimes)
    timeline = data.render_timeline(timeline)
    packages = data.get_packages()

    template = env.get_template("index.html.jinja2")

    logger.info("Downloading resources and rendering HTML.")

    rendered = template.render(
        results=rendered_results,
        categories=categories,
        rulegraph=rulegraph,
        rules=rules,
        workflow_desc=json.dumps(text),
        runtimes=runtimes,
        timeline=timeline,
        packages=packages,
        pygments_css=HtmlFormatter(
            style="stata-dark").get_style_defs(".source"),
        custom_stylesheet=custom_stylesheet,
        logo=data_uri_from_file(
            Path(__file__).parent / "template" / "logo.svg"),
        now=now,
    )

    # TODO look into supporting .WARC format, also see (https://webrecorder.io)

    if not mode_embedded:
        with ZipFile(path, compression=ZIP_DEFLATED, mode="w") as zipout:
            folder = Path(Path(path).stem)
            # store results in data folder
            for subcats in results.values():
                for catresults in subcats.values():
                    for result in catresults:
                        # write raw data
                        zipout.write(result.path,
                                     str(folder.joinpath(result.data_uri)))
                        # write aux files
                        parent = folder.joinpath(result.data_uri).parent
                        for aux_path in result.aux_files:
                            # print(aux_path, parent, str(parent.joinpath(os.path.relpath(aux_path, os.path.dirname(result.path)))))
                            zipout.write(
                                aux_path,
                                str(
                                    parent.joinpath(
                                        os.path.relpath(
                                            aux_path,
                                            os.path.dirname(result.path)))),
                            )

            # write report html
            zipout.writestr(str(folder.joinpath("report.html")), rendered)
    else:
        with open(path, "w", encoding="utf-8") as htmlout:
            htmlout.write(rendered)

    logger.info("Report created: {}.".format(path))
Exemplo n.º 3
0
def auto_report(dag, path, stylesheet=None):
    try:
        from jinja2 import Template, Environment, PackageLoader
    except ImportError as e:
        raise WorkflowError(
            "Python package jinja2 must be installed to create reports.")

    if not path.endswith(".html"):
        raise WorkflowError("Report file does not end with .html")

    custom_stylesheet = None
    if stylesheet is not None:
        try:
            with open(stylesheet) as s:
                custom_stylesheet = s.read()
        except (Exception, BaseException) as e:
            raise WorkflowError("Unable to read custom report stylesheet.", e)

    logger.info("Creating report...")

    env = Environment(
        loader=PackageLoader("snakemake", "report"),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env.filters["get_resource_as_string"] = get_resource_as_string

    persistence = dag.workflow.persistence
    results = defaultdict(lambda: defaultdict(list))
    records = defaultdict(JobRecord)
    recorded_files = set()
    for job in dag.jobs:
        for f in itertools.chain(job.expanded_output, job.input):
            if is_flagged(f, "report") and f not in recorded_files:
                if not f.exists:
                    raise WorkflowError("File {} marked for report but does "
                                        "not exist.".format(f))
                report_obj = get_flag_value(f, "report")

                def register_file(f, wildcards_overwrite=None):
                    wildcards = wildcards_overwrite or job.wildcards
                    category = Category(report_obj.category,
                                        wildcards=wildcards,
                                        job=job)
                    subcategory = Category(report_obj.subcategory,
                                           wildcards=wildcards,
                                           job=job)

                    results[category][subcategory].append(
                        FileRecord(
                            f,
                            job,
                            report_obj.caption,
                            env,
                            category,
                            wildcards_overwrite=wildcards_overwrite,
                        ))
                    recorded_files.add(f)

                if os.path.isfile(f):
                    register_file(f)
                if os.path.isdir(f):
                    if not isinstance(report_obj.patterns, list):
                        raise WorkflowError(
                            "Invalid patterns given for report. Must be list.",
                            rule=job.rule,
                        )
                    if not report_obj.patterns:
                        raise WorkflowError(
                            "Directory marked for report but no file patterns given via patterns=[...]. "
                            "See report documentation.",
                            rule=job.rule,
                        )
                    for pattern in report_obj.patterns:
                        pattern = os.path.join(f, pattern)
                        wildcards = glob_wildcards(pattern)._asdict()
                        names = wildcards.keys()
                        for w in zip(*wildcards.values()):
                            w = dict(zip(names, w))
                            w.update(job.wildcards_dict)
                            w = Wildcards(fromdict=w)
                            f = apply_wildcards(pattern, w)
                            register_file(f, wildcards_overwrite=w)

        for f in job.expanded_output:
            meta = persistence.metadata(f)
            if not meta:
                logger.warning("Missing metadata for file {}. Maybe metadata "
                               "was deleted or it was created using an older "
                               "version of Snakemake. This is a non critical "
                               "warning.".format(f))
                continue
            try:
                job_hash = meta["job_hash"]
                rule = meta["rule"]
                rec = records[(job_hash, rule)]
                rec.rule = rule
                rec.job = job
                rec.starttime = min(rec.starttime, meta["starttime"])
                rec.endtime = max(rec.endtime, meta["endtime"])
                rec.conda_env_file = None
                rec.conda_env = meta["conda_env"]
                rec.container_img_url = meta["container_img_url"]
                rec.output.append(f)
            except KeyError as e:
                print(e)
                logger.warning("Metadata for file {} was created with a too "
                               "old Snakemake version.".format(f))

    for subcats in results.values():
        for catresults in subcats.values():
            catresults.sort(key=lambda res: res.name)

    # prepare runtimes
    runtimes = [{
        "rule": rec.rule,
        "runtime": rec.endtime - rec.starttime
    } for rec in sorted(records.values(), key=lambda rec: rec.rule)]

    # prepare end times
    timeline = [{
        "rule":
        rec.rule,
        "starttime":
        datetime.datetime.fromtimestamp(rec.starttime).isoformat(),
        "endtime":
        datetime.datetime.fromtimestamp(rec.endtime).isoformat(),
    } for rec in sorted(records.values(), key=lambda rec: rec.rule)]

    # prepare per-rule information
    rules = defaultdict(list)
    for rec in records.values():
        rule = RuleRecord(rec.job, rec)
        if rec.rule not in rules:
            rules[rec.rule].append(rule)
        else:
            merged = False
            for other in rules[rec.rule]:
                if rule == other:
                    other.add(rec)
                    merged = True
                    break
            if not merged:
                rules[rec.rule].append(rule)

    # rulegraph
    rulegraph, xmax, ymax = rulegraph_d3_spec(dag)

    # configfiles
    configfiles = [ConfigfileRecord(f) for f in dag.workflow.configfiles]

    seen = set()
    files = [
        seen.add(res.target) or res for cat in results.values()
        for subcat in cat.values() for res in subcat if res.target not in seen
    ]

    rst_links = textwrap.dedent("""

    .. _Workflow: javascript:show_panel('workflow')
    .. _Statistics: javascript:show_panel('statistics')
    {% for cat, catresults in categories|dictsort %}
    .. _{{ cat.name }}: javascript:show_panel("{{ cat.id }}")
    {% endfor %}
    {% for res in files %}
    .. _{{ res.target }}: javascript:show_panel("{{ res.category.id }}")
    {% endfor %}
    """)
    for cat, subcats in results.items():
        for subcat, catresults in subcats.items():
            for res in catresults:
                res.render(env, rst_links, results, files)

    # global description
    text = ""
    if dag.workflow.report_text:
        with open(dag.workflow.report_text) as f:

            class Snakemake:
                config = dag.workflow.config

            text = f.read() + rst_links
            text = publish_parts(
                env.from_string(text).render(snakemake=Snakemake,
                                             categories=results,
                                             files=files),
                writer_name="html",
            )["body"]

    # record time
    now = "{} {}".format(datetime.datetime.now().ctime(), time.tzname[0])
    results_size = sum(res.size for cat in results.values()
                       for subcat in cat.values() for res in subcat)

    try:
        from pygments.formatters import HtmlFormatter
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports.")

    # render HTML
    template = env.get_template("report.html")
    with open(path, "w", encoding="utf-8") as out:
        out.write(
            template.render(
                results=results,
                results_size=results_size,
                configfiles=configfiles,
                text=text,
                rulegraph_nodes=rulegraph["nodes"],
                rulegraph_links=rulegraph["links"],
                rulegraph_width=xmax + 20,
                rulegraph_height=ymax + 20,
                runtimes=runtimes,
                timeline=timeline,
                rules=[rec for recs in rules.values() for rec in recs],
                version=__version__,
                now=now,
                pygments_css=HtmlFormatter(
                    style="trac").get_style_defs(".source"),
                custom_stylesheet=custom_stylesheet,
            ))
    logger.info("Report created.")