Пример #1
0
def _summarize_combined(samples, vkey):
    """Write combined grading summaries (CSV and plot) for grouped samples.

    Samples are grouped by ``_group_validate_samples`` using the
    ``metadata -> validate_combine`` key. For every group with a non-empty
    name, the integer values from each validation summary CSV are summed per
    (caller, vtype, metric) and written to ``grading-summary-<name>.csv``
    inside ``<work dir>/<vkey>``, where the work directory comes from the
    first sample. A classification plot is then produced from that CSV.

    Validation results are taken from the ``vkey`` entry of each item in a
    sample's ``variants`` list; when none are found there, a top-level
    ``vkey`` entry on the sample is used instead.
    """
    out_dir = utils.safe_makedir(os.path.join(samples[0]["dirs"]["work"], vkey))
    grouped, _ = _group_validate_samples(samples, vkey, [["metadata", "validate_combine"]])
    for group_name, group_items in grouped.items():
        if not group_name:
            # Groups without a combine name are not summarized.
            continue
        totals = collections.defaultdict(int)
        ordered = sorted(group_items, key=lambda x: x.get("lane", dd.get_sample_name(x)))
        for data in ordered:
            per_variant = (variant.get(vkey) for variant in data.get("variants", []))
            validations = [v for v in per_variant if v]
            if not validations and vkey in data:
                validations = [data.get(vkey)]
            for validate in validations:
                with open(validate["summary"]) as in_handle:
                    rows = csv.reader(in_handle)
                    next(rows)  # header
                    # The first column (per-sample name) is replaced by the
                    # combined label on output, so it is discarded here.
                    for _, caller, vtype, metric, value in rows:
                        totals[(caller, vtype, metric)] += int(value)
        out_csv = os.path.join(out_dir, "grading-summary-%s.csv" % group_name)
        sample_label = "combined-%s" % group_name
        with open(out_csv, "w") as out_handle:
            writer = csv.writer(out_handle)
            writer.writerow(["sample", "caller", "vtype", "metric", "value"])
            writer.writerows(
                [sample_label, caller, variant_type, category, val]
                for (caller, variant_type, category), val in totals.items())
        validateplot.classifyplot_from_valfile(out_csv)
Пример #2
0
def evaluate(data):
    """Provide evaluations for multiple callers split by structural variant type.

    Truth sets are read from ``config -> algorithm -> svvalidate`` in
    ``data``. Evaluation only runs when both the truth sets and
    ``data["sv"]`` are present; the validation work directory is created
    either way.

    When the truth sets are a dictionary, they are evaluated with
    ``_evaluate_multi`` and ``data["sv-validate"]`` receives ``csv``,
    ``plot`` and ``df`` entries. Otherwise the truth set must be a path to an
    existing file; it is evaluated with ``_evaluate_vcf`` and
    ``data["sv-validate"]`` receives ``csv`` plus the first generated PNG
    plot as ``plot``.

    Returns ``data``, updated in place when an evaluation was run.
    Raises AssertionError when a non-dictionary truth set is not a string
    pointing to an existing file.
    """
    work_dir = utils.safe_makedir(
        os.path.join(data["dirs"]["work"], "structural",
                     dd.get_sample_name(data), "validate"))
    truth_sets = tz.get_in(["config", "algorithm", "svvalidate"], data)
    if truth_sets and data.get("sv"):
        if isinstance(truth_sets, dict):
            val_summary, df_csv = _evaluate_multi(data["sv"], truth_sets,
                                                  work_dir, data)
            summary_plots = _plot_evaluation(df_csv)
            data["sv-validate"] = {
                "csv": val_summary,
                "plot": summary_plots,
                "df": df_csv
            }
        else:
            # ``basestring`` only exists on Python 2. Fall back to ``str`` so
            # this branch validates the input instead of failing with a
            # NameError on Python 3.
            try:
                string_types = basestring  # noqa: F821
            except NameError:
                string_types = str
            assert (isinstance(truth_sets, string_types)
                    and utils.file_exists(truth_sets)), truth_sets
            val_summary = _evaluate_vcf(data["sv"], truth_sets, work_dir, data)
            title = "%s structural variants" % dd.get_sample_name(data)
            summary_plots = validateplot.classifyplot_from_valfile(
                val_summary, outtype="png", title=title)
            data["sv-validate"] = {
                "csv": val_summary,
                # Only the first returned plot is recorded.
                "plot": summary_plots[0]
            }
    return data
Пример #3
0
def evaluate(data):
    """Provide evaluations for multiple callers split by structural variant type.

    Looks up truth sets under ``config -> algorithm -> svvalidate`` and, when
    both truth sets and ``data["sv"]`` are available, stores the results in
    ``data["sv-validate"]``:

    - dictionary of truth sets: evaluated via ``_evaluate_multi``; stores
      ``csv``, ``plot`` (from ``_plot_evaluation``) and ``df``.
    - single truth set: must be a string naming an existing file; evaluated
      via ``_evaluate_vcf``; stores ``csv`` and the first PNG plot as ``plot``.

    The validation work directory is created even when nothing is evaluated.
    Returns ``data``. Raises AssertionError for a single truth set that is not
    a string path to an existing file.
    """
    work_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "structural",
                                               dd.get_sample_name(data), "validate"))
    truth_sets = tz.get_in(["config", "algorithm", "svvalidate"], data)
    if truth_sets and data.get("sv"):
        if isinstance(truth_sets, dict):
            val_summary, df_csv = _evaluate_multi(data["sv"], truth_sets, work_dir, data)
            summary_plots = _plot_evaluation(df_csv)
            data["sv-validate"] = {"csv": val_summary, "plot": summary_plots, "df": df_csv}
        else:
            # Python 3 has no ``basestring``; referencing it directly would
            # raise NameError before the check below could run.
            try:
                text_types = basestring  # noqa: F821
            except NameError:
                text_types = str
            assert isinstance(truth_sets, text_types) and utils.file_exists(truth_sets), truth_sets
            val_summary = _evaluate_vcf(data["sv"], truth_sets, work_dir, data)
            title = "%s structural variants" % dd.get_sample_name(data)
            summary_plots = validateplot.classifyplot_from_valfile(val_summary, outtype="png", title=title)
            # Keep only the first plot that was produced.
            data["sv-validate"] = {"csv": val_summary, "plot": summary_plots[0]}
    return data
Пример #4
0
import sys
from bcbio.variation import validateplot

# Usage: the first command-line argument is the validation file to plot.
# The title names the dataset and the caller versions shown in the figure.
title = "smCounter2 UMI: VarDict 1.5.6; octopus 0.5.1b"
validateplot.classifyplot_from_valfile(
    sys.argv[1],
    title=title,
    outtype="png")
import sys
from bcbio.variation import validateplot

# Usage: the first command-line argument is the validation file to plot.
# The title names the dataset and the callers shown in the figure.
title = "ICR142: GATK HaplotypeCaller, FreeBayes, VarDict, Platypus"
validateplot.classifyplot_from_valfile(
    sys.argv[1],
    title=title,
    outtype="png")