示例#1
0
def _goleft_indexcov(bam_file, data, out_dir):
    """Use goleft indexcov to estimate coverage distributions using BAM index.

    Only used for whole genome runs as captures typically don't have enough data
    to be useful for index-only summaries.

    Args:
        bam_file: Path to the BAM file handed to ``goleft indexcov``.
        data: Sample dictionary; read through the ``dd`` accessors for the
            coverage interval, sample name and reference file.
        out_dir: Base output directory; results go into an ``indexcov``
            sub-directory created beneath it.

    Returns:
        List of the output files that exist after the run: the ``roc`` and
        ``ped`` files plus the unpacked ``tsv`` copy of the ``bed.gz`` file.
        An empty list when the coverage interval is not ``"genome"``.

    Raises:
        subprocess.CalledProcessError: When indexcov fails with a message
            other than the tolerated "no usable" / unexpected sex chromosome
            count errors.
    """
    if dd.get_coverage_interval(data) != "genome":
        return []
    sample_name = dd.get_sample_name(data)
    out_dir = utils.safe_makedir(os.path.join(out_dir, "indexcov"))
    out_files = [os.path.join(out_dir, "%s-indexcov.%s" % (sample_name, ext))
                 for ext in ["roc", "ped", "bed.gz"]]
    out_bed_gz = out_files[-1]
    if not utils.file_uptodate(out_bed_gz, bam_file):
        with transaction.tx_tmpdir(data) as tmp_dir:
            tmp_dir = utils.safe_makedir(os.path.join(tmp_dir, sample_name))
            gender_chroms = [x.name for x in ref.file_contigs(dd.get_ref_file(data))
                             if chromhacks.is_sex(x.name)]
            gender_args = "--sex %s" % (",".join(gender_chroms)) if gender_chroms else ""
            # Explicit keyword arguments instead of format(**locals()) so the
            # values feeding the command line are visible at the call site.
            cmd = "goleft indexcov --directory {tmp_dir} {gender_args} -- {bam_file}".format(
                tmp_dir=tmp_dir, gender_args=gender_args, bam_file=bam_file)
            try:
                do.run(cmd, "QC: goleft indexcov")
            except subprocess.CalledProcessError as msg:
                # Best effort: these two failure messages are tolerated and
                # whatever outputs were produced are still collected below.
                err = str(msg)
                no_usable = "indexcov: no usable" in err
                bad_sex_count = ("indexcov: expected" in err and
                                 "sex chromosomes, found:" in err)
                if not (no_usable or bad_sex_count):
                    raise
            for out_file in out_files:
                orig_file = os.path.join(tmp_dir, os.path.basename(out_file))
                if utils.file_exists(orig_file):
                    utils.copy_plus(orig_file, out_file)
    # MultiQC needs non-gzipped/BED inputs so unpack the file.
    # Swap only the trailing suffix: str.replace would also rewrite any
    # ".bed.gz" that happens to appear in a directory name of the path.
    out_bed = out_bed_gz[:-len(".bed.gz")] + ".tsv"
    # Re-unpack when the tsv is missing or stale relative to the bed.gz, so a
    # regenerated bed.gz is not shadowed by an old tsv. Uses the same
    # file_uptodate(output, input) check as the BAM comparison above
    # (presumably modification-time based -- confirm against utils).
    if utils.file_exists(out_bed_gz) and not utils.file_uptodate(out_bed, out_bed_gz):
        with transaction.file_transaction(data, out_bed) as tx_out_bed:
            cmd = "gunzip -c %s > %s" % (out_bed_gz, tx_out_bed)
            do.run(cmd, "Unpack indexcov BED file")
    out_files[-1] = out_bed
    return [x for x in out_files if utils.file_exists(x)]
示例#2
0
def _goleft_indexcov(bam_file, data, out_dir):
    """Use goleft indexcov to estimate coverage distributions using BAM index.

    Only used for whole genome runs as captures typically don't have enough data
    to be useful for index-only summaries.

    Args:
        bam_file: Path to the BAM file handed to ``goleft indexcov``.
        data: Sample dictionary; read through the ``dd`` accessors for the
            coverage interval, sample name and reference file.
        out_dir: Base output directory; results go into an ``indexcov``
            sub-directory created beneath it.

    Returns:
        List of the output files that exist after the run: the ``roc`` and
        ``ped`` files plus the unpacked ``tsv`` copy of the ``bed.gz`` file.
        An empty list when the coverage interval is not ``"genome"``.

    Raises:
        subprocess.CalledProcessError: When indexcov fails with a message
            other than the tolerated "no usable" / unexpected sex chromosome
            count errors.
    """
    if dd.get_coverage_interval(data) != "genome":
        return []
    sample_name = dd.get_sample_name(data)
    out_dir = utils.safe_makedir(os.path.join(out_dir, "indexcov"))
    out_files = [os.path.join(out_dir, "%s-indexcov.%s" % (sample_name, ext))
                 for ext in ["roc", "ped", "bed.gz"]]
    out_bed_gz = out_files[-1]
    if not utils.file_uptodate(out_bed_gz, bam_file):
        with transaction.tx_tmpdir(data) as tmp_dir:
            tmp_dir = utils.safe_makedir(os.path.join(tmp_dir, sample_name))
            gender_chroms = [x.name for x in ref.file_contigs(dd.get_ref_file(data))
                             if chromhacks.is_sex(x.name)]
            gender_args = "--sex %s" % (",".join(gender_chroms)) if gender_chroms else ""
            # Explicit keyword arguments instead of format(**locals()) so the
            # values feeding the command line are visible at the call site.
            cmd = "goleft indexcov --directory {tmp_dir} {gender_args} -- {bam_file}".format(
                tmp_dir=tmp_dir, gender_args=gender_args, bam_file=bam_file)
            try:
                do.run(cmd, "QC: goleft indexcov")
            except subprocess.CalledProcessError as msg:
                # Best effort: these two failure messages are tolerated and
                # whatever outputs were produced are still collected below.
                err = str(msg)
                no_usable = "indexcov: no usable" in err
                bad_sex_count = ("indexcov: expected" in err and
                                 "sex chromosomes, found:" in err)
                if not (no_usable or bad_sex_count):
                    raise
            for out_file in out_files:
                orig_file = os.path.join(tmp_dir, os.path.basename(out_file))
                if utils.file_exists(orig_file):
                    utils.copy_plus(orig_file, out_file)
    # MultiQC needs non-gzipped/BED inputs so unpack the file.
    # Swap only the trailing suffix: str.replace would also rewrite any
    # ".bed.gz" that happens to appear in a directory name of the path.
    out_bed = out_bed_gz[:-len(".bed.gz")] + ".tsv"
    # Re-unpack when the tsv is missing or stale relative to the bed.gz, so a
    # regenerated bed.gz is not shadowed by an old tsv. Uses the same
    # file_uptodate(output, input) check as the BAM comparison above
    # (presumably modification-time based -- confirm against utils).
    if utils.file_exists(out_bed_gz) and not utils.file_uptodate(out_bed, out_bed_gz):
        with transaction.file_transaction(data, out_bed) as tx_out_bed:
            cmd = "gunzip -c %s > %s" % (out_bed_gz, tx_out_bed)
            do.run(cmd, "Unpack indexcov BED file")
    out_files[-1] = out_bed
    return [x for x in out_files if utils.file_exists(x)]