示例#1
0
def locus(args):
    """
    %prog locus bamfile

    Extract selected locus from a list of TREDs for validation, and run lobSTR.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so the longer explanation lives in these comments instead.
    #
    # args: list of command-line tokens. Exactly one positional argument (the
    #       BAM file) is expected, plus --tred (required) and --ref (defaults
    #       to "hg38"). The lobSTR home comes from p.set_home("lobstr").
    # Returns None. The value of the first parsed VCF entry is written to
    # stderr as "<tred> => <value>", with commas replaced by slashes.
    from jcvi.formats.sam import get_minibam

    # See `Format-lobSTR-database.ipynb` for a list of TREDs for validation
    INCLUDE = [
        "HD", "SBMA", "SCA1", "SCA2", "SCA8", "SCA17", "DM1", "DM2", "FXTAS"
    ]
    db_choices = ("hg38", "hg19")

    p = OptionParser(locus.__doc__)
    p.add_option("--tred", choices=INCLUDE, help="TRED name")
    p.add_option("--ref",
                 choices=db_choices,
                 default="hg38",
                 help="Reference genome")
    p.set_home("lobstr")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (bamfile, ) = args
    ref = opts.ref
    lhome = opts.lobstr_home
    tred = opts.tred
    if tred is None:
        # --tred has no default; without it the table lookup below would fail
        # with an opaque indexing error, so bail out with the usage instead.
        print("Please specify a locus with --tred", file=sys.stderr)
        sys.exit(not p.print_help())

    tredsfile = datafile("TREDs.meta.csv")
    tf = pd.read_csv(tredsfile, index_col=0)
    # Label-based lookup on the first CSV column. `.ix` was removed in
    # pandas 1.0; `.loc` is the label-based equivalent.
    row = tf.loc[tred]
    tag = "repeat_location"
    ldb = "TREDs"
    if ref == "hg19":
        # Non-default reference: switch both the metadata column and the
        # database name passed to allelotype_on_chr to their hg19 variants.
        tag += "." + ref
        ldb += "-" + ref
    # The column value is formatted as "<seqid>:<start>-<end>".
    seqid, start_end = row[tag].split(":")

    # Widen the extracted region by PAD on each side of the repeat.
    PAD = 1000
    start, end = start_end.split("-")
    start, end = int(start) - PAD, int(end) + PAD
    region = "{}:{}-{}".format(seqid, start, end)

    minibamfile = get_minibam(bamfile, region)
    # allelotype_on_chr is given the sequence name without the "chr" prefix.
    c = seqid.replace("chr", "")
    cmd, vcf = allelotype_on_chr(minibamfile, c, lhome, ldb)
    sh(cmd)

    parser = LobSTRvcf(columnidsfile=None)
    parser.parse(vcf, filtered=False)
    items = parser.items()
    if not items:
        print("No entry found!", file=sys.stderr)
        return

    # Take the first entry without subscripting: on Python 3 `items()` may be
    # a dict view, which does not support indexing.
    _, v = next(iter(items))
    print("{} => {}".format(tred, v.replace(",", "/")), file=sys.stderr)
示例#2
0
文件: str.py 项目: tanghaibao/jcvi
def locus(args):
    """
    %prog locus bamfile

    Extract selected locus from a list of TREDs for validation, and run lobSTR.
    """
    # The docstring doubles as the usage text given to OptionParser, so it is
    # left terse; details follow here.
    #
    # args: command-line tokens -- one positional BAM file, --tred (required,
    #       one of INCLUDE) and --ref ("hg38" by default, or "hg19").
    # Returns None; the result line goes to stderr.
    from jcvi.formats.sam import get_minibam
    # See `Format-lobSTR-database.ipynb` for a list of TREDs for validation
    INCLUDE = ["HD", "SBMA", "SCA1", "SCA2", "SCA8", "SCA17", "DM1", "DM2",
               "FXTAS"]
    db_choices = ("hg38", "hg19")

    p = OptionParser(locus.__doc__)
    p.add_option("--tred", choices=INCLUDE,
                 help="TRED name")
    p.add_option("--ref", choices=db_choices, default="hg38",
                 help="Reference genome")
    p.set_home("lobstr")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    bamfile, = args
    ref = opts.ref
    lhome = opts.lobstr_home
    tred = opts.tred
    if tred is None:
        # No default exists for --tred; stop here rather than letting the
        # metadata lookup fail on a None label.
        print("Please specify a locus with --tred", file=sys.stderr)
        sys.exit(not p.print_help())

    tredsfile = datafile("TREDs.meta.csv")
    tf = pd.read_csv(tredsfile, index_col=0)
    # `.ix` no longer exists in pandas >= 1.0; `.loc` performs the same
    # label-based row selection on the index built from the first column.
    row = tf.loc[tred]
    tag = "repeat_location"
    ldb = "TREDs"
    if ref == "hg19":
        # Use the hg19-specific column and database name.
        tag += "." + ref
        ldb += "-" + ref
    # Column value looks like "<seqid>:<start>-<end>".
    seqid, start_end = row[tag].split(":")

    # Pad the region on both sides before extracting reads.
    PAD = 1000
    start, end = start_end.split('-')
    start, end = int(start) - PAD, int(end) + PAD
    region = "{}:{}-{}".format(seqid, start, end)

    minibamfile = get_minibam(bamfile, region)
    # The "chr" prefix is stripped before calling allelotype_on_chr.
    c = seqid.replace("chr", "")
    cmd, vcf = allelotype_on_chr(minibamfile, c, lhome, ldb)
    sh(cmd)

    parser = LobSTRvcf(columnidsfile=None)
    parser.parse(vcf, filtered=False)
    items = parser.items()
    if not items:
        print("No entry found!", file=sys.stderr)
        return

    # Reuse the already-fetched items, and avoid `[0]`, which fails when
    # `items()` returns a (non-subscriptable) dict view on Python 3.
    _, v = next(iter(items))
    print("{} => {}".format(tred, v.replace(',', '/')), file=sys.stderr)
示例#3
0
def locus(args):
    """
    %prog locus bamfile

    Extract selected locus from a list of TREDs for validation, and run lobSTR.
    """
    # The docstring is passed to OptionParser as the usage text, so extra
    # documentation is kept in comments.
    #
    # args: command-line tokens -- one positional BAM file and --tred
    #       (required, one of INCLUDE).
    # Returns None; the result line is written to stderr.
    from jcvi.formats.sam import get_minibam
    # See `Format-lobSTR-database.ipynb` for a list of TREDs for validation
    INCLUDE = ["HD", "SBMA", "SCA1", "SCA2", "SCA8", "SCA17", "DM1", "DM2"]

    p = OptionParser(locus.__doc__)
    p.add_option("--tred", choices=INCLUDE, help="TRED name")
    p.set_home("lobstr")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    bamfile, = args
    lhome = opts.lobstr_home
    tred = opts.tred
    if tred is None:
        # --tred has no default; fail early with the usage instead of an
        # indexing error on a None label.
        sys.stderr.write("Please specify a locus with --tred\n")
        sys.exit(not p.print_help())

    tredsfile = op.join(datadir, "TREDs.meta.csv")
    tf = pd.read_csv(tredsfile, index_col=0)
    # `.loc` is the label-based replacement for `.ix`, which was removed in
    # pandas 1.0.
    row = tf.loc[tred]
    # Column value looks like "<seqid>:<start>-<end>".
    seqid, start_end = row["repeat_location"].split(":")

    # Pad the region on both sides before extracting reads.
    PAD = 1000
    start, end = start_end.split('-')
    start, end = int(start) - PAD, int(end) + PAD
    region = "{}:{}-{}".format(seqid, start, end)

    minibamfile = get_minibam(bamfile, region)
    # The "chr" prefix is stripped before calling allelotype_on_chr.
    c = seqid.replace("chr", "")
    cmd, vcf = allelotype_on_chr(minibamfile, c, lhome, "TREDs")
    sh(cmd)

    parser = LobSTRvcf(columnidsfile=None)
    parser.parse(vcf, filtered=False)
    items = parser.items()
    if not items:
        # Previously an empty result crashed on `[0]`; report it instead.
        sys.stderr.write("No entry found!\n")
        return

    # next(iter(...)) works whether items() is a list or a dict view.
    _, v = next(iter(items))
    # sys.stderr.write behaves the same on Python 2 and 3, unlike the
    # `print >> sys.stderr` statement, which raises TypeError on Python 3.
    sys.stderr.write("{} => {}".format(tred, v.replace(',', '/')) + "\n")
示例#4
0
def htt(args):
    """
    %prog htt bamfile chr4:3070000-3080000

    Extract HTT region and run lobSTR.
    """
    # The docstring above is used verbatim as the OptionParser usage text.
    #
    # args: command-line tokens -- a BAM file followed by a region string of
    #       the form "<seqid>:<start>-<end>". Returns None.
    from jcvi.formats.sam import get_minibam

    parser = OptionParser(htt.__doc__)
    parser.set_home("lobstr")
    opts, args = parser.parse_args(args)

    # Exactly two positional arguments are required.
    if len(args) != 2:
        shown = parser.print_help()
        sys.exit(not shown)

    bamfile, region = args
    lobstr_home = opts.lobstr_home

    # Extract the reads for the requested region into a smaller BAM.
    minibamfile = get_minibam(bamfile, region)

    # Sequence name is the part before ":"; the "chr" prefix is removed
    # before it is passed to allelotype_on_chr.
    seqid = region.split(":")[0]
    chrom = seqid.replace("chr", "")

    # Only the command is needed here; the VCF path is not used.
    command, _vcf = allelotype_on_chr(minibamfile, chrom, lobstr_home, "hg38")
    sh(command)
示例#5
0
文件: str.py 项目: sophy7074/jcvi
def htt(args):
    """
    %prog htt bamfile chr4:3070000-3080000

    Extract HTT region and run lobSTR.
    """
    # Usage text for OptionParser comes from the docstring above.
    #
    # Expects two positional tokens in `args`: the BAM file and a region
    # written as "<seqid>:<start>-<end>". Nothing is returned.
    from jcvi.formats.sam import get_minibam

    optparser = OptionParser(htt.__doc__)
    optparser.set_home("lobstr")
    opts, args = optparser.parse_args(args)

    nargs = len(args)
    if nargs != 2:
        # Wrong number of positionals: show the help and exit.
        sys.exit(not optparser.print_help())

    bamfile, region = args
    home = opts.lobstr_home

    # Subset the BAM to the region of interest.
    minibamfile = get_minibam(bamfile, region)

    # Name of the sequence without its "chr" prefix.
    chrom = region.split(":", 1)[0].replace("chr", "")

    # The second element (VCF path) is not needed by this command.
    lobstr_cmd, _ = allelotype_on_chr(minibamfile, chrom, home, "hg38")
    sh(lobstr_cmd)
示例#6
0
def run_mito(
    chrMfa, bamfile, opts, realignonly=False, svonly=False, store=None, cleanup=False
):
    """Run the chrM pipeline: extract, realign, depth, SV calls, piledriver.

    Each step is skipped when `need_update` reports its output as current.

    Args:
        chrMfa: Reference passed to `speedseq realign`, `speedseq sv -R`,
            `coverage` and `bamtools piledriver -fasta`.
        bamfile: Input BAM; its basename determines the mini-BAM file name.
        opts: Object providing `speedseq_home`, `cpus` and `support`.
        realignonly: If true, return right after the realign step.
        svonly: If true, stop after SV calling (cleanup still honoured).
        store: If truthy, each output file is passed to `push_to_s3` with
            this value as the destination.
        cleanup: If true, call `do_cleanup` on the mini and realigned BAMs
            once the run is finished.

    Returns:
        None.
    """
    from jcvi.formats.sam import get_minibam

    def upload(path):
        # Push an output file only when a store was supplied.
        if store:
            push_to_s3(store, path)

    region = "chrM"
    region_suffix = ".{}.bam".format(region)
    minibam = op.basename(bamfile).replace(".bam", region_suffix)
    if op.exists(minibam):
        logging.debug("{} found. Skipped.".format(minibam))
    else:
        get_minibam(bamfile, region)

    speedseq_bin = op.join(opts.speedseq_home, "speedseq")

    # All downstream outputs share this prefix.
    prefix = minibam.rsplit(".", 1)[0] + ".realign"
    realignbam = prefix + ".bam"
    shared_args = " -v -t {} -o {}".format(opts.cpus, prefix)

    # Step 1: realign the mini BAM against the supplied reference.
    if not need_update(minibam, realignbam):
        logging.debug("{} found. Skipped.".format(realignbam))
    else:
        realign_cmd = (
            speedseq_bin + " realign" + shared_args + " {} {}".format(chrMfa, minibam)
        )
        sh(realign_cmd)

    if realignonly:
        return

    # Step 2: depth of coverage.
    depthfile = prefix + ".depth"
    if need_update(realignbam, depthfile):
        coverage_args = [chrMfa, realignbam, "--nosort", "--format=coverage"]
        coverage_args.append("--outfile={}".format(depthfile))
        coverage(coverage_args)
    upload(depthfile)

    # Step 3: structural variant calling.
    vcffile = prefix + ".sv.vcf.gz"
    if not need_update(realignbam, vcffile):
        logging.debug("{} found. Skipped.".format(vcffile))
    else:
        pieces = [
            speedseq_bin + " sv",
            shared_args,
            " -R {}".format(chrMfa),
            " -m {}".format(opts.support),
            " -B {} -D {} -S {}".format(
                realignbam, prefix + ".discordants.bam", prefix + ".splitters.bam"
            ),
        ]
        sh("".join(pieces))
    upload(vcffile)

    # Step 4: piledriver, skipped entirely when only SV calls were requested.
    if not svonly:
        piledriver = prefix + ".piledriver"
        if need_update(realignbam, piledriver):
            pile_cmd = "bamtools piledriver -fasta {}".format(chrMfa)
            pile_cmd += " -in {}".format(realignbam)
            sh(pile_cmd, outfile=piledriver)
        upload(piledriver)

    if cleanup:
        do_cleanup(minibam, realignbam)
示例#7
0
文件: delly.py 项目: xuanblo/jcvi
def run_mito(chrMfa, bamfile, opts, realignonly=False, svonly=False,
             store=None, cleanup=False):
    """Extract chrM reads from `bamfile` and run the downstream steps.

    Steps, in order: mini-BAM extraction, `speedseq realign`, depth via
    `coverage`, `speedseq sv`, and `bamtools piledriver`. A step is skipped
    when `need_update` says its output is already up to date.

    Args:
        chrMfa: Reference handed to the realign, coverage, sv and piledriver
            commands.
        bamfile: Input BAM; the mini-BAM name is derived from its basename.
        opts: Object with `speedseq_home`, `cpus` and `support` attributes.
        realignonly: Return immediately after the realign step.
        svonly: Return after SV calling, running cleanup first if requested.
        store: When truthy, outputs are pushed with `push_to_s3(store, ...)`.
        cleanup: When true, call `do_cleanup(minibam, realignbam)` at the end.

    Returns:
        None.
    """
    from jcvi.formats.sam import get_minibam

    skipped = "{} found. Skipped."
    region = "chrM"
    minibam = op.basename(bamfile).replace(".bam", ".{}.bam".format(region))
    have_minibam = op.exists(minibam)
    if have_minibam:
        logging.debug(skipped.format(minibam))
    else:
        get_minibam(bamfile, region)

    speedseq_bin = op.join(opts.speedseq_home, "speedseq")

    # Common stem for every output produced below.
    stem = minibam.rsplit(".", 1)[0] + ".realign"
    realignbam = stem + ".bam"
    margs = " -v -t {} -o {}".format(opts.cpus, stem)

    # Realignment
    if need_update(minibam, realignbam):
        realign_parts = [speedseq_bin + " realign", margs,
                         " {} {}".format(chrMfa, minibam)]
        sh("".join(realign_parts))
    else:
        logging.debug(skipped.format(realignbam))

    if realignonly:
        return

    # Depth of coverage
    depthfile = stem + ".depth"
    if need_update(realignbam, depthfile):
        outfile_opt = "--outfile={}".format(depthfile)
        coverage([chrMfa, realignbam, "--nosort", "--format=coverage",
                  outfile_opt])

    if store:
        push_to_s3(store, depthfile)

    # Structural variants
    vcffile = stem + ".sv.vcf.gz"
    if need_update(realignbam, vcffile):
        discordants = stem + ".discordants.bam"
        splitters = stem + ".splitters.bam"
        sv_cmd = speedseq_bin + " sv" + margs
        sv_cmd += " -R {}".format(chrMfa)
        sv_cmd += " -m {}".format(opts.support)
        sv_cmd += " -B {} -D {} -S {}".format(realignbam, discordants,
                                              splitters)
        sh(sv_cmd)
    else:
        logging.debug(skipped.format(vcffile))

    if store:
        push_to_s3(store, vcffile)

    # Piledriver runs only when more than SV calls were requested.
    if not svonly:
        piledriver = stem + ".piledriver"
        if need_update(realignbam, piledriver):
            pile_cmd = "".join(["bamtools piledriver -fasta {}".format(chrMfa),
                                " -in {}".format(realignbam)])
            sh(pile_cmd, outfile=piledriver)

        if store:
            push_to_s3(store, piledriver)

    # Both the svonly and the full path finish with the optional cleanup.
    if cleanup:
        do_cleanup(minibam, realignbam)