Exemplo n.º 1
0
Arquivo: tgbs.py Projeto: fw1121/jcvi
def bam(args):
    """
    %prog bam input.gsnap ref.fasta

    Convert GSNAP output to BAM.
    """
    # NOTE: this docstring doubles as the command-line usage text (it is passed
    # to OptionParser below), so the first line must name this sub-command.
    from jcvi.formats.sizes import Sizes
    from jcvi.formats.sam import index

    p = OptionParser(bam.__doc__)
    p.set_home("eddyyeh")
    p.set_cpus()
    opts, args = p.parse_args(args)

    # Exactly two positional arguments are required; otherwise show help and
    # exit (print_help() presumably returns a falsy value, giving a non-zero
    # exit status -- confirm against the OptionParser in use).
    if len(args) != 2:
        sys.exit(not p.print_help())

    gsnapfile, fastafile = args
    EYHOME = opts.eddyyeh_home
    # Output prefix is the GSNAP file name with its last extension removed.
    pf = gsnapfile.rsplit(".", 1)[0]
    uniqsam = pf + ".unique.sam"
    # Only re-run the conversion when need_update() says so (presumably when
    # the SAM file is missing or older than its inputs -- verify in helper).
    if need_update((gsnapfile, fastafile), uniqsam):
        cmd = op.join(EYHOME, "gsnap2gff3.pl")
        # Sizes(...).filename is the path handed to the script's -l option.
        sizesfile = Sizes(fastafile).filename
        cmd += " --format sam -i {0} -o {1}".format(gsnapfile, uniqsam)
        cmd += " -u -l {0} -p {1}".format(sizesfile, opts.cpus)
        sh(cmd)

    # Hand the SAM file to jcvi.formats.sam.index for the final conversion.
    index([uniqsam])
Exemplo n.º 2
0
def bam(args):
    """
    %prog bam input.gsnap ref.fasta

    Convert GSNAP output to BAM.
    """
    # NOTE: this docstring doubles as the command-line usage text (it is passed
    # to OptionParser below), so the first line must name this sub-command.
    from jcvi.formats.sizes import Sizes
    from jcvi.formats.sam import index

    p = OptionParser(bam.__doc__)
    p.set_home("eddyyeh")
    p.set_cpus()
    opts, args = p.parse_args(args)

    # Exactly two positional arguments are required; otherwise show help and
    # exit (print_help() presumably returns a falsy value, giving a non-zero
    # exit status -- confirm against the OptionParser in use).
    if len(args) != 2:
        sys.exit(not p.print_help())

    gsnapfile, fastafile = args
    EYHOME = opts.eddyyeh_home
    # Output prefix is the GSNAP file name with its last extension removed.
    pf = gsnapfile.rsplit(".", 1)[0]
    uniqsam = pf + ".unique.sam"
    # Only re-run the conversion when need_update() says so (presumably when
    # the SAM file is missing or older than its inputs -- verify in helper).
    if need_update((gsnapfile, fastafile), uniqsam):
        cmd = op.join(EYHOME, "gsnap2gff3.pl")
        # Sizes(...).filename is the path handed to the script's -l option.
        sizesfile = Sizes(fastafile).filename
        cmd += " --format sam -i {0} -o {1}".format(gsnapfile, uniqsam)
        cmd += " -u -l {0} -p {1}".format(sizesfile, opts.cpus)
        sh(cmd)

    # Hand the SAM file to jcvi.formats.sam.index for the final conversion.
    index([uniqsam])
Exemplo n.º 3
0
def simulate(args):
    """
    %prog simulate run_dir 1 300

    Simulate BAMs with varying inserts with dwgsim. The above command will
    simulate between 1 to 300 CAGs in the HD region, in a directory called
    `run_dir`.
    """
    p = OptionParser(simulate.__doc__)
    p.add_option("--ref",
                 default="/Users/htang/projects/ref/hg38.upper.fa",
                 help="Reference genome sequence")
    add_simulate_options(p)
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    rundir, startunits, endunits = args
    startunits, endunits = int(startunits), int(endunits)

    # Resolve the reference path BEFORE changing directory; otherwise a
    # relative --ref would be looked up inside `rundir` instead of the
    # directory the command was launched from.
    ref = os.path.abspath(opts.ref)

    basecwd = os.getcwd()
    mkdir(rundir)
    os.chdir(rundir)
    try:
        cwd = os.getcwd()

        # Huntington region
        # NOTE(review): start/end look like 1-based inclusive coordinates
        # (the left flank stops at start - 1) -- confirm against the assembly.
        pad_left, pad_right = 1000, 10000
        chrom, start, end = 'chr4', 3074877, 3074933
        fasta = Fasta(ref)
        seq_left = fasta[chrom][start - pad_left:start - 1]
        seq_right = fasta[chrom][end:end + pad_right]
        motif = 'CAG'

        # Shared alignment target: the unmodified locus plus both flanks.
        reffastafile = "ref.fasta"
        seq = str(fasta[chrom][start - pad_left:end + pad_right])
        make_fasta(seq, reffastafile, id=chrom.upper())

        # Write fake sequence
        for units in range(startunits, endunits + 1):
            pf = str(units)
            # Each repeat count gets its own scratch directory, removed below.
            mkdir(pf)
            os.chdir(pf)
            seq = str(seq_left) + motif * units + str(seq_right)
            fastafile = pf + ".fasta"
            make_fasta(seq, fastafile, id=chrom.upper())

            # Simulate reads on it
            wgsim([
                fastafile, "--depth={}".format(opts.depth),
                "--readlen={}".format(opts.readlen),
                "--distance={}".format(opts.distance),
                "--outfile={}".format(pf)
            ])

            # Align the simulated read pair against the shared reference that
            # lives one level up, in the run directory.
            read1 = pf + ".bwa.read1.fastq"
            read2 = pf + ".bwa.read2.fastq"
            samfile, _ = align(["../{}".format(reffastafile), read1, read2])
            indexed_samfile = index([samfile])

            # Keep only the final alignment and its index, named after the
            # repeat count, in the run directory.
            sh("mv {} ../{}.bam".format(indexed_samfile, pf))
            sh("mv {}.bai ../{}.bam.bai".format(indexed_samfile, pf))

            os.chdir(cwd)
            shutil.rmtree(pf)
    finally:
        # Always restore the caller's working directory, even when one of the
        # external steps above raises.
        os.chdir(basecwd)