Exemplo n.º 1
0
def get_gse_scope_task(histogram, name, kmer_length, job_type, work_dir,
                       out_dir):
    """Build the task that runs genomescope and findGSE to estimate genome size.

    The generated shell script runs ``genomescope.R`` and ``findGSE.R`` on
    ``histogram``, copies the genomescope plot/summary and the findGSE fitted
    table to ``out_dir``, renders the findGSE PDF to a PNG with Ghostscript
    and finally removes ``{work_dir}/round*``.

    Returns:
        tuple: ``(task, genomescope_txt, findgse_txt, genomescope_png,
        findgse_png)``; the four paths all point into ``out_dir``.
    """
    shell = """
{rscript} {script}/genomescope.R {histogram} {kmer_length} 150 {work_dir} -1 1
cp {work_dir}/plot.png {out_dir}/{name}.genomescope.png
cp {work_dir}/summary.txt {out_dir}/{name}.genomescope.txt
{rscript} {script}/findGSE.R {histogram} {kmer_length} {work_dir}
mv {work_dir}/v1.94.est.{name}.histogram.txt.sizek{kmer_length}.curvefitted.pdf {work_dir}/{name}.findgse.pdf
cp {work_dir}/v1.94.est.{name}.histogram.txt.genome.size.estimated.k{kmer_length}to{kmer_length}.fitted.txt {out_dir}/{name}.findgse.txt
#convert -density 300 -quality 300 {work_dir}/{name}.findgse.pdf {work_dir}/{name}.findgse.png
{gs} -dQUIET -dNOSAFER -r300 -dBATCH -sDEVICE=pngalpha -dNOPAUSE -dNOPROMPT -sOutputFile={out_dir}/{name}.findgse.png {work_dir}/{name}.findgse.pdf
#cp {work_dir}/{name}.findgse-0.png {out_dir}/{name}.findgse.png
rm -rf {work_dir}/round*
""".format(
        histogram=histogram,
        kmer_length=kmer_length,
        name=name,
        work_dir=work_dir,
        out_dir=out_dir,
        rscript=RSCRIPT,
        script=SCRIPTS,
        gs=GHOSTSCRIPT,
    )

    task = Task(
        id="scope_and_gse",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp 2",
        script=shell,
    )

    # All reported results are the copies placed in out_dir by the script.
    genomescope_txt = os.path.join(out_dir, "%s.genomescope.txt" % name)
    findgse_txt = os.path.join(out_dir, "%s.findgse.txt" % name)
    genomescope_png = os.path.join(out_dir, "%s.genomescope.png" % name)
    findgse_png = os.path.join(out_dir, "%s.findgse.png" % name)

    return task, genomescope_txt, findgse_txt, genomescope_png, findgse_png
Exemplo n.º 2
0
def choose_data_task(r1, r2, name, kmer_stat, kmer_depth, job_type, work_dir, out_dir):
    """Build the task that selects (sub-samples) paired reads for a target depth.

    The sequencing depth is read from the ``kmer_depth`` row of ``kmer_stat``.
    When the requested ``kmer_depth`` is lower than that depth, both read
    files are sub-sampled with ``seqkit sample`` using the proportion
    ``kmer_depth / depth``; otherwise the input files are simply symlinked.

    The choice between linking and sampling is made here in Python. The
    previous shell test ``[ {proportion} -ge 1]`` was malformed (missing
    space before ``]`` and an integer-only operator applied to a float), so
    the link branch could never be taken.

    Args:
        r1, r2: paths of the paired read files.
        name: prefix for the generated ``<name>_choose.r{1,2}.fastq`` files.
        kmer_stat: tab-separated file containing a ``kmer_depth`` row.
        kmer_depth: requested depth.
        job_type, work_dir: forwarded to ``Task``.
        out_dir: unused; kept for interface compatibility.

    Returns:
        tuple: ``(task, r1_path, r2_path)`` with both paths in ``work_dir``.

    Raises:
        ValueError: if ``kmer_stat`` has no ``kmer_depth`` row or the depth
            found there is not positive.
    """
    sdepth = None
    for line in read_tsv(kmer_stat):
        if line and line[0] == "kmer_depth":
            sdepth = int(line[1])

    if sdepth is None:
        raise ValueError("No 'kmer_depth' record found in %s" % kmer_stat)
    if sdepth <= 0:
        raise ValueError(
            "Invalid sequencing depth %s in %s" % (sdepth, kmer_stat))

    if kmer_depth >= sdepth:
        if kmer_depth > sdepth:
            LOG.debug('The amount of sequencing data may be insufficient. Sequencing depth is only %s X' % sdepth)
        proportion = 1
    else:
        proportion = kmer_depth * 1.0 / sdepth

    if proportion >= 1:
        # Nothing to sample: expose the full data set under the expected names.
        run = """
ln -sf {r1} {name}_choose.r1.fastq
ln -sf {r2} {name}_choose.r2.fastq
""".format(r1=r1, r2=r2, name=name)
    else:
        run = """
{seqkit} sample -p {proportion} -2 -o {name}_choose.r1.fastq {r1}
{seqkit} sample -p {proportion} -2 -o {name}_choose.r2.fastq {r2}
""".format(seqkit=SEQKIT, proportion=proportion, r1=r1, r2=r2, name=name)

    task = Task(
        id="choose_fastq",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp 2",
        script=run
    )

    return task, os.path.join(work_dir, "%s_choose.r1.fastq" % name), os.path.join(work_dir, "%s_choose.r2.fastq" % name)
Exemplo n.º 3
0
def kmerfreq_task(r1, r2, name, kmer_length, thread, job_type, work_dir,
                  out_dir):
    """Build the task that computes the k-mer frequency distribution of reads.

    ``kmer_length`` values of 17 or more are replaced by 17 before the
    command line is generated. The script lists ``r1`` and ``r2`` into
    ``<name>.data``, runs kmerfreq on that list, copies ``<name>.freq.stat``
    to ``out_dir`` and summarises it with ``kmerfreq_stat.py``.

    Returns:
        tuple: ``(task, freq_stat, kmer_stat, genome_estimate)``; the three
        paths are all located in ``work_dir``.
    """
    # Cap the k-mer size at 17.
    capped_k = 17 if kmer_length >= 17 else kmer_length

    shell = """
export PATH={python}:$PATH
ls {r1} >{work_dir}/{name}.data
ls {r2} >>{work_dir}/{name}.data
{kmerfreq} -k {kmer_length} -t {thread} -p {work_dir}/{name} -q 33 -m 0 {work_dir}/{name}.data > {work_dir}/{name}.kmer.count
cp {work_dir}/{name}.freq.stat {out_dir}/{name}.kmerfreq.stat
python {script}/kmerfreq_stat.py {work_dir}/{name}.freq.stat >{work_dir}/{name}.kmer.stat
""".format(
        python=PYTHON_BIN,
        script=SCRIPTS,
        kmerfreq=KMERFREQ,
        r1=r1,  # read file(s) to list
        r2=r2,
        name=name,
        kmer_length=capped_k,  # k-mer size
        thread=thread,
        work_dir=work_dir,
        out_dir=out_dir,
    )

    task = Task(
        id="kmerfreq",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp %s" % thread,
        script=shell,
    )

    freq_stat = os.path.join(work_dir, "%s.freq.stat" % name)
    kmer_stat = os.path.join(work_dir, "%s.kmer.stat" % name)
    genome_estimate = os.path.join(work_dir, "%s.genome_estimate" % name)

    return task, freq_stat, kmer_stat, genome_estimate
Exemplo n.º 4
0
def stat_mapcover_snp(reads, clean, depths, snps, job_type, work_dir, out_dir):
    """Build the task that summarises mapping coverage and SNPs.

    The script reuses ``clean`` as ``clean.stat_reads.tsv`` when that file
    exists, otherwise it generates the table from ``reads`` with
    ``stat_barcode.py``. It then runs ``stat_map_coverage.py`` and
    ``stat_snp_gff.py`` and copies both result tables to ``out_dir``.

    Returns:
        Task: the configured task (no output paths are returned).
    """
    shell = """
export PATH={python}:$PATH

if [ -f "{clean}" ];then
    cp {clean} clean.stat_reads.tsv
else
    python {scripts}/stat_barcode.py --input {reads} --out clean.stat_reads.tsv >data.js
fi

python {scripts}/stat_map_coverage.py --input {depths} --clean clean.stat_reads.tsv --out stat_map_coverage.tsv
python {scripts}/stat_snp_gff.py --input {snps} >stat.snp.tsv
cp stat_map_coverage.tsv stat.snp.tsv {out_dir}
""".format(
        reads=reads,
        clean=clean,
        depths=depths,
        snps=snps,
        out_dir=out_dir,
        python=PYTHON_BIN,
        scripts=SCRIPTS,
    )

    return Task(
        id="stat_cover_snp",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp 1",
        script=shell,
    )
Exemplo n.º 5
0
def create_soapdenovo_task(r1, r2, name, thread, queue, job_type, work_dir,
                           out_dir):
    """Build the SOAPdenovo assembly task for one sample.

    The script writes ``<name>.config`` and ``<name>_ngs.list``, runs
    ``SOAPdenovo-127mer all`` with ``-K 47``, renames the scaffold file to
    ``<name>.asm.fasta``, summarises it with ``stat_genome.py`` and copies the
    FASTA and the summary table to ``out_dir``.

    Returns:
        tuple: ``(task, asm_fasta, asm_tsv, option)`` where the two paths are
        in ``work_dir`` and ``option`` records the software version and the
        library settings used.
    """
    soap_info = {
        "version":
        get_version(SOFTWARE_VERSION["soapdenovo"]),
        "option":
        "max_rd_len=151 avg_ins=400 reverse_seq=0 asm_flags=3 rank=1 pair_num_cutoff=3 map_len=64"
    }
    option = {"soapdenovo": soap_info}

    shell = """
export PATH={soapdenovo}:{script}:$PATH
echo -e "max_rd_len=151\n[LIB]\navg_ins=400\nreverse_seq=0\nasm_flags=3\nrank=1\npair_num_cutoff=3\nmap_len=64\nq1={r1}\nq2={r2}\n" > {name}.config
echo -e "{r1} {r2}" >{name}_ngs.list
SOAPdenovo-127mer all -s {name}.config -K 47 -p {thread} -d 2 -R 2 -o {name} >ass.log
mv {name}.scafSeq {name}.asm.fasta
stat_genome.py -s {name}.asm.fasta -r {name}.asm.tsv
cp {name}.asm.tsv {name}.asm.fasta {out_dir}
""".format(
        soapdenovo=SOAPDENOVO_BIN,
        script=SCRIPTS,
        r1=r1,
        r2=r2,
        name=name,
        thread=thread,
        out_dir=out_dir,
    )

    task = Task(
        id="soapdenovo_%s" % name,
        work_dir=work_dir,
        type=job_type,
        option="-pe smp %s %s" % (thread, queue),
        script=shell,
    )

    asm_fasta = os.path.join(work_dir, "%s.asm.fasta" % name)
    asm_tsv = os.path.join(work_dir, "%s.asm.tsv" % name)

    return task, asm_fasta, asm_tsv, option
Exemplo n.º 6
0
def create_kmerfreq_task(reads, name, kmer_length, thread, job_type, work_dir,
                         out_dir):
    """Build the kmerfreq task for one sample.

    ``kmer_length`` values of 17 or more are replaced by 17. The cap is
    applied *before* the option record is built so that the reported ``-k``
    value always matches the value passed to kmerfreq (previously the
    record kept the uncapped value).

    The script lists ``reads`` into ``<name>.data``, runs kmerfreq on it,
    summarises ``<name>.freq.stat`` with ``kmerfreq_stat.py`` and copies the
    frequency table to ``out_dir`` as ``<name>.kmerfreq.stat``.

    Returns:
        tuple: ``(task, freq_stat, genome_estimate, option)`` where both
        paths are in ``work_dir`` and ``option`` records the software version
        and the command-line options used.
    """
    # Cap first, so the recorded option reflects the command actually run.
    if kmer_length >= 17:
        kmer_length = 17

    option = {}
    option["kmerfreq"] = {
        "version": get_version(SOFTWARE_VERSION["kmerfreq"]),
        "option": "-q 33 -m 0 -k %s" % kmer_length
    }

    task = Task(id="kmerfreq_%s" % name,
                work_dir=work_dir,
                type=job_type,
                option="-pe smp %s" % thread,
                script="""
export PATH={python}:$PATH
ls {reads} >{name}.data
{kmerfreq} -k {kmer_length} -t {thread} -p {name} -q 33 -m 0 {name}.data > {name}.kmer.count
python {script}/kmerfreq_stat.py {name}.freq.stat >{name}.kmer.stat
cp {name}.freq.stat {out_dir}/{name}.kmerfreq.stat
""".format(script=SCRIPTS,
           python=PYTHON_BIN,
           kmerfreq=KMERFREQ,
           kmer_length=kmer_length,
           name=name,
           reads=reads,
           thread=thread,
           out_dir=out_dir))

    return task, os.path.join(work_dir, "%s.freq.stat" % name), os.path.join(
        work_dir, "%s.genome_estimate" % name), option
Exemplo n.º 7
0
def create_unmap_task(name, reference, r1, r2, thread, job_type, work_dir,
                      out_dir):
    """Build the task that maps reads with minimap2 and keeps the unmapped ones.

    The script pipes ``minimap2 -ax sr`` output into ``samblaster -u``, which
    writes ``<name>.unmap.fq`` in the task's working directory.

    Returns:
        tuple: ``(task, unmap_fq, option)`` where ``unmap_fq`` is in
        ``work_dir`` and ``option`` records software versions and options.
    """
    # NOTE(review): the "option:" key carries a trailing colon, unlike the
    # "option" key used by the other task builders. It is kept as-is because
    # callers may rely on it; confirm whether it is a typo.
    option = OrderedDict([
        ("minimap2", {
            "version": get_version(SOFTWARE_VERSION["minimap2"]),
            "option:": "-ax sr"
        }),
        ("samblaster", {
            "version": get_version(SOFTWARE_VERSION["samblaster"]),
            "option:": "default"
        }),
    ])

    shell = """
export PATH={minimap2}:{samblaster}:$PATH
minimap2 -t {thread} -ax sr {reference} {r1} {r2} |samblaster -u {name}.unmap.fq
#cp {name}.unmap.fq {out_dir}
""".format(
        minimap2=MINIMAP_BIN,
        samblaster=SAMBLASTER_BIN,
        thread=thread,
        reference=reference,
        r1=r1,
        r2=r2,
        name=name,
        out_dir=out_dir,
    )

    task = Task(
        id="unmap__%s" % name,
        work_dir=work_dir,
        type=job_type,
        option="-pe smp %s" % thread,
        script=shell,
    )

    unmap_fq = os.path.join(work_dir, "%s.unmap.fq" % name)

    return task, unmap_fq, option
Exemplo n.º 8
0
def create_gse_scope_task(histogram, name, kmer_length, job_type, work_dir,
                          out_dir):
    """Build the genomescope + findGSE task for one sample.

    The script runs both R scripts in the current directory (``./``), renames
    their outputs to ``<name>.genomescope.*`` / ``<name>.findgse.*``, renders
    the findGSE PDF to PNG with Ghostscript, removes ``round*`` and copies
    the results to ``out_dir``.

    Returns:
        tuple: ``(task, genomescope_txt, findgse_txt, genomescope_png,
        findgse_png)``; the four paths all point into ``work_dir``.
    """
    shell = """
{rscript} {script}/genomescope.R {histogram} {kmer_length} 150 ./ -1 1
mv plot.png {name}.genomescope.png
mv summary.txt {name}.genomescope.txt
{rscript} {script}/findGSE.R {histogram} {kmer_length} ./
mv v1.94.est.{name}.histogram.txt.sizek{kmer_length}.curvefitted.pdf {name}.findgse.pdf
mv v1.94.est.{name}.histogram.txt.genome.size.estimated.k{kmer_length}to{kmer_length}.fitted.txt {name}.findgse.txt
{gs} -dQUIET -dNOSAFER -r300 -dBATCH -sDEVICE=pngalpha -dNOPAUSE -dNOPROMPT -sOutputFile={name}.findgse.png {name}.findgse.pdf
rm -rf round*
cp {name}.genomescope.png {name}.genomescope.txt {out_dir}
cp {name}.findgse.txt {name}.findgse.png {name}.findgse.pdf {out_dir}
""".format(
        histogram=histogram,
        kmer_length=kmer_length,
        name=name,
        out_dir=out_dir,
        rscript=RSCRIPT,
        script=SCRIPTS,
        gs=GHOSTSCRIPT,
    )

    task = Task(
        id="scope_gse_%s" % name,
        work_dir=work_dir,
        type=job_type,
        option="-pe smp 2",
        script=shell,
    )

    # Reported paths are the working-directory copies, not those in out_dir.
    genomescope_txt = os.path.join(work_dir, "%s.genomescope.txt" % name)
    findgse_txt = os.path.join(work_dir, "%s.findgse.txt" % name)
    genomescope_png = os.path.join(work_dir, "%s.genomescope.png" % name)
    findgse_png = os.path.join(work_dir, "%s.findgse.png" % name)

    return task, genomescope_txt, findgse_txt, genomescope_png, findgse_png
Exemplo n.º 9
0
def merge_data_task(name, r1, r2, job_type, work_dir, out_dir):
    """Build the task that merges (or links) the read files of a sample.

    ``r1`` and ``r2`` are sequences of file paths. The data are treated as
    gzip-compressed when the first entry of either sequence ends in ``.gz``.
    A single uncompressed ``r1`` file is symlinked by a ``local`` job;
    otherwise the files are concatenated with ``zcat`` (compressed) or
    ``cat`` into ``<name>.clean.r{1,2}.fastq``.

    Returns:
        tuple: ``(task, r1_path, r2_path)`` with both paths in ``work_dir``.
        ``out_dir`` is not used.
    """
    gzipped = r1[0].endswith(".gz") or r2[0].endswith(".gz")

    if not gzipped and len(r1) <= 1:
        # Nothing to merge or decompress: a symlink run locally is enough.
        job_type = "local"
        run = """
ln -s {r1} {name}.clean.r1.fastq
ln -s {r2} {name}.clean.r2.fastq
""".format(name=name, r1=" ".join(r1), r2=" ".join(r2))
    else:
        reader = "zcat" if gzipped else "cat"
        run = """
{tools} {r1} >{name}.clean.r1.fastq
{tools} {r2} >{name}.clean.r2.fastq
""".format(name=name, tools=reader, r1=" ".join(r1), r2=" ".join(r2))

    shell = """
{run}
""".format(run=run)

    task = Task(
        id="data_%s" % name,
        work_dir=work_dir,
        type=job_type,
        option="-pe smp 1",
        script=shell
    )

    merged_r1 = os.path.join(work_dir, "%s.clean.r1.fastq" % name)
    merged_r2 = os.path.join(work_dir, "%s.clean.r2.fastq" % name)

    return task, merged_r1, merged_r2
Exemplo n.º 10
0
def stat_gc_depth_task(genome, bam, name, window, job_type, work_dir, out_dir):
    """Build the task that computes coverage and GC-depth statistics.

    ``bam`` is first passed through ``check_paths``. The script runs
    ``samtools depth -aa`` and feeds the depth file to ``stat_coverage.py``,
    ``stat_length_gc.py``, ``stat_gc_depth.py`` and ``draw_depth_gc.py``.

    Returns:
        tuple: ``(task, gc_depth_png)`` where the PNG path is in ``out_dir``.
    """
    # NOTE(review): `window` is not referenced by the command template below;
    # the stat_gc_depth.py call uses a fixed "-w 5000". Confirm whether the
    # parameter was meant to replace that value.
    checked_bam = check_paths(bam)

    shell = """
export PATH={samtools}:{python}:$PATH
samtools depth -aa {bam} > {work_dir}/{name}.depth
python {script}/stat_coverage.py -i {work_dir}/{name}.depth -d 1,5,10,20 -o {out_dir}/{name}.coverage.xlsx
python {script}/stat_length_gc.py -d {work_dir}/{name}.depth -g {genome} -n {out_dir}/{name}
python {script}/stat_gc_depth.py -d {work_dir}/{name}.depth -g {genome} -b 1000 -w 5000 -e 100 -n {work_dir}/{name}
python {script}/draw_depth_gc.py -gcd {work_dir}/{name}.stat_gc_depth.tsv -n {out_dir}/{name}
#python {script}/plot_gc_depth.py -gcd {work_dir}/{name}.stat_gc_depth.tsv -n {out_dir}/{name}
""".format(
        samtools=SAMTOOLS_BIN,
        python=PYTHON_BIN,
        script=SCRIPTS,
        genome=genome,
        bam=checked_bam,
        name=name,
        work_dir=work_dir,
        out_dir=out_dir,
    )

    task = Task(
        id="stat_coverage",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp 1",
        script=shell,
    )

    gc_depth_png = os.path.join(out_dir, "%s.gc_depth.png" % name)

    return task, gc_depth_png
Exemplo n.º 11
0
def soapdenovo_task(r1, r2, name, thread, queue, job_type, work_dir, out_dir):
    """Build the SOAPdenovo assembly task.

    The script writes ``<name>.config`` and ``<name>_ngs.list``, runs
    ``SOAPdenovo-127mer all`` with ``-K 47``, copies the scaffold file to
    ``out_dir`` as ``<name>.asm.fasta`` and summarises it with
    ``stat_genome.py``.

    Returns:
        tuple: ``(task, asm_fasta, asm_tsv, ngs_list)``; the FASTA and the
        table are in ``out_dir``, the read list is in ``work_dir``.
    """
    shell = """
export PATH={soapdenovo}:{script}:$PATH
echo -e "max_rd_len=151\n[LIB]\navg_ins=400\nreverse_seq=0\nasm_flags=3\nrank=1\npair_num_cutoff=3\nmap_len=64\nq1={r1}\nq2={r2}\n" > {name}.config
echo -e "{r1} {r2}" >{name}_ngs.list
SOAPdenovo-127mer all -s {name}.config -K 47 -p {thread} -d 2 -R 2 -o {name} >ass.log
cp {name}.scafSeq {out_dir}/{name}.asm.fasta
stat_genome.py -s {out_dir}/{name}.asm.fasta -r {out_dir}/{name}.asm.tsv
""".format(
        soapdenovo=SOAPDENOVO_BIN,
        script=SCRIPTS,
        r1=r1,
        r2=r2,
        name=name,
        thread=thread,
        out_dir=out_dir,
    )

    task = Task(
        id="soapdenovo",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp %s %s" % (thread, queue),
        script=shell,
    )

    asm_fasta = os.path.join(out_dir, "%s.asm.fasta" % name)
    asm_tsv = os.path.join(out_dir, "%s.asm.tsv" % name)
    ngs_list = os.path.join(work_dir, "%s_ngs.list" % name)

    return task, asm_fasta, asm_tsv, ngs_list
Exemplo n.º 12
0
def get_heterozygosity_task(histo, estimate, kingdom, name, job_type, work_dir,
                            out_dir):
    """Build the task that fits heterozygosity from a k-mer histogram.

    The script runs ``fit_heterozygosity.py`` on ``histo`` with the given
    ``estimate`` and ``kingdom``, redirects its output to
    ``<name>.heterozygosity.xls`` and copies that table together with the
    ``<name>.kmer.p*`` and ``<name>.heterozygosity.p*`` files to ``out_dir``.

    Returns:
        tuple: ``(task, heterozygosity_xls, heterozygosity_png)``; both paths
        are in ``work_dir``.
    """
    # The backslash-newline pairs below are consumed by Python, so the
    # fit_heterozygosity.py call ends up on one line of the script.
    shell = """
export PATH={python}:$PATH
python {script}/fit_heterozygosity.py {histo} \
-e {estimate} --kingdom {kingdom} \
--name {name} --database {script} > {name}.heterozygosity.xls
cp {name}.heterozygosity.xls {out_dir}
cp {name}.kmer.p* {name}.heterozygosity.p* {out_dir}
""".format(
        python=PYTHON_BIN,
        script=SCRIPTS,
        histo=histo,
        estimate=estimate,
        kingdom=kingdom,
        name=name,
        out_dir=out_dir,
    )

    task = Task(
        id="heterozygosity",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp 2",
        script=shell,
    )

    heterozygosity_xls = os.path.join(work_dir, "%s.heterozygosity.xls" % name)
    heterozygosity_png = os.path.join(work_dir, "%s.heterozygosity.png" % name)

    return task, heterozygosity_xls, heterozygosity_png
Exemplo n.º 13
0
def bwa_index_task(genome, name, job_type, work_dir, out_dir):
    """Build the task that links a genome into ``out_dir`` and indexes it.

    The script symlinks ``genome`` to ``<out_dir>/<name>.fasta`` and runs
    ``bwa index`` on that link.

    Returns:
        tuple: ``(task, fasta)`` where ``fasta`` is the linked path in
        ``out_dir``.
    """
    shell = """
export PATH={bwa}:$PATH
ln -sf {genome} {out_dir}/{name}.fasta
bwa index {out_dir}/{name}.fasta
""".format(bwa=BWA_BIN, genome=genome, name=name, out_dir=out_dir)

    task = Task(
        id="bwa_index",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp 1",
        script=shell,
    )

    indexed_fasta = os.path.join(out_dir, "%s.fasta" % name)

    return task, indexed_fasta
Exemplo n.º 14
0
def merge_raw_data_task(name, r1, r2, tools, job_type, work_dir, out_dir):
    """Build the task that writes the raw reads into ``out_dir``.

    ``tools`` is the command used to stream the inputs; its output for ``r1``
    and ``r2`` is redirected to ``<out_dir>/<name>.raw.r{1,2}.fastq``.

    Returns:
        tuple: ``(task, raw_r1, raw_r2)`` with both paths in ``out_dir``.
    """
    shell = """
{tools} {r1} >{out_dir}/{name}.raw.r1.fastq
{tools} {r2} >{out_dir}/{name}.raw.r2.fastq
""".format(tools=tools, r1=r1, r2=r2, name=name, out_dir=out_dir)

    task = Task(
        id="merge_data",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp 1",
        script=shell,
    )

    raw_r1 = os.path.join(out_dir, "%s.raw.r1.fastq" % name)
    raw_r2 = os.path.join(out_dir, "%s.raw.r2.fastq" % name)

    return task, raw_r1, raw_r2
Exemplo n.º 15
0
def stat_reads_task(reads, name, thread, job_type, work_dir, out_dir):
    """Build the task that computes read statistics with ``stat_barcode.py``.

    The script writes ``<name>.stat_reads.tsv`` (stdout goes to ``data.js``)
    and copies the table to ``out_dir``.

    Returns:
        tuple: ``(task, stat_reads_tsv)`` where the path is in ``work_dir``.
    """
    shell = """
export PATH={python}:$PATH
python {scripts}/stat_barcode.py --input {reads} --out {name}.stat_reads.tsv >data.js
cp {name}.stat_reads.tsv {out_dir}
""".format(
        python=PYTHON_BIN,
        scripts=SCRIPTS,
        reads=reads,
        name=name,
        out_dir=out_dir,
    )

    task = Task(
        id="stat_reads_%s" % name,
        work_dir=work_dir,
        type=job_type,
        option="-pe smp %s" % thread,
        script=shell,
    )

    stat_reads_tsv = os.path.join(work_dir, "%s.stat_reads.tsv" % name)

    return task, stat_reads_tsv
Exemplo n.º 16
0
def obtain_contamination_task(taxid, name, kingdom, job_type, work_dir, out_dir, mode="general", cratio=10):
    """Build the task that assembles the contamination reference FASTA.

    The first line of ``taxid`` that starts with ``#`` is parsed as a
    tab-separated header whose first two fields look like ``<label>:<value>``;
    they provide the prokaryotic ratio and a top-10 count. If the ratio
    reaches ``cratio`` or the count is positive, ``mode`` is forced to
    ``"strict"``.

    In strict mode the script additionally extracts the sequences of the
    listed taxids with ``blastdbcmd`` and prepends them to the reference
    selected by ``kingdom``.

    Compared with the previous implementation, the file is now closed
    deterministically and a header that is not on the very first line is
    still found; a missing header raises a clear error instead of a
    ``NameError``.

    Args:
        taxid: path of the taxid summary file.
        name: prefix of the generated files.
        kingdom: key into ``MC_TAXON`` selecting the base reference.
        job_type, work_dir: forwarded to ``Task``.
        out_dir: only referenced by a commented-out line of the script.
        mode: ``"general"`` or ``"strict"``.
        cratio: prokaryotic-ratio threshold that triggers strict mode.

    Returns:
        tuple: ``(task, ref_fa)`` where ``ref_fa`` is in ``work_dir``.

    Raises:
        ValueError: if ``taxid`` contains no line starting with ``#``.
    """
    prok_ratio = None
    top10 = None
    with open(taxid) as handle:
        for line in handle:
            if not line.startswith("#"):
                continue
            fields = line.split("\t")
            prok_ratio = float(fields[0].split(':')[1])
            top10 = int(fields[1].split(':')[1])
            break

    if prok_ratio is None:
        raise ValueError("No '#' header line found in %s" % taxid)

    if prok_ratio >= cratio or top10 > 0:
        LOG.info("There is serious contamination of the sample, strict mode is mandatory")
        mode = "strict"

    if mode == "strict":
        # NOTE(review): the database is always NT_TAXON["fungi"], regardless
        # of `kingdom` - confirm this is intended.
        run = 'blastdbcmd -db {dbase} -dbtype "nucl" -taxidlist {name}.prokaryotic.taxid -out {name}.prokaryotic.fa'.format(
            dbase=NT_TAXON["fungi"],
            name=name)
        pfa = "%s.prokaryotic.fa" % name
    else:
        run = ""
        pfa = ""

    task = Task(
        id="blastdbcmd_%s" % name,
        work_dir=work_dir,
        type=job_type,
        option="-pe smp 2",
        script="""
export PATH={blast}:$PATH
{script}/print.py {taxid} >{name}.prokaryotic.taxid
{run}
cat {pfa} {mbase} >{name}.ref.fa
#cp {name}.ref.fa {out_dir}
""".format(blast=BLAST_BIN,
            script=SCRIPTS,
            taxid=taxid,
            run=run,
            pfa=pfa,
            name=name,
            mbase=MC_TAXON[kingdom],
            out_dir=out_dir
        )
    )

    return task, os.path.join(work_dir, "%s.ref.fa" % name)
Exemplo n.º 17
0
def bwa_merge_bam_task(sort_bams, name, thread, job_type, work_dir, out_dir):
    """Build the task that merges sorted BAM files listed via ``ls``.

    The script writes the ``ls`` listing of ``sort_bams`` to
    ``<out_dir>/bam.list``, merges the listed files with ``samtools merge -b``
    into ``<out_dir>/<name>.sorted.bam`` and indexes the result.

    Returns:
        tuple: ``(task, sorted_bam)`` where the BAM path is in ``out_dir``.
    """
    shell = """
export PATH={samtools}:$PATH
ls {sort_bams} >{out_dir}/bam.list
samtools merge -f -c --threads {thread} -b {out_dir}/bam.list {out_dir}/{name}.sorted.bam
samtools index {out_dir}/{name}.sorted.bam
#rm -rf {sort_bams}
""".format(
        samtools=SAMTOOLS_BIN,
        sort_bams=sort_bams,
        thread=thread,
        name=name,
        out_dir=out_dir,
    )

    task = Task(
        id="merge_bwa_bam",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp %s" % thread,
        script=shell,
    )

    merged_bam = os.path.join(out_dir, "%s.sorted.bam" % name)

    return task, merged_bam
Exemplo n.º 18
0
def merge_bam_task(bams, name, thread, job_type, work_dir, out_dir):
    """Build the task that merges BAM files given on the command line.

    The script merges ``bams`` with ``samtools merge`` into
    ``<name>.sorted.bam`` in the task's working directory and indexes it.

    Returns:
        tuple: ``(task, sorted_bam)`` where the BAM path is in ``work_dir``.
    """
    shell = """
export PATH={samtools}:$PATH
samtools merge -f -c --threads {thread} {name}.sorted.bam {bams}
samtools index {name}.sorted.bam
#rm {bams}
#cp {name}.sorted.bam {out_dir}
""".format(
        samtools=SAMTOOLS_BIN,
        thread=thread,
        name=name,
        bams=bams,
        out_dir=out_dir,
    )

    task = Task(
        id="merge_bam",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp %s" % thread,
        script=shell,
    )

    merged_bam = os.path.join(work_dir, "%s.sorted.bam" % name)

    return task, merged_bam
Exemplo n.º 19
0
def create_jellyfish_task(reads,
                          name,
                          thread,
                          job_type,
                          work_dir,
                          out_dir,
                          mode="general"):
    """Build the jellyfish k-mer counting task.

    The script counts 21-mers, writes the histogram and the k-mer stats,
    copies ``<name>.histogram.txt`` to ``out_dir`` and removes the ``.jf``
    database. With ``mode="general"`` the histogram is written directly to
    ``<name>.histogram.txt``; with any other mode it is written to
    ``<name>.histogram_old.txt`` and its first 5000 lines become
    ``<name>.histogram.txt``.

    Returns:
        tuple: ``(task, histogram, option)`` where ``histogram`` is in
        ``work_dir`` and ``option`` records the software version and options.
    """
    option = {
        "jellyfish": {
            "version": get_version(SOFTWARE_VERSION["jellyfish"]),
            "option": "-m 21 -s 1G"
        }
    }

    truncate = mode != "general"
    if truncate:
        histout = "%s.histogram_old.txt" % name
        runh = "head -n 5000 %s.histogram_old.txt >%s.histogram.txt" % (name,
                                                                        name)
    else:
        histout = "%s.histogram.txt" % name
        runh = ""

    shell = """
export PATH={jellyfish}:$PATH
jellyfish count -m 21 -s 1G -t {thread} -C {reads} -o {name}.jellyfish.jf
jellyfish histo -f {name}.jellyfish.jf -t {thread} > {histout}
{runh}
jellyfish stats -v -o {name}.stats.kmer.txt {name}.jellyfish.jf
cp {name}.histogram.txt {out_dir}
rm -rf {name}.jellyfish.jf
""".format(
        jellyfish=JELLYFISH_BIN,
        thread=thread,
        reads=reads,
        name=name,
        histout=histout,
        runh=runh,
        out_dir=out_dir,
    )

    task = Task(
        id="jellyfish",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp %s" % thread,
        script=shell,
    )

    histogram = os.path.join(work_dir, "%s.histogram.txt" % name)

    return task, histogram, option
Exemplo n.º 20
0
def split_ngs_task(fastq_list, name, number, data_type, job_type, work_dir,
                   out_dir):
    """Build the task that splits NGS reads with ``splitfp.py``.

    ``data_type`` is appended verbatim to the ``splitfp.py`` command line.

    Returns:
        tuple: ``(task, out_dir, r1_pattern, r2_pattern)`` where the patterns
        are the glob expressions ``<name>.r1.part_*.fastq`` and
        ``<name>.r2.part_*.fastq``.
    """
    shell = """
{script}/splitfp.py -i {fastq_list} -w {out_dir} -o {name} -n {number} {type}
""".format(
        script=SCRIPTS,
        fastq_list=fastq_list,
        out_dir=out_dir,
        name=name,
        number=number,
        type=data_type,
    )

    task = Task(
        id="split_ngs",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp 1",
        script=shell,
    )

    r1_pattern = '%s.r1.part_*.fastq' % name
    r2_pattern = '%s.r2.part_*.fastq' % name

    return task, out_dir, r1_pattern, r2_pattern
Exemplo n.º 21
0
def split_data(r1, r2, name, number, job_type, concurrent, refresh, work_dir, out_dir, platform="illumina"):
    """Split the input reads with ``splitfp.py`` and return the parts.

    For ``PromethION``, ``GridION``, ``RSII`` and ``Sequel`` the ``r2``
    argument is replaced by an empty string and parts are collected with the
    pattern ``<name>.part_*.fast*``. For ``illumina`` and ``mgi`` the R1 parts
    are collected with ``<name>.r1.part_*.fastq`` and each one is paired with
    the corresponding ``.r2.part_`` file name.

    The split task is executed immediately through ``do_dag``.

    Returns:
        list: for paired platforms, ``"<r1_part> <r2_part>"`` strings;
        otherwise whatever ``read_files`` returns for the single-read pattern.

    Raises:
        Exception: if ``platform`` is not one of the supported names.
    """
    paired = platform in ["illumina", "mgi"]

    if platform in ["PromethION", "GridION" , "RSII", "Sequel"]:
        read = "%s.part_*.fast*" % name
        r2 = ""
    elif paired:
        read = "%s.r1.part_*.fastq" % name
    else:
        raise Exception("The input sequencing platform is abnormal.")

    shell = """
{script}/splitfp.py -r1 {r1} -r2 {r2} -o {name} -n {number}
#cp {name}.* {out_dir}
""".format(
        script=SCRIPTS,
        r1=r1,
        r2=r2,
        name=name,
        number=number,
        out_dir=out_dir
    )

    dag = DAG("split_data")
    task = Task(
        id="split_data",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp 1",
        script=shell
    )
    dag.add_task(task)
    do_dag(dag, concurrent, refresh)

    parts = read_files(work_dir, read)
    if not paired:
        return parts

    # Derive each R2 part name from its R1 counterpart.
    reads = []
    for r1_part in parts:
        r2_part = r1_part.replace(".r1.part_", ".r2.part_")
        reads.append("%s %s" % (r1_part, r2_part))

    return reads
Exemplo n.º 22
0
def split_data(r1, r2, name, number, job_type, work_dir, out_dir):
    """Split the input reads with ``splitfp.py`` and return the parts.

    The layout is inferred from ``len(r1)`` and ``len(r2)``: when they differ
    and ``len(r2) <= 1`` the data are handled as single reads (``r2`` becomes
    an empty string, parts match ``<name>.part_*.fast*``); when they are equal
    the data are handled as pairs (parts match ``<name>.r1.part_*.fastq``).
    Any other combination raises.

    The split task is executed immediately through ``do_dag(dag, 8, 10)``.

    Returns:
        list: ``"<r1_part> <r2_part>"`` strings when the lengths compare
        equal after the branch above, otherwise whatever ``read_files``
        returns.

    Raises:
        Exception: if the lengths differ and ``len(r2) > 1``.
    """
    r1_len, r2_len = len(r1), len(r2)

    if r1_len != r2_len and r2_len <= 1:
        read = "%s.part_*.fast*" % name
        r2 = ""
    elif r1_len == r2_len:
        read = "%s.r1.part_*.fastq" % name
    else:
        raise Exception("The input sequencing platform is abnormal.")

    shell = """
{script}/splitfp.py -r1 {r1} -r2 {r2} -o {name} -n {number}
#cp {name}.* {out_dir}
""".format(script=SCRIPTS,
           r1=r1,
           r2=r2,
           name=name,
           number=number,
           out_dir=out_dir)

    dag = DAG("split_data")
    task = Task(id="split_data",
                work_dir=work_dir,
                type=job_type,
                option="-pe smp 1",
                script=shell)
    dag.add_task(task)
    do_dag(dag, 8, 10)

    parts = read_files(work_dir, read)

    # The lengths are compared again on purpose: r2 may have been replaced by
    # "" above, and the original logic evaluates the updated value here.
    if len(r1) != len(r2):
        return parts

    reads = []
    for r1_part in parts:
        r2_part = r1_part.replace(".r1.part_", ".r2.part_")
        reads.append("%s %s" % (r1_part, r2_part))

    return reads
Exemplo n.º 23
0
def create_ncovann_task(genome, name, refgff, job_type, work_dir, out_dir):
    """Build the genome annotation task for one sample.

    The script normalises the assembly with ``process_assembly.py``, rewrites
    the reference GFF so that ``MN908947.3`` becomes ``name``, converts it to
    a feature table, runs ``tbl2asn``, extracts the proteins from the
    resulting GenBank file and copies the products to ``out_dir``.

    Returns:
        Task: the configured annotation task.
    """
    shell = """
export PATH={tbl2asn}:$PATH
{script}/process_assembly.py {genome} --topology linear --moltype ss-RNA --completeness complete --gcode 1  --organism 'Unknow' --strain {name} > {name}.genomic.fasta
{script}/sed.py {refgff} --old MN908947.3 --new {name} > {name}.genomic.gff
{script}/gff2tbl.py {name}.genomic.gff >{name}.genomic.tbl
tbl2asn -i {name}.genomic.fasta -V b -s T
mv {name}.genomic.gbf {name}.genomic.gb
{script}/gb2protein.py {name}.genomic.gb >{name}.protein.fasta
cp {name}.genomic.fasta {name}.genomic.gff {name}.genomic.sqn {name}.genomic.gb {name}.protein.fasta {out_dir}
""".format(
        tbl2asn=TBL2ASN_BIN,
        script=SCRIPTS,
        genome=genome,
        refgff=refgff,
        name=name,
        out_dir=out_dir,
    )

    return Task(
        id="annotate_%s" % name,
        work_dir=work_dir,
        type=job_type,
        option="-pe smp 1",
        script=shell,
    )
Exemplo n.º 24
0
def get_jellyfish_task(fastq, name, depth, thread, job_type, work_dir,
                       out_dir):
    """Build the task that runs jellyfish.

    The script counts 21-mers in ``fastq``, writes the full histogram to
    ``<name>.histogram_old.txt``, keeps its first ``depth`` lines as
    ``<name>.histogram.txt``, writes the k-mer stats, copies the truncated
    histogram to ``out_dir`` and removes the ``.jf`` database.

    Returns:
        tuple: ``(task, histogram)`` where ``histogram`` is in ``work_dir``.
    """
    shell = """
export PATH={jellyfish}:$PATH
jellyfish count -m 21 -s 1G -t {thread} -C {fastq} -o {name}.jellyfish.jf
jellyfish histo -f {name}.jellyfish.jf -t {thread} > {name}.histogram_old.txt
head -n {depth} {name}.histogram_old.txt >{name}.histogram.txt
jellyfish stats -v -o {name}.stats.kmer.txt {name}.jellyfish.jf
cp {name}.histogram.txt {out_dir}
rm -rf {name}.jellyfish.jf
""".format(
        jellyfish=JELLYFISH_BIN,
        thread=thread,
        fastq=fastq,
        name=name,
        depth=depth,
        out_dir=out_dir,
    )

    task = Task(
        id="jellyfish",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp %s" % thread,
        script=shell,
    )

    histogram = os.path.join(work_dir, "%s.histogram.txt" % name)

    return task, histogram
Exemplo n.º 25
0
def sample_fastq_task(r1, r2, proportion, name, job_type, work_dir):
    """Build the task that sub-samples paired FASTQ files.

    Both files are sampled with ``seqkit sample -p <proportion> -2`` and
    written to ``<work_dir>/<name>_choose.r{1,2}.fastq``.

    Returns:
        tuple: ``(task, r1_path, r2_path, pattern)`` where ``pattern`` is the
        glob ``<work_dir>/<name>_choose.r*.fastq`` covering both outputs.
    """
    shell = """
{seqkit} sample -p {proportion} -2 -o {work_dir}/{name}_choose.r1.fastq {r1}
{seqkit} sample -p {proportion} -2 -o {work_dir}/{name}_choose.r2.fastq {r2}
""".format(
        seqkit=SEQKIT,
        proportion=proportion,  # sampling proportion
        work_dir=work_dir,
        name=name,
        r1=r1,
        r2=r2,
    )

    task = Task(
        id="sample_fastq",
        work_dir=work_dir,
        type=job_type,
        option="-pe smp 2",
        script=shell,
    )

    sampled_r1 = os.path.join(work_dir, "%s_choose.r1.fastq" % name)
    sampled_r2 = os.path.join(work_dir, "%s_choose.r2.fastq" % name)
    sampled_glob = os.path.join(work_dir, "%s_choose.r*.fastq" % name)

    return task, sampled_r1, sampled_r2, sampled_glob
Exemplo n.º 26
0
def create_ncovsnp_tasks(read, name, reffa, refgb, thread, job_type, work_dir,
                         out_dir):
    """Build the two chained variant-calling tasks for one sample.

    The first task maps ``read`` to ``reffa`` with ``minimap2 -ax map-ont``,
    computes per-base depth, runs ``medaka_variant`` and derives a raw
    consensus with ``vcf2fasta.py``. The second task plots the depth, runs
    ``snippy`` on the raw consensus against ``refgb``, filters the SNPs by
    depth, builds the final consensus and copies the results to ``out_dir``.
    The second task is declared downstream of the first.

    Returns:
        tuple: ``(snp_task, snippy_task, consensus_fasta, option)`` where the
        consensus path is in ``out_dir`` and ``option`` records software
        versions and options in insertion order.
    """
    option = OrderedDict([
        ("samtools", {
            "version": get_version(SOFTWARE_VERSION["samtools"]),
            "option": "default"
        }),
        ("medaka", {
            "version": get_version(SOFTWARE_VERSION["medaka"]),
            "option": "medaka_variant (parameters: default)"
        }),
        ("snippy", {
            "version": get_version(SOFTWARE_VERSION["snippy"]),
            "option": "--minfrac 0.9"
        }),
    ])
    smp_option = "-pe smp %s" % thread

    medaka_shell = """
export PATH={samtools}:{minimap}:{medaka}:{python}:$PATH
source {activate} medaka

minimap2 -ax map-ont -t {thread} {reffa} {read} |samtools sort -@ 5 - >{name}.sort.bam
samtools index {name}.sort.bam
samtools depth -a {name}.sort.bam -d 0 --reference {reffa} >{name}.depth.xls
medaka_variant -f {reffa} -i {name}.sort.bam -t {thread} -b 1000 -p {name}
cp medaka_variant/round_1.vcf {name}.raw.vcf
python {script}/vcf2fasta.py --vcf {name}.raw.vcf --depth {name}.depth.xls --refer {reffa} >{name}.raw_concencus.fasta
rm -rf medaka_variant {name}.sort.bam
#cp {name}.concencus.fasta {out_dir}
""".format(
        samtools=SAMTOOLS_BIN,
        minimap=MINIMAP_BIN,
        medaka=MEDAKA_BIN,
        python=PYTHON_BIN,
        activate=MEDAKA_ENV,
        script=SCRIPTS,
        thread=thread,
        reffa=reffa,
        read=read,
        name=name,
        out_dir=out_dir,
    )
    snp_task = Task(
        id="medaka_%s" % name,
        work_dir=work_dir,
        type=job_type,
        option=smp_option,
        script=medaka_shell,
    )

    snippy_shell = """
export PATH={snippy}:{python}:$PATH
python {script}/plot_depth_stat.py -i {name}.depth.xls -w 1 -o {name}
snippy --cpus {thread} --outdir {name} --minfrac 0.9 --ref {refgb} -ctgs {name}.raw_concencus.fasta
python {script}/filter_snp_gff.py -i {name}/snps.gff -d {name}.depth.xls >{name}.snps.gff
python {script}/snp_gff2fasta.py --gff {name}.snps.gff --refer {reffa} >{name}.concencus.fasta
cp {name}/snps.vcf {name}.snps.vcf
rm -rf {name}
cp {name}.concencus.fasta {out_dir}
cp {name}.depth.png {name}.depth.pdf {name}.snps.gff {out_dir}
""".format(
        snippy=SNIPPY_BIN,
        python=PYTHON_BIN,
        script=SCRIPTS,
        thread=thread,
        refgb=refgb,
        reffa=reffa,
        name=name,
        out_dir=out_dir,
    )
    snippy_task = Task(
        id="snippy_%s" % name,
        work_dir=work_dir,
        type=job_type,
        option=smp_option,
        script=snippy_shell,
    )

    # snippy consumes the depth table and raw consensus produced by medaka.
    snippy_task.set_upstream(snp_task)

    consensus_fasta = os.path.join(out_dir, "%s.concencus.fasta" % name)

    return snp_task, snippy_task, consensus_fasta, option