def run(self, outfile, params):
        """Build the assembler command line and hand it to ``P.run``.

        The executable named by ``self.path`` is started with the
        directory of ``outfile`` as its working directory (``-d``) and
        the name of that directory as the grid job name.  Queue,
        memory resource and parallel environment are read from the
        ``cluster`` section of the pipeline parameters.  After the
        command has finished, ``canu.contigs.fasta`` is moved from the
        working directory to ``outfile``; the command's output is
        redirected to ``<outfile>.log``.

        Arguments
        ---------
        outfile : str
            Destination of the contigs file.
        params : object
            Must provide ``genome_size``, ``path_java``, ``options``,
            ``assembly_mode`` and ``fasta``.

        Returns
        -------
        Whatever ``P.run`` returns for the assembled statement.
        """
        search_path = os.environ["PATH"]
        cluster = P.get_parameters_as_namedtuple().cluster
        queue = cluster["queue"]
        memory_resource = cluster["memory_resource"]
        parallel_environment = cluster["parallel_environment"]
        workdir = os.path.dirname(outfile)
        job_name = os.path.basename(workdir)

        # "-sync y" makes qsub block until the submitted job has
        # completed before it returns.
        template = "".join([
            "{self.path} ",
            "-p canu ",
            "-d {outdir} ",
            "-genomeSize={params.genome_size} ",
            "gridOptionsJobName={outname} ",
            "java={params.path_java} ",
            "gridOptions=\"-q {cluster_queue} -v PATH={path} -sync y \" ",
            "gridEngineMemoryOption=\"-l {cluster_memory_resource}=MEMORY\" ",
            "gridEngineThreadsOption=\"-pe {cluster_parallel_environment} THREADS\" ",
            "{params.options} ",
            "{params.assembly_mode} ",
            "{params.fasta} ",
            ">& {outfile}.log; ",
            "mv {outdir}/canu.contigs.fasta {outfile}",
        ])

        statement = template.format(
            self=self,
            params=params,
            outfile=outfile,
            outdir=workdir,
            outname=job_name,
            path=search_path,
            cluster_queue=queue,
            cluster_memory_resource=memory_resource,
            cluster_parallel_environment=parallel_environment)

        # NOTE(review): without_cluster=True presumably keeps the driver
        # process local while it submits its own grid jobs -- confirm.
        return P.run(statement, without_cluster=True)
# Example #2
# 0
    def pre_process(self, infile, outfile, params):
        """Build the chain of shell statements that prepares a BAM file.

        Every option enabled in ``params`` contributes one statement.
        Statements name their input and output through the ``@IN@`` and
        ``@OUT@`` placeholders; ``P.join_statements`` is expected to
        replace them so that each step reads the output of the previous
        one (verify against the pipeline library's documentation).

        Arguments
        ---------
        infile : str
            Input file name.  The ``.bam`` suffix is stripped with
            ``IOTools.snip`` and the remainder seeds the chain.
        outfile : str
            Only used as the prefix of the log and error files written
            by the individual steps.
        params : object
            Must provide ``copy_bam``, ``split_bam``, ``bam2bam``,
            ``region``, ``shift_quality`` and ``remove_chr``.

        Returns
        -------
        tuple
            ``(filename, build_statement, cleanup_statement)``.  When no
            step is enabled this is the original BAM name and two empty
            strings.  Otherwise ``filename`` is the name returned by
            ``P.join_statements`` with ``.bam`` appended, and
            ``build_statement`` ends with a ``samtools index`` call on
            that file.
        """
        statements = []
        infile = IOTools.snip(infile, ".bam")

        if params.copy_bam:
            # copy the BAM file together with its index
            statements.append("cp @IN@.bam @OUT@.bam; "
                              "cp @IN@.bam.bai @OUT@.bam.bai")

        if params.split_bam:
            statements.append("daisy bam2bam-split-reads "
                              "-i @IN@.bam "
                              "-o - "
                              "{params.split_bam} "
                              "--log={outfile}_split_bam.log "
                              "2> {outfile}_split_bam.err "
                              "> @OUT@.bam; ".format(
                                  params=params, outfile=outfile))

        if params.bam2bam:
            statements.append("daisy bam2bam "
                              "--stdin=@IN@.bam "
                              "{params.bam2bam} "
                              "--log={outfile}_bam2bam.log "
                              "2> {outfile}_bam2bam.err "
                              "> @OUT@.bam; ".format(
                                  params=params, outfile=outfile))

        if params.region:
            statements.append(
                "samtools view -b @IN@.bam {} > @OUT@.bam".format(
                    params.region))

        if params.shift_quality:
            # add a constant offset to every base quality character
            # (SAM column 11); header lines are passed through as is.
            statements.append("samtools view -h @IN@.bam "
                              "| perl -lane "
                              "'if(/^@/) {{print; next;}} "
                              "@qual=split(//, $F[10]); "
                              "$_=chr(ord($_)+{}) for (@qual); "
                              "$F[10]=join(\"\",@qual); "
                              "print join(\"\\t\", @F)' "
                              "| samtools view -bS > @OUT@.bam".format(
                                  params.shift_quality))

        if is_true(params.remove_chr):
            # also substitute chrM to MT.
            statements.append("samtools view -h @IN@.bam "
                              "| awk -v OFS='\\t' '"
                              "$1 == \"@SQ\" "
                              "{{ gsub(\"chrM\", \"chrMT\", $2); "
                              "   gsub(\"chr\", \"\", $2); print; next }} "
                              "{{ gsub(\"chrM\", \"chrMT\", $3); "
                              "   gsub(\"chr\", \"\", $3); print; next}} '"
                              "| samtools view -bS - "
                              "2> {outfile}_remove_chr.log "
                              "> @OUT@.bam; ".format(outfile=outfile))

        if not statements:
            # nothing to do: hand back the untouched input file
            return infile + ".bam", "", ""

        filename, build_statement, cleanup_statement = P.join_statements(
            statements, infile)
        filename += ".bam"
        # index the final BAM file of the chain
        build_statement += (
            "; samtools index {filename} >& {outfile}.index.log".format(
                filename=filename, outfile=outfile))

        return filename, build_statement, cleanup_statement