Exemplo n.º 1
0
    def run(self):
        """Merge the per-chunk logs of submitted quiver jobs into one log.

        Creates an IceQuiver object for self.root_dir, asks it for the path
        of the submitted-quiver-jobs log of each of the self.N chunks,
        checks that every one of those logs exists, and concatenates them,
        in chunk order, into iceq.submitted_quiver_jobs_log.

        Raises:
            IOError: if the log of any chunk does not exist.
        """
        iceq = IceQuiver(root_dir=self.root_dir, bas_fofn=None,
                         fasta_fofn=None, sge_opts=None,
                         prog_name="ice_quiver_merge")

        # Everything after construction runs under try/finally so that the
        # log is closed even when a chunk log is missing or the
        # concatenation fails.
        try:
            iceq.add_log(self.cmd_str())
            iceq.add_log("root_dir: {d}.".format(d=self.root_dir))
            iceq.add_log("Total number of chunks: N = {N}.".format(N=self.N))

            # One log per chunk, chunk 0 first.
            src = [iceq.submitted_quiver_jobs_log_of_chunk_i(
                       i=i, num_chunks=self.N)
                   for i in range(0, self.N)]

            # Fail on the first missing chunk log, before writing anything.
            for f in src:
                if not nfs_exists(f):
                    raise IOError("Log {f} ".format(f=f) +
                                  "of submitted quiver jobs does not exist.")

            dst = iceq.submitted_quiver_jobs_log

            iceq.add_log(
                "Collecting submitted quiver jobs from:\n{src}\nto {dst}.".
                format(src="\n".join(src), dst=dst))

            cat_files(src=src, dst=dst)
        finally:
            iceq.close_log()
Exemplo n.º 2
0
    def run(self):
        """Collect the submitted-quiver-jobs logs of all chunks into one file.

        An IceQuiver object is built for self.root_dir; for each chunk index
        in range(self.N) its per-chunk log path is looked up, every path is
        checked with nfs_exists, and the logs are then concatenated in chunk
        order into iceq.submitted_quiver_jobs_log.

        Raises:
            IOError: if the log of any chunk does not exist.
        """
        iceq = IceQuiver(root_dir=self.root_dir, bas_fofn=None,
                         fasta_fofn=None, sge_opts=None,
                         prog_name="ice_quiver_merge")

        # Guarantee close_log() runs on the error paths too (missing chunk
        # log, failure inside cat_files), not only on success.
        try:
            iceq.add_log(self.cmd_str())
            iceq.add_log("root_dir: {d}.".format(d=self.root_dir))
            iceq.add_log("Total number of chunks: N = {N}.".format(N=self.N))

            src = [iceq.submitted_quiver_jobs_log_of_chunk_i(
                       i=i, num_chunks=self.N)
                   for i in range(0, self.N)]

            # Validate all inputs before producing any output.
            for f in src:
                if not nfs_exists(f):
                    raise IOError("Log {f} ".format(f=f) +
                                  "of submitted quiver jobs does not exist.")

            dst = iceq.submitted_quiver_jobs_log

            iceq.add_log(
                "Collecting submitted quiver jobs from:\n{src}\nto {dst}.".
                format(src="\n".join(src), dst=dst))

            cat_files(src=src, dst=dst)
        finally:
            iceq.close_log()
Exemplo n.º 3
0
    def test_cat_files(self):
        """Test cat_files with one source file and with two source files.

        Concatenating a single file must reproduce that file; concatenating
        primers.fasta and test_phmmer.fasta must match the reference file
        test_cat_2 under self.stdout_dir.
        """
        fn_1 = op.join(self.data_dir, "primers.fasta")
        fn_2 = op.join(self.data_dir, "test_phmmer.fasta")
        out_fn_1 = op.join(self.out_dir, "test_cat_1")
        out_fn_2 = op.join(self.out_dir, "test_cat_2")

        std_out_fn_2 = op.join(self.stdout_dir, "test_cat_2")

        cat_files(src=[fn_1], dst=out_fn_1)
        cat_files(src=[fn_1, fn_2], dst=out_fn_2)

        # shallow=False forces a comparison of the file contents; the
        # default (shallow=True) may report equality from matching os.stat
        # signatures alone, without reading the files.
        self.assertTrue(filecmp.cmp(out_fn_1, fn_1, shallow=False))
        self.assertTrue(filecmp.cmp(out_fn_2, std_out_fn_2, shallow=False))
Exemplo n.º 4
0
    def runChimeraDetector(self):
        """Run chimera detection and assemble the combined output files.

        Full-length reads are always screened. Non-full-length reads are
        screened only when chimera_detection_opts.detect_chimera_nfl is
        True; otherwise the trimmed non-full-length reads are linked as the
        nfl output. The read counts returned by _detect_chimera are stored
        on self.summary, the FASTA outputs and the two partial primer
        reports are concatenated, and the partial primer reports are then
        passed to _cleanup.
        """
        # Forward/reverse primers used by chimera detection.
        self._processPrimers(
            primer_fn=self.primer_fn,
            window_size=self.chimera_detection_opts.primer_search_window,
            primer_out_fn=self.primer_chimera_fn,
            revcmp_primers=True)

        # --- Full-length reads: split into flnc and flc. ---
        logging.info("Detect chimeric reads from trimmed full-length reads.")
        n_flnc, n_flc, n_flnc_bases, _unused = self._detect_chimera(
            in_fasta=self._trimmed_fl_reads_fn,
            out_nc_fasta=self.out_flnc_fn_fasta,
            out_c_fasta=self.out_flc_fn,
            primer_report_fn=self._primer_report_fl_fn,
            out_dom=self.out_trimmed_fl_dom_fn,
            num_reads=self.summary.num_fl,
            job_name="fl")
        self.summary.num_flnc = n_flnc
        self.summary.num_flc = n_flc
        self.summary.num_flnc_bases = n_flnc_bases
        # Every full-length read must be either non-chimeric or chimeric.
        assert self.summary.num_fl == (self.summary.num_flnc +
                                       self.summary.num_flc)
        logging.info(
            "Done with chimera detection on trimmed full-length reads.")

        # --- Non-full-length reads: screened only on request. ---
        if self.chimera_detection_opts.detect_chimera_nfl is True:
            logging.info(
                "Detect chimeric reads from trimmed non-full-length reads.")
            # NOTE(review): detection writes to out_nflnc_fn / out_nflc_fn
            # while the concatenation below reads out_nflnc_fn_fasta /
            # out_nflc_fn_fasta -- confirm these refer to the same files.
            n_nflnc, n_nflc, _unused, _unused2 = self._detect_chimera(
                in_fasta=self._trimmed_nfl_reads_fn,
                out_nc_fasta=self.out_nflnc_fn,
                out_c_fasta=self.out_nflc_fn,
                primer_report_fn=self._primer_report_nfl_fn,
                out_dom=self.out_trimmed_nfl_dom_fn,
                num_reads=self.summary.num_nfl,
                job_name="nfl")
            self.summary.num_nflnc = n_nflnc
            self.summary.num_nflc = n_nflc
            assert self.summary.num_nfl == (self.summary.num_nflnc +
                                            self.summary.num_nflc)
            logging.info(
                "Done with chimera detection on trimmed non-full-length reads.")

            # nfl output = nflnc followed by nflc.
            cat_files(src=[self.out_nflnc_fn_fasta, self.out_nflc_fn_fasta],
                      dst=self.out_nfl_fn_fasta)
            # Only the non-chimeric nfl reads go into the all-reads file.
            nfl_for_all_reads = self.out_nflnc_fn_fasta
        else:
            # No screening: link the trimmed nfl reads as the nfl output and
            # use all of them in the all-reads file.
            ln(self._trimmed_nfl_reads_fn, self.out_nfl_fn_fasta)
            nfl_for_all_reads = self.out_nfl_fn_fasta

        # all-reads output = flnc followed by the nfl part chosen above.
        cat_files(src=[self.out_flnc_fn_fasta, nfl_for_all_reads],
                  dst=self.out_all_reads_fn_fasta)

        # Full primer report = fl report followed by nfl report.
        partial_reports = [self._primer_report_fl_fn,
                           self._primer_report_nfl_fn]
        cat_files(src=partial_reports, dst=self.primer_report_fn)

        # The partial reports are intermediate files; hand them to _cleanup.
        self._cleanup([self._primer_report_nfl_fn,
                       self._primer_report_fl_fn])
Exemplo n.º 5
0
    def runChimeraDetector(self):
        """Screen reads for chimeras and build the merged FASTA/report files.

        Steps, in order:
          1. Prepare primers with _processPrimers.
          2. Run _detect_chimera on the trimmed full-length reads and record
             the returned counts on self.summary.
          3. If chimera_detection_opts.detect_chimera_nfl is True, do the
             same for the trimmed non-full-length reads; otherwise link the
             trimmed non-full-length reads as the nfl output.
          4. Concatenate the outputs into out_nfl_fn_fasta (when screened),
             out_all_reads_fn_fasta and primer_report_fn.
          5. Pass the two partial primer reports to _cleanup.
        """
        # Step 1: forward/reverse primers for chimera detection.
        self._processPrimers(
            primer_fn=self.primer_fn,
            window_size=self.chimera_detection_opts.primer_search_window,
            primer_out_fn=self.primer_chimera_fn,
            revcmp_primers=True)

        # Step 2: full-length reads -> flnc / flc.
        logging.info("Detect chimeric reads from trimmed full-length reads.")
        fl_counts = self._detect_chimera(
            in_fasta=self._trimmed_fl_reads_fn,
            out_nc_fasta=self.out_flnc_fn_fasta,
            out_c_fasta=self.out_flc_fn,
            primer_report_fn=self._primer_report_fl_fn,
            out_dom=self.out_trimmed_fl_dom_fn,
            num_reads=self.summary.num_fl,
            job_name="fl")
        # Fourth returned value is not used here.
        (self.summary.num_flnc, self.summary.num_flc,
         self.summary.num_flnc_bases, _ignored) = fl_counts
        fl_total = self.summary.num_flnc + self.summary.num_flc
        assert self.summary.num_fl == fl_total
        logging.info(
            "Done with chimera detection on trimmed full-length reads.")

        # Step 3/4: non-full-length reads.
        if self.chimera_detection_opts.detect_chimera_nfl is not True:
            # Not screened: link the trimmed nfl reads as the nfl output,
            # then all-reads = flnc followed by every nfl read.
            ln(self._trimmed_nfl_reads_fn, self.out_nfl_fn_fasta)
            cat_files(dst=self.out_all_reads_fn_fasta,
                      src=[self.out_flnc_fn_fasta, self.out_nfl_fn_fasta])
        else:
            logging.info(
                "Detect chimeric reads from trimmed non-full-length reads.")
            # NOTE(review): _detect_chimera is given out_nflnc_fn and
            # out_nflc_fn, but the concatenations below read
            # out_nflnc_fn_fasta and out_nflc_fn_fasta -- verify that these
            # attributes point at the same files.
            nfl_counts = self._detect_chimera(
                in_fasta=self._trimmed_nfl_reads_fn,
                out_nc_fasta=self.out_nflnc_fn,
                out_c_fasta=self.out_nflc_fn,
                primer_report_fn=self._primer_report_nfl_fn,
                out_dom=self.out_trimmed_nfl_dom_fn,
                num_reads=self.summary.num_nfl,
                job_name="nfl")
            # Only the first two returned values are kept for nfl reads.
            (self.summary.num_nflnc, self.summary.num_nflc,
             _ignored, _ignored_too) = nfl_counts
            nfl_total = self.summary.num_nflnc + self.summary.num_nflc
            assert self.summary.num_nfl == nfl_total
            logging.info(
                "Done with chimera detection on trimmed non-full-length reads.")

            # nfl output = nflnc followed by nflc.
            cat_files(dst=self.out_nfl_fn_fasta,
                      src=[self.out_nflnc_fn_fasta, self.out_nflc_fn_fasta])
            # all-reads = flnc followed by nflnc (chimeric reads excluded).
            cat_files(dst=self.out_all_reads_fn_fasta,
                      src=[self.out_flnc_fn_fasta, self.out_nflnc_fn_fasta])

        # Step 4 (cont.): full primer report = fl report + nfl report.
        cat_files(dst=self.primer_report_fn,
                  src=[self._primer_report_fl_fn, self._primer_report_nfl_fn])

        # Step 5: the partial primer reports are intermediate files.
        intermediates = [self._primer_report_nfl_fn, self._primer_report_fl_fn]
        self._cleanup(intermediates)