示例#1
0
    def run(self):
        """Call ICE to cluster consensus isoforms."""
        self.add_log("Start to run cluster.", level=logging.INFO)

        if self.ice_opts.targeted_isoseq:
            first_split = 1000
            self.ice_opts.flnc_reads_per_split = 10000
            self.add_log("targeted_isoseq: further splitting JUST first split to 1000. Changing flnc_reads_per_split=10000.")
        else:
            first_split = None

        # Split flnc_fa into smaller files and save files to _flnc_splitted_fas.
        self.add_log("Splitting {flnc} into ".format(flnc=self.flnc_fa) +
                     "smaller files each containing {n} reads.".format(
                     n=self.ice_opts.flnc_reads_per_split),
                     level=logging.INFO)

        self._flnc_splitted_fqs = splitFastq(
            input_fastq=self.flnc_fa,
            reads_per_split=self.ice_opts.flnc_reads_per_split,
            out_dir=self.root_dir,
            out_prefix="input.split",
            first_split=first_split)


        self.add_log("Splitted files are: " +
                     "\n".join(self._flnc_splitted_fqs),
                     level=logging.INFO)

        firstSplit_fq = self._flnc_splitted_fqs[0]
        firstSplit_fa = firstSplit_fq[:firstSplit_fq.rfind('.')] + '.fasta'
        ice_fq2fa(firstSplit_fq, firstSplit_fa)


        # -------- commenting out below because we already have FASTQ!
        #firstSplit = self._flnc_splitted_fas[0]
        #firstSplit_fq = firstSplit[:firstSplit.rfind('.')] + '.fastq'
        #self.add_log("Converting first split file {0} + {1} into fastq\n".format(\
        #        firstSplit, self.ccs_fofn), level=logging.INFO)
        ## Convert this into FASTQ
        #ice_fa2fq(firstSplit, self.ccs_fofn, firstSplit_fq)
        #
        ## Set up probabbility and quality value model
        #if self.ice_opts.use_finer_qv:
        #    self._setProbQV_ccs(self.ccs_fofn, firstSplit)
        #else:
        #    self._setProbQV_fq(firstSplitFq=firstSplit_fq)
        self._setProbQV_fq(firstSplitFq=firstSplit_fq)

        # Initialize cluster by clique
        if os.path.exists(self.initPickleFN):
            self.add_log("Reading existing uc pickle: {0}".format(self.initPickleFN), level=logging.INFO)
            with open(self.initPickleFN) as f:
                uc = cPickle.load(f)
        else:
            self.add_log("Finding maximal cliques: initializing IceInit.",
                         level=logging.INFO)
            self.iceinit = IceInit(readsFa=firstSplit_fa,
                                   qver_get_func=self._probqv.get_smoothed,
                                   ice_opts=self.ice_opts,
                                   sge_opts=self.sge_opts,
                                   qvmean_get_func=self._probqv.get_mean)
            uc = self.iceinit.uc

            # Dump uc to a file
            self.add_log("Dumping initial clusters to {f}".format(
                         f=self.initPickleFN), level=logging.INFO)
            with open(self.initPickleFN, 'w') as f:
                cPickle.dump(uc, f)

        # Run IceIterative.
        self.add_log("Iterative clustering: initializing IceIterative.",
                     level=logging.INFO)
        #self.add_log("In Cluster. DEBUG: Calling Cluster with {0} {1} {2} ".format(self.bas_fofn, self.ccs_fofn, self.fasta_fofn))


        self.icec = IceIterative(
            fasta_filename=firstSplit_fa,
            fastq_filename=firstSplit_fq,
            fastq_filenames_to_add=self._flnc_splitted_fqs[1:],
            all_fasta_filename=self.flnc_fa,
            ccs_fofn=self.ccs_fofn,
            root_dir=self.root_dir,
            ice_opts=self.ice_opts,
            sge_opts=self.sge_opts,
            uc=uc,
            probQV=self._probqv,
            use_ccs_qv=self.ice_opts.use_finer_qv)
        self.add_log("IceIterative log: {f}.".format(f=self.icec.log_fn))
        self.icec.run()
        self.add_log("IceIterative completed.", level=logging.INFO)

        # IceIterative done, write predicted (unplished) consensus isoforms
        # to an output fasta
        self.add_log("Creating a link to unpolished consensus isoforms.")
        ln(self.icec.final_consensus_fa, self.out_fa)

        #self.add_log("In Cluster. DEBUG: End Cluster with {0} {1} {2} ".format(self.bas_fofn, self.ccs_fofn, self.fasta_fofn))


        # Call quiver to polish predicted consensus isoforms.
        if self.ice_opts.quiver is not True:
            self.add_log("Creating a link to cluster report.",
                         level=logging.INFO)
            ln(src=self.icec.report_fn, dst=self.report_fn)

            # Summarize cluster and write to summary_fn.
            self.write_summary(summary_fn=self.summary_fn,
                               isoforms_fa=self.out_fa)
        else:  # self.ice_opts.quiver is True
            self.add_log("Polishing clusters: initializing IcePolish.",
                         level=logging.INFO)
            #self.add_log("In Cluster. DEBUG: Calling Polish with {0} {1} {2} ".format(self.bas_fofn, self.ccs_fofn, self.fasta_fofn))
            self.pol = Polish(root_dir=self.root_dir,
                              nfl_fa=self.nfl_fa,
                              bas_fofn=self.bas_fofn,
                              ccs_fofn=self.ccs_fofn,
                              fasta_fofn=self.fasta_fofn,
                              ice_opts=self.ice_opts,
                              sge_opts=self.sge_opts,
                              ipq_opts=self.ipq_opts,
                              nfl_reads_per_split=self.nfl_reads_per_split)
            self.add_log("IcePolish log: {f}.".format(f=self.pol.log_fn),
                         level=logging.INFO)
            self.pol.run()
            self.add_log("IcePolish completed.", level=logging.INFO)

            # cluster report
            self.add_log("Creating a link to cluster report.",
                         level=logging.INFO)
            ln(src=self.pol.iceq.report_fn, dst=self.report_fn)

            # Summarize cluster & polish and write to summary_fn.
            self.write_summary(summary_fn=self.summary_fn,
                               isoforms_fa=self.out_fa,
                               hq_fa=self.pol.icepq.quivered_good_fa,
                               lq_fa=self.pol.icepq.quivered_bad_fa)

        # Create log file.
        self.close_log()
        return 0