def run(self):
    """Cluster the FLNC reads into consensus isoforms with ICE.

    The work is done in this order:

    1. Split ``self.flnc_fa`` into smaller fasta files and convert the
       first of them to fastq.
    2. Set up the probability / quality value model, either from the CCS
       fofn (``ice_opts.use_finer_qv`` is truthy) or from that fastq.
    3. Build the initial clusters with ``IceInit`` and pickle them to
       ``self.initPickleFN``.
    4. Run ``IceIterative`` and link its consensus fasta to
       ``self.out_fa``.
    5. When ``ice_opts.quiver`` is exactly ``True``, run ``Polish``.
       In both cases link the cluster report to ``self.report_fn`` and
       write the summary.

    Returns:
        0, after the log has been closed.
    """
    def log_info(message):
        """Log a progress message at INFO level."""
        self.add_log(message, level=logging.INFO)

    log_info("Start to run cluster.")

    # Split flnc_fa into smaller fasta files; the resulting paths are kept
    # on the instance as _flnc_splitted_fas.
    split_note = "Splitting {flnc} into ".format(flnc=self.flnc_fa)
    split_note += "smaller files each containing {n} reads.".format(
        n=self.ice_opts.flnc_reads_per_split)
    log_info(split_note)
    self._flnc_splitted_fas = splitFasta(
        input_fasta=self.flnc_fa,
        reads_per_split=self.ice_opts.flnc_reads_per_split,
        out_dir=self.root_dir,
        out_prefix="input.split")
    log_info("Splitted files are: " + "\n".join(self._flnc_splitted_fas))

    # The fastq path is the first split's path with everything from its
    # last '.' onwards replaced by '.fastq'.
    first_fa = self._flnc_splitted_fas[0]
    suffix_at = first_fa.rfind('.')
    first_fq = first_fa[:suffix_at] + '.fastq'
    log_info("Converting first split file {0} + {1} into fastq\n".format(
        first_fa, self.ccs_fofn))
    ice_fa2fq(first_fa, self.ccs_fofn, first_fq)

    # Set up the probability and quality value model.
    if not self.ice_opts.use_finer_qv:
        self._setProbQV_fq(firstSplitFq=first_fq)
    else:
        self._setProbQV_ccs(self.ccs_fofn, first_fa)

    # Initialize clusters by clique finding on the first split only.
    log_info("Finding maximal cliques: initializing IceInit.")
    self.iceinit = IceInit(
        readsFa=first_fa,
        qver_get_func=self._probqv.get_smoothed,
        ice_opts=self.ice_opts,
        sge_opts=self.sge_opts)
    initial_clusters = self.iceinit.uc

    # Save the initial clusters to the pickle file.
    log_info("Dumping initial clusters to {f}".format(f=self.initPickleFN))
    with open(self.initPickleFN, 'w') as pickle_out:
        cPickle.dump(initial_clusters, pickle_out)

    # Iterative clustering: starts from the first split, the remaining
    # splits are handed over as files to add.
    log_info("Iterative clustering: initializing IceIterative.")
    iterative_args = {
        'fasta_filename': first_fa,
        'fasta_filenames_to_add': self._flnc_splitted_fas[1:],
        'all_fasta_filename': self.flnc_fa,
        'ccs_fofn': self.ccs_fofn,
        'root_dir': self.root_dir,
        'ice_opts': self.ice_opts,
        'sge_opts': self.sge_opts,
        'uc': initial_clusters,
        'probQV': self._probqv,
        'fastq_filename': first_fq,
        'use_ccs_qv': self.ice_opts.use_finer_qv,
    }
    self.icec = IceIterative(**iterative_args)
    self.add_log("IceIterative log: {f}.".format(f=self.icec.log_fn))
    self.icec.run()
    log_info("IceIterative completed.")

    # IceIterative is done: expose the predicted (unpolished) consensus
    # isoforms as the output fasta.
    self.add_log("Creating a link to unpolished consensus isoforms.")
    ln(self.icec.final_consensus_fa, self.out_fa)

    if self.ice_opts.quiver is True:
        # Polish the predicted consensus isoforms.
        log_info("Polishing clusters: initializing IcePolish.")
        polish_args = {
            'root_dir': self.root_dir,
            'nfl_fa': self.nfl_fa,
            'bas_fofn': self.bas_fofn,
            'ccs_fofn': self.ccs_fofn,
            'fasta_fofn': self.fasta_fofn,
            'ice_opts': self.ice_opts,
            'sge_opts': self.sge_opts,
            'ipq_opts': self.ipq_opts,
            'nfl_reads_per_split': self.nfl_reads_per_split,
        }
        self.pol = Polish(**polish_args)
        log_info("IcePolish log: {f}.".format(f=self.pol.log_fn))
        self.pol.run()
        log_info("IcePolish completed.")

        # The cluster report comes from the polishing step here.
        log_info("Creating a link to cluster report.")
        ln(src=self.pol.iceq.report_fn, dst=self.report_fn)

        # Summarize clustering and polishing into summary_fn.
        self.write_summary(
            summary_fn=self.summary_fn,
            isoforms_fa=self.out_fa,
            hq_fa=self.pol.icepq.quivered_good_fa,
            lq_fa=self.pol.icepq.quivered_bad_fa)
    else:
        # No polishing: the cluster report comes from IceIterative.
        log_info("Creating a link to cluster report.")
        ln(src=self.icec.report_fn, dst=self.report_fn)

        # Summarize clustering into summary_fn.
        self.write_summary(summary_fn=self.summary_fn,
                           isoforms_fa=self.out_fa)

    # Finish logging for this run.
    self.close_log()
    return 0
def run(self):
    """Cluster the FLNC reads into consensus isoforms with ICE.

    The work is done in this order:

    1. Split ``self.flnc_fa`` into smaller fasta files.
    2. Set up the probability / quality value model from the CCS fofn and
       the first split.
    3. Obtain the initial clusters: load them from ``self.initPickleFN``
       when that file already exists, otherwise build them with
       ``IceInit`` and pickle them there.
    4. Run ``IceIterative``, clean up the root directory, and link the
       consensus fasta to ``self.out_fa``.
    5. When ``ice_opts.quiver`` is exactly ``True``, run ``Polish``.
       In both cases link the cluster report to ``self.report_fn`` and
       write the summary.

    Returns:
        0, after the log has been closed.
    """
    def log_info(message):
        """Log a progress message at INFO level."""
        self.add_log(message, level=logging.INFO)

    log_info("Start to run cluster.")

    # Split flnc_fa into smaller fasta files; the resulting paths are kept
    # on the instance as _flnc_splitted_fas.
    split_note = "Splitting {flnc} into ".format(flnc=self.flnc_fa)
    split_note += "smaller files each containing {n} reads.".format(
        n=self.ice_opts.flnc_reads_per_split)
    log_info(split_note)
    self._flnc_splitted_fas = splitFasta(
        input_fasta=self.flnc_fa,
        reads_per_split=self.ice_opts.flnc_reads_per_split,
        out_dir=self.root_dir,
        out_prefix="input.split")
    log_info("Splitted files are: " + "\n".join(self._flnc_splitted_fas))

    first_fa = self._flnc_splitted_fas[0]

    # Set up the probability and quality value model.
    self._setProbQV(ccs_fofn=self.ccs_fofn, firstSplitFa=first_fa)

    # Initial clusters: IceInit only runs when no pickle from an earlier
    # run is present.
    # NOTE(review): an existing pickle is loaded as-is, with no check that
    # it matches the current input -- confirm this is intended.
    if not os.path.exists(self.initPickleFN):
        log_info("Finding maximal cliques.")
        self.iceinit = IceInit(
            readsFa=first_fa,
            qver_get_func=self._probqv.get_smoothed,
            ice_opts=self.ice_opts,
            sge_opts=self.sge_opts)
        initial_clusters = self.iceinit.uc

        # Save the initial clusters to the pickle file.
        log_info("Dumping initial clusters to {f}".format(
            f=self.initPickleFN))
        with open(self.initPickleFN, 'w') as pickle_out:
            cPickle.dump(initial_clusters, pickle_out)
    else:
        log_info("{0} already exists. Reading to get uc.".format(
            self.initPickleFN))
        with open(self.initPickleFN) as pickle_in:
            initial_clusters = cPickle.load(pickle_in)

    # Iterative clustering: starts from the first split, the remaining
    # splits are handed over as files to add.
    log_info("Iteratively clustering.")
    iterative_args = {
        'fasta_filename': first_fa,
        'fasta_filenames_to_add': self._flnc_splitted_fas[1:],
        'all_fasta_filename': self.flnc_fa,
        'ccs_fofn': self.ccs_fofn,
        'root_dir': self.root_dir,
        'ice_opts': self.ice_opts,
        'sge_opts': self.sge_opts,
        'uc': initial_clusters,
        'probQV': self._probqv,
    }
    self.icec = IceIterative(**iterative_args)
    self.icec.run()
    clean_up_after_ICE(self.root_dir)

    # IceIterative is done: expose the predicted (unpolished) consensus
    # isoforms as the output fasta.
    self.add_log("Creating a link to unpolished consensus isoforms.")
    ln(self.icec.final_consensus_fa, self.out_fa)

    summary_note = "Writing a summary to {f}"
    if self.ice_opts.quiver is True:
        # Polish the predicted consensus isoforms.
        # TODO: review code
        polish_args = {
            'root_dir': self.root_dir,
            'nfl_fa': self.nfl_fa,
            'bas_fofn': self.bas_fofn,
            'ccs_fofn': self.ccs_fofn,
            'hq_isoforms_fa': self.hq_isoforms_fa,
            'hq_isoforms_fq': self.hq_isoforms_fq,
            'lq_isoforms_fa': self.lq_isoforms_fa,
            'lq_isoforms_fq': self.lq_isoforms_fq,
            'ice_opts': self.ice_opts,
            'sge_opts': self.sge_opts,
        }
        self.pol = Polish(**polish_args)
        self.pol.run()

        # The cluster report comes from the polishing step here.
        self.add_log("Creating a link to cluster report.")
        ln(src=self.pol.iceq.report_fn, dst=self.report_fn)

        # Summary covers the polished high- and low-quality fasta files.
        log_info(summary_note.format(f=self.summary_fn))
        self.writeSummary(
            fa=self.out_fa,
            summary_fn=self.summary_fn,
            hq_fa=self.pol.icepq.quivered_good_fa,
            lq_fa=self.pol.icepq.quivered_bad_fa)
    else:
        # No polishing: the cluster report comes from IceIterative.
        self.add_log("Creating a link to cluster report.")
        ln(src=self.icec.report_fn, dst=self.report_fn)

        log_info(summary_note.format(f=self.summary_fn))
        self.writeSummary(fa=self.out_fa, summary_fn=self.summary_fn)

    # Finish logging for this run.
    self.close_log()
    return 0