def resolved_tool_contract_runner(rtc):
    """
    Merge the results of all cluster bins into a 'combined' directory.

    The chunk tasks are read from the pickle named by the first input file.
    For every cluster bin the polished HQ|LQ isoforms, the consensus
    isoforms and the cluster pickles are located; they are then combined
    into summary.json, cluster_report.csv, hq_isoforms.fa|fq and
    lq_isoforms.fa|fq, and the combined files are linked to the paths
    listed in rtc.task.output_files.
    """
    chunk_tasks = ChunkTasksPickle.read(rtc.task.input_files[0])
    assert all([isinstance(task, ClusterChunkTask) for task in chunk_tasks])
    # The return value is not used; presumably this sorts in place -- TODO confirm.
    chunk_tasks.sorted_by_attr(attr='cluster_bin_index')

    options = rtc.task.options
    ipq_opts = IceQuiverHQLQOptions(
        qv_trim_5=options[Constants.QV_TRIM_FIVEPRIME_ID],
        qv_trim_3=options[Constants.QV_TRIM_THREEPRIME_ID],
        hq_quiver_min_accuracy=options[Constants.HQ_QUIVER_MIN_ACCURACY_ID])
    sample_name = get_sample_name(
        input_sample_name=options[Constants.SAMPLE_NAME_ID])

    # Output files are positional: index i is the i-th declared task output.
    outputs = rtc.task.output_files
    out_consensus_isoforms_cs = outputs[0]
    out_summary = outputs[1]
    out_report = outputs[2]
    out_hq_cs = outputs[3]
    out_hq_fq = outputs[4]
    out_lq_cs = outputs[5]
    out_lq_fq = outputs[6]
    out_hq_lq_prefix_dict_pickle = outputs[7]

    # Each contigset output gets a sibling FASTA derived from its own name.
    contigset_ext = ".contigset.xml"
    fasta_ext = ".fasta"
    for contigset_fn in (out_consensus_isoforms_cs, out_hq_cs, out_lq_cs):
        assert contigset_fn.endswith(contigset_ext)
    out_consensus_isoforms_fa = out_consensus_isoforms_cs.replace(
        contigset_ext, fasta_ext)
    out_hq_fa = out_hq_cs.replace(contigset_ext, fasta_ext)
    out_lq_fa = out_lq_cs.replace(contigset_ext, fasta_ext)

    cluster_bin_indices = [task.cluster_bin_index for task in chunk_tasks]
    cluster_out_dirs = [task.cluster_out_dir for task in chunk_tasks]

    # sanity check that Cluster indices are unique!
    assert len(cluster_bin_indices) == len(set(cluster_bin_indices))

    # Collect the per-bin files, in task order.
    split_hq_fqs, split_lq_fqs = [], []
    uc_pickles, partial_uc_pickles = [], []
    consensus_fas = []
    for bin_dir in cluster_out_dirs:
        bin_pq = IceQuiverPostprocess(root_dir=bin_dir, ipq_opts=ipq_opts)
        split_hq_fqs.append(bin_pq.quivered_good_fq)
        split_lq_fqs.append(bin_pq.quivered_bad_fq)
        uc_pickles.append(bin_pq.final_pickle_fn)
        partial_uc_pickles.append(bin_pq.nfl_all_pickle_fn)
        consensus_fas.append(bin_pq.final_consensus_fa)

    # 'combined' lives two directory levels above the first bin's output dir.
    bins_root = op.dirname(op.dirname(cluster_out_dirs[0]))
    combined_dir = op.join(bins_root, "combined")
    mkdir(combined_dir)
    combined_files = CombinedFiles(combined_dir)
    log.info("Combining results of all cluster bins to %s.", combined_dir)

    log.info("Merging HQ|LQ isoforms from all cluster bins.")
    log.info("HQ isoforms are: %s.", ",".join(split_hq_fqs))
    log.info("LQ isoforms are: %s.", ",".join(split_lq_fqs))
    combine_polished_isoforms(
        split_indices=cluster_bin_indices,
        split_hq_fns=split_hq_fqs,
        split_lq_fns=split_lq_fqs,
        combined_hq_fa=combined_files.all_hq_fa,
        combined_hq_fq=combined_files.all_hq_fq,
        combined_lq_fa=combined_files.all_lq_fa,
        combined_lq_fq=combined_files.all_lq_fq,
        hq_lq_prefix_dict_pickle=combined_files.hq_lq_prefix_dict_pickle,
        sample_name=sample_name)

    # Link the combined HQ|LQ isoforms and the prefix pickle to the outputs.
    polished_links = (
        (combined_files.all_hq_fa, out_hq_fa),
        (combined_files.all_hq_fq, out_hq_fq),
        (combined_files.all_lq_fa, out_lq_fa),
        (combined_files.all_lq_fq, out_lq_fq),
        (combined_files.hq_lq_prefix_dict_pickle, out_hq_lq_prefix_dict_pickle),
    )
    for combined_fn, output_fn in polished_links:
        ln(combined_fn, output_fn)

    as_contigset(out_hq_fa, out_hq_cs)
    as_contigset(out_lq_fa, out_lq_cs)

    log.info("Merging consensus isoforms from all cluster bins.")
    combine_consensus_isoforms(
        split_indices=cluster_bin_indices,
        split_files=consensus_fas,
        combined_consensus_isoforms_fa=combined_files.all_consensus_isoforms_fa,
        sample_name=sample_name)
    ln(combined_files.all_consensus_isoforms_fa, out_consensus_isoforms_fa)
    as_contigset(out_consensus_isoforms_fa, out_consensus_isoforms_cs)

    summary_fn = combined_files.all_cluster_summary_fn
    log.info("Writing cluster summary to %s", summary_fn)
    # NOTE(review): isoforms_fa receives the contigset XML path, not the
    # FASTA path -- confirm write_cluster_summary accepts a contigset.
    write_cluster_summary(summary_fn=summary_fn,
                          isoforms_fa=out_consensus_isoforms_cs,
                          hq_fa=out_hq_fa,
                          lq_fa=out_lq_fa)
    ln(summary_fn, out_summary)

    report_fn = combined_files.all_cluster_report_fn
    log.info("Writing cluster report to %s", report_fn)
    write_combined_cluster_report(
        split_indices=cluster_bin_indices,
        split_uc_pickles=uc_pickles,
        split_partial_uc_pickles=partial_uc_pickles,
        report_fn=report_fn,
        sample_name=sample_name)
    ln(report_fn, out_report)
def resolved_tool_contract_runner(rtc):
    """
    Combine every cluster bin's results and expose them as task outputs.

    Input is the chunk-task pickle at rtc.task.input_files[0]. The per-bin
    polished isoforms, consensus isoforms and cluster pickles are merged
    into a 'combined' directory (summary.json, cluster_report.csv,
    hq_isoforms.fa|fq, lq_isoforms.fa|fq), and each combined file is linked
    to its slot in rtc.task.output_files.
    """
    tasks = ChunkTasksPickle.read(rtc.task.input_files[0])
    assert all([isinstance(task, ClusterChunkTask) for task in tasks])
    # Result is discarded; presumably the pickle is sorted in place -- TODO confirm.
    tasks.sorted_by_attr(attr='cluster_bin_index')

    task_opts = rtc.task.options
    ipq_opts = IceQuiverHQLQOptions(
        qv_trim_5=task_opts[Constants.QV_TRIM_FIVEPRIME_ID],
        qv_trim_3=task_opts[Constants.QV_TRIM_THREEPRIME_ID],
        hq_quiver_min_accuracy=task_opts[Constants.HQ_QUIVER_MIN_ACCURACY_ID])
    sample_name = get_sample_name(
        input_sample_name=task_opts[Constants.SAMPLE_NAME_ID])

    # Positional task outputs.
    out_files = rtc.task.output_files
    out_consensus_isoforms_cs = out_files[0]
    out_summary = out_files[1]
    out_report = out_files[2]
    out_hq_cs = out_files[3]
    out_hq_fq = out_files[4]
    out_lq_cs = out_files[5]
    out_lq_fq = out_files[6]
    out_hq_lq_prefix_dict_pickle = out_files[7]

    assert out_consensus_isoforms_cs.endswith(".contigset.xml")
    assert out_hq_cs.endswith(".contigset.xml")
    assert out_lq_cs.endswith(".contigset.xml")

    # FASTA companions of the three contigset outputs.
    out_consensus_isoforms_fa = out_consensus_isoforms_cs.replace(
        ".contigset.xml", ".fasta")
    out_hq_fa = out_hq_cs.replace('.contigset.xml', '.fasta')
    out_lq_fa = out_lq_cs.replace('.contigset.xml', '.fasta')

    cluster_bin_indices = [task.cluster_bin_index for task in tasks]
    cluster_out_dirs = [task.cluster_out_dir for task in tasks]

    # sanity check that Cluster indices are unique!
    assert len(set(cluster_bin_indices)) == len(cluster_bin_indices)

    # One row of file names per bin, gathered in task order.
    rows = []
    for task in tasks:
        pq = IceQuiverPostprocess(root_dir=task.cluster_out_dir,
                                  ipq_opts=ipq_opts)
        rows.append((pq.quivered_good_fq, pq.quivered_bad_fq,
                     pq.final_pickle_fn, pq.nfl_all_pickle_fn,
                     pq.final_consensus_fa))
    hq_fqs = [row[0] for row in rows]
    lq_fqs = [row[1] for row in rows]
    uc_pickle_fns = [row[2] for row in rows]
    partial_uc_pickle_fns = [row[3] for row in rows]
    consensus_fa_fns = [row[4] for row in rows]

    # The combined directory is a sibling of the first bin's parent directory.
    first_bin_parent = op.dirname(cluster_out_dirs[0])
    combined_dir = op.join(op.dirname(first_bin_parent), "combined")
    mkdir(combined_dir)
    combined = CombinedFiles(combined_dir)
    log.info("Combining results of all cluster bins to %s.", combined_dir)

    log.info("Merging HQ|LQ isoforms from all cluster bins.")
    log.info("HQ isoforms are: %s.", ",".join(hq_fqs))
    log.info("LQ isoforms are: %s.", ",".join(lq_fqs))
    combine_polished_isoforms(
        split_indices=cluster_bin_indices,
        split_hq_fns=hq_fqs,
        split_lq_fns=lq_fqs,
        combined_hq_fa=combined.all_hq_fa,
        combined_hq_fq=combined.all_hq_fq,
        combined_lq_fa=combined.all_lq_fa,
        combined_lq_fq=combined.all_lq_fq,
        hq_lq_prefix_dict_pickle=combined.hq_lq_prefix_dict_pickle,
        sample_name=sample_name)

    ln(combined.all_hq_fa, out_hq_fa)  # HQ isoforms, FASTA
    ln(combined.all_hq_fq, out_hq_fq)  # HQ isoforms, FASTQ
    ln(combined.all_lq_fa, out_lq_fa)  # LQ isoforms, FASTA
    ln(combined.all_lq_fq, out_lq_fq)  # LQ isoforms, FASTQ
    ln(combined.hq_lq_prefix_dict_pickle, out_hq_lq_prefix_dict_pickle)

    as_contigset(out_hq_fa, out_hq_cs)
    as_contigset(out_lq_fa, out_lq_cs)

    log.info("Merging consensus isoforms from all cluster bins.")
    combine_consensus_isoforms(
        split_indices=cluster_bin_indices,
        split_files=consensus_fa_fns,
        combined_consensus_isoforms_fa=combined.all_consensus_isoforms_fa,
        sample_name=sample_name)
    ln(combined.all_consensus_isoforms_fa, out_consensus_isoforms_fa)
    as_contigset(out_consensus_isoforms_fa, out_consensus_isoforms_cs)

    log.info("Writing cluster summary to %s", combined.all_cluster_summary_fn)
    # NOTE(review): the contigset XML path (not the FASTA path) is passed as
    # isoforms_fa -- confirm write_cluster_summary accepts a contigset.
    write_cluster_summary(summary_fn=combined.all_cluster_summary_fn,
                          isoforms_fa=out_consensus_isoforms_cs,
                          hq_fa=out_hq_fa,
                          lq_fa=out_lq_fa)
    ln(combined.all_cluster_summary_fn, out_summary)  # cluster summary

    log.info("Writing cluster report to %s", combined.all_cluster_report_fn)
    write_combined_cluster_report(
        split_indices=cluster_bin_indices,
        split_uc_pickles=uc_pickle_fns,
        split_partial_uc_pickles=partial_uc_pickle_fns,
        report_fn=combined.all_cluster_report_fn,
        sample_name=sample_name)
    ln(combined.all_cluster_report_fn, out_report)  # cluster report
def test_get_sample_name(self):
    """Test get_sample_name"""
    # assertTrue(value, msg) uses its second argument only as the failure
    # message, so it would pass for any truthy return value; assertEqual
    # actually compares the returned name with the expected one.
    self.assertEqual(get_sample_name("my_name"), "my_name")
    self.assertEqual(get_sample_name("my name,|"), "myname")
    # An empty input must still produce a non-empty sample name.
    self.assertTrue(len(get_sample_name("")) > 0)
def args_runner(args):
    """
    Run the complete workflow described by the parsed arguments `args`.

    Steps, in order:
      (1) separate FLNC reads into bins;
      (2) run ICE/Quiver on each bin whose HQ polished isoforms do not
          exist yet;
      (3) merge the polished isoform clusters from all bins;
      (4) map the merged HQ isoforms to the GMAP reference genome;
      (5) collapse, count and filter isoforms after mapping.

    Returns 0. Raises ValueError if args.collapsed_filtered_fn does not end
    with a FASTA or FASTQ extension; note that this is only checked after
    steps (1)-(4) have run.
    """
    logging.info("%s arguments are:\n%s\n", __file__, args)

    # sanity check arguments
    _sanity_check_args(args)

    # Build the option objects shared by all bins.
    ice_opts = IceOptions(
        quiver=args.quiver,
        use_finer_qv=args.use_finer_qv,
        targeted_isoseq=args.targeted_isoseq,
        ece_penalty=args.ece_penalty,
        ece_min_len=args.ece_min_len,
        flnc_reads_per_split=args.flnc_reads_per_split,
        nfl_reads_per_split=args.nfl_reads_per_split)
    sge_opts = SgeOptions(
        unique_id=args.unique_id,
        use_sge=args.use_sge,
        max_sge_jobs=args.max_sge_jobs,
        blasr_nproc=args.blasr_nproc,
        quiver_nproc=args.quiver_nproc,
        gcon_nproc=args.gcon_nproc,
        sge_env_name=args.sge_env_name,
        sge_queue=args.sge_queue)
    ipq_opts = IceQuiverHQLQOptions(
        qv_trim_5=args.qv_trim_5,
        qv_trim_3=args.qv_trim_3,
        hq_quiver_min_accuracy=args.hq_quiver_min_accuracy)

    # (1) separate flnc reads into bins
    logging.info("Separating FLNC reads into bins.")
    tofu_f = TofuFiles(tofu_dir=args.tofu_dir)
    separator = SeparateFLNCRunner(
        flnc_fa=args.flnc_fa,
        root_dir=args.tofu_dir,
        out_pickle=tofu_f.separate_flnc_pickle,
        bin_size_kb=args.bin_size_kb,
        bin_by_primer=args.bin_by_primer,
        bin_manual=args.bin_manual,
        max_base_limit_MB=args.max_base_limit_MB)
    separator.run()

    flnc_files = SeparateFLNCBase.convert_pickle_to_sorted_flnc_files(
        tofu_f.separate_flnc_pickle)
    logging.info("Separated FLNC reads bins are %s", flnc_files)

    # (2) apply 'pbtranscript cluster' to each bin
    # run ICE/Quiver (the whole thing), providing the fasta_fofn
    logging.info("Running ICE/Polish on separated FLNC reads bins.")
    split_dirs = []
    for flnc_file in flnc_files:
        split_dir = op.join(realpath(op.dirname(flnc_file)), "cluster_out")
        mkdir(split_dir)
        split_dirs.append(split_dir)
        consensus_fa = op.join(split_dir, "consensus_isoforms.fasta")

        ipq_f = IceQuiverPostprocess(root_dir=split_dir, ipq_opts=ipq_opts)
        if op.exists(ipq_f.quivered_good_fq):
            # This bin was already polished; keep it in split_dirs so it is
            # still merged, but do not rerun clustering or clean up.
            logging.warning("HQ polished isoforms %s already exist. SKIP!",
                            ipq_f.quivered_good_fq)
            continue

        logging.info("Running ICE/Quiver on %s", split_dir)
        rmpath(consensus_fa)

        cluster = Cluster(
            root_dir=split_dir,
            flnc_fa=flnc_file,
            nfl_fa=args.nfl_fa,
            bas_fofn=args.bas_fofn,
            ccs_fofn=args.ccs_fofn,
            fasta_fofn=args.fasta_fofn,
            out_fa=consensus_fa,
            sge_opts=sge_opts,
            ice_opts=ice_opts,
            ipq_opts=ipq_opts)

        if not args.mem_debug:
            cluster.run()
        else:
            # DEBUG: run under memory_profiler, sampling every 60 seconds,
            # and append timing and memory usage to mem_debug.log.
            from memory_profiler import memory_usage
            start_t = time.time()
            mem_usage = memory_usage(cluster.run, interval=60)
            end_t = time.time()
            with open('mem_debug.log', 'a') as debug_log:
                debug_log.write(
                    "Running ICE/Quiver on {0} took {1} secs.\n".format(
                        split_dir, end_t - start_t))
                debug_log.write(
                    "Maximum memory usage: {0}\n".format(max(mem_usage)))
                debug_log.write("Memory usage: {0}\n".format(mem_usage))

        if not args.keep_tmp_files:
            # By default, delete all temporary files. The `rm` processes are
            # started but not waited for.
            for tmp_path in (ipq_f.tmp_dir, ipq_f.quivered_dir):
                logging.info("Deleting %s", tmp_path)
                subprocess.Popen(['rm', '-rf', '%s' % tmp_path])

    # (3) merge polished isoform cluster from all bins
    logging.info("Merging isoforms from all bins to %s.", tofu_f.combined_dir)
    combiner = CombineRunner(
        combined_dir=tofu_f.combined_dir,
        sample_name=get_sample_name(args.sample_name),
        split_dirs=split_dirs,
        ipq_opts=ipq_opts)
    combiner.run()
    if args.summary_fn is not None:
        ln(tofu_f.all_cluster_summary_fn, args.summary_fn)
    if args.report_fn is not None:
        ln(tofu_f.all_cluster_report_fn, args.report_fn)

    # (4) map HQ isoforms to GMAP reference genome
    map_isoforms_and_sort(
        input_filename=tofu_f.all_hq_fq,
        sam_filename=tofu_f.sorted_gmap_sam,
        gmap_db_dir=args.gmap_db,
        gmap_db_name=args.gmap_name,
        gmap_nproc=args.gmap_nproc)

    # (5) post mapping to genome analysis, including
    #     * collapse polished HQ isoform clusters into groups
    #     * count abundance of collapsed isoform groups
    #     * filter collapsed isoforms based on abundance info
    logging.info("Post mapping to genome analysis.")
    out_isoforms = args.collapsed_filtered_fn
    # The input format follows the requested output format.
    if out_isoforms.endswith((".fa", ".fasta")):
        in_isoforms = tofu_f.all_hq_fa
    elif out_isoforms.endswith((".fq", ".fastq")):
        in_isoforms = tofu_f.all_hq_fq
    else:
        raise ValueError("Output file %s must be FASTA or FASTQ!" % out_isoforms)

    post_mapping_to_genome_runner(
        in_isoforms=in_isoforms,
        in_sam=tofu_f.sorted_gmap_sam,
        in_pickle=tofu_f.hq_lq_prefix_dict_pickle,
        out_isoforms=args.collapsed_filtered_fn,
        out_gff=args.gff_fn,
        out_abundance=args.abundance_fn,
        out_group=args.group_fn,
        out_read_stat=args.read_stat_fn,
        min_aln_coverage=args.min_aln_coverage,
        min_aln_identity=args.min_aln_identity,
        min_flnc_coverage=args.min_flnc_coverage,
        max_fuzzy_junction=args.max_fuzzy_junction,
        allow_extra_5exon=args.allow_extra_5exon,
        min_count=args.min_count)

    return 0
def args_runner(args):
    """
    Drive the full workflow from the parsed command-line arguments.

    The stages run in this order: (1) bin the FLNC reads, (2) cluster and
    polish every bin that has no HQ polished isoforms yet, (3) merge the
    polished isoforms of all bins, (4) map the merged HQ isoforms to the
    GMAP reference genome, (5) collapse, count and filter the isoforms.

    Returns 0 on completion. Raises ValueError when
    args.collapsed_filtered_fn has neither a FASTA nor a FASTQ extension;
    the check happens only once stage (4) has finished.
    """
    logging.info("%s arguments are:\n%s\n", __file__, args)

    # sanity check arguments
    _sanity_check_args(args)

    # make option objects
    ice_opts = IceOptions(
        quiver=args.quiver,
        use_finer_qv=args.use_finer_qv,
        targeted_isoseq=args.targeted_isoseq,
        ece_penalty=args.ece_penalty,
        ece_min_len=args.ece_min_len,
        nfl_reads_per_split=args.nfl_reads_per_split)
    sge_opts = SgeOptions(
        unique_id=args.unique_id,
        use_sge=args.use_sge,
        max_sge_jobs=args.max_sge_jobs,
        blasr_nproc=args.blasr_nproc,
        quiver_nproc=args.quiver_nproc,
        gcon_nproc=args.gcon_nproc,
        sge_env_name=args.sge_env_name,
        sge_queue=args.sge_queue)
    ipq_opts = IceQuiverHQLQOptions(
        qv_trim_5=args.qv_trim_5,
        qv_trim_3=args.qv_trim_3,
        hq_quiver_min_accuracy=args.hq_quiver_min_accuracy)

    # (1) separate flnc reads into bins
    logging.info("Separating FLNC reads into bins.")
    tofu_f = TofuFiles(tofu_dir=args.tofu_dir)
    flnc_separator = SeparateFLNCRunner(
        flnc_fa=args.flnc_fa,
        root_dir=args.tofu_dir,
        out_pickle=tofu_f.separate_flnc_pickle,
        bin_size_kb=args.bin_size_kb,
        bin_by_primer=args.bin_by_primer,
        bin_manual=args.bin_manual,
        max_base_limit_MB=args.max_base_limit_MB)
    flnc_separator.run()

    flnc_files = SeparateFLNCBase.convert_pickle_to_sorted_flnc_files(
        tofu_f.separate_flnc_pickle)
    logging.info("Separated FLNC reads bins are %s", flnc_files)

    # (2) apply 'pbtranscript cluster' to each bin
    # run ICE/Quiver (the whole thing), providing the fasta_fofn
    logging.info("Running ICE/Polish on separated FLNC reads bins.")
    split_dirs = []
    for flnc_file in flnc_files:
        bin_dir = realpath(op.dirname(flnc_file))
        split_dir = op.join(bin_dir, "cluster_out")
        mkdir(split_dir)
        split_dirs.append(split_dir)
        bin_consensus_fa = op.join(split_dir, "consensus_isoforms.fasta")

        ipq_f = IceQuiverPostprocess(root_dir=split_dir, ipq_opts=ipq_opts)
        if op.exists(ipq_f.quivered_good_fq):
            # Already polished: the bin stays in split_dirs for merging, but
            # neither clustering nor temp-file cleanup is repeated.
            logging.warning("HQ polished isoforms %s already exist. SKIP!",
                            ipq_f.quivered_good_fq)
            continue

        logging.info("Running ICE/Quiver on %s", split_dir)
        rmpath(bin_consensus_fa)

        cluster_job = Cluster(
            root_dir=split_dir,
            flnc_fa=flnc_file,
            nfl_fa=args.nfl_fa,
            bas_fofn=args.bas_fofn,
            ccs_fofn=args.ccs_fofn,
            fasta_fofn=args.fasta_fofn,
            out_fa=bin_consensus_fa,
            sge_opts=sge_opts,
            ice_opts=ice_opts,
            ipq_opts=ipq_opts)

        if args.mem_debug:
            # DEBUG: profile the run and append the result to mem_debug.log.
            from memory_profiler import memory_usage
            start_t = time.time()
            mem_usage = memory_usage(cluster_job.run, interval=60)
            end_t = time.time()
            with open('mem_debug.log', 'a') as mem_log:
                mem_log.write(
                    "Running ICE/Quiver on {0} took {1} secs.\n".format(
                        split_dir, end_t - start_t))
                mem_log.write(
                    "Maximum memory usage: {0}\n".format(max(mem_usage)))
                mem_log.write("Memory usage: {0}\n".format(mem_usage))
        else:
            cluster_job.run()

        if not args.keep_tmp_files:
            # By default, delete all temporary files. Deletion is handed to
            # `rm` processes that are not waited for.
            logging.info("Deleting %s", ipq_f.tmp_dir)
            subprocess.Popen(['rm', '-rf', '%s' % ipq_f.tmp_dir])
            logging.info("Deleting %s", ipq_f.quivered_dir)
            subprocess.Popen(['rm', '-rf', '%s' % ipq_f.quivered_dir])

    # (3) merge polished isoform cluster from all bins
    logging.info("Merging isoforms from all bins to %s.", tofu_f.combined_dir)
    merger = CombineRunner(
        combined_dir=tofu_f.combined_dir,
        sample_name=get_sample_name(args.sample_name),
        split_dirs=split_dirs,
        ipq_opts=ipq_opts)
    merger.run()
    if args.summary_fn is not None:
        ln(tofu_f.all_cluster_summary_fn, args.summary_fn)
    if args.report_fn is not None:
        ln(tofu_f.all_cluster_report_fn, args.report_fn)

    # (4) map HQ isoforms to GMAP reference genome
    map_isoforms_and_sort(
        input_filename=tofu_f.all_hq_fq,
        sam_filename=tofu_f.sorted_gmap_sam,
        gmap_db_dir=args.gmap_db,
        gmap_db_name=args.gmap_name,
        gmap_nproc=args.gmap_nproc)

    # (5) post mapping to genome analysis, including
    #     * collapse polished HQ isoform clusters into groups
    #     * count abundance of collapsed isoform groups
    #     * filter collapsed isoforms based on abundance info
    logging.info("Post mapping to genome analysis.")
    out_isoforms = args.collapsed_filtered_fn
    wants_fasta = out_isoforms.endswith((".fa", ".fasta"))
    wants_fastq = out_isoforms.endswith((".fq", ".fastq"))
    if not (wants_fasta or wants_fastq):
        raise ValueError("Output file %s must be FASTA or FASTQ!" % out_isoforms)
    # FASTA takes precedence; the input format mirrors the output format.
    in_isoforms = tofu_f.all_hq_fa if wants_fasta else tofu_f.all_hq_fq

    post_mapping_to_genome_runner(
        in_isoforms=in_isoforms,
        in_sam=tofu_f.sorted_gmap_sam,
        in_pickle=tofu_f.hq_lq_prefix_dict_pickle,
        out_isoforms=args.collapsed_filtered_fn,
        out_gff=args.gff_fn,
        out_abundance=args.abundance_fn,
        out_group=args.group_fn,
        out_read_stat=args.read_stat_fn,
        min_aln_coverage=args.min_aln_coverage,
        min_aln_identity=args.min_aln_identity,
        min_flnc_coverage=args.min_flnc_coverage,
        max_fuzzy_junction=args.max_fuzzy_junction,
        allow_extra_5exon=args.allow_extra_5exon,
        min_count=args.min_count)

    return 0
def run(self):
    """
    For each cluster bin, create summary.json, cluster_report.csv,
    hq_isoforms.fa|fq, lq_isoforms.fa|fq
    Finally, merge all cluster bins and save all outputs to 'combined'.

    Reads its settings from self.args. Each output file name (consensus
    isoforms, summary, report, HQ|LQ fa|fq) is taken from the matching
    argument when one is given; otherwise a default name inside
    args.combined_dir is used. The combined files are linked to those
    output names.
    """
    logging.info("Running {f} v{v}.".format(f=op.basename(__file__),
                                            v=self.getVersion()))
    args = self.args

    # Get cluster bins directories as input
    cluster_bin_dirs = self.get_cluster_bin_dirs(
        separate_flnc_pickle=args.separate_flnc_pickle,
        cluster_bin_dirs=args.cluster_bin_dirs)
    # Bins are indexed by their position in cluster_bin_dirs.
    cluster_bin_indices = range(0, len(cluster_bin_dirs))

    # Create output dir
    combined_dir = args.combined_dir
    mkdir(combined_dir)

    # Get combined output filenames
    def f(input_fn, default_fn):
        """Return input_fn if given, else default_fn inside combined_dir."""
        if input_fn is None:
            return op.join(combined_dir, default_fn)
        # Previously this branch fell through and returned None, so every
        # user-supplied output file name was silently lost.
        return input_fn

    out_consensus_isoforms_fa = f(args.consensus_isoforms_fa,
                                  "all.consensus_isoforms.fasta")
    out_summary = f(args.summary_fn, "all.cluster_summary.json")
    out_report = f(args.report_fn, "all.cluster_report.csv")
    out_hq_fa = f(args.hq_isoforms_fa, "all.polished_hq.fasta")
    out_lq_fa = f(args.lq_isoforms_fa, "all.polished_lq.fasta")
    out_hq_fq = f(args.hq_isoforms_fq, "all.polished_hq.fastq")
    out_lq_fq = f(args.lq_isoforms_fq, "all.polished_lq.fastq")

    ipq_opts = IceQuiverHQLQOptions(
        qv_trim_5=args.qv_trim_5,
        qv_trim_3=args.qv_trim_3,
        hq_quiver_min_accuracy=args.hq_quiver_min_accuracy)
    sample_name = get_sample_name(input_sample_name=args.sample_name)

    # Collect the per-bin input files, in bin order.
    hq_fq_fns, lq_fq_fns = [], []
    split_uc_pickles, split_partial_uc_pickles = [], []
    split_consensus_isoforms = []

    for cluster_bin_dir in cluster_bin_dirs:
        ice_pq = IceQuiverPostprocess(root_dir=cluster_bin_dir,
                                      ipq_opts=ipq_opts)
        hq_fq_fns.append(ice_pq.quivered_good_fq)
        lq_fq_fns.append(ice_pq.quivered_bad_fq)
        split_uc_pickles.append(ice_pq.final_pickle_fn)
        split_partial_uc_pickles.append(ice_pq.nfl_all_pickle_fn)
        split_consensus_isoforms.append(ice_pq.final_consensus_fa)

    combined_files = CombinedFiles(combined_dir)
    log.info("Combining results of all cluster bins to %s.", combined_dir)
    log.info("Merging HQ|LQ isoforms from all cluster bins.")
    log.info("HQ isoforms are: %s.", ",".join(hq_fq_fns))
    log.info("LQ isoforms are: %s.", ",".join(lq_fq_fns))
    combine_polished_isoforms(
        split_indices=cluster_bin_indices,
        split_hq_fns=hq_fq_fns,
        split_lq_fns=lq_fq_fns,
        combined_hq_fa=combined_files.all_hq_fa,
        combined_hq_fq=combined_files.all_hq_fq,
        combined_lq_fa=combined_files.all_lq_fa,
        combined_lq_fq=combined_files.all_lq_fq,
        hq_lq_prefix_dict_pickle=combined_files.hq_lq_prefix_dict_pickle,
        sample_name=sample_name)

    ln(combined_files.all_hq_fa, out_hq_fa)  # 'HQ isoforms'
    ln(combined_files.all_hq_fq, out_hq_fq)  # 'HQ isoforms'
    ln(combined_files.all_lq_fa, out_lq_fa)  # 'LQ isoforms'
    ln(combined_files.all_lq_fq, out_lq_fq)  # 'LQ isoforms'

    log.info("Merging consensus isoforms from all cluster bins.")
    combine_consensus_isoforms(
        split_indices=cluster_bin_indices,
        split_files=split_consensus_isoforms,
        combined_consensus_isoforms_fa=combined_files.all_consensus_isoforms_fa,
        sample_name=sample_name)
    ln(combined_files.all_consensus_isoforms_fa, out_consensus_isoforms_fa)

    log.info("Writing cluster summary to %s",
             combined_files.all_cluster_summary_fn)
    write_cluster_summary(summary_fn=combined_files.all_cluster_summary_fn,
                          isoforms_fa=out_consensus_isoforms_fa,
                          hq_fa=out_hq_fa,
                          lq_fa=out_lq_fa)
    ln(combined_files.all_cluster_summary_fn, out_summary)  # "cluster summary"

    log.info("Writing cluster report to %s",
             combined_files.all_cluster_report_fn)
    write_combined_cluster_report(
        split_indices=cluster_bin_indices,
        split_uc_pickles=split_uc_pickles,
        split_partial_uc_pickles=split_partial_uc_pickles,
        report_fn=combined_files.all_cluster_report_fn,
        sample_name=sample_name)
    ln(combined_files.all_cluster_report_fn, out_report)  # "cluster report"