def merge_split_summaries(self):
    """Merge the per-run split summaries into one summary file per split.

    For every name in ``self.split_names``, the split summary of each run in
    ``self.input_runinfo_dicts`` is read, and that run's own entry (keyed by
    its ``sample_id``) is collected into a single dict. The dict is
    serialized into the ``SUMMARY`` directory under ``self.output_directory``,
    and an index mapping split names to the merged files is serialized to
    ``SUMMARY.cp`` in ``self.output_directory``.

    Returns:
        A ``(summary_dir, merged_summary_index_path)`` tuple.

    Raises:
        ValueError: if there are splits to merge but no input runs.
    """
    merged_summary_index = {}
    merged_summary_index_path = os.path.join(self.output_directory, 'SUMMARY.cp')
    # NOTE(review): `delete_if_exists=True` presumably wipes a SUMMARY
    # directory left over from a previous merge -- confirm in filesnpaths.
    summary_dir = filesnpaths.gen_output_directory(
        os.path.join(self.output_directory, 'SUMMARY'),
        delete_if_exists=True,
    )

    runinfos = list(self.input_runinfo_dicts.values())

    # read all index files per run into a dict here, so the access is easier
    # from within the for loop below
    run_sum_indices = {
        runinfo['sample_id']: dictio.read_serialized_object(runinfo['profile_summary_index'])
        for runinfo in runinfos
    }

    num_splits = len(self.split_names)
    if num_splits and not runinfos:
        raise ValueError("merge_split_summaries: there are %d splits to merge, but no "
                         "input runs to read their summaries from." % num_splits)

    # The merged file for a split reuses the file name that split has in the
    # index of the last run. This used to happen implicitly through the inner
    # loop variable leaking out of its loop; it is spelled out here.
    naming_index = run_sum_indices[runinfos[-1]['sample_id']] if runinfos else {}

    for split_number, split_name in enumerate(self.split_names, start=1):
        self.progress.update('merging summaries for splits %s of %s' % (split_number, num_splits))

        merged_summary = {}
        for runinfo in runinfos:
            sample_id = runinfo['sample_id']
            # paths stored in a run's index are joined onto that run's input directory
            run_split_summary = dictio.read_serialized_object(
                os.path.join(runinfo['input_dir'], run_sum_indices[sample_id][split_name])
            )
            merged_summary[sample_id] = run_split_summary[sample_id]

        merged_split_summary_path = os.path.join(summary_dir, os.path.basename(naming_index[split_name]))
        dictio.write_serialized_object(merged_summary, merged_split_summary_path)
        merged_summary_index[split_name] = merged_split_summary_path

    self.progress.update('Serializing merged split summary index ...')
    # the index is stored with the output directory prefix stripped from its paths
    dictio.write_serialized_object(
        dictio.strip_prefix_from_dict_values(merged_summary_index, self.output_directory),
        merged_summary_index_path,
    )

    return summary_dir, merged_summary_index_path
def store_summarized_profile_for_each_split(self):
    """Write one serialized summary file per split, plus an index of those files.

    Each split of each contig in ``self.contigs`` gets its own file in the
    ``SUMMARY`` output directory. The file holds a dict keyed by
    ``self.sample_id`` with the split's ``coverage``, ``variability`` and
    ``competing_nucleotides``. An index mapping split names to these files is
    serialized to ``SUMMARY.cp``. Both the summary directory and the index
    path are recorded through ``self.run.info``.
    """
    summary_index = {}
    summary_index_output_path = self.generate_output_destination("SUMMARY.cp")
    summary_dir = self.generate_output_destination("SUMMARY", directory=True)

    self.progress.new("Storing summary files")

    total_contigs = pp(len(self.contigs))

    # Numbers the per-split files. It is deliberately separate from the contig
    # position used in the progress message: one shared counter either
    # over-reports the contig number (when advanced per split) or makes splits
    # of the same contig overwrite each other's file (when advanced per contig).
    split_counter = 1

    for contig_number, contig in enumerate(self.contigs, start=1):
        self.progress.update("working on contig %s of %s" % (pp(contig_number), total_contigs))

        for split in self.contigs[contig].splits:
            split_summary_path = self.generate_output_destination(
                os.path.join(summary_dir, "%.6d.cp" % split_counter)
            )
            dictio.write_serialized_object(
                {
                    self.sample_id: {
                        "coverage": split.coverage.c,
                        "variability": split.auxiliary.v,
                        "competing_nucleotides": split.auxiliary.competing_nucleotides,
                    }
                },
                split_summary_path,
            )
            summary_index[split.name] = split_summary_path
            split_counter += 1

    self.progress.end()

    self.run.info("profile_summary_dir", summary_dir)
    # the index is stored with the output directory prefix stripped from its paths
    dictio.write_serialized_object(
        dictio.strip_prefix_from_dict_values(summary_index, self.output_directory),
        summary_index_output_path,
    )
    self.run.info("profile_summary_index", summary_index_output_path)
def store_info_dict(self, destination, strip_prefix=None):
    """Serialize ``self.info_dict`` to ``destination``.

    When ``strip_prefix`` is truthy, ``self.info_dict`` is first replaced by
    the result of ``dictio.strip_prefix_from_dict_values`` for that prefix,
    so the stripped version is both stored on the instance and written out.
    """
    info = self.info_dict

    if strip_prefix:
        # Mostly here to drop the output directory prefix from the output
        # file names. Surprisingly, this turned out to be the best place
        # to do it.
        info = dictio.strip_prefix_from_dict_values(info, strip_prefix)
        self.info_dict = info

    dictio.write_serialized_object(info, destination)
def store_info_dict(self, destination, strip_prefix = None):
    """Write ``self.info_dict`` to ``destination`` as a serialized object.

    If a truthy ``strip_prefix`` is given, the info dict kept on the instance
    is replaced with the output of ``dictio.strip_prefix_from_dict_values``
    before it is written.
    """
    wants_stripping = bool(strip_prefix)

    if wants_stripping:
        # The main use is removing the output directory prefix from the
        # output file names; doing it right before storing proved to be
        # the most practical spot.
        stripped = dictio.strip_prefix_from_dict_values(self.info_dict, strip_prefix)
        self.info_dict = stripped

    dictio.write_serialized_object(self.info_dict, destination)