def do_auxiliary_profile_data(self):
    """Subset the parent profile db's auxiliary split-coverage data down to this bin.

    Copies per-sample coverage entries for every split in `self.split_names`
    from the parent auxiliary data file into a fresh auxiliary data file that
    sits next to the bin's profile db. Optionally gzip-compresses the result
    when `self.compress_auxiliary_data` is set.
    """
    self.progress.new('Splitting "%s"' % self.bin_id)
    self.progress.update('Subsetting the auxiliary data (for profile db)')

    # Destination file lives alongside the bin's own profile db.
    bin_aux_path = dbops.get_auxiliary_data_path_for_profile_db(self.bin_profile_db_path)
    parent_aux_path = self.summary.auxiliary_data_path

    # New file is keyed to the bin's contigs db hash; the parent is opened
    # read-only against the summary's recorded hash.
    bin_aux = auxiliarydataops.AuxiliaryDataForSplitCoverages(bin_aux_path,
                                                              self.contigs_db_hash,
                                                              create_new=True)
    parent_aux = auxiliarydataops.AuxiliaryDataForSplitCoverages(parent_aux_path,
                                                                 self.summary.a_meta['contigs_db_hash'])

    # Copy every (split, sample) coverage vector that belongs to this bin.
    for split in self.split_names:
        coverages = parent_aux.get(split)
        for sample in coverages:
            bin_aux.append(split, sample, coverages[sample])

    bin_aux.store()
    bin_aux.close()
    parent_aux.close()

    if self.compress_auxiliary_data:
        self.progress.update('Compressing the profile db auxiliary data file ...')
        utils.gzip_compress_file(bin_aux_path)

    self.progress.end()
def do_auxiliary_profile_data(self):
    """Carry this bin's split coverages out of the parent auxiliary data file.

    For each split name in `self.split_names`, reads the per-sample coverage
    data from the parent profile db's auxiliary file and appends it to a newly
    created auxiliary file for the bin's profile db. When
    `self.compress_auxiliary_data` is truthy the new file is gzip-compressed
    after it is stored.
    """
    self.progress.new('Splitting "%s"' % self.bin_id)
    self.progress.update('Subsetting the auxiliary data (for profile db)')

    target_path = dbops.get_auxiliary_data_path_for_profile_db(self.bin_profile_db_path)
    source_path = self.summary.auxiliary_data_path

    target = auxiliarydataops.AuxiliaryDataForSplitCoverages(target_path, self.contigs_db_hash, create_new=True)
    source = auxiliarydataops.AuxiliaryDataForSplitCoverages(source_path, self.summary.a_meta['contigs_db_hash'])

    for split_name in self.split_names:
        per_sample = source.get(split_name)
        # append one entry per sample for this split
        for sample_name in per_sample:
            target.append(split_name, sample_name, per_sample[sample_name])

    target.store()
    target.close()
    source.close()

    if self.compress_auxiliary_data:
        self.progress.update('Compressing the profile db auxiliary data file ...')
        utils.gzip_compress_file(target_path)

    self.progress.end()
def store_short_reads_for_splits(self):
    """Write the recovered short reads to FASTA file(s) and report them.

    Runs `sanity_check` first and refuses to proceed unless it completed.
    When `self.split_R1_and_R2` is set, one FASTA per read type (R1, R2,
    UNPAIRED) is written using `self.output_file_prefix`; otherwise all reads
    go to a single file. Each output is optionally gzip-compressed when
    `self.gzip` is truthy.

    Raises:
        ConfigError: if `sanity_check` did not set `self.sanity_checked`.
    """
    self.sanity_check()
    if not self.sanity_checked:
        raise ConfigError("store_short_reads_for_splits :: Cannot be called before running sanity_check")

    reads_dict = self.get_short_reads_for_splits_dict()

    self.progress.new("Storing reads")
    self.progress.update("...")

    if self.split_R1_and_R2:
        # One FASTA per read type, named <prefix>_<type>.fa
        for read_type in sorted(reads_dict):
            dest = '%s_%s.fa' % (self.output_file_prefix, read_type)
            utils.store_dict_as_FASTA_file(reads_dict[read_type], dest)
            if self.gzip:
                utils.gzip_compress_file(dest)
                dest += ".gz"

            self.run.info('Output file for %s' % read_type, dest, progress=self.progress)

        self.progress.end()
        self.run.info('Num paired-end reads stored', pp(len(reads_dict['R1'])), mc='green', nl_before=1)
        self.run.info('Num unpaired reads stored', pp(len(reads_dict['UNPAIRED'])), mc='green')
        return

    # Single-file mode: everything under the 'all' key.
    dest = self.output_file_path or 'short_reads.fa'
    utils.store_dict_as_FASTA_file(reads_dict['all'], dest)
    if self.gzip:
        utils.gzip_compress_file(dest)
        dest += ".gz"

    self.progress.end()
    self.run.info('Output file for all short reads', dest)
    self.run.info('Num reads stored', pp(len(reads_dict['all'])), mc='green')
def store_short_reads_for_splits(self):
    """Persist recovered short reads as FASTA output, with optional gzip.

    A successful `sanity_check` is mandatory; a ConfigError is raised if it
    has not run. In paired mode (`self.split_R1_and_R2`) every read type gets
    its own `<output_file_prefix>_<type>.fa`; otherwise a single FASTA
    (default name 'short_reads.fa') holds everything.
    """
    self.sanity_check()
    if not self.sanity_checked:
        raise ConfigError("store_short_reads_for_splits :: Cannot be called before running sanity_check")

    short_reads = self.get_short_reads_for_splits_dict()

    self.progress.new("Storing reads")
    self.progress.update("...")

    if self.split_R1_and_R2:
        for kind in sorted(short_reads.keys()):
            path = '%s_%s.fa' % (self.output_file_prefix, kind)
            utils.store_dict_as_FASTA_file(short_reads[kind], path)

            # gzip in place and report the compressed name instead
            if self.gzip:
                utils.gzip_compress_file(path)
                path = path + ".gz"

            self.run.info('Output file for %s' % kind, path, progress=self.progress)

        self.progress.end()
        self.run.info('Num paired-end reads stored', pp(len(short_reads['R1'])), mc='green', nl_before=1)
        self.run.info('Num unpaired reads stored', pp(len(short_reads['UNPAIRED'])), mc='green')
    else:
        path = self.output_file_path or 'short_reads.fa'
        utils.store_dict_as_FASTA_file(short_reads['all'], path)

        if self.gzip:
            utils.gzip_compress_file(path)
            path = path + ".gz"

        self.progress.end()
        self.run.info('Output file for all short reads', path)
        self.run.info('Num reads stored', pp(len(short_reads['all'])), mc='green')