Example #1
    def do_auxiliary_profile_data(self):
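        """Subset the parent profile db's auxiliary (split coverage) data into a new file for this bin."""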
        self.progress.new('Splitting "%s"' % self.bin_id)
        self.progress.update('Subsetting the auxiliary data (for profile db)')

        new_auxiliary_profile_data_path = dbops.get_auxiliary_data_path_for_profile_db(
            self.bin_profile_db_path)
        parent_auxiliary_profile_data_path = self.summary.auxiliary_data_path

        # a brand new auxiliary data store for the bin's own profile db
        bin_profile_auxiliary = auxiliarydataops.AuxiliaryDataForSplitCoverages(
            new_auxiliary_profile_data_path,
            self.contigs_db_hash,
            create_new=True)

        parent_profile_auxiliary = auxiliarydataops.AuxiliaryDataForSplitCoverages(
            parent_auxiliary_profile_data_path,
            self.summary.a_meta['contigs_db_hash'])

        # copy per-sample coverage values for every split that belongs to the bin
        for split_name in self.split_names:
            sample_coverages = parent_profile_auxiliary.get(split_name)
            for sample_name in sample_coverages:
                bin_profile_auxiliary.append(split_name, sample_name,
                                             sample_coverages[sample_name])

        bin_profile_auxiliary.store()
        bin_profile_auxiliary.close()
        parent_profile_auxiliary.close()

        # optionally gzip the freshly written auxiliary data file
        if self.compress_auxiliary_data:
            self.progress.update(
                'Compressing the profile db auxiliary data file ...')
            utils.gzip_compress_file(new_auxiliary_profile_data_path)

        self.progress.end()
Example #2
    def store_short_reads_for_splits(self):
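        """Write the short reads recovered for the splits into FASTA file(s), optionally gzipped."""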
        self.sanity_check()

        if not self.sanity_checked:
            raise ConfigError(
                "store_short_reads_for_splits :: Cannot be called before running sanity_check"
            )

        short_reds_for_splits_dict = self.get_short_reads_for_splits_dict()

        self.progress.new("Storing reads")
        self.progress.update("...")

        # mates kept separate: write one output FASTA per read type (R1, R2, UNPAIRED)
        if self.split_R1_and_R2:
            for read_type in sorted(list(short_reds_for_splits_dict.keys())):
                output_file_path = '%s_%s.fa' % (self.output_file_prefix,
                                                 read_type)

                utils.store_dict_as_FASTA_file(
                    short_reds_for_splits_dict[read_type], output_file_path)
                if self.gzip:
                    utils.gzip_compress_file(output_file_path)
                    output_file_path = output_file_path + ".gz"

                self.run.info('Output file for %s' % read_type,
                              output_file_path,
                              progress=self.progress)

            self.progress.end()
            self.run.info('Num paired-end reads stored',
                          pp(len(short_reds_for_splits_dict['R1'])),
                          mc='green',
                          nl_before=1)
            self.run.info('Num unpaired reads stored',
                          pp(len(short_reds_for_splits_dict['UNPAIRED'])),
                          mc='green')
        else:
            # all reads go into a single FASTA file
            output_file_path = self.output_file_path or 'short_reads.fa'
            utils.store_dict_as_FASTA_file(short_reds_for_splits_dict['all'],
                                           output_file_path)

            if self.gzip:
                utils.gzip_compress_file(output_file_path)
                output_file_path = output_file_path + ".gz"

            self.progress.end()
            self.run.info('Output file for all short reads', output_file_path)
            self.run.info('Num reads stored',
                          pp(len(short_reds_for_splits_dict['all'])),
                          mc='green')
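
Both branches above hand a dict of sequences to utils.store_dict_as_FASTA_file. As a rough illustration of what such a helper typically does, here is a minimal sketch; the signature, the wrap parameter, and the return value are assumptions for illustration, not the anvi'o implementation.

def store_dict_as_FASTA_file(sequences, output_file_path, wrap=80):
    # Sketch only, not the anvi'o code: write a {name: sequence} dict as FASTA records,
    # wrapping long sequences at `wrap` characters per line.
    with open(output_file_path, 'w') as fasta:
        for name, sequence in sequences.items():
            fasta.write('>%s\n' % name)
            for start in range(0, len(sequence), wrap):
                fasta.write(sequence[start:start + wrap] + '\n')
    return output_file_path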
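
Both examples finish by handing their output file to utils.gzip_compress_file when compression is requested. A minimal standard-library sketch of what a helper like that typically does; the keep_original flag and the returned path are assumptions for illustration, not the anvi'o API.

import gzip
import os
import shutil

def gzip_compress_file(input_file_path, keep_original=False):
    # Sketch only, not the anvi'o code: compress input_file_path into input_file_path + '.gz'
    # and (by default) remove the uncompressed original.
    output_file_path = input_file_path + '.gz'
    with open(input_file_path, 'rb') as src, gzip.open(output_file_path, 'wb') as dst:
        shutil.copyfileobj(src, dst)
    if not keep_original:
        os.remove(input_file_path)
    return output_file_path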