Exemplo n.º 1
0
    def gen_view_data_tables_from_atomic_data(self):
        """Build one view data table per (essential field, target) pair and store it.

        For each target ('contigs' and 'splits') and each field of
        `self.atomic_data_fields` accepted by `constants.IS_ESSENTIAL_FIELD`, a
        dictionary keyed by split name is assembled. Each entry holds the split's
        parent under '__parent__' plus one value per sample id. The dictionary is
        then handed to `TablesForViews.create_new_view` for the database at
        `self.merged_profile_db_path`, under the table name '<field>_<target>'.
        A view name is passed only for the 'splits' target.

        Side effects:
            - (re)sets `self.normalized_coverages` and `self.max_normalized_ratios`,
              both shaped as {target: {split_name: {input_profile_db_path: value}}}.
            - when `self.SNVs_profiled` is falsy, asks `TablesForViews` to remove
              the 'variability' view and blank its two tables.
            - opens a progress display and always closes it, even on failure.
        """
        essential_fields = [f for f in self.atomic_data_fields if constants.IS_ESSENTIAL_FIELD(f)]
        auxiliary_fields = [f for f in self.atomic_data_fields if constants.IS_AUXILIARY_FIELD(f)]

        # setting standard view table structure and types. one 'text' type is emitted
        # per auxiliary field so the types list always has the same length as the
        # structure list (a single hard-coded 'text' is only right for exactly one
        # auxiliary field).
        view_table_structure = ['contig'] + self.sample_ids_found_in_input_dbs + auxiliary_fields
        view_table_types = ['text'] + ['numeric'] * len(self.sample_ids_found_in_input_dbs) + ['text'] * len(auxiliary_fields)

        targets = ['contigs', 'splits']

        # generate a dictionary for normalized coverage of each contig across samples per target.
        # the attribute is assigned BEFORE it is filled, so it already exists on `self`
        # while the per-split helpers are running.
        self.normalized_coverages = {'contigs': {}, 'splits': {}}
        for target in targets:
            for split_name in self.split_names:
                coverages_of_split = {}
                self.normalized_coverages[target][split_name] = coverages_of_split
                for input_profile_db_path in self.profile_dbs_info_dict:
                    coverages_of_split[input_profile_db_path] = self.get_normalized_coverage_of_split(target, input_profile_db_path, split_name)

        # generate a dictionary for max normalized ratio of each contig across samples per target.
        # the value stored per split is indexed by input profile db path further below.
        self.max_normalized_ratios = {'contigs': {}, 'splits': {}}
        for target in targets:
            for split_name in self.split_names:
                self.max_normalized_ratios[target][split_name] = self.get_max_normalized_ratio_of_split(target, split_name)

        self.progress.new('Generating view data tables')
        try:
            for target in targets:
                for essential_field in essential_fields:
                    self.progress.update('Processing %s for %s ...' % (essential_field, target))

                    data_dict = {}
                    for split_name in self.split_names:
                        split_entry = {'__parent__': self.split_parents[split_name]}
                        data_dict[split_name] = split_entry

                        for input_profile_db_path in self.profile_dbs_info_dict:
                            # rows are keyed by sample id, while every source of values
                            # is keyed by the path of the input profile database
                            sample_id = self.profile_dbs_info_dict[input_profile_db_path]['sample_id']

                            if essential_field == 'normalized_coverage':
                                value = self.normalized_coverages[target][split_name][input_profile_db_path]
                            elif essential_field == 'max_normalized_ratio':
                                value = self.max_normalized_ratios[target][split_name][input_profile_db_path]
                            elif essential_field == 'relative_abundance':
                                value = self.get_relative_abundance_of_split(target, input_profile_db_path, split_name)
                            else:
                                # any other essential field is read straight from the atomic data
                                value = self.atomic_data_for_each_run[target][input_profile_db_path][split_name][essential_field]

                            split_entry[sample_id] = value

                    # time to store the data for this view in the profile database
                    table_name = '_'.join([essential_field, target])
                    TablesForViews(self.merged_profile_db_path).create_new_view(
                                                    data_dict=data_dict,
                                                    table_name=table_name,
                                                    table_structure=view_table_structure,
                                                    table_types=view_table_types,
                                                    view_name=essential_field if target == 'splits' else None)

            # if SNVs were not profiled, remove all entries from variability tables:
            if not self.SNVs_profiled:
                TablesForViews(self.merged_profile_db_path).remove(view_name='variability', table_names_to_blank=['variability_splits', 'variability_contigs'])
        finally:
            # close the progress display whether or not table generation succeeded
            self.progress.end()
Exemplo n.º 2
0
    def gen_view_data_tables_from_atomic_data(self):
        """Create one view data table per (essential field, target) pair in the profile db.

        For each target ('contigs' and 'splits') and each field of
        `self.atomic_data_fields` accepted by `constants.IS_ESSENTIAL_FIELD`, a
        table named '<field>_<target>' is created in the database at
        `self.profile_db_path`. It is filled with one row per split: the split
        name, one value per sample id in `self.merged_sample_ids`, and the
        auxiliary field values. Tables for the 'splits' target are also
        registered in the views table, which is stored at the very end.

        Side effects:
            - (re)sets `self.normalized_coverages` and `self.max_normalized_ratios`,
              both shaped as {target: {split_name: {sample_id: value}}}.
            - opens a progress display and a profile database connection; both
              are always released, even when table generation fails. The views
              table is stored only when everything succeeded.
        """
        essential_fields = [
            f for f in self.atomic_data_fields
            if constants.IS_ESSENTIAL_FIELD(f)
        ]
        auxiliary_fields = [
            f for f in self.atomic_data_fields
            if constants.IS_AUXILIARY_FIELD(f)
        ]

        views_table = dbops.TableForViews(self.profile_db_path,
                                          anvio.__profile__version__,
                                          progress=self.progress)

        # setting standard view table structure and types. one 'text' type is
        # emitted per auxiliary field so the types list always has the same
        # length as the structure list (a single hard-coded 'text' is only
        # right for exactly one auxiliary field).
        view_table_structure = ['contig'] + self.merged_sample_ids + auxiliary_fields
        view_table_types = (['text']
                            + ['numeric'] * len(self.merged_sample_ids)
                            + ['text'] * len(auxiliary_fields))

        targets = ['contigs', 'splits']

        # generate a dictionary for normalized coverage of each contig across samples per target.
        # the attribute is assigned BEFORE it is filled, so it already exists
        # on `self` while the per-split helpers are running.
        self.normalized_coverages = {'contigs': {}, 'splits': {}}
        for target in targets:
            for split_name in self.split_names:
                coverages_of_split = {}
                self.normalized_coverages[target][split_name] = coverages_of_split
                for sample_id in self.merged_sample_ids:
                    coverages_of_split[sample_id] = self.get_normalized_coverage_of_split(
                        target, sample_id, split_name)

        # generate a dictionary for max normalized ratio of each contig across samples per target.
        # the value stored per split is indexed by sample id further below.
        self.max_normalized_ratios = {'contigs': {}, 'splits': {}}
        for target in targets:
            for split_name in self.split_names:
                self.max_normalized_ratios[target][split_name] = \
                    self.get_max_normalized_ratio_of_split(target, split_name)

        # one '?' placeholder per column; identical for every table created below
        placeholders = ','.join(['?'] * len(view_table_structure))

        self.progress.new('Generating view data tables')
        try:
            profile_db = dbops.ProfileDatabase(self.profile_db_path, quiet=True)
            try:
                for target in targets:
                    for essential_field in essential_fields:
                        self.progress.update('Processing %s for %s ...' %
                                             (essential_field, target))

                        target_table = '_'.join([essential_field, target])

                        rows_by_split = {}
                        for split_name in self.split_names:
                            row = {'__parent__': self.split_parents[split_name]}
                            rows_by_split[split_name] = row

                            for sample_id in self.merged_sample_ids:
                                if essential_field == 'normalized_coverage':
                                    value = self.normalized_coverages[target][split_name][sample_id]
                                elif essential_field == 'max_normalized_ratio':
                                    value = self.max_normalized_ratios[target][split_name][sample_id]
                                elif essential_field == 'relative_abundance':
                                    value = self.get_relative_abundance_of_split(
                                        target, sample_id, split_name)
                                else:
                                    # any other essential field is read straight from the atomic data
                                    value = self.atomic_data_for_each_run[target][sample_id][split_name][essential_field]

                                row[sample_id] = value

                        # the rows for the essential field are now ready to be their own table.
                        # every column after the first is looked up by its header, so each
                        # auxiliary field must be a key of the row ('__parent__' is the only
                        # non-sample key set above).
                        profile_db.db.create_table(target_table,
                                                   view_table_structure,
                                                   view_table_types)
                        db_entries = [
                            tuple([split_name] +
                                  [rows_by_split[split_name][h] for h in view_table_structure[1:]])
                            for split_name in self.split_names
                        ]
                        profile_db.db._exec_many(
                            '''INSERT INTO %s VALUES (%s)''' % (target_table, placeholders),
                            db_entries)

                        if target == 'splits':
                            views_table.append(essential_field, target_table)
            finally:
                # never leave the database connection open, even on failure
                profile_db.disconnect()
        finally:
            # close the progress display whether or not table generation succeeded
            self.progress.end()

        # store views in the database
        views_table.store()