def gen_view_data_tables_from_atomic_data(self):
    """Generate the view data tables and store them in the merged profile database.

    For each target ('contigs' and 'splits') and each essential field found in
    `self.atomic_data_fields`, a dictionary keyed by split name is assembled
    with one value per sample plus a '__parent__' entry, and handed to
    `TablesForViews.create_new_view` under the table name
    '<essential_field>_<target>'. A view name is only registered for the
    'splits' target; for 'contigs' `view_name` is None.

    Side effects visible in this method:
      - sets `self.normalized_coverages` and `self.max_normalized_ratios`,
        both shaped as {target: {split_name: ...}};
      - writes tables through `TablesForViews(self.merged_profile_db_path)`;
      - when `self.SNVs_profiled` is falsy, asks `TablesForViews` to blank the
        'variability_splits' and 'variability_contigs' tables;
      - drives `self.progress` (new / update / end).
    """
    essential_fields = [f for f in self.atomic_data_fields if constants.IS_ESSENTIAL_FIELD(f)]
    auxiliary_fields = [f for f in self.atomic_data_fields if constants.IS_AUXILIARY_FIELD(f)]

    targets = ['contigs', 'splits']

    # setting standard view table structure and types. every auxiliary field
    # gets its own 'text' type so that the two lists always have the same
    # length, regardless of how many auxiliary fields there are.
    view_table_structure = ['contig'] + self.sample_ids_found_in_input_dbs + auxiliary_fields
    view_table_types = (['text']
                        + ['numeric'] * len(self.sample_ids_found_in_input_dbs)
                        + ['text'] * len(auxiliary_fields))

    # generate a dictionary for normalized coverage of each contig across samples per target.
    # this is filled completely before the max normalized ratios are requested, as in the
    # original statement order.
    self.normalized_coverages = {'contigs': {}, 'splits': {}}
    for target in targets:
        for split_name in self.split_names:
            self.normalized_coverages[target][split_name] = {}
            for input_profile_db_path in self.profile_dbs_info_dict:
                self.normalized_coverages[target][split_name][input_profile_db_path] = \
                    self.get_normalized_coverage_of_split(target, input_profile_db_path, split_name)

    # generate a dictionary for max normalized ratio of each contig across samples per target.
    # the value returned for a split is indexed by input profile db path further below.
    self.max_normalized_ratios = {'contigs': {}, 'splits': {}}
    for target in targets:
        for split_name in self.split_names:
            self.max_normalized_ratios[target][split_name] = \
                self.get_max_normalized_ratio_of_split(target, split_name)

    self.progress.new('Generating view data tables')
    for target in targets:
        for essential_field in essential_fields:
            self.progress.update('Processing %s for %s ...' % (essential_field, target))

            data_dict = {}
            for split_name in self.split_names:
                data_dict[split_name] = {'__parent__': self.split_parents[split_name]}

                for input_profile_db_path in self.profile_dbs_info_dict:
                    # data is looked up by profile db path, but stored under the sample id
                    sample_id = self.profile_dbs_info_dict[input_profile_db_path]['sample_id']

                    if essential_field == 'normalized_coverage':
                        value = self.normalized_coverages[target][split_name][input_profile_db_path]
                    elif essential_field == 'max_normalized_ratio':
                        value = self.max_normalized_ratios[target][split_name][input_profile_db_path]
                    elif essential_field == 'relative_abundance':
                        value = self.get_relative_abundance_of_split(target, input_profile_db_path, split_name)
                    else:
                        value = self.atomic_data_for_each_run[target][input_profile_db_path][split_name][essential_field]

                    data_dict[split_name][sample_id] = value

            # time to store the data for this view in the profile database
            table_name = '_'.join([essential_field, target])
            TablesForViews(self.merged_profile_db_path).create_new_view(
                data_dict=data_dict,
                table_name=table_name,
                table_structure=view_table_structure,
                table_types=view_table_types,
                view_name=essential_field if target == 'splits' else None)

    # if SNVs were not profiled, remove all entries from variability tables:
    if not self.SNVs_profiled:
        TablesForViews(self.merged_profile_db_path).remove(
            view_name='variability',
            table_names_to_blank=['variability_splits', 'variability_contigs'])

    self.progress.end()
def gen_view_data_tables_from_atomic_data(self):
    """Generate the view data tables and write them into the profile database.

    For each target ('contigs' and 'splits') and each essential field found in
    `self.atomic_data_fields`, a table named '<essential_field>_<target>' is
    created in the database at `self.profile_db_path` and filled with one row
    per split: the split name, one value per entry of `self.merged_sample_ids`,
    and then the auxiliary columns. Tables for the 'splits' target are also
    appended to a `dbops.TableForViews` instance, which is stored at the end.

    Side effects visible in this method:
      - sets `self.normalized_coverages` and `self.max_normalized_ratios`,
        both shaped as {target: {split_name: ...}};
      - creates and populates tables through `dbops.ProfileDatabase`;
      - drives `self.progress` (new / update / end).
    """
    essential_fields = [f for f in self.atomic_data_fields if constants.IS_ESSENTIAL_FIELD(f)]
    auxiliary_fields = [f for f in self.atomic_data_fields if constants.IS_AUXILIARY_FIELD(f)]

    targets = ['contigs', 'splits']

    views_table = dbops.TableForViews(self.profile_db_path, anvio.__profile__version__, progress=self.progress)

    # setting standard view table structure and types. every auxiliary field
    # gets its own 'text' type so the number of types always matches the
    # number of columns (and of '?' placeholders in the INSERT below).
    view_table_structure = ['contig'] + self.merged_sample_ids + auxiliary_fields
    view_table_types = (['text']
                        + ['numeric'] * len(self.merged_sample_ids)
                        + ['text'] * len(auxiliary_fields))

    # one '?' per column; this does not change between tables
    placeholders = ','.join(['?'] * len(view_table_structure))

    # generate a dictionary for normalized coverage of each contig across samples per target.
    # this is filled completely before the max normalized ratios are requested, as in the
    # original statement order.
    self.normalized_coverages = {'contigs': {}, 'splits': {}}
    for target in targets:
        for split_name in self.split_names:
            self.normalized_coverages[target][split_name] = {}
            for sample_id in self.merged_sample_ids:
                self.normalized_coverages[target][split_name][sample_id] = \
                    self.get_normalized_coverage_of_split(target, sample_id, split_name)

    # generate a dictionary for max normalized ratio of each contig across samples per target.
    # the value returned for a split is indexed by sample id further below.
    self.max_normalized_ratios = {'contigs': {}, 'splits': {}}
    for target in targets:
        for split_name in self.split_names:
            self.max_normalized_ratios[target][split_name] = \
                self.get_max_normalized_ratio_of_split(target, split_name)

    self.progress.new('Generating view data tables')
    profile_db = dbops.ProfileDatabase(self.profile_db_path, quiet=True)
    # NOTE(review): if anything below raises, `profile_db.disconnect()` and
    # `self.progress.end()` are not reached -- confirm whether callers rely on that.
    for target in targets:
        for essential_field in essential_fields:
            self.progress.update('Processing %s for %s ...' % (essential_field, target))

            target_table = '_'.join([essential_field, target])

            view_data = {}
            for split_name in self.split_names:
                view_data[split_name] = {'__parent__': self.split_parents[split_name]}

                for sample_id in self.merged_sample_ids:
                    if essential_field == 'normalized_coverage':
                        value = self.normalized_coverages[target][split_name][sample_id]
                    elif essential_field == 'max_normalized_ratio':
                        value = self.max_normalized_ratios[target][split_name][sample_id]
                    elif essential_field == 'relative_abundance':
                        value = self.get_relative_abundance_of_split(target, sample_id, split_name)
                    else:
                        value = self.atomic_data_for_each_run[target][sample_id][split_name][essential_field]

                    view_data[split_name][sample_id] = value

            # the data for the essential field is now ready to be its own table:
            profile_db.db.create_table(target_table, view_table_structure, view_table_types)

            # rows follow the column order of `view_table_structure`; the first
            # column ('contig') holds the split name itself.
            db_entries = [tuple([split_name] + [view_data[split_name][h] for h in view_table_structure[1:]])
                          for split_name in self.split_names]
            profile_db.db._exec_many('''INSERT INTO %s VALUES (%s)''' % (target_table, placeholders), db_entries)

            # only tables for splits are registered as views
            if target == 'splits':
                views_table.append(essential_field, target_table)

    profile_db.disconnect()
    self.progress.end()

    # store views in the database
    views_table.store()