Python TableForItemAdditionalData примеры использования

Язык программирования: Python

Пространство имен/Пакет: anvio.tables.miscdata

Примеров на hotexamples.com: 2

Python TableForItemAdditionalData — найдено 2 примера. Это лучшие примеры кода на Python для anvio.tables.miscdata.TableForItemAdditionalData, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

TableForItemAdditionalData(2)

get(1)

Основные методы

TableForItemAdditionalData (2)

get (1)

Пример #1

Показать файл

Файл: metapanops.py Проект: satish162/anvio

    def add_ECG_EAG_ratio_per_gene_cluster_into_pan_database(self):
        """Tally gene statuses per gene cluster and store them as items additional data.

        Every gene call of every gene cluster in `self.pan_summary.gene_clusters`
        is looked up in the dictionary returned by
        `self.get_gene_presence_in_the_environment_dict()` and counted under its
        status ('EAG', 'ECG' or 'NA'). For each gene cluster the three counts are
        stored under 'ECGs_and_EAGs!<status>' keys, together with an
        'EAG_ECG_ratio' value computed as EAG / (EAG + ECG), or 0 when that sum
        is zero. The result is handed to `TableForItemAdditionalData.add`.

        Side effects:
            Initializes the pan summary if it is not set, drives `self.progress`,
            and sets `self.args.just_do_it` to True before writing.

        Raises:
            ConfigError: if a genome's 'bin_id' has no entry in the gene presence
                dictionary while one of its gene calls is being processed.
        """
        if not self.pan_summary:
            self.init_pan_summary()

        presence_by_genome = self.get_gene_presence_in_the_environment_dict()

        self.progress.new('Working on ECG/EAG ratio per gene cluster')
        self.progress.update('...')

        status_names = ['EAG', 'ECG', 'NA']
        counts_per_cluster = {}

        cluster_names = list(self.pan_summary.gene_clusters.keys())
        total_clusters = len(cluster_names)
        for position, cluster_name in enumerate(cluster_names, start=1):
            # progress is reported as the percentage of gene clusters visited so far
            self.progress.update('%.2f' % (position * 100 / total_clusters))

            counts = dict.fromkeys(status_names, 0)
            genomes_in_cluster = self.pan_summary.gene_clusters[cluster_name]
            for internal_genome_name in genomes_in_cluster:
                genome_name = self.descriptions.genomes[internal_genome_name]['bin_id']

                for gene_caller_id in genomes_in_cluster[internal_genome_name]:
                    # checked per gene call, so a genome without gene calls in
                    # this cluster never triggers the error
                    if genome_name not in presence_by_genome:
                        self.progress.end()
                        raise ConfigError("Something is wrong... It seems you generated a pangenome with an internal genomes file\
                                           that is not identical to the internal genomes file you are using to run this program.")

                    gene_status = presence_by_genome[genome_name][gene_caller_id]
                    counts[gene_status] += 1

            counts_per_cluster[cluster_name] = counts

        self.progress.update('Setting up the items data dictionary ..')

        ratio_key = 'EAG_ECG_ratio'
        counts_prefix = 'ECGs_and_EAGs'
        count_keys = ['%s!%s' % (counts_prefix, status) for status in status_names]

        items_additional_data_dict = {}
        for cluster_name, counts in counts_per_cluster.items():
            # one column per status, holding its frequency in this gene cluster
            entry = {key: counts[status] for key, status in zip(count_keys, status_names)}

            # share of EAGs among EAG + ECG calls; 0 when there are none of either
            eag_plus_ecg = counts['EAG'] + counts['ECG']
            if eag_plus_ecg:
                entry[ratio_key] = counts['EAG'] / eag_plus_ecg
            else:
                entry[ratio_key] = 0

            items_additional_data_dict[cluster_name] = entry

        self.progress.end()

        # write everything to the database
        self.args.just_do_it = True
        items_additional_data_keys = count_keys + [ratio_key]
        TableForItemAdditionalData(self.args).add(items_additional_data_dict, items_additional_data_keys)

Пример #2

Показать файл

    def check_for_db_requests(self, config):
        """Export database tables referenced in the config to temporary TAB-delimited files.

        Each section returned by `self.get_other_sections(config)` is split into
        an alias and a matrix. A matrix written as `database::table` (or
        `!database::table`) is resolved to a database path, the table is read,
        written to a temporary file, and that file path is recorded in
        `self.matrix_paths[alias]`. Sections without `::` are left untouched.

        Args:
            config: parsed configuration object; only `has_option` and `get`
                are used on it here.

        Raises:
            ConfigError: if a `!database` is missing from `self.db_paths`, the
                database path does not exist, the table is not in the database,
                row ids of interest are combined with a 'dataframe' table, or
                the (filtered) table has no rows.
        """
        for section in self.get_other_sections(config):
            alias, matrix = section.split()

            # only `database::table` style entries need a temporary file
            if '::' not in matrix:
                continue

            database, table = matrix.split('::')

            if matrix.startswith('!'):
                # `!name` must be resolved through the db_paths dictionary
                database = database[1:]

                if database not in self.db_paths:
                    raise ConfigError('anvio could not recover the actual path of the database\
                                            (!%s) referenced in the config file, because the database\
                                            paths variable sent from the client does not have an entry\
                                            for it :( There are two options. One is to get a db_paths\
                                            dictionary sent to this class that contains a key for %s\
                                            with the full path to the dataase as a value. Or the table\
                                            "%s" can be exported to a TAB-delimited matrix and declared in\
                                            the config file. If you are experimenting and stuck here, please\
                                            see the documentation or send an e-mail to the developers.'
                                      % (database, database, table))

                database_path = self.db_paths[database]
            else:
                if database in self.db_paths:
                    database_path = os.path.abspath(self.db_paths[database])
                else:
                    database_path = os.path.abspath(database)

                # last resort: look for it relative to the input directory
                if not os.path.exists(database_path):
                    database_path = os.path.abspath(os.path.join(self.input_directory, database))

            if not os.path.exists(database_path):
                raise ConfigError("The database you requested (%s) is not where it was supposed to be ('%s') :/" % (database, database_path))

            dbc = db.DB(database_path, None, ignore_version=True)

            if table not in dbc.get_table_names():
                raise ConfigError('The table you requested (%s) does not seem to be in %s :/' % (table, database))

            # at this point the table exists and is accessible. anvi'o tables come
            # in two forms: 'dataframe' (key/value pairs) and matrix (MxN, one
            # column per attribute). the matrix form can be exported as is, the
            # dataframe form needs extra handling, so the section may declare
            # which form applies through the `table_form` option. background:
            # https://github.com/merenlab/anvio/issues/662
            table_form = config.get(section, 'table_form') if config.has_option(section, 'table_form') else None
            in_dataframe_form = table_form == 'dataframe'

            table_rows = dbc.get_all_rows_from_table(table)

            if self.row_ids_of_interest:
                if in_dataframe_form:
                    raise ConfigError("Oops .. anvi'o does not know how to deal with specific row ids of interest when a table\
                                           refernced from a clustering recipe is in dataframe form :(")

                # keep only rows whose first column is a requested row id
                table_rows = [row for row in table_rows if row[0] in self.row_ids_of_interest]

            if len(table_rows) == 0:
                raise ConfigError("It seems the table '%s' in the database it was requested from is empty. This\
                                        is not good. Here is the section that is not working for you: '%s' :/"
                                  % (table, section))

            tmp_file_path = filesnpaths.get_temp_file_path()

            # the two table forms are written out through different helpers
            if in_dataframe_form:
                args = argparse.Namespace(pan_or_profile_db=database_path, table_name=table)
                _, table_data_dict = TableForItemAdditionalData(args).get()
                store_dict_as_TAB_delimited_file(table_data_dict, tmp_file_path)
            else:
                table_structure = dbc.get_table_structure(table)
                columns_to_exclude = [column for column in ['entry_id', 'sample_id'] if column in table_structure]
                store_array(table_rows, tmp_file_path, table_structure, exclude_columns=columns_to_exclude)

            self.matrix_paths[alias] = tmp_file_path