Python KEGG.conv примеры использования

Язык программирования: Python

Пространство имен/Пакет: bioservices

Класс/Тип: KEGG

Метод/Функция: conv

Примеров на hotexamples.com: 6

Python KEGG.conv — найдено 6 примеров. Это лучшие примеры кода на Python для bioservices.KEGG.conv, взятые из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

KEGG(30)

get(15)

parse(13)

conv(5)

organism(5)

get_pathway_by_gene(3)

list(3)

lookfor_organism(2)

parse_kgml_pathway(2)

TIMEOUT(1)

clear_cache(1)

find(1)

link(1)

show_pathway(1)

Пример #1

Показать файл

    def test_extract_protein_interactions_kgml(self, kgml_file,
                                               expected_no_rel):
        """Check how many relations are extracted from a KGML fixture.

        The KEGG and UniProt services attached to the extractor have their
        ``link``, ``conv`` and ``mapping`` methods replaced by canned
        ``MagicMock`` return values before the extractor is run.

        Parameters
        ----------
        kgml_file : str
            File name of the KGML fixture, resolved relative to the directory
            of this test module.
        expected_no_rel : int
            Expected length of the extractor's result.
        """
        # Arrange
        sut = KeggProteinInteractionsExtractor()
        test_dir = os.path.dirname(os.path.realpath(__file__))
        fixture_path = os.path.join(test_dir, kgml_file)
        with open(fixture_path, 'r') as fixture:
            kgml_string = fixture.read()

        # Stub out the KEGG operations
        kegg_stub = KEGG()
        sut.kegg = kegg_stub

        # Whatever is asked for, answer with ko numbers mapped to hsa numbers
        ko_to_hsa_rows = [
            "ko:K00922	hsa:5293",
            "ko:K00922	hsa:5291",
            "ko:K02649	hsa:5295",
        ]
        kegg_stub.link = MagicMock(return_value="\n".join(ko_to_hsa_rows))

        # Whatever is asked for, answer with hsa numbers mapped to uniprot numbers
        hsa_to_uniprot = {"hsa:5293": "up:B0LPE5"}
        kegg_stub.conv = MagicMock(return_value=hsa_to_uniprot)

        # Stub out UniProt
        uniprot_stub = UniProt()
        sut.uniprot = uniprot_stub
        accession_to_genes = {"B0LPE5": ["gene1", "gene2"]}
        uniprot_stub.mapping = MagicMock(return_value=accession_to_genes)

        # Act
        relations = sut.extract_protein_interactions_kgml(kgml_string)

        # Assert
        self.assertEqual(expected_no_rel, len(relations))

Пример #2

Показать файл

def kegg_to_uniprot(fr='hsa', cache=False):
    """Fetch a `KEGG` -> `UniProt` mapping (both `TrEMBL` and `SwissProt`)
    through the `bioservices` KEGG client.

    Parameters
    ----------
    fr : str, optional, default: 'hsa'
        KEGG database identifier to convert.

    cache : bool, optional, default: False
        Passed on to `bioservices.KEGG`. Caching saves time, but an old
        cache will not reflect newer database releases.

    Returns
    -------
    `dict`
        Each `KEGG` identifier mapped to the list of `UniProt` accessions
        that point at it.
    """

    service = KEGG(cache=cache)
    raw_pairs = service.conv(fr, 'uniprot')

    grouped = {}
    for prefixed_accession, kegg_id in raw_pairs.items():
        # Keep only the part after the 'up:' prefix.
        accession = prefixed_accession.split(':')[1]
        grouped.setdefault(kegg_id, []).append(accession)
    return grouped

Пример #3

Показать файл

Файл: tcell.py Проект: statisticalbiotechnology/metabric-pathway-survival

def tcell_read_metabolomics_data():
    """Build the T-cell metabolomics DataFrame indexed by "MetaboliteID".

    Steps, in order:

    1. Obtain the publication's Excel file through ``cache.UrlFileCache``
       (presumably downloaded once and reused afterwards - confirm in the
       ``cache`` module) and read the "normalized by sample mean" sheet.
    2. Fold every column named ``<name>.1`` into ``<name>`` using the
       geometric mean of the two columns, then drop the ``.1`` column.
    3. Apply ``np.exp2`` to undo the log2 scaling of the spreadsheet values.
    4. Group rows by "KEGG_ID" and pass each group, together with the result
       of ``KEGG.conv("chebi", "compound")``, to
       ``one_row_per_compound_convert``.

    Returns
    -------
    pandas.DataFrame
        The converted table with "MetaboliteID" as its index.
    """
    xlsx_path = os.path.join(cache.get_cache_path(), metabolite_expression_name + ".xlsx")
    xlsx_cache = cache.UrlFileCache(xlsx_path, metabolomics_data_url)
    table = pd.read_excel(
        xlsx_cache.get_file_name(),
        sheet_name="normalized by sample mean",
        index_col=0,
        usecols="A,C:HN",
        skiprows=[0],
    )

    for column in table.columns:
        pieces = column.split('.')
        # Technical replicates are the columns carrying a ".1" suffix.
        if len(pieces) < 2 or pieces[1] != "1":
            continue
        base = pieces[0]
        table[base] = scipy.stats.gmean(table[[base, column]], axis=1)
        table.drop(column, axis=1, inplace=True)

    table.index.name = "KEGG_ID"
    # The spreadsheet holds log2 values; bring them back to linear scale.
    table = table.apply(np.exp2)

    compound_to_chebi = KEGG(verbose=False).conv("chebi", "compound")

    def convert_group(group):
        return one_row_per_compound_convert(group, compound_to_chebi)

    table = table.groupby("KEGG_ID", group_keys=False).apply(convert_group)
    table = table.reset_index(drop=True)
    table.set_index("MetaboliteID", inplace=True)
    return table

Пример #4

Показать файл

    it.
'''

import os
import click
import json
import requests
import time
import xmltodict
import bioservices
from bioservices import KEGG, ChEBI
from zeep import Client
from tqdm import tqdm

# Module-level KEGG client. The conv("chebi", "compound") table is requested
# once, when this module is imported, and kept for later lookups
# (presumably KEGG compound ID -> ChEBI ID; confirm against the
# bioservices KEGG.conv documentation).
k = KEGG(verbose=False)
map_kegg_chebi = k.conv("chebi", "compound")
# bioservices ChEBI client with verbose output switched off.
c = ChEBI(verbose=False)

# zeep SOAP clients built from the ChEBI and ChemSpider WSDL endpoints.
chebi_client = Client(
    "https://www.ebi.ac.uk/webservices/chebi/2.0/webservice?wsdl")
chemspider_client = Client("https://www.chemspider.com/InChI.asmx?WSDL")

# Compounds that cannot be found at all.
not_founds = []

# A global dictionary for these annotations, kept so that the web services,
# which are provided free of charge, are not queried more than necessary.
# NOTE(review): presumably a cache of already-converted compounds; confirm
# against the code that fills it.

# NOTE: a `global` statement at module level has no effect; the name below is
# already a module-level global.
global CONVERTED_COMPOUNDS
CONVERTED_COMPOUNDS = {}

Пример #5

Показать файл

Файл: gene_set_analysis.py Проект: jorgemlferreira/liverx

# NOTE: this fragment uses Python 2 print statements.
# ---- Set-up UniProt bioservice (caching enabled)
uniprot = UniProt(cache=True)

# ---- Set-up QuickGO bioservice
quickgo = QuickGO(cache=True)

# ---- Set-up KEGG bioservice
kegg, kegg_parser = KEGG(cache=True), KEGGParser()

# Restrict the KEGG client to the 'mmu' organism code.
kegg.organism = 'mmu'
print '[INFO] KEGG service configured'

# Parse the KGML of every pathway id reported by the client: {pathway id: parsed KGML}.
kegg_pathways = {p: kegg.parse_kgml_pathway(p) for p in kegg.pathwayIds}
print '[INFO] KEGG pathways extracted: ', len(kegg_pathways)

# Convert KEGG pathways Gene Name to UniProt
k2u = kegg.conv('uniprot', 'mmu')

# For each pathway, walk the entries whose type is 'gene', split each entry
# name on spaces, keep the names present in k2u, and store the k2u value with
# its '<prefix>:' part removed. Result: {pathway id: set of accessions}.
kegg_pathways_proteins = {p: {k2u[x].split(':')[1] for i in kegg_pathways[p]['entries'] if i['type'] == 'gene' for x in i['name'].split(' ') if x in k2u} for p in kegg_pathways}

# First a flat set of every accession seen in any pathway ...
kegg_uniprot_acc_map = {x for p in kegg_pathways_proteins for x in kegg_pathways_proteins[p]}
# ... then a dict from each accession to the third '|'-separated field of the
# first space-delimited token of its FASTA text (presumably the UniProt entry
# name from the FASTA header; confirm). One get_fasta call per accession.
kegg_uniprot_acc_map = {p: uniprot.get_fasta(str(p)).split(' ')[0].split('|')[2] for p in kegg_uniprot_acc_map}

# Re-key every pathway's protein set through the accession map built above.
kegg_pathways_proteins = {p: {kegg_uniprot_acc_map[i] for i in kegg_pathways_proteins[p]} for p in kegg_pathways_proteins}
print '[INFO] KEGG pathways Ids converted to UniProt: ', len(kegg_pathways_proteins)

# ---- Set-up GO Terms gene list
# `wd` is defined outside this excerpt.
go_terms_file = '%s/files/go_terms_uniprot.pickle' % wd

# Reuse the pickled GO terms when the file already exists.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# pickles produced by this project.
if os.path.isfile(go_terms_file):
    with open(go_terms_file, 'rb') as handle:
        go_terms = pickle.load(handle)

Пример #6

Показать файл

Файл: abCallToKeggAppend.py Проект: NDHall/coge_tools

# Bookkeeping for the first pass: number of matched keys, the matched keys
# themselves, and the keys that are absent from ab_dict.
found = 0
found_list = []
missing = []
# `ecorToSita` and `ab_dict` are defined outside this excerpt.
for line in ecorToSita:
    # Each line must hold exactly two fields separated by a single space;
    # anything else makes the unpacking raise ValueError.
    ecor, sita = line.split(' ')
    if ecor in ab_dict:
        # Only entries that currently hold exactly two items are extended.
        # Keys present with any other length are neither counted as found
        # nor recorded as missing.
        if len(ab_dict[ecor]) == 2:
            ab_dict[ecor].append(sita)
            found += 1
            found_list.append(ecor)
    else:
        missing.append(ecor)
# load kegg module
from bioservices import KEGG
s = KEGG()
# Conversion table; the lookups below use keys of the form 'ncbi-proteinid:<id>'.
convDb = s.conv('sita', 'ncbi-proteinid')
# NOTE(review): the result of this lookup is discarded, and it raises KeyError
# if the key is absent. It looks like a leftover sanity check; confirm before
# relying on it.
convDb['ncbi-proteinid:YP_008815800']

# annotate kegg module
# Second pass: `annotated` collects keys whose id was found in convDb,
# `no_joy_for_sita` collects the ids that were not.
annotated = []
no_joy_for_sita = []
for gene in found_list:
    # The last item is the value appended in the first pass.
    sita = ab_dict[gene][-1]
    # The final two characters are dropped before the lookup (presumably a
    # version suffix such as ".1"; TODO confirm).
    sita_q = 'ncbi-proteinid:{g}'.format(g=sita[:-2])
    if sita_q in convDb:
        ab_dict[gene].append(convDb[sita_q])
        annotated.append(gene)
    else:
        no_joy_for_sita.append(sita)
# Initialised here; its use lies beyond this excerpt.
counter = 0