Пример #1
1
def download_all_kegg_pathways(species_code='mmu'):
    """Download every KEGG pathway of a species together with its genes.

    Args:
        species_code: organism code handed to ``REST.kegg_list("pathway", ...)``.

    Returns:
        dict keyed by the first column of the pathway listing. Each value is
        a dict with ``'name'`` (second listing column), ``'geneid'`` (set of
        int) and ``'gene_symbol'`` (set of str) collected from the GENE
        section of the pathway's flat file.
    """
    pathways_str = REST.kegg_list("pathway", species_code).read()
    pathways = {}
    for row in pathways_str.rstrip().split('\n'):
        if not row:
            # An empty listing yields a single empty row; nothing to record.
            continue
        fields = row.split('\t')
        pathways[fields[0]] = {'name': fields[1]}

    for pathway, info in pathways.items():
        info['geneid'] = set()
        info['gene_symbol'] = set()
        pathway_file = REST.kegg_get(pathway).read()  # one request per pathway

        # Section names occupy the first 12 columns; continuation lines leave
        # them blank, so remember the most recent section seen.
        current_section = None
        for line in pathway_file.rstrip().split("\n"):
            section = line[:12].strip()
            if section:
                current_section = section
            if current_section != "GENE":
                continue
            try:
                gene_identifiers, _ = line[12:].split("; ")[:2]
                geneid, gene_symbol = gene_identifiers.split()
                gene_number = int(geneid)
            except ValueError:
                # Line is not "<numeric id> <symbol>; <description>": discard
                # it, but let anything unexpected (e.g. Ctrl-C) propagate.
                continue
            info['geneid'].add(gene_number)
            info['gene_symbol'].add(gene_symbol)

    return pathways
Пример #2
0
    def load_taxon_id(self, in_code=None):
        """Return the taxon id for an organism, looking it up through KEGG.

        A value already stored in ``self.taxon_id`` is returned immediately.
        Otherwise the KEGG organism and genome listings are downloaded, the
        id is parsed out of the matching genome row, stored on
        ``self.taxon_id`` and returned.

        in_code -- organism code matched against column 1 of the organism
                   listing; defaults to ``self.tables['run_infos'].organism[0]``.

        TODO: cache the org_table and gen_table in cache/
        """
        if self.taxon_id is not None:
            return self.taxon_id
        import Bio.KEGG.REST as kegg  # requires BioPython 1.65 or later!

        if in_code is None:
            in_code = self.tables['run_infos'].organism[0]

        # Organism listing: find the row whose column 1 equals in_code and
        # take its column 0.
        organism_text = ''.join(kegg.kegg_list('organism').readlines())
        organism_buf = cStringIO.StringIO(organism_text)
        org_table = pd.read_table(organism_buf, sep='\t', header=None)
        organism_buf.close()
        matching_orgs = org_table.ix[org_table[1] == in_code]
        kegg_code = matching_orgs[0].values[0]

        # Genome listing: the row keyed 'genome:<kegg_code>' carries a
        # description whose third ', '-separated field starts with the id.
        genome_text = ''.join(kegg.kegg_list('genome').readlines())
        genome_buf = cStringIO.StringIO(genome_text)
        gen_table = pd.read_table(genome_buf, sep='\t', header=None)
        genome_buf.close()
        genome_rows = gen_table.ix[gen_table[0] == 'genome:' + kegg_code]
        description = genome_rows[1].values[0]
        third_field = description.split(', ')[2]
        taxon_id = int(third_field.split('; ')[0])

        self.taxon_id = taxon_id
        return taxon_id
Пример #3
0
Файл: lib.py Проект: endrebak/kg
def get_kegg_gene_to_external_map(species):
    """Map KEGG genes to external gene names.

    Legacy function for goverlap. Deprecated.

    Each listing line is stripped of the ``<species>:`` prefix and newline,
    then cut into tokens at tabs and semicolons. The first token becomes
    ``entrezgene``; the second token is split on ", " and produces one row
    per name. Returns the rows as a DataFrame.
    """
    kegg_list = REST.kegg_list(species)

    prefix_or_newline = re.compile(r"{}:|\n".format(species))
    token_pattern = re.compile(r"[^\t;\n]+")

    records = []
    for raw_line in kegg_list:
        # The handle may yield bytes or text depending on the environment.
        try:
            raw_line = raw_line.decode("utf-8")
        except AttributeError:
            pass

        tokens = token_pattern.findall(prefix_or_newline.sub("", raw_line))
        names = tokens[1].split(", ")
        records.extend(
            {"entrezgene": tokens[0], "gene": name} for name in names)

    return DataFrame.from_dict(records)
Пример #4
0
def fetch_entry_list(database):
    """Fetch the KEGG listing of `database` as a two-column DataFrame.

    The tab-separated response of ``REST.kegg_list(database)`` is read
    without a header row into the columns ``id`` and ``description``.
    """
    listing_handle = REST.kegg_list(database)
    column_names = ['id', 'description']
    return pd.read_csv(listing_handle,
                       sep='\t',
                       header=None,
                       names=column_names)
Пример #5
0
    def get_kegg(self, K_number):
        """Store the KEGG listing entry for `K_number` in ``kegg_reference``.

        Does nothing if the table already holds a row for `K_number`.
        Otherwise the entry is fetched with ``REST.kegg_list`` and its
        description (the text after the first tab) is split at the first
        ';' into `identifier` and a definition; the definition is split at
        "[EC:" into `name` and, when present exactly once, `ec_number`.

        If fetching or parsing fails, the error is written to stderr and a
        row with NULL name/identifier/ec_number is still inserted, as before.
        The insert is committed before returning.
        """
        print(K_number)
        self.cur.execute(
            """SELECT K_number from kegg_reference  where K_number = ?""",
            (K_number, ))
        if self.cur.fetchone():
            return

        name = identifier = ec_number = None
        try:
            kegg_list = REST.kegg_list(K_number).read()
            description = kegg_list.strip().split("\t", 1)[1]
            identifier, definition = description.split(";", 1)
            parts = definition.rstrip("]").split("[EC:")
            name = parts[0]
            if len(parts) == 2:
                ec_number = parts[1]
        except Exception as e:
            # Terminate the line so the rowid written below is not glued
            # onto the error message.
            sys.stderr.write("\t".join([K_number, str(e)]) + "\n")

        # Runs after success and after a handled failure alike, but no longer
        # while a KeyboardInterrupt/SystemExit is propagating.
        self.cur.execute(
            """INSERT OR IGNORE INTO kegg_reference(K_number, name, identifier, ec_number)
                                 VALUES(?,?,?,?)""",
            (K_number, name, identifier, ec_number))
        sys.stderr.write(str(self.cur.lastrowid) + "\n")

        self.conx.commit()
Пример #6
0
def create_id_name_dict(db):
    """Return a dict mapping each entry id of KEGG database `db` to its name.

    The id is the first tab-separated field of a listing line and the name
    is the second; a later duplicate id overwrites an earlier one.
    """
    listing_text = REST.kegg_list(db).read()
    rows = (entry.split('\t') for entry in listing_text.splitlines())
    return {fields[0]: fields[1] for fields in rows}
Пример #7
0
def queryAllPathway(fpathway=None, fpathwayInfo=None, hsa='hsa'):
    """List the KEGG pathway ids of an organism, optionally saving them.

    Args:
        fpathway: if truthy, passed with the id list to ``saveList``.
        fpathwayInfo: if truthy, passed with the ``(id, description)`` list
            to ``saveList``.
        hsa: organism code given to ``REST.kegg_list("pathway", ...)``.

    Returns:
        list of pathway ids with any ``<db>:`` prefix removed.
    """
    listing = REST.kegg_list("pathway", hsa).read()
    repair_pathways = []
    repair_pathways_info = []
    for line in listing.rstrip().split("\n"):
        if not line:
            # Empty listing: nothing to unpack.
            continue
        entry, description = line.split("\t")
        # Accept both "path:hsa00010" and a bare "hsa00010"; indexing [1]
        # unconditionally raised IndexError on ids without a prefix.
        pieces = entry.split(':')
        if len(pieces) > 1:
            entry = pieces[1]
        repair_pathways.append(entry)
        repair_pathways_info.append((entry, description))
    if fpathway:
        saveList(repair_pathways, fpathway)
    if fpathwayInfo:
        saveList(repair_pathways_info, fpathwayInfo)
    return repair_pathways
Пример #8
0
def _retrieve_lists(dbs):
    """Return ``{db: [entry ids]}`` for every KEGG database name in `dbs`.

    Ids are the first tab-separated field of each listing line; they are
    de-duplicated by passing through a dict keyed on the id.
    """
    lists = {}
    for db in dbs:
        listing_text = REST.kegg_list(db).read()

        # Built as id -> name even though only the ids are kept, so each
        # line must still provide both fields.
        id_to_name = {}
        for record in listing_text.splitlines():
            fields = record.split('\t')
            id_to_name[fields[0]] = fields[1]

        lists[db] = [entry_id for entry_id in id_to_name]

    return lists
Пример #9
0
Файл: lib.py Проект: vd4mmind/kg
def get_pathway_to_definition_map(species):
    """Map KEGG pathways to their definition.

    Returns a DataFrame with the columns ``kegg_pathway`` (listing id with
    ``path:<species>`` and the newline removed) and
    ``kegg_pathway_definition`` (description text before the first " - ").
    """
    kegg_lines = REST.kegg_list("pathway", species)

    prefix_or_newline = re.compile(r"path:{}|\n".format(species))

    def to_row(raw_line):
        pathway, definition = prefix_or_newline.sub("", raw_line).split("\t")
        # Everything from " - " onwards is species info; drop it.
        return {"kegg_pathway": pathway,
                "kegg_pathway_definition": definition.split(" - ")[0]}

    return DataFrame.from_dict([to_row(raw_line) for raw_line in kegg_lines])
Пример #10
0
def get_individual(target):
    """Download the KEGG listing for `target` and save it as a TSV file.

    The listing is turned into a DataFrame with the columns `target`
    (first field of each line) and ``name`` (second field). In every cell,
    everything up to and including the last ':' is removed. The frame is
    written to ``./database/kegg_<target>.tsv`` and returned.

    Exits the process with an error message if the request raises
    ``HTTPError``.
    """
    # Only the request itself is guarded, so parsing or file errors are not
    # misreported as an unknown database.
    try:
        individual = REST.kegg_list(target).readlines()
    except HTTPError:
        sys.exit("ERROR: Please select the available database according to the following URL. (http://www.kegg.jp/kegg/rest/keggapi.html) ")

    target_db = []
    name = []
    for line in individual:
        if not line.strip():
            # Blank lines carry no entry and would break the indexing below.
            continue
        entity = line.strip().split('\t')
        target_db.append(entity[0])
        name.append(entity[1])

    df_individual = pd.DataFrame({target: target_db, "name": name})
    # Raw string: same pattern as before, without the invalid "\:" escape
    # warning. The greedy ".+" makes it strip through the LAST colon.
    df_individual.replace(r'.+(?<=\:)', '', regex=True, inplace=True)
    df_individual.to_csv("./database/" + 'kegg_' + target + '.tsv', sep='\t', index=None)

    return df_individual
Пример #11
0
def kegg_rest(type: str,
              argument: str,
              optional_argument: str = "",
              sleep_time: float = .5) -> List[str]:
    """This function calls Biopython's KEGG REST function and returns the lines as a string list.

    All empty lines are deleted from the list as they do not contain any information.

    Arguments
    ----------
    * type: str ~ The KEGG REST action. Can be either 'info', 'get', 'link', 'list' or 'find'.
    * argument: str ~ The argument for the KEGG order.
    * optional_argument: str="" ~ The second argument which is passed on for the 'link', 'list'
      and 'find' actions.
    * sleep_time: float=0.5 ~ The time in seconds that shall be waited after a REST action is
      performed, to avoid contacting the server too often. No officially specified
      minimum interval for KEGG was found when this was written.

    Raises
    ----------
    * ValueError ~ If type is not one of the supported actions. This is raised before any
      request is sent and before sleeping.
    """
    # Execute correct Biopython KEGG REST function.
    if type == "info":
        kegg_data = REST.kegg_info(argument)
    elif type == "get":
        kegg_data = REST.kegg_get(argument)
    elif type == "link":
        kegg_data = REST.kegg_link(argument, optional_argument)
    elif type == "list":
        kegg_data = REST.kegg_list(argument, optional_argument)
    elif type == "find":
        kegg_data = REST.kegg_find(argument, optional_argument)
    else:
        # Previously an unknown action slept and then failed with an
        # UnboundLocalError on kegg_data; fail early and clearly instead.
        raise ValueError(
            "Unknown KEGG REST action {!r}; expected 'info', 'get', 'link', "
            "'list' or 'find'.".format(type))

    # Wait the sleep time doing nothing.
    time.sleep(sleep_time)

    # Get one string per line of the KEGG REST result.
    lines: List[str] = kegg_data.read().split("\n")

    # Delete empty lines.
    not_empty_lines: List[str] = [i for i in lines if len(i) > 0]

    return not_empty_lines
Пример #12
0
Файл: lib.py Проект: vd4mmind/kg
def get_kegg_gene_to_external_map(species):
    """Map KEGG genes to external gene names.

    Every listing line has the ``<species>:`` prefix and newline removed and
    is tokenised at tabs and semicolons. Token 0 is the ``kegg_gene``;
    token 1 is split on ", " and gives one ``gene`` row per alias.
    Returns the rows as a DataFrame.
    """
    kegg_list = REST.kegg_list(species)

    strip_pattern = re.compile(r"{}:|\n".format(species))
    token_pattern = re.compile(r"[^\t;\n]+")

    rowdicts = []
    for entry_line in kegg_list:
        tokens = token_pattern.findall(strip_pattern.sub("", entry_line))
        aliases = tokens[1].split(", ")
        kegg_gene = tokens[0]
        rowdicts += [{"kegg_gene": kegg_gene, "gene": alias}
                     for alias in aliases]

    return DataFrame.from_dict(rowdicts)
Пример #13
0
    def save_all_kegg_pathway_files(paths):
        """Download and save the KEGG pathway files of each species in `paths`.

        Args:
            paths (dict of str:str): maps a species code to the directory its
                pathway files are written to. A missing directory is created
                when the first file for it is about to be written.

        Each pathway is saved as ``<pathway id with ':' replaced by '_'>.txt``.
        """
        for species, path in paths.items():
            for listing_line in REST.kegg_list("pathway", species):
                # The pathway id is the first whitespace-separated token.
                pathway_id = listing_line.split()[0]
                contents = REST.kegg_get(dbentries=pathway_id).read()

                if not os.path.exists(path):
                    os.makedirs(path)

                safe_name = pathway_id.replace(":", "_")
                target = os.path.join(path, "{}.txt".format(safe_name))
                with open(target, "w") as outfile:
                    outfile.write(contents)
Пример #14
0
def main():
    """Write one ``in/<organism>.gmt`` file of pathway gene sets per organism.

    For each organism code the pathway listing is fetched, every pathway's
    flat file is downloaded and the gene symbols of its GENE section are
    grouped under the pathway description (text before " - "). Groups with
    more than four symbols are written, sorted by description, as
    ``<description>\\t\\t<sorted symbols joined by tabs>``.
    """
    # 'hsa', 'dme'
    for pwid in ['dre', 'cel', 'sce', 'mmu']:
        listing = REST.kegg_list('pathway', pwid).read()

        pathways_dict = defaultdict(list)
        for pathway in listing.strip().split('\n'):
            entry, description = pathway.split('\t')
            pathway_file = REST.kegg_get(entry).read()

            # Section names sit in the first 12 columns; blank there means
            # "still in the previous section".
            current_section = None
            for line in pathway_file.rstrip().split('\n'):
                header = line[:12].strip()
                if header != '':
                    current_section = header

                if current_section != 'GENE':
                    continue
                parts = line[12:].split('; ')
                if len(parts) <= 1:
                    continue
                gene_id, gene_symbol = parts[0].split()
                pathways_dict[description.split(' - ')[0]].append(gene_symbol)

        with open('in/{0}.gmt'.format(pwid), 'w') as pw_file:
            out_pw = []
            for desc in sorted(pathways_dict):
                symbols = pathways_dict[desc]
                if len(symbols) > 4:
                    out_pw.append(
                        '{0}\t\t{1}\n'.format(desc, '\t'.join(sorted(symbols))))
            pw_file.writelines(out_pw)
    return None
Пример #15
0
def find_kegg(genes):
    """Find the 'lpl' KEGG pathways whose flat file mentions each gene.

    Args:
        genes: iterable of strings; each is searched as a substring in every
            line of every pathway file.

    Returns:
        dict mapping a gene to the matching pathway entry (a single str when
        there is exactly one match) or to a list of entries (several
        matches, or the gene was processed more than once). Genes without a
        match are absent.

    Progress is printed along the way, as before.
    """
    count = 0
    lpl_pathways = REST.kegg_list("pathway", "lpl").read()
    entries = []
    for line in lpl_pathways.rstrip().split("\n"):
        entry, description = line.split("\t")
        entries.append(entry)
    print(entries)

    pathway = {}
    # Each pathway file is downloaded at most once and reused for every
    # gene, instead of once per (gene, pathway) pair.
    pathway_lines = {}
    for gene in genes:
        for entry in entries:
            count += 1
            if entry not in pathway_lines:
                pathway_lines[entry] = REST.kegg_get(entry, option=None).readlines()
            if any(gene in text for text in pathway_lines[entry]):
                print(entry)
                print(gene)
                if gene in pathway:
                    # Gene already recorded: turn the stored value into a
                    # list (if it is not one yet) and add this entry to it.
                    known = pathway[gene]
                    merged = list(known) if isinstance(known, list) else [known]
                    if entry not in merged:
                        merged.append(entry)
                    pathway[gene] = merged
                else:
                    # First match for this gene: store the bare entry.
                    pathway[gene] = entry
            print(pathway, count)
    print(pathway)
    return pathway
Пример #16
0
Файл: lib.py Проект: endrebak/kg
def get_pathway_to_definition_map(species):
    """Map KEGG pathways to their definition.

    Returns a DataFrame with one row per listing line: ``kegg_pathway`` is
    the id with ``path:<species>`` and the newline removed, and
    ``kegg_pathway_definition`` is the description before the first " - ".
    """
    kegg_list = REST.kegg_list("pathway", species)

    clean_kegg_path = re.compile(r"path:{}|\n".format(species))

    rowdicts = []
    for kegg_path_line in kegg_list:
        # Lines may arrive as bytes or as text; text has no .decode().
        try:
            decoded = kegg_path_line.decode("utf-8")
        except AttributeError:
            decoded = kegg_path_line

        pathway, definition = clean_kegg_path.sub("", decoded).split("\t")
        species_free = definition.split(" - ")[0]  # Remove species info
        rowdicts.append({"kegg_pathway": pathway,
                         "kegg_pathway_definition": species_free})

    return DataFrame.from_dict(rowdicts)
Пример #17
0
    def update_files(base_dir="/data/databases/kegg/"):
        """Refresh the local KEGG listings and download missing KO KGML files.

        Writes ``<base_dir><db>.txt`` for the pathway, ko, cpd and brite
        listings, then for every line of ``pathway.txt`` downloads
        ``<base_dir>ko/ko<number>.kgml`` unless that file already exists.

        Args:
            base_dir: directory prefix (used by plain string concatenation,
                so it must end with a path separator).
        """
        for db in ["pathway", "ko", "cpd", "brite"]:
            # Fetch before opening, so a failed request does not truncate an
            # existing listing file.
            data = REST.kegg_list(db).read()
            with open(base_dir + db + ".txt", "w") as h:
                h.write(data)
        # wget http://www.kegg.jp/kegg-bin/download_htext?htext=br08901.keg&format=json&filedir=
        # wget http://www.kegg.jp/kegg-bin/download_htext?htext=br08001.keg&format=json&filedir=

        with open(base_dir + "pathway.txt") as h:
            listed_pathways = list(h)

        for pathway in tqdm(listed_pathways):
            if not pathway.strip():
                continue
            # Works for both "path:map00010" and a bare "map00010".
            pw = "ko" + pathway.split()[0].split("map", 1)[1]
            kgmlpath = base_dir + "ko/" + pw + ".kgml"
            if os.path.exists(kgmlpath):
                continue

            try:
                data = REST.kegg_get(pw, option="kgml").read()
            except Exception:
                # Best effort, as before: skip pathways whose KGML cannot be
                # fetched. Nothing has been written yet, so the next run
                # retries instead of finding an empty file.
                continue
            with open(kgmlpath, "w") as h:
                h.write(data)
            sleep(1)
0
def get_network(org, opt='ec'):
    """Build a graph from the KGML files of all KEGG pathways of `org`.

    For every listed pathway the organism KGML and the reference KGML
    (``"path:" + opt + <last five characters of the pathway id>``) are
    parsed and stored on a ``KeggParser``, whose ``get_relations`` and
    ``get_reactions`` are then called. Pathways whose KGML cannot be
    fetched or parsed are skipped.

    Args:
        org: organism code for ``kegg_list('pathway', org)``.
        opt: reference prefix, also stored as ``graph.ref``.

    Returns:
        the ``KeggParser`` after ``building_graph(2)``.
    """
    # Creating a Parser Object
    graph = k.KeggParser()
    # Pathways that could not be retrieved (kept for debugging only)
    error = []
    # Getting organism
    listing = r.kegg_list('pathway', org).read()
    # Drop empty rows; list.remove('') raised ValueError when the listing
    # contained none (e.g. no trailing newline).
    list1 = [row for row in listing.split('\n') if row]
    print('Retrieving data from KEGG PATHWAY database. ' + str(time.ctime()))

    # For each path getting enzymes and reactions
    for path in list1:
        path = path.split('\t')
        try:
            graph.genes = parse.read(r.kegg_get(path[0], 'kgml'))
            graph.genes_default = parse.read(
                r.kegg_get("path:" + opt + path[0][-5:], 'kgml'))
            graph.path = path
        except Exception:
            error.append(path[0])
            continue
        graph.get_relations()
        graph.get_reactions()
    graph.ref = opt

    # Building Graph
    graph.building_graph(2)
    return (graph)
Пример #19
0
def amount_pathway_genes():
    """
    Print how many human repair pathways and repair genes KEGG lists.

    Pathways are the human ("hsa") pathways whose description contains
    "repair"; genes are the distinct gene symbols found in the GENE section
    of those pathways' flat files, in order of first appearance.
    :return: None
    """
    human_pathways = REST.kegg_list("pathway", "hsa").read()

    # Filter all human pathways for repair pathways
    repair_pathways = []
    for line in human_pathways.rstrip().split("\n"):
        entry, description = line.split("\t")
        if "repair" in description:
            repair_pathways.append(entry)

    # Get the genes for pathways and add them to a list
    repair_genes = []
    seen_symbols = set()
    for pathway in repair_pathways:
        pathway_file = REST.kegg_get(pathway).read()  # query and read each pathway

        # iterate through each KEGG pathway file, keeping track of which section
        # of the file we're in, only read the gene in each pathway
        current_section = None
        for line in pathway_file.rstrip().split("\n"):
            section = line[:12].strip()  # section names are within 12 columns
            if section:
                current_section = section

            if current_section != "GENE":
                continue

            # Expected shape: "<id> <symbol>; <description>". Split at the
            # first "; " only and skip lines of any other shape instead of
            # crashing on the tuple unpack.
            gene_identifiers, separator, _ = line[12:].partition("; ")
            identifier_fields = gene_identifiers.split()
            if not separator or len(identifier_fields) != 2:
                continue
            gene_symbol = identifier_fields[1]

            if gene_symbol not in seen_symbols:
                seen_symbols.add(gene_symbol)
                repair_genes.append(gene_symbol)

    print("There are %d repair pathways and %d repair genes. The genes are:" % \
          (len(repair_pathways), len(repair_genes)))
    print(", ".join(repair_genes))
Пример #20
0
def t_KEGG_Query():
    """Tests Bio.KEGG API Wrapper

    Runs a fixed sequence of REST queries; for each one the response is
    read completely and its URL is printed.
    """
    print("Testing Bio.KEGG.query\n\n")

    # (REST function name, positional arguments), in execution order.
    queries = [
        # info tests
        ("kegg_info", ("kegg",)),
        ("kegg_info", ("pathway",)),
        # list tests
        ("kegg_list", ("pathway",)),
        ("kegg_list", ("pathway", "hsa")),
        ("kegg_list", ("organism",)),
        ("kegg_list", ("hsa",)),
        ("kegg_list", ("T01001",)),
        ("kegg_list", ("hsa:10458+ece:Z5100",)),
        ("kegg_list", (["hsa:10458", "ece:Z5100"],)),
        ("kegg_list", ("cpd:C01290+gl:G00092",)),
        ("kegg_list", (["cpd:C01290", "gl:G00092"],)),
        ("kegg_list", ("C01290+G00092",)),
        ("kegg_list", (["C01290", "G00092"],)),
        # find tests
        ("kegg_find", ("genes", "shiga+toxin")),
        ("kegg_find", ("genes", ["shiga", "toxin"])),
        ("kegg_find", ("compound", "C7H10O5", "formula")),
        ("kegg_find", ("compound", "O5C7", "formula")),
        ("kegg_find", ("compound", "174.05", "exact_mass")),
        ("kegg_find", ("compound", "300-310", "mol_weight")),
        # get tests
        ("kegg_get", ("cpd:C01290+gl:G00092",)),
        ("kegg_get", (["cpd:C01290", "gl:G00092"],)),
        ("kegg_get", ("C01290+G00092",)),
        ("kegg_get", (["C01290", "G00092"],)),
        ("kegg_get", ("hsa:10458+ece:Z5100",)),
        ("kegg_get", (["hsa:10458", "ece:Z5100"],)),
        ("kegg_get", ("hsa:10458+ece:Z5100", "aaseq")),
        ("kegg_get", (["hsa:10458", "ece:Z5100"], "aaseq")),
        ("kegg_get", ("hsa05130", "image")),
        # conv tests
        ("kegg_conv", ("eco", "ncbi-geneid")),
        ("kegg_conv", ("ncbi-geneid", "eco")),
        ("kegg_conv", ("ncbi-gi", "hsa:10458+ece:Z5100")),
        ("kegg_conv", ("ncbi-gi", ["hsa:10458", "ece:Z5100"])),
        # link tests
        ("kegg_link", ("pathway", "hsa")),
        ("kegg_link", ("hsa", "pathway")),
        ("kegg_link", ("pathway", "hsa:10458+ece:Z5100")),
        ("kegg_link", ("pathway", ["hsa:10458", "ece:Z5100"])),
    ]

    for endpoint, arguments in queries:
        # Looked up per call, so a missing REST function only fails when its
        # turn comes, exactly like the unrolled version.
        resp = getattr(REST, endpoint)(*arguments)
        resp.read()
        print(resp.url)
Пример #21
0
#!/usr/bin/env python
# encoding: utf-8
"""
@author: zhuhz
@file: keggAPI_v1.py
@time: 2020/6/17 9:13
"""

from Bio.KEGG import REST

orthologies = REST.kegg_list('orthology').read()
res = 'ko' + '\t' + 'ko name' + '\t' + 'ko des' + '\t' + 'map' + '\t' + 'map name' + '\t' + 'map class' + '\n'

for orth in orthologies.rstrip().split('\n'):
    ko_entry, ko_des = orth.strip().split('\t')
    ko_id = ko_entry.strip().split(':')[1]
    try:
        ko_name, ko_func = ko_des.strip().split('; ', 1)
    except ValueError:
        ko_name = ko_des
        ko_func = ko_des
    ko_info = REST.kegg_get(ko_entry).read()
    current_section = None
    for line in ko_info.rstrip().split('\n'):
        section = line[:12].strip()
        if not section == '':
            current_section = section
        if current_section == 'PATHWAY':
            maps = line[12:]
            for map in maps.rstrip().split('\n'):
                map_entry, map_name = (
Пример #22
0
def get_organisms():
    """Return the KEGG organism listing as a list of tab-split rows.

    Empty lines are skipped, so the newline that ends the response no longer
    produces a trailing ``['']`` row.
    """
    organisms = REST.kegg_list('organism').read()
    return [row.split('\t') for row in organisms.split('\n') if row]
Пример #23
0
#!/usr/bin/env python
# encoding: utf-8
"""
@author: zhuhz
@file: keggAPI.py
@time: 2020/6/16 13:58
"""

from Bio.KEGG import REST

pathways = REST.kegg_list('pathway').read()
# print(type(pathways))
res = 'ko' + '\t' + 'ko name' + '\t' + 'ko des' + '\t' + 'module' + '\t' + 'module name' + '\t' + 'map' + '\t' + 'map name' + '\t' + 'map class' + '\n'

for pathway in pathways.rstrip().split('\n'):
    map_entry, map_description = pathway.split('\t')
    # print(map_entry)
    map = REST.kegg_get(map_entry).read()
    # print(type(map))
    current_section = None
    for line in map.rstrip().split('\n'):
        section = line[:12].strip()
        if not section == '':
            current_section = section
        if current_section == 'NAME':
            map_name = line[12:]
        elif current_section == 'CLASS':
            map_class = line[12:]
        # elif current_section == 'PATHWAY_MAP':
        # 	map_des = line[22:]
        elif current_section == 'MODULE':
Пример #24
0
def get_pathways(organism):
    """Return the pathway ids (first listing column) KEGG lists for `organism`.

    Empty lines are skipped, so the newline that ends the response no longer
    produces a trailing ``''`` id.
    """
    pathways = REST.kegg_list('pathway', organism).read()
    return [row.split('\t')[0] for row in pathways.split('\n') if row]
Пример #25
0
from Bio.KEGG import REST
drug = REST.kegg_list('drug').read()
# Databases that can be fetched this way: brite, pathway, genome (gene does not work), module, enzyme, glycan, compound, reaction, network, drug, disease
drug_name = input("찾고 싶은 약과 관련된 것을 입력해주세요: ")
# The user types a term related to the drug of interest (e.g. vaccine);
# every listing description containing that term is collected.
drug_records = (line.split("\t") for line in drug.rstrip().split("\n"))
drug_list = [description
             for entry, description in drug_records
             if drug_name in description]
print(drug_list)
Пример #26
0
from Bio.KEGG import REST
from bioservices import Reactome
import csv
from input import inp

#gene_list = ['POLD1', 'POLE3', 'ABO', 'TP53']
gene_list = inp
specie = "hsa"


human_pathways = REST.kegg_list("pathway", specie).read()
human_pathways_dict = {}
repair_pathways = []
repair_pathways_dict = {}
for line in human_pathways.rstrip().split("\n"):
    entry, description = line.split("\t")
    human_pathways_dict[entry] = description
    if "DNA" in description:
        repair_pathways.append(entry)
        repair_pathways_dict[entry] = description


rejected = []
gene_dict = dict((gene,[]) for gene in gene_list)

i = 0
len_ = len(human_pathways_dict.keys())
for pathway in human_pathways_dict.keys():
    i += 1
    print str(i) + ' // ' + str(len_)
    #print pathway
Пример #27
0
def t_KEGG_Query():
    """Tests Bio.KEGG API Wrapper

    Issues a fixed series of info, list, find, get, conv and link queries.
    Each response is read in full and its URL is printed.
    """
    print("Testing Bio.KEGG.query\n\n")

    # REST function name -> argument tuples, grouped in execution order.
    plan = (
        # info tests
        ("kegg_info", [
            ("kegg",),
            ("pathway",),
        ]),
        # list tests
        ("kegg_list", [
            ("pathway",),
            ("pathway", "hsa"),
            ("organism",),
            ("hsa",),
            ("T01001",),
            ("hsa:10458+ece:Z5100",),
            (["hsa:10458", "ece:Z5100"],),
            ("cpd:C01290+gl:G00092",),
            (["cpd:C01290", "gl:G00092"],),
            ("C01290+G00092",),
            (["C01290", "G00092"],),
        ]),
        # find tests
        ("kegg_find", [
            ("genes", "shiga+toxin"),
            ("genes", ["shiga", "toxin"]),
            ("compound", "C7H10O5", "formula"),
            ("compound", "O5C7", "formula"),
            ("compound", "174.05", "exact_mass"),
            ("compound", "300-310", "mol_weight"),
        ]),
        # get tests
        ("kegg_get", [
            ("cpd:C01290+gl:G00092",),
            (["cpd:C01290", "gl:G00092"],),
            ("C01290+G00092",),
            (["C01290", "G00092"],),
            ("hsa:10458+ece:Z5100",),
            (["hsa:10458", "ece:Z5100"],),
            ("hsa:10458+ece:Z5100", "aaseq"),
            (["hsa:10458", "ece:Z5100"], "aaseq"),
            ("hsa05130", "image"),
        ]),
        # conv tests
        ("kegg_conv", [
            ("eco", "ncbi-geneid"),
            ("ncbi-geneid", "eco"),
            ("ncbi-gi", "hsa:10458+ece:Z5100"),
            ("ncbi-gi", ["hsa:10458", "ece:Z5100"]),
        ]),
        # link tests
        ("kegg_link", [
            ("pathway", "hsa"),
            ("hsa", "pathway"),
            ("pathway", "hsa:10458+ece:Z5100"),
            ("pathway", ["hsa:10458", "ece:Z5100"]),
        ]),
    )

    for function_name, argument_sets in plan:
        for argument_set in argument_sets:
            # Resolve the function right before the call, as the unrolled
            # version did.
            query = getattr(REST, function_name)
            resp = query(*argument_set)
            resp.read()
            print(resp.url)
def KEGG(input1, input2):
    """Dispatch a small set of KEGG queries selected by two strings.

    Commands (``input1`` / ``input2``):
      * ``"info"`` / ``"alt"``: print the KEGG database overview.
      * ``"pathway_overview"`` / ``"alle"``: return all PATHWAY entries via
        ``to_df``.
      * ``"print_pathway"`` / ``"alle"`` or a pathway id: return an ``Image``
        of map01100 (1500x1000) or of the given pathway.
      * ``"find_molekyle"`` / query: print the compound search result.
      * ``"info_molekyle"`` / entry id: print the entry.
      * ``"molekyle billede"`` / entry id: return an ``Image`` of the entry.
      * ``"Enzyme molekyle"`` / query: return the enzyme search via ``to_df``.

    Anything else treats ``input1`` as an enzyme entry id and ``input2`` as
    the field to return: ``"reaction"``, ``"classname"`` or ``"genes"``
    (list of the first element of each gene record). An unknown field prints
    a message.

    Returns None for the printing commands and for an unknown field.
    """
    # Print overview
    if input1 == "info" and input2 == "alt":
        # The overview is only requested when it is actually printed; it
        # used to be downloaded on every call.
        result = REST.kegg_info("kegg").read()
        print(result)
        return None

    # Get all entries in the PATHWAY database as a dataframe
    elif input1 == "pathway_overview" and input2 == "alle":
        result = REST.kegg_list("pathway").read()
        return to_df(result)

    # Show all biosynthesis pathways, or one specific pathway
    elif input1 == "print_pathway":
        if input2 == "alle":
            result = REST.kegg_get("map01100", "image").read()
            img = Image(result, width=1500, height=1000)
        else:
            result = REST.kegg_get(input2, "image").read()
            img = Image(result)
        return img

    # Find a compound, e.g. vanillin
    elif input1 == "find_molekyle" and input2 is not None:
        result = REST.kegg_find("compound", input2).read()  # e.g. cpd:C00755
        print(result)
        return None

    elif input1 == "info_molekyle" and input2 is not None:  # e.g. cpd:C00755
        # Get the entry information for the compound
        result = REST.kegg_get(input2).read()
        print(result)
        return None

    # Display the molecular structure of a compound
    elif input1 == "molekyle billede" and input2 is not None:
        result = REST.kegg_get(input2, "image").read()  # e.g. "cpd:C00755"
        return Image(result)

    elif input1 == "Enzyme molekyle" and input2 is not None:
        result = REST.kegg_find("enzyme", input2).read()
        return to_df(result)

    # Enzyme part: input1 is an enzyme entry id
    from Bio.KEGG import Enzyme
    request = REST.kegg_get(input1)
    records = Enzyme.parse(request)
    record = list(records)[0]

    if input2 == "reaction":
        return record.reaction

    elif input2 == "classname":
        return record.classname

    elif input2 == "genes":
        genes = list()
        for g in record.genes:
            gene_id, gene_symbol = g
            genes.append(gene_id)
        return genes
    else:
        print("Du har indskrevet nogget der ikke er gældende. Prøv igen")
Пример #29
0
#13.10.KEGG.py
from Bio.KEGG import REST

human_pathways = REST.kegg_list("pathway", "hsa").read()

# Keep the human pathways whose description mentions hepatitis
# (case-insensitive), printing each match.
hepatitis_pathways = []
for line in human_pathways.rstrip().split("\n"):
    entry, description = line.split("\t")
    if "hepatitis" in description.lower():
        hepatitis_pathways.append(entry)
        print(entry, description)
print(hepatitis_pathways)

# Collect the distinct gene symbols from the GENE section of each pathway.
hepatitis_genes = []
for pathway in hepatitis_pathways:
    pathway_file = REST.kegg_get(pathway).read()

    current_section = None
    for line in pathway_file.rstrip().split("\n"):
        # Section names sit in the first 12 columns; continuation lines
        # leave them blank.
        section = line[:12].strip()
        if not section == "":
            current_section = section

        # This check must run for every line, not only for lines that carry
        # a section name; otherwise only the first GENE line is read.
        if current_section != "GENE":
            continue

        # Expected shape: "<id> <symbol>; <description>". Lines of any other
        # shape are skipped instead of crashing the tuple unpack.
        gene_identifiers, separator, _ = line[12:].partition("; ")
        identifier_fields = gene_identifiers.split()
        if not separator or len(identifier_fields) != 2:
            continue
        gene_symbol = identifier_fields[1]

        if gene_symbol not in hepatitis_genes:
            hepatitis_genes.append(gene_symbol)

print(


analysis_folder=os.path.dirname(__file__)
KEGG_data_folder=os.path.join(analysis_folder, 'KEGG_data/')

import urllib.request

# Every download below writes into this folder, so make sure it exists.
os.makedirs(KEGG_data_folder, exist_ok=True)

# Download the KEGG ontology once
ontology_path=os.path.join(KEGG_data_folder,'ko00001.json')
if not os.path.isfile(ontology_path):
    url='https://www.genome.jp/kegg-bin/download_htext?htext=ko00001&format=json&filedir='
    urllib.request.urlretrieve(url, ontology_path)


# Get all human pathways
hsa_path_list=KEGG_REST.kegg_list('pathway','hsa')
identifiers=[]
for line in hsa_path_list:
    identifier=line.partition('\t')[0].strip()
    # Strip the "path:" prefix only when it is present; slicing five
    # characters unconditionally mangles ids listed without a prefix.
    if identifier.startswith('path:'):
        identifier=identifier[5:]
    if identifier:
        identifiers.append(identifier)


# Save the KGML of every pathway as <identifier>.kgml
for identifier in identifiers:
    KGML_handle=KEGG_REST.kegg_get(identifier,option='kgml')
    # Read before opening so a failed download leaves no empty file behind;
    # the with-block closes the file even if the write fails.
    kgml_text=KGML_handle.read()
    with open(os.path.join(KEGG_data_folder,identifier+'.kgml'),'w') as kgml_file:
        kgml_file.write(kgml_text)

#%%
parse_pathways=1
if parse_pathways:
    
Пример #31
0
from Bio.KEGG import REST
from bioservices import Reactome
import csv
from input import inp

#gene_list = ['POLD1', 'POLE3', 'ABO', 'TP53']
gene_list = inp
specie = "hsa"

human_pathways = REST.kegg_list("pathway", specie).read()
human_pathways_dict = {}
repair_pathways = []
repair_pathways_dict = {}
for line in human_pathways.rstrip().split("\n"):
    entry, description = line.split("\t")
    human_pathways_dict[entry] = description
    if "DNA" in description:
        repair_pathways.append(entry)
        repair_pathways_dict[entry] = description

rejected = []
gene_dict = dict((gene, []) for gene in gene_list)

i = 0
len_ = len(human_pathways_dict.keys())
for pathway in human_pathways_dict.keys():
    i += 1
    print str(i) + ' // ' + str(len_)
    #print pathway
    pathway_file = REST.kegg_get(pathway).read()
    current_section = None
Пример #32
0
from Bio.Seq import Seq
from Bio.KEGG import Enzyme
from Bio.KEGG import REST
from Bio.KEGG.KGML import KGML_parser
from Bio.KEGG import Map
#request = REST.kegg_get("ec:5.4.2.2")
#open("ec_5.4.2.2.txt",'w').write(request.read())
#records = Enzyme.parse(open("ec_5.4.2.2.txt"))
#record = list(records)[0]
#print(record.classname)
#print(record.entry)
organisms = REST.kegg_list("organism").read()
organismlist = []
for line in organisms.rstrip().split("\n"):
    #print(line)
    code = line.split("\t")[1]
    organismlist.append(code)

#print(organismlist)

#parser = KGML_parser.KGMLparser()
#open("human_map.xml",'w').write(REST.kegg_get("hsa05130",option="kgml").read())
human_map = KGML_parser.read(REST.kegg_get("hsa01100",option="kgml"))
cpds = human_map.compounds
for cpd in cpds:
    print(cpd.name)
    graphics = cpd.graphics
    for graphic in graphics:
        print(graphic.x)

rxns = human_map.reaction_entries
Пример #33
0
from Bio.KEGG import REST
import urllib.request
import re
import sys

##### TO DO #############
#### Get Organism ID from organims name #####

result = REST.kegg_list("pathway", "aga").read()
# Parse the listing line by line: "<pathway id>\t<description>".
# Splitting the whole text on tabs first paired each description with the
# id of the NEXT line, losing the first id and the last description.
pathw_ids = []  # pathway ids, in listing order
pathw = []      # descriptions, aligned with pathw_ids
for record in result.splitlines():
    pathway_id, separator, description = record.partition("\t")
    if not separator:
        # Blank or malformed line without a tab.
        continue
    pathw_ids.append(pathway_id)
    pathw.append(description)

#print(pathw)
for pathway in pathw_ids:
    #pathway = 'hsa00010' # glycolysis
    url = "http://rest.kegg.jp/get/" + pathway
    with urllib.request.urlopen(url) as f:
        lines = f.read().decode('utf-8').splitlines()
        want = 0
        for line in lines:
            fields = line.split()
Пример #34
0
 def list(db, org="hsa"):
     """Return the KEGG listing for `db` and organism `org` as a bytes string.

     Invokes Biopython's ``REST.kegg_list`` and encodes the text it returns.
     """
     listing_text = REST.kegg_list(db, org).read()
     return str.encode(listing_text)