def main():
    """Look up a KEGG pathway by name and print the names of its genes.

    The pathway name is taken from the first command-line argument. When the
    search yields several pathways, the user is asked to pick one by its
    listed index. The "map" part of the chosen identifier is replaced by
    "hsa" before the pathway's KGML is downloaded and parsed.
    """
    if len(sys.argv) < 2:
        print("Expected the name of a pathway as argument")
        return

    query = sys.argv[1].replace(" ", "+")
    result = KEGG_REST.kegg_find('PATHWAY', query)
    result_txt = result.read().split('\n')

    # Count real hits instead of raw split() pieces, so that both an empty
    # body and a body consisting of a single blank line mean "no results".
    hit_indices = [i for i, line in enumerate(result_txt) if line.strip()]
    if not hit_indices:
        print("Search found no results")
        return

    choice = hit_indices[0]
    if len(hit_indices) > 1:
        print("More than 1 result:")
        for index, r in enumerate(result_txt):
            output = r.split("\t")
            if len(output) == 2:
                print(str(index) + "\t" + output[1])
        try:
            choice = int(input("Which one? "))
        except ValueError:
            print("Invalid choice")
            return
        # Reject indices that are out of range or point at a blank line.
        if choice not in hit_indices:
            print("Invalid choice")
            return

    identifier = result_txt[choice].split("\t")[0].strip()
    identifier = identifier.replace("map", "hsa")
    pathway_kgml = KEGG_REST.kegg_get(identifier, "kgml")
    pathway = KEGG_KGML_PARSER.read(pathway_kgml)
    for gene in pathway.genes:
        print(gene.name)
def add_kegg_descript2(hit):
    """Return the KEGG module, K number and annotation for a gene hit.

    Queries KEGG ``find genes`` for *hit* and extracts the first K number
    (``K`` followed by five digits) together with the text that follows it
    on the same line. If no K number is present, the annotation falls back
    to the text between the first tab and the first ``;``. The module id is
    taken from a separate ``link module`` query. Every piece that cannot be
    obtained is reported as the string ``"none"``.

    Returns the result of ``strip_lines_list([module, KEGG, ann])``, where
    ``ann`` has first been passed through ``reduce_func_len``.
    """
    module = "none"
    KEGG = "none"
    ann = "none"

    try:
        desc = REST.kegg_find("genes", hit).read()
    except Exception:
        # Best effort: a failed lookup leaves all three fields as "none".
        desc = None

    if desc is not None:
        ko_match = re.search(r"K[0-9]{5}", desc)
        if ko_match is not None:
            KEGG = ko_match.group(0)

        # The text comes from group(0); the match object itself has no
        # string methods. "." never matches a newline, so the captured
        # text is already limited to a single line.
        ann_match = re.search(r"(?<=K[0-9]{5}).*", desc)
        if ann_match is not None:
            ann = ann_match.group(0)
        else:
            try:
                ann = desc.split("\t")[1].split(";")[0].replace("\n", "")
            except IndexError:
                ann = "none"

        try:
            mod = REST.kegg_link('module', hit).read()
            module = mod.split(":")[2].split("_")[-1].replace("\n", "")
        except Exception:
            # Covers both a failed request and a reply without a module id.
            module = "none"

    ann = reduce_func_len(ann)
    return strip_lines_list([module, KEGG, ann])
def add_kegg_descript2(hit):
    """Look up a gene hit in KEGG and return its module, K number and annotation.

    The first K number (``K`` plus five digits) found in the ``find genes``
    reply becomes ``KEGG``; the remainder of that line becomes ``ann``. When
    the reply holds no K number, ``ann`` is the text between the first tab
    and the first ``;`` instead. ``module`` comes from a ``link module``
    query. Any value that cannot be determined is the string ``"none"``.

    Returns ``strip_lines_list([module, KEGG, ann])`` after ``ann`` has been
    shortened by ``reduce_func_len``.
    """
    module, KEGG, ann = "none", "none", "none"

    try:
        desc = REST.kegg_find("genes", hit).read()
    except Exception:
        # Lookup failed: keep every field at "none" (best-effort contract).
        desc = None

    if desc is not None:
        found_ko = re.search(r"K[0-9]{5}", desc)
        if found_ko:
            KEGG = found_ko.group(0)

        # Read the annotation from the match via group(0); a match object
        # cannot be treated as a string. The pattern cannot span lines, so
        # no newline clean-up is needed on this path.
        found_ann = re.search(r"(?<=K[0-9]{5}).*", desc)
        if found_ann:
            ann = found_ann.group(0)
        else:
            fields = desc.split("\t")
            if len(fields) > 1:
                ann = fields[1].split(";")[0].replace("\n", "")

        try:
            mod = REST.kegg_link('module', hit).read()
            module = mod.split(":")[2].split("_")[-1].replace("\n", "")
        except Exception:
            # Request failure or a reply with fewer than three ":" parts.
            module = "none"

    ann = reduce_func_len(ann)
    return strip_lines_list([module, KEGG, ann])
def add_kegg_descript(hit):
    """Return ``[K number, annotation]`` for a KEGG gene hit.

    The K number is the first ``K`` + five digits in the ``find genes``
    reply; the annotation is everything after it up to the end of that line.

    Raises:
        AttributeError: if the reply contains no K number, because
            ``re.search`` then returns ``None``.
    """
    reply_text = REST.kegg_find("genes", hit).read()
    ko_number = re.search(r"K[0-9]{5}", reply_text).group(0)
    annotation = re.search(r"(?<=K[0-9]{5}).*", reply_text).group(0)
    return [ko_number, annotation]
def add_kegg_descript(hit):
    """Fetch the K number and trailing annotation for a gene hit from KEGG.

    Returns a two-item list: the first ``K`` + five digit identifier found
    in the ``find genes`` response, and the rest of the line following it.

    Raises:
        AttributeError: when no K number occurs in the response
            (``re.search`` yields ``None``).
    """
    response_body = REST.kegg_find("genes", hit).read()

    id_match = re.search(r"K[0-9]{5}", response_body)
    k_number = id_match.group(0)

    tail_match = re.search(r"(?<=K[0-9]{5}).*", response_body)
    description = tail_match.group(0)

    return [k_number, description]
def kegg_search(database, query):
    """Search a KEGG database and return the identifiers of all hits.

    Spaces in *query* are replaced by ``+`` before the request is sent.
    The identifier of a hit is the text before the first tab on its line.

    Returns:
        A list of identifier strings; an empty list when nothing was found.
    """
    response = KEGG_REST.kegg_find(database, query.replace(" ", "+"))
    # Drop the piece after the final newline.
    lines = response.read().split('\n')[:-1]

    # An entirely empty body leaves no lines at all, while a body that is
    # just a blank line leaves [""]; both mean "no results".
    if not lines or lines[0] == "":
        return []

    return [line.split('\t')[0] for line in lines]
def get_EC_num(geneName):
    """Return the EC numbers KEGG associates with a gene, comma separated.

    The gene is searched in KEGG ``genes``; the first (lower-cased) hit line
    containing ``eco:`` supplies the enzyme name, taken as the text between
    the first tab and the first ``;``. That name is then searched in KEGG
    ``enzyme`` and the EC number of each hit (text between the first ``:``
    and the first tab) is collected.

    Returns:
        A string such as ``"1.1.1.3,2.7.2.4"``, or ``''`` when no ``eco:``
        line exists, the line cannot be parsed, or any enzyme hit line is
        malformed.
    """
    # retrieve gene data from KEGG
    gene_lines = REST.kegg_find('genes', geneName).read().lower().splitlines()

    # find the first line that mentions 'eco:' to get the enzyme name
    eco_line = next((line for line in gene_lines if 'eco:' in line), '')
    if eco_line == '':
        return ''

    # A hit line without a tab or ';' has no extractable name; report
    # "nothing found" like every other failure path of this function.
    tab_pos = eco_line.find('\t')
    semicolon_pos = eco_line.find(';')
    if tab_pos == -1 or semicolon_pos == -1:
        return ''
    enzyme_name = eco_line[tab_pos + 1:semicolon_pos]

    # find enzyme name in KEGG and get the EC numbers associated to it
    enzyme_lines = REST.kegg_find('enzyme', enzyme_name).read().lower().splitlines()

    ec_numbers = []
    for line in enzyme_lines:
        try:
            ec_numbers.append(line[line.index(':') + 1:line.index('\t')])
        except ValueError:
            # One malformed line invalidates the whole answer.
            return ''

    # Joining an empty list already yields ''.
    return ','.join(ec_numbers)
def kooo(cccc):
    """Return the KEGG genes linked to the KO entry of the first gene hit.

    Steps:
      1. Search KEGG ``genes`` for *cccc* and take the first
         whitespace-delimited token at the start of the reply as the gene id.
      2. Ask KEGG which ``ko`` entries are linked to that gene.
      3. Ask KEGG which ``genes`` are linked to the first ``ko:`` id found.

    Returns:
        The raw text of the final ``link genes`` reply, or ``""`` when the
        search has no hit, the ``ko`` reply is shorter than four characters,
        or it contains no ``ko:`` identifier.
    """
    search_reply = REST.kegg_find("genes", cccc).read()

    # re.match anchors at the start of the string, like '^' without
    # re.MULTILINE.
    gene_id_match = re.match(r'\S+', search_reply)
    if gene_id_match is None:
        return ""

    ko_reply = REST.kegg_link("ko", gene_id_match.group(0)).read()
    print("xxx", ko_reply)  # trace output of the raw ko link reply
    if len(ko_reply) < 4:
        return ""

    ko_id_match = re.search(r'ko:\S+', ko_reply)
    if ko_id_match is None:
        return ""

    return REST.kegg_link("genes", ko_id_match.group(0)).read()
def kegg_rest(type: str, argument: str, optional_argument: str = "", sleep_time: float = .5) -> List[str]:
    """Call one of Biopython's KEGG REST functions and return the reply as a list of lines.

    All empty lines are removed from the list as they do not contain any information.

    Arguments
    ----------
    * type: str ~ The KEGG REST action. Can be either 'info', 'get', 'link', 'list'
      or 'find'.
    * argument: str ~ The argument for the KEGG order.
    * optional_argument: str="" ~ The second argument, passed on for the 'link', 'list'
      and 'find' actions. It is ignored for 'info' and 'get'.
    * sleep_time: float=0.5 ~ The time in seconds that is waited after the REST action
      has been issued, to avoid contacting the server too frequently.

    Raises
    ----------
    * ValueError ~ If type is not one of the supported actions. This is raised before
      any request is made and before any waiting.
    """
    # Execute the correct Biopython KEGG REST function.
    if type == "info":
        kegg_data = REST.kegg_info(argument)
    elif type == "get":
        kegg_data = REST.kegg_get(argument)
    elif type == "link":
        kegg_data = REST.kegg_link(argument, optional_argument)
    elif type == "list":
        kegg_data = REST.kegg_list(argument, optional_argument)
    elif type == "find":
        kegg_data = REST.kegg_find(argument, optional_argument)
    else:
        # Fail early with a clear message instead of sleeping and then
        # tripping over an unassigned variable.
        raise ValueError(
            f"Unknown KEGG REST action {type!r}; "
            "expected 'info', 'get', 'link', 'list' or 'find'."
        )

    # Wait the sleep time doing nothing.
    time.sleep(sleep_time)

    # Get one string per line of the KEGG REST result, skipping empty lines.
    lines: List[str] = kegg_data.read().split("\n")
    return [line for line in lines if len(line) > 0]
def get_metabolite_ID(metName):
    """Return the numeric part of the KEGG compound id whose names include *metName*.

    The ``find compound`` reply is lower-cased, the ``cpd:`` prefix is
    removed and each line is split into its id followed by the individual
    names. The first line that lists *metName* (compared in lower case,
    whole-name match) wins.

    Returns:
        The id with its first character dropped, converted to ``int``,
        or ``0`` when no line matches.
    """
    raw_reply = "".join(REST.kegg_find('compound', metName).read())
    normalized = raw_reply.lower().replace('cpd:', '').replace('\t', '; ')
    rows = [row.split('; ') for row in normalized.splitlines()]

    # First field of the first row that lists the name; '' if there is none.
    matched_id = next((row[0] for row in rows if metName.lower() in row), '')

    if matched_id == '':
        return 0
    return int(matched_id[1:])
def t_KEGG_Query():
    """Exercise the Bio.KEGG REST wrappers and print each requested URL.

    Every call is issued in a fixed order; the response is read in full and
    its ``url`` attribute is printed. Nothing is asserted.
    """
    print("Testing Bio.KEGG.query\n\n")

    # (wrapper name on REST, positional arguments) in execution order.
    calls = [
        # info tests
        ("kegg_info", ("kegg",)),
        ("kegg_info", ("pathway",)),
        # list tests
        ("kegg_list", ("pathway",)),
        ("kegg_list", ("pathway", "hsa")),
        ("kegg_list", ("organism",)),
        ("kegg_list", ("hsa",)),
        ("kegg_list", ("T01001",)),
        ("kegg_list", ("hsa:10458+ece:Z5100",)),
        ("kegg_list", (["hsa:10458", "ece:Z5100"],)),
        ("kegg_list", ("cpd:C01290+gl:G00092",)),
        ("kegg_list", (["cpd:C01290", "gl:G00092"],)),
        ("kegg_list", ("C01290+G00092",)),
        ("kegg_list", (["C01290", "G00092"],)),
        # find tests
        ("kegg_find", ("genes", "shiga+toxin")),
        ("kegg_find", ("genes", ["shiga", "toxin"])),
        ("kegg_find", ("compound", "C7H10O5", "formula")),
        ("kegg_find", ("compound", "O5C7", "formula")),
        ("kegg_find", ("compound", "174.05", "exact_mass")),
        ("kegg_find", ("compound", "300-310", "mol_weight")),
        # get tests
        ("kegg_get", ("cpd:C01290+gl:G00092",)),
        ("kegg_get", (["cpd:C01290", "gl:G00092"],)),
        ("kegg_get", ("C01290+G00092",)),
        ("kegg_get", (["C01290", "G00092"],)),
        ("kegg_get", ("hsa:10458+ece:Z5100",)),
        ("kegg_get", (["hsa:10458", "ece:Z5100"],)),
        ("kegg_get", ("hsa:10458+ece:Z5100", "aaseq")),
        ("kegg_get", (["hsa:10458", "ece:Z5100"], "aaseq")),
        ("kegg_get", ("hsa05130", "image")),
        # conv tests
        ("kegg_conv", ("eco", "ncbi-geneid")),
        ("kegg_conv", ("ncbi-geneid", "eco")),
        ("kegg_conv", ("ncbi-gi", "hsa:10458+ece:Z5100")),
        ("kegg_conv", ("ncbi-gi", ["hsa:10458", "ece:Z5100"])),
        # link tests
        ("kegg_link", ("pathway", "hsa")),
        ("kegg_link", ("hsa", "pathway")),
        ("kegg_link", ("pathway", "hsa:10458+ece:Z5100")),
        ("kegg_link", ("pathway", ["hsa:10458", "ece:Z5100"])),
    ]

    for wrapper_name, arguments in calls:
        # Resolve the wrapper only when its turn comes.
        resp = getattr(REST, wrapper_name)(*arguments)
        resp.read()
        print(resp.url)
def main():
    """Import a KEGG pathway, chosen by name, into a Neo4j database.

    Expects the name of the pathway as the first command-line argument.
    The pathway is searched in KEGG (asking the user to choose when several
    match), its KGML is downloaded with "map" in the identifier replaced by
    ``ORGANISM``, and the parsed entries are written to the server described
    by the ``KGML2NEO4J`` section of the ``server_config`` file.

    All existing nodes in the target database are deleted before the import.
    """
    if len(sys.argv) < 2:
        print("Expected the name of a pathway as argument")
        return

    # Get the KGML from KEGG
    search_term = sys.argv[1].replace(" ", "+")
    result = KEGG_REST.kegg_find('PATHWAY', search_term)
    result_txt = result.read().split('\n')

    # Count real hits instead of raw split() pieces, so that both an empty
    # body and a body consisting of a single blank line mean "no results".
    hit_indices = [i for i, line in enumerate(result_txt) if line.strip()]
    if not hit_indices:
        print("Search found no results")
        return

    choice = hit_indices[0]
    if len(hit_indices) > 1:
        print("More than 1 result:")
        for index, r in enumerate(result_txt):
            output = r.split("\t")
            if len(output) == 2:
                print(str(index) + "\t" + output[1])
        try:
            choice = int(input("Which one? "))
        except ValueError:
            print("Invalid choice")
            return
        # Reject indices that are out of range or point at a blank line.
        if choice not in hit_indices:
            print("Invalid choice")
            return

    identifier = result_txt[choice].split("\t")[0].strip()
    identifier = identifier.replace("map", ORGANISM)
    pathway_kgml = KEGG_REST.kegg_get(identifier, "kgml")
    pathway = KEGG_KGML_PARSER.read(pathway_kgml)

    config = configparser.ConfigParser()
    config.read("server_config")
    if "KGML2NEO4J" not in config:
        print("Server config not found!")
        return
    section = config["KGML2NEO4J"]
    username = section['username']
    password = section['password']
    server_uri = section['uri']

    db = database(server_uri, username, password)
    # Start from an empty graph.
    db.run_query("MATCH (n) DETACH DELETE n")

    query_list = [
        db.make_gene_query(pathway.genes),
        db.make_compound_query(pathway.compounds),
        db.make_reaction_query(pathway.reaction_entries),
        db.make_map_query(pathway.maps),
        db.make_relations_query(pathway.relations),
    ]
    create_parts = [q for q in query_list if q]
    if not create_parts:
        # A bare "CREATE" is not a valid statement; nothing to import.
        print("Pathway contains nothing to import")
        return
    db.run_query("CREATE " + ",".join(create_parts))

    # Merge matching nodes
    merge_query = (
        "MATCH (n1),(n2) "
        "WHERE ANY (x IN n1.name WHERE x IN n2.name) and id(n1) < id(n2) "
        "WITH [n1,n2] as ns "
        "CALL apoc.refactor.mergeNodes(ns) YIELD node "
        "RETURN node"
    )
    db.run_query(merge_query)
def t_KEGG_Query():
    """Run a fixed series of Bio.KEGG REST requests and print their URLs.

    For every request the response body is read completely and the
    response's ``url`` attribute is printed. No result is checked.
    """
    print("Testing Bio.KEGG.query\n\n")

    # Each section: the REST wrapper to use and the argument tuples to feed
    # it, listed in the order the requests are issued.
    plan = (
        # info tests
        ("kegg_info", [
            ("kegg",),
            ("pathway",),
        ]),
        # list tests
        ("kegg_list", [
            ("pathway",),
            ("pathway", "hsa"),
            ("organism",),
            ("hsa",),
            ("T01001",),
            ("hsa:10458+ece:Z5100",),
            (["hsa:10458", "ece:Z5100"],),
            ("cpd:C01290+gl:G00092",),
            (["cpd:C01290", "gl:G00092"],),
            ("C01290+G00092",),
            (["C01290", "G00092"],),
        ]),
        # find tests
        ("kegg_find", [
            ("genes", "shiga+toxin"),
            ("genes", ["shiga", "toxin"]),
            ("compound", "C7H10O5", "formula"),
            ("compound", "O5C7", "formula"),
            ("compound", "174.05", "exact_mass"),
            ("compound", "300-310", "mol_weight"),
        ]),
        # get tests
        ("kegg_get", [
            ("cpd:C01290+gl:G00092",),
            (["cpd:C01290", "gl:G00092"],),
            ("C01290+G00092",),
            (["C01290", "G00092"],),
            ("hsa:10458+ece:Z5100",),
            (["hsa:10458", "ece:Z5100"],),
            ("hsa:10458+ece:Z5100", "aaseq"),
            (["hsa:10458", "ece:Z5100"], "aaseq"),
            ("hsa05130", "image"),
        ]),
        # conv tests
        ("kegg_conv", [
            ("eco", "ncbi-geneid"),
            ("ncbi-geneid", "eco"),
            ("ncbi-gi", "hsa:10458+ece:Z5100"),
            ("ncbi-gi", ["hsa:10458", "ece:Z5100"]),
        ]),
        # link tests
        ("kegg_link", [
            ("pathway", "hsa"),
            ("hsa", "pathway"),
            ("pathway", "hsa:10458+ece:Z5100"),
            ("pathway", ["hsa:10458", "ece:Z5100"]),
        ]),
    )

    for wrapper_name, argument_sets in plan:
        for argument_set in argument_sets:
            # Look the wrapper up right before use.
            response = getattr(REST, wrapper_name)(*argument_set)
            response.read()
            print(response.url)
import Bio.KEGG.REST as kegg

# Find the KEGG ORTHOLOGY entries matching "PTEN" and fetch the first hit.
result = kegg.kegg_find('ORTHOLOGY', 'PTEN')
result = result.read().split('\n')
gene = kegg.kegg_get(result[0].split('\t')[0])
gene = gene.read().split('\n')

# Collect every line from the first one mentioning PATHWAY up to (not
# including) the first later one mentioning DISEASE. Both scans are bounded
# by the length of the entry so a missing section cannot run off the end.
pathways = []
index = 0
while index < len(gene) and 'PATHWAY' not in gene[index]:
    index += 1
while index < len(gene) and 'DISEASE' not in gene[index]:
    pathways.append(gene[index])
    index += 1

print("The PTEN gene is involved in these processes: ")
for i in pathways:
    print(i)
def KEGG(input1, input2):
    """Dispatch a KEGG query selected by two keyword-like arguments.

    Supported combinations of (*input1*, *input2*):

    * ``("info", "alt")``: print the KEGG database overview; returns None.
    * ``("pathway_overview", "alle")``: ``to_df`` of all PATHWAY entries.
    * ``("print_pathway", "alle")``: ``Image`` of map01100 at 1500x1000.
    * ``("print_pathway", <pathway id>)``: ``Image`` of that pathway.
    * ``("find_molekyle", <name>)``: print compound search hits; returns None.
    * ``("info_molekyle", <entry id>)``: print the entry; returns None.
    * ``("molekyle billede", <entry id>)``: ``Image`` of the entry.
    * ``("Enzyme molekyle", <name>)``: ``to_df`` of enzyme search hits.
    * ``(<enzyme entry id>, "reaction" | "classname" | "genes")``: that
      field of the first parsed enzyme record; for ``"genes"`` a list of
      gene ids.

    Any other combination prints a (Danish) "invalid input" message and
    returns None.
    """
    # Print overview
    if input1 == "info" and input2 == "alt":
        # Only this branch needs the overview, so only it requests it.
        print(REST.kegg_info("kegg").read())
        return None

    # Get all entries in the PATHWAY database as a dataframe
    if input1 == "pathway_overview" and input2 == "alle":
        return to_df(REST.kegg_list("pathway").read())

    # Show a pathway image; "alle" selects the map01100 overview map
    if input1 == "print_pathway":
        if input2 == "alle":
            picture = REST.kegg_get("map01100", "image").read()
            return Image(picture, width=1500, height=1000)
        return Image(REST.kegg_get(input2, "image").read())

    if input2 is not None:
        # Find a compound by name (e.g. vanillin)
        if input1 == "find_molekyle":
            print(REST.kegg_find("compound", input2).read())
            return None

        # Get the entry information for a compound (e.g. cpd:C00755)
        if input1 == "info_molekyle":
            print(REST.kegg_get(input2).read())
            return None

        # Display the molecular structure of a compound (e.g. cpd:C00755)
        if input1 == "molekyle billede":
            return Image(REST.kegg_get(input2, "image").read())

        if input1 == "Enzyme molekyle":
            return to_df(REST.kegg_find("enzyme", input2).read())

    # The enzyme part: input1 is an enzyme entry id, input2 names a field.
    if input2 not in ("reaction", "classname", "genes"):
        # Reject unknown fields before spending a request on the entry.
        print("Du har indskrevet nogget der ikke er gældende. Prøv igen")
        return None

    from Bio.KEGG import Enzyme

    request = REST.kegg_get(input1)
    record = list(Enzyme.parse(request))[0]

    if input2 == "reaction":
        return record.reaction
    if input2 == "classname":
        return record.classname
    # input2 == "genes": each entry is a (gene id, gene symbol) pair.
    return [gene_id for gene_id, _gene_symbol in record.genes]