示例#1
0
def main():
    """Search KEGG for a pathway by name and print the names of its genes.

    The search phrase is taken from the first command-line argument; spaces
    are replaced with "+" before the query is sent. When several hits are
    returned the user is asked to pick one by its printed index. In the chosen
    identifier "map" is replaced with "hsa" before the KGML is downloaded and
    parsed.
    """
    if len(sys.argv) < 2:
        print("Usage: pass the pathway name as the first argument")
        return

    query = sys.argv[1].replace(" ", "+")
    result = KEGG_REST.kegg_find('PATHWAY', query)
    # Drop blank lines so that an empty body and a body consisting only of a
    # newline are both recognised as "no results".
    result_txt = [line for line in result.read().split('\n') if line.strip()]
    if not result_txt:
        print("Search found no results")
        return

    choice = 0
    if len(result_txt) > 1:
        print("More than 1 result:")
        for index, r in enumerate(result_txt):
            output = r.split("\t")
            if len(output) == 2:
                print(str(index) + "\t" + output[1])
        try:
            choice = int(input("Which one? "))
        except ValueError:
            print("Invalid choice")
            return
        # Reject indexes outside the printed list instead of failing later.
        if not 0 <= choice < len(result_txt):
            print("Invalid choice")
            return

    identifier = result_txt[choice].split("\t")[0].strip()
    identifier = identifier.replace("map", "hsa")

    pathway_kgml = KEGG_REST.kegg_get(identifier, "kgml")
    pathway = KEGG_KGML_PARSER.read(pathway_kgml)

    for i in pathway.genes:
        print(i.name)
def add_kegg_descript2(hit):
    """Look up a gene in KEGG and return its module, KO number and annotation.

    Every field that cannot be determined is reported as the string "none";
    if the initial gene search itself fails, all three fields are "none".

    Args:
        hit: Term passed to ``REST.kegg_find("genes", ...)`` and to
            ``REST.kegg_link('module', ...)``.

    Returns:
        The result of ``strip_lines_list([module, KEGG, ann])``, where ``ann``
        has first been passed through ``reduce_func_len``.
    """
    try:
        desc = REST.kegg_find("genes", hit).read()
        try:
            # First token shaped like "K" followed by five digits.
            KEGG = re.search(r"K[0-9]{5}", desc).group(0)
        except Exception:
            KEGG = "none"
        try:
            # The annotation is the rest of the line after the K number.
            # ``replace`` must be applied to the matched text: a Match object
            # has no such method, so calling it on the match always raised
            # and this branch could never succeed.
            ann = re.search(r"(?<=K[0-9]{5}).*", desc).group(0).replace("\n", "")
        except Exception:
            try:
                # Fallback: second tab-separated field, up to the first ";".
                ann = desc.split("\t")[1].split(";")[0].replace("\n", "")
            except Exception:
                ann = "none"
        try:
            mod = REST.kegg_link('module', hit).read()
            module = mod.split(":")[2].split("_")[-1].replace("\n", "")
        except Exception:
            module = "none"
    except Exception:
        # The gene search failed, so nothing can be derived.
        module = "none"
        KEGG = "none"
        ann = "none"
    ann = reduce_func_len(ann)
    return strip_lines_list([module, KEGG, ann])
def add_kegg_descript2(hit):
    """Return ``[module, KO number, annotation]`` for a KEGG gene search term.

    Each value falls back to the string "none" when it cannot be extracted,
    and all three are "none" when the gene search request fails.

    Args:
        hit: Term used for ``REST.kegg_find("genes", ...)`` and
            ``REST.kegg_link('module', ...)``.

    Returns:
        ``strip_lines_list([module, KEGG, ann])`` with ``ann`` shortened by
        ``reduce_func_len`` beforehand.
    """
    try:
        desc = REST.kegg_find("genes", hit).read()

        try:
            KEGG = re.search(r"K[0-9]{5}", desc).group(0)
        except Exception:
            KEGG = "none"

        try:
            # Take the matched text first and only then strip newlines; the
            # Match object itself has no ``replace`` method, which previously
            # forced every call into the fallback below.
            a = re.search(r"(?<=K[0-9]{5}).*", desc)
            ann = a.group(0).replace("\n", "")
        except Exception:
            try:
                # Fallback: text between the first tab and the first ";".
                ann = desc.split("\t")[1].split(";")[0].replace("\n", "")
            except Exception:
                ann = "none"

        try:
            mod = REST.kegg_link('module', hit).read()
            module = mod.split(":")[2].split("_")[-1].replace("\n", "")
        except Exception:
            module = "none"

    except Exception:
        # Gene lookup failed entirely.
        module = "none"
        KEGG = "none"
        ann = "none"
    ann = reduce_func_len(ann)
    return strip_lines_list([module, KEGG, ann])
def add_kegg_descript(hit):
    """Return ``[KO number, annotation]`` for a KEGG gene search term.

    The KO number is the first "K" + five digits found in the search result;
    the annotation is the remainder of that line after the KO number.
    Raises AttributeError when the result contains no such token.
    """
    gene_text = REST.kegg_find("genes", hit).read()
    ko_number = re.search(r"K[0-9]{5}", gene_text).group(0)
    annotation = re.search(r"(?<=K[0-9]{5}).*", gene_text).group(0)
    return [ko_number, annotation]
def add_kegg_descript(hit):
    """Extract the KO number and the text following it from a gene search.

    Returns a two-element list ``[KEGG, ann]``. An AttributeError is raised
    if the search result contains no "K" + five digits token.
    """
    search_result = REST.kegg_find("genes", hit).read()
    # Same two patterns, applied in order: the KO number, then what follows.
    patterns = (r"K[0-9]{5}", r"(?<=K[0-9]{5}).*")
    return [re.search(pattern, search_result).group(0) for pattern in patterns]
示例#6
0
def kegg_search(database, query):
    """Search a KEGG database and return the identifiers of all hits.

    Args:
        database: Database name passed to ``KEGG_REST.kegg_find``.
        query: Search phrase; spaces are replaced with "+" before sending.

    Returns:
        A list with the first tab-separated field of every non-empty result
        line, or an empty list when there are no hits.
    """
    response = KEGG_REST.kegg_find(database, query.replace(" ", "+"))
    # splitlines() copes with an empty body and with a final line that lacks
    # a trailing newline; blank lines carry no hit and are skipped.
    return [
        line.split('\t')[0]
        for line in response.read().splitlines()
        if line
    ]
示例#7
0
def get_EC_num(geneName):
    """Return the EC numbers KEGG associates with a gene, comma-separated.

    The gene is searched in the KEGG 'genes' database; the first result line
    containing 'eco:' supplies a name (the text between the first tab and the
    first ';'), which is then searched in the 'enzyme' database.

    Args:
        geneName: Search term for ``REST.kegg_find('genes', ...)``.

    Returns:
        A string such as ``'1.1.1.3,2.7.2.4'`` (lower-cased response text), or
        ``''`` when no usable gene line or enzyme entry is found.
    """
    # retrieve gene data from KEGG
    gene_lines = REST.kegg_find('genes', geneName).read().lower().splitlines()

    # first line mentioning 'eco:' carries the name used for the enzyme search
    gene_line = next((line for line in gene_lines if 'eco:' in line), '')
    if gene_line == '':
        return ''

    try:
        enzymeName = gene_line[gene_line.index('\t') + 1:gene_line.index(';')]
    except ValueError:
        # The line has no tab or no ';', so no name can be extracted.
        return ''

    # find enzyme name in KEGG and get ECNums associated to it
    enzyme_lines = REST.kegg_find('enzyme', enzymeName).read().lower().splitlines()

    ecNumList = []
    for line in enzyme_lines:
        try:
            # EC number sits between the first ':' and the first tab.
            ecNumList.append(line[line.index(':') + 1:line.index('\t')])
        except ValueError:
            # Any line without that shape (e.g. a blank "no hits" line)
            # makes the whole lookup count as unsuccessful.
            return ''

    # An empty list joins to '', matching the "nothing found" result.
    return ','.join(ecNumList)
示例#8
0
def kooo(cccc):
    """Return the KEGG genes linked to the KO entry of the first gene hit.

    Steps: search 'genes' for ``cccc``, take the first whitespace-delimited
    token of the response, link it to 'ko', take the first ``ko:...`` token of
    that response and link it back to 'genes'.

    Args:
        cccc: Search term for ``REST.kegg_find("genes", ...)``.

    Returns:
        The text of the final ``REST.kegg_link("genes", ...)`` response, or
        ``""`` when the gene search or the KO link yields nothing usable.
    """
    gene_hits = REST.kegg_find("genes", cccc).read()
    # Without re.MULTILINE "^" only anchors at the start of the response, so
    # this is the identifier of the first hit (if any).
    leading_ids = re.findall(r'^\S+', gene_hits)
    if not leading_ids:
        return ""

    ko_links = REST.kegg_link("ko", leading_ids[0]).read()
    print("xxx", ko_links)
    if len(ko_links) < 4:
        return ""

    ko_ids = re.findall(r'ko:\S+', ko_links)
    if not ko_ids:
        return ""

    return REST.kegg_link("genes", ko_ids[0]).read()
示例#9
0
def kegg_rest(type: str,
              argument: str,
              optional_argument: str = "",
              sleep_time: float = .5) -> List[str]:
    """This function calls Biopython's KEGG REST function and returns the lines as a string list.

    All empty lines are deleted from the list as they do not contain any information.

    Arguments
    ----------
    * type: str ~ The KEGG REST action. Can be either 'info', 'get', 'link', 'list' or 'find'.
    * argument: str ~ The argument for the KEGG order.
    * optional_argument: str="" ~ The second argument, which is passed on for the 'link',
      'list' and 'find' actions and ignored for 'info' and 'get'.
    * sleep_time: float=0.5 ~ The time in seconds that shall be waited after a REST action
      is performed, to avoid contacting the server too frequently. The original author
      did not find a KEGG-specified minimum interval.

    Raises
    ----------
    * ValueError ~ If type is not one of the supported actions.
    """
    # Execute correct Biopython KEGG REST function.
    if type == "info":
        kegg_data = REST.kegg_info(argument)
    elif type == "get":
        kegg_data = REST.kegg_get(argument)
    elif type == "link":
        kegg_data = REST.kegg_link(argument, optional_argument)
    elif type == "list":
        kegg_data = REST.kegg_list(argument, optional_argument)
    elif type == "find":
        kegg_data = REST.kegg_find(argument, optional_argument)
    else:
        # Fail early and clearly instead of sleeping and then hitting an
        # unbound local variable.
        raise ValueError(
            f"Unknown KEGG REST action {type!r}; expected one of "
            "'info', 'get', 'link', 'list', 'find'."
        )

    # Wait the sleep time doing nothing.
    time.sleep(sleep_time)

    # Get one string per line of the KEGG REST result and delete empty lines.
    return [line for line in kegg_data.read().split("\n") if len(line) > 0]
示例#10
0
def get_metabolite_ID(metName):
    """Return the numeric part of the KEGG compound ID for a metabolite name.

    The 'compound' search result is lower-cased, the 'cpd:' prefix removed and
    each line split into its ID and '; '-separated names. The first line that
    lists ``metName`` (case-insensitively, as an exact field) determines the
    ID; its leading letter is dropped and the rest converted to int.

    Returns 0 when no line lists the name.
    """
    raw_text = "".join(REST.kegg_find('compound', metName).read())
    normalised = raw_text.lower().replace('cpd:', '').replace('\t', '; ')

    for record in normalised.splitlines():
        fields = record.split('; ')
        if metName.lower() in fields:
            compound_id = fields[0]
            # An empty ID field counts as "not found".
            return int(compound_id[1:]) if compound_id != '' else 0

    return 0
示例#11
0
def t_KEGG_Query():
    """Tests Bio.KEGG API Wrapper.

    Issues a fixed sequence of REST requests; for each one the response is
    read in full and the URL that was requested is printed.
    """
    print("Testing Bio.KEGG.query\n\n")

    # (REST function name, positional arguments), in the order they are sent.
    requests = (
        # info tests
        ("kegg_info", ("kegg",)),
        ("kegg_info", ("pathway",)),
        # list tests
        ("kegg_list", ("pathway",)),
        ("kegg_list", ("pathway", "hsa")),
        ("kegg_list", ("organism",)),
        ("kegg_list", ("hsa",)),
        ("kegg_list", ("T01001",)),
        ("kegg_list", ("hsa:10458+ece:Z5100",)),
        ("kegg_list", (["hsa:10458", "ece:Z5100"],)),
        ("kegg_list", ("cpd:C01290+gl:G00092",)),
        ("kegg_list", (["cpd:C01290", "gl:G00092"],)),
        ("kegg_list", ("C01290+G00092",)),
        ("kegg_list", (["C01290", "G00092"],)),
        # find tests
        ("kegg_find", ("genes", "shiga+toxin")),
        ("kegg_find", ("genes", ["shiga", "toxin"])),
        ("kegg_find", ("compound", "C7H10O5", "formula")),
        ("kegg_find", ("compound", "O5C7", "formula")),
        ("kegg_find", ("compound", "174.05", "exact_mass")),
        ("kegg_find", ("compound", "300-310", "mol_weight")),
        # get tests
        ("kegg_get", ("cpd:C01290+gl:G00092",)),
        ("kegg_get", (["cpd:C01290", "gl:G00092"],)),
        ("kegg_get", ("C01290+G00092",)),
        ("kegg_get", (["C01290", "G00092"],)),
        ("kegg_get", ("hsa:10458+ece:Z5100",)),
        ("kegg_get", (["hsa:10458", "ece:Z5100"],)),
        ("kegg_get", ("hsa:10458+ece:Z5100", "aaseq")),
        ("kegg_get", (["hsa:10458", "ece:Z5100"], "aaseq")),
        ("kegg_get", ("hsa05130", "image")),
        # conv tests
        ("kegg_conv", ("eco", "ncbi-geneid")),
        ("kegg_conv", ("ncbi-geneid", "eco")),
        ("kegg_conv", ("ncbi-gi", "hsa:10458+ece:Z5100")),
        ("kegg_conv", ("ncbi-gi", ["hsa:10458", "ece:Z5100"])),
        # link tests
        ("kegg_link", ("pathway", "hsa")),
        ("kegg_link", ("hsa", "pathway")),
        ("kegg_link", ("pathway", "hsa:10458+ece:Z5100")),
        ("kegg_link", ("pathway", ["hsa:10458", "ece:Z5100"])),
    )

    for func_name, args in requests:
        resp = getattr(REST, func_name)(*args)
        resp.read()
        print(resp.url)
示例#12
0
def main():
    """Load a KEGG pathway into a Neo4j database.

    Expects the name of the pathway as the first command-line argument. The
    pathway is searched in KEGG (asking the user to choose when there are
    several hits), "map" in its identifier is replaced with ``ORGANISM``, and
    its KGML is downloaded and parsed. Connection settings are read from the
    "KGML2NEO4J" section of the file "server_config".

    WARNING: all existing nodes in the target database are deleted before the
    pathway is inserted.
    """
    if len(sys.argv) < 2:
        print("Usage: pass the pathway name as the first argument")
        return

    # Get the KGML from KEGG
    query = sys.argv[1].replace(" ", "+")
    result = KEGG_REST.kegg_find('PATHWAY', query)
    # Drop blank lines so that an empty body and a body consisting only of a
    # newline are both recognised as "no results".
    result_txt = [line for line in result.read().split('\n') if line.strip()]
    if not result_txt:
        print("Search found no results")
        return

    choice = 0
    if len(result_txt) > 1:
        print("More than 1 result:")
        for index, r in enumerate(result_txt):
            output = r.split("\t")
            if len(output) == 2:
                print(str(index) + "\t" + output[1])
        try:
            choice = int(input("Which one? "))
        except ValueError:
            print("Invalid choice")
            return
        # Reject indexes outside the printed list instead of failing later.
        if not 0 <= choice < len(result_txt):
            print("Invalid choice")
            return

    identifier = result_txt[choice].split("\t")[0].strip()
    identifier = identifier.replace("map", ORGANISM)

    pathway_kgml = KEGG_REST.kegg_get(identifier, "kgml")
    pathway = KEGG_KGML_PARSER.read(pathway_kgml)
    config = configparser.ConfigParser()
    config.read("server_config")
    if "KGML2NEO4J" not in config:
        print("Server config not found!")
        return

    username = config["KGML2NEO4J"]['username']
    password = config["KGML2NEO4J"]['password']
    server_uri = config["KGML2NEO4J"]['uri']

    db = database(server_uri, username, password)

    # Start from an empty graph.
    db.run_query("MATCH (n) DETACH DELETE n")

    query_list = [
        db.make_gene_query(pathway.genes),
        db.make_compound_query(pathway.compounds),
        db.make_reaction_query(pathway.reaction_entries),
        db.make_map_query(pathway.maps),
        db.make_relations_query(pathway.relations)
    ]

    # Join only the non-empty fragments; with nothing to create, sending a
    # bare "CREATE" statement would be invalid, so the query is skipped.
    fragments = [q for q in query_list if len(q) > 0]
    if fragments:
        db.run_query("CREATE " + ",".join(fragments))

    # Merge matching nodes
    merge_query = """MATCH (n1),(n2)
                    WHERE ANY (x IN n1.name WHERE x IN n2.name) and id(n1) < id(n2)
                    WITH [n1,n2] as ns
                    CALL apoc.refactor.mergeNodes(ns) YIELD node
                    RETURN node"""

    db.run_query(merge_query)
示例#13
0
def t_KEGG_Query():
    """Tests Bio.KEGG API Wrapper.

    Every request below is consumed completely and the URL it resolved to is
    printed, in the order the requests are written.
    """

    def _consume(resp):
        # Read the whole response, then report which URL was requested.
        resp.read()
        print(resp.url)

    print("Testing Bio.KEGG.query\n\n")

    # info tests
    _consume(REST.kegg_info("kegg"))
    _consume(REST.kegg_info("pathway"))

    # list tests
    _consume(REST.kegg_list("pathway"))
    _consume(REST.kegg_list("pathway", "hsa"))
    _consume(REST.kegg_list("organism"))
    _consume(REST.kegg_list("hsa"))
    _consume(REST.kegg_list("T01001"))
    _consume(REST.kegg_list("hsa:10458+ece:Z5100"))
    _consume(REST.kegg_list(["hsa:10458", "ece:Z5100"]))
    _consume(REST.kegg_list("cpd:C01290+gl:G00092"))
    _consume(REST.kegg_list(["cpd:C01290", "gl:G00092"]))
    _consume(REST.kegg_list("C01290+G00092"))
    _consume(REST.kegg_list(["C01290", "G00092"]))

    # find tests
    _consume(REST.kegg_find("genes", "shiga+toxin"))
    _consume(REST.kegg_find("genes", ["shiga", "toxin"]))
    _consume(REST.kegg_find("compound", "C7H10O5", "formula"))
    _consume(REST.kegg_find("compound", "O5C7", "formula"))
    _consume(REST.kegg_find("compound", "174.05", "exact_mass"))
    _consume(REST.kegg_find("compound", "300-310", "mol_weight"))

    # get tests
    _consume(REST.kegg_get("cpd:C01290+gl:G00092"))
    _consume(REST.kegg_get(["cpd:C01290", "gl:G00092"]))
    _consume(REST.kegg_get("C01290+G00092"))
    _consume(REST.kegg_get(["C01290", "G00092"]))
    _consume(REST.kegg_get("hsa:10458+ece:Z5100"))
    _consume(REST.kegg_get(["hsa:10458", "ece:Z5100"]))
    _consume(REST.kegg_get("hsa:10458+ece:Z5100", "aaseq"))
    _consume(REST.kegg_get(["hsa:10458", "ece:Z5100"], "aaseq"))
    _consume(REST.kegg_get("hsa05130", "image"))

    # conv tests
    _consume(REST.kegg_conv("eco", "ncbi-geneid"))
    _consume(REST.kegg_conv("ncbi-geneid", "eco"))
    _consume(REST.kegg_conv("ncbi-gi", "hsa:10458+ece:Z5100"))
    _consume(REST.kegg_conv("ncbi-gi", ["hsa:10458", "ece:Z5100"]))

    # link tests
    _consume(REST.kegg_link("pathway", "hsa"))
    _consume(REST.kegg_link("hsa", "pathway"))
    _consume(REST.kegg_link("pathway", "hsa:10458+ece:Z5100"))
    _consume(REST.kegg_link("pathway", ["hsa:10458", "ece:Z5100"]))
示例#14
0
import Bio.KEGG.REST as kegg

# Search the ORTHOLOGY database for PTEN and fetch the entry of the first hit.
result = kegg.kegg_find('ORTHOLOGY', 'PTEN')
result = result.read().split('\n')
gene = kegg.kegg_get(result[0].split('\t')[0])
gene = gene.read().split('\n')
pathways = []
index = 0
# Skip ahead to the first line mentioning PATHWAY; the bounds check keeps the
# scan from running past the end when the entry has no such line.
while index < len(gene) and 'PATHWAY' not in gene[index]:
    index += 1
# Collect lines up to (not including) the first line mentioning DISEASE, or
# up to the end of the entry if there is none.
while index < len(gene) and 'DISEASE' not in gene[index]:
    pathways.append(gene[index])
    index += 1

print("The PTEN gene is involved in these processes: ")
for i in pathways:
    print(i)
def KEGG(input1, input2):
    """Run one of several KEGG queries selected by ``input1`` / ``input2``.

    Supported combinations:
        * ``"info", "alt"``: print the KEGG database overview.
        * ``"pathway_overview", "alle"``: all PATHWAY entries via ``to_df``.
        * ``"print_pathway", "alle" | <pathway id>``: pathway image.
        * ``"find_molekyle", <name>``: print compound search results.
        * ``"info_molekyle", <entry id>``: print the entry (e.g. cpd:C00755).
        * ``"molekyle billede", <entry id>``: image for the entry.
        * ``"Enzyme molekyle", <name>``: enzyme search results via ``to_df``.
        * ``<enzyme entry id>, "reaction" | "classname" | "genes"``: the
          corresponding attribute of the first parsed Enzyme record.

    Returns:
        Depends on the branch (``None`` for the printing branches, the value
        of ``to_df``/``Image``, or record data). Any other combination prints
        a message and returns ``None``.
    """
    # Print overview
    if input1 == "info" and input2 == "alt":
        # Only this branch uses the overview, so the request is made here
        # instead of on every call.
        return print(REST.kegg_info("kegg").read())

    # Get all entries in the PATHWAY database as a dataframe
    if input1 == "pathway_overview" and input2 == "alle":
        result = REST.kegg_list("pathway").read()
        return to_df(result)

    # Show all biosynthesis pathways, or one specific pathway
    if input1 == "print_pathway":
        if input2 == "alle":
            result = REST.kegg_get("map01100", "image").read()
            img = Image(result, width=1500, height=1000)
        else:
            result = REST.kegg_get(input2, "image").read()
            img = Image(result)
        return img

    # Find a compound by name (e.g. vanillin)
    if input1 == "find_molekyle" and input2 is not None:
        result = REST.kegg_find("compound", input2).read()  # cpd:C00755
        return print(result)

    if input1 == "info_molekyle" and input2 is not None:  # cpd:C00755
        # Get the entry information for the compound
        result = REST.kegg_get(input2).read()
        return print(result)

    # Display the molecular structure image for a compound entry
    if input1 == "molekyle billede" and input2 is not None:
        result = REST.kegg_get(input2, "image").read()  # "cpd:C00755"
        return Image(result)

    if input1 == "Enzyme molekyle" and input2 is not None:
        result = REST.kegg_find("enzyme", input2).read()
        return to_df(result)

    # Enzyme section: input1 is treated as an enzyme entry identifier
    from Bio.KEGG import Enzyme
    request = REST.kegg_get(input1)
    records = Enzyme.parse(request)
    record = list(records)[0]

    if input2 == "reaction":
        return record.reaction

    if input2 == "classname":
        return record.classname

    if input2 == "genes":
        # Keep only the first element of each (id, symbol) pair.
        return [gene_id for gene_id, gene_symbol in record.genes]

    print("Du har indskrevet nogget der ikke er gældende. Prøv igen")