Exemplo n.º 1
0
def create_keggids_csv(filename, org, output_filename="ecoli_keggids.csv"):
    '''
    Look up a KEGG entry for every gene of an organism and save the result
    as a tab-separated file.

        args: filename is the tab-separated file containing gene name / locus
              for all the organism genes; it must provide the columns
              'Locus' and 'Locus tag'
              org is the abbreviation of the organism in kegg
              output_filename is the path of the file to write; the default
              keeps the historical name "ecoli_keggids.csv"

        The written file has the columns 'gene', 'b_id' and 'kegg_id'.
        Genes for which the KEGG lookup yields no usable result get an empty
        'kegg_id' instead of aborting the whole run.

    '''

    # Open csv as pandas dataframe (df).
    # `tupleize_cols` was dropped here: it was removed from read_csv in
    # pandas 1.0 and only ever affected multi-row headers.
    df = pd.read_csv(filename, sep="\t")
    gene_list = df['Locus'].tolist()
    bid_list = df['Locus tag'].tolist()

    k = KEGG()

    # Find the KEGG entry for each gene (one remote query per locus tag).
    kid_list = []
    for gene in bid_list:
        kstrg = k.find(org, gene)
        # A failed or empty lookup must not discard the results gathered so
        # far, so anything that is not a string with at least two tokens is
        # recorded as missing.
        tokens = kstrg.split() if isinstance(kstrg, str) else []
        # NOTE(review): the second whitespace-separated token is what the
        # original code stored as 'kegg_id' - confirm that this is the
        # intended field of the find result.
        kid_list.append(tokens[1] if len(tokens) > 1 else None)

    # Create new df and save it to csv.
    new_df = pd.DataFrame(
        {'gene': gene_list, 'b_id': bid_list, 'kegg_id': kid_list},
        columns=['gene', 'b_id', 'kegg_id'])

    new_df.to_csv(output_filename, sep="\t", index=False)
def queryKegg(theIDs, organism="acb"):
    """Fetch and parse the KEGG entry for each of the given identifiers.

    For every identifier the first three characters are dropped (presumably
    a fixed prefix - confirm against the caller), the remainder is searched
    in the KEGG database of ``organism``, and the first tab-separated field
    of the search result is fetched and parsed.

    Args:
        theIDs: Iterable of identifier strings.
        organism: KEGG database / organism code to search in. Defaults to
            ``"acb"``, the value that used to be hard-coded.

    Returns:
        A tuple ``(keggData, IDlist)`` of two parallel lists: the parsed
        KEGG entries and the identifiers (without their first three
        characters) they were looked up with.
    """
    print("Currently querying KEGG...")
    k = KEGG()
    keggData = []
    IDlist = []

    # `raw_id` rather than `id`, so the builtin is not shadowed.
    for raw_id in theIDs:
        gene_id = raw_id[3:]
        hits = k.find(organism, gene_id)
        # Everything before the first tab of the result is the entry id.
        entry_id = hits.split('\t')[0]
        entry = k.get(entry_id)

        keggData.append(k.parse(entry))
        IDlist.append(gene_id)

    return keggData, IDlist
Exemplo n.º 3
0
def search(query,
           source="wikipathways",
           result_format="xml",
           species=None,
           genes=None,
           user=None):
    """Search pathway databases for pathways matching a free-text query.

    Args:
        query: Search text. It is converted with ``str()`` for the
            WikiPathways request and passed unchanged to KEGG.
        source: ``"wikipathways"``, ``"kegg"`` or ``"all"``
            (case-insensitive). Any other value yields an empty list.
        result_format: Unused; kept so existing callers keep working.
        species: Optional species filter, applied to WikiPathways only.
        genes: Gene-based KEGG search is not implemented; when this is not
            ``None`` the KEGG lookup is skipped entirely.
        user: Unused; kept so existing callers keep working.

    Returns:
        A list of ``gnomics.objects.pathway.Pathway`` objects, WikiPathways
        hits first, then KEGG hits.

    Raises:
        requests.HTTPError: If the WikiPathways service answers with an
            error status.
    """
    path_array = []
    source_key = source.lower()

    if source_key in ("wikipathways", "all"):
        url = "http://webservice.wikipathways.org/" + "/findPathwaysByText"
        # Let requests build the query string so that spaces, '&' and other
        # special characters in the search text are encoded correctly.
        params = {"query": str(query)}
        if species is not None:
            params["species"] = str(species)
        r = requests.get(url,
                         params=params,
                         headers={"Content-Type": "application/json"})
        # Raises for 4xx/5xx and is a no-op otherwise.
        r.raise_for_status()

        ns = "{http://www.wikipathways.org/webservice}"
        field_for_tag = {
            ns + "id": "identifier",
            ns + "score": "score",
            ns + "url": "url",
            ns + "name": "name",
            ns + "species": "species",
            ns + "revision": "revision",
        }

        # Identifiers already added. The result list holds Pathway objects,
        # so testing an identifier string against it never matched.
        seen_identifiers = set()
        for child in ET.fromstring(r.text):
            fields = {
                field_for_tag[subchild.tag]: subchild.text
                for subchild in child
                if subchild.tag in field_for_tag
            }
            identifier = fields.get("identifier")
            if identifier is None or identifier in seen_identifiers:
                continue
            seen_identifiers.add(identifier)
            path_array.append(
                gnomics.objects.pathway.Pathway(
                    identifier=identifier,
                    identifier_type="WikiPathways ID",
                    name=fields.get("name"),
                    taxon=fields.get("species"),
                    source="WikiPathways"))

    if source_key in ("kegg", "all") and genes is None:
        k = KEGG()
        list_of_pathways = k.find("pathway", query)

        # Checked in this order with a substring test, exactly like the
        # former if/elif chain.
        kegg_id_types = (
            ("map", "KEGG MAP PATHWAY ID"),
            ("ko", "KEGG KO PATHWAY ID"),
            ("ec", "KEGG EC PATHWAY ID"),
            ("rn", "KEGG RN PATHWAY ID"),
        )

        for line in list_of_pathways.split("\n"):
            columns = line.split("\t")
            if len(columns) < 2:
                # Blank line or a line without a name column.
                continue
            raw_id = columns[0].strip()
            # Drop a leading "db:" prefix when present; ids without one are
            # used as they are instead of raising IndexError.
            path_id = raw_id.split(":")[1] if ":" in raw_id else raw_id
            path_name = columns[1].strip()

            identifier_type = next(
                (label for tag, label in kegg_id_types if tag in path_id),
                None)
            if identifier_type is None:
                # Unknown kind of pathway id: dump the raw entry.
                print(k.get(path_id))
                continue
            path_array.append(
                gnomics.objects.pathway.Pathway(
                    identifier=path_id,
                    identifier_type=identifier_type,
                    source="KEGG",
                    name=path_name))

    return path_array
Exemplo n.º 4
0
class KEGGPathways:
    """
    KEGG PATHWAY Database API

    Thin wrapper around a ``KEGG`` client that is bound to one organism and
    can turn KGML pathways into ``networkx`` graphs.
    """
    def __init__(self, organism="Homo sapiens"):
        """

        Args:
            organism: organism name (ex. 'Homo sapiens', 'human'), any case

        Raises:
            KeyError: if the organism name is not known to KEGG.

        """
        self.database = KEGG()
        self.organism = self.get_organism_code(organism.lower())

    def search_by_gene(self, gene_name: str):
        """

        Args:
            gene_name: gene name (ex. 'BRCA2')

        Returns:
            Dictionary with ids of all pathways containing given gene as keys and their full names as values.
            Empty dictionary when nothing is found or the lookup fails with an AttributeError.

        """
        try:
            pathways = self.database.get_pathway_by_gene(
                gene_name, self.organism)
        except AttributeError:
            return {}
        return pathways or {}

    def get_pathway(self, pathway_id: str, self_loops: bool = False):
        """

        Args:
            pathway_id: KEGG pathway id (ex. 'hsa04110')
            self_loops: information about whether or not include self loops in returned graph

        Returns:
            `networkx.DiGraph` object: Directed graph depicting pathway, with a comma-separated string
            containing gene names as graph nodes and directed edges representing interactions between genes.
            Each edge has weight 'type', which is a list of interaction types between two nodes.
            The graph is empty when the pathway could not be parsed.

        """

        G = nx.DiGraph()
        try:
            pathway = self.database.parse_kgml_pathway(pathway_id)
        except TypeError:
            # incorrect pathway_id
            pathway = None

        if pathway:
            # entry id -> name and type of that entry
            names = {}
            for entry in pathway['entries']:
                # only intra-pathway interactions taken into account
                if entry['gene_names']:
                    names[entry['id']] = {
                        'name': entry['gene_names'],
                        'type': entry['type']
                    }

            for rel in pathway['relations']:
                if rel['entry1'] not in names or rel['entry2'] not in names:
                    continue
                first = names[rel['entry1']]
                second = names[rel['entry2']]
                e1 = first['name']
                e2 = second['name']
                G.add_node(e1, type=first['type'])
                G.add_node(e2, type=second['type'])
                if G.has_edge(e1, e2):
                    G[e1][e2]['type'] = G[e1][e2]['type'] + [rel['name']]
                elif e1 != e2 or self_loops:
                    # assumption of interaction direction entry1 -> entry2 #TODO: validate
                    G.add_edge(e1, e2, type=[rel['name']])

        not_gene_nodes = []
        # Snapshots are iterated because edges are added inside the loops.
        for node in list(G.nodes()):
            # only interactions between genes
            # `G.nodes[...]` replaces `G.node[...]`, which networkx 2.4 removed.
            if G.nodes[node]['type'] == 'gene':
                continue
            # Bridge every predecessor to every successor of the non-gene node.
            # NOTE(review): an existing direct edge between the two gets its
            # 'type' replaced by ['indirect'] - confirm this is intended.
            for in_edge in list(G.in_edges(node)):
                for out_edge in list(G.out_edges(node)):
                    if in_edge[0] != out_edge[1] or self_loops:
                        G.add_edge(in_edge[0],
                                   out_edge[1],
                                   type=['indirect'])
            not_gene_nodes.append(node)
        G.remove_nodes_from(not_gene_nodes)

        return G

    def fetch_organism_codes(self):
        """

        Returns:
            Dictionary with lowercase organism names as keys, and KEGG organism codes as values
            {   'homo sapiens' : 'hsa',
                'human' : 'hsa',
                ...
            }

        """
        codes = {}
        for line in self.database.list('organism').split('\n'):
            fields = line.split('\t')
            if len(fields) < 3:
                # blank or malformed line
                continue
            code = fields[1]
            org = fields[2]
            if '(' in org:
                # "Scientific name (common name)" -> both names map to the code
                if org.endswith(')'):
                    org = org[:-1]
                for alias in org.split('('):
                    codes[alias.strip().lower()] = code
            else:
                # Lowercased like the aliases above, so that the lowercased
                # lookups done by the constructor can find it.
                codes[org.strip().lower()] = code
        return codes

    def get_organism_code(self, org: str):
        """

        Args:
            org: organism name (ex. 'Homo sapiens', 'human') - lowercase and uppercase optional

        Returns:
            str: KEGG organism code

        Raises:
            KeyError: if the organism name is not known to KEGG.

        """
        codes = self.fetch_organism_codes()
        try:
            return codes[org.lower()]
        except KeyError:
            print('Invalid organism name.')
            raise

    def get_gene_code(self, gen: str):
        """

        Args:
            gen: gene name (ex. 'FGR', 'NIPAL1')

        Returns:
            KEGG gene code, as returned by the database search; empty string
            (after printing a message) when the search result is a bare newline

        """
        code_gen = self.database.find(self.organism, gen)

        if code_gen == '\n':
            code_gen = ''
            print('Invalid gene name: ' + str(gen))
        return code_gen
Exemplo n.º 5
0
def search(query, source="wikipathways", result_format="xml", species=None, genes=None, user=None):
    """Search pathway databases for pathways matching a free-text query.

    Args:
        query: Search text. It is converted with ``str()`` for the
            WikiPathways request and passed unchanged to KEGG.
        source: ``"wikipathways"``, ``"kegg"`` or ``"all"``
            (case-insensitive). Any other value yields an empty list.
        result_format: Unused; kept so existing callers keep working.
        species: Optional species filter, applied to WikiPathways only.
        genes: Gene-based KEGG search is not implemented; when this is not
            ``None`` the KEGG lookup is skipped entirely.
        user: Unused; kept so existing callers keep working.

    Returns:
        A list of ``gnomics.objects.pathway.Pathway`` objects, WikiPathways
        hits first, then KEGG hits.

    Raises:
        requests.HTTPError: If the WikiPathways service answers with an
            error status.
    """
    path_array = []
    source_key = source.lower()

    if source_key in ("wikipathways", "all"):
        url = "http://webservice.wikipathways.org/" + "/findPathwaysByText"
        # Let requests build the query string so that spaces, '&' and other
        # special characters in the search text are encoded correctly.
        params = {"query": str(query)}
        if species is not None:
            params["species"] = str(species)
        r = requests.get(url, params=params, headers={"Content-Type": "application/json"})
        # Raises for 4xx/5xx and is a no-op otherwise.
        r.raise_for_status()

        ns = "{http://www.wikipathways.org/webservice}"
        field_for_tag = {
            ns + "id": "identifier",
            ns + "score": "score",
            ns + "url": "url",
            ns + "name": "name",
            ns + "species": "species",
            ns + "revision": "revision",
        }

        # Identifiers already added. The result list holds Pathway objects,
        # so testing an identifier string against it never matched.
        seen_identifiers = set()
        for child in ET.fromstring(r.text):
            fields = {
                field_for_tag[subchild.tag]: subchild.text
                for subchild in child
                if subchild.tag in field_for_tag
            }
            identifier = fields.get("identifier")
            if identifier is None or identifier in seen_identifiers:
                continue
            seen_identifiers.add(identifier)
            path_array.append(gnomics.objects.pathway.Pathway(identifier=identifier, identifier_type="WikiPathways ID", name=fields.get("name"), taxon=fields.get("species"), source="WikiPathways"))

    if source_key in ("kegg", "all") and genes is None:
        k = KEGG()
        list_of_pathways = k.find("pathway", query)

        # Checked in this order with a substring test, exactly like the
        # former if/elif chain.
        kegg_id_types = (
            ("map", "KEGG MAP PATHWAY ID"),
            ("ko", "KEGG KO PATHWAY ID"),
            ("ec", "KEGG EC PATHWAY ID"),
            ("rn", "KEGG RN PATHWAY ID"),
        )

        for line in list_of_pathways.split("\n"):
            columns = line.split("\t")
            if len(columns) < 2:
                # Blank line or a line without a name column.
                continue
            raw_id = columns[0].strip()
            # Drop a leading "db:" prefix when present; ids without one are
            # used as they are instead of raising IndexError.
            path_id = raw_id.split(":")[1] if ":" in raw_id else raw_id
            path_name = columns[1].strip()

            identifier_type = next((label for tag, label in kegg_id_types if tag in path_id), None)
            if identifier_type is None:
                # Unknown kind of pathway id: dump the raw entry.
                print(k.get(path_id))
                continue
            path_array.append(gnomics.objects.pathway.Pathway(identifier=path_id, identifier_type=identifier_type, source="KEGG", name=path_name))

    return path_array