Exemplo n.º 1
0
def load_kegg(gene, organism):
    """Return the KEGG pathway names of a gene as one comma-separated string.

    Args:
        gene: Gene identifier passed to ``KEGG.get_pathway_by_gene``.
        organism: Organism code passed to the same call.

    Returns:
        The values of the mapping returned by the lookup, joined with
        ``', '``. An empty string is returned when the lookup yields nothing
        or when it fails (a message is printed in the failure case).
    """
    k = KEGG()
    result_line = ''
    try:
        pathways = k.get_pathway_by_gene(gene, organism)
        if pathways:
            result_line = ', '.join(pathways.values())
    except Exception:
        # Best-effort lookup: any ordinary failure is reported and yields ''.
        # ``Exception`` (rather than a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate so the program can still be stopped.
        print("    Gene '{0}' is not in KEGG database".format(gene))
    return result_line
Exemplo n.º 2
0
def kegg(inputInteractions):
    """Pair the first element of every interaction with its gene's pathways.

    Each element of ``inputInteractions`` is indexed as ``entry[0]`` and
    ``entry[1]``; ``entry[1].getName()`` supplies the gene name that is looked
    up in KEGG with the organism code ``"hsa"``.

    Returns:
        A list of ``[entry[0], pathway_value]`` pairs, one per value of the
        mapping returned for each gene. Genes whose lookup raises
        ``AttributeError`` are reported on stdout and contribute nothing.
    """
    from bioservices.kegg import KEGG
    service = KEGG()
    collected = []
    for entry in inputInteractions:
        print(entry[1].getName())
        try:
            found = service.get_pathway_by_gene(entry[1].getName(), "hsa")
            if not found:
                continue
            for _pathway_id, pathway_value in list(found.items()):
                collected.append([entry[0], pathway_value])
        except AttributeError:
            print("Gene name error!!!!!!!!!")
    return collected
Exemplo n.º 3
0
    else:
        blast_text = 'NULL'

    # Missing hits are written as the literal placeholder 'NULL'.
    pfam_text = pfamHits.get(ids)
    if pfam_text is None:
        pfam_text = 'NULL'

    prosite_text = prositeHits.get(ids)
    if prosite_text is None:
        prosite_text = 'NULL'

    # Get the KEGG hits
    kegg = KEGG()
    gene_id = gene_ids[ids]
    KEGG_IDs = kegg.get_pathway_by_gene(gene_id, "acb")
    if KEGG_IDs is not None:
        # One "<value> [<key>]" item per returned entry, separated by "; ".
        # An empty mapping yields an empty string, not 'NULL'.
        kegg_text = '; '.join(
            KEGG_IDs[KEGG_ID] + ' [' + KEGG_ID + ']' for KEGG_ID in KEGG_IDs)
    else:
        kegg_text = 'NULL'
    comments = 'NULL'

    # Tab-separated output row terminated by a newline.
    row = ids + '\t' + blast_text + '\t' + pfam_text + '\t' + prosite_text + '\t' + kegg_text + '\t' + GO_IDs + '\t' + comments + '\n'
    output.write(row)

output.close()
Exemplo n.º 4
0
# Look up the KEGG pathways (organism code "lpl") of every gene ID in the
# RNA-Seq count table and write one "<ID>\t<pathways>" line per gene.
import sys

from bioservices.kegg import KEGG

kegg = KEGG()

# The output file is opened first, so it is created/truncated even when the
# count table cannot be opened.
with open("/home/enrico/Desktop/Snakemake/Kegg.txt", 'w') as output:
    with open("/home/enrico/Desktop/Snakemake/RNA-Seq-counts.txt",
              "r",
              encoding="latin-1") as counts:
        # The two leading lines of the table are not data rows.
        counts.readline()
        counts.readline()
        for line_number, line in enumerate(counts, start=3):
            line = line.strip()
            if not line:
                # Blank lines (including a trailing one) carry no gene.
                continue
            fields = line.split("\t")
            # A data row is the gene ID followed by eight count columns.
            if len(fields) != 9:
                sys.stderr.write(
                    "Skipping line {0}: expected 9 tab-separated fields, "
                    "found {1}\n".format(line_number, len(fields)))
                continue
            ID = fields[0]
            try:
                res = str(kegg.get_pathway_by_gene(ID, "lpl")).strip('[]')
            except Exception as err:
                # Best effort: report the failed lookup and keep going with
                # the remaining genes instead of silently stopping.
                sys.stderr.write(
                    "KEGG lookup failed for {0}: {1}\n".format(ID, err))
                continue
            if res != "None":
                output.write(ID + "\t" + res + "\n")
            else:
                output.write(ID + "\t" + "No pathway found" + "\n")
Exemplo n.º 5
0
    else:
        dr['refseq_genomic'] = 'NA'
        dr['refseq_rna'] = 'NA'
        dr['refseq_protein'] = 'NA'
    # go
    # Each GO column holds str(ld2dl(...)) of the matching aspect with quote
    # characters and u-prefixes removed (presumably Python 2 unicode reprs),
    # or 'NA' when the record has no 'go' entry or lacks that aspect.
    # ``in`` replaces dict.has_key(), which does not exist in Python 3.
    go_terms = dq['go'] if 'go' in dq else {}
    for go_column, go_aspect in (('go.cc', 'CC'), ('go.mf', 'MF'),
                                 ('go.bp', 'BP')):
        if go_aspect in go_terms:
            dr[go_column] = str(ld2dl(go_terms[go_aspect])).replace(
                'u"', "").replace('"', "").replace("u'", "").replace("'", "")
        else:
            dr[go_column] = 'NA'
    # kegg
    # KEGG is only queried when "<kt>:<entrezgene><TAB>" occurs in kge.
    # NOTE(review): kt and the gene id are used as a regex unescaped - confirm
    # they never contain regex metacharacters.
    if re.search(kt + ':' + dr['entrezgene'] + "\t", kge) is not None:
        dr['kegg'] = str(kg.get_pathway_by_gene(dr['entrezgene'], kt)).replace(
            'u"', "").replace('"', "").replace("u'", "").replace("'", "")
    else:
        dr['kegg'] = 'NA'

    sys.stderr.write("\t".join([dr[kr] for kr in lr]) + "\n")  # write

if fi != "-":
    f.close()
Exemplo n.º 6
0
class KEGGPathways:
    """
    KEGG PATHWAY Database API

    Thin wrapper around a ``KEGG`` service object bound to one organism.
    """
    def __init__(self, organism="Homo sapiens"):
        """

        Args:
            organism: organism name (ex. 'Homo sapiens', 'human'); it is
                lowercased before being resolved to a KEGG organism code.

        Raises:
            KeyError: if the organism name is not in the KEGG organism list.

        """
        self.database = KEGG()
        self.organism = self.get_organism_code(organism.lower())

    def search_by_gene(self, gene_name: str) -> dict:
        """

        Args:
            gene_name: gene name (ex. 'BRCA2')

        Returns:
            Dictionary with ids of all pathways containing given gene as keys and their full names as values.
            An empty dictionary is returned when the lookup yields nothing or raises AttributeError.

        """
        try:
            pathways = self.database.get_pathway_by_gene(
                gene_name, self.organism)
            return pathways if pathways else {}
        except AttributeError:
            return {}

    def get_pathway(self, pathway_id: str, self_loops: bool = False):
        """

        Args:
            pathway_id: KEGG pathway id (ex. 'hsa04110')
            self_loops: information about whether or not include self loops in returned graph

        Returns:
            `networkx.DiGraph` object: Directed graph depicting pathway, with a comma-separated string
            containing gene names as graph nodes and directed edges representing interactions between genes.
            Each edge has an attribute 'type', which is a list of interaction types between two nodes.
            The graph is empty when the pathway cannot be parsed.

        """

        G = nx.DiGraph()
        try:
            pathway = self.database.parse_kgml_pathway(pathway_id)
        except TypeError:
            # incorrect pathway_id
            pathway = None

        if pathway:
            names = {}
            for entry in pathway['entries']:
                # only intra-pathway interactions taken into account
                if entry['gene_names']:
                    names[entry['id']] = {
                        'name': entry['gene_names'],
                        'type': entry['type']
                    }

            for rel in pathway['relations']:
                if rel['entry1'] in names and rel['entry2'] in names:
                    source = names[rel['entry1']]
                    target = names[rel['entry2']]
                    e1 = source['name']
                    e2 = target['name']
                    G.add_node(e1, type=source['type'])
                    G.add_node(e2, type=target['type'])
                    if G.has_edge(e1, e2):
                        # repeated relation: extend the list of interaction types
                        G[e1][e2]['type'] = G[e1][e2]['type'] + [rel['name']]
                    elif e1 != e2 or self_loops:
                        # assumption of interaction direction entry1 -> entry2 #TODO: validate
                        G.add_edge(e1, e2, type=[rel['name']])

        not_gene_nodes = []
        # Snapshots are taken because edges are added while walking the graph.
        for node in list(G.nodes()):
            # only interactions between genes
            # G.nodes[...] replaces G.node[...], which NetworkX 2.4 removed.
            if G.nodes[node]['type'] != 'gene':
                in_edges = list(G.in_edges(node))
                out_edges = list(G.out_edges(node))
                # Bridge every predecessor to every successor of the non-gene
                # node before the node itself is dropped.
                for in_edge in in_edges:
                    for out_edge in out_edges:
                        if in_edge[0] != out_edge[1] or self_loops:
                            G.add_edge(in_edge[0],
                                       out_edge[1],
                                       type=['indirect'])
                not_gene_nodes.append(node)
        G.remove_nodes_from(not_gene_nodes)

        return G

    def fetch_organism_codes(self) -> dict:
        """

        Returns:
            Dictionary with lowercase organism names as keys, and KEGG organism codes as values
            {   'homo sapiens' : 'hsa',
                'human' : 'hsa',
                ...
            }

        """
        codes = {}
        for line in self.database.list('organism').split('\n'):
            if not line:
                continue
            fields = line.split('\t')
            if len(fields) < 3:
                # not a "<id>\t<code>\t<name>..." record
                continue
            code = fields[1]
            org = fields[2]
            if '(' in org:
                # "Name (alias)": drop the last character (the closing
                # parenthesis) and register every part as its own key.
                for alias in org[:-1].split('('):
                    codes[alias.strip().lower()] = code
            else:
                # Lowercased like the aliased names, so that the lowercase
                # lookup done by __init__ can find it.
                codes[org.strip().lower()] = code
        return codes

    def get_organism_code(self, org: str) -> str:
        """

        Args:
            org: organism name (ex. 'homo sapiens', 'human'); must be lowercase,
                as the keys built by `fetch_organism_codes` are lowercase

        Returns:
            str: KEGG organism code

        Raises:
            KeyError: if `org` is not a known organism name (a message is printed first)

        """
        codes = self.fetch_organism_codes()
        try:
            return codes[org]
        except KeyError:
            print('Invalid organism name.')
            raise

    def get_gene_code(self, gen: str):
        """

        Args:
            gen: gene name (ex. 'FGR', 'NIPAL1')

        Returns:
            Result of the KEGG `find` query for the gene in this organism;
            an empty string (after printing a message) when the query returns only a newline.

        """
        code_gen = self.database.find(self.organism, gen)

        if code_gen == '\n':
            code_gen = ''
            print('Invalid gene name: ' + str(gen))
        return code_gen
Exemplo n.º 7
0
len(A)

# In[79]:

#pwd

# In[81]:

# Print "<symbol>\t<pathway key>\t<pathway value>" for every entry of A whose
# first Symbol value is not 'nan' and for which KEGG ("hsa") returns pathways.
# NOTE(review): the file is opened (and truncated) but nothing is written to
# it - the writes below are commented out and results only go to stdout.
with open('./pathway_baicalin_386_genes.txt', 'w') as f:
    for i in A:
        try:
            listname = Symbol.get(i)
            n = str(listname[0])
            if n == 'nan':
                continue
            j = k.get_pathway_by_gene(n, "hsa")
            if j is None:
                continue
            for keys, values in j.items():
                print(n, keys, values, sep="\t")
            #f.write("%s \n" %n)
            #for keys,values in j.items():
            #    f.write("%s %s %s\n" %(n,keys,values))
        except AttributeError:
            print('error')

# In[134]:

#Print algorithms
#for i in A:
#    try:
#        listname=Symbol.get(i)