Exemplo n.º 1
0
def kegg_get_gene_id(g="CDK5", species_code="hsa"):
    """
    Return the KEGG gene identifier for a given common gene symbol.

    Sends a KEGG REST "find" request for the symbol and scans the returned
    lines. Each usable line is expected to be "<id>TAB<symbols>; <description>"
    where <symbols> is a comma separated list; the first line whose symbol
    list contains exactly g wins.

    @param g: The gene common name, like "OAS1"
    @param species_code: The code for the genome where this gene is from. See
    KEGG for more details. hsa is the human genome, mmu is mouse.
    @return: The kegg gene id (first tab-separated field of the matching
    line), or None if the request failed, returned no data, or no line lists
    g among its symbols.
    """
    rc = fetch_data("http://rest.kegg.jp/find/%s/%s" % (species_code, g), {}, method="POST")
    if rc[1] != 200:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Query failed with error code: %d" % rc[1])
        return None

    # Truthiness also covers a None payload, which len() would choke on.
    if not rc[0] or rc[0] == ["\n"]:
        print("Query succeeded, but has no data for gene: %s" % g)
        return None

    for line in rc[0]:
        fields = line.split("\t")
        if len(fields) < 2:
            # Blank or malformed line (e.g. a trailing newline): nothing to
            # parse, and indexing fields[1] would raise IndexError.
            continue
        # Keep only the symbol list: drop the description after the first
        # ";" and split the remaining names on ",".
        symbols = [name.strip() for name in fields[1].split(";")[0].split(",")]
        if g in symbols:
            return fields[0].strip()

    return None
Exemplo n.º 2
0
def kegg_get_pathways_with_gene(g="OAS1", species_code="hsa"):
    """
    Determines the set of known pathways associated to a specific gene.

    Resolves the gene symbol to a KEGG gene id, asks the KEGG REST "link"
    operation for the pathways linked to that id, and then looks up a name
    for every pathway id found.

    @param g: The common gene symbol
    @param species_code: Like "hsa" for human, "mmu" for mouse. See the
    KEGG documentation for available organism codes.
    @return: Tuple (kegg_path_names, kegg_path_ids) of two parallel lists, or
    None if the gene id could not be resolved, the request failed, or no
    pathway id could be read from the response. A pathway whose name lookup
    returns None is reported as "unknown".
    """
    kegg_id = kegg_get_gene_id(g, species_code)
    if kegg_id is None:
        # It is the gene id lookup that failed here, not a pathway lookup.
        print("Error: Could not resolve KEGG gene id for gene name %s" % g)
        return None

    rc = fetch_data("http://rest.kegg.jp/link/pathway/%s" % kegg_id, {}, method="POST")
    if rc[1] != 200:
        print("Query failed with error code: %d" % rc[1])
        return None

    # Truthiness also covers a None payload, which len() would choke on.
    if not rc[0] or rc[0] == ["\n"]:
        print("Query succeeded, but has no pathway ids that include gene: %s" % g)
        return None

    kegg_path_ids = []
    for line in rc[0]:
        fields = line.split("\t")
        if len(fields) < 2:
            # Blank or malformed line: skip instead of raising IndexError.
            continue
        # The pathway id is the second tab-separated field of each line.
        kegg_path_ids.append(fields[1].strip())

    if not kegg_path_ids:
        # Response contained lines, but none of them carried a pathway id.
        print("Query succeeded, but has no pathway ids that include gene: %s" % g)
        return None

    kegg_path_names = []
    for pid in kegg_path_ids:
        name = kegg_get_pathway_name(pid)
        # Keep both lists the same length even when a name lookup fails.
        kegg_path_names.append("unknown" if name is None else name)

    return (kegg_path_names, kegg_path_ids)
Exemplo n.º 3
0
def kegg_get_pathway_info(p="path:hsa05222"):
    """
    Fetch the KEGG "get" record for a pathway identifier.

    @param p: The kegg pathway identifier, like "path:hsa05222"
    @return: The first element of the fetch_data result (presumably the list
    of response lines -- confirm against fetch_data), or None if the status
    is not 200 or the payload is empty / a lone newline.
    """
    response = fetch_data("http://rest.kegg.jp/get/%s" % p, {}, method="POST")

    status = response[1]
    if status != 200:
        print("Query failed with error code: %d" % status)
        return None

    payload = response[0]
    is_empty = len(payload) < 1 or payload == ["\n"]
    if not is_empty:
        return payload

    print("Query succeeded, but has no data for pathway: %s" % p)
    return None
Exemplo n.º 4
0
def kegg_get_pathway_name(p="path:hsa05164"):
    """
    Returns the kegg pathway name (description) for a given path identifier.

    Sends a KEGG REST "list" request and reads the name from the second
    tab-separated field of the first returned line.

    @param p: The kegg pathway identifier
    @return: The stripped pathway name, or None if the request failed,
    returned no data, or the first line has no tab-separated name field.
    """
    rc = fetch_data("http://rest.kegg.jp/list/%s" % p, {}, method="POST")
    if rc[1] != 200:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Query failed with error code: %d" % rc[1])
        return None

    # Truthiness also covers a None payload, which len() would choke on.
    if not rc[0] or rc[0] == ["\n"]:
        print("Query succeeded, but has no data for pathway: %s" % p)
        return None

    fields = rc[0][0].split("\t")
    if len(fields) < 2:
        # First line has no "<id>TAB<name>" shape; report it instead of
        # letting fields[1] raise IndexError.
        print("Query succeeded, but returned an unparseable line for pathway: %s" % p)
        return None

    return fields[1].strip()
Exemplo n.º 5
0
 def query(self, genelist, homologs=True):
     '''
     Run the gene ontology annotation query for a list of genes and cache
     the response in self.data.

     self.data is reset to an empty list first, then filled with one entry
     per item returned by the fetch, each cut at its first newline. Other
     methods of this class read the cached data.
     @param genelist: A list of gene strings like ['RSAD2', 'SERPING1',....]
     @param homologs: If True, the request asks for annotations of homologs
     of the input genes as well ('homologs' is sent as '1'), else '0'.
     @return: The second value returned by ifr.fetch_data, a return code
     indicating the status of the request.
     '''
     self.data = []

     params = {
         'annot_type': 'gene_ontology',
         'cmd': 'report',
         'homologs': '1' if homologs else '0',
         'network': '0',
         'tax_id': '9606',
     }
     # Genes are sent as a single comma separated string.
     params['gene_box'] = ",".join(genelist)

     # NOTE(review): the original author questioned whether POST works here.
     raw_data, rc = ifr.fetch_data(self.url, params, method='POST')

     # Keep only the text before the first newline of every returned item.
     self.data.extend(ln.partition('\n')[0] for ln in raw_data)

     return rc