def getEnsemblGene(gene):
    """Collect the Ensembl gene IDs (ENSG) found for a gene name.

    Runs the keyword search under ``baseUrl`` for ``gene`` and requests the
    page of every hit listed in a ``td`` cell of class ``gc-gene-symbol``.
    A hit is accepted when its symbol equals ``gene`` or when ``gene`` is in
    the union of what ``getPreviousHGNC`` and ``getAlias`` return for the
    hit's page (the HGNC previous symbols / aliases).  For every accepted
    hit, the IDs returned by ``mkEnsemblSet`` for the page are added, together
    with the ID that ``hgnc.fetchEnsemblGeneIdForGeneSymbol`` gives for the
    hit's symbol.

    Children of a result cell whose markup has no ``data-ga-label`` or no
    ``href`` attribute (for example whitespace text nodes) are skipped.

    Returns the set of collected IDs; the set is empty when no hit is
    accepted.
    """
    labelMarker = 'data-ga-label="'
    hrefMarker = 'href="'

    searchUrl = baseUrl + 'Search/Keyword?queryString=%s' % (gene, )
    response = requests.get(searchUrl)
    # NOTE(review): no parser is named, so bs4 chooses among the installed
    # ones; consider pinning one so results do not depend on the machine.
    soup = bs4.BeautifulSoup(response.text)
    cells = soup.find_all("td", class_="gc-gene-symbol")

    ensemblGeneSet = set()
    for cellIndex, cell in enumerate(cells):
        for item in cell:
            markup = str(item)
            # Without both attributes the splits below would raise IndexError.
            if labelMarker not in markup or hrefMarker not in markup:
                continue
            name = markup.split(labelMarker)[1].split('"')[0]
            # Keep only the part of the link before the first '&'.
            href = baseUrl + markup.split(hrefMarker)[1].split('"')[0].split('&')[0]

            # Pause before every page request.
            time.sleep(2)
            newHtml = requests.get(href)
            hgncAliasSet = getPreviousHGNC(newHtml) | getAlias(newHtml)
            ensemblSet = mkEnsemblSet(newHtml) | {hgnc.fetchEnsemblGeneIdForGeneSymbol(name)}
            if name == gene or gene in hgncAliasSet:
                ensemblGeneSet |= ensemblSet
        # NOTE(review): the source formatting was lost; this extra pause is
        # assumed to run once per result cell after the first one -- confirm.
        if cellIndex:
            time.sleep(2)
    return ensemblGeneSet
def testSearchSymbol():
    """Check the Ensembl gene ID that the hgnc module reports for RBM5-AS1."""
    symbol = 'RBM5-AS1'
    expectedEnsemblId = 'ENSG00000281691'
    foundEnsemblId = hgnc.fetchEnsemblGeneIdForGeneSymbol(symbol)
    nose.tools.assert_equal(foundEnsemblId, expectedEnsemblId)