def work():
    """Worker loop: scrape genome pages whose URLs arrive on ``in_queue``.

    For every URL taken from the queue the page is downloaded and decoded as
    ISO-8859-1, then the following is collected into one record:

    * ``name``     - organism name found in the ``font_org`` element,
    * ``taxid``    - taxid resolved from that name via ``pyphy``; when the
      lookup yields ``-1`` the id is taken from the NCBI taxonomy link on
      the page instead,
    * ``taxonomy`` - ``{rank: [name, taxid]}`` for every ancestor whose rank
      is listed in ``desired_ranks``,
    * ``cazy``     - ``{match[0]: int(match[1])}`` for every match of the
      module-level pattern ``rx``.

    A record is printed to stdout as a single JSON line only when at least
    one ``rx`` match was found.  Pages that cannot be fetched or parsed are
    skipped (best effort) and the reason is written to stderr.

    The function never returns; ``in_queue.task_done()`` is called exactly
    once per dequeued URL, whatever happens while processing it.
    """
    import sys  # local import: only needed for the skip diagnostic below

    while True:
        url = in_queue.get()
        try:
            # The download is inside the try block on purpose: a network
            # failure must not kill the worker or leave the queue item
            # unacknowledged.
            content = requests.get(url).content.decode('iso8859-1')

            # Key insertion order (cazy, taxonomy, name, taxid, column) is
            # kept because it determines the order in the JSON output.
            container = {"cazy": {}, "taxonomy": {}}
            container["name"] = re.findall(
                r'id="font_org">(.+)</font>', content)[0]
            cazy_page_taxon_name = re.findall(
                r'<font class="titre_cazome" id="font_org">(.+)<\/font>',
                content)[0]

            taxonomy_id = -1
            if cazy_page_taxon_name:
                taxonomy_id = int(
                    pyphy.getTaxidByName(cazy_page_taxon_name.strip())[0])
            if taxonomy_id == -1:
                # Name lookup failed: fall back to the taxonomy link.
                taxonomy_id = re.findall(
                    r'http://www\.ncbi\.nlm\.nih\.gov/Taxonomy/Browser/wwwtax\.cgi\?id=(\d+)',
                    content)[0]
            taxonomy_id = int(taxonomy_id)
            container["taxid"] = taxonomy_id

            # Walk up the parents until taxid 1 is reached (or -1 is
            # returned).  Only ancestors are recorded; the starting taxid
            # itself is not.
            current_id = taxonomy_id
            while current_id != 1 and current_id != -1:
                current_id = int(pyphy.getParentByTaxid(current_id))
                rank = pyphy.getRankByTaxid(current_id)
                if rank in desired_ranks:
                    container["taxonomy"][rank] = [
                        pyphy.getNameByTaxid(current_id), current_id
                    ]

            for cazy in re.findall(rx, content):
                container["cazy"][cazy[0]] = int(cazy[1])

            if container["cazy"]:
                container["column"] = "genome"
                line = json.dumps(container)
                # ``with`` guarantees the lock is released even if the
                # write itself fails.
                with writeLock:
                    print(line)
        except Exception as exc:
            # Best effort: one bad page must not stop the crawl, but leave a
            # trace so that skipped URLs can be investigated.
            sys.stderr.write("skipping {}: {!r}\n".format(url, exc))
        finally:
            in_queue.task_done()
def work():
    """Worker loop: annotate text blocks from ``in_queue`` with a lineage.

    Each queue item is a multi-line string.  Its lines are scanned in order
    with ``rx_taxon``; group 1 of a match is taken as a taxon name
    (underscores become spaces, and the part after ``sp.`` is re-joined with
    underscores).  The name is resolved through ``pyphy`` into a 7-slot list
    holding the names of the ancestors whose rank appears in
    ``desired_ranks`` (slot = index of the rank in ``desired_ranks``,
    ``"-1"`` where unknown).  Resolved lists are memoised in the shared
    ``taxon_content`` dict.

    For the first line whose lineage has at least one known slot, the first
    tab-separated field of that line followed by the 7 slots is printed as
    one tab-separated line; the remaining lines of the item are ignored.
    Items without such a line produce no output.

    The function never returns; ``in_queue.task_done()`` is called exactly
    once per dequeued item, even when processing raises.
    """
    while True:
        content = in_queue.get()
        try:
            for line in content.split("\n"):
                search_taxon = rx_taxon.search(line)
                if not search_taxon:
                    continue

                taxon = search_taxon.group(1).replace("_", " ")
                if "sp." in taxon:
                    # Keep the strain designation after "sp." as one token.
                    parts = taxon.split("sp.")
                    taxon = (parts[0] + "sp. "
                             + parts[1].strip().replace(" ", "_"))

                if taxon not in taxon_content:
                    lineage = ["-1"] * 7
                    taxid = pyphy.getTaxidByName(taxon)[0]
                    for item in pyphy.getPathByTaxid(taxid):
                        rank = pyphy.getRankByTaxid(item)
                        if rank in desired_ranks:
                            lineage[desired_ranks.index(rank)] = (
                                pyphy.getNameByTaxid(item))
                    with dictLock:
                        taxon_content[taxon] = lineage
                else:
                    lineage = taxon_content[taxon]

                if lineage.count("-1") < 7:
                    # At least one rank resolved: emit once and stop
                    # scanning this item.
                    with writeLock:
                        print(line.split("\t")[0] + "\t" + "\t".join(lineage))
                    break
        finally:
            # Always acknowledge the item so that a join() on the queue
            # cannot hang after a failure.
            in_queue.task_done()
def worker(one_query):
    """Print one annotated line for the first hit that has a phylum.

    ``one_query`` is an iterable of 6-item records
    ``(query_name, taxid, score, pident, qcovs, evalue)``.  The records are
    tried in order.  For each one the ``{rank: name}`` mapping of its taxid
    is read from ``cache`` or, on a miss, built with ``pyphy`` and stored in
    ``cache``.  The first record whose mapping contains ``"phylum"`` is
    printed as a tab-separated line: the six record fields followed by one
    column per entry of ``desired_ranks`` (empty when that rank is missing).
    Surrounding whitespace, including trailing empty columns, is stripped
    from the printed line.

    Nothing is printed when ``one_query`` is empty or no record has a
    phylum.  Returns ``None``.
    """
    # NOTE(review): ``cache`` looks like a Redis-style client
    # (exists/hgetall/hmset) - confirm before changing these calls.
    for query in one_query:
        query_name, taxid, score, pident, qcovs, evalue = query
        if cache.exists(taxid):
            name_path = cache.hgetall(taxid)
        else:
            path = pyphy.getDictPathByTaxid(taxid)
            name_path = {
                rank: pyphy.getNameByTaxid(path[rank])
                for rank in path
            }
            cache.hmset(taxid, name_path)
        if "phylum" in name_path:
            break
    else:
        # No record (or no record with a phylum): nothing to report.
        return

    columns = [query_name, taxid, score, pident, qcovs, evalue]
    columns.extend(
        name_path[rank] if rank in name_path else ""
        for rank in desired_ranks
    )
    print("\t".join(columns).strip())
for t in search_taxid: taxid = int(t) #print (file) if taxid != -1: dict_path = pyphy.getDictPathByTaxid(taxid) dict_path["genome"] = taxid quartett = [""] * 2 #print (dict_path) if filter_rank in dict_path and pyphy.getNameByTaxid(dict_path[filter_rank]) == filter_taxon: for rank in desired_rank: if rank in dict_path: name = definition.split(",")[0] if rank != "genome": name = pyphy.getNameByTaxid(dict_path[rank]) if rank == "superkingdom": quartett[0] = dict_path[rank] if dict_path[rank] not in taxid_taxon: taxid_taxon[dict_path[rank]] = [name, rank] else:
def test_TaxidToName(self):
    # Taxid 2 is expected to resolve to the name "Bacteria".
    resolved_name = pyphy.getNameByTaxid(2)
    self.assertEqual(resolved_name, "Bacteria")