def search_kegg(accessions):
    """Collect KEGG pathway annotations for a series of gene accessions.

    Each non-null accession is split on ``":"``; the first field is passed
    to ``KEGG.get_pathway_by_gene`` as the organism and the second as the
    gene. Every pathway returned is recorded as ``map<digits>`` (the digits
    are taken from the pathway key) together with its description.

    Args:
        accessions: Object exposing ``dropna()`` that yields accession
            strings (e.g. a pandas Series).

    Returns:
        pandas.DataFrame: One row per unique pathway with the columns
        ``accession``, ``description`` and ``count`` (number of times the
        pathway was found), sorted by ``count`` in descending order. An
        empty DataFrame is returned when no pathway was found.

    Side effects:
        Shows a yaspin spinner while working and prints a one-line summary.
    """
    start_time = datetime.datetime.now()
    with yaspin(text="Retrieving KEGG annotations...", color="cyan") as sp:
        # A single client is reused for every lookup instead of building a
        # new one per accession.
        kegg_service = KEGG()
        rows = []
        for accession in accessions.dropna():
            res = accession.split(":")
            if len(res) < 2:
                # No "organism:gene" separator: skip the entry rather than
                # abort the whole run, in line with the best-effort handling
                # of failed lookups below.
                continue
            try:
                found = kegg_service.get_pathway_by_gene(res[1], res[0]).items()
            except AttributeError:
                # Best effort: a lookup that fails this way, or that does
                # not return a mapping, contributes no pathways.
                continue
            for pathway_key, description in found:
                digits = re.search(r"\d+", pathway_key)
                if digits is None:
                    # Key without a numeric id: ignore only this entry.
                    continue
                rows.append((f"map{digits.group(0)}", description))

        if rows:
            # Build the frame directly from the collected rows; a text/CSV
            # round-trip would break on descriptions containing tabs or
            # double quotes and relied on the removed pandas.compat.StringIO.
            kegg = pandas.DataFrame(rows, columns=["accession", "description"])
            total_found = len(kegg)
            # Add column of counts.
            kegg["count"] = kegg.groupby("accession")["accession"].transform(
                "count")
            kegg = (kegg.drop_duplicates(subset="accession").sort_values(
                by="count", ascending=False).reset_index(drop=True))
            mssg = f"* Found {total_found} KEGG pathways from which {len(kegg)} were unique."
        else:
            kegg = pandas.DataFrame()
            mssg = "* Found 0 KEGG Pathways."

        time_diff = (datetime.datetime.now() - start_time).total_seconds()
        sp.text = f"Retrieving KEGG annotations => Task done in {time_diff} seconds."
        sp.ok("✔")
        print(mssg)
    return kegg
def search_pathway(gene, organism):
    """Return the KEGG pathways for ``gene`` in ``organism``.

    A fresh ``KEGG`` client is created for the call and the result of its
    ``get_pathway_by_gene(gene, organism)`` method is returned unchanged.

    Args:
        gene: Gene identifier forwarded to the KEGG client.
        organism: Organism code forwarded to the KEGG client.

    Returns:
        Whatever ``KEGG.get_pathway_by_gene`` returns for the given pair.
    """
    return KEGG().get_pathway_by_gene(gene, organism)
with open(sys.argv[1], 'r') as fh: data = fh.read() data = data.replace('\t', '\n') inputGenes = data.split('\n') if '' in inputGenes: inputGenes.remove('') ############################# ############## Main part result = {} for inputGene in inputGenes: print('Processing gene ' + inputGene + ':') try: pathways = k.get_pathway_by_gene(inputGene, 'hsa') except (AttributeError): print('Invalid gene identifier') continue else: if pathways: for pathway in pathways: if pathway != 'hsa01100': print('\tProcessing pathway ' + pathway) rel = k.parse_kgml_pathway(pathway) genes_result = [] ####### Part where the functions are called for entry in rel['entries']: if entry['type'] == 'gene':