def get(self, category):
    """
    Summarize a set of objects

    Reads ``slim`` and ``subject`` from the parsed request arguments and
    forwards every remaining argument to ``map2slim`` unchanged. For the
    function category, gene identifiers are first expanded to their UniProt
    proteins; afterwards, human UniProt subjects in the results are mapped
    back to HGNC identifiers wherever such a mapping is found.
    """
    args = parser.parse_args()
    # 'slim' and 'subject' are passed explicitly below, so they must be
    # removed from the keyword arguments forwarded through **args.
    slim = args.pop('slim')
    subjects = args.pop('subject')

    # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
    # https://github.com/monarch-initiative/dipper/issues/461
    sg_dev = SciGraph(
        url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')

    # Rewrite the 'WormBase:' prefix to 'WB:' before querying.
    subjects = [x.replace('WormBase:', 'WB:') for x in subjects]

    if category == FUNCTION_CATEGORY:
        # get proteins for a gene only when the category is 'function'
        gene_prefixes = ('HGNC:', 'NCBIGene:', 'ENSEMBL:')
        slimmer_subjects = []
        for s in subjects:
            if any(prefix in s for prefix in gene_prefixes):
                prots = sg_dev.gene_to_uniprot_proteins(s)
                if not prots:
                    # No protein mapping: fall back to the gene ID itself.
                    prots = [s]
                slimmer_subjects += prots
            else:
                slimmer_subjects.append(s)
    else:
        slimmer_subjects = subjects

    if category == ANATOMY_CATEGORY:
        category = 'anatomical entity'

    results = map2slim(subjects=slimmer_subjects,
                       slim=slim,
                       object_category=category,
                       user_agent=USER_AGENT,
                       **args)

    # To the fullest extent possible return HGNC ids
    # Maps each protein ID already looked up to its HGNC gene, or to None
    # when no HGNC gene was found, so every protein is resolved only once.
    checked = {}
    for result in results:
        for association in result['assocs']:
            subject = association['subject']
            taxon = subject['taxon']['id']
            protein_id = subject['id']
            if taxon != 'NCBITaxon:9606' or not protein_id.startswith(
                    'UniProtKB:'):
                continue
            if protein_id not in checked:
                hgnc_gene = None
                for gene in sg_dev.uniprot_protein_to_genes(protein_id):
                    if gene.startswith('HGNC'):
                        # When several HGNC genes are returned the last
                        # one is kept.
                        hgnc_gene = gene
                checked[protein_id] = hgnc_gene
            if checked[protein_id] is not None:
                subject['id'] = checked[protein_id]

    return results
def get(self, category):
    """
    Summarize a set of objects

    Reads ``slim`` and ``subject`` from the parsed request arguments and
    forwards every remaining argument to ``map2slim`` unchanged. Gene
    identifiers among the subjects are expanded to their UniProt proteins
    before the query; afterwards, human UniProt subjects in the results are
    mapped back to HGNC identifiers wherever such a mapping is found.
    """
    args = parser.parse_args()
    # 'slim' and 'subject' are passed explicitly below, so they must be
    # removed from the keyword arguments forwarded through **args.
    slim = args.pop('slim')
    subjects = args.pop('subject')

    # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
    # https://github.com/monarch-initiative/dipper/issues/461
    # One client serves both the gene -> protein and protein -> gene lookups.
    sg_dev = SciGraph(
        url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')

    # Every subject is normalised and mapped, not just the first one, so a
    # request with several subjects no longer loses all but the first.
    gene_prefixes = ('HGNC', 'NCBIGene', 'ENSEMBL:')
    prots = []
    for subject_id in subjects:
        subject_id = subject_id.replace('WormBase:', 'WB:', 1)
        if subject_id.startswith(gene_prefixes):
            mapped = sg_dev.gene_to_uniprot_proteins(subject_id)
            # No protein mapping: fall back to the gene ID itself.
            prots += mapped if mapped else [subject_id]
        else:
            prots.append(subject_id)

    results = map2slim(subjects=prots,
                       slim=slim,
                       rows=200,
                       exclude_automatic_assertions=True,
                       object_category=category,
                       **args)

    # To the fullest extent possible return HGNC ids
    # Maps each protein ID already looked up to its HGNC gene, or to None
    # when no HGNC gene was found, so every protein is resolved only once.
    checked = {}
    for result in results:
        for association in result['assocs']:
            subject = association['subject']
            taxon = subject['taxon']['id']
            protein_id = subject['id']
            if taxon != 'NCBITaxon:9606' or not protein_id.startswith(
                    'UniProtKB:'):
                continue
            if protein_id not in checked:
                hgnc_gene = None
                for gene in sg_dev.uniprot_protein_to_genes(protein_id):
                    if gene.startswith('HGNC'):
                        # When several HGNC genes are returned the last
                        # one is kept.
                        hgnc_gene = gene
                checked[protein_id] = hgnc_gene
            if checked[protein_id] is not None:
                subject['id'] = checked[protein_id]

    return results
def get(self):
    """
    For a given gene(s), summarize its annotations over a defined set of slim

    Reads ``slim`` and ``subject`` from the parsed request arguments and
    forwards every remaining argument to ``map2slim`` unchanged, with the
    object category fixed to 'function'. Gene identifiers are expanded to
    proteins before the query; afterwards, human UniProt subjects in the
    results are mapped back to HGNC identifiers wherever such a mapping is
    found.
    """
    args = self.function_parser.parse_args()
    # 'slim' and 'subject' are passed explicitly below, so they must be
    # removed from the keyword arguments forwarded through **args.
    slim = args.pop('slim')
    subjects = args.pop('subject')

    # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
    # https://github.com/monarch-initiative/dipper/issues/461

    # Rewrite the 'WormBase:' prefix to 'WB:' before querying.
    subjects = [x.replace('WormBase:', 'WB:') for x in subjects]

    gene_prefixes = ('HGNC:', 'NCBIGene:', 'ENSEMBL:')
    slimmer_subjects = []
    for s in subjects:
        if any(prefix in s for prefix in gene_prefixes):
            prots = identifier_converter.convert_gene_to_protein(s)
            if not prots:
                # No protein mapping: fall back to the gene ID itself.
                prots = [s]
            slimmer_subjects += prots
        else:
            slimmer_subjects.append(s)

    results = map2slim(subjects=slimmer_subjects,
                       slim=slim,
                       object_category='function',
                       user_agent=USER_AGENT,
                       **args)

    # To the fullest extent possible return HGNC ids
    # Maps each protein ID already looked up to its HGNC gene, or to None
    # when no HGNC gene was found, so every protein is resolved only once.
    checked = {}
    for result in results:
        for association in result['assocs']:
            subject = association['subject']
            taxon = subject['taxon']['id']
            protein_id = subject['id']
            if taxon != 'NCBITaxon:9606' or not protein_id.startswith(
                    'UniProtKB:'):
                continue
            if protein_id not in checked:
                hgnc_gene = None
                for gene in identifier_converter.convert_protein_to_gene(
                        protein_id):
                    if gene.startswith('HGNC'):
                        # When several HGNC genes are returned the last
                        # one is kept.
                        hgnc_gene = gene
                checked[protein_id] = hgnc_gene
            if checked[protein_id] is not None:
                subject['id'] = checked[protein_id]

    return results
def test_map2slim_golr():
    """Check that map2slim yields at least one result for the fixtures.

    Queries the 'function' category with the module-level SUBJECTS and SLIM
    and prints a one-line summary for each returned entry.
    """
    results = map2slim(subjects=SUBJECTS,
                       slim=SLIM,
                       object_category='function')
    assert len(results) > 0

    # enumerate leaves the running count behind; it stays 0 when the loop
    # body never executes.
    n_found = 0
    for n_found, entry in enumerate(results, start=1):
        summary = "Subject: {} Slim:{} Assocs:{}".format(
            entry['subject'], entry['slim'], len(entry['assocs']))
        print(summary)
    assert n_found > 0
def get(self, category):
    """
    Summarize a set of objects

    Queries ``map2slim`` with the subjects as given. When that yields no
    associations at all and exactly one subject was supplied, the subject is
    mapped to UniProt proteins and, if any are found, the query is repeated
    with those proteins.
    """
    args = parser.parse_args()
    logging.info("category is {}".format(category))
    # Both values are passed explicitly, so drop them from the forwarded
    # keyword arguments.
    slim = args.pop('slim')
    subjects = args.pop('subject')

    results = map2slim(subjects=subjects,
                       slim=slim,
                       rows=200,
                       object_category=category,
                       **args)

    # If there are no associations for the given ID, try other IDs.
    # Note the AmiGO instance does *not* support equivalent IDs
    assoc_count = sum(len(entry['assocs']) for entry in results)
    if assoc_count != 0 or len(subjects) != 1:
        return results

    # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
    # https://github.com/monarch-initiative/dipper/issues/461
    # nota bene:
    # currently incomplete because code is not checking for the possibility of >1 subjects
    only_subject = subjects[0]
    logging.info(
        "Found no associations using {} - will try mapping to other IDs"
        .format(only_subject))
    scigraph = SciGraph(
        url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')
    proteins = scigraph.gene_to_uniprot_proteins(only_subject)
    if len(proteins) > 0:
        results = map2slim(subjects=proteins,
                           slim=slim,
                           rows=200,
                           object_category=category,
                           **args)
    return results
def get(self):
    """
    For a given gene(s), summarize its annotations over a defined set of slim

    Reads ``slim`` and ``subject`` from the parsed request arguments,
    rewrites the 'WormBase:' prefix to 'WB:' in each subject, and queries
    ``map2slim`` with the object category fixed to 'anatomical entity'.
    All remaining parsed arguments are forwarded unchanged.
    """
    args = parser.parse_args()
    # Both values are passed explicitly, so drop them from the forwarded
    # keyword arguments.
    slim = args.pop('slim')
    requested = args.pop('subject')

    normalized = []
    for curie in requested:
        if 'WormBase:' in curie:
            curie = curie.replace('WormBase:', 'WB:')
        normalized.append(curie)

    return map2slim(subjects=normalized,
                    slim=slim,
                    object_category='anatomical entity',
                    user_agent=USER_AGENT,
                    **args)