def test_normalize_predicates():
    """Check ``Text.normalize_predicate`` against known raw/normalized pairs.

    The cases cover predicates containing ``^``, spaces and ``/``; in every
    expected value those characters appear as underscores, while hyphens
    (for example in ``ADP-ribosylation``) are left untouched.
    """
    expectations = (
        ("CTD:affects^expression", "CTD:affects_expression"),
        ("CTD:affects^metabolic processing", "CTD:affects_metabolic_processing"),
        ("CTD:affects^ADP-ribosylation", "CTD:affects_ADP-ribosylation"),
        ("GAMMA:other/unknown", "GAMMA:other_unknown"),
        ("CTD:affects^N-linked glycosylation", "CTD:affects_N-linked_glycosylation"),
    )
    for raw_predicate, expected in expectations:
        normalized = Text.normalize_predicate(raw_predicate)
        assert normalized == expected
def standardize_relationship(relationship, timeout=30):
    """Resolve a relationship's predicate through the edge-normalization service.

    The relationship's identifier is normalized with
    ``Text.normalize_predicate``, URL-encoded and sent to the ``resolve``
    endpoint. When the service returns an entry for the predicate, that entry
    is returned as a ``LabeledID``. Every failure (network error, non-200
    status, unparsable body, missing or malformed entry) falls back to the
    ``GAMMA:Unmapped_Relation`` placeholder instead of raising.

    Args:
        relationship: object exposing an ``identifier`` attribute holding the
            predicate to standardize.
        timeout: seconds to wait for the service before giving up. Added with
            a default so existing callers are unaffected; without a timeout
            ``requests`` can block indefinitely.

    Returns:
        A ``LabeledID`` with the standardized identifier and label, or the
        ``GAMMA:Unmapped_Relation`` placeholder when no mapping was obtained.
    """
    # every fail condition ends up here, defaulting to an unmapped relation
    # @TODO maybe this should be handled differently
    unmapped = LabeledID(identifier="GAMMA:Unmapped_Relation", label="Unmapped_Relation")

    predicate_id = Text.normalize_predicate(relationship.identifier)
    key = urllib.parse.quote_plus(predicate_id)
    url = f'https://edgenormalization-sri.renci.org/resolve?key={key}'

    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as error:
        logger.debug(
            f'Error calling {url} to standardize predicate {relationship.identifier} -- {error}'
        )
        return unmapped

    if response.status_code != 200:
        logger.debug(
            f'Error calling {url} to standardize predicate {relationship.identifier} -- {response.status_code}'
        )
        return unmapped

    try:
        payload = response.json()
    except ValueError as error:
        # requests raises a ValueError subclass when the body is not valid JSON.
        logger.debug(
            f'Invalid JSON from {url} while standardizing predicate {relationship.identifier} -- {error}'
        )
        return unmapped

    if not isinstance(payload, dict):
        return unmapped

    # The service was queried with the normalized predicate, so look that key
    # up first; the raw identifier is kept as a fallback so anything that
    # resolved before this change still resolves.
    # NOTE(review): assumes the reply is keyed by the queried key -- confirm
    # against the service.
    entry = payload.get(predicate_id)
    if entry is None:
        entry = payload.get(relationship.identifier)

    if not isinstance(entry, dict) or 'identifier' not in entry or 'label' not in entry:
        return unmapped

    return LabeledID(identifier=entry['identifier'], label=entry['label'])
def get_gene_by_drug(self, input_node):
    """Build drug-gene edges from the DrugBank section of the chem endpoint.

    For every DRUGBANK synonym of ``input_node`` the service at ``self.url``
    is queried for the drug's enzymes, transporters, carriers and targets.
    Entries whose ``organism`` is ``'Humans'`` and that carry both ``actions``
    and ``uniprot`` become gene nodes, and each action becomes one edge whose
    predicate is ``GAMMA:<normalized action>``.

    Args:
        input_node: drug node; must provide ``get_synonyms_by_prefix`` and is
            used as one end of every edge created.

    Returns:
        A list of ``(edge, gene_node)`` tuples; empty when the node has no
        DRUGBANK synonyms or nothing usable was returned.
    """

    def as_list(value):
        # The payload may hold either one value or a list of them; always
        # hand back a list so callers can iterate uniformly.
        return value if isinstance(value, list) else [value]

    # (key under results['drugbank'], debug message), in collection order.
    categories = (
        ('enzymes', 'found enzymes'),
        ('transporters', 'found transporters'),
        ('carriers', 'found some carriers'),
        ('targets', 'found targets'),
    )
    # Actions describe what the drug is to the gene product (e.g. a drug that
    # is a "substrate" of an enzyme is metabolized by it). For these actions
    # the edge runs gene -> drug instead of drug -> gene. An explicit
    # action -> CTD predicate table was considered earlier; predicates are
    # now derived from the action text itself.
    reverse = ('substrate', 'carrier')

    drugbank_ids = input_node.get_synonyms_by_prefix('DRUGBANK')
    response = []
    for drugbank_id in drugbank_ids:
        drugbank_id = Text.un_curie(drugbank_id)
        url = f'{self.url}chem/{drugbank_id}?fields=drugbank.enzymes,drugbank.targets,drugbank.carriers,drugbank.transporters'
        logger.debug(url)
        results = self.query(url)
        if 'drugbank' not in results:
            continue
        drugbank = results['drugbank']

        # Gather human entries from every category (maybe this organism
        # filter is not needed ... ???).
        genes = []
        for category, message in categories:
            if category not in drugbank:
                continue
            logger.debug(message)
            genes += [
                entry
                for entry in as_list(drugbank[category])
                if isinstance(entry, dict) and entry.get('organism') == 'Humans'
            ]

        for gene in genes:
            # Some genes are more like gene families, and we don't want
            # them: only entries with both actions and a uniprot id are kept.
            if 'actions' not in gene or 'uniprot' not in gene:
                continue

            # create the gene node
            gene_node = KNode(
                f"UNIPROTKB:{gene['uniprot']}",
                name=gene.get('gene_name', ''),
                type=node_types.GENE)
            publications = (
                [f'PMID:{pmid}' for pmid in as_list(gene['pmids'])]
                if 'pmids' in gene else []
            )

            for action in as_list(gene['actions']):
                rel = Text.normalize_predicate(Text.snakify(action))
                predicate = LabeledID(identifier=f'GAMMA:{rel}', label=rel)

                if action in reverse:
                    # swap input and target nodes
                    source_node, target_node = gene_node, input_node
                else:
                    source_node, target_node = input_node, gene_node

                # NOTE(review): predicate is always constructed just above;
                # the truthiness check is kept from the original -- confirm
                # whether a LabeledID can ever be falsy.
                if predicate:
                    # NOTE(review): for reversed actions the id passed here
                    # is the gene's, not the drug's -- confirm that is what
                    # create_edge expects.
                    edge = self.create_edge(
                        source_node,
                        target_node,
                        'mychem.get_gene_by_drug',
                        source_node.id,
                        predicate,
                        publications=publications)
                    response.append((edge, gene_node))
    return response