def _extract_chp_query(self, query, query_type=None):
    """Translate a single-edge query graph into a CHP ``Query``.

    When ``query_type`` is ``'gene'`` or ``'drug'`` the curie of the edge's
    object node is recorded as evidence. The default survival target is
    always used as the dynamic target.

    :param query: mapping holding a ``query_graph`` (with ``nodes`` and
        ``edges`` mappings) and, optionally, a ``query_id``.
    :param query_type: ``'gene'`` or ``'drug'``; any other value adds no
        evidence.
    :return: the query, with ``truth_target`` and ``query_id`` attached.
    :rtype: Query
    """
    # Only the first edge of the graph is consulted.
    first_key = list(query["query_graph"]["edges"].keys())[0]
    first_edge = query["query_graph"]['edges'][first_key]
    _subject_key = first_edge['subject']  # must be present, otherwise unused
    object_key = first_edge['object']

    evidence = {}
    if query_type == 'gene' or query_type == 'drug':
        # Either way the object node is the one carrying a concrete curie.
        known_curie = query["query_graph"]['nodes'][object_key]['id']
        evidence['_{}'.format(known_curie)] = 'True'

    # Default survival time target.
    dynamic_targets = {}
    dynamic_targets.update(self.default_survival_target)
    survival_op = self.default_survival_target["EFO:0000714"]["op"]
    survival_value = self.default_survival_target["EFO:0000714"]["value"]
    truth_target = ('EFO:0000714', '{} {}'.format(survival_op, survival_value))

    chp_query = Query(evidence=evidence,
                      targets=None,
                      dynamic_evidence=None,
                      dynamic_targets=dynamic_targets,
                      type='updating')

    # Extra attributes read by later processing.
    chp_query.truth_target = truth_target
    if 'query_id' in query:
        chp_query.query_id = query["query_id"]
    else:
        chp_query.query_id = None
    return chp_query
def buildQueries(self):
    """Build a BKB ``Query`` from exactly one gene or one drug node.

    Gene nodes become regular evidence (``mut_<gene>``), drug nodes become
    meta evidence on ``Drug_Name(s)``. Every ``causes`` edge adds a
    ``Survival_Time >=`` meta target, using the edge's ``onset_qualifier``
    when present and 970 days otherwise.

    The resulting query is also stored on ``self.chp_query``.

    :return: the constructed query.
    :rtype: Query
    :raises SystemExit: (via ``sys.exit``) on an unknown curie, on more than
        one piece of evidence, or when no evidence is supplied.
    """
    # Collect evidence from the nodes.
    evidence = {}
    meta_evidence = []
    for node in self.qg['nodes']:
        node_type = node['type']
        if node_type == 'Gene':
            gene_curie = node['curie']
            try:
                gene = self.gene_curie_dict[gene_curie]
            except (KeyError, TypeError):
                # Only a failed lookup means "invalid curie"; a bare except
                # would also swallow KeyboardInterrupt and real bugs.
                sys.exit(
                    'Invalid ENSEMBL Identifier. Must be in form ENSEMBL:<ID>.'
                )
            evidence["mut_" + gene] = 'True'
        elif node_type == 'Drug':
            drug_curie = node['curie']
            try:
                drug = self.drug_curie_dict[drug_curie]
            except (KeyError, TypeError):
                sys.exit(
                    'Invalid CHEMBL Identifier. Must be in form CHEMBL:<ID>'
                )
            meta_evidence.append(('Drug_Name(s)', '==', drug))

    # Collect survival targets from the edges.
    targets = []
    for edge in self.qg['edges']:
        if edge['type'] == 'causes':
            # Default value - needs to be documented in openAPI?
            days = edge['onset_qualifier'] if 'onset_qualifier' in edge else 970
            targets.append(('Survival_Time', '>=', days))

    evidence_count = len(evidence) + len(meta_evidence)
    if evidence_count > 1:
        sys.exit('More than 1 piece of evidence')
    if evidence_count == 0:
        sys.exit('No evidence provided')

    # Exactly one piece of evidence remains, so one of the two containers is
    # non-empty; no third case is possible.
    if evidence:
        query = Query(evidence=evidence,
                      targets=[],
                      meta_evidence=None,
                      meta_targets=targets,
                      type='updating')
    else:
        query = Query(evidence=None,
                      targets=[],
                      meta_evidence=meta_evidence,
                      meta_targets=targets,
                      type='updating')
    self.chp_query = query
    return query
def run_synthetic_prob_queries(self, fused_bkbs, patient_dicts):
    """Run the paired synthetic probability queries on the first 11 BKBs.

    For each patient set two queries are run:
        1.) P(survival_time >= 1000 | gene & drug)
        2.) P(survival_time >= 1000 | gene)

    :param fused_bkbs: a list of fused AXLE bkbs
    :type fused_bkbs: list
    :param patient_dicts: a list of patient dicts
    :type patient_dicts: list
    :return query_responses: one dict per patient set, keyed by 'drug' and
        'no_drug', holding the truth probability assignments.
    :rtype query_responses: list
    """
    responses = []
    for idx in range(11):
        mutation_evidence = {'mut_RAF1': 'True'}

        # Query 1: gene evidence together with the drug as meta evidence.
        drug_reasoner = Reasoner(fused_bkb=fused_bkbs[idx],
                                 patient_data=patient_dicts[idx])
        with_drug = Query(evidence=mutation_evidence,
                          targets=list(),
                          meta_evidence=[('drug_curies', '==',
                                          'CYCLOPHOSPHAMIDE')],
                          meta_targets=[('survival_time', '>=', 1000)],
                          type='updating')
        # The reasoner works on a deep copy of the drug query.
        with_drug_result = drug_reasoner.analyze_query(
            copy.deepcopy(with_drug),
            save_dir=None,
            target_strategy='explicit',
            interpolation='standard')
        drug_probs = self.get_synthetic_query_probs(with_drug_result)
        truth_with_drug, _f_norm, _t_unnorm, _f_unnorm = drug_probs

        # Query 2: gene evidence only, on a fresh reasoner.
        plain_reasoner = Reasoner(fused_bkb=fused_bkbs[idx],
                                  patient_data=patient_dicts[idx])
        without_drug = Query(evidence=mutation_evidence,
                             targets=list(),
                             meta_evidence=None,
                             meta_targets=[('survival_time', '>=', 1000)],
                             type='updating')
        without_drug = plain_reasoner.analyze_query(
            without_drug,
            save_dir=None,
            target_strategy='explicit',
            interpolation='standard')
        plain_probs = self.get_synthetic_query_probs(without_drug)
        truth_without_drug, _f_norm, _t_unnorm, _f_unnorm = plain_probs

        responses.append({
            'drug': truth_with_drug,
            'no_drug': truth_without_drug
        })
    return responses
def _extract_chp_query(self, message, message_type):
    """Convert a message's query graph into a CHP ``Query``.

    Node evidence depends on ``message_type``: ``'standard'`` uses every
    gene and drug node, ``'gene'`` uses drug nodes only, ``'drug'`` uses gene
    nodes only. Targets and contextual evidence are derived from the first
    edge via ``_process_predicate_proxy`` / ``_process_predicate_context``.

    :param message: object exposing ``query_graph.nodes`` and
        ``query_graph.edges`` mappings.
    :param message_type: ``'standard'``, ``'gene'`` or ``'drug'``.
    :return: the query with ``truth_target`` attached.
    :rtype: Query
    """
    # Which node categories contribute evidence for this message type.
    if message_type == 'standard':
        evidence_categories = (BIOLINK_GENE_ENTITY, BIOLINK_DRUG_ENTITY)
    elif message_type == 'gene':
        evidence_categories = (BIOLINK_DRUG_ENTITY,)
    elif message_type == 'drug':
        evidence_categories = (BIOLINK_GENE_ENTITY,)
    else:
        evidence_categories = ()

    evidence = {}
    dynamic_evidence = {}
    if evidence_categories:
        for _qnode_id, qnode in message.query_graph.nodes.items():
            if any(qnode.categories[0] == wanted
                   for wanted in evidence_categories):
                evidence['_{}'.format(qnode.ids[0])] = 'True'

    # Grab the first edge.
    for qedge_id, qedge in message.query_graph.edges.items():
        break

    # Process predicate proxy.
    dynamic_targets = self._process_predicate_proxy(qedge)

    # Process predicate context.
    context_evidence, context_dynamic_evidence = \
        self._process_predicate_context(qedge, message_type)
    evidence.update(context_evidence)
    dynamic_evidence.update(context_dynamic_evidence)

    # TODO: Probably need a more robust solution for when no context is
    # provided in wildcard queries and you need it.

    first_target = list(dynamic_targets.keys())[0]
    target_spec = dynamic_targets[first_target]
    truth_target = (first_target,
                    '{} {}'.format(target_spec["op"], target_spec["value"]))

    chp_query = Query(evidence=evidence,
                      targets=None,
                      dynamic_evidence=dynamic_evidence,
                      dynamic_targets=dynamic_targets,
                      type='updating')
    # Extra attribute read by later processing.
    chp_query.truth_target = truth_target
    return chp_query
def _extract_chp_query(self, query, query_type=None):
    """Validate a one-hop query graph and convert it into a CHP ``Query``.

    The graph must contain at most two nodes and exactly one edge. For a
    ``'gene'`` query the subject must be a gene node and the object's drug
    curie becomes evidence; for a ``'drug'`` query the roles are swapped.
    The default survival target is used as the dynamic target.

    :param query: mapping holding a ``query_graph`` (``nodes`` / ``edges``
        mappings) and, optionally, a ``query_id``.
    :param query_type: ``'gene'`` or ``'drug'``; other values add no evidence.
    :return: the query, with ``truth_target`` and ``query_id`` attached.
    :rtype: Query
    :raises SystemExit: (via ``sys.exit``) when the graph fails validation.
    """
    nodes = query["query_graph"]['nodes']
    edges = query["query_graph"]['edges']

    # An empty edge set used to slip past this check and crash with an
    # IndexError below; it is rejected here like any other malformed graph.
    if len(nodes) > 2 or len(edges) != 1:
        sys.exit('1 hop quries can only have 2 nodes and 1 edge')

    # Check edge for source and target.
    edge = next(iter(edges.values()))
    if 'subject' not in edge or 'object' not in edge:
        sys.exit("Edge must have both a 'subject' and and 'object' key")
    subject = edge['subject']
    obj = edge['object']

    # Get the non-wildcard node.
    evidence = {}
    if query_type == 'gene':
        if nodes[subject]['category'] != BIOLINK_GENE:
            sys.exit("Subject node must be 'category' {}".format(BIOLINK_GENE))
        drug_curie = nodes[obj]['id']
        if drug_curie not in self.curies[BIOLINK_DRUG]:
            sys.exit('Invalid CHEMBL Identifier. Must be CHEMBL:<ID>')
        evidence['_{}'.format(drug_curie)] = 'True'
    elif query_type == 'drug':
        if nodes[subject]['category'] != BIOLINK_DRUG:
            sys.exit("Subject node must be 'category' {}".format(BIOLINK_DRUG))
        gene_curie = nodes[obj]['id']
        if gene_curie not in self.curies[BIOLINK_GENE]:
            sys.exit('Invalid ENSEMBL Identifier. Must be ENSEMBL:<ID>')
        evidence['_{}'.format(gene_curie)] = 'True'

    # Default survival time.
    dynamic_targets = {}
    dynamic_targets.update(self.default_survival_target)
    survival = self.default_survival_target["EFO:0000714"]
    truth_target = ('EFO:0000714',
                    '{} {}'.format(survival["op"], survival["value"]))

    chp_query = Query(evidence=evidence,
                      targets=None,
                      dynamic_evidence=None,
                      dynamic_targets=dynamic_targets,
                      type='updating')
    # Extra attributes read by later processing.
    chp_query.truth_target = truth_target
    chp_query.query_id = query["query_id"] if 'query_id' in query else None
    return chp_query
def buildQueries(self):
    """Build the updating query for the gene evidence in the query graph.

    Assumes Query Graph of structure:
        Disease -> |Intermediate Nodes, i.e. Genes, Drugs, etc.| -> UpdateTargetNode
        - Update Target Node is designated by a 't' label in the QG, i.e. t1, t2, etc.
        - The beginning disease node tells us which datasets to use.
        - The intermediate nodes dictate the evidence to use during updating.

    Only ``Gene`` nodes are read here; each one contributes ``mut_<gene>``
    evidence. The meta targets come from ``self.probability_targets``. The
    query is also stored on ``self.chp_query``.

    :return: a one-element list containing the constructed query.
    :rtype: list
    :raises SystemExit: (via ``sys.exit``) when a gene curie is unknown.
    """
    evidence = {}
    for node in self.qg['nodes']:
        if node['type'] != 'Gene':
            continue
        gene_curie = node['curie']
        try:
            gene = self.gene_curie_dict[gene_curie]
        except (KeyError, TypeError):
            # Only a failed lookup means "invalid curie"; a bare except would
            # also swallow KeyboardInterrupt and unrelated errors.
            sys.exit('Invalid ENSEMBL Identifier. Must be in form ENSEMBL:<ID>.')
        evidence["mut_" + gene] = 'True'

    query = Query(evidence=evidence,
                  targets=[],
                  meta_evidence=None,
                  meta_targets=self.probability_targets,
                  type='updating')
    self.chp_query = query
    return [query]
def buildQueries(self):
    """Build the updating query for the drug evidence in the query graph.

    Each ``Drug`` node contributes ``drug_<name>`` evidence and records the
    resolved name on ``self.drug``. Each
    ``chemical_to_disease_or_phenotypic_feature_association`` edge adds a
    ``Survival_Time >=`` meta target, using the edge's ``value`` when present
    and 970 days otherwise, and records it on ``self.days``. The query is
    also stored on ``self.chp_query``.

    :return: the constructed query.
    :rtype: Query
    :raises SystemExit: (via ``sys.exit``) when a drug curie is unknown.
    """
    evidence = {}
    for node in self.qg['nodes']:
        if node['type'] != 'Drug':
            continue
        drug_curie = node['curie']
        try:
            self.drug = self.drug_curie_dict[drug_curie]
        except (KeyError, TypeError):
            # Only a failed lookup means "invalid curie"; a bare except would
            # also swallow KeyboardInterrupt and unrelated errors.
            sys.exit(
                'Invalid CHEMBL Identifier. Must be in form CHEMBL:<ID>'
            )
        evidence['drug_{}'.format(self.drug)] = 'True'

    targets = []
    for edge in self.qg['edges']:
        if edge['type'] == 'chemical_to_disease_or_phenotypic_feature_association':
            self.days = edge['value'] if 'value' in edge else 970
            targets.append(('Survival_Time', '>=', self.days))

    query = Query(evidence=evidence,
                  targets=[],
                  meta_evidence=None,
                  meta_targets=targets,
                  type='updating')
    self.chp_query = query
    return query
def test_joint_reasoner_one_gene(self):
    """Run a single-gene survival query through the joint reasoner."""
    # Targets: survival time (EFO:0000714) of at least 1000.
    survival_targets = {"EFO:0000714": {"op": '>=', "value": 1000}}
    # Evidence: one gene, given by its ENSEMBL curie.
    gene_evidence = {'ENSEMBL:ENSG00000155657': 'True'}

    gene_query = Query(evidence=gene_evidence,
                       dynamic_targets=survival_targets)
    self.joint_reasoner.run_query(gene_query)
def test_dynamic_reasoner_one_gene(self):
    """Run a single-gene survival query through the dynamic reasoner."""
    # Targets: survival time (EFO:0000714) of at least 1000.
    survival_targets = {"EFO:0000714": {"op": '>=', "value": 1000}}
    # Evidence: one gene, given by its underscore-prefixed ENSEMBL curie.
    gene_evidence = {'_ENSEMBL:ENSG00000155657': 'True'}

    gene_query = Query(evidence=gene_evidence,
                       dynamic_targets=survival_targets)
    answered = self.dynamic_reasoner.run_query(gene_query)
    answered.result.summary(include_contributions=False)
def test_dynamic_reasoner_one_drug_survival(self):
    """Run a single-drug survival query through the dynamic reasoner."""
    # Targets: survival time (EFO:0000714) of at least 1000.
    survival_targets = {"EFO:0000714": {"op": '>=', "value": 1000}}
    # Evidence: one drug, given by its underscore-prefixed CHEMBL curie.
    drug_evidence = {'_CHEMBL:CHEMBL83': 'True'}

    drug_query = Query(evidence=drug_evidence,
                       dynamic_targets=survival_targets)
    answered = self.dynamic_reasoner.run_query(drug_query, bkb_type='drug')
    answered.result.summary(include_contributions=False)
def run(self, i=0):
    """Interactively build and run queries until the user declines.

    Each round collects demographic and standard evidence/targets, asks for
    the reasoning type, runs the query through ``self.reasoner`` and prints
    its report.

    Rounds are driven by a loop rather than by recursion, so a long session
    cannot exhaust the call stack, and a non-numeric reasoning choice
    re-prompts instead of crashing.

    :param i: round counter; the welcome banner is shown only when it is 0.
    """
    if i == 0:
        print('Welcome to the BKB Pathways Driver.')
        input('Press any key to begin reasoning...')

    reasoning_types = {1: 'revision', 2: 'updating'}
    while True:
        print("Let's build a query!")
        meta_evidence = self.chooseDemoEvidence()
        evidence = self.chooseStandardEvidence()
        meta_targets = self.chooseDemoTargets()
        targets = self.chooseStandardTargets()

        reason_type = None
        while reason_type is None:
            print('Choose reasoning Type:\n1)\tRevision\n2)\tUpdating')
            try:
                choice = int(input('Your Choice: '))
            except ValueError:
                # Non-numeric input is treated like any other unknown choice.
                choice = None
            reason_type = reasoning_types.get(choice)
            if reason_type is None:
                print('Unrecognized reasoning type.')

        query = Query(evidence=evidence,
                      targets=targets,
                      meta_evidence=meta_evidence,
                      meta_targets=meta_targets,
                      type=reason_type)
        query = self.reasoner.analyze_query(query)
        query.getReport()

        # An empty answer defaults to "yes".
        again = input('Do you wish to reason again? ([y],n): ') or 'y'
        if again != 'y':
            return
def buildQueries(self):
    """
    Parses over the sent query graph to form a BKB query.

    The graph must be a single hop: at most two nodes and exactly one edge,
    whose subject is a ``biolink:Drug`` node with a known ``id`` and whose
    object is a ``biolink:Gene`` wildcard (no ``id``). The drug becomes
    ``demo_<curie>`` evidence and the target is ``survival_time`` compared
    using ``self.op`` / ``self.value``.

    Sets ``self.contribution_target`` (to ``None``), ``self.drug_curie`` and
    ``self.chp_query``.

    :return: A internal CHP query.
    :rtype: Query
    :raises SystemExit: (via ``sys.exit``) when the graph fails validation.
    """
    self.contribution_target = None
    nodes = self.qg['nodes']
    edges = self.qg['edges']

    # An empty edge set used to slip past this check and crash with an
    # IndexError below; it is rejected here like any other malformed graph.
    if len(nodes) > 2 or len(edges) != 1:
        sys.exit('1 hop quries can only have 2 nodes and 1 edge')

    # Check edge for source and target.
    edge = next(iter(edges.values()))
    if 'subject' not in edge or 'object' not in edge:
        sys.exit("Edge must have both a 'subject' and and 'object' key")
    subject = edge['subject']
    obj = edge['object']

    # Get drug node.
    drug_node = nodes[subject]
    if drug_node['category'] != 'biolink:Drug':
        sys.exit("Subject node must be 'category' biolink:Drug")
    elif 'id' not in drug_node:
        sys.exit("Must have 'id' key in drug node")
    self.drug_curie = drug_node['id']
    if self.drug_curie not in self.drug_curie_dict:
        sys.exit('Invalid CHEMBL Identifier. Must be CHEMBL:<ID>')
    evidence = {'demo_{}'.format(self.drug_curie): 'True'}

    # Ensure gene is wildcard.
    gene_node = nodes[obj]
    if gene_node['category'] != 'biolink:Gene':
        sys.exit("Object node must be 'category' biolink:Gene")
    elif 'id' in gene_node:
        sys.exit("Must NOT have 'id' key in gene node")

    # Default survival time.
    targets = [('survival_time', self.op, self.value)]

    query = Query(evidence=evidence,
                  targets=[],
                  meta_evidence=None,
                  meta_targets=targets,
                  type='updating')
    self.chp_query = query
    return query
def _extract_chp_query(self, query, message_type):
    """Convert a query's message into a ``ChpQuery`` (one- and two-hop).

    The first edge supplies the dynamic target (predicate proxy) and any
    contextual evidence (predicate context). Node curies are then added as
    meta evidence depending on ``message_type``:

    * ``'standard'``: every gene and drug node;
    * ``'gene'`` / ``'drug_two_hop'``: drug nodes that carry ids;
    * ``'drug'`` / ``'gene_two_hop'``: gene nodes that carry ids.

    :param query: object whose ``message`` exposes ``query_graph.nodes`` and
        ``query_graph.edges`` mappings.
    :param message_type: one of the message types listed above.
    :return: the populated query with ``truth_target`` attached.
    :rtype: ChpQuery
    """
    # Extract message.
    message = query.message

    # Initialize CHP BKB query.
    chp_query = ChpQuery(reasoning_type='updating')

    # Grab the first edge.
    for qedge_id, qedge in message.query_graph.edges.items():
        break

    # Process predicate proxy, then predicate context.
    chp_query = self._process_predicate_proxy(qedge, chp_query)
    chp_query = self._process_predicate_context(qedge, message_type,
                                                chp_query)

    # TODO: Probably need a more robust solution for when no context is
    # provided in wildcard queries and you need it.

    # Decide which categories feed evidence and whether id-less nodes
    # (wildcards) are skipped.
    if message_type == 'standard':
        evidence_categories = (BIOLINK_GENE_ENTITY, BIOLINK_DRUG_ENTITY)
        skip_without_ids = False
    elif message_type == 'gene' or message_type == 'drug_two_hop':
        evidence_categories = (BIOLINK_DRUG_ENTITY,)
        skip_without_ids = True
    elif message_type == 'drug' or message_type == 'gene_two_hop':
        evidence_categories = (BIOLINK_GENE_ENTITY,)
        skip_without_ids = True
    else:
        evidence_categories = ()
        skip_without_ids = False

    if evidence_categories:
        for _qnode_id, qnode in message.query_graph.nodes.items():
            if not any(qnode.categories[0] == wanted
                       for wanted in evidence_categories):
                continue
            if skip_without_ids and qnode.ids is None:
                continue
            chp_query.add_meta_evidence(qnode.ids[0], 'True')

    first_target = list(chp_query.dynamic_targets.keys())[0]
    truth_target = (first_target, '{} {}'.format(
        chp_query.dynamic_targets[first_target]["op"],
        chp_query.dynamic_targets[first_target]["value"]))

    # Extra attribute read by later processing.
    chp_query.truth_target = truth_target
    return chp_query
def buildQueries(self):
    """
    Parses over the sent query graph to form a BKB query.

    A node without an ``id`` is the contribution target (only one is
    allowed); a ``biolink:Drug`` node with an ``id`` becomes
    ``demo_<curie>`` evidence. Each
    ``biolink:DiseaseToPhenotypicFeatureAssociation`` edge adds a
    ``survival_time`` target, taken from the edge ``properties`` when present
    and defaulting to ``>= 970`` otherwise.

    Sets ``self.contribution_target``, ``self.drug_curie``, ``self.op``,
    ``self.value`` and ``self.chp_query`` along the way.

    :return: A internal CHP query.
    :rtype: Query
    """
    self.contribution_target = None

    evidence = {}
    for node in self.qg['nodes'].values():
        if 'id' in node:
            # Concrete node: only drugs contribute evidence.
            if node['category'] == 'biolink:Drug':
                self.drug_curie = node['id']
                if self.drug_curie not in self.drug_curie_dict:
                    sys.exit(
                        'Invalid CHEMBL Identifier. Must be in form CHEMBL:<ID>'
                    )
                evidence['demo_{}'.format(self.drug_curie)] = 'True'
            continue

        # Wildcard node: it is the contribution target, and only one may exist.
        if self.contribution_target is not None:
            sys.exit(
                'You can only have one contribution target. Make sure to leave only one node with a black curie.'
            )
        self.contribution_target = node['category']

    survival_targets = []
    for edge in self.qg['edges'].values():
        if edge['predicate'] != 'biolink:DiseaseToPhenotypicFeatureAssociation':
            continue
        if 'properties' in edge:
            self.op = edge['properties']['qualifier']
            self.value = edge['properties']['days']
        else:
            self.op = '>='
            self.value = 970
        survival_targets.append(('survival_time', self.op, self.value))

    built = Query(evidence=evidence,
                  targets=[],
                  meta_evidence=None,
                  meta_targets=survival_targets,
                  type='updating')
    self.chp_query = built
    return built
def make_query(self,
               patient_hash,
               target,
               supported_compnames,
               evidence_type='mutation'):
    """Build a query from one patient's mutated genes.

    Every gene in the patient's ``Patient_Genes`` whose component name
    (``_mut_<gene>``) appears in ``supported_compnames`` becomes evidence.

    :param patient_hash: key of the patient in ``self.patient_data``.
    :param target: meta target passed through to the query unchanged.
    :param supported_compnames: container of component names that may be
        used as evidence.
    :param evidence_type: only ``'mutation'`` is implemented.
    :return: the query, or ``None`` when the patient has no supported
        mutation evidence.
    :raises ValueError: for any ``evidence_type`` other than ``'mutation'``
        (this previously surfaced as an unbound-local error).
    """
    if evidence_type != 'mutation':
        raise ValueError(
            'Unsupported evidence_type: {!r}'.format(evidence_type))

    mutation_evidence = {}
    for mut in self.patient_data[patient_hash]["Patient_Genes"]:
        comp_name = '_mut_' + mut
        if comp_name in supported_compnames:
            mutation_evidence[comp_name] = 'True'

    if not mutation_evidence:
        return None
    return Query(evidence=mutation_evidence,
                 targets=[],
                 meta_targets=[target])
def test_dynamic_reasoner_one_gene_one_drug(self):
    """Run a gene-plus-drug survival query through the dynamic reasoner."""
    # Targets: survival time (EFO:0000714) of at least 1000.
    survival_targets = {"EFO:0000714": {"op": '>=', "value": 1000}}
    # Evidence: the gene as meta evidence, the drug as dynamic evidence.
    gene_meta_evidence = {'ENSEMBL:ENSG00000155657': 'True'}
    drug_dynamic_evidence = {
        'CHEMBL:CHEMBL83': {"op": '==', "value": 'True'},
    }

    combined_query = Query(dynamic_evidence=drug_dynamic_evidence,
                           meta_evidence=gene_meta_evidence,
                           dynamic_targets=survival_targets)
    answered = self.dynamic_reasoner.run_query(combined_query)
    answered.result.summary(include_contributions=False)
def processUiQuery(dict_):
    """Convert a UI query dictionary into a ``Query``.

    ``name``, ``genetic_evidence`` and ``genetic_targets`` are passed through
    as ``name``, ``evidence`` and ``targets``. The demographic entries are
    converted to lists of tuples (or left as ``None``) and passed as
    ``meta_evidence`` / ``meta_targets``.

    :param dict_: mapping with the keys ``name``, ``genetic_evidence``,
        ``genetic_targets``, ``demographic_evidence`` and
        ``demographic_targets``.
    :return: the constructed query.
    :rtype: Query
    """
    query_kwargs = {
        'name': dict_['name'],
        'evidence': dict_['genetic_evidence'],
        'targets': dict_['genetic_targets'],
    }

    # UI key -> Query keyword for the demographic (meta) entries.
    demographic_fields = (
        ('demographic_evidence', 'meta_evidence'),
        ('demographic_targets', 'meta_targets'),
    )
    for ui_key, query_key in demographic_fields:
        rows = dict_[ui_key]
        if rows is None:
            query_kwargs[query_key] = None
        else:
            query_kwargs[query_key] = [tuple(row) for row in rows]

    return Query(**query_kwargs)
def run_synthetic_contribution_queries(self, fused_bkbs, patient_dicts):
    """Compute RAF1 gene contributions for the synthetic wildcard query.

    Runs the following query on each of the first 11 wildcard BKBs:
        P(survival_time >= 1000 | drug)
    and spreads the drug's contribution evenly over the involved patients to
    obtain a per-patient contribution for each survival instantiation.

    :param fused_bkbs: A set of fused wildcard BKBs
    :type fused_bkbs: list
    :param patient_dicts: a set of patient_dicts
    :type patient_dicts: list
    :return query_responses: one ``(true_gene_contrib, false_gene_contrib)``
        tuple per BKB, each a dict keyed by gene name.
    :rtype query_responses: list
    """

    def individual_contribution(analysis, instantiation, unnormalized):
        """Per-patient drug contribution for one survival instantiation.

        Returns 0 when the instantiation is missing from the contribution
        analysis. The False case had no such default before, so it could
        reuse the value from a previous BKB or fail on an unbound name.
        """
        contributions = analysis['contribution_analysis']
        if instantiation not in contributions:
            return 0
        drug_contrib = contributions[instantiation][
            'demo_CYCLOPHOSPHAMIDE = True']
        involved = analysis['patient_analysis']['All Involved Patients'][
            instantiation]
        return float(drug_contrib) / float(len(involved)) / unnormalized

    query_responses = []
    for i in range(0, 11):
        reasoner = Reasoner(fused_bkb=fused_bkbs[i],
                            patient_data=patient_dicts[i])
        drug_evidence = {'demo_CYCLOPHOSPHAMIDE': 'True'}
        query_drug = Query(evidence=drug_evidence,
                           targets=list(),
                           meta_evidence=None,
                           meta_targets=[('survival_time', '>=', 1000)],
                           type='updating')
        query_drug = reasoner.analyze_query(copy.deepcopy(query_drug),
                                            save_dir=None,
                                            target_strategy='explicit',
                                            interpolation='standard')
        report = query_drug.jsonExplanations(
            contributions_include_srcs=False,
            contributions_top_n_inodes=30,
            contributions_ignore_prefixes=['_'])
        _t_norm, _f_norm, t_unnorm, f_unnorm = \
            self.get_synthetic_query_probs(query_drug)

        analysis = {
            'patient_analysis': report['Patient Analysis'],
            'contribution_analysis': report['Contributions Analysis']
        }
        true_ind_cont = individual_contribution(
            analysis, 'survival_time >= 1000 = True', t_unnorm)
        false_ind_cont = individual_contribution(
            analysis, 'survival_time >= 1000 = False', f_unnorm)

        # Gene contributions.
        # NOTE(review): a survival_time of exactly 1000 lands in neither
        # bucket here although the query target is ">= 1000" - confirm
        # whether "> 1000" is intended.
        true_gene_contrib = {'RAF1': 0}
        false_gene_contrib = {'RAF1': 0}
        for pat_key, pat in patient_dicts[i].items():
            took_drug = 'CYCLOPHOSPHAMIDE' in pat['drug_curies']
            if pat['survival_time'] > 1000 and took_drug:
                if 'RAF1' in pat['gene_curies']:
                    true_gene_contrib['RAF1'] += true_ind_cont
            elif pat['survival_time'] < 1000 and took_drug:
                if 'RAF1' in pat['gene_curies']:
                    false_gene_contrib['RAF1'] += false_ind_cont
        query_responses.append((true_gene_contrib, false_gene_contrib))
    return query_responses
def _extract_chp_query(self, query, query_type):
    """Validate a disease-centred query graph and convert it to a CHP ``Query``.

    Structure enforced below: at most one survival phenotype node
    (``EFO:0000714``; implied when absent), exactly one supported disease
    node (``MONDO:0007254``), and gene / drug nodes each linked to the
    disease by the matching predicate. Gene and drug curies become evidence
    unless that category is the wildcard named by ``query_type``.

    Side effects: sets ``self.implicit_survival_node`` and
    ``self.no_evidence_probability_check``.

    :param query: mapping holding a ``query_graph`` (``nodes`` / ``edges``
        mappings) and, optionally, a ``query_id``.
    :param query_type: ``'gene'`` or ``'drug'`` marks that node category as
        the wildcard, so its curie is neither validated nor used as evidence.
    :return: the query, with ``truth_target`` and ``query_id`` attached.
    :rtype: Query
    :raises SystemExit: (via ``sys.exit``) when the graph fails validation.
    """
    nodes = query["query_graph"]['nodes']
    edges = query["query_graph"]['edges']

    evidence = {}
    targets = []
    dynamic_evidence = {}
    dynamic_targets = {}

    # Running counts used to check that every node/edge is accounted for.
    total_nodes = 0
    total_edges = 0

    # Get phenotype node.
    acceptable_target_curies = ['EFO:0000714']
    target_id = None
    self.implicit_survival_node = False
    for node_key, node in nodes.items():
        if (node['category'] == BIOLINK_PHENOTYPIC_FEATURE
                and node['id'] in acceptable_target_curies):
            target_id = node_key
            total_nodes += 1
    if total_nodes == 0:
        # Use default survival.
        self.implicit_survival_node = True
        total_nodes += 1
    elif total_nodes > 1:
        sys.exit('Too many target nodes')

    # Survival defaults. Setting them up front means a graph with a survival
    # node but no disease-to-phenotype edge no longer fails on an unbound
    # name when the target is built.
    days = 970
    qualifier = '>='

    # Get disease node info and ensure only 1 disease.
    acceptable_disease_curies = ['MONDO:0007254']
    disease_id = None
    for node_key, node in nodes.items():
        if (node['category'] == BIOLINK_DISEASE
                and node['id'] in acceptable_disease_curies):
            disease_id = node_key
            for edge in edges.values():
                if (edge['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE
                        and edge['subject'] == disease_id
                        and target_id is not None
                        and edge['object'] == target_id):
                    if 'properties' in edge:
                        days = edge['properties']['days']
                        qualifier = edge['properties']['qualifier']
                    else:
                        days = 970
                        qualifier = '>='
                    total_edges += 1
            if total_edges > 1:
                sys.exit('Disease has too many outgoing edges')
            total_nodes += 1

    if self.implicit_survival_node:
        # The implied survival node comes with an implied default edge.
        days = 970
        qualifier = '>='
        total_edges += 1

    if total_nodes == 1:
        acceptable_disease_curies_print = ','.join(acceptable_disease_curies)
        sys.exit("Disease node not found. Node type must be '{}' and curie must be in: {}".format(
            BIOLINK_DISEASE, acceptable_disease_curies_print))
    elif total_nodes > 2:
        sys.exit('Too many disease nodes')

    # Set BKB target.
    dynamic_targets['EFO:0000714'] = {
        "op": qualifier,
        "value": days,
    }
    truth_target = ('EFO:0000714', '{} {}'.format(qualifier, days))

    # Get evidence.
    for node_key, node in nodes.items():
        # Genes.
        if node['category'] == BIOLINK_GENE:
            # Check for appropriate gene node structure.
            gene_id = node_key
            for edge in edges.values():
                if (edge['predicate'] == BIOLINK_GENE_TO_DISEASE_PREDICATE
                        and edge['subject'] == gene_id
                        and edge['object'] == disease_id):
                    total_edges += 1
            if total_edges == total_nodes - 1:
                sys.exit("Gene and disease edge not found. Edge type must be '{}'".format(
                    BIOLINK_GENE_TO_DISEASE_PREDICATE))
            elif total_edges > total_nodes:
                sys.exit('Gene has too many outgoing edges')
            # Check for appropriate gene node curie.
            if query_type != 'gene':
                gene_curie = node['id']
                if gene_curie not in self.curies[BIOLINK_GENE]:
                    sys.exit('Invalid ENSEMBL Identifier. Must be in form ENSEMBL:<ID>.')
                evidence["_" + gene_curie] = 'True'
            total_nodes += 1
        # Drugs.
        if node['category'] == BIOLINK_DRUG:
            # Check for appropriate drug node structure.
            drug_id = node_key
            for edge in edges.values():
                if (edge['predicate'] == BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE
                        and edge['subject'] == drug_id
                        and edge['object'] == disease_id):
                    total_edges += 1
            if total_edges == total_nodes - 1:
                sys.exit("Drug and disease edge not found. Edge type must be '{}'".format(
                    BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE))
            elif total_edges > total_nodes:
                sys.exit('Drug has too many outgoing edges')
            # Check for appropriate drug node curie.
            if query_type != 'drug':
                drug_curie = node['id']
                if drug_curie not in self.curies[BIOLINK_DRUG]:
                    sys.exit('Invalid CHEMBL Identifier: {}. Must be in form CHEMBL:<ID>'.format(drug_curie))
                evidence['_' + drug_curie] = 'True'
            total_nodes += 1

    # Temporary solution to no evidence linking.
    self.no_evidence_probability_check = (
        len(evidence) == 0 and len(dynamic_evidence) == 0)

    # Produce BKB query.
    chp_query = Query(evidence=evidence,
                      targets=targets,
                      dynamic_evidence=dynamic_evidence,
                      dynamic_targets=dynamic_targets,
                      type='updating')
    # Extra attributes read by later processing.
    chp_query.truth_target = truth_target
    chp_query.query_id = query["query_id"] if 'query_id' in query else None
    return chp_query
def _extract_chp_query(self, message, message_type):
    """Convert a disease-centred message query graph into a ``ChpQuery``.

    The survival target (``EFO:0000714``) defaults to ``>= 970`` and is
    overridden by a ``survival_time`` constraint on the disease-to-phenotype
    edge when one is found. Gene and drug curies known to ``self.curies``
    are added as meta evidence unless that category is the wildcard named by
    ``message_type``.

    Side effects: sets ``self.implicit_survival_node`` and
    ``self.no_evidence_probability_check``.

    :param message: object exposing ``query_graph.nodes`` and
        ``query_graph.edges`` mappings.
    :param message_type: ``'gene'`` or ``'drug'`` marks that node category as
        the wildcard, so its curie is not read.
    :return: the populated query with ``truth_target`` attached.
    :rtype: ChpQuery
    """
    # Initialize CHP BKB query.
    chp_query = ChpQuery(reasoning_type='updating')
    query_graph = message.query_graph

    # NOTE(review): apart from detecting a missing survival node, these
    # counters are maintained but never consulted in this function.
    total_nodes = 0
    total_edges = 0

    # Get phenotype node.
    acceptable_target_curies = ['EFO:0000714']
    target_id = None
    self.implicit_survival_node = False
    for node_key, node in query_graph.nodes.items():
        if (node.categories[0] == BIOLINK_PHENOTYPIC_FEATURE_ENTITY
                and node.ids[0] in acceptable_target_curies):
            target_id = node_key
            total_nodes += 1
    if total_nodes == 0:
        # Use default survival.
        self.implicit_survival_node = True
        total_nodes += 1

    survival_value = 970
    survival_operator = '>='

    # Get disease node info.
    acceptable_disease_curies = ['MONDO:0007254']
    disease_id = None
    for node_key, node in query_graph.nodes.items():
        if (node.categories[0] == BIOLINK_DISEASE_ENTITY
                and node.ids[0] in acceptable_disease_curies):
            disease_id = node_key
            for edge in query_graph.edges.values():
                # target_id stays None when the survival node is implicit, so
                # no edge can match in that case (it used to be an unbound
                # name).
                if (self.check_predicate_support(edge.predicates[0],
                                                 BIOLINK_HAS_PHENOTYPE_ENTITY)
                        and edge.subject == disease_id
                        and target_id is not None
                        and edge.object == target_id):
                    survival_time_constraint = edge.find_constraint(
                        name='survival_time')
                    if survival_time_constraint is not None:
                        survival_value = survival_time_constraint.value
                        survival_operator = survival_time_constraint.operator
                        if survival_operator == 'matches':
                            survival_operator = '=='
                    total_edges += 1
            total_nodes += 1

    if self.implicit_survival_node:
        # The implied survival node comes with an implied default edge.
        total_edges += 1

    # Set BKB target.
    chp_query.add_dynamic_target('EFO:0000714', survival_operator,
                                 survival_value)
    truth_target = ('EFO:0000714', '{} {}'.format(survival_operator,
                                                  survival_value))

    # Get evidence.
    for node_key, node in query_graph.nodes.items():
        # Genes.
        if node.categories[0] == BIOLINK_GENE_ENTITY:
            # Check for appropriate gene node structure.
            gene_id = node_key
            for edge in query_graph.edges.values():
                if (self.check_predicate_support(
                        edge.predicates[0],
                        BIOLINK_GENE_ASSOCIATED_WITH_CONDITION_ENTITY)
                        and edge.subject == gene_id
                        and edge.object == disease_id):
                    total_edges += 1
            # Only curies known to this handler become evidence; an unknown
            # curie is skipped and never reuses an earlier node's value.
            if message_type != 'gene':
                gene_curie = node.ids[0]
                if gene_curie in self.curies[BIOLINK_GENE_ENTITY.get_curie()]:
                    chp_query.add_meta_evidence(gene_curie, 'True')
            total_nodes += 1
        # Drugs.
        if node.categories[0] == BIOLINK_DRUG_ENTITY:
            # Check for appropriate drug node structure.
            drug_id = node_key
            for edge in query_graph.edges.values():
                if (self.check_predicate_support(edge.predicates[0],
                                                 BIOLINK_TREATS_ENTITY)
                        and edge.subject == drug_id
                        and edge.object == disease_id):
                    total_edges += 1
            if message_type != 'drug':
                drug_curie = node.ids[0]
                if drug_curie in self.curies[BIOLINK_DRUG_ENTITY.get_curie()]:
                    chp_query.add_meta_evidence(drug_curie, 'True')
            total_nodes += 1

    # Temporary solution to no evidence linking.
    self.no_evidence_probability_check = (
        len(chp_query.evidence.keys()) == 0
        and len(chp_query.dynamic_evidence.keys()) == 0)

    # Extra attribute read by later processing.
    chp_query.truth_target = truth_target
    return chp_query
def _extract_chp_query(self, message, message_type=None):
    """Convert a disease-centred message query graph into a ``ChpQuery``.

    The phenotype node supplies the dynamic target curie; the comparison
    defaults to ``>= 970`` and is overridden by a ``survival_time``
    constraint on the disease-to-phenotype edge when one is found. Gene
    nodes are added as meta evidence and drug nodes as dynamic evidence.

    The target curie is taken from the phenotype node itself. It was
    previously read from the leaked loop variable, i.e. from whichever node
    happened to be last in the graph.

    :param message: object exposing ``query_graph.nodes`` and
        ``query_graph.edges`` mappings.
    :param message_type: accepted for interface compatibility; not used.
    :return: the populated query with ``truth_target`` attached.
    :rtype: ChpQuery
    :raises ValueError: when the graph has no phenotypic feature node.
    """
    # Initialize Chp Query.
    chp_query = ChpQuery(reasoning_type='updating')
    query_graph = message.query_graph

    # NOTE(review): these counters are maintained but never consulted in
    # this function.
    total_nodes = 0
    total_edges = 0

    # Get phenotype node (the last one wins if several are present).
    target_id = None
    target_node = None
    for node_key, node in query_graph.nodes.items():
        if node.categories[0] == BIOLINK_PHENOTYPIC_FEATURE_ENTITY:
            target_id = node_key
            target_node = node
            total_nodes += 1

    survival_value = 970
    survival_operator = '>='

    # Get disease node info.
    disease_id = None
    for node_key, node in query_graph.nodes.items():
        if node.categories[0] == BIOLINK_DISEASE_ENTITY:
            disease_id = node_key
            for edge in query_graph.edges.values():
                if (self.check_predicate_support(edge.predicates[0],
                                                 BIOLINK_HAS_PHENOTYPE_ENTITY)
                        and edge.subject == disease_id
                        and target_id is not None
                        and edge.object == target_id):
                    survival_time_constraint = edge.find_constraint(
                        name='survival_time')
                    if survival_time_constraint is not None:
                        survival_value = survival_time_constraint.value
                        survival_operator = survival_time_constraint.operator
                        if survival_operator == 'matches':
                            survival_operator = '=='
                    total_edges += 1
            total_nodes += 1

    # Set BKB target.
    if target_node is None:
        raise ValueError(
            'Query graph has no phenotypic feature node to use as target.')
    target_curie = target_node.ids[0]
    chp_query.add_dynamic_target(target_curie, survival_operator,
                                 survival_value)
    truth_target = (target_curie, '{} {}'.format(survival_operator,
                                                 survival_value))

    # Get evidence.
    for node_key, node in query_graph.nodes.items():
        # Genes.
        if node.categories[0] == BIOLINK_GENE_ENTITY:
            # Check for appropriate gene node structure.
            gene_id = node_key
            for edge in query_graph.edges.values():
                if (self.check_predicate_support(
                        edge.predicates[0],
                        BIOLINK_GENE_ASSOCIATED_WITH_CONDITION_ENTITY)
                        and edge.subject == gene_id
                        and edge.object == disease_id):
                    total_edges += 1
            chp_query.add_meta_evidence(node.ids[0], 'True')
            total_nodes += 1
        # Drugs.
        if node.categories[0] == BIOLINK_DRUG_ENTITY:
            # Check for appropriate drug node structure.
            drug_id = node_key
            for edge in query_graph.edges.values():
                if (self.check_predicate_support(edge.predicates[0],
                                                 BIOLINK_TREATS_ENTITY)
                        and edge.subject == drug_id
                        and edge.object == disease_id):
                    total_edges += 1
            chp_query.add_dynamic_evidence(node.ids[0], '==', 'True')
            total_nodes += 1

    # Extra attribute read by later processing.
    chp_query.truth_target = truth_target
    return chp_query
def buildQueries(self):
    """Validate ``self.qg`` and build the survival-time ``Query`` for it.

    The query graph must contain exactly one ``PhenotypicFeature`` node with
    curie ``EFO:0000714``, exactly one ``disease`` node with curie
    ``MONDO:0007254`` joined to it by a single
    ``disease_to_phenotype_association`` edge carrying a ``value``, and at
    least one ``Gene`` node joined to the disease. ``Drug`` nodes are
    optional. Every node and edge in the graph must be accounted for.

    Side effects:
        Sets ``self.days`` (the edge ``value``, converted to ``int`` when it
        is a string) and ``self.chp_query`` (the returned query).

    Returns:
        The ``Query`` built from the gene evidence, the optional drug meta
        evidence and the ``Survival_Time >= self.days`` meta target.

    Raises:
        SystemExit: Via ``sys.exit`` with a descriptive message whenever the
            graph does not have the expected structure or a curie is unknown.
    """
    nodes = self.qg['nodes']
    edges = self.qg['edges']

    # Running tallies of the nodes/edges recognised so far. The checks below
    # compare them against each other, so the order of updates matters.
    total_nodes = 0
    total_edges = 0

    # --- phenotype (target) node ---------------------------------------
    acceptable_target_curies = ['EFO:0000714']
    for node in nodes:
        if (node['type'] == 'PhenotypicFeature'
                and node['curie'] in acceptable_target_curies):
            target_id = node['id']
            total_nodes += 1
    if total_nodes == 0:
        sys.exit(
            'Survival Node not found. Node type must be \'PhenotypicFeature\' and curie must be in: '
            + ','.join(acceptable_target_curies))
    elif total_nodes > 1:
        sys.exit('Too many target nodes')

    # --- disease node and its edge to the target ------------------------
    acceptable_disease_curies = ['MONDO:0007254']
    for node in nodes:
        if (node['type'] != 'disease'
                or node['curie'] not in acceptable_disease_curies):
            continue
        disease_id = node['id']
        for edge in edges:
            if (edge['type'] == 'disease_to_phenotype_association'
                    and edge['source_id'] == disease_id
                    and edge['target_id'] == target_id
                    and 'value' in edge):
                self.days = edge['value']
                if isinstance(self.days, str):
                    self.days = int(self.days)
                total_edges += 1
        if total_edges == 0:
            sys.exit(
                'Disease and target edge not found. Edge type must be \'disease_to_phenotype_association\''
            )
        elif total_edges > 1:
            sys.exit('Disease has too many outgoing edges')
        total_nodes += 1
    if total_nodes == 1:
        sys.exit(
            'Disease node not found. Node type must be \'disease\' and curie must be in: '
            + ','.join(acceptable_disease_curies))
    elif total_nodes > 2:
        sys.exit('Too many disease nodes')

    # --- BKB target ------------------------------------------------------
    targets = [('Survival_Time', '>=', self.days)]

    # --- evidence --------------------------------------------------------
    evidence = {}
    meta_evidence = []
    for node in nodes:
        # genes
        if node['type'] == 'Gene':
            gene_id = node['id']
            for edge in edges:
                if (edge['type'] == 'gene_to_disease_association'
                        and edge['source_id'] == gene_id
                        and edge['target_id'] == disease_id):
                    total_edges += 1
            # Each evidence node must contribute exactly one edge.
            if total_edges == total_nodes - 1:
                sys.exit(
                    'Gene and disease edge not found. Edge type must be \'gene_to_disease_association\''
                )
            elif total_edges > total_nodes:
                sys.exit('Gene has too many outgoing edges')
            gene_curie = node['curie']
            try:
                gene = self.gene_curie_dict[gene_curie]
            except (KeyError, TypeError):
                # Unknown (or unhashable) curie. Only lookup failures are
                # caught so that interrupts and other errors propagate.
                sys.exit(
                    'Invalid ENSEMBL Identifier. Must be in form ENSEMBL:<ID>.'
                )
            evidence["_mut_" + gene] = 'True'
            total_nodes += 1
        # drugs
        if node['type'] == 'Drug':
            drug_id = node['id']
            for edge in edges:
                if (edge['type'] == 'chemical_to_disease_or_phenotypic_feature_association'
                        and edge['source_id'] == drug_id
                        and edge['target_id'] == disease_id):
                    total_edges += 1
            if total_edges == total_nodes - 1:
                sys.exit(
                    'Drug and disease edge not found. Edge type must be \'chemical_to_disease_or_phenotypic_feature_association\''
                )
            elif total_edges > total_nodes:
                sys.exit('Drug has too many outgoing edges')
            drug_curie = node['curie']
            try:
                drug = self.drug_curie_dict[drug_curie]
            except (KeyError, TypeError):
                sys.exit(
                    'Invalid CHEMBL Identifier. Must be in form CHEMBL:<ID>'
                )
            meta_evidence.append(('Drug_Name(s)', '==', drug))
            total_nodes += 1

    if total_nodes != len(nodes) or total_edges != len(edges):
        sys.exit('There are extra components in the provided QG structure')
    if not evidence:
        sys.exit('Needs at least 1 gene')

    # produce BKB query. Evidence is guaranteed non-empty at this point, so
    # the only remaining choice is whether any drug meta evidence exists.
    query = Query(evidence=evidence,
                  targets=[],
                  meta_evidence=meta_evidence or None,
                  meta_targets=targets,
                  type='updating')
    self.chp_query = query
    return query
} return {'genetic_info': inode_names, 'demographic_info': demographics} if __name__ == '__main__': parser = argparse.ArgumentParser( description='BKB Pathway Provider Minimal Driver') parser.add_argument('--config_file', default='driver.config', type=str) parser.add_argument('--headless', action='store_true') parser.add_argument('--query_file', type=str) parser.add_argument('--save_dir', type=str, default=os.getcwd()) parser.add_argument('--get_variables', type=str, default=None) args = parser.parse_args() if args.headless: driver = Driver(args.config_file) #-- Load Query from query file query = Query().read(args.query_file) result_query = driver.run_query(query) #-- Save Query result_query.save(args.save_dir, only_json=True) elif args.get_variables is not None: driver = Driver(args.config_file) vars_ = driver.collectVariables() with open(args.get_variables, 'wb') as f_: pickle.dump(file=f_, obj=vars_) else: driver = Driver(args.config_file) driver.run()
def _extract_chp_query(self, query, query_type=None):
    """Validate a query-graph dict and convert it into a ``Query``.

    The graph under ``query["query_graph"]`` must contain exactly one
    phenotypic-feature node whose ``id`` is ``EFO:0000714``, exactly one
    disease node whose ``id`` is ``MONDO:0007254`` joined to it by a single
    disease-to-phenotype edge, plus gene and/or drug nodes each joined to the
    disease by one edge. Every node and edge must be accounted for.

    Args:
        query: Mapping with a ``"query_graph"`` entry holding ``'nodes'`` and
            ``'edges'`` mappings, and optionally a ``"query_id"``.
        query_type: Accepted for interface compatibility; not used here.

    Returns:
        A ``Query`` of type ``'updating'`` whose evidence holds the gene
        (``"_" + curie``) and drug (curie) entries and whose dynamic target is
        keyed by the phenotype curie. ``truth_target`` and ``query_id`` are
        attached as attributes.

    Raises:
        SystemExit: Via ``sys.exit`` with a descriptive message when the graph
            structure or a curie is not acceptable.

    Notes:
        The dynamic target is keyed by the validated phenotype node's ``id``.
        The previous code used the loop variable left over from the disease
        scan, i.e. whichever node happened to be iterated last.
    """
    query_graph = query["query_graph"]
    nodes = query_graph['nodes']
    edges = query_graph['edges']

    # Running tallies of recognised nodes/edges. The checks below compare
    # them against each other, so the order of updates matters.
    total_nodes = 0
    total_edges = 0

    # --- phenotype (target) node ---------------------------------------
    acceptable_target_curies = ['EFO:0000714']
    for node_key, node in nodes.items():
        if (node['category'] == BIOLINK_PHENOTYPIC_FEATURE
                and node['id'] in acceptable_target_curies):
            target_id = node_key
            target_curie = node['id']
            total_nodes += 1
    if total_nodes == 0:
        sys.exit(
            'Survival Node not found. Node category must be \'biolink:PhenotypicFeature\' and id must be in: '
            + ','.join(acceptable_target_curies))
    elif total_nodes > 1:
        sys.exit('Too many target nodes')

    # --- disease node and its edge to the target ------------------------
    acceptable_disease_curies = ['MONDO:0007254']
    for node_key, node in nodes.items():
        if (node['category'] != BIOLINK_DISEASE
                or node['id'] not in acceptable_disease_curies):
            continue
        disease_id = node_key
        for edge in edges.values():
            if (edge['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE
                    and edge['subject'] == disease_id
                    and edge['object'] == target_id):
                if 'properties' in edge:
                    days = edge['properties']['days']
                    qualifier = edge['properties']['qualifier']
                else:
                    # Defaults applied when the edge carries no properties.
                    days = 970
                    qualifier = '>='
                total_edges += 1
        if total_edges == 0:
            sys.exit(
                'Disease and target edge not found. Edge type must be \'biolink:DiseaseToPhenotypicFeatureAssociation\''
            )
        elif total_edges > 1:
            sys.exit('Disease has too many outgoing edges')
        total_nodes += 1
    if total_nodes == 1:
        sys.exit(
            'Disease node not found. Node type must be \'biolink:Disease\' and curie must be in: '
            + ','.join(acceptable_disease_curies))
    elif total_nodes > 2:
        sys.exit('Too many disease nodes')

    # --- BKB target, keyed by the phenotype curie -------------------------
    dynamic_targets = {
        target_curie: {
            "op": qualifier,
            "value": days,
        }
    }
    truth_target = (target_curie, '{} {}'.format(qualifier, days))

    # --- evidence --------------------------------------------------------
    evidence = {}
    for node_key, node in nodes.items():
        # genes
        if node['category'] == BIOLINK_GENE:
            for edge in edges.values():
                if (edge['predicate'] == BIOLINK_GENE_TO_DISEASE_PREDICATE
                        and edge['subject'] == node_key
                        and edge['object'] == disease_id):
                    total_edges += 1
            # Each evidence node must contribute exactly one edge.
            if total_edges == total_nodes - 1:
                sys.exit(
                    "Gene and disease edge not found. Edge type must be '{}'"
                    .format(BIOLINK_GENE_TO_DISEASE_PREDICATE))
            elif total_edges > total_nodes:
                sys.exit('Gene has too many outgoing edges')
            gene_curie = node['id']
            if gene_curie not in self.curies[BIOLINK_GENE]:
                sys.exit(
                    'Invalid ENSEMBL Identifier. Must be in form ENSEMBL:<ID>.'
                )
            evidence["_" + gene_curie] = 'True'
            total_nodes += 1
        # drugs
        if node['category'] == BIOLINK_DRUG:
            for edge in edges.values():
                if (edge['predicate'] == BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE
                        and edge['subject'] == node_key
                        and edge['object'] == disease_id):
                    total_edges += 1
            if total_edges == total_nodes - 1:
                sys.exit(
                    "Drug and disease edge not found. Edge type must be '{}'"
                    .format(
                        BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE
                    ))
            elif total_edges > total_nodes:
                sys.exit('Drug has too many outgoing edges')
            drug_curie = node['id']
            if drug_curie not in self.curies[BIOLINK_DRUG]:
                sys.exit(
                    'Invalid CHEMBL Identifier: {}. Must be in form CHEMBL:<ID>'
                    .format(drug_curie))
            evidence[drug_curie] = 'True'
            total_nodes += 1

    if total_nodes != len(nodes) or total_edges != len(edges):
        sys.exit('There are extra components in the provided QG structure')

    # produce BKB query
    chp_query = Query(evidence=evidence,
                      targets=[],
                      dynamic_evidence={},
                      dynamic_targets=dynamic_targets,
                      type='updating')
    # Set some other helpful attributes
    chp_query.truth_target = truth_target
    chp_query.query_id = query["query_id"] if 'query_id' in query else None
    return chp_query
def _extract_chp_query(self, query, query_type=None):
    """Convert a query-graph dict into a ``Query`` without validating it.

    Args:
        query: Mapping with a ``"query_graph"`` entry holding ``'nodes'`` and
            ``'edges'`` mappings, and optionally a ``"query_id"``.
        query_type: Accepted for interface compatibility; not used here.

    Returns:
        A ``Query`` of type ``'updating'``. Gene nodes become evidence keyed
        ``"_" + id``, drug nodes become evidence keyed by ``id``, and the
        dynamic target is keyed by the phenotype node's ``id``.
        ``truth_target`` and ``query_id`` are attached as attributes.

    Raises:
        ValueError: If the graph contains no phenotypic-feature node.

    Notes:
        * The target is keyed by the phenotype node's ``id``. The previous
          code used the loop variable left over from the disease scan, i.e.
          whichever node happened to be iterated last.
        * ``days`` / ``qualifier`` now start at ``970`` / ``'>='``, so a graph
          without a disease -> phenotype edge no longer fails with an
          unbound-variable error.
        * Node/edge counters that were incremented but never read, and the
          gene/drug edge scans that only fed them, have been removed.
    """
    query_graph = query["query_graph"]
    nodes = query_graph['nodes']
    edges = query_graph['edges']

    # Find the phenotype node; when several match, the last one wins
    # (same tie-break as before).
    target_id = None
    target_curie = None
    for node_key, node in nodes.items():
        if node['category'] == BIOLINK_PHENOTYPIC_FEATURE:
            target_id = node_key
            target_curie = node['id']
    if target_id is None:
        raise ValueError(
            'Query graph has no phenotypic feature node to use as target')

    # Defaults, overridden by the 'properties' of a disease -> phenotype edge.
    days = 970
    qualifier = '>='
    for node_key, node in nodes.items():
        if node['category'] != BIOLINK_DISEASE:
            continue
        for edge in edges.values():
            if (edge['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE
                    and edge['subject'] == node_key
                    and edge['object'] == target_id):
                if 'properties' in edge:
                    days = edge['properties']['days']
                    qualifier = edge['properties']['qualifier']
                else:
                    days = 970
                    qualifier = '>='

    # set BKB target
    dynamic_targets = {
        target_curie: {
            "op": qualifier,
            "value": days,
        }
    }
    truth_target = (target_curie, '{} {}'.format(qualifier, days))

    # get evidence
    evidence = {}
    for node in nodes.values():
        if node['category'] == BIOLINK_GENE:
            evidence["_" + node['id']] = 'True'
        if node['category'] == BIOLINK_DRUG:
            evidence[node['id']] = 'True'

    # produce BKB query
    chp_query = Query(evidence=evidence,
                      targets=[],
                      dynamic_evidence={},
                      dynamic_targets=dynamic_targets,
                      type='updating')
    # Set some other helpful attributes
    chp_query.truth_target = truth_target
    chp_query.query_id = query["query_id"] if 'query_id' in query else None
    return chp_query
#-- Point the reasoner at the patient data file
reasoner.set_src_metadata(patient_data_file)
#reasoner.collapsed_bkb.makeGraph()

#-- To see which genetic or demographic evidence is available, uncomment the line below
#print(reasoner.metadata_ranges)

#-- Example of a query that mixes both kinds of evidence
#-- (evidence is for genetic info, and meta_ is for demographic info):
# query0 = Query(evidence={'mut_TMEM245=': 'True'},
#                targets=list(),
#                meta_evidence=[('Age_of_Diagnosis', '>=',20000)],
#                meta_targets=[('Survival_Time', '>=', 300)])

#-- The query actually run: a single gene as evidence, survival time as target
query0 = Query(
    evidence={'_mut_AADAC': 'True'},
    targets=[],
    meta_targets=[('Survival_Time', '>=', 300)],
)

#-- Run the query.
query = reasoner.analyze_query(
    query0,
    check_mutex=False,
    interpolation='standard',
    target_strategy='explicit',
)

#-- Return the report
query.getReport()
#print(query.result.print_contributions())
#print(query.result.completed_inferences_report())
query.bkb.makeGraph()

#-- Check for mutex if you want to.