class OneHopHandlerMixin:
    """Handler mixin for 1-hop queries.

    A 1-hop query graph (QG) consists of exactly two nodes joined by a single
    edge. The edge's object node carries a curie (the evidence) and the
    subject node is a wildcard (it has no ``id``) whose category, gene or
    drug, selects what gets ranked.

    The parameters below are presumably consumed by the host class
    constructor, which is not part of this mixin -- confirm against it.

    :param query: the query graph sent by the ARA.
    :type query: dict
    :param hosts_filename: a filename for a stored QG. Defaults to None
    :type hosts_filename: str
    :param num_processes_per_host: Not implemented thoroughly, but would be
        used for distributed reasoning.
    :type num_processes_per_host: int
    :param max_results: specific to 1-hop queries, specifies the number of
        wildcard genes to return.
    :type max_results: int
    """

    def _setup_handler(self):
        """Set the default survival target, then prepare queries and reasoner.

        Queries and the dynamic reasoner are only set up when
        ``self.init_query`` is not None; an already assigned
        ``self.dynamic_reasoner`` is kept.
        """
        self.default_survival_target = {
            "EFO:0000714": {
                "op": '>=',
                "value": 970
            }
        }

        # Only do the rest of this if a query is passed
        if self.init_query is None:
            return

        self._setup_queries()

        # Instantiate the reasoner lazily so an injected one is reused.
        if self.dynamic_reasoner is None:
            self.dynamic_reasoner = ChpDynamicReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)

    def _setup_queries(self):
        """Group the initial queries by wildcard type into ``self.query_dict``.

        ``self.init_query`` may be a list of queries or a single query.
        For a list, every query must carry a ``"query_id"``, which is
        recorded in ``self.query_map`` in input order. For a single query
        ``self.query_map`` is left empty.

        :raises KeyError: if a query in a list has no ``"query_id"``.
        """
        # Always (re)create both containers: the single-query path used to
        # read an undefined name and an uninitialised ``query_dict``.
        self.query_dict = defaultdict(list)
        self.query_map = []

        if isinstance(self.init_query, list):
            for query in self.init_query:
                self.query_map.append(query["query_id"])
                self.query_dict[self._get_wildcard_type(query)].append(
                    self._setup_single_query(query))
        else:
            query = self.init_query
            self.query_dict[self._get_wildcard_type(query)].append(
                self._setup_single_query(query))

    def _get_wildcard_type(self, query):
        """Return ``'drug'`` or ``'gene'`` for the query's single wildcard node.

        A wildcard node is one without an ``'id'`` key. The process exits
        via ``sys.exit`` when more than one wildcard node is present.

        :param query: a query holding ``query["query_graph"]["nodes"]``.
        :type query: dict
        :raises ValueError: if the wildcard category is neither drug nor gene
            (including when there is no wildcard node at all).
        """
        wildcard_type = None
        for node in query["query_graph"]["nodes"].values():
            if 'id' in node:
                continue
            if wildcard_type is not None:
                sys.exit(
                    'You can only have one contribution target. Make sure to leave only one node with a black curie.'
                )
            wildcard_type = node['category']

        if wildcard_type == BIOLINK_DRUG:
            return 'drug'
        if wildcard_type == BIOLINK_GENE:
            return 'gene'
        raise ValueError(
            'Did not understand wildcard type {}.'.format(wildcard_type))

    def check_query(self):
        """ Currently not implemented. Would check validity of query.
        """
        return True

    def _extract_chp_query(self, query, query_type=None):
        """Translate a 1-hop query into a ``Query`` with default survival target.

        The edge's subject must be the wildcard node (category matching
        ``query_type``) and its object the evidence node, whose curie must be
        known in ``self.curies``. Invalid input terminates via ``sys.exit``.
        When ``query_type`` is neither ``'gene'`` nor ``'drug'`` no evidence
        is added.

        :param query: the query, holding ``"query_graph"`` and optionally
            ``"query_id"``.
        :type query: dict
        :param query_type: ``'gene'`` or ``'drug'``, the wildcard type.
        :type query_type: str
        :returns: the query object with ``truth_target`` and ``query_id``
            attributes attached.
        """
        evidence = {}
        dynamic_targets = {}

        nodes = query["query_graph"]['nodes']
        edges = query["query_graph"]['edges']

        # Require exactly 2 nodes / 1 edge; the former ``>`` test let an
        # edge-less graph through to an IndexError below.
        if len(nodes) != 2 or len(edges) != 1:
            sys.exit('1 hop quries can only have 2 nodes and 1 edge')

        # check edge for source and target
        edge = next(iter(edges.values()))
        if 'subject' not in edge or 'object' not in edge:
            sys.exit(
                'Edge must have both a \'subject\' and and \'object\' key')
        subject = edge['subject']
        obj = edge['object']

        # Per wildcard type: required subject category, category of the
        # evidence curie, and the message used when that curie is unknown.
        rules = {
            'gene': (BIOLINK_GENE, BIOLINK_DRUG,
                     'Invalid CHEMBL Identifier. Must be CHEMBL:<ID>'),
            'drug': (BIOLINK_DRUG, BIOLINK_GENE,
                     'Invalid ENSEMBL Identifier. Must be ENSEMBL:<ID>'),
        }
        if query_type in rules:
            subject_category, evidence_category, bad_curie_msg = rules[
                query_type]
            if nodes[subject]['category'] != subject_category:
                sys.exit('Subject node must be \'category\' {}'.format(
                    subject_category))
            evidence_curie = nodes[obj]['id']
            if evidence_curie not in self.curies[evidence_category]:
                sys.exit(bad_curie_msg)
            evidence['_{}'.format(evidence_curie)] = 'True'

        # default survival time
        dynamic_targets.update(self.default_survival_target)
        default_target = self.default_survival_target["EFO:0000714"]
        truth_target = ('EFO:0000714', '{} {}'.format(
            default_target["op"], default_target["value"]))

        chp_query = Query(evidence=evidence,
                          targets=None,
                          dynamic_evidence=None,
                          dynamic_targets=dynamic_targets,
                          type='updating')
        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        chp_query.query_id = query.get("query_id")
        return chp_query

    def _run_query(self, chp_query, query_type):
        """Run the BKB query and derive per-wildcard contributions.

        The dynamic reasoner is run against the BKB of the evidence type
        (``'drug'`` for a gene wildcard, ``'gene'`` for a drug wildcard).
        Sets ``chp_query.truth_prob`` to the normalized probability of the
        truth target (floored at 0) and ``chp_query.wildcard_contributions``
        to a mapping ``curie -> {target: contribution, 'relative': diff}``
        where ``'relative'`` is the truth-target contribution minus the
        contributions to all other targets, each divided by its target's
        unnormalized probability.

        :param chp_query: query produced by ``_extract_chp_query``.
        :param query_type: ``'gene'`` or ``'drug'``, the wildcard type.
        :type query_type: str
        :returns: the updated ``chp_query``.
        """
        if query_type == 'gene':
            chp_query = self.dynamic_reasoner.run_query(chp_query,
                                                        bkb_type='drug')
        elif query_type == 'drug':
            chp_query = self.dynamic_reasoner.run_query(chp_query,
                                                        bkb_type='gene')

        chp_res_dict = chp_query.result.process_updates()
        chp_res_norm_dict = chp_query.result.process_updates(normalize=True)
        chp_res_contributions = chp_query.result.process_inode_contributions()

        truth_comp, truth_state = chp_query.truth_target[0], chp_query.truth_target[1]
        chp_query.truth_prob = max(
            [0, chp_res_norm_dict[truth_comp][truth_state]])

        # Collect all source inodes and process patient hashes
        patient_contributions = defaultdict(lambda: defaultdict(int))
        for target, contrib_dict in chp_res_contributions.items():
            for inode, contrib in contrib_dict.items():
                comp_name, state_name = inode
                if '_Source_' not in comp_name:
                    continue
                # The last '_'-separated field of the source state name is a
                # comma-separated list of patient hashes.
                source_hashes = [
                    int(source_hash)
                    for source_hash in state_name.split('_')[-1].split(',')
                ]
                # Split the contribution evenly over the patients sharing it.
                share = contrib / len(source_hashes)
                for _hash in source_hashes:
                    patient_contributions[target][_hash] += share

        # Translate patient contributions into drug/gene contributions.
        curie_field = {
            'gene': "gene_curies",
            'drug': "drug_curies"
        }.get(query_type)
        wildcard_contributions = defaultdict(lambda: defaultdict(int))
        if curie_field is not None:
            raw_patient_data = self.dynamic_reasoner.raw_patient_data
            for target, patient_contrib_dict in patient_contributions.items():
                for patient, contrib in patient_contrib_dict.items():
                    for curie in raw_patient_data[patient][curie_field]:
                        wildcard_contributions[curie][target] += contrib

        # normalize contributions by the target and take relative difference
        for curie in wildcard_contributions.keys():
            truth_contrib = 0
            nontruth_contrib = 0
            for target, contrib in wildcard_contributions[curie].items():
                normalized = contrib / chp_res_dict[target[0]][target[1]]
                if target[0] == truth_comp and target[1] == truth_state:
                    truth_contrib += normalized
                else:
                    nontruth_contrib += normalized
            wildcard_contributions[curie][
                'relative'] = truth_contrib - nontruth_contrib

        chp_query.report = None
        chp_query.wildcard_contributions = wildcard_contributions
        return chp_query

    def _construct_trapi_response(self, chp_query, query_type):
        """Build the TRAPI response for a processed 1-hop query.

        The knowledge graph holds the evidence node plus up to
        ``self.max_results`` wildcard nodes, ranked by the absolute value of
        their relative contribution, each linked to the evidence node by an
        edge carrying a ``Contribution`` attribute. One result (node and
        edge bindings) is emitted per ranked wildcard.

        :param chp_query: query returned by ``_run_query``.
        :param query_type: ``'gene'`` or ``'drug'``, the wildcard type.
        :type query_type: str
        :returns: ``(query_id, trapi_response)``; ``query_id`` is None when
            there is only one initial query.
        :raises ValueError: if no initial query matches
            ``chp_query.query_id``.
        """
        # Get original query
        queries = self.init_query if isinstance(
            self.init_query, list) else [self.init_query]
        if len(queries) == 1:
            query = queries[0]
            query_id = None
        else:
            for _query in queries:
                if _query["query_id"] == chp_query.query_id:
                    query = _query
                    query_id = query["query_id"]
                    break
            else:
                # Previously fell through to an unbound local.
                raise ValueError('No query found with query_id {}.'.format(
                    chp_query.query_id))

        kg = copy.deepcopy(query["query_graph"])

        # get edge subject/object and drop the query edge from the kg
        edge_key = list(kg['edges'].keys())[0]
        edge = kg['edges'].pop(edge_key)
        subject = edge['subject']
        obj = edge['object']

        # (wildcard category, evidence category) for each wildcard type
        categories = {
            'gene': (BIOLINK_GENE, BIOLINK_DRUG),
            'drug': (BIOLINK_DRUG, BIOLINK_GENE),
        }.get(query_type)

        # move the evidence curie to be the node key
        non_wildcard_curie = kg['nodes'][obj].pop('id')
        kg['nodes'][non_wildcard_curie] = kg['nodes'].pop(obj)
        if categories is not None:
            kg['nodes'][non_wildcard_curie]['name'] = self._get_curie_name(
                categories[1], non_wildcard_curie)[0]

        # remove wildcard node from kg
        kg['nodes'].pop(subject)

        # Rank wildcards by magnitude of their relative contribution.
        sorted_wildcard_contributions = sorted(
            ((contrib_dict['relative'], wildcard) for wildcard, contrib_dict
             in chp_query.wildcard_contributions.items()),
            key=lambda pair: abs(pair[0]),
            reverse=True)

        # add kg wildcard nodes and edges
        results = []
        top_contributions = sorted_wildcard_contributions[:self.max_results]
        for edge_count, (contrib, wildcard) in enumerate(top_contributions):
            kg_edge_id = 'kge{}'.format(edge_count)
            if categories is not None:
                wildcard_category = categories[0]
                kg['nodes'][wildcard] = {
                    "name": self._get_curie_name(wildcard_category,
                                                 wildcard)[0],
                    "category": wildcard_category
                }
                # add edge
                kg['edges'][kg_edge_id] = {
                    "predicate": BIOLINK_CHEMICAL_TO_GENE_PREDICATE,
                    "subject": wildcard,
                    "object": non_wildcard_curie,
                    "attributes": [{
                        'name': 'Contribution',
                        'type': BIOLINK_CONTRIBUTION,
                        'value': contrib
                    }]
                }
            # add to results
            results.append({
                'node_bindings': {
                    obj: [{
                        'id': non_wildcard_curie
                    }],
                    subject: [{
                        'id': wildcard
                    }]
                },
                'edge_bindings': {
                    edge_key: [{
                        'id': kg_edge_id
                    }]
                }
            })

        # query response
        trapi_message = {
            'query_graph': query["query_graph"],
            'knowledge_graph': kg,
            'results': results
        }
        trapi_response = {'message': trapi_message}
        return query_id, trapi_response
class WildCardHandlerMixin:
    """Handler mixin for wildcard queries.

    A wildcard query is a gene/drug -> disease -> survival query graph in
    which exactly one gene or drug node has no ``id``. That wildcard node is
    ranked by its contribution to the survival target; all other gene/drug
    nodes act as evidence.
    """

    def _setup_handler(self):
        """Prepare the queries and, if missing, the dynamic reasoner.

        Nothing is done when ``self.init_query`` is None; an already
        assigned ``self.dynamic_reasoner`` is kept.
        """
        # Only do the rest of this if a query is passed
        if self.init_query is None:
            return

        self._setup_queries()

        # Instantiate the reasoner lazily so an injected one is reused.
        if self.dynamic_reasoner is None:
            self.dynamic_reasoner = ChpDynamicReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)

    def _setup_queries(self):
        """Group the initial queries by wildcard type into ``self.query_dict``.

        ``self.init_query`` may be a list of queries or a single query.
        For a list, every query must carry a ``"query_id"``, which is
        recorded in ``self.query_map`` in input order. For a single query
        ``self.query_map`` is left empty.

        :raises KeyError: if a query in a list has no ``"query_id"``.
        """
        # Always (re)create both containers: the single-query path used to
        # read an undefined name and an uninitialised ``query_dict``.
        self.query_dict = defaultdict(list)
        self.query_map = []

        if isinstance(self.init_query, list):
            for query in self.init_query:
                self.query_map.append(query["query_id"])
                self.query_dict[self._get_wildcard_type(query)].append(
                    self._setup_single_query(query))
        else:
            query = self.init_query
            self.query_dict[self._get_wildcard_type(query)].append(
                self._setup_single_query(query))

    def _get_wildcard_type(self, query):
        """Return ``'drug'`` or ``'gene'`` for the query's single wildcard node.

        A wildcard node is one without an ``'id'`` key. The process exits
        via ``sys.exit`` when more than one wildcard node is present.

        :param query: a query holding ``query["query_graph"]["nodes"]``.
        :type query: dict
        :raises ValueError: if the wildcard category is neither drug nor gene
            (including when there is no wildcard node at all).
        """
        wildcard_type = None
        for node in query["query_graph"]["nodes"].values():
            if 'id' in node:
                continue
            if wildcard_type is not None:
                sys.exit('You can only have one contribution target. Make sure to leave only one node with a black curie.')
            wildcard_type = node['category']

        if wildcard_type == BIOLINK_DRUG:
            return 'drug'
        if wildcard_type == BIOLINK_GENE:
            return 'gene'
        raise ValueError('Did not understand wildcard type {}.'.format(wildcard_type))

    def _extract_chp_query(self, query, query_type):
        """Validate the query graph and translate it into a ``Query``.

        Expects at most one survival node (phenotypic feature
        ``EFO:0000714``), exactly one disease node (``MONDO:0007254``) and
        gene/drug nodes linked to the disease. When the survival node is
        absent, ``self.implicit_survival_node`` is set and the default
        target (``>= 970`` days) is used. Every gene/drug node whose type
        differs from ``query_type`` contributes evidence. Structural
        problems terminate the process via ``sys.exit``.

        Also sets ``self.no_evidence_probability_check`` to True when the
        query carries no evidence at all.

        :param query: the query, holding ``"query_graph"`` and optionally
            ``"query_id"``.
        :type query: dict
        :param query_type: ``'gene'`` or ``'drug'``, the wildcard type.
        :type query_type: str
        :returns: the query object with ``truth_target`` and ``query_id``
            attributes attached.
        """
        evidence = {}
        targets = []
        dynamic_evidence = {}
        dynamic_targets = {}

        nodes = query["query_graph"]['nodes']
        edges = query["query_graph"]['edges']

        # Running counts of the nodes/edges accounted for so far; they drive
        # the structural checks below.
        total_nodes = 0
        total_edges = 0

        # Bound up front so the lookups below cannot hit unbound locals
        # when the survival node or its edge is missing.
        target_id = None
        disease_id = None
        days = 970
        qualifier = '>='

        # get phenotype node
        acceptable_target_curies = ['EFO:0000714']
        self.implicit_survival_node = False
        for node_key, node in nodes.items():
            if node['category'] == BIOLINK_PHENOTYPIC_FEATURE and node['id'] in acceptable_target_curies:
                target_id = node_key
                total_nodes += 1
        if total_nodes == 0:
            # No survival node given: use the default survival target.
            self.implicit_survival_node = True
            total_nodes += 1
        elif total_nodes > 1:
            sys.exit('Too many target nodes')

        # get disease node info and ensure only 1 disease:
        acceptable_disease_curies = ['MONDO:0007254']
        for node_key, node in nodes.items():
            if node['category'] == BIOLINK_DISEASE and node['id'] in acceptable_disease_curies:
                disease_id = node_key
                for edge in edges.values():
                    if edge['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE and edge['subject'] == disease_id and edge['object'] == target_id:
                        if 'properties' in edge:
                            days = edge['properties']['days']
                            qualifier = edge['properties']['qualifier']
                        else:
                            days = 970
                            qualifier = '>='
                        total_edges += 1
                if total_edges > 1:
                    sys.exit('Disease has too many outgoing edges')
                total_nodes += 1

        if self.implicit_survival_node:
            # Count the implied disease -> survival edge.
            days = 970
            qualifier = '>='
            total_edges += 1

        if total_nodes == 1:
            acceptable_disease_curies_print = ','.join(acceptable_disease_curies)
            sys.exit("Disease node not found. Node type must be '{}' and curie must be in: {}".format(BIOLINK_DISEASE, acceptable_disease_curies_print))
        elif total_nodes > 2:
            sys.exit('Too many disease nodes')

        # set BKB target
        dynamic_targets['EFO:0000714'] = {
            "op": qualifier,
            "value": days,
        }
        truth_target = ('EFO:0000714', '{} {}'.format(qualifier, days))

        # get evidence
        for node_key, node in nodes.items():
            # genes
            if node['category'] == BIOLINK_GENE:
                # check for appropriate gene node structure
                gene_id = node_key
                for edge in edges.values():
                    if edge['predicate'] == BIOLINK_GENE_TO_DISEASE_PREDICATE and edge['subject'] == gene_id and edge['object'] == disease_id:
                        total_edges += 1
                if total_edges == total_nodes - 1:
                    sys.exit("Gene and disease edge not found. Edge type must be '{}'".format(BIOLINK_GENE_TO_DISEASE_PREDICATE))
                elif total_edges > total_nodes:
                    sys.exit('Gene has too many outgoing edges')
                # Only non-wildcard genes carry a curie to use as evidence.
                if query_type != 'gene':
                    gene_curie = node['id']
                    if gene_curie not in self.curies[BIOLINK_GENE]:
                        sys.exit('Invalid ENSEMBL Identifier. Must be in form ENSEMBL:<ID>.')
                    evidence["_" + gene_curie] = 'True'
                total_nodes += 1
            # drugs
            if node['category'] == BIOLINK_DRUG:
                # check for appropriate drug node structure
                drug_id = node_key
                for edge in edges.values():
                    if edge['predicate'] == BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE and edge['subject'] == drug_id and edge['object'] == disease_id:
                        total_edges += 1
                if total_edges == total_nodes - 1:
                    sys.exit("Drug and disease edge not found. Edge type must be '{}'".format(BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE))
                elif total_edges > total_nodes:
                    sys.exit('Drug has too many outgoing edges')
                # Only non-wildcard drugs carry a curie to use as evidence.
                if query_type != 'drug':
                    drug_curie = node['id']
                    if drug_curie not in self.curies[BIOLINK_DRUG]:
                        sys.exit('Invalid CHEMBL Identifier: {}. Must be in form CHEMBL:<ID>'.format(drug_curie))
                    evidence['_' + drug_curie] = 'True'
                total_nodes += 1

        # Temporary solution to no evidence linking
        self.no_evidence_probability_check = (len(evidence) == 0 and len(dynamic_evidence) == 0)

        # produce BKB query
        chp_query = Query(
            evidence=evidence,
            targets=targets,
            dynamic_evidence=dynamic_evidence,
            dynamic_targets=dynamic_targets,
            type='updating')
        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        chp_query.query_id = query.get("query_id")
        return chp_query

    def _run_query(self, chp_query, query_type):
        """Compute the survival probability and per-wildcard contributions.

        With evidence, the dynamic reasoner is run (against the ``'drug'``
        BKB for a gene wildcard and the ``'gene'`` BKB for a drug wildcard)
        and patient contributions are read from the source inodes. Without
        evidence, the probability is the fraction of patients in
        ``raw_patient_data`` satisfying the survival target and that
        probability mass is spread evenly over the patients on each side.

        Sets ``chp_query.truth_prob`` and
        ``chp_query.wildcard_contributions`` (``curie -> {target:
        contribution, 'relative': diff}``).

        :param chp_query: query produced by ``_extract_chp_query``.
        :param query_type: ``'gene'`` or ``'drug'``, the wildcard type.
        :type query_type: str
        :returns: the updated ``chp_query``.
        """
        raw_patient_data = self.dynamic_reasoner.raw_patient_data
        patient_contributions = defaultdict(lambda: defaultdict(int))

        # temporary solution to no evidence linking
        if not self.no_evidence_probability_check:
            if query_type == 'gene':
                chp_query = self.dynamic_reasoner.run_query(chp_query, bkb_type='drug')
            elif query_type == 'drug':
                chp_query = self.dynamic_reasoner.run_query(chp_query, bkb_type='gene')
            chp_res_dict = chp_query.result.process_updates()
            chp_res_norm_dict = chp_query.result.process_updates(normalize=True)
            chp_res_contributions = chp_query.result.process_inode_contributions()
            chp_query.truth_prob = max([0, chp_res_norm_dict[chp_query.truth_target[0]][chp_query.truth_target[1]]])

            # Collect all source inodes and process patient hashes
            for target, contrib_dict in chp_res_contributions.items():
                for inode, contrib in contrib_dict.items():
                    comp_name, state_name = inode
                    if '_Source_' not in comp_name:
                        continue
                    # The last '_'-separated field of the source state name
                    # is a comma-separated list of patient hashes.
                    source_hashes = [int(source_hash) for source_hash in state_name.split('_')[-1].split(',')]
                    # Split the contribution evenly over those patients.
                    share = contrib / len(source_hashes)
                    for _hash in source_hashes:
                        patient_contributions[target][_hash] += share
        else:
            # probability of survival taken straight from the patient data
            num_all = len(raw_patient_data)
            str_op = chp_query.dynamic_targets['EFO:0000714']['op']
            opp_op = get_opposite_operator(str_op)
            op = get_operator(str_op)
            days = chp_query.dynamic_targets['EFO:0000714']['value']

            survived = {
                patient: op(pat_dict['survival_time'], days)
                for patient, pat_dict in raw_patient_data.items()
            }
            num_survived = sum(1 for flag in survived.values() if flag)
            chp_query.truth_prob = num_survived / num_all

            # Each side's probability mass is shared equally by its
            # patients. A side with no patients is never indexed, so its
            # zero divisor is never used.
            truth_key = ('EFO:0000714', '{} {}'.format(str_op, days))
            nontruth_key = ('EFO:0000714', '{} {}'.format(opp_op, days))
            for patient, did_survive in survived.items():
                if did_survive:
                    patient_contributions[truth_key][patient] = chp_query.truth_prob / num_survived
                else:
                    patient_contributions[nontruth_key][patient] = (1 - chp_query.truth_prob) / (num_all - num_survived)

        # Translate patient contributions into drug/gene contributions.
        curie_field = {'gene': "gene_curies", 'drug': "drug_curies"}.get(query_type)
        wildcard_contributions = defaultdict(lambda: defaultdict(int))
        if curie_field is not None:
            for target, patient_contrib_dict in patient_contributions.items():
                for patient, contrib in patient_contrib_dict.items():
                    for curie in raw_patient_data[patient][curie_field]:
                        wildcard_contributions[curie][target] += contrib

        # normalize contributions by the target and take relative difference
        for curie in wildcard_contributions.keys():
            truth_contrib = 0
            nontruth_contrib = 0
            for target, contrib in wildcard_contributions[curie].items():
                if target[0] == chp_query.truth_target[0] and target[1] == chp_query.truth_target[1]:
                    truth_contrib += contrib / chp_query.truth_prob
                else:
                    nontruth_contrib += contrib / (1 - chp_query.truth_prob)
            wildcard_contributions[curie]['relative'] = truth_contrib - nontruth_contrib

        chp_query.report = None
        chp_query.wildcard_contributions = wildcard_contributions
        return chp_query

    def _construct_trapi_response(self, chp_query, query_type):
        """Build the TRAPI response for a processed wildcard query.

        Unless the survival node was implicit, the first result binds the
        curie-only part of the query and its disease -> survival edge gets
        a ``Probability of Survival`` attribute. One further result is
        added per ranked wildcard (up to ``self.max_results``, ordered by
        absolute relative contribution), each with a wildcard node and a
        wildcard -> disease edge carrying a ``Contribution`` attribute.

        :param chp_query: query returned by ``_run_query``.
        :param query_type: ``'gene'`` or ``'drug'``, the wildcard type.
        :type query_type: str
        :returns: ``(query_id, trapi_response)``; ``query_id`` is None when
            there is only one initial query.
        :raises ValueError: if no initial query matches
            ``chp_query.query_id``.
        """
        # Get original query
        queries = self.init_query if isinstance(self.init_query, list) else [self.init_query]
        if len(queries) == 1:
            query = queries[0]
            query_id = None
        else:
            for _query in queries:
                if _query["query_id"] == chp_query.query_id:
                    query = _query
                    query_id = query["query_id"]
                    break
            else:
                # Previously fell through to an unbound local.
                raise ValueError('No query found with query_id {}.'.format(chp_query.query_id))

        query_graph = query["query_graph"]
        # The knowledge graph starts as a copy of the query graph and is
        # then re-keyed by curie.
        kg = copy.deepcopy(query_graph)

        # Process Nodes: re-key curie nodes by their curie, drop wildcards.
        node_pairs = {}
        for node_key in list(kg["nodes"].keys()):
            qg_node_curie = kg['nodes'][node_key].pop('id', None)
            if qg_node_curie is None:
                kg["nodes"].pop(node_key)
                continue
            kg_node = kg['nodes'].pop(node_key)
            kg['nodes'][qg_node_curie] = kg_node
            if kg_node['category'] == BIOLINK_GENE:
                kg_node['name'] = self.curies["biolink:Gene"][qg_node_curie][0]
            elif kg_node['category'] == BIOLINK_DRUG:
                kg_node['name'] = self.curies["biolink:Drug"][qg_node_curie][0]
            node_pairs[node_key] = qg_node_curie

        # Predicate and subject category identifying the wildcard's edge.
        wildcard_edge_rule = {
            'gene': (BIOLINK_GENE_TO_DISEASE_PREDICATE, BIOLINK_GENE),
            'drug': (BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE, BIOLINK_DRUG),
        }.get(query_type)

        def is_wildcard_edge(edge):
            """True when the edge matches the wildcard rule for query_type."""
            if wildcard_edge_rule is None:
                return False
            predicate, subject_category = wildcard_edge_rule
            return (edge['predicate'] == predicate
                    and query_graph['nodes'][edge['subject']]['category'] == subject_category)

        edge_pairs = {}
        knowledge_edges = 0
        if not self.implicit_survival_node:
            # Process Edges
            for edge_key in list(kg['edges'].keys()):
                if is_wildcard_edge(kg['edges'][edge_key]):
                    # Wildcard edges are re-added per result further down.
                    kg['edges'].pop(edge_key)
                    continue
                kg_id = 'kge{}'.format(knowledge_edges)
                knowledge_edges += 1
                kg_edge = kg['edges'].pop(edge_key)
                kg['edges'][kg_id] = kg_edge
                kg_edge['subject'] = node_pairs[kg_edge['subject']]
                kg_edge['object'] = node_pairs[kg_edge['object']]
                edge_pairs[edge_key] = kg_id
                if kg_edge['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE:
                    kg_edge.pop('properties', None)
                    kg_edge['attributes'] = [{'name':'Probability of Survival',
                                              'type':BIOLINK_PROBABILITY,
                                              'value':chp_query.truth_prob}]
            # First result: the standard probabilistic query over curie
            # nodes only (no wildcard nodes used as evidence).
            results = [{
                'edge_bindings': {key: [{'id': str(kg_id)}] for key, kg_id in edge_pairs.items()},
                'node_bindings': {key: [{'id': str(curie)}] for key, curie in node_pairs.items()},
            }]
        else:
            # NOTE(review): with an implicit survival node no non-wildcard
            # edge is kept in the kg, so a query that also has a
            # non-wildcard edge fails in the edge loop below (KeyError here,
            # NameError before) -- the intended binding needs confirming.
            kg['edges'] = {}
            results = []

        # Build relative contribution results and add the associated edges
        # to the knowledge graph.
        sorted_wildcard_contributions = sorted(
            ((contrib_dict['relative'], wildcard)
             for wildcard, contrib_dict in chp_query.wildcard_contributions.items()),
            key=lambda pair: abs(pair[0]),
            reverse=True)

        wildcard_category = wildcard_edge_rule[1] if wildcard_edge_rule is not None else None
        for contrib, wildcard in sorted_wildcard_contributions[:self.max_results]:
            _node_pairs = {}
            _edge_pairs = {}
            # Process node pairs
            for node_id, node in query_graph["nodes"].items():
                if wildcard_category is not None and node["category"] == wildcard_category:
                    kg["nodes"][wildcard] = copy.deepcopy(node)
                    kg["nodes"][wildcard].update({"name": self.curies[wildcard_category][wildcard][0]})
                    _node_pairs[node_id] = wildcard
                else:
                    _node_pairs[node_id] = node_pairs[node_id]
            # Process edge pairs
            for edge_id, edge in query_graph["edges"].items():
                if is_wildcard_edge(edge):
                    knowledge_edges += 1
                    kg_edge_id = 'kge{}'.format(knowledge_edges)
                    kg_edge = copy.deepcopy(edge)
                    kg_edge["subject"] = _node_pairs[kg_edge["subject"]]
                    kg_edge["object"] = _node_pairs[kg_edge["object"]]
                    kg_edge["attributes"] = [{'name':'Contribution',
                                              'type':BIOLINK_CONTRIBUTION,
                                              'value':contrib}]
                    kg["edges"][kg_edge_id] = kg_edge
                    _edge_pairs[edge_id] = kg_edge_id
                else:
                    _edge_pairs[edge_id] = edge_pairs[edge_id]
            # Process node and edge binding results
            results.append({
                "edge_bindings": {key: [{"id": str(kg_id)}] for key, kg_id in _edge_pairs.items()},
                "node_bindings": {key: [{"id": str(curie)}] for key, curie in _node_pairs.items()},
            })

        # query response
        trapi_message = {'query_graph': query_graph,
                         'knowledge_graph': kg,
                         'results': results}
        trapi_response = {'message' : trapi_message}
        return query_id, trapi_response
class DefaultHandlerMixin:
    """Handler mixin for standard (non-wildcard) probabilistic messages.

    Messages are split into ``'simple'`` ones (at most one outcome, disease,
    gene and drug node), answered by the joint reasoner, and ``'default'``
    ones, answered by the dynamic reasoner.
    """

    def _setup_handler(self):
        """Sort the messages and create the reasoners that are needed.

        Nothing is done when ``self.messages`` is None. A reasoner is only
        created for a message kind that is present and only when the
        matching attribute is still None.
        """
        # Only do the rest of this if a message is passed
        if self.messages is None:
            return

        self._setup_messages()

        # Instantiate Reasoners
        if 'default' in self.message_dict and self.dynamic_reasoner is None:
            self.dynamic_reasoner = ChpDynamicReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)
        if 'simple' in self.message_dict and self.joint_reasoner is None:
            self.joint_reasoner = ChpJointReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)

    def _setup_messages(self):
        """Partition ``self.messages`` into ``self.message_dict``.

        Keys are ``'simple'`` and ``'default'``; a key only exists when at
        least one message of that kind was seen.
        """
        self.message_dict = defaultdict(list)
        for message in self.messages:
            kind = 'simple' if self._is_simple_message(message) else 'default'
            self.message_dict[kind].append(message)

    def _is_simple_message(self, message):
        """ Check if this is a {0 or 1} drug, {0 or 1} gene, one outcome standard message.

        Only the first category of each node is inspected. Returns False as
        soon as a second phenotypic-feature, disease, gene or drug node is
        found and True otherwise.
        """
        tracked_entities = (
            BIOLINK_PHENOTYPIC_FEATURE_ENTITY,
            BIOLINK_DISEASE_ENTITY,
            BIOLINK_GENE_ENTITY,
            BIOLINK_DRUG_ENTITY,
        )
        # Parallel flags rather than a set: the entities are only known to
        # support ``==`` here.
        already_found = [False] * len(tracked_entities)
        for node in message.query_graph.nodes.values():
            for index, entity in enumerate(tracked_entities):
                if node.categories[0] == entity:
                    if already_found[index]:
                        # A second node of the same kind: not simple.
                        return False
                    already_found[index] = True
        return True

    def _extract_chp_query(self, message, message_type=None):
        """Translate a message's query graph into a ``ChpQuery``.

        The survival target defaults to ``>= 970``. A ``survival_time``
        constraint on a has-phenotype edge from a disease node to the
        phenotype node overrides it (operator ``'matches'`` becomes
        ``'=='``). Gene nodes are added as meta evidence and drug nodes as
        dynamic evidence, using the first id of each node.

        The target curie is the phenotype node's first id. The previous
        code read it from whichever node happened to be iterated last; that
        is still the fallback when the graph has no phenotype node.

        :param message: message exposing ``query_graph`` with ``nodes`` and
            ``edges`` mappings.
        :param message_type: unused here; kept for interface compatibility.
        :returns: the query with a ``truth_target`` attribute attached.
        :raises ValueError: if the query graph has no nodes.
        """
        # Initialize Chp Query
        chp_query = ChpQuery(reasoning_type='updating')

        query_graph = message.query_graph

        # get phenotype node
        target_id = None
        last_node = None
        for node_key, node in query_graph.nodes.items():
            last_node = node
            if node.categories[0] == BIOLINK_PHENOTYPIC_FEATURE_ENTITY:
                target_id = node_key
        if last_node is None:
            raise ValueError('Query graph has no nodes.')

        survival_value = 970
        survival_operator = '>='

        # get disease node info and any survival constraint on its edge
        disease_id = None
        for node_key, node in query_graph.nodes.items():
            if node.categories[0] != BIOLINK_DISEASE_ENTITY:
                continue
            disease_id = node_key
            for edge in query_graph.edges.values():
                if self.check_predicate_support(
                        edge.predicates[0], BIOLINK_HAS_PHENOTYPE_ENTITY
                ) and edge.subject == disease_id and edge.object == target_id:
                    survival_time_constraint = edge.find_constraint(
                        name='survival_time')
                    if survival_time_constraint is not None:
                        survival_value = survival_time_constraint.value
                        survival_operator = survival_time_constraint.operator
                        if survival_operator == 'matches':
                            survival_operator = '=='

        # set BKB target
        if target_id is not None:
            target_node = query_graph.nodes[target_id]
        else:
            target_node = last_node
        target_curie = target_node.ids[0]
        chp_query.add_dynamic_target(target_curie, survival_operator,
                                     survival_value)
        truth_target = (target_curie, '{} {}'.format(survival_operator,
                                                     survival_value))

        # get evidence
        # The old per-node edge scans only fed counters nothing read, so
        # they are gone.
        for node in query_graph.nodes.values():
            # genes
            if node.categories[0] == BIOLINK_GENE_ENTITY:
                chp_query.add_meta_evidence(node.ids[0], 'True')
            # drugs
            if node.categories[0] == BIOLINK_DRUG_ENTITY:
                chp_query.add_dynamic_evidence(node.ids[0], '==', 'True')

        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        return chp_query

    def _run_query(self, chp_query, query_type):
        """Run the query and store the truth probability on it.

        For ``query_type == 'simple'`` the joint reasoner is used and
        ``truth_prob`` is the truth target's score divided by the sum of
        all scores, negative scores counting as 0. It is 0 when the truth
        target is missing from the result or when no score is positive,
        and -1 when the result is empty. Otherwise the dynamic reasoner is
        used and ``truth_prob`` is the normalized probability of the truth
        target floored at 0, or -1 when the target is absent.

        :param chp_query: query produced by ``_extract_chp_query``.
        :param query_type: ``'simple'`` or anything else for the dynamic
            reasoner.
        :returns: the updated ``chp_query`` with ``report`` set to None.
        """
        if query_type == 'simple':
            chp_query = self.joint_reasoner.run_query(chp_query)
            result = chp_query.result
            if len(result) == 0:
                # No probability was found for the target at all.
                chp_query.truth_prob = -1
            elif chp_query.truth_target not in result:
                chp_query.truth_prob = 0
            else:
                total_unnormalized_prob = sum(
                    max(0, contrib) for contrib in result.values())
                if total_unnormalized_prob > 0:
                    chp_query.truth_prob = max(
                        [0, result[chp_query.truth_target]
                         ]) / total_unnormalized_prob
                else:
                    # Every score is non-positive; this used to divide by 0.
                    chp_query.truth_prob = 0
        else:
            chp_query = self.dynamic_reasoner.run_query(chp_query)
            chp_res_dict = chp_query.result.process_updates(normalize=True)
            try:
                chp_query.truth_prob = max([
                    0, chp_res_dict[chp_query.truth_target[0]][
                        chp_query.truth_target[1]]
                ])
            except KeyError:
                # May need to come back and fix this.
                chp_query.truth_prob = -1

        chp_query.report = None
        return chp_query

    def _construct_trapi_message(self, chp_query, message, query_type=None):
        """Fill the message's knowledge graph and results from the query.

        Every query node becomes a knowledge-graph node (gene and drug
        nodes are named from ``self.curies``, others by their curie) and
        every query edge a knowledge-graph edge. Edges whose predicate is
        supported as has-phenotype get a ``Probability of Survival``
        attribute holding ``chp_query.truth_prob``. A single result binding
        all nodes and edges is added.

        :param chp_query: query returned by ``_run_query``.
        :param message: message to update in place.
        :param query_type: unused here; kept for interface compatibility.
        :returns: the same ``message`` object.
        """
        qg = message.query_graph
        kg = message.knowledge_graph

        node_bindings = {}
        for qnode_key, qnode in qg.nodes.items():
            curie = qnode.ids[0]
            category = qnode.categories[0]
            if category == BIOLINK_GENE_ENTITY:
                name = self.curies[BIOLINK_GENE_ENTITY.get_curie()][curie][0]
            elif category == BIOLINK_DRUG_ENTITY:
                name = self.curies[BIOLINK_DRUG_ENTITY.get_curie()][curie][0]
            else:
                # No name table for other categories: fall back to the curie.
                name = curie
            knode_key = kg.add_node(
                curie,
                name,
                category.get_curie(),
            )
            node_bindings[qnode_key] = [knode_key]

        edge_bindings = {}
        for qedge_key, qedge in qg.edges.items():
            kedge_key = kg.add_edge(
                node_bindings[qedge.subject][0],
                node_bindings[qedge.object][0],
                predicate=qedge.predicates[0].get_curie(),
                relation=qedge.relation,
            )
            edge_bindings[qedge_key] = [kedge_key]
            # Add Attribute
            if self.check_predicate_support(qedge.predicates[0],
                                            BIOLINK_HAS_PHENOTYPE_ENTITY):
                kg.edges[kedge_key].add_attribute(
                    attribute_type_id='Probability of Survival',
                    value=chp_query.truth_prob,
                    value_type_id=BIOLINK_HAS_CONFIDENCE_LEVEL_ENTITY.
                    get_curie(),
                )

        # Process results
        message.results.add_result(
            node_bindings,
            edge_bindings,
        )
        return message
class TestDynamicReasoner(unittest.TestCase):
    """Smoke tests for ``ChpDynamicReasoner`` on small gene/drug evidence sets.

    Every test builds a ``Query`` whose evidence states are all ``'True'`` and
    whose dynamic target is ``EFO:0000714 >= 1000``, runs it through the
    reasoner and prints the result summary. Nothing is asserted about the
    returned probabilities; a test passes when the run raises no exception.
    """

    def setUp(self):
        handler = BkbDataHandler(
            bkb_major_version='coulomb',
            bkb_minor_version='1.0',
        )
        self.bkb_handler = handler
        self.dynamic_reasoner = ChpDynamicReasoner(handler)

    def _run_and_summarize(self, evidence_keys, **run_kwargs):
        """Run one query over ``evidence_keys`` and print its summary.

        :param evidence_keys: evidence names, each asserted with state 'True'.
        :param run_kwargs: extra keyword arguments forwarded to ``run_query``.
        """
        # Fresh dicts per call so no state is shared between tests.
        evidence = {key: 'True' for key in evidence_keys}
        dynamic_targets = {"EFO:0000714": {"op": '>=', "value": 1000}}
        query = Query(evidence=evidence, dynamic_targets=dynamic_targets)
        answered = self.dynamic_reasoner.run_query(query, **run_kwargs)
        answered.result.summary(include_contributions=False)

    def test_dynamic_reasoner_one_gene(self):
        self._run_and_summarize(('_ENSEMBL:ENSG00000155657',))

    def test_dynamic_reasoner_one_gene_one_drug(self):
        self._run_and_summarize((
            '_ENSEMBL:ENSG00000155657',
            'CHEMBL:CHEMBL83',
        ))

    def test_dynamic_reasoner_two_gene_one_drug(self):
        self._run_and_summarize((
            '_ENSEMBL:ENSG00000155657',
            '_ENSEMBL:ENSG00000241973',
            'CHEMBL:CHEMBL83',
        ))

    def test_dynamic_reasoner_one_drug_survival(self):
        self._run_and_summarize(('_CHEMBL:CHEMBL83',), bkb_type='drug')

    def test_dynamic_reasoner_two_drug_survival(self):
        self._run_and_summarize(
            ('_CHEMBL:CHEMBL83', '_CHEMBL:CHEMBL1201247'),
            bkb_type='drug',
        )
class DefaultHandlerMixin:
    """Handler mixin for standard probability-of-survival queries.

    The mixin reads ``init_query``, ``bkb_data_handler``, ``hosts_filename``,
    ``num_processes_per_host``, ``dynamic_reasoner`` and ``joint_reasoner``
    from the object it is mixed into, and calls ``_setup_single_query`` and
    ``_get_curie_name`` on it; none of those are defined here.
    """

    def _setup_handler(self):
        """Group the queries and instantiate only the reasoners they need."""
        # Only do the rest of this if a query is passed
        if self.init_query is None:
            return
        self._setup_queries()

        reasoner_kwargs = {
            'bkb_handler': self.bkb_data_handler,
            'hosts_filename': self.hosts_filename,
            'num_processes_per_host': self.num_processes_per_host,
        }
        if 'default' in self.query_dict and self.dynamic_reasoner is None:
            self.dynamic_reasoner = ChpDynamicReasoner(**reasoner_kwargs)
        if 'simple' in self.query_dict and self.joint_reasoner is None:
            self.joint_reasoner = ChpJointReasoner(**reasoner_kwargs)

    def _setup_queries(self):
        """Populate ``self.query_dict`` with queries keyed 'simple'/'default'.

        A list of queries additionally fills ``self.query_map`` with each
        query's ``query_id`` in input order. A single (non-list) query yields
        a one-entry ``query_dict`` and leaves ``query_map`` untouched.
        """
        if isinstance(self.init_query, list):
            self.query_dict = defaultdict(list)
            self.query_map = []
            for query in self.init_query:
                self.query_map.append(query["query_id"])
                kind = 'simple' if self._is_simple_query(query) else 'default'
                self.query_dict[kind].append(self._setup_single_query(query))
        else:
            kind = 'simple' if self._is_simple_query(
                self.init_query) else 'default'
            self.query_dict = {
                kind: [self._setup_single_query(self.init_query)]
            }

    def _is_simple_query(self, query):
        """ Check if this is a {0 or 1} drug, {0 or 1} gene, one outcome standard query.

        :return: False as soon as a second node of the same tracked category
            (phenotypic feature, disease, gene, drug) is seen, True otherwise.
        """
        tracked = (
            BIOLINK_PHENOTYPIC_FEATURE,
            BIOLINK_DISEASE,
            BIOLINK_GENE,
            BIOLINK_DRUG,
        )
        # A list (not a set) so categories only need to support ``==``.
        seen = []
        for node in query["query_graph"]["nodes"].values():
            category = node["category"]
            if category not in tracked:
                continue
            if category in seen:
                return False
            seen.append(category)
        return True

    def _extract_chp_query(self, query, query_type=None):
        """Translate a query graph into a CHP ``Query``.

        The phenotypic-feature node supplies the dynamic target; the edge from
        a disease node to that node may carry ``properties`` with ``days`` and
        ``qualifier`` (defaults: ``970`` and ``'>='``). Gene nodes become
        evidence keyed ``'_' + id`` and drug nodes evidence keyed by ``id``.

        :param query: dict with a ``query_graph`` and optionally a ``query_id``.
        :param query_type: unused here.
        :return: the ``Query`` with ``truth_target`` and ``query_id`` attached.
        :raises ValueError: if the graph has no phenotypic-feature node.
        """
        nodes = query["query_graph"]['nodes']
        edges = query["query_graph"]['edges']

        # Locate the outcome node explicitly rather than relying on whichever
        # node a previous loop happened to visit last.
        target_id = None
        target_curie = None
        for node_key, node in nodes.items():
            if node['category'] == BIOLINK_PHENOTYPIC_FEATURE:
                target_id = node_key
                target_curie = node['id']
        if target_id is None:
            raise ValueError(
                'Query graph must contain a node with category {}.'.format(
                    BIOLINK_PHENOTYPIC_FEATURE))

        # Survival threshold: defaults unless a disease -> outcome edge
        # provides explicit properties.
        days = 970
        qualifier = '>='
        for node_key, node in nodes.items():
            if node['category'] != BIOLINK_DISEASE:
                continue
            for edge in edges.values():
                if (edge['predicate'] ==
                        BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE
                        and edge['subject'] == node_key
                        and edge['object'] == target_id):
                    if 'properties' in edge:
                        days = edge['properties']['days']
                        qualifier = edge['properties']['qualifier']
                    else:
                        days = 970
                        qualifier = '>='

        # set BKB target
        dynamic_targets = {target_curie: {"op": qualifier, "value": days}}
        truth_target = (target_curie, '{} {}'.format(qualifier, days))

        # get evidence
        evidence = {}
        for node in nodes.values():
            if node['category'] == BIOLINK_GENE:
                evidence["_" + node['id']] = 'True'
            elif node['category'] == BIOLINK_DRUG:
                # NOTE(review): drug evidence is keyed without the leading
                # underscore used for genes; kept as in the original.
                evidence[node['id']] = 'True'

        # produce BKB query
        chp_query = Query(evidence=evidence,
                          targets=[],
                          dynamic_evidence={},
                          dynamic_targets=dynamic_targets,
                          type='updating')
        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        chp_query.query_id = query["query_id"] if 'query_id' in query else None
        return chp_query

    def _run_query(self, chp_query, query_type):
        """Run the query and store the truth-target probability on it.

        ``truth_prob`` is ``-1`` when the reasoner produced no answer for the
        target and ``0`` when the truth target is absent or carries no
        positive mass. ``report`` is always set to ``None``.

        :param chp_query: query produced by ``_extract_chp_query``.
        :param query_type: ``'simple'`` uses the joint reasoner, anything else
            the dynamic reasoner.
        :return: the query returned by the reasoner, annotated in place.
        """
        if query_type == 'simple':
            chp_query = self.joint_reasoner.run_query(chp_query)
            result = chp_query.result
            if len(result) == 0:
                # No probability was found for the target at all.
                chp_query.truth_prob = -1
            elif chp_query.truth_target not in result:
                chp_query.truth_prob = 0
            else:
                total_unnormalized_prob = 0
                for _target, contrib in result.items():
                    total_unnormalized_prob += max(0, contrib)
                truth_mass = max(0, result[chp_query.truth_target])
                # Guard against 0/0 when every contribution is non-positive.
                if total_unnormalized_prob > 0:
                    chp_query.truth_prob = truth_mass / total_unnormalized_prob
                else:
                    chp_query.truth_prob = 0
        else:
            chp_query = self.dynamic_reasoner.run_query(chp_query)
            chp_res_dict = chp_query.result.process_updates(normalize=True)
            target_name, target_state = chp_query.truth_target
            try:
                chp_query.truth_prob = max(
                    [0, chp_res_dict[target_name][target_state]])
            except KeyError:
                # Same "not found" marker the simple branch uses.
                chp_query.truth_prob = -1
        chp_query.report = None
        return chp_query

    def _construct_trapi_response(self, chp_query, query_type=None):
        """Build the TRAPI response for one answered query.

        :param chp_query: answered query carrying ``truth_prob`` and
            ``query_id``.
        :param query_type: unused here.
        :return: ``(query_id, response)``; ``query_id`` is ``None`` when
            ``init_query`` holds a single query.
        :raises ValueError: if no query in ``init_query`` matches
            ``chp_query.query_id``.
        """
        # Get original query. ``_setup_queries`` accepts a bare dict as well
        # as a list, so both shapes are handled here.
        if isinstance(self.init_query, dict):
            query = self.init_query
            query_id = None
        elif len(self.init_query) == 1:
            query = self.init_query[0]
            query_id = None
        else:
            query = None
            for _query in self.init_query:
                if _query["query_id"] == chp_query.query_id:
                    query = _query
                    break
            if query is None:
                raise ValueError('No query found with query_id {}.'.format(
                    chp_query.query_id))
            query_id = query["query_id"]

        kg = copy.deepcopy(query["query_graph"])

        # Re-key nodes by curie. New dicts are built instead of renaming keys
        # in place so a new key can never overwrite a not-yet-visited entry.
        node_pairs = {}
        kg_nodes = {}
        for node_key, node in kg['nodes'].items():
            curie = node.pop('id')
            node_pairs[node_key] = curie
            if node['category'] == BIOLINK_GENE:
                node['name'] = self._get_curie_name(BIOLINK_GENE, curie)[0]
            elif node['category'] == BIOLINK_DRUG:
                node['name'] = self._get_curie_name(BIOLINK_DRUG, curie)[0]
            kg_nodes[curie] = node
        kg['nodes'] = kg_nodes

        # Re-key edges as kge0, kge1, ... and point them at the curie nodes.
        edge_pairs = {}
        kg_edges = {}
        for index, (edge_key, edge) in enumerate(kg['edges'].items()):
            kg_id = 'kge{}'.format(index)
            edge['subject'] = node_pairs[edge['subject']]
            edge['object'] = node_pairs[edge['object']]
            edge_pairs[edge_key] = kg_id
            if edge[
                    'predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE:
                # Query-side properties are replaced by the computed answer.
                edge.pop('properties', None)
                edge['attributes'] = [{
                    'name': 'Probability of Survival',
                    'type': BIOLINK_PROBABILITY,
                    'value': chp_query.truth_prob
                }]
            kg_edges[kg_id] = edge
        kg['edges'] = kg_edges

        results = [{
            'edge_bindings': {
                edge_key: [{'id': kg_id}]
                for edge_key, kg_id in edge_pairs.items()
            },
            'node_bindings': {
                node_key: [{'id': curie}]
                for node_key, curie in node_pairs.items()
            },
        }]

        # query response
        trapi_message = {
            'query_graph': query["query_graph"],
            'knowledge_graph': kg,
            'results': results
        }
        trapi_response = {'message': trapi_message}
        return query_id, trapi_response
class WildCardHandlerMixin:
    """Handler mixin for queries with one wildcard (id-less) gene or drug node.

    The mixin reads ``messages``, ``curies``, ``max_results``,
    ``bkb_data_handler``, ``hosts_filename``, ``num_processes_per_host`` and
    ``dynamic_reasoner`` from the object it is mixed into and calls
    ``check_predicate_support`` on it; none of those are defined here.
    """

    def _setup_handler(self):
        """Group the messages and make sure a dynamic reasoner exists."""
        # Only do the rest of this if a query is passed
        if self.messages is None:
            return
        self._setup_messages()

        # Instantiate Reasoners
        if self.dynamic_reasoner is None:
            self.dynamic_reasoner = ChpDynamicReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)

    def _setup_messages(self):
        """Group ``self.messages`` by wildcard type into ``self.message_dict``.

        Nothing is set when ``self.messages`` is not a list.
        """
        if isinstance(self.messages, list):
            self.message_dict = defaultdict(list)
            for message in self.messages:
                self.message_dict[self._get_wildcard_type(message)].append(
                    message)

    def _get_wildcard_type(self, message):
        """Return 'drug' or 'gene' for the first node that has no ids.

        :raises ValueError: if there is no id-less node or its first category
            is neither the drug nor the gene entity.
        """
        wildcard_type = None
        for node in message.query_graph.nodes.values():
            if node.ids is None:
                # Only the first wildcard node determines the type.
                wildcard_type = node.categories[0]
                break
        if wildcard_type == BIOLINK_DRUG_ENTITY:
            return 'drug'
        if wildcard_type == BIOLINK_GENE_ENTITY:
            return 'gene'
        raise ValueError(
            'Did not understand wildcard type {}.'.format(wildcard_type))

    def _extract_chp_query(self, message, message_type):
        """Translate a wildcard message into a ``ChpQuery``.

        Sets ``self.implicit_survival_node`` (True when the graph has no
        ``EFO:0000714`` phenotypic-feature node) and
        ``self.no_evidence_probability_check`` (True when the query ends up
        without evidence).

        :param message: message whose ``query_graph`` is read.
        :param message_type: 'gene' or 'drug'; nodes of that kind are the
            wildcard and are not added as evidence.
        :return: the query, with ``truth_target`` attached.
        :raises ValueError: if an evidence node's curie is not in
            ``self.curies``.
        """
        # Initialize CHP BKB Query
        chp_query = ChpQuery(reasoning_type='updating')
        query_graph = message.query_graph

        # get phenotype node
        acceptable_target_curies = ['EFO:0000714']
        target_id = None
        for node_key, node in query_graph.nodes.items():
            if (node.categories[0] == BIOLINK_PHENOTYPIC_FEATURE_ENTITY
                    and node.ids[0] in acceptable_target_curies):
                target_id = node_key
        # Without an explicit survival node the default survival target is used.
        self.implicit_survival_node = target_id is None

        survival_value = 970
        survival_operator = '>='
        # get disease node info
        acceptable_disease_curies = ['MONDO:0007254']
        for node_key, node in query_graph.nodes.items():
            if not (node.categories[0] == BIOLINK_DISEASE_ENTITY
                    and node.ids[0] in acceptable_disease_curies):
                continue
            for edge in query_graph.edges.values():
                if (self.check_predicate_support(
                        edge.predicates[0], BIOLINK_HAS_PHENOTYPE_ENTITY)
                        and edge.subject == node_key
                        and edge.object == target_id):
                    survival_time_constraint = edge.find_constraint(
                        name='survival_time')
                    if survival_time_constraint is not None:
                        survival_value = survival_time_constraint.value
                        survival_operator = survival_time_constraint.operator
                        if survival_operator == 'matches':
                            survival_operator = '=='

        # set BKB target
        chp_query.add_dynamic_target('EFO:0000714', survival_operator,
                                     survival_value)
        truth_target = ('EFO:0000714', '{} {}'.format(survival_operator,
                                                      survival_value))

        # get evidence: every gene/drug node that is not the wildcard kind
        for node in query_graph.nodes.values():
            category = node.categories[0]
            if category == BIOLINK_GENE_ENTITY and message_type != 'gene':
                gene_curie = node.ids[0]
                if gene_curie not in self.curies[
                        BIOLINK_GENE_ENTITY.get_curie()]:
                    raise ValueError(
                        'Unknown gene curie {}.'.format(gene_curie))
                chp_query.add_meta_evidence(gene_curie, 'True')
            if category == BIOLINK_DRUG_ENTITY and message_type != 'drug':
                drug_curie = node.ids[0]
                if drug_curie not in self.curies[
                        BIOLINK_DRUG_ENTITY.get_curie()]:
                    raise ValueError(
                        'Unknown drug curie {}.'.format(drug_curie))
                chp_query.add_meta_evidence(drug_curie, 'True')

        # Temporary solution to no evidence linking
        self.no_evidence_probability_check = (
            len(chp_query.evidence.keys()) == 0
            and len(chp_query.dynamic_evidence.keys()) == 0)

        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        return chp_query

    def _run_query(self, chp_query, query_type):
        """ Runs build BKB query to calculate probability of survival.

        With evidence, the dynamic reasoner is run and the contributions of
        ``_Source_`` inodes are split evenly over the patient hashes encoded
        in their state names. Without evidence, the probability is the
        fraction of ``raw_patient_data`` entries satisfying the target and
        each patient contributes an equal share. Patient contributions are
        then summed per gene or drug curie, and ``'relative'`` is set to the
        normalised truth contribution minus the normalised non-truth one.

        :param chp_query: query produced by ``_extract_chp_query``.
        :param query_type: 'gene' or 'drug', the wildcard kind.
        :return: the query with ``truth_prob``, ``report`` (None) and
            ``wildcard_contributions`` set.
        """
        patient_contributions = defaultdict(lambda: defaultdict(int))

        # temporary solution to no evidence linking
        if not self.no_evidence_probability_check:
            # A gene wildcard reasons over the drug BKB and vice versa.
            if query_type == 'gene':
                chp_query = self.dynamic_reasoner.run_query(chp_query,
                                                            bkb_type='drug')
            elif query_type == 'drug':
                chp_query = self.dynamic_reasoner.run_query(chp_query,
                                                            bkb_type='gene')
            chp_res_dict = chp_query.result.process_updates()
            # NOTE(review): the normalised result is not used below; the call
            # is kept in case process_updates has side effects - confirm.
            chp_query.result.process_updates(normalize=True)
            chp_res_contributions = chp_query.result.process_inode_contributions(
            )
            target_name, target_state = chp_query.truth_target
            try:
                chp_query.truth_prob = max(
                    [0, chp_res_dict[target_name][target_state]])
            except KeyError:
                # May need to come back and fix this.
                chp_query.truth_prob = -1

            # Collect all source inodes and process patient hashes
            for target, contrib_dict in chp_res_contributions.items():
                for (comp_name, state_name), contrib in contrib_dict.items():
                    if '_Source_' not in comp_name:
                        continue
                    # The state name ends in a comma separated hash list.
                    source_hashes = [
                        int(source_hash)
                        for source_hash in state_name.split('_')[-1].split(',')
                    ]
                    share = contrib / len(source_hashes)
                    for _hash in source_hashes:
                        patient_contributions[target][_hash] += share
        else:
            # probability of survival
            patient_data = self.dynamic_reasoner.raw_patient_data
            str_op = chp_query.dynamic_targets['EFO:0000714']['op']
            opp_op = get_opposite_operator(str_op)
            op = get_operator(str_op)
            days = chp_query.dynamic_targets['EFO:0000714']['value']
            num_all = len(patient_data.keys())
            num_survived = sum(
                1 for pat_dict in patient_data.values()
                if op(pat_dict['survival_time'], days))
            chp_query.truth_prob = num_survived / num_all

            truth_key = ('EFO:0000714', '{} {}'.format(str_op, days))
            other_key = ('EFO:0000714', '{} {}'.format(opp_op, days))
            for patient, pat_dict in patient_data.items():
                if op(pat_dict['survival_time'], days):
                    # num_survived >= 1 here because this patient matched.
                    patient_contributions[truth_key][
                        patient] = chp_query.truth_prob / num_survived
                else:
                    # num_all - num_survived >= 1 here because this patient
                    # did not match.
                    patient_contributions[other_key][patient] = (
                        1 - chp_query.truth_prob) / (num_all - num_survived)

        # Translate patient contributions to drug/gene contributions
        curie_field = {
            'gene': 'gene_curies',
            'drug': 'drug_curies'
        }.get(query_type)
        wildcard_contributions = defaultdict(lambda: defaultdict(int))
        if curie_field is not None:
            for target, patient_contrib_dict in patient_contributions.items():
                for patient, contrib in patient_contrib_dict.items():
                    for curie in self.dynamic_reasoner.raw_patient_data[
                            patient][curie_field]:
                        wildcard_contributions[curie][target] += contrib

        # normalize contributions by the target and take relative difference
        truth_prob = chp_query.truth_prob
        for curie in wildcard_contributions.keys():
            truth_target_contrib = 0
            nontruth_target_contrib = 0
            for target, contrib in wildcard_contributions[curie].items():
                is_truth = (target[0] == chp_query.truth_target[0]
                            and target[1] == chp_query.truth_target[1])
                denominator = truth_prob if is_truth else 1 - truth_prob
                if denominator == 0:
                    # The normalised share is undefined; count it as zero
                    # instead of raising ZeroDivisionError.
                    continue
                if is_truth:
                    truth_target_contrib += contrib / denominator
                else:
                    nontruth_target_contrib += contrib / denominator
            wildcard_contributions[curie][
                'relative'] = truth_target_contrib - nontruth_target_contrib

        chp_query.report = None
        chp_query.wildcard_contributions = wildcard_contributions
        return chp_query

    def _is_wildcard_edge(self, qg, qedge, query_type):
        """Return True if ``qedge`` links the wildcard node for ``query_type``."""
        predicate = qedge.predicates[0]
        if query_type == 'gene':
            return ((self.check_predicate_support(
                predicate, BIOLINK_GENE_ASSOCIATED_WITH_CONDITION_ENTITY)
                     and qg.nodes[qedge.subject].categories[0]
                     == BIOLINK_GENE_ENTITY)
                    or (self.check_predicate_support(
                        predicate,
                        BIOLINK_CONDITION_ASSOCIATED_WITH_GENE_ENTITY)
                        and qg.nodes[qedge.object].categories[0]
                        == BIOLINK_GENE_ENTITY))
        if query_type == 'drug':
            return ((self.check_predicate_support(predicate,
                                                  BIOLINK_TREATS_ENTITY)
                     and qg.nodes[qedge.subject].categories[0]
                     == BIOLINK_DRUG_ENTITY)
                    or (self.check_predicate_support(
                        predicate, BIOLINK_TREATED_BY_ENTITY)
                        and qg.nodes[qedge.object].categories[0]
                        == BIOLINK_DRUG_ENTITY))
        return False

    def _construct_trapi_message(self, chp_query, message, query_type=None):
        """Add knowledge-graph nodes, edges and results to ``message``.

        When the survival node is explicit, the first result binds the nodes
        that have ids. After that, one result is added for each of the top
        ``self.max_results`` wildcard curies ranked by absolute relative
        contribution; 'missing' and curies absent from ``self.curies`` are
        skipped.

        :param chp_query: answered query carrying ``truth_prob`` and
            ``wildcard_contributions``.
        :param message: message whose knowledge graph and results are extended.
        :param query_type: 'gene' or 'drug', the wildcard kind.
        :return: the same ``message`` object.
        """
        qg = message.query_graph
        kg = message.knowledge_graph

        # Process Standard Query as first result.
        # Process Nodes
        node_bindings = {}
        for qnode_key, qnode in qg.nodes.items():
            if qnode.ids is None:
                continue
            curie = qnode.ids[0]
            category = qnode.categories[0]
            if category == BIOLINK_GENE_ENTITY:
                name = self.curies[BIOLINK_GENE_ENTITY.get_curie()][curie][0]
            elif category == BIOLINK_DRUG_ENTITY:
                name = self.curies[BIOLINK_DRUG_ENTITY.get_curie()][curie][0]
            else:
                name = curie
            knode_key = kg.add_node(curie, name, category.get_curie())
            node_bindings[qnode_key] = [knode_key]

        # Always defined so the wildcard loop below can consult it even when
        # the survival node is implicit and no standard edges are created.
        edge_bindings = {}
        if not self.implicit_survival_node:
            # Process Edges
            for qedge_key, qedge in qg.edges.items():
                if (qedge.subject not in node_bindings
                        or qedge.object not in node_bindings):
                    continue
                kedge_key = kg.add_edge(
                    node_bindings[qedge.subject][0],
                    node_bindings[qedge.object][0],
                    predicate=qedge.predicates[0].get_curie(),
                    relation=qedge.relation,
                )
                edge_bindings[qedge_key] = [kedge_key]
                # Add Attribute
                if self.check_predicate_support(qedge.predicates[0],
                                                BIOLINK_HAS_PHENOTYPE_ENTITY):
                    kg.edges[kedge_key].add_attribute(
                        attribute_type_id='Probability of Survival',
                        value=chp_query.truth_prob,
                        value_type_id=BIOLINK_HAS_CONFIDENCE_LEVEL_ENTITY.
                        get_curie(),
                    )
            # Process results
            message.results.add_result(
                node_bindings,
                edge_bindings,
            )

        # Build relative contribution results and add associated edges into
        # the knowledge graph.
        ranked_contributions = sorted(
            ((contrib_dict['relative'], wildcard) for wildcard, contrib_dict
             in chp_query.wildcard_contributions.items()),
            key=lambda pair: abs(pair[0]),
            reverse=True,
        )
        wildcard_entity = {
            'gene': BIOLINK_GENE_ENTITY,
            'drug': BIOLINK_DRUG_ENTITY,
        }.get(query_type)

        for contrib, wildcard in ranked_contributions[:self.max_results]:
            #TODO: Fix this!
            if wildcard == 'missing':
                continue
            _node_bindings = {}
            _edge_bindings = {}

            # Process node bindings
            bad_wildcard = False
            for qnode_id, qnode in qg.nodes.items():
                if (wildcard_entity is not None
                        and qnode.categories[0] == wildcard_entity):
                    try:
                        knode_id = kg.add_node(
                            wildcard,
                            self.curies[wildcard_entity.get_curie()][wildcard]
                            [0],
                            qnode.categories[0].get_curie(),
                        )
                    except KeyError:
                        logger.info("Couldn't find {} in curies[{}]".format(
                            wildcard, wildcard_entity.get_curie()))
                        bad_wildcard = True
                        break
                    _node_bindings[qnode_id] = [knode_id]
                else:
                    _node_bindings[qnode_id] = node_bindings[qnode_id]
            if bad_wildcard:
                continue

            # Process edge bindings
            for qedge_id, qedge in qg.edges.items():
                if self._is_wildcard_edge(qg, qedge, query_type):
                    # NOTE(review): the predicate object is passed here while
                    # the standard edges above pass its curie; kept as-is.
                    kedge_id = kg.add_edge(
                        _node_bindings[qedge.subject][0],
                        _node_bindings[qedge.object][0],
                        predicate=qedge.predicates[0],
                        relation=qedge.relation,
                    )
                    kg.edges[kedge_id].add_attribute(
                        attribute_type_id='Contribution',
                        value=contrib,
                        value_type_id=BIOLINK_HAS_EVIDENCE_ENTITY.get_curie(),
                    )
                    _edge_bindings[qedge_id] = [kedge_id]
                elif qedge_id in edge_bindings:
                    # Reuse the standard edge when one was created; edges
                    # without one are left unbound instead of raising.
                    _edge_bindings[qedge_id] = edge_bindings[qedge_id]

            # Process node and edge binding results
            message.results.add_result(
                _node_bindings,
                _edge_bindings,
            )
        return message
class OneHopHandlerMixin: """ OneHopeHandler is the handler for 1-hop queries. That is query graphs (QGs) that consists of 2 nodes and a single edge. :param query: the query graph sent by the ARA. :type query: dict :param hosts_filename: a filename for a stored QG. Defaults to None :type hosts_filename: str :param num_processes_per_host: Not implemented thouroughly, but would be used for distributed reasoning. :type num_processes_per_host: int :param max_results: specific to 1-hop queries, specifies the number of wildcard genes to return. :type max_results: int """ def _setup_handler(self): self.default_survival_target = { "EFO:0000714": { "op": '>=', "value": 970 } } # Only do the rest of this if a query is passed if self.messages is not None: # Setup queries self._setup_messages() # Instiatate Reasoners if self.dynamic_reasoner is None: self.dynamic_reasoner = ChpDynamicReasoner( bkb_handler=self.bkb_data_handler, hosts_filename=self.hosts_filename, num_processes_per_host=self.num_processes_per_host) if self.joint_reasoner is None: self.joint_reasoner = ChpJointReasoner( bkb_handler=self.bkb_data_handler, hosts_filename=self.hosts_filename, num_processes_per_host=self.num_processes_per_host) def _setup_messages(self): self.message_dict = defaultdict(list) for message in self.messages: self.message_dict[self._get_onehop_type(message)].append(message) def _get_onehop_type(self, message): wildcard_type = None for node_id, node in message.query_graph.nodes.items(): if node.ids is None: if wildcard_type is None: wildcard_type = node.categories[0] # If standard onehop query if wildcard_type is None: return 'standard' elif wildcard_type == BIOLINK_DRUG_ENTITY: return 'drug' elif wildcard_type == BIOLINK_GENE_ENTITY: return 'gene' else: raise ValueError( 'Did not understand wildcard type {}.'.format(wildcard_type)) def check_query(self): """ Currently not implemented. Would check validity of query. 
""" return True @staticmethod def _process_predicate_proxy(qedge): dynamic_targets = {} predicate_proxy_constraint = qedge.find_constraint('predicate_proxy') if predicate_proxy_constraint is None: predicate_proxy = get_default_predicate_proxy() proxy_constraint = qedge.find_constraint(predicate_proxy) else: predicate_proxy = predicate_proxy_constraint.value[0] proxy_constraint = qedge.find_constraint(predicate_proxy) if proxy_constraint is None: proxy_operator = get_default_operator(predicate_proxy) proxy_value = get_default_value(predicate_proxy) else: proxy_operator = proxy_constraint.operator proxy_value = proxy_constraint.value # Setup dynamic target dynamic_targets[predicate_proxy] = { "op": proxy_operator, "value": proxy_value, } return dynamic_targets @staticmethod def _process_predicate_context(qedge, message_type): evidence = {} dynamic_evidence = {} predicate_context_constraint = qedge.find_constraint( 'predicate_context') if predicate_context_constraint is not None: for context in predicate_context_constraint.value: context_curie = get_biolink_entity(context) context_constraint = qedge.find_constraint(context) if context_constraint is None: raise ValueError( 'Provided no context details for {}'.format(context)) if context_curie == BIOLINK_GENE_ENTITY: if message_type == 'gene': if type(context_constraint.value) is list: for _curie in context_constraint.value: dynamic_evidence[_curie] = { "op": '==', "value": 'True', } else: dynamic_evidence[context_constraint.value] = { "op": '==', "value": 'True', } else: if type(context_constraint.value) is list: for _curie in context_constraint.value: evidence['_{}'.format(_curie)] = 'True' else: evidence['_{}'.format(_curie)] = 'True' elif context_curie == BIOLINK_DRUG_ENTITY: if message_type == 'drug': if type(context_constraint.value) is list: for _curie in context_constraint.value: dynamic_evidence[_curie] = { "op": '==', "value": 'True', } else: dynamic_evidence[context_constraint.value] = { "op": '==', "value": 
'True', } else: if type(context_constraint.value) is list: for _curie in context_constraint.value: evidence['_{}'.format(_curie)] = 'True' else: evidence['_{}'.format(_curie)] = 'True' else: raise ValueError( 'Unsupported context type: {}'.format(context_curie)) return evidence, dynamic_evidence def _extract_chp_query(self, message, message_type): evidence = {} dynamic_targets = {} dynamic_evidence = {} if message_type == 'standard': # Setup gene and drug evidence for qnode_id, qnode in message.query_graph.nodes.items(): if qnode.categories[ 0] == BIOLINK_GENE_ENTITY or qnode.categories[ 0] == BIOLINK_DRUG_ENTITY: evidence['_{}'.format(qnode.ids[0])] = 'True' elif message_type == 'gene': for qnode_id, qnode in message.query_graph.nodes.items(): if qnode.categories[0] == BIOLINK_DRUG_ENTITY: #dynamic_evidence[qnode.ids[0]] = { # "op": '==', # "value": 'True', # } evidence['_{}'.format(qnode.ids[0])] = 'True' elif message_type == 'drug': for qnode_id, qnode in message.query_graph.nodes.items(): if qnode.categories[0] == BIOLINK_GENE_ENTITY: #dynamic_evidence[qnode.ids[0]] = { # "op": '==', # "value": 'True', # } evidence['_{}'.format(qnode.ids[0])] = 'True' # Grab edge for qedge_id, qedge in message.query_graph.edges.items(): break # Process predicate proxy dynamic_targets = self._process_predicate_proxy(qedge) # Process predicate context _evidence, _dynamic_evidence = self._process_predicate_context( qedge, message_type) evidence.update(_evidence) dynamic_evidence.update(_dynamic_evidence) #TODO: Probably need a more robust solution for when no context is provided in wildcard queries and you need it. 
#if len(evidence) == 0: # raise ValueError('Did not supply context with a query that required context.') target = list(dynamic_targets.keys())[0] truth_target = (target, '{} {}'.format(dynamic_targets[target]["op"], dynamic_targets[target]["value"])) chp_query = Query(evidence=evidence, targets=None, dynamic_evidence=dynamic_evidence, dynamic_targets=dynamic_targets, type='updating') # Set some other helpful attributes chp_query.truth_target = truth_target return chp_query def _run_query(self, chp_query, query_type): """ Runs build BKB query to calculate probability of survival. A probability is returned to specificy survival time w.r.t a drug. Contributions for each gene are calculuated and classified under their true/false target assignments. """ if query_type == 'standard': chp_query = self.joint_reasoner.run_query(chp_query) # If a probability was found for the target if len(chp_query.result) > 0: # If a probability was found for the truth target if chp_query.truth_target in chp_query.result: total_unnormalized_prob = 0 for target, contrib in chp_query.result.items(): prob = max(0, contrib) total_unnormalized_prob += prob chp_query.truth_prob = max([ 0, chp_query.result[(chp_query.truth_target)] ]) / total_unnormalized_prob else: chp_query.truth_prob = 0 else: chp_query.truth_prob = -1 chp_query.report = None return chp_query else: # Do this if a disease node is present if len(chp_query.evidence) == 0: # probability of survival chp_query = self.joint_reasoner.run_query(chp_query) if len(chp_query.result) > 0: # If a probability was found for the truth target if chp_query.truth_target in chp_query.result: total_unnormalized_prob = 0 for target, contrib in chp_query.result.items(): prob = max(0, contrib) total_unnormalized_prob += prob chp_query.truth_prob = max([ 0, chp_query.result[(chp_query.truth_target)] ]) / total_unnormalized_prob else: chp_query.truth_prob = 0 else: chp_query.truth_prob = -1 # patient_contributions num_all = 
len(self.joint_reasoner.patient_data) num_matched = chp_query.truth_prob * num_all patient_contributions = defaultdict(lambda: defaultdict(int)) for patient, feature_dict in self.joint_reasoner.patient_data.items( ): for predicate_proxy, proxy_info in chp_query.dynamic_targets.items( ): proxy_op = get_operator(proxy_info["op"]) proxy_opp_op = get_opposite_operator(proxy_info["op"]) proxy_value = proxy_info["value"] if proxy_op(feature_dict[predicate_proxy], proxy_value): if num_matched == 0: patient_contributions[( predicate_proxy, '{} {}'.format(proxy_op, proxy_value))][patient] = 0 else: patient_contributions[( predicate_proxy, '{} {}'.format(proxy_op, proxy_value) )][patient] = chp_query.truth_prob / num_matched else: if num_matched == 0: patient_contributions[( predicate_proxy, '{} {}'.format(proxy_opp_op, proxy_value) )][patient] = ( 1 - chp_query.truth_prob) / num_matched else: patient_contributions[( predicate_proxy, '{} {}'.format(proxy_opp_op, proxy_value) )][patient] = (1 - chp_query.truth_prob) / ( num_all - num_matched) ''' num_survived = 0 num_all = len(self.dynamic_reasoner.raw_patient_data.keys()) str_op = chp_query.dynamic_targets['EFO:0000714']['op'] opp_op = get_opposite_operator(str_op) op = get_operator(str_op) days = chp_query.dynamic_targets['EFO:0000714']['value'] for patient, pat_dict in self.dynamic_reasoner.raw_patient_data.items(): if op(pat_dict['survival_time'], days): num_survived += 1 chp_query.truth_prob = num_survived/num_all # patient_contributions patient_contributions = defaultdict(lambda: defaultdict(int)) for patient, pat_dict in self.dynamic_reasoner.raw_patient_data.items(): if op(pat_dict['survival_time'], days): if num_survived == 0: patient_contributions[('EFO:0000714', '{} {}'.format(str_op, days))][patient] = 0 else: patient_contributions[('EFO:0000714', '{} {}'.format(str_op, days))][patient] = chp_query.truth_prob/num_survived else: if num_survived == 0: patient_contributions[('EFO:0000714', '{} {}'.format(opp_op, 
days))][patient] = (1-chp_query.truth_prob)/num_all else: patient_contributions[('EFO:0000714', '{} {}'.format(opp_op, days))][patient] = (1-chp_query.truth_prob)/(num_all-num_survived) ''' else: if query_type == 'gene': chp_query = self.dynamic_reasoner.run_query( chp_query, bkb_type='drug') elif query_type == 'drug': chp_query = self.dynamic_reasoner.run_query( chp_query, bkb_type='gene') chp_res_dict = chp_query.result.process_updates() chp_res_norm_dict = chp_query.result.process_updates( normalize=True) #chp_query.result.summary() chp_res_contributions = chp_query.result.process_inode_contributions( ) chp_query.truth_prob = max([ 0, chp_res_norm_dict[chp_query.truth_target[0]][ chp_query.truth_target[1]] ]) # Collect all source inodes and process patient hashes patient_contributions = defaultdict(lambda: defaultdict(int)) for target, contrib_dict in chp_res_contributions.items(): target_comp_name, target_state_name = target for inode, contrib in contrib_dict.items(): comp_name, state_name = inode if '_Source_' in comp_name: # Split source state name to get patient hashes source_hashes_str = state_name.split('_')[-1] source_hashes = [ int(source_hash) for source_hash in source_hashes_str.split(',') ] hash_len = len(source_hashes) # Process patient contributions for _hash in source_hashes: # Normalize to get relative contribution patient_contributions[target][ _hash] += contrib / hash_len #/ chp_res_dict[target_comp_name][target_state_name] # Now iterate through the patient data to translate patient contributions to drug/gene contributions wildcard_contributions = defaultdict(lambda: defaultdict(int)) for target, patient_contrib_dict in patient_contributions.items(): for patient, contrib in patient_contrib_dict.items(): if query_type == 'gene': for gene_curie in self.dynamic_reasoner.raw_patient_data[ int(patient)]["gene_curies"]: wildcard_contributions[gene_curie][target] += contrib elif query_type == 'drug': for drug_curie in 
self.dynamic_reasoner.raw_patient_data[ int(patient)]["drug_curies"]: wildcard_contributions[drug_curie][target] += contrib # normalize gene contributions by the target and take relative difference for curie in wildcard_contributions.keys(): truth_target_gene_contrib = 0 nontruth_target_gene_contrib = 0 for target, contrib in wildcard_contributions[curie].items(): if target[0] == chp_query.truth_target[0] and target[ 1] == chp_query.truth_target[1]: truth_target_gene_contrib += contrib / chp_query.truth_prob else: nontruth_target_gene_contrib += contrib / ( 1 - chp_query.truth_prob) wildcard_contributions[curie][ 'relative'] = truth_target_gene_contrib - nontruth_target_gene_contrib chp_query.report = None chp_query.wildcard_contributions = wildcard_contributions return chp_query def _construct_trapi_message(self, chp_query, message, query_type): qg = message.query_graph kg = message.knowledge_graph edge_bindings = {} node_bindings = {} # Process nodes for qnode_id, qnode in qg.nodes.items(): if qnode.ids is not None: if qnode.categories[0] == BIOLINK_GENE_ENTITY: knode_key = kg.add_node( qnode.ids[0], self.curies[BIOLINK_GENE_ENTITY.get_curie()][ qnode.ids[0]][0], qnode.categories[0].get_curie(), ) elif qnode.categories[0] == BIOLINK_DRUG_ENTITY: knode_key = kg.add_node( qnode.ids[0], self.curies[BIOLINK_DRUG_ENTITY.get_curie()][ qnode.ids[0]][0], qnode.categories[0].get_curie(), ) elif qnode.categories[0] == BIOLINK_DISEASE_ENTITY: #TODO: Add diseases to curies and fix name hack below. knode_key = kg.add_node( qnode.ids[0], qnode. ids[0], #TODO: Once curies is fixed, make this a name. 
qnode.categories[0].get_curie(), ) node_bindings[qnode_id] = [knode_key] else: wildcard_node = qnode if query_type == 'standard': for qedge_key, qedge in qg.edges.items(): kedge_key = kg.add_edge( node_bindings[qedge.subject][0], node_bindings[qedge.object][0], predicate=qedge.predicates[0].get_curie(), relation=qedge.relation, ) edge_bindings[qedge_key] = [kedge_key] # Add Attribute kg.edges[kedge_key].add_attribute( attribute_type_id='Probability of Survival', value=chp_query.truth_prob, value_type_id=BIOLINK_HAS_CONFIDENCE_LEVEL_ENTITY. get_curie(), ) message.results.add_result( node_bindings, edge_bindings, ) else: # Build relative contribution results and added associated edges into knowledge graph unsorted_wildcard_contributions = [] for wildcard, contrib_dict in chp_query.wildcard_contributions.items( ): unsorted_wildcard_contributions.append( (contrib_dict['relative'], wildcard)) sorted_wildcard_contributions = [ (contrib, wildcard) for contrib, wildcard in sorted( unsorted_wildcard_contributions, key=lambda x: abs(x[0]), reverse=True) ] # add kg gene nodes and edges edge_count = 0 node_count = 1 results = [] for contrib, wildcard in sorted_wildcard_contributions[:self. 
max_results]: _node_bindings = {} _edge_bindings = {} # Process node bindings bad_wildcard = False for qnode_id, qnode in qg.nodes.items(): if qnode.categories[ 0] == BIOLINK_GENE_ENTITY and query_type == 'gene': try: knode_id = kg.add_node( wildcard, self.curies[BIOLINK_GENE_ENTITY.get_curie()] [wildcard][0], qnode.categories[0].get_curie(), ) _node_bindings[qnode_id] = [knode_id] except KeyError: logger.info( "Couldn't find {} in curies[{}]".format( wildcard, BIOLINK_GENE)) bad_wildcard = True elif qnode.categories[ 0] == BIOLINK_DRUG_ENTITY and query_type == 'drug': knode_id = kg.add_node( wildcard, self.curies[BIOLINK_DRUG_ENTITY.get_curie()] [wildcard][0], qnode.categories[0].get_curie(), ) _node_bindings[qnode_id] = [knode_id] else: _node_bindings[qnode_id] = node_bindings[qnode_id] if bad_wildcard: continue # Process edge bindings for qedge_id, qedge in qg.edges.items(): kedge_id = kg.add_edge( _node_bindings[qedge.subject][0], _node_bindings[qedge.object][0], predicate=qedge.predicates[0], relation=qedge.relation, ) kg.edges[kedge_id].add_attribute( attribute_type_id='Contribution', value=contrib, value_type_id=BIOLINK_HAS_EVIDENCE_ENTITY.get_curie(), ) _edge_bindings[qedge_id] = [kedge_id] # Process node and edge binding results message.results.add_result( _node_bindings, _edge_bindings, ) return message