class TestJointReasoner(unittest.TestCase):
    """Smoke tests that push small evidence sets through ``ChpJointReasoner``.

    Each test only checks that ``run_query`` completes without raising;
    nothing is asserted about the query that comes back.
    """

    def setUp(self):
        """Build a fresh data handler and joint reasoner before every test."""
        self.bkb_handler = BkbDataHandler()
        self.joint_reasoner = ChpJointReasoner(self.bkb_handler)

    def _run_with_evidence(self, evidence):
        """Run one joint-reasoner query for ``evidence`` against the fixed target."""
        query = Query(
            evidence=evidence,
            dynamic_targets={"EFO:0000714": {"op": '>=', "value": 1000}},
        )
        return self.joint_reasoner.run_query(query)

    def test_joint_reasoner_one_gene(self):
        """A single gene as evidence."""
        self._run_with_evidence({'ENSEMBL:ENSG00000155657': 'True'})

    def test_joint_reasoner_one_gene_one_drug(self):
        """One gene plus one drug as evidence."""
        self._run_with_evidence({
            'ENSEMBL:ENSG00000155657': 'True',
            'CHEMBL:CHEMBL83': 'True',
        })
class ChpBrainApiConfig(AppConfig):
    """Application config that builds the shared BKB handler and reasoners.

    Every statement below sits directly in the class body, so it executes
    once, when this class is defined. The resulting ``bkb_handler``,
    ``dynamic_reasoner`` and ``joint_reasoner`` objects are stored as class
    attributes.
    """
    logger.warning('Running CHP Brain API Configuration. May take a minute.')
    name = 'chp_core_brain'

    # Used for distributed reasoning
    # Get Hosts File if it exists
    # NOTE(review): the hosts-file discovery below is disabled, so the
    # reasoners are always built with no hosts file and zero processes per
    # host.
    #parent_dir = os.path.dirname(os.path.realpath(__file__))
    #HOSTS_FILENAME = os.path.join(parent_dir, 'hosts')
    #NUM_PROCESSES_PER_HOST = multiprocessing.cpu_count()
    #if not os.path.exists(HOSTS_FILENAME):
    hosts_filename = None
    num_processes_per_host = 0

    # Instantiate BKB handler
    bkb_handler = BkbDataHandler(disease='tcga_gbm',
                                 bkb_major_version='darwin',
                                 bkb_minor_version='2.0')

    logger.info('Instantiating reasoners.')
    # Instantiate Reasoners (both share the single handler created above)
    dynamic_reasoner = ChpDynamicReasoner(
        bkb_handler=bkb_handler,
        hosts_filename=hosts_filename,
        num_processes_per_host=num_processes_per_host)
    joint_reasoner = ChpJointReasoner(
        bkb_handler=bkb_handler,
        hosts_filename=hosts_filename,
        num_processes_per_host=num_processes_per_host)
def _setup_handler(self):
    """Bucket the incoming messages and create the reasoners they require.

    Nothing happens when ``self.messages`` is ``None``. Otherwise the
    messages are sorted by ``self._setup_messages()`` and a reasoner is
    created for each bucket ('default' -> dynamic, 'simple' -> joint) that
    is present and does not already have one.
    """
    if self.messages is None:
        return

    self._setup_messages()

    def reasoner_kwargs():
        # Built on demand so the handler attributes are only read when a
        # reasoner really has to be constructed.
        return {
            'bkb_handler': self.bkb_data_handler,
            'hosts_filename': self.hosts_filename,
            'num_processes_per_host': self.num_processes_per_host,
        }

    if 'default' in self.message_dict and self.dynamic_reasoner is None:
        self.dynamic_reasoner = ChpDynamicReasoner(**reasoner_kwargs())
    if 'simple' in self.message_dict and self.joint_reasoner is None:
        self.joint_reasoner = ChpJointReasoner(**reasoner_kwargs())
def setUpClass(cls):
    """Load the one-hop sample queries and build the shared reasoners once."""
    super(TestOneHopHandler, cls).setUpClass()

    # (class attribute, JSON fixture) pairs holding the sample query graphs
    sample_files = (
        ('standard_queries', 'query_samples/onehop/standard_queries.json'),
        ('wildcard_queries', 'query_samples/onehop/wildcard_queries.json'),
    )
    for attr_name, sample_path in sample_files:
        with open(sample_path, 'r') as sample_file:
            setattr(cls, attr_name, json.load(sample_file))

    handler = BkbDataHandler()
    cls.bkb_handler = handler
    cls.dynamic_reasoner = ChpDynamicReasoner(handler)
    cls.joint_reasoner = ChpJointReasoner(handler)
def _setup_handler(self):
    """Set the default target, then prepare queries and reasoners if needed.

    ``self.default_survival_target`` is always assigned. The rest only runs
    when ``self.queries`` is not ``None``: the queries are organised by
    ``self._setup_messages()`` and any reasoner that is still ``None`` is
    created.
    """
    self.default_survival_target = {
        "EFO:0000714": {"op": '>=', "value": 970},
    }

    if self.queries is None:
        return

    self._setup_messages()

    def reasoner_kwargs():
        # Read the handler attributes only when a reasoner must be built.
        return {
            'bkb_handler': self.bkb_data_handler,
            'hosts_filename': self.hosts_filename,
            'num_processes_per_host': self.num_processes_per_host,
        }

    if self.dynamic_reasoner is None:
        self.dynamic_reasoner = ChpDynamicReasoner(**reasoner_kwargs())
    if self.joint_reasoner is None:
        self.joint_reasoner = ChpJointReasoner(**reasoner_kwargs())
class DefaultHandlerMixin:
    """Mixin that answers default (gene/drug -> disease -> outcome) messages.

    The host class is expected to supply the attributes read here through
    ``self``: ``messages``, ``bkb_data_handler``, ``hosts_filename``,
    ``num_processes_per_host``, ``dynamic_reasoner``, ``joint_reasoner``,
    ``curies`` and the ``check_predicate_support`` method.
    """

    def _setup_handler(self):
        """Bucket the messages and create whichever reasoners they need.

        Does nothing when ``self.messages`` is ``None``.
        """
        if self.messages is None:
            return

        self._setup_messages()

        # Instantiate reasoners only for the buckets that are present.
        if 'default' in self.message_dict and self.dynamic_reasoner is None:
            self.dynamic_reasoner = ChpDynamicReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)
        if 'simple' in self.message_dict and self.joint_reasoner is None:
            self.joint_reasoner = ChpJointReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)

    def _setup_messages(self):
        """Split ``self.messages`` into 'simple' and 'default' buckets."""
        self.message_dict = defaultdict(list)
        for message in self.messages:
            bucket = 'simple' if self._is_simple_message(message) else 'default'
            self.message_dict[bucket].append(message)

    def _is_simple_message(self, message):
        """Return True when no tracked category occurs on more than one node.

        The tracked categories are phenotypic feature, disease, gene and
        drug; a second node of any of them makes the message non-simple.
        """
        tracked = (
            BIOLINK_PHENOTYPIC_FEATURE_ENTITY,
            BIOLINK_DISEASE_ENTITY,
            BIOLINK_GENE_ENTITY,
            BIOLINK_DRUG_ENTITY,
        )
        already_seen = [False] * len(tracked)
        for node in message.query_graph.nodes.values():
            for position, entity in enumerate(tracked):
                if node.categories[0] == entity:
                    if already_seen[position]:
                        return False
                    already_seen[position] = True
        return True

    def _extract_chp_query(self, message, message_type=None):
        """Translate a message's query graph into a ``ChpQuery``.

        The phenotypic-feature node supplies the dynamic target, gene nodes
        become meta evidence and drug nodes become dynamic evidence. The
        target defaults to ``>= 970`` unless a disease -> phenotype edge
        carries a ``survival_time`` constraint.

        :param message: message whose ``query_graph`` is read.
        :param message_type: unused; kept for interface compatibility.
        :returns: the populated query with ``truth_target`` set.
        :raises ValueError: if the query graph has no phenotypic feature node.
        """
        chp_query = ChpQuery(reasoning_type='updating')
        query_graph = message.query_graph

        # Locate the phenotype node explicitly.  The target used to be read
        # from whichever node the previous loop happened to end on, which
        # was only right when the phenotype node came last.  If several
        # phenotype nodes exist the last one wins, as before.
        target_id = None
        target_node = None
        for node_key, node in query_graph.nodes.items():
            if node.categories[0] == BIOLINK_PHENOTYPIC_FEATURE_ENTITY:
                target_id = node_key
                target_node = node
        if target_node is None:
            raise ValueError(
                'Query graph has no phenotypic feature node to use as target.')

        survival_value = 970
        survival_operator = '>='
        # A survival_time constraint on a disease -> phenotype edge overrides
        # the defaults above.
        for disease_id, node in query_graph.nodes.items():
            if node.categories[0] == BIOLINK_DISEASE_ENTITY:
                for edge in query_graph.edges.values():
                    if self.check_predicate_support(
                            edge.predicates[0], BIOLINK_HAS_PHENOTYPE_ENTITY
                    ) and edge.subject == disease_id and edge.object == target_id:
                        survival_time_constraint = edge.find_constraint(
                            name='survival_time')
                        if survival_time_constraint is not None:
                            survival_value = survival_time_constraint.value
                            survival_operator = survival_time_constraint.operator
                            if survival_operator == 'matches':
                                survival_operator = '=='

        # Set BKB target
        target_curie = target_node.ids[0]
        chp_query.add_dynamic_target(target_curie, survival_operator,
                                     survival_value)
        truth_target = (target_curie,
                        '{} {}'.format(survival_operator, survival_value))

        # Gather evidence.  The former per-node edge scans only fed counters
        # that were never read, so they are gone.
        for node in query_graph.nodes.values():
            if node.categories[0] == BIOLINK_GENE_ENTITY:
                chp_query.add_meta_evidence(node.ids[0], 'True')
            if node.categories[0] == BIOLINK_DRUG_ENTITY:
                chp_query.add_dynamic_evidence(node.ids[0], '==', 'True')

        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        return chp_query

    def _run_query(self, chp_query, query_type):
        """Run the query and store the truth-target probability on it.

        ``truth_prob`` is ``-1`` when no usable result exists, ``0`` when a
        'simple' result lacks the truth target, and otherwise the
        normalised, non-negative probability of the truth target.

        :param chp_query: query produced by ``_extract_chp_query``.
        :param query_type: 'simple' uses the joint reasoner; anything else
            uses the dynamic reasoner.
        :returns: the query with ``truth_prob`` and ``report`` set.
        """
        if query_type == 'simple':
            chp_query = self.joint_reasoner.run_query(chp_query)
            result = chp_query.result
            if len(result) > 0:
                if chp_query.truth_target in result:
                    # Negative contributions are clamped to zero before
                    # normalising.
                    total_unnormalized_prob = sum(
                        max(0, contrib) for contrib in result.values())
                    chp_query.truth_prob = max(
                        0, result[chp_query.truth_target]
                    ) / total_unnormalized_prob
                else:
                    chp_query.truth_prob = 0
            else:
                chp_query.truth_prob = -1
            chp_query.report = None
        else:
            chp_query = self.dynamic_reasoner.run_query(chp_query)
            chp_res_dict = chp_query.result.process_updates(normalize=True)
            target_name, target_state = chp_query.truth_target
            try:
                chp_query.truth_prob = max(
                    0, chp_res_dict[target_name][target_state])
            except KeyError:
                # May need to come back and fix this.
                chp_query.truth_prob = -1
            chp_query.report = None
        return chp_query

    def _construct_trapi_message(self, chp_query, message, query_type=None):
        """Write the answer for ``chp_query`` into the message.

        Every query node and edge is mirrored into the knowledge graph, and
        edges whose predicate supports 'has phenotype' carry the computed
        probability as an attribute. One result binding all of them is
        added.

        :returns: the same ``message`` object, now populated.
        """
        qg = message.query_graph
        kg = message.knowledge_graph

        node_bindings = {}
        for qnode_key, qnode in qg.nodes.items():
            category = qnode.categories[0]
            curie = qnode.ids[0]
            # Genes and drugs get a display name from the curies table;
            # every other category falls back to its curie.
            if category == BIOLINK_GENE_ENTITY:
                name = self.curies[BIOLINK_GENE_ENTITY.get_curie()][curie][0]
            elif category == BIOLINK_DRUG_ENTITY:
                name = self.curies[BIOLINK_DRUG_ENTITY.get_curie()][curie][0]
            else:
                name = curie
            knode_key = kg.add_node(curie, name, category.get_curie())
            node_bindings[qnode_key] = [knode_key]

        edge_bindings = {}
        for qedge_key, qedge in qg.edges.items():
            kedge_key = kg.add_edge(
                node_bindings[qedge.subject][0],
                node_bindings[qedge.object][0],
                predicate=qedge.predicates[0].get_curie(),
                relation=qedge.relation,
            )
            edge_bindings[qedge_key] = [kedge_key]
            # Add Attribute
            if self.check_predicate_support(qedge.predicates[0],
                                            BIOLINK_HAS_PHENOTYPE_ENTITY):
                kg.edges[kedge_key].add_attribute(
                    attribute_type_id='Probability of Survival',
                    value=chp_query.truth_prob,
                    value_type_id=BIOLINK_HAS_CONFIDENCE_LEVEL_ENTITY.get_curie(),
                )

        # Process results
        message.results.add_result(node_bindings, edge_bindings)
        return message
class OneHopHandlerMixin:
    """Handler mixin for 1-hop queries.

    A 1-hop query graph (QG) consists of 2 nodes and a single edge. The
    parameters below are carried over from the original documentation; this
    mixin only reads them as attributes set elsewhere on the host class.

    :param query: the query graph sent by the ARA.
    :type query: dict
    :param hosts_filename: a filename for a stored QG. Defaults to None
    :type hosts_filename: str
    :param num_processes_per_host: Not implemented thoroughly, but would be
        used for distributed reasoning.
    :type num_processes_per_host: int
    :param max_results: specific to 1-hop queries, specifies the number of
        wildcard genes to return.
    :type max_results: int
    """

    def _setup_handler(self):
        """Set the default target and, if queries exist, prepare reasoners."""
        self.default_survival_target = {
            "EFO:0000714": {
                "op": '>=',
                "value": 970
            }
        }

        # Only do the rest of this if a query is passed
        if self.queries is None:
            return

        self._setup_messages()

        # Instantiate reasoners
        if self.dynamic_reasoner is None:
            self.dynamic_reasoner = ChpDynamicReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)
        if self.joint_reasoner is None:
            self.joint_reasoner = ChpJointReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)

    def _setup_messages(self):
        """Group ``self.queries`` by one-hop type into ``self.queries_dict``."""
        self.queries_dict = defaultdict(list)
        for query in self.queries:
            self.queries_dict[self._get_onehop_type(
                query.message)].append(query)

    def _get_onehop_type(self, message):
        """Classify a message by its wildcard node and node categories.

        :returns: 'gene_two_hop' / 'drug_two_hop' when every node is a gene
            / drug, 'standard' when no node lacks ids, otherwise 'drug' or
            'gene' after the category of the first node without ids.
        :raises ValueError: if the wildcard category is neither gene nor drug.
        """
        wildcard_type = None
        all_node_categories = []
        for node in message.query_graph.nodes.values():
            category = node.categories[0]
            # The first node without ids decides the wildcard type.
            if node.ids is None and wildcard_type is None:
                wildcard_type = category
            all_node_categories.append(category)

        # implicit 2-hop-queries
        if all(category == BIOLINK_GENE_ENTITY
               for category in all_node_categories):
            return 'gene_two_hop'
        elif all(category == BIOLINK_DRUG_ENTITY
                 for category in all_node_categories):
            return 'drug_two_hop'

        # If standard onehop query
        if wildcard_type is None:
            return 'standard'
        elif wildcard_type == BIOLINK_DRUG_ENTITY:
            return 'drug'
        elif wildcard_type == BIOLINK_GENE_ENTITY:
            return 'gene'
        else:
            raise ValueError(
                'Did not understand wildcard type {}.'.format(wildcard_type))

    def check_query(self):
        """ Currently not implemented. Would check validity of query.
        """
        return True

    @staticmethod
    def _process_predicate_proxy(qedge, chp_query):
        """Add the dynamic target described by the edge's predicate proxy.

        Falls back to the default proxy, operator and value helpers when
        the edge carries no matching constraint.
        """
        predicate_proxy_constraint = qedge.find_constraint('predicate_proxy')
        if predicate_proxy_constraint is None:
            predicate_proxy = get_default_predicate_proxy()
        else:
            predicate_proxy = predicate_proxy_constraint.value[0]
        proxy_constraint = qedge.find_constraint(predicate_proxy)

        if proxy_constraint is None:
            proxy_operator = get_default_operator(predicate_proxy)
            proxy_value = get_default_value(predicate_proxy)
        else:
            proxy_operator = proxy_constraint.operator
            proxy_value = proxy_constraint.value

        # Setup dynamic target
        chp_query.add_dynamic_target(predicate_proxy, proxy_operator,
                                     proxy_value)
        return chp_query

    @staticmethod
    def _process_predicate_context(qedge, message_type, chp_query):
        """Turn the edge's predicate-context constraints into evidence.

        Gene context is dynamic evidence for 'gene' / 'drug_two_hop'
        messages and drug context for 'drug' / 'gene_two_hop' messages;
        otherwise the context is added as meta evidence. Constraint values
        may be a single curie or a list of curies.

        :raises ValueError: if a context is neither a gene nor a drug.
        """
        predicate_context_constraint = qedge.find_constraint(
            'predicate_context')
        if predicate_context_constraint is None:
            return chp_query

        for context in predicate_context_constraint.value:
            context_curie = get_biolink_entity(context)
            context_constraint = qedge.find_constraint(context)
            # used 2 hop structure where context curie is the proxy
            if context_constraint is None:
                continue
            if context_curie == BIOLINK_GENE_ENTITY:
                as_dynamic = (message_type == 'gene'
                              or message_type == 'drug_two_hop')
            elif context_curie == BIOLINK_DRUG_ENTITY:
                as_dynamic = (message_type == 'drug'
                              or message_type == 'gene_two_hop')
            else:
                raise ValueError(
                    'Unsupported context type: {}'.format(context_curie))

            # Normalise scalar values to a list.  The scalar meta-evidence
            # path used to reference an undefined ``chp`` and a stale
            # ``_curie``.
            value = context_constraint.value
            curies = value if isinstance(value, list) else [value]
            for curie in curies:
                if as_dynamic:
                    chp_query.add_dynamic_evidence(curie, '==', 'True')
                else:
                    chp_query.add_meta_evidence(curie, 'True')
        return chp_query

    def _extract_chp_query(self, query, message_type):
        """Build the ``ChpQuery`` for a one-hop query.

        :param query: object whose ``message.query_graph`` is read.
        :param message_type: value returned by ``_get_onehop_type``.
        :returns: the query with target, evidence and ``truth_target`` set.
        :raises ValueError: if the query graph has no edge.
        """
        # Extract Message
        message = query.message

        # Initialize CHP BKB Query
        chp_query = ChpQuery(reasoning_type='updating')

        # Grab the first edge
        qedge = next(iter(message.query_graph.edges.values()), None)
        if qedge is None:
            raise ValueError('One-hop query graph does not contain an edge.')

        # Process predicate proxy
        chp_query = self._process_predicate_proxy(qedge, chp_query)

        # Process predicate context
        chp_query = self._process_predicate_context(qedge, message_type,
                                                    chp_query)

        #TODO: Probably need a more robust solution for when no context is
        # provided in wildcard queries and you need it.
        #if len(evidence) == 0:
        #    raise ValueError('Did not supply context with a query that required context.')

        if message_type == 'standard':
            # Setup gene and drug evidence
            for qnode in message.query_graph.nodes.values():
                if qnode.categories[0] == BIOLINK_GENE_ENTITY \
                        or qnode.categories[0] == BIOLINK_DRUG_ENTITY:
                    chp_query.add_meta_evidence(qnode.ids[0], 'True')
        elif message_type == 'gene' or message_type == 'drug_two_hop':
            for qnode in message.query_graph.nodes.values():
                if qnode.categories[0] == BIOLINK_DRUG_ENTITY \
                        and qnode.ids is not None:
                    chp_query.add_meta_evidence(qnode.ids[0], 'True')
        elif message_type == 'drug' or message_type == 'gene_two_hop':
            for qnode in message.query_graph.nodes.values():
                if qnode.categories[0] == BIOLINK_GENE_ENTITY \
                        and qnode.ids is not None:
                    chp_query.add_meta_evidence(qnode.ids[0], 'True')

        target = list(chp_query.dynamic_targets.keys())[0]
        truth_target = (target, '{} {}'.format(
            chp_query.dynamic_targets[target]["op"],
            chp_query.dynamic_targets[target]["value"]))

        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        return chp_query

    @staticmethod
    def _split_probability_mass(result, truth_target):
        """Return (truth mass, non-truth mass, total mass) of a raw result."""
        truth_mass = result[truth_target] if truth_target in result else 0
        total_mass = 0
        for prob in result.values():
            total_mass += prob
        return truth_mass, total_mass - truth_mass, total_mass

    @staticmethod
    def _true_state_contributions(contributions):
        """Re-key contributions as curie -> target, keeping 'True' states."""
        by_curie = defaultdict(lambda: defaultdict(int))
        for target, curies in contributions.items():
            for curie, contrib in curies.items():
                if curie[1] == 'True':
                    by_curie[curie[0]][target] = contrib
        return by_curie

    def _run_query(self, chp_query, query_type):
        """Run the reasoner and attach probabilities / wildcard contributions.

        'standard' queries get ``truth_prob`` only. Wildcard queries also
        get ``wildcard_contributions``: for each contributing curie, the
        contribution to the truth target minus the contribution to all
        other targets, each relative to that side's unnormalised mass.
        Two-hop queries re-run the reasoner once per top-ranked
        intermediate curie (at most ``self.max_results``) and accumulate
        the rescaled contributions.

        :returns: the query with ``truth_prob`` and ``report`` set, plus
            ``wildcard_contributions`` for wildcard queries.
        """
        if query_type == 'standard':
            chp_query = self.joint_reasoner.run_query(
                chp_query, interpolation_type='gene')
            result = chp_query.result
            if len(result) > 0:
                if chp_query.truth_target in result:
                    # Negative contributions are clamped before normalising.
                    total_unnormalized_prob = sum(
                        max(0, contrib) for contrib in result.values())
                    chp_query.truth_prob = max(
                        0, result[chp_query.truth_target]
                    ) / total_unnormalized_prob
                else:
                    chp_query.truth_prob = 0
            else:
                chp_query.truth_prob = -1
            chp_query.report = None
            return chp_query

        # Wildcard queries
        if query_type == 'gene' or query_type == 'drug_two_hop':
            chp_query = self.joint_reasoner.run_query(
                chp_query, interpolation_type='drug', contribution_type='gene')
        elif query_type == 'drug' or query_type == 'gene_two_hop':
            chp_query = self.joint_reasoner.run_query(
                chp_query, interpolation_type='gene', contribution_type='drug')

        chp_res_dict = chp_query.result
        truth_target = chp_query.truth_target
        (unnormalized_truth_prob, unnormalized_false_prob,
         normalize) = self._split_probability_mass(chp_res_dict, truth_target)

        # The result is normalised in place.
        for target in chp_res_dict.keys():
            chp_res_dict[target] /= normalize
        if truth_target in chp_res_dict:
            chp_query.truth_prob = chp_res_dict[truth_target]
        else:
            # This branch used to overwrite ``truth_target`` with 0, which
            # left ``truth_prob`` unset and broke the comparisons below.
            chp_query.truth_prob = 0

        # organize the contributions over curie then target
        wildcard_contributions = self._true_state_contributions(
            chp_query.contributions)

        # normalize contributions by the target and take relative difference
        for curie in wildcard_contributions.keys():
            truth_target_contrib = 0
            nontruth_target_contrib = 0
            for target, contrib in wildcard_contributions[curie].items():
                try:
                    if target[0] == truth_target[0] \
                            and target[1] == truth_target[1]:
                        truth_target_contrib += \
                            contrib / unnormalized_truth_prob
                    else:
                        nontruth_target_contrib += \
                            contrib / unnormalized_false_prob
                except ZeroDivisionError:
                    # A side with no probability mass contributes nothing.
                    continue
            wildcard_contributions[curie]['relative'] = \
                truth_target_contrib - nontruth_target_contrib

        if query_type == 'drug_two_hop' or query_type == 'gene_two_hop':
            # Rank the intermediate curies by absolute relative contribution
            # and keep the strongest ones.
            ranked = sorted(
                ((contrib_dict['relative'], wildcard)
                 for wildcard, contrib_dict in wildcard_contributions.items()),
                key=lambda pair: abs(pair[0]),
                reverse=True)[:self.max_results]
            intermediate_curies = [wildcard for _, wildcard in ranked]

            chp_query.contributions = None
            wildcard_contributions = defaultdict(lambda: defaultdict(int))
            for intermediate in intermediate_curies:
                chp_query_extended = copy.deepcopy(chp_query)
                chp_query_extended.add_meta_evidence(intermediate, 'True')
                if query_type == 'drug_two_hop':
                    chp_query_extended = self.joint_reasoner.run_query(
                        chp_query_extended, contribution_type='drug')
                else:
                    chp_query_extended = self.joint_reasoner.run_query(
                        chp_query_extended, contribution_type='gene')

                extended_truth_target = chp_query_extended.truth_target
                (extended_unnormalized_truth_prob,
                 extended_unnormalized_false_prob,
                 _) = self._split_probability_mass(
                     chp_query_extended.result, extended_truth_target)

                # organize the contributions over curie then target
                extended_wildcard_contributions = \
                    self._true_state_contributions(
                        chp_query_extended.contributions)

                # normalize contributions by the target, rescale by the
                # first-hop mass and accumulate the relative difference
                for curie in extended_wildcard_contributions.keys():
                    truth_target_contrib = 0
                    nontruth_target_contrib = 0
                    for target, contrib in \
                            extended_wildcard_contributions[curie].items():
                        try:
                            if target[0] == extended_truth_target[0] \
                                    and target[1] == extended_truth_target[1]:
                                truth_target_contrib += (
                                    contrib / extended_unnormalized_truth_prob
                                    * unnormalized_truth_prob)
                            else:
                                nontruth_target_contrib += (
                                    contrib / extended_unnormalized_false_prob
                                    * unnormalized_false_prob)
                        except ZeroDivisionError:
                            continue
                    wildcard_contributions[curie]['relative'] += (
                        truth_target_contrib - nontruth_target_contrib)

        chp_query.report = None
        chp_query.wildcard_contributions = wildcard_contributions
        return chp_query

    def _construct_trapi_message(self, chp_query, query, query_type):
        """Write the answer for ``chp_query`` into ``query.message``.

        'standard' queries yield one result whose edges carry the survival
        probability. Wildcard queries yield one result per top-ranked
        wildcard curie (at most ``self.max_results``), with edges carrying
        that curie's relative contribution.

        :returns: the ``query`` object whose message was populated.
        """
        # Helpful short cuts
        message = query.message
        qg = message.query_graph
        kg = message.knowledge_graph

        edge_bindings = {}
        node_bindings = {}

        # Process nodes; wildcard nodes (no ids) are bound per result below.
        for qnode_id, qnode in qg.nodes.items():
            if qnode.ids is None:
                continue
            category = qnode.categories[0]
            curie = qnode.ids[0]
            if category == BIOLINK_GENE_ENTITY:
                name = self.curies[BIOLINK_GENE_ENTITY.get_curie()][curie][0]
            elif category == BIOLINK_DRUG_ENTITY:
                name = self.curies[BIOLINK_DRUG_ENTITY.get_curie()][curie][0]
            else:
                #TODO: Add diseases to curies and make this a name.
                # Diseases, and any other category, fall back to the curie.
                # Other categories used to reuse the previous node's key or
                # fail on an unbound name.
                name = curie
            knode_key = kg.add_node(curie, name, category.get_curie())
            node_bindings[qnode_id] = [knode_key]

        if query_type == 'standard':
            for qedge_key, qedge in qg.edges.items():
                kedge_key = kg.add_edge(
                    node_bindings[qedge.subject][0],
                    node_bindings[qedge.object][0],
                    predicate=qedge.predicates[0].get_curie(),
                )
                edge_bindings[qedge_key] = [kedge_key]
                # Add Attribute
                kg.edges[kedge_key].add_attribute(
                    attribute_type_id='Probability of Survival',
                    value=chp_query.truth_prob,
                    value_type_id=BIOLINK_HAS_CONFIDENCE_LEVEL_ENTITY.get_curie(),
                )
            message.results.add_result(
                node_bindings,
                edge_bindings,
            )
            return query

        # Build relative contribution results and add the associated edges
        # to the knowledge graph, strongest contribution first.
        sorted_wildcard_contributions = sorted(
            ((contrib_dict['relative'], wildcard) for wildcard, contrib_dict
             in chp_query.wildcard_contributions.items()),
            key=lambda pair: abs(pair[0]),
            reverse=True)

        gene_wildcard = query_type == 'gene' or query_type == 'gene_two_hop'
        drug_wildcard = query_type == 'drug' or query_type == 'drug_two_hop'

        for contrib, wildcard in \
                sorted_wildcard_contributions[:self.max_results]:
            _node_bindings = {}
            _edge_bindings = {}
            # Process node bindings
            bad_wildcard = False
            for qnode_id, qnode in qg.nodes.items():
                if qnode.categories[0] == BIOLINK_GENE_ENTITY \
                        and qnode.ids is None and gene_wildcard:
                    try:
                        knode_id = kg.add_node(
                            wildcard,
                            self.curies[BIOLINK_GENE_ENTITY.get_curie()]
                            [wildcard][0],
                            qnode.categories[0].get_curie(),
                        )
                        _node_bindings[qnode_id] = [knode_id]
                    except KeyError:
                        # Report the same table key that was just looked up.
                        logger.info(
                            "Couldn't find {} in curies[{}]".format(
                                wildcard, BIOLINK_GENE_ENTITY.get_curie()))
                        bad_wildcard = True
                elif qnode.categories[0] == BIOLINK_DRUG_ENTITY \
                        and qnode.ids is None and drug_wildcard:
                    knode_id = kg.add_node(
                        wildcard,
                        self.curies[BIOLINK_DRUG_ENTITY.get_curie()]
                        [wildcard][0],
                        qnode.categories[0].get_curie(),
                    )
                    _node_bindings[qnode_id] = [knode_id]
                else:
                    _node_bindings[qnode_id] = node_bindings[qnode_id]
            if bad_wildcard:
                continue

            # Process edge bindings
            for qedge_id, qedge in qg.edges.items():
                # NOTE(review): the standard branch passes
                # predicates[0].get_curie() here; confirm which form
                # add_edge expects.
                kedge_id = kg.add_edge(
                    _node_bindings[qedge.subject][0],
                    _node_bindings[qedge.object][0],
                    predicate=qedge.predicates[0],
                )
                kg.edges[kedge_id].add_attribute(
                    attribute_type_id='Contribution',
                    value=contrib,
                    value_type_id=BIOLINK_HAS_EVIDENCE_ENTITY.get_curie(),
                )
                _edge_bindings[qedge_id] = [kedge_id]

            # Process node and edge binding results
            message.results.add_result(
                _node_bindings,
                _edge_bindings,
            )
        return query
def setUp(self):
    """Create the data handler and a joint reasoner bound to it."""
    handler = BkbDataHandler()
    self.bkb_handler = handler
    self.joint_reasoner = ChpJointReasoner(handler)
class DefaultHandlerMixin:
    """Mixin that answers default queries given as plain TRAPI dictionaries.

    The host class is expected to supply the attributes read here through
    ``self``: ``init_query`` (one query dict or a list of them),
    ``bkb_data_handler``, ``hosts_filename``, ``num_processes_per_host``,
    ``dynamic_reasoner``, ``joint_reasoner`` and the methods
    ``_setup_single_query`` and ``_get_curie_name``.
    """

    def _setup_handler(self):
        """Bucket the queries and create whichever reasoners they need.

        Does nothing when ``self.init_query`` is ``None``.
        """
        if self.init_query is None:
            return

        self._setup_queries()

        # Instantiate reasoners only for the buckets that are present.
        if 'default' in self.query_dict and self.dynamic_reasoner is None:
            self.dynamic_reasoner = ChpDynamicReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)
        if 'simple' in self.query_dict and self.joint_reasoner is None:
            self.joint_reasoner = ChpJointReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)

    def _setup_queries(self):
        """Populate ``self.query_dict`` with 'simple' / 'default' buckets.

        A list of queries also records each ``query_id`` in
        ``self.query_map``; a single query produces a one-entry dict.
        """
        if isinstance(self.init_query, list):
            self.query_dict = defaultdict(list)
            self.query_map = []
            for query in self.init_query:
                self.query_map.append(query["query_id"])
                bucket = 'simple' if self._is_simple_query(query) else 'default'
                self.query_dict[bucket].append(
                    self._setup_single_query(query))
        else:
            if self._is_simple_query(self.init_query):
                bucket = "simple"
            else:
                bucket = "default"
            self.query_dict = {
                bucket: [self._setup_single_query(self.init_query)]
            }

    def _is_simple_query(self, query):
        """Return True when no tracked category occurs on more than one node.

        The tracked categories are phenotypic feature, disease, gene and
        drug; a second node of any of them makes the query non-simple.
        """
        tracked = (
            BIOLINK_PHENOTYPIC_FEATURE,
            BIOLINK_DISEASE,
            BIOLINK_GENE,
            BIOLINK_DRUG,
        )
        already_seen = [False] * len(tracked)
        for node in query["query_graph"]["nodes"].values():
            for position, category in enumerate(tracked):
                if node["category"] == category:
                    if already_seen[position]:
                        return False
                    already_seen[position] = True
        return True

    def _extract_chp_query(self, query, query_type=None):
        """Translate a TRAPI query dict into a CHP ``Query``.

        The phenotypic-feature node supplies the dynamic target, gene nodes
        become evidence keyed by ``"_" + id`` and drug nodes become
        evidence keyed by their id. The target defaults to ``>= 970`` days
        unless a disease -> phenotype edge carries ``properties``.

        :param query: dict with a ``query_graph`` and optional ``query_id``.
        :param query_type: unused; kept for interface compatibility.
        :returns: the query with ``truth_target`` and ``query_id`` set.
        :raises ValueError: if the query graph has no phenotypic feature node.
        """
        evidence = {}
        targets = []
        dynamic_evidence = {}
        dynamic_targets = {}

        nodes = query["query_graph"]['nodes']
        edges = query["query_graph"]['edges']

        # Locate the phenotype node explicitly.  The target id used to be
        # read from whichever node the previous loop ended on, which was
        # only right when the phenotype node came last.
        target_id = None
        target_node = None
        for node_key, node in nodes.items():
            if node['category'] == BIOLINK_PHENOTYPIC_FEATURE:
                target_id = node_key
                target_node = node
        if target_node is None:
            raise ValueError(
                'Query graph has no phenotypic feature node to use as target.')

        # Defaults apply when no disease -> phenotype edge matches; they
        # used to be unbound in that case.
        days = 970
        qualifier = '>='
        for disease_id, node in nodes.items():
            if node['category'] == BIOLINK_DISEASE:
                for edge in edges.values():
                    if edge['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE \
                            and edge['subject'] == disease_id \
                            and edge['object'] == target_id:
                        if 'properties' in edge.keys():
                            days = edge['properties']['days']
                            qualifier = edge['properties']['qualifier']
                        else:
                            days = 970
                            qualifier = '>='

        # set BKB target
        dynamic_targets[target_node["id"]] = {
            "op": qualifier,
            "value": days,
        }
        truth_target = (target_node["id"], '{} {}'.format(qualifier, days))

        # Gather evidence.  The former per-node edge scans only fed counters
        # that were never read, so they are gone.
        for node in nodes.values():
            if node['category'] == BIOLINK_GENE:
                evidence["_" + node['id']] = 'True'
            if node['category'] == BIOLINK_DRUG:
                evidence[node["id"]] = 'True'

        # produce BKB query
        chp_query = Query(evidence=evidence,
                          targets=targets,
                          dynamic_evidence=dynamic_evidence,
                          dynamic_targets=dynamic_targets,
                          type='updating')
        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        chp_query.query_id = query["query_id"] if 'query_id' in query else None
        return chp_query

    def _run_query(self, chp_query, query_type):
        """Run the query and store the truth-target probability on it.

        For 'simple' queries ``truth_prob`` is ``-1`` when the result is
        empty, ``0`` when it lacks the truth target, and otherwise the
        normalised, non-negative probability. Other queries use the dynamic
        reasoner's processed updates.

        :returns: the query with ``truth_prob`` and ``report`` set.
        """
        if query_type == 'simple':
            chp_query = self.joint_reasoner.run_query(chp_query)
            result = chp_query.result
            if len(result) > 0:
                if chp_query.truth_target in result:
                    # Negative contributions are clamped before normalising.
                    total_unnormalized_prob = sum(
                        max(0, contrib) for contrib in result.values())
                    chp_query.truth_prob = max(
                        0, result[chp_query.truth_target]
                    ) / total_unnormalized_prob
                else:
                    chp_query.truth_prob = 0
            else:
                chp_query.truth_prob = -1
            chp_query.report = None
        else:
            chp_query = self.dynamic_reasoner.run_query(chp_query)
            chp_res_dict = chp_query.result.process_updates(normalize=True)
            target_name, target_state = chp_query.truth_target
            chp_query.truth_prob = max(
                0, chp_res_dict[target_name][target_state])
            chp_query.report = None
        return chp_query

    def _construct_trapi_response(self, chp_query, query_type=None):
        """Build the TRAPI response dict for an answered query.

        The knowledge graph is a copy of the query graph re-keyed by node
        curie and by generated edge ids (``kge0``, ``kge1``, ...). Disease
        -> phenotype edges get the survival probability as an attribute.

        :returns: ``(query_id, response)``; ``query_id`` is ``None`` when
            ``self.init_query`` holds a single query.
        :raises ValueError: if no query in ``self.init_query`` matches
            ``chp_query.query_id``.
        """
        # Get original query.  A lone dict is accepted here because
        # ``_setup_queries`` accepts one too; it used to be indexed as a list.
        if isinstance(self.init_query, dict):
            query = self.init_query
            query_id = None
        elif len(self.init_query) == 1:
            query = self.init_query[0]
            query_id = None
        else:
            for _query in self.init_query:
                if _query["query_id"] == chp_query.query_id:
                    query = _query
                    query_id = query["query_id"]
                    break
            else:
                raise ValueError('No initial query matches query id {}.'.format(
                    chp_query.query_id))

        kg = copy.deepcopy(query["query_graph"])

        # Re-key nodes by curie, remembering query key -> curie for bindings.
        node_pairs = dict()
        for node_key in list(kg['nodes'].keys()):
            qg_node_curie = kg['nodes'][node_key].pop('id')
            knode = kg['nodes'].pop(node_key)
            kg['nodes'][qg_node_curie] = knode
            node_pairs[node_key] = qg_node_curie
            if knode['category'] == BIOLINK_GENE:
                knode['name'] = self._get_curie_name(BIOLINK_GENE,
                                                     qg_node_curie)[0]
            elif knode['category'] == BIOLINK_DRUG:
                knode['name'] = self._get_curie_name(BIOLINK_DRUG,
                                                     qg_node_curie)[0]

        # Re-key edges with generated ids and point them at the node curies.
        edge_pairs = dict()
        for knowledge_edges, edge_key in enumerate(list(kg['edges'].keys())):
            kg_id = 'kge{}'.format(knowledge_edges)
            kedge = kg['edges'].pop(edge_key)
            kg['edges'][kg_id] = kedge
            kedge['subject'] = node_pairs[kedge['subject']]
            kedge['object'] = node_pairs[kedge['object']]
            edge_pairs[edge_key] = kg_id
            if kedge['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE:
                if 'properties' in kedge.keys():
                    kedge.pop('properties')
                kedge['attributes'] = [{
                    'name': 'Probability of Survival',
                    'type': BIOLINK_PROBABILITY,
                    'value': chp_query.truth_prob
                }]

        results = [{
            'edge_bindings': {
                edge_key: [{'id': kg_id}]
                for edge_key, kg_id in edge_pairs.items()
            },
            'node_bindings': {
                node_key: [{'id': curie}]
                for node_key, curie in node_pairs.items()
            },
        }]

        # query response
        trapi_message = {
            'query_graph': query["query_graph"],
            'knowledge_graph': kg,
            'results': results
        }
        trapi_response = {'message': trapi_message}
        return query_id, trapi_response
def setUpClass(cls):
    """Class-level fixture: one BKB data handler shared by both reasoners."""
    super(TestBaseHandler, cls).setUpClass()
    handler = BkbDataHandler()
    cls.bkb_handler = handler
    # Both reasoners are built on the same handler instance.
    cls.dynamic_reasoner = ChpDynamicReasoner(handler)
    cls.joint_reasoner = ChpJointReasoner(handler)
def setUpClass(cls):
    """Class-level fixture: a BKB data handler and the joint reasoner built on it."""
    super(TestJointReasoner, cls).setUpClass()
    handler = BkbDataHandler()
    cls.bkb_handler = handler
    cls.joint_reasoner = ChpJointReasoner(handler)
class OneHopHandlerMixin:
    """ OneHopHandlerMixin is the handler mixin for 1-hop queries. That is, query
        graphs (QGs) that consist of 2 nodes and a single edge.

        The methods below read these attributes from the host class:
        ``messages``, ``bkb_data_handler``, ``hosts_filename``,
        ``num_processes_per_host``, ``dynamic_reasoner``, ``joint_reasoner``,
        ``curies`` and ``max_results``.

        :param query: the query graph sent by the ARA.
        :type query: dict
        :param hosts_filename: a filename for a stored QG. Defaults to None
        :type hosts_filename: str
        :param num_processes_per_host: Not implemented thoroughly, but would be used for distributed reasoning.
        :type num_processes_per_host: int
        :param max_results: specific to 1-hop queries, specifies the number of wildcard genes to return.
        :type max_results: int
    """

    def _setup_handler(self):
        """Set the default survival target and, if messages were supplied,
        group them by one-hop type and create any reasoner that is still ``None``.
        """
        self.default_survival_target = {
            "EFO:0000714": {
                "op": '>=',
                "value": 970
            }
        }
        # Only do the rest of this if a query is passed
        if self.messages is None:
            return

        # Setup queries
        self._setup_messages()

        # Instantiate reasoners that were not injected by the caller
        if self.dynamic_reasoner is None:
            self.dynamic_reasoner = ChpDynamicReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)
        if self.joint_reasoner is None:
            self.joint_reasoner = ChpJointReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)

    def _setup_messages(self):
        """Group ``self.messages`` into ``self.message_dict`` keyed by one-hop type."""
        self.message_dict = defaultdict(list)
        for message in self.messages:
            self.message_dict[self._get_onehop_type(message)].append(message)

    def _get_onehop_type(self, message):
        """Classify a message as ``'standard'``, ``'drug'`` or ``'gene'``.

        A node without ids is a wildcard; the first category of the first
        wildcard node decides the type. Messages without a wildcard node are
        ``'standard'``.

        :raises ValueError: if the wildcard category is neither drug nor gene.
        """
        wildcard_type = None
        for node in message.query_graph.nodes.values():
            if node.ids is None and wildcard_type is None:
                wildcard_type = node.categories[0]
        # If standard onehop query
        if wildcard_type is None:
            return 'standard'
        if wildcard_type == BIOLINK_DRUG_ENTITY:
            return 'drug'
        if wildcard_type == BIOLINK_GENE_ENTITY:
            return 'gene'
        raise ValueError(
            'Did not understand wildcard type {}.'.format(wildcard_type))

    def check_query(self):
        """ Currently not implemented. Would check validity of query.
        """
        return True

    @staticmethod
    def _process_predicate_proxy(qedge):
        """Build the dynamic target described by the edge's predicate proxy.

        Falls back to ``get_default_predicate_proxy()`` when the edge has no
        ``predicate_proxy`` constraint, and to the default operator/value of
        the proxy when the proxy itself has no constraint on the edge.

        :returns: ``{proxy: {"op": operator, "value": value}}``
        """
        predicate_proxy_constraint = qedge.find_constraint('predicate_proxy')
        if predicate_proxy_constraint is None:
            predicate_proxy = get_default_predicate_proxy()
        else:
            predicate_proxy = predicate_proxy_constraint.value[0]

        proxy_constraint = qedge.find_constraint(predicate_proxy)
        if proxy_constraint is None:
            proxy_operator = get_default_operator(predicate_proxy)
            proxy_value = get_default_value(predicate_proxy)
        else:
            proxy_operator = proxy_constraint.operator
            proxy_value = proxy_constraint.value

        # Setup dynamic target
        return {
            predicate_proxy: {
                "op": proxy_operator,
                "value": proxy_value,
            }
        }

    @staticmethod
    def _process_predicate_context(qedge, message_type):
        """Translate the edge's ``predicate_context`` constraint into evidence.

        Gene context on a ``'gene'`` message and drug context on a ``'drug'``
        message become dynamic evidence keyed by curie; every other supported
        combination becomes plain evidence keyed by ``'_<curie>'``.

        :returns: ``(evidence, dynamic_evidence)``; both empty when the edge
            carries no ``predicate_context`` constraint.
        :raises ValueError: if a listed context has no constraint of its own,
            or if its biolink entity is neither gene nor drug.
        """
        evidence = {}
        dynamic_evidence = {}
        predicate_context_constraint = qedge.find_constraint(
            'predicate_context')
        if predicate_context_constraint is None:
            return evidence, dynamic_evidence

        for context in predicate_context_constraint.value:
            context_curie = get_biolink_entity(context)
            context_constraint = qedge.find_constraint(context)
            if context_constraint is None:
                raise ValueError(
                    'Provided no context details for {}'.format(context))

            # Message type for which this context describes the wildcard side.
            if context_curie == BIOLINK_GENE_ENTITY:
                wildcard_message_type = 'gene'
            elif context_curie == BIOLINK_DRUG_ENTITY:
                wildcard_message_type = 'drug'
            else:
                raise ValueError(
                    'Unsupported context type: {}'.format(context_curie))

            # A constraint value may be one curie or a list of curies. The
            # scalar case previously referenced an undefined loop variable.
            value = context_constraint.value
            curies = value if isinstance(value, list) else [value]

            if message_type == wildcard_message_type:
                for curie in curies:
                    dynamic_evidence[curie] = {
                        "op": '==',
                        "value": 'True',
                    }
            else:
                for curie in curies:
                    evidence['_{}'.format(curie)] = 'True'
        return evidence, dynamic_evidence

    def _extract_chp_query(self, message, message_type):
        """Build the CHP ``Query`` for one message.

        Evidence comes from the fixed gene/drug nodes appropriate to
        ``message_type`` and from the edge's predicate context; the dynamic
        target comes from the edge's predicate proxy.

        :returns: a ``Query`` of type ``'updating'`` with ``truth_target`` set
            to ``(proxy, '<op> <value>')``.
        :raises ValueError: if the query graph has no edge.
        """
        # Node categories that contribute plain evidence per message type.
        evidence_categories = {
            'standard': (BIOLINK_GENE_ENTITY, BIOLINK_DRUG_ENTITY),
            'gene': (BIOLINK_DRUG_ENTITY,),
            'drug': (BIOLINK_GENE_ENTITY,),
        }.get(message_type)

        evidence = {}
        dynamic_evidence = {}
        if evidence_categories:
            for qnode in message.query_graph.nodes.values():
                category = qnode.categories[0]
                if any(category == wanted for wanted in evidence_categories):
                    evidence['_{}'.format(qnode.ids[0])] = 'True'

        # Grab the first edge of the query graph
        qedge = next(iter(message.query_graph.edges.values()), None)
        if qedge is None:
            raise ValueError('One-hop query graph does not contain an edge.')

        # Process predicate proxy
        dynamic_targets = self._process_predicate_proxy(qedge)
        # Process predicate context
        _evidence, _dynamic_evidence = self._process_predicate_context(
            qedge, message_type)
        evidence.update(_evidence)
        dynamic_evidence.update(_dynamic_evidence)
        #TODO: Probably need a more robust solution for when no context is provided in wildcard queries and you need it.

        target = next(iter(dynamic_targets))
        truth_target = (target, '{} {}'.format(dynamic_targets[target]["op"],
                                               dynamic_targets[target]["value"]))

        chp_query = Query(evidence=evidence,
                          targets=None,
                          dynamic_evidence=dynamic_evidence,
                          dynamic_targets=dynamic_targets,
                          type='updating')
        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        return chp_query

    @staticmethod
    def _ratio_or_zero(numerator, denominator):
        """Return ``numerator / denominator``, or ``0`` when the denominator is zero."""
        return numerator / denominator if denominator != 0 else 0

    @staticmethod
    def _normalized_truth_prob(chp_query):
        """Normalize the joint-reasoner result into a truth probability.

        :returns: ``-1`` if the result is empty, ``0`` if the truth target is
            absent or every contribution is clamped to zero, otherwise the
            clamped truth contribution divided by the sum of clamped
            contributions.
        """
        result = chp_query.result
        if len(result) == 0:
            return -1
        if chp_query.truth_target not in result:
            return 0
        total = sum(max(0, contrib) for _, contrib in result.items())
        if total == 0:
            return 0
        return max(0, result[chp_query.truth_target]) / total

    def _population_patient_contributions(self, chp_query):
        """Spread the truth probability evenly over the joint reasoner's patients.

        Patients satisfying a dynamic target share ``truth_prob``; the others
        share ``1 - truth_prob``. Keys are ``(proxy, '<op> <value>')`` so they
        line up with ``chp_query.truth_target``.
        """
        patient_data = self.joint_reasoner.patient_data
        num_all = len(patient_data)
        truth_prob = chp_query.truth_prob
        num_matched = truth_prob * num_all
        num_unmatched = num_all - num_matched

        # Resolve operators once per target rather than once per patient.
        # NOTE(review): get_opposite_operator presumably returns the operator
        # symbol, since it is formatted into the state name - confirm.
        proxies = [(proxy, get_operator(info["op"]), info["op"],
                    get_opposite_operator(info["op"]), info["value"])
                   for proxy, info in chp_query.dynamic_targets.items()]

        contributions = defaultdict(lambda: defaultdict(int))
        for patient, feature_dict in patient_data.items():
            for proxy, proxy_op, op_symbol, opp_op, value in proxies:
                if proxy_op(feature_dict[proxy], value):
                    # Use the operator symbol, not the callable, in the key.
                    state = '{} {}'.format(op_symbol, value)
                    share = self._ratio_or_zero(truth_prob, num_matched)
                else:
                    state = '{} {}'.format(opp_op, value)
                    # With no matched patients this is (1 - p) / num_all;
                    # the old code divided by num_matched == 0 here.
                    share = self._ratio_or_zero(1 - truth_prob, num_unmatched)
                contributions[(proxy, state)][patient] = share
        return contributions

    @staticmethod
    def _source_patient_contributions(inode_contributions):
        """Attribute source-inode contributions to the patient hashes they name.

        Only inodes whose component name contains ``'_Source_'`` are used. The
        last ``'_'``-separated piece of the state name is read as a
        comma-separated list of integer patient hashes, and the contribution
        is split evenly between them.
        """
        contributions = defaultdict(lambda: defaultdict(int))
        for target, contrib_dict in inode_contributions.items():
            for inode, contrib in contrib_dict.items():
                comp_name, state_name = inode
                if '_Source_' not in comp_name:
                    continue
                source_hashes = [
                    int(source_hash)
                    for source_hash in state_name.split('_')[-1].split(',')
                ]
                share = contrib / len(source_hashes)
                for patient_hash in source_hashes:
                    contributions[target][patient_hash] += share
        return contributions

    def _run_query(self, chp_query, query_type):
        """ Runs the built BKB query to calculate the probability of the truth target.

            ``'standard'`` queries go to the joint reasoner and only get
            ``truth_prob``. Wildcard queries additionally get
            ``wildcard_contributions``: per gene/drug curie, the contribution
            to each target state plus a ``'relative'`` entry (normalized truth
            contribution minus normalized non-truth contribution).

            :returns: the query returned by the reasoner, with ``truth_prob``,
                ``report`` and, for wildcard queries, ``wildcard_contributions`` set.
        """
        if query_type == 'standard':
            chp_query = self.joint_reasoner.run_query(chp_query)
            chp_query.truth_prob = self._normalized_truth_prob(chp_query)
            chp_query.report = None
            return chp_query

        if len(chp_query.evidence) == 0:
            # No evidence at all: use the joint reasoner and attribute the
            # probability to patients directly.
            chp_query = self.joint_reasoner.run_query(chp_query)
            chp_query.truth_prob = self._normalized_truth_prob(chp_query)
            patient_contributions = self._population_patient_contributions(
                chp_query)
        else:
            # A gene wildcard is answered on the drug BKB and vice versa.
            bkb_type = {'gene': 'drug', 'drug': 'gene'}.get(query_type)
            if bkb_type is not None:
                chp_query = self.dynamic_reasoner.run_query(chp_query,
                                                            bkb_type=bkb_type)
            # Call retained from the original code; its result is not used.
            chp_query.result.process_updates()
            chp_res_norm_dict = chp_query.result.process_updates(
                normalize=True)
            chp_res_contributions = chp_query.result.process_inode_contributions()
            chp_query.truth_prob = max(
                0,
                chp_res_norm_dict[chp_query.truth_target[0]][
                    chp_query.truth_target[1]],
            )
            patient_contributions = self._source_patient_contributions(
                chp_res_contributions)

        # Translate patient contributions into drug/gene contributions
        curie_field = {
            'gene': 'gene_curies',
            'drug': 'drug_curies'
        }.get(query_type)
        wildcard_contributions = defaultdict(lambda: defaultdict(int))
        if curie_field is not None:
            raw_patient_data = self.dynamic_reasoner.raw_patient_data
            for target, patient_contrib_dict in patient_contributions.items():
                for patient, contrib in patient_contrib_dict.items():
                    for curie in raw_patient_data[int(patient)][curie_field]:
                        wildcard_contributions[curie][target] += contrib

        # Normalize contributions by the target and take the relative
        # difference. A truth probability of 0 or 1 leaves one side with no
        # mass; that side counts as 0 instead of dividing by zero.
        truth_prob = chp_query.truth_prob
        for contrib_by_target in wildcard_contributions.values():
            truth_share = 0
            nontruth_share = 0
            for target, contrib in contrib_by_target.items():
                if target[0] == chp_query.truth_target[0] and target[
                        1] == chp_query.truth_target[1]:
                    truth_share += self._ratio_or_zero(contrib, truth_prob)
                else:
                    nontruth_share += self._ratio_or_zero(
                        contrib, 1 - truth_prob)
            contrib_by_target['relative'] = truth_share - nontruth_share

        chp_query.report = None
        chp_query.wildcard_contributions = wildcard_contributions
        return chp_query

    def _construct_trapi_message(self, chp_query, message, query_type):
        """Write the answer of ``chp_query`` into ``message`` and return it.

        ``'standard'`` queries yield one result whose edges carry the truth
        probability. Wildcard queries yield up to ``self.max_results``
        results, ranked by absolute relative contribution, each binding the
        wildcard node to one gene/drug curie. Wildcards that are missing from
        ``self.curies`` are logged and skipped.
        """
        qg = message.query_graph
        kg = message.knowledge_graph

        # Bind every fixed (non-wildcard) query node to a knowledge-graph node
        node_bindings = {}
        for qnode_id, qnode in qg.nodes.items():
            if qnode.ids is None:
                # Wildcard node; bound once per result further down.
                continue
            if qnode.categories[0] == BIOLINK_GENE_ENTITY:
                knode_key = kg.add_node(
                    qnode.ids[0],
                    self.curies[BIOLINK_GENE_ENTITY.get_curie()][
                        qnode.ids[0]][0],
                    qnode.categories[0].get_curie(),
                )
            elif qnode.categories[0] == BIOLINK_DRUG_ENTITY:
                knode_key = kg.add_node(
                    qnode.ids[0],
                    self.curies[BIOLINK_DRUG_ENTITY.get_curie()][
                        qnode.ids[0]][0],
                    qnode.categories[0].get_curie(),
                )
            elif qnode.categories[0] == BIOLINK_DISEASE_ENTITY:
                #TODO: Add diseases to curies and fix name hack below.
                knode_key = kg.add_node(
                    qnode.ids[0],
                    qnode.ids[0],  #TODO: Once curies is fixed, make this a name.
                    qnode.categories[0].get_curie(),
                )
            node_bindings[qnode_id] = [knode_key]

        if query_type == 'standard':
            edge_bindings = {}
            for qedge_key, qedge in qg.edges.items():
                kedge_key = kg.add_edge(
                    node_bindings[qedge.subject][0],
                    node_bindings[qedge.object][0],
                    predicate=qedge.predicates[0].get_curie(),
                    relation=qedge.relation,
                )
                edge_bindings[qedge_key] = [kedge_key]
                # Add Attribute
                kg.edges[kedge_key].add_attribute(
                    attribute_type_id='Probability of Survival',
                    value=chp_query.truth_prob,
                    value_type_id=BIOLINK_HAS_CONFIDENCE_LEVEL_ENTITY.get_curie(),
                )
            message.results.add_result(
                node_bindings,
                edge_bindings,
            )
            return message

        # Rank wildcards by the magnitude of their relative contribution
        ranked_wildcards = sorted(
            ((contrib_dict['relative'], wildcard) for wildcard, contrib_dict in
             chp_query.wildcard_contributions.items()),
            key=lambda pair: abs(pair[0]),
            reverse=True,
        )

        # Entity whose nodes are replaced by the wildcard curie
        wildcard_entity = {
            'gene': BIOLINK_GENE_ENTITY,
            'drug': BIOLINK_DRUG_ENTITY,
        }.get(query_type)

        for contrib, wildcard in ranked_wildcards[:self.max_results]:
            _node_bindings = {}
            _edge_bindings = {}
            # Process node bindings
            bad_wildcard = False
            for qnode_id, qnode in qg.nodes.items():
                if wildcard_entity is None or not (
                        qnode.categories[0] == wildcard_entity):
                    _node_bindings[qnode_id] = node_bindings[qnode_id]
                    continue
                curie_key = wildcard_entity.get_curie()
                try:
                    wildcard_name = self.curies[curie_key][wildcard][0]
                except KeyError:
                    logger.info("Couldn't find %s in curies[%s]", wildcard,
                                curie_key)
                    bad_wildcard = True
                    break
                knode_id = kg.add_node(
                    wildcard,
                    wildcard_name,
                    qnode.categories[0].get_curie(),
                )
                _node_bindings[qnode_id] = [knode_id]
            if bad_wildcard:
                continue

            # Process edge bindings
            for qedge_id, qedge in qg.edges.items():
                # NOTE(review): the standard branch passes
                # predicates[0].get_curie() here - confirm which form
                # kg.add_edge expects.
                kedge_id = kg.add_edge(
                    _node_bindings[qedge.subject][0],
                    _node_bindings[qedge.object][0],
                    predicate=qedge.predicates[0],
                    relation=qedge.relation,
                )
                kg.edges[kedge_id].add_attribute(
                    attribute_type_id='Contribution',
                    value=contrib,
                    value_type_id=BIOLINK_HAS_EVIDENCE_ENTITY.get_curie(),
                )
                _edge_bindings[qedge_id] = [kedge_id]

            # Process node and edge binding results
            message.results.add_result(
                _node_bindings,
                _edge_bindings,
            )
        return message