def test_write_node_with_out_export_labels():
    # if we send a node with a type that is not normalizable, it should land
    # in the queue under the empty-frozenset key
    node = KNode('CURIE:1', type=node_types.CHEMICAL_SUBSTANCE)
    bf = BufferedWriter(rosetta_mock)
    bf.write_node(node)
    assert node == bf.node_queues[frozenset()][node.id]
def test_write_node_with_export_labels():
    # assert that a node is queued under its export types
    node = KNode('CURIE:1', type=node_types.NAMED_THING)
    all_types = [node_types.CHEMICAL_SUBSTANCE, node_types.NAMED_THING]
    node.add_export_labels(all_types)
    bf = BufferedWriter(rosetta_mock)
    bf.write_node(node)
    assert node.id in bf.written_nodes
    key = node.export_labels
    assert key in bf.node_queues
    queue = bf.node_queues[key]
    assert node.id in queue
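# For reference, keying the node queues by frozenset means the order in which
# export labels were added never matters. A standalone illustration (the names
# below are illustrative, not taken from the code above):
def test_frozenset_queue_keys_ignore_label_order():
    key_a = frozenset(['chemical_substance', 'named_thing'])
    key_b = frozenset(['named_thing', 'chemical_substance'])
    assert key_a == key_b          # same bucket either way
    node_queues = {key_a: {'CURIE:1': object()}}
    assert key_b in node_queues    # lookup works regardless of label order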
def test_edge_changing_node_ids():
    bf = BufferedWriter(rosetta_mock)

    # mock for flushing edges
    def write_transaction_mock_edge(export_func, edges, edge_label, merge_edges):
        import os
        assert os.environ.get('MERGE_EDGES', False) == merge_edges
        # make sure this is the right function
        assert export_func == export_edge_chunk
        assert edge_label == 'causes'
        # make sure the edge endpoints were remapped to the normalized ids
        edge = edges[0]
        assert edge.source_id == 'CHEBI:127682'
        assert edge.target_id == 'NCBIGene:84125'
        print(edges)

    # pass the mock tester to bf and let it rip
    source_node = KNode('PUBCHEM:44490445')
    target_node = KNode('HGNC:25708')
    edge = KEdge({
        'source_id': source_node.id,
        'target_id': target_node.id,
        'provided_by': 'test_write_edges',
        'original_predicate': LabeledID(identifier='SEMMEDDB:CAUSES', label='semmed:causes'),
        'standard_predicate': None,
        'input_id': 'PUBCHEM:44490445',
        'publications': [],
    })
    bf.write_node(source_node)
    bf.write_node(target_node)

    # a mock for writing nodes; we are not testing nodes here
    session_for_node = Mock()
    session_for_node.write_transaction = lambda export_func, node_list, labels: None
    bf.flush_nodes(session_for_node)
    assert bf.synonym_map == {
        'PUBCHEM:44490445': 'CHEBI:127682',
        'HGNC:25708': 'NCBIGene:84125'
    }

    session_for_edge = Mock()
    session_for_edge.write_transaction = write_transaction_mock_edge
    bf.write_edge(edge)
    bf.flush_edges(session_for_edge)
def test_flush_nodes_changing_node():
    # same as the non-changing-node test, except flush_nodes should produce a
    # synonym map that remaps the original id to the normalized one
    node = KNode('MESH:D000096', type=node_types.CHEMICAL_SUBSTANCE)
    properties = {'a': 'some prop'}
    node.properties = properties
    bf = BufferedWriter(rosetta_mock)
    bf.write_node(node)

    def write_transaction_mock(export_func, nodes, types):
        print(types)
        # make sure this is the right function
        assert export_func == export_node_chunk
        # make sure our node id is in there
        assert node.id in nodes
        # get the node and see if the properties are preserved
        assert nodes[node.id].properties == properties
        # see if the types are as expected
        assert nodes[node.id].export_labels == types == frozenset([
            "chemical_substance",
            "named_thing",
            "biological_entity",
            "molecular_entity"
        ])

    session = Mock()
    session.write_transaction = write_transaction_mock
    # pass the mock tester to bf and let it rip
    bf.flush_nodes(session)
    # make sure the synonym map we get here, to be used for edge correction, is sane
    assert 'MESH:D000096' in bf.synonym_map
    assert bf.synonym_map['MESH:D000096'] == 'CHEBI:15347'
def test_flush_nodes_non_normalizable():
    # same as the non-changing-node test, except the curie cannot be
    # normalized, so it should map to itself in the synonym map
    node = KNode('SOME:curie', type=node_types.CHEMICAL_SUBSTANCE)
    properties = {'a': 'some prop'}
    node.properties = properties
    bf = BufferedWriter(rosetta_mock)
    bf.write_node(node)

    def write_transaction_mock(export_func, nodes, types):
        print(types)
        # make sure this is the right function
        assert export_func == export_node_chunk
        # make sure our node id is in there
        assert node.id in nodes
        # get the node and see if the properties are preserved
        assert nodes[node.id].properties == properties
        # non-normalizable nodes get no export labels
        assert nodes[node.id].export_labels == []
        assert types == frozenset()

    session = Mock()
    session.write_transaction = write_transaction_mock
    # pass the mock tester to bf and let it rip
    bf.flush_nodes(session)
    # make sure the synonym map we get here, to be used for edge correction, is sane
    assert 'SOME:curie' in bf.synonym_map
    assert bf.synonym_map['SOME:curie'] == 'SOME:curie'
def setup_consumer(callback=callback):
    # Setup is the same as before: create the queue on the channel.
    # We avoid auto_ack in case the channel drops on us and we lose data that
    # the channel has picked up but not yet processed.
    writer = BufferedWriter(rosetta)
    logger.info(' [*] Setting up consumer, creating new connection')
    connection = pika.BlockingConnection(
        pika.ConnectionParameters(
            host=os.environ['BROKER_HOST'],
            virtual_host='builder',
            credentials=pika.credentials.PlainCredentials(
                os.environ['BROKER_USER'],
                os.environ['BROKER_PASSWORD'])))
    partial_callback = partial(callback, writer=writer)
    channel = connection.channel()
    channel.queue_declare(queue='neo4j')
    channel.basic_consume('neo4j', partial_callback, auto_ack=False)
    return channel
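# A minimal driver sketch for the consumer above (hypothetical, not from the
# source): start_consuming() blocks and dispatches messages to the callback,
# and stop_consuming() lets the process exit cleanly on Ctrl-C.
if __name__ == '__main__':
    consumer_channel = setup_consumer()
    try:
        consumer_channel.start_consuming()
    except KeyboardInterrupt:
        consumer_channel.stop_consuming()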
def test_write_edges():
    bf = BufferedWriter(rosetta_mock)
    edge = KEdge({
        'source_id': 'source:1',
        'target_id': 'target:1',
        'provided_by': 'test_write_edges'
    })
    edge.original_predicate = LabeledID(identifier='SEMMEDDB:CAUSES', label='semmed:causes')
    bf.write_edge(edge)
    assert bf.written_edges[edge.source_id][edge.target_id] == set(
        [edge.original_predicate.identifier])
    assert len(bf.edge_queues) == 1
    # writing the same edge twice should leave the edge queue length at 1
    bf.write_edge(edge)
    assert len(bf.edge_queues) == 1
    # unless force_create is set, which queues it again
    bf.write_edge(edge, force_create=True)
    assert len(bf.edge_queues) == 2
def test_edge_source_target_update_when_synmap_empty():
    bf = BufferedWriter(rosetta_mock)
    assert bf.synonym_map == {}
    source_node = KNode('PUBCHEM:44490445')
    target_node = KNode('HGNC:25708')
    edge = KEdge({
        'source_id': source_node.id,
        'target_id': target_node.id,
        'provided_by': 'test_write_edges',
        'original_predicate': LabeledID(identifier='SEMMEDDB:CAUSES', label='semmed:causes'),
        'standard_predicate': None,
        'input_id': 'PUBCHEM:44490445',
        'publications': [],
    })

    # mock writer
    def write_transaction_mock_edge(export_func, edges, edge_label, merge_edges):
        import os
        assert os.environ.get('MERGE_EDGES', False) == merge_edges
        # make sure this is the right function
        assert export_func == export_edge_chunk
        assert edge_label == 'causes'
        # make sure the edge endpoints were normalized even with an empty synonym map
        edge = edges[0]
        assert edge.source_id == 'CHEBI:127682'
        assert edge.target_id == 'NCBIGene:84125'
        print(edges)

    session = Mock()
    session.write_transaction = write_transaction_mock_edge
    bf.write_edge(edge)
    # the synonym map should still be empty
    assert bf.synonym_map == {}
    bf.flush_edges(session)
def test_flush_nodes_non_changing_node():
    # test that nodes are sent to the export function when they already carry
    # their primary id
    node = KNode('CHEBI:15347', type=node_types.CHEMICAL_SUBSTANCE)
    properties = {'a': 'some prop'}
    node.properties = properties
    bf = BufferedWriter(rosetta_mock)
    # we add the node
    bf.write_node(node)

    def write_transaction_mock(export_func, nodes, types):
        print(types)
        # make sure this is the right function
        assert export_func == export_node_chunk
        # make sure our node id is in there
        assert node.id in nodes
        # get the node and see if the properties are preserved
        assert nodes[node.id].properties == properties
        # see if the types are as expected
        assert nodes[node.id].export_labels == types == frozenset([
            "chemical_substance",
            "named_thing",
            "biological_entity",
            "molecular_entity"
        ])

    session = Mock()
    session.write_transaction = write_transaction_mock
    # pass the mock tester to bf and let it rip
    bf.flush_nodes(session)
    # make sure the synonym map we get here, to be used for edge correction, is sane
    assert 'CHEBI:15347' in bf.synonym_map
    assert bf.synonym_map['CHEBI:15347'] == 'CHEBI:15347'
def process_node(self, node, history, edge=None):
    """
    We've got a new set of nodes (either initial nodes or from a query).
    They are attached to a particular concept in our query plan. We make
    sure they're synonymized, then queue up their children.
    """
    if edge is not None:
        is_source = node.id == edge.source_id
    self.rosetta.synonymizer.synonymize(node)
    if edge is not None:
        if is_source:
            edge.source_id = node.id
        else:
            edge.target_id = node.id

    # check the node cache, compare to the provided history
    # to determine which ops are valid
    key = node.id
    print("-" * len(history) + "History: ", history)

    # only add a node if it wasn't cached
    completed = self.cache.get(key)  # set of node ids we've been to from here
    print("-" * len(history) + "Completed: ", completed)
    if completed is None:
        completed = set()
        self.cache.set(key, completed)

    if self.channel is None:
        with BufferedWriter(self.rosetta) as writer:
            writer.write_node(node)
    else:
        self.channel.basic_publish(exchange='',
                                   routing_key='neo4j',
                                   body=json.dumps({
                                       'nodes': [node.dump()],
                                       'edges': []
                                   }))
        print(" [x] Sent node")

    # make sure the edge is queued for creation AFTER the node
    if edge:
        if self.channel is None:
            with BufferedWriter(self.rosetta) as writer:
                writer.write_edge(edge)
        else:
            self.channel.basic_publish(exchange='',
                                       routing_key='neo4j',
                                       body=json.dumps({
                                           'nodes': [],
                                           'edges': [edge.dump()]
                                       }))
            print(" [x] Sent edge")

    # quit if we've closed a loop
    if history[-1] in history[:-1]:
        print("-" * len(history) + "Closed a loop!")
        return

    source_id = int(history[-1])
    # quit if there are no transitions from this node
    if source_id not in self.transitions:
        return

    destinations = self.transitions[source_id]
    completed = self.cache.get(key)
    for target_id in destinations:
        if not self.transitions[source_id][target_id]:
            continue
        # don't turn around
        if len(history) > 1 and str(target_id) == history[-2]:
            continue
        # don't repeat things
        if target_id in completed:
            continue
        completed.add(target_id)
        self.cache.set(key, completed)
        links = self.transitions[source_id][target_id]
        print("-" * len(history) + f"Destination: {target_id}")
        for link in links:
            print("-" * len(history) + "Executing: ", link['op'])
            self.process_op(link, node, history + str(target_id))
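# A quick illustration of the history string used above (values are
# hypothetical): each hop appends the target concept index, so loop detection
# is a simple membership test on the earlier characters.
history = '012'                            # visited concepts 0 -> 1 -> 2
assert history[-1] not in history[:-1]     # no loop yet
looped = '0120'[-1] in '0120'[:-1]         # revisiting 0 closes a loop
assert looped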
class WriterDelegator:
    def __init__(self, rosetta, push_to_queue=False):
        self.rosetta = rosetta
        self.synonymizer = rosetta.synonymizer
        response = requests.get(f"{os.environ['BROKER_API']}queues/")
        queues = response.json()
        num_consumers = [
            q['consumers'] for q in queues if q['name'] == 'neo4j'
        ]
        if (num_consumers and num_consumers[0]) or push_to_queue:
            self.connection = pika.BlockingConnection(
                pika.ConnectionParameters(
                    heartbeat=0,
                    host=os.environ['BROKER_HOST'],
                    virtual_host='builder',
                    credentials=pika.credentials.PlainCredentials(
                        os.environ['BROKER_USER'],
                        os.environ['BROKER_PASSWORD'])))
            self.channel = self.connection.channel()
            self.channel.queue_declare(queue='neo4j')
        else:
            self.connection = None
            self.channel = None
        self.buffered_writer = BufferedWriter(rosetta)

    @property
    def normalized(self):
        return self.buffered_writer.normalized

    @normalized.setter
    def normalized(self, normalized):
        self.buffered_writer.normalized = normalized

    def __enter__(self):
        return self

    def __del__(self):
        if self.connection is not None:
            self.connection.close()

    def __exit__(self, *args):
        self.flush()

    def write_node(self, node, synonymize=False, annotate=True):
        # skip nodes that have already been through the writer
        if node.id in self.buffered_writer.written_nodes:
            return
        if synonymize:
            self.synonymizer.synonymize(node)
        if annotate:
            try:
                result = annotate_shortcut(node, self.rosetta)
            except Exception as e:
                logger.error(e)
                logger.error(traceback.format_exc())
        if self.channel is not None:
            self.channel.basic_publish(exchange='',
                                       routing_key='neo4j',
                                       body=pickle.dumps({
                                           'nodes': [node],
                                           'edges': []
                                       }))
        else:
            self.buffered_writer.write_node(node)

    def write_edge(self, edge, force_create=False):
        if self.channel is not None:
            write_message = {'nodes': [], 'edges': [edge]}
            if force_create:
                write_message['force'] = True
            self.channel.basic_publish(exchange='',
                                       routing_key='neo4j',
                                       body=pickle.dumps(write_message))
        else:
            self.buffered_writer.write_edge(edge, force_create)

    def flush(self):
        if self.connection and self.connection.is_open:
            if self.channel is not None:
                self.channel.basic_publish(exchange='',
                                           routing_key='neo4j',
                                           body=pickle.dumps('flush'))
        else:
            self.buffered_writer.flush()

    def close(self):
        """Send the close string so the receiver can stop its consumer."""
        if self.connection and self.connection.is_open:
            if self.channel is not None:
                self.channel.basic_publish(exchange='',
                                           routing_key='neo4j',
                                           body=pickle.dumps('close'))
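# A minimal usage sketch for WriterDelegator (illustrative only; it assumes a
# configured `rosetta` object and the BROKER_* environment variables, and the
# node values here are hypothetical):
with WriterDelegator(rosetta) as delegator:
    node = KNode('CHEBI:15347', type=node_types.CHEMICAL_SUBSTANCE)
    delegator.write_node(node, annotate=False)
    # delegator.write_edge(edge) queues or publishes an edge the same way
# __exit__ calls flush(), which either publishes a 'flush' message to the
# broker or flushes the local BufferedWriter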
from builder.api import logging_config

logger = LoggingUtil.init_logging("builder.writer", level=logging.DEBUG)

greent_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
sys.path.insert(0, greent_path)
rosetta = setup(os.path.join(greent_path, 'greent', 'greent.conf'))

connection = pika.BlockingConnection(
    pika.ConnectionParameters(
        host=os.environ['BROKER_HOST'],
        virtual_host='builder',
        credentials=pika.credentials.PlainCredentials(
            os.environ['BROKER_USER'],
            os.environ['BROKER_PASSWORD'])))
channel = connection.channel()
channel.queue_declare(queue='neo4j')
writer = BufferedWriter(rosetta)


def callback(ch, method, properties, body):
    body = body.decode()
    if isinstance(body, str) and body == 'flush':
        writer.flush()
        return
    graph = json.loads(body)
    for node in graph['nodes']:
        writer.write_node(KNode(node))
    for edge in graph['edges']:
        writer.write_edge(KEdge(edge))


# The original snippet is truncated after this call's second argument; the
# no_ack flag and start_consuming() below are assumptions, matching the
# manual-ack pattern used in setup_consumer above.
channel.basic_consume(callback, queue='neo4j', no_ack=False)
channel.start_consuming()
def test_can_initialize():
    assert BufferedWriter(rosetta_mock)