Пример #1
0
def test_write_node_with_out_export_labels():
    """A typed, non-normalizable node without export labels is queued under
    the empty-frozenset key."""
    unlabeled = KNode('CURIE:1', type=node_types.CHEMICAL_SUBSTANCE)

    writer = BufferedWriter(rosetta_mock)
    writer.write_node(unlabeled)

    # No export labels were attached, so the queue key is frozenset().
    unlabeled_queue = writer.node_queues[frozenset()]
    assert unlabeled == unlabeled_queue[unlabeled.id]
Пример #2
0
def test_write_node_with_export_lables():
    """A node that carries export labels is queued under those labels."""
    labels = [node_types.CHEMICAL_SUBSTANCE, node_types.NAMED_THING]
    labeled = KNode('CURIE:1', type=node_types.NAMED_THING)
    labeled.add_export_labels(labels)

    writer = BufferedWriter(rosetta_mock)
    writer.write_node(labeled)

    # The node is marked as written ...
    assert labeled.id in writer.written_nodes
    # ... and sits in the queue keyed by its export labels.
    queue_key = labeled.export_labels
    assert queue_key in writer.node_queues
    assert labeled.id in writer.node_queues[queue_key]
Пример #3
0
def test_edge_changing_node_ids():
    """After the nodes are flushed, a flushed edge carries the ids recorded
    in the writer's synonym map instead of the ids it was created with."""
    writer = BufferedWriter(rosetta_mock)

    def check_edge_transaction(export_func, edges, edge_label, merge_edges):
        """Stand-in for session.write_transaction used when flushing edges."""
        import os
        assert os.environ.get('MERGE_EDGES', False) == merge_edges
        # the edge must be exported under the expected label ...
        assert edge_label == 'causes'
        # ... through the edge export function ...
        assert export_func == export_edge_chunk
        # ... with both endpoints rewritten
        first_edge = edges[0]
        assert first_edge.source_id == 'CHEBI:127682'
        assert first_edge.target_id == 'NCBIGene:84125'
        print(edges)

    def ignore_node_transaction(export_func, node_list, labels):
        """Stand-in for session.write_transaction; nodes are not under test."""
        return None

    source_node = KNode('PUBCHEM:44490445')
    target_node = KNode('HGNC:25708')
    edge_fields = {
        'source_id': source_node.id,
        'target_id': target_node.id,
        'provided_by': 'test_write_edges',
        'original_predicate': LabeledID(identifier='SEMMEDDB:CAUSES',
                                        label='semmed:causes'),
        'standard_predicate': None,
        'input_id': 'PUBCHEM:44490445',
        'publications': [],
    }
    edge = KEdge(edge_fields)

    writer.write_node(source_node)
    writer.write_node(target_node)

    # Flush the nodes first so the synonym map gets populated.
    session_for_node = Mock()
    session_for_node.write_transaction = ignore_node_transaction
    writer.flush_nodes(session_for_node)
    expected_synonyms = {
        'PUBCHEM:44490445': 'CHEBI:127682',
        'HGNC:25708': 'NCBIGene:84125',
    }
    assert writer.synonym_map == expected_synonyms

    # Now push the edge through the checking mock.
    session_for_edge = Mock()
    session_for_edge.write_transaction = check_edge_transaction
    writer.write_edge(edge)
    writer.flush_edges(session_for_edge)
Пример #4
0
def test_flush_nodes_changing_node():
    """Flushing a node whose id is replaced keeps its properties, exports it
    with the expected labels and records old id -> new id in synonym_map."""
    expected_props = {'a': 'some prop'}
    mesh_node = KNode('MESH:D000096', type=node_types.CHEMICAL_SUBSTANCE)
    mesh_node.properties = expected_props

    writer = BufferedWriter(rosetta_mock)
    writer.write_node(mesh_node)

    def check_node_transaction(export_func, nodes, types):
        """Stand-in for session.write_transaction used when flushing nodes."""
        print(types)
        # nodes must go through the node export function
        assert export_func == export_node_chunk
        # our node is present under its (current) id
        assert mesh_node.id in nodes
        exported = nodes[mesh_node.id]
        # properties survive the flush
        assert exported.properties == expected_props
        # the node's labels and the transaction's labels agree with the
        # expected set
        expected_labels = frozenset({
            "chemical_substance", "named_thing", "biological_entity",
            "molecular_entity",
        })
        assert exported.export_labels == types
        assert types == expected_labels

    session = Mock()
    session.write_transaction = check_node_transaction
    writer.flush_nodes(session)

    # The synonym map is what edge correction relies on later.
    assert 'MESH:D000096' in writer.synonym_map
    assert writer.synonym_map['MESH:D000096'] == 'CHEBI:15347'
Пример #5
0
def test_flush_nodes_non_normilizable():
    """Flushing a node that cannot be normalized exports it with no labels
    and maps its id to itself in synonym_map."""
    expected_props = {'a': 'some prop'}
    unknown_node = KNode('SOME:curie', type=node_types.CHEMICAL_SUBSTANCE)
    unknown_node.properties = expected_props

    writer = BufferedWriter(rosetta_mock)
    writer.write_node(unknown_node)

    def check_node_transaction(export_func, nodes, types):
        """Stand-in for session.write_transaction used when flushing nodes."""
        print(types)
        # nodes must go through the node export function
        assert export_func == export_node_chunk
        # our node is present under its id
        assert unknown_node.id in nodes
        exported = nodes[unknown_node.id]
        # properties survive the flush
        assert exported.properties == expected_props
        # no export labels on the node, and an empty label set for the batch
        assert exported.export_labels == []
        assert types == frozenset()

    session = Mock()
    session.write_transaction = check_node_transaction
    writer.flush_nodes(session)

    # The id is unchanged, so the synonym map points it at itself.
    assert 'SOME:curie' in writer.synonym_map
    assert writer.synonym_map['SOME:curie'] == 'SOME:curie'
Пример #6
0
    def __init__(self, rosetta, push_to_queue=False):
        """Choose between publishing to the 'neo4j' queue and writing locally.

        The broker HTTP API is asked for its queues. A connection and channel
        are opened when the first 'neo4j' queue entry reports consumers or
        when ``push_to_queue`` is true; otherwise both are left as ``None``.
        A ``BufferedWriter`` is created in either case.
        """
        self.rosetta = rosetta
        self.synonymizer = rosetta.synonymizer

        # Collect the consumer counts reported for every 'neo4j' queue entry.
        queues_url = f"{os.environ['BROKER_API']}queues/"
        consumer_counts = []
        for queue_info in requests.get(queues_url).json():
            if queue_info['name'] == 'neo4j':
                consumer_counts.append(queue_info['consumers'])
        has_consumer = bool(consumer_counts) and bool(consumer_counts[0])

        if not (has_consumer or push_to_queue):
            self.connection = None
            self.channel = None
        else:
            broker_host = os.environ['BROKER_HOST']
            credentials = pika.credentials.PlainCredentials(
                os.environ['BROKER_USER'], os.environ['BROKER_PASSWORD'])
            parameters = pika.ConnectionParameters(heartbeat=0,
                                                   host=broker_host,
                                                   virtual_host='builder',
                                                   credentials=credentials)
            self.connection = pika.BlockingConnection(parameters)
            self.channel = self.connection.channel()
            self.channel.queue_declare(queue='neo4j')

        self.buffered_writer = BufferedWriter(rosetta)
Пример #7
0
def setup_consumer(callback=callback):
    """Open a new broker connection and register a consumer on 'neo4j'.

    A fresh ``BufferedWriter`` is bound to ``callback`` as its ``writer``
    keyword argument. Messages are consumed with ``auto_ack=False`` so that a
    message picked up but not yet processed is not lost if the channel drops.

    Returns:
        The channel on which the consumer was registered.
    """
    buffered = BufferedWriter(rosetta)
    logger.info(' [*] Setting up consumer, creating new connection')

    broker_host = os.environ['BROKER_HOST']
    credentials = pika.credentials.PlainCredentials(
        os.environ['BROKER_USER'], os.environ['BROKER_PASSWORD'])
    parameters = pika.ConnectionParameters(host=broker_host,
                                           virtual_host='builder',
                                           credentials=credentials)
    connection = pika.BlockingConnection(parameters)

    on_message = partial(callback, writer=buffered)
    channel = connection.channel()
    channel.queue_declare(queue='neo4j')
    channel.basic_consume('neo4j', on_message, auto_ack=False)
    return channel
Пример #8
0
def test_write_edges():
    """An edge is queued once; writing it again is a no-op unless
    force_create is passed."""
    writer = BufferedWriter(rosetta_mock)
    edge_fields = {
        'source_id': 'source:1',
        'target_id': 'target:1',
        'provided_by': 'test_write_edges',
    }
    edge = KEdge(edge_fields)
    edge.original_predicate = LabeledID(identifier='SEMMEDDB:CAUSES',
                                        label='semmed:causes')

    # First write: the predicate is recorded for this source/target pair.
    writer.write_edge(edge)
    recorded = writer.written_edges[edge.source_id][edge.target_id]
    assert recorded == {edge.original_predicate.identifier}
    assert len(writer.edge_queues) == 1

    # Second write of the same edge leaves the queue count unchanged.
    writer.write_edge(edge)
    assert len(writer.edge_queues) == 1

    # Forcing creation adds another entry.
    writer.write_edge(edge, force_create=True)
    assert len(writer.edge_queues) == 2
Пример #9
0
def test_edge_source_target_update_when_synmap_empty():
    """Edge endpoints are still rewritten at flush time even though the
    writer's synonym map is empty and stays empty after write_edge."""
    writer = BufferedWriter(rosetta_mock)
    assert writer.synonym_map == {}

    source_node = KNode('PUBCHEM:44490445')
    target_node = KNode('HGNC:25708')
    edge_fields = {
        'source_id': source_node.id,
        'target_id': target_node.id,
        'provided_by': 'test_write_edges',
        'original_predicate': LabeledID(identifier='SEMMEDDB:CAUSES',
                                        label='semmed:causes'),
        'standard_predicate': None,
        'input_id': 'PUBCHEM:44490445',
        'publications': [],
    }
    edge = KEdge(edge_fields)

    def check_edge_transaction(export_func, edges, edge_label, merge_edges):
        """Stand-in for session.write_transaction used when flushing edges."""
        import os
        assert os.environ.get('MERGE_EDGES', False) == merge_edges
        # the edge must be exported under the expected label ...
        assert edge_label == 'causes'
        # ... through the edge export function ...
        assert export_func == export_edge_chunk
        # ... with both endpoints rewritten
        first_edge = edges[0]
        assert first_edge.source_id == 'CHEBI:127682'
        assert first_edge.target_id == 'NCBIGene:84125'
        print(edges)

    session = Mock()
    session.write_transaction = check_edge_transaction
    writer.write_edge(edge)
    # Writing the edge alone must not populate the synonym map.
    assert writer.synonym_map == {}
    writer.flush_edges(session)
Пример #10
0
def test_flush_nodes_non_changing_node():
    """A node that already has its primary id is still sent to the export
    function, keeps its properties, and maps to itself in synonym_map."""
    expected_props = {'a': 'some prop'}
    chebi_node = KNode('CHEBI:15347', type=node_types.CHEMICAL_SUBSTANCE)
    chebi_node.properties = expected_props

    writer = BufferedWriter(rosetta_mock)
    writer.write_node(chebi_node)

    def check_node_transaction(export_func, nodes, types):
        """Stand-in for session.write_transaction used when flushing nodes."""
        print(types)
        # nodes must go through the node export function
        assert export_func == export_node_chunk
        # our node is present under its id
        assert chebi_node.id in nodes
        exported = nodes[chebi_node.id]
        # properties survive the flush
        assert exported.properties == expected_props
        # the node's labels and the transaction's labels agree with the
        # expected set
        expected_labels = frozenset({
            "chemical_substance", "named_thing", "biological_entity",
            "molecular_entity",
        })
        assert exported.export_labels == types
        assert types == expected_labels

    session = Mock()
    session.write_transaction = check_node_transaction
    writer.flush_nodes(session)

    # The synonym map is what edge correction relies on later.
    assert 'CHEBI:15347' in writer.synonym_map
    assert writer.synonym_map['CHEBI:15347'] == 'CHEBI:15347'
Пример #11
0
    def process_node(self, node, history, edge=None):
        """
        Synonymize a node, hand it (and the edge that led to it) to the
        writer, then run the not-yet-executed transitions leaving its concept.

        We've got a new set of nodes (either initial nodes or from a query).  They are attached
        to a particular concept in our query plan. We make sure that they're synonymized and then
        queue up their children

        Args:
            node: the node to process. ``synonymize`` is called on it and
                ``node.id`` is re-read afterwards, so its id may change.
            history: the concept ids visited so far. It is indexed one
                element at a time and extended with ``str(target_id)``, so it
                is presumably a string of single-character concept ids --
                TODO confirm against callers.
            edge: optional edge that produced ``node``. Whichever endpoint
                matched ``node.id`` before synonymization is overwritten with
                ``node.id`` afterwards.

        Returns:
            None. Results are written through ``BufferedWriter`` or published
            to the 'neo4j' queue, and follow-up work goes to ``process_op``.
        """
        # Remember which end of the edge this node is *before* synonymization,
        # because the comparison uses the pre-synonymization id.
        if edge is not None:
            is_source = node.id == edge.source_id
        self.rosetta.synonymizer.synonymize(node)
        # Keep the edge endpoint in step with the node's current id.
        if edge is not None:
            if is_source:
                edge.source_id = node.id
            else:
                edge.target_id = node.id

        # check the node cache, compare to the provided history
        # to determine which ops are valid
        key = node.id

        # print(node.dump())
        # if edge:
        #     print(edge.dump())
        print("-" * len(history) + "History: ", history)

        # only add a node if it wasn't cached
        completed = self.cache.get(key)  # set of nodes we've been from here
        print("-" * len(history) + "Completed: ", completed)
        if completed is None:
            # First time this node id is seen: start an empty "visited
            # targets" set in the cache and emit the node.
            completed = set()
            self.cache.set(key, completed)

            # No channel -> write locally; otherwise publish a JSON message.
            if self.channel is None:
                with BufferedWriter(self.rosetta) as writer:
                    writer.write_node(node)
            else:
                self.channel.basic_publish(exchange='',
                                           routing_key='neo4j',
                                           body=json.dumps({
                                               'nodes': [node.dump()],
                                               'edges': []
                                           }))
            print(" [x] Sent node")

        # make sure the edge is queued for creation AFTER the node
        # NOTE(review): this uses truthiness (`if edge`) while the checks
        # above use `is not None`; the two differ only if an edge object can
        # be falsy -- confirm.
        if edge:
            if self.channel is None:
                with BufferedWriter(self.rosetta) as writer:
                    writer.write_edge(edge)
            else:
                self.channel.basic_publish(exchange='',
                                           routing_key='neo4j',
                                           body=json.dumps({
                                               'nodes': [],
                                               'edges': [edge.dump()]
                                           }))
            print(" [x] Sent edge")

        # quit if we've closed a loop
        # (the latest history element already occurs earlier in the history)
        if history[-1] in history[:-1]:
            print("-" * len(history) + "Closed a loop!")
            return

        # The last history element is the concept this node is attached to.
        source_id = int(history[-1])

        # quit if there are no transitions from this node
        if source_id not in self.transitions:
            return

        destinations = self.transitions[source_id]
        # Re-read the visited set from the cache rather than reusing the
        # local value from above.
        completed = self.cache.get(key)
        for target_id in destinations:
            # skip targets with no links defined
            if not self.transitions[source_id][target_id]:
                continue
            # don't turn around
            if len(history) > 1 and str(target_id) == history[-2]:
                continue
            # don't repeat things
            if target_id in completed:
                continue
            # Mark the target as visited and store it back in the cache
            # before executing, so recursive calls see the updated set.
            completed.add(target_id)
            self.cache.set(key, completed)
            links = self.transitions[source_id][target_id]
            print("-" * len(history) + f"Destination: {target_id}")
            for link in links:
                print("-" * len(history) + "Executing: ", link['op'])
                # Extend the history with the target concept for the next hop.
                self.process_op(link, node, history + str(target_id))
Пример #12
0
class WriterDelegator:
    """Send node/edge writes to the 'neo4j' broker queue or to a local
    ``BufferedWriter``.

    When a broker channel is available, every write is pickled and published
    to the 'neo4j' queue; otherwise it is forwarded to ``self.buffered_writer``.
    The object can be used as a context manager; leaving the ``with`` block
    calls :meth:`flush`.
    """

    def __init__(self, rosetta, push_to_queue=False):
        """Decide whether to publish to the broker or write locally.

        Args:
            rosetta: object exposing ``synonymizer``; also passed to
                ``BufferedWriter``.
            push_to_queue: open a broker channel even when the 'neo4j' queue
                reports no consumers.
        """
        self.rosetta = rosetta
        self.synonymizer = rosetta.synonymizer
        # Ask the broker HTTP API for its queues and pick the consumer counts
        # of the 'neo4j' entries.
        # NOTE(review): no timeout is set on this request.
        response = requests.get(f"{os.environ['BROKER_API']}queues/")
        queues = response.json()
        num_consumers = [
            q['consumers'] for q in queues if q['name'] == 'neo4j'
        ]
        if (num_consumers and num_consumers[0]) or push_to_queue:
            self.connection = pika.BlockingConnection(
                pika.ConnectionParameters(
                    heartbeat=0,
                    host=os.environ['BROKER_HOST'],
                    virtual_host='builder',
                    credentials=pika.credentials.PlainCredentials(
                        os.environ['BROKER_USER'],
                        os.environ['BROKER_PASSWORD'])))
            self.channel = self.connection.channel()
            self.channel.queue_declare(queue='neo4j')
        else:
            self.connection = None
            self.channel = None

        self.buffered_writer = BufferedWriter(rosetta)

    @property
    def normalized(self):
        """The ``normalized`` flag of the underlying buffered writer."""
        return self.buffered_writer.normalized

    @normalized.setter
    def normalized(self, normalized):
        self.buffered_writer.normalized = normalized

    def __enter__(self):
        return self

    def __del__(self):
        # __init__ may have raised before `connection` was assigned, and the
        # connection may already be closed; neither case should raise here.
        connection = getattr(self, 'connection', None)
        if connection is not None and connection.is_open:
            connection.close()

    def __exit__(self, *args):
        self.flush()

    def write_node(self, node, synonymize=False, annotate=True):
        """Publish or buffer a node, optionally synonymizing/annotating it.

        Nodes whose id is already in ``buffered_writer.written_nodes`` are
        skipped. Annotation failures are logged and do not stop the write.
        """
        # check if node has already hit writer
        if node.id in self.buffered_writer.written_nodes:
            return
        if synonymize:
            self.synonymizer.synonymize(node)
        if annotate:
            # Deliberately best-effort: log the failure and still write.
            try:
                annotate_shortcut(node, self.rosetta)
            except Exception as e:
                logger.error(e)
                logger.error(traceback.format_exc())
        if self.channel is not None:
            self.channel.basic_publish(exchange='',
                                       routing_key='neo4j',
                                       body=pickle.dumps({
                                           'nodes': [node],
                                           'edges': []
                                       }))
        else:
            self.buffered_writer.write_node(node)

    def write_edge(self, edge, force_create=False):
        """Publish or buffer an edge.

        When publishing, ``force_create`` is conveyed by adding
        ``'force': True`` to the message; when buffering it is passed on to
        ``BufferedWriter.write_edge``.
        """
        if self.channel is not None:
            write_message = {'nodes': [], 'edges': [edge]}
            if force_create:
                write_message['force'] = True
            self.channel.basic_publish(exchange='',
                                       routing_key='neo4j',
                                       body=pickle.dumps(write_message))
        else:
            self.buffered_writer.write_edge(edge, force_create)

    def flush(self):
        """Publish a 'flush' message when connected, else flush the local
        buffered writer."""
        if self.connection and self.connection.is_open:
            if self.channel is not None:
                self.channel.basic_publish(exchange='',
                                           routing_key='neo4j',
                                           body=pickle.dumps('flush'))
        else:
            self.buffered_writer.flush()

    def close(self):
        """Send the 'close' string so the receiver can stop its consumer."""
        if self.connection and self.connection.is_open:
            if self.channel is not None:
                self.channel.basic_publish(exchange='',
                                           routing_key='neo4j',
                                           body=pickle.dumps('close'))
Пример #13
0
from builder.api import logging_config

# Module-level logger for the writer, at DEBUG level.
logger = LoggingUtil.init_logging("builder.writer", level=logging.DEBUG)

# Put the parent directory of this file's directory on sys.path and load the
# greent configuration found under it.
greent_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
sys.path.insert(0, greent_path)
rosetta = setup(os.path.join(greent_path, 'greent', 'greent.conf'))

# Open a blocking broker connection at import time; host and credentials come
# from the BROKER_HOST / BROKER_USER / BROKER_PASSWORD environment variables
# (a missing variable raises KeyError here).
connection = pika.BlockingConnection(pika.ConnectionParameters(host=os.environ['BROKER_HOST'],
    virtual_host='builder',
    credentials=pika.credentials.PlainCredentials(os.environ['BROKER_USER'], os.environ['BROKER_PASSWORD'])))
channel = connection.channel()

# Declare the 'neo4j' queue on the channel.
channel.queue_declare(queue='neo4j')

# Single writer shared by the message callback defined below.
writer = BufferedWriter(rosetta)

def callback(ch, method, properties, body):
    """Handle one queue message.

    The decoded body is either the literal string 'flush', which flushes the
    module-level writer, or a JSON document with 'nodes' and 'edges' lists
    whose entries are wrapped in ``KNode`` / ``KEdge`` and written.
    """
    message = body.decode()
    if message == 'flush':
        writer.flush()
        return

    graph = json.loads(message)
    for node_data in graph['nodes']:
        writer.write_node(KNode(node_data))
    for edge_data in graph['edges']:
        writer.write_edge(KEdge(edge_data))

channel.basic_consume(callback,
                      queue='neo4j',
Пример #14
0
def test_can_initialize():
    """A BufferedWriter can be constructed from the rosetta mock and is truthy."""
    writer = BufferedWriter(rosetta_mock)
    assert writer