Example #1
def test_sanitize_doc():
    run_bundle = event_model.compose_run()
    desc_bundle = run_bundle.compose_descriptor(
        data_keys={'motor': {'shape': [], 'dtype': 'number', 'source': '...'},
                   'image': {'shape': [512, 512], 'dtype': 'number',
                             'source': '...', 'external': 'FILESTORE:'}},
        name='primary')
    desc_bundle_baseline = run_bundle.compose_descriptor(
        data_keys={'motor': {'shape': [], 'dtype': 'number', 'source': '...'}},
        name='baseline')
    event1 = desc_bundle.compose_event(
        data={'motor': 0, 'image': numpy.ones((512, 512))},
        timestamps={'motor': 0, 'image': 0}, filled={'image': True},
        seq_num=1)
    event2 = desc_bundle.compose_event(
        data={'motor': 0, 'image': numpy.ones((512, 512))},
        timestamps={'motor': 0, 'image': 0}, filled={'image': True},
        seq_num=2)
    event3 = desc_bundle_baseline.compose_event(
        data={'motor': 0},
        timestamps={'motor': 0},
        seq_num=1)

    event_page = event_model.pack_event_page(event1, event2)
    bulk_events = {'primary': [event1, event2], 'baseline': [event3]}
    json.dumps(event_model.sanitize_doc(event_page))
    json.dumps(event_model.sanitize_doc(bulk_events))
    json.dumps(event_model.sanitize_doc(event1))
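
For context, a minimal sketch (not part of the original test) of what sanitize_doc does: it returns a copy of the document in which numpy arrays and scalars have been replaced by built-in Python types, so the result can be passed directly to json.dumps.

import json
import numpy
import event_model

doc = {'motor': numpy.float64(0.5), 'image': numpy.ones((2, 2))}
clean = event_model.sanitize_doc(doc)
print(json.dumps(clean))  # {"motor": 0.5, "image": [[1.0, 1.0], [1.0, 1.0]]}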
Example #2
def compare(a, b):
    a = normalize(a)
    b = normalize(b)
    a_indexed = {}
    b_indexed = {}
    for name, doc in a:
        if name == 'resource':
            # Check for an extraneous duplicate key in old documents.
            if 'id' in doc:
                assert doc['id'] == doc['uid']
                doc = doc.copy()
                doc.pop('id')
        if name == 'datum':
            a_indexed[('datum', doc['datum_id'])] = doc
        # v0 yields {'_name': 'RunStop'} if the stop doc is missing; v2 yields None.
        elif name == 'stop' and (doc is None or 'uid' not in doc):
            a_indexed[(name, None)] = None
        else:
            a_indexed[(name, doc['uid'])] = doc
    for name, doc in b:
        if name == 'resource':
            # Check for an extraneous duplicate key in old documents.
            if 'id' in doc:
                assert doc['id'] == doc['uid']
                doc = doc.copy()
                doc.pop('id')
        if name == 'datum':
            b_indexed[('datum', doc['datum_id'])] = doc
        # v0 yields {'_name': 'RunStop'} if the stop doc is missing; v2 yields None.
        elif name == 'stop' and (doc is None or 'uid' not in doc):
            b_indexed[(name, None)] = None
        else:
            b_indexed[(name, doc['uid'])] = doc
    # Same number of each type of document?
    a_counter = collections.Counter(name for name, uid in a_indexed)
    b_counter = collections.Counter(name for name, uid in b_indexed)
    assert a_counter == b_counter
    # Same uids and names?
    assert set(a_indexed) == set(b_indexed)
    # Now delve into the documents themselves...
    for (name, unique_id), a_doc in a_indexed.items():
        b_doc = b_indexed[name, unique_id]
        # Handle special case if 'stop' is None.
        if name == 'stop' and unique_id is None:
            assert b_doc is None or 'uid' not in b_doc
            continue
        # Same top-level keys?
        assert set(a_doc) == set(b_doc)
        # Same contents?
        try:
            assert a_doc == b_doc
        except ValueError:
            # We end up here if, for example, the dict contains numpy arrays.
            assert event_model.sanitize_doc(a_doc) == event_model.sanitize_doc(b_doc)
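
A minimal illustration (not part of the original helper) of why the try/except above is needed: comparing dicts that contain multi-element numpy arrays raises ValueError because the element-wise result cannot be reduced to a single bool, whereas the sanitized copies compare cleanly.

import numpy
import event_model

a = {'image': numpy.ones(3)}
b = {'image': numpy.ones(3)}
try:
    a == b  # raises ValueError: truth value of an array with more than one element is ambiguous
except ValueError:
    # sanitize_doc converts the arrays to plain lists, so == behaves as expected
    assert event_model.sanitize_doc(a) == event_model.sanitize_doc(b)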
Example #3
def test_update(db_factory, example_data):
    documents = example_data()
    metadatastore_db = db_factory()
    asset_registry_db = db_factory()
    serializer = Serializer(metadatastore_db, asset_registry_db)
    for item in documents:
        serializer(*item)
    original = documents[0][1]
    start = copy.deepcopy(original)
    start['user'] = '******'
    serializer.update('start', start)
    real = metadatastore_db.get_collection('run_start').find_one({'uid': start['uid']})
    real.pop('_id')
    assert sanitize_doc(real) == sanitize_doc(start)
    revision = metadatastore_db.get_collection('run_start_revisions').find_one({'document.uid': start['uid']})
    assert revision['revision'] == 0
    revision.pop('revision')
    revision.pop('_id')
    assert sanitize_doc(revision['document']) == sanitize_doc(original)

    revision1 = copy.deepcopy(start)
    start['user'] = '******'
    serializer.update('start', start)
    real = metadatastore_db.get_collection('run_start').find_one({'uid': start['uid']})
    real.pop('_id')
    assert sanitize_doc(real) == sanitize_doc(start)
    revision = metadatastore_db.get_collection('run_start_revisions').find_one({'document.uid': start['uid'],
                                                                                'revision': 1})
    assert revision['revision'] == 1
    revision.pop('revision')
    revision.pop('_id')
    assert sanitize_doc(revision['document']) == sanitize_doc(revision1)
Example #4
def create_expected(collector):
    '''This collects the metadata into a dict to compare to the loaded data
    '''
    expected = {'metadata': {}}
    expected['metadata']['descriptors'] = defaultdict(dict)
    for name, doc in collector:
        if name in ['start', 'stop']:
            sanitized_doc = event_model.sanitize_doc(doc)
            expected['metadata'][name] = sanitized_doc
        elif name == 'descriptor':
            sanitized_doc = event_model.sanitize_doc(doc)
            expected['metadata']['descriptors'][doc.get('name')][
                doc['uid']] = sanitized_doc

    return expected
Example #5
def test_mongo_consumer(
    RE,
    hw,
    numpy_md,
    publisher,
    data_broker,
    mongo_uri,
    consumer_process_factory,
    external_process_document_queue,
):
    """
    Subscribe a MongoConsumer to a kafka topic, and check that
    documents published to this topic are inserted correctly in a mongo database.

    If there is a problem with the Consumer running on the separate process. You may receive
    a very unhelpful error message: "KeyError 421e977f-eec1-48f6-9288-fb03fc5342b9" To debug
    try running pytest with the '-s' option. This should tell you what went wrong with the
    Consumer.
    """

    original_documents = []

    def record(name, doc):
        original_documents.append((name, doc))

    # Subscribe the publisher to the run engine. This puts the RE documents into Kafka.
    RE.subscribe(publisher)

    # Also keep a copy of the produced documents to compare with later.
    RE.subscribe(record)

    # Create the consumer, which takes documents from Kafka and puts them in mongo.
    # For some reason this does not work as a fixture.
    with external_process_document_queue(
        topics=["^.*-kafka-test*"],
        group_id="kafka-unit-test-group-id",
        consumer_config={"auto.offset.reset": "latest"},
        process_factory=consumer_process_factory,
        consumer_factory=MongoConsumer,
        # the next two arguments will be passed to MongoConsumer() as **kwargs
        polling_duration=1.0,
        mongo_uri=mongo_uri,
    ) as document_queue:  # noqa

        # Run a plan to generate documents.
        (uid,) = RE(count([hw.det]), md=numpy_md)

        # The documents should now be flowing from the RE to the mongo database, via Kafka.
        time.sleep(10)

        # Get the documents from the mongo database.
        mongo_documents = list(data_broker["xyz"][uid].canonical(fill="no"))

        # Check that the original documents are the same as the documents in the mongo database.
        original_docs = [
            json.loads(json.dumps(event_model.sanitize_doc(item)))
            for item in original_documents
        ]
        compare(original_docs, mongo_documents, "mongo_consumer_test")
Example #6
def run(example_data, serializer, permanent_db):
    """
    Testbench for suitcase-mongo-embedded serializer.
    This stores all documents that are going to the serializer into a
    dictionary. After the run completes, it then queries the permanent
    mongo database, and reads the documents to a separate dictionary. The two
    dictionaries are checked to see if they match.
    """
    run_dict = {
        'start': {},
        'stop': {},
        'descriptor': [],
        'resource': [],
        'event': [],
        'datum': []
    }
    documents = example_data()
    mongo_serializer = serializer
    for item in documents:

        # Fix formatting for JSON.
        item = event_model.sanitize_doc(item)
        # Send the bluesky doc to the serializer
        mongo_serializer(*item)

        # bulk_events/bulk_datum documents need to be converted to a list
        # of events/datums before being inserted into run_dict.
        if item[0] in {'bulk_events', 'bulk_datum'}:
            pages = bulk_to_pages(*item)
            doc_list = pages_to_list(pages)
            for doc in doc_list:
                run_dict[doc[0]].append(doc[1])
        elif item[0] in {'event_page', 'datum_page'}:
            doc_list = page_to_list(*item)
            for doc in doc_list:
                run_dict[doc[0]].append(doc[1])
        elif isinstance(run_dict.get(item[0]), list):
            run_dict[item[0]].append(item[1])
        else:
            run_dict[item[0]] = item[1]

    # Read the run from the mongo database and store in a dict.
    frozen_run_dict = run_list_to_dict(
        get_embedded_run(permanent_db, run_dict['start']['uid']))

    # Sort the event field of each dictionary. With multiple streams, the
    # documents that don't go through the serializer don't appear to be sorted
    # correctly.
    if len(run_dict['event']):
        run_dict['event'] = sorted(run_dict['event'],
                                   key=lambda x: x['descriptor'])
        frozen_run_dict['event'] = sorted(frozen_run_dict['event'],
                                          key=lambda x: x['descriptor'])
    # Compare the two dictionaries.
    assert (json.loads(json.dumps(run_dict, sort_keys=True)) == json.loads(
        json.dumps(frozen_run_dict, sort_keys=True)))
Example #7
    def __call__(self, name, doc):
        # Before inserting into mongo, convert any numpy objects into built-in
        # Python types compatible with pymongo.
        sanitized_doc = event_model.sanitize_doc(doc)
        if self._worker_error:
            raise RuntimeError("Worker exception: ") from self._worker_error
        if self._frozen:
            raise RuntimeError("Cannot insert documents into "
                               "frozen Serializer.")

        return super().__call__(name, sanitized_doc)
Example #8
def merkle_hash(node):
    hasher = sha256()
    dbf_node = sanitize_doc(merkle_friendly_node(node))
    hash_string = ",".join(
        str(dbf_node[k]) for k in ["name", "mod", "args", "kwargs"])
    hasher.update(hash_string.encode("utf-8"))
    # Once we hit a FromEventStream node we don't need to go higher in the graph
    if isinstance(node, SimpleFromEventStream):
        return hasher.hexdigest()
    for u in node.upstreams:
        idx = u.downstreams.index(node)
        u_m_hash = merkle_hash(u)
        hasher.update(f"{idx}{u_m_hash}".encode("utf-8"))
    return hasher.hexdigest()
Example #9
def descriptor(self, doc):
    '''Add `descriptor` document information to the metadata dictionary.

    This method adds the descriptor document information to the metadata
    dictionary. In addition it also creates the file for data with the
    stream_name given by the descriptor doc for later use.

    Parameters
    ----------
    doc : dict
        EventDescriptor document
    '''
    # extract some useful info from the doc
    stream_name = doc.get('name')
    # replace numpy objects with python ones to ensure json compatibility
    sanitized_doc = event_model.sanitize_doc(doc)
    # Add the doc to self._meta
    self._meta['metadata']['descriptors'][stream_name][
        sanitized_doc['uid']] = sanitized_doc
Example #10
def test_kafka_publisher(RE, hw, bootstrap_servers):
    kafka_topic, runrouter_token = nslsii.subscribe_kafka_publisher(
        RE=RE,
        beamline_name="test",
        bootstrap_servers=bootstrap_servers,
        producer_config={
            "acks": "all",
            "enable.idempotence": False,
            "request.timeout.ms": 1000,
        },
    )

    assert kafka_topic == "test.bluesky.documents"
    assert isinstance(runrouter_token, int)

    # Run a RemoteDispatcher on a separate process. Pass the documents
    # it receives over a multiprocessing.Queue back to this process so
    # we can compare with locally stored documents.
    # The default "auto.commit.interval.ms" is 5000, but using the default
    # means some of the Kafka messages consumed here are not committed
    # and so are DELIVERED AGAIN the next time this test runs. The solution
    # is setting a very short "auto.commit.interval.ms" for this test.
    def make_and_start_dispatcher(document_queue):
        def put_in_queue(name, doc):
            document_queue.put((name, doc))

        kafka_dispatcher = RemoteDispatcher(
            topics=[kafka_topic],
            bootstrap_servers=bootstrap_servers,
            group_id="test_kafka_publisher",
            consumer_config={
                "auto.offset.reset": "latest",
                "auto.commit.interval.ms": 100,
            },
            polling_duration=1.0,
            deserializer=partial(msgpack.loads, object_hook=mpn.decode),
        )
        kafka_dispatcher.subscribe(put_in_queue)
        kafka_dispatcher.start()

    queue_ = multiprocessing.Queue()
    dispatcher_proc = multiprocessing.Process(
        target=make_and_start_dispatcher, daemon=True, args=(queue_,)
    )
    dispatcher_proc.start()
    # give the dispatcher process time to start
    time.sleep(10)

    local_documents = []

    # use a RunRouter to get event_pages locally because
    # the KafkaPublisher will produce event_pages
    def document_accumulator_factory(start_doc_name, start_doc):
        def document_accumulator(name, doc):
            local_documents.append((name, doc))

        return [document_accumulator], []

    local_run_router = RunRouter(factories=[document_accumulator_factory])
    RE.subscribe(local_run_router)

    # test that numpy data is transmitted correctly
    md1 = {
        "numpy_data": {"nested": np.array([1, 2, 3])},
        "numpy_scalar": np.float64(4),
        "numpy_array": np.ones((3, 3)),
    }

    RE(count([hw.det1]), md=md1)

    # test that numpy data is transmitted correctly
    md2 = {
        "numpy_data": {"nested": np.array([4, 5, 6])},
        "numpy_scalar": np.float64(7),
        "numpy_array": np.ones((4, 4)),
    }

    RE(count([hw.det2]), md=md2)

    # Get the documents from the inter-process queue (or timeout)
    remote_documents = []
    while True:
        try:
            name_, doc_ = queue_.get(timeout=1)
            remote_documents.append((name_, doc_))
        except queue.Empty:
            print("the queue is empty!")
            break

    dispatcher_proc.terminate()
    dispatcher_proc.join()

    # sanitize_doc normalizes some document data, such as numpy arrays,
    # that are problematic for direct document comparison by "assert"
    sanitized_local_published_documents = [
        (name, sanitize_doc(doc)) for name, doc in local_documents
    ]
    sanitized_remote_published_documents = [
        (name, sanitize_doc(doc)) for name, doc in remote_documents
    ]

    assert len(remote_documents) == len(local_documents)
    assert len(sanitized_remote_published_documents) == len(
        sanitized_local_published_documents
    )
    assert sanitized_remote_published_documents == sanitized_local_published_documents

    # test that we got the correct subscription token for the Kafka Publisher
    # KeyError is raised if the token is not known
    RE.unsubscribe(token=runrouter_token)
Example #11
def test_bluesky_consumer(
    RE,
    hw,
    serializer,
    deserializer,
    publisher_factory,
    consumer_process_factory,
    external_process_document_queue,
):
    # COMPONENT 1
    # a Kafka broker must be running
    # in addition the broker must have topic "test.bluesky.consumer"
    # or be configured to create topics on demand (recommended)

    # COMPONENT 2
    # Run a BlueskyConsumer polling loop in a separate process.
    # Pass the documents it receives over a Queue to this process
    # and compare them against the documents published directly
    # by the RunEngine.
    test_topic = "test.bluesky.consumer"
    with external_process_document_queue(
        topics=[test_topic],
        deserializer=deserializer,
        process_factory=partial(
            consumer_process_factory, consumer_factory=BlueskyConsumer
        ),
    ) as document_queue:
        # COMPONENT 3
        # Set up a RunEngine in this process that will
        # send all documents to a bluesky_kafka.Publisher
        # and accumulate all documents in the local_documents list
        kafka_publisher = publisher_factory(
            topic=test_topic, serializer=serializer, flush_on_stop_doc=True
        )
        RE.subscribe(kafka_publisher)

        local_documents = []
        RE.subscribe(
            lambda local_name, local_doc: local_documents.append(
                (local_name, local_doc)
            )
        )

        # test that numpy data is transmitted correctly
        md = {
            "numpy_data": {"nested": np.array([1, 2, 3])},
            "numpy_scalar": np.float64(3),
            "numpy_array": np.ones((3, 3)),
        }

        # documents will be generated by this plan
        # and published by the Kafka Publisher
        RE(count([hw.det]), md=md)

        # retrieve the documents published by the Kafka broker
        remote_documents = get_all_documents_from_queue(document_queue=document_queue)

        # sanitize_doc normalizes some document data, such as numpy arrays, that are
        # problematic for direct comparison of documents by "assert"
        sanitized_local_documents = [sanitize_doc(doc) for doc in local_documents]
        sanitized_remote_documents = [sanitize_doc(doc) for doc in remote_documents]

        assert len(sanitized_remote_documents) == len(sanitized_local_documents)
        assert sanitized_remote_documents == sanitized_local_documents
Example #12
def test_zmq_prefix(RE, hw):
    # COMPONENT 1
    # Run a 0MQ proxy on a separate process.
    def start_proxy():
        Proxy(5567, 5568).start()

    proxy_proc = multiprocessing.Process(target=start_proxy, daemon=True)
    proxy_proc.start()
    time.sleep(5)  # Give this plenty of time to start up.

    # COMPONENT 2
    # Run a Publisher and a RunEngine in this main process.
    p = Publisher('127.0.0.1:5567', prefix=b'sb')  # noqa
    p2 = Publisher('127.0.0.1:5567', prefix=b'not_sb')  # noqa
    RE.subscribe(p)
    RE.subscribe(p2)

    # COMPONENT 3
    # Run a RemoteDispatcher on another separate process. Pass the documents
    # it receives over a Queue to this process, so we can count them for our
    # test.

    def make_and_start_dispatcher(queue):
        def put_in_queue(name, doc):
            print('putting ', name, 'in queue')
            queue.put((name, doc))

        d = RemoteDispatcher('127.0.0.1:5568', prefix=b'sb')
        d.subscribe(put_in_queue)
        print("REMOTE IS READY TO START")
        d.loop.call_later(9, d.stop)
        d.start()

    queue = multiprocessing.Queue()
    dispatcher_proc = multiprocessing.Process(target=make_and_start_dispatcher,
                                              daemon=True,
                                              args=(queue, ))
    dispatcher_proc.start()
    time.sleep(5)  # As above, give this plenty of time to start.

    # Generate documents. The Publisher will send them to the proxy
    # device over 5567, and the proxy will send them to the
    # RemoteDispatcher over 5568. The RemoteDispatcher will push them into
    # the queue, where we can verify that they round-tripped.

    local_accumulator = []

    def local_cb(name, doc):
        local_accumulator.append((name, doc))

    # Check that numpy stuff is sanitized by putting some in the start doc.
    md = {
        'stuff': {
            'nested': np.array([1, 2, 3])
        },
        'scalar_stuff': np.float64(3),
        'array_stuff': np.ones((3, 3))
    }

    # RE([Msg('open_run', **md), Msg('close_run')], local_cb)
    RE(count([hw.det]), local_cb, **md)
    time.sleep(1)

    # Get the documents from the queue (or timeout --- test will fail)
    remote_accumulator = []
    for i in range(len(local_accumulator)):
        remote_accumulator.append(queue.get(timeout=2))
    p.close()
    proxy_proc.terminate()
    dispatcher_proc.terminate()
    proxy_proc.join()
    dispatcher_proc.join()
    ra = sanitize_doc(remote_accumulator)
    la = sanitize_doc(local_accumulator)
    assert ra == la
Example #13
def test_zmq_no_RE_newserializer(RE):
    cloudpickle = pytest.importorskip('cloudpickle')

    # COMPONENT 1
    # Run a 0MQ proxy on a separate process.
    def start_proxy():
        Proxy(5567, 5568).start()

    proxy_proc = multiprocessing.Process(target=start_proxy, daemon=True)
    proxy_proc.start()
    time.sleep(5)  # Give this plenty of time to start up.

    # COMPONENT 2
    # Run a Publisher and a RunEngine in this main process.
    p = Publisher('127.0.0.1:5567', serializer=cloudpickle.dumps)  # noqa

    # COMPONENT 3
    # Run a RemoteDispatcher on another separate process. Pass the documents
    # it receives over a Queue to this process, so we can count them for our
    # test.
    def make_and_start_dispatcher(queue):
        def put_in_queue(name, doc):
            print('putting ', name, 'in queue')
            queue.put((name, doc))

        d = RemoteDispatcher('127.0.0.1:5568', deserializer=cloudpickle.loads)
        d.subscribe(put_in_queue)
        print("REMOTE IS READY TO START")
        d.loop.call_later(9, d.stop)
        d.start()

    queue = multiprocessing.Queue()
    dispatcher_proc = multiprocessing.Process(target=make_and_start_dispatcher,
                                              daemon=True,
                                              args=(queue, ))
    dispatcher_proc.start()
    time.sleep(5)  # As above, give this plenty of time to start.

    # Generate two documents. The Publisher will send them to the proxy
    # device over 5567, and the proxy will send them to the
    # RemoteDispatcher over 5568. The RemoteDispatcher will push them into
    # the queue, where we can verify that they round-tripped.

    local_accumulator = []

    def local_cb(name, doc):
        local_accumulator.append((name, doc))

    RE([Msg('open_run'), Msg('close_run')], local_cb)

    # This time the Publisher isn't attached to an RE. Send the documents
    # manually. (The idea is, these might have come from a Broker instead...)
    for name, doc in local_accumulator:
        p(name, doc)
    time.sleep(1)

    # Get the two documents from the queue (or timeout --- test will fail)
    remote_accumulator = []
    for i in range(2):
        remote_accumulator.append(queue.get(timeout=2))
    p.close()
    proxy_proc.terminate()
    dispatcher_proc.terminate()
    proxy_proc.join()
    dispatcher_proc.join()
    ra = sanitize_doc(remote_accumulator)
    la = sanitize_doc(local_accumulator)
    assert ra == la
Example #14
def collect(name, doc):
    docs.append((name, event_model.sanitize_doc(doc)))
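
A short usage sketch (hedged; the RunEngine subscription below is an assumption, not shown in the original snippet): a collector like this is typically subscribed to a bluesky RunEngine so that every emitted (name, doc) pair is stored in sanitized, JSON-friendly form.

import event_model

docs = []

def collect(name, doc):
    docs.append((name, event_model.sanitize_doc(doc)))

RE.subscribe(collect)  # assumes a RunEngine named RE, as in the other examples on this page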
Example #15
def __call__(self, name, doc):
    # Before inserting into mongo, convert any numpy objects into built-in
    # Python types compatible with pymongo.
    sanitized_doc = event_model.sanitize_doc(doc)
    return super().__call__(name, sanitized_doc)
Example #16
def test_build_and_subscribe_kafka_queue_thread_publisher(
    kafka_bootstrap_servers,
    temporary_topics,
    consume_documents_from_kafka_until_first_stop_document,
    RE,
    hw,
):
    """Test threaded publishing of Kafka messages.

    This test follows the pattern in bluesky_kafka/tests/test_in_single_process.py,
    which is to publish Kafka messages _before_ subscribing a Kafka consumer to
    those messages. After the messages have been published, a consumer is subscribed
    to the topic and should receive all messages since they will have been cached by
    the Kafka broker(s). This keeps the test code relatively simple.

    Start Kafka and Zookeeper like this:
      $ sudo docker-compose -f scripts/bitnami-kafka-docker-compose.yml up

    Remove Kafka and Zookeeper containers like this:
      $ sudo docker ps -a -q
      78485383ca6f
      8a80fb4a385f
      $ sudo docker stop 78485383ca6f 8a80fb4a385f
      78485383ca6f
      8a80fb4a385f
      $ sudo docker rm 78485383ca6f 8a80fb4a385f
      78485383ca6f
      8a80fb4a385f

    Or remove ALL containers like this:
      $ sudo docker stop $(sudo docker ps -a -q)
      $ sudo docker rm $(sudo docker ps -a -q)
    Use this in difficult cases to remove *all traces* of docker containers:
      $ sudo docker system prune -a

    Parameters
    ----------
    kafka_bootstrap_servers: str (pytest fixture)
        comma-delimited string of Kafka broker host:port, for example "kafka1:9092,kafka2:9092"
    temporary_topics: context manager (pytest fixture)
        creates and cleans up temporary Kafka topics for testing
    consume_documents_from_kafka_until_first_stop_document: function (pytest fixture)
        consumes documents from the given Kafka topic until the first stop document arrives
    RE: pytest fixture
        bluesky RunEngine
    hw: pytest fixture
        ophyd simulated hardware objects
    """

    # use a random string as the beamline name so topics will not be duplicated across tests
    beamline_name = str(uuid.uuid4())[:8]
    with temporary_topics(
            topics=[f"{beamline_name}.bluesky.runengine.documents"]) as (
                beamline_topic, ):

        (
            nslsii_beamline_topic,
            kafka_publisher_thread_exit_event,
            re_subscription_token,
        ) = nslsii._subscribe_kafka_queue_thread_publisher(
            RE=RE,
            beamline_name=beamline_name,
            bootstrap_servers=kafka_bootstrap_servers,
            producer_config={
                "acks": "all",
                "enable.idempotence": False,
                "request.timeout.ms": 1000,
            },
        )

        assert nslsii_beamline_topic == beamline_topic
        assert isinstance(re_subscription_token, int)

        published_bluesky_documents = []

        # this function will store all documents
        # published by the RunEngine in a list
        def store_published_document(name, document):
            published_bluesky_documents.append((name, document))

        RE.subscribe(store_published_document)

        RE(count([hw.det]))

        # it is known that RE(count()) will produce four
        # documents: start, descriptor, event, stop
        assert len(published_bluesky_documents) == 4

        consumed_bluesky_documents = (
            consume_documents_from_kafka_until_first_stop_document(
                kafka_topic=nslsii_beamline_topic))

        assert len(published_bluesky_documents) == len(
            consumed_bluesky_documents)

        # sanitize_doc normalizes some document data, such as numpy arrays, that are
        # problematic for direct comparison of documents by 'assert'
        sanitized_published_bluesky_documents = [
            sanitize_doc(doc) for doc in published_bluesky_documents
        ]
        sanitized_consumed_bluesky_documents = [
            sanitize_doc(doc) for doc in consumed_bluesky_documents
        ]

        assert len(sanitized_consumed_bluesky_documents) == len(
            sanitized_published_bluesky_documents)
        assert (sanitized_consumed_bluesky_documents ==
                sanitized_published_bluesky_documents)
Example #17
def test_publisher_and_consumer(kafka_bootstrap_servers, temporary_topics,
                                publisher_factory, hw, serializer,
                                deserializer):
    """Test publishing and consuming bluesky documents in Kafka messages.

    Messages will be "consumed" by a `bluesky_kafka.BlueskyConsumer`.

    Parameters
    ----------
    kafka_bootstrap_servers: str (pytest fixture)
        comma-delimited string of Kafka broker host:port, for example "localhost:9092"
    temporary_topics: context manager (pytest fixture)
        creates and cleans up temporary Kafka topics for testing
    publisher_factory: pytest fixture
        fixture-as-a-factory for creating Publishers
    hw: pytest fixture
        ophyd simulated hardware objects
    serializer: function (pytest test parameter)
        function used to serialize bluesky documents in Kafka messages
    deserializer: function (pytest test parameter)
        function used to deserialize bluesky documents from Kafka messages
    """

    with temporary_topics(
            topics=[f"test.publisher.and.consumer.{serializer.__module__}"
                    ]) as (topic, ):

        bluesky_publisher = publisher_factory(topic=topic,
                                              key=f"{topic}.key",
                                              flush_on_stop_doc=True,
                                              serializer=serializer)

        published_bluesky_documents = []

        # this function will store all documents
        # published by the RunEngine in a list
        def store_published_document(name, document):
            published_bluesky_documents.append((name, document))

        RE = RunEngine()
        RE.subscribe(bluesky_publisher)
        RE.subscribe(store_published_document)

        # include some metadata in the count plan
        # to test numpy serialization
        md = {
            "numpy_data": {
                "nested": np.array([1, 2, 3])
            },
            "numpy_scalar": np.float64(3),
            "numpy_array": np.ones((3, 3)),
        }

        RE(count([hw.det]), md=md)

        # it is known that RE(count()) will produce four
        # documents: start, descriptor, event, stop
        assert len(published_bluesky_documents) == 4

        consumed_bluesky_documents = []

        # this function stores all documents bluesky_consumer
        # gets from the Kafka broker in a list
        def store_consumed_document(consumer, topic, name, document):
            consumed_bluesky_documents.append((name, document))

        bluesky_consumer = BlueskyConsumer(
            topics=[topic],
            bootstrap_servers=kafka_bootstrap_servers,
            group_id=f"{topic}.consumer.group",
            consumer_config={
                # it is important to set a short time interval
                # for automatic commits or the Kafka broker may
                # not be notified by the consumer that messages
                # were received before the test ends; the result
                # is that the Kafka broker will try to re-deliver
                # those messages to the next consumer that subscribes
                # to the same topic(s)
                "auto.commit.interval.ms": 100,
                # this consumer is intended to read messages that
                # have already been published, so it is necessary
                # to specify "earliest" here
                "auto.offset.reset": "earliest",
            },
            process_document=store_consumed_document,
            polling_duration=1.0,
            deserializer=deserializer,
        )

        # this function returns False to end the bluesky_consumer polling loop
        def until_first_stop_document():
            assert len(consumed_bluesky_documents) <= len(
                published_bluesky_documents)
            if "stop" in [name for name, _ in consumed_bluesky_documents]:
                return False
            else:
                return True

        # start() will return when 'until_first_stop_document' returns False
        bluesky_consumer.start(continue_polling=until_first_stop_document, )

        assert len(published_bluesky_documents) == len(
            consumed_bluesky_documents)

        # sanitize_doc normalizes some document data, such as numpy arrays, that are
        # problematic for direct comparison of documents by 'assert'
        sanitized_published_bluesky_documents = [
            sanitize_doc(doc) for doc in published_bluesky_documents
        ]
        sanitized_consumed_bluesky_documents = [
            sanitize_doc(doc) for doc in consumed_bluesky_documents
        ]

        assert len(sanitized_consumed_bluesky_documents) == len(
            sanitized_published_bluesky_documents)
        assert (sanitized_consumed_bluesky_documents ==
                sanitized_published_bluesky_documents)