import heapq

import event_model


def test_rechunk_event_pages():

    def event_page_gen(page_size, num_pages):
        """
        Generate event_pages for testing.
        """
        data_keys = ['x', 'y', 'z']
        array_keys = ['seq_num', 'time', 'uid']
        for _ in range(num_pages):
            yield {'descriptor': 'DESCRIPTOR',
                   **{key: list(range(page_size)) for key in array_keys},
                   'data': {key: list(range(page_size)) for key in data_keys},
                   'timestamps': {key: list(range(page_size)) for key in data_keys},
                   'filled': {key: list(range(page_size)) for key in data_keys}}

    # Get a list of 31 event pages of size 13 (403 events total).
    event_pages = list(event_page_gen(13, 31))
    # Rechunk the event_pages to size 7.
    event_pages_7 = list(event_model.rechunk_event_pages(event_pages, 7))
    # 403 events rechunked into pages of 7 gives 57 full pages plus a
    # final page of 4.
    assert [7] * 57 + [4] == [len(page['uid']) for page in event_pages_7]
    # Change the size back to 13.
    event_pages_13 = event_model.rechunk_event_pages(event_pages_7, 13)
    # Check that the round trip reproduces the original list of event_pages.
    assert event_pages == list(event_pages_13)
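# For orientation, a minimal sketch of the idea behind
# event_model.rechunk_event_pages as exercised above: flatten the incoming
# pages column by column, then slice the columns back out in fixed-size
# chunks. The helper name _rechunk_sketch is ours, not the library's; this
# version buffers everything in memory and only handles the top-level array
# columns, whereas the real implementation streams page by page and also
# slices the nested 'data'/'timestamps'/'filled' dicts.
def _rechunk_sketch(pages, chunk_size):
    array_keys = ['seq_num', 'time', 'uid']
    # Concatenate each column across all incoming pages. This assumes all
    # pages share one descriptor, as they do in the test above.
    columns = {key: [] for key in array_keys}
    descriptor = None
    for page in pages:
        descriptor = page['descriptor']
        for key in array_keys:
            columns[key].extend(page[key])
    # Re-slice the flattened columns into pages of chunk_size; the final
    # page may be shorter.
    for start in range(0, len(columns['uid']), chunk_size):
        yield {'descriptor': descriptor,
               **{key: columns[key][start:start + chunk_size]
                  for key in array_keys}}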
def interlace_event_page_chunks(*gens, chunk_size):
    """
    Take event_page generators and interlace their results by timestamp.

    This is a modification of https://github.com/bluesky/databroker/pull/378/

    Parameters
    ----------
    gens : generators
        Generators of event_pages, each a dict containing a 'time' key.
    chunk_size : integer
        Size of pages to yield.

    Yields
    ------
    event_page : dict
        The next event_page, ordered by the first timestamp of each page.
    """
    iters = [iter(event_model.rechunk_event_pages(g, chunk_size)) for g in gens]
    heap = []

    def safe_next(indx):
        try:
            val = next(iters[indx])
        except StopIteration:
            return
        heapq.heappush(heap, (val['time'][0], indx, val))

    # Prime the heap with the first page from each iterator.
    for i in range(len(iters)):
        safe_next(i)
    while heap:
        _, indx, val = heapq.heappop(heap)
        yield val
        # Refill the heap from the iterator we just consumed so the merge
        # continues until every generator is exhausted.
        safe_next(indx)
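# A hedged usage sketch for interlace_event_page_chunks. The test name, the
# timed_pages helper, and the page contents are illustrative additions, not
# part of the original suite. With chunk_size=1 every yielded page holds a
# single event, so the merged stream should come out fully sorted by
# timestamp.
def test_interlace_event_page_chunks():

    def timed_pages(times, page_size):
        """Yield event_pages whose 'time' column is drawn from `times`."""
        for i in range(0, len(times), page_size):
            chunk = times[i:i + page_size]
            n = len(chunk)
            yield {'descriptor': 'DESCRIPTOR',
                   'time': list(chunk),
                   'seq_num': list(range(n)),
                   'uid': [f'uid-{t}' for t in chunk],
                   'data': {'x': list(range(n))},
                   'timestamps': {'x': list(chunk)},
                   'filled': {'x': [True] * n}}

    # Two streams with strictly interleaved timestamps.
    gen_a = timed_pages([0.0, 2.0, 4.0, 6.0], page_size=2)
    gen_b = timed_pages([1.0, 3.0, 5.0, 7.0], page_size=2)
    merged = list(interlace_event_page_chunks(gen_a, gen_b, chunk_size=1))
    assert [page['time'][0] for page in merged] == [float(i) for i in range(8)]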