def test_load_children(session):
    """load_children() attaches the root's descendants regardless of description order."""
    arn = "stream-arn"
    description = stream_description(5, {0: 1, 1: [2, 3]}, stream_arn=arn)
    session.describe_stream.return_value = description

    # Hierarchy requested from stream_description:
    #   0 -> 1 -> 2
    #          -> 3
    #   4
    # Capture the ids by position now, before the list is shuffled below.
    root_id, child_id, first_grandchild_id, second_grandchild_id = (
        description["Shards"][index]["ShardId"] for index in range(4)
    )

    # The root is built from the first described shard and starts without children.
    root = Shard(stream_arn=arn, shard_id=root_id, session=session)
    assert not root.children

    # Loading must not depend on the order of the described shards.
    random.shuffle(description["Shards"])
    root.load_children()

    assert {s.shard_id for s in root.children} == {child_id}
    only_child = root.children[0]
    assert only_child.shard_id == child_id

    assert {s.shard_id for s in only_child.children} == {
        first_grandchild_id,
        second_grandchild_id,
    }
    session.describe_stream.assert_called_once_with(
        stream_arn=arn, first_shard=root.shard_id)
def test_apply_records(initial_sequence_number, record_count, session):
    """_apply_get_records_response only records a position when the shard had none."""
    # For now this ignores that a real iterator would never be "latest" and
    # also carry a sequence_number.
    shard = Shard(
        stream_arn="stream-arn",
        shard_id="shard-id",
        iterator_type="initial-iterator-type",
        sequence_number=initial_sequence_number,
        session=session)

    records = [
        dynamodb_record_with(key=True, sequence_number=n)
        for n in range(record_count)
    ]
    shard._apply_get_records_response(
        {"Records": records, "NextShardIterator": "next-iterator-id"})

    # Applying a response must not trigger another fetch.
    session.get_stream_records.assert_not_called()

    if not records:
        # Nothing found: position untouched, one more empty response counted.
        assert shard.iterator_type == "initial-iterator-type"
        assert shard.sequence_number == initial_sequence_number
        assert shard.empty_responses == 1
        return

    if initial_sequence_number:
        # Records found, but an existing sequence_number is never overwritten.
        assert shard.iterator_type == "initial-iterator-type"
        assert shard.sequence_number == initial_sequence_number
    else:
        # Records found and no prior position: remember the first record's.
        assert shard.iterator_type == "at_sequence"
        assert shard.sequence_number == records[0]["dynamodb"]["SequenceNumber"] == 0
    assert shard.empty_responses == 0
def test_eq_not_set_or_different(attr):
    """Shards are equal only when children match (in any order) and ``attr`` matches."""
    parent = Shard(stream_arn="parent-arn", shard_id="parent-id")
    first_child, second_child = (
        Shard(stream_arn="child-arn", shard_id="child-id") for _ in range(2)
    )
    kwargs = dict(
        stream_arn="stream-arn",
        shard_id="shard-id",
        iterator_id="iterator-id",
        iterator_type="iterator-type",
        sequence_number="sequence-number",
        parent=parent,
    )
    shard, other = Shard(**kwargs), Shard(**kwargs)

    def assert_equal():
        # Equality is checked from both sides.
        assert shard == other
        assert other == shard

    def assert_unequal():
        # Deliberately `not ==` (rather than `!=`) so only __eq__ is exercised.
        assert not shard == other
        assert not other == shard

    # Built from identical kwargs, so equal to begin with.
    assert_equal()

    # Children on one side only break equality.
    shard.children.extend([first_child, second_child])
    assert_unequal()

    # The same children in reverse order restore it.
    other.children.extend([second_child, first_child])
    assert_equal()

    # Changing the attribute under test breaks equality again.
    setattr(other, attr, "something else")
    assert_unequal()
def test_token(caplog):
    """Shard.token reflects position and parent; a trim_horizon token logs a warning."""
    parent = Shard(stream_arn="parent-stream-arn", shard_id="parent-id")
    shard = Shard(
        stream_arn="stream-arn",
        shard_id="shard-id",
        iterator_id="iterator-id",
        iterator_type="at_sequence",
        sequence_number="sequence-number",
        parent=parent)

    # iterator_id is not expected in the token; the parent appears by shard_id.
    expected = dict(
        stream_arn="stream-arn",
        shard_id="shard-id",
        iterator_type="at_sequence",
        sequence_number="sequence-number",
        parent="parent-id",
    )
    assert shard.token == expected

    # Without a parent the "parent" key is dropped entirely.
    shard.parent = None
    del expected["parent"]
    assert shard.token == expected

    # Nothing has been logged up to this point.
    assert not caplog.records

    # Reading the token is done purely for its logging side effect here.
    shard.iterator_type = "trim_horizon"
    shard.token
    warning = (
        "bloop.stream",
        logging.WARNING,
        "creating shard token at non-exact location \"trim_horizon\"",
    )
    assert caplog.record_tuples == [warning]
def test_remove_shard(is_active, is_root, has_buffered, coordinator):
    """remove_shard promotes the shard's children and drops its buffered records."""
    shard = Shard(
        stream_arn=coordinator.stream_arn,
        shard_id="shard-id",
        iterator_type="at_sequence",
        sequence_number="13")
    # A second shard that always contributes one buffered record.
    other = Shard(
        stream_arn=coordinator.stream_arn,
        shard_id="other-shard-id",
        iterator_type="after_sequence",
        sequence_number="200")

    children = [
        Shard(stream_arn="child-arn", shard_id="child-" + str(i))
        for i in range(4)
    ]
    shard.children.extend(children)

    if is_active:
        coordinator.active.append(shard)
    if is_root:
        coordinator.roots.append(shard)
    if has_buffered:
        buffered = [local_record(sequence_number=str(i)) for i in range(7)]
        coordinator.buffer.push_all((record, shard) for record in buffered)
    coordinator.buffer.push(local_record(sequence_number="200"), other)

    coordinator.remove_shard(shard, drop_buffered_records=True)

    # Children take over whichever collections the removed shard was in.
    if is_active:
        for child in children:
            assert child in coordinator.active
    if is_root:
        for child in children:
            assert child in coordinator.roots

    # Drain the buffer: nothing left may belong to the removed shard.
    while coordinator.buffer:
        _, owner = coordinator.buffer.pop()
        assert owner is not shard
def build_shards(n, shape=None, session=None, stream_arn=None, shard_id_prefix=""):
    """Build ``n`` shards and link them according to ``shape``.

    ``shape`` maps a parent's index to either a single child index or an
    iterable of child indexes. For example, this hierarchy (by index)::

        0 -> 1 -> 2 -> 4
               -> 3 -> 5

    is expressed as::

        build_shards(6, {0: 1, 1: [2, 3], 2: 4, 3: 5}, session=session)

    :param n: number of shards to create.
    :param shape: parent index -> child index (or indexes); ``None`` or empty
        means flat shards with no hierarchy.
    :param session: passed through to every ``Shard``.
    :param stream_arn: passed through to every ``Shard``.
    :param shard_id_prefix: when non-empty, shard ids become
        ``"<prefix>-shard-id-<i>"`` instead of ``"shard-id-<i>"``.
    :return: list of the ``n`` shards, in index order.
    """
    # Default to flat shards, no hierarchy
    shape = shape or {}
    prefix = (shard_id_prefix + "-") if shard_id_prefix else ""

    shards = [
        Shard(
            stream_arn=stream_arn,
            shard_id="{}shard-id-{}".format(prefix, i),
            session=session)
        for i in range(n)
    ]

    for parent_index, child_indexes in shape.items():
        # Normalize a lone child index so both forms share one linking path.
        if isinstance(child_indexes, int):
            child_indexes = [child_indexes]
        parent = shards[parent_index]
        for child_index in child_indexes:
            child = shards[child_index]
            parent.children.append(child)
            child.parent = parent
    return shards
def test_token():
    """Shard.token carries the shard's position and, when present, its parent's id."""
    # NOTE(review): another `test_token` is defined earlier in this file; at
    # module scope the later definition replaces the earlier one - confirm
    # which of the two is meant to run.
    parent = Shard(stream_arn="parent-stream-arn", shard_id="parent-id")
    shard = Shard(
        stream_arn="stream-arn",
        shard_id="shard-id",
        iterator_id="iterator-id",
        iterator_type="at_sequence",
        sequence_number="sequence-number",
        parent=parent)

    # iterator_id is not expected in the token; the parent appears by shard_id.
    expected = dict(
        stream_arn="stream-arn",
        shard_id="shard-id",
        iterator_type="at_sequence",
        sequence_number="sequence-number",
        parent="parent-id",
    )
    assert shard.token == expected

    # Without a parent the "parent" key is dropped entirely.
    shard.parent = None
    del expected["parent"]
    assert shard.token == expected
def test_token_closed_records(coordinator, session):
    """
    A closed shard stays in the coordinator's token while its final records
    are still buffered, even though the shard itself is no longer tracked.

    https://github.com/numberoverzero/bloop/issues/111
    """
    closed_shard = Shard(
        stream_arn=coordinator.stream_arn,
        shard_id="closed-shard-id",
        iterator_id="closed-iter-id",
        session=session)
    coordinator.active = [closed_shard]

    # Final batch for the shard: the response has no NextShardIterator.
    final_records = [
        dynamodb_record_with(sequence_number=number, key=True)
        for number in (123, 456, 789)
    ]
    session.get_stream_records.return_value = {"Records": final_records}

    # DescribeStream reports no shards.
    # NOTE(review): presumably consulted when the coordinator looks for
    # children of the closed shard - confirm.
    session.describe_stream.return_value = {
        "Shards": [],
        "StreamArn": coordinator.stream_arn
    }

    # Before any records are read, the shard is active with no position.
    assert coordinator.token == {
        "stream_arn": "stream-arn",
        "active": ["closed-shard-id"],
        "shards": []
    }

    first = next(coordinator)
    assert first["meta"]["sequence_number"] == "123"
    assert coordinator.closed[closed_shard] == len(coordinator.buffer) == 2

    # The shard is still listed under "active", and "shards" now points just
    # past sequence number 123.
    closed_pointer = {
        "iterator_type": "after_sequence",
        "sequence_number": "123",
        "shard_id": "closed-shard-id",
    }
    assert coordinator.token == {
        "stream_arn": "stream-arn",
        "active": ["closed-shard-id"],
        "shards": [closed_pointer]
    }
def test_move_to_old_token(coordinator, shard, session):
    """move_to rejects a token whose shards have no link to the described stream."""
    parent = Shard(stream_arn=coordinator.stream_arn, shard_id="parent-shard")
    parent.children.append(shard)
    shard.parent = parent

    coordinator.roots.append(parent)
    coordinator.active.append(shard)
    stale_token = coordinator.token

    # The fresh description shares no lineage with the shard ids in the token.
    session.describe_stream.return_value = stream_description(1)

    with pytest.raises(InvalidStream):
        coordinator.move_to(stale_token)
def test_heartbeat_until_sequence_number(coordinator, session):
    """Once heartbeat() has found records for a shard, later heartbeats skip that shard."""
    shard = Shard(
        stream_arn=coordinator.stream_arn,
        shard_id="shard-id",
        iterator_id="iterator-id",
        iterator_type="latest",
        session=session)
    coordinator.active.append(shard)

    fetch = session.get_stream_records
    fetch.side_effect = build_get_records_responses(1, 0)

    # First heartbeat hits the session, buffers records and sets a position.
    coordinator.heartbeat()
    assert coordinator.buffer
    assert shard.sequence_number is not None
    fetch.assert_called_once_with("iterator-id")

    # Second heartbeat makes no further call: the shard has a sequence_number.
    coordinator.heartbeat()
    assert fetch.call_count == 1
def test_move_to_old_token(coordinator, shard, session, caplog):
    """move_to rejects a token with no link to the described stream, logging each pruned shard."""
    # NOTE(review): another `test_move_to_old_token` is defined earlier in
    # this file; at module scope the later definition replaces the earlier
    # one - confirm which of the two is meant to run.
    parent = Shard(stream_arn=coordinator.stream_arn, shard_id="parent-shard")
    parent.children.append(shard)
    shard.parent = parent

    coordinator.roots.append(parent)
    coordinator.active.append(shard)
    stale_token = coordinator.token

    # The fresh description shares no lineage with the shard ids in the token.
    session.describe_stream.return_value = stream_description(1)

    with pytest.raises(InvalidStream):
        coordinator.move_to(stale_token)

    # One INFO record per pruned shard, parent first.
    expected_logs = [
        ("bloop.stream", logging.INFO, message)
        for message in (
            "Unknown or expired shard \"parent-shard\" - pruning from stream token",
            "Unknown or expired shard \"shard-id\" - pruning from stream token",
        )
    ]
    assert caplog.record_tuples == expected_logs
def test_buffer_closed_records(coordinator, session):
    """
    The last records of a closed shard remain buffered after the shard stops
    being tracked, and the closed-shard count falls as they are consumed.

    https://github.com/numberoverzero/bloop/issues/111
    """
    closed_shard = Shard(
        stream_arn=coordinator.stream_arn,
        shard_id="closed-shard-id",
        iterator_id="closed-iter-id",
        session=session)
    coordinator.active = [closed_shard]

    # Final batch for the shard: the response has no NextShardIterator.
    final_records = [
        dynamodb_record_with(sequence_number=number, key=True)
        for number in (123, 456, 789)
    ]
    session.get_stream_records.return_value = {"Records": final_records}

    # DescribeStream reports no shards.
    # NOTE(review): presumably consulted when the coordinator looks for
    # children of the closed shard - confirm.
    session.describe_stream.return_value = {
        "Shards": [],
        "StreamArn": coordinator.stream_arn
    }

    assert not coordinator.closed

    # The first read deactivates the shard and leaves two records buffered.
    record = next(coordinator)
    assert not coordinator.active
    assert record["meta"]["sequence_number"] == "123"
    assert len(coordinator.buffer) == coordinator.closed[closed_shard] == 2

    record = next(coordinator)
    assert record["meta"]["sequence_number"] == "456"
    assert len(coordinator.buffer) == coordinator.closed[closed_shard] == 1

    # Consuming the last record clears both the buffer and the closed entry.
    record = next(coordinator)
    assert record["meta"]["sequence_number"] == "789"
    assert not coordinator.buffer
    assert not coordinator.closed
def test_repr(expected, kwargs):
    """repr() of a Shard built with the extra ``kwargs`` equals ``expected``."""
    assert repr(Shard(stream_arn="stream-arn", shard_id="shard-id", **kwargs)) == expected
def shard(session, stream_arn, shard_id):
    """Return a Shard built from the given stream_arn, shard_id and session."""
    init_kwargs = {
        "stream_arn": stream_arn,
        "shard_id": shard_id,
        "session": session,
    }
    return Shard(**init_kwargs)
def test_advance_removes_exhausted(has_children, loads_children, coordinator, shard, session):
    """advance_shards() drops an exhausted shard and promotes its children at trim_horizon."""
    shard.iterator_id = last_iterator
    shard.iterator_type = "latest"
    coordinator.active.append(shard)

    if has_children:
        # The child is already attached locally, so DescribeStream isn't needed.
        shard.children.append(Shard(
            stream_arn=coordinator.stream_arn,
            shard_id="child-id",
            parent=shard,
            iterator_type="at_sequence",
            sequence_number="sequence-number",
            session=session))
    else:
        described = []
        if loads_children:
            # A child exists remotely but is not known locally yet.
            described.append({
                "SequenceNumberRange": {
                    "EndingSequenceNumber": "820400000000000001192334",
                    "StartingSequenceNumber": "820400000000000001192334"
                },
                "ShardId": "child-id",
                "ParentShardId": "shard-id"
            })
        # With neither flag set, the description lists no shards at all.
        session.describe_stream.return_value = {
            "Shards": described,
            "StreamArn": coordinator.stream_arn
        }

    coordinator.advance_shards()

    # Nothing was buffered, and the exhausted shard is no longer active.
    assert not coordinator.buffer
    assert shard not in coordinator.active

    if has_children:
        # Children were known up front; no DescribeStream call is expected.
        session.describe_stream.assert_not_called()
    else:
        # No local children, so DescribeStream was asked for them exactly once.
        session.describe_stream.assert_called_once_with(
            stream_arn=coordinator.stream_arn, first_shard=shard.shard_id)

    promoted = has_children or loads_children
    if not promoted:
        # No child means no new iterator is requested.
        session.get_shard_iterator.assert_not_called()
    else:
        # The child (pre-existing or discovered) is now the only active shard.
        assert len(coordinator.active) == 1
        assert coordinator.active[0].parent is shard

        # Promotion resets the child to trim_horizon.
        session.get_shard_iterator.assert_called_once_with(
            stream_arn=coordinator.stream_arn,
            shard_id="child-id",
            iterator_type="trim_horizon",
            sequence_number=None
        )