def test_increment_page_item(mocker):
    c = CheckpointCalc()
    (page, page_item) = c.next_page_and_item(0, 0)
    assert page == 0
    assert page_item == 1
def test_increment_page_item_over_page_size2(mocker):
    c = CheckpointCalc()
    c.page_size = 100
    (page, page_item) = c.next_page_and_item(100, 99)
    assert page == 101
    assert page_item == 0
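
# A minimal sketch of CheckpointCalc, inferred from the tests above: a global
# checkpoint is a position on consecutive pages of page_size items. The default
# page_size of 1000 and the to_page_item mapping are assumptions; the real
# class lives elsewhere in the project.
class CheckpointCalc:
    def __init__(self):
        self.page_size = 1000  # assumed default; the second test overrides it

    def next_page_and_item(self, page, page_item):
        # Advance one position, rolling over to the next page when it is full.
        if page_item + 1 >= self.page_size:
            return (page + 1, 0)
        return (page, page_item + 1)

    def to_page_item(self, checkpoint):
        # Map a linear checkpoint number to its (page, page_item) position.
        return (checkpoint // self.page_size, checkpoint % self.page_size)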
import json
import logging
from datetime import datetime

import boto3
import botocore.exceptions

# CommitData, GlobalCounter, GlobalIndex, AnalysisState and
# ConcurrencyException are provided by the project's own modules.

logger = logging.getLogger(__name__)


class DynamoDB:
    """Low-level event-store access over an events table and an analysis table."""

    global_counter_key = '!!!RESERVED:GLOBAL-COUNTER!!!'
    global_counter_range = 0

    def __init__(self, events_table, analysis_table):
        self.events_table = events_table
        self.analysis_table = analysis_table
        self.dynamodb_ll = boto3.client('dynamodb')
        self.checkpoint_calc = CheckpointCalc()

    def append(self, commit):
        item = {
            'stream_id': {"S": commit.stream_id},
            'changeset_id': {"N": str(commit.changeset_id)},
            'metadata': {"S": json.dumps(commit.metadata)},
            'events': {"S": json.dumps(commit.events)},
            'first_event_id': {"N": str(commit.first_event_id)},
            'last_event_id': {"N": str(commit.last_event_id)},
            'timestamp': {"S": self.get_timestamp()}
        }
        # Conditional put: the stream_id/changeset_id pair must not exist yet.
        condition = {
            'stream_id': {"Exists": False},
            'changeset_id': {"Exists": False},
        }
        try:
            self.dynamodb_ll.put_item(TableName=self.events_table,
                                      Item=item,
                                      Expected=condition)
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == 'ConditionalCheckFailedException':
                logger.debug(f"ConditionalCheckFailedException for "
                             f"{commit.stream_id}/{commit.changeset_id}")
                raise ConcurrencyException(commit.stream_id,
                                           commit.changeset_id)
            else:
                raise e

    def fetch_last_commit(self, stream_id, meta_only=False):
        projection = ('stream_id,changeset_id,events,metadata,'
                      'first_event_id,last_event_id')
        if meta_only:
            projection = 'stream_id,changeset_id,first_event_id,last_event_id'
        response = self.dynamodb_ll.query(
            TableName=self.events_table,
            ProjectionExpression=projection,
            Limit=1,
            ScanIndexForward=False,
            KeyConditions={
                'stream_id': {
                    'AttributeValueList': [{'S': stream_id}],
                    'ComparisonOperator': 'EQ'
                }
            })
        if response["Count"] == 0:
            return None
        return DynamoDB.parse_commit(response["Items"][0])

    def fetch_stream_changesets(self, stream_id,
                                from_changeset=None, to_changeset=None):
        if not from_changeset and not to_changeset:
            from_changeset = 1
        range_condition = None
        if from_changeset and to_changeset:
            range_condition = {
                'AttributeValueList': [{'N': str(from_changeset)},
                                       {'N': str(to_changeset)}],
                'ComparisonOperator': 'BETWEEN'
            }
        elif from_changeset:
            range_condition = {
                'AttributeValueList': [{'N': str(from_changeset)}],
                'ComparisonOperator': 'GE'
            }
        elif to_changeset:
            range_condition = {
                'AttributeValueList': [{'N': str(to_changeset)}],
                'ComparisonOperator': 'LE'
            }
        response = self.dynamodb_ll.query(
            TableName=self.events_table,
            Select='ALL_ATTRIBUTES',
            ScanIndexForward=True,
            KeyConditions={
                'stream_id': {
                    'AttributeValueList': [{'S': stream_id}],
                    'ComparisonOperator': 'EQ'
                },
                'changeset_id': range_condition
            })
        return [DynamoDB.parse_commit(r) for r in response["Items"]]

    def fetch_stream_by_events(self, stream_id, from_event=None, to_event=None):
        if not from_event and not to_event:
            from_event = 1
        index_name = None
        range_condition = None
        column = None
        if from_event and to_event and from_event == to_event:
            return [self.read_changeset_containing_event(stream_id, from_event)]
        if from_event and to_event:
            return self.fetch_changesets_by_events_range(
                stream_id, from_event, to_event)
        if from_event:
            index_name = 'LastEventId'
            column = 'last_event_id'
            range_condition = {
                'AttributeValueList': [{'N': str(from_event)}],
                'ComparisonOperator': 'GE'
            }
        elif to_event:
            index_name = 'FirstEventId'
            column = 'first_event_id'
            range_condition = {
                'AttributeValueList': [{'N': str(to_event)}],
                'ComparisonOperator': 'LE'
            }
        response = self.dynamodb_ll.query(
            TableName=self.events_table,
            Select='ALL_ATTRIBUTES',
            IndexName=index_name,
            ScanIndexForward=True,
            KeyConditions={
                'stream_id': {
                    'AttributeValueList': [{'S': stream_id}],
                    'ComparisonOperator': 'EQ'
                },
                column: range_condition
            })
        return [DynamoDB.parse_commit(r) for r in response["Items"]]

    def fetch_changesets_by_events_range(self, stream_id, from_event, to_event):
        first_changeset = self.read_changeset_containing_event(
            stream_id, from_event)
        if not first_changeset:
            return None
        if first_changeset.last_event_id >= to_event:
            return [first_changeset]
        response = self.dynamodb_ll.query(
            TableName=self.events_table,
            Select='ALL_ATTRIBUTES',
            IndexName="FirstEventId",
            ScanIndexForward=True,
            KeyConditions={
                'stream_id': {
                    'AttributeValueList': [{'S': stream_id}],
                    'ComparisonOperator': 'EQ'
                },
                "first_event_id": {
                    'AttributeValueList': [{'N': str(from_event)},
                                           {'N': str(to_event)}],
                    'ComparisonOperator': 'BETWEEN'
                }
            })
        return [first_changeset] + \
               [DynamoDB.parse_commit(r) for r in response["Items"]]

    def read_changeset_containing_event(self, stream_id, event_id):
        response = self.dynamodb_ll.query(
            TableName=self.events_table,
            Select='ALL_ATTRIBUTES',
            IndexName='LastEventId',
            ScanIndexForward=True,
            Limit=1,
            KeyConditions={
                'stream_id': {
                    'AttributeValueList': [{'S': stream_id}],
                    'ComparisonOperator': 'EQ'
                },
                'last_event_id': {
                    'AttributeValueList': [{'N': str(event_id)}],
                    'ComparisonOperator': 'GE'
                }
            })
        changesets = [DynamoDB.parse_commit(r) for r in response["Items"]]
        return changesets[0] if changesets else None

    @classmethod
    def parse_commit(cls, record):
        logger.debug(f"Parsing DynamoDB record: {record}")
        stream_id = record["stream_id"]["S"]
        changeset_id = int(record["changeset_id"]["N"])
        first_event_id = int(record["first_event_id"]["N"])
        last_event_id = int(record["last_event_id"]["N"])
        events = None
        if "events" in record:
            events = json.loads(record["events"]["S"])
        metadata = None
        if "metadata" in record:
            metadata = json.loads(record["metadata"]["S"])
        page = None
        page_item = None
        if "page" in record:
            page = int(record["page"]["N"])
            page_item = int(record["page_item"]["N"])
        return CommitData(stream_id, changeset_id, metadata, events,
                          first_event_id, last_event_id, page, page_item)

    def get_timestamp(self):
        return datetime.utcnow().isoformat("T") + "Z"

    def get_global_counter(self):
        counter = self.__get_global_counter()
        if not counter:
            self.init_global_counter()
            counter = self.__get_global_counter()
        return counter

    def __get_global_counter(self):
        response = self.dynamodb_ll.query(
            TableName=self.events_table,
            ProjectionExpression='page,page_item,prev_stream_id,prev_changeset_id',
            Limit=1,
            ScanIndexForward=False,
            KeyConditions={
                'stream_id': {
                    'AttributeValueList': [{'S': self.global_counter_key}],
                    'ComparisonOperator': 'EQ'
                },
                'changeset_id': {
                    'AttributeValueList': [{'N': str(self.global_counter_range)}],
                    'ComparisonOperator': 'EQ'
                }
            })
        if response["Count"] == 0:
            return None
        data = response["Items"][0]
        return GlobalCounter(int(data["page"]["N"]),
                             int(data["page_item"]["N"]),
                             data["prev_stream_id"]["S"],
                             int(data["prev_changeset_id"]["N"]))

    def init_global_counter(self):
        # The counter starts at (page=0, page_item=-1) so that the first
        # increment assigns position (0, 0).
        item = {
            'stream_id': {"S": self.global_counter_key},
            'changeset_id': {"N": str(self.global_counter_range)},
            'page': {"N": str(0)},
            'page_item': {"N": str(-1)},
            'prev_stream_id': {"S": ""},
            'prev_changeset_id': {"N": str(0)}
        }
        condition = {
            'stream_id': {"Exists": False},
            'changeset_id': {"Exists": False},
        }
        try:
            self.dynamodb_ll.put_item(TableName=self.events_table,
                                      Item=item,
                                      Expected=condition)
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == 'ConditionalCheckFailedException':
                # The counter already exists; nothing to initialize.
                return
            else:
                raise e

    def update_global_counter(self, prev_value, new_value):
        try:
            self.dynamodb_ll.update_item(
                TableName=self.events_table,
                Key={
                    'stream_id': {"S": self.global_counter_key},
                    'changeset_id': {"N": str(self.global_counter_range)}
                },
                AttributeUpdates={
                    'page': {"Value": {"N": str(new_value.page)}},
                    'page_item': {"Value": {"N": str(new_value.page_item)}},
                    'prev_stream_id': {"Value": {"S": new_value.prev_stream_id}},
                    'prev_changeset_id': {"Value": {"N": str(new_value.prev_changeset_id)}}
                },
                # Optimistic lock: only advance from the expected position.
                Expected={
                    'page': {"Value": {"N": str(prev_value.page)}},
                    'page_item': {"Value": {"N": str(prev_value.page_item)}}
                })
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == 'ConditionalCheckFailedException':
                raise ConcurrencyException(self.global_counter_key,
                                           self.global_counter_range)
            else:
                raise e

    def get_global_index_value(self, stream_id, changeset_id):
        response = self.dynamodb_ll.query(
            TableName=self.events_table,
            ProjectionExpression='page,page_item',
            Limit=1,
            ScanIndexForward=False,
            KeyConditions={
                'stream_id': {
                    'AttributeValueList': [{'S': stream_id}],
                    'ComparisonOperator': 'EQ'
                },
                'changeset_id': {
                    'AttributeValueList': [{'N': str(changeset_id)}],
                    'ComparisonOperator': 'EQ'
                }
            })
        if response["Count"] == 0:
            return None
        data = response["Items"][0]
        page = data.get("page")
        page_item = data.get("page_item")
        if page:
            page = int(page["N"])
        if page_item:
            page_item = int(page_item["N"])
        return GlobalIndex(stream_id, changeset_id, page, page_item)

    def set_global_index(self, global_index):
        stream_id = global_index.stream_id
        changeset_id = global_index.changeset_id
        page = global_index.page
        page_item = global_index.page_item
        try:
            self.dynamodb_ll.update_item(
                TableName=self.events_table,
                Key={
                    'stream_id': {"S": stream_id},
                    'changeset_id': {"N": str(changeset_id)}
                },
                AttributeUpdates={
                    'page': {"Value": {"N": str(page)}},
                    'page_item': {"Value": {"N": str(page_item)}}
                },
                # The index may only be assigned once.
                Expected={
                    'page': {"Exists": False},
                    'page_item': {"Exists": False}
                })
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == 'ConditionalCheckFailedException':
                raise ConcurrencyException(self.global_counter_key,
                                           self.global_counter_range)
            else:
                raise e

    def fetch_global_changesets(self, checkpoint, limit):
        def fetch_batch(page, since_item, limit):
            response = self.dynamodb_ll.query(
                TableName=self.events_table,
                Select='ALL_ATTRIBUTES',
                IndexName='EmumerationIndex',
                ScanIndexForward=True,
                Limit=limit,
                KeyConditions={
                    'page': {
                        'AttributeValueList': [{'N': str(page)}],
                        'ComparisonOperator': 'EQ'
                    },
                    'page_item': {
                        'AttributeValueList': [{'N': str(since_item)}],
                        'ComparisonOperator': 'GE'
                    }
                })
            # The global counter lives in the same index; skip it.
            return [
                DynamoDB.parse_commit(r) for r in response["Items"]
                if r["stream_id"]["S"] != self.global_counter_key
            ]

        (page, page_item) = self.checkpoint_calc.to_page_item(checkpoint)
        changesets_left = limit
        result = []
        while True:
            last_batch = fetch_batch(page, page_item, changesets_left)
            if len(last_batch) > 0:
                result.extend(last_batch)
                # Continue right after the last changeset actually read, so a
                # short batch at a page boundary is not fetched again.
                last = last_batch[-1]
                (page, page_item) = self.checkpoint_calc.next_page_and_item(
                    last.page, last.page_item)
                changesets_left = changesets_left - len(last_batch)
            else:
                break
            if changesets_left <= 0:
                break
        return result

    def fetch_global_events(self, checkpoint, event_in_checkpoint, limit):
        # Mirrors fetch_global_changesets; event_in_checkpoint is currently unused.
        def fetch_batch(page, since_item, limit):
            response = self.dynamodb_ll.query(
                TableName=self.events_table,
                Select='ALL_ATTRIBUTES',
                IndexName='EmumerationIndex',
                ScanIndexForward=True,
                Limit=limit,
                KeyConditions={
                    'page': {
                        'AttributeValueList': [{'N': str(page)}],
                        'ComparisonOperator': 'EQ'
                    },
                    'page_item': {
                        'AttributeValueList': [{'N': str(since_item)}],
                        'ComparisonOperator': 'GE'
                    }
                })
            return [
                DynamoDB.parse_commit(r) for r in response["Items"]
                if r["stream_id"]["S"] != self.global_counter_key
            ]

        (page, page_item) = self.checkpoint_calc.to_page_item(checkpoint)
        changesets_left = limit
        result = []
        while True:
            last_batch = fetch_batch(page, page_item, changesets_left)
            if len(last_batch) > 0:
                result.extend(last_batch)
                last = last_batch[-1]
                (page, page_item) = self.checkpoint_calc.next_page_and_item(
                    last.page, last.page_item)
                changesets_left = changesets_left - len(last_batch)
            else:
                break
            if changesets_left <= 0:
                break
        return result

    def get_analysis_state(self):
        def fetch_state():
            projection = 'projection_id,proj_state,version'
            response = self.dynamodb_ll.query(
                TableName=self.analysis_table,
                ProjectionExpression=projection,
                Limit=1,
                ScanIndexForward=False,
                KeyConditions={
                    'projection_id': {
                        'AttributeValueList': [{'S': "analysis_state"}],
                        'ComparisonOperator': 'EQ'
                    }
                })
            if response["Count"] == 0:
                return None
            data = json.loads(response["Items"][0]["proj_state"]["S"])
            return AnalysisState(total_streams=data["total_streams"],
                                 total_changesets=data["total_changesets"],
                                 total_events=data["total_events"],
                                 max_stream_length=data["max_stream_length"],
                                 version=int(response["Items"][0]["version"]["N"]))

        result = fetch_state()
        if not result:
            self.init_analysis_state()
            result = fetch_state()
        return result

    def init_analysis_state(self):
        state_value = {
            "total_streams": 0,
            "total_changesets": 0,
            "total_events": 0,
            "max_stream_length": 0,
            "version": 0
        }
        item = {
            'projection_id': {"S": "analysis_state"},
            'proj_state': {"S": json.dumps(state_value)},
            'version': {"N": str(0)}
        }
        condition = {'projection_id': {"Exists": False}}
        try:
            self.dynamodb_ll.put_item(TableName=self.analysis_table,
                                      Item=item,
                                      Expected=condition)
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == 'ConditionalCheckFailedException':
                # The state already exists; no need to init.
                return
            else:
                raise e

    def set_analysis_state(self, state, expected_version):
        state_value = {
            "total_streams": state.total_streams,
            "total_changesets": state.total_changesets,
            "total_events": state.total_events,
            "max_stream_length": state.max_stream_length,
            "version": state.version
        }
        item = {
            'projection_id': {"S": "analysis_state"},
            'proj_state': {"S": json.dumps(state_value)},
            'version': {"N": str(state.version)}
        }
        condition = {'version': {"Value": {"N": str(expected_version)}}}
        try:
            self.dynamodb_ll.put_item(TableName=self.analysis_table,
                                      Item=item,
                                      Expected=condition)
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == 'ConditionalCheckFailedException':
                logger.debug(f"ConditionalCheckFailedException for analysis "
                             f"model, expected version {expected_version}")
                raise ConcurrencyException("analysis_model", expected_version)
            else:
                raise e
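
# A hedged usage sketch of the store's optimistic-concurrency contract: append()
# is a conditional put, so a concurrent writer surfaces as ConcurrencyException
# and the caller rebases on the stream head and retries. The table names and the
# CommitData constructor (argument order taken from parse_commit above) are
# assumptions.
def _append_with_retry_example():
    db = DynamoDB(events_table='events', analysis_table='analysis')
    commit = CommitData('user-42', 1, {"source": "api"},
                        [{"type": "UserCreated"}], 1, 1, None, None)
    try:
        db.append(commit)
    except ConcurrencyException:
        # Another writer claimed changeset 1 first; continue after the head.
        last = db.fetch_last_commit('user-42', meta_only=True)
        retry = CommitData('user-42', last.changeset_id + 1, {"source": "api"},
                           [{"type": "UserCreated"}],
                           last.last_event_id + 1, last.last_event_id + 1,
                           None, None)
        db.append(retry)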
class GlobalIndexer:
    """Assigns global (page, page_item) enumeration positions to changesets."""

    def __init__(self, db):
        self.db = db
        self.checkpoint_calc = CheckpointCalc()

    def execute(self, cmd):
        for c in cmd.changesets:
            self.assign_global_index(c["stream_id"], c["changeset_id"])

    def assign_global_index(self, stream_id, changeset_id):
        logger.info(f"Assign global index to {stream_id}/{changeset_id}")
        g_ind = self.db.get_global_index_value(stream_id, changeset_id)
        if g_ind.page is not None and g_ind.page_item is not None:
            logger.debug("The changeset already has an assigned global index")
            return
        self.ensure_prev_changeset_has_global_index(stream_id, changeset_id)
        last_assigned_index = self.db.get_global_counter()
        logger.debug(f"Current global counter: {last_assigned_index}")
        self.ensure_index_committed(last_assigned_index)
        # If the counter already points at this changeset, its index write was
        # just repaired by ensure_index_committed; nothing more to do.
        if (last_assigned_index.prev_stream_id != stream_id
                or last_assigned_index.prev_changeset_id != changeset_id):
            new_counter_value = self.increment_counter(stream_id, changeset_id,
                                                       last_assigned_index)
            new_global_index = GlobalIndex(stream_id, changeset_id,
                                           new_counter_value.page,
                                           new_counter_value.page_item)
            self.db.set_global_index(new_global_index)
            logger.debug(f"Global index value set for "
                         f"{stream_id}/{changeset_id}: {new_global_index}")

    def ensure_prev_changeset_has_global_index(self, stream_id, changeset_id):
        if changeset_id > 1:
            prev_changeset_id = changeset_id - 1
            logger.debug(f"First have to ensure that the previous changeset "
                         f"has a global index ({stream_id}/{prev_changeset_id})")
            self.assign_global_index(stream_id, prev_changeset_id)

    def ensure_index_committed(self, index):
        if not index.prev_stream_id:
            return
        changeset_index = self.db.get_global_index_value(
            index.prev_stream_id, index.prev_changeset_id)
        if not changeset_index:
            return
        if changeset_index.page is None or changeset_index.page_item is None:
            logger.info("The previously assigned index was not written. Repairing.")
            fixed_index = GlobalIndex(changeset_index.stream_id,
                                      changeset_index.changeset_id,
                                      index.page, index.page_item)
            self.db.set_global_index(fixed_index)

    def increment_counter(self, stream_id, changeset_id, prev_counter):
        (p, i) = self.checkpoint_calc.next_page_and_item(prev_counter.page,
                                                         prev_counter.page_item)
        new_counter = GlobalCounter(p, i, stream_id, changeset_id)
        self.db.update_global_counter(prev_counter, new_counter)
        logger.debug(f"Counter increased from {prev_counter} to {new_counter}")
        return new_counter
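
# A hedged sketch of driving GlobalIndexer. execute() only needs an object with
# a `changesets` list of {"stream_id", "changeset_id"} dicts, so SimpleNamespace
# stands in for whatever command type the project actually passes.
def _assign_indexes_example():
    from types import SimpleNamespace

    db = DynamoDB(events_table='events', analysis_table='analysis')
    indexer = GlobalIndexer(db)
    cmd = SimpleNamespace(changesets=[
        {"stream_id": "user-42", "changeset_id": 1},
        {"stream_id": "user-42", "changeset_id": 2},
    ])
    # Each changeset receives the next (page, page_item) slot from the global
    # counter; changesets that already have an index are skipped.
    indexer.execute(cmd)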