def test_increment_page_item(mocker):
    """Stepping from page 0 / item 0 stays on page 0 and moves to item 1."""
    calc = CheckpointCalc()

    # Unpack the returned pair explicitly, exactly as callers consume it.
    next_page, next_item = calc.next_page_and_item(0, 0)

    assert next_page == 0
    assert next_item == 1
def test_increment_page_item_over_page_size2(mocker):
    """With a page size of 100, item 99 rolls over to item 0 of the next page."""
    calc = CheckpointCalc()
    calc.page_size = 100

    # Item 99 is the last slot of page 100, so the step must cross the page.
    next_page, next_item = calc.next_page_and_item(100, 99)

    assert next_page == 101
    assert next_item == 0
# Example #3
0
class DynamoDB:
    """Event-store persistence on top of the low-level DynamoDB client.

    Changesets (commits) and the reserved global-counter item are stored in
    ``events_table``; the analysis projection is stored in
    ``analysis_table``.  Requests are issued with the ``KeyConditions`` /
    ``Expected`` / ``AttributeUpdates`` request parameters.
    """

    # Key of the reserved item in the events table that holds the global
    # counter (last assigned page / page_item).
    global_counter_key = '!!!RESERVED:GLOBAL-COUNTER!!!'
    global_counter_range = 0

    def __init__(self, events_table, analysis_table):
        """Remember the table names and create the client and checkpoint calc."""
        self.events_table = events_table
        self.analysis_table = analysis_table
        self.dynamodb_ll = boto3.client('dynamodb')
        self.checkpoint_calc = CheckpointCalc()

    # ------------------------------------------------------------------
    # Small request-building helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _num(value):
        """Wrap *value* as a DynamoDB number attribute value."""
        return {'N': str(value)}

    @staticmethod
    def _text(value):
        """Wrap *value* as a DynamoDB string attribute value."""
        return {'S': value}

    @staticmethod
    def _condition(operator, *operands):
        """Build one ``KeyConditions`` entry from already-wrapped operands."""
        return {
            'AttributeValueList': list(operands),
            'ComparisonOperator': operator
        }

    @staticmethod
    def _absent(*attribute_names):
        """Build an ``Expected`` map requiring the attributes not to exist."""
        return {name: {"Exists": False} for name in attribute_names}

    @staticmethod
    def _is_condition_failure(error):
        """Return True if *error* reports a failed conditional write."""
        return error.response['Error'][
            'Code'] == 'ConditionalCheckFailedException'

    def _stream_equals(self, stream_id):
        """Key condition selecting the partition of *stream_id*."""
        return self._condition('EQ', self._text(stream_id))

    def _query_records(self, key_conditions, **options):
        """Query the events table (all attributes, ascending) for raw items.

        ``options`` are passed straight to ``query`` (e.g. ``IndexName``,
        ``Limit``).  Only the first response page is read.
        """
        response = self.dynamodb_ll.query(TableName=self.events_table,
                                          Select='ALL_ATTRIBUTES',
                                          ScanIndexForward=True,
                                          KeyConditions=key_conditions,
                                          **options)
        return response["Items"]

    def _query_commits(self, key_conditions, **options):
        """Like ``_query_records`` but with every item parsed into a commit."""
        return [
            DynamoDB.parse_commit(r)
            for r in self._query_records(key_conditions, **options)
        ]

    # ------------------------------------------------------------------
    # Changesets
    # ------------------------------------------------------------------

    def append(self, commit):
        """Store *commit* as a new changeset item.

        The write is conditional on no item with the same
        ``stream_id`` / ``changeset_id`` existing yet.

        Raises:
            ConcurrencyException: the changeset already exists.
            botocore.exceptions.ClientError: any other client error.
        """
        item = {
            'stream_id': self._text(commit.stream_id),
            'changeset_id': self._num(commit.changeset_id),
            'metadata': self._text(json.dumps(commit.metadata)),
            'events': self._text(json.dumps(commit.events)),
            'first_event_id': self._num(commit.first_event_id),
            'last_event_id': self._num(commit.last_event_id),
            'timestamp': self._text(self.get_timestamp()),
        }

        try:
            self.dynamodb_ll.put_item(TableName=self.events_table,
                                      Item=item,
                                      Expected=self._absent(
                                          'stream_id', 'changeset_id'))
        except botocore.exceptions.ClientError as e:
            if not self._is_condition_failure(e):
                raise
            logger.debug(
                f"ConditionalCheckFailedException for {commit.stream_id}/{commit.changeset_id}"
            )
            raise ConcurrencyException(commit.stream_id,
                                       commit.changeset_id) from e

    def fetch_last_commit(self, stream_id, meta_only=False):
        """Return the changeset with the highest ``changeset_id``, or None.

        With ``meta_only`` the ``events`` and ``metadata`` attributes are
        not fetched, so they are None on the returned commit.
        """
        if meta_only:
            projection = 'stream_id,changeset_id,first_event_id,last_event_id'
        else:
            projection = 'stream_id,changeset_id,events,metadata,first_event_id,last_event_id'

        response = self.dynamodb_ll.query(
            TableName=self.events_table,
            ProjectionExpression=projection,
            Limit=1,
            ScanIndexForward=False,
            KeyConditions={'stream_id': self._stream_equals(stream_id)})
        if response["Count"] == 0:
            return None

        return DynamoDB.parse_commit(response["Items"][0])

    def fetch_stream_changesets(self,
                                stream_id,
                                from_changeset=None,
                                to_changeset=None):
        """Return the stream's changesets in the inclusive id range.

        Either bound may be omitted; with neither, reading starts at
        changeset 1.
        """
        if not from_changeset and not to_changeset:
            from_changeset = 1

        if from_changeset and to_changeset:
            range_condition = self._condition('BETWEEN',
                                              self._num(from_changeset),
                                              self._num(to_changeset))
        elif from_changeset:
            range_condition = self._condition('GE',
                                              self._num(from_changeset))
        else:
            # Only reachable with a truthy to_changeset (see default above).
            range_condition = self._condition('LE', self._num(to_changeset))

        return self._query_commits({
            'stream_id': self._stream_equals(stream_id),
            'changeset_id': range_condition
        })

    def fetch_stream_by_events(self,
                               stream_id,
                               from_event=None,
                               to_event=None):
        """Return the changesets covering the given event-id range.

        With both bounds equal, a one-element list holding the result of
        ``read_changeset_containing_event`` is returned (that element is
        None when no changeset is found).  With both bounds given, the
        result of ``fetch_changesets_by_events_range`` is returned as is.
        With neither, reading starts at event 1.
        """
        if not from_event and not to_event:
            from_event = 1

        if from_event and to_event:
            if from_event == to_event:
                return [
                    self.read_changeset_containing_event(
                        stream_id, from_event)
                ]
            return self.fetch_changesets_by_events_range(
                stream_id, from_event, to_event)

        if from_event:
            # Open-ended upwards: every changeset ending at/after from_event.
            index_name = 'LastEventId'
            column = 'last_event_id'
            range_condition = self._condition('GE', self._num(from_event))
        else:
            # Open-ended downwards: every changeset starting at/before
            # to_event.
            index_name = 'FirstEventId'
            column = 'first_event_id'
            range_condition = self._condition('LE', self._num(to_event))

        return self._query_commits(
            {
                'stream_id': self._stream_equals(stream_id),
                column: range_condition
            },
            IndexName=index_name)

    def fetch_changesets_by_events_range(self, stream_id, from_event,
                                         to_event):
        """Return the changesets covering events ``from_event..to_event``.

        Returns None when no changeset ends at or after ``from_event``.
        """
        first_changeset = self.read_changeset_containing_event(
            stream_id, from_event)
        if first_changeset is None:
            return None

        if first_changeset.last_event_id >= to_event:
            return [first_changeset]

        # Continue right after the first changeset.  Querying from
        # from_event again would return the first changeset a second time
        # whenever it starts at or after from_event.
        following = self._query_commits(
            {
                'stream_id':
                self._stream_equals(stream_id),
                'first_event_id':
                self._condition('BETWEEN',
                                self._num(first_changeset.last_event_id + 1),
                                self._num(to_event))
            },
            IndexName="FirstEventId")

        return [first_changeset] + following

    def read_changeset_containing_event(self, stream_id, event_id):
        """Return the first changeset whose ``last_event_id >= event_id``.

        Returns None when the stream has no such changeset.
        """
        changesets = self._query_commits(
            {
                'stream_id': self._stream_equals(stream_id),
                'last_event_id': self._condition('GE', self._num(event_id))
            },
            IndexName='LastEventId',
            Limit=1)
        return changesets[0] if changesets else None

    @classmethod
    def parse_commit(cls, record):
        """Convert a raw DynamoDB item into a ``CommitData``.

        ``events``, ``metadata`` and ``page`` / ``page_item`` are optional
        in the item and become None when absent.
        """
        logger.debug(f"Parsing DynamoDB record: {record}")
        stream_id = record["stream_id"]["S"]
        changeset_id = int(record["changeset_id"]["N"])
        first_event_id = int(record["first_event_id"]["N"])
        last_event_id = int(record["last_event_id"]["N"])

        events = None
        if "events" in record:
            events = json.loads(record["events"]["S"])

        metadata = None
        if "metadata" in record:
            metadata = json.loads(record["metadata"]["S"])

        page = None
        page_item = None
        if "page" in record:
            page = int(record["page"]["N"])
            page_item = int(record["page_item"]["N"])

        return CommitData(stream_id, changeset_id, metadata, events,
                          first_event_id, last_event_id, page, page_item)

    def get_timestamp(self):
        """Return the current UTC time as an ISO-8601 string ending in 'Z'."""
        return datetime.utcnow().isoformat("T") + "Z"

    # ------------------------------------------------------------------
    # Global counter
    # ------------------------------------------------------------------

    def get_global_counter(self):
        """Return the global counter, creating its item first if missing."""
        counter = self.__get_global_counter()
        if counter is None:
            self.init_global_counter()
            counter = self.__get_global_counter()
        return counter

    def __get_global_counter(self):
        """Read the reserved counter item; None when it does not exist."""
        response = self.dynamodb_ll.query(
            TableName=self.events_table,
            ProjectionExpression=
            'page,page_item,prev_stream_id,prev_changeset_id',
            Limit=1,
            ScanIndexForward=False,
            KeyConditions={
                'stream_id':
                self._stream_equals(self.global_counter_key),
                'changeset_id':
                self._condition('EQ', self._num(self.global_counter_range))
            })
        if response["Count"] == 0:
            return None

        data = response["Items"][0]
        return GlobalCounter(int(data["page"]["N"]),
                             int(data["page_item"]["N"]),
                             data["prev_stream_id"]["S"],
                             int(data["prev_changeset_id"]["N"]))

    def init_global_counter(self):
        """Create the counter item at page 0 / item -1 unless it exists.

        An already existing item is left untouched (the conditional-write
        failure is swallowed on purpose).
        """
        item = {
            'stream_id': self._text(self.global_counter_key),
            'changeset_id': self._num(self.global_counter_range),
            'page': self._num(0),
            'page_item': self._num(-1),
            'prev_stream_id': self._text(""),
            'prev_changeset_id': self._num(0),
        }

        try:
            self.dynamodb_ll.put_item(TableName=self.events_table,
                                      Item=item,
                                      Expected=self._absent(
                                          'stream_id', 'changeset_id'))
        except botocore.exceptions.ClientError as e:
            if not self._is_condition_failure(e):
                raise

    def update_global_counter(self, prev_value, new_value):
        """Move the counter from *prev_value* to *new_value*.

        The update only succeeds while the stored ``page`` / ``page_item``
        still equal those of *prev_value*.

        Raises:
            ConcurrencyException: the counter was changed concurrently.
            botocore.exceptions.ClientError: any other client error.
        """
        try:
            self.dynamodb_ll.update_item(
                TableName=self.events_table,
                Key={
                    'stream_id': self._text(self.global_counter_key),
                    'changeset_id': self._num(self.global_counter_range)
                },
                AttributeUpdates={
                    'page': {
                        "Value": self._num(new_value.page)
                    },
                    'page_item': {
                        "Value": self._num(new_value.page_item)
                    },
                    'prev_stream_id': {
                        "Value": self._text(new_value.prev_stream_id)
                    },
                    # Must be the attribute that init_global_counter creates
                    # and __get_global_counter reads back; writing it under
                    # any other name leaves the stored value stale.
                    'prev_changeset_id': {
                        "Value": self._num(new_value.prev_changeset_id)
                    }
                },
                Expected={
                    'page': {
                        "Value": self._num(prev_value.page)
                    },
                    'page_item': {
                        "Value": self._num(prev_value.page_item)
                    }
                })
        except botocore.exceptions.ClientError as e:
            if not self._is_condition_failure(e):
                raise
            raise ConcurrencyException(self.global_counter_key,
                                       self.global_counter_range) from e

    # ------------------------------------------------------------------
    # Global index of individual changesets
    # ------------------------------------------------------------------

    def get_global_index_value(self, stream_id, changeset_id):
        """Return the changeset's ``GlobalIndex``; None if it is not stored.

        ``page`` / ``page_item`` of the result are None while the changeset
        has no index assigned.
        """
        response = self.dynamodb_ll.query(
            TableName=self.events_table,
            ProjectionExpression='page,page_item',
            Limit=1,
            ScanIndexForward=False,
            KeyConditions={
                'stream_id': self._stream_equals(stream_id),
                'changeset_id': self._condition('EQ',
                                                self._num(changeset_id))
            })
        if response["Count"] == 0:
            return None

        data = response["Items"][0]
        page = int(data["page"]["N"]) if data.get("page") else None
        page_item = int(
            data["page_item"]["N"]) if data.get("page_item") else None

        return GlobalIndex(stream_id, changeset_id, page, page_item)

    def set_global_index(self, global_index):
        """Write ``page`` / ``page_item`` onto a changeset that has none yet.

        Raises:
            ConcurrencyException: the changeset already carries an index.
            botocore.exceptions.ClientError: any other client error.
        """
        try:
            self.dynamodb_ll.update_item(
                TableName=self.events_table,
                Key={
                    'stream_id': self._text(global_index.stream_id),
                    'changeset_id': self._num(global_index.changeset_id)
                },
                AttributeUpdates={
                    'page': {
                        "Value": self._num(global_index.page)
                    },
                    'page_item': {
                        "Value": self._num(global_index.page_item)
                    }
                },
                # Guard both attributes that are written above.
                Expected=self._absent('page', 'page_item'))
        except botocore.exceptions.ClientError as e:
            if not self._is_condition_failure(e):
                raise
            # NOTE(review): the exception carries the counter key rather
            # than the changeset's own ids; kept as-is for callers.
            raise ConcurrencyException(self.global_counter_key,
                                       self.global_counter_range) from e

    # ------------------------------------------------------------------
    # Global enumeration
    # ------------------------------------------------------------------

    def _fetch_enumerated(self, checkpoint, limit):
        """Read up to *limit* changesets in global order from *checkpoint*.

        Each batch is read from a single page starting at the current
        item; the reserved counter item is skipped.  The cursor is then
        moved just past the last changeset returned, so a batch that does
        not fill the limit is never read twice.
        """
        page, page_item = self.checkpoint_calc.to_page_item(checkpoint)

        remaining = limit
        result = []
        while True:
            records = self._query_records(
                {
                    'page': self._condition('EQ', self._num(page)),
                    'page_item': self._condition('GE',
                                                 self._num(page_item))
                },
                # Index name spelled exactly as in the existing requests.
                IndexName='EmumerationIndex',
                Limit=remaining)
            kept = [
                r for r in records
                if r["stream_id"]["S"] != self.global_counter_key
            ]
            if not kept:
                break

            result.extend(DynamoDB.parse_commit(r) for r in kept)
            remaining -= len(kept)
            if remaining <= 0:
                break

            tail = kept[-1]
            page, page_item = self.checkpoint_calc.next_page_and_item(
                int(tail["page"]["N"]), int(tail["page_item"]["N"]))

        return result

    def fetch_global_changesets(self, checkpoint, limit):
        """Return up to *limit* changesets in global order from *checkpoint*."""
        return self._fetch_enumerated(checkpoint, limit)

    def fetch_global_events(self, checkpoint, event_in_checkpoint, limit):
        """Return up to *limit* changesets in global order from *checkpoint*.

        ``event_in_checkpoint`` is accepted but not used by this method.
        """
        return self._fetch_enumerated(checkpoint, limit)

    # ------------------------------------------------------------------
    # Analysis projection
    # ------------------------------------------------------------------

    def _fetch_analysis_state(self):
        """Read the stored analysis state; None when it does not exist."""
        response = self.dynamodb_ll.query(
            TableName=self.analysis_table,
            ProjectionExpression='projection_id,proj_state,version',
            Limit=1,
            ScanIndexForward=False,
            KeyConditions={
                'projection_id':
                self._condition('EQ', self._text("analysis_state"))
            })
        if response["Count"] == 0:
            return None

        record = response["Items"][0]
        data = json.loads(record["proj_state"]["S"])
        return AnalysisState(total_streams=data["total_streams"],
                             total_changesets=data["total_changesets"],
                             total_events=data["total_events"],
                             max_stream_length=data["max_stream_length"],
                             version=int(record["version"]["N"]))

    def _analysis_item(self, state_value, version):
        """Build the analysis-table item for *state_value* at *version*."""
        return {
            'projection_id': self._text("analysis_state"),
            'proj_state': self._text(json.dumps(state_value)),
            'version': self._num(version)
        }

    def get_analysis_state(self):
        """Return the analysis state, creating an all-zero one if missing."""
        result = self._fetch_analysis_state()
        if result is None:
            self.init_analysis_state()
            result = self._fetch_analysis_state()
        return result

    def init_analysis_state(self):
        """Create the all-zero analysis state at version 0 unless it exists."""
        state_value = {
            "total_streams": 0,
            "total_changesets": 0,
            "total_events": 0,
            "max_stream_length": 0,
            "version": 0
        }

        try:
            self.dynamodb_ll.put_item(TableName=self.analysis_table,
                                      Item=self._analysis_item(
                                          state_value, 0),
                                      Expected=self._absent('projection_id'))
        except botocore.exceptions.ClientError as e:
            # A failed condition means the item already exists: nothing to do.
            if not self._is_condition_failure(e):
                raise

    def set_analysis_state(self, state, expected_version):
        """Replace the analysis state if its stored version is as expected.

        Raises:
            ConcurrencyException: the stored version differs from
                *expected_version*.
            botocore.exceptions.ClientError: any other client error.
        """
        state_value = {
            "total_streams": state.total_streams,
            "total_changesets": state.total_changesets,
            "total_events": state.total_events,
            "max_stream_length": state.max_stream_length,
            "version": state.version
        }

        condition = {'version': {"Value": self._num(expected_version)}}

        try:
            self.dynamodb_ll.put_item(TableName=self.analysis_table,
                                      Item=self._analysis_item(
                                          state_value, state.version),
                                      Expected=condition)
        except botocore.exceptions.ClientError as e:
            if not self._is_condition_failure(e):
                raise
            logger.debug(
                f"ConditionalCheckFailedException for analysis model, expected version {expected_version}"
            )
            raise ConcurrencyException("analysis_model",
                                       expected_version) from e
class GlobalIndexer:
    """Assigns global index positions (page / page_item) to changesets.

    The position comes from the global counter kept by ``db``; it is first
    reserved on the counter and then written onto the changeset itself.
    """

    def __init__(self, db):
        """Keep the storage gateway and create the checkpoint calculator."""
        self.db = db
        self.checkpoint_calc = CheckpointCalc()

    def execute(self, cmd):
        """Assign a global index to every changeset listed in ``cmd``."""
        for changeset in cmd.changesets:
            self.assign_global_index(changeset["stream_id"],
                                     changeset["changeset_id"])

    def assign_global_index(self, stream_id, changeset_id):
        """Give the changeset a global index unless it already has one.

        Earlier changesets of the same stream are indexed first, and a
        previously reserved but unwritten index is repaired before a new
        counter value is taken.

        Raises:
            LookupError: the changeset is not found in storage.
        """
        logger.info(f"Assign global index to {stream_id}/{changeset_id}")
        g_ind = self.db.get_global_index_value(stream_id, changeset_id)
        if g_ind is None:
            # Fail loudly instead of dereferencing None; the changeset may
            # simply not be readable yet, so the caller can retry.
            raise LookupError(
                f"Changeset {stream_id}/{changeset_id} was not found")
        if g_ind.page is not None and g_ind.page_item is not None:
            logger.debug("The changeset already has an assigned global index")
            return

        self.ensure_prev_changeset_has_global_index(stream_id, changeset_id)

        last_assigned_index = self.db.get_global_counter()
        logger.debug(f"Current global counter: {last_assigned_index}")
        self.ensure_index_committed(last_assigned_index)

        # If the counter already points at this changeset, its position was
        # reserved earlier and ensure_index_committed has just written it.
        counter_is_for_this_changeset = (
            last_assigned_index.prev_stream_id == stream_id
            and last_assigned_index.prev_changeset_id == changeset_id)
        if counter_is_for_this_changeset:
            return

        new_counter_value = self.increment_counter(stream_id, changeset_id,
                                                   last_assigned_index)
        new_global_index = GlobalIndex(stream_id, changeset_id,
                                       new_counter_value.page,
                                       new_counter_value.page_item)
        self.db.set_global_index(new_global_index)
        logger.debug(
            f"Global index value set for {stream_id}/{changeset_id}: {new_global_index}"
        )

    def ensure_prev_changeset_has_global_index(self, stream_id, changeset_id):
        """Index the stream's preceding changeset first, if there is one."""
        if changeset_id <= 1:
            return

        prev_changeset_id = changeset_id - 1
        logger.debug(
            f"First have to ensure that the prev changeset has a global index({stream_id}/{prev_changeset_id})"
        )
        self.assign_global_index(stream_id, prev_changeset_id)

    def ensure_index_committed(self, index):
        """Write the counter's last reserved position if it never landed.

        ``index`` is the current global counter.  Nothing happens when the
        counter names no previous stream, when that changeset is not found,
        or when it already carries both ``page`` and ``page_item``.
        """
        if not index.prev_stream_id:
            return

        changeset_index = self.db.get_global_index_value(
            index.prev_stream_id, index.prev_changeset_id)
        if changeset_index is None:
            return

        already_written = (changeset_index.page is not None
                           and changeset_index.page_item is not None)
        if already_written:
            return

        logger.info("The previous assigned index was not written. Repairing.")
        fixed_index = GlobalIndex(changeset_index.stream_id,
                                  changeset_index.changeset_id, index.page,
                                  index.page_item)
        self.db.set_global_index(fixed_index)

    def increment_counter(self, stream_id, changeset_id, prev_counter):
        """Advance the global counter by one slot for the given changeset.

        Returns the new ``GlobalCounter``; the storage update is
        conditional on the counter still equalling ``prev_counter``.
        """
        next_page, next_item = self.checkpoint_calc.next_page_and_item(
            prev_counter.page, prev_counter.page_item)
        new_counter = GlobalCounter(next_page, next_item, stream_id,
                                    changeset_id)
        self.db.update_global_counter(prev_counter, new_counter)
        logger.debug(f"Counter increased from {prev_counter} to {new_counter}")
        return new_counter