Пример #1
0
def test_update_invalid_dependent_versions(msg_bus, r1):
    """An update whose dependent versions are ahead of the replica fails.

       After a single create on r1, an update is attempted with a
       dependent version vector naming version 20 of r1 itself. The
       update is expected to raise ValueError."""
    key = 'key1'
    r1.create(key, 'value1')

    # Build a dependent version vector that r1 cannot have produced yet
    future_deps = VersionVector()
    future_version = Version(r1.replica_id, 20)
    future_deps.update_version(future_version)

    with pytest.raises(ValueError):
        r1.update(key, 'new_value', future_deps)
Пример #2
0
 def __init__(self,
              version=None,
              timestamp=...,
              value=None):
     """Stores the version, timestamp and value supplied by the caller.

        Parameters:
           version - Stored as self.version. When omitted (or None), a new
                 Version() is created for this instance.
           timestamp - Stored as self.timestamp. When omitted, a new
                 VersionVector() is created for this instance. An
                 explicitly passed None is stored unchanged.
           value - Stored as self.value. Defaults to None."""
     # Build the defaults per call. A default written in the signature is
     # evaluated only once, so every instance created without arguments
     # would share (and could mutate) the very same Version and
     # VersionVector objects.
     if version is None:
         version = Version()
     # Ellipsis marks "argument not supplied" so that an explicit
     # timestamp=None is still stored as-is, exactly as before.
     if timestamp is ...:
         timestamp = VersionVector()
     self.version = version
     self.timestamp = timestamp
     self.value = value
Пример #3
0
def test_create_single(msg_bus, r1, r2):
    """A create on r1 is visible on r2 only after messages are delivered."""
    # The dependent versions expected after r1's first local operation
    origin_vv = VersionVector()
    origin_vv.inc_version(r1.replica_id)

    # Create on r1 only; nothing has been delivered to r2 yet
    r1.create('place', 'chicago')
    at_origin = r1.read('place')
    at_peer = r2.read('place')
    assert at_origin.dependent_versions == origin_vv
    assert at_origin.values == ['chicago']
    assert at_peer.dependent_versions == VersionVector()
    assert at_peer.values == []

    # Deliver the pending messages, then r2 must match r1's earlier read
    msg_bus.deliver_all()
    at_peer = r2.read('place')
    assert at_peer.dependent_versions == at_origin.dependent_versions
    assert at_peer.values == at_origin.values
Пример #4
0
    def __init__(self, replica_id, msg_bus):
        """Sets up an empty replica.

           Parameters:
              replica_id - Identifier of this replica; also used in the
                    logger name.
              msg_bus - Object used to send messages to other replicas."""
        self.logger = logging.getLogger("Replica-%s" % replica_id)
        self.replica_id, self.msg_bus = replica_id, msg_bus
        self.update_lock = threading.Lock()

        # TODO Load persistent state
        self.db = SimDataStore()
        self.knowledge = VersionSet()
        self.committed_visible = VersionVector()

        # Sanity checks on the freshly built state: both structures agree
        # on this replica's own version, and knowledge covers everything
        # that is committed visible.
        assert (self.knowledge.get_version(self.replica_id)
                == self.committed_visible.get_version(self.replica_id))
        assert self.knowledge.dominates_vv(self.committed_visible)

        # Working copy; it is folded back into committed_visible elsewhere
        self.visible = deepcopy(self.committed_visible)

        # Sync requestor state: no sync is running initially
        self.sync_in_progress = False
        self.sync_cookie = 0
        self.sync_replica = None
        self.sync_replica_visible = None
        self.sync_replica_knowledge = None
Пример #5
0
    def create(self, key, value):
        """Creates a new object under the given key holding the given value.

           Parameters:
              key - The identifier of the object to create (not None)
              value - The initial value of the object (not None)
           Returns:
              The version of the object that was created.
           Exceptions:
              DuplicateKeyException - If this replica already has a
                    visible, non-deleted value for the key."""
        assert key is not None
        assert value is not None

        self.logger.debug("create('%s', %s)", key, value)

        with self.update_lock:
            record = self.db.get(key)
            if record is None:
                # Nothing stored for this key: start from scratch
                deps = VersionVector()
                record = ObjectRecord()
            else:
                # The caller supplies no dependent versions for a create,
                # because from its point of view there are none.
                #
                # Internally, however, tombstones may still be stored for
                # the key. Those become the dependent versions so that the
                # create is ordered causally after the earlier deletions.
                deps, live_values = self._filter_visible_versions(record)
                if any(v is not None for v in live_values):
                    raise DuplicateKeyException()
            return self._local_update(record, key, value, deps)
Пример #6
0
    def _filter_visible_versions(self, obj_record):
        """Computes the currently visible, non-superseded versions of the
           given object record.

           Parameters:
              obj_record - The record whose `versions` list is examined.
           Returns:
              A tuple (version_vector, values): a VersionVector holding the
              version of every surviving object version, and a list with
              the value of each of them in the same order.
           Side effects:
              self.visible may be advanced (see the first step below)."""

        assert self.knowledge.dominates_vv(self.visible)
        assert self.visible.dominates(self.committed_visible)

        # First, filter out non-visible versions. An object o is visible
        # at replica r if r.visible dominates o.version OR r.knowledge
        # dominates o.timestamp. When the second case is true, we also update
        # r.visible so that o and all of its dependencies will be visible.
        # Eventually r.visible will be merged into r.committed_visible.

        visible_versions = []
        for ov in obj_record.versions:
            if self.visible.dominates_version(ov.version):
                visible_versions.append(ov)
            else:
                # visible doesn't dominate, therefore committed_visible
                # won't dominate, therefore the timestamp could not
                # have been optimized out
                assert not self.committed_visible.dominates_version(ov.version)
                assert ov.timestamp is not None

                if self.knowledge.dominates_vv(ov.timestamp):
                    # Latch in a swath of versions as visible
                    self.visible.update(ov.timestamp)
                    visible_versions.append(ov)

        # Now, of the visible versions, filter out the ones that have
        # been replaced by newer versions. Superseded entries are marked
        # by overwriting their slot with None rather than being removed,
        # so the indices stay stable during the pairwise comparison.
        for i in range(len(visible_versions)):
            if visible_versions[i] is None:
                continue
            for j in range(i + 1, len(visible_versions)):
                if visible_versions[j] is None:
                    continue

                # Timestamps must be present because there are multiple
                # versions
                assert visible_versions[i].timestamp is not None
                assert visible_versions[j].timestamp is not None

                if visible_versions[i].timestamp.dominates_version(
                        visible_versions[j].version):
                    visible_versions[j] = None
                elif visible_versions[j].timestamp.dominates_version(
                        visible_versions[i].version):
                    # Entry i itself is superseded; nothing more to compare
                    # it against
                    visible_versions[i] = None
                    break

        # Construct our final result from the entries that survived
        resulting_values = []
        resulting_vv = VersionVector()
        for ov in visible_versions:
            if ov is None:
                continue
            resulting_values.append(ov.value)

            # There must only be one version for any single replica. Otherwise
            # the replica had somehow conflicted itself.
            assert resulting_vv.get_version(ov.version.replica_id).counter == 0
            resulting_vv.update_version(ov.version)
        return (resulting_vv, resulting_values)
Пример #7
0
 def __init__(self, dependent_versions=None, values=None):
     """Stores the dependent versions and values supplied by the caller.

        Parameters:
           dependent_versions - Stored as self.dependent_versions. When
                 omitted (or None), a new VersionVector() is created for
                 this instance.
           values - Stored as self.values. When omitted (or None), a new
                 empty list is created for this instance."""
     # Build the defaults per call. Defaults written in the signature are
     # evaluated once, so every instance created without arguments would
     # share one list and one VersionVector; mutating either through one
     # instance would silently change all the others.
     if dependent_versions is None:
         dependent_versions = VersionVector()
     if values is None:
         values = []
     self.dependent_versions = dependent_versions
     self.values = values
Пример #8
0
class Replica:
    """One replica of a replicated key/value store that tracks causality
       with version vectors.

       State kept by a replica:
          db - The local data store mapping keys to object records.
          knowledge - Set of all versions this replica has received.
          visible - Version vector of the versions exposed to readers.
          committed_visible - Version vector that `visible` is merged into
                after every insert.
          update_lock - Guards every operation that inserts into `db`."""

    def __init__(self, replica_id, msg_bus):
        """Sets up an empty replica.

           Parameters:
              replica_id - Identifier of this replica; also used in the
                    logger name.
              msg_bus - Object used to send messages to other replicas."""
        self.logger = logging.getLogger("Replica-%s" % replica_id)
        self.replica_id = replica_id
        self.msg_bus = msg_bus
        self.update_lock = threading.Lock()

        # TODO Load persistent state
        self.db = SimDataStore()
        self.knowledge = VersionSet()
        self.committed_visible = VersionVector()
        assert self.knowledge.get_version(self.replica_id) == \
            self.committed_visible.get_version(self.replica_id)
        assert self.knowledge.dominates_vv(self.committed_visible)

        self.visible = deepcopy(self.committed_visible)

        # Initialize sync requestor state
        self.sync_in_progress = False
        self.sync_replica = None
        self.sync_cookie = 0
        self.sync_replica_visible = None
        self.sync_replica_knowledge = None

    def read(self, key):
        """Reads the value(s) of the given key. Returns a ReadTuple with
           the values and their associated update dependency versions.

           If there is no object identified by the given key, or every
           visible value is a tombstone (None), then an empty ReadTuple
           is returned."""
        assert key is not None

        result = ReadTuple()
        obj_record = self.db.get(key)
        if obj_record is None:
            return result

        result.dependent_versions, result.values = \
            self._filter_visible_versions(obj_record)
        # If all the values are tombstones then just return an empty list
        for v in result.values:
            if v is not None:
                # At least one value is not a tombstone
                return result

        return ReadTuple()

    def create(self, key, value):
        """Creates an object in the database identified by the given key
           and containing the given value.

           Returns:
              The version of the object that was created.
           Exceptions:
              DuplicateKeyException - If an object with the given key
                    already exists on this replica."""
        assert key is not None
        assert value is not None

        self.logger.debug("create('%s', %s)", key, value)

        with self.update_lock:
            obj_record = self.db.get(key)
            if obj_record is not None:
                # We don't require the caller to explicitly give us the
                # dependent versions for the operation. It's a create so there
                # really aren't any dependent versions from the caller's
                # perspective.
                #
                # However, if we are internally storing tombstones then those
                # have to be the dependent versions so the create occurs
                # causally after the previous deletions.
                dependent_versions, visible_values = \
                    self._filter_visible_versions(obj_record)
                for v in visible_values:
                    if v is not None:
                        raise DuplicateKeyException()
            else:
                dependent_versions = VersionVector()
                obj_record = ObjectRecord()
            return self._local_update(obj_record, key, value,
                                      dependent_versions)

    def update(self, key, value, dependent_versions):
        """Updates the value of the object with the given key. An object
           with the given key must have already been created.

           Parameters:
              key - The identifier of the object to update
              value - The new value of the object
              dependent_versions - The dependent versions returned by the
                    previous read() call for the same key.
           Returns:
              The version of the object that was created.
           Exceptions:
              NoSuchKeyException - If an object with the given key is not found
              ValueError - If dependent_versions is not dominated by what
                    this replica has made visible.
              ConcurrentUpdateException - If the value of the object has
                    changed since the read() call was performed."""
        assert key is not None
        assert value is not None
        assert type(dependent_versions) is VersionVector

        self.logger.debug("update('%s', %s, %s)", key, value,
                          dependent_versions)

        with self.update_lock:
            obj_record = self.db.get(key)
            if obj_record is None:
                raise NoSuchKeyException()
            return self._local_update(obj_record, key, value,
                                      dependent_versions)

    def delete(self, key, dependent_versions):
        """Deletes the object identified by the given key by writing a
           tombstone (None) value. Does nothing if the key is unknown.

           Parameters:
              key - The identifier of the object to delete
              dependent_versions - The dependent versions returned by the
                    previous read() call for the same key.

           Exceptions:
              ValueError - If dependent_versions is not dominated by what
                    this replica has made visible.
              ConcurrentUpdateException - If the value of the object has
                    changed since the read() call was performed."""
        assert key is not None
        assert type(dependent_versions) is VersionVector

        self.logger.debug("delete('%s', %s)", key, dependent_versions)

        with self.update_lock:
            obj_record = self.db.get(key)
            if obj_record is not None:
                self._local_update(obj_record, key, None, dependent_versions)

    def request_sync(self, sync_replica_id):
        """Requests a state sync from the given replica. Does nothing
           (besides logging) if a sync is already in progress."""
        if self.sync_in_progress:
            self.logger.info("Sync from %s already in progress",
                             self.sync_replica)
            return

        # Request a sync by sending the peer replica a request with our
        # current knowledge
        self.logger.info("Requesting state sync from %s", sync_replica_id)
        self.sync_replica = sync_replica_id
        # The cookie lets responses from an older sync be told apart
        self.sync_cookie = random.getrandbits(32)
        self.sync_replica_visible = None
        self.sync_replica_knowledge = None
        self.sync_in_progress = True
        self.msg_bus.send(
            self.replica_id, sync_replica_id,
            SyncRequestMessage(self.sync_cookie, deepcopy(self.knowledge)))

    def deliver_message(self, sender_id, msg):
        """Dispatches an incoming message to its handler based on the
           exact type of the message. Unknown types are logged and
           dropped."""
        if type(msg) is UpdateMessage:
            self.logger.debug("Processing UpdateMessage from %s", sender_id)
            self._process_update(sender_id, msg)
        elif type(msg) is SyncRequestMessage:
            self.logger.debug("Processing SyncRequestMessage from %s",
                              sender_id)
            self._process_sync_request(sender_id, msg)
        elif type(msg) is SyncResponseSetupMessage:
            self.logger.debug("Processing SyncResponseSetupMessage from %s",
                              sender_id)
            self._process_sync_response_setup(sender_id, msg)
        elif type(msg) is SyncResponseDataMessage:
            self.logger.debug("Processing SyncResponseDataMessage from %s",
                              sender_id)
            self._process_sync_response_data(sender_id, msg)
        elif type(msg) is SyncResponseCompleteMessage:
            self.logger.debug("Processing SyncResponseCompleteMessage from %s",
                              sender_id)
            self._process_sync_response_complete(sender_id, msg)
        else:
            # Logger.warn is a deprecated alias; use Logger.warning
            self.logger.warning("Received unknown message type from %s",
                                sender_id)

    def _process_update(self, sender_id, msg):
        """Inserts the object version carried by an UpdateMessage unless
           this replica already knows that version."""
        assert type(msg) is UpdateMessage

        with self.update_lock:
            if self.knowledge.has_version(msg.obj_ver.version):
                # We already have this object
                return

            obj_record = self.db.get(msg.key)
            if obj_record is None:
                obj_record = ObjectRecord()
            self._insert_object(obj_record, msg.key, msg.obj_ver)

    def _process_sync_request(self, requestor_id, req_msg):
        """Answers a sync request: sends a setup message, then one data
           message per object version the requestor does not know, then
           a completion message."""
        assert type(req_msg) is SyncRequestMessage
        cookie = req_msg.cookie
        requestor_knowledge = req_msg.requestor_knowledge

        # Send all necessary objects back to the requestor.
        # *** In this simulation we assume that some prefix of these
        # *** messages are delivered in order.
        # We'll use self.committed_visible as our replacement timestamp
        self.msg_bus.send(
            self.replica_id, requestor_id,
            SyncResponseSetupMessage(cookie, deepcopy(self.knowledge),
                                     deepcopy(self.committed_visible)))
        for k in self.db.iterkeys():
            obj_record = self.db.get(k)
            discard_timestamp_for_replacement_vv(obj_record,
                                                 self.committed_visible)

            for obj_ver in obj_record.versions:
                if requestor_knowledge.has_version(obj_ver.version):
                    continue
                self.msg_bus.send(self.replica_id, requestor_id,
                                  SyncResponseDataMessage(cookie, k, obj_ver))
        self.msg_bus.send(self.replica_id, requestor_id,
                          SyncResponseCompleteMessage(cookie))

    def _process_sync_response_setup(self, sender_id, msg):
        """Records the server's knowledge and visible vector for the sync
           in progress. Messages that do not belong to the current sync
           (no sync running, other sender, other cookie) are ignored."""
        if not self.sync_in_progress:
            return
        if sender_id != self.sync_replica or msg.cookie != self.sync_cookie:
            return

        assert type(msg.server_knowledge) is VersionSet
        assert type(msg.server_visible) is VersionVector
        assert msg.server_knowledge.dominates_vv(msg.server_visible)
        self.sync_replica_knowledge = msg.server_knowledge
        self.sync_replica_visible = msg.server_visible

    def _process_sync_response_data(self, sender_id, msg):
        """Inserts one object version received during a sync. Messages
           that do not belong to the current sync, or that carry a version
           already known, are ignored."""
        if not self.sync_in_progress:
            return
        if sender_id != self.sync_replica or msg.cookie != self.sync_cookie:
            return
        if self.knowledge.has_version(msg.obj_ver.version):
            return

        # The setup message must have been processed before any data
        assert type(self.sync_replica_knowledge) is VersionSet
        assert type(self.sync_replica_visible) is VersionVector

        # A missing timestamp is replaced by the visible vector the server
        # announced in its setup message
        if msg.obj_ver.timestamp is None:
            msg.obj_ver.timestamp = deepcopy(self.sync_replica_visible)

        with self.update_lock:
            obj_record = self.db.get(msg.key)
            if obj_record is None:
                obj_record = ObjectRecord()
            self._insert_object(obj_record, msg.key, msg.obj_ver)

    def _process_sync_response_complete(self, sender_id, msg):
        """Finishes the sync in progress by merging the server's knowledge
           and visible vector into this replica's state. Messages that do
           not belong to the current sync are ignored."""
        if not self.sync_in_progress:
            return
        if sender_id != self.sync_replica or msg.cookie != self.sync_cookie:
            return

        assert type(self.sync_replica_knowledge) is VersionSet
        assert type(self.sync_replica_visible) is VersionVector

        self.logger.info(
            "Sync from %s completed. Merging in knowledge=%s and visible=%s",
            self.sync_replica, self.sync_replica_knowledge,
            self.sync_replica_visible)

        # Merge the server's knowledge into our knowledge. This will
        # fill in version number gaps for versions that the server knew
        # about but no longer exist
        with self.update_lock:
            self.knowledge.merge(self.sync_replica_knowledge)
            self.visible.update(self.sync_replica_visible)
            self.committed_visible.update(self.visible)
        self.sync_in_progress = False
        self.sync_replica_knowledge = None
        self.sync_replica_visible = None

    def _filter_visible_versions(self, obj_record):
        """Computes the currently visible, non-superseded versions of the
           given object record.

           Returns:
              A tuple (version_vector, values): a VersionVector holding the
              version of every surviving object version, and a list with
              the value of each of them in the same order.
           Side effects:
              self.visible may be advanced (see the first step below)."""

        assert self.knowledge.dominates_vv(self.visible)
        assert self.visible.dominates(self.committed_visible)

        # First, filter out non-visible versions. An object o is visible
        # at replica r if r.visible dominates o.version OR r.knowledge
        # dominates o.timestamp. When the second case is true, we also update
        # r.visible so that o and all of its dependencies will be visible.
        # Eventually r.visible will be merged into r.committed_visible.

        visible_versions = []
        for ov in obj_record.versions:
            if self.visible.dominates_version(ov.version):
                visible_versions.append(ov)
            else:
                # visible doesn't dominate, therefore committed_visible
                # won't dominate, therefore the timestamp could not
                # have been optimized out
                assert not self.committed_visible.dominates_version(ov.version)
                assert ov.timestamp is not None

                if self.knowledge.dominates_vv(ov.timestamp):
                    # Latch in a swath of versions as visible
                    self.visible.update(ov.timestamp)
                    visible_versions.append(ov)

        # Now, of the visible versions, filter out the ones that have
        # been replaced by newer versions. Superseded entries are marked
        # by overwriting their slot with None rather than being removed,
        # so the indices stay stable during the pairwise comparison.
        for i in range(len(visible_versions)):
            if visible_versions[i] is None:
                continue
            for j in range(i + 1, len(visible_versions)):
                if visible_versions[j] is None:
                    continue

                # Timestamps must be present because there are multiple
                # versions
                assert visible_versions[i].timestamp is not None
                assert visible_versions[j].timestamp is not None

                if visible_versions[i].timestamp.dominates_version(
                        visible_versions[j].version):
                    visible_versions[j] = None
                elif visible_versions[j].timestamp.dominates_version(
                        visible_versions[i].version):
                    # Entry i itself is superseded; nothing more to compare
                    # it against
                    visible_versions[i] = None
                    break

        # Construct our final result
        resulting_values = []
        resulting_vv = VersionVector()
        for ov in visible_versions:
            if ov is None:
                continue
            resulting_values.append(ov.value)

            # There must only be one version for any single replica. Otherwise
            # the replica had somehow conflicted itself.
            assert resulting_vv.get_version(ov.version.replica_id).counter == 0
            resulting_vv.update_version(ov.version)
        return (resulting_vv, resulting_values)

    def _local_update(self, obj_record, key, value, dependent_versions):
        """Writes a new locally created version of an object and
           broadcasts it. Update lock must be held.

           Parameters:
              obj_record - The record the new version is added to
              key - The identifier of the object
              value - The new value (None for a tombstone)
              dependent_versions - The versions the caller based the
                    update on
           Returns:
              The version assigned to the new object version.
           Exceptions:
              ValueError - If dependent_versions is not dominated by
                    self.visible.
              ConcurrentUpdateException - If dependent_versions differs
                    from the currently visible versions of the object."""
        assert type(obj_record) is ObjectRecord
        assert key is not None
        assert type(dependent_versions) is VersionVector
        assert self.update_lock.locked()

        visible_versions = self._filter_visible_versions(obj_record)[0]
        # If the set of versions is different then the update cannot proceed.
        # The caller must resolve the conflict and retry. This is due to the
        # restriction that a replica must create objects that are causally
        # after all objects that it already knows about. (Due to the
        # replica-granularity logical clock)
        if not self.visible.dominates(dependent_versions):
            raise ValueError("Dependent versions from the future")
        if visible_versions != dependent_versions:
            raise ConcurrentUpdateException()

        assert self.knowledge.dominates_vv(self.visible)
        assert self.knowledge.get_version(self.replica_id) == \
            self.visible.get_version(self.replica_id)
        assert self.visible.dominates(self.committed_visible)
        assert self.visible.get_version(self.replica_id) == \
            self.committed_visible.get_version(self.replica_id)

        # Next version of this replica; its timestamp is everything
        # currently visible plus the new version itself
        ver = self.knowledge.get_version(self.replica_id)
        ver.counter += 1
        obj_ver = ObjectVersion(ver, deepcopy(self.visible), value)
        obj_ver.timestamp.update_version(ver)

        # Copy before inserting: the broadcast uses the copy, not the
        # object that was handed to _insert_object
        obj_ver_copy = deepcopy(obj_ver)
        self._insert_object(obj_record, key, obj_ver)
        assert self.committed_visible.dominates(obj_ver_copy.timestamp)
        self.msg_bus.broadcast(self.replica_id,
                               UpdateMessage(key, obj_ver_copy))
        return ver

    def _insert_object(self, obj_record, key, obj_ver):
        """Insert an object and possibly make it visible. Update lock
           must be held"""
        assert type(obj_record) is ObjectRecord
        assert key is not None
        assert type(obj_ver) is ObjectVersion
        assert not self.knowledge.has_version(obj_ver.version)
        assert obj_ver.timestamp is not None
        assert self.update_lock.locked()

        self.logger.debug("Inserting object '%s' version %s, timestamp=%s",
                          key, obj_ver.version, obj_ver.timestamp)

        # Reconstruct the timestamps for existing versions while we
        # integrate the new object version
        for ov in obj_record.versions:
            if ov.timestamp is None:
                # It is safe to replace the timestamp with committed_visible
                # because committed_visible satisfies all the constraints
                # for a timestamp that has been discarded
                ov.timestamp = deepcopy(self.committed_visible)

        obj_record.versions.append(obj_ver)
        self.knowledge.insert_version(obj_ver.version)
        if self.knowledge.dominates_vv(obj_ver.timestamp):
            self.visible.update(obj_ver.timestamp)

        # TODO recalculate visible_versions more efficiently
        visible_versions = self._filter_visible_versions(obj_record)[0]

        # Filter out versions no longer needed. A version needs to be
        # retained when:
        #  * It is visible; OR
        #  * It has not yet been made visible
        # Iterate backwards so deleting by index does not shift the
        # entries still to be examined.
        for i in range(len(obj_record.versions) - 1, -1, -1):
            candidate = obj_record.versions[i]
            if candidate.version == visible_versions.get_version(
                    candidate.version.replica_id):
                # Object version is visible. Keep it!
                continue
            if not self.visible.dominates_version(candidate.version):
                # Object version has not yet been made visible
                continue
            del obj_record.versions[i]
        assert len(obj_record.versions) > 0

        discard_timestamp_for_replacement_vv(obj_record, self.visible)

        self.db.put(key, obj_record)
        self.committed_visible.update(self.visible)
Пример #9
0
def test_create_conflict(msg_bus, r1, r2, r3):
    """Concurrent creates of one key on two replicas surface as a
       two-valued conflict on every replica once messages are delivered."""
    vv_r1 = VersionVector()
    vv_r1.inc_version(r1.replica_id)
    vv_r2 = VersionVector()
    vv_r2.inc_version(r2.replica_id)
    # The merged expectation covers both creates
    vv_both = VersionVector()
    for part in (vv_r1, vv_r2):
        vv_both.update(part)

    # Create the same key on r1 and r2 without delivering any messages
    r1.create('place', 'chicago')
    r2.create('place', 'munich')

    # Before delivery each replica only sees its own value
    local_expectations = (
        (r1, vv_r1, ['chicago']),
        (r2, vv_r2, ['munich']),
    )
    for replica, expected_vv, expected_values in local_expectations:
        seen = replica.read('place')
        assert seen.dependent_versions == expected_vv
        assert seen.values == expected_values

    # Replicate around
    msg_bus.deliver_all()

    # Every replica, including the bystander r3, now sees both values
    for replica in (r1, r2, r3):
        merged = replica.read('place')
        assert merged.dependent_versions == vv_both
        assert sorted(merged.values) == ['chicago', 'munich']
Пример #10
0
def test_delete_nonexistant(r1):
    """Deleting a key that was never created completes without raising."""
    no_dependents = VersionVector()
    r1.delete('fakekey', no_dependents)
Пример #11
0
def test_update_known_nonexistant(r1):
    """Updating a key that was never created raises NoSuchKeyException."""
    no_dependents = VersionVector()
    with pytest.raises(NoSuchKeyException):
        r1.update('fakekey', 'the_value', no_dependents)