def request_push( push_url, extra_data=None,
                  encode=None, decode=None, headers=None,
                  timeout=None, extensions=True, session=None ):
    """Push unversioned local operations to the server at *push_url*.

    Builds a PushMessage from the compressed local operation log, POSTs
    it, records the server-assigned version locally, and returns the
    decoded response body.

    Raises PullSuggested when the server reply indicates a pull is
    needed first, and PushRejected on any other non-2xx or malformed
    response.
    """
    push_msg = PushMessage()
    push_msg.latest_version_id = core.get_latest_version_id(session=session)
    compress(session=session)
    push_msg.add_unversioned_operations(
        session=session, include_extensions=extensions)
    push_msg.set_node(
        session.query(Node).order_by(Node.node_id.desc()).first())

    payload = push_msg.to_json()
    payload.update({"extra_data": extra_data or {}})

    code, reason, response = post_request(
        push_url, payload, encode, decode, headers, timeout)

    accepted = (code // 100 == 2) and response is not None
    if not accepted:
        if suggests_pull is not None and suggests_pull(code, reason, response):
            raise PullSuggested(code, reason, response)
        raise PushRejected(code, reason, response)

    new_version_id = response.get("new_version_id")
    if new_version_id is None:
        raise PushRejected(
            code, reason,
            {"error": "server didn't respond with new version id",
             "response": response})

    # Who should set the dates? Maybe send a complete Version from the
    # server. For now the field is ignored, so it doesn't matter.
    session.add(Version(version_id=new_version_id,
                        created=datetime.datetime.now()))
    for op in push_msg.operations:
        op.version_id = new_version_id

    # hand the decoded response back to the caller
    return response
def test_compression_consistency():
    """After compress(), the remaining log must equal compressed_operations
    applied to the original operations."""
    addstuff()
    changestuff()
    session = Session()
    original_ops = session.query(models.Operation).all()
    compress()
    remaining = session.query(models.Operation).\
        order_by(models.Operation.order).all()
    assert remaining == compressed_operations(original_ops)
def test_compression_consistency():
    """compress() must leave exactly what compressed_operations predicts."""
    addstuff()
    changestuff()
    session = Session()
    before = session.query(models.Operation).all()
    compress()
    ordered = session.query(models.Operation).order_by(
        models.Operation.order)
    assert ordered.all() == compressed_operations(before)
def test_compression():
    """Compression must fold redundant operations into 4 plain inserts."""
    addstuff()
    changestuff()
    compress()  # remove unnecessary operations
    session = Session()
    # expected per-command counts after compression, checked in order
    expected = [
        ('i', 4, "insert operations don't match"),
        ('u', 0, "update operations don't match"),
        ('d', 0, "delete operations don't match"),
    ]
    for command, count, message in expected:
        found = session.query(models.Operation).\
            filter(models.Operation.command == command).count()
        assert found == count, message
def request_push(push_url: str, extra_data=None,
                 encode=None, decode=None, headers=None,
                 timeout=None, extensions=True,
                 session=None) -> Dict[str, Any]:
    """Send unversioned local operations to *push_url* and version them.

    Returns an empty dict when there is nothing to push, otherwise the
    decoded server response. Raises PullSuggested when the server asks
    for a pull first, and PushRejected on any other failure.
    """
    msg = PushMessage()
    msg.latest_version_id = core.get_latest_version_id(session=session)
    compress(session=session)
    msg.add_unversioned_operations(
        session=session, include_extensions=extensions)

    # nothing to send: skip the network round trip entirely
    if not msg.operations:
        return {}

    msg.set_node(session.query(Node).order_by(Node.node_id.desc()).first())

    payload = msg.to_json()
    payload.update({'extra_data': extra_data or {}})

    code, reason, response = post_request(
        push_url, payload, encode, decode, headers, timeout)

    if code // 100 != 2 or response is None:
        if suggests_pull is not None and suggests_pull(code, reason, response):
            raise PullSuggested(code, reason, response)
        raise PushRejected(code, reason, response)

    new_version_id = response.get('new_version_id')
    if new_version_id is None:
        raise PushRejected(
            code, reason,
            {'error': "server didn't respond with new version id",
             'response': response})

    # Who should set the dates? Maybe send a complete Version from the
    # server. For now the field is ignored, so it doesn't matter.
    session.add(
        Version(version_id=new_version_id, created=datetime.datetime.now()))
    for op in msg.operations:
        op.version_id = new_version_id

    # give the response back so the caller can inspect it
    return response
def create_push_message(self,
                        session: Optional[
                            sqlalchemy.orm.session.Session] = None,
                        extensions=True,
                        do_compress=True) -> PushMessage:
    """Assemble a PushMessage containing all unversioned local operations.

    A new session is opened from ``self.Session`` when none is supplied.
    When *do_compress* is true, the local operation log is compressed
    before the message is filled.

    TODO (translated from German): with do_compress=True it still needs
    to be tested which scenarios can violate referential integrity; if
    the tables are synchronized in the right order, suspending RI checks
    might be unnecessary.
    """
    if not session:
        session = self.Session()
    message = PushMessage()
    message.latest_version_id = core.get_latest_version_id(session=session)
    if do_compress:
        compress(session=session)
    message.add_unversioned_operations(
        session=session, include_extensions=extensions)
    return message
def merge(pull_message, session=None):
    """
    Merges a message from the server with the local database.

    *pull_message* is an instance of dbsync.messages.pull.PullMessage.

    *session* is the database session used for all local reads and
    writes during the merge.

    Raises TypeError for a wrong message type, and UniqueConstraintError
    when unique-constraint conflicts can't be resolved automatically.
    """
    if not isinstance(pull_message, PullMessage):
        raise TypeError("need an instance of dbsync.messages.pull.PullMessage "
                        "to perform the local merge operation")
    # content-type ids of the models registered for synchronization;
    # remote operations on anything else are filtered out below
    valid_cts = set(ct for ct in core.synched_models.ids)

    # compress() also returns the local operations not yet versioned
    unversioned_ops = compress(session=session)
    pull_ops = filter(
        attr('content_type_id').in_(valid_cts),
        pull_message.operations)
    pull_ops = compressed_operations(pull_ops)

    # I) first phase: resolve unique constraint conflicts if
    # possible. Abort early if a human error is detected
    unique_conflicts, unique_errors = find_unique_conflicts(
        pull_ops, unversioned_ops, pull_message, session)

    if unique_errors:
        raise UniqueConstraintError(unique_errors)

    conflicting_objects = set()
    for uc in unique_conflicts:
        obj = uc['object']
        conflicting_objects.add(obj)
        # overwrite the conflicting columns with the server's values
        for key, value in izip(uc['columns'], uc['new_values']):
            setattr(obj, key, value)
    # Resolve potential cyclical conflicts by deleting and reinserting
    for obj in conflicting_objects:
        make_transient(obj)  # remove from session
    for model in set(type(obj) for obj in conflicting_objects):
        pk_name = get_pk(model)
        pks = [
            getattr(obj, pk_name)
            for obj in conflicting_objects
            if type(obj) is model
        ]
        session.query(model).filter(getattr(model, pk_name).in_(pks)).\
            delete(synchronize_session=False)  # remove from the database
    session.add_all(conflicting_objects)  # reinsert them
    session.flush()

    # II) second phase: detect conflicts between pulled operations and
    # unversioned ones
    direct_conflicts = find_direct_conflicts(pull_ops, unversioned_ops)

    # in which the delete operation is registered on the pull message
    dependency_conflicts = find_dependency_conflicts(
        pull_ops, unversioned_ops, session)

    # in which the delete operation was performed locally
    reversed_dependency_conflicts = find_reversed_dependency_conflicts(
        pull_ops, unversioned_ops, pull_message)

    insert_conflicts = find_insert_conflicts(pull_ops, unversioned_ops)

    # III) third phase: perform pull operations, when allowed and
    # while resolving conflicts

    # local operations paired with remote operation *op* in *conflicts*
    def extract(op, conflicts):
        return [local for remote, local in conflicts if remote is op]

    # drop a local operation from the log and from every conflict list
    def purgelocal(local):
        session.delete(local)
        exclude = lambda tup: tup[1] is not local
        mfilter(exclude, direct_conflicts)
        mfilter(exclude, dependency_conflicts)
        mfilter(exclude, reversed_dependency_conflicts)
        mfilter(exclude, insert_conflicts)
        unversioned_ops.remove(local)

    for pull_op in pull_ops:
        # flag to control whether the remote operation is free of obstacles
        can_perform = True
        # flag to detect the early exclusion of a remote operation
        reverted = False
        # the class of the operation
        class_ = pull_op.tracked_model

        direct = extract(pull_op, direct_conflicts)
        if direct:
            if pull_op.command == 'd':
                can_perform = False
            for local in direct:
                pair = (pull_op.command, local.command)
                if pair == ('u', 'u'):
                    can_perform = False  # favor local changes over remote ones
                elif pair == ('u', 'd'):
                    pull_op.command = 'i'  # negate the local delete
                    purgelocal(local)
                elif pair == ('d', 'u'):
                    local.command = 'i'  # negate the remote delete
                    session.flush()
                    reverted = True
                else:  # ('d', 'd')
                    purgelocal(local)

        dependency = extract(pull_op, dependency_conflicts)
        if dependency and not reverted:
            can_perform = False
            order = min(op.order for op in unversioned_ops)
            # first move all operations further in order, to make way
            # for the new one
            for op in unversioned_ops:
                op.order = op.order + 1
            session.flush()
            # then create operation to reflect the reinsertion and
            # maintain a correct operation history
            session.add(
                Operation(row_id=pull_op.row_id,
                          content_type_id=pull_op.content_type_id,
                          command='i',
                          order=order))

        reversed_dependency = extract(pull_op, reversed_dependency_conflicts)
        for local in reversed_dependency:
            # reinsert record
            local.command = 'i'
            local.perform(pull_message, session)
            # delete trace of deletion
            purgelocal(local)

        insert = extract(pull_op, insert_conflicts)
        for local in insert:
            session.flush()
            # NOTE(review): assumes integer surrogate keys — the local
            # record is renumbered past the highest id seen on either side
            next_id = max(max_remote(class_, pull_message),
                          max_local(class_, session)) + 1
            update_local_id(local.row_id, next_id, class_, session)
            local.row_id = next_id
        if can_perform:
            pull_op.perform(pull_message, session)
            session.flush()

    # IV) fourth phase: insert versions from the pull_message
    for pull_version in pull_message.versions:
        session.add(pull_version)
def pull(pull_url, extra_data=None,
         encode=None, decode=None, headers=None, monitor=None,
         timeout=None, include_extensions=True):
    """
    Attempts a pull from the server and returns the response body.

    *extra_data*, when given, is a dictionary of additional values sent
    along with the request. Unless an error interrupts it, the pull
    finishes with a local merge. An inappropriate server response
    raises a dbsync.client.pull.BadResponseError.

    *encode* defaults to ``json.dumps``, *decode* to ``json.loads``,
    and *headers* to HTTP headers appropriate for JSON.

    *monitor*, when given, is a routine receiving dictionaries that
    describe the state of the request and merge procedure.

    *include_extensions* controls whether extension functions are
    called during the merge. Default is ``True``.
    """
    assert isinstance(pull_url, basestring), "pull url must be a string"
    assert bool(pull_url), "pull url can't be empty"
    if extra_data is not None:
        assert isinstance(extra_data, dict), "extra data must be a dictionary"

    # forward progress events only when a monitor was supplied
    def notify(event):
        if monitor:
            monitor(event)

    request_message = PullRequestMessage()
    for op in compress():
        request_message.add_operation(op)
    data = request_message.to_json()
    data.update({'extra_data': extra_data or {}})

    code, reason, response = post_request(
        pull_url, data, encode, decode, headers, timeout, monitor)

    if code // 100 != 2:
        notify({'status': "error", 'reason': reason.lower()})
        raise BadResponseError(code, reason, response)
    if response is None:
        notify({'status': "error", 'reason': "invalid response format"})
        raise BadResponseError(code, reason, response)

    try:
        message = PullMessage(response)
    except KeyError:
        notify({'status': "error", 'reason': "invalid message format"})
        raise BadResponseError(
            "response object isn't a valid PullMessage", response)

    notify({'status': "merging", 'operations': len(message.operations)})
    merge(message, include_extensions=include_extensions)
    notify({'status': "done"})
    # the response is returned so the caller can use it afterwards
    return response
def pull(pull_url, extra_data=None,
         encode=None, decode=None, headers=None, monitor=None,
         timeout=None, include_extensions=True):
    """
    Attempts a pull from the server. Returns the response body.

    Additional values can be sent with the request through
    *extra_data*, a dictionary. If not interrupted, the pull performs a
    local merge. An inappropriate server response raises a
    dbsync.client.pull.BadResponseError.

    By default the *encode* function is ``json.dumps``, the *decode*
    function is ``json.loads``, and the *headers* are appropriate HTTP
    headers for JSON.

    *monitor* should be a routine receiving a dictionary with
    information on the state of the request and merge procedure.

    *include_extensions* dictates whether extension functions are
    called during the merge. Default is ``True``.
    """
    assert isinstance(pull_url, basestring), "pull url must be a string"
    assert bool(pull_url), "pull url can't be empty"
    if extra_data is not None:
        assert isinstance(extra_data, dict), "extra data must be a dictionary"

    req = PullRequestMessage()
    for operation in compress():
        req.add_operation(operation)
    payload = req.to_json()
    payload.update({'extra_data': extra_data or {}})

    code, reason, response = post_request(
        pull_url, payload, encode, decode, headers, timeout, monitor)

    # a non-2xx status and a missing body are both fatal; the monitor
    # message distinguishes which one occurred
    bad_status = code // 100 != 2
    if bad_status or response is None:
        if monitor:
            monitor({
                'status': "error",
                'reason': reason.lower() if bad_status
                          else "invalid response format"})
        raise BadResponseError(code, reason, response)

    try:
        message = PullMessage(response)
    except KeyError:
        if monitor:
            monitor({
                'status': "error",
                'reason': "invalid message format"})
        raise BadResponseError(
            "response object isn't a valid PullMessage", response)

    if monitor:
        monitor({
            'status': "merging",
            'operations': len(message.operations)})
    merge(message, include_extensions=include_extensions)
    if monitor:
        monitor({'status': "done"})
    # return the response body for further use by the caller
    return response
def merge(pull_message, session=None):
    """
    Merges a message from the server with the local database.

    *pull_message* is an instance of dbsync.messages.pull.PullMessage.

    *session* is the database session used for all local reads and
    writes during the merge.

    Raises TypeError for a wrong message type, and UniqueConstraintError
    when unique-constraint conflicts can't be resolved automatically.
    """
    if not isinstance(pull_message, PullMessage):
        raise TypeError("need an instance of dbsync.messages.pull.PullMessage "
                        "to perform the local merge operation")
    # content-type ids of the models registered for synchronization;
    # remote operations on anything else are filtered out below
    valid_cts = set(ct for ct in core.synched_models.ids)

    # compress() also returns the local operations not yet versioned
    unversioned_ops = compress(session=session)
    pull_ops = filter(attr('content_type_id').in_(valid_cts),
                      pull_message.operations)
    pull_ops = compressed_operations(pull_ops)

    # I) first phase: resolve unique constraint conflicts if
    # possible. Abort early if a human error is detected
    unique_conflicts, unique_errors = find_unique_conflicts(
        pull_ops, unversioned_ops, pull_message, session)

    if unique_errors:
        raise UniqueConstraintError(unique_errors)

    conflicting_objects = set()
    for uc in unique_conflicts:
        obj = uc['object']
        conflicting_objects.add(obj)
        # overwrite the conflicting columns with the server's values
        for key, value in izip(uc['columns'], uc['new_values']):
            setattr(obj, key, value)
    # Resolve potential cyclical conflicts by deleting and reinserting
    for obj in conflicting_objects:
        make_transient(obj)  # remove from session
    for model in set(type(obj) for obj in conflicting_objects):
        pk_name = get_pk(model)
        pks = [getattr(obj, pk_name)
               for obj in conflicting_objects
               if type(obj) is model]
        session.query(model).filter(getattr(model, pk_name).in_(pks)).\
            delete(synchronize_session=False)  # remove from the database
    session.add_all(conflicting_objects)  # reinsert them
    session.flush()

    # II) second phase: detect conflicts between pulled operations and
    # unversioned ones
    direct_conflicts = find_direct_conflicts(pull_ops, unversioned_ops)

    # in which the delete operation is registered on the pull message
    dependency_conflicts = find_dependency_conflicts(
        pull_ops, unversioned_ops, session)

    # in which the delete operation was performed locally
    reversed_dependency_conflicts = find_reversed_dependency_conflicts(
        pull_ops, unversioned_ops, pull_message)

    insert_conflicts = find_insert_conflicts(pull_ops, unversioned_ops)

    # III) third phase: perform pull operations, when allowed and
    # while resolving conflicts

    # local operations paired with remote operation *op* in *conflicts*
    def extract(op, conflicts):
        return [local for remote, local in conflicts if remote is op]

    # drop a local operation from the log and from every conflict list
    def purgelocal(local):
        session.delete(local)
        exclude = lambda tup: tup[1] is not local
        mfilter(exclude, direct_conflicts)
        mfilter(exclude, dependency_conflicts)
        mfilter(exclude, reversed_dependency_conflicts)
        mfilter(exclude, insert_conflicts)
        unversioned_ops.remove(local)

    for pull_op in pull_ops:
        # flag to control whether the remote operation is free of obstacles
        can_perform = True
        # flag to detect the early exclusion of a remote operation
        reverted = False
        # the class of the operation
        class_ = pull_op.tracked_model

        direct = extract(pull_op, direct_conflicts)
        if direct:
            if pull_op.command == 'd':
                can_perform = False
            for local in direct:
                pair = (pull_op.command, local.command)
                if pair == ('u', 'u'):
                    can_perform = False  # favor local changes over remote ones
                elif pair == ('u', 'd'):
                    pull_op.command = 'i'  # negate the local delete
                    purgelocal(local)
                elif pair == ('d', 'u'):
                    local.command = 'i'  # negate the remote delete
                    session.flush()
                    reverted = True
                else:  # ('d', 'd')
                    purgelocal(local)

        dependency = extract(pull_op, dependency_conflicts)
        if dependency and not reverted:
            can_perform = False
            order = min(op.order for op in unversioned_ops)
            # first move all operations further in order, to make way
            # for the new one
            for op in unversioned_ops:
                op.order = op.order + 1
            session.flush()
            # then create operation to reflect the reinsertion and
            # maintain a correct operation history
            session.add(Operation(row_id=pull_op.row_id,
                                  content_type_id=pull_op.content_type_id,
                                  command='i',
                                  order=order))

        reversed_dependency = extract(pull_op, reversed_dependency_conflicts)
        for local in reversed_dependency:
            # reinsert record
            local.command = 'i'
            local.perform(pull_message, session)
            # delete trace of deletion
            purgelocal(local)

        insert = extract(pull_op, insert_conflicts)
        for local in insert:
            session.flush()
            # NOTE(review): assumes integer surrogate keys — the local
            # record is renumbered past the highest id seen on either side
            next_id = max(max_remote(class_, pull_message),
                          max_local(class_, session)) + 1
            update_local_id(local.row_id, next_id, class_, session)
            local.row_id = next_id
        if can_perform:
            pull_op.perform(pull_message, session)
            session.flush()

    # IV) fourth phase: insert versions from the pull_message
    for pull_version in pull_message.versions:
        session.add(pull_version)
async def run_pull(
        self,
        session: Optional[sqlalchemy.orm.session.Session] = None,
        extra_data: Optional[Dict[str, Any]] = None,
        monitor: Optional[Callable[[Dict[str, Any]], None]] = None):
    """Request a PullMessage over the websocket and merge it locally.

    Sends a PullRequestMessage built from the compressed local operation
    log over ``self.websocket``, waits for the server's reply, merges it
    into the local database and returns the decoded response.

    *monitor*, when given, is called with progress/error dictionaries.
    Raises BadResponseError when the reply isn't a valid PullMessage.
    """
    # extension functions are deliberately disabled for websocket pulls
    include_extensions = False
    if extra_data is None:
        extra_data = {}
    assert isinstance(extra_data, dict), "extra data must be a dictionary"
    logger.info("run_pull begin")

    if not session:
        session = self.Session()

    request_message = PullRequestMessage()
    for op in compress():
        request_message.add_operation(op)
    data = request_message.to_json()
    data.update({'extra_data': extra_data or {}})
    msg = json.dumps(data, cls=SyncdbJSONEncoder)

    st = time.time()
    logger.info("requesting PullMessage")
    await self.websocket.send(msg)
    logger.info(f"sent msg {time.time() - st}")
    # XXX: must be possible to fetch in a loop because this response can
    # be fairly huge
    response_str = await self.websocket.recv()
    logger.info(
        f"@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@received : {len(response_str)} in {time.time()-st} seconds"
    )
    response = json.loads(response_str)

    try:
        message = PullMessage(response)
    except KeyError:
        if monitor:
            monitor({
                'status': "error",
                'reason': "invalid message format"
            })
        raise BadResponseError("response object isn't a valid PullMessage",
                               response)

    logger.info(
        f"pull message contains {len(message.operations)} operations")
    if monitor:
        monitor({
            'status': "merging",
            'operations': len(message.operations)
        })
    logger.info("merging PullMessage...")
    await merge(message,
                include_extensions=include_extensions,
                websocket=self.websocket)
    # TODO: request_payload etc.
    if monitor:
        monitor({'status': "done"})

    # return the response for the programmer to do what she wants
    # afterwards
    return response