def test_change_xref(self):
    """Reversing a decision on the same pair keeps two linkages.

    A positive decision on (a1, a2) must yield two linkages sharing one
    profile and one decision value; a later negative decision on the same
    pair must leave two linkages whose decision values differ.
    """
    user = self.create_user('user')
    collection = self.create_collection()

    def decide(decision):
        # Build a fresh xref for the same pair each time, apply the
        # decision and return the resulting linkages.
        pair = {
            'entity_id': 'a1',
            'collection_id': collection.id,
            'match_id': 'a2',
            'match_collection_id': collection.id,
        }
        decide_xref(pair, decision=decision,
                    context_id=user.id, decider_id=user.id)
        return list(Linkage.all())

    linkages = decide(True)
    assert len(linkages) == 2, linkages
    first, second = linkages
    assert first.profile_id == second.profile_id, linkages
    assert first.decision == second.decision, linkages

    linkages = decide(False)
    assert len(linkages) == 2, linkages
    first, second = linkages
    assert first.decision != second.decision, linkages
def delete_collection(collection, keep_metadata=False, sync=False):
    """Delete the contents of a collection and, optionally, its metadata.

    The collection's queue is cancelled, its aggregator dropped, and its
    notifications, indexed entities and xref index entries are removed.
    Entities, mappings, diagrams and documents are then deleted from the
    database. Unless ``keep_metadata`` is set, linkages, permissions, the
    collection record itself and its index entry are deleted as well.

    ``sync`` is forwarded to the notification, entity index and xref index
    clean-up calls; the collection index deletion and the final refresh
    always run with ``sync=True``.
    """
    drop_metadata = not keep_metadata

    cancel_queue(collection)
    aggregator = get_aggregator(collection)
    try:
        aggregator.drop()
    finally:
        # Always release the aggregator, even if the drop fails.
        aggregator.close()

    flush_notifications(collection, sync=sync)
    index.delete_entities(collection.id, sync=sync)
    xref_index.delete_xref(collection, sync=sync)

    # Re-use an existing deletion timestamp if the collection has one.
    deleted_at = collection.deleted_at
    if not deleted_at:
        deleted_at = datetime.utcnow()

    for model in (Entity, Mapping, Diagram):
        model.delete_by_collection(collection.id, deleted_at=deleted_at)
    Document.delete_by_collection(collection.id)

    if drop_metadata:
        # Considering linkages metadata for now, might be wrong:
        Linkage.delete_by_collection(collection.id)
        Permission.delete_by_collection(collection.id, deleted_at=deleted_at)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()

    if drop_metadata:
        index.delete_collection(collection.id, sync=True)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
def index():
    """Returns a list of linkages for god entities
    ---
    get:
      summary: List linkages
      parameters:
      - description: >-
          Choose to filter for a specific role context.
        in: query
        name: "filter:context_id"
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
                allOf:
                - $ref: '#/components/schemas/QueryResponse'
                properties:
                  results:
                    type: array
                    items:
                      $ref: '#/components/schemas/Linkage'
      tags:
      - Linkage
    """
    # Only logged-in users may list linkages.
    require(request.authz.logged_in)
    parser = QueryParser(request.args, request.authz)
    # Restrict the query to the contexts named in the filter, if any.
    query = Linkage.by_authz(
        request.authz,
        context_ids=parser.getintlist('filter:context_id'),
    )
    return LinkageSerializer.jsonify_result(
        DatabaseQueryResult(request, query, parser=parser)
    )
def test_decide_xref(self):
    """Walk a series of xref decisions and check linkage/profile counts.

    Expected progression: a positive a1/a2 decision gives two linkages in
    one profile; a negative a1/b1 decision adds a third linkage without a
    new profile; a positive b1/b2 decision creates a second profile; a
    positive a1/b1 decision brings the count back to a single profile.
    """
    user = self.create_user('user')
    collection = self.create_collection()

    def decide(entity_id, match_id, decision):
        # Submit one pair-wise decision within the test collection.
        pair = {
            'entity_id': entity_id,
            'collection_id': collection.id,
            'match_id': match_id,
            'match_collection_id': collection.id,
        }
        decide_xref(pair, decision=decision,
                    context_id=user.id, decider_id=user.id)

    def count_profiles():
        # Number of distinct profile IDs across all linkages.
        return db.session.query(Linkage.profile_id.distinct()).count()

    decide('a1', 'a2', True)
    linkages = list(Linkage.all())
    assert len(linkages) == 2, linkages
    assert linkages[0].profile_id == linkages[1].profile_id, linkages

    decide('a1', 'b1', False)
    linkages = list(Linkage.all())
    assert len(linkages) == 3, linkages
    profiles = count_profiles()
    assert profiles == 1, profiles

    decide('b1', 'b2', True)
    profiles = count_profiles()
    assert profiles == 2, profiles

    decide('a1', 'b1', True)
    profiles = count_profiles()
    assert profiles == 1, profiles
def delete_collection(collection, keep_metadata=False, sync=False,
                      reset_sync=False):
    """Delete the contents of a collection and, optionally, its metadata.

    The collection is first passed to ``reset_collection`` (using
    ``reset_sync``). Entities, mappings, diagrams and documents are then
    deleted from the database. Unless ``keep_metadata`` is set, linkages,
    permissions, the collection record itself and its index entry (using
    ``sync``) are deleted as well. The final refresh always runs with
    ``sync=True``.
    """
    drop_metadata = not keep_metadata

    reset_collection(collection, sync=reset_sync)

    # Re-use an existing deletion timestamp if the collection has one.
    deleted_at = collection.deleted_at
    if not deleted_at:
        deleted_at = datetime.utcnow()

    for model in (Entity, Mapping, Diagram):
        model.delete_by_collection(collection.id, deleted_at=deleted_at)
    Document.delete_by_collection(collection.id)

    if drop_metadata:
        # Considering this metadata for now, might be wrong:
        Linkage.delete_by_collection(collection.id)
        Permission.delete_by_collection(collection.id, deleted_at=deleted_at)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()

    if drop_metadata:
        index.delete_collection(collection.id, sync=sync)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
def test_linkage_index(self):
    """Check totals returned by the linkage listing endpoint.

    Three linkages are saved in one profile under three contexts (the
    logged-in role, the test group and another role). The unfiltered
    listing must report 2 results for the logged-in role, and filtering by
    that role's own context must report 1.
    """
    role, headers = self.login()
    other, _ = self.login(foreign_id='other')
    profile_id = make_textid()
    coll = self.create_collection()
    self.grant_publish(coll)

    # (context_id, decider_id) for each linkage to create.
    memberships = (
        (role.id, role.id),
        (self.group.id, role.id),
        (other.id, other.id),
    )
    for context_id, decider_id in memberships:
        Linkage.save(profile_id, make_textid(), coll.id, context_id,
                     decision=True, decider_id=decider_id)
    db.session.commit()

    res = self.client.get('/api/2/linkages', headers=headers)
    assert res.json['total'] == 2, res.json

    url = '/api/2/linkages?filter:context_id=%s' % role.id
    res = self.client.get(url, headers=headers)
    assert res.json['total'] == 1, res.json
def delete_entity(collection, entity, deleted_at=None, sync=False):
    """Delete an entity together with everything that references it.

    This is recursive and will also delete any entities which reference the
    given entity. Usually this is going to be child documents, or
    directorships referencing a person. It's a pretty dangerous operation,
    though.
    """
    entity_id = collection.ns.sign(entity.get('id'))

    # Remove referencing entities first, logging each recursive step.
    for adjacent in index.iter_adjacent(entity):
        log.warning("Recursive delete: %r", adjacent)
        delete_entity(collection, adjacent, deleted_at=deleted_at, sync=sync)

    flush_notifications(entity_id, clazz=Entity)

    # The ID may belong to an Entity row, a Document row, or neither.
    for model in (Entity, Document):
        record = model.by_id(entity_id, collection=collection)
        if record is not None:
            record.delete(deleted_at=deleted_at)

    index.delete_entity(entity_id, sync=sync)
    Linkage.delete_by_entity(entity_id)
    xref_index.delete_xref(collection, entity_id=entity_id, sync=sync)
    delete_aggregator_entity(collection, entity_id)
    refresh_entity(entity_id, sync=sync)
    refresh_collection(collection.id, sync=sync)
def index(collection_id):
    """
    ---
    get:
      summary: Fetch cross-reference results
      description: >-
        Fetch cross-reference matches for entities in the collection
        with id `collection_id`
      parameters:
      - in: path
        name: collection_id
        required: true
        schema:
          type: integer
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
                allOf:
                - $ref: '#/components/schemas/QueryResponse'
                properties:
                  results:
                    type: array
                    items:
                      $ref: '#/components/schemas/XrefResponse'
      tags:
      - Xref
      - Collection
    """
    get_index_collection(collection_id)
    result = XrefQuery.handle(request, collection_id=collection_id)

    # The decision context defaults to the requesting role's own ID.
    context_id = result.parser.getint('context_id', request.authz.id)
    if context_id is not None:
        require(request.authz.can_read_role(context_id))

    # Look up stored decisions for every (entity, match) pair in one call,
    # then annotate each result with its decision (None if there is none).
    pairs = [
        (xref.get('entity_id'), xref.get('match_id'))
        for xref in result.results
    ]
    decisions = Linkage.decisions(pairs, context_id)
    for xref, pair in zip(result.results, pairs):
        xref['decision'] = decisions.get(pair)
    return XrefSerializer.jsonify_result(result)
def decide_xref(xref, decision=None, context_id=None, decider_id=None):
    """Store user feedback from an Xref result as a set of entity linkages.

    A single pair-wise user decision has to be translated into a merge or
    split decision regarding a cluster (profile) of entities. This works
    for most cases, with the exception that a profile, once established,
    cannot be split in a way that preserves which entities were originally
    linked to which other entities.

    ``xref`` supplies ``entity_id``, ``collection_id``, ``match_id`` and
    ``match_collection_id``. ``decision`` is ``True``, ``False`` or ``None``
    (undecided). Changes are committed to the database session unless the
    function returns early.
    """
    entity_id = xref.get('entity_id')
    collection_id = xref.get('collection_id')
    entity_linkage = Linkage.by_entity(
        entity_id,
        decision=True,
        collection_id=collection_id,
        context_id=context_id,
    ).first()

    match_id = xref.get('match_id')
    match_collection_id = xref.get('match_collection_id')
    match_linkage = Linkage.by_entity(
        match_id,
        decision=True,
        collection_id=match_collection_id,
        context_id=context_id,
    ).first()

    def record(profile_id, target_id, target_collection_id):
        # Save one linkage carrying the current decision and decider.
        Linkage.save(profile_id, target_id, target_collection_id, context_id,
                     decision=decision, decider_id=decider_id)

    entity_linked = entity_linkage is not None
    match_linked = match_linkage is not None

    # If we are undecided, and we stay undecided, not much to change.
    if decision is None and not (entity_linked and match_linked):
        return

    if not entity_linked and not match_linked:
        # Case 1: Neither entity is linked to a profile, make a new one.
        profile_id = make_textid()
        record(profile_id, entity_id, collection_id)
        record(profile_id, match_id, match_collection_id)
    elif not entity_linked:
        # Case 2a: Only the match is part of a profile. Add the entity
        # to that existing profile.
        record(match_linkage.profile_id, entity_id, collection_id)
    elif not match_linked:
        # Case 2b: Like 2a, but the other way around.
        record(entity_linkage.profile_id, match_id, match_collection_id)
    elif decision is True:
        # Case 3: Both entities are part of profiles. These now need to
        # be merged.
        Linkage.merge(entity_linkage.profile_id, match_linkage.profile_id)
    else:
        # Case 4: Both entities are part of profiles, and have been
        # judged not to be the same. Mark them as distinct.
        record(entity_linkage.profile_id, match_id, match_collection_id)
        # Case 4b: Splitting an existing profile somewhat randomly.
        if entity_linkage.profile_id != match_linkage.profile_id:
            record(match_linkage.profile_id, entity_id, collection_id)
    db.session.commit()