def test_xref(self):
    # Post four entities through the API (one into coll_a, three into
    # coll_b), then cross-reference coll_a and expect two Match rows.
    _, auth_headers = self.login(foreign_id=self.user.foreign_id)
    endpoint = '/api/2/entities'
    fixtures = (
        ('Person', 'Carlos Danger', self.coll_a, 'US'),
        ('Person', 'Carlos Danger', self.coll_b, 'US'),
        ('Company', 'Carlos Danger', self.coll_b, 'GB'),
        ('Person', 'Pure Risk', self.coll_b, 'US'),
    )
    for schema, name, collection, nationality in fixtures:
        body = {
            'schema': schema,
            'name': name,
            'collection_id': collection.id,
            'data': {
                'nationality': nationality
            }
        }
        self.client.post(endpoint,
                         data=json.dumps(body),
                         headers=auth_headers,
                         content_type='application/json')

    # No matches may exist before the cross-reference runs.
    match_query = db.session.query(Match)
    assert 0 == match_query.count(), match_query.count()

    xref_collection(self.coll_a)

    match_query = db.session.query(Match)
    assert 2 == match_query.count(), match_query.count()
def test_summary(self):
    # Checks the xref summary endpoint for three kinds of caller:
    # anonymous, the restricted "outsider" and the "creator".
    summary_path = '/api/2/collections/%s/xref'

    def expect_only_dabo(response):
        # Exactly one result, and it is the 'Dabo Girls' collection.
        assert response.status_code == 200, response
        assert response.json['total'] == 1, response.json
        first = response.json['results'][0]['collection']
        assert 'Obsidian Order' not in first['label'], response.json
        assert 'Dabo Girls' in first['label'], response.json

    xref_collection(self.residents.id)

    # The obsidian summary is forbidden without a login.
    response = self.client.get(summary_path % self.obsidian.id)
    assert response.status_code == 403, response

    # Not logged in
    resi_url = summary_path % self.residents.id
    expect_only_dabo(self.client.get(resi_url))

    # Logged in as outsider (restricted access)
    _, auth_headers = self.login(foreign_id='outsider')
    expect_only_dabo(self.client.get(resi_url, headers=auth_headers))

    # Logged in as creator (all access)
    _, auth_headers = self.login(foreign_id='creator')
    response = self.client.get(resi_url, headers=auth_headers)
    assert response.status_code == 200, response
    assert response.json['total'] == 2, response.json
    labels = [item['collection']['label']
              for item in response.json['results']]
    assert 'Obsidian Order' in labels, response.json
    assert 'Dabo Girls' in labels, response.json
def handle_task(queue, payload, context):
    """Execute one queued task and always mark it done on the queue.

    The collection is looked up from ``queue.dataset``; if it does not exist
    an error is logged and nothing is run. Otherwise ``queue.operation`` is
    compared against each known operation constant and every matching
    handler is invoked.

    On any exception (including ``SystemExit`` and ``KeyboardInterrupt``) the
    task is re-queued while ``context['retries']`` is still below
    ``settings.QUEUE_RETRY``, and the exception is re-raised either way.
    """
    log.info("Task [%s]: %s (begin)", queue.dataset, queue.operation)
    try:
        collection = Collection.by_foreign_id(queue.dataset)
        if collection is None:
            log.error("Collection not found: %s", queue.dataset)
            return

        # Each entry pairs an operation constant with a deferred call, so
        # arguments are only evaluated when the operation actually matches.
        operation_table = (
            (OP_INDEX, lambda: index_aggregate(queue, collection)),
            (OP_BULKLOAD, lambda: bulk_load(queue, collection, payload)),
            (OP_PROCESS, lambda: process_collection(collection, **payload)),
            (OP_XREF, lambda: xref_collection(queue, collection, **payload)),
        )
        for operation, run in operation_table:
            if queue.operation == operation:
                run()
        log.info("Task [%s]: %s (done)", queue.dataset, queue.operation)
    except (SystemExit, KeyboardInterrupt, Exception):
        attempts = int(context.get('retries', 0))
        may_retry = attempts < settings.QUEUE_RETRY
        if may_retry:
            log.info("Queueing failed task for re-try...")
            context['retries'] = attempts + 1
            queue.queue_task(payload, context)
        raise
    finally:
        # Runs on success, early return and failure alike.
        queue.task_done()
def test_xref_specific_collections(self):
    # Cross-referencing coll_a against coll_c only must produce one match.
    match_query = db.session.query(Match)
    assert 0 == match_query.count(), match_query.count()

    targets = [self.coll_c.id]
    xref_collection(self.stage, self.coll_a, against_collection_ids=targets)

    match_query = db.session.query(Match)
    assert 1 == match_query.count(), match_query.count()
def test_matches(self):
    # Checks the per-collection xref match listing for an anonymous caller,
    # the restricted "outsider" and the "creator".

    def fetch(other, **request_args):
        # Request the matches between residents and ``other``.
        path = '/api/2/collections/%s/xref/%s' % (self.residents.id,
                                                  other.id)
        return self.client.get(path, **request_args)

    def expect_single(response, present, absent):
        # One result whose entity name contains ``present`` but not
        # ``absent``; the match name is neither 'Tain' nor 'MPella'.
        assert response.status_code == 200, response
        assert response.json['total'] == 1, response.json
        assert present in response.json['results'][0]['entity']['name']
        assert absent not in response.json['results'][0]['entity']['name']
        assert 'Tain' not in response.json['results'][0]['match']['name']
        assert 'MPella' not in response.json['results'][0]['match']['name']

    xref_collection(self.residents.id)

    # Not logged in
    expect_single(fetch(self.dabo), 'Leeta', 'Garak')
    match_obsidian = fetch(self.obsidian)
    assert match_obsidian.status_code == 403, match_obsidian

    # Logged in as outsider (restricted)
    _, auth_headers = self.login('outsider')
    expect_single(fetch(self.dabo, headers=auth_headers), 'Leeta', 'Garak')
    match_obsidian = fetch(self.obsidian, headers=auth_headers)
    assert match_obsidian.status_code == 403, match_obsidian

    # Logged in as creator (all access)
    _, auth_headers = self.login('creator')
    expect_single(fetch(self.dabo, headers=auth_headers), 'Leeta', 'Garak')
    expect_single(fetch(self.obsidian, headers=auth_headers),
                  'Garak', 'Leeta')
def test_export(self):
    # The xlsx export is forbidden anonymously and accepted (202) for the
    # creator.
    xref.xref_collection(self.residents)
    export_url = "/api/2/collections/%s/xref.xlsx" % self.obsidian.id

    anonymous = self.client.post(export_url)
    assert anonymous.status_code == 403, anonymous

    _, auth_headers = self.login(foreign_id="creator")
    authorised = self.client.post(export_url, headers=auth_headers)
    assert authorised.status_code == 202, authorised
def xref(foreign_id, against=None):
    """Cross-reference all entities and documents in a collection."""
    collection = get_collection(foreign_id)
    if against is None:
        # No restriction given: pass None through to xref_collection.
        against_collection_ids = None
    else:
        # Resolve every entry in `against` to the id of its collection.
        against_collection_ids = [
            get_collection(entry).id for entry in against
        ]
    xref_collection(collection.id,
                    against_collection_ids=against_collection_ids)
def test_export(self):
    # The export endpoint is forbidden anonymously and returns 200 for the
    # creator.
    xref_collection(self.stage, self.residents)
    export_url = '/api/2/collections/%s/xref/export' % self.obsidian.id

    anonymous = self.client.get(export_url)
    assert anonymous.status_code == 403, anonymous

    _, auth_headers = self.login(foreign_id='creator')
    authorised = self.client.get(export_url, headers=auth_headers)
    assert authorised.status_code == 200, authorised
def test_xref(self):
    # After setting up the entities, cross-referencing coll_a must take the
    # Match table from zero rows to three.
    self.setup_entities()
    match_query = db.session.query(Match)
    assert 0 == match_query.count(), match_query.count()

    self.flush_index()
    source_id = self.coll_a.id
    xref_collection(source_id)

    match_query = db.session.query(Match)
    assert 3 == match_query.count(), match_query.count()
def test_xref_specific_collections(self):
    # Restricting the cross-reference of coll_a to coll_c must yield a
    # single Match row.
    self.setup_entities()
    match_query = db.session.query(Match)
    assert 0 == match_query.count(), match_query.count()

    self.flush_index()
    xref_collection(
        self.coll_a.id,
        against_collection_ids=[self.coll_c.id],
    )

    match_query = db.session.query(Match)
    assert 1 == match_query.count(), match_query.count()
def test_matches(self):
    # Checks the xref listing of the residents collection before and after
    # publishing it, and for the "outsider" and "creator" logins.

    def expect_pair(item, present, absent):
        # The entity caption contains ``present`` but not ``absent``; the
        # match caption contains neither "Tain" nor "MPella".
        assert present in get_caption(item["entity"])
        assert absent not in get_caption(item["entity"])
        assert "Tain" not in get_caption(item["match"])
        assert "MPella" not in get_caption(item["match"])

    xref.xref_collection(self.residents)
    url = "/api/2/collections/%s/xref" % self.residents.id

    # Not logged in
    response = self.client.get(url)
    assert response.status_code == 403, response

    self.grant_publish(self.residents)
    response = self.client.get(url)
    assert response.status_code == 200, response
    assert response.json["total"] == 1, response.json
    expect_pair(response.json["results"][0], "Leeta", "Garak")

    # Logged in as outsider (restricted)
    _, auth_headers = self.login("outsider")
    response = self.client.get(url, headers=auth_headers)
    assert response.status_code == 200, response
    assert response.json["total"] == 1, response.json
    expect_pair(response.json["results"][0], "Leeta", "Garak")

    # Logged in as creator (all access)
    _, auth_headers = self.login("creator")
    response = self.client.get(url, headers=auth_headers)
    assert response.status_code == 200, response
    assert response.json["total"] == 2, response.json
    expect_pair(response.json["results"][0], "Garak", "Leeta")
    expect_pair(response.json["results"][1], "Leeta", "Garak")
def test_matches(self):
    # Checks the xref listing of the residents collection before and after
    # publishing it, and for the 'outsider' and 'creator' logins.

    def expect_pair(item, present, absent):
        # The entity caption contains ``present`` but not ``absent``; the
        # match caption contains neither 'Tain' nor 'MPella'.
        assert present in get_caption(item['entity'])
        assert absent not in get_caption(item['entity'])
        assert 'Tain' not in get_caption(item['match'])
        assert 'MPella' not in get_caption(item['match'])

    xref.xref_collection(self.stage, self.residents)
    url = '/api/2/collections/%s/xref' % self.residents.id

    # Not logged in
    response = self.client.get(url)
    assert response.status_code == 403, response

    self.grant_publish(self.residents)
    response = self.client.get(url)
    assert response.status_code == 200, response
    assert response.json['total'] == 1, response.json
    expect_pair(response.json['results'][0], 'Leeta', 'Garak')

    # Logged in as outsider (restricted)
    _, auth_headers = self.login('outsider')
    response = self.client.get(url, headers=auth_headers)
    assert response.status_code == 200, response
    assert response.json['total'] == 1, response.json
    expect_pair(response.json['results'][0], 'Leeta', 'Garak')

    # Logged in as creator (all access)
    _, auth_headers = self.login('creator')
    response = self.client.get(url, headers=auth_headers)
    assert response.status_code == 200, response
    assert response.json['total'] == 2, response.json
    expect_pair(response.json['results'][0], 'Garak', 'Leeta')
    expect_pair(response.json['results'][1], 'Leeta', 'Garak')
def dispatch_task(self, collection, task):
    """Run the operation selected by ``task.stage.stage`` on ``collection``.

    The stage name is compared against each known operation constant and
    every matching handler is invoked with the task payload. A "done" line
    is logged afterwards, whether or not any handler matched.
    """
    stage = task.stage
    payload = task.payload
    sync = task.context.get("sync", False)

    # Deferred calls: arguments are only evaluated for a matching stage.
    operation_table = (
        (OP_INDEX,
         lambda: index_many(stage, collection, sync=sync, **payload)),
        (OP_LOAD_MAPPING,
         lambda: load_mapping(stage, collection, **payload)),
        (OP_FLUSH_MAPPING,
         lambda: flush_mapping(stage, collection, sync=sync, **payload)),
        (OP_REINGEST,
         lambda: reingest_collection(collection, job_id=stage.job.id,
                                     **payload)),
        (OP_REINDEX,
         lambda: reindex_collection(collection, sync=sync, **payload)),
        (OP_XREF,
         lambda: xref_collection(stage, collection)),
        (OP_XREF_ITEM,
         lambda: xref_item(stage, collection, **payload)),
    )
    for operation, run in operation_table:
        if stage.stage == operation:
            run()
    log.info("Task [%s]: %s (done)", task.job.dataset, stage.stage)
def handle(self, task):
    """Resolve the task's collection and run the operation for its stage.

    If no collection exists for ``task.job.dataset.name`` an error is logged
    and the method returns without running anything. Otherwise the stage
    name is compared against each known operation constant and every
    matching handler is invoked; a "done" line is logged afterwards.
    """
    stage = task.stage
    payload = task.payload
    collection = Collection.by_foreign_id(task.job.dataset.name)
    if collection is None:
        log.error("Collection not found: %s", task.job.dataset)
        return

    sync = task.context.get('sync', False)

    # Deferred calls: arguments are only evaluated for a matching stage.
    operation_table = (
        (OP_INDEX,
         lambda: index_aggregate(stage, collection, sync=sync, **payload)),
        (OP_BULKLOAD,
         lambda: bulk_load(stage, collection, payload)),
        (OP_PROCESS,
         lambda: process_collection(stage, collection, sync=sync,
                                    **payload)),
        (OP_XREF,
         lambda: xref_collection(stage, collection, **payload)),
        (OP_XREF_ITEM,
         lambda: xref_item(stage, collection, **payload)),
    )
    for operation, run in operation_table:
        if stage.stage == operation:
            run()
    log.info("Task [%s]: %s (done)", task.job.dataset, stage.stage)
def handle(self, task):
    """Resolve the task's collection and run the operation for its stage.

    If no collection exists for ``task.job.dataset.name`` an error is logged
    and the method returns without running anything. Otherwise the stage
    name is compared against each known operation constant and every
    matching handler is invoked; a "done" line is logged afterwards.
    """
    stage = task.stage
    payload = task.payload
    collection = Collection.by_foreign_id(task.job.dataset.name)
    if collection is None:
        log.error("Collection not found: %s", task.job.dataset)
        return

    sync = task.context.get('sync', False)

    # Deferred calls: arguments are only evaluated for a matching stage.
    operation_table = (
        (OP_INDEX,
         lambda: index_many(stage, collection, sync=sync, **payload)),
        (OP_LOAD_MAPPING,
         lambda: load_mapping(stage, collection, **payload)),
        (OP_FLUSH_MAPPING,
         lambda: flush_mapping(stage, collection, sync=sync, **payload)),
        (OP_REINGEST,
         lambda: reingest_collection(collection, job_id=stage.job.id,
                                     **payload)),
        (OP_REINDEX,
         lambda: reindex_collection(collection, sync=sync, **payload)),
        (OP_XREF,
         lambda: xref_collection(stage, collection)),
        (OP_XREF_ITEM,
         lambda: xref_item(stage, collection, **payload)),
    )
    for operation, run in operation_table:
        if stage.stage == operation:
            run()
    log.info("Task [%s]: %s (done)", task.job.dataset, stage.stage)
def handle(self, task):
    """Resolve the task's collection and run the operation for its stage.

    If no collection exists for ``task.job.dataset.name`` an error is logged
    and the method returns without running anything. Otherwise the stage
    name is compared against each known operation constant and every
    matching handler is invoked; a "done" line is logged afterwards.
    """
    stage = task.stage
    payload = task.payload
    collection = Collection.by_foreign_id(task.job.dataset.name)
    if collection is None:
        log.error("Collection not found: %s", task.job.dataset)
        return

    sync = task.context.get('sync', False)

    def run_process():
        # 'reset' is removed from the payload before the remaining keys are
        # forwarded; when truthy the collection is reset first.
        if payload.pop('reset', False):
            reset_collection(collection, sync=True)
        process_collection(stage, collection, sync=sync, **payload)

    # Deferred calls: arguments are only evaluated for a matching stage.
    operation_table = (
        (OP_INDEX,
         lambda: index_aggregate(stage, collection, sync=sync, **payload)),
        (OP_LOAD_MAPPING,
         lambda: load_mapping(stage, collection, **payload)),
        (OP_FLUSH_MAPPING,
         lambda: flush_mapping(stage, collection, sync=sync, **payload)),
        (OP_PROCESS, run_process),
        (OP_XREF,
         lambda: xref_collection(stage, collection)),
        (OP_XREF_ITEM,
         lambda: xref_item(stage, collection, **payload)),
    )
    for operation, run in operation_table:
        if stage.stage == operation:
            run()
    log.info("Task [%s]: %s (done)", task.job.dataset, stage.stage)
def test_xref_collection(self):
    # Post six entities through the API across coll_a, coll_b and coll_c,
    # then cross-reference coll_a against coll_c and expect one Match row.
    _, auth_headers = self.login(foreign_id=self.user.foreign_id)
    endpoint = '/api/2/entities'
    fixtures = (
        ('Person', 'Carlos Danger', self.coll_a, 'US'),
        ('Person', 'Carlos Danger', self.coll_b, 'US'),
        ('LegalEntity', 'Carlos Danger', self.coll_b, 'GB'),
        ('Person', 'Pure Risk', self.coll_b, 'US'),
        ('LegalEntity', 'Carlof Danger', self.coll_c, 'FR'),
        ('Person', 'Dorian Gray', self.coll_c, 'GB'),
    )
    for schema, name, collection, nationality in fixtures:
        body = {
            'schema': schema,
            'name': name,
            'collection_id': collection.id,
            'properties': {
                'nationality': nationality
            }
        }
        self.client.post(endpoint,
                         data=json.dumps(body),
                         headers=auth_headers,
                         content_type='application/json')

    # No matches may exist before the cross-reference runs.
    match_query = db.session.query(Match)
    assert 0 == match_query.count(), match_query.count()

    self.flush_index()
    xref_collection(self.coll_a.id, self.coll_c.id)

    match_query = db.session.query(Match)
    assert 1 == match_query.count(), match_query.count()
def xref(foreign_id, against=None):
    """Cross-reference all entities and documents in a collection."""
    collection = get_collection(foreign_id)
    # Normalise `against` with ensure_list, then resolve each entry to the
    # id of its collection.
    against_ids = []
    for entry in ensure_list(against):
        against_ids.append(get_collection(entry).id)
    xref_collection(collection.id, against_collection_ids=against_ids)
def op_xref_handler(collection, task):
    """Cross-reference ``collection`` using the stage carried by ``task``."""
    stage = task.stage
    xref_collection(stage, collection)
def test_xref(self):
    # iter_matches must yield nothing for coll_a before the
    # cross-reference and three matches afterwards.
    before = list(iter_matches(self.coll_a, self.authz))
    assert 0 == len(before), len(before)

    xref_collection(self.stage, self.coll_a)

    after = list(iter_matches(self.coll_a, self.authz))
    assert 3 == len(after), len(after)
def test_xref(self):
    # Cross-referencing coll_a must take the Match table from zero rows
    # to three.
    match_query = db.session.query(Match)
    assert 0 == match_query.count(), match_query.count()

    xref_collection(self.stage, self.coll_a)

    match_query = db.session.query(Match)
    assert 3 == match_query.count(), match_query.count()
def xref(foreign_id):
    """Cross-reference all entities and documents in a collection."""
    # Look the collection up by foreign id; only its id is handed on.
    collection_id = get_collection(foreign_id).id
    xref_collection(collection_id)
def xref(foreign_id):
    """Cross-reference all entities and documents in a collection."""
    collection = get_collection(foreign_id)
    # The stage is obtained for the OP_XREF operation on this collection
    # and passed straight to xref_collection.
    xref_collection(get_stage(collection, OP_XREF), collection)
def test_csv(self):
    # The CSV export of the obsidian collection is forbidden without a
    # login.
    xref_collection(self.stage, self.residents)
    csv_url = '/api/2/collections/%s/xref.csv' % self.obsidian.id
    response = self.client.get(csv_url)
    assert response.status_code == 403, response
def xref(foreign_id):
    """Cross-reference all entities and documents in a collection."""
    collection = Collection.by_foreign_id(foreign_id)
    if collection is not None:
        xref_collection(collection)
        return
    # No collection is registered under this foreign id.
    raise ValueError("No such collection: %r" % foreign_id)
def op_xref_handler(collection, task):
    """Cross-reference ``collection``; ``task`` is accepted but not used."""
    xref_collection(collection)