def test_upload_csv_doc(self):
    """Upload a CSV fixture and check it is listed, described and served back.

    The test posts ``self.csv_path`` as ``experts.csv`` together with JSON
    metadata, then reads the document list, the document detail and the raw
    file through the API.
    """
    _, headers = self.login(is_admin=True)
    meta = {
        'countries': ['de', 'us'],
        'languages': ['en'],
        'source_url': 'http://pudo.org/experts.csv'
    }
    # Keep the fixture handle inside a context manager so that it is closed
    # deterministically, even if the request raises.
    with open(self.csv_path) as fh:
        data = {
            'meta': json.dumps(meta),
            'foo': (fh, 'experts.csv')
        }
        res = self.client.post(self.url, data=data, headers=headers)
    assert res.status_code == 200, (res, res.data)
    docs = res.json['documents']
    assert len(docs) == 1, docs
    assert docs[0]['file_name'] == 'experts.csv', docs

    # Make the freshly indexed document visible to the queries below.
    flush_index()

    res = self.client.get('/api/2/documents', headers=headers)
    assert res.json['total'] == 1, res.json

    res = self.client.get('/api/2/documents/1', headers=headers)
    assert res.json['countries'] == ['de', 'us'], res.json

    res = self.client.get('/api/2/documents/1/file', headers=headers)
    assert 'Klaus Trutzel' in res.data
    assert 'text/csv' in res.content_type, res.content_type
def test_entity_tags(self):
    """Create two people with the same phone number and query the tags of one.

    The tags endpoint of the second entity is expected to return exactly one
    result whose value is the shared phone number.
    """
    _, headers = self.login(is_admin=True)
    entities_url = '/api/2/entities'

    # Both entities live in the same collection and share one phone number.
    for person_name in ("Blaaaa blubb", "Nobody Man"):
        payload = {
            'schema': 'Person',
            'name': person_name,
            'properties': {
                'phone': '+491769817271'
            },
            'collection_id': self.col.id
        }
        created = self.client.post(entities_url,
                                   data=json.dumps(payload),
                                   headers=headers,
                                   content_type='application/json')
    flush_index()

    # ``created`` now holds the response for the last entity ("Nobody Man").
    tags_url = '/api/2/entities/%s/tags' % created.json['id']
    res = self.client.get(tags_url, headers=headers)
    assert res.status_code == 200, (res.status_code, res.json)
    results = res.json['results']
    assert len(results) == 1, results
    assert results[0]['value'] == '+491769817271', results
def test_load_sqlite(self):
    """Bulk-load the ``kek`` SQLite fixture and look one entity up via search."""
    initial_count = Collection.all().count()
    assert 0 == initial_count, initial_count

    # Publish the fixture database location through the environment before
    # the mapping file is loaded.
    sqlite_uri = 'sqlite:///' + self.get_fixture_path('kek.sqlite')
    os.environ['ALEPH_TEST_BULK_DATABASE_URI'] = sqlite_uri
    mapping = load_config_file(self.get_fixture_path('kek.yml'))
    bulk_load(mapping)

    loaded_count = Collection.all().count()
    assert 1 == loaded_count, loaded_count

    coll = Collection.by_foreign_id('kek')
    assert coll.category == 'scrape', coll.category

    _, headers = self.login(is_admin=True)
    flush_index()
    res = self.client.get('/api/2/entities?q=friede+springer',
                          headers=headers)
    assert res.status_code == 200, res
    assert res.json['total'] == 1, res.json
    first_hit = res.json['results'][0]
    assert first_hit['id'] == '9895ccc1b3d6444ccc6371ae239a7d55c748a714', first_hit
def test_similar_entity(self):
    """Create two similarly named people and query the ``similar`` endpoint.

    The lookup is made for the second entity and must return exactly one
    result whose name contains ``Laden``.
    """
    _, headers = self.login(is_admin=True)
    entities_url = '/api/2/entities'

    for person_name in ("Osama bin Laden", "Osama ben Ladyn"):
        payload = {
            'schema': 'Person',
            'name': person_name,
            'collection_id': self.col.id
        }
        res = self.client.post(entities_url,
                               data=json.dumps(payload),
                               headers=headers,
                               content_type='application/json')
    flush_index()

    # ``res`` still refers to the response of the last POST ("ben Ladyn").
    similar_url = '/api/2/entities/%s/similar' % res.json['id']
    res = self.client.get(similar_url, headers=headers)
    assert res.status_code == 200, (res.status_code, res.json)
    data = res.json
    assert len(data['results']) == 1, data
    assert 'Laden' in data['results'][0]['name'], data
    assert 'Pooh' not in res.data, res.data
def update_entity(entity):
    """Index ``entity`` immediately and queue its full update as a task.

    The follow-up task is dispatched with ``USER_QUEUE`` and
    ``USER_ROUTING_KEY``; the index is flushed before returning.
    """
    index_entity(entity)
    task_args = [entity.id]
    update_entity_full.apply_async(task_args,
                                   queue=USER_QUEUE,
                                   routing_key=USER_ROUTING_KEY)
    # needed to make the call to view() work:
    flush_index()
def test_upload_html_doc(self):
    """Upload an HTML fixture and check its metadata, extracted HTML and file.

    The test posts ``samples/website.html`` with JSON metadata, then reads
    the document list, the document detail (including its ``html`` field)
    and the raw file through the API.
    """
    html_path = self.get_fixture_path('samples/website.html')
    _, headers = self.login(is_admin=True)
    meta = {
        'countries': ['ru', 'us'],
        'languages': ['en'],
        'source_url': 'https://en.wikipedia.org/wiki/How_does_one_patch_KDE2_under_FreeBSD%3F'  # noqa
    }
    # Keep the fixture handle inside a context manager so that it is closed
    # deterministically, even if the request raises.
    with open(html_path) as fh:
        data = {'meta': json.dumps(meta), 'foo': fh}
        res = self.client.post(self.url, data=data, headers=headers)
    assert res.status_code == 200, (res, res.data)
    docs = res.json['documents']
    assert len(docs) == 1, docs
    assert docs[0]['type'] == 'html', docs

    # Make the freshly indexed document visible to the queries below.
    flush_index()

    res = self.client.get('/api/2/documents', headers=headers)
    assert res.json['total'] == 1, res.json

    res = self.client.get('/api/2/documents/1', headers=headers)
    assert 'us' in res.json['countries'], res.json
    assert 'html' in res.json, res.json
    assert 'Wikipedia, the free encyclopedia' in res.json['html'], \
        res.json['html']

    res = self.client.get('/api/2/documents/1/file', headers=headers)
    assert 'KDE2' in res.data
    assert 'text/html' in res.content_type, res.content_type
def test_delete_source(self):
    """Deleting collection 1000 must remove its hit from the search results."""
    collection = Collection.by_id(1000)
    search_url = '/api/2/search?q="mention fruit"'

    before = self.client.get(search_url)
    assert before.json['total'] == 1, before.json

    delete_collection(collection.id)
    flush_index()

    after = self.client.get(search_url)
    assert after.json['total'] == 0, after.json
def load_fixtures(self, file_name, process_documents=True):
    """Load a fixture file into the database and bring the index up to date.

    Args:
        file_name: fixture name, resolved through ``get_fixture_path``.
        process_documents: when true, every document returned by
            ``Document.all()`` is passed to ``analyze_document``.
    """
    fixture_path = self.get_fixture_path(file_name)
    fixture_data = loaders.load(fixture_path)
    # Inside this method the bare name refers to the module-level
    # ``load_fixtures`` helper, not to this method.
    load_fixtures(db, fixture_data)
    db.session.commit()
    reindex_entities()
    if process_documents:
        for document in Document.all():
            analyze_document(document)
    # NOTE(review): the flattened source does not show nesting; the flush is
    # assumed to run unconditionally at the end -- confirm.
    flush_index()
def test_index(self):
    """The collection listing is empty anonymously and has one entry for an admin."""
    update_collection(self.col)
    flush_index()
    listing_url = '/api/2/collections'

    # Without credentials the collection must not be listed.
    res = self.client.get(listing_url)
    assert res.status_code == 200, res
    assert res.json['total'] == 0, res.json

    # With admin headers it shows up.
    _, headers = self.login(is_admin=True)
    res = self.client.get(listing_url, headers=headers)
    assert res.status_code == 200, res
    assert res.json['total'] == 1, res.json
def test_suggest_entity(self):
    """Create one person and expect the prefix ``osa`` to suggest it."""
    self.login(is_admin=True)
    payload = {
        'schema': 'Person',
        'name': "Osama bin Laden",
        'collection_id': self.col.id
    }
    res = self.client.post('/api/1/entities',
                           data=json.dumps(payload),
                           content_type='application/json')
    flush_index()

    res = self.client.get('/api/1/entities/_suggest?prefix=osa')
    assert res.status_code == 200, (res.status_code, res.json)
    suggestions = res.json
    assert len(suggestions['results']) == 1, suggestions
    assert 'Laden' in suggestions['results'][0]['name'], suggestions
def test_entity_references(self):
    """Bulk-load the experts CSV and check the references of the ``Climate`` hit."""
    # Publish the CSV fixture location through the environment before the
    # mapping file is loaded.
    csv_uri = 'file://' + self.get_fixture_path('experts.csv')
    os.environ['ALEPH_TEST_BULK_CSV'] = csv_uri
    mapping = load_config_file(self.get_fixture_path('experts.yml'))
    bulk_load(mapping)
    flush_index()

    res = self.client.get('/api/2/entities?q=Climate')
    assert res.json['total'] == 1, res.json
    group_id = res.json['results'][0]['id']

    res = self.client.get('/api/2/entities/%s/references' % group_id)
    results = res.json['results']
    assert len(results) == 1, results
    assert results[0]['count'] == 3, results
def test_load_sqlite(self):
    """Bulk-load the ``kek`` mapping and look one entity up via search."""
    initial_count = Collection.all().count()
    assert 0 == initial_count, initial_count

    mapping = load_config_file(self.get_fixture_path('kek.yml'))
    bulk_load(mapping)
    flush_index()

    loaded_count = Collection.all().count()
    assert 1 == loaded_count, loaded_count

    res = self.client.get('/api/2/entities?q=friede+springer')
    assert res.status_code == 200, res
    assert res.json['total'] == 1, res.json
    first_hit = res.json['results'][0]
    assert first_hit['id'] == '9895ccc1b3d6444ccc6371ae239a7d55c748a714', first_hit
def test_load_csv(self):
    """Bulk-load the experts CSV fixture and look one entity up via search."""
    initial_count = Collection.all().count()
    assert 0 == initial_count, initial_count

    # Publish the CSV fixture location through the environment before the
    # mapping file is loaded.
    csv_uri = 'file://' + self.get_fixture_path('experts.csv')
    os.environ['ALEPH_TEST_BULK_CSV'] = csv_uri
    mapping = load_config_file(self.get_fixture_path('experts.yml'))
    bulk_load(mapping)
    flush_index()

    loaded_count = Collection.all().count()
    assert 1 == loaded_count, loaded_count

    res = self.client.get('/api/2/entities?q=Greenfield')
    assert res.status_code == 200, res
    assert res.json['total'] == 1, res.json
    first_hit = res.json['results'][0]
    assert first_hit['id'] == '6897ef1acd633c229d812c1c495f030d212c9081', first_hit
def test_index(self):
    """Check the entity listing and its facets before and after logging in.

    Anonymous requests must see no entities and no collection facet values;
    after an admin login one entity and one collection facet value show up.
    """
    index_entity(self.ent)
    flush_index()
    by_collection_url = '/api/1/entities?facet=collections'

    res = self.client.get(by_collection_url)
    assert res.status_code == 200, res
    assert res.json['total'] == 0, res.json
    assert len(res.json['facets']['collections']['values']) == 0, res.json

    self.login(is_admin=True)
    res = self.client.get(by_collection_url)
    assert res.status_code == 200, res
    assert res.json['total'] == 1, res.json
    assert len(res.json['facets']['collections']['values']) == 1, res.json
    first_value = res.json['facets']['collections']['values'][0]
    assert first_value['id'] == str(self.col.id), res.json
    assert first_value['label'] == self.col.label, res.json
    assert len(res.json['facets']) == 1, res.json

    # Asking for another facet must return that facet only.
    res = self.client.get('/api/1/entities?facet=countries')
    assert len(res.json['facets']) == 1, res.json
    assert 'values' in res.json['facets']['countries'], res.json
def test_upload_html_doc(self):
    """Upload an in-memory HTML snippet and check it is listed and served back."""
    _, headers = self.login(is_admin=True)
    upload = (StringIO("this is a futz with a banana"), 'futz.html')
    form = {
        'meta': json.dumps(self.meta),
        'foo': upload
    }
    res = self.client.post(self.url, data=form, headers=headers)
    assert res.status_code == 200, (res, res.data)
    docs = res.json['documents']
    assert len(docs) == 1, docs
    assert docs[0]['file_name'] == 'futz.html', docs

    flush_index()

    res = self.client.get('/api/2/documents', headers=headers)
    assert res.json['total'] == 1, res.json

    res = self.client.get('/api/2/documents/1', headers=headers)
    assert res.json['countries'] == ['de', 'us'], res.json

    res = self.client.get('/api/2/documents/1/file', headers=headers)
    assert 'futz with a banana' in res.data
    assert 'text/html' in res.content_type, res.content_type
def delete_pending(collection_id=None):
    """Delete pending entities together with the references pointing at them.

    Args:
        collection_id: optional collection to restrict the clean-up to. When
            ``None``, pending entities are removed regardless of collection.

    References are selected through a sub-query over the pending entities, so
    they have to be deleted *before* those entities. Once the entity rows are
    gone within the transaction, the sub-query no longer matches anything and
    the references would be left behind.
    """
    pending = db.session.query(Entity.id)
    pending = pending.filter(Entity.state == Entity.STATE_PENDING)
    if collection_id is not None:
        pending = pending.filter(Entity.collection_id == collection_id)

    # Dependants first, while the sub-select still finds the pending rows.
    refs = db.session.query(Reference)
    refs = refs.filter(Reference.entity_id.in_(pending))
    refs.delete(synchronize_session='fetch')

    # Now the entities themselves can go.
    pending.delete(synchronize_session='fetch')

    db.session.commit()
    flush_index()
def setUp(self):
    """Create three collections with seven ``Person`` entities and index them.

    Two collections are granted to the guest role via ``Permission.grant``;
    the third one ('Obsidian Order') receives no such grant.
    """
    super(XrefApiTestCase, self).setUp()
    self.creator = self.create_user(foreign_id='creator')
    self.outsider = self.create_user(foreign_id='outsider')
    self.guest = self.create_user(foreign_id=Role.SYSTEM_GUEST)

    def add_person(name, collection):
        # Create a Person entity in ``collection`` and stage it in the session.
        person = Entity.create({
            'schema': 'Person',
            'name': name,
        }, collection)
        db.session.add(person)
        return person

    # First public collection and entities
    residents_data = {
        'label': 'Residents of Habitat Ring',
        'foreign_id': 'test_residents'
    }
    self.residents = Collection.create(residents_data, role=self.creator)
    db.session.add(self.residents)
    db.session.flush()
    Permission.grant(self.residents, self.guest, True, False)
    self.ent = add_person('Elim Garak', self.residents)
    self.ent2 = add_person('Leeta', self.residents)

    # Second public collection and entities
    dabo_data = {
        'label': 'Dabo Girls',
        'foreign_id': 'test_dabo'
    }
    self.dabo = Collection.create(dabo_data, role=self.creator)
    db.session.add(self.dabo)
    db.session.flush()
    Permission.grant(self.dabo, self.guest, True, False)
    self.ent3 = add_person('MPella', self.dabo)
    self.ent4 = add_person('Leeta', self.dabo)
    self.ent5 = add_person('Mardah', self.dabo)

    # Private collection and entities
    obsidian_data = {
        'label': 'Obsidian Order',
        'foreign_id': 'test_obsidian',
        'category': 'leak'
    }
    self.obsidian = Collection.create(obsidian_data, role=self.creator)
    db.session.add(self.obsidian)
    db.session.flush()
    self.ent6 = add_person('Elim Garack', self.obsidian)
    self.ent7 = add_person('Enabran Tain', self.obsidian)

    db.session.commit()

    # Index every entity in creation order, then flush the index.
    created = (self.ent, self.ent2, self.ent3, self.ent4,
               self.ent5, self.ent6, self.ent7)
    for entity in created:
        index_entity(entity)
    flush_index()
def flush_index(self):
    """Call the module-level ``flush_index()`` helper.

    Inside a method body the bare name ``flush_index`` is looked up in the
    module scope, not on the class, so this delegates instead of recursing.
    """
    flush_index()
def update_entity(entity):
    """Index ``entity`` immediately and queue its full update as a task.

    The follow-up task is dispatched with ``priority=7``; the index is
    flushed before returning.
    """
    index_entity(entity)
    task_args = [entity.id]
    update_entity_full.apply_async(task_args, priority=7)
    # needed to make the call to view() work:
    flush_index()