def upsert_entity(data, collection, authz=None, sync=False, sign=False, job_id=None):
    """Create or update an entity in the database. This has a side effect
    of migrating entities created via the _bulk API or a mapper to a
    database entity in the event that it gets edited by the user.
    """
    from aleph.logic.profiles import profile_fragments

    entity = None
    entity_id = collection.ns.sign(data.get("id"))
    if entity_id is not None:
        entity = Entity.by_id(entity_id, collection=collection)
    if entity is None:
        role_id = authz.id if authz is not None else None
        entity = Entity.create(data, collection, sign=sign, role_id=role_id)
    else:
        entity.update(data, collection, sign=sign)
    collection.touch()

    proxy = entity.to_proxy()
    aggregator = get_aggregator(collection)
    aggregator.delete(entity_id=proxy.id)
    aggregator.put(proxy, origin=MODEL_ORIGIN)
    profile_fragments(collection, aggregator, entity_id=proxy.id)

    index.index_proxy(collection, proxy, sync=sync)
    refresh_entity(collection, proxy.id)
    queue_task(collection, OP_UPDATE_ENTITY, job_id=job_id, entity_id=proxy.id)
    return entity.id
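
A minimal sketch of a call site, assuming `collection` and `authz` have already been resolved by the caller (as the API views do); the payload follows the FtM entity format used throughout:

# Hedged sketch of calling upsert_entity from a view or script;
# the entity id and properties are illustrative values.
data = {
    "id": "fd5c9",
    "schema": "Person",
    "properties": {"name": ["Jane Doe"]},
}
entity_id = upsert_entity(data, collection, authz=authz, sync=True)
# Note: upsert_entity does not commit the session itself, so the
# caller is responsible for committing.
db.session.commit()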
def upsert_entity(data, collection, authz=None, sync=False):
    """Create or update an entity in the database. This has a side effect
    of migrating entities created via the _bulk API or a mapper to a
    database entity in the event that it gets edited by the user.
    """
    entity = None
    entity_id = collection.ns.sign(data.get("id"))
    if entity_id is not None:
        entity = Entity.by_id(entity_id, collection=collection)
    if entity is None:
        role_id = authz.id if authz is not None else None
        entity = Entity.create(data, collection, role_id=role_id)
    else:
        entity.update(data, collection)

    # Inline name properties from adjacent entities. See the
    # docstring on `inline_names` for a more detailed discussion.
    proxy = entity.to_proxy()
    entity_ids = proxy.get_type_values(registry.entity)
    for rel in index.entities_by_ids(entity_ids):
        inline_names(proxy, model.get_proxy(rel))
    entity.data = proxy.properties
    db.session.add(entity)

    delete_aggregator_entity(collection, entity.id)
    index.index_proxy(collection, proxy, sync=sync)
    refresh_entity(collection, entity.id)
    return entity.id
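
For orientation, a standalone sketch of the FtM calls the inlining step relies on. It needs only followthemoney; the Aleph-side `entities_by_ids` lookup is replaced here by a directly constructed related entity:

from followthemoney import model
from followthemoney.types import registry

passport = model.get_proxy({
    "id": "pass1",
    "schema": "Passport",
    "properties": {"number": ["X123"], "holder": ["p1"]},
})
person = model.get_proxy({
    "id": "p1",
    "schema": "Person",
    "properties": {"name": ["John Doe"]},
})

# get_type_values(registry.entity) collects the ids of all
# entity-type property values -- the "adjacent entities":
assert passport.get_type_values(registry.entity) == ["p1"]
# The related entity's names, which inline_names would denormalise
# onto the referring proxy:
assert person.get_type_values(registry.name) == ["John Doe"]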
def ingest_upload(collection_id):
    collection = get_db_collection(collection_id, request.authz.WRITE)
    job_id = get_session_id()
    sync = get_flag('sync', default=False)
    meta, foreign_id = _load_metadata()
    parent = _load_parent(collection, meta)
    upload_dir = ensure_path(mkdtemp(prefix='aleph.upload.'))
    try:
        content_hash = None
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = upload_dir.joinpath(path)
            storage.save(str(path))
            content_hash = archive.archive_file(path)
        document = Document.save(collection=collection,
                                 parent=parent,
                                 foreign_id=foreign_id,
                                 content_hash=content_hash,
                                 meta=meta,
                                 uploader_id=request.authz.id)
        collection.touch()
        db.session.commit()
        proxy = document.to_proxy()
        if proxy.schema.is_a(Document.SCHEMA_FOLDER) and sync:
            index_proxy(collection, proxy, sync=sync)
        ingest_entity(collection, proxy, job_id=job_id, sync=sync)
        document_id = collection.ns.sign(document.id)
        _notify(collection, document_id)
    finally:
        shutil.rmtree(upload_dir)
    return jsonify({'status': 'ok', 'id': document_id}, status=201)
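
A hedged client-side sketch of hitting this handler: the host, API key, collection id, and route below are placeholders modelled on the Aleph API conventions, not guaranteed values.

import json
import requests

url = "https://aleph.example.org/api/2/collections/42/ingest"  # placeholder host/id
meta = {"file_name": "report.pdf", "languages": ["en"]}
with open("report.pdf", "rb") as fh:
    res = requests.post(
        url,
        headers={"Authorization": "ApiKey 1234"},  # placeholder credential
        data={"meta": json.dumps(meta)},
        files={"file": fh},
        params={"sync": "true"},  # block until the document is indexed
    )
res.raise_for_status()
document_id = res.json()["id"]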
def ingest_entity(collection, proxy, job_id=None, sync=False):
    """Send the given FtM entity proxy to the ingest-file service."""
    if proxy.schema.is_a(Document.SCHEMA_FOLDER):
        index_proxy(collection, proxy, sync=sync)
    log.debug("Ingest entity [%s]: %s", proxy.id, proxy.caption)
    stage = get_stage(collection, OP_INGEST, job_id=job_id)
    from aleph.logic.aggregator import get_aggregator_name
    context = {
        'languages': collection.languages,
        'balkhash_name': get_aggregator_name(collection),
        'next_stage': OP_INDEX,
        'sync': sync,
    }
    stage.queue(proxy.to_dict(), context)
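
For orientation, a sketch of how a worker on the other side of the queue might consume this task. Every name here (`handle_ingest_task`, `ingest_document`, `queue_next`) is hypothetical, standing in for the actual ingest-file service internals:

def handle_ingest_task(collection, payload, context):
    # Hypothetical handler: run the ingestors over the queued FtM
    # entity, using the language hints from the context.
    entity = ingest_document(payload, languages=context.get("languages"))
    next_stage = context.get("next_stage")
    if next_stage is not None:
        # Chain into indexing, preserving the sync flag so a blocking
        # API request can wait for the result.
        queue_next(collection, next_stage, entity, sync=context.get("sync"))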
def setUp(self):
    super(MappingAPITest, self).setUp()
    self.col = self.create_collection(foreign_id="map1")
    aggregator = get_aggregator(self.col)
    aggregator.delete()
    _, self.headers = self.login(is_admin=True)
    self.rolex = self.create_user(foreign_id="user_3")
    _, self.headers_x = self.login(foreign_id="user_3")
    self.fixture = self.get_fixture_path("experts.csv")
    self.content_hash = archive.archive_file(self.fixture)
    data = {
        "id": "foo",
        "schema": "Table",
        "properties": {
            "csvHash": self.content_hash,
            "contentHash": self.content_hash,
            "mimeType": "text/csv",
            "fileName": "experts.csv",
            "name": "experts.csv",
        },
    }
    self.ent = EntityProxy.from_dict(model, data, cleaned=False)
    self.ent.id = self.col.ns.sign(self.ent.id)
    index_proxy(self.col, self.ent)
    data = {
        "id": "foo2",
        "schema": "Table",
        "properties": {
            "csvHash": self.content_hash,
            "contentHash": self.content_hash,
            "mimeType": "text/csv",
            "fileName": "experts.csv",
            "name": "experts.csv",
        },
    }
    self.ent2 = EntityProxy.from_dict(model, data, cleaned=False)
    self.ent2.id = self.col.ns.sign(self.ent2.id)
    index_proxy(self.col, self.ent2)
    data = {
        "id": "bar",
        "schema": "LegalEntity",
        "properties": {"name": "John Doe"},
    }
    ent = EntityProxy.from_dict(model, data, cleaned=False)
    ent.id = self.col.ns.sign(ent.id)
    index_proxy(self.col, ent)
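
A test built on this fixture would presumably POST a mapping against one of the indexed Table entities. A hedged sketch of such a payload; the route, the `mapping_query` field name, and the CSV column names are assumptions, not verified against the fixture:

# Hypothetical mapping request for the "foo" Table entity.
mapping_data = {
    "table_id": self.ent.id,
    "mapping_query": {
        "expert": {
            "schema": "Person",
            "keys": ["name"],
            "properties": {
                "name": {"column": "name"},
                "nationality": {"column": "nationality"},
            },
        },
    },
}
res = self.client.post(
    "/api/2/collections/%s/mappings" % self.col.id,
    json=mapping_data,
    headers=self.headers,
)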
def setUp(self):
    super(MappingAPITest, self).setUp()
    self.col = self.create_collection(data={'foreign_id': 'map1'})
    _, self.headers = self.login(is_admin=True)
    self.rolex = self.create_user(foreign_id='user_3')
    _, self.headers_x = self.login(foreign_id='user_3')
    self.fixture = self.get_fixture_path('experts.csv')
    self.content_hash = archive.archive_file(self.fixture)
    data = {
        'id': 'foo',
        'schema': 'Table',
        'properties': {
            'csvHash': self.content_hash,
            'contentHash': self.content_hash,
            'mimeType': 'text/csv',
            'fileName': 'experts.csv',
            'name': 'experts.csv',
        },
    }
    self.ent = EntityProxy.from_dict(model, data)
    self.ent.id = self.col.ns.sign(self.ent.id)
    index_proxy(self.col, self.ent)
    data = {
        'id': 'foo2',
        'schema': 'Table',
        'properties': {
            'csvHash': self.content_hash,
            'contentHash': self.content_hash,
            'mimeType': 'text/csv',
            'fileName': 'experts.csv',
            'name': 'experts.csv',
        },
    }
    self.ent2 = EntityProxy.from_dict(model, data)
    self.ent2.id = self.col.ns.sign(self.ent2.id)
    index_proxy(self.col, self.ent2)
    data = {
        'id': 'bar',
        'schema': 'LegalEntity',
        'properties': {'name': 'John Doe'},
    }
    ent = EntityProxy.from_dict(model, data)
    ent.id = self.col.ns.sign(ent.id)
    index_proxy(self.col, ent)
def ingest_upload(collection_id):
    """
    ---
    post:
      summary: Upload a document to a collection
      description: Upload a document to a collection with id `collection_id`
      parameters:
      - in: path
        name: collection_id
        required: true
        schema:
          type: integer
      requestBody:
        content:
          multipart/form-data:
            schema:
              type: object
              properties:
                file:
                  type: string
                  format: binary
                  description: The document to upload
                meta:
                  $ref: '#/components/schemas/DocumentIngest'
      responses:
        '201':
          description: OK
          content:
            application/json:
              schema:
                properties:
                  id:
                    description: id of the uploaded document
                    type: integer
                  status:
                    type: string
                type: object
      tags:
      - Ingest
      - Collection
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    job_id = get_session_id()
    sync = get_flag('sync', default=False)
    meta, foreign_id = _load_metadata()
    parent = _load_parent(collection, meta)
    upload_dir = ensure_path(mkdtemp(prefix='aleph.upload.'))
    try:
        content_hash = None
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = upload_dir.joinpath(path)
            storage.save(str(path))
            content_hash = archive.archive_file(path)
        document = Document.save(collection=collection,
                                 parent=parent,
                                 foreign_id=foreign_id,
                                 content_hash=content_hash,
                                 meta=meta,
                                 uploader_id=request.authz.id)
        collection.touch()
        db.session.commit()
        proxy = document.to_proxy()
        if proxy.schema.is_a(Document.SCHEMA_FOLDER) and sync:
            index_proxy(collection, proxy, sync=sync)
        ingest_entity(collection, proxy, job_id=job_id, sync=sync)
        document_id = collection.ns.sign(document.id)
        _notify(collection, document_id)
    finally:
        shutil.rmtree(upload_dir)
    return jsonify({'status': 'ok', 'id': document_id}, status=201)
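
One path worth illustrating separately: posting metadata without a file leaves `content_hash` as None, which is how folders get created, and the `SCHEMA_FOLDER and sync` branch then indexes them immediately. A hedged client-side sketch; host, API key, collection id, and the exact meta fields are placeholders:

import json
import requests

meta = {"file_name": "Reports", "foreign_id": "reports-folder"}
res = requests.post(
    "https://aleph.example.org/api/2/collections/42/ingest",  # placeholder
    headers={"Authorization": "ApiKey 1234"},  # placeholder credential
    data={"meta": json.dumps(meta)},  # no files= part: creates a folder
    params={"sync": "true"},
)
folder_id = res.json()["id"]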