def view(entity_id):
    """Return one entity from the index with a sanitized HTML body attached.

    The lookup passes ``excludes=["text", "numeric.*"]`` to the index. The
    first ``bodyHtml``, ``sourceUrl`` and ``encoding`` values of the entity
    proxy are handed to ``sanitize_html`` and the result is stored under
    ``safeHtml``; ``shallow`` is set to ``False`` before serialization.
    ---
    get:
      summary: Get an entity
      description: Return the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    enable_cache()
    entity = get_index_entity(
        entity_id,
        request.authz.READ,
        excludes=["text", "numeric.*"],
    )
    tag_request(collection_id=entity.get("collection_id"))
    entity_proxy = model.get_proxy(entity)
    # Looked up in the same order as the arguments sanitize_html expects.
    body_html, origin_url, charset = (
        entity_proxy.first(prop, quiet=True)
        for prop in ("bodyHtml", "sourceUrl", "encoding")
    )
    entity["safeHtml"] = sanitize_html(body_html, origin_url, encoding=charset)
    entity["shallow"] = False
    return EntitySerializer.jsonify(entity)
def similar(entity_id):
    """Run a match query for the indexed entity `entity_id`.

    The entity is loaded with READ access, the request is tagged with its
    collection id, and the entity proxy is passed to ``MatchQuery.handle``.
    """
    enable_cache()
    indexed = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=indexed.get("collection_id"))
    proxy = model.get_proxy(indexed)
    matches = MatchQuery.handle(request, entity=proxy)
    return EntitySerializer.jsonify_result(matches)
def entities_index(entityset_id):
    """Search entities in the entity set with id `entityset_id`.

    The entity set is loaded with READ access, the request is tagged with the
    parsed query text and prefix, and the search is delegated to
    ``EntitySetItemsQuery.handle``.
    ---
    get:
      summary: Search entities in the entity set with id `entityset_id`
      description: >
        Supports all query filters and arguments present in the normal
        entity search API, but all resulting entities will be members of the
        set.
      parameters:
      - description: The entityset id.
        in: path
        name: entityset_id
        required: true
        schema:
          type: string
        example: 3a0d91ece2dce88ad3259594c7b642485235a048
      responses:
        '200':
          description: Returns a list of entities in result
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EntitiesResponse'
      tags:
      - EntitySet
    """
    entityset = get_entityset(entityset_id, request.authz.READ)
    parser = SearchQueryParser(request.args, request.authz)
    tag_request(query=parser.text, prefix=parser.prefix)
    result = EntitySetItemsQuery.handle(
        request,
        parser=parser,
        entityset=entityset,
    )
    return EntitySerializer.jsonify_result(result)
def view(entity_id):
    """Return the serialized index entity with id `entity_id`.

    The request is tagged with the entity's collection id before the entity
    is serialized.
    ---
    get:
      summary: Get an entity
      description: Return the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    enable_cache()
    found = get_index_entity(entity_id, request.authz.READ)
    collection_id = found.get("collection_id")
    tag_request(collection_id=collection_id)
    return EntitySerializer.jsonify(found)
def similar(entity_id):
    """Record an entity audit event and run a match query for `entity_id`.

    The audit record (``Audit.ACT_ENTITY``) is written after the entity proxy
    has been built and before the match query runs.
    """
    enable_cache()
    indexed = get_index_entity(entity_id, request.authz.READ)
    proxy = model.get_proxy(indexed)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    matches = MatchQuery.handle(request, entity=proxy)
    return EntitySerializer.jsonify_result(matches)
def create():
    """Create an entity from the request body and return its indexed form.

    Any client-supplied ``id`` is dropped from the payload before the upsert.
    The ``validate`` flag (default ``False``) is forwarded to
    ``upsert_entity``, which is always called with ``sync=True``.
    ---
    post:
      summary: Create an entity in a collection
      description: >-
        Create an entity in a collection with a given schema and a set of given
        properties in the database. This is not the API you want to be using to
        load bulk data, but only for interactive entity manipulation in the UI.
        Always use the `bulk` API for loading source datasets, no exceptions.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityCreate'
      responses:
        '200':
          description: Returns the created entity
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    data = parse_request('EntityCreate')
    collection = get_nested_collection(data, request.authz.WRITE)
    # The id is not taken from the client payload on create.
    data.pop('id', None)
    validate = get_flag('validate', default=False)
    entity_id = upsert_entity(data, collection, sync=True, validate=validate)
    tag_request(entity_id=entity_id, collection_id=str(collection.id))
    entity = get_index_entity(entity_id, request.authz.READ)
    return EntitySerializer.jsonify(entity)
def create():
    """Create an entity from the request body and return its indexed form.

    The target collection is taken from ``collection_id`` in the payload and
    loaded with WRITE access; the entity is created with ``sync=True``.
    """
    payload = parse_request(EntityCreateSchema)
    collection = get_db_collection(
        payload["collection_id"],
        request.authz.WRITE,
    )
    entity_id = create_entity(payload, collection, sync=True)
    tag_request(entity_id=entity_id, collection_id=str(collection.id))
    indexed = get_index_entity(entity_id, request.authz.READ)
    return EntitySerializer.jsonify(indexed)
def match():
    """Run a match query for the entity described in the request body.

    The ``collection_ids`` query arguments are forwarded to
    ``MatchQuery.handle`` as a list.
    """
    payload = parse_request(EntityUpdateSchema)
    proxy = model.get_proxy(payload)
    tag_request(schema=proxy.schema.name, caption=proxy.caption)
    wanted_collections = request.args.getlist("collection_ids")
    matches = MatchQuery.handle(
        request,
        entity=proxy,
        collection_ids=wanted_collections,
    )
    return EntitySerializer.jsonify_result(matches)
def match():
    """Audit and run a match query for the entity in the request body.

    The audit record (``Audit.ACT_MATCH``) receives the parsed payload, not
    the proxy built from it afterwards.
    """
    payload = parse_request(EntityUpdateSchema)
    record_audit(Audit.ACT_MATCH, entity=payload)
    proxy = model.get_proxy(payload)
    wanted_collections = request.args.getlist("collection_ids")
    matches = MatchQuery.handle(
        request,
        entity=proxy,
        collection_ids=wanted_collections,
    )
    return EntitySerializer.jsonify_result(matches)
def similar(entity_id):
    """Tag, audit and run a match query for the indexed entity `entity_id`.

    The request is tagged with the entity's collection id, then an
    ``Audit.ACT_ENTITY`` record is written before the match query runs.
    """
    enable_cache()
    indexed = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=indexed.get("collection_id"))
    proxy = model.get_proxy(indexed)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    matches = MatchQuery.handle(request, entity=proxy)
    return EntitySerializer.jsonify_result(matches)
def match():
    """Audit, tag and run a match query for the entity in the request body.

    The audit record (``Audit.ACT_MATCH``) receives the parsed payload; the
    request tags use the schema name and caption of the proxy built from it.
    """
    payload = parse_request(EntityUpdateSchema)
    record_audit(Audit.ACT_MATCH, entity=payload)
    proxy = model.get_proxy(payload)
    tag_request(schema=proxy.schema.name, caption=proxy.caption)
    wanted_collections = request.args.getlist("collection_ids")
    matches = MatchQuery.handle(
        request,
        entity=proxy,
        collection_ids=wanted_collections,
    )
    return EntitySerializer.jsonify_result(matches)
def update(entity_id):
    """Apply the request body to the database entity `entity_id`.

    With the ``merge`` flag set, the submitted properties are first combined
    with the stored entity data via ``merge_data``. The change is committed
    and then passed to ``update_entity`` (``sync`` flag, default ``True``),
    whose return value is serialized.
    """
    db_entity = get_db_entity(entity_id, request.authz.WRITE)
    payload = parse_request(EntityUpdateSchema)
    if get_flag("merge"):
        payload["properties"] = merge_data(
            payload.get("properties"),
            db_entity.data,
        )
    db_entity.update(payload)
    db.session.commit()
    refreshed = update_entity(db_entity, sync=get_flag("sync", True))
    return EntitySerializer.jsonify(refreshed)
def update(entity_id):
    """Apply the request body to the database entity `entity_id`.

    The request is tagged with the entity's collection id before the body is
    parsed. With the ``merge`` flag set, the submitted properties are combined
    with the stored entity data via ``merge_data``. The change is committed
    and then passed to ``update_entity`` (``sync`` flag, default ``True``),
    whose return value is serialized.
    """
    db_entity = get_db_entity(entity_id, request.authz.WRITE)
    tag_request(collection_id=db_entity.collection_id)
    payload = parse_request(EntityUpdateSchema)
    if get_flag("merge"):
        payload["properties"] = merge_data(
            payload.get("properties"),
            db_entity.data,
        )
    db_entity.update(payload)
    db.session.commit()
    refreshed = update_entity(db_entity, sync=get_flag("sync", True))
    return EntitySerializer.jsonify(refreshed)
def index():
    """Search entities and attach an export link when one is allowed.

    The ``export`` link is added only for logged-in users and only when the
    result total does not exceed ``EXPORT_MAX``.
    """
    # enable_cache(vary_user=True) is currently switched off for this view.
    parser = SearchQueryParser(request.args, request.authz)
    result = EntitiesQuery.handle(request, parser=parser)
    links = {}
    exportable = request.authz.logged_in and result.total <= EXPORT_MAX
    if exportable:
        # Re-use the incoming query arguments so the export matches the search.
        query_args = list(request.args.items(multi=True))
        links["export"] = url_for(
            "entities_api.export",
            format="excel",
            _authorize=True,
            _query=query_args,
        )
    return EntitySerializer.jsonify_result(result, extra={"links": links})
def index():
    """Search entities, tag the request, and attach an export link if allowed.

    The request is tagged with the parsed query text and prefix before the
    search runs. The ``export`` link is added only for logged-in users and
    only when the result total does not exceed ``EXPORT_MAX``.
    """
    # enable_cache(vary_user=True) is currently switched off for this view.
    parser = SearchQueryParser(request.args, request.authz)
    tag_request(query=parser.text, prefix=parser.prefix)
    result = EntitiesQuery.handle(request, parser=parser)
    links = {}
    exportable = request.authz.logged_in and result.total <= EXPORT_MAX
    if exportable:
        # Re-use the incoming query arguments so the export matches the search.
        query_args = list(request.args.items(multi=True))
        links["export"] = url_for(
            "entities_api.export",
            format="excel",
            _authorize=True,
            _query=query_args,
        )
    return EntitySerializer.jsonify_result(result, extra={"links": links})
def create():
    """Create an entity from the request body and return its indexed form.

    Any client-supplied ``id`` is dropped from the payload. When the
    ``validate`` flag is set the payload is passed to ``validate_entity``
    first. The upsert runs with ``sync=True``, the ``sign`` flag (default
    ``False``) and the current session id as ``job_id``; the database session
    is committed before the entity is read back from the index.
    ---
    post:
      summary: Create an entity in a collection
      description: >-
        Create an entity in a collection with a given schema and a set of given
        properties in the database. This is not the API you want to be using to
        load bulk data, but only for interactive entity manipulation in the UI.
        Always use the `bulk` API for loading source datasets, no exceptions.
      parameters:
      - in: query
        name: sign
        description: Sign entity IDs referenced in nested properties.
        required: false
        schema:
          type: boolean
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityCreate'
      responses:
        '200':
          description: Returns the created entity
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    data = parse_request("EntityCreate")
    collection = get_nested_collection(data, request.authz.WRITE)
    # The id is not taken from the client payload on create.
    data.pop("id", None)
    if get_flag("validate", default=False):
        validate_entity(data)
    entity_id = upsert_entity(
        data,
        collection,
        authz=request.authz,
        sync=True,
        sign=get_flag("sign", default=False),
        job_id=get_session_id(),
    )
    db.session.commit()
    tag_request(entity_id=entity_id, collection_id=collection.id)
    entity = get_index_entity(entity_id, request.authz.READ)
    return EntitySerializer.jsonify(entity)
def merge(id, other_id):
    """Merge two database entities via ``entity.merge(other)``.

    Both entities are loaded with WRITE access. After the merge is committed,
    both are passed to ``update_entity`` (``sync`` flag, default ``True``) and
    the result for the first entity is serialized.

    Raises:
        BadRequest: if ``entity.merge`` raises ``ValueError``; the error text
            is used as the response message.
    """
    entity = get_db_entity(id, request.authz.WRITE)
    other = get_db_entity(other_id, request.authz.WRITE)
    try:
        entity.merge(other)
    except ValueError as ve:
        # Exceptions have no ``.message`` attribute on Python 3; reading it
        # would raise AttributeError and hide the intended 400 response.
        raise BadRequest(str(ve))
    db.session.commit()
    sync = get_flag('sync', True)
    data = update_entity(entity, sync=sync)
    update_entity(other, sync=sync)
    return EntitySerializer.jsonify(data)
def merge(entity_id, other_id):
    """Merge two database entities via ``entity.merge(other)``.

    Both entities are loaded with WRITE access and the request is tagged with
    the first entity's collection id. After the merge is committed, both are
    passed to ``update_entity`` (``sync`` flag, default ``True``) and the
    result for the first entity is serialized.

    Raises:
        BadRequest: if ``entity.merge`` raises ``ValueError``; the error text
            is used as the response message.
    """
    entity = get_db_entity(entity_id, request.authz.WRITE)
    other = get_db_entity(other_id, request.authz.WRITE)
    tag_request(collection_id=entity.collection_id)
    try:
        entity.merge(other)
    except ValueError as ve:
        # Exceptions have no ``.message`` attribute on Python 3; reading it
        # would raise AttributeError and hide the intended 400 response.
        raise BadRequest(str(ve))
    db.session.commit()
    sync = get_flag('sync', True)
    data = update_entity(entity, sync=sync)
    update_entity(other, sync=sync)
    return EntitySerializer.jsonify(data)
def index():
    """Search entities, log the query text, and attach an export link.

    A non-empty query text is saved through ``QueryLog.save`` and committed
    before the search runs. The ``export`` link is added only for logged-in
    users and only when the result total does not exceed ``MAX_PAGE``.
    """
    # enable_cache(vary_user=True) is currently switched off for this view.
    parser = SearchQueryParser(request.args, request.authz)
    query_text = parser.text
    if query_text:
        QueryLog.save(request.authz.id, request._session_id, query_text)
        db.session.commit()
    tag_request(query=parser.text, prefix=parser.prefix)
    result = EntitiesQuery.handle(request, parser=parser)
    links = {}
    exportable = request.authz.logged_in and result.total <= MAX_PAGE
    if exportable:
        # Re-use the incoming query arguments so the export matches the search.
        query_args = list(request.args.items(multi=True))
        links["export"] = url_for(
            "entities_api.export",
            _authorize=True,
            _query=query_args,
        )
    return EntitySerializer.jsonify_result(result, extra={"links": links})
def update(entity_id):
    """Upsert the entity `entity_id` from the request body.

    The collection comes from the existing index entity when it can be loaded
    with WRITE access; on ``NotFound`` it is resolved from the request payload
    instead. The payload id is forced to `entity_id` before the upsert, and
    the stored entity is read back from the index for the response.
    ---
    post:
      summary: Update an entity
      description: >
        Update the entity with id `entity_id`. This only applies to
        entities which are backed by a database row, i.e. not any
        entities resulting from a mapping or bulk load.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
          format: entity_id
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    payload = parse_request("EntityUpdate")
    try:
        existing = get_index_entity(entity_id, request.authz.WRITE)
        collection = get_db_collection(
            existing.get("collection_id"),
            request.authz.WRITE,
        )
    except NotFound:
        # Nothing usable in the index: take the collection from the payload.
        collection = get_nested_collection(payload, request.authz.WRITE)
    tag_request(collection_id=collection.id)
    payload["id"] = entity_id
    entity_id = upsert_entity(
        payload,
        collection,
        sync=get_flag("sync", default=True),
        validate=get_flag("validate", default=False),
    )
    db.session.commit()
    stored = get_index_entity(entity_id, request.authz.READ)
    return EntitySerializer.jsonify(stored)
def similar(entity_id):
    """Run a match query for the indexed entity `entity_id`.

    The entity is loaded with READ access, the request is tagged with its
    collection id, and the entity proxy is passed to ``MatchQuery.handle``.
    ---
    get:
      summary: Get similar entities
      description: >
        Get a list of similar entities to the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      - in: query
        name: 'filter:schema'
        schema:
          items:
            type: string
          type: array
      - in: query
        name: 'filter:schemata'
        schema:
          items:
            type: string
          type: array
      responses:
        '200':
          description: Returns a list of entities
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EntitiesResponse'
      tags:
      - Entity
    """
    enable_cache()
    indexed = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=indexed.get("collection_id"))
    proxy = model.get_proxy(indexed)
    matches = MatchQuery.handle(request, entity=proxy)
    return EntitySerializer.jsonify_result(matches)
def update(entity_id):
    """Apply the request body to the database entity `entity_id`.

    With the ``merge`` flag set, the submitted properties are combined with
    the stored entity data via ``merge_data``. The change is committed and
    passed to ``update_entity`` (``sync`` flag, default ``True``); the
    response is built from the entity as read back from the index.
    ---
    post:
      summary: Update an entity
      description: >
        Update the entity with id `entity_id`. This only applies to
        entities which are backed by a database row, i.e. not any
        entities resulting from a mapping or bulk load.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    db_entity = get_db_entity(entity_id, request.authz.WRITE)
    tag_request(collection_id=db_entity.collection_id)
    payload = parse_request("EntityUpdate")
    if get_flag("merge"):
        payload["properties"] = merge_data(
            payload.get("properties"),
            db_entity.data,
        )
    db_entity.update(payload)
    db.session.commit()
    update_entity(db_entity, sync=get_flag("sync", True))
    indexed = get_index_entity(entity_id, request.authz.READ)
    return EntitySerializer.jsonify(indexed)
def match():
    """Run a match query for the entity described in the request body.

    The proxy is built with ``cleaned=False`` and the ``collection_ids`` query
    arguments are forwarded to ``MatchQuery.handle`` as a list.
    ---
    post:
      summary: Query for similar entities
      description: >-
        Query for similar entities matching a given entity inside a given list
        of collections.
      parameters:
      - in: query
        name: collection_ids
        schema:
          type: array
          items:
            type: string
      responses:
        '200':
          description: Returns a list of entities in result
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EntitiesResponse'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      tags:
      - Entity
    """
    payload = parse_request("EntityUpdate")
    proxy = model.get_proxy(payload, cleaned=False)
    tag_request(schema=proxy.schema.name, caption=proxy.caption)
    wanted_collections = request.args.getlist("collection_ids")
    matches = MatchQuery.handle(
        request,
        entity=proxy,
        collection_ids=wanted_collections,
    )
    return EntitySerializer.jsonify_result(matches)
def view(entity_id):
    """Return the serialized index entity `entity_id`, auditing the access.

    An ``Audit.ACT_ENTITY`` record is written after the entity has been
    loaded, then the request is tagged with the entity's collection id.
    """
    enable_cache()
    found = get_index_entity(entity_id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    collection_id = found.get("collection_id")
    tag_request(collection_id=collection_id)
    return EntitySerializer.jsonify(found)
def create():
    """Create an entity from the request body and return what was created.

    The target collection is taken from ``collection_id`` in the payload and
    loaded with WRITE access. ``create_entity`` is called with the ``sync``
    flag (default ``True``) and its return value is serialized directly.
    """
    payload = parse_request(EntityCreateSchema)
    collection = get_db_collection(
        payload["collection_id"],
        request.authz.WRITE,
    )
    created = create_entity(payload, collection, sync=get_flag("sync", True))
    tag_request(entity_id=created.get("id"), collection_id=str(collection.id))
    return EntitySerializer.jsonify(created)
def index():
    """Search entities and attach an export link when one is allowed.

    The query is parsed from ``request.values``. After the search runs, the
    request is tagged with the text of the executed query and the parser
    prefix. The ``export`` link is added only for logged-in users and only
    when the result total does not exceed ``MAX_PAGE``.
    ---
    get:
      summary: Search entities
      description: >
        Returns a list of entities matching the given search criteria.

        A filter can be applied to show only results from a particular
        collection: `?filter:collection_id={collection_id}`.

        If you know you only want to search documents (unstructured, ingested
        data) or entities (structured data which may have been extracted from
        a dataset, or entered by a human) you can use these arguments with the
        `/documents` or `/entities` endpoints.
      parameters:
      - description: >-
          A query string in ElasticSearch query syntax. Can include field
          searches, such as `title:penguin`
        in: query
        name: q
        schema:
          type: string
      - description: >-
          Return facet values for the given metadata field, such as
          `languages`, `countries`, `mime_type` or `extension`. This can be
          specified multiple times for more than one facet to be added.
        in: query
        name: facet
        schema:
          type: string
      - description: >
          Filter the results by the given field. This is useful when used in
          conjunction with facet to create a drill-down mechanism. Useful
          fields are:

          - `collection_id`, documents belonging to a particular collection.

          - `title`, of the document.

          - `file_name`, of the source file.

          - `source_url`, URL of the source file.

          - `extension`, file extension of the source file.

          - `languages`, in the document.

          - `countries`, associated with the document.

          - `keywords`, from the document.

          - `emails`, email addresses mentioned in the document.

          - `domains`, websites mentioned in the document.

          - `phones`, mentioned in the document.

          - `dates`, in any of the following formats: yyyy-MM-dd, yyyy-MM,
          yyyy-MM-d, yyyy-M, yyyy

          - `mime_type`, of the source file.

          - `author`, according to the source file's metadata.

          - `summary`, of the document.

          - `text`, entire text extracted from the document.

          - `created_at`, when the document was added to aleph
          (yyyy-mm-ddThh:ii:ss.uuuuuu).

          - `updated_at`, when the document was modified in aleph
          (yyyy-mm-ddThh:ii:ss.uuuuuu).
        in: query
        name: 'filter:{field_name}'
        schema:
          type: string
      - description: 'The number of results to return, max. 10,000.'
        in: query
        name: limit
        schema:
          type: integer
      - description: >
          The number of results to skip at the beginning of the result set.
        in: query
        name: offset
        schema:
          type: integer
      responses:
        '200':
          description: Returns a list of entities in result
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EntitiesResponse'
      tags:
      - Entity
    """
    # enable_cache(vary_user=True)
    parser = SearchQueryParser(request.values, request.authz)
    result = EntitiesQuery.handle(request, parser=parser)
    # Tagging happens after the search so it can use the executed query text.
    tag_request(query=result.query.to_text(), prefix=parser.prefix)
    links = {}
    if request.authz.logged_in and result.total <= MAX_PAGE:
        query = list(request.args.items(multi=True))
        links["export"] = url_for("entities_api.export", _query=query)
    return EntitySerializer.jsonify_result(result, extra={"links": links})
def index():
    """Search entities using the request's query arguments.

    The arguments are parsed with ``SearchQueryParser`` and the search is
    delegated to ``EntitiesQuery.handle``.
    """
    # enable_cache(vary_user=True) is currently switched off for this view.
    query_parser = SearchQueryParser(request.args, request.authz)
    found = EntitiesQuery.handle(request, parser=query_parser)
    return EntitySerializer.jsonify_result(found)