def decide(collection_id, xref_id):
    """Record a user's judgement on a single xref match.

    The xref is looked up within `collection_id` (404 if missing) and the
    caller must have WRITE access to that collection. When both sides of
    the xref can be loaded from the index, their schemata are checked for
    compatibility before the decision is handed to `decide_xref`.
    ---
    post:
      summary: Give feedback about the veracity of an xref match.
      description: >
        This lets a user decide if they think a given xref match is a true or
        false match, and what group of users (context) should be privy to this
        insight.
      parameters:
      - in: path
        name: collection_id
        required: true
        schema:
          type: integer
      - in: path
        name: xref_id
        required: true
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/XrefDecide'
      responses:
        '202':
          content:
            application/json:
              schema:
                properties:
                  status:
                    description: accepted
                    type: string
                type: object
          description: Accepted
      tags:
      - Xref
      - Profiles
      - EntitySet
    """
    data = parse_request("XrefDecide")
    xref = obj_or_404(get_xref(xref_id, collection_id=collection_id))
    require(request.authz.can(collection_id, request.authz.WRITE))

    entity = get_index_entity(xref.get("entity_id"))
    match = get_index_entity(xref.get("match_id"))
    # Only compare schemata when both sides were actually loaded. The former
    # guard (`is None and ... is None`) could only run the check on two None
    # values, i.e. it either skipped the validation or dereferenced None.
    if entity is not None and match is not None:
        # This will raise a InvalidData error if the two types are not compatible
        model.common_schema(entity.get("schema"), match.get("schema"))

    decide_xref(xref, judgement=data.get("decision"), authz=request.authz)
    # NOTE(review): the spec above documents a 202 response while the code
    # answers with 204 - confirm which one clients rely on before changing.
    return jsonify({"status": "ok"}, status=204)
def pairwise():
    """Store a pairwise judgement between an entity and a candidate match.

    WRITE access is required on the entity's collection; the match's
    collection is loaded without an explicit action argument. After the
    judgement is passed to `decide_pairwise`, an `OP_UPDATE_ENTITY` task is
    queued for the entity.
    ---
    post:
      summary: Make a pairwise judgement between an entity and a match.
      description: >
        This lets a user decide if they think a given xref match is a true or
        false match. Implicitly, this might create or alter a profile in the
        collection used by
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Pairwise'
      responses:
        '200':
          content:
            application/json:
              schema:
                properties:
                  status:
                    description: accepted
                    type: string
                  profile_id:
                    description: profile_id for `entity`.
                    type: string
                type: object
          description: Accepted
      tags:
      - Profile
    """
    payload = parse_request("Pairwise")

    entity = get_index_entity(payload.get("entity_id"))
    collection = get_db_collection(entity["collection_id"], request.authz.WRITE)

    match = get_index_entity(payload.get("match_id"))
    match_collection = get_db_collection(match["collection_id"])

    profile = decide_pairwise(
        collection,
        entity,
        match_collection,
        match,
        judgement=payload.get("judgement"),
        authz=request.authz,
    )

    session_id = get_session_id()
    queue_task(
        collection,
        OP_UPDATE_ENTITY,
        job_id=session_id,
        entity_id=entity.get("id"),
    )

    # No profile may come back from the decision; report that as null.
    if profile is None:
        profile_id = None
    else:
        profile_id = profile.id
    return jsonify({"status": "ok", "profile_id": profile_id}, status=200)
def delete(entity_id):
    """Delete an entity; WRITE access is required on entity and collection.

    The `sync` request flag (default true) and the current session id are
    forwarded to `delete_entity`.
    ---
    delete:
      summary: Delete an entity
      description: Delete the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '204':
          description: No Content
      tags:
      - Entity
    """
    write = request.authz.WRITE
    entity = get_index_entity(entity_id, write)
    collection = get_db_collection(entity.get("collection_id"), write)
    tag_request(collection_id=collection.id)
    delete_entity(
        collection,
        entity,
        sync=get_flag("sync", default=True),
        job_id=get_session_id(),
    )
    return ("", 204)
def pivot(id):
    """Return the `entity_pivot` results for entity `id` as JSON.

    The entity is loaded from the index with READ access.
    """
    enable_cache()
    entity = get_index_entity(id, request.authz.READ)
    payload = {
        "status": "ok",
        "results": entity_pivot(entity, request.authz),
    }
    return jsonify(payload)
def create():
    """Create an entity from the request body and return its indexed form.

    WRITE access is required on the collection named by `collection_id`
    in the request data.
    """
    payload = parse_request(EntityCreateSchema)
    collection = get_db_collection(payload["collection_id"], request.authz.WRITE)
    new_id = create_entity(payload, collection, sync=True)
    tag_request(entity_id=new_id, collection_id=str(collection.id))
    return EntitySerializer.jsonify(get_index_entity(new_id, request.authz.READ))
def similar(id):
    """Run `SimilarEntitiesQuery` for entity `id` and return the result as JSON."""
    enable_cache()
    source = get_index_entity(id, request.authz.READ)
    return jsonify(
        SimilarEntitiesQuery.handle(request, entity=source, schema=CombinedSchema)
    )
def delete(entity_id):
    """Delete an entity and commit the database session.

    WRITE access is required on both the entity and its collection. The
    `sync` request flag (default true) is forwarded to `delete_entity`.
    ---
    delete:
      summary: Delete an entity
      description: Delete the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '204':
          description: No Content
      tags:
      - Entity
    """
    write = request.authz.WRITE
    entity = get_index_entity(entity_id, write)
    collection = get_db_collection(entity.get("collection_id"), write)
    tag_request(collection_id=collection.id)
    sync = get_flag("sync", True)
    delete_entity(collection, entity, sync=sync)
    db.session.commit()
    return ("", 204)
def view(entity_id):
    """Serialize the indexed entity `entity_id` for a reader.
    ---
    get:
      summary: Get an entity
      description: Return the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    enable_cache()
    found = get_index_entity(entity_id, request.authz.READ)
    collection_id = found.get("collection_id")
    tag_request(collection_id=collection_id)
    return EntitySerializer.jsonify(found)
def similar(entity_id):
    """Return `MatchQuery` results for the entity, recording an audit entry."""
    enable_cache()
    proxy = model.get_proxy(get_index_entity(entity_id, request.authz.READ))
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    matches = MatchQuery.handle(request, entity=proxy)
    return EntitySerializer.jsonify_result(matches)
def documents(id):
    """Run `EntityDocumentsQuery` for entity `id` and return the result as JSON."""
    enable_cache()
    source = get_index_entity(id, request.authz.READ)
    return jsonify(
        EntityDocumentsQuery.handle(request, entity=source, schema=CombinedSchema)
    )
def view(entity_id):
    """Serialize an entity together with a sanitized copy of its HTML body.

    The index lookup excludes the `text` and `numeric.*` fields. The result
    is extended with `safeHtml` (output of `sanitize_html`) and
    `shallow = False` before serialization.
    ---
    get:
      summary: Get an entity
      description: Return the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    enable_cache()
    entity = get_index_entity(
        entity_id,
        request.authz.READ,
        excludes=["text", "numeric.*"],
    )
    tag_request(collection_id=entity.get("collection_id"))

    proxy = model.get_proxy(entity)
    body_html = proxy.first("bodyHtml", quiet=True)
    origin = proxy.first("sourceUrl", quiet=True)
    charset = proxy.first("encoding", quiet=True)

    entity["safeHtml"] = sanitize_html(body_html, origin, encoding=charset)
    entity["shallow"] = False
    return EntitySerializer.jsonify(entity)
def tags(entity_id):
    """Return the `entity_tags` results for an entity with their count.
    ---
    get:
      summary: Get entity tags
      description: >-
        Get tags for the entity with id `entity_id`.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
                allOf:
                - $ref: '#/components/schemas/QueryResponse'
                properties:
                  results:
                    type: array
                    items:
                      $ref: '#/components/schemas/EntityTag'
      tags:
      - Entity
    """
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=entity.get("collection_id"))
    proxy = model.get_proxy(entity)
    found = entity_tags(proxy, request.authz)
    payload = {"status": "ok", "total": len(found), "results": found}
    return jsonify(payload)
def similar(id):
    """Return `MatchQuery` results for entity `id` as JSON, with an audit entry."""
    enable_cache()
    proxy = model.get_proxy(get_index_entity(id, request.authz.READ))
    record_audit(Audit.ACT_ENTITY, id=id)
    return jsonify(
        MatchQuery.handle(request, entity=proxy, schema=CombinedSchema)
    )
def create():
    """Create an entity from the request body and return its indexed form.

    Any client-supplied `id` is dropped before the upsert. The `validate`
    request flag (default false) is forwarded to `upsert_entity`.
    ---
    post:
      summary: Create an entity in a collection
      description: >-
        Create an entity in a collection with a given schema and a set of given
        properties in the database. This is not the API you want to be using to
        load bulk data, but only for interactive entity manipulation in the UI.
        Always use the `bulk` API or for loading source datasets, no
        exceptions.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityCreate'
      responses:
        '200':
          description: Resturns the created entity
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    payload = parse_request("EntityCreate")
    collection = get_nested_collection(payload, request.authz.WRITE)
    payload.pop("id", None)
    new_id = upsert_entity(
        payload,
        collection,
        sync=True,
        validate=get_flag("validate", default=False),
    )
    tag_request(entity_id=new_id, collection_id=str(collection.id))
    return EntitySerializer.jsonify(get_index_entity(new_id, request.authz.READ))
def similar(entity_id):
    """Return `MatchQuery` results for the entity, tagging the request."""
    enable_cache()
    source = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=source.get("collection_id"))
    matches = MatchQuery.handle(request, entity=model.get_proxy(source))
    return EntitySerializer.jsonify_result(matches)
def update(entity_id):
    """Upsert the entity `entity_id` from the request body and return it.

    If the entity is not found, the target collection is taken from the
    request data instead. The `validate`, `sync` and `sign` request flags
    control validation and the upsert; the response is produced by `view`.
    ---
    post:
      summary: Update an entity
      description: >
        Update the entity with id `entity_id`. This only applies to
        entities which are backed by a database row, i.e. not any
        entities resulting from a mapping or bulk load.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
          format: entity_id
      - in: query
        name: sign
        description: Sign entity IDs referenced in nested properties.
        required: false
        schema:
          type: boolean
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    payload = parse_request("EntityUpdate")
    write = request.authz.WRITE
    try:
        existing = get_index_entity(entity_id, write)
        require(check_write_entity(existing, request.authz))
        collection = get_db_collection(existing.get("collection_id"), write)
    except NotFound:
        # Unknown entity: fall back to the collection given in the payload.
        collection = get_nested_collection(payload, write)
    tag_request(collection_id=collection.id)

    payload["id"] = entity_id
    if get_flag("validate", default=False):
        validate_entity(payload)

    sync = get_flag("sync", default=True)
    sign = get_flag("sign", default=False)
    session_id = get_session_id()
    entity_id = upsert_entity(
        payload,
        collection,
        authz=request.authz,
        sync=sync,
        sign=sign,
        job_id=session_id,
    )
    db.session.commit()
    return view(entity_id)
def similar(entity_id):
    """Return `MatchQuery` results for the entity; tags and audits the request."""
    enable_cache()
    source = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=source.get("collection_id"))
    proxy = model.get_proxy(source)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    return EntitySerializer.jsonify_result(
        MatchQuery.handle(request, entity=proxy)
    )
def update(entity_id):
    """Upsert the entity `entity_id` from the request body and return it.

    If the entity is not found, the target collection is taken from the
    request data instead. The `sync` (default true) and `validate`
    (default false) request flags are forwarded to `upsert_entity`.
    ---
    post:
      summary: Update an entity
      description: >
        Update the entity with id `entity_id`. This only applies to
        entities which are backed by a database row, i.e. not any
        entities resulting from a mapping or bulk load.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
          format: entity_id
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    payload = parse_request("EntityUpdate")
    write = request.authz.WRITE
    try:
        existing = get_index_entity(entity_id, write)
        collection = get_db_collection(existing.get("collection_id"), write)
    except NotFound:
        # Unknown entity: fall back to the collection given in the payload.
        collection = get_nested_collection(payload, write)
    tag_request(collection_id=collection.id)

    payload["id"] = entity_id
    sync = get_flag("sync", default=True)
    validate = get_flag("validate", default=False)
    entity_id = upsert_entity(payload, collection, validate=validate, sync=sync)
    db.session.commit()

    refreshed = get_index_entity(entity_id, request.authz.READ)
    return EntitySerializer.jsonify(refreshed)
def item_update(entityset_id):
    """Add an item to the entity set with id `entityset_id`, or change
    the items judgement.

    To delete an item from the entity set, apply the judgement: `no_judgement`.
    ---
    post:
      summary: Add item to an entityset
      parameters:
      - description: The entityset id.
        in: path
        name: entityset_id
        required: true
        schema:
          type: string
        example: 3a0d91ece2dce88ad3259594c7b642485235a048
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntitySetItemUpdate'
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EntitySetItem'
          description: OK
        '204':
          description: Item removed
      tags:
      - EntitySetItem
    """
    entityset = get_entityset(entityset_id, request.authz.WRITE)
    data = parse_request("EntitySetItemUpdate")

    # The entity may be referenced either as a nested object or by plain id;
    # an explicit `entity_id` takes precedence over the nested object's id.
    nested = data.pop("entity", {})
    entity_id = data.pop("entity_id", nested.get("id"))
    entity = get_index_entity(entity_id, request.authz.READ)
    collection = get_db_collection(entity["collection_id"])

    data["added_by_id"] = request.authz.id
    data.pop("collection", None)
    item = save_entityset_item(entityset, collection, entity_id, **data)
    db.session.commit()

    session_id = get_session_id()
    queue_task(collection, OP_UPDATE_ENTITY, job_id=session_id, entity_id=entity_id)

    if item is None:
        # Nothing was returned by the save: answer with a placeholder item
        # carrying the "no judgement" marker.
        item = {
            "id": "$".join((entityset_id, entity_id)),
            "entityset_id": entityset_id,
            "entityset_collection_id": entityset.collection_id,
            "entity_id": entity_id,
            "collection_id": entity["collection_id"],
            "judgement": Judgement.NO_JUDGEMENT,
        }
    else:
        # The entityset is needed to check if the item is writeable in the serializer:
        item = item.to_dict(entityset=entityset)
    return EntitySetItemSerializer.jsonify(item)
def references(entity_id):
    """List per-property reference counts for an entity as JSON.

    Each result row carries the count, the property object and the name of
    the property's schema, as produced by `entity_references`.
    """
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=entity.get("collection_id"))
    rows = [
        {"count": total, "property": prop, "schema": prop.schema.name}
        for prop, total in entity_references(entity, request.authz)
    ]
    return jsonify({"status": "ok", "total": len(rows), "results": rows})
def similar(entity_id):
    """Return scored and judged `MatchQuery` candidates for an entity.

    Each candidate is wrapped with a `compare` score, the pairwise
    judgement looked up for (entity, candidate), and the source entity's
    collection id.
    ---
    get:
      summary: Get similar entities
      description: >
        Get a list of similar entities to the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      - in: query
        name: 'filter:schema'
        schema:
          items:
            type: string
          type: array
      - in: query
        name: 'filter:schemata'
        schema:
          items:
            type: string
          type: array
      responses:
        '200':
          description: Returns a list of scored and judged entities
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SimilarResponse'
      tags:
      - Entity
    """
    # NOTE: response caching is not enabled here (the enable_cache() call
    # was commented out in this handler).
    entity = get_index_entity(entity_id, request.authz.READ)
    collection_id = entity.get("collection_id")
    tag_request(collection_id=collection_id)

    proxy = model.get_proxy(entity)
    result = MatchQuery.handle(request, entity=proxy)
    candidates = list(result.results)

    pairs = [(entity_id, candidate.get("id")) for candidate in candidates]
    judgements = pairwise_judgements(pairs, entity.get("collection_id"))

    result.results = [
        {
            "score": compare(model, proxy, candidate),
            "judgement": judgements.get((entity_id, candidate.get("id"))),
            "collection_id": entity.get("collection_id"),
            "entity": candidate,
        }
        for candidate in candidates
    ]
    return SimilarSerializer.jsonify_result(result)
def references(entity_id):
    """List per-property reference counts for an entity, with an audit entry."""
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    rows = [
        {"count": total, "property": prop, "schema": prop.schema.name}
        for prop, total in entity_references(entity, request.authz)
    ]
    return jsonify({"status": "ok", "total": len(rows), "results": rows})
def update(entity_id):
    """Update a database-backed entity and return its indexed form.

    With the `merge` request flag set, the submitted properties are
    combined with the stored entity data via `merge_data` first.
    """
    entity = get_db_entity(entity_id, request.authz.WRITE)
    tag_request(collection_id=entity.collection_id)
    payload = parse_request(EntityUpdateSchema)
    if get_flag("merge"):
        payload["properties"] = merge_data(payload.get("properties"), entity.data)
    entity.update(payload)
    db.session.commit()

    sync = get_flag("sync", True)
    update_entity(entity, sync=sync)
    indexed = get_index_entity(entity_id, request.authz.READ)
    return EntitySerializer.jsonify(indexed)
def content(entity_id):
    """Return headers, body text and sanitized HTML of an entity.

    The entity is re-fetched via `entities_by_ids` without the `text`
    field; the first hit is rendered, and a 404 is returned when the
    lookup yields nothing.
    ---
    get:
      summary: Get the content of an entity
      description: >
        Return the text and/or html content of the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          content:
            application/json:
              schema:
                properties:
                  headers:
                    type: object
                  html:
                    type: string
                  text:
                    type: string
                type: object
          description: OK
        '404':
          description: Not Found
      tags:
      - Entity
    """
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=entity.get("collection_id"))

    hits = entities_by_ids(
        [entity_id],
        schemata=entity.get("schema"),
        excludes=["text"],
    )
    for hit in hits:
        proxy = model.get_proxy(hit)
        raw_html = proxy.first("bodyHtml", quiet=True)
        origin = proxy.first("sourceUrl", quiet=True)
        charset = proxy.first("encoding", quiet=True)
        safe_html = sanitize_html(raw_html, origin, encoding=charset)
        packed_headers = proxy.first("headers", quiet=True)
        headers = registry.json.unpack(packed_headers)
        # Only the first hit is ever rendered.
        return jsonify({
            "headers": headers,
            "text": proxy.first("bodyText", quiet=True),
            "html": safe_html,
        })
    return ("", 404)
def tags(entity_id):
    """Return an entity's tags with a filter query string, most frequent first.
    ---
    get:
      summary: Get entity tags
      description: >-
        Get tags for the entity with id `entity_id`. Tags include the query
        string to make a search by that particular tag.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
                allOf:
                - $ref: '#/components/schemas/QueryResponse'
                properties:
                  results:
                    type: array
                    items:
                      $ref: '#/components/schemas/EntityTag'
      tags:
      - Entity
    """
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=entity.get("collection_id"))

    def as_row(field, value, total):
        # Build one result row including the pre-encoded filter query string.
        quoted = quote(value.encode("utf-8"))
        pair = ("filter:%s" % field, quoted)
        return {
            "id": query_string([pair]),
            "value": value,
            "field": field,
            "count": total,
        }

    rows = [
        as_row(field, value, total)
        for (field, value, total) in entity_tags(entity, request.authz)
    ]
    rows = sorted(rows, key=lambda row: row["count"], reverse=True)
    return jsonify({"status": "ok", "total": len(rows), "results": rows})
def references(id):
    """List per-property reference counts for entity `id`, each with a search link.

    The link targets `search_api.search`, filtered on the referencing
    property and this entity id.
    """
    enable_cache()
    entity = get_index_entity(id, request.authz.READ)

    def as_row(prop, total):
        # Link first, then the row, so attribute access order is unchanged.
        query = ("filter:properties.%s" % prop.name, id)
        href = url_for("search_api.search", _query=(query,))
        return {
            "count": total,
            "property": prop,
            "schema": prop.schema.name,
            "results": href,
        }

    rows = [
        as_row(prop, total)
        for prop, total in entity_references(entity, request.authz)
    ]
    return jsonify({"status": "ok", "results": rows})
def create():
    """Create an entity from the request body and return its indexed form.

    Any client-supplied `id` is dropped. The payload is validated when the
    `validate` request flag is set, and the `sign` flag is forwarded to
    `upsert_entity`.
    ---
    post:
      summary: Create an entity in a collection
      description: >-
        Create an entity in a collection with a given schema and a set of given
        properties in the database. This is not the API you want to be using to
        load bulk data, but only for interactive entity manipulation in the UI.
        Always use the `bulk` API or for loading source datasets, no
        exceptions.
      parameters:
      - in: query
        name: sign
        description: Sign entity IDs referenced in nested properties.
        required: false
        schema:
          type: boolean
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityCreate'
      responses:
        '200':
          description: Resturns the created entity
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    payload = parse_request("EntityCreate")
    collection = get_nested_collection(payload, request.authz.WRITE)
    payload.pop("id", None)
    if get_flag("validate", default=False):
        validate_entity(payload)

    sign = get_flag("sign", default=False)
    session_id = get_session_id()
    new_id = upsert_entity(
        payload,
        collection,
        authz=request.authz,
        sync=True,
        sign=sign,
        job_id=session_id,
    )
    db.session.commit()
    tag_request(entity_id=new_id, collection_id=collection.id)
    return EntitySerializer.jsonify(get_index_entity(new_id, request.authz.READ))
def references(id):
    """List per-property reference counts for entity `id`, each with an index link.

    An audit entry is recorded; the link targets `entities_api.index`,
    filtered on the referencing property and this entity id.
    """
    enable_cache()
    entity = get_index_entity(id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=id)

    def as_row(prop, total):
        # Link first, then the row, so attribute access order is unchanged.
        query = ("filter:properties.%s" % prop.name, id)
        href = url_for("entities_api.index", _query=(query,))
        return {
            "count": total,
            "property": prop,
            "schema": prop.schema.name,
            "results": href,
        }

    rows = [
        as_row(prop, total)
        for prop, total in entity_references(entity, request.authz)
    ]
    return jsonify({"status": "ok", "total": len(rows), "results": rows})
def tags(entity_id):
    """Return an entity's tags with a filter query string, most frequent first.
    ---
    get:
      summary: Get entity tags
      description: >-
        Get tags for the entity with id `entity_id`. Tags include the query
        string to make a search by that particular tag.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
                allOf:
                - $ref: '#/components/schemas/QueryResponse'
                properties:
                  results:
                    type: array
                    items:
                      $ref: '#/components/schemas/EntityTag'
      tags:
      - Entity
    """
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=entity.get("collection_id"))

    def as_row(field, value, total):
        # Build one result row including the pre-encoded filter query string.
        quoted = quote(value.encode("utf-8"))
        pair = ("filter:%s" % field, quoted)
        return {
            "id": query_string([pair]),
            "value": value,
            "field": field,
            "count": total,
        }

    rows = [
        as_row(field, value, total)
        for (field, value, total) in entity_tags(entity, request.authz)
    ]
    rows = sorted(rows, key=lambda row: row["count"], reverse=True)
    return jsonify({"status": "ok", "total": len(rows), "results": rows})
def update(collection_id, mapping_id):
    """Update the mapping with id `mapping_id`.

    WRITE access is required on the collection and READ access on the
    table entity named by `table_id` in the request body.
    ---
    post:
      summary: Update a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/MappingCreate'
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
      - Collection
      - Mapping
    """
    # The collection is loaded only for its access check; the result is unused.
    get_db_collection(collection_id, request.authz.WRITE)
    # NOTE(review): the mapping is fetched by id alone; nothing visible here
    # verifies that it belongs to `collection_id` - confirm this is enforced
    # elsewhere.
    mapping = obj_or_404(Mapping.by_id(mapping_id))

    payload = parse_request("MappingCreate")
    table_id = payload.get("table_id")
    query = load_query()
    table = get_index_entity(table_id, request.authz.READ)

    mapping.update(query=query, table_id=table.get("id"))
    db.session.commit()
    return MappingSerializer.jsonify(mapping)
def references(entity_id):
    """List per-property reference counts for an entity; tags and audits the request."""
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=entity.get("collection_id"))
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    rows = [
        {"count": total, "property": prop, "schema": prop.schema.name}
        for prop, total in entity_references(entity, request.authz)
    ]
    payload = {"status": "ok", "total": len(rows), "results": rows}
    return jsonify(payload)
def tags(entity_id):
    """Return an entity's tags with a filter query string, most frequent first."""
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=entity.get("collection_id"))

    def as_row(field, value, total):
        # Build one result row including the pre-encoded filter query string.
        quoted = quote(value.encode("utf-8"))
        pair = ("filter:%s" % field, quoted)
        return {
            "id": query_string([pair]),
            "value": value,
            "field": field,
            "count": total,
        }

    rows = [
        as_row(field, value, total)
        for (field, value, total) in entity_tags(entity, request.authz)
    ]
    rows = sorted(rows, key=lambda row: row["count"], reverse=True)
    return jsonify({"status": "ok", "total": len(rows), "results": rows})
def content(entity_id):
    """Return headers, body text and sanitized HTML of an entity.

    The entity is re-fetched via `entities_by_ids` without the `text`
    field; the first hit is audited and rendered, and a 404 is returned
    when the lookup yields nothing.
    """
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=entity.get("collection_id"))

    hits = entities_by_ids(
        [entity_id],
        schemata=entity.get("schema"),
        excludes=["text"],
    )
    for hit in hits:
        proxy = model.get_proxy(hit)
        record_audit(Audit.ACT_ENTITY, id=entity_id)
        raw_html = proxy.first("bodyHtml", quiet=True)
        origin = proxy.first("sourceUrl", quiet=True)
        safe_html = sanitize_html(raw_html, origin)
        packed_headers = proxy.first("headers", quiet=True)
        headers = registry.json.unpack(packed_headers)
        # Only the first hit is ever rendered.
        return jsonify({
            "headers": headers,
            "text": proxy.first("bodyText", quiet=True),
            "html": safe_html,
        })
    return ("", 404)
def tags(entity_id):
    """Return an entity's tags with a filter query string, most frequent first.

    The request is tagged with the entity's collection and an audit entry
    is recorded.
    """
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=entity.get("collection_id"))
    record_audit(Audit.ACT_ENTITY, id=entity_id)

    def as_row(field, value, total):
        # Build one result row including the pre-encoded filter query string.
        quoted = quote(value.encode("utf-8"))
        pair = ("filter:%s" % field, quoted)
        return {
            "id": query_string([pair]),
            "value": value,
            "field": field,
            "count": total,
        }

    rows = [
        as_row(field, value, total)
        for (field, value, total) in entity_tags(entity, request.authz)
    ]
    rows = sorted(rows, key=lambda row: row["count"], reverse=True)
    return jsonify({"status": "ok", "total": len(rows), "results": rows})
def view(entity_id):
    """Serialize the indexed entity `entity_id`, recording an audit entry."""
    enable_cache()
    found = get_index_entity(entity_id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    collection_id = found.get("collection_id")
    tag_request(collection_id=collection_id)
    return EntitySerializer.jsonify(found)