def view(collection_id, mapping_id):
    """Return the mapping with id `mapping_id`.
    ---
    get:
      summary: Fetch a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
      - Collection
    """
    # The collection is looked up with the WRITE authz level; the returned
    # object itself is not used here.
    access_level = request.authz.WRITE
    get_db_collection(collection_id, access_level)
    # NOTE(review): the mapping is fetched by id alone and is not compared
    # against `collection_id` here -- confirm this is enforced elsewhere.
    candidate = Mapping.by_id(mapping_id)
    found = obj_or_404(candidate)
    return MappingSerializer.jsonify(found)
def view(diagram_id):
    """Return the diagram with id `diagram_id`.
    ---
    get:
      summary: Fetch a diagram
      parameters:
      - description: The diagram id.
        in: path
        name: diagram_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Diagram'
          description: OK
      tags:
      - Diagram
    """
    candidate = Diagram.by_id(diagram_id)
    found = obj_or_404(candidate)
    # The diagram's own collection is looked up with the READ authz level
    # before anything is serialized.
    get_db_collection(found.collection_id, request.authz.READ)
    return DiagramSerializer.jsonify(found)
def matches(id, other_id):
    """Return the query result of matches between two collections.

    Both collections are resolved through `get_db_collection` first; the
    query parser is created with a limit of 10.
    """
    source = get_db_collection(id)
    target = get_db_collection(other_id)
    args_parser = QueryParser(request.args, request.authz, limit=10)
    match_query = Match.find_by_collection(source.id, target.id)
    outcome = DatabaseQueryResult(
        request,
        match_query,
        parser=args_parser,
        schema=MatchSchema,
    )
    return jsonify(outcome)
def generate_matches(collection_id, other_id):
    """Dispatch `process_xref` for a pair of collections and reply with 202.

    The first collection is looked up with the WRITE authz level, the second
    with the default level of `get_db_collection`.
    """
    source = get_db_collection(collection_id, request.authz.WRITE)
    target = get_db_collection(other_id)
    task_args = [source.id, target.id]
    process_xref.apply_async(
        task_args,
        queue=USER_QUEUE,
        routing_key=USER_ROUTING_KEY,
    )
    body = {'status': 'accepted'}
    return jsonify(body, status=202)
def matches(collection_id, other_id):
    """Return the serialized query result of matches between two collections."""
    source = get_db_collection(collection_id)
    target = get_db_collection(other_id)
    args_parser = QueryParser(request.args, request.authz)
    match_query = Match.find_by_collection(source.id, target.id)
    outcome = DatabaseQueryResult(request, match_query, parser=args_parser)
    return MatchSerializer.jsonify_result(outcome)
def delete(collection_id, mapping_id):
    """Delete a mapping.
    ---
    delete:
      summary: Delete a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '204':
          description: No Content
      tags:
      - Collection
      - Mapping
    """
    # The collection is looked up with the WRITE authz level; the returned
    # object itself is not used here.
    access_level = request.authz.WRITE
    get_db_collection(collection_id, access_level)
    # NOTE(review): the mapping is fetched by id alone and is not compared
    # against `collection_id` here -- confirm this is enforced elsewhere.
    candidate = Mapping.by_id(mapping_id)
    doomed = obj_or_404(candidate)
    doomed.delete()
    db.session.commit()
    empty_response = ('', 204)
    return empty_response
def matches(id, other_id):
    """Return the query result of matches between two collections.

    An `Audit.ACT_COLLECTION` audit record is written for each collection
    right after it has been resolved.
    """
    source = get_db_collection(id)
    record_audit(Audit.ACT_COLLECTION, id=source.id)
    target = get_db_collection(other_id)
    record_audit(Audit.ACT_COLLECTION, id=target.id)
    args_parser = QueryParser(request.args, request.authz)
    match_query = Match.find_by_collection(source.id, target.id)
    outcome = DatabaseQueryResult(
        request,
        match_query,
        parser=args_parser,
        schema=MatchSchema,
    )
    return jsonify(outcome)
def matches(collection_id, other_id):
    """Return the serialized query result of matches between two collections.

    An `Audit.ACT_COLLECTION` audit record is written for each collection
    right after it has been resolved.
    """
    source = get_db_collection(collection_id)
    record_audit(Audit.ACT_COLLECTION, id=source.id)
    target = get_db_collection(other_id)
    record_audit(Audit.ACT_COLLECTION, id=target.id)
    args_parser = QueryParser(request.args, request.authz)
    match_query = Match.find_by_collection(source.id, target.id)
    outcome = DatabaseQueryResult(request, match_query, parser=args_parser)
    return MatchSerializer.jsonify_result(outcome)
def pairwise():
    """
    ---
    post:
      summary: Make a pairwise judgement between an entity and a match.
      description: >
        This lets a user decide if they think a given xref match is a true or
        false match. Implicitly, this might create or alter a profile in the
        collection used by
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Pairwise'
      responses:
        '200':
          content:
            application/json:
              schema:
                properties:
                  status:
                    description: accepted
                    type: string
                  profile_id:
                    description: profile_id for `entity`.
                    type: string
                type: object
          description: Accepted
      tags:
      - Profile
    """
    payload = parse_request("Pairwise")

    # The judged entity's collection is looked up with the WRITE authz
    # level; the match's collection uses the default level.
    entity = get_index_entity(payload.get("entity_id"))
    collection = get_db_collection(entity["collection_id"], request.authz.WRITE)
    match = get_index_entity(payload.get("match_id"))
    match_collection = get_db_collection(match["collection_id"])

    profile = decide_pairwise(
        collection,
        entity,
        match_collection,
        match,
        judgement=payload.get("judgement"),
        authz=request.authz,
    )

    # Queue an update task for the judged entity.
    session_job = get_session_id()
    queue_task(
        collection,
        OP_UPDATE_ENTITY,
        job_id=session_job,
        entity_id=entity.get("id"),
    )

    # `decide_pairwise` may return None, in which case no profile id is sent.
    profile_id = None
    if profile is not None:
        profile_id = profile.id
    return jsonify({"status": "ok", "profile_id": profile_id}, status=200)
def records(document_id=None, collection_id=None):
    """Stream records, scoped to a collection, a document, or everything.

    The caller must pass `request.authz.can_export()`. When neither id is
    given the request is treated as a full export and additionally requires
    `request.authz.is_admin`.
    """
    require(request.authz.can_export())

    scoped_to_collection = collection_id is not None
    scoped_to_document = document_id is not None
    if scoped_to_collection:
        get_db_collection(collection_id, request.authz.READ)
        record_audit(Audit.ACT_COLLECTION, id=collection_id)
    elif scoped_to_document:
        get_db_document(document_id)
        record_audit(Audit.ACT_ENTITY, id=document_id)
    else:
        # no authz on records, this means *full* export.
        require(request.authz.is_admin)

    record_stream = iter_records(
        document_id=document_id,
        collection_id=collection_id,
    )
    return stream_ijson(record_stream)
def entities(collection_id=None):
    """Stream entities, optionally restricted to a single collection.

    The caller must pass `request.authz.can_export()`. Query arguments:

    * `schema` (repeatable): forwarded to `iter_entities` as `schemata`.
    * `include` (repeatable): fields to include; any name that is also in
      the fixed exclude list is dropped.

    When `collection_id` is given, the collection is looked up with the READ
    authz level and an `Audit.ACT_COLLECTION` audit record is written.
    """
    require(request.authz.can_export())
    schemata = ensure_list(request.args.getlist('schema'))
    excludes = ['text', 'roles', 'fingerprints']
    includes = ensure_list(request.args.getlist('include'))
    includes = [f for f in includes if f not in excludes]
    if collection_id is not None:
        # Use the route's `collection_id`. The previous code passed the
        # builtin function `id` here, so neither the collection lookup nor
        # the audit record referred to the requested collection.
        get_db_collection(collection_id, request.authz.READ)
        record_audit(Audit.ACT_COLLECTION, id=collection_id)
    entities = iter_entities(authz=request.authz,
                             collection_id=collection_id,
                             schemata=schemata,
                             excludes=excludes,
                             includes=includes)
    return stream_ijson(entities)
def update(collection_id, mapping_id):
    """Update the mapping with id `mapping_id`.
    ---
    post:
      summary: Update a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/MappingCreate'
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
      - Collection
      - Mapping
    """
    # The collection is looked up with the WRITE authz level; the returned
    # object itself is not used here.
    access_level = request.authz.WRITE
    get_db_collection(collection_id, access_level)
    # NOTE(review): the mapping is fetched by id alone and is not compared
    # against `collection_id` here -- confirm this is enforced elsewhere.
    target = obj_or_404(Mapping.by_id(mapping_id))
    payload = parse_request("MappingCreate")

    # Gather the new values in the same order the update receives them.
    new_query = load_query()
    new_table_id = get_table_id(payload)
    new_entityset_id = get_entityset_id(payload)
    target.update(
        query=new_query,
        table_id=new_table_id,
        entityset_id=new_entityset_id,
    )
    db.session.commit()
    return MappingSerializer.jsonify(target)
def index(id):
    """List the permissions of a collection, padded with unassigned groups.

    The collection is looked up with the WRITE authz level and an audit
    record is written. Stored permissions whose role is not visible to the
    requester are skipped.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    record_audit(Audit.ACT_COLLECTION, id=id)

    # Visible groups that have no stored permission yet; entries are removed
    # as stored permissions for them are encountered.
    unassigned = []
    for group in Role.all_groups():
        if check_visible(group, request.authz):
            unassigned.append(group)

    stored = Permission.all().filter(Permission.collection_id == collection.id)
    entries = []
    for perm in stored.all():
        if check_visible(perm.role, request.authz):
            entries.append(perm)
            if perm.role in unassigned:
                unassigned.remove(perm.role)

    # this workaround ensures that all groups are visible for the user to
    # select in the UI even if they are not currently associated with the
    # collection.
    for group in unassigned:
        # Public roles are not offered on casefile collections.
        if not collection.casefile or not group.is_public:
            entries.append({
                'collection_id': collection.id,
                'write': False,
                'read': False,
                'role_id': str(group.id)
            })

    serialized = PermissionSerializer().serialize_many(entries)
    return jsonify({
        'total': len(serialized),
        'results': serialized
    })
def create():
    """Create an entity from the request body and return it serialized.

    The target collection comes from the parsed `collection_id` and is
    looked up with the WRITE authz level. The entity is created with
    `sync=True` and then read back through `get_index_entity`.
    """
    payload = parse_request(EntityCreateSchema)
    target = get_db_collection(payload['collection_id'], request.authz.WRITE)
    new_id = create_entity(payload, target, sync=True)
    tag_request(entity_id=new_id, collection_id=str(target.id))
    created = get_index_entity(new_id, request.authz.READ)
    return EntitySerializer.jsonify(created)
def delete(entity_id):
    """
    ---
    delete:
      summary: Delete an entity
      description: Delete the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '204':
          description: No Content
      tags:
      - Entity
    """
    write_level = request.authz.WRITE
    # Both the entity and its collection are looked up with the WRITE level.
    doomed = get_index_entity(entity_id, write_level)
    owner = get_db_collection(doomed.get("collection_id"), write_level)
    tag_request(collection_id=owner.id)
    # `sync` defaults to True when the flag is absent from the request.
    delete_entity(
        owner,
        doomed,
        sync=get_flag("sync", default=True),
        job_id=get_session_id(),
    )
    return ("", 204)
def delete(collection_id):
    """
    ---
    delete:
      summary: Delete a collection
      description: Delete the collection with id `collection_id`
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      - in: query
        description: Wait for delete to finish in backend.
        name: sync
        schema:
          type: boolean
      - in: query
        description: Delete only the contents, but not the collection itself.
        name: keep_metadata
        schema:
          type: boolean
      responses:
        '204':
          description: No Content
      tags:
      - Collection
    """
    doomed = get_db_collection(collection_id, request.authz.WRITE)
    # Flag defaults: keep_metadata is off, sync is on.
    options = {}
    options["keep_metadata"] = get_flag("keep_metadata", default=False)
    options["sync"] = get_flag("sync", default=True)
    delete_collection(doomed, **options)
    return ("", 204)
def index(collection_id):
    """Return the serialized result of `Match.group_by_collection` for a collection.

    An `Audit.ACT_COLLECTION` audit record is written for the collection.
    """
    source = get_db_collection(collection_id)
    record_audit(Audit.ACT_COLLECTION, id=source.id)
    args_parser = QueryParser(request.args, request.authz)
    grouped = Match.group_by_collection(source.id, authz=request.authz)
    outcome = DatabaseQueryResult(request, grouped, parser=args_parser)
    return MatchCollectionsSerializer.jsonify_result(outcome)
def cancel(collection_id):
    """
    ---
    delete:
      summary: Cancel processing of a collection
      description: >
        Cancel all queued tasks for the collection with id `collection_id`
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      responses:
        '204':
          description: No Content
      tags:
      - Collection
    """
    # The documented response used to be a 200 carrying a CollectionStatus
    # body, but this view has always replied with an empty 204; the spec
    # above now describes what is actually returned.
    collection = get_db_collection(collection_id, request.authz.WRITE)
    cancel_queue(collection)
    refresh_collection(collection_id)
    return ("", 204)
def generate(collection_id):
    """Queue an `OP_XREF` task for a collection and reply with 202.

    The request body is parsed with `XrefSchema`; its
    `against_collection_ids` value is normalised with `ensure_list` and
    passed on in the task payload.
    """
    body = parse_request(XrefSchema)
    target = get_db_collection(collection_id, request.authz.WRITE)
    against_ids = ensure_list(body.get("against_collection_ids"))
    queue_task(
        target,
        OP_XREF,
        payload={'against_collection_ids': against_ids},
    )
    return jsonify({'status': 'accepted'}, status=202)
def collection(id):
    """Return the notifications of a collection's channel.

    Requires `request.authz.logged_in`.
    """
    require(request.authz.logged_in)
    # Local name differs from the view name to avoid shadowing it.
    coll = get_db_collection(id)
    notifications = Notification.by_channel(channel(coll))
    outcome = DatabaseQueryResult(
        request,
        notifications,
        schema=NotificationSchema,
    )
    return jsonify(outcome)
def status(collection_id):
    """
    ---
    get:
      summary: Check processing status of a collection
      description: >
        Return the task queue status for the collection with id `collection_id`
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CollectionStatus'
      tags:
      - Collection
    """
    target = get_db_collection(collection_id, request.authz.READ)
    # Clears the rate limit attribute on this request.
    # NOTE(review): presumably exempts status polling from rate limiting --
    # confirm against whatever reads `request.rate_limit`.
    request.rate_limit = None
    queue_state = get_status(target)
    return jsonify(queue_state)
def update(id):
    """Apply the request body to a collection and return it serialized.

    The collection is looked up with the WRITE authz level, updated from the
    data parsed with `CollectionSchema`, committed, and then handed to
    `update_collection`.
    """
    target = get_db_collection(id, request.authz.WRITE)
    changes = parse_request(CollectionSchema)
    target.update(changes)
    db.session.commit()
    update_collection(target)
    return serialize_data(target, CollectionSchema)
def reingest(collection_id):
    """
    ---
    post:
      summary: Re-ingest a collection
      description: >
        Trigger a process to re-parse the content of all documents stored in
        the collection with id `collection_id`.
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      - in: query
        name: index
        description: Index documents while they're being processed.
        schema:
          type: boolean
      responses:
        '202':
          description: Accepted
      tags:
      - Collection
    """
    target = get_db_collection(collection_id, request.authz.WRITE)
    session_job = get_session_id()
    # The `index` flag defaults to False when absent.
    task_payload = {"index": get_flag("index", False)}
    queue_task(target, OP_REINGEST, job_id=session_job, payload=task_payload)
    return ("", 202)
def delete(entity_id):
    """
    ---
    delete:
      summary: Delete an entity
      description: Delete the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '204':
          description: No Content
      tags:
      - Entity
    """
    write_level = request.authz.WRITE
    # Both the entity and its collection are looked up with the WRITE level.
    doomed = get_index_entity(entity_id, write_level)
    owner = get_db_collection(doomed.get('collection_id'), write_level)
    tag_request(collection_id=owner.id)
    # `sync` defaults to True when the flag is absent from the request.
    wait_for_delete = get_flag('sync', True)
    delete_entity(owner, doomed, sync=wait_for_delete)
    db.session.commit()
    return ('', 204)
def triples(collection_id):
    """
    ---
    get:
      summary: Linked data stream of the collection
      description: >-
        This will return a list of triples that describe each entity in the
        given collection. The format is `ntriples`.
      parameters:
      - in: path
        name: collection_id
        required: true
        schema:
          type: integer
      responses:
        '200':
          description: OK
          content:
            text/plain:
              schema:
                type: string
      tags:
      - Entity
    """
    authz = request.authz
    require(authz.can_stream())
    log.debug("Stream triples [%r] begins... (coll: %s)", authz, collection_id)
    source = get_db_collection(collection_id, authz.READ)
    body = export_collection(source)
    return Response(body, mimetype='text/plain')
def generate(collection_id):
    """
    ---
    post:
      summary: Generate cross-reference matches
      description: >
        Generate cross-reference matches for entities in a collection.
      parameters:
      - in: path
        name: collection_id
        required: true
        schema:
          type: integer
      responses:
        '202':
          content:
            application/json:
              schema:
                properties:
                  status:
                    description: accepted
                    type: string
                type: object
          description: Accepted
      tags:
      - Xref
      - Collection
    """
    target = get_db_collection(collection_id, request.authz.WRITE)
    queue_task(target, OP_XREF)
    reply = {"status": "accepted"}
    return jsonify(reply, status=202)
def export(collection_id):
    """
    ---
    post:
      summary: Download cross-reference results
      description: Download results of cross-referencing as an Excel file
      parameters:
      - in: path
        name: collection_id
        required: true
        schema:
          type: integer
      responses:
        '202':
          description: Accepted
      tags:
      - Xref
      - Collection
    """
    source = get_db_collection(collection_id, request.authz.READ)
    # Register the export for the requesting role before queueing the task.
    pending = create_export(
        operation=OP_EXPORT_XREF_RESULTS,
        role_id=request.authz.id,
        label="%s - Crossreference results" % source.label,
        collection=source,
        mime_type=XLSX,
    )
    session_job = get_session_id()
    task_payload = {
        "collection_id": collection_id,
        "export_id": pending.id,
    }
    # The task is queued with no collection (first argument is None); the
    # collection travels in the payload instead.
    queue_task(
        None,
        OP_EXPORT_XREF_RESULTS,
        job_id=session_job,
        payload=task_payload,
    )
    return ("", 202)
def permissions_index(id):
    """List the permissions of a collection, padded with unassigned groups.

    The collection is looked up with the WRITE authz level. Stored
    permissions whose role is not visible to the requester are skipped.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    stored = Permission.all().filter(Permission.collection_id == collection.id)
    entries = []

    # Visible groups that have no stored permission yet; entries are removed
    # as stored permissions for them are encountered.
    unassigned = []
    for group in Role.all_groups():
        if check_visible(group, request.authz):
            unassigned.append(group)

    for perm in stored.all():
        if check_visible(perm.role, request.authz):
            entries.append(perm)
            if perm.role in unassigned:
                unassigned.remove(perm.role)

    # this workaround ensures that all groups are visible for the user to
    # select in the UI even if they are not currently associated with the
    # collection.
    entries.extend(
        {
            'collection_id': collection.id,
            'write': False,
            'read': False,
            'role': group
        }
        for group in unassigned
    )

    total = len(entries)
    dumped = PermissionSchema().dump(entries, many=True)
    return jsonify({
        'total': total,
        'results': dumped
    })
def triples(collection_id):
    """Return the output of `export_collection` as a `text/plain` response.

    Requires `request.authz.can_stream()`; the collection is looked up with
    the READ authz level and an `Audit.ACT_COLLECTION` record is written.
    """
    authz = request.authz
    require(authz.can_stream())
    log.debug("Stream triples [%r] begins... (coll: %s)", authz, collection_id)
    source = get_db_collection(collection_id, authz.READ)
    record_audit(Audit.ACT_COLLECTION, id=collection_id)
    body = export_collection(source)
    return Response(body, mimetype='text/plain')
def reindex(collection_id):
    """
    ---
    post:
      summary: Re-index a collection
      description: >
        Re-index the entities in the collection with id `collection_id`
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      - in: query
        description: Delete the index before re-generating it.
        name: flush
        schema:
          type: boolean
      responses:
        '202':
          description: Accepted
      tags:
      - Collection
    """
    target = get_db_collection(collection_id, request.authz.WRITE)
    session_job = get_session_id()
    # The `flush` flag defaults to False when absent.
    task_payload = {"flush": get_flag("flush", False)}
    queue_task(target, OP_REINDEX, job_id=session_job, payload=task_payload)
    return ("", 202)
def index(id):
    """List the permissions of a collection, padded with unassigned roles.

    The collection is looked up with the WRITE authz level. Candidate roles
    are the groups from `Role.all_groups(request.authz)`, plus
    `Role.all_system()` when the requester is an admin.
    """
    collection = get_db_collection(id, request.authz.WRITE)

    # Roles without a stored permission yet; entries are removed as stored
    # permissions for them are encountered.
    unassigned = Role.all_groups(request.authz).all()
    if request.authz.is_admin:
        unassigned.extend(Role.all_system())

    stored = Permission.all().filter(Permission.collection_id == collection.id)
    entries = []
    for perm in stored.all():
        if check_visible(perm.role, request.authz):
            entries.append(perm)
            if perm.role in unassigned:
                unassigned.remove(perm.role)

    # this workaround ensures that all groups are visible for the user to
    # select in the UI even if they are not currently associated with the
    # collection.
    for candidate in unassigned:
        # Public roles are not offered on casefile collections.
        if not collection.casefile or not candidate.is_public:
            entries.append({
                'collection_id': collection.id,
                'write': False,
                'read': False,
                'role_id': str(candidate.id)
            })

    serialized = PermissionSerializer().serialize_many(entries)
    return jsonify({'total': len(serialized), 'results': serialized})
def view(collection_id):
    """
    ---
    get:
      summary: Get a collection
      description: Return the collection with id `collection_id`
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CollectionFull'
      tags:
      - Collection
    """
    indexed = get_index_collection(collection_id)
    db_collection = get_db_collection(collection_id)
    # An optional `refresh` request flag triggers a stats update for the
    # "schema" facet before the stats are read.
    refresh_requested = get_flag("refresh", False)
    if refresh_requested:
        update_collection_stats(collection_id, ["schema"])

    # Collect the extra fields first, then merge them in one step.
    extras = {}
    extras["statistics"] = get_collection_stats(db_collection.id)
    extras["status"] = get_status(db_collection)
    extras["shallow"] = False
    indexed.update(extras)
    return CollectionSerializer.jsonify(indexed)
def export(collection_id):
    """
    ---
    get:
      summary: Download cross-reference results
      description: Download results of cross-referencing as an Excel file
      parameters:
      - in: path
        name: collection_id
        required: true
        schema:
          type: integer
      responses:
        '200':
          description: OK
          content:
            application/vnd.openxmlformats-officedocument.spreadsheetml.sheet:
              schema:
                type: object
      tags:
      - Xref
      - Collection
    """
    source = get_db_collection(collection_id, request.authz.READ)
    workbook = export_matches(source, request.authz)
    # The download name is derived from the collection label.
    download_name = '%s - Crossreference.xlsx' % source.label
    return send_file(
        workbook,
        mimetype=XLSX_MIME,
        as_attachment=True,
        attachment_filename=download_name,
    )
def delete(diagram_id):
    """Delete a diagram.
    ---
    delete:
      summary: Delete a diagram
      parameters:
      - description: The diagram id.
        in: path
        name: diagram_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '204':
          description: No Content
      tags:
      - Diagram
    """
    candidate = Diagram.by_id(diagram_id)
    doomed = obj_or_404(candidate)
    # The diagram's own collection is looked up with the WRITE authz level.
    owner = get_db_collection(doomed.collection_id, request.authz.WRITE)
    doomed.delete()
    # The owning collection is touched in the same transaction.
    owner.touch()
    db.session.commit()
    empty_response = ('', 204)
    return empty_response
def update(collection_id):
    """Apply the request body to a collection and return the serialized result.

    The collection is looked up with the WRITE authz level, updated from the
    data parsed with `CollectionUpdateSchema`, and committed. The return
    value of `update_collection` is what gets serialized.
    """
    target = get_db_collection(collection_id, request.authz.WRITE)
    changes = parse_request(CollectionUpdateSchema)
    # The `sync` flag is read before the update is applied.
    sync_flag = get_flag('sync')
    target.update(changes)
    db.session.commit()
    refreshed = update_collection(target, sync=sync_flag)
    return CollectionSerializer.jsonify(refreshed)
def entities(collection_id=None):
    """Stream entities, optionally restricted to a single collection.

    Requires `request.authz.can_stream()`. The repeatable `schema` query
    argument is forwarded as `schemata`; names from the repeatable `include`
    argument are dropped when they are also in the fixed exclude list.
    """
    authz = request.authz
    require(authz.can_stream())
    log.debug("Stream entities [%r] begins... (coll: %s)", authz, collection_id)

    wanted_schemata = ensure_list(request.args.getlist('schema'))
    blocked = ['text', 'roles', 'fingerprints']
    requested = ensure_list(request.args.getlist('include'))
    allowed = []
    for field in requested:
        if field not in blocked:
            allowed.append(field)

    if collection_id is not None:
        # A specific collection is looked up with the READ level and audited.
        get_db_collection(collection_id, authz.READ)
        record_audit(Audit.ACT_COLLECTION, id=collection_id)

    entity_stream = iter_entities(
        authz=authz,
        collection_id=collection_id,
        schemata=wanted_schemata,
        excludes=blocked,
        includes=allowed,
    )
    return stream_ijson(entity_stream)
def generate(collection_id):
    """Dispatch `xref_collection` for a collection and reply with 202.

    The request body is parsed with `XrefSchema`; its
    `against_collection_ids` value is passed to the task as a keyword
    argument.
    """
    body = parse_request(XrefSchema)
    target = get_db_collection(collection_id, request.authz.WRITE)
    task_kwargs = {}
    task_kwargs["against_collection_ids"] = body.get("against_collection_ids")
    xref_collection.apply_async([target.id], kwargs=task_kwargs, priority=5)
    reply = {'status': 'accepted'}
    return jsonify(reply, status=202)
def bulk(collection_id):
    """Bulk-write the entities in the JSON request body into a collection.

    The collection is looked up with the WRITE authz level and the caller
    must pass `request.authz.can_bulk_import()`. Request flags:

    * `merge` (default False): forwarded to `bulk_write`.
    * `unsafe` (default False): only honoured when the requester is an admin.

    Replies with an empty 204 once the write and the collection refresh have
    been issued.
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    require(request.authz.can_bulk_import())
    merge = get_flag('merge', default=False)

    # This will disable certain security measures in order to allow bulk
    # loading of document data.
    unsafe = get_flag('unsafe', default=False)
    unsafe = unsafe and request.authz.is_admin

    # force=True parses the body as JSON regardless of the content type.
    entities = ensure_list(request.get_json(force=True))
    bulk_write(collection, entities, merge=merge, unsafe=unsafe)
    # Refresh the collection that was written to. The previous code passed
    # the builtin function `id` here instead of the collection id.
    refresh_collection(collection_id)
    return ('', 204)
def mapping_process(collection_id):
    """Validate the posted mapping queries and dispatch a bulk load for each.

    The JSON body is expected to hold the mapping under the collection's
    `foreign_id` key. A non-JSON request, or a query rejected with
    `InvalidMapping`, results in `BadRequest`.
    """
    authz = request.authz
    target = get_db_collection(collection_id, authz.WRITE)
    require(authz.can_bulk_import())
    # TODO: we need to look into possible abuse of mapping load path for local
    # path access on the machine running the mapping. Until then, this action
    # must be restricted to admins:
    require(authz.is_admin)
    if not request.is_json:
        raise BadRequest()

    body = request.get_json()
    mapping_spec = body.get(target.foreign_id)
    for mapping_query in keys_values(mapping_spec, 'queries', 'query'):
        try:
            # Building the mapping validates the query before it is queued.
            model.make_mapping(mapping_query)
            bulk_load_query.apply_async(
                [target.id, mapping_query],
                priority=6,
            )
        except InvalidMapping as invalid:
            raise BadRequest(invalid)
    return ('', 204)
def update(id):
    """Apply the posted permission list to a collection, then return `index(id)`.

    Entries whose role is not visible to the requester are ignored. Public
    roles never get write access, and on casefile collections they do not
    get read access either.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    requested = parse_request(PermissionSchema, many=True)
    for entry in requested:
        role = Role.by_id(entry.get('role_id'))
        if check_visible(role, request.authz):
            if role.is_public:
                entry['write'] = False
            if collection.casefile and role.is_public:
                entry['read'] = False
            update_permission(
                role,
                collection,
                entry['read'],
                entry['write'],
                editor_id=request.authz.id,
            )
    update_collection(collection)
    return index(id)
def create():
    """Create an entity from the request body and return it serialized.

    The target collection comes from the parsed `collection_id` and is
    looked up with the WRITE authz level. The `sync` request flag defaults
    to True.
    """
    payload = parse_request(EntityCreateSchema)
    target = get_db_collection(payload['collection_id'], request.authz.WRITE)
    wait_for_index = get_flag('sync', True)
    created = create_entity(payload, target, sync=wait_for_index)
    tag_request(entity_id=created.get('id'), collection_id=str(target.id))
    return EntitySerializer.jsonify(created)
def process(collection_id):
    """Dispatch `process_documents` for a collection and reply with an empty 204."""
    target = get_db_collection(collection_id, request.authz.WRITE)
    # re-process the documents
    task_kwargs = {'collection_id': target.id}
    process_documents.delay(**task_kwargs)
    return ('', 204)
def delete(collection_id):
    """Delete a collection and reply with an empty 204.

    The collection is looked up with the WRITE authz level; the `sync`
    request flag defaults to True.
    """
    doomed = get_db_collection(collection_id, request.authz.WRITE)
    delete_collection(doomed, sync=get_flag('sync', default=True))
    return ('', 204)
def csv_export(collection_id):
    """Stream the output of `export_matches_csv` for a collection as CSV.

    The collection is looked up with the READ authz level and an
    `Audit.ACT_COLLECTION` audit record is written.
    """
    source = get_db_collection(collection_id, request.authz.READ)
    record_audit(Audit.ACT_COLLECTION, id=source.id)
    rows = export_matches_csv(source.id, request.authz)
    # Wrapped with `stream_with_context` before being handed to the streamer.
    contextual_rows = stream_with_context(rows)
    return stream_csv(contextual_rows)