def export(): """ --- post: summary: Download the results of a search description: >- Downloads all the results of a search as a zip archive; upto a max of 10,000 results. The returned file will contain an Excel document with structured data as well as the binary files from all matching documents. Supports the same query parameters as the search API. responses: '202': description: Accepted tags: - Entity """ require(request.authz.logged_in) parser = SearchQueryParser(request.args, request.authz) tag_request(query=parser.text, prefix=parser.prefix) query = EntitiesQuery(parser) label = gettext("Search: %s") % query.to_text() export = create_export( operation=OP_EXPORT_SEARCH, role_id=request.authz.id, label=label, mime_type=ZIP, meta={"query": query.get_full_query()}, ) job_id = get_session_id() queue_task(None, OP_EXPORT_SEARCH, job_id=job_id, export_id=export.id) return ("", 202)
def export(): """ --- get: summary: Download the results of a search description: >- Downloads all the results of a search as a zip archive; upto a max of 10,000 results. The returned file will contain an Excel document with structured data as well as the binary files from all matching documents. Supports the same query parameters as the search API. responses: '200': content: application/zip: schema: format: binary type: string description: OK tags: - Entity """ require(request.authz.logged_in) parser = SearchQueryParser(request.args, request.authz) parser.limit = MAX_PAGE tag_request(query=parser.text, prefix=parser.prefix) result = EntitiesQuery.handle(request, parser=parser) stream = export_entities(request, result) response = Response(stream, mimetype='application/zip') disposition = 'attachment; filename={}'.format('Query_export.zip') response.headers['Content-Disposition'] = disposition return response
def handle(cls, request, parser=None, **kwargs):
    if parser is None:
        parser = SearchQueryParser(request.args, request.authz)
    # Log the search
    keys = ['prefix', 'text', 'filters']
    record_audit(Audit.ACT_SEARCH, keys=keys, **parser.to_dict())
    result = cls(parser, **kwargs).search()
    return cls.RESULT_CLASS(request, parser, result)
def export(): """ --- post: summary: Download the results of a search description: >- Downloads all the results of a search as a zip archive; upto a max of 10,000 results. The returned file will contain an Excel document with structured data as well as the binary files from all matching documents. Supports the same query parameters as the search API. responses: '202': description: Accepted tags: - Entity """ require(request.authz.logged_in) parser = SearchQueryParser(request.args, request.authz) parser.limit = MAX_PAGE tag_request(query=parser.text, prefix=parser.prefix) result = EntitiesQuery.handle(request, parser=parser) label = "Search results for query: %s" % parser.text export = create_export( operation=OP_EXPORT_SEARCH_RESULTS, role_id=request.authz.id, label=label, file_path=None, expires_after=Export.DEFAULT_EXPIRATION, collection=None, mime_type=ZIP, ) job_id = get_session_id() payload = { "export_id": export.id, "result": result.to_dict(), } queue_task(None, OP_EXPORT_SEARCH_RESULTS, job_id=job_id, payload=payload) return ("", 202)
def index(): """ --- get: summary: Search entities description: > Returns a list of entities matching the given search criteria. A filter can be applied to show only results from a particular collection: `?filter:collection_id={collection_id}`. If you know you only want to search documents (unstructured, ingested data) or entities (structured data which may have been extracted from a dataset, or entered by a human) you can use these arguments with the `/documents` or `/entities` endpoints. parameters: - description: >- A query string in ElasticSearch query syntax. Can include field searches, such as `title:penguin` in: query name: q schema: type: string - description: >- Return facet values for the given metadata field, such as `languages`, `countries`, `mime_type` or `extension`. This can be specified multiple times for more than one facet to be added. in: query name: facet schema: type: string - description: > Filter the results by the given field. This is useful when used in conjunction with facet to create a drill-down mechanism. Useful fields are: - `collection_id`, documents belonging to a particular collection. - `title`, of the document. - `file_name`, of the source file. - `source_url`, URL of the source file. - `extension`, file extension of the source file. - `languages`, in the document. - `countries`, associated with the document. - `keywords`, from the document. - `emails`, email addresses mentioned in the document. - `domains`, websites mentioned in the document. - `phones`, mentioned in the document. - `dates`, in any of the following formats: yyyy-MM-dd, yyyy-MM, yyyy-MM-d, yyyy-M, yyyy - `mime_type`, of the source file. - `author`, according to the source file's metadata. - `summary`, of the document. - `text`, entire text extracted from the document. - `created_at`, when the document was added to aleph (yyyy-mm -ddThh:ii:ss.uuuuuu). - `updated_at`, when the document was modified in aleph (yyyy -mm-ddThh:ii:ss.uuuuuu). in: query name: 'filter:{field_name}' schema: type: string - description: 'The number of results to return, max. 10,000.' in: query name: limit schema: type: integer - description: > The number of results to skip at the beginning of the result set. in: query name: offset schema: type: integer responses: '200': description: Resturns a list of entities in result content: application/json: schema: $ref: '#/components/schemas/EntitiesResponse' tags: - Entity """ # enable_cache(vary_user=True) parser = SearchQueryParser(request.values, request.authz) result = EntitiesQuery.handle(request, parser=parser) tag_request(query=result.query.to_text(), prefix=parser.prefix) links = {} if request.authz.logged_in and result.total <= MAX_PAGE: query = list(request.args.items(multi=True)) links["export"] = url_for("entities_api.export", _query=query) return EntitySerializer.jsonify_result(result, extra={"links": links})
def query(args):
    return Query(SearchQueryParser(args, None))
def handle(cls, request, parser=None, **kwargs):
    if parser is None:
        parser = SearchQueryParser(request.args, request.authz)
    query = cls(parser, **kwargs)
    return SearchQueryResult(request, query)
def handle(cls, request, limit=None, schema=None, **kwargs):
    parser = SearchQueryParser(request.args, request.authz, limit=limit)
    result = cls(parser, **kwargs).search()
    return cls.RESULT_CLASS(request, parser, result, schema=schema)
def handle(cls, request, parser=None, **kwargs):
    if parser is None:
        parser = SearchQueryParser(request.args, request.authz)
    result = cls(parser, **kwargs).search()
    return cls.RESULT_CLASS(request, parser, result)
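# The handle() variants above all follow the same template-method pattern:
# build a parser from the request when none is supplied, let the subclass run
# its search(), then wrap the raw result in the class's RESULT_CLASS. The
# self-contained sketch below reproduces that shape; every name here except
# handle, search, and RESULT_CLASS is an illustrative stand-in, not code from
# this repository.

class FakeResult:
    def __init__(self, request, parser, result):
        self.request, self.parser, self.result = request, parser, result

class BaseQuery:
    RESULT_CLASS = FakeResult

    def __init__(self, parser, **kwargs):
        self.parser = parser

    def search(self):
        # A real subclass would translate the parser into a backend query here.
        return {"hits": []}

    @classmethod
    def handle(cls, request, parser, **kwargs):
        result = cls(parser, **kwargs).search()
        return cls.RESULT_CLASS(request, parser, result)

# Usage mirrors the views above: SomeQuery.handle(request, parser) has the same
# call shape as EntitiesQuery.handle(request, parser=parser) in export() and index().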