Example #1
def suggest_entity():
    """Suggest API, emulates Google Refine API."""
    args = {
        'prefix': request.args.get('prefix'),
        'filter:schemata': request.args.getlist('type')
    }
    matches = []
    parser = SearchQueryParser(args, request.authz)
    if parser.prefix is not None:
        query = EntitiesQuery(parser)
        for doc in query.search().get('hits').get('hits'):
            source = doc.get('_source')
            match = {
                'quid': doc.get('_id'),
                'id': doc.get('_id'),
                'name': source.get('name'),
                'r:score': doc.get('_score'),
            }
            for type_ in get_freebase_types():
                if source.get('schema') == type_['id']:
                    match['n:type'] = type_
                    match['type'] = [type_['name']]
            matches.append(match)

    return jsonify({
        "code": "/api/status/ok",
        "status": "200 OK",
        "prefix": request.args.get('prefix', ''),
        "result": matches
    })
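The response envelope (code, status, prefix, and result entries carrying id, name, n:type and r:score) follows the legacy Freebase/Google Refine suggest protocol that the docstring mentions, so the endpoint can be pointed at from OpenRefine's reconciliation dialog. A hedged example of querying it; the URL path is a hypothetical placeholder, not taken from this code:

import requests

# Hypothetical route; OpenRefine sends `prefix` (the typed text) and optional `type` filters.
resp = requests.get(
    "https://aleph.example.org/api/2/entities/_suggest",
    params={"prefix": "siemens", "type": "LegalEntity"},
)
for match in resp.json()["result"]:
    print(match["id"], match["name"], match["r:score"])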
Example #2
def export():
    """
    ---
    post:
      summary: Download the results of a search
      description: >-
        Downloads all the results of a search as a zip archive, up to a maximum
        of 10,000 results. The returned file will contain an Excel document with
        structured data as well as the binary files from all matching
        documents.

        Supports the same query parameters as the search API.
      responses:
        '202':
          description: Accepted
      tags:
      - Entity
    """
    require(request.authz.logged_in)
    parser = SearchQueryParser(request.args, request.authz)
    tag_request(query=parser.text, prefix=parser.prefix)
    query = EntitiesQuery(parser)
    label = gettext("Search: %s") % query.to_text()
    export = create_export(
        operation=OP_EXPORT_SEARCH,
        role_id=request.authz.id,
        label=label,
        mime_type=ZIP,
        meta={"query": query.get_full_query()},
    )
    job_id = get_session_id()
    queue_task(None, OP_EXPORT_SEARCH, job_id=job_id, export_id=export.id)
    return ("", 202)
Example #3
def export():
    """
    ---
    get:
      summary: Download the results of a search
      description: >-
        Downloads all the results of a search as a zip archive, up to a maximum
        of 10,000 results. The returned file will contain an Excel document with
        structured data as well as the binary files from all matching
        documents.

        Supports the same query parameters as the search API.
      responses:
        '200':
          content:
            application/zip:
              schema:
                format: binary
                type: string
          description: OK
      tags:
      - Entity
    """
    require(request.authz.logged_in)
    parser = SearchQueryParser(request.args, request.authz)
    parser.limit = MAX_PAGE
    tag_request(query=parser.text, prefix=parser.prefix)
    result = EntitiesQuery.handle(request, parser=parser)
    stream = export_entities(request, result)
    response = Response(stream, mimetype='application/zip')
    disposition = 'attachment; filename={}'.format('Query_export.zip')
    response.headers['Content-Disposition'] = disposition
    return response
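Examples 3, 7 and 9 hand a generator to Response so the archive is streamed to the client rather than built in memory first. A self-contained sketch of that Flask streaming pattern (illustrative only; in the real code export_entities yields the zip content):

from flask import Flask, Response

app = Flask(__name__)

def generate_chunks():
    # Stand-in for export_entities(): yield the archive piece by piece
    # rather than materialising the whole file in memory.
    for i in range(3):
        yield ("chunk %d\n" % i).encode("utf-8")

@app.route("/export")
def export_view():
    response = Response(generate_chunks(), mimetype="application/zip")
    response.headers["Content-Disposition"] = "attachment; filename=Query_export.zip"
    return response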
Example #4
def index():
    parser = SearchQueryParser(request.args, request.authz)
    result = EntitiesQuery.handle(request,
                                  parser=parser,
                                  schema=CombinedSchema)
    enable_cache(vary_user=True, vary=result.cache_key)
    return jsonify(result)
Example #5
def index():
    parser = SearchQueryParser(request.args, request.authz)
    if parser.cache:
        enable_cache()
    result = EntitiesQuery.handle(request,
                                  parser=parser,
                                  schema=CombinedSchema)
    return jsonify(result)
Example #6
def suggest_entity():
    """Suggest API, emulates Google Refine API."""
    prefix = request.args.get('prefix', '')
    args = {
        'prefix': prefix,
        'filter:schemata': request.args.getlist('type'),
        'filter:collection_id': request.args.getlist('filter:collection_id')
    }
    parser = SearchQueryParser(args, request.authz)
    query = EntitiesQuery(parser)
    result = query.search()
    matches = list(entity_matches(result))
    return jsonify({
        "code": "/api/status/ok",
        "status": "200 OK",
        "prefix": prefix,
        "result": matches
    })
Example #7
def export(format):
    require(request.authz.logged_in)
    parser = SearchQueryParser(request.args, request.authz)
    parser.limit = EXPORT_MAX
    result = EntitiesQuery.handle(request, parser=parser)
    stream = export_entities(request, result, format)
    response = Response(stream, mimetype='application/zip')
    disposition = 'attachment; filename={}'.format('Query_export.zip')
    response.headers['Content-Disposition'] = disposition
    return response
Example #8
def suggest_entity():
    """Suggest API, emulates Google Refine API."""
    prefix = request.args.get("prefix", "")
    tag_request(prefix=prefix)
    types = request.args.getlist("type") or Entity.THING
    args = {
        "prefix": prefix,
        "filter:schemata": types,
        "filter:collection_id": request.args.getlist("filter:collection_id"),
    }
    parser = SearchQueryParser(args, request.authz)
    query = EntitiesQuery(parser)
    result = query.search()
    matches = list(entity_matches(result))
    return jsonify({
        "code": "/api/status/ok",
        "status": "200 OK",
        "prefix": prefix,
        "result": matches,
    })
Example #9
def export(format):
    require(request.authz.logged_in)
    parser = SearchQueryParser(request.args, request.authz)
    parser.limit = EXPORT_MAX
    tag_request(query=parser.text, prefix=parser.prefix)
    result = EntitiesQuery.handle(request, parser=parser)
    stream = export_entities(request, result, format)
    response = Response(stream, mimetype='application/zip')
    disposition = 'attachment; filename={}'.format('Query_export.zip')
    response.headers['Content-Disposition'] = disposition
    return response
Example #10
def suggest_entity():
    """Suggest API, emulates Google Refine API."""
    prefix = request.args.get('prefix', '')
    tag_request(prefix=prefix)
    types = request.args.getlist('type') or Entity.THING
    args = {
        'prefix': prefix,
        'filter:schemata': types,
        'filter:collection_id': request.args.getlist('filter:collection_id')
    }
    parser = SearchQueryParser(args, request.authz)
    query = EntitiesQuery(parser)
    result = query.search()
    matches = list(entity_matches(result))
    return jsonify({
        "code": "/api/status/ok",
        "status": "200 OK",
        "prefix": prefix,
        "result": matches
    })
Example #11
def index():
    # enable_cache(vary_user=True)
    parser = SearchQueryParser(request.args, request.authz)
    result = EntitiesQuery.handle(request, parser=parser)
    links = {}
    if request.authz.logged_in and result.total <= EXPORT_MAX:
        query = list(request.args.items(multi=True))
        links['export'] = url_for('entities_api.export',
                                  format='excel',
                                  _authorize=True,
                                  _query=query)
    return EntitySerializer.jsonify_result(result, extra={'links': links})
Example #12
def index():
    # enable_cache(vary_user=True)
    parser = SearchQueryParser(request.args, request.authz)
    tag_request(query=parser.text, prefix=parser.prefix)
    result = EntitiesQuery.handle(request, parser=parser)
    links = {}
    if request.authz.logged_in and result.total <= EXPORT_MAX:
        query = list(request.args.items(multi=True))
        links['export'] = url_for('entities_api.export',
                                  format='excel',
                                  _authorize=True,
                                  _query=query)
    return EntitySerializer.jsonify_result(result, extra={'links': links})
Example #13
def index():
    # enable_cache(vary_user=True)
    parser = SearchQueryParser(request.args, request.authz)
    if parser.text:
        QueryLog.save(request.authz.id, request._session_id, parser.text)
        db.session.commit()
    tag_request(query=parser.text, prefix=parser.prefix)
    result = EntitiesQuery.handle(request, parser=parser)
    links = {}
    if request.authz.logged_in and result.total <= MAX_PAGE:
        query = list(request.args.items(multi=True))
        links['export'] = url_for('entities_api.export',
                                  _authorize=True,
                                  _query=query)
    return EntitySerializer.jsonify_result(result, extra={'links': links})
Example #14
def export():
    """
    ---
    post:
      summary: Download the results of a search
      description: >-
        Downloads all the results of a search as a zip archive, up to a maximum
        of 10,000 results. The returned file will contain an Excel document with
        structured data as well as the binary files from all matching
        documents.

        Supports the same query parameters as the search API.
      responses:
        '202':
          description: Accepted
      tags:
      - Entity
    """
    require(request.authz.logged_in)
    parser = SearchQueryParser(request.args, request.authz)
    parser.limit = MAX_PAGE
    tag_request(query=parser.text, prefix=parser.prefix)
    result = EntitiesQuery.handle(request, parser=parser)
    label = "Search results for query: %s" % parser.text
    export = create_export(
        operation=OP_EXPORT_SEARCH_RESULTS,
        role_id=request.authz.id,
        label=label,
        file_path=None,
        expires_after=Export.DEFAULT_EXPIRATION,
        collection=None,
        mime_type=ZIP,
    )
    job_id = get_session_id()
    payload = {
        "export_id": export.id,
        "result": result.to_dict(),
    }
    queue_task(None, OP_EXPORT_SEARCH_RESULTS, job_id=job_id, payload=payload)
    return ("", 202)
Example #15
def index():
    enable_cache()
    result = EntitiesQuery.handle(request, schema=CombinedSchema)
    return jsonify(result)
Example #16
def index():
    """
    ---
    get:
      summary: Search entities
      description: >
        Returns a list of entities matching the given search criteria.

        A filter can be applied to show only results from a particular
        collection: `?filter:collection_id={collection_id}`.

        If you know you only want to search documents (unstructured, ingested
        data) or entities (structured data which may have been extracted from
        a dataset, or entered by a human) you can use these arguments with the
        `/documents` or `/entities` endpoints.
      parameters:
      - description: >-
          A query string in ElasticSearch query syntax. Can include field
          searches, such as `title:penguin`
        in: query
        name: q
        schema:
          type: string
      - description: >-
          Return facet values for the given metadata field, such as
          `languages`, `countries`, `mime_type` or `extension`. This can be
          specified multiple times for more than one facet to be added.
        in: query
        name: facet
        schema:
          type: string
      - description: >
          Filter the results by the given field. This is useful when used in
          conjunction with facet to create a drill-down mechanism. Useful
          fields are:

          - `collection_id`, documents belonging to a particular collection.

          - `title`, of the document.

          - `file_name`, of the source file.

          - `source_url`, URL of the source file.

          - `extension`, file extension of the source file.

          - `languages`, in the document.

          - `countries`, associated with the document.

          - `keywords`, from the document.

          - `emails`, email addresses mentioned in the document.

          - `domains`, websites mentioned in the document.

          - `phones`, mentioned in the document.

          - `dates`, in any of the following formats: yyyy-MM-dd, yyyy-MM,
          yyyy-MM-d, yyyy-M, yyyy

          - `mime_type`, of the source file.

          - `author`, according to the source file's metadata.

          - `summary`, of the document.

          - `text`, entire text extracted from the document.

          - `created_at`, when the document was added to aleph
          (yyyy-mm-ddThh:ii:ss.uuuuuu).

          - `updated_at`, when the document was modified in aleph
          (yyyy-mm-ddThh:ii:ss.uuuuuu).
        in: query
        name: 'filter:{field_name}'
        schema:
          type: string
      - description: 'The number of results to return, max. 10,000.'
        in: query
        name: limit
        schema:
          type: integer
      - description: >
            The number of results to skip at the beginning of the result set.
        in: query
        name: offset
        schema:
          type: integer
      responses:
        '200':
          description: Returns a list of matching entities
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EntitiesResponse'
      tags:
      - Entity
    """
    # enable_cache(vary_user=True)
    parser = SearchQueryParser(request.values, request.authz)
    result = EntitiesQuery.handle(request, parser=parser)
    tag_request(query=result.query.to_text(), prefix=parser.prefix)
    links = {}
    if request.authz.logged_in and result.total <= MAX_PAGE:
        query = list(request.args.items(multi=True))
        links["export"] = url_for("entities_api.export", _query=query)
    return EntitySerializer.jsonify_result(result, extra={"links": links})
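The docstring above describes the parameters the search endpoint accepts. A hedged example of combining them in a single request; the base URL and the results key in the JSON body are assumptions, not taken from this code:

import requests

# Hypothetical instance URL; the parameters mirror the OpenAPI description above.
params = {
    "q": "title:penguin",                 # ElasticSearch query string syntax
    "facet": ["countries", "languages"],  # repeatable facet parameter
    "filter:collection_id": "25",         # drill down into one collection
    "limit": 50,                          # max. 10,000
    "offset": 0,
}
resp = requests.get("https://aleph.example.org/api/2/entities", params=params)
resp.raise_for_status()
for entity in resp.json().get("results", []):
    print(entity.get("id"), entity.get("schema"))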
Example #17
def index():
    # enable_cache(vary_user=True)
    parser = SearchQueryParser(request.args, request.authz)
    result = EntitiesQuery.handle(request, parser=parser)
    return EntitySerializer.jsonify_result(result)