Пример #1
0
def index(id):
    """List a collection's permissions, padded with unassigned groups.

    The collection is looked up with ``request.authz.WRITE``. Permissions
    whose role fails ``check_visible`` for the requester are omitted. The
    JSON response carries the serialized entries and their count.
    """
    authz = request.authz
    collection = get_db_collection(id, authz.WRITE)
    record_audit(Audit.ACT_COLLECTION, id=id)

    # Groups the requester may see; any that already hold a permission on
    # the collection are struck off while walking the permissions below.
    unassigned = []
    for group in Role.all_groups():
        if check_visible(group, authz):
            unassigned.append(group)

    query = Permission.all()
    query = query.filter(Permission.collection_id == collection.id)
    entries = []
    for permission in query.all():
        if check_visible(permission.role, authz):
            entries.append(permission)
            if permission.role in unassigned:
                unassigned.remove(permission.role)

    # Workaround: emit a blank (no read, no write) entry for each remaining
    # group so the UI can offer it for selection even though it is not
    # associated with the collection yet.
    for role in unassigned:
        if not (collection.casefile and role.is_public):
            entries.append({
                'collection_id': collection.id,
                'write': False,
                'read': False,
                'role_id': str(role.id)
            })

    serialized = PermissionSerializer().serialize_many(entries)
    return jsonify({'total': len(serialized), 'results': serialized})
Пример #2
0
def index(collection_id):
    """Serialize the ``Match.group_by_collection`` results for a collection.

    The collection access is audited before the query is built.
    """
    collection = get_db_collection(collection_id)
    record_audit(Audit.ACT_COLLECTION, id=collection.id)
    query_parser = QueryParser(request.args, request.authz)
    query = Match.group_by_collection(collection.id, authz=request.authz)
    return MatchCollectionsSerializer.jsonify_result(
        DatabaseQueryResult(request, query, parser=query_parser)
    )
Пример #3
0
def similar(entity_id):
    """Run ``MatchQuery`` for an indexed entity and serialize the result.

    The entity is fetched with ``request.authz.READ`` and wrapped in a model
    proxy; the access is audited before the query runs.
    """
    enable_cache()
    data = get_index_entity(entity_id, request.authz.READ)
    proxy = model.get_proxy(data)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    return EntitySerializer.jsonify_result(
        MatchQuery.handle(request, entity=proxy)
    )
Пример #4
0
def similar(id):
    """Run ``MatchQuery`` for an indexed entity and return it as JSON.

    The entity is fetched with ``request.authz.READ`` and wrapped in a model
    proxy; the query is invoked with ``CombinedSchema``.
    """
    enable_cache()
    data = get_index_entity(id, request.authz.READ)
    proxy = model.get_proxy(data)
    record_audit(Audit.ACT_ENTITY, id=id)
    matches = MatchQuery.handle(request, entity=proxy, schema=CombinedSchema)
    return jsonify(matches)
Пример #5
0
def file(document_id):
    """Serve a document's archived file via ``_serve_archive``.

    The archive is addressed by the document's content hash and served under
    its safe file name and MIME type; the response is then passed through
    ``_resp_canonical``.
    """
    document = get_db_document(document_id)
    record_audit(Audit.ACT_ENTITY, id=document_id)
    response = _serve_archive(
        document.content_hash,
        document.safe_file_name,
        document.mime_type,
    )
    return _resp_canonical(response, document_id)
Пример #6
0
def triples(collection_id):
    """Respond with the ``export_collection`` output as ``text/plain``.

    The request must pass ``authz.can_stream()`` and the collection is looked
    up with ``authz.READ``; the access is audited.
    """
    authz = request.authz
    require(authz.can_stream())
    log.debug("Stream triples [%r] begins... (coll: %s)",
              authz, collection_id)
    collection = get_db_collection(collection_id, authz.READ)
    record_audit(Audit.ACT_COLLECTION, id=collection_id)
    body = export_collection(collection)
    return Response(body, mimetype='text/plain')
Пример #7
0
def triples(collection_id):
    """Export a collection through ``export_collection`` as plain text.

    Guarded by ``authz.can_stream()``; the collection is fetched with
    ``authz.READ`` and the access is recorded in the audit log.
    """
    authz = request.authz
    require(authz.can_stream())
    log.debug(
        "Stream triples [%r] begins... (coll: %s)",
        authz,
        collection_id,
    )
    collection = get_db_collection(collection_id, authz.READ)
    record_audit(Audit.ACT_COLLECTION, id=collection_id)
    exported = export_collection(collection)
    return Response(exported, mimetype='text/plain')
Пример #8
0
def oauth_callback():
    """Complete an OAuth login and redirect back with a token.

    Aborts with 404 when ``settings.OAUTH`` is not set. The provider response
    is handed to the ``handle_oauth_session`` signal; the first receiver that
    yields a role decides the login. On success the redirect target carries
    the token in its URL fragment.
    """
    if not settings.OAUTH:
        abort(404)

    resp = oauth.provider.authorized_response()
    if resp is None or isinstance(resp, OAuthException):
        log.warning("Failed OAuth: %r", resp)
        return Unauthorized("Authentication has failed.")

    answers = signals.handle_oauth_session.send(provider=oauth.provider,
                                                oauth=resp)
    # Take the first role any receiver produced; receivers that returned
    # None are skipped.
    role = next((found for (_, found) in answers if found is not None), None)
    if role is None:
        log.error("No OAuth handler for %r was installed.",
                  oauth.provider.name)
        return Unauthorized("Authentication has failed.")

    db.session.commit()
    update_role(role)
    log.info("Logged in: %r", role)
    request.authz = Authz.from_role(role)
    record_audit(Audit.ACT_LOGIN)
    token = request.authz.to_token(role=role).decode('utf-8')
    state = request.args.get('state')
    target = get_best_next_url(state, request.referrer)
    # Drop any existing fragment so the token becomes the only one.
    target = urldefrag(target)[0]
    return redirect('%s#token=%s' % (target, token))
Пример #9
0
def index(id):
    """Return the permissions of a collection plus blank group entries.

    The collection is fetched with ``request.authz.WRITE``. Only permissions
    whose role passes ``check_visible`` are included. The result is returned
    as JSON with ``total`` and ``results`` keys.
    """
    authz = request.authz
    collection = get_db_collection(id, authz.WRITE)
    record_audit(Audit.ACT_COLLECTION, id=id)

    # Visible groups; the ones that already have a permission row are
    # removed again as the rows are collected.
    remaining = []
    for group in Role.all_groups():
        if check_visible(group, authz):
            remaining.append(group)

    rows = Permission.all()
    rows = rows.filter(Permission.collection_id == collection.id)
    permissions = []
    for row in rows.all():
        if check_visible(row.role, authz):
            permissions.append(row)
            if row.role in remaining:
                remaining.remove(row.role)

    # Workaround so the UI can list every group for selection, even those
    # not yet associated with the collection: add an entry without read or
    # write access for each of them.
    for group in remaining:
        if not (collection.casefile and group.is_public):
            permissions.append({
                'collection_id': collection.id,
                'write': False,
                'read': False,
                'role_id': str(group.id)
            })

    payload = PermissionSerializer().serialize_many(permissions)
    return jsonify({'total': len(payload), 'results': payload})
Пример #10
0
def index(collection_id):
    """Return the serialized ``Match.group_by_collection`` query result.

    The collection is loaded and the access audited before querying.
    """
    collection = get_db_collection(collection_id)
    record_audit(Audit.ACT_COLLECTION, id=collection.id)
    args_parser = QueryParser(request.args, request.authz)
    grouped = Match.group_by_collection(collection.id, authz=request.authz)
    query_result = DatabaseQueryResult(request, grouped, parser=args_parser)
    return MatchCollectionsSerializer.jsonify_result(query_result)
Пример #11
0
def documents(id):
    """Return the ``EntityDocumentsQuery`` result for an entity as JSON.

    The entity is read from the index with ``request.authz.READ`` and the
    access is audited.
    """
    enable_cache()
    entity = get_index_entity(id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=id)
    query_kwargs = {'entity': entity, 'schema': CombinedSchema}
    return jsonify(EntityDocumentsQuery.handle(request, **query_kwargs))
Пример #12
0
 def handle(cls, request, parser=None, **kwargs):
     """Execute a search and wrap the outcome in ``cls.RESULT_CLASS``.

     When no parser is supplied, a ``SearchQueryParser`` is built from the
     request arguments and authz. Extra keyword arguments are forwarded to
     the query constructor.
     """
     parser = (SearchQueryParser(request.args, request.authz)
               if parser is None else parser)
     # Audit the search together with the parser state.
     record_audit(Audit.ACT_SEARCH,
                  keys=['prefix', 'text', 'filters'],
                  **parser.to_dict())
     query = cls(parser, **kwargs)
     outcome = query.search()
     return cls.RESULT_CLASS(request, parser, outcome)
Пример #13
0
def similar(entity_id):
    """Run ``MatchQuery`` for an indexed entity and serialize the result.

    The request is tagged with the entity's ``collection_id`` and the access
    is audited before the query runs.
    """
    enable_cache()
    data = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=data.get('collection_id'))
    proxy = model.get_proxy(data)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    return EntitySerializer.jsonify_result(
        MatchQuery.handle(request, entity=proxy)
    )
Пример #14
0
 def handle(cls, request, parser=None, **kwargs):
     """Audit and run a search, returning a ``cls.RESULT_CLASS`` instance.

     A ``SearchQueryParser`` over the request arguments and authz is used
     unless the caller passes its own parser. Remaining keyword arguments
     go to the query constructor.
     """
     if parser is None:
         parser = SearchQueryParser(request.args, request.authz)
     # Record the search in the audit log.
     audited_keys = ['prefix', 'text', 'filters']
     record_audit(Audit.ACT_SEARCH, keys=audited_keys, **parser.to_dict())
     searcher = cls(parser, **kwargs)
     return cls.RESULT_CLASS(request, parser, searcher.search())
Пример #15
0
def match():
    """Match a posted entity via ``MatchQuery`` and serialize the result.

    The request body is parsed with ``EntityUpdateSchema`` and audited as
    submitted. The ``collection_ids`` query arguments are forwarded to the
    query.
    """
    payload = parse_request(EntityUpdateSchema)
    record_audit(Audit.ACT_MATCH, entity=payload)
    proxy = model.get_proxy(payload)
    collection_ids = request.args.getlist('collection_ids')
    return EntitySerializer.jsonify_result(
        MatchQuery.handle(request,
                          entity=proxy,
                          collection_ids=collection_ids)
    )
Пример #16
0
def matches(collection_id, other_id):
    """Serialize the ``Match.find_by_collection`` results for two collections.

    Each collection is loaded and audited in turn before the query is built.
    """
    loaded = []
    for ident in (collection_id, other_id):
        item = get_db_collection(ident)
        record_audit(Audit.ACT_COLLECTION, id=item.id)
        loaded.append(item)
    collection, other = loaded

    query_parser = QueryParser(request.args, request.authz)
    query = Match.find_by_collection(collection.id, other.id)
    return MatchSerializer.jsonify_result(
        DatabaseQueryResult(request, query, parser=query_parser)
    )
Пример #17
0
def match():
    """Match a posted entity via ``MatchQuery`` and serialize the result.

    The body is parsed with ``EntityUpdateSchema`` and audited. The request
    is tagged with the proxy's schema name and caption before querying.
    """
    payload = parse_request(EntityUpdateSchema)
    record_audit(Audit.ACT_MATCH, entity=payload)
    proxy = model.get_proxy(payload)
    tag_request(schema=proxy.schema.name, caption=proxy.caption)
    collection_ids = request.args.getlist('collection_ids')
    return EntitySerializer.jsonify_result(
        MatchQuery.handle(request,
                          entity=proxy,
                          collection_ids=collection_ids)
    )
Пример #18
0
def matches(id, other_id):
    """Return the ``Match.find_by_collection`` results for two collections.

    Both collections are loaded and audited one after the other. The query
    result is built with ``MatchSchema`` and returned as JSON.
    """
    loaded = []
    for ident in (id, other_id):
        item = get_db_collection(ident)
        record_audit(Audit.ACT_COLLECTION, id=item.id)
        loaded.append(item)
    collection, other = loaded

    query_parser = QueryParser(request.args, request.authz)
    query = Match.find_by_collection(collection.id, other.id)
    return jsonify(DatabaseQueryResult(request,
                                       query,
                                       parser=query_parser,
                                       schema=MatchSchema))
Пример #19
0
def pdf(document_id):
    """Serve the PDF version of a document from the archive.

    Raises ``BadRequest`` when the document does not support pages. When the
    PDF version differs from the content hash, ``.pdf`` is appended to the
    served file name.
    """
    document = get_db_document(document_id)
    record_audit(Audit.ACT_ENTITY, id=document_id)
    if not document.supports_pages:
        raise BadRequest("PDF is only available for text documents")
    base_name = document.safe_file_name
    differs = document.pdf_version != document.content_hash
    file_name = ('%s.pdf' % base_name) if differs else base_name
    response = _serve_archive(document.pdf_version, file_name, PDF)
    return _resp_canonical(response, document_id)
Пример #20
0
def records(document_id):
    """Return the ``RecordsQuery`` result for a document as JSON.

    Raises ``BadRequest`` when the document does not support records.
    """
    enable_cache()
    document = get_db_document(document_id)
    record_audit(Audit.ACT_ENTITY, id=document_id)
    if not document.supports_records:
        raise BadRequest("This document does not have records.")
    query_kwargs = {'document': document, 'schema': RecordSchema}
    return jsonify(RecordsQuery.handle(request, **query_kwargs))
Пример #21
0
def record(document_id, index):
    """Return one record of a document, looked up by its index.

    Raises ``BadRequest`` when the document does not support records and
    ``NotFound`` when ``DocumentRecord.by_index`` finds nothing.
    """
    enable_cache()
    document = get_db_document(document_id)
    record_audit(Audit.ACT_ENTITY, id=document_id)
    if not document.supports_records:
        raise BadRequest("This document does not have records.")
    entry = DocumentRecord.by_index(document.id, index)
    if entry is not None:
        return serialize_data(entry, RecordSchema)
    raise NotFound("No such record: %s" % index)
Пример #22
0
def match():
    """Match a posted entity via ``MatchQuery`` and return the result as JSON.

    The body is parsed with ``EntityUpdateSchema`` and audited. The
    ``collection_ids`` query arguments and ``CombinedSchema`` are passed to
    the query.
    """
    enable_cache()
    payload = parse_request(EntityUpdateSchema)
    record_audit(Audit.ACT_MATCH, entity=payload)
    proxy = model.get_proxy(payload)
    collection_ids = request.args.getlist('collection_ids')
    matched = MatchQuery.handle(request,
                                entity=proxy,
                                collection_ids=collection_ids,
                                schema=CombinedSchema)
    return jsonify(matched)
Пример #23
0
def content(document_id):
    """Return a document's headers, body text and sanitized HTML as JSON.

    The HTML is ``document.body_raw`` passed through ``sanitize_html``
    together with the document's source URL.
    """
    enable_cache()
    document = get_db_document(document_id)
    record_audit(Audit.ACT_ENTITY, id=document_id)
    headers = document.headers
    text = document.body_text
    html = sanitize_html(document.body_raw, document.source_url)
    return jsonify({'headers': headers, 'text': text, 'html': html})
Пример #24
0
def references(entity_id):
    """Return the ``entity_references`` counts for an entity as JSON.

    Each result carries the count, the property and the name of the
    property's schema.
    """
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    results = [
        {
            'count': total,
            'property': prop,
            'schema': prop.schema.name,
        }
        for prop, total in entity_references(entity, request.authz)
    ]
    return jsonify({
        'status': 'ok',
        'total': len(results),
        'results': results
    })
Пример #25
0
def records(document_id=None, collection_id=None):
    """Stream records for a collection, a document, or everything.

    The request must pass ``authz.can_export()``. A collection id takes
    precedence over a document id; when neither is given the requester must
    be an admin.
    """
    authz = request.authz
    require(authz.can_export())
    if collection_id is not None:
        get_db_collection(collection_id, authz.READ)
        record_audit(Audit.ACT_COLLECTION, id=collection_id)
    elif document_id is not None:
        get_db_document(document_id)
        record_audit(Audit.ACT_ENTITY, id=document_id)
    else:
        # Records carry no authz of their own, so an unscoped request is a
        # *full* export and is limited to admins.
        require(authz.is_admin)
    stream = iter_records(document_id=document_id,
                          collection_id=collection_id)
    return stream_ijson(stream)
Пример #26
0
def references(id):
    """Return the ``entity_references`` counts for an entity as JSON.

    Each result also includes a link to ``entities_api.index`` filtered on
    the referencing property and this entity id.
    """
    enable_cache()
    entity = get_index_entity(id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=id)
    results = []
    for prop, total in entity_references(entity, request.authz):
        filter_pair = ('filter:properties.%s' % prop.name, id)
        results.append({
            'count': total,
            'property': prop,
            'schema': prop.schema.name,
            'results': url_for('entities_api.index', _query=(filter_pair, ))
        })
    return jsonify({
        'status': 'ok',
        'total': len(results),
        'results': results
    })
Пример #27
0
def entities(collection_id=None):
    """Stream entities, optionally restricted to a single collection.

    The request must pass ``authz.can_export()``. The ``schema`` and
    ``include`` query arguments are forwarded to ``iter_entities``; fields
    named in the fixed exclude list are dropped from the includes.

    :param collection_id: when given, the collection is looked up with
        ``request.authz.READ`` and the access is audited.
    :return: the ``stream_ijson`` response over the selected entities.
    """
    require(request.authz.can_export())
    schemata = ensure_list(request.args.getlist('schema'))
    excludes = ['text', 'roles', 'fingerprints']
    includes = ensure_list(request.args.getlist('include'))
    # An explicitly excluded field can never be requested back in.
    includes = [f for f in includes if f not in excludes]
    if collection_id is not None:
        # Use the route parameter here: the bare name ``id`` is the Python
        # builtin function, not the requested collection.
        get_db_collection(collection_id, request.authz.READ)
        record_audit(Audit.ACT_COLLECTION, id=collection_id)
    entities = iter_entities(authz=request.authz,
                             collection_id=collection_id,
                             schemata=schemata,
                             excludes=excludes,
                             includes=includes)
    return stream_ijson(entities)
Пример #28
0
def view(document_id):
    """Return the indexed document data enriched with database content.

    Headers are always added. Sanitized HTML is added for documents with the
    HTML schema, and body text for documents with the text or image schema.
    """
    enable_cache()
    data = get_index_document(document_id)
    document = get_db_document(document_id)
    data['headers'] = document.headers
    # TODO: should this be its own API? Probably so, but then it would be
    # unclear whether to JSON-wrap the content or serve it plain with the
    # correct MIME type.
    if Document.SCHEMA_HTML in document.model.names:
        data['html'] = sanitize_html(document.body_raw, document.source_url)
    for text_schema in (Document.SCHEMA_TEXT, Document.SCHEMA_IMAGE):
        if text_schema in document.model.names:
            data['text'] = document.body_text
    record_audit(Audit.ACT_ENTITY, id=document_id)
    return serialize_data(data, CombinedSchema)
Пример #29
0
def tags(entity_id):
    """Return the ``entity_tags`` of an entity as JSON, highest count first.

    Each tag carries a query string id built from a ``filter:<field>`` key
    and the URL-quoted, UTF-8 encoded value.
    """
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    found = []
    for (field, value, total) in entity_tags(entity, request.authz):
        filter_pair = ('filter:%s' % field, quote(value.encode('utf-8')))
        found.append({
            'id': query_string([filter_pair]),
            'value': value,
            'field': field,
            'count': total,
        })

    results = sorted(found, key=lambda tag: tag['count'], reverse=True)
    return jsonify({
        'status': 'ok',
        'total': len(results),
        'results': results
    })
Пример #30
0
def entities(collection_id=None):
    """Stream entities, optionally restricted to a single collection.

    The request must pass ``authz.can_stream()``. The ``schema`` and
    ``include`` query arguments are forwarded to ``iter_entities``, minus any
    included field that is on the fixed exclude list. A given collection is
    looked up with ``authz.READ`` and audited.
    """
    authz = request.authz
    require(authz.can_stream())
    log.debug(
        "Stream entities [%r] begins... (coll: %s)",
        authz,
        collection_id,
    )
    schemata = ensure_list(request.args.getlist('schema'))
    excludes = ['text', 'roles', 'fingerprints']
    requested = ensure_list(request.args.getlist('include'))
    includes = [field for field in requested if field not in excludes]
    if collection_id is not None:
        get_db_collection(collection_id, authz.READ)
        record_audit(Audit.ACT_COLLECTION, id=collection_id)
    stream = iter_entities(authz=authz,
                           collection_id=collection_id,
                           schemata=schemata,
                           excludes=excludes,
                           includes=includes)
    return stream_ijson(stream)
Пример #31
0
def references(entity_id):
    """Return the ``entity_references`` counts for an entity as JSON.

    The request is tagged with the entity's ``collection_id`` and the access
    is audited. Each result holds the count, the property and the name of the
    property's schema.
    """
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=entity.get('collection_id'))
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    results = [
        {
            'count': total,
            'property': prop,
            'schema': prop.schema.name,
        }
        for prop, total in entity_references(entity, request.authz)
    ]
    return jsonify({'status': 'ok', 'total': len(results), 'results': results})
Пример #32
0
def content(entity_id):
    """Return an entity's headers, body text and sanitized HTML as JSON.

    The entity is first read from the index with ``request.authz.READ``, then
    re-fetched through ``entities_by_ids`` with the ``text`` field excluded.
    Responds with an empty 404 when that second lookup yields nothing.
    """
    enable_cache()
    indexed = get_index_entity(entity_id, request.authz.READ)
    candidates = entities_by_ids([entity_id],
                                 schemata=indexed.get('schema'),
                                 excludes=['text'])
    # Only the first entity yielded is used.
    for data in candidates:
        proxy = model.get_proxy(data)
        record_audit(Audit.ACT_ENTITY, id=entity_id)
        body_html = proxy.first('bodyHtml', quiet=True)
        source_url = proxy.first('sourceUrl', quiet=True)
        html = sanitize_html(body_html, source_url)
        headers = registry.json.unpack(proxy.first('headers', quiet=True))
        return jsonify({
            'headers': headers,
            'text': proxy.first('bodyText', quiet=True),
            'html': html
        })
    return ('', 404)
Пример #33
0
def password_login():
    """Authenticate with email and password and return a token as JSON.

    Returns ``Unauthorized`` when no role matches the email, the role has no
    password, or the password check fails.
    """
    credentials = parse_request(LoginSchema)
    role = Role.by_email(credentials.get('email'))
    # Short-circuits left to right: the password is only checked for an
    # existing role that has a password.
    rejected = (
        role is None
        or not role.has_password
        or not role.check_password(credentials.get('password'))
    )
    if rejected:
        return Unauthorized("Authentication has failed.")

    db.session.commit()
    update_role(role)
    authz = Authz.from_role(role)
    request.authz = authz
    record_audit(Audit.ACT_LOGIN)
    return jsonify({'status': 'ok', 'token': authz.to_token(role=role)})
Пример #34
0
def content(entity_id):
    """Return an entity's headers, body text and sanitized HTML as JSON.

    The entity is read from the index with ``request.authz.READ`` and the
    request is tagged with its ``collection_id``. It is then re-fetched
    through ``entities_by_ids`` with the ``text`` field excluded; an empty
    404 is returned when that lookup yields nothing.
    """
    enable_cache()
    indexed = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=indexed.get('collection_id'))
    candidates = entities_by_ids([entity_id],
                                 schemata=indexed.get('schema'),
                                 excludes=['text'])
    # Only the first entity yielded is used.
    for data in candidates:
        proxy = model.get_proxy(data)
        record_audit(Audit.ACT_ENTITY, id=entity_id)
        body_html = proxy.first('bodyHtml', quiet=True)
        source_url = proxy.first('sourceUrl', quiet=True)
        html = sanitize_html(body_html, source_url)
        headers = registry.json.unpack(proxy.first('headers', quiet=True))
        return jsonify({
            'headers': headers,
            'text': proxy.first('bodyText', quiet=True),
            'html': html
        })
    return ('', 404)
Пример #35
0
def tags(entity_id):
    """Return the ``entity_tags`` of an entity as JSON, highest count first.

    The request is tagged with the entity's ``collection_id``. Each tag
    carries a query string id built from a ``filter:<field>`` key and the
    URL-quoted, UTF-8 encoded value.
    """
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=entity.get('collection_id'))
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    found = []
    for (field, value, total) in entity_tags(entity, request.authz):
        filter_pair = ('filter:%s' % field, quote(value.encode('utf-8')))
        found.append({
            'id': query_string([filter_pair]),
            'value': value,
            'field': field,
            'count': total,
        })

    results = sorted(found, key=lambda tag: tag['count'], reverse=True)
    return jsonify({'status': 'ok', 'total': len(results), 'results': results})
Пример #36
0
def retrieve():
    """Serve an archived file identified by the ``claim`` query argument.

    The claim is unpacked by ``archive_claim`` and must name the requesting
    role. If the archive can generate a URL the client is redirected there;
    otherwise the file is loaded locally and sent as an attachment, or a 404
    is returned when it cannot be loaded. ``archive.cleanup_file`` always
    runs after the local path was attempted.
    """
    claim = request.args.get('claim')
    role_id, content_hash, file_name, mime_type = archive_claim(claim)
    require(request.authz.id == role_id)
    record_audit(Audit.ACT_ARCHIVE, content_hash=content_hash)
    tag_request(content_hash=content_hash, file_name=file_name)
    remote_url = archive.generate_url(content_hash,
                                      file_name=file_name,
                                      mime_type=mime_type)
    if remote_url is not None:
        return redirect(remote_url)
    try:
        path = archive.load_file(content_hash)
        if path is not None:
            return send_file(path,
                             as_attachment=True,
                             conditional=True,
                             attachment_filename=file_name,
                             mimetype=mime_type)
        return Response(status=404)
    finally:
        archive.cleanup_file(content_hash)
Пример #37
0
def view(entity_id):
    """Return an indexed entity, serialized by ``EntitySerializer``.

    The entity is read with ``request.authz.READ``; the access is audited and
    the request tagged with the entity's ``collection_id``.
    """
    enable_cache()
    data = get_index_entity(entity_id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    collection_id = data.get('collection_id')
    tag_request(collection_id=collection_id)
    return EntitySerializer.jsonify(data)
Пример #38
0
def view(id):
    """Return an indexed collection serialized with ``CollectionSchema``.

    The access is recorded in the audit log.
    """
    data = get_index_collection(id)
    record_audit(Audit.ACT_COLLECTION, id=id)
    return serialize_data(data, CollectionSchema)
Пример #39
0
def view(entity_id):
    """Serialize one indexed entity with ``EntitySerializer``.

    The entity is fetched with ``request.authz.READ``. The access is audited
    first, then the request is tagged with the entity's ``collection_id``.
    """
    enable_cache()
    found = get_index_entity(entity_id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    tag_request(
        collection_id=found.get('collection_id'),
    )
    return EntitySerializer.jsonify(found)
Пример #40
0
def csv_export(collection_id):
    """Stream the ``export_matches_csv`` output for a collection as CSV.

    The collection is looked up with ``request.authz.READ`` and the access
    is audited.
    """
    authz = request.authz
    collection = get_db_collection(collection_id, authz.READ)
    record_audit(Audit.ACT_COLLECTION, id=collection.id)
    rows = stream_with_context(export_matches_csv(collection.id, authz))
    return stream_csv(rows)
Пример #41
0
def csv_export(id):
    """Stream the ``export_matches_csv`` output for a collection as CSV.

    The collection is looked up with ``request.authz.READ``; the audit entry
    records the id as it was passed in.
    """
    authz = request.authz
    collection = get_db_collection(id, authz.READ)
    record_audit(Audit.ACT_COLLECTION, id=id)
    rows = stream_with_context(export_matches_csv(collection.id, authz))
    return stream_csv(rows)
Пример #42
0
def view(id):
    """Return an indexed collection via ``CollectionSerializer``.

    The access is recorded in the audit log.
    """
    data = get_index_collection(id)
    record_audit(Audit.ACT_COLLECTION, id=id)
    return CollectionSerializer.jsonify(data)
Пример #43
0
def rdf(id):
    """Respond with the ``export_collection`` output as ``text/plain``.

    The collection is looked up with ``request.authz.READ`` and the access
    is audited.
    """
    collection = get_db_collection(id, request.authz.READ)
    record_audit(Audit.ACT_COLLECTION, id=id)
    exported = export_collection(collection)
    return Response(exported, mimetype='text/plain')
Пример #44
0
def view(collection_id):
    """Return an indexed collection via ``CollectionSerializer``.

    The access is recorded in the audit log.
    """
    data = get_index_collection(collection_id)
    record_audit(Audit.ACT_COLLECTION, id=collection_id)
    return CollectionSerializer.jsonify(data)