Exemplo n.º 1
0
def file(document_id):
    """Serve the original file belonging to a document.

    Redirects to the URL produced by ``archive.generate_url`` when one is
    available; otherwise the file is fetched through ``archive.load_file``
    and sent back as an attachment.
    """
    document = get_document(document_id)
    remote_url = archive.generate_url(document.meta)
    if remote_url is None:
        # The archive offered no URL: stream the locally loaded copy.
        handle = open(archive.load_file(document.meta), 'rb')
        return send_file(
            handle,
            as_attachment=True,
            attachment_filename=document.meta.file_name,
            mimetype=document.meta.mime_type,
        )
    return redirect(remote_url)
Exemplo n.º 2
0
def view(document_id):
    """Return a JSON description of a document with its download URLs.

    ``data_url`` is the archive URL when one can be generated, otherwise the
    ``documents_api.file`` endpoint. ``pdf_url`` equals ``data_url`` for
    documents flagged as PDF; for others it is the archive URL of the PDF
    version, falling back to the ``documents_api.pdf`` endpoint.
    """
    doc = get_document(document_id)
    enable_cache()
    data = doc.to_dict()
    log_event(request, document_id=doc.id)

    data_url = archive.generate_url(doc.meta)
    if data_url is None:
        data_url = url_for('documents_api.file', document_id=document_id)
    data['data_url'] = data_url

    if doc.meta.is_pdf:
        # The source file already is the PDF.
        data['pdf_url'] = data['data_url']
        return jsonify(data)

    try:
        data['pdf_url'] = archive.generate_url(doc.meta.pdf)
    except Exception as ex:
        # Best effort: a failure here falls through to the API endpoint.
        log.info('Could not generate PDF url: %r', ex)
    if data.get('pdf_url') is None:
        data['pdf_url'] = url_for('documents_api.pdf',
                                  document_id=document_id)
    return jsonify(data)
Exemplo n.º 3
0
def pdf(document_id):
    """Serve the PDF version of a text document.

    Raises ``BadRequest`` for documents whose type is not
    ``Document.TYPE_TEXT``. Redirects to the archive URL when one exists,
    otherwise sends the file loaded via ``archive.load_file``.
    """
    document = get_document(document_id)
    if document.type != Document.TYPE_TEXT:
        raise BadRequest("PDF is only available for text documents")

    pdf_meta = document.meta.pdf
    redirect_url = archive.generate_url(pdf_meta)
    if redirect_url is None:
        # No URL from the archive: send the locally loaded PDF.
        stream = open(archive.load_file(pdf_meta), 'rb')
        return send_file(stream, mimetype=pdf_meta.mime_type)
    return redirect(redirect_url)
Exemplo n.º 4
0
def _get_table_csv_link(table):
    """Return a URL or local POSIX path for the CSV version of a table.

    Raises ``RuntimeError`` when the table has no ``csvHash`` property or
    when neither an archive URL nor a local file can be obtained.
    """
    csv_hash = model.get_proxy(table).first("csvHash")
    if csv_hash is None:
        raise RuntimeError("Source table doesn't have a CSV version")

    link = archive.generate_url(csv_hash)
    if link is None:
        # Fall back to a path on the local file system.
        csv_path = archive.load_file(csv_hash)
        link = None if csv_path is None else csv_path.as_posix()
    if link is None:
        raise RuntimeError("Could not generate CSV URL for the table")
    return link
Exemplo n.º 5
0
def view(document_id):
    """Return a JSON description of a document.

    Adds the parent document (when present), a ``data_url`` pointing at the
    archive or at the ``documents_api.file`` endpoint, and a ``pdf_url`` when
    the document has a PDF version.
    """
    doc = get_document(document_id)
    enable_cache()
    data = doc.to_dict()

    parent = doc.parent
    if parent is not None:
        data['parent'] = parent.to_dict()
    log_event(request, document_id=doc.id)

    data_url = archive.generate_url(doc.content_hash)
    if data_url is None:
        # No archive URL available: serve through the API instead.
        data_url = url_for('documents_api.file', document_id=document_id)
    data['data_url'] = data_url

    if doc.pdf_version:
        data['pdf_url'] = url_for('documents_api.pdf', document_id=document_id)
    return jsonify(data)
Exemplo n.º 6
0
def retrieve():
    """Downloads a binary blob from the blob storage archive.
    ---
    get:
      summary: Download a blob from the archive
      parameters:
      - description: Authorization token for an archive blob
        in: query
        name: token
        schema:
          type: string
          description: A signed JWT with the object hash.
      responses:
        '200':
          description: OK
          content:
            '*/*': {}
        '404':
          description: Object does not exist.
      tags:
      - Archive
    """
    # The query parameter is called "token"; the spec above must match it.
    encoded = request.args.get("token")
    claims = jwt.decode(encoded, key=settings.SECRET_KEY, verify=True)
    content_hash = claims.get("c")
    file_name = claims.get("f")
    mime_type = claims.get("m")
    expire = datetime.utcfromtimestamp(claims["exp"])
    tag_request(content_hash=content_hash, file_name=file_name)
    url = archive.generate_url(
        content_hash,
        file_name=file_name,
        mime_type=mime_type,
        expire=expire,
    )
    if url is not None:
        return redirect(url)
    try:
        local_path = archive.load_file(content_hash)
        if local_path is None:
            return Response(status=404)
        return send_file(
            str(local_path),
            as_attachment=True,
            conditional=True,
            attachment_filename=file_name,
            mimetype=mime_type,
        )
    finally:
        # Always release the local copy, whether or not a response was built.
        archive.cleanup_file(content_hash)
Exemplo n.º 7
0
def get_table_csv_link(table_id):
    """Return a URL or local POSIX path for the CSV version of a table.

    Looks up the table entity, reads its ``csvHash`` property and asks the
    archive for a URL, falling back to a locally loaded file.

    Raises:
        RuntimeError: if the table has no ``csvHash`` property, or if
            neither a URL nor a local file could be obtained.
    """
    table = get_entity(table_id)
    properties = table.get('properties', {})
    csv_hash = first(properties.get('csvHash'))
    if csv_hash is None:
        raise RuntimeError("Source table doesn't have a CSV version")
    url = archive.generate_url(csv_hash)
    if not url:
        local_path = archive.load_file(csv_hash)
        if local_path is not None:
            url = local_path.as_posix()
    # Use the same falsiness test as above: an empty-string URL with no local
    # file must be reported as a failure, not returned to the caller.
    if not url:
        raise RuntimeError("Could not generate CSV URL for the table")
    return url
Exemplo n.º 8
0
def pdf(document_id):
    """Serve the PDF version of a text document.

    Raises ``BadRequest`` for documents whose type is not
    ``Document.TYPE_TEXT`` and ``NotFound`` when the archive cannot supply
    the PDF file locally.
    """
    document = get_document(document_id)
    enable_cache(server_side=True)
    log_event(request, document_id=document.id)
    if document.type != Document.TYPE_TEXT:
        raise BadRequest("PDF is only available for text documents")

    remote_url = archive.generate_url(document.pdf_version,
                                      mime_type=PDF_MIME)
    if remote_url is None:
        # No URL from the archive: fall back to a locally loaded file.
        pdf_path = archive.load_file(document.pdf_version,
                                     file_name=document.file_name)
        if pdf_path is None:
            raise NotFound("Missing PDF file.")
        return send_file(open(pdf_path, 'rb'), mimetype=PDF_MIME)
    return redirect(remote_url)
Exemplo n.º 9
0
def write_document(zip_archive, collection, entity):
    """Add the source file of an entity to a zip archive.

    Entities without a ``contentHash`` property are skipped. The file is
    stored as ``<collection label>/<entity id>-<file name or caption>``. Its
    content is streamed from the archive URL when one can be generated,
    otherwise it is taken from a locally loaded copy; nothing is written if
    neither is available.
    """
    if not entity.has('contentHash', quiet=True):
        return
    name = entity.first('fileName') or entity.caption
    name = "{0}-{1}".format(entity.id, name)
    path = os.path.join(collection.get('label'), name)
    content_hash = entity.first('contentHash')
    url = archive.generate_url(content_hash)
    if url is not None:
        stream = requests.get(url, stream=True)
        # iter_content() defaults to 1-byte chunks; request larger chunks so
        # the download is not processed one byte at a time.
        zip_archive.write_iter(path, stream.iter_content(chunk_size=8192))
    else:
        local_path = archive.load_file(content_hash)
        if local_path is not None:
            zip_archive.write(local_path, arcname=path)
Exemplo n.º 10
0
def make_mapper(collection, mapping):
    """Build a model mapping for a collection from a stored mapping record.

    Resolves the CSV version of the mapping's source table to a URL or a
    local POSIX path and passes it, together with the mapping query, to
    ``model.make_mapping``.

    Raises:
        RuntimeError: if the table has no ``csvHash`` property, or if
            neither a URL nor a local file could be obtained.
    """
    table = get_entity(mapping.table_id)
    properties = table.get('properties', {})
    csv_hash = first(properties.get('csvHash'))
    if csv_hash is None:
        raise RuntimeError("Source table doesn't have a CSV version")
    url = archive.generate_url(csv_hash)
    if not url:
        local_path = archive.load_file(csv_hash)
        if local_path is not None:
            url = local_path.as_posix()
    # Use the same falsiness test as above: an empty-string URL with no local
    # file must be reported as a failure, not passed on as the CSV location.
    if not url:
        raise RuntimeError("Could not generate CSV URL for the table")
    data = {'csv_url': url, 'entities': mapping.query}
    return model.make_mapping(data, key_prefix=collection.foreign_id)
Exemplo n.º 11
0
Arquivo: export.py Projeto: pudo/aleph
def write_document(zip_archive, collection, entity):
    """Add the source file of an entity to a zip archive.

    Entities without a ``contentHash`` property are skipped. The file is
    stored as ``<collection label>/<entity id>-<file name or caption>``. Its
    content is streamed from the archive URL when one can be generated,
    otherwise it is taken from a locally loaded copy; nothing is written if
    neither is available.
    """
    if not entity.has('contentHash', quiet=True):
        return
    name = entity.first('fileName') or entity.caption
    name = "{0}-{1}".format(entity.id, name)
    path = os.path.join(collection.get('label'), name)
    content_hash = entity.first('contentHash')
    url = archive.generate_url(content_hash)
    if url is not None:
        stream = requests.get(url, stream=True)
        # iter_content() defaults to 1-byte chunks; request larger chunks so
        # the download is not processed one byte at a time.
        zip_archive.write_iter(path, stream.iter_content(chunk_size=8192))
    else:
        local_path = archive.load_file(content_hash)
        if local_path is not None:
            zip_archive.write(local_path, arcname=path)
Exemplo n.º 12
0
def file(document_id):
    """Serve the original file belonging to a document.

    Redirects to the URL produced by ``archive.generate_url`` when one is
    available; otherwise the file is loaded through ``archive.load_file`` and
    sent back as an attachment.

    Raises:
        NotFound: if the archive returns no local path or the path is not
            an existing file.
    """
    document = get_document(document_id)
    enable_cache(server_side=True)
    log_event(request, document_id=document.id)
    url = archive.generate_url(document.meta)
    if url is not None:
        return redirect(url)

    local_path = archive.load_file(document.meta)
    # os.path.isfile(None) raises TypeError, so test for None explicitly to
    # answer with a 404 instead of an unhandled error.
    if local_path is None or not os.path.isfile(local_path):
        raise NotFound("File does not exist.")

    fh = open(local_path, 'rb')
    return send_file(fh, as_attachment=True,
                     attachment_filename=document.meta.file_name,
                     mimetype=document.meta.mime_type)
Exemplo n.º 13
0
def pdf(document_id):
    """Serve the PDF version of a text document.

    Raises ``BadRequest`` for documents whose type is not
    ``Document.TYPE_TEXT``. Any failure while loading or opening the local
    PDF is reported as ``NotFound``.
    """
    document = get_document(document_id)
    enable_cache(server_side=True)
    log_event(request, document_id=document.id)
    if document.type != Document.TYPE_TEXT:
        raise BadRequest("PDF is only available for text documents")

    pdf_meta = document.meta.pdf
    target = archive.generate_url(pdf_meta)
    if target is not None:
        return redirect(target)

    try:
        handle = open(archive.load_file(pdf_meta), 'rb')
    except Exception as ex:
        raise NotFound("Missing PDF file: %r" % ex)
    return send_file(handle, mimetype=pdf_meta.mime_type)
Exemplo n.º 14
0
def retrieve():
    """Downloads a binary blob from the blob storage archive.
    ---
    get:
      summary: Download a blob from the archive
      parameters:
      - description: Authorization token for an archive blob
        in: query
        name: claim
        schema:
          type: string
          description: A signed JWT with the object hash.
      responses:
        '200':
          description: OK
          content:
            '*/*': {}
        '404':
          description: Object does not exist.
      tags:
      - Archive
    """
    role_id, content_hash, file_name, mime_type = archive_claim(
        request.args.get("claim")
    )
    # The claim is only valid for the role it was issued to.
    require(request.authz.id == role_id)
    tag_request(content_hash=content_hash, file_name=file_name)

    redirect_url = archive.generate_url(
        content_hash, file_name=file_name, mime_type=mime_type
    )
    if redirect_url is not None:
        return redirect(redirect_url)

    try:
        blob_path = archive.load_file(content_hash)
        if blob_path is not None:
            return send_file(
                str(blob_path),
                as_attachment=True,
                conditional=True,
                attachment_filename=file_name,
                mimetype=mime_type,
            )
        return Response(status=404)
    finally:
        # Runs on every exit path from the try block.
        archive.cleanup_file(content_hash)
Exemplo n.º 15
0
def _serve_archive(content_hash, file_name, mime_type):
    """Respond with an archived file, via redirect or direct transfer.

    A redirect is issued when the archive can generate a URL. Otherwise the
    file is loaded locally and sent as an attachment, or a bare 404 response
    is returned if the archive has no such file.
    """
    external_url = archive.generate_url(
        content_hash, file_name=file_name, mime_type=mime_type
    )
    if external_url is not None:
        return redirect(external_url)

    try:
        blob_path = archive.load_file(content_hash, file_name=file_name)
        if blob_path is not None:
            return send_file(
                blob_path,
                as_attachment=True,
                conditional=True,
                attachment_filename=file_name,
                mimetype=mime_type,
            )
        return Response(status=404)
    finally:
        # Runs on every exit path from the try block.
        archive.cleanup_file(content_hash)
Exemplo n.º 16
0
def _serve_archive(content_hash, file_name, mime_type):
    """Respond with an archived file, via redirect or direct transfer.

    A redirect is issued when the archive can generate a URL. Otherwise
    caching is enabled and the locally loaded file is sent as an attachment;
    ``NotFound`` is raised if the archive has no such file.
    """
    external_url = archive.generate_url(
        content_hash, file_name=file_name, mime_type=mime_type
    )
    if external_url is not None:
        return redirect(external_url)

    enable_cache()
    try:
        blob_path = archive.load_file(content_hash, file_name=file_name)
        if blob_path is None:
            raise NotFound("File does not exist.")
        handle = open(blob_path, 'rb')
        return send_file(
            handle,
            as_attachment=True,
            attachment_filename=file_name,
            mimetype=mime_type,
        )
    finally:
        # Runs on every exit path from the try block, including NotFound.
        archive.cleanup_file(content_hash)
Exemplo n.º 17
0
def retrieve():
    """Download an archive blob identified by the ``claim`` query argument.

    The claim is unpacked with ``archive_claim`` and must belong to the
    requesting role. The response is a redirect to an archive URL, the file
    itself as an attachment, or a bare 404 when the blob is missing.
    """
    role_id, content_hash, file_name, mime_type = archive_claim(
        request.args.get('claim')
    )
    # The claim is only valid for the role it was issued to.
    require(request.authz.id == role_id)
    tag_request(content_hash=content_hash, file_name=file_name)

    redirect_url = archive.generate_url(
        content_hash, file_name=file_name, mime_type=mime_type
    )
    if redirect_url is not None:
        return redirect(redirect_url)

    try:
        blob_path = archive.load_file(content_hash)
        if blob_path is not None:
            return send_file(
                str(blob_path),
                as_attachment=True,
                conditional=True,
                attachment_filename=file_name,
                mimetype=mime_type,
            )
        return Response(status=404)
    finally:
        # Runs on every exit path from the try block.
        archive.cleanup_file(content_hash)
Exemplo n.º 18
0
def retrieve():
    """Download an archive blob identified by the ``claim`` query argument.

    The claim is unpacked with ``archive_claim`` and must belong to the
    requesting role; the access is recorded as an audit event. The response
    is a redirect to an archive URL, the file itself as an attachment, or a
    bare 404 when the blob is missing.
    """
    role_id, content_hash, file_name, mime_type = archive_claim(
        request.args.get('claim')
    )
    # The claim is only valid for the role it was issued to.
    require(request.authz.id == role_id)
    record_audit(Audit.ACT_ARCHIVE, content_hash=content_hash)
    tag_request(content_hash=content_hash, file_name=file_name)

    redirect_url = archive.generate_url(
        content_hash, file_name=file_name, mime_type=mime_type
    )
    if redirect_url is not None:
        return redirect(redirect_url)

    try:
        blob_path = archive.load_file(content_hash)
        if blob_path is not None:
            return send_file(
                blob_path,
                as_attachment=True,
                conditional=True,
                attachment_filename=file_name,
                mimetype=mime_type,
            )
        return Response(status=404)
    finally:
        # Runs on every exit path from the try block.
        archive.cleanup_file(content_hash)