def file(document_id):
    """Serve the original source file for a document.

    Redirects to a signed archive URL when the blob store can generate
    one; otherwise streams the locally cached copy as an attachment.
    """
    document = get_document(document_id)
    url = archive.generate_url(document.meta)
    if url is not None:
        # Hand off to the blob store directly instead of proxying bytes.
        return redirect(url)
    local_path = archive.load_file(document.meta)
    # Guard against a missing/unavailable local copy so callers get a 404
    # instead of a crash in open() — consistent with the other
    # file-serving endpoint in this codebase.
    if local_path is None or not os.path.isfile(local_path):
        raise NotFound("File does not exist.")
    fh = open(local_path, 'rb')
    return send_file(fh, as_attachment=True,
                     attachment_filename=document.meta.file_name,
                     mimetype=document.meta.mime_type)
def view(document_id):
    """Return a document's metadata as JSON, including data/PDF URLs."""
    doc = get_document(document_id)
    enable_cache()
    data = doc.to_dict()
    log_event(request, document_id=doc.id)
    # Prefer a signed blob-store URL; fall back to our own file endpoint.
    data_url = archive.generate_url(doc.meta)
    if data_url is None:
        data_url = url_for('documents_api.file', document_id=document_id)
    data['data_url'] = data_url
    if doc.meta.is_pdf:
        # The source file already is a PDF, re-use its URL.
        data['pdf_url'] = data_url
    else:
        try:
            data['pdf_url'] = archive.generate_url(doc.meta.pdf)
        except Exception as ex:
            # Best-effort: log and fall through to the local endpoint.
            log.info('Could not generate PDF url: %r', ex)
        if data.get('pdf_url') is None:
            data['pdf_url'] = url_for('documents_api.pdf',
                                      document_id=document_id)
    return jsonify(data)
def pdf(document_id):
    """Serve the PDF rendition of a text document.

    Raises BadRequest for non-text documents and NotFound when the
    rendition cannot be loaded from the archive.
    """
    document = get_document(document_id)
    if document.type != Document.TYPE_TEXT:
        raise BadRequest("PDF is only available for text documents")
    pdf = document.meta.pdf
    url = archive.generate_url(pdf)
    if url is not None:
        return redirect(url)
    # Surface a 404 rather than an unhandled 500 when the local PDF copy
    # is missing — consistent with the other PDF-serving endpoint here.
    try:
        local_path = archive.load_file(pdf)
        fh = open(local_path, 'rb')
    except Exception as ex:
        raise NotFound("Missing PDF file: %r" % ex)
    return send_file(fh, mimetype=pdf.mime_type)
def _get_table_csv_link(table):
    """Resolve a URL for the CSV rendition of a source table.

    Prefers a remote archive URL, falls back to a local file path.
    Raises RuntimeError when neither can be produced.
    """
    proxy = model.get_proxy(table)
    csv_hash = proxy.first("csvHash")
    if csv_hash is None:
        raise RuntimeError("Source table doesn't have a CSV version")
    url = archive.generate_url(csv_hash)
    if url is not None:
        return url
    cached = archive.load_file(csv_hash)
    if cached is not None:
        return cached.as_posix()
    raise RuntimeError("Could not generate CSV URL for the table")
def view(document_id):
    """Return a document's metadata as JSON, with parent and URLs."""
    doc = get_document(document_id)
    enable_cache()
    data = doc.to_dict()
    parent = doc.parent
    if parent is not None:
        data['parent'] = parent.to_dict()
    log_event(request, document_id=doc.id)
    # Prefer a signed blob-store URL; fall back to our own file endpoint.
    content_url = archive.generate_url(doc.content_hash)
    if content_url is None:
        content_url = url_for('documents_api.file',
                              document_id=document_id)
    data['data_url'] = content_url
    if doc.pdf_version:
        data['pdf_url'] = url_for('documents_api.pdf',
                                  document_id=document_id)
    return jsonify(data)
def retrieve():
    """Downloads a binary blob from the blob storage archive.
    ---
    get:
      summary: Download a blob from the archive
      parameters:
      - description: Authorization token for an archive blob
        in: query
        name: token
        schema:
          type: string
          description: A signed JWT with the object hash.
      responses:
        '200':
          description: OK
          content:
            '*/*': {}
        '404':
          description: Object does not exist.
      tags:
      - Archive
    """
    # NOTE(review): the docstring previously documented the query parameter
    # as "claim" while the code reads "token"; corrected above.
    token = request.args.get("token")
    # NOTE(review): verify=True is the legacy PyJWT 1.x API; PyJWT >= 2
    # requires algorithms=[...] instead — confirm the pinned version.
    token = jwt.decode(token, key=settings.SECRET_KEY, verify=True)
    # Claims: c = content hash, f = file name, m = MIME type.
    content_hash = token.get("c")
    file_name = token.get("f")
    mime_type = token.get("m")
    # The JWT expiry doubles as the expiry for the signed archive URL.
    expire = datetime.utcfromtimestamp(token["exp"])
    tag_request(content_hash=content_hash, file_name=file_name)
    url = archive.generate_url(
        content_hash,
        file_name=file_name,
        mime_type=mime_type,
        expire=expire,
    )
    if url is not None:
        return redirect(url)
    try:
        local_path = archive.load_file(content_hash)
        if local_path is None:
            return Response(status=404)
        return send_file(
            str(local_path),
            as_attachment=True,
            conditional=True,
            attachment_filename=file_name,
            mimetype=mime_type,
        )
    finally:
        # load_file may materialise a temporary copy; always clean up.
        archive.cleanup_file(content_hash)
def get_table_csv_link(table_id):
    """Resolve a URL (remote or local path) for a table's CSV version."""
    table = get_entity(table_id)
    properties = table.get('properties', {})
    csv_hash = first(properties.get('csvHash'))
    if csv_hash is None:
        raise RuntimeError("Source table doesn't have a CSV version")
    url = archive.generate_url(csv_hash)
    if not url:
        # No remote URL available — try a locally cached copy instead.
        cached = archive.load_file(csv_hash)
        if cached is not None:
            url = cached.as_posix()
    if url is None:
        raise RuntimeError("Could not generate CSV URL for the table")
    return url
def pdf(document_id):
    """Serve the PDF rendition of a text document.

    Redirects to a signed archive URL when possible; otherwise streams
    the locally loaded PDF. 404s when the rendition is missing.
    """
    document = get_document(document_id)
    enable_cache(server_side=True)
    log_event(request, document_id=document.id)
    if document.type != Document.TYPE_TEXT:
        raise BadRequest("PDF is only available for text documents")
    remote_url = archive.generate_url(document.pdf_version,
                                      mime_type=PDF_MIME)
    if remote_url is not None:
        return redirect(remote_url)
    local_path = archive.load_file(document.pdf_version,
                                   file_name=document.file_name)
    if local_path is None:
        raise NotFound("Missing PDF file.")
    fh = open(local_path, 'rb')
    return send_file(fh, mimetype=PDF_MIME)
def write_document(zip_archive, collection, entity):
    """Add the source file behind an entity to the export ZIP archive."""
    if not entity.has('contentHash', quiet=True):
        # Entity has no attached file — nothing to export.
        return
    base_name = entity.first('fileName') or entity.caption
    arc_name = "{0}-{1}".format(entity.id, base_name)
    arc_path = os.path.join(collection.get('label'), arc_name)
    content_hash = entity.first('contentHash')
    url = archive.generate_url(content_hash)
    if url is None:
        # No remote URL — fall back to a locally loaded copy, if any.
        local_path = archive.load_file(content_hash)
        if local_path is not None:
            zip_archive.write(local_path, arcname=arc_path)
    else:
        # Stream straight from the blob store into the ZIP.
        stream = requests.get(url, stream=True)
        zip_archive.write_iter(arc_path, stream.iter_content())
def make_mapper(collection, mapping):
    """Build a mapping processor fed by the mapping's source CSV table."""
    table = get_entity(mapping.table_id)
    properties = table.get('properties', {})
    csv_hash = first(properties.get('csvHash'))
    if csv_hash is None:
        raise RuntimeError("Source table doesn't have a CSV version")
    url = archive.generate_url(csv_hash)
    if not url:
        # No remote URL — fall back to a local file path.
        cached = archive.load_file(csv_hash)
        if cached is not None:
            url = cached.as_posix()
    if url is None:
        raise RuntimeError("Could not generate CSV URL for the table")
    return model.make_mapping({'csv_url': url, 'entities': mapping.query},
                              key_prefix=collection.foreign_id)
def write_document(zip_archive, collection, entity):
    """Write an entity's source file into the export archive."""
    if not entity.has('contentHash', quiet=True):
        return
    file_name = entity.first('fileName') or entity.caption
    path = os.path.join(collection.get('label'),
                        "{0}-{1}".format(entity.id, file_name))
    content_hash = entity.first('contentHash')
    url = archive.generate_url(content_hash)
    if url is not None:
        # Remote blob available: stream it directly into the ZIP.
        response = requests.get(url, stream=True)
        zip_archive.write_iter(path, response.iter_content())
        return
    local_path = archive.load_file(content_hash)
    if local_path is not None:
        zip_archive.write(local_path, arcname=path)
def file(document_id):
    """Serve a document's original source file.

    Redirects to a signed archive URL when available; otherwise streams
    the locally cached copy, 404-ing if it is not on disk.
    """
    document = get_document(document_id)
    enable_cache(server_side=True)
    log_event(request, document_id=document.id)
    meta = document.meta
    remote_url = archive.generate_url(meta)
    if remote_url is not None:
        return redirect(remote_url)
    local_path = archive.load_file(meta)
    if not os.path.isfile(local_path):
        raise NotFound("File does not exist.")
    return send_file(open(local_path, 'rb'),
                     as_attachment=True,
                     attachment_filename=meta.file_name,
                     mimetype=meta.mime_type)
def pdf(document_id):
    """Serve the PDF rendition for a text document, 404-ing if missing."""
    document = get_document(document_id)
    enable_cache(server_side=True)
    log_event(request, document_id=document.id)
    if document.type != Document.TYPE_TEXT:
        raise BadRequest("PDF is only available for text documents")
    pdf = document.meta.pdf
    url = archive.generate_url(pdf)
    if url is not None:
        return redirect(url)
    # Both loading and opening can fail; either way report a 404.
    try:
        fh = open(archive.load_file(pdf), 'rb')
    except Exception as ex:
        raise NotFound("Missing PDF file: %r" % ex)
    return send_file(fh, mimetype=pdf.mime_type)
def retrieve():
    """Downloads a binary blob from the blob storage archive.
    ---
    get:
      summary: Download a blob from the archive
      parameters:
      - description: Authorization token for an archive blob
        in: query
        name: claim
        schema:
          type: string
          description: A signed JWT with the object hash.
      responses:
        '200':
          description: OK
          content:
            '*/*': {}
        '404':
          description: Object does not exist.
      tags:
      - Archive
    """
    claim = request.args.get("claim")
    role_id, content_hash, file_name, mime_type = archive_claim(claim)
    # The claim is bound to a specific role; reject any other caller.
    require(request.authz.id == role_id)
    tag_request(content_hash=content_hash, file_name=file_name)
    url = archive.generate_url(content_hash,
                               file_name=file_name,
                               mime_type=mime_type)
    if url is not None:
        return redirect(url)
    try:
        local_path = archive.load_file(content_hash)
        if local_path is None:
            return Response(status=404)
        return send_file(str(local_path),
                         as_attachment=True,
                         conditional=True,
                         attachment_filename=file_name,
                         mimetype=mime_type)
    finally:
        # load_file may create a temporary copy; always clean up.
        archive.cleanup_file(content_hash)
def _serve_archive(content_hash, file_name, mime_type):
    """Serve an archive blob, preferring a signed external URL."""
    remote_url = archive.generate_url(content_hash,
                                      file_name=file_name,
                                      mime_type=mime_type)
    if remote_url is not None:
        return redirect(remote_url)
    try:
        path = archive.load_file(content_hash, file_name=file_name)
        if path is None:
            return Response(status=404)
        return send_file(path,
                         as_attachment=True,
                         conditional=True,
                         attachment_filename=file_name,
                         mimetype=mime_type)
    finally:
        # load_file may create a temporary copy; always clean up.
        archive.cleanup_file(content_hash)
def _serve_archive(content_hash, file_name, mime_type):
    """Serve an archive blob, redirecting to an external URL if possible."""
    remote_url = archive.generate_url(content_hash,
                                      file_name=file_name,
                                      mime_type=mime_type)
    if remote_url is not None:
        return redirect(remote_url)
    enable_cache()
    try:
        path = archive.load_file(content_hash, file_name=file_name)
        if path is None:
            raise NotFound("File does not exist.")
        return send_file(open(path, 'rb'),
                         as_attachment=True,
                         attachment_filename=file_name,
                         mimetype=mime_type)
    finally:
        # load_file may create a temporary copy; always clean up.
        archive.cleanup_file(content_hash)
def retrieve():
    """Serve an archive blob authorized by a signed claim token."""
    claim = request.args.get('claim')
    role_id, content_hash, file_name, mime_type = archive_claim(claim)
    # The claim is bound to a specific role; reject any other caller.
    require(request.authz.id == role_id)
    tag_request(content_hash=content_hash, file_name=file_name)
    remote_url = archive.generate_url(content_hash,
                                      file_name=file_name,
                                      mime_type=mime_type)
    if remote_url is not None:
        return redirect(remote_url)
    try:
        path = archive.load_file(content_hash)
        if path is None:
            return Response(status=404)
        return send_file(str(path),
                         as_attachment=True,
                         conditional=True,
                         attachment_filename=file_name,
                         mimetype=mime_type)
    finally:
        # load_file may create a temporary copy; always clean up.
        archive.cleanup_file(content_hash)
def retrieve():
    """Serve an archive blob authorized by a signed claim, with auditing."""
    claim = request.args.get('claim')
    role_id, content_hash, file_name, mime_type = archive_claim(claim)
    # The claim is bound to a specific role; reject any other caller.
    require(request.authz.id == role_id)
    record_audit(Audit.ACT_ARCHIVE, content_hash=content_hash)
    tag_request(content_hash=content_hash, file_name=file_name)
    remote_url = archive.generate_url(content_hash,
                                      file_name=file_name,
                                      mime_type=mime_type)
    if remote_url is not None:
        return redirect(remote_url)
    try:
        path = archive.load_file(content_hash)
        if path is None:
            return Response(status=404)
        return send_file(path,
                         as_attachment=True,
                         conditional=True,
                         attachment_filename=file_name,
                         mimetype=mime_type)
    finally:
        # load_file may create a temporary copy; always clean up.
        archive.cleanup_file(content_hash)