Пример #1
0
def get_document_pdf(request, pk):
    """
    Retrieve a document by primary key, as a PDF file.  Note that this
    is not a JSON view.

    The document is looked up among ``request.user.documents``.  If the
    document already has a stored PDF asset, that asset is instantiated
    locally, its bytes are returned and the temporary directory holding
    the local copy is removed.  Otherwise the PDF is rendered directly
    into the response.

    Returns an ``HttpResponse`` carrying the PDF as an attachment named
    ``doc-<pk>.pdf``.

    """
    document = request.user.documents.get(pk = pk)
    assets = document.assets.filter(
        asset_class__name = models.AssetClass.DOCUMENT,
        mime_type__name   = models.MimeType.PDF )

    # Both branches serve the file under the same attachment name.
    disposition = 'attachment; filename=doc-%d.pdf' % document.pk

    if len(assets) != 0:
        meta = operations.instantiate_asset(assets[0])
        local_path = meta['Local-Path']
        try:
            # Read the whole file inside a ``with`` block so the handle is
            # closed deterministically *before* its directory is deleted
            # below, instead of leaving an open file object attached to
            # the response.
            with open(local_path, 'rb') as pdf_file:
                response = HttpResponse(pdf_file.read())
            response['Content-Disposition'] = disposition
            response['Content-Type']   = meta['Content-Type']
            response['Content-Length'] = os.stat(local_path).st_size
            response['ETag' ]          = meta['ETag' ]
            response['Last-Modified']  = meta['Last-Modified']
        finally:
            # The local copy is only needed for this request; always
            # remove its directory, even if building the response failed.
            shutil.rmtree(os.path.dirname(local_path))
    else:
        # No stored PDF asset: render the document straight into the
        # response object.
        response = HttpResponse(content_type = 'application/pdf')
        response['Content-Disposition'] = disposition
        pdf.render_document(document, response, request.user.username,
                            str(document.pk))

    return response
Пример #2
0
def handle_work_item(processor, item):
    """ Build (or refresh) the PDF asset for the document behind a work item.

        The work item is supplied by the process driver framework, which
        also cleans up the item's local temp directory.  If this method
        does not raise an exception the work item will also be removed
        from the work queue.

        Raises NotReadyException while the number of pages carrying a
        PAGE_TEXT asset differs from the document's page count.

        Returns a one-element list holding the PDF asset, or None when
        handle_trial_account() reports that it dealt with the document.

    """
    source_asset = item['Asset-Instance']
    document = source_asset.related_document

    # Every page needs its OCR text asset before the PDF can be produced.
    pages_with_text = document.pages.filter(
        assets__asset_class__name=models.AssetClass.PAGE_TEXT)
    num_ocr_pages = pages_with_text.count()
    if document.num_pages != num_ocr_pages:
        raise NotReadyException(
            "Postponing PDF generation, OCR not complete for pages")

    # Render into a scratch buffer, then wrap the result in a fresh stream.
    rendered = pdf.render_document(
        document,
        output_buffer=StringIO(),
        username=document.owner.username,
        title=document.title)
    pdf_stream = StringIO(rendered.getvalue())

    # trial account handling -- send the PDF in attachment
    # and delete all associated assets
    if handle_trial_account(document, pdf_stream.getvalue()):
        return

    # classify the document based on the creation time of its PDF asset
    aggregate_window = datetime.timedelta(
        days=0, seconds=UPLOAD_AGGREGATE_TIME_TRESHOLD)
    tag_document(document, aggregate_window)

    existing_pdfs = document.assets.filter(
        asset_class__name=models.AssetClass.DOCUMENT,
        mime_type__name=models.MimeType.PDF)

    if len(existing_pdfs) == 0:
        # No PDF asset yet: create one as a child of the triggering asset.
        result_asset = operations.create_asset_from_stream(
            data_stream=pdf_stream,
            owner=item['Owner'],
            producer=processor,
            asset_class=models.AssetClass.DOCUMENT,
            related_document=document,
            file_name=document.title,
            parent=source_asset,
            child_number=1,
            mime_type=models.MimeType.PDF)
    else:
        # Reuse the first existing PDF asset and overwrite its content.
        result_asset = existing_pdfs[0]
        result_asset.producer = processor
        operations.upload_asset_stream(result_asset, pdf_stream)

    return [result_asset]
Пример #3
0
def handle_work_item(processor, item):
    """ Process a work item by generating the PDF for its related document.

        The work item will be provided and its local temp directory will
        be cleaned up by the process driver framework.  If this method
        does not raise an exception the work item will also be removed
        from the work queue.

        Raises NotReadyException when the count of pages that have a
        PAGE_TEXT asset does not equal document.num_pages.

        Returns a list containing the single PDF asset, or None if
        handle_trial_account() returned a true value.

    """
    doc = item["Asset-Instance"].related_document

    # Count the pages whose OCR text is already available.
    ocr_done = doc.pages.filter(
        assets__asset_class__name=models.AssetClass.PAGE_TEXT
    ).count()
    if doc.num_pages != ocr_done:
        raise NotReadyException("Postponing PDF generation, OCR not complete for pages")

    # Render into a throwaway buffer and copy the bytes into a new stream.
    render_buffer = pdf.render_document(
        doc,
        output_buffer=StringIO(),
        username=doc.owner.username,
        title=doc.title,
    )
    stream = StringIO(render_buffer.getvalue())

    # trial account handling -- send the PDF in attachment
    # and delete all associated assets
    if handle_trial_account(doc, stream.getvalue()):
        return

    # classify the document based on the creation time of its PDF asset
    tag_document(doc, datetime.timedelta(0, UPLOAD_AGGREGATE_TIME_TRESHOLD))

    current_pdfs = doc.assets.filter(
        asset_class__name=models.AssetClass.DOCUMENT,
        mime_type__name=models.MimeType.PDF,
    )

    if len(current_pdfs) != 0:
        # A PDF asset already exists: re-upload the new content into it.
        asset = current_pdfs[0]
        asset.producer = processor
        operations.upload_asset_stream(asset, stream)
        return [asset]

    # Otherwise create the PDF asset as a child of the triggering asset.
    asset = operations.create_asset_from_stream(
        data_stream=stream,
        owner=item["Owner"],
        producer=processor,
        asset_class=models.AssetClass.DOCUMENT,
        related_document=doc,
        file_name=doc.title,
        parent=item["Asset-Instance"],
        child_number=1,
        mime_type=models.MimeType.PDF,
    )
    return [asset]