Пример #1
0
def handle_work_item(processor, item):

    """ Pick up a (possibly) multipage PDF upload and turn it into a
        document having (possibly) multiple individual pages.

        `item` must provide the uploaded asset under 'Asset-Instance' and
        the path of its local copy under 'Local-Path'.  The PDF is split
        into per-page files stored next to that local copy.

        If the upload has no PAGE_ORIGINAL children yet, a new document is
        created and one page asset is created per page file.  Otherwise
        each page file is uploaded to the already existing child asset.

        Returns the list of page assets; when a new document was created,
        its binary DOCUMENT asset is appended as the last element.

    """

    asset       = item['Asset-Instance']
    local_path  = item['Local-Path']
    page_prefix = os.path.join(os.path.dirname(local_path), 'page-')
    asset_list  = []

    pdf.split_pages( local_path, page_prefix )

    if asset.get_children(models.AssetClass.PAGE_ORIGINAL).count() == 0:
        document = operations.create_document(
            asset.owner,
            title = 'Uploaded on %s (%s)' % (
                asset.date_created,
                asset.producer.process ))
    else:
        # The pages already exist; only their files are replaced below.
        document = None

    # NOTE(review): the page order relies on a lexicographic sort of the
    # file names -- presumably split_pages zero-pads the page numbers;
    # confirm, otherwise 'page-10' sorts before 'page-2'.
    all_page_files = sorted(glob.glob('%s*.pdf' % page_prefix))

    for position, page_pdf_path in enumerate(all_page_files, 1):
        if document is not None:
            # No trailing comma after the call: page_asset must be the
            # asset itself, not a 1-tuple wrapping it.
            page_asset = operations.create_asset_from_file(
                owner        = document.owner,
                producer     = processor,
                asset_class  = models.AssetClass.PAGE_ORIGINAL,
                file_name    = page_pdf_path,
                related_page = operations.create_page(document, position),
                parent       = asset,
                child_number = position,
                mime_type    = models.MimeType.PDF )
        else:
            # NOTE(review): children are created with child_number above;
            # confirm that 'position' is a valid lookup field here.
            page_asset = asset.children.get(position=position)
            operations.upload_asset_file(page_asset, page_pdf_path)

        asset_list.append(page_asset)

    # Only a freshly created document can be queried for its binary asset;
    # on a re-upload there is no document object to ask.
    if document is not None:
        asset_list.append(
            document.assets.get(
                asset_class__name = models.AssetClass.DOCUMENT,
                mime_type__name   = models.MimeType.BINARY ))

    return asset_list
Пример #2
0
def handle_work_item(processor, item):
    """ Pick up a (possibly) multipage PDF upload and turn it into a
        document having (possibly) multiple individual pages.

        `item` supplies the uploaded asset ('Asset-Instance') and the path
        of its local copy ('Local-Path').  The PDF is split into one file
        per page in the directory of the local copy.

        When the upload has no PAGE_ORIGINAL children, a document is
        created together with one page asset per page file; otherwise the
        page files are uploaded to the existing child assets.

        Returns a list of the page assets, followed by the binary DOCUMENT
        asset of the document if one was created by this call.

    """

    asset = item['Asset-Instance']
    local_path = item['Local-Path']
    work_dir = os.path.dirname(local_path)
    page_prefix = os.path.join(work_dir, 'page-')

    pdf.split_pages(local_path, page_prefix)

    has_pages = asset.get_children(
        models.AssetClass.PAGE_ORIGINAL).count() != 0

    document = None
    if not has_pages:
        document = operations.create_document(
            asset.owner,
            title='Uploaded on %s (%s)' %
            (asset.date_created, asset.producer.process))

    # NOTE(review): ordering depends on the file names sorting in page
    # order -- verify that split_pages produces zero-padded numbers.
    all_page_files = sorted(glob.glob('%s*.pdf' % page_prefix))

    asset_list = []
    for position, page_pdf_path in enumerate(all_page_files, 1):
        if document is not None:
            # The call must not be followed by a comma, or page_asset
            # becomes a 1-tuple instead of the created asset.
            page_asset = operations.create_asset_from_file(
                owner=document.owner,
                producer=processor,
                asset_class=models.AssetClass.PAGE_ORIGINAL,
                file_name=page_pdf_path,
                related_page=operations.create_page(document, position),
                parent=asset,
                child_number=position,
                mime_type=models.MimeType.PDF)
        else:
            # NOTE(review): new children are stored with child_number;
            # confirm 'position' is the intended lookup field.
            page_asset = asset.children.get(position=position)
            operations.upload_asset_file(page_asset, page_pdf_path)

        asset_list.append(page_asset)

    # Without a newly created document there is nothing to look up here.
    if document is not None:
        asset_list.append(
            document.assets.get(asset_class__name=models.AssetClass.DOCUMENT,
                                mime_type__name=models.MimeType.BINARY))

    return asset_list
Пример #3
0
def redo_page(
    processor,
    parent_asset,
    tiff_original_path,
    position ):

    """ Re-convert the given TIFF file (representing a single page) to a
        JPEG and a thumbnail.

        The PAGE_ORIGINAL, PAGE_IMAGE and PAGE_THUMBNAIL children of
        `parent_asset` at `position` are looked up; if any of them is
        missing, the page is skipped and None is returned.

        Otherwise the TIFF is converted to RGBA with the external
        `tiff2rgba` tool, a JPEG and a thumbnail JPEG are written next to
        the TIFF, and all three files are uploaded to their assets.

        Returns the list [original, image, thumbnail] of assets.
    """
    import subprocess

    # The local names carry an '_asset' suffix so that they do not shadow
    # the `image` module used for the conversions below.
    try:
        children = parent_asset.children

        original_asset = children.get(
            child_number = position,
            asset_class = models.AssetClass.PAGE_ORIGINAL)

        image_asset = children.get(
            child_number = position,
            asset_class = models.AssetClass.PAGE_IMAGE)

        thumbnail_asset = children.get(
            child_number = position,
            asset_class = models.AssetClass.PAGE_THUMBNAIL)

    except models.Asset.DoesNotExist:
        logging.debug("Skipping deleted page")
        return

    # Stuff we'll need later
    base_name    = os.path.splitext(tiff_original_path)[0]
    rgba_path    = '%s.rgba' % base_name
    jpeg_path    = '%s.jpeg' % base_name
    thumb_path   = '%s-thumbnail.jpeg' % base_name

    # Convert original TIFF to RGBA
    # TODO use convert instead of tiff2rgba
    # An argument list avoids shell quoting problems with the paths.
    subprocess.call(['tiff2rgba', tiff_original_path, rgba_path])

    # Save the original as JPEG
    image.save(
        image.load(rgba_path),
        jpeg_path)

    # Save the thumbnail as JPEG
    image.save(
        image.thumbnail(
            image.load(rgba_path),
            settings.THUMBNAIL_SIZE),
        thumb_path)

    # Upload the new asset files
    operations.upload_asset_file( original_asset,  tiff_original_path )
    operations.upload_asset_file( image_asset,     jpeg_path          )
    operations.upload_asset_file( thumbnail_asset, thumb_path         )

    # Put the assets into the work queue
    return [ original_asset, image_asset, thumbnail_asset ]
Пример #4
0
def redo_page(
    processor,
    parent_asset,
    pdf_orig_path,
    position ):

    """ Re-convert the given PDF file (representing a single page) to a
        JPEG and a thumbnail.

        The PAGE_ORIGINAL, PAGE_IMAGE and PAGE_THUMBNAIL children of
        `parent_asset` at `position` are looked up; if any of them is
        missing, the page is skipped and None is returned.

        Otherwise the PDF is converted to a JPEG, a thumbnail JPEG is
        derived from it, and all three files are uploaded to their assets.

        Returns the list [original, image, thumbnail] of assets.
    """
    try:
        # All three lookups go through the children manager, like the
        # lookup of the original page.
        children = parent_asset.children

        asset = {
            'original' : children.get(
                child_number = position,
                asset_class = models.AssetClass.PAGE_ORIGINAL),

            'image' : children.get(
                child_number = position,
                asset_class = models.AssetClass.PAGE_IMAGE),

            'thumbnail' : children.get(
                child_number = position,
                asset_class = models.AssetClass.PAGE_THUMBNAIL),
            }

    except models.Asset.DoesNotExist:
        logging.debug("Skipping deleted page")
        return

    # Stuff we'll need later
    base_name    = os.path.splitext(pdf_orig_path)[0]
    jpeg_path    = pdf.convert(pdf_orig_path, 'jpeg')
    thumb_path   = '%s-thumbnail.jpeg' % base_name

    # Save the re-converted JPEG as a new thumbnail JPEG
    image.save(
        image.thumbnail(
            image.load(jpeg_path),
            settings.THUMBNAIL_SIZE),
        thumb_path)

    # Upload the new asset files
    operations.upload_asset_file( asset['original'],  pdf_orig_path )
    operations.upload_asset_file( asset['image'],     jpeg_path     )
    operations.upload_asset_file( asset['thumbnail'], thumb_path    )

    # Put the assets into the work queue, in a fixed order rather than
    # in arbitrary dict order.
    return [ asset['original'], asset['image'], asset['thumbnail'] ]
Пример #5
0
def redo_page(processor, parent_asset, tiff_original_path, position):
    """ Re-convert the given TIFF file (representing a single page) to a
        JPEG and a thumbnail.

        Looks up the PAGE_ORIGINAL, PAGE_IMAGE and PAGE_THUMBNAIL children
        of `parent_asset` at `position`.  If one of them does not exist,
        the page is skipped and None is returned.

        Otherwise the TIFF is converted to RGBA by the external
        `tiff2rgba` tool, a JPEG and a thumbnail JPEG are written next to
        the TIFF, and the three files are uploaded to their assets.

        Returns the list [original, image, thumbnail] of assets.
    """
    import subprocess

    # Map each asset class to the child asset at this position.  The
    # results are kept in separate '*_asset' names so that the `image`
    # module stays reachable for the conversions below.
    try:
        children = parent_asset.children
        original_asset = children.get(
            child_number=position, asset_class=models.AssetClass.PAGE_ORIGINAL)
        image_asset = children.get(
            child_number=position, asset_class=models.AssetClass.PAGE_IMAGE)
        thumbnail_asset = children.get(
            child_number=position,
            asset_class=models.AssetClass.PAGE_THUMBNAIL)
    except models.Asset.DoesNotExist:
        logging.debug("Skipping deleted page")
        return

    # Stuff we'll need later
    base_name = os.path.splitext(tiff_original_path)[0]
    rgba_path = '%s.rgba' % base_name
    jpeg_path = '%s.jpeg' % base_name
    thumb_path = '%s-thumbnail.jpeg' % base_name

    # Convert original TIFF to RGBA
    # TODO use convert instead of tiff2rgba
    # Passing the arguments as a list sidesteps shell quoting of the paths.
    subprocess.call(['tiff2rgba', tiff_original_path, rgba_path])

    # Save the original as JPEG
    image.save(image.load(rgba_path), jpeg_path)

    # Save the thumbnail as JPEG
    image.save(image.thumbnail(image.load(rgba_path), settings.THUMBNAIL_SIZE),
               thumb_path)

    # Upload the new asset files
    operations.upload_asset_file(original_asset, tiff_original_path)
    operations.upload_asset_file(image_asset, jpeg_path)
    operations.upload_asset_file(thumbnail_asset, thumb_path)

    # Put the assets into the work queue
    return [original_asset, image_asset, thumbnail_asset]