Python PdfArray примеры, pdfrw.PdfArray Python примеры использования

Пример #1

0

Показать файл

Файл: makesymind.py Проект: dominique-unruh/declmath

def popup_bg_links(show_ocmd, ocgs):
    """Add a full-page link annotation to every page of ``popup_pdf``.

    Activating the link runs a ``SetOCGState`` action that switches all of
    ``ocgs`` OFF.  ``pdf_popup_config['backgroundlink']`` selects where the
    link goes in each page's ``Annots`` array: ``'back'`` inserts it at the
    start, ``'front'`` appends it, and a falsy value disables the feature.

    :param show_ocmd: object stored as the annotation's ``OC`` entry.
    :param ocgs: list of optional content groups turned off by the link.
    :raises ValueError: if the setting is neither falsy, 'front' nor 'back'.
    """
    from pdfrw import PdfDict, PdfArray, PdfName

    placement = pdf_popup_config['backgroundlink']
    if not placement:
        return
    if placement not in ('front', 'back'):
        raise ValueError(
            "pdf_popup_config['backgroundlink'] must be front or back or None")

    for page in popup_pdf.pages:
        debug = pdf_popup_config['backgroundlink-debug']
        # In debug mode the link covers a fixed rectangle and is drawn with a
        # visible red border instead of spanning the whole MediaBox.
        rect = [90, 800, 180, 200] if debug else page.MediaBox
        link = PdfDict(
            indirect=True,
            Type=PdfName.Annot,
            H=PdfName.N,
            Subtype=PdfName.Link,
            Rect=rect,
            #F=2, # Link is hidden
            Border=[0, 0, 10] if debug else [0, 0, 0],
            C=[1, 0, 0] if debug else None,
            OC=show_ocmd,
            A=PdfDict(S=PdfName.SetOCGState,
                      State=PdfArray([PdfName.OFF] + ocgs)),
        )

        if page.Annots is None:
            page.Annots = PdfArray()
        # ``placement`` was validated above, so only two values are possible.
        if placement == 'back':
            page.Annots.insert(0, link)
        else:
            page.Annots.append(link)

Пример #2

0

Показать файл

Файл: print_two.py Проект: zwj2017-NK/PDF-Malware-Parser

def fixpage(page, count=[0]):
    """Rotate a page by 90 degrees and place its content on it twice.

    The new MediaBox is ``[0, 0, old_height, 2 * old_width]``; every other
    ``*Box`` entry of the page is removed.  ``count`` is a deliberately
    mutable default: it survives between calls and serves as a running page
    counter, so that even-numbered pages get an additional translation.

    :param page: pdfrw page dictionary, modified in place.
    :returns: the same ``page`` object.
    """
    count[0] += 1
    is_even = count[0] % 2 == 0

    # For demo purposes, just go with the MediaBox and toast the others
    media = [float(value) for value in page.MediaBox]
    assert media[0] == media[1] == 0, "demo won't work on this PDF"

    box_keys = [name for name, _ in sorted(page.iteritems())
                if 'box' in name.lower()]
    for name in box_keys:
        del page[name]

    finalsize = (media[3], 2 * media[2])
    page.MediaBox = PdfArray((0, 0) + finalsize)
    page.Rotate = (int(page.Rotate or 0) + 90) % 360

    streams = page.Contents
    if streams is None:
        return page
    if isinstance(streams, dict):
        # A single content stream: wrap it so it can be concatenated below.
        streams = [streams]

    transform = '0 1 -1 0 %s %s cm\n' % (finalsize[0], 0)
    if is_even:
        transform = '1 0 0 1 %s %s cm\n' % (0, finalsize[1] / 2) + transform
    opening = IndirectPdfDict(
        stream='q\n-1 0 0 -1 %s %s cm\n' % finalsize + transform)
    separator = IndirectPdfDict(stream='\nQ\n' + transform)

    # Two copies of "separator + content"; the very first separator is then
    # swapped for the opening stream.
    doubled = PdfArray(([separator] + streams) * 2)
    doubled[0] = opening
    page.Contents = doubled
    return page

Пример #3

0

Показать файл

def create_bookmarks(bookmarks, pages, parent=None):
    """Recursively turn a bookmark tree into linked PDF outline items.

    :param bookmarks: iterable of ``(label, target, children)`` where
        ``target`` is indexed as ``(page_index, x, y)`` and ``children`` has
        the same structure as ``bookmarks``.
    :param pages: sequence of page objects; ``pages[i].indirect`` is used as
        the destination page.
    :param parent: outline item stored as ``Parent`` on each created item.
    :returns: ``(items, count)`` - the items created at this level and the
        number of bookmarks at this level plus all nested levels.
    """
    total = len(bookmarks)
    siblings = []
    for label, target, children in bookmarks:
        destination = (pages[target[0]].indirect, PdfName('XYZ'), target[1],
                       target[2], 0)
        action = PdfDict(Type=PdfName('Action'),
                         S=PdfName('GoTo'),
                         D=PdfArray(destination))
        item = PdfDict(Title=PdfString.encode(label), A=action)
        item.indirect = True

        nested, nested_count = create_bookmarks(children, pages, parent=item)
        item.Count = 1 + nested_count

        # Chain this item to the previous sibling, if there is one.
        if siblings:
            previous = siblings[-1]
            item.Prev = previous
            previous.Next = item
        if nested:
            item.First = nested[0]
            item.Last = nested[-1]
        if parent is not None:
            item.Parent = parent

        total += nested_count
        siblings.append(item)
    return siblings, total

Пример #4

0

Показать файл

Файл: booklet.py Проект: sandyroddick/pdfrw-fork

def fixpage(*pages):
    """Combine the given pages side by side into one new page dictionary.

    Each input page is converted to a form XObject with ``pagexobj`` and
    drawn shifted right by the accumulated width of the pages before it.
    The resulting MediaBox is as wide as the sum of the widths and as tall
    as the tallest page.

    :param pages: source pages, in left-to-right order.
    :returns: an ``IndirectPdfDict`` describing the combined page.
    """
    pages = [pagexobj(x) for x in pages]

    class PageStuff(tuple):
        pass

    x = y = 0
    for i, page in enumerate(pages):
        index = '/P%s' % i
        shift_right = '1 0 0 1 %s 0 cm ' % x if x else ''
        stuff = PageStuff((index, page))
        stuff.stream = 'q %s%s Do Q\n' % (shift_right, index)
        x += page.BBox[2]
        y = max(y, page.BBox[3])
        pages[i] = stuff

    # Multiple copies of first page used as a placeholder to
    # get blank page on back.
    # Collect the duplicates first: removing entries while zip() is still
    # lazily walking the same list (Python 3) would skip neighbours.
    placeholders = [p1 for p1, p2 in zip(pages, pages[1:]) if p1[1] is p2[1]]
    for placeholder in placeholders:
        pages.remove(placeholder)

    return IndirectPdfDict(
        Type=PdfName.Page,
        Contents=PdfDict(stream=''.join(page.stream for page in pages)),
        MediaBox=PdfArray([0, 0, x, y]),
        Resources=PdfDict(XObject=PdfDict(pages), ),
    )

Пример #5

0

Показать файл

def fixpage(page, watermark):
    """Register ``watermark`` as an XObject on ``page`` and draw it last.

    The existing content is wrapped between a ``q`` stream and a ``Q``
    stream, and the watermark is painted by the trailing stream.

    :param page: pdfrw page dictionary, modified in place.
    :param watermark: object stored in the page's XObject dictionary.
    :returns: the same ``page`` object.
    """
    # Locate the resource dictionary (possibly inherited); create one if the
    # page has none.
    res = page.inheritable.Resources
    if res is None:
        res = page.Resources = PdfDict()

    # Same for the XObject sub-dictionary.
    xobjects = res.XObject
    if xobjects is None:
        xobjects = res.XObject = PdfDict()

    # Pick the first unused '/Watermark.N' name so that watermarks can be
    # stacked any number of times.
    suffix = 0
    watermark_name = '/Watermark.%d' % suffix
    while watermark_name in xobjects:
        suffix += 1
        watermark_name = '/Watermark.%d' % suffix
    xobjects[watermark_name] = watermark

    # Make sure the page content is an array of streams.
    streams = page.Contents
    if not isinstance(streams, PdfArray):
        streams = page.Contents = PdfArray([streams])

    # Save the graphics state up front ...
    streams.insert(0, IndirectPdfDict(stream='q\n'))

    # ... then restore it and paint the watermark at the end.
    streams.append(IndirectPdfDict(stream='Q %s Do\n' % watermark_name))
    return page

Пример #6

0

Показать файл

Файл: pdf_transition_system.py Проект: duduuu/ast_lstm

def ast_to_pdf(asdl_ast):
    """Convert an ASDL syntax tree node back into a pdfrw object.

    The node's ``production.constructor.name`` selects which pdfrw type is
    built; container nodes are converted recursively.

    :param asdl_ast: node exposing ``production.constructor.name`` and
        field access via ``node[field].value``.
    :returns: the pdfrw object built for the node.
    :raises ValueError: if the constructor name is not one handled here.
    """
    constructor_name = asdl_ast.production.constructor.name

    if constructor_name == 'PdfDict':
        pdict = {}
        for arg in asdl_ast['args'].value:
            # NOTE(review): 'name' and 'op' are read from the parent node on
            # every iteration rather than from ``arg`` - confirm against the
            # grammar whether ``arg['name']`` / ``arg['op']`` was intended.
            # (The original also had an empty special case for the keys
            # '/Parent', '/P', '/Dest' and '/Prev'.)
            name = BasePdfName(asdl_ast['name'].value)
            pdict[name] = ast_to_pdf(asdl_ast['op'].value)
        return PdfDict(pdict)

    elif constructor_name == 'PdfArray':
        # Fixed: the elements used to be appended to an undefined ``args``
        # and the array was built from an undefined ``pdfarray``.
        parray = [ast_to_pdf(arg) for arg in asdl_ast['args'].value]
        return PdfArray(parray)

    elif constructor_name == 'PdfList':
        return PdfArray(list(asdl_ast['value'].value))

    elif constructor_name == 'PdfObject':
        return PdfObject(asdl_ast['value'].value)

    elif constructor_name == 'PdfStr':
        return PdfStr(asdl_ast['value'].value)

    elif constructor_name == 'BasePdfName':
        return BasePdfName(asdl_ast['value'].value)

    # Previously this fell through to an unbound local variable.
    raise ValueError(
        'unsupported constructor: {!r}'.format(constructor_name))

Пример #7

0

Показать файл

Файл: makesymind.py Проект: dominique-unruh/declmath

def make_popup(page, rect, popupname, popup, code):
    """Attach a popup to ``page`` that is shown by clicking a link at ``rect``.

    A link annotation with a ``SetOCGState`` action is added to the page, the
    popup is registered as an XObject, and a drawing command guarded by an
    optional-content membership dictionary is appended to the page content.

    :param page: pdfrw page dictionary, modified in place.
    :param rect: rectangle of the link; ``rect[0]`` and ``rect[1]`` also
        position the popup.
    :param popupname: suffix of the popup's XObject resource name.
    :param popup: XObject to draw; its ``BBox[3]`` is used as its height.
    :param code: pair ``(codeword_on, codeword_off)`` of OCG lists that the
        link switches ON and OFF respectively.
    """
    from pdfrw import PdfDict, PdfArray, PdfName
    from pdfrw.uncompress import uncompress
    codeword_on, codeword_off = code

    show_action = PdfDict(S=PdfName.SetOCGState,
                          State=PdfArray([PdfName.OFF] + codeword_off +
                                         [PdfName.ON] + codeword_on))

    link = PdfDict(indirect=True,
                   Type=PdfName.Annot,
                   H=PdfName.I,
                   Subtype=PdfName.Link,
                   A=show_action,
                   Rect=rect)

    if pdf_popup_config['popuplinkcolor']:
        link.C = PdfArray(pdf_popup_config['popuplinkcolor'])
    else:
        link.Border = [0, 0, 0]

    # A page without annotations has no Annots array yet; create it instead
    # of failing on ``None.append``.
    if page.Annots is None:
        page.Annots = PdfArray()
    page.Annots.append(link)

    ocmd = PdfDict(Type=PdfName.OCMD, OCGs=codeword_on, P=PdfName.AllOn)

    popup_pdfname = '/SPopup' + popupname
    ocmd_pdfname = '/SPopupOCMD{}'.format(popup_unique_id())

    if not page.Resources.Properties:
        page.Resources.Properties = PdfDict()
    if not page.Resources.XObject:
        page.Resources.XObject = PdfDict()

    page.Resources.XObject[popup_pdfname] = popup
    page.Resources.Properties[ocmd_pdfname] = ocmd
    if page.Contents.Filter:
        # Important. Otherwise appending to the stream would add plain text
        # to a compressed stream.
        uncompress([page.Contents])
    draw_popup = "q /OC {ocmd} BDC 1 0 0 1 {x} {y} cm {popup} Do EMC Q\n"
    page.Contents.stream += draw_popup.format(
        x=rect[0],
        y=float(rect[1]) - popup.BBox[3],
        ocmd=ocmd_pdfname,
        popup=popup_pdfname)

Пример #8

0

Показать файл

def create_highlight(points,
                     color=(1, 0.92, 0.23),
                     author=None,
                     contents=None):
    """Given quad points, create a highlight object in standard PDF format.

    :param points: iterable of ``(x1, y1, x2, y2)`` boxes to highlight.
    :param color: value stored as the annotation's ``C`` entry.
    :param author: optional value stored as ``T``.
    :param contents: optional value stored as ``Contents``.
    :returns: an indirect ``PdfDict`` with ``QuadPoints`` and a ``Rect``
        enclosing all the given boxes.
    """
    new_highlight = PdfDict()
    new_highlight.F = 4
    new_highlight.Type = PdfName('Annot')
    new_highlight.Subtype = PdfName('Highlight')
    if author:
        new_highlight.T = author
    new_highlight.C = color
    if contents:
        new_highlight.Contents = contents
    new_highlight.indirect = True

    # Collect the quad points and every coordinate needed for the bounds.
    quad_pts = []
    xs = []
    ys = []
    for (x1, y1, x2, y2) in points:
        # Corner order written to QuadPoints for each box.
        quad_pts.extend([x1, y2, x2, y2, x1, y1, x2, y1])
        xs.extend((x1, x2))
        ys.extend((y1, y2))

    if xs:
        # Take the true extrema.  The maxima used to be seeded with 0.0,
        # which produced a wrong Rect for boxes at negative coordinates.
        rect = [min(xs), min(ys), max(xs), max(ys)]
    else:
        # No boxes given: keep the degenerate rectangle that was produced
        # historically for this case.
        rect = [float('inf'), float('inf'), 0.0, 0.0]

    new_highlight.QuadPoints = PdfArray(quad_pts)
    new_highlight.Rect = PdfArray(rect)
    return new_highlight

Пример #9

0

Показать файл

Файл: pdfmarker.py Проект: Modelmat/pdf-bookmarker

def parse_page_labels(page_labels: PdfArray, number_pages: int) -> List[str]:
    """Expand a PDF page-label number tree into one label per page.

    ``page_labels`` is read as a flat sequence of ``start_index, options``
    pairs; each range extends to the next start index (or to
    ``number_pages`` for the last one).  The argument is not modified.

    Entries read from each ``options`` dictionary:

    * ``S`` - numbering style: ``/D`` decimal, ``/r`` / ``/R`` lower/upper
      case Roman, ``/a`` / ``/A`` lower/upper case letters.  Without a
      known style the numeric part of the label is empty.
    * ``P`` (optional) - prefix prepended to every label of the range.
    * ``St`` (optional) - number of the first page in the range, default 1.

    :param page_labels: the ``Nums`` array of the page-label tree.
    :param number_pages: total number of pages in the document.
    :returns: list of page label strings in page order.
    """

    def letters(alphabet):
        # 1 -> 'a', 26 -> 'z', 27 -> 'aa', 28 -> 'bb', ... as defined for
        # the letter styles of PDF page labels.
        def convert(number):
            repeat, index = divmod(number - 1, len(alphabet))
            return alphabet[index] * (repeat + 1)
        return convert

    option_mapping = {
        "/D": str,
        "/r": lambda x: to_roman(x).lower(),
        "/R": to_roman,
        "/a": letters(ascii_lowercase),
        "/A": letters(ascii_uppercase),
    }

    # Work on a copy with the final stop position appended, so the caller's
    # array stays untouched and the function can be called repeatedly.
    entries = list(page_labels)
    entries.append(number_pages)

    page_numbers = []
    for i in range(0, len(entries) - 1, 2):
        start, options, stop = entries[i:i + 3]
        page_count = int(stop) - int(start)
        first_number = int(options.St or 1)

        # The range always holds ``page_count`` labels, whatever /St is.
        formatter = option_mapping.get(options.S, lambda _number: "")
        labels = [formatter(number)
                  for number in range(first_number,
                                      first_number + page_count)]

        # NOTE(review): the prefix is concatenated as stored; if it is a raw
        # PDF string such as "(A-)", the parentheses end up in the label -
        # confirm whether it should be decoded first.
        prefix = options.P
        if prefix is not None and prefix != "()":
            labels = [prefix + label for label in labels]

        page_numbers.extend(labels)

    return page_numbers

Пример #10

0

Показать файл

def apply_annotations(rmpage, page_annot, ocgorderinner):
    """Append one PDF annotation to ``rmpage`` per grouped annotation.

    :param rmpage: pdfrw page dictionary, modified in place.
    :param page_annot: sequence of layers; element ``[1]`` of each layer is
        the list of annotations, each indexed as
        ``(subtype, x1, y1, x2, y2)``.
    :param ocgorderinner: optional per-layer sequence; when truthy, entry
        ``k`` is stored as ``OC`` on annotations of layer ``k``.
    """
    for layer_index, layer in enumerate(page_annot):
        for item in layer[1]:
            author = 'RCU'  #self.model.device_info['rcuname']
            # PDF's origin is the bottom-left corner, so both y values are
            # subtracted from the page height.
            rect = PdfArray([
                item[1] * PTPERPX,
                PDFHEIGHT - (item[2] * PTPERPX),
                item[3] * PTPERPX,
                PDFHEIGHT - (item[4] * PTPERPX),
            ])
            annotation = PdfDict(Type=PdfName('Annot'),
                                 Rect=rect,
                                 T=author,
                                 ANN='pdfmark',
                                 Subtype=PdfName(item[0]),
                                 P=rmpage)
            # Indirect objects give a cleaner PDF output.
            annotation.indirect = True
            if ocgorderinner:
                annotation.OC = ocgorderinner[layer_index]
            if '/Annots' not in rmpage:
                rmpage.Annots = PdfArray()
            rmpage.Annots.append(annotation)

Пример #11

0

Показать файл

Файл: pdfThumbnails.py Проект: starikan/pdfThumbnails

def getPages(allpages, x, y, gap):
    """Take up to ``x * y`` pages off ``allpages`` and tile them on one page.

    :param allpages: list of source pages; the consumed pages are removed
        from the front of the list.
    :param x: number of columns.
    :param y: number of rows.
    :param gap: amount subtracted from each thumbnail's scale factor.
    :returns: a ``PdfDict`` describing the combined page.
    """
    # How many pages go onto one sheet
    per_sheet = x * y

    # Consume that many pages from the front of the list
    batch = [pagexobj(p) for p in allpages[:per_sheet]]
    del allpages[:per_sheet]

    # Size of the output page: the largest width and height in the batch
    width_max = max(thumb.BBox[2] for thumb in batch)
    height_max = max(thumb.BBox[3] for thumb in batch)

    commands = []
    xobjdict = PdfDict()

    row = y
    for number, thumb in enumerate(batch):
        offset_x = (number % x) * width_max / x
        if not offset_x:
            # A zero horizontal offset starts a new row.
            row -= 1
        offset_y = row * height_max / y

        # Resource name of this thumbnail
        name = '/P{}'.format(number)

        commands.append('q {x} 0 0 {y} {w} {h} cm {i} Do Q\n'.format(
            x=1. / x - gap,
            y=1. / y - gap,
            w=offset_x,
            h=offset_y,
            i=name))
        xobjdict[name] = thumb

    media_box = PdfArray([-1000 * gap, -1000 * gap, width_max, height_max])
    return PdfDict(
        Type=PdfName.Page,
        Contents=PdfDict(stream=''.join(commands)),
        MediaBox=media_box,
        Resources=PdfDict(XObject=xobjdict),
    )

Пример #12

0

Показать файл

Файл: text.py Проект: ptwz/pdf-annotate

    def make_cid_font_object(tt_font):
        """Build the CIDFontType2 font dictionary used as the descendant of a
        composite Type 0 font.

        :param TrueTypeFont tt_font: Utility object providing the font name and
        the metrics parsed from a TrueType font.
        :returns PdfDict: CID Font Type 2 PdfDict object.
        """
        # Entries are assembled one at a time, in the order they are written
        # into the dictionary.
        entries = {}
        entries['Type'] = PdfName('Font')
        entries['Subtype'] = PdfName('CIDFontType2')
        entries['BaseFont'] = PdfName(tt_font.fontName)
        entries['CIDSystemInfo'] = FreeText.make_cid_system_info_object()
        entries['FontDescriptor'] = FreeText.make_font_descriptor_object(
            tt_font)
        # The default width is rounded to an integer.
        entries['DW'] = int(round(tt_font.metrics.defaultWidth, 0))
        entries['Widths'] = PdfArray(tt_font.metrics.widths)
        entries['CIDToGIDMap'] = FreeText.make_cid_to_gid_map_object(tt_font)
        return IndirectPdfDict(**entries)

Пример #13

0

Показать файл

Файл: text.py Проект: ptwz/pdf-annotate

    def make_composite_font_object(font_file_path):
        """Build the Type0 composite font dictionary that is embedded in the
        annotation's Resources dict.

        :param str font_file_path: Path and filename of the TrueType font to embed.
        :returns PdfDict: Font PdfDict object, ready to be included in the
            Resources 'Font' subdictionary.
        """
        # TODO: Get font name from font program itself
        tt_font = get_true_type_font(font_file_path, DEFAULT_BASE_FONT)

        entries = {}
        entries['Type'] = PdfName('Font')
        entries['Subtype'] = PdfName('Type0')
        entries['BaseFont'] = PdfName(tt_font.fontName)
        entries['Encoding'] = PdfName('Identity-H')
        # A single CID font is the only descendant.
        entries['DescendantFonts'] = PdfArray(
            [FreeText.make_cid_font_object(tt_font)])
        entries['ToUnicode'] = FreeText.make_to_unicode_object()
        return IndirectPdfDict(**entries)

Пример #14

0

Показать файл

Файл: makesymind.py Проект: dominique-unruh/declmath

def popup_make_ocgs(num):
    """Create the optional content groups used to encode ``num`` popups.

    The smallest ``n >= 2`` with ``choose(n, n // 2) >= num`` is picked and
    ``n`` OCGs are created and installed as ``popup_pdf.Root.OCProperties``
    (all OFF by default).  Every popup can then be identified by a distinct
    choice of ``n // 2`` OCGs.

    :param num: number of distinct codes that are needed.
    :returns: ``(code, ocgs, hide_ocmd, show_ocmd)`` where ``code`` is a
        list of pairs ``(c, d)`` - ``c`` holding ``n // 2`` OCGs and ``d``
        the remaining ones - ``ocgs`` is the array of all OCGs, and the two
        OCMDs use the ``AllOff`` / ``AnyOn`` policies over all OCGs.
    """
    from pdfrw import PdfDict, PdfArray, PdfName

    # ``//`` keeps the integer division that ``/`` performed on Python 2.
    n = 2
    while choose(n, n // 2) < num:
        n += 1

    ocgs = []

    for i in range(n):
        ocg = PdfDict(Type=PdfName.OCG, Name="OCG {}".format(i), indirect=True)
        ocgs.append(ocg)

    if popup_pdf.Root.OCProperties:
        # Parenthesised form works as a statement on Python 2 and as a
        # function call on Python 3.
        print("Root.OCProperties already exists")
    ocgs = PdfArray(ocgs)
    #ocgs.indirect = True
    popup_pdf.Root.OCProperties = PdfDict(OCGs=ocgs,
                                          D=PdfDict(Order=ocgs,
                                                    ON=[],
                                                    OFF=ocgs))

    # Enumerate every split of the OCGs into (taken, not taken) ...
    code = [([], [])]
    for ocg in ocgs:
        code = [(c + [ocg], d) if take else (c, d + [ocg]) for c, d in code
                for take in (True, False)]
    # ... and keep the splits that take exactly floor(n/2) of them.
    code = [(c, d) for c, d in code if len(c) == n // 2]

    # code is now an array of all different pairs (c,d)
    # where c contains floor(n/2) OCGs and d the rest of the OCGs

    hide_ocmd = PdfDict(indirect=True,
                        Type=PdfName.OCMD,
                        OCGs=ocgs,
                        P=PdfName.AllOff)

    show_ocmd = PdfDict(indirect=True,
                        Type=PdfName.OCMD,
                        OCGs=ocgs,
                        P=PdfName.AnyOn)

    return code, ocgs, hide_ocmd, show_ocmd

Пример #15

0

Показать файл

Файл: 4up.py Проект: sandyroddick/pdfrw-fork

def get4(allpages):
    """Take up to four pages off ``allpages`` and place them 2x2 on one page.

    Each page is drawn at half scale; the first two pages go into the upper
    half, and odd-numbered pages into the right half.

    :param allpages: list of source pages; the consumed pages are removed
        from the front of the list.
    :returns: a ``PdfDict`` describing the combined page.
    """
    # Consume at most four pages from the front of the list
    batch = [pagexobj(p) for p in allpages[:4]]
    del allpages[:4]

    x_max = max(form.BBox[2] for form in batch)
    y_max = max(form.BBox[3] for form in batch)

    commands = []
    xobjdict = PdfDict()
    for number, form in enumerate(batch):
        in_right_column = number % 2
        in_upper_row = 1 if number <= 1 else 0
        offset_x = x_max * in_right_column / 2.0
        offset_y = y_max * in_upper_row / 2.0
        name = '/P%s' % number
        commands.append(
            'q 0.5 0 0 0.5 %s %s cm %s Do Q\n' % (offset_x, offset_y, name))
        xobjdict[name] = form

    media_box = PdfArray([0, 0, x_max, y_max])
    return PdfDict(
        Type=PdfName.Page,
        Contents=PdfDict(stream=''.join(commands)),
        MediaBox=media_box,
        Resources=PdfDict(XObject=xobjdict),
    )

Пример #16

0

Показать файл

Файл: poster.py Проект: sandyroddick/pdfrw-fork

def adjust(page):
    """Blow a landscape letter page up to a 48 x 36 inch poster page.

    The source page must have the BBox ``[0, 0, 792, 612]``.  Its content
    inside a half-inch margin is scaled to fill the new page, and the margin
    itself is pushed outside the new MediaBox.

    :param page: source page, converted with ``pagexobj``.
    :returns: a ``PdfDict`` describing the poster page.
    """
    form = pagexobj(page)
    assert form.BBox == [0, 0, 11 * 72, int(8.5 * 72)], form.BBox

    margin = 72 // 2
    inner_w = form.BBox[2] - 2 * margin
    inner_h = form.BBox[3] - 2 * margin

    poster_w = 48 * 72
    poster_h = 36 * 72
    ratio = 1.0 * poster_w / inner_w
    # Both axes must scale by the same factor.
    assert ratio == 1.0 * poster_h / inner_h

    name = '/BasePage'
    # Shift by the scaled margin so it falls outside the page.
    shift_x = -margin * ratio
    shift_y = -margin * ratio
    commands = 'q %0.2f 0 0 %0.2f %s %s cm %s Do Q\n' % (
        ratio, ratio, shift_x, shift_y, name)

    xobjects = PdfDict()
    xobjects[name] = form

    return PdfDict(
        Type=PdfName.Page,
        Contents=PdfDict(stream=commands),
        MediaBox=PdfArray([0, 0, poster_w, poster_h]),
        Resources=PdfDict(XObject=xobjects),
    )

Пример #17

0

Показать файл

def merge_pages(basepage, rmpage, changed_page):
    """Resize ``basepage`` to the notebook page ratio and overlay ``rmpage``.

    The box taken from ``basepage`` is always adjusted in place.  Only when
    ``changed_page`` is truthy is ``rmpage`` scaled, positioned and rendered
    onto ``basepage``, and its annotations appended to those of ``basepage``.

    :param basepage: page of the base PDF; modified in place.
    :param rmpage: page holding the notebook content; its ``Rotate`` entry
        and annotation rectangles may be modified.
    :param changed_page: whether the overlay step should be performed.
    :returns: None.
    """
    # The general approach is to keep the base PDF, so all
    # operations must be made upon the basepage. That way the
    # base pages keep their metadata, annotations and paper
    # size. However, a few things must also occur.

    # The basepage must be resized to the ratio of the rM
    # page so that no brush strokes get cut.

    # The new (rM) page must be resized to the dimensions of
    # the basepage. The PDF standard allows different page
    # sizes in one document, so each page must be measured.

    # ...

    # There is a bug here that can be seen with the NH file.
    # It is possible (why?) for a page not to have a
    # MediaBox, so one must be taken from the parent. The
    # rM adds a bit to the width AND the height on this
    # file.
    bpage_box = basepage.MediaBox
    if not bpage_box:
        # Should probably check if the parent has a MediaBox
        bpage_box = basepage.Parent.MediaBox
    bpage_w = float(bpage_box[2]) - float(bpage_box[0])
    bpage_h = float(bpage_box[3]) - float(bpage_box[1])
    # Round to four decimals so that floating point noise does
    # not affect the ratio comparisons below.
    bpage_ratio = round(bpage_w / bpage_h * 10000) / 10000
    landscape_bpage = False
    if bpage_w > bpage_h:
        landscape_bpage = True

    # If the base PDF page was really wide, the rM rotates
    # it -90deg (CCW) on the screen, but doesn't actually
    # rotate it in the PDF. Also, if a notebook is in
    # landscape format, it remains in portrait mode during
    # the Web UI export. So, we must actually rotate the rM
    # page 90deg (CW) to fit on these wide pages.

    rpage_box = rmpage.MediaBox
    rpage_w = float(rpage_box[2]) - float(rpage_box[0])
    rpage_h = float(rpage_box[3]) - float(rpage_box[1])
    rpage_ratio = rpage_w / rpage_h
    if landscape_bpage:
        rmpage.Rotate = 90
        # The rotated page has its width and height swapped.
        rpage_ratio = rpage_h / rpage_w

        # Annotations must be rotated because this rotation
        # statement won't hit until the page merge, and
        # pdfrw is unaware of annotations.
        if '/Annots' in rmpage:
            for a, annot in enumerate(rmpage.Annots):
                rect = annot.Rect
                rmpage.Annots[a].Rect = PdfArray(
                    [rect[1], PDFWIDTH - rect[0], rect[3], PDFWIDTH - rect[2]])

    # Resize the base page to the notebook page ratio by
    # adjusting the box read above (in place). If the base
    # page is relatively taller, its width is expanded: to the
    # left for landscape pages, to the right for portrait
    # pages. If it is relatively wider, its height is expanded
    # downwards.

    adjust = 0
    if bpage_ratio <= rpage_ratio:
        # Basepage is taller, so need to expand the width.
        # The basepage should be pushed to the right, which
        # is also the top of the rM in portrait mode. A
        # push to the right is really just decreasing the
        # left side.
        new_width = rpage_ratio * bpage_h
        if landscape_bpage:
            adjust = float(bpage_box[2]) - new_width
            bpage_box[0] = adjust
        else:
            # Portrait documents get pushed to the left, so
            # expand the right side.
            adjust = float(bpage_box[0])
            bpage_box[2] = new_width + float(bpage_box[0])
    elif bpage_ratio > rpage_ratio:
        # Basepage is fatter, so need to expand the height.
        # The basepage should be pushed to the top, which is
        # also the top of the rM in portrait mode. A push to
        # the top is really decreasing the bottom side.
        new_height = (1 / rpage_ratio) * bpage_w
        adjust = float(bpage_box[3]) - new_height
        bpage_box[1] = adjust

    # If this wasn't a changed page, don't bother with the
    # following.
    if not changed_page:
        return

    # Scale and (if necessary) rotate the notebook page
    # and overlay it to the basepage. Might have to push
    # it a bit, depending on the direction.
    #basepage.Rotate = -90
    np = PageMerge(basepage).add(rmpage)

    # Offsets (x, y) added to the annotation rectangles so that
    # they follow the overlaid page.
    annot_adjust = [0, 0]

    # np[1] is used as the overlaid notebook page from here on.
    if bpage_ratio <= rpage_ratio:
        scale = bpage_h / np[1].h
        np[1].scale(scale)
        np[1].x = adjust
        annot_adjust[0] = adjust
    elif bpage_ratio > rpage_ratio:
        scale = bpage_w / np[1].w
        np[1].scale(scale)
        np[1].y = adjust
        annot_adjust[1] = adjust

    # Apply the same scale and offset to every annotation
    # rectangle of the notebook page.
    if '/Annots' in rmpage:
        for a, annot in enumerate(rmpage.Annots):
            rect = annot.Rect
            newrect = PdfArray([
                rect[0] * scale + annot_adjust[0],
                rect[1] * scale + annot_adjust[1],
                rect[2] * scale + annot_adjust[0],
                rect[3] * scale + annot_adjust[1]
            ])
            rmpage.Annots[a].Rect = newrect

    # Gives the basepage the rmpage as a new object
    np.render()

    # Annots aren't carried over--pdfrw isn't aware.
    if '/Annots' in rmpage:
        if not '/Annots' in basepage:
            basepage.Annots = PdfArray()
        basepage.Annots += rmpage.Annots

Пример #18

0

Показать файл

 def pdfdict(self):
     """Return a PageLabel entry to be inserted in the root of a PdfReader object"""
     # Flatten the objects of every label, visiting the labels in sorted order.
     nums = []
     for label in sorted(self):
         nums.extend(label.pdfobjs())
     return PdfDict(Type=PdfName("Catalog"),
                    Nums=PdfArray(nums))

Пример #19

0

Показать файл

def add_annot(pdfrw_page, annot):
    """Append ``annot`` to the page's annotations, creating the array first
    when the page has none."""
    annots = pdfrw_page.Annots
    if annots is None:
        annots = pdfrw_page.Annots = PdfArray()
    annots.append(annot)

Пример #20

0

Показать файл

                                                  105]  # unnecessary clean-up: getting rid of traces of Image-7370
pdf_kid.MediaBox = img_kid.MediaBox

# Alternate image whose data lives in an external file referenced by URL.
# The dictionary key must be spelled 'Subtype' (PDF names are
# case-sensitive); it was previously written as 'SubType'.
alt_img = PdfDict(Type=PdfName.XObject,
                  Subtype=PdfName.Image,
                  BitsPerComponent=8,
                  ColorSpace=PdfName.DeviceRGB,
                  Height=800,
                  Width=600,
                  Length=0,
                  F=PdfDict(FS=PdfName.URL,
                            F='https://chezsoi.org/lucas/ThePatch.jpg'),
                  FFilter=PdfName.DCTDecode)
# ``True``, not ``true``: the lowercase name is undefined in Python.
alt_img.indirect = True

alternates = PdfArray([PdfDict(DefaultForPrinting=True, Image=alt_img)])
alternates.indirect = True

# Attach the alternates to the existing image and make it the only XObject
# of the output page.
img_name = PdfName('Image-9960')
img = img_kid.Resources.XObject[img_name]
img.Alternates = alternates
pdf_kid.Resources.XObject = PdfDict()
pdf_kid.Resources.XObject[img_name] = img

out = PdfWriter()
out.addpage(pdf.pages[0])
out.write('out.pdf')

# CONCLUSION: neither Adobe nor Sumatra readers visit the link...
# It may be that readers do not follow this "Alternates" images spec anymore, that HTTPS is not supported, or that I made a mistake in the resulting PDF.
# Anyway, I'm giving up.

Пример #21

0

Показать файл

def render(source,
           *,
           progress_cb=lambda x: None,
           expand_pages=True,
           template_alpha=0.3,
           only_annotated=False,
           black='black',
           white='white',
           gray=None,
           highlight=HIGHLIGHT_DEFAULT_COLOR):
    """Render a source document as a PDF file.

    source: The reMarkable document to be rendered.  This may be
              - A filename or pathlib.Path to a zip file containing the
                document, such as is provided by the Cloud API.
              - A filename or pathlib.Path to a root-level file from the
                document, such as might be copied off the device directly.
              - An object implementing the Source API.  See rmrl.sources
                for examples and further documentation.
    progress_cb: A function which will be called with a progress percentage
                 between 0 and 100.  The first 50% indicate rendering the
                 annotations, and the second the merging of these into the
                 base PDF file.  If this callback raises an error, this
                 function will abort gracefully and propagate the error up
                 the stack.
    expand_pages: Boolean value (default True) indicating whether pages
                  should be made larger, to reflect the view provided by
                  the reMarkable device.
    template_alpha: Opacity of the template backgrounds in notebooks.  0
                    makes the templates invisible, 1 makes them fully dark.
    only_annotated: Boolean value (default False) indicating whether only
                    pages with annotations should be output.
    black: A string giving the color to use as "black" in the document.
           Can be a color name or a hex string.  Default: 'black'
    white: A string giving the color to use as "white" in the document.
           See `black` parameter for format.  Default: 'white'
    gray: A string giving the color to use as "gray" in the document.
          See `black` parameter for format.  Default: None, which means to
          pick an average between the "white" and "black" values.
    highlight: A string giving the color to use for the highlighter.
               See `black` parameter for format.

    Returns a file object positioned for reading: the untouched base PDF
    opened from the source when there is a base PDF without any changed
    page, otherwise a spooled temporary file holding the rendered result.
    """

    colors = parse_colors(black, white, gray, highlight)

    vector = True  # TODO: Different rendering styles
    source = sources.get_source(source)

    # If this is using a base PDF, the percentage is calculated
    # differently.
    uses_base_pdf = source.exists('{ID}.pdf')

    # Generate page information
    # If a PDF file was uploaded, but never opened, there may not be
    # a .content file. So, just load a barebones one with a 'pages'
    # key of zero length, so it doesn't break the rest of the
    # process.
    pages = []
    if source.exists('{ID}.content'):
        with source.open('{ID}.content', 'r') as f:
            pages = json.load(f).get('pages', [])

    # Render each page as a pdf
    tmpfh = tempfile.TemporaryFile()
    pdf_canvas = canvas.Canvas(tmpfh, (PDFWIDTH, PDFHEIGHT))
    # TODO: check pageCompression

    # Don't load all the pages into memory, because large notebooks
    # about 500 pages could use up to 3 GB of RAM. Create them by
    # iteration so they get released by garbage collector.
    changed_pages = []
    annotations = []
    for i, page_info in enumerate(pages):
        page = document.DocumentPage(source, page_info, i, colors=colors)
        if source.exists(page.rmpath):
            changed_pages.append(i)
        page.render_to_painter(pdf_canvas, vector, template_alpha)
        annotations.append(page.get_grouped_annotations())
        progress_cb((i + 1) / len(pages) * 50)
    pdf_canvas.save()
    tmpfh.seek(0)

    # This new PDF represents just the notebook. If there was a
    # parent PDF, merge it now.
    if uses_base_pdf and not changed_pages:
        # Since there is no stroke data, just return the PDF data
        progress_cb(100)

        log.info('exported pdf')
        return source.open('{ID}.pdf', 'rb')

    # PDF exists, stroke data exists, so mix them together.
    if uses_base_pdf:
        rmpdfr = PdfReader(tmpfh)
        basepdfr = PdfReader(source.open('{ID}.pdf', 'rb'))
    else:
        basepdfr = PdfReader(tmpfh)
        # Alias, which is used for annotations and layers.
        rmpdfr = basepdfr

    # If making a 'layered' PDF (with optional content groups,
    # OCGs), associate the annotations with the layer.

    # This property list is put into the rmpdfr document, which
    # will not have any existing properties.
    ocgprop = IndirectPdfDict(OCGs=PdfArray(), D=PdfDict(Order=PdfArray()))

    # Decided once, before the loop: the flag is read again after the loop,
    # which raised NameError whenever the loop body never ran.
    apply_ocg = False  #TODO configurable? bool(int(QSettings().value(
    #'pane/notebooks/export_pdf_ocg')))

    for i in range(0, len(basepdfr.pages)):
        basepage = basepdfr.pages[i]
        rmpage = rmpdfr.pages[i]

        # Apply OCGs
        if apply_ocg:
            ocgorderinner = do_apply_ocg(basepage, rmpage, i, uses_base_pdf,
                                         ocgprop, annotations)
        else:
            ocgorderinner = None

        # Apply annotations to the rmpage. This must come after
        # applying OCGs, because the annotation may belong to
        # one of those groups.
        apply_annotations(rmpage, annotations[i], ocgorderinner)

        # If this is a normal notebook with highlighting,
        # just add the annotations and forget about the rest,
        # which are page geometry transformations.
        if uses_base_pdf:
            merge_pages(basepage, rmpage, i in changed_pages, expand_pages)

        progress_cb(((i + 1) / rmpdfr.numPages * 50) + 50)

    # Apply the OCG order. The basepdf may have already had OCGs
    # and so we must not overwrite them. NOTE: there are other
    # properties that ought to be carried over, but this is the
    # minimum required.
    if apply_ocg:
        if '/OCProperties' in basepdfr.Root:
            basepdfr.Root.OCProperties.OCGs += ocgprop.OCGs
            basepdfr.Root.OCProperties.D.Order += ocgprop.D.Order
        else:
            basepdfr.Root.OCProperties = ocgprop

    stream = tempfile.SpooledTemporaryFile(SPOOL_MAX)
    pdfw = PdfWriter(stream)
    if not only_annotated:
        # We are writing out everything, so we can take this shortcut:
        pdfw.write(trailer=basepdfr)
    else:
        for i, page in enumerate(basepdfr.pages):
            if i in changed_pages:
                pdfw.addpage(page)
        pdfw.write()
    stream.seek(0)

    log.info('exported pdf')
    return stream

Пример #22

0

Показать файл

def write_pdf_metadata(document, fileobj, scale, metadata, attachments,
                       url_fetcher):
    """Rewrite the PDF stored in ``fileobj`` with metadata added.

    The file is read from its start with ``PdfReader``, then decorated
    with bookmarks (outlines), embedded attachments, per-page link and
    file-attachment annotations, document information entries and
    TrimBox/BleedBox values, and finally written back over the same
    file object, which is truncated after the new content.

    ``attachments`` may be ``None``; it is appended to
    ``metadata.attachments``.

    """
    fileobj.seek(0)
    trailer = PdfReader(fileobj)
    root = trailer.Root
    pages = root.Pages.Kids

    def link_annotation(link_type, target, rectangle):
        # Plain link: either a jump inside the document or an external URI.
        annot = PdfDict(Type=PdfName('Annot'),
                        Subtype=PdfName('Link'),
                        Rect=PdfArray(rectangle),
                        Border=PdfArray((0, 0, 0)))
        if link_type == 'internal':
            annot.A = PdfDict(
                Type=PdfName('Action'),
                S=PdfName('GoTo'),
                D=PdfArray((target[0], PdfName('XYZ'), target[1],
                            target[2], 0)))
        else:
            annot.A = PdfDict(
                Type=PdfName('Action'),
                S=PdfName('URI'),
                URI=PdfString.encode(iri_to_uri(target)))
        return annot

    def file_annotation(file_spec, rectangle):
        appearance = PdfDict(N=PdfDict(BBox=PdfArray(rectangle),
                                       Subtype=PdfName('Form'),
                                       Type=PdfName('XObject')))
        # evince needs /T or fails on an internal assertion. PDF doesn't
        # require it.
        return PdfDict(Type=PdfName('Annot'),
                       Subtype=PdfName('FileAttachment'),
                       T=PdfString.encode(''),
                       Rect=PdfArray(rectangle),
                       Border=PdfArray((0, 0, 0)),
                       FS=file_spec,
                       AP=appearance)

    # --- Bookmarks ---------------------------------------------------------
    bookmarks, links = prepare_metadata(document, scale, pages)
    if bookmarks:
        outline_items, outline_count = create_bookmarks(bookmarks, pages)
        root.Outlines = PdfDict(Type=PdfName('Outlines'),
                                Count=outline_count,
                                First=outline_items[0],
                                Last=outline_items[-1])

    # --- Document-level attachments ----------------------------------------
    all_attachments = metadata.attachments + (attachments or [])
    if all_attachments:
        name_entries = []
        for item in all_attachments:
            file_spec = _create_pdf_attachment(item, url_fetcher)
            if file_spec is None:
                continue
            name_entries += [PdfString.encode('attachment'), file_spec]
        if name_entries:
            root.Names = PdfDict(EmbeddedFiles=PdfDict(
                Names=PdfArray(name_entries)))

    # --- Attachment links --------------------------------------------------
    # One link may be split over several rectangles; each distinct target is
    # embedded only once and the resulting object is shared between them.
    # TODO: If we add support for descriptions this won't always be correct,
    # because two links might have the same href, but different titles.
    annot_files = {}
    for page_links in links:
        for link_type, target, _rectangle in page_links:
            if link_type != 'attachment' or target in annot_files:
                continue
            # TODO: use the title attribute as description
            annot_files[target] = _create_pdf_attachment((target, None),
                                                         url_fetcher)

    # --- Per-page annotations ----------------------------------------------
    # TODO: splitting a link into multiple independent rectangular
    # annotations works well for pure links, but rather mediocre for other
    # annotations and fails completely for transformed (CSS) or complex link
    # shapes (area). It would be better to use /AP for all links and
    # coalesce link shapes that originate from the same HTML link. This
    # would give a feeling similar to what browsers do with links that span
    # multiple lines.
    for page, page_links in zip(pages, links):
        page_annots = PdfArray()
        for link_type, target, rectangle in page_links:
            # An attachment whose file could not be created falls back to
            # being an ordinary URI link.
            if link_type == 'attachment' and annot_files[target] is not None:
                page_annots.append(
                    file_annotation(annot_files[target], rectangle))
            else:
                page_annots.append(
                    link_annotation(link_type, target, rectangle))
        if page_annots:
            page.Annots = page_annots

    # --- Document information ----------------------------------------------
    info = trailer.Info
    info.Producer = VERSION_STRING

    plain_fields = (('title', 'Title'), ('description', 'Subject'),
                    ('generator', 'Creator'))
    for attr, key in plain_fields:
        text = getattr(metadata, attr)
        if text is not None:
            setattr(info, key, text)

    list_fields = (('authors', 'Author'), ('keywords', 'Keywords'))
    for attr, key in list_fields:
        items = getattr(metadata, attr)
        if items is not None:
            setattr(info, key, ', '.join(items))

    date_fields = (('created', 'CreationDate'), ('modified', 'ModDate'))
    for attr, key in date_fields:
        pdf_date = w3c_date_to_pdf(getattr(metadata, attr), attr)
        if pdf_date is not None:
            setattr(info, key, pdf_date)

    # --- Trim and bleed boxes ----------------------------------------------
    for page, document_page in zip(pages, document.pages):
        left, top, right, bottom = (float(edge) for edge in page.MediaBox)
        # Convert pixels into points
        bleed = {
            side: pixels * 0.75
            for side, pixels in document_page.bleed.items()
        }

        trim_box = (left + bleed['left'],
                    top + bleed['top'],
                    right - bleed['right'],
                    bottom - bleed['bottom'])
        page.TrimBox = PdfArray(trim_box)

        # Arbitrarily set PDF BleedBox between CSS bleed box (PDF MediaBox)
        # and CSS page box (PDF TrimBox), at most 10 points from the TrimBox.
        page.BleedBox = PdfArray((
            trim_box[0] - min(10, bleed['left']),
            trim_box[1] - min(10, bleed['top']),
            trim_box[2] + min(10, bleed['right']),
            trim_box[3] + min(10, bleed['bottom']),
        ))

    fileobj.seek(0)
    PdfWriter().write(fileobj, trailer=trailer)
    fileobj.truncate()

Пример #23

0

Показать файл

def render(source, *, progress_cb=lambda x: None, expand_pages=True):
    """Render a document source to PDF and return it as a binary stream.

    Args:
        source: Anything accepted by ``sources.get_source``.
        progress_cb: Called with a progress percentage between 0 and
            100.  The first 50% covers rendering the notebook pages,
            the second 50% covers merging them with the base PDF.  The
            callback may raise to abort; the temporary render file is
            closed and the error is passed upwards.
        expand_pages: Forwarded to ``merge_pages`` as its
            ``expand_pages`` argument.

    Returns:
        A readable binary file object: the untouched base PDF when it
        exists and no page has stroke data, otherwise a
        ``SpooledTemporaryFile`` positioned at offset 0.
    """
    vector = True  # TODO: Different rendering styles
    source = sources.get_source(source)

    # If this is using a base PDF, the percentage is calculated
    # differently.
    uses_base_pdf = source.exists('{ID}.pdf')

    # Document metadata should already be loaded (from device)
    # ...

    # Generate page information
    # If a PDF file was uploaded, but never opened, there may not be
    # a .content file. So, just use an empty page list so the rest of
    # the process does not break.
    pages = []
    if source.exists('{ID}.content'):
        with source.open('{ID}.content', 'r') as f:
            pages = json.load(f).get('pages', [])

    # Render each page as a pdf
    tmpfh = tempfile.TemporaryFile()
    pdf_canvas = canvas.Canvas(tmpfh, (PDFWIDTH, PDFHEIGHT))
    # TODO: check pageCompression

    # Don't load all the pages into memory, because large notebooks
    # about 500 pages could use up to 3 GB of RAM. Create them by
    # iteration so they get released by garbage collector.
    changed_pages = set()  # only used for truthiness and membership tests
    annotations = []
    try:
        for i, page_info in enumerate(pages):
            page = document.DocumentPage(source, page_info, i)
            if source.exists(page.rmpath):
                changed_pages.add(i)
            page.render_to_painter(pdf_canvas, vector)
            annotations.append(page.get_grouped_annotations())
            progress_cb((i + 1) / len(pages) * 50)
        pdf_canvas.save()
    except BaseException:
        # Do not leak the temporary file when rendering or the
        # progress callback aborts.
        tmpfh.close()
        raise
    tmpfh.seek(0)

    # This new PDF represents just the notebook. If there was a
    # parent PDF, merge it now.
    if uses_base_pdf and not changed_pages:
        # Since there is no stroke data, just return the PDF data.
        # The rendered notebook is not needed, so release it.
        tmpfh.close()
        progress_cb(100)

        log.info('exported pdf')
        return source.open('{ID}.pdf', 'rb')

    # PDF exists, stroke data exists, so mix them together.
    if uses_base_pdf:
        rmpdfr = PdfReader(tmpfh)
        basepdfr = PdfReader(source.open('{ID}.pdf', 'rb'))
    else:
        basepdfr = PdfReader(tmpfh)
        # Alias, which is used for annotations and layers.
        rmpdfr = basepdfr

    # If making a 'layered' PDF (with optional content groups,
    # OCGs), associate the annotations with the layer.

    # This property list is put into the rmpdfr document, which
    # will not have any existing properties.
    ocgprop = IndirectPdfDict(OCGs=PdfArray(), D=PdfDict(Order=PdfArray()))

    # Decided once, before the loop: it is also read after the loop,
    # which must work even when the document has no pages.
    apply_ocg = False  #TODO configurable? bool(int(QSettings().value(
    #'pane/notebooks/export_pdf_ocg')))

    for i in range(len(basepdfr.pages)):
        basepage = basepdfr.pages[i]
        rmpage = rmpdfr.pages[i]

        # Apply OCGs
        if apply_ocg:
            ocgorderinner = do_apply_ocg(basepage, rmpage, i, uses_base_pdf,
                                         ocgprop, annotations)
        else:
            ocgorderinner = None

        # Apply annotations to the rmpage. This must come after
        # applying OCGs, because the annotation may belong to
        # one of those groups.
        apply_annotations(rmpage, annotations[i], ocgorderinner)

        # If this is a normal notebook with highlighting,
        # just add the annotations and forget about the rest,
        # which are page geometry transformations.
        if uses_base_pdf:
            merge_pages(basepage, rmpage, i in changed_pages, expand_pages)

        progress_cb(((i + 1) / rmpdfr.numPages * 50) + 50)

    # Apply the OCG order. The basepdf may have already had OCGs
    # and so we must not overwrite them. NOTE: there are other
    # properties that ought to be carried over, but this is the
    # minimum required.
    if apply_ocg:
        if '/OCProperties' in basepdfr.Root:
            basepdfr.Root.OCProperties.OCGs += ocgprop.OCGs
            basepdfr.Root.OCProperties.D.Order += ocgprop.D.Order
        else:
            basepdfr.Root.OCProperties = ocgprop

    pdfw = PdfWriter()
    stream = tempfile.SpooledTemporaryFile(SPOOL_MAX)
    pdfw.write(stream, basepdfr)
    stream.seek(0)

    log.info('exported pdf')
    return stream

Пример #24

0

Показать файл

def merge_pages(basepage, rmpage, changed_page, expand_pages):
    """Fit the rendered notebook page onto a base PDF page and overlay it.

    The base page is kept; its visible box is extended so that it has
    the same aspect ratio as ``rmpage``, and ``rmpage`` is rotated,
    scaled and moved so that it lines up with that box.

    Args:
        basepage: Page of the base PDF.  Its ``CropBox``, ``MediaBox``,
            ``Rotate`` and ``Parent.MediaBox`` are read; ``CropBox``
            and ``MediaBox`` are updated when ``expand_pages`` is true,
            and ``Annots`` is extended when ``changed_page`` is true.
        rmpage: Rendered notebook page.  Its ``MediaBox`` is read and
            its ``Rotate`` is always set; its annotation rectangles are
            rewritten when ``changed_page`` is true.
        changed_page: When false, the function returns after the
            geometry work, without overlaying anything.
        expand_pages: When true, write the extended box back to
            ``basepage`` as CropBox and grow the MediaBox to contain it.

    Raises:
        AssertionError: If the effective rotation is not a multiple of
            90 degrees.
    """
    # The general approach is to keep the base PDF. So, all
    # operations must be made upon the basepage.

    # reMarkable uses the CropBox if it exists, otherwise
    # the MediaBox.
    # It is possible for a page not to have a MediaBox, so one
    # must be taken from the parent.
    bpage_box = list(
        map(float, basepage.CropBox or basepage.MediaBox
            or basepage.Parent.MediaBox))

    # Fix any malformed PDF that has a CropBox extending outside of
    # the MediaBox, by limiting the area to the intersection.
    if basepage.MediaBox:
        for i, op in enumerate((max, max, min, min)):
            bpage_box[i] = op(float(basepage.MediaBox[i]), bpage_box[i])

    bpage_w = bpage_box[2] - bpage_box[0]
    bpage_h = bpage_box[3] - bpage_box[1]
    # Round because floating point makes it prissy
    bpage_ratio = round(bpage_w / bpage_h * 10000) / 10000

    # Normalise the page rotation into [0, 360) so that values such as
    # -90 or 450, and integer as well as string values, are handled
    # like their canonical equivalent.
    base_rotation = int(basepage.Rotate or 0) % 360

    landscape_bpage = bpage_w > bpage_h
    if landscape_bpage:
        bpage_ratio = 1 / bpage_ratio  # <= 1 always
    if base_rotation in (90, 270):
        landscape_bpage = not landscape_bpage

    # If the base PDF page was really wide, the rM rotates
    # it -90deg (CCW) on the screen, but doesn't actually
    # rotate it in the PDF. Also, if a notebook is in
    # landscape format, it remains in portrait mode during
    # the Web UI export. So, we must actually rotate the rM
    # page 90deg (CW) to fit on these wide pages.

    # Since we create this page, we know there isn't a different
    # CropBox to worry about.  We also know width < height
    rpage_box = list(map(float, rmpage.MediaBox))
    rpage_w = rpage_box[2] - rpage_box[0]
    rpage_h = rpage_box[3] - rpage_box[1]
    rpage_ratio = rpage_w / rpage_h

    effective_rotation = base_rotation
    # If the page is landscape, reMarkable adds a -90 degree rotation.
    if landscape_bpage:
        effective_rotation = (effective_rotation + 270) % 360
    # The rmpage picks up the rotation of the base page -- that is,
    # its own rotation is relative to the basepage.  We don't want
    # any net rotation, so we rotate it backwards now, so that with
    # the basepage rotation, it ends up upright.
    rmpage.Rotate = (360 - effective_rotation) % 360

    if effective_rotation in (0, 180):
        flip_base_dims = False
    elif effective_rotation in (90, 270):
        flip_base_dims = True
    else:
        # Raised explicitly (rather than via ``assert``) so the check
        # is still performed when Python runs with -O.
        raise AssertionError(f"Unexpected rotation: {effective_rotation}")

    if bpage_ratio <= rpage_ratio:
        # These ratios < 1, so this indicates the basepage is more
        # narrow, and thus we need to extend the width.  Extra space
        # is added to the right of the screen, but that ends up being
        # a different page edge, depending on rotation.
        if not flip_base_dims:
            new_width = rpage_ratio * bpage_h
            scale = bpage_h / rpage_h
            if effective_rotation == 0:
                bpage_box[2] = new_width + bpage_box[0]
            else:
                bpage_box[0] = bpage_box[2] - new_width
        else:
            # Height and width are flipped for the basepage
            new_height = rpage_ratio * bpage_w
            scale = bpage_w / rpage_h
            if effective_rotation == 90:
                bpage_box[3] = new_height + bpage_box[1]
            else:
                bpage_box[1] = bpage_box[3] - new_height
    else:
        # Basepage is wider, so need to expand the height.
        # Extra space is added at the bottom of the screen.
        if not flip_base_dims:
            new_height = 1 / rpage_ratio * bpage_w
            scale = bpage_w / rpage_w
            if effective_rotation == 0:
                bpage_box[1] = bpage_box[3] - new_height
            else:
                bpage_box[3] = new_height + bpage_box[1]
        else:
            # Height and width are flipped for the basepage
            new_width = 1 / rpage_ratio * bpage_h
            scale = bpage_h / rpage_w
            if effective_rotation == 90:
                bpage_box[2] = new_width + bpage_box[0]
            else:
                bpage_box[0] = bpage_box[2] - new_width

    if expand_pages:
        # Create a CropBox, whether or not there was one before.
        basepage.CropBox = bpage_box
        if not basepage.MediaBox:
            # Provide a MediaBox, in the odd case where there isn't one.
            basepage.MediaBox = bpage_box
        else:
            # Expand the MediaBox as necessary to include the entire CropBox.
            for i, op in enumerate((min, min, max, max)):
                basepage.MediaBox[i] = op(float(basepage.MediaBox[i]),
                                          bpage_box[i])

    # If this wasn't a changed page, don't bother with the
    # following.
    if not changed_page:
        return

    # Scale and (if necessary) rotate the notebook page
    # and overlay it to the basepage. Might have to push
    # it a bit, depending on the direction.
    np = PageMerge(basepage).add(rmpage)

    # Move the overlay page to be based on the coordinates
    # of the base page CropBox
    np[1].x = bpage_box[0]
    np[1].y = bpage_box[1]
    np[1].scale(scale)

    #TODO: Test all of these annotations with various rotations
    # and offsets.
    if landscape_bpage:
        # Annotations must be rotated because this rotation
        # statement won't hit until the page merge, and
        # pdfrw is unaware of annotations.
        if '/Annots' in rmpage:
            for a, annot in enumerate(rmpage.Annots):
                rect = annot.Rect
                rmpage.Annots[a].Rect = PdfArray(
                    [rect[1], PDFWIDTH - rect[0], rect[3], PDFWIDTH - rect[2]])

    annot_adjust = [0, 0]

    if '/Annots' in rmpage:
        for a, annot in enumerate(rmpage.Annots):
            rect = annot.Rect
            newrect = PdfArray([
                rect[0] * scale + annot_adjust[0],
                rect[1] * scale + annot_adjust[1],
                rect[2] * scale + annot_adjust[0],
                rect[3] * scale + annot_adjust[1]
            ])
            rmpage.Annots[a].Rect = newrect

    # Gives the basepage the rmpage as a new object
    np.render()

    # Annots aren't carried over--pdfrw isn't aware.
    if '/Annots' in rmpage:
        if '/Annots' not in basepage:
            basepage.Annots = PdfArray()
        basepage.Annots += rmpage.Annots

Пример #25

0

Показать файл

def do_apply_ocg(basepage, rmpage, i, uses_base_pdf, ocgprop, annotations):
    """Wrap the content of one page in optional content groups (OCGs).

    Marked-content operators (``/OC /name BDC`` ... ``EMC``) are spliced
    into the page content streams, and the matching OCG dictionaries
    are registered in the page resources and in ``ocgprop``.

    Args:
        basepage: Page of the base PDF; only modified when
            ``uses_base_pdf`` is true.
        rmpage: Rendered notebook page; its content stream and
            ``Resources.Properties`` are rewritten.
        i: Zero-based page index, used for the page OCG name and to
            look up ``annotations[i]``.
        uses_base_pdf: Whether the document has a base PDF.
        ocgprop: Document-level OCG property dictionary; its ``OCGs``
            and ``D.Order`` arrays are appended to.
        annotations: Per-page sequence of layers; the first item of
            each layer is used as the OCG name.

    Returns:
        The ``PdfArray`` holding the OCGs nested under this page's OCG.
    """
    ocgpage = IndirectPdfDict(Type=PdfName('OCG'), Name='Page ' + str(i + 1))
    ocgprop.OCGs.append(ocgpage)

    # The Order dict is a Page, followed by Inner
    ocgorderinner = PdfArray()

    # Add Template OCG layer
    # If this uses a basepdf, the template is located
    # elsewhere.

    # If using a basepdf, assign its stream as a
    # 'Background' layer under this page. When the page
    # primary OCG is disabled, the background will
    # remain, making it easy to disable all annotations.
    if uses_base_pdf:
        ocgorigdoc = IndirectPdfDict(Type=PdfName('OCG'), Name='Background')
        ocgprop.OCGs.append(ocgorigdoc)
        ocgorderinner.append(ocgorigdoc)

        uncompress.uncompress([basepage.Contents])
        stream = basepage.Contents.stream
        stream = '/OC /ocgorigdoc BDC\n' \
            + stream \
            + 'EMC\n'
        basepage.Contents.stream = stream
        compress.compress([basepage.Contents])

        if '/Properties' in basepage.Resources:
            props = basepage.Resources.Properties
        else:
            props = PdfDict()
        props.ocgorigdoc = ocgorigdoc
        basepage.Resources.Properties = props

    # If not using a basepdf, assign the rmpage's stream
    # as a 'Template' layer under this page. It will be
    # affected by disabling the primary Page OCG (which
    # by itself is kind of useless for exported
    # notebooks).

    # Regardless of using a basepdf or not, put the
    # rmpage layers into their own OCGs.

    # If the template has an XObject, we want to skip
    # the first one. This happens when the template
    # contains a PNG. Question--what happens when the
    # template contains more than one PNG? How do we
    # detect all of those?

    template_xobj_keys = []
    vector_layers = []
    uncompress.uncompress([rmpage.Contents])
    if uses_base_pdf:
        # The entire thing is the page ocg
        stream = '/OC /ocgpage BDC\n'
        stream += rmpage.Contents.stream
        stream += 'EMC\n'
        rmpage.Contents.stream = stream
    else:
        stream = rmpage.Contents.stream
        # Mark the template ocg separate from page ocg
        template_endpos = 0
        page_inatpos = 0
        findkey = '1 w 2 J 2 j []0  d\nq\n'
        # Finds only the first instance, which should be
        # for the template.
        findloc = stream.find(findkey)
        if findloc < 0:
            # May be a vector, which we stick a marker
            # in for.
            # ?? Why is this a half-point off ??
            findkey = '799.500000 85 l\n'
            m = re.search(findkey, rmpage.Contents.stream)
            if m:
                findloc = m.start()
        # A match at offset 0 is still a match; only negative means
        # "not found".
        if findloc >= 0:
            template_endpos = findloc + len(findkey)
            # Add vector template OCG
            stream = '/OC /ocgtemplate BDC\n'
            stream += rmpage.Contents.stream[:template_endpos]
            stream += 'EMC\n'
            page_inatpos = len(stream)
            stream += rmpage.Contents.stream[template_endpos:]
            # Save stream
            rmpage.Contents.stream = stream

        # Add template ocg
        ocgtemplate = IndirectPdfDict(Type=PdfName('OCG'), Name='Template')
        ocgprop.OCGs.append(ocgtemplate)
        ocgorderinner.append(ocgtemplate)

        # If a template (which is SVG) has embedded PNG
        # images, those appear as XObjects. This will
        # mess up the layer order, so we will ignore
        # them later.
        template_xobj_keys = \
            re.findall(r'(\/Im[0-9]+)\s',
                        stream[:template_endpos])

        # Page ocg
        stream = rmpage.Contents.stream[:page_inatpos]
        stream += '/OC /ocgpage BDC\n'
        stream += rmpage.Contents.stream[page_inatpos:]
        stream += 'EMC\n'
        # Save stream
        rmpage.Contents.stream = stream

    # Find all other vector layers using the magic
    # point (DocumentPageLayer.render_to_painter()).
    # ?? Why is this a half-point off ??
    while True:
        m = re.search('420.500000 69 m\n', rmpage.Contents.stream)
        if not m:
            break
        layerid = 'ocglayer{}'.format(len(vector_layers) + 1)
        # Replace the marker with the start of a new layer.
        stream = rmpage.Contents.stream[:m.start()]
        if len(vector_layers):
            # close previous layer
            stream += 'EMC\n'
        stream += '/OC /{} BDC\n'.format(layerid)
        stream += rmpage.Contents.stream[m.end():]
        vector_layers.append(layerid)
        rmpage.Contents.stream = stream
    # If we added vector layers, the last one opened is still
    # unterminated and has to be ended.
    if len(vector_layers):
        stream = rmpage.Contents.stream + 'EMC\n'
        rmpage.Contents.stream = stream

    # Done--recompress the stream.
    compress.compress([rmpage.Contents])

    # There shouldn't be any Properties there since we
    # generated the rmpage ourselves, so don't bother
    # checking.
    rmpage.Resources.Properties = PdfDict(ocgpage=ocgpage)
    if not uses_base_pdf:
        rmpage.Resources.Properties.ocgtemplate = ocgtemplate

    # Add individual OCG layers (Bitmap)
    was_vector = True
    # A page without any XObject has no such resource entry at all.
    xobjects = rmpage.Resources.XObject or {}
    for n, key in enumerate(xobjects):
        if str(key) in template_xobj_keys:
            continue
        was_vector = False
        l = n - len(template_xobj_keys)
        # This would indicate a bug in the handling of a
        # notebook.
        try:
            layer = annotations[i][l]
        except (LookupError, TypeError):
            log.error(
                'could not associate XObject with layer: (i, l) ({}, {})'.
                format(i, l))
            log.error(str(annotations))
            # Format the message first, then log it; the values are
            # placeholders for information not available here.
            log.error('document: {} ()'.format('uuid', 'self.visible_name'))
            continue
        layername = layer[0]
        ocg = IndirectPdfDict(Type=PdfName('OCG'), Name=layername)
        ocgprop.OCGs.append(ocg)
        ocgorderinner.append(ocg)
        xobjects[key].OC = ocg

    # Add individual OCG layers (Vector)
    if was_vector:
        for l, layerid in enumerate(vector_layers):
            # This would indicate a bug in the handling of a
            # notebook.
            try:
                layer = annotations[i][l]
            except (LookupError, TypeError):
                log.error(
                    'could not associate layerid with layer: (i, l, layerid) ({}, {}, {})'
                    .format(i, l, layerid))
                # Format the message first, then log it; the values are
                # placeholders for information not available here.
                log.error('document: {} ()'.format('uuid',
                                                   'self.visible_name'))
                log.error(str(annotations))
                continue
            layername = layer[0]
            ocg = IndirectPdfDict(Type=PdfName('OCG'), Name=layername)
            ocgprop.OCGs.append(ocg)
            ocgorderinner.append(ocg)
            rmpage.Resources.Properties[PdfName(layerid)] = \
                ocg

    # Add order of OCGs to primary document
    ocgprop.D.Order.append(ocgpage)
    ocgprop.D.Order.append(ocgorderinner)

    return ocgorderinner

Python PdfArray примеры использования