示例#1
0
文件: base.py 项目: ptwz/pdf-annotate
    def as_pdf_object(self, transform, page):
        """Build the PdfDict for this annotation, ready to be inserted as is
        into the PDF document.

        :param list transform: Transformation matrix mapping the annotation's
            coordinates from client-specified space to PDF user space.
        :param PdfDict page: The pdfrw page object from the PDF document
        :returns PdfDict: the annotation object to be inserted into the PDF
        """
        rect = transform_rect(self.make_rect(), transform)
        ap_dict = self._make_appearance_stream_dict(rect, transform)

        annotation = PdfDict(
            Type=PdfName('Annot'),
            Subtype=PdfName(self.subtype),
            Rect=rect,
            AP=ap_dict,
        )

        # Only some annotations point back at the page that holds them.
        if self._page_as_parent:
            annotation.P = page

        # Every related annotation is rendered with the same transform and
        # page, then attached under the name it was registered with.
        for name in self._related:
            related = self._related[name]
            setattr(annotation, name, related.as_pdf_object(transform, page))

        self._add_metadata(annotation, self._metadata)
        self.add_additional_pdf_object_data(annotation)
        annotation.indirect = True
        return annotation
示例#2
0
文件: base.py 项目: ptwz/pdf-annotate
def _make_border_dict(width, style, dash_array=None):
    """Build a ``/Border`` PdfDict with width ``W`` and style ``S``.

    :param width: value stored under ``W``
    :param str style: style name stored (as a PdfName) under ``S``
    :param dash_array: optional value stored under ``D``; only accepted when
        ``style`` is ``'D'``
    :raises ValueError: if a dash array is given for any other style
    :returns PdfDict: the border dictionary
    """
    border_style = PdfDict(Type=PdfName('Border'), W=width, S=PdfName(style))

    # Nothing more to do when no (or an empty) dash array was supplied.
    if not dash_array:
        return border_style

    if style != 'D':
        raise ValueError('Dash array only applies to dashed borders!')

    border_style.D = dash_array
    return border_style
示例#3
0
文件: base.py 项目: ptwz/pdf-annotate
    def _add_graphics_state_resources(resources, A):
        """Populate ``resources.ExtGState`` with the graphics states the
        appearance needs, e.g. for transparency. If both stroke and fill were
        transparent, this would add:
            << /ExtGState /PdfAnnotatorGS <<
                /CA 0.5 /ca 0.75 /Type /ExtGState
            >> >>
        to the Resources dict.

        Two sources are combined, in this order: the internally derived state
        (if there is one), followed by any states supplied explicitly through
        the appearance object's `graphics_states` property for use in
        explicit content streams. When neither yields anything, ``resources``
        is left untouched.
        """
        internal = Annotation._get_internal_graphics_state(resources, A)
        entries = [] if internal is None else [(GRAPHICS_STATE_NAME, internal)]

        if A.graphics_states:
            entries.extend(
                (name, state.as_pdf_dict())
                for name, state in A.graphics_states.items()
            )

        if not entries:
            return

        ext_g_state = resources.ExtGState = PdfDict()
        for name, state in entries:
            ext_g_state[PdfName(name)] = state
示例#4
0
文件: base.py 项目: ptwz/pdf-annotate
 def _add_xobject_resources(resources, A):
     """Adds in provided, explicit XObjects into the appearance stream's
     Resources dict. This is used when the user is explicitly specifying the
     appearance stream and they want to include, say, an image.
     """
     if A.xobjects:
         resources.XObject = PdfDict()
         for xobject_name, xobject in A.xobjects.items():
             resources.XObject[PdfName(xobject_name)] = xobject
示例#5
0
文件: base.py 项目: ptwz/pdf-annotate
    def _make_ap_resources(self):
        """Build the Resources entry for the appearance stream dictionary.

        Starts from a dict holding only ``ProcSet``, then lets the graphics
        state, XObject and font helpers add their entries from the
        annotation's appearance. Subclasses hook in through
        add_additional_resources to contribute further entries.

        :returns PdfDict: the populated Resources dictionary
        """
        ap_resources = PdfDict(ProcSet=PdfName('PDF'))

        # Built-in contributors, applied in a fixed order.
        self._add_graphics_state_resources(ap_resources, self._appearance)
        self._add_xobject_resources(ap_resources, self._appearance)
        self._add_font_resources(ap_resources, self._appearance)

        # Extension point for subclasses.
        self.add_additional_resources(ap_resources)

        return ap_resources
示例#6
0
文件: base.py 项目: ptwz/pdf-annotate
    def _make_appearance_stream_dict(self, bounding_box, transform):
        """Build the appearance dictionary holding the normal (``N``)
        appearance as a Form XObject.

        :param bounding_box: rectangle used as the form's ``BBox``; its first
            two entries also define the translation stored in ``Matrix``
        :param transform: matrix applied to the content stream before it is
            resolved
        :returns PdfDict: dictionary with the single key ``N``
        """
        ap_resources = self._make_ap_resources()

        # A user-specified content stream wins; otherwise fall back to the
        # one generated for this annotation type.
        content = self._appearance.appearance_stream
        if content is None:
            content = self.make_appearance_stream()

        # Move the content stream into PDF space, then resolve it.
        transformed = content.transform(transform)
        resolved = transformed.resolve()

        form_xobject = PdfDict(
            stream=resolved,
            BBox=bounding_box,
            Resources=ap_resources,
            Matrix=translate(-bounding_box[0], -bounding_box[1]),
            Type=PdfName('XObject'),
            Subtype=PdfName('Form'),
            FormType=1,
        )
        return PdfDict(N=form_xobject)
示例#7
0
文件: base.py 项目: ptwz/pdf-annotate
 def _add_metadata(self, obj, metadata):
     if metadata is None:
         return
     for name, value in metadata.iter():
         obj[PdfName(name)] = serialize_value(value)
示例#8
0
文件: base.py 项目: ptwz/pdf-annotate
 def _add_font_resources(resources, A):
     if A.fonts:
         resources.Font = PdfDict()
         for font_name, font in A.fonts.items():
             resources.Font[PdfName(font_name)] = font
示例#9
0
def update_metadata(trailer, options):
    """Run the configured metadata filters over the PDF's Document
    Information Dictionary (``trailer.Info``).

    The Info dictionary contains keys like Title, Author, Subject, Keywords,
    Creator, Producer, CreationDate, and ModDate (the latter two containing
    Date values, the rest strings).

    For every field that exists in the PDF, plus every field that has its own
    filter entry, the filter functions are applied in sequence: the
    field-specific functions (or the ``"DEFAULT"`` ones when the field has
    none), followed by the ``"ALL"`` functions. After each function the
    result is written back to ``trailer.Info``.

    :param trailer: object exposing the ``Info`` dictionary; an empty one is
        created when it is missing, since filters may add fields.
    :param options: object whose ``metadata_filters`` mapping goes from a
        field name (or ``"DEFAULT"`` / ``"ALL"``) to a sequence of callables.
        Each callable receives the current value (a ``PdfString`` is decoded
        first) and returns a string, a ``datetime`` or ``None``. The mapping
        and its sequences are never modified.
    :raises ValueError: if a filter returns any other type.
    """
    from pdfrw.objects import PdfString, PdfName

    # Create the metadata dict if it doesn't exist, since the caller may be
    # adding fields.
    if not trailer.Info:
        trailer.Info = PdfDict()

    filters = options.metadata_filters

    # All metadata fields that exist in the PDF plus any fields that there
    # are metadata filters for (since they may insert field values).
    # str(k)[1:] drops the first character of the stringified key
    # (presumably the leading "/" of a PdfName -- confirm).
    keys = set(str(k)[1:] for k in trailer.Info.keys()) \
        | set(k for k in filters.keys() if k not in ("DEFAULT", "ALL"))

    # Snapshot the shared filter lists once. Copies are essential: extending
    # the configured lists in place would make the ALL filters pile up on
    # them with every field processed and every call made.
    all_functions = list(filters.get("ALL", []))
    default_functions = list(filters.get("DEFAULT", []))

    for key in keys:
        # Field-specific functions, or DEFAULT if nothing is defined for this
        # field; the ALL functions always run last.
        specific = filters.get(key)
        if specific is None:
            functions = default_functions + all_functions
        else:
            functions = list(specific) + all_functions

        # Run the functions on any existing value.
        value = trailer.Info[PdfName(key)]
        for f in functions:
            # Before passing to the function, convert from a PdfString to a
            # Python string (decode from PDF's "(...)" syntax).
            if isinstance(value, PdfString):
                value = value.decode()

            # Filter the value.
            value = f(value)

            # Convert the Python data type back to something storable.
            if isinstance(value, str) or (sys.version_info < (3, )
                                          and isinstance(value, unicode)):
                value = PdfString.from_unicode(value)

            elif isinstance(value, datetime):
                # Convert datetime into a PDF "D" string format.
                value = value.strftime("%Y%m%d%H%M%S%z")
                if len(value) == 19:
                    # If TZ info was included, add an apostrophe between the
                    # hour/minutes offsets.
                    value = value[:17] + "'" + value[17:]
                value = PdfString("(D:%s)" % value)

            elif value is None:
                # Stored as None below; this relies on the Info dict treating
                # a None assignment as deletion (pdfrw behaviour -- confirm).
                pass

            else:
                # Not every callable has __name__ (e.g. functools.partial),
                # so look it up defensively while building the message.
                raise ValueError(
                    "Invalid type of value returned by metadata_filter function. %s was returned by %s."
                    % (repr(value),
                       getattr(f, "__name__", None) or "anonymous function"))

            # Replace value.
            trailer.Info[PdfName(key)] = value
示例#10
0
文件: djpdf.py 项目: 5l1v3r1/djpdf
        def make_page(page, pdf_page, psem):
            """Populate ``pdf_page`` with the PDF objects describing ``page``.

            This is a generator-based coroutine body (it uses ``yield from``).
            It first builds the thumbnail, background, foreground images and
            foreground masks concurrently, then sets ``MediaBox``, ``Group``,
            and, when present, ``Thumb``, ``Contents``, ``Annots`` and
            ``Resources`` on ``pdf_page``. Finally it bumps the shared page
            counter and reports progress.

            Names taken from the enclosing scope: ``self``, ``pdf_group``,
            ``default_rgb_colorspace``, ``pdf_font_mapping``, ``pdf_pages``,
            ``progress_cb`` and ``finished_pages``.

            :param page: source page; its ``thumbnail``, ``background``,
                ``foreground``, ``text``, ``color``, ``width`` and ``height``
                attributes are read.
            :param pdf_page: PDF page object that receives the entries.
            :param psem: forwarded unchanged to every image-building
                coroutine (presumably a semaphore bounding parallel work --
                confirm with the callers).
            """
            # Prepare everything in parallel
            @asyncio.coroutine
            def get_pdf_thumbnail(psem):
                # A page without a thumbnail yields None.
                if page.thumbnail is None:
                    return None
                return (yield from page.thumbnail.pdf_thumbnail(psem))

            @asyncio.coroutine
            def get_pdf_background(psem):
                # A page without a background yields None.
                if page.background is None:
                    return None
                return (yield from page.background.pdf_image(psem))

            @asyncio.coroutine
            def get_pdf_mask(foreground, psem):
                # Only foregrounds WITHOUT a color get a mask; colored ones
                # yield None.
                if foreground.color is not None:
                    return None
                return (yield from foreground.pdf_mask(psem))

            # The two inner gathers keep the per-foreground results in the
            # same order as page.foreground, so they can be zipped below.
            pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
                yield from asyncio.gather(
                    get_pdf_thumbnail(psem), get_pdf_background(psem),
                    asyncio.gather(
                        *[fg.pdf_image(psem) for fg in page.foreground]),
                    asyncio.gather(
                        *[get_pdf_mask(fg, psem) for fg in page.foreground])))
            pdf_page.MediaBox = PdfArray(
                [0, 0, PdfNumber(page.width),
                 PdfNumber(page.height)])
            pdf_page.Group = pdf_group
            pdf_resources = PdfDict()
            pdf_colorspace = PdfDict()
            pdf_colorspace.DefaultRGB = default_rgb_colorspace
            pdf_resources.ColorSpace = pdf_colorspace
            pdf_xobject = PdfDict()
            if pdf_thumbnail is not None:
                pdf_page.Thumb = pdf_thumbnail
            # Running counter used to name image XObjects /Im0, /Im1, ...
            im_index = 0
            # Save graphics state and scale unity rectangle to page size
            matrix = TransformationMatrix()
            matrix.scale(page.width, page.height)
            before_graphics = ("q\n" + "%s cm\n" % matrix.to_pdf())
            after_graphics = "\nQ\n"
            contents = ""
            graphics = ""
            # Last fill color emitted, so an "rg" operator is only written
            # when the color actually changes.
            current_color = None
            # A non-white page gets its color painted over the unit square
            # (which the "cm" above scales to the full page).
            if page.color != self._factory.WHITE:
                if current_color != page.color:
                    current_color = page.color
                    graphics += page.color.to_pdf() + " rg "
                graphics += ("0 0 1 1 re " + "f\n")

            if pdf_background is not None:
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            for foreground, pdf_foreground, pdf_mask in zip(
                    page.foreground, pdf_foregrounds, pdf_masks):
                # A mask is registered under its own name right before its
                # foreground but is never painted with "Do" here (presumably
                # it is referenced from the foreground image -- confirm).
                if pdf_mask is not None:
                    pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
                    im_index += 1
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
                if (foreground.color is not None
                        and current_color != foreground.color):
                    current_color = foreground.color
                    graphics += foreground.color.to_pdf() + " rg "
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            # Only wrap in q ... Q when something was actually drawn.
            if graphics:
                contents += (before_graphics + graphics.rstrip(" \n") +
                             after_graphics)
            current_color = None
            # Text object using font /F1 at size 1 with text rendering mode 3
            # (mode 3 is invisible text per the PDF spec).
            before_text = ("BT\n" + "/F1 1 Tf 3 Tr\n")
            after_text = "\nET\n"
            text = ""
            pdf_annots = []
            for t in page.text:
                if t.text:
                    matrix = TransformationMatrix()
                    # Glyph size is 0.5 x 1
                    # so len(t.text) glyphs scaled by 2 / len(t.text) span a
                    # width of exactly 1.
                    matrix.scale(2 / len(t.text), 1)
                    # Move the origin to the center of the unit square so the
                    # rotations below turn the text around its center.
                    matrix.translate(-0.5, -0.5)
                    if t.direction == "ltr":
                        pass
                    elif t.direction == "rtl":
                        matrix.translate(0, -1)
                    elif t.direction == "ttb":
                        matrix.rotate(90)
                    matrix.rotate(-t.rotation)
                    matrix.translate(0.5, 0.5)
                    # Stretch to the text box size and move it into place.
                    matrix.scale(t.width, t.height)
                    matrix.translate(t.x, t.y)
                    # The string is written as UTF-16-BE bytes in hex form.
                    text += "%s Tm %s Tj\n" % (
                        matrix.to_pdf(), PdfString().from_bytes(
                            t.text.encode("utf-16-be"), bytes_encoding="hex"))
                # Links are handled independently of whether the entry has
                # any text: one Link annotation covering the entry's box.
                if t.external_link is not None or t.internal_link is not None:
                    pdf_annot = PdfDict()
                    pdf_annots.append(pdf_annot)
                    pdf_annot.Type = PdfName.Annot
                    pdf_annot.Subtype = PdfName.Link
                    pdf_annot.Border = [0, 0, 0]
                    pdf_annot.Rect = [
                        PdfNumber(t.x),
                        PdfNumber(t.y),
                        PdfNumber(t.x + t.width),
                        PdfNumber(t.y + t.height)
                    ]
                    if t.external_link is not None:
                        # URI action; external_link is decoded as latin-1
                        # (so it is expected to be a bytes-like value).
                        pdf_a = PdfDict()
                        pdf_annot.A = pdf_a
                        pdf_a.Type = PdfName.Action
                        pdf_a.S = PdfName.URI
                        pdf_a.URI = t.external_link.decode("latin-1")
                    if t.internal_link is not None:
                        # internal_link is (page index, (x, y)); the index is
                        # looked up in pdf_pages from the enclosing scope.
                        pdf_target_page = pdf_pages[t.internal_link[0]]
                        target_x, target_y = t.internal_link[1]
                        pdf_annot.Dest = [
                            pdf_target_page, PdfName.XYZ,
                            PdfNumber(target_x),
                            PdfNumber(target_y), 0
                        ]
            text = text.rstrip(" \n")
            if text:
                # The font resource is only attached when text was emitted.
                pdf_resources.Font = pdf_font_mapping
                contents += (before_text + text + after_text)
            contents = contents.rstrip(" \n")
            if contents:
                pdf_contents = PdfDict()
                pdf_contents.indirect = True
                pdf_page.Contents = pdf_contents
                if COMPRESS_PAGE_CONTENTS:
                    pdf_contents.Filter = [PdfName.FlateDecode]
                    # The stream is stored as str: the compressed bytes are
                    # mapped 1:1 to code points through latin-1.
                    pdf_contents.stream = zlib.compress(
                        contents.encode("latin-1"), 9).decode("latin-1")
                else:
                    pdf_contents.stream = contents
            if pdf_annots:
                pdf_page.Annots = pdf_annots
            if pdf_xobject:
                pdf_resources.XObject = pdf_xobject
            # NOTE(review): ColorSpace is always set above, so this check is
            # expected to be true every time -- confirm.
            if pdf_resources:
                pdf_page.Resources = pdf_resources
            # Report progress
            nonlocal finished_pages
            finished_pages += 1
            if progress_cb:
                progress_cb(finished_pages / len(self._pages))
示例#11
0
文件: djpdf.py 项目: 5l1v3r1/djpdf
    def _build_font():
        """Assemble the Type0 "GlyphLessFont" font dictionary.

        The font program is read from ``FONT_FILENAME`` and the ToUnicode
        CMap from ``UNICODE_CMAP_FILENAME``; both are embedded as
        Flate-compressed streams. A single CIDFontType2 descendant carries
        the font descriptor and a CID-to-GID map that sends every CID to
        glyph 1.

        :returns PdfDict: the indirect Type0 font dictionary
        """
        def _flate_stream(raw, with_length1):
            # Streams are stored as str, so the compressed bytes are mapped
            # 1:1 to code points through latin-1.
            packed = PdfDict()
            packed.indirect = True
            packed.Filter = [PdfName.FlateDecode]
            packed.stream = zlib.compress(raw, 9).decode("latin-1")
            if with_length1:
                packed.Length1 = len(raw)
            return packed

        with open(FONT_FILENAME, "rb") as font_file:
            raw_font = font_file.read()
        font_program = _flate_stream(raw_font, True)

        descriptor = PdfDict(
            indirect=True,
            Ascent=1000,
            CapHeight=1000,
            Descent=-1,
            Flags=5,  # FixedPitch + Symbolic
            FontBBox=PdfArray([0, 0, 1000, 500]),
            FontFile2=font_program,
            FontName=PdfName.GlyphLessFont,
            ItalicAngle=0,
            StemV=80,
            Type=PdfName.FontDescriptor,
        )

        # Map everything to glyph 1
        glyph_map = _flate_stream(b"\0\1" * (1 << 16), True)

        system_info = PdfDict(
            Ordering=PdfString.from_unicode("Identity"),
            Registry=PdfString.from_unicode("Adobe"),
            Supplement=0,
        )

        descendant = PdfDict(
            indirect=True,
            CIDToGIDMap=glyph_map,
            BaseFont=PdfName.GlyphLessFont,
            CIDSystemInfo=system_info,
            FontDescriptor=descriptor,
            Subtype=PdfName.CIDFontType2,
            Type=PdfName.Font,
            DW=500,
        )

        with open(UNICODE_CMAP_FILENAME, "rb") as cmap_file:
            raw_cmap = cmap_file.read()
        to_unicode = _flate_stream(raw_cmap, False)

        return PdfDict(
            indirect=True,
            BaseFont=PdfName.GlyphLessFont,
            DescendantFonts=PdfArray([descendant]),
            Encoding=PdfName("Identity-H"),
            Subtype=PdfName.Type0,
            ToUnicode=to_unicode,
            Type=PdfName.Font,
        )