示例#1
0
 def addBookmark(self, title, pagenum, parent=None):
     """
     Create a bookmark called ``title`` that points at page ``pagenum`` and
     store it in ``self.bookmarks``.

     ``self.bookmarks`` is treated as a nested list in which a list placed
     directly after a bookmark holds that bookmark's children.

     ``parent`` may be:
       * ``None`` - the bookmark is appended at the top level;
       * a list   - used as-is as the index path of the parent bookmark;
       * anything else - handed to ``self.findBookmark`` and the result is
         used as the index path of the parent bookmark.
     """
     at_top_level = parent is None
     if at_top_level:
         # Computed for parity with the other branches; the top-level case
         # below never reads it.
         path = [len(self.bookmarks) - 1]
     elif type(parent) == list:
         path = parent
     else:
         path = self.findBookmark(parent)

     # NOTE(review): 826 is presumably the /FitH top coordinate - confirm.
     entry = Bookmark(
         TextStringObject(title),
         NumberObject(pagenum),
         NameObject('/FitH'),
         NumberObject(826),
     )

     if at_top_level:
         self.bookmarks.append(entry)
         return

     # Descend to the list that contains the parent bookmark itself.
     siblings = self.bookmarks
     for index in path[:-1]:
         siblings = siblings[index]

     # The slot right after the parent is where its child list lives.
     child_slot = path[-1] + 1
     has_child_list = (child_slot < len(siblings)
                       and type(siblings[child_slot]) == list)
     if has_child_list:
         siblings[child_slot].append(entry)
     else:
         siblings.insert(child_slot, [entry])
示例#2
0
 def addNamedDestination(self, title, pagenum):
     """
     Register a named destination called ``title`` that points at page
     ``pagenum``; the new destination is appended to ``self.named_dests``.
     """
     # NOTE(review): 826 is presumably the /FitH top coordinate - confirm.
     target = Destination(
         TextStringObject(title),
         NumberObject(pagenum),
         NameObject('/FitH'),
         NumberObject(826),
     )
     self.named_dests.append(target)
示例#3
0
 def _addPage(self, page, action):
     assert page["/Type"] == "/Page"
     page[NameObject("/Parent")] = self._pages
     page = self._addObject(page)
     pages = self.getObject(self._pages)
     action(pages["/Kids"], page)
     pages[NameObject("/Count")] = NumberObject(pages["/Count"] + 1)
示例#4
0
 def __init__(self):
     """
     Set up an empty document: the file header, the object table, the page
     tree root, the info dictionary and the catalog (in that order).
     """
     self._header = b_("%PDF-1.3")
     # Indirect objects owned by this document.
     self._objects = []

     def register(entries):
         # Wrap ``entries`` in a DictionaryObject and add it to the document.
         node = DictionaryObject()
         node.update(entries)
         return self._addObject(node)

     # Root of the page tree; it starts out without any kids.
     self._pages = register({
         NameObject("/Type"): NameObject("/Pages"),
         NameObject("/Count"): NumberObject(0),
         NameObject("/Kids"): ArrayObject()
     })

     # Document information dictionary.
     self._info = register({
         NameObject("/Producer"):
         createStringObject(
             u"Python PDF Library - http://pybrary.net/pyPdf/")
     })

     # Catalog, pointing at the page tree root created above.
     self._root = register({
         NameObject("/Type"): NameObject("/Catalog"),
         NameObject("/Pages"): self._pages
     })
示例#5
0
    def addBookmark(self, title, pagenum, parent=None):
        """
        Add an outline entry called ``title`` that jumps to the page at
        index ``pagenum`` of the page tree root's ``/Kids`` array.

        ``parent`` is a reference to the outline item the new bookmark is
        attached to; when it is ``None`` the outline root is used.

        Returns the reference of the newly added bookmark object.
        """
        target_page = self.getObject(self._pages)['/Kids'][pagenum]

        # /GoTo action describing where the bookmark leads.
        # NOTE(review): 826 is presumably the /FitH top coordinate - confirm.
        view = ArrayObject([target_page,
                            NameObject('/FitH'),
                            NumberObject(826)])
        goto = DictionaryObject()
        goto.update({
            NameObject('/D'): view,
            NameObject('/S'): NameObject('/GoTo')
        })
        goto_ref = self._addObject(goto)

        # The outline root is requested unconditionally, even when an
        # explicit parent was supplied.
        root_ref = self.getOutlineRoot()
        owner_ref = root_ref if parent is None else parent

        item = TreeObject()
        item.update({
            NameObject('/A'): goto_ref,
            NameObject('/Title'): createStringObject(title)
        })
        item_ref = self._addObject(item)

        owner_ref.getObject().addChild(item_ref, self)
        return item_ref
示例#6
0
 def add(self, title, pagenum):
     """
     Build a /GoTo bookmark called ``title`` for the page at index
     ``pagenum`` of ``self.pdf``'s page tree root, register it with
     ``self.pdf`` and attach it as a child of ``self.tree``.
     """
     writer = self.pdf
     target_page = writer.getObject(writer._pages)['/Kids'][pagenum]

     # NOTE(review): 826 is presumably the /FitH top coordinate - confirm.
     view = ArrayObject([target_page,
                         NameObject('/FitH'),
                         NumberObject(826)])
     goto = DictionaryObject()
     goto.update({
         NameObject('/D'): view,
         NameObject('/S'): NameObject('/GoTo'),
     })
     goto_ref = writer._addObject(goto)

     item = TreeObject()
     item.update({
         NameObject('/A'): goto_ref,
         NameObject('/Title'): createStringObject(title),
     })
     writer._addObject(item)
     # The bookmark object itself (not its reference) is handed to the tree.
     self.tree.addChild(item)
示例#7
0
 def _associate_dests_to_pages(self, pages):
     for nd in self.named_dests:
         pageno = None
         np = nd['/Page']
         if type(np) == NumberObject:
             continue
         for p in pages:
             if np.getObject() == p.pagedata.getObject():
                 pageno = p.id
         if pageno is not None:
             nd[NameObject('/Page')] = NumberObject(pageno)
         else:
             raise ValueError("Unresolved named destination '%s'" %
                              nd['/Title'])
示例#8
0
 def encrypt(self, user_pwd, owner_pwd=None, use_128bit=True):
     """
     Prepare this document for encryption with the ``/Standard`` filter.

     :param user_pwd: the user password.
     :param owner_pwd: the owner password; defaults to ``user_pwd`` when
         ``None``.
     :param use_128bit: when true a 16-byte key is used (``/V`` 2,
         ``/R`` 3); otherwise a 5-byte key is used (``/V`` 1, ``/R`` 2).

     The method stores the generated file identifier in ``self._ID``, the
     reference of the new encryption dictionary in ``self._encrypt`` and
     the computed key in ``self._encrypt_key``.
     """
     import time
     import random
     if owner_pwd is None:
         owner_pwd = user_pwd
     # Floor division keeps the key length an int even when "/" performs
     # true division (Python 3, or ``from __future__ import division``).
     if use_128bit:
         V = 2
         rev = 3
         keylen = 128 // 8
     else:
         V = 1
         rev = 2
         keylen = 40 // 8
     # permit everything:
     P = -1
     O = ByteStringObject(_alg33(owner_pwd, user_pwd, rev, keylen))
     # md5() only accepts bytes on Python 3; repr() of a float is pure
     # ASCII, so encoding yields the same digest input on Python 2.
     ID_1 = md5(repr(time.time()).encode("ascii")).digest()
     ID_2 = md5(repr(random.random()).encode("ascii")).digest()
     self._ID = ArrayObject(
         (ByteStringObject(ID_1), ByteStringObject(ID_2)))
     if rev == 2:
         U, key = _alg34(user_pwd, O, P, ID_1)
     else:
         assert rev == 3
         U, key = _alg35(user_pwd, rev, keylen, O, P, ID_1, False)
     encrypt = DictionaryObject()
     encrypt[NameObject("/Filter")] = NameObject("/Standard")
     encrypt[NameObject("/V")] = NumberObject(V)
     if V == 2:
         # /Length is expressed in bits.
         encrypt[NameObject("/Length")] = NumberObject(keylen * 8)
     encrypt[NameObject("/R")] = NumberObject(rev)
     encrypt[NameObject("/O")] = ByteStringObject(O)
     encrypt[NameObject("/U")] = ByteStringObject(U)
     encrypt[NameObject("/P")] = NumberObject(P)
     self._encrypt = self._addObject(encrypt)
     self._encrypt_key = key
示例#9
0
    def addNamedDestination(self, title, pagenum):
        """
        Add a named destination called ``title`` that jumps to the page at
        index ``pagenum`` of the page tree root's ``/Kids`` array.

        The ``title`` and the reference of the new /GoTo dictionary are
        appended, in that order, to the object returned by
        ``self.getNamedDestRoot()``.

        Returns the reference of the new /GoTo dictionary.
        """
        target_page = self.getObject(self._pages)['/Kids'][pagenum]

        # NOTE(review): 826 is presumably the /FitH top coordinate - confirm.
        view = ArrayObject([target_page,
                            NameObject('/FitH'),
                            NumberObject(826)])
        goto = DictionaryObject()
        goto.update({
            NameObject('/D'): view,
            NameObject('/S'): NameObject('/GoTo')
        })
        goto_ref = self._addObject(goto)

        self.getNamedDestRoot().extend([title, goto_ref])
        return goto_ref
示例#10
0
    def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
        if bookmarks is None:
            bookmarks = self.bookmarks

        for b in bookmarks:
            if type(b) == list:
                self._associate_bookmarks_to_pages(pages, b)
                continue
            pageno = None
            bp = b['/Page']
            if type(bp) == NumberObject:
                continue
            for p in pages:
                if bp.getObject() == p.pagedata.getObject():
                    pageno = p.id
            if pageno is not None:
                b[NameObject('/Page')] = NumberObject(pageno)
            else:
                raise ValueError("Unresolved bookmark '%s'" % b['/Title'])
示例#11
0
 def _rotate(self, angle):
     """
     Add ``angle`` to this object's ``/Rotate`` entry, treating a missing
     entry as 0, and store the sum back under ``/Rotate``.
     """
     rotation = self.get("/Rotate", 0) + angle
     self[NameObject("/Rotate")] = NumberObject(rotation)
示例#12
0
    def write(self, stream):
        """
        Serialize every object of this document to ``stream`` as a PDF file:
        header, numbered objects, cross-reference table, trailer and the
        ``startxref`` / ``%%EOF`` footer.

        :param stream: an object providing ``write()`` and ``tell()``;
            ``tell()`` is used to record object and xref offsets.

        When ``encrypt`` has been called beforehand (``self._encrypt``
        exists), every object except the encryption dictionary itself is
        written with a per-object key derived from ``self._encrypt_key``.
        """
        externalReferenceMap = {}

        # PDF objects sometimes have circular references to their /Page objects
        # inside their object tree (for example, annotations).  Those will be
        # indirect references to objects that we've recreated in this PDF.  To
        # address this problem, PageObject's store their original object
        # reference number, and we add it to the external reference map before
        # we sweep for indirect references.  This forces self-page-referencing
        # trees to reference the correct new object location, rather than
        # copying in a new copy of the page object.
        for objIndex, obj in enumerate(self._objects):
            if isinstance(obj, PageObject) and obj.indirectRef is not None:
                data = obj.indirectRef
                generations = externalReferenceMap.setdefault(data.pdf, {})
                by_idnum = generations.setdefault(data.generation, {})
                # Object numbers are 1-based, list indexes are 0-based.
                by_idnum[data.idnum] = IndirectObject(objIndex + 1, 0, self)

        self.stack = []
        self._sweepIndirectReferences(externalReferenceMap, self._root)
        del self.stack

        # Begin writing:
        object_positions = []
        stream.write(self._header + b_("\n"))
        for idnum, obj in enumerate(self._objects, 1):
            object_positions.append(stream.tell())
            stream.write(b_(str(idnum) + " 0 obj\n"))
            key = None
            if hasattr(self, "_encrypt") and idnum != self._encrypt.idnum:
                # Per-object key: document key + low 3 bytes of the object
                # number + low 2 bytes of the generation (always 0 here),
                # hashed with MD5 and truncated.
                pack1 = struct.pack("<i", idnum)[:3]
                pack2 = struct.pack("<i", 0)[:2]
                key = self._encrypt_key + pack1 + pack2
                assert len(key) == (len(self._encrypt_key) + 5)
                md5_hash = md5(key).digest()
                key = md5_hash[:min(16, len(self._encrypt_key) + 5)]
            if obj is not None:
                obj.writeToStream(stream, key)
                stream.write(b_("\nendobj\n"))

        # xref table
        xref_location = stream.tell()
        stream.write(b_("xref\n"))
        stream.write(b_("0 %s\n" % (len(self._objects) + 1)))
        stream.write(b_("%010d %05d f \n" % (0, 65535)))
        for offset in object_positions:
            stream.write(b_("%010d %05d n \n" % (offset, 0)))

        # trailer
        stream.write(b_("trailer\n"))
        trailer = DictionaryObject()
        trailer.update({
            NameObject("/Size"):
            NumberObject(len(self._objects) + 1),
            NameObject("/Root"):
            self._root,
            NameObject("/Info"):
            self._info
        })
        if hasattr(self, "_ID"):
            trailer[NameObject("/ID")] = self._ID
        if hasattr(self, "_encrypt"):
            trailer[NameObject("/Encrypt")] = self._encrypt
        trailer.writeToStream(stream, None)

        # eof
        stream.write(b_("\nstartxref\n%s\n%%%%EOF\n" % (xref_location)))
示例#13
0
文件: reader.py 项目: jeansch/PyPDF2
    def getObject(self, indirectReference):
        """
        Resolve ``indirectReference`` to the object it points at.

        Lookup order:
          1. the ``self.resolvedObjects`` cache;
          2. for generation-0 references listed in ``self.xref_objStm``, the
             containing object stream (all objects of that stream are read
             and cached in one pass);
          3. the file offset recorded in ``self.xref``.

        Returns the resolved object, or ``None`` (after emitting a
        ``utils.PdfReadWarning``) when the reference is not in ``self.xref``.

        Raises ``utils.PdfReadError`` when the object header found at the
        recorded offset carries a different object number, unless
        ``self.xrefIndex`` is set and the reader is not strict.
        Raises ``Exception`` when the file is encrypted and no decryption key
        is available.
        """
        retval = self.resolvedObjects.get(indirectReference.generation,
                                          {}).get(indirectReference.idnum,
                                                  None)
        if retval is not None:
            return retval
        if indirectReference.generation == 0 \
                and indirectReference.idnum in self.xref_objStm:
            # indirect reference to object in object stream
            # read the entire object stream into memory
            stmnum, idx = self.xref_objStm[indirectReference.idnum]
            objStm = IndirectObject(stmnum, 0, self).getObject()
            assert objStm['/Type'] == '/ObjStm'
            assert idx < objStm['/N']
            streamData = StringIO(objStm.getData())
            for i in range(objStm['/N']):
                # The stream starts with (object number, offset) pairs.
                objnum = NumberObject.readFromStream(streamData)
                readNonWhitespace(streamData)
                streamData.seek(-1, 1)
                offset = NumberObject.readFromStream(streamData)
                readNonWhitespace(streamData)
                streamData.seek(-1, 1)
                # Remember the position in the pair table, jump to the
                # object body, then come back for the next pair.
                t = streamData.tell()
                streamData.seek(objStm['/First']+offset, 0)
                obj = readObject(streamData, self)
                self.resolvedObjects[0][objnum] = obj
                streamData.seek(t, 0)
            return self.resolvedObjects[0][indirectReference.idnum]
        if indirectReference.idnum \
                not in self.xref[indirectReference.generation]:
            warnings.warn("Object %d %d not defined." % (
                indirectReference.idnum, indirectReference.generation),
                utils.PdfReadWarning)
            return None
        start = self.xref[indirectReference.generation][
            indirectReference.idnum]
        self.stream.seek(start, 0)
        idnum, generation = self.readObjectHeader(self.stream)
        # Compared explicitly instead of via ``assert`` so that the check is
        # still performed when Python runs with -O (asserts are stripped).
        if idnum != indirectReference.idnum:
            if not self.xrefIndex:  # some other problem
                raise utils.PdfReadError("Expected object ID (%d %d) does not "
                                         " match actual (%d %d)." % (
                                             indirectReference.idnum,
                                             indirectReference.generation,
                                             idnum, generation))
            # Xref table probably had bad indexes due to not
            # being zero-indexed
            if self.strict:
                raise utils.PdfReadError(
                    "Expected object ID (%d %d) does "
                    "not match actual (%d %d); xref "
                    "table not zero-indexed." % (
                        indirectReference.idnum,
                        indirectReference.generation,
                        idnum,
                        generation))
            # Non-strict: should not happen since the xref table is
            # corrected in non-strict mode; carry on regardless.
        assert generation == indirectReference.generation
        retval = readObject(self.stream, self)
        # override encryption is used for the /Encrypt dictionary
        if not self._override_encryption and self.isEncrypted:
            # if we don't have the encryption key:
            if not hasattr(self, '_decryption_key'):
                raise Exception("file has not been decrypted")
            # otherwise, decrypt here...
            pack1 = struct.pack("<i", indirectReference.idnum)[:3]
            pack2 = struct.pack("<i", indirectReference.generation)[:2]
            key = self._decryption_key + pack1 + pack2
            assert len(key) == (len(self._decryption_key) + 5)
            md5_hash = md5(key).digest()
            key = md5_hash[:min(16, len(self._decryption_key) + 5)]
            retval = self._decryptObject(retval, key)

        self.cacheIndirectObject(generation, idnum, retval)
        return retval
示例#14
0
    def merge(self, position, fileobj, bookmark=None,
              pages=None, import_bookmarks=True):
        """
        Insert pages of the document given by ``fileobj`` into this merger
        at page index ``position``.

        ``fileobj`` may be a path (``str``/``unicode``), a ``file`` or a
        ``PdfFileReader``; a path is opened in binary mode, while the
        contents of a file or of a reader's stream are copied into a
        ``StringIO``. Any other object is passed to ``PdfFileReader``
        unchanged.

        ``bookmark`` - optional text; when truthy a bookmark with this text
        is added for the first merged page, with the imported outline stored
        right after it.

        ``pages`` - optional ``(start, end)`` style tuple handed to
        ``range()``; when ``None`` every page of the source is merged.
        A bare number or string raises ``TypeError``.

        ``import_bookmarks`` - when false the source outline is not
        imported.
        """

        # Recorded in self.inputs: True when the stream handed to the reader
        # was created inside this method.
        owns_stream = False

        source_type = type(fileobj)
        if source_type in (str, unicode):
            # A path: open it ourselves.
            fileobj = file(fileobj, 'rb')
            owns_stream = True
        elif source_type == file:
            # An open file: work on an in-memory copy of its contents.
            fileobj.seek(0)
            fileobj = StringIO(fileobj.read())
            owns_stream = True
        elif source_type == PdfFileReader:
            # A reader: copy its stream, then put the stream back where
            # it was.
            saved_position = fileobj.stream.tell()
            fileobj.stream.seek(0)
            copied = StringIO(fileobj.stream.read())
            fileobj.stream.seek(saved_position)
            fileobj = copied
            owns_stream = True

        # Reader over the stream prepared above (or over the untouched
        # object when it matched none of the cases).
        pdfr = PdfFileReader(fileobj, strict=self.strict)

        # Work out which pages to take.
        if pages is None:
            pages = (0, pdfr.getNumPages())
        elif type(pages) in (int, float, str, unicode):
            raise TypeError('"pages" must be a tuple of (start, end)')

        srcpages = []

        if bookmark:
            # Points at the id the first merged page is about to receive.
            bookmark = Bookmark(TextStringObject(bookmark),
                                NumberObject(self.id_count),
                                NameObject('/Fit'))

        outline = []
        if import_bookmarks:
            outline = self._trim_outline(pdfr, pdfr.getOutlines(), pages)

        if bookmark:
            self.bookmarks += [bookmark, outline]
        else:
            self.bookmarks += outline

        self.named_dests += self._trim_dests(
            pdfr, pdfr.namedDestinations, pages)

        # Wrap every selected page, giving each a fresh id.
        for index in range(*pages):
            source_page = pdfr.getPage(index)

            merged_id = self.id_count
            self.id_count += 1

            srcpages.append(_MergedPage(source_page, pdfr, merged_id))

        self._associate_dests_to_pages(srcpages)
        self._associate_bookmarks_to_pages(srcpages)

        # Splice the new pages in at the requested position.
        self.pages[position:position] = srcpages

        # Remember the input so it can be closed later.
        self.inputs.append((fileobj, pdfr, owns_stream))
示例#15
0
    def getObject(self, indirectReference):
        """
        Resolve ``indirectReference`` to the object it points at.

        Lookup order:
          1. the ``self.resolvedObjects`` cache;
          2. for generation-0 references listed in ``self.xref_objStm``, the
             containing object stream (all objects of that stream are read
             and cached in one pass);
          3. the file offset recorded in ``self.xref``.

        Returns the resolved object, or ``None`` (after emitting a
        ``utils.PdfReadWarning``) when the reference is not in ``self.xref``.

        Raises ``utils.PdfReadError`` when the object header found at the
        recorded offset carries a different object number, unless
        ``self.xrefIndex`` is set and the reader is not strict.
        Raises ``Exception`` when the file is encrypted and no decryption key
        is available.
        """
        retval = self.resolvedObjects.get(indirectReference.generation,
                                          {}).get(indirectReference.idnum,
                                                  None)
        if retval is not None:
            return retval
        if indirectReference.generation == 0 \
                and indirectReference.idnum in self.xref_objStm:
            # indirect reference to object in object stream
            # read the entire object stream into memory
            stmnum, idx = self.xref_objStm[indirectReference.idnum]
            objStm = IndirectObject(stmnum, 0, self).getObject()
            assert objStm['/Type'] == '/ObjStm'
            assert idx < objStm['/N']
            streamData = StringIO(objStm.getData())
            for i in range(objStm['/N']):
                # The stream starts with (object number, offset) pairs.
                objnum = NumberObject.readFromStream(streamData)
                readNonWhitespace(streamData)
                streamData.seek(-1, 1)
                offset = NumberObject.readFromStream(streamData)
                readNonWhitespace(streamData)
                streamData.seek(-1, 1)
                # Remember the position in the pair table, jump to the
                # object body, then come back for the next pair.
                t = streamData.tell()
                streamData.seek(objStm['/First'] + offset, 0)
                obj = readObject(streamData, self)
                self.resolvedObjects[0][objnum] = obj
                streamData.seek(t, 0)
            return self.resolvedObjects[0][indirectReference.idnum]
        if indirectReference.idnum \
                not in self.xref[indirectReference.generation]:
            warnings.warn(
                "Object %d %d not defined." %
                (indirectReference.idnum, indirectReference.generation),
                utils.PdfReadWarning)
            return None
        start = self.xref[indirectReference.generation][
            indirectReference.idnum]
        self.stream.seek(start, 0)
        idnum, generation = self.readObjectHeader(self.stream)
        # Compared explicitly instead of via ``assert`` so that the check is
        # still performed when Python runs with -O (asserts are stripped).
        if idnum != indirectReference.idnum:
            if not self.xrefIndex:  # some other problem
                raise utils.PdfReadError(
                    "Expected object ID (%d %d) does not "
                    " match actual (%d %d)." %
                    (indirectReference.idnum, indirectReference.generation,
                     idnum, generation))
            # Xref table probably had bad indexes due to not
            # being zero-indexed
            if self.strict:
                raise utils.PdfReadError(
                    "Expected object ID (%d %d) does "
                    "not match actual (%d %d); xref "
                    "table not zero-indexed." %
                    (indirectReference.idnum, indirectReference.generation,
                     idnum, generation))
            # Non-strict: should not happen since the xref table is
            # corrected in non-strict mode; carry on regardless.
        assert generation == indirectReference.generation
        retval = readObject(self.stream, self)
        # override encryption is used for the /Encrypt dictionary
        if not self._override_encryption and self.isEncrypted:
            # if we don't have the encryption key:
            if not hasattr(self, '_decryption_key'):
                raise Exception("file has not been decrypted")
            # otherwise, decrypt here...
            pack1 = struct.pack("<i", indirectReference.idnum)[:3]
            pack2 = struct.pack("<i", indirectReference.generation)[:2]
            key = self._decryption_key + pack1 + pack2
            assert len(key) == (len(self._decryption_key) + 5)
            md5_hash = md5(key).digest()
            key = md5_hash[:min(16, len(self._decryption_key) + 5)]
            retval = self._decryptObject(retval, key)

        self.cacheIndirectObject(generation, idnum, retval)
        return retval