Пример #1
0
def editPDF(filename):
    """Write a copy of a PDF with '/Tags' and '/Keywords' metadata added.

    The copy is named after ``filename`` with its extension replaced by
    ``_updated.pdf`` and is written into the module-level ``directory``.
    Both metadata entries get the value stored in the module-level
    ``tag_dict`` under the key ``filename``.

    Args:
        filename: Path of the PDF to read; also the key into ``tag_dict``.

    Raises:
        KeyError: If ``filename`` has no entry in ``tag_dict``.
    """
    # splitext copes with any extension length, unlike a fixed [:-4] slice.
    updated_name = os.path.splitext(filename)[0] + '_updated.pdf'

    output = PdfFileWriter()
    # The context managers guarantee both handles are closed even when the
    # tag lookup or pyPdf raises part-way through.
    with open(filename, 'rb') as fin:
        pdf_in = PdfFileReader(fin)
        # The writer's info dictionary is reached through the private _info
        # attribute and updated in place.
        infoDict = output._info.getObject()

        # Example tags; adjust the keys/values to whatever needs to be added.
        infoDict.update({
            NameObject('/Tags'):
            createStringObject(tag_dict[filename]),
            NameObject('/Keywords'):
            createStringObject(tag_dict[filename]),
        })
        for page in range(pdf_in.getNumPages()):
            output.addPage(pdf_in.getPage(page))

        # The input stays open while writing: the pages added above still
        # belong to the reader that wraps ``fin``.
        with open(os.path.join(directory, updated_name), 'wb') as outputStream:
            output.write(outputStream)
Пример #2
0
    def __iter__(self):
        """Yield the serialized PDF as a sequence of string chunks.

        Mostly based on PdfFileWriter.write, but each piece (header, every
        indirect object, xref table, trailer, EOF marker) is yielded as soon
        as it is ready instead of being written to a single stream.

        DeprecationWarning is ignored while the generator runs and reset to
        'default' when it finishes, is closed early, or raises.
        """
        warnings.simplefilter('ignore', DeprecationWarning)
        try:
            # Begin writing, so that, even if _sweepIndirectReferences takes
            # a long time, the download begins
            object_positions = []
            length = 0  # running offset of everything yielded so far
            s = self._header + "\n"
            yield s
            length += len(s)

            externalReferenceMap = {}
            # NOTE(review): self.set looks like scratch state consumed by
            # _sweepIndirectReferences -- confirm against that method.
            self.set = set()
            self._sweepIndirectReferences(externalReferenceMap, self._root)
            del self.set

            stream = StringIO.StringIO()
            for i, obj in enumerate(self._objects):
                idnum = (i + 1)
                # Offset of this object, needed for the xref table below.
                object_positions.append(length)
                s1 = str(idnum) + " 0 obj\n"
                obj.writeToStream(stream, None)
                s2 = stream.getvalue() + "\nendobj\n"
                yield s1 + s2
                length += len(s1) + len(s2)
                # Empty the scratch buffer for the next object. seek(0) is
                # used rather than reset() because reset() is only provided
                # by cStringIO objects, not by StringIO.StringIO.
                stream.seek(0)
                stream.truncate()

            # xref table
            xref_location = length
            yield("xref\n")
            yield("0 %s\n" % (len(self._objects) + 1))
            yield "0000000000 65535 f \n"

            for offset in object_positions:
                yield "%010d 00000 n \n" % offset

            # trailer
            yield("trailer\n")
            trailer = DictionaryObject()
            trailer.update({
                    NameObject("/Size"): NumberObject(len(self._objects) + 1),
                    NameObject("/Root"): self._root,
                    NameObject("/Info"): self._info,
                    })
            if hasattr(self, "_ID"):
                trailer[NameObject("/ID")] = self._ID
            if hasattr(self, "_encrypt"):
                trailer[NameObject("/Encrypt")] = self._encrypt
            trailer.writeToStream(stream, None)
            yield stream.getvalue()

            # eof
            yield("\nstartxref\n%s\n%%%%EOF\n" % (xref_location))
        finally:
            # Runs on exhaustion, on early close() and on errors, so the
            # process is never left with DeprecationWarning silenced.
            warnings.simplefilter('default', DeprecationWarning)
def addCopyrightToPDF(pdf_file_location, pdf_file_destination, copyrightText, drawText=True, title="", authors=""):
    """Stamp copyright text on the last page of a PDF and set title/author.

    A one-page overlay is rendered with Reportlab and merged onto the last
    page of the source document; all pages are then written to the
    destination together with '/Title' and '/Author' metadata.

    Args:
        pdf_file_location: Path of the PDF to read.
        pdf_file_destination: Path of the PDF to write.
        copyrightText: Text to stamp; lines are separated by "\\n". The first
            line is drawn at y=40 and the second at y=50. Further lines are
            ignored and a missing second line is simply not drawn.
        drawText: When false, an empty string is drawn instead of the text.
        title: Value stored under '/Title'.
        authors: Value stored under '/Author'.
    """
    packet = StringIO.StringIO()
    # create a new PDF with Reportlab
    can = canvas.Canvas(packet, pagesize=letter)
    can.setFont("Times-Roman", 7)
    if drawText:
        # zip stops at the shorter sequence, so single-line text no longer
        # raises IndexError on the second line.
        for y, line in zip((40, 50), copyrightText.split("\n")):
            can.drawString(30, y, line)
    else:
        can.drawString(30, 50, "")
    can.save()

    # move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    # The source stays open until the output is written because the pages
    # handed to the writer still belong to the reader wrapping ``source``.
    with open(pdf_file_location, "rb") as source:
        existing_pdf = PdfFileReader(source)
        output = PdfFileWriter()
        npagesorig = existing_pdf.getNumPages()
        for i in range(npagesorig):
            page = existing_pdf.getPage(i)
            # the overlay goes on the last page only
            if i == (npagesorig - 1):
                page.mergePage(new_pdf.getPage(0))
            output.addPage(page)

        # The writer's info dictionary is reached through the private _info
        # attribute and updated in place.
        infoDict = output._info.getObject()
        infoDict.update({
            NameObject('/Title'): createStringObject(title),
            NameObject('/Author'): createStringObject(authors),
        })

        # finally, write "output" to a real file
        with open(pdf_file_destination, "wb") as outputStream:
            output.write(outputStream)
Пример #4
0
 def replace_text(cls, page, text, replace):
     """Return a new page in which ``text`` is replaced inside 'Tj' operands.

     The page's content stream is parsed, the first operand of every 'Tj'
     operation has ``text`` substituted by ``replace``, and the edited
     stream is installed as '/Contents' of a blank page onto which the
     original page has been merged.
     """
     # HACK: relies on pyPdf internals, imported locally on purpose.
     from pyPdf.pdf import ContentStream, PageObject
     from pyPdf.generic import TextStringObject, NameObject

     stream = ContentStream(page["/Contents"].getObject(), page.pdf)
     for operands, operator in stream.operations:
         if operator != 'Tj':
             continue
         # operands is modified in place, so the stream sees the new text.
         substituted = operands[0].replace(text, replace)
         operands[0] = TextStringObject(substituted)

     rebuilt = PageObject.createBlankPage(page.pdf)
     rebuilt.mergePage(page)
     rebuilt[NameObject('/Contents')] = stream
     return rebuilt
Пример #5
0
def pdf_add_content(content_string, page, scale=1, offsetx=0, offsety=0):
    """Append drawing commands to the content stream of a PDF page.

    Inputs: content_string  The PDF drawing commands to add, as a single
                            string.

            page            The pyPdf.pdf.PageObject to add the content to.
                            Its '/Contents' entry is replaced in place.

            scale           Uniform scale factor applied to the coordinate
                            system before the content is drawn.
            offsetx         Translation applied to the coordinate system
            offsety         before the content is drawn.

    """
    transform = '%.2f 0 0 %.2f %.2f %.2f cm' % (
        scale, scale, offsetx, offsety)
    appended = '\n'.join(['Q', 'q', transform, content_string, 'Q'])

    # A page without '/Contents' starts from an empty array.
    try:
        existing = page['/Contents'].getObject()
    except KeyError:
        existing = ArrayObject([])

    content = ContentStream(existing, page.pdf)
    # Existing content may not restore the graphics state at the end, so it
    # is wrapped: a leading 'q' here, closed by the first 'Q' of ``appended``.
    content.operations.insert(0, [[], 'q'])
    content.operations.append([[], appended])
    page[NameObject('/Contents')] = content
Пример #6
0
from pyPdf import PdfFileWriter, PdfFileReader
from pyPdf.generic import NameObject, createStringObject

# Copy INPUT to OUTPUT, setting title/author/subject/creator metadata.
OUTPUT = 'ml1.pdf'
INPUT = 'NOFO.pdf'

# There is no interface through pyPdf with which to set this other than
# getting your hands dirty like so:
output = PdfFileWriter()
# open() replaces the Python-2-only file() builtin, and the context managers
# close both handles even if pyPdf raises part-way through.
with open(INPUT, 'rb') as fin:
    pdf_in = PdfFileReader(fin)
    infoDict = output._info.getObject()
    print(infoDict)  # info dictionary before the update
    infoDict.update({
        NameObject('/Title'): createStringObject(u'title'),
        NameObject('/Author'): createStringObject(u'author'),
        NameObject('/Subject'): createStringObject(u'subject'),
        NameObject('/Creator'): createStringObject(u'a script')
    })
    print(infoDict)  # info dictionary after the update
    for page in range(pdf_in.getNumPages()):
        output.addPage(pdf_in.getPage(page))

    # The input stays open while writing: the pages added above still belong
    # to the reader that wraps ``fin``.
    with open(OUTPUT, 'wb') as outputStream:
        output.write(outputStream)

from pyPdf import PdfFileReader, PdfFileWriter

# Open the OUTPUT file for reading with pyPdf.
# NOTE(review): the handle passed to PdfFileReader is never closed in the
# visible code -- confirm whether later code closes it.
pdf = PdfFileReader(open(OUTPUT, 'rb'))
Пример #7
0
import sys

# Command-line script: copy the pages of a PDF into a fresh document, drop
# the '/Producer' entry and set the title and author typed by the user.
parser = argparse.ArgumentParser(description=u'Limpia los metadatos de un PDF y opcionalmente añade título y autor')
parser.add_argument("input", help="fichero pdf origen")
parser.add_argument("output", help="fichero pdf destino")
args = parser.parse_args()

# sys.stdin.encoding can be None (e.g. when input is piped), which would make
# .decode() fail; fall back to UTF-8 in that case.
stdin_encoding = sys.stdin.encoding or 'utf-8'

# open() replaces the Python-2-only file() builtin, and the context managers
# close both handles even if something raises part-way through.
with open(args.input, 'rb') as fin:
    pdfIn = PdfFileReader(fin)
    pdfOut = PdfFileWriter()

    for page in range(pdfIn.getNumPages()):
        pdfOut.addPage(pdfIn.getPage(page))

    # The writer's info dictionary is reached through the private _info
    # attribute and edited in place.
    info = pdfOut._info.getObject()
    del info[NameObject('/Producer')]

    title = raw_input("Titulo:").decode(stdin_encoding)
    author = raw_input("Autor:").decode(stdin_encoding)
    info.update({
        NameObject('/Title'): createStringObject(title),
        NameObject('/Author'): createStringObject(author)
    })

    # The input stays open while writing: the pages added above still belong
    # to the reader that wraps ``fin``.
    with open(args.output, 'wb') as fout:
        pdfOut.write(fout)
Пример #8
0
# Interactive script: copy a PDF to 'outputFile.pdf', carrying over its
# document info but blanking the identifying fields listed below.
inpfn = raw_input('Enter PDF path : ')

# The context managers close both handles even if something raises, and make
# sure the output is flushed (the original never closed it).
with open(inpfn, 'rb') as fin:
    pdf_in = PdfFileReader(fin)

    writer = PdfFileWriter()

    for page in range(pdf_in.getNumPages()):
        writer.addPage(pdf_in.getPage(page))

    # The writer's info dictionary is reached through the private _info
    # attribute and updated in place.
    infoDict = writer._info.getObject()

    # documentInfo may be None when the source has no info dictionary;
    # treat that as "nothing to copy" instead of failing on iteration.
    info = pdf_in.documentInfo or {}
    for key in info:
        infoDict.update({NameObject(key): createStringObject(info[key])})

    # Overwrite the identifying fields with empty strings.
    list_of_data_to_delete = [
        '/CreationDate', '/Author', '/Creator', '/ModDate', '/Producer', '/Title'
    ]
    for item in list_of_data_to_delete:
        try:
            infoDict.update({NameObject(item): createStringObject(u'')})
        except Exception:
            # Report the field that failed; the original referenced an
            # undefined name here and raised NameError instead.
            print("can't delete : ", item)

    # The input stays open while writing: the pages added above still belong
    # to the reader that wraps ``fin``.
    with open('outputFile.pdf', 'wb') as fout:
        writer.write(fout)