def __pdf_meta_writer(self):
    """Write a copy of the source PDF carrying this object's metadata.

    Every page of ``self.source_file`` is copied into a new document whose
    info dictionary is updated with ``self.title``, ``self.author``,
    ``self.subject`` and ``self.keywords``.  The result is written to
    ``self.temp_file_path``, after which ``self.__file_replacement()`` and
    ``self.__fast_web_view()`` are called.

    If ``self.source_file`` is not an existing file, nothing is written and
    a "not found" message is emitted through ``self.trigger`` instead.
    """
    if not os.path.isfile(self.source_file):
        broken_file_path = str("File: %s not found!!!" % self.source_file)
        self.trigger.emit(broken_file_path)
        return

    # Both streams are managed by ``with`` so they are closed even when
    # reading or writing fails; the input has to stay open until the writer
    # has finished because the pages are read from it while writing.
    with open(self.source_file, "rb") as input_file:
        reader = PdfFileReader(input_file)
        writer = PdfFileWriter()
        writer._info.getObject().update({
            NameObject("/Title"): createStringObject(self.title),
            NameObject("/Author"): createStringObject(self.author),
            NameObject("/Subject"): createStringObject(self.subject),
            NameObject("/Keywords"): createStringObject(self.keywords),
        })
        for page_number in range(reader.getNumPages()):
            writer.addPage(reader.getPage(page_number))
        with open(self.temp_file_path, "wb") as output_stream:
            writer.write(output_stream)

    self.__file_replacement()
    self.__fast_web_view()
def editPDF(filename):
    """Write a copy of *filename* with ``/Tags`` and ``/Keywords`` metadata.

    The tag text is read from the module-level ``tag_dict`` under the key
    *filename*.  The copy is written into the module-level ``directory``
    and named after the input with its extension replaced by
    ``_updated.pdf``.

    Raises:
        KeyError: if *filename* has no entry in ``tag_dict``.
    """
    # splitext strips whatever extension is present instead of assuming it
    # is exactly four characters long.
    stem, _extension = os.path.splitext(filename)
    output_name = stem + '_updated.pdf'

    # Look the tags up before touching any file so a missing entry cannot
    # leave an open handle behind.
    tags = tag_dict[filename]

    output = PdfFileWriter()
    output._info.getObject().update({
        NameObject('/Tags'): createStringObject(tags),
        NameObject('/Keywords'): createStringObject(tags),
    })

    # The input stays open until the write completes because the pages are
    # read from it while the output is produced.
    with open(filename, 'rb') as fin:
        pdf_in = PdfFileReader(fin)
        for page in range(pdf_in.getNumPages()):
            output.addPage(pdf_in.getPage(page))
        with open(os.path.join(directory, output_name), 'wb') as output_stream:
            output.write(output_stream)
def setMetadata(self, metadata):
    """Returns a document with new metadata.

    Keyword arguments:
    metadata -- expected an dictionary with metadata.

    A ``ModificationDate`` entry is stored under the ``ModDate`` key and a
    list of ``Keywords`` is joined with single spaces.  Each key is written
    to the info dictionary as ``/<Key>`` with its first letter upper-cased.
    The caller's dictionary is left untouched.  The return value is the
    serialized PDF as a byte string.
    """
    # TODO: date as "D:20090401124817-04'00'" ASN.1 for ModDate and CreationDate
    metadata = dict(metadata)  # work on a copy; do not mutate the argument

    modification_date = metadata.pop("ModificationDate", None)
    if modification_date:
        metadata['ModDate'] = modification_date

    keywords = metadata.get('Keywords')
    if isinstance(keywords, list):
        # str.join is called on the separator, not on the list.
        metadata['Keywords'] = ' '.join(keywords)

    args = {}
    for key, value in metadata.items():
        # Upper-case only the first letter: str.capitalize() would also
        # lower-case the rest and turn "ModDate" into "Moddate".
        pdf_key = '/' + key[:1].upper() + key[1:]
        args[NameObject(pdf_key)] = createStringObject(value)

    output_pdf = PdfFileWriter()
    output_pdf._info.getObject().update(args)

    # Keep the input open until the output is fully written, then let the
    # ``with`` block close it.
    with open(self.document.getUrl(), "rb") as input_file:
        input_pdf = PdfFileReader(input_file)
        for page_num in range(input_pdf.getNumPages()):
            output_pdf.addPage(input_pdf.getPage(page_num))
        output_stream = io.BytesIO()
        output_pdf.write(output_stream)
    return output_stream.getvalue()
def save(self, filename):
    """Render every page section to an image and save them as one PDF.

    For each ``(index, page)`` pair from ``self.mypages`` and
    ``self.pages``, every section returned by ``page.get_sections()`` is
    cropped out of page ``index`` of ``self.original`` by running the
    external ``convert`` command at 300 dpi into a temporary PNG.  The PNGs
    are then combined into the intermediate file ``x.pdf`` in the current
    directory, whose pages are copied to *filename* together with the
    title and author taken from ``self.info``.  The intermediate file is
    removed at the end.

    Arguments:
    filename -- path of the PDF to write.
    """
    density = 300
    page_total = len(self.pages)
    midfile = 'x.pdf'
    temps = []
    try:
        for i, page in zip(self.mypages, self.pages):
            page_sections = page.get_sections()
            section_total = len(page_sections)
            for j, section in enumerate(page_sections):
                sys.stdout.write(
                    "\rWriting Image for page %d/%d, section %d/%d "
                    % (i + 1, page_total, j + 1, section_total))
                sys.stdout.flush()
                sectionfile = tempfile.NamedTemporaryFile(suffix='.png')
                temps.append(sectionfile)
                # Section coordinates are scaled by density/72 (presumably
                # PDF points to pixels at the rendering density).
                x, y, w, h = [int(value * density / 72) for value in section]
                pagefn = "%s[%d]" % (self.original, i)
                execute(["convert", "-density", str(density), pagefn,
                         "-crop", '%dx%d%+d%+d' % (w, h, x, y),
                         '-trim', '+repage', '-trim', sectionfile.name])
        sys.stdout.write("\n")
        sys.stdout.flush()
        execute(["convert"] + [f.name for f in temps] + [midfile])
    finally:
        # Closing a NamedTemporaryFile deletes it; do so even on failure.
        for f in temps:
            f.close()

    output = PdfFileWriter()
    output._info.getObject().update({
        NameObject('/Title'): createStringObject(self.info.get('title', "david")),
        NameObject('/Author'): createStringObject(self.info.get('author', "")),
    })
    # The intermediate file stays open until the output has been written.
    with open(midfile, "rb") as mid_stream:
        input1 = PdfFileReader(mid_stream)
        for pn in range(input1.getNumPages()):
            output.addPage(input1.getPage(pn))
        # Write to the path the caller asked for (the original referenced
        # an undefined name here and ignored the parameter).
        with open(filename, "wb") as output_stream:
            output.write(output_stream)
    execute(['rm', midfile])
def addCopyrightToPDF(pdf_file_location, pdf_file_destination,copyrightText, drawText=True, title="",authors=""):
    """Stamp a copyright notice on the last page of a PDF and set metadata.

    Arguments:
    pdf_file_location -- path of the PDF to read.
    pdf_file_destination -- path of the PDF to write.
    copyrightText -- notice text; its first line is drawn at y=40 and its
        second line, if there is one, at y=50 (both at x=30).
    drawText -- when false an empty string is drawn instead of the notice.
    title -- value stored under ``/Title``.
    authors -- value stored under ``/Author``.
    """
    import io  # function-scope import keeps this block self-contained

    # Build a one-page overlay PDF in memory with ReportLab.
    packet = io.BytesIO()
    can = canvas.Canvas(packet, pagesize=letter)
    can.setFont("Times-Roman", 7)
    if drawText:
        # zip stops at the shorter input, so a one-line notice no longer
        # raises IndexError; lines past the second are ignored as before.
        for y_position, line in zip((40, 50), copyrightText.split("\n")):
            can.drawString(30, y_position, line)
    else:
        can.drawString(30, 50, "")
    can.save()

    # Move to the beginning of the buffer before reading it back.
    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    output = PdfFileWriter()
    # The source stays open until the output has been written.
    with open(pdf_file_location, "rb") as source_stream:
        existing_pdf = PdfFileReader(source_stream)
        npagesorig = existing_pdf.getNumPages()
        for i in range(npagesorig):
            page = existing_pdf.getPage(i)
            if i == npagesorig - 1:
                # Only the last page receives the overlay.
                page.mergePage(new_pdf.getPage(0))
            output.addPage(page)

        output._info.getObject().update({
            NameObject('/Title'): createStringObject(title),
            NameObject('/Author'): createStringObject(authors),
        })

        # Finally, write "output" to a real file.
        with open(pdf_file_destination, "wb") as output_stream:
            output.write(output_stream)
def modifyMeta(pathFile, fileNameInput, fileNameOutput, metaInfo):
    """Copy a PDF while replacing its document-information entries.

    Arguments:
    pathFile -- directory containing the input and receiving the output.
    fileNameInput -- name of the PDF to read, relative to *pathFile*.
    fileNameOutput -- name of the PDF to write, relative to *pathFile*.
    metaInfo -- mapping of info-dictionary key names (passed to
        ``NameObject`` unchanged, e.g. ``'/Title'``) to string values.

    The output path is printed before the copy is made.
    """
    # There is no interface through pyPDF with which to set this other than
    # getting your hands dirty like so:
    inputFile = os.path.join(pathFile, fileNameInput)
    outputFile = os.path.join(pathFile, fileNameOutput)
    print(outputFile)

    output = PdfFileWriter()
    infoDict = output._info.getObject()

    # Blank out Producer, which has a default value.
    infoDict.update({
        NameObject('/Producer'): createStringObject(u'')
    })

    # Apply the values passed in by the caller.
    for metaName, metaValue in metaInfo.items():
        infoDict.update({
            NameObject(metaName): createStringObject(metaValue)
        })

    # The input stays open until the output has been written; both handles
    # are closed by ``with`` even if an error occurs.
    with open(inputFile, 'rb') as input_stream:
        pdfFile = PdfFileReader(input_stream)
        for page in range(pdfFile.getNumPages()):
            output.addPage(pdfFile.getPage(page))
        with open(outputFile, 'wb') as outputStream:
            output.write(outputStream)
def __modifyData(self):
    """Replace every value in ``self.dataToUpdate`` with a fixed string.

    Each key keeps its place; its value becomes a string object built from
    the ASCII-encoded markup ``<h1 onmouseover=alert(1)>``.
    NOTE(review): this looks like a script-injection test payload --
    confirm the intent with the caller.
    """
    payload_markup = '<h1 onmouseover=alert(1)>'
    for field_name in self.dataToUpdate:
        encoded_payload = payload_markup.encode('ascii')
        self.dataToUpdate[field_name] = createStringObject(encoded_payload)
from pyPdf import PdfFileWriter, PdfFileReader
from pyPdf.generic import NameObject, createStringObject

OUTPUT = 'ml1.pdf'
INPUT = 'NOFO.pdf'

# There is no interface through pyPDF with which to set this other than
# getting your hands dirty like so:
output = PdfFileWriter()

# The input stays open until the output has been written and is then closed
# by the ``with`` block (the original never closed it).
with open(INPUT, 'rb') as fin:
    pdf_in = PdfFileReader(fin)

    infoDict = output._info.getObject()
    print(infoDict)  # info dictionary before the update

    infoDict.update({
        NameObject('/Title'): createStringObject(u'title'),
        NameObject('/Author'): createStringObject(u'author'),
        NameObject('/Subject'): createStringObject(u'subject'),
        NameObject('/Creator'): createStringObject(u'a script'),
    })
    print(infoDict)  # info dictionary after the update

    for page in range(pdf_in.getNumPages()):
        output.addPage(pdf_in.getPage(page))

    with open(OUTPUT, 'wb') as outputStream:
        output.write(outputStream)

# Re-open the file that was just written.  The handle is deliberately left
# open because ``pdf`` reads from it for as long as it is in use.
pdf = PdfFileReader(open(OUTPUT, 'rb'))
import sys


def _prompt(label):
    """Show *label*, read one line from stdin and return it as text."""
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    answer = read_line(label)
    if isinstance(answer, bytes):
        # sys.stdin.encoding is None when stdin is not a terminal.
        answer = answer.decode(sys.stdin.encoding or 'utf-8')
    return answer


# Command line: strip a PDF's metadata and optionally add title and author.
parser = argparse.ArgumentParser(description=u'Limpia los metadatos de un PDF y opcionalmente añade título y autor')
parser.add_argument("input", help="fichero pdf origen")
parser.add_argument("output", help="fichero pdf destino")
args = parser.parse_args()

# The input stays open until the output has been written; both files are
# closed by ``with`` even if an error occurs.
with open(args.input, 'rb') as fin:
    pdfIn = PdfFileReader(fin)
    pdfOut = PdfFileWriter()
    for page in range(pdfIn.getNumPages()):
        pdfOut.addPage(pdfIn.getPage(page))

    info = pdfOut._info.getObject()
    # Drop the Producer entry; tolerate it being absent.
    info.pop(NameObject('/Producer'), None)

    title = _prompt("Titulo:")
    author = _prompt("Autor:")

    info.update({
        NameObject('/Title'): createStringObject(title),
        NameObject('/Author'): createStringObject(author),
    })

    with open(args.output, 'wb') as fout:
        pdfOut.write(fout)
# Copy a PDF to 'outputFile.pdf', carrying over its document information
# but blanking a fixed list of identifying fields.
try:
    read_line = raw_input  # Python 2
except NameError:
    read_line = input  # Python 3

inpfn = read_line('Enter PDF path : ')

# The input stays open until the output has been written; both files are
# closed by ``with`` (the original never closed the output file).
with open(inpfn, 'rb') as fin:
    pdf_in = PdfFileReader(fin)
    writer = PdfFileWriter()

    for page in range(pdf_in.getNumPages()):
        writer.addPage(pdf_in.getPage(page))

    infoDict = writer._info.getObject()

    # Start from the source document's own information entries; fall back
    # to an empty mapping when the document has none.
    info = pdf_in.documentInfo or {}
    for key in info:
        infoDict.update({NameObject(key): createStringObject(info[key])})

    # These entries are overwritten with empty strings rather than removed.
    list_of_data_to_delete = [
        '/CreationDate', '/Author', '/Creator',
        '/ModDate', '/Producer', '/Title',
    ]
    for item in list_of_data_to_delete:
        try:
            infoDict.update({NameObject(item): createStringObject(u'')})
        except Exception:
            # Best effort: report the entry that failed and carry on (the
            # original referenced an undefined name here).
            print("can't delete : %s" % item)

    with open('outputFile.pdf', 'wb') as fout:
        writer.write(fout)