def merge_pdfs(pdf_path_list, to_file):
    """Concatenate the PDFs in ``pdf_path_list`` into a single file.

    Args:
        pdf_path_list: Iterable of source PDF paths, in output order.
        to_file: Path of the merged PDF to create.

    Every source file is closed before returning, including when a source
    cannot be opened or parsed or when writing the output fails.
    """
    pdf_out = pyPdf.PdfFileWriter()
    f_in_handler_list = []
    try:
        for path in pdf_path_list:
            # A per-file ``with open()`` cannot be used here: it would close
            # the source automatically and pdf_out.write(f_out) would fail.
            f = open(path, "rb")
            f_in_handler_list.append(f)
            pdf_in = pyPdf.PdfFileReader(f)
            page_size = pdf_in.getNumPages()
            # Add each page of this source to the output document.
            for i in range(page_size):
                pdf_out.addPage(pdf_in.getPage(i))
            print("Processed file path: %s, page_size: %d" % (path, page_size))
        with open(to_file, "wb") as f_out:
            pdf_out.write(f_out)
        print("Merged file path: %s, page_size: %d"
              % (to_file, pdf_out.getNumPages()))
    finally:
        # Close the source handles whether or not the merge succeeded.
        for f in f_in_handler_list:
            f.close()
def read_directory(dirpath, recurse):
    """Convert each PDF in ``dirpath`` to text and read its heading lines.

    For every entry whose extension is ``.pdf`` the external ``pdftotext``
    tool is run, the generated ``.txt`` file is read and then deleted.
    Every other entry is logged and skipped.

    Args:
        dirpath: Directory to scan.
        recurse: When true, sub-directories are scanned as well.

    Raises:
        OSError: if ``pdftotext`` cannot be launched.
        IOError: if ``pdftotext`` did not produce the expected text file.
        IndexError: if the extracted text has fewer than four lines.
    """
    import subprocess

    for entry in os.listdir(dirpath):
        path = os.path.join(dirpath, entry)
        if os.path.isdir(path) and recurse:
            read_directory(path, recurse)
            continue
        root, extension = os.path.splitext(path)
        if extension != '.pdf':
            logging.info('Ignore file: %s', path)
            continue
        # An argument list (no shell) keeps spaces, quotes and other shell
        # metacharacters in the file name from being interpreted.
        subprocess.call(['pdftotext', path])
        txtpath = root + '.txt'
        with open(txtpath, "rb") as txtfile:
            lines = txtfile.readlines()
        # Remove the scratch text file before indexing, so it is cleaned up
        # even when the text turns out to be too short.
        os.remove(txtpath)
        # Second and fourth lines of the extracted text. Nothing in this
        # function consumes them yet.
        title, description = lines[1], lines[3]
def ExportSelected(event):
    """Export the pages chosen in the selection panel to a new PDF.

    Reads the module-level ``path`` (source PDF), ``SelectionPanel`` (page
    selection) and ``top`` (parent window). The output is written into the
    directory of the source file, under the name typed by the user.

    Args:
        event: The event that triggered this handler (not used).
    """
    if not path:
        dlg = wx.MessageDialog(top, "No file selected!", "Error...",
                               wx.OK | wx.ICON_QUESTION)
        dlg.ShowModal()
        dlg.Destroy()
        top.SetStatusText("No file selected. Please use \"Select File...\"")
        return

    PagesToExport = SelectionPanel.GetSelections()
    if len(PagesToExport) < 1:
        dlg = wx.MessageDialog(top, "No pages selected!", "Error...",
                               wx.OK | wx.ICON_QUESTION)
        dlg.ShowModal()
        dlg.Destroy()
        top.SetStatusText(
            "No pages selected. Please use the selection panel.")
        return

    DestFileName = wx.GetTextFromUser(
        "Please provide output file name:", "Export File Name?",
        "Pages from " + os.path.basename(path).split('.')[0], top)
    if not DestFileName:
        # An empty name (presumably a cancelled prompt; confirm against the
        # wx documentation) would otherwise create a file called ".pdf".
        return

    # A source path without a directory part must resolve to the current
    # directory; a bare os.sep prefix would point at the filesystem root.
    directory = os.path.dirname(path) or os.curdir
    outputFileName = directory + os.sep + DestFileName + ".pdf"

    with open(path, 'rb') as fh:
        source = pyPdf.PdfFileReader(fh)
        outputPage = pyPdf.PdfFileWriter()
        for PageToExport in PagesToExport:
            outputPage.addPage(source.getPage(PageToExport))
        # The source stays open until the output has been written.
        with open(outputFileName, "wb") as outputStream:
            outputPage.write(outputStream)
    return
def merge_pdf_on_disk(self, docs):
    """Merge PDF documents using temporary files as scratch space.

    Args:
        docs: Iterable of PDF documents, each passed as the raw content
            that is written to a scratch file.

    Returns:
        The content of the merged PDF, read back from the scratch file.

    The scratch files are anonymous temporary files, so they are removed
    when they are closed, also when merging fails part-way.
    """
    streams = []
    writer = pyPdf.PdfFileWriter()
    try:
        for doc in docs:
            current_buff = tempfile.TemporaryFile(
                suffix='.pdf', prefix='credit_control_slip')
            streams.append(current_buff)
            current_buff.write(doc)
            current_buff.seek(0)
            reader = pyPdf.PdfFileReader(current_buff)
            for page in range(reader.getNumPages()):
                writer.addPage(reader.getPage(page))
        buff = tempfile.TemporaryFile(
            suffix='.pdf', prefix='credit_control_slip_merged')
        streams.append(buff)
        # The sources are kept open until the merged output is written.
        writer.write(buff)
        buff.seek(0)
        return buff.read()
    finally:
        for stream in streams:
            stream.close()
def write_annotated_pdf(self, outfd, pdf=None, dice_map=None, **kw):
    """Write an annotated version of the PDF file.

    Inputs:
      outfd     A file object, to which the PDF is output.
      pdf       The original PDF file.  If None, use self.pdf.
      dice_map  The dice map describing how the PDF on the reader was
                made from the original PDF file.  If None, a one-to-one
                map covering every page of `pdf` is used.

    Other keywords are passed on to the annotations' write_to_pdf()
    methods.
    """
    if pdf is None:
        pdf = self.pdf
    if dice_map is None:
        # Size the default map from the document actually being written,
        # which is not necessarily self.pdf.
        dice_map = OneToOneMap(len(pdf.pages))
    outpdf = pyPdf.PdfFileWriter()
    # j walks dice_map and k walks self.annotations; neither is ever reset.
    # NOTE(review): this assumes dice_map is ordered by source page and the
    # annotations by reader page -- confirm against the callers.
    j = k = 0
    for i, page in enumerate(pdf.pages):
        # Handle every dice-map entry that was cut from source page i.
        while j < len(dice_map) and dice_map[j][0] == i:
            # Draw each annotation made on reader page j onto the page.
            while (k < len(self.annotations)
                   and self.annotations[k].page == j):
                self.annotations[k].write_to_pdf(
                    page, crop=dice_map[j][1], outpdf=outpdf, **kw)
                k += 1
            j += 1
        outpdf.addPage(page)
    outpdf.write(outfd)
def AutoSplit(event):
    """Export every page of the selected PDF to its own file.

    Reads the module-level ``path`` (source PDF), ``pageCount`` (number of
    pages to export) and ``top`` (parent window). After confirmation, page
    N is written next to the source file as ``<name> - N.pdf``.

    Args:
        event: The event that triggered this handler (not used).
    """
    global pageCount
    global path
    if not path:
        dlg = wx.MessageDialog(top, "No file selected!", "Error...",
                               wx.OK | wx.ICON_QUESTION)
        dlg.ShowModal()
        dlg.Destroy()
        top.SetStatusText("No file selected. Please use \"Select File...\"")
        return

    dlg = wx.MessageDialog(
        top, "This will automatically export individual pages.",
        "Confirm AutoSplit", wx.OK | wx.CANCEL | wx.ICON_QUESTION)
    result = dlg.ShowModal()
    dlg.Destroy()
    if result != wx.ID_OK:
        return

    DestFileName = wx.GetTextFromUser(
        "Please provide output file name:", "Export File Name?",
        "Pages from " + os.path.basename(path).split('.')[0], top)
    if not DestFileName:
        # An empty name (presumably a cancelled prompt; confirm against the
        # wx documentation) would otherwise create files named " - N.pdf".
        return

    # A source path without a directory part must resolve to the current
    # directory; a bare os.sep prefix would point at the filesystem root.
    directory = os.path.dirname(path) or os.curdir

    with open(path, 'rb') as fh:
        source = pyPdf.PdfFileReader(fh)
        for page in range(0, pageCount):
            outputPage = pyPdf.PdfFileWriter()
            outputPage.addPage(source.getPage(page))
            outputFileName = (directory + os.sep + DestFileName + " - " +
                              str(page + 1) + ".pdf")
            with open(outputFileName, "wb") as outputStream:
                outputPage.write(outputStream)
def combine_multiple_canvas(self):
    """Combine the PDFs listed in ``self.temp_files`` into one document.

    Does nothing unless ``self.multiple_canvas`` is set, pyPdf is available
    and there is at least one temporary file. The result is written to
    ``self.filename``, which may be a path or an already open file object.
    In both cases the output stream is closed afterwards.
    """
    if not self.multiple_canvas or not pyPdf or not self.temp_files:
        return

    output = pyPdf.PdfFileWriter()
    sources = []
    try:
        for f_name in self.temp_files:
            source = open(f_name, 'rb')
            sources.append(source)
            reader = pyPdf.PdfFileReader(source)
            for page_num in range(reader.numPages):
                output.addPage(reader.getPage(page_num))

        if isinstance(self.filename, basestring):
            fp = open(self.filename, 'wb')
        else:
            fp = self.filename
        try:
            output.write(fp)
        finally:
            fp.close()
    finally:
        # Close the input files explicitly; dropping the reader objects
        # does not do it.
        for source in sources:
            source.close()
def cutpdf(name_input, name_output):
    """Cut every page of a PDF in two along a vertical line.

    The cut is placed at half of the x coordinate of the media box's
    upper-right corner. For each source page the output receives the part
    left of the cut followed by the part right of it.

    Args:
        name_input: Path of the PDF to read.
        name_output: Path of the PDF to create.

    Returns:
        True.
    """
    output = pyPdf.PdfFileWriter()
    with open(name_input, "rb") as pdf_input:
        input1 = pyPdf.PdfFileReader(pdf_input)
        for i in range(input1.getNumPages()):
            page1 = input1.getPage(i)
            # NOTE(review): page2 is a shallow copy taken before page1's
            # media box is touched; the order of the statements below is
            # kept exactly as it was -- confirm before reordering.
            page2 = copy.copy(page1)
            cutline = (page1.mediaBox.getUpperRight_x() / 2,
                       page1.mediaBox.getUpperRight_y())
            page1.mediaBox.upperRight = cutline
            output.addPage(page1)
            page2.mediaBox.upperLeft = cutline
            output.addPage(page2)
        # The source stays open while the output is written; the ``with``
        # block closes the output, which the old code never did.
        with open(name_output, "wb") as pdf_output:
            output.write(pdf_output)
    return True
def _merge_PDFs(self, pdfslides):
    """Merge the first page of each given PDF into one document.

    The output is written to the current directory, named after
    ``self._get_filename()`` with everything from ``.svg`` on replaced by
    ``.pdf``. If pyPdf is not installed a warning is logged and nothing is
    written.

    Args:
        pdfslides: Iterable of paths of the single-slide PDFs, in order.
    """
    output_filename = "%s.pdf" % self._get_filename().split(".svg")[0]
    output_filepath = abspath(join(os.curdir, output_filename))

    try:
        import pyPdf
    except ImportError:
        logging.warning("PyPDF not installed, cannot merge PDF slides")
        return

    logging.info("Using 'pyPdf' to join PDFs to %s", output_filepath)
    output = pyPdf.PdfFileWriter()
    inputfiles = []
    try:
        for slide in pdfslides:
            inputstream = open(slide, "rb")
            inputfiles.append(inputstream)
            reader = pyPdf.PdfFileReader(inputstream)
            output.addPage(reader.getPage(0))
        with open(output_filepath, "wb") as outputStream:
            output.write(outputStream)
    finally:
        # Close the slides also when opening, parsing or writing fails.
        for f in inputfiles:
            f.close()
def pdf_merge(pdf1, pdf2):
    """Concatenate two PDF documents given as raw content.

    Args:
        pdf1: Content of the first PDF.
        pdf2: Content of the second PDF, appended after the first.

    Returns:
        The content of the merged PDF.

    Raises:
        Exception: with the message "Failed to merge PDF files" when
            anything goes wrong.
    """
    import tempfile

    # Anonymous binary temporary files replace the os.tempnam() names:
    # nobody else can claim the name first, the data is not passed through
    # text mode, and the files are removed when closed, even on failure.
    scratch = []
    try:
        output = pyPdf.PdfFileWriter()
        for content in (pdf1, pdf2):
            source = tempfile.TemporaryFile()
            scratch.append(source)
            source.write(content)
            source.seek(0)
            reader = pyPdf.PdfFileReader(source)
            for page in range(reader.getNumPages()):
                output.addPage(reader.getPage(page))
        merged = tempfile.TemporaryFile()
        scratch.append(merged)
        output.write(merged)
        merged.seek(0)
        return merged.read()
    except Exception:
        raise Exception("Failed to merge PDF files")
    finally:
        for handle in scratch:
            handle.close()
def POST(self):
    """Handle the submitted form and return the generated documents as a PDF.

    When the form does not validate, the form page is rendered again.
    Otherwise an address string is assembled from the form fields, one
    request per entry of ``tocall`` is posted to the forwarding service, and
    the pages of every PDF response are collected into a single document
    that is returned as the response body. When a request fails or does not
    return a PDF, the text of the most recent response is returned instead.
    """
    f = ip_form()
    selected = web.input(selected=[]).selected  # ['a_01_1', ...]
    if not f.validates():
        selected_list = []
        #for i in selected:
        #    id = i.replace('_', '.').lstrip('a')
        #    selected_list.append((i, id, get_okved(id)))
        return render.form(f, get_okveds(), selected_list)
    else:
        # FIXME:
        # Build the address: postcode and city first, then the optional
        # locality, the street and house, and finally the optional building
        # and apartment parts.
        addr = f.addr_zip.get_value() + ', г. Санкт-Петербург, '
        if (f.addr_locality.get_value()):
            addr = addr + f.addr_locality.get_value() + ', '
        addr = addr + f.addr_street_type.get_value() + '. ' + f.addr_street_name.get_value() + ', д. ' + f.addr_house_name.get_value()
        if (f.addr_building_type.get_value()):
            addr = addr + ', ' + f.addr_building_type.get_value() + '. ' + f.addr_building_name.get_value()
        if (f.addr_app_type.get_value()):
            addr = addr + ', ' + f.addr_app_type.get_value() + '. ' + f.addr_app_name.get_value()
        output = pyPdf.PdfFileWriter()
        error = False
        # Temporary files holding the downloaded PDFs.
        tmpfile = list()
        # (key into ``forms``, function building the request payload)
        tocall = [('21001', prepare_21001), ('pd4', prepare_pd4)]
        # The 'usn' form is requested only when the tax value is above 2.
        if (int(f.tax.get_value()) > 2):
            tocall.append(('usn', prepare_usn,),)
        for key, func in tocall:
            url = forward_url + forms[key] + '/a/'
            r = requests.post(url, data=func(f, addr, selected), cookies=dict(csrftoken=token))
            if (r.status_code == 200):
                # r - Response object
                if (r.headers['content-type'] == 'application/pdf'):
                    # delete=False: the file must outlive this loop, because
                    # the pages are only written out after all requests.
                    tmp = tempfile.NamedTemporaryFile(suffix='.pdf', delete=False)
                    tmp.write(r.content)
                    input = pyPdf.PdfFileReader(tmp)
                    for page in input.pages:
                        output.addPage(page)
                    tmpfile.append(tmp)
                else:
                    # e.g. 'text/html; charset=utf-8'
                    # NOTE(review): the loop is not left here, so later
                    # requests are still sent and ``r`` is overwritten by
                    # them -- confirm this is intended.
                    error = True
            else:
                error = True
                # presumably removes the temporary files collected so far;
                # deltmp is defined elsewhere -- verify
                deltmp(tmpfile)
                break
        if error:
            deltmp(tmpfile)
            #print r.status_code, r.raw.read()
            return r.text
        else:
            web.header('Content-Type', 'application/pdf')
            web.header('Content-Transfer-Encoding', 'binary')
            web.header('Content-Disposition', 'attachment; filename=\"print.pdf\";')
            # Write the merged document to a named temporary file, read it
            # back as the response body, then remove it.
            outputStream = tempfile.NamedTemporaryFile(suffix='.pdf', delete=False)
            output.write(outputStream)
            outputStream.close()
            retvalue = file(outputStream.name, 'rb').read()
            os.remove(outputStream.name)
            deltmp(tmpfile)
            return retvalue
def write(self):
    """Assembles the final PDF and writes to disk.

    The output consists of the optional front matter PDF, a body generated
    from ``self.sections`` and the optional end matter PDF, in that order,
    and is written to ``self.output_filename``.
    """
    pdf_writer = pyPdf.PdfFileWriter()
    if self.front_matter is not None:
        front_matter = pyPdf.PdfFileReader(file(self.front_matter, "rb"))
        for page in range(front_matter.getNumPages()):
            pdf_writer.addPage(front_matter.getPage(page))
    # The body is rendered into a named temporary file. delete=False keeps
    # the file on disk; it is removed explicitly further down.
    working_file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    doc = SimpleDocTemplate(working_file)
    doc.pagesize = portrait(letter)
    story = []
    styles = getSampleStyleSheet()
    for section in self.sections:
        heading_text = section.heading
        story.append(Paragraph(heading_text, styles['Heading1']))
        # A content entry is a mapping that may carry 'figure', 'table'
        # and/or 'text', plus an optional 'caption'.
        for content in section.contents:
            if 'figure' in content:
                figure = content['figure']
                # Figures whose file does not exist are skipped.
                if os.path.exists(figure):
                    im = utils.ImageReader(figure)
                    img_width, img_height = im.getSize()
                    aspect = img_height / float(img_width)
                    story.append(
                        Image(figure, width=img_width, height=(img_width * aspect)))
                    # NOTE(review): assumes the caption belongs to the
                    # "figure exists" branch -- confirm the nesting.
                    if content.get('caption', None) is not None:
                        caption_text = '<font size=10>{0}</font>'.format(
                            content['caption'].strip())
                        story.append(Paragraph(caption_text, styles['Italic']))
                        story.append(Spacer(1, 10))
            if 'table' in content:
                _t = self.build_table(content['table'])
                story.append(_t)
                if content.get('caption', None) is not None:
                    caption_text = '<font size=10>{0}</font>'.format(
                        content['caption'].strip())
                    story.append(Paragraph(caption_text, styles['Italic']))
                    story.append(Spacer(1, 10))
            if 'text' in content:
                # Each paragraph is followed by a spacer.
                for para in content['text']:
                    story.append(Paragraph(para, styles['Normal']))
                    story.append(Spacer(1, 12))
    doc.build(story)
    body_matter = pyPdf.PdfFileReader(working_file)
    for page in range(body_matter.getNumPages()):
        pdf_writer.addPage(body_matter.getPage(page))
    # Best-effort removal of the temporary body file.
    try:
        os.remove(working_file.name)
    except OSError:
        # Windows reports file in use, other OS errors, etc.
        pass
    if self.end_matter is not None:
        end_matter = pyPdf.PdfFileReader(file(self.end_matter, "rb"))
        for page in range(end_matter.getNumPages()):
            pdf_writer.addPage(end_matter.getPage(page))
    # NOTE(review): output_stream (like the front/end matter files above)
    # is never closed explicitly -- confirm whether that is acceptable.
    output_stream = file(self.output_filename, "wb")
    pdf_writer.write(output_stream)
def run(path, page_index=2, fname='./summary.pdf'):
    """Collect one page from every PDF one directory level below ``path``.

    Args:
        path: Directory whose ``*/*.pdf`` files are processed, in the order
            returned by ``glob.glob``.
        page_index: Zero-based index of the page taken from each file. The
            default, 2, is the third page.
        fname: Path of the summary PDF to create.

    Raises:
        IndexError: if a file has no page at ``page_index``.
    """
    output = pyPdf.PdfFileWriter()
    sources = []
    try:
        for fn in glob.glob(path + '/*/*.pdf'):
            source = open(fn, 'rb')
            sources.append(source)
            output.addPage(pyPdf.PdfFileReader(source).getPage(page_index))
        with open(fname, 'wb') as outf:
            output.write(outf)
    finally:
        # The inputs are kept open until the summary has been written.
        for source in sources:
            source.close()
    return
def process_file(self, infile_name, outfile_name):
    """Copy the interesting pages of one PDF into a new PDF.

    A page is kept when ``self._scan_page`` returns a false value for it.
    Kept pages are passed through ``self._filter_images`` before being
    added to the output.

    Args:
        infile_name: Path of the PDF to read.
        outfile_name: Path of the PDF to create.
    """
    with open(infile_name, 'rb') as source:
        reader = pyPdf.PdfFileReader(source)
        writer = pyPdf.PdfFileWriter()
        candidates = (reader.getPage(number)
                      for number in range(reader.getNumPages()))
        for candidate in candidates:
            if self._scan_page(candidate):
                continue
            self._filter_images(candidate)
            writer.addPage(candidate)
        # Written while the source is still open.
        with open(outfile_name, 'wb') as target:
            writer.write(target)
def merge_pdfs(pdfs, output_name):
    """Concatenate several PDF files into one.

    Args:
        pdfs: Iterable of source PDF paths, in output order.
        output_name: Path of the merged PDF to create.
    """
    output = pyPdf.PdfFileWriter()
    sources = []
    try:
        for input_name in pdfs:
            source = open(input_name, "rb")
            sources.append(source)
            for page in pyPdf.PdfFileReader(source).pages:
                output.addPage(page)
        with open(output_name, "wb") as outputStream:
            output.write(outputStream)
    finally:
        # The inputs are kept open until the output has been written and
        # are closed afterwards, also on failure.
        for source in sources:
            source.close()
def scrub(self):
    """Return ``self.pdf`` re-written with an empty ``/Producer`` entry.

    Every page of ``self.pdf`` is copied into a fresh writer whose info
    dictionary has ``/Producer`` set to an empty string. The serialized
    document is returned as a string.
    """
    writer = pyPdf.PdfFileWriter()
    blank_producer = {
        pyPdf.generic.NameObject('/Producer'):
            pyPdf.generic.createStringObject(u''),
    }
    writer._info.getObject().update(blank_producer)

    page_total = self.pdf.getNumPages()
    for index in range(page_total):
        writer.addPage(self.pdf.getPage(index))

    sink = StringIO.StringIO()
    writer.write(sink)
    content = sink.getvalue()
    sink.close()
    return content
def join(self, file=None):
    """Concatenate the PDFs in ``self.files``.

    Args:
        file: Optional writable file object that receives the result.

    Returns:
        ``file`` itself when one was given, otherwise the merged content
        taken from a ``pisaTempFile``.
    """
    import pyPdf
    if not pyPdf:
        return None

    merged = pyPdf.PdfFileWriter()
    for source in self.files:
        reader = pyPdf.PdfFileReader(source)
        page_total = reader.getNumPages()
        for index in range(0, page_total):
            merged.addPage(reader.getPage(index))

    if file is None:
        scratch = pisaTempFile(capacity=self.capacity)
        merged.write(scratch)
        return scratch.getvalue()

    merged.write(file)
    return file
def join(self, file=None):
    """Merge every PDF of ``self.files`` into a single document.

    Args:
        file: Optional writable file object to write the result into.

    Returns:
        The ``file`` argument when it is not None, otherwise the value of
        the ``pisaTempFile`` the result was written to.
    """
    import pyPdf
    # TODO: Why is this in the middle of everything?
    if pyPdf:
        output = pyPdf.PdfFileWriter()
        for pdffile in self.files:
            current = pyPdf.PdfFileReader(pdffile)
            remaining = current.getNumPages()
            position = 0
            while position < remaining:
                output.addPage(current.getPage(position))
                position += 1
        to_caller = file is not None
        if to_caller:
            target = file
        else:
            target = pisaTempFile(capacity=self.capacity)
        output.write(target)
        if to_caller:
            return target
        return target.getvalue()
def split(input_file, output_format, start_number):
    """Split a PDF file into single-page PDFs.

    Args:
        input_file: Path of the PDF to split.
        output_format: ``%``-format string with one ``%s`` placeholder. It
            receives the zero-padded, three-digit page number.
        start_number: Number given to the first page.

    Returns:
        True.
    """
    with open(input_file, "rb") as source:
        reader = pyPdf.PdfFileReader(source)
        for i in range(reader.numPages):
            output = pyPdf.PdfFileWriter()
            output.addPage(reader.getPage(i))
            output_name = output_format % ("%03d" % (i + start_number))
            # The source stays open while each single-page file is written.
            with open(output_name, "wb") as outputStream:
                output.write(outputStream)
    return True
def process(self, inputFile1):
    """Apply the configured page merges to a PDF.

    For each page whose index is in ``self.operations``, the listed
    ``(file, page number)`` pairs are applied in order. The referenced page
    has the current page merged into it and then replaces the current page.

    Args:
        inputFile1: Stream of the PDF to process.

    Returns:
        A ``cStringIO`` buffer holding the result. It is not rewound here.
    """
    base = pyPdf.PdfFileReader(inputFile1)
    result = pyPdf.PdfFileWriter()
    index = 0
    for current in base.pages:
        if index in self.operations:
            for overlay_source, overlay_index in self.operations[index]:
                overlay_reader = pyPdf.PdfFileReader(overlay_source)
                overlay = overlay_reader.getPage(overlay_index)
                overlay.mergePage(current)
                current = overlay
        result.addPage(current)
        index += 1
    buf = cStringIO.StringIO()
    result.write(buf)
    return buf
def obtener_datos_pdf_con_error(nom_pdf):
    """Copy every page of a PDF into the ``path_urbano_listados`` folder.

    The copy keeps the file name of the source, taken as the part after the
    last backslash of ``nom_pdf``.

    Args:
        nom_pdf: Path of the PDF to copy.
    """
    out = os.path.join(path_urbano_listados, nom_pdf.split("\\")[-1])
    pdf_listado = pyPdf.PdfFileWriter()
    with open(nom_pdf, "rb") as origen:
        pdf = pyPdf.PdfFileReader(origen)
        # Page indexes are zero-based. The previous range(1, n + 1) asked
        # for page n, one past the last valid index.
        # NOTE(review): this assumes all pages are wanted; if the first page
        # was meant to be skipped, use range(1, n) instead.
        for i in range(pdf.getNumPages()):
            pdf_listado.addPage(pdf.getPage(i))
        # Written while the source is still open.
        with open(out, 'wb') as f:
            pdf_listado.write(f)
def split_pages(src, dst):
    """Cut every page of a PDF into two halves.

    Pages that are taller than wide are cut horizontally; the output gets
    the upper half followed by the lower half. All other pages are cut
    vertically; the output gets the left half followed by the right half.

    Args:
        src: Path of the PDF to read.
        dst: Path of the PDF to create.
    """
    output = pyPdf.PdfFileWriter()
    # The source is only read, so it is opened read-only.
    with open(src, 'rb') as src_f:
        reader = pyPdf.PdfFileReader(src_f)
        for i in range(reader.getNumPages()):
            page1 = reader.getPage(i)
            # page2 gets its own media box so the halves can be set
            # independently of each other.
            page2 = copy.copy(page1)
            page2.mediaBox = copy.copy(page1.mediaBox)

            # (x1, y1) is the lower-left corner, (x2, y2) the upper-right
            # corner and (x3, y3) the centre of the page:
            #
            #   x1,y2 --- x3,y2 --- x2,y2
            #     |         |         |
            #   x1,y3 --- x3,y3 --- x2,y3
            #     |         |         |
            #   x1,y1 --- x3,y1 --- x2,y1
            x1, y1 = page1.mediaBox.lowerLeft
            x2, y2 = page1.mediaBox.upperRight
            x1, y1 = math.floor(x1), math.floor(y1)
            x2, y2 = math.floor(x2), math.floor(y2)
            # Measure from the lower-left corner so that media boxes which
            # do not start at (0, 0) are halved correctly as well.
            x3 = math.floor((x1 + x2) / 2)
            y3 = math.floor((y1 + y2) / 2)

            if (x2 - x1) < (y2 - y1):
                # vertical page: upper half, then lower half
                page1.mediaBox.upperRight = (x2, y2)
                page1.mediaBox.lowerLeft = (x1, y3)
                page2.mediaBox.upperRight = (x2, y3)
                page2.mediaBox.lowerLeft = (x1, y1)
            else:
                # horizontal page: left half, then right half
                page1.mediaBox.upperRight = (x3, y2)
                page1.mediaBox.lowerLeft = (x1, y1)
                page2.mediaBox.upperRight = (x2, y2)
                page2.mediaBox.lowerLeft = (x3, y1)
            output.addPage(page1)
            output.addPage(page2)
        # Written while the source is still open.
        with open(dst, 'w+b') as dst_f:
            output.write(dst_f)
def merge_pdf_in_memory(self, docs):
    """Merge PDF files into one document built in memory.

    Args:
        docs: Iterable of paths of the PDFs to merge, in output order.

    Returns:
        The content of the merged PDF.
    """
    writer = pyPdf.PdfFileWriter()
    sources = []
    buff = StringIO.StringIO()
    try:
        for doc in docs:
            pdfreport = open(doc, 'rb')
            sources.append(pdfreport)
            reader = pyPdf.PdfFileReader(pdfreport)
            for page in range(reader.getNumPages()):
                writer.addPage(reader.getPage(page))
        # The sources are kept open until the output has been written.
        writer.write(buff)
        return buff.getvalue()
    finally:
        buff.close()
        # Close the inputs explicitly rather than relying on the writer.
        for pdfreport in sources:
            pdfreport.close()
def reduce_pdfs_to_pdf(job, data, outpath):
    """Bundle many PDFs into one without altering page dimensions.

    Every page must have the same media box as the first page encountered.
    Pages that do not are collected and reported together, and no output
    is written in that case.

    Args:
        job: Object used for progress output (``feed_status`` and
            ``terminal.feed``).
        data: Sequence of paths of the PDFs to bundle, in output order.
        outpath: Path of the PDF to create.

    Raises:
        AssertionError: if any page differs in size from the first page.

    Resources in case this approach fails:
    - https://www.linux.com/news/software/applications/8229-putting-together-pdf-files
    - http://milan.kupcevic.net/ghostscript-ps-pdf/
    - http://www.ghostscript.com/doc/current/Devices.htm
    """
    dimensions, failed = None, []
    pieces, pdf_out, handles = len(data), pyPdf.PdfFileWriter(), []

    try:
        for idx, path in enumerate(data):
            #TODO: Probably a good idea to have another wrapper for status
            #      outputs which self-overwrite using \r
            # Pieces are reported counting from 1.
            job.feed_status(
                "Queueing operations for final PDF: Piece %d of %d" %
                (idx + 1, pieces))
            file_in = open(path, 'rb')
            handles.append(file_in)
            pdf_in = pyPdf.PdfFileReader(file_in)

            for pidx, page in enumerate(pdf_in.pages):
                # Compare explicitly instead of using ``assert``, which is
                # stripped when Python runs with -O.
                if dimensions:
                    if not (page.mediaBox == dimensions):
                        failed.append((pidx, path))
                        continue
                else:
                    dimensions = page.mediaBox
                #TODO: Need to make the task progress bar definite.
                pdf_out.addPage(page)

        if failed:
            raise AssertionError(
                "Page sizes don't match first page:\n\t%s" %
                '\n\t'.join(['page %d of file %s' % x for x in failed]))

        with open(outpath, 'wb') as file_out:
            job.terminal.feed(
                "\r\nBuilding %s (%d pages from %d source files)\r\n" %
                (outpath, pdf_out.getNumPages(), pieces))
            pdf_out.write(file_out)
    finally:
        for fh in handles:
            fh.close()
def read(pdffile, pdffiletype, obj_id):
    """Split a payment PDF into per-employee files and register them.

    Each processed page is extracted into a single-page PDF and converted
    to text. For every text line that starts with the VAT label and whose
    VAT number differs from ``SETTINGS['afm_dide']``, the single-page PDF
    is saved under ``MEDIA_ROOT/pdffiles/extracted`` and a row is inserted
    into ``dide_paymentemployeepdf``.

    Args:
        pdffile: Object whose ``name`` attribute is the path of the PDF.
        pdffiletype: Value stored in the ``pdf_file_type`` column.
        obj_id: Value stored in the ``paymentfilepdf_id`` column.

    Returns:
        A tuple ``(1, number of inserted rows)``.
    """
    nl = []
    f = open(pdffile.name, "rb")
    try:
        pdf_in = pyPdf.PdfFileReader(f)
        pages = pdf_in.numPages
        cursor = connection.cursor()
        try:
            # NOTE(review): the range stops at pages - 1, so the last page
            # is never processed -- confirm that this is intentional.
            for p in range(0, pages - 1):
                mem_page = pdf_in.getPage(p)
                pdf_out = pyPdf.PdfFileWriter()
                pdf_out.addPage(mem_page)
                out_stream = StringIO()
                pdf_out.write(out_stream)

                # Extract the text of the single-page document.
                parser = PDFParser(out_stream)
                doc = PDFDocument(parser)
                rsrcmgr = PDFResourceManager()
                retstr = StringIO()
                device = TextConverter(rsrcmgr, retstr, codec='utf-8',
                                       laparams=LAParams())
                interpreter = PDFPageInterpreter(rsrcmgr, device)
                for page in PDFPage.create_pages(doc):
                    interpreter.process_page(page)

                for li in retstr.getvalue().split('\n'):
                    if li[:6] != 'ΑΦΜ':
                        continue
                    if li[8:] == SETTINGS['afm_dide']:
                        continue
                    new_file = '%s.%s.%s.pdf' % (
                        pdffile.name.replace(
                            os.path.join(settings.MEDIA_ROOT, 'pdffiles'),
                            '')[1:-4],
                        li[7:].strip(),
                        datetime.datetime.now().strftime('%H%M%S%f'))
                    print(li)
                    out_path = os.path.join(settings.MEDIA_ROOT, 'pdffiles',
                                            'extracted', new_file)
                    with open(out_path, 'wb') as out_file:
                        pdf_out.write(out_file)
                    # The values come from the PDF text, so they are passed
                    # as query parameters rather than formatted into the
                    # SQL string.
                    cursor.execute(
                        "insert into dide_paymentemployeepdf (id, "
                        "employee_vat, paymentfilepdf_id, employeefile, "
                        "pdf_file_type) values (NULL, %s, %s, %s, %s);",
                        [li[7:].strip(), obj_id, new_file, pdffiletype])
                    nl.append(li[8:])
            transaction.commit_unless_managed()
        finally:
            cursor.close()
    finally:
        f.close()
    return 1, len(nl)
def pdf_merge(layers):
    """Overlay the first pages of several PDF streams onto one page.

    Args:
        layers: Sequence of PDF streams. The first page of ``layers[0]`` is
            the base; the first page of each later stream is merged onto it
            in order.

    Returns:
        A ``cStringIO`` buffer with the one-page result, reset to its start.
    """
    import pyPdf

    def first_page(layer):
        # Open one layer and return its first page.
        return pyPdf.PdfFileReader(layer).getPage(0)

    result = cStringIO.StringIO()
    writer = pyPdf.PdfFileWriter()
    base = first_page(layers[0])
    for overlay in layers[1:]:
        base.mergePage(first_page(overlay))
    writer.addPage(base)
    writer.write(result)
    result.reset()
    return result
def _get_file_data(self, cr, uid, context=None):
    """Build one PDF from the invoices listed in ``context['active_ids']``.

    For each invoice the pages of all its PDF attachments are appended. An
    invoice without a usable PDF attachment contributes the pages of the
    ``report.blank.invoice.report`` report instead.

    Args:
        cr: Database cursor.
        uid: Id of the current user.
        context: Optional context dictionary. ``active_ids`` holds the
            invoice ids.

    Returns:
        The merged PDF encoded as base64, or False when no invoice ids
        were given.
    """
    # A fresh dict per call replaces the shared mutable ``{}`` default.
    if context is None:
        context = {}
    invoice_ids = context.get('active_ids', [])
    if not invoice_ids:
        return False

    att_pool = self.pool['ir.attachment']
    output = pyPdf.PdfFileWriter()

    for invoice in self.pool['account.invoice'].browse(
            cr, uid, invoice_ids, context=context):
        # flg becomes True once a PDF attachment has been merged.
        flg = False
        att_ids = att_pool.search(cr, uid, [
            ('res_model', '=', 'account.invoice'),
            ('res_id', '=', invoice.id)])
        for att_data in att_pool.browse(cr, uid, att_ids, context=context):
            # An attachment without a file type is treated as "not a PDF".
            if 'PDF' not in (att_data.file_type or '').upper():
                continue
            if (att_data.datas_fname and att_data.datas and
                    att_data.datas_fname.split(".")[-1].upper() == "PDF"):
                data = base64.decodestring(att_data.datas)
                buffer_file = StringIO.StringIO(data)
                input_attachment = pyPdf.PdfFileReader(buffer_file)
                flg = True
                for page in range(input_attachment.getNumPages()):
                    output.addPage(input_attachment.getPage(page))

        if not flg:
            # No PDF attachment: fall back to the blank invoice report.
            ctx = context.copy()
            ctx['model'] = 'account.invoice'
            report_service = 'report.blank.invoice.report'
            service = netsvc.LocalService(report_service)
            (result, format) = service.create(
                cr, uid, [invoice.id], {'model': 'account.invoice'},
                context=ctx)
            buffer_file = StringIO.StringIO(result)
            input_report = pyPdf.PdfFileReader(buffer_file)
            for page in range(input_report.getNumPages()):
                output.addPage(input_report.getPage(page))

    outputStream = StringIO.StringIO()
    output.write(outputStream)
    res = outputStream.getvalue().encode('base64')
    outputStream.close()
    return res
def onJoinPdfs(self, event):
    """
    Join the two PDFs together and save the result to the desktop.

    The source paths come from the ``pdfOne`` and ``pdfTwo`` controls and
    the output name from ``outputPdf``. If a source does not exist an error
    dialog is shown and nothing is written. On success a confirmation is
    shown and the three controls are cleared.
    """
    sources = (self.pdfOne.GetValue(), self.pdfTwo.GetValue())
    for source in sources:
        if not os.path.exists(source):
            msg = "The PDF at %s does not exist!" % source
            dlg = wx.MessageDialog(None, msg, 'Error',
                                   wx.OK | wx.ICON_EXCLAMATION)
            dlg.ShowModal()
            dlg.Destroy()
            return

    outputPath = os.path.join(winshell.desktop(),
                              self.outputPdf.GetValue()) + ".pdf"
    output = pyPdf.PdfFileWriter()
    handles = []
    try:
        for source in sources:
            handle = open(source, "rb")
            handles.append(handle)
            reader = pyPdf.PdfFileReader(handle)
            for page in range(reader.getNumPages()):
                output.addPage(reader.getPage(page))
        with open(outputPath, "wb") as outputStream:
            output.write(outputStream)
    finally:
        # The inputs are kept open until the output has been written.
        for handle in handles:
            handle.close()

    msg = "PDF was save to " + outputPath
    dlg = wx.MessageDialog(None, msg, 'PDF Created',
                           wx.OK | wx.ICON_INFORMATION)
    dlg.ShowModal()
    dlg.Destroy()

    self.pdfOne.SetValue("")
    self.pdfTwo.SetValue("")
    self.outputPdf.SetValue("")
def write(self, pdf_outfolder, file_order=None):
    """Convert the input files to PDF and write them as a single PDF.

    The output is ``<pdf_outfolder>/<self.name>.pdf``. Nothing is done if
    that file already exists.

    Args:
        pdf_outfolder: Folder to write the output into.
        file_order: Accepted for the order of the files, but not used by
            this method.
    """
    outfile = os.path.join(pdf_outfolder, self.name + ".pdf")
    if os.path.exists(outfile):
        # already exists
        return

    # turn each file into a PDF, dropping those that couldn't convert
    converted = (self.converter.convert(f, self.name) for f in self.infiles)
    PDFs = [pdf for pdf in converted if pdf]

    if len(PDFs) == 1:
        # just copy the file over
        # for some reason, shutil.copy creates a corrupt PDF
        os.system("cp %s %s" % (pipes.quote(PDFs[0]), pipes.quote(outfile)))
        return

    # concatenate
    output = pyPdf.PdfFileWriter()
    sources = []
    try:
        for p in PDFs:
            source = open(p, "rb")
            sources.append(source)
            append_pdf(pyPdf.PdfFileReader(source), output)
        # Closing the output here makes sure it is flushed to disk.
        with open(outfile, "wb") as target:
            output.write(target)
    finally:
        for source in sources:
            source.close()
def merge_pdf_on_disk(self, docs):
    """Merge PDF files into one document built in a temporary file.

    Args:
        docs: Iterable of paths of the PDFs to merge, in output order.

    Returns:
        The content of the merged PDF, read back from the temporary file.

    The temporary file is anonymous, so it is removed when it is closed.
    """
    writer = pyPdf.PdfFileWriter()
    handles = []
    try:
        for doc in docs:
            pdfreport = open(doc, 'rb')
            handles.append(pdfreport)
            reader = pyPdf.PdfFileReader(pdfreport)
            for page in range(reader.getNumPages()):
                writer.addPage(reader.getPage(page))
        buff = tempfile.TemporaryFile(
            suffix='.pdf', prefix='credit_control_slip_merged')
        handles.append(buff)
        # The sources are kept open until the output has been written.
        writer.write(buff)
        buff.seek(0)
        return buff.read()
    finally:
        # Close the inputs explicitly rather than relying on the writer.
        for handle in handles:
            handle.close()