def renderToPdf(envLL, filename, sizex, sizey):
    """Renders the specified Box2d as a single-page PDF of stacked layers.

    Every name in MAPNIK_LAYERS is rendered from ``<name>.xml`` into its own
    file ``<basename>_<name>.pdf``. The first page of each layer file is then
    merged onto the first layer's page, and the result is written to
    *filename*.

    Arguments:
        envLL -- bounding box; converted with LLToMerc() before zooming.
        filename -- path of the merged PDF to write.
        sizex, sizey -- size passed to both mapnik.Map and cairo.PDFSurface.

    Raises:
        ValueError -- if MAPNIK_LAYERS is empty (there is nothing to write).
    """
    basefilename = os.path.splitext(filename)[0]
    # The projected envelope is the same for every layer; compute it once.
    envMerc = LLToMerc(envLL)
    mergedpdf = None
    page = None
    # Layer files stay open until the merged PDF has been written, because
    # the pages reference their source streams.
    layer_files = []
    try:
        for mapname in MAPNIK_LAYERS:
            print('Rendering %s' % mapname)

            # Render layer PDF.
            localfilename = basefilename + '_' + mapname + '.pdf'
            surface = cairo.PDFSurface(localfilename, sizex, sizey)
            layer_map = mapnik.Map(sizex, sizey)
            mapnik.load_map(layer_map, mapname + ".xml")
            layer_map.zoom_to_box(envMerc)
            mapnik.render(layer_map, surface)
            surface.finish()

            # Merge with master.
            layer_file = open(localfilename, "rb")
            layer_files.append(layer_file)
            layer_page = PdfFileReader(layer_file).getPage(0)
            if mergedpdf is None:
                mergedpdf = PdfFileWriter()
                page = layer_page
                mergedpdf.addPage(page)
            else:
                page.mergePage(layer_page)

        if mergedpdf is None:
            raise ValueError('MAPNIK_LAYERS is empty, nothing to render')

        output = open(filename, 'wb')
        try:
            mergedpdf.write(output)
        finally:
            output.close()
    finally:
        for layer_file in layer_files:
            layer_file.close()
def split_chapters(*t_args):
    """
    Split a large pdf into chunks (i.e. chapters)

    The first positional argument is a list whose first item is the path of
    a JSON config file. The config keys read here are "source",
    "first_chapter_index", "chapters_ends", "chapter_fmt", "outputdir",
    "chapter_prefix" and "firstpage".

    Chapter ``i`` receives the pages ``[start, chapters_ends[i])`` where
    ``start`` is "firstpage" for the first chapter and the previous chapter's
    end otherwise. Prints a usage line and returns when no config file is
    given.
    """
    # Calling with no arguments at all used to hit an unbound local.
    args = t_args[0] if len(t_args) > 0 else []
    if len(args) < 1:
        print("usage: utils_pdf split_chapters configfile")
        return
    from pyPdf import PdfFileWriter, PdfFileReader

    with open(args[0]) as f:
        P = json.load(f)

    i0 = P["first_chapter_index"]
    ends = P["chapters_ends"]
    source = open(P["source"], "rb")
    try:
        input_pdf = PdfFileReader(source)
        for i in range(len(ends)):
            ch_num = i0 + i
            fmt = P["chapter_fmt"] % (ch_num, )
            output = PdfFileWriter()
            if not os.path.exists(P["outputdir"]):
                os.mkdir(P["outputdir"])
            fn_out = "%s/%s%s" % (P["outputdir"], P["chapter_prefix"], fmt)
            j0 = P["firstpage"] if i == 0 else ends[i - 1]
            for j in range(j0, ends[i]):
                output.addPage(input_pdf.getPage(j))
            outputStream = open(fn_out, "wb")
            try:
                output.write(outputStream)
            finally:
                outputStream.close()
            print("wrote %s" % (fn_out,))
    finally:
        source.close()
def add_omr_marks(self, pdf_data, is_latest_document):
    """Merge an OMR layer onto every even-indexed page of a PDF.

    :param pdf_data: raw content of the PDF to mark
    :param is_latest_document: forwarded (combined with the page test below)
        to ``_compute_marks``
    :return: raw content of the marked PDF
    """
    # Documentation
    # http://meteorite.unm.edu/site_media/pdf/reportlab-userguide.pdf
    # https://pythonhosted.org/PyPDF2/PdfFileReader.html
    # https://stackoverflow.com/a/17538003
    # https://gist.github.com/kzim44/5023021
    # https://www.blog.pythonlibrary.org/2013/07/16/
    # pypdf-how-to-write-a-pdf-to-memory/
    self.ensure_one()

    pdf_buffer = StringIO.StringIO()
    pdf_buffer.write(pdf_data)

    existing_pdf = PdfFileReader(pdf_buffer)
    output = PdfFileWriter()
    total_pages = existing_pdf.getNumPages()

    # print latest omr mark on latest pair page (recto)
    # NOTE(review): total_pages // 2 is only an even page index for some
    # page counts (e.g. 1 or 4); for others (2, 3, 6) no page matches.
    # Confirm the intended formula before changing it.
    latest_omr_page = total_pages // 2

    for page_number in range(total_pages):
        page = existing_pdf.getPage(page_number)
        # only print omr marks on pair pages (recto)
        # Compare by value: "is 0" only works through CPython's int caching.
        if page_number % 2 == 0:
            is_latest_page = is_latest_document and \
                page_number == latest_omr_page
            marks = self._compute_marks(is_latest_page)
            omr_layer = self._build_omr_layer(marks)
            page.mergePage(omr_layer)
        output.addPage(page)

    out_buffer = StringIO.StringIO()
    output.write(out_buffer)

    return out_buffer.getvalue()
def save(self, to):
    """Render every page class onto the origin PDF and write the result.

    For each class in ``self.pages`` an overlay is produced with
    ``page_class(self.instance).save()`` and merged onto page 0 of the
    origin file returned by ``self.get_origin()``.

    :param to: destination path (string) or an already opened writable
        stream; the stream is closed after writing in both cases.
    :raises RuntimeError: if no origin is configured or it cannot be opened.
    """
    origin = self.get_origin()
    if not origin:
        raise RuntimeError("Please implement get_origin method or origin attribute")

    try:
        origin_file = open(origin, "rb")
    except IOError:
        raise RuntimeError(u"Failed to open origin file")

    try:
        existing_pdf = PdfFileReader(origin_file)
        output = PdfFileWriter()
        for page_class in self.pages:
            new_page = page_class(self.instance).save()
            # NOTE(review): getPage(0) is the same page object on every
            # iteration, so overlays accumulate on it - confirm this is
            # intended when self.pages has more than one entry.
            base_page = existing_pdf.getPage(0)
            base_page.mergePage(new_page)
            output.addPage(base_page)

        if isinstance(to, basestring):
            outputStream = open(to, "wb")
        else:
            outputStream = to
        try:
            output.write(outputStream)
        finally:
            outputStream.close()
    finally:
        # The origin must stay open until the writer has consumed its pages.
        origin_file.close()
def joinpdf(folder=TMPFOLDER,startpage=INDEX,outputname='freecad.pdf'):
    """creates one pdf file from several others, following order from startpage

    The page order is taken from the ``href`` targets found inside the first
    ``<ul>...</ul>`` span of ``<folder>/<startpage>.html``; the start page
    itself is inserted at position 1. For every listed page accepted by
    ``exists()``, ``<folder>/<page>.pdf`` is appended. Files that cannot be
    read are reported and skipped. The result is written to
    ``OUTPUTPATH/<outputname>``.
    """
    if VERBOSE: print ("Building table of contents...")

    with open(folder+os.sep+startpage+'.html') as f:
        html = f.read()
    html = html.replace("\n"," ")
    html = html.replace("> <","><")
    html = re.findall("<ul.*/ul>",html)[0]
    pages = re.findall('href="(.*?)"',html)
    pages.insert(1,startpage+".html")

    result = PdfFileWriter()
    # Inputs stay open until the merged file is written.
    opened = []
    try:
        for p in pages:
            if exists(p[:-5]):
                if VERBOSE: print ('Appending',p)
                try:
                    handle = open(folder+os.sep+p[:-5]+'.pdf','rb')
                    opened.append(handle)
                    inputfile = PdfFileReader(handle)
                except Exception:
                    # Best effort: one unreadable page must not stop the join.
                    print ('Unable to append',p)
                else:
                    for i in range(inputfile.getNumPages()):
                        result.addPage(inputfile.getPage(i))

        outputfile = open(OUTPUTPATH + os.sep + outputname,'wb')
        try:
            result.write(outputfile)
        finally:
            outputfile.close()
    finally:
        for handle in opened:
            handle.close()
    if VERBOSE: print ('Successfully created',OUTPUTPATH,os.sep,outputname)
def buildPdf(self):
    """Assemble the flight plan PDF and notify the user of its location.

    Pages are added in this order: checklist, takeoff, airplane, then every
    external PDF returned by ``getExternalPdf`` for the takeoff aerodrome.
    Encrypted external PDFs are first passed through ``PdfCracker().crack``.
    The result is written to ``<outputdir>/flightplan <title>.pdf``.
    """
    to_pdffile = self._takeoff()
    ap_pdffile = self._airplane()
    ck_pdffile = self._checklist()

    output = PdfFileWriter()
    # Every source stays open until the merged file is written, then all
    # of them are closed together.
    opened = []

    def open_pdf(path):
        handle = open(path, "rb")
        opened.append(handle)
        return handle

    try:
        self.addAllPages(output, PdfFileReader(open_pdf(ck_pdffile)))
        self.addAllPages(output, PdfFileReader(open_pdf(to_pdffile)))
        self.addAllPages(output, PdfFileReader(open_pdf(ap_pdffile)))

        # Add AD Info Charts
        for pdf in self.getExternalPdf(self.eVfrPath, self.__fligthplan.performance_takeoff.aerodrome.code):
            reader = PdfFileReader(open_pdf(pdf))
            if reader.getIsEncrypted():
                pdfCracked = PdfCracker().crack(pdf)
                reader = PdfFileReader(open_pdf(pdfCracked))
            self.addAllPages(output=output, input=reader)

        # write out the merged file
        outputPdf = os.path.join(self.outputdir, 'flightplan %s.pdf' % self.__fligthplan.title)
        outputStream = open(outputPdf, "wb")
        try:
            output.write(outputStream)
        finally:
            outputStream.close()
    finally:
        for handle in opened:
            handle.close()

    npyscreen.notify_confirm(
        message="Your pretty Fligthplan has been created at\n\n%s" % outputPdf
    )
def pdfMerge():
    """
    Merges generated PDFs into one called <filename>_summary
    Deletes the individual consituent PDFs

    Reads the module-level ``outDir`` and ``filename``; the meta PDF comes
    first, followed by the plots PDF.
    """

    def append_pdf(input, output):
        """ Combines PDF pages to be merged """
        for page_num in range(input.numPages):
            output.addPage(input.getPage(page_num))

    meta_path = '%s%s_meta.pdf' %(outDir, filename)
    plots_path = '%s%s_plots.pdf' %(outDir, filename)
    summary_path = '%s%s_summary.pdf' %(outDir, filename)

    # Merge PDFs
    output = PdfFileWriter()
    print(outDir)
    with open(meta_path, 'rb') as meta_file:
        append_pdf(PdfFileReader(meta_file), output)
        with open(plots_path, 'rb') as plots_file:
            append_pdf(PdfFileReader(plots_file), output)
            with open(summary_path, 'wb') as outputFile:
                output.write(outputFile)

    # Delete PDFs (only after the handles are closed, otherwise removal
    # fails on platforms that lock open files)
    os.remove(plots_path)
    os.remove(meta_path)
def into_half(src, dst):
    """Split every page of *src* into two pages and write them to *dst*.

    Each page is emitted twice: once with its media box cut at half the
    width on the right (``upperRight``), and once as a copy whose media box
    starts at half the width (``upperLeft``). The width and height of every
    page are printed while processing.
    """
    with open(src, 'rb') as src_file, open(dst, 'wb') as dst_file:
        reader = PdfFileReader(src_file)
        writer = PdfFileWriter()
        for i in range(reader.getNumPages()):
            left = reader.getPage(i)
            # The copy needs its own media box, otherwise both halves would
            # share (and overwrite) the same rectangle.
            right = copy.copy(left)
            right.mediaBox = copy.copy(left.mediaBox)
            (w, h) = left.mediaBox.upperRight
            print('%s %s' % (w, h))
            left.mediaBox.upperRight = (w/2, h)
            right.mediaBox.upperLeft = (w/2, h)
            writer.addPage(left)
            writer.addPage(right)
        writer.write(dst_file)
def setMetadata(self, metadata):
    """Returns a document with new metadata.

    Keyword arguments:
    metadata -- expected an dictionary with metadata. A "ModificationDate"
        entry is stored as "ModDate"; a list under "Keywords" is joined
        with spaces. The caller's dictionary is left untouched.

    Returns the raw content of the rewritten PDF.
    """
    # TODO: date as "D:20090401124817-04'00'" ASN.1 for ModDate and CreationDate
    # Work on a copy so the caller's dictionary is not modified.
    metadata = dict(metadata)
    modification_date = metadata.pop("ModificationDate", None)
    if modification_date:
        metadata['ModDate'] = modification_date
    if isinstance(metadata.get('Keywords', None), list):
        # str.join takes the list; list objects have no join().
        metadata['Keywords'] = ' '.join(metadata['Keywords'])

    args = {}
    for key, value in metadata.items():
        # Upper-case only the first letter: str.capitalize() would also
        # lower-case the rest and turn "ModDate" into "Moddate".
        info_key = key[:1].upper() + key[1:]
        args[NameObject('/' + info_key)] = createStringObject(value)

    input_file = open(self.document.getUrl(), "rb")
    try:
        input_pdf = PdfFileReader(input_file)
        output_pdf = PdfFileWriter()
        output_pdf._info.getObject().update(args)
        for page_num in range(input_pdf.getNumPages()):
            output_pdf.addPage(input_pdf.getPage(page_num))
        output_stream = io.BytesIO()
        output_pdf.write(output_stream)
    finally:
        input_file.close()
    return output_stream.getvalue()
def cat(infilenames, outputfilename, verbose):
    """Concatenate several PDF files into a new file.

    Exits with status 2 (after calling ``halp()``) when an input is missing,
    when the output already exists, or when an input cannot be read as PDF.
    When *verbose* is true the number of processed pages is printed.
    """
    for infilename in infilenames:
        print(infilename)
        if not os.path.exists(infilename):
            halp()
            print ("error: "+infilename+" does not exist... exiting nao")
            sys.exit(2) # pdf file is no pdf file...
    if os.path.exists(outputfilename):
        halp()
        print ("error: "+outputfilename+" does already exist... exiting nao")
        sys.exit(2) # pdf file is no pdf file...

    inputs = []
    # Inputs stay open until the output has been written.
    opened = []
    try:
        try:
            for path in infilenames:
                handle = open(path, "rb")
                opened.append(handle)
                inputs.append(PdfFileReader(handle))
        except Exception:
            halp()
            sys.exit(2) # pdf file is no pdf file...

        i = 0
        output = PdfFileWriter()
        for pdf in inputs:
            for pagenr in range(pdf.getNumPages()):
                output.addPage(pdf.getPage(pagenr))
                i = i + 1

        outputStream = open(outputfilename, "wb")
        try:
            output.write(outputStream)
        finally:
            outputStream.close()
    finally:
        for handle in opened:
            handle.close()
    if verbose: print (str(i)+" pages processed")
def createPDFHttpResponse(filepath, output_filename, user, access_time):
    """
    Creates a HttpResponse from a watermarked PDF file. Watermark contains the user who accessed the document
    and the time of access.

    :param filepath: Path to the file
    :param output_filename: File name sent to the user
    :param user: value interpolated into the watermark text
    :param access_time: object whose ``isoformat(' ')`` is interpolated into
        the watermark text
    :return: HttpResponse with the file content. A missing or unreadable
        file is not handled here: the error raised by ``open`` propagates.
    """
    #Add access watermark
    watermark_buffer = StringIO()
    p = canvas.Canvas(watermark_buffer)
    p.drawString(0,0, "Downloaded by %s at %s" %(user, access_time.isoformat(' ')))
    p.showPage()
    p.save()
    watermark_buffer.seek(0)
    watermark = PdfFileReader(watermark_buffer)

    #Read the PDF to be accessed
    source = open(filepath, 'rb')
    try:
        attachment = PdfFileReader(source)
        output = PdfFileWriter()

        #Attach watermark to each page
        for page in attachment.pages:
            page.mergePage(watermark.getPage(0))
            output.addPage(page)

        response = HttpResponse(mimetype='application/pdf')
        response['Content-Disposition'] = 'inline; filename=%s' % output_filename.encode('utf-8')
        output.write(response)
    finally:
        # Closed only after the response body has been produced.
        source.close()
    return response
def split_pset():
    """Split the answers PDF of a problem set into one PDF per question.

    Reads ``options.pset`` and ``options.probs`` (a comma separated list of
    page specifications, one per question, expanded by ``get_pages``). The
    source is ``pset<N>/latex/pset<N>_answers.pdf`` and question ``k`` is
    written next to it as ``pset<N>-<k>_answer.pdf``.
    """
    if not (options.pset and options.probs):
        print_err_and_die("You must enter both arguements! run with -h for help")

    path = "pset%s/latex/"%options.pset
    filename = "%spset%s_answers.pdf"%(path, options.pset)
    try:
        inp = PdfFileReader(open(filename, "rb"))
    except IOError:
        print_err_and_die("Error! File, %s was not found." % filename)

    ##loop over user input and break up pdf
    for questionNum, prob in enumerate(options.probs.split(","), 1):
        print("Processing question %s" % questionNum)
        out = PdfFileWriter()
        # strip() removes surrounding whitespace from the specification
        for page in get_pages(prob.strip(), inp.getNumPages()):
            print("page num %s" % str(page))
            # page numbers are 1-based, getPage() is 0-based
            out.addPage(inp.getPage(int(page)-1))
        outStream = open("%spset%s-%s_answer.pdf"%(path, options.pset, questionNum), "wb")
        out.write(outStream)
        outStream.close()
    print("Done!")
def rewrite(self, context, font={'name': 'Times-Roman', 'size': 11}):
    """Stamp text onto the first page of ``self.path``.

    :param context: iterable of dicts with the keys ``x``, ``y`` and
        ``value``; each value is drawn at its coordinates.
    :param font: dict with the keys ``name`` and ``size``.
    :return: True once ``self.destination`` has been written.
    """
    # Draw all text items on an in-memory letter-sized overlay.
    overlay_buffer = StringIO.StringIO()
    overlay = canvas.Canvas(overlay_buffer, pagesize=letter)
    overlay.setFont(font['name'], font['size'])
    for item in context:
        overlay.drawString(item['x'], item['y'], item['value'])
    overlay.save()

    # Rewind so the reader sees the overlay from its first byte.
    overlay_buffer.seek(0)
    overlay_pdf = PdfFileReader(overlay_buffer)

    # Merge the overlay onto page 0 of the existing document.
    source_pdf = PdfFileReader(open(self.path, "rb"))
    writer = PdfFileWriter()
    first_page = source_pdf.getPage(0)
    first_page.mergePage(overlay_pdf.getPage(0))
    writer.addPage(first_page)

    # Only that single merged page is written out.
    target = open(self.destination, "wb")
    writer.write(target)
    target.close()
    return True
def __call__(self, data, attachments=[], pages=None):
    """Render the form fields, fill the PDF and return a PdfFileWriter.

    Every entry of ``self.fields`` that has a "template" is rendered with
    the keyword arguments from ``self.template_args(data)``; render errors
    are logged and the field is skipped. The rendered values are passed to
    ``self.exec_pdftk``, watermarks from ``self.watermarks`` are merged onto
    their pages, and the selected pages are copied to the writer.

    :param data: input handed to ``template_args``
    :param attachments: objects whose ``pdf()`` page is merged onto a new
        blank page appended at the end
    :param pages: page indexes to keep; all pages when empty or None
    """
    self.rendered = {}
    for field, field_ctx in self.fields.items():
        if "template" in field_ctx:
            self.context = field_ctx
            render_kwargs = self.template_args(data)
            template = self.context["template"]
            try:
                value = template.render(**render_kwargs)
            except Exception as err:
                logger.error("%s: %s %s", field, template, err)
                continue
            # Skip the field if it is already rendered by filter
            if field not in self.rendered:
                self.rendered[field] = value

    filled = PdfFileReader(self.exec_pdftk(self.rendered))
    for pagenumber, watermark in self.watermarks:
        filled.getPage(pagenumber).mergePage(watermark)

    if not pages:
        pages = xrange(filled.getNumPages())

    output = PdfFileWriter()
    for index in pages:
        output.addPage(filled.getPage(index))
    for attachment in attachments:
        blank = output.addBlankPage()
        blank.mergePage(attachment.pdf())
    return output
class cleanpdf:
    """Copy a PDF to ``<name>5.pdf`` with overwritten info entries.

    All the work happens in the constructor: the source is opened, every
    key already present in the writer's info dictionary is replaced by a
    fixed markup string, and the pages are copied to the new file.
    """

    def __init__(self,pathFile):
        self.pathFile = pathFile
        self.inputFile = open(self.pathFile,"rb")
        self.pdfInput = PdfFileReader(self.inputFile)
        self.pyPdfOutput = PdfFileWriter()
        self.dataToUpdate = self.pyPdfOutput._info.getObject()
        self.__modifyData()
        self.__copyPDF()

    def __modifyData(self):
        # Only existing keys are reassigned, so the dictionary does not
        # change size while it is being iterated.
        for data in self.dataToUpdate:
            self.dataToUpdate[data] = createStringObject(('<h1 onmouseover=alert(1)>').encode('ascii'))

    def __copyPDF(self):
        for page in range(0,self.pdfInput.getNumPages()):
            self.pyPdfOutput.addPage(self.pdfInput.getPage(page))
        outputFile = open(self.__changeName(),"wb")
        try:
            self.pyPdfOutput.write(outputFile)
        finally:
            # Close explicitly so the data is flushed to disk.
            outputFile.close()

    def __changeName(self):
        # Everything before the last "." plus "5.pdf".
        newName = self.pathFile[0:self.pathFile.rfind(".")]+"5.pdf"
        return newName
def delete(filesandranges, outputfilename, verbose):
    """Copy the given PDFs to a new file, leaving out the listed pages.

    :param filesandranges: list of dicts with the keys ``name`` (path of a
        PDF) and ``pages`` (1-based page numbers to drop from that file)
    :param outputfilename: path of the file to create; must not exist yet
    :param verbose: accepted for signature parity with the sibling
        commands; not used here

    Exits with status 2 (after calling ``halp()``) when an input is missing,
    the output already exists, or an input cannot be processed.
    """
    for entry in filesandranges:
        if not os.path.exists(entry['name']):
            halp()
            print ("error: "+entry['name']+" does not exist... exiting nao")
            sys.exit(2) # pdf file is no pdf file...
    if os.path.exists(outputfilename):
        halp()
        # Report the output file itself; the input name was printed here
        # before (and failed with a NameError for an empty input list).
        print ("error: "+outputfilename+" does already exist... exiting nao")
        sys.exit(2) # pdf file is no pdf file...

    output = PdfFileWriter()
    # Inputs stay open until the output has been written.
    opened = []
    try:
        try:
            for pdf in filesandranges:
                print (pdf["name"])
                handle = open(pdf["name"], "rb")
                opened.append(handle)
                fiel = PdfFileReader(handle)
                for pagenr in range(1,fiel.getNumPages()+1):
                    if (pagenr not in pdf["pages"]):
                        output.addPage(fiel.getPage(pagenr-1))
        except Exception:
            halp()
            sys.exit(2) # pdf file is no pdf file...

        if (not os.path.exists(outputfilename)):
            outputStream = open(outputfilename, "wb")
            try:
                output.write(outputStream)
            finally:
                outputStream.close()
        else:
            print ("file exists, discontinuing operation")
    finally:
        for handle in opened:
            handle.close()
def showpdf(request):
    """Return the first page of an extracted PDF with the signature image.

    GET parameters:
        f -- file name below ``MEDIA_ROOT/pdffiles/extracted``
        o -- ``l`` places the signature at x=529, anything else at x=70

    Returns None when ``f`` is missing or resolves outside of the extracted
    directory; otherwise an ``application/pdf`` attachment response.
    """
    sign = os.path.join(settings.MEDIA_ROOT, "signature.png")
    mimetypes.init()
    response = None
    if 'f' in request.GET:
        base_dir = os.path.realpath(
            os.path.join(settings.MEDIA_ROOT, 'pdffiles', 'extracted'))
        pdf_path = os.path.realpath(
            os.path.join(base_dir, '%s' % request.GET['f']))
        # The name comes straight from the query string: refuse anything
        # (e.g. "../..") that escapes the extracted directory.
        if not pdf_path.startswith(base_dir + os.sep):
            return response

        fr = open(pdf_path, "rb")
        try:
            imgTemp = StringIO()
            imgDoc = canvas.Canvas(imgTemp)
            # A missing "o" parameter is treated like any non-"l" value.
            if request.GET.get('o') == 'l':
                imgDoc.drawImage(sign, 529, 40, 290/2, 154/2)
            else:
                imgDoc.drawImage(sign, 70, 40, 290/2, 154/2)
            imgDoc.save()

            overlay = PdfFileReader(StringIO(imgTemp.getvalue())).getPage(0)
            page = PdfFileReader(fr).getPage(0)
            page.mergePage(overlay)
            pdf_out = PdfFileWriter()
            pdf_out.addPage(page)

            response = HttpResponse(mimetype='application/pdf')
            response['Content-Disposition'] = 'attachment; filename=%s' % request.GET['f']
            pdf_out.write(response)
        finally:
            fr.close()
    return response
def select(filesandranges, outputfilename, verbose):
    """Copy the listed pages of the given PDFs to a new file.

    :param filesandranges: list of dicts with the keys ``name`` (path of a
        PDF) and ``pages`` (1-based page numbers to take from that file)
    :param outputfilename: path of the file to create; must not exist yet
    :param verbose: when true, the request is printed first

    Exits with status 2 (after calling ``halp()``) when an input is missing,
    the output already exists, or an input cannot be processed, and with
    status 3 when a requested page is outside of its document.
    """
    if verbose: print (str(filesandranges)+"\noutput: "+str(outputfilename))
    for entry in filesandranges:
        if not os.path.exists(entry['name']):
            halp()
            print ("error: "+entry['name']+" does not exist... exiting nao")
            sys.exit(2) # pdf file is no pdf file...
    if os.path.exists(outputfilename):
        halp()
        # Report the output file itself rather than the last input name.
        print ("error: "+outputfilename+" does already exist... exiting nao")
        sys.exit(2) # pdf file is no pdf file...

    output = PdfFileWriter()
    # Inputs stay open until the output has been written.
    opened = []
    try:
        try:
            for pdf in filesandranges:
                handle = open(pdf["name"], "rb")
                opened.append(handle)
                fiel = PdfFileReader(handle)
                for pagenr in pdf["pages"]:
                    if (not (pagenr > fiel.getNumPages()) and not(pagenr < 1)):
                        output.addPage(fiel.getPage(pagenr-1))
                    else:
                        print("one or more pages are not in the chosen PDF")
                        halp()
                        sys.exit(3) #wrong pages or ranges
        except Exception:
            # SystemExit is not an Exception, so the exit status 3 above is
            # no longer swallowed and replaced by 2.
            halp()
            sys.exit(2) # pdf file is no pdf file...

        if (not os.path.exists(outputfilename)):
            outputStream = open(outputfilename, "wb")
            try:
                output.write(outputStream)
            finally:
                outputStream.close()
        else:
            print ("file exists, discontinuing operation")
    finally:
        for handle in opened:
            handle.close()
def add_guides(self):
    """Copy ``sig.pdf`` to ``sigs.pdf`` with guides merged on the first page.

    The guide overlay is a long-arm layout on the ``a4l`` page size when
    ``self.args.longarm`` is set; otherwise a short-arm layout on the ``a5``
    or ``a4l`` page size depending on ``self.args.a5``.
    """
    with open('sig.pdf', 'rb') as sig_file:
        pdf_in = PdfFileReader(sig_file)
        pdf_out = PdfFileWriter()
        for i in xrange(pdf_in.getNumPages()):
            page = pdf_in.getPage(i)
            # Only the first page (index 0) receives the guides.
            if not i:
                guides = StringIO()
                if self.args.longarm:
                    create_pdf(
                        guides, a4lwidth_pt, a4lheight_pt, generate_longarm())
                else:
                    if self.args.a5:
                        w, h = a5width_pt, a5height_pt
                    else:
                        w, h = a4lwidth_pt, a4lheight_pt
                    create_pdf(guides, w, h, generate_shortarm(
                        self.args.a5, bool(self.args.signature)))
                pdf_guides = PdfFileReader(guides)
                page.mergePage(pdf_guides.getPage(0))
            pdf_out.addPage(page)
        # Written while the source is still open; both files are closed
        # when the blocks end.
        with open('sigs.pdf', 'wb') as out_file:
            pdf_out.write(out_file)
def split_file(f, filename):
    """Split our file into 10-page sub-files and add those to the queue in order.

    *f* is the opened reader for *filename*. Each piece is named after the
    original (minus its last four characters) plus ``_<first page index>``,
    and the original file is removed at the end.
    """
    global file_queue
    first_page = 0
    remaining = f.getNumPages()
    log('Splitting file ' + filename + " with " + str(remaining) + " pages.")

    while remaining > 0:
        # Take up to 10 pages for this piece.
        chunk_size = min(10, remaining)
        piece_name = filename[:-4] + '_' + str(first_page) + '.pdf'
        piece = PdfFileWriter()
        for page_index in range(first_page, first_page + chunk_size):
            piece.addPage(f.getPage(page_index))

        # Write and save file
        fout = open(piece_name, 'wb')
        piece.write(fout)
        fout.flush()
        fout.close()
        file_queue.append(piece_name)

        first_page += chunk_size
        remaining -= chunk_size

    # Delete the file now that it's in pieces
    os.remove(filename)
def get_chapters():
    """Download the PDFs of every chapter and assemble one PDF per chapter.

    ``download_list`` holds one list of URLs per chapter. The chapter id is
    taken from the digits following "kap" in the first URL's file name,
    minus their last character. A chapter is written to
    ``algs-kap<chapter>.pdf`` only when all of its parts were fetched.
    """
    chapter_re = re.compile(r'(?<=kap)\d{1,3}')
    for url in download_list:
        # An empty group has no file name to derive the chapter from.
        if not url:
            continue
        output = PdfFileWriter()
        errored = False

        filename = url[0].split('/')[-1]
        chap = chapter_re.search(filename).group()
        chap = chap[:-1]
        print("Doing chapter %s" % chap)

        for u in url:
            try:
                pdf = urllib2.urlopen(u).read()
                mem_file = StringIO(pdf)
                append_pdf(PdfFileReader(mem_file), output)
            except Exception as e:
                print("Error for chapter " + chap + ": " + str(e))
                errored = True

        try:
            if not errored:
                target = open("algs-kap" + chap + ".pdf", "wb")
                try:
                    output.write(target)
                finally:
                    target.close()
                print("Assembled pdf at algs-"+ chap + ".pdf")
            else:
                print("Couldn't get chapter, not assembled")
        except Exception as e:
            print("Error ocurred!")
            print(e)
def split(files, verbose):
    """Write every page of every given PDF to its own file.

    Page ``k`` of ``name.ext`` becomes ``namep<k>.ext``, with ``k`` 1-based
    and zero-padded to the number of digits of the document's page count.

    Exits with status 2 (after calling ``halp()``) when an input is missing
    or cannot be opened as PDF. When *verbose* is true a summary is printed.
    """
    for infilename in files:
        if not os.path.exists(infilename):
            halp()
            print ("error: "+infilename+" does not exist... exiting nao")
            sys.exit(2) # pdf file is no pdf file...

    inputs = []
    opened = []
    i = 0
    j = 0
    try:
        try:
            for path in files:
                handle = open(path, "rb")
                opened.append(handle)
                inputs.append(PdfFileReader(handle))
        except Exception:
            halp()
            print ("there has been an error of unfortunate proportions")
            sys.exit(2) # pdf file is no pdf file...

        for path, pdf in zip(files, inputs):
            num_pages = pdf.getNumPages()
            # Digits of the largest page number; ceil(log10(n)) is one short
            # for 10, 100, ... and left those page names unpadded.
            width = len(str(num_pages))
            (name, ext) = splitext(path)
            for pagenr in range(num_pages):
                output = PdfFileWriter()
                output.addPage(pdf.getPage(pagenr))
                my_str = "%0*d" % (width, pagenr+1)
                print (name+"p"+my_str+ext)
                outputStream = open(name+"p"+my_str+ext, "wb")
                try:
                    output.write(outputStream)
                finally:
                    outputStream.close()
                j = j + 1
            i = i + 1
    finally:
        for handle in opened:
            handle.close()
    if verbose: print (str(j)+" pages in "+str(i)+" files processed")
def join_pages(composites):
    """Build the searchable PDF for *composites* and print its path as JSON.

    ``collect_pages`` groups the composites per page; every group is painted
    with ``paint_original_segments`` into a one-page PDF, and the first page
    of each is joined into a temporary file below ``./static/images/``.
    The JSON object ``{"searchable": <path>}`` is written to stdout.
    """
    # latex_buf = StringIO()
    page_fnames = []
    for page_num, collection in enumerate(collect_pages(composites)):
        fnames, transcriptions, types = [], [], []
        for r in collection:
            fnames.append(r['location'])
            transcriptions.append(r['transcription'])
            types.append(r['type'])
        page_fnames.append(paint_original_segments(fnames, transcriptions, page_num))
        # latex_buf.write(assemble_latex(fnames, transcriptions, types))
        # latex_buf.write(LATEX_NEWPAGE_SNIPPET)
    # raw_latex = LATEX_WRAP.format(raw_latex=latex_buf.getvalue(), font_size=LATEX_FONT_SIZE)

    # # transcribed pdf
    # latex_pdf_fname = latex_to_pdf(raw_latex)

    # ---

    # searchable pdf
    pdf_writer = PdfFileWriter()
    pdf_pages = []
    try:
        for page_fname in page_fnames:
            pdf_pages.append(open(page_fname, 'rb'))
            pdf_reader = PdfFileReader(pdf_pages[-1])
            pdf_writer.addPage(pdf_reader.getPage(0))
        searchable_pdf = NamedTemporaryFile(prefix='searchable_', suffix='.pdf', dir=path.abspath('./static/images/'), delete=False)
        try:
            pdf_writer.write(searchable_pdf)
        finally:
            searchable_pdf.close()
    finally:
        # Plain loop: map() would be lazy on Python 3 and close nothing.
        for page_file in pdf_pages:
            page_file.close()

    json.dump({
        # 'transcribed': latex_pdf_fname,
        'searchable': searchable_pdf.name
    }, sys.stdout)
def generate(donor):
    """Create the two-page PDF ``output/<donor-slug>.pdf`` for *donor*.

    Does nothing when the combined PDF already exists. Otherwise each of the
    two SVG templates (``page1_svg``, ``page2_svg``) is copied, has
    "/France/" replaced by the URL-quoted donor name, is updated and
    exported to PDF with inkscape, and both exports are joined.
    """
    os.system('mkdir -p output')
    donor_url = donor.replace(' ','%20')
    slug = donor.replace(' ','-').lower()
    page1 = 'output/%s1' % (slug)
    page2 = 'output/%s2' % (slug)
    combined = 'output/%s.pdf' % (slug)
    if os.path.exists(combined):
        return

    # Same pipeline for both pages; only the export command differs (the
    # second page's export does not silence stderr).
    jobs = (
        (page1_svg, page1,
         'inkscape --file="%s.svg" --export-pdf="%s.pdf" 2> /dev/null'),
        (page2_svg, page2,
         'inkscape --file="%s.svg" --export-pdf="%s.pdf" '),
    )
    for template_svg, page, export_cmd in jobs:
        os.system('cp "%s" "%s.svg"' % (template_svg, page))
        os.system('sed "s|/France/|/%s/|" "%s" > "%s.svg"' % (donor_url, template_svg, page))
        os.system('inkscape --file="%s.svg" --verb=za.co.widgetlabs.update --verb=FileSave --verb=FileQuit 2> /dev/null' % (page))
        os.system(export_cmd % (page, page))

    # Merge pages
    first = PdfFileReader(open('%s.pdf' % (page1), 'rb'))
    second = PdfFileReader(open('%s.pdf' % (page2), 'rb'))
    writer = PdfFileWriter()
    writer.addPage(first.getPage(0))
    writer.addPage(second.getPage(0))
    target = open(combined, 'wb')
    writer.write(target)
    target.close()
    sleep(2)
def merge(fppath, bppath, outputpath, no_delete, fed_backwards):
    """Interleave the pages of a front-pages PDF and a back-pages PDF.

    :param fppath: path of the PDF holding the front pages
    :param bppath: path of the PDF holding the back pages
    :param outputpath: path of the merged PDF (``~`` is expanded)
    :param no_delete: when false, pages whose extracted text is empty are
        dropped from the result
    :param fed_backwards: when true, back pages are taken in reverse order

    Front pages without a matching back page are kept on their own.
    """
    # PDFs are binary: text mode corrupts them on some platforms.
    with open(fppath, 'rb') as fp_handle, open(bppath, 'rb') as bp_handle:
        fpfile = PdfFileReader(fp_handle)
        bpfile = PdfFileReader(bp_handle)
        back_count = bpfile.getNumPages()

        outputpages = []
        for i in range(fpfile.getNumPages()):
            outputpages.append(fpfile.getPage(i))
            back_index = back_count - i - 1 if fed_backwards else i
            # Explicit bounds check: a negative index would silently wrap
            # around and repeat back pages instead of failing.
            if 0 <= back_index < back_count:
                outputpages.append(bpfile.getPage(back_index))

        if not no_delete:
            outputpages = [page for page in outputpages if page.extractText() != '']

        outputfile = PdfFileWriter()
        for page in outputpages:
            outputfile.addPage(page)
        with open(os.path.expanduser(outputpath), 'wb') as out_handle:
            outputfile.write(out_handle)
def pdf(coursesid,examsid):
    ''' Creates a blank PDF of this exam

    Draws "Hello" on a letter-sized overlay, merges it onto the first page
    of the template PDF and returns that page as an ``application/pdf``
    response. Neither argument is used yet.
    '''
    # TODO: Obviously fix this up to generate actual PDFs; this is just a proof of concept
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import letter
    from pyPdf import PdfFileWriter, PdfFileReader
    from io import BytesIO

    overlay = BytesIO()
    p = canvas.Canvas(overlay, pagesize=letter)
    p.drawString(100, 100, 'Hello')
    p.save()
    overlay.seek(0)
    new_pdf = PdfFileReader(overlay)

    template = open('/home/treece/src/web/bubbleck/res/Template.pdf', 'rb')
    try:
        existing_pdf = PdfFileReader(template)
        out = PdfFileWriter()
        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        out.addPage(page)
        a = BytesIO()
        out.write(a)
    finally:
        template.close()

    # write() returns None; the PDF content is in the buffer.
    response = make_response(a.getvalue())
    response.headers['Content-Disposition'] = 'inline; filename="sakulaci.pdf"'
    response.mimetype = 'application/pdf'
    return response
def create_source_pdf(self, cr, uid, ids, data, report_xml, context=None):
    """Create the PDF report, reusing or storing attachments when configured.

    When ``report_xml.attachment`` is set it is evaluated per object (with
    ``object`` and ``time`` in scope) to get the attachment name. If
    attachment reuse is enabled and a stored ``<name>.pdf`` exists for the
    object, its decoded data is used; otherwise the report is generated with
    ``create_single_pdf`` and, when a name was produced, saved as a new
    ``ir.attachment``.

    Returns False as soon as one single report cannot be created. When the
    collected results are PDFs: regenerates the report for all ids unless
    ``context['allow']`` is true, in which case the collected PDFs are
    concatenated and returned as ``(data, format)``.

    NOTE(review): the nesting was reconstructed from a collapsed source
    line; as laid out here the function returns None when no attachment
    expression is set or when the first result is not a PDF - confirm
    against the original file.
    """
    flag=False
    if not context:
        context={}
    pool = pooler.get_pool(cr.dbname)
    attach = report_xml.attachment
    # Check in the new model if this report allows reprinting;
    # "allow to reprint" should take precedence over attach.
    if attach:
        objs = self.getObjects(cr, uid, ids, context)
        results = []
        for obj in objs:
            # NOTE(review): eval() of the report's attachment expression;
            # make sure that field can only be edited by trusted users.
            aname = eval(attach, {'object':obj, 'time':time})
            result = False
            if report_xml.attachment_use and aname and context.get('attachment_use', True):
                aids = pool.get('ir.attachment').search(cr, uid, [('datas_fname','=',aname+'.pdf'),('res_model','=',self.table),('res_id','=',obj.id)])
                if aids:
                    brow_rec = pool.get('ir.attachment').browse(cr, uid, aids[0])
                    # An attachment record without data contributes nothing.
                    if not brow_rec.datas:
                        continue
                    d = base64.decodestring(brow_rec.datas)
                    results.append((d,'pdf'))
                    continue
            result = self.create_single_pdf(cr, uid, [obj.id], data, report_xml, context)
            if not result:
                return False
            try:
                if aname:
                    flag=True # since it only gets here the first time, without an attachment
                    name = aname+'.'+result[1]
                    pool.get('ir.attachment').create(cr, uid, {
                        'name': aname,
                        'datas': base64.encodestring(result[0]),
                        'datas_fname': name,
                        'res_model': self.table,
                        'res_id': obj.id,
                        }, context=context
                    )
                    cr.commit()
            except Exception,e:
                # Saving the attachment is best effort: the error is logged
                # and the generated report is still returned.
                import traceback, sys
                tb_s = reduce(lambda x, y: x+y, traceback.format_exception(sys.exc_type, sys.exc_value, sys.exc_traceback))
                netsvc.Logger().notifyChannel('report', netsvc.LOG_ERROR,str(e))
            results.append(result)
        if results:
            if results[0][1]=='pdf':
                if not context.get('allow',False):
                    return self.create_single_pdf(cr, uid, ids, data, report_xml, context)
                else:
                    # Concatenate every collected PDF into one document.
                    from pyPdf import PdfFileWriter, PdfFileReader
                    output = PdfFileWriter()
                    for r in results:
                        reader = PdfFileReader(cStringIO.StringIO(r[0]))
                        for page in range(reader.getNumPages()):
                            output.addPage(reader.getPage(page))
                    s = cStringIO.StringIO()
                    output.write(s)
                    return s.getvalue(), results[0][1]
def main():
    """Combine the PDF files named on the command line into one file.

    The output is written to the current directory and named after the
    local time, ``YYYY-MM-DD_HH-MM-SS.pdf``. Prints the usage text when no
    file is given and refuses to run with a single file.
    """
    # Parse command line
    pdf_files = sys.argv[1:]
    if not pdf_files:
        print(__usage__)
        sys.exit()

    # Make sure there is more than one pdf file
    if len(pdf_files) == 1:
        print("In the spirit of gnu tar, this script cowardly refuses to")
        print("combine one pdf file!")
        sys.exit()

    # Create unique name for output file
    output_file = time.strftime("%Y-%m-%d_%H-%M-%S.pdf", time.localtime())

    # Combine pdf files in order
    writer = PdfFileWriter()
    for pdf in pdf_files:
        reader = PdfFileReader(open(pdf,"rb"))
        for page_index in range(reader.getNumPages()):
            writer.addPage(reader.getPage(page_index))

    # Write final pdf
    stream = open(output_file,"wb")
    writer.write(stream)
    stream.close()
def output(self):
    """Export the pages selected in the line edit to a PDF chosen by the user.

    Opens a save dialog, parses the page selection from
    ``self.pages_line_edit`` against the page count of ``self.in_filename``
    and writes those pages to the chosen file. Does nothing when the dialog
    is cancelled.
    """
    # get the output filename using the file dialog
    (out_filename, _selected_filter) = \
        QFileDialog.getSaveFileName(parent = self,
                                    caption = self.tr(u'Export'),
                                    dir = '',
                                    filter = self.tr('pdf (*.pdf)'))
    # A cancelled dialog yields an empty name; open('') would raise.
    if not out_filename:
        return

    in_file = open(self.in_filename, 'rb')
    try:
        in_reader = PdfFileReader(in_file)
        out_writer = PdfFileWriter()

        # extract input
        pages_string = self.pages_line_edit.text()

        # Get the indices of pages to extract
        pages = pages_parser(in_reader.getNumPages()).parse(pages_string)

        # append pages to output writer
        for page_index in pages:
            out_writer.addPage(in_reader.getPage(page_index))

        # write to file; the target is opened only now so that a failed
        # page selection does not leave an empty file behind
        out_file = open(out_filename, 'wb')
        try:
            out_writer.write(out_file)
        finally:
            out_file.close()
    finally:
        in_file.close()
def parse_file(pdfFile,nameFile):
    """Split a PDF per recipient name and mail each part.

    Pages are scanned in order. A page whose text contains one of the names
    from ``get_names(nameFile)`` (case-insensitive) starts a new document;
    the document collected so far is sent with ``send_email`` first. Pages
    before the first match are dropped.
    """
    pdfReader = PdfFileReader(open(pdfFile,"rb"))
    # read the names and emails from csv file
    names = get_names(nameFile)
    # create an instance in SMTP server
    smtp = smtplib.SMTP('localhost')

    # None until the first name has been found
    pdfWriter = None
    prevName = ""
    for page_index in range(pdfReader.getNumPages()):
        page = pdfReader.getPage(page_index)
        page_text = page.extractText().lower()
        for name in names:
            if name.lower() in page_text:
                # a new recipient starts here: send what was collected
                if pdfWriter is not None:
                    send_email(smtp,pdfWriter,prevName,names)
                pdfWriter = PdfFileWriter()
                prevName = name
                break
        if pdfWriter is not None:
            pdfWriter.addPage(page)

    # send the last file
    if pdfWriter is not None:
        send_email(smtp,pdfWriter,prevName,names)
    # quit the smtp server
    smtp.quit()
def kesit(dosya_yolu , sayfa1 , sayfa2=0):
    """Extract the pages from sayfa1 up to (not including) sayfa2 of a PDF.

    When sayfa2 is not given (0), everything from sayfa1 to the end is
    taken. Page indexes are 0-based; a negative sayfa1 is made positive.
    The result is written to ``data.pdf`` and a success or failure line is
    printed.
    """
    try:
        kaynak_dosya = open(dosya_yolu, "rb")
        try:
            kaynak = PdfFileReader(kaynak_dosya)
            islem = PdfFileWriter()
            if sayfa1<0:
                # The original expression was evaluated but never assigned.
                sayfa1 = -sayfa1
            if sayfa2==0:
                sayfa2=kaynak.getNumPages()
            if sayfa2<=sayfa1:
                sayfa2=sayfa1+1
            for i in range(int(sayfa1),int(sayfa2)):
                islem.addPage(kaynak.getPage(i))
            hedef = open("data.pdf", "wb")
            try:
                islem.write(hedef)
            finally:
                hedef.close()
        finally:
            kaynak_dosya.close()
        print("»» pdf oluşturuldu")
    except Exception:
        print("»» pdf oluşturulamadı")
def download_pdf(url):
    """Download the PDF at *url* into the local pdf folder.

    Returns False when the URL answers with status 404. Otherwise the
    document is fetched, copied page by page and stored under the URL's
    base name, and True is returned.
    """
    writer = PdfFileWriter()
    status = requests.get(url, stream=True).status_code
    if status == 404:
        return (False)

    payload = urlopen(Request(url)).read()
    pdfFile = PdfFileReader(StringIO(payload))
    for pageNum in xrange(pdfFile.getNumPages()):
        #currentPage.mergePage(watermark.getPage(0))
        writer.addPage(pdfFile.getPage(pageNum))

    outputStream = open(
        '/home/hjiang/superlist/pdf_folder/%s' % basename(url), "wb")
    writer.write(outputStream)
    outputStream.close()
    return (True)
def go():
    """Render ``bogustext`` to ``new.pdf`` and stamp it onto the template.

    The first page of ``new.pdf`` is merged onto the first page of
    ``masjid_template.pdf`` and the result is saved as ``ek_nayi_File.pdf``.
    """
    # Build the text-only PDF.
    doc = SimpleDocTemplate("new.pdf")
    ptext = '<font name=Times-Roman size=10>%s</font>' % bogustext
    Story = [
        Spacer(0, 2.7 * inch),
        Paragraph(ptext, styles["Normal"]),
        Spacer(1, 0.5 * inch),
    ]
    doc.build(Story, onFirstPage=myFirstPage)

    # Merge it onto the template's first page.
    text_pdf = PdfFileReader(open("new.pdf", "rb"))
    template_pdf = PdfFileReader(open("masjid_template.pdf", "rb"))
    writer = PdfFileWriter()
    merged_page = template_pdf.getPage(0)
    merged_page.mergePage(text_pdf.getPage(0))
    writer.addPage(merged_page)

    target = open("ek_nayi_File.pdf", "wb")
    writer.write(target)
    target.close()
def genfile(srcfile, desfile, startpage, endpage):
    """
    Split a PDF file according to startpage and endpage.

    :param srcfile: path of the source PDF
    :param desfile: path of the PDF to write; missing parent directories
        are created
    :param startpage: first page to copy (1-based)
    :param endpage: last page to copy (1-based, inclusive)
    :return: None
    """
    output = PdfFileWriter()
    source = open(srcfile, "rb")
    try:
        src = PdfFileReader(source)
        (filepath, filename) = os.path.split(desfile)
        # A bare file name has an empty directory part, and
        # os.makedirs('') raises.
        if filepath and not os.path.exists(filepath):
            os.makedirs(filepath)
        des = open(desfile, "wb")
        try:
            for i in range(startpage - 1, endpage):
                output.addPage(src.getPage(i))
            output.write(des)
        finally:
            des.close()
    finally:
        source.close()
def _create_source_pdf(self, cr, uid, ids, data, report_xml, context=None):
    """Create the report and concatenate multiple PDF results into one.

    A single result from ``_create_source`` is returned unchanged. Several
    results must all be PDFs and are joined into one ``(data, 'pdf')``
    tuple. With no results the report is generated with
    ``create_single_pdf``.

    Raises ``osv.except_osv`` when several results are not all PDFs.
    """
    results = self._create_source(cr, uid, ids, data, report_xml, context)
    if results and len(results)==1:
        return results[0]
    if results:
        # context defaults to None; do not dereference it blindly.
        deferred = (context or {}).get('deferred_process')
        if deferred:
            deferred.set_status(_('Concatenating single documents'))
        if any(r[1] != 'pdf' for r in results):
            raise osv.except_osv(_('Error!'), _('Unsupported combination of formats!'))
        output = PdfFileWriter()
        for r in results:
            reader = PdfFileReader(StringIO(r[0]))
            for page in range(reader.getNumPages()):
                output.addPage(reader.getPage(page))
        s = StringIO()
        output.write(s)
        return s.getvalue(), results[0][1]
    return self.create_single_pdf(cr, uid, ids, data, report_xml, context)
def logotage(filename):
    """Write ``logo_<filename>`` with the logo watermark on selected pages.

    The first page of ``./logos/watermark_logo.pdf`` is merged onto every
    page for which ``hasLogo(page_index)`` is true; all other pages are
    copied unchanged.
    """
    source = PdfFileReader(open(filename, 'rb'))
    outfilename = 'logo_' + filename

    # prepare the watermark: logo
    wmname = './logos/watermark_logo.pdf'
    logo_page = PdfFileReader(open(wmname, "rb")).getPage(0)

    output = PdfFileWriter()
    for cpt in range(source.getNumPages()):
        page = source.getPage(cpt)
        # the watermark is added only to the pages concerned
        if hasLogo(cpt):
            page.mergePage(logo_page)
        output.addPage(page)

    outfile = open(outfilename, u'wb')
    output.write(outfile)
    outfile.close()
    return
def joinPdf(self, pdf_list, output_filepath):
    """Concatenate several PDF files into a single one.

    Every input is read completely into memory, so the source files are
    closed again before the output is written.

    :param pdf_list: paths of the PDF files to join, in order
    :param output_filepath: path of the joined PDF to write
    """
    output_pdf = PdfFileWriter()
    buffers = []
    for input_path in pdf_list:
        source = open(input_path, 'rb')
        in_memory = StringIO(source.read())
        buffers.append(in_memory)
        source.close()
        for page in PdfFileReader(in_memory).pages:
            output_pdf.addPage(page)

    target = open(output_filepath, 'wb')
    output_pdf.write(target)
    target.close()

    # The buffers are only released once the output is complete.
    for in_memory in buffers:
        in_memory.close()
def remove_pages(pdf_file, max_pages=1):
    """Truncate *pdf_file* in place to at most its first *max_pages* pages.

    The shortened document is written to ``<pdf_file>.tmp`` and then moved
    over the original.

    :return: the path that was passed in
    """
    output = PdfFileWriter()
    tmp_file = pdf_file + '.tmp'
    # PDFs are binary data; text mode corrupts them on some platforms.
    with open(pdf_file, 'rb') as pdf:
        input = PdfFileReader(pdf)
        total_pages = input.getNumPages()
        for i in range(min(max_pages, total_pages)):
            output.addPage(input.getPage(i))
        # The source has to be open while the pages are written out.
        with open(tmp_file, 'wb') as tmp:
            output.write(tmp)

    os.remove(pdf_file)
    os.rename(tmp_file, pdf_file)
    return pdf_file
def fisk_pdf(pdffile, directory):
    """Create a Markdown notes skeleton for every page of a PDF.

    For page ``j`` (1-based) the single page is saved under
    ``<directory>/<name>/images/file_<j>.pdf``, converted to text with
    ``pdftotext`` and ``iconv`` and to PNG with ``convert``. The Markdown
    file ``<directory>/<name>/<name>.md`` gets the image, the extracted text
    in 80-character chunks and a notes heading. ``<name>`` is *pdffile*
    without its last four characters.
    """
    name = pdffile[:-4]
    base = os.path.join(directory, name)
    with open(os.path.join(base, name + ".md"), "w") as g:
        print("# Notes on: ", file = g)
        # open() instead of file(): the latter is gone on Python 3, which
        # the print(..., file=...) calls suggest this may run on.
        with open(pdffile, "rb") as source:
            input = PdfFileReader(source)
            print("Number of pages %s" % input.getNumPages())
            for j in range(1, input.getNumPages() + 1):
                output = PdfFileWriter()
                output.addPage(input.getPage(j - 1))
                print("### Page " + str(j), file = g)
                imagefile = os.path.join(base, "images", "file_" + str(j) + ".pdf")
                imagefilePNG = os.path.join(base, "images", "file_" + str(j) + ".png")
                # Binary mode: the writer emits PDF bytes.
                with open(imagefile, "wb") as f:
                    output.write(f)
                textfile = os.path.join(base, "texts", "file_" + str(j) + ".txt")
                textfileASCII = os.path.join(base, "texts", "file_" + str(j) + "_ascii.txt")
                # NOTE(review): the paths are interpolated into shell
                # commands unquoted; names with spaces or shell
                # metacharacters will break these calls.
                cmd = "pdftotext " + imagefile + " " + textfile # extracts text from the pdffile
                os.system(cmd)
                cmd = "iconv -c -f utf8 -t ascii " + textfile + " > " + textfileASCII
                os.system(cmd)
                cmd = "convert -density 100 " + imagefile + " -quality 100 " + imagefilePNG
                os.system(cmd)
                print("![](./images/file_" + str(j) + ".png)", file = g)
                print("", file = g)
                print("### Text from page " + str(j), file = g)
                with open(textfileASCII, "r") as t:
                    txt = t.read()
                # NOTE(review): the last replace() has an empty search
                # string as seen here; it may have held a control character
                # (e.g. a form feed) that was lost - verify.
                txt = txt.replace('\n', ' ').replace('\r', '').replace('', '')
                n = 80
                chunks = [txt[i:i+n] for i in range(0, len(txt), n)]
                for c in chunks:
                    print(" " + c, file = g)
                print("", file = g)
                print("### Notes on page " + str(j), file = g)
def addCopyrightToPDF(pdf_file_location, pdf_file_destination, copyrightText,
                      drawText=True, title="", authors=""):
    """Stamp a copyright notice on the last page of a PDF and set metadata.

    A one-page overlay is drawn with ReportLab and merged onto the last
    page of the source document. The ``/Title`` and ``/Author`` entries of
    the output are set from ``title`` and ``authors``.

    :param pdf_file_location: path of the PDF to read
    :param pdf_file_destination: path of the PDF to write
    :param copyrightText: notice text; the first line is drawn at y=40 and
        the second line, when there is one, at y=50 (further lines are
        ignored)
    :param drawText: when false, only an empty string is drawn on the
        overlay
    :param title: value for the ``/Title`` metadata entry
    :param authors: value for the ``/Author`` metadata entry
    """
    packet = StringIO.StringIO()
    # create a new PDF with Reportlab
    can = canvas.Canvas(packet, pagesize=letter)
    can.setFont("Times-Roman", 7)
    ctext = copyrightText.split("\n")
    if drawText:
        can.drawString(30, 40, ctext[0])
        # A single-line notice has no second line; do not index past it.
        if len(ctext) > 1:
            can.drawString(30, 50, ctext[1])
    else:
        can.drawString(30, 50, "")
    can.save()

    # move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    # read the existing PDF; keep it open until the output is written
    with open(pdf_file_location, "rb") as source:
        existing_pdf = PdfFileReader(source)
        output = PdfFileWriter()
        npagesorig = existing_pdf.getNumPages()
        for i in range(npagesorig):
            page = existing_pdf.getPage(i)
            # only the last page receives the overlay
            if i == (npagesorig - 1):
                page.mergePage(new_pdf.getPage(0))
            output.addPage(page)

        # NOTE: relies on the writer's private ``_info`` dictionary.
        infoDict = output._info.getObject()
        infoDict.update({
            NameObject('/Title'): createStringObject(title),
            NameObject('/Author'): createStringObject(authors)
        })

        # finally, write "output" to a real file
        with open(pdf_file_destination, "wb") as outputStream:
            output.write(outputStream)
def join_pdfs(list_pdf):
    """Return the content of the given PDF files joined into one document.

    :param list_pdf: list of paths of PDF files, in output order
    :returns: the raw content of the resulting PDF; for a single path
        this is that file's content, unchanged
    :raises ValueError: if ``list_pdf`` is empty
    """
    n = len(list_pdf)
    if n == 0:
        raise ValueError('unexpected empty list')

    # Files == 1: nothing to merge. Read in binary mode (PDF is binary
    # data) and close the handle right away.
    if n == 1:
        with open(list_pdf[0], 'rb') as single:
            return single.read()

    # Files > 1
    pdf_output = PdfFileWriter()
    handles = []
    output = StringIO()
    try:
        for path in list_pdf:
            handle = open(path, 'rb')
            handles.append(handle)
            for page in PdfFileReader(handle).pages:
                pdf_output.addPage(page)
        pdf_output.write(output)
        return output.getvalue()
    finally:
        output.close()
        # Inputs stay open until the write is over, then are released.
        for handle in handles:
            handle.close()
def _combine_pdf_files(self, tmp_folder_name, output_report):
    """Combine the ``*water*.pdf`` files of a folder and drop the last page.

    Ghostscript (``gs``) merges every ``<tmp_folder_name>*water*.pdf``
    into ``<tmp_folder_name>temp.pdf``; all pages but the last one are
    then copied to ``<tmp_folder_name><output_report>``.

    :param tmp_folder_name: path prefix of the working folder; it is
        concatenated as-is, so it is expected to end with a separator
    :param output_report: file name of the final report
    """
    output_path = tmp_folder_name + output_report
    output_temp_path = tmp_folder_name + 'temp.pdf'
    # The shell is needed here for the ``*water*`` glob expansion.
    cmd = """gs -q -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE=%s \
 -dBATCH %s*water*.pdf""" % (output_temp_path, tmp_folder_name)
    os.system(cmd)

    # remove the last empty page
    with open(output_temp_path, 'rb') as temp_stream:
        input_stream = PdfFileReader(temp_stream)
        output_stream = PdfFileWriter()
        pagenum = input_stream.getNumPages()
        for i in range(pagenum - 1):
            output_stream.addPage(input_stream.getPage(i))
        with open(output_path, 'wb') as out_stream:
            output_stream.write(out_stream)
def print_danfe(inv):
    """Generate the DANFE PDF for an invoice and return its content.

    The NF-e XML is located from the invoice's company and access key
    (below a ``tmp/`` subfolder unless the invoice state is ``open``,
    ``paid`` or ``sefaz_cancelled``), the DANFE is generated into
    ``/tmp/`` and the resulting PDF is returned as a string.

    :param inv: invoice record providing ``nfe_version``, ``company_id``,
        ``nfe_access_key`` and ``state``
    :returns: content of the generated PDF
    :raises ValueError: if ``inv.nfe_version`` is not one of the handled
        layouts (``1.10``, ``2.00``, ``3.10``)
    """
    # The layout classes are imported lazily, only for the version in use.
    if inv.nfe_version == '1.10':
        from pysped.nfe.leiaute import ProcNFe_110
        procnfe = ProcNFe_110()
    elif inv.nfe_version == '2.00':
        from pysped.nfe.leiaute import ProcNFe_200
        procnfe = ProcNFe_200()
    elif inv.nfe_version == '3.10':
        from pysped.nfe.leiaute import ProcNFe_310
        procnfe = ProcNFe_310()
    else:
        # Without this branch ``procnfe`` would be unbound further down.
        raise ValueError('Unsupported NF-e version: %r' % (inv.nfe_version,))

    file_xml = monta_caminho_nfe(inv.company_id, inv.nfe_access_key)
    if inv.state not in ('open', 'paid', 'sefaz_cancelled'):
        file_xml = os.path.join(file_xml, 'tmp/')
    procnfe.xml = os.path.join(file_xml, inv.nfe_access_key + '-nfe.xml')

    danfe = DANFE()
    danfe.logo = add_backgound_to_logo_image(inv.company_id)
    danfe.NFe = procnfe.NFe
    danfe.leiaute_logo_vertical = inv.company_id.nfe_logo_vertical
    danfe.protNFe = procnfe.protNFe
    danfe.caminho = "/tmp/"
    danfe.gerar_danfe()
    paths = [danfe.caminho + danfe.NFe.chave + '.pdf']

    output = PdfFileWriter()
    s = StringIO()
    handles = []
    try:
        for pdf_path in paths:
            handle = open(pdf_path, "rb")
            handles.append(handle)
            pdf = PdfFileReader(handle)
            for i in range(pdf.getNumPages()):
                output.addPage(pdf.getPage(i))
        output.write(s)
        return s.getvalue()
    finally:
        s.close()
        for handle in handles:
            handle.close()
def batch_inspection_report(request):
    """Return one PDF response holding the inspection reports of a lot range.

    The range comes either from the ``po`` query parameter (lowest to
    highest ``lot_number`` among the reports of that purchase order) or
    from the ``start`` / ``end`` query parameters. The first page of each
    lot's inspection report is appended to the response, in lot order.

    :param request: the incoming HTTP request
    :returns: an ``application/pdf`` ``HttpResponse``
    """
    params = request.GET
    if 'po' in params:
        purchase_order = int(params.get('po'))
        bounds = Report.objects.filter(origin_po=purchase_order).aggregate(
            high_lot=Max('lot_number'), low_lot=Min('lot_number'))
        start = int(bounds['low_lot'])
        end = int(bounds['high_lot'])
    else:
        start = int(params.get('start'))
        end = int(params.get('end'))

    response = HttpResponse(content_type='application/pdf')
    response['Content-Disposition'] = 'filename="inspection_report.pdf"'

    merged = PdfFileWriter()
    lot = start
    while lot <= end:
        report = Report.objects.get(lot_number=lot)
        lot_pdf = generate_inspection_report(request, report.lot_number)
        merged.addPage(lot_pdf.getPage(0))
        lot += 1
    merged.write(response)
    return response
def __picture_pdf__(self, pageNum, coordinatesList):
    """OCR rectangular regions of one page and return the recognised text.

    For each region the page's media box is set to that rectangle, the
    page is written to ``tmp.pdf``, rendered to ``tmp.jpg`` at 300 dpi and
    passed to ``image_to_string``. Both temporary files are removed after
    every region.

    :param pageNum: 1-based number of the page in ``self.pdf``
    :param coordinatesList: iterable of ``(x, y, width, height)``
        sequences; ``self.x_off`` / ``self.y_off`` are added to x and y
    :returns: list with the OCR result of each region, in input order
    """
    page = self.pdf.pages[pageNum - 1]
    res = []
    for cord in coordinatesList:
        x = cord[0] + self.x_off
        y = cord[1] + self.y_off
        w = cord[2] + x
        h = cord[3] + y
        # NOTE: this modifies the media box of the page object itself.
        page.mediaBox.lowerLeft = (x, y)
        page.mediaBox.upperRight = (w, h)
        out = PdfFileWriter()
        out.addPage(page)
        try:
            with open("tmp.pdf", "wb") as outFile:
                out.write(outFile)
            im = Image(filename='tmp.pdf', resolution=300)
            im.save(filename="tmp.jpg")
            img = tim.open("tmp.jpg")
            res.append(image_to_string(img))
        finally:
            # Remove the files that were actually created: the image is
            # saved as "tmp.jpg", not "tmpImage.jpg".
            for tmp_name in ("tmp.jpg", "tmp.pdf"):
                if os.path.exists(tmp_name):
                    os.remove(tmp_name)
    return res
def collate(self, remove_temp=True, remove_sources=False):
    """Render the SVG sources to PDF pages and join them into ``self.dest``.

    Each source is first passed through ``hack_svg_viewbox`` into a
    temporary file below ``<dest dir>/tmp``, converted to a one-page PDF
    in place, and the first page of each is appended to the output.

    :param remove_temp: delete the temporary folder afterwards
    :param remove_sources: delete the files in ``self.sources`` afterwards
    :returns: ``True``
    :raises RuntimeError: if the destination directory does not exist
    """
    from pyPdf import PdfFileWriter, PdfFileReader
    from svglib.svglib import svg2rlg
    from reportlab.graphics import renderPDF

    # Make temporary folder. A bare file name has an empty directory
    # part, which means the current directory.
    dest_dir = os.path.split(self.dest)[0] or os.curdir
    if not os.path.exists(dest_dir):
        raise RuntimeError("output place %s d.n.e." % dest_dir)
    temp_dir = dest_dir + '/tmp'
    if not os.path.exists(temp_dir):
        os.mkdir(temp_dir)

    # Fix SVG windows for PDFing
    temp_page = ['%s/page%i.tmp' % (temp_dir, i)
                 for i, _ in enumerate(self.sources)]
    for s, d in zip(self.sources, temp_page):
        hack_svg_viewbox(s, d)

    # Generate single PDF pages (each temp file is overwritten in place)
    for s in temp_page:
        drawing = svg2rlg(s)
        renderPDF.drawToFile(drawing, s, autoSize=1)

    # Concatenate the PDF pages into a single document
    output = PdfFileWriter()
    handles = []
    try:
        for s in temp_page:
            handle = open(s, 'rb')
            handles.append(handle)
            output.addPage(PdfFileReader(handle).getPage(0))
        with open(self.dest, 'wb') as fout:
            output.write(fout)
    finally:
        # Close the page files before the folder holding them is removed.
        for handle in handles:
            handle.close()

    # Remove the temporary folder
    if remove_temp:
        shutil.rmtree(temp_dir)

    # Remove the source images (explicit loop: map() is lazy on Python 3)
    if remove_sources:
        for source in self.sources:
            os.remove(source)
    return True
def createForm(dogs, filename):
    """Pre-fill the trial record form with the given dogs' data.

    The values of every dog are drawn on a two-page A4 overlay at the
    positions given by the module-level ``placements`` and ``yoffset``
    tables. The overlay is saved as ``/tmp/stamp.pdf`` and stamped onto
    ``pohjat/koepoytakirja.pdf`` with ``pdftk multistamp``; the result is
    written to ``esitaytetyt/<filename>``.

    :param dogs: sequence of dicts mapping field names to text; only keys
        present in ``placements`` are drawn
    :param filename: file name of the filled form inside ``esitaytetyt``
    """
    packet = StringIO.StringIO()
    c = canvas.Canvas(packet, pagesize=A4)
    extraoffset = {'Luokka': 0}
    for i, info in enumerate(dogs):
        for k in info.keys():
            if k in placements:
                extra = extraoffset.get(k, 0)
                x, y = placements[k]
                c.drawString(extra + x * cm, yoffset[i] * cm + y * cm,
                             info[k])
        if i == 5:
            # The sixth dog ends the first page; 'Luokka' is shifted for
            # the dogs that follow.
            c.showPage()
            extraoffset['Luokka'] = 0.3 * cm
    if len(dogs) < 7:
        c.showPage()
    c.save()

    packet.seek(0)
    new_pdf = PdfFileReader(packet)
    output = PdfFileWriter()
    output.addPage(new_pdf.getPage(0))
    output.addPage(new_pdf.getPage(1))
    with open("/tmp/stamp.pdf", "wb") as outputStream:
        output.write(outputStream)

    try:
        os.mkdir("esitaytetyt")
    except OSError:
        # Best effort, typically "already exists"; unlike a bare
        # ``except`` this no longer hides KeyboardInterrupt and the like.
        pass
    call(['pdftk', 'pohjat/koepoytakirja.pdf', 'multistamp',
          '/tmp/stamp.pdf', 'output', 'esitaytetyt/%s' % filename])
def writePDF(linkPaths):
    """Join ``./Tmp/1.pdf`` .. ``./Tmp/<n>.pdf`` into a dated newspaper PDF.

    Only the length of ``linkPaths`` is used: the first page of each
    numbered file is appended, and the result is written to
    ``GDN <YYYYMMDD>.pdf`` in the current directory.

    :param linkPaths: list whose length gives the number of page files;
        any other type is rejected with a message
    :returns: ``None``
    """
    # isinstance() instead of comparing str(type(...)): that textual
    # form differs between Python versions and rejects list subclasses.
    if not isinstance(linkPaths, list):
        print("Invalid parameter passed.\n")
        return

    output = PdfFileWriter()
    handles = []
    try:
        for number in range(1, len(linkPaths) + 1):
            handle = open("./Tmp/" + str(number) + ".pdf", "rb")
            handles.append(handle)
            output.addPage(PdfFileReader(handle).getPage(0))

        print("Generating newspaper...\n")
        fileName = "GDN " + datetime.now().strftime("%Y%m%d") + ".pdf"
        with open(fileName, "wb") as outputStream:
            output.write(outputStream)
    finally:
        # Page files stay open until the write is done, then are closed.
        for handle in handles:
            handle.close()
    return
def perform_logo_embedding(pdf_fp, logo_template_fp):
    """Merge a logo template onto the first page of a PDF.

    The document information (author, title, subject, and creator or, if
    that is empty, producer) of the input is passed on to the writer.

    :param pdf_fp: file object of the source PDF; it is rewound first
    :param logo_template_fp: file object of the PDF whose first page is
        merged onto the first page of the source
    :returns: a ``StringIO`` holding the resulting PDF, positioned at 0
    """
    pdf_fp.seek(0)
    source = PdfFileReader(pdf_fp)
    info = source.getDocumentInfo()
    creator = info.creator or info.producer
    stamped = PdfFileWriter(author=info.author,
                            title=info.title,
                            subject=info.subject,
                            creator=creator)

    page_count = source.getNumPages()
    index = 0
    while index < page_count:
        current = source.getPage(index)
        if index == 0:
            # Only the very first page carries the logo.
            logo = PdfFileReader(logo_template_fp)
            current.mergePage(logo.getPage(0))
        stamped.addPage(current)
        index += 1

    result_fp = StringIO()
    stamped.write(result_fp)
    result_fp.seek(0)
    return result_fp
def main(output_file, input_files):
    """Concatenate the given ``.pdf`` files into ``output_file``.

    Files whose name does not end in ``.pdf`` are skipped with a message.
    The page count of every file and the total are printed.

    :param output_file: path of the PDF to write
    :param input_files: iterable of input paths, in output order
    """
    print("concat all files:")
    output = PdfFileWriter()
    total_pages = 0
    handles = []
    try:
        for f in input_files:
            # expect filename as "*.pdf"
            if f[-4:] != ".pdf":
                print("skipped file:  %s" % (f,))
                continue
            handle = open(f, 'rb')
            handles.append(handle)
            reader = PdfFileReader(handle)
            num_pages = reader.getNumPages()
            total_pages += num_pages
            print("%s -> %spages" % (f, num_pages))
            for i in range(num_pages):
                output.addPage(reader.getPage(i))

        with open(output_file, 'wb') as outputStream:
            output.write(outputStream)
            print(str(total_pages) + "pages written")
    finally:
        # Inputs stay open until the output is written, then are closed.
        for handle in handles:
            handle.close()
def _merge_pdf(self, documents):
    """Merge PDF files into one.

    :param documents: list of path of pdf files
    :returns: path of the merged pdf (a temporary file that this method
        does not delete)
    """
    writer = PdfFileWriter()
    # We have to close the streams *after* PdfFileWriter's call to write()
    streams = []
    try:
        for document in documents:
            pdfreport = open(document, 'rb')
            streams.append(pdfreport)
            reader = PdfFileReader(pdfreport)
            for page in range(reader.getNumPages()):
                writer.addPage(reader.getPage(page))

        # NOTE(review): the merged PDF gets an '.html' suffix; kept as-is
        # since callers only receive the path -- confirm it is intended.
        merged_file_fd, merged_file_path = tempfile.mkstemp(
            suffix='.html', prefix='report.merged.tmp.')
        # Binary mode: the PDF must not go through text-mode translation.
        with closing(os.fdopen(merged_file_fd, 'wb')) as merged_file:
            writer.write(merged_file)
    finally:
        for stream in streams:
            stream.close()

    return merged_file_path
def merge_pdf(lpdf):
    """Merge every PDF of the list into a single in-memory document.

    ``None`` entries are skipped; every other entry is rewound before it
    is read. The returned buffer is left as it is after the write (it is
    not rewound here).

    :param lpdf: List of PDF as File Object
    :type lpdf: list
    :return: file object holding the merged PDF
    :rtype: File Object
    """
    merged = PdfFileWriter()
    for source in (entry for entry in lpdf if entry is not None):
        # Make sure reading starts at the beginning of the file
        source.seek(0)
        reader = PdfFileReader(source)
        page_count = reader.getNumPages()
        index = 0
        while index < page_count:
            merged.addPage(reader.getPage(index))
            index += 1

    # Store the merged content into a file object
    fo_pdf = StringIO()
    merged.write(fo_pdf)
    return fo_pdf
def cropNzoom(inputFile, pageNumber, zoomFactor):
    """Extract one page of a PDF, scale it and save it as a new file.

    The result is written next to the input as
    ``<input without extension>_<pageNumber>test.pdf``.

    :param inputFile: path of the source PDF (must contain a ``.``)
    :param pageNumber: 1-based number of the page to extract
    :param zoomFactor: factor passed to the page's ``scaleBy``
    :returns: path of the file that was written
    """
    print("Cropping and scaling pdf")
    outputFile = (inputFile[:inputFile.rindex('.')] + '_' +
                  str(pageNumber) + 'test.pdf')
    output = PdfFileWriter()
    with open(inputFile, "rb") as source:
        page = PdfFileReader(source).getPage(pageNumber - 1)
        page.scaleBy(zoomFactor)
        output.addPage(page)
        # Show only the file name. Splitting on both separators avoids
        # the ValueError that rindex('\\') raised for paths without a
        # backslash.
        shortName = outputFile.replace('\\', '/').rsplit('/', 1)[-1]
        print("Saving cropped pdf as: " + shortName)
        with open(outputFile, "wb") as outputStream:
            output.write(outputStream)
    return outputFile
def create(self, cr, uid, ids, datas, context=None):
    """Build one PDF from the printouts of the checked-out documents.

    For every ``plm.checkout`` record in ``ids`` whose document has a
    printout, the first page of that (base64-encoded) printout is added,
    once per document. The result is wrapped in ``external_pdf`` and
    rendered.

    :returns: tuple ``(pdf content, 'pdf')``
    """
    self.pool = pooler.get_pool(cr.dbname)
    checkout_model = self.pool.get('plm.checkout')
    merged = PdfFileWriter()
    children = []
    seen_ids = []
    for checkout in checkout_model.browse(cr, uid, ids):
        document = checkout.documentid
        if not document.printout:
            continue
        if document.id in seen_ids:
            # This document's printout is already part of the output.
            continue
        raw_pdf = base64.decodestring(document.printout)
        reader = PdfFileReader(StringIO.StringIO(raw_pdf))
        merged.addPage(reader.getPage(0))
        seen_ids.append(document.id)

    buf = StringIO.StringIO()
    merged.write(buf)
    self.obj = external_pdf(buf.getvalue())
    self.obj.render()
    buf.close()
    return (self.obj.pdf, 'pdf')
def crop_image(box, pdf_page, filename, count):
    """Crop the first page of a one-page PDF to ``box`` and save it.

    The input is read from ``<filename>_data/<pdf_page>`` and the cropped
    page is written to ``OCR_DATASET/<filename>_me_<count>``. The y
    values of ``box`` are subtracted from ``pdf_metadata.page_height``
    before being applied to the trim and crop boxes.

    :param box: sequence ``(x0, y0, x1, y1)`` of values convertible to
        float
    :param pdf_page: file name of the page PDF inside ``<filename>_data``
    :param filename: base name used for both input and output paths
    :param count: suffix distinguishing the output file
    """
    print("BOX")
    print(box)
    source_path = filename + "_data/" + pdf_page
    target_path = "OCR_DATASET/" + filename + "_me_" + str(count)
    with open(source_path, "rb") as in_f:
        reader = PdfFileReader(in_f)
        writer = PdfFileWriter()
        page = reader.getPage(0)

        left = float(box[0])
        top = pdf_metadata.page_height - float(box[1])
        right = float(box[2])
        bottom = pdf_metadata.page_height - float(box[3])
        lower_left = (left, bottom)
        upper_right = (right, top)

        page.trimBox.lowerLeft = lower_left
        page.trimBox.upperRight = upper_right
        page.cropBox.lowerLeft = lower_left
        page.cropBox.upperRight = upper_right
        writer.addPage(page)
        with open(target_path, "wb") as out_f:
            writer.write(out_f)
def make_pdf_1(name='', surname1='', dni=''):
    """Fill in the first page of the volunteer contract template.

    The person's full name (three places) and ID number are drawn on an
    overlay which is merged onto page 0 of ``contracte_voluntariat.pdf``;
    the result is saved as ``<dni>file0.pdf`` in the current directory.

    :param name: first name
    :param surname1: surname
    :param dni: ID number, also used to build the output file name
    :returns: name of the file that was written
    """
    full_name = str(name) + ' ' + str(surname1)
    packet = StringIO.StringIO()
    can = canvas.Canvas(packet, pagesize=letter)
    can.drawString(290, 570, full_name)
    can.drawString(210, 540, str(dni))
    can.drawString(230, 370, full_name)
    can.drawString(230, 137, full_name)
    can.save()

    # move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    filename = str(dni) + 'file' + str(0) + '.pdf'
    # read the existing PDF; the template is closed once the output
    # has been written
    with open("contracte_voluntariat.pdf", "rb") as template:
        existing_pdf = PdfFileReader(template)
        output = PdfFileWriter()
        # add the "watermark" (which is the new pdf) on the existing page
        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        output.addPage(page)
        # finally, write "output" to a real file
        with open(filename, "wb") as outputStream:
            output.write(outputStream)
    return filename
def convert(args):
    """Render every page of a PDF to its own JPEG image.

    Each page is written to ``temp.pdf`` in the current directory,
    rendered at 300 dpi on a white background and saved as
    ``<output name without extension>_<page index>.jpg``.

    :param args: object with ``ifile`` (input PDF) and ``ofile`` (output
        name) attributes; relative paths are resolved against the current
        working directory
    """
    dirname = getcwd()
    ifilename = (args.ifile if path.isabs(args.ifile)
                 else path.join(dirname, args.ifile))
    ofilename = (args.ofile if path.isabs(args.ofile)
                 else path.join(dirname, args.ofile))
    ofilename_n_ext = path.splitext(ofilename)[0]
    temp_path = path.join(dirname, 'temp.pdf')

    with open(ifilename, "rb") as source:
        reader = PdfFileReader(source)
        for page_num in range(reader.getNumPages()):
            writer = PdfFileWriter()
            writer.addPage(reader.getPage(page_num))
            with open(temp_path, 'wb') as temp:
                writer.write(temp)
            try:
                im = Image()
                im.density("300")  # DPI, for better quality
                im.backgroundColor('white')
                im.fillColor('white')
                im.read(temp_path)
                im.write("%s_%d.jpg" % (ofilename_n_ext, page_num))
            finally:
                # Do not leave the scratch file behind if rendering fails.
                remove(temp_path)
def main():
    """Download the pages of an issue and merge them into one PDF.

    The optional first command-line argument is the number of days ago;
    it is appended to the module-level ``LAYOUT_URL``. The sections listed
    at that URL are processed by ``dosection`` (which fills the list of
    page files), all pages are merged into ``ST<YYYYMMDD>.pdf`` and the
    page files are deleted.
    """
    global LAYOUT_URL
    daysago = sys.argv[1] if len(sys.argv) > 1 else 0
    LAYOUT_URL += str(daysago)
    print("Getting issue data from %s" % (LAYOUT_URL,))
    layout = json.loads(getfromurl(LAYOUT_URL))

    filenames = []
    for section in layout['sections']:
        dosection(section, filenames)

    toclose = []
    out = PdfFileWriter()
    print('Retrieving pages')
    for pdf_path in filenames:
        handle = open(pdf_path, 'rb')
        toclose.append(handle)
        print('--> Adding page(s) from %s' % (pdf_path,))
        for pdfpg in PdfFileReader(handle).pages:
            out.addPage(pdfpg)

    issue_day = date.today() - timedelta(int(daysago))
    ofname = 'ST' + issue_day.strftime('%Y%m%d') + '.pdf'
    merged = open(ofname, 'wb')
    toclose.append(merged)
    print('Merging pages')
    out.write(merged)
    # Inputs and output are all closed only after the merge is written.
    for handle in toclose:
        handle.close()

    print('Deleting temporary files')
    for pdf_path in filenames:
        print('--> Deleting %s' % (pdf_path,))
        os.unlink(pdf_path)
    print('Done! %s' % (ofname,))
def split_pdf(path_pdf):
    """Split a PDF into pages and keep the one dated close to today.

    Every page is written to ``./tmp/`` and renamed to whatever
    ``change_name`` returns for it. The names in ``./tmp/`` are then
    split on ``-``, reversed and parsed as dates; the first file whose
    date is within five days of today is moved to ``./planning.pdf``.

    :param path_pdf: path of the PDF; its first seven characters are
        used to name the intermediate page files
    :returns: ``"planning.pdf"`` when a matching page was found (after
        calling ``remove_all``), otherwise ``False``
    """
    with open(path_pdf, "rb") as source:
        inputpdf = PdfFileReader(source)
        inputpdf.decrypt('')
        if not path.exists('./tmp'):
            makedirs('./tmp/')
        for i in range(inputpdf.numPages):
            output = PdfFileWriter()
            output.addPage(inputpdf.getPage(i))
            newname = path_pdf[:7] + "-" + str(i) + ".pdf"
            # Read/write *binary* mode: the page is PDF data, and
            # change_name() receives the still-open stream.
            outputStream = open("./tmp/" + newname, "w+b")
            try:
                output.write(outputStream)
                DateName = change_name(outputStream)
            finally:
                outputStream.close()
            rename("./tmp/" + newname, "./tmp/" + DateName)

    AllPdf = listdir('./tmp/')
    DateNow = arrow.get(datetime.now().strftime('%Y-%m-%d'))
    for pdf in AllPdf:
        # The dash-separated parts of the name are reversed before
        # parsing (presumably day-first names -- confirm).
        PdfDate = '-'.join(pdf.split('-')[::-1])
        DateFile = arrow.get(PdfDate)
        delta = (DateFile - DateNow)
        if -5 <= delta.days <= 5:
            rename("./tmp/" + pdf, "./" + "planning.pdf")
            remove_all()
            return "planning.pdf"
    return False
def MergePDF(filepath, outfile):
    """Merge the PDF files found by ``getFileName(filepath)`` into one.

    Entries whose path contains ``.DS_Store`` are skipped. Encrypted
    inputs are decrypted with the hard-coded password ``"map"``. The
    result is written to ``filepath + outfile`` (plain concatenation).

    :param filepath: location passed to ``getFileName``; also the prefix
        of the output path
    :param outfile: name appended to ``filepath`` for the output file
    """
    output = PdfFileWriter()
    outputPages = 0
    pdf_fileName = getFileName(filepath)
    print("%s %s" % ('总的', pdf_fileName))
    handles = []
    try:
        for each in pdf_fileName:
            if '.DS_Store' in each:
                continue
            # Read the source pdf file
            handle = open(each, "rb")
            handles.append(handle)
            reader = PdfFileReader(handle)
            # An encrypted pdf must be decrypted before pyPdf can use it
            if reader.isEncrypted:
                print("%s %s" % ('input.isEncrypted', reader.isEncrypted))
                reader.decrypt("map")
            # Number of pages in the source pdf file
            pageCount = reader.getNumPages()
            outputPages += pageCount
            print(pageCount)
            # Add every page to the output
            for iPage in range(pageCount):
                output.addPage(reader.getPage(iPage))

        print("All Pages Number:" + str(outputPages))
        # Finally write the merged pdf file
        with open(filepath + outfile, "wb") as outputStream:
            output.write(outputStream)
    finally:
        # Inputs stay open until the output is written, then are closed.
        for handle in handles:
            handle.close()
    print("finished")