def merge_pdfs(origin, num_pages, aux, verso=None, above=False, allPage=False):
    """Merge an auxiliary PDF page with the pages of an original PDF.

    General-purpose merging helper shared by several plugins so the logic is
    not re-implemented. ``origin`` is used as the back layer, ``aux`` above.

    Args:
        origin: Raw bytes of the original PDF.
        num_pages: Number of pages of ``origin`` to process.
        aux: Raw bytes of the auxiliary PDF; only its first page is used.
        verso: Passed through to ``_merge_verso`` for odd-indexed pages.
        above: Passed through to the merge task and to ``_merge_verso``.
        allPage: When true, ``aux`` is merged on every page instead of only
            on even-indexed pages.

    Returns:
        The merged PDF as bytes.

    Raises:
        MergePDFException: On any failure; the traceback is logged first.
    """
    try:
        output = PdfFileWriter()
        input_result = PdfFileReader(io.BytesIO(origin))
        pages = []
        for i in range(num_pages):
            page_origin = input_result.getPage(i)
            if allPage or i % 2 == 0:
                # aux is re-parsed for every page, yielding a new page object each time.
                page_aux = PdfFileReader(io.BytesIO(aux)).getPage(0)
                pages.append(tasks.merge.delay(page_origin, page_aux, above))
            else:
                _merge_verso(verso, page_origin, above, pages)
        for page in pages:
            # isinstance also accepts PageObject subclasses, which have no
            # zero-argument ``get()`` to fall back on.
            if isinstance(page, PyPDF2.pdf.PageObject):
                output.addPage(page)
            else:
                # Otherwise it is a pending task result: fetch the merged page.
                output.addPage(page.get())
        out_io = io.BytesIO()
        output.write(out_io)
        return out_io.getvalue()
    except Exception:
        labresult.app.logger.error(traceback.format_exc())
        raise MergePDFException('Error while merging PDFs')
def generate_a_pdf(filename, num_pages, dir=None):
    """Generate a PDF of ``num_pages`` pages with a single image per page.

    Approach taken from
    https://stackoverflow.com/questions/2925484/place-image-over-pdf

    Args:
        filename (str): Name of the PDF file to write.
        num_pages (int): Number of pages to put in the PDF.
        dir (str): Directory to save the PDF in; the current working
            directory is used when it is falsy.

    Returns:
        str: Path to the new PDF file.
    """
    pdf = PdfFileWriter()
    for num in range(1, num_pages + 1):
        imgTemp = BytesIO()
        jpeg_path = make_a_jpeg('{}.jpeg'.format(str(num)), pick_a_color(num))
        try:
            imgDoc = canvas.Canvas(imgTemp, pagesize=A4)
            imgDoc.drawImage(jpeg_path, 25, 45)
            imgDoc.save()
            pdf.addPage(PdfFileReader(BytesIO(imgTemp.getvalue())).getPage(0))
        finally:
            # The JPEG is only an intermediate artefact; drop it even on failure.
            remove(jpeg_path)
    path = join(dir if dir else getcwd(), filename)
    # Context manager so the output handle is flushed and closed.
    with open(path, 'wb') as out:
        pdf.write(out)
    return path
def do_highlight(filename, keywords, output_file):
    """Write a copy of a PDF with highlight annotations on keyword hits.

    Args:
        filename: Path of the PDF to read.
        keywords: Passed to ``calculate_locations`` to find what to highlight.
        output_file: Path of the annotated PDF to write.
    """
    locations = calculate_locations(filename, keywords)
    writer = PdfFileWriter()
    # The source stays open until the writer has been serialised.
    with open(filename, 'rb') as source:
        reader = PdfFileReader(source)
        for num, page in enumerate(reader.pages):
            for l in locations:
                if l.page_num != num:
                    continue
                annot1 = highlight_annotation([l.bounds], 'Comments', 'Author', 'Comments.')
                popup_ref = writer._addObject(annot1)
                print(l.page_num)
                if "/Annots" in page:
                    page['/Annots'].append(popup_ref)
                else:
                    page[NameObject('/Annots')] = ArrayObject([popup_ref])
                # Register the page's annotation array with the writer as well.
                writer._addObject(page['/Annots'])
            writer.addPage(page)
        with open(output_file, "wb") as outputStream:
            writer.write(outputStream)
def write_with_template(self, out_file="", template_filename=""):
    """Merge the buffered content onto the template's first page and save it.

    Args:
        out_file: Path of the PDF to write.
        template_filename: Path of the template PDF; falls back to
            ``self.template_filename`` when empty.
    """
    # Read the template.
    template_filename = template_filename or self.template_filename
    reader = PdfFileReader(template_filename)
    page = reader.getPage(0)

    # Turn the in-memory buffer into a PDF; rewind so it can be read.
    self.buffer.seek(0)
    new_pdf = PdfFileReader(self.buffer)

    # Merge the content onto the template page.
    page.mergePage(new_pdf.getPage(0))

    # Hand the merged page to a writer and write it out.
    writer = PdfFileWriter()
    writer.addPage(page)
    with open(out_file, 'wb') as f:
        writer.write(f)
def domerge(self):
    """Merge all PDFs found by ``self.get_file_list()`` into ``self.output``.

    ``self.add_footer`` is called for every page, with its source filename
    and zero-based page index, before the page is added to the output.

    :return: None
    """
    print("Getting all pdf files in the folder")
    self.get_file_list()
    print("Going to start merge")
    writer = PdfFileWriter()
    sources = []
    try:
        with open(self.output, "wb") as outputStream:
            for filename in self.files:
                print("Going to merge %s" % filename)
                # Inputs must stay open until the writer is serialised.
                source = open(filename, 'rb')
                sources.append(source)
                reader = PdfFileReader(source)
                for page_num, page in enumerate(reader.pages):
                    self.add_footer(page, filename, page_num)
                    writer.addPage(page)
            writer.write(outputStream)
    finally:
        for source in sources:
            source.close()
    print("Done merging")
def merge(self, pdf_one, pdf_two, filename='my.pdf', output_dir='D:/pdf/'):
    '''Interleave two scans back into the original page order.

    ``pdf_one`` holds the scanned front sides and ``pdf_two`` the scanned
    back sides. Front pages are taken first to last, back pages last to
    first, alternating front/back.

    :param pdf_one: path of the PDF with the front sides
    :param pdf_two: path of the PDF with the back sides
    :param filename: name of the merged output file
    :param output_dir: directory prefix, concatenated directly with filename
    :return: None
    '''
    with open(pdf_one, 'rb') as input_one, open(pdf_two, 'rb') as input_two:
        pdf_input_one = PdfFileReader(input_one)
        pdf_input_two = PdfFileReader(input_two)
        numOne = pdf_input_one.getNumPages()
        numTwo = pdf_input_two.getNumPages()
        print(numOne, numTwo)
        pdf_output = PdfFileWriter()
        index_two = numTwo - 1
        for index_one in range(numOne):
            print(index_one, index_two)
            pdf_output.addPage(pdf_input_one.getPage(index_one))
            pdf_output.addPage(pdf_input_two.getPage(index_two))
            index_two -= 1
        pdf_name = output_dir + filename
        # Write while the inputs are still open.
        with open(pdf_name, 'wb') as output_stream:
            pdf_output.write(output_stream)
    print('Done!')
def RemovePdfOwnerPassword(inputname, outputname):
    '''Copy a PDF after decrypting it with the empty password.

    :param inputname: path of the PDF to read
    :param outputname: path of the copy to write
    :return: 0 on success, -1 when the input is not an encrypted PDF
    '''
    with open(inputname, 'rb') as inputfile:
        wrt = PdfFileWriter()
        ipt = PdfFileReader(inputfile)
        try:
            ipt.decrypt("")
        except KeyError as e:
            # ``e.message`` does not exist on Python 3; ``args[0]`` holds the
            # missing key on every version.
            if e.args and e.args[0] == '/Encrypt':
                print("%s is not an encrypted pdf" % inputname)
                return -1
            raise
        print(ipt.getDocumentInfo())
        for i in range(ipt.getNumPages()):
            wrt.addPage(ipt.getPage(i))
        with open(outputname, "wb") as fl:
            wrt.write(fl)
    return 0
def run(self):
    """Download ``self.attachUrl``, drop the last PDF page and report the result.

    Nothing is done when ``self.beforeHandler`` returns a truthy value.
    Otherwise the attachment is downloaded to a temp file, every page except
    the last is copied to a second temp file, and ``self.success`` is called
    with that file (when set). Any exception goes to ``self.error`` (when
    set). Both temp files are removed before returning.
    """
    if self.beforeHandler(self._id, self.attachUrl):
        return
    filename = self.tempDir + str(random.random())
    filename1 = self.tempDir + str(random.random()) + '.pdf'
    try:
        urllib.request.urlretrieve(self.attachUrl, filename)
        # Context managers close both streams on failure too, before the
        # ``finally`` clause below tries to delete the files.
        with open(filename, 'rb') as input_stream:
            pdf_input = PdfFileReader(input_stream)
            pdf_output = PdfFileWriter()
            # ``- 1`` removes the last page.
            for page in range(pdf_input.getNumPages() - 1):
                pdf_output.addPage(pdf_input.getPage(page))
            with open(filename1, 'wb') as output_stream:
                pdf_output.write(output_stream)
        if self.success is not None:
            self.success(self._id, filename1)
    except Exception as e:
        if self.error is not None:
            self.error(e, self.attachUrl)
    finally:
        for path in (filename, filename1):
            if os.path.exists(path):
                os.remove(path)
def __getPdfTxtAt(self, pageNum, bENHANCE):
    """OCR one page of ``self.pdfReader`` and return the recognised text.

    The page is written to ``temp.pdf`` under ``self.__RootPath``, loaded as
    an image with ``resolution=250``, converted to JPEG and passed to
    ``self.__tool.image_to_string`` using ``self.__lang``.

    Args:
        pageNum: Index of the page, passed to ``self.pdfReader.getPage``.
        bENHANCE: Currently unused; the edge-enhance filter is disabled.

    Returns:
        The text produced by the OCR tool.

    Raises:
        Any exception from the PDF, image or OCR steps propagates unchanged.
        The original prompts and ``sys.exit`` calls sat after a ``raise`` and
        could never run, so they were removed.
    """
    RESOLUTION = 250
    tempoutPdfNameWithAbsPath = os.path.join(self.__RootPath, 'temp.pdf')
    if os.path.exists(tempoutPdfNameWithAbsPath):
        os.remove(tempoutPdfNameWithAbsPath)
    try:
        # Build a single-page PDF holding just the requested page.
        pdfWriter = PdfFileWriter()
        pdfWriter.addPage(self.pdfReader.getPage(pageNum))
        with open(tempoutPdfNameWithAbsPath, 'wb') as pdfOutput:
            pdfWriter.write(pdfOutput)
        with Image(filename=tempoutPdfNameWithAbsPath, resolution=RESOLUTION) as image_pdf:
            image_jpeg = image_pdf.convert('jpeg')
        req_image = image_jpeg.make_blob('jpeg')
        image_filtered = PI.open(io.BytesIO(req_image))
        return self.__tool.image_to_string(
            image_filtered,
            lang=self.__lang,
            builder=pyocr.builders.TextBuilder()
        )
    finally:
        # Remove the temp PDF on success and on failure alike.
        if os.path.exists(tempoutPdfNameWithAbsPath):
            os.remove(tempoutPdfNameWithAbsPath)
def merge_pdf(file_list, output_path):
    '''Concatenate PDFs into one file.

    :param file_list: paths of the PDFs to merge, in output order
    :param output_path: path of the merged PDF to write
    '''
    outpdf = PdfFileWriter()
    sources = []
    try:
        for f in file_list:
            # Inputs stay open until the merged file has been written.
            source = open(f, 'rb')
            sources.append(source)
            for page in PdfFileReader(source).pages:
                outpdf.addPage(page)
        with open(output_path, 'wb') as ous:
            outpdf.write(ous)
    finally:
        for source in sources:
            source.close()
def pdfSplit(pdf_main, pdf_part):
    """Save the last page of a PDF, rotated 90 degrees clockwise, as a new PDF.

    Args:
        pdf_main: Source PDF, as accepted by ``PdfFileReader``.
        pdf_part: Path of the single-page PDF to write.

    Returns:
        The number of pages before the last one (``page_count - 1``), or
        ``False`` when anything fails. A one-page source yields ``0``, so
        callers should test ``is False`` rather than truthiness.
    """
    try:
        pdf_read_obj = PdfFileReader(pdf_main)
        pdf_write_obj = PdfFileWriter()
        page_num = pdf_read_obj.getNumPages()
        page_last_obj = pdf_read_obj.getPage(page_num - 1)
        page_last_obj.rotateClockwise(90)
        pdf_write_obj.addPage(page_last_obj)
        # Context manager so the output handle is flushed and closed.
        with open(pdf_part, 'wb') as out:
            pdf_write_obj.write(out)
        return page_num - 1
    except Exception:
        return False
def add_number(p, n, x, y, counter=1):
    """Stamp a number overlay onto the first page of ``p`` and save it.

    Args:
        p: Reader whose first page is copied and used as the base.
        n: Bytes of a PDF whose first page is the overlay.
        x: Horizontal translation applied to the overlay.
        y: Vertical translation applied to the overlay.
        counter: Used as the output file name, ``<counter>.pdf``.
    """
    stamped = copy.copy(p.getPage(0))
    overlay = PdfFileReader(io.BytesIO(n)).getPage(0)
    stamped.mergeTranslatedPage(overlay, x, y)

    out = PdfFileWriter()
    out.addPage(stamped)

    target = Path.cwd() / "numbering" / "assets" / "numbered" / f"{counter}.pdf"
    with open(target, "wb") as handle:
        out.write(handle)
def PdfPassword(filepath, password):
    """Write a password-protected copy of one PDF next to the original.

    The copy is named ``temp_<ts>_<original name>``. The process exits via
    ``sys.exit()`` when the path is not a file or lacks a ``.pdf`` extension.

    Args:
        filepath: Path of the PDF to protect.
        password: Password used to encrypt the copy.
    """
    # Guard: the path must point at an existing file.
    if not os.path.isfile(filepath):
        print("Check The File Path")
        sys.exit()

    directory, filename = os.path.split(filepath)
    file_extension = os.path.splitext(filepath)[1]

    # Guard: only PDF files are accepted.
    if file_extension != ".pdf":
        print(
            f"Not A PDF File Given, File Has Extension: {file_extension}")
        sys.exit()

    output_file = os.path.join(directory, f"temp_{ts}_{filename}")

    # Copy every page of the original into a new writer.
    pdf_writer = PdfFileWriter()
    reader = PdfFileReader(filepath)
    for idx in range(reader.numPages):
        pdf_writer.addPage(reader.getPage(idx))

    # Encrypt, then write the protected copy.
    pdf_writer.encrypt(password, use_128bit=True)
    with open(output_file, "wb") as out:
        pdf_writer.write(out)
    print('File Written To Path:', output_file)
def PdfMultiplePassword(filepaths, password):
    """Write a password-protected copy of each PDF in ``filepaths``.

    Copies go to the current directory as ``merge_enc_<n>_<ts>.pdf`` with
    ``n`` counting from 1. The process exits via ``sys.exit()`` when a path
    is not a file or an extension is not ``.pdf``.

    Args:
        filepaths: Paths of the PDFs to protect.
        password: Password used to encrypt every copy.
    """
    exists_flags = [os.path.isfile(p) for p in filepaths]
    extensions = [os.path.splitext(p)[1] for p in filepaths]
    non_pdf = [ext for ext in extensions if ext != ".pdf"]

    # Guard: report the first missing file and stop.
    if False in exists_flags:
        index = exists_flags.index(False)
        print(f"File Doesn't Exists: {filepaths[index]}")
        sys.exit()

    # Guard: every input must be a PDF.
    if non_pdf:
        print("Submit Only PDF Files")
        sys.exit()

    for count, path in enumerate(filepaths, start=1):
        pdf_writer = PdfFileWriter()
        pdf_reader = PdfFileReader(path)
        for page_index in range(pdf_reader.getNumPages()):
            pdf_writer.addPage(pdf_reader.getPage(page_index))

        output_file = f"merge_enc_{count}_{ts}.pdf"
        pdf_writer.encrypt(password, use_128bit=True)
        with open(output_file, 'wb') as out:
            pdf_writer.write(out)
        print('File Written To Path:', output_file)
def getTitlePDFfromBookmarkfile(pdf_filepath, bookmark_filepath, pdf_filepath_output):
    """Extract the table-of-contents pages of a PDF using a bookmark file.

    The bookmark file is UTF-8 text; lines of interest hold a title and a
    page number separated by a tab. The line containing ``目录`` gives the
    first page; the first listed page number greater than that start gives
    the end.

    Args:
        pdf_filepath: Path of the source PDF.
        bookmark_filepath: Path of the tab-separated bookmark file.
        pdf_filepath_output: Path of the PDF to write.

    Returns:
        None. Nothing is written when no valid page range is found.
    """
    # Context manager so the bookmark file is closed after reading.
    with codecs.open(bookmark_filepath, 'r', encoding='utf-8') as bookmark_file:
        lines = bookmark_file.readlines()

    page_start = 0
    for line in lines:
        if line.find(u'目录') >= 0:
            line = line.strip()
            print(line)
            print(line.split('\t'))
            page_start = int(line.split('\t')[1])
            # Convert to a zero-based page index.
            page_start -= 1
    print(page_start)

    page_list = []
    for line in lines:
        line = line.strip()
        if line.find('\t') >= 0:
            page_list.append(int(line.rsplit('\t', 1)[1]))

    page_end = 0
    for page_number in page_list:
        if page_number > page_start:
            page_end = page_number
            break
    page_end -= 1
    print(page_end)

    if page_end <= page_start and page_start >= 0 and page_end > 0:
        print('not find title page')
        return

    # Keep the source open until the output has been written.
    with open(pdf_filepath, "rb") as source:
        pdf = PdfFileReader(source)
        output = PdfFileWriter()
        for i in range(page_start, page_end + 1):
            output.addPage(pdf.getPage(i))
        with open(pdf_filepath_output, 'wb') as stream:
            output.write(stream)
def pdf2cut(pdf_in, pdf_out, axis_x, axis_y, width, height):
    """Crop every page of a PDF by rewriting the corners of its media box.

    Args:
        pdf_in: Path of the PDF to read.
        pdf_out: Path of the cropped PDF to write.
        axis_x: X coordinate of the upper-left corner.
        axis_y: Y coordinate of the upper-left corner.
        width: Width of the crop area.
        height: Height of the crop area.

    Returns:
        ``pdf_out``.
    """
    pdf_write_obj = PdfFileWriter()
    # Keep the source open until the output has been written.
    with open(pdf_in, 'rb') as source:
        pdf_read_obj = PdfFileReader(source)
        for page in pdf_read_obj.pages:
            page.mediaBox.setUpperLeft((axis_x, axis_y))
            # NOTE(review): the upper-right corner uses (width, height) rather
            # than (axis_x + width, axis_y); kept as in the original - confirm.
            page.mediaBox.setUpperRight((width, height))
            page.mediaBox.setLowerLeft((axis_x, axis_y - height))
            page.mediaBox.setLowerRight((axis_x + width, axis_y - height))
            pdf_write_obj.addPage(page)
        with open(pdf_out, 'wb') as f:
            pdf_write_obj.write(f)
    return pdf_out
def PdfPrettyPrint(inputname, outputname):
    """Re-order the pages of a PDF in groups of four.

    For each group starting at page ``i`` the output order is ``i``,
    ``i + 2``, ``i + 1`` rotated 180 degrees, ``i + 3`` rotated 180 degrees.
    Positions beyond the end of the input are filled with blank pages.

    Args:
        inputname: Path of the PDF to read.
        outputname: Path of the re-ordered PDF to write.

    Returns:
        ``True``.
    """
    # (offset within the group, rotate by 180 degrees?) in output order.
    layout = ((0, False), (2, False), (1, True), (3, True))
    with open(inputname, 'rb') as inputfile:
        wrt = PdfFileWriter()
        ipt = PdfFileReader(inputfile)
        pdfnums = ipt.getNumPages()
        for i in range(0, pdfnums, 4):
            for offset, rotate in layout:
                if i + offset < pdfnums:
                    page = ipt.getPage(i + offset)
                    if rotate:
                        page.rotateClockwise(180)
                    wrt.addPage(page)
                else:
                    wrt.addBlankPage()
        # Write while the input is still open.
        with open(outputname, "wb") as fl:
            wrt.write(fl)
    return True
def process_pdf_automatically(self):
    """Join the first page of every PDF in a chosen folder, ordered by date.

    For each ``.pdf`` in the selected directory (except ``join.pdf``) the
    bank name is read from the last page and the statement date from the
    first page. First pages are written to ``<dir>/<self.name>.pdf`` in
    ascending ``dd/mm/YYYY`` order. Nothing is processed, and an error
    dialog is shown, when ``self.name`` is empty.
    """
    self.statusBar().showMessage('Procesando...')
    if self.name != "":
        self.dir = QFileDialog.getExistingDirectory()
        ls = []
        files = [x for x in os.listdir(self.dir + '/') if x.endswith('.pdf') and x != "join.pdf"]
        outfile = PdfFileWriter()
        sources = []
        try:
            for i in files:
                # Inputs stay open until the joined PDF has been written.
                source = open(self.dir + '/' + str(i), 'rb')
                sources.append(source)
                pdf = PdfFileReader(source)
                page = pdf.getPage(0)
                last = pdf.getPage(pdf.getNumPages() - 1)
                banco = re.findall("(bbva|santander)", last.extractText().lower())
                text = page.extractText()
                fecha = \
                    re.findall("(corte.*[0-9]{1,2}[/][0-9]{1,2}[/][0-9]{2,4})", text.lower())[0]
                fecha = \
                    re.findall("([0-9]{1,2}[/][0-9]{1,2}[/][0-9]{2,4})", fecha)[0]
                ls.append({'page': page, 'bank': Counter(banco).most_common()[0][0].upper(), 'date': fecha})
            # Stable sort: each page is added exactly once, and pages sharing
            # a date keep their discovery order. The previous nested loop
            # added such pages once per duplicate date.
            ls.sort(key=lambda entry: datetime.strptime(entry['date'], '%d/%m/%Y'))
            for entry in ls:
                outfile.addPage(entry['page'])
            self.statusBar().showMessage('Creando PDF...')
            save_in = self.dir + '/' + self.name + '.pdf'
            with open(save_in, 'wb') as f:
                outfile.write(f)
        finally:
            for source in sources:
                source.close()
        self.statusBar().showMessage('Creación del PDF Exitosa')
        self.show_dialog("Acción realizada con éxito")
    else:
        self.show_dialog("No fue posible crear el archivo PDF")
    self.statusBar().showMessage('')
def split_pdf(inFile, outFile):
    '''Split a document: write everything from the third page on to a new file.

    :param inFile: path of the input PDF
    :param outFile: path of the output PDF
    :return: None
    '''
    pdfFileWriter = PdfFileWriter()
    # Keep the source open until the output has been written.
    with open(inFile, 'rb') as source:
        pdfFileReader = PdfFileReader(source)
        page_count = pdfFileReader.getNumPages()
        print(page_count)
        # Pages with index 2 and above go to the new file.
        for i in range(2, page_count):
            pdfFileWriter.addPage(pdfFileReader.getPage(i))
        with open(outFile, 'wb') as out:
            pdfFileWriter.write(out)
def createNewBooks(self, pdf_file, stPage, endPage, filename='my.pdf'):
    """Copy pages ``stPage`` up to but excluding ``endPage`` into a new PDF.

    Args:
        pdf_file: Path of the source PDF.
        stPage: Zero-based index of the first page to copy.
        endPage: Zero-based index one past the last page to copy.
        filename: Path of the PDF to write.

    Returns:
        The string ``'Complete knifing'``.
    """
    with open(pdf_file, "rb") as source:
        pdf_input = PdfFileReader(source)
        if pdf_input.isEncrypted:
            # decrypt() returns a status code, not a reader, so the reader
            # must not be rebound to its result.
            pdf_input.decrypt('')
        pdf_output = PdfFileWriter()
        # The first page has index 0.
        for i in range(stPage, endPage):
            pdf_output.addPage(pdf_input.getPage(i))
        # Write while the source is still open.
        with open(filename, 'wb') as output_stream:
            pdf_output.write(output_stream)
    return 'Complete knifing'
class ReadPdf(object):
    """Read a PDF and write a copy cropped by a per-report-type scheme."""

    # Page width used for the right-hand corners of every crop.
    _PAGE_WIDTH = 595
    # type code -> (top y, bottom y) of the cropped media box.
    # '02' body composition, '04' bone density, '05' ultrasound bone density.
    # Any other code ('01', '03', ...) leaves pages untouched.
    _CROP_BOUNDS = {
        '02': (765, 22),
        '04': (860, 80),
        '05': (842, 35),
    }

    def __init__(self, in_file):
        """Open ``in_file`` for reading and prepare an empty writer."""
        # The handle is left open on purpose: pages are pulled from it when
        # ``parse`` runs, and this class has no close() API.
        self.pdf_read = PdfFileReader(open(in_file, 'rb'))
        self.pdf_write = PdfFileWriter()

    def parse(self, out_file, type):
        """Crop every page according to ``type`` and write to ``out_file``.

        Args:
            out_file: Path of the PDF to write.
            type: Report type code selecting the crop scheme.
        """
        bounds = self._CROP_BOUNDS.get(type)
        for page in self.pdf_read.pages:
            if bounds is not None:
                top, bottom = bounds
                page.mediaBox.setUpperLeft((0, top))
                page.mediaBox.setUpperRight((self._PAGE_WIDTH, top))
                page.mediaBox.setLowerLeft((0, bottom))
                page.mediaBox.setLowerRight((self._PAGE_WIDTH, bottom))
            self.pdf_write.addPage(page)
        # Context manager so the output handle is closed on failure too.
        with open(out_file, 'wb') as ous:
            self.pdf_write.write(ous)
def _removePropertyEndPage(self, file_pdf):
    '''Remove the last page of the asset-detail PDF, in place.

    All pages except the last are written to ``<file_pdf>tmp.pdf``, which
    then replaces the original file.

    :param file_pdf: path of the PDF to shorten
    '''
    tmp_path = file_pdf + 'tmp.pdf'
    # Both handles are closed before the files are removed or renamed.
    with open(file_pdf, "rb") as fd_in:
        pdf_in = PdfFileReader(fd_in)
        pdf_out = PdfFileWriter()
        for num in range(pdf_in.getNumPages() - 1):
            pdf_out.addPage(pdf_in.getPage(num))
        with open(tmp_path, "wb") as fd_out:
            pdf_out.write(fd_out)
    os.remove(file_pdf)
    os.rename(tmp_path, file_pdf)
    print(' > 已把最后一页删除')
def merge_page_nums(pages: List[PageObject], options, filename='page_nums.pdf'):
    """Merge each content page onto the matching page of a page-number PDF.

    Args:
        pages: Content pages; page ``i`` is merged onto page ``i`` of the
            page-number PDF.
        options: Mapping with a ``"folder-dir"`` entry; files live in its
            ``tmp`` sub-directory.
        filename: Name of the page-number PDF inside that directory.

    Returns:
        The page-number pages with the content merged onto them.
    """
    tmp_dir = os.path.join(options["folder-dir"], "tmp")
    merged = []
    with open(os.path.join(tmp_dir, filename), 'rb') as numbers_file:
        numbers_pdf = PdfFileReader(numbers_file)
        for index, content_page in enumerate(pages):
            target: PageObject = numbers_pdf.getPage(index)
            target.mergePage(content_page)

            # For some reason the text doesn't appear properly if we don't write first
            thread_print("Writing extra output file because this is somehow necessary")
            scratch = PdfFileWriter()
            scratch.addPage(target)
            # The write itself is disabled; the file is only created or truncated.
            with open(os.path.join(tmp_dir, "page_num_overlap.pdf"), 'wb'):
                pass

            merged.append(target)
    return merged
def addBlankpage(inFile, outFile):
    '''Copy a PDF and append one blank page at the end.

    :param inFile: source PDF, as accepted by ``PdfFileReader``
    :param outFile: path of the PDF to write
    '''
    pdfFileWriter = PdfFileWriter()
    pdfFileReader = PdfFileReader(inFile)
    for index in range(pdfFileReader.getNumPages()):
        pdfFileWriter.addPage(pdfFileReader.getPage(index))
    # Append the blank page as the last page.
    pdfFileWriter.addBlankPage()
    # A single write through a closed-on-exit handle; the earlier
    # intermediate write was overwritten immediately.
    with open(outFile, 'wb') as out:
        pdfFileWriter.write(out)
def pdf_cat(input_files, output_stream):
    """Concatenate open PDF streams into ``output_stream``, then close the inputs.

    Based on https://stackoverflow.com/questions/3444645/merge-pdf-files

    Args:
        input_files: Open binary streams, one per PDF, in output order.
        output_stream: Writable binary stream receiving the merged PDF.
    """
    input_streams = []
    try:
        # Collect the inputs, build the output, and only then close the
        # inputs: page data is not read from them until the write. See
        # https://stackoverflow.com/questions/6773631/problem-with-closing-python-pypdf-writing-getting-a-valueerror-i-o-operation/6773733#6773733
        input_streams.extend(input_files)
        merged = PdfFileWriter()
        for stream in input_streams:
            source = PdfFileReader(stream)
            for index in range(source.getNumPages()):
                merged.addPage(source.getPage(index))
        merged.write(output_stream)
    finally:
        for stream in input_streams:
            stream.close()
def _create_hyperlinks(self, link_locations, page_locations):
    """Copy ``tmp2.pdf`` to ``self.filename`` with table-of-contents links.

    Args:
        link_locations: Link rectangles, one per table-of-contents item.
        page_locations: One-based destination page number per item.
    """
    source = PdfFileReader("tmp2.pdf")
    target = PdfFileWriter()
    for page_index in range(source.getNumPages()):
        target.addPage(source.getPage(page_index))

    for item_index, rect in enumerate(link_locations):
        # Table-of-contents page holding this item; 1 unless the orientation
        # is "P" or "L".
        toc_page = 1
        if self.toc_orientation == "P":
            toc_page = math.floor(item_index / settings["Items on vertical toc"])
        if self.toc_orientation == "L":
            toc_page = math.floor(item_index / settings["Items on horizontal toc"])
        target.addLink(
            pagenum=toc_page,
            pagedest=page_locations[item_index] - 1,
            rect=rect,
            fit="/Fit",
            border=[0, 0, 0],
        )

    with open(self.filename, 'wb') as out:
        target.write(out)
def get(self, request, *args, **kwargs):
    """Return the riyuu-format4 template with motion-purpose marks drawn on it.

    An overlay page is drawn with ``self.motion_purpose_draw`` and merged
    onto the first page of the template; the result is sent as an attachment
    named ``hello.pdf``.
    """
    font_name = "HeiseiMin-W3"
    pdfmetrics.registerFont(UnicodeCIDFont(font_name))

    template = PdfFileReader('media/pdf/riyuu-format4.pdf')
    result = PdfFileWriter()

    overlay_buffer = io.BytesIO()
    overlay = canvas.Canvas(overlay_buffer)
    overlay.setFont(font_name, 11)

    # Layout values handed to motion_purpose_draw.
    first_row_y = 295
    before_x = 748
    after_x = 776.5
    row_height = 11.9

    rows = [
        {'label': label, 'before_flag': before, 'after_flag': after}
        for label, before, after in (
            ('便器からの立ち座り', True, False),
            ('トイレまでの移動', False, True),
            ('トイレ出入口の出入(扉の開閉含む)', True, False),
        )
    ]

    material = PdfMaterial.objects.get(
        key="welfare_equipment")
    overlay = self.motion_purpose_draw(overlay, before_x, after_x,
                                       material.materials,
                                       rows, first_row_y, row_height)
    overlay.showPage()
    overlay.save()

    # Rewind so the overlay can be read back as a PDF.
    overlay_buffer.seek(0)
    overlay_pdf = PdfFileReader(overlay_buffer)
    base_page = template.getPage(0)
    base_page.mergePage(overlay_pdf.getPage(0))
    result.addPage(base_page)

    response_body = io.BytesIO()
    result.write(response_body)
    response_body.seek(0)
    print('finish')
    return FileResponse(response_body, as_attachment=True, filename='hello.pdf')
def fetchANs(bols, client, workOrderLocation):
    """Fetch delivery-order PDFs for the given BOLs and merge them.

    Each distinct non-empty BOL is looked up with ``client.ticket_search``
    and ``getPdf``. Found PDFs are merged into ``DOs.pdf`` under
    ``workOrderLocation`` and the individual files deleted. BOLs without a
    PDF are reported through ``popUpOK``.

    Args:
        bols: BOL numbers as strings; blanks and repeats are skipped.
        client: Object providing ``ticket_search(Title=..., From=...)``.
        workOrderLocation: Directory receiving ``DOs.pdf``.
    """
    pdfs = []
    doneBols = set()  # set gives O(1) duplicate checks
    failedPDFs = ""
    for bol in bols:
        if bol == "" or bol in doneBols:
            continue
        doneBols.add(bol)
        tickets = client.ticket_search(Title="Delivery Order for BL# " + bol, From="@msc.com")
        pdf = getPdf(tickets)
        if pdf:
            pdfs.append(pdf)
        else:
            failedPDFs = "\n" + bol + failedPDFs

    if pdfs:
        # Merge once, after every PDF is known, instead of rebuilding the
        # output for each new PDF.
        input_streams = []
        try:
            with open(workOrderLocation + "\\" + "DOs.pdf", 'wb') as merged:
                # Open all inputs, write the output, then close the inputs:
                # page data is not read from them until the write. See
                # https://stackoverflow.com/questions/6773631/problem-with-closing-python-pypdf-writing-getting-a-valueerror-i-o-operation/6773733#6773733
                for input_file in pdfs:
                    input_streams.append(open(input_file, 'r+b'))
                writer = PdfFileWriter()
                for reader in map(PdfFileReader, input_streams):
                    for n in range(reader.getNumPages()):
                        writer.addPage(reader.getPage(n))
                writer.write(merged)
        finally:
            for stream in input_streams:
                stream.close()

    for pdf in pdfs:
        os.remove(pdf)
    if failedPDFs != "":
        popUpOK("Could not find the following BOLs: " + failedPDFs)
def mergePdf(inFileList, outFile):
    '''Merge documents.

    :param inFileList: list of paths of the documents to merge
    :param outFile: path of the merged output file
    :return: None
    '''
    pdfFileWriter = PdfFileWriter()
    sources = []
    try:
        # Sort first, otherwise the merge order is wrong.
        for inFile in sorted(inFileList):
            # Inputs stay open until the merged file has been written.
            source = open(inFile, 'rb')
            sources.append(source)
            pdfReader = PdfFileReader(source)
            for index in range(pdfReader.getNumPages()):
                pdfFileWriter.addPage(pdfReader.getPage(index))
        # Finally write everything to the output file in one go.
        with open(outFile, 'wb') as out:
            pdfFileWriter.write(out)
    finally:
        for source in sources:
            source.close()
def get(self, request, *args, **kwargs):
    """Return ``sample.pdf`` with a test string drawn on its first page.

    The text is drawn on an in-memory overlay page and merged onto the first
    page of the sample; the result is sent as an attachment named
    ``hello.pdf``.
    """
    font_name = "HeiseiKakuGo-W5"
    pdfmetrics.registerFont(UnicodeCIDFont(font_name))

    overlay_buffer = io.BytesIO()
    overlay = canvas.Canvas(overlay_buffer)

    base_page = PdfFileReader('media/pdf/sample.pdf').getPage(0)

    overlay.setFont(font_name, 24)
    overlay.drawString(0, 820, "テスト")
    overlay.showPage()
    overlay.save()

    # Rewind so the overlay can be read back as a PDF.
    overlay_buffer.seek(0)
    base_page.mergePage(PdfFileReader(overlay_buffer).getPage(0))

    result = PdfFileWriter()
    result.addPage(base_page)

    response_body = io.BytesIO()
    result.write(response_body)
    response_body.seek(0)
    return FileResponse(response_body, as_attachment=True, filename='hello.pdf')
def createNewBooks(self, pdf_file, output_file, output_dir):
    """Write a copy of ``pdf_file`` without its last page.

    Args:
        pdf_file: Path of the source PDF.
        output_file: Name of the PDF to write.
        output_dir: Directory for the output; created when missing.
    """
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    with open(pdf_file, 'rb') as input_stream:
        pdf_input = PdfFileReader(input_stream)
        pdf_output = PdfFileWriter()
        # ``- 1`` leaves out the last page.
        for page in range(pdf_input.getNumPages() - 1):
            pdf_output.addPage(pdf_input.getPage(page))
        outputfilename = output_dir + '/' + output_file
        # Write while the source is still open.
        with open(outputfilename, 'wb') as output_stream:
            pdf_output.write(output_stream)
def _getLtePropertyPrintPage(self, wbs_id, file_pdf, file_end):
    '''Extract the asset-detail page that must be printed and signed.

    The page index comes from ``self._getPropertyKeyPage``. The page is
    saved to ``file_end``. When it is the last page of ``file_pdf``, that
    page is then removed from ``file_pdf``. Problems are printed and
    appended to ``self.dats_log``; nothing is written in that case.

    :param wbs_id: identifier used in messages and log rows
    :param file_pdf: path of the asset-detail PDF
    :param file_end: path of the single-page PDF to write
    '''
    print('---> 开始生成%s要打印签字的资产明细页' % (wbs_id))
    (page_num, page_end) = self._getPropertyKeyPage(file_pdf, self.key_yanshou)
    # Context manager so the input is closed on the early returns too.
    with open(file_pdf, "rb") as fp_in:
        pdf_in = PdfFileReader(fp_in)
        pageCount = pdf_in.getNumPages()
        if pageCount != page_num + 1:
            print(' > PyPDF2(%d)及pdfminer(%d)判定文件页数不同' % (pageCount, page_num))
            self.dats_log.loc[self.log_index] = [
                'E', wbs_id, 'PDF文件',
                'PyPDF2(%d)及pdfminer(%d)判定文件页数不同,未生成最后签字页' % (pageCount, page_num)
            ]
            self.log_index += 1
            return
        if page_end == -1:
            print(' > 未找到关键字【%s】所在的页,也即未找到最后签字页' % (self.key_yanshou))
            self.dats_log.loc[self.log_index] = [
                'E', wbs_id, 'PDF文件',
                '未找到关键字【%s】所在的页,也即未找到最后签字页' % (self.key_yanshou)
            ]
            self.log_index += 1
            return
        pdf_out = PdfFileWriter()
        pdf_out.addPage(pdf_in.getPage(page_end))
        with open(file_end, 'wb') as fp_out:
            pdf_out.write(fp_out)
    print('---> 已获取资产明细最后一页并已保存至%s' % (file_end))
    # The input is closed by now, so the file can be rewritten in place.
    if page_num == page_end + 1:
        print(' > 需把%s文件删除最后1页' % (file_pdf))
        self._removePropertyEndPage(file_pdf)
def crop(pdf_in, pdf_out):
    """Crop every page of a PDF to the paper size detected for it.

    The ``pdf_out`` parameter is temporary; in production the result is
    meant to be saved back into the same file.

    Args:
        pdf_in: Absolute path of the PDF to read.
        pdf_out: Absolute path of the cropped PDF to write.

    Returns:
        ``True``; the status flag is never changed.
    """
    import subprocess

    status = True

    # Paper size per page, e.g.
    # {1: ('Speedmaster', 900, 640), 2: ('Dominant', 640, 450)}
    papers = analyze_papersize(pdf_in)

    # TODO: rework this temporary fallback for missing paper-size information.
    if papers == {}:
        # Argument list instead of a shell string, so paths containing
        # spaces or shell metacharacters are passed through intact.
        subprocess.call(["perl", "pdfcrop.pl", pdf_in, pdf_out])
        return status

    output = PdfFileWriter()
    # Keep the source open until the output has been written.
    with open(pdf_in, "rb") as source:
        reader = PdfFileReader(source)
        for index in range(reader.getNumPages()):
            paper_machine, paper_w, paper_h = papers[index + 1][:3]

            # NOTE(review): when no registered press matches, ``machine``
            # keeps its value from the previous page, or is unbound on the
            # first page - confirm whether that can happen.
            for m in PrintingPress._registry:
                if paper_machine == m.name:
                    machine = m

            plate_w = machine.plate_w
            page = reader.getPage(index)

            print('Crop page {} to paper {}x{}'.format(index + 1, paper_w, paper_h))
            # Left offset, bottom offset.
            page.mediaBox.lowerLeft = ((pt(plate_w - paper_w)/2), pt(machine.klapan))
            # Width plus offset, height plus offset.
            page.mediaBox.upperRight = (pt(paper_w + (plate_w - paper_w)/2), pt(paper_h + machine.klapan))
            output.addPage(page)

        with open(pdf_out, "wb") as outputstream:
            output.write(outputstream)

    return status
# operation. Thanks to # https://stackoverflow.com/questions/6773631/problem-with-closing-python-pypdf-writing-getting-a-valueerror-i-o-operation/6773733#6773733 i=0 if not os.path.exists("J:\PODs\\Temp Files\\"): os.mkdir("J:\PODs\\Temp Files\\") for input_file in attachmentsContent: # input_file=input_file.encode('utf8').decode('utf8') f1=open("J:\PODs\\Temp Files\\"+str(i), 'w+b') f1.write(input_file) # print(input_file) input_streams.append(f1) i+=1 writer = PdfFileWriter() for reader in map(PdfFileReader, input_streams): for n in range(reader.getNumPages()): writer.addPage(reader.getPage(n)) writer.write(f) finally: for f in input_streams: f.close() # else: # f.write(img2pdf.convert(attachmentsContent)) print('Saved attachment to', local_path) # elif isinstance(attachment, ItemAttachment): # if isinstance(attachment.item, Message): # print(attachment.item.subject, attachment.item.body) item.is_read=True item.save() # print(datetime.datetime.now()) sleep(30) account.inbox.refresh()
def getTitlePDFfromBookmarkfile(pdf_filepath, bookmark_filepath, pdf_filepath_output):
    """Extract the table-of-contents pages of a PDF using a bookmark file.

    The bookmark file is UTF-16 text; lines of interest hold a title and a
    page number separated by a tab. The first line containing ``目录`` gives
    the first page; the first listed page number greater than it gives the
    page just after the range.

    Args:
        pdf_filepath: Path of the source PDF.
        bookmark_filepath: Path of the tab-separated bookmark file.
        pdf_filepath_output: Path of the PDF to write.

    Returns:
        None.
    """
    print('getTitlePDFfromBookmarkfile')
    # Context manager so the bookmark file is closed after reading.
    with codecs.open(bookmark_filepath, 'r', encoding='utf-16') as bookmark_file:
        lines = bookmark_file.readlines()

    page_start = 0
    for line in lines:
        print(line)
        if line.find(u'目录') >= 0:
            line = line.strip()
            print(line)
            page_start = int(line.split('\t')[1])
            print(page_start)
            break
    print(page_start)

    page_list = []
    for line in lines:
        line = line.strip()
        if line.find('\t') >= 0:
            # Only purely numeric page fields are collected.
            page_field = line.rsplit('\t', 1)[1]
            if page_field.isdigit():
                page_list.append(int(page_field))

    page_end = 0
    for page_number in page_list:
        if page_number > page_start:
            page_end = page_number
            break
    print(page_end)

    # Convert both bounds to zero-based page indices.
    page_start -= 1
    page_end -= 1
    print('toc from', page_start, page_end)

    # NOTE(review): with the values computed above this guard does not look
    # reachable; kept unchanged - confirm the intended condition.
    if page_end <= page_start and page_start >= 0 and page_end > 0:
        print('not find title page')
        return

    # Keep the source open until the output has been written.
    with open(pdf_filepath, "rb") as source:
        pdf = PdfFileReader(source)
        print(pdf_filepath)
        output = PdfFileWriter()
        for i in range(page_start, page_end):
            output.addPage(pdf.getPage(i))
        with open(pdf_filepath_output, 'wb') as stream:
            output.write(stream)