def import_doc(self, file_uri, config, docsearch, current_doc=None):
    """Import every readable PDF found under ``file_uri`` and index its pages.

    Each child whose basename ends with ".pdf" (case-insensitive) is first
    opened with poppler as a readability check; children that fail to open
    are skipped. Every imported document gets a ``_NN`` suffix appended to
    its ``path`` and ``docid``.

    Args:
        file_uri: Location handed to ``gio.File``; its children are scanned.
        config: Object providing ``workdir``; also forwarded to
            ``PdfDoc.import_pdf``.
        docsearch: Index that receives each page through ``index_page``.
        current_doc: Not used by this method.

    Returns:
        Tuple ``(doc, first_page)`` for the last document imported.

    Raises:
        AssertionError: If no PDF could be imported.
    """
    parent = gio.File(file_uri)
    last_doc = None
    imported = 0
    for child in self.__get_all_children(parent):
        if not child.get_basename().lower().endswith(".pdf"):
            continue
        child_uri = child.get_uri()
        try:
            # Make sure poppler can open it before importing.
            poppler.document_new_from_file(child_uri, password=None)
        except Exception:
            continue
        suffix = "_%02d" % imported
        last_doc = PdfDoc(config.workdir)
        last_doc.path += suffix
        last_doc.docid += suffix
        last_doc.import_pdf(config, child_uri)
        for page in last_doc.pages:
            docsearch.index_page(page)
        imported += 1
    if last_doc is None:
        # Raised explicitly so the check survives ``python -O``; the
        # exception type matches the assert statement it replaces.
        raise AssertionError("no importable PDF found")
    return (last_doc, last_doc.pages[0])
def which_colours(input_path):
    """Print the distinct colours of the non-link annotations in a PDF.

    Args:
        input_path: Filesystem path of the PDF; it is made absolute and
            converted to a ``file://`` URI before being opened.
    """
    pdf_uri = 'file://%s' % urllib.pathname2url(os.path.abspath(input_path))
    document = poppler.document_new_from_file(pdf_uri, None)
    page_count = document.get_n_pages()
    all_annots = 0
    unique_colours = []
    for page_index in range(page_count):
        for mapping in document.get_page(page_index).get_annot_mapping():
            annot = mapping.annot
            if annot.get_annot_type().value_name == 'POPPLER_ANNOT_LINK':
                continue
            all_annots += 1
            try:
                colour = annot.get_color()
                rgb = [colour.red, colour.green, colour.blue]
            except AttributeError:
                # The colour components could not be read; skip this one.
                continue
            if rgb not in unique_colours:
                unique_colours.append(rgb)
    print("Unique colours: ", unique_colours)
def LoadDocument(self, file):
    """Open a PDF and adopt the size of the page at index ``self.n_page``.

    Args:
        file: Path appended verbatim to ``file://`` to form the URI.
    """
    pdf_uri = "file://" + file
    document = poppler.document_new_from_file(pdf_uri, None)
    self.document = document
    self.n_pages = document.get_n_pages()
    page = document.get_page(self.n_page)
    self.current_page = page
    self.width, self.height = page.get_size()
    # Single formatted string: prints the same text on Python 2 and 3.
    print("wt: %s %s" % (self.width, self.height))
    self._UpdateSize()
def _load(self, fobj):
    """Populate the document from an open SlideDeX file object.

    The first line must start with ``SEP``. The rest of the file is split
    on ``SEP`` into: settings, header, one segment per page, and footer.
    If an up-to-date PDF already exists next to the file it is reused,
    otherwise a full compile is started.

    Args:
        fobj: Readable file-like object positioned at the start of the file.

    Raises:
        IOError: If the first line does not start with ``SEP`` or fewer
            than three segments follow it.
    """
    self._loaded = False
    if not fobj.readline().startswith(SEP):
        raise IOError("Not a SlideDeX file")
    segments = fobj.read().split(SEP)
    if len(segments) < 3:
        raise IOError("Could not load from file")

    self.settings = DocumentSettings(self, segments[0])
    self.pages.clear()
    # [1:] skips the first character: the empty line where a filename goes.
    self.header.set_content(segments[1][1:])
    self.footer.set_content(segments[-1][1:])
    for segment in segments[2:-1]:
        filename, content = segment.split('\n', 1)
        self.add_page(content, filename)

    pdffn = base_filename(self.fullfilename) + '.pdf'

    def select_first_page(status):
        return self.slidelist_view.select_path((0,))

    pdf_is_current = (
        os.path.exists(pdffn)
        and os.stat(pdffn).st_mtime >= os.stat(self.fullfilename).st_mtime
    )
    if pdf_is_current:
        self.compile_pages()
        self.doc = poppler.document_new_from_file('file://' + os.path.abspath(pdffn), None)
        self.executor.add_callback(select_first_page)
    else:
        self.compile(select_first_page)
    self.modified = False  # Set modified_since_save, and update the window title
    self._loaded = True
def __load_pdf_file(self, filename):
    """Open ``filename`` and rebuild the page model with one row per page.

    Pages at even indices get a ``CropSetting`` built from
    ``self.__odd_crop`` and pages at odd indices one built from
    ``self.__even_crop`` (index 0 is displayed as page "1").

    Args:
        filename: Path of the PDF; made absolute before the URI is built.
    """
    self.__pdf_filename = filename
    # NOTE(review): this binds a local name only. If a module-level
    # LAST_OPEN_FOLDER is meant to be updated, a ``global`` statement is
    # missing -- confirm intent before changing.
    LAST_OPEN_FOLDER = os.path.dirname(filename)
    filename = os.path.abspath(filename)
    self.__pdf_document = poppler.document_new_from_file('file://%s' % filename, None)
    self.__n_pages = self.__pdf_document.get_n_pages()
    self.__pages_model.clear()
    for i in range(self.__n_pages):
        # Read the size of every page. It used to be read for even indices
        # only, so odd-indexed pages silently reused the previous page's size.
        size = self.__pdf_document.get_page(i).get_size()
        crop = self.__odd_crop if i % 2 == 0 else self.__even_crop
        self.__pages_model.append([str(i + 1), PageInfo(i, CropSetting(crop), size)])
    if not self.__pdf_view:
        self.__pdf_view = PdfView()
        self.__canvas.get_root_item().add_child(self.__pdf_view, next_index())
def makeExp(self, e):
    """Insert a fixed comment block into final.tex and reload the rebuilt PDF.

    Experimental. The event coordinates are appended to coord.txt, a
    ``textblock*`` entry is inserted at the last line of final.tex that
    contains ``%NEW_DOC_ENTRY``, pdflatex is run and final.pdf is reopened.

    Args:
        e: Event object providing ``GetX()`` and ``GetY()``.
    """
    # Context managers close final.tex promptly; the read handle used to
    # stay open for the rest of the call.
    with open('final.tex', 'r') as tex_file:
        tex = tex_file.readlines()

    # Index of the last line carrying the marker. It stays -1 when there is
    # none, in which case list.insert() places the entry before the last line.
    k = -1
    for i, line in enumerate(tex):
        if '%NEW_DOC_ENTRY' in line:
            k = i

    with open("coord.txt", "a") as cfile:
        cfile.write('[%s , %s ]\n' % (e.GetX(), e.GetY()))
    win = PopUp(None, title="BORDER")

    # "\\end" spells out the backslash that "\e" only produced by accident.
    out = (
        '\\begin{textblock*}{10cm}(%scm,%scm) This is the comment I am putting in pdf \\end{textblock*}\n'
        % ((e.GetX() * 12 / self.width), (e.GetY() * 20 / self.height))
    )
    tex.insert(k, out)
    with open('final.tex', 'w') as tex_file:
        tex_file.writelines(tex)

    os.system("pdflatex final.tex")
    pdf_uri = "file://" + os.path.abspath('final.pdf')
    self.document = poppler.document_new_from_file(pdf_uri, None)
    print('The tex file is : ' + pdf_uri)
    self.n_pages = self.document.get_n_pages()
    self.current_page = self.document.get_page(self.n_page)
    self.width, self.height = self.current_page.get_size()
    self._UpdateSize()
    self.Refresh()
def get_annotations(annotated_pdf, synctex_pdf):
    """Collect the non-link annotations of a PDF with their LaTeX positions.

    Args:
        annotated_pdf: Path of the annotated PDF to read.
        synctex_pdf: Forwarded to ``get_latex_position`` for each annotation.

    Returns:
        Tuple ``(annotation_data, input_filenames)``: a list holding one
        dict per annotation, and the set of their ``'Input'`` values.
    """
    pdf_uri = 'file://%s' % urllib.pathname2url(os.path.abspath(annotated_pdf))
    document = poppler.document_new_from_file(pdf_uri, None)
    page_count = document.get_n_pages()
    all_annots = 0
    annotation_data = []
    input_filenames = set()
    for page_index in range(page_count):
        page = document.get_page(page_index)
        page_number = page_index + 1
        for mapping in page.get_annot_mapping():
            annot = mapping.annot
            if annot.get_annot_type().value_name == 'POPPLER_ANNOT_LINK':
                continue
            all_annots += 1
            data = get_latex_position(page_number, page.get_size(), mapping.area, synctex_pdf)
            data["AnnotType"] = annot.get_annot_type().value_nick
            data["Page"] = str(page_number)
            data["Modified"] = annot.get_modified()
            data["Contents"] = annot.get_contents()
            annotation_data.append(data)
            input_filenames.add(data['Input'])
    return annotation_data, input_filenames
def getAnnotations(filepath):
    """Return the non-empty annotation texts of a PDF, page by page.

    Args:
        filepath: A ``file://`` URI, or a path that gets ``file://``
            prefixed as-is.

    Returns:
        List of dicts with the keys ``'page'`` (1-based page number) and
        ``'annotation'`` (the text after
        ``AnnotationExtractor.cleanAnnotation``).
    """
    if filepath.startswith('file://'):
        uri = filepath
    else:
        uri = "file://%s" % filepath
    doc = poppler.document_new_from_file(uri, None)
    pages = [doc.get_page(index) for index in range(doc.get_n_pages())]

    annotations = []
    for page_number, page in enumerate(pages, 1):
        texts = [mapping.annot.get_contents() for mapping in page.get_annot_mapping()]
        for text in texts:
            if not text:
                # Skip annotations without any content.
                continue
            cleaned = AnnotationExtractor.cleanAnnotation(text)
            annotations.append({'page': page_number, 'annotation': cleaned})
    return annotations
def refresh_pdf_preview_pane(self):
    """Reload ``self.pdf_file`` into the preview, or blank the preview.

    When the file does not exist or poppler raises ``glib.GError``, the
    preview widget is collapsed to size 0x0 and the page navigation and
    zoom buttons are made insensitive.
    """
    get_widget = self.ui.get_widget
    pdf_preview = get_widget('pdf_preview')

    rebuild = True
    if os.path.isfile(self.pdf_file):
        try:
            document = poppler.document_new_from_file('file://%s' % (self.pdf_file), None)
            self.pdf_preview['document'] = document
            self.pdf_preview['n_pages'] = document.get_n_pages()
            self.pdf_preview['scale'] = None
            self.goto_pdf_page(self.pdf_preview['current_page_number'], new_doc=True)
        except glib.GError:
            rebuild = True
        else:
            rebuild = False

    if rebuild:
        pdf_preview.set_size_request(0, 0)
        self.pdf_preview['current_page'] = None
        button_names = (
            'button_move_previous_page',
            'button_move_next_page',
            'button_zoom_out',
            'button_zoom_in',
            'button_zoom_normal',
            'button_zoom_best_fit',
        )
        for button_name in button_names:
            get_widget(button_name).set_sensitive(False)
    pdf_preview.queue_draw()
def LoadDocument(self, file):
    """Load a PDF, select page ``self.n_page`` and resize to that page.

    Args:
        file: Path that is concatenated onto ``file://`` unchanged.
    """
    self.document = poppler.document_new_from_file("file://" + file, None)
    self.n_pages = self.document.get_n_pages()
    self.current_page = self.document.get_page(self.n_page)
    page_size = self.current_page.get_size()
    self.width, self.height = page_size
    # One pre-formatted string prints identically on Python 2 and 3.
    print("wt: %s %s" % (self.width, self.height))
    self._UpdateSize()
def setup_template(self, source_path, output=None):
    """Render page 0 of a source PDF onto a new cairo PDF surface.

    Does nothing when ``self.cr``, ``self.page`` and ``self.pdf`` are all
    already set.

    Args:
        source_path: Path appended to ``file://`` to locate the source PDF.
        output: Target passed to ``cairo.PDFSurface``.
    """
    already_set_up = self.cr and self.page and self.pdf
    if already_set_up:
        return

    # Source document and its first page.
    source_uri = 'file://{}'.format(source_path)
    self.document = poppler.document_new_from_file(source_uri, None)
    self.page = self.document.get_page(0)

    # Destination document, sized like the source page.
    # TODO: There seems to be an issue with quality, possibly due to issues
    # with size calculations here.
    self.width, self.height = self.page.get_size()
    self.pdf = cairo.PDFSurface(output, self.width, self.height)
    self.cr = cairo.Context(self.pdf)
    context = self.cr

    # White background.
    context.save()
    context.set_source_rgb(1, 1, 1)
    context.paint()
    context.restore()

    # Copy the source page onto the destination.
    # NOTE: This is a costly function, especially with large PDFs. Consider
    # using task queuing (eg, celery)
    context.save()
    self.page.render_for_printing(context)
    context.restore()
def extract():
    """Print every non-link annotation of the PDF named in ``sys.argv[1]``.

    One line is printed per annotation (page number, modification date,
    type and content), followed by a summary line with the total count.
    """
    # Function-scope import: the surrounding file is only known to import
    # ``urllib.parse``.
    from urllib.request import pathname2url

    input_filename = sys.argv[1]
    # http://blog.hartwork.org/?p=612
    # urlparse() returns a 6-field tuple, which cannot be interpolated into
    # a single %s (TypeError). pathname2url() gives the percent-encoded
    # path that a file:// URI needs.
    pdf_uri = 'file://%s' % pathname2url(os.path.abspath(input_filename))
    document = poppler.document_new_from_file(pdf_uri, None)
    n_pages = document.get_n_pages()
    all_annots = 0

    for i in range(n_pages):
        page = document.get_page(i)
        for annot_mapping in page.get_annot_mapping():
            annot = annot_mapping.annot
            annot_type = annot.get_annot_type()
            if annot_type.value_name == 'POPPLER_ANNOT_LINK':
                continue
            all_annots += 1
            # "!s" lets a missing (None) modification date be padded too;
            # a bare "{1:10}" raises TypeError for None on Python 3.
            print('page: {0:3}, {1!s:10}, type: {2:10}, content: {3}'.format(
                i + 1,
                annot.get_modified(),
                annot_type.value_nick,
                annot.get_contents()))

    if all_annots > 0:
        print(str(all_annots) + " annotation(s) found")
    else:
        print("no annotations found")
def __init__(self, uri, filename, timestamp):
    """Open the PDF at ``uri`` and start on its first page.

    Args:
        uri: URI passed to poppler.
        filename: Stored as ``self.filename``.
        timestamp: Stored as ``self.timestamp``.
    """
    document = poppler.document_new_from_file(uri, None)
    self.doc = document
    self.timestamp = timestamp
    self.n_pgs = document.get_n_pages()
    self.filename = filename
    self.curr_pg = 0
    first_page = document.get_page(self.curr_pg)
    self.curr_pg_disp = first_page
    self.doc_width, self.doc_height = first_page.get_size()
def __init__(self, filename, parser, mime, backup, add2archive):
    """Initialise the base stripper, then open the PDF with poppler.

    All arguments are forwarded unchanged to the parent constructor. The
    document is opened from the absolute form of ``self.filename`` with no
    password.
    """
    super(PdfStripper, self).__init__(filename, parser, mime, backup, add2archive)
    self.password = None
    pdf_uri = 'file://' + os.path.abspath(self.filename)
    self.document = poppler.document_new_from_file(pdf_uri, self.password)
    # Names of the document properties this stripper deals with.
    self.meta_list = (
        'title',
        'author',
        'subject',
        'keywords',
        'creator',
        'producer',
        'creation-date',
        'mod-date',
        'metadata',
    )
def LoadDocument(self, file):
    """Run pdflatex on final.tex and load the resulting final.pdf.

    Args:
        file: Not used; the document is always final.pdf in the current
            working directory.
    """
    os.system("pdflatex final.tex")
    pdf_uri = "file://" + os.path.abspath('final.pdf')
    document = poppler.document_new_from_file(pdf_uri, None)
    self.document = document
    self.n_pages = document.get_n_pages()
    self.current_page = document.get_page(self.n_page)
    self.width, self.height = self.current_page.get_size()
    self._UpdateSize()
def LoadDocument(self, file):
    """Open a PDF and scale its current page to fit a 325x160 panel.

    The scale is the smaller of the two ratios panel size / page size.

    Args:
        file: Path appended verbatim to ``file://`` to form the URI.
    """
    self.document = poppler.document_new_from_file("file://" + file, None)
    self.n_pages = self.document.get_n_pages()
    self.current_page = self.document.get_page(self.n_page)
    # The panel size is fixed; the page size only determines the scale.
    self.width, self.height = (325, 160)
    page_width = self.current_page.get_size()[0]
    page_height = self.current_page.get_size()[1]
    self.scale = min(325.0 / page_width, 160.0 / page_height)
    self.initscale = self.scale
    self.panel.SetSize((self.width, self.height))
def LoadDocument(self, file):
    """Open a PDF, refresh size and scale, and show its path as the title.

    Args:
        file: Path appended verbatim to ``file://``; also used as the
            title of ``self.myparent``.
    """
    pdf_uri = "file://" + file
    document = poppler.document_new_from_file(pdf_uri, None)
    self.document = document
    self.n_pages = document.get_n_pages()
    page = document.get_page(self.n_page)
    self.current_page = page
    self.width, self.height = page.get_size()
    self._UpdateSize()
    self._UpdateScale(self.scale)
    self.myparent.SetTitle(file)
def __init__(self, ifname, ofname, width=1024, height=1322):
    """Store the conversion parameters and open the input PDF.

    Args:
        ifname: Path of the input PDF; percent-encoded into a file:// URI.
        ofname: Stored as ``self.ofname``.
        width: Stored as ``self.width``.
        height: Stored as ``self.height``.
    """
    self.width, self.height = width, height
    self.ifname, self.ofname = ifname, ofname
    input_uri = 'file://%s' % urllib.pathname2url(self.ifname)
    self.doc = poppler.document_new_from_file(input_uri, password=None)
def __init__(self, uri, statbuf):
    """Compute a SHA-1 id for the file at ``uri`` and open it with poppler.

    The digest covers the file size (formatted as ``'%u \\0'``) followed
    by the chunks produced by ``generate_file_data``.

    Args:
        uri: Location of the document; read once for hashing and once by
            poppler.
        statbuf: Object providing ``st_size``.
    """
    digest = hashlib.sha1()
    size_header = '%u \0' % (statbuf.st_size,)
    digest.update(size_header)
    with closing(urllib.urlopen(uri)) as stream:
        for chunk in generate_file_data(stream):
            digest.update(chunk)
    self.id = digest.hexdigest()
    self.popplerdoc = P.document_new_from_file(uri, None)
    self.clear_selection()
def LoadDocument(self, file):
    """Load a PDF and fit the page at ``self.n_page`` into 325x160.

    Args:
        file: Path that is concatenated onto ``file://`` unchanged.
    """
    document = poppler.document_new_from_file("file://" + file, None)
    self.document = document
    self.n_pages = document.get_n_pages()
    self.current_page = document.get_page(self.n_page)
    # Fixed panel dimensions; the page's own size only feeds the scale.
    self.width, self.height = (325, 160)
    horizontal = 325.0 / self.current_page.get_size()[0]
    vertical = 160.0 / self.current_page.get_size()[1]
    self.scale = min(horizontal, vertical)
    self.initscale = self.scale
    self.panel.SetSize((self.width, self.height))
def __init__(self):
    """Open the PDF named in ``sys.argv[1]`` and build the demo window.

    The window holds a toolbar row (page selector, scale selector and a
    "Scan Fonts" button) above a scrollable drawing area sized to the
    first page.
    """
    # Document state.
    self.document = poppler.document_new_from_file("file://" + sys.argv[1], None)
    self.n_pages = self.document.get_n_pages()
    self.current_page = self.document.get_page(0)
    self.scale = 1
    self.width, self.height = self.current_page.get_size()

    # Top-level window.
    window = gtk.Window(gtk.WINDOW_TOPLEVEL)
    window.set_default_size(600, 600)
    window.set_title("Poppler GLib Demo")
    window.connect("delete-event", gtk.main_quit)

    # Page selector.
    page_adjustment = gtk.Adjustment(0, 0, self.n_pages - 1, 1)
    page_selector = gtk.SpinButton(page_adjustment, 0, 0)
    page_selector.connect("value-changed", self.on_changed)
    page_label = gtk.Label('Page Number:')

    toolbar = gtk.HBox(False, 0)
    layout = gtk.VBox(False, 0)
    layout.pack_start(toolbar, False, False, 0)
    toolbar.pack_start(page_label, False, False, 4)
    toolbar.pack_start(page_selector, False, False, 0)

    # Scale selector.
    scale_adjustment = gtk.Adjustment(1, 1, 5, 1)
    scale_selector = gtk.SpinButton(scale_adjustment, 0, 0)
    scale_selector.connect("value-changed", self.on_scale_changed)
    scale_label = gtk.Label('Scale:')
    toolbar.pack_start(scale_label, False, False, 4)
    toolbar.pack_start(scale_selector, False, False, 0)

    # Font scan button.
    scan_button = gtk.Button('Scan Fonts')
    scan_button.connect("clicked", self.on_scan_fonts)
    toolbar.pack_start(scan_button, False, False, 4)

    # Scrollable drawing area for the page itself.
    scroller = gtk.ScrolledWindow()
    scroller.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
    self.dwg = gtk.DrawingArea()
    self.dwg.set_size_request(int(self.width), int(self.height))
    self.dwg.connect("expose-event", self.on_expose)
    scroller.add_with_viewport(self.dwg)
    layout.pack_start(scroller, True, True, 0)

    window.add(layout)
    window.show_all()
def on_changed(self, uri):
    """Refresh the view with a newly selected PDF document.

    Args:
        uri: URI of the document to open.
    """
    self.document = poppler.document_new_from_file(uri, None)
    self.n_pages = self.document.get_n_pages()
    self.current_page = self.document.get_page(0)
    # Re-read the page size: the size request below used to rely on the
    # dimensions of whichever document had been loaded before.
    self.width, self.height = self.current_page.get_size()
    self.dwg.set_size_request(int(self.width * self.scale),
                              int(self.height * self.scale))
    self.dwg.queue_draw()
def main():
    """Print the first index entry of the PDF in ``sys.argv[1]``, then walk the index.

    Returns:
        0 after the index has been walked.
    """
    doc = poppler.document_new_from_file("file:///" + sys.argv[1], None)
    iterp = poppler.IndexIter(doc)
    link = iterp.get_action()
    dest = doc.find_dest(link.dest.named_dest)
    label = doc.get_page(dest.page_num - 1).get_label()
    # Title, a single-space string and the label, each separated by one
    # space, exactly as the comma-separated print statement emitted them.
    print("%s   %s" % (link.title, label))
    walk_index(iterp, doc)
    return 0
def get_pdf(self, uri):
    """Load the PDF registered for the currently selected tab.

    Args:
        uri: Ignored; the path is looked up in ``self.tabFileDictionary``
            under the current page number of ``self.fileDisplayArea``.
    """
    tab_key = str(self.fileDisplayArea.get_current_page())
    pdf_path = os.path.realpath(self.tabFileDictionary[tab_key])
    document = poppler.document_new_from_file("file://" + pdf_path, None)
    self.document = document
    self.n_pages = document.get_n_pages()
    self.current_page = document.get_page(int(self.page))
    self.width, self.height = self.current_page.get_size()
    self.total_pages = document.get_n_pages()
    virtualThread(self)
def main():
    """Show the first outline entry of the PDF given on the command line.

    The entry's title and the label of its target page are printed, then
    ``walk_index`` is called on the same iterator.

    Returns:
        Always 0.
    """
    uri = "file:///" + sys.argv[1]
    doc = poppler.document_new_from_file(uri, None)
    iterp = poppler.IndexIter(doc)
    link = iterp.get_action()
    target = doc.find_dest(link.dest.named_dest)
    target_page = doc.get_page(target.page_num - 1)
    # Three spaces: the output of printing title, ' ' and label with commas.
    print("%s   %s" % (link.title, target_page.get_label()))
    walk_index(iterp, doc)
    return 0
def get_pdf(self, uri):
    """Open the PDF that belongs to the active tab and start rendering.

    Args:
        uri: Overwritten immediately; the real path comes from
            ``self.tabFileDictionary``, keyed by the active tab number.
    """
    active_tab = self.fileDisplayArea.get_current_page()
    resolved = os.path.realpath(self.tabFileDictionary[str(active_tab)])
    self.document = poppler.document_new_from_file("file://" + resolved, None)
    page_count = self.document.get_n_pages()
    self.n_pages = page_count
    self.current_page = self.document.get_page(int(self.page))
    self.width, self.height = self.current_page.get_size()
    self.total_pages = self.document.get_n_pages()
    virtualThread(self)
def __init__(self, pdf_url):
    """Open the presentation and create the slide and note windows.

    Both windows share one poppler document and get the same key, button
    and close handlers.

    Args:
        pdf_url: Path of the PDF; made absolute and prefixed with file://.
    """
    pdf_uri = "file://" + os.path.abspath(pdf_url)
    document = poppler.document_new_from_file(pdf_uri, None)
    self.main_window = SlideWindow(DocumentManager(document))
    self.note_window = NoteWindow(DocumentManager(document, 1))
    self.is_fullscreen = False

    for window in (self.main_window, self.note_window):
        window.connect("key-press-event", self.on_key_press)
        window.connect("button-press-event", self.on_button_press)
        window.connect("delete-event", gtk.main_quit)
def render(self):
    """Save the report to disk and open the result with poppler.

    A landscape default page setup is installed first when the report has
    a truthy ``print_as_landscape`` attribute.
    """
    # FIXME: This is an specific fix for boleto printing in landscape
    # orientation. We should find a better fix for it or simply remove
    # PrintOperationPoppler when migrating the last reports using
    # reportlab to weasyprint
    wants_landscape = getattr(self._report, 'print_as_landscape', False)
    if wants_landscape:
        landscape_setup = gtk.PageSetup()
        landscape_setup.set_orientation(gtk.PAGE_ORIENTATION_LANDSCAPE)
        self.set_default_page_setup(landscape_setup)

    self._report.save()
    report_file = gio.File(path=self._report.filename)
    self._document = poppler.document_new_from_file(report_file.get_uri(), password="")
def preview_expose_event_cb(self, widget, event):
    """Expose event callback: repaint the preview with the selected card.

    Nothing is drawn when no entry is selected in ``goComboBox`` or the
    matching ``card_<index>.pdf`` does not exist in the project directory.
    """
    index = self.__getitem__("goComboBox").get_active()
    if not index > -1:
        return
    card_path = self.project_path + "/card_" + str(index) + ".pdf"
    if not os.path.exists(card_path):
        return

    pdf = poppler.document_new_from_file("file://" + card_path, None)
    width, height = pdf.get_page(0).get_size()
    widget.set_size_request(int(width), int(height))

    # White page-sized rectangle first, then the card on top of it.
    painter = widget.window.cairo_create()
    painter.set_source_rgb(1, 1, 1)
    painter.scale(1, 1)
    painter.rectangle(0, 0, width, height)
    painter.fill()
    pdf.get_page(0).render(painter)
def render_cairo(self, ctx):
    """Draw the optional PDF background, then every layer, onto ``ctx``.

    Args:
        ctx: Cairo context to draw on; ``show_page()`` is called on it
            once all layers are rendered.
    """
    has_pdf_background = self.background_type == 'pdf' and self.background_filename != ''
    if has_pdf_background:
        background_path = os.path.abspath(self.background_filename)
        background_uri = 'file://%s' % urllib.pathname2url(background_path)
        background = poppler.document_new_from_file(background_uri, password=None)
        # background_pageno is 1-based, poppler page indices are 0-based.
        background.get_page(self.background_pageno - 1).render_for_printing(ctx)

    for layer in self.layers:
        layer.render_cairo(ctx)
    ctx.show_page()
def parse(self):
    """Read title, author, page count and bookmarks from ``self.input_fn``.

    Returns:
        Tuple ``(meta, bookmarks)``. ``meta`` is a ``PDFMeta`` whose title
        and author are ``''`` when the document property is ``None``.
    """
    pdf_uri = 'file://' + os.path.abspath(self.input_fn)
    doc = poppler.document_new_from_file(pdf_uri, None)

    raw_title = doc.get_property('title')
    raw_author = doc.get_property('author')
    title = '' if raw_title is None else raw_title
    author = '' if raw_author is None else raw_author

    meta = PDFMeta(title, author, doc.get_n_pages())
    return (meta, self.get_bookmarks(doc))
def open(self, pdf, num=0):
    """Build the viewer widgets and render one page of a PDF.

    Args:
        pdf: Passed to poppler unchanged, so it is expected to already be
            in the form poppler accepts.
        num: Index of the page to show.
    """
    self.pageNum = num
    self.path = pdf

    # UI pieces are created before the document is opened.
    self.parseConfig()
    self.createRightPanel()
    self.createViewer()

    document = poppler.document_new_from_file(self.path, None)
    self.document = document
    self.numPages = document.get_n_pages()
    self.page = document.get_page(self.pageNum)

    self.surface = None
    self.createSurface()
    self.ctx = cairo.Context(self.surface)
    self.page.render(self.ctx)
def import_doc(self, file_uri, config, docsearch, current_doc=None):
    """Import each openable PDF below ``file_uri`` and index all its pages.

    Children whose basename does not end in ".pdf" (case-insensitive), or
    that poppler cannot open, are skipped. Imported documents are numbered
    in order: ``_NN`` is appended to both ``path`` and ``docid``.

    Args:
        file_uri: Location given to ``gio.File``; its children are scanned.
        config: Supplies ``workdir`` and is passed on to
            ``PdfDoc.import_pdf``.
        docsearch: Receives every page via ``index_page``.
        current_doc: Not used by this method.

    Returns:
        Tuple ``(doc, first_page)`` of the last imported document.

    Raises:
        AssertionError: If nothing could be imported.
    """
    parent = gio.File(file_uri)
    doc = None
    idx = 0
    for child in self.__get_all_children(parent):
        if not child.get_basename().lower().endswith(".pdf"):
            continue
        uri = child.get_uri()
        try:
            # Make sure we can import it.
            poppler.document_new_from_file(uri, password=None)
        except Exception:
            continue
        numbered = "_%02d" % idx
        doc = PdfDoc(config.workdir)
        doc.path += numbered
        doc.docid += numbered
        doc.import_pdf(config, uri)
        for page in doc.pages:
            docsearch.index_page(page)
        idx += 1
    if doc is None:
        # Explicit raise: an assert statement disappears under
        # ``python -O``. The exception type is unchanged.
        raise AssertionError("no importable PDF found")
    return (doc, doc.pages[0])
def __init__(self, fn, parent):
    """Load slides and notes for ``fn`` and build the presenter frame.

    The extension of ``fn`` is dropped; ``<base>.pdf`` holds the slides
    and ``<base>.notes.xml`` the notes. mupdf is started on the PDF.

    Args:
        fn: Path of the presentation, with or without extension.
        parent: Tk parent widget; its title is set.
    """
    base = os.path.splitext(fn)[0]
    self.notes = parse_notes(base + '.notes.xml')
    self.pdf = base + '.pdf'
    self.slide = 0

    # Slide document and per-page labels.
    pdf_uri = 'file://' + os.path.abspath(self.pdf)
    self.document = poppler.document_new_from_file(pdf_uri, None)
    self.nslides = self.document.get_n_pages()
    labels = [self.document.get_page(index).get_label() for index in range(self.nslides)]
    self.index_to_note_num = parse_indices_labels(labels)
    self.next_page = self.document.get_page(self.slide + 1)
    self.slide_size = tuple(int(dimension) for dimension in self.next_page.get_size())

    self.mupdf_pid = subprocess.Popen(["/usr/bin/mupdf", self.pdf]).pid

    # Key bindings.
    self.nextkeys = ['j', 'J', 'Right', 'Down', 'Next', 'space']
    self.prevkeys = ['k', 'K', 'Left', 'Up', 'Prior', 'BackSpace']
    # store digits so you can re-sync slides & pdf
    self.digits = ''

    Tkinter.Frame.__init__(self, parent, background=BG)
    self.textsize = 20
    parent.title('Presenting {}'.format(self.pdf))

    # The first note gets a usage hint in front of it.
    self.notes[0] = "\nDo `f', then `W' to fullscreen mupdf.\n" + self.notes[0]
    self.note = Tkinter.StringVar()
    self.note.set('1/{}: '.format(self.nslides) + self.notes[0])
    self.do_msg()  # dummy just to get initial UI arrangement

    self.label = Tkinter.Label(self)
    self.label.pack(anchor='ne')
    self.timer = Tkinter.Label(
        self,
        text="hit `t' to start timer",
        font=('Helvetica', 32, 'bold'),
        background=BG,
        fg=FG,
    )
    self.timer.pack(anchor='center')
    self.start_time = 0

    surface_width, surface_height = self.slide_size[0], self.slide_size[1]
    self.surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, surface_width, surface_height)
    self.context = cairo.Context(self.surface)

    self.shownote()
    self.focus_get()
    self.bind_all("<Key>", self.onKeyPressed)
def extract_highlights(filepath: str) -> List[Tuple[int, int, int, int, str]]:
    """Walk the annotations of a PDF and print those containing "xk".

    This is based on code from Marwan Alsabbagh,
    https://stackoverflow.com/questions/13748242/extracting-pdf-annotations-comments
    see http://socialdatablog.com/extract-pdf-annotations.html

    Args:
        filepath: Passed to poppler as-is, so it must already be in a form
            poppler accepts.

    NOTE(review): nothing is returned although the annotation promises a
    list of tuples, and the accumulated text is not used after the loop --
    confirm what callers expect before changing the return value.
    """
    # Open the document named by the parameter; the body used to read an
    # undefined name ``path``.
    doc = poppler.document_new_from_file(filepath, None)
    pages = [doc.get_page(i) for i in range(doc.get_n_pages())]
    # Must exist before the first "+=", otherwise the first annotation
    # raised UnboundLocalError.
    x = ""
    for page_no, page in enumerate(pages):
        items = [i.annot.get_contents() for i in page.get_annot_mapping()]
        items = [i for i in items if i]
        for j in items:
            # One replace is enough; the second identical call was a no-op.
            j = j.replace("\r\n", " ")
            x = x + "\n\n" + "'{}' (page {})".format(j, page_no + 1)
            if "xk" in j:
                print(j)
def _get_text(self, file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: Path appended verbatim to ``file://`` to form the URI.

    Returns:
        The per-page text joined without any separator.
    """
    document = poppler.document_new_from_file("file://" + file, None)
    page_texts = []
    for page_index in range(0, document.get_n_pages()):
        page = document.get_page(page_index)
        page_width, page_height = page.get_size()
        # Selection rectangle covering the whole page.
        whole_page = poppler.Rectangle()
        whole_page.x1 = 0
        whole_page.x2 = page_width
        whole_page.y1 = 0
        whole_page.y2 = page_height
        # Currently we are getting the layout from the pdf here
        # we should collapse it
        page_texts.append(page.get_text(poppler.SELECTION_GLYPH, whole_page))
    return "".join(page_texts)
def LoadDocument(self, file1):
    """Open a slide PDF, scale it to 90% of the screen and go fullscreen.

    The scale is the smaller of the horizontal and vertical ratios of
    screen size to page size, each multiplied by 0.9.

    Args:
        file1: Path appended verbatim to ``file://`` to form the URI.
    """
    self.document = poppler.document_new_from_file("file://" + file1, None)
    self.total_pages = self.document.get_n_pages()
    self.current_page = self.document.get_page(self.n_page)
    self.width, self.height = self.current_page.get_size()

    screen = wx.DisplaySize()
    S1 = (screen[0] / float(self.width)) * 0.9
    S2 = (screen[1] / float(self.height)) * 0.9
    # Pre-formatted strings reproduce the print statements' output (note
    # the double space after each prefix) on Python 2 and 3 alike.
    print("S1 S2  %s %s" % (S1, S2))
    self.scale = S1 if S1 < S2 else S2
    print("Scale is -->  %s" % (self.scale,))

    self._UpdateSize()
    self.OnPaint(self)
    fullscreen_style = (
        wx.FULLSCREEN_NOBORDER
        ^ wx.FULLSCREEN_NOTOOLBAR
        ^ wx.FULLSCREEN_NOMENUBAR
        ^ wx.FULLSCREEN_NOSTATUSBAR
    )
    self.parent.parent.ShowFullScreen(True, style=fullscreen_style)
def __init__(self, filename, nfile, tmp_dir):
    """Register a file and, if it is a PDF, copy it to ``tmp_dir`` and open it.

    When the content type is not ``pdf_mime_type``, ``nfile`` and
    ``npage`` are both set to 0 and no copy is made.

    Args:
        filename: Path of the file to register.
        nfile: Count of files registered so far; this one becomes
            ``nfile + 1``.
        tmp_dir: Directory that receives the numbered working copy.
    """
    self.filename = os.path.abspath(filename)
    self.path, basename = os.path.split(self.filename)
    self.shortname, self.ext = os.path.splitext(basename)

    info = gio.File(filename).query_info('standard::content-type')
    mime_type = info.get_content_type()
    if mime_type != pdf_mime_type:
        self.nfile = 0
        self.npage = 0
        return

    self.nfile = nfile + 1
    self.mtime = os.path.getmtime(filename)
    copy_basename = ('%02d_' % self.nfile) + self.shortname + '.pdf'
    self.copyname = os.path.join(tmp_dir, copy_basename)
    shutil.copy(self.filename, self.copyname)
    self.document = poppler.document_new_from_file(file_prefix + self.copyname, None)
    self.npage = self.document.get_n_pages()
def setPDFBox(self, uri=""):
    """Open a PDF and create the drawing area that displays it.

    Args:
        uri: URI of the document. When empty, the path stored under
            ``self._conf["PAGE_ACCEUIL"]`` is used with a file:// prefix.
    """
    if uri == "":
        uri = "file://" + self._conf["PAGE_ACCEUIL"]

    self.document = poppler.document_new_from_file(uri, None)
    self.n_pages = self.document.get_n_pages()
    self.current_page = self.document.get_page(0)
    self.scale = self._conf["ZOOM"]
    self.width, self.height = self.current_page.get_size()

    # Pixel size of the page at the configured zoom.
    scaled_width = int(self.width * self.scale)
    scaled_height = int(self.height * self.scale)
    self.surface = cairo.ImageSurface(cairo.FORMAT_RGB24, scaled_width, scaled_height)

    scroller = self.wTree.get_widget("scrolledwindow2")
    self.dwg = gtk.DrawingArea()
    self.dwg.set_size_request(scaled_width, scaled_height)
    self.dwg.connect("expose-event", self.on_expose)
    scroller.add_with_viewport(self.dwg)
    self.dwg.show()
def __init__(self, parent, content="", filename="", render=False):
    """Create a page with its source buffer and, if present, its cached PDF.

    Args:
        parent: Owner; ``parent.window`` supplies the placeholder icon.
        content: Initial text, applied through ``set_content``.
        filename: Stored as ``self._filename``; when non-empty, an
            existing ``.tex``/``.pdf`` pair is reused as the rendering.
        render: Compile the page when no cached PDF was found.
    """
    self.parent = parent
    self.buffer = sourceview.Buffer(language=LATEXLANG)
    self.buffer.connect("modified-changed", self.on_buffer_modified_changed)
    self.doc = None
    # Placeholder until a real thumbnail has been rendered.
    self.pb = self.parent.window.render_icon(gtk.STOCK_MISSING_IMAGE, gtk.ICON_SIZE_DIALOG)
    self._filename = filename
    self._modified_since_save = True  # Set to False at end of document load.
    self._modified_since_compile = True
    self.set_content(content)

    have_cached_pdf = False
    if self._filename:
        pdf_path = base_filename(self.fullfilename) + '.pdf'
        tex_path = self.fullfilename + '.tex'
        if os.path.exists(tex_path) and os.path.exists(pdf_path):
            self.doc = poppler.document_new_from_file('file://' + os.path.abspath(pdf_path), None)
            self.render_thumb()
            self._modified_since_compile = False
            have_cached_pdf = True

    if render and not have_cached_pdf:
        # The thumbnail is rendered only when the compile status is falsy.
        self.compile(lambda status: not status and self.render_thumb(), False)
def main():
    """List the non-link annotations of the PDF named in ``sys.argv[1]``.

    One line is printed per annotation, then a line with the total count
    (or "no annotations found").
    """
    input_filename = sys.argv[1]
    # http://blog.hartwork.org/?p=612
    pdf_uri = 'file://%s' % urllib.pathname2url(os.path.abspath(input_filename))
    document = poppler.document_new_from_file(pdf_uri, None)
    page_count = document.get_n_pages()
    all_annots = 0

    for page_index in range(page_count):
        page = document.get_page(page_index)
        for mapping in page.get_annot_mapping():
            annot = mapping.annot
            if annot.get_annot_type().value_name == 'POPPLER_ANNOT_LINK':
                continue
            all_annots += 1
            report = 'page: {0:3}, {1:10}, type: {2:10}, content: {3}'.format(
                page_index + 1,
                annot.get_modified(),
                annot.get_annot_type().value_nick,
                annot.get_contents())
            print(report)

    if all_annots > 0:
        print(str(all_annots) + " annotation(s) found")
    else:
        print("no annotations found")
def show_pdf(self):
    """Render the current page of the current PDF into the pyplot figure.

    The PDF is picked from ``self.pdf_names`` by ``self.pdf_index``. If
    the selected page cannot provide a size, the PDF is removed from the
    list and ``add_unprocessed()`` is called instead of drawing.
    """
    pyplot.clf()
    self.pdf_name = self.pdf_names[self.pdf_index]
    path = absolute_file_scheme_path(self.pdf_name)
    doc = poppler.document_new_from_file(path, None)
    self.num_pages = doc.get_n_pages()

    # Handle edge condition of moving to new pdf with fewer pages.
    # Page indices are zero-based, so num_pages itself is already out of
    # range; with ">" that index slipped through and the PDF was dropped
    # by the handler below.
    if self.page_num >= self.num_pages:
        self.page_num = 0

    page = doc.get_page(self.page_num)
    try:
        self.page_width, self.page_height = page.get_size()
    except AttributeError:
        # No usable page object: drop this PDF from the list.
        self.pdf_names.remove(self.pdf_name)
        self.add_unprocessed()
        return

    self.page_width = int(self.page_width)
    self.page_height = int(self.page_height)
    image_surface = cairo.ImageSurface(
        cairo.FORMAT_ARGB32, self.page_width, self.page_height
    )
    ctxt = cairo.Context(image_surface)
    page.render(ctxt)

    # Surface bytes -> (height, width, 4) array of unsigned bytes.
    image_matrix = np.asarray(image_surface.get_data())
    image_matrix = image_matrix.astype("|u1")
    image_matrix = image_matrix.reshape((self.page_height, self.page_width, 4))
    pyplot.imshow(image_matrix)
    self.render_rects()
    pyplot.draw()
def __init__(self, uri, page=0):
    """Open a PDF, detect whether it carries notes and start the UI.

    :param uri: URI to the PDF file to open (local only, starting with
       :file:`file://`)
    :type  uri: string
    :param page: page number to which the file should be opened
    :type  page: integer
    """
    # Check poppler-python version -- we need Bazaar rev. 62
    if not pympress.util.poppler_links_available():
        sys.stderr.write("Hyperlink support not found in poppler-python -- be sure to use at least bazaar rev. 62 to have them working\n")

    # Document, page count, current page number and page cache.
    self.doc = poppler.document_new_from_file(uri, None)
    self.nb_pages = self.doc.get_n_pages()
    self.cur_page = page
    self.pages_cache = {}

    # Guess if the document has notes
    first_shown = self.page(page)
    if first_shown is not None:
        # "Regular" pages will have an aspect ratio of 4/3, 16/9, 16/10...
        # Full A4 pages will have an aspect ratio < 1.
        # So if the aspect ratio is >= 2, we can assume it is a document
        # with notes.
        self.notes = first_shown.get_aspect_ratio() >= 2

    # Create windows
    self.ui = pympress.ui.UI(self)
    self.ui.on_page_change(False)
    self.ui.run()
def __load_pdf_file(self, filename):
    """Load a PDF and fill the page model with one entry per page.

    Entries at even indices use ``self.__odd_crop`` and entries at odd
    indices use ``self.__even_crop`` (index 0 is shown as page "1").

    Args:
        filename: Path of the PDF; converted to an absolute path before
            the file:// URI is built.
    """
    self.__pdf_filename = filename
    # NOTE(review): assigns a local variable only. A ``global`` statement
    # would be needed to update a module-level LAST_OPEN_FOLDER -- confirm
    # the intent.
    LAST_OPEN_FOLDER = os.path.dirname(filename)
    filename = os.path.abspath(filename)
    self.__pdf_document = poppler.document_new_from_file(
        'file://%s' % filename, None)
    self.__n_pages = self.__pdf_document.get_n_pages()
    self.__pages_model.clear()
    for i in range(self.__n_pages):
        # The size is looked up for each page. It used to be looked up on
        # even indices only, leaving odd pages with their predecessor's size.
        size = self.__pdf_document.get_page(i).get_size()
        if i % 2 == 0:
            crop_setting = CropSetting(self.__odd_crop)
        else:
            crop_setting = CropSetting(self.__even_crop)
        self.__pages_model.append([str(i + 1), PageInfo(i, crop_setting, size)])
    if not self.__pdf_view:
        self.__pdf_view = PdfView()
        self.__canvas.get_root_item().add_child(self.__pdf_view, next_index())
def set_document(self, filename, operation, context):
    """Open a PDF and configure the print operation from its first page.

    The page count, orientation and paper size of ``operation`` are set
    from the document. All pages are assumed to match the first one.

    Args:
        filename: A ``file:`` URI, or a filesystem path that is resolved
            and converted to one.
        operation: Print operation that receives the page count and the
            default page setup.
        context: Not used by this method.
    """
    # Test for the URI scheme itself. The bare prefix 'file' also matched
    # plain paths such as 'file.pdf' or 'files/x.pdf', which then reached
    # poppler without being turned into a URI.
    if not filename.startswith('file:'):
        filename = 'file://' + os.path.realpath(filename)
    self.d = poppler.document_new_from_file(filename, None)
    operation.set_n_pages(self.d.get_n_pages())

    # Assume all pages are same
    page = self.d.get_page(0)
    w, h = page.get_size()
    if w > h:
        # Normalise to portrait dimensions; the orientation records the swap.
        w, h = h, w
        ori = gtk.PAGE_ORIENTATION_LANDSCAPE
    else:
        ori = gtk.PAGE_ORIENTATION_PORTRAIT
    page_setup = gtk.PageSetup()
    page_setup.set_orientation(ori)

    # Use a known paper size when the rounded dimensions match one,
    # otherwise a custom size in points.
    size = int(round(w)), int(round(h))
    gtk_size = rl2gtk_papersizes.get(size, None)
    if gtk_size:
        ps = gtk.PaperSize(gtk_size)
    else:
        ps = gtk.paper_size_new_custom('', '', w, h, gtk.UNIT_POINTS)
    page_setup.set_paper_size(ps)
    operation.set_default_page_setup(page_setup)
def set_document(self, filename, operation, context):
    """Load a PDF and derive the print setup from its first page.

    Sets the number of pages and a default page setup (orientation and
    paper size) on ``operation``. Every page is assumed to have the size
    of page 0.

    Args:
        filename: Either a ``file:`` URI or a local path; a local path is
            resolved with ``os.path.realpath`` and prefixed with file://.
        operation: Print operation to configure.
        context: Not used by this method.
    """
    # Only a real "file:" scheme counts as a URI. Checking for the bare
    # prefix 'file' misclassified local names like 'file.pdf' or
    # 'files/x.pdf' as URIs.
    is_uri = filename.startswith('file:')
    if not is_uri:
        filename = 'file://' + os.path.realpath(filename)
    self.d = poppler.document_new_from_file(filename, None)
    operation.set_n_pages(self.d.get_n_pages())

    # Assume all pages are same
    w, h = self.d.get_page(0).get_size()
    if w > h:
        # Keep width <= height and express the rotation as orientation.
        w, h = h, w
        ori = gtk.PAGE_ORIENTATION_LANDSCAPE
    else:
        ori = gtk.PAGE_ORIENTATION_PORTRAIT

    page_setup = gtk.PageSetup()
    page_setup.set_orientation(ori)
    size = int(round(w)), int(round(h))
    gtk_size = rl2gtk_papersizes.get(size, None)
    if gtk_size:
        ps = gtk.PaperSize(gtk_size)
    else:
        # No named paper size for these dimensions: define one in points.
        ps = gtk.paper_size_new_custom('', '', w, h, gtk.UNIT_POINTS)
    page_setup.set_paper_size(ps)
    operation.set_default_page_setup(page_setup)
def __init__(self):
    """Load the configured presentation and show its first slide.

    All settings are read from ``config.presentation``: the slide PDF
    path, the slides to post, their titles and the slide image pattern.
    """
    settings = config.presentation
    self.presentation_config = settings

    slides_uri = "file://%s" % settings["slides"]
    self.presentation = poppler.document_new_from_file(slides_uri, None)
    self.n_pages = self.presentation.get_n_pages()
    self.current_page = self.presentation.get_page(0)
    self.setup_window()

    # Posting state.
    self.post_slide_deferred = None
    self.posted_slides = []
    self.slides_to_post = settings["to_post"]
    self.slide_titles = settings["titles"]
    self.image_pattern = settings["slide_image_pattern"]

    # Command name -> handler.
    self.commands = {
        "next": self.display_next,
        "previous": self.display_previous,
        "quit": self.quit,
    }

    # Hack for now to display first slide
    self.display_relative_slide(0, lambda x: True)
def refresh(self):
    """Export a preview to a temporary file and render its first page.

    The result is stored in ``self.__preview`` as ``(path, image)``.
    """
    # Make the preview: export through __save() into a fresh temp name
    # that carries the first valid extension.
    temp_base = os.tempnam(None, "paperwork_export_")
    temp_name = "%s.%s" % (temp_base, self.valid_exts[0])
    path = self.__save(temp_name, pages=(0, 1))

    # Reload the preview from the exported file.
    pdfdoc = poppler.document_new_from_file(("file://%s" % path), password=None)
    assert(pdfdoc.get_n_pages() > 0)
    first_page = pdfdoc.get_page(0)
    page_width, page_height = first_page.get_size()

    surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, int(page_width), int(page_height))
    first_page.render(cairo.Context(surface))
    self.__preview = (path, surface2image(surface))
def render(self):
    """Save the report to its file and open that file with poppler.

    The document is stored in ``self._document``; an empty password is
    passed to poppler.
    """
    report = self._report
    report.save()
    report_uri = gio.File(path=report.filename).get_uri()
    self._document = poppler.document_new_from_file(report_uri, password="")
def main():
    """Add ToUnicode tables to the fonts of each PDF and record ink extents.

    For every PDF on the command line:

    1. Each embedded font that is also found among the PDF's font objects
       gets a generated ToUnicode stream and is saved as an ``.sfd`` file
       in ``--outdir``; the PDF is then rewritten in place.
    2. ``boxsize.txt`` next to the PDF is rewritten: every ``display:``
       line gets the ink extents of its page appended. Pages are counted
       over the ``inline:`` and ``display:`` lines together.
    """
    parser = argparse.ArgumentParser(
        description='Add ToUnicode tables to PDF files.')
    parser.add_argument('--outdir', default='tmp/sfd', type=str,
                        help='Output .sfd files to this directory')
    parser.add_argument('pdfs', type=str, nargs='+',
                        help='PDF files to process')
    args = parser.parse_args()

    fontnum = 0
    for pdf in args.pdfs:
        print("Adding ToUnicode tables to PDF file {}".format(pdf))
        with open(pdf, 'rb') as fobj:
            pdfdata = fobj.read()
        reader = PdfReader(fdata=pdfdata)
        reader.read_all()

        # Font objects that have a descriptor, keyed by font name without
        # its first character.
        font_objects = [
            obj for obj in reader.indirect_objects.values()
            if hasattr(obj, 'Type') and obj.Type == '/Font'
        ]
        fonts = {}
        for font_object in font_objects:
            if font_object.FontDescriptor is not None:
                fonts[font_object.FontDescriptor.FontName[1:]] = font_object

        for fontname in fontforge.fontsInFile(pdf):
            if fontname not in fonts:
                print(
                    "WARNING: font {} not found in pdf file".format(fontname))
                continue
            print("Adding ToUnicode table to font {}".format(fontname))
            font = fontforge.open('{}({})'.format(pdf, fontname))
            pdf_font = fonts[fontname]
            pdf_font.ToUnicode = PdfDict()
            pdf_font.ToUnicode.stream = generate_tounicode(font, pdf_font)
            # Need to save the modified font because fontforge won't read
            # ToUnicode when it converts to woff later.
            font.fontname = 'pretex{:06d}'.format(fontnum)
            sfd_name = '[{}]{}.sfd'.format(os.path.basename(pdf)[:-4], fontname)
            font.save(os.path.join(args.outdir, sfd_name))
            fontnum += 1
        PdfWriter(pdf, trailer=reader).write()

        # Measure extents for displayed equations
        pdfpath = os.path.realpath(os.path.dirname(pdf))
        rendered = poppler.document_new_from_file(
            'file://{}'.format(os.path.realpath(pdf)), None)
        boxsize = os.path.join(pdfpath, 'boxsize.txt')
        with open(boxsize) as fobj:
            lines = fobj.readlines()
        with open(boxsize, 'w') as fobj:
            pageno = 0
            for line in lines:
                if not line.startswith(('inline:', 'display:')):
                    fobj.write(line)
                    continue
                # Both kinds of equation line advance the page counter.
                pageno += 1
                if not line.startswith('display:'):
                    fobj.write(line)
                    continue
                page = rendered.get_page(pageno - 1)
                width, height = page.get_size()
                recording = cairo.RecordingSurface(
                    cairo.Content.COLOR_ALPHA,
                    cairo.Rectangle(0, 0, width, height))
                page.render_for_printing(cairo.Context(recording))
                x, y, w, h = recording.ink_extents()
                fobj.write(line.strip() + '{},{},{},{}\n'.format(x, y, w, h))
command). """ import os import itertools import gtk import goocanvas import gobject import poppler import read_gov_law_proposal as gov import pdftotext_ext as ext pdf = poppler.document_new_from_file('file://%s/538.pdf' % os.getcwd(), password=None) def squares(width, height, n_wide, n_high): dx = float(width) / n_wide dy = float(height) / n_high for j in xrange(n_high): for i in xrange(n_wide): yield (dx * i, dy * j, dx, dy) def enlarging_square_range(start, height, end_width, n): for i in xrange(n + 1): yield (start[0], start[1], end_width * i / n, height)
import sys
import os.path
import poppler
import cairo


def convert_to_svg(page, filename):
    """Render one poppler page into an SVG file of the same size.

    Args:
        page: Poppler page providing ``get_size()`` and ``render()``.
        filename: Path of the SVG file to create.
    """
    w, h = page.get_size()
    surface = cairo.SVGSurface(filename, w, h)
    ctx = cairo.Context(surface)
    page.render(ctx)


if __name__ == '__main__':
    # Usage: <script> FILE.pdf [PAGE_INDEX ...]
    # Without page indices, every page is converted. Output files are
    # named after the 1-based page number and written next to the PDF,
    # together with a pages.html index.
    filename = sys.argv[1]
    doc = poppler.document_new_from_file('file://' + os.path.abspath(filename), None)
    # Real lists are required here: on Python 3 a map object is always
    # truthy (so the fallback never applied) and can be iterated only
    # once, yet the page numbers are needed twice below.
    page_nums = [int(arg) for arg in sys.argv[2:]] or list(range(doc.get_n_pages()))
    dirpath = os.path.dirname(filename)
    for pn in page_nums:
        page = doc.get_page(pn)
        convert_to_svg(page, os.path.join(dirpath, '%d.svg' % (pn + 1,)))
    html = ''.join(
        '<a href="{0}.svg" target="page">Page {0}</a><br />\n'.format(pn + 1)
        for pn in page_nums)
    with open(os.path.join(dirpath, "pages.html"), "w") as f:
        f.write(html)
def load_file(self, file):
    """Open a PDF and resize to the page at index ``self.n_page``.

    Args:
        file: Path appended verbatim to ``file://`` to form the URI.
    """
    document = poppler.document_new_from_file("file://" + file, None)
    self.document = document
    self.n_pages = document.get_n_pages()
    page = document.get_page(self.n_page)
    self.current_page = page
    self.width, self.height = page.get_size()
    self._update_size()
def get_pdf(filename):
    """Return the poppler document for ``filename``, opening it at most once.

    Documents are kept in the module-level ``pdf_cache`` dict, keyed by
    the ``filename`` string exactly as given.

    Args:
        filename: Path of the PDF; resolved with ``os.path.realpath`` when
            the document is first opened.
    """
    try:
        return pdf_cache[filename]
    except KeyError:
        pass
    document = poppler.document_new_from_file(
        'file://%s' % os.path.realpath(filename), password=None)
    pdf_cache[filename] = document
    return pdf_cache[filename]