class PdfDocument(Document):
    """Document whose text is extracted from a PDF via the ``poppler`` binding.

    NOTE(review): another ``PdfDocument`` class is defined later in this
    file; if both live in the same module the later definition replaces
    this one at import time -- confirm which is intended.
    """

    def _get_text(self, file):
        """Return the text of every page of the PDF at *file*, concatenated.

        *file* is a filesystem path.  It is made absolute before being
        turned into a ``file://`` URI, because a relative path would yield
        a URI whose first path component is parsed as a host name.

        NOTE(review): the path is not percent-escaped, so names containing
        characters such as ``#`` or ``%`` may still be rejected -- confirm
        whether that needs handling.
        """
        import os  # local import keeps this block self-contained

        uri = "file://" + os.path.abspath(file)
        document = poppler.document_new_from_file(uri, None)

        page_texts = []
        for index in range(document.get_n_pages()):
            page = document.get_page(index)
            width, height = page.get_size()

            # Selection rectangle covering the whole page.
            area = poppler.Rectangle()
            area.x1 = 0
            area.x2 = width
            area.y1 = 0
            area.y2 = height

            # TODO: the text currently keeps the layout found in the PDF;
            # it should be collapsed.
            page_texts.append(page.get_text(poppler.SELECTION_GLYPH, area))

        # Join once instead of growing a string page by page.
        return "".join(page_texts)

    def translate(self, config):
        """Extract the PDF text and store its translation in ``braille_text``.

        Sets ``config['outputFormat']['inputTextEncoding']`` to ``"UTF8"``
        (mutating *config*), builds a ``Translator`` from it, stores that
        translator on ``self.translator`` and the translated text of
        ``self.input_file`` on ``self.braille_text``.
        """
        # FIXME: Check if poppler gives us always UTF-8 strings
        config['outputFormat']['inputTextEncoding'] = "UTF8"
        self.translator = Translator(config)
        text = self._get_text(self.input_file)
        self.braille_text = self.translator.translate_string(text)
class DocDocument(Document):
    """Document whose text is extracted from a Word file with antiword."""

    def _get_text(self, file):
        """Run antiword on *file* and return what it writes to stdout.

        The executable is taken from the module-level name ``antiword`` and
        is invoked with the arguments ``-x db`` followed by *file*.
        ``subprocess.check_output`` raises ``CalledProcessError`` when the
        command exits with a non-zero status.

        NOTE(review): on Python 3 ``check_output`` returns ``bytes`` --
        confirm that is what ``translate_string`` expects.
        """
        return subprocess.check_output([antiword, "-x", "db", file])

    def translate(self, config):
        """Extract the document text and store its translation in ``braille_text``.

        Sets ``config['outputFormat']['inputTextEncoding']`` to ``"UTF8"``
        (mutating *config*), builds a ``Translator`` from it, stores that
        translator on ``self.translator`` and the translated text of
        ``self.input_file`` on ``self.braille_text``.
        """
        config['outputFormat']['inputTextEncoding'] = "UTF8"
        self.translator = Translator(config)
        result = self._get_text(self.input_file)
        self.braille_text = self.translator.translate_string(result)
class TextDocument(Document):
    """Plain-text document, translated from a file or an in-memory string."""

    def set_text(self, text):
        """Remember *text* as the string to translate when no input file is set."""
        self.text = text

    def translate(self, config):
        """Translate the document and store the result in ``braille_text``.

        A ``Translator`` built from *config* is stored on
        ``self.translator``.  When ``self.input_file`` is ``None`` the
        string kept in ``self.text`` is translated; otherwise the file
        named by ``self.input_file`` is translated.
        """
        self.translator = Translator(config)
        if self.input_file is None:
            braille = self.translator.translate_string(self.text)
        else:
            braille = self.translator.translate_file(self.input_file)
        self.braille_text = braille
class OdtDocument(Document):
    """Document whose text is extracted from an ODT file via ``ODF2XHTML``."""

    def _get_text(self, file):
        """Convert *file* to XHTML and return it UTF-8 encoded.

        Characters that cannot be encoded are replaced by XML character
        references.  Conversion is best effort: if it fails, the error is
        logged and an empty string is returned instead of raising.

        NOTE(review): on Python 3 the success path returns ``bytes`` while
        the fallback returns ``str`` -- confirm what ``translate_string``
        expects.
        """
        import logging  # local import keeps this block self-contained

        odhandler = ODF2XHTML(False, False)
        # Route text:changed-region elements to s_ignorexml with no second
        # handler -- presumably so tracked-change content is left out of
        # the output; confirm against the ODF2XHTML handler table.
        odhandler.elements[(TEXTNS, u"changed-region")] = (odhandler.s_ignorexml, None)
        try:
            result = odhandler.odf2xhtml(file).encode('UTF-8', 'xmlcharrefreplace')
        except Exception:
            # Only Exception is caught so KeyboardInterrupt and SystemExit
            # still propagate; the failure is recorded rather than hidden.
            logging.getLogger(__name__).exception(
                "Could not convert %s to XHTML", file)
            result = ""
        return result

    def translate(self, config):
        """Extract the document text and store its translation in ``braille_text``.

        Sets ``config['outputFormat']['inputTextEncoding']`` to ``"UTF8"``
        (mutating *config*), builds a ``Translator`` from it, stores that
        translator on ``self.translator`` and the translated text of
        ``self.input_file`` on ``self.braille_text``.
        """
        config['outputFormat']['inputTextEncoding'] = "UTF8"
        self.translator = Translator(config)
        result = self._get_text(self.input_file)
        self.braille_text = self.translator.translate_string(result)
class PdfDocument(Document):
    """Document whose text is extracted from a PDF via ``Poppler.Document``."""

    def _get_text(self, file):
        """Return the text of every page of the PDF at *file*, concatenated.

        *file* is a filesystem path.  It is made absolute before being
        turned into a ``file://`` URI, because a relative path would yield
        a URI whose first path component is parsed as a host name.

        NOTE(review): the path is not percent-escaped, so names containing
        characters such as ``#`` or ``%`` may still be rejected -- confirm
        whether that needs handling.
        """
        import os  # local import keeps this block self-contained

        uri = "file://" + os.path.abspath(file)
        # The second argument is the document password; an empty string is
        # passed, as before.
        document = Poppler.Document.new_from_file(uri, "")

        # Join once instead of growing a string page by page.
        return "".join(
            document.get_page(index).get_text()
            for index in range(document.get_n_pages())
        )

    def translate(self, config):
        """Extract the PDF text and store its translation in ``braille_text``.

        Sets ``config['outputFormat']['inputTextEncoding']`` to ``"UTF8"``
        (mutating *config*), builds a ``Translator`` from it, stores that
        translator on ``self.translator`` and the translated text of
        ``self.input_file`` on ``self.braille_text``.
        """
        # FIXME: Check if poppler gives us always UTF-8 strings
        config['outputFormat']['inputTextEncoding'] = "UTF8"
        self.translator = Translator(config)
        text = self._get_text(self.input_file)
        self.braille_text = self.translator.translate_string(text)