Пример #1
0
class PdfDocument(Document):
    """Document backed by a PDF file whose text is extracted with poppler."""

    def _get_text(self, file):
        """Return the text of every page of the PDF at path *file*.

        The text of each page is selected over the page's full rectangle
        and the per-page results are concatenated in page order.
        """
        # NOTE(review): the path is neither made absolute nor percent-escaped
        # before being turned into a URI -- confirm callers always pass an
        # absolute path without special characters.
        uri = "file://" + file
        document = poppler.document_new_from_file(uri, None)

        chunks = []
        for index in range(document.get_n_pages()):
            page = document.get_page(index)
            width, height = page.get_size()

            # Selection rectangle covering the whole page.
            area = poppler.Rectangle()
            area.x1 = 0
            area.x2 = width
            area.y1 = 0
            area.y2 = height

            # Currently we are getting the layout from the pdf here;
            # we should collapse it.
            chunks.append(page.get_text(poppler.SELECTION_GLYPH, area))

        # Join once instead of growing a string with += on every page.
        return "".join(chunks)

    def translate(self, config):
        """Translate the PDF's text and store it in ``self.braille_text``.

        Note that *config* is modified in place: its
        ``['outputFormat']['inputTextEncoding']`` entry is forced to "UTF8".
        """
        # FIXME: Check if poppler gives us always UTF-8 strings
        config['outputFormat']['inputTextEncoding'] = "UTF8"
        self.translator = Translator(config)
        text = self._get_text(self.input_file)
        self.braille_text = self.translator.translate_string(text)
Пример #2
0
class DocDocument(Document):
    """Document backed by a Word file converted through antiword."""

    def _get_text(self, file):
        """Return antiword's output for the Word document at path *file*.

        antiword is run with ``-x db`` (DocBook XML output).  A non-zero
        exit status raises ``subprocess.CalledProcessError``.
        """
        # NOTE(review): `antiword` is a bare name that must be defined
        # elsewhere in this module (presumably the executable's path) --
        # confirm.
        return subprocess.check_output([antiword, "-x", "db", file])

    def translate(self, config):
        """Translate the document's text and store it in ``self.braille_text``.

        Note that *config* is modified in place: its
        ``['outputFormat']['inputTextEncoding']`` entry is forced to "UTF8".
        """
        config['outputFormat']['inputTextEncoding'] = "UTF8"
        self.translator = Translator(config)
        result = self._get_text(self.input_file)
        self.braille_text = self.translator.translate_string(result)
Пример #3
0
class TextDocument(Document):
    """Document whose content is plain text, from a file or set directly."""

    def set_text(self, text):
        """Store *text* as the content to translate when there is no input file."""
        self.text = text

    def translate(self, config):
        """Translate the document and store the result in ``self.braille_text``.

        When ``self.input_file`` is set, the file is translated directly;
        otherwise the text previously stored in ``self.text`` is translated.
        """
        self.translator = Translator(config)
        if self.input_file is not None:
            self.braille_text = self.translator.translate_file(self.input_file)
        else:
            # NOTE(review): relies on self.text having been assigned (e.g.
            # through set_text) before this call -- confirm whether the base
            # class provides a default.
            self.braille_text = self.translator.translate_string(self.text)
Пример #4
0
class OdtDocument(Document):
    """Document backed by an ODF text file converted to XHTML."""

    def _get_text(self, file):
        """Return the XHTML rendering of the ODF document *file*, UTF-8 encoded.

        Conversion is best-effort: if it fails, an empty string is returned
        instead of propagating the error.
        """
        odhandler = ODF2XHTML(False, False)
        # Route tracked-change regions to the handler's "ignore" callback so
        # they are left out of the converted output.
        odhandler.elements[(TEXTNS, u"changed-region")] = (
            odhandler.s_ignorexml,
            None,
        )
        try:
            return odhandler.odf2xhtml(file).encode('UTF-8', 'xmlcharrefreplace')
        except Exception:
            # Deliberately best-effort, but unlike a bare ``except`` this no
            # longer swallows KeyboardInterrupt or SystemExit.
            return ""

    def translate(self, config):
        """Translate the document's text and store it in ``self.braille_text``.

        Note that *config* is modified in place: its
        ``['outputFormat']['inputTextEncoding']`` entry is forced to "UTF8".
        """
        config['outputFormat']['inputTextEncoding'] = "UTF8"
        self.translator = Translator(config)
        result = self._get_text(self.input_file)
        self.braille_text = self.translator.translate_string(result)
Пример #5
0
class PdfDocument(Document):
    """Document backed by a PDF file whose text is extracted with Poppler."""

    def _get_text(self, file):
        """Return the text of every page of the PDF at path *file*.

        The per-page text is concatenated in page order.
        """
        # NOTE(review): the path is neither made absolute nor percent-escaped
        # before being turned into a URI -- confirm callers always pass an
        # absolute path without special characters.
        uri = "file://" + file
        document = Poppler.Document.new_from_file(uri, "")

        # Collect the pages and join once instead of growing a string
        # with += on every iteration.
        return "".join(
            document.get_page(index).get_text()
            for index in range(document.get_n_pages())
        )

    def translate(self, config):
        """Translate the PDF's text and store it in ``self.braille_text``.

        Note that *config* is modified in place: its
        ``['outputFormat']['inputTextEncoding']`` entry is forced to "UTF8".
        """
        # FIXME: Check if poppler gives us always UTF-8 strings
        config['outputFormat']['inputTextEncoding'] = "UTF8"
        self.translator = Translator(config)
        text = self._get_text(self.input_file)
        self.braille_text = self.translator.translate_string(text)