def convert_docfile(self, input, filename, output, outputname):
    """Convert ``input/filename`` to ``output/outputname`` with ``self.converter``.

    Args:
        input: directory that holds the source file.
        filename: name of the source file inside ``input``.
        output: directory the converted file is written to.
        outputname: name of the converted file inside ``output``.

    Returns:
        True when the converter call returned without raising, False when
        any exception was raised (the exception is logged at info level and
        not propagated).
    """
    try:
        # Everything stays inside the try block so that a failure while
        # building the paths is reported the same way as a converter failure.
        run_conversion = self.converter.convert
        run_conversion(os.path.join(input, filename),
                       os.path.join(output, outputname))
    except Exception as error:
        logger.info(error)
        return False
    return True
def wait_until_port_open(timeout=10):
    """Block until the unoconverter server port accepts connections.

    Polls ``utils.builtin.is_port_open(self.host, self.port)`` with a 10 ms
    pause between probes. If the port is still closed once ``timeout``
    seconds have elapsed, the failure is printed and logged and the function
    returns normally; no exception is raised.

    NOTE(review): ``self`` is not a parameter of this function; it is
    resolved from the enclosing scope, so this is presumably defined inside
    a method -- confirm.

    Args:
        timeout: maximum number of seconds to wait for the port.
    """
    # Measure real elapsed time. Summing the nominal sleep intervals ignores
    # the time spent inside the port probe and accumulates float error, so
    # the effective timeout could be much longer than requested.
    clock = getattr(time, 'monotonic', time.time)  # monotonic: Python 3.3+
    deadline = clock() + timeout
    while not utils.builtin.is_port_open(self.host, self.port):
        if clock() >= deadline:
            message = "Can not connect unoconverter server."
            # Parenthesised form works as a statement on Python 2 and as a
            # function call on Python 3.
            print(message)
            logger.info(message)
            break
        time.sleep(0.01)
def __init__(self, fileobj, name, output_base):
    """Set up the processor and immediately run the conversion.

    After the base-class initialisation this makes sure a document
    converter exists, records the HTML and DocBook output directories,
    resolves the mimetype and stores the outcome of ``self.convert()`` in
    ``self.result``.

    Args:
        fileobj: forwarded unchanged to the base-class initialiser.
        name: forwarded unchanged to the base-class initialiser.
        output_base: forwarded unchanged to the base-class initialiser.
    """
    super(LibreOfficeProcessor, self).__init__(fileobj, name, output_base)

    if self.converter is None:
        # Assigned on the class rather than the instance, so the converter
        # created here is also seen by later instances of the same class.
        self.__class__.converter = (
            utils.docprocessor.unoconverter.DocumentConverter())

    output_dirs = self.output_path
    self.html_path = output_dirs.html
    self.docbook_path = output_dirs.docbook

    # From here on the instance attribute ``mimetype`` holds the value
    # returned by the method and shadows the method itself.
    detected = self.mimetype()
    self.mimetype = detected
    logger.info('Mimetype: %s' % detected)

    self.result = self.convert()
def makedesktop(self):
    """(Re)create ``self.desktop`` from the office service.

    Resolves the UNO component context on ``localhost`` at ``self.port`` and
    asks its service manager for a ``com.sun.star.frame.Desktop`` instance.
    Whenever that fails, the service is restarted through
    ``self.restartservice()``, the error is logged, and the attempt is
    repeated.

    Raises:
        DocumentConversionException: if, after a restart, the port reported
            by ``utils.builtin.is_port_open(self.host, self.port)`` is
            still closed.
    """
    while True:
        try:
            # The connection string is rebuilt on every attempt because
            # ``self.port`` is read again after each restart.
            context = self.resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext"
                % self.port)
            self.desktop = context.ServiceManager.createInstanceWithContext(
                "com.sun.star.frame.Desktop", context)
            break
        except Exception as e:
            self.restartservice()
            logger.info("%s. Restart service and rebuild desktop." % e)
            # NOTE(review): this check is assumed to belong to the failure
            # path -- it is the only way the retry loop can end without a
            # successful resolve. Confirm against the intended layout.
            if not utils.builtin.is_port_open(self.host, self.port):
                # Call form of raise: valid on both Python 2 and Python 3
                # (the ``raise Exc, "msg"`` form is Python 2 only).
                raise DocumentConversionException(
                    "failed to connect to OpenOffice.org on port %s"
                    % self.port)
def convert(self):
    """Convert a Word document to DocBook and then to Markdown.

    Only ``application/msword`` and the OOXML wordprocessing mimetype are
    handled. A ``.docx`` source is copied into ``self.docx_path``; a
    ``.doc`` source is first converted to ``.docx``. The ``.docx`` is then
    converted to DocBook XML, notes are removed and the DocBook file is
    turned into Markdown.

    ``self.resultcode`` is set to:
        0 -- success
        1 -- mimetype not handled, nothing done
        2 -- the DocBook file does not exist after conversion
        3 -- the DocBook conversion step returned False

    Returns:
        True on success (result code 0), False otherwise.

    >>> import os
    >>> import shutil
    >>> import utils.docprocessor.libreoffice
    >>> import xml.etree.ElementTree
    >>> basepath = 'core/test_output'
    >>> f = open('core/test/cv_1.doc', 'r')
    >>> cv1 = utils.docprocessor.libreoffice.LibreOfficeProcessor(f, 'cv_1.doc', basepath)
    >>> cv1.result
    True
    >>> e = xml.etree.ElementTree.parse(os.path.join(
    ...     cv1.docbook_path, cv1.name.xml)).getroot()
    >>> e.findall('para')[0].text
    'http://jianli.yjbys.com/'
    >>> with open(os.path.join(cv1.markdown_path,
    ...                        cv1.name.md)) as file:
    ...     data = file.read()
    >>> 'http://jianli.yjbys.com/' in data
    True
    >>> f.close()
    >>> shutil.rmtree(basepath)
    """
    doc_mimetype = "application/msword"
    docx_mimetype = ("application/vnd.openxmlformats-officedocument"
                     ".wordprocessingml.document")

    logger.info('Convert: %s' % self.base)

    # Guard clause: anything that is not a Word document is skipped.
    if self.mimetype not in (doc_mimetype, docx_mimetype):
        logger.info('Skip')
        self.resultcode = 1
        return False

    # Stage 1: make sure a .docx copy exists in self.docx_path.
    if self.mimetype == docx_mimetype:
        shutil.copyfile(os.path.join(self.source_path, self.name),
                        os.path.join(self.docx_path, self.name.docx))
    elif self.mimetype == doc_mimetype:
        # NOTE(review): the outcome of this doc -> docx step is not
        # checked; only the result of the next step decides success.
        self.convert_docfile(self.source_path, self.name,
                             self.docx_path, self.name.docx)

    # Stage 2: .docx -> DocBook XML.
    docbook_ok = self.convert_docfile(self.docx_path, self.name.docx,
                                      self.docbook_path, self.name.xml)

    docbook_file = os.path.join(self.docbook_path, self.name.xml)
    if not os.path.exists(docbook_file):
        logger.info('Not exists')
        self.resultcode = 2
        return False
    if docbook_ok is False:
        self.resultcode = 3
        return False

    # Stage 3: clean up the DocBook file and produce Markdown.
    self.remove_note()
    self.file_docbook_to_markdown()
    logger.info(' '.join([self.base.base, self.name.base, 'Success']))
    self.resultcode = 0
    return True
def convert(self):
    """Convert the ``.docx`` source to Markdown.

    Runs ``self.file_docx_to_markdown()`` and then checks that the Markdown
    file exists in ``self.markdown_path``.

    ``self.resultcode`` is set to 0 when the Markdown file exists and to 2
    when it does not.

    Returns:
        True when the Markdown file exists after conversion, else False.

    >>> import os
    >>> import shutil
    >>> import utils.docprocessor.pandoc
    >>> basepath = 'core/test_output'
    >>> f = open('core/test/cv_1.docx', 'r')
    >>> cv1 = utils.docprocessor.pandoc.PandocProcessor(f, 'cv_1.docx', basepath)
    >>> cv1.result
    True
    >>> '13888888888' in cv1.markdown_stream
    True
    >>> f.close()
    >>> shutil.rmtree(basepath)
    """
    logger.info('Convert: %s' % self.base)
    self.file_docx_to_markdown()

    markdown_file = os.path.join(self.markdown_path, self.name.md)
    if os.path.exists(markdown_file):
        logger.info(' '.join([self.base.base, self.name.base, 'Success']))
        self.resultcode = 0
        return True

    # The converter produced no output file.
    self.resultcode = 2
    return False
def convert(self, inputFile, outputFile):
    """Convert ``inputFile`` to ``outputFile`` through the office desktop.

    The document is loaded hidden through ``utils.timeout.thread.timeout_call``
    (presumably a 30-second limit -- confirm against that helper),
    refreshed, and stored to the output URL with store properties selected
    from the output file extension. The document is always closed after the
    store attempt. On a load timeout or any other exception the service is
    restarted, the desktop is rebuilt and the conversion is tried again, for
    at most three attempts in total.

    Args:
        inputFile: path of the document to convert.
        outputFile: path of the file to produce; its extension selects the
            store properties.

    Returns:
        True when an attempt completed without raising, False when all
        attempts failed. (Previously the function returned None in both
        cases, so callers could not tell that every attempt had failed.)
    """
    max_attempts = 3
    inputUrl = self._toFileUrl(inputFile)
    outputUrl = self._toFileUrl(outputFile)
    loadProperties = {"Hidden": True}
    self.makedesktop()

    for _ in range(max_attempts):
        try:
            document = utils.timeout.thread.timeout_call(
                self.desktop.loadComponentFromURL, 30, kill_wait=1,
                args=(inputUrl, "_blank", 0,
                      self._toProperties(loadProperties)))
            document.refresh()
            # The return value is not used; the call is kept because it is
            # not visible here whether it validates the document or has
            # other side effects.
            self._detectFamily(document)
            outputExt = self._getFileExt(outputFile)
            storeProperties = self._getStoreProperties(document, outputExt)
            try:
                document.storeToURL(outputUrl,
                                    self._toProperties(storeProperties))
            finally:
                document.close(True)
            return True
        except utils.timeout.exception.ExecTimeout:
            logger.info("DocumentConverter timeout.")
        except Exception as e:
            # com.sun.star.uno.RuntimeException:
            # Binary URP bridge already disposed
            logger.info(e)
        # Shared recovery for both failure kinds: bring the service back up
        # before the next attempt (or for the next caller).
        logger.info("Restart service and rebuild desktop.")
        self.restartservice()
        self.makedesktop()

    logger.info("DocumentConverter gave up after %s attempts: %s"
                % (max_attempts, inputFile))
    return False