def __init__(self, cr, cmd=False, dir_tmp=False):
    """Initialise the converter and register it as the 'openoffice' service.

    :param cr: database cursor; used to read the ``oo_config`` table when
        ``cmd`` or ``dir_tmp`` is not supplied.
    :param cmd: value passed to ``DocumentConverter`` as its command
        (the ``soffice`` column of ``oo_config`` when falsy).
    :param dir_tmp: value passed to ``DocumentConverter`` as ``dir_tmp``
        (the ``dir_tmp`` column of ``oo_config`` when falsy).
    """
    if not cmd or not dir_tmp:
        cr.execute("SELECT soffice, dir_tmp FROM oo_config")
        stored_cmd, stored_dir_tmp = cr.fetchone()
        # Only fill in what the caller left out, so an explicitly passed
        # argument is never overridden by the stored configuration.
        cmd = cmd or stored_cmd
        dir_tmp = dir_tmp or stored_dir_tmp
    DocumentConverter.__init__(self, cmd, prefix='aeroo-', dir_tmp=dir_tmp, suffix='.odt')
    netsvc.Service.__init__(self, 'openoffice')
def _init_conn(self):
    """Try to open the office connection and store it in ``self.oservice``.

    A ``DocumentConversionException`` raised while constructing the
    ``DocumentConverter`` is not propagated: ``self.oservice`` is set to
    ``None`` and a warning is logged on the 'main' logger.
    """
    try:
        connection = DocumentConverter(self.oo_host, self.oo_port)
    except DocumentConversionException:
        connection = None
        logging.getLogger('main').warning(
            "Failed to initiate OpenOffice/LibreOffice connection.")
    self.oservice = connection
def convert_to_pdf(user, presentationObject):
    """Move an uploaded presentation into the user's storage and convert it to PDF.

    The storage directory is ``STORAGE_ROOT/<hash[0:4]>/<hash[4:8]>`` built
    from ``user.profile.user_hash``; it is created when missing.  Nothing
    else happens when the uploaded file does not exist.

    :param user: object exposing ``profile.user_hash``.
    :param presentationObject: object exposing ``presentation_slides``, the
        uploaded file's path relative to ``settings.CURRENT_DIR``.

    On a conversion error the message is printed and the process exits
    with status 1.
    """
    inputFile = "%s/%s" % (settings.CURRENT_DIR, presentationObject.presentation_slides)
    profile = user.profile

    # Location to store the user's files, based on the hash of their username
    levelOne = profile.user_hash[0:4]
    levelTwo = profile.user_hash[4:8]
    path = "%s/%s/%s" % (settings.STORAGE_ROOT, levelOne, levelTwo)

    if not os.path.exists(path):
        os.makedirs(path)

    # Nothing to do unless the uploaded file is really there
    if not os.path.exists(inputFile):
        return
    print("Uploaded file exists")

    baseName = os.path.basename(inputFile)
    pdfFile = "%s/%s.pdf" % (path, baseName)
    newFile = "%s/%s" % (path, baseName)

    if os.path.exists(newFile) and filecmp.cmp(inputFile, newFile):
        # Files are the same, no need to move
        print("upload file and newFile the same, moving on")
    else:
        if not os.path.exists(newFile):
            print("File does not exist in storage location")
        print("Moving uploaded file %s into location %s" % (inputFile, path))
        # Move onto the full file path: moving into the *directory* raises
        # shutil.Error when a file with the same name is already stored.
        shutil.move(inputFile, newFile)

    # Convert presentation to PDF
    try:
        converter = DocumentConverter()
        converter.convert(newFile, pdfFile)
    except DocumentConversionException as exception:
        print("ERROR! " + str(exception))
        exit(1)
    except ErrorCodeIOException as exception:
        print(exception)
        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        exit(1)
def main(fn_template, fn_data):
    """Run a mail merge for every CSV row and convert each result to PDF.

    :param fn_template: template file name handed to ``mail_merge``.
    :param fn_data: CSV file with a header row; the ``first_name`` and
        ``last_name`` columns are used to build the output file names.

    Each row dict gains the keys ``filename`` (``<first>_<last>.odt``) and
    ``filename_pdf`` (``<first>_<last>.pdf``) before it is passed to
    ``mail_merge``.
    """
    # Read every row up front so the CSV file is closed before converting.
    with open(fn_data) as fp_data:
        users = list(csv.DictReader(fp_data))

    converter = DocumentConverter()
    for user in users:
        stem = "%s_%s" % (user["first_name"], user["last_name"])
        user["filename"] = stem + ".odt"
        user["filename_pdf"] = stem + ".pdf"
        mail_merge(fn_template, user)
        converter.convert(user["filename"], user["filename_pdf"])
def init_conn(self):
    """Try to open the office connection and store it in ``self.oservice``.

    A ``DocumentConversionException`` raised while constructing the
    ``DocumentConverter`` is not propagated: ``self.oservice`` is set to
    ``None`` and a warning is logged on the 'main' logger.
    """
    try:
        connection = DocumentConverter(self.oo_host, self.oo_port)
    except DocumentConversionException:
        connection = None
        logging.getLogger('main').warning(
            "Failed to initiate OpenOffice/LibreOffice connection.")
    self.oservice = connection
def run(self):
    """Poll for tasks forever: convert to PDF, attach the original, sign.

    For each task returned by ``model.get_task_to_process()``:

    1. a non-PDF source is converted to ``/tmp/converterdir/test2.pdf``;
       a PDF source is used as is;
    2. ``pdftk`` attaches the original file, producing ``test3.pdf``;
    3. ``signApp.jar`` writes the signed result to a timestamp-named file;
    4. the task is updated with status 10 and the result path.

    Any exception marks the task with status -1 and is logged through
    ``web.debug``; the loop then continues with the next task.
    """
    while True:
        doc = model.get_task_to_process()
        if doc is None:
            time.sleep(1)
            continue
        try:
            # log selected document
            web.debug(doc)
            # ensure that the tmp directory exists
            if not os.path.exists('/tmp/converterdir'):
                os.makedirs('/tmp/converterdir')
            # check whether the document needs to be converted
            interim_path = '/tmp/converterdir/test2.pdf'
            if re.search(r"\.pdf$", doc.srcname, re.IGNORECASE | re.MULTILINE) is None:
                converter = DocumentConverter()
                converter.convert(doc.srcname, interim_path)
                # wait for the converter to close
                time.sleep(1)
            else:
                # already a PDF: use the source file directly
                interim_path = doc.srcname
            web.debug(interim_path)
            # Attach the original file.  Arguments are passed as a list
            # (no shell) so that spaces or shell metacharacters in the
            # source name cannot break or hijack the command.
            subprocess.call(
                ["/opt/zimbra/bin/pdftk", interim_path,
                 "attach_files", doc.srcname,
                 "output", "/tmp/converterdir/test3.pdf"],
                stdout=self.flog)
            # sign document with the private key from key.properties
            d = datetime.today()
            unic_file_name = d.strftime('%m_%d_%H_%M_%S')
            signed_path = '/tmp/converterdir/' + unic_file_name + '.pdf'
            subprocess.call(
                ["java", "-jar", "/opt/zimbra/bin/signapp/signApp.jar",
                 "/tmp/converterdir/test3.pdf", signed_path,
                 "/opt/zimbra/bin/signapp/key.properties"],
                stdout=self.flog)
            # mark document as ready
            model.update_task(doc.idtask, 10, signed_path)
        except DocumentConversionException as exception:
            model.update_task(doc.idtask, -1)
            web.debug(datetime.today().strftime('%y-%m-%d %H:%M:%S') + "ERROR! " + str(exception))
        except ErrorCodeIOException as exception:
            model.update_task(doc.idtask, -1)
            web.debug(datetime.today().strftime('%y-%m-%d %H:%M:%S') + "ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        except Exception as exception:
            model.update_task(doc.idtask, -1)
            web.debug(datetime.today().strftime('%y-%m-%d %H:%M:%S') + " unexpected error! - " + str(exception))
# Both folders must already exist; abort otherwise.
if not isdir(input_folder):
    print('no such input folder: %s' % input_folder)
    exit(1)
if not isdir(output_folder):
    print('no such output folder: %s' % output_folder)
    exit(1)

# Every regular file in the input folder gets its own output sub-folder
# (named after the file without extension) holding an HTML and a PDF
# rendering; the HTML is then handed to convert_file().
for entry in listdir(input_folder):
    fpath = input_folder + '/' + entry
    if isfile(fpath):
        fname = splitext(entry)[0]
        new_fpath = output_folder + '/' + fname
        if not isdir(new_fpath):
            mkdir(new_fpath)
        try:
            converter = DocumentConverter()
            converter.convert(fpath, new_fpath + ('/%s.html' % fname))
            converter.convert(fpath, new_fpath + ('/%s.pdf' % fname))
            convert_file(new_fpath, fname + '.html')
        except DocumentConversionException as exception:
            print("ERROR!" + str(exception))
            exit(1)
        except Exception as exception:
            print("ERROR! ErrorCodeIOException %s" % exception)
            exit(1)
def __init__(self, cr, host, port):
    """Initialise the converter from ``oo_config`` and register the service.

    The ``host`` and ``port`` arguments are not used: the values read from
    the ``oo_config`` table always take their place.

    :param cr: database cursor used to read ``oo_config``.
    :param host: ignored (see above).
    :param port: ignored (see above).
    """
    cr.execute("SELECT host, port, ooo_restart_cmd FROM oo_config")
    stored_host, stored_port, restart_cmd = cr.fetchone()
    DocumentConverter.__init__(self, stored_host, stored_port, restart_cmd)
    netsvc.Service.__init__(self, 'openoffice')
class OfficeService():
    """Conversion service backed by an OpenOffice/LibreOffice connection.

    Documents are exchanged as base64 text.  Uploaded chunks are spooled
    in ``spool_dir`` in files named after the MD5 of their identifier; a
    leading ``_`` marks an upload that is still in progress.
    """

    def __init__(self, oo_host, oo_port, spool_dir, auth_type):
        """Store the settings and try to open the office connection.

        :param oo_host: host passed to ``DocumentConverter``.
        :param oo_port: port passed to ``DocumentConverter``.
        :param spool_dir: directory holding the spooled files.
        :param auth_type: callable ``(username, password)``; a falsy
            result denies access.
        """
        self.oo_host = oo_host
        self.oo_port = oo_port
        self.spool_path = spool_dir + '/%s'
        self.auth = auth_type
        self.init_conn()

    def init_conn(self):
        """Try to connect; ``self.oservice`` is ``None`` when that fails."""
        logger = logging.getLogger('main')
        try:
            self.oservice = DocumentConverter(self.oo_host, self.oo_port)
        except DocumentConversionException:
            self.oservice = None
            logger.warning("Failed to initiate OpenOffice/LibreOffice connection.")

    def conn_healthy(self):
        """Return True when connected, reconnecting up to three times.

        :raises NoOfficeConnection: when all three attempts fail.
        """
        if self.oservice is not None:
            return True
        logger = logging.getLogger('main')
        for attempt in range(1, 4):
            self.init_conn()
            if self.oservice is not None:
                return True
            # No point in waiting after the last attempt.
            if attempt < 3:
                sleep(3)
        message = 'Failed to initiate connection to OpenOffice/LibreOffice three times in a row.'
        logger.warning(message)
        raise NoOfficeConnection(message)

    def _read_spool(self, ident):
        """Return the decoded content of the finished upload ``ident``."""
        with open(self.spool_path % self._md5(str(ident)), "r") as tmpfile:
            return base64.b64decode(tmpfile.read())

    def convert(self, data=False, identifier=False, in_mime=False, out_mime=False, username=None, password=None):
        """Convert one document and return the result as base64 text.

        :param data: base64 document; takes precedence over ``identifier``.
        :param identifier: identifier of a finished upload to convert.
        :param in_mime: unused.
        :param out_mime: key into ``filters``; ``'odt'`` when falsy.
        :raises AccessException: when authentication fails.
        :raises NoidentException: when neither data nor identifier is given.
        :raises NoOfficeConnection: when no connection can be opened.
        """
        logger = logging.getLogger('main')
        if not self.auth(username, password):
            raise AccessException('Access denied.')
        start_time = time()
        logger.debug('Openning identifier: %s' % identifier)
        if data is not False:
            data = base64.b64decode(data)
        elif identifier is not False:
            data = self._read_spool(identifier)
        else:
            raise NoidentException('Wrong or no identifier.')
        logger.debug(" read file %s" % str(time() - start_time))
        self.conn_healthy()
        self.oservice.putDocument(data)
        logger.debug(" upload document to office %s" % str(time() - start_time))
        try:
            conv_data = self.oservice.saveByStream(filters[out_mime or 'odt'])
            logger.debug(" download converted document %s" % str(time() - start_time))
        finally:
            # Close the document even when the export fails, so it does
            # not stay open in the office process.
            self.oservice.closeDocument()
            logger.debug(" close document %s" % str(time() - start_time))
        return base64.b64encode(conv_data).decode('utf8')

    def _md5(self, data):
        """Return the hex MD5 digest of the text ``data``."""
        return md5(data.encode()).hexdigest()

    def upload(self, data=False, is_last=False, identifier=False, username=None, password=None):
        """Append one chunk to a spooled upload.

        :param data: text chunk to append (expected to be base64 text).
        :param is_last: when true the upload is finished and the spool
            file loses its ``_`` prefix.
        :param identifier: identifier returned by a previous call; a new
            random one is assigned when falsy.
        :returns: ``{'identifier': identifier}``, or ``None`` when an
            unexpected error occurred (its traceback is printed to stdout).
        :raises AccessException: when authentication fails.
        :raises NoidentException: when ``identifier`` has no upload in progress.
        :raises NodataException: when ``data`` is ``False``.
        """
        logger = logging.getLogger('main')
        logger.debug('Upload identifier: %s' % identifier)
        try:
            start_time = time()
            if not self.auth(username, password):
                raise AccessException('Access denied.')
            # NOTE: md5 conversion on file operations to prevent path injection attack
            if identifier and not path.isfile(self.spool_path % ('_' + self._md5(str(identifier)))):
                raise NoidentException('Wrong or no identifier.')
            elif data is False:
                raise NodataException('No data to be converted.')
            fname = ''
            # generate a random identifier that no spool file uses yet
            while not identifier:
                new_ident = randint(1, MAXINT)
                fname = self._md5(str(new_ident))
                logger.debug(' assigning new identifier %s' % new_ident)
                taken = (path.isfile(self.spool_path % ('_' + fname))
                         or path.isfile(self.spool_path % fname))
                identifier = False if taken else new_ident
            fname = fname or self._md5(str(identifier))
            with open(self.spool_path % ('_' + fname), "a") as tmpfile:
                tmpfile.write(data)
            logger.debug(" chunk finished %s" % str(time() - start_time))
            if is_last:
                rename(self.spool_path % ('_' + fname), self.spool_path % fname)
                logger.debug(" file finished")
            return {'identifier': identifier}
        except (AccessException, NoidentException, NodataException):
            raise
        except Exception:
            # Best effort: report the problem and return None.  Unlike a
            # bare ``except`` this lets KeyboardInterrupt/SystemExit through.
            import sys
            import traceback
            traceback.print_exc(limit=2, file=sys.stdout)

    def join(self, idents, out_mime=False, username=None, password=None):
        """Join several uploaded documents and return the result as base64 text.

        :param idents: identifiers of finished uploads; the first one is
            the base document.  The list is not modified.
        :param out_mime: key into ``filters``; ``'odt'`` when falsy.
        :raises AccessException: when authentication fails.
        :raises NoOfficeConnection: when no connection can be opened.

        Errors raised while joining propagate to the caller after the
        document has been closed.
        """
        logger = logging.getLogger('main')
        logger.debug('Join %s identifiers: %s' % (str(len(idents)), str(idents)))
        if not self.auth(username, password):
            raise AccessException('Access denied.')
        start_time = time()
        pending = list(idents)
        data = self._read_spool(pending.pop(0))
        # Checked outside the try block: without a connection there is no
        # document to close.
        self.conn_healthy()
        try:
            self.oservice.putDocument(data)
            data_list = [self._read_spool(ident) for ident in pending]
            self.oservice.joinDocuments(data_list)
            result_data = self.oservice.saveByStream(filters[out_mime or 'odt'])
        finally:
            self.oservice.closeDocument()
        logger.debug(" join finished %s" % str(time() - start_time))
        return base64.b64encode(result_data).decode('utf8')
class OfficeService():
    """Conversion service backed by an OpenOffice/LibreOffice connection.

    Documents are exchanged as base64 text.  Uploaded chunks are spooled
    in ``spool_dir`` in files named after the MD5 of their identifier; a
    leading ``_`` marks an upload that is still in progress.
    """

    def __init__(self, oo_host, oo_port, spool_dir, auth_type):
        """Store the settings and try to open the office connection.

        :param oo_host: host passed to ``DocumentConverter``.
        :param oo_port: port passed to ``DocumentConverter``.
        :param spool_dir: directory holding the spooled files.
        :param auth_type: callable ``(username, password)``; a falsy
            result denies access.
        """
        self.oo_host = oo_host
        self.oo_port = oo_port
        self.spool_path = spool_dir + '/%s'
        self.auth = auth_type
        self.init_conn()

    def init_conn(self):
        """Try to connect; ``self.oservice`` is ``None`` when that fails."""
        logger = logging.getLogger('main')
        try:
            self.oservice = DocumentConverter(self.oo_host, self.oo_port)
        except DocumentConversionException:
            self.oservice = None
            logger.warning(
                "Failed to initiate OpenOffice/LibreOffice connection.")

    def conn_healthy(self):
        """Return True when connected, reconnecting up to three times.

        :raises NoOfficeConnection: when all three attempts fail.
        """
        if self.oservice is not None:
            return True
        logger = logging.getLogger('main')
        for attempt in range(1, 4):
            self.init_conn()
            if self.oservice is not None:
                return True
            # No point in waiting after the last attempt.
            if attempt < 3:
                sleep(3)
        message = 'Failed to initiate connection to OpenOffice/LibreOffice three times in a row.'
        logger.warning(message)
        raise NoOfficeConnection(message)

    def _read_spool(self, ident):
        """Return the decoded content of the finished upload ``ident``."""
        with open(self.spool_path % self._md5(str(ident)), "r") as tmpfile:
            return base64.b64decode(tmpfile.read())

    def convert(self, data=False, identifier=False, in_mime=False, out_mime=False, username=None, password=None):
        """Convert one document and return the result as base64 text.

        :param data: base64 document; takes precedence over ``identifier``.
        :param identifier: identifier of a finished upload to convert.
        :param in_mime: unused.
        :param out_mime: key into ``filters``; ``'odt'`` when falsy.
        :raises AccessException: when authentication fails.
        :raises NoidentException: when neither data nor identifier is given.
        :raises NoOfficeConnection: when no connection can be opened.
        """
        logger = logging.getLogger('main')
        if not self.auth(username, password):
            raise AccessException('Access denied.')
        start_time = time()
        logger.debug('Openning identifier: %s' % identifier)
        if data is not False:
            data = base64.b64decode(data)
        elif identifier is not False:
            data = self._read_spool(identifier)
        else:
            raise NoidentException('Wrong or no identifier.')
        logger.debug(" read file %s" % str(time() - start_time))
        self.conn_healthy()
        self.oservice.putDocument(data)
        logger.debug(" upload document to office %s" % str(time() - start_time))
        try:
            conv_data = self.oservice.saveByStream(filters[out_mime or 'odt'])
            logger.debug(" download converted document %s" % str(time() - start_time))
        finally:
            # Close the document even when the export fails, so it does
            # not stay open in the office process.
            self.oservice.closeDocument()
            logger.debug(" close document %s" % str(time() - start_time))
        return base64.b64encode(conv_data).decode('utf8')

    def _md5(self, data):
        """Return the hex MD5 digest of the text ``data``."""
        return md5(data.encode()).hexdigest()

    def upload(self, data=False, is_last=False, identifier=False, username=None, password=None):
        """Append one chunk to a spooled upload.

        :param data: text chunk to append (expected to be base64 text).
        :param is_last: when true the upload is finished and the spool
            file loses its ``_`` prefix.
        :param identifier: identifier returned by a previous call; a new
            random one is assigned when falsy.
        :returns: ``{'identifier': identifier}``, or ``None`` when an
            unexpected error occurred (its traceback is printed to stdout).
        :raises AccessException: when authentication fails.
        :raises NoidentException: when ``identifier`` has no upload in progress.
        :raises NodataException: when ``data`` is ``False``.
        """
        logger = logging.getLogger('main')
        logger.debug('Upload identifier: %s' % identifier)
        try:
            start_time = time()
            if not self.auth(username, password):
                raise AccessException('Access denied.')
            # NOTE: md5 conversion on file operations to prevent path injection attack
            if identifier and not path.isfile(self.spool_path % ('_' + self._md5(str(identifier)))):
                raise NoidentException('Wrong or no identifier.')
            elif data is False:
                raise NodataException('No data to be converted.')
            fname = ''
            # generate a random identifier that no spool file uses yet
            while not identifier:
                new_ident = randint(1, MAXINT)
                fname = self._md5(str(new_ident))
                logger.debug(' assigning new identifier %s' % new_ident)
                taken = (path.isfile(self.spool_path % ('_' + fname))
                         or path.isfile(self.spool_path % fname))
                identifier = False if taken else new_ident
            fname = fname or self._md5(str(identifier))
            with open(self.spool_path % ('_' + fname), "a") as tmpfile:
                tmpfile.write(data)
            logger.debug(" chunk finished %s" % str(time() - start_time))
            if is_last:
                rename(self.spool_path % ('_' + fname), self.spool_path % fname)
                logger.debug(" file finished")
            return {'identifier': identifier}
        except (AccessException, NoidentException, NodataException):
            raise
        except Exception:
            # Best effort: report the problem and return None.  Unlike a
            # bare ``except`` this lets KeyboardInterrupt/SystemExit through.
            import sys
            import traceback
            traceback.print_exc(limit=2, file=sys.stdout)

    def join(self, idents, out_mime=False, username=None, password=None):
        """Join several uploaded documents and return the result as base64 text.

        :param idents: identifiers of finished uploads; the first one is
            the base document.  The list is not modified.
        :param out_mime: key into ``filters``; ``'odt'`` when falsy.
        :raises AccessException: when authentication fails.
        :raises NoOfficeConnection: when no connection can be opened.

        Errors raised while joining propagate to the caller after the
        document has been closed.
        """
        logger = logging.getLogger('main')
        logger.debug('Join %s identifiers: %s' % (str(len(idents)), str(idents)))
        if not self.auth(username, password):
            raise AccessException('Access denied.')
        start_time = time()
        pending = list(idents)
        data = self._read_spool(pending.pop(0))
        # Checked outside the try block: without a connection there is no
        # document to close.
        self.conn_healthy()
        try:
            self.oservice.putDocument(data)
            data_list = [self._read_spool(ident) for ident in pending]
            self.oservice.joinDocuments(data_list)
            result_data = self.oservice.saveByStream(filters[out_mime or 'odt'])
        finally:
            self.oservice.closeDocument()
        logger.debug(" join finished %s" % str(time() - start_time))
        return base64.b64encode(result_data).decode('utf8')
baseoutname = os.path.basename(baseoutname)
docextension = docextension[1:]  # Remove leading period
if docextension.lower() not in allowedextensions:
    print("Unknown extension '%s'" % docextension.lower())
    print("Only %s are allowed" % ", ".join(allowedextensions))
    sys.exit(1)

# Clean up any old files
for f in ('%s/converted.wiki' % tempdir, '%s/converted.xml' % tempdir):
    if os.path.exists(f):
        os.unlink(f)

# Convert the actual document. This will always create temporary files,
# there is no way to get the data in a buffer.
with IOWrapper("Converting document format..."):
    converter = DocumentConverter()
    converter.convert(inputfile, '%s/converted.wiki' % tempdir)
    converter.convert(inputfile, '%s/converted.xml' % tempdir)

# Read the converted wiki format, and append our hardcoded additions
with codecs.open('%s/converted.wiki' % tempdir, "r", "utf-8") as wf:
    wikilines = wf.readlines()
wikilines.append("\n[[Category:word2mediawiki]]\n")

# Grab images out of the XML data
parser = XMLParser(target=ImageGrabber())
# XXX: rewrite the namespace so as not to confuse certain XML parsers
# (changes either in newer open/libreoffice or newer XML parsers require this)
with open('%s/converted.xml' % tempdir) as xf:
    parser.feed(xf.read().replace("\"http://www.w3.org/XML/1998/namespace\"","\"http://www.w3.org/XML/1998/namespace_bugfix\""))
def __init__(self, cr, host, port, allow_raise_errors=True):
    """Initialise the converter with the settings stored in ``oo_config``.

    The ``host`` and ``port`` arguments are not used: the values read from
    the ``oo_config`` table always take their place.

    :param cr: database cursor used to read ``oo_config``.
    :param host: ignored (see above).
    :param port: ignored (see above).
    :param allow_raise_errors: passed through to ``DocumentConverter``.
    """
    cr.execute("SELECT host, port, ooo_restart_cmd FROM oo_config")
    stored_host, stored_port, restart_cmd = cr.fetchone()
    DocumentConverter.__init__(
        self, stored_host, stored_port, restart_cmd, allow_raise_errors)
class OfficeService(object):
    """Conversion service backed by an OpenOffice/LibreOffice connection.

    Documents are exchanged as base64 text.  Uploaded chunks are spooled
    in ``spool_dir`` in files named after the MD5 of their identifier; a
    leading ``_`` marks an upload that is still in progress.
    """

    def __init__(self, oo_host, oo_port, spool_dir, auth_type):
        """Store the settings and try to open the office connection.

        :param oo_host: host passed to ``DocumentConverter``.
        :param oo_port: port passed to ``DocumentConverter``.
        :param spool_dir: directory holding the spooled files.
        :param auth_type: callable ``(username, password)``; a falsy
            result denies access.
        """
        self.oo_host = oo_host
        self.oo_port = oo_port
        self.spool_path = spool_dir + '/%s'
        self.auth = auth_type
        self._init_conn()

    def _init_conn(self):
        """Try to connect; ``self.oservice`` is ``None`` when that fails."""
        try:
            self.oservice = DocumentConverter(self.oo_host, self.oo_port)
        except DocumentConversionException:
            self.oservice = None
            logger.warning(
                "Failed to initiate OpenOffice/LibreOffice connection.")

    def _conn_healthy(self):
        """Return True when connected, reconnecting up to three times.

        :raises NoOfficeConnection: when all three attempts fail.
        """
        if getattr(self, 'oservice', None) is not None:
            return True
        self.oservice = None
        attempt = 0
        while self.oservice is None and attempt < 3:
            attempt += 1
            self._init_conn()
            if self.oservice is not None:
                return True
            sleep(3)
        message = 'Failed to initiate connection to OpenOffice/LibreOffice three times in a row.'
        logger.warning(message)
        raise NoOfficeConnection(message)

    def _chktime(self, start_time):
        """Return the seconds elapsed since ``start_time`` as ``'<n> s'``."""
        return '%s s' % str(round(time() - start_time, 6))

    def get_file(self, ident, username=None, password=None):
        """Return the finished upload ``ident`` as base64 text.

        :raises AccessException: when authentication fails.
        """
        if not self.auth(username, password):
            raise AccessException('Access denied.')
        file_data = self._readFile(ident)
        return base64.b64encode(file_data).decode('utf8')

    def convert(self, data=False, identifier=False, in_mime=False, out_mime=False, username=None, password=None):
        """Convert one document and return the result as base64 text.

        :param data: base64 document; takes precedence over ``identifier``.
        :param identifier: identifier of a finished upload to convert.
        :param in_mime: key into ``filters`` for the import filter.
        :param out_mime: key into ``filters`` for the export filter.
        :raises AccessException: when authentication fails.
        :raises NoidentException: when neither data nor identifier is given.
        :raises NoOfficeConnection: when no connection can be opened.
        """
        if not self.auth(username, password):
            raise AccessException('Access denied.')
        start_time = time()
        logger.debug('Openning identifier: %s' % identifier)
        if data is not False:
            data = base64.b64decode(data)
        elif identifier is not False:
            data = self._readFile(identifier)
        else:
            raise NoidentException('Wrong or no identifier.')
        logger.debug(" read file %s" % self._chktime(start_time))
        self._conn_healthy()
        logger.debug(" connection test ok %s" % self._chktime(start_time))
        infilter = filters.get(in_mime, False)
        outfilter = filters.get(out_mime, False)
        self.oservice.putDocument(data, filter_name=infilter, read_only=False)
        logger.debug(" upload document to office %s" % self._chktime(start_time))
        try:
            conv_data = self.oservice.saveByStream(filter_name=outfilter)
            logger.debug(" download converted document %s" % self._chktime(start_time))
        except Exception as e:
            logger.debug(" conversion failed %s Exception: %s" % (self._chktime(start_time), str(e)))
            self.oservice.closeDocument()
            logger.debug(" emergency close document %s" % self._chktime(start_time))
            raise
        else:
            self.oservice.closeDocument()
            logger.debug(" close document %s" % self._chktime(start_time))
        return base64.b64encode(conv_data).decode('utf8')

    def _md5(self, data):
        """Return the hex MD5 digest of the text ``data``."""
        return md5(data.encode()).hexdigest()

    def upload(self, data=False, is_last=False, identifier=False, username=None, password=None):
        """Append one chunk to a spooled upload.

        :param data: text chunk to append (expected to be base64 text).
        :param is_last: when true the upload is finished and the spool
            file loses its ``_`` prefix.
        :param identifier: identifier returned by a previous call; a new
            random one is assigned when falsy.
        :returns: ``{'identifier': identifier}``, or ``None`` when an
            unexpected error occurred (its traceback is printed to stdout).
        :raises AccessException: when authentication fails.
        :raises NoidentException: when ``identifier`` has no upload in progress.
        :raises NodataException: when ``data`` is ``False``.
        """
        logger = logging.getLogger('main')
        logger.debug('Upload identifier: %s' % identifier)
        try:
            start_time = time()
            if not self.auth(username, password):
                raise AccessException('Access denied.')
            # NOTE: md5 conversion on file operations to prevent path injection attack
            if identifier and not path.isfile(self.spool_path % ('_' + self._md5(str(identifier)))):
                raise NoidentException('Wrong or no identifier.')
            elif data is False:
                raise NodataException('No data to be converted.')
            fname, identifier = self._get_filename_and_identifier(identifier)
            with open(self.spool_path % ('_' + fname), "a") as tmpfile:
                tmpfile.write(data)
            logger.debug(" chunk finished %s" % self._chktime(start_time))
            if is_last:
                rename(self.spool_path % ('_' + fname), self.spool_path % fname)
                logger.debug(" file finished")
            return {'identifier': identifier}
        except (AccessException, NoidentException, NodataException):
            raise
        except Exception:
            # Best effort: report the problem and return None.  Unlike a
            # bare ``except`` this lets KeyboardInterrupt/SystemExit through.
            import sys
            import traceback
            traceback.print_exc(limit=2, file=sys.stdout)

    def _get_filename_and_identifier(self, force_identifier=None):
        """Return ``(spool file name, identifier)``.

        With a truthy ``force_identifier`` that identifier is used as is;
        otherwise random identifiers are drawn until one is found whose
        spool file (finished or in progress) does not exist yet.
        """
        fname = ''
        identifier = force_identifier
        while not identifier:
            new_ident = randint(1, MAXINT)
            fname = self._md5(str(new_ident))
            logger.debug(' assigning new identifier %s' % new_ident)
            taken = (path.isfile(self.spool_path % ('_' + fname))
                     or path.isfile(self.spool_path % fname))
            identifier = False if taken else new_ident
        fname = fname or self._md5(str(identifier))
        return fname, identifier

    def _readFile(self, ident):
        """Return the base64-decoded content of the finished upload ``ident``."""
        spool_file_name = self._md5(str(ident))
        logger.debug("> read id %s for spool name %s", ident, spool_file_name)
        with open(self.spool_path % spool_file_name, "r") as tmpfile:
            data = tmpfile.read()
        return base64.b64decode(data)

    def _readFileUTF8(self, ident):
        """Return the raw spool file content of ``ident`` decoded as UTF-8."""
        spool_file_name = self._md5(str(ident))
        logger.debug("> read in utf-8 id %s for spool name %s", ident, spool_file_name)
        # Use a context manager so the handle is closed deterministically.
        with open(self.spool_path % spool_file_name, 'rb') as spool_file:
            return spool_file.read().decode('utf8')

    def _readFiles(self, idents):
        """Yield the decoded content of each identifier, one at a time."""
        logger = logging.getLogger('main')
        for ident in idents:
            start_time = time()
            data = self._readFile(ident)
            logger.debug(" read next file: %s +%s" % (ident, self._chktime(start_time)))
            yield data

    def join(self, idents, in_mime=False, out_mime=False, username=None, password=None):
        """Join several uploaded documents and return the result as base64 text.

        PDF-to-PDF joins are merged directly; everything else goes through
        the office connection.

        :raises AccessException: when authentication fails.
        """
        logger.debug('Join %s identifiers: %s' % (str(len(idents)), str(idents)))
        if not self.auth(username, password):
            raise AccessException('Access denied.')
        if in_mime == out_mime == 'pdf':
            return self._join_pdf_to_pdf(idents, in_mime, out_mime)
        return self._join_default(idents, in_mime, out_mime)

    def _join_default(self, idents, in_mime=False, out_mime=False):
        """Join documents through the office connection.

        :param idents: identifiers returned by ``upload``; the first one
            is the base document.  The list is not modified.
        :param in_mime: key into ``filters`` for the import filter;
            ``'writer8'`` is used when the key is unknown.
        :param out_mime: key into ``filters`` for the export filter.
        :return: the joined document as base64 text.
        :raises NoOfficeConnection: when no connection can be opened.
        """
        start_time = time()
        pending = list(idents)
        data = self._readFile(pending.pop(0))
        logger.debug(" read first file %s" % self._chktime(start_time))
        self._conn_healthy()
        logger.debug(" connection test ok %s" % self._chktime(start_time))
        try:
            infilter = filters.get(in_mime, False) or 'writer8'
            outfilter = filters.get(out_mime, False)
            self.oservice.putDocument(data, filter_name=infilter, read_only=True)
            logger.debug(" upload first document to office %s" % self._chktime(start_time))
            self.oservice.appendDocuments(self._readFiles(pending), filter_name=infilter)
            result_data = self.oservice.saveByStream(outfilter)
        except Exception as e:
            logger.debug(" conversion failed %s Exception: %s" % (self._chktime(start_time), str(e)))
            self.oservice.closeDocument()
            logger.debug(" emergency close document %s" % self._chktime(start_time))
            raise
        else:
            self.oservice.closeDocument()
            logger.debug(" close document %s" % self._chktime(start_time))
        logger.debug(" join finished %s" % self._chktime(start_time))
        return base64.b64encode(result_data).decode('utf8')

    def _join_pdf_to_pdf(self, idents, in_mime, out_mime):
        """Merge PDF uploads in batches of 100 and return the merged spool content.

        When more than one batch is produced, the intermediate results are
        merged again recursively.
        """
        logger.debug('Merge %s pdf identifiers: %s' % (str(len(idents)), str(idents)))
        try:
            new_idents = []
            while idents:
                to_process = idents[:100]
                idents = idents[100:]
                start_time = time()
                out_file_name, new_ident = self._merge_idents(to_process)
                new_idents.append(new_ident)
                logger.debug(">write merged file %s in %s", out_file_name, self._chktime(start_time))
            if len(new_idents) > 1:
                return self._join_pdf_to_pdf(new_idents, in_mime=in_mime, out_mime=out_mime)
            return self._readFileUTF8(new_idents[0])
        except Exception as e:
            # logger.exception already records the message and traceback.
            logger.exception(e)
            raise

    def _merge_idents(self, idents):
        """Merge the given PDF uploads into a new finished spool file.

        :return: ``(spool file name, identifier)`` of the merged document.
        """
        merger = PdfFileMerger()
        out_io = io.BytesIO()
        try:
            for ident in idents:
                merger.append(io.BytesIO(self._readFile(ident)))
            merger.write(out_io)
        finally:
            # Release the merger even when reading or writing fails.
            merger.close()
        out_file_name, new_ident = self._get_filename_and_identifier()
        with open(self.spool_path % out_file_name, "wb") as outFile:
            outFile.write(base64.encodebytes(out_io.getvalue()))
        out_io.close()
        return out_file_name, new_ident
baseoutname = os.path.basename(baseoutname)
docextension = docextension[1:]  # Remove leading period
if docextension.lower() not in allowedextensions:
    print("Unknown extension '%s'" % docextension.lower())
    print("Only %s are allowed" % ", ".join(allowedextensions))
    sys.exit(1)

# Clean up any old files
for f in ('%s/converted.wiki' % tempdir, '%s/converted.xml' % tempdir):
    if os.path.exists(f):
        os.unlink(f)

# Convert the actual document. This will always create temporary files,
# there is no way to get the data in a buffer.
with IOWrapper("Converting document format..."):
    converter = DocumentConverter()
    converter.convert(inputfile, '%s/converted.wiki' % tempdir)
    converter.convert(inputfile, '%s/converted.xml' % tempdir)

# Read the converted wiki format, and append our hardcoded additions
with codecs.open('%s/converted.wiki' % tempdir, "r", "utf-8") as wf:
    wikilines = wf.readlines()
wikilines.append("\n[[Category:word2mediawiki]]\n")

# Grab images out of the XML data
parser = XMLParser(target=ImageGrabber())
with open('%s/converted.xml' % tempdir) as xf:
    parser.feed(xf.read())
parser.close()

# Prepare for uploading to mediawiki
class OfficeService:
    """Conversion service backed by an OpenOffice/LibreOffice connection.

    Documents are exchanged as base64 text.  Uploaded chunks are spooled
    in ``spool_dir`` in files named after the MD5 of their identifier; a
    leading ``_`` marks an upload that is still in progress.
    """

    def __init__(self, oo_host, oo_port, spool_dir, auth_type):
        """Store the settings and try to open the office connection.

        :param oo_host: host passed to ``DocumentConverter``.
        :param oo_port: port passed to ``DocumentConverter``.
        :param spool_dir: directory holding the spooled files.
        :param auth_type: callable ``(username, password)``; a falsy
            result denies access.
        """
        self.oo_host = oo_host
        self.oo_port = oo_port
        self.spool_path = spool_dir + "/%s"
        self.auth = auth_type
        self._init_conn()

    def _init_conn(self):
        """Try to connect; ``self.oservice`` is ``None`` when that fails."""
        logger = logging.getLogger("main")
        try:
            self.oservice = DocumentConverter(self.oo_host, self.oo_port)
        except DocumentConversionException:
            self.oservice = None
            logger.warning("Failed to initiate OpenOffice/LibreOffice connection.")

    def _conn_healthy(self):
        """Return True when connected, reconnecting up to three times.

        :raises NoOfficeConnection: when all three attempts fail.
        """
        if getattr(self, "oservice", None) is not None:
            return True
        self.oservice = None
        logger = logging.getLogger("main")
        attempt = 0
        while self.oservice is None and attempt < 3:
            attempt += 1
            self._init_conn()
            if self.oservice is not None:
                return True
            sleep(3)
        message = "Failed to initiate connection to OpenOffice/LibreOffice three times in a row."
        logger.warning(message)
        raise NoOfficeConnection(message)

    def _chktime(self, start_time):
        """Return the seconds elapsed since ``start_time`` as ``'<n> s'``."""
        return "%s s" % str(round(time() - start_time, 6))

    def convert(self, data=False, identifier=False, in_mime=False, out_mime=False, username=None, password=None):
        """Convert one document and return the result as base64 text.

        :param data: base64 document; takes precedence over ``identifier``.
        :param identifier: identifier of a finished upload to convert.
        :param in_mime: key into ``filters`` for the import filter.
        :param out_mime: key into ``filters`` for the export filter.
        :raises AccessException: when authentication fails.
        :raises NoidentException: when neither data nor identifier is given.
        :raises NoOfficeConnection: when no connection can be opened.
        """
        logger = logging.getLogger("main")
        if not self.auth(username, password):
            raise AccessException("Access denied.")
        start_time = time()
        logger.debug("Openning identifier: %s" % identifier)
        if data is not False:
            data = base64.b64decode(data)
        elif identifier is not False:
            data = self._readFile(identifier)
        else:
            raise NoidentException("Wrong or no identifier.")
        logger.debug(" read file %s" % self._chktime(start_time))
        self._conn_healthy()
        logger.debug(" connection test ok %s" % self._chktime(start_time))
        infilter = filters.get(in_mime, False)
        outfilter = filters.get(out_mime, False)
        self.oservice.putDocument(data, filter_name=infilter, read_only=True)
        logger.debug(" upload document to office %s" % self._chktime(start_time))
        try:
            conv_data = self.oservice.saveByStream(filter_name=outfilter)
            logger.debug(" download converted document %s" % self._chktime(start_time))
        except Exception as e:
            logger.debug(" conversion failed %s Exception: %s" % (self._chktime(start_time), str(e)))
            self.oservice.closeDocument()
            logger.debug(" emergency close document %s" % self._chktime(start_time))
            raise
        else:
            self.oservice.closeDocument()
            logger.debug(" close document %s" % self._chktime(start_time))
        return base64.b64encode(conv_data).decode("utf8")

    def _md5(self, data):
        """Return the hex MD5 digest of the text ``data``."""
        return md5(data.encode()).hexdigest()

    def upload(self, data=False, is_last=False, identifier=False, username=None, password=None):
        """Append one chunk to a spooled upload.

        :param data: text chunk to append (expected to be base64 text).
        :param is_last: when true the upload is finished and the spool
            file loses its ``_`` prefix.
        :param identifier: identifier returned by a previous call; a new
            random one is assigned when falsy.
        :returns: ``{"identifier": identifier}``, or ``None`` when an
            unexpected error occurred (its traceback is printed to stdout).
        :raises AccessException: when authentication fails.
        :raises NoidentException: when ``identifier`` has no upload in progress.
        :raises NodataException: when ``data`` is ``False``.
        """
        logger = logging.getLogger("main")
        logger.debug("Upload identifier: %s" % identifier)
        try:
            start_time = time()
            if not self.auth(username, password):
                raise AccessException("Access denied.")
            # NOTE: md5 conversion on file operations to prevent path injection attack
            if identifier and not path.isfile(self.spool_path % ("_" + self._md5(str(identifier)))):
                raise NoidentException("Wrong or no identifier.")
            elif data is False:
                raise NodataException("No data to be converted.")
            fname = ""
            # generate a random identifier that no spool file uses yet
            while not identifier:
                new_ident = randint(1, MAXINT)
                fname = self._md5(str(new_ident))
                logger.debug(" assigning new identifier %s" % new_ident)
                taken = (path.isfile(self.spool_path % ("_" + fname))
                         or path.isfile(self.spool_path % fname))
                identifier = False if taken else new_ident
            fname = fname or self._md5(str(identifier))
            with open(self.spool_path % ("_" + fname), "a") as tmpfile:
                tmpfile.write(data)
            logger.debug(" chunk finished %s" % self._chktime(start_time))
            if is_last:
                rename(self.spool_path % ("_" + fname), self.spool_path % fname)
                logger.debug(" file finished")
            return {"identifier": identifier}
        except (AccessException, NoidentException, NodataException):
            raise
        except Exception:
            # Best effort: report the problem and return None.  Unlike a
            # bare ``except`` this lets KeyboardInterrupt/SystemExit through.
            import sys
            import traceback
            traceback.print_exc(limit=2, file=sys.stdout)

    def _readFile(self, ident):
        """Return the base64-decoded content of the finished upload ``ident``."""
        with open(self.spool_path % self._md5(str(ident)), "r") as tmpfile:
            data = tmpfile.read()
        return base64.b64decode(data)

    def _readFiles(self, idents):
        """Yield the decoded content of each identifier, one at a time."""
        logger = logging.getLogger("main")
        for ident in idents:
            start_time = time()
            data = self._readFile(ident)
            logger.debug(" read next file: %s +%s" % (ident, self._chktime(start_time)))
            yield data

    def join(self, idents, in_mime=False, out_mime=False, username=None, password=None):
        """Join several uploaded documents and return the result as base64 text.

        :param idents: identifiers of finished uploads; the first one is
            the base document.  The list is not modified.
        :param in_mime: key into ``filters`` for the import filter;
            ``"writer8"`` is used when the key is unknown.
        :param out_mime: key into ``filters`` for the export filter.
        :raises AccessException: when authentication fails.
        :raises NoOfficeConnection: when no connection can be opened.
        """
        logger = logging.getLogger("main")
        logger.debug("Join %s identifiers: %s" % (str(len(idents)), str(idents)))
        if not self.auth(username, password):
            raise AccessException("Access denied.")
        start_time = time()
        # Work on a copy so the caller's list keeps its first identifier.
        pending = list(idents)
        data = self._readFile(pending.pop(0))
        logger.debug(" read first file %s" % self._chktime(start_time))
        self._conn_healthy()
        logger.debug(" connection test ok %s" % self._chktime(start_time))
        try:
            infilter = filters.get(in_mime, False) or "writer8"
            outfilter = filters.get(out_mime, False)
            self.oservice.putDocument(data, filter_name=infilter, read_only=True)
            logger.debug(" upload first document to office %s" % self._chktime(start_time))
            self.oservice.appendDocuments(self._readFiles(pending), filter_name=infilter)
            result_data = self.oservice.saveByStream(outfilter)
        except Exception as e:
            logger.debug(" conversion failed %s Exception: %s" % (self._chktime(start_time), str(e)))
            self.oservice.closeDocument()
            logger.debug(" emergency close document %s" % self._chktime(start_time))
            raise
        else:
            self.oservice.closeDocument()
            logger.debug(" close document %s" % self._chktime(start_time))
        logger.debug(" join finished %s" % self._chktime(start_time))
        return base64.b64encode(result_data).decode("utf8")
# Both folders must already exist; abort otherwise.
if not isdir(input_folder):
    print("no such input folder: %s" % input_folder)
    exit(1)
if not isdir(output_folder):
    print("no such output folder: %s" % output_folder)
    exit(1)

# Every regular file in the input folder gets its own output sub-folder
# (named after the file without extension) holding an HTML and a PDF
# rendering; the HTML is then handed to convert_file().
for entry in listdir(input_folder):
    fpath = input_folder + "/" + entry
    if isfile(fpath):
        fname = splitext(entry)[0]
        new_fpath = output_folder + "/" + fname
        if not isdir(new_fpath):
            mkdir(new_fpath)
        try:
            converter = DocumentConverter()
            converter.convert(fpath, new_fpath + ("/%s.html" % fname))
            converter.convert(fpath, new_fpath + ("/%s.pdf" % fname))
            convert_file(new_fpath, fname + ".html")
        except DocumentConversionException as exception:
            print("ERROR!" + str(exception))
            exit(1)
        except Exception as exception:
            print("ERROR! ErrorCodeIOException %s" % exception)
            exit(1)
def _new_ooproxy(self, cr, uid, host=None, port=None, context=None):
    """Build a ``DocumentConverter`` for the given or configured endpoint.

    :param cr: unused.
    :param uid: unused.
    :param host: converter host; the ``ooproxy`` entry of
        ``openerp.tools.config`` is used when ``None``.
    :param port: converter port; the ``ooport`` entry of
        ``openerp.tools.config`` is used when ``None``.
    :param context: unused.
    :return: a new ``DocumentConverter(host, port)``.
    """
    effective_host = openerp.tools.config.get("ooproxy") if host is None else host
    effective_port = openerp.tools.config.get("ooport") if port is None else port
    return DocumentConverter(effective_host, effective_port)