class Handler(object):
    """Handler giving access to one document through OpenOffice.

    One instance is created per input document. The instance owns a
    FileSystemDocument (temporary copy of the data on the file system) and
    drives the external ``helper/unoconverter.py`` script to convert the
    document or to read/write its metadata.
    """
    implements(IHandler)

    def __init__(self, base_folder_url, data, source_format, **kw):
        """Create the document on the file system and store the settings.

        Keyword arguments read from ``kw``:
        zip -- passed to ``document.getContent`` by ``convert`` (default False)
        uno_path -- falls back to ``environ["uno_path"]`` when empty
        office_binary_path -- falls back to ``environ["office_binary_path"]``
        timeout -- value handed to MonitorTimeout (default 600)
        refresh -- JSON-encoded and forwarded to the converter (default False)
        """
        self.document = FileSystemDocument(base_folder_url, data, source_format)
        self.zip = kw.get('zip', False)
        self.uno_path = kw.get("uno_path") or environ.get("uno_path")
        self.office_binary_path = (kw.get("office_binary_path") or
                                   environ.get("office_binary_path"))
        self.timeout = kw.get("timeout", 600)
        self.refresh = kw.get('refresh', False)
        self.source_format = source_format

    def _getCommand(self, *args, **kw):
        """Build the argument list used to run ``unoconverter.py``.

        Each positional argument becomes a ``--<arg>`` flag and each keyword
        argument a ``--<key>=<value>`` option. The OpenOffice hostname and
        port are always added to the options.
        """
        hostname, port = openoffice.getAddress()
        kw['hostname'] = hostname
        kw['port'] = port
        # Prefer the interpreter found in the office binary folder, if any.
        bundled_python = path.join(self.office_binary_path, "python")
        interpreter = bundled_python if path.exists(bundled_python) else "python"
        command_list = [
            interpreter,
            pkg_resources.resource_filename(
                __name__, path.join("helper", "unoconverter.py")),
            "--uno_path=%s" % self.uno_path,
            "--office_binary_path=%s" % self.office_binary_path,
            '--document_url=%s' % self.document.getUrl(),
        ]
        # Flags are always inserted at index 3, so they end up in reverse
        # order, right after --uno_path.
        for arg in args:
            command_list.insert(3, "--%s" % arg)
        for key, value in kw.items():
            command_list.append("--%s=%s" % (key, value))
        return command_list

    def _startTimeout(self):
        """Create a MonitorTimeout for openoffice and start it."""
        self.monitor = MonitorTimeout(openoffice, self.timeout)
        self.monitor.start()

    def _stopTimeout(self):
        """Terminate the monitor created by ``_startTimeout``."""
        self.monitor.terminate()

    def _subprocess(self, command_list):
        """Run ``command_list`` as a child process under the timeout monitor.

        Returns the ``(stdout, stderr)`` pair collected from the child.
        Any exception raised while spawning or reading the child propagates
        unchanged; the monitor is always stopped.
        """
        if monitor_sleeping_time is not None:
            monitor_sleeping_time.touch()
        # Start the monitor before entering the try block so that
        # _stopTimeout is only reached when a monitor was really started.
        self._startTimeout()
        # Stays None when Popen itself fails, so the cleanup below does not
        # raise a NameError that would hide the original exception.
        process = None
        try:
            process = Popen(command_list, stdout=PIPE, stderr=PIPE,
                            close_fds=True,
                            env=openoffice.environment_dict.copy())
            stdout, stderr = process.communicate()
        finally:
            self._stopTimeout()
            if process is not None and pid_exists(process.pid):
                process.terminate()
        return stdout, stderr

    def _callUnoConverter(self, *feature_list, **kw):
        """Run the converter script, retrying once after an OpenOffice restart.

        OpenOffice is started first when ``openoffice.status()`` is false.
        When the first run prints nothing on stdout and stderr mentions an
        ``...Exception`` or ``...Error``, the original document is restored,
        OpenOffice is restarted and the command is run a second time.

        Returns the ``(stdout, stderr)`` pair of the last run.
        Raises Exception with the stderr content when the second run writes
        anything to stderr.
        """
        if not openoffice.status():
            openoffice.start()
        command_list = self._getCommand(*feature_list, **kw)
        stdout, stderr = self._subprocess(command_list)
        if not stdout and re.search(r"\w*Exception|\w*Error", stderr):
            logger.debug(stderr)
            self.document.restoreOriginal()
            openoffice.restart()
            # The restored document may live at another URL; it is passed as
            # an extra --document_url option appended after the first one.
            kw['document_url'] = self.document.getUrl()
            command = self._getCommand(*feature_list, **kw)
            stdout, stderr = self._subprocess(command)
            if stderr != "":
                raise Exception(stderr)
        return stdout, stderr

    def _serializeMimemapper(self, source_extension=None,
                             destination_extension=None):
        """Serialize to JSON the parts of mimemapper needed by the converter.

        Without ``destination_extension`` only ``mimetype_by_filter_type`` is
        serialized. Otherwise the result also holds
        ``doc_type_list_by_extension`` and a ``filter_list`` of
        ``(destination_extension, service_type, filter_name)`` entries, one
        per service type registered for ``source_extension`` (or every
        document service when the extension is unknown).
        """
        if destination_extension is None:
            return json.dumps(dict(
                mimetype_by_filter_type=mimemapper._mimetype_by_filter_type))
        service_type_list = mimemapper._doc_type_list_by_extension.get(
            source_extension, mimemapper.document_service_list)
        filter_list = [
            (destination_extension,
             service_type,
             mimemapper.getFilterName(destination_extension, service_type))
            for service_type in service_type_list
        ]
        logger.debug("Filter List: %r" % filter_list)
        return json.dumps(dict(
            doc_type_list_by_extension=mimemapper._doc_type_list_by_extension,
            filter_list=filter_list,
            mimetype_by_filter_type=mimemapper._mimetype_by_filter_type))

    def convert(self, destination_format=None, **kw):
        """Convert the document to another format supported by OpenOffice.

        Keyword Arguments:
        destination_format -- extension of the target document as String

        Extra keyword arguments are forwarded to the converter script.
        Returns the content of the converted document; the temporary
        document is trashed afterwards.
        """
        logger.debug("OooConvert: %s > %s" % (self.source_format,
                                              destination_format))
        kw['source_format'] = self.source_format
        if destination_format:
            kw['destination_format'] = destination_format
        kw['mimemapper'] = self._serializeMimemapper(self.source_format,
                                                     destination_format)
        kw['refresh'] = json.dumps(self.refresh)
        openoffice.acquire()
        try:
            stdout, _ = self._callUnoConverter('convert', **kw)
        finally:
            openoffice.release()
        # The converter prints the URL of the generated file on stdout.
        url = stdout.replace('\n', '')
        self.document.reload(url)
        content = self.document.getContent(self.zip)
        self.document.trash()
        return content

    def getMetadata(self, base_document=False):
        """Return a dictionary with all metadata of the document.

        Keyword Arguments:
        base_document -- Boolean. If true, the 'convert' feature is requested
        as well, so that the document can be returned with the metadata.

        When the converter output contains a ``document_url`` key, that
        document is loaded, its content is stored under ``Data`` and the
        ``document_url`` key is removed.
        """
        logger.debug("getMetadata")
        kw = dict(mimemapper=self._serializeMimemapper())
        if base_document:
            feature_list = ['getmetadata', 'convert']
        else:
            feature_list = ['getmetadata']
        openoffice.acquire()
        try:
            stdout, _ = self._callUnoConverter(*feature_list, **kw)
        finally:
            openoffice.release()
        # The converter output is base64-encoded JSON.
        metadata = json.loads(decodestring(stdout))
        if 'document_url' in metadata:
            self.document.reload(metadata['document_url'])
            metadata['Data'] = self.document.getContent()
            del metadata['document_url']
        self.document.trash()
        return metadata

    def setMetadata(self, metadata):
        """Return the document content after applying new metadata.

        Keyword arguments:
        metadata -- dictionary with the metadata to set; it is sent to the
        converter as base64-encoded JSON.
        """
        metadata_pickled = json.dumps(metadata)
        logger.debug("setMetadata")
        kw = dict(metadata=encodestring(metadata_pickled))
        openoffice.acquire()
        try:
            self._callUnoConverter('setmetadata', **kw)
        finally:
            openoffice.release()
        doc_loaded = self.document.getContent()
        self.document.trash()
        return doc_loaded
def _startTimeout(self):
    """Create a MonitorTimeout for openoffice, keep it on self and start it."""
    monitor = MonitorTimeout(openoffice, self.timeout)
    self.monitor = monitor
    monitor.start()
class Handler(object):
    """Handler giving access to one document through OpenOffice.

    One instance is created per input document. The instance owns a
    FileSystemDocument (temporary copy of the data on the file system) and
    drives the external ``helper/unoconverter.py`` script to convert the
    document or to read/write its metadata.
    """
    implements(IHandler)

    def __init__(self, base_folder_url, data, source_format, **kw):
        """Store the settings and create the document on the file system.

        Keyword arguments read from ``kw``:
        zip -- passed to ``document.getContent`` by ``convert`` (default False)
        uno_path -- falls back to ``environ["uno_path"]`` when empty
        office_binary_path -- falls back to ``environ["office_binary_path"]``
        timeout -- value handed to MonitorTimeout (default 600)
        refresh -- JSON-encoded and forwarded to the converter (default False)
        """
        self.zip = kw.get('zip', False)
        self.uno_path = kw.get("uno_path") or environ.get("uno_path")
        self.office_binary_path = (kw.get("office_binary_path") or
                                   environ.get("office_binary_path"))
        self.timeout = kw.get("timeout", 600)
        self.refresh = kw.get('refresh', False)
        self.source_format = source_format
        self._createDocument(base_folder_url, data, source_format)

    def _createDocument(self, base_folder_url, data, source_format):
        """Create ``self.document``, normalising csv data to utf-8 first."""
        if source_format == 'csv':
            # Cloudooo expects utf-8 encoded csv, but also tolerates latin9
            # for backward compatibility.
            # The heuristic is: if it is not utf-8, assume it is iso-8859-15.
            try:
                unicode(data, 'utf-8')
            except UnicodeDecodeError:
                data = unicode(data, 'iso-8859-15').encode('utf-8')
                logger.warn("csv data is not utf-8, assuming iso-8859-15")
        self.document = FileSystemDocument(
            base_folder_url, data, source_format)

    def _getCommand(self, *args, **kw):
        """Build the argument list used to run ``unoconverter.py``.

        Each positional argument becomes a ``--<arg>`` flag and each keyword
        argument a ``--<key>=<value>`` option. The OpenOffice hostname and
        port are always added to the options.
        """
        hostname, port = openoffice.getAddress()
        kw['hostname'] = hostname
        kw['port'] = port
        # Prefer the interpreter found in the office binary folder, if any.
        bundled_python = path.join(self.office_binary_path, "python")
        interpreter = bundled_python if path.exists(bundled_python) else "python"
        command_list = [
            interpreter,
            pkg_resources.resource_filename(
                __name__, path.join("helper", "unoconverter.py")),
            "--uno_path=%s" % self.uno_path,
            "--office_binary_path=%s" % self.office_binary_path,
            '--document_url=%s' % self.document.getUrl(),
        ]
        # Flags are always inserted at index 3, so they end up in reverse
        # order, right after --uno_path.
        for arg in args:
            command_list.insert(3, "--%s" % arg)
        for key, value in kw.items():
            command_list.append("--%s=%s" % (key, value))
        return command_list

    def _startTimeout(self):
        """Create a MonitorTimeout for openoffice and start it."""
        self.monitor = MonitorTimeout(openoffice, self.timeout)
        self.monitor.start()

    def _stopTimeout(self):
        """Terminate the monitor created by ``_startTimeout``."""
        self.monitor.terminate()

    def _subprocess(self, command_list):
        """Run ``command_list`` as a child process under the timeout monitor.

        Returns the ``(stdout, stderr)`` pair collected from the child.
        Any exception raised while spawning or reading the child propagates
        unchanged; the monitor is always stopped.
        """
        if monitor_sleeping_time is not None:
            monitor_sleeping_time.touch()
        # Start the monitor before entering the try block so that
        # _stopTimeout is only reached when a monitor was really started.
        self._startTimeout()
        # Stays None when Popen itself fails, so the cleanup below does not
        # raise a NameError that would hide the original exception.
        process = None
        try:
            process = Popen(command_list, stdout=PIPE, stderr=PIPE,
                            close_fds=True,
                            env=openoffice.environment_dict.copy())
            stdout, stderr = process.communicate()
        finally:
            self._stopTimeout()
            if process is not None and pid_exists(process.pid):
                process.terminate()
        return stdout, stderr

    def _callUnoConverter(self, *feature_list, **kw):
        """Run the converter script, retrying once after an OpenOffice restart.

        OpenOffice is started first when ``openoffice.status()`` is false.
        When a run prints nothing on stdout but something on stderr, it is
        treated as failed: the error is logged, the original document is
        restored, OpenOffice is restarted and the command is run again.

        Returns the ``(stdout, stderr)`` pair of the last run.
        Raises Exception carrying both error outputs when the second run
        fails the same way.
        """
        if not openoffice.status():
            openoffice.start()
        command_list = self._getCommand(*feature_list, **kw)
        stdout, stderr = self._subprocess(command_list)
        if not stdout and stderr:
            first_error = stderr
            logger.error(stderr)
            self.document.restoreOriginal()
            openoffice.restart()
            # The restored document may live at another URL; it is passed as
            # an extra --document_url option appended after the first one.
            kw['document_url'] = self.document.getUrl()
            command = self._getCommand(*feature_list, **kw)
            stdout, stderr = self._subprocess(command)
            if not stdout and stderr:
                second_error = "\nerror of the second run: " + stderr
                logger.error(second_error)
                raise Exception(first_error + second_error)
        return stdout, stderr

    def _serializeMimemapper(self, source_extension=None,
                             destination_extension=None):
        """Serialize to JSON the parts of mimemapper needed by the converter.

        Without ``destination_extension`` only ``mimetype_by_filter_type`` is
        serialized. Otherwise the result also holds
        ``doc_type_list_by_extension`` and a ``filter_list`` of
        ``(destination_extension, service_type, filter_name)`` entries, one
        per service type registered for ``source_extension`` (or every
        document service when the extension is unknown).
        """
        if destination_extension is None:
            return json.dumps(dict(
                mimetype_by_filter_type=mimemapper._mimetype_by_filter_type))
        service_type_list = mimemapper._doc_type_list_by_extension.get(
            source_extension, mimemapper.document_service_list)
        filter_list = [
            (destination_extension,
             service_type,
             mimemapper.getFilterName(destination_extension, service_type))
            for service_type in service_type_list
        ]
        logger.debug("Filter List: %r" % filter_list)
        return json.dumps(dict(
            doc_type_list_by_extension=mimemapper._doc_type_list_by_extension,
            filter_list=filter_list,
            mimetype_by_filter_type=mimemapper._mimetype_by_filter_type))

    def convert(self, destination_format=None, **kw):
        """Convert the document to another format supported by OpenOffice.

        Keyword Arguments:
        destination_format -- extension of the target document as String

        Extra keyword arguments are forwarded to the converter script.
        Returns the content of the converted document; the temporary
        document is trashed afterwards.
        """
        logger.debug("OooConvert: %s > %s" % (self.source_format,
                                              destination_format))
        kw['source_format'] = self.source_format
        if destination_format:
            kw['destination_format'] = destination_format
        kw['mimemapper'] = self._serializeMimemapper(self.source_format,
                                                     destination_format)
        kw['refresh'] = json.dumps(self.refresh)
        openoffice.acquire()
        try:
            stdout, _ = self._callUnoConverter('convert', **kw)
        finally:
            openoffice.release()
        # The converter prints the URL of the generated file on stdout.
        url = stdout.replace('\n', '')
        self.document.reload(url)
        content = self.document.getContent(self.zip)
        self.document.trash()
        return content

    def getMetadata(self, base_document=False):
        """Return a dictionary with all metadata of the document.

        Keyword Arguments:
        base_document -- Boolean. If true, the 'convert' feature is requested
        as well, so that the document can be returned with the metadata.

        When the converter output contains a ``document_url`` key, that
        document is loaded, its content is stored under ``Data`` and the
        ``document_url`` key is removed.
        """
        logger.debug("getMetadata")
        kw = dict(mimemapper=self._serializeMimemapper())
        if base_document:
            feature_list = ['getmetadata', 'convert']
        else:
            feature_list = ['getmetadata']
        openoffice.acquire()
        try:
            stdout, _ = self._callUnoConverter(*feature_list, **kw)
        finally:
            openoffice.release()
        # The converter output is base64-encoded JSON.
        metadata = json.loads(decodestring(stdout))
        if 'document_url' in metadata:
            self.document.reload(metadata['document_url'])
            metadata['Data'] = self.document.getContent()
            del metadata['document_url']
        self.document.trash()
        return metadata

    def setMetadata(self, metadata):
        """Return the document content after applying new metadata.

        Keyword arguments:
        metadata -- dictionary with the metadata to set; it is sent to the
        converter as base64-encoded JSON.
        """
        metadata_pickled = json.dumps(metadata)
        logger.debug("setMetadata")
        kw = dict(metadata=encodestring(metadata_pickled))
        openoffice.acquire()
        try:
            self._callUnoConverter('setmetadata', **kw)
        finally:
            openoffice.release()
        doc_loaded = self.document.getContent()
        self.document.trash()
        return doc_loaded

    @staticmethod
    def getAllowedConversionFormatList(source_mimetype):
        """Return the (mimetype, title) pairs the source can be converted to.

        ``source_mimetype`` is either a content type (anything containing a
        "/") or directly an extension. The result is a list of unique
        tuples in no particular order, e.g.

        [('application/vnd.oasis.opendocument.text', 'ODF Text Document'),
         ('application/pdf', 'PDF - Portable Document Format'),
         ...
        ]
        """
        # XXX please never guess extension from mimetype
        output_set = set()
        if "/" in source_mimetype:
            parsed_mimetype_type = parseContentType(source_mimetype).gettype()
            # here `guess_all_extensions` never handles mimetype parameters
            # (even for `text/plain;charset=UTF-8` which is standard)
            extension_list = mimetypes.guess_all_extensions(
                parsed_mimetype_type)  # XXX never guess
        else:
            extension_list = [source_mimetype]

        for source_ext in extension_list:
            allowed_list = mimemapper.getAllowedExtensionList(
                extension=source_ext.replace(".", ""))
            # A distinct name is used for the target extension so the outer
            # loop variable is not rebound.
            for target_ext, title in allowed_list:
                if target_ext in ("fodt", ".fodt"):  # BBB
                    output_set.add(
                        ("application/vnd.oasis.opendocument.text-flat-xml",
                         title))
                    continue
                if target_ext:
                    mimetype, _ = mimetypes.guess_type(
                        "a." + target_ext)  # XXX never guess
                    if mimetype:
                        output_set.add((mimetype, title))
        return list(output_set)