def parse_folder(self, family, directory, version_suffix=None):
    """ Register all validator files found in ``directory``.

    Files are selected by extension: ``.dtd`` (DTD), ``.xsd`` (XML Schema),
    ``.rng`` (RelaxNG) and ``.sch`` (ISO Schematron). Files with any other
    extension are skipped. Every validator is stored in ``self.registry``
    under the key ``<family>::<name>``; if ``version_suffix`` is given the
    registered name becomes ``<basename>-<version_suffix><ext>``.

    Raises ``IOError`` if the directory can not be opened and
    ``ValueError`` if the key is already registered.
    """
    if directory.startswith('/'):
        directory = 'file://' + directory

    try:
        handle = fs.opener.fsopendir(directory)
    except Exception as e:
        raise IOError(
            u'Directory "{}" does not exist ({})'.format(directory, e))

    for name in handle.listdir():
        fullname = os.path.join(directory, name)
        stem, ext = os.path.splitext(name)

        if version_suffix:
            registered_name = '{}-{}{}'.format(stem, version_suffix, ext)
        else:
            registered_name = name
        key = '{}::{}'.format(family, registered_name)

        started = time.time()
        if ext == '.dtd':
            # A DTD is built directly from the open file object
            with handle.open(name, 'rb') as fp:
                validator = lxml.etree.DTD(fp)
            validator_type = 'DTD'
        elif ext in ('.xsd', '.rng', '.sch'):
            # All other validators are built from a parsed XML document
            with handle.open(name, 'rb') as fp:
                document = lxml.etree.XML(fp.read())
            if ext == '.xsd':
                validator = lxml.etree.XMLSchema(document)
                validator_type = 'XSD'
            elif ext == '.rng':
                validator = lxml.etree.RelaxNG(document)
                validator_type = 'RELAXNG'
            else:
                validator = lxml.isoschematron.Schematron(document)
                validator_type = 'SCHEMATRON'
        else:
            continue

        if key in self.registry:
            raise ValueError('{} already registered'.format(key))

        self.registry[key] = {
            'family': family,
            'name': registered_name,
            'validation': validator,
            'path': fullname,
            'info': handle.getinfo(name),
            'type': validator_type,
            'registered': datetime.datetime.utcnow(),
        }
        LOG.info('Registered ({}, {}), duration: {:0.3f} seconds'.format(
            key, fullname, time.time() - started))
def register_stylesheet(self, family, stylesheet_name, stylesheet_path):
    """ Register the XSLT stylesheet stored at ``stylesheet_path``.

    The compiled transformation is stored in ``self.xslt_registry`` under
    the key ``<family>::<stylesheet_name>``.

    Raises ``ValueError`` if the key is already registered, if the file
    does not exist or if the stylesheet can not be parsed as XML or
    compiled as XSLT.
    """
    key = '{}::{}'.format(family, stylesheet_name)

    if key in self.xslt_registry:
        raise ValueError('Stylesheet {}/{} already registered'.format(
            family, stylesheet_name))

    if not os.path.exists(stylesheet_path):
        raise ValueError('Stylesheet {}/{} not found ({})'.format(
            family, stylesheet_name, stylesheet_path))

    # Read the raw bytes first, then parse and compile
    with open(stylesheet_path, 'rb') as fp:
        raw = fp.read()

    try:
        xslt = lxml.etree.XML(raw)
    except lxml.etree.XMLSyntaxError as e:
        raise ValueError(
            'Stylesheet {}/{} could not be parsed ({}, {})'.format(
                family, stylesheet_name, e, stylesheet_path))

    try:
        transform = lxml.etree.XSLT(xslt)
    except lxml.etree.XSLTParseError as e:
        raise ValueError(
            'Stylesheet {}/{} could not be parsed ({}, {})'.format(
                family, stylesheet_name, e, stylesheet_path))

    self.xslt_registry[key] = {
        'transform': transform,
        'path': stylesheet_path,
        'registered': datetime.datetime.utcnow(),
    }
    LOG.info('XSLT registered ({}, {})'.format(key, stylesheet_path))
def timed(self):
    """ Call ``method(self)`` and log how long the call took.

    ``method`` comes from the enclosing scope (not visible in this block).
    The log line contains the class name of ``self`` and the value of
    ``self.context.absolute_url(1)``. The result of ``method`` is
    returned unchanged.
    """
    path = self.context.absolute_url(1)
    started = time.time()
    result = method(self)
    elapsed = time.time() - started
    LOG.info(
        u'{:>25}(\'{}\')'.format(self.__class__.__name__, path) +
        u': {:2.6f} seconds'.format(elapsed))
    return result
def runcmd(cmd):
    """ Execute a command using the subprocess module.

    ``cmd`` is a command line string. Returns a tuple ``(status, output)``.

    On Windows the command is run without a shell (backslashes replaced
    by slashes) and ``(0, '')`` is always returned. On all other platforms
    the command is run through the shell with stdin connected to
    ``/dev/null``; ``status`` is the exit status of the process and
    ``output`` is the stripped stdout followed by the stripped stderr,
    decoded as UTF-8.
    """
    LOG.info(cmd)

    if win32:
        cmd = cmd.replace('\\', '/')
        s = Popen(cmd, shell=False)
        s.wait()
        # NOTE(review): the exit status is discarded here, callers always
        # see 0 on Windows -- confirm this is intended.
        return 0, ''

    # NOTE(security): ``cmd`` is passed to the shell verbatim. Callers must
    # never build it from untrusted input.
    with open('/dev/null') as devnull:
        p = Popen(
            cmd,
            shell=True,
            stdin=devnull,
            stdout=PIPE,
            stderr=PIPE,
        )
        # communicate() drains both pipes while waiting for the process.
        # Calling wait() first can deadlock as soon as the child fills a
        # pipe buffer.
        stdout_, stderr_ = p.communicate()

    status = p.returncode
    stdout_ = stdout_.strip()
    stderr_ = stderr_.strip()
    if stdout_:
        LOG.info(stdout_)
    if stderr_:
        LOG.info(stderr_)
    return status, (stdout_ + stderr_).decode('utf-8')
def runcmd(cmd):
    """ Execute a command using the subprocess module.

    ``cmd`` is a command line string. Returns a tuple ``(status, output)``.

    On Windows the command is run without a shell (backslashes replaced
    by slashes) and ``(0, '')`` is always returned. On all other platforms
    the command is run through the shell with stdin connected to
    ``/dev/null``; ``status`` is the exit status of the process and
    ``output`` is the stripped stdout followed by the stripped stderr,
    decoded as UTF-8.
    """
    LOG.info(cmd)

    if win32:
        cmd = cmd.replace('\\', '/')
        s = Popen(cmd, shell=False)
        s.wait()
        # NOTE(review): the exit status is discarded here, callers always
        # see 0 on Windows -- confirm this is intended.
        return 0, ''

    # NOTE(security): ``cmd`` is passed to the shell verbatim. Callers must
    # never build it from untrusted input.
    with open('/dev/null') as devnull:
        p = Popen(
            cmd,
            shell=True,
            stdin=devnull,
            stdout=PIPE,
            stderr=PIPE,
        )
        # communicate() drains both pipes while waiting for the process.
        # Calling wait() first can deadlock as soon as the child fills a
        # pipe buffer.
        stdout_, stderr_ = p.communicate()

    status = p.returncode
    stdout_ = stdout_.strip()
    stderr_ = stderr_.strip()
    if stdout_:
        LOG.info(stdout_)
    if stderr_:
        LOG.info(stderr_)
    return status, (stdout_ + stderr_).decode('utf-8')
def __call__(self, xml_or_node, input_encoding=None,
             output_encoding=unicode, return_fragment=None,
             pretty_print=False, debug=False):
    """ Run the transformation chain either on an XML document passed as
        ``xml_or_node`` parameter or as pre-parsed XML node
        (lxml.etree.Element).

        XML documents passed as string must be either of type unicode or
        you must specify an explicit ``input_encoding``.

        The result XML document is returned as unicode string unless a
        different ``output_encoding`` is specified; in that case the
        result is serialized with an XML declaration.

        In order to return a subelement of the result XML document you
        can specify a tag name using ``return_fragment``. The first
        matching child of the result root node is returned instead of
        the whole document.

        With ``debug`` enabled the input and output of every step is
        written into a fresh temporary directory.

        Raises ``TypeError`` for unsupported input types or for byte
        strings without ``input_encoding`` and ``ValueError`` if
        ``return_fragment`` can not be found.
    """

    # Check validness of the transformation chain first
    self.verify_steps()

    if debug:
        debug_dir = tempfile.mkdtemp(prefix='transformation_debug_')
        LOG.info('Transformation debug directory: {}'.format(debug_dir))

    # Convert XML string into a root node
    if isinstance(xml_or_node, basestring):
        if not isinstance(xml_or_node, unicode):
            if not input_encoding:
                raise TypeError('Input data must be unicode')
            xml_or_node = unicode(xml_or_node, input_encoding)
        root = defusedxml.lxml.fromstring(xml_or_node.strip())
    elif isinstance(xml_or_node, lxml.etree._Element):
        root = xml_or_node
    else:
        raise TypeError(u'Unsupported type {}'.format(
            xml_or_node.__class__))

    # run the transformation chain
    for step_no, step in enumerate(self.steps):
        family, name = step
        ts = time.time()
        transformer = self.registry.get_transformation(family, name)
        conversion_context = dict(
            context=self.context,
            request=getattr(self.context, 'REQUEST', None),
            destdir=self.destdir,
        )
        conversion_context.update(self.params)

        if debug:
            in_data = lxml.etree.tostring(root, encoding='utf8')
            in_data_fn = '{:02d}-{}-{}.in'.format(step_no, family, name)
            with open(os.path.join(debug_dir, in_data_fn), 'wb') as fp:
                fp.write(in_data)

        # A transformation is allowed to return a new root node (None
        # otherwise). The transformation chain will then continue in the
        # next transformation step with this new node.
        new_root = transformer(root, conversion_context=conversion_context)
        if new_root is not None:
            root = new_root

        if debug:
            out_data = lxml.etree.tostring(root, encoding='utf8')
            out_data_fn = '{:02d}-{}-{}.out'.format(step_no, family, name)
            with open(os.path.join(debug_dir, out_data_fn), 'wb') as fp:
                fp.write(out_data)

        LOG.info('Transformation %-30s: %3.6f seconds' %
                 (name, time.time() - ts))

    # optional: return a fragment given by the top-level tag name
    return_node = root
    if return_fragment:
        node = root.find(return_fragment)
        if node is None:
            raise ValueError(
                'No tag <{}> found in transformed document'.format(
                    return_fragment))
        return_node = node

    if output_encoding == unicode:
        return lxml.etree.tostring(return_node,
                                   encoding=unicode,
                                   pretty_print=pretty_print)

    if return_fragment:
        # getroottree() always yields the tree of the complete document,
        # so the fragment must be serialized from the node itself.
        # The tail text is dropped in order to keep exactly one root
        # element after the XML declaration.
        return lxml.etree.tostring(return_node,
                                   encoding=output_encoding,
                                   xml_declaration=True,
                                   pretty_print=pretty_print,
                                   with_tail=False)

    return lxml.etree.tostring(return_node.getroottree(),
                               encoding=output_encoding,
                               xml_declaration=True,
                               pretty_print=pretty_print)
def get_fs_wrapper(url, credentials=None, context=None):
    """ Return a filesystem wrapper for the given ``url``.

    A trailing slash is appended to ``url`` if missing. The wrapper class
    is chosen by URL scheme: ``file``, ``http``/``https`` (WebDAV),
    ``s3``, ``sftp``, ``ftp`` and ``dropbox``.

    ``credentials`` is a mapping with the keys ``username`` and
    ``password``; it is read for the ``s3``, ``sftp`` and ``ftp`` schemes
    and passed through for WebDAV. ``context`` is only used for
    ``dropbox`` (its annotations hold the Dropbox tokens).

    The (slash-terminated) URL is stored as ``wrapper.url``.

    Raises ``NotFound`` if the WebDAV resource does not exist,
    ``ImportError`` if an optional backend module is missing,
    ``RuntimeError`` if Dropbox is not authorized and ``ValueError`` for
    an unsupported or missing URL scheme.
    """
    if not url.endswith('/'):
        url += '/'
    f = furl(url)
    original_url = url
    # Guard against a missing scheme so that such URLs end up in the
    # "Unsupported URL schema" branch instead of an AttributeError.
    scheme = f.scheme or ''

    if scheme == 'file':
        # hack for OSFP, fix this
        path = urllib.unquote(url[7:])
        wrapper = OSFSWrapper(path, encoding='utf-8')
    elif scheme.startswith('http'):
        try:
            wrapper = DAVFSWrapper(original_url, credentials)
        except fs.errors.ResourceNotFoundError:
            LOG.info('Failed to get DAVFSWrapper for {}'.format(
                original_url), exc_info=True)
            raise NotFound(original_url)
        except Exception:
            LOG.error('Failed to get DAVFSWrapper for {}'.format(
                original_url), exc_info=True)
            # bare raise keeps the original traceback
            raise
    elif scheme == 's3':
        if have_boto:
            wrapper = S3FSWrapper(
                bucket=f.host,
                prefix=str(f.path),
                aws_access_key=credentials['username'],
                aws_secret_key=credentials['password'])
        else:
            raise ImportError(
                'boto module is not installed (required for S3 access)')
    elif scheme == 'sftp':
        f_path = urllib.unquote(str(f.path))
        if have_paramiko:
            wrapper = SFTPFSWrapper(
                connection=(f.host, f.port or 22),
                root_path=f_path,
                username=(credentials['username'] or None),
                password=(credentials['password'] or None))
            # NOTE(review): requires '.' to be file and directory at the
            # same time -- confirm the intended condition.
            if wrapper.isfile('.') and wrapper.isdir('.'):
                # Re-root the wrapper at the parent directory and remember
                # the last path segment as leaf filename
                parts = [part for part in f_path.split('/') if part]
                wrapper = SFTPFSWrapper(
                    connection=(f.host, f.port or 22),
                    root_path='/'.join(parts[:-1]),
                    username=(credentials['username'] or None),
                    password=(credentials['password'] or None))
                wrapper.__leaf__ = True
                wrapper.__leaf_filename__ = parts[-1]
        else:
            raise ImportError(
                'paramiko module is not installed '
                '(required for SFTP access)')
    elif scheme == 'ftp':
        wrapper = FTPFSWrapper(
            host=f.host,
            port=f.port,
            user=credentials['username'],
            passwd=credentials['password'])
    elif scheme == 'dropbox':
        registry = getUtility(IRegistry)
        settings = registry.forInterface(IDropboxSettings)
        annotation = IAnnotations(context)
        token_key = annotation.get(dropbox_authentication.DROPBOX_TOKEN_KEY)
        token_secret = annotation.get(
            dropbox_authentication.DROPBOX_TOKEN_SECRET)
        if not token_key or not token_secret:
            context = zope.globalrequest.getRequest().PUBLISHED.context
            authorization_url = '{}/authorize-dropbox'.format(
                context.absolute_url())
            raise RuntimeError(
                'Connector does not seem to be '
                'authorized with Dropbox (use {})'.format(authorization_url))
        wrapper = DropboxFSWrapper(
            settings.dropbox_app_key,
            settings.dropbox_app_secret,
            'dropbox',
            token_key,
            token_secret,
            root_path=urllib.unquote(str(f.path)))
        if wrapper.isfile('.'):
            wrapper.__leaf__ = True
            wrapper.__leaf_filename__ = '.'
    else:
        raise ValueError('Unsupported URL schema {}'.format(original_url))

    wrapper.url = url
    return wrapper
def register_transformation(self, family, transformer_name,
                            transformer_path, transformer_type='XSLT1'):
    """ Register a Transformation as tuple (``family``,
        ``transformer_name``).

        ``transformer_path`` is either an URI to the related
        transformation file on the filesystem (XSLT) or a Python
        function implementing the IWrapper.

        Supported ``transformer_type``s so far: 'XSLT1', 'XSLT2',
        'XSLT3', 'python'. Only XSLT1 stylesheets are compiled here; for
        XSLT2/XSLT3 the stylesheet path itself is stored as transform.

        Raises ``ValueError`` if the transformation file can not be
        opened or parsed, if the stylesheet version does not match
        ``transformer_type``, if ``transformer_type`` is unsupported or
        if the transformation is already registered.
    """
    if transformer_type == 'python':
        # ``transformer_path`` is Python function here
        transform = transformer_path
        # __code__ / __name__ exist on Python 2.6+ and Python 3 while
        # func_code / func_name are Python 2 only
        method_filename = transform.__code__.co_filename
        transformer_path = '{}(), {}'.format(
            transform.__name__, method_filename)
        dir_handle = fs.opener.fsopendir('{}/..'.format(method_filename))
        info = dir_handle.getinfo(os.path.basename(method_filename))

    elif transformer_type in ('XSLT1', 'XSLT2', 'XSLT3'):
        # Open the file exactly once: the former separate "existence
        # probe" handle was never closed.
        try:
            fp = fs.opener.opener.open(transformer_path, 'rb')
        except Exception as e:
            raise ValueError(
                'Transformation {}/{} not found ({}, {})'.format(
                    family, transformer_name, transformer_path, e))

        with fp:
            if transformer_path.startswith('/'):
                transformer_path = 'file://' + transformer_path
            dir_handle = fs.opener.fsopendir('{}/..'.format(fp.name))
            info = dir_handle.getinfo(os.path.basename(fp.name))
            try:
                xslt = lxml.etree.XML(fp.read())
            except lxml.etree.XMLSyntaxError as e:
                raise ValueError(
                    'Transformation {}/{} could not be parsed '
                    '({}, {})'.format(
                        family, transformer_name, e, transformer_path))

        # The major version of the stylesheet must match the trailing
        # digit of the transformer type. Slicing avoids an IndexError
        # for an empty ``version`` attribute.
        xslt_version = xslt.attrib.get('version', '1.0')
        if xslt_version[:1] != transformer_type[-1]:
            raise ValueError(
                'Stylesheet version "{}" does not match specified '
                'transformer_type "{}"'.format(
                    xslt_version, transformer_type))

        if transformer_type == 'XSLT1':
            try:
                transform = lxml.etree.XSLT(xslt)
            except lxml.etree.XSLTParseError as e:
                raise ValueError(
                    'Transformation {}/{} could not be parsed '
                    '({}, {})'.format(
                        family, transformer_name, e, transformer_path))
        else:
            # XSLT2+3
            transform = transformer_path

    else:
        raise ValueError(
            u'Unsupported transformer type "{}"'.format(transformer_type))

    key = '{}::{}'.format(family, transformer_name)
    if key in self.registry:
        raise ValueError(
            'Transformation {}/{} already registered'.format(
                family, transformer_name))

    self.registry[key] = dict(
        transform=transform,
        path=transformer_path,
        type=transformer_type,
        family=family,
        name=transformer_name,
        info=info,
        registered=datetime.datetime.utcnow())
    LOG.info(
        'Transformer registered ({}, {})'.format(key, transformer_path))
def __call__(
        self,
        xml_or_node,
        input_encoding=None,
        output_encoding=unicode,
        return_fragment=None,
        pretty_print=False,
        debug=False):
    """ Run the transformation chain either on an XML document passed as
        ``xml_or_node`` parameter or as pre-parsed XML node
        (lxml.etree.Element).

        XML documents passed as string must be either of type unicode or
        you must specify an explicit ``input_encoding``.

        The result XML document is returned as unicode string unless a
        different ``output_encoding`` is specified; in that case the
        result is serialized with an XML declaration.

        In order to return a subelement of the result XML document you
        can specify a tag name using ``return_fragment``. The first
        matching child of the result root node is returned instead of
        the whole document.

        With ``debug`` enabled the input and output of every step is
        written into a fresh temporary directory.

        Raises ``TypeError`` for unsupported input types or for byte
        strings without ``input_encoding`` and ``ValueError`` if
        ``return_fragment`` can not be found.
    """

    # Check validness of the transformation chain first
    self.verify_steps()

    if debug:
        debug_dir = tempfile.mkdtemp(prefix='transformation_debug_')
        LOG.info('Transformation debug directory: {}'.format(debug_dir))

    # Convert XML string into a root node
    if isinstance(xml_or_node, basestring):
        if not isinstance(xml_or_node, unicode):
            if not input_encoding:
                raise TypeError('Input data must be unicode')
            xml_or_node = unicode(xml_or_node, input_encoding)
        root = defusedxml.lxml.fromstring(xml_or_node.strip())
    elif isinstance(xml_or_node, lxml.etree._Element):
        root = xml_or_node
    else:
        raise TypeError(
            u'Unsupported type {}'.format(xml_or_node.__class__))

    # run the transformation chain
    for step_no, step in enumerate(self.steps):
        family, name = step
        ts = time.time()
        transformer = self.registry.get_transformation(family, name)
        conversion_context = dict(
            context=self.context,
            request=getattr(self.context, 'REQUEST', None),
            destdir=self.destdir,
        )
        conversion_context.update(self.params)

        if debug:
            in_data = lxml.etree.tostring(root, encoding='utf8')
            in_data_fn = '{:02d}-{}-{}.in'.format(step_no, family, name)
            with open(os.path.join(debug_dir, in_data_fn), 'wb') as fp:
                fp.write(in_data)

        # A transformation is allowed to return a new root node (None
        # otherwise). The transformation chain will then continue in the
        # next transformation step with this new node.
        new_root = transformer(root, conversion_context=conversion_context)
        if new_root is not None:
            root = new_root

        if debug:
            out_data = lxml.etree.tostring(root, encoding='utf8')
            out_data_fn = '{:02d}-{}-{}.out'.format(step_no, family, name)
            with open(os.path.join(debug_dir, out_data_fn), 'wb') as fp:
                fp.write(out_data)

        LOG.info('Transformation %-30s: %3.6f seconds' %
                 (name, time.time() - ts))

    # optional: return a fragment given by the top-level tag name
    return_node = root
    if return_fragment:
        node = root.find(return_fragment)
        if node is None:
            raise ValueError(
                'No tag <{}> found in transformed document'.format(
                    return_fragment))
        return_node = node

    if output_encoding == unicode:
        return lxml.etree.tostring(
            return_node,
            encoding=unicode,
            pretty_print=pretty_print)

    if return_fragment:
        # getroottree() always yields the tree of the complete document,
        # so the fragment must be serialized from the node itself.
        # The tail text is dropped in order to keep exactly one root
        # element after the XML declaration.
        return lxml.etree.tostring(
            return_node,
            encoding=output_encoding,
            xml_declaration=True,
            pretty_print=pretty_print,
            with_tail=False)

    return lxml.etree.tostring(
        return_node.getroottree(),
        encoding=output_encoding,
        xml_declaration=True,
        pretty_print=pretty_print)
def parse_folder(self, family, directory, version_suffix=None):
    """ Parse a given folder for validator files and register them.

    Supported files: DTD (.dtd), XML Schema (.xsd), RelaxNG (.rng) and
    ISO Schematron (.sch). Files with other extensions are ignored.

    Every validator is stored in ``self.registry`` under the key
    ``<family>::<name>``; if ``version_suffix`` is given the registered
    name becomes ``<basename>-<version_suffix><ext>``. XML Schema files
    that can not be parsed are logged and skipped.

    Raises ``IOError`` if the directory can not be opened and
    ``ValueError`` if the key is already registered.
    """
    if directory.startswith('/'):
        directory = 'file://' + directory

    try:
        handle = fs.opener.fsopendir(directory)
    except Exception as e:
        raise IOError(
            u'Directory "{}" does not exist ({})'.format(directory, e))

    for name in handle.listdir():
        fullname = os.path.join(directory, name)
        LOG.debug(u'Parsing "{}"'.format(fullname))

        basename, ext = os.path.splitext(name)
        registered_name = name
        if version_suffix:
            registered_name = '{}-{}{}'.format(
                basename, version_suffix, ext)
        key = '{}::{}'.format(family, registered_name)

        ts = time.time()
        if ext == '.dtd':
            with handle.open(name, 'rb') as fp:
                validator = lxml.etree.DTD(fp)
            validator_type = 'DTD'
        elif ext == '.xsd':
            with handle.open(name, 'rb') as fp:
                try:
                    schema_doc = lxml.etree.XML(fp.read())
                    validator = lxml.etree.XMLSchema(schema_doc)
                except Exception as e:
                    # Name the offending file, the preceding debug
                    # message is usually not part of the log.
                    LOG.error(
                        u'Unable to parse XML Schema "{}" ({})'.format(
                            fullname, e), exc_info=True)
                    continue
            validator_type = 'XSD'
        elif ext == '.rng':
            with handle.open(name, 'rb') as fp:
                relaxng_doc = lxml.etree.XML(fp.read())
                validator = lxml.etree.RelaxNG(relaxng_doc)
            validator_type = 'RELAXNG'
        elif ext == '.sch':
            with handle.open(name, 'rb') as fp:
                schematron_doc = lxml.etree.XML(fp.read())
                validator = lxml.isoschematron.Schematron(schematron_doc)
            validator_type = 'SCHEMATRON'
        else:
            continue

        if key in self.registry:
            raise ValueError('{} already registered'.format(key))

        duration = time.time() - ts
        self.registry[key] = dict(
            family=family,
            name=registered_name,
            validation=validator,
            path=fullname,
            info=handle.getinfo(name),
            duration=duration,
            type=validator_type,
            registered=datetime.datetime.utcnow())

        # Loading that takes longer than three seconds is reported as
        # warning in addition to the regular info message.
        if duration > 3:
            LOG.warning(
                'Slow loading/parsing of ({}, {}), '
                'duration: {:0.3f} seconds'.format(key, fullname, duration))
        LOG.info('Registered ({}, {}), duration: {:0.3f} seconds'.format(
            key, fullname, duration))
# -*- coding: utf-8 -*-

################################################################
# xmldirector.plonecore
# (C) 2016, Andreas Jung, www.zopyx.com, Tuebingen, Germany
################################################################

import os
import sys
import pkg_resources

from xmldirector.plonecore.logger import LOG

__import__('pkg_resources').declare_namespace(__name__)


# Check filesystem encoding
# sys.getfilesystemencoding() may return None on Python 2 (system default
# encoding in use). Treat that as "not UTF-8" instead of failing with an
# AttributeError while the package is imported.
fs_enc = sys.getfilesystemencoding()
if (fs_enc or '').lower() not in ('utf8', 'utf-8'):
    LOG.error('Filesystem encoding should be UTF-8, not {}'.format(fs_enc))


# import patches only for Plone 5
# (the import is done for its side effects only)
dist = pkg_resources.get_distribution('Products.CMFPlone')
if dist.version.startswith('5'):
    import patches
    LOG.info('Applied patched for Plone 5')