def parse_folder(self, family, directory, version_suffix=None):
        """ Parse a given folder for XML schema files (.xsd) or
            DTD files (.dtd).
        """

        if directory.startswith('/'):
            directory = 'file://' + directory

        try:
            handle = fs.opener.fsopendir(directory)
        except Exception as e:
            raise IOError(
                u'Directory "{}" does not exist ({})'.format(directory, e))

        for name in handle.listdir():
            fullname = os.path.join(directory, name)
            base, ext = os.path.splitext(name)

            registered_name = name
            if version_suffix:
                basename, ext = os.path.splitext(name)
                registered_name = '{}-{}{}'.format(basename, version_suffix, ext)

            key = '{}::{}'.format(family, registered_name)
            ts = time.time()
            if ext == '.dtd':
                with handle.open(name, 'rb') as fp:
                    validator = lxml.etree.DTD(fp)
                    validator_type = 'DTD'
            elif ext == '.xsd':
                with handle.open(name, 'rb') as fp:
                    schema_doc = lxml.etree.XML(fp.read())
                    validator = lxml.etree.XMLSchema(schema_doc)
                    validator_type = 'XSD'
            elif ext == '.rng':
                with handle.open(name, 'rb') as fp:
                    relaxng_doc = lxml.etree.XML(fp.read())
                    validator = lxml.etree.RelaxNG(relaxng_doc)
                    validator_type = 'RELAXNG'
            elif ext == '.sch':
                with handle.open(name, 'rb') as fp:
                    relaxng_doc = lxml.etree.XML(fp.read())
                    validator = lxml.isoschematron.Schematron(relaxng_doc)
                    validator_type = 'SCHEMATRON'
            else:
                continue

            if key in self.registry:
                raise ValueError('{} already registered'.format(key))


            self.registry[key] = dict(
                family=family,
                name=registered_name,
                validation=validator,
                path=fullname,
                info=handle.getinfo(name),
                type=validator_type,
                registered=datetime.datetime.utcnow())
            LOG.info('Registered ({}, {}), duration: {:0.3f} seconds'.format(key, fullname, time.time() - ts))
예제 #2
0
    def register_stylesheet(self, family, stylesheet_name, stylesheet_path):
        """ Register a Stylesheet as tuple (family, stylesheet_name) """

        key = '{}::{}'.format(family, stylesheet_name)
        if key in self.xslt_registry:
            raise ValueError('Stylesheet {}/{} already registered'.format(
                family, stylesheet_name))

        if not os.path.exists(stylesheet_path):
            raise ValueError('Stylesheet {}/{} not found ({})'.format(
                family, stylesheet_name, stylesheet_path))

        with open(stylesheet_path, 'rb') as fp:
            try:
                xslt = lxml.etree.XML(fp.read())
            except lxml.etree.XMLSyntaxError as e:
                raise ValueError(
                    'Stylesheet {}/{} could not be parsed ({}, {})'.format(
                        family, stylesheet_name, e, stylesheet_path))

            try:
                transform = lxml.etree.XSLT(xslt)
            except lxml.etree.XSLTParseError as e:
                raise ValueError(
                    'Stylesheet {}/{} could not be parsed ({}, {})'.format(
                        family, stylesheet_name, e, stylesheet_path))

            self.xslt_registry[key] = dict(
                transform=transform,
                path=stylesheet_path,
                registered=datetime.datetime.utcnow())
            LOG.info('XSLT registered ({}, {})'.format(key, stylesheet_path))
 def timed(self):
     path = self.context.absolute_url(1)
     ts = time.time()
     result = method(self)
     te = time.time()
     s = u'{:>25}(\'{}\')'.format(self.__class__.__name__, path)
     s = s + u': {:2.6f} seconds'.format(te - ts)
     LOG.info(s)
     return result
예제 #4
0
def runcmd(cmd):
    """ Execute a command using the subprocess module """

    LOG.info(cmd)
    if win32:
        cmd = cmd.replace('\\', '/')
        s = Popen(cmd, shell=False)
        s.wait()
        return 0, ''
    else:
        stdin = open('/dev/null')
        stdout = stderr = PIPE
        p = Popen(
            cmd,
            shell=True,
            stdin=stdin,
            stdout=stdout,
            stderr=stderr,
        )

        status = p.wait()
        stdout_ = p.stdout.read().strip()
        stderr_ = p.stderr.read().strip()

        if stdout_:
            LOG.info(stdout_)
        if stderr_:
            LOG.info(stderr_)
        return status, (stdout_ + stderr_).decode('utf-8')
예제 #5
0
def runcmd(cmd):
    """ Execute a command using the subprocess module """

    LOG.info(cmd)
    if win32:
        cmd = cmd.replace('\\', '/')
        s = Popen(cmd, shell=False)
        s.wait()
        return 0, ''
    else:
        stdin = open('/dev/null')
        stdout = stderr = PIPE
        p = Popen(cmd,
                  shell=True,
                  stdin=stdin,
                  stdout=stdout,
                  stderr=stderr,
                  )

        status = p.wait()
        stdout_ = p.stdout.read().strip()
        stderr_ = p.stderr.read().strip()

        if stdout_:
            LOG.info(stdout_)
        if stderr_:
            LOG.info(stderr_)
        return status, (stdout_ + stderr_).decode('utf-8')
    def __call__(self,
                 xml_or_node,
                 input_encoding=None,
                 output_encoding=unicode,
                 return_fragment=None,
                 pretty_print=False,
                 debug=False):
        """ Run the transformation chain either on an XML document passed as ``xml_or_node`` parameter
            or as pre-parsed XML node (lxml.etree.Element). XML documents passed as string must be either of type
            unicode or you must specify an explicit ``input_encoding``. The result XML document is returned as
            unicode string unless a different ``output_encoding`` is specified. In order to return a subelement
            from the result XML document you can specify a tag name using ``return_fragment`` in order the subdocument
            starting with the given tag name.
        """

        # Check validness of the transformation chain first
        self.verify_steps()

        if debug:
            debug_dir = tempfile.mkdtemp(prefix='transformation_debug_')
            LOG.info('Transformation debug directory: {}'.format(debug_dir))

        # Convert XML string into a root node
        if isinstance(xml_or_node, basestring):
            if not isinstance(xml_or_node, unicode):
                if not input_encoding:
                    raise TypeError('Input data must be unicode')
                    xml_or_node = unicode(xml_or_node, input_encoding)
            root = defusedxml.lxml.fromstring(xml_or_node.strip())

        elif isinstance(xml_or_node, lxml.etree._Element):
            root = xml_or_node

        else:
            raise TypeError(u'Unsupported type {}'.format(
                xml_or_node.__class__))

        # run the transformation chain
        for step_no, step in enumerate(self.steps):
            family, name = step
            ts = time.time()
            transformer = self.registry.get_transformation(family, name)
            conversion_context = dict(
                context=self.context,
                request=getattr(self.context, 'REQUEST', None),
                destdir=self.destdir,
            )
            conversion_context.update(self.params)

            if debug:
                in_data = lxml.etree.tostring(root, encoding='utf8')
                in_data_fn = '{:02d}-{}-{}.in'.format(step_no, family, name)
                with open(os.path.join(debug_dir, in_data_fn), 'wb') as fp:
                    fp.write(in_data)

            # A transformation is allowed to return a new root node (None
            # otherwise).  The transformation chain will then continue in the
            # next transformation step with this new node.
            new_root = transformer(root, conversion_context=conversion_context)
            if new_root is not None:
                root = new_root

            if debug:
                out_data = lxml.etree.tostring(root, encoding='utf8')
                out_data_fn = '{:02d}-{}-{}.out'.format(step_no, family, name)
                with open(os.path.join(debug_dir, out_data_fn), 'wb') as fp:
                    fp.write(out_data)

            LOG.info('Transformation %-30s: %3.6f seconds' %
                     (name, time.time() - ts))

        # optional: return a fragment given by the top-level tag name
        return_node = root
        if return_fragment:
            node = root.find(return_fragment)
            if node is None:
                raise ValueError(
                    'No tag <{}> found in transformed document'.format(
                        return_fragment))
            return_node = node

        if output_encoding == unicode:
            return lxml.etree.tostring(return_node,
                                       encoding=unicode,
                                       pretty_print=pretty_print)
        else:
            return lxml.etree.tostring(return_node.getroottree(),
                                       encoding=output_encoding,
                                       xml_declaration=True,
                                       pretty_print=pretty_print)
예제 #7
0
def get_fs_wrapper(url, credentials=None, context=None):

    if not url.endswith('/'):
        url += '/'
    f = furl(url)
    original_url = url
    if f.scheme == 'file':
        # hack for OSFP, fix this
        path = urllib.unquote(url[7:])
        wrapper = OSFSWrapper(path, encoding='utf-8')
    elif f.scheme.startswith(('http', 'https')):
        try:
            wrapper = DAVFSWrapper(original_url, credentials)
        except fs.errors.ResourceNotFoundError:
            LOG.info('Failed to get DAVFSWrapper for {}'.format(original_url),
                     exc_info=True)
            raise NotFound(original_url)
        except Exception as e:
            LOG.error('Failed to get DAVFSWrapper for {}'.format(original_url),
                      exc_info=True)
            raise e
    elif f.scheme == 's3':
        if have_boto:
            wrapper = S3FSWrapper(bucket=f.host,
                                  prefix=str(f.path),
                                  aws_access_key=credentials['username'],
                                  aws_secret_key=credentials['password'])
        else:
            raise ImportError(
                'boto module is not installed (required for S3 access)')
    elif f.scheme == 'sftp':

        f_path = urllib.unquote(str(f.path))
        if have_paramiko:
            wrapper = SFTPFSWrapper(connection=(f.host, f.port or 22),
                                    root_path=f_path,
                                    username=(credentials['username'] or None),
                                    password=(credentials['password'] or None))

            if wrapper.isfile('.') and wrapper.isdir('.'):
                parts = filter(None, f_path.split('/'))
                wrapper = SFTPFSWrapper(connection=(f.host, f.port or 22),
                                        root_path='/'.join(parts[:-1]),
                                        username=(credentials['username']
                                                  or None),
                                        password=(credentials['password']
                                                  or None))
                wrapper.__leaf__ = True
                wrapper.__leaf_filename__ = parts[-1]
        else:
            raise ImportError(
                'paramiko module is not installed (required for SFTP access)')

    elif f.scheme == 'ftp':
        wrapper = FTPFSWrapper(host=f.host,
                               port=f.port,
                               user=credentials['username'],
                               passwd=credentials['password'])

    elif f.scheme == 'dropbox':

        registry = getUtility(IRegistry)
        settings = registry.forInterface(IDropboxSettings)
        annotation = IAnnotations(context)

        token_key = annotation.get(dropbox_authentication.DROPBOX_TOKEN_KEY)
        token_secret = annotation.get(
            dropbox_authentication.DROPBOX_TOKEN_SECRET)
        if not token_key or not token_secret:
            context = zope.globalrequest.getRequest().PUBLISHED.context
            authorization_url = '{}/authorize-dropbox'.format(
                context.absolute_url())
            raise RuntimeError(
                'Connector does not seem to be '
                'authorized with Dropbox (use {})'.format(authorization_url))

        wrapper = DropboxFSWrapper(
            settings.dropbox_app_key,
            settings.dropbox_app_secret,
            'dropbox',
            annotation[dropbox_authentication.DROPBOX_TOKEN_KEY],
            annotation[dropbox_authentication.DROPBOX_TOKEN_SECRET],
            root_path=urllib.unquote(str(f.path)))

        if wrapper.isfile('.'):
            wrapper.__leaf__ = True
            wrapper.__leaf_filename__ = '.'

    else:
        raise ValueError('Unsupported URL schema {}'.format(original_url))

    wrapper.url = url
    return wrapper
    def register_transformation(self, family, transformer_name, transformer_path, transformer_type='XSLT1'):
        """ Register a Transformation as tuple (``family``, ``transformer_name``).
            ``transformer_path`` is either an URI to the related transformation file on the filesystem (XSLT1)
            or a Python function implementing the IWrapper.

            Supported ``transformer_type``s so far: 'XSLT1', 'python'
        """

        if transformer_type == 'python':
            # ``transformer_path`` is Python function here
            transform = transformer_path
            method_filename = transform.func_code.co_filename
            transformer_path = '{}(), {}'.format(
                transformer_path.func_name, transformer_path.func_code.co_filename)
            dir_handle = fs.opener.fsopendir('{}/..'.format(method_filename))
            info = dir_handle.getinfo(os.path.basename(method_filename))

        elif transformer_type in ('XSLT1', 'XSLT2', 'XSLT3'):

            try:
                handle = fs.opener.opener.open(transformer_path)
            except Exception as e:
                raise ValueError(
                    'Transformation {}/{} not found ({}, {})'.format(family, transformer_name, transformer_path, e))

            with fs.opener.opener.open(transformer_path, 'rb') as fp:

                if transformer_path.startswith('/'):
                    transformer_path = 'file://' + transformer_path

                dir_handle = fs.opener.fsopendir('{}/..'.format(handle.name))
                info = dir_handle.getinfo(os.path.basename(handle.name))

                try:
                    xslt = lxml.etree.XML(fp.read())
                except lxml.etree.XMLSyntaxError as e:
                    raise ValueError(
                        'Transformation {}/{} could not be parsed ({}, {})'.format(
                            family,
                            transformer_name,
                            e,
                            transformer_path))

                xslt_version = xslt.attrib.get('version', '1.0')
                if xslt_version[0] != transformer_type[-1]:
                    raise ValueError('Stylesheet version "{}" does not match specified transformer_type "{}"'.format(
                        xslt_version, transformer_type))

                if transformer_type == 'XSLT1':
                    try:
                        transform = lxml.etree.XSLT(xslt)
                    except lxml.etree.XSLTParseError as e:
                        raise ValueError(
                            'Transformation {}/{} could not be parsed ({}, {})'.format(
                                family,
                                transformer_name,
                                e,
                                transformer_path))
                else:
                    # XSLT2+3
                    transform = transformer_path

        else:
            raise ValueError(
                u'Unsupported transformer type "{}"'.format(transformer_type))

        key = '{}::{}'.format(family, transformer_name)
        if key in self.registry:
            raise ValueError(
                'Transformation {}/{} already registered'.format(family, transformer_name))

        self.registry[key] = dict(
            transform=transform,
            path=transformer_path,
            type=transformer_type,
            family=family,
            name=transformer_name,
            info=info,
            registered=datetime.datetime.utcnow())

        LOG.info(
            'Transformer registered ({}, {})'.format(key, transformer_path))
    def __call__(
            self,
            xml_or_node,
            input_encoding=None,
            output_encoding=unicode,
            return_fragment=None,
            pretty_print=False,
            debug=False):
        """ Run the transformation chain either on an XML document passed as ``xml_or_node`` parameter
            or as pre-parsed XML node (lxml.etree.Element). XML documents passed as string must be either of type
            unicode or you must specify an explicit ``input_encoding``. The result XML document is returned as
            unicode string unless a different ``output_encoding`` is specified. In order to return a subelement
            from the result XML document you can specify a tag name using ``return_fragment`` in order the subdocument
            starting with the given tag name.
        """

        # Check validness of the transformation chain first
        self.verify_steps()

        if debug:
            debug_dir = tempfile.mkdtemp(prefix='transformation_debug_')
            LOG.info('Transformation debug directory: {}'.format(debug_dir))

        # Convert XML string into a root node
        if isinstance(xml_or_node, basestring):
            if not isinstance(xml_or_node, unicode):
                if not input_encoding:
                    raise TypeError('Input data must be unicode')
                    xml_or_node = unicode(xml_or_node, input_encoding)
            root = defusedxml.lxml.fromstring(xml_or_node.strip())

        elif isinstance(xml_or_node, lxml.etree._Element):
            root = xml_or_node

        else:
            raise TypeError(
                u'Unsupported type {}'.format(xml_or_node.__class__))

        # run the transformation chain
        for step_no, step in enumerate(self.steps):
            family, name = step
            ts = time.time()
            transformer = self.registry.get_transformation(family, name)
            conversion_context = dict(context=self.context,
                                      request=getattr(
                                          self.context, 'REQUEST', None),
                                      destdir=self.destdir,
                                      )
            conversion_context.update(self.params)

            if debug:
                in_data = lxml.etree.tostring(root, encoding='utf8')
                in_data_fn = '{:02d}-{}-{}.in'.format(step_no, family, name)
                with open(os.path.join(debug_dir, in_data_fn), 'wb') as fp:
                    fp.write(in_data)

            # A transformation is allowed to return a new root node (None
            # otherwise).  The transformation chain will then continue in the
            # next transformation step with this new node.
            new_root = transformer(root, conversion_context=conversion_context)
            if new_root is not None:
                root = new_root

            if debug:
                out_data = lxml.etree.tostring(root, encoding='utf8')
                out_data_fn = '{:02d}-{}-{}.out'.format(step_no, family, name)
                with open(os.path.join(debug_dir, out_data_fn), 'wb') as fp:
                    fp.write(out_data)

            LOG.info('Transformation %-30s: %3.6f seconds' %
                     (name, time.time() - ts))

        # optional: return a fragment given by the top-level tag name
        return_node = root
        if return_fragment:
            node = root.find(return_fragment)
            if node is None:
                raise ValueError(
                    'No tag <{}> found in transformed document'.format(return_fragment))
            return_node = node

        if output_encoding == unicode:
            return lxml.etree.tostring(
                return_node,
                encoding=unicode,
                pretty_print=pretty_print)
        else:
            return lxml.etree.tostring(
                return_node.getroottree(),
                encoding=output_encoding,
                xml_declaration=True,
                pretty_print=pretty_print)
예제 #10
0
    def parse_folder(self, family, directory, version_suffix=None):
        """ Parse a given folder for XML schema files (.xsd) or
            DTD files (.dtd).
        """

        if directory.startswith('/'):
            directory = 'file://' + directory

        try:
            handle = fs.opener.fsopendir(directory)
        except Exception as e:
            raise IOError(
                u'Directory "{}" does not exist ({})'.format(directory, e))

        for name in handle.listdir():
            fullname = os.path.join(directory, name)
            LOG.debug(u'Parsing "{}"'.format(fullname))
            base, ext = os.path.splitext(name)

            registered_name = name
            if version_suffix:
                basename, ext = os.path.splitext(name)
                registered_name = '{}-{}{}'.format(basename,
                                                   version_suffix, ext)

            key = '{}::{}'.format(family, registered_name)
            ts = time.time()
            if ext == '.dtd':
                with handle.open(name, 'rb') as fp:
                    validator = lxml.etree.DTD(fp)
                    validator_type = 'DTD'
            elif ext == '.xsd':
                with handle.open(name, 'rb') as fp:
                    try:
                        schema_doc = lxml.etree.XML(fp.read())
                        validator = lxml.etree.XMLSchema(schema_doc)
                    except Exception as e:
                        LOG.error(u'Unable to parse XML Schema ({})'.format(
                            e), exc_info=True)
                        continue
                    validator_type = 'XSD'
            elif ext == '.rng':
                with handle.open(name, 'rb') as fp:
                    relaxng_doc = lxml.etree.XML(fp.read())
                    validator = lxml.etree.RelaxNG(relaxng_doc)
                    validator_type = 'RELAXNG'
            elif ext == '.sch':
                with handle.open(name, 'rb') as fp:
                    relaxng_doc = lxml.etree.XML(fp.read())
                    validator = lxml.isoschematron.Schematron(relaxng_doc)
                    validator_type = 'SCHEMATRON'
            else:
                continue

            if key in self.registry:
                raise ValueError('{} already registered'.format(key))

            duration = time.time() - ts

            self.registry[key] = dict(
                family=family,
                name=registered_name,
                validation=validator,
                path=fullname,
                info=handle.getinfo(name),
                duration=duration,
                type=validator_type,
                registered=datetime.datetime.utcnow())
            if duration > 3:
                LOG.warn(
                    'Slow loading/parsing of ({}, {}), duration: {:0.3f} seconds'.format(key, fullname, duration))
            LOG.info('Registered ({}, {}), duration: {:0.3f} seconds'.format(
                key, fullname, duration))
예제 #11
0
# -*- coding: utf-8 -*-

################################################################
# xmldirector.plonecore
# (C) 2016,  Andreas Jung, www.zopyx.com, Tuebingen, Germany
################################################################


import os
import sys
import pkg_resources

from xmldirector.plonecore.logger import LOG


__import__('pkg_resources').declare_namespace(__name__)


# Check filesystem encoding
fs_enc = sys.getfilesystemencoding()
if fs_enc.lower() not in ('utf8', 'utf-8'):
    LOG.error('Filesystem encoding should be UTF-8, not {}'.format(fs_enc))


# import patches only for Plone 5
dist = pkg_resources.get_distribution('Products.CMFPlone')
if dist.version.startswith('5'):
    import patches
    LOG.info('Applied patched for Plone 5')