def transform(self, tree, template_path, **args):
    """
    Transform an element tree with XSLT

    @param tree: the element tree
    @param template_path: pathname of the XSLT stylesheet
    @param args: dict of arguments to pass to the stylesheet; every
                 value is formatted with "%s" and handed over as an
                 XSLT string parameter

    @return: the transformation result, or None if the stylesheet
             could not be parsed or the transformation failed (in the
             latter case the exception is stored in self.error)
    """
    self.error = None

    # XSLT.strparam() takes care of the quoting, so values that contain
    # apostrophes or double quotes no longer produce a broken XPath
    # expression (which hand-made '...' quoting did).
    params = {
        name: etree.XSLT.strparam("%s" % value)
        for name, value in args.items()
    }

    # NOTE(review): read_network=True allows the stylesheet to fetch
    # remote resources; kept because callers may rely on it.
    ac = etree.XSLTAccessControl(read_file=True, read_network=True)

    template = self.parse(template_path)
    if not template:
        # Error parsing the XSL template
        return None

    try:
        transformer = etree.XSLT(template, access_control=ac)
        return transformer(tree, **params)
    except Exception as e:
        # Only real errors are recorded; KeyboardInterrupt and SystemExit
        # are allowed to propagate.
        self.error = e
        return None
def __build_lxml(target, source, env):
    """
    General XSLT builder (HTML/FO), using the lxml module.

    Applies the stylesheet named by $DOCBOOK_XSL to source[0] and writes
    the serialized result to target[0]. Always returns None.
    """
    from lxml import etree

    xslt_ac = etree.XSLTAccessControl(read_file=True,
                                      write_file=True,
                                      create_dir=True,
                                      read_network=False,
                                      write_network=False)
    xsl_style = env.subst('$DOCBOOK_XSL')
    xsl_tree = etree.parse(xsl_style)
    transform = etree.XSLT(xsl_tree, access_control=xslt_ac)
    doc = etree.parse(str(source[0]))
    # Support for additional parameters
    parampass = {}
    result = transform(doc, **parampass)

    out_path = str(target[0])
    try:
        # tostring() returns bytes, so the file has to be opened in
        # binary mode; the context manager closes it on every path.
        with open(out_path, "wb") as of:
            of.write(etree.tostring(result, pretty_print=True))
    except TypeError:
        # Serializing the result tree can fail with TypeError; let the
        # result object write itself instead.
        result.write_output(out_path)
    except EnvironmentError as e:
        # Still best-effort (no exception escapes), but no longer silent.
        print("ERROR: Failed to write {}".format(out_path))
        print(e)
    return None
def __build_lxml(target, source, env):
    """
    General XSLT builder (HTML/FO), using the lxml module.

    Runs the $DOCBOOK_XSL stylesheet over source[0] and stores the
    output in target[0]. Always returns None.
    """
    from lxml import etree

    access = etree.XSLTAccessControl(read_file=True,
                                     write_file=True,
                                     create_dir=True,
                                     read_network=False,
                                     write_network=False)
    stylesheet = etree.parse(env.subst('$DOCBOOK_XSL'))
    transform = etree.XSLT(stylesheet, access_control=access)
    doc = etree.parse(str(source[0]))

    # Support for additional parameters (none are defined at present)
    extra_params = {}
    result = transform(doc, **extra_params)

    # Pretty-printed output is preferred for readability. Sometimes
    # (especially for non-XML output) the transformed tree contains a
    # None somewhere and tostring() raises TypeError; in that case the
    # result writes itself to the target instead.
    out_path = str(target[0])
    try:
        with open(out_path, "wb") as out:
            out.write(etree.tostring(result, pretty_print=True))
    except TypeError:
        result.write_output(out_path)
    return None
def __build_lxml(target, source, env):
    """
    General XSLT builder (HTML/FO), using the lxml module.

    Runs the $DOCBOOK_XSL stylesheet over source[0] and writes the
    UTF-8 encoded, pretty-printed result to target[0]. A failure while
    writing is printed, not raised. Always returns None.
    """
    from lxml import etree

    access = etree.XSLTAccessControl(read_file=True,
                                     write_file=True,
                                     create_dir=True,
                                     read_network=False,
                                     write_network=False)
    stylesheet = etree.parse(env.subst('$DOCBOOK_XSL'))
    transform = etree.XSLT(stylesheet, access_control=access)
    doc = etree.parse(str(source[0]))

    # Support for additional parameters (none are defined at present)
    extra_params = {}
    result = transform(doc, **extra_params)

    out_path = str(target[0])
    try:
        with open(out_path, "wb") as out:
            serialized = etree.tostring(result, encoding="utf-8",
                                        pretty_print=True)
            out.write(serialized)
    except Exception as err:
        print("ERROR: Failed to write {}".format(out_path))
        print(err)
    return None
def compileThemeTransform(rules, absolutePrefix=None, readNetwork=False,
                          parameterExpressions=None, runtrace=False):
    """Prepare the theme transform by compiling the rules with the given
    options.

    ``parameterExpressions`` is a mapping whose keys name extra XSLT
    parameters; together with 'url', 'base', 'path', 'scheme' and 'host'
    they are declared to the compiler with an empty default value.

    Returns an ``etree.XSLT`` transform, or None when ``compile_theme``
    yields a false value.
    """
    if parameterExpressions is None:
        parameterExpressions = {}

    accessControl = etree.XSLTAccessControl(read_file=True,
                                            write_file=False,
                                            create_dir=False,
                                            read_network=readNetwork,
                                            write_network=False)

    if absolutePrefix:
        absolutePrefix = expandAbsolutePrefix(absolutePrefix)

    # set.union() works for any iterable of names; concatenating
    # dict.keys() with a list raises TypeError on Python 3.
    params = set(parameterExpressions).union(
        ('url', 'base', 'path', 'scheme', 'host'))
    xslParams = dict.fromkeys(params, '')

    compiledTheme = compile_theme(
        rules,
        absolute_prefix=absolutePrefix,
        parser=getParser('theme', readNetwork),
        rules_parser=getParser('rules', readNetwork),
        compiler_parser=getParser('compiler', readNetwork),
        read_network=readNetwork,
        access_control=accessControl,
        update=True,
        xsl_params=xslParams,
        runtrace=runtrace,
    )

    if not compiledTheme:
        return None

    return etree.XSLT(
        compiledTheme,
        access_control=accessControl,
    )
def transform(tree, stylesheet_path, **args):
    """Apply the XSLT stylesheet at ``stylesheet_path`` to ``tree``.

    Every keyword argument is formatted with "%s" and passed to the
    stylesheet as a string parameter. Returns the transformation result;
    parse and transformation errors propagate to the caller.
    """
    # XSLT.strparam() performs the quoting, so values containing
    # apostrophes or double quotes are passed through intact instead of
    # yielding an invalid XPath expression.
    params = {
        name: etree.XSLT.strparam("%s" % value)
        for name, value in args.items()
    }

    stylesheet = etree.parse(stylesheet_path)
    # NOTE(review): read_network=True allows the stylesheet to fetch
    # remote resources; kept because callers may rely on it.
    ac = etree.XSLTAccessControl(read_file=True, read_network=True)
    transformer = etree.XSLT(stylesheet, access_control=ac)
    return transformer(tree, **params)
def __new__(cls, cname, cbases, cvars):
    # Creates the new class through type.__new__ and then attaches the
    # XSLT helpers (access control, parser, compiled inline stylesheet,
    # path/name registries) to it as class attributes.
    # NOTE(review): the nesting of the blocks below was reconstructed
    # from a flattened source line -- confirm against the upstream file.

    # The inline stylesheet source is taken out of the class namespace
    # before the class object is built.
    xslpaste = cvars.pop('cls__xslpaste', None)
    ret = type.__new__(cls, cname, cbases, cvars)
    # Both helpers are (re)created whenever either one is missing.
    if not hasattr(ret, 'cls__xslacl') or not hasattr(
            ret, 'cls__xslparser'):
        ret.cls__xslacl = et.XSLTAccessControl(read_network=False)
        ret.cls__xslparser = et.XMLParser(dtd_validation=False,
                                          resolve_entities=True,
                                          load_dtd=True,
                                          ns_clean=False)
    # Compile the inline stylesheet, if the class body supplied one.
    if xslpaste is not None:
        xml = et.XML(xslpaste, parser=ret.cls__xslparser)
        ret.cls__xslpaste = et.XSLT(xml, access_control=ret.cls__xslacl)
    # Both registries are created together when cls__xslpaths is missing.
    if not hasattr(ret, 'cls__xslpaths'):
        ret.cls__xslpaths = {}
        ret.cls__xslnames = {}
    # With the debug flag set, every (name, filename) pair listed in
    # cls__xsllist is passed to xslload_path at class-creation time.
    if ret.debug:
        if ret.cls__template_path is not None:
            ret.cls__xsllist.extend(ret.cls__xsllist_default)
        for name, filename in ret.cls__xsllist:
            ret.xslload_path(name, filename)
    return ret
def __init__(self, app, global_conf, ignore_paths=None, xslt_file=None,
             xslt_source="", xslt_tree=None, read_network=False):
    """Initialise from an XSLT file name, XSLT source text or a parsed tree.

    * ``xslt_file``, name of a file to read the stylesheet source from
      (takes precedence over ``xslt_source``)
    * ``xslt_source``, the stylesheet source; when non-empty it is
      parsed and replaces ``xslt_tree``
    * ``xslt_tree``, an already parsed stylesheet
    * ``read_network``, passed on to the XSLT access control
    * ``ignore_paths``, an iterable of regular expression strings;
      blank entries are dropped and the rest are compiled into
      ``self.ignore_paths``
    """
    self.app = app
    self.global_conf = global_conf

    if xslt_file:
        # The context manager closes the file even when read() fails.
        with open(xslt_file) as stylesheet:
            xslt_source = stylesheet.read()
    if xslt_source:
        xslt_tree = etree.fromstring(xslt_source)

    self.read_network = read_network
    self.access_control = etree.XSLTAccessControl(
        read_file=True, write_file=False, create_dir=False,
        read_network=read_network, write_network=False)
    self.transform = etree.XSLT(xslt_tree,
                                access_control=self.access_control)

    self.ignore_paths = []
    if ignore_paths:
        stripped = (s.strip() for s in ignore_paths)
        self.ignore_paths = [re.compile(p) for p in stripped if p]
def __build_lxml_noresult(target, source, env):
    """
    Specialized XSLT builder for transformations without a direct result
    where the Docbook stylesheet itself creates the target file, using
    the lxml module.

    The value returned by the transformation is discarded. Always
    returns None.
    """
    from lxml import etree

    access = etree.XSLTAccessControl(read_file=True,
                                     write_file=True,
                                     create_dir=True,
                                     read_network=False,
                                     write_network=False)
    stylesheet = etree.parse(env.subst('$DOCBOOK_XSL'))
    transform = etree.XSLT(stylesheet, access_control=access)
    doc = etree.parse(str(source[0]))

    # Support for additional parameters (none are defined at present)
    extra_params = {}
    transform(doc, **extra_params)
    return None
def xslt_ac_write_network_off(): ac = etree.XSLTAccessControl(read_network=True, write_network=False) # Noncompliant # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ transform = etree.XSLT(rootxsl, access_control=ac)
# Module-level setup: debug logging, the stylesheet location and the
# XSLT transform, which is compiled once here at module level.
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger('docbook2epub')

# XXX REMARK -- Original:
#DOCBOOK_XSL = os.path.abspath('../docbook-xsl-1.74.0/epub/docbook.xsl')
# XXX REMARK -- 2017-07-22: Modified by Marcos Cruz (programandala.net):
DOCBOOK_XSL = os.path.abspath(
    '/usr/share/xml/docbook/stylesheet/docbook-xsl/epub/docbook.xsl')
MIMETYPE = 'mimetype'
MIMETYPE_CONTENT = 'application/epub+zip'

# The stylesheet may read and write files, create directories and read
# from the network; only network writes are denied.
xslt_ac = etree.XSLTAccessControl(read_file=True,
                                  write_file=True,
                                  create_dir=True,
                                  read_network=True,
                                  write_network=False)
transform = etree.XSLT(etree.parse(DOCBOOK_XSL), access_control=xslt_ac)


def convert_docbook(docbook_file):
    '''Use DocBook XSL to transform our DocBook book into EPUB'''
    # NOTE(review): cwd is not used in the visible part of this function;
    # the definition appears to continue beyond this excerpt.
    cwd = os.getcwd()
    # Create a temporary working directory for the output files
    # (named after the input file without its directory and extension)
    output_path = os.path.basename(os.path.splitext(docbook_file)[0])
    if not os.path.exists(output_path):
        os.mkdir(output_path)
    # DocBook needs the source file in the current working directory to output correctly
    shutil.copy(docbook_file, output_path)
def localname(name):
    """Return the part of a ``{namespace}local`` name after the last '}'."""
    pieces = name.rsplit('}', 1)
    return pieces[1]


def namespace(name):
    """Return the namespace of a ``{namespace}local`` name.

    This is the text before the last '}' with its first character
    (the opening brace) removed.
    """
    braced = name.rsplit('}', 1)[0]
    return braced[1:]


def fullname(namespace, name):
    """Combine a namespace and a local name into ``{namespace}name``."""
    qualified = '{%s}%s' % (namespace, name)  # NOQA: S001
    return qualified


# Access control that permits reading local files only.
AC_READ_FILE = etree.XSLTAccessControl(
    read_file=True,
    write_file=False,
    create_dir=False,
    read_network=False,
    write_network=False,
)

# Access control that permits reading local files and the network.
AC_READ_NET = etree.XSLTAccessControl(
    read_file=True,
    write_file=False,
    create_dir=False,
    read_network=True,
    write_network=False,
)


class CustomResolver(etree.Resolver):
    """Resolver that keeps the ``data`` object it was created with."""

    def __init__(self, data):
        self.data = data
# Builds an XSLT transform with an access control object that is created
# inline with default arguments. `rootxsl` is not defined in this block.
# NOTE(review): the `# Noncompliant` marker looks like an expectation
# annotation for a static-analysis test -- confirm before changing it.
def xslt_inline_ac():
    transform = etree.XSLT(
        rootxsl, access_control=etree.XSLTAccessControl()) # Noncompliant
def xslt_ac_default(): ac = etree.XSLTAccessControl() # Noncompliant # ^^^^^^^^^^^^^^^^^^^^^^^^^ transform = etree.XSLT(rootxsl, access_control=ac)
def __init__(
        self, app, global_conf,
        live=False,
        rules=None,
        theme=None,
        extra=None,
        css=True,
        xinclude=True,
        absolute_prefix=None,
        update=False,
        includemode='document',
        notheme=None,
        read_network=False,
        # BBB parameters
        theme_uri=None,
        extraurl=None):
    """Set up the middleware.

    Arguments:

    * ``rules`` -- the rules file.
    * ``theme`` -- the theme file (``theme_uri`` is the BBB spelling and
      is used when ``theme`` is not given).
    * ``extra`` -- optional XSLT file with XDV extensions (``extraurl``
      is the BBB spelling and is used when ``extra`` is not given).
    * ``css`` -- pass False to switch off CSS syntax support, for a
      moderate speed gain.
    * ``xinclude`` -- whether XInclude support is enabled (it comes at a
      moderate speed cost).
    * ``absolute_prefix`` -- a string prefixed to every *relative* URL
      referenced by an image, link or stylesheet in the theme HTML file
      before the theme reaches the compiler. A theme can then be opened
      and viewed standalone from the filesystem although its static
      resources are served from elsewhere at runtime: with an
      ``absolute_prefix`` of "/static", ``<img src="images/foo.jpg" />``
      becomes ``<img src="/static/images/foo.jpg" />``.
    * ``update`` -- pass False to switch off the automatic update
      support for the old Deliverance 0.2 namespace (moderate speed
      gain).
    * ``includemode`` -- 'document', 'esi' or 'ssi'; selects how
      includes are processed.
    * ``live`` -- pass True to recompile the theme on each request.
    * ``notheme`` -- regular expressions for paths that should not be
      themed; either a list or a newline-separated string.
    """
    if isinstance(notheme, basestring):
        # One pattern per line; blank lines are dropped, kept lines are
        # stored as given.
        patterns = []
        for line in notheme.split('\n'):
            if line.strip():
                patterns.append(line)
        notheme = patterns

    self.app = app
    self.global_conf = global_conf

    # Theme sources; the second name in each pair is the BBB fallback.
    self.rules = rules
    self.theme = theme or theme_uri
    self.extra = extra or extraurl

    # Compilation options.
    self.css = asbool(css)
    self.xinclude = xinclude
    self.absolute_prefix = absolute_prefix
    self.update = update
    self.includemode = includemode

    # Request handling options.
    self.live = asbool(live)
    self.notheme = notheme

    # XSLT access: files may be read, the network only when requested,
    # nothing may be written.
    self.read_network = read_network
    self.access_control = etree.XSLTAccessControl(
        read_file=True,
        write_file=False,
        create_dir=False,
        read_network=read_network,
        write_network=False)

    self.transform = None
def __init__(self, app, global_conf, filename=None, tree=None,
             read_network=False, read_file=True,
             update_content_length=False,
             ignored_extensions=('js', 'css', 'gif', 'jpg', 'jpeg', 'pdf',
                                 'ps', 'doc', 'png', 'ico', 'mov', 'mpg',
                                 'mpeg', 'mp3', 'm4a', 'txt', 'rtf', 'swf',
                                 'wav', 'zip', 'wmv', 'ppt', 'gz', 'tgz',
                                 'jar', 'xls', 'bmp', 'tif', 'tga', 'hqx',
                                 'avi'),
             environ_param_map=None, unquoted_params=None, doctype=None,
             content_type=None, charset=None,
             remove_conditional_headers=False, **params):
    """Initialise, giving a filename or parsed XSLT tree.

    The parameters are:

    * ``filename``, a filename from which to read the XSLT file
    * ``tree``, a pre-parsed lxml tree representing the XSLT file

    ``filename`` and ``tree`` are mutually exclusive.

    * ``read_network``, should be set to True to allow resolving
      resources from the network.
    * ``read_file``, should be set to False to disallow resolving
      resources from the filesystem.
    * ``update_content_length``, can be set to True to update the
      Content-Length header when applying the transformation. When set
      to False (the default), the header is removed and it is left to
      the WSGI server to recalculate it or send a chunked response.
    * ``ignored_extensions`` can be set to a list of filename
      extensions for which the transformation should never be applied
    * ``environ_param_map`` can be set to a dict of environ keys to
      parameter names. The corresponding values will then be sent to
      the transformation as parameters.
    * ``unquoted_params``, can be set to a list of parameter names
      (or a whitespace-separated string of names) which will not be
      quoted.
    * ``doctype``, can be set to a string which will replace that set
      in the XSLT, for example, "<!DOCTYPE html>".
    * ``content_type``, can be set to a string which will be set in the
      Content-Type header. By default it is inferred from the
      stylesheet's xsl:output ``media-type`` or ``method``.
    * ``charset``, can be set to a string which will be set in the
      Content-Type header. By default it is taken from the stylesheet's
      xsl:output ``encoding``, falling back to "UTF-8".
    * ``remove_conditional_headers``, should be set to True if the
      transformed output includes other files.

    Additional keyword arguments will be passed to the transformation
    as parameters.
    """
    self.app = app
    self.global_conf = global_conf

    if filename is not None:
        # The context manager closes the file even when parsing fails.
        with open(filename) as xslt_file:
            tree = etree.fromstring(xslt_file.read())

    xsl_ns = dict(xsl="http://www.w3.org/1999/XSL/Transform")

    def output_attribute(name):
        # Value of the last matching xsl:output attribute, or None.
        values = tree.xpath('/xsl:stylesheet/xsl:output/@%s' % name,
                            namespaces=xsl_ns)
        return values[-1] if values else None

    if content_type is None:
        media_type = output_attribute('media-type')
        if media_type is not None:
            content_type = media_type
        else:
            method = output_attribute('method')
            if method is not None:
                # Unknown output methods leave content_type as None.
                content_type = {
                    'html': 'text/html',
                    'text': 'text/plain',
                    'xml': 'text/xml',
                }.get(method.lower())
    self.content_type = content_type

    if charset is None:
        encoding = output_attribute('encoding')
        charset = encoding if encoding is not None else "UTF-8"
    self.charset = charset

    self.read_network = asbool(read_network)
    self.read_file = asbool(read_file)
    self.access_control = etree.XSLTAccessControl(
        read_file=self.read_file, write_file=False, create_dir=False,
        read_network=self.read_network, write_network=False)
    self.transform = etree.XSLT(tree, access_control=self.access_control)

    self.update_content_length = asbool(update_content_length)
    self.ignored_extensions = frozenset(ignored_extensions)
    # Raw string: "\." is an invalid escape in an ordinary literal.
    self.ignored_pattern = re.compile(
        r"^.*\.(%s)$" % '|'.join(ignored_extensions))
    self.environ_param_map = environ_param_map or {}

    if isinstance(unquoted_params, basestring):
        unquoted_params = unquoted_params.split()
    self.unquoted_params = (
        frozenset(unquoted_params) if unquoted_params else ())

    self.params = params
    self.doctype = doctype
    self.remove_conditional_headers = asbool(remove_conditional_headers)
def __init__(self, app, global_conf, rules, theme=None, prefix=None,
             includemode='document', debug=False, read_network=False,
             read_file=True, update_content_length=False,
             ignored_extensions=('js', 'css', 'gif', 'jpg', 'jpeg', 'pdf',
                                 'ps', 'doc', 'png', 'ico', 'mov', 'mpg',
                                 'mpeg', 'mp3', 'm4a', 'txt', 'rtf', 'swf',
                                 'wav', 'zip', 'wmv', 'ppt', 'gz', 'tgz',
                                 'jar', 'xls', 'bmp', 'tif', 'tga', 'hqx',
                                 'avi'),
             environ_param_map=None, unquoted_params=None, doctype=None,
             content_type=None, filter_xpath=False, **params):
    """Create the middleware. The parameters are:

    * ``rules``, the rules file
    * ``theme``, a URL to the theme file (may be a file:// URL)
    * ``debug``, set to True to recompile the theme on each request
    * ``prefix`` can be set to a string that will be prefixed to
      any *relative* URL referenced in an image, link or stylesheet in
      the theme HTML file before the theme is passed to the compiler.
      This allows a theme to be written so that it can be opened and
      viewed standalone on the filesystem, even if at runtime its static
      resources are going to be served from some other location. For
      example, an ``<img src="images/foo.jpg" />`` can be turned into
      ``<img src="/static/images/foo.jpg" />`` with a ``prefix`` of
      "/static".
    * ``includemode`` can be set to 'document', 'esi' or 'ssi' to change
      the way in which includes are processed
    * ``read_network``, should be set to True to allow resolving
      resources from the network.
    * ``read_file``, should be set to False to disallow resolving
      resources from the filesystem.
    * ``update_content_length``, can be set to True to update the
      Content-Length header when applying the transformation. When set
      to False (the default), the header is removed and it is left to
      the WSGI server to recalculate it or send a chunked response.
    * ``ignored_extensions`` can be set to a list of filename
      extensions for which the transformation should never be applied
    * ``environ_param_map`` can be set to a dict of environ keys to
      parameter names. The corresponding values will then be sent to
      the transformation as parameters. The mapping is copied; the
      'diazo.path', 'diazo.host' and 'diazo.scheme' entries are added
      to the copy only.
    * ``unquoted_params``, can be set to a list of parameter names
      which will not be quoted.
    * ``doctype``, can be set to a string which will replace the default
      XHTML 1.0 transitional Doctype or that set in the Diazo theme. For
      example, "<!DOCTYPE html>".
    * ``content_type``, can be set to a string which will be set in the
      Content-Type header. By default it is inferred from the
      stylesheet.
    * ``filter_xpath``, should be set to True to enable filter_xpath
      support for external includes.

    Additional keyword arguments are stored and passed to the theme
    transformation as parameters. ``charset`` and
    ``remove_conditional_headers`` are not named parameters of this
    constructor, so if supplied they are collected there as well.
    """
    self.app = app
    self.global_conf = global_conf
    self.rules = rules
    self.theme = theme
    self.absolute_prefix = prefix
    self.includemode = includemode
    self.debug = asbool(debug)
    self.read_network = asbool(read_network)
    self.read_file = asbool(read_file)
    self.update_content_length = asbool(update_content_length)
    self.ignored_extensions = ignored_extensions
    self.doctype = doctype
    self.content_type = content_type
    self.unquoted_params = unquoted_params
    self.filter_xpath = asbool(filter_xpath)

    self.access_control = etree.XSLTAccessControl(
        read_file=self.read_file, write_file=False, create_dir=False,
        read_network=self.read_network, write_network=False)
    self.transform_middleware = None
    # Built at this point, before environ_param_map and params are set,
    # exactly as in the original statement order.
    self.filter_middleware = self.get_filter_middleware()

    # Work on a copy so that the update below does not modify the
    # mapping object owned by the caller.
    self.environ_param_map = dict(environ_param_map or {})
    self.environ_param_map.update({
        'diazo.path': 'path',
        'diazo.host': 'host',
        'diazo.scheme': 'scheme',
    })
    self.params = params.copy()
def xslt_ac_read_write_network_off(): ac = etree.XSLTAccessControl(read_network=False, write_network=False) # Compliant transform = etree.XSLT(rootxsl, access_control=ac) # Compliant
def compileThemeTransform(rules, absolutePrefix=None, readNetwork=False,
                          parameterExpressions=None):
    """Prepare the theme transform by compiling the rules with the given
    options.

    ``parameterExpressions`` is a mapping whose keys name extra XSLT
    parameters; together with 'url', 'base', 'path', 'scheme' and 'host'
    they are declared to the compiler with an empty default value.

    The rules, theme and compiler parsers each get the internal and the
    Python resolver, plus the network resolver when ``readNetwork`` is
    true.

    Returns an ``etree.XSLT`` transform, or None when ``compile_theme``
    yields a false value.
    """
    if parameterExpressions is None:
        parameterExpressions = {}

    accessControl = etree.XSLTAccessControl(read_file=True,
                                            write_file=False,
                                            create_dir=False,
                                            read_network=readNetwork,
                                            write_network=False)

    if absolutePrefix:
        absolutePrefix = expandAbsolutePrefix(absolutePrefix)

    # set.union() works for any iterable of names; concatenating
    # dict.keys() with a list raises TypeError on Python 3.
    params = set(parameterExpressions).union(
        ('url', 'base', 'path', 'scheme', 'host'))
    xslParams = dict.fromkeys(params, '')

    # The same resolver instances are shared by all three parsers.
    resolvers = [InternalResolver(), PythonResolver()]
    if readNetwork:
        resolvers.append(NetworkResolver())

    def withResolvers(parser):
        # Register every resolver on the parser and hand the parser back.
        for resolver in resolvers:
            parser.resolvers.add(resolver)
        return parser

    rulesParser = withResolvers(etree.XMLParser(recover=False))
    themeParser = withResolvers(etree.HTMLParser())
    compilerParser = withResolvers(etree.XMLParser())

    compiledTheme = compile_theme(
        rules,
        absolute_prefix=absolutePrefix,
        parser=themeParser,
        rules_parser=rulesParser,
        compiler_parser=compilerParser,
        read_network=readNetwork,
        access_control=accessControl,
        update=True,
        xsl_params=xslParams,
    )

    if not compiledTheme:
        return None

    return etree.XSLT(
        compiledTheme,
        access_control=accessControl,
    )