Exemplo n.º 1
0
class choose_element(xslt_element):
    """Multi-way conditional built from `xsl:when` / `xsl:otherwise` children.

    The children of the first `xsl:when` whose test evaluates to true are
    processed; if none matches, the children of the optional trailing
    `xsl:otherwise` are processed instead.
    """
    content_model = content_model.seq(
        content_model.rep1(content_model.qname(XSL_NAMESPACE, 'xsl:when')),
        content_model.opt(content_model.qname(XSL_NAMESPACE, 'xsl:otherwise')),
    )
    attribute_types = {}

    def setup(self):
        """Split the children into tested branches and the fallback branch.

        Stores `(branch, branch._test)` pairs in `self._choices` and the
        trailing `otherwise_element` (or None) in `self._otherwise`.
        """
        branches = self.children
        fallback = None
        # Only the last child may be the fallback branch.
        if isinstance(branches[-1], otherwise_element):
            fallback = branches[-1]
            branches = branches[:-1]
        self._otherwise = fallback
        self._choices = [(branch, branch._test) for branch in branches]
        return

    def instantiate(self, context):
        """Process the children of the first branch whose test is true.

        Returns whatever the selected branch's `process_children` returns,
        or None when no test succeeds and there is no usable fallback.
        """
        selected = None
        for branch, condition in self._choices:
            # Evaluate each test in the scope of the branch that owns it.
            context.instruction, context.namespaces = branch, branch.namespaces
            if condition.evaluate_as_boolean(context):
                selected = branch
                break
        if selected is None:
            # No test matched; fall back to xsl:otherwise
            selected = self._otherwise
            if not selected:
                return
        return selected.process_children(context)
Exemplo n.º 2
0
class apply_templates_element(xslt_element):
    """Implementation of `xsl:apply-templates`.

    Selects a list of nodes (the `select` expression, or the children of the
    context node when there is none) and hands them to the transform's
    template dispatcher together with any `xsl:with-param` values.
    """

    content_model = content_model.rep(
        content_model.alt(content_model.qname(XSL_NAMESPACE, 'xsl:sort'),
                          content_model.qname(XSL_NAMESPACE,
                                              'xsl:with-param')))
    attribute_types = {
        'select': attribute_types.expression(),
        'mode': attribute_types.qname(),
    }

    def setup(self):
        """Collect the `xsl:sort` and `xsl:with-param` children.

        Parameters are stored in `self._params` as
        `(element, name, select-expression)` triples.  When sort keys are
        present the select expression is wrapped in a `sorted_expression`.
        """
        sort_keys = []
        self._params = params = []
        for child in self.children:
            if isinstance(child, sort_element):
                sort_keys.append(child)
            elif isinstance(child, with_param_element):
                params.append((child, child._name, child._select))
        if sort_keys:
            self._select = sorted_expression(self._select, sort_keys)
        return

    def instantiate(self, context):
        """Evaluate the parameters and selection, then apply templates.

        Raises whatever the expression evaluation raises; in particular a
        TypeError from `evaluate_as_nodeset` propagates unchanged.
        """
        # Each parameter is evaluated in the scope of its own element.
        params = {}
        for param, name, select in self._params:
            context.instruction, context.namespaces = param, param.namespaces
            params[name] = select.evaluate(context)

        if self._select:
            context.instruction, context.namespaces = self, self.namespaces
            # NOTE(review): a TypeError raised here is deliberately left to
            # propagate as-is; it is not translated into
            # XsltError.INVALID_APPLY_TEMPLATES_SELECT.  Confirm whether a
            # translated error is wanted before adding a handler.
            nodes = self._select.evaluate_as_nodeset(context)
        else:
            # No `select` attribute: default to the context node's children.
            nodes = context.node.xml_children

        # Process the selected nodes using `self._mode`
        context.transform.apply_templates(context, nodes, self._mode, params)
        return
Exemplo n.º 3
0
class function_element(xslt_element):
    """Stylesheet-defined function that registers itself with the context.

    The element is added to the context under its `name` by `prime()` and is
    later invoked through `__call__` with unevaluated argument expressions.
    """
    content_model = content_model.seq(
        content_model.rep(content_model.qname(XSL_NAMESPACE, 'xsl:param')),
        content_model.template,
    )
    attribute_types = {
        'name': attribute_types.qname_but_not_ncname(required=True),
    }

    def setup(self):
        """Separate the parameter declarations from the body instructions."""
        self._params = declared = []
        for child in self.children:
            if isinstance(child, param_element):
                declared.append(child)
                continue
            # The first other XSLT element ends the parameter scan.
            if isinstance(child, xslt_element):
                break
        if declared:
            # NOTE(review): this slice skips one child after the parameters
            # and drops the final child; presumably intentional, but verify
            # against how `children` is built.
            self._instructions = self.children[len(declared) + 1:-1]
        else:
            self._instructions = self.children
        return

    def prime(self, context):
        """Register this element as a callable function on `context`."""
        context.add_function(self._name, self)
        return

    def __call__(self, context, *args):
        """Invoke the function body and return `self.result`.

        `args` are expression objects; each is evaluated and bound to the
        parameter in the same position.  Parameters without a matching
        argument are instantiated so they can supply their own value.
        """
        # Remember the caller's focus and state; both are put back before
        # returning so the call leaves the context as it found it.
        saved_focus = (context.node, context.position, context.size)
        saved_state = (context.instruction, context.namespaces,
                       context.variables)

        context.instruction, context.namespaces = self, self.namespaces
        # Default return value until the body assigns something else
        self.result = datatypes.EMPTY_STRING

        declared = self._params
        if declared:
            # Bind into a private copy so the caller's variables are untouched
            context.variables = context.variables.copy()
            supplied = len(args)
            # Positional arguments fill the leading parameters ...
            for arg, param in zip(args, declared):
                context.variables[param._name] = arg.evaluate(context)
            # ... and the rest fall back to their declarations.
            for param in declared[supplied:]:
                param.instantiate(context)

        # Run the body
        for instruction in self._instructions:
            instruction.instantiate(context)

        # Put the caller's state and focus back
        context.instruction, context.namespaces, context.variables = saved_state
        context.node, context.position, context.size = saved_focus
        return self.result
Exemplo n.º 4
0
class for_each_element(xslt_element):
    """Implementation of `xsl:for-each`.

    Evaluates the `select` expression and processes the element's children
    once per selected node, with the context focus set to that node.
    """

    content_model = content_model.seq(
        content_model.rep(content_model.qname(XSL_NAMESPACE, 'xsl:sort')),
        content_model.template,
    )
    attribute_types = {
        'select': attribute_types.nodeset_expression(required=True),
    }

    def setup(self):
        """Wrap the select expression when leading `xsl:sort` keys exist."""
        children = self.children
        # Count the run of sort elements at the start of the children.
        nkeys = 0
        for child in children:
            if isinstance(child, sort_element):
                nkeys += 1
            else:
                break
        if nkeys:
            self._select = sorted_expression(self._select, children[:nkeys])
        return

    def instantiate(self, context):
        """Process the children once for every selected node.

        The context's node, position, size, template and current node are
        saved before the iteration and put back afterwards.  Exceptions from
        the expression evaluation (including TypeError) propagate unchanged.
        """
        if self._select:
            context.instruction = self
            context.namespaces = self.namespaces
            # NOTE(review): a TypeError raised here is left to propagate
            # as-is rather than being translated into
            # XsltError.INVALID_FOREACH_NODESET; confirm whether a
            # translated error is wanted before adding a handler.
            nodes = self._select.evaluate_as_nodeset(context)
        else:
            nodes = context.node.xml_children

        # Save the context focus (node/pos/size) and state (tpl/curr)
        saved = (context.node, context.position, context.size,
                 context.template, context.current_node)
        # Now process the selected nodes
        context.template = None
        context.size = len(nodes)
        position = 1
        for node in nodes:
            context.node = context.current_node = node
            context.position = position
            self.process_children(context)
            position += 1

        (context.node, context.position, context.size, context.template,
         context.current_node) = saved
        return
Exemplo n.º 5
0
class call_template_element(xslt_element):
    """Implementation of `xsl:call-template` (invocation of a named template).

    `_tail_recursive` is a class-level default of False; when it is true on
    an instance, `instantiate` hands the parameters back through the context
    instead of invoking the template itself.
    """

    content_model = content_model.rep(
        content_model.qname(XSL_NAMESPACE, 'xsl:with-param')
        )
    attribute_types = {
        'name': attribute_types.qname(required=True),
        }

    _tail_recursive = False

    def setup(self):
        """Record every child as an `(element, name, select)` triple."""
        bindings = []
        for child in self.children:
            bindings.append((child, child._name, child._select))
        self._params = bindings
        return

    def prime(self, context,
                 _test_elements=(if_element.if_element,),
                 _choose_elements=(choose_elements.when_element,
                                   choose_elements.otherwise_element,)):
        """Resolve the named template this element calls.

        Raises XsltError(NAMED_TEMPLATE_NOT_FOUND) when the stylesheet has
        no template registered under `self._name`.
        """
        stylesheet = self.root.stylesheet
        try:
            self._template = stylesheet.named_templates[self._name]
        except KeyError:
            raise XsltError(XsltError.NAMED_TEMPLATE_NOT_FOUND,
                            self, self._name)
        # NOTE: Tail recursion is now checked for in the xsl:template setup().
        return

    def instantiate(self, context):
        """Evaluate the parameters and invoke (or re-enter) the template."""
        # The parameter values must be computed before the called template
        # replaces the variable bindings.
        arguments = {}
        for param, name, select in self._params:
            context.instruction, context.namespaces = param, param.namespaces
            arguments[name] = select.evaluate(context)

        if not self._tail_recursive:
            self._template.instantiate(context, arguments)
        else:
            # Leave the values on the context instead of making a nested call.
            context.recursive_parameters = arguments
        return
Exemplo n.º 6
0
class attribute_set_element(xslt_element):
    """Implementation of `xsl:attribute-set`.

    Instantiating a set first instantiates every set named in its
    `use-attribute-sets` attribute and then its own `xsl:attribute` children.
    """
    content_model = content_model.rep(
        content_model.qname(XSL_NAMESPACE, 'xsl:attribute'))
    attribute_types = {
        'name': attribute_types.qname(required=True),
        'use-attribute-sets': attribute_types.qnames(),
    }

    def instantiate(self, context, used=None):
        """Instantiate the referenced attribute sets, then this set's children.

        `used` is the list of attribute sets currently being instantiated
        further up the call chain.  Callers normally omit it; it is threaded
        through the nested calls so that circular references are detected.

        Raises XsltError(CIRCULAR_ATTRIBUTE_SET) when this set is already
        being instantiated, and XsltError(UNDEFINED_ATTRIBUTE_SET) when a
        referenced set is not defined by the transform.
        """
        if used is None:
            used = []

        if self in used:
            raise XsltError(XsltError.CIRCULAR_ATTRIBUTE_SET, self, self._name)
        used.append(self)

        # XSLT 1.0, Section 7.1.4, Paragraph 4:
        # The available variable bindings are only the top-level ones.
        variables = context.variables
        context.variables = context.global_variables

        attribute_sets = context.transform.attribute_sets
        for name in self._use_attribute_sets:
            try:
                attribute_set = attribute_sets[name]
            except KeyError:
                # Report the name that failed to resolve.
                raise XsltError(XsltError.UNDEFINED_ATTRIBUTE_SET, self, name)
            else:
                # Share the in-progress list; without it every nested call
                # starts from an empty list and the circularity check above
                # can never trigger.
                attribute_set.instantiate(context, used)

        self.process_children(context)

        context.variables = variables
        # This set is finished, so it may legitimately be used again by a
        # sibling reference.
        used.remove(self)
        return
Exemplo n.º 7
0
class template_element(xslt_element):

    content_model = content_model.seq(
        content_model.rep(content_model.qname(XSL_NAMESPACE, 'xsl:param')),
        content_model.template,
    )
    attribute_types = {
        'match': attribute_types.pattern(),
        'name': attribute_types.qname(),
        'priority': attribute_types.number(),
        'mode': attribute_types.qname(),
    }

    _tail_recursive = False

    def __repr__(self):
        return "<template_element match='%s', name='%s', mode='%s', priority='%s'>" % (
            self._match, self._name, self._mode, self._priority)

    def setup(self):
        params = self._params = []
        for child in self.children:
            if isinstance(child, param_element):
                params.append((child, child._name))
            elif isinstance(child, xslt_element):
                break
        if self._params:
            self._instructions = self.children[len(self._params) + 1:-1]
        else:
            self._instructions = self.children
        # Check for tail-recursive invocation (i.e, call-tempates of self)
        if self._name and self._instructions:
            endpoints = [self._instructions[-1]]
            queue = endpoints.append
            for last in endpoints:
                if isinstance(last, call_template_element):
                    if last._name == self._name:
                        self._tail_recursive = True
                        last._tail_recursive = True
                        break
                elif isinstance(last, if_element):
                    last = last.last_instruction
                    if last: queue(last)
                elif isinstance(last, choose_element):
                    for choice in last.children:
                        last = choice.last_instruction
                        if last: queue(last)
        return

    def _printTemplateInfo(self):
        info, tname = self.getTemplateInfo()
        if tname:
            print "Template named %r:" % tname
        else:
            print "Template matching pattern %r :" % self._match
        print "  location: line %d, col %d of %s" % \
                (self.lineNumber, self.columnNumber, self.baseUri)
        for shortcut in info:
            print "  shortcut:"
            importidx, priority, tmode, patterninfo, quickkey = shortcut
            print "    ...import index:", importidx
            print "    .......priority:", priority
            print "    ...........mode:", tmode
            if not tname:
                print "    ......quick key: node type %s, expanded-name %r" % quickkey
                print "    ........pattern: %r  for axis type %s" % patterninfo[
                    0:2]
        return

    def instantiate(self, context, params=None):
        if params is None:
            params = {}

        if self._params:
            variables = context.variables
            context.variables = variables.copy()

        # The optimizer converts this to, roughly, a do/while loop
        while 1:
            context.recursive_parameters = None
            for child, param in self._params:
                if param in params:
                    context.variables[param] = params[param]
                else:
                    child.instantiate(context)

            for child in self._instructions:
                child.instantiate(context)

            # Update the params from the values given in
            # `recursive_parameters`.
            params = context.recursive_parameters
            if params is None:
                break

        if self._params:
            context.variables = variables
        return
Exemplo n.º 8
0
class transform_element(xslt_element):
    # Top-level stylesheet element: any number of `xsl:import` children
    # followed by the top-level declarations.  `setup()` digests those
    # declarations into lookup tables stored on the instance; `prime()`
    # instantiates the top-level variables/parameters and registers the keys
    # for one transformation.
    content_model = content_model.seq(
        content_model.rep(content_model.qname(XSL_NAMESPACE, 'xsl:import')),
        content_model.top_level_elements,
    )
    attribute_types = {
        'id': attribute_types.id(),
        'extension-element-prefixes': attribute_types.prefixes(),
        'exclude-result-prefixes': attribute_types.prefixes(),
        'version': attribute_types.number(required=True),
    }

    # Class-level defaults.  The first seven are replaced per instance by
    # `setup()`; the remaining ones are not assigned anywhere in this class
    # body and are presumably managed elsewhere.
    space_rules = None
    decimal_formats = None
    namespace_aliases = None
    attribute_sets = None
    match_templates = None
    named_templates = None
    parameters = None
    variables = None
    global_variables = None
    initial_functions = None

    builtin_param_warning = True

    # `_param_element` is bound as a default argument because the module name
    # `variable_elements` is shadowed by a local list inside the body.
    def setup(self, _param_element=variable_elements.param_element):
        """
        Called only once, at the first initialization.

        Digests the top-level children into the instance tables:
        `output_parameters`, `space_rules`, `_keys`, `decimal_formats`,
        `namespace_aliases`, `attribute_sets`, `_variables`, `parameters`,
        `named_templates` and `match_templates`.

        Raises XsltError for conflicting decimal formats, namespace aliases
        or named templates, and XPathError for an undefined prefix in a
        match pattern's name test.
        """
        self.output_parameters = outputparameters.outputparameters()

        # Sort the top-level elements in decreasing import precedence to ease
        # processing later.
        # NOTE(review): the sorted `elements` list is never read before the
        # name is rebound below; the grouping that follows iterates
        # `self.children` directly.  Confirm which order is intended.
        precedence_key = operator.attrgetter('import_precedence')
        elements = sorted(self.children, key=precedence_key, reverse=True)

        # Merge the top-level stylesheet elements into their respective
        # lists.  Any element name not in the mapping is discarded.
        # Note, by sharing the same list no merging is required later.
        whitespace_elements, variable_elements = [], []
        top_level_elements = {
            'strip-space': whitespace_elements,
            'preserve-space': whitespace_elements,
            'output': [],
            'key': [],
            'decimal-format': [],
            'namespace-alias': [],
            'attribute-set': [],
            'variable': variable_elements,
            'param': variable_elements,
            'template': [],
        }
        # Using `groupby` takes advantage of series of same-named elements
        # appearing adjacent to each other.
        key = operator.attrgetter('expanded_name')
        for (namespace, name), nodes in itertools.groupby(self.children, key):
            if namespace == XSL_NAMESPACE and name in top_level_elements:
                top_level_elements[name].extend(nodes)

        # - process the `xsl:preserve-space` and `xsl:strip-space` elements
        # RECOVERY: Multiple matching patterns use the last occurrence
        # (the dictionary is keyed by token, so a later rule overwrites an
        # earlier one for the same token).
        space_rules = {}
        for element in whitespace_elements:
            strip = element._strip_whitespace
            for token in element._elements:
                namespace, name = token
                space_rules[token] = (namespace, name, strip)
        self.space_rules = space_rules.values()
        # sort in decreasing priority, where `*` is lowest, followed by
        # `prefix:*`, then all others.
        self.space_rules.sort(reverse=True)

        # - process the `xsl:output` elements
        # The elements are applied in list order, so a value set by a later
        # element replaces the one set by an earlier element.
        # NOTE(review): no sort is performed here; this relies on the list
        # already being in increasing import precedence so that the highest
        # precedence wins.  Confirm against how `children` is ordered.
        elements = top_level_elements['output']
        getter = operator.attrgetter('_method', '_version', '_encoding',
                                     '_omit_xml_declaration', '_standalone',
                                     '_doctype_system', '_doctype_public',
                                     '_cdata_section_elements', '_indent',
                                     '_media_type', '_byte_order_mark',
                                     '_canonical_form')
        for element in elements:
            (method, version, encoding, omit_xmldecl, standalone,
             doctype_system, doctype_public, cdata_elements, indent,
             media_type, byte_order_mark, canonical_form) = getter(element)
            # Only attributes that were given (not None) override the
            # current value.
            if method is not None:
                self.output_parameters.method = method
            if version is not None:
                self.output_parameters.version = version
            if encoding is not None:
                self.output_parameters.encoding = encoding
            if omit_xmldecl is not None:
                self.output_parameters.omit_xml_declaration = omit_xmldecl
            if standalone is not None:
                self.output_parameters.standalone = standalone
            if doctype_system is not None:
                self.output_parameters.doctype_system = doctype_system
            if doctype_public is not None:
                self.output_parameters.doctype_public = doctype_public
            # Unlike the others, the CDATA element names accumulate.
            if cdata_elements:
                self.output_parameters.cdata_section_elements += cdata_elements
            if indent is not None:
                self.output_parameters.indent = indent
            if media_type is not None:
                self.output_parameters.media_type = media_type
            if byte_order_mark is not None:
                self.output_parameters.byte_order_mark = byte_order_mark
            if canonical_form is not None:
                self.output_parameters.canonical_form = canonical_form

        # - process the `xsl:key` elements
        # Group the keys by name (sorting first so that `groupby` sees all
        # same-named keys next to each other).
        elements = top_level_elements['key']
        name_key = operator.attrgetter('_name')
        elements.sort(key=name_key)
        keys = self._keys = {}
        for name, elements in itertools.groupby(elements, name_key):
            keys[name] = tuple(elements)

        # - process the `xsl:decimal-format` elements
        # Each format is stored as a tuple of its ten attribute values.
        formats = self.decimal_formats = {}
        getter = operator.attrgetter('_decimal_separator',
                                     '_grouping_separator', '_infinity',
                                     '_minus_sign', '_NaN', '_percent',
                                     '_per_mille', '_zero_digit', '_digit',
                                     '_pattern_separator')
        for element in top_level_elements['decimal-format']:
            name = element._name
            format = getter(element)
            # It is an error to declare a decimal-format more than once
            # (even with different import precedence) with different values.
            if name in formats and formats[name] != format:
                # Construct a useful name for the error message.
                if name:
                    namespace, name = name
                    if namespace:
                        name = element.namespaces[namespace] + ':' + name
                else:
                    name = '#default'
                raise XsltError(XsltError.DUPLICATE_DECIMAL_FORMAT, name)
            else:
                formats[name] = format
        # Add the default decimal format, if not declared.
        if None not in formats:
            formats[None] = ('.', ',', 'Infinity', '-', 'NaN', '%',
                             unichr(0x2030), '0', '#', ';')

        # - process the `xsl:namespace-alias` elements
        # The list is reversed and then walked one import-precedence group
        # at a time; the first alias recorded for a namespace is kept.
        elements = top_level_elements['namespace-alias']
        elements.reverse()
        aliases = self.namespace_aliases = {}
        for precedence, group in itertools.groupby(elements, precedence_key):
            # Namespaces first mapped within the current precedence group.
            mapped = {}
            for element in group:
                namespace = element.namespaces[element._stylesheet_prefix]
                if namespace not in aliases:
                    mapped[namespace] = True
                    result_prefix = element._result_prefix
                    result_namespace = element.namespaces[result_prefix]
                    aliases[namespace] = (result_namespace, result_prefix)
                # It is an error for a namespace URI to be mapped to multiple
                # different namespace URIs (with the same import precedence).
                elif namespace in mapped:
                    raise XsltError(XsltError.DUPLICATE_NAMESPACE_ALIAS,
                                    element._stylesheet_prefix)
        if aliases:
            # apply namespace fixup for the literal elements
            _fixup_aliases(self, aliases)

        # - process the `xsl:attribute-set` elements
        # A later element with the same name replaces the earlier entry.
        sets = self.attribute_sets = {}
        for element in top_level_elements['attribute-set']:
            sets[element._name] = element

        # - process the `xsl:param` and `xsl:variable` elements
        # `self._variables` keeps the collected order; the reversed walk
        # means the last binding of each name is the one that is kept.
        index, self._variables = {}, variable_elements[:]
        variable_elements.reverse()
        for element in variable_elements:
            name = element._name
            if name not in index:
                # unique (or first) variable binding
                index[name] = 1
            else:
                # shadowed variable binding, remove from processing list
                self._variables.remove(element)
        # Names of the surviving bindings that are parameters.
        self.parameters = frozenset(element._name
                                    for element in self._variables
                                    if isinstance(element, _param_element))

        # - process the `xsl:template` elements
        # `match_templates` maps mode -> node type code -> candidate rules;
        # element rules are additionally keyed by expanded name.
        match_templates = collections.defaultdict(_type_dispatch_table)
        named_templates = self.named_templates = {}
        elements = top_level_elements['template']
        elements.reverse()
        getter = operator.attrgetter('node_test', 'axis_type', 'node_type')
        for position, element in enumerate(elements):
            match, name = element._match, element._name
            precedence = element.import_precedence
            if match:
                namespaces = element.namespaces
                template_priority = element._priority
                mode_table = match_templates[element._mode]
                for pattern in match:
                    node_test, axis_type, node_type = getter(pattern)
                    # An explicit `priority` attribute overrides the default
                    # priority of the pattern's node test.
                    if template_priority is None:
                        priority = node_test.priority
                    else:
                        priority = template_priority
                    # Candidates are later ordered by this tuple.
                    sort_key = (precedence, priority, position)
                    info = (sort_key, node_test, axis_type, element)
                    # Add the template rule to the dispatch table
                    type_key = node_type.xml_typecode
                    if type_key == tree.element.xml_typecode:
                        # Element types are further keyed by the name test.
                        name_key = node_test.name_key
                        if name_key:
                            prefix, local = name_key
                            # Unprefixed names are in the null-namespace
                            try:
                                namespace = prefix and namespaces[prefix]
                            except KeyError:
                                raise XPathError(XPathError.UNDEFINED_PREFIX,
                                                 prefix=prefix)
                            else:
                                name_key = namespace, local
                        mode_table[type_key][name_key].append(info)
                    else:
                        # Every other node type gets lumped into a single list
                        # for that node type
                        mode_table[type_key].append(info)
            if name:
                # XSLT 1.0, Section 6, Paragraph 3:
                # It is an error if a stylesheet contains more than one
                # template with the same name and same import precedence.
                if name not in named_templates:
                    named_templates[name] = element
                elif named_templates[name].import_precedence == precedence:
                    # Construct a useful name for the error message.
                    namespace, name = name
                    if namespace:
                        name = element.namespaces[namespace] + ':' + name
                    raise XsltError(XsltError.DUPLICATE_NAMED_TEMPLATE, name)
        # Now expand the tables and convert to regular dictionaries to
        # prevent inadvertent growth when non-existent keys are used.
        match_templates = self.match_templates = dict(match_templates)
        for mode, type_table in match_templates.iteritems():
            # Add those patterns that don't have a distinct type:
            #   node(), id() and key() patterns
            any_patterns = type_table[tree.node.xml_typecode]
            type_table = match_templates[mode] = dict(type_table)
            # Only values of existing keys are replaced inside these loops,
            # so the dictionaries do not change size while being iterated.
            for type_key, patterns in type_table.iteritems():
                if type_key == tree.element.xml_typecode:
                    # Add those that are wildcard tests ('*' and 'prefix:*')
                    wildcard_names = patterns[None]
                    name_table = type_table[type_key] = dict(patterns)
                    for name_key, patterns in name_table.iteritems():
                        if name_key is not None:
                            patterns.extend(wildcard_names)
                        patterns.extend(any_patterns)
                        # Highest (precedence, priority, position) first.
                        patterns.sort(reverse=True)
                        name_table[name_key] = tuple(patterns)
                else:
                    patterns.extend(any_patterns)
                    patterns.sort(reverse=True)
                    type_table[type_key] = tuple(patterns)
        #self._dump_match_templates(match_templates)
        return

    def _dump_match_templates(self, match_templates=None):
        """Debugging aid: pretty-print a match-template dispatch table.

        Defaults to `self.match_templates` when no table is passed in.
        """
        from pprint import pprint
        if match_templates is None:
            match_templates = self.match_templates
        print "=" * 50
        for mode, type_table in match_templates.iteritems():
            print "mode:", mode
            for node_type, patterns in type_table.iteritems():
                print "  node type:", node_type
                print "  patterns: ",
                pprint(patterns)
                #for patterninfo in self.match_templates[mode][nodetype]:
                #    pat, axistype, template = patterninfo
                #    print "    template matching pattern  %r  for axis type %s" % (pat, axistype)
                #    templates[template] = 1
                print '-' * 30
        return

    ############################# Prime Routines #############################

    def prime(self, context):
        """Instantiate the top-level variables and register the keys.

        Bindings that fail with an undefined-variable XPathError are deferred
        and retried after the others have been processed.  Raises
        XsltError(CIRCULAR_VARIABLE) when a whole pass makes no progress;
        any other XPathError propagates unchanged.
        """
        # Names already present in the context's variables are skipped;
        # presumably `element.instantiate` adds the binding there.
        processed = context.variables
        elements, deferred = self._variables, []
        # Remember the writer stack depth so it can be restored after a
        # failed instantiation.
        num_writers = len(context._writers)
        while 1:
            for element in elements:
                if element._name in processed:
                    continue
                try:
                    element.instantiate(context)
                except XPathError, error:
                    if error.code != XPathError.UNDEFINED_VARIABLE:
                        raise
                    # Remove any aborted and possibly unbalanced
                    # output handlers on the stack.
                    del context._writers[num_writers:]
                    deferred.append(element)
            if not deferred:
                break
            elif deferred == elements:
                # Every element of this pass was deferred again, so no
                # further progress is possible.
                # Just pick the first one as being the "bad" variable.
                raise XsltError(XsltError.CIRCULAR_VARIABLE,
                                name=deferred[0]._name)
            # Re-order stored variable elements to simplify processing for
            # the next transformation.
            for element in deferred:
                self._variables.remove(element)
                self._variables.append(element)
            # Try again, but this time processing only the ones that
            # referenced, as of yet, undefined variables.
            elements, deferred = deferred, []

        # Build a dispatch table for each group of same-named keys.
        for name, keys in self._keys.iteritems():
            context.keys[name] = _key_dispatch_table(keys)
        return