class choose_element(xslt_element):
    """
    Implementation of the `xsl:choose` instruction.

    The content model is one or more `xsl:when` children optionally
    followed by a single `xsl:otherwise`.
    """

    content_model = content_model.seq(
        content_model.rep1(content_model.qname(XSL_NAMESPACE, 'xsl:when')),
        content_model.opt(content_model.qname(XSL_NAMESPACE, 'xsl:otherwise')),
        )
    attribute_types = {}

    def setup(self):
        """
        Split the children into `(when, test)` pairs (`self._choices`) and
        the optional trailing `xsl:otherwise` (`self._otherwise`, or None).
        """
        branches = self.children
        trailing = branches[-1]
        if isinstance(trailing, otherwise_element):
            self._otherwise = trailing
            branches = branches[:-1]
        else:
            self._otherwise = None
        self._choices = [(when, when._test) for when in branches]
        return

    def instantiate(self, context):
        """
        Process the children of the first `xsl:when` whose test evaluates
        to true; when none does, process the `xsl:otherwise` children, if
        there is one.

        The context's `instruction` and `namespaces` are pointed at each
        `xsl:when` before its test is evaluated.
        """
        for when, condition in self._choices:
            context.instruction, context.namespaces = when, when.namespaces
            if condition.evaluate_as_boolean(context):
                return when.process_children(context)

        # No test matched: fall back to xsl:otherwise
        fallback = self._otherwise
        if not fallback:
            return
        return fallback.process_children(context)
class apply_templates_element(xslt_element):
    """
    Implementation of the `xsl:apply-templates` instruction.

    Children are any mix of `xsl:sort` and `xsl:with-param` elements.
    """

    content_model = content_model.rep(
        content_model.alt(content_model.qname(XSL_NAMESPACE, 'xsl:sort'),
                          content_model.qname(XSL_NAMESPACE, 'xsl:with-param')))
    attribute_types = {
        'select': attribute_types.expression(),
        'mode': attribute_types.qname(),
        }

    def setup(self):
        """
        Collect the `xsl:with-param` children as `(element, name, select)`
        triples in `self._params` and, when `xsl:sort` children exist, wrap
        `self._select` in a `sorted_expression` using them as sort keys.
        """
        children = self.children
        ordering = [node for node in children
                    if isinstance(node, sort_element)]
        self._params = [(node, node._name, node._select)
                        for node in children
                        if not isinstance(node, sort_element)
                        and isinstance(node, with_param_element)]
        if ordering:
            self._select = sorted_expression(self._select, ordering)
        return

    def instantiate(self, context):
        """
        Evaluate the with-param values, select the nodes to process and
        hand them to `context.transform.apply_templates()` together with
        `self._mode`.

        Without a `select` expression, the children of the context node
        (`context.node.xml_children`) are processed.
        """
        passed = {}
        for with_param, name, expr in self._params:
            # Each parameter is evaluated with itself as the current
            # instruction.
            context.instruction = with_param
            context.namespaces = with_param.namespaces
            passed[name] = expr.evaluate(context)

        selector = self._select
        if not selector:
            nodes = context.node.xml_children
        else:
            context.instruction, context.namespaces = self, self.namespaces
            try:
                nodes = selector.evaluate_as_nodeset(context)
            except TypeError:
                # NOTE(review): the bare `raise` re-raises the TypeError, so
                # the XsltStaticError below is never reached -- confirm
                # which of the two behaviours is actually intended.
                raise
                raise XsltStaticError(XsltError.INVALID_APPLY_TEMPLATES_SELECT,
                                      self)

        # Process the selected nodes using `self._mode`
        context.transform.apply_templates(context, nodes, self._mode, passed)
        return
class function_element(xslt_element):
    """
    An extension element defining a function that is callable from XPath
    expressions.

    The element holds zero or more `xsl:param` children followed by a
    template body.  `prime()` registers the element itself as the function
    implementation via `context.add_function()`.
    """

    content_model = content_model.seq(
        content_model.rep(content_model.qname(XSL_NAMESPACE, 'xsl:param')),
        content_model.template,
        )
    attribute_types = {
        'name': attribute_types.qname_but_not_ncname(required=True),
        }

    def setup(self):
        """
        Split the children into the leading `xsl:param` elements
        (`self._params`) and the instruction template
        (`self._instructions`).
        """
        params = self._params = []
        for child in self.children:
            if isinstance(child, param_element):
                params.append(child)
            elif isinstance(child, xslt_element):
                break
        if self._params:
            # NOTE(review): this slice skips one child beyond the params and
            # also drops the last child; presumably `self.children` carries
            # extra nodes around the template -- confirm before changing.
            self._instructions = self.children[len(self._params) + 1:-1]
        else:
            self._instructions = self.children
        return

    def prime(self, context):
        """Register this element as the function named `self._name`."""
        context.add_function(self._name, self)
        return

    def __call__(self, context, *args):
        """
        Invoke the function with the given argument expressions.

        Positional `args` are evaluated against `context` and bound, in
        order, to the declared parameters; parameters without a matching
        argument are instantiated to get their defaults.  Extra arguments
        are ignored.

        Returns `self.result`, which is reset to the empty string before
        the body runs.

        The context focus (node/position/size) and state
        (instruction/namespaces/variables) are restored even when the body
        raises, so an error that is caught and recovered from by a caller
        does not leave the function's private bindings installed on the
        shared context.
        """
        # Save context state as XPath is side-effect free
        focus = context.node, context.position, context.size
        state = context.instruction, context.namespaces, context.variables
        try:
            context.instruction, context.namespaces = self, self.namespaces
            # Set the return value
            self.result = datatypes.EMPTY_STRING
            # Set the parameter list
            if self._params:
                # Bind into a copy so the caller's variables are untouched
                context.variables = context.variables.copy()
                params = iter(self._params)
                # Handle the passed in arguments; `args` must come first so
                # that exhausting it does not consume a parameter.
                for arg, param in itertools.izip(args, params):
                    context.variables[param._name] = arg.evaluate(context)
                # Handle remaining parameter defaults
                for param in params:
                    param.instantiate(context)
            # Process the instruction template
            for child in self._instructions:
                child.instantiate(context)
        finally:
            # Restore context state
            context.instruction, context.namespaces, context.variables = state
            context.node, context.position, context.size = focus
        return self.result
class for_each_element(xslt_element):
    """
    Implementation of the `xsl:for-each` instruction.

    Children are zero or more leading `xsl:sort` elements followed by the
    template body.
    """

    content_model = content_model.seq(
        content_model.rep(content_model.qname(XSL_NAMESPACE, 'xsl:sort')),
        content_model.template,
        )
    attribute_types = {
        'select': attribute_types.nodeset_expression(required=True),
        }

    def setup(self):
        """
        Wrap `self._select` in a `sorted_expression` when the element
        starts with one or more `xsl:sort` children.
        """
        nodes = self.children
        leading = 0
        for node in nodes:
            if not isinstance(node, sort_element):
                break
            leading += 1
        if leading:
            self._select = sorted_expression(self._select, nodes[:leading])
        return

    def instantiate(self, context):
        """
        Process the children once for every selected node.

        For each node the context's `node`, `current_node` and `position`
        are updated; `size` is set to the number of selected nodes and
        `template` is cleared.  All five values are put back once the loop
        completes.
        """
        selector = self._select
        if not selector:
            nodes = context.node.xml_children
        else:
            context.instruction = self
            context.namespaces = self.namespaces
            try:
                nodes = selector.evaluate_as_nodeset(context)
            except TypeError:
                # NOTE(review): the bare `raise` re-raises the TypeError, so
                # the XsltError below is never reached -- confirm which of
                # the two behaviours is actually intended.
                raise
                raise XsltError(XsltError.INVALID_FOREACH_NODESET)

        # Save the context focus (node/pos/size) and state (tpl/curr)
        saved = (context.node, context.position, context.size,
                 context.template, context.current_node)

        # Now process the selected nodes
        context.template = None
        context.size = len(nodes)
        for index, node in enumerate(nodes):
            context.node = node
            context.current_node = node
            context.position = index + 1
            self.process_children(context)

        (context.node, context.position, context.size,
         context.template, context.current_node) = saved
        return
class call_template_element(xslt_element):
    """
    Implementation of the `xsl:call-template` instruction.

    `_tail_recursive` is a class-level default of False; it is flipped on
    an instance by code outside this class (see the NOTE in `prime`).
    """

    content_model = content_model.rep(
        content_model.qname(XSL_NAMESPACE, 'xsl:with-param')
        )
    attribute_types = {
        'name': attribute_types.qname(required=True),
        }

    _tail_recursive = False

    def setup(self):
        """
        Record every child as an `(element, name, select)` triple in
        `self._params`.
        """
        triples = []
        for with_param in self.children:
            triples.append((with_param, with_param._name, with_param._select))
        self._params = triples
        return

    def prime(self, context,
              _test_elements=(if_element.if_element,),
              _choose_elements=(choose_elements.when_element,
                                choose_elements.otherwise_element,)):
        """
        Resolve the named template this instruction calls and cache it as
        `self._template`.

        Raises `XsltError` (NAMED_TEMPLATE_NOT_FOUND) when the stylesheet
        has no template with that name.  The `_test_elements` and
        `_choose_elements` defaults are not used by the body.
        """
        stylesheet = self.root.stylesheet
        try:
            target = stylesheet.named_templates[self._name]
        except KeyError:
            raise XsltError(XsltError.NAMED_TEMPLATE_NOT_FOUND,
                            self, self._name)
        self._template = target
        # NOTE: Tail recursion is now checked for in the xsl:template setup().
        return

    def instantiate(self, context):
        """
        Evaluate the with-param values and invoke the target template.

        For a tail-recursive call the values are only stored in
        `context.recursive_parameters`; the template is not instantiated
        from here.
        """
        # We need to calculate the parameters before the variable context
        # is changed back in the template element
        passed = {}
        for with_param, name, expr in self._params:
            context.instruction = with_param
            context.namespaces = with_param.namespaces
            passed[name] = expr.evaluate(context)

        if not self._tail_recursive:
            #context.current_node = context.node
            self._template.instantiate(context, passed)
        else:
            context.recursive_parameters = passed
        return
class attribute_set_element(xslt_element):
    """
    Implementation of the top-level `xsl:attribute-set` element.

    Children are `xsl:attribute` elements; other attribute sets may be
    pulled in through the `use-attribute-sets` attribute.
    """

    content_model = content_model.rep(
        content_model.qname(XSL_NAMESPACE, 'xsl:attribute'))
    attribute_types = {
        'name': attribute_types.qname(required=True),
        'use-attribute-sets': attribute_types.qnames(),
        }

    def instantiate(self, context, used=None):
        """
        Instantiate the referenced attribute sets, then this set's own
        `xsl:attribute` children.

        `used` is the chain of attribute sets currently being instantiated;
        callers outside this class leave it as None.  It is forwarded to
        every referenced set so that a set which (directly or indirectly)
        uses itself is detected.

        Raises `XsltError` with CIRCULAR_ATTRIBUTE_SET for a circular
        reference and UNDEFINED_ATTRIBUTE_SET when a referenced set does
        not exist.
        """
        if used is None:
            used = []

        if self in used:
            raise XsltError(XsltError.CIRCULAR_ATTRIBUTE_SET,
                            self, self._name)
        else:
            used.append(self)

        # XSLT 1.0, Section 7.1.4, Paragraph 4:
        # The available variable bindings are only the top-level ones.
        variables = context.variables
        context.variables = context.global_variables

        attribute_sets = context.transform.attribute_sets
        for name in self._use_attribute_sets:
            try:
                attribute_set = attribute_sets[name]
            except KeyError:
                # Report the name that failed to resolve
                raise XsltError(XsltError.UNDEFINED_ATTRIBUTE_SET,
                                self, name)
            else:
                # Pass the chain along so cycles are caught by the
                # `self in used` check instead of recursing without bound.
                attribute_set.instantiate(context, used)

        self.process_children(context)

        context.variables = variables
        used.remove(self)
        return
class template_element(xslt_element):
    """
    Implementation of the top-level `xsl:template` element.

    Children are zero or more `xsl:param` elements followed by the
    template body.  `_tail_recursive` is set by `setup()` when the template
    is found to end in a call to itself.
    """

    content_model = content_model.seq(
        content_model.rep(content_model.qname(XSL_NAMESPACE, 'xsl:param')),
        content_model.template,
        )
    attribute_types = {
        'match': attribute_types.pattern(),
        'name': attribute_types.qname(),
        'priority': attribute_types.number(),
        'mode': attribute_types.qname(),
        }

    _tail_recursive = False

    def __repr__(self):
        return "<template_element match='%s', name='%s', mode='%s', priority='%s'>" % (
            self._match, self._name, self._mode, self._priority)

    def setup(self):
        """
        Split the children into `(param element, name)` pairs
        (`self._params`) and the instruction list (`self._instructions`),
        then look for a tail-recursive `xsl:call-template` of this
        template.
        """
        params = self._params = []
        for child in self.children:
            if isinstance(child, param_element):
                params.append((child, child._name))
            elif isinstance(child, xslt_element):
                break
        if self._params:
            # NOTE(review): this slice skips one child beyond the params and
            # also drops the last child; presumably `self.children` carries
            # extra nodes around the template -- confirm before changing.
            self._instructions = self.children[len(self._params) + 1:-1]
        else:
            self._instructions = self.children
        # Check for tail-recursive invocation (i.e., call-templates of self)
        if self._name and self._instructions:
            # `endpoints` is a worklist: it grows (via `queue`) while it is
            # being iterated, so the final instruction of every nested
            # xsl:if / xsl:choose branch gets visited as well.
            endpoints = [self._instructions[-1]]
            queue = endpoints.append
            for last in endpoints:
                if isinstance(last, call_template_element):
                    if last._name == self._name:
                        # Mark both the template and the calling instruction
                        self._tail_recursive = True
                        last._tail_recursive = True
                        break
                elif isinstance(last, if_element):
                    last = last.last_instruction
                    if last: queue(last)
                elif isinstance(last, choose_element):
                    for choice in last.children:
                        last = choice.last_instruction
                        if last: queue(last)
        return

    def _printTemplateInfo(self):
        """Debugging aid: print what `self.getTemplateInfo()` returns."""
        info, tname = self.getTemplateInfo()
        if tname:
            print "Template named %r:" % tname
        else:
            print "Template matching pattern %r :" % self._match
        print " location: line %d, col %d of %s" % \
                (self.lineNumber, self.columnNumber, self.baseUri)
        for shortcut in info:
            print " shortcut:"
            importidx, priority, tmode, patterninfo, quickkey = shortcut
            print " ...import index:", importidx
            print " .......priority:", priority
            print " ...........mode:", tmode
            if not tname:
                print " ......quick key: node type %s, expanded-name %r" % quickkey
                print " ........pattern: %r for axis type %s" % patterninfo[
                    0:2]
        return

    def instantiate(self, context, params=None):
        """
        Run the template body.

        `params` maps parameter names to already-evaluated values; any
        declared parameter missing from it is instantiated through its
        `xsl:param` element.  When the template declares parameters they
        are bound in a copy of `context.variables`, and the original
        mapping is put back afterwards.

        The body is re-run for as long as it leaves a value in
        `context.recursive_parameters`; that value becomes `params` for
        the next pass.
        """
        if params is None:
            params = {}

        if self._params:
            variables = context.variables
            context.variables = variables.copy()

        # The optimizer converts this to, roughly, a do/while loop
        while 1:
            # Cleared on every pass; a non-None value after the body has run
            # requests another iteration.
            context.recursive_parameters = None
            for child, param in self._params:
                if param in params:
                    context.variables[param] = params[param]
                else:
                    child.instantiate(context)

            for child in self._instructions:
                child.instantiate(context)

            # Update the params from the values given in
            # `recursive_parameters`.
            params = context.recursive_parameters
            if params is None:
                break

        if self._params:
            context.variables = variables
        return
class transform_element(xslt_element):
    """
    Implementation of the stylesheet's document element.

    `setup()` gathers the top-level elements into the lookup tables kept on
    the instance (space rules, output parameters, keys, decimal formats,
    namespace aliases, attribute sets, variables/parameters and the
    template dispatch tables); `prime()` instantiates the top-level
    variables and installs the key tables on a context.
    """

    content_model = content_model.seq(
        content_model.rep(content_model.qname(XSL_NAMESPACE, 'xsl:import')),
        content_model.top_level_elements,
        )
    attribute_types = {
        'id': attribute_types.id(),
        'extension-element-prefixes': attribute_types.prefixes(),
        'exclude-result-prefixes': attribute_types.prefixes(),
        'version': attribute_types.number(required=True),
        }

    # Class-level defaults; `setup()` assigns space_rules, decimal_formats,
    # namespace_aliases, attribute_sets, match_templates, named_templates
    # and parameters on the instance.
    space_rules = None
    decimal_formats = None
    namespace_aliases = None
    attribute_sets = None
    match_templates = None
    named_templates = None
    parameters = None
    variables = None
    global_variables = None
    initial_functions = None

    builtin_param_warning = True

    def setup(self, _param_element=variable_elements.param_element):
        """
        Called only once, at the first initialization

        Builds every stylesheet-wide table from the top-level children.

        `_param_element` is bound as a default argument because the module
        name `variable_elements` is shadowed by a local list inside the
        body.

        Raises `XsltError` for conflicting decimal formats, namespace
        aliases and named templates, and `XPathError` when a match pattern
        uses an undeclared prefix.
        """
        self.output_parameters = outputparameters.outputparameters()

        # Sort the top-level elements in decreasing import precedence to ease
        # processing later.
        precedence_key = operator.attrgetter('import_precedence')
        # NOTE(review): this sorted list is never read -- the grouping below
        # iterates `self.children` and `elements` is rebound before its next
        # use.  Confirm whether the grouping was meant to use it.
        elements = sorted(self.children, key=precedence_key, reverse=True)

        # Merge the top-level stylesheet elements into their respective
        # lists.  Any element name not in the mapping is discarded.
        # Note, by sharing the same list no merging is required later.
        whitespace_elements, variable_elements = [], []
        top_level_elements = {
            'strip-space': whitespace_elements,
            'preserve-space': whitespace_elements,
            'output': [],
            'key': [],
            'decimal-format': [],
            'namespace-alias': [],
            'attribute-set': [],
            'variable': variable_elements,
            'param': variable_elements,
            'template': [],
            }
        # Using `groupby` takes advantage of series of same-named elements
        # appearing adjacent to each other.
        key = operator.attrgetter('expanded_name')
        for (namespace, name), nodes in itertools.groupby(self.children, key):
            if namespace == XSL_NAMESPACE and name in top_level_elements:
                top_level_elements[name].extend(nodes)

        # - process the `xsl:preserve-space` and `xsl:strip-space` elements
        # RECOVERY: Multiple matching patterns use the last occurrence
        space_rules = {}
        for element in whitespace_elements:
            strip = element._strip_whitespace
            for token in element._elements:
                namespace, name = token
                space_rules[token] = (namespace, name, strip)
        self.space_rules = space_rules.values()
        # sort in decreasing priority, where `*` is lowest, followed by
        # `prefix:*`, then all others.
        self.space_rules.sort(reverse=True)

        # - process the `xsl:output` elements
        # Sort in increasing import precedence, so the last one added
        # will have the highest import precedence
        elements = top_level_elements['output']
        getter = operator.attrgetter(
            '_method', '_version', '_encoding', '_omit_xml_declaration',
            '_standalone', '_doctype_system', '_doctype_public',
            '_cdata_section_elements', '_indent', '_media_type',
            '_byte_order_mark', '_canonical_form')
        # Later elements override earlier ones for every value they set;
        # the cdata-section-elements lists are accumulated instead.
        for element in elements:
            (method, version, encoding, omit_xmldecl, standalone,
             doctype_system, doctype_public, cdata_elements, indent,
             media_type, byte_order_mark, canonical_form) = getter(element)
            if method is not None:
                self.output_parameters.method = method
            if version is not None:
                self.output_parameters.version = version
            if encoding is not None:
                self.output_parameters.encoding = encoding
            if omit_xmldecl is not None:
                self.output_parameters.omit_xml_declaration = omit_xmldecl
            if standalone is not None:
                self.output_parameters.standalone = standalone
            if doctype_system is not None:
                self.output_parameters.doctype_system = doctype_system
            if doctype_public is not None:
                self.output_parameters.doctype_public = doctype_public
            if cdata_elements:
                self.output_parameters.cdata_section_elements += cdata_elements
            if indent is not None:
                self.output_parameters.indent = indent
            if media_type is not None:
                self.output_parameters.media_type = media_type
            if byte_order_mark is not None:
                self.output_parameters.byte_order_mark = byte_order_mark
            if canonical_form is not None:
                self.output_parameters.canonical_form = canonical_form

        # - process the `xsl:key` elements
        # Group the keys by name
        elements = top_level_elements['key']
        name_key = operator.attrgetter('_name')
        # `groupby` only merges adjacent items, hence the sort by name first
        elements.sort(key=name_key)
        keys = self._keys = {}
        for name, elements in itertools.groupby(elements, name_key):
            keys[name] = tuple(elements)

        # - process the `xsl:decimal-format` elements
        formats = self.decimal_formats = {}
        getter = operator.attrgetter(
            '_decimal_separator', '_grouping_separator', '_infinity',
            '_minus_sign', '_NaN', '_percent', '_per_mille', '_zero_digit',
            '_digit', '_pattern_separator')
        for element in top_level_elements['decimal-format']:
            name = element._name
            format = getter(element)
            # It is an error to declare a decimal-format more than once
            # (even with different import precedence) with different values.
            if name in formats and formats[name] != format:
                # Construct a useful name for the error message.
                if name:
                    namespace, name = name
                    if namespace:
                        name = element.namespaces[namespace] + ':' + name
                else:
                    name = '#default'
                raise XsltError(XsltError.DUPLICATE_DECIMAL_FORMAT, name)
            else:
                formats[name] = format
        # Add the default decimal format, if not declared.
        if None not in formats:
            formats[None] = ('.', ',', 'Infinity', '-', 'NaN', '%',
                             unichr(0x2030), '0', '#', ';')

        # - process the `xsl:namespace-alias` elements
        elements = top_level_elements['namespace-alias']
        elements.reverse()
        aliases = self.namespace_aliases = {}
        for precedence, group in itertools.groupby(elements, precedence_key):
            # `mapped` tracks the namespaces aliased within this precedence
            # group; only the first alias seen for a namespace is recorded.
            mapped = {}
            for element in group:
                namespace = element.namespaces[element._stylesheet_prefix]
                if namespace not in aliases:
                    mapped[namespace] = True
                    result_prefix = element._result_prefix
                    result_namespace = element.namespaces[result_prefix]
                    aliases[namespace] = (result_namespace, result_prefix)
                # It is an error for a namespace URI to be mapped to multiple
                # different namespace URIs (with the same import precedence).
                elif namespace in mapped:
                    raise XsltError(XsltError.DUPLICATE_NAMESPACE_ALIAS,
                                    element._stylesheet_prefix)
        if aliases:
            # apply namespace fixup for the literal elements
            _fixup_aliases(self, aliases)

        # - process the `xsl:attribute-set` elements
        sets = self.attribute_sets = {}
        for element in top_level_elements['attribute-set']:
            sets[element._name] = element

        # - process the `xsl:param` and `xsl:variable` elements
        # `self._variables` starts as a copy in the original order; the
        # reversed scan keeps the last binding of each name and removes the
        # earlier ones from that copy.
        index, self._variables = {}, variable_elements[:]
        variable_elements.reverse()
        for element in variable_elements:
            name = element._name
            if name not in index:
                # unique (or first) variable binding
                index[name] = 1
            else:
                # shadowed variable binding, remove from processing list
                self._variables.remove(element)
        self.parameters = frozenset(element._name
                                    for element in self._variables
                                    if isinstance(element, _param_element))

        # - process the `xsl:template` elements
        match_templates = collections.defaultdict(_type_dispatch_table)
        named_templates = self.named_templates = {}
        elements = top_level_elements['template']
        elements.reverse()
        getter = operator.attrgetter('node_test', 'axis_type', 'node_type')
        for position, element in enumerate(elements):
            match, name = element._match, element._name
            precedence = element.import_precedence
            if match:
                namespaces = element.namespaces
                template_priority = element._priority
                mode_table = match_templates[element._mode]
                for pattern in match:
                    node_test, axis_type, node_type = getter(pattern)
                    # An explicit `priority` attribute overrides the
                    # priority of the pattern's node test.
                    if template_priority is None:
                        priority = node_test.priority
                    else:
                        priority = template_priority
                    sort_key = (precedence, priority, position)
                    info = (sort_key, node_test, axis_type, element)
                    # Add the template rule to the dispatch table
                    type_key = node_type.xml_typecode
                    if type_key == tree.element.xml_typecode:
                        # Element types are further keyed by the name test.
                        name_key = node_test.name_key
                        if name_key:
                            prefix, local = name_key
                            # Unprefixed names are in the null-namespace
                            try:
                                namespace = prefix and namespaces[prefix]
                            except KeyError:
                                raise XPathError(XPathError.UNDEFINED_PREFIX,
                                                 prefix=prefix)
                            else:
                                name_key = namespace, local
                        mode_table[type_key][name_key].append(info)
                    else:
                        # Every other node type gets lumped into a single list
                        # for that node type
                        mode_table[type_key].append(info)
            if name:
                # XSLT 1.0, Section 6, Paragraph 3:
                # It is an error if a stylesheet contains more than one
                # template with the same name and same import precedence.
                if name not in named_templates:
                    named_templates[name] = element
                elif named_templates[name].import_precedence == precedence:
                    # Construct a useful name for the error message.
                    namespace, name = name
                    if namespace:
                        name = element.namespaces[namespace] + ':' + name
                    raise XsltError(XsltError.DUPLICATE_NAMED_TEMPLATE, name)
        # Now expand the tables and convert to regular dictionaries to
        # prevent inadvertent growth when non-existent keys are used.
        match_templates = self.match_templates = dict(match_templates)
        for mode, type_table in match_templates.iteritems():
            # Add those patterns that don't have a distinct type:
            #   node(), id() and key() patterns
            any_patterns = type_table[tree.node.xml_typecode]
            type_table = match_templates[mode] = dict(type_table)
            for type_key, patterns in type_table.iteritems():
                if type_key == tree.element.xml_typecode:
                    # Add those that are wildcard tests ('*' and 'prefix:*')
                    wildcard_names = patterns[None]
                    name_table = type_table[type_key] = dict(patterns)
                    for name_key, patterns in name_table.iteritems():
                        if name_key is not None:
                            patterns.extend(wildcard_names)
                        patterns.extend(any_patterns)
                        # Highest sort key (precedence, priority, position)
                        # first
                        patterns.sort(reverse=True)
                        name_table[name_key] = tuple(patterns)
                else:
                    patterns.extend(any_patterns)
                    patterns.sort(reverse=True)
                    type_table[type_key] = tuple(patterns)
        #self._dump_match_templates(match_templates)
        return

    def _dump_match_templates(self, match_templates=None):
        """
        Debugging aid: print the dispatch tables (defaults to
        `self.match_templates`).
        """
        from pprint import pprint
        if match_templates is None:
            match_templates = self.match_templates
        print "=" * 50
        for mode, type_table in match_templates.iteritems():
            print "mode:", mode
            for node_type, patterns in type_table.iteritems():
                print " node type:", node_type
                print " patterns: ",
                pprint(patterns)
                #for patterninfo in self.match_templates[mode][nodetype]:
                #    pat, axistype, template = patterninfo
                #    print " template matching pattern %r for axis type %s" % (pat, axistype)
                #    templates[template] = 1
            print '-' * 30
        return

    ############################# Prime Routines #############################

    def prime(self, context):
        """
        Instantiate the top-level variables and parameters against
        `context`, then install the key dispatch tables in `context.keys`.

        A binding whose instantiation fails with an undefined-variable
        `XPathError` is deferred and retried once the rest of that pass has
        been processed.  Raises `XsltError` (CIRCULAR_VARIABLE) when a pass
        defers every binding it was given.
        """
        processed = context.variables
        elements, deferred = self._variables, []
        num_writers = len(context._writers)
        while 1:
            for element in elements:
                # Skip names that are already bound in the context
                if element._name in processed:
                    continue
                try:
                    element.instantiate(context)
                except XPathError, error:
                    if error.code != XPathError.UNDEFINED_VARIABLE:
                        raise
                    # Remove any aborted and possibly unbalanced
                    # output handlers on the stack.
                    del context._writers[num_writers:]
                    deferred.append(element)
            if not deferred:
                break
            elif deferred == elements:
                # No progress was made in this pass.
                # Just pick the first one as being the "bad" variable.
                raise XsltError(XsltError.CIRCULAR_VARIABLE,
                                name=deferred[0]._name)
            # Re-order stored variable elements to simplify processing for
            # the next transformation.
            for element in deferred:
                self._variables.remove(element)
                self._variables.append(element)
            # Try again, but this time processing only the ones that
            # referenced, as of yet, undefined variables.
            elements, deferred = deferred, []

        for name, keys in self._keys.iteritems():
            context.keys[name] = _key_dispatch_table(keys)
        return