def evaluate_function(context, string):
    """
    The dyn:evaluate function evaluates a string as an XPath expression and
    returns the resulting value, which might be a boolean, number, string,
    node set, result tree fragment or external object. The sole argument is
    the string to be evaluated. If the string is an invalid XPath expression,
    an empty node-set is returned.

    context - the current evaluation context
    string - expression object; its string value is the XPath source text

    Returns the value produced by the parsed expression.  On a parse error
    or an evaluation error a warning carrying the traceback is sent to
    `context.processor` and an empty node-set is returned instead.

    http://www.exslt.org/dyn/functions/evaluate/index.html
    """
    string = string.evaluate_as_string(context)
    try:
        expr = parse_xpath(string)
    except XPathError:
        lines = traceback.format_exception(*sys.exc_info())
        # Swap the generic first traceback line for a specific message
        lines[:1] = [("Syntax error in XPath expression '%(expr)s', "
                      "lower-level traceback:\n") % {'expr': string}]
        context.processor.warning(''.join(lines))
        return datatypes.nodeset()
    try:
        return expr.evaluate(context)
    except Exception:
        # `Exception` rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not turned into a mere warning.
        lines = traceback.format_exception(*sys.exc_info())
        lines[:1] = [("Runtime error in XPath expression '%(expr)s', "
                      "lower-level traceback:\n") % {'expr': string}]
        context.processor.warning(''.join(lines))
        return datatypes.nodeset()
def closure_function(context, nodeset, string):
    """
    The dyn:closure function creates a node set resulting from transitive
    closure of evaluating the expression passed as the second argument on
    each of the nodes passed as the first argument, then on the node set
    resulting from that and so on until no more nodes are found.

    context - the current evaluation context
    nodeset - expression object yielding the starting nodes
    string - expression object; its string value is the XPath source text

    Returns a node-set of every node reached.  If the expression cannot be
    parsed, a warning is issued and an empty node-set is returned.

    http://www.exslt.org/dyn/functions/closure/index.html
    """
    nodeset = nodeset.evaluate_as_nodeset(context)
    string = string.evaluate_as_string(context)
    try:
        expr = parse_xpath(string)
    except XPathError:
        lines = traceback.format_exception(*sys.exc_info())
        lines[:1] = [("Syntax error in XPath expression '%(expr)s', "
                      "lower-level traceback:\n") % {'expr': string}]
        context.processor.warning(''.join(lines))
        return datatypes.nodeset()
    result = datatypes.nodeset()
    # Nodes already added to the result.  Without this, an expression that
    # leads back to a visited node (e.g. '..' then '*') never terminates,
    # and the result accumulates duplicates.
    seen = set()
    while nodeset:
        fresh = []
        for node in _map(context, nodeset, expr):
            if node not in seen:
                seen.add(node)
                fresh.append(node)
        result.extend(fresh)
        # Only newly discovered nodes need to be expanded further
        nodeset = datatypes.nodeset(fresh)
    return result
def evaluate_as_nodeset(self, context):
    """
    Look up nodes in a named key table.

    The first argument supplies the key name (must be a QName); the second
    supplies the value, or node-set of values, to look up.  Returns a
    node-set of the matching nodes, empty when the key name is unknown.

    Raises XsltRuntimeError when the key name is not a valid QName.
    """
    name_expr, value_expr = self._args

    # Resolve the key table for the current document
    key_name = name_expr.evaluate_as_string(context)
    if not isqname(key_name):
        raise XsltRuntimeError(XsltError.INVALID_QNAME_ARGUMENT,
                               context.instruction, value=key_name)
    key_name = context.expand_qname(key_name)
    try:
        key_documents = context.keys[key_name]
    except KeyError:
        # No key was declared under this name
        return datatypes.nodeset()
    key_values = key_documents[context.node.xml_root]

    # Resolve the lookup value(s)
    lookup = value_expr.evaluate(context)
    if isinstance(lookup, datatypes.nodeset):
        # Each node contributes the nodes keyed by its string value
        result = []
        for item in lookup:
            item = datatypes.string(item)
            if item in key_values:
                result.extend(key_values[item])
    else:
        lookup = datatypes.string(lookup)
        result = key_values[lookup] if lookup in key_values else ()
    return datatypes.nodeset(result)
def evaluate_function(context, string):
    """
    The dyn:evaluate function evaluates a string as an XPath expression and
    returns the resulting value, which might be a boolean, number, string,
    node set, result tree fragment or external object. The sole argument is
    the string to be evaluated. If the string is an invalid XPath expression,
    an empty node-set is returned.

    context - the current evaluation context
    string - expression object; its string value is the XPath source text

    Returns the value produced by the parsed expression.  On a parse error
    or an evaluation error a warning carrying the traceback is sent to
    `context.processor` and an empty node-set is returned instead.

    http://www.exslt.org/dyn/functions/evaluate/index.html
    """
    string = string.evaluate_as_string(context)
    try:
        expr = parse_xpath(string)
    except XPathError:
        lines = traceback.format_exception(*sys.exc_info())
        # Swap the generic first traceback line for a specific message
        lines[:1] = [("Syntax error in XPath expression '%(expr)s', "
                      "lower-level traceback:\n") % {'expr': string}]
        context.processor.warning(''.join(lines))
        return datatypes.nodeset()
    try:
        return expr.evaluate(context)
    except Exception:
        # `Exception` rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not turned into a mere warning.
        lines = traceback.format_exception(*sys.exc_info())
        lines[:1] = [("Runtime error in XPath expression '%(expr)s', "
                      "lower-level traceback:\n") % {'expr': string}]
        context.processor.warning(''.join(lines))
        return datatypes.nodeset()
def test_filter_exr():
    """
    A filter expression with an always-true predicate keeps every node;
    with an always-false predicate it yields an empty node-set.
    """
    # TRUE predicate: both nodes survive
    keep_all = filter_expr(nodeset_literal([ROOT, CHILD1]),
                           predicates([predicate(TRUE)]))
    result = keep_all.evaluate_as_nodeset(CONTEXT)
    _check_nodeset_result(result, datatypes.nodeset([ROOT, CHILD1]))

    # FALSE predicate: nothing survives
    keep_none = filter_expr(nodeset_literal([ROOT, CHILD1]),
                            predicates([predicate(FALSE)]))
    result = keep_none.evaluate_as_nodeset(CONTEXT)
    _check_nodeset_result(result, datatypes.nodeset())
def match_function(context, source, pattern, flags=None):
    """
    The regexp:match function lets you get hold of the substrings of the
    string passed as the first argument that match the captured parts of
    the regular expression passed as the second argument.

    The second argument is a regular expression that follows the Javascript
    regular expression syntax.
    NOTE: the pattern is compiled as-is with Python's `re` module.

    The third argument is a string consisting of character flags to be used
    by the match. If a character is present then that flag is true. The
    flags are:
      g: global match - the submatches from all the matches in the string
                        are returned. If this character is not present, then
                        only the submatches from the first match in the
                        string are returned.
      i: case insensitive - the regular expression is treated as case
                            insensitive. If this character is not present,
                            then the regular expression is case sensitive.

    The regexp:match function returns a node set of 'match' elements, each of
    whose string value is equal to a portion of the first argument string
    that was captured by the regular expression. If the match is not global,
    the first match element has a value equal to the portion of the string
    matched by the entire regular expression.

    Returns an empty node-set when the pattern does not match at all.
    """
    source = source.evaluate_as_string(context)
    pattern = pattern.evaluate_as_string(context)
    flags = flags.evaluate_as_string(context) if flags else ''

    regexp = re.compile(pattern, re.IGNORECASE if 'i' in flags else 0)

    match = regexp.search(source)
    if match is None:
        return datatypes.nodeset()
    context.push_tree_writer(context.instruction.baseUri)
    if 'g' in flags:
        # Emit one element per match.  finditer() always advances past a
        # zero-length match; re-searching from match.end() by hand would
        # find the same empty match forever.
        for found in regexp.finditer(source):
            context.start_element(u'match')
            # return everything that matched the pattern
            context.text(found.group())
            context.end_element(u'match')
    else:
        # the first 'match' element contains entire matched text,
        # followed by one element per capture group
        captured = [match.group()]
        captured.extend(match.groups())
        for text in captured:
            context.start_element(u'match')
            # Groups that did not participate are None: emit an empty element
            if text:
                context.text(text)
            context.end_element(u'match')
    writer = context.pop_writer()
    rtf = writer.get_result()
    return datatypes.nodeset(rtf.xml_children)
def test_filter_exr():
    """
    A filter expression with an always-true predicate keeps every node;
    with an always-false predicate it yields an empty node-set.
    """
    # TRUE predicate: both nodes survive
    keep_all = filter_expr(nodeset_literal([ROOT, CHILD1]),
                           predicates([predicate(TRUE)]))
    result = keep_all.evaluate_as_nodeset(CONTEXT)
    _check_nodeset_result(result, datatypes.nodeset([ROOT, CHILD1]))

    # FALSE predicate: nothing survives
    keep_none = filter_expr(nodeset_literal([ROOT, CHILD1]),
                            predicates([predicate(FALSE)]))
    result = keep_none.evaluate_as_nodeset(CONTEXT)
    _check_nodeset_result(result, datatypes.nodeset())
def filter(self, nodes, context, reverse):
    """
    Apply each predicate in this collection, in order, to `nodes`.

    nodes - the candidate nodes
    context - evaluation context; its node/position/size are saved before
              the predicates run and put back afterwards
    reverse - when true, the resulting node-set is reversed in place

    Returns a node-set of the nodes that passed every predicate.
    """
    if not self:
        # Nothing to test: just make sure the caller gets a node-set
        filtered = datatypes.nodeset(nodes)
    else:
        saved_focus = (context.node, context.position, context.size)
        filtered = nodes
        for pred in self:
            filtered = datatypes.nodeset(pred.select(context, filtered))
        context.node, context.position, context.size = saved_focus
    if reverse:
        filtered.reverse()
    return filtered
def test_absolute_location_path():
    """
    Absolute location paths are evaluated from the document, whatever the
    context node is (CHILD1 here).
    """
    all_elements = ([ROOT, CHILD1] + GCHILDREN1 + [CHILD2] + GCHILDREN2 +
                    [CHILD3, LANG] + LCHILDREN)
    descendant_step = location_step(axis('descendant'), name_test('*'))
    cases = [
        # /
        ([], datatypes.nodeset([DOC])),
        # /child::*
        ([relative_location_path(CHILD_STEP)], datatypes.nodeset([ROOT])),
        # /descendant::*
        ([relative_location_path(descendant_step)],
         datatypes.nodeset(all_elements)),
    ]
    for args, expected in cases:
        path = absolute_location_path(*args)
        result = path.evaluate_as_nodeset(CONTEXT_CHILD1)
        assert isinstance(result, datatypes.nodeset)
        assert result == expected, (result, expected)
def paramvalue(obj):
    """
    Try to convert a Python object into an XPath data model value.

    Returns the converted value on success, otherwise None.  Supported
    inputs: XPath objects and tree nodes (returned unchanged), unicode and
    UTF-8 byte strings, booleans, numbers, and lists/tuples whose items are
    either all strings or all nodes.
    """
    if isinstance(obj, datatypes.xpathobject):
        return obj
    if isinstance(obj, unicode):
        return datatypes.string(obj)
    if isinstance(obj, str):
        try:
            decoded = obj.decode("utf-8")
        except UnicodeError:
            return None
        return datatypes.string(decoded)
    if isinstance(obj, bool):
        # bool is a subclass of int, so it must be tested before numbers
        if obj:
            return datatypes.TRUE
        return datatypes.FALSE
    if isinstance(obj, (int, long, float)):
        return datatypes.number(obj)
    if isinstance(obj, tree.node):
        return obj
    # NOTE: At one time (WSGI.xml days) this attempted to be smart and handle
    # all iterables, but that would mean blindly dealing with dangerous
    # creatures, such as sockets. So now it's more conservative and sticks
    # to just list & tuple.
    if not isinstance(obj, (list, tuple)):
        return None

    # A sequence is usable only if its items are all strings or all nodes.
    if all(isinstance(item, (str, unicode)) for item in obj):
        # Strings become text nodes; an entity parent preserves ordering
        entity = tree.entity()
        for item in obj:
            if isinstance(item, str):
                try:
                    item = unicode(item, "utf8")
                except UnicodeError:
                    return None
            entity.xml_append(tree.text(item))
        return datatypes.nodeset(entity.xml_children)
    if all(isinstance(item, tree.node) for item in obj):
        return datatypes.nodeset(obj)
    return None
def paramvalue(obj):
    """
    Try to convert a Python object into an XPath data model value.

    Returns the converted value on success, otherwise None.  Supported
    inputs: XPath objects and tree nodes (returned unchanged), unicode and
    UTF-8 byte strings, booleans, numbers, and lists/tuples whose items are
    either all strings or all nodes.
    """
    if isinstance(obj, datatypes.xpathobject):
        return obj
    if isinstance(obj, unicode):
        return datatypes.string(obj)
    if isinstance(obj, str):
        try:
            decoded = obj.decode('utf-8')
        except UnicodeError:
            return None
        return datatypes.string(decoded)
    if isinstance(obj, bool):
        # bool is a subclass of int, so it must be tested before numbers
        if obj:
            return datatypes.TRUE
        return datatypes.FALSE
    if isinstance(obj, (int, long, float)):
        return datatypes.number(obj)
    if isinstance(obj, tree.node):
        return obj
    # NOTE: At one time (WSGI.xml days) this attempted to be smart and handle
    # all iterables, but that would mean blindly dealing with dangerous
    # creatures, such as sockets. So now it's more conservative and sticks
    # to just list & tuple.
    if not isinstance(obj, (list, tuple)):
        return None

    # A sequence is usable only if its items are all strings or all nodes.
    if all(isinstance(item, (str, unicode)) for item in obj):
        # Strings become text nodes; an entity parent preserves ordering
        entity = tree.entity()
        for item in obj:
            if isinstance(item, str):
                try:
                    item = unicode(item, 'utf8')
                except UnicodeError:
                    return None
            entity.xml_append(tree.text(item))
        return datatypes.nodeset(entity.xml_children)
    if all(isinstance(item, tree.node) for item in obj):
        return datatypes.nodeset(obj)
    return None
def test_last_function_low_level():
    """
    Select the url attribute of the ulink whose position equals last().
    """
    query = u'/listitem/para/ulink[position() = last()]/@url'
    doc = amara.parse(XML)
    last_url = doc.xml_select(query)
    expected = nodeset([u'http://www.ncbi.nlm.nih.gov/pubmed/16785838'])
    assert last_url == expected, (expected, last_url)
def test_abbreviated_relative_location_path():
    """Evaluate child::*//child::* with ROOT as the context node."""
    # child::*//child::*
    path = abbreviated_relative_location_path(
        relative_location_path(CHILD_STEP), CHILD_STEP)
    result = path.evaluate_as_nodeset(CONTEXT_ROOT)
    assert isinstance(result, datatypes.nodeset)
    expected = datatypes.nodeset(GCHILDREN1 + GCHILDREN2 + LCHILDREN)
    assert result == expected, (result, expected)
def tokenize_function(context, string, delimiters=None):
    """
    The str:tokenize function splits up a string and returns a node set of
    'token' elements, each containing one token from the string.

    The first argument is the string to be tokenized. The second argument
    is a string consisting of a number of characters. Each character in
    this string is taken as a delimiting character. The string given by the
    first argument is split at any occurrence of any of these characters.

    When the second argument is omitted, tab, newline, carriage return and
    space are the delimiters.  When it evaluates to an empty string, every
    character of the input becomes its own token.
    """
    string = string.evaluate_as_string(context)
    if delimiters:
        delimiters = delimiters.evaluate_as_string(context)
    else:
        delimiters = "\t\n\r "

    if not delimiters:
        # Iterating the string itself yields one token per character
        tokens = string
    else:
        tokens = re.split("[%s]" % re.escape(delimiters), string)

    # Build the 'token' elements in a temporary result tree
    context.push_tree_writer(context.instruction.baseUri)
    for piece in tokens:
        context.start_element(u"token")
        context.text(piece)
        context.end_element(u"token")
    fragment = context.pop_writer().get_result()
    return datatypes.nodeset(fragment.xml_children)
def tokenize_function(context, string, delimiters=None):
    """
    The str:tokenize function splits up a string and returns a node set of
    'token' elements, each containing one token from the string.

    The first argument is the string to be tokenized. The second argument
    is a string consisting of a number of characters. Each character in
    this string is taken as a delimiting character. The string given by the
    first argument is split at any occurrence of any of these characters.

    When the second argument is omitted, tab, newline, carriage return and
    space are the delimiters.  When it evaluates to an empty string, every
    character of the input becomes its own token.
    """
    string = string.evaluate_as_string(context)
    if delimiters:
        delimiters = delimiters.evaluate_as_string(context)
    else:
        delimiters = '\t\n\r '

    if not delimiters:
        # Iterating the string itself yields one token per character
        tokens = string
    else:
        tokens = re.split('[%s]' % re.escape(delimiters), string)

    # Build the 'token' elements in a temporary result tree
    context.push_tree_writer(context.instruction.baseUri)
    for piece in tokens:
        context.start_element(u'token')
        context.text(piece)
        context.end_element(u'token')
    fragment = context.pop_writer().get_result()
    return datatypes.nodeset(fragment.xml_children)
def test_last_function_low_level():
    """
    Select the url attribute of the ulink whose position equals last().
    """
    query = u'/listitem/para/ulink[position() = last()]/@url'
    doc = amara.parse(XML)
    last_url = doc.xml_select(query)
    expected = nodeset([u'http://www.ncbi.nlm.nih.gov/pubmed/16785838'])
    assert last_url == expected, (expected, last_url)
def intersection_function(context, nodeset1, nodeset2):
    """
    The set:intersection function returns a node set comprising the nodes
    that are within both the node sets passed as arguments to it.

    context - the current evaluation context
    nodeset1, nodeset2 - expression objects evaluated as node-sets

    The result keeps the order in which the nodes appear in the first
    argument (each node at most once) rather than the arbitrary order of
    a Python set.
    """
    first = nodeset1.evaluate_as_nodeset(context)
    second = set(nodeset2.evaluate_as_nodeset(context))
    emitted = set()
    common = []
    for node in first:
        if node in second and node not in emitted:
            emitted.add(node)
            common.append(node)
    return datatypes.nodeset(common)
def test_abbreviated_absolute_location_path():
    """Evaluate //child::* with CHILD1 as the context node."""
    # //child::*
    path = abbreviated_absolute_location_path(
        relative_location_path(CHILD_STEP))
    result = path.evaluate_as_nodeset(CONTEXT_CHILD1)
    assert isinstance(result, datatypes.nodeset)
    all_elements = ([ROOT, CHILD1] + GCHILDREN1 + [CHILD2] + GCHILDREN2 +
                    [CHILD3, LANG] + LCHILDREN)
    expected = datatypes.nodeset(all_elements)
    assert result == expected, (result, expected)
def test_absolute_location_path():
    """
    Absolute location paths are evaluated from the document, whatever the
    context node is (CHILD1 here).
    """
    all_elements = ([ROOT, CHILD1] + GCHILDREN1 + [CHILD2] + GCHILDREN2 +
                    [CHILD3, LANG] + LCHILDREN)
    descendant_step = location_step(axis('descendant'), name_test('*'))
    cases = [
        # /
        ([], datatypes.nodeset([DOC])),
        # /child::*
        ([relative_location_path(CHILD_STEP)], datatypes.nodeset([ROOT])),
        # /descendant::*
        ([relative_location_path(descendant_step)],
         datatypes.nodeset(all_elements)),
    ]
    for args, expected in cases:
        path = absolute_location_path(*args)
        result = path.evaluate_as_nodeset(CONTEXT_CHILD1)
        assert isinstance(result, datatypes.nodeset)
        assert result == expected, (result, expected)
def test_abbreviated_relative_location_path():
    """Evaluate child::*//child::* with ROOT as the context node."""
    # child::*//child::*
    path = abbreviated_relative_location_path(
        relative_location_path(CHILD_STEP), CHILD_STEP)
    result = path.evaluate_as_nodeset(CONTEXT_ROOT)
    assert isinstance(result, datatypes.nodeset)
    expected = datatypes.nodeset(GCHILDREN1 + GCHILDREN2 + LCHILDREN)
    assert result == expected, (result, expected)
def test_abbreviated_absolute_location_path():
    """Evaluate //child::* with CHILD1 as the context node."""
    # //child::*
    path = abbreviated_absolute_location_path(
        relative_location_path(CHILD_STEP))
    result = path.evaluate_as_nodeset(CONTEXT_CHILD1)
    assert isinstance(result, datatypes.nodeset)
    all_elements = ([ROOT, CHILD1] + GCHILDREN1 + [CHILD2] + GCHILDREN2 +
                    [CHILD3, LANG] + LCHILDREN)
    expected = datatypes.nodeset(all_elements)
    assert result == expected, (result, expected)
def difference_function(context, nodeset1, nodeset2):
    """
    The set:difference function returns the difference between two node
    sets - those nodes that are in the node set passed as the first argument
    that are not in the node set passed as the second argument.

    context - the current evaluation context
    nodeset1, nodeset2 - expression objects evaluated as node-sets

    The result keeps the order in which the nodes appear in the first
    argument (each node at most once) rather than the arbitrary order of
    a Python set.
    """
    first = nodeset1.evaluate_as_nodeset(context)
    excluded = set(nodeset2.evaluate_as_nodeset(context))
    emitted = set()
    remaining = []
    for node in first:
        if node not in excluded and node not in emitted:
            emitted.add(node)
            remaining.append(node)
    return datatypes.nodeset(remaining)
def test_id_function():
    """
    Call id() with number, string and node-set arguments and compare the
    nodes it returns, as plain lists, with the expected ones.
    """
    cases = (
        ([number_literal('1')], datatypes.nodeset([CHILD2])),
        ([string_literal('"1 1"')], datatypes.nodeset([CHILD2])),
        ([string_literal('"0"')], datatypes.nodeset()),
        ([string_literal('"0 1"')], datatypes.nodeset([CHILD2])),
        ([string_literal('"0 1 1"')], datatypes.nodeset([CHILD2])),
        ([string_literal('"0 0 1 1"')], datatypes.nodeset([CHILD2])),
        ([nodeset_literal([EGG1])], datatypes.nodeset()),
        ([nodeset_literal([EGG1, EGG2])], datatypes.nodeset([CHILD2])),
        ([nodeset_literal([EGG1, EGG2, EGG3])], datatypes.nodeset([CHILD2])),
        ([nodeset_literal([EGG1, EGG2, EGG3, EGG4])],
         datatypes.nodeset([CHILD2])),
    )
    for args, expected in cases:
        call = function_call('id', args)
        result = call.evaluate_as_nodeset(CONTEXT1)
        assert isinstance(result, datatypes.nodeset)
        found = list(result)
        wanted = list(expected)
        assert found == wanted, (args, found, wanted)
def test_relative_location_path(): for args, expected, ctx in ( # <CHILD1>/ancestor::* ([location_step(axis('ancestor'), name_test('*'))], datatypes.nodeset([ROOT]), CONTEXT_CHILD1), # <CHILD1>/ancestor-or-self::* ([location_step(axis('ancestor-or-self'), name_test('*'))], datatypes.nodeset([ROOT, CHILD1]), CONTEXT_CHILD1), # <ROOT>/descendant-or-self::GCHILD ([location_step(axis('descendant-or-self'), name_test('GCHILD'))], datatypes.nodeset(GCHILDREN1 + GCHILDREN2), CONTEXT_ROOT), # <CHILD1>/child::GCHILD ([location_step(axis('child'), name_test('GCHILD'))], datatypes.nodeset(GCHILDREN1), CONTEXT_CHILD1), ): result = relative_location_path(*args).evaluate_as_nodeset(ctx) assert isinstance(result, datatypes.nodeset) assert result == expected, (result, expected)
def replace_function(context, string, search, replace):
    """
    The str:replace function converts a string to a node-set, with
    each instance of a substring from a given list (obtained from the
    string-values of nodes in the second argument) replaced by the
    node at the corresponding position of the node-set given as the
    third argument. Unreplaced substrings become text nodes.

    The second and third arguments can be any type of object; if
    either is not a node-set, it is treated as if it were a node-set
    of just one text node, formed from the object's string-value.

    Attribute and namespace nodes in the replacement set are
    erroneous but are treated as empty text nodes.

    All occurrences of the longest substrings are replaced first,
    and once a replacement is made, that span of the original string
    is no longer eligible for future replacements.

    An empty search string matches between every character of the
    original string.

    See http://exslt.org/str/functions/replace/str.replace.html for details.
    """
    # FIXME: http://www.exslt.org/str/functions/replace/ doesn't say we have
    # to convert the first arg to a string, but should we, anyway?
    # If not, we should at least check and flag non-strings with a clear
    # error?
    string = string.evaluate_as_string(context)
    search = search.evaluate(context)
    replace = replace.evaluate(context)

    # prepare a list of strings to search for
    if isinstance(search, datatypes.nodeset):
        search = [datatypes.string(node) for node in search]
    else:
        search = [datatypes.string(search)]

    if isinstance(replace, datatypes.nodeset):
        # Substitute empty text nodes for attribute and namespace nodes.
        # A new list is built so the node-set returned by evaluate() (which
        # may be held elsewhere, e.g. by a variable) is not modified.
        replace = [
            tree.text(u"")
            if isinstance(node, (tree.attribute, tree.namespace))
            else node
            for node in replace
        ]
    else:
        replace = [tree.text(datatypes.string(replace))]
    # Unpaired search patterns are to be deleted (replacement is None)
    replace = itertools.chain(replace, itertools.repeat(None))

    # Sort the (search, replacement, length) tuples in descending order of
    # search-string length, so the longest search strings are replaced first.
    lengths = [len(text) for text in search]
    replacements = sorted(zip(search, replace, lengths),
                          key=operator.itemgetter(2), reverse=True)

    # generate a result tree fragment
    context.push_tree_writer(context.instruction.baseUri)
    _replace(context, string, *replacements)
    writer = context.pop_writer()
    rtf = writer.get_result()
    return datatypes.nodeset(rtf.xml_children)
def replace_function(context, string, search, replace):
    """
    The str:replace function converts a string to a node-set, with
    each instance of a substring from a given list (obtained from the
    string-values of nodes in the second argument) replaced by the
    node at the corresponding position of the node-set given as the
    third argument. Unreplaced substrings become text nodes.

    The second and third arguments can be any type of object; if
    either is not a node-set, it is treated as if it were a node-set
    of just one text node, formed from the object's string-value.

    Attribute and namespace nodes in the replacement set are
    erroneous but are treated as empty text nodes.

    All occurrences of the longest substrings are replaced first,
    and once a replacement is made, that span of the original string
    is no longer eligible for future replacements.

    An empty search string matches between every character of the
    original string.

    See http://exslt.org/str/functions/replace/str.replace.html for details.
    """
    # FIXME: http://www.exslt.org/str/functions/replace/ doesn't say we have
    # to convert the first arg to a string, but should we, anyway?
    # If not, we should at least check and flag non-strings with a clear
    # error?
    string = string.evaluate_as_string(context)
    search = search.evaluate(context)
    replace = replace.evaluate(context)

    # prepare a list of strings to search for
    if isinstance(search, datatypes.nodeset):
        search = [datatypes.string(node) for node in search]
    else:
        search = [datatypes.string(search)]

    if isinstance(replace, datatypes.nodeset):
        # Substitute empty text nodes for attribute and namespace nodes.
        # A new list is built so the node-set returned by evaluate() (which
        # may be held elsewhere, e.g. by a variable) is not modified.
        replace = [
            tree.text(u'')
            if isinstance(node, (tree.attribute, tree.namespace))
            else node
            for node in replace
        ]
    else:
        replace = [tree.text(datatypes.string(replace))]
    # Unpaired search patterns are to be deleted (replacement is None)
    replace = itertools.chain(replace, itertools.repeat(None))

    # Sort the (search, replacement, length) tuples in descending order of
    # search-string length, so the longest search strings are replaced first.
    lengths = [len(text) for text in search]
    replacements = sorted(zip(search, replace, lengths),
                          key=operator.itemgetter(2), reverse=True)

    # generate a result tree fragment
    context.push_tree_writer(context.instruction.baseUri)
    _replace(context, string, *replacements)
    writer = context.pop_writer()
    rtf = writer.get_result()
    return datatypes.nodeset(rtf.xml_children)
def __missing__(self, key):
    """
    Build, store and return the value table for `key` on first access.

    key - must be a tree.entity (asserted)

    The nodes produced by `self._match_nodes` are grouped by their value;
    the stored table maps each value to a node-set of its unique nodes.
    """
    assert isinstance(key, tree.entity), key
    grouped = collections.defaultdict(set)
    context = xsltcontext.xsltcontext(key, 1, 1)
    for value, node in self._match_nodes(context, [key]):
        grouped[value].add(node)
    # Store a plain dict, then swap each set of unique nodes for a node-set
    table = dict(grouped)
    self[key] = table
    for value in table:
        table[value] = datatypes.nodeset(table[value])
    return table
def handle_element(elem, resource):
    """
    Generate (resource, relationship, value) triples for `elem` and,
    recursively, for its child elements.

    elem - the element to inspect; its `xml_model` supplies the metadata
           expressions
    resource - identifier of the resource currently in effect

    Nothing is produced for the element or its subtree when the model has
    a context expression that evaluates false for this element.
    """
    model = elem.xml_model
    prefixes = elem.xml_root.xml_model.prefixes

    def run(expr):
        # Evaluate a model expression with elem as the context node
        return expr.evaluate(context(elem, namespaces=prefixes))

    if model.metadata_context_expr and not run(model.metadata_context_expr):
        return

    # Is there a cue that designates this element as a resource envelope?
    new_resource = None
    resource_expr = model.metadata_resource_expr
    if resource_expr:
        if resource_expr == NODE_ID_MARKER:
            # FIXME: Isn't going from unicode -> xpath str -> unicode
            # wasteful?
            new_resource = unicode(datatypes.string(elem.xml_nodeid))
        else:
            new_resource = unicode(datatypes.string(run(resource_expr)))

    # Is there a cue that designates a relationship in this element?
    if model.metadata_rel_expr:
        # The relationship name/title
        rel = datatypes.string(run(model.metadata_rel_expr))
        if model.metadata_value_expr:
            # An explicit value expression wins
            val = run(model.metadata_value_expr)
        elif new_resource is not None:
            # A resource envelope defaults to its own new resource ID
            val = new_resource
        else:
            # Handle the default ak:value of "."
            val = datatypes.nodeset([elem])
        # Reported against the incoming resource, not the new one
        yield (unicode(resource), unicode(rel), val)

    # From here on the element's own resource (if any) is in effect
    if new_resource is not None:
        resource = new_resource

    for rel_expr, val_expr in model.other_rel_exprs:
        rel = datatypes.string(elem.xml_select(rel_expr, prefixes=prefixes))
        val = elem.xml_select(val_expr, prefixes=prefixes)
        yield (unicode(resource), unicode(rel), val)

    for child in elem.xml_elements:
        for item in handle_element(child, resource):
            yield item
def evaluate_as_nodeset(self, context):
    """
    Look up nodes by ID in the document of the context node.

    The single argument is evaluated; a node-set contributes the string
    value of each node as one ID, any other value is converted to a string
    and split on whitespace into IDs.  Returns a sorted node-set of the
    nodes found by `xml_lookup`; IDs without a match are ignored.
    """
    (id_expr,) = self._args
    value = id_expr.evaluate(context)
    if isinstance(value, datatypes.nodeset):
        ids = set(datatypes.string(node) for node in value)
    else:
        ids = set(datatypes.string(value).split())
    doc = context.node.xml_root
    # Keep only the lookups that produced a (truthy) result
    found = [node for node in (doc.xml_lookup(name) for name in ids) if node]
    found.sort()
    return datatypes.nodeset(found)
def distinct_function(context, nodeset):
    """
    The set:distinct function returns a subset of the nodes contained in the
    node-set NS passed as the first argument. Specifically, it selects a node
    N if there is no node in NS that has the same string value as N, and that
    precedes N in document order.

    context - the current evaluation context
    nodeset - expression object evaluated as a node-set

    Returns a node-set holding, for each distinct string value, the first
    node in the evaluated node-set with that value, in their original
    relative order.
    """
    nodeset = nodeset.evaluate_as_nodeset(context)
    # Walk forward and keep the first node per string value.  This avoids
    # reversing the evaluated node-set in place (a side effect visible to
    # anyone else holding it) and keeps the result order deterministic
    # instead of depending on dict ordering.
    seen_values = set()
    distinct = []
    for node in nodeset:
        value = datatypes.string(node)
        if value not in seen_values:
            seen_values.add(value)
            distinct.append(node)
    return datatypes.nodeset(distinct)
def closure_function(context, nodeset, string):
    """
    The dyn:closure function creates a node set resulting from transitive
    closure of evaluating the expression passed as the second argument on
    each of the nodes passed as the first argument, then on the node set
    resulting from that and so on until no more nodes are found.

    context - the current evaluation context
    nodeset - expression object yielding the starting nodes
    string - expression object; its string value is the XPath source text

    Returns a node-set of every node reached.  If the expression cannot be
    parsed, a warning is issued and an empty node-set is returned.

    http://www.exslt.org/dyn/functions/closure/index.html
    """
    nodeset = nodeset.evaluate_as_nodeset(context)
    string = string.evaluate_as_string(context)
    try:
        expr = parse_xpath(string)
    except XPathError:
        lines = traceback.format_exception(*sys.exc_info())
        lines[:1] = [("Syntax error in XPath expression '%(expr)s', "
                      "lower-level traceback:\n") % {'expr': string}]
        context.processor.warning(''.join(lines))
        return datatypes.nodeset()
    result = datatypes.nodeset()
    # Nodes already added to the result.  Without this, an expression that
    # leads back to a visited node (e.g. '..' then '*') never terminates,
    # and the result accumulates duplicates.
    seen = set()
    while nodeset:
        fresh = []
        for node in _map(context, nodeset, expr):
            if node not in seen:
                seen.add(node)
                fresh.append(node)
        result.extend(fresh)
        # Only newly discovered nodes need to be expanded further
        nodeset = datatypes.nodeset(fresh)
    return result
def lowest_function(context, nodeset):
    """
    The math:lowest function returns the nodes in the node set whose value is
    the minimum value for the node set. The minimum value for the node set is
    the same as the value as calculated by math:min. A node has this minimum
    value if the result of converting its string value to a number as if by
    the number function is equal to the minimum value, where the equality
    comparison is defined as a numerical comparison using the = operator.

    context - the current evaluation context
    nodeset - expression object evaluated as a node-set

    Returns an empty node-set when the input is empty or when any node's
    numeric value is NaN.
    """
    nodeset = nodeset.evaluate_as_nodeset(context)
    numbers = [datatypes.number(node) for node in nodeset]
    # min() raises on an empty sequence, and a NaN makes the minimum
    # undefined (NaN is the only value not equal to itself).
    if not numbers or any(value != value for value in numbers):
        return datatypes.nodeset()
    # Take the minimum of the numeric values themselves;
    # min(nodeset, key=...) would return a node, not a number.
    lowest = min(numbers)
    result = datatypes.nodeset()
    for value, node in zip(numbers, nodeset):
        if value == lowest:
            result.append(node)
    return result
def handle_element(elem, resource):
    """
    Generate (resource, relationship, value) triples for `elem` and,
    recursively, for its child elements.

    elem - the element to inspect; its `xml_model` supplies the metadata
           expressions
    resource - identifier of the resource currently in effect

    Nothing is produced for the element or its subtree when the model has
    a context expression that evaluates false for this element.
    """
    model = elem.xml_model
    prefixes = elem.xml_root.xml_model.prefixes

    def run(expr):
        # Evaluate a model expression with elem as the context node
        return expr.evaluate(context(elem, namespaces=prefixes))

    if model.metadata_context_expr and not run(model.metadata_context_expr):
        return

    # Is there a cue that designates this element as a resource envelope?
    new_resource = None
    resource_expr = model.metadata_resource_expr
    if resource_expr:
        if resource_expr == NODE_ID_MARKER:
            # FIXME: Isn't going from unicode -> xpath str -> unicode
            # wasteful?
            new_resource = unicode(datatypes.string(elem.xml_nodeid))
        else:
            new_resource = unicode(datatypes.string(run(resource_expr)))

    # Is there a cue that designates a relationship in this element?
    if model.metadata_rel_expr:
        # The relationship name/title
        rel = datatypes.string(run(model.metadata_rel_expr))
        if model.metadata_value_expr:
            # An explicit value expression wins
            val = run(model.metadata_value_expr)
        elif new_resource is not None:
            # A resource envelope defaults to its own new resource ID
            val = new_resource
        else:
            # Handle the default ak:value of "."
            val = datatypes.nodeset([elem])
        # Reported against the incoming resource, not the new one
        yield (unicode(resource), unicode(rel), val)

    # From here on the element's own resource (if any) is in effect
    if new_resource is not None:
        resource = new_resource

    for rel_expr, val_expr in model.other_rel_exprs:
        rel = datatypes.string(elem.xml_select(rel_expr, prefixes=prefixes))
        val = elem.xml_select(val_expr, prefixes=prefixes)
        yield (unicode(resource), unicode(rel), val)

    for child in elem.xml_elements:
        for item in handle_element(child, resource):
            yield item
def _map(context, nodeset, expr):
    """
    Evaluate `expr` once per node of `nodeset` and collect the results.

    context - evaluation context; its node/position/size are changed
              during the loop and restored before returning
    nodeset - the nodes to use, in turn, as the context node
    expr - parsed expression providing `evaluate(context)`

    Returns a node-set.  The type of the first successful result decides
    how every result is collected: node-sets are merged, while numbers,
    booleans and other values are each wrapped in a new exsl:number,
    exsl:boolean or exsl:string element.  A node whose evaluation raises is
    reported through `context.processor.warning` and skipped.
    """
    focus = context.node, context.position, context.size
    context.size = len(nodeset)
    type_known = False
    tag_name = converter = None
    result = set()
    try:
        for position, node in enumerate(nodeset, 1):
            context.node = node
            context.position = position
            try:
                obj = expr.evaluate(context)
            except Exception:
                lines = traceback.format_exception(*sys.exc_info())
                # Only the parsed expression is available here (there is no
                # `string` in this scope), so report its string form.
                lines[:1] = [("Runtime error in XPath expression '%(expr)s', "
                              "lower-level traceback:\n") % {'expr': expr}]
                context.processor.warning(''.join(lines))
                continue
            if not type_known:
                # Decide once, from the first result, how to wrap values
                if isinstance(obj, datatypes.nodeset):
                    tag_name = None
                elif isinstance(obj, datatypes.number):
                    tag_name = 'exsl:number'
                    converter = datatypes.string
                elif isinstance(obj, datatypes.boolean):
                    tag_name = 'exsl:boolean'
                    converter = lambda obj: u'true' if obj else u''
                else:
                    tag_name = 'exsl:string'
                    converter = datatypes.string
                type_known = True
            if tag_name:
                E = tree.element(EXSL_COMMON_NS, tag_name)
                E.xml_append(tree.text(converter(obj)))
                result.add(E)
            else:
                result.update(obj)
    finally:
        # Restore the caller's focus even if something unexpected escapes
        context.node, context.position, context.size = focus
    return datatypes.nodeset(result)
def nodeset_function(context, arg0):
    """
    The purpose of the exsl:node-set function is to return a node-set from a
    result tree fragment. If the argument is a node-set already, it is simply
    returned as is. If the argument to exsl:node-set is not a node-set or a
    result tree fragment, then it is converted to a string as by the string()
    function, and the function returns a node-set consisting of a single text
    node with that string value.

    The exsl:node-set function does not have side-effects: the result tree
    fragment used as an argument is still available as a result tree fragment
    after it is passed as an argument to exsl:node-set.
    """
    obj = arg0.evaluate(context)
    if isinstance(obj, datatypes.nodeset):
        return obj
    if not isinstance(obj, tree.entity):
        # Wrap the string value in a text node.  The node itself (not a
        # tuple containing it) must be the member of the node-set.
        obj = tree.text(datatypes.string(obj))
    return datatypes.nodeset([obj])
def sum_function(context, nodeset, string):
    """
    The dyn:sum function calculates the sum for the nodes passed as the first
    argument, where the value of each node is calculated dynamically using an
    XPath expression passed as a string as the second argument.

    If the expression cannot be parsed, a warning is issued and an empty
    node-set is returned.

    http://www.exslt.org/dyn/functions/sum/index.html
    """
    nodeset = nodeset.evaluate_as_nodeset(context)
    string = string.evaluate_as_string(context)
    try:
        expr = parse_xpath(string)
    except XPathError:
        trace = traceback.format_exception(*sys.exc_info())
        header = ("Syntax error in XPath expression '%(expr)s', "
                  "lower-level traceback:\n") % {'expr': string}
        trace[:1] = [header]
        context.processor.warning(''.join(trace))
        return datatypes.nodeset()
    # Convert every per-node result to a number and add them up
    values = _map(context, nodeset, expr)
    return sum(datatypes.number(value) for value in values)
def nodeset_function(context, arg0):
    """
    The purpose of the exsl:node-set function is to return a node-set from a
    result tree fragment. If the argument is a node-set already, it is simply
    returned as is. If the argument to exsl:node-set is not a node-set or a
    result tree fragment, then it is converted to a string as by the string()
    function, and the function returns a node-set consisting of a single text
    node with that string value.

    The exsl:node-set function does not have side-effects: the result tree
    fragment used as an argument is still available as a result tree fragment
    after it is passed as an argument to exsl:node-set.
    """
    obj = arg0.evaluate(context)
    if isinstance(obj, datatypes.nodeset):
        return obj
    if not isinstance(obj, tree.entity):
        # Wrap the string value in a text node.  The node itself (not a
        # tuple containing it) must be the member of the node-set.
        obj = tree.text(datatypes.string(obj))
    return datatypes.nodeset([obj])
def map_function(context, nodeset, string):
    """
    Implementation of dyn:map.

    `nodeset` is evaluated as a node-set and `string` as a string.  The
    string is parsed as an XPath expression, which `_map` then evaluates
    for the nodes; whatever `_map` returns is the result.

    When the expression does not parse, a warning that includes the
    traceback is sent to `context.processor` and an empty node-set is
    returned.

    http://www.exslt.org/dyn/functions/map/index.html
    """
    nodes = nodeset.evaluate_as_nodeset(context)
    source = string.evaluate_as_string(context)
    try:
        compiled = parse_xpath(source)
    except XPathError:
        report = traceback.format_exception(*sys.exc_info())
        headline = ("Syntax error in XPath expression '%(expr)s', "
                    "lower-level traceback:\n") % {'expr': source}
        # Replace the first line of the formatted traceback with our own
        # headline naming the offending expression.
        report[:1] = [headline]
        context.processor.warning(''.join(report))
        return datatypes.nodeset()
    return _map(context, nodes, compiled)
def _map(context, nodeset, expr):
    """
    Evaluate `expr` once for every node in `nodeset` and collect the results.

    For each node the context's `node` and `position` are set (positions
    count from 1) and `size` is set to `len(nodeset)`.  The original focus
    (`node`, `position`, `size`) is restored before returning, including
    when an exception propagates out of this function.

    The kind of the first successfully evaluated result decides how all
    results are collected:

    - node-set: the nodes are merged into the result;
    - number:   each value becomes the text of an `exsl:number` element;
    - boolean:  each value becomes an `exsl:boolean` element whose text is
                u'true' for a true value and u'' otherwise;
    - other:    each value becomes the text of an `exsl:string` element.

    An evaluation that raises is reported through
    `context.processor.warning` (with the traceback) and that node is
    skipped.

    Returns a `datatypes.nodeset` built from the collected items.
    """
    saved_focus = context.node, context.position, context.size
    tag_name = converter = None
    type_known = False
    # A set, so that the same node reached from several inputs is kept once.
    result = set()
    try:
        context.size = len(nodeset)
        for position, node in enumerate(nodeset, 1):
            context.node = node
            context.position = position
            try:
                obj = expr.evaluate(context)
            except Exception:
                # `Exception` rather than a bare except, so that
                # KeyboardInterrupt/SystemExit are not turned into warnings.
                lines = traceback.format_exception(*sys.exc_info())
                # Only the parsed expression is available here (not its
                # source text), so the message shows its string form.
                lines[:1] = [("Runtime error in XPath expression '%(expr)s', "
                              "lower-level traceback:\n") % {'expr': expr}]
                context.processor.warning(''.join(lines))
                continue

            if not type_known:
                # Decide the wrapping once, from the first result obtained.
                if isinstance(obj, datatypes.nodeset):
                    tag_name = None
                elif isinstance(obj, datatypes.number):
                    tag_name = 'exsl:number'
                    converter = datatypes.string
                elif isinstance(obj, datatypes.boolean):
                    tag_name = 'exsl:boolean'
                    converter = lambda flag: u'true' if flag else u''
                else:
                    tag_name = 'exsl:string'
                    converter = datatypes.string
                type_known = True

            if tag_name:
                element = tree.element(EXSL_COMMON_NS, tag_name)
                element.xml_append(tree.text(converter(obj)))
                result.add(element)
            else:
                result.update(obj)
    finally:
        context.node, context.position, context.size = saved_focus
    return datatypes.nodeset(result)
def map_function(context, nodeset, string):
    """
    Implementation of dyn:map.

    Evaluates `nodeset` as a node-set and `string` as a string, parses the
    string as an XPath expression and passes both to `_map`, whose return
    value is the result.

    If parsing raises `XPathError`, the traceback is reported through
    `context.processor.warning` and an empty node-set is returned.

    http://www.exslt.org/dyn/functions/map/index.html
    """
    input_nodes = nodeset.evaluate_as_nodeset(context)
    expr_text = string.evaluate_as_string(context)
    try:
        parsed = parse_xpath(expr_text)
    except XPathError:
        trace = traceback.format_exception(*sys.exc_info())
        # Our own first line takes the place of the traceback's first line.
        message = [("Syntax error in XPath expression '%(expr)s', "
                    "lower-level traceback:\n") % {'expr': expr_text}]
        message.extend(trace[1:])
        context.processor.warning(''.join(message))
        return datatypes.nodeset()
    else:
        return _map(context, input_nodes, parsed)
def sum_function(context, nodeset, string):
    """
    Implementation of dyn:sum.

    Evaluates `nodeset` as a node-set and `string` as a string, parses the
    string as an XPath expression and lets `_map` evaluate it for the nodes.
    Each item returned by `_map` is converted with `datatypes.number` and
    the sum of those numbers is returned.

    If parsing raises `XPathError`, the traceback is reported through
    `context.processor.warning` and an empty node-set is returned.

    http://www.exslt.org/dyn/functions/sum/index.html
    """
    input_nodes = nodeset.evaluate_as_nodeset(context)
    expr_text = string.evaluate_as_string(context)
    try:
        parsed = parse_xpath(expr_text)
    except XPathError:
        trace = traceback.format_exception(*sys.exc_info())
        # Our own first line takes the place of the traceback's first line.
        message = [("Syntax error in XPath expression '%(expr)s', "
                    "lower-level traceback:\n") % {'expr': expr_text}]
        message.extend(trace[1:])
        context.processor.warning(''.join(message))
        # NOTE(review): a node-set is returned here although the normal
        # path returns a number -- confirm this is the intended fallback.
        return datatypes.nodeset()
    else:
        values = _map(context, input_nodes, parsed)
        return sum(datatypes.number(value) for value in values)
def evaluate_as_nodeset(self, context):
    """
    Resolve the URI argument(s) to parsed documents and return them as a
    node-set.

    `self._args` holds two entries: the URI argument and an optional
    base-node argument (None when absent).

    Base URI selection:
    - with a base-node argument, the `xml_base` of the first node of that
      node-set is used; an empty node-set raises `XsltRuntimeError`
      (`DOC_FUNC_EMPTY_NODESET`);
    - without it, `context.instruction.baseUri` is used, except that when
      the URI argument is a node-set each node's own `xml_base` is used for
      that node.

    Every URI is made absolute with `iri.absolutize`.  Documents already in
    `context.documents` are reused; others are parsed (from the text in
    `context.transform.root.sources` when the URI is listed there, otherwise
    from the URI itself) and stored in `context.documents`.
    """
    uri_arg, base_arg = self._args
    has_explicit_base = base_arg is not None

    if has_explicit_base:
        # Only the first node of the base node-set matters.
        for base_node in base_arg.evaluate_as_nodeset(context):
            base_uri = base_node.xml_base
            break
        else:
            raise XsltRuntimeError(XsltError.DOC_FUNC_EMPTY_NODESET,
                                   context.instruction)
    else:
        base_uri = context.instruction.baseUri

    value = uri_arg.evaluate(context)
    if not isinstance(value, datatypes.nodeset):
        uri = datatypes.string(value)
        assert base_uri or iri.is_absolute(uri)
        uris = [iri.absolutize(uri, base_uri)]
    else:
        # A set: the same absolute URI named by several nodes is loaded once.
        uris = set()
        for node in value:
            uri = datatypes.string(node)
            if not has_explicit_base:
                base_uri = node.xml_base
            assert base_uri or iri.is_absolute(uri)
            uris.add(iri.absolutize(uri, base_uri))

    cache = context.documents
    sources = context.transform.root.sources
    loaded = []
    for uri in uris:
        if uri in cache:
            doc = cache[uri]
        elif uri in sources:
            doc = cache[uri] = amara.parse(StringIO(sources[uri]), uri)
        else:
            doc = cache[uri] = amara.parse(uri)
        loaded.append(doc)
    return datatypes.nodeset(loaded)
def split_function(context, string, pattern=None):
    """
    Implementation of str:split.

    `string` is evaluated as the text to split.  `pattern`, when given, is
    evaluated as the separator; otherwise a single space is used.  The text
    is split at every occurrence of the separator; an empty separator
    yields one token per character.

    Each token is written as the text of a `token` element into a tree
    writer pushed on `context`; the children of the resulting tree are
    returned as a node-set.
    """
    subject = string.evaluate_as_string(context)
    if pattern:
        separator = pattern.evaluate_as_string(context)
    else:
        separator = u" "

    context.push_tree_writer(context.instruction.baseUri)
    # Iterating the string itself gives one item per character, which is
    # the behaviour wanted for an empty separator.
    pieces = subject.split(separator) if separator else subject
    for piece in pieces:
        context.start_element(u"token")
        context.text(piece)
        context.end_element(u"token")

    fragment = context.pop_writer().get_result()
    return datatypes.nodeset(fragment.xml_children)
def split_function(context, string, pattern=None):
    """
    Implementation of str:split.

    Splits the string value of `string` at every occurrence of the string
    value of `pattern` (a single space when `pattern` is not supplied) and
    returns a node-set of `token` elements, one per piece.  With an empty
    pattern every character becomes its own token.

    The elements are produced through a tree writer pushed on `context`
    and popped again before returning.
    """
    text = string.evaluate_as_string(context)
    delimiter = u' ' if not pattern else pattern.evaluate_as_string(context)

    context.push_tree_writer(context.instruction.baseUri)
    if not delimiter:
        # An empty delimiter: iterate the characters of the string.
        parts = text
    else:
        parts = text.split(delimiter)
    for part in parts:
        context.start_element(u'token')
        context.text(part)
        context.end_element(u'token')

    tree_writer = context.pop_writer()
    result_tree = tree_writer.get_result()
    return datatypes.nodeset(result_tree.xml_children)
def test_parser_pass():
    """
    Table of XPath expressions that are expected to parse and evaluate.

    Every entry is a 3-tuple that is handed, as part of `test_cases`, to
    `_run_parser_pass`: presumably (arguments holding the expression source,
    expected result, context to evaluate in) -- confirm against
    `_run_parser_pass`.
    """
    test_cases = [
        # Location paths
        (['child::*'], datatypes.nodeset(CHILDREN), CONTEXT_ROOT),
        (['/child::*'], datatypes.nodeset([ROOT]), CONTEXT_CHILD1),
        (['/*/*'], datatypes.nodeset(CHILDREN), CONTEXT_CHILD1),
        (['/child::*/*/child::GCHILD'],
         datatypes.nodeset(GCHILDREN1 + GCHILDREN2),
         CONTEXT_CHILD1),
        (['//*'],
         datatypes.nodeset([ROOT, CHILD1] + GCHILDREN1 +
                           [CHILD2] + GCHILDREN2 +
                           [CHILD3, LANG] + LCHILDREN),
         CONTEXT_CHILD1),
        (['//GCHILD'], datatypes.nodeset(GCHILDREN1 + GCHILDREN2),
         CONTEXT_CHILD1),
        (['//@attr1'], datatypes.nodeset([ATTR1, ATTR2]), CONTEXT_CHILD1),
        (['x:GCHILD'], datatypes.nodeset(), CONTEXT_CHILD1),
        (['.//GCHILD'], datatypes.nodeset(GCHILDREN2), CONTEXT_CHILD2),
        (['.//GCHILD'], datatypes.nodeset(GCHILDREN1 + GCHILDREN2),
         CONTEXT_ROOT),
        (['/'], datatypes.nodeset([DOC]), CONTEXT_TEXT),
        (['//CHILD1/..'], datatypes.nodeset([ROOT]), CONTEXT_CHILD1),
        (['CHILD1 | CHILD2'], datatypes.nodeset([CHILD1, CHILD2]),
         CONTEXT_ROOT),
        # Predicates
        (['descendant::GCHILD[3]'], datatypes.nodeset([GCHILD21]),
         CONTEXT_ROOT),
        (['descendant::GCHILD[parent::CHILD1]'],
         datatypes.nodeset(GCHILDREN1), CONTEXT_ROOT),
        (['descendant::GCHILD[position() > 1]'],
         datatypes.nodeset([GCHILD12] + GCHILDREN2), CONTEXT_ROOT),
        # Attributes and non-ASCII names
        (['CHILD2/@CODE'], datatypes.nodeset([IDATTR2]), CONTEXT_ROOT),
        (['CHILD2/@CODE * 0'], datatypes.number(0), CONTEXT_ROOT),
        ([u'f\xf6\xf8'], datatypes.nodeset([NONASCIIQNAME]), CONTEXT_LANG),
        (['@attr1[.="val1"]'], datatypes.nodeset([ATTR1]), CONTEXT_CHILD1),
        # Processing-instruction node tests
        (["processing-instruction()"], datatypes.nodeset([PI, PI2]),
         CONTEXT_DOC),
        (["processing-instruction('no-data')"], datatypes.nodeset([PI2]),
         CONTEXT_DOC),
        (["processing-instruction('f')"], datatypes.nodeset(), CONTEXT_DOC),
        # Numbers and arithmetic
        (['1'], datatypes.number(1), CONTEXT_ROOT),
        (['00200'], datatypes.number(200), CONTEXT_ROOT),
        (['3+4*7'], datatypes.number(31), CONTEXT_ROOT),
        (['3-4*1'], datatypes.number(-1), CONTEXT_ROOT),
        (['. * 0'], datatypes.NOT_A_NUMBER, CONTEXT_CHILD1),
        (['.. * 1'], datatypes.NOT_A_NUMBER, CONTEXT_CHILD1),
        #(['@attr31 * 1'], datatypes.NOT_A_NUMBER, CONTEXT_CHILD1),
        # TODO: Why is this commented out?
        # Strings
        (["string('1')"], datatypes.string("1"), CONTEXT_ROOT),
        (["concat('1', '2')"], datatypes.string("12"), CONTEXT_ROOT),
        # Booleans, comparisons and logical operators
        (['true()'], datatypes.TRUE, CONTEXT_ROOT),
        (['false()'], datatypes.FALSE, CONTEXT_ROOT),
        (['1=3<4'], datatypes.TRUE, CONTEXT_ROOT),
        (['1 or 2 and 3'], datatypes.TRUE, CONTEXT_ROOT),
        (['1 and 2 = 3'], datatypes.FALSE, CONTEXT_ROOT),
        (['-1 or 2'], datatypes.TRUE, CONTEXT_ROOT),
        (['. or *'], datatypes.TRUE, CONTEXT_ROOT),
        # Variable references and filter expressions
        (['$foo[1]'], datatypes.nodeset([ROOT]), CONTEXT_ROOT),
        (['$foo[1]/CHILD1'], datatypes.nodeset([CHILD1]), CONTEXT_ROOT),
        (['$foo[1]//GCHILD'], datatypes.nodeset(GCHILDREN1 + GCHILDREN2),
         CONTEXT_ROOT),
        (['$foo[1][3]'], datatypes.nodeset(), CONTEXT_ROOT),
        (['(child::*)'], datatypes.nodeset(CHILDREN), CONTEXT_ROOT),
        # Nested and chained predicates
        (['//element[descendant::y[.="z"]]'], datatypes.nodeset([ELEMENT2]),
         CONTEXT_ELEMENT),
        (['//element[descendant::y[.="z"]][1]'],
         datatypes.nodeset([ELEMENT2]), CONTEXT_ELEMENT),
        (['//element[descendant::y[.="z"]][2]'], datatypes.nodeset(),
         CONTEXT_ELEMENT),
        # Text node tests
        (["text()"], datatypes.nodeset(), CONTEXT_CHILD3),
        (["text()"], datatypes.nodeset([TEXT_WS1, TEXT_WS2, TEXT1]),
         CONTEXT_CHILD1),
    ]
    _run_parser_pass(test_cases)
#!/usr/bin/env python from amara import tree from amara.xpath import context, datatypes, XPathError from test_expressions import ( # expression TestCase base_expression, # nodeset literals DOC, PI, PI2, ROOT, CHILDREN, CHILD1, ATTR1, GCHILDREN1, GCHILD11, GCHILD12, TEXT1, CHILD2, ATTR2, IDATTR2, GCHILDREN2, GCHILD21, CHILD3, LANG, LCHILDREN, NONASCIIQNAME, TEXT_WS1, TEXT_WS2 ) CONTEXT_DOC = context(DOC, 1, 1) CONTEXT_ROOT = context(ROOT, 1, 1, variables={(None, 'foo'): datatypes.nodeset([ROOT])}) CONTEXT_CHILD1 = context(CHILD1, 1, 2, namespaces={'x': 'http://spam.com'}) CONTEXT_CHILD2 = context(CHILD2, 2, 2) CONTEXT_CHILD3 = context(CHILD3, 1, 1) CONTEXT_TEXT = context(TEXT1, 3, 3) CONTEXT_GCHILD11 = context(GCHILD11, 1, 2) CONTEXT_LANG = context(LANG, 1, 1) # <elements> # <element> # <x> # <y>a</y> # </x> # </element> # <element>