Exemplo n.º 1
0
 def tokens(cls, selector):
     """Split *selector* into a ``List`` of ``Op`` and ``Tag`` tokens.

     Every ``(op, tag)`` pair yielded by ``ElementPath.xpath_tokenizer`` is
     wrapped in ``Op`` when its operator part is non-empty and in ``Tag``
     otherwise.  When the previously emitted token compares equal to "=",
     the raw text is passed through ``cls.unquote_attribute`` before wrapping.
     """
     collected = List()
     last = None
     for op, tag in ElementPath.xpath_tokenizer(selector):
         # Choose the wrapper and raw text from whichever half is populated.
         wrap, raw = (Op, op) if op else (Tag, tag)
         if last == "=":
             raw = cls.unquote_attribute(raw)
         last = wrap(raw)
         collected.append(last)
     return collected
Exemplo n.º 2
0
 def tokens(cls, selector):
     """Return the tokens of *selector* as a ``List`` of ``Op`` / ``Tag`` objects.

     The ``(op, tag)`` pairs come from ``ElementPath.xpath_tokenizer``.  A pair
     with a non-empty ``op`` becomes an ``Op``; any other pair becomes a
     ``Tag``.  A value directly following a token equal to '=' is run through
     ``cls.unquote_attribute`` first.
     """
     result = List()
     previous = None
     for op, tag in ElementPath.xpath_tokenizer(selector):
         if op:
             factory, text = Op, op
         else:
             factory, text = Tag, tag
         # Only the token right after '=' gets unquoted.
         if previous == '=':
             text = cls.unquote_attribute(text)
         previous = factory(text)
         result.append(previous)
     return result
Exemplo n.º 3
0
def xpath_tokenizer(p):
    """
    Test the XPath tokenizer.

    Flattens the ``(op, tag)`` pairs produced by
    ``ElementPath.xpath_tokenizer`` into a plain list that holds, for each
    pair, the operator if it is non-empty and the tag otherwise.

    >>> # tests from the xml specification
    >>> xpath_tokenizer("*")
    ['*']
    >>> xpath_tokenizer("text()")
    ['text', '()']
    >>> xpath_tokenizer("@name")
    ['@', 'name']
    >>> xpath_tokenizer("@*")
    ['@', '*']
    >>> xpath_tokenizer("para[1]")
    ['para', '[', '1', ']']
    >>> xpath_tokenizer("para[last()]")
    ['para', '[', 'last', '()', ']']
    >>> xpath_tokenizer("*/para")
    ['*', '/', 'para']
    >>> xpath_tokenizer("/doc/chapter[5]/section[2]")
    ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']']
    >>> xpath_tokenizer("chapter//para")
    ['chapter', '//', 'para']
    >>> xpath_tokenizer("//para")
    ['//', 'para']
    >>> xpath_tokenizer("//olist/item")
    ['//', 'olist', '/', 'item']
    >>> xpath_tokenizer(".")
    ['.']
    >>> xpath_tokenizer(".//para")
    ['.', '//', 'para']
    >>> xpath_tokenizer("..")
    ['..']
    >>> xpath_tokenizer("../@lang")
    ['..', '/', '@', 'lang']
    >>> xpath_tokenizer("chapter[title]")
    ['chapter', '[', 'title', ']']
    >>> xpath_tokenizer("employee[@secretary and @assistant]")
    ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']

    >>> # additional tests
    >>> xpath_tokenizer("{http://spam}egg")
    ['{http://spam}egg']
    >>> xpath_tokenizer("./spam.egg")
    ['.', '/', 'spam.egg']
    >>> xpath_tokenizer(".//{http://spam}egg")
    ['.', '//', '{http://spam}egg']
    """
    # Whitespace yields a pair with both parts empty, hence the '' entries.
    return [op or tag for op, tag in ElementPath.xpath_tokenizer(p)]
Exemplo n.º 4
0
    def _structure_parse_root_path(self, _root_path):
        """Work out the node names and parent path needed to create row nodes.

        The XPath held in ``self.rows_xpath`` is tokenized and inspected to find:

        * the root node name: the tag part of the second token, which must
          follow a leading "/" token;
        * the row node name: the tag part of the token right after the index
          returned by ``find_previous_match`` (called with the last token index
          and the pair ``("/", "")``);
        * the row parent path: every token before that index, joined back
          into a string.

        If the token list contains ``('', '|')`` a notice is printed and the
        tokens before it are joined, but that value is not used afterwards.

        NOTE(review): ``_root_path`` is only checked for emptiness and quoted in
        error messages; the tokens come from ``self.rows_xpath`` -- confirm
        that this is intended.

        :param _root_path: root path string; must not be empty.
        :return: tuple ``(root node name, row node name, row parent XPath)``.
        :raises Exception: for an empty ``_root_path``, an XPath whose first
            token is not "/", a ``-1`` result from ``find_previous_match``, or
            a row node index of 0.
        """
        if _root_path == "":
            raise Exception("_parse_root_path: Root path cannot be empty")

        def _rejoin(_pairs):
            # Turn (operator, tag) token pairs back into XPath text.
            return "".join(_pair[0] + _pair[1] for _pair in _pairs)

        _xpath_tokens = list(_elementpath.xpath_tokenizer(self.rows_xpath))

        # Only the first alternative of a multi-path expression is meant to be
        # used for the destination.
        # TODO: The value computed here is never used, why?
        try:
            _union_idx = _xpath_tokens.index(('', '|'))
            print("_parse_root_path: Multiple XPaths, using first.")
            _first_xpath = _rejoin(_xpath_tokens[:_union_idx])
        except ValueError:
            _first_xpath = self.rows_xpath

        # The path has to be absolute so the root node name can be read from it.
        if _xpath_tokens[0][0] != "/":
            raise Exception(
                "_parse_root_path: It is necessary to have an absolute (\"/node\") top node name in the XPath")
        _top_name = _xpath_tokens[1][1]
        print("_root_node_name=" + str(_top_name))

        # Locate the separator that precedes the row node name.
        _sep_idx = find_previous_match(_xpath_tokens, len(_xpath_tokens) - 1, ("/", ""))
        if _sep_idx == -1:
            raise Exception("_parse_root_path: Cannot find start of condition.\nXPath = " + _root_path)
        _row_name = _xpath_tokens[_sep_idx + 1][1]
        if _sep_idx == 0:
            raise Exception("_parse_root_path: The row node cannot be the root node.\nXPath = " + _root_path)
        print("_structure_row_node_name=" + str(_row_name))

        # Everything before the separator is the path to the row node's parent;
        # it is needed to create rows when the full path matches no node yet.
        _parent_xpath = _rejoin(_xpath_tokens[:_sep_idx])
        print("_row_node_parent_xpath=" + str(_parent_xpath))

        return _top_name, _row_name, _parent_xpath
Exemplo n.º 5
0
    def tokens(cls, selector):
        """Tokenize *selector* into a ``List`` of ``Op`` / ``Tag`` objects.

        A token whose predecessor compares equal to '=' must be wrapped in
        matching single or double quotes; the quotes are stripped, and
        ``NodeParsingError`` is raised when they are missing.

        >>> xpath_tokenizer('resource-lists')
        ['resource-lists']

        >>> xpath_tokenizer('list[@name="friends"]')
        ['list', '[', '@', 'name', '=', 'friends', ']']

        We cannot properly tokenize an URI like this :(
        >>> uri_ugly = 'external[@anchor="http://xcap.example.org/resource-lists/users/sip:[email protected]/index/~~/resource-lists/list[@name="mkting"]"]'
        >>> len(xpath_tokenizer(uri_ugly)) # expected 7
        10

        To feed such URI to this function, replace quote \" with "
        >>> uri_nice = 'external[@anchor="http://xcap.example.org/resource-lists/users/sip:[email protected]/index/~~/resource-lists/list[@name="mkting"]"]'
        >>> len(xpath_tokenizer(uri_nice)) # expected 7
        7

        NOTE(review): ``uri_ugly`` and ``uri_nice`` are byte-identical in this
        copy although different lengths are expected; presumably the inner
        quotes of ``uri_nice`` were originally written as an entity -- confirm
        against the upstream source.
        """

        def unquote_attr_value(s):
            # XXX currently equivalent but differently encoded URIs won't be considered equal (&quot, etc.)
            properly_quoted = len(s) > 1 and s[0] == s[-1] and s[0] in '"\''
            if not properly_quoted:
                raise NodeParsingError
            return s[1:-1]

        collected = List()
        last = None
        for op, tag in ElementPath.xpath_tokenizer(selector):
            follows_equals = last == '='
            if op:
                raw, wrap = op, Op
            else:
                raw, wrap = tag, Tag
            # Only the value directly after '=' has its quotes removed.
            if follows_equals:
                raw = unquote_attr_value(raw)
            last = wrap(raw)
            collected.append(last)
        return collected
Exemplo n.º 6
0
    def tokens(cls, selector):
        """Return the tokens of *selector* as a ``List`` of ``Op`` / ``Tag`` items.

        Pairs with a non-empty operator part become ``Op``, all others become
        ``Tag``.  The token right after one that compares equal to '=' has its
        surrounding quotes removed; ``NodeParsingError`` is raised if it is not
        enclosed in a matching pair of single or double quotes.

        >>> xpath_tokenizer('resource-lists')
        ['resource-lists']

        >>> xpath_tokenizer('list[@name="friends"]')
        ['list', '[', '@', 'name', '=', 'friends', ']']

        We cannot properly tokenize an URI like this :(
        >>> uri_ugly = 'external[@anchor="http://xcap.example.org/resource-lists/users/sip:[email protected]/index/~~/resource-lists/list[@name="mkting"]"]'
        >>> len(xpath_tokenizer(uri_ugly)) # expected 7
        10

        To feed such URI to this function, replace quote \" with "
        >>> uri_nice = 'external[@anchor="http://xcap.example.org/resource-lists/users/sip:[email protected]/index/~~/resource-lists/list[@name="mkting"]"]'
        >>> len(xpath_tokenizer(uri_nice)) # expected 7
        7

        NOTE(review): the two URI literals above are identical in this copy
        even though their expected lengths differ; the second one presumably
        used an entity for its inner quotes originally -- verify upstream.
        """
        def unquote_attr_value(s):
            # XXX currently equivalent but differently encoded URIs won't be considered equal (&quot, etc.)
            if not (len(s) > 1 and s[0] == s[-1] and s[0] in '"\''):
                raise NodeParsingError
            return s[1:-1]

        out = List()
        previous = None
        for op, tag in ElementPath.xpath_tokenizer(selector):
            needs_unquote = previous == '='
            raw, wrap = (op, Op) if op else (tag, Tag)
            previous = wrap(unquote_attr_value(raw) if needs_unquote else raw)
            out.append(previous)
        return out
Exemplo n.º 7
0
    def _structure_create_xpath_nodes(self, _node, _xpath):
        """Use an XPath to create nodes that match the path and its conditions (names, attributes and so forth).

        Walks the tokens of ``_xpath`` starting at ``_node``.  For every node
        name in the path, the current node is queried through its ``xpath()``
        method; when exactly one match is found it becomes the current node,
        otherwise a new child is created with ``SubElement``.  When a name is
        followed by a ``[...]`` condition, the name plus the condition is used
        for the lookup, and a newly created node gets an attribute set if the
        condition has the form ``[@name=value]``.

        :param _node: node to start from; its ``tag`` attribute and ``xpath()``
            method are used (presumably an lxml element -- TODO confirm).
        :param _xpath: the XPath string describing the nodes to find or create.
        :return: the node that was found or created last.
        :raises Exception: if ``self.find_next_match`` returns -1 for the end
            of a condition, or if the current node turns out to be a string.
        """

        print("_create_xpath_nodes: " + str(_xpath))
        _curr_node = _node
        # Break up the string in its tokens; each token is indexed below as a
        # pair: [0] is compared against operators, [1] is used as a name.
        _tokens = list(_elementpath.xpath_tokenizer(_xpath))
        print(str(_tokens))

        # Iterate through tokens, until we have gone through the entire XPath.
        _token_idx = 0

        # But first, move past any root reference and conditions: if the path
        # starts with "/" followed by the tag of _node, skip ahead to the next
        # "/" token (or to the end of the token list).
        # TODO: This is a bit ugly, perhaps.
        if len(_tokens) > 1 and (_tokens[0][0] == "/") and (_tokens[1][1] == _node.tag):
            print("_create_xpath_nodes: Ignoring root node path and condition.")
            _token_idx += 1
            while _token_idx < len(_tokens) and _tokens[_token_idx][0] != "/":
                _token_idx += 1

        while _token_idx < len(_tokens):
            print("_tokens[" + str(_token_idx) + "][0]:" + str(_tokens[_token_idx][0]))
            # Is this a new level?

            if _tokens[_token_idx][0] == "/":
                # Then the next is the name of the node
                _token_idx += 1

            # NOTE(review): the index is not re-checked against len(_tokens)
            # after the increment above, so an XPath ending in "/" would index
            # past the end of the list here.
            if _tokens[_token_idx][0] == "":

                _next_name = _tokens[_token_idx][1]

                # Is the next token a condition?

                if _token_idx + 1 < len(_tokens) and _tokens[_token_idx + 1][0] == "[":
                    # It was, move on; _token_idx now points at the "[" token
                    _token_idx += 1

                    # Find ending of condition
                    _end_cond_idx = self.find_next_match(_tokens, _token_idx, ("]", ""))

                    if _end_cond_idx == -1:
                        raise Exception("_create_xpath_nodes: Cannot find end of condition.\nXPath = " + _xpath)
                    # Create relative path: the node name (one token before the
                    # "[") up to and including the end-of-condition token
                    _check_path = "".join((_a[0] + _a[1] for _a in _tokens[_token_idx - 1:_end_cond_idx + 1]))

                    # Then check if it exists
                    print("_check_path:" + str(_check_path))
                    _found_nodes = _curr_node.xpath(_check_path)

                    # Node found, move on (only when there is exactly one match)
                    if _found_nodes and len(_found_nodes) == 1:
                        # This extra step is added on top of the fixed advance
                        # of 5 below, so the found branch moves one token
                        # further than the create branch.
                        _token_idx += 1
                        _curr_node = _found_nodes[0]
                    else:
                        # If not found create node
                        print("add node: " + str(_next_name))
                        _curr_node = SubElement(_curr_node, _next_name)

                        # If they can be discerned(@id=value), add attributes.
                        # Relative to the "[" token: +1 is "@", +2 the attribute
                        # name, +3 "=", +4 the value (taken from element [0] of
                        # the token, with surrounding quotes stripped).
                        # NOTE(review): +3 is indexed without a bounds check, so
                        # a condition shorter than [@name=value] near the end of
                        # the path may raise IndexError.
                        if _tokens[_token_idx + 3][0] == "=" and _tokens[_token_idx + 1][0] == "@":
                            print("set attribute to satisfy this path: " + str(_check_path))
                            _curr_node.set(_tokens[_token_idx + 2][1], str(_tokens[_token_idx + 4][0]).strip("\"'"))

                    # NOTE(review): fixed advance that matches the five tokens
                    # after "[" in a [@name=value] condition; _end_cond_idx is
                    # not used for skipping, so other condition lengths are
                    # presumably not handled correctly -- confirm.
                    _token_idx += 5
                else:
                    # Plain node name without a condition
                    if isinstance(_curr_node, str):
                        raise Exception(
                            "_structure_create_xpath_nodes - Internal error: Node is a string, missing node ref, "
                            "was _add_node_ref used? \nData :" + _curr_node + " | " + _next_name + " | " + _xpath +
                            " | " + str(_tokens))
                    _found_nodes = _curr_node.xpath(_next_name)
                    # Node found, move on; zero or several matches both lead to
                    # a new child being created
                    if len(_found_nodes) == 1:
                        _curr_node = _found_nodes[0]
                    else:
                        print("Create new node: " + str(_next_name))
                        _curr_node = SubElement(_curr_node, _next_name)

            _token_idx += 1
        return _curr_node