Пример #1
0
 def test_issue_001(self):
     """Regression test: a predicate using ``ends-with()`` on element text."""
     temp_selector = Selector("//FullPath[ends-with(., 'Temp')]")

     # Neither a differently named element nor an empty FullPath is selected.
     for markup in ('<A/>', '<FullPath/>'):
         document = ElementTree.XML(markup)
         self.assertListEqual(temp_selector.select(document), [])

     # An element whose text ends with 'Temp' is selected as itself.
     matching = ElementTree.XML('<FullPath>High Temp</FullPath>')
     self.assertListEqual(temp_selector.select(matching), [matching])
Пример #2
0
    def test_issue_042(self):
        """Compare sibling-axis text selections with a plain ``text()`` one."""
        all_text = Selector('text()')

        # Text nodes that precede the last <sup> element.
        before_last = Selector('sup[last()]/preceding-sibling::text()')
        document = ElementTree.XML(
            '<root>a<sup>1</sup>b<sup>2</sup>c<sup>3</sup></root>'
        )
        expected = all_text.select(document)
        self.assertListEqual(expected, before_last.select(document))

        # Text nodes that follow the first <sup> element.
        after_first = Selector('sup[1]/following-sibling::text()')
        document = ElementTree.XML(
            '<root><sup>1</sup>b<sup>2</sup>c<sup>3</sup>d</root>'
        )
        expected = all_text.select(document)
        self.assertListEqual(expected, after_first.select(document))
Пример #3
0
    def test_selector_class(self):
        """Exercise repr, default namespaces and both selection methods."""
        root_selector = Selector('/A')
        expected_repr = "Selector(path='/A', parser=XPath2Parser)"
        self.assertEqual(repr(root_selector), expected_repr)
        self.assertEqual(root_selector.namespaces,
                         XPath2Parser.DEFAULT_NAMESPACES)

        # select() and iter_select() must agree on a node-set result.
        text_selector = Selector('text()')
        expected_text = ['Dickens']
        self.assertListEqual(text_selector.select(self.root), expected_text)
        self.assertListEqual(list(text_selector.iter_select(self.root)),
                             expected_text)

        # A variable reference: select() gives the bare value, while
        # iter_select() yields it as a single item.
        variable_selector = Selector('$a', variables={'a': 1})
        self.assertEqual(variable_selector.select(self.root), 1)
        self.assertListEqual(list(variable_selector.iter_select(self.root)),
                             [1])
Пример #4
0
    def iterfind(self, path=None, namespaces=None):
        """
        XML resource tree iterfind selector.

        A fully loaded resource is queried in memory. A lazy resource is
        parsed again incrementally (from its file object, its URL or its
        loaded text) and elements are cleared once they have been yielded.

        :param path: XPath expression selecting the elements to yield; when
            omitted only the root element is yielded.
        :param namespaces: namespace map handed to the XPath selector.
        """
        if not self._lazy:
            if path is not None:
                for match in iter_select(self._root, path, namespaces, strict=False):
                    yield match
            else:
                yield self._root
            return

        # Lazy resource: choose the stream to parse, rewinding an open file.
        if self._fid is not None:
            self._fid.seek(0)
            stream = self._fid
        elif self._url is not None:
            stream = urlopen(self._url, timeout=self.timeout)
        else:
            self.load()
            stream = StringIO(self._text)

        try:
            # Without a path there is no selector and only the root is yielded.
            if path is None:
                selector = None
            else:
                selector = Selector(path, namespaces, strict=False)

            depth = 0
            for event, node in self.iterparse(stream, events=('start', 'end')):
                if event == "start":
                    if depth == 0:
                        # A new root replaces the previously held one.
                        self._root.clear()
                        self._root = node
                    depth += 1
                    continue

                depth -= 1
                if selector is None:
                    selected = depth == 0
                else:
                    selected = node in selector.select(self._root)

                if selected:
                    yield node
                    node.clear()
                elif depth == 0:
                    node.clear()
        finally:
            # Only streams opened here are closed; self._fid is left open.
            if self._fid is None:
                stream.close()
Пример #5
0
    def iterfind(self, path=None, namespaces=None):
        """
        XML resource tree iterfind selector.

        A fully loaded resource is queried in memory. A lazy resource is
        parsed again incrementally and only the elements found at the depth
        addressed by the path are tested, yielded and then cleared.

        :param path: an XPath expression to select nodes. If not provided the
            iteration returns only the root node. Both relative paths
            (``a/b``, ``./a``) and absolute paths (``/root/a``) are accepted.
        :param namespaces: optional mapping from namespace prefixes to URIs,
            passed to the XPath selector.
        """
        if not self._lazy:
            if path is None:
                yield self._root
            else:
                for e in iter_select(self._root, path, namespaces, strict=False):
                    yield e
            return
        elif self.seek(0) == 0:
            resource = self.source
        elif self._url is not None:
            resource = urlopen(self._url, timeout=self.timeout)
        else:
            self.load()
            resource = StringIO(self._text)

        try:
            if path is None:
                # No path: yield the root unconditionally when it is complete.
                selector = None
                path_level = 0
                select_all = True
            else:
                selector = Selector(path, namespaces, strict=False, parser=XmlResourceXPathParser)
                path = path.replace(' ', '').replace('./', '')

                # Depth (root == 0) of the elements addressed by the path.
                # The first step of an absolute path addresses the root
                # itself, so '/root/a' is at depth 1 like the relative 'a'.
                if path == '.':
                    path_level = 0
                elif path.startswith('/'):
                    path_level = path.count('/') - 1
                else:
                    path_level = path.count('/') + 1

                # A path made only of wildcards selects every element of
                # that depth, so the selector evaluation can be skipped.
                select_all = '*' in path and set(path).issubset({'*', '/'})

            level = 0
            for event, elem in self.iterparse(resource, events=('start', 'end')):
                if event == "start":
                    if level == 0:
                        # A new root replaces the previously held one.
                        self._root.clear()
                        self._root = elem
                    level += 1
                else:
                    level -= 1
                    if level == path_level and \
                            (select_all or elem in selector.select(self._root)):
                        yield elem
                        elem.clear()
                    elif level == 0:
                        elem.clear()
        finally:
            # Close only a stream opened here, never the resource's source.
            if self.source is not resource:
                resource.close()
Пример #6
0
 def get_context_elements(self, xml_document, rule_context, namespaces,
                          variables):
     """
     Collect the results of a rule context expression over a document.

     The expression is first evaluated against ``xml_document``. When it
     starts with '/' that result is returned as is. Otherwise the same
     expression is also evaluated with every element produced by
     ``xml_document.iter()`` as context, and those results are appended
     to the first one.

     :param xml_document: the document to search.
     :param rule_context: the XPath expression of the rule context.
     :param namespaces: namespace map passed to the XPath evaluation.
     :param variables: variables passed to the XPath evaluation.
     :return: the accumulated selection result.
     """
     xpath_options = {'namespaces': namespaces, 'variables': variables}
     matches = select(xml_document, rule_context, **xpath_options)
     if rule_context.startswith('/'):
         return matches

     context_selector = Selector(rule_context, **xpath_options)
     for element in xml_document.iter():
         matches.extend(context_selector.select(element))
     return matches
Пример #7
0
    def iterfind(self, path=None, namespaces=None):
        """
        XML resource tree iterfind selector.

        A fully loaded resource is queried in memory. A lazy resource is
        parsed again incrementally and only the elements found at the depth
        addressed by the path are tested, yielded and then cleared.

        :param path: an XPath expression to select nodes. If not provided the \
        iteration returns only the root node. Both relative paths (``a/b``, \
        ``./a``) and absolute paths (``/root/a``) are accepted.
        :param namespaces: optional mapping from namespace prefixes to URIs. If the \
        resource is lazy and an empty dictionary is provided, the namespace map is \
        updated during the iteration.
        """
        if not self._lazy:
            if path is None:
                yield self._root
            else:
                for e in iter_select(self._root,
                                     path,
                                     namespaces,
                                     strict=False):
                    yield e
            return
        elif self.seek(0) == 0:
            resource = self.source
        elif self._url is not None:
            resource = urlopen(self._url, timeout=self.timeout)
        else:
            self.load()
            resource = StringIO(self._text)

        if namespaces or namespaces is None:
            # A missing or pre-populated map is left untouched.
            events = ('start', 'end')
            nsmap = None
        else:
            # Track and update namespaces
            events = ('start-ns', 'end-ns', 'start', 'end')
            nsmap = []

        try:
            if path is None:
                # No path: yield the root unconditionally when it is complete.
                selector = None
                path_level = 0
                select_all = True
            else:
                selector = Selector(path,
                                    namespaces,
                                    strict=False,
                                    parser=XmlResourceXPathParser)
                path = path.replace(' ', '').replace('./', '')

                # Depth (root == 0) of the elements addressed by the path.
                # The first step of an absolute path addresses the root
                # itself, so '/root/a' is at depth 1 like the relative 'a'.
                if path == '.':
                    path_level = 0
                elif path.startswith('/'):
                    path_level = path.count('/') - 1
                else:
                    path_level = path.count('/') + 1

                # A path made only of wildcards selects every element of
                # that depth, so the selector evaluation can be skipped.
                select_all = '*' in path and set(path).issubset({'*', '/'})

            level = 0
            for event, node in self.iterparse(resource, events):
                if event == "start":
                    if level == 0:
                        # A new root replaces the previously held one.
                        self._root.clear()
                        self._root = node
                    level += 1
                elif event == 'end':
                    level -= 1
                    if level == path_level and \
                            (select_all or node in selector.select(self._root)):
                        yield node
                        node.clear()
                    elif level == 0:
                        node.clear()
                elif event == 'start-ns':
                    # Here node is a (prefix, uri) pair.
                    nsmap.append(node)
                    namespaces[node[0]] = node[1]
                else:
                    # 'end-ns': drop the innermost declaration; the prefix
                    # may already be gone from the map, which is fine.
                    try:
                        del namespaces[nsmap.pop()[0]]
                    except KeyError:
                        pass
                    # Restore any outer declaration that was shadowed.
                    namespaces.update(nsmap)

        finally:
            # Close only a stream opened here, never the resource's source.
            if self.source is not resource:
                resource.close()
Пример #8
0
    def iter_subtrees(self,
                      path=None,
                      namespaces=None,
                      lazy_mode=1,
                      ancestors=None):
        """
        XML resource subtree iterator, that yields fully loaded elements. If a
        path is provided the elements selected by the XPath expression are yielded.
        If no path is provided only the root element is yielded. For lazy resources
        the argument *lazy_mode* can change the sequence of elements yielded. There
        are five possible modes, that generate different sequences of elements:\n
          1. Only a full root element (the default mode)\n
          2. Only a root element pruned at *depth_level*\n
          3. Only the elements at *depth_level* level of the tree\n
          4. The elements at *depth_level* and then a pruned root\n
          5. An incomplete root at start, the elements at *depth_level* and a pruned root

        NOTE(review): *depth_level* is not a parameter of this method; when no
        path is given and *lazy_mode* is greater than 1 the level used is
        ``int(self._lazy)`` - confirm how that attribute is set.

        :param path: an optional XPath expression to select element nodes.
        :param namespaces: an optional mapping from namespace prefixes to URIs. \
        Used to provide namespace mapping for the XPath expression. If the resource \
        is lazy the namespace map is updated during the iteration.
        :param lazy_mode: defines how a lazy resource is iterated when a path \
        is not provided.
        :param ancestors: if a list is provided the iterator tracks the list of \
        ancestors of yielded elements of lazy resources.
        :raises XMLSchemaValueError: if *lazy_mode* is not between 1 and 5. \
        Being a generator, the check runs when the iteration starts.
        """
        if not (1 <= lazy_mode <= 5):
            raise XMLSchemaValueError(
                "invalid argument lazy_mode={!r}".format(lazy_mode))

        # Fully loaded resource: select directly on the in-memory tree.
        if not self._lazy:
            if path is None:
                yield self._root
            else:
                yield from iter_select(self._root,
                                       path,
                                       namespaces,
                                       strict=False)
            return

        # Lazy resource: choose the stream to parse again from the start.
        if self.seek(0) == 0:
            resource = self.source
        elif self._url is not None:
            resource = urlopen(self._url, timeout=self.timeout)
        else:
            self.load()
            resource = StringIO(self._text)

        nsmap = []  # stack of the namespace declarations currently open
        level = 0  # depth of the element being parsed (root is 0)
        changed = False  # True after a declaration has gone out of scope
        if namespaces is None:
            events = 'start', 'end'
        else:
            # Track and update namespaces
            events = 'start-ns', 'end-ns', 'start', 'end'
        if ancestors is None:
            ancestors = []

        if path is None:
            # Modes 2-5 work on the level given by int(self._lazy), mode 1
            # on the root only.
            subtree_level = int(self._lazy) if lazy_mode > 1 else 0
            select_all = True
            selector = None
            # Modes 1 and 2 do not yield the elements at subtree_level.
            skip_depth_elements = lazy_mode < 3
        else:
            selector = Selector(path,
                                namespaces,
                                strict=False,
                                parser=XmlResourceXPathParser)
            path = path.replace(' ', '').replace('./', '')

            # Derive the depth of the selected elements from the number of
            # steps; the first step of an absolute path addresses the root.
            if path == '.':
                subtree_level = 0
            elif path.startswith('/'):
                subtree_level = path.count('/') - 1
            else:
                subtree_level = path.count('/') + 1

            # A path made only of wildcards selects every element of that
            # depth, so the selector evaluation can be skipped.
            select_all = '*' in path and set(path).issubset({'*', '/'})
            skip_depth_elements = False

        try:
            for event, node in self.iterparse(resource, events):
                if event == "start":
                    if not level:
                        # A new root replaces the previously held one.
                        self._root.clear()
                        self._root = node
                        if not path and lazy_mode == 5:
                            # Mode 5: yield the still incomplete root first.
                            yield node
                        ancestors.append(node)
                    elif level < subtree_level:
                        # Elements above subtree_level are ancestors of the
                        # elements that may be yielded.
                        ancestors.append(node)
                    level += 1
                elif event == 'end':
                    level -= 1
                    if not level:
                        # End of the root element.
                        if not path:
                            if lazy_mode != 3:
                                yield node
                        elif subtree_level:
                            # The path addresses a deeper level: the root
                            # itself is not a candidate.
                            pass
                        elif select_all or node in selector.select(self._root):
                            yield node
                    elif level != subtree_level:
                        if level < subtree_level:
                            ancestors.pop()
                        # Not at the subtree level: skip the pruning and the
                        # namespace reset below.
                        continue
                    elif skip_depth_elements:
                        pass
                    elif select_all or node in selector.select(self._root):
                        yield node

                    del node[:]  # delete children, keep attributes, text and tail.
                    if changed:
                        # Rebuild the caller's map from the declarations
                        # that are still open.
                        namespaces.clear()
                        namespaces.update(nsmap)
                        changed = False

                elif event == 'start-ns':
                    # Here node is passed on as the arguments after the map,
                    # presumably a (prefix, uri) pair.
                    nsmap.append(node)
                    update_prefix(namespaces, *node)
                else:
                    # 'end-ns': the innermost declaration goes out of scope;
                    # the map is rebuilt after the next pruned element.
                    nsmap.pop()
                    changed = True
        finally:
            # Close only a stream opened here, never the resource's source.
            if self.source is not resource:
                resource.close()