Exemplo n.º 1
0
    def __init__(self, features=()):
        """
        Build the element-handler ruleset, then initialise the parent class.

        ``features`` is an iterable of feature identifiers. Each one is looked up
        in the feature registry for a 'contentstate' converter rule; when a rule
        exists, its 'from_database_format' entries are added to the ruleset.
        """
        paragraph = BlockElementHandler('unstyled')
        self.paragraph_handler = paragraph

        # <p> is always recognised, independently of the enabled features
        ruleset = HTMLRuleset({'p': paragraph})
        self.element_handlers = ruleset

        for feature_name in features:
            converter_rule = feature_registry.get_converter_rule('contentstate', feature_name)
            if converter_rule is None:
                # no contentstate converter is registered for this feature
                continue
            ruleset.add_rules(converter_rule['from_database_format'])

        # the parent initialiser runs last, once the handlers are in place
        super().__init__()
Exemplo n.º 2
0
    def test_precedence(self):
        # Plain 'p' rules are registered both before and after the
        # attribute-specific rule; the match for <p class="intro"> is
        # expected to be the attribute-specific one regardless.
        rules = HTMLRuleset()
        registrations = (
            ('p', 'normal-paragraph'),
            ('p[class="intro"]', 'intro-paragraph'),
            ('p', 'normal-paragraph-again'),
        )
        for selector, result in registrations:
            rules.add_rule(selector, result)

        matched = rules.match('p', {'class': 'intro'})
        self.assertEqual(matched, 'intro-paragraph')
Exemplo n.º 3
0
    def test_precedence(self):
        # The attribute selector is sandwiched between two plain "p" rules,
        # so neither registration order can explain the expected outcome.
        intro_attrs = {"class": "intro"}

        ruleset = HTMLRuleset()
        ruleset.add_rule("p", "normal-paragraph")
        ruleset.add_rule('p[class="intro"]', "intro-paragraph")
        ruleset.add_rule("p", "normal-paragraph-again")

        result = ruleset.match("p", intro_attrs)
        self.assertEqual(result, "intro-paragraph")
Exemplo n.º 4
0
    def test_precedence(self):
        # Register a generic 'p' rule, then a class-specific one, then another
        # generic 'p' rule; <p class="intro"> must resolve to the class-specific rule.
        selectors_and_results = [
            ('p', 'normal-paragraph'),
            ('p[class="intro"]', 'intro-paragraph'),
            ('p', 'normal-paragraph-again'),
        ]

        html_rules = HTMLRuleset()
        for selector, result in selectors_and_results:
            html_rules.add_rule(selector, result)

        self.assertEqual(
            html_rules.match('p', {'class': 'intro'}),
            'intro-paragraph',
        )
Exemplo n.º 5
0
    def __init__(self, features=()):
        """
        Build the element-handler ruleset, then initialise the parent class.

        ``features`` is an iterable of feature identifiers. Each one is looked up
        in the feature registry for a "contentstate" converter rule; when a rule
        exists, its "from_database_format" entries are added to the ruleset.
        """
        paragraph = BlockElementHandler("unstyled")
        self.paragraph_handler = paragraph

        # <p> and <br> are always recognised, independently of the enabled features
        base_rules = {
            "p": paragraph,
            "br": LineBreakHandler(),
        }
        ruleset = HTMLRuleset(base_rules)
        self.element_handlers = ruleset

        for feature_name in features:
            converter_rule = feature_registry.get_converter_rule("contentstate", feature_name)
            if converter_rule is None:
                # no contentstate converter is registered for this feature
                continue
            ruleset.add_rules(converter_rule["from_database_format"])

        # the parent initialiser runs last, once the handlers are in place
        super().__init__(convert_charrefs=True)
Exemplo n.º 6
0
    def test_html_ruleset(self):
        # Covers element-only selectors, attribute-presence selectors ('a[href]')
        # and attribute-value selectors in unquoted, double-quoted and
        # single-quoted form.
        ruleset = HTMLRuleset({
            'p': 'paragraph',
            'a[href]': 'link',
            'a[linktype=page]': 'page-link',
            'a[linktype="silly page"]': 'silly-page-link',
            "a[linktype='sensible page']": 'sensible-page-link',
        })

        # "No match" cases use assertIsNone: it checks identity with None and
        # gives a clearer failure message than assertEqual(..., None).
        self.assertIsNone(ruleset.match('div', {}))
        self.assertEqual(ruleset.match('p', {}), 'paragraph')
        # attributes not mentioned by any rule do not prevent an element match
        self.assertEqual(ruleset.match('p', {'class': 'intro'}), 'paragraph')
        self.assertIsNone(ruleset.match('a', {'class': 'button'}))
        self.assertEqual(ruleset.match('a', {'class': 'button', 'href': 'http://wagtail.io'}), 'link')
        # a linktype value that no rule names is not matched
        self.assertIsNone(ruleset.match('a', {'class': 'button', 'linktype': 'document'}))
        self.assertEqual(ruleset.match('a', {'class': 'button', 'linktype': 'page'}), 'page-link')
        self.assertEqual(ruleset.match('a', {'class': 'button', 'linktype': 'silly page'}), 'silly-page-link')
        self.assertEqual(ruleset.match('a', {'class': 'button', 'linktype': 'sensible page'}), 'sensible-page-link')
    def __init__(self, features=()):
        """
        Build the element-handler ruleset, then initialise the parent class.

        ``features`` is an iterable of feature identifiers. Each one is looked up
        in the feature registry for a 'contentstate' converter rule; when a rule
        exists, its 'from_database_format' entries are added to the ruleset.
        """
        self.paragraph_handler = BlockElementHandler('unstyled')

        # built-in rules that apply whatever features are enabled
        always_on = {
            'p': self.paragraph_handler,
            'br': LineBreakHandler(),
        }
        self.element_handlers = HTMLRuleset(always_on)

        add_rules = self.element_handlers.add_rules
        for feature_name in features:
            converter_rule = feature_registry.get_converter_rule('contentstate', feature_name)
            if converter_rule is not None:
                add_rules(converter_rule['from_database_format'])

        # the parent initialiser runs last, once the handlers are in place
        super().__init__(convert_charrefs=True)
Exemplo n.º 8
0
class HtmlToContentStateHandler(HTMLParser):
    """
    HTMLParser subclass that feeds parsed HTML into a ContentState.

    Start and end tags are dispatched to element handlers chosen through an
    HTMLRuleset; text nodes are whitespace-normalised and appended to the
    current block held in the handler state.
    """

    def __init__(self, features=()):
        """
        Set up the element handlers for the given iterable of feature
        identifiers, then initialise the underlying parser.
        """
        paragraph = BlockElementHandler('unstyled')
        self.paragraph_handler = paragraph

        # <p> and <br> are always recognised, independently of the enabled features
        ruleset = HTMLRuleset({
            'p': paragraph,
            'br': LineBreakHandler(),
        })
        self.element_handlers = ruleset

        for feature_name in features:
            converter_rule = feature_registry.get_converter_rule('contentstate', feature_name)
            if converter_rule is None:
                # no contentstate converter is registered for this feature
                continue
            ruleset.add_rules(converter_rule['from_database_format'])

        super().__init__(convert_charrefs=True)

    def reset(self):
        """Discard any conversion in progress and start with fresh state."""
        self.state = HandlerState()
        self.contentstate = ContentState()

        # Elements we are currently inside, innermost last, stored as
        # (name, handler) tuples
        self.open_elements = []

        super().reset()

    def handle_starttag(self, name, attrs):
        """
        Pick the handler for an opening tag, record the tag as open and
        delegate to the handler if there is one.

        ``attrs`` is a list of (name, value) tuples; it is turned into a dict
        for rule matching only, and the handler receives the original list.
        """
        handler = self.element_handlers.match(name, dict(attrs))

        at_top_level = not self.open_elements
        if handler is None and at_top_level:
            # unrecognised top-level elements are treated as paragraphs
            handler = self.paragraph_handler

        # pushed even when there is no handler, so that end tags pair up
        self.open_elements.append((name, handler))

        if not handler:
            return
        handler.handle_starttag(name, attrs, self.state, self.contentstate)

    def handle_endtag(self, name):
        """
        Close the innermost open element and delegate to its handler, if any.

        An end tag arriving while no element is open is ignored.
        """
        if self.open_elements:
            expected_name, handler = self.open_elements.pop()
            assert name == expected_name, "Unmatched tags: expected %s, got %s" % (
                expected_name, name)
            if handler:
                handler.handle_endtag(name, self.state, self.contentstate)

    def handle_data(self, content):
        """
        Append a text node to the current block, collapsing whitespace runs and
        deferring leading/trailing spaces through ``state.leading_whitespace``.
        """
        state = self.state

        # collapse every whitespace sequence into a single space
        text = re.sub(WHITESPACE_RE, ' ', content)
        whitespace_only = text == ' '

        if state.current_block is None:
            if whitespace_only:
                # whitespace outside of any block is not significant
                return
            # text outside of any block gets a paragraph block of its own
            add_paragraph_block(state, self.contentstate)

        if whitespace_only:
            # In strip mode this node is insignificant and is skipped. Otherwise
            # the space is not emitted now; switching to force mode makes the
            # next text node or inline element start with a space, and if the
            # block ends first the space is simply dropped.
            if state.leading_whitespace != STRIP_WHITESPACE:
                state.leading_whitespace = FORCE_WHITESPACE
            return

        # apply the pending leading-whitespace instruction
        if state.leading_whitespace == STRIP_WHITESPACE:
            text = text.lstrip()
        elif state.leading_whitespace == FORCE_WHITESPACE and not text.startswith(' '):
            text = ' ' + text

        if text.endswith(' '):
            # Hold back the trailing space: it must vanish if the block ends
            # here, and force mode re-creates it if more content follows.
            text = text.rstrip()
            state.leading_whitespace = FORCE_WHITESPACE
        else:
            # nothing held back, so leading whitespace on the next text node
            # is to be respected
            state.leading_whitespace = KEEP_WHITESPACE

        state.current_block.text += text

    def close(self):
        """
        Finish parsing, appending a spacer paragraph first when the content is
        empty or ends in an atomic block.
        """
        needs_spacer = not self.state.has_preceding_nonatomic_block
        if needs_spacer:
            add_paragraph_block(self.state, self.contentstate)
        super().close()
class HtmlToContentStateHandler(HTMLParser):
    """
    HTMLParser subclass that feeds parsed HTML into a ContentState.

    Start and end tags are dispatched to element handlers chosen through an
    HTMLRuleset; text nodes are whitespace-normalised and appended to the
    current block held in the handler state.
    """

    def __init__(self, features=()):
        """
        Set up the element handlers, then initialise the underlying parser.

        ``features`` is an iterable of feature identifiers. Each one is looked up
        in the feature registry for a 'contentstate' converter rule; when a rule
        exists, its 'from_database_format' entries are added to the ruleset.
        """
        self.paragraph_handler = BlockElementHandler('unstyled')
        # <p> and <br> are always recognised, independently of the enabled features
        self.element_handlers = HTMLRuleset({
            'p': self.paragraph_handler,
            'br': LineBreakHandler(),
        })
        for feature in features:
            rule = feature_registry.get_converter_rule('contentstate', feature)
            if rule is not None:
                self.element_handlers.add_rules(rule['from_database_format'])

        super().__init__(convert_charrefs=True)

    def reset(self):
        """Discard any conversion in progress and start with fresh state."""
        self.state = HandlerState()
        self.contentstate = ContentState()

        # stack of (name, handler) tuples for the elements we're currently inside
        self.open_elements = []

        super().reset()

    def handle_starttag(self, name, attrs):
        """
        Pick the handler for an opening tag, record the tag as open and
        delegate to the handler if there is one.

        ``attrs`` is a list of (name, value) tuples; it is converted to a dict
        for rule matching only, and the handler receives the original list.
        """
        attr_dict = dict(attrs)  # convert attrs from list of (name, value) tuples to a dict
        element_handler = self.element_handlers.match(name, attr_dict)

        if element_handler is None and not self.open_elements:
            # treat unrecognised top-level elements as paragraphs
            element_handler = self.paragraph_handler

        # pushed even when there is no handler, so that end tags pair up
        self.open_elements.append((name, element_handler))

        if element_handler:
            element_handler.handle_starttag(name, attrs, self.state, self.contentstate)

    def handle_endtag(self, name):
        """
        Close the innermost open element and delegate to its handler, if any.

        An end tag arriving while no element is open (e.g. a stray ``</p>`` in
        the input) is ignored rather than failing on an empty stack.
        """
        if not self.open_elements:
            return  # avoid a pop from an empty list if we have an extra end tag
        expected_name, element_handler = self.open_elements.pop()
        assert name == expected_name, "Unmatched tags: expected %s, got %s" % (expected_name, name)
        if element_handler:
            element_handler.handle_endtag(name, self.state, self.contentstate)

    def handle_data(self, content):
        """
        Append a text node to the current block, collapsing whitespace runs and
        deferring leading/trailing spaces through ``state.leading_whitespace``.
        """
        # normalise whitespace sequences to a single space
        content = re.sub(WHITESPACE_RE, ' ', content)

        if self.state.current_block is None:
            if content == ' ':
                # ignore top-level whitespace
                return
            else:
                # create a new paragraph block for this content
                add_paragraph_block(self.state, self.contentstate)

        if content == ' ':
            # if leading_whitespace = strip, this whitespace node is not significant
            #   and should be skipped.
            # For other cases, _don't_ output the whitespace yet, but set leading_whitespace = force
            # so that a space is forced before the next text node or inline element. If no such node
            # appears (= we reach the end of the block), the whitespace can rightfully be dropped.
            if self.state.leading_whitespace != STRIP_WHITESPACE:
                self.state.leading_whitespace = FORCE_WHITESPACE
        else:
            # strip or add leading whitespace according to the leading_whitespace flag
            if self.state.leading_whitespace == STRIP_WHITESPACE:
                content = content.lstrip()
            elif self.state.leading_whitespace == FORCE_WHITESPACE and not content.startswith(' '):
                content = ' ' + content

            if content.endswith(' '):
                # don't output trailing whitespace yet, because we want to discard it if the end
                # of the block follows. Instead, we'll set leading_whitespace = force so that
                # any following text or inline element will be prefixed by a space
                content = content.rstrip()
                self.state.leading_whitespace = FORCE_WHITESPACE
            else:
                # no trailing whitespace here - any leading whitespace at the start of the
                # next text node should be respected
                self.state.leading_whitespace = KEEP_WHITESPACE

            self.state.current_block.text += content

    def close(self):
        """
        Finish parsing, appending a spacer paragraph first when the content is
        empty or ends in an atomic block.
        """
        # if content ends in an atomic block (or is empty), need to append a spacer paragraph
        if not self.state.has_preceding_nonatomic_block:
            add_paragraph_block(self.state, self.contentstate)
        super().close()
Exemplo n.º 10
0
    def test_html_ruleset(self):
        # Covers element-only selectors, attribute-presence selectors ('a[href]')
        # and attribute-value selectors in unquoted, double-quoted and
        # single-quoted form.
        ruleset = HTMLRuleset({
            'p': 'paragraph',
            'a[href]': 'link',
            'a[linktype=page]': 'page-link',
            'a[linktype="silly page"]': 'silly-page-link',
            "a[linktype='sensible page']": 'sensible-page-link',
        })

        # "No match" cases use assertIsNone: it checks identity with None and
        # gives a clearer failure message than assertEqual(..., None).
        self.assertIsNone(ruleset.match('div', {}))
        self.assertEqual(ruleset.match('p', {}), 'paragraph')
        # attributes not mentioned by any rule do not prevent an element match
        self.assertEqual(ruleset.match('p', {'class': 'intro'}), 'paragraph')
        self.assertIsNone(ruleset.match('a', {'class': 'button'}))
        self.assertEqual(
            ruleset.match('a', {
                'class': 'button',
                'href': 'http://wagtail.io'
            }), 'link')
        # a linktype value that no rule names is not matched
        self.assertIsNone(
            ruleset.match('a', {
                'class': 'button',
                'linktype': 'document'
            }))
        self.assertEqual(
            ruleset.match('a', {
                'class': 'button',
                'linktype': 'page'
            }), 'page-link')
        self.assertEqual(
            ruleset.match('a', {
                'class': 'button',
                'linktype': 'silly page'
            }), 'silly-page-link')
        self.assertEqual(
            ruleset.match('a', {
                'class': 'button',
                'linktype': 'sensible page'
            }), 'sensible-page-link')
Exemplo n.º 11
0
    def test_html_ruleset(self):
        # Selectors under test: element-only, attribute-presence ("a[href]") and
        # attribute-value in unquoted, double-quoted and single-quoted form.
        ruleset = HTMLRuleset({
            "p": "paragraph",
            "a[href]": "link",
            "a[linktype=page]": "page-link",
            'a[linktype="silly page"]': "silly-page-link",
            "a[linktype='sensible page']": "sensible-page-link",
        })

        # (element name, attributes, expected match); None means "no rule applies"
        expectations = [
            ("div", {}, None),
            ("p", {}, "paragraph"),
            ("p", {"class": "intro"}, "paragraph"),
            ("a", {"class": "button"}, None),
            ("a", {"class": "button", "href": "http://wagtail.org"}, "link"),
            ("a", {"class": "button", "linktype": "document"}, None),
            ("a", {"class": "button", "linktype": "page"}, "page-link"),
            ("a", {"class": "button", "linktype": "silly page"}, "silly-page-link"),
            ("a", {"class": "button", "linktype": "sensible page"}, "sensible-page-link"),
        ]

        for element_name, attributes, expected in expectations:
            matched = ruleset.match(element_name, attributes)
            if expected is None:
                self.assertIsNone(matched)
            else:
                self.assertEqual(matched, expected)