Exemplo n.º 1
0
def test_parse_dom():
    """Check that ``_parseDOM`` nests two ``<xx>`` tags inside ``<h1>``.

    The flat list ``<h1> <xx> <xx> </h1>`` is expected to come back as two
    top-level elements (opener and end tag) linked to each other, with both
    ``<xx>`` elements stored as non-pair children of the opener.
    """
    raw_tags = ["<h1>", "<xx>", "<xx>", "</h1>"]
    tag_list = [dhtmlparser.HTMLElement(raw) for raw in raw_tags]

    dom = dhtmlparser._parseDOM(tag_list)

    assert len(dom) == 2

    # Both <xx> tags must end up as children of the first element.
    children = first(dom).childs
    assert len(children) == 2
    assert [child.getTagName() for child in children] == ["xx", "xx"]
    assert all(child.isNonPairTag() for child in children)

    opener, closer = dom[0], dom[1]

    assert not (opener.isNonPairTag() or closer.isNonPairTag())

    assert opener.isOpeningTag()
    assert closer.isEndTag()

    # Opener and end tag reference each other.
    assert opener.endtag == closer
    assert closer.openertag == opener

    assert closer.isEndTagTo(opener)
Exemplo n.º 2
0
def test_parse_dom():
    """Feed ``_parseDOM`` a flat ``<h1><xx><xx></h1>`` list and verify the tree.

    Expected outcome: two top-level elements (the ``<h1>`` opener and its end
    tag), cross-linked via ``endtag`` / ``openertag``, with the two ``<xx>``
    tags attached to the opener as non-pair children.
    """
    h1_open = dhtmlparser.HTMLElement("<h1>")
    xx_one = dhtmlparser.HTMLElement("<xx>")
    xx_two = dhtmlparser.HTMLElement("<xx>")
    h1_close = dhtmlparser.HTMLElement("</h1>")

    dom = dhtmlparser._parseDOM([h1_open, xx_one, xx_two, h1_close])

    assert len(dom) == 2

    nested = first(dom).childs
    assert len(nested) == 2
    for position in (0, 1):
        assert nested[position].getTagName() == "xx"
    for position in (0, 1):
        assert nested[position].isNonPairTag()

    start, end = dom[0], dom[1]

    # Neither the opener nor the end tag is a non-pair tag.
    assert not start.isNonPairTag()
    assert not end.isNonPairTag()

    assert start.isOpeningTag()
    assert end.isEndTag()

    assert start.endtag == end
    assert end.openertag == start

    assert end.isEndTagTo(start)
Exemplo n.º 3
0
def _find_link_element(item):
    """Return the first ``link`` match found under `item`, or ``None``.

    Args:
        item: Object exposing ``find(tag_name)``; presumably a dhtmlparser
            element -- confirm against the callers.

    Returns:
        ``first()`` applied to the matches, or ``None`` when
        ``item.find("link")`` returns something falsy.
    """
    matches = item.find("link")

    return first(matches) if matches else None
Exemplo n.º 4
0
def test_wfind_complicated():
    """Chain ``wfind()`` through several levels and finish with ``find()``.

    Only the ``<xe>`` reached via root -> some -> something is expected, and
    a chain through tags that do not exist must end up without children.
    """
    dom = dhtmlparser.parseString("""
        <root>
            <some>
                <something>
                    <xe id="wanted xe" />
                </something>
                <something>
                    asd
                </something>
                <xe id="another xe" />
            </some>
            <some>
                else
                <xe id="yet another xe" />
            </some>
        </root>
        """)

    narrowed = dom.wfind("root").wfind("some").wfind("something")
    matches = narrowed.find("xe")

    assert len(matches) == 1
    assert first(matches).params["id"] == "wanted xe"

    # A path through missing tags yields an element with no children.
    missing = dom.wfind("root").wfind("pink").wfind("unicorn")

    assert not missing.childs
Exemplo n.º 5
0
def test_match_parameters():
    """``match()`` accepts a string, a dict and a list step in one path.

    The path root -> div#1 -> div#5 -> xe must select exactly the ``<xe>``
    with id ``"wanted xe"``.
    """
    dom = dhtmlparser.parseString("""
        <root>
            <div id="1">
                <div id="5">
                    <xe id="wanted xe" />
                </div>
                <div id="10">
                    <xe id="another wanted xe" />
                </div>
                <xe id="another xe" />
            </div>
            <div id="2">
                <div id="20">
                    <xe id="last wanted xe" />
                </div>
            </div>
        </root>
        """)

    # Step given as a dict and step given as a [tag_name, params] list.
    outer_div = {"tag_name": "div", "params": {"id": "1"}}
    inner_div = ["div", {"id": "5"}]

    matches = dom.match("root", outer_div, inner_div, "xe")

    assert len(matches) == 1
    assert first(matches).params["id"] == "wanted xe"
Exemplo n.º 6
0
def _pick_item_property(item, item_property):
    """Return the content of the first `item_property` match under `item`.

    Args:
        item: Object exposing ``find(tag_name)``; presumably a dhtmlparser
            element -- confirm against the callers.
        item_property: Tag name passed to ``item.find()``.

    Returns:
        ``getContent()`` of the first match, or ``None`` when ``find()``
        returns something falsy.
    """
    matches = item.find(item_property)

    if matches:
        return first(matches).getContent()

    return None
Exemplo n.º 7
0
def test_wfind_complicated():
    """Walk root -> some -> something with ``wfind()`` and look up ``<xe>``.

    Exactly one ``<xe>`` (id ``"wanted xe"``) must be found; walking through
    tags that are absent from the document must give a childless result.
    """
    dom = dhtmlparser.parseString(
        """
        <root>
            <some>
                <something>
                    <xe id="wanted xe" />
                </something>
                <something>
                    asd
                </something>
                <xe id="another xe" />
            </some>
            <some>
                else
                <xe id="yet another xe" />
            </some>
        </root>
        """
    )

    something_level = dom.wfind("root").wfind("some").wfind("something")
    found = something_level.find("xe")

    assert len(found) == 1
    assert first(found).params["id"] == "wanted xe"

    # None of these tags besides <root> exist in the document.
    nowhere = dom.wfind("root").wfind("pink").wfind("unicorn")

    assert not nowhere.childs
Exemplo n.º 8
0
def test_match_parameters():
    """Mix string, dict and list path steps in a single ``match()`` call.

    Only the ``<xe>`` nested in ``div id="1"`` / ``div id="5"`` is expected.
    """
    dom = dhtmlparser.parseString(
        """
        <root>
            <div id="1">
                <div id="5">
                    <xe id="wanted xe" />
                </div>
                <div id="10">
                    <xe id="another wanted xe" />
                </div>
                <xe id="another xe" />
            </div>
            <div id="2">
                <div id="20">
                    <xe id="last wanted xe" />
                </div>
            </div>
        </root>
        """
    )

    path = (
        "root",
        {"tag_name": "div", "params": {"id": "1"}},  # dict form
        ["div", {"id": "5"}],  # list form
        "xe",
    )
    found = dom.match(*path)

    assert len(found) == 1
    assert first(found).params["id"] == "wanted xe"
Exemplo n.º 9
0
def _find_comments_link(item):
    """Return the content of the first ``comments`` match, or ``None``.

    Args:
        item: Object exposing ``find(tag_name)``; presumably a dhtmlparser
            element -- confirm against the callers.

    Returns:
        ``getContent()`` of the first match, or ``None`` when
        ``item.find("comments")`` returns something falsy.
    """
    found = item.find("comments")

    return first(found).getContent() if found else None
def test_params():
    """Unquoted attribute values are read as strings; params can be reset."""
    parsed = dhtmlparser.parseString("<xe id=1 />")
    element = first(parsed.find("xe"))

    assert element.params["id"] == "1"

    # With the parameters replaced by an empty dict, only the bare tag is
    # serialized.
    element.params = {}
    assert str(element) == "<xe />"
Exemplo n.º 11
0
def test_params():
    """``params`` exposes parsed attributes and may be reassigned."""
    matches = dhtmlparser.parseString("<xe id=1 />").find("xe")
    xe_tag = first(matches)

    # The unquoted value is available as the string "1".
    assert xe_tag.params["id"] == "1"

    xe_tag.params = {}
    serialized = str(xe_tag)
    assert serialized == "<xe />"
Exemplo n.º 12
0
def test_containsParamSubset():
    """``containsParamSubset`` accepts subsets and rejects unknown keys."""
    parsed = dhtmlparser.parseString("<div id=x class=xex></div>")
    element = first(parsed.find("div"))

    accepted = (
        {"id": "x"},
        {"class": "xex"},
        {"id": "x", "class": "xex"},
    )
    for subset in accepted:
        assert element.containsParamSubset(subset)

    # One key that the element does not have makes the whole query fail.
    rejected = {"asd": "bsd", "id": "x", "class": "xex"}
    assert not element.containsParamSubset(rejected)
Exemplo n.º 13
0
def test_isNonPairTag():
    """Only ``br`` is a non-pair tag; ``div`` and its first child are not.

    Relies on the module-level ``div`` and ``br`` fixtures.
    """
    assert not div.isNonPairTag()

    # The first child is expected to be the text node "Second.".
    leading_child = first(div.childs)
    stripped_name = leading_child.getTagName().strip()
    assert stripped_name == "Second."

    assert not leading_child.isTag()
    assert not leading_child.isNonPairTag()

    assert br.isNonPairTag()
Exemplo n.º 14
0
def test_replaceWith():
    """Replace ``<nonpair>`` with ``<another />`` and check the result.

    Uses a ``dom`` object defined outside this function.
    """
    target = first(dom.find("nonpair"))

    assert target

    replacement = dhtmlparser.HTMLElement("<another />")
    target.replaceWith(replacement)

    # The replacement must be both findable and part of the serialized output.
    assert dom.find("another")

    assert dom.getContent() == "<div><another /></div>"
Exemplo n.º 15
0
def test_find_params():
    """``find()`` with an empty tag name selects elements by params only."""
    dom = dhtmlparser.parseString(
        """
        <div id=first>
            First div.
            <div id=first.subdiv>
                Subdiv in first div.
            </div>
        </div>
        <div id=second>
            Second.
        </div>
        """
    )

    # Empty tag name: only the params dict restricts the search.
    matches = dom.find("", {"id": "first"})

    assert matches
    assert len(matches) == 1

    match = first(matches)
    assert match.params.get("id") == "first"

    content = first(matches).getContent().strip()
    assert content.startswith("First div.")
def test_replaceWith():
    """Swap the ``<nonpair>`` element for ``<another />`` via ``replaceWith``.

    Operates on a ``dom`` object defined outside this function.
    """
    old_element = first(dom.find("nonpair"))

    assert old_element

    old_element.replaceWith(dhtmlparser.HTMLElement("<another />"))

    assert dom.find("another")

    # The serialized content must contain the new element only.
    expected_content = "<div><another /></div>"
    assert dom.getContent() == expected_content
Exemplo n.º 17
0
def test_recovery_after_is_smaller_than_sign():
    """A bare ``<`` inside text must not break parsing of the markup after it."""
    markup = """<code>5 < 10.</code>
    <div class="rating">here is the rating</div>
    """

    dom = dhtmlparser.parseString(markup)

    code_matches = dom.find("code")

    assert code_matches
    # The "<" stays part of the text content of <code>.
    assert first(code_matches).getContent() == "5 < 10."
    # The element after the stray "<" is still found.
    assert dom.find("div", {"class": "rating"})
Exemplo n.º 18
0
def test_recovery_after_is_smaller_than_sign():
    """The parser keeps working after a literal ``<`` appears in text.

    ``<code>`` must keep ``5 < 10.`` as its content, and the ``<div>`` that
    follows must still be found by its ``class`` parameter.
    """
    html = """<code>5 < 10.</code>
    <div class="rating">here is the rating</div>
    """

    parsed = dhtmlparser.parseString(html)

    found_code = parsed.find("code")

    assert found_code
    code_content = first(found_code).getContent()
    assert code_content == "5 < 10."
    assert parsed.find("div", {"class": "rating"})
Exemplo n.º 19
0
def test_find():
    """``find()`` matches a lowercase ``id`` query against ``ID`` in markup.

    The serialized element keeps the parameter name as written in the source.
    """
    dom = dhtmlparser.parseString(
        """
        "<div ID='xa' a='b'>obsah xa divu</div> <!-- ID, not id :) -->
         <div id='xex' a='b'>obsah xex divu</div>
        """
    )

    # The query uses lowercase `id` although the markup says `ID`.
    found_xa = dom.find("div", {"id": "xa"})
    found_xex = dom.find("div", {"id": "xex"})
    found_missing = dom.find("div", {"id": "xerexex"})

    assert found_xa
    assert found_xex
    assert not found_missing

    div_xa = first(found_xa)
    div_xex = first(found_xex)

    assert div_xa.toString() == '<div ID="xa" a="b">obsah xa divu</div>'
    assert div_xex.toString() == '<div id="xex" a="b">obsah xex divu</div>'

    assert div_xa.getTagName() == "div"
    assert div_xex.getTagName() == "div"
Exemplo n.º 20
0
def test_find_params():
    """Search by parameters only, passing an empty string as the tag name.

    Exactly one element with ``id=first`` is expected, and its content must
    begin with "First div.".
    """
    dom = dhtmlparser.parseString("""
        <div id=first>
            First div.
            <div id=first.subdiv>
                Subdiv in first div.
            </div>
        </div>
        <div id=second>
            Second.
        </div>
        """)

    wanted_params = {"id": "first"}
    found = dom.find("", wanted_params)

    assert found
    assert len(found) == 1

    assert first(found).params.get("id") == "first"

    stripped_content = first(found).getContent().strip()
    assert stripped_content.startswith("First div.")
Exemplo n.º 21
0
def test_find():
    """Look up two divs by ``id`` and verify a missing id gives no match.

    The first div declares ``ID`` in upper case while the query uses ``id``.
    """
    dom = dhtmlparser.parseString("""
        "<div ID='xa' a='b'>obsah xa divu</div> <!-- ID, not id :) -->
         <div id='xex' a='b'>obsah xex divu</div>
        """)

    xa_matches = dom.find("div", {"id": "xa"})  # lowercase `id` on purpose
    xex_matches = dom.find("div", {"id": "xex"})
    absent_matches = dom.find("div", {"id": "xerexex"})

    assert xa_matches
    assert xex_matches
    assert not absent_matches

    xa_element = first(xa_matches)
    xex_element = first(xex_matches)

    expected_xa = '<div ID="xa" a="b">obsah xa divu</div>'
    expected_xex = '<div id="xex" a="b">obsah xex divu</div>'
    assert xa_element.toString() == expected_xa
    assert xex_element.toString() == expected_xex

    assert xa_element.getTagName() == "div"
    assert xex_element.getTagName() == "div"
Exemplo n.º 22
0
def test_closeElements():
    """Check that ``_closeElements`` gives an unclosed child an end tag.

    A ``<div>`` with a single ``<xe>`` child that has no ``endtag`` is passed
    through ``_closeElements``; afterwards the ``<xe>`` found under the div
    must have ``endtag`` set.
    """
    tag = dhtmlparser.HTMLElement("<div>")
    tag.endtag = dhtmlparser.HTMLElement("</div>")

    tag.childs = [dhtmlparser.HTMLElement("<xe>")]

    xe = tag.find("xe")
    assert xe
    assert not first(xe).endtag

    # Assign to `childs` (not to a new, misspelled attribute) so that the
    # find() below walks the list returned by _closeElements.
    tag.childs = _closeElements(tag.childs, dhtmlparser.HTMLElement)

    xe = tag.find("xe")
    assert xe
    assert first(xe).endtag
def test_closeElements():
    """Verify that ``_closeElements`` closes an ``<xe>`` child lacking an end tag.

    Before the call the ``<xe>`` found under the ``<div>`` has no ``endtag``;
    after the call it must have one.
    """
    tag = dhtmlparser.HTMLElement("<div>")
    tag.endtag = dhtmlparser.HTMLElement("</div>")

    tag.childs = [
        dhtmlparser.HTMLElement("<xe>")
    ]

    xe = tag.find("xe")
    assert xe
    assert not first(xe).endtag

    # The result has to be stored on `childs`; writing it to any other
    # attribute name would leave the element's child list as it was.
    tag.childs = _closeElements(tag.childs, dhtmlparser.HTMLElement)

    xe = tag.find("xe")
    assert xe
    assert first(xe).endtag
Exemplo n.º 24
0
def test_multiline_attribute():
    """An attribute value spanning two lines is parsed with its newline kept.

    The tags before, inside and after ``<ubertag>`` must all stay findable.
    """
    markup = """<sometag />
<ubertag attribute="long attribute
                    continues here">
    <valid>notice that quote is not properly started</valid>
</ubertag>
<something_parsable />
"""
    # Expected value, including the line break and the indentation after it.
    expected_attribute = """long attribute
                    continues here"""

    dom = dhtmlparser.parseString(markup)

    for tag_name in ("sometag", "valid", "ubertag"):
        assert dom.find(tag_name)

    ubertag = first(dom.find("ubertag"))
    assert ubertag.params["attribute"] == expected_attribute
    assert dom.find("something_parsable")
Exemplo n.º 25
0
def test_wfind():
    """Two chained ``wfind("div")`` calls reach the nested sub-div."""
    dom = dhtmlparser.parseString(
        """
        <div id=first>
            First div.
            <div id=first.subdiv>
                Subdiv in first div.
            </div>
        </div>
        <div id=second>
            Second.
        </div>
        """
    )

    outer_level = dom.wfind("div")
    nested_level = outer_level.wfind("div")

    # The only div inside another div is `first.subdiv`.
    assert nested_level.childs
    assert first(nested_level.childs).params["id"] == "first.subdiv"
Exemplo n.º 26
0
def test_multiline_attribute():
    """Parse a tag whose quoted attribute value continues on the next line.

    Every tag in the input must be found, and the ``attribute`` parameter of
    ``<ubertag>`` must equal the original two-line value.
    """
    html = """<sometag />
<ubertag attribute="long attribute
                    continues here">
    <valid>notice that quote is not properly started</valid>
</ubertag>
<something_parsable />
"""

    parsed = dhtmlparser.parseString(html)

    assert parsed.find("sometag")
    assert parsed.find("valid")

    ubertags = parsed.find("ubertag")
    assert ubertags

    # The value keeps its line break and the leading spaces of the second line.
    attribute_value = first(parsed.find("ubertag")).params["attribute"]
    assert attribute_value == """long attribute
                    continues here"""

    assert parsed.find("something_parsable")
Exemplo n.º 27
0
def test_wfind():
    """``wfind("div").wfind("div")`` must end at the div nested in a div."""
    dom = dhtmlparser.parseString("""
        <div id=first>
            First div.
            <div id=first.subdiv>
                Subdiv in first div.
            </div>
        </div>
        <div id=second>
            Second.
        </div>
        """)

    result = dom.wfind("div").wfind("div")
    found_children = result.childs

    assert found_children

    first_child = first(result.childs)
    assert first_child.params["id"] == "first.subdiv"
Exemplo n.º 28
0
def test_isOpeningTag():
    """``div`` is an opening tag; its first child and ``br`` are not.

    Relies on the module-level ``div`` and ``br`` fixtures.
    """
    assert div.isOpeningTag()

    leading_child = first(div.childs)
    assert not leading_child.isOpeningTag()

    assert not br.isOpeningTag()
Exemplo n.º 29
0
def test_getTagName():
    """``getTagName()`` returns the tag name; the first child returns its text.

    Relies on the module-level ``div`` and ``br`` fixtures.
    """
    assert div.getTagName() == 'div'

    # The first child of `div` reports its raw text, whitespace included.
    expected_text = '\n        Second.\n        '
    assert first(div.childs).getTagName() == expected_text

    assert br.getTagName() == "br"
Exemplo n.º 30
0
def test_isTag():
    """``div`` is a tag, its first child is not.

    Relies on the module-level ``div`` fixture.
    """
    assert div.isTag()

    leading_child = first(div.childs)
    assert not leading_child.isTag()
Exemplo n.º 31
0
def _construct_new_link_el(url):
    """Build a ``<link>`` element whose content is `url`.

    Args:
        url: Value interpolated between ``<link>`` and ``</link>`` with
            ``%s`` formatting; it is not escaped here.

    Returns:
        ``first()`` applied to the ``link`` matches in the parsed markup.
    """
    markup = "<link>%s</link>" % url
    link_matches = dhtmlparser.parseString(markup).find("link")

    return first(link_matches)
Exemplo n.º 32
0
def test_isEndTag():
    """Only the element stored in ``div.endtag`` reports itself as an end tag.

    Relies on the module-level ``div`` fixture.
    """
    assert not div.isEndTag()

    leading_child = first(div.childs)
    assert not leading_child.isEndTag()

    closing = div.endtag
    assert closing.isEndTag()
Exemplo n.º 33
0
def test_isComment():
    """The second-to-last child of ``div`` is a comment; the others are not.

    Relies on the module-level ``div`` fixture.
    """
    assert not div.isComment()

    leading_child = first(div.childs)
    assert not leading_child.isComment()

    # The comment is expected at index -2 of the child list.
    comment_candidate = div.childs[-2]
    assert comment_candidate.isComment()
Exemplo n.º 34
0
def test_getContent():
    """``getContent()`` returns inner markup; ``br`` has empty content.

    Relies on the module-level ``div`` and ``br`` fixtures.
    """
    expected_div_content = (
        '\n        Second.\n        <br />\n        <!-- comment -->\n    '
    )
    expected_text = '\n        Second.\n        '

    assert div.getContent() == expected_div_content
    assert first(div.childs).getContent() == expected_text

    assert br.getContent() == ""
Exemplo n.º 35
0
def test_tagToString():
    """``tagToString()`` serializes only the tag itself, not its children.

    Relies on the module-level ``div`` and ``br`` fixtures.
    """
    # The unquoted source attribute is expected back with double quotes.
    assert div.tagToString() == '<div id="second">'

    leading_child = first(div.childs)
    assert leading_child.tagToString() == '\n        Second.\n        '

    assert br.tagToString() == "<br />"
Exemplo n.º 36
0
# Variables ===================================================================
# Shared fixture document with two top-level <div> elements.
DOM = dhtmlparser.parseString(
    """
    <div id=first>
        First div.
        <div id=first.subdiv>
            Subdiv in first div.
        </div>
    </div>
    <div id=second>
        Second.
        <br />
        <!-- comment -->
    </div>
"""
)
# Last match returned by find("div"); presumably the `id=second` element,
# which is the last div in the document -- confirm against find() ordering.
div = DOM.find("div")[-1]
# The <br /> located inside `div`.
br = first(div.find("br"))


# Functions & objects =========================================================
def test_isTag():
    """The fixture ``div`` is a tag, while its first child is not."""
    assert div.isTag()

    head_child = first(div.childs)
    assert not head_child.isTag()


def test_isEndTag():
    """Neither ``div`` nor its first child is an end tag; ``div.endtag`` is."""
    assert not div.isEndTag()

    head_child = first(div.childs)
    assert not head_child.isEndTag()

    end_element = div.endtag
    assert end_element.isEndTag()

Exemplo n.º 37
0
def test_toString():
    """``toString()`` output starts with the tag's own ``tagToString()``.

    Relies on the module-level ``div`` and ``br`` fixtures.
    """
    opening_markup = div.tagToString()
    assert div.toString().startswith(opening_markup)

    leading_child = first(div.childs)
    assert leading_child.toString() == '\n        Second.\n        '

    assert br.toString() == "<br />"