示例#1
0
def test_parse_dom():
    """Feed four raw tags to ``_parseDOM`` and verify the resulting structure.

    The input is ``<h1>``, two ``<xx>`` tags and ``</h1>``.  The assertions
    expect two top-level elements (opener and end tag) that reference each
    other, with both ``<xx>`` elements stored as non-pair children of the
    first top-level element.
    """
    sources = ("<h1>", "<xx>", "<xx>", "</h1>")
    tag_list = [dhtmlparser.HTMLElement(src) for src in sources]

    dom = dhtmlparser._parseDOM(tag_list)

    assert len(dom) == 2

    # Both <xx> tags must end up nested under the first top-level element.
    nested = first(dom).childs
    assert len(nested) == 2
    for child in nested:
        assert child.getTagName() == "xx"
    for child in nested:
        assert child.isNonPairTag()

    opener, closer = dom[0], dom[1]

    assert not opener.isNonPairTag()
    assert not closer.isNonPairTag()

    assert opener.isOpeningTag()
    assert closer.isEndTag()

    # Opener and end tag have to point at each other.
    assert opener.endtag == closer
    assert closer.openertag == opener

    assert closer.isEndTagTo(opener)
示例#2
0
def test_parse_dom():
    """Check how ``_parseDOM`` arranges ``<h1><xx><xx></h1>``.

    Expected outcome, as asserted below: the result has two items, the
    opening ``<h1>`` and its end tag, linked through ``endtag`` /
    ``openertag``; the two ``<xx>`` tags are non-pair children of the first
    item.
    """
    h1_open = dhtmlparser.HTMLElement("<h1>")
    xx_a = dhtmlparser.HTMLElement("<xx>")
    xx_b = dhtmlparser.HTMLElement("<xx>")
    h1_close = dhtmlparser.HTMLElement("</h1>")

    dom = dhtmlparser._parseDOM([h1_open, xx_a, xx_b, h1_close])

    assert len(dom) == 2

    inner = first(dom).childs
    assert len(inner) == 2
    assert inner[0].getTagName() == "xx"
    assert inner[1].getTagName() == "xx"
    assert inner[0].isNonPairTag()
    assert inner[1].isNonPairTag()

    head, tail = dom[0], dom[1]

    assert not head.isNonPairTag()
    assert not tail.isNonPairTag()

    assert head.isOpeningTag()
    assert tail.isEndTag()

    # The pair must be cross-referenced in both directions.
    assert head.endtag == tail
    assert tail.openertag == head

    assert tail.isEndTagTo(head)
示例#3
0
def _find_link_element(item):
    """Return the first ``link`` element found in `item`, or ``None``.

    Args:
        item: Object exposing ``find(tag_name)``.

    Returns:
        ``first()`` applied to the result of ``item.find("link")``, or
        ``None`` when that result is falsy.
    """
    matches = item.find("link")

    return first(matches) if matches else None
def test_wfind_complicated():
    """Chain ``wfind`` through nested tags and check which ``xe`` is reached.

    Only the ``xe`` inside ``root > some > something`` is expected to be
    found; a chain through tags that are absent from the markup must end in
    an element without children.
    """
    markup = """
        <root>
            <some>
                <something>
                    <xe id="wanted xe" />
                </something>
                <something>
                    asd
                </something>
                <xe id="another xe" />
            </some>
            <some>
                else
                <xe id="yet another xe" />
            </some>
        </root>
        """
    dom = dhtmlparser.parseString(markup)

    something = dom.wfind("root").wfind("some").wfind("something")
    xe = something.find("xe")

    assert len(xe) == 1
    assert first(xe).params["id"] == "wanted xe"

    # Neither <pink> nor <unicorn> exists in the markup.
    pink = dom.wfind("root").wfind("pink")
    unicorn = pink.wfind("unicorn")

    assert not unicorn.childs
def test_match_parameters():
    """Call ``match`` with a path mixing string, dict and list steps.

    The path ``root > div#1 > div#5 > xe`` is expected to yield exactly one
    element, the one with ``id="wanted xe"``.
    """
    markup = """
        <root>
            <div id="1">
                <div id="5">
                    <xe id="wanted xe" />
                </div>
                <div id="10">
                    <xe id="another wanted xe" />
                </div>
                <xe id="another xe" />
            </div>
            <div id="2">
                <div id="20">
                    <xe id="last wanted xe" />
                </div>
            </div>
        </root>
        """
    dom = dhtmlparser.parseString(markup)

    # The steps use three forms: plain tag name, dict, and [name, params].
    outer_div = {"tag_name": "div", "params": {"id": "1"}}
    inner_div = ["div", {"id": "5"}]
    xe = dom.match("root", outer_div, inner_div, "xe")

    assert len(xe) == 1
    assert first(xe).params["id"] == "wanted xe"
示例#6
0
def _pick_item_property(item, item_property):
    """Return the content of the first `item_property` element in `item`.

    Args:
        item: Object exposing ``find(tag_name)``.
        item_property: Name passed to ``item.find``.

    Returns:
        ``getContent()`` of ``first()`` of the matches, or ``None`` when
        ``item.find`` returns something falsy.
    """
    found = item.find(item_property)

    if found:
        return first(found).getContent()

    return None
def test_wfind_complicated():
    """Walk ``root > some > something`` with ``wfind`` and look for ``xe``.

    Exactly one ``xe`` (``id="wanted xe"``) is expected.  A second chain
    through tags missing from the markup must produce an element whose
    ``childs`` is empty.
    """
    source = """
        <root>
            <some>
                <something>
                    <xe id="wanted xe" />
                </something>
                <something>
                    asd
                </something>
                <xe id="another xe" />
            </some>
            <some>
                else
                <xe id="yet another xe" />
            </some>
        </root>
        """
    dom = dhtmlparser.parseString(source)

    root = dom.wfind("root")
    xe = root.wfind("some").wfind("something").find("xe")

    assert len(xe) == 1
    assert first(xe).params["id"] == "wanted xe"

    # <pink> and <unicorn> do not occur anywhere in the source.
    unicorn = dom.wfind("root").wfind("pink").wfind("unicorn")

    assert not unicorn.childs
def test_match_parameters():
    """Verify ``match`` with a path of mixed step forms.

    The steps are a tag name, a ``{"tag_name", "params"}`` dict, a
    ``[name, params]`` list and another tag name; one ``xe`` with
    ``id="wanted xe"`` is expected.
    """
    source = """
        <root>
            <div id="1">
                <div id="5">
                    <xe id="wanted xe" />
                </div>
                <div id="10">
                    <xe id="another wanted xe" />
                </div>
                <xe id="another xe" />
            </div>
            <div id="2">
                <div id="20">
                    <xe id="last wanted xe" />
                </div>
            </div>
        </root>
        """
    dom = dhtmlparser.parseString(source)

    path = (
        "root",
        {"tag_name": "div", "params": {"id": "1"}},
        ["div", {"id": "5"}],
        "xe",
    )
    xe = dom.match(*path)

    assert len(xe) == 1
    assert first(xe).params["id"] == "wanted xe"
示例#9
0
def _find_comments_link(item):
    """Return the content of the first ``comments`` element in `item`.

    Args:
        item: Object exposing ``find(tag_name)``.

    Returns:
        ``getContent()`` of ``first()`` of the matches, or ``None`` when
        ``item.find("comments")`` returns something falsy.
    """
    matches = item.find("comments")

    return first(matches).getContent() if matches else None
def test_params():
    """Check reading and clearing ``params`` on a parsed element.

    An unquoted attribute value is expected to be exposed as a string, and
    an element with emptied ``params`` must serialize without attributes.
    """
    dom = dhtmlparser.parseString("<xe id=1 />")
    found = dom.find("xe")
    xe = first(found)

    assert xe.params["id"] == "1"

    # Dropping all parameters must be reflected in the string form.
    xe.params = {}
    serialized = str(xe)
    assert serialized == "<xe />"
示例#11
0
def test_params():
    """Parse ``<xe id=1 />``, read its ``id`` and then clear ``params``.

    After ``params`` is replaced with an empty dict, ``str()`` of the
    element is expected to be ``<xe />``.
    """
    element = first(dhtmlparser.parseString("<xe id=1 />").find("xe"))

    assert element.params["id"] == "1"

    element.params = {}

    assert str(element) == "<xe />"
def test_containsParamSubset():
    """Check ``containsParamSubset`` against matching and non-matching dicts.

    Any subset of the element's ``id`` / ``class`` parameters must be
    accepted; a dict with an extra, unknown key must be rejected.
    """
    dom = dhtmlparser.parseString("<div id=x class=xex></div>")
    element = first(dom.find("div"))

    accepted = (
        {"id": "x"},
        {"class": "xex"},
        {"id": "x", "class": "xex"},
    )
    for subset in accepted:
        assert element.containsParamSubset(subset)

    rejected = {"asd": "bsd", "id": "x", "class": "xex"}
    assert not element.containsParamSubset(rejected)
def test_isNonPairTag():
    """Check ``isNonPairTag`` on the shared ``div``, its first child and ``br``.

    Only ``br`` is expected to be reported as a non-pair tag.
    """
    assert not div.isNonPairTag()

    leading = first(div.childs)
    stripped_name = leading.getTagName().strip()
    assert stripped_name == "Second."

    # The first child must be neither a tag nor a non-pair tag.
    assert not leading.isTag()
    assert not leading.isNonPairTag()

    assert br.isNonPairTag()
示例#14
0
def test_replaceWith():
    """Replace a non-pair tag in place and check the serialized result.

    The ``<nonpair />`` element inside a ``<div>`` is replaced with
    ``<another />``; afterwards the tree must contain ``another`` and
    serialize to ``<div><another /></div>``.
    """
    # Local fixture: the assertions below show that ``replaceWith`` changes
    # the tree, so the test must not depend on (or modify) a ``dom`` object
    # defined outside this function.
    dom = dhtmlparser.parseString("<div><nonpair /></div>")

    nonpair = first(dom.find("nonpair"))

    assert nonpair

    nonpair.replaceWith(dhtmlparser.HTMLElement("<another />"))

    assert dom.find("another")

    assert dom.getContent() == "<div><another /></div>"
def test_find_params():
    """Search by parameters only, passing an empty tag name to ``find``.

    Exactly one element with ``id="first"`` is expected, and its content
    must start with ``First div.`` once stripped.
    """
    markup = """
        <div id=first>
            First div.
            <div id=first.subdiv>
                Subdiv in first div.
            </div>
        </div>
        <div id=second>
            Second.
        </div>
        """
    dom = dhtmlparser.parseString(markup)

    div_tags = dom.find("", {"id": "first"})

    assert div_tags
    assert len(div_tags) == 1

    match = first(div_tags)
    assert match.params.get("id") == "first"

    content = match.getContent().strip()
    assert content.startswith("First div.")
def test_replaceWith():
    """Swap ``<nonpair />`` for ``<another />`` and verify the tree.

    After ``replaceWith`` the tree must contain an ``another`` element and
    its content must be ``<div><another /></div>``.
    """
    # Build the tree here instead of relying on an outer ``dom``: the
    # assertions below show the tree is modified, so a shared object would
    # no longer contain ``<nonpair />`` for any later use.
    dom = dhtmlparser.parseString("<div><nonpair /></div>")

    nonpair = first(dom.find("nonpair"))

    assert nonpair

    replacement = dhtmlparser.HTMLElement("<another />")
    nonpair.replaceWith(replacement)

    assert dom.find("another")

    assert dom.getContent() == "<div><another /></div>"
示例#17
0
def test_recovery_after_is_smaller_than_sign():
    """Check parsing of input containing a bare ``<`` inside text.

    The ``<code>`` content must keep ``5 < 10.`` verbatim, and the ``div``
    that follows must still be found by its ``class``.
    """
    source = """<code>5 < 10.</code>
    <div class="rating">here is the rating</div>
    """

    dom = dhtmlparser.parseString(source)

    code = dom.find("code")

    assert code

    code_text = first(code).getContent()
    assert code_text == "5 < 10."

    # The element after the stray "<" must still be reachable.
    assert dom.find("div", {"class": "rating"})
示例#18
0
def test_recovery_after_is_smaller_than_sign():
    """Parse text with a lone ``<`` and verify the surrounding elements.

    Expected: ``<code>`` is found with content ``5 < 10.`` and the later
    ``<div class="rating">`` is found as well.
    """
    markup = """<code>5 < 10.</code>
    <div class="rating">here is the rating</div>
    """

    dom = dhtmlparser.parseString(markup)

    code_matches = dom.find("code")
    assert code_matches
    assert first(code_matches).getContent() == "5 < 10."

    rating = dom.find("div", {"class": "rating"})
    assert rating
def test_find():
    """Look up ``div`` elements by ``id`` and check their string forms.

    The first ``div`` spells its attribute ``ID`` while the lookup uses
    lower-case ``id``; the test expects it to be found anyway and to keep
    its original spelling when serialized.
    """
    markup = """
        "<div ID='xa' a='b'>obsah xa divu</div> <!-- ID, not id :) -->
         <div id='xex' a='b'>obsah xex divu</div>
        """
    dom = dhtmlparser.parseString(markup)

    xa_matches = dom.find("div", {"id": "xa"})  # lower-case `id` on purpose
    xex_matches = dom.find("div", {"id": "xex"})
    missing = dom.find("div", {"id": "xerexex"})

    assert xa_matches
    assert xex_matches
    assert not missing

    div_xe = first(xa_matches)
    div_xex = first(xex_matches)

    assert div_xe.toString() == '<div ID="xa" a="b">obsah xa divu</div>'
    assert div_xex.toString() == '<div id="xex" a="b">obsah xex divu</div>'

    for element in (div_xe, div_xex):
        assert element.getTagName() == "div"
def test_find_params():
    """Call ``find`` with an empty tag name and only a parameter filter.

    One element with ``id="first"`` is expected; its stripped content must
    begin with ``First div.``.
    """
    source = """
        <div id=first>
            First div.
            <div id=first.subdiv>
                Subdiv in first div.
            </div>
        </div>
        <div id=second>
            Second.
        </div>
        """
    dom = dhtmlparser.parseString(source)

    wanted = {"id": "first"}
    div_tags = dom.find("", wanted)

    assert div_tags
    assert len(div_tags) == 1

    found = first(div_tags)
    assert found.params.get("id") == "first"
    assert found.getContent().strip().startswith("First div.")
def test_find():
    """Find ``div`` elements by ``id`` and verify serialization and names.

    The lookup for ``xa`` uses lower-case ``id`` although the markup says
    ``ID``; the test expects a hit, and expects ``toString`` to keep the
    upper-case spelling.
    """
    source = """
        "<div ID='xa' a='b'>obsah xa divu</div> <!-- ID, not id :) -->
         <div id='xex' a='b'>obsah xex divu</div>
        """
    dom = dhtmlparser.parseString(source)

    found_xa = dom.find("div", {"id": "xa"})  # lower-case `id` on purpose
    found_xex = dom.find("div", {"id": "xex"})
    found_none = dom.find("div", {"id": "xerexex"})

    assert found_xa
    assert found_xex
    assert not found_none

    div_xe = first(found_xa)
    div_xex = first(found_xex)

    expected_xe = '<div ID="xa" a="b">obsah xa divu</div>'
    expected_xex = '<div id="xex" a="b">obsah xex divu</div>'
    assert div_xe.toString() == expected_xe
    assert div_xex.toString() == expected_xex

    assert div_xe.getTagName() == "div"
    assert div_xex.getTagName() == "div"
示例#22
0
def test_closeElements():
    """Verify that ``_closeElements`` gives an unclosed child an end tag.

    A ``<div>`` with an explicit end tag receives a single ``<xe>`` child
    that has no ``endtag``.  After ``_closeElements`` has processed the
    children, the ``<xe>`` found through ``tag.find`` must have ``endtag``
    set.
    """
    tag = dhtmlparser.HTMLElement("<div>")
    tag.endtag = dhtmlparser.HTMLElement("</div>")

    tag.childs = [dhtmlparser.HTMLElement("<xe>")]

    xe = tag.find("xe")
    assert xe
    assert not xe[0].endtag

    # Assign to ``childs`` (not to a new, misspelled attribute) so that the
    # following ``find`` searches the list returned by ``_closeElements``.
    tag.childs = _closeElements(tag.childs, dhtmlparser.HTMLElement)

    xe = tag.find("xe")
    assert xe
    assert first(xe).endtag
def test_closeElements():
    """Check that ``_closeElements`` closes a child lacking an end tag.

    Before the call the only child, ``<xe>``, has a falsy ``endtag``;
    afterwards the ``<xe>`` returned by ``tag.find`` must have a truthy one.
    """
    tag = dhtmlparser.HTMLElement("<div>")
    tag.endtag = dhtmlparser.HTMLElement("</div>")

    tag.childs = [
        dhtmlparser.HTMLElement("<xe>")
    ]

    xe = tag.find("xe")
    assert xe
    assert not xe[0].endtag

    # The result must be stored in ``childs``; writing it to any other
    # attribute name would leave the element's children untouched.
    tag.childs = _closeElements(tag.childs, dhtmlparser.HTMLElement)

    xe = tag.find("xe")
    assert xe
    assert first(xe).endtag
示例#24
0
def test_multiline_attribute():
    """Parse a tag whose attribute value spans two lines.

    All four tag names must be findable, and the ``attribute`` parameter of
    ``ubertag`` must keep the embedded newline and indentation.
    """
    source = """<sometag />
<ubertag attribute="long attribute
                    continues here">
    <valid>notice that quote is not properly started</valid>
</ubertag>
<something_parsable />
"""
    expected_attribute = """long attribute
                    continues here"""

    dom = dhtmlparser.parseString(source)

    assert dom.find("sometag")
    assert dom.find("valid")
    assert dom.find("ubertag")

    ubertag = first(dom.find("ubertag"))
    assert ubertag.params["attribute"] == expected_attribute

    assert dom.find("something_parsable")
def test_wfind():
    """Chain two ``wfind("div")`` calls and inspect the result's children.

    The resulting element must have children, and the first of them is
    expected to be the ``div`` with ``id="first.subdiv"``.
    """
    markup = """
        <div id=first>
            First div.
            <div id=first.subdiv>
                Subdiv in first div.
            </div>
        </div>
        <div id=second>
            Second.
        </div>
        """
    dom = dhtmlparser.parseString(markup)

    outer = dom.wfind("div")
    div = outer.wfind("div")

    children = div.childs
    assert children
    assert first(children).params["id"] == "first.subdiv"
示例#26
0
def test_multiline_attribute():
    """Check that a quoted attribute value may contain a line break.

    Every tag in the input must be found, and ``ubertag``'s ``attribute``
    parameter must equal the two-line value exactly as written.
    """
    markup = """<sometag />
<ubertag attribute="long attribute
                    continues here">
    <valid>notice that quote is not properly started</valid>
</ubertag>
<something_parsable />
"""

    dom = dhtmlparser.parseString(markup)

    for tag_name in ("sometag", "valid", "ubertag"):
        assert dom.find(tag_name)

    attribute = first(dom.find("ubertag")).params["attribute"]
    assert attribute == """long attribute
                    continues here"""

    assert dom.find("something_parsable")
def test_wfind():
    """Verify ``wfind("div").wfind("div")`` on two sibling ``div`` trees.

    The result must have a non-empty ``childs`` whose first entry carries
    ``id="first.subdiv"``.
    """
    source = """
        <div id=first>
            First div.
            <div id=first.subdiv>
                Subdiv in first div.
            </div>
        </div>
        <div id=second>
            Second.
        </div>
        """
    dom = dhtmlparser.parseString(source)

    nested = dom.wfind("div").wfind("div")

    assert nested.childs

    leading = first(nested.childs)
    assert leading.params["id"] == "first.subdiv"
def test_isOpeningTag():
    """Check ``isOpeningTag`` on the shared ``div``, its first child and ``br``.

    Only ``div`` is expected to be reported as an opening tag.
    """
    leading = first(div.childs)

    assert div.isOpeningTag()
    assert not leading.isOpeningTag()

    assert not br.isOpeningTag()
def test_getTagName():
    """Check ``getTagName`` on the shared ``div``, its first child and ``br``.

    For the first child the expected "tag name" is its raw text, including
    the surrounding whitespace.
    """
    expected_text = "\n        Second.\n        "

    assert div.getTagName() == "div"
    assert first(div.childs).getTagName() == expected_text

    assert br.getTagName() == "br"
def test_isTag():
    """``div`` must be reported as a tag; its first child must not."""
    leading = first(div.childs)

    assert div.isTag()
    assert not leading.isTag()
示例#31
0
def _construct_new_link_el(url):
    """Build a ``<link>`` element whose content is `url`.

    Args:
        url: Value interpolated (with ``%``) between ``<link>`` and
            ``</link>``; it is inserted as-is, without escaping.

    Returns:
        ``first()`` of the ``link`` elements found in the parsed markup.
    """
    markup = "<link>%s</link>" % url
    dom = dhtmlparser.parseString(markup)
    links = dom.find("link")

    return first(links)
def test_isEndTag():
    """Only ``div.endtag`` is expected to be reported as an end tag."""
    leading = first(div.childs)

    assert not div.isEndTag()
    assert not leading.isEndTag()

    closing = div.endtag
    assert closing.isEndTag()
def test_isComment():
    """Check ``isComment`` on ``div``, its first child and a later child.

    The second-to-last child of ``div`` is the one expected to be a comment.
    """
    children = div.childs

    assert not div.isComment()
    assert not first(children).isComment()

    assert children[-2].isComment()
def test_getContent():
    """Check ``getContent`` on the shared ``div``, its first child and ``br``.

    ``div`` must return its whole inner markup, the first child its own
    text, and ``br`` an empty string.
    """
    expected_div = '\n        Second.\n        <br />\n        <!-- comment -->\n    '
    expected_text = '\n        Second.\n        '

    assert div.getContent() == expected_div
    assert first(div.childs).getContent() == expected_text

    assert br.getContent() == ""
def test_tagToString():
    """Check ``tagToString`` on the shared ``div``, its first child and ``br``.

    ``div`` must render only its opening tag (with the value quoted), the
    first child its raw text, and ``br`` the self-closing form.
    """
    leading = first(div.childs)

    assert div.tagToString() == '<div id="second">'
    assert leading.tagToString() == "\n        Second.\n        "

    assert br.tagToString() == "<br />"
# Variables ===================================================================
# Shared fixture, parsed once when the module is imported.
DOM = dhtmlparser.parseString(
    """
    <div id=first>
        First div.
        <div id=first.subdiv>
            Subdiv in first div.
        </div>
    </div>
    <div id=second>
        Second.
        <br />
        <!-- comment -->
    </div>
"""
)

# Last ``div`` returned by the search; the tests expect it to be the one
# with ``id=second``.
div = DOM.find("div")[-1]

# ``first()`` of the ``br`` matches inside that ``div``.
br = first(div.find("br"))


# Functions & objects =========================================================
def test_isTag():
    """Check ``isTag``: true for ``div``, false for its first child."""
    assert div.isTag()

    first_child = first(div.childs)
    assert not first_child.isTag()


def test_isEndTag():
    """Check ``isEndTag`` on ``div``, its first child and ``div.endtag``.

    Neither ``div`` nor its first child may be an end tag; ``div.endtag``
    must be one.
    """
    for element in (div, first(div.childs)):
        assert not element.isEndTag()

    assert div.endtag.isEndTag()

def test_toString():
    """Check ``toString`` on the shared ``div``, its first child and ``br``.

    The full string form of ``div`` must begin with its opening tag as
    rendered by ``tagToString``.
    """
    opening = div.tagToString()
    assert div.toString().startswith(opening)

    leading = first(div.childs)
    assert leading.toString() == "\n        Second.\n        "
    assert br.toString() == "<br />"