Exemplo n.º 1
0
if __name__ == '__main__':
    # Manual smoke test: build a Node tree from a small inline document and
    # print the simpleTreeMatching result for two sibling subtrees.
    import libxml2dom

    markup = """
    <html>
    <body>
    <div>
    1
    <p>
    2
    </p>
    </div>
    <div>
    3
    <p>
    <div></div>
    </p>
    </div>
    </body>
    </html>
    """
    dom = libxml2dom.parseString(markup)
    # Wrap the parsed DOM, then keep whatever loadNodeTree returns as the root.
    root = Node(dom).loadNodeTree(dom)

    # Node two levels below the root (presumably <body> -- verify against
    # how loadNodeTree lays out the tree); its first two children are compared.
    parent = root.childNodes[0].childNodes[0]
    first = parent.childNodes[0]
    second = parent.childNodes[1]

    # Debug aid: print first.tag, second.tag
    print(simpleTreeMatching(first, second))
Exemplo n.º 2
0
    return maxmatch

if __name__ == "__main__":
    print "Test utils.match functions"

    import libxml2dom
    from eri.extractors.distancebypair2.node import Node
    import test_trees

    if True:
        # Test case for test_trees.tree3. The `if True:` guard keeps the case
        # enabled; it runs unconditionally.
        #__debug = True

        # Parse the fixture markup and wrap the resulting DOM in a Node tree.
        html = test_trees.tree3
        dom = libxml2dom.parseString(html)
        root = Node(dom)
        # root is rebound to the value returned by loadNodeTree.
        # NOTE(review): the meaning of the extra arguments (0, True) is defined
        # by Node.loadNodeTree, which is not visible here -- confirm.
        root = root.loadNodeTree(dom, 0, True)

        # First two children of the node two levels below the root. They are
        # only referenced by the commented-out debug prints below.
        div1 = root.childNodes[0].childNodes[0].childNodes[0]
        div2 = root.childNodes[0].childNodes[0].childNodes[1]

        # Parent of div1/div2 (presumably the <body> element of tree3 -- verify
        # against test_trees); this is the node handed to treematch.
        body = root.childNodes[0].childNodes[0]
    #    print 'div1', div1.tags
    #    print 'div2', div2.tags
    #    print 'body', body.tags

        # Compare treematch's result with the expected list for tree3 and
        # report the actual value only when it differs.
        r = treematch(body)
        if not r == [1, 1, 1, 1, 1, False, False]:
            print 'False', r

    #tree2
    if True: