示例#1
0
def test_wronglyClosedTags():
	"""Check the tree parseHTML builds from markup with misplaced closing tags.

	The first document contains a ``</br>`` closer, an ``</em>`` with no
	matching opener, and ``</html>`` / ``</body>`` in swapped order.  The
	second document starts with a closing tag that has no opener at all.
	"""
	markup = '''<html>
	<body>
		te</br><strong>x</em></strong>t
	</html>
	</body>'''
	tree = parseHTML(markup)
	root = tree.getroot()
	assert root.tag == 'html'
	strong = tree.find('.//strong')
	assert strong.text == 'x'
	br_elements = tree.findall('.//br')
	assert len(br_elements) == 0

	# Second case: a closing tag appears before the actual root element.
	markup = '</root-close><root></root>'
	tree = parseHTML(markup)
	root = tree.getroot()
	assert root.tag == 'root'
示例#2
0
def test_basic():
	"""Check parseHTML on well-formed markup: root tag, text and attributes."""
	# Single-line document: the body is a direct child holding plain text.
	markup = '''<html><body>text</body></html>'''
	tree = parseHTML(markup)
	root = tree.getroot()
	assert root.tag == 'html'
	body = tree.find('./body')
	assert body.text == 'text'

	# Multi-line document with a nested element that carries an attribute.
	markup = '''<html>
	<body>
		te<b id="1">x</b>t
	</body>
	</html>'''
	tree = parseHTML(markup)
	root = tree.getroot()
	assert root.tag == 'html'
	bold = tree.find('.//b')
	assert bold.get('id') == '1'
	assert bold.text == 'x'
示例#3
0
def test_unclosedTags():
	"""Check parseHTML on markup where no opened tag is ever closed.

	The expected tree has an ``html`` root, a ``strong`` element whose text
	is ``'xt'``, and exactly one ``br`` element.
	"""
	markup = '''<html>
	<body>
		te<br><strong>xt'''
	tree = parseHTML(markup)
	root = tree.getroot()
	assert root.tag == 'html'
	strong = tree.find('.//strong')
	assert strong.text == 'xt'
	br_elements = tree.findall('.//br')
	assert len(br_elements) == 1
示例#4
0
def test_emptyTags():
	"""Check parseHTML on markup containing ``<meta>`` and ``<br>`` without closers.

	The ``meta`` element must be a direct child of the root and keep its
	``name`` attribute; the ``strong`` text and the single ``br`` element
	must be found as well.
	"""
	markup = '''<html>
	<meta name="foo" value="bar">
	<body>
		te<br><strong>x</strong>t
	</body>
	</html>'''
	tree = parseHTML(markup)
	root = tree.getroot()
	assert root.tag == 'html'
	meta = tree.find('./meta')
	assert meta.get('name') == 'foo'
	strong = tree.find('.//strong')
	assert strong.text == 'x'
	br_elements = tree.findall('.//br')
	assert len(br_elements) == 1
示例#5
0
def test_example():
	"""Check that etree_text of a paragraph split by ``<br>`` is ``'Content'``."""
	tree = parseHTML('<html><p>Cont<br>ent</p></html>')
	paragraph = tree.find('.//p')
	extracted = etree_text(paragraph)
	assert extracted == 'Content'
示例#6
0
def test_brokenText():
	"""Check that etree_text of a body split by two ``<br/>`` tags is ``'abc'``."""
	markup = '<html><body>a<br/>b<br/>c</body></html>'
	tree = parseHTML(markup)
	body = tree.find('.//body')
	extracted = etree_text(body)
	assert extracted == 'abc'