def get_html_data(self, html_name):
    """Print the header and data cells of every table row in an HTML file.

    The file ``html_name`` is read as UTF-8 from the directory
    ``self.html_Path``.  For each ``<tr>...</tr>`` section the raw row body is
    printed, followed by the contents of its ``<th>`` cells and then of its
    ``<td>`` cells.  The patterns match the literal tags only, so rows or
    cells written with attributes (``<td class=...>``) are not picked up.

    Nothing is returned.  Any error (unreadable file, undecodable bytes,
    parser failure) is printed instead of raised.
    """
    flags = re.S | re.M
    # Compile once; the cell patterns are reused for every row.
    row_re = re.compile(r'<tr>(.*?)</tr>', flags)
    header_re = re.compile(r'<th>(.*?)</th>', flags)
    cell_re = re.compile(r'<td>(.*?)</td>', flags)

    try:
        with codecs.open(os.path.join(self.html_Path, html_name), "r", "utf-8") as f:
            content = f.read()

        # Build the HTML tree and make sure the page has its content
        # container.  The original code followed this lookup with
        # ``node.text.encoding('gbk')``; strings have no such method, so the
        # call raised AttributeError on every page and the rows below were
        # never reached.  The call has been dropped (its result was unused).
        tree = etree.HTML(content)
        if not tree.xpath("//div[@class='content']"):
            print("no <div class='content'> element in %s" % html_name)
            return

        # Extract everything between <tr> and </tr> with a regular expression.
        for row in row_re.findall(content):
            print(row)
            # ``content`` was already decoded by codecs.open, so the cells are
            # text; decoding them again with unicode(cell, 'utf-8') raises
            # TypeError under Python 2.  Print them as they are.
            # First the <th> cells of the row ...
            for header in header_re.findall(row):
                print(header)
            # ... then its <td> cells.
            for cell in cell_re.findall(row):
                print(cell)
    except Exception as e:
        # Deliberately best-effort: report the problem and carry on.
        print(e)
def test_stylenames():
    """A ``font_size`` style keyword is rendered as ``font-size`` in the tag."""
    class my:
        class body(html.body):
            style = html.Style(font_size="12pt")

    expected = '<body style="font-size: 12pt"></body>'
    rendered = unicode(my.body())
    assert rendered == expected
def test_tag_with_subclassed_attr_simple():
    """Attributes declared on a subclassed ``Attr`` show up on the tag."""
    class my(ns.hello):
        class Attr(ns.hello.Attr):
            hello = "world"

    tag = my()
    # The attribute is readable on the instance ...
    assert tag.attr.hello == 'world'
    # ... and is part of the serialized element.
    expected = '<my hello="world"/>'
    assert unicode(tag) == expected
def test_tag_with_subclassed_attr_simple():
    """A class-level default in a subclassed ``Attr`` is serialized."""
    class my(ns.hello):
        class Attr(ns.hello.Attr):
            hello = "world"

    element = my()
    attr_value = element.attr.hello
    assert attr_value == 'world'

    markup = unicode(element)
    assert markup == '<my hello="world"/>'
def test_alternating_style():
    """List items take their style from their position inside the parent."""
    backgrounds = (
        html.Style(background="white"),
        html.Style(background="grey"),
    )

    class my(html):
        class li(html.li):
            @property
            def style(self):
                # Even positions get the first style, odd ones the second.
                position = self.parent.index(self)
                return backgrounds[position % 2]

    items = my.ul(my.li("hello"), my.li("world"), my.li("42"))
    expected = (
        '<ul><li style="background: white">hello</li>'
        '<li style="background: grey">world</li>'
        '<li style="background: white">42</li>'
        '</ul>'
    )
    assert unicode(items) == expected
def test_alternating_style():
    """A computed ``style`` property alternates between two backgrounds."""
    white = html.Style(background="white")
    grey = html.Style(background="grey")
    palette = (white, grey)

    class my(html):
        class li(html.li):
            @property
            def style(self):
                # The item's index within its parent selects the palette entry.
                return palette[self.parent.index(self) % 2]

    first = my.li("hello")
    second = my.li("world")
    third = my.li("42")
    listing = my.ul(first, second, third)

    markup = unicode(listing)
    assert markup == ('<ul><li style="background: white">hello</li>'
                      '<li style="background: grey">world</li>'
                      '<li style="background: white">42</li>'
                      '</ul>')
def test_html_name_stickyness():
    """A subclass of ``html.p`` still renders as ``<p>``, not as its own name."""
    class my(html.p):
        pass

    paragraph = my("hello")
    expected = '<p>hello</p>'
    assert unicode(paragraph) == expected
def test_raw():
    """Content wrapped in ``py.xml.raw`` is emitted verbatim inside the tag."""
    literal = py.xml.raw("<p>literal</p>")
    wrapper = ns.some(literal)
    rendered = unicode(wrapper)
    assert rendered == "<some><p>literal</p></some>"
def test_class_None():
    """``class_=None`` leaves the attribute out of the rendered tag."""
    body = html.body(class_=None)
    expected = '<body></body>'
    assert unicode(body) == expected
def test_tag_with_text_and_attributes():
    """Keyword arguments become attributes, readable and serialized."""
    tag = ns.some(name="hello", value="world")

    # Attribute access on the instance.
    assert tag.attr.name == 'hello'
    assert tag.attr.value == 'world'

    # Serialized form.
    rendered = unicode(tag)
    expected = '<some name="hello" value="world"/>'
    assert rendered == expected
def test_singleton():
    """An empty ``link`` self-closes while an empty ``script`` keeps its end tag."""
    with_link = html.head(html.link(href="foo"))
    link_markup = unicode(with_link)
    assert link_markup == '<head><link href="foo"/></head>'

    with_script = html.head(html.script(src="foo"))
    script_markup = unicode(with_script)
    assert script_markup == '<head><script src="foo"></script></head>'
def test_tag_with_text_entity():
    """Text containing ``&`` is serialized with the ``&amp;`` entity."""
    x = ns.hello('world & rest')
    u = unicode(x)
    # py.xml escapes markup characters in text content, so the ampersand must
    # appear as an entity in the expected output; a bare "&" here (as the
    # assertion previously read) never matches the serializer's result.
    assert u == "<hello>world &amp; rest</hello>"
def test_tag_xmlname():
    """An explicit ``xmlname`` replaces the class name as the tag name."""
    class my(ns.hello):
        xmlname = 'world'

    rendered = unicode(my())
    expected = '<world/>'
    assert rendered == expected
def test_list_nested():
    """Children passed as a list are rendered like directly passed children."""
    children = [ns.world()]  # deliberately a list, not a bare tag
    outer = ns.hello(children)
    expected = '<hello><world/></hello>'
    assert unicode(outer) == expected
def test_tag_nested():
    """After serialization a nested tag refers back to its parent."""
    outer = ns.hello(ns.world())

    # Result intentionally discarded: serializing triggers parentifying.
    unicode(outer)
    child = outer[0]
    assert child.parent is outer

    rendered = unicode(outer)
    assert rendered == '<hello><world/></hello>'
def test_tag_with_raw_attr():
    """A ``raw`` attribute value is written into the tag unchanged."""
    element = html.object(data=raw('&'))
    expected = '<object data="&"></object>'
    rendered = unicode(element)
    assert rendered == expected
def test_tag_with_text():
    """A string argument becomes the text content of the tag."""
    greeting = ns.hello("world")
    expected = "<hello>world</hello>"
    assert unicode(greeting) == expected
def test_tag_with_text_and_attributes_entity():
    """An attribute value containing ``&`` is stored raw but serialized escaped."""
    x = ns.some(name="hello & world")
    # The attribute object keeps the value exactly as it was passed in.
    assert x.attr.name == "hello & world"
    u = unicode(x)
    # py.xml escapes markup characters in attribute values on output, so the
    # expected markup carries the entity; a bare "&" here (as the assertion
    # previously read) never matches the serializer's result.
    assert u == '<some name="hello &amp; world"/>'
def test_stylenames():
    """Underscores in ``Style`` keyword names come out as hyphens."""
    class my:
        class body(html.body):
            style = html.Style(font_size="12pt")

    element = my.body()
    markup = unicode(element)
    assert markup == '<body style="font-size: 12pt"></body>'