# Replace the node two siblings after every ``MsoNormalTable`` element with a
# one-row table (class ``footTb``): each label found in that node becomes a
# label cell followed by a cell holding an <input> whose id is derived from
# the label. Afterwards every <p> is unwrapped and the document is printed
# and written to ``prase2.htm``.
pinyin = Pinyin()  # created once and reused for every label
for line in soup.find_all(class_='MsoNormalTable'):
    # NOTE(review): assumes exactly one node sits between the table and the
    # node holding the labels; raises AttributeError otherwise -- confirm.
    p = line.next_sibling.next_sibling

    new_tag_tr = soup.new_tag('tr')
    new_tag_tr['class'] = 'norTr'
    new_tag = soup.new_tag('table')
    new_tag['class'] = 'footTb'
    new_tag.append(new_tag_tr)

    for sub in p.stripped_strings:
        for s in sub.splitlines():
            if len(s.strip()) <= 1:
                # Skip empty and single-character fragments.
                continue

            # Labels longer than two characters get the wider column.
            width = "width:15mm;" if len(s.strip(':')) > 2 else "width:1cm;"
            new_tag_td = soup.new_tag('td', style=width)
            new_tag_td_ip = soup.new_tag('td', style=width)

            # NOTE(review): the two strip(':') calls are identical as written;
            # one of them was presumably a full-width colon -- confirm.
            s = s.strip(':').strip(':').strip()
            pys = pinyin.get_init(s)

            new_tag_ip = soup.new_tag('input', id='ft_' + pys)
            new_tag_td_ip.append(new_tag_ip)
            new_tag_td.append(soup.new_string(s + ':'))
            new_tag_tr.append(new_tag_td)
            new_tag_tr.append(new_tag_td_ip)

    p.replace_with(new_tag)

# Drop every <p> wrapper while keeping its children in place.
for line in soup.find_all('p'):
    line.unwrap()

# Render once, then echo and persist the same text.
prettified = soup.prettify()
print(prettified)
# The context manager guarantees the file is flushed and closed.
with open('prase2.htm', 'w', encoding='utf-8') as f:
    f.write(prettified)
def get_title_pinyin_td(td):
    """Return ``Pinyin().get_init(...)`` applied to the title of *td*.

    The title is whatever ``get_title_td(td)`` returns; it is passed
    unchanged to ``get_init`` on a freshly created ``Pinyin`` instance.
    NOTE(review): ``get_init`` presumably yields pinyin initials -- confirm
    against the ``Pinyin`` class in use.
    """
    converter = Pinyin()
    title_text = get_title_td(td)
    return converter.get_init(title_text)
# For each ``MsoNormalTable`` element, take the node two siblings after it
# and swap it for a single-row ``footTb`` table. Every label in that node
# yields two cells: the label text and an <input> whose id is built from the
# label. Then all <p> tags are unwrapped and the result is printed and saved
# to ``prase2.htm``.
pinyin = Pinyin()  # a single converter instance serves every label
for line in soup.find_all(class_='MsoNormalTable'):
    # NOTE(review): relies on the label node being the second next sibling;
    # a missing sibling raises AttributeError -- confirm the input layout.
    p = line.next_sibling.next_sibling

    new_tag_tr = soup.new_tag('tr')
    new_tag_tr['class'] = 'norTr'
    new_tag = soup.new_tag('table')
    new_tag['class'] = 'footTb'
    new_tag.append(new_tag_tr)

    for sub in p.stripped_strings:
        for s in sub.splitlines():
            if len(s.strip()) <= 1:
                # Nothing usable in empty or one-character fragments.
                continue

            # Wider column for labels of more than two characters.
            width = "width:15mm;" if len(s.strip(':')) > 2 else "width:1cm;"
            new_tag_td = soup.new_tag('td', style=width)
            new_tag_td_ip = soup.new_tag('td', style=width)

            # NOTE(review): both strip(':') calls are the same as written;
            # one was presumably meant to be a full-width colon -- confirm.
            s = s.strip(':').strip(':').strip()
            pys = pinyin.get_init(s)

            new_tag_ip = soup.new_tag('input', id='ft_' + pys)
            new_tag_td_ip.append(new_tag_ip)
            new_tag_td.append(soup.new_string(s + ':'))
            new_tag_tr.append(new_tag_td)
            new_tag_tr.append(new_tag_td_ip)

    p.replace_with(new_tag)

# Remove each <p> tag but keep its contents.
for line in soup.find_all('p'):
    line.unwrap()

# Build the output text once and reuse it for both sinks.
prettified = soup.prettify()
print(prettified)
# ``with`` closes the handle so the written data is flushed to disk.
with open('prase2.htm', 'w', encoding='utf-8') as f:
    f.write(prettified)