def get_main_text(html):
    """Return the plain text of the main content extracted from *html*.

    The markup is passed through ``Document(html).summary()`` and the
    result is handed to ``BeautifulSoup(...).getText()`` to obtain text.
    Runs of consecutive newlines are then collapsed into a single
    newline, and a newline at the very start of the text is dropped.

    NOTE(review): ``Document`` and ``BeautifulSoup`` are imported elsewhere
    in the file; presumably ``Document`` is a readability-style extractor
    -- confirm against the file's imports.
    """
    summary_markup = Document(html).summary()
    text = BeautifulSoup(summary_markup).getText()

    # Squash every run of newlines down to one so no blank lines remain.
    text = re.sub(r'\n+', '\n', text, flags=re.M | re.S)

    # After squashing, at most one newline can lead the text; strip it.
    if text.startswith('\n'):
        text = text[1:]

    return text