Exemplo n.º 1
0
def get_topn_words_from_urls(urls, topn, save_reports=False):
    """Compute the top words over the combined article text of several URLs.

    Each URL is fetched with ``html_utils.get_html``, its article text is
    extracted with ``parse_report_article``, and all article texts are joined
    with newlines before being handed to ``cut_text_utils.get_topn_words``.

    Args:
        urls: Iterable of URLs to fetch.
        topn: Passed through unchanged to ``cut_text_utils.get_topn_words``
            (presumably the number of words to return -- confirm against
            that helper).
        save_reports: When true, the combined article text is also written
            to ``reports.txt`` in the current working directory,
            overwriting any existing file.

    Returns:
        Whatever ``cut_text_utils.get_topn_words`` returns for the combined
        text.
    """
    # Fetch every page first, then extract the articles, in input order.
    pages = [html_utils.get_html(url) for url in urls]
    # Merge all article texts into one newline-separated document.
    summary_article = '\n'.join(parse_report_article(page) for page in pages)
    if save_reports:
        # Pin the encoding: the platform default may be unable to encode
        # non-ASCII article text and would make the file locale-dependent.
        with open('reports.txt', 'w', encoding='utf-8') as fout:
            fout.write(summary_article)
    return cut_text_utils.get_topn_words(summary_article, topn)
Exemplo n.º 2
0
def get_topn_words(url, topn):
    """Compute the top words for the article found at a single URL.

    The page is fetched with ``html_utils.get_html``, its article text is
    extracted with ``parse_report_article``, and the result is passed to
    ``cut_text_utils.get_topn_words`` together with ``topn``.

    Returns:
        Whatever ``cut_text_utils.get_topn_words`` returns for the article.
    """
    page = html_utils.get_html(url)
    report_text = parse_report_article(page)
    top_words = cut_text_utils.get_topn_words(report_text, topn)
    return top_words