Exemplo n.º 1
0
def hot_news(theday):
    """Return the news records of ``theday`` that pass both classifiers.

    Every record returned by ``get_all_news(theday)`` is run through
    ``phgrocery`` (hot-spot check, per the original comment) and
    ``model_fintext`` (financial-text check, per the original comment).
    Records for which both results are truthy are kept.

    Each kept record is the item's ``'_source'`` mapping itself (modified in
    place, not copied), extended with:

    * ``'key'``     -- the five extracted keywords joined by single spaces
    * ``'text_id'`` -- the item's ``'_id'`` value

    Args:
        theday: Day selector forwarded unchanged to ``get_all_news``.

    Returns:
        list: The enriched ``'_source'`` mappings, in input order.
    """
    articles = get_all_news(theday)
    # Single-argument parenthesised form: prints the same text as the
    # comma-separated print statement.
    print('%s news in %s' % (len(articles), theday))

    selected = []
    for article in articles:
        source = article['_source']
        text = source['content']

        # Both classifiers are evaluated for every item, whether or not the
        # first one already rejected it.
        is_hot = phgrocery(text)  # is this hot-spot news?
        is_financial = int(model_fintext.predict(text).predicted_y)  # is this financial text?
        if not (is_hot and is_financial):
            continue

        source['key'] = ' '.join(jieba_keywords(text, 5))
        source['text_id'] = article['_id']
        selected.append(source)

    return selected
Exemplo n.º 2
0
def hot_news(theday):
    """Return the news records of ``theday`` that pass both classifiers.

    Every record returned by ``get_all_news(theday)`` is run through
    ``phgrocery`` (hot-spot check, per the original comment) and
    ``model_fintext`` (financial-text check, per the original comment).
    Records for which both results are truthy are kept.

    Each kept record is the item's ``'_source'`` mapping itself (modified in
    place, not copied), extended with:

    * ``'key'``     -- the five extracted keywords joined by single spaces
    * ``'text_id'`` -- the item's ``'_id'`` value

    Args:
        theday: Day selector forwarded unchanged to ``get_all_news``.

    Returns:
        list: The enriched ``'_source'`` mappings, in input order.
    """
    articles = get_all_news(theday)
    # Single-argument parenthesised form: prints the same text as the
    # comma-separated print statement.
    print('%s news in %s' % (len(articles), theday))

    selected = []
    for article in articles:
        source = article['_source']
        text = source['content']

        # Both classifiers are evaluated for every item, whether or not the
        # first one already rejected it.
        is_hot = phgrocery(text)  # is this hot-spot news?
        is_financial = int(model_fintext.predict(text).predicted_y)  # is this financial text?
        if not (is_hot and is_financial):
            continue

        source['key'] = ' '.join(jieba_keywords(text, 5))
        source['text_id'] = article['_id']
        selected.append(source)

    return selected
Exemplo n.º 3
0
def compute_by_mid(mid, content):
    """Match one news item against all source channels.

    Extracts the top five keywords from ``content`` via ``jieba_keywords``
    and hands them, together with ``mid``, to ``all_source_match``.

    Args:
        mid: Identifier of the news item; printed and forwarded unchanged to
            ``all_source_match``.
        content: Text of the news item, used for keyword extraction.

    Returns:
        None. Any exception raised while matching is printed and swallowed
        (best-effort behaviour kept from the original).
    """
    # An earlier revision loaded ``content`` and ``news_id`` from a
    # TABLE_HOTNEWS lookup keyed by text_id. That lookup is disabled, so the
    # leftover reads of ``result`` / ``news_id`` / ``key_word`` raised
    # NameError. The caller-supplied arguments are used instead.
    keywords = jieba_keywords(content, 5)
    # NOTE(review): space-joined to mirror how hot_news() builds its 'key'
    # field; confirm all_source_match expects a string rather than a list.
    key_word = ' '.join(keywords)

    print('news id: %s' % (mid,))

    try:
        print('load all source data start!')
        all_source_match(mid, key_word)  # read and save the related texts of every channel
        print('load data finished!')

        # De-duplicate all texts saved to the topic_about table by new_id and channel.

        # Disabled pipeline stages, kept for reference:
        # print 'propagate compute start!'
        # propagateTask(news_id,theday,7)   # multi-channel trace-back records over 120 days; the production version should look back 7 days
        # print 'propagate compute end!'

        # print 'word cloud start!'
        # word_cloud_main(news_id)          # compute the word cloud and store it
        # print 'word cloud end!'

        # print 'clustering start!'
        # clustering_main(news_id)          # opinion clustering
        # print 'clustering end!'
    except Exception as e:
        # Deliberately best-effort: report the failure and carry on.
        print(e)
Exemplo n.º 4
0
def compute_by_mid(mid, content):
    """Match one news item against all source channels.

    Extracts the top five keywords from ``content`` via ``jieba_keywords``
    and hands them, together with ``mid``, to ``all_source_match``.

    Args:
        mid: Identifier of the news item; printed and forwarded unchanged to
            ``all_source_match``.
        content: Text of the news item, used for keyword extraction.

    Returns:
        None. Any exception raised while matching is printed and swallowed
        (best-effort behaviour kept from the original).
    """
    # An earlier revision loaded ``content`` and ``news_id`` from a
    # TABLE_HOTNEWS lookup keyed by text_id. That lookup is disabled, so the
    # leftover reads of ``result`` / ``news_id`` / ``key_word`` raised
    # NameError. The caller-supplied arguments are used instead.
    keywords = jieba_keywords(content, 5)
    # NOTE(review): space-joined to mirror how hot_news() builds its 'key'
    # field; confirm all_source_match expects a string rather than a list.
    key_word = ' '.join(keywords)

    print('news id: %s' % (mid,))

    try:
        print('load all source data start!')
        all_source_match(mid, key_word)  # read and save the related texts of every channel
        print('load data finished!')

        # De-duplicate all texts saved to the topic_about table by new_id and channel.

        # Disabled pipeline stages, kept for reference:
        # print 'propagate compute start!'
        # propagateTask(news_id,theday,7)   # multi-channel trace-back records over 120 days; the production version should look back 7 days
        # print 'propagate compute end!'

        # print 'word cloud start!'
        # word_cloud_main(news_id)          # compute the word cloud and store it
        # print 'word cloud end!'

        # print 'clustering start!'
        # clustering_main(news_id)          # opinion clustering
        # print 'clustering end!'
    except Exception as e:
        # Deliberately best-effort: report the failure and carry on.
        print(e)