def get_maker(topic, new_peaks, new_bottom, ts_list, collection): begin_ts = ts_list[new_bottom[0]] end_ts = ts_list[new_peaks[0]] print 'get_maker news_bottom:', new_bottom[0] print 'get_maker news_peak:', new_peaks[0] print 'get_maker ts_list:', ts2date(ts_list[0]) print 'get_maker start_ts:', ts2date(begin_ts) print 'get_maker end_ts:', ts2date(end_ts) if begin_ts > end_ts: begin_ts = ts_list[0] begin_ts = begin_ts - Hour filter_dict = get_filter_dict() query_dict = {'timestamp': {'$gte': begin_ts, '$lte': end_ts}} ''' maker_list = collection.find(query_dict, filter_dict).sort('weight').limit(maker_news_count) if not maker_list: return [] else: return maker_list ''' input_news_list = collection.find(query_dict, filter_dict) # 第一个波段内所有新闻进行分词 news_cut_list = cut_news(input_news_list) # 计算top50的关键词 keywords_list = get_news_keywords(news_cut_list) # 计算波段内新闻的关键词占比weight weight_list = get_news_weight(news_cut_list, keywords_list) # 排序获取weight前20的news maker_list = get_top_weight_news(weight_list) if not maker_list: return [] else: return maker_list
def get_maker(topic, new_peaks, new_bottom, ts_list, collection): begin_ts = ts_list[new_bottom[0]] end_ts = ts_list[new_peaks[0]] print 'get_maker news_bottom:', new_bottom[0] print 'get_maker news_peak:', new_peaks[0] print 'get_maker ts_list:', ts2date(ts_list[0]) print 'get_maker start_ts:', ts2date(begin_ts) print 'get_maker end_ts:', ts2date(end_ts) if begin_ts > end_ts: begin_ts = ts_list[0] begin_ts = begin_ts - Hour filter_dict = get_filter_dict() query_dict = {'timestamp':{'$gte':begin_ts, '$lte':end_ts}} ''' maker_list = collection.find(query_dict, filter_dict).sort('weight').limit(maker_news_count) if not maker_list: return [] else: return maker_list ''' input_news_list = collection.find(query_dict, filter_dict) # 第一个波段内所有新闻进行分词 news_cut_list = cut_news(input_news_list) # 计算top50的关键词 keywords_list = get_news_keywords(news_cut_list) # 计算波段内新闻的关键词占比weight weight_list = get_news_weight(news_cut_list, keywords_list) # 排序获取weight前20的news maker_list = get_top_weight_news(weight_list) if not maker_list: return [] else: return maker_list
def _count_found(found):
    """Return how many documents ``found`` holds, or 0 if it cannot be counted."""
    try:
        return len(found)  # plain sequences (list, tuple, ...)
    except TypeError:
        pass  # e.g. a pymongo Cursor, which does not implement __len__
    try:
        counter = getattr(found, 'count', None)
        if callable(counter):
            return counter()  # cursor-provided count (pymongo < 4.0)
        return sum(1 for _ in found)  # last resort: walk the results
    except Exception:
        # Deliberate best effort, as in the original code: a result that
        # cannot be counted is treated as "no comments" instead of aborting
        # the whole ranking.
        return 0


def sort_news_by_comment(query_dict, news_collection, comment_collection):
    """Return the news matching ``query_dict``, most-commented first.

    Every news document fetched from ``news_collection`` (projected through
    ``get_filter_dict()``) receives a ``'comments_count'`` key holding the
    number of documents in ``comment_collection`` whose ``'news_id'`` equals
    the news ``'id'``.

    The previous implementation called ``len()`` on the result of
    ``comment_collection.find(...)``.  A pymongo cursor has no ``__len__``,
    so the resulting ``TypeError`` was swallowed by a bare ``except`` and
    every item ended up with a count of 0, which made the sort a no-op.
    The count is now obtained from the cursor itself.

    Args:
        query_dict: query passed to ``news_collection.find``.
        news_collection: object exposing ``find(query, projection)``.
        comment_collection: object exposing ``find(query)``.

    Returns:
        list of news dicts sorted by ``'comments_count'`` in descending
        order; items with equal counts keep their query order.
    """
    filter_dict = get_filter_dict()
    results = []
    for news in news_collection.find(query_dict, filter_dict):
        # All comments that belong to this news item.
        comments = comment_collection.find({'news_id': news['id']})
        news['comments_count'] = _count_found(comments)
        results.append(news)

    results.sort(key=lambda item: item['comments_count'], reverse=True)
    return results
def _count_found(found):
    """Return how many documents ``found`` holds, or 0 if it cannot be counted."""
    try:
        return len(found)  # plain sequences (list, tuple, ...)
    except TypeError:
        pass  # e.g. a pymongo Cursor, which does not implement __len__
    try:
        counter = getattr(found, 'count', None)
        if callable(counter):
            return counter()  # cursor-provided count (pymongo < 4.0)
        return sum(1 for _ in found)  # last resort: walk the results
    except Exception:
        # Deliberate best effort, as in the original code: a result that
        # cannot be counted is treated as "no comments" instead of aborting
        # the whole ranking.
        return 0


def sort_news_by_comment(query_dict, news_collection, comment_collection):
    """Return the news matching ``query_dict``, most-commented first.

    Every news document fetched from ``news_collection`` (projected through
    ``get_filter_dict()``) receives a ``'comments_count'`` key holding the
    number of documents in ``comment_collection`` whose ``'news_id'`` equals
    the news ``'id'``.

    The previous implementation called ``len()`` on the result of
    ``comment_collection.find(...)``.  A pymongo cursor has no ``__len__``,
    so the resulting ``TypeError`` was swallowed by a bare ``except`` and
    every item ended up with a count of 0, which made the sort a no-op.
    The count is now obtained from the cursor itself.

    Args:
        query_dict: query passed to ``news_collection.find``.
        news_collection: object exposing ``find(query, projection)``.
        comment_collection: object exposing ``find(query)``.

    Returns:
        list of news dicts sorted by ``'comments_count'`` in descending
        order; items with equal counts keep their query order.
    """
    filter_dict = get_filter_dict()
    results = []
    for news in news_collection.find(query_dict, filter_dict):
        # All comments that belong to this news item.
        comments = comment_collection.find({'news_id': news['id']})
        news['comments_count'] = _count_found(comments)
        results.append(news)

    results.sort(key=lambda item: item['comments_count'], reverse=True)
    return results