def main(topic, start_ts, end_ts):
    # Fetch the topics in topic_status that have not been computed yet
    topics = _topic_not_calc(status='-1', module='i_news')
    topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic == topic,
                                                             TopicStatus.start == start_ts,
                                                             TopicStatus.end == end_ts,
                                                             TopicStatus.module == 'i_news',
                                                             TopicStatus.status == -1).first()
    if topic_status_info:
        topic_id = topic_status_info.id
        start_ts = topic_status_info.start
        end_ts = topic_status_info.end
        topicname = topic_status_info.topic
        db_date = topic_status_info.db_date
        # Mark the topic as computing before starting
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date, 'i_news')
        print 'update_status'
        # MongoDB collections corresponding to the topic
        print 'get_dynamic_mongo'
        news_collection, comment_collection = get_dynamic_mongo(topicname, start_ts, end_ts)
        # Early participants
        print 'start compute early_join'
        early_join(topicname, start_ts, end_ts, news_collection)
        # Trend initiators
        print 'start compute trend_user'
        trend_user(topicname, start_ts, end_ts, news_collection, comment_collection)
        print 'update_topic_end'
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date, 'i_news')
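# Hedged sketch (not part of the original module): the status-transition helpers
# used above are assumed to flip TopicStatus.status for the matching row. A
# minimal SQLAlchemy version, reusing the TopicStatus model and db.session seen
# in main() above; the helper name and the new_status parameter are hypothetical.
def _update_topic_status_sketch(topicname, start_ts, end_ts, db_date, module, new_status):
    row = db.session.query(TopicStatus).filter(TopicStatus.topic == topicname,
                                               TopicStatus.start == start_ts,
                                               TopicStatus.end == end_ts,
                                               TopicStatus.db_date == db_date,
                                               TopicStatus.module == module).first()
    if row:
        row.status = new_status
        db.session.commit()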
def main():
    topics = _topic_not_calc()
    if topics and len(topics):
        topic = topics[0]
        start_ts = topic.start
        end_ts = topic.end
        db_date = topic.db_date
        topicname = topic.topic
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        topic_id = acquire_topic_id(topicname, start_ts, end_ts)
        windowsize = (end_ts - start_ts) / Day
        date = ts2datetime(end_ts)
        if windowsize > 7:
            degree_rank(TOPK, date, topic_id, windowsize)
        else:
            pagerank_rank(TOPK, date, topic_id, windowsize)
        topic_id = int(topic_id)
        windowsize = int(windowsize)
        if not topic_id:
            gexf = ''
        else:
            gexf = make_network_graph(date, topic_id, topicname, windowsize)
        save_gexf_results(topicname, date, windowsize, gexf)
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
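# Hedged worked example (assumption: Day == 24 * 3600 seconds). windowsize above
# is the topic span measured in whole days; spans longer than a week go to
# degree_rank, shorter ones to pagerank_rank. The timestamps below are made up
# purely for illustration.
def _windowsize_example():
    Day = 24 * 3600
    start_ts, end_ts = 1380556800, 1381852800
    windowsize = (end_ts - start_ts) / Day   # 15 days -> degree_rank branch
    return windowsize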
def main():
    topics = _topic_not_calc()
    if topics and len(topics):
        topic = topics[0]
        start_ts = topic.start
        end_ts = topic.end
        db_date = topic.db_date
        topicname = topic.topic
        print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())), topicname.encode('utf-8'), 'start'
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        result = calculate(topicname, start_ts, end_ts)
        print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())), topicname.encode('utf-8'), result
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
def _check_run_notcustomize_topic(during=Fifteenminutes):
    '''Periodically run the count, keyword and key-weibo computations for topics
    whose status is 0 in the non-customized topic table.
    '''
    topics = _topic_not_calc()
    if topics and len(topics):
        topic = topics[0]
        start_ts = topic.start
        end_ts = topic.end
        db_date = topic.db_date
        topicname = topic.topic
        # update status to 0 (computing)
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        print topicname.encode('utf-8'), ' run realtime job from %s to %s ' % (start_ts, end_ts)
        # the end of the window is pushed forward by one day (24 * 3600 seconds)
        sentimentRealTimeTopic(topicname, start_ts, end_ts + 24 * 3600)
        # update status to 1 (completed)
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
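# Hedged scheduling sketch (illustration only): the during=Fifteenminutes default
# suggests _check_run_notcustomize_topic is meant to run on a fixed interval. One
# simple driver loop, assuming the interval is given in seconds; the function and
# parameter names below are hypothetical.
import time

def _run_notcustomize_forever(interval=15 * 60):
    while True:
        _check_run_notcustomize_topic(during=interval)
        time.sleep(interval)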
def topic_not_calc():
    topics = _topic_not_calc()
    for topic in topics:
        query = topic.topic
        end_ts = topic.end
        during = topic.range
        start_ts = end_ts - during
        start_datestr = ts2datetime(start_ts)
        end_datestr = ts2datetime(end_ts)
        ts_1 = datetime2ts(start_datestr)
        ts_2 = datetime2ts(end_datestr)
        days = (ts_2 - ts_1) / Day
        date_list = []
        for i in range(0, days):
            datestr = datetime.date.fromtimestamp(ts_1 + i * Day).isoformat()
            date_list.append(datestr)
            datestr = datestr.replace('-', '')
            xapian_search_weibo = getXapianWeiboByDate(datestr)
            print xapian_search_weibo
            if xapian_search_weibo:
                sentimentRealTimeTopic(xapian_search_weibo, query, ts_1 + i * Day, ts_1 + (i + 1) * Day)
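# Hedged sketch of the per-day bucketing used in topic_not_calc() above, assuming
# Day == 24 * 3600 and that getXapianWeiboByDate() expects compact 'YYYYMMDD'
# strings (which is why the '-' characters are stripped). The helper name is
# hypothetical and exists only for illustration.
import datetime

def _daily_datestrs_sketch(ts_1, days, Day=24 * 3600):
    # One compact date string per day in the [ts_1, ts_1 + days * Day) range.
    return [datetime.date.fromtimestamp(ts_1 + i * Day).isoformat().replace('-', '')
            for i in range(days)]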