Пример #1
0
def main(topic, start_ts, end_ts):
    """Run the 'i_news' computations for one pending topic window.

    Looks up the TopicStatus row whose topic, start and end equal the
    arguments and whose module is 'i_news' and status is -1. If no such row
    exists the function does nothing. Otherwise it marks the topic via
    _update_topic_status2Computing, runs early_join and trend_user on the
    topic's MongoDB collections, and finally calls
    _update_topic_status2Completed.

    Args:
        topic: topic name to match against TopicStatus.topic.
        start_ts: value to match against TopicStatus.start.
        end_ts: value to match against TopicStatus.end.
    """
    # Fetch the topics in topic_status that have not been computed yet.
    # NOTE(review): the result is not used anywhere below; the call is kept
    # only in case it has side effects -- confirm and drop it if it is a
    # plain query.
    _topic_not_calc(status='-1', module='i_news')

    pending = db.session.query(TopicStatus).filter(
        TopicStatus.topic == topic,
        TopicStatus.start == start_ts,
        TopicStatus.end == end_ts,
        TopicStatus.module == 'i_news',
        TopicStatus.status == -1,
    ).first()
    if not pending:
        return

    # Work from the values stored on the matched row rather than the
    # caller-supplied arguments.
    start_ts = pending.start
    end_ts = pending.end
    topicname = pending.topic
    db_date = pending.db_date

    _update_topic_status2Computing(topicname, start_ts, end_ts, db_date, 'i_news')
    print('update_status')

    # MongoDB collections that correspond to this topic.
    print('get_dynamic_mongo')
    news_collection, comment_collection = get_dynamic_mongo(topicname, start_ts, end_ts)

    # Early participants.
    print('start compute early_join')
    early_join(topicname, start_ts, end_ts, news_collection)

    # Trend initiators.
    print('start compute trend_user')
    trend_user(topicname, start_ts, end_ts, news_collection, comment_collection)

    print('update_topic_end')
    _update_topic_status2Completed(topicname, start_ts, end_ts, db_date, 'i_news')
Пример #2
0
def main():
    """Rank users and build the network graph for the first pending topic.

    Takes the first entry returned by _topic_not_calc(); returns without
    doing anything when there is none. For that topic it:

    1. calls _update_topic_status2Computing,
    2. resolves the topic id with acquire_topic_id,
    3. runs degree_rank when the window is longer than 7 (in units of
       ``Day``), otherwise pagerank_rank,
    4. builds the graph with make_network_graph (an empty string is stored
       when the topic id is falsy) and saves it with save_gexf_results,
    5. calls _update_topic_status2Completed.
    """
    topics = _topic_not_calc()
    if not topics or not len(topics):
        return

    topic = topics[0]
    start_ts = topic.start
    end_ts = topic.end
    db_date = topic.db_date
    topicname = topic.topic

    _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
    topic_id = acquire_topic_id(topicname, start_ts, end_ts)
    # Window length expressed in units of the module-level ``Day`` constant.
    windowsize = (end_ts - start_ts) / Day
    date = ts2datetime(end_ts)

    # The ranking helper is chosen by window length.
    if windowsize > 7:
        degree_rank(TOPK, date, topic_id, windowsize)
    else:
        pagerank_rank(TOPK, date, topic_id, windowsize)

    # The ranking helpers above receive the raw values; the graph and save
    # steps below receive the int-coerced ones.
    topic_id = int(topic_id)
    windowsize = int(windowsize)

    gexf = make_network_graph(date, topic_id, topicname, windowsize) if topic_id else ''
    save_gexf_results(topicname, date, windowsize, gexf)

    _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
Пример #3
0
def main():
    topics = _topic_not_calc()
    if topics and len(topics):
    	topic = topics[0]

        start_ts = topic.start
        end_ts = topic.end
        db_date = topic.db_date
        topicname = topic.topic
        print time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())), topicname.encode('utf-8'), 'start'
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        result  = calculate(topicname,start_ts,end_ts)
        print time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())), topicname.encode('utf-8'), result
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
Пример #4
0
def _check_run_notcustomize_topic(during=Fifteenminutes):
    '''定时执行非定制话题表中status为0的话题数量、关键词、关键微博计算
    '''

    topics = _topic_not_calc()
    if topics and len(topics):
        topic = topics[0]
        start_ts = topic.start
        end_ts = topic.end
        db_date = topic.db_date
        topicname = topic.topic
        
        # update status to 0
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        
        print topicname.encode('utf-8'),  ' run realtime job from %s to %s ' % (start_ts, end_ts)
        sentimentRealTimeTopic(topicname, start_ts, end_ts+24*3600)

        # update status to 1
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
def topic_not_calc():
    """Run the per-day sentiment job for every topic from _topic_not_calc().

    For each topic the window is [topic.end - topic.range, topic.end]. Both
    bounds are passed through ts2datetime and back through datetime2ts, and
    the span between the results is split into ``Day``-sized steps. For each
    step the weibo index returned by getXapianWeiboByDate (keyed by a
    'YYYYMMDD' string) is printed and, when truthy, passed to
    sentimentRealTimeTopic together with the topic text and that step's
    start/end timestamps.
    """
    for topic in _topic_not_calc():
        query = topic.topic
        end_ts = topic.end
        during = topic.range

        start_ts = end_ts - during
        start_datestr = ts2datetime(start_ts)
        end_datestr = ts2datetime(end_ts)

        # NOTE(review): presumably this round trip snaps both bounds to day
        # boundaries -- confirm against ts2datetime/datetime2ts.
        first_day_ts = datetime2ts(start_datestr)
        last_day_ts = datetime2ts(end_datestr)
        days = (last_day_ts - first_day_ts) / Day

        for i in range(0, days):
            day_start_ts = first_day_ts + i * Day
            # isoformat() yields 'YYYY-MM-DD'; the lookup key drops the dashes.
            datestr = datetime.date.fromtimestamp(day_start_ts).isoformat().replace('-', '')
            xapian_search_weibo = getXapianWeiboByDate(datestr)
            print(xapian_search_weibo)
            if xapian_search_weibo:
                sentimentRealTimeTopic(xapian_search_weibo, query,
                                       day_start_ts, first_day_ts + (i + 1) * Day)