def cal_topic_quotasystem_count_by_date(topic, start, end):
    # Determine the Weibo time range to query for this topic, then run every
    # quota-system metric (attention, duration, sensitivity, importance,
    # sentiment, coverage, person-sensitivity) over the topic's xapian index.
    # NOTE(review): the source arrived whitespace-mangled; the line structure
    # below was reconstructed from syntax -- confirm against the original file.
    start_date = ts2datetime(start)
    end_date = ts2datetime(end)  # an end timestamp of 2014-09-02 00:00:00 still counts toward the 09-01 day
    print 'start, end:', start_date, end_date
    windowsize = (end - start) / Day  # whole days in the span (Python 2 integer division)
    print 'windowsize:', windowsize
    datestr_list = []  # the span's dates formatted as 'YYYYMMDD'
    for i in range(windowsize):
        time = start + i * Day
        time_date = ts2datetime(time)
        datestr_list.append(time_date.replace('-', ''))
    print 'datestr_list:', datestr_list
    #
    # NOTE(review): in the mangled source a '#' sits immediately before this
    # assignment; kept live because topic_xapian_id is used right below and
    # this function receives exactly the arguments it needs -- confirm.
    topic_xapian_id = weibo_topic2xapian(topic, start, end)
    print 'topic_xapian_id:', topic_xapian_id
    xapian_search_weibo = getXapianWeiboByTopic(topic_xapian_id)
    '''
    xapian_search_weibo = getXapianWeiboByDuration(datestr_list) # 这里是根据时间段进行查询的
    xapian_search_topic = getXapianWeiboByTopic(topic) # 直接查topic建立的索引
    '''
    if xapian_search_weibo:
        print '******start_compute'
        quota_attention(topic, xapian_search_weibo, start_ts=start, end_ts=end)
        quota_duration(topic, start_ts=start, end_ts=end)
        print 'save duration success'
        quota_sensitivity(topic, start_ts=start, end_ts=end)
        print 'save sensitivity success'
        quota_importance(topic, start_ts=start, end_ts=end)
        print 'save importance success'
        quota_sentiment(topic, xapian_search_weibo, start_ts=start, end_ts=end)
        print 'save sentiment success'
        quota_coverage(topic, xapian_search_weibo, start_ts=start, end_ts=end)  # coverage metric
        print 'save coverage success'
        quota_person_sensitivity(topic, xapian_search_weibo, start_ts=start, end_ts=end)  # sensitive-person participation metric
        print 'save person_sensitivity success'
reverse=True) print 'top_source_user:'******''' count, top_weibo = xapian_search_weibo.search(query={'_id':top_source_mid}, fields=['timestamp']) print 'count:', count for i in top_weibo(): timestamp = i['timestamp'] print 'timestamp:', ts2date(int(timestamp)) ''' return sorted_result if __name__ == '__main__': ''' topic = u'高校思想宣传' date = '2015-02-01' windowsize = 9 topic_xapian_id = '54ccbfab5a220134d9fc1b37' ''' topic = TOPIC date = END start_ts = datetime2ts(START) end_ts = datetime2ts(END) #windowsize = (end_ts - start_ts) / Day windowsize = (end_ts - start_ts) / Day / 2 topic_xapian_id = weibo_topic2xapian(topic, start_ts, end_ts) get_interval_count(topic, date, windowsize, topic_xapian_id)
def main(topic, start_ts, end_ts): ''' topics = _topic_not_calc() # topics=[{id:x,module:x,status:x,topic:x,start:x,end:x,db_date:x}] ''' topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic==topic ,\ TopicStatus.start==start_ts ,\ TopicStatus.end==end_ts ,\ TopicStatus.module=='identify' ,\ TopicStatus.status==-1).first() if topic_status_info: #topic = topics[0] # 每次只计算一个----为了做一个缓冲,每个n时间才计算一个 print 'topic_id', topic_status_info.id start_ts = topic_status_info.start end_ts = topic_status_info.end db_date = topic_status_info.db_date topicname = topic _update_topic_status2Computing(topicname, start_ts, end_ts, db_date) print 'update_status' topic_id = acquire_topic_id(topicname, start_ts, end_ts) # 重新获取id是因为TopicStatus中id是自增加的,进行更新后,id就不是原来的那一个了 windowsize = (end_ts - start_ts) / Day # 确定时间跨度的大小 date = ts2datetime(end_ts) print 'start topic2xapianid' topic_xapian_id = weibo_topic2xapian(topicname, start_ts, end_ts) print 'topic_xapian_id:', topic_xapian_id print 'start compute first_nodes' start_date = ts2datetime(start_ts) # used to compute the first user get_first_node(topicname, start_date, date, windowsize, topic_xapian_id) print 'end compute first_nodes' # print 'start make network' max_size = MAX_SIZE attribute_add = True g, gg, new_attribute_dict, ds_dg, ds_udg, ds_new_attribute_dict = make_network(topicname, date, windowsize, topic_xapian_id, max_size, attribute_add) print 'write gexf file' real_topic_id = acquire_real_topic_id(topicname, start_ts, end_ts) if not real_topic_id: print 'the topic not exist' return None key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize) print 'gexf_file:', str(GRAPH_PATH)+str(key)+'_g_graph.gexf' nx.write_gexf(g, str(GRAPH_PATH) + str(key) + '_g_graph.gexf') nx.write_gexf(gg, str(GRAPH_PATH) + str(key) + '_gg_graph.gexf') nx.write_gexf(ds_dg, str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf') nx.write_gexf(ds_udg, str(GRAPH_PATH) + str(key) + '_ds_udg_graph.gexf') 
save_attribute_dict(new_attribute_dict, 'g') save_attribute_dict(ds_new_attribute_dict, 'ds_g') print 'end make network' print 'start PageRank' all_uid_pr, ds_all_uid_pr, data, ds_data = pagerank_rank(TOPK, date, topic_id, windowsize, topicname, real_topic_id) print 'len(all_uid_pr):', len(all_uid_pr) print 'end PageRank' print 'start make network graph' topic_id = int(topic_id) windowsize = int(windowsize) if not topic_id: # 待删 gexf = '' else: gexf, ds_gexf = make_network_graph(date, topic_id, topicname, windowsize, all_uid_pr, data, ds_all_uid_pr, ds_data, real_topic_id) print 'save gexf' save_gexf_results(topicname, date, windowsize, gexf, gexf_type) save_gexf_results(topicname, date, windowsize, ds_gexf, ds_gexf_type) print 'start fu_tr' get_interval_count(topicname, date, windowsize, topic_xapian_id) print 'update_topic_end' _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
        user_info['statuses_count'] = result['statuses_count']
    else:
        # Profile lookup failed: fill every field with the u'未知'
        # ("unknown") placeholder so callers always receive a full dict.
        user_info['name'] = u'未知'
        user_info['location'] = u'未知'
        user_info['friends_count'] = u'未知'
        user_info['followers_count'] = u'未知'
        user_info['profile_image_url'] = 'no'
        # NOTE(review): friends_count/followers_count are assigned twice in
        # this branch -- the duplicates are redundant but harmless.
        user_info['friends_count'] = u'未知'
        user_info['followers_count'] = u'未知'
        user_info['created_at'] = u'未知'
        user_info['statuses_count'] = u'未知'
    return user_info


if __name__=='__main__':
    '''
    topic = u'高校思想宣传'
    date = '2015-02-01'
    windowsize = 9
    topic_xapian_id = '54ccbfab5a220134d9fc1b37'
    '''
    # Drive get_interval_count over the configured topic/time span.
    topic = TOPIC
    date = END
    start_ts = datetime2ts(START)
    end_ts = datetime2ts(END)
    windowsize = (end_ts - start_ts) / Day  # whole days in the span
    topic_xapian_id = weibo_topic2xapian(topic, start_ts, end_ts)
    get_interval_count(topic, date, windowsize, topic_xapian_id)
def main(topic, start_ts, end_ts): ''' topics = _topic_not_calc() # topics=[{id:x,module:x,status:x,topic:x,start:x,end:x,db_date:x}] ''' topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic==topic ,\ TopicStatus.start==start_ts ,\ TopicStatus.end==end_ts ,\ TopicStatus.module=='identify' ,\ TopicStatus.status==-1).first() if topic_status_info: #topic = topics[0] # 每次只计算一个----为了做一个缓冲,每个n时间才计算一个 print 'topic_id', topic_status_info.id start_ts = topic_status_info.start end_ts = topic_status_info.end db_date = topic_status_info.db_date topicname = topic _update_topic_status2Computing(topicname, start_ts, end_ts, db_date) print 'update_status' topic_id = acquire_topic_id( topicname, start_ts, end_ts) # 重新获取id是因为TopicStatus中id是自增加的,进行更新后,id就不是原来的那一个了 windowsize = (end_ts - start_ts) / Day # 确定时间跨度的大小 date = ts2datetime(end_ts) print 'start topic2xapianid' topic_xapian_id = weibo_topic2xapian(topicname, start_ts, end_ts) print 'topic_xapian_id:', topic_xapian_id print 'start compute first_nodes' start_date = ts2datetime(start_ts) # used to compute the first user get_first_node(topicname, start_date, date, windowsize, topic_xapian_id) print 'end compute first_nodes' # print 'start make network' max_size = MAX_SIZE attribute_add = True g, gg, new_attribute_dict, ds_dg, ds_udg, ds_new_attribute_dict = make_network( topicname, date, windowsize, topic_xapian_id, max_size, attribute_add) print 'write gexf file' real_topic_id = acquire_real_topic_id(topicname, start_ts, end_ts) if not real_topic_id: print 'the topic not exist' return None key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize) print 'gexf_file:', str(GRAPH_PATH) + str(key) + '_g_graph.gexf' nx.write_gexf(g, str(GRAPH_PATH) + str(key) + '_g_graph.gexf') nx.write_gexf(gg, str(GRAPH_PATH) + str(key) + '_gg_graph.gexf') nx.write_gexf(ds_dg, str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf') nx.write_gexf(ds_udg, str(GRAPH_PATH) + str(key) + '_ds_udg_graph.gexf') 
save_attribute_dict(new_attribute_dict, 'g') save_attribute_dict(ds_new_attribute_dict, 'ds_g') print 'end make network' print 'start PageRank' all_uid_pr, ds_all_uid_pr, data, ds_data = pagerank_rank( TOPK, date, topic_id, windowsize, topicname, real_topic_id) print 'len(all_uid_pr):', len(all_uid_pr) print 'end PageRank' print 'start make network graph' topic_id = int(topic_id) windowsize = int(windowsize) if not topic_id: # 待删 gexf = '' else: gexf, ds_gexf = make_network_graph(date, topic_id, topicname, windowsize, all_uid_pr, data, ds_all_uid_pr, ds_data, real_topic_id) print 'save gexf' save_gexf_results(topicname, date, windowsize, gexf, gexf_type) save_gexf_results(topicname, date, windowsize, ds_gexf, ds_gexf_type) print 'start fu_tr' get_interval_count(topicname, date, windowsize, topic_xapian_id) print 'update_topic_end' _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)