def save_csv():
    topic_list = [u'东盟,博览会', u'全军政治工作会议', u'外滩踩踏', u'高校思想宣传',
                  u'APEC', u'张灵甫遗骨疑似被埋羊圈', u'两会2015']
    time_range_list = [('2013-09-02', '2013-09-07'), ('2014-10-31', '2014-11-15'),
                       ('2014-12-31', '2015-01-09'), ('2015-01-23', '2015-02-02'),
                       ('2014-11-01', '2014-11-10'), ('2015-01-23', '2015-02-02'),
                       ('2015-03-02', '2015-03-15')]
    for i in range(len(topic_list)):
        topic = topic_list[i]
        start_date = time_range_list[i][0]
        start_ts = datetime2ts(start_date)
        end_date = time_range_list[i][1]
        end_ts = datetime2ts(end_date) + 3600 * 24  # include the whole end date
        write_topic_excel(topic, start_ts, end_ts)
        print 'success write topic:', topic
    return 'success save'
def get_timestamp_count(query_dict, topic, date, windowsize):
    during = 3600
    day = 24 * 3600
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    interval = (end_ts - start_ts) / during  # count at hourly granularity
    time_count = []
    for i in range(interval, 0, -1):
        begin = end_ts - during * i
        end = begin + during
        query_dict["timestamp"] = {"$gt": begin, "$lt": end}
        count, result = xapian_search_weibo.search(query=query_dict, fields=["_id"])
        time_count.append([end, count])
    return time_count
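# Minimal usage sketch (assumption: run from this module, where datetime2ts, ts2date
# and getXapianWeiboByTopic are already imported). It only illustrates the shape of
# the returned series: one [hour_end_ts, weibo_count] pair per hour in the window.
# The topic name, end date and window size are taken from get_all_data() below and
# are purely illustrative.
def print_hourly_counts_example():
    series = get_timestamp_count({}, u'APEC', '2014-11-20', 15)
    for hour_end, weibo_count in series:
        print ts2date(hour_end), weibo_count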
def acquire_real_topic_id(topic, date, windowsize):
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    item = db.session.query(Topics).filter(Topics.topic==topic,
                                           Topics.start_ts==start_ts,
                                           Topics.end_ts==end_ts).first()
    if item:
        real_topic_id = item.id
    else:
        real_topic_id = None
    return real_topic_id
def get_all_data():
    topic_list = [u'东盟,博览会', u'全军政治工作会议', u'外滩踩踏', u'高校思想宣传',
                  u'APEC', u'张灵甫遗骨疑似被埋羊圈']
    time_range_list = [('2013-09-08', 6), ('2014-11-16', 17), ('2015-01-10', 10),
                       ('2015-02-01', 9), ('2014-11-20', 15), ('2015-02-02', 10)]
    result_list = []
    for i in range(len(topic_list)):
        topic_name = topic_list[i]
        end_date = time_range_list[i][0]
        windowsize = time_range_list[i][1]
        end_ts = datetime2ts(end_date)
        start_ts = end_ts - Day * windowsize
        print 'start compute topic:', topic_name
        result = get_topic_data(topic_name, start_ts, end_ts)
        result_list.append(result)
    return json.dumps(result_list)
def read_uid_weibos(topic, date, windowsize, uid):
    end_ts = datetime2ts(date)
    start_ts = end_ts - Day * windowsize
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {"user": uid}
    count, results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    weibo_list = []
    if count:
        for weibo in results():
            wid = weibo["_id"]
            uid = weibo["user"]
            result = user_search.search_by_id(uid, fields=user_fields_list)
            if result:
                name = result["name"]
                location = result["location"]
                friends_count = result["friends_count"]
                followers_count = result["followers_count"]
                created_at = result["created_at"]
                statuses_count = result["statuses_count"]
                profile_image_url = result["profile_image_url"]
            else:
                name = u"未知"
                location = u"未知"
                friends_count = u"未知"
                followers_count = u"未知"
                created_at = u"未知"
                statuses_count = u"未知"
                profile_image_url = u"no"
            text = weibo["text"]
            geo = weibo["geo"]
            source = weibo["source"]
            timestamp = weibo["timestamp"]
            date = ts2date(timestamp)
            reposts_count = weibo["reposts_count"]
            comments_count = weibo["comments_count"]
            weibo_link = weiboinfo2url(uid, wid)
            domain = uid2domain(uid)
            row = [wid, uid, name, location, friends_count, followers_count, created_at,
                   statuses_count, profile_image_url, date, text, geo, source,
                   reposts_count, comments_count, weibo_link]
            weibo_list.append(row)
    sort_weibo_list = sorted(weibo_list, key=lambda x: x[9])  # sort by post date
    return sort_weibo_list
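# Usage sketch only: dump one user's weibos within the topic window to a CSV file.
# The uid below is a placeholder, and the column order mirrors the row built in
# read_uid_weibos() above (index 9 is the post date, which the function sorts on).
# Uses the standard csv module with utf-8 encoding, as the Python 2 csv writer does
# not accept unicode cells directly.
import csv

def dump_uid_weibos_example():
    rows = read_uid_weibos(u'APEC', '2014-11-20', 15, 1234567890)  # placeholder uid
    header = ['wid', 'uid', 'name', 'location', 'friends_count', 'followers_count',
              'created_at', 'statuses_count', 'profile_image_url', 'date', 'text',
              'geo', 'source', 'reposts_count', 'comments_count', 'weibo_link']
    with open('uid_weibos.csv', 'wb') as f:  # 'wb' for the Python 2 csv module
        writer = csv.writer(f)
        writer.writerow(header)
        for row in rows:
            writer.writerow([unicode(item).encode('utf-8') for item in row])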
def c_weibo_by_ts(topic, date, windowsize, uid, network_type, cid, rank_method):
    real_topic_id = acquire_real_topic_id(topic, date, windowsize)
    if not real_topic_id:
        return None
    # the topic exists, so continue with the community computation
    key_pre = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
    # choose the graph file used for the community computation
    if network_type == 'source_graph':
        key = str(GRAPH_PATH) + key_pre + '_gg_graph.gexf'
    elif network_type == 'direct_superior_graph':
        key = str(GRAPH_PATH) + key_pre + '_ds_udg_graph.gexf'
    g = nx.read_gexf(key)
    # node list of the community cid that contains node uid
    # (note: depends on where the community attribute of each node is stored)
    community_user_list = get_community_user(g, uid, cid)
    end_ts = datetime2ts(date)
    start_ts = end_ts - Day * windowsize
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {'$or': []}
    for member_uid in community_user_list:
        query_dict['$or'].append({'user': int(member_uid)})
    community_info = []
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        return None
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'
        text = weibo['text']
        timestamp = weibo['timestamp']
        date = ts2date(timestamp)
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        community_info.append([_id, name, location, friends_count, followers_count,
                               created_at, statuses_count, profile_image_url, text, date,
                               reposts_count, source, geo, comments_count, sentiment_name,
                               weibo_link, uid, timestamp])
    if rank_method == 'reposts_count':
        sort_community_info = sorted(community_info, key=lambda x: x[10], reverse=True)  # sort by reposts_count
    else:
        sort_community_info = sorted(community_info, key=lambda x: x[17])  # sort by timestamp
    return sort_community_info
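# Illustrative call only: c_weibo_by_ts() reads a pre-built community graph from
# GRAPH_PATH, so the topic/date/uid/cid values below are placeholders that must match
# an existing '<topic_id>_<date>_<windowsize>_gg_graph.gexf' file. The function sorts
# by reposts_count when rank_method == 'reposts_count' and by timestamp otherwise.
def community_weibos_example():
    by_reposts = c_weibo_by_ts(u'APEC', '2014-11-20', 15, '1234567890',
                               'source_graph', 0, 'reposts_count')
    by_time = c_weibo_by_ts(u'APEC', '2014-11-20', 15, '1234567890',
                            'source_graph', 0, 'timestamp')
    if by_reposts:
        print 'top reposted weibo link:', by_reposts[0][15]
    if by_time:
        print 'earliest weibo in community:', by_time[0][9]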
def get_info(neighbor_list, topic, date, windowsize):
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {'$or': []}
    for uid in neighbor_list:
        query_dict['$or'].append({'user': int(uid)})
    neighbor_info = []
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        return None, None, None, None
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'
        text = weibo['text']
        timestamp = weibo['timestamp']
        date = ts2date(timestamp)
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        try:
            sentiment_count[sentiment] += 1
        except KeyError:
            sentiment_count[sentiment] = 1
        neighbor_info.append([_id, name, location, friends_count, followers_count,
                              created_at, statuses_count, profile_image_url, text, date,
                              reposts_count, source, geo, comments_count, sentiment_name,
                              weibo_link, uid])
    sort_neighbor_info = sorted(neighbor_info, key=lambda x: x[10], reverse=True)  # sort by reposts_count
    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda x: x[1], reverse=True)
    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])
    return sort_neighbor_info, sort_top_keyword, new_sentiment_list, query_dict
    lowbound = (start_ts / unit) * unit
    if customized == '0':
        count = db.session.query(func.sum(SentimentCountRatio.count)).filter(
            SentimentCountRatio.end > lowbound,
            SentimentCountRatio.end <= upbound,
            SentimentCountRatio.sentiment == sentiment,
            SentimentCountRatio.range == unit,
            SentimentCountRatio.query == query).all()
    else:
        count = db.session.query(func.sum(SentimentCount.count)).filter(
            SentimentCount.end > lowbound,
            SentimentCount.end <= upbound,
            SentimentCount.sentiment == sentiment,
            SentimentCount.range == unit,
            SentimentCount.query == query).all()
    if count and count[0] and count[0][0]:
        count = [end_ts * 1000, int(count[0][0])]
    else:
        count = [end_ts * 1000, 0]
    return count


if __name__ == '__main__':
    emotions_kv = {'happy': 1, 'angry': 2, 'sad': 3}
    end_ts = datetime2ts('2013-09-18')
    during = 1 * Day
    for k, v in emotions_kv.iteritems():
        count = search_topic_counts(end_ts, during, v, domain=0)
def community_result(community_user_list, topic, date, windowsize):
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {"$or": []}
    for uid in community_user_list:
        query_dict["$or"].append({"user": int(uid)})
    community_info = []
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        return None, None, None, None
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo["user"]
        _id = weibo["_id"]
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result["name"]
            location = result["location"]
            friends_count = result["friends_count"]
            followers_count = result["followers_count"]
            created_at = result["created_at"]
            statuses_count = result["statuses_count"]
            profile_image_url = result["profile_image_url"]
        else:
            name = u"未知"
            location = u"未知"
            friends_count = u"未知"
            followers_count = u"未知"
            created_at = u"未知"
            statuses_count = u"未知"
            profile_image_url = u"no"
        text = weibo["text"]
        timestamp = weibo["timestamp"]
        date = ts2date(timestamp)
        reposts_count = weibo["reposts_count"]
        source = weibo["source"]
        geo = weibo["geo"]
        comments_count = weibo["comments_count"]
        sentiment = weibo["sentiment"]
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        domain = uid2domain(uid)
        try:
            sentiment_count[sentiment] += 1
        except KeyError:
            sentiment_count[sentiment] = 1
        community_info.append([_id, name, location, friends_count, followers_count,
                               created_at, statuses_count, profile_image_url, text, date,
                               reposts_count, source, geo, comments_count, sentiment_name,
                               weibo_link, domain])
    sort_community_info = sorted(community_info, key=lambda x: x[10], reverse=True)  # sort by reposts_count
    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=["terms"]), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda x: x[1], reverse=True)
    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])
    return sort_community_info, sort_top_keyword, new_sentiment_list, query_dict
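# Usage sketch, not part of the original module: shows how the four return values of
# community_result() fit together. The uid list, topic and dates below are placeholders;
# emotions_kv is assumed to map sentiment codes to their Chinese labels as elsewhere in
# this module.
def community_summary_example():
    user_list = ['1234567890', '2345678901']  # placeholder community member uids
    info, keywords, sentiment_ratio, query = community_result(
        user_list, u'APEC', '2014-11-20', 15)
    if info is None:
        print 'no weibos found for this community'
        return
    print 'weibo rows (sorted by reposts_count):', len(info)
    print 'top-5 keywords:', keywords[:5]
    for label, num, ratio in sentiment_ratio:
        print label, num, '%.2f%%' % (ratio * 100)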