示例#1
0
def get_timestamp_count(query_dict, topic, date, windowsize):
    """Count the weibos matching ``query_dict`` for each hour of a window.

    The window covers the ``windowsize`` days that end at ``date``.

    Args:
        query_dict: Base query conditions handed to the search backend. A
            shallow copy is made for every hourly query, so the caller's
            dict is not modified.
        topic: Topic used to select the index via ``getXapianWeiboByTopic``.
        date: End of the window; converted with ``datetime2ts``.
        windowsize: Length of the window in days.

    Returns:
        A list of ``[end, count]`` pairs in chronological order, where
        ``end`` is the closing timestamp of an hourly bucket and ``count``
        is the hit count the search reported for that bucket.
    """
    during = 3600  # bucket width: one hour, in seconds
    day = 24 * 3600
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    # Floor division plus int() keeps the bucket count an integer under true
    # division or with float timestamps; range() does not accept floats.
    interval = int((end_ts - start_ts) // during)
    time_count = []
    for i in range(interval, 0, -1):
        begin = end_ts - during * i
        end = begin + during

        # Build a per-bucket query rather than writing the time filter into
        # the dict owned by the caller.
        # NOTE(review): if '$gt'/'$lt' are strict bounds, a post lying exactly
        # on a bucket boundary is counted in no bucket -- confirm.
        hourly_query = dict(query_dict)
        hourly_query['timestamp'] = {'$gt': begin, '$lt': end}
        count, _ = xapian_search_weibo.search(query=hourly_query,
                                              fields=['_id'])
        time_count.append([end, count])
    return time_count
示例#2
0
def get_timestamp_count(query_dict, topic, date, windowsize):
    """Return hourly hit counts for ``query_dict`` over a multi-day window.

    The window is the ``windowsize`` days ending at ``date``.

    Args:
        query_dict: Base query conditions handed to the search backend. It is
            shallow-copied for each hourly query, so the caller's dict is
            left unchanged.
        topic: Topic used to select the index via ``getXapianWeiboByTopic``.
        date: End of the window; converted with ``datetime2ts``.
        windowsize: Length of the window in days.

    Returns:
        A list of ``[end, count]`` pairs, oldest bucket first. ``end`` is the
        closing timestamp of the hourly bucket and ``count`` is the hit count
        reported by the search for it.
    """
    during = 3600  # bucket width: one hour, in seconds
    day = 24 * 3600
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    # range() needs an int: use floor division and int() so the bucket count
    # stays integral under true division or with float timestamps.
    interval = int((end_ts - start_ts) // during)
    time_count = []
    for i in range(interval, 0, -1):
        begin = end_ts - during * i
        end = begin + during

        # Copy first so the time filter never leaks into the caller's dict.
        # NOTE(review): if "$gt"/"$lt" are strict bounds, a post lying exactly
        # on a bucket boundary is counted in no bucket -- confirm.
        hourly_query = dict(query_dict)
        hourly_query["timestamp"] = {"$gt": begin, "$lt": end}
        count, _ = xapian_search_weibo.search(query=hourly_query, fields=["_id"])
        time_count.append([end, count])
    return time_count
示例#3
0
def community_result(community_user_list, topic, date, windowsize):
    """Gather weibos, top keywords and sentiment shares for a community.

    Searches the topic index for posts written by any user in
    ``community_user_list`` within the ``windowsize`` days ending at ``date``.

    Args:
        community_user_list: Iterable of user ids; each one is converted
            with ``int()`` before being put into the query.
        topic: Topic used to select the index via ``getXapianWeiboByTopic``.
        date: End of the window; converted with ``datetime2ts``.
        windowsize: Length of the window in days (multiplied by ``Day``).

    Returns:
        A 4-tuple ``(sort_community_info, sort_top_keyword,
        new_sentiment_list, query_dict)``:

        * ``sort_community_info``: one row per weibo, sorted by repost count
          (row index 10) in descending order. Row layout: ``[_id, name,
          location, friends_count, followers_count, created_at,
          statuses_count, profile_image_url, text, date, reposts_count,
          source, geo, comments_count, sentiment_name, weibo_link, domain]``.
        * ``sort_top_keyword``: output of ``top_keywords`` (``top=50``) over
          a second search issued with ``max_offset=50``, sorted by the
          element at index 1 in descending order.
        * ``new_sentiment_list``: ``[label, num, ratio]`` per sentiment seen,
          with ``ratio = num / count``.
        * ``query_dict``: the ``{'$or': [...]}`` query that was executed.

        When the search reports zero hits, all four items are ``None``.
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {
        '$or': [{'user': int(uid)} for uid in community_user_list]
    }
    count, weibo_results = xapian_search_weibo.search(query=query_dict,
                                                      fields=weibo_fields_list)
    if count == 0:
        # Same arity as the success path, so callers that unpack four values
        # do not fail on an empty result; indices 0-2 are unchanged.
        return None, None, None, None

    community_info = []
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            # No profile found for this author: fall back to placeholders.
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'

        text = weibo['text']
        timestamp = weibo['timestamp']
        # Separate local name so the ``date`` parameter is not shadowed.
        post_date = ts2date(timestamp)
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        domain = uid2domain(uid)

        sentiment_count[sentiment] = sentiment_count.get(sentiment, 0) + 1
        community_info.append([
            _id, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, text, post_date, reposts_count,
            source, geo, comments_count, sentiment_name, weibo_link, domain
        ])

    # Rank posts by repost count (row index 10), highest first.
    sort_community_info = sorted(community_info,
                                 key=lambda x: x[10],
                                 reverse=True)

    mset = xapian_search_weibo.search(query=query_dict,
                                      max_offset=50,
                                      mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo,
                                             mset,
                                             fields=['terms']),
                               top=50)

    sort_top_keyword = sorted(top_keyword, key=lambda x: x[1], reverse=True)

    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])

    return sort_community_info, sort_top_keyword, new_sentiment_list, query_dict
示例#4
0
def c_weibo_by_ts(topic, date, windowsize, uid, network_type, cid, rank_method):
    """List the weibos posted by the community that contains ``uid``.

    Loads the stored GEXF graph for the topic, asks ``get_community_user``
    for the community members, then searches the topic index for their posts
    within the ``windowsize`` days ending at ``date``.

    Args:
        topic: Topic name; resolved with ``acquire_real_topic_id``.
        date: End of the window; also part of the graph file name.
        windowsize: Length of the window in days (multiplied by ``Day``).
        uid: Node id passed to ``get_community_user``.
        network_type: ``'source_graph'`` or ``'direct_superior_graph'``;
            selects which graph file is read.
        cid: Community id passed to ``get_community_user``.
        rank_method: ``'reposts_count'`` sorts by repost count, descending;
            any other value sorts by timestamp, ascending.

    Returns:
        A sorted list of rows ``[_id, name, location, friends_count,
        followers_count, created_at, statuses_count, profile_image_url, text,
        date, reposts_count, source, geo, comments_count, sentiment_name,
        weibo_link, uid, timestamp]``; ``None`` when the search reports zero
        hits; ``(None, None, None)`` when the topic id cannot be resolved.

    Raises:
        ValueError: If ``network_type`` is not one of the supported values.
    """
    real_topic_id = acquire_real_topic_id(topic, date, windowsize)
    if not real_topic_id:
        # NOTE(review): this path returns a three-tuple while the other paths
        # return None or a list; left untouched because callers may rely on
        # it -- confirm and unify.
        return None, None, None
    # The topic exists: carry on with the computation.
    key_pre = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
    # Choose which stored graph to read the community information from.
    if network_type == 'source_graph':
        key = str(GRAPH_PATH) + key_pre + '_gg_graph.gexf'
    elif network_type == 'direct_superior_graph':
        key = str(GRAPH_PATH) + key_pre + '_ds_udg_graph.gexf'
    else:
        # Previously ``key`` stayed unbound here and the read below failed
        # with an opaque UnboundLocalError.
        raise ValueError('unsupported network_type: %r' % (network_type,))
    g = nx.read_gexf(key)
    # Users belonging to the community that holds node ``uid``.
    community_user_list = get_community_user(g, uid, cid)

    end_ts = datetime2ts(date)
    start_ts = end_ts - Day * windowsize
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {
        '$or': [{'user': int(member_uid)} for member_uid in community_user_list]
    }
    count, weibo_results = xapian_search_weibo.search(query=query_dict,
                                                      fields=weibo_fields_list)
    if count == 0:
        return None

    community_info = []
    for weibo in weibo_results():
        # Distinct local names keep the ``uid`` and ``date`` parameters intact.
        author_uid = weibo['user']
        _id = weibo['_id']
        result = user_search.search_by_id(author_uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            # No profile found for this author: fall back to placeholders.
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'

        text = weibo['text']
        timestamp = weibo['timestamp']
        post_date = ts2date(timestamp)
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(author_uid, _id)

        community_info.append([
            _id, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, text, post_date, reposts_count,
            source, geo, comments_count, sentiment_name, weibo_link,
            author_uid, timestamp
        ])

    if rank_method == 'reposts_count':
        # Row index 10 is the repost count: most reposted first.
        sort_community_info = sorted(community_info, key=lambda x: x[10],
                                     reverse=True)
    else:
        # Row index 17 is the timestamp: oldest first.
        sort_community_info = sorted(community_info, key=lambda x: x[17])

    return sort_community_info
示例#5
0
def get_info(neighbor_list, topic, date, windowsize):
    """Gather weibos, top keywords and sentiment shares for neighbor users.

    Searches the topic index for posts written by any user in
    ``neighbor_list`` within the ``windowsize`` days ending at ``date``.

    Args:
        neighbor_list: Iterable of user ids; each one is converted with
            ``int()`` before being put into the query.
        topic: Topic used to select the index via ``getXapianWeiboByTopic``.
        date: End of the window; converted with ``datetime2ts``.
        windowsize: Length of the window in days (multiplied by ``Day``).

    Returns:
        A 4-tuple ``(sort_neighbor_info, sort_top_keyword,
        new_sentiment_list, query_dict)``:

        * ``sort_neighbor_info``: one row per weibo, sorted by repost count
          (row index 10) in descending order. Row layout: ``[_id, name,
          location, friends_count, followers_count, created_at,
          statuses_count, profile_image_url, text, date, reposts_count,
          source, geo, comments_count, sentiment_name, weibo_link, uid]``.
        * ``sort_top_keyword``: output of ``top_keywords`` (``top=50``) over
          a second search issued with ``max_offset=50``, sorted by the
          element at index 1 in descending order.
        * ``new_sentiment_list``: ``[label, num, ratio]`` per sentiment seen,
          with ``ratio = num / count``.
        * ``query_dict``: the ``{'$or': [...]}`` query that was executed.

        When the search reports zero hits, all four items are ``None``.
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {
        '$or': [{'user': int(uid)} for uid in neighbor_list]
    }
    count, weibo_results = xapian_search_weibo.search(query=query_dict,
                                                      fields=weibo_fields_list)
    if count == 0:
        # Same arity as the success path, so callers that unpack four values
        # do not fail on an empty result; indices 0-2 are unchanged.
        return None, None, None, None

    neighbor_info = []
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            # No profile found for this author: fall back to placeholders.
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'

        text = weibo['text']
        timestamp = weibo['timestamp']
        # Separate local name so the ``date`` parameter is not shadowed.
        post_date = ts2date(timestamp)
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)

        sentiment_count[sentiment] = sentiment_count.get(sentiment, 0) + 1
        neighbor_info.append([
            _id, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, text, post_date, reposts_count,
            source, geo, comments_count, sentiment_name, weibo_link, uid
        ])

    # Rank posts by repost count (row index 10), highest first.
    sort_neighbor_info = sorted(neighbor_info, key=lambda x: x[10], reverse=True)

    mset = xapian_search_weibo.search(query=query_dict, max_offset=50,
                                      mset_direct=True)
    top_keyword = top_keywords(
        gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=50)

    sort_top_keyword = sorted(top_keyword, key=lambda x: x[1], reverse=True)

    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])

    return sort_neighbor_info, sort_top_keyword, new_sentiment_list, query_dict
示例#6
0
def community_result(community_user_list, topic, date, windowsize):
    """Summarise a community's weibos: posts, top keywords, sentiment shares.

    Searches the topic index for posts written by any user in
    ``community_user_list`` within the ``windowsize`` days ending at ``date``.

    Args:
        community_user_list: Iterable of user ids; each one is converted
            with ``int()`` before being put into the query.
        topic: Topic used to select the index via ``getXapianWeiboByTopic``.
        date: End of the window; converted with ``datetime2ts``.
        windowsize: Length of the window in days (multiplied by ``Day``).

    Returns:
        A 4-tuple ``(sort_community_info, sort_top_keyword,
        new_sentiment_list, query_dict)``:

        * ``sort_community_info``: one row per weibo, sorted by repost count
          (row index 10) in descending order. Row layout: ``[_id, name,
          location, friends_count, followers_count, created_at,
          statuses_count, profile_image_url, text, date, reposts_count,
          source, geo, comments_count, sentiment_name, weibo_link, domain]``.
        * ``sort_top_keyword``: output of ``top_keywords`` (``top=50``) over
          a second search issued with ``max_offset=50``, sorted by the
          element at index 1 in descending order.
        * ``new_sentiment_list``: ``[label, num, ratio]`` per sentiment seen,
          with ``ratio = num / count``.
        * ``query_dict``: the ``{"$or": [...]}`` query that was executed.

        When the search reports zero hits, all four items are ``None``.
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {"$or": [{"user": int(uid)} for uid in community_user_list]}
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        # Same arity as the success path, so callers that unpack four values
        # do not fail on an empty result; indices 0-2 are unchanged.
        return None, None, None, None

    community_info = []
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo["user"]
        _id = weibo["_id"]
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result["name"]
            location = result["location"]
            friends_count = result["friends_count"]
            followers_count = result["followers_count"]
            created_at = result["created_at"]
            statuses_count = result["statuses_count"]
            profile_image_url = result["profile_image_url"]
        else:
            # No profile found for this author: fall back to placeholders.
            name = u"未知"
            location = u"未知"
            friends_count = u"未知"
            followers_count = u"未知"
            created_at = u"未知"
            statuses_count = u"未知"
            profile_image_url = u"no"

        text = weibo["text"]
        timestamp = weibo["timestamp"]
        # Separate local name so the ``date`` parameter is not shadowed.
        post_date = ts2date(timestamp)
        reposts_count = weibo["reposts_count"]
        source = weibo["source"]
        geo = weibo["geo"]
        comments_count = weibo["comments_count"]
        sentiment = weibo["sentiment"]
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        domain = uid2domain(uid)

        sentiment_count[sentiment] = sentiment_count.get(sentiment, 0) + 1
        community_info.append(
            [
                _id,
                name,
                location,
                friends_count,
                followers_count,
                created_at,
                statuses_count,
                profile_image_url,
                text,
                post_date,
                reposts_count,
                source,
                geo,
                comments_count,
                sentiment_name,
                weibo_link,
                domain,
            ]
        )

    # Rank posts by repost count (row index 10), highest first.
    sort_community_info = sorted(community_info, key=lambda x: x[10], reverse=True)

    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=["terms"]), top=50)

    sort_top_keyword = sorted(top_keyword, key=lambda x: x[1], reverse=True)

    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])

    return sort_community_info, sort_top_keyword, new_sentiment_list, query_dict