示例#1
0
文件: views.py 项目: huxiaoqian/case
def save_csv():
    topic_list = [u'东盟,博览会', u'全军政治工作会议', u'外滩踩踏', u'高校思想宣传', \
                       u'APEC', u'张灵甫遗骨疑似被埋羊圈', u'两会2015']
    time_range_list = [('2013-09-02', '2013-09-07'), ('2014-10-31', '2014-11-15'), ('2014-12-31', '2015-01-09'),\
                                 ('2015-01-23', '2015-02-02'), ('2014-11-01', '2014-11-10'), ('2015-01-23', '2015-02-02') ,\
                                 ('2015-03-02', '2015-03-15')]
    for i in range(len(topic_list)):
        topic = topic_list[i]
        start_date = time_range_list[i][0]
        start_ts = datetime2ts(start_date)
        end_date = time_range_list[i][1]
        end_ts = datetime2ts(end_date) + 3600 * 24
        write_topic_excel(topic, start_ts, end_ts)
        print 'success write topic:', topic
    return 'success save'
示例#2
0
def get_timestamp_count(query_dict, topic, date, windowsize):
    """Count the weibos matching ``query_dict`` in each one-hour bucket.

    The window ends at ``datetime2ts(date)`` and starts ``windowsize`` days
    (24 * 3600 seconds each) earlier. It is cut into consecutive 3600-second
    buckets, and for each bucket the topic's weibo index is searched with a
    "timestamp" filter of ``{"$gt": bucket_start, "$lt": bucket_end}``.

    Note:
        ``query_dict`` is modified in place: its "timestamp" entry is
        overwritten for every bucket and still holds the last bucket's bounds
        when the function returns.

    Returns:
        A list of ``[bucket_end_ts, count]`` pairs, earliest bucket first.
    """
    seconds_per_hour = 3600
    seconds_per_day = 24 * 3600

    window_end = datetime2ts(date)
    window_start = window_end - windowsize * seconds_per_day
    searcher = getXapianWeiboByTopic(topic, window_start, window_end)

    # Hourly granularity: how many one-hour buckets fit into the window.
    bucket_total = (window_end - window_start) / seconds_per_hour

    hourly_counts = []
    for hours_back in range(bucket_total, 0, -1):
        bucket_start = window_end - seconds_per_hour * hours_back
        bucket_end = bucket_start + seconds_per_hour

        query_dict["timestamp"] = {"$gt": bucket_start, "$lt": bucket_end}
        # Only the hit count is needed; the result accessor is ignored.
        hits, _unused = searcher.search(query=query_dict, fields=["_id"])
        hourly_counts.append([bucket_end, hits])
    return hourly_counts
示例#3
0
def acquire_real_topic_id(topic, date, windowsize):
    """Look up the id of the ``Topics`` row for a topic and time window.

    The window ends at ``datetime2ts(date)`` and starts ``windowsize * Day``
    earlier. The row must match the topic name and both window bounds exactly.

    Returns:
        The ``id`` of the first matching row, or ``None`` when no row matches.
    """
    window_end = datetime2ts(date)
    window_start = window_end - windowsize * Day

    matching_rows = db.session.query(Topics).filter(
        Topics.topic == topic,
        Topics.start_ts == window_start,
        Topics.end_ts == window_end,
    )
    record = matching_rows.first()
    if not record:
        return None
    return record.id
示例#4
0
文件: views.py 项目: huxiaoqian/case
def get_all_data():
    topic_list = [u'东盟,博览会', u'全军政治工作会议', u'外滩踩踏', u'高校思想宣传', u'APEC', u'张灵甫遗骨疑似被埋羊圈']
    time_range_list = [('2013-09-08', 6), ('2014-11-16', 17), ('2015-01-10', 10), ('2015-02-01', 9), ('2014-11-20', 15), ('2015-02-02', 10)]
    result = {}
    result_list = []
    for i in range(len(topic_list)):
        topic_name = topic_list[i]
        end_date = time_range_list[i][0]
        windowsize = time_range_list[i][1]
        end_ts = datetime2ts(end_date)
        start_ts = end_ts - Day * windowsize
        print 'start compute topic:', topic_name
        result = get_topic_data(topic_name, start_ts, end_ts)
        result_list.append(result)
        
    return json.dumps(result_list)
示例#5
0
def read_uid_weibos(topic, date, windowsize, uid):
    """List the weibos that user ``uid`` posted within a topic window.

    The window ends at ``datetime2ts(date)`` and starts ``Day * windowsize``
    earlier. Every weibo found for the user is combined with the author's
    profile fields from ``user_search``; when the profile lookup returns
    nothing, placeholder values are used instead.

    Returns:
        A list of rows sorted ascending by element 9 (the ``ts2date`` value of
        the weibo timestamp). Each row is
        ``[wid, uid, name, location, friends_count, followers_count,
        created_at, statuses_count, profile_image_url, date, text, geo,
        source, reposts_count, comments_count, weibo_link]``.
        The list is empty when the search reports zero hits.
    """
    window_end = datetime2ts(date)
    window_start = window_end - Day * windowsize
    searcher = getXapianWeiboByTopic(topic, window_start, window_end)

    hit_count, fetch_hits = searcher.search(
        query={"user": uid}, fields=weibo_fields_list
    )
    if hit_count == 0:
        return []

    profile_keys = (
        "name",
        "location",
        "friends_count",
        "followers_count",
        "created_at",
        "statuses_count",
        "profile_image_url",
    )

    rows = []
    for weibo in fetch_hits():
        wid = weibo["_id"]
        author_id = weibo["user"]

        profile = user_search.search_by_id(author_id, fields=user_fields_list)
        if profile:
            profile_values = [profile[key] for key in profile_keys]
        else:
            # Unknown author: six "unknown" placeholders plus "no" for the image.
            profile_values = [u"未知"] * 6 + [u"no"]

        text = weibo["text"]
        geo = weibo["geo"]
        source = weibo["source"]
        posted_at = ts2date(weibo["timestamp"])
        reposts_count = weibo["reposts_count"]
        comments_count = weibo["comments_count"]
        weibo_link = weiboinfo2url(author_id, wid)
        # The domain is looked up but never placed in the row; the call is
        # retained as-is.
        uid2domain(author_id)

        rows.append(
            [wid, author_id]
            + profile_values
            + [
                posted_at,
                text,
                geo,
                source,
                reposts_count,
                comments_count,
                weibo_link,
            ]
        )

    rows.sort(key=lambda row: row[9])
    return rows
示例#6
0
def c_weibo_by_ts(topic, date, windowsize, uid, network_type, cid, rank_method):
    """Return the weibos written by the members of one graph community.

    The topic record is resolved with ``acquire_real_topic_id``, the matching
    GEXF graph is read from under ``GRAPH_PATH``, ``get_community_user``
    supplies the community members for ``uid``/``cid``, and the topic's weibo
    index is searched for every weibo whose ``user`` is one of those members.

    Args:
        topic: Topic name used for the topic lookup and the weibo index.
        date: End date of the window (converted with ``datetime2ts``).
        windowsize: Window length as a multiple of ``Day``.
        uid: Graph node whose community is requested.
        network_type: 'source_graph' or 'direct_superior_graph'; selects the
            graph file suffix.
        cid: Community id handed to ``get_community_user``.
        rank_method: 'reposts_count' sorts by repost count, descending; any
            other value sorts by timestamp, ascending.

    Returns:
        ``(None, None, None)`` when no topic record exists, ``None`` when the
        search reports zero hits, otherwise a sorted list of rows
        ``[_id, name, location, friends_count, followers_count, created_at,
        statuses_count, profile_image_url, text, date, reposts_count, source,
        geo, comments_count, sentiment_name, weibo_link, uid, timestamp]``.
        NOTE(review): the two "empty" return shapes differ from each other and
        from the list returned on success; they are kept unchanged here
        because callers are not visible -- confirm which one is intended.

    Raises:
        ValueError: if ``network_type`` is not one of the two supported
            values (previously this surfaced as an unbound-variable error).
    """
    real_topic_id = acquire_real_topic_id(topic, date, windowsize)
    if not real_topic_id:
        return None, None, None

    # The topic exists, so continue with the computation.
    key_pre = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
    # Choose which directed graph the community information is computed on.
    if network_type == 'source_graph':
        graph_suffix = '_gg_graph.gexf'
    elif network_type == 'direct_superior_graph':
        graph_suffix = '_ds_udg_graph.gexf'
    else:
        raise ValueError('unsupported network_type: %r' % (network_type,))
    key = str(GRAPH_PATH) + key_pre + graph_suffix
    g = nx.read_gexf(key)
    # Nodes of the community that node ``uid`` belongs to.
    community_user_list = get_community_user(g, uid, cid)
    # Open point from the original author: consider where the node's
    # community attribute should be stored.

    end_ts = datetime2ts(date)
    start_ts = end_ts - Day * windowsize
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {
        '$or': [{'user': int(member)} for member in community_user_list]
    }
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        return None

    community_info = []
    for weibo in weibo_results():
        # Distinct local names keep the ``uid`` and ``date`` parameters intact.
        author_id = weibo['user']
        _id = weibo['_id']
        result = user_search.search_by_id(author_id, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            # Profile lookup returned nothing: fall back to placeholders.
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'

        text = weibo['text']
        timestamp = weibo['timestamp']
        posted_at = ts2date(timestamp)
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(author_id, _id)

        community_info.append([
            _id, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, text, posted_at, reposts_count,
            source, geo, comments_count, sentiment_name, weibo_link,
            author_id, timestamp,
        ])

    if rank_method == 'reposts_count':
        # Index 10 is reposts_count: most reposted first.
        sort_community_info = sorted(community_info, key=lambda row: row[10], reverse=True)
    else:
        # Index 17 is the raw timestamp: oldest first.
        sort_community_info = sorted(community_info, key=lambda row: row[17])

    return sort_community_info
示例#7
0
def get_info(neighbor_list, topic, date, windowsize):
    """Summarise the weibos posted by a list of neighbour users in a topic.

    The window ends at ``datetime2ts(date)`` and starts ``windowsize * Day``
    earlier. The topic's weibo index is searched for weibos whose ``user`` is
    any id in ``neighbor_list``.

    Args:
        neighbor_list: User ids; each is converted with ``int``.
        topic: Topic name selecting the weibo index.
        date: End date of the window.
        windowsize: Window length as a multiple of ``Day``.

    Returns:
        A 4-tuple ``(sort_neighbor_info, sort_top_keyword,
        new_sentiment_list, query_dict)``:

        * ``sort_neighbor_info`` -- rows ``[_id, name, location,
          friends_count, followers_count, created_at, statuses_count,
          profile_image_url, text, date, reposts_count, source, geo,
          comments_count, sentiment_name, weibo_link, uid]`` sorted by
          repost count, descending.
        * ``sort_top_keyword`` -- output of ``top_keywords`` (called with
          ``top=50``) sorted by element 1, descending.
        * ``new_sentiment_list`` -- ``[sentiment_name, num, ratio]`` per
          sentiment, where ``ratio`` is ``num`` divided by the hit count.
        * ``query_dict`` -- the ``$or`` query that was executed.

        When the search reports zero hits the first three items are ``None``.
        The tuple still has four items (the original returned only three
        here, which broke callers unpacking four values), and ``query_dict``
        is still supplied.
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {
        '$or': [{'user': int(neighbor)} for neighbor in neighbor_list]
    }
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        # Same arity as the success path so tuple unpacking keeps working.
        return None, None, None, query_dict

    neighbor_info = []
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            # Profile lookup returned nothing: fall back to placeholders.
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'

        text = weibo['text']
        timestamp = weibo['timestamp']
        # A separate name keeps the ``date`` parameter intact.
        posted_at = ts2date(timestamp)
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)

        sentiment_count[sentiment] = sentiment_count.get(sentiment, 0) + 1
        neighbor_info.append([
            _id, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, text, posted_at, reposts_count,
            source, geo, comments_count, sentiment_name, weibo_link, uid,
        ])

    # Index 10 is reposts_count: most reposted first.
    sort_neighbor_info = sorted(neighbor_info, key=lambda row: row[10], reverse=True)

    # Second search, limited with max_offset=50, feeds the keyword ranking.
    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda pair: pair[1], reverse=True)

    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])

    return sort_neighbor_info, sort_top_keyword, new_sentiment_list, query_dict
示例#8
0
文件: counts.py 项目: huxiaoqian/case
        lowbound = (start_ts / unit) * unit
        if customized == '0':
            count = db.session.query(func.sum(SentimentCountRatio.count)).filter(SentimentCountRatio.end>lowbound, \
                                            SentimentCountRatio.end<=upbound, \
                                            SentimentCountRatio.sentiment==sentiment, \
                                            SentimentCountRatio.range==unit, \
                                            SentimentCountRatio.query==query).all()

        else:
            count = db.session.query(func.sum(SentimentCount.count)).filter(SentimentCount.end>lowbound, \
                                                SentimentCount.end<=upbound, \
                                                SentimentCount.sentiment==sentiment, \
                                                SentimentCount.range==unit, \
                                                SentimentCount.query==query).all()

        if count and count[0] and count[0][0]:
            count = [end_ts * 1000, int(count[0][0])]
        else:
            count = [end_ts * 1000, 0]

    return count


if __name__ == '__main__':
    # Manual run: call search_topic_counts once per sentiment code, using the
    # timestamp of 2013-09-18 and a span of one Day. The returned counts are
    # not inspected or printed.
    emotions_kv = {'happy': 1, 'angry': 2, 'sad': 3}
    end_ts = datetime2ts('2013-09-18')
    during = 1 * Day

    # Only the numeric codes are needed; the labels are not used.
    for v in emotions_kv.itervalues():
        count = search_topic_counts(end_ts, during, v, domain=0)
示例#9
0
def community_result(community_user_list, topic, date, windowsize):
    """Summarise the weibos posted by the members of a community in a topic.

    The window ends at ``datetime2ts(date)`` and starts ``windowsize * Day``
    earlier. The topic's weibo index is searched for weibos whose ``user`` is
    any id in ``community_user_list``.

    Args:
        community_user_list: User ids; each is converted with ``int``.
        topic: Topic name selecting the weibo index.
        date: End date of the window.
        windowsize: Window length as a multiple of ``Day``.

    Returns:
        A 4-tuple ``(sort_community_info, sort_top_keyword,
        new_sentiment_list, query_dict)``:

        * ``sort_community_info`` -- rows ``[_id, name, location,
          friends_count, followers_count, created_at, statuses_count,
          profile_image_url, text, date, reposts_count, source, geo,
          comments_count, sentiment_name, weibo_link, domain]`` sorted by
          repost count, descending.
        * ``sort_top_keyword`` -- output of ``top_keywords`` (called with
          ``top=50``) sorted by element 1, descending.
        * ``new_sentiment_list`` -- ``[sentiment_name, num, ratio]`` per
          sentiment, where ``ratio`` is ``num`` divided by the hit count.
        * ``query_dict`` -- the ``$or`` query that was executed.

        When the search reports zero hits the first three items are ``None``.
        The tuple still has four items (the original returned only three
        here, which broke callers unpacking four values), and ``query_dict``
        is still supplied.
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {
        "$or": [{"user": int(member)} for member in community_user_list]
    }
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        # Same arity as the success path so tuple unpacking keeps working.
        return None, None, None, query_dict

    community_info = []
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo["user"]
        _id = weibo["_id"]
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result["name"]
            location = result["location"]
            friends_count = result["friends_count"]
            followers_count = result["followers_count"]
            created_at = result["created_at"]
            statuses_count = result["statuses_count"]
            profile_image_url = result["profile_image_url"]
        else:
            # Profile lookup returned nothing: fall back to placeholders.
            name = u"未知"
            location = u"未知"
            friends_count = u"未知"
            followers_count = u"未知"
            created_at = u"未知"
            statuses_count = u"未知"
            profile_image_url = u"no"

        text = weibo["text"]
        timestamp = weibo["timestamp"]
        # A separate name keeps the ``date`` parameter intact.
        posted_at = ts2date(timestamp)
        reposts_count = weibo["reposts_count"]
        source = weibo["source"]
        geo = weibo["geo"]
        comments_count = weibo["comments_count"]
        sentiment = weibo["sentiment"]
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        domain = uid2domain(uid)

        sentiment_count[sentiment] = sentiment_count.get(sentiment, 0) + 1
        community_info.append(
            [
                _id,
                name,
                location,
                friends_count,
                followers_count,
                created_at,
                statuses_count,
                profile_image_url,
                text,
                posted_at,
                reposts_count,
                source,
                geo,
                comments_count,
                sentiment_name,
                weibo_link,
                domain,
            ]
        )

    # Index 10 is reposts_count: most reposted first.
    sort_community_info = sorted(community_info, key=lambda row: row[10], reverse=True)

    # Second search, limited with max_offset=50, feeds the keyword ranking.
    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=["terms"]), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda pair: pair[1], reverse=True)

    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])

    return sort_community_info, sort_top_keyword, new_sentiment_list, query_dict