Пример #1
0
def sentimentCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, save_fields=RESP_ITER_KEYS, \
    during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    """Compute and persist per-sentiment statistics of a topic, slice by slice.

    The range between ``start_ts`` and ``over_ts`` (the latter first passed
    through ``ts2HourlyTime``) is walked in consecutive slices of length
    ``during``, oldest slice first. For every slice and every value of
    ``emotions_kv`` the weibo index is queried twice: once for the hit count
    and result iterator, once for an mset sorted by ``sort_field``.

    Args:
        topic: Topic name; nothing is done when it is empty/falsy.
        xapian_search_weibo: Search object exposing ``search(...)``.
        start_ts: Start of the range (anything ``int()`` accepts).
        over_ts: End of the range (anything ``int()`` accepts).
        sort_field: Field used to sort the mset query.
        save_fields: Fields requested from the count/result query.
        during: Length of one slice.
        w_limit: Number of top weibos kept (also the mset ``max_offset``).
        k_limit: Number of top keywords kept.

    Returns:
        None. Results are handed to ``save_rt_results``.
    """
    if not topic:
        return

    range_start = int(start_ts)
    # presumably aligns the end timestamp to a `during` boundary -- confirm
    range_end = ts2HourlyTime(int(over_ts), during)
    slice_total = (range_end - range_start) // during

    # Counting down makes the slices come out in chronological order.
    for slices_left in range(slice_total, 0, -1):
        begin_ts = range_end - during * slices_left
        end_ts = begin_ts + during
        print('%s %s topic %s starts calculate' % (begin_ts, end_ts,
                                                   topic.encode('utf-8')))

        # Both bounds are exclusive; only message_type 1 or 3 is considered.
        query = {
            'timestamp': {'$gt': begin_ts, '$lt': end_ts},
            '$and': [{'$or': [{'message_type': 1}, {'message_type': 3}]}],
        }

        count_by_emotion = {}
        keywords_by_emotion = {}
        weibos_by_emotion = {}

        for _label, emotion in emotions_kv.iteritems():
            # The same dict is reused; only the sentiment filter changes.
            query['sentiment'] = emotion

            hit_count, hits = xapian_search_weibo.search(query=query,
                                                         fields=save_fields)
            sorted_mset = xapian_search_weibo.search(query=query,
                                                     sort_by=[sort_field],
                                                     max_offset=w_limit,
                                                     mset_direct=True)

            term_iter = gen_mset_iter(xapian_search_weibo, sorted_mset,
                                      fields=['terms'])
            keyword_top = top_keywords(term_iter, top=k_limit)
            weibo_top = top_weibos(hits, top=w_limit)

            count_by_emotion[emotion] = [end_ts, hit_count]
            keywords_by_emotion[emotion] = [end_ts, keyword_top]
            weibos_by_emotion[emotion] = [end_ts, weibo_top]

        save_rt_results('count', topic, count_by_emotion, during)
        save_rt_results('kcount', topic, keywords_by_emotion, during, k_limit,
                        w_limit)
        save_rt_results('weibos', topic, weibos_by_emotion, during, k_limit,
                        w_limit)
Пример #2
0
def propagateCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, \
    save_fields=RESP_ITER_KEYS, during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    """Compute and persist per-message-type statistics of a topic per slice.

    The range between ``start_ts`` and ``over_ts`` (the latter first passed
    through ``ts2HourlyTime``) is walked in consecutive slices of length
    ``during``, oldest slice first. For every slice and every value of
    ``mtype_kv`` the hit count, top keywords and top weibos are collected and
    then stored through ``save_pc_results``, ``save_kc_results`` and
    ``save_ws_results``.

    NOTE(review): ``save_fields`` is accepted but never used; the search is
    issued with the module-level ``fields_list`` instead -- confirm that this
    is intended.

    Args:
        topic: Topic name; nothing is done when it is empty/falsy.
        xapian_search_weibo: Search object exposing ``search(...)``.
        start_ts: Start of the range (anything ``int()`` accepts).
        over_ts: End of the range (anything ``int()`` accepts).
        sort_field: Field used to sort the mset query.
        save_fields: Unused, see the note above.
        during: Length of one slice.
        w_limit: Number of top weibos kept (also the mset ``max_offset``).
        k_limit: Number of top keywords kept.

    Returns:
        None.
    """
    if not topic:
        return

    range_start = int(start_ts)
    # presumably aligns the end timestamp to a `during` boundary -- confirm
    range_end = ts2HourlyTime(int(over_ts), during)
    slice_total = (range_end - range_start) // during

    # Counting down makes the slices come out in chronological order.
    for slices_left in range(slice_total, 0, -1):
        begin_ts = range_end - during * slices_left
        end_ts = begin_ts + during
        print('%s %s topic %s starts calculate' % (begin_ts, end_ts,
                                                   topic.encode('utf-8')))

        count_by_type = {}
        keywords_by_type = {}  # {mtype: [end_ts, top keywords]}
        weibos_by_type = {}  # {mtype: [end_ts, top weibos]}

        # Both timestamp bounds are exclusive.
        query = {'timestamp': {'$gt': begin_ts, '$lt': end_ts}}

        for _label, mtype in mtype_kv.iteritems():
            # The same dict is reused; only the message type changes.
            query['message_type'] = mtype

            hit_count, hits = xapian_search_weibo.search(query=query,
                                                         fields=fields_list)
            sorted_mset = xapian_search_weibo.search(query=query,
                                                     sort_by=[sort_field],
                                                     max_offset=w_limit,
                                                     mset_direct=True)

            term_iter = gen_mset_iter(xapian_search_weibo, sorted_mset,
                                      fields=['terms'])
            keyword_top = top_keywords(term_iter, top=k_limit)
            weibo_top = top_weibos(hits, top=w_limit)

            count_by_type[mtype] = [end_ts, hit_count]
            keywords_by_type[mtype] = [end_ts, keyword_top]
            weibos_by_type[mtype] = [end_ts, weibo_top]

        save_pc_results(topic, count_by_type, during)
        save_kc_results(topic, keywords_by_type, during, k_limit)
        save_ws_results(topic, weibos_by_type, during, w_limit)
Пример #3
0
def sentimentCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, save_fields=RESP_ITER_KEYS, \
    during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    """Store sentiment counts, keywords and top weibos of a topic per window.

    ``over_ts`` is passed through ``ts2HourlyTime`` and the distance to
    ``start_ts`` is divided into windows of length ``during``. Windows are
    processed from the oldest to the newest. Within a window, each value of
    ``emotions_kv`` is searched for and the three result maps (keyed by that
    value, each entry being ``[end_ts, payload]``) are written out with
    ``save_rt_results``.

    Args:
        topic: Topic name; an empty/falsy topic makes the call a no-op.
        xapian_search_weibo: Search object exposing ``search(...)``.
        start_ts: Lower end of the period, converted with ``int()``.
        over_ts: Upper end of the period, converted with ``int()``.
        sort_field: Sort key of the mset query.
        save_fields: Fields fetched by the count/result query.
        during: Window length.
        w_limit: Top-weibo limit, also used as mset ``max_offset``.
        k_limit: Top-keyword limit.

    Returns:
        None.
    """
    if not topic:
        return

    period_begin = int(start_ts)
    # presumably rounds to a window boundary -- verify against ts2HourlyTime
    period_end = ts2HourlyTime(int(over_ts), during)
    window_count = (period_end - period_begin) // during

    for offset in range(window_count, 0, -1):
        begin_ts = period_end - during * offset
        end_ts = begin_ts + during
        print('%s %s topic %s starts calculate' % (begin_ts, end_ts, topic.encode('utf-8')))

        totals, keywords, samples = {}, {}, {}

        # Exclusive time bounds, restricted to message_type 1 or 3.
        message_filter = {'$or': [{'message_type': 1}, {'message_type': 3}]}
        criteria = {
            'timestamp': {'$gt': begin_ts, '$lt': end_ts},
            '$and': [message_filter],
        }

        for _name, sentiment in emotions_kv.iteritems():
            criteria['sentiment'] = sentiment

            total, rows = xapian_search_weibo.search(query=criteria, fields=save_fields)
            ranked = xapian_search_weibo.search(query=criteria, sort_by=[sort_field],
                                                max_offset=w_limit, mset_direct=True)

            top_terms = top_keywords(
                gen_mset_iter(xapian_search_weibo, ranked, fields=['terms']),
                top=k_limit)
            top_rows = top_weibos(rows, top=w_limit)

            totals[sentiment] = [end_ts, total]
            keywords[sentiment] = [end_ts, top_terms]
            samples[sentiment] = [end_ts, top_rows]

        save_rt_results('count', topic, totals, during)
        save_rt_results('kcount', topic, keywords, during, k_limit, w_limit)
        save_rt_results('weibos', topic, samples, during, k_limit, w_limit)
Пример #4
0
def propagateCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, \
    save_fields=RESP_ITER_KEYS, during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    """Store message-type counts, keywords and top weibos of a topic per window.

    ``over_ts`` is passed through ``ts2HourlyTime`` and the distance to
    ``start_ts`` is divided into windows of length ``during``, processed from
    the oldest to the newest. Within a window each value of ``mtype_kv`` is
    searched for; the resulting maps (keyed by that value, each entry being
    ``[end_ts, payload]``) go to ``save_pc_results``, ``save_kc_results`` and
    ``save_ws_results`` respectively.

    NOTE(review): the ``save_fields`` argument is ignored; the search uses the
    module-level ``fields_list`` -- confirm whether that is deliberate.

    Args:
        topic: Topic name; an empty/falsy topic makes the call a no-op.
        xapian_search_weibo: Search object exposing ``search(...)``.
        start_ts: Lower end of the period, converted with ``int()``.
        over_ts: Upper end of the period, converted with ``int()``.
        sort_field: Sort key of the mset query.
        save_fields: Unused, see the note above.
        during: Window length.
        w_limit: Top-weibo limit, also used as mset ``max_offset``.
        k_limit: Top-keyword limit.

    Returns:
        None.
    """
    if not topic:
        return

    period_begin = int(start_ts)
    # presumably rounds to a window boundary -- verify against ts2HourlyTime
    period_end = ts2HourlyTime(int(over_ts), during)
    window_count = (period_end - period_begin) // during

    for offset in range(window_count, 0, -1):
        begin_ts = period_end - during * offset
        end_ts = begin_ts + during
        print('%s %s topic %s starts calculate' % (begin_ts, end_ts, topic.encode('utf-8')))

        totals = {}
        keywords = {}  # {mtype: [end_ts, terms]}
        samples = {}  # {mtype: [end_ts, weibos]}

        # Exclusive bounds on both sides of the window.
        criteria = {'timestamp': {'$gt': begin_ts, '$lt': end_ts}}

        for _name, mtype in mtype_kv.iteritems():
            criteria['message_type'] = mtype

            total, rows = xapian_search_weibo.search(query=criteria, fields=fields_list)
            ranked = xapian_search_weibo.search(query=criteria, sort_by=[sort_field],
                                                max_offset=w_limit, mset_direct=True)

            top_terms = top_keywords(
                gen_mset_iter(xapian_search_weibo, ranked, fields=['terms']),
                top=k_limit)
            top_rows = top_weibos(rows, top=w_limit)

            totals[mtype] = [end_ts, total]
            keywords[mtype] = [end_ts, top_terms]
            samples[mtype] = [end_ts, top_rows]

        save_pc_results(topic, totals, during)
        save_kc_results(topic, keywords, during, k_limit)
        save_ws_results(topic, samples, during, w_limit)
Пример #5
0
def community_result(community_user_list, topic, date, windowsize):
    """Gather weibo details, top keywords and sentiment ratios of a community.

    Searches the topic index for weibos posted by any user in
    ``community_user_list`` inside the window of ``windowsize * Day`` that
    ends at ``datetime2ts(date)``.

    Args:
        community_user_list: Iterable of user ids (anything ``int()`` accepts).
        topic: Topic used to pick the index via ``getXapianWeiboByTopic``.
        date: End date of the window, converted with ``datetime2ts``.
        windowsize: Window length as a multiple of ``Day``.

    Returns:
        A 4-tuple ``(sort_community_info, sort_top_keyword,
        new_sentiment_list, query_dict)``:

        * ``sort_community_info`` -- one row per weibo, ordered by repost
          count (descending). Row layout: ``[_id, name, location,
          friends_count, followers_count, created_at, statuses_count,
          profile_image_url, text, date, reposts_count, source, geo,
          comments_count, sentiment_name, weibo_link, domain]``.
        * ``sort_top_keyword`` -- output of ``top_keywords`` ordered by the
          second item of each entry (descending).
        * ``new_sentiment_list`` -- ``[sentiment_name, num, ratio]`` entries
          where ``ratio`` is ``num`` divided by the total hit count.
        * ``query_dict`` -- the query that was issued.

        When the search has no hits the first three items are ``None``. The
        tuple still has four items so that callers can unpack it the same
        way in both cases (the no-hit path used to return only three).
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)

    query_dict = {'$or': [{'user': int(uid)} for uid in community_user_list]}
    count, weibo_results = xapian_search_weibo.search(query=query_dict,
                                                      fields=weibo_fields_list)
    if count == 0:
        return None, None, None, query_dict

    profile_keys = ('name', 'location', 'friends_count', 'followers_count',
                    'created_at', 'statuses_count', 'profile_image_url')
    # Placeholders used when the user lookup yields nothing.
    unknown_profile = [u'未知'] * 6 + [u'no']

    community_info = []
    sentiment_count = {}
    # One user usually owns several weibos; look each profile up only once.
    user_cache = {}
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        if uid not in user_cache:
            user_cache[uid] = user_search.search_by_id(uid,
                                                       fields=user_fields_list)
        result = user_cache[uid]
        if result:
            profile = [result[key] for key in profile_keys]
        else:
            profile = list(unknown_profile)

        text = weibo['text']
        # Kept separate from the `date` parameter instead of rebinding it.
        weibo_date = ts2date(weibo['timestamp'])
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        domain = uid2domain(uid)

        sentiment_count[sentiment] = sentiment_count.get(sentiment, 0) + 1
        community_info.append([_id] + profile + [
            text, weibo_date, reposts_count, source, geo, comments_count,
            sentiment_name, weibo_link, domain
        ])

    # Index 10 of a row is reposts_count: order by repost count, descending.
    sort_community_info = sorted(community_info,
                                 key=lambda row: row[10],
                                 reverse=True)

    mset = xapian_search_weibo.search(query=query_dict,
                                      max_offset=50,
                                      mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo,
                                             mset,
                                             fields=['terms']),
                               top=50)
    sort_top_keyword = sorted(top_keyword,
                              key=lambda item: item[1],
                              reverse=True)

    total = float(count)
    new_sentiment_list = [[emotions_kv[int(sentiment)], num, num / total]
                          for sentiment, num in sentiment_count.items()]

    return sort_community_info, sort_top_keyword, new_sentiment_list, query_dict
Пример #6
0
def get_info(neighbor_list, topic, date, windowsize):
    """Gather weibo details, top keywords and sentiment ratios of neighbors.

    Searches the topic index for weibos posted by any user in
    ``neighbor_list`` inside the window of ``windowsize * Day`` that ends at
    ``datetime2ts(date)``.

    Args:
        neighbor_list: Iterable of user ids (anything ``int()`` accepts).
        topic: Topic used to pick the index via ``getXapianWeiboByTopic``.
        date: End date of the window, converted with ``datetime2ts``.
        windowsize: Window length as a multiple of ``Day``.

    Returns:
        A 4-tuple ``(sort_neighbor_info, sort_top_keyword,
        new_sentiment_list, query_dict)``:

        * ``sort_neighbor_info`` -- one row per weibo, ordered by repost
          count (descending). Row layout: ``[_id, name, location,
          friends_count, followers_count, created_at, statuses_count,
          profile_image_url, text, date, reposts_count, source, geo,
          comments_count, sentiment_name, weibo_link, uid]``.
        * ``sort_top_keyword`` -- output of ``top_keywords`` ordered by the
          second item of each entry (descending).
        * ``new_sentiment_list`` -- ``[sentiment_name, num, ratio]`` entries
          where ``ratio`` is ``num`` divided by the total hit count.
        * ``query_dict`` -- the query that was issued.

        When the search has no hits the first three items are ``None``. The
        tuple still has four items so that callers can unpack it the same
        way in both cases (the no-hit path used to return only three).
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)

    query_dict = {'$or': [{'user': int(uid)} for uid in neighbor_list]}
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        return None, None, None, query_dict

    profile_keys = ('name', 'location', 'friends_count', 'followers_count',
                    'created_at', 'statuses_count', 'profile_image_url')
    # Placeholders used when the user lookup yields nothing.
    unknown_profile = [u'未知'] * 6 + [u'no']

    neighbor_info = []
    sentiment_count = {}
    # One user usually owns several weibos; look each profile up only once.
    user_cache = {}
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        if uid not in user_cache:
            user_cache[uid] = user_search.search_by_id(uid, fields=user_fields_list)
        result = user_cache[uid]
        if result:
            profile = [result[key] for key in profile_keys]
        else:
            profile = list(unknown_profile)

        text = weibo['text']
        # Kept separate from the `date` parameter instead of rebinding it.
        weibo_date = ts2date(weibo['timestamp'])
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)

        sentiment_count[sentiment] = sentiment_count.get(sentiment, 0) + 1
        neighbor_info.append([_id] + profile + [
            text, weibo_date, reposts_count, source, geo, comments_count,
            sentiment_name, weibo_link, uid
        ])

    # Index 10 of a row is reposts_count: order by repost count, descending.
    sort_neighbor_info = sorted(neighbor_info, key=lambda row: row[10], reverse=True)

    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=50)

    sort_top_keyword = sorted(top_keyword, key=lambda item: item[1], reverse=True)

    total = float(count)
    new_sentiment_list = [[emotions_kv[int(sentiment)], num, num / total]
                          for sentiment, num in sentiment_count.items()]

    return sort_neighbor_info, sort_top_keyword, new_sentiment_list, query_dict
Пример #7
0
def community_result(community_user_list, topic, date, windowsize):
    """Gather weibo details, top keywords and sentiment ratios of a community.

    Searches the topic index for weibos posted by any user in
    ``community_user_list`` inside the window of ``windowsize * Day`` that
    ends at ``datetime2ts(date)``.

    Args:
        community_user_list: Iterable of user ids (anything ``int()`` accepts).
        topic: Topic used to pick the index via ``getXapianWeiboByTopic``.
        date: End date of the window, converted with ``datetime2ts``.
        windowsize: Window length as a multiple of ``Day``.

    Returns:
        A 4-tuple ``(sort_community_info, sort_top_keyword,
        new_sentiment_list, query_dict)``:

        * ``sort_community_info`` -- one row per weibo, ordered by repost
          count (descending). Row layout: ``[_id, name, location,
          friends_count, followers_count, created_at, statuses_count,
          profile_image_url, text, date, reposts_count, source, geo,
          comments_count, sentiment_name, weibo_link, domain]``.
        * ``sort_top_keyword`` -- output of ``top_keywords`` ordered by the
          second item of each entry (descending).
        * ``new_sentiment_list`` -- ``[sentiment_name, num, ratio]`` entries
          where ``ratio`` is ``num`` divided by the total hit count.
        * ``query_dict`` -- the query that was issued.

        When the search has no hits the first three items are ``None``. The
        tuple still has four items so that callers can unpack it the same
        way in both cases (the no-hit path used to return only three).
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)

    query_dict = {"$or": [{"user": int(uid)} for uid in community_user_list]}
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        return None, None, None, query_dict

    profile_keys = (
        "name",
        "location",
        "friends_count",
        "followers_count",
        "created_at",
        "statuses_count",
        "profile_image_url",
    )
    # Placeholders used when the user lookup yields nothing.
    unknown_profile = [u"未知"] * 6 + [u"no"]

    community_info = []
    sentiment_count = {}
    # One user usually owns several weibos; look each profile up only once.
    user_cache = {}
    for weibo in weibo_results():
        uid = weibo["user"]
        _id = weibo["_id"]
        if uid not in user_cache:
            user_cache[uid] = user_search.search_by_id(uid, fields=user_fields_list)
        result = user_cache[uid]
        if result:
            profile = [result[key] for key in profile_keys]
        else:
            profile = list(unknown_profile)

        text = weibo["text"]
        # Kept separate from the `date` parameter instead of rebinding it.
        weibo_date = ts2date(weibo["timestamp"])
        reposts_count = weibo["reposts_count"]
        source = weibo["source"]
        geo = weibo["geo"]
        comments_count = weibo["comments_count"]
        sentiment = weibo["sentiment"]
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        domain = uid2domain(uid)

        sentiment_count[sentiment] = sentiment_count.get(sentiment, 0) + 1
        community_info.append(
            [_id]
            + profile
            + [
                text,
                weibo_date,
                reposts_count,
                source,
                geo,
                comments_count,
                sentiment_name,
                weibo_link,
                domain,
            ]
        )

    # Index 10 of a row is reposts_count: order by repost count, descending.
    sort_community_info = sorted(community_info, key=lambda row: row[10], reverse=True)

    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=["terms"]), top=50)

    sort_top_keyword = sorted(top_keyword, key=lambda item: item[1], reverse=True)

    total = float(count)
    new_sentiment_list = [
        [emotions_kv[int(sentiment)], num, num / total] for sentiment, num in sentiment_count.items()
    ]

    return sort_community_info, sort_top_keyword, new_sentiment_list, query_dict