def get_topic_user():
    result = {}
    index_name = 'weibo_user'
    index_type = 'user'
    '''
    test_user_list = [['1499104401', '1265965213', '3270699555', '2073915493', '1686474312'],\
            ['2803301701', '2105426467', '1665372775', '3716504593', '2892376557'],\
            ['1457530250', '1698513182', '2793591492', '2218894100', '1737961042'],\
            ['1656818110', '1660127070', '1890124610', '1182391230', '1243861100'],\
            ['1680430844', '2998045524', '2202896360', '1639498782', '3494698730'],\
            ['2587093162', '1677675054', '1871767009', '1193111400', '1672418622'],\
            ['1730726640', '1752502540', '1868725480', '1262486750', '1235733080'],\
            ['1250041100', '2275231150', '1268642530', '1658606270', '1857599860'],\
            ['1929496477', '2167425990', '1164667670', '2417139911', '1708853044'],\
            ['1993292930', '1645823930', '1890926610', '1641561810', '2023833990'],\
            ['2005471590', '1233628160', '2074684140', '1396715380', '1236762250'],\
            ['1423592890', '2612799560', '1926127090', '2684951180', '1760607220']]
    '''
    for topic in search_dict:
        result[topic] = []
        user_list = search_dict[topic]
        profile_result = es_user_profile.mget(index=index_name, doc_type=index_type, body={'ids':user_list})['docs']
        for profile in profile_result:
            uid = profile['_id']
            try:
                uname = profile['_source']['nick_name']
                photo_url = profile['_source']['photo_url']
            except:
                uname = 'unknown'
                photo_url = 'unknown'
            result[topic].append([uid, uname, photo_url])
    return result
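A minimal sketch of how get_topic_user's globals might be wired up; the host, the search_dict contents, and the uid values below are illustrative assumptions, not the project's real configuration.

# Hedged sketch: assumed wiring for get_topic_user (elasticsearch-py
# 1.x-era client with doc_type support; host and data are placeholders).
from elasticsearch import Elasticsearch

es_user_profile = Elasticsearch(['127.0.0.1:9200'])

# topic -> uid list, the shape the function iterates over
search_dict = {
    'example_topic': ['1499104401', '1265965213'],
}

# get_topic_user() then returns
# {'example_topic': [[uid, uname, photo_url], ...]}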
Example 2
def recommend_new_words(date_list):
    results = []
    for date in date_list:
        date = date.replace('-', '')
        words_dict = r.hgetall('recommend_sensitive_words_' + date)
        if words_dict:
            for key, value in words_dict.items():
                detail = []
                detail.append(key)
                value = json.loads(value)
                uid_list = value[0]
                uname = []
                try:
                    search_results = es_user_profile.mget(
                        index='weibo_user',
                        doc_type='user',
                        body={'ids': uid_list})['docs']
                    for item in search_results:
                        if item['found']:
                            uname.append(item['_source']['nick_name'])
                        else:
                            uname.append('unknown')
                except:
                    uname = uid_list
                detail.extend([uname, value[1]])
                results.append(detail)
    sorted_results = sorted(results, key=lambda x: x[2], reverse=True)
    return sorted_results
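For context, a sketch of the Redis hash layout this function appears to read: one hash per day, field = recommended word, value = a JSON pair [uid_list, count]. The date, word, and counts below are made-up examples.

# Hedged sketch of the expected Redis layout (illustrative values only).
import json
import redis

r = redis.StrictRedis(host='127.0.0.1', port=6379)  # assumed connection

r.hset('recommend_sensitive_words_20130901', 'example_word',
       json.dumps([['1499104401', '1265965213'], 7]))
# recommend_new_words(['2013-09-01']) would then yield rows shaped
# [word, [uname, ...], count], sorted by count (index 2) descending.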
Example 3
def get_sensitive_user_detail(uid_list, date, sensitive):
    results = []
    index_name = str(date).replace('-','') # index_name:20130901
    user_bci_results = es_cluster.mget(index=index_name, doc_type='bci', body={'ids':uid_list}, _source=True)['docs']
    user_profile_results = es_user_profile.mget(index="weibo_user", doc_type="user", body={"ids":uid_list}, _source=True)['docs']
    for i in range(0, len(uid_list)):
        personal_info = ['']*6
        uid = uid_list[i]
        personal_info[0] = uid_list[i]
        if user_profile_results[i]['found']:
            profile_dict = user_profile_results[i]['_source']
            personal_info[1] = profile_dict['nick_name']
            personal_info[2] = profile_dict['user_location']
            personal_info[3] = profile_dict['fansnum']
            personal_info[4] = profile_dict['statusnum']
        if user_bci_results[i]['found']:
            personal_info[5] = user_bci_results[i]['_source'].get('user_index', 0)
        else:
            personal_info[5] = 0
        if sensitive:
            sensitive_words = r_cluster.hget('sensitive_' + index_name, str(uid))
            if sensitive_words:
                sensitive_dict = json.loads(sensitive_words)
                personal_info.append(sensitive_dict.keys())
            else:
                personal_info.append([])
        results.append(personal_info)
    return results
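This version reads per-user sensitive words from a Redis hash keyed by day. A sketch of that layout, with an invented key and values:

# Hedged sketch: hash 'sensitive_<YYYYMMDD>', field = uid,
# value = JSON {word: count}. All values illustrative.
import json
import redis

r_cluster = redis.StrictRedis(host='127.0.0.1', port=6379)  # assumed

r_cluster.hset('sensitive_20130901', '1499104401',
               json.dumps({'example_word': 3}))
# get_sensitive_user_detail(['1499104401'], '2013-09-01', 1) then appends
# the word list as the seventh element of each personal_info row.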
def recommend_new_words(date_list):
    results = []
    for date in date_list:
        date = date.replace('-', '')
        words_dict = r.hgetall('recommend_sensitive_words_'+date)
        if words_dict:
            for key, value in words_dict.items():
                detail = []
                detail.append(key)
                value = json.loads(value)
                uid_list = value[0]
                uname = []
                try:
                    search_results = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids': uid_list})['docs']
                    for item in search_results: 
                        if item['found']:
                            uname.append(item['_source']['nick_name'])
                        else:
                            uname.append('unknown')
                except:
                    uname = uid_list
                detail.extend([uname,value[1]])
                results.append(detail)
    sorted_results = sorted(results, key=lambda x:x[2], reverse=True)
    return sorted_results
Example 5
def get_sensitive_user_detail(uid_list, date, sensitive):
    es_cluster = es_user_profile  # legacy alias; this version actually queries es_bci
    ts = datetime2ts(date)
    results = []
    index_name = pre_influence_index + str(date).replace(
        '-', '')  # index_name:20130901
    user_bci_results = es_bci.mget(index=index_name,
                                   doc_type='bci',
                                   body={'ids': uid_list},
                                   _source=False,
                                   fields=['user_index'])['docs']
    user_profile_results = es_user_profile.mget(index="weibo_user",
                                                doc_type="user",
                                                body={"ids": uid_list},
                                                _source=True)['docs']
    top_influence_value = get_top_value("user_index", es_bci, index_name, "bci")
    for i in range(0, len(uid_list)):
        personal_info = [''] * 6
        uid = uid_list[i]
        personal_info[0] = uid_list[i]
        personal_info[1] = uid_list[i]
        if user_profile_results[i]['found']:
            profile_dict = user_profile_results[i]['_source']
            uname = profile_dict['nick_name']
            if uname:
                personal_info[1] = uname
            personal_info[2] = profile_dict['user_location']
            personal_info[3] = profile_dict['fansnum']
            personal_info[4] = profile_dict['statusnum']
        if user_bci_results[i]['found']:
            try:
                tmp_bci = user_bci_results[i]['fields']['user_index'][0]
                influence = math.log(
                    tmp_bci / float(top_influence_value) * 9 + 1, 10) * 100
                personal_info[5] = influence
            except:
                personal_info[5] = 0
        else:
            personal_info[5] = 0
        if sensitive:
            sensitive_words = redis_cluster.hget('sensitive_' + str(ts),
                                                 str(uid))
            if sensitive_words:
                sensitive_dict = json.loads(sensitive_words)
                personal_info.append(sensitive_dict.keys())
            else:
                personal_info.append([])
        else:
            personal_info.append([])
        results.append(personal_info)
    return results
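The influence score above is a log rescaling of the raw BCI value against the day's maximum. Restated as a standalone helper for clarity (a sketch; the real code inlines it):

import math

def normalize_influence(value, top_value):
    # log10(x*9 + 1) maps [0, top_value] onto [0, 100]:
    # value == 0 -> 0, value == top_value -> 100.
    if not top_value:
        return 0
    return math.log(value / float(top_value) * 9 + 1, 10) * 100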
Example 6
def search_follower(uid, sensitive):
    results = dict()
    stat_results = dict()
    for db_num in R_DICT:
        r = R_DICT[db_num]
        if sensitive:
            br_uid_results = r.hgetall('sensitive_be_retweet_' + str(uid))
        else:
            br_uid_results = r.hgetall('be_retweet_' + str(uid))
        if br_uid_results:
            for br_uid in br_uid_results:
                if br_uid != uid:
                    try:
                        # hgetall returns strings; cast before summing
                        stat_results[br_uid] += int(br_uid_results[br_uid])
                    except:
                        stat_results[br_uid] = int(br_uid_results[br_uid])
    if not stat_results:
        return [None, 0]
    try:
        sort_stat_results = sorted(stat_results.items(),
                                   key=lambda x: x[1],
                                   reverse=True)[:20]
    except:
        return [None, 0]

    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(index='weibo_user',
                                              doc_type='user',
                                              body={'ids': uid_list})['docs']
    es_portrait_results = es.mget(index='sensitive_user_portrait',
                                  doc_type='user',
                                  body={'ids': uid_list})['docs']

    result_list = []
    for i in range(len(es_profile_results)):
        item = es_profile_results[i]
        uid = item['_id']
        try:
            source = item['_source']
            uname = source['nick_name']
        except:
            uname = u'unknown'

        portrait_item = es_portrait_results[i]
        try:
            source = portrait_item['_source']
            in_status = 1
        except:
            in_status = 0
        result_list.append([uid, [uname, stat_results[uid], in_status]])
    return [result_list[:20], len(stat_results)]
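A sketch of the counter layout search_follower aggregates: one hash per shard named 'be_retweet_<uid>' (or the 'sensitive_' variant), field = retweeting uid, value = count. The shard map, uids, and counts are placeholders.

# Hedged sketch (illustrative shard map and data).
import redis

R_DICT = {0: redis.StrictRedis(host='127.0.0.1', port=6379, db=0)}

R_DICT[0].hincrby('be_retweet_1499104401', '1265965213', 3)
# search_follower('1499104401', False) then returns
# [[[uid, [uname, count, in_status]], ... up to 20], total_user_count]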
def identify_user_out(input_uid_list):
    out_user_list = []
    in_user_list = []
    input_len = len(input_uid_list)
    iter_count = 0
    print 'identify user out'
    #get user list who is out user_portrait
    while iter_count < input_len:
        iter_user_list = input_uid_list[iter_count: iter_count+DETECT_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids':iter_user_list}, _source=False)['docs']
        except:
            portrait_result = []
        for item in portrait_result:
            uid = item['_id']
            if item['found'] != True:
                out_user_list.append(uid)
            else:
                in_user_list.append(uid)
        iter_count += DETECT_ITER_COUNT
    print 'get out user portrait information'
    #get user profile information for out user_portrait
    iter_count = 0
    out_user_count = len(out_user_list)
    out_user_result = []
    while iter_count < out_user_count:
        iter_user_list = out_user_list[iter_count: iter_count+DETECT_ITER_COUNT]
        try:
            profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids':iter_user_list}, _source=True)['docs']
        except:
            profile_result = []
        for item in profile_result:
            uid = item['_id']
            if item['found']==True:
                source = item['_source']
                uname = source['nick_name']
                fansnum = source['fansnum']
                statusnum = source['statusnum']
                friendsnum = source['friendsnum']
            else:
                uname = u'未知'  # '未知' means "unknown"
                fansnum = u'未知'
                statusnum = u'未知'
                friendsnum = u'未知'
            out_user_result.append([uid, uname, fansnum, statusnum, friendsnum])
        iter_count += DETECT_ITER_COUNT 
    
    sort_out_user_result = sorted(out_user_result, key=lambda x:x[2], reverse=True)

    return in_user_list, sort_out_user_result
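The while loops above page through uid_list in DETECT_ITER_COUNT-sized slices so each ES mget stays bounded. The same pattern as a generic helper (a sketch; DETECT_ITER_COUNT really comes from the project's configuration):

def iter_chunks(seq, size):
    # Yield consecutive fixed-size slices of seq.
    for start in range(0, len(seq), size):
        yield seq[start:start + size]

# for iter_user_list in iter_chunks(input_uid_list, DETECT_ITER_COUNT): ...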
Example 8
def search_retweet(uid, sensitive):
    stat_results = dict()
    results = dict()
    for db_num in R_DICT:
        r = R_DICT[db_num]
        if not sensitive:
            ruid_results = r.hgetall('retweet_' + str(uid))
        else:
            ruid_results = r.hgetall('sensitive_retweet_' +
                                     str(uid))  # because of sensitive weibo
        if ruid_results:
            for ruid in ruid_results:
                if ruid != uid:
                    if ruid in stat_results:
                        # hgetall returns strings; cast before summing
                        stat_results[ruid] += int(ruid_results[ruid])
                    else:
                        stat_results[ruid] = int(ruid_results[ruid])

    if stat_results:
        sort_stat_results = sorted(stat_results.items(),
                                   key=lambda x: x[1],
                                   reverse=True)[:20]
    else:
        return [None, 0]
    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(index='weibo_user',
                                              doc_type='user',
                                              body={'ids': uid_list})['docs']
    es_portrait_results = es.mget(index='sensitive_user_portrait',
                                  doc_type='user',
                                  body={'ids': uid_list})['docs']
    result_list = []
    for i in range(len(es_profile_results)):
        item = es_profile_results[i]
        uid = item['_id']
        if item['found']:
            uname = item['_source']['nick_name']
        else:
            uname = u'unknown'

        portrait_item = es_portrait_results[i]
        if portrait_item['found']:
            in_status = 1
        else:
            in_status = 0

        result_list.append([uid, [uname, stat_results[uid], in_status]])

    return [result_list[:20], len(stat_results)]
def get_top_user():
    results = dict()
    for domain in domain_dict:
        results[domain] = []
        user_list = domain_dict[domain]
        profile_result = es_user_profile.mget(index=index_name, doc_type=index_type, body={'ids':user_list})['docs']
        for profile in profile_result:
            uid = profile['_id']
            try:
                uname = profile['_source']['nick_name']
                photo_url = profile['_source']['photo_url']
            except:
                uname = 'unknown'
                photo_url = 'unknown'
            results[domain].append([uid, uname, photo_url])
    return results
Example 10
def search_follower(uid, sensitive):
    results = dict()
    stat_results = dict()
    for db_num in R_DICT:
        r = R_DICT[db_num]
        if sensitive:
            br_uid_results = r.hgetall("sensitive_be_retweet_" + str(uid))
        else:
            br_uid_results = r.hgetall("be_retweet_" + str(uid))
        if br_uid_results:
            for br_uid in br_uid_results:
                if br_uid != uid:
                    try:
                        # hgetall returns strings; cast before summing
                        stat_results[br_uid] += int(br_uid_results[br_uid])
                    except:
                        stat_results[br_uid] = int(br_uid_results[br_uid])
    if not stat_results:
        return [None, 0]
    try:
        sort_stat_results = sorted(stat_results.items(), key=lambda x: x[1], reverse=True)[:20]
    except:
        return [None, 0]

    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(index="weibo_user", doc_type="user", body={"ids": uid_list})["docs"]
    es_portrait_results = es.mget(index="sensitive_user_portrait", doc_type="user", body={"ids": uid_list})["docs"]

    result_list = []
    for i in range(len(es_profile_results)):
        item = es_profile_results[i]
        uid = item["_id"]
        try:
            source = item["_source"]
            uname = source["nick_name"]
        except:
            uname = u"unknown"

        portrait_item = es_portrait_results[i]
        try:
            source = portrait_item["_source"]
            in_status = 1
        except:
            in_status = 0
        result_list.append([uid, [uname, stat_results[uid], in_status]])
    return [result_list[:20], len(stat_results)]
def search_follower(uid, sensitive):
    results = dict()
    stat_results = dict()
    if 1:  # single redis cluster here; mirrors the sharded R_DICT loop above
        r = r_cluster
        if sensitive:
            br_uid_results = r.hgetall('sensitive_be_retweet_'+str(uid))
        else:
            br_uid_results = r.hgetall('be_retweet_'+str(uid))
        if br_uid_results:
            for br_uid in br_uid_results:
                if br_uid != uid:
                    try:
                        # hgetall returns strings; cast before summing
                        stat_results[br_uid] += int(br_uid_results[br_uid])
                    except:
                        stat_results[br_uid] = int(br_uid_results[br_uid])
    if not stat_results:
        return [None, 0]
    try:
        sort_stat_results = sorted(stat_results.items(), key=lambda x:x[1], reverse=True)[:20]
    except:
        return [None, 0]

    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list})['docs']
    es_portrait_results = es.mget(index='sensitive_user_portrait', doc_type='user', body={'ids':uid_list})['docs']

    result_list = []
    for i in range(len(es_profile_results)):
        item = es_profile_results[i]
        uid = item['_id']
        try:
            source = item['_source']
            uname = source['nick_name']
        except:
            uname = u'unknown'

        portrait_item = es_portrait_results[i]
        try:
            source = portrait_item['_source']
            in_status = 1
        except:
            in_status = 0
        result_list.append([uid,[uname, stat_results[uid], in_status]])
    return [result_list[:20], len(stat_results)]
def get_user_url(uid_list):
    results = []
    try:
        es_results = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids": uid_list})['docs']
    except:
        es_results = {}
    for item in es_results:
        temp = []
        if item['found']:
            temp.append(item['_source']["photo_url"])
            temp.append(item['_source']['nick_name'])
            temp.append(item['_id'])
        else:
            temp.append("unknown")
            temp.append(item['_id'])
            temp.append(item['_id'])
        results.append(temp)
    return results
def search_retweet(uid, sensitive):
    stat_results = dict()
    results = dict()
    if 1:  # single redis cluster here; mirrors the sharded R_DICT loop above
        r = r_cluster
        if not sensitive:
            ruid_results = r.hgetall('retweet_'+str(uid))
        else:
            ruid_results = r.hgetall('sensitive_retweet_'+str(uid)) # because of sensitive weibo
        if ruid_results:
            for ruid in ruid_results:
                if ruid != uid:
                    if ruid in stat_results:
                        # hgetall returns strings; cast before summing
                        stat_results[ruid] += int(ruid_results[ruid])
                    else:
                        stat_results[ruid] = int(ruid_results[ruid])

    if stat_results:
        sort_stat_results = sorted(stat_results.items(), key=lambda x:x[1], reverse=True)[:20]
    else:
        return [None, 0]
    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list})['docs']
    es_portrait_results = es.mget(index='sensitive_user_portrait', doc_type='user', body={'ids':uid_list})['docs']
    result_list = []
    for i in range(len(es_profile_results)):
        item = es_profile_results[i]
        uid = item['_id']
        if item['found']:
            uname = item['_source']['nick_name']
        else:
            uname = u'unknown'

        portrait_item = es_portrait_results[i]
        if portrait_item['found']:
            in_status = 1
        else:
            in_status = 0

        result_list.append([uid,[uname, stat_results[uid], in_status]])

    return [result_list[:20], len(stat_results)]
Example 14
def search_retweet(uid, sensitive):
    stat_results = dict()
    results = dict()
    for db_num in R_DICT:
        r = R_DICT[db_num]
        if not sensitive:
            ruid_results = r.hgetall("retweet_" + str(uid))
        else:
            ruid_results = r.hgetall("sensitive_retweet_" + str(uid))  # because of sensitive weibo
        if ruid_results:
            for ruid in ruid_results:
                if ruid != uid:
                    if ruid in stat_results:
                        # hgetall returns strings; cast before summing
                        stat_results[ruid] += int(ruid_results[ruid])
                    else:
                        stat_results[ruid] = int(ruid_results[ruid])

    if stat_results:
        sort_stat_results = sorted(stat_results.items(), key=lambda x: x[1], reverse=True)[:20]
    else:
        return [None, 0]
    uid_list = [item[0] for item in sort_stat_results]
    es_profile_results = es_user_profile.mget(index="weibo_user", doc_type="user", body={"ids": uid_list})["docs"]
    es_portrait_results = es.mget(index="sensitive_user_portrait", doc_type="user", body={"ids": uid_list})["docs"]
    result_list = []
    for i in range(len(es_profile_results)):
        item = es_profile_results[i]
        uid = item["_id"]
        if item["found"]:
            uname = item["_source"]["nick_name"]
        else:
            uname = u"unknown"

        portrait_item = es_portrait_results[i]
        if portrait_item["found"]:
            in_status = 1
        else:
            in_status = 0

        result_list.append([uid, [uname, stat_results[uid], in_status]])

    return [result_list[:20], len(stat_results)]
Example 15
def get_sensitive_user_detail(uid_list, date, sensitive):
    es_cluster = es_user_profile  # legacy alias; this version actually queries es_bci
    ts = datetime2ts(date)
    results = []
    index_name = pre_influence_index + str(date).replace('-','') # index_name:20130901
    user_bci_results = es_bci.mget(index=index_name, doc_type='bci', body={'ids':uid_list}, _source=False, fields=['user_index'])['docs']
    user_profile_results = es_user_profile.mget(index="weibo_user", doc_type="user", body={"ids":uid_list}, _source=True)['docs']
    top_influence_value = get_top_value("user_index", es_bci, index_name, "bci")
    for i in range(0, len(uid_list)):
        personal_info = ['']*6
        uid = uid_list[i]
        personal_info[0] = uid_list[i]
        personal_info[1] = uid_list[i]
        if user_profile_results[i]['found']:
            profile_dict = user_profile_results[i]['_source']
            uname = profile_dict['nick_name']
            if uname:
                personal_info[1] = uname
            personal_info[2] = profile_dict['user_location']
            personal_info[3] = profile_dict['fansnum']
            personal_info[4] = profile_dict['statusnum']
        if user_bci_results[i]['found']:
            try:
                tmp_bci = user_bci_results[i]['fields']['user_index'][0]
                influence = math.log(tmp_bci/float(top_influence_value)*9+1, 10)*100
                personal_info[5] = influence
            except:
                personal_info[5] = 0
        else:
            personal_info[5] = 0
        if sensitive:
            sensitive_words = redis_cluster.hget('sensitive_' + str(ts), str(uid))
            if sensitive_words:
                sensitive_dict = json.loads(sensitive_words)
                personal_info.append(sensitive_dict.keys())
            else:
                personal_info.append([])
        else:
            personal_info.append([])
        results.append(personal_info)
    return results
Example 16
def get_sensitive_user_detail(uid_list, date, sensitive):
    results = []
    index_name = str(date).replace('-', '')  # index_name:20130901
    user_bci_results = es_cluster.mget(index=index_name,
                                       doc_type='bci',
                                       body={'ids': uid_list},
                                       _source=True)['docs']
    user_profile_results = es_user_profile.mget(index="weibo_user",
                                                doc_type="user",
                                                body={"ids": uid_list},
                                                _source=True)['docs']
    for i in range(0, len(uid_list)):
        personal_info = [''] * 6
        uid = uid_list[i]
        personal_info[0] = uid_list[i]
        if user_profile_results[i]['found']:
            profile_dict = user_profile_results[i]['_source']
            personal_info[1] = profile_dict['nick_name']
            personal_info[2] = profile_dict['user_location']
            personal_info[3] = profile_dict['fansnum']
            personal_info[4] = profile_dict['statusnum']
        if user_bci_results[i]['found']:
            personal_info[5] = user_bci_results[i]['_source'].get(
                'user_index', 0)
        else:
            personal_info[5] = 0
        if sensitive:
            sensitive_words = r_cluster.hget('sensitive_' + index_name,
                                             str(uid))
            if sensitive_words:
                sensitive_dict = json.loads(sensitive_words)
                personal_info.append(sensitive_dict.keys())
            else:
                personal_info.append([])
        results.append(personal_info)
    return results
Example 17
def get_task_detail_2(task_name, ts, user):
    results = dict()
    index_name = task_name
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)["_source"]
    task_name = task_detail['task_name']
    history_status = json.loads(task_detail['history_status'])
    start_time = task_detail['create_at']
    create_by = task_detail['create_by']
    stop_time = task_detail['stop_time']
    remark = task_detail.get('remark', '')
    portrait_detail = []
    count = 0 # counter

    top_influence = get_top_influence("influence")
    top_activeness = get_top_influence("activeness")
    top_importance = get_top_influence("importance")

    time_series = [] # timestamps
    #positive_sentiment_list = [] # sentiment lists
    #neutral_sentiment_list = []
    #negetive_sentiment_list = []
    all_weibo_list = []
    origin_weibo_list = [] # weibo counts
    retweeted_weibo_list = []
    #retweeted_weibo_count = [] # times retweeted by others
    #comment_weibo_count = []
    #total_number_count = []
    #burst_time_list = [] # burst time list
    important_user_set = set() # important users
    out_portrait_users = set() # not yet in the portrait index

    ts = int(ts)
    time_series = history_status
    #for item in history_status:
    #    if int(item[0]) <= ts:
    #        time_series.append(item[0]) # all timestamps up to now

    # get detail task information from es
    if time_series:
        flow_detail = es.mget(index=index_sensing_task, doc_type=_id, body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        for item in flow_detail:
            item = item['_source']
            timestamp = item['timestamp']
            #sentiment_distribution = json.loads(item["sentiment_distribution"])
            #positive_sentiment_list.append(int(sentiment_distribution['1']))
            #negetive_sentiment_list.append(int(sentiment_distribution['2'])+int(sentiment_distribution['3']) \
            #        +int(sentiment_distribution['4'])+int(sentiment_distribution['5'])+int(sentiment_distribution['6']))
            #neutral_sentiment_list.append(int(sentiment_distribution['0']))
            origin_weibo_list.append(item["origin_weibo_number"]) # real
            retweeted_weibo_list.append(item['retweeted_weibo_number']) # real
            all_weibo_list.append(item["origin_weibo_number"]+item['retweeted_weibo_number'])
            #retweeted_weibo_count.append(item['retweeted_weibo_count'])
            #comment_weibo_count.append(item['comment_weibo_count'])
            #total_number_count.append(item['weibo_total_number'])
            temp_important_user_list = json.loads(item['important_users'])
            unfiltered_users = json.loads(item['unfilter_users'])
            temp_out_portrait_users = set(unfiltered_users) - set(temp_important_user_list) # not in the portrait index
            important_user_set = important_user_set | set(temp_important_user_list)
            out_portrait_users = out_portrait_users | set(temp_out_portrait_users)

            #burst_reason = item.get("burst_reason", "")
            #if burst_reason:
            #    burst_time_list.append([timestamp, count, burst_reason])
            count += 1

    ####################################################################################
    # tally burst reasons and draw the corresponding conclusions
    """
    weibo_variation_count = 0
    weibo_variation_time = []
    sentiment_variation_count = 0
    sentiment_variation_time = []
    sensitive_variation_count = 0 # sensitive
    sensitive_variation_time = [] # sensitive
    common_variation_count = 0
    common_variation_time = []
    if burst_time_list:
        for item in burst_time_list:
            tmp_common = 0
            x1 = 0
            x2 = 0
            x3 = 0
            if signal_count_varition in item[2]:
                weibo_variation_count += 1
                weibo_variation_time.append([ts2date_min(item[0]), total_number_count[item[1]]])
                x1 = total_number_count[item[1]]
                tmp_common += 1
            if signal_sentiment_varition in item[2]:
                tmp_common += 1
                sentiment_variation_count += 1
                x2 = negetive_sentiment_list[item[1]]
                sentiment_variation_time.append([ts2date_min(item[0]), negetive_sentiment_list[item[1]]])
            if signal_sensitive_variation in item[2]:
                tmp_common += 1
                sensitive_variation_count += 1
                x3 = sensitive_total_number_list[item[1]]
                sensitive_variation_time.append([ts2date_min(item[0]), all_weibo_list[item[1]]])
            if tmp_common >= 2:
                common_variation_count += 1
                common_variation_time.append([ts2date_min(item[0]), x1, x2, x3])

    warning_conclusion = remark
    variation_distribution = []
    if weibo_variation_count:
        variation_distribution.append(weibo_variation_time)
    else:
        variation_distribution.append([])

    if sentiment_variation_count:
        variation_distribution.append(sentiment_variation_time)
    else:
        variation_distribution.append([])

    if sensitive_variation_count:
        variation_distribution.append(sensitive_variation_time)
    else:
        variation_distribution.append([])

    if common_variation_count:
        variation_distribution.append(common_variation_time)
    else:
        variation_distribution.append([])

    results['warning_conclusion'] = warning_conclusion
    results['variation_distribution'] = variation_distribution

    # per-user heat
    """

    # fetch profile details for the important users
    important_uid_list = list(important_user_set)
    out_portrait_users_list = list(out_portrait_users)
    user_detail_info = [] #
    out_user_detail_info = []
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":important_uid_list}, fields=['uid', 'uname', 'domain', 'topic_string', "photo_url", 'importance', 'influence', 'activeness'])['docs']
        for item in user_results:
            if item['found']:
                temp = []
                #if int(item['fields']['importance'][0]) < IMPORTANT_USER_THRESHOULD:
                #    continue
                temp.append(item['fields']['uid'][0])
                uname = item['fields']['uname'][0]
                if not uname or uname == "未知":
                    uname = item['fields']['uid'][0]
                temp.append(uname)
                temp.append(item['fields']['photo_url'][0])
                temp.append(item['fields']['domain'][0])
                temp.append(item['fields']['topic_string'][0].split('&'))
                #hot_count = count_hot_uid(item['fields']['uid'][0], start_time, stop_time)
                #temp.append(hot_count)
                temp.append(math.log(item['fields']['importance'][0]/float(top_importance)*9+1, 10)*100)
                temp.append(math.log(item['fields']['influence'][0]/float(top_influence)*9+1, 10)*100)
                temp.append(math.log(item['fields']['activeness'][0]/float(top_activeness)*9+1, 10)*100)
                user_detail_info.append(temp)
    # sort
    if user_detail_info:
        user_detail_info = sorted(user_detail_info, key=lambda x:x[6], reverse=True)
    else:
        user_detail_info = []

    if out_portrait_users_list:
        profile_results = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":out_portrait_users_list})["docs"]
        bci_index = "bci_" + ts2datetime(ts-DAY).replace('-','')
        influence_results = es.mget(index=bci_index, doc_type="bci", body={"ids":out_portrait_users_list}, fields=["user_index"])['docs']
        bci_results = es_profile.mget(index="bci_history", doc_type="bci", body={"ids":out_portrait_users_list}, fields=['user_fansnum'])['docs']
        top_influence = get_top_all_influence("user_index", ts)
        count = 0
        if profile_results:
            for item in profile_results:
                temp = []
                if item['found']:
                    uid = item['_source']['uid']
                    temp = get_user_profile([uid], ['nick_name', 'user_location', 'statusnum', 'fansnum'])[0]
                else:
                    temp = [item['_id'], item['_id'], '', '', '']

                """
                if item['found']:
                    temp.append(item['_source']['uid'])
                    if item['_source']['nick_name']:
                        temp.append(item['_source']['nick_name'])
                    else:
                        temp.append(item['_source']['uid'])
                    temp.append(item['_source']['user_location'])
                    temp.append(item['_source']['statusnum'])
                    temp.append(item['_source']['friendsnum'])
                else:
                    temp.append(item['_id'])
                    temp.append(item['_id'])
                    temp.extend([''])
                    temp.append('--')
                    temp.append('--')
                try:
                    user_fansnum = bci_results[count]["fields"]["user_fansnum"][0]
                except:
                    user_fansnum = 0
                temp.append(user_fansnum)
                temp_influ = influence_results[count]
                if temp_influ.get('found', 0):
                    user_index = temp_influ['fields']['user_index'][0]
                    temp.append(math.log(user_index/float(top_influence)*9+1, 10)*100)
                else:
                    temp.append(0)
                """
                count += 1
                out_user_detail_info.append(temp)

    revise_time_series = []
    for item in time_series:
        revise_time_series.append(ts2date_min(item))

    results['important_user_detail'] = user_detail_info
    results['out_portrait_user_detail'] = out_user_detail_info
    #results['burst_time'] = burst_time_list # burst moments and their reasons
    results['time_series'] = revise_time_series
    #results['positive_sentiment_list'] = positive_sentiment_list
    #results['negetive_sentiment_list'] = negetive_sentiment_list
    #results['neutral_sentiment_list'] = neutral_sentiment_list
    results['all_weibo_list'] = all_weibo_list
    results['origin_weibo_list'] = origin_weibo_list
    results['retweeted_weibo_list'] = retweeted_weibo_list
    #results['comment_weibo_count'] = comment_weibo_count
    #results['retweeted_weibo_count'] = retweeted_weibo_count
    #results['total_number_list'] = total_number_count

    return results
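For reference, a sketch of one per-timestamp sensing document this loop consumes; the field names follow the reads above, the values are invented:

import json

example_flow_doc = {
    'timestamp': 1377966600,
    'origin_weibo_number': 120,
    'retweeted_weibo_number': 80,
    'important_users': json.dumps(['1499104401']),
    'unfilter_users': json.dumps(['1499104401', '1265965213']),
}
# out-of-portrait users per step = set(unfilter_users) - set(important_users)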
Example 18
def get_sensitive_text_detail(task_name, ts, user, order):
    _id = user + '-' + task_name
    task_detail = es.get(index=index_sensing_task, doc_type=_id,
                         id=ts)['_source']
    weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])

    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid])  # total count is keyed by the mid itself
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()

    results = []
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "terms": {
                        "mid": mid_list
                    }
                }
            }
        }
    }

    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - DAY)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    if exist_es_1:
        index_list.append(index_name_1)

    if index_list and mid_list:
        search_results = es_text.search(index=index_list,
                                        doc_type=flow_text_index_type,
                                        body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    text_dict = dict()  # text info
    portrait_dict = dict()  # profile background info
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source']  # _id is the mid
        if uid_list:
            portrait_result = es_profile.mget(
                index=profile_index_name,
                doc_type=profile_index_type,
                body={"ids": uid_list},
                fields=['nick_name', 'photo_url'])["docs"]
            for item in portrait_result:
                if item['found']:
                    portrait_dict[item['_id']] = {
                        "nick_name": item["fields"]["nick_name"][0],
                        "photo_url": item["fields"]["photo_url"][0]
                    }
                else:
                    portrait_dict[item['_id']] = {
                        "nick_name": item['_id'],
                        "photo_url": ""
                    }

        if order == "total":
            sorted_list = sorted(weibo_detail_list,
                                 key=lambda x: x[1],
                                 reverse=True)
        elif order == "retweeted":
            sorted_list = sorted(weibo_detail_list,
                                 key=lambda x: x[2],
                                 reverse=True)
        elif order == "comment":
            sorted_list = sorted(weibo_detail_list,
                                 key=lambda x: x[3],
                                 reverse=True)
        else:
            sorted_list = weibo_detail_list

        count_n = 0
        for item in sorted_list:
            mid = item[0]
            iter_text = text_dict.get(mid, {})
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            if iter_text:
                uid = iter_text['uid']
                temp.append(uid)
                iter_portrait = portrait_dict.get(uid, {})
                if iter_portrait:
                    temp.append(iter_portrait['nick_name'])
                    temp.append(iter_portrait['photo_url'])
                else:
                    temp.extend([uid, ''])
                temp.append(iter_text["text"])
                temp.append(iter_text["sentiment"])
                temp.append(ts2date(iter_text['timestamp']))
                temp.append(iter_text['geo'])
                temp.append(iter_text['message_type'])
                temp.append(item[2])
                temp.append(item[3])
                temp.append(iter_text.get('sensitive', 0))
                count_n += 1
                results.append(temp)

        if results and order == "ts":
            results = sorted(results, key=lambda x: x[5], reverse=True)

        if results and order == "sensitive":
            results = sorted(results, key=lambda x: x[-1], reverse=True)

    return results
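The order parameter selects a column of weibo_detail_list ([mid, total, retweeted, comment]); "ts" and "sensitive" re-sort the assembled rows afterwards. A compact restatement of the column mapping (a sketch, not project code):

ORDER_KEY = {'total': 1, 'retweeted': 2, 'comment': 3}

def sort_weibo_detail(weibo_detail_list, order):
    key_index = ORDER_KEY.get(order)
    if key_index is None:
        return weibo_detail_list
    return sorted(weibo_detail_list, key=lambda x: x[key_index], reverse=True)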
def get_retweet_weibo_detail(ts, user, task_name, size, text_type, type_value):
    _id = user + '-' + task_name
    task_detail = es_user_portrait.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    origin_weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    retweeted_weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])

    mid_list = []
    mid_list.extend(origin_weibo_detail.keys())
    mid_list.extend(retweeted_weibo_detail.keys())

    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                            {"range":{
                                "timestamp":{
                                    "gte": ts - time_interval,
                                    "lt": ts
                                }
                            }},
                            {"terms": {"root_mid": mid_list}}
                        ]
                    }
                }
            }
        },
        "sort": {"timestamp": {"order": "desc"}},
        "size": 100
    }

    if text_type == "message_type":
        query_body['query']['filtered']['filter']['bool']['must'].append({"term":{text_type: type_value}})
    if text_type == "sentiment":
        #if isinstance(type_value, str):
        if len(type_value) == 1:
            query_body['query']['filtered']['filter']['bool']['must'].append({"term":{text_type: type_value}})
        else:
            query_body['query']['filtered']['filter']['bool']['must'].append({"terms":{text_type: type_value}})

    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts-time_interval)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)

    # 1. query the weibo text
    if datetime == datetime_1 and exist_es:
        search_results = es_text.search(index=index_name, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    elif datetime != datetime_1 and exist_es_1:
        search_results = es_text.search(index=index_name_1, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []
    #print search_results
    # 2. fetch info related to each weibo
    results = []
    uid_list = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list}, fields=['nick_name', 'photo_url'])["docs"]

        for i in range(len(uid_list)):
            item = search_results[i]['_source']
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            temp.append(item['uid'])
            if portrait_result[i]['found']:
                temp.append(portrait_result[i]["fields"]["nick_name"][0])
                temp.append(portrait_result[i]["fields"]["photo_url"][0])
            else:
                temp.append(item['uid'])
                temp.append("")
            temp.append(item["text"])
            #print item['text']
            temp.append(item["sentiment"])
            temp.append(ts2date(item['timestamp']))
            temp.append(item['geo'])
            temp.append(item["message_type"])
            results.append(temp)

    return results
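The index choice above boils down to: if the query window stays inside one calendar day, use that day's flow-text index, otherwise fall back to the previous day's. As a standalone helper (a sketch; the real ts2datetime and index prefix come from the project's utilities, and the 'YYYY-MM-DD' convention here is an assumption):

import time

def ts2datetime(ts):
    # assumed convention of the project's util: timestamp -> 'YYYY-MM-DD'
    return time.strftime('%Y-%m-%d', time.localtime(ts))

def pick_flow_text_index(ts, time_interval, prefix):
    current_day = ts2datetime(ts)
    previous_day = ts2datetime(ts - time_interval)
    if current_day == previous_day:
        return prefix + current_day   # window within one day
    return prefix + previous_day      # window straddles midnight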
def get_positive_weibo_detail(ts, social_sensors, keywords_list, size, sentiment_type=1):
    former_mid_list = query_mid_list(ts-time_interval, keywords_list, time_segment, social_sensors) # weibo mid list from the previous window
    current_mid_list = query_mid_list(ts, keywords_list, time_interval,  social_sensors)
    mid_list = []
    mid_list.extend(former_mid_list)
    mid_list.extend(current_mid_list)

    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                            {"range":{
                                "timestamp":{
                                    "gte": ts - time_interval,
                                    "lt": ts
                                }
                            }},
                        ],
                        "should":[
                            {"terms": {"root_mid": mid_list}},
                            {"terms": {"mid": mid_list}},
                            {"terms":{"keywords_string": keywords_list}}
                        ]
                    }
                }
            }
        },
        "sort": {"timestamp": {"order": "desc"}},
        "size": 100
    }



    #if social_sensors and int(sentiment_type) == 1:
    #    query_body["query"]["filtered"]["filter"]["bool"]["must"].append({"terms":{"uid": social_sensors}})

    if int(sentiment_type) == 1 or int(sentiment_type) == 0:
        query_body["query"]["filtered"]["filter"]["bool"]["must"].append({"term":{"sentiment":sentiment_type}})
    else:
        query_body["query"]["filtered"]["filter"]["bool"]["must"] = [{"terms":{"sentiment": ["2", "3"]}}]

    # check whether ts and ts-time_interval fall on the same day, to decide which ES index to query
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts-time_interval)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)

    # 1. aggregate the origin weibo mid list
    if datetime == datetime_1 and exist_es:
        search_results = es_text.search(index=index_name, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    elif datetime != datetime_1 and exist_es_1:
        search_results = es_text.search(index=index_name_1, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    results = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list}, fields=['nick_name', 'photo_url'])["docs"]

        for i in range(len(uid_list)):
            item = search_results[i]['_source']
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            temp.append(item['uid'])
            if portrait_result[i]['found']:
                temp.append(portrait_result[i]["fields"]["nick_name"][0])
                temp.append(portrait_result[i]["fields"]["photo_url"][0])
            else:
                temp.append("unknown")
                temp.append("")
            temp.append(item["text"])
            temp.append(item["sentiment"])
            temp.append(ts2date(item['timestamp']))
            temp.append(item['geo'])
            keywords_set = set(item['keywords_string'].split('&'))
            common_keywords = set(keywords_list) & keywords_set
            temp.append(list(common_keywords))
            temp.append(item['message_type'])
            results.append(temp)

    return results
def get_origin_weibo_detail(ts, user, task_name, size, order, message_type=1):
    _id = user + '-' + task_name
    task_detail = es_user_portrait.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']

    mid_value = json.loads(task_detail['mid_topic_value'])
    duplicate_dict = json.loads(task_detail['duplicate_dict'])
    tmp_duplicate_dict = dict()
    for k,v in duplicate_dict.iteritems():
        try:
            tmp_duplicate_dict[v].append(k)
        except:
            tmp_duplicate_dict[v] = [k, v]

        

    if message_type == 1:
        weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    elif message_type == 2:
        weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])
    else:
        weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])
    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid]) # total count is keyed by the mid itself
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()

    results = []
    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "terms":{"mid": mid_list}
                }
            }
        },
        "size": 1000,
        "sort": {"timestamp": {"order": "desc"}}
    }


    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts-DAY)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    if exist_es_1:
        index_list.append(index_name_1)

    if index_list and mid_list:
        search_results = es_text.search(index=index_list, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    text_dict = dict() # text info
    portrait_dict = dict() # profile background info
    sort_results = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source'] # _id is the mid
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list}, fields=['nick_name', 'photo_url'])["docs"]
            for item in portrait_result:
                if item['found']:
                    portrait_dict[item['_id']] = {"nick_name": item["fields"]["nick_name"][0], "photo_url": item["fields"]["photo_url"][0]}
                else:
                    portrait_dict[item['_id']] = {"nick_name": item['_id'], "photo_url":""}

        if order == "total":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[1], reverse=True)
        elif order == "retweeted":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[2], reverse=True)
        elif order == "comment":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[3], reverse=True)
        else:
            sorted_list = weibo_detail_list

        count_n = 0
        results_dict = dict()
        mid_index_dict = dict()
        for item in sorted_list: # size
            mid = item[0]
            iter_text = text_dict.get(mid, {})
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            if iter_text:
                uid = iter_text['uid']
                temp.append(uid)
                iter_portrait = portrait_dict.get(uid, {})
                if iter_portrait:
                    temp.append(iter_portrait['nick_name'])
                    temp.append(iter_portrait['photo_url'])
                else:
                    temp.extend([uid,''])
                temp.append(iter_text["text"])
                temp.append(iter_text["sentiment"])
                temp.append(ts2date(iter_text['timestamp']))
                temp.append(iter_text['geo'])
                if message_type == 1:
                    temp.append(1)
                elif message_type == 2:
                    temp.append(3)
                else:
                    temp.append(iter_text['message_type'])
                temp.append(item[2])
                temp.append(item[3])
                temp.append(iter_text.get('sensitive', 0))
                temp.append(iter_text['timestamp'])
                temp.append(mid_value[mid])
                temp.append(mid)
                results.append(temp)
            count_n += 1

        results = sorted(results, key=operator.itemgetter(-4, -2, -6), reverse=True) # sensitive flag, topic value, retweet count
        sort_results = []
        count = 0
        for item in results:
            sort_results.append([item])
            mid_index_dict[item[-1]] = count
            count += 1

        
        if tmp_duplicate_dict:
            remove_list = []
            value_list = tmp_duplicate_dict.values() # [[mid, mid], ]
            for item in value_list:
                tmp = []
                for mid in item:
                    if mid in mid_index_dict:  # .get(mid, 0) would wrongly skip position 0
                        tmp.append(mid_index_dict[mid])
                if len(tmp) > 1:
                    tmp_min = min(tmp)
                else:
                    continue
                tmp.remove(tmp_min)
                for iter_count in tmp:
                    sort_results[tmp_min].extend(sort_results[iter_count])
                    remove_list.append(sort_results[iter_count])
            if remove_list:
                for item in remove_list:
                    sort_results.remove(item)
        

    return sort_results
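The duplicate-folding step merges rows whose mids share a duplicate_dict cluster, keeping the earliest-ranked row. A standalone restatement (a sketch; names are illustrative):

def fold_duplicates(sort_results, clusters, mid_index_dict):
    # sort_results: list of single-row lists; clusters: lists of related mids.
    remove_list = []
    for mids in clusters:
        positions = [mid_index_dict[m] for m in mids if m in mid_index_dict]
        if len(positions) < 2:
            continue
        keep = min(positions)
        for pos in positions:
            if pos != keep:
                sort_results[keep].extend(sort_results[pos])
                remove_list.append(sort_results[pos])
    for row in remove_list:
        sort_results.remove(row)
    return sort_results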
def get_sensitive_text_detail(task_name, ts, user, order):
    _id = user + '-' + task_name
    task_detail = es.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])

    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid]) # total count is keyed by the mid itself
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()

    results = []
    query_body = {
        "query":{
            "filtered":{
                "filter":{
                    "terms":{"mid": mid_list}
                }
            }
        }
    }

    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts-DAY)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    if exist_es_1:
        index_list.append(index_name_1)

    if index_list and mid_list:
        search_results = es_text.search(index=index_list, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    text_dict = dict() # text info
    portrait_dict = dict() # profile background info
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source'] # _id is the mid
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list}, fields=['nick_name', 'photo_url'])["docs"]
            for item in portrait_result:
                if item['found']:
                    portrait_dict[item['_id']] = {"nick_name": item["fields"]["nick_name"][0], "photo_url": item["fields"]["photo_url"][0]}
                else:
                    portrait_dict[item['_id']] = {"nick_name": item['_id'], "photo_url":""}

        if order == "total":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[1], reverse=True)
        elif order == "retweeted":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[2], reverse=True)
        elif order == "comment":
            sorted_list = sorted(weibo_detail_list, key=lambda x:x[3], reverse=True)
        else:
            sorted_list = weibo_detail_list

        count_n = 0
        for item in sorted_list:
            mid = item[0]
            iter_text = text_dict.get(mid, {})
            temp = []
            # uid, nick_name, photo_url, text, sentiment, timestamp, geo, common_keywords, message_type
            if iter_text:
                uid = iter_text['uid']
                temp.append(uid)
                iter_portrait = portrait_dict.get(uid, {})
                if iter_portrait:
                    temp.append(iter_portrait['nick_name'])
                    temp.append(iter_portrait['photo_url'])
                else:
                    temp.extend([uid,''])
                temp.append(iter_text["text"])
                temp.append(iter_text["sentiment"])
                temp.append(ts2date(iter_text['timestamp']))
                temp.append(iter_text['geo'])
                temp.append(iter_text['message_type'])
                temp.append(item[2])
                temp.append(item[3])
                temp.append(iter_text.get('sensitive', 0))
                count_n += 1
                results.append(temp)

        if results and order == "ts":
            results = sorted(results, key=lambda x:x[5], reverse=True)

        if results and order == "sensitive":
            results = sorted(results, key=lambda x:x[-1], reverse=True)

    return results
def get_temporal_rank(task_type, sort="retweeted", number=100):
    number = int(number) - 1
    if int(task_type) == 0: # up to now
        sort_list = r.zrange("influence_%s" %sort, 0, number, withscores=True, desc=True)
    elif int(task_type) == 1:
        sort_list = r.zrange("influence_%s_1" %sort, 0, number, withscores=True, desc=True)
    elif int(task_type) == 2:
        sort_list = r.zrange("influence_%s_2" %sort, 0, number, withscores=True, desc=True)
    elif int(task_type) == 3:
        sort_list = r.zrange("influence_%s_3" %sort, 0, number, withscores=True, desc=True)
    else:
        sort_list = r.zrange("influence_%s_4" %sort, 0, number, withscores=True, desc=True)

    uid_list = []
    for item in sort_list:
        uid_list.append(item[0])

    if sort == "retweeted":
        other = "comment"
    else:
        other = "retweeted"

    results = []
    # look up profile background info
    if uid_list:
        profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list})["docs"]
        bci_result = es_user_profile.mget(index="bci_history", doc_type="bci", body={"ids":uid_list}, fields=['user_fansnum',"weibo_month_sum" ])["docs"]
        count = 0
        for item in profile_result:
            _id = item['_id']
            index = count  # position in sort_list; list.index() misfires on duplicate docs
            tmp = []
            tmp.append(item['_id'])
            if item['found']:
                item = item['_source']
                tmp.append(item['nick_name'])
                tmp.append(item['statusnum'])
                tmp.append(item['user_location'])
                tmp.append(item['fansnum'])
            else:
                tmp.extend(['',0,'',0])
            try:
                user_fansnum = bci_result[count]['fields']['user_fansnum'][0]
                tmp[4] = user_fansnum
            except:
                pass
            try:
                weibo_number = bci_result[count]['fields']["weibo_month_sum"][0]
                tmp[2] = weibo_number
            except:
                pass
            count_1 = int(sort_list[index][1])
            if int(task_type) == 0:
                tmp_count = r.zscore("influence_%s" %other, _id)
                if tmp_count:
                    count_2 =  int(tmp_count)
                else:
                    count_2 = 0
            else:
                tmp_count =  r.zscore("influence_%s_%s" %(other,task_type), _id)
                if tmp_count:
                    count_2 = int(tmp_count)
                else:
                    count_2 = 0
            if sort == "retweeted":
                tmp.append(count_1)
                tmp.append(count_2)
            else:
                tmp.append(count_2)
                tmp.append(count_1)
            results.append(tmp)
            count += 1

    if uid_list:
        count = 0
        portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":uid_list})["docs"]
        for item in portrait_result:
            if item['found']:
                results[count].append("1")
            else:
                results[count].append("0")
            count += 1

    return results
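A sketch of the leaderboards get_temporal_rank reads: Redis sorted sets named 'influence_<sort>' (plus '_1' through '_4' variants per task_type), scored by counts. The connection and values below are placeholders, using the pre-3.0 redis-py zincrby signature.

# Hedged sketch (illustrative data; assumes redis-py < 3.0).
import redis

r = redis.StrictRedis(host='127.0.0.1', port=6379)  # assumed

r.zincrby('influence_retweeted', '1499104401', 5)   # (name, value, amount)
r.zincrby('influence_comment', '1499104401', 2)
# get_temporal_rank(0, sort="retweeted", number=100) then returns rows
# [uid, uname, weibo_count, location, fansnum, retweeted, comment, in_portrait]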
Example 24
def identify_user_out(input_uid_list):
    out_user_list = []
    in_user_list = []
    input_len = len(input_uid_list)
    iter_count = 0
    print 'identify user out'
    #get user list who is out user_portrait
    while iter_count < input_len:
        iter_user_list = input_uid_list[iter_count:iter_count +
                                        DETECT_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(
                index=portrait_index_name,
                doc_type=portrait_index_type,
                body={'ids': iter_user_list},
                _source=False)['docs']
        except:
            portrait_result = []
        for item in portrait_result:
            uid = item['_id']
            if item['found'] != True:
                out_user_list.append(uid)
            else:
                in_user_list.append(uid)
        iter_count += DETECT_ITER_COUNT
    print 'get out user portrait information'
    #get user profile information for out user_portrait
    iter_count = 0
    out_user_count = len(out_user_list)
    out_user_result = []
    while iter_count < out_user_count:
        iter_user_list = out_user_list[iter_count:iter_count +
                                       DETECT_ITER_COUNT]
        try:
            profile_result = es_user_profile.mget(index=profile_index_name,
                                                  doc_type=profile_index_type,
                                                  body={'ids': iter_user_list},
                                                  _source=True)['docs']
        except:
            profile_result = []
        for item in profile_result:
            uid = item['_id']
            if item['found'] == True:
                source = item['_source']
                uname = source['nick_name']
                fansnum = source['fansnum']
                statusnum = source['statusnum']
                friendsnum = source['friendsnum']
            else:
                uname = u'未知'  # '未知' means "unknown"
                fansnum = u'未知'
                statusnum = u'未知'
                friendsnum = u'未知'
            out_user_result.append(
                [uid, uname, fansnum, statusnum, friendsnum])
        iter_count += DETECT_ITER_COUNT

    sort_out_user_result = sorted(out_user_result,
                                  key=lambda x: x[2],
                                  reverse=True)

    return in_user_list, sort_out_user_result