예제 #1
0
def search_user_info(es,index_name,doc_type,uid,result_name):
    """Return profile and bci-history details for a user's top interaction partners.

    The document ``uid`` is fetched from ``index_name``/``doc_type`` through
    ``es``. Its ``result_name`` field is decoded as a JSON mapping of
    partner uid -> count. The 20 entries with the largest counts are kept,
    ``uid`` itself is dropped, and the remaining partners are looked up in the
    user-profile and bci-history indices (module-level clients).

    Returns:
        A list of dicts with keys ``uid``, ``photo_url``, ``count``, ``uname``,
        ``influence``, ``fansnum``, ``friendsnum`` and ``weibo_count``, one per
        document returned by the profile lookup; ``None`` when the source
        document cannot be fetched or is empty.
    """
    try:
        retweet_result = es.get(index=index_name, doc_type=doc_type, id=uid)['_source']
    except Exception:
        # Missing document / unreachable cluster: nothing to report.
        return None
    if not retweet_result:
        return None

    retweet_dict = json.loads(retweet_result[result_name])
    # Truncation to 20 happens before the center user is removed, so the
    # result may hold 19 partners when ``uid`` is among the top entries.
    top_pairs = sorted(retweet_dict.items(), key=lambda pair: pair[1], reverse=True)[:20]
    uid_list = [pair[0] for pair in top_pairs if pair[0] != uid]

    try:
        user_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids':uid_list})['docs']
    except Exception:
        user_result = []
    try:
        bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids':uid_list}, fields=fields)['docs']
    except Exception:
        bci_history_result = []

    out_portrait_list = []
    for position, profile_doc in enumerate(user_result):
        partner_uid = profile_doc['_id']
        if profile_doc['found']:
            source = profile_doc['_source']
            uname = source['nick_name']
            photo_url = source['photo_url']
            if uname == '':
                uname = u'未知'
        else:
            uname = u'未知'
            photo_url = 'unknown'

        # Both mget calls use the same id list, so the docs line up by index.
        if position < len(bci_history_result):
            bci_doc = bci_history_result[position]
        else:
            bci_doc = {'found': False}
        if bci_doc['found']:
            bci_fields = bci_doc['fields']
            fansnum = bci_fields[fields[0]][0]
            user_weibo_count = bci_fields[fields[1]][0]
            user_friendsnum = bci_fields[fields[2]][0]
            influence = bci_fields[fields[3]][0]
        else:
            fansnum = ''
            user_weibo_count = ''
            user_friendsnum = ''
            influence = ''

        out_portrait_list.append({
            'uid': partner_uid,
            'photo_url': photo_url,
            'count': retweet_dict[partner_uid],
            'uname': uname,
            'influence': influence,
            'fansnum': fansnum,
            'friendsnum': user_friendsnum,
            'weibo_count': user_weibo_count,
        })
    return out_portrait_list
예제 #2
0
def show_keywords_rank(task_id, sort_type, count):
    """Build the ranked user table stored for a keyword-network task.

    The task document ``task_id`` is read from the network-task index. Its
    ``results`` field is decoded as JSON and the list stored under
    ``sort_type`` (pairs of uid and sort value) is expanded with profile,
    bci-history and portrait information.

    Args:
        task_id: id of the task document.
        sort_type: key inside the decoded ``results`` selecting the ranking.
        count: accepted for interface compatibility; it is not used to limit
            the output (the original body overwrote it with a loop counter).
            NOTE(review): confirm whether callers expect truncation.

    Returns:
        A list of rows ``[uid, nick_name, user_location, sort_value,
        user_fansnum, weibo_month_sum, in_portrait]`` where ``in_portrait`` is
        ``"1"`` or ``"0"``; an empty dict when the task document cannot be
        fetched.
    """
    try:
        task_found = es_network_task.get(index=network_keywords_index_name,
                doc_type=network_keywords_index_type, id=task_id)['_source']
    except Exception:
        return {}

    search_results = json.loads(task_found['results'])
    sort_results = search_results[sort_type]
    uid_list = []
    sort_list = []
    for source_uid, sort_value in sort_results:
        uid_list.append(source_uid)
        sort_list.append(sort_value)

    results = []
    if not uid_list:
        return results

    # Profile background. enumerate() gives the true position of each doc;
    # list.index() was quadratic and returned the first equal doc when the
    # same uid appeared twice, attaching the wrong sort value.
    profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":uid_list})["docs"]
    for position, item in enumerate(profile_result):
        if item['found']:
            source = item['_source']
            row = [source['uid'], source['nick_name'], source['user_location']]
        else:
            row = [item['_id'], '', '']
        row.append(sort_list[position])
        results.append(row)

    history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={"ids":uid_list})["docs"]
    for position, item in enumerate(history_result):
        if item['found']:
            source = item['_source']
            results[position].extend([source['user_fansnum'], source['weibo_month_sum']])
        else:
            results[position].extend(['', ''])

    portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":uid_list})["docs"]
    for position, item in enumerate(portrait_result):
        results[position].append("1" if item['found'] else "0")

    return results
예제 #3
0
def search_fans(uid,top_count):
    """Return profile and bci-history details for users who retweeted or commented on ``uid``.

    The ``uid_be_retweet`` and ``uid_be_comment`` JSON mappings stored for
    ``uid`` in the current be-retweet / be-comment indices are merged with
    ``union_dict``. ``uid`` itself is removed, the rest is ordered by merged
    count (descending) and at most 1000 users are looked up.

    Args:
        uid: id of the center user.
        top_count: accepted for interface compatibility; the limit applied is
            the fixed 1000 used by the original code.

    Returns:
        A list of dicts with keys ``uid``, ``photo_url``, ``uname``, ``count``,
        ``fansnum``, ``friendsnum`` and ``weibo_count``, one per document
        returned by the profile lookup (empty when that lookup fails).
    """
    now_date_ts = datetime2ts(ts2datetime(time.time()))
    db_number = get_db_num(now_date_ts)
    be_comment_index_name = be_comment_index_name_pre + str(db_number)
    be_retweet_index_name = be_retweet_index_name_pre + str(db_number)
    center_uid = uid

    try:
        be_retweet_result = es_retweet.get(index = be_retweet_index_name,doc_type=be_retweet_index_type,id=uid)['_source']
    except Exception:
        be_retweet_result = {}
    if be_retweet_result:
        be_retweet_uid_dict = json.loads(be_retweet_result['uid_be_retweet'])
    else:
        be_retweet_uid_dict = {}

    try:
        be_comment_result = es_be_comment.get(index=be_comment_index_name, doc_type=be_comment_index_type, id=uid)['_source']
    except Exception:
        be_comment_result = {}
    if be_comment_result:
        be_comment_uid_dict = json.loads(be_comment_result['uid_be_comment'])
    else:
        be_comment_uid_dict = {}

    fans_result = union_dict(be_retweet_uid_dict,be_comment_uid_dict)
    all_fans_dict = {}
    for fans_user in fans_result:
        if fans_user != center_uid:
            all_fans_dict[fans_user] = fans_result[fans_user]

    ranked_fans = sorted(all_fans_dict.items(), key=lambda pair: pair[1], reverse=True)
    # Keep the lookup requests bounded: only the 1000 strongest fans are queried.
    all_fans_uid_list = [pair[0] for pair in ranked_fans[:1000]]

    # User information from the profile index.
    try:
        out_user_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids':all_fans_uid_list})['docs']
    except Exception:
        out_user_result = []
    # Counters from bci_history.
    try:
        bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids': all_fans_uid_list}, fields=fields)['docs']
    except Exception:
        bci_history_result = []

    out_portrait_list = []
    for position, profile_doc in enumerate(out_user_result):
        fan_uid = profile_doc['_id']
        if profile_doc['found']:
            source = profile_doc['_source']
            uname = source['nick_name']
            photo_url = source['photo_url']
            if uname == '':
                uname = u'未知'
        else:
            uname = u'未知'
            photo_url = 'unknown'

        # Both mget calls use the same id list, so the docs line up by index.
        if position < len(bci_history_result):
            bci_doc = bci_history_result[position]
        else:
            bci_doc = {'found': False}
        if bci_doc['found']:
            bci_fields = bci_doc['fields']
            fansnum = bci_fields[fields[0]][0]
            user_weibo_count = bci_fields[fields[1]][0]
            user_friendsnum = bci_fields[fields[2]][0]
        else:
            fansnum = ''
            user_weibo_count = ''
            user_friendsnum = ''

        out_portrait_list.append({
            'uid': fan_uid,
            'photo_url': photo_url,
            'uname': uname,
            'count': int(all_fans_dict[fan_uid]),
            'fansnum': fansnum,
            'friendsnum': user_friendsnum,
            'weibo_count': user_weibo_count,
        })

    return out_portrait_list
예제 #4
0
def _load_interaction_counts(es_client, index, doc_type, uid, field):
    """Fetch document ``uid`` and decode the JSON uid -> count mapping in ``field``; ``{}`` when the fetch fails or the document is empty."""
    try:
        source = es_client.get(index=index, doc_type=doc_type, id=uid)['_source']
    except Exception:
        return {}
    if not source:
        return {}
    return json.loads(source[field])


def search_bidirect_interaction(uid, top_count):
    """Return details for users who interact with ``uid`` in both directions.

    A user qualifies when it appears both in the outgoing mappings
    (``uid_retweet`` / ``uid_comment``) and in the incoming mappings
    (``uid_be_retweet`` / ``uid_be_comment``) stored for ``uid``. Its count is
    the sum of the merged outgoing and merged incoming values, and the result
    follows that count in descending order.

    Args:
        uid: id of the center user.
        top_count: accepted for interface compatibility; not used to limit
            the result.

    Returns:
        A list of dicts with keys ``uid``, ``photo_url``, ``uname``, ``count``,
        ``fansnum``, ``friendsnum`` and ``weibo_count``, one per document
        returned by the profile lookup (empty when that lookup fails).
    """
    now_date_ts = datetime2ts(ts2datetime(time.time()))
    db_number = get_db_num(now_date_ts)
    retweet_index_name = retweet_index_name_pre + str(db_number)
    be_retweet_index_name = be_retweet_index_name_pre + str(db_number)
    comment_index_name = comment_index_name_pre + str(db_number)
    be_comment_index_name = be_comment_index_name_pre + str(db_number)
    center_uid = uid

    # Retweet / be_retweet direction pair.
    retweet_uid_dict = _load_interaction_counts(
        es_retweet, retweet_index_name, retweet_index_type, uid, 'uid_retweet')
    be_retweet_uid_dict = _load_interaction_counts(
        es_retweet, be_retweet_index_name, be_retweet_index_type, uid, 'uid_be_retweet')

    # Comment / be_comment direction pair.
    comment_uid_dict = _load_interaction_counts(
        es_comment, comment_index_name, comment_index_type, uid, 'uid_comment')
    # The original passed the misspelled name ``be_coment_index_name``; the
    # resulting NameError was swallowed by a bare except, so incoming comments
    # were never taken into account.
    # NOTE(review): the client is kept as es_comment, as in the original -
    # confirm it serves the be_comment index too.
    be_comment_uid_dict = _load_interaction_counts(
        es_comment, be_comment_index_name, be_comment_index_type, uid, 'uid_be_comment')

    retweet_comment_result = union_dict(retweet_uid_dict, comment_uid_dict)
    be_retweet_comment_result = union_dict(be_retweet_uid_dict, be_comment_uid_dict)
    interaction_user_set = set(retweet_comment_result.keys()) & set(be_retweet_comment_result.keys())
    all_interaction_dict = {}
    for interaction_user in interaction_user_set:
        if interaction_user != center_uid:
            all_interaction_dict[interaction_user] = (
                retweet_comment_result[interaction_user]
                + be_retweet_comment_result[interaction_user])

    ranked_interactions = sorted(all_interaction_dict.items(), key=lambda pair: pair[1], reverse=True)
    all_interaction_uid_list = [pair[0] for pair in ranked_interactions]

    # User information from the profile index.
    try:
        out_user_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids':all_interaction_uid_list})['docs']
    except Exception:
        out_user_result = []
    # Counters from bci_history.
    try:
        bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids': all_interaction_uid_list}, fields=fields)['docs']
    except Exception:
        bci_history_result = []

    out_portrait_list = []
    for position, profile_doc in enumerate(out_user_result):
        partner_uid = profile_doc['_id']
        if profile_doc['found']:
            source = profile_doc['_source']
            uname = source['nick_name']
            photo_url = source['photo_url']
            if uname == '':
                uname = u'未知'
        else:
            uname = u'未知'
            photo_url = 'unknown'

        # Both mget calls use the same id list, so the docs line up by index.
        if position < len(bci_history_result):
            bci_doc = bci_history_result[position]
        else:
            bci_doc = {'found': False}
        if bci_doc['found']:
            bci_fields = bci_doc['fields']
            fansnum = bci_fields[fields[0]][0]
            user_weibo_count = bci_fields[fields[1]][0]
            user_friendsnum = bci_fields[fields[2]][0]
        else:
            fansnum = ''
            user_weibo_count = ''
            user_friendsnum = ''

        out_portrait_list.append({
            'uid': partner_uid,
            'photo_url': photo_url,
            'uname': uname,
            'count': int(all_interaction_dict[partner_uid]),
            'fansnum': fansnum,
            'friendsnum': user_friendsnum,
            'weibo_count': user_weibo_count,
        })

    return out_portrait_list
예제 #5
0
def search_mention(now_ts, uid, top_count):
    """Return the user names ``uid`` mentioned recently, with bci-history counters.

    Starting from ``datetime2ts(ts2datetime(now_ts))`` the function steps back
    seven times by ``DAY``, reads the hash ``'at_' + str(ts)`` for ``uid``
    from ``r_cluster`` and sums the per-name values found there. Mentioned
    names are then searched in the profile index, and the matching user ids
    are looked up in bci_history.

    Args:
        now_ts: reference timestamp.
        uid: id of the user whose mentions are collected.
        top_count: accepted for interface compatibility; not used to limit
            or order the result.

    Returns:
        A list of dicts with keys ``uid``, ``uname``, ``count``, ``fansnum``,
        ``weibo_count`` and ``friendsnum``. Names resolved to a profile come
        first; unresolved names follow with an empty ``uid`` and empty
        counters. The list is not sorted by mention count.
    """
    ts = datetime2ts(ts2datetime(now_ts))
    stat_results = {}
    for _ in range(7):
        ts = ts - DAY
        try:
            result_string = r_cluster.hget('at_' + str(ts), str(uid))
        except Exception:
            result_string = ''
        if not result_string:
            continue
        result_dict = json.loads(result_string)
        for at_uname in result_dict:
            if at_uname in stat_results:
                stat_results[at_uname] += result_dict[at_uname]
            else:
                stat_results[at_uname] = result_dict[at_uname]

    out_list = list(stat_results.keys())

    # Resolve mentioned names against the profile index.
    # NOTE(review): the query matches on field 'uname' while the hits are
    # read through 'nick_name' - confirm the mapping has both.
    out_query_list = [{'match':{'uname':item}} for item in out_list]
    if len(out_query_list) != 0:
        query = [{'bool':{'should': out_query_list}}]
        try:
            out_profile_result = es_user_profile.search(index=profile_index_name, doc_type=profile_index_type, body={'query':{'bool':{'must':query}}, 'size':100})['hits']['hits']
        except Exception:
            out_profile_result = []
    else:
        out_profile_result = []

    # Rows are (uid, uname, mention count); resolved names first so that they
    # line up by index with the bci_history lookup below.
    out_portrait_list = []
    out_in_profile_list = []
    bci_search_id_list = []
    for out_item in out_profile_result:
        source = out_item['_source']
        uname = source['nick_name']
        if uname not in stat_results:
            # A hit whose nick_name is not one of the mentioned names has no
            # count to report; the original raised KeyError here.
            continue
        profile_uid = source['uid']
        out_portrait_list.append((profile_uid, uname, stat_results[uname]))
        out_in_profile_list.append(uname)
        bci_search_id_list.append(profile_uid)
    for unresolved_name in set(out_list) - set(out_in_profile_list):
        out_portrait_list.append(('', unresolved_name, stat_results[unresolved_name]))

    # Counters from bci_history, only for the resolved users.
    if bci_search_id_list:
        try:
            bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids': bci_search_id_list}, fields=['user_fansnum', 'weibo_month_sum', 'user_friendsnum'])['docs']
        except Exception:
            bci_history_result = []
    else:
        bci_history_result = []

    new_out_portrait_list = []
    for position, row in enumerate(out_portrait_list):
        if position < len(bci_history_result):
            bci_history_item = bci_history_result[position]
        else:
            bci_history_item = {}
        if bci_history_item and bci_history_item['found']:
            bci_fields = bci_history_item['fields']
            fansnum = bci_fields['user_fansnum'][0]
            user_weibo_count = bci_fields['weibo_month_sum'][0]
            user_friendsnum = bci_fields['user_friendsnum'][0]
        else:
            fansnum = ''
            user_weibo_count = ''
            user_friendsnum = ''
        new_out_portrait_list.append({
            'uid': row[0],
            'uname': row[1],
            'count': row[2],
            'fansnum': fansnum,
            'weibo_count': user_weibo_count,
            'friendsnum': user_friendsnum,
        })
    return new_out_portrait_list
예제 #6
0
def get_final_submit_user_info(uid_list):
    """Collect profile and bci-history summary rows for the given user ids.

    For every uid the nickname and location are read from the profile index,
    and the fan count, monthly weibo count and the bci value stored under
    ``'bci_' + str(search_date_ts)`` from bci_history. ``search_date_ts`` is
    ``datetime2ts(ts2datetime(time.time() - DAY))``. The bci value is
    normalized against the largest value of that field in the index as
    ``log10(bci / max * 9 + 1) * 100``.

    Args:
        uid_list: user ids to look up; output order follows this list.

    Returns:
        A list of rows ``[uid, uname, location, fansnum, statusnum,
        normal_bci]``. Values that cannot be obtained are empty strings.

    Raises:
        Whatever the max-value search raises: that call is intentionally not
        guarded (the original had its try/except commented out).
    """
    try:
        profile_results = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids': uid_list})['docs']
    except Exception:
        profile_results = []
    try:
        bci_history_results = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids': uid_list})['docs']
    except Exception:
        bci_history_results = []

    # Largest bci value of the reference day, used as normalization base.
    search_date_ts = datetime2ts(ts2datetime(time.time() - DAY))
    bci_key = 'bci_' + str(search_date_ts)
    query_body = {
        'query':{
             'match_all':{}
        },
        'sort': [{bci_key:{'order': 'desc'}}],
        'size': 1
    }
    bci_max_result = es_bci_history.search(index=bci_history_index_name, doc_type=bci_history_index_type, body=query_body, _source=False, fields=[bci_key])['hits']['hits']
    if bci_max_result:
        bci_max_value = bci_max_result[0]['fields'][bci_key][0]
    else:
        bci_max_value = MAX_VALUE

    final_results = []
    for position, uid in enumerate(uid_list):
        # A failed mget leaves an empty list; treat every uid as not found.
        profile_item = profile_results[position] if position < len(profile_results) else {}
        bci_history_item = bci_history_results[position] if position < len(bci_history_results) else {}

        if profile_item and profile_item['found']:
            uname = profile_item['_source']['nick_name']
            location = profile_item['_source']['user_location']
        else:
            uname = ''
            location = ''

        if bci_history_item and bci_history_item['found']:
            fansnum = bci_history_item['_source']['user_fansnum']
            statusnum = bci_history_item['_source']['weibo_month_sum']
            try:
                bci = bci_history_item['_source'][bci_key]
                # float() keeps Python 2 from truncating the ratio to 0 when
                # both values are stored as integers.
                normal_bci = math.log(float(bci) / bci_max_value * 9 + 1, 10) * 100
            except Exception:
                # Missing key, non-numeric value, zero maximum, ...: no score.
                normal_bci = ''
        else:
            fansnum = ''
            statusnum = ''
            normal_bci = ''
        final_results.append([uid, uname, location, fansnum, statusnum, normal_bci])

    return final_results
예제 #7
0
     "query":{
         "match_all":{}
     },
     "size":1,
     "sort":{sensitive_string:{"order":"desc"}}
 }
 try:
     top_sensitive_result = es_bci_history.search(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body=query_sensitive_body, _source=False, fields=[sensitive_string])['hits']['hits']
     top_sensitive = top_sensitive_result[0]['fields'][sensitive_string][0]
 except Exception, reason:
     print Exception, reason
     top_sensitive = 400
 index_type = 'bci'
 user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids':uid_list}, _source=True)['docs']
 user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list}, _source=True)['docs']
 bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={"ids":uid_list}, fields=['user_fansnum', 'weibo_month_sum'])['docs']
 sensitive_history_result = es_bci_history.mget(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body={'ids':uid_list}, fields=[sensitive_string], _source=False)['docs']
 max_evaluate_influ = get_evaluate_max(index_name)
 for i in range(0, len(uid_list)):
     uid = uid_list[i]
     bci_dict = user_bci_result[i]
     profile_dict = user_profile_result[i]
     bci_history_dict = bci_history_result[i]
     sensitive_history_dict = sensitive_history_result[i]
     #print sensitive_history_dict
     try:
         bci_source = bci_dict['_source']
     except:
         bci_source = None
     if bci_source:
         influence = bci_source['user_index']
예제 #8
0
def _normalize_portrait_score(value, max_value):
    """Map ``value`` onto a 0-100 log scale relative to ``max_value``: ``log10(value / max_value * 9 + 1) * 100``."""
    # float() keeps Python 2 from truncating the ratio when both are integers.
    return math.log(float(value) / max_value * 9 + 1, 10) * 100


def identify_user_portrait(user_set, filter_type):
    """Split users into those present in the portrait index and those absent.

    Users are looked up in the portrait index in batches of
    ``SENTIMENT_ITER_USER_COUNT``. For hits, the influence / activeness /
    importance / sensitive values are normalized against ``get_evaluate_max()``.
    Unless ``filter_type == 'in'``, the misses are then looked up in the
    profile and bci_history indices.

    Args:
        user_set: iterable of user ids.
        filter_type: ``'in'`` to return only the users found in the portrait
            index.

    Returns:
        When ``filter_type == 'in'``: a dict mapping uid to ``[uname,
        normal_influence, normal_activeness, normal_importance,
        normal_sensitive]``. Otherwise a tuple of that dict and a second dict
        mapping ``str(uid)`` to ``[uname, statusnum, friendsnum, fansnum]``.
    """
    user_list = list(user_set)
    all_in_portrait_user = dict()
    all_out_portrait_user_list = []
    max_result = get_evaluate_max()

    # range() stops before len(user_list), so no empty trailing batch is
    # sent (the original ``<=`` loop always issued one).
    for start in range(0, len(user_list), SENTIMENT_ITER_USER_COUNT):
        iter_user_list = user_list[start: start + SENTIMENT_ITER_USER_COUNT]
        try:
            in_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,
                    body={'ids': iter_user_list}, _source=False,
                    fields=['uname', 'influence', 'activeness', 'importance', 'sensitive'])['docs']
        except Exception:
            in_portrait_result = []
        for in_portrait_item in in_portrait_result:
            if not in_portrait_item['found']:
                all_out_portrait_user_list.append(int(in_portrait_item['_id']))
                continue
            portrait_fields = in_portrait_item['fields']
            uname = portrait_fields['uname'][0]
            if uname == '' or uname == 'unknown':
                uname = in_portrait_item['_id']
            normal_influence = _normalize_portrait_score(portrait_fields['influence'][0], max_result['influence'])
            normal_activeness = _normalize_portrait_score(portrait_fields['activeness'][0], max_result['activeness'])
            normal_importance = _normalize_portrait_score(portrait_fields['importance'][0], max_result['importance'])
            try:
                normal_sensitive = _normalize_portrait_score(portrait_fields['sensitive'][0], max_result['sensitive'])
            except Exception:
                # 'sensitive' may be absent or unusable; report 0 then.
                normal_sensitive = 0
            all_in_portrait_user[in_portrait_item['_id']] = [uname, normal_influence, normal_activeness,
                normal_importance, normal_sensitive]

    if filter_type == 'in':
        return all_in_portrait_user

    # Basic information for users that are not in the portrait index.
    all_out_portrait_user = dict()
    for start in range(0, len(all_out_portrait_user_list), SENTIMENT_ITER_USER_COUNT):
        iter_uid_list = all_out_portrait_user_list[start: start + SENTIMENT_ITER_USER_COUNT]
        # The profile lookup uses the integer ids, bci_history their string form.
        bci_iter_uid_list = [str(item) for item in iter_uid_list]
        try:
            profile_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type,
                    body={'ids':iter_uid_list}, _source=False, fields=['nick_name'])['docs']
        except Exception:
            profile_result = []
        try:
            bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids': bci_iter_uid_list}, _source=False, fields=['user_fansnum', 'weibo_month_sum', 'user_friendsnum'])['docs']
        except Exception:
            bci_history_result = []

        for position, uid in enumerate(iter_uid_list):
            if position < len(profile_result):
                profile_item = profile_result[position]
            else:
                profile_item = {'found': False}
            if profile_item['found']:
                uname = profile_item['fields']['nick_name'][0]
            else:
                # The placeholder used after a failed lookup has no '_id';
                # fall back to the uid itself instead of raising KeyError.
                uname = profile_item.get('_id', str(uid))

            if position < len(bci_history_result):
                bci_history_item = bci_history_result[position]
            else:
                bci_history_item = {'found': False}
            if bci_history_item['found']:
                bci_fields = bci_history_item['fields']
                statusnum = bci_fields['weibo_month_sum'][0]
                fansnum = bci_fields['user_fansnum'][0]
                friendsnum = bci_fields['user_friendsnum'][0]
            else:
                statusnum = 0
                fansnum = 0
                friendsnum = 0
            all_out_portrait_user[str(uid)] = [uname, statusnum, friendsnum, fansnum]
    return all_in_portrait_user, all_out_portrait_user
예제 #9
0
def get_final_submit_user_info(uid_list):
    """Collect profile and bci-history summary rows for the given user ids.

    For every uid the nickname and location are read from the profile index,
    and the fan count, monthly weibo count and the bci value stored under
    ``'bci_' + str(search_date_ts)`` from bci_history. ``search_date_ts`` is
    ``datetime2ts(ts2datetime(time.time() - DAY))``. The bci value is
    normalized against the largest value of that field in the index as
    ``log10(bci / max * 9 + 1) * 100``.

    Args:
        uid_list: user ids to look up; output order follows this list.

    Returns:
        A list of rows ``[uid, uname, location, fansnum, statusnum,
        normal_bci]``. Values that cannot be obtained are empty strings.

    Raises:
        Whatever the max-value search raises: that call is intentionally not
        guarded (the original had its try/except commented out).
    """
    try:
        profile_results = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids': uid_list})['docs']
    except Exception:
        profile_results = []
    try:
        bci_history_results = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids': uid_list})['docs']
    except Exception:
        bci_history_results = []

    # Largest bci value of the reference day, used as normalization base.
    search_date_ts = datetime2ts(ts2datetime(time.time() - DAY))
    bci_key = 'bci_' + str(search_date_ts)
    query_body = {
        'query':{
             'match_all':{}
        },
        'sort': [{bci_key:{'order': 'desc'}}],
        'size': 1
    }
    bci_max_result = es_bci_history.search(index=bci_history_index_name, doc_type=bci_history_index_type, body=query_body, _source=False, fields=[bci_key])['hits']['hits']
    if bci_max_result:
        bci_max_value = bci_max_result[0]['fields'][bci_key][0]
    else:
        bci_max_value = MAX_VALUE

    final_results = []
    for position, uid in enumerate(uid_list):
        # A failed mget leaves an empty list; treat every uid as not found.
        profile_item = profile_results[position] if position < len(profile_results) else {}
        bci_history_item = bci_history_results[position] if position < len(bci_history_results) else {}

        if profile_item and profile_item['found']:
            uname = profile_item['_source']['nick_name']
            location = profile_item['_source']['user_location']
        else:
            uname = ''
            location = ''

        if bci_history_item and bci_history_item['found']:
            fansnum = bci_history_item['_source']['user_fansnum']
            statusnum = bci_history_item['_source']['weibo_month_sum']
            try:
                bci = bci_history_item['_source'][bci_key]
                # float() keeps Python 2 from truncating the ratio to 0 when
                # both values are stored as integers.
                normal_bci = math.log(float(bci) / bci_max_value * 9 + 1, 10) * 100
            except Exception:
                # Missing key, non-numeric value, zero maximum, ...: no score.
                normal_bci = ''
        else:
            fansnum = ''
            statusnum = ''
            normal_bci = ''
        final_results.append([uid, uname, location, fansnum, statusnum, normal_bci])

    return final_results
예제 #10
0
     "query":{
         "match_all":{}
     },
     "size":1,
     "sort":{sensitive_string:{"order":"desc"}}
 }
 try:
     top_sensitive_result = es_bci_history.search(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body=query_sensitive_body, _source=False, fields=[sensitive_string])['hits']['hits']
     top_sensitive = top_sensitive_result[0]['fields'][sensitive_string][0]
 except Exception, reason:
     print Exception, reason
     top_sensitive = 400
 index_type = 'bci'
 user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids':uid_list}, _source=True)['docs']
 user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list}, _source=True)['docs']
 bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={"ids":uid_list}, fields=['user_fansnum', 'weibo_month_sum'])['docs']
 sensitive_history_result = es_bci_history.mget(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body={'ids':uid_list}, fields=[sensitive_string], _source=False)['docs']
 max_evaluate_influ = get_evaluate_max(index_name)
 for i in range(0, len(uid_list)):
     uid = uid_list[i]
     bci_dict = user_bci_result[i]
     profile_dict = user_profile_result[i]
     bci_history_dict = bci_history_result[i]
     sensitive_history_dict = sensitive_history_result[i]
     #print sensitive_history_dict
     try:
         bci_source = bci_dict['_source']
     except:
         bci_source = None
     if bci_source:
         influence = bci_source['user_index']
예제 #11
0
def search_follower(uid, top_count):
    """Return profile and bci-history details for every user who retweeted ``uid``.

    The ``uid_be_retweet`` JSON mapping (retweeter uid -> count) stored for
    ``uid`` in the current be-retweet index is decoded, and all of its keys
    are looked up in the user-profile and bci-history indices.

    Args:
        uid: id of the center user.
        top_count: accepted for interface compatibility; not used to limit
            the result.

    Returns:
        A list of dicts with keys ``uid``, ``photo_url``, ``count``, ``uname``,
        ``influence``, ``fansnum``, ``friendsnum`` and ``weibo_count``, one per
        document returned by the profile lookup; ``None`` when the be-retweet
        document cannot be fetched or is empty.
    """
    db_number = get_db_num(time.time())
    index_name = be_retweet_index_name_pre + str(db_number)
    try:
        retweet_result = es_retweet.get(index=index_name, doc_type=be_retweet_index_type, id=uid)['_source']
    except Exception:
        return None
    if not retweet_result:
        return None

    retweet_dict = json.loads(retweet_result['uid_be_retweet'])
    uid_list = list(retweet_dict.keys())
    try:
        user_result = es_user_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={'ids':uid_list})['docs']
    except Exception:
        user_result = []
    try:
        bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids':uid_list}, fields=fields)['docs']
    except Exception:
        bci_history_result = []

    out_portrait_list = []
    for position, profile_doc in enumerate(user_result):
        follower_uid = profile_doc['_id']
        if profile_doc['found']:
            source = profile_doc['_source']
            uname = source['nick_name']
            photo_url = source['photo_url']
            if uname == '':
                uname = u'未知'
        else:
            uname = u'未知'
            # The placeholder belongs here: the original assigned it in the
            # found branch, discarding the real URL and leaving photo_url
            # unset (or stale) for users without a profile.
            photo_url = 'unknown'

        # Both mget calls use the same id list, so the docs line up by index.
        if position < len(bci_history_result):
            bci_doc = bci_history_result[position]
        else:
            bci_doc = {'found': False}
        if bci_doc['found']:
            bci_fields = bci_doc['fields']
            fansnum = bci_fields[fields[0]][0]
            user_weibo_count = bci_fields[fields[1]][0]
            user_friendsnum = bci_fields[fields[2]][0]
            influence = bci_fields[fields[3]][0]
        else:
            fansnum = ''
            user_weibo_count = ''
            user_friendsnum = ''
            influence = ''

        out_portrait_list.append({
            'uid': follower_uid,
            'photo_url': photo_url,
            'count': retweet_dict[follower_uid],
            'uname': uname,
            'influence': influence,
            'fansnum': fansnum,
            'friendsnum': user_friendsnum,
            'weibo_count': user_weibo_count,
        })
    return out_portrait_list
예제 #12
0
         index = profile_result.index(item)
         tmp = []
         if item['found']:
             item = item['_source']
             tmp.append(item['uid'])
             tmp.append(item['nick_name'])
             tmp.append(item['user_location'])
         else:
             tmp.extend([_id,'',''])
         value = sort_list[index]
         tmp.append(value)
         results.append(tmp)
 
 if uid_list:
     count = 0
     history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={"ids":uid_list})["docs"]
     for item in history_result:
         if item['found']:
             item = item['_source']
             results[count].extend([item['user_fansnum'], item['weibo_month_sum']])
         else:
             results[count].extend(['',''])
         count += 1
 
 if uid_list:
     count = 0
     portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":uid_list})["docs"]
     for item in portrait_result:
         if item['found']:
             results[count].append("1")
         else: