Exemplo n.º 1
0
def search_attention_id(uid, k=30):
    """Return the uids listed in the stored ``uid_retweet`` map of ``uid``.

    The document with id ``uid`` is fetched through ``es_retweet.get`` from
    the index ``retweet_index_name_pre + str(get_db_num(time.time()))``.
    Its ``uid_retweet`` field is JSON-decoded and the keys of the decoded
    object are returned.

    Args:
        uid: Id of the document to look up.
        k: Unused; kept so existing callers keep working.

    Returns:
        set: Keys of the decoded ``uid_retweet`` object.  An empty set is
        returned when the lookup fails or the document source is empty.

    Raises:
        KeyError, ValueError: if the fetched source has no ``uid_retweet``
        field or the field is not valid JSON (decoding is done outside the
        best-effort lookup, as in the original code).
    """
    index_name = retweet_index_name_pre + str(get_db_num(time.time()))
    try:
        source = es_retweet.get(index=index_name,
                                doc_type=retweet_index_type,
                                id=uid)['_source']
    except Exception:
        # Best-effort lookup: any failure of the fetch means "no data".
        return set()
    if not source:
        # Previously this path fell off the end and returned None, while the
        # failure path above returned a set; keep the return type uniform.
        return set()
    return set(json.loads(source['uid_retweet']).keys())
Exemplo n.º 2
0
def search_yangshi_attention(uid, top_count):
    """List the 20 highest-count entries of the ``uid_retweet`` map of ``uid``.

    The ``uid_retweet`` field of the document with id ``uid`` (fetched via
    ``es_retweet.get``) is JSON-decoded, its items are sorted by value in
    descending order and the first 20 keys are resolved to nick names
    through ``es_user_profile.mget``.

    Args:
        uid: Id of the document to look up.
        top_count: Unused; the cut-off is fixed at 20 as in the original
            implementation.  NOTE(review): confirm whether callers expect
            this argument to control the cut-off.

    Returns:
        list of dicts ``{'uid', 'count', 'uname'}`` (one per document
        returned by the profile lookup; empty if that lookup fails), or
        ``None`` when the retweet document cannot be fetched or its source
        is empty.
    """
    index_name = retweet_index_name_pre + str(get_db_num(time.time()))
    try:
        retweet_source = es_retweet.get(index=index_name,
                                        doc_type=retweet_index_type,
                                        id=uid)['_source']
    except Exception:
        return None
    if not retweet_source:
        return None

    retweet_dict = json.loads(retweet_source['uid_retweet'])
    # items() instead of iteritems(): same result, works on Python 2 and 3.
    top_pairs = sorted(retweet_dict.items(),
                       key=lambda pair: pair[1],
                       reverse=True)[:20]
    uid_list = [pair[0] for pair in top_pairs]

    try:
        user_docs = es_user_profile.mget(index=profile_index_name,
                                         doc_type=profile_index_type,
                                         body={'ids': uid_list})['docs']
    except Exception:
        # Profile lookup is best-effort; without it there is nothing to list.
        user_docs = []

    out_portrait_list = []
    for user_doc in user_docs:
        # A separate name is used so the ``uid`` argument is not clobbered.
        target_uid = user_doc['_id']
        if user_doc['found']:
            uname = user_doc['_source']['nick_name']
            if uname == '':
                uname = u'未知'
        else:
            uname = u'未知'
        out_portrait_list.append({
            'uid': target_uid,
            'count': retweet_dict[target_uid],
            'uname': uname,
        })
    return out_portrait_list
def new_get_user_social(uid):
    """Build the social-interaction summary for ``uid``.

    Steps (all data is read for the db number ``get_db_num(time.time())``):

    1. retweet + comment counts are merged with ``union_dict``, ``uid``
       itself is dropped, and the result is passed through
       ``filter_in_uid``.
    2. The same is done for be_retweet + be_comment counts.
    3. ``get_user_interaction`` combines the two filtered results.
    4. ``search_mention(uid)`` is passed through ``filter_in_uname``.
    5. Domain and topic statistics are computed over every uid seen in
       steps 1, 2 and 4, excluding ``uid`` itself.

    Per the original inline comments the filtered rows look like
    ``[id, uname, photo_url, count]``; the code sorts them by index 3 and
    reads the uid from index 0.

    Args:
        uid: Id of the user whose documents are looked up.

    Returns:
        dict with keys ``top_retweet_comment``, ``top_be_retweet_comment``,
        ``top_interaction``, ``top_mention`` (each at most 20 rows, sorted
        by index 3 descending) and ``in_domain``, ``in_topic`` (each at
        most 20 ``(key, value)`` pairs sorted by value descending).
    """

    def _load_counts(es_client, index_name, doc_type, field):
        # Fetch ``field`` from the document of ``uid`` and JSON-decode it;
        # any failure (lookup, missing key, bad JSON) yields an empty dict.
        try:
            raw = es_client.get(index=index_name, doc_type=doc_type,
                                id=uid)['_source'][field]
            return json.loads(raw)
        except Exception:
            return {}

    def _top20_rows(rows):
        # Keep the 20 rows with the largest value at index 3.
        return sorted(rows, key=lambda row: row[3], reverse=True)[:20]

    def _top20_pairs(stat_dict):
        # Keep the 20 (key, value) pairs with the largest value.
        return sorted(stat_dict.items(), key=lambda pair: pair[1],
                      reverse=True)[:20]

    results = {}
    db_suffix = str(get_db_num(time.time()))

    # step1: retweet/comment
    retweet_counts = _load_counts(es_retweet,
                                  retweet_index_name_pre + db_suffix,
                                  retweet_index_type, 'uid_retweet')
    comment_counts = _load_counts(es_comment,
                                  comment_index_name_pre + db_suffix,
                                  comment_index_type, 'uid_comment')
    union_retweet_comment = union_dict(retweet_counts, comment_counts)
    union_retweet_comment.pop(uid, None)  # drop the user itself if present
    in_retweet_comment_rows = filter_in_uid(union_retweet_comment)
    results['top_retweet_comment'] = _top20_rows(in_retweet_comment_rows)

    # step2: be_retweet/be_comment
    be_retweet_counts = _load_counts(es_retweet,
                                     be_retweet_index_name_pre + db_suffix,
                                     be_retweet_index_type, 'uid_be_retweet')
    be_comment_counts = _load_counts(es_comment,
                                     be_comment_index_name_pre + db_suffix,
                                     be_comment_index_type, 'uid_be_comment')
    union_be_retweet_comment = union_dict(be_retweet_counts,
                                          be_comment_counts)
    union_be_retweet_comment.pop(uid, None)
    in_be_retweet_comment_rows = filter_in_uid(union_be_retweet_comment)
    results['top_be_retweet_comment'] = _top20_rows(in_be_retweet_comment_rows)

    # step3: interaction
    interaction_rows = get_user_interaction(in_retweet_comment_rows,
                                            in_be_retweet_comment_rows)
    results['top_interaction'] = _top20_rows(interaction_rows)

    # step4: mentions (@)
    in_mention_rows = filter_in_uname(search_mention(uid))
    results['top_mention'] = _top20_rows(in_mention_rows)

    # step5: domain and topic statistics over every uid seen above
    in_retweet_comment_uids = set(row[0] for row in in_retweet_comment_rows)
    in_be_retweet_comment_uids = set(row[0]
                                     for row in in_be_retweet_comment_rows)
    in_mention_uids = set(row[0] for row in in_mention_rows)
    # Parentheses are required: ``-`` binds tighter than ``|``, so without
    # them ``uid`` was only removed from the mention set, not the union.
    all_in_uid_set = (in_retweet_comment_uids
                      | in_be_retweet_comment_uids
                      | in_mention_uids) - set([uid])

    results['in_domain'] = _top20_pairs(get_social_domain(all_in_uid_set))
    results['in_topic'] = _top20_pairs(get_social_topic(all_in_uid_set))

    return results
Exemplo n.º 4
0
def new_get_user_social(uid):
    """Build the social-interaction summary for ``uid``.

    Steps (all data is read for the db number ``get_db_num(time.time())``):

    1. retweet + comment counts are merged with ``union_dict``, ``uid``
       itself is dropped, and the result is passed through
       ``filter_in_uid``.
    2. The same is done for be_retweet + be_comment counts.
    3. ``get_user_interaction`` combines the two filtered results.
    4. ``search_mention(uid)`` is passed through ``filter_in_uname``.
    5. Domain and topic statistics are computed over every uid seen in
       steps 1, 2 and 4, excluding ``uid`` itself.

    Per the original inline comments the filtered rows look like
    ``[id, uname, photo_url, count]``; the code sorts them by index 3 and
    reads the uid from index 0.

    Args:
        uid: Id of the user whose documents are looked up.

    Returns:
        dict with keys ``top_retweet_comment``, ``top_be_retweet_comment``,
        ``top_interaction``, ``top_mention`` (each at most 20 rows, sorted
        by index 3 descending) and ``in_domain``, ``in_topic`` (each at
        most 20 ``(key, value)`` pairs sorted by value descending).
    """

    def _load_counts(es_client, index_name, doc_type, field):
        # Fetch ``field`` from the document of ``uid`` and JSON-decode it;
        # any failure (lookup, missing key, bad JSON) yields an empty dict.
        try:
            raw = es_client.get(index=index_name, doc_type=doc_type,
                                id=uid)['_source'][field]
            return json.loads(raw)
        except Exception:
            return {}

    def _top20_rows(rows):
        # Keep the 20 rows with the largest value at index 3.
        return sorted(rows, key=lambda row: row[3], reverse=True)[:20]

    def _top20_pairs(stat_dict):
        # Keep the 20 (key, value) pairs with the largest value.
        return sorted(stat_dict.items(), key=lambda pair: pair[1],
                      reverse=True)[:20]

    results = {}
    db_suffix = str(get_db_num(time.time()))

    # step1: retweet/comment
    retweet_counts = _load_counts(es_retweet,
                                  retweet_index_name_pre + db_suffix,
                                  retweet_index_type, 'uid_retweet')
    comment_counts = _load_counts(es_comment,
                                  comment_index_name_pre + db_suffix,
                                  comment_index_type, 'uid_comment')
    union_retweet_comment = union_dict(retweet_counts, comment_counts)
    union_retweet_comment.pop(uid, None)  # drop the user itself if present
    in_retweet_comment_rows = filter_in_uid(union_retweet_comment)
    results['top_retweet_comment'] = _top20_rows(in_retweet_comment_rows)

    # step2: be_retweet/be_comment
    be_retweet_counts = _load_counts(es_retweet,
                                     be_retweet_index_name_pre + db_suffix,
                                     be_retweet_index_type, 'uid_be_retweet')
    be_comment_counts = _load_counts(es_comment,
                                     be_comment_index_name_pre + db_suffix,
                                     be_comment_index_type, 'uid_be_comment')
    union_be_retweet_comment = union_dict(be_retweet_counts,
                                          be_comment_counts)
    union_be_retweet_comment.pop(uid, None)
    in_be_retweet_comment_rows = filter_in_uid(union_be_retweet_comment)
    results['top_be_retweet_comment'] = _top20_rows(in_be_retweet_comment_rows)

    # step3: interaction
    interaction_rows = get_user_interaction(in_retweet_comment_rows,
                                            in_be_retweet_comment_rows)
    results['top_interaction'] = _top20_rows(interaction_rows)

    # step4: mentions (@)
    in_mention_rows = filter_in_uname(search_mention(uid))
    results['top_mention'] = _top20_rows(in_mention_rows)

    # step5: domain and topic statistics over every uid seen above
    in_retweet_comment_uids = set(row[0] for row in in_retweet_comment_rows)
    in_be_retweet_comment_uids = set(row[0]
                                     for row in in_be_retweet_comment_rows)
    in_mention_uids = set(row[0] for row in in_mention_rows)
    # Parentheses are required: ``-`` binds tighter than ``|``, so without
    # them ``uid`` was only removed from the mention set, not the union.
    all_in_uid_set = (in_retweet_comment_uids
                      | in_be_retweet_comment_uids
                      | in_mention_uids) - set([uid])

    results['in_domain'] = _top20_pairs(get_social_domain(all_in_uid_set))
    results['in_topic'] = _top20_pairs(get_social_topic(all_in_uid_set))

    return results
Exemplo n.º 5
0
def search_fans(uid, top_count):
    """List the users found in the be_retweet / be_comment maps of ``uid``.

    The ``uid_be_retweet`` and ``uid_be_comment`` JSON maps of the document
    with id ``uid`` are merged with ``union_dict``; ``uid`` itself is
    excluded; the remaining users are sorted by merged value (descending)
    and the first 1000 are resolved through ``es_user_profile.mget`` and
    ``es_bci_history.mget``.

    Args:
        uid: Id of the document to look up.
        top_count: Unused; the cut-off is fixed at 1000 as in the original
            implementation.

    Returns:
        list of dicts with keys ``uid``, ``photo_url``, ``uname``,
        ``count``, ``fansnum``, ``friendsnum`` and ``weibo_count`` -- one
        per document returned by the profile lookup (empty when that lookup
        fails).  ``fansnum``, ``weibo_count`` and ``friendsnum`` come from
        the bci-history values of ``fields[0]``, ``fields[1]`` and
        ``fields[2]`` and are ``''`` when no bci-history entry is found.
    """

    def _fetch_counts(es_client, index_name, doc_type, field):
        # Best-effort fetch of the document source; decoding of ``field``
        # happens outside the try so malformed data still surfaces.
        try:
            source = es_client.get(index=index_name, doc_type=doc_type,
                                   id=uid)['_source']
        except Exception:
            source = {}
        return json.loads(source[field]) if source else {}

    now_date_ts = datetime2ts(ts2datetime(time.time()))
    db_suffix = str(get_db_num(now_date_ts))

    be_retweet_uid_dict = _fetch_counts(
        es_retweet, be_retweet_index_name_pre + db_suffix,
        be_retweet_index_type, 'uid_be_retweet')
    # NOTE(review): other functions in this file read be_comment data through
    # ``es_comment``; confirm ``es_be_comment`` is the intended client here.
    be_comment_uid_dict = _fetch_counts(
        es_be_comment, be_comment_index_name_pre + db_suffix,
        be_comment_index_type, 'uid_be_comment')

    fans_result = union_dict(be_retweet_uid_dict, be_comment_uid_dict)
    all_fans_dict = dict((fan, fans_result[fan])
                         for fan in set(fans_result.keys())
                         if fan != uid)
    ranked_fans = sorted(all_fans_dict.items(),
                         key=lambda pair: pair[1], reverse=True)
    all_fans_uid_list_all = [pair[0] for pair in ranked_fans]

    # Debug output kept from the original implementation.
    print(all_fans_uid_list_all)
    all_fans_uid_list = all_fans_uid_list_all[:1000]
    print(all_fans_uid_list)

    # User information from the profile index.
    try:
        out_user_result = es_user_profile.mget(
            index=profile_index_name, doc_type=profile_index_type,
            body={'ids': all_fans_uid_list})['docs']
    except Exception:
        out_user_result = []
    # Extra counters from the bci-history index.
    try:
        bci_history_result = es_bci_history.mget(
            index=bci_history_index_name, doc_type=bci_history_index_type,
            body={'ids': all_fans_uid_list}, fields=fields)['docs']
    except Exception:
        bci_history_result = []

    out_portrait_list = []
    for position, profile_item in enumerate(out_user_result):
        fan_uid = profile_item['_id']
        if profile_item['found']:
            source = profile_item['_source']
            uname = source['nick_name']
            photo_url = source['photo_url']
            if uname == '':
                uname = u'未知'
        else:
            uname = u'未知'
            photo_url = 'unknown'

        # Both lookups used the same id list, so entries are paired by
        # position; a shorter (or empty) bci result counts as "not found".
        try:
            bci_history_item = bci_history_result[position]
        except IndexError:
            bci_history_item = {'found': False}
        if bci_history_item['found']:
            bci_fields = bci_history_item['fields']
            fansnum = bci_fields[fields[0]][0]
            user_weibo_count = bci_fields[fields[1]][0]
            user_friendsnum = bci_fields[fields[2]][0]
        else:
            fansnum = ''
            user_weibo_count = ''
            user_friendsnum = ''

        out_portrait_list.append({
            'uid': fan_uid,
            'photo_url': photo_url,
            'uname': uname,
            'count': int(all_fans_dict[fan_uid]),
            'fansnum': fansnum,
            'friendsnum': user_friendsnum,
            'weibo_count': user_weibo_count,
        })

    return out_portrait_list
Exemplo n.º 6
0
def search_follower(uid, top_count):
    """List the top entries of the ``uid_be_retweet`` map of ``uid``.

    The ``uid_be_retweet`` field of the document with id ``uid`` (fetched
    via ``es_retweet.get``) is JSON-decoded, its items are sorted by value
    in descending order and truncated to 20; ``uid`` itself is then removed
    (so fewer than 20 ids may remain).  The remaining ids are resolved
    through ``es_user_profile.mget`` and ``es_bci_history.mget``.

    Args:
        uid: Id of the document to look up.
        top_count: Unused; the cut-off is fixed at 20 as in the original
            implementation.

    Returns:
        list of dicts with keys ``uid``, ``photo_url``, ``count``,
        ``uname``, ``influence``, ``fansnum``, ``friendsnum`` and
        ``weibo_count`` (empty when the profile lookup fails), or ``None``
        when the be_retweet document cannot be fetched or its source is
        empty.  ``fansnum``, ``weibo_count``, ``friendsnum`` and
        ``influence`` come from the bci-history values of ``fields[0]`` ..
        ``fields[3]`` and are ``''`` when no bci-history entry is found.
    """
    index_name = be_retweet_index_name_pre + str(get_db_num(time.time()))
    try:
        retweet_source = es_retweet.get(index=index_name,
                                        doc_type=be_retweet_index_type,
                                        id=uid)['_source']
    except Exception:
        return None
    if not retweet_source:
        return None

    retweet_dict = json.loads(retweet_source['uid_be_retweet'])
    # items() instead of iteritems(): same result, works on Python 2 and 3.
    top_pairs = sorted(retweet_dict.items(),
                       key=lambda pair: pair[1], reverse=True)[:20]
    uid_list = [pair[0] for pair in top_pairs if pair[0] != uid]

    try:
        user_result = es_user_profile.mget(
            index=profile_index_name, doc_type=profile_index_type,
            body={'ids': uid_list})['docs']
    except Exception:
        user_result = []
    try:
        bci_history_result = es_bci_history.mget(
            index=bci_history_index_name, doc_type=bci_history_index_type,
            body={'ids': uid_list}, fields=fields)['docs']
    except Exception:
        bci_history_result = []

    out_portrait_list = []
    for position, profile_item in enumerate(user_result):
        # A separate name is used so the ``uid`` argument is not clobbered.
        follower_uid = profile_item['_id']
        if profile_item['found']:
            source = profile_item['_source']
            uname = source['nick_name']
            photo_url = source['photo_url']
            if uname == '':
                uname = u'未知'
        else:
            uname = u'未知'
            photo_url = ''

        # Both lookups used the same id list, so entries are paired by
        # position; a shorter (or empty) bci result counts as "not found".
        try:
            bci_history_item = bci_history_result[position]
        except IndexError:
            bci_history_item = {'found': False}
        if bci_history_item['found']:
            bci_fields = bci_history_item['fields']
            fansnum = bci_fields[fields[0]][0]
            user_weibo_count = bci_fields[fields[1]][0]
            user_friendsnum = bci_fields[fields[2]][0]
            influence = bci_fields[fields[3]][0]
        else:
            fansnum = ''
            user_weibo_count = ''
            user_friendsnum = ''
            influence = ''

        out_portrait_list.append({
            'uid': follower_uid,
            'photo_url': photo_url,
            'count': retweet_dict[follower_uid],
            'uname': uname,
            'influence': influence,
            'fansnum': fansnum,
            'friendsnum': user_friendsnum,
            'weibo_count': user_weibo_count,
        })
    return out_portrait_list
Exemplo n.º 7
0
def search_bidirect_interaction(uid, top_count):
    """List users that appear in both directions of interaction with ``uid``.

    Outgoing counts (``uid_retweet`` + ``uid_comment``) and incoming counts
    (``uid_be_retweet`` + ``uid_be_comment``) are each merged with
    ``union_dict``.  Users present in both merged maps, other than ``uid``
    itself, are kept; their score is the sum of the two merged values.
    They are sorted by score (descending) and resolved through
    ``es_user_profile.mget`` and ``es_bci_history.mget``.

    Args:
        uid: Id of the document to look up.
        top_count: Unused; no cut-off is applied, as in the original
            implementation.

    Returns:
        list of dicts with keys ``uid``, ``photo_url``, ``uname``,
        ``count``, ``fansnum``, ``friendsnum`` and ``weibo_count`` -- one
        per document returned by the profile lookup (empty when that lookup
        fails).  ``fansnum``, ``weibo_count`` and ``friendsnum`` come from
        the bci-history values of ``fields[0]``, ``fields[1]`` and
        ``fields[2]`` and are ``''`` when no bci-history entry is found.
    """

    def _fetch_counts(es_client, index_name, doc_type, field):
        # Best-effort fetch of the document source; decoding of ``field``
        # happens outside the try so malformed data still surfaces.
        try:
            source = es_client.get(index=index_name, doc_type=doc_type,
                                   id=uid)['_source']
        except Exception:
            source = {}
        return json.loads(source[field]) if source else {}

    now_date_ts = datetime2ts(ts2datetime(time.time()))
    db_suffix = str(get_db_num(now_date_ts))

    # retweet / be_retweet
    retweet_uid_dict = _fetch_counts(
        es_retweet, retweet_index_name_pre + db_suffix,
        retweet_index_type, 'uid_retweet')
    be_retweet_uid_dict = _fetch_counts(
        es_retweet, be_retweet_index_name_pre + db_suffix,
        be_retweet_index_type, 'uid_be_retweet')

    # comment / be_comment
    comment_uid_dict = _fetch_counts(
        es_comment, comment_index_name_pre + db_suffix,
        comment_index_type, 'uid_comment')
    # The original passed the misspelled name ``be_coment_index_name``; the
    # resulting NameError was swallowed by a bare ``except`` so be_comment
    # data was silently always empty.
    be_comment_uid_dict = _fetch_counts(
        es_comment, be_comment_index_name_pre + db_suffix,
        be_comment_index_type, 'uid_be_comment')

    retweet_comment_result = union_dict(retweet_uid_dict, comment_uid_dict)
    be_retweet_comment_result = union_dict(be_retweet_uid_dict,
                                           be_comment_uid_dict)
    interaction_user_set = (set(retweet_comment_result.keys())
                            & set(be_retweet_comment_result.keys()))
    all_interaction_dict = dict(
        (user, retweet_comment_result[user] + be_retweet_comment_result[user])
        for user in interaction_user_set
        if user != uid)

    ranked_users = sorted(all_interaction_dict.items(),
                          key=lambda pair: pair[1], reverse=True)
    all_interaction_uid_list = [pair[0] for pair in ranked_users]

    # User information from the profile index.
    try:
        out_user_result = es_user_profile.mget(
            index=profile_index_name, doc_type=profile_index_type,
            body={'ids': all_interaction_uid_list})['docs']
    except Exception:
        out_user_result = []
    # Extra counters from the bci-history index.
    try:
        bci_history_result = es_bci_history.mget(
            index=bci_history_index_name, doc_type=bci_history_index_type,
            body={'ids': all_interaction_uid_list}, fields=fields)['docs']
    except Exception:
        bci_history_result = []

    out_portrait_list = []
    for position, profile_item in enumerate(out_user_result):
        # A separate name is used so the ``uid`` argument is not clobbered.
        other_uid = profile_item['_id']
        if profile_item['found']:
            source = profile_item['_source']
            uname = source['nick_name']
            photo_url = source['photo_url']
            if uname == '':
                uname = u'未知'
        else:
            uname = u'未知'
            photo_url = 'unknown'

        # Both lookups used the same id list, so entries are paired by
        # position; a shorter (or empty) bci result counts as "not found".
        try:
            bci_history_item = bci_history_result[position]
        except IndexError:
            bci_history_item = {'found': False}
        if bci_history_item['found']:
            bci_fields = bci_history_item['fields']
            fansnum = bci_fields[fields[0]][0]
            user_weibo_count = bci_fields[fields[1]][0]
            user_friendsnum = bci_fields[fields[2]][0]
        else:
            fansnum = ''
            user_weibo_count = ''
            user_friendsnum = ''

        out_portrait_list.append({
            'uid': other_uid,
            'photo_url': photo_url,
            'uname': uname,
            'count': int(all_interaction_dict[other_uid]),
            'fansnum': fansnum,
            'friendsnum': user_friendsnum,
            'weibo_count': user_weibo_count,
        })

    return out_portrait_list
Exemplo n.º 8
0
def info_new_get_user_social(uid):
    """Compute topic statistics over the users connected to ``uid``.

    The connected users are the keys of:

    * the ``union_dict`` merge of the ``uid_retweet`` and ``uid_comment``
      maps of ``uid``,
    * the ``union_dict`` merge of the ``uid_be_retweet`` and
      ``uid_be_comment`` maps of ``uid``,
    * the result of ``search_mention(uid)``,

    with ``uid`` itself excluded.  Unlike ``new_get_user_social`` the keys
    are used directly; no ``filter_in_uid`` / ``filter_in_uname`` step is
    applied.

    Args:
        uid: Id of the user whose documents are looked up.

    Returns:
        dict with the single key ``in_topic``: at most 20 ``(key, value)``
        pairs from ``get_social_topic``, sorted by value descending.
    """

    def _load_counts(es_client, index_name, doc_type, field):
        # Fetch ``field`` from the document of ``uid`` and JSON-decode it;
        # any failure (lookup, missing key, bad JSON) yields an empty dict.
        try:
            raw = es_client.get(index=index_name, doc_type=doc_type,
                                id=uid)['_source'][field]
            return json.loads(raw)
        except Exception:
            return {}

    results = {}
    db_suffix = str(get_db_num(time.time()))

    # step1: retweet/comment
    retweet_index_name = retweet_index_name_pre + db_suffix
    comment_index_name = comment_index_name_pre + db_suffix
    # Debug output kept from the original implementation (single formatted
    # string so the output is the same under Python 2 and 3).
    print('%s %s %s' % (es_retweet, retweet_index_name, uid))
    retweet_counts = _load_counts(es_retweet, retweet_index_name,
                                  retweet_index_type, 'uid_retweet')
    comment_counts = _load_counts(es_comment, comment_index_name,
                                  comment_index_type, 'uid_comment')
    union_retweet_comment = union_dict(retweet_counts, comment_counts)
    union_retweet_comment.pop(uid, None)  # drop the user itself if present
    in_retweet_comment_uids = set(union_retweet_comment.keys())

    # step2: be_retweet/be_comment
    be_retweet_counts = _load_counts(es_retweet,
                                     be_retweet_index_name_pre + db_suffix,
                                     be_retweet_index_type, 'uid_be_retweet')
    be_comment_counts = _load_counts(es_comment,
                                     be_comment_index_name_pre + db_suffix,
                                     be_comment_index_type, 'uid_be_comment')
    union_be_retweet_comment = union_dict(be_retweet_counts,
                                          be_comment_counts)
    union_be_retweet_comment.pop(uid, None)
    in_be_retweet_comment_uids = set(union_be_retweet_comment.keys())

    # step3: mentions (@)
    in_mention_keys = set(search_mention(uid).keys())

    # Parentheses are required: ``-`` binds tighter than ``|``, so without
    # them ``uid`` was only removed from the mention set, not the union.
    all_in_uid_set = (in_retweet_comment_uids
                      | in_be_retweet_comment_uids
                      | in_mention_keys) - set([uid])

    # Debug marker kept from the original implementation.
    print('760')

    # compute topic
    topic_statis_dict = get_social_topic(all_in_uid_set)
    results['in_topic'] = sorted(topic_statis_dict.items(),
                                 key=lambda pair: pair[1],
                                 reverse=True)[:20]
    return results