Пример #1
0
def compute(user_set, bulk_action):
    count_c = 0

    weibo_redis = R_CLUSTER_FLOW1
    for user in user_set:
        origin_weibo_set = weibo_redis.smembers(
            user + "_origin_weibo")  # origin weibo list
        retweeted_weibo_set = weibo_redis.smembers(
            user + "_retweeted_weibo")  # retweeted weibo list
        comment_weibo_set = weibo_redis.smembers(
            user + "comment_weibo")  # comment weibo list
        user_info = weibo_redis.hgetall(user)  #dict

        origin_weibo_retweeted_timestamp = []
        origin_weibo_retweeted_count = []
        origin_weibo_list = []
        origin_weibo_comment_timestamp = []
        origin_weibo_comment_count = []
        retweeted_weibo_retweeted_count = []
        retweeted_weibo_comment_count = []
        retweeted_weibo_retweeted_timestamp = []
        retweeted_weibo_comment_timestamp = []
        retweeted_weibo_list = []
        user_fansnum = 0
        comment_weibo_number = 0
        user_friendsnum = 0
        for key in user_info.keys():
            if 'origin_weibo_retweeted_timestamp_' in key:  # 不同时间段的原创微博被转发的爆发度
                origin_weibo_retweeted_timestamp.append(key.split('_')[-1])
            elif 'origin_weibo_comment_timestamp_' in key:  # 不同时间段的原创微博被评论的爆发度
                origin_weibo_comment_timestamp.append(key.split('_')[-1])
            elif 'retweeted_weibo_retweeted_timestamp_' in key:  # 不同时间段的转发微博被转发的爆发度
                retweeted_weibo_retweeted_timestamp.append(key.split('_')[-1])
            elif 'retweeted_weibo_comment_timestamp_' in key:  # 不同时间段的转发微博被评论的爆发度
                retweeted_weibo_comment_timestamp.append(key.split('_')[-1])
            elif '_origin_weibo_retweeted' in key:  # which origin weibo is retweeted, and retwweted number
                origin_weibo_retweeted_count.append(
                    key.split('_')[0])  # origin weibo list
            elif '_origin_weibo_comment' in key:  # which origin weibo is commentted, and comment number
                origin_weibo_comment_count.append(key.split('_')[0])
            elif '_retweeted_weibo_retweeted' in key:  # which retweeted weibo is retweeted, and retweeted number
                retweeted_weibo_retweeted_count.append(
                    key.split('_')[0])  # which retweeted weibo
            elif '_retweeted_weibo_comment' in key:  # which retweeted weibo is commented, and comment number
                retweeted_weibo_comment_count.append(key.split('_')[0])
            elif 'fansnum' in key:
                user_fansnum = user_info[key]
            elif "user_friendsnum" in key:
                user_friendsnum = user_info[key]
            elif "comment_weibo" == key:
                pass
            else:
                #print user_info
                #print key
                #print user
                pass

        user_id = str(user)
        origin_weibo_retweeted_detail, origin_weibo_retweeted_total_number, origin_weibo_retweeted_top, origin_weibo_retweeted_average_number \
                = statistic_weibo(origin_weibo_retweeted_count, origin_weibo_set, user_info, "_origin_weibo_retweeted")

        origin_weibo_comment_detail, origin_weibo_comment_total_number, origin_weibo_comment_top, origin_weibo_comment_average_number\
                = statistic_weibo(origin_weibo_comment_count, origin_weibo_set, user_info, "_origin_weibo_comment")

        retweeted_weibo_retweeted_detail, retweeted_weibo_retweeted_total_number, retweeted_weibo_retweeted_top, retweeted_weibo_retweeted_average_number \
                = statistic_weibo(retweeted_weibo_retweeted_count, retweeted_weibo_set, user_info,  '_retweeted_weibo_retweeted')

        retweeted_weibo_comment_detail, retweeted_weibo_comment_total_number, retweeted_weibo_comment_top, retweeted_weibo_comment_average_number\
                = statistic_weibo(retweeted_weibo_comment_count, retweeted_weibo_set, user_info, '_retweeted_weibo_comment')

        origin_weibo_retweeted_brust = activity_weibo(
            origin_weibo_retweeted_timestamp, user_info,
            "origin_weibo_retweeted_timestamp")
        origin_weibo_comment_brust = activity_weibo(
            origin_weibo_comment_timestamp, user_info,
            "origin_weibo_comment_timestamp")
        retweeted_weibo_retweeted_brust = activity_weibo(
            retweeted_weibo_retweeted_timestamp, user_info,
            "retweeted_weibo_retweeted_timestamp")
        retweeted_weibo_comment_brust = activity_weibo(
            retweeted_weibo_comment_timestamp, user_info,
            "retweeted_weibo_comment_timestamp")

        influence_origin_weibo_retweeted = influence_weibo_cal(
            origin_weibo_retweeted_total_number,
            origin_weibo_retweeted_average_number,
            origin_weibo_retweeted_top[0][1], origin_weibo_retweeted_brust)

        influence_origin_weibo_comment = influence_weibo_cal(
            origin_weibo_comment_total_number,
            origin_weibo_comment_average_number,
            origin_weibo_comment_top[0][1], origin_weibo_comment_brust)

        influence_retweeted_weibo_retweeted = influence_weibo_cal(
            retweeted_weibo_retweeted_total_number,
            retweeted_weibo_retweeted_average_number,
            retweeted_weibo_retweeted_top[0][1],
            retweeted_weibo_retweeted_brust)

        influence_retweeted_weibo_comment = influence_weibo_cal(
            retweeted_weibo_comment_total_number,
            retweeted_weibo_comment_average_number,
            retweeted_weibo_comment_top[0][1], retweeted_weibo_retweeted_brust)

        user_index = user_index_cal(origin_weibo_list, retweeted_weibo_list,
                                    user_fansnum,
                                    influence_origin_weibo_retweeted,
                                    influence_origin_weibo_comment,
                                    influence_retweeted_weibo_retweeted,
                                    influence_retweeted_weibo_comment)

        user_item = {}
        user_item['user_index'] = user_index
        user_item['user'] = user
        user_item['user_fansnum'] = user_fansnum
        user_item["user_friendsnum"] = user_friendsnum
        user_item['origin_weibo_number'] = len(origin_weibo_set)
        user_item['comment_weibo_number'] = len(comment_weibo_set)
        user_item['retweeted_weibo_number'] = len(retweeted_weibo_set)
        user_item['total_number'] = len(origin_weibo_set) + len(
            comment_weibo_set) + len(retweeted_weibo_set)

        user_item[
            'origin_weibo_retweeted_total_number'] = origin_weibo_retweeted_total_number
        user_item[
            'origin_weibo_retweeted_average_number'] = origin_weibo_retweeted_average_number
        user_item[
            'origin_weibo_retweeted_top_number'] = origin_weibo_retweeted_top[
                0][1]
        user_item['origin_weibo_retweeted_top'] = json.dumps(
            origin_weibo_retweeted_top)
        user_item[
            'origin_weibo_retweeted_brust_average'] = origin_weibo_retweeted_brust[
                1]
        user_item[
            'origin_weibo_top_retweeted_id'] = origin_weibo_retweeted_top[0][0]
        user_item[
            'origin_weibo_retweeted_brust_n'] = origin_weibo_retweeted_brust[0]
        user_item['origin_weibo_retweeted_detail'] = json.dumps(
            origin_weibo_retweeted_detail)

        user_item[
            'origin_weibo_comment_total_number'] = origin_weibo_comment_total_number
        user_item[
            'origin_weibo_comment_average_number'] = origin_weibo_comment_average_number
        user_item[
            'origin_weibo_comment_top_number'] = origin_weibo_comment_top[0][1]
        user_item['origin_weibo_comment_top'] = json.dumps(
            origin_weibo_comment_top)
        user_item['origin_weibo_comment_brust_n'] = origin_weibo_comment_brust[
            0]
        user_item[
            'origin_weibo_comment_brust_average'] = origin_weibo_comment_brust[
                1]
        user_item['origin_weibo_top_comment_id'] = origin_weibo_comment_top[0][
            0]
        user_item['origin_weibo_comment_detail'] = json.dumps(
            origin_weibo_comment_detail)

        user_item[
            'retweeted_weibo_retweeted_total_number'] = retweeted_weibo_retweeted_total_number
        user_item[
            'retweeted_weibo_retweeted_average_number'] = retweeted_weibo_retweeted_average_number
        user_item[
            'retweeted_weibo_retweeted_top_number'] = retweeted_weibo_retweeted_top[
                0][1]
        user_item['retweeted_weibo_retweeted_top'] = json.dumps(
            retweeted_weibo_retweeted_top)
        user_item[
            'retweeted_weibo_retweeted_brust_n'] = retweeted_weibo_retweeted_brust[
                0]
        user_item[
            'retweeted_weibo_retweeted_brust_average'] = retweeted_weibo_retweeted_brust[
                1]
        user_item[
            'retweeted_weibo_top_retweeted_id'] = retweeted_weibo_retweeted_top[
                0][0]
        user_item['retweeted_weibo_retweeted_detail'] = json.dumps(
            retweeted_weibo_retweeted_detail)

        user_item[
            'retweeted_weibo_comment_total_number'] = retweeted_weibo_comment_total_number
        user_item[
            'retweeted_weibo_comment_average_number'] = retweeted_weibo_comment_average_number
        user_item[
            'retweeted_weibo_comment_top_number'] = retweeted_weibo_comment_top[
                0][1]
        user_item['retweeted_weibo_comment_top'] = json.dumps(
            retweeted_weibo_comment_top)
        user_item[
            'retweeted_weibo_comment_brust_n'] = retweeted_weibo_comment_brust[
                0]
        user_item[
            'retweeted_weibo_comment_brust_average'] = retweeted_weibo_comment_brust[
                1]
        user_item[
            'retweeted_weibo_top_comment_id'] = retweeted_weibo_comment_top[0][
                0]
        user_item['retweeted_weibo_comment_detail'] = json.dumps(
            retweeted_weibo_comment_detail)

        x = expand_index_action(user_item)
        bulk_action.extend([x[0], x[1]])
        count_c += 1
        if count_c % 1000 == 0:
            es.bulk(bulk_action, index=es_index, doc_type='bci', timeout=30)
            bulk_action = []
            ##print count_c
    return bulk_action
Пример #2
0
def compute(user_set, bulk_action):
    count_c = 0

    weibo_redis = R_CLUSTER_FLOW1
    for user in user_set:
        origin_weibo_set = weibo_redis.smembers(user + "_origin_weibo") # origin weibo list
        retweeted_weibo_set = weibo_redis.smembers(user + "_retweeted_weibo") # retweeted weibo list
        comment_weibo_set = weibo_redis.smembers(user + "comment_weibo") # comment weibo list
        user_info = weibo_redis.hgetall(user)#dict

        origin_weibo_retweeted_timestamp = []
        origin_weibo_retweeted_count = []
        origin_weibo_list = []
        origin_weibo_comment_timestamp = []
        origin_weibo_comment_count = []
        retweeted_weibo_retweeted_count = []
        retweeted_weibo_comment_count= []
        retweeted_weibo_retweeted_timestamp = []
        retweeted_weibo_comment_timestamp = []
        retweeted_weibo_list = []
        user_fansnum = 0
        comment_weibo_number = 0
        user_friendsnum = 0
        for key in user_info.keys():
            if 'origin_weibo_retweeted_timestamp_' in key: # 不同时间段的原创微博被转发的爆发度
                origin_weibo_retweeted_timestamp.append(key.split('_')[-1])
            elif 'origin_weibo_comment_timestamp_' in key: # 不同时间段的原创微博被评论的爆发度
                origin_weibo_comment_timestamp.append(key.split('_')[-1])
            elif 'retweeted_weibo_retweeted_timestamp_' in key:# 不同时间段的转发微博被转发的爆发度
                retweeted_weibo_retweeted_timestamp.append(key.split('_')[-1])
            elif 'retweeted_weibo_comment_timestamp_' in key: # 不同时间段的转发微博被评论的爆发度
                retweeted_weibo_comment_timestamp.append(key.split('_')[-1])
            elif '_origin_weibo_retweeted' in key: # which origin weibo is retweeted, and retwweted number
                origin_weibo_retweeted_count.append(key.split('_')[0]) # origin weibo list
            elif '_origin_weibo_comment' in key: # which origin weibo is commentted, and comment number
                origin_weibo_comment_count.append(key.split('_')[0])
            elif '_retweeted_weibo_retweeted' in key: # which retweeted weibo is retweeted, and retweeted number
                retweeted_weibo_retweeted_count.append(key.split('_')[0]) # which retweeted weibo
            elif '_retweeted_weibo_comment' in key: # which retweeted weibo is commented, and comment number
                retweeted_weibo_comment_count.append(key.split('_')[0])
            elif 'fansnum' in key:
                user_fansnum = user_info[key]
            elif "user_friendsnum" in key:
                user_friendsnum = user_info[key]
            elif "comment_weibo" == key:
                pass
            else:
                print user_info
                print key
                print user

        user_id = str(user)
        origin_weibo_retweeted_detail, origin_weibo_retweeted_total_number, origin_weibo_retweeted_top, origin_weibo_retweeted_average_number \
                = statistic_weibo(origin_weibo_retweeted_count, origin_weibo_set, user_info, "_origin_weibo_retweeted")

        origin_weibo_comment_detail, origin_weibo_comment_total_number, origin_weibo_comment_top, origin_weibo_comment_average_number\
                = statistic_weibo(origin_weibo_comment_count, origin_weibo_set, user_info, "_origin_weibo_comment")

        retweeted_weibo_retweeted_detail, retweeted_weibo_retweeted_total_number, retweeted_weibo_retweeted_top, retweeted_weibo_retweeted_average_number \
                = statistic_weibo(retweeted_weibo_retweeted_count, retweeted_weibo_set, user_info,  '_retweeted_weibo_retweeted')

        retweeted_weibo_comment_detail, retweeted_weibo_comment_total_number, retweeted_weibo_comment_top, retweeted_weibo_comment_average_number\
                = statistic_weibo(retweeted_weibo_comment_count, retweeted_weibo_set, user_info, '_retweeted_weibo_comment')


        origin_weibo_retweeted_brust= activity_weibo(origin_weibo_retweeted_timestamp, user_info, "origin_weibo_retweeted_timestamp")
        origin_weibo_comment_brust= activity_weibo(origin_weibo_comment_timestamp, user_info, "origin_weibo_comment_timestamp")
        retweeted_weibo_retweeted_brust= activity_weibo(retweeted_weibo_retweeted_timestamp, user_info, "retweeted_weibo_retweeted_timestamp")
        retweeted_weibo_comment_brust= activity_weibo(retweeted_weibo_comment_timestamp, user_info, "retweeted_weibo_comment_timestamp")


        influence_origin_weibo_retweeted = influence_weibo_cal(origin_weibo_retweeted_total_number, origin_weibo_retweeted_average_number, origin_weibo_retweeted_top[0][1],origin_weibo_retweeted_brust)

        influence_origin_weibo_comment = influence_weibo_cal(origin_weibo_comment_total_number, origin_weibo_comment_average_number, origin_weibo_comment_top[0][1], origin_weibo_comment_brust)

        influence_retweeted_weibo_retweeted = influence_weibo_cal(retweeted_weibo_retweeted_total_number, retweeted_weibo_retweeted_average_number, retweeted_weibo_retweeted_top[0][1], retweeted_weibo_retweeted_brust)

        influence_retweeted_weibo_comment = influence_weibo_cal(retweeted_weibo_comment_total_number, retweeted_weibo_comment_average_number, retweeted_weibo_comment_top[0][1], retweeted_weibo_retweeted_brust)

        user_index = user_index_cal(origin_weibo_list, retweeted_weibo_list, user_fansnum, influence_origin_weibo_retweeted, influence_origin_weibo_comment, influence_retweeted_weibo_retweeted, influence_retweeted_weibo_comment)


        user_item = {}
        user_item['user_index'] = user_index
        user_item['user'] = user
        user_item['user_fansnum'] = user_fansnum
        user_item["user_friendsnum"] = user_friendsnum
        user_item['origin_weibo_number'] = len(origin_weibo_set)
        user_item['comment_weibo_number'] = len(comment_weibo_set)
        user_item['retweeted_weibo_number'] = len(retweeted_weibo_set)

        user_item['origin_weibo_retweeted_total_number'] = origin_weibo_retweeted_total_number
        user_item['origin_weibo_retweeted_average_number'] = origin_weibo_retweeted_average_number
        user_item['origin_weibo_retweeted_top_number'] = origin_weibo_retweeted_top[0][1]
        user_item['origin_weibo_retweeted_top'] = json.dumps(origin_weibo_retweeted_top)
        user_item['origin_weibo_retweeted_brust_average'] = origin_weibo_retweeted_brust[1]
        user_item['origin_weibo_top_retweeted_id'] = origin_weibo_retweeted_top[0][0]
        user_item['origin_weibo_retweeted_brust_n'] = origin_weibo_retweeted_brust[0]
        user_item['origin_weibo_retweeted_detail'] = json.dumps(origin_weibo_retweeted_detail)

        user_item['origin_weibo_comment_total_number'] = origin_weibo_comment_total_number
        user_item['origin_weibo_comment_average_number'] = origin_weibo_comment_average_number
        user_item['origin_weibo_comment_top_number'] = origin_weibo_comment_top[0][1]
        user_item['origin_weibo_comment_top'] = json.dumps(origin_weibo_comment_top)
        user_item['origin_weibo_comment_brust_n'] = origin_weibo_comment_brust[0]
        user_item['origin_weibo_comment_brust_average'] = origin_weibo_comment_brust[1]
        user_item['origin_weibo_top_comment_id'] = origin_weibo_comment_top[0][0]
        user_item['origin_weibo_comment_detail'] = json.dumps(origin_weibo_comment_detail)

        user_item['retweeted_weibo_retweeted_total_number'] = retweeted_weibo_retweeted_total_number
        user_item['retweeted_weibo_retweeted_average_number'] = retweeted_weibo_retweeted_average_number
        user_item['retweeted_weibo_retweeted_top_number'] = retweeted_weibo_retweeted_top[0][1]
        user_item['retweeted_weibo_retweeted_top'] = json.dumps(retweeted_weibo_retweeted_top)
        user_item['retweeted_weibo_retweeted_brust_n'] = retweeted_weibo_retweeted_brust[0]
        user_item['retweeted_weibo_retweeted_brust_average'] = retweeted_weibo_retweeted_brust[1]
        user_item['retweeted_weibo_top_retweeted_id'] = retweeted_weibo_retweeted_top[0][0]
        user_item['retweeted_weibo_retweeted_detail'] = json.dumps(retweeted_weibo_retweeted_detail)

        user_item['retweeted_weibo_comment_total_number'] = retweeted_weibo_comment_total_number
        user_item['retweeted_weibo_comment_average_number'] = retweeted_weibo_comment_average_number
        user_item['retweeted_weibo_comment_top_number'] = retweeted_weibo_comment_top[0][1]
        user_item['retweeted_weibo_comment_top'] = json.dumps(retweeted_weibo_comment_top)
        user_item['retweeted_weibo_comment_brust_n'] = retweeted_weibo_comment_brust[0]
        user_item['retweeted_weibo_comment_brust_average'] = retweeted_weibo_comment_brust[1]
        user_item['retweeted_weibo_top_comment_id'] = retweeted_weibo_comment_top[0][0]
        user_item['retweeted_weibo_comment_detail'] = json.dumps(retweeted_weibo_comment_detail)

        x = expand_index_action(user_item)
        bulk_action.extend([x[0], x[1]])
        count_c += 1
        if count_c % 1000 == 0:
            es.bulk(bulk_action, index=es_index, doc_type='bci', timeout=30)
            bulk_action = []
            print count_c
    return bulk_action
def compute(user_set, es, sensitive_uid_set):
    ts = time.time()
    bulk_action = []
    count_c = 0
    sensitive_user = 0

    weibo_redis = R_CLUSTER_FLOW1
    for user in user_set:
        if user in sensitive_uid_set:
            sensitive_user = 1
            print user
        user_info = weibo_redis.hgetall(user)  # dict
        sensitive_user_info = weibo_redis.hgetall("s_" + user)
        origin_weibo_retweeted_timestamp = []
        s_origin_weibo_retweeted_timestamp = []
        origin_weibo_retweeted_count = []
        s_origin_weibo_retweeted_count = []
        origin_weibo_list = []
        s_origin_weibo_list = []
        origin_weibo_comment_timestamp = []
        s_origin_weibo_comment_timestamp = []
        origin_weibo_comment_count = []
        s_origin_weibo_comment_count = []
        retweeted_weibo_retweeted_count = []
        s_retweeted_weibo_retweeted_count = []
        retweeted_weibo_comment_count = []
        s_retweeted_weibo_comment_count = []
        retweeted_weibo_retweeted_timestamp = []
        s_retweeted_weibo_retweeted_timestamp = []
        retweeted_weibo_comment_timestamp = []
        s_retweeted_weibo_comment_timestamp = []
        retweeted_weibo_list = []
        s_retweeted_weibo_list = []
        user_fansnum = 0
        comment_weibo_number = 0
        s_comment_weibo_number = 0
        user_friendsnum = 0
        s_origin_weibo_retweeted_detail = {}
        s_origin_weibo_comment_detail = {}
        s_retweeted_weibo_retweeted_detail = {}
        s_retweeted_weibo_comment_detail = {}
        origin_weibo_retweeted_detail = {}
        origin_weibo_comment_detail = {}
        retweeted_weibo_retweeted_detail = {}
        retweeted_weibo_comment_detail = {}

        for key in user_info.iterkeys():
            if "origin_weibo_retweeted_timestamp_" in key:
                origin_weibo_retweeted_timestamp.append(key.split("_")[-1])
            elif "origin_weibo_comment_timestamp_" in key:
                origin_weibo_comment_timestamp.append(key.split("_")[-1])
            elif "retweeted_weibo_retweeted_timestamp_" in key:
                retweeted_weibo_retweeted_timestamp.append(key.split("_")[-1])
            elif "retweeted_weibo_comment_timestamp_" in key:
                retweeted_weibo_comment_timestamp.append(key.split("_")[-1])
            elif "_origin_weibo_timestamp" in key:
                origin_weibo_list.append(key.split("_")[0])
            elif "_retweeted_weibo_timestamp" in key:
                retweeted_weibo_list.append(key.split("_")[0])
            elif "_origin_weibo_retweeted" in key:
                origin_weibo_retweeted_count.append(key.split("_")[0])
            elif "_origin_weibo_comment" in key:
                origin_weibo_comment_count.append(key.split("_")[0])
            elif "_retweeted_weibo_retweeted" in key:
                retweeted_weibo_retweeted_count.append(key.split("_")[0])
            elif "_retweeted_weibo_comment" in key:
                retweeted_weibo_comment_count.append(key.split("_")[0])
            elif "fansnum" in key:
                user_fansnum = user_info[key]
            elif "user_friendsnum" in key:
                user_friendsnum = user_info[key]
            elif "comment_weibo" in key:
                comment_weibo_number = user_info[key]
            else:
                print user_info, key

        user_origin_weibo_timestamp = []
        if len(origin_weibo_list):
            for i in range(len(origin_weibo_list)):
                timestamp = user_info[str(origin_weibo_list[i]) + "_origin_weibo_timestamp"]
                user_origin_weibo_timestamp.append(timestamp)

        user_retweeted_weibo_timestamp = []
        if len(retweeted_weibo_list):
            for i in range(len(retweeted_weibo_list)):
                timestamp = user_info[str(retweeted_weibo_list[i]) + "_retweeted_weibo_timestamp"]
                user_retweeted_weibo_timestamp.append(timestamp)

        user_id = str(user)
        origin_weibo_retweeted_detail, origin_weibo_retweeted_total_number, origin_weibo_retweeted_top_number, origin_weibo_retweeted_average_number, origin_weibo_top_retweeted_id = statistic_weibo(
            origin_weibo_retweeted_count, user_info, origin_weibo_list, "_origin_weibo_retweeted"
        )

        origin_weibo_comment_detail, origin_weibo_comment_total_number, origin_weibo_comment_top_number, origin_weibo_comment_average_number, origin_weibo_top_comment_id = statistic_weibo(
            origin_weibo_comment_count, user_info, origin_weibo_list, "_origin_weibo_comment"
        )

        retweeted_weibo_retweeted_detail, retweeted_weibo_retweeted_total_number, retweeted_weibo_retweeted_top_number, retweeted_weibo_retweeted_average_number, retweeted_weibo_top_retweeted_id = statistic_weibo(
            retweeted_weibo_retweeted_count, user_info, retweeted_weibo_list, "_retweeted_weibo_retweeted"
        )

        retweeted_weibo_comment_detail, retweeted_weibo_comment_total_number, retweeted_weibo_comment_top_number, retweeted_weibo_comment_average_number, retweeted_weibo_top_comment_id = statistic_weibo(
            retweeted_weibo_comment_count, user_info, retweeted_weibo_list, "_retweeted_weibo_comment"
        )

        origin_weibo_retweeted_brust = activity_weibo(
            origin_weibo_retweeted_timestamp, user_info, "origin_weibo_retweeted_timestamp"
        )
        origin_weibo_comment_brust = activity_weibo(
            origin_weibo_comment_timestamp, user_info, "origin_weibo_comment_timestamp"
        )
        retweeted_weibo_retweeted_brust = activity_weibo(
            retweeted_weibo_retweeted_timestamp, user_info, "retweeted_weibo_retweeted_timestamp"
        )
        retweeted_weibo_comment_brust = activity_weibo(
            retweeted_weibo_comment_timestamp, user_info, "retweeted_weibo_comment_timestamp"
        )

        influence_origin_weibo_retweeted = influence_weibo_cal(
            origin_weibo_retweeted_total_number,
            origin_weibo_retweeted_average_number,
            origin_weibo_retweeted_top_number,
            origin_weibo_retweeted_brust,
        )

        influence_origin_weibo_comment = influence_weibo_cal(
            origin_weibo_comment_total_number,
            origin_weibo_comment_average_number,
            origin_weibo_comment_top_number,
            origin_weibo_comment_brust,
        )

        influence_retweeted_weibo_retweeted = influence_weibo_cal(
            retweeted_weibo_retweeted_total_number,
            retweeted_weibo_retweeted_average_number,
            retweeted_weibo_retweeted_top_number,
            retweeted_weibo_retweeted_brust,
        )

        influence_retweeted_weibo_comment = influence_weibo_cal(
            retweeted_weibo_comment_total_number,
            retweeted_weibo_comment_average_number,
            retweeted_weibo_comment_top_number,
            retweeted_weibo_retweeted_brust,
        )

        user_index = user_index_cal(
            origin_weibo_list,
            retweeted_weibo_list,
            user_fansnum,
            influence_origin_weibo_retweeted,
            influence_origin_weibo_comment,
            influence_retweeted_weibo_retweeted,
            influence_retweeted_weibo_comment,
        )

        if not sensitive_user_info:
            s_user_index = 0
            s_origin_weibo_retweeted_total_number = 0
            s_origin_weibo_retweeted_average_number = 0
            s_origin_weibo_retweeted_top_number = 0
            s_origin_weibo_retweeted_brust = (0, 0)
            s_origin_weibo_top_retweeted_id = "0"

            s_origin_weibo_comment_total_number = 0
            s_origin_weibo_comment_average_number = 0
            s_origin_weibo_comment_top_number = 0
            s_origin_weibo_comment_brust = (0, 0)
            s_origin_weibo_top_comment_id = "0"

            s_retweeted_weibo_retweeted_total_number = 0
            s_retweeted_weibo_retweeted_average_number = 0
            s_retweeted_weibo_retweeted_top_number = 0
            s_retweeted_weibo_retweeted_brust = (0, 0)
            s_retweeted_weibo_top_retweeted_id = "0"

            s_retweeted_weibo_comment_total_number = 0
            s_retweeted_weibo_comment_average_number = 0
            s_retweeted_weibo_comment_top_number = 0
            s_retweeted_weibo_comment_brust = (0, 0)
            s_retweeted_weibo_top_comment_id = "0"
        else:
            for key in sensitive_user_info.iterkeys():
                if "origin_weibo_retweeted_timestamp_" in key:
                    s_origin_weibo_retweeted_timestamp.append(key.split("_")[-1])
                elif "origin_weibo_comment_timestamp_" in key:
                    s_origin_weibo_comment_timestamp.append(key.split("_")[-1])
                elif "retweeted_weibo_retweeted_timestamp_" in key:
                    s_retweeted_weibo_retweeted_timestamp.append(key.split("_")[-1])
                elif "retweeted_weibo_comment_timestamp_" in key:
                    s_retweeted_weibo_comment_timestamp.append(key.split("_")[-1])
                elif "_origin_weibo_timestamp" in key:
                    s_origin_weibo_list.append(key.split("_")[0])
                elif "_retweeted_weibo_timestamp" in key:
                    s_retweeted_weibo_list.append(key.split("_")[0])
                elif "_origin_weibo_retweeted" in key:
                    s_origin_weibo_retweeted_count.append(key.split("_")[0])
                elif "_origin_weibo_comment" in key:
                    s_origin_weibo_comment_count.append(key.split("_")[0])
                elif "_retweeted_weibo_retweeted" in key:
                    s_retweeted_weibo_retweeted_count.append(key.split("_")[0])
                elif "_retweeted_weibo_comment" in key:
                    s_retweeted_weibo_comment_count.append(key.split("_")[0])
                elif "comment_weibo" in key:
                    s_comment_weibo_number = sensitive_user_info[key]
                else:
                    print sensitive_user_info, key

            user_origin_weibo_timestamp = []
            if len(s_origin_weibo_list):
                for i in range(len(s_origin_weibo_list)):
                    timestamp = sensitive_user_info[str(s_origin_weibo_list[i]) + "_origin_weibo_timestamp"]
                    user_origin_weibo_timestamp.append(timestamp)

            user_retweeted_weibo_timestamp = []
            if len(s_retweeted_weibo_list):
                for i in range(len(s_retweeted_weibo_list)):
                    timestamp = sensitive_user_info[str(s_retweeted_weibo_list[i]) + "_retweeted_weibo_timestamp"]
                    user_retweeted_weibo_timestamp.append(timestamp)

            user_id = str(user)
            s_origin_weibo_retweeted_detail, s_origin_weibo_retweeted_total_number, s_origin_weibo_retweeted_top_number, s_origin_weibo_retweeted_average_number, s_origin_weibo_top_retweeted_id = statistic_weibo(
                s_origin_weibo_retweeted_count, sensitive_user_info, s_origin_weibo_list, "_origin_weibo_retweeted"
            )

            s_origin_weibo_comment_detail, s_origin_weibo_comment_total_number, s_origin_weibo_comment_top_number, s_origin_weibo_comment_average_number, s_origin_weibo_top_comment_id = statistic_weibo(
                s_origin_weibo_comment_count, sensitive_user_info, s_origin_weibo_list, "_origin_weibo_comment"
            )

            s_retweeted_weibo_retweeted_detail, s_retweeted_weibo_retweeted_total_number, s_retweeted_weibo_retweeted_top_number, s_retweeted_weibo_retweeted_average_number, s_retweeted_weibo_top_retweeted_id = statistic_weibo(
                s_retweeted_weibo_retweeted_count,
                sensitive_user_info,
                s_retweeted_weibo_list,
                "_retweeted_weibo_retweeted",
            )

            s_retweeted_weibo_comment_detail, s_retweeted_weibo_comment_total_number, s_retweeted_weibo_comment_top_number, s_retweeted_weibo_comment_average_number, s_retweeted_weibo_top_comment_id = statistic_weibo(
                s_retweeted_weibo_comment_count, sensitive_user_info, s_retweeted_weibo_list, "_retweeted_weibo_comment"
            )

            s_origin_weibo_retweeted_brust = activity_weibo(
                s_origin_weibo_retweeted_timestamp, sensitive_user_info, "origin_weibo_retweeted_timestamp"
            )
            s_origin_weibo_comment_brust = activity_weibo(
                s_origin_weibo_comment_timestamp, sensitive_user_info, "origin_weibo_comment_timestamp"
            )
            s_retweeted_weibo_retweeted_brust = activity_weibo(
                s_retweeted_weibo_retweeted_timestamp, sensitive_user_info, "retweeted_weibo_retweeted_timestamp"
            )
            s_retweeted_weibo_comment_brust = activity_weibo(
                s_retweeted_weibo_comment_timestamp, sensitive_user_info, "retweeted_weibo_comment_timestamp"
            )

            s_influence_origin_weibo_retweeted = influence_weibo_cal(
                s_origin_weibo_retweeted_total_number,
                s_origin_weibo_retweeted_average_number,
                s_origin_weibo_retweeted_top_number,
                s_origin_weibo_retweeted_brust,
            )

            s_influence_origin_weibo_comment = influence_weibo_cal(
                s_origin_weibo_comment_total_number,
                s_origin_weibo_comment_average_number,
                s_origin_weibo_comment_top_number,
                s_origin_weibo_comment_brust,
            )

            s_influence_retweeted_weibo_retweeted = influence_weibo_cal(
                s_retweeted_weibo_retweeted_total_number,
                s_retweeted_weibo_retweeted_average_number,
                s_retweeted_weibo_retweeted_top_number,
                s_retweeted_weibo_retweeted_brust,
            )

            s_influence_retweeted_weibo_comment = influence_weibo_cal(
                s_retweeted_weibo_comment_total_number,
                s_retweeted_weibo_comment_average_number,
                s_retweeted_weibo_comment_top_number,
                s_retweeted_weibo_retweeted_brust,
            )

            s_user_index = user_index_cal(
                s_origin_weibo_list,
                s_retweeted_weibo_list,
                user_fansnum,
                s_influence_origin_weibo_retweeted,
                s_influence_origin_weibo_comment,
                s_influence_retweeted_weibo_retweeted,
                s_influence_retweeted_weibo_comment,
            )

        user_item = {}
        user_item["ordinary_influence"] = user_index
        user_item["sensitive_influence"] = s_user_index
        user_item["user_index"] = 0.8 * user_index + 0.2 * s_user_index
        user_item["uid"] = str(user)
        user_item["user_fansnum"] = int(user_fansnum)
        user_item["user_friendsnum"] = int(user_friendsnum)
        user_item["origin_weibo_number"] = len(origin_weibo_list)
        user_item["comment_weibo_number"] = comment_weibo_number
        user_item["retweeted_weibo_number"] = len(retweeted_weibo_list)
        user_item["s_origin_weibo_number"] = len(s_origin_weibo_list)
        user_item["s_comment_weibo_number"] = s_comment_weibo_number
        user_item["s_retweeted_weibo_number"] = len(s_retweeted_weibo_list)
        if sensitive_user:
            user_item["s_origin_weibo_retweeted_detail"] = json.dumps(s_origin_weibo_retweeted_detail)
            user_item["s_origin_weibo_comment_detail"] = json.dumps(s_origin_weibo_comment_detail)
            user_item["s_retweeted_weibo_retweeted_detail"] = json.dumps(s_retweeted_weibo_retweeted_detail)
            user_item["s_retweeted_weibo_comment_detail"] = json.dumps(s_retweeted_weibo_comment_detail)

        user_item["origin_weibo_retweeted_total_number"] = origin_weibo_retweeted_total_number
        user_item["origin_weibo_retweeted_average_number"] = origin_weibo_retweeted_average_number
        user_item["origin_weibo_retweeted_top_number"] = origin_weibo_retweeted_top_number
        user_item["origin_weibo_retweeted_brust_average"] = origin_weibo_retweeted_brust[1]
        user_item["origin_weibo_top_retweeted_id"] = origin_weibo_top_retweeted_id
        user_item["origin_weibo_retweeted_brust_n"] = origin_weibo_retweeted_brust[0]
        if sensitive_user:
            user_item["origin_weibo_retweeted_detail"] = json.dumps(origin_weibo_retweeted_detail)

        user_item["s_origin_weibo_retweeted_total_number"] = s_origin_weibo_retweeted_total_number
        user_item["s_origin_weibo_retweeted_average_number"] = s_origin_weibo_retweeted_average_number
        user_item["s_origin_weibo_retweeted_top_number"] = s_origin_weibo_retweeted_top_number
        user_item["s_origin_weibo_retweeted_brust_average"] = s_origin_weibo_retweeted_brust[1]
        user_item["s_origin_weibo_top_retweeted_id"] = s_origin_weibo_top_retweeted_id
        user_item["s_origin_weibo_retweeted_brust_n"] = s_origin_weibo_retweeted_brust[0]

        user_item["origin_weibo_comment_total_number"] = origin_weibo_comment_total_number
        user_item["origin_weibo_comment_average_number"] = origin_weibo_comment_average_number
        user_item["origin_weibo_comment_top_number"] = origin_weibo_comment_top_number
        user_item["origin_weibo_comment_brust_n"] = origin_weibo_comment_brust[0]
        user_item["origin_weibo_comment_brust_average"] = origin_weibo_comment_brust[1]
        user_item["origin_weibo_top_comment_id"] = origin_weibo_top_comment_id
        if sensitive_user:
            user_item["origin_weibo_comment_detail"] = json.dumps(origin_weibo_comment_detail)

        user_item["s_origin_weibo_comment_total_number"] = s_origin_weibo_comment_total_number
        user_item["s_origin_weibo_comment_average_number"] = s_origin_weibo_comment_average_number
        user_item["s_origin_weibo_comment_top_number"] = s_origin_weibo_comment_top_number
        user_item["s_origin_weibo_comment_brust_n"] = s_origin_weibo_comment_brust[0]
        user_item["s_origin_weibo_comment_brust_average"] = s_origin_weibo_comment_brust[1]
        user_item["s_origin_weibo_top_comment_id"] = s_origin_weibo_top_comment_id

        user_item["retweeted_weibo_retweeted_total_number"] = retweeted_weibo_retweeted_total_number
        user_item["retweeted_weibo_retweeted_average_number"] = retweeted_weibo_retweeted_average_number
        user_item["retweeted_weibo_retweeted_top_number"] = retweeted_weibo_retweeted_top_number
        user_item["retweeted_weibo_retweeted_brust_n"] = retweeted_weibo_retweeted_brust[0]
        user_item["retweeted_weibo_retweeted_brust_average"] = retweeted_weibo_retweeted_brust[1]
        user_item["retweeted_weibo_top_retweeted_id"] = retweeted_weibo_top_retweeted_id
        if sensitive_user:
            user_item["retweeted_weibo_retweeted_detail"] = json.dumps(retweeted_weibo_retweeted_detail)

        user_item["s_retweeted_weibo_retweeted_total_number"] = s_retweeted_weibo_retweeted_total_number
        user_item["s_retweeted_weibo_retweeted_average_number"] = s_retweeted_weibo_retweeted_average_number
        user_item["s_retweeted_weibo_retweeted_top_number"] = s_retweeted_weibo_retweeted_top_number
        user_item["s_retweeted_weibo_retweeted_brust_n"] = s_retweeted_weibo_retweeted_brust[0]
        user_item["s_retweeted_weibo_retweeted_brust_average"] = s_retweeted_weibo_retweeted_brust[1]
        user_item["s_retweeted_weibo_top_retweeted_id"] = s_retweeted_weibo_top_retweeted_id

        user_item["retweeted_weibo_comment_total_number"] = retweeted_weibo_comment_total_number
        user_item["retweeted_weibo_comment_average_number"] = retweeted_weibo_comment_average_number
        user_item["retweeted_weibo_comment_top_number"] = retweeted_weibo_comment_top_number
        user_item["retweeted_weibo_comment_brust_n"] = retweeted_weibo_comment_brust[0]
        user_item["retweeted_weibo_comment_brust_average"] = retweeted_weibo_comment_brust[1]
        user_item["retweeted_weibo_top_comment_id"] = retweeted_weibo_top_comment_id
        if sensitive_user:
            user_item["retweeted_weibo_comment_detail"] = json.dumps(retweeted_weibo_comment_detail)

        user_item["s_retweeted_weibo_comment_total_number"] = s_retweeted_weibo_comment_total_number
        user_item["s_retweeted_weibo_comment_average_number"] = s_retweeted_weibo_comment_average_number
        user_item["s_retweeted_weibo_comment_top_number"] = s_retweeted_weibo_comment_top_number
        user_item["s_retweeted_weibo_comment_brust_n"] = s_retweeted_weibo_comment_brust[0]
        user_item["s_retweeted_weibo_comment_brust_average"] = s_retweeted_weibo_comment_brust[1]
        user_item["s_retweeted_weibo_top_comment_id"] = s_retweeted_weibo_top_comment_id

        x = expand_index_action(user_item)
        bulk_action.extend([x[0], x[1]])
        count_c += 1
        if count_c % 1000 == 0:
            try:
                es.bulk(bulk_action, index=es_index, doc_type="bci", timeout=30)
                bulk_action = []
            except:
                es.bulk(bulk_action, index=es_index, doc_type="bci", timeout=30)
                bulk_action = []
            print count_c
Пример #4
0
def compute(user_set, es):
    bulk_action = []
    count_c = 0
    
    weibo_redis = R_CLUSTER_FLOW1
    for user in user_set:
        user_info = weibo_redis.hgetall(user)#dict
        origin_weibo_retweeted_timestamp = []
        origin_weibo_retweeted_count = []
        origin_weibo_list = []
        origin_weibo_comment_timestamp = []
        origin_weibo_comment_count = []
        retweeted_weibo_retweeted_count = []
        retweeted_weibo_comment_count= []
        retweeted_weibo_retweeted_timestamp = []
        retweeted_weibo_comment_timestamp = []
        retweeted_weibo_list = []
        user_fansnum = 0
        comment_weibo_number = 0
        user_friendsnum = 0
        for key in user_info.iterkeys():
            if 'origin_weibo_retweeted_timestamp_' in key:
                origin_weibo_retweeted_timestamp.append(key.split('_')[-1])
            elif 'origin_weibo_comment_timestamp_' in key:
                origin_weibo_comment_timestamp.append(key.split('_')[-1])
            elif 'retweeted_weibo_retweeted_timestamp_' in key:
                retweeted_weibo_retweeted_timestamp.append(key.split('_')[-1])
            elif 'retweeted_weibo_comment_timestamp_' in key:
                retweeted_weibo_comment_timestamp.append(key.split('_')[-1])
            elif '_origin_weibo_timestamp' in key:
                origin_weibo_list.append(key.split('_')[0])
            elif '_retweeted_weibo_timestamp' in key:
                retweeted_weibo_list.append(key.split('_')[0])
            elif '_origin_weibo_retweeted' in key:
                origin_weibo_retweeted_count.append(key.split('_')[0])
            elif '_origin_weibo_comment' in key:
                origin_weibo_comment_count.append(key.split('_')[0])
            elif '_retweeted_weibo_retweeted' in key:
                retweeted_weibo_retweeted_count.append(key.split('_')[0])
            elif '_retweeted_weibo_comment' in key:
                retweeted_weibo_comment_count.append(key.split('_')[0])
            elif 'fansnum' in key:
                user_fansnum = user_info[key]
            elif "user_friendsnum" in key:
                user_friendsnum = user_info[key]
            elif 'comment_weibo' in key:
                comment_weibo_number = user_info[key]
            else:
                print user_info, key

        user_origin_weibo_timestamp = [] 
        if len(origin_weibo_list):
            for i in range(len(origin_weibo_list)):
                timestamp = user_info[str(origin_weibo_list[i])+'_origin_weibo_timestamp']
                user_origin_weibo_timestamp.append(timestamp)


        user_retweeted_weibo_timestamp = [] 
        if len(retweeted_weibo_list):
            for i in range(len(retweeted_weibo_list)):
                timestamp = user_info[str(retweeted_weibo_list[i])+'_retweeted_weibo_timestamp']
                user_retweeted_weibo_timestamp.append(timestamp)

        user_id = str(user)
        origin_weibo_retweeted_detail, origin_weibo_retweeted_total_number, origin_weibo_retweeted_top_number, origin_weibo_retweeted_average_number, origin_weibo_top_retweeted_id \
                = statistic_weibo(origin_weibo_retweeted_count, user_info, origin_weibo_list, "_origin_weibo_retweeted")

        origin_weibo_comment_detail, origin_weibo_comment_total_number, origin_weibo_comment_top_number, origin_weibo_comment_average_number, origin_weibo_top_comment_id \
                = statistic_weibo(origin_weibo_comment_count, user_info, origin_weibo_list, "_origin_weibo_comment")

        retweeted_weibo_retweeted_detail, retweeted_weibo_retweeted_total_number, retweeted_weibo_retweeted_top_number, retweeted_weibo_retweeted_average_number, retweeted_weibo_top_retweeted_id \
                = statistic_weibo(retweeted_weibo_retweeted_count, user_info, retweeted_weibo_list, '_retweeted_weibo_retweeted')

        retweeted_weibo_comment_detail, retweeted_weibo_comment_total_number, retweeted_weibo_comment_top_number, retweeted_weibo_comment_average_number, retweeted_weibo_top_comment_id \
                = statistic_weibo(retweeted_weibo_comment_count, user_info, retweeted_weibo_list, '_retweeted_weibo_comment')


        origin_weibo_retweeted_brust= activity_weibo(origin_weibo_retweeted_timestamp, user_info, "origin_weibo_retweeted_timestamp")
        origin_weibo_comment_brust= activity_weibo(origin_weibo_comment_timestamp, user_info, "origin_weibo_comment_timestamp")
        retweeted_weibo_retweeted_brust= activity_weibo(retweeted_weibo_retweeted_timestamp, user_info, "retweeted_weibo_retweeted_timestamp")
        retweeted_weibo_comment_brust= activity_weibo(retweeted_weibo_comment_timestamp, user_info, "retweeted_weibo_comment_timestamp")



        influence_origin_weibo_retweeted = influence_weibo_cal(origin_weibo_retweeted_total_number, origin_weibo_retweeted_average_number, origin_weibo_retweeted_top_number,origin_weibo_retweeted_brust)

        influence_origin_weibo_comment = influence_weibo_cal(origin_weibo_comment_total_number, origin_weibo_comment_average_number, origin_weibo_comment_top_number, origin_weibo_comment_brust)

        influence_retweeted_weibo_retweeted = influence_weibo_cal(retweeted_weibo_retweeted_total_number, retweeted_weibo_retweeted_average_number, retweeted_weibo_retweeted_top_number, retweeted_weibo_retweeted_brust)

        influence_retweeted_weibo_comment = influence_weibo_cal(retweeted_weibo_comment_total_number, retweeted_weibo_comment_average_number, retweeted_weibo_comment_top_number, retweeted_weibo_retweeted_brust)

        user_index = user_index_cal(origin_weibo_list, retweeted_weibo_list, user_fansnum, influence_origin_weibo_retweeted, influence_origin_weibo_comment, influence_retweeted_weibo_retweeted, influence_retweeted_weibo_comment)


        user_item = {}
        user_item['user_index'] = user_index
        user_item['user'] = user
        user_item['user_fansnum'] = user_fansnum
        user_item["user_friendsnum"] = user_friendsnum
        user_item['origin_weibo_number'] = len(origin_weibo_list)
        user_item['comment_weibo_number'] = comment_weibo_number
        user_item['retweeted_weibo_number'] = len(retweeted_weibo_list)

        user_item['origin_weibo_retweeted_total_number'] = origin_weibo_retweeted_total_number
        user_item['origin_weibo_retweeted_average_number'] = origin_weibo_retweeted_average_number
        user_item['origin_weibo_retweeted_top_number'] = origin_weibo_retweeted_top_number
        user_item['origin_weibo_retweeted_brust_average'] = origin_weibo_retweeted_brust[1]
        user_item['origin_weibo_top_retweeted_id'] = origin_weibo_top_retweeted_id
        user_item['origin_weibo_retweeted_brust_n'] = origin_weibo_retweeted_brust[0]
        #user_item['origin_weibo_retweeted_detail'] = origin_weibo_retweeted_detail

        user_item['origin_weibo_comment_total_number'] = origin_weibo_comment_total_number
        user_item['origin_weibo_comment_average_number'] = origin_weibo_comment_average_number
        user_item['origin_weibo_comment_top_number'] = origin_weibo_comment_top_number
        user_item['origin_weibo_comment_brust_n'] = origin_weibo_comment_brust[0]
        user_item['origin_weibo_comment_brust_average'] = origin_weibo_comment_brust[1]
        user_item['origin_weibo_top_comment_id'] = origin_weibo_top_comment_id
        #user_item['origin_weibo_comment_detail'] = origin_weibo_comment_detail

        user_item['retweeted_weibo_retweeted_total_number'] = retweeted_weibo_retweeted_total_number
        user_item['retweeted_weibo_retweeted_average_number'] = retweeted_weibo_retweeted_average_number
        user_item['retweeted_weibo_retweeted_top_number'] = retweeted_weibo_retweeted_top_number
        user_item['retweeted_weibo_retweeted_brust_n'] = retweeted_weibo_retweeted_brust[0]
        user_item['retweeted_weibo_retweeted_brust_average'] = retweeted_weibo_retweeted_brust[1]
        user_item['retweeted_weibo_top_retweeted_id'] = retweeted_weibo_top_retweeted_id
        #user_item['retweeted_weibo_retweeted_detail'] = retweeted_weibo_retweeted_detail

        user_item['retweeted_weibo_comment_total_number'] = retweeted_weibo_comment_total_number
        user_item['retweeted_weibo_comment_average_number'] = retweeted_weibo_comment_average_number
        user_item['retweeted_weibo_comment_top_number'] = retweeted_weibo_comment_top_number
        user_item['retweeted_weibo_comment_brust_n'] = retweeted_weibo_comment_brust[0]
        user_item['retweeted_weibo_comment_brust_average'] = retweeted_weibo_comment_brust[1]
        user_item['retweeted_weibo_top_comment_id'] = retweeted_weibo_top_comment_id
        #user_item['retweeted_weibo_comment_detail'] = retweeted_weibo_comment_detail

        x = expand_index_action(user_item)
        bulk_action.extend([x[0], x[1]])
        count_c += 1
        if count_c % 1000 == 0:
            while True:
                try:
                    es.bulk(bulk_action, index=es_index, doc_type='bci', timeout=30)
                    bulk_action = []
                    break
                except Exception,r:
                    es = ES_CLUSTER_FLOW1
                    print "bulk error"
            print count_c
def compute(user_set, es, sensitive_uid_set):
    ts = time.time()
    bulk_action = []
    count_c = 0
    sensitive_user = 0

    weibo_redis = R_CLUSTER_FLOW1
    for user in user_set:
        if user in sensitive_uid_set:
            sensitive_user = 1
            print user
        user_info = weibo_redis.hgetall(user)  #dict
        sensitive_user_info = weibo_redis.hgetall('s_' + user)
        origin_weibo_retweeted_timestamp = []
        s_origin_weibo_retweeted_timestamp = []
        origin_weibo_retweeted_count = []
        s_origin_weibo_retweeted_count = []
        origin_weibo_list = []
        s_origin_weibo_list = []
        origin_weibo_comment_timestamp = []
        s_origin_weibo_comment_timestamp = []
        origin_weibo_comment_count = []
        s_origin_weibo_comment_count = []
        retweeted_weibo_retweeted_count = []
        s_retweeted_weibo_retweeted_count = []
        retweeted_weibo_comment_count = []
        s_retweeted_weibo_comment_count = []
        retweeted_weibo_retweeted_timestamp = []
        s_retweeted_weibo_retweeted_timestamp = []
        retweeted_weibo_comment_timestamp = []
        s_retweeted_weibo_comment_timestamp = []
        retweeted_weibo_list = []
        s_retweeted_weibo_list = []
        user_fansnum = 0
        comment_weibo_number = 0
        s_comment_weibo_number = 0
        user_friendsnum = 0
        s_origin_weibo_retweeted_detail = {}
        s_origin_weibo_comment_detail = {}
        s_retweeted_weibo_retweeted_detail = {}
        s_retweeted_weibo_comment_detail = {}
        origin_weibo_retweeted_detail = {}
        origin_weibo_comment_detail = {}
        retweeted_weibo_retweeted_detail = {}
        retweeted_weibo_comment_detail = {}

        for key in user_info.iterkeys():
            if 'origin_weibo_retweeted_timestamp_' in key:
                origin_weibo_retweeted_timestamp.append(key.split('_')[-1])
            elif 'origin_weibo_comment_timestamp_' in key:
                origin_weibo_comment_timestamp.append(key.split('_')[-1])
            elif 'retweeted_weibo_retweeted_timestamp_' in key:
                retweeted_weibo_retweeted_timestamp.append(key.split('_')[-1])
            elif 'retweeted_weibo_comment_timestamp_' in key:
                retweeted_weibo_comment_timestamp.append(key.split('_')[-1])
            elif '_origin_weibo_timestamp' in key:
                origin_weibo_list.append(key.split('_')[0])
            elif '_retweeted_weibo_timestamp' in key:
                retweeted_weibo_list.append(key.split('_')[0])
            elif '_origin_weibo_retweeted' in key:
                origin_weibo_retweeted_count.append(key.split('_')[0])
            elif '_origin_weibo_comment' in key:
                origin_weibo_comment_count.append(key.split('_')[0])
            elif '_retweeted_weibo_retweeted' in key:
                retweeted_weibo_retweeted_count.append(key.split('_')[0])
            elif '_retweeted_weibo_comment' in key:
                retweeted_weibo_comment_count.append(key.split('_')[0])
            elif 'fansnum' in key:
                user_fansnum = user_info[key]
            elif "user_friendsnum" in key:
                user_friendsnum = user_info[key]
            elif 'comment_weibo' in key:
                comment_weibo_number = user_info[key]
            else:
                print user_info, key

        user_origin_weibo_timestamp = []
        if len(origin_weibo_list):
            for i in range(len(origin_weibo_list)):
                timestamp = user_info[str(origin_weibo_list[i]) +
                                      '_origin_weibo_timestamp']
                user_origin_weibo_timestamp.append(timestamp)

        user_retweeted_weibo_timestamp = []
        if len(retweeted_weibo_list):
            for i in range(len(retweeted_weibo_list)):
                timestamp = user_info[str(retweeted_weibo_list[i]) +
                                      '_retweeted_weibo_timestamp']
                user_retweeted_weibo_timestamp.append(timestamp)

        user_id = str(user)
        origin_weibo_retweeted_detail, origin_weibo_retweeted_total_number, origin_weibo_retweeted_top_number, origin_weibo_retweeted_average_number, origin_weibo_top_retweeted_id \
                = statistic_weibo(origin_weibo_retweeted_count, user_info, origin_weibo_list, "_origin_weibo_retweeted")

        origin_weibo_comment_detail, origin_weibo_comment_total_number, origin_weibo_comment_top_number, origin_weibo_comment_average_number, origin_weibo_top_comment_id \
                = statistic_weibo(origin_weibo_comment_count, user_info, origin_weibo_list, "_origin_weibo_comment")

        retweeted_weibo_retweeted_detail, retweeted_weibo_retweeted_total_number, retweeted_weibo_retweeted_top_number, retweeted_weibo_retweeted_average_number, retweeted_weibo_top_retweeted_id \
                = statistic_weibo(retweeted_weibo_retweeted_count, user_info, retweeted_weibo_list, '_retweeted_weibo_retweeted')

        retweeted_weibo_comment_detail, retweeted_weibo_comment_total_number, retweeted_weibo_comment_top_number, retweeted_weibo_comment_average_number, retweeted_weibo_top_comment_id \
                = statistic_weibo(retweeted_weibo_comment_count, user_info, retweeted_weibo_list, '_retweeted_weibo_comment')

        origin_weibo_retweeted_brust = activity_weibo(
            origin_weibo_retweeted_timestamp, user_info,
            "origin_weibo_retweeted_timestamp")
        origin_weibo_comment_brust = activity_weibo(
            origin_weibo_comment_timestamp, user_info,
            "origin_weibo_comment_timestamp")
        retweeted_weibo_retweeted_brust = activity_weibo(
            retweeted_weibo_retweeted_timestamp, user_info,
            "retweeted_weibo_retweeted_timestamp")
        retweeted_weibo_comment_brust = activity_weibo(
            retweeted_weibo_comment_timestamp, user_info,
            "retweeted_weibo_comment_timestamp")

        influence_origin_weibo_retweeted = influence_weibo_cal(
            origin_weibo_retweeted_total_number,
            origin_weibo_retweeted_average_number,
            origin_weibo_retweeted_top_number, origin_weibo_retweeted_brust)

        influence_origin_weibo_comment = influence_weibo_cal(
            origin_weibo_comment_total_number,
            origin_weibo_comment_average_number,
            origin_weibo_comment_top_number, origin_weibo_comment_brust)

        influence_retweeted_weibo_retweeted = influence_weibo_cal(
            retweeted_weibo_retweeted_total_number,
            retweeted_weibo_retweeted_average_number,
            retweeted_weibo_retweeted_top_number,
            retweeted_weibo_retweeted_brust)

        influence_retweeted_weibo_comment = influence_weibo_cal(
            retweeted_weibo_comment_total_number,
            retweeted_weibo_comment_average_number,
            retweeted_weibo_comment_top_number,
            retweeted_weibo_retweeted_brust)

        user_index = user_index_cal(origin_weibo_list, retweeted_weibo_list,
                                    user_fansnum,
                                    influence_origin_weibo_retweeted,
                                    influence_origin_weibo_comment,
                                    influence_retweeted_weibo_retweeted,
                                    influence_retweeted_weibo_comment)

        if not sensitive_user_info:
            s_user_index = 0
            s_origin_weibo_retweeted_total_number = 0
            s_origin_weibo_retweeted_average_number = 0
            s_origin_weibo_retweeted_top_number = 0
            s_origin_weibo_retweeted_brust = (0, 0)
            s_origin_weibo_top_retweeted_id = '0'

            s_origin_weibo_comment_total_number = 0
            s_origin_weibo_comment_average_number = 0
            s_origin_weibo_comment_top_number = 0
            s_origin_weibo_comment_brust = (0, 0)
            s_origin_weibo_top_comment_id = '0'

            s_retweeted_weibo_retweeted_total_number = 0
            s_retweeted_weibo_retweeted_average_number = 0
            s_retweeted_weibo_retweeted_top_number = 0
            s_retweeted_weibo_retweeted_brust = (0, 0)
            s_retweeted_weibo_top_retweeted_id = '0'

            s_retweeted_weibo_comment_total_number = 0
            s_retweeted_weibo_comment_average_number = 0
            s_retweeted_weibo_comment_top_number = 0
            s_retweeted_weibo_comment_brust = (0, 0)
            s_retweeted_weibo_top_comment_id = '0'
        else:
            for key in sensitive_user_info.iterkeys():
                if 'origin_weibo_retweeted_timestamp_' in key:
                    s_origin_weibo_retweeted_timestamp.append(
                        key.split('_')[-1])
                elif 'origin_weibo_comment_timestamp_' in key:
                    s_origin_weibo_comment_timestamp.append(key.split('_')[-1])
                elif 'retweeted_weibo_retweeted_timestamp_' in key:
                    s_retweeted_weibo_retweeted_timestamp.append(
                        key.split('_')[-1])
                elif 'retweeted_weibo_comment_timestamp_' in key:
                    s_retweeted_weibo_comment_timestamp.append(
                        key.split('_')[-1])
                elif '_origin_weibo_timestamp' in key:
                    s_origin_weibo_list.append(key.split('_')[0])
                elif '_retweeted_weibo_timestamp' in key:
                    s_retweeted_weibo_list.append(key.split('_')[0])
                elif '_origin_weibo_retweeted' in key:
                    s_origin_weibo_retweeted_count.append(key.split('_')[0])
                elif '_origin_weibo_comment' in key:
                    s_origin_weibo_comment_count.append(key.split('_')[0])
                elif '_retweeted_weibo_retweeted' in key:
                    s_retweeted_weibo_retweeted_count.append(key.split('_')[0])
                elif '_retweeted_weibo_comment' in key:
                    s_retweeted_weibo_comment_count.append(key.split('_')[0])
                elif 'comment_weibo' in key:
                    s_comment_weibo_number = sensitive_user_info[key]
                else:
                    print sensitive_user_info, key

            user_origin_weibo_timestamp = []
            if len(s_origin_weibo_list):
                for i in range(len(s_origin_weibo_list)):
                    timestamp = sensitive_user_info[str(s_origin_weibo_list[i])
                                                    +
                                                    '_origin_weibo_timestamp']
                    user_origin_weibo_timestamp.append(timestamp)

            user_retweeted_weibo_timestamp = []
            if len(s_retweeted_weibo_list):
                for i in range(len(s_retweeted_weibo_list)):
                    timestamp = sensitive_user_info[
                        str(s_retweeted_weibo_list[i]) +
                        '_retweeted_weibo_timestamp']
                    user_retweeted_weibo_timestamp.append(timestamp)

            user_id = str(user)
            s_origin_weibo_retweeted_detail, s_origin_weibo_retweeted_total_number, s_origin_weibo_retweeted_top_number, s_origin_weibo_retweeted_average_number, s_origin_weibo_top_retweeted_id \
                    = statistic_weibo(s_origin_weibo_retweeted_count, sensitive_user_info, s_origin_weibo_list, "_origin_weibo_retweeted")

            s_origin_weibo_comment_detail, s_origin_weibo_comment_total_number, s_origin_weibo_comment_top_number, s_origin_weibo_comment_average_number, s_origin_weibo_top_comment_id \
                    = statistic_weibo(s_origin_weibo_comment_count, sensitive_user_info, s_origin_weibo_list, "_origin_weibo_comment")

            s_retweeted_weibo_retweeted_detail, s_retweeted_weibo_retweeted_total_number, s_retweeted_weibo_retweeted_top_number, s_retweeted_weibo_retweeted_average_number, s_retweeted_weibo_top_retweeted_id \
                    = statistic_weibo(s_retweeted_weibo_retweeted_count, sensitive_user_info, s_retweeted_weibo_list, '_retweeted_weibo_retweeted')

            s_retweeted_weibo_comment_detail, s_retweeted_weibo_comment_total_number, s_retweeted_weibo_comment_top_number, s_retweeted_weibo_comment_average_number, s_retweeted_weibo_top_comment_id \
                    = statistic_weibo(s_retweeted_weibo_comment_count, sensitive_user_info, s_retweeted_weibo_list, '_retweeted_weibo_comment')

            s_origin_weibo_retweeted_brust = activity_weibo(
                s_origin_weibo_retweeted_timestamp, sensitive_user_info,
                "origin_weibo_retweeted_timestamp")
            s_origin_weibo_comment_brust = activity_weibo(
                s_origin_weibo_comment_timestamp, sensitive_user_info,
                "origin_weibo_comment_timestamp")
            s_retweeted_weibo_retweeted_brust = activity_weibo(
                s_retweeted_weibo_retweeted_timestamp, sensitive_user_info,
                "retweeted_weibo_retweeted_timestamp")
            s_retweeted_weibo_comment_brust = activity_weibo(
                s_retweeted_weibo_comment_timestamp, sensitive_user_info,
                "retweeted_weibo_comment_timestamp")

            s_influence_origin_weibo_retweeted = influence_weibo_cal(
                s_origin_weibo_retweeted_total_number,
                s_origin_weibo_retweeted_average_number,
                s_origin_weibo_retweeted_top_number,
                s_origin_weibo_retweeted_brust)

            s_influence_origin_weibo_comment = influence_weibo_cal(
                s_origin_weibo_comment_total_number,
                s_origin_weibo_comment_average_number,
                s_origin_weibo_comment_top_number,
                s_origin_weibo_comment_brust)

            s_influence_retweeted_weibo_retweeted = influence_weibo_cal(
                s_retweeted_weibo_retweeted_total_number,
                s_retweeted_weibo_retweeted_average_number,
                s_retweeted_weibo_retweeted_top_number,
                s_retweeted_weibo_retweeted_brust)

            s_influence_retweeted_weibo_comment = influence_weibo_cal(
                s_retweeted_weibo_comment_total_number,
                s_retweeted_weibo_comment_average_number,
                s_retweeted_weibo_comment_top_number,
                s_retweeted_weibo_retweeted_brust)

            s_user_index = user_index_cal(
                s_origin_weibo_list, s_retweeted_weibo_list, user_fansnum,
                s_influence_origin_weibo_retweeted,
                s_influence_origin_weibo_comment,
                s_influence_retweeted_weibo_retweeted,
                s_influence_retweeted_weibo_comment)

        user_item = {}
        user_item['ordinary_influence'] = user_index
        user_item['sensitive_influence'] = s_user_index
        user_item['user_index'] = 0.8 * user_index + 0.2 * s_user_index
        user_item['uid'] = str(user)
        user_item['user_fansnum'] = int(user_fansnum)
        user_item["user_friendsnum"] = int(user_friendsnum)
        user_item['origin_weibo_number'] = len(origin_weibo_list)
        user_item['comment_weibo_number'] = comment_weibo_number
        user_item['retweeted_weibo_number'] = len(retweeted_weibo_list)
        user_item['s_origin_weibo_number'] = len(s_origin_weibo_list)
        user_item['s_comment_weibo_number'] = s_comment_weibo_number
        user_item['s_retweeted_weibo_number'] = len(s_retweeted_weibo_list)
        if sensitive_user:
            user_item['s_origin_weibo_retweeted_detail'] = json.dumps(
                s_origin_weibo_retweeted_detail)
            user_item['s_origin_weibo_comment_detail'] = json.dumps(
                s_origin_weibo_comment_detail)
            user_item['s_retweeted_weibo_retweeted_detail'] = json.dumps(
                s_retweeted_weibo_retweeted_detail)
            user_item['s_retweeted_weibo_comment_detail'] = json.dumps(
                s_retweeted_weibo_comment_detail)

        user_item[
            'origin_weibo_retweeted_total_number'] = origin_weibo_retweeted_total_number
        user_item[
            'origin_weibo_retweeted_average_number'] = origin_weibo_retweeted_average_number
        user_item[
            'origin_weibo_retweeted_top_number'] = origin_weibo_retweeted_top_number
        user_item[
            'origin_weibo_retweeted_brust_average'] = origin_weibo_retweeted_brust[
                1]
        user_item[
            'origin_weibo_top_retweeted_id'] = origin_weibo_top_retweeted_id
        user_item[
            'origin_weibo_retweeted_brust_n'] = origin_weibo_retweeted_brust[0]
        if sensitive_user:
            user_item['origin_weibo_retweeted_detail'] = json.dumps(
                origin_weibo_retweeted_detail)

        user_item[
            's_origin_weibo_retweeted_total_number'] = s_origin_weibo_retweeted_total_number
        user_item[
            's_origin_weibo_retweeted_average_number'] = s_origin_weibo_retweeted_average_number
        user_item[
            's_origin_weibo_retweeted_top_number'] = s_origin_weibo_retweeted_top_number
        user_item[
            's_origin_weibo_retweeted_brust_average'] = s_origin_weibo_retweeted_brust[
                1]
        user_item[
            's_origin_weibo_top_retweeted_id'] = s_origin_weibo_top_retweeted_id
        user_item[
            's_origin_weibo_retweeted_brust_n'] = s_origin_weibo_retweeted_brust[
                0]

        user_item[
            'origin_weibo_comment_total_number'] = origin_weibo_comment_total_number
        user_item[
            'origin_weibo_comment_average_number'] = origin_weibo_comment_average_number
        user_item[
            'origin_weibo_comment_top_number'] = origin_weibo_comment_top_number
        user_item['origin_weibo_comment_brust_n'] = origin_weibo_comment_brust[
            0]
        user_item[
            'origin_weibo_comment_brust_average'] = origin_weibo_comment_brust[
                1]
        user_item['origin_weibo_top_comment_id'] = origin_weibo_top_comment_id
        if sensitive_user:
            user_item['origin_weibo_comment_detail'] = json.dumps(
                origin_weibo_comment_detail)

        user_item[
            's_origin_weibo_comment_total_number'] = s_origin_weibo_comment_total_number
        user_item[
            's_origin_weibo_comment_average_number'] = s_origin_weibo_comment_average_number
        user_item[
            's_origin_weibo_comment_top_number'] = s_origin_weibo_comment_top_number
        user_item[
            's_origin_weibo_comment_brust_n'] = s_origin_weibo_comment_brust[0]
        user_item[
            's_origin_weibo_comment_brust_average'] = s_origin_weibo_comment_brust[
                1]
        user_item[
            's_origin_weibo_top_comment_id'] = s_origin_weibo_top_comment_id

        user_item[
            'retweeted_weibo_retweeted_total_number'] = retweeted_weibo_retweeted_total_number
        user_item[
            'retweeted_weibo_retweeted_average_number'] = retweeted_weibo_retweeted_average_number
        user_item[
            'retweeted_weibo_retweeted_top_number'] = retweeted_weibo_retweeted_top_number
        user_item[
            'retweeted_weibo_retweeted_brust_n'] = retweeted_weibo_retweeted_brust[
                0]
        user_item[
            'retweeted_weibo_retweeted_brust_average'] = retweeted_weibo_retweeted_brust[
                1]
        user_item[
            'retweeted_weibo_top_retweeted_id'] = retweeted_weibo_top_retweeted_id
        if sensitive_user:
            user_item['retweeted_weibo_retweeted_detail'] = json.dumps(
                retweeted_weibo_retweeted_detail)

        user_item[
            's_retweeted_weibo_retweeted_total_number'] = s_retweeted_weibo_retweeted_total_number
        user_item[
            's_retweeted_weibo_retweeted_average_number'] = s_retweeted_weibo_retweeted_average_number
        user_item[
            's_retweeted_weibo_retweeted_top_number'] = s_retweeted_weibo_retweeted_top_number
        user_item[
            's_retweeted_weibo_retweeted_brust_n'] = s_retweeted_weibo_retweeted_brust[
                0]
        user_item[
            's_retweeted_weibo_retweeted_brust_average'] = s_retweeted_weibo_retweeted_brust[
                1]
        user_item[
            's_retweeted_weibo_top_retweeted_id'] = s_retweeted_weibo_top_retweeted_id

        user_item[
            'retweeted_weibo_comment_total_number'] = retweeted_weibo_comment_total_number
        user_item[
            'retweeted_weibo_comment_average_number'] = retweeted_weibo_comment_average_number
        user_item[
            'retweeted_weibo_comment_top_number'] = retweeted_weibo_comment_top_number
        user_item[
            'retweeted_weibo_comment_brust_n'] = retweeted_weibo_comment_brust[
                0]
        user_item[
            'retweeted_weibo_comment_brust_average'] = retweeted_weibo_comment_brust[
                1]
        user_item[
            'retweeted_weibo_top_comment_id'] = retweeted_weibo_top_comment_id
        if sensitive_user:
            user_item['retweeted_weibo_comment_detail'] = json.dumps(
                retweeted_weibo_comment_detail)

        user_item[
            's_retweeted_weibo_comment_total_number'] = s_retweeted_weibo_comment_total_number
        user_item[
            's_retweeted_weibo_comment_average_number'] = s_retweeted_weibo_comment_average_number
        user_item[
            's_retweeted_weibo_comment_top_number'] = s_retweeted_weibo_comment_top_number
        user_item[
            's_retweeted_weibo_comment_brust_n'] = s_retweeted_weibo_comment_brust[
                0]
        user_item[
            's_retweeted_weibo_comment_brust_average'] = s_retweeted_weibo_comment_brust[
                1]
        user_item[
            's_retweeted_weibo_top_comment_id'] = s_retweeted_weibo_top_comment_id

        x = expand_index_action(user_item)
        bulk_action.extend([x[0], x[1]])
        count_c += 1
        if count_c % 1000 == 0:
            try:
                es.bulk(bulk_action,
                        index=es_index,
                        doc_type='bci',
                        timeout=30)
                bulk_action = []
            except:
                es.bulk(bulk_action,
                        index=es_index,
                        doc_type='bci',
                        timeout=30)
                bulk_action = []
            print count_c