def main():
    """Collect per-weibo retweet/comment counters from Redis and queue them.

    Walks the Redis set ``user_set`` page by page with SSCAN, reads the hash
    stored under each member, and for every hash field whose name contains
    one of the known counter markers (and whose value is non-empty) builds an
    ``{"update": {"_id": mid}}`` / ``{"doc": {...}}`` pair, where ``mid`` is
    the part of the field name before the first underscore.  The pairs are
    accumulated in one flat list; every 400 updates the list is JSON-encoded
    and LPUSHed onto ``update_mid_list`` via ``r_flow``, and any remainder is
    pushed after the scan finishes.  The total number of updates is printed.

    NOTE(review): the action/doc pair layout looks like the Elasticsearch
    bulk-update format -- confirm with whatever consumes ``update_mid_list``.
    """
    # (marker looked for inside a hash field name, counter written to the doc).
    # Checked in this order; the first marker found in the field name wins.
    field_markers = (
        ("_origin_weibo_retweeted", "retweeted"),
        ("_origin_weibo_comment", "comment"),
        ("_retweeted_weibo_comment", "comment"),
        ("_retweeted_weibo_retweeted", "retweeted"),
    )

    cursor = 0
    count = 0
    pending = []
    print(r.scard('user_set'))

    if RUN_TYPE:
        # Use the date of "now minus DAY" and announce the start of the run.
        date = ts2datetime(time.time() - DAY)
        start_time = str(ts2datetime(time.time()))
        print("/cron/push_mid2redis.py&start&%s" % start_time)
    else:
        # Fixed date used when RUN_TYPE is falsy.
        date = '2013-09-05'
    index_name = flow_text_index_name_pre + date
    print(index_name)

    last_flush = time.time()
    while True:
        page = r.sscan("user_set", cursor, count=3000)
        cursor = page[0]
        # page[1] holds the members returned for this page.
        for uid in page[1]:
            for field, value in r.hgetall(uid).iteritems():
                counter = None
                for marker, name in field_markers:
                    if marker in field:
                        counter = name
                        break
                if counter is None or not value:
                    continue

                mid = field.split('_')[0]
                pending.extend([
                    {"update": {"_id": mid}},
                    {"doc": {counter: int(value)}},
                ])
                count += 1
                if count % 400 == 0:
                    r_flow.lpush('update_mid_list', json.dumps(pending))
                    pending = []
                    # Timing bookkeeping for a per-batch progress print that
                    # is currently disabled.
                    now = time.time()
                    last_flush = now
        # A cursor of 0 means the scan has covered the whole set.
        if int(cursor) == 0:
            break

    # Push whatever did not fill a complete batch.
    if pending:
        r_flow.lpush('update_mid_list', json.dumps(pending))

    print(count)
# Example #2
# 0
def main():
    """Queue non-zero origin-weibo retweet/comment counters from Redis.

    Walks the Redis set ``user_set`` page by page with SSCAN and reads the
    hash stored under each member.  For every hash field whose name contains
    ``_origin_weibo_retweeted`` or ``_origin_weibo_comment`` and whose value
    converts to a non-zero integer, an ``{"update": {"_id": mid}}`` /
    ``{"doc": {...}}`` pair is appended to one flat list, ``mid`` being the
    part of the field name before the first underscore.  Every 1000 updates
    the list is JSON-encoded and LPUSHed onto ``update_mid_list`` via
    ``r_flow`` and the elapsed time for that batch is printed; any remainder
    is pushed after the scan finishes.  The total update count is printed.

    ``int()`` is applied to the value of every matching field, so a
    non-numeric value in such a field raises ``ValueError``.
    """
    # (marker looked for inside a hash field name, counter written to the doc).
    # Checked in this order; the first marker with a non-zero value wins.
    field_markers = (
        ("_origin_weibo_retweeted", "retweeted"),
        ("_origin_weibo_comment", "comment"),
    )

    cursor = 0
    count = 0
    pending = []
    print(r.scard('user_set'))

    if RUN_TYPE:
        # Use the date of "now minus DAY".
        date = ts2datetime(time.time() - DAY)
    else:
        # Fixed date used when RUN_TYPE is falsy.
        date = '2013-09-05'
    index_name = flow_text_index_name_pre + date

    batch_started = time.time()
    while True:
        page = r.sscan("user_set", cursor, count=3000)
        cursor = page[0]
        # page[1] holds the members returned for this page.
        for uid in page[1]:
            for field, value in r.hgetall(uid).iteritems():
                counter = None
                for marker, name in field_markers:
                    if marker in field and int(value):
                        counter = name
                        break
                if counter is None:
                    continue

                mid = field.split('_')[0]
                pending.extend([
                    {"update": {"_id": mid}},
                    {"doc": {counter: int(value)}},
                ])
                count += 1
                if count % 1000 == 0:
                    r_flow.lpush('update_mid_list', json.dumps(pending))
                    pending = []
                    now = time.time()
                    print("%s cost %s" % (count, now - batch_started))
                    batch_started = now
        # A cursor of 0 means the scan has covered the whole set.
        if int(cursor) == 0:
            break

    # Push whatever did not fill a complete batch.
    if pending:
        r_flow.lpush('update_mid_list', json.dumps(pending))

    print(count)
                    "should":[
                    ]
                }
            }
        }
    },
    "size": 10000
}


# Script entry point: collect every member of the Redis set 's_user_set'
# into sensitive_uid_list, then work out how many partitions of `patition`
# ids are needed to cover the list.
if __name__ == "__main__":
    scan_cursor = 0
    sensitive_uid_list = []
    count = 0
    while 1:
        re_scan = r_cluster.sscan('s_user_set', scan_cursor, count=10000)
        scan_cursor = re_scan[0]
        sensitive_uid_list.extend(re_scan[1])
        # SSCAN's count argument is only a hint, so a page may hold more or
        # fewer than 10000 members; tally what was actually returned so the
        # printed total matches the number of ids collected.
        count += len(re_scan[1])
        # A cursor of 0 means the scan has covered the whole set.
        if int(scan_cursor) == 0:
            print(count)
            break

    temp_list = sensitive_uid_list
    count = 0
    # Name kept as spelled: code beyond this point may refer to it.
    patition = 100
    # Number of partitions of `patition` ids needed to cover temp_list.
    number = int(math.ceil(len(temp_list) / float(patition)))
    print(number)