Exemplo n.º 1
0
def user_important_rank_test(date, window_size):
    """Rank users by their summed 'important' value and print the top five.

    For ``date`` and each of the ``window_size - 1`` preceding days, the
    per-day 'important' LevelDB store is opened and every stored value is
    added to a per-uid total.  The totals are handed to ``user_rank`` and
    the first five uids it returns are printed.

    Args:
        date: Date value accepted by ``datetime2ts``.
        window_size: Number of consecutive days, ending at ``date``, whose
            stores are summed.  A value of 1 reads only the store for
            ``date``.

    Returns:
        None.  The function only prints.
    """
    date_time = datetime2ts(date)
    # Day offsets are subtracted in units of 24*60*60, so ``date_time`` is
    # presumably a timestamp in seconds -- confirm against ``datetime2ts``.
    seconds_per_day = 24 * 60 * 60
    # 8 * (2 << 25) == 512 * 2**20, used for both LevelDB size options.
    buffer_size = 8 * (2 << 25)

    uid_important = {}
    for day_offset in range(window_size):
        # Every day of the window reads the 'important' store.  The
        # multi-day path previously opened the 'active' store, which did not
        # match the single-day path or the 'important' ranking requested
        # from ``user_rank`` below.
        db_name = get_leveldb('important', date_time - day_offset * seconds_per_day)
        daily_bucket = leveldb.LevelDB(os.path.join(LEVELDBPATH, db_name),
                                       block_cache_size=buffer_size,
                                       write_buffer_size=buffer_size)
        for uid, important in daily_bucket.RangeIter():
            uid = int(uid)
            uid_important[uid] = uid_important.get(uid, 0) + float(important)

    sorted_uids = user_rank(uid_important, 'important', 500, date, window_size)

    for uid in sorted_uids[:5]:
        print(uid)
Exemplo n.º 2
0
def burst_important_task(top_n, date, window_size):
    """Return the uids whose 'important' value changed most since the previous window.

    Every uid in the 'important' store for ``date`` is scored with the
    absolute difference between its value there and its value in the
    'important' store located ``window2time(window_size)`` earlier.  A uid
    that has no entry in the earlier store is compared against 0.

    Args:
        top_n: Maximum number of uids to return.
        date: Date value accepted by ``datetime2ts``.
        window_size: Window passed to ``window2time`` to locate the earlier
            store, and forwarded to ``user_rank`` on the large-input path.

    Returns:
        Up to ``top_n`` uids (ints) ordered by descending change when fewer
        than 100000000 uids were scored; otherwise whatever ``user_rank``
        returns for the scores.
    """
    date_time = datetime2ts(date)
    # 8 * (2 << 25) == 512 * 2**20, used for both LevelDB size options.
    buffer_size = 8 * (2 << 25)

    db_name = get_leveldb('important', date_time)
    previous_db_name = get_leveldb('important', date_time - window2time(window_size))
    current_bucket = leveldb.LevelDB(os.path.join(LEVELDBPATH, db_name),
                                     block_cache_size=buffer_size,
                                     write_buffer_size=buffer_size)
    previous_bucket = leveldb.LevelDB(os.path.join(LEVELDBPATH, previous_db_name),
                                      block_cache_size=buffer_size,
                                      write_buffer_size=buffer_size)

    uid_important = {}
    for uid, important in current_bucket.RangeIter():
        uid = int(uid)
        important = float(important)
        try:
            previous_important = float(previous_bucket.Get(str(uid)))
        except KeyError:
            # Only a missing key means "no previous value"; other failures
            # (and KeyboardInterrupt) are no longer silently swallowed.
            previous_important = 0
        uid_important[uid] = math.fabs(important - previous_important)

    if len(uid_important) >= 100000000:
        # NOTE(review): the category passed here is 'whole_active' although
        # the scores are 'important' deltas -- confirm this is intended.
        return user_rank(uid_important, 'whole_active', top_n, date, window_size)

    # Stable sort of the uids by score, highest first.
    ranked_uids = sorted(uid_important, key=uid_important.get, reverse=True)
    sorted_uids = []
    for uid in ranked_uids:
        # The limit is checked against the number of uids collected so far;
        # the previous counter was incremented outside the loop, so the
        # limit never applied and every uid was returned.
        if len(sorted_uids) >= top_n:
            break
        sorted_uids.append(uid)

    return sorted_uids
Exemplo n.º 3
0
def important_rank(top_n, date, window_size):
    """Return the top uids by 'important' value summed over a window of days.

    Reads the per-day 'important' LevelDB store for ``date`` (and, when
    ``window_size`` is not 1, for each of the ``window_size - 1`` preceding
    days), totals the stored values per uid, and ranks the uids by total.
    Uids for which ``is_in_trash_list`` is true are left out of the result.

    Args:
        top_n: Maximum number of uids to return.
        date: Date value accepted by ``datetime2ts``.
        window_size: Number of consecutive days, ending at ``date``, to sum.

    Returns:
        Up to ``top_n`` uids (ints) ordered by descending total when fewer
        than 100000000 uids were read; otherwise whatever ``user_rank``
        returns for the totals.
    """
    date_time = datetime2ts(date)
    # 8 * (2 << 25) == 512 * 2**20, used for both LevelDB size options.
    buffer_size = 8 * (2 << 25)

    uid_important = {}
    if window_size == 1:
        db_name = get_leveldb('important', date_time)
        daily_bucket = leveldb.LevelDB(os.path.join(LEVELDBPATH, db_name),
                                       block_cache_size=buffer_size,
                                       write_buffer_size=buffer_size)
        for count, (uid, important) in enumerate(daily_bucket.RangeIter(), 1):
            # Running record count, printed once per record as before.
            print(count)
            uid_important[int(uid)] = float(important)
    else:
        for day_offset in range(window_size):
            db_name = get_leveldb('important', date_time - day_offset*24*60*60)
            daily_bucket = leveldb.LevelDB(os.path.join(LEVELDBPATH, db_name),
                                           block_cache_size=buffer_size,
                                           write_buffer_size=buffer_size)
            for uid, important in daily_bucket.RangeIter():
                uid = int(uid)
                uid_important[uid] = uid_important.get(uid, 0) + float(important)

    if len(uid_important) >= 100000000:
        # NOTE(review): the category passed here is 'whole_active' although
        # the totals are 'important' values -- confirm this is intended.
        return user_rank(uid_important, 'whole_active', top_n, date, window_size)

    # Stable sort of the uids by total, highest first.
    ranked_uids = sorted(uid_important, key=uid_important.get, reverse=True)
    sorted_uids = []
    for uid in ranked_uids:
        # The limit is checked against the number of uids kept so far; the
        # previous counter was incremented outside the loop, so the limit
        # never applied and every non-trash uid was returned.
        if len(sorted_uids) >= top_n:
            break
        if is_in_trash_list(uid):
            continue
        sorted_uids.append(uid)

    return sorted_uids
Exemplo n.º 4
0
def whole_active_task(top_n, date, window_size):
    """Return the top uids by 'active' value summed over a window of days.

    For ``date`` and each of the ``window_size - 1`` preceding days, the
    per-day 'active' LevelDB store is opened and every stored value is added
    to a per-uid total.  The uids are then ranked by total.

    Args:
        top_n: Maximum number of uids to return.
        date: Date value accepted by ``datetime2ts``.
        window_size: Number of consecutive days, ending at ``date``, to sum.
            A value of 1 reads only the store for ``date``.

    Returns:
        Up to ``top_n`` uids (ints) ordered by descending total when fewer
        than 100000000 uids were read; otherwise whatever ``user_rank``
        returns for the totals.
    """
    date_time = datetime2ts(date)
    seconds_per_day = 24 * 60 * 60
    # 8 * (2 << 25) == 512 * 2**20, used for both LevelDB size options.
    buffer_size = 8 * (2 << 25)

    uid_active = {}
    # A one-day window is simply this loop executed once with offset 0.
    for day_offset in range(window_size):
        db_name = get_leveldb('active', date_time - day_offset * seconds_per_day)
        daily_bucket = leveldb.LevelDB(os.path.join(LEVELDBPATH, db_name),
                                       block_cache_size=buffer_size,
                                       write_buffer_size=buffer_size)
        for uid, active in daily_bucket.RangeIter():
            uid = int(uid)
            uid_active[uid] = uid_active.get(uid, 0) + float(active)

    if len(uid_active) >= 100000000:
        return user_rank(uid_active, 'whole_active', top_n, date, window_size)

    # Stable sort of the uids by total, highest first.
    ranked_uids = sorted(uid_active, key=uid_active.get, reverse=True)
    sorted_uids = []
    for uid in ranked_uids:
        # The limit is checked against the number of uids collected so far;
        # the previous counter was incremented outside the loop, so the
        # limit never applied and every uid was returned.
        if len(sorted_uids) >= top_n:
            break
        sorted_uids.append(uid)

    return sorted_uids
Exemplo n.º 5
0
def rank_simulation(uid_value):
    """Invoke ``user_rank`` on ``uid_value`` with fixed test arguments.

    The call uses the category 'test', the number 500, the date
    '2012-12-22' and a window size of 1.  Whatever ``user_rank`` returns is
    discarded, so this function always returns None.

    Args:
        uid_value: Passed through unchanged as the first argument of
            ``user_rank``.
    """
    fixed_date = '2012-12-22'
    single_day_window = 1
    user_rank(uid_value, 'test', 500, fixed_date, single_day_window)