Exemplo n.º 1
0
def update_targets(week):
    """Refresh the info of every target user and hand the results to save().

    The target mapping is unpickled from ``data/target_users.pkl``; its keys
    are passed to ``iter_pool_do(get_user_info, ...)``, which yields
    ``(name, info)`` pairs.  Two objects are then passed to ``save()`` with
    paths under ``data/week_<week>/``:

    * ``target_users.pkl``  -- dict of name -> info
    * ``invalid_users.pkl`` -- list of names whose info was ``None``

    Returns the name -> info dict.  Names whose info is ``None`` are still
    present in that dict (with a ``None`` value).
    """
    week_dir = "data/" + ("week_%s/" % week)
    target_week_data = week_dir + "target_users.pkl"
    invalid_week_data = week_dir + "invalid_users.pkl"

    invalid_users = []
    update_info = {}

    # NOTE(review): the pickle is opened in text mode; that only works for
    # protocol-0 pickles -- confirm how this file is written before changing.
    with open("data/target_users.pkl") as orig:
        targets = cPickle.load(orig)

        from utils import iter_pool_do
        fetched = iter_pool_do(get_user_info, targets.keys())
        for index, (name, info) in enumerate(fetched, start=1):
            print("--- get user=%s(%d:%d) ---" % (name, index, len(targets)))
            if info is None:
                print("--- user=%s get invalid info ---" % name)
                invalid_users.append(name)

            # Stored unconditionally, so invalid users map to None.
            update_info[name] = info

    print("---- update to file %s %d targets---" % (
        target_week_data, len(update_info)))
    save(target_week_data, update_info)

    print("---- update to file %s %d invalids---" % (
        invalid_week_data, len(invalid_users)))
    save(invalid_week_data, invalid_users)

    return update_info
Exemplo n.º 2
0
def get_week_range_history(start_week, end_week):
    """Dispatch every user restored from the history DB through the pool.

    ``prepare_history_db(start_week, end_week)`` supplies the cursor that
    ``restore_from_db`` reads from.  ``History.total_user`` is set to the
    number of restored entries, and each entry is paired with a 1-based
    index before being handed to ``dispatch_one_user`` via ``iter_pool_do``
    (``cap=4`` -- presumably the worker limit; confirm in utils).

    Returns ``None``; the pool's results are consumed and discarded.
    """
    # The connection is returned alongside the cursor but is not used here.
    cursor, _conn = prepare_history_db(start_week, end_week)
    users_param = restore_from_db(cursor)
    History.total_user = len(users_param)

    numbered_users = [
        (position, user)
        for position, user in enumerate(users_param, start=1)
    ]

    # Drain the generator purely for its side effects.
    for _ in iter_pool_do(dispatch_one_user, numbered_users, cap=4):
        pass
Exemplo n.º 3
0
def get_friends_history(filename):
    """Dispatch every entry restored from the history DB through the pool.

    ``prepare_history_db(filename)`` supplies the cursor that
    ``restore_from_db`` reads from.  ``History.total_user`` is set to the
    number of restored entries, and each entry is paired with a 1-based
    index before being handed to ``dispatch_one_user`` via ``iter_pool_do``
    (``cap=10`` -- presumably the worker limit; confirm in utils).

    Returns ``None``; the pool's results are consumed and discarded.
    """
    # The connection is returned alongside the cursor but is not used here.
    cursor, _conn = prepare_history_db(filename)
    ranges = restore_from_db(cursor)
    History.total_user = len(ranges)

    numbered_ranges = [
        (position, entry)
        for position, entry in enumerate(ranges, start=1)
    ]

    # Drain the generator purely for its side effects.
    for _ in iter_pool_do(dispatch_one_user, numbered_ranges, cap=10):
        pass
Exemplo n.º 4
0
def scheduling_scrape(week):
    """Run the friend-listener scrape for *week* through the worker pool.

    Steps, in order:

    1. Obtain the DB handle from ``create_friend_listeners_table(week)``.
    2. If ``meta_info`` has no rows, call
       ``initialize_friend_listeners_table(week, db)``.
    3. Load the pending work with ``load_progress(db, week)``.
    4. Open ``data/week_<week>/friends.log`` for appending and register the
       DB, the number of pending items and the log file with
       ``FriendHistory.setup``.
    5. Feed every pending item to ``dispatch_one_user`` via ``iter_pool_do``
       (``cap=10``) and discard the yielded values.

    Returns ``None``.
    """
    db = create_friend_listeners_table(week)

    meta_row = db.execute("select count(*) from meta_info;").fetchone()
    if meta_row[0] == 0:
        initialize_friend_listeners_table(week, db)

    progress = load_progress(db, week)

    # NOTE(review): this handle is passed to FriendHistory and never closed
    # in this function -- confirm who owns its lifetime.
    log_file = open("data/week_%s/friends.log" % week, "a")
    FriendHistory.setup(db, len(progress), log_file)

    # Drain the generator purely for its side effects.
    for _ in iter_pool_do(dispatch_one_user, progress, cap=10):
        pass
Exemplo n.º 5
0
# Schema for the results table; executed on every run (idempotent thanks to
# "if not exists").
_PLAYCOUNT_SCHEMA = """
        create table if not exists playcount_and_love (
            target,
            friendname,
            track,
            artist,
            playcount,
            loved,
            timestamp
        );
    """


def _load_playcount_progress(save_file):
    """Return ``(last_index1, next_index2, already_fetched)`` for a resume.

    When *save_file* does not exist, the scrape starts from scratch:
    ``(0, 0, set())``.
    """
    if not os.path.exists(save_file):
        return 0, 0, set()

    # NOTE(review): unpickling can execute arbitrary code; acceptable only
    # while this checkpoint is produced by this script's own save() call.
    # The open mode is left at the default to stay consistent with however
    # save() writes the file -- confirm before switching to binary mode.
    with open(save_file) as checkpoint:
        obj = pickle.load(checkpoint)
    return obj['index1'], obj['index2'], obj['already']


def _record_friend_result(cursor, error_file, already_fetched,
                          target, friend, track, artist, result):
    """Persist one ``friend_like`` result.

    * Falsy *result*: the (track, target, friend) triple is appended to
      *error_file* and flushed immediately.
    * Truthy *result* with falsy playcount: *friend* is added to
      *already_fetched* so it is filtered out for later targets of the
      same track.
    * Otherwise a row is inserted into ``playcount_and_love``.
    """
    if not result:
        error_file.write("(%s) (%s) (%s)\n" % (track.encode('utf-8'),
                                               target.encode('utf-8'),
                                               friend.encode('utf-8')))
        error_file.flush()
        return

    playcount, loved = result
    if not playcount:
        already_fetched.add(friend)
        return

    print("--- get valid record! ---")
    cursor.execute(
        "insert into playcount_and_love values (?, ?, ?, ?, ?, ?, ?)",
        (
            target,
            friend,
            track,
            artist,
            playcount,
            loved,
            int(time.time()),
        )
    )


def get_playcount_and_love():
    """Scrape playcount/loved data for every (track, target, friend) triple.

    For each track from ``get_tracks()`` and each target from
    ``get_targets()``, the target's friends (``get_target_friends``) are
    queried concurrently with ``friend_like(track, artist, friend)`` through
    ``iter_pool_do`` (``cap=10``).  Results with a non-zero playcount are
    inserted into the ``playcount_and_love`` table of
    ``data/friends_listened.db``; failed lookups are logged to
    ``error_file.txt``.

    Progress is checkpointed to ``save_for_friend.pkl`` after every target so
    an interrupted run resumes at the same track and the next target.

    Returns ``None``.
    """
    conn = sqlite3.connect('data/friends_listened.db')
    error_file = None
    try:
        cursor = conn.cursor()
        cursor.executescript(_PLAYCOUNT_SCHEMA)

        targets = get_targets()
        tracks = get_tracks()

        save_file = 'save_for_friend.pkl'
        error_file = open('error_file.txt', 'a')
        last_index1, next_index2, already_fetched = \
            _load_playcount_progress(save_file)

        for index1, t in enumerate(tracks[last_index1:],
                                   start=1 + last_index1):
            track = t[0]
            artist = t[1]
            # Invariant for the whole track, so build it once.
            func = functools.partial(friend_like, track, artist)

            for index2, target in enumerate(targets[next_index2:],
                                            start=1 + next_index2):
                friends = get_target_friends(target)
                filted_friends = [f for f in friends
                                  if f not in already_fetched]

                results = iter_pool_do(func, filted_friends, cap=10)
                for index3, (friend, result) in enumerate(results, start=1):
                    print("--- [%s(%d:%d) %s(%d:%d) %s(%d:%d)] ---" % (
                        track, index1, len(tracks),
                        target, index2, len(targets),
                        friend, index3, len(filted_friends)))
                    _record_friend_result(cursor, error_file, already_fetched,
                                          target, friend, track, artist,
                                          result)

                # Commit before checkpointing: if the process dies between
                # the two steps, the target is redone on resume instead of
                # being skipped with its rows never committed.
                conn.commit()
                # index1 - 1 is the 0-based position of the current track;
                # index2 (1-based, just finished) equals the 0-based position
                # of the next target.
                save(save_file, {
                    'index1': index1 - 1,
                    'index2': index2,
                    'already': already_fetched,
                })

            # Prepare for the next track.
            already_fetched = set()
            next_index2 = 0
    finally:
        # Release the log file and the connection even when a lookup raises.
        if error_file is not None:
            error_file.close()
        conn.close()