Пример #1
0
def farm_user_favorites(apikeys, seeds, output_folder):
    """Fetch the favorites of every user id resolved from ``seeds``.

    A ``UserFarm`` is created for ``output_folder``, asked for the user ids
    behind ``seeds``, and then asked for the favorites of each user id that
    does not already have an entry in the output folder. The farm is always
    closed, whether the run finishes, fails, or is interrupted.

    Args:
        apikeys: Credentials, forwarded unchanged to ``UserFarm``.
        seeds: Passed as-is to ``UserFarm.get_user_ids``.
        output_folder: Directory handed to ``UserFarm`` (as an absolute
            path) and checked for already-processed users.

    Raises:
        KeyboardInterrupt: Re-raised after being logged.
        Exception: Anything raised by the farm propagates unchanged.
    """
    # Resolve the folder once so the "already done" check below looks at
    # exactly the directory that was handed to the farm.
    output_root = os.path.abspath(output_folder)

    user_farm = UserFarm(apikeys=apikeys,
                         verbose=False,
                         output_folder=output_root)

    try:
        # Get the user ids first.
        user_ids = user_farm.get_user_ids(seeds)

        for user_id in user_ids:
            # Progress tracking is deliberately crude: a user is skipped as
            # soon as a path named after the id exists. If an earlier run
            # failed part-way through a user, the remainder of that user's
            # data is never fetched. That was accepted for bulk crawls
            # (losing part of one user out of millions), but a more
            # reliable progress marker would be better.
            user_path = os.path.abspath('%s/%s' % (output_root, user_id))
            if not os.path.exists(user_path):
                user_farm.user_favorites(user_id)
    except KeyboardInterrupt:
        logger.warning('You pressed Ctrl+C!')
        raise
    finally:
        user_farm.close()
Пример #2
0
def farm_user_favorites(apikeys,
                        user_favorites_queue,
                        output_folder='./farm/'):
    """Consume user ids from a queue and fetch each user's favorites.

    User ids are read from ``user_favorites_queue`` until the sentinel
    ``-1`` arrives. A user is skipped when a path named after the id already
    exists under ``<output_folder>/favorites``. A failed fetch is retried in
    place up to ``MAX_RETRY`` times, sleeping ``RETRY_SLEEP`` seconds before
    each retry. A user that still fails is put back on the queue once, after
    the sentinel has been seen, for a single deferred pass; if that pass
    fails as well, the user is given up on.

    Args:
        apikeys: Credentials, forwarded unchanged to ``UserFarm``.
        user_favorites_queue: Queue-like object with blocking ``get`` and
            ``put``. It yields user ids; ``-1`` marks the end of the input.
        output_folder: Base directory; results go to its ``favorites``
            sub-directory.

    Returns:
        ``True`` once the sentinel has been processed.
    """
    favorites_output_folder = os.path.abspath('%s/favorites/' %
                                              (output_folder))  # by user id

    user_favorites_farmer = UserFarm(apikeys=apikeys,
                                     verbose=False,
                                     output_folder=favorites_output_folder)

    current_user_id = 0
    retry = False
    retry_cnt = 0  # bound up front so the retry test below never hits an unset name
    problem_queue = []  # users that exhausted their in-place retries
    second_chance = set()  # users that were already granted the deferred pass

    while current_user_id != -1:
        time.sleep(10)
        if retry and retry_cnt > 0:
            time.sleep(RETRY_SLEEP)
            # Discard what the failed attempt left behind; otherwise the
            # exists() check below would treat the user as already done.
            user_favorites_farmer.write_to_handler.delete(current_user_id)
            retry = False
            retry_cnt -= 1
        else:
            if retry:
                # In-place retries are used up for the current user.
                retry = False
                if current_user_id in second_chance:
                    logger.warning('giving up on: %d', current_user_id)
                else:
                    second_chance.add(current_user_id)
                    problem_queue.append(current_user_id)

            current_user_id = user_favorites_queue.get(
                True)  # will block and wait for the next user_id
            # qsize() is not logged here: it is not available on Mac OS X.

            if current_user_id == -1 and problem_queue:
                # Some users failed; put them back for one more pass. At
                # this point the queue should be empty, so the blocking
                # puts are not expected to stall.
                # NOTE(review): that assumes the queue can hold
                # len(problem_queue) + 1 items - confirm for bounded queues.
                for uid in problem_queue:
                    # Same cleanup as the retry path, so partial output does
                    # not make the deferred pass skip the user.
                    user_favorites_farmer.write_to_handler.delete(uid)
                    user_favorites_queue.put(uid, block=True)
                problem_queue = []
                # Re-append the sentinel so the loop still terminates after
                # the deferred pass.
                user_favorites_queue.put(-1, block=True)

                # get one to continue the process
                current_user_id = user_favorites_queue.get(True)

            if current_user_id == -1:
                break

            logger.info('retrieving favorites for: %d' % (current_user_id))
            retry_cnt = MAX_RETRY

        try:
            if not os.path.exists(
                    os.path.abspath(
                        '%s/%s' % (favorites_output_folder, current_user_id))):
                user_favorites_farmer.user_favorites(current_user_id)
        except Exception:
            # Note the problem but don't die: the next iteration retries
            # this user, or defers it once the retries are used up.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            logger.warning('exception; retry: %d', retry_cnt, exc_info=True)
            retry = True
        finally:
            # Kept from the original: the farmer is closed after every
            # attempt and reused on the next one.
            # NOTE(review): presumably close() only flushes output - confirm.
            user_favorites_farmer.close()

    # Sentinel reached and nothing left to retry.
    user_favorites_farmer.close()

    return True