Пример #1
0
def hydrate(idlist_file="data/example_dataset_tweet_ids.txt"):
    """
    Load the tweets listed in a tweet-ID file into the database.

    Reads one tweet ID per line from ``idlist_file``, drops the IDs that are
    already stored in the database, fetches the remaining ones page by page
    through the REST helper and stores every returned tweet. Prepare to wait
    quite a bit, depending on the size of the dataset.

    :param idlist_file: path to a text file holding one integer tweet ID per
        line. Blank lines are ignored.
    :raises ValueError: if a non-blank line is not a valid integer.
    """
    # The context manager guarantees the file is closed, even when a line
    # fails to parse.
    with open(idlist_file, "r") as id_file:
        # int() tolerates surrounding whitespace (including the newline).
        # Blank lines, e.g. a trailing empty line, are skipped instead of
        # crashing on int('').
        ids_to_fetch = {int(line) for line in id_file if line.strip()}

    # IDs we already have; only the id column is selected from the table.
    ids_in_db = {t.id for t in database.Tweet.select(database.Tweet.id)}
    # Keep only the IDs that are not in the database yet.
    ids_to_fetch -= ids_in_db
    logging.warning("\nLoaded a list of {0} tweet IDs to hydrate".format(
        len(ids_to_fetch)))

    # Progress bar sized to the number of IDs that still need fetching.
    bar = Bar('Fetching tweets', max=len(ids_to_fetch), suffix='%(eta)ds')
    for page in rest.fetch_tweet_list(ids_to_fetch):
        # Advance by the number of tweets actually returned in this page.
        bar.next(len(page))
        for tweet in page:
            database.create_tweet_from_dict(tweet)
    bar.finish()
    logging.warning("Done hydrating!")
Пример #2
0
def hydrate(idlist_file="data/example_dataset_tweet_ids.txt"):
    """
    Hydrate the tweets whose IDs are listed in ``idlist_file``.

    The file is read line by line (one integer tweet ID per line), IDs that
    are already present in the database are filtered out, and the rest are
    fetched page by page through the REST helper and written to the database.
    Prepare to wait quite a bit, depending on the size of the dataset.

    :param idlist_file: path to the text file with the tweet IDs. Empty or
        whitespace-only lines are skipped.
    :raises ValueError: if a non-empty line cannot be parsed as an integer.
    """
    ids_to_fetch = set()
    # Open through a context manager so the handle is always released.
    with open(idlist_file, "r") as handle:
        for raw_line in handle:
            # Remove the newline and any surrounding whitespace.
            tweet_id = raw_line.strip()
            if not tweet_id:
                # A blank line (often the last one) carries no ID.
                continue
            # Convert to int since that's what the database uses.
            ids_to_fetch.add(int(tweet_id))

    # Collect the IDs of the tweets that we already have.
    ids_in_db = {t.id for t in database.Tweet.select(database.Tweet.id)}
    # Set difference: IDs present in the file but not in the database.
    ids_to_fetch = ids_to_fetch.difference(ids_in_db)
    logging.warning(
        "\nLoaded a list of {0} tweet IDs to hydrate".format(len(ids_to_fetch)))

    # Set up a progress bar covering every ID still to be fetched.
    bar = Bar('Fetching tweets', max=len(ids_to_fetch), suffix='%(eta)ds')
    for page in rest.fetch_tweet_list(ids_to_fetch):
        # One step per tweet contained in the page just received.
        bar.next(len(page))
        for tweet in page:
            database.create_tweet_from_dict(tweet)
    bar.finish()
    logging.warning("Done hydrating!")
Пример #3
0
def print_list_of_tweets():
    """
    Fetch three tweets by their IDs and print them one after another.

    The tweets arrive in pages from ``rest.fetch_tweet_list``; every tweet of
    every page is handed to ``print_tweet``. The same loop can be adapted to
    write the tweets to a file instead of printing them.
    """
    tweet_ids = [62154131600224256, 662025716746354688, 661931648171302912]
    for batch in rest.fetch_tweet_list(tweet_ids):
        for item in batch:
            print_tweet(item)
Пример #4
0
def print_list_of_tweets():
    """
    Print a fixed set of three tweets, looked up by ID.

    ``rest.fetch_tweet_list`` is iterated page by page and each tweet in a
    page is passed to ``print_tweet``. Swapping ``print_tweet`` for a file
    write turns this into an export example.
    """
    wanted_ids = [
        62154131600224256,
        662025716746354688,
        661931648171302912,
    ]
    pages = rest.fetch_tweet_list(wanted_ids)
    for current_page in pages:
        for entry in current_page:
            print_tweet(entry)