Example #1
from datetime import datetime as dt

import osometweet


def gather_data(user_id):
    """
    Gather tweets (in reverse chronological order) from the timeline of
    the user_id provided.

    Parameters
    ----------
    - user_id (str) : the user ID whose tweets we want to download
    """

    # Load bearer token and authorize osometweet
    # to gather data...
    bearer_token = load_bearer_token()
    ot = initialize_osometweet(bearer_token)

    # Create tweet fields object with all fields
    #   NOTE: if you include other fields/expansions you will
    #   need to ensure that you parse them properly from the
    #   response object below
    all_tweet_fields = osometweet.TweetFields(everything=True)

    # Get today's date
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Create file names
    data_file_name = f"timeline_data--{today}.json"
    errors_file_name = f"timeline_errors--{today}.json"

    print("Gathering data...")

    # Open one file for data and another for errors
    with open(data_file_name, 'w') as data_file,\
         open(errors_file_name, 'w') as error_file:

        # Make first request and write data and/or errors
        response = ot.get_tweet_timeline(
            user_id=user_id,
            fields=all_tweet_fields,  # Get all tweet fields
            max_results=100  # Request 100 tweets per call
        )
        write_data(response, data_file, error_file)

        # Keep making requests until up to 3,200 tweets (or all of the
        # user's tweets) have been returned for the provided user_id.
        # The loop continues only while "next_token" is present in the
        # `response["meta"]` object, which indicates that Twitter has
        # more data to provide.
        while "next_token" in response["meta"]:
            response = ot.get_tweet_timeline(
                user_id=user_id,
                fields=all_tweet_fields,
                max_results=100,
                pagination_token=response["meta"]["next_token"])
            write_data(response, data_file, error_file)

    # Now that the loop has finished
    # we remove any files that might be empty
    # (for example, if we received no errors)
    delete_if_empty(data_file_name)
    delete_if_empty(errors_file_name)
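
The helper functions used above (load_bearer_token, initialize_osometweet, write_data, delete_if_empty) are not shown in this example. A minimal sketch of what they might look like, assuming the bearer token is stored in a TWITTER_BEARER_TOKEN environment variable (the variable name and all four function bodies are assumptions, not part of the original):

import json
import os

import osometweet


def load_bearer_token():
    # Assumption: the token lives in an environment variable
    return os.environ["TWITTER_BEARER_TOKEN"]


def initialize_osometweet(bearer_token):
    # Authorize with app-only OAuth2, as in the other examples
    oauth2 = osometweet.OAuth2(bearer_token=bearer_token)
    return osometweet.OsomeTweet(oauth2)


def write_data(response, data_file, error_file):
    # Write tweets and errors as newline-delimited JSON
    for tweet in response.get("data") or []:
        data_file.write(f"{json.dumps(tweet)}\n")
    for error in response.get("errors") or []:
        error_file.write(f"{json.dumps(error)}\n")


def delete_if_empty(file_name):
    # Remove a file if nothing was written to it
    if os.path.exists(file_name) and os.path.getsize(file_name) == 0:
        os.remove(file_name)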
Example #2
import json
from datetime import datetime as dt

import osometweet


def stream_tweets(bearer_token):
    """
    Write a filtered stream of tweets directly to a new line
    delimited JSON file, named with today's date in "%Y-%m-%d_%H-%M" format.

    Parameters
    ----------
    - bearer_token (str) : Twitter V2 bearer token.
    """
    print("Streaming tweets...")

    oauth2 = osometweet.OAuth2(
        bearer_token=bearer_token,
        manage_rate_limits=False
    )
    ot = osometweet.OsomeTweet(oauth2)

    # Add all tweet fields
    all_tweet_fields = osometweet.TweetFields(everything=True)

    # Add streaming rules
    rules = [{"value": "coronavirus", "tag": "all coronavirus tweets"},
             {"value": "indiana", "tag": "all indiana tweets"}]
    add_rules = {"add": rules}
    response = ot.set_filtered_stream_rule(rules=add_rules)
    print(f"API response from adding two rules:\n{response}\n")

    # Retrieve active streaming rules
    current_rules = ot.get_filtered_stream_rule()
    print(f'The current filtered stream rules are:\n{current_rules}\n')

    # Remove a streaming rule using its tag
    indiana_rule = [
        rule["id"] for rule in current_rules["data"]
        if 'all indiana tweets' in rule["tag"]
    ]
    delete_rule = {'delete': {'ids': indiana_rule}}
    response = ot.set_filtered_stream_rule(rules=delete_rule)
    print(f"API response from deleting one rule:\n{response}\n")

    # Get today's date
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Open a file for the streamed tweets (append mode)
    with open(f"tweet_stream--{today}.json", "a") as data_file:
        # stream is a Generator
        stream = ot.filtered_stream(fields=all_tweet_fields)
        # We have to iterate over the stream to fetch streamed tweets
        for tweet in stream.iter_lines():
            # Parse the payload, keeping only the tweet data
            try:
                data = json.loads(tweet).get("data")

                # When data is found, we write it to the open file
                if data:
                    json.dump(data, data_file)
                    data_file.write("\n")
            except json.JSONDecodeError:
                # Skip keep-alive newlines and partial payloads
                pass
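
Each payload from the filtered stream also carries a "matching_rules" list identifying which rule(s) matched the tweet. A minimal variation on the loop above that keeps the rule tags alongside each tweet (a sketch, not part of the original example; the "matching_rule_tags" key is an assumption):

        for tweet in stream.iter_lines():
            try:
                payload = json.loads(tweet)
            except json.JSONDecodeError:
                continue  # keep-alive newline or partial payload
            data = payload.get("data")
            if data:
                # Record the tags of the rules this tweet matched
                data["matching_rule_tags"] = [
                    rule["tag"] for rule in payload.get("matching_rules", [])
                ]
                json.dump(data, data_file)
                data_file.write("\n")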
Example #3
    def test_tweet_fields(self):
        """
        Test tweet fields. Test case borrowed from
        https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/tweet
        """
        fields_to_request = [
            "attachments", "author_id", "context_annotations", "created_at",
            "entities", "id", "in_reply_to_user_id", "lang",
            "possibly_sensitive", "public_metrics", "referenced_tweets",
            "source", "text"
        ]
        tweet_fields = osometweet.TweetFields()
        tweet_fields.fields = fields_to_request
        resp = self.ot.tweet_lookup(['1212092628029698048'],
                                    fields=tweet_fields)
        for field in fields_to_request:
            self.assertIn(field, resp['data'][0])
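
This method assumes an enclosing unittest.TestCase that initializes self.ot before each test. A minimal scaffold, assuming the same OAuth2 setup as the other examples and a bearer token in a TWITTER_BEARER_TOKEN environment variable (the class name and variable name are assumptions):

import os
import unittest

import osometweet


class TestOsometweet(unittest.TestCase):
    def setUp(self):
        # Authorize once per test with app-only OAuth2
        oauth2 = osometweet.OAuth2(
            bearer_token=os.environ["TWITTER_BEARER_TOKEN"]
        )
        self.ot = osometweet.OsomeTweet(oauth2)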
Example #4
import json
from datetime import datetime as dt

import osometweet


def gather_data(bearer_token, chunked_list):
    """
    Gather tweets for the chunked list of tweet IDs (at most 100 IDs
    per chunk) using the provided bearer_token.
    """
    print("Gathering Data...")

    oauth2 = osometweet.OAuth2(bearer_token=bearer_token)
    ot = osometweet.OsomeTweet(oauth2)

    # Add all tweet fields
    all_tweet_fields = osometweet.TweetFields(everything=True)

    # Get today's date
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Open two files. One for good data, the other for tweet errors.
    with open(f"tweet_data--{today}.json", 'w') as data_file,\
         open(f"tweet_errors--{today}.json", 'w') as error_file:

        # Iterate through the list of lists
        for one_hundred_tweets in chunked_list:
            response = ot.tweet_lookup(tids=one_hundred_tweets,
                                       fields=all_tweet_fields)

            # Pull the data and errors out of the response. Either key
            # may be absent or None (i.e. no data/errors), in which
            # case the writelines() generator below raises a TypeError.
            data = response.get("data")
            errors = response.get("errors")

            try:
                data_file.writelines(f"{json.dumps(line)}\n" for line in data)
            except TypeError:
                print("No data found in this set of tweets, "
                      "skipping to the next set.")

            try:
                error_file.writelines(f"{json.dumps(line)}\n"
                                      for line in errors)
            except TypeError:
                print("No problematic tweets found in this set of tweets, "
                      "skipping to the next set.")
Example #5
import json
from datetime import datetime as dt

import osometweet


def stream_tweets(bearer_token):
    """
    Stream a 1% sample of tweets from Twitter and write them directly to a
    new line delimited JSON file, named with today's date in "%Y-%m-%d_%H-%M"
    format.

    Parameters
    ----------
    - bearer_token (str) : Twitter V2 bearer token.
    """
    print("Streaming tweets...")

    oauth2 = osometweet.OAuth2(bearer_token=bearer_token,
                               manage_rate_limits=False)
    ot = osometweet.OsomeTweet(oauth2)

    # Add all tweet fields
    all_tweet_fields = osometweet.TweetFields(everything=True)

    # Get today's date
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Open a file for the streamed tweets (append mode)
    with open(f"tweet_stream--{today}.json", "a") as data_file:
        # stream is a Generator
        stream = ot.sampled_stream(fields=all_tweet_fields)
        # We have to iterate over the stream to fetch streamed tweets
        for tweet in stream.iter_lines():
            # Parse the payload, keeping only the tweet data
            try:
                data = json.loads(tweet).get("data")

                # When data is found, we write it to the open file
                if data:
                    json.dump(data, data_file)
                    data_file.write("\n")
            except json.JSONDecodeError:
                # Skip keep-alive newlines and partial payloads
                pass
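
A small driver for the function above, assuming the bearer token is stored in a TWITTER_BEARER_TOKEN environment variable (the variable name is an assumption):

import os

if __name__ == "__main__":
    stream_tweets(os.environ["TWITTER_BEARER_TOKEN"])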