def get_user_data(user_id=None, screen_name=None):
    """
    For a given user, get user profile data.

    Keyword args:
      `screen_name` or `user_id` (not both!): either user screen_name
        (@potus) or a user id (110123231)

    Returns:
      Whatever `make_twitter_request` returns for the `show_user` call.

    Raises:
      AssertionError: if neither or both of the identifiers are given.
    ---
    cf https://dev.twitter.com/rest/reference/get/users/show
    """
    # Must have either screen_name or user_id (logical xor). Raised
    # explicitly instead of via `assert` so the check is not stripped
    # when Python runs with -O; the exception type is unchanged.
    if (screen_name is None) == (user_id is None):
        raise AssertionError(
            "Must have screen_name or user_id, but not both")

    twitter_api = conn.get_twitter_api()
    show_user = partial(make_twitter_request, twitter_api.show_user)

    # Dispatch on `is not None` rather than truthiness so a falsy but
    # explicitly provided identifier is not replaced by the unset one.
    if user_id is not None:
        return show_user(user_id=user_id)
    return show_user(screen_name=screen_name)
def get_friends_followers_ids(
        screen_name=None, user_id=None,
        friends_limit=maxsize, followers_limit=maxsize):
    """
    For a given user, get list of followers and friends ids.

    Keyword args:
      `screen_name` or `user_id` (not both!): either user screen_name
        (@potus) or a user id (110123231)
      `friends_limit`: Max number of friends to return
        (set to 0 if only interested in getting followers)
      `followers_limit`: Max number of followers to return
        (set to 0 if only interested in getting friends)

    Returns:
      A `(friends_ids, followers_ids)` tuple of lists, each truncated to
      its limit.

    Raises:
      AssertionError: if neither or both of the identifiers are given.
    ---
    cf https://dev.twitter.com/rest/reference/get/friends/ids
    cf https://dev.twitter.com/rest/reference/get/followers/ids
    """
    # Must have either screen_name or user_id (logical xor). Raised
    # explicitly instead of via `assert` so the check is not stripped
    # when Python runs with -O; the exception type is unchanged.
    if (screen_name is None) == (user_id is None):
        raise AssertionError(
            "Must have screen_name or user_id, but not both")

    # Pick the identifying keyword once, on `is not None` rather than
    # truthiness, so a falsy but provided identifier is still used.
    if screen_name is not None:
        user_kwargs = {'screen_name': screen_name}
        user_label = screen_name
    else:
        user_kwargs = {'user_id': user_id}
        user_label = user_id

    twitter_api = conn.get_twitter_api()
    get_followers_ids = partial(
        make_twitter_request, twitter_api.get_followers_ids, count=5000)
    get_friends_ids = partial(
        make_twitter_request, twitter_api.get_friends_ids, count=5000)

    friends_ids, followers_ids = [], []

    for fetch_page, limit, ids, label in (
            (get_friends_ids, friends_limit, friends_ids, "friends"),
            (get_followers_ids, followers_limit, followers_ids, "followers"),
    ):
        if limit == 0:
            continue

        # -1 requests the first page; the loop ends when the response
        # reports a next_cursor of 0.
        cursor = -1
        while cursor != 0:
            # Use make_twitter_request via the partially bound callable...
            response = fetch_page(cursor=cursor, **user_kwargs)
            if response is None:
                # Nothing came back: keep what was collected so far.
                break

            ids.extend(response['ids'])
            cursor = response['next_cursor']

            print('Fetched {0} total {1} ids for {2}'.format(
                len(ids), label, user_label))

            # XXX: You may want to store data during each iteration to
            # provide an additional layer of protection from exceptional
            # circumstances
            if len(ids) >= limit:
                break

    # The last page may overshoot the limit, so trim before returning.
    return friends_ids[:friends_limit], followers_ids[:followers_limit]
def search(q=None, since_id=None, max_id=None,
           tweet_limit=sys.maxsize, save_dir=None):
    """
    For a given query q, get recent tweets (up to 7 days old).

    Keyword args:
      `q`: keyword query
      `since_id`: id of the most ancient tweets to fetch up to.
      `max_id`: id of the most recent tweet to start fetching from
        (search goes backward in time when fetching tweets)
      `tweet_limit`: Max number of tweets to return
      `save_dir`: if not None, every collected tweet is passed to
        `save_json(tweet, save_dir)`

    Returns:
      The list of collected tweets, at most `tweet_limit` long.
    ---
    https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets.html
    for details on API parameters
    """
    tweets = []
    # Ids already collected. `max_id` is passed back inclusively, so each
    # page repeats the oldest tweet of the previous one; a set makes the
    # duplicate check cheap.
    seen_ids = set()

    twitter_api = conn.get_twitter_api()
    search_tweets = partial(make_twitter_request, twitter_api.search)

    cursor = max_id
    while cursor != 0 and len(tweets) < tweet_limit:
        response = search_tweets(
            q=q, since_id=since_id, max_id=cursor, count=100)
        if not response:
            break

        if 'statuses' not in response:
            # Unexpected payload: report it and keep what we already have
            # instead of failing with a KeyError.
            print("'response' does not contain a 'statuses' key:")
            print(list(response.keys()))
            break

        statuses = response['statuses']
        if not statuses:
            # Empty page: nothing older is available.
            break

        # Next request starts from the oldest tweet of this page.
        cursor = min(t['id'] for t in statuses)

        new_tweets = []
        for tweet in statuses:
            if tweet['id'] not in seen_ids:
                seen_ids.add(tweet['id'])
                new_tweets.append(tweet)

        # Never collect (or save) more than the caller asked for.
        new_tweets = new_tweets[:tweet_limit - len(tweets)]
        if not new_tweets:
            break

        tweets.extend(new_tweets)
        print("max_id = {cursor}".format(cursor=cursor))
        if save_dir is not None:
            for tweet in new_tweets:
                save_json(tweet, save_dir)

    return tweets
if __name__ == "__main__":
    # Command-line entry point:
    #     python3 search.py {search_term} {save_dir}
    # Runs search() for the term and stores each tweet under save_dir.
    import sys

    # Explicit check instead of `assert`, which is stripped under
    # `python -O` and would otherwise end in an IndexError below.
    if len(sys.argv) != 3:
        sys.exit("""
Call not properly formatted; Correct format:
'''
python3 search.py {search_term} {save_dir}
'''
""")

    search_term = sys.argv[1]
    save_dir = sys.argv[2]

    # Kept at module scope (not inside a function) so `conn` stays bound
    # as a module-level name, which search() reads when it is called.
    import conn

    try:
        tweets = search(q=search_term, save_dir=save_dir)
    except TwythonError as e:
        print(e)
def get_user_timeline(
        screen_name=None, user_id=None,
        since_id=None, max_id=None, tweet_limit=maxsize):
    """
    For a given user, get user_timeline.

    Keyword args:
      `screen_name` or `user_id` (not both!): either user screen_name
        (@potus) or a user id (110123231)
      `since_id`: passed through to the timeline request unchanged
      `max_id`: id of the most recent tweet to start fetching from
        (get timeline goes backward in time when fetching tweets)
      `tweet_limit`: Max number of tweets to return

    Returns:
      The list of collected tweets, at most `tweet_limit` long.

    Raises:
      AssertionError: if neither or both of the identifiers are given.
    ---
    https://dev.twitter.com/rest/reference/get/statuses/user_timeline
    for details on API parameters
    """
    # Must have either screen_name or user_id (logical xor). Raised
    # explicitly instead of via `assert` so the check is not stripped
    # when Python runs with -O; the exception type is unchanged.
    if (screen_name is None) == (user_id is None):
        raise AssertionError(
            "Must have screen_name or user_id, but not both")

    # Pick the identifying keyword once, on `is not None` rather than
    # truthiness, so a falsy but provided identifier is still used.
    if screen_name is not None:
        user_kwargs = {'screen_name': screen_name}
    else:
        user_kwargs = {'user_id': user_id}

    twitter_api = conn.get_twitter_api()
    fetch_timeline = partial(
        make_twitter_request, twitter_api.get_user_timeline)

    user_timeline = []
    # Ids already collected. `max_id` is passed back inclusively, so each
    # page repeats the oldest tweet of the previous one; a set makes the
    # duplicate check cheap.
    seen_ids = set()

    cursor = max_id
    while cursor != 0 and len(user_timeline) < tweet_limit:
        # Use make_twitter_request via the partially bound callable...
        response = fetch_timeline(
            since_id=since_id, include_rts=True,
            max_id=cursor, count=200, **user_kwargs)
        if not response:
            break

        # Next request starts from the oldest tweet of this page.
        cursor = min(t['id'] for t in response)

        new_tweets = []
        for tweet in response:
            if tweet['id'] not in seen_ids:
                seen_ids.add(tweet['id'])
                new_tweets.append(tweet)

        # Never collect more than the caller asked for.
        new_tweets = new_tweets[:tweet_limit - len(user_timeline)]
        if not new_tweets:
            break

        user_timeline.extend(new_tweets)
        print("max_id = {cursor}".format(cursor=cursor))

    return user_timeline