def search_tweets( self, keywords, limit=100, lang="en", max_id=None, retries_after_twython_exception=0, ): """ Call the REST API ``'search/tweets'`` endpoint with some plausible defaults. See `the Twitter search documentation <https://dev.twitter.com/rest/public/search>`_ for more information about admissible search parameters. :param str keywords: A list of query terms to search for, written as\ a comma-separated string :param int limit: Number of Tweets to process :param str lang: language :param int max_id: id of the last tweet fetched :param int retries_after_twython_exception: number of retries when\ searching Tweets before raising an exception :rtype: python generator """ if not self.handler: # if no handler is provided, `BasicTweetHandler` provides minimum # functionality for limiting the number of Tweets retrieved self.handler = BasicTweetHandler(limit=limit) count_from_query = 0 if max_id: self.handler.max_id = max_id else: results = self.search(q=keywords, count=min(100, limit), lang=lang, result_type="recent") count = len(results["statuses"]) if count == 0: print( "No Tweets available through REST API for those keywords") return count_from_query = count self.handler.max_id = results["statuses"][count - 1]["id"] - 1 for result in results["statuses"]: yield result self.handler.counter += 1 if self.handler.do_continue() == False: return # Pagination loop: keep fetching Tweets until the desired count is # reached while dealing with Twitter rate limits. 
retries = 0 while count_from_query < limit: try: mcount = min(100, limit - count_from_query) results = self.search( q=keywords, count=mcount, lang=lang, max_id=self.handler.max_id, result_type="recent", ) except TwythonRateLimitError as e: print("Waiting for 15 minutes -{0}".format(e)) time.sleep(15 * 60) # wait 15 minutes continue except TwythonError as e: print("Fatal error in Twython request -{0}".format(e)) if retries_after_twython_exception == retries: raise e retries += 1 count = len(results["statuses"]) if count == 0: print("No more Tweets available through rest api") return count_from_query += count # the max_id is also present in the Tweet metadata # results['search_metadata']['next_results'], but as part of a # query and difficult to fetch. This is doing the equivalent # (last tweet id minus one) self.handler.max_id = results["statuses"][count - 1]["id"] - 1 for result in results["statuses"]: yield result self.handler.counter += 1 if self.handler.do_continue() == False: return
def search_tweets(self, keywords, limit=100, lang="en", max_id=None, retries_after_twython_exception=0): """ Call the REST API ``'search/tweets'`` endpoint with some plausible defaults. See `the Twitter search documentation <https://dev.twitter.com/rest/public/search>`_ for more information about admissable search parameters. :param str keywords: A list of query terms to search for, written as\ a comma-separated string :param int limit: Number of Tweets to process :param str lang: language :param int max_id: id of the last tweet fetched :param int retries_after_twython_exception: number of retries when\ searching Tweets before raising an exception :rtype: python generator """ if not self.handler: # if no handler is provided, `BasicTweetHandler` provides minimum # functionality for limiting the number of Tweets retrieved self.handler = BasicTweetHandler(limit=limit) count_from_query = 0 if not max_id: results = self.search(q=keywords, count=min(100, limit), lang=lang, result_type="recent") count = len(results["statuses"]) if count == 0: print("No Tweets available through REST API for those keywords") return count_from_query = count max_id = results["statuses"][count - 1]["id"] - 1 for result in results["statuses"]: yield result self.handler.counter += 1 if self.handler.do_continue() == False: return # Pagination loop: keep fetching Tweets until the desired count is # reached while dealing with Twitter rate limits. 
retries = 0 while count_from_query < limit: try: mcount = min(100, limit - count_from_query) results = self.search(q=keywords, count=mcount, lang=lang, max_id=max_id, result_type="recent") except TwythonRateLimitError as e: print("Waiting for 15 minutes -{0}".format(e)) time.sleep(15 * 60) # wait 15 minutes continue except TwythonError as e: print("Fatal error in Twython request -{0}".format(e)) if retries_after_twython_exception == retries: raise e retries += 1 count = len(results["statuses"]) if count == 0: print("No more Tweets available through rest api") return count_from_query += count # the max_id is also present in the Tweet metadata # results['search_metadata']['next_results'], but as part of a # query and difficult to fetch. This is doing the equivalent # (last tweet id minus one) max_id = results["statuses"][count - 1]["id"] - 1 self.handler.max_id = max_id for result in results["statuses"]: yield result self.handler.counter += 1 if self.handler.do_continue() == False: return
class Query(Twython):
    """
    Retrieve data from the Twitter REST API.
    """

    def __init__(self, app_key, app_secret, oauth_token, oauth_token_secret):
        # OAuth credentials are passed straight through to `Twython`.
        self.handler = None
        self.do_continue = True
        Twython.__init__(self, app_key, app_secret, oauth_token, oauth_token_secret)

    def register(self, handler):
        """
        Register a method for handling Tweets.

        :param TweetHandlerI handler: method for viewing or writing Tweets
            to a file.
        """
        self.handler = handler

    def expand_tweetids(self, ids_f, verbose=True):
        """
        Given a file object containing a list of Tweet IDs, fetch the
        corresponding full Tweets from the Twitter API.

        The API call `statuses/lookup` will fail to retrieve a Tweet if the
        user has deleted it.

        This call to the Twitter API is rate-limited. See
        <https://dev.twitter.com/rest/reference/get/statuses/lookup> for
        details.

        :param ids_f: input file object consisting of Tweet IDs, one to a line
        :return: iterable of Tweet objects in JSON format
        """
        ids = [line.strip() for line in ids_f if line]
        if verbose:
            print("Counted {0} Tweet IDs in {1}.".format(len(ids), ids_f))
        # The Twitter endpoint takes lists of up to 100 ids, so we chunk the
        # ids.
        id_chunks = [ids[i : i + 100] for i in range(0, len(ids), 100)]
        chunked_tweets = (self.lookup_status(id=chunk) for chunk in id_chunks)
        return itertools.chain.from_iterable(chunked_tweets)

    def _search_tweets(self, keywords, limit=100, lang="en"):
        """
        Assumes that the handler has been informed. Fetches Tweets from
        search_tweets generator output and passes them to handler

        :param str keywords: A list of query terms to search for, written as\
        a comma-separated string.
        :param int limit: Number of Tweets to process
        :param str lang: language
        """
        while True:
            tweets = self.search_tweets(
                keywords=keywords,
                limit=limit,
                lang=lang,
                max_id=self.handler.max_id,
            )
            for tweet in tweets:
                self.handler.handle(tweet)
            if not (self.handler.do_continue() and self.handler.repeat):
                break
        self.handler.on_finish()

    def search_tweets(
        self,
        keywords,
        limit=100,
        lang="en",
        max_id=None,
        retries_after_twython_exception=0,
    ):
        """
        Call the REST API ``'search/tweets'`` endpoint with some plausible
        defaults. See `the Twitter search documentation
        <https://dev.twitter.com/rest/public/search>`_ for more information
        about admissible search parameters.

        :param str keywords: A list of query terms to search for, written as\
        a comma-separated string
        :param int limit: Number of Tweets to process
        :param str lang: language
        :param int max_id: id of the last tweet fetched
        :param int retries_after_twython_exception: number of retries when\
        searching Tweets before raising an exception
        :rtype: python generator
        """
        if not self.handler:
            # if no handler is provided, `BasicTweetHandler` provides minimum
            # functionality for limiting the number of Tweets retrieved
            self.handler = BasicTweetHandler(limit=limit)

        count_from_query = 0
        if max_id:
            self.handler.max_id = max_id
        else:
            results = self.search(
                q=keywords, count=min(100, limit), lang=lang, result_type="recent"
            )
            count = len(results["statuses"])
            if count == 0:
                print("No Tweets available through REST API for those keywords")
                return
            count_from_query = count
            # Resume pagination just below the oldest Tweet in this batch
            # (Twitter returns Tweets newest-first).
            self.handler.max_id = results["statuses"][count - 1]["id"] - 1

            for result in results["statuses"]:
                yield result
                self.handler.counter += 1
                if not self.handler.do_continue():
                    return

        # Pagination loop: keep fetching Tweets until the desired count is
        # reached while dealing with Twitter rate limits.
        retries = 0
        while count_from_query < limit:
            try:
                mcount = min(100, limit - count_from_query)
                results = self.search(
                    q=keywords,
                    count=mcount,
                    lang=lang,
                    max_id=self.handler.max_id,
                    result_type="recent",
                )
            except TwythonRateLimitError as e:
                print("Waiting for 15 minutes -{0}".format(e))
                time.sleep(15 * 60)  # wait 15 minutes
                continue
            except TwythonError as e:
                print("Fatal error in Twython request -{0}".format(e))
                if retries_after_twython_exception == retries:
                    raise e
                retries += 1
                # BUGFIX: retry the request instead of falling through and
                # re-processing a stale `results` (or hitting a NameError
                # when `max_id` was supplied and `results` was never bound).
                continue

            count = len(results["statuses"])
            if count == 0:
                print("No more Tweets available through rest api")
                return
            count_from_query += count
            # the max_id is also present in the Tweet metadata
            # results['search_metadata']['next_results'], but as part of a
            # query and difficult to fetch. This is doing the equivalent
            # (last tweet id minus one)
            self.handler.max_id = results["statuses"][count - 1]["id"] - 1

            for result in results["statuses"]:
                yield result
                self.handler.counter += 1
                if not self.handler.do_continue():
                    return

    def user_info_from_id(self, userids):
        """
        Convert a list of userIDs into a variety of information about the
        users. See <https://dev.twitter.com/rest/reference/get/users/show>.

        :param list userids: A list of integer strings corresponding to
            Twitter userIDs
        :rtype: list(json)
        """
        return [self.show_user(user_id=userid) for userid in userids]

    def user_tweets(self, screen_name, limit, include_rts="false"):
        """
        Return a collection of the most recent Tweets posted by the user

        :param str user: The user's screen name; the initial '@' symbol\
        should be omitted
        :param int limit: The number of Tweets to recover; 200 is the
            maximum allowed
        :param str include_rts: Whether to include statuses which have been\
        retweeted by the user; possible values are 'true' and 'false'
        """
        data = self.get_user_timeline(
            screen_name=screen_name, count=limit, include_rts=include_rts
        )
        # Hand Tweets to the handler one at a time, matching the per-Tweet
        # handler contract used in `_search_tweets`.
        for item in data:
            self.handler.handle(item)
class Query(Twython):
    """
    Retrieve data from the Twitter REST API.
    """

    def __init__(self, app_key, app_secret, oauth_token, oauth_token_secret):
        # OAuth credentials are passed straight through to `Twython`.
        self.handler = None
        self.do_continue = True
        Twython.__init__(self, app_key, app_secret, oauth_token, oauth_token_secret)

    def register(self, handler):
        """
        Register a method for handling Tweets.

        :param TweetHandlerI handler: method for viewing or writing Tweets
            to a file.
        """
        self.handler = handler

    def expand_tweetids(self, ids_f, verbose=True):
        """
        Given a file object containing a list of Tweet IDs, fetch the
        corresponding full Tweets from the Twitter API.

        The API call `statuses/lookup` will fail to retrieve a Tweet if the
        user has deleted it.

        This call to the Twitter API is rate-limited. See
        <https://dev.twitter.com/rest/reference/get/statuses/lookup> for
        details.

        :param ids_f: input file object consisting of Tweet IDs, one to a line
        :return: iterable of Tweet objects in JSON format
        """
        ids = [line.strip() for line in ids_f if line]
        if verbose:
            print("Counted {0} Tweet IDs in {1}.".format(len(ids), ids_f))
        # The Twitter endpoint takes lists of up to 100 ids, so we chunk the
        # ids.
        id_chunks = [ids[i : i + 100] for i in range(0, len(ids), 100)]
        chunked_tweets = (self.lookup_status(id=chunk) for chunk in id_chunks)
        return itertools.chain.from_iterable(chunked_tweets)

    def _search_tweets(self, keywords, limit=100, lang="en"):
        """
        Assumes that the handler has been informed. Fetches Tweets from
        search_tweets generator output and passes them to handler

        :param str keywords: A list of query terms to search for, written as\
        a comma-separated string.
        :param int limit: Number of Tweets to process
        :param str lang: language
        """
        while True:
            # Only a TweetWriter tracks a resumable max_id; other handlers
            # start each pass from the most recent Tweets.
            if isinstance(self.handler, TweetWriter):
                max_id = self.handler.max_id
            else:
                max_id = None
            tweets = self.search_tweets(
                keywords=keywords, limit=limit, lang=lang, max_id=max_id
            )
            for tweet in tweets:
                self.handler.handle(tweet)
            if not (self.handler.do_continue() and self.handler.repeat):
                break
        self.handler.on_finish()

    def search_tweets(
        self,
        keywords,
        limit=100,
        lang="en",
        max_id=None,
        retries_after_twython_exception=0,
    ):
        """
        Call the REST API ``'search/tweets'`` endpoint with some plausible
        defaults. See `the Twitter search documentation
        <https://dev.twitter.com/rest/public/search>`_ for more information
        about admissible search parameters.

        :param str keywords: A list of query terms to search for, written as\
        a comma-separated string
        :param int limit: Number of Tweets to process
        :param str lang: language
        :param int max_id: id of the last tweet fetched
        :param int retries_after_twython_exception: number of retries when\
        searching Tweets before raising an exception
        :rtype: python generator
        """
        if not self.handler:
            # if no handler is provided, `BasicTweetHandler` provides minimum
            # functionality for limiting the number of Tweets retrieved
            self.handler = BasicTweetHandler(limit=limit)

        count_from_query = 0
        if not max_id:
            results = self.search(
                q=keywords, count=min(100, limit), lang=lang, result_type="recent"
            )
            count = len(results["statuses"])
            if count == 0:
                print("No Tweets available through REST API for those keywords")
                return
            count_from_query = count
            # Resume pagination just below the oldest Tweet in this batch
            # (Twitter returns Tweets newest-first).
            max_id = results["statuses"][count - 1]["id"] - 1

            for result in results["statuses"]:
                yield result
                self.handler.counter += 1
                if not self.handler.do_continue():
                    return

        # Pagination loop: keep fetching Tweets until the desired count is
        # reached while dealing with Twitter rate limits.
        retries = 0
        while count_from_query < limit:
            try:
                mcount = min(100, limit - count_from_query)
                results = self.search(
                    q=keywords,
                    count=mcount,
                    lang=lang,
                    max_id=max_id,
                    result_type="recent",
                )
            except TwythonRateLimitError as e:
                print("Waiting for 15 minutes -{0}".format(e))
                time.sleep(15 * 60)  # wait 15 minutes
                continue
            except TwythonError as e:
                print("Fatal error in Twython request -{0}".format(e))
                if retries_after_twython_exception == retries:
                    raise e
                retries += 1
                # BUGFIX: retry the request instead of falling through and
                # re-processing a stale `results`.
                continue

            count = len(results["statuses"])
            if count == 0:
                print("No more Tweets available through rest api")
                return
            count_from_query += count
            # the max_id is also present in the Tweet metadata
            # results['search_metadata']['next_results'], but as part of a
            # query and difficult to fetch. This is doing the equivalent
            # (last tweet id minus one)
            max_id = results["statuses"][count - 1]["id"] - 1
            self.handler.max_id = max_id

            for result in results["statuses"]:
                yield result
                self.handler.counter += 1
                if not self.handler.do_continue():
                    return

    def user_info_from_id(self, userids):
        """
        Convert a list of userIDs into a variety of information about the
        users. See <https://dev.twitter.com/rest/reference/get/users/show>.

        :param list userids: A list of integer strings corresponding to
            Twitter userIDs
        :rtype: list(json)
        """
        return [self.show_user(user_id=userid) for userid in userids]

    def user_tweets(self, screen_name, limit, include_rts="false"):
        """
        Return a collection of the most recent Tweets posted by the user

        :param str user: The user's screen name; the initial '@' symbol\
        should be omitted
        :param int limit: The number of Tweets to recover; 200 is the
            maximum allowed
        :param str include_rts: Whether to include statuses which have been\
        retweeted by the user; possible values are 'true' and 'false'
        """
        data = self.get_user_timeline(
            screen_name=screen_name, count=limit, include_rts=include_rts
        )
        # BUGFIX: the handler contract (see `_search_tweets`) takes one Tweet
        # per call; passing the whole timeline list handed the handler a list
        # instead of a Tweet. Iterate as the other methods do.
        for item in data:
            self.handler.handle(item)