def main():
    """Download the user timeline of every screen name listed in a file.

    The single command-line argument ``screen_name_file`` is passed to
    ``get_screen_names_from_file``.  For each screen name returned, the
    timeline is requested from the ``statuses/user_timeline`` endpoint and
    saved to ``<screen_name>.tweets`` in the current directory.  A screen
    name whose output file already exists is skipped.

    Raises:
        TwythonError: for any Twitter error other than HTTP 404 or 401;
            those two are logged as warnings and the user is skipped.
    """
    # Make stdout output UTF-8, preventing "'ascii' codec can't encode" errors
    sys.stdout = codecs.getwriter('utf8')(sys.stdout)

    parser = argparse.ArgumentParser(description="")
    parser.add_argument('screen_name_file')
    args = parser.parse_args()

    logger = get_console_info_logger()

    # Obtain an OAuth 2 access token first, then build the client that is
    # actually used for the timeline requests.
    access_token = Twython(
        consumer_key, consumer_secret, oauth_version=2).obtain_access_token()
    twython = Twython(consumer_key, access_token=access_token)
    crawler = RateLimitedTwitterEndpoint(twython, "statuses/user_timeline", logger)

    for screen_name in get_screen_names_from_file(args.screen_name_file):
        tweet_filename = "%s.tweets" % screen_name

        if os.path.exists(tweet_filename):
            logger.info(
                "File '%s' already exists - will not attempt to download Tweets for '%s'",
                tweet_filename, screen_name)
            continue

        logger.info("Retrieving Tweets for user '%s'", screen_name)
        try:
            tweets = crawler.get_data(screen_name=screen_name, count=200)
        except TwythonError as e:
            # Parenthesised single-argument form prints the same text on
            # Python 2 and also parses on Python 3.
            print("TwythonError: %s" % e)
            if e.error_code == 404:
                logger.warning(
                    "HTTP 404 error - Most likely, Twitter user '%s' no longer exists",
                    screen_name)
            elif e.error_code == 401:
                logger.warning(
                    "HTTP 401 error - Most likely, Twitter user '%s' no longer publicly accessible",
                    screen_name)
            else:
                # Unhandled exception: a bare ``raise`` keeps the original
                # traceback, whereas ``raise e`` would reset it on Python 2.
                raise
        else:
            # Reuse the name that was checked above so the existence test and
            # the written file can never disagree.
            save_tweets_to_json_file(tweets, tweet_filename)
def __init__(self, twython, download_path, minimum_tweet_threshold, logger=None):
    """Set up the timeline crawler and remember the download settings.

    Args:
        twython: Twython client; stored on the instance and also handed to
            the ``statuses/user_timeline`` endpoint wrapper.
        download_path: stored unchanged as ``_download_path``.
        minimum_tweet_threshold: stored unchanged as
            ``_minimum_tweet_threshold``.
        logger: optional logger, forwarded to both the endpoint wrapper and
            ``TweetFilter.__init__``.
    """
    timeline_endpoint = RateLimitedTwitterEndpoint(
        twython,
        "statuses/user_timeline",
        logger,
    )
    self._crawler = timeline_endpoint

    self._download_path = download_path
    self._minimum_tweet_threshold = minimum_tweet_threshold
    self._twython = twython

    # Base-class initialisation stays last, as in the original ordering.
    TweetFilter.__init__(self, logger=logger)