def user_tweets(api, user_id=None, screen_name=None, limit=None, **kwargs):
    """Query the Twitter REST API for a user's tweets.

    Takes an authenticated API object (API or APIPool), exactly one of
    user_id or screen_name (not both), and an optional limit on the number
    of tweets returned. Extra keyword arguments are forwarded to the
    Tweepy/APIPool query method to support full API call parameterization.

    Returns a cursor (iterator) over Tweepy status objects.
    """
    if not (user_id or screen_name):
        raise Exception("Must provide one of user_id or screen_name")
    if user_id:
        timeline = Cursor(api.user_timeline, user_id=user_id, count=200,
                          **kwargs)
    elif screen_name:
        timeline = Cursor(api.user_timeline, screen_name=screen_name,
                          count=200, **kwargs)
    return timeline.items(_check_limit(limit)) if limit else timeline.items()
def ensure_users_edges_in_db(user, edges_collection, twitter_api):
    """Fetch a user's follower and friend ids from Twitter, store both edge
    sets in the given collection, and return (friends_ids, followers_ids)."""
    uid = user['id']

    logging.info(".. Fetching followers_ids for user {0}.".format(uid))
    logging.info(".... user has {0} followers.".format(
        user['followers_count']))
    follower_edges = [
        {'from': follower_id, 'to': uid}
        for follower_id in Cursor(twitter_api.followers_ids, id=uid).items()
    ]
    store_edges(edges_collection, follower_edges)
    followers_ids = [edge['from'] for edge in follower_edges]

    logging.info(".. Fetching friends_ids for user {0}.".format(uid))
    logging.info(".... user has {0} friends.".format(user['friends_count']))
    friend_edges = [
        {'to': friend_id, 'from': uid}
        for friend_id in Cursor(twitter_api.friends_ids, id=uid).items()
    ]
    store_edges(edges_collection, friend_edges)
    friends_ids = [edge['to'] for edge in friend_edges]

    return friends_ids, followers_ids
def query_tweets(api, query, limit=None, languages=None):
    """Search the Twitter REST API for tweets matching the given query.

    Takes an authenticated api object (API or APIPool), a query string, an
    optional limit on the number of tweets returned, and an optional list
    of languages to further filter results.

    Returns a cursor (iterator) over Tweepy status objects, not native
    JSON docs.
    """
    results = Cursor(api.search, q=query, include_entities=True,
                     lang=languages)
    return results.items(_check_limit(limit)) if limit else results.items()
def query_tweets(api, query, limit=None, languages=None):
    """Queries twitter REST API for tweets matching given twitter search
    'query'.

    Takes an authenticated api object (API or APIPool), a query string, an
    optional limit for number of tweets returned, and an optional list of
    languages to further filter results.

    Returns a cursor (iterator) over Tweepy status objects (not native
    JSON docs).
    """
    cursor = Cursor(api.search, q=query, include_entities=True, lang=languages)
    if limit:
        # Pass _check_limit's return value to items() so any
        # validation/coercion it performs is actually applied; this matches
        # the sibling query_tweets implementation, which previously ignored
        # the checked value here.
        return cursor.items(_check_limit(limit))
    return cursor.items()
def scraper(self, api, queries, fetch_num):
    """Collect tweets for analysis using a Tweepy cursor and user-defined
    search terms.

    :param api: initialized Twitter API object
    :param queries: list of string searches to be run on Twitter
    :param fetch_num: number of tweets to return per search term
    :return: set of tweet tuples, each containing tuple metadata specified
        in get_data
    """
    collected = set()
    for term in queries:
        # -filter:retweets excludes all RTs - recommended for sentiment
        # analysis; follows standard Twitter search syntax.
        term += " -filter:retweets"
        cursor = Cursor(api.search, lang='en', rpp=100,
                        tweet_mode='extended', q=term)
        print(f"Gathering tweets for '{term}'...")
        statuses = cursor.items(fetch_num)
        records = []
        try:
            # get_data() extracts the desired metadata fields of each tweet.
            records = [self.get_data(status._json) for status in statuses]
            time.sleep(5)
        except TweepError:
            print(f"Error detected on {term}")
        # Tweet metadata becomes a tuple so it can live in the result set.
        for record in records:
            collected.add(tuple(record))
    return self.filter(collected)
def run(args):
    """Entry point: read settings, search Twitter for matching tweets, and
    post one status per result.

    Reads hashtags and limits from the config file named by args.settings,
    resumes from the persisted since_id in the session file, posts a status
    linking each found tweet, then saves the newest id back for next run.
    """
    logging_config = dict(
        level=INFO,
        format=
        '[%(asctime)s - %(filename)s:%(lineno)d - %(funcName)s - %(levelname)s] %(message)s'
    )
    basicConfig(**logging_config)
    logger.debug("Reading config file, %s", args.settings)
    config = ConfigParser()
    config.read(args.settings)
    logger.debug("Read config file")
    query = " OR ".join(config.get("app", "hashtags").split(", "))
    max_results = config.getint("app", "max_results")
    rt_msg = config.get("app", "rt_msg")
    session_file = config.get("app", "session_file")
    session_file = session_file.format(cwd=getcwd())
    logger.debug("query=%s", query)
    logger.debug("max_results=%d", max_results)
    logger.debug("rt_msg=%s", rt_msg)
    logger.debug("session_file=%s", session_file)
    persist = PersistentDict(session_file)
    since_id = persist.get(since_id_name, None)
    logger.debug("Retrieved since_id %s", since_id)
    twitter_api = create_api(config.get("twitter", "consumer_key"),
                             config.get("twitter", "consumer_secret"),
                             config.get("twitter", "access_key"),
                             config.get("twitter", "access_secret"))
    search = Cursor(twitter_api.search, q=query, since_id=since_id)
    # Renamed from `format`, which shadowed the builtin.
    status_format = "{msg} https://twitter.com/{screen_name}/status/{status_id}"
    results = search.items(max_results)
    for tweet in results:
        msg = status_format.format(msg=rt_msg,
                                   screen_name=tweet.author.screen_name,
                                   status_id=tweet.id)
        logger.info("tweeting: %s", msg)
        try:
            twitter_api.update_status(msg)
        except Exception:
            # Previously a bare `except:`, which also swallowed
            # SystemExit/KeyboardInterrupt; keep it best-effort but narrower.
            logger.exception("Error posting tweet!")
    if len(results.page_iterator.results) > 1:
        logger.info("Saving last id %s",
                    results.page_iterator.results[0].since_id)
        persist[since_id_name] = results.page_iterator.results[0].since_id
        persist.sync()
def search_tweets(self, query, lang, depth=1000):
    """Generator yielding cleaned text of the 'depth' most recent tweets
    matching the query.

    Arguments:
        query (str): search string with logic operations (AND, OR...)
        lang (str): language abbreviation to filter the tweets
        depth (int, optional): number of tweets to retrieve

    Yields:
        str: cleaned tweet text
    """
    try:
        results = Cursor(
            method=self.API.search,
            q=query,
            lang=lang,
            count=100,
            tweet_mode='extended',
        ).items(depth)
        for status in results:
            yield clean_text(get_tweet_text(status))
    except TweepError:
        exit('Unable to find ' + str(depth) + ' tweets')
def process(self, cursor: tweepy.Cursor):
    """Stream a single filtered tweet to AWS Kinesis stream.

    :param cursor: Tweepy cursor
    """
    logging.info("Processing tweets")
    for status in self.handle_rate_limit(cursor.items(self.limit)):
        record = self.filter(status)
        try:
            logging.debug("Streaming tweet data to Kinesis")
            result = self.firehose_client.put_record(
                DeliveryStreamName=self.delivery_stream,
                Record={"Data": json.dumps(record)},
            )
            logging.debug(result)
        except ClientError as ex:
            # Log client errors but keep processing subsequent tweets.
            logging.exception(
                f"Failed to stream tweet data to AWS Kinesis: {ex}.")
        finally:
            # Counter advances whether or not the put succeeded.
            self.counter += 1
            if self.counter % self.logging_interval == 0:
                logging.info(f"Processed {self.counter} tweets.")
def get_friends(self, screen_name=None, user_id=None, max_friends=2000):
    """Return a list of the user's friend_ids (or empty list if the account
    was private).

    Params:
        screen_name like "barackobama" or "s2t2", or user_id.
        max_friends caps the request volume for performance; accounts that
        hit the cap can always be re-scraped later.

    See:
        http://docs.tweepy.org/en/v3.8.0/api.html#API.friends_ids
        https://github.com/tweepy/tweepy/blob/3733fd673b04b9aa193886d6b8eb9fdaf1718341/tweepy/api.py#L542-L551
        http://docs.tweepy.org/en/v3.8.0/cursor_tutorial.html
        https://developer.twitter.com/en/docs/accounts-and-users/follow-search-get-users/api-reference/get-friends-ids
        https://developer.twitter.com/en/docs/basics/cursoring
    """
    if screen_name is not None:
        id_cursor = Cursor(self.api.friends_ids, screen_name=screen_name,
                           cursor=-1)
    elif user_id is not None:
        id_cursor = Cursor(self.api.friends_ids, user_id=user_id, cursor=-1)
    else:
        print("OOPS PLEASE PASS SCREEN NAME OR USER ID")
        return None
    friend_ids = []
    try:
        friend_ids.extend(id_cursor.items(max_friends))
    except TweepError as err:
        #> "Not authorized." if user is private / protected (e.g. 1003322728890462209)
        print("OOPS", err)
    return friend_ids
def fetch_user_timeline(self, request_params=None, limit=2_000):
    """
    See:
        https://docs.tweepy.org/en/latest/api.html#timeline-methods
        https://docs.tweepy.org/en/v3.10.0/cursor_tutorial.html

    Params:
        request_params (dict) needs either "user_id" or "screen_name" attr
        limit (int) the number of total tweets to fetch per user
        ... or overwrite any of the default params

    Example: get_user_timeline({"user_id": 10101, "count": 100}, limit=300)
    """
    # Default of None instead of a shared mutable dict literal; callers
    # passing nothing get the same behavior as before.
    if request_params is None:
        request_params = {}
    default_params = {
        "exclude_replies": False,
        "include_rts": True,
        "tweet_mode": "extended",  # access the full text
        "count": 200  # number of tweets per request
    }
    # use the defaults, and override with user-specified params (including
    # the required user_id or screen_name)
    request_params = {**default_params, **request_params}
    request_params["cursor"] = -1  # use a cursor approach!
    cursor = Cursor(self.api.user_timeline, **request_params)
    return cursor.items(limit)
def getFollowersLv2(num):
    # Second-level follower crawl: reads follower ids from followers.txt
    # (one per line, starting at index `num`) and appends each of *their*
    # followers' metadata to followers3.txt as pipe-separated rows.
    # NOTE(review): Python 2 syntax (print statements). The `while counter`
    # loop increments forever and only terminates via the commented-out
    # BaseException handler (IndexError once `counter` runs past `lines`)
    # -- TODO confirm the intended stopping condition.
    auth = tweetielytics.twitterAuth()
    api = tweepy.API(auth)
    f = open('followers.txt', 'r')
    g = open('followers3.txt', 'a')
    lines = f.readlines()
    counter = num  # 30
    while counter:
        # try:
        cursor = Cursor(api.followers, id=lines[counter])
        print str(counter) + ': ' + str(lines[counter])
        for c in cursor.items():  # iterate through followers
            g.write(c.id_str + '|' + str(c.created_at) + '|' +
                    str(c.favourites_count) + '|' + str(c.followers_count) +
                    '|' + str(c.friends_count) + '|' + str(c.screen_name) +
                    '\n')
        counter += 1
        # except BaseException:
        #     print "Stopped at " + str(counter)
    g.close()
    f.close()
def search_tweet(self, query_string):
    """Search English-language tweets (retweets excluded) matching the
    query string; returns an iterator over up to 100 statuses."""
    results = Cursor(self.api.search,
                     q=query_string,
                     lang='en',
                     count=1000,
                     exclude='retweets')
    return results.items(100)
def get_user_tweets(self, user, word, depth=1000):
    """Generator yielding cleaned text of the user's 'depth' most recent
    tweets that contain the given word.

    Arguments:
        user (str): Twitter user account without the '@'
        word (str): lowercase word used to filter the tweets
        depth (int, optional): number of tweets to retrieve

    Yields:
        str: cleaned tweet text
    """
    try:
        timeline = Cursor(
            method=self.API.user_timeline,
            user_id=user,
            count=200,
            tweet_mode='extended',
        )
        for status in timeline.items(depth):
            text = clean_text(get_tweet_text(status))
            if word in text:
                yield text
    except TweepError:
        exit('Unable to retrieve tweets from ' + user)
def process(self, cursor: tweepy.Cursor):
    """Process single tweet and push it to Kinesis stream.

    :param cursor: Tweepy Cursor
    """
    logging.info("Processing tweets")
    for status in self.handle_rate_limit(cursor.items(self.limit)):
        payload = self.filter(status)
        try:
            logging.debug("Pushing tweet data to Kinesis")
            response = self.firehose_client.put_record(
                DeliveryStreamName=self.delivery_stream,
                # kinesis only accepts byte-like data
                Record={"Data": json.dumps(payload)},
            )
            logging.debug(response)
        except ClientError as ex:
            # A failed put is logged; the loop continues with the next tweet.
            logging.exception(f"Could not push tweet data to Kinesis: {ex}")
        finally:
            self.counter += 1
            if self.counter % self.logging_interval == 0:
                logging.info(f"Processed {self.counter} tweets")
def get_mentions(self, recent_id):
    """gets all tweets that @mention the bot.
    @:param recent_id: ID string of latest mention that we have pulled
    @:return tweet_list returns dictionary of mentioned tweets with
    username, text, and tweet ID"""
    mentions = Cursor(
        self.api.search,
        q=self.handle + " -filter:retweets",
        tweet_mode='extended',
        since_id=recent_id,
    )
    tweet_list = []
    for tweet in mentions.items(500):
        # Ensures that the bot is not responding to a previous response
        if not ((tweet.user.screen_name == "markoving_bot") and
                (tweet.in_reply_to_status_id is not None)):
            tweet_text = re.sub("https:.*$", "", tweet.full_text)
            # Un-escape HTML-encoded ampersands in the tweet body. The
            # previous pattern replaced "&" with "&" (a no-op); the Twitter
            # API returns ampersands escaped as "&amp;".
            tweet_text = re.sub("&amp;", "&", tweet_text)
            tweet_list.append({
                "username": tweet.user.screen_name,
                "text": tweet_text,
                "tweet_id": tweet.id_str,
                "reply_id": tweet.in_reply_to_status_id
            })
    return tweet_list
def execute(self):
    """Executes the query with any applied argument."""
    if not self.query_args:
        raise Exception('Cannot execute query with no arguments.')
    full_query = ' '.join(self.query_args)
    print(full_query)
    cursor = Cursor(self.twitter.api.search, q=full_query, rpp=100)
    return self._limit_handled(cursor.items())
def user_tweets(api, user_id=None, screen_name=None, limit=None):
    """Query the Twitter REST API for a user's tweets.

    Takes an authenticated API object (API or APIPool), exactly one of
    user_id or screen_name (not both), and an optional limit on the number
    of tweets returned.

    Returns a cursor (iterator) over Tweepy status objects.
    """
    if not (user_id or screen_name):
        raise Exception("Must provide one of user_id or screen_name")
    if user_id:
        timeline = Cursor(api.user_timeline, user_id=user_id)
    else:
        timeline = Cursor(api.user_timeline, screen_name=screen_name)
    return timeline.items(_check_limit(limit)) if limit else timeline.items()
def search_tweets(self, query, lang, filter_prob=95, depth=1000):
    """Generator yielding cleaned text of the 'depth' most recent tweets
    matching the query.

    Arguments:
        query (str): search string with logic operations (AND, OR...)
        lang (str): language abbreviation to filter the tweets
        filter_prob (int, optional): probability in which the query words
            are removed
        depth (int, optional): number of tweets to retrieve

    Yields:
        str: cleaned tweet text
    """
    try:
        results = Cursor(
            method=self.API.search,
            q=query,
            lang=lang,
            count=100,
            tweet_mode='extended',
        )
        # Query words (with logical operators removed) feed a probabilistic
        # filter that strips them from results in order to avoid overfitting.
        terms = [w for w in query.split(' ')
                 if not any(op in w for op in search_ops)]
        search_filters = build_filters(terms, filter_prob)
        for status in results.items(depth):
            text = clean_text(self.get_text(status), search_filters)
            yield clean_text(text)
    except TweepError:
        exit('Unable to find ' + str(depth) + ' tweets')
def user_tweets(api, user_id=None, screen_name=None, limit=None):
    """Queries Twitter REST API for user's tweets. Returns as many as
    possible, or up to given limit.

    Takes an authenticated API object (API or APIPool), one of user_id or
    screen_name (not both), and an optional limit for number of tweets
    returned.

    Returns a cursor (iterator) over Tweepy status objects.
    """
    if not (user_id or screen_name):
        raise Exception("Must provide one of user_id or screen_name")
    if user_id:
        cursor = Cursor(api.user_timeline, user_id=user_id)
    elif screen_name:
        cursor = Cursor(api.user_timeline, screen_name=screen_name)
    if limit:
        # Pass _check_limit's return value to items() so any
        # validation/coercion it performs is actually applied (consistent
        # with the sibling user_tweets implementation).
        return cursor.items(_check_limit(limit))
    return cursor.items()
def main(users_list_path: str):
    """Stream every member of the nijisanji_app/list1 Twitter list into a
    JSON array file at users_list_path."""
    owner = "nijisanji_app"
    slug = "list1"
    members = Cursor(api.list_members,
                     slug=slug,
                     owner_screen_name=owner)
    with jsonstreams.Stream(jsonstreams.Type.array, users_list_path) as out:
        for member in members.items():
            out.write(member._json)
def query_user_tweets(output, id_list, auth_file, max_id=-1, since_id=-1):
    '''
    queries twitter for users from id_list and authentication from auth_file.
    '''
    # Appends each fetched tweet (plus a 'smapp_timestamp' collection-time
    # field) as one JSON line to `output`; per-user errors are logged and
    # the loop continues with the next user.
    # NOTE(review): the defaults are -1 and the branches below test
    # truthiness, so by default max_id=-1 and since_id=-1 are *both* treated
    # as real ids and forwarded to the API -- verify whether callers pass
    # 0/None for "unset", otherwise the first branch always fires.
    num_inputs_queried = 0
    api_pool = TweepyPool(auth_file)
    write_fd = open(output, 'a+')
    for userid in id_list:
        num_inputs_queried = num_inputs_queried + 1
        # even though the count is 200 we can cycle through 3200 items.
        # if you put a count variable in this cursor it will iterate up
        # to about 3200
        if not userid == '':
            try:
                count = 0
                if max_id and since_id:
                    cursor = Cursor(api_pool.user_timeline, user_id=userid,
                                    count=200, max_id=max_id,
                                    since_id=since_id, tweet_mode='extended')
                elif max_id:
                    cursor = Cursor(api_pool.user_timeline, user_id=userid,
                                    count=200, max_id=max_id,
                                    tweet_mode='extended')
                elif since_id:
                    cursor = Cursor(api_pool.user_timeline, user_id=userid,
                                    count=200, since_id=since_id,
                                    tweet_mode='extended')
                else:
                    cursor = Cursor(api_pool.user_timeline, user_id=userid,
                                    count=200, tweet_mode='extended')
                for item in cursor.items():
                    count = count + 1
                    tweet_item = json.loads(json.dumps(item._json))
                    # stamp the collection time (UTC) on each stored tweet
                    tweet_item['smapp_timestamp'] = (datetime.datetime.utcnow(
                    ).strftime('%Y-%m-%d %H:%M:%S +0000'))
                    write_fd.write(json.dumps(tweet_item))
                    write_fd.write('\n')
            except TweepError as e:
                log('tweepy error: {}'.format(e))
            log('counted {} objects for input {}'.format(count, userid))
        log('number of inputs queried so far: {}'.format(num_inputs_queried))
        # push the run log to s3 after every user so progress survives crashes
        s3.disk_2_s3(context['log'], context['s3_log'])
    write_fd.close()
def ensure_users_edges_in_db(user, edges_collection, twitter_api):
    """Fetch a user's follower and friend ids via the Twitter API, persist
    both edge sets to the db collection, and return
    (friends_ids, followers_ids)."""
    uid = user['id']

    logging.info(".. Fetching followers_ids for user {0}.".format(uid))
    logging.info(".... user has {0} followers.".format(user['followers_count']))
    follower_edges = []
    for follower_id in Cursor(twitter_api.followers_ids, id=uid).items():
        follower_edges.append({'from': follower_id, 'to': uid})
    store_edges(edges_collection, follower_edges)
    followers_ids = [edge['from'] for edge in follower_edges]

    logging.info(".. Fetching friends_ids for user {0}.".format(uid))
    logging.info(".... user has {0} friends.".format(user['friends_count']))
    friend_edges = []
    for friend_id in Cursor(twitter_api.friends_ids, id=uid).items():
        friend_edges.append({'to': friend_id, 'from': uid})
    store_edges(edges_collection, friend_edges)
    friends_ids = [edge['to'] for edge in friend_edges]

    return friends_ids, followers_ids
def get_tweets(self, user_id):
    """Fetch up to TWEET_COUNT tweets from the user's timeline, each parsed
    via parse_api_tweet."""
    logger.log(LOG_LEVEL, 'Getting tweets for {}'.format(user_id))
    timeline = Cursor(
        self.api.user_timeline,
        user_id=user_id,
        count=PAGE_COUNT
    )
    return [self.parse_api_tweet(status)
            for status in timeline.items(TWEET_COUNT)]
def get_friends(self, user_id):
    """Return the ids (as strings) of the user's friends that pass the
    is_potential_target check."""
    logger.log(LOG_LEVEL, 'Getting friends for {}'.format(user_id))
    cursor = Cursor(
        self.api.friends,
        user_id=user_id
    )
    return [str(friend.id)
            for friend in cursor.items()
            if self.is_potential_target(friend)]
def getFollowers():
    """Write each follower id of the 'dep4b' account to followers.txt, one
    id per line (appending)."""
    auth = tweetielytics.twitterAuth()
    api = tweepy.API(auth)
    # user = api.get_user('dep4b')
    cursor = Cursor(api.followers_ids, id='dep4b')
    # Context manager guarantees the file is closed even if the cursor
    # raises mid-iteration (e.g. on a rate-limit error); the old code
    # leaked the handle in that case.
    with open('followers.txt', 'a') as f:
        for follower in cursor.items():
            f.write(str(follower) + '\n')
def process_batch(self, cursor: tweepy.Cursor):
    """
    Process batch of tweets and push it to Kinesis stream.

    :param cursor: Tweepy Cursor iterator
    """
    logging.info("Processing tweets")
    for tweet in self.handle_rate_limit(cursor.items(self.limit)):
        if len(self._batch) >= self.batch_size:
            # Flush the full batch *before* adding the current tweet. The
            # previous logic submitted the batch in the else-branch and
            # never appended the tweet that arrived while the batch was
            # full, silently dropping it.
            self.submit_batch(self._batch)
            # NOTE(review): assumes submit_batch does not reset _batch
            # itself -- confirm; resetting twice is harmless.
            self._batch = []
        self._batch.append(self.filter(tweet))
    # make sure remaining tweets are submitted
    if self._batch:
        self.submit_batch(self._batch)
def main(users_list_path: str, out_dir: str):
    """For each screen_name in the users JSON file, write that user's
    follower ids (one per line) to out_dir/<screen_name>."""
    with open(users_list_path, 'r') as f:
        users = json.load(f)
    screen_names = [user['screen_name'] for user in users]
    out_root = Path(out_dir)
    out_root.mkdir(exist_ok=True)
    for screen_name in screen_names:
        print('fetching followers of ' + screen_name)
        cursor = Cursor(api.followers_ids, screen_name=screen_name,
                        count=2048)
        with open(out_root / screen_name, mode="w", encoding="utf-8") as out:
            for follower_id in cursor.items():
                out.write(f'{follower_id}\n')
                out.flush()
def get_followers_ids(api, user_id):
    """
    Given a Tweepy/smappPy TweepyPool api, query twitter's rest API for
    followers of given user_id. Returns IDs only (much faster / more per
    request).

    Parameters:
        api     - fully authenticated Tweepy api or smappPy TweepyPool api
        user_id - twitter user id

    Returns tuple: return code, list of IDs or None (if API call fails)
    """
    followers_cursor = Cursor(api.followers_ids, user_id=user_id)
    user_list, ret_code = call_with_error_handling(list,
                                                   followers_cursor.items())
    if ret_code != 0:
        logger.warning("User {0}: Followers request failed".format(user_id))
    # call_with_error_handling yields None for the list when the call fails
    return ret_code, user_list
def process_batch(self, cursor: tweepy.Cursor):
    """
    Stream a batch of filtered tweets to AWS Kinesis stream.

    :param cursor: Tweepy Cursor
    """
    # Start logging
    logging.info("Processing tweets.")
    for tweet in self.handle_rate_limit(cursor.items(self.limit)):
        if len(self._batch) >= self.batch_size:
            # Flush the full batch *before* appending the current tweet.
            # The previous logic submitted in the else-branch and never
            # appended the tweet that arrived while the batch was full,
            # silently dropping it.
            self.submit_batch(self._batch)
            # NOTE(review): assumes submit_batch does not reset _batch
            # itself -- confirm; resetting twice is harmless.
            self._batch = []
        self._batch.append(self.filter(tweet))
    # If rate limit exceeded, stream remaining tweets if any
    if self._batch:
        self.submit_batch(self._batch)
async def check_twitter(self):
    """Poll each tracked Twitter account for new tweets and announce them
    to the configured Discord channel.

    Skips replies and retweets, embeds the first attached image, and
    records the newest seen tweet id both in memory (self.mostRecents) and
    in the keys.db sqlite table.
    """
    await self.bot.wait_until_ready()
    channel = discord.Object(id=CONF.ANNONCE_CHANNEL_ID)
    for target in self.mostRecents:
        LOG.debug("checking account " + target)
        tweets = Cursor(self.auth_api.user_timeline,
                        id=target,
                        since_id=self.mostRecents[target],
                        tweet_mode="extended")
        for status in tweets.items():
            # Only announce original tweets: not replies, not retweets.
            # (Was `== None` and `hasattr(...) == False`.)
            if status.in_reply_to_status_id is None and not hasattr(
                    status, "retweeted_status"):
                link = "https://twitter.com/{}/status/{}".format(
                    target, status.id_str)
                embed = discord.Embed(title=target,
                                      description=status.full_text,
                                      url=link,
                                      color=0x1DA1F2)
                if "media" in status.entities:
                    # Embed only the first attached image.
                    for media in status.entities["media"]:
                        embed.set_image(url=media["media_url"])
                        break
                embed.set_thumbnail(url=status.user.profile_image_url)
                embed.set_footer(text=status.created_at)
                await self.bot.send_message(channel, embed=embed)
            if status.id > self.mostRecents[target]:
                self.mostRecents[target] = status.id
                conn = sqlite3.connect('keys.db')
                try:
                    c = conn.cursor()
                    args = (status.id, target)
                    c.execute(
                        "UPDATE twitter SET lastTweet = ? WHERE account = ?",
                        args)
                    conn.commit()
                finally:
                    # Close even if the UPDATE raises; the old code leaked
                    # the connection on error.
                    conn.close()
    await asyncio.sleep(5 * 60)
def get_followers(self, request_params=None, limit=2_000):
    """See:
        https://docs.tweepy.org/en/latest/api.html#API.followers
        https://docs.tweepy.org/en/v3.10.0/cursor_tutorial.html

    Params:
        request_params (dict) needs either "user_id" or "screen_name" attr
        limit (int) the number of followers to fetch per user

    Example: get_followers({"user_id": 10101, "count": 100}, limit=300)
    """
    # Default of None instead of a shared mutable dict literal; callers
    # passing nothing get the same behavior as before.
    if request_params is None:
        request_params = {}
    default_params = {
        "count": 200  # number of followers per request
    }
    # use the defaults, and override with user-specified params (including
    # the required user_id or screen_name)
    request_params = {**default_params, **request_params}
    request_params["cursor"] = -1  # use a cursor approach!
    cursor = Cursor(self.api.followers, **request_params)
    #return cursor.pages()
    return cursor.items(limit)
def harvest_tweet(db, city, tweet_rate, max_id=None, since_id=None):
    """Harvest up to tweet_rate tweets geotagged for `city`, save each to
    the db with added analysis fields, and log the run to the harvest log."""
    started = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    file_name = f"log/twitter-current-all.log"
    with open(file_name, "a") as log_file:
        log_file.write(f"Twitter harvest for {city} at begins at: {started}\n")
        tweets = Cursor(api.search,
                        q="place:%s" % coords[city],
                        max_id=max_id,
                        since_id=since_id,
                        tweet_mode="extended")
        saved = 0
        for item in tweets.items(tweet_rate):
            record = {"_id": item.id_str}
            # add_fields enriches the raw tweet JSON before storage.
            add_fields(item._json, record, afinn, keywords_list, polygons,
                       area_list, city)
            db.save(record)
            saved += 1
        log_file.write(f"Number of tweets saved: {saved}\n")
        finished = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        log_file.write(f"Twitter harvest for {city} ends at: {finished}\n")
        log_file.write("-------------------------------------------\n")
def scraper(queries):
    """Collect tweets for analysis using a Tweepy cursor object and
    user-defined search terms.

    :param queries: list of searches to be run on Twitter
    :return: set of tweet tuples, each containing tuple metadata specified
        in get_data
    """
    api = start_api()  # Initializes API from class
    collected = set()
    for search in queries:
        cursor = Cursor(api.search, rpp=100, tweet_mode='extended', q=search)
        # Gathers user-defined number of items. Defaults to 1000.
        statuses = cursor.items(parser.n)
        # Using specifications in get_data(), saves desired metadata fields
        # of an individual tweet; tuples so records can live in the set.
        for record in map(get_data, (status._json for status in statuses)):
            collected.add(tuple(record))
    return collected
def job_fetch(self, job):
    """Fetch new tweets for self.user_id since self.latest_id and persist
    them via self.db.

    Pulls up to 5 tweets when resuming from a known id, otherwise just the
    newest one, and advances self.latest_id to the highest id seen.
    """
    self.logger.debug("start job: %s", inspect.currentframe().f_code.co_name)
    self.logger.debug("latest id: %s", self.latest_id)
    tweets = Cursor(self.twitter.user_timeline,
                    id=self.user_id,
                    since_id=self.latest_id)
    count = 5 if self.latest_id else 1
    latest_id = None
    for tweet in tweets.items(count):
        # Track the max id seen. The old expression evaluated
        # `latest_id < tweet.id` with latest_id=None, which raises
        # TypeError on the first tweet under Python 3.
        if latest_id is None or tweet.id > latest_id:
            latest_id = tweet.id
        self.db.save_tweet(tweet)
    if latest_id:
        self.logger.info("latest id: %s", latest_id)
        self.latest_id = latest_id
    self.logger.info("tweets after fetch: %s", self.db.tweets.count())
    self.logger.debug("finish job: %s", inspect.currentframe().f_code.co_name)
"""function for twitter authentication""" auth = OAuthHandler(API_KEY, API_SECRET) return auth if __name__ == '__main__': auth = authenticate() api = API(auth) while True: cursor = Cursor(api.user_timeline, id='legaltech_news', tweet_mode='extended') for status in cursor.items(100): time.sleep(1) text = status.full_text # take extended tweets into account if 'extended_tweet' in dir(status): text = status.extended_tweet.full_text if 'retweeted_status' in dir(status): r = status.retweeted_status if 'extended_tweet' in dir(r): text = r.extended_tweet.full_text tweet = { 'text': text, 'username': status.user.screen_name, 'followers_count': status.user.followers_count