def run_user_timeline_download():
    """Download timeline tweets for the users listed in the id file.

    Reads at most the first 1000 lines of ``data/top_users_to_PrEP.txt``,
    treating each non-blank line as one user id.  For every id it sleeps
    16 minutes, then requests pages 0-149 of that user's timeline (20 tweets
    per page) and writes each tweet as one JSON line to
    ``data/user_timeline_tweets.json``.

    A user whose download raises is reported on stdout and skipped; the ids
    that completed without error are printed at the end, followed by the
    number of ids that were read.

    Relies on ``auth``, ``API`` and ``JSONParser`` being available at module
    level.
    """
    print('downloading user-timelines...')
    api = API(auth, parser=JSONParser())

    user_str_ids = []
    with open('data/top_users_to_PrEP.txt') as f_in:
        for line_no, line in enumerate(f_in):
            if line_no == 1000:
                break
            # Lines read from a file keep their trailing newline; strip it so
            # the id sent to the API is clean, and ignore blank lines.
            user_id = line.strip()
            if user_id:
                user_str_ids.append(user_id)

    users = []
    with open('data/user_timeline_tweets.json', 'w') as f_out:
        for user_id in user_str_ids:
            try:
                # Presumably a pause to stay clear of the API rate limit
                # window before fetching the next user -- TODO confirm.
                time.sleep(60 * 16)
                for page in range(150):
                    for twt in api.user_timeline(user_id, count=20, page=page):
                        f_out.write(json.dumps(twt) + '\n')
                users.append(user_id)
            except Exception as exc:
                # Best-effort download: report and move on to the next user.
                # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt
                # and SystemExit through, so the long sleeps stay
                # interruptible.
                print('skipping user %s: %s' % (user_id, exc))

    print('done with user-timelines...')
    print(users)
    print(len(user_str_ids))
class TimelinesFetcher(object):
    """Continuously fetch producers' user timelines and import their statuses.

    Each producer's ``last_status_id`` is used as the ``since_id`` of the next
    request, so only statuses newer than the last imported one are fetched.
    """

    def __init__(self):
        """Create the API client from the project's OAuth handler."""
        auth = get_tweepy_oauth_handler()
        # This could be optimized once we have more than one credentials set in
        # the DB (we could loop over them).
        self.api = TwitterAPI(auth_handler=auth,
                              wait_on_rate_limit=True,
                              wait_on_rate_limit_notify=True)

    def start(self):
        """Loop forever, fetching every producer in random order each pass.

        Never returns.  Any exception raised by ``fetch_producer`` propagates
        and ends the loop.
        """
        while True:
            for producer in Producer.select().order_by(fn.Random()):
                self.fetch_producer(producer)

    def fetch_producer(self, producer):
        """Fetch and import the statuses of ``producer`` newer than the last run.

        Protected producers are deleted (recursively) instead of fetched.
        When at least one status is returned, ``producer.last_status_id`` is
        advanced to the highest status id of the batch and the producer is
        saved.
        """
        if producer.protected:
            logger.debug("Removing producer '%s' because they're protected",
                         producer.screen_name)
            producer.delete_instance(recursive=True)
            return

        # 200 is the page limit
        kwargs = {"count": STATUSES_PER_PRODUCER, "user_id": producer.id_str}
        if producer.last_status_id != 0:
            kwargs["since_id"] = producer.last_status_id

        # NOTE(review): errors raised by user_timeline are not handled here
        # (a TweepError handler that logged and returned was previously
        # commented out), so a failing request stops start()'s loop.
        timeline = list(self.api.user_timeline(**kwargs))

        logger.debug("Importing %d statuses from @%s",
                     len(timeline), producer.screen_name)
        if not timeline:
            return

        for st in timeline:
            self.on_status(producer, st)

        # since_id must be the *newest* id already processed.  Timelines are
        # returned newest-first, so the last element is the oldest status;
        # taking the maximum is correct regardless of ordering.
        producer.last_status_id = max(st.id for st in timeline)
        producer.save()

    def on_status(self, producer, status):
        """Import ``status`` for ``producer`` if it passes ``filter_status``.

        Returns True when the status was imported, None otherwise.
        """
        if filter_status(status):
            import_status(status, author=producer)
            return True
class TweepyAPITests(unittest.TestCase):
    """Smoke tests that call the API client's methods one by one.

    ``setUp`` builds an authenticated ``API`` from the module-level OAuth
    credentials (``oauth_consumer_key``, ``oauth_consumer_secret``,
    ``oauth_token``, ``oauth_token_secret``).  Most tests make no assertion
    and pass as long as the call does not raise; a few compare returned
    fields against the module-level ``username`` or against the values they
    just sent.  Several tests modify the authenticated account (statuses,
    friendships, blocks, profile, lists, saved searches) and then undo the
    change.
    """

    def setUp(self):
        auth = OAuthHandler(oauth_consumer_key, oauth_consumer_secret)
        auth.set_access_token(oauth_token, oauth_token_secret)
        self.api = API(auth)
        self.api.retry_count = 2
        self.api.retry_delay = 5

    # --- timelines and statuses -------------------------------------------

    def testhometimeline(self):
        self.api.home_timeline()

    def testfriendstimeline(self):
        self.api.friends_timeline()

    def testusertimeline(self):
        self.api.user_timeline()
        self.api.user_timeline('twitter')

    def testmentions(self):
        self.api.mentions()

    def testretweetedbyme(self):
        self.api.retweeted_by_me()

    def testretweetedbyuser(self):
        self.api.retweeted_by_user('twitter')

    def testretweetedtome(self):
        self.api.retweeted_to_me()

    def testretweetsofme(self):
        self.api.retweets_of_me()

    def testretweet(self):
        s = self.api.retweet(123)
        s.destroy()

    def testretweets(self):
        self.api.retweets(123)

    def testgetstatus(self):
        self.api.get_status(id=123)

    def testupdateanddestroystatus(self):
        # test update
        text = 'testing %i' % random.randint(0, 1000)
        update = self.api.update_status(status=text)
        self.assertEqual(update.text, text)

        # test destroy
        deleted = self.api.destroy_status(id=update.id)
        self.assertEqual(deleted.id, update.id)

    # --- users --------------------------------------------------------------

    def testgetuser(self):
        u = self.api.get_user('twitter')
        self.assertEqual(u.screen_name, 'twitter')

        u = self.api.get_user(783214)
        self.assertEqual(u.screen_name, 'twitter')

    def testsearchusers(self):
        self.api.search_users('twitter')

    def testme(self):
        me = self.api.me()
        self.assertEqual(me.screen_name, username)

    def testfriends(self):
        self.api.friends()

    def testfollowers(self):
        self.api.followers()

    # --- direct messages ----------------------------------------------------

    def testdirectmessages(self):
        self.api.direct_messages()

    def testsentdirectmessages(self):
        self.api.sent_direct_messages()

    def testsendanddestroydirectmessage(self):
        # send
        sent_dm = self.api.send_direct_message(username, text='test message')
        self.assertEqual(sent_dm.text, 'test message')
        self.assertEqual(sent_dm.sender.screen_name, username)
        self.assertEqual(sent_dm.recipient.screen_name, username)

        # destroy
        destroyed_dm = self.api.destroy_direct_message(sent_dm.id)
        self.assertEqual(destroyed_dm.text, sent_dm.text)
        self.assertEqual(destroyed_dm.id, sent_dm.id)
        self.assertEqual(destroyed_dm.sender.screen_name, username)
        self.assertEqual(destroyed_dm.recipient.screen_name, username)

    # --- friendships --------------------------------------------------------

    def testcreatedestroyfriendship(self):
        enemy = self.api.destroy_friendship('twitter')
        self.assertEqual(enemy.screen_name, 'twitter')
        self.assertFalse(self.api.exists_friendship(username, 'twitter'))

        friend = self.api.create_friendship('twitter')
        self.assertEqual(friend.screen_name, 'twitter')
        self.assertTrue(self.api.exists_friendship(username, 'twitter'))

    def testshowfriendship(self):
        source, target = self.api.show_friendship(target_screen_name='twitter')
        # assertTrue rather than the deprecated ``assert_`` alias, which newer
        # unittest versions no longer provide.
        self.assertTrue(isinstance(source, Friendship))
        self.assertTrue(isinstance(target, Friendship))

    def testfriendsids(self):
        self.api.friends_ids(username)

    def testfollowersids(self):
        self.api.followers_ids(username)

    # --- account ------------------------------------------------------------

    def testverifycredentials(self):
        self.assertNotEqual(self.api.verify_credentials(), False)

        # make sure that `me.status.entities` is not an empty dict
        me = self.api.verify_credentials(include_entities=True)
        self.assertTrue(me.status.entities)

        # `status` shouldn't be included
        me = self.api.verify_credentials(skip_status=True)
        self.assertFalse(hasattr(me, 'status'))

    def testratelimitstatus(self):
        self.api.rate_limit_status()

    def testupdateprofilecolors(self):
        original = self.api.me()
        updated = self.api.update_profile_colors(
            '000', '000', '000', '000', '000')

        # restore colors (done before asserting so a failed assertion does
        # not leave the account modified)
        self.api.update_profile_colors(
            original.profile_background_color,
            original.profile_text_color,
            original.profile_link_color,
            original.profile_sidebar_fill_color,
            original.profile_sidebar_border_color
        )

        self.assertEqual(updated.profile_background_color, '000')
        self.assertEqual(updated.profile_text_color, '000')
        self.assertEqual(updated.profile_link_color, '000')
        self.assertEqual(updated.profile_sidebar_fill_color, '000')
        self.assertEqual(updated.profile_sidebar_border_color, '000')

    """
    def testupateprofileimage(self):
        self.api.update_profile_image('examples/profile.png')

    def testupdateprofilebg(self):
        self.api.update_profile_background_image('examples/bg.png')
    """

    def testupdateprofile(self):
        original = self.api.me()
        profile = {
            'name': 'Tweepy test 123',
            'url': 'http://www.example.com',
            'location': 'pytopia',
            'description': 'just testing things out'
        }
        updated = self.api.update_profile(**profile)

        # restore the profile before asserting
        self.api.update_profile(
            name=original.name,
            url=original.url,
            location=original.location,
            description=original.description
        )

        for k, v in profile.items():
            if k == 'email':
                continue
            self.assertEqual(getattr(updated, k), v)

    # --- favorites, notifications, blocks ------------------------------------

    def testfavorites(self):
        self.api.favorites()

    def testcreatedestroyfavorite(self):
        self.api.create_favorite(4901062372)
        self.api.destroy_favorite(4901062372)

    def testenabledisablenotifications(self):
        self.api.enable_notifications('twitter')
        self.api.disable_notifications('twitter')

    def testcreatedestroyblock(self):
        self.api.create_block('twitter')
        self.assertEqual(self.api.exists_block('twitter'), True)
        self.api.destroy_block('twitter')
        self.assertEqual(self.api.exists_block('twitter'), False)
        self.api.create_friendship('twitter')  # restore

    def testblocks(self):
        self.api.blocks()

    def testblocksids(self):
        self.api.blocks_ids()

    # --- lists ----------------------------------------------------------------

    def testcreateupdatedestroylist(self):
        self.api.create_list('tweeps')
        # XXX: right now twitter throws a 500 here,
        # issue is being looked into by twitter.
        # self.api.update_list('tweeps', mode='private')
        self.api.destroy_list('tweeps')

    def testlists(self):
        self.api.lists()

    def testlistsmemberships(self):
        self.api.lists_memberships()

    def testlistssubscriptions(self):
        self.api.lists_subscriptions()

    def testlisttimeline(self):
        self.api.list_timeline('applepie', 'stars')

    def testgetlist(self):
        self.api.get_list('applepie', 'stars')

    def testlistmembers(self):
        self.api.list_members('applepie', 'stars')

    def testislistmember(self):
        uid = self.api.get_user('applepie').id
        self.api.is_list_member('applepie', 'stars', uid)

    def testsubscribeunsubscribelist(self):
        self.api.subscribe_list('applepie', 'stars')
        self.api.unsubscribe_list('applepie', 'stars')

    def testlistsubscribers(self):
        self.api.list_subscribers('applepie', 'stars')

    def testissubscribedlist(self):
        uid = self.api.get_user('applepie').id
        self.api.is_subscribed_list('applepie', 'stars', uid)

    # --- search, trends, geo ---------------------------------------------------

    def testsavedsearches(self):
        s = self.api.create_saved_search('test')
        try:
            self.api.saved_searches()
            self.assertEqual(self.api.get_saved_search(s.id).query, 'test')
        finally:
            # Always remove the saved search, even when the checks above
            # fail, so it is not left behind on the account.
            self.api.destroy_saved_search(s.id)

    def testsearch(self):
        self.api.search('tweepy')

    def testtrends(self):
        self.api.trends_daily()
        self.api.trends_weekly()

    def testgeoapis(self):
        self.api.geo_id(id='c3f37afa9efcf94b')  # Austin, TX, USA
        self.api.nearby_places(lat=30.267370168467806,
                               long=-97.74261474609375)  # Austin, TX, USA
        self.api.reverse_geocode(lat=30.267370168467806,
                                 long=-97.74261474609375)  # Austin, TX, USA
class GetTwitterData():
    """Collect tweets and follower names for users found in a tweets file.

    Output goes to the CSV files named by the module-level
    ``users_tweets_path`` and ``users_friends_path``; input is read from
    ``tweets_data_path``.  The CSV files are opened in binary mode and tweet
    text is UTF-8 encoded before writing, i.e. the I/O follows Python 2's
    ``csv`` conventions.
    """

    def __init__(self, auth):
        """Keep ``auth`` and build the API client from it."""
        self.auth = auth
        self.api = API(self.auth)

    def _write_tweets_csv(self, screen_name, tweets):
        """Overwrite ``users_tweets_path`` with a header and one row per tweet."""
        rows = [[screen_name, tweet.id_str, tweet.created_at,
                 tweet.text.encode("utf-8")] for tweet in tweets]
        with open(users_tweets_path, 'wb') as f:
            writer = csv.writer(f)
            writer.writerow(["screen_name", "id", "created_at", "text"])
            writer.writerows(rows)

    def get_all_tweets(self, screen_name, tweet_count):
        """Download every reachable tweet of ``screen_name`` and write them to CSV.

        Requests ``tweet_count`` tweets at a time, walking backwards with
        ``max_id`` until a request returns nothing.  The CSV file is
        overwritten; when the user has no tweets it contains only the header.
        """
        # make initial request for most recent tweets (200 is the maximum
        # allowed count)
        new_tweets = self.api.user_timeline(screen_name=screen_name,
                                            count=tweet_count)
        alltweets = list(new_tweets)

        # keep grabbing tweets until there are no tweets left to grab; the
        # loop is skipped entirely when the first request came back empty
        # (indexing alltweets[-1] would fail in that case)
        while len(new_tweets) > 0:
            # id of the oldest tweet so far, less one
            oldest = alltweets[-1].id - 1
            print("getting tweets before %s" % oldest)

            # all subsequent requests use the max_id param to prevent
            # duplicates
            new_tweets = self.api.user_timeline(screen_name=screen_name,
                                                count=tweet_count,
                                                max_id=oldest)
            alltweets.extend(new_tweets)
            print("...%s tweets downloaded so far" % len(alltweets))

        self._write_tweets_csv(screen_name, alltweets)

    def new_get_all_tweets(self, screen_name, tweet_count):
        """Write the ``tweet_count`` most recent tweets of ``screen_name`` to CSV.

        NOTE(review): the file is opened with mode 'wb', so each call replaces
        what the previous call wrote -- confirm that is intended when this is
        called for many users in a row.
        """
        # single request for the most recent tweets (200 is the maximum
        # allowed count)
        new_tweets = self.api.user_timeline(screen_name=screen_name,
                                            count=tweet_count)
        self._write_tweets_csv(screen_name, new_tweets)

    def find_friends(self, screen_name):
        """Append the followers of ``screen_name`` to ``users_friends_path``.

        Despite its name, this pages through ``followers_ids`` (50 ids per
        page), resolves each page with ``lookup_users`` and appends one
        ``[screen_name, follower id, follower screen name]`` row per returned
        user.  Sleeps one second after each page.
        """
        print("screen_name: " + screen_name)
        pages = Cursor(self.api.followers_ids, screen_name=screen_name,
                       count=50).pages()
        for follower_ids in pages:
            print(follower_ids)
            print("ids are: " + str(len(follower_ids)))
            if not follower_ids:
                # nothing to look up on an empty page
                continue

            followers = list(self.api.lookup_users(user_ids=follower_ids))
            friends_list = [user.screen_name for user in followers]
            print("list of users\n")
            print(friends_list)

            # Take the id from each returned user instead of pairing by
            # position with the requested ids: the lookup may return users in
            # a different order or leave some out, which would attach ids to
            # the wrong screen names.
            friends_list_output = [[screen_name, user.id, user.screen_name]
                                   for user in followers]
            print(friends_list_output)

            # NOTE(review): the header row is written once per page because
            # the file is opened in append mode each time -- confirm whether
            # repeated headers are wanted.
            with open(users_friends_path, 'ab') as f:
                writer = csv.writer(f)
                writer.writerow(["screen_name", "id", "friends"])
                writer.writerows(friends_list_output)
            time.sleep(1)

    def readfile(self):
        """Process every tweet found in ``tweets_data_path``.

        The file is read line by line as JSON; lines that do not parse are
        ignored.  For each parsed record that carries a user screen name, the
        user's 5 most recent tweets are written via ``new_get_all_tweets`` and
        the user's followers via ``find_friends``.  Failures of either step
        are printed and do not stop the run.
        """
        tweets_data = []
        with open(tweets_data_path, "r") as tweets_file:
            for line in tweets_file:
                try:
                    tweets_data.append(json.loads(line))
                except ValueError:
                    # not valid JSON (e.g. a blank line): skip it
                    continue
        print(len(tweets_data))

        for counter, tweet in enumerate(tweets_data, 1):
            try:
                screen_name = tweet['user']['screen_name']
            except (KeyError, TypeError):
                # record without user information: nothing to fetch
                print("skipping record %d: no user screen_name" % counter)
                continue

            try:
                self.new_get_all_tweets(screen_name, 5)
            except Exception as e:
                print("error:\n")
                print(str(e))

            try:
                print(screen_name)
                self.find_friends(screen_name)
            except Exception as e:
                print("fail:\n")
                print(str(e))

            print(counter)