# Block-then-unblock every follower to force them to unfollow.
import tweepy

print('The consumer key must be of a Twitter developer account and the access key '
      'must be of the account you want to remove the followers of (they can be of '
      'the same account if you wish to remove the followers of your developer account).')
consumerKey = input('Enter your consumer key: ')
consumerSecret = input('Enter your consumer secret key: ')
accessKey = input('Enter your access key: ')
accessSecret = input('Enter your access secret key: ')

auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
auth.set_access_token(accessKey, accessSecret)
api = tweepy.API(auth, wait_on_rate_limit=True,
                 wait_on_rate_limit_notify=True, compression=True)

# The access token identifies the account whose followers will be removed
# (the original referenced an undefined `screenname`).
screenname = api.me().screen_name

ids = []
print("Starting to remove followers. You'll see their Twitter user ID printed out "
      "when they have been blocked and unblocked.")
for page in tweepy.Cursor(api.followers_ids, screen_name=screenname).pages():
    ids.extend(page)

for user in ids:
    try:
        api.create_block(user)
        print('Blocked', user)
    except tweepy.TweepError:
        print('There was an error blocking the user with ID', user)
        continue
    try:
        api.destroy_block(user)
        print('Unblocked', user)
    except tweepy.TweepError:
        print('There was an error unblocking the user with ID', user)
def sd_friend_scraper(target):
    # Assumes a module-level `api` client and `sd_friends` list.
    for i in tweepy.Cursor(api.friends, target).items():
        sd_friends.append(i.id)
    print("successfully scraped friends")
terms = [term for row in list(csvreader) for term in row]

# Defining labelling logic (random placeholder labels).
def labelTweet(tweet):
    return np.random.randint(2)

# Scrape results for each term.
import os
from time import sleep
clear = lambda: os.system('clear')
# from IPython.display import clear_output

tweets = {}
filename = "tweets.csv"
f = csv.writer(open(filename, "a"))
count = 0
for term in terms:
    for tweet in tweepy.Cursor(api.search, q=term, count=100, lang="en").items():
        if tweet.text not in tweets:
            count += 1
            # Label once so the stored and written labels agree (the original
            # called labelTweet() twice and could record two different labels).
            label = labelTweet(tweet.text)
            tweets[tweet.text] = label
            # The csv writer quotes fields itself; no manual quoting needed.
            f.writerow([tweet.text, label])
            print(term + " " + str(count))
            sleep(1)
            clear()
            # clear_output(wait=True)
def main():
    # Credentials redacted; supply your own developer keys.
    consumer_key = 'YOUR_CONSUMER_KEY'
    consumer_secret = 'YOUR_CONSUMER_SECRET'
    access_token = 'YOUR_ACCESS_TOKEN'
    access_token_secret = 'YOUR_ACCESS_TOKEN_SECRET'

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    searchTerm = input('Enter the term/hashtag to search about: ')
    noOfSearchTerms = int(input('Enter how many Tweets to analyze: '))
    tweets = tweepy.Cursor(api.search, q=searchTerm, lang='en').items(noOfSearchTerms)

    positive = 0
    weaklyPositive = 0
    stronglyPositive = 0
    negative = 0
    weaklyNegative = 0
    stronglyNegative = 0
    neutral = 0
    polarity = 0

    for tweet in tweets:
        print(tweet)
        analysis = TextBlob(tweet.text)
        polarity += analysis.sentiment.polarity
        if analysis.sentiment.polarity == 0:
            neutral += 1
        elif 0 < analysis.sentiment.polarity <= 0.3:
            weaklyPositive += 1
        elif 0.3 < analysis.sentiment.polarity <= 0.6:
            positive += 1
        elif 0.6 < analysis.sentiment.polarity <= 1:
            stronglyPositive += 1
        elif -0.3 < analysis.sentiment.polarity <= 0:
            weaklyNegative += 1
        elif -0.6 < analysis.sentiment.polarity <= -0.3:
            negative += 1
        elif -1 < analysis.sentiment.polarity <= -0.6:
            stronglyNegative += 1

    positive = percentage(positive, noOfSearchTerms)
    weaklyPositive = percentage(weaklyPositive, noOfSearchTerms)
    stronglyPositive = percentage(stronglyPositive, noOfSearchTerms)
    negative = percentage(negative, noOfSearchTerms)
    weaklyNegative = percentage(weaklyNegative, noOfSearchTerms)
    stronglyNegative = percentage(stronglyNegative, noOfSearchTerms)
    neutral = percentage(neutral, noOfSearchTerms)

    # Average reaction (the original also ran polarity through percentage(),
    # which double-scales the average; a single division is correct).
    polarity = polarity / noOfSearchTerms

    print('How people are reacting to ' + searchTerm + ' by analyzing '
          + str(noOfSearchTerms) + ' Tweets')
    if polarity == 0:
        print("Neutral")
    elif 0 < polarity <= 0.3:
        print("Weakly Positive")
    elif 0.3 < polarity <= 0.6:
        print("Positive")
    elif 0.6 < polarity <= 1:
        print("Strongly Positive")
    elif -0.3 < polarity <= 0:
        print("Weakly Negative")
    elif -0.6 < polarity <= -0.3:
        print("Negative")
    elif -1 < polarity <= -0.6:
        print("Strongly Negative")

    print("Detailed Report: ")
    print(str(positive) + "% people thought it was positive")
    print(str(weaklyPositive) + "% people thought it was weakly positive")
    print(str(stronglyPositive) + "% people thought it was strongly positive")
    print(str(negative) + "% people thought it was negative")
    print(str(weaklyNegative) + "% people thought it was weakly negative")
    print(str(stronglyNegative) + "% people thought it was strongly negative")
    print(str(neutral) + "% people thought it was neutral")

    labels = ['Positive [' + str(positive) + '%]',
              'Weakly Positive [' + str(weaklyPositive) + '%]',
              'Strongly Positive [' + str(stronglyPositive) + '%]',
              'Neutral [' + str(neutral) + '%]',
              'Negative [' + str(negative) + '%]',
              'Weakly Negative [' + str(weaklyNegative) + '%]',
              'Strongly Negative [' + str(stronglyNegative) + '%]']
    sizes = [positive, weaklyPositive, stronglyPositive, neutral,
             negative, weaklyNegative, stronglyNegative]
    colors = ['yellowgreen', 'lightgreen', 'darkgreen', 'gold',
              'red', 'lightsalmon', 'darkred']
    patches, texts = plt.pie(sizes, colors=colors, startangle=90)
    plt.legend(patches, labels, loc="best")
    plt.title('How people are reacting on ' + searchTerm + ' by analyzing '
              + str(noOfSearchTerms) + ' Tweets.')
    plt.axis('equal')
    plt.tight_layout()
    plt.show()
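# main() above calls percentage() without defining it. A minimal sketch of the
# assumed helper (the name comes from the call sites; the rounding behavior is
# an assumption, not from the source):
def percentage(part, whole):
    # Express `part` as a percentage of `whole`, rounded to two decimals.
    return round(100 * float(part) / float(whole), 2)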
def DownloadData(self):
    # Authenticating (supply your own keys).
    consumerKey = ''
    consumerSecret = ''
    accessToken = ''
    accessTokenSecret = ''
    auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
    auth.set_access_token(accessToken, accessTokenSecret)
    api = tweepy.API(auth)

    # Input for the term to be searched and how many tweets to fetch.
    searchTerm = input("Enter Keyword/Tag to search about: ")
    NoOfTerms = int(input("Enter how many tweets to search: "))

    # Searching for tweets.
    self.tweets = tweepy.Cursor(api.search, q=searchTerm, lang="en").items(NoOfTerms)

    # Open/create a file to append data to.
    csvFile = open('result.csv', 'a')
    csvWriter = csv.writer(csvFile)

    # Counters for the sentiment buckets.
    polarity = 0
    positive = 0
    weak_positive = 0
    strong_positive = 0
    negative = 0
    weak_negative = 0
    strong_negative = 0
    neutral = 0

    # Iterating through the tweets fetched.
    for tweet in self.tweets:
        # Append to a temp list so we can store it in the CSV later (UTF-8 encoded).
        self.tweetText.append(self.cleanTweet(tweet.text).encode('utf-8'))
        analysis = TextBlob(tweet.text)
        polarity += analysis.sentiment.polarity  # summed to find the average later
        if analysis.sentiment.polarity == 0:
            neutral += 1
        elif 0 < analysis.sentiment.polarity <= 0.3:
            weak_positive += 1
        elif 0.3 < analysis.sentiment.polarity <= 0.6:
            positive += 1
        elif 0.6 < analysis.sentiment.polarity <= 1:
            strong_positive += 1
        elif -0.3 < analysis.sentiment.polarity <= 0:
            weak_negative += 1
        elif -0.6 < analysis.sentiment.polarity <= -0.3:
            negative += 1
        elif -1 < analysis.sentiment.polarity <= -0.6:
            strong_negative += 1

    # Write to the CSV and close the file.
    csvWriter.writerow(self.tweetText)
    csvFile.close()

    # Convert counts to percentages. (The original assigned these to new
    # wpositive/spositive/... names but kept printing the raw counts below;
    # reassigning in place fixes that mismatch.)
    positive = self.percentage(positive, NoOfTerms)
    weak_positive = self.percentage(weak_positive, NoOfTerms)
    strong_positive = self.percentage(strong_positive, NoOfTerms)
    negative = self.percentage(negative, NoOfTerms)
    weak_negative = self.percentage(weak_negative, NoOfTerms)
    strong_negative = self.percentage(strong_negative, NoOfTerms)
    neutral = self.percentage(neutral, NoOfTerms)

    # Finding the average reaction.
    polarity = polarity / NoOfTerms

    # Printing out the data.
    print("How people are reacting on " + searchTerm + " by analyzing "
          + str(NoOfTerms) + " tweets.")
    print()
    print("General Report: ")
    if polarity == 0:
        print("Neutral")
    elif 0 < polarity <= 0.3:
        print("Weakly Positive")
    elif 0.3 < polarity <= 0.6:
        print("Positive")
    elif 0.6 < polarity <= 1:
        print("Strongly Positive")
    elif -0.3 < polarity <= 0:
        print("Weakly Negative")
    elif -0.6 < polarity <= -0.3:
        print("Negative")
    elif -1 < polarity <= -0.6:
        print("Strongly Negative")
    print()
    print("Detailed Report: ")
    print(str(positive) + "% people thought it was positive")
    print(str(weak_positive) + "% people thought it was weakly positive")
    print(str(strong_positive) + "% people thought it was strongly positive")
    print(str(negative) + "% people thought it was negative")
    print(str(weak_negative) + "% people thought it was weakly negative")
    print(str(strong_negative) + "% people thought it was strongly negative")
    print(str(neutral) + "% people thought it was neutral")
    self.plotPieChart(positive, weak_positive, strong_positive, negative,
                      weak_negative, strong_negative, neutral, searchTerm, NoOfTerms)
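# DownloadData() above calls three helper methods that are not shown:
# cleanTweet(), percentage(), and plotPieChart(). A minimal sketch of what they
# are assumed to look like, as methods of the same class (the bodies are
# assumptions inferred from the call sites, not from the source):
import re
import matplotlib.pyplot as plt

def cleanTweet(self, tweet):
    # Remove mentions, links, and special characters.
    return ' '.join(re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+://\S+)",
                           " ", tweet).split())

def percentage(self, part, whole):
    # Express `part` as a percentage of `whole`, rounded to two decimals.
    return round(100 * float(part) / float(whole), 2)

def plotPieChart(self, positive, wpositive, spositive, negative, wnegative,
                 snegative, neutral, searchTerm, noOfTerms):
    # Same pie-chart layout as the standalone main() example above.
    labels = ['Positive [' + str(positive) + '%]',
              'Weakly Positive [' + str(wpositive) + '%]',
              'Strongly Positive [' + str(spositive) + '%]',
              'Neutral [' + str(neutral) + '%]',
              'Negative [' + str(negative) + '%]',
              'Weakly Negative [' + str(wnegative) + '%]',
              'Strongly Negative [' + str(snegative) + '%]']
    sizes = [positive, wpositive, spositive, neutral,
             negative, wnegative, snegative]
    colors = ['yellowgreen', 'lightgreen', 'darkgreen', 'gold',
              'red', 'lightsalmon', 'darkred']
    patches, texts = plt.pie(sizes, colors=colors, startangle=90)
    plt.legend(patches, labels, loc="best")
    plt.title('How people are reacting on ' + searchTerm + ' by analyzing '
              + str(noOfTerms) + ' tweets.')
    plt.axis('equal')
    plt.tight_layout()
    plt.show()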
import re
from datetime import datetime, timedelta

import tweepy as tw

consumer_key = 'on_twitter'
consumer_secret = 'on_twitter'
access_token = 'on_twitter'
access_token_secret = 'on_twitter'

auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tw.API(auth, wait_on_rate_limit=True)

# search_term = "#climate+change -filter:retweets"
yesterday = datetime.strftime(datetime.now() - timedelta(1), '%Y-%m-%d')
search_term = '(disease) -filter:retweets'
tweets = tw.Cursor(api.search, q=search_term, lang='en', since=yesterday).items(1000)
all_tweets = [tweet.text for tweet in tweets]

def remove_url(txt):
    # Strip URLs and any non-alphanumeric characters.
    return ' '.join(re.sub(r"([^0-9A-Za-z \t])|(\w+:\/\/\S+)", '', txt).split())

all_tweets_no_urls = [remove_url(tweet) for tweet in all_tweets]
def delete(self):
    if not self.authenticated:
        return

    # First, run fetch
    click.secho("Before deleting anything, fetch", fg="cyan")
    self.fetch()

    # Unretweet and unlike tweets
    if self.common.settings.get("retweets_likes"):
        # Unretweet
        if self.common.settings.get("retweets_likes_delete_retweets"):
            datetime_threshold = datetime.datetime.utcnow() - datetime.timedelta(
                days=self.common.settings.get("retweets_likes_retweets_threshold")
            )
            tweets = (
                self.common.session.query(Tweet)
                .filter(Tweet.user_id == int(self.common.settings.get("user_id")))
                .filter(Tweet.is_deleted == 0)
                .filter(Tweet.is_retweet == 1)
                .filter(Tweet.created_at < datetime_threshold)
                .order_by(Tweet.created_at)
                .all()
            )
            click.secho(
                "Deleting {} retweets, starting with the earliest".format(len(tweets)),
                fg="cyan",
            )
            count = 0
            for tweet in tweets:
                try:
                    self.api.destroy_status(tweet.status_id)
                    tweet.unretweet_summarize()
                    tweet.is_deleted = True
                    self.common.session.add(tweet)
                except tweepy.error.TweepError as e:
                    if e.api_code == 144:
                        click.echo(
                            "Error, retweet {} is already deleted, updating database".format(
                                tweet.status_id
                            )
                        )
                        tweet.is_deleted = True
                        self.common.session.add(tweet)
                    else:
                        click.echo("Error for tweet {}: {}".format(tweet.status_id, e))
                count += 1
                if count % 20 == 0:
                    self.common.session.commit()
            self.common.session.commit()
            self.common.log("Deleted %s retweets" % count)

        # Unlike
        if self.common.settings.get("retweets_likes_delete_likes"):
            datetime_threshold = datetime.datetime.utcnow() - datetime.timedelta(
                days=self.common.settings.get("retweets_likes_likes_threshold")
            )
            tweets = (
                self.common.session.query(Tweet)
                .filter(Tweet.user_id != int(self.common.settings.get("user_id")))
                .filter(Tweet.is_unliked == False)
                .filter(Tweet.favorited == True)
                .filter(Tweet.created_at < datetime_threshold)
                .order_by(Tweet.created_at)
                .all()
            )
            click.secho(
                "Unliking {} tweets, starting with the earliest".format(len(tweets)),
                fg="cyan",
            )
            count = 0
            for tweet in tweets:
                try:
                    self.api.destroy_favorite(tweet.status_id)
                    tweet.unlike_summarize()
                    tweet.is_unliked = True
                    self.common.session.add(tweet)
                except tweepy.error.TweepError as e:
                    if e.api_code == 144:
                        click.echo(
                            "Error, tweet {} is already unliked, updating database".format(
                                tweet.status_id
                            )
                        )
                        tweet.is_unliked = True
                        self.common.session.add(tweet)
                    else:
                        click.echo("Error for tweet {}: {}".format(tweet.status_id, e))
                count += 1
                if count % 20 == 0:
                    self.common.session.commit()
            self.common.session.commit()
            self.common.log("Unliked %s tweets" % count)

    # Deleting tweets
    if self.common.settings.get("delete_tweets"):
        tweets_to_delete = self.common.get_tweets_to_delete()
        click.secho(
            "Deleting {} tweets, starting with the earliest".format(
                len(tweets_to_delete)
            ),
            fg="cyan",
        )
        count = 0
        for tweet in tweets_to_delete:
            try:
                self.api.destroy_status(tweet.status_id)
                tweet.delete_summarize()
                tweet.is_deleted = True
                self.common.session.add(tweet)
            except tweepy.error.TweepError as e:
                if e.api_code == 144:
                    click.echo(
                        "Error, tweet {} is already deleted, updating database".format(
                            tweet.status_id
                        )
                    )
                    tweet.is_deleted = True
                    self.common.session.add(tweet)
                else:
                    click.echo("Error for tweet {}: {}".format(tweet.status_id, e))
            count += 1
            if count % 20 == 0:
                self.common.session.commit()
        self.common.session.commit()
        self.common.log("Deleted %s tweets" % count)

    # Delete DMs
    if self.common.settings.get("delete_dms"):
        datetime_threshold = datetime.datetime.utcnow() - datetime.timedelta(
            days=self.common.settings.get("dms_days_threshold")
        )
        # Sadly, only the last 30 days' worth:
        # https://developer.twitter.com/en/docs/direct-messages/sending-and-receiving/api-reference/list-events
        click.secho(
            "Fetching direct message metadata for the last 30 days", fg="cyan"
        )

        # Fetch direct messages
        count = 0
        for page in tweepy.Cursor(self.api.list_direct_messages).pages():
            for dm in page:
                created_timestamp = datetime.datetime.fromtimestamp(
                    int(dm.created_timestamp) / 1000
                )
                if created_timestamp <= datetime_threshold:
                    self.api.destroy_direct_message(dm.id)
                    click.echo(
                        "Deleted DM {}, id {}".format(
                            created_timestamp.strftime("%Y-%m-%d"), dm.id
                        )
                    )
                    count += 1
                else:
                    click.secho(
                        "Skipping DM {}, id {}".format(
                            created_timestamp.strftime("%Y-%m-%d"), dm.id
                        ),
                        dim=True,
                    )
        self.common.log("Deleted %s DMs" % count)
def follow_followers(api):
    logger.info("Retrieving and following followers")
    for follower in tweepy.Cursor(api.followers).items():
        if not follower.following:
            logger.info(f"Following {follower.name}")
            follower.follow()
import time

import tweepy
from tweepy import OAuthHandler

import twitter_credentials

auth = OAuthHandler(twitter_credentials.CONSUMER_KEY,
                    twitter_credentials.CONSUMER_SECRET)
auth.set_access_token(twitter_credentials.ACCESS_TOKEN,
                      twitter_credentials.ACCESS_TOKEN_SECRET)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

user = api.me()
search = 'Javascript'
numberOfTweets = 500

for tweet in tweepy.Cursor(api.search, search).items(numberOfTweets):
    try:
        tweet.favorite()
        print('Tweet liked')  # print only after the like actually succeeds
        time.sleep(10)
    except tweepy.TweepError as e:
        print(e.reason)
    except StopIteration:
        break
def get_firefox_mentions(api):
    # Uses the standard search API, which can only access the last 7 days of data.
    # Uses since_id to ensure no duplicates: if results from a specific ID onwards
    # are required, set since_id to that ID; else default to no lower limit and go
    # as far back as the API allows.
    qry_max_id = ("""SELECT max(id_str) max_id FROM {0} """).format(
        dataset_name + ".twitter_mentions")
    query_job = bq_client.query(qry_max_id)
    max_id_result = query_job.to_dataframe()
    max_id = max_id_result['max_id'].values[0]
    print(max_id)

    # searchQuery = '#someHashtag'  # this is what we're searching for
    maxTweets = 10000000  # some arbitrarily large number
    tweetCount = 0
    print("Downloading max {0} tweets".format(maxTweets))

    # tweet_mode="extended" to include truncated tweets
    results = []
    try:
        if max_id is not None:
            new_tweets = tweepy.Cursor(api.search, q="@firefox",
                                       tweet_mode="extended",
                                       since_id=str(max_id)).items()
        else:
            new_tweets = tweepy.Cursor(api.search, q="@firefox",
                                       tweet_mode="extended").items()

        for tweet in new_tweets:
            tweet_row = get_tweet_data_row(tweet)
            results.append(tweet_row)
            tweetCount = tweetCount + 1
            if tweetCount > maxTweets:
                break

        df = pd.DataFrame.from_records(results, columns=[
            "id_str", "created_at", "full_text", "user_id",
            "in_reply_to_status_id_str"
        ])

        if df.shape[0] > 0:
            min_id_str = df['id_str'].min()
            max_id_str = df['id_str'].max()
            print('min: ' + min_id_str + ', max: ' + max_id_str)
            fn = ('twitter_data_mentions_' + str(min_id_str) + "_to_"
                  + str(max_id_str) + '.csv')
            df.to_csv("/tmp/" + fn, index=False, encoding='utf-8')
            print("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fn))

            blob = sumo_bucket.blob("twitter/" + fn)
            blob.upload_from_filename("/tmp/" + fn)

            s = [
                bigquery.SchemaField("id_str", "INTEGER"),
                bigquery.SchemaField("created_at", "TIMESTAMP"),
                bigquery.SchemaField("full_text", "STRING"),
                bigquery.SchemaField("user_id", "INTEGER"),
                bigquery.SchemaField("in_reply_to_status_id_str", "INTEGER"),
            ]
            update_bq_table("gs://{}/twitter/".format(bucket), fn,
                            'twitter_mentions', s)
        else:
            print("Downloaded {0} tweets, no mentions updates.".format(tweetCount))
    except tweepy.TweepError as e:
        # Just exit on any error.
        print("some error : " + str(e))
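# get_tweet_data_row() is defined elsewhere in this project. Judging by the
# DataFrame columns above, a minimal sketch would look like this (the field
# choices are assumptions inferred from the column names, not from the source):
def get_tweet_data_row(tweet):
    # Flatten one Status object into the row layout used by the DataFrames above.
    return [
        tweet.id_str,
        tweet.created_at,
        tweet.full_text.replace("\n", "\\n"),
        tweet.user.id,
        tweet.in_reply_to_status_id_str,
    ]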
twitter_config["consumer_secret"]) auth.set_access_token(twitter_config["access_token"], twitter_config["access_token_secret"]) api = tweepy.API(auth) # retrieve last savepoint if available try: with open(last_id_file, "r") as file: savepoint = file.read() except IOError: savepoint = "" print("No savepoint found. Bot is now searching for results") # search query timelineIterator = tweepy.Cursor(api.search, q=search, since_id=savepoint, lang=tweetLanguage).items(100) # put everything into a list to be able to sort/filter timeline = [] for status in timelineIterator: timeline.append(status) print(timeline) try: last_tweet_id = timeline[0].id except IndexError: last_tweet_id = savepoint # filter @replies/blacklisted words & users out and reverse timeline #timeline = filter(lambda status: status.text[0] = "@", timeline) - uncomment to remove all tweets with an @mention
def get_firefox_reviews(api):
    # Get all tweets from the @firefox timeline.
    # If results from a specific ID onwards are required, set since_id to that ID;
    # else default to no lower limit and go as far back as the API allows.
    sinceId = None

    # If results only below a specific ID are required, set max_id to that ID;
    # else default to no upper limit and start from the most recent tweet
    # matching the search query.
    qry_max_id = ("""SELECT max(id_str) max_id FROM {0} """).format(
        dataset_name + ".twitter_reviews")
    query_job = bq_client.query(qry_max_id)
    max_id_result = query_job.to_dataframe()
    max_id = max_id_result['max_id'].values[0]
    print(max_id)

    maxTweets = 10000000  # some arbitrarily large number
    tweetCount = 0
    print("Downloading max {0} tweets".format(maxTweets))

    # tweet_mode="extended" to include truncated tweets
    results = []
    try:
        if max_id is not None:
            # max_id - 1 would exclude max_id itself, since it was already
            # added in the previous pass.
            new_tweets = tweepy.Cursor(api.user_timeline,
                                       screen_name='@firefox',
                                       tweet_mode="extended",
                                       since_id=str(max_id)).items()
        else:
            new_tweets = tweepy.Cursor(api.user_timeline,
                                       screen_name='@firefox',
                                       tweet_mode="extended").items()

        for tweet in new_tweets:
            # If in_reply_to_status_id_str has a number, look up that status;
            # else leave the reply text, created_at, and user_id fields blank.
            # We wouldn't know what % goes un-replied anyway.
            tweet_row = get_tweet_data_row(tweet)
            in_reply_to_status_id_str = tweet.in_reply_to_status_id_str
            if in_reply_to_status_id_str:
                try:
                    reply_tweet = api.get_status(in_reply_to_status_id_str)
                    tweet_row.extend([
                        reply_tweet.text.replace("\n", "\\n"),
                        reply_tweet.created_at,
                        reply_tweet.user.id
                    ])
                except tweepy.TweepError as e:
                    # The original passed the template and args separately to
                    # print(); .format() is needed for the substitution.
                    print("Error trying to get in_reply_to_status_id_str={0}: {1}"
                          .format(in_reply_to_status_id_str, str(e)))
                    tweet_row.extend(['', '', ''])
            else:
                tweet_row.extend(['', '', ''])
            results.append(tweet_row)
            tweetCount = tweetCount + 1
            if tweetCount > maxTweets:
                break

        df = pd.DataFrame.from_records(results, columns=[
            "id_str", "created_at", "full_text", "user_id",
            "in_reply_to_status_id_str", "in_reply_to_status_text",
            "in_reply_to_status_created_at", "in_reply_to_status_user_id"
        ])
        # df['ga_date'] = pd.to_datetime(df['ga_date'], format="%Y%m%d").dt.strftime("%Y-%m-%d")

        if df.shape[0] > 0:
            min_id_str = df['id_str'].min()
            max_id_str = df['id_str'].max()
            fn = 'twitter_data_' + str(min_id_str) + "_to_" + str(max_id_str) + '.csv'
            df.to_csv("/tmp/" + fn, index=False)
            print("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fn))

            blob = sumo_bucket.blob("twitter/" + fn)
            blob.upload_from_filename("/tmp/" + fn)

            s = [
                bigquery.SchemaField("id_str", "INTEGER"),
                bigquery.SchemaField("created_at", "TIMESTAMP"),
                bigquery.SchemaField("full_text", "STRING"),
                bigquery.SchemaField("user_id", "INTEGER"),
                bigquery.SchemaField("in_reply_to_status_id_str", "INTEGER"),
                bigquery.SchemaField("in_reply_to_status_text", "STRING"),
                bigquery.SchemaField("in_reply_to_status_created_at", "TIMESTAMP"),
                bigquery.SchemaField("in_reply_to_status_user_id", "INTEGER"),
            ]
            update_bq_table("gs://{}/twitter/".format(bucket), fn,
                            'twitter_reviews', s)
        else:
            print("Downloaded {0} tweets, no reviews updates.".format(tweetCount))
    except tweepy.TweepError as e:
        # Just exit on any error.
        print("some error : " + str(e))
def followbot():
    try:
        # name_to_find_ids_of = input("Name to find ids of? Example: twitter, Android, gameinformer, gamasutra\n>")
        name_to_find_ids_of = 'twitter'
        name_list = []
        info_list = []
        with open("twitter_accounts.txt", "r") as f:
            lines = f.readlines()
        print("Gathering status of all accounts.")
        with open("status.txt", "a") as f:
            f.write("\nGathering status of all accounts.\n")
        for line in lines:
            # Renamed from `list` to avoid shadowing the built-in.
            fields = line.split(':')
            name = fields[0]
            CONSUMER_KEY = fields[1]
            CONSUMER_SECRET = fields[2]
            ACCESS_KEY = fields[3]
            ACCESS_SECRET = fields[4][:-1]  # strip the trailing newline
            auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
            auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
            api = tweepy.API(auth)
            text_file = open("unfollowlist.txt", "w")
            following_counter = 0
            try:
                for page in tweepy.Cursor(api.friends_ids, screen_name=name).pages():
                    for friend_id in page:
                        following_counter += 1
                        text_file.write(str(friend_id))
                        text_file.write("\n")
                    # time.sleep(60)
                text_file.close()
                with open("unfollowlist.txt", "r") as f:
                    unfollow_lines = f.readlines()
                last_line = unfollow_lines[-1]
                write_actions = True
                try:
                    api.destroy_friendship(int(last_line))
                except tweepy.TweepError as e:
                    errorcode = e.args[0][0]['code']
                    if errorcode == 261:
                        print("Application cannot do write actions. Name: " + name)
                        with open("status.txt", "a") as f:
                            f.write("\nApplication cannot do write actions. Name: "
                                    + name + "\n")
                        write_actions = False
                    elif errorcode == 88:
                        print("Rate limit exceeded")
                    else:
                        print(e)
                        print("Error happened on account: " + name)
                        time.sleep(10)
                except ConnectionResetError:
                    print("Connection error, sleeping 10s and continuing")
                    time.sleep(10)
                    continue
                new_list = [name, CONSUMER_KEY, CONSUMER_SECRET, ACCESS_KEY,
                            ACCESS_SECRET, following_counter, write_actions]
                info_list.append(new_list)
                name_list.append(new_list[0])
            except tweepy.TweepError as e:
                print(e)
                print("Error happened on account: " + name)
        print("\nDONE.\n")
        with open("status.txt", "a") as f:
            f.write("\nDONE.\n")
        for acc in info_list:
            if int(acc[5]) > 2126 and acc[6]:
                number_to_unfollow = int(acc[5]) - 2126
                unfollow(acc, number_to_unfollow)
        for acc in info_list:
            if acc[6]:
                follow(acc, name_to_find_ids_of)
    except (tweepy.TweepError, RuntimeError, TypeError, NameError,
            ConnectionError) as e:
        print("Fatal error, starting over.")
        print(e)
        with open("status.txt", "a") as f:
            f.write("Fatal error, starting over.")
            f.write(str(e))  # str() needed; file.write() rejects exception objects
def unfollow(acc, number_to_unfollow):
    name = acc[0]
    CONSUMER_KEY = acc[1]
    CONSUMER_SECRET = acc[2]
    ACCESS_KEY = acc[3]
    ACCESS_SECRET = acc[4]
    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
    api = tweepy.API(auth)

    text_file = open("unfollowlist.txt", "w")
    for page in tweepy.Cursor(api.friends_ids, screen_name=name).pages():
        for friend_id in page:
            text_file.write(str(friend_id))
            text_file.write("\n")
    print("Starting unfollowing on: " + name)
    with open("status.txt", "a") as f:
        f.write("\nStarting unfollowing on: " + name + "\n")
    text_file.close()
    # print("All current 'following' now gathered, starting to unfollow.\n")

    running = True
    counter = 0
    while running:
        try:
            with open("unfollowlist.txt", "r") as f:
                lines = f.readlines()
            last_line = lines[-1]
            try:
                api.destroy_friendship(int(last_line))
            except tweepy.TweepError as e:
                print(e)
                print("error happened on : " + name)
                time.sleep(20)
            except ConnectionResetError:
                print("Connection error")
                time.sleep(20)
            time.sleep(6)
            # Drop the last line of the file once that ID has been processed.
            with open("unfollowlist.txt", "r") as fin:
                data = fin.read().splitlines(True)
            with open('unfollowlist.txt', 'w') as fout:
                fout.writelines(data[:-1])
            s = str(counter) + ' / ' + str(number_to_unfollow) + ' unfollowed.'  # progress string
            print(s, end='')  # just print and flush
            # sys.stdout.flush()  # needed for flush when using \x08
            backspace(len(s))  # back up n chars so the next update overwrites
            if counter >= number_to_unfollow:
                running = False
            counter = counter + 1
        # The original `except tweepy.TweepError or ConnectionResetError`
        # only caught TweepError; a tuple catches both.
        except (tweepy.TweepError, ConnectionResetError):
            print("Some sort of error, waiting 20s")
            time.sleep(20)
    print("Unfollowing done on " + name + "!")
    with open("status.txt", "a") as f:
        f.write("\nUnfollowing done on " + name + "!\n")
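# backspace() and follow() are referenced above but not shown. A minimal sketch
# of the assumed helpers (the bodies are assumptions based purely on how they
# are called, not from the source):
import sys

def backspace(n):
    # Emit n backspace characters so the next progress string overwrites this one.
    sys.stdout.write('\x08' * n)
    sys.stdout.flush()

def follow(acc, target_name):
    # Authenticate as `acc` and follow the followers of `target_name`.
    auth = tweepy.OAuthHandler(acc[1], acc[2])
    auth.set_access_token(acc[3], acc[4])
    api = tweepy.API(auth, wait_on_rate_limit=True)
    for follower_id in tweepy.Cursor(api.followers_ids,
                                     screen_name=target_name).items():
        api.create_friendship(follower_id)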
''' Importing libraries '''
import tweepy
import csv
import pandas as pd

''' Authenticating Twitter Access with Developer Account '''
# Credentials redacted; supply your own developer keys.
consumer_key = 'YOUR_CONSUMER_KEY'
consumer_secret = 'YOUR_CONSUMER_SECRET'
access_token = 'YOUR_ACCESS_TOKEN'
access_token_secret = 'YOUR_ACCESS_TOKEN_SECRET'

''' Main Crawler Code '''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

''' Searching Tweets based on keyword '''
# Open/create a file to append data to.
csvFile = open('info.csv', 'a')
csvWriter = csv.writer(csvFile)
for tweet in tweepy.Cursor(api.search, q="#DevOpsatUPES", count=100,
                           lang="en", since="2020-02-02").items():
    print(tweet.created_at, tweet.text)
    csvWriter.writerow([tweet.created_at, tweet.text.encode('utf-8')])
def follow(self):
    for follower in tweepy.Cursor(self.api.followers).items():
        follower.follow()
def get_history(self, api, ntweets):
    cursor = tweepy.Cursor(api.user_timeline, screen_name=self.tweeter,
                           count=100).items(ntweets)
    self.tweeter_history = [tweet._json for tweet in limit_cursor(cursor)]
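# limit_cursor() is assumed to be a rate-limit-tolerant wrapper around a tweepy
# cursor. A minimal sketch (the 15-minute sleep matches tweepy's rate-limit
# window, but the exact interval is an assumption):
import time

def limit_cursor(cursor):
    while True:
        try:
            yield next(cursor)
        except tweepy.RateLimitError:
            # Wait out the rate-limit window, then resume where we left off.
            time.sleep(15 * 60)
        except StopIteration:
            return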
import time
import tweepy
import sys

# Replace the foo/bar placeholders with your Twitter API keys from dev.twitter.com.
auth = tweepy.auth.OAuthHandler(consumer_key="foo", consumer_secret="bar")
auth.set_access_token("foo", "bar")

# The following dictionaries etc. aren't strictly needed for this,
# but they are useful for your own more in-depth apps.
api = tweepy.API(auth_handler=auth, wait_on_rate_limit=True)

print("Loading followers..")
followers = []
for follower in tweepy.Cursor(api.followers).items():
    followers.append(follower)

print("Found %s followers, finding friends.." % len(followers))
friends = []
for friend in tweepy.Cursor(api.friends).items():
    friends.append(friend)

# Creating dictionaries keyed by id is handy too.
friend_dict = {}
for friend in friends:
    friend_dict[friend.id] = friend

follower_dict = {}
for follower in followers:
    follower_dict[follower.id] = follower
from RefinedMOMTweetRetrieverCode import TweetCollector as TC

# Keys to initiate the connection to the Twitter API for the bot
# (the right-hand names are assumed to be defined/imported elsewhere).
consumer_key = consumer_key
consumer_secret = consumer_secret
access_token = access_token
access_token_secret = access_token_secret

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

tweetjsons = []
for tweet in tweepy.Cursor(api.search, q="#makeovermonday ", since='2018-04-24',
                           until='2018-05-03', include_entities=True,
                           result_type='recent', exclude_replies=True).items():
    if 'RT' not in tweet.text:  # remove retweets
        tweetjsons.append(tweet)

viz_info = []
for tweet in tweetjsons:
    ids = tweet.id_str
    name = tweet.author.name
    screen_name = tweet.author.screen_name
    try:
        location = tweet.author.location
    except AttributeError:
        location = None
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 23 21:42:17 2018

@author: mathe
"""
import pandas as pd
import tweepy

# Credentials redacted; supply your own developer keys.
consumer_key = 'YOUR_CONSUMER_KEY'
consumer_secret = 'YOUR_CONSUMER_SECRET'
access_token = 'YOUR_ACCESS_TOKEN'
access_token_secret = 'YOUR_ACCESS_TOKEN_SECRET'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

tweets = list()
sentimentos = list()

for tweet in tweepy.Cursor(api.search, q='#Lula -filter:retweets',
                           tweet_mode='extended', count=100, lang="pt").items(100):
    if (not tweet.retweeted) and ('RT @' not in tweet.full_text):
        tweets.append(tweet.full_text)

print('Collected tweets about Lula')
tweets_Dataframe = pd.DataFrame({'Text': tweets})
tweets_Dataframe.to_csv('../bases/tweets_lula_teste.csv',
                        encoding='utf-8', index=False)
print(api.rate_limit_status()['resources']['search'])
def fetch(self):
    if not self.authenticated:
        return

    if self.common.settings.get("delete_tweets"):
        # We fetch tweets since the last fetch (or all tweets, if it's None)
        since_id = self.common.settings.get("since_id")
        if since_id:
            click.secho("Fetching all recent tweets", fg="cyan")
        else:
            click.secho(
                "Fetching all tweets, this first run may take a long time",
                fg="cyan",
            )

        # Fetch tweets from the timeline a page at a time
        for page in tweepy.Cursor(
            self.api.user_timeline,
            id=self.common.settings.get("username"),
            since_id=since_id,
            tweet_mode="extended",
        ).pages():
            fetched_count = 0

            # Import these tweets, and all their threads
            for status in page:
                fetched_count += self.import_tweet_and_thread(Tweet(status))

                # Only commit every 20 tweets
                if fetched_count % 20 == 0:
                    self.common.session.commit()

            # Commit the leftovers
            self.common.session.commit()

            # Now hunt for threads. This is a dict that maps the root status_id
            # to a list of status_ids in the thread
            threads = {}
            for status in page:
                if status.in_reply_to_status_id:
                    status_ids = self.calculate_thread(status.id)
                    root_status_id = status_ids[0]
                    if root_status_id in threads:
                        for status_id in status_ids:
                            if status_id not in threads[root_status_id]:
                                threads[root_status_id].append(status_id)
                    else:
                        threads[root_status_id] = status_ids

            # For each thread, does this thread already exist, or do we create a new one?
            for root_status_id in threads:
                status_ids = threads[root_status_id]
                thread = (
                    self.common.session.query(Thread)
                    .filter_by(root_status_id=root_status_id)
                    .first()
                )
                if not thread:
                    thread = Thread(root_status_id)
                    count = 0
                    for status_id in status_ids:
                        tweet = (
                            self.common.session.query(Tweet)
                            .filter_by(status_id=status_id)
                            .first()
                        )
                        if tweet:
                            thread.tweets.append(tweet)
                            count += 1
                    if count > 0:
                        click.echo(
                            "Added new thread with {} tweets (root id={})".format(
                                count, root_status_id
                            )
                        )
                else:
                    count = 0
                    for status_id in status_ids:
                        tweet = (
                            self.common.session.query(Tweet)
                            .filter_by(status_id=status_id)
                            .first()
                        )
                        if tweet and tweet not in thread.tweets:
                            thread.tweets.append(tweet)
                            count += 1
                    if count > 0:
                        click.echo(
                            "Added {} tweets to existing thread (root id={})".format(
                                count, root_status_id
                            )
                        )
            self.common.session.commit()

    if self.common.settings.get("retweets_likes") and self.common.settings.get(
        "retweets_likes_delete_likes"
    ):
        like_since_id = self.common.settings.get("since_id")

        # Fetch tweets that are liked
        click.secho("Fetching tweets that you liked", fg="cyan")
        for page in tweepy.Cursor(
            self.api.favorites,
            id=self.common.settings.get("username"),
            since_id=like_since_id,
            tweet_mode="extended",
        ).pages():
            # Import these tweets
            for status in page:
                tweet = Tweet(status)
                if not tweet.already_saved(self.common.session):
                    tweet.fetch_summarize()
                    self.common.session.add(tweet)
            # Commit a page of tweets at a time
            self.common.session.commit()

    # All done, update the since_id
    tweet = (
        self.common.session.query(Tweet)
        .order_by(Tweet.status_id.desc())
        .first()
    )
    if tweet:
        self.common.settings.set("since_id", tweet.status_id)
        self.common.settings.save()

    # Calculate which threads should be excluded from deletion
    self.calculate_excluded_threads()

    self.common.settings.set(
        "last_fetch", datetime.datetime.today().strftime(self.last_fetch_format)
    )
    self.common.settings.save()
    self.common.log(
        "last_fetch: %s"
        % datetime.datetime.today().strftime(self.last_fetch_format)
    )
#QUESTION 1A

##Installing and loading the tweepy module
# pip install tweepy   (run in the shell, not inside Python)
import imp
import tweepy

##Establishing the twitter API to be used by my program
twitter = imp.load_source(
    'oswaldapp1',
    '/Users/oswaldcodjoe/OneDrive - Washington University in St. Louis/Summer 19/Python/SecretAPIFolder/twitterAPI/start_twitter_oswald.py')
api = twitter.client

##Creating a user object called WashU
WashU = api.get_user('@WUSTL')

##Extracting a list of 20 followers of WashU
WashU_followers = []
for item in tweepy.Cursor(api.followers_ids, '@WUSTL').items(20):
    WashU_followers.append(item)
WashU_followers

##Determining who among the 20 WashU followers has the highest number of tweets
WashU_followersstatuses_count = []
for i in WashU_followers:
    WashU_followersstatuses_count.append(api.get_user(i).statuses_count)
WashU_followersstatuses_count

max_value = max(WashU_followersstatuses_count)
max_index = WashU_followersstatuses_count.index(max_value)
api.get_user(WashU_followers[max_index]).name
consumer_key = keys[0]
consumer_secret = keys[1]
access_token = keys[2]
access_token_secret = keys[3]

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

cliente = MongoClient("mongodb://localhost:27017")
bd = cliente['bigdata']
coleccion = bd['tweets']

# Find the most recent stored tweet so only newer ones are fetched.
ultimo = coleccion.find_one(sort=list({'id': -1}.items()))
if ultimo is not None:
    ultimo_tweet = ultimo['id']
else:
    ultimo_tweet = None

tweets = []
contador = 1
for tweet in tweepy.Cursor(api.user_timeline, since_id=ultimo_tweet,
                           screen_name='alferdez', tweet_mode='extended').items(2500):
    tweet_dic = tweet._json
    tweets.append(tweet_dic)
    print("tweet captured", contador)
    contador += 1

if len(tweets) > 0:
    coleccion.insert_many(tweets)
    print("Uploaded:", len(tweets), 'tweets')
else:
    print("No new tweets to upload")
consumer_key = 'Enter consumer key within single quotes'
consumer_secret = 'Enter consumer secret key within single quotes'
access_token = 'Enter access token key within single quotes'
access_token_secret = 'Enter secret access key within single quotes'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

csvFile = open('tweets.csv', 'w')
csvWriter = csv.writer(csvFile)
for tweet in tweepy.Cursor(api.search, q="#AyodhyaVerdict", count=100,
                           lang="en", since="2019-11-28").items():
    # print(tweet.created_at, tweet.text)
    csvWriter.writerow([tweet.user.screen_name, tweet.text.encode('utf-8')])

# Renamed from `csv` to avoid shadowing the csv module used above.
tweets_df = pd.read_csv('tweets.csv', names=["Username", "Tweet"])
count = tweets_df['Username'].value_counts()[:]
tweets_df.head(10)
top2 = count.head(2)
top2

import matplotlib.pyplot as plt
colors = [
    "#E13F29", "#D69A80", "#D63B59", "#AE5552",
    "#CB5C3B", "#EB8076", "#96624E"
]
def fd_friends_scraper(target):
    for i in tweepy.Cursor(api.friends, target).items():
        CWLL_Tourney_friends.append(i.id)
# If the GeoJSON file is already present, open it and append the new results;
# otherwise create it.
if os.path.exists(filename):
    gdf = geopandas.read_file(filename)
else:
    gdf = geopandas.GeoDataFrame(columns=[
        'id_str', 'user_id_str', 'user_screen_name', 'text', 'created_at',
        'user_lang', 'user_location', 'longitude', 'latitude', 'geometry'
    ])

geolocator = Nominatim(user_agent="my_application")
twitter_account_without_coordinate = 0
new_twitter_user_added = 0

# Collect tweets
for tweet in tweepy.Cursor(api.search, q=search_words_no_retweet,
                           lang="en", since=date_since).items(numberOfTweets):
    condition = tweet.id_str in gdf['id_str'].values  # the tweet is already present
    if condition is False:
        if follow_user and tweet.user.id_str != user.id_str:
            api.create_friendship(tweet.user.id_str)
            print("friendship with " + tweet.user.screen_name)
        if tweet.user.location is not None:
            location = geolocator.geocode(tweet.user.location)
            if location is None:
                twitter_account_without_coordinate += 1
            else:
def fd_follower_scraper(target):
    for i in tweepy.Cursor(api.followers, target).items():
        CWLL_Tourney_followers.append(i.id)
tweetCount = 1
fileName = "All_Tweets_From_@" + screenName + "_Until_" + str(time) + ".txt"

#### Writing tweets to txt
with open(fileName, "w") as f:
    f.write("All Tweets From @" + screenName + " Until " + str(time))
    f.write("\n")
    f.write("---------------------")
    for status in tweepy.Cursor(api.user_timeline, user_id=userId,
                                tweet_mode="extended", exclude_replies=False,
                                include_rts=True).items():
        try:
            f.write("\n")
            f.write("Tweet No: " + str(tweetCount) + "\n")
            f.write("\n")
            f.write(name + " @" + screenName + " :\n")
            f.write(status.full_text + "\n")
            f.write("\n")
            f.write(str(status.created_at) + "\n")
            f.write("\n")
            f.write("---------------------")
            f.write("\n")
        except UnicodeEncodeError:
            pass  # skip tweets whose text can't be encoded
class TweetAnalyzer():
    def tweet_to_dataframe(self, tweets):
        df = pd.DataFrame(data=[tweet.text for tweet in tweets], columns=['tweets'])
        df['id'] = np.array([tweet.id for tweet in tweets])
        df['date'] = np.array([tweet.created_at for tweet in tweets])
        df['source'] = np.array([tweet.source for tweet in tweets])
        df['likes'] = np.array([tweet.favorite_count for tweet in tweets])
        df['retweets'] = np.array([tweet.retweet_count for tweet in tweets])
        df['location'] = np.array([tweet.geo for tweet in tweets])
        return df

if __name__ == "__main__":
    # Set up the client and analyzer.
    twitterclient = TwitterClient()
    api = twitterclient.get_twitter_api()
    tweetanalyzer = TweetAnalyzer()

    tweets = tw.Cursor(api.search, q=['#fire'], lang='en',
                       since='2020-03-10').items(1000)
    tweets_details = [[tweet.text, tweet.geo, tweet.user.location]
                      for tweet in tweets]
    df = pd.DataFrame(data=tweets_details, columns=['text', 'geo', 'location'])
    df.to_csv('fire.csv')

    # tweets = api.search(q=['corona', 'pandemic'], lang="en", count=2000)  # built-in search function in the api
    # print(tweets[0])
    # df = tweetanalyzer.tweet_to_dataframe(tweets)
    # df.to_csv('result.csv')
    # print(df.head(10))
# Twitter search with keyword
target_num = 50
query = "olympics"

csvFile = open('TwitterResults.csv', 'w')
csvWriter = csv.writer(csvFile)
csvWriter.writerow([
    "username", "author id", "created", "text", "retwc", "hashtag",
    "followers", "friends", "polarity", "subjectivity"
])

counter = 0
for tweet in tweepy.Cursor(api.search, q=query, lang="en",
                           result_type="popular", count=target_num).items():
    created = tweet.created_at
    text = unidecode.unidecode(tweet.text)
    retwc = tweet.retweet_count
    try:
        hashtag = tweet.entities[u'hashtags'][0][u'text']  # first hashtag used
    except IndexError:
        hashtag = "None"
    username = tweet.author.name              # author/user name
    authorid = tweet.author.id                # author/user ID
    followers = tweet.author.followers_count  # number of author/user followers (inlinks)
    friends = tweet.author.friends_count      # number of author/user friends (outlinks)