-
Notifications
You must be signed in to change notification settings - Fork 0
/
TwitterBot.py
212 lines (185 loc) · 8.22 KB
/
TwitterBot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# NOTE: to stop the stream, hit control + shift + \ or control + z
# TODO:
# [x] handle exceptions given when parsing a tweet that's already been liked/favorited/followed/etc.
# [] track DM's (most contests contact winners through DM)
# [x] figure out a way to leave this up and running forever (deal with rate limits?)
# [] decide if we want to filter by location, language, etc.
# [] we should come up with a system to stream tweets now and parse later.
# [x] make our page look less bot-like (not really programming-related).
# [x] only retweet tweets from the current time period on. the stream occasionally returns stuff from a while back that we don't want to deal with.
# [x] don't retweet tweets that are just someone else retweeting the contest.
# [x] deal with this embedded tweet nonsense
# [] parse @ signs
# [x] sleep when over rate limit
# [x] pass in error rather than code
# [x] wrap all our error checks in their own module
# [x] add a log file
# [x] capture an embedded tweet so we can inspect it
# [x] add pytz to requirements.txt
# [] purge followers, retweets, and likes periodically, how long do we want to wait to purge?
# can we introduce more randomness into the stream?
# [] make our bot look less bot-like by injecting phrases and tweets
# use a random imgur link and a dict of various "haha so funny" phrases
# [] log when the stream drops due to Tweepy
# [] remove commas, ampersands, etc. from keywords follow, like, RT
import tweepy # for all the twitter junk
import time # for sleeping
import pytz # for date-checking
from urllib2 import HTTPError
from datetime import timedelta # for date-checking
from keys import (consumer_key, consumer_secret,
access_token_key, access_token_secret, SELF_SCREEN_NAME)
from db_handlers import TweetStorage
from utilities import (get_now, bot_in_name,
parse_embedded_tweet, create_logger)
# Screen names of known bot-spotting accounts. Tweets authored by any of
# these are rejected by MyStreamListener.check_if_bot_spotter.
spotters = ["BotSpotterBot", "RealBotSpotter", "bufbvr"]
# Tweets created more than this many days ago are treated as invalid.
MAX_DAYS_BACK = 3
# Pause, in seconds, taken after each retweet / favorite / follow action.
SECONDS_TO_WAIT = 11
# Length, in minutes, of the nap taken by MyStreamListener.on_error.
MINUTES_TO_WAIT = 15
# Build the shared API client from the credentials imported from `keys`.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token_key, access_token_secret)
api = tweepy.API(auth)
# Separate loggers: one for successful actions, one for errors/rejections.
tweet_log = create_logger('Tweets')
error_log = create_logger('Errors')
# begin class definition
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that enters the contests found in the tracked stream.

    For every incoming status the listener resolves the original contest
    tweet, rejects it when it comes from a bot spotter or is too old, and
    then retweets, favorites and/or follows depending on which keywords the
    tweet text contains.
    """

    # Twitter API error code for "rate limit exceeded"; on_error naps on it.
    RATE_LIMIT_CODE = 88

    def on_status(self, status):
        """Process one status delivered by the stream."""
        try:
            tweet_to_retweet = self.get_og_tweet(status)
            # get_og_tweet / get_embedded return None after a rate-limit
            # error; skip the status instead of crashing in is_invalid.
            if tweet_to_retweet is None:
                return
            if not self.is_invalid(tweet_to_retweet):
                self.retweet(tweet_to_retweet)
                self.favorite(tweet_to_retweet)
                self.follow(tweet_to_retweet)
        except HTTPError as e:
            print(e)

    def get_og_tweet(self, status):
        """Return the earliest tweet we can find behind ``status``.

        Follows the ``retweeted_status`` chain to its end and then resolves
        an embedded tweet, if any. Returns None when a rate-limit error
        prevented the lookup.
        """
        tweet_status = status
        try:
            while hasattr(tweet_status, 'retweeted_status'):
                tweet_status = tweet_status.retweeted_status
            return self.get_embedded(tweet_status)
        except tweepy.RateLimitError:
            error_log.error('Hit rate limit error from get_og_tweet')
            return None

    def check_for_words(self, words, status):
        """Return True if any of ``words`` occurs in the normalized tweet text.

        The text is lower-cased and ``/``, ``,`` and ``\\`` are replaced by
        spaces before matching. Matching is by substring, so short keywords
        such as "rt" also match inside longer words. The normalized text is
        kept local: ``status.text`` is left untouched.
        """
        text = status.text.lower().replace("/", " ").replace(
            ",", " ").replace("\\", " ")
        return any(word in text for word in words)

    def check_if_bot_spotter(self, name):
        """Return True if ``name`` is a listed spotter or passes bot_in_name."""
        return (name in spotters) or bot_in_name(name)

    def check_date(self, date):
        """Return True if ``date`` is more than MAX_DAYS_BACK days in the past.

        ``date`` is a naive datetime; it is localized to UTC before being
        compared with ``get_now()``.
        """
        date = pytz.utc.localize(date)
        return (date < (get_now() - timedelta(days=MAX_DAYS_BACK)))

    def get_embedded(self, status):
        """Return the tweet embedded in ``status``, or ``status`` itself.

        Returns None when fetching the embedded tweet hit the rate limit.
        """
        base_tweet_id = parse_embedded_tweet(status.text)
        if base_tweet_id is None:
            return status
        try:
            return api.get_status(base_tweet_id)
        except tweepy.RateLimitError:
            error_log.error('Hit rate limit error from get_embedded')
            return None

    def is_invalid(self, status):
        """Return True if ``status`` must not be acted on.

        A status is invalid when its author looks like a bot spotter or when
        it was created too long ago; the reason is written to the error log.
        """
        if self.check_if_bot_spotter(status.author.screen_name):
            error_log.error('Caught a Bot {}'.format(status.author.screen_name))
            return True
        if self.check_date(status.created_at):
            error_log.error('tweet from too long ago. {}'.format(status.created_at))
            return True
        return False

    @staticmethod
    def _error_code(error):
        """Return the Twitter error code carried by ``error``, or None.

        Prefers the parsed error list in ``error.message`` and falls back to
        ``error.api_code`` when the message is a plain string or missing.
        """
        try:
            return error.message[0]['code']
        except (AttributeError, TypeError, IndexError, KeyError):
            return getattr(error, 'api_code', None)

    def _handle_api_error(self, error, action):
        """Route a TweepError raised while performing ``action`` to on_error.

        RateLimitError is a subclass of TweepError, so it is detected here
        rather than in a separate (otherwise unreachable) except clause.
        """
        if isinstance(error, tweepy.RateLimitError):
            error_log.error('Hit rate limit error from {}'.format(action))
            self.on_error(self.RATE_LIMIT_CODE)
            return
        self.on_error(self._error_code(error))

    def retweet(self, status):
        """Retweet ``status`` if its text asks for a retweet."""
        words_to_check = ["retweet", "rt"]
        if not self.check_for_words(words_to_check, status):
            return
        try:
            api.retweet(status.id)
            tweet_log.info('retweeted {}'.format(status.id))
            time.sleep(SECONDS_TO_WAIT)
        except tweepy.TweepError as e:
            self._handle_api_error(e, 'retweet')

    def favorite(self, status):
        """Favorite ``status`` if its text asks for a like/favorite."""
        words_to_check = ["like", "favorite", "fave", "fav"]
        if not self.check_for_words(words_to_check, status):
            return
        try:
            api.create_favorite(status.id)
            tweet_log.info('favorited {}'.format(status.id))
            time.sleep(SECONDS_TO_WAIT)
        except tweepy.TweepError as e:
            self._handle_api_error(e, 'favorite')

    def follow(self, status):
        """Follow the author of ``status`` if its text asks for a follow."""
        words_to_check = ["follow"]
        if not self.check_for_words(words_to_check, status):
            return
        try:
            already_following = api.lookup_friendships(
                [SELF_SCREEN_NAME], [status.author.screen_name])[0].is_following
            if not already_following:
                api.create_friendship(status.author.screen_name)
                tweet_log.info('followed {}'.format(status.id))
                time.sleep(SECONDS_TO_WAIT)
            # NOTE(review): the source's indentation was lost; this second
            # pause is assumed to follow the lookup call unconditionally.
            time.sleep(SECONDS_TO_WAIT)
        except tweepy.TweepError as e:
            self._handle_api_error(e, 'follow')

    def on_error(self, status_code):
        """Log ``status_code`` and react to it; always returns False.

        Called by the API wrappers above with a Twitter error code; as a
        StreamListener callback it may also receive an HTTP status such as
        420. Rate-limit and unknown codes trigger a MINUTES_TO_WAIT nap.
        """
        error_log.error('Status code: {}'.format(status_code))
        if status_code in (420, self.RATE_LIMIT_CODE):
            print("Overdid our rate limit! Taking a nap now...")
            time.sleep(60*MINUTES_TO_WAIT)  # sleep for 15 minutes for new requests
        elif status_code == 327:
            print("We have already retweeted that tweet.")
        elif status_code == 139:
            print("We have already favorited that tweet.")
        elif status_code == 144:
            print("Tweet was deleted.")
        else:
            print("Encountered an error I don't know how to handle. Taking a nap...")
            print(status_code)
            time.sleep(60*MINUTES_TO_WAIT)
        return False
class TwitterStream():
    """Owns the tweepy stream that feeds statuses to a MyStreamListener."""

    # Track phrases the main loop hands to filter_with.
    TERMS = [
        'retweet win',
        'retweet chance win',
        'follow chance win',
        'follow like chance win',
        'retweet follow chance win',
        'retweet like win',
        'giveaway like retweet win',
        'give away like win retweet'
    ]

    def __init__(self):
        """Create the stream, reusing the credentials of the shared ``api``."""
        listener = MyStreamListener()
        self.myStream = tweepy.Stream(auth=api.auth, listener=listener)

    def filter_with(self, terms):
        """Start streaming statuses that match ``terms``, with stall warnings on."""
        self.myStream.filter(track=terms, stall_warnings=True)
# so here's the deal. tweepy can't track and filter by location simultaneously.
# so it seems like we might want to dump this data out to a file and then read it and parse it later
# which isn't quite as clean of a solution but oh well...
# a tweet must contain every term within a single string for that string to match.
# separate strings are OR-ed together: if any one string matches, the tweet is returned.
# other terms we should look for: giveaway, freebie, free stuff, ????
# Keep the bot alive: whenever the stream drops, record why and reconnect.
while True:
    try:
        stream = TwitterStream()
        stream.filter_with(stream.TERMS)
    except Exception as e:
        # Catch Exception rather than everything so that Ctrl+C
        # (KeyboardInterrupt) and SystemExit can still stop the bot.
        error_log.error('Streaming error: {}'.format(e))
        print("Encountered a streaming error. Continuing.")
        # Short pause so a persistent failure does not become a tight
        # reconnect loop.
        time.sleep(SECONDS_TO_WAIT)
# miscellaneous:
# limit to san francisco airport:
# stream_SFO = stream_all.filter(locations=[-122.75,36.8,-121.75,37.8])