def search_tweets(term):
    count = 0
    pager = TwitterRestPager(get_twitter(), 'search/tweets',
                             {'q': term, 'count': COUNT, 'lang': 'en'})
    for item in pager.get_iterator():
        if 'message' in item and item['code'] == 88:
            print('Got error... Sleeping for 15 minutes.', file=sys.stderr)
            sys.stderr.flush()
            time.sleep(61 * 15)
            continue  # skip the error item; it has no tweet fields
        d = {}
        d['id'] = item['id_str']
        d['screen_name'] = item['user']['screen_name']
        d['screen_id'] = item['user']['id_str']
        d['tweet'] = item['text']
        d['followcount'] = item['user']['friends_count']
        d['user_mentions'] = [user['id_str']
                              for user in item['entities']['user_mentions']]
        if len(d['user_mentions']) > 1:
            save_to_file(d)
            count += 1
            if count >= COUNT:
                break
    return count
def sentimentComparison(server, query1='python', query2='javascript', count=50):
    positiveSentiment = ' :)'
    negativeSentiment = ' :('
    h_plus = TwitterRestPager(api, 'search/tweets',
                              {'q': joinAndEncode([query1, positiveSentiment]), 'count': count})
    h_minus = TwitterRestPager(api, 'search/tweets',
                               {'q': joinAndEncode([query1, negativeSentiment]), 'count': count})
    d_plus = TwitterRestPager(api, 'search/tweets',
                              {'q': joinAndEncode([query2, positiveSentiment]), 'count': count})
    d_minus = TwitterRestPager(api, 'search/tweets',
                               {'q': joinAndEncode([query2, negativeSentiment]), 'count': count})
    tweets_titles = ['h+', 'h-', 'd+', 'd-']
    # Fetch all four result sets concurrently; 'server' is the store handle
    # that iterateTweets() writes into.
    t1 = Thread(target=iterateTweets, args=(tweets_titles[0], server, h_plus))
    t2 = Thread(target=iterateTweets, args=(tweets_titles[1], server, h_minus))
    t3 = Thread(target=iterateTweets, args=(tweets_titles[2], server, d_plus))
    t4 = Thread(target=iterateTweets, args=(tweets_titles[3], server, d_minus))
    for t in (t1, t2, t3, t4):
        t.start()
    for t in (t1, t2, t3, t4):
        t.join()
    print('sentimentComparison() complete!')
def count_tweets(hashtags):
    """Connect to the Twitter API and get our counts for each expression."""
    # Substitute your API and ACCESS credentials
    api = TwitterAPI('API_KEY', 'API_SECRET', 'ACCESS_TOKEN', 'ACCESS_TOKEN_SECRET')
    all_counts = []  # a list of counts for all hashtags
    for hashtag in hashtags:  # iterate through hashtags
        hashtag_count = 0  # count for one hashtag
        # 'created_at' is UTC, so compare against UTC, not local time
        an_hour_ago = datetime.utcnow() - timedelta(hours=1)
        # We use search/tweets, a REST API endpoint that closes after returning a
        # maximum of 100 recent tweets and supports paging. TwitterRestPager spaces
        # out successive requests to stay under the Twitter API rate limit.
        r = TwitterRestPager(api, 'search/tweets', {'q': '#{}'.format(hashtag), 'count': 100})
        for item in r.get_iterator(wait=6):  # increase the wait interval to 6 seconds
            if 'text' in item:
                hashtag_count += 1
                # Convert the time each tweet was created to a datetime value
                created = datetime.strptime(item['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
                if created <= an_hour_ago:  # finish counting if created more than an hour ago
                    hashtag_count -= 1  # and don't count that tweet
                    break
            # In case we exceed the rate limit, error 88 indicates how long to
            # suspend before making a new request
            elif 'message' in item and item['code'] == 88:
                print('SUSPEND, RATE LIMIT EXCEEDED: %s' % item['message'])
                break
        all_counts.append(hashtag_count)  # add a count for one hashtag to our list
    return all_counts
def build_collection(self):
    """Page through tweets based on search query."""
    logging.info("Starting tweet collection")
    q_dict = self.make_q_dict()
    pager = TwitterRestPager(self.api, 'search/tweets', q_dict)
    for tweet in pager.get_iterator(wait=3):
        self.create_net(tweet)
    logging.info("Ending tweet collection")
def twittersearch():
    pager = TwitterRestPager(api, 'search/tweets', {'q': TRACK_TERM, 'count': 100})
    for item in pager.get_iterator(wait=6):
        userid = item['user']['id']
        tweetid = item['id']
        print(userid)
        print(tweetid)
        favorite(tweetid)
        follow(userid)
def get_paged_timeline(api: TwitterAPI, count: int = 200):
    """
    Return an iterator over home-timeline tweets, paged by TwitterRestPager
    so it can yield more tweets than a single request allows.

    :param api: must be a user-auth'd TwitterAPI
    :param count: tweets to request per page (the API caps this at 200)
    :return: the iterator of timeline tweets
    """
    timeline = TwitterRestPager(api, 'statuses/home_timeline', {'count': count})
    return timeline.get_iterator()
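# A minimal usage sketch for get_paged_timeline() above, assuming valid user
# credentials (the key names are placeholders, not from the original).
# get_iterator() keeps paging until the API stops returning older tweets.
from TwitterAPI import TwitterAPI

api = TwitterAPI('CONSUMER_KEY', 'CONSUMER_SECRET',
                 'ACCESS_TOKEN', 'ACCESS_TOKEN_SECRET')
for i, tweet in enumerate(get_paged_timeline(api)):
    if 'text' in tweet:
        print(i, tweet['text'])
    if i >= 10:  # stop after a handful of tweets for the demo
        break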
def rank_old_retweets(api, word_list, n):
    words = ' OR '.join(word_list)
    retweets = []
    while True:
        pager = TwitterRestPager(api, 'search/tweets', {'q': words})
        for item in pager.get_iterator():
            if 'retweeted_status' in item:
                process_tweet(retweets, item, n)
            elif 'message' in item:
                if item['code'] == 131:
                    continue  # ignore internal server error
                elif item['code'] == 88:
                    print('Suspend search until %s' % search.get_quota()['reset'])
                raise Exception('Message from twitter: %s' % item['message'])
def count_old_words(api, word_list):
    words = ' OR '.join(word_list)
    count = dict((word, 0) for word in word_list)
    while True:
        pager = TwitterRestPager(api, 'search/tweets', {'q': words, 'count': COUNT})
        for item in pager.get_iterator():
            if 'text' in item:
                process_tweet(item['text'], count, word_list)
            elif 'message' in item:
                if item['code'] == 131:
                    continue  # ignore internal server error
                elif item['code'] == 88:
                    print('Suspend search until %s' % search.get_quota()['reset'])
                raise Exception('Message from twitter: %s' % item['message'])
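# process_tweet() is not defined in the snippet above. A plausible minimal
# version, matching the (text, count, word_list) signature used there and
# mirroring the word-counting script near the end of this collection:
def process_tweet(text, count, word_list):
    text = text.lower()
    for word in word_list:
        if word in text:
            count[word] += 1
    print(count)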
def rank_old_words(api, word_list, n):
    words = ' OR '.join(word_list)
    count = {}
    while True:
        pager = TwitterRestPager(api, 'search/tweets', {'q': words, 'count': COUNT})
        for item in pager.get_iterator():
            if 'text' in item:
                process_tweet(item['text'], count, n, word_list)
            elif 'message' in item:
                if item['code'] == 131:
                    continue  # ignore internal server error
                elif item['code'] == 88:
                    print('Suspend search until %s' % search.get_quota()['reset'])
                raise Exception('Message from twitter: %s' % item['message'])
def count_old_tweets(api, word_list):
    words = ' OR '.join(word_list)
    count = 0
    while True:
        pager = TwitterRestPager(api, 'search/tweets', {'q': words})
        for item in pager.get_iterator():
            if 'text' in item:
                count += 1
                print(count)
            elif 'message' in item:
                if item['code'] == 131:
                    continue  # ignore internal server error
                elif item['code'] == 88:
                    print('Suspend search until %s' % search.get_quota()['reset'])
                raise Exception('Message from twitter: %s' % item['message'])
def get_user_tweets(twitter_api, tweets_collection, users_collection, out_collection, uid):
    if out_collection.find({'_id': uid}).limit(1).count():
        return
    gender = users_collection.find({'_id': uid}).limit(1)[0]['value']['gender']
    user_out = {'_id': uid, 'gender': gender}
    out_collection.insert(user_out)
    pager = TwitterRestPager(twitter_api, 'statuses/user_timeline',
                             {'user_id': uid, 'count': 200})
    for tw in pager.get_iterator():
        tw['_id'] = tw['id_str']
        tweets_collection.insert(tw)
        out_collection.update({'_id': uid}, {'$push': {'tweets': tw['_id']}})
    time.sleep(5)
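# A hypothetical wiring for get_user_tweets() above; the database name,
# collection names, credentials, and the uid are placeholders, not from the
# original snippet.
from pymongo import MongoClient
from TwitterAPI import TwitterAPI

client = MongoClient('localhost', 27017)
db = client['twitter']
api = TwitterAPI('CONSUMER_KEY', 'CONSUMER_SECRET', 'ACCESS_TOKEN', 'ACCESS_TOKEN_SECRET')
get_user_tweets(api, db.tweets, db.users, db.gender_out, '12345')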
def harvest_friends():
    # get a user id from the database
    user_id = db_client.find_user_for_friends()
    api = TwitterAPI(
        consumer_key=Auth[token_number]['consumer_key'],
        consumer_secret=Auth[token_number]['consumer_secret'],
        access_token_key=Auth[token_number]['access_token_key'],
        access_token_secret=Auth[token_number]['access_token_secret'],
        auth_type='oAuth1')
    try:
        pager = TwitterRestPager(api, 'friends/list', {'user_id': user_id, 'count': 200})
        for each_user_info in pager.get_iterator(40):  # wait 40s between requests
            FileSave(each_user_info)
    except TwitterRequestError as e:
        print(e.status_code)
        if e.status_code < 500:
            if e.status_code == 429 or e.status_code == 420:
                print('I am sleeping')
                time.sleep(450)
            elif e.status_code == 401:
                pass
            else:
                raise
            print('TwitterRequestError')
            # something needs to be fixed before re-connecting
        else:
            print('TwitterRequestError')
            # temporary interruption, re-try request
    # TwitterConnectionError is thrown when the connection times out or is
    # interrupted. You can always immediately try making the request again.
    except TwitterConnectionError:
        print('disconnected from Twitter: connection error')
    except Exception as e:
        print(e)
def counter(hashtag, df, limit=None):
    count = 0
    # Initialize the Twitter REST pager
    r = TwitterRestPager(api, 'search/tweets', {'q': hashtag, 'count': 100})
    if limit is not None:
        # Limit option
        print("requested tweets for hashtag is limited to {} tweets".format(limit))
        for item in r.get_iterator(wait=6):
            if 'text' in item:
                if count < limit:
                    count += 1
                    print("collecting tweet {} of {}...".format(count, limit))
                    # Extract tweet info
                    extract_tweet_info(item, count, df, hashtag)
                else:
                    print("requested tweet limit reached...")
                    print("ending query for hashtag...")
                    return
            elif 'message' in item and item['code'] == 88:
                print('SUSPEND, RATE LIMIT EXCEEDED: %s' % item['message'])
                break
    else:
        # No limit
        for item in r.get_iterator(wait=6):
            if 'text' in item:
                count += 1
                print("collecting tweet {} of all available tweets...".format(count))
                # Extract tweet info
                extract_tweet_info(item, count, df, hashtag)
            elif 'message' in item and item['code'] == 88:
                print('SUSPEND, RATE LIMIT EXCEEDED: %s' % item['message'])
                break
def search_user():
    while True:
        try:
            # get a user id from the database
            user_id = db_client.find_user_for_tweeets()
            api = TwitterAPI(consumer_key=Auth[token_number]['consumer_key'],
                             consumer_secret=Auth[token_number]['consumer_secret'],
                             access_token_key=Auth[token_number]['access_token_key'],
                             access_token_secret=Auth[token_number]['access_token_secret'],
                             auth_type='oAuth2')
            # Use the pager to page through the whole timeline instead of
            # making a single request
            r = TwitterRestPager(api, 'statuses/user_timeline',
                                 {'user_id': user_id, 'count': 200, 'exclude_replies': 'true'})
            for each in r.get_iterator():
                if 'text' in each:
                    FileSave(each)
        except TwitterRequestError as e:
            print(e.status_code)
            if e.status_code < 500:
                if e.status_code == 429 or e.status_code == 420:
                    print('I am sleeping')
                    time.sleep(450)
                elif e.status_code == 401:
                    pass
                else:
                    raise
                print('TwitterRequestError')
                # something needs to be fixed before re-connecting
            else:
                print('TwitterRequestError')
                # temporary interruption, re-try request
        # TwitterConnectionError is thrown when the connection times out or is
        # interrupted. You can always immediately try making the request again.
        except TwitterConnectionError:
            print('disconnected from Twitter: connection error')
        except Exception as e:
            print(e)
def search_tweets(api, word_list, region):
    """Get tweets containing any words in 'word_list' that have a location
    or coordinates in 'region'."""
    words = ' OR '.join(word_list)
    params = {'q': words}
    if region:
        params['geocode'] = '%f,%f,%fkm' % region  # lat,lng,radius
    while True:
        pager = TwitterRestPager(api, 'search/tweets', params)
        for item in pager.get_iterator():
            if 'text' in item:
                parse_tweet(item)
            elif 'message' in item:
                if item['code'] == 131:
                    continue  # ignore internal server error
                elif item['code'] == 88:
                    print('Suspend search until %s' % search.get_quota()['reset'])
                raise Exception('Message from twitter: %s' % item['message'])
def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count):
    """Get tweets containing any words in 'word_list'."""
    words = ' OR '.join(word_list)
    params = {'q': words, 'count': count}
    if region:
        params['geocode'] = '%f,%f,%fkm' % region  # lat,lng,radius
    while True:
        pager = TwitterRestPager(api, 'search/tweets', params)
        for item in pager.get_iterator():
            if 'text' in item:
                if not no_retweets or 'retweeted_status' not in item:
                    process_tweet(item, photo_dir, stalk)
            elif 'message' in item:
                if item['code'] == 131:
                    continue  # ignore internal server error
                elif item['code'] == 88:
                    print('Suspend search until %s' % search.get_quota()['reset'])
                raise Exception('Message from twitter: %s' % item['message'])
def Searching():
    api = TwitterAPI(consumer_key=Auth[token_number]['consumer_key'],
                     consumer_secret=Auth[token_number]['consumer_secret'],
                     access_token_key=Auth[token_number]['access_token_key'],
                     access_token_secret=Auth[token_number]['access_token_secret'],
                     auth_type='oAuth2')
    # https://dev.twitter.com/rest/reference/get/search/tweets
    # result_type: 'mixed', 'recent' or 'popular'
    # The pager follows the max_id cursor between pages.
    r = TwitterRestPager(api, 'search/tweets', {'q': search_words,
                                                'geocode': geo_for_searching + ',50km',
                                                'result_type': 'mixed',
                                                'count': 100})
    for item in r.get_iterator():
        FileSave(item)
    twitter_log("searching_finished")
def get_tweets():
    '''Gets tweets with the Twitter search API.'''
    api = TwitterAPI(consumer_key, consumer_secret, '', '', auth_type='oAuth2')
    pager = TwitterRestPager(api, 'search/tweets', {'q': '%23lootcrate', 'count': 100})
    a = []
    for i, item in enumerate(pager.get_iterator()):
        if 'text' in item:
            a.append(item)
            print(i)
        elif 'message' in item and item['code'] == 88:
            print('Rate Exceeded')
            break
    pickle.dump(a, open('tweets.p', 'wb'))  # open for writing, not reading
def search(request):
    query = request.GET['q']
    date = request.GET['date']
    formatted_date = datetime.strptime(date, '%m/%d/%Y').strftime('%Y-%m-%d')
    r = TwitterRestPager(api, 'search/tweets',
                         {'q': '%23' + query + ' since:' + formatted_date, 'count': 100})
    maxRT = 0
    tweet = {}
    count = 0
    for item in r.get_iterator(wait=2):
        if 'text' in item:
            print(item['text'])
            # Track the most-retweeted tweet seen so far
            if item['retweet_count'] > maxRT:
                maxRT = item['retweet_count']
                tweet = {'text': item['text'], 'rt_count': item['retweet_count']}
            count += 1
        elif 'message' in item and item['code'] == 88:
            print('SUSPEND, RATE LIMIT EXCEEDED: %s' % item['message'])
            break
    print(count)
    t = loader.get_template('results.html')
    c = Context({'count': count, 'tweet': tweet})
    return HttpResponse(t.render(c))
def Searching():
    api = TwitterAPI(consumer_key=Auth[token_number]['consumer_key'],
                     consumer_secret=Auth[token_number]['consumer_secret'],
                     access_token_key=Auth[token_number]['access_token_key'],
                     access_token_secret=Auth[token_number]['access_token_secret'],
                     auth_type='oAuth2')
    # https://dev.twitter.com/rest/reference/get/search/tweets
    # result_type: 'mixed', 'recent' or 'popular'
    # The pager follows the max_id cursor between pages; 'count' is capped
    # at 100 per request by the search API.
    r = TwitterRestPager(api, 'search/tweets', {'q': search_words,
                                                'geocode': geo_for_searching + ',50km',
                                                'result_type': 'mixed',
                                                'count': 100})
    for item in r.get_iterator():
        if 'text' in item:
            id_list.put(item['user']['id_str'])
            print('Now queue size is %d' % id_list.qsize())
def tweet_producer():
    kafka = KafkaClient(KAFKA_BROKER)
    producer = SimpleProducer(kafka)
    while True:
        try:
            api = TwitterAPI(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET)
            print('Using search term: %s' % SEARCH_TERM, file=sys.stderr)
            pager = TwitterRestPager(api, 'search/tweets', {'q': SEARCH_TERM})
            for item in pager.get_iterator():
                if 'text' in item:
                    tweet = {}
                    tweet['id'] = item['id']
                    # rhoover: the mktime/strptime conversion of 'created_at'
                    # produced times in the future, so use the wall clock for now
                    tweet['@timestamp'] = int(time.time() * 1000.0)
                    tweet['created_at'] = time.strftime(
                        '%Y-%m-%d %H:%M:%S',
                        time.strptime(item['created_at'], '%a %b %d %H:%M:%S +0000 %Y'))
                    tweet['username'] = item['user']['name']
                    tweet['handle'] = item['user']['screen_name']
                    tweet['lang'] = item['lang']
                    tweet['timezone'] = item['user']['time_zone']
                    tweet['followers'] = item['user']['followers_count']
                    tweet['location'] = item['user']['location']
                    tweet['retweeted'] = item['retweeted']
                    tweet['text'] = item['text']
                    producer.send_messages(b'tweets', bytes(json.dumps(tweet), "UTF-8"))
                    twitter_metrics["tweets-consumed"] += 1
                elif 'message' in item and item['code'] == 88:
                    print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'], file=sys.stderr)
                    time.sleep(120)
                    break
            print('Consumed %s tweets' % twitter_metrics["tweets-consumed"], file=sys.stderr)
        except Exception:
            print(traceback.format_exc(), file=sys.stderr)
            print("Sleeping for 120 secs.", file=sys.stderr)
            time.sleep(120)
def harvest_friends():
    num = 0
    # get a user id from the database
    user_id = "40814404"
    api = TwitterAPI(
        consumer_key=Auth[token_number]['consumer_key'],
        consumer_secret=Auth[token_number]['consumer_secret'],
        access_token_key=Auth[token_number]['access_token_key'],
        access_token_secret=Auth[token_number]['access_token_secret'],
        auth_type='oAuth1')
    pager = TwitterRestPager(api, 'followers/list', {'user_id': user_id, 'count': 200})
    for each_user_info in pager.get_iterator():
        # 'location' is free text and may be None
        if each_user_info.get('location') and 'Australia' in each_user_info['location']:
            num += 1
            print(num)
def twitter_to_csv(q, limit=500):
    consumer_key = settings.TWITTER_CONSUMER_KEY
    consumer_secret = settings.TWITTER_CONSUMER_SECRET
    access_token_key = settings.TWITTER_ACCESS_TOKEN_KEY
    access_token_secret = settings.TWITTER_ACCESS_TOKEN_SECRET
    api = TwitterAPI(consumer_key, consumer_secret, access_token_key, access_token_secret)
    output = io.StringIO()
    writer = csv.writer(output)
    count = 0
    r = TwitterRestPager(api, 'search/tweets', {'q': q, 'count': 100})
    for item in r.get_iterator():
        if 'text' in item:
            count += 1
            author = item['user']['screen_name'].lower()
            mentions = item['entities']['user_mentions']
            # Strip URLs and @mentions from the tweet text
            text = re.sub(r'https?:\/\/.*[\r\n]*', '', item['text'], flags=re.MULTILINE)
            text = re.sub(r'@([A-Za-z0-9_]+)', '', text, flags=re.MULTILINE)
            for mention in mentions:
                writer.writerow([author, mention['screen_name'].lower(), text])
            if len(mentions) == 0:
                writer.writerow([author, author, text])
        elif 'message' in item and item['code'] == 88:
            print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'])
            break
        if count >= limit:
            break
    print('Twitter search for', q, '; results:', count)
    return output.getvalue()
def pull_tweets(tweets: int, hashtag: str) -> datetime:
    """
    Pulls the specified number of tweets and writes them into a file as a
    pickled Python set.

    :param tweets: number of tweets to pull
    :param hashtag: hashtag to search for
    :return: the start time, which also names the output file
    """
    start_time = datetime.now()
    print(start_time)
    api = TwitterAPI(consumer_key, consumer_secret, access_token_key, access_token_secret)
    data_file = open('data/{}{}'.format(str(start_time), '.txt'), 'wb+')
    # Use a set so that we don't keep tweets we have already retrieved
    r = TwitterRestPager(api, 'search/tweets',
                         {'q': '#{}'.format(hashtag), 'count': 100, 'lang': 'en'})
    tweet_set = set()
    for item in r.get_iterator():
        if len(tweet_set) >= tweets:
            break
        if 'text' in item:
            tweet = Tweet()
            # don't shadow the 'hashtag' parameter in the comprehension
            tweet.hashtags = [tag['text'] for tag in item['entities']['hashtags']]
            tweet.text = item['text'].replace('\n', ' ')
            tweet.target = hashtag
            if tweet not in tweet_set:
                tweet_set.add(tweet)
                print(tweet.hashtags, tweet.text, tweet.target)
                print(len(tweet_set))
        elif 'message' in item and item['code'] == 88:
            print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'])
            time.sleep(16 * 60)
    pickle.dump(tweet_set, data_file, 2)
    data_file.close()
    print(datetime.now() - start_time)
    return start_time
def get_score(self, gene):
    """
    :param gene: gene to score
    :return: number of recent tweets of the gene symbol (plus likes and retweets)
    """
    score = 0
    twitter_query = '{name} gene OR genomics'.format(name=gene.name)
    # search/tweets is a REST API endpoint that closes after returning a maximum
    # of 100 recent tweets. TwitterRestPager spaces out successive requests to
    # stay under the rate limit of 5s between calls.
    tweets = TwitterRestPager(self.api, 'search/tweets',
                              {'q': twitter_query, 'count': 100})
    for tweet in tweets.get_iterator(wait=6):
        if 'retweeted_status' not in tweet:
            score += int(tweet['retweet_count']) + int(tweet['favorite_count']) + 1
    sleep(6)  # pause before the next gene is scored
    return score
def makeDB(word='analytics'):
    o = read_file()
    api = TwitterAPI(o['consumer_key'], o['consumer_secret'],
                     o['access_token_key'], o['access_token_secret'])
    r = TwitterRestPager(api, 'search/tweets', {'q': word, 'count': 100})
    # Map month abbreviations ('Jan', 'Feb', ...) to month numbers
    d = dict((v, k) for k, v in enumerate(calendar.month_abbr))
    try:
        flag = 0
        for item in r.get_iterator():
            if flag == 100:
                break
            flag += 1
            obj = User(U_id=item['user']['id'])
            obj.name = item['user']['screen_name']
            obj.location = item['user']['time_zone']
            x = list(map(str, item['user']['created_at'].split(' ')))
            # zero-pad the month so '%d%m%Y' parses unambiguously
            obj.date = datetime.strptime(x[2] + '{:02d}'.format(d[x[1]]) + x[-1],
                                         "%d%m%Y").date()
            obj.text = item['text']
            obj.search = word
            obj.save()
    except Exception:
        print("Request cannot be served due to the application's rate limit "
              "having been exhausted for the resource")
def pagingCall(server, query='blockchain', key='Tweets', count=200, limit=200):
    # Keep tabs on the number of entries
    numEntries = 1
    r = TwitterRestPager(api, 'search/tweets', {'q': query, 'count': count})
    for item in r.get_iterator():
        if 'text' in item:
            # Push tweets, with URLs stripped, onto the list
            server.rpush(key, re.sub(r"http\S+", "", item['text']))
            if numEntries % 100 == 0:
                print('Entries so far', numEntries)
            if numEntries == limit:
                break
            # Move on to the next entry
            numEntries += 1
        elif 'message' in item and item['code'] == 88:
            print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'])
            break
def _get_pager(self):
    '''Returns a paginating object over tweets.'''
    config = self._load_config()
    api = TwitterAPI(
        config['consumer_key'],  # app-only auth
        config['consumer_secret'],
        auth_type='oAuth2')
    self.cache = config.get('cache', NUM_CACHE_TWEETS)
    query = config.get('query')
    assert query, 'expected a search query in config'
    params = {'q': query, 'count': TWEETS_PER_BATCH}
    self._log('Query: %r' % params)
    return TwitterRestPager(api, SEARCH_ENDPOINT, params)
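# A minimal sketch (not from the original) of how the pager returned by
# _get_pager() might be consumed inside the same class; the method name and
# wait interval are assumptions.
def _fetch_tweets(self):
    '''Yields tweet texts from successive search pages.'''
    for item in self._get_pager().get_iterator(wait=6):
        if 'text' in item:
            yield item['text']
        elif 'message' in item and item['code'] == 88:
            break  # rate limit exceeded; stop until the quota resets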
try:
    if TEST_NUMBER == 1:
        # POST A TWEET
        r = api.request('statuses/update', {'status': 'the time is now %s' % datetime.now()})
        print(r.status_code)

    if TEST_NUMBER == 2:
        # GET 5 TWEETS CONTAINING 'ZZZ'
        for item in api.request('search/tweets', {'q': 'zzz', 'count': 5}):
            print(item['text'] if 'text' in item else item)

    if TEST_NUMBER == 3:
        # STREAM TWEETS FROM AROUND NYC
        for item in api.request('statuses/filter', {'locations': '-74,40,-73,41'}):
            print(item['text'] if 'text' in item else item)

    if TEST_NUMBER == 4:
        # GET TWEETS FROM THE PAST WEEK OR SO CONTAINING 'LOVE'
        pager = TwitterRestPager(api, 'search/tweets', {'q': 'love'})
        for item in pager.get_iterator():
            print(item['text'] if 'text' in item else item)
except Exception as e:
    print(e)
try:
    client = MongoClient('localhost', 27017)
    db = client['devtweets']
    tweets = db.tweets
except Exception as e:
    print('*** STOPPED %s' % str(e))

try:
    params = to_dict(args.parameters)
    oauth = TwitterOAuth.read_file('./credentials.txt')
    api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret,
                     oauth.access_token_key, oauth.access_token_secret)
    # Resume from the newest tweet already stored
    lastTweet = tweets.find({}).sort('id', -1).limit(1)
    str_lastTweetId = str(lastTweet[0]["id"])
    pager = TwitterRestPager(api, 'search/tweets',
                             {'q': 'Aguascalientes 4sq com, ags 4sq com',
                              'count': 100,
                              'since_id': str_lastTweetId})
    for item in pager.get_iterator(10):
        print(item)
        tweets.insert(item)
except KeyboardInterrupt:
    print('\nTerminated by user')
except Exception as e:
    print('*** STOPPED %s' % str(e))
from TwitterAPI import TwitterRestPager, TwitterAPI
import couchdb
import time

consumer_key = ''
consumer_secret = ''
access_token = ''
access_secret_token = ''

couch = couchdb.Server()
db = couch['twitter_raw_data']

# Build an OR query from one language keyword per line; strip the trailing
# newlines so the query string is well-formed ('q' must be a string, not a list).
languages = ' OR '.join(line.strip() for line in open('languages.txt'))

api = TwitterAPI(consumer_key, consumer_secret, access_token, access_secret_token)

while True:
    start_time = time.time()
    r = TwitterRestPager(api, 'search/tweets', {'q': languages, 'count': 100})
    for item in r.get_iterator():
        if 'text' in item:
            db.save(item)
        elif 'message' in item and item['code'] == 88:
            sleep_time = 900 - (time.time() - start_time)
            print('Request limit exceeded: %s\n' % item['message'])
            print('Going to sleep for {0}s\n'.format(sleep_time))
            time.sleep(sleep_time)
            break  # restart the pager after sleeping
BITLY_USERNAME = '******'
BITLY_API_KEY = '******'
BITLY_ACCESS_TOKEN = '******'
FOURSQUARE_ACCESS_TOKEN = '******'

api = TwitterAPI(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET)
bitly = bitly_api.Connection(access_token=BITLY_ACCESS_TOKEN)

pager = TwitterRestPager(api, 'search/tweets', {'q': SEARCH_TERM})


def get_checkid_and_s(url):
    # Pull the check-in id and its signature out of a 4sq URL of the form
    # .../checkin/<id>?s=<signature>&ref=...
    checkin_index = url.find('checkin')
    mark_s_index = url.find('?s=')
    check_id = url[checkin_index + 8:mark_s_index]
    refer_ref_index = url.find('&ref')
    signature_id = url[mark_s_index + 3:refer_ref_index]
    return check_id, signature_id


def get_check_in_info(id, sig):
    dt = date.today()
from secrets import consumer_key, consumer_secret, access_token_key, access_token_secret

api = TwitterAPI(consumer_key, consumer_secret, access_token_key, access_token_secret)

data_followers = json.load(open('followers.json'))
main_account = sys.argv[1]

while True:
    try:
        if main_account not in data_followers or len(data_followers[main_account]) == 2000:
            print('followers/', main_account)
            r = TwitterRestPager(api, 'followers/list',
                                 {'screen_name': main_account, 'count': 200})
            data_followers[main_account] = []
            for item in r.get_iterator(wait=60):
                data_followers[main_account].append(item)
                if len(data_followers[main_account]) % 200 == 0:
                    print('....', len(data_followers[main_account]))
            print(' ->', len(data_followers[main_account]))
            json.dump(data_followers, open('followers.json', 'w'),
                      indent=2, sort_keys=True, ensure_ascii=False)
        break
    except TwitterError.TwitterRequestError as e:
        print(e)
try:
    client = MongoClient('localhost', 27017)
    db = client['todos_tweets']
    tweets = db.tweets
except Exception as e:
    print('*** STOPPED %s' % str(e))

try:
    params = to_dict(args.parameters)
    oauth = TwitterOAuth.read_file('./credentials.txt')
    api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret,
                     oauth.access_token_key, oauth.access_token_secret)
    # Resume from the newest tweet already stored, if any
    str_lastTweetId = None
    lastTweets = tweets.find({}).sort('id', -1).limit(1)
    for lastTweet in lastTweets:
        str_lastTweetId = str(lastTweet["id"])
    if str_lastTweetId is None:
        pager = TwitterRestPager(api, 'search/tweets', {'q': '4sq com', 'count': 100})
    else:
        pager = TwitterRestPager(api, 'search/tweets',
                                 {'q': '4sq com', 'count': 100, 'since_id': str_lastTweetId})
    for item in pager.get_iterator(5):
        tweets.insert(item)
except KeyboardInterrupt:
    print('\nTerminated by user')
def fetchTweets(apiKey, apiSecret, accessToken, accessTokenSecret, query,
                lang='-', nTweets=100, nFlush=100, media='-', mSize='medium',
                saveMedia=False, viewMedia=True, workDir='cache',
                saveLog=True, logName='log.txt'):
    import os
    import time
    import urllib.request
    from tabulate import tabulate
    from TwitterAPI import TwitterAPI, TwitterRestPager

    api = TwitterAPI(apiKey, apiSecret, accessToken, accessTokenSecret)

    # Create directories and files, etc.
    curTime = time.strftime("%d_%b_%Y_%H.%M.%S")
    if not saveMedia:
        workDir = "cache"
    if viewMedia:
        if not os.path.exists(workDir):
            os.makedirs(workDir)
        os.chdir(workDir)
    if saveLog:
        f = open(logName, "w")

    print("Started fetching with the following parameters:")
    print("query:     ", query)
    print("lang:      ", lang)
    print("nTweets:   ", nTweets)
    print("nFlush:    ", nFlush)
    print("media:     ", media)
    print("mSize:     ", mSize)
    print("viewMedia: ", viewMedia)
    print("saveMedia: ", saveMedia)
    print("workDir:   ", workDir)
    print("saveLog:   ", saveLog)
    print("logName:   ", logName)

    # Create counters
    current = 0
    total = 0
    data = []
    stream = []

    # Create table header for printing
    tableHeader = ["Name", "Handle", "Text", "Time stamp", "Hashtags",
                   "Retweets", "Favorites", "Media", "Language", "Img Path"]
    keys = ["name", "handle", "content", "time", "hashtags", "rtCount",
            "favCount", "media", "lang", "imgName"]

    # Search
    r = TwitterRestPager(api, 'search/tweets', {'q': query, 'count': 100})

    for tweet in r.get_iterator():
        if 'text' in tweet:  # if it's really a tweet and not something else
            # Check if it fits the media requirement (yes, no, don't care)
            if media != "-":
                cMedia = media is True
                if ('media' not in tweet['entities']) and cMedia:
                    continue
                elif ('media' in tweet['entities']) and not cMedia:
                    continue
            else:
                cMedia = 'media' in tweet['entities']

            # Check if it fits the language requirement (anything or specific)
            if lang != "-":
                if tweet['metadata']['iso_language_code'] != lang:
                    continue
                tLang = lang
            else:
                tLang = tweet['metadata']['iso_language_code']

            # Collect hashtags, if any
            if tweet['entities']['hashtags']:
                hashtags = ""
                for tag in tweet['entities']['hashtags']:
                    hashtags = hashtags + tag['text'] + ", "
            else:
                hashtags = None

            fileName = None
            if cMedia and viewMedia:
                cMedia += len(tweet['entities']['media'])
                mediaURL = tweet['entities']['media'][0]['media_url']
                fileName = str(total) + mediaURL[-4:]  # last 4 chars are the extension
                urllib.request.urlretrieve(mediaURL + ":" + mSize, fileName)

            # Push the tweet onto the stream stack
            stream.append([tweet['user']['name'], tweet['user']['screen_name'],
                           tweet['text'].replace('\n', ' '), tweet['created_at'],
                           hashtags, tweet['retweet_count'], tweet['favorite_count'],
                           cMedia, tLang, fileName])

            # Increment the counters
            current += 1
            total += 1

            # Every nFlush tweets, flush the stream to improve performance
            if current == nFlush:
                data.extend(stream)  # concatenate
                stream = []          # empty the stack
                current = 0          # reset the counter

            # Max number of tweets
            if total >= nTweets:
                data.extend(stream)  # concatenate
                break

        # This handles rate limiting (error 88), which would otherwise trigger
        # a 15-minute block
        elif 'message' in tweet and tweet['code'] == 88:
            print('SUSPEND, RATE LIMIT EXCEEDED: %s' % tweet['message'])
            break

    # Print the table
    table = ""
    if saveLog:
        table = tabulate(data, headers=tableHeader, tablefmt='fancy_grid')
        f.write(table)
        f.close()

    dictList = []
    for i in range(total):
        dictList.append(dict(zip(keys, data[i])))
    result = [dictList, total, workDir, table]
    os.chdir("..")
    print("Done Fetching!")
    return result
DK_names = data_DK["Name"]

# Concatenate the batters and pitchers and drop dups
frames = [pitchers_names, batters_names]
all_players = pd.concat(frames)
all_players = all_players.drop_duplicates()

# Do some data cleaning so that the data is easier to match up later:
# turn "Last, First" into "First Last"
cleanplayerlst = []
for i in list(batters_names):
    test = i.split(",")
    test[0], test[1] = test[1], test[0]
    test = test[0].strip() + " " + test[1]
    cleanplayerlst.append(test)

# Build a list of iterators, one search per player name
iterator_list = [
    TwitterRestPager(api, 'search/tweets', {'q': name}).get_iterator(wait=2)
    for name in cleanplayerlst
]

player_dict = defaultdict(list)
count = 0
try:
    for player, iterator in enumerate(iterator_list):
        count += 1
        print("Player Count:", count)
        # Back off every 60 players to stay under the rate limit
        if player != 0 and player % 60 == 0:
            time.sleep(1200)
        for idx, item in enumerate(iterator):
            if idx == 40:
                break
            if 'text' in item:
                player_dict[cleanplayerlst[player]].append(item['text'])
WORDS_TO_COUNT = ['lion', 'wildebeest', 'snake']

# Substitute your own credentials; never commit real keys
API_KEY = '<use yours>'
API_SECRET = '<use yours>'
ACCESS_TOKEN = '<use yours>'
ACCESS_TOKEN_SECRET = '<use yours>'

api = TwitterAPI(API_KEY, API_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

words = ' OR '.join(WORDS_TO_COUNT)
counts = dict((word, 0) for word in WORDS_TO_COUNT)


def process_tweet(text):
    text = text.lower()
    for word in WORDS_TO_COUNT:
        if word in text:
            counts[word] += 1
    print(counts)


r = TwitterRestPager(api, 'search/tweets', {'q': words, 'count': 100})
for item in r.get_iterator(wait=6):
    if 'text' in item:
        process_tweet(item['text'])
    elif 'message' in item and item['code'] == 88:
        print('\n*** SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'])
        break
if TEST_NUMBER == 2:
    # GET 5 TWEETS CONTAINING 'ZZZ'
    for item in api.request('search/tweets', {'q': 'zzz', 'count': 5}):
        print(item['text'] if 'text' in item else item)

if TEST_NUMBER == 3:
    # STREAM TWEETS FROM AROUND NYC
    for item in api.request('statuses/filter', {'locations': '-74,40,-73,41'}):
        print(item['text'] if 'text' in item else item)

if TEST_NUMBER == 4:
    # GET TWEETS FROM THE PAST WEEK OR SO CONTAINING 'CatchingFire'
    pager = TwitterRestPager(api, 'search/tweets', {'q': 'CatchingFire'})
    # for item in pager.get_iterator():
    #     print(item['text'] if 'text' in item else item)

if TEST_NUMBER == 5:
    # GET TWEETS FROM THE PAST WEEK OR SO WITH THE CHOSEN HASHTAG
    my_request_param_dict = {
        'q': '%23' + twitter_hashtag,
    }
    pager = TwitterRestPager(api, 'search/tweets', my_request_param_dict)
    for item in pager.get_iterator():
        a = item
        print("%d - %s" % (loop_counter_break, item['user']['name']))
        try:
            dict_temp = {
#!/usr/bin/python3
import sys, re, fileinput
import glob, fnmatch, os, os.path, tempfile, shutil
import time, datetime, sqlite3, json
from boot import api
from TwitterAPI import TwitterRestPager as TP

# Collect everyone who has mentioned us
endpoint = 'statuses/mentions_timeline'
params = {'count': 200, 'trim_user': 1}
pager = TP(api, endpoint, params)
users = set()
for tweet in pager.get_iterator():
    users.add(tweet['user']['id_str'])  # or id as int
users = list(users)

# Add them all to a list; the endpoint accepts only 100 users per request
endpoint = 'lists/members/create_all'
params = {'slug': 'Summer', 'owner_screen_name': 'shuravban', 'user_id': ""}
MAX = 100
n = len(users) // MAX + 1
for i in range(n):
    params['user_id'] = ", ".join(users[i * MAX:i * MAX + MAX])
    res = api.request(endpoint, params)
    if res.status_code != 200:
# Print a user's timeline. This will get up to 3,200 tweets, which
# is the maximum the Twitter API allows.

from TwitterAPI import TwitterAPI, TwitterRestPager

SCREEN_NAME = 'TheTweetOfGod'

api = TwitterAPI(<consumer key>, <consumer secret>, auth_type='oAuth2')
pager = TwitterRestPager(api, 'statuses/user_timeline',
                         {'screen_name': SCREEN_NAME, 'count': 200})
count = 0
for item in pager.get_iterator(wait=3.5):
    if 'text' in item:
        count = count + 1
        print(count, item['text'])
    elif 'message' in item:
        print(item['message'])
        break
worksheet.write(0, 6, 'Favourited')
worksheet.write(0, 7, 'Image URL')

# Set up initial count and default data
count = 0
user = '******'
imageUrl = 'No image'
text = 'No text'
retweeted = 'false'
retweetCount = 0
createdAt = 'No time'
favorited = 'false'

# Return tweets, 100 at a time, with our chosen query (for hashtags, use
# "%23" to encode the hashtag character)
r = TwitterRestPager(api, 'search/tweets', {'q': '%23SomeHashtag', 'count': 100})

# Loop through each item in the response, waiting 6 seconds between each
# request (as this should avoid our hourly limit)
for item in r.get_iterator(wait=6):
    # If we have text within this item then the tweet is valid
    if 'text' in item:
        # Update our variables
        count += 1
        text = item['text']
        retweeted = item['retweeted']
        retweetCount = item['retweet_count']
        createdAt = item['created_at']
        favorited = item['favorited']
        entities = item['entities']
        userObject = item['user']