Example #1
def search_tweets(term):
    count = 0
    r = TwitterRestPager(get_twitter(), 'search/tweets', {
        'q': term,
        'count': COUNT,
        'lang': 'en'
    })
    for item in r.get_iterator():
        if 'message' in item and item['code'] == 88:
            print('Got error... Sleeping for 15 minutes.', file=sys.stderr)
            sys.stderr.flush()
            time.sleep(61 * 15)
            continue  # skip the error item and resume paging

        d = {}
        d['id'] = item['id_str']
        d['screen_name'] = item['user']['screen_name']
        d['screen_id'] = item['user']['id_str']
        d['tweet'] = item['text']
        d['followcount'] = item['user']['friends_count']
        d['user_mentions'] = [
            user['id_str'] for user in item['entities']['user_mentions']
        ]
        if len(d['user_mentions']) > 1:
            save_to_file(d)
            count += 1

        if count > COUNT - 1:
            break

    return count
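
A hypothetical driver for the function above (COUNT, get_twitter(), and save_to_file() are assumed to be defined in the same module, as the snippet implies):

import sys

if __name__ == '__main__':
    # search for a term given on the command line, defaulting to 'python'
    term = sys.argv[1] if len(sys.argv) > 1 else 'python'
    saved = search_tweets(term)
    print('Saved %d tweets that mention more than one user' % saved)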
Example #2
def sentimentComparison(server, query1='python', query2='javascript', count=50):

    positiveSentiment = ' :)'
    negativeSentiment = ' :('

    h_plus = TwitterRestPager(api, 'search/tweets', {'q':joinAndEncode([query1, positiveSentiment]), 'count':count})
    h_minus = TwitterRestPager(api, 'search/tweets', {'q':joinAndEncode([query1, negativeSentiment]), 'count':count})
    d_plus = TwitterRestPager(api, 'search/tweets', {'q':joinAndEncode([query2, positiveSentiment]), 'count':count})
    d_minus = TwitterRestPager(api, 'search/tweets', {'q':joinAndEncode([query2, negativeSentiment]), 'count':count})

    tweets_titles = ['h+', 'h-', 'd+', 'd-']

    t1 = Thread(target=iterateTweets, args=(tweets_titles[0], server, h_plus))
    t2 = Thread(target=iterateTweets, args=(tweets_titles[1], server, h_minus))
    t3 = Thread(target=iterateTweets, args=(tweets_titles[2], server, d_plus))
    t4 = Thread(target=iterateTweets, args=(tweets_titles[3], server, d_minus))

    t1.start()
    t2.start()
    t3.start()
    t4.start()

    # wait for all four pagers to finish before reporting completion
    for t in (t1, t2, t3, t4):
        t.join()

    print 'sentimentComparison() complete!'

    return
Example #3
def count_tweets(hashtags):
    """connect to the Twitter API and get our counts for each expression"""
    #substitute your API and ACCESS credentials
    api = TwitterAPI('API_KEY','API_SECRET','ACCESS_TOKEN','ACCESS_TOKEN_SECRET')
    all_counts = []                                     #a list of counts for all hashtags
    for hashtag in hashtags:                        #iterate through hashtags
        hashtag_count = 0                           #count for one hashtag
        an_hour_ago = datetime.now() - timedelta(hours=1)   #set time for 1 hour ago
        #we use search/tweets, a REST API endpoint that closes after returning a maximum of 100
        #recent tweets and supports paging
        #TwitterRestPager spaces out successive requests to stay under the Twitter API rate limit
        r = TwitterRestPager(api, 'search/tweets', {'q':'#{}'.format(hashtag), 'count': 100})
        for item in r.get_iterator(wait=6):         #increase the wait interval to 6 seconds
            if 'text' in item:
                hashtag_count += 1
                #convert the time when each tweet was created to a datetime type
                created = datetime.strptime(item['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
                if created <= an_hour_ago:          #finish counting if created more than an hour ago
                    hashtag_count -= 1              #and don't count that tweet
                    break
            #in case we exceed the rate limit, error 88 indicates how long to suspend before
            #making a new request
            elif 'message' in item and item['code'] == 88:
                print('SUSPEND, RATE LIMIT EXCEEDED: %s' % item['message'])
                break
        all_counts.append(hashtag_count)            #add a count for one hashtag to our list
    return all_counts          
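
A hypothetical invocation of the function above (the hashtags are placeholders):

hashtags = ['python', 'datascience']
counts = count_tweets(hashtags)
for tag, n in zip(hashtags, counts):
    print('#{}: {} tweets in the last hour'.format(tag, n))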
Example #4
    def build_collection(self):
        """ Page through tweets based on search query """
        logging.info("Starting tweet collection")
        q_dict = self.make_q_dict()
        pager = TwitterRestPager(self.api, 'search/tweets', q_dict)
        for tweet in pager.get_iterator(wait=3):
            self.create_net(tweet)
        logging.info("Ending tweet collection")
Example #5
def twittersearch():
	pager = TwitterRestPager(api, 'search/tweets', {'q': TRACK_TERM, 'count' : 100})
	for item in pager.get_iterator(wait=6):
		userid = item['user']['id']
		tweetid = item['id']
		print userid
		print tweetid
		favorite(tweetid)
		follow(userid)
Example #6
def get_paged_timeline(api: TwitterAPI, count: int = 200):
    """
    Returns an iterator over the home timeline that pages beyond a single request.
    :param api: Must be a user-auth'd TwitterAPI.
    :param count: number of tweets to request per page (the API caps this at 200)
    :return: the iterator of timeline tweets
    """
    timeline = TwitterRestPager(api, 'statuses/home_timeline', {'count': count})
    return timeline.get_iterator()
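
A minimal consumption sketch, assuming api was built with user (OAuth 1) credentials:

for tweet in get_paged_timeline(api, count=200):
    if 'text' in tweet:
        print(tweet['text'])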
Example #8
def rank_old_retweets(api, word_list, n):
	words = ' OR '.join(word_list)
	retweets = []
	while True:
		pager = TwitterRestPager(api, 'search/tweets', {'q': words})
		for item in pager.get_iterator():
			if 'retweeted_status' in item:
				process_tweet(retweets, item, n)
			elif 'message' in item:
				if item['code'] == 131:
					continue # ignore internal server error
				elif item['code'] == 88:
					# 'search' is assumed to be defined elsewhere in this module
					print('Suspend search until %s' % search.get_quota()['reset'])
				raise Exception('Message from twitter: %s' % item['message'])
Example #9
def count_old_words(api, word_list):
	words = ' OR '.join(word_list)
	count = dict((word,0) for word in word_list)
	while True:
		pager = TwitterRestPager(api, 'search/tweets', {'q':words, 'count':COUNT})
		for item in pager.get_iterator():
			if 'text' in item:
				process_tweet(item['text'], count, word_list)
			elif 'message' in item:
				if item['code'] == 131:
					continue # ignore internal server error
				elif item['code'] == 88:
					print('Suspend search until %s' % search.get_quota()['reset'])
				raise Exception('Message from twitter: %s' % item['message'])
Example #10
def rank_old_words(api, word_list, n):
	words = ' OR '.join(word_list)
	count = {}
	while True:
		pager = TwitterRestPager(api, 'search/tweets', {'q':words, 'count':COUNT})
		for item in pager.get_iterator():
			if 'text' in item:
				process_tweet(item['text'], count, n, word_list)
			elif 'message' in item:
				if item['code'] == 131:
					continue # ignore internal server error
				elif item['code'] == 88:
					print('Suspend search until %s' % search.get_quota()['reset'])
				raise Exception('Message from twitter: %s' % item['message'])
Example #11
def count_old_tweets(api, word_list):
	words = ' OR '.join(word_list)
	count = 0
	while True:
		pager = TwitterRestPager(api, 'search/tweets', {'q': words})
		for item in pager.get_iterator():
			if 'text' in item:
				count += 1
				print(count)
			elif 'message' in item:
				if item['code'] == 131:
					continue # ignore internal server error
				elif item['code'] == 88:
					print('Suspend search until %s' % search.get_quota()['reset'])
				raise Exception('Message from twitter: %s' % item['message'])
Example #12
def get_user_tweets(twitter_api, tweets_collection, users_collection, out_collection, uid):
    if out_collection.find({'_id': uid}).limit(1).count():
        return
    gender = users_collection.find({'_id': uid}).limit(1)[0]['value']['gender']
    user_out = {
        '_id': uid,
        'gender': gender
        }
    out_collection.insert(user_out)
    pager = TwitterRestPager(twitter_api, 'statuses/user_timeline', {'user_id': uid, 'count':200})
    for tw in pager.get_iterator():
        tw['_id'] = tw['id_str']
        tweets_collection.insert(tw)
        out_collection.update({'_id': uid}, {'$push': {'tweets': tw['_id']}})
    time.sleep(5)
Example #13
def harvest_friends():
    # get a user Id from database
    user_id = db_client.find_user_for_friends()
    api = TwitterAPI(
        consumer_key=Auth[token_number]['consumer_key'],
        consumer_secret=Auth[token_number]['consumer_secret'],
        access_token_key=Auth[token_number]['access_token_key'],
        access_token_secret=Auth[token_number]['access_token_secret'],
        auth_type='oAuth1')
    cursor = -1
    # while 1:
    try:
        count = 0
        # r = api.request('friends/list', {"user_id": user_id, 'count': 200, 'cursor': cursor})
        r2 = TwitterRestPager(api, 'friends/list', {
            "user_id": user_id,
            'count': 200
        })
        for each_user_info in r2.get_iterator(40):
            FileSave(each_user_info)

    except TwitterRequestError as e:
        print(e.status_code)
        if e.status_code < 500:
            if e.status_code == 429 or e.status_code == 420:
                print('I am sleeping')
                time.sleep(450)
            elif e.status_code == 401:
                pass
            else:
                raise
            print('TwitterRequestError')
            # something needs to be fixed before re-connecting
            pass
        else:
            print('TwitterRequestError')
            # temporary interruption, re-try request
            pass

    # TwitterConnectionError is thrown when the connection times out or is interrupted.
    # You can always immediately try making the request again.
    except TwitterConnectionError:
        print('disconnected from Twitter Connection Error')
        # temporary interruption, re-try request
        pass
    except Exception as e:
        print(e)
Example #14
def counter(hashtag, df, limit=None):
    count = 0

    #Initialize Twitter Rest Pager
    r = TwitterRestPager(api, 'search/tweets', {'q': hashtag, 'count': 100})

    #Limit Option
    if limit is not None:
        print("requested tweets for hashtag is limited to {} tweets".format(
            limit))

        for item in r.get_iterator(wait=6):

            if 'text' in item:
                if count < limit:
                    count += 1
                    print("collecting tweet {} of {}...".format(count, limit))

                    #Extract Tweet Info
                    extract_tweet_info(item, count, df, hashtag)

                else:
                    print("requested tweet limit reached...")
                    print("ending query for hashtag...")
                    return

            elif 'message' in item and item['code'] == 88:
                print('SUSPEND, RATE LIMIT EXCEEDED: %s' % item['message'])
                break

    #No Limit
    else:

        for item in r.get_iterator(wait=6):

            if 'text' in item:

                print("collecting tweet {} of all available tweets...".format(
                    count))
                count += 1

                #Extract Tweet Info
                extract_tweet_info(item, count, df, hashtag)

            elif 'message' in item and item['code'] == 88:
                print('SUSPEND, RATE LIMIT EXCEEDED: %s' % item['message'])
                break
Example #16
def search_user():
    keepdoing = True
    while keepdoing:
        try:
            # get a user ID
            user_id = db_client.find_user_for_tweeets()
            api = TwitterAPI(consumer_key=Auth[token_number]['consumer_key'],
                             consumer_secret=Auth[token_number]['consumer_secret'],
                             access_token_key=Auth[token_number]['access_token_key'],
                             access_token_secret=Auth[token_number]['access_token_secret'],
                             auth_type='oAuth2')

            # r = api.request('statuses/user_timeline', {"user_id": user_id, 'count': 200, 'exclude_replies': 'true'})
            # Use the pager to page through the user's timeline
            r = TwitterRestPager(api, 'statuses/user_timeline', {"user_id": user_id, 'count': 200, 'exclude_replies': 'true'})
            for each in r.get_iterator():
                if 'text' in each:
                    FileSave(each)

        except TwitterRequestError as e:
            print(e.status_code)
            if e.status_code < 500:
                if e.status_code == 429 or e.status_code == 420:
                    print('I am sleeping')
                    time.sleep(450)
                elif e.status_code == 401:
                    pass
                else:
                    raise
                print('TwitterRequestError')
                # something needs to be fixed before re-connecting
                pass
            else:
                print('TwitterRequestError')
                # temporary interruption, re-try request
                pass
        # TwitterConnectionError is thrown when the connection times out or is interrupted.
        # You can always immediately try making the request again.
        except TwitterConnectionError:
            print('disconnected from Twitter Connection Error')
            # temporary interruption, re-try request
            pass

        except Exception as e:
            print(e)
Example #17
def search_tweets(api, word_list, region):
	"""Get tweets containing any words in 'word_list' and that have location or coordinates in 'region'."""
	words = ' OR '.join(word_list)
	params = { 'q': words }
	if region:
		params['geocode'] = '%f,%f,%fkm' % region # lat,lng,radius
	while True:
		pager = TwitterRestPager(api, 'search/tweets', params)
		for item in pager.get_iterator():
			if 'text' in item:
				parse_tweet(item)
			elif 'message' in item:
				if item['code'] == 131:
					continue # ignore internal server error
				elif item['code'] == 88:
					print('Suspend search until %s' % search.get_quota()['reset'])
				raise Exception('Message from twitter: %s' % item['message'])
Example #18
def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count):
	"""Get tweets containing any words in 'word_list'."""
	words = ' OR '.join(word_list)
	params = {'q':words, 'count':count}
	if region:
		params['geocode'] = '%f,%f,%fkm' % region # lat,lng,radius
	while True:
		pager = TwitterRestPager(api, 'search/tweets', params)
		for item in pager.get_iterator():
			if 'text' in item:
				if not no_retweets or 'retweeted_status' not in item:
					process_tweet(item, photo_dir, stalk)
			elif 'message' in item:
				if item['code'] == 131:
					continue # ignore internal server error
				elif item['code'] == 88:
					print('Suspend search until %s' % search.get_quota()['reset'])
				raise Exception('Message from twitter: %s' % item['message'])
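
Here 'region' is expected to be a (lat, lng, radius) tuple, so a hypothetical call might look like:

search_tweets(api, ['pizza'], photo_dir='photos', region=(40.7, -74.0, 25.0),
              stalk=False, no_retweets=True, count=100)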
Example #19
def Searching():
    api = TwitterAPI(consumer_key=Auth[token_number]['consumer_key'],
                     consumer_secret=Auth[token_number]['consumer_secret'],
                     access_token_key=Auth[token_number]['access_token_key'],
                     access_token_secret=Auth[token_number]['access_token_secret'],
                     auth_type='oAuth2')

    #https://dev.twitter.com/rest/reference/get/search/tweets
    #result_type:   'mixed' 'recent' 'popular'
    # come with the cursor of Max_id
    r = TwitterRestPager(api, 'search/tweets', {'q': search_words,
                                                'geocode': geo_for_searching + ',50km',
                                                'result_type': 'mixed',
                                                'count': 100,
                                                })

    for item in r.get_iterator():
        FileSave(item)
    twitter_log("searching_finished")
Example #20
def get_tweets():
    '''
    Gets tweets with the Twitter search API
    '''
    api = TwitterAPI(consumer_key, consumer_secret, '', '', auth_type='oAuth2')
    pager = TwitterRestPager(api, 'search/tweets', {'q':'%23lootcrate', 'count':100}) 

    a = []

    for i,item in enumerate(pager.get_iterator()):
        if 'text' in item:
            a.append(item)
            print(i)
        elif 'message' in item and item['code'] == 88:
            print('Rate Exceeded')
            break
    
    pickle.dump(a, open('tweets.p', 'wb'))
    return
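
To read the pickled tweets back later (the counterpart of the dump above):

import pickle

with open('tweets.p', 'rb') as f:
    tweets = pickle.load(f)
print(len(tweets), 'tweets loaded')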
Example #21
def search(request):
    # form = DateForm(request.GET)

    query = request.GET['q']
    date = request.GET['date']
    formatted_date = datetime.strptime(date, '%m/%d/%Y').strftime('%Y-%m-%d')

    # json_string = api.request('search/tweets', {'q': '%23' + query + ' since:2016-05-11'})
    # decoded_json = json_string.json()

    r = TwitterRestPager(api, 'search/tweets', {
        'q': '%23' + query + ' since:' + formatted_date,
        'count': 100
    })
    maxRT = 0
    tweet = {}
    count = 0
    for item in r.get_iterator(wait=2):
        if 'text' in item:
            print(item['text'])
            if item['retweet_count'] > maxRT:
                maxRT = item['retweet_count']
                tweet = {
                    'text': item['text'],
                    'rt_count': item['retweet_count']
                }
            count += 1
        elif 'message' in item and item['code'] == 88:
            print('SUSPEND, RATE LIMIT EXCEEDED: %s' % item['message'])
            break

    print(count)

    # pdb.set_trace()
    #
    #
    # for item in decoded_json['statuses']:
    #   print(item['text'])
    # c = Context({'results': decoded_json['statuses']})

    t = loader.get_template('results.html')
    c = Context({'count': count, 'tweet': tweet})
    return HttpResponse(t.render(c))
Example #22
def Searching():
    api = TwitterAPI(consumer_key=Auth[token_number]['consumer_key'],
                     consumer_secret=Auth[token_number]['consumer_secret'],
                     access_token_key=Auth[token_number]['access_token_key'],
                     access_token_secret=Auth[token_number]['access_token_secret'],
                     auth_type='oAuth2')

    #https://dev.twitter.com/rest/reference/get/search/tweets
    #result_type:   'mixed' 'recent' 'popular'
    # come with the cursor of Max_id
    r = TwitterRestPager(api, 'search/tweets', {'q': search_words,
                                                'geocode': geo_for_searching + ',50km',
                                                'result_type': 'mixed',
                                                'count': 100,  # the search API caps 'count' at 100
                                                })
    for item in r.get_iterator():
        if 'text' in item:
            id_list.put(item['user']['id_str'])
            print('Now queue size is %d' % id_list.qsize())
Example #24
def tweet_producer():
    kafka = KafkaClient(KAFKA_BROKER)
    producer = SimpleProducer(kafka)
    while True:
        try:
            api = TwitterAPI(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN_KEY,
                             ACCESS_TOKEN_SECRET)

            print('Using search term: %s' % SEARCH_TERM, file=sys.stderr)
            pager = TwitterRestPager.TwitterRestPager(api, 'search/tweets',
                                                      {'q': SEARCH_TERM})
            for item in pager.get_iterator():
                if 'text' in item:
                    tweet = {}
                    tweet['id'] = item['id']
                    # tweet['coordinates'] = item['coordinates']
                    #rhoover: the mktime/strptime below is incorrect (making times in the future) so using this for now
                    tweet['@timestamp'] = int(time.time() * 1000.0)
                    tweet['created_at'] = time.strftime(
                        '%Y-%m-%d %H:%M:%S',
                        time.strptime(item['created_at'],
                                      '%a %b %d %H:%M:%S +0000 %Y'))
                    # tweet['@timestamp'] = time.mktime(time.strptime(item['created_at'],"%a %b %d %H:%M:%S +0000 %Y")) * 1000
                    # tweet['place'] = item['place']
                    # ts = time.strftime('%Y-%m-%d %H:%M:%S', time.strptime(item['created_at'],'%a %b %d %H:%M:%S +0000 %Y'))

                    # tweet['@timestamp'] = item['created_at']
                    tweet['username'] = item['user']['name']
                    tweet['handle'] = item['user']['screen_name']
                    tweet['lang'] = item['lang']
                    tweet['timezone'] = item['user']['time_zone']
                    tweet['followers'] = item['user']['followers_count']
                    tweet['location'] = item['user']['location']
                    tweet['retweeted'] = item['retweeted']
                    tweet['text'] = item['text']
                    producer.send_messages(b'tweets',
                                           bytes(json.dumps(tweet), "UTF-8"))
                    twitter_metrics["tweets-consumed"] = twitter_metrics[
                        "tweets-consumed"] + 1
                elif 'message' in item and item['code'] == 88:
                    print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' %
                          item['message'],
                          file=sys.stderr)
                    time.sleep(120)
                    break
                print('Consumed %s tweets' %
                      twitter_metrics["tweets-consumed"],
                      file=sys.stderr)
        except:
            print(traceback.format_exc(), file=sys.stderr)
            print("Sleeping for 120 secs.", file=sys.stderr)
            time.sleep(120)
    return
Example #25
def harvest_friends():
    num = 1
    # get a user Id from database
    user_id = "40814404"
    api = TwitterAPI(
        consumer_key=Auth[token_number]['consumer_key'],
        consumer_secret=Auth[token_number]['consumer_secret'],
        access_token_key=Auth[token_number]['access_token_key'],
        access_token_secret=Auth[token_number]['access_token_secret'],
        auth_type='oAuth1')
    cursor = -1
    # while 1:
    r2 = TwitterRestPager(api, 'followers/list', {
        "user_id": user_id,
        'count': 200
    })

    for each_user_info in r2.get_iterator():
        if 'Australia' in each_user_info['location']:
            num += 1
            print(num)
Example #26
def twitter_to_csv(q, limit=500):
    consumer_key = settings.TWITTER_CONSUMER_KEY
    consumer_secret = settings.TWITTER_CONSUMER_SECRET
    access_token_key = settings.TWITTER_ACCESS_TOKEN_KEY
    access_token_secret = settings.TWITTER_ACCESS_TOKEN_SECRET
    api = TwitterAPI(consumer_key, consumer_secret, access_token_key,
                     access_token_secret)

    output = io.StringIO()
    writer = csv.writer(output)

    count = 0
    r = TwitterRestPager(api, 'search/tweets', {'q': q, 'count': 100})
    for item in r.get_iterator():
        if 'text' in item:
            count += 1
            author = item['user']['screen_name'].lower()
            mentions = item['entities']['user_mentions']
            text = re.sub(r'https?:\/\/.*[\r\n]*',
                          '',
                          item['text'],
                          flags=re.MULTILINE)
            text = re.sub(r'@([A-Za-z0-9_]+)', '', text, flags=re.MULTILINE)
            for mention in mentions:
                writer.writerow([author, mention['screen_name'].lower(), text])
            if len(mentions) == 0:
                writer.writerow([author, author, text])
        elif 'message' in item and item['code'] == 88:
            print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'])
            break
        if count > limit:
            break

    print('Twitter search for', q, '; results:', count)

    return output.getvalue()
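
A hypothetical way to persist the result (the file name is illustrative):

csv_text = twitter_to_csv('python', limit=200)
with open('mentions.csv', 'w') as f:
    f.write(csv_text)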
Example #27
def pull_tweets(tweets: int, hashtag: str) -> datetime:
    """
    Pulls the specified number of tweets and writes them to a file as a pickled Python set.
    :param tweets: number of tweets to pull
    :param hashtag: hashtag to search for
    :return: the start time of the pull
    """

    start_time = datetime.now()
    print(start_time)

    api = TwitterAPI(consumer_key, consumer_secret, access_token_key, access_token_secret)

    data_file = open('data/{}{}'.format(str(start_time), '.txt'), 'wb+')

    # use this so that we don't retrieve tweets that we have already gotten
    r = TwitterRestPager(api, 'search/tweets', {'q': '#{}'.format(hashtag), 'count': 100, 'lang': 'en'})

    tweet_set = set()
    for item in r.get_iterator():
        tweet = Tweet()
        if len(tweet_set) >= tweets:
            break
        if 'text' in item:
            tweet.hashtags = [hashtag['text'] for hashtag in item['entities']['hashtags']]
            tweet.text = item['text'].replace('\n', ' ')
            tweet.target = hashtag
            if tweet not in tweet_set:
                tweet_set.add(tweet)
                print(tweet.hashtags, tweet.text, tweet.target)
            print(len(tweet_set))

        elif 'message' in item and item['code'] == 88:
            print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'])
            time.sleep(16 * 60)

    pickle.dump(tweet_set, data_file, 2)
    data_file.close()
    print(datetime.now() - start_time)
    return start_time
Example #28
    def get_score(self, gene):
        """
        :param gene
        :return: Number of resent tweets of the gene symbol (plus likes and re-tweeks)
        """
        score = 0

        twitter_query = '{name} gene OR genomics'.format(name=gene.name)

        # REST API endpoint that closes after returning a maximum of 100 recent tweets.
        # TwitterRestPager spaces out successive requests (at least 5 seconds apart) to stay under the rate limit.
        tweets = TwitterRestPager(self.api, 'search/tweets', {
            'q': twitter_query,
            'count': 100
        })
        for tweet in tweets.get_iterator(wait=6):
            if 'retweeted_status' not in tweet:
                score += int(tweet['retweet_count']) + int(
                    tweet['favorite_count']) + 1
        sleep(6)

        return score
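
A hypothetical call, using a stand-in for the gene object this method expects (only a .name attribute is used):

from collections import namedtuple

Gene = namedtuple('Gene', 'name')               # stand-in for the real gene class
score = scorer.get_score(Gene(name='BRCA1'))    # 'scorer' is an instance of the class above
print(score)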
Example #30
def makeDB(word='analytics'):
    o = read_file()
    api = TwitterAPI(o['consumer_key'],o['consumer_secret'],o['access_token_key'],o['access_token_secret'])
    r = TwitterRestPager(api, 'search/tweets', {'q': word, 'count': 100})

    d = dict((v, k) for k, v in enumerate(calendar.month_abbr))
    try:
        flag = 0
        for item in r.get_iterator():
            if flag == 100:
                break
            flag += 1
            obj = User(U_id = item['user']['id'])
            obj.name = item['user']['screen_name']
            obj.location = item['user']['time_zone']
            x=list(map(str,item['user']['created_at'].split(' ')))
            obj.date = datetime.strptime(x[2] + str(d[x[1]]) + x[-1], "%d%m%Y").date()
            obj.text = item['text']
            obj.search = word
            obj.save()
    except:
        print "Request cannot be served due to the application's rate limit having been exhausted for the resource"
Example #31
def pagingCall(server, query='blockchain', key='Tweets', count=200, limit=200):
    # Keep tabs on the number of entries
    numEntries = 1

    r = TwitterRestPager(api, 'search/tweets', {'q':query, 'count':count})
    for item in r.get_iterator():
        if 'text' in item:
            # Pass-in tweets into a list
            server.rpush(key, re.sub(r"http\S+", "", item['text']))

            if(numEntries % 100 == 0):
                print 'Entries so far', numEntries
                #printValues(server, key)
            if(numEntries == limit):
                break

            # Move on to the next entry
            numEntries += 1

        elif 'message' in item and item['code'] == 88:
            print 'SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message']
            break

    return
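
The server argument behaves like a Redis client (it calls rpush), so a hypothetical invocation is:

import redis

r_server = redis.Redis(host='localhost', port=6379)   # assumed local Redis instance
pagingCall(r_server, query='blockchain', key='Tweets', limit=100)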
Example #32
    def _get_pager(self):
        '''Returns a paginating object over tweets'''
        config = self._load_config()

        api = TwitterAPI(
            config['consumer_key'],  # app-only auth
            config['consumer_secret'],
            auth_type='oAuth2')

        self.cache = config.get('cache', NUM_CACHE_TWEETS)
        query = config.get('query')
        assert query, 'expected a search query in config'

        params = {'q': query, 'count': TWEETS_PER_BATCH}
        self._log('Query: %r' % params)
        return TwitterRestPager(api, SEARCH_ENDPOINT, params)
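
A sketch of how the returned pager might be consumed (a hypothetical helper on the same class; wait=2 paces requests for app-only auth):

    def fetch_tweets(self):
        '''Hypothetical consumer for the pager above'''
        pager = self._get_pager()
        for item in pager.get_iterator(wait=2):
            if 'text' in item:
                self._log(item['text'])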
Example #33
# api and TEST_NUMBER are assumed to be defined earlier in the script
try:
    if TEST_NUMBER == 1:

        # POST A TWEET
        r = api.request('statuses/update',
                        {'status': 'the time is now %s' % datetime.now()})
        print(r.status_code)

    if TEST_NUMBER == 2:

        # GET 5 TWEETS CONTAINING 'ZZZ'
        for item in api.request('search/tweets', {'q': 'zzz', 'count': 5}):
            print(item['text'] if 'text' in item else item)

    if TEST_NUMBER == 3:

        # STREAM TWEETS FROM AROUND NYC
        for item in api.request('statuses/filter',
                                {'locations': '-74,40,-73,41'}):
            print(item['text'] if 'text' in item else item)

    if TEST_NUMBER == 4:

        # GET TWEETS FROM THE PAST WEEK OR SO CONTAINING 'LOVE'
        pager = TwitterRestPager(api, 'search/tweets', {'q': 'love'})
        for item in pager.get_iterator():
            print(item['text'] if 'text' in item else item)

except Exception as e:
    print(e)
Example #34
try:
    client = MongoClient('localhost', 27017)
    db = client['devtweets']
    tweets = db.tweets
except Exception as e:
    print('*** STOPPED %s' % str(e))

try:
    params = to_dict(args.parameters)
    oauth = TwitterOAuth.read_file('./credentials.txt')
    api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret, oauth.access_token_key, oauth.access_token_secret)
    """response = api.request('statuses/filter', {'locations':'-102.878723,21.659981,-101.997757,22.473779'})"""
    #response = api.request('search/tweets', {'q':'Aguascalientes 4sq com, ags 4sq com', 'count': 450})
    lastTweet = tweets.find({}).sort('id', -1).limit(1)
    str_lastTweetId = str(lastTweet[0]["id"])
    pager = TwitterRestPager(api, 'search/tweets', {'q':'Aguascalientes 4sq com, ags 4sq com', 'count':100, 'since_id': str_lastTweetId})

    #for item in response.get_iterator():
    for item in pager.get_iterator(10):
        print(item)
        tweets.insert(item)
        #print ('\n' % pager.get_rest_quota())

except KeyboardInterrupt:
    print('\nTerminated by user')

except Exception as e:
    print('*** STOPPED %s' % str(e))
Example #35
from TwitterAPI import TwitterRestPager, TwitterAPI
import couchdb
import time

consumer_key = ''
consumer_secret = ''

access_token = ''
access_secret_token = ''

couch = couchdb.Server()
db = couch['twitter_raw_data']

languages = [line.strip() for line in open('languages.txt')]

api = TwitterAPI(consumer_key, 
                 consumer_secret, 
                 access_token, 
                 access_secret_token)
while True:
    start_time = time.time()
    # the search API expects a string query, so join the languages with OR
    r = TwitterRestPager(api, 'search/tweets', {'q': ' OR '.join(languages), 'count': 100})
    for item in r.get_iterator():
        if 'text' in item:
            db.save(item)
        elif 'message' in item and item['code'] == 88:
            sleep_time = max(0, 900 - (time.time() - start_time))
            print('Request limit exceeded: %s\n' % item['message'])
            print('Going to sleep for {0}s\n'.format(sleep_time))
            time.sleep(sleep_time)
            continue
Example #36
BITLY_USERNAME = '******'
BITLY_API_KEY = '<use yours>'
BITLY_ACCESS_TOKEN = '<use yours>'

FOURSQUARE_ACCESS_TOKEN = '<use yours>'

api = TwitterAPI(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN_KEY,
                 ACCESS_TOKEN_SECRET)

bitly = bitly_api.Connection(access_token=BITLY_ACCESS_TOKEN)

#eastern = timezone('US/Eastern')
#utc = timezone('UTC')

pager = TwitterRestPager(api, 'search/tweets', {'q': SEARCH_TERM})


def get_checkid_and_s(url):
    checkin_index = url.find('checkin')
    mark_s_index = url.find('?s=')
    check_id = url[checkin_index + 8:mark_s_index]
    refer_ref_index = url.find('&ref')
    signature_id = url[mark_s_index + 3:refer_ref_index]

    return check_id, signature_id


def get_check_in_info(id, sig):
    dt = date.today()
Example #37
from secrets import consumer_key, consumer_secret, access_token_key, access_token_secret

api = TwitterAPI(consumer_key, consumer_secret, access_token_key,
                 access_token_secret)

data_followers = json.load(open('followers.json'))

main_account = sys.argv[1]

while True:
    try:
        if main_account not in data_followers or len(
                data_followers[main_account]) == 2000:
            print('followers/', main_account)
            r = TwitterRestPager(api, 'followers/list', {
                'screen_name': main_account,
                'count': 200
            })
            data_followers[main_account] = []
            for item in r.get_iterator(wait=60):
                data_followers[main_account].append(item)
                if len(data_followers[main_account]) % 200 == 0:
                    print('....', len(data_followers[main_account]))
            print(' ->', len(data_followers[main_account]))
            json.dump(data_followers,
                      open('followers.json', 'w'),
                      indent=2,
                      sort_keys=True,
                      ensure_ascii=False)
        break
    except TwitterError.TwitterRequestError as e:
        print(e)
Example #38
            db = client['todos_tweets']
            tweets = db.tweets
        except Exception as e:
            print('*** STOPPED %s' % str(e))

        try:
            params = to_dict(args.parameters)
            oauth = TwitterOAuth.read_file('./credentials.txt')
            api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret, oauth.access_token_key, oauth.access_token_secret)
            str_lastTweetId = None
            lastTweets = tweets.find({}).sort('id', -1).limit(1)
            for lastTweet in lastTweets:
                str_lastTweetId = str(lastTweet["id"])
            if str_lastTweetId is None:
                pager = TwitterRestPager(api, 'search/tweets', {'q':'4sq com', 'count':100})
            else:
                pager = TwitterRestPager(api, 'search/tweets', {'q':'4sq com', 'count':100, 'since_id': str_lastTweetId})

            #for item in response.get_iterator():

            for item in pager.get_iterator(5):
                #print('%d \n' % item["id"])
                #resp = urllib.urlopen(item["entities"]["urls"][0]["expanded_url"])
                #item["entities"]["urls"][0]["over_expanded_url"] = resp.url
                tweets.insert(item)
                #print ('\n' % pager.get_rest_quota())

        except KeyboardInterrupt:
            print('\nTerminated by user')
Example #39
def fetchTweets(apiKey, apiSecret, accessToken, accessTokenSecret,
                 query, lang='-', nTweets=100, nFlush=100, media='-',
                 mSize='medium', saveMedia=False, viewMedia=True,
                 workDir='cache', saveLog=True, logName='log.txt'):

    from TwitterAPI import TwitterAPI
    from TwitterAPI import TwitterRestPager
    api = TwitterAPI(apiKey, apiSecret, accessToken, accessTokenSecret)

    # Create directories and files, etc.
    curTime = time.strftime("%d_%b_%Y_%H.%M.%S")
    if not saveMedia:
        workDir = "cache"

    if viewMedia:
        if not os.path.exists(workDir):
            os.makedirs(workDir)
        os.chdir(workDir)

    if saveLog:
        f = open(logName, "w")

    print("Started fetching will following parameters:")
    print("query: ", query)
    print("lang: ", lang)
    print("nTweets: ", nTweets)
    print("nFlush: ", nFlush)
    print("media: ", media)
    print("mSize: ", mSize)
    print("viewMedia: ", viewMedia)
    print("saveMedia: ", saveMedia)
    print("workDir: ", workDir)
    print("saveLog: ", saveLog)
    print("logName: ", logName)

    # Create counters
    current = 0
    total = 0
    data = []
    stream = []
 
    # Create table header for printing
    tableHeader=["Name", "Handle", "Text", "Time stamp", "Hashtags", 
                 "Retweets", "Favorites", "Media", "Language", "Img Path"]
    keys = ["name", "handle", "content", "time", "hashtags", "rtCount", 
            "favCount", "media", "lang", "imgName"]

    # tableHeader=["Handle", "Name", "Text", "Time stamp", "Retweets", "Favorites"]

    # Search
    r = TwitterRestPager(api, 'search/tweets', {'q':query, 'count':100})

    # For each tweet
    for tweet in r.get_iterator():
        if 'text' in tweet: # if it's really a tweet and not something else

            # Check if it fits the media requirements (yes, no, don't care)
            if media != "-":
                cMedia = True if media == True else False 
                if ('media' not in tweet['entities']) & (cMedia == True):
                    continue 
                elif ('media' in tweet['entities']) & (cMedia == False):
                    continue
            else:
                if 'media' in tweet['entities']:
                    cMedia = True 
                else:
                    cMedia = False 

            # Check if it fits the language requirements (anything or specific)
            if lang != "-":
                tLang = lang
                if tweet['metadata']['iso_language_code'] != tLang:
                    continue
            else:
                tLang = tweet['metadata']['iso_language_code']

            # Collect the hashtags, if any
            if tweet['entities']['hashtags']:
                hashtags = "" 
                for tag in tweet['entities']['hashtags']:
                    hashtags = hashtags + tag['text'] + ", "
            else:
                hashtags = None 

            fileName = None

            if cMedia and viewMedia:
                cMedia += len(tweet['entities']['media'])
                mediaURL = tweet['entities']['media'][0]['media_url']
                fileName = str(total)+mediaURL[-4:] # last 4 are extension 
                urllib.request.urlretrieve(mediaURL+":"+mSize, fileName)

            # Push the tweet onto the stream stack
            stream.append([tweet['user']['name'], tweet['user']['screen_name'], 
                        tweet['text'].replace('\n', ' '), tweet['created_at'],
                        hashtags, tweet['retweet_count'], tweet['favorite_count'],
                        cMedia, tLang, fileName]) 

            # increment the counters
            current += 1
            total += 1

            # every 100 tweets, flush the stream to improve performance and add to a big stream
            if current == nFlush:
                data.extend(stream) # concatenate
                stream = []         # empty stack
                current = 0         # reset counter
            
            # max number of tweets
            if total >= nTweets:
                data.extend(stream) # concatenate
                break
        # this should not trigger, but just in case:
        # error code 88 is returned when we exceed the rate limit
        # (more than 1 request every 5 seconds), which results in a 15 minute block
        elif 'message' in tweet and tweet['code'] == 88:
            print('SUSPEND, RATE LIMIT EXCEEDED: %s' % tweet['message'])
            break
        #print(count)

    # print table
    table = "" 
    if saveLog:
        table = tabulate(data, headers=tableHeader, tablefmt='fancy_grid')
        f.write(table)
    
    result = []
    dictList = []
    for i in range(total):
        dictList.append(dict(zip(keys, data[i])))

    result = [dictList, total, workDir, table]
    os.chdir("..")
    print("Done Fetching!")

    return result
Example #40
DK_names = data_DK["Name"]
#Concatenate the batters and pitchers and drop dups
frames = [pitchers_names, batters_names]
all_players = pd.concat(frames)
all_players = all_players.drop_duplicates()
#Do some data cleaning so that the Data is easier to match up later
cleanplayerlst = []
for i in list(batters_names):
    test = i.split(",")
    test[0], test[1] = test[1], test[0]
    test = test[0].strip() + " " + test[1]
    cleanplayerlst.append(test)
#build list of iterators
iterator_list = [
    TwitterRestPager(api, 'search/tweets', {
        'q': name
    }).get_iterator(wait=2) for name in cleanplayerlst
]
player_dict = defaultdict(list)
count = 0
try:
    for player, iterator in enumerate(iterator_list):
        count += 1
        print "Player Count:", count
        if player != 0 and player % 60 == 0:
            time.sleep(1200)
        for idx, item in enumerate(iterator):
            if idx == 40:
                break
            if 'text' in item:
                player_dict[cleanplayerlst[player]].append(item['text'])
Example #41
WORDS_TO_COUNT = ['lion', 'wildbeest', 'snake']


API_KEY = '<use yours>'
API_SECRET = '<use yours>'
ACCESS_TOKEN = '<use yours>'
ACCESS_TOKEN_SECRET = '<use yours>'


api = TwitterAPI(API_KEY, API_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
words = ' OR '.join(WORDS_TO_COUNT)
counts = dict((word,0) for word in WORDS_TO_COUNT)


def process_tweet(text):
	text = text.lower()
	for word in WORDS_TO_COUNT:
		if word in text:
			counts[word] += 1
	print(counts)


r = TwitterRestPager(api, 'search/tweets', {'q':words, 'count':100})
for item in r.get_iterator(wait=6):
	if 'text' in item:
		process_tweet(item['text'])
	elif 'message' in item and item['code'] == 88:
		print('\n*** SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'])
		break
Example #42
	if TEST_NUMBER == 2:

		# GET 5 TWEETS CONTAINING 'ZZZ'
		for item in api.request('search/tweets', {'q':'zzz', 'count':5}):
			print(item['text'] if 'text' in item else item)

	if TEST_NUMBER == 3:

		# STREAM TWEETS FROM AROUND NYC
		for item in api.request('statuses/filter', {'locations':'-74,40,-73,41'}):
			print(item['text'] if 'text' in item else item)

	if TEST_NUMBER == 4:
		# GET TWEETS FROM THE PAST WEEK OR SO CONTAINING 'CATCHINGFIRE'
		pager = TwitterRestPager(api, 'search/tweets', {'q':'CatchingFire'})
		#for item in pager.get_iterator():
		#	print(item['text'] if 'text' in item else item)

	if TEST_NUMBER == 5:
		# GET TWEETS CONTAINING A GIVEN HASHTAG
		my_request_param_dict = {
			'q': '%23' + twitter_hashtag,
		}
		pager = TwitterRestPager(api, 'search/tweets', my_request_param_dict)
		for item in pager.get_iterator():
			#print item['id_str']
			a = item
			print "%d - %s" % (loop_counter_break, item['user']['name'],)
			try:
				dict_temp = {
Example #43
#!/usr/bin/python3

import sys, re, fileinput
import glob, fnmatch, os, os.path, tempfile, shutil
import time, datetime, sqlite3, json

from boot import api

from TwitterAPI import TwitterRestPager as TP

endpoint = 'statuses/mentions_timeline'
params = {'count': 200, 'trim_user': 1}
pager = TP(api, endpoint, params)

users = set()

for tweet in pager.get_iterator():
    users.add(tweet['user']['id_str'])  # or id as int

users = list(users)

endpoint = 'lists/members/create_all'
params = {'slug': 'Summer', 'owner_screen_name': 'shuravban', 'user_id': ""}

# the endpoint accepts only 100 users at a time
MAX = 100
n = len(users) // MAX + 1
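# e.g. 250 users -> n = 3 batches: users[0:100], users[100:200], users[200:250]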
for i in range(n):
    params['user_id'] = ", ".join(users[i * MAX:i * MAX + MAX])
    res = api.request(endpoint, params)
    if res.status_code != 200:
Example #45
# Print a user's timeline. This will get up to 3,200 tweets, which
# is the maximum the Twitter API allows.

from TwitterAPI import TwitterAPI, TwitterRestPager

SCREEN_NAME = 'TheTweetOfGod'

api = TwitterAPI(<consumer key>, 
                 <consumer secret>,
                 auth_type='oAuth2')

pager = TwitterRestPager(api, 
                         'statuses/user_timeline', 
                         {'screen_name':SCREEN_NAME, 'count':200})

count = 0
for item in pager.get_iterator(wait=3.5):
	if 'text' in item:
		count = count + 1
		print(count, item['text'])
	elif 'message' in item:
		print(item['message'])
		break
Example #46
worksheet.write(0, 6, 'Favourited')
worksheet.write(0, 7, 'Image URL')

# Set up initial count and default data
count = 0
user = '******'
imageUrl = 'No image'
text = 'No text'
retweeted = 'false'
retweetCount = 0
createdAt = 'No time'
favorited = 'false'

# Return tweets, 100 at a time, with our chosen query (for hashtags, use "%23" to encode the hashtag character)
r = TwitterRestPager(api, 'search/tweets', {
    'q': '%23SomeHashtag',
    'count': 100
})
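# Note: a hypothetical, more explicit way to percent-encode the hashtag is the
# standard library's quote(), e.g.:
#   from urllib.parse import quote
#   r = TwitterRestPager(api, 'search/tweets', {'q': quote('#SomeHashtag'), 'count': 100})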

# Loop through each item in the response, waiting 6 seconds between requests to stay under the rate limit
for item in r.get_iterator(wait=6):
    # If we have text within this item then the tweet is valid
    if 'text' in item:
        # Update our variables
        count += 1
        text = item['text']
        retweeted = item['retweeted']
        retweetCount = item['retweet_count']
        createdAt = item['created_at']
        favorited = item['favorited']
        entities = item['entities']
        userObject = item['user']