Example #1
    def keywords_search(self, keywords, num_tweets, startDate, endDate):
        # Note: startDate is unused here; the search is bounded only by until=endDate.
        tweets = []

        data = Cursor(self.twitter_client.search,
                      q=keywords,
                      until=endDate,
                      lang="en").items(num_tweets)

        while True:
            try:
                tweet = data.next()

                if tweet.retweet_count > 0:
                    if tweet not in tweets:
                        tweets.append(tweet)
                else:
                    tweets.append(tweet)

            except tweepy.TweepError:  # raised when Twitter's API rate limit is hit
                print(
                    "Twitter's free API rate limit has been reached. More data can be requested in fifteen minutes. Here is what we were able to pull:"
                )
                break
            except Exception:  # stop on any other error as well
                break

        return tweets
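
A minimal usage sketch for the method above; the enclosing class is not shown, so the `TwitterClient` wrapper name and its construction are assumptions for illustration:

client = TwitterClient()  # hypothetical wrapper holding an authenticated self.twitter_client
tweets = client.keywords_search("python", num_tweets=100,
                                startDate="2021-01-01", endDate="2021-01-08")
print(f"Collected {len(tweets)} tweets")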
Example #2
 def tweet_gathering(api: API,
                     query: str,
                     date_since: str,
                     lang: str = 'en'):
     try:
         logger.info("Retrieving Tweets ... ")
         # Collect tweets
         # Note: date_since is accepted but never applied to the query, and
         # wait_on_rate_limit / wait_on_rate_limit_notify are tweepy API()
         # constructor options rather than search parameters, so rate limits
         # are handled by the RateLimitError branch below instead.
         tweets = Cursor(api.search,
                         lang=lang,
                         q=query,
                         include_entities=True,
                         result_type="recent",
                         tweet_mode='extended').items()
         while True:
             try:
                 tweet: Status = tweets.next()
                 print(tweet)
                 yield tweet
             except RateLimitError:
                 time.sleep(60 * 15)
                 continue
             except StopIteration:
                 break
     except Exception as e:
         logger.error(e)
Example #3
 def get_tweets_between_date2(self, start, end):
     df = pd.DataFrame()
     c = Cursor(self.twitter_client.user_timeline,
                id=self.twitter_user).pages()
     analyzer = tweet_analyzer()
     while True:
         try:
             tweets_list = c.next()
             tweets = []
             for tweet in tweets_list:
                 if tweet.created_at < end and tweet.created_at > start:
                     tweets.append(tweet)
                 elif tweet.created_at < start:
                     return df
             if len(tweets) != 0:
                 temp = analyzer.tweets_to_dataframe(tweets)
                 if df.empty:
                     df = temp
                 else:
                     df = pd.concat([df, temp], ignore_index=True)  # append/concat returns a copy, so reassign
             time.sleep(0.15)
         except TweepError:
             time.sleep(60 * 15)
             continue
         except StopIteration:
             return df
Example #4
    def testcursornext(self):
        """
        Test cursor.next() behavior, id being passed correctly.
        Regression test for issue #518
        """
        cursor = Cursor(self.api.user_timeline, id='twitter').items(5)
        status = cursor.next()

        self.assertEqual(status.user.screen_name, 'twitter')
Example #5
 def get_image_tweet(self):
     c = Cursor(self.twitter_client.user_timeline,
                id=self.twitter_user,
                include_entities=True).pages()
     tweets = c.next()
     for tweet in tweets:
         if 'media' in tweet.entities:
             for image in tweet.entities['media']:
                 print(image['media_url'])
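
Note that the cursor above is advanced only once, so just the first page of the timeline is inspected. A sketch of a variant that walks every page (same assumed `twitter_client` / `twitter_user` attributes):

 def get_all_image_urls(self):
     urls = []
     for page in Cursor(self.twitter_client.user_timeline,
                        id=self.twitter_user,
                        include_entities=True).pages():
         for tweet in page:
             # entities['media'] exists only on tweets with attached media
             for image in tweet.entities.get('media', []):
                 urls.append(image['media_url'])
     return urls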
Example #6
def insert_user_with_friends(graph_db, twitter_user, user_labels=None):
    # Avoid the mutable-default-argument pitfall.
    user_labels = list(user_labels) if user_labels else []
    user_labels.append("SeedNode")
    if isinstance(twitter_user, str):
        try:
            twitter_user = api.get_user(twitter_user)
        except tweepy.TweepError:
            # Assume a rate limit, wait out the window, then retry the lookup.
            time.sleep(60 * 16)
            twitter_user = api.get_user(twitter_user)
    create_or_get_node(graph_db, twitter_user, user_labels)
    friend_count = 0
    print(u"\nINSERTING FOR: {}".format(twitter_user.name))
    friends = Cursor(api.friends, user_id=twitter_user.id_str,
                     count=200).items()
    try:
        while True:
            try:
                friend = friends.next()
            except tweepy.TweepError:
                print("exceeded rate limit. waiting")
                time.sleep(60 * 16)
                friend = friends.next()

            #print(u"    INSERTING: {}".format(friend.name))
            friend_count += 1
            sys.stdout.write('.')
            if friend_count % 10 == 0: sys.stdout.write(' ')
            if friend_count % 50 == 0: sys.stdout.write('| ')
            if friend_count % 100 == 0: print()

            create_or_get_node(graph_db, friend)
            query_string = """
                MATCH (user:User {id_str:{user_id_str}}),(friend:User {id_str:{friend_id_str}})
                CREATE UNIQUE (user)-[:FOLLOWS]->(friend)
                """
            data = {
                "user_id_str": twitter_user.id_str,
                "friend_id_str": friend.id_str
            }
            n = graph_db.cypher.execute(query_string, data)

    except StopIteration:
        print(u"\n    Total Friend Count = {}".format(friend_count))
Example #7
def limit_handled(cursor: tweepy.Cursor):
    """Wrap cursor access with rate limiting

    :param cursor: The cursor to siphon
    :returns: Cursor items

    """
    while True:
        try:
            yield cursor.next()
        except tweepy.RateLimitError:
            time.sleep(15 * 60)
        except StopIteration:
            # Exit explicitly: a StopIteration escaping a generator becomes
            # a RuntimeError under PEP 479 (Python 3.7+).
            return
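
A usage sketch for the wrapper above, assuming an authenticated tweepy `api` object:

for status in limit_handled(tweepy.Cursor(api.user_timeline,
                                          screen_name="twitter").items()):
    print(status.text)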
Example #8
def get_tweets_by_cursor(query):
    api = API(auth)
    query = query + " -RT"
    cursor = Cursor(api.search, q=query, lang="en").items(5000)
    while True:
        try:
            tweet = cursor.next()
            print(tweet._json)
            database.tweets.insert_one(tweet._json)  # insert() is deprecated in pymongo 3+
        except TweepError:
            # Assume the error is a rate limit and wait out the 15-minute window.
            time.sleep(60 * 15)
            continue
        except StopIteration:
            break
Example #9
def rate_limit_handler(cursor: tweepy.Cursor):
    """
    Handler for tweepy Cursors and automatically stops
    execution when rate limit is reached

    params:
        cursor(tweepy.Cursor) - cursor to handle
    """
    while True:
        try:
            yield cursor.next()
        except tweepy.RateLimitError:
            print("Oh no!! We hit the rate limit. Resuming in 15 mins.")
            time.sleep(15 * 60)
Example #10
 def handle_rate_limit(cursor: tweepy.Cursor):
     """
     If tweepy hits Twitter' API limit (180 calls in 15 minutes), wait for 15 minutes before continuing search.
     http://docs.tweepy.org/en/latest/code_snippet.html#handling-the-rate-limit-using-cursors
     :param cursor: Tweepy cursor iterator
     :return: Next iteration of cursor
     """
     while True:
         try:
             yield cursor.next()
         except tweepy.RateLimitError:
             # sleep for 15 minutes
             logging.warning(
                 "Hit Twitter APIs rate limit, sleeping for 15 minutes")
             time.sleep(15 * 60)
Example #11
 def get_tweets_between_date(self, start, end):
     tweets_during_florence = []
     c = Cursor(self.twitter_client.user_timeline, id=self.twitter_user).items()
     while True:
         try:
             tweet = c.next()
             if start < tweet.created_at < end:
                 tweets_during_florence.append(tweet)
             elif tweet.created_at < start:
                 return tweets_during_florence
         except TweepError:
             time.sleep(60*15)
             continue
         except StopIteration:
             return tweets_during_florence
Example #12
def get_tweets_for_feature_extraction(query, count):
    api = API(auth)
    query = query + " -RT"
    cursor = Cursor(api.search, q=query, lang="en").items(count)
    tweets = []
    while True:
        try:
            tweet = cursor.next()
            tweets.append(tweet._json)
        except TweepError as e:
            print(e)
            time.sleep(60 * 5)
            continue
        except StopIteration:
            break
    return tweets
Example #13
    def handle_rate_limit(cursor: tweepy.Cursor):
        """
        If Twitter API rate limit is exceeded (180 calls in 15 minutes), wait for 15 minutes before continuing
        :param cursor: Tweepy cursor
        :return:
        """

        while True:
            try:
                yield cursor.next()

            except tweepy.RateLimitError:
                # Pause for 15 minutes
                logging.warning(
                    "Twitter API rate limit exceeded, waiting for 15 minutes before continuing."
                )
                time.sleep(15 * 60)

            except StopIteration:
                return  # cursor exhausted; avoid a PEP 479 RuntimeError
Example #14
    def fetch_tweets(self):
        cursor = Cursor(SentimentAnalysis.tweepy_api.search, q=f'#{self.search_key} -filter:retweets',
                    count=100, tweet_mode='extended', lang='en').items(self.tweet_count)

        df = pd.DataFrame()

        i = 1
        while True:
            print(f'Running... {i}\r', end='')
            try:
                tweet = cursor.next()
                row = {
                    'id': i,
                    'tweet_id': tweet.id,
                    'screen_name': tweet.user.screen_name,
                    'name': tweet.user.name,
                    'tweet_date': str(self.datetime_from_utc_to_local(tweet.created_at)),
                    'location': tweet.user.location,
                    'retweet_count': tweet.retweet_count,
                    'like_count': tweet.favorite_count,
                    'followers_count': tweet.user.followers_count,
                    'following_count': tweet.user.friends_count,
                    'text': tweet.full_text or tweet.text,
                    'embed_url': f'https://twitter.com/{tweet.user.screen_name}/status/{tweet.id}'
                }
                polarity, polarity_score = self.calc_polarity(row)
                row['polarity'], row['polarity_score'] = polarity, polarity_score
                new_rows = pd.DataFrame([row], index=[i])
                df = pd.concat([df, new_rows])
                self.send_response(row)
            except RateLimitError:
                # RateLimitError subclasses TweepError, so catch it first;
                # otherwise this branch is unreachable.
                break
            except TweepError:
                break
            except StopIteration:
                break
            i = i + 1
        
        print('\nCompleted')
        self.save_files(df)
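
The method above relies on a `calc_polarity` helper that is not shown. A minimal sketch of a compatible implementation; the use of TextBlob here is an assumption for illustration:

from textblob import TextBlob

def calc_polarity(self, row):
    # Hypothetical sentiment scoring: TextBlob polarity in [-1, 1].
    score = TextBlob(row['text']).sentiment.polarity
    if score > 0:
        return 'positive', score
    if score < 0:
        return 'negative', score
    return 'neutral', score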
Example #15
def fetch_tweets(search_key):
    os.mkdir(f'./{search_key}')
    cursor = Cursor(api.search, q=f'#{search_key} -filter:retweets',
                    count=100, tweet_mode='extended').items(15000)

    df = pd.DataFrame()

    i = 1
    while True:
        print(f'Running... {i}\r', end='')
        try:
            tweet = cursor.next()
            row = {
                'tweet_id': tweet.id,
                'screen_name': tweet.user.screen_name,
                'name': tweet.user.name,
                'tweet_date': datetime_from_utc_to_local(tweet.created_at),
                'location': tweet.user.location,
                'retweet_count': tweet.retweet_count,
                'like_count': tweet.favorite_count,
                'followers_count': tweet.user.followers_count,
                'following_count': tweet.user.friends_count,
                'text': tweet.full_text or tweet.text,
                'embed_url': f'https://twitter.com/{tweet.user.screen_name}/status/{tweet.id}?s=20'
            }
            df = pd.concat([df, pd.DataFrame([row], index=[i])])
        except RateLimitError:
            # RateLimitError subclasses TweepError, so catch it first.
            break
        except TweepError:
            break
        except StopIteration:
            break
        i = i + 1

    df = df.sort_values(by=['like_count', 'retweet_count', 'followers_count'], ascending=False)
    df.to_csv(path_or_buf=f'./{search_key}/{search_key}.csv')
    df['screen_name'].value_counts().to_csv(path_or_buf=f'./{search_key}/screen_name_freq.csv')
    print('\nCompleted.')
Example #16
def fetch_tweets(kwd, since_id, channel, redis_conf):
    """

    :param kwd:
    :param since_id:
    :param channel:
    :param redis_conf:
    :return:
    """
    r = redis_conf['cursor']
    key = redis_conf['key']

    api, credential_id = get_twitter_client(r, key)
    if not api:
        logger.info(f"{credential_id} failed ...using another one ...")
        api, credential_id = get_twitter_client(r, key)

    keyword = kwd['kwd']
    keyword = f'"{keyword} "' + config.get('FETCHER', 'FILTER')

    page_remaining = int(config.get('FETCHER', 'PAGE_LIMIT'))
    tweets_cursor = Cursor(api.search,
                           q=keyword,
                           count=100,
                           since_id=since_id,
                           tweet_mode='extended').pages(page_remaining)
    page_index = 0
    retry = 0
    t_id = 0
    _sleep = 0
    sleep_delay = int(config.get('FETCHER', 'SLEEP'))
    retry_limit = int(config.get('FETCHER', 'RETRY_LIMIT'))

    while True:
        try:
            print(kwd, page_index)
            tweets, t_id = process_page(tweets_cursor.next(), kwd, page_index)
            feed_saver_new_keyword_tweets(channel, tweets)
            page_index += 1
            page_remaining = int(config.get('FETCHER',
                                            'PAGE_LIMIT')) - page_index
            # sleep(1)

        except StopIteration:
            if page_index == 0:
                # No Tweets Found
                data = {'status': 404, 'k_id': kwd['k_id']}
                feed_saver_new_keyword_tweets(channel, data)
            else:
                # last packet for this kwd so that saver can update scheduled_on
                data = {'status': 202, 'k_id': kwd['k_id']}
                feed_saver_new_keyword_tweets(channel, data)

            # Change credential & lpush current credential id
            r.lpush(key, credential_id)
            return True

        except TweepError as error:
            logger.error(
                f"Tweepy Exception occurred for credential id {credential_id} : {error}"
            )
            # Change credential & lpush current credential id
            r.lpush(key, credential_id)
            retry += 1
            if retry <= retry_limit:
                logger.info(f"Retrying for keyword {kwd['kwd']}")
                _sleep += sleep_delay
                sleep(_sleep)
                api, credential_id = get_twitter_client(r, key)
                tweets_cursor = Cursor(
                    api.search,
                    q=keyword,
                    count=100,
                    since_id=since_id,
                    max_id=t_id,
                    tweet_mode='extended').pages(page_remaining)
                continue
            # finally after retries
            data = {'status': 500, 'k_id': kwd['k_id']}
            feed_saver_new_keyword_tweets(channel, data)
            return False

        except Exception as e:
            # push keyword in queue & maintain log
            logger.error(
                f"Exception occurred for keyword {kwd['kwd']}. Exception : {e}"
            )
            retry += 1
            # Change credential & lpush current credential id
            r.lpush(key, credential_id)
            if retry <= retry_limit:
                _sleep += sleep_delay
                logger.info(f"Retrying for keyword {kwd['kwd']}")
                api, credential_id = get_twitter_client(r, key)
                tweets_cursor = Cursor(
                    api.search,
                    q=keyword,
                    count=100,
                    since_id=since_id,
                    max_id=t_id,
                    tweet_mode='extended').pages(page_remaining)
                continue

            data = {'status': 500, 'k_id': kwd['k_id']}
            feed_saver_new_keyword_tweets(channel, data)
            return False
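
The example assumes a `get_twitter_client(r, key)` helper that pops a credential id from the Redis list and returns an authenticated client (the ids are pushed back with `lpush` above). A minimal sketch of such a helper; the credential storage layout is an assumption:

import json
from tweepy import API, OAuthHandler

def get_twitter_client(r, key):
    # Rotate credentials kept in a Redis list: take the least recently used id.
    credential_id = r.rpop(key)
    if credential_id is None:
        return None, None
    # Hypothetical layout: each credential is a JSON blob under its own key.
    creds = json.loads(r.get(f"credential:{credential_id}"))
    auth = OAuthHandler(creds['consumer_key'], creds['consumer_secret'])
    auth.set_access_token(creds['access_token'], creds['access_secret'])
    return API(auth), credential_id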
Example #17
today = datetime.today().date()
week_ago = today - timedelta(7)

start_date = week_ago

while start_date < today:
    end_date = start_date + timedelta(1)
    c = Cursor(api.search,
               q=query,
               since=start_date.strftime('%Y-%m-%d'),
               until=end_date.strftime('%Y-%m-%d'),
               lang="en").items(400)
    while True:
        try:
            data = c.next()
            tweet = data._json
            print(tweet["created_at"], tweet["source"])
            csvWriter.writerow(process_tweet(tweet))
        except tweepy.TweepError:
            print("-------------------- GOT ERROR --------------------")
            time.sleep(60)
            continue
        except StopIteration:
            break
    start_date += timedelta(1)


# write to csv
# csvFile = open('data/' + since_date + ':' + until_date + '.csv', 'a')
# csvWriter = csv.writer(csvFile)
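
The loop assumes `csvWriter` and `process_tweet` are defined elsewhere (the commented-out lines hint at the CSV setup). A minimal sketch of compatible definitions; the chosen columns are an assumption:

import csv

csvFile = open('data/tweets.csv', 'a', newline='')
csvWriter = csv.writer(csvFile)

def process_tweet(tweet):
    # Hypothetical row layout built from the raw JSON dict used above.
    return [tweet['id_str'], tweet['created_at'], tweet['source'],
            tweet.get('full_text', tweet.get('text', ''))]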
Example #18
        account_created_date = item.created_at
        delta = datetime.utcnow() - account_created_date
        account_age_days = delta.days
        print("Account age (in days): " + str(account_age_days))

        if account_age_days > 0:
            print("Average tweets per day: " + "%.2f" %
                  (float(tweets) / float(account_age_days)))

        end_date = datetime.utcnow() - timedelta(days=30)
        cur = Cursor(auth_api.user_timeline, id=target).items()

        while True:
            try:

                status = cur.next()
                #print(status)
                tweet_count += 1
                if hasattr(status, "text"):
                    print(status.text)
                    sentiment = get_tweet_sentiment(status.text)

                if hasattr(status, "id"):
                    print(status.id)
                    retweets_list = auth_api.retweets(status.id)

                    # {id : vector<string>}

                    for x in retweets_list:
                        retweet.append(x.user.screen_name)
                        # print(retweet.user.screen_name)