Exemplo n.º 1
0
def user_tweets(api, user_id=None, screen_name=None, limit=None, **kwargs):
    """
    Fetch a user's tweets from the Twitter REST API, as many as possible or
    up to the given limit.

    Takes an authenticated API object (API or APIPool) and exactly one of
    user_id or screen_name. Any extra keyword arguments are forwarded to the
    underlying Tweepy/APIPool call, allowing full API parameterization.
    Returns a cursor (iterator) over Tweepy status objects.
    """
    if not (user_id or screen_name):
        raise Exception("Must provide one of user_id or screen_name")
    # Select the identifier kwarg; the guard above ensures one is present.
    selector = {"user_id": user_id} if user_id else {"screen_name": screen_name}
    cursor = Cursor(api.user_timeline, count=200, **selector, **kwargs)
    if limit:
        return cursor.items(_check_limit(limit))
    return cursor.items()
Exemplo n.º 2
0
def ensure_users_edges_in_db(user, edges_collection, twitter_api):
    """Fetch a user's followers_ids and friends_ids from the Twitter API and
    store both edge sets in the given db collection.

    Returns a tuple (friends_ids, followers_ids)."""
    uid = user['id']

    logging.info(".. Fetching followers_ids for user {0}.".format(uid))
    logging.info(".... user has {0} followers.".format(
        user['followers_count']))
    follower_edges = []
    for follower_id in Cursor(twitter_api.followers_ids, id=uid).items():
        follower_edges.append({'from': follower_id, 'to': uid})
    store_edges(edges_collection, follower_edges)
    followers_ids = [edge['from'] for edge in follower_edges]

    logging.info(".. Fetching friends_ids for user {0}.".format(uid))
    logging.info(".... user has {0} friends.".format(user['friends_count']))
    friend_edges = []
    for friend_id in Cursor(twitter_api.friends_ids, id=uid).items():
        friend_edges.append({'to': friend_id, 'from': uid})
    store_edges(edges_collection, friend_edges)
    friends_ids = [edge['to'] for edge in friend_edges]

    return friends_ids, followers_ids
Exemplo n.º 3
0
def query_tweets(api, query, limit=None, languages=None):
    """
    Search the Twitter REST API for tweets matching the given search 'query'.

    Takes an authenticated api object (API or APIPool), a query string, an
    optional limit on the number of tweets returned, and an optional list of
    languages to further filter results.
    Returns a cursor (iterator) over Tweepy status objects (not native JSON).
    """
    cursor = Cursor(api.search, q=query, include_entities=True, lang=languages)
    if not limit:
        return cursor.items()
    return cursor.items(_check_limit(limit))
Exemplo n.º 4
0
def query_tweets(api, query, limit=None, languages=None):
    """
    Search the Twitter REST API for tweets matching the given search 'query'.

    Takes an authenticated api object (API or APIPool), a query string, an
    optional limit on the number of tweets returned, and an optional list of
    languages to further filter results.
    Returns a cursor (iterator) over Tweepy status objects (not native JSON).
    """
    cursor = Cursor(api.search, q=query, include_entities=True, lang=languages)
    if not limit:
        return cursor.items()
    # Validate the limit before handing it to the cursor.
    _check_limit(limit)
    return cursor.items(limit)
Exemplo n.º 5
0
    def scraper(self, api, queries, fetch_num):
        """Collect tweets for analysis via a Tweepy cursor, one search at a time.

        :param api: initialized Twitter API object
        :param queries: list of string searches to be run on Twitter
        :param fetch_num: number of tweets to return per search term
        :return: set of tweet tuples, each holding the metadata get_data selects
        """
        # -filter:retweets excludes all RTs - recommended for sentiment analysis
        all_tweets = set()

        for term in queries:
            term += " -filter:retweets"
            cursor = Cursor(api.search,
                            lang='en',
                            rpp=100,
                            tweet_mode='extended',
                            q=term)
            print(f"Gathering tweets for '{term}'...")
            statuses = cursor.items(fetch_num)
            collected = []
            try:
                # get_data() picks the desired metadata fields per tweet.
                collected = [self.get_data(status._json) for status in statuses]
                time.sleep(5)
            except TweepError:
                print(f"Error detected on {term}")
            # Tuples are hashable, so each row can join the de-duplicating set.
            for row in collected:
                all_tweets.add(tuple(row))

        return self.filter(all_tweets)
Exemplo n.º 6
0
def run(args):
    """Search Twitter for configured hashtags and post a status linking to
    each result, resuming after the last tweet id persisted in the session
    file (args.settings points to the INI config).
    """
    logging_config = dict(
        level=INFO,
        format=
        '[%(asctime)s - %(filename)s:%(lineno)d - %(funcName)s - %(levelname)s] %(message)s'
    )
    basicConfig(**logging_config)

    logger.debug("Reading config file, %s", args.settings)

    config = ConfigParser()
    config.read(args.settings)

    logger.debug("Read config file")

    # Hashtags are stored comma-separated; join them into a single OR query.
    query = " OR ".join(config.get("app", "hashtags").split(", "))
    max_results = config.getint("app", "max_results")
    rt_msg = config.get("app", "rt_msg")
    session_file = config.get("app", "session_file")
    # The configured session path may contain a {cwd} placeholder.
    session_file = session_file.format(cwd=getcwd())

    logger.debug("query=%s", query)
    logger.debug("max_results=%d", max_results)
    logger.debug("rt_msg=%s", rt_msg)
    logger.debug("session_file=%s", session_file)

    # Resume from the last tweet id handled by a previous run (None first time).
    persist = PersistentDict(session_file)
    since_id = persist.get(since_id_name, None)
    logger.debug("Retrieved since_id %s", since_id)

    twitter_api = create_api(config.get("twitter", "consumer_key"),
                             config.get("twitter", "consumer_secret"),
                             config.get("twitter", "access_key"),
                             config.get("twitter", "access_secret"))

    search = Cursor(twitter_api.search, q=query, since_id=since_id)

    # NOTE(review): this local shadows the builtin 'format'; kept as-is.
    format = "{msg} https://twitter.com/{screen_name}/status/{status_id}"

    results = search.items(max_results)

    for tweet in results:
        msg = format.format(msg=rt_msg,
                            screen_name=tweet.author.screen_name,
                            status_id=tweet.id)

        logger.info("tweeting: %s", msg)

        try:
            twitter_api.update_status(msg)
        # NOTE(review): bare except swallows everything (even KeyboardInterrupt);
        # the failure is at least logged with a traceback.
        except:
            logger.exception("Error posting tweet!")

    # Persist the newest tweet id so the next run skips already-seen tweets.
    # NOTE(review): reads since_id off the first cached result page — presumably
    # the most recent page; confirm against tweepy's page_iterator internals.
    if len(results.page_iterator.results) > 1:

        logger.info("Saving last id %s",
                    results.page_iterator.results[0].since_id)
        persist[since_id_name] = results.page_iterator.results[0].since_id

    persist.sync()
    def search_tweets(self, query, lang, depth=1000):
        """Yield cleaned text for up to 'depth' tweets matching a search query.

        Arguments:
            query (str): search string with logic operators (AND, OR...)
            lang (str): language abbreviation used to filter the tweets
            depth (int, optional): number of tweets to retrieve

        Yields:
            str: cleaned tweet text
        """
        try:
            results = Cursor(method=self.API.search,
                             q=query,
                             lang=lang,
                             count=100,
                             tweet_mode='extended')

            for status in results.items(depth):
                # Extract the raw text, then normalize it before yielding.
                text = get_tweet_text(status)
                yield clean_text(text)

        except TweepError:
            exit('Unable to find ' + str(depth) + ' tweets')
    def process(self, cursor: tweepy.Cursor):
        """Stream each filtered tweet from the cursor to the AWS Kinesis
        Firehose delivery stream, logging progress every logging_interval.

        :param cursor: Tweepy cursor
        """
        logging.info("Processing tweets")

        for tweet in self.handle_rate_limit(cursor.items(self.limit)):
            record = self.filter(tweet)

            try:
                logging.debug("Streaming tweet data to Kinesis")
                response = self.firehose_client.put_record(
                    DeliveryStreamName=self.delivery_stream,
                    Record={"Data": json.dumps(record)})
                logging.debug(response)

            except ClientError as ex:
                # Log the client error; keep going with the next tweet.
                logging.exception(
                    f"Failed to stream tweet data to AWS Kinesis: {ex}.")

            finally:
                # Count every tweet, whether or not the put succeeded.
                self.counter += 1
                if self.counter % self.logging_interval == 0:
                    logging.info(f"Processed {self.counter} tweets.")
Exemplo n.º 9
0
    def get_friends(self, screen_name=None, user_id=None, max_friends=2000):
        """Return up to max_friends friend ids for the given account.

        Params:
            screen_name like "barackobama" or "s2t2", or user_id
            max_friends caps the fetch for performance; re-scrape later if hit

        Returns a list of friend_ids (empty if the account is private), or
        None when neither identifier was supplied.

        See: http://docs.tweepy.org/en/v3.8.0/api.html#API.friends_ids
            http://docs.tweepy.org/en/v3.8.0/cursor_tutorial.html
            https://developer.twitter.com/en/docs/basics/cursoring
        """
        if screen_name is not None:
            cursor = Cursor(self.api.friends_ids, screen_name=screen_name, cursor=-1)
        elif user_id is not None:
            cursor = Cursor(self.api.friends_ids, user_id=user_id, cursor=-1)
        else:
            print("OOPS PLEASE PASS SCREEN NAME OR USER ID")
            return None

        collected = []
        try:
            collected.extend(cursor.items(max_friends))
        except TweepError as err:
            # "Not authorized." when the user is private / protected.
            print("OOPS", err)
        return collected
Exemplo n.º 10
0
    def fetch_user_timeline(self, request_params=None, limit=2_000):
        """
            Fetch tweets from a single user's timeline.

            See:
                https://docs.tweepy.org/en/latest/api.html#timeline-methods
                https://docs.tweepy.org/en/v3.10.0/cursor_tutorial.html

            Params:
                request_params (dict) needs either "user_id" or "screen_name" attr

                limit (int) the number of total tweets to fetch per user

                ... or overwrite any of the default params

            Example: get_user_timeline({"user_id": 10101, "count": 100}, limit=300)
        """
        default_params = {
            "exclude_replies": False,
            "include_rts": True,
            "tweet_mode": "extended", # access the full text
            "count": 200 # number of tweets per request
        }
        # None default avoids the shared-mutable-default pitfall; merge order
        # lets caller-specified params override the defaults (including the
        # required user_id or screen_name).
        request_params = {**default_params, **(request_params or {})}
        request_params["cursor"] = -1 # use a cursor approach!

        cursor = Cursor(self.api.user_timeline, **request_params)
        return cursor.items(limit)
Exemplo n.º 11
0
def getFollowersLv2(num):
    """Fetch follower metadata for the accounts listed in followers.txt,
    starting at line index num, appending one pipe-delimited row per follower
    to followers3.txt.

    Fixes vs. the original: Python 3 print() calls (the old print statements
    were Python-2 only), context-managed files so handles close on error, and
    the loop is bounded by the number of input lines (the original counter
    incremented forever and eventually crashed with IndexError).
    """
    auth = tweetielytics.twitterAuth()
    api = tweepy.API(auth)

    with open('followers.txt', 'r') as f, open('followers3.txt', 'a') as g:
        lines = f.readlines()
        counter = num  # 30
        # A falsy starting counter (0) skips the loop, as before.
        while counter and counter < len(lines):
            cursor = Cursor(api.followers, id=lines[counter])
            print(str(counter) + ': ' + str(lines[counter]))
            for c in cursor.items():  # iterate through followers
                g.write(c.id_str + '|' + str(c.created_at) + '|' +
                        str(c.favourites_count) + '|' + str(c.followers_count) +
                        '|' + str(c.friends_count) + '|' + str(c.screen_name) +
                        '\n')
            counter += 1
Exemplo n.º 12
0
 def search_tweet(self, query_string):
     """Search recent English-language tweets for the query, excluding
     retweets, and return an iterator over at most 100 status objects."""
     results = Cursor(self.api.search,
                      q=query_string,
                      lang='en',
                      count=1000,
                      exclude='retweets')
     return results.items(100)
    def get_user_tweets(self, user, word, depth=1000):
        """Yield cleaned text of up to 'depth' recent tweets from a user,
        keeping only those containing the given word.

        Arguments:
            user (str): Twitter user account without the '@'
            word (str): lowercase word used to filter the tweets
            depth (int, optional): number of tweets to retrieve

        Yields:
            str: cleaned tweet text
        """
        try:
            timeline = Cursor(method=self.API.user_timeline,
                              user_id=user,
                              count=200,
                              tweet_mode='extended')

            for status in timeline.items(depth):
                text = clean_text(get_tweet_text(status))
                # Only yield tweets that mention the filter word.
                if word in text:
                    yield text

        except TweepError:
            exit('Unable to retrieve tweets from ' + user)
Exemplo n.º 14
0
    def process(self, cursor: tweepy.Cursor):
        """Push each filtered tweet from the cursor to the Kinesis delivery
        stream, logging progress every logging_interval tweets.

        :param cursor: Tweepy Cursor
        """
        logging.info("Processing tweets")

        for tweet in self.handle_rate_limit(cursor.items(self.limit)):
            payload = self.filter(tweet)

            try:
                logging.debug("Pushing tweet data to Kinesis")
                # kinesis only accepts byte-like data
                response = self.firehose_client.put_record(
                    DeliveryStreamName=self.delivery_stream,
                    Record={"Data": json.dumps(payload)})
                logging.debug(response)

            except ClientError as ex:
                logging.exception(
                    f"Could not push tweet data to Kinesis: {ex}")

            finally:
                # Every tweet counts toward the progress log, pass or fail.
                self.counter += 1
                if self.counter % self.logging_interval == 0:
                    logging.info(f"Processed {self.counter} tweets")
Exemplo n.º 15
0
    def get_mentions(self, recent_id):
        """Collect up to 500 tweets that @mention the bot since recent_id.

        @:param recent_id: ID string of latest mention that we have pulled
        @:return list of dicts with username, text, tweet ID and reply ID
        """
        mentions = Cursor(
            self.api.search,
            q=self.handle + " -filter:retweets",
            tweet_mode='extended',
            since_id=recent_id,
        )
        collected = []
        for tweet in mentions.items(500):
            # Skip the bot's own replies so it never answers itself.
            is_own_reply = (tweet.user.screen_name == "markoving_bot"
                            and tweet.in_reply_to_status_id is not None)
            if is_own_reply:
                continue

            text = re.sub("https:.*$", "", tweet.full_text)
            text = re.sub("&amp", "&", text)

            collected.append({
                "username": tweet.user.screen_name,
                "text": text,
                "tweet_id": tweet.id_str,
                "reply_id": tweet.in_reply_to_status_id
            })

        return collected
Exemplo n.º 16
0
 def execute(self):
     """Run the search built from the accumulated query arguments.

     Raises when no arguments have been applied; returns a rate-limit-aware
     iterator over the search results."""
     if not self.query_args:
         raise Exception('Cannot execute query with no arguments.')
     joined = ' '.join(self.query_args)
     print(joined)
     cursor = Cursor(self.twitter.api.search, q=joined, rpp=100)
     return self._limit_handled(cursor.items())
Exemplo n.º 17
0
def user_tweets(api, user_id=None, screen_name=None, limit=None):
    """
    Fetch a user's tweets from the Twitter REST API, as many as possible or
    up to the given limit.

    Takes an authenticated API object (API or APIPool) and exactly one of
    user_id or screen_name (not both).
    Returns a cursor (iterator) over Tweepy status objects.
    """
    if not (user_id or screen_name):
        raise Exception("Must provide one of user_id or screen_name")
    # Guard above guarantees one identifier is present.
    selector = {"user_id": user_id} if user_id else {"screen_name": screen_name}
    cursor = Cursor(api.user_timeline, **selector)
    return cursor.items(_check_limit(limit)) if limit else cursor.items()
Exemplo n.º 18
0
	def search_tweets(self, query, lang, filter_prob = 95, depth = 1000):

		""" Generator that returns the 'depth' most recent user tweets

		Arguments:
		----------
			query:
				type: string
				info: string with logic operations (AND, OR...)

			lang:
				type: string
				info: language abbreviation to filter the tweets

			filter_prob:
				type: int (optional)
				info: probability in which the query words are removed

			depth:
				type: int (optional)
				info: number of tweets to retrieve

		Yield:
		----------
			tweet_text:
				type: string
				info: cleaned tweet text
		"""

		try:
			cursor = Cursor(
				method = self.API.search,
				q = query,
				lang = lang,
				count = 100,
				tweet_mode = 'extended'
			)

			# Obtaining the search query words in order to build a filter
			# (words containing search operators are excluded from filtering)
			query_words = query.split(' ')
			query_words = filter(
				lambda w: not any(op in w for op in search_ops),
				query_words
			)

			# Build a probabilistic filter in order to avoid overfitting
			search_filters = build_filters(query_words, filter_prob)


			for tweet in cursor.items(depth):
				tweet_text = self.get_text(tweet)
				tweet_text = clean_text(tweet_text, search_filters)
				# NOTE(review): clean_text is applied a second time without
				# filters — confirm this double pass is intentional.
				tweet_text = clean_text(tweet_text)

				yield tweet_text

		except TweepError:
			exit('Unable to find ' + str(depth) + ' tweets')
Exemplo n.º 19
0
def user_tweets(api, user_id=None, screen_name=None, limit=None):
    """
    Fetch a user's tweets from the Twitter REST API, as many as possible or
    up to the given limit.

    Takes an authenticated API object (API or APIPool) and exactly one of
    user_id or screen_name (not both).
    Returns a cursor (iterator) over Tweepy status objects.
    """
    if not (user_id or screen_name):
        raise Exception("Must provide one of user_id or screen_name")
    selector = {"user_id": user_id} if user_id else {"screen_name": screen_name}
    cursor = Cursor(api.user_timeline, **selector)
    if not limit:
        return cursor.items()
    # Validate the limit before handing it to the cursor.
    _check_limit(limit)
    return cursor.items(limit)
def main(users_list_path: str):
    """Dump every member of the nijisanji_app/list1 Twitter list into a
    streamed JSON array at users_list_path."""
    owner = "nijisanji_app"
    slug = "list1"
    members = Cursor(api.list_members,
                     slug=slug,
                     owner_screen_name=owner)

    with jsonstreams.Stream(jsonstreams.Type.array, users_list_path) as out:
        for member in members.items():
            out.write(member._json)
def query_user_tweets(output, id_list, auth_file, max_id=-1, since_id=-1):
    '''
    Queries twitter for the timelines of users from id_list, with
    authentication from auth_file, appending one JSON document per tweet
    (stamped with smapp_timestamp) to the output file.

    Params:
        output    - path of the file tweets are appended to
        id_list   - iterable of twitter user ids; empty strings are skipped
        max_id    - only fetch tweets with id <= max_id when truthy
                    (note: the default -1 is truthy and is passed through)
        since_id  - only fetch tweets with id > since_id when truthy

    Even though the per-request count is 200, the cursor can cycle through
    up to about 3200 items per user.
    '''
    num_inputs_queried = 0
    api_pool = TweepyPool(auth_file)
    # Context manager guarantees the file is closed even if a query fails.
    with open(output, 'a+') as write_fd:
        for userid in id_list:
            num_inputs_queried = num_inputs_queried + 1
            if not userid == '':
                try:
                    count = 0
                    # Build the pagination kwargs once instead of four
                    # near-identical Cursor constructions.
                    cursor_kwargs = dict(user_id=userid,
                                         count=200,
                                         tweet_mode='extended')
                    if max_id:
                        cursor_kwargs['max_id'] = max_id
                    if since_id:
                        cursor_kwargs['since_id'] = since_id
                    cursor = Cursor(api_pool.user_timeline, **cursor_kwargs)

                    for item in cursor.items():
                        count = count + 1
                        tweet_item = json.loads(json.dumps(item._json))
                        tweet_item['smapp_timestamp'] = (
                            datetime.datetime.utcnow().strftime(
                                '%Y-%m-%d %H:%M:%S +0000'))
                        write_fd.write(json.dumps(tweet_item))
                        write_fd.write('\n')
                except TweepError as e:
                    log('tweepy error: {}'.format(e))
                log('counted {} objects for input {}'.format(count, userid))
            log('number of inputs queried so far: {}'.format(num_inputs_queried))
            s3.disk_2_s3(context['log'], context['s3_log'])
Exemplo n.º 22
0
def ensure_users_edges_in_db(user, edges_collection, twitter_api):
    """Fetch a user's followers_ids and friends_ids from the Twitter API and
    store both edge sets in the given db collection.

    Returns a tuple (friends_ids, followers_ids)."""
    uid = user['id']

    logging.info(".. Fetching followers_ids for user {0}.".format(uid))
    logging.info(".... user has {0} followers.".format(user['followers_count']))
    follower_edges = []
    for follower_id in Cursor(twitter_api.followers_ids, id=uid).items():
        follower_edges.append({'from': follower_id, 'to': uid})
    store_edges(edges_collection, follower_edges)
    followers_ids = [edge['from'] for edge in follower_edges]

    logging.info(".. Fetching friends_ids for user {0}.".format(uid))
    logging.info(".... user has {0} friends.".format(user['friends_count']))
    friend_edges = []
    for friend_id in Cursor(twitter_api.friends_ids, id=uid).items():
        friend_edges.append({'to': friend_id, 'from': uid})
    store_edges(edges_collection, friend_edges)
    friends_ids = [edge['to'] for edge in friend_edges]

    return friends_ids, followers_ids
Exemplo n.º 23
0
def user_tweets(api, user_id=None, screen_name=None, limit=None, **kwargs):
    """
    Fetch a user's tweets from the Twitter REST API, as many as possible or
    up to the given limit.

    Takes an authenticated API object (API or APIPool) and exactly one of
    user_id or screen_name. Extra keyword arguments are forwarded to the
    Tweepy/APIPool query method, supporting full API call parameterization.
    Returns a cursor (iterator) over Tweepy status objects.
    """
    if not (user_id or screen_name):
        raise Exception("Must provide one of user_id or screen_name")
    if user_id:
        selector = {"user_id": user_id}
    else:
        selector = {"screen_name": screen_name}
    cursor = Cursor(api.user_timeline, count=200, **selector, **kwargs)
    return cursor.items(_check_limit(limit)) if limit else cursor.items()
Exemplo n.º 24
0
    def get_tweets(self, user_id):
        """Download TWEET_COUNT tweets from the user's timeline and return
        them parsed via parse_api_tweet."""
        logger.log(LOG_LEVEL, 'Getting tweets for {}'.format(user_id))

        timeline = Cursor(
            self.api.user_timeline,
            user_id=user_id,
            count=PAGE_COUNT
        )

        return [self.parse_api_tweet(status)
                for status in timeline.items(TWEET_COUNT)]
Exemplo n.º 25
0
    def get_friends(self, user_id):
        """Return the string ids of the user's friends that pass the
        is_potential_target screen."""
        logger.log(LOG_LEVEL, 'Getting friends for {}'.format(user_id))

        cursor = Cursor(
            self.api.friends,
            user_id=user_id
        )

        return [str(candidate.id) for candidate in cursor.items()
                if self.is_potential_target(candidate)]
Exemplo n.º 26
0
def getFollowers():
    """Append the numeric follower ids of the 'dep4b' account, one per line,
    to followers.txt."""
    auth = tweetielytics.twitterAuth()
    api = tweepy.API(auth)

    cursor = Cursor(api.followers_ids, id='dep4b')

    # Context manager closes the file even if the cursor raises mid-iteration.
    with open('followers.txt', 'a') as out:
        for follower in cursor.items():
            out.write(str(follower) + '\n')
Exemplo n.º 27
0
    def process_batch(self, cursor: tweepy.Cursor):
        """Accumulate filtered tweets into batches and push them to the
        Kinesis stream.

        :param cursor: Tweepy Cursor iterator
        """
        logging.info("Processing tweets")

        for tweet in self.handle_rate_limit(cursor.items(self.limit)):
            if len(self._batch) >= self.batch_size:
                # NOTE(review): when the batch is full the current tweet is not
                # appended here — presumably submit_batch() empties self._batch
                # so subsequent tweets accumulate again; confirm.
                self.submit_batch(self._batch)
            else:
                self._batch.append(self.filter(tweet))

        # make sure remaining tweets are submitted
        if self._batch:
            self.submit_batch(self._batch)
def main(users_list_path: str, out_dir: str):
    """For each screen name in the users-list JSON, write that account's
    follower ids (one per line) to a file named after it under out_dir."""
    with open(users_list_path, 'r') as f:
        screen_names = [entry['screen_name'] for entry in json.load(f)]

    Path(out_dir).mkdir(exist_ok=True)

    for name in screen_names:
        print('fetching followers of ' + name)

        follower_ids = Cursor(api.followers_ids, screen_name=name, count=2048)
        target = Path(out_dir) / name

        with open(target, mode="w", encoding="utf-8") as out:
            for follower_id in follower_ids.items():
                out.write(f'{follower_id}\n')
                out.flush()  # persist progress in case the run is interrupted
Exemplo n.º 29
0
def get_followers_ids(api, user_id):
    """
    Given a Tweepy/smappPy TweepyPool api, query twitter's rest API for the
    followers of user_id. Fetches IDs only (much faster / more per request).

    Parameters:
        api     - fully authenticated Tweepy api or smappPy TweepyPool api
        user_id - twitter user id

    Returns tuple: return code, list of IDs or None (if the API call fails;
    call_with_error_handling returns None for the list in that case).
    """
    follower_cursor = Cursor(api.followers_ids, user_id=user_id)
    user_list, ret_code = call_with_error_handling(list, follower_cursor.items())

    if ret_code != 0:
        logger.warning("User {0}: Followers request failed".format(user_id))

    return ret_code, user_list
Exemplo n.º 30
0
def get_followers_ids(api, user_id):
    """
    Given a Tweepy/smappPy TweepyPool api, query twitter's rest API for the
    followers of user_id. Fetches IDs only (much faster / more per request).

    Parameters:
        api     - fully authenticated Tweepy api or smappPy TweepyPool api
        user_id - twitter user id

    Returns tuple: return code, list of IDs or None (if the API call fails;
    call_with_error_handling returns None for the list in that case).
    """
    follower_cursor = Cursor(api.followers_ids, user_id=user_id)
    user_list, ret_code = call_with_error_handling(list, follower_cursor.items())

    if ret_code != 0:
        logger.warning("User {0}: Followers request failed".format(user_id))

    return ret_code, user_list
    def process_batch(self, cursor: tweepy.Cursor):
        """Accumulate filtered tweets into batches and stream them to the
        AWS Kinesis stream.

        :param cursor: Tweepy Cursor
        """
        logging.info("Processing tweets.")

        for tweet in self.handle_rate_limit(cursor.items(self.limit)):
            if len(self._batch) >= self.batch_size:
                # Batch is full: hand it off to Kinesis.
                self.submit_batch(self._batch)
            else:
                self._batch.append(self.filter(tweet))

        # Stream whatever remains once iteration stops (e.g. on rate limit).
        if self._batch:
            self.submit_batch(self._batch)
Exemplo n.º 32
0
    async def check_twitter(self):
        """Poll each tracked Twitter account for new tweets, post originals to
        the Discord announce channel as embeds, and persist the newest tweet
        id per account in SQLite, then sleep five minutes.
        """

        await self.bot.wait_until_ready()

        channel = discord.Object(id=CONF.ANNONCE_CHANNEL_ID)

        # self.mostRecents maps account name -> last tweet id already announced.
        for target in self.mostRecents:
            LOG.debug("checking account " + target)
            tweets = Cursor(self.auth_api.user_timeline,
                            id=target,
                            since_id=self.mostRecents[target],
                            tweet_mode="extended")

            for status in tweets.items():
                # Announce only original tweets: skip replies and retweets.
                if status.in_reply_to_status_id == None and hasattr(
                        status, "retweeted_status") == False:
                    link = "https://twitter.com/{}/status/{}".format(
                        target, status.id_str)
                    embed = discord.Embed(title=target,
                                          description=status.full_text,
                                          url=link,
                                          color=0x1DA1F2)
                    # Attach the first media entity as the embed image, if any.
                    if "media" in status.entities:
                        for media in status.entities["media"]:
                            embed.set_image(url=media["media_url"])
                            break

                    embed.set_thumbnail(url=status.user.profile_image_url)
                    embed.set_footer(text=status.created_at)
                    await self.bot.send_message(channel, embed=embed)

                # Track the highest tweet id seen and persist it so restarts
                # do not re-announce old tweets.
                if status.id > self.mostRecents[target]:
                    self.mostRecents[target] = status.id
                    conn = sqlite3.connect('keys.db')
                    c = conn.cursor()
                    args = (status.id, target)
                    c.execute(
                        "UPDATE twitter SET lastTweet = ? WHERE account = ?",
                        args)
                    conn.commit()
                    conn.close()

        await asyncio.sleep(5 * 60)
    def get_followers(self, request_params=None, limit=2_000):
        """See:
            https://docs.tweepy.org/en/latest/api.html#API.followers
            https://docs.tweepy.org/en/v3.10.0/cursor_tutorial.html

        Params:
            request_params (dict) needs either "user_id" or "screen_name" attr

            limit (int) the number of followers to fetch per user

        Example: get_followers({"user_id": 10101, "count": 100}, limit=300)
        """
        default_params = {
            "count": 200 # number of followers per request
        }
        # None default avoids the shared-mutable-default pitfall; merge order
        # lets caller-specified params override the defaults (including the
        # required user_id or screen_name).
        request_params = {**default_params, **(request_params or {})}
        request_params["cursor"] = -1 # use a cursor approach!

        cursor = Cursor(self.api.followers, **request_params)
        #return cursor.pages()
        return cursor.items(limit)
Exemplo n.º 34
0
def harvest_tweet(db, city, tweet_rate, max_id=None, since_id=None):
    """Harvest up to `tweet_rate` tweets for `city` and save them to `db`.

    Params:
        db: database object exposing a .save(doc) method (CouchDB-style).
        city: key into the module-level `coords` mapping of place ids.
        tweet_rate: maximum number of tweets to pull in this run.
        max_id / since_id: optional tweet-id window forwarded to api.search.

    Side effects: appends begin/count/end lines to the harvest log file.
    """
    time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # FIX: was an f-string with no placeholders; a plain literal is clearer.
    file_name = "log/twitter-current-all.log"
    with open(file_name, "a") as file:
        file.write(f"Twitter harvest for {city} at begins at: {time}\n")
        tweets = Cursor(api.search,
                        q="place:%s" % coords[city],
                        max_id=max_id,
                        since_id=since_id,
                        tweet_mode="extended")
        # FIX: count tweets directly from 0 instead of starting at 1 and
        # reporting `count-1` — same logged number, simpler logic.
        saved = 0
        for item in tweets.items(tweet_rate):
            out = dict()
            out["_id"] = item.id_str
            # Enrich the raw tweet JSON with sentiment/keyword/area fields.
            add_fields(item._json, out, afinn, keywords_list, polygons,
                       area_list, city)
            db.save(out)
            saved += 1
        file.write(f"Number of tweets saved: {saved}\n")
        time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        file.write(f"Twitter harvest for {city} ends at: {time}\n")
        file.write("-------------------------------------------\n")
Exemplo n.º 35
0
def scraper(queries):
    """Collect tweets for analysis via a Tweepy cursor and search terms.

    Each search term follows standard Twitter query syntax; adding
    ``-filter:retweets`` excludes RTs, which is recommended for sentiment
    analysis.

    :param queries: list of searches to be run on Twitter
    :return: set of tweet tuples, each containing the metadata fields
        selected by get_data
    """
    api = start_api()  # Initializes API from class

    collected = set()

    for term in queries:
        cursor = Cursor(api.search, rpp=100, tweet_mode='extended', q=term)
        # Gathers a user-defined number of items (defaults to 1000).
        statuses = cursor.items(parser.n)
        # get_data() extracts the desired metadata fields from each raw
        # tweet; each record becomes a hashable tuple so the set dedupes.
        for record in (get_data(status._json) for status in statuses):
            collected.add(tuple(record))

    return collected
Exemplo n.º 36
0
    def job_fetch(self, job):
        """Fetch new tweets from the user's timeline since the last seen id.

        Walks the timeline with a tweepy Cursor, saves every tweet to the
        db, and advances self.latest_id to the highest tweet id seen.
        """
        self.logger.debug("start job: %s",
                          inspect.currentframe().f_code.co_name)

        self.logger.debug("latest id: %s", self.latest_id)

        tweets = Cursor(self.twitter.user_timeline,
                        id=self.user_id,
                        since_id=self.latest_id)

        # First run (no latest_id yet): fetch a single tweet to seed state.
        count = 5 if self.latest_id else 1
        latest_id = None
        for tweet in tweets.items(count):
            # BUG FIX: the original evaluated `latest_id < tweet.id` while
            # latest_id was still None, which raises TypeError on Python 3
            # (None is not orderable against an int).
            if latest_id is None or tweet.id > latest_id:
                latest_id = tweet.id
            self.db.save_tweet(tweet)

        if latest_id:
            self.logger.info("latest id: %s", latest_id)
            self.latest_id = latest_id

        self.logger.info("tweets after fetch: %s", self.db.tweets.count())

        self.logger.debug("finish job: %s",
                          inspect.currentframe().f_code.co_name)
Exemplo n.º 37
0
    """function for twitter authentication"""
    auth = OAuthHandler(API_KEY, API_SECRET)
    return auth


if __name__ == '__main__':
    auth = authenticate()
    api = API(auth)

    while True:

        cursor = Cursor(api.user_timeline,
                        id='legaltech_news',
                        tweet_mode='extended')

        for status in cursor.items(100):
            time.sleep(1)
            text = status.full_text

            # take extended tweets into account
            if 'extended_tweet' in dir(status):
                text = status.extended_tweet.full_text
            if 'retweeted_status' in dir(status):
                r = status.retweeted_status
                if 'extended_tweet' in dir(r):
                    text = r.extended_tweet.full_text

            tweet = {
                'text': text,
                'username': status.user.screen_name,
                'followers_count': status.user.followers_count