Пример #1
0
    def search_tweets(
        self,
        keywords,
        limit=100,
        lang="en",
        max_id=None,
        retries_after_twython_exception=0,
    ):
        """
        Call the REST API ``'search/tweets'`` endpoint with some plausible
        defaults. See `the Twitter search documentation
        <https://dev.twitter.com/rest/public/search>`_ for more information
        about admissible search parameters.

        :param str keywords: A list of query terms to search for, written as\
        a comma-separated string
        :param int limit: Number of Tweets to process
        :param str lang: language
        :param int max_id: id of the last tweet fetched
        :param int retries_after_twython_exception: number of retries when\
        searching Tweets before raising an exception
        :rtype: python generator
        """
        if not self.handler:
            # if no handler is provided, `BasicTweetHandler` provides minimum
            # functionality for limiting the number of Tweets retrieved
            self.handler = BasicTweetHandler(limit=limit)

        count_from_query = 0
        if max_id:
            self.handler.max_id = max_id
        else:
            results = self.search(q=keywords,
                                  count=min(100, limit),
                                  lang=lang,
                                  result_type="recent")
            count = len(results["statuses"])
            if count == 0:
                print(
                    "No Tweets available through REST API for those keywords")
                return
            count_from_query = count
            self.handler.max_id = results["statuses"][count - 1]["id"] - 1

            for result in results["statuses"]:
                yield result
                self.handler.counter += 1
                if self.handler.do_continue() == False:
                    return

        # Pagination loop: keep fetching Tweets until the desired count is
        # reached while dealing with Twitter rate limits.
        retries = 0
        while count_from_query < limit:
            try:
                mcount = min(100, limit - count_from_query)
                results = self.search(
                    q=keywords,
                    count=mcount,
                    lang=lang,
                    max_id=self.handler.max_id,
                    result_type="recent",
                )
            except TwythonRateLimitError as e:
                print("Waiting for 15 minutes -{0}".format(e))
                time.sleep(15 * 60)  # wait 15 minutes
                continue
            except TwythonError as e:
                print("Fatal error in Twython request -{0}".format(e))
                if retries_after_twython_exception == retries:
                    raise e
                retries += 1

            count = len(results["statuses"])
            if count == 0:
                print("No more Tweets available through rest api")
                return
            count_from_query += count
            # the max_id is also present in the Tweet metadata
            # results['search_metadata']['next_results'], but as part of a
            # query and difficult to fetch. This is doing the equivalent
            # (last tweet id minus one)
            self.handler.max_id = results["statuses"][count - 1]["id"] - 1

            for result in results["statuses"]:
                yield result
                self.handler.counter += 1
                if self.handler.do_continue() == False:
                    return
Пример #2
0
    def search_tweets(self, keywords, limit=100, lang="en", max_id=None, retries_after_twython_exception=0):
        """
        Call the REST API ``'search/tweets'`` endpoint with some plausible
        defaults. See `the Twitter search documentation
        <https://dev.twitter.com/rest/public/search>`_ for more information
        about admissable search parameters.

        :param str keywords: A list of query terms to search for, written as\
        a comma-separated string
        :param int limit: Number of Tweets to process
        :param str lang: language
        :param int max_id: id of the last tweet fetched
        :param int retries_after_twython_exception: number of retries when\
        searching Tweets before raising an exception
        :rtype: python generator
        """
        if not self.handler:
            # if no handler is provided, `BasicTweetHandler` provides minimum
            # functionality for limiting the number of Tweets retrieved
            self.handler = BasicTweetHandler(limit=limit)

        count_from_query = 0
        if not max_id:
            results = self.search(q=keywords, count=min(100, limit), lang=lang, result_type="recent")
            count = len(results["statuses"])
            if count == 0:
                print("No Tweets available through REST API for those keywords")
                return
            count_from_query = count
            max_id = results["statuses"][count - 1]["id"] - 1

            for result in results["statuses"]:
                yield result
                self.handler.counter += 1
                if self.handler.do_continue() == False:
                    return

        # Pagination loop: keep fetching Tweets until the desired count is
        # reached while dealing with Twitter rate limits.
        retries = 0
        while count_from_query < limit:
            try:
                mcount = min(100, limit - count_from_query)
                results = self.search(q=keywords, count=mcount, lang=lang, max_id=max_id, result_type="recent")
            except TwythonRateLimitError as e:
                print("Waiting for 15 minutes -{0}".format(e))
                time.sleep(15 * 60)  # wait 15 minutes
                continue
            except TwythonError as e:
                print("Fatal error in Twython request -{0}".format(e))
                if retries_after_twython_exception == retries:
                    raise e
                retries += 1

            count = len(results["statuses"])
            if count == 0:
                print("No more Tweets available through rest api")
                return
            count_from_query += count
            # the max_id is also present in the Tweet metadata
            # results['search_metadata']['next_results'], but as part of a
            # query and difficult to fetch. This is doing the equivalent
            # (last tweet id minus one)
            max_id = results["statuses"][count - 1]["id"] - 1
            self.handler.max_id = max_id

            for result in results["statuses"]:
                yield result
                self.handler.counter += 1
                if self.handler.do_continue() == False:
                    return
Пример #3
0
class Query(Twython):
    """
    Retrieve data from the Twitter REST API.
    """
    def __init__(self, app_key, app_secret, oauth_token, oauth_token_secret):
        self.handler = None
        self.do_continue = True
        Twython.__init__(self, app_key, app_secret, oauth_token,
                         oauth_token_secret)

    def register(self, handler):
        """
        Register a method for handling Tweets.

        :param TweetHandlerI handler: method for viewing or writing Tweets to a file.
        """
        self.handler = handler

    def expand_tweetids(self, ids_f, verbose=True):
        """
        Given a file object containing a list of Tweet IDs, fetch the
        corresponding full Tweets from the Twitter API.

        The API call `statuses/lookup` will fail to retrieve a Tweet if the
        user has deleted it.

        This call to the Twitter API is rate-limited. See
        <https://dev.twitter.com/rest/reference/get/statuses/lookup> for details.

        :param ids_f: input file object consisting of Tweet IDs, one to a line
        :return: iterable of Tweet objects in JSON format
        """
        ids = [line.strip() for line in ids_f if line]

        if verbose:
            print("Counted {0} Tweet IDs in {1}.".format(len(ids), ids_f))

        # The Twitter endpoint takes lists of up to 100 ids, so we chunk the
        # ids.
        id_chunks = [ids[i:i + 100] for i in range(0, len(ids), 100)]

        chunked_tweets = (self.lookup_status(id=chunk) for chunk in id_chunks)

        return itertools.chain.from_iterable(chunked_tweets)

    def _search_tweets(self, keywords, limit=100, lang="en"):
        """
        Assumes that the handler has been informed. Fetches Tweets from
        search_tweets generator output and passses them to handler

        :param str keywords: A list of query terms to search for, written as\
        a comma-separated string.
        :param int limit: Number of Tweets to process
        :param str lang: language
        """
        while True:
            tweets = self.search_tweets(keywords=keywords,
                                        limit=limit,
                                        lang=lang,
                                        max_id=self.handler.max_id)
            for tweet in tweets:
                self.handler.handle(tweet)
            if not (self.handler.do_continue() and self.handler.repeat):
                break
        self.handler.on_finish()

    def search_tweets(
        self,
        keywords,
        limit=100,
        lang="en",
        max_id=None,
        retries_after_twython_exception=0,
    ):
        """
        Call the REST API ``'search/tweets'`` endpoint with some plausible
        defaults. See `the Twitter search documentation
        <https://dev.twitter.com/rest/public/search>`_ for more information
        about admissible search parameters.

        :param str keywords: A list of query terms to search for, written as\
        a comma-separated string
        :param int limit: Number of Tweets to process
        :param str lang: language
        :param int max_id: id of the last tweet fetched
        :param int retries_after_twython_exception: number of retries when\
        searching Tweets before raising an exception
        :rtype: python generator
        """
        if not self.handler:
            # if no handler is provided, `BasicTweetHandler` provides minimum
            # functionality for limiting the number of Tweets retrieved
            self.handler = BasicTweetHandler(limit=limit)

        count_from_query = 0
        if max_id:
            self.handler.max_id = max_id
        else:
            results = self.search(q=keywords,
                                  count=min(100, limit),
                                  lang=lang,
                                  result_type="recent")
            count = len(results["statuses"])
            if count == 0:
                print(
                    "No Tweets available through REST API for those keywords")
                return
            count_from_query = count
            self.handler.max_id = results["statuses"][count - 1]["id"] - 1

            for result in results["statuses"]:
                yield result
                self.handler.counter += 1
                if self.handler.do_continue() == False:
                    return

        # Pagination loop: keep fetching Tweets until the desired count is
        # reached while dealing with Twitter rate limits.
        retries = 0
        while count_from_query < limit:
            try:
                mcount = min(100, limit - count_from_query)
                results = self.search(
                    q=keywords,
                    count=mcount,
                    lang=lang,
                    max_id=self.handler.max_id,
                    result_type="recent",
                )
            except TwythonRateLimitError as e:
                print("Waiting for 15 minutes -{0}".format(e))
                time.sleep(15 * 60)  # wait 15 minutes
                continue
            except TwythonError as e:
                print("Fatal error in Twython request -{0}".format(e))
                if retries_after_twython_exception == retries:
                    raise e
                retries += 1

            count = len(results["statuses"])
            if count == 0:
                print("No more Tweets available through rest api")
                return
            count_from_query += count
            # the max_id is also present in the Tweet metadata
            # results['search_metadata']['next_results'], but as part of a
            # query and difficult to fetch. This is doing the equivalent
            # (last tweet id minus one)
            self.handler.max_id = results["statuses"][count - 1]["id"] - 1

            for result in results["statuses"]:
                yield result
                self.handler.counter += 1
                if self.handler.do_continue() == False:
                    return

    def user_info_from_id(self, userids):
        """
        Convert a list of userIDs into a variety of information about the users.

        See <https://dev.twitter.com/rest/reference/get/users/show>.

        :param list userids: A list of integer strings corresponding to Twitter userIDs
        :rtype: list(json)
        """
        return [self.show_user(user_id=userid) for userid in userids]

    def user_tweets(self, screen_name, limit, include_rts="false"):
        """
        Return a collection of the most recent Tweets posted by the user

        :param str user: The user's screen name; the initial '@' symbol\
        should be omitted
        :param int limit: The number of Tweets to recover; 200 is the maximum allowed
        :param str include_rts: Whether to include statuses which have been\
        retweeted by the user; possible values are 'true' and 'false'
        """
        data = self.get_user_timeline(screen_name=screen_name,
                                      count=limit,
                                      include_rts=include_rts)
        for item in data:
            self.handler.handle(item)
Пример #4
0
class Query(Twython):
    """
    Retrieve data from the Twitter REST API.
    """

    def __init__(self, app_key, app_secret, oauth_token, oauth_token_secret):
        self.handler = None
        self.do_continue = True
        Twython.__init__(self, app_key, app_secret, oauth_token, oauth_token_secret)

    def register(self, handler):
        """
        Register a method for handling Tweets.

        :param TweetHandlerI handler: method for viewing or writing Tweets to a file.
        """
        self.handler = handler

    def expand_tweetids(self, ids_f, verbose=True):
        """
        Given a file object containing a list of Tweet IDs, fetch the
        corresponding full Tweets from the Twitter API.

        The API call `statuses/lookup` will fail to retrieve a Tweet if the
        user has deleted it.

        This call to the Twitter API is rate-limited. See
        <https://dev.twitter.com/rest/reference/get/statuses/lookup> for details.

        :param ids_f: input file object consisting of Tweet IDs, one to a line
        :return: iterable of Tweet objects in JSON format
        """
        ids = [line.strip() for line in ids_f if line]

        if verbose:
            print("Counted {0} Tweet IDs in {1}.".format(len(ids), ids_f))

        # The Twitter endpoint takes lists of up to 100 ids, so we chunk the
        # ids.
        id_chunks = [ids[i : i + 100] for i in range(0, len(ids), 100)]

        chunked_tweets = (self.lookup_status(id=chunk) for chunk in id_chunks)

        return itertools.chain.from_iterable(chunked_tweets)

    def _search_tweets(self, keywords, limit=100, lang="en"):
        """
        Assumes that the handler has been informed. Fetches Tweets from
        search_tweets generator output and passses them to handler

        :param str keywords: A list of query terms to search for, written as\
        a comma-separated string.
        :param int limit: Number of Tweets to process
        :param str lang: language
        """
        while True:
            if isinstance(self.handler, TweetWriter):
                max_id = self.handler.max_id
            else:
                max_id = None
            tweets = self.search_tweets(keywords=keywords, limit=limit, lang=lang, max_id=max_id)
            for tweet in tweets:
                self.handler.handle(tweet)
            if not (self.handler.do_continue() and self.handler.repeat):
                break
        self.handler.on_finish()

    def search_tweets(self, keywords, limit=100, lang="en", max_id=None, retries_after_twython_exception=0):
        """
        Call the REST API ``'search/tweets'`` endpoint with some plausible
        defaults. See `the Twitter search documentation
        <https://dev.twitter.com/rest/public/search>`_ for more information
        about admissable search parameters.

        :param str keywords: A list of query terms to search for, written as\
        a comma-separated string
        :param int limit: Number of Tweets to process
        :param str lang: language
        :param int max_id: id of the last tweet fetched
        :param int retries_after_twython_exception: number of retries when\
        searching Tweets before raising an exception
        :rtype: python generator
        """
        if not self.handler:
            # if no handler is provided, `BasicTweetHandler` provides minimum
            # functionality for limiting the number of Tweets retrieved
            self.handler = BasicTweetHandler(limit=limit)

        count_from_query = 0
        if not max_id:
            results = self.search(q=keywords, count=min(100, limit), lang=lang, result_type="recent")
            count = len(results["statuses"])
            if count == 0:
                print("No Tweets available through REST API for those keywords")
                return
            count_from_query = count
            max_id = results["statuses"][count - 1]["id"] - 1

            for result in results["statuses"]:
                yield result
                self.handler.counter += 1
                if self.handler.do_continue() == False:
                    return

        # Pagination loop: keep fetching Tweets until the desired count is
        # reached while dealing with Twitter rate limits.
        retries = 0
        while count_from_query < limit:
            try:
                mcount = min(100, limit - count_from_query)
                results = self.search(q=keywords, count=mcount, lang=lang, max_id=max_id, result_type="recent")
            except TwythonRateLimitError as e:
                print("Waiting for 15 minutes -{0}".format(e))
                time.sleep(15 * 60)  # wait 15 minutes
                continue
            except TwythonError as e:
                print("Fatal error in Twython request -{0}".format(e))
                if retries_after_twython_exception == retries:
                    raise e
                retries += 1

            count = len(results["statuses"])
            if count == 0:
                print("No more Tweets available through rest api")
                return
            count_from_query += count
            # the max_id is also present in the Tweet metadata
            # results['search_metadata']['next_results'], but as part of a
            # query and difficult to fetch. This is doing the equivalent
            # (last tweet id minus one)
            max_id = results["statuses"][count - 1]["id"] - 1
            self.handler.max_id = max_id

            for result in results["statuses"]:
                yield result
                self.handler.counter += 1
                if self.handler.do_continue() == False:
                    return

    def user_info_from_id(self, userids):
        """
        Convert a list of userIDs into a variety of information about the users.

        See <https://dev.twitter.com/rest/reference/get/users/show>.

        :param list userids: A list of integer strings corresponding to Twitter userIDs
        :rtype: list(json)
        """
        return [self.show_user(user_id=userid) for userid in userids]

    def user_tweets(self, screen_name, limit, include_rts="false"):
        """
        Return a collection of the most recent Tweets posted by the user

        :param str user: The user's screen name; the initial '@' symbol\
        should be omitted
        :param int limit: The number of Tweets to recover; 200 is the maximum allowed
        :param str include_rts: Whether to include statuses which have been\
        retweeted by the user; possible values are 'true' and 'false'
        """
        data = self.get_user_timeline(screen_name=screen_name, count=limit, include_rts=include_rts)
        self.handler.handle(data)