示例#1
0
    def test_limit(self):
        """Check that ``limit_user_ids`` restricts which items are iterated."""
        # With both ids allowed, 244 items are expected from self.filepaths.
        matching = TwitterRestWarcIter(
            self.filepaths, limit_user_ids=("481186914", "999999"))
        matching_items = list(matching)
        self.assertEqual(244, len(matching_items))

        # With only the second id allowed, nothing should be yielded.
        non_matching = TwitterRestWarcIter(
            self.filepaths, limit_user_ids=("999999", ))
        non_matching_items = list(non_matching)
        self.assertEqual(0, len(non_matching_items))
示例#2
0
 def test_no_limit(self):
     """Iterate without a user filter; check the count and the first item."""
     items = list(TwitterRestWarcIter(self.filepaths))
     self.assertEqual(1473, len(items))

     first = items[0]
     self.assertEqual("721345764362948609", first[1])
     # The element at index 2 must carry timezone info (i.e. be aware).
     self.assertIsNotNone(first[2].tzinfo)
示例#3
0
    def process_search_warc(self, warc_filepath):
        """Process a search WARC and, when incremental, advance ``since_id``.

        The state key is derived from the token of the first seed in
        ``self.message``. When the ``incremental`` option is set, the
        previously stored ``since_id`` is read and, after processing, replaced
        by the value returned from ``self._process_tweets`` if that value is
        greater.

        :param warc_filepath: passed unchanged to ``TwitterRestWarcIter``.
        """
        incremental = self.message.get("options", {}).get("incremental", False)
        query = self.message["seeds"][0]["token"]
        state_key = u"{}.since_id".format(query)

        since_id = self.state_store.get_state(
            __name__, state_key) if incremental else None

        max_tweet_id = self._process_tweets(TwitterRestWarcIter(warc_filepath))

        # Update state store. Either id may be None (nothing stored yet, or
        # nothing returned by _process_tweets); comparing None with an int
        # raises TypeError on Python 3, so a missing id is treated as 0.
        if incremental and (max_tweet_id or 0) > (since_id or 0):
            self.state_store.set_state(__name__, state_key, max_tweet_id)
    def process_search_warc(self, warc_filepath):
        """Process a search WARC and, when incremental, advance ``since_id``.

        The state key is built from ``self._search_id()``. The state store is
        only read or written when the ``incremental`` option in
        ``self.message`` is truthy.

        :param warc_filepath: passed unchanged to ``TwitterRestWarcIter``.
        """
        options = self.message.get("options", {})
        incremental = options.get("incremental", False)

        # Only consult the state store for incremental runs.
        if incremental:
            since_id = self.state_store.get_state(
                __name__, u"{}.since_id".format(self._search_id()))
        else:
            since_id = None

        max_tweet_id = self._process_tweets(TwitterRestWarcIter(warc_filepath))

        if not incremental:
            return

        # A missing id on either side is treated as 0 for the comparison.
        newest = max_tweet_id or 0
        previous = since_id or 0
        if newest > previous:
            self.state_store.set_state(
                __name__, u"{}.since_id".format(self._search_id()),
                max_tweet_id)
示例#5
0
    def test_ignore_errors(self):
        """Error payloads passed to ``_item_iter`` must yield no items."""
        url = 'https://api.twitter.com/1.1/statuses/user_timeline.json'
        error_payloads = (
            # Payload with an "errors" list.
            {'errors': [{'message': 'Rate limit exceeded', 'code': 88}]},
            # Payload with a single "error" string.
            {
                'request': '/1.1/statuses/user_timeline.json',
                'error': 'Not authorized.',
            },
        )

        for payload in error_payloads:
            # _item_iter is called unbound, with None in place of self.
            yielded = list(TwitterRestWarcIter._item_iter(None, url, payload))
            self.assertEqual(0, len(yielded))
示例#6
0
    def process_user_timeline_warc(self, warc_filepath):
        """Process each tweet in a user-timeline WARC, tracking ``since_id``.

        Every item whose payload contains a ``"text"`` key is handed to
        ``self._process_tweet``. When the ``incremental`` option in
        ``self.message`` is truthy, the per-user state
        ``timeline.<user id_str>.since_id`` is raised to the tweet's ``"id"``
        whenever that id is greater than the stored value.

        :param warc_filepath: passed unchanged to ``TwitterRestWarcIter``.
        """
        incremental = self.message.get("options", {}).get("incremental", False)

        for count, status in enumerate(TwitterRestWarcIter(warc_filepath)):
            tweet = status.item
            # Progress message every 100 items (including the first).
            if not count % 100:
                log.debug("Processing %s tweets", count)
            if "text" in tweet:
                user_id = tweet["user"]["id_str"]
                if incremental:
                    # Update state. The stored value is None until first set
                    # and the tweet may lack "id"; max() over None and an int
                    # raises TypeError on Python 3, so a missing id is treated
                    # as 0 and the state is only written when it advances.
                    key = "timeline.{}.since_id".format(user_id)
                    since_id = self.state_store.get_state(__name__, key)
                    tweet_id = tweet.get("id")
                    if (tweet_id or 0) > (since_id or 0):
                        self.state_store.set_state(__name__, key, tweet_id)
                self._process_tweet(tweet)