示例#1
0
        def fetch_failed_upload_media(tweet_id: str) -> List[Tuple[str, str]]:
            """Build ``(media_url, description)`` pairs for one tweet.

            The tweet is fetched through ``Twitter().api.get_status`` in
            extended mode, its media are collected with
            ``get_tweet_medias``, and every URL of the entry stored under
            ``tweet_id`` is paired with the tweet description.

            Args:
                tweet_id: ID of the tweet; also used as the key into the
                    mapping returned by ``get_tweet_medias``.

            Returns:
                One ``(url, description)`` tuple per media URL, in the order
                the URLs are listed.

            Note:
                The mapping is indexed directly, so a ``tweet_id`` that is
                absent from it raises (``KeyError`` for a plain dict).
            """
            client = Twitter()
            status: tweepy.Status = client.api.get_status(
                id=tweet_id, tweet_mode='extended')
            medias_by_id: Dict[str, TweetMedia] = client.get_tweet_medias(
                status)
            media: TweetMedia = medias_by_id[tweet_id]
            caption: str = Twitter.make_tweet_description(status)
            # Every URL shares the same description text.
            return [(media_url, caption) for media_url in media.urls]
示例#2
0
    def backup_media(self, tweet_medias: Dict[str, TweetMedia]) -> None:
        """Save the media of every tweet the store has not recorded yet.

        The store is asked which of the given tweet IDs are not added yet.
        For each of those, every media URL is passed to ``save_media``, the
        tweet info is stored, and any URLs whose save failed are recorded
        through ``store_failed_upload_media``.

        Args:
            tweet_medias: Mapping of tweet ID to the media found for it.
                Nothing is done (besides logging) when it is empty.
        """
        if not tweet_medias:
            logger.info('No new tweet media.')
            return

        candidate_ids = list(tweet_medias.keys())
        pending_rows = self.store.fetch_not_added_tweet_ids(candidate_ids)
        if not pending_rows:
            logger.info('No new tweet media.')
            return
        logger.info(f'Target tweet media count={len(pending_rows)}')

        if self._save_mode == 'google':
            self.google_photos.init_album()

        # Each row is unpacked as a one-element sequence holding the tweet ID.
        for (tweet_id,) in pending_rows:
            media: TweetMedia = tweet_medias[tweet_id]
            tweet: tweepy.Status = media.tweet
            failures: List[Tuple[str, str]] = []

            media.show_info()
            for media_url in media.urls:
                caption: str = Twitter.make_tweet_description(tweet)
                if self.save_media(media_url, caption,
                                   tweet.user.screen_name):
                    continue
                failures.append((media_url, caption))
                logger.warning(
                    f'Save failed. tweet_id={tweet_id}, media_url={media_url}')

            # The tweet itself is stored whether or not every save succeeded.
            self.store_tweet_info(tweet)

            if failures:
                self.store_failed_upload_media(tweet, failures)
            else:
                logger.debug(
                    f'All media upload succeeded. urls={media.urls}'
                )
示例#3
0
class TestTwitter:
    """Tests for ``Twitter`` using mocked cursor and Instagram objects.

    ``setUp`` installs dummy credentials in the environment and wires fresh
    mocks into the module-level mock objects; ``tearDown`` removes the
    credentials again.
    """
    twitter: Twitter
    mock_cursor: mock.MagicMock
    mock_instagram: mock.MagicMock

    # Environment variables populated in setUp and removed in tearDown.
    _ENV_KEYS = (
        'TWITTER_CONSUMER_KEY',
        'TWITTER_CONSUMER_SECRET',
        'TWITTER_ACCESS_TOKEN',
        'TWITTER_ACCESS_TOKEN_SECRET',
    )
    # Shape that every extracted media URL is checked against.
    _URL_PATTERN = re.compile(
        r'^https?://([\w-]+\.)+[\w-]+/?([\w\-./?%&=+]*)?$')

    def __init__(self) -> None:
        self.user: TwitterUser = TwitterUser(id=TEST_TWITTER_ID)

    def setUp(self) -> None:
        for env_key in self._ENV_KEYS:
            os.environ[env_key] = 'DUMMY'

        self.mock_cursor = mock.MagicMock(tweepy.Cursor)
        self.mock_instagram = mock.MagicMock(Instagram)

        # Clear state left on the module-level mocks (defined outside this
        # class) by earlier tests.
        mock_cursor.reset_mock()
        mock_cursor.pages.reset_mock(side_effect=True)
        mock_instagram.reset_mock()
        mock_twitter_func.reset_mock()

        # Calling the module-level mocks hands back this test's instances.
        mock_cursor.return_value = self.mock_cursor
        mock_instagram.return_value = self.mock_instagram

        self.twitter = Twitter()

    @staticmethod
    def tearDown() -> None:
        for env_key in TestTwitter._ENV_KEYS:
            delete_env(env_key)

    # ----- private assertion helpers (not collected as tests) -----

    @classmethod
    def _assert_url(cls, url: str, has_url: bool) -> None:
        # A present URL must match the pattern; an absent one must be empty.
        if has_url:
            assert cls._URL_PATTERN.fullmatch(url) is not None
        else:
            assert len(url) == 0

    @staticmethod
    def _assert_media_mapping(medias: Dict[str, TweetMedia],
                              count: int) -> None:
        # Expect exactly `count` entries, keyed by str, valued by TweetMedia.
        assert len(medias) == count
        for tweet_id, tweet_media in medias.items():
            assert isinstance(tweet_id, str)
            assert isinstance(tweet_media, TweetMedia)

    # ----- URL / flag helpers -----

    @nose2.tools.params(
        ('test.jpg', 'test.jpg?name=orig'),
        ('test.jpg?foo=bar', 'test.jpg?foo=bar&name=orig'),
        ('test.jpg?name=100', 'test.jpg?name=orig'),
        ('test.jpg?name=aBc789', 'test.jpg?name=orig'),
        ('test.jpg?name=aBc789&foo=aaa', 'test.jpg?name=orig&foo=aaa'))
    def test_make_original_image_url(self, url: str, ans: str) -> None:
        assert Twitter.make_original_image_url(url) == ans

    @nose2.tools.params(('is_fav_rt_quoted', True),
                        ('is_not_fav_rt_quoted', False)
                        # An empty quoted_status probably never occurs,
                        # so that case is not covered.
                        )
    def test_is_quoted(self, json_name: str, ans: bool) -> None:
        status = TwitterTestUtils.load_tweet(json_name=json_name)
        assert Twitter.is_quoted(status) is ans

    @nose2.tools.params(('is_fav_rt_quoted', True),
                        ('is_not_fav_rt_quoted', False)
                        # favorited is probably always present.
                        )
    def test_is_favorited(self, json_name: str, ans: bool) -> None:
        status = TwitterTestUtils.load_tweet(json_name=json_name)
        assert Twitter.is_favorited(status) is ans

    @nose2.tools.params(('is_fav_rt_quoted', True),
                        ('is_not_fav_rt_quoted', False)
                        # An empty retweeted_status probably never occurs,
                        # so that case is not covered.
                        )
    def test_is_retweeted(self, json_name: str, ans: bool) -> None:
        status = TwitterTestUtils.load_tweet(json_name=json_name)
        assert Twitter.is_retweeted(status) is ans

    @nose2.tools.params(('has_images', True), ('has_illegal_images', False)
                        # media_url_https is probably always present.
                        )
    def test_get_photo_url(self, json_name: str, has_url: bool) -> None:
        status = TwitterTestUtils.load_tweet(json_name=json_name)
        assert has_attributes(status, 'extended_entities')
        assert 'media' in status.extended_entities
        for entry in status.extended_entities['media']:
            # noinspection PyProtectedMember
            self._assert_url(Twitter._get_photo_url(entry), has_url)

    @nose2.tools.params(('has_video', True), ('has_illegal_video', False))
    def test_get_video_url(self, json_name: str, has_url: bool) -> None:
        status = TwitterTestUtils.load_tweet(json_name=json_name)
        assert has_attributes(status, 'extended_entities')
        assert 'media' in status.extended_entities
        for entry in status.extended_entities['media']:
            # noinspection PyProtectedMember
            self._assert_url(Twitter._get_video_url(entry), has_url)

    @nose2.tools.params(('has_instagram_url', True), ('has_not_images', False)
                        # An empty urls list probably never occurs.
                        )
    def test_has_instagram_url(self, json_name: str, has_url: bool) -> None:
        status = TwitterTestUtils.load_tweet(json_name=json_name)
        assert has_attributes(status, 'entities')
        # noinspection PyProtectedMember
        found = Twitter._has_instagram_url(status.entities)
        assert found is has_url

    @nose2.tools.params(('has_images', False), ('has_instagram_url', True),
                        ('has_illegal_instagram_url', True))
    def test_get_instagram_url(self, json_name: str, has_url: bool) -> None:
        status = TwitterTestUtils.load_tweet(json_name=json_name)
        assert has_attributes(status, 'entities')
        # noinspection PyProtectedMember
        instagram_url = Twitter._get_instagram_url(status.entities)
        assert isinstance(instagram_url, str)
        self._assert_url(instagram_url, has_url)

    @nose2.tools.params('has_images', 'has_video'
                        # An empty extended_entities probably never occurs.
                        )
    def test_get_twitter_media_urls(self, json_name: str) -> None:
        status = TwitterTestUtils.load_tweet(json_name=json_name)
        assert has_attributes(status, 'extended_entities')
        # noinspection PyProtectedMember
        found_urls: List[str] = self.twitter._get_twitter_media_urls(
            status.extended_entities)

        assert len(found_urls) != 0
        for found_url in found_urls:
            self._assert_url(found_url, True)

    # ----- media collection -----

    @nose2.tools.params(('has_images', 'Twitter'), ('has_not_images', None),
                        ('has_instagram_url', 'Instagram'))
    def test_get_tweet_medias(self, json_name: str,
                              media_type: Optional[str]) -> None:
        self.mock_instagram.get_media_urls.return_value = [INSTAGRAM_DUMMY_URL]
        status = TwitterTestUtils.load_tweet(json_name=json_name)
        medias: Dict[str, TweetMedia] = self.twitter.get_tweet_medias(status)

        if media_type is None:
            # A tweet without media yields an empty mapping.
            assert len(medias) == 0
        else:
            for tweet_id, tweet_media in medias.items():
                assert isinstance(tweet_id, str)
                assert isinstance(tweet_media, TweetMedia)
                assert len(tweet_media.urls) != 0

    def test_make_tweet_permalink(self) -> None:
        status = TwitterTestUtils.load_tweet(json_name='has_images')
        permalink: str = self.twitter.make_tweet_permalink(status)
        expected = (f'https://twitter.com/{status.user.screen_name}'
                    f'/status/{status.id_str}')
        assert expected == permalink

    def test_make_tweet_description(self) -> None:
        status = TwitterTestUtils.load_tweet(json_name='has_images')
        description: str = self.twitter.make_tweet_description(status)
        expected = (f'{status.user.name}\n'
                    f'@{status.user.screen_name}\n'
                    f'{status.full_text}')
        assert expected == description

    def test_difference_tweet_medias(self) -> None:
        previous = TwitterTestUtils.load_target_media_tweets(json_name='old')
        current = TwitterTestUtils.load_target_media_tweets(json_name='new')
        difference: Dict[str, TweetMedia] = Twitter.difference_tweet_medias(
            current, previous)

        assert len(difference) == 1

    @nose2.tools.params(
        6, )
    def test_get_favorite_media(self, count: int) -> None:
        self.mock_cursor.pages.side_effect = MockTweepyCursor.pages
        self.mock_instagram.get_media_urls.return_value = [INSTAGRAM_DUMMY_URL]

        with LogCapture(level=logging.INFO) as log:
            medias = self.twitter.get_favorite_media(self.user)
            # Exactly one INFO record is expected from the call.
            expected_record = (
                'app.twitter', 'INFO',
                f'Get favorite tweet media. user={self.user.id}. '
                f'pages={self.twitter.tweet_page}, count={self.twitter.tweet_count}'
            )
            log.check(expected_record)

        self._assert_media_mapping(medias, count)

    @mock.patch('app.crawler.Twitter.get_tweet_medias', mock_twitter_func)
    def test_get_favorite_media__exception(self) -> None:
        self.mock_cursor.pages.side_effect = MockTweepyCursor.pages
        self.mock_instagram.get_media_urls.return_value = [INSTAGRAM_DUMMY_URL]

        # Make the patched get_tweet_medias raise on every call.
        mock_twitter_func.side_effect = Exception()

        with LogCapture(level=logging.ERROR) as log:
            medias = self.twitter.get_favorite_media(self.user)
            error_record = ('app.twitter', 'ERROR',
                            'Get tweet media error. exception=()')
            assert LogCaptureHelper.check_contain(log, error_record)

        assert len(medias) == 0

    @nose2.tools.params(
        ('rt', 7),
        ('rtfav', 7),
        ('rrrt', 7),
        ('mixed', 3),
    )
    def test_get_rt_media(self, mode: str, count: int) -> None:
        self.mock_cursor.pages.side_effect = MockTweepyCursor.pages
        self.mock_instagram.get_media_urls.return_value = [INSTAGRAM_DUMMY_URL]
        self.twitter.mode = mode

        medias = self.twitter.get_rt_media(self.user)

        self._assert_media_mapping(medias, count)

    @mock.patch('app.crawler.Twitter.get_tweet_medias', mock_twitter_func)
    def test_get_rt_media__exception(self) -> None:
        self.mock_cursor.pages.side_effect = MockTweepyCursor.pages
        self.mock_instagram.get_media_urls.return_value = [INSTAGRAM_DUMMY_URL]

        # Make the patched get_tweet_medias raise on every call.
        mock_twitter_func.side_effect = Exception()

        with LogCapture(level=logging.ERROR) as log:
            medias = self.twitter.get_rt_media(self.user)
            error_record = ('app.twitter', 'ERROR',
                            'Get tweet media error. exception=()')
            assert LogCaptureHelper.check_contain(log, error_record)

        assert len(medias) == 0

    @nose2.tools.params(
        ('rt', 7),
        ('fav', 6),
        ('rtfav', 10),
        ('rrrt', 7),
        ('mixed', 3),
    )
    def test_get_target_tweets(self, mode: str, count: int) -> None:
        self.mock_cursor.pages.side_effect = MockTweepyCursor.pages
        self.mock_instagram.get_media_urls.return_value = [INSTAGRAM_DUMMY_URL]
        self.twitter.mode = mode

        medias = self.twitter.get_target_tweets(self.user)

        self._assert_media_mapping(medias, count)