Exemplo n.º 1
0
    def test_remote_integration(self) -> None:
        """Run a sanity test on the remote APIs via the internal CH / Twitter integration functions.

        Creates a test topic whose ``ch_monitor_id`` is the configured
        Crimson Hexagon test monitor, adds a single topic tweet day for
        2016-01-01, fetches tweets for that day (capped at ``max_tweets``)
        and checks the rows stored in ``topic_tweets`` for that day.
        """
        db = self.db()
        config = mediawords.util.config.get_config()
        fetcher = mediawords.tm.fetch_topic_tweets

        topic = mediawords.test.db.create_test_topic(db, "test_remote_integration")
        topic['ch_monitor_id'] = config['crimson_hexagon']['test_monitor_id']
        db.update_by_id('topics', topic['topics_id'], topic)

        day = datetime.datetime(year=2016, month=1, day=1)
        ttd = fetcher._add_topic_tweet_single_day(db, topic, day, fetcher.CrimsonHexagon)

        max_tweets = 200
        fetcher._fetch_tweets_for_day(db, fetcher.Twitter, topic, ttd, max_tweets=max_tweets)

        query_params = {'a': ttd['topic_tweet_days_id']}
        got_tts = db.query(
            "select * from topic_tweets where topic_tweet_days_id = %(a)s",
            query_params,
        ).hashes()

        # for old ch monitors, lots of the tweets may be deleted, so only
        # require more than a tenth of the requested tweets to be present
        min_expected = max_tweets / 10
        assert len(got_tts) > min_expected

        first_tweet = got_tts[0]
        assert len(first_tweet['content']) > MIN_TEST_TWEET_LENGTH
        assert len(first_tweet['twitter_user']) > MIN_TEST_TWITTER_USER_LENGTH
Exemplo n.º 2
0
    def test_fetch_topic_tweets(self) -> None:
        """Run fetch_topic_tweets against the test database using the mock Twitter / Crimson Hexagon classes.

        Sets the topic's date range from ``get_test_date_range()``, runs the
        fetch, then checks the number of ``topic_tweet_days`` rows and
        validates the stored tweets for each checked day and the topic's
        tweet urls.
        """
        db = self.db()
        topic = mediawords.test.db.create_test_topic(db, 'test')

        date_range = get_test_date_range()
        topic['start_date'] = date_range[0]
        topic['end_date'] = date_range[1]
        topic['ch_monitor_id'] = 123456
        db.update_by_id('topics', topic['topics_id'], topic)

        mediawords.tm.fetch_topic_tweets.fetch_topic_tweets(
            db,
            topic['topics_id'],
            MockTwitter,
            MockCrimsonHexagon,
        )

        all_days = db.query("select * from topic_tweet_days").hashes()
        assert len(all_days) == LOCAL_DATE_RANGE + 1

        first_day = datetime.datetime.strptime(topic['start_date'], '%Y-%m-%d')
        for offset in range(LOCAL_DATE_RANGE):
            day = first_day + datetime.timedelta(days=offset)
            day_params = {'a': topic['topics_id'], 'b': day}
            topic_tweet_day = db.query(
                "select * from topic_tweet_days where topics_id = %(a)s and day = %(b)s",
                day_params,
            ).hash()
            assert topic_tweet_day is not None

            validate_topic_tweets(db, topic_tweet_day)

        validate_topic_tweet_urls(db, topic)
    def test_fetch_topic_tweets(self) -> None:
        """Run fetch_topic_tweets against the test database using the mock Twitter / Crimson Hexagon classes.

        The topic is given a catch-all ``'.*'`` pattern and the date range
        from ``get_test_date_range()`` before the fetch; afterwards the
        ``topic_tweet_days`` rows, their tweets and the topic's tweet urls
        are validated.
        """
        db = self.db()
        topic = mediawords.test.db.create.create_test_topic(db, 'test')
        topics_id = topic['topics_id']

        # continue with the row returned by the update
        topic = db.update_by_id('topics', topics_id, {'pattern': '.*'})

        date_range = get_test_date_range()
        topic['start_date'] = date_range[0]
        topic['end_date'] = date_range[1]
        topic['ch_monitor_id'] = 123456
        db.update_by_id('topics', topic['topics_id'], topic)

        ftt.fetch_topic_tweets(db, topic['topics_id'], MockTwitter, MockCrimsonHexagon)

        all_days = db.query("select * from topic_tweet_days").hashes()
        assert len(all_days) == LOCAL_DATE_RANGE + 1

        first_day = datetime.datetime.strptime(topic['start_date'], '%Y-%m-%d')
        day_query = "select * from topic_tweet_days where topics_id = %(a)s and day = %(b)s"
        for offset in range(LOCAL_DATE_RANGE):
            day = first_day + datetime.timedelta(days=offset)
            topic_tweet_day = db.query(day_query, {'a': topic['topics_id'], 'b': day}).hash()
            assert topic_tweet_day is not None

            validate_topic_tweets(db, topic_tweet_day)

        validate_topic_tweet_urls(db, topic)
Exemplo n.º 4
0
    def test_fetch_topic_tweets(self) -> None:
        """Run fetch_topic_tweets tests with test database.

        Creates a test topic with a ``'.*'`` pattern, the date range from
        ``get_test_date_range()`` and a twitter / crimson_hexagon seed query,
        then runs ``ftt.fetch_topic_tweets`` with the remote fetch functions
        replaced by mocks and validates the resulting ``topic_tweet_days``,
        tweets and tweet urls.

        The mocked module attributes are restored when the test finishes,
        pass or fail, so the mocks do not leak into other tests that run in
        the same process.
        """
        db = self.db()
        topic = mediawords.test.db.create.create_test_topic(db, 'test')

        topic = db.update_by_id('topics', topic['topics_id'],
                                {'pattern': '.*'})

        test_dates = get_test_date_range()
        topic['start_date'] = test_dates[0]
        topic['end_date'] = test_dates[1]
        db.update_by_id('topics', topic['topics_id'], topic)

        tsq = {
            'topics_id': topic['topics_id'],
            'platform': 'twitter',
            'source': 'crimson_hexagon',
            'query': 123456
        }
        db.create('topic_seed_queries', tsq)

        db.update_by_id('topics', topic['topics_id'], {'platform': 'twitter'})

        # Swap the remote fetch functions for mocks, remembering the real
        # ones so they can be put back afterwards.
        fetch_module = mediawords.tm.fetch_topic_tweets
        real_fetch_meta_tweets_from_ch = fetch_module.fetch_meta_tweets_from_ch
        real_fetch_100_tweets = fetch_module.fetch_100_tweets
        fetch_module.fetch_meta_tweets_from_ch = mock_fetch_meta_tweets_from_ch
        fetch_module.fetch_100_tweets = mock_fetch_100_tweets
        try:
            ftt.fetch_topic_tweets(db, topic['topics_id'])

            topic_tweet_days = db.query("select * from topic_tweet_days").hashes()
            assert len(topic_tweet_days) == LOCAL_DATE_RANGE + 1

            start_date = datetime.datetime.strptime(topic['start_date'],
                                                    '%Y-%m-%d')
            test_days = [
                start_date + datetime.timedelta(days=x)
                for x in range(0, LOCAL_DATE_RANGE)
            ]
            for d in test_days:
                topic_tweet_day = db.query(
                    "select * from topic_tweet_days where topics_id = %(a)s and day = %(b)s",
                    {
                        'a': topic['topics_id'],
                        'b': d
                    }).hash()
                assert topic_tweet_day is not None

                # validation stays inside the try block so the mocks remain
                # installed in case the validators reach the fetch functions
                validate_topic_tweets(db, topic_tweet_day)

            validate_topic_tweet_urls(db, topic)
        finally:
            fetch_module.fetch_meta_tweets_from_ch = real_fetch_meta_tweets_from_ch
            fetch_module.fetch_100_tweets = real_fetch_100_tweets
Exemplo n.º 5
0
    def _test_remote_integration(self, source, query, day) -> None:
        """Run a sanity test on the remote APIs for one seed query.

        Creates a test topic with a twitter seed query built from ``source``
        and ``query``, restricts the topic to the single ``day`` (used as
        both start and end date), fetches tweets capped at ``max_tweets``
        and checks the rows stored in ``topic_tweets``.
        """
        db = self.db()

        topic = mediawords.test.db.create.create_test_topic(db, "test_remote_integration")

        db.create(
            'topic_seed_queries',
            {
                'topics_id': topic['topics_id'],
                'platform': 'twitter',
                'source': source,
                'query': query,
            },
        )

        topic['platform'] = 'twitter'
        topic['pattern'] = '.*'
        topic['start_date'] = day
        topic['end_date'] = day
        db.update_by_id('topics', topic['topics_id'], topic)

        # only fetch 200 tweets to make test quicker
        max_tweets = 200
        ftt.fetch_topic_tweets(db, topic['topics_id'], max_tweets)

        got_tts = db.query("select * from topic_tweets").hashes()

        # for old ch monitors, lots of the tweets may be deleted, so only
        # require more than a tenth of the requested tweets to be present
        min_expected = max_tweets / 10
        assert len(got_tts) > min_expected

        first_tweet = got_tts[0]
        assert len(first_tweet['content']) > MIN_TEST_TWEET_LENGTH
        assert len(first_tweet['twitter_user']) > MIN_TEST_TWITTER_USER_LENGTH
    def test_remote_integration(self) -> None:
        """Run a sanity test on the remote APIs via the internal CH / Twitter integration functions.

        Creates a test topic whose ``ch_monitor_id`` is ``TEST_MONITOR_ID``,
        adds a single topic tweet day for 2016-01-01, fetches tweets for that
        day (capped at ``max_tweets``) and checks the rows stored in
        ``topic_tweets`` for that day.
        """
        db = self.db()

        topic = mediawords.test.db.create.create_test_topic(db, "test_remote_integration")
        topic['ch_monitor_id'] = TEST_MONITOR_ID
        db.update_by_id('topics', topic['topics_id'], topic)

        ttd = ftt._add_topic_tweet_single_day(
            db,
            topic,
            datetime.datetime(year=2016, month=1, day=1),
            ftt.CrimsonHexagon,
        )

        max_tweets = 200
        ftt._fetch_tweets_for_day(db, ftt.Twitter, topic, ttd, max_tweets=max_tweets)

        query_params = {'a': ttd['topic_tweet_days_id']}
        got_tts = db.query(
            "select * from topic_tweets where topic_tweet_days_id = %(a)s",
            query_params,
        ).hashes()

        # for old ch monitors, lots of the tweets may be deleted, so only
        # require more than a tenth of the requested tweets to be present
        min_expected = max_tweets / 10
        assert len(got_tts) > min_expected

        first_tweet = got_tts[0]
        assert len(first_tweet['content']) > MIN_TEST_TWEET_LENGTH
        assert len(first_tweet['twitter_user']) > MIN_TEST_TWITTER_USER_LENGTH