def test_sliding_company_score(redis_client, foo_company):
    ' should keep the stored average in step with a sliding window of tweets '
    # Seed two stored tweets; 0.15 is the mean of their scores.
    for seeded_id, seeded_score in ((100, 0.1), (101, 0.2)):
        db.write_tweet(redis_client, foo_company, MockTweet(id=seeded_id), seeded_score)
    db.set_company_score(redis_client, foo_company, 0.15)

    # The expected scores below treat each mock tweet's text as its score
    # (presumably arranged by the test fixtures - confirm against the mocks).
    incoming = [
        MockTweet(id=102, text='0.3'),
        MockTweet(id=103, text='-0.4'),
        MockTweet(id=104, text='0.4'),
        MockTweet(id=105, text='-0.5'),
    ]
    api = MockApi(*incoming)

    # (timeline, mean score) expected right after the 1st, 2nd and 3rd yield.
    # From the third tweet on the window of 4 is full, so the oldest id drops out.
    checkpoints = [
        ([b'100', b'101', b'102'], (0.1 + 0.2 + 0.3) / 3),
        ([b'100', b'101', b'102', b'103'], (0.1 + 0.2 + 0.3 - 0.4) / 4),
        ([b'101', b'102', b'103', b'104'], (0.2 + 0.3 - 0.4 + 0.4) / 4),
    ]

    def assert_state(timeline, score):
        # timeline first, then score - same order for every checkpoint
        assert db.get_tweets_timeline(redis_client, foo_company, withscores=False) == timeline
        assert db.get_company_score(redis_client, foo_company) == approx(score)

    for step, _ in enumerate(sync_tweets(api, redis_client, foo_company, max_tweets=4)):
        if step < len(checkpoints):
            assert_state(*checkpoints[step])

    # state after the generator is exhausted (all four tweets synced)
    assert_state([b'102', b'103', b'104', b'105'], (0.3 + 0.4 - 0.4 - 0.5) / 4)
def sync_tweets(api, redis_client, account, limit=inf, max_tweets=inf):
    ''' Generator that fetches tweets mentioning `@account` (newer than the
    highest tweet id already stored), scores each one and writes the
    non-neutral ones to the db together with an updated company score.

    Stops once `limit` tweets have been stored by this call. When the number
    of stored tweets has reached `max_tweets`, tweets returned by
    `db.get_tweets_timeline(pipe, account, 1, ...)` are removed as the new one
    is added (presumably the single oldest tweet - confirm against `db`).

    Yields, after every stored tweet, a dict with the keys `account`,
    `tweet`, `score` and `removed`. '''

    last_seen_id = db.get_max_tweet_id(redis_client, account)
    print('Started syncing tweets mentioning ', account, 'starting from ',
          last_seen_id)

    stored = 0
    incoming = twitter.get_tweets_about_company(api, account,
                                                since_id=last_seen_id)
    for tweet in incoming:
        if stored >= limit:
            break

        sentiment = get_sentiment_score(tweet.text, tweet.entities)
        if sentiment == 0.0:
            # neutral tweets are neither stored nor counted towards `limit`
            continue

        with redis_client.pipeline() as pipe:
            # Reads happen after WATCH and before MULTI; the writes queued
            # after MULTI are sent together by execute().
            pipe.watch('tweetsabout:{0}'.format(account))
            stored_count = db.get_number_of_tweets(pipe, account)
            previous_mean = db.get_company_score(redis_client, account)

            if stored_count >= max_tweets:
                evicted = db.get_tweets_timeline(pipe, account, 1,
                                                 withscores=False)
            else:
                evicted = []

            evicted_total = 0
            for evicted_id in evicted:
                evicted_total += db.get_tweet_score(pipe, account, evicted_id)

            pipe.multi()
            db.write_tweet(pipe, account, tweet, sentiment)
            db.remove_tweets(pipe, account, *evicted)

            # Sliding mean: rebuild the old sum from mean * count, swap the
            # evicted scores for the new one, divide by the new count.
            running_total = (previous_mean * stored_count - evicted_total +
                             sentiment)
            remaining = stored_count + 1 - len(evicted)
            db.set_company_score(pipe, account, running_total / remaining)

            pipe.execute()

        stored += 1

        yield {
            'account': account,
            'tweet': tweet,
            'score': sentiment,
            'removed': evicted
        }
def test_positive_neagtive(redis_client, foo_company):
    ' opposite scores of equal magnitude should average out to zero '
    api = MockApi(
        MockTweet(id=0, text='0.10'),
        MockTweet(id=1, text='-0.10'),
    )

    # drain the generator so that every tweet gets synced
    for _ in sync_tweets(api, redis_client, foo_company):
        pass

    final_score = db.get_company_score(redis_client, foo_company)
    assert final_score == approx(0)
def test_average_score(redis_client, foo_company):
    ' company score should be the mean of all synced tweet scores '
    texts = ['0.10', '0.20', '0.30']
    api = MockApi(*[
        MockTweet(id=tweet_id, text=text)
        for tweet_id, text in enumerate(texts)
    ])

    # drain the generator so that every tweet gets synced
    for _ in sync_tweets(api, redis_client, foo_company):
        pass

    mean_score = (0.10 + 0.20 + 0.30) / 3
    actual_score = db.get_company_score(redis_client, foo_company)
    assert actual_score == approx(mean_score)
def test_skip_neutral_tweets(redis_client, foo_company):
    ' tweets scored 0.0 should be neither stored nor averaged '
    api = MockApi(
        MockTweet(id=0, text='0.0'),
        MockTweet(id=1, text='0.0'),
        MockTweet(id=2, text='0.1'),
    )

    # drain the generator so that every tweet gets a chance to be synced
    for _ in sync_tweets(api, redis_client, foo_company):
        pass

    # only the single non-neutral tweet is expected to remain
    stored_count = db.get_number_of_tweets(redis_client, foo_company)
    assert stored_count == 1
    stored_score = db.get_company_score(redis_client, foo_company)
    assert stored_score == approx(0.1)
def get_companies_details(min_tweets=500):
    ''' Build the JSON response listing every company together with its
    details, number of stored tweets and current score.

    `min_tweets` is the minimum number of stored tweets a company needs in
    order to be listed; companies below it are left out. It defaults to 500,
    the value that used to be hard-coded here, so existing callers are
    unaffected, while a caller may pass e.g. an environment-derived value.

    Returns `jsonify({'companies': [...]})`. '''
    companies_details = []

    for account in db.get_companies(r):
        number_of_tweets = db.get_number_of_tweets(r, account)
        if number_of_tweets < min_tweets:
            # too few tweets stored for this company - skip it
            continue

        company_details = db.get_company_details(r, account)
        company_details['number_of_tweets'] = number_of_tweets
        company_details['score'] = db.get_company_score(r, account)
        companies_details.append(company_details)

    return jsonify({'companies': companies_details})
def test_limit_skip_neutral(redis_client, foo_company):
    ' neutral tweets should not count towards `limit` '
    api = MockApi(
        MockTweet(id=0, text='0.1'),  # stored (1st)
        MockTweet(id=1, text='0.0'),  # neutral, ignored
        MockTweet(id=2, text='0.0'),  # neutral, ignored
        MockTweet(id=3, text='0.2'),  # stored (2nd) - limit reached
        MockTweet(id=4, text='0.2'),  # beyond the limit, ignored
        MockTweet(id=5, text='0.0'),  # beyond the limit, ignored
    )

    synced = sync_tweets(api, redis_client, foo_company, limit=2)
    for res in synced:
        print('res', res)

    stored_count = db.get_number_of_tweets(redis_client, foo_company)
    assert stored_count == 2
    stored_score = db.get_company_score(redis_client, foo_company)
    assert stored_score == approx(0.15)
def test_max_tweets(redis_client, foo_company):
    ' should drop a stored tweet for each new one once `max_tweets` is reached '
    # Seed two stored tweets; 0.15 is the mean of their scores.
    for seeded_id, seeded_score in ((100, 0.1), (101, 0.2)):
        db.write_tweet(redis_client, foo_company, MockTweet(id=seeded_id), seeded_score)
    db.set_company_score(redis_client, foo_company, 0.15)

    incoming = [
        MockTweet(id=102, text='0.3'),
        MockTweet(id=103, text='0.4'),
    ]
    api = MockApi(*incoming)

    # drain the generator so that both tweets get synced
    for _ in sync_tweets(api, redis_client, foo_company, max_tweets=3):
        pass

    # tweet 100 is expected to have been pushed out of the window of three
    stored_count = db.get_number_of_tweets(redis_client, foo_company)
    assert stored_count == 3
    window_mean = (0.2 + 0.3 + 0.4) / 3
    assert db.get_company_score(redis_client, foo_company) == approx(window_mean)
    timeline = db.get_tweets_timeline(redis_client, foo_company, withscores=False)
    assert timeline == [b'101', b'102', b'103']
def test_prior_tweets(redis_client, foo_company):
    ' tweets stored before syncing should still count towards the new score '
    # Seed two stored tweets; 0.15 is the mean of their scores.
    for seeded_id, seeded_score in ((100, 0.1), (101, 0.2)):
        db.write_tweet(redis_client, foo_company, MockTweet(id=seeded_id), seeded_score)
    db.set_company_score(redis_client, foo_company, 0.15)

    count_before = db.get_number_of_tweets(redis_client, foo_company)
    assert count_before == 2

    incoming = [
        MockTweet(id=102, text='0.3'),
        MockTweet(id=103, text='0.4'),
    ]
    api = MockApi(*incoming)

    # no `max_tweets` given, so nothing should be removed while syncing
    for _ in sync_tweets(api, redis_client, foo_company):
        pass

    count_after = db.get_number_of_tweets(redis_client, foo_company)
    assert count_after == 4
    overall_mean = (0.1 + 0.2 + 0.3 + 0.4) / 4
    assert db.get_company_score(redis_client, foo_company) == approx(overall_mean)
import redis
from lib import db
import sys

# Maintenance script: for every company, remove stored tweets whose score is
# exactly 0.0 and then recompute the company's overall score.
r = redis.StrictRedis(host='redis', decode_responses=True)
for company in db.get_companies(r):
    try:
        print('company', company, 'has', db.get_number_of_tweets(r, company),
              'tweets')
        print('removing unneccessary')
        for tweet_id in db.get_tweets_timeline(r, company, withscores=False):
            tweet_score = db.get_tweet_score(r, company, tweet_id)
            if tweet_score == 0.0:
                print('removing', tweet_id, tweet_score)
                db.remove_tweets(r, company, tweet_id)

        print('company', company, 'has', db.get_number_of_tweets(r, company),
              'tweets')
        print('score before', db.get_company_score(r, company))
        db.update_company_overall_score(r, company)
        print('score after', db.get_company_score(r, company))
    except Exception as exc:
        # Best effort: one failing company must not stop the whole run, but
        # the failure is reported instead of being swallowed silently. The
        # stored score is dropped, as before, and the loop moves on.
        print('cleanup failed for', company, '-', repr(exc), file=sys.stderr)
        r.delete('companyscore:{0}'.format(company))
Example #11
0
def test_company_score(redis_client, foo_company):
    ' A company score that was set should be read back unchanged '
    stored_score = 0.25
    db.set_company_score(redis_client, foo_company, stored_score)

    read_back = db.get_company_score(redis_client, foo_company)
    assert read_back == stored_score
Example #12
0
def test_empty_company_score(redis_client, foo_company):
    ' A company whose score was never set should report a score of 0 '
    # nothing is written beforehand: the score is read straight away
    unset_score = db.get_company_score(redis_client, foo_company)
    assert unset_score == 0