Exemplo n.º 1
0
def test_basic():
    """Smoke-test the news client with default and explicit sources.

    Each call must return a non-empty result, otherwise the test fails
    with an AssertionError.
    """
    # Default call: no arguments.
    news = client.get_news_from_source()
    # print() with a single argument behaves the same on Python 2 and 3,
    # unlike the Python-2-only print statement.
    print(news)
    assert len(news) > 0

    # Explicit source list.
    news = client.get_news_from_source(sources=['bbc-news'])
    assert len(news) > 0

    print('test_basic passed!')
Exemplo n.º 2
0
def test_basic():
    """Smoke-test the news client with default and explicit arguments.

    Each call must return a non-empty result, otherwise the test fails
    with an AssertionError.
    """
    # Default call: no arguments.
    news = client.get_news_from_source()
    # print() with a single argument behaves the same on Python 2 and 3,
    # unlike the Python-2-only print statement.
    print(news)
    assert len(news) > 0

    # Explicit source list and sort order.
    news = client.get_news_from_source(sources=['cnn'], sortBy='top')
    assert len(news) > 0

    print('test_basic passed')
Exemplo n.º 3
0
def test_basic():
    """Smoke test: the news API client returns truthy results.

    Exercises one call with default arguments and one with an explicit
    source and sort order; each result is printed and must be truthy.
    """
    default_result = client.get_news_from_source()
    print(default_result)
    assert default_result

    filtered_result = client.get_news_from_source(
        sources=['the-new-york-times'], sort_by='top')
    print(filtered_result)
    assert filtered_result

    print('test_basic passed!')
Exemplo n.º 4
0
def run():
    """Poll the news API forever and enqueue news items not seen before.

    On every iteration the function fetches news for NEWS_SOURCES, computes
    an MD5 hex digest of each item's title, and uses Redis to skip items
    whose digest is already stored. Each new item gets a 'digest' field, a
    'publishedAt' timestamp when it has none, and is sent to the message
    queue. The loop never returns; it sleeps SLEEP_TIME_IN_SECONDS between
    iterations via the queue client's sleep().
    """
    logging.info("[news_monitor] start running")
    redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)
    cloudamqp_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL,
                                       SCRAPE_NEWS_TASK_QUEUE_NAME)

    while True:
        news_list = news_api_client.get_news_from_source(NEWS_SOURCES)
        logging.info("[news_monitor] get %d from news API", len(news_list))
        number_of_news = 0

        for news in news_list:
            # The title digest is the de-duplication key in Redis.
            news_digest = hashlib.md5(
                news['title'].encode('utf-8')).hexdigest()

            # Already seen within the expiry window: nothing to do.
            if redis_client.get(news_digest) is not None:
                continue

            number_of_news += 1
            news['digest'] = news_digest

            # Fall back to the current UTC time when the item has no
            # publication time; .get() also covers a missing key.
            if news.get('publishedAt') is None:
                news['publishedAt'] = datetime.datetime.utcnow().strftime(
                    "%Y-%m-%dT%H:%M:%SZ")

            # Store the digest and its expiry in one command so a failure
            # between two separate calls cannot leave a key that never
            # expires.
            redis_client.set(news_digest, "True",
                             ex=NEWS_TIME_OUT_IN_SECONDS)

            cloudamqp_client.send_message(news, "[news_monitor]")

        logging.info("[news_monitor] fetch %d news", number_of_news)
        cloudamqp_client.sleep(SLEEP_TIME_IN_SECONDS)
Exemplo n.º 5
0
    'bloomberg',
    'cnn',
    'entertainment-weekly',
    'espn',
    'ign',
    'techcrunch',
    'the-new-york-times',
    'the-wall-street-journal',
    'the-washington-post'
]

# Redis connection; the loop below stores news digests in it.
redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)
# Queue client built from the scrape-news task queue URL and name.
cloudAMQP_client = CloudAMQPClient(
    SCRAPE_NEWS_TASK_QUEUE_URL,
    SCRAPE_NEWS_TASK_QUEUE_NAME,
)

while True:
    news_list = news_api_client.get_news_from_source(NEWS_SOURCES)
    num_of_news_news = 0
    print(news_list)

    for news in news_list:
        news_digest = hashlib.md5(news['title'].encode('utf-8')).digest().encode('base64')

        if redis_client.get(news_digest) is None:
            num_of_news_news = num_of_news_news + 1
            news['digest'] = news_digest

            if news['publishedAt'] is None:
                news['publishedAt'] = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")

            redis_client.set(news_digest, "True")
            redis_client.expire(news_digest, NEWS_TIME_OUT_IN_SECONDS)