示例#1
0
def run(scrape_queue_url=SCRAPE_QUEUE_URL, scrape_queue_name=SCRAPE_NEWS_TASK_QUEUE_NAME,
        dedupe_queue_url=DEDUPE_QUEUE_URL, dedupe_queue_name=DEDUPE_NEWS_TASK_QUEUE_NAME,
        times=-1):
    """Poll the scrape queue, handle each message and forward it to the dedupe queue.

    Every iteration fetches at most one message from the scrape queue. A
    message that ``handle_message`` processes without raising is then sent to
    the dedupe queue; if handling or sending raises, the error is logged as a
    warning and the loop carries on with the next iteration.

    Args:
        scrape_queue_url: URL used to connect to the scrape task queue.
        scrape_queue_name: Name of the scrape task queue.
        dedupe_queue_url: URL used to connect to the dedupe task queue.
        dedupe_queue_name: Name of the dedupe task queue.
        times: Number of polling iterations to perform. A negative value
            loops forever. Because the counter is only checked at the end of
            an iteration, ``0`` still performs exactly one iteration.

    Raises:
        AssertionError: If either queue client reports that it is not
            connected after ``connect()``.
    """
    scrape_queue_client = AMQPClient(scrape_queue_url, scrape_queue_name)
    scrape_queue_client.connect()
    dedupe_queue_client = AMQPClient(dedupe_queue_url, dedupe_queue_name)
    dedupe_queue_client.connect()

    assert scrape_queue_client.is_connected()
    assert dedupe_queue_client.is_connected()

    try:
        while True:
            logger.debug('News fetcher: iter..')
            msg = scrape_queue_client.get_message()
            if msg is not None:
                try:
                    handle_message(msg)
                    dedupe_queue_client.send_message(msg)
                    logger.info('News Fetcher: message sent to dedupe queue (url: {})'
                                .format(msg.get('url')))
                except Exception as e:
                    # Best effort: one bad message must not stop the worker.
                    logger.warning('News fetcher: handling error: {}'.format(e))
            scrape_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
            # The counter is decremented only after the message has been
            # processed and the sleep has completed, so an iteration is never
            # counted before its work is done.
            if times > 0:
                times -= 1
            if times == 0:
                break
    finally:
        # Release both connections however the loop ends (iteration budget
        # exhausted, unexpected error, KeyboardInterrupt). The nested
        # try/finally makes sure the second close runs even if the first fails.
        try:
            scrape_queue_client.close()
        finally:
            dedupe_queue_client.close()
示例#2
0
def test_basic():
    """Check that the news fetcher drains every task placed on the scrape queue.

    The test clears all queues and flushes Redis, enqueues every entry of
    ``TEST_SCRAPE_TASK``, runs the fetcher for one iteration per task and then
    expects the scrape queue to be empty. The dedupe queue is cleared at the
    end.
    """
    print('news_fetcher_test: cleaning all queues...')
    queue_cleaner.clear_all()
    print('flushing all cache in Redis')
    redis_client.flushall()  # TODO dangerous to run when deployed

    scrape_queue_client = AMQPClient(SCRAPE_QUEUE_URL,
                                     SCRAPE_NEWS_TASK_QUEUE_NAME)
    scrape_queue_client.connect()
    assert scrape_queue_client.is_connected()

    try:
        print('test_fetcher_basic: adding news onto scrape queue...')
        for message in TEST_SCRAPE_TASK:
            scrape_queue_client.send_message(message)

        print('getting messages from the queue and process...')
        # Shorten the fetcher's pause between polls so the test finishes quickly.
        news_fetcher.SLEEP_TIME_IN_SECONDS = 1
        # Pass the iteration count by keyword: the fetcher's run() takes the
        # queue URLs and names before `times`, so a positional argument would
        # be interpreted as the scrape queue URL instead of a count.
        news_fetcher.run(times=len(TEST_SCRAPE_TASK))

        should_be_empty_msg = scrape_queue_client.get_message()
        print('news_fetcher_test(expecting None):', should_be_empty_msg)
        assert should_be_empty_msg is None
    finally:
        # Close the connection even when an assertion above fails.
        scrape_queue_client.close()

    queue_cleaner.clear_queue(DEDUPE_QUEUE_URL, DEDUPE_NEWS_TASK_QUEUE_NAME)
    print('news_fetcher test passed')
def test_basic():
    """Round-trip smoke test for ``AMQPClient`` on the 'my_queue' queue.

    Verifies that the queue starts empty, that a string and a dict are each
    read back equal to what was sent, and that the queue is empty again
    afterwards. The queue is cancelled and the connection closed at the end.
    """
    queue = AMQPClient(AMQP_URL, 'my_queue')
    queue.connect()
    assert queue.is_connected()

    # Nothing has been published yet.
    assert queue.get_message() is None

    # Each payload is sent and immediately read back, one at a time.
    for payload in ('hello world', {"hello": "world"}):
        queue.send_message(payload)
        assert queue.get_message() == payload

    # Both messages were consumed above, so the queue must be drained.
    assert queue.get_message() is None

    queue.cancel_queue()
    queue.close()
    print('[x] cloud amqp_client test passed')
示例#4
0
def run(times=-1):
    """Poll the user-click queue and hand every polled result to ``handle_message``.

    The value returned by ``get_message()`` is passed on as-is on every
    iteration, including when no message was available. Any exception raised
    by ``handle_message`` propagates to the caller.

    Args:
        times: Number of polling iterations to perform. A negative value
            loops forever. Because the counter is only checked at the end of
            an iteration, ``0`` still performs exactly one iteration.

    Raises:
        AssertionError: If the queue client reports that it is not connected
            after ``connect()``.
    """
    click_queue_client = AMQPClient(USER_CLICK_QUEUE_URL,
                                    USER_CLICK_QUEUE_NAME)
    click_queue_client.connect()
    assert click_queue_client.is_connected()
    # NOTE(review): this formats the client object itself, not
    # USER_CLICK_QUEUE_NAME; confirm AMQPClient's string form is the queue name.
    print('Click Handler: my queue name: {}'.format(click_queue_client))

    try:
        while True:
            message = click_queue_client.get_message()
            # Errors are deliberately not caught here: they end the loop and
            # reach the caller unchanged.
            handle_message(message)

            click_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
            if times > 0:
                times -= 1
            if times == 0:
                break
    finally:
        # Release the connection however the loop ends.
        click_queue_client.close()
示例#5
0
def test_basic():
    """Replay ``TEST_SEQUENCE`` through the click learner and check the stored model.

    The click queue and the preference collection are cleared, every click in
    ``TEST_SEQUENCE`` is enqueued, and then the learner is run one iteration
    per click. After each iteration the stored preference model of
    ``TEST_USER_NAME`` is compared with a reference model maintained here:
    all categories start at ``1 / len(NEWS_CATEGORIES)``; the clicked
    category is updated to ``(1 - ALPHA) * p + ALPHA`` and every other
    category to ``(1 - ALPHA) * p``.
    """
    clear_queue(clicklearner.USER_CLICK_QUEUE_URL, CLICK_QUEUE_NAME)

    # Point the learner at the test collection/queue and shorten its poll pause.
    clicklearner.PREF_COLLECTION_NAME = PREF_COLLECTION_NAME
    clicklearner.USER_CLICK_QUEUE_NAME = CLICK_QUEUE_NAME
    clicklearner.SLEEP_TIME_IN_SECONDS = 1

    click_queue_client = AMQPClient(clicklearner.USER_CLICK_QUEUE_URL,
                                    CLICK_QUEUE_NAME)
    click_queue_client.connect()

    assert click_queue_client.is_connected()

    try:
        print('Click Learner tester: clearing collection "{}" in db "{}"...'.format(PREF_COLLECTION_NAME,
            PREF_DB_NAME))
        # NOTE(review): if pref_collection is a PyMongo collection, remove()
        # is deprecated in favour of delete_many({}); confirm the driver version.
        pref_collection.remove()

        print('Click Learner tester: sending click logs')
        for click in TEST_SEQUENCE:
            click_queue_client.send_message(click)

        print('Click Learner tester: start handling clicks')

        # Reference model: uniform preference over all categories.
        pref_model_ref = {'userId': TEST_USER_NAME}
        for cat in NEWS_CATEGORIES:
            pref_model_ref[cat] = 1 / len(NEWS_CATEGORIES)

        for click_log in TEST_SEQUENCE:
            # One learner iteration per queued click.
            clicklearner.run(1)
            pref_model = pref_collection.find_one({'userId': TEST_USER_NAME})
            # Fail with a clear message instead of a TypeError on the `del` below.
            assert pref_model is not None, (
                'no preference model stored for user {}'.format(TEST_USER_NAME))

            selected = news_collection.find_one({'digest': (click_log['newsDigest'])})['category']
            pref_model_ref[selected] = (1 - ALPHA) * pref_model_ref[selected] + ALPHA
            for cat in NEWS_CATEGORIES:
                if cat != selected:
                    pref_model_ref[cat] = (1 - ALPHA) * pref_model_ref[cat]

            # The reference model has no '_id' key, so drop it before comparing.
            del pref_model['_id']
            print('Click Learner tester: expecting {} == {}'.format(pref_model, pref_model_ref))
            assert pref_model == pref_model_ref
    finally:
        # Close the connection even when an assertion above fails.
        click_queue_client.close()

    print('xx Click Learner test passed')
示例#6
0
def run(times=-1):
    """Poll the user-click queue and feed every polled result to ``handle_message``.

    ``handle_message`` receives the value returned by ``get_message()``
    (passed on as-is, including when no message was available) together with
    the news collection and the preference collection. Any exception it
    raises propagates to the caller.

    Args:
        times: Number of polling iterations to perform. A negative value
            loops forever. Because the counter is only checked at the end of
            an iteration, ``0`` still performs exactly one iteration.

    Raises:
        AssertionError: If the queue client reports that it is not connected
            after ``connect()``.
    """
    click_queue_client = AMQPClient(USER_CLICK_QUEUE_URL,
                                    USER_CLICK_QUEUE_NAME)
    click_queue_client.connect()
    # Fail fast on a broken queue connection before touching the database.
    assert click_queue_client.is_connected()

    try:
        news_collection = mongodb_client.get_news_collection()
        pref_collection = (mongodb_client.get_db(PREF_DB_NAME).get_collection(
            PREF_COLLECTION_NAME))

        while True:
            msg = click_queue_client.get_message()
            # Errors are deliberately not caught here: they end the loop and
            # reach the caller unchanged.
            handle_message(msg, news_collection, pref_collection)
            click_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
            if times > 0:
                times -= 1
            if times == 0:
                break
    finally:
        # Release the connection however the loop ends.
        click_queue_client.close()
# Install coloredlogs on this module's logger; the level comes from the
# LOGGER_LEVEL environment variable and defaults to 'INFO'.
coloredlogs.install(level=os.environ.get('LOGGER_LEVEL', 'INFO'),
                    logger=logger)

# TODO: these globals are bad and leave this module-level setup uncovered by tests
# All settings below are read from the process environment at import time;
# a missing variable raises KeyError right here.
config = os.environ
DB_NAME = config['news_db']
# NOTE(review): the key is 'new_collection' (not 'news_collection'); confirm
# it matches the variable name used in the deployment environment.
COLLECTION_NAME = config['new_collection']
DEDUPE_QUEUE_URL = config['dedupe_task_queue_url']
DEDUPE_QUEUE_NAME = config['dedupe_task_queue_name']

# Presumably the pause between queue polls; its use is outside this view.
SLEEP_TIME_IN_SECONDS = 5

# The dedupe queue connection is opened as an import-time side effect.
dedupe_queue_client = AMQPClient(DEDUPE_QUEUE_URL, DEDUPE_QUEUE_NAME)
dedupe_queue_client.connect()

# Importing this module fails with AssertionError if the client is not connected.
assert dedupe_queue_client.is_connected()

# Presumably the similarity score above which two news items count as
# duplicates; its use is outside this view, so confirm there.
NEWS_SIMILARITY_THRESHOLD = 0.8


class NotContainPublishTimeError(Exception):
    """Error whose text reports that a news item has no publish time.

    The string form is always the same fixed message, regardless of any
    arguments passed to the constructor.
    """

    _MESSAGE = 'News not containing publish time!!!'

    def __str__(self):
        return self._MESSAGE


def handle_message(msg):
    logger.debug('dedupter handling message: {}'.format(msg))
    if msg is None or not isinstance(msg, dict):
        logger.info('News Deduper: message is broken')
        return