Exemplo n.º 1
0
def test_basic():
    print('news_fetcher_test: cleaning all queues...')
    queue_cleaner.clear_all()
    print('flushing all cache in Redis')
    redis_client.flushall()  # TODO dangerous to run when deployed

    scrape_queue_client = AMQPClient(SCRAPE_QUEUE_URL,
                                     SCRAPE_NEWS_TASK_QUEUE_NAME)
    scrape_queue_client.connect()
    assert scrape_queue_client.is_connected()

    print('test_fetcher_basic: adding news onto scrape queue...')
    for message in TEST_SCRAPE_TASK:
        scrape_queue_client.send_message(message)

    print('getting messages from the queue and process...')
    news_fetcher.SLEEP_TIME_IN_SECONDS = 1
    news_fetcher.run(len(TEST_SCRAPE_TASK))

    should_be_empty_msg = scrape_queue_client.get_message()
    print('news_fetcher_test(expecting None):', should_be_empty_msg)
    assert should_be_empty_msg is None
    scrape_queue_client.close()

    queue_cleaner.clear_queue(DEDUPE_QUEUE_URL, DEDUPE_NEWS_TASK_QUEUE_NAME)
    print('news_fetcher test passed')
Exemplo n.º 2
0
def test_basic():
    # not a good idea to test, but a way I can think of seperating the queue
    # in test and production
    rpc_operations.USER_CLICK_QUEUE_URL = USER_CLICK_QUEUE_URL
    rpc_operations.USER_CLICK_QUEUE_NAME = USER_CLICK_QUEUE_NAME
    rpc_operations.init()
    clickhandler.USER_CLICK_QUEUE_URL = USER_CLICK_QUEUE_URL
    clickhandler.USER_CLICK_QUEUE_NAME = USER_CLICK_QUEUE_NAME

    print('Click Handler test: cleaning up click queue...')
    clear_queue(USER_CLICK_QUEUE_URL, USER_CLICK_QUEUE_NAME)

    print('Click Handler test: logging clicks...')
    for pair in TEST_CLICK_DATA:
        rpc_operations.log_click(*pair)

    print(print('Click Handler test: handling clicks...'))
    clickhandler.run(len(TEST_CLICK_DATA))

    queue_client = AMQPClient(USER_CLICK_QUEUE_URL, USER_CLICK_QUEUE_NAME)
    queue_client.connect()

    should_be_empty = queue_client.get_message()
    print('Click Handler Test: {} should be None'.format(should_be_empty))
    assert should_be_empty is None
    queue_client.close

    print('Click Handler: test passed')
Exemplo n.º 3
0
def run():
    redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)
    amqp_client = AMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL,
                             SCRAPE_NEWS_TASK_QUEUE_NAME)
    amqp_client.connect()

    try:
        while True:
            news_list = news_client.get_news_from_sources(NEWS_SOURCES)
            num_news = 0

            for news in news_list:
                digest = hashlib.md5(news['title'].encode('utf-8')).hexdigest()

                if redis_client.get(digest):
                    continue

                num_news += 1
                news['digest'] = digest
                redis_client.set(digest, True)
                redis_client.expire(digest, NEWS_TIME_OUT_IN_SECONDS)

                print(news)
                amqp_client.send_message(news)

            print('News Monitor: fectched {} news'.format(num_news))
            amqp_client.sleep(SLEEP_TIME_IN_SECONDS)
    except KeyboardInterrupt:
        print('keyboard interrupt')
    # except SigTerm
    finally:
        amqp_client.close()
Exemplo n.º 4
0
def test_monitor_basic():
    news_monitor.NEWS_SOURCES = news_client.MOCK_SOURCES
    MOCK_DATA = news_client.MOCK_DATA

    print('test_monitor_basic: cleaning queue "{}" first---------'.format(QUEUE_NAME))
    clear_queue(QUEUE_URL, QUEUE_NAME)
    # TODO: redis server flush all
    redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)
    redis_client.flushall()

    print('test_monitor_basic: adding message to queue "{}"--------'.format(QUEUE_NAME))
    amqp_client = AMQPClient(QUEUE_URL, QUEUE_NAME)
    amqp_client.connect()

    proc = Process(target=news_monitor.run, name='monitor_run', 
        args=(REDIS_HOST, REDIS_PORT, QUEUE_URL, QUEUE_NAME))
    proc.start()
    print('test_monitor_basic: executing... (wait for 2 seconds to cut)')
    time.sleep(2)

    proc.terminate()

    for i in range(len(MOCK_DATA)):
        message = amqp_client.get_message()
        del message['digest']
        print(message, MOCK_DATA[i])
        assert message == MOCK_DATA[i]

    print('test_monitor_basic: [x] test_monitor_basic test passed')
Exemplo n.º 5
0
def run(redis_host=REDIS_HOST,
        redis_port=REDIS_PORT,
        scrape_queue_url=SCRAPE_NEWS_TASK_QUEUE_URL,
        scrape_queue_name=SCRAPE_NEWS_TASK_QUEUE_NAME):
    redis_client = redis.StrictRedis(redis_host, redis_port)
    amqp_client = AMQPClient(scrape_queue_url, scrape_queue_name)
    amqp_client.connect()

    while True:
        logger.debug('News monitor: iter..')
        news_list = news_client.get_news_from_sources(NEWS_SOURCES)
        num_news = 0

        for news in news_list:
            digest = hashlib.md5(news['title'].encode('utf-8')).hexdigest()

            if redis_client.get(digest):
                continue

            num_news += 1
            news['digest'] = digest
            redis_client.set(digest, True)
            redis_client.expire(digest, NEWS_TIME_OUT_IN_SECONDS)

            logger.debug('News Monitor: got news {}'.format(news))
            amqp_client.send_message(news)

        logger.info('News Monitor: fectched {} news'.format(num_news))
        amqp_client.sleep(SLEEP_TIME_IN_SECONDS)
Exemplo n.º 6
0
def test_basic():
    queue_client = AMQPClient(USER_CLICK_QUEUE_URL, USER_CLICK_QUEUE_NAME)

    user_id, digest = 'click tester', 'digest'
    clickdilivery.send_click(queue_client, user_id, digest)

    message = queue_client.get_message()

    print(message)
    assert message['userId'] == user_id
    assert message['newsDigest'] == digest

    queue_client.close()
    print('click test passed')
Exemplo n.º 7
0
def run(times=-1):
    click_queue_client = AMQPClient(USER_CLICK_QUEUE_URL,
                                    USER_CLICK_QUEUE_NAME)
    click_queue_client.connect()
    assert click_queue_client.is_connected()
    print('Click Handler: my queue name: {}'.format(click_queue_client))

    while True:
        message = click_queue_client.get_message()
        try:
            handle_message(message)
        except Exception as e:
            raise e

        click_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
        if times > 0: times -= 1
        if times == 0: break
Exemplo n.º 8
0
def clear_queue(queue_url, queue_name):
    count = 0
    amqp_client = AMQPClient(queue_url, queue_name)
    amqp_client.connect()

    print('xxx cleaning queue "{}"'.format(queue_name))
    while True:
        message = amqp_client.get_message()
        if message is None:
            break
        print(message)
        count += 1
    
    amqp_client.close()
    
    print('xxx cleaned {} message on {}'.format(count, queue_name))
Exemplo n.º 9
0
def run(times=-1):
    click_queue_client = AMQPClient(USER_CLICK_QUEUE_URL,
                                    USER_CLICK_QUEUE_NAME)
    click_queue_client.connect()

    news_collection = mongodb_client.get_news_collection()
    pref_collection = (mongodb_client.get_db(PREF_DB_NAME).get_collection(
        PREF_COLLECTION_NAME))

    assert click_queue_client.is_connected()

    while True:
        msg = click_queue_client.get_message()
        try:
            handle_message(msg, news_collection, pref_collection)
        except Exception as e:
            raise e
        click_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
        if times > 0: times -= 1
        if times == 0: break
Exemplo n.º 10
0
def test_basic():
    # print('Click Learner tester: clearing queue {}'.format(CLICK_QUEUE_NAME))
    clear_queue(clicklearner.USER_CLICK_QUEUE_URL, CLICK_QUEUE_NAME)

    clicklearner.PREF_COLLECTION_NAME = PREF_COLLECTION_NAME
    clicklearner.USER_CLICK_QUEUE_NAME = CLICK_QUEUE_NAME
    clicklearner.SLEEP_TIME_IN_SECONDS = 1

    click_queue_client = AMQPClient(clicklearner.USER_CLICK_QUEUE_URL,
                                    CLICK_QUEUE_NAME)
    click_queue_client.connect()

    assert click_queue_client.is_connected()

    print('Click Learner tester: clearing collection "{}" in db "{}"...'.format(PREF_COLLECTION_NAME, 
        PREF_DB_NAME))
    pref_collection.remove()

    print('Click Learner tester: sending click logs')
    for click in TEST_SEQUENCE:
        click_queue_client.send_message(click)
    

    print('Click Learner tester: start handling clicks')

    pref_model_ref = {'userId': TEST_USER_NAME}
    for cat in NEWS_CATEGORIES:
        pref_model_ref[cat] = 1 / len(NEWS_CATEGORIES)

    for click_log in TEST_SEQUENCE:
        clicklearner.run(1)
        pref_model = pref_collection.find_one({'userId': TEST_USER_NAME})

        selected = news_collection.find_one({'digest': (click_log['newsDigest'])})['category']
        pref_model_ref[selected] = (1 - ALPHA) * pref_model_ref[selected] + ALPHA
        for cat in NEWS_CATEGORIES:
            if cat != selected:
                pref_model_ref[cat] = (1 - ALPHA) * pref_model_ref[cat]

        del pref_model['_id']
        print('Click Learner tester: expecting {} == {}'.format(pref_model, pref_model_ref))
        assert pref_model == pref_model_ref

    print('xx Click Learner test passed')
Exemplo n.º 11
0
from cloud_amqp_client import AMQPClient

logger = logging.getLogger(__name__)
coloredlogs.install(level=os.environ.get('LOGGER_LEVEL', 'INFO'),
                    logger=logger)

# TODO: this global are bad, and makes it uncovered by tests
config = os.environ
DB_NAME = config['news_db']
COLLECTION_NAME = config['new_collection']
DEDUPE_QUEUE_URL = config['dedupe_task_queue_url']
DEDUPE_QUEUE_NAME = config['dedupe_task_queue_name']

SLEEP_TIME_IN_SECONDS = 5

dedupe_queue_client = AMQPClient(DEDUPE_QUEUE_URL, DEDUPE_QUEUE_NAME)
dedupe_queue_client.connect()

assert dedupe_queue_client.is_connected()

NEWS_SIMILARITY_THRESHOLD = 0.8


class NotContainPublishTimeError(Exception):
    def __str__(self):
        return 'News not containing publish time!!!'


def handle_message(msg):
    logger.debug('dedupter handling message: {}'.format(msg))
    if msg is None or not isinstance(msg, dict):
Exemplo n.º 12
0
def init():
    global click_queue_client
    click_queue_client = AMQPClient(USER_CLICK_QUEUE_URL,
                                    USER_CLICK_QUEUE_NAME)
Exemplo n.º 13
0
USER_CLICK_QUEUE_URL = config['new_click_queue_url']
USER_CLICK_QUEUE_NAME = config['new_click_queue_name']

# TODO should do init here? or let the sub-module take care of them
news_collection = mongodb_client.get_db(NEWS_DB_NAME).get_collection(
    NEWS_COLLECTION_NAME)
pref_collection = mongodb_client.get_db(PREF_DB_NAME).get_collection(
    PREF_COLLECTION_NAME)

if REDIS_HOST:
    REDIS_URL = REDIS_HOST + ":" + (REDIS_PORT if REDIS_PORT else '')
else:
    REDIS_URL = os.environ.get("REDIS_URL")  # heroku specific
redis_client = redis.from_url(REDIS_URL)

click_queue_client = AMQPClient(USER_CLICK_QUEUE_URL, USER_CLICK_QUEUE_NAME)

__all__ = ['get_news', 'log_click']


# for test, maybe bad
def init():
    global click_queue_client
    click_queue_client = AMQPClient(USER_CLICK_QUEUE_URL,
                                    USER_CLICK_QUEUE_NAME)


def get_news(user_id, page_num):
    print('++ RPC server: get news for {}, page #{}'.format(user_id, page_num))
    return get_news_for_user(news_collection, pref_collection, redis_client,
                             user_id, page_num)
Exemplo n.º 14
0
def test_basic():
    # print(amqp_url + "({})".format(type(amqp_url)))
    client = AMQPClient(AMQP_URL, 'my_queue')

    client.connect()

    assert client.is_connected()

    assert client.get_message() is None

    client.send_message('hello world')

    assert client.get_message() == 'hello world'

    obj = {"hello": "world"}
    client.send_message(obj)

    assert client.get_message() == obj

    assert client.get_message() is None

    client.cancel_queue()

    client.close()
    print('[x] cloud amqp_client test passed')
Exemplo n.º 15
0
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'utils'))
import news_client
from cloud_amqp_client import AMQPClient
from config_reader import get_config

config = get_config(
    os.path.join(os.path.dirname(__file__), '..', 'config', 'config.json'))
SCRAPE_QUEUE_URL = config["scrape_task_queue_url"]
DEDUPE_QUEUE_URL = config["dedupe_task_queue_url"]
SCRAPE_NEWS_TASK_QUEUE_NAME = config["scrape_task_queue_name"]
DEDUPE_NEWS_TASK_QUEUE_NAME = config["dedupe_task_queue_name"]

SLEEP_TIME_IN_SECONDS = 5

scrape_queue_client = AMQPClient(SCRAPE_QUEUE_URL, SCRAPE_NEWS_TASK_QUEUE_NAME)
scrape_queue_client.connect()
dedupe_queue_client = AMQPClient(DEDUPE_QUEUE_URL, DEDUPE_NEWS_TASK_QUEUE_NAME)
dedupe_queue_client.connect()

assert scrape_queue_client.is_connected()
assert dedupe_queue_client.is_connected()


def handle_message(msg):
    print('News Fetcher getting message:', msg)
    if msg is None or not isinstance(msg, dict):
        print('news fetcher: message is broken')
        return

    task = msg
Exemplo n.º 16
0
def run(scrape_queue_url=SCRAPE_QUEUE_URL, scrape_queue_name=SCRAPE_NEWS_TASK_QUEUE_NAME,
        dedupe_queue_url=DEDUPE_QUEUE_URL, dedupe_queue_name=DEDUPE_NEWS_TASK_QUEUE_NAME,
        times=-1):
    scrape_queue_client = AMQPClient(scrape_queue_url, scrape_queue_name)
    scrape_queue_client.connect()
    dedupe_queue_client = AMQPClient(dedupe_queue_url, dedupe_queue_name)
    dedupe_queue_client.connect()

    assert scrape_queue_client.is_connected()
    assert dedupe_queue_client.is_connected()

    while True:
        logger.debug('News fetcher: iter..')
        msg = scrape_queue_client.get_message()
        if msg is not None:
            try:
                handle_message(msg)
                dedupe_queue_client.send_message(msg)
                logger.info('News Fetcher: message sent to dedupe queue (url: {})'
                    .format(msg.get('url')))
            except Exception as e:
                logger.warning('News fetcher: handling error: {}'.format(e))
        # if decreas count here, weird behavior, decreasing happens before processing message
        scrape_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
        if times > 0: times -= 1
        if times == 0: break