def test_basic():
    """Round-trip one message through the queue and check it comes back intact.

    Sends a small dict with ``sendMessage``, pauses two seconds through the
    client's own ``sleep``, then asserts that ``getMessage`` returns an equal
    dict. Raises ``AssertionError`` when the received message differs.
    """
    client = CloudAMQPClient(CloudAMQP_URL, QUEUE_NAME)
    sendMsg = {'test': 'success'}
    client.sendMessage(sendMsg)
    # Presumably gives the broker time to make the message available.
    client.sleep(2)
    assert client.getMessage() == sendMsg
    # The parenthesised single-argument form prints the same text on
    # Python 2 and Python 3; the bare print statement only parses on 2.
    print('cloudAMQP connection success')
def test_basic():
    """Send one message, read it back, and check that nothing changed.

    The message is published with ``sendMessage``; after a five second pause
    through the client's own ``sleep`` it is fetched with ``getMessage`` and
    compared with what was sent. Raises ``AssertionError`` on a mismatch.
    """
    client = CloudAMQPClient(CLOUDAMQP_URL, QUEUE_NAME)

    sentMsg = {'test_key': 'test_value'}
    client.sendMessage(sentMsg)
    # Presumably gives the broker time to make the message available.
    client.sleep(5)
    receivedMsg = client.getMessage()
    assert sentMsg == receivedMsg
    # The parenthesised single-argument form prints the same text on
    # Python 2 and Python 3; the bare print statement only parses on 2.
    print("test_basic passed!")
示例#3
0
from newspaper import Article

# Import common package in parent directory
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
sys.path.append(os.path.join(os.path.dirname(__file__), 'scrapers'))

import cnn_news_scraper
from CloudAMQP_client import CloudAMQPClient

# Pause length in seconds; the code that uses it is not visible in this part
# of the file.
SLEEP_TIME_IN_SECONDS = 5
# Connection settings for the two queues this module talks to.
# NOTE(review): both URLs embed broker credentials in source control;
# consider loading them from configuration or the environment instead.
SCRAPE_NEWS_TASK_QUEUE_URL = 'amqp://*****:*****@otter.rmq.cloudamqp.com/fnidwrfk'
SCRAPE_NEWS_TASK_QUEUE_NAME = 'tap-news-scrape-news-task-queue'
DEDUPE_NEWS_TASK_QUEUE_URL = 'amqp://*****:*****@otter.rmq.cloudamqp.com/bqloyjhw'
DEDUPE_NEWS_TASK_QUEUE_NAME = 'tap-news-dedupe-news-task-queue'

# One client per queue, both created when the module is imported.
scrape_news_queue_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL, SCRAPE_NEWS_TASK_QUEUE_NAME)
dedupe_news_queue_client = CloudAMQPClient(DEDUPE_NEWS_TASK_QUEUE_URL, DEDUPE_NEWS_TASK_QUEUE_NAME)

def handle_message(msg):
    """Download and parse the article referenced by a scrape task.

    ``msg`` must be a dict carrying a 'url' entry. Anything else is reported
    as broken and ignored, so one malformed message cannot take down the
    caller with a ``KeyError``. For a usable message the article behind
    'url' is downloaded and parsed, and its text is stored back on the
    message under 'text'.
    """
    # isinstance() already rejects None, and a dict without 'url' is just as
    # unusable as a non-dict, so both take the same "broken" path.
    if not isinstance(msg, dict) or 'url' not in msg:
        # Parenthesised form prints identically on Python 2 and Python 3.
        print('message is broken')
        return

    task = msg

    article = Article(task['url'])
    article.download()
    article.parse()

    task['text'] = article.text
示例#4
0
# Don't modify this value unless you know what you are doing.
# NOTE(review): presumably the number of news classes tracked per user; confirm.
NUM_OF_CLASSES = 17
# Equal share for every class: 1.0 divided by the number of classes.
INITIAL_P = 1.0 / NUM_OF_CLASSES
# NOTE(review): looks like a weighting factor for preference updates; its use
# is not visible in this part of the file, so confirm before tuning.
ALPHA = 0.1

# Pause length in seconds; the code that uses it is not visible here.
SLEEP_TIME_IN_SECONDS = 1

# TODO: use your own queue
# NOTE(review): the URL embeds broker credentials in source control; consider
# loading it from configuration or the environment instead.
LOG_CLICKS_TASK_QUEUE_URL = "amqp://*****:*****@donkey.rmq.cloudamqp.com/roplnjlc"
LOG_CLICKS_TASK_QUEUE_NAME = "tap-news-log-clicks-task-queue"

# Names used to index the database handle (see db[...] in handle_message).
PREFERENCE_MODEL_TABLE_NAME = "user_preference_model"
NEWS_TABLE_NAME = "news"

# Queue client for the click-log queue, created when the module is imported.
cloudAMQP_client = CloudAMQPClient(LOG_CLICKS_TASK_QUEUE_URL, LOG_CLICKS_TASK_QUEUE_NAME)

def handle_message(msg):
    """Handle one click-log message and look up the user's preference model.

    Messages that are not dicts, or that lack any of 'userId', 'newsId' or
    'timestamp', are dropped silently. For a well-formed message the user's
    stored preference model is fetched from the database.
    """
    # Guard: only dict payloads are processed (this also rejects None).
    if not isinstance(msg, dict):
        return

    # Every required field must be present before any work is done.
    for required_key in ('userId', 'newsId', 'timestamp'):
        if required_key not in msg:
            return

    userId, newsId = msg['userId'], msg['newsId']

    # Fetch the stored preference model for this user.
    db = mongodb_client.get_db()
    preference_query = {'userId': userId}
    model = db[PREFERENCE_MODEL_TABLE_NAME].find_one(preference_query)
示例#5
0
# Redis connection settings
REDIS_HOST = 'localhost'
REDIS_PORT = 6379

# 3600 * 24 * 1 seconds, i.e. one day.
# NOTE(review): presumably the expiry applied to stored news digests; the
# code that uses it is not visible in this part of the file.
NEWS_TIME_OUT_IN_SECONDS = 3600 * 24 * 1
redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)

# CloudAMQP
from CloudAMQP_client import CloudAMQPClient

# NOTE(review): the URL embeds broker credentials in source control; consider
# loading it from configuration or the environment instead.
SCRAPE_NEWS_TASK_QUEUE_URL = 'amqp://*****:*****@donkey.rmq.cloudamqp.com/vtrjgcrd'
SCRAPE_NEWS_TASK_QUEUE_NAME = 'tap-news-scrape-news-task-queue'
# Pause length in seconds; the code that uses it is not visible here.
SLEEP_TIME_IN_SECONDS = 10

# Queue client for the scrape-task queue, created when the module is imported.
cloudAMQP_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL, SCRAPE_NEWS_TASK_QUEUE_NAME)

# Poll the news API forever, picking out stories with no Redis entry.
while True:
	news_list = news_api_client.getNewsFromSource(NEWS_SOURCES)
	num_of_new_news = 0

	for news in news_list:
		# Fingerprint the story by its title: MD5 of the UTF-8 encoded title,
		# then base64 text.
		# NOTE(review): .encode('base64') on a byte string exists only on Python 2.
		news_digest = hashlib.md5(news['title'].encode('utf-8')).digest().encode('base64')

		# No Redis entry under this digest: count the story as new and tag it.
		if redis_client.get(news_digest) is None:
			num_of_new_news = num_of_new_news + 1
			news['digest'] = news_digest

			# If 'publishedAt' is None, set it to current UTC time
			if news['publishedAt'] is None:
				# Make the time in format YYYY-MM-DDTHH:MM:SS in UTC
示例#6
0
from CloudAMQP_client import CloudAMQPClient

# Source ids handed to the news API client on every polling pass.
# Each id appears exactly once so that no source is requested twice per pass.
NEWS_SOURCES = [
    'cnn', 'abc-news', 'bloomberg', 'entertainment-weekly', 'espn', 'ign',
    'techcrunch', 'the-new-york-times', 'the-wall-street-journal',
    'the-washington-post', 'cnbc', 'fox-sports',
    'google-news', 'hacker-news', 'recode', 'newsweek', 'new-scientist'
]

# Redis connection settings
REDIS_HOST = "localhost"
REDIS_PORT = 6379
redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)

# Queue settings for the CloudAMQP client created below.
# NOTE(review): the URL embeds broker credentials in source control; consider
# loading it from configuration or the environment instead.
CloudAMQP_URL = 'amqp://*****:*****@skunk.rmq.cloudamqp.com/idefsmvy'
QUEUE_NAME = 'tap-news-scrape-news-task-queue'
cloudAMQP_client = CloudAMQPClient(CloudAMQP_URL, QUEUE_NAME)

# 3600 * 24 * 1 seconds, i.e. one day.
# NOTE(review): presumably the expiry applied to stored news digests; the
# code that uses it is not visible in this part of the file.
NEWS_TIME_OUT_IN_SECONDS = 3600 * 24 * 1
# Pause length in seconds; the code that uses it is not visible here.
SLEEP_TIME_IN_SECONDS = 10

while (True):
    news_list = news_api_client.getNewsFromSource(NEWS_SOURCES)
    nums_of_new_news = 0
    for news in news_list:
        news_digest = hashlib.md5(
            news['title'].encode('utf-8')).digest().encode('base64')
        if (redis_client.get(news_digest) is None):
            nums_of_new_news = nums_of_new_news + 1
            news['digest'] = news_digest
            #if publishedAt is none, set it to current UTC time
            if (news['publishedAt'] is None):