def test_basic():
    """Round-trip one message through the queue and check it comes back intact.

    Builds a CloudAMQPClient from the module-level CloudAMQP_URL and
    QUEUE_NAME, sends a small dict, pauses via ``client.sleep(2)``, then
    reads one message back and asserts that it equals what was sent.

    Raises:
        AssertionError: if the message read back differs from the one sent
            (including when nothing is returned).
    """
    client = CloudAMQPClient(CloudAMQP_URL, QUEUE_NAME)

    sent_msg = {'test': 'success'}
    client.sendMessage(sent_msg)

    # NOTE(review): presumably gives the broker time to deliver the message
    # before it is read back - confirm against CloudAMQPClient.sleep.
    client.sleep(2)

    # Keep the received value in a local so a failure reports what arrived.
    received_msg = client.getMessage()
    assert received_msg == sent_msg, (
        'expected %r, got %r' % (sent_msg, received_msg))

    # Call form with a single argument prints the same text on Python 2 and 3.
    print('cloudAMQP connection success')
def test_basic():
    """Send one message to the queue and verify the same message is read back.

    Builds a CloudAMQPClient from the module-level CLOUDAMQP_URL and
    QUEUE_NAME, sends a small dict, pauses via ``client.sleep(5)``, then
    reads one message back and asserts that it equals what was sent.

    Raises:
        AssertionError: if the message read back differs from the one sent
            (including when nothing is returned).
    """
    client = CloudAMQPClient(CLOUDAMQP_URL, QUEUE_NAME)

    sent_msg = {'test_key': 'test_value'}
    client.sendMessage(sent_msg)

    # NOTE(review): presumably gives the broker time to deliver the message
    # before it is read back - confirm against CloudAMQPClient.sleep.
    client.sleep(5)

    received_msg = client.getMessage()
    # The failure message shows both sides so a missing or altered message
    # is distinguishable from the assertion output alone.
    assert sent_msg == received_msg, (
        'expected %r, got %r' % (sent_msg, received_msg))

    # Call form with a single argument prints the same text on Python 2 and 3.
    print("test_basic passed!")
from newspaper import Article # Import common package in parent directory sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common')) sys.path.append(os.path.join(os.path.dirname(__file__), 'scrapers')) import cnn_news_scraper from CloudAMQP_client import CloudAMQPClient SLEEP_TIME_IN_SECONDS = 5 SCRAPE_NEWS_TASK_QUEUE_URL = 'amqp://*****:*****@otter.rmq.cloudamqp.com/fnidwrfk' SCRAPE_NEWS_TASK_QUEUE_NAME = 'tap-news-scrape-news-task-queue' DEDUPE_NEWS_TASK_QUEUE_URL = 'amqp://*****:*****@otter.rmq.cloudamqp.com/bqloyjhw' DEDUPE_NEWS_TASK_QUEUE_NAME = 'tap-news-dedupe-news-task-queue' scrape_news_queue_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL, SCRAPE_NEWS_TASK_QUEUE_NAME) dedupe_news_queue_client = CloudAMQPClient(DEDUPE_NEWS_TASK_QUEUE_URL, DEDUPE_NEWS_TASK_QUEUE_NAME) def handle_message(msg): if msg is None or not isinstance(msg, dict): print 'message is broken' return task = msg article = Article(task['url']) article.download() article.parse() task['text'] = article.text
# Don't modify this value unless you know what you are doing. NUM_OF_CLASSES = 17 INITIAL_P = 1.0 / NUM_OF_CLASSES ALPHA = 0.1 SLEEP_TIME_IN_SECONDS = 1 # TODO: use your own queue LOG_CLICKS_TASK_QUEUE_URL = "amqp://*****:*****@donkey.rmq.cloudamqp.com/roplnjlc" LOG_CLICKS_TASK_QUEUE_NAME = "tap-news-log-clicks-task-queue" PREFERENCE_MODEL_TABLE_NAME = "user_preference_model" NEWS_TABLE_NAME = "news" cloudAMQP_client = CloudAMQPClient(LOG_CLICKS_TASK_QUEUE_URL, LOG_CLICKS_TASK_QUEUE_NAME) def handle_message(msg): if msg is None or not isinstance(msg, dict) : return if ('userId' not in msg or 'newsId' not in msg or 'timestamp' not in msg): return userId = msg['userId'] newsId = msg['newsId'] # Update user's preference db = mongodb_client.get_db() model = db[PREFERENCE_MODEL_TABLE_NAME].find_one({'userId': userId})
# Redis REDIS_HOST = 'localhost' REDIS_PORT = 6379 NEWS_TIME_OUT_IN_SECONDS = 3600 * 24 * 1 redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT) #CloudAMQP from CloudAMQP_client import CloudAMQPClient SCRAPE_NEWS_TASK_QUEUE_URL = 'amqp://*****:*****@donkey.rmq.cloudamqp.com/vtrjgcrd' SCRAPE_NEWS_TASK_QUEUE_NAME = 'tap-news-scrape-news-task-queue' SLEEP_TIME_IN_SECONDS = 10 cloudAMQP_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL, SCRAPE_NEWS_TASK_QUEUE_NAME) while True: news_list = news_api_client.getNewsFromSource(NEWS_SOURCES) num_of_new_news = 0 for news in news_list: news_digest = hashlib.md5(news['title'].encode('utf-8')).digest().encode('base64') if redis_client.get(news_digest) is None: num_of_new_news = num_of_new_news + 1 news['digest'] = news_digest # If 'publishedAt' is None, set it to current UTC time if news['publishedAt'] is None: # Make the time in format YYYY-MM-DDTHH:MM:SS in UTC
from CloudAMQP_client import CloudAMQPClient NEWS_SOURCES = [ 'cnn', 'abc-news', 'bloomberg', 'entertainment-weekly', 'espn', 'ign', 'techcrunch', 'the-new-york-times', 'the-wall-street-journal', 'the-washington-post', 'cnbc', 'entertainment-weekly', 'fox-sports', 'google-news', 'hacker-news', 'recode', 'newsweek', 'news-scientist' ] REDIS_HOST = "localhost" REDIS_PORT = 6379 redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT) CloudAMQP_URL = 'amqp://*****:*****@skunk.rmq.cloudamqp.com/idefsmvy' QUEUE_NAME = 'tap-news-scrape-news-task-queue' cloudAMQP_client = CloudAMQPClient(CloudAMQP_URL, QUEUE_NAME) NEWS_TIME_OUT_IN_SECONDS = 3600 * 24 * 1 SLEEP_TIME_IN_SECONDS = 10 while (True): news_list = news_api_client.getNewsFromSource(NEWS_SOURCES) nums_of_new_news = 0 for news in news_list: news_digest = hashlib.md5( news['title'].encode('utf-8')).digest().encode('base64') if (redis_client.get(news_digest) is None): nums_of_new_news = nums_of_new_news + 1 news['digest'] = news_digest #if publishedAt is none, set it to current UTC time if (news['publishedAt'] is None):