Exemplo n.º 1
0
def test():
    """Round-trip one message through the queue and check it is unchanged.

    Sends a small dict with ``sendMessage``, reads one message back with
    ``receiveMessage``, asserts the two are equal and prints "passed!".
    """
    queue = CloudAMQPClient(URL, QUEUE_NAME)
    payload = {"hhh": "hhh"}

    queue.sendMessage(payload)
    echoed = queue.receiveMessage()

    assert payload == echoed
    print("passed!")
Exemplo n.º 2
0
def clear_queue(queue_url, queue_name):
    """Receive messages from a queue until none is returned, then report.

    Messages are pulled one at a time with ``receiveMessage``. As soon as a
    call returns a falsy value, the number of messages received so far is
    printed and the function returns.

    Args:
        queue_url: URL handed to ``CloudAMQPClient``.
        queue_name: Queue name handed to ``CloudAMQPClient``.

    Returns:
        None.
    """
    queue_client = CloudAMQPClient(queue_url, queue_name)
    if not queue_client:
        # A falsy client can never deliver a message. Checking it inside an
        # unconditional loop would spin forever, so bail out up front.
        return

    num_of_messages = 0
    while queue_client.receiveMessage():
        num_of_messages += 1
    print("%s num_of_messages" % num_of_messages)
Exemplo n.º 3
0
import os
import sys
import news_scraper
from newspaper import Article

# Interval constant, in seconds (not referenced in the visible code).
SLEEP_TIME_IN_SECOND = 10

# Queue this module reads from.
RECEIVE_QUEUE_URL = 'amqp://*****:*****@termite.rmq.cloudamqp.com/svowqrcq'
RECEIVE_QUEUE_NAME = 'news-test'

# Second queue; presumably where fetched news is published -- TODO confirm.
FETCH_QUEUE_URL = 'amqp://*****:*****@llama.rmq.cloudamqp.com/txggakbg'
FETCH_QUEUE_NAME = 'fect_news'

# Make the sibling ``utils`` directory importable. This must use the module's
# ``__file__`` attribute: the string literal '__file__' has no directory part,
# so dirname() would return '' and the path would be resolved against the
# current working directory instead of this file's location.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'utils'))
from AMQP_client import CloudAMQPClient

scraper_news_queue_client = CloudAMQPClient(RECEIVE_QUEUE_URL,
                                            RECEIVE_QUEUE_NAME)
fecth_news_queue_client = CloudAMQPClient(FETCH_QUEUE_URL, FETCH_QUEUE_NAME)


def handle_message(msg):
    """Validate a queue message and download the article it points to.

    When ``msg`` is falsy or not a dict, prints a notice and returns.
    Otherwise builds an ``Article`` from ``msg['url']`` and calls its
    ``download()`` method.
    """
    usable = msg and isinstance(msg, dict)
    if not usable:
        print('msg in broken')
        return

    # Per-source scraping through news_scraper (e.g. only 'cnn') is disabled;
    # every message is fetched through newspaper's Article instead.
    text = None
    article = Article(msg['url'])
    article.download()
Exemplo n.º 4
0
import news_classes
import os
import sys

sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'utils'))
import mongodb_client
from AMQP_client import CloudAMQPClient

# Collection names used with ``db``.
NEWS_TABLE_NAME = "newstest"
PREFERENCE_MODEL_TABLE_NAME = "user_preference_model"

# Interval constant, in seconds (not referenced in the visible code).
SLEEP_TIME_IN_SECONDS = 10

# Preference-model parameters: INITIAL_P is an equal 1/NUM_OF_CLASSES share.
# ALPHA is presumably the model's update rate -- TODO confirm against its use.
NUM_OF_CLASSES = 17
INITIAL_P = 1.0 / NUM_OF_CLASSES
ALPHA = 0.1

# Queue holding the click-log tasks.
LOG_CLICKS_TASK_QUEUE_URL = 'amqp://*****:*****@llama.rmq.cloudamqp.com/cvarabcw'
LOG_CLICKS_TASK_QUEUE_NAME = 'tap-news-log-clicks-task-queue'

# Shared handles, created once at import time (queue client first, then DB).
cloudAMQP_client = CloudAMQPClient(
    LOG_CLICKS_TASK_QUEUE_URL,
    LOG_CLICKS_TASK_QUEUE_NAME,
)
db = mongodb_client.get_db()

def handle_message(msg):
    """Process one click-log message for a user.

    Returns without doing anything unless ``msg`` is a dict containing
    'userId', 'newsId' and 'timestamp'. Otherwise looks up the user's
    preference model by ``userId``; when none is found, announces it and
    starts a new model dict for that user.
    """
    # None is not a dict, so this single check also rejects a missing message.
    if not isinstance(msg, dict):
        return

    required_keys = ('userId', 'newsId', 'timestamp')
    if not all(key in msg for key in required_keys):
        return

    userId, newsId = msg['userId'], msg['newsId']
    preference_models = db[PREFERENCE_MODEL_TABLE_NAME]
    model = preference_models.find_one({"userId": userId})

    # No stored model means this user has not been seen before.
    if not model:
        print('create preference model for new user: %s' % userId)
        new_model = {'userId': userId}
Exemplo n.º 5
0
import os
import sys

from dateutil import parser
from sklearn.feature_extraction.text import TfidfVectorizer

sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'utils'))
import mongodb_client
from AMQP_client import CloudAMQPClient

# Plain settings (none of these is referenced in the visible code).
NEWS_TABLE_NAME = "newstest"
SAME_NEWS_SIMILARITY_THRESHOLD = 0.9
SLEEP_TIME_IN_SECOND = 10

# Queue this module is connected to.
FETCH_QUEUE_URL = 'amqp://*****:*****@llama.rmq.cloudamqp.com/txggakbg'
FETCH_QUEUE_NAME = 'fect_news'
cloudAMQP_client = CloudAMQPClient(
    FETCH_QUEUE_URL,
    FETCH_QUEUE_NAME,
)


def handle_mesage(msg):
    """Handle one fetched-news message.

    Returns early (``None``) when ``msg`` is falsy, is not a dict, or has a
    missing or empty 'text' entry. Otherwise prints the message, parses
    ``msg['publishedAt']`` with ``dateutil.parser`` and computes midnight at
    the start of that publication day.

    Args:
        msg: Message taken from the queue; expected to be a dict with 'text'
            and 'publishedAt' entries.
    """
    # Imported locally: the module's import block does not bring ``datetime``
    # into scope, so the constructor call below would raise NameError.
    import datetime

    if not msg or not isinstance(msg, dict):
        return
    # .get(): a message with no 'text' key is skipped the same way as one
    # whose text is empty, instead of raising KeyError.
    text = msg.get('text')
    if not text:
        return
    print(msg)
    # Start of the day on which the news was published (naive datetime).
    published_at = parser.parse(msg['publishedAt'])
    published_at_day_begin = datetime.datetime(published_at.year,
                                               published_at.month,
                                               published_at.day, 0, 0, 0, 0)
Exemplo n.º 6
0
# Interval constant, in seconds (not referenced in the visible code).
SLEEP_TIME_TASK_SECONDS = 10

# Source identifiers passed to news_api_client.getNews().
# NOTE(review): 'bbc-newson' may be a typo for 'bbc-news' -- confirm.
NEWS_SOURCES = [
    'bbc-newson',
    'bbc-sport',
    'bloomberg',
    'cnn',
    'entertainment-weekly',
    'espn',
    'ign',
    'techcrunch',
    'the-new-york-times',
    'the-wall-street-journal',
    'the-washington-post',
]

# Redis connection used to remember the digests of news already seen.
REDIS_HOST = 'localhost'
REDIS_PORT = 6379
redis_client = redis.StrictRedis(host=REDIS_HOST, port=REDIS_PORT)

# AMQP queue client.
QUEUE_URL = "amqp://*****:*****@termite.rmq.cloudamqp.com/svowqrcq"
QUEUE_NAME = "news-test"
cloudAMQP_client = CloudAMQPClient(QUEUE_URL, QUEUE_NAME)

# Poll forever: fetch the news for NEWS_SOURCES and pick out the items whose
# title digest is not yet stored in Redis.
while True:
    news_list = news_api_client.getNews(NEWS_SOURCES)
    # Count of items in this batch whose digest was not found in Redis.
    number_of_news = 0
    for news in news_list:
        # Redis is used to prevent duplicates: the key is the MD5 hex digest
        # of the UTF-8 encoded title.
        news_digest = hashlib.md5(news['title'].encode('utf-8')).hexdigest()
        if not redis_client.get(news_digest):
            number_of_news = number_of_news + 1
            news['digest'] = news_digest
            # Only the key's presence is checked above; 'hh' is a placeholder.
            redis_client.set(news_digest, 'hh')
            # Let the marker expire after NEWS_TIME_OUT_IN_SECONDS (not
            # defined in the visible code).
            # NOTE(review): set + expire are two separate calls -- consider
            # a single set(..., ex=...) so the key cannot be left without TTL.
            redis_client.expire(news_digest, NEWS_TIME_OUT_IN_SECONDS)