示例#1
0
def test():
    """Round-trip smoke test for the queue client.

    Sends one dict through a client built from ``URL`` and ``QUEUE_NAME``,
    reads the next message back, and asserts the two are equal. Prints
    ``passed!`` when the assertion holds.
    """
    amqp = CloudAMQPClient(URL, QUEUE_NAME)
    sent = {"hhh": "hhh"}
    amqp.sendMessage(sent)

    # The next message read back must equal the one just sent.
    received = amqp.receiveMessage()
    assert sent == received

    print("passed!")
示例#2
0
def clear_queue(queue_url, queue_name):
    """Receive messages from a queue until none is returned, then report the count.

    Args:
        queue_url: URL passed to ``CloudAMQPClient``.
        queue_name: Queue name passed to ``CloudAMQPClient``.

    Returns:
        None. The number of messages received is printed to stdout.

    NOTE(review): presumably each ``receiveMessage`` call removes the message
    from the queue (hence "clear") -- confirm against ``CloudAMQPClient``.
    """
    queue_client = CloudAMQPClient(queue_url, queue_name)
    num_of_messages = 0

    # The client does not change while draining, so test it once up front.
    # Re-testing it inside an unconditional loop meant a falsy client would
    # spin forever without ever reaching the return.
    if queue_client:
        # A falsy result from receiveMessage() ends the drain.
        while queue_client.receiveMessage():
            num_of_messages += 1

    print("%s num_of_messages" % num_of_messages)
示例#3
0
def handle_message(msg):
    """Fetch the article a task points at and forward the enriched task.

    Args:
        msg: Task dict. Its ``'url'`` entry is passed to ``Article``; the
            dict is mutated in place, gaining a ``'text'`` entry holding
            ``article.text``.

    Returns:
        None. A falsy or non-dict ``msg`` is reported on stdout and dropped
        without touching any queue.

    Raises:
        KeyError: if ``msg`` is a non-empty dict without a ``'url'`` key.
        Anything raised by ``Article`` download/parse or by ``sendMessage``
        propagates to the caller unchanged.

    NOTE(review): ``Article`` is presumably newspaper's article class --
    confirm against the file's imports.
    """
    if not msg or not isinstance(msg, dict):
        print('msg in broken')
        return

    # Download and parse the page, then attach the extracted body text.
    article = Article(msg['url'])
    article.download()
    article.parse()
    msg['text'] = article.text

    # Hand the enriched task to the outgoing queue.
    fecth_news_queue_client.sendMessage(msg)


# Worker loop: poll scraper_news_queue_client forever and pass every
# message it yields to handle_message.
while True:
    # NOTE(review): with a falsy client this path skips the sleep below, so
    # the loop spins without pausing -- confirm that is acceptable.
    if not scraper_news_queue_client:
        continue

    msg = scraper_news_queue_client.receiveMessage()
    if msg:
        try:
            handle_message(msg)
        except Exception as e:
            # Best-effort: report the failure and keep the worker running.
            print(e)

    # Pause between polls; the sleep is issued on the outgoing-queue client.
    fecth_news_queue_client.sleep(SLEEP_TIME_IN_SECOND)
示例#4
0
        tfidf = TfidfVectorizer().fit_transform(documents)
        pairwise_sim = tfidf * tfidf.T

        rows, cols = pairwise_sim.shape
        for row in range(1, rows):
            if pairwise_sim[row, 0] > SAME_NEWS_SIMILARITY_THRESHOLD:
                print('duplicate news.Ignore')
                return

        msg['publishedAt'] = published_at
        db[NEWS_TABLE_NAME].replace_one({'digest': msg['digest']},
                                        msg,
                                        upsert=True)
    else:
        msg['publishedAt'] = published_at
        db[NEWS_TABLE_NAME].replace_one({'digest': msg['digest']},
                                        msg,
                                        upsert=True)


# Worker loop: poll cloudAMQP_client forever and pass every message it
# yields to the handler.
while True:
    # NOTE(review): with a falsy client this path skips the sleep below, so
    # the loop spins without pausing -- confirm that is acceptable.
    if not cloudAMQP_client:
        continue

    msg = cloudAMQP_client.receiveMessage()
    if msg:
        try:
            # NOTE(review): the handler is spelled "handle_mesage" here; if
            # the function is actually defined as handle_message, this call
            # raises NameError, which the except below only prints -- verify
            # against the definition.
            handle_mesage(msg)
        except Exception as e:
            # Best-effort: report the failure and keep the worker running.
            print(e)

    # Pause between polls.
    cloudAMQP_client.sleep(SLEEP_TIME_IN_SECOND)