def test():
    """Round-trip a single message through the queue.

    Publishes a small dict with ``CloudAMQPClient.sendMessage``, reads one
    message back with ``receiveMessage`` and asserts that the two are equal.
    Prints ``passed!`` when the assertion holds.
    """
    payload = {"hhh": "hhh"}

    amqp_client = CloudAMQPClient(URL, QUEUE_NAME)
    amqp_client.sendMessage(payload)
    echoed = amqp_client.receiveMessage()

    # What comes back must be exactly what was sent.
    assert payload == echoed
    print("passed!")
def clear_queue(queue_url, queue_name):
    """Drain a queue and print how many messages were removed.

    Args:
        queue_url: Connection URL handed to ``CloudAMQPClient``.
        queue_name: Name of the queue handed to ``CloudAMQPClient``.

    Returns:
        None. The count is reported on stdout once ``receiveMessage()``
        returns a falsy value.
    """
    queue_client = CloudAMQPClient(queue_url, queue_name)

    # Check the client once, up front. Testing it inside an endless loop
    # (with no exit on the falsy branch) would spin forever at full CPU.
    if not queue_client:
        print("queue client is not available")
        return

    num_of_messages = 0
    # Keep pulling until the client hands back nothing.
    # NOTE(review): presumably a falsy result means "queue is empty" - confirm
    # against CloudAMQPClient.receiveMessage.
    while queue_client.receiveMessage():
        num_of_messages += 1

    print("%s num_of_messages" % num_of_messages)
def handle_message(msg):
    """Fetch the article a task points at and forward the task with its text.

    Args:
        msg: Task received from the scrape queue. It must be a non-empty
            dict; anything else is reported on stdout and dropped.

    Side effects:
        Stores the extracted article text under ``msg['text']`` and sends
        the updated dict through ``fecth_news_queue_client``.

    Raises:
        KeyError: If ``msg`` has no ``'url'`` entry.
        Any error raised by ``Article`` or ``sendMessage`` propagates
        unchanged; the polling loop below is what catches it.
    """
    if not msg or not isinstance(msg, dict):
        print('msg in broken')
        return

    # Download and parse the page behind the task's URL.
    # NOTE(review): Article is presumably newspaper's Article - confirm import.
    article = Article(msg['url'])
    article.download()
    article.parse()

    msg['text'] = article.text
    fecth_news_queue_client.sendMessage(msg)


# Worker loop: poll the scrape queue, process whatever arrives, then pause.
while True:
    if scraper_news_queue_client:
        msg = scraper_news_queue_client.receiveMessage()
        if msg:
            try:
                handle_message(msg)
            except Exception as e:
                # Worker boundary: report the failure and keep polling so a
                # single bad task does not stop the loop.
                print(e)
        # NOTE(review): the pause goes through the *fetch* client, not the
        # scrape client that was just polled - confirm this is intended.
        fecth_news_queue_client.sleep(SLEEP_TIME_IN_SECOND)
# NOTE(review): this line is the tail of a handler whose `def` header is not
# visible here, followed by a module-level polling loop. Its original line
# breaks and indentation are lost, so the code is left untouched.
#
# Handler tail: fits a TfidfVectorizer on `documents`, multiplies the result by
# its transpose to get a pairwise similarity matrix, and compares rows 1..n-1
# against column 0. If any value exceeds SAME_NEWS_SIMILARITY_THRESHOLD it
# prints 'duplicate news.Ignore' and returns without storing anything.
# Otherwise it sets msg['publishedAt'] = published_at and upserts msg into
# db[NEWS_TABLE_NAME], matched on msg['digest'].
# (presumably documents[0] is the incoming article - TODO confirm)
# NOTE(review): the two store branches are identical; whether `else:` pairs
# with the `for` or with an `if` outside this view cannot be told from here.
# NOTE(review): `cols` is assigned but never read.
#
# Polling loop: while cloudAMQP_client is truthy, receive a message, pass it to
# handle_mesage(), print any exception, then call cloudAMQP_client.sleep().
# NOTE(review): `handle_mesage` is spelled with a single "s" - confirm it
# matches the handler's actual name, otherwise every message raises NameError
# (which the `except Exception` would print and swallow).
tfidf = TfidfVectorizer().fit_transform(documents) pairwise_sim = tfidf * tfidf.T rows, cols = pairwise_sim.shape for row in range(1, rows): if pairwise_sim[row, 0] > SAME_NEWS_SIMILARITY_THRESHOLD: print('duplicate news.Ignore') return msg['publishedAt'] = published_at db[NEWS_TABLE_NAME].replace_one({'digest': msg['digest']}, msg, upsert=True) else: msg['publishedAt'] = published_at db[NEWS_TABLE_NAME].replace_one({'digest': msg['digest']}, msg, upsert=True) while True: if cloudAMQP_client: msg = cloudAMQP_client.receiveMessage() if msg: try: handle_mesage(msg) except Exception as e: print(e) cloudAMQP_client.sleep(SLEEP_TIME_IN_SECOND)