def send(self, metrics, val):
    """Ship one metric sample to the configured Graphite endpoint.

    Uses Graphite's plaintext protocol ("<path> <value> <unix-ts>\n") over a
    fresh TCP connection per call. Failures are logged and swallowed —
    monitoring is best-effort and must never take the caller down.

    Args:
        metrics: dot-separated metric path (e.g. 'news.cnn.Politics').
        val: numeric sample value.
    """
    try:
        timestamp = int(time.time())
        msgs = '\n%s %f %d\n' % (metrics, val, timestamp)
        sock = socket.socket()
        sock.connect((self.host, self.port))
        try:
            # encode() keeps this working on Python 3, where sendall()
            # requires bytes; it is a no-op for this ASCII payload on Python 2.
            sock.sendall(msgs.encode('utf-8'))
        finally:
            # Close even when sendall() raises, so sockets never leak.
            sock.close()
        logger.debug("Metrics '%s' with value %f is sent to %s" %
                     (metrics, val, self.port))
    except Exception as e:
        logger.error("Monitor: couldn't connect to graphite (%s)" % e)
def handle_message(msg):
    """Fetch and parse the article for one scrape task, then enqueue it
    for deduplication.

    Args:
        msg: task dict from the scrape queue; must contain a 'url' key —
            TODO confirm full schema against the news monitor producer.

    Malformed messages are logged and dropped; exceptions from the network
    fetch propagate to the caller's retry loop.
    """
    if msg is None or not isinstance(msg, dict):
        logger.error('News fetcher : message is broken')
        return

    task = msg
    # newspaper's Article performs the HTTP fetch and text extraction.
    article = Article(task['url'])
    article.download()
    article.parse()
    # Store extracted body as UTF-8 bytes (Python-2-era transport convention).
    task['text'] = article.text.encode('utf-8')
    dedupe_news_queue_client.sendMessage(task)
def run():
    """Poll the click-log queue forever, dispatching each message.

    Per-message failures are logged and skipped so one bad event cannot
    stall the consumer; the loop throttles itself between polls.
    """
    while True:
        if cloudAMQP_client is None:
            # No client configured yet — keep spinning until one appears.
            continue
        msg = cloudAMQP_client.getMessage()
        if msg is not None:
            try:
                handle_message(msg)
            except Exception as e:
                logger.error(
                    "Click log processor : handle message has error %s" % e)
        # Remove this if this becomes a bottleneck.
        cloudAMQP_client.sleep(SLEEP_TIME_IN_SECONDS)
# NOTE(review): continuation of an assignment that starts above this chunk —
# presumably SLEEP_TIME_IN_SECONDS = ...(config[...]); confirm against full file.
config['cloudAMQP']['scrape_news_task_queue_sleep_time_in_seconds_at_fetcher'])


def handle_message(msg):
    """Fetch and parse the article for one scrape task, then enqueue it
    for deduplication. Malformed messages are logged and dropped.
    """
    if msg is None or not isinstance(msg, dict):
        logger.error('News fetcher : message is broken')
        return
    task = msg
    text = None  # NOTE(review): dead local — never read; candidate for removal.
    # newspaper's Article performs the HTTP fetch and text extraction.
    article = Article(task['url'])
    article.download()
    article.parse()
    # Store extracted body as UTF-8 bytes before forwarding downstream.
    task['text'] = article.text.encode('utf-8')
    dedupe_news_queue_client.sendMessage(task)


# Main fetcher loop: poll the scrape queue forever; per-message failures are
# logged and skipped so one bad task cannot stall the consumer.
while True:
    if scrape_news_queue_client is not None:
        msg = scrape_news_queue_client.getMessage()
        if msg is not None:
            try:
                handle_message(msg)
            except Exception as e:
                logger.error("News fetcher error : %s" % e)
            pass
        scrape_news_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
        # (continuation of a guard from above this chunk: bail out on failure)
        return

    # need to transfer string to datetime format when storing in MongoDB
    task['publishedAt'] = parser.parse(task['publishedAt'])

    # classify news
    title = task['title']
    if title is not None:
        topic = news_topic_modeling_service_client.classify(title)
        task['class'] = topic

    # if there is the same news, then replace
    db[NEWS_TABLE_NAME].replace_one({'digest': task['digest']}, task,
                                    upsert=True)

    # Send the metrics to graphite
    # NOTE(review): if title was None, task['class'] is never set and the
    # next line raises KeyError — confirm upstream always supplies a title.
    metrics = 'news.' + task['source'] + '.' + task['class'].split(' ')[0]
    graphite.send(metrics, 1)


# Main deduper loop: poll the dedupe queue forever; per-message failures are
# logged and skipped so one bad task cannot stall the consumer.
while True:
    if dedupe_news_queue_client is not None:
        msg = dedupe_news_queue_client.getMessage()
        if msg is not None:
            # Parse and process the task
            try:
                handle_message(msg)
            except Exception as e:
                logger.error("News deduper error : %s" % e)
            pass
        dedupe_news_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
def handle_message(msg):
    """Process one click event: update the user's topic-preference model
    with a time-decay rule and record the click in the click-log table.

    Args:
        msg: dict with 'userId', 'newsId' and 'timestamp' keys; anything
            else is silently ignored (the queue may carry malformed events).
    """
    if msg is None or not isinstance(msg, dict):
        return
    if ('userId' not in msg or 'newsId' not in msg or 'timestamp' not in msg):
        return

    userId = msg['userId']
    newsId = msg['newsId']

    # Update user's preference
    db = mongodb_client.get_db()
    model = db[PREFERENCE_MODEL_TABLE_NAME].find_one({'userId': userId})

    # If model not exists, create a new one with a uniform prior over topics.
    if model is None:
        logger.debug(
            'Click log processor: Creating preference model for new user: %s'
            % userId)
        new_model = {'userId': userId}
        preference = {}
        for topic in NEWS_TOPICS:
            preference[topic] = float(INITIAL_P)
        new_model['preference'] = preference
        model = new_model

    logger.info(
        'Click log processor: Updating preference model for new user: %s'
        % userId)

    # Update model using time decaying method
    news = db[NEWS_TABLE_NAME].find_one({'digest': newsId})
    if (news is None or 'class' not in news
            or news['class'] not in NEWS_TOPICS):
        logger.error(
            "Click log processor: news doesn't exist or news topic doesn't exist"
        )
        return

    click_class = news['class']

    # Send the metrics to graphite; dots/newlines are stripped because '.'
    # is the path separator in Graphite metric names.
    metrics = 'backend.click.' + userId.replace(
        '.', '') + '.' + newsId.replace('.', '').replace(
            '\n', '') + '.' + click_class.split(' ')[0]
    graphite.send(metrics, 1)

    # Update the clicked one: p <- (1 - ALPHA) * p + ALPHA.
    old_p = model['preference'][click_class]
    model['preference'][click_class] = float((1 - ALPHA) * old_p + ALPHA)

    # Decay all non-clicked classes: p <- (1 - ALPHA) * p.
    # (Plain key iteration replaces Python-2-only iteritems(); only values
    # are mutated, so iterating the dict directly is safe.)
    for topic in model['preference']:
        if topic != click_class:
            model['preference'][topic] = float(
                (1 - ALPHA) * model['preference'][topic])

    # update to mongodb
    db[PREFERENCE_MODEL_TABLE_NAME].replace_one({'userId': userId}, model,
                                                upsert=True)

    # add news title to click log table
    click_logs = db[CLICK_LOGS_TABLE_NAME].find(
        {"$and": [{'userId': userId}, {'newsId': news['digest']}]})
    # NOTE(review): Cursor.count() and Collection.insert() are deprecated in
    # PyMongo 3.x and removed in 4.x (count_documents / insert_one are the
    # replacements) — confirm the pinned driver version before upgrading.
    if click_logs.count() == 0:
        if news['description'] is not None:
            click_log = {
                'userId': userId,
                'newsId': news['digest'],
                'description': news['description'],
                'timestamp': datetime.utcnow(),
                'clicked': 1
            }
            db[CLICK_LOGS_TABLE_NAME].insert(click_log)
            logger.info("Click log processor: add click log")
            logger.info(news['description'])
        else:
            logger.info('==== empty news description ==== ')
    else:
        # Duplicate click: refresh the timestamp on the existing log entry.
        for click_log in click_logs:
            click_log['timestamp'] = datetime.utcnow()
            db[CLICK_LOGS_TABLE_NAME].replace_one(
                {"$and": [{'userId': userId}, {'newsId': news['digest']}]},
                click_log,
                upsert=True)
            logger.info(
                "Click log processor: find duplicated click and update the time"
            )
            logger.info(news['description'])