def send(self, metrics, val):
    """Send one metric sample to the Graphite server over a plain socket.

    Args:
        metrics: dotted Graphite metric path (e.g. 'news.source.topic').
        val: numeric value to record for this sample.

    Delivery is best-effort: any failure is logged and swallowed so that
    metric reporting never interrupts the caller.
    """
    try:
        timestamp = int(time.time())
        # Graphite plaintext protocol: "<path> <value> <timestamp>\n"
        msgs = '\n%s %f %d\n' % (metrics, val, timestamp)
        sock = socket.socket()
        try:
            sock.connect((self.host, self.port))
            # sendall() requires bytes on Python 3; encode() is also
            # harmless on Python 2 for this ASCII payload.
            sock.sendall(msgs.encode('utf-8'))
        finally:
            # Always release the socket, even if connect/sendall raised.
            sock.close()
        logger.debug("Metrics '%s' with value %f is sent to %s" %
                     (metrics, val, self.port))
    except Exception as e:
        logger.error("Monitor: couldn't connect to graphite (%s)" % e)
Пример #2
0
def handle_message(msg):
    """Fetch and parse the article for one scrape task, then forward it.

    Expects ``msg`` to be a dict carrying a 'url' key. The article body is
    downloaded, parsed, attached to the task under 'text', and the enriched
    task is pushed onto the dedupe queue.
    """
    if msg is None or not isinstance(msg, dict):
        logger.error('News fetcher : message is broken')
        return

    task = msg

    article = Article(task['url'])
    article.download()
    article.parse()

    # NOTE(review): encode() stores bytes, not str — presumably what the
    # downstream queue serialization expects; confirm before changing.
    task['text'] = article.text.encode('utf-8')
    dedupe_news_queue_client.sendMessage(task)
Пример #3
0
def run():
    """Poll the message queue forever, dispatching each task to handle_message."""
    while True:
        if cloudAMQP_client is not None:
            msg = cloudAMQP_client.getMessage()
            if msg is not None:
                # Parse and process the task. A single bad message must not
                # kill the worker loop, so the error is logged and skipped.
                try:
                    handle_message(msg)
                except Exception as e:
                    logger.error(
                        "Click log processor : handle message has error %s" %
                        e)
            # Remove this if this becomes a bottleneck.
            cloudAMQP_client.sleep(SLEEP_TIME_IN_SECONDS)
Пример #4
0
    config['cloudAMQP']
    ['scrape_news_task_queue_sleep_time_in_seconds_at_fetcher'])


def handle_message(msg):
    """Fetch and parse the article for one scrape task, then forward it.

    Expects ``msg`` to be a dict carrying a 'url' key. The article body is
    downloaded, parsed, attached to the task under 'text', and the enriched
    task is pushed onto the dedupe queue.
    """
    if msg is None or not isinstance(msg, dict):
        logger.error('News fetcher : message is broken')
        return

    task = msg

    article = Article(task['url'])
    article.download()
    article.parse()

    # NOTE(review): encode() stores bytes, not str — presumably what the
    # downstream queue serialization expects; confirm before changing.
    task['text'] = article.text.encode('utf-8')
    dedupe_news_queue_client.sendMessage(task)


# Main worker loop: poll the scrape queue and process each task forever.
while True:
    if scrape_news_queue_client is not None:
        msg = scrape_news_queue_client.getMessage()
        if msg is not None:
            # One failed article must not stop the fetcher; log and move on.
            try:
                handle_message(msg)
            except Exception as e:
                logger.error("News fetcher error : %s" % e)
        scrape_news_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
Пример #5
0
                return

    # need to transfer string to datetime format when storing in MongoDB
    task['publishedAt'] = parser.parse(task['publishedAt'])

    # classify news
    title = task['title']
    if title is not None:
        topic = news_topic_modeling_service_client.classify(title)
        task['class'] = topic

    # if there is the same news, then replace
    db[NEWS_TABLE_NAME].replace_one({'digest': task['digest']}, task, upsert=True)

    # Send the metrics to graphite
    metrics = 'news.' + task['source'] + '.' + task['class'].split(' ')[0]
    graphite.send(metrics, 1)

# Main worker loop: poll the dedupe queue and process each task forever.
while True:
    if dedupe_news_queue_client is not None:
        msg = dedupe_news_queue_client.getMessage()
        if msg is not None:
            # Parse and process the task. One bad message must not stop
            # the deduper; log the error and continue polling.
            try:
                handle_message(msg)
            except Exception as e:
                logger.error("News deduper error : %s" % e)

        dedupe_news_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
Пример #6
0
def handle_message(msg):
    """Update a user's topic-preference model from a single click event.

    Expects ``msg`` to be a dict with 'userId', 'newsId' and 'timestamp'.
    Applies an exponential time-decay update (weight ALPHA) to the user's
    preference vector, persists the model, emits a Graphite metric, and
    records (or refreshes) the click in the click-log table.
    """
    if msg is None or not isinstance(msg, dict):
        return

    if ('userId' not in msg or 'newsId' not in msg or 'timestamp' not in msg):
        return

    userId = msg['userId']
    newsId = msg['newsId']

    # Update user's preference
    db = mongodb_client.get_db()
    model = db[PREFERENCE_MODEL_TABLE_NAME].find_one({'userId': userId})

    # If model not exists, create a new one with a uniform prior INITIAL_P.
    if model is None:
        logger.debug(
            'Click log processor: Creating preference model for new user: %s' %
            userId)
        new_model = {'userId': userId}
        preference = {}
        for i in NEWS_TOPICS:
            preference[i] = float(INITIAL_P)
        new_model['preference'] = preference
        model = new_model

    logger.info(
        'Click log processor: Updating preference model for new user: %s' %
        userId)

    # Update model using time decaying method
    news = db[NEWS_TABLE_NAME].find_one({'digest': newsId})
    if (news is None or 'class' not in news
            or news['class'] not in NEWS_TOPICS):
        logger.error(
            "Click log processor: news doesn't exist or news topic doesn't exist"
        )
        return

    click_class = news['class']

    # Send the metrics to graphite. Strip '.' and newlines from the ids so
    # they cannot break the dotted Graphite metric path.
    metrics = 'backend.click.' + userId.replace(
        '.', '') + '.' + newsId.replace('.', '').replace(
            '\n', '') + '.' + click_class.split(' ')[0]
    graphite.send(metrics, 1)

    # Boost the clicked class: p <- (1 - ALPHA) * p + ALPHA
    old_p = model['preference'][click_class]
    model['preference'][click_class] = float((1 - ALPHA) * old_p + ALPHA)

    # Decay every other class: p <- (1 - ALPHA) * p.
    # Iterate keys directly — the original used the Python-2-only
    # iteritems() (AttributeError on Python 3) and never read the value.
    # Only values are mutated, so iterating the dict here is safe.
    for topic in model['preference']:
        if topic != click_class:
            model['preference'][topic] = float(
                (1 - ALPHA) * model['preference'][topic])

    # update to mongodb
    db[PREFERENCE_MODEL_TABLE_NAME].replace_one({'userId': userId},
                                                model,
                                                upsert=True)

    # add news title to click log table
    click_logs = db[CLICK_LOGS_TABLE_NAME].find(
        {"$and": [{
            'userId': userId
        }, {
            'newsId': news['digest']
        }]})

    # NOTE(review): cursor.count() and collection.insert() are removed in
    # modern PyMongo (use count_documents / insert_one); left as-is to match
    # whatever driver version this project pins — confirm before upgrading.
    if click_logs.count() == 0:
        if news['description'] is not None:
            click_log = {
                'userId': userId,
                'newsId': news['digest'],
                'description': news['description'],
                'timestamp': datetime.utcnow(),
                'clicked': 1
            }
            db[CLICK_LOGS_TABLE_NAME].insert(click_log)
            logger.info("Click log processor: add click log")
            logger.info(news['description'])
        else:
            logger.info('==== empty news description ==== ')
    else:
        # Duplicate click: refresh the timestamp on the existing log entry.
        for click_log in click_logs:
            click_log['timestamp'] = datetime.utcnow()
            db[CLICK_LOGS_TABLE_NAME].replace_one(
                {"$and": [{
                    'userId': userId
                }, {
                    'newsId': news['digest']
                }]},
                click_log,
                upsert=True)
            logger.info(
                "Click log processor: find duplicated click and update the time"
            )
            logger.info(news['description'])