Example No. 1
from collections import deque
from time import sleep
from typing import Any, Deque  # noqa

from cranial.common import logger
from cranial.datastore.dbapi import Param, render_params
from cranial.listeners import base

log = logger.create('db_listener',
                    logger.fallback('LISTENERS_LOGLEVEL', 'WARNING'))


class Listener(base.Listener):
    def __init__(self,
                 cursor,
                 table: str,
                 id: str = 'id',
                 last_id: Any = None,
                 limit=1000,
                 sleep=10,
                 select='*',
                 where=None,
                 **kwargs) -> None:
        """
        Parameters
        ----------
        cursor
            A DBAPI2 cursor.

        table
            A table to poll for new rows.
        """
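
A minimal instantiation sketch, assuming the stdlib sqlite3 driver as the
DBAPI2 cursor source; the database file, table name, and keyword values are
placeholders, not part of the original example.

import sqlite3

# Any DBAPI2 cursor works; sqlite3 is used here only because it needs no server.
conn = sqlite3.connect('app.db')
listener = Listener(conn.cursor(), table='events', id='id', limit=100, sleep=5)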
Example No. 2
from cranial.common import logger

from cranial.listeners import base
from cranial.messaging.adapters.kafka import get_consumer

log = logger.create('kafka_client',
                    logger.fallback('LISTENERS_LOGLEVEL', 'WARNING'))


class Listener(base.Listener):
    """ Listens for messages on a single Kafka topic."""
    def __init__(self, topic, consumer=None, **kwargs):
        self.topic = topic
        if not consumer:
            self.consumer = get_consumer(topic=topic, **kwargs)
        else:
            consumer.subscribe([topic])
            self.consumer = consumer

        self.empty_messages = 0

    def recv(self, timeout=-1, do_raise=False):
        msg = self.consumer.poll(timeout)
        err = hasattr(msg, 'error') and msg.error()
        if err:
            log.warning(err)
            if do_raise:
                raise Exception(
                    'Error while polling for message: {}'.format(err))
            # An application might expect streaming bytes. In the future, we
            # might revise this to raise an exception which the application
            # can handle as it sees fit.
            return b''
        if msg is None:
            # Nothing arrived before the timeout; count the empty poll.
            self.empty_messages += 1
            return b''
        return msg.value()
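
A hypothetical receive loop built on the Listener above; the topic name is a
placeholder, and get_consumer() is assumed to accept broker settings through
its keyword arguments.

listener = Listener('events')
while True:
    payload = listener.recv(timeout=1.0)  # bytes; empty on error or timeout
    if payload:
        print(payload)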
Example No. 3
import datetime
import os
import socket

import confluent_kafka as kafka

from cranial.common import logger

log = logger.create('cranial', os.environ.get('CRANIAL_LOGLEVEL', 'WARN'))


class LoggingProducer(kafka.Producer):
    def logError(self, err, msg):
        if err:
            log.error(msg)

    def produce(self, *args, **kwargs):
        kwargs['on_delivery'] = self.logError
        super().produce(*args, **kwargs)
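
A short usage sketch for LoggingProducer; the broker address and topic are
placeholders. confluent_kafka invokes on_delivery callbacks with (err, msg),
which matches the signature of logError above.

producer = LoggingProducer({'bootstrap.servers': 'localhost:9092'})
producer.produce('events', value=b'hello')  # delivery failures get logged
producer.flush()  # block until outstanding delivery callbacks have fired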


class MustCommitConsumer(kafka.Consumer):
    def __init__(self, config):
        config['enable.auto.commit'] = False
        super().__init__(config)


class CarefulConsumer(MustCommitConsumer):
    """A Kafka consumer that refrains from commiting it's previous message until
    it is asking for a new one. Threads should not share a CarefulConsumer!
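
A hypothetical configuration sketch; the broker address and group id are
placeholders. Because MustCommitConsumer forces enable.auto.commit off,
offsets only advance on an explicit commit(), which the docstring says a
later poll performs (that overriding logic is not shown in this excerpt).

consumer = CarefulConsumer({'bootstrap.servers': 'localhost:9092',
                            'group.id': 'careful-group'})
consumer.subscribe(['events'])
msg = consumer.poll(1.0)
# ... process msg; the next poll (or an explicit commit()) marks it done.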
Example No. 4
"""
tokenizers that do not use spaCy
"""
import subprocess
import os
import collections
import logging

from cranial.common import logger
from cranial.model_base import ModelBase

log = logger.create('tokenizers',
                    os.environ.get('MODELS_LOGLEVEL',
                                   logging.WARNING))  # streaming log


class MosesTokenizer(ModelBase):
    name = 'moses_tokenizer'

    def __init__(self, moses_repo_path, language='en', threads=None, **kwargs):
        """
        This wraps the Moses tokenizer - https://github.com/moses-smt/mosesdecoder

        Note that it is much faster to transform a few large chunks of text
        than many small ones. So before passing strings to this tokenizer, it
        may be worth batching short texts into one large text with a known
        separator between the individual texts, then splitting them apart
        again afterward.

        Parameters
        ----------
        moses_repo_path
            Path to the cloned repo.
        """
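
A sketch of the batching advice from the docstring; the repo path, the
separator token, and the transform() method name are assumptions for
illustration, not taken from this excerpt.

SEP = ' DOCSEPARATOR '  # a token the tokenizer should pass through unchanged
tokenizer = MosesTokenizer('/path/to/mosesdecoder', language='en')
batched = SEP.join(['first short text', 'second short text'])
tokenized = tokenizer.transform(batched)          # hypothetical method name
pieces = [p.strip() for p in tokenized.split('DOCSEPARATOR')]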