Exemplo n.º 1
0
    def listen(self, timeout=None):
        '''Drain the configured queue, routing what is heard to the handler.

        Installs ``self.handler`` on ``self.queue`` and then drains the queue,
        forwarding ``timeout`` unchanged.

        Raises InsufficientConfiguration when ``self.queue`` is falsy.
        '''
        if self.queue:
            # The handler must be installed before draining starts.
            self.queue.set_handler(self.handler)
            self.queue.drain(timeout=timeout)
            return

        raise InsufficientConfiguration('No queue configured.')
Exemplo n.º 2
0
def _read_config_file(config_file):
    '''Load environment settings from a dotenv-style config file.

    When ``config_file`` is None, the path is taken from the
    DATALAKE_CONFIG environment variable, falling back to DEFAULT_CONFIG.

    An existing file is passed to ``load_dotenv``. A missing file is only an
    error when it is not the default path, in which case
    InsufficientConfiguration is raised.
    '''
    path = config_file
    if path is None:
        path = os.environ.get('DATALAKE_CONFIG', DEFAULT_CONFIG)

    if os.path.exists(path):
        load_dotenv(path)
        return

    # A missing default config is tolerated silently.
    if path == DEFAULT_CONFIG:
        return

    raise InsufficientConfiguration('Config file {} not exist.'.format(path))
Exemplo n.º 3
0
 def _prepare_connection(self, connection):
     region = os.environ.get('AWS_REGION')
     if connection:
         self._connection = connection
     elif region:
         self._connection = boto.dynamodb2.connect_to_region(region)
     else:
         msg = 'Please provide a connection or configure a region'
         raise InsufficientConfiguration(msg)
Exemplo n.º 4
0
def _get_sentry_handler():

    dsn = os.environ.get('DATALAKE_SENTRY_DSN')
    if not dsn:
        return None

    if not sentry_available():
        msg = 'DATALAKE_SENTRY_DSN is configured but raven is not installed. '
        msg += '`pip install datalake[sentry]` to turn this feature on.'
        raise InsufficientConfiguration(msg)

    return {
        'level': 'ERROR',
        'class': 'raven.handlers.logging.SentryHandler',
        'dsn': dsn
    }
Exemplo n.º 5
0
    def _listen(self, timeout=None, workers=1):
        '''Push files found in the queue directory, then run the listener.

        Logs a banner with the package version, validates ``workers``, sets
        up worker objects when more than one is requested, pushes every
        entry currently in ``self.queue_dir`` and finally calls
        ``self._run(timeout)``.

        Raises InsufficientConfiguration when ``workers`` is zero or
        negative.
        '''
        from . import __version__

        log.info('------------------------------')
        log.info('datalake ' + __version__)

        # Reset first so the attribute exists even when validation fails.
        self._workers = []
        if workers <= 0:
            raise InsufficientConfiguration(
                'number of upload workers cannot be zero or negative')

        if workers > 1:
            # With multiple workers the main thread watches the queue
            # directory and feeds the files into a Queue object that the
            # worker threads service. The word "queue" is therefore a bit
            # overloaded in this module.
            self._queue = Queue()
            pool = []
            for index in range(workers):
                pool.append(self._create_worker(index))
            self._workers = pool

        # Handle whatever is already sitting in the queue directory.
        for entry in os.listdir(self.queue_dir):
            self._push(os.path.join(self.queue_dir, entry))

        self._run(timeout)
Exemplo n.º 6
0
 def _validate_queue_dir(self):
     if self.queue_dir is None:
         raise InsufficientConfiguration('Please set DATALAKE_QUEUE_DIR')
     self.queue_dir = os.path.abspath(self.queue_dir)
Exemplo n.º 7
0
 def wrapped(*args, **kwargs):
     # Forward the call to the wrapped callable only when the queuable
     # dependencies are flagged as available (has_queue); otherwise fail
     # with installation instructions.
     if has_queue:
         return f(*args, **kwargs)
     raise InsufficientConfiguration(
         'This feature requires the queuable deps.  '
         '`pip install datalake[queuable]` to turn this feature on.'
     )
Exemplo n.º 8
0
 def http_url(self):
     '''Return the datalake HTTP URL with trailing slashes removed.

     Uses ``self._http_url`` when it is truthy, otherwise falls back to the
     DATALAKE_HTTP_URL environment variable. The chosen value is stored back
     on ``self._http_url`` before any stripping.

     Raises InsufficientConfiguration when no URL can be determined.
     '''
     url = self._http_url
     if not url:
         url = environ.get('DATALAKE_HTTP_URL')
     self._http_url = url

     if url is None:
         raise InsufficientConfiguration('Please specify DATALAKE_HTTP_URL')
     return url.rstrip('/')
Exemplo n.º 9
0
 def from_config(cls):
     '''Build an instance from the DATALAKE_DYNAMODB_TABLE environment variable.

     Calls ``cls`` with the configured table name and returns the result.
     Raises InsufficientConfiguration when the variable is not set.
     '''
     configured_table = os.environ.get('DATALAKE_DYNAMODB_TABLE')
     if configured_table is not None:
         return cls(configured_table)
     raise InsufficientConfiguration('Please specify a dynamodb table')
Exemplo n.º 10
0
 def from_config(cls):
     '''Build an instance from the DATALAKE_INGESTION_QUEUE environment variable.

     Calls ``cls`` with the configured queue name and returns the result.
     Raises InsufficientConfiguration when the variable is not set.
     '''
     configured_queue = os.environ.get('DATALAKE_INGESTION_QUEUE')
     if configured_queue is not None:
         return cls(configured_queue)
     raise InsufficientConfiguration('Please configure a queue')