Exemplo n.º 1
0
    def rotate(self):
        '''
        Flush pending data and switch to a new output file when the name changes.

        ``self.path`` is a format string that is filled in with the current UTC
        time, e.g. ``tweets.{:%Y-%m-%d}.jsonl`` becomes
        ``tweets.2016-12-31.jsonl``. If the resulting filename equals the one
        currently in use, writing continues on the same stream. Otherwise the
        old stream (if any) is closed, missing parent folders are created, and
        the new file is opened in binary append mode.

        Every call re-schedules this method on the current IOLoop after 60
        seconds.
        '''
        # Flush first so that buffered data is not lost when the stream changes
        self.flush()
        target = self.path.format(datetime.datetime.utcnow())
        if target != self.stream_path:
            previous = self.stream
            if previous is not None:
                previous.close()
            self.stream_path = target
            parent = os.path.dirname(os.path.abspath(target))
            if not os.path.exists(parent):
                os.makedirs(parent)
            self.stream = open(target, 'ab')
            app_log.debug('StreamWriter writing to %s', target)

        # Check again in a minute
        IOLoop.current().call_later(60, self.rotate)
Exemplo n.º 2
0
 def close(self):
     '''Close ``self.store``. A ``ValueError`` raised while closing is logged and ignored.'''
     try:
         store = self.store
         store.close()
     except ValueError:
         # h5py.h5f.get_obj_ids often raises a ValueError: Not a file id.
         # Presumably the file handle was already closed, so this is not fatal.
         app_log.debug('HDF5Store("%s").close() error ignored', self.path)
Exemplo n.º 3
0
 def load_datasets(data, each):
     '''
     Load every configured dataset into ``data`` and apply the condition.

     Lists are stored as-is; anything else is treated as keyword arguments
     for ``gramex.data.filter``. ``condition(**data)`` is then evaluated:
     a DataFrame result is stored as ``data['data']``, a dict result is
     merged into ``data``, and any other falsy result stops processing
     (nothing is added to ``each``).

     ``each`` is extended in place with ``(index, row)`` pairs taken from
     ``data[alert['each']]`` when the alert has an ``each:`` key, else with
     the single pair ``(0, None)``. Raises ``ValueError`` if the ``each:``
     data is not a dict, list or object with ``iterrows``.
     '''
     for key, val in datasets.items():
         if isinstance(val, list):
             # Raw data in lists is used as-is
             data[key] = val
         else:
             # Dicts are treated as {url: ...}
             data[key] = gramex.data.filter(**val)

     result = condition(**data)
     # Compare the type name instead of isinstance(result, pd.DataFrame)
     # to avoid importing pandas
     if type(result).__name__ == 'DataFrame':
         data['data'] = result
     elif isinstance(result, dict):
         data.update(result)
     elif not result:
         app_log.debug('alert: %s stopped. condition = %s', name, result)
         return

     if 'each' not in alert:
         each.append((0, None))
         return

     each_data = data[alert['each']]
     if isinstance(each_data, dict):
         pairs = list(each_data.items())
     elif isinstance(each_data, list):
         pairs = list(enumerate(each_data))
     elif hasattr(each_data, 'iterrows'):
         pairs = list(each_data.iterrows())
     else:
         raise ValueError(
             'alert: %s: each: data.%s must be dict/list/DF, not %s' %
             (name, alert['each'], type(each_data)))
     each.extend(pairs)
Exemplo n.º 4
0
 def override_user(self):
     '''
     Set the session user from a signed header or a one-time password.

     - ``X-Gramex-User`` HTTP header: a signed JSON value that overrides the
       current user for the session. Takes priority over the OTP.
     - ``X-Gramex-OTP`` HTTP header (or its synonym ``?gramex-otp=``): sets
       the user stored against that OTP, then clears the OTP.

     Raises ``HTTPError(BAD_REQUEST)`` if the header cannot be decoded, or if
     the OTP is unknown, malformed or expired.
     '''
     headers = self.request.headers
     cipher = headers.get('X-Gramex-User')
     if cipher:
         import json
         secret = conf.app.settings['cookie_secret']
         try:
             user = json.loads(decode_signed_value(
                 secret, 'user', cipher, max_age_days=self._session_expiry))
         except Exception:
             reason = '%s: invalid X-Gramex-User: %s' % (self.name, cipher)
             raise HTTPError(BAD_REQUEST, reason=reason)
         app_log.debug('%s: Overriding user to %r', self.name, user)
         self.session['user'] = user
         return

     otp = headers.get('X-Gramex-OTP') or self.get_argument('gramex-otp', None)
     if not otp:
         return
     otp_key = 'otp:' + otp
     otp_data = self._session_store.load(otp_key, None)
     well_formed = (
         isinstance(otp_data, dict) and '_t' in otp_data and 'user' in otp_data)
     if not well_formed:
         reason = '%s: invalid X-Gramex-OTP: %s' % (self.name, otp)
         raise HTTPError(BAD_REQUEST, reason=reason)
     if otp_data['_t'] < time.time():
         reason = '%s: expired X-Gramex-OTP: %s' % (self.name, otp)
         raise HTTPError(BAD_REQUEST, reason=reason)
     # Store None against the OTP key so that it cannot be used again
     self._session_store.dump(otp_key, None)
     self.session['user'] = otp_data['user']
Exemplo n.º 5
0
 def call_later(self):
     '''
     Schedule the next run automatically.

     The delay comes from ``self.cron.next()`` when a ``cron`` attribute
     exists, else it is ``None``. It is handed to ``self._call_later``.
     Do NOT call twice: creates two callbacks (per the original note; this
     depends on ``_call_later`` -- TODO confirm).
     '''
     delay = None
     if hasattr(self, 'cron'):
         delay = self.cron.next(default_utc=self.utc)
     self._call_later(delay)
     if delay is None:
         app_log.debug('No further schedule for %s', self.name)
     else:
         app_log.debug('Scheduling %s after %.0fs', self.name, delay)
Exemplo n.º 6
0
 def call_later(self):
     '''
     Schedule the next run automatically.

     Without a ``cron`` attribute there is no next run and the delay is
     ``None``. The delay is always passed to ``self._call_later``, which
     clears any previously scheduled run (per the original note; that
     method is not shown here -- TODO confirm).
     '''
     has_cron = hasattr(self, 'cron')
     delay = self.cron.next(default_utc=self.utc) if has_cron else None
     self._call_later(delay)
     if delay is None:
         app_log.debug('No further schedule for %s', self.name)
         return
     app_log.debug('Scheduling %s after %.0fs', self.name, delay)
Exemplo n.º 7
0
 def call_later(self):
     '''
     Schedule the next run on ``self.ioloop`` and remember it in ``self.callback``.

     Do NOT call twice: the earlier timeout is not removed, so ``self.run``
     would end up scheduled twice.
     '''
     delay = None
     if hasattr(self, 'cron'):
         delay = self.cron.next(default_utc=False)
     if delay is None:
         app_log.debug('No further schedule for %s', self.name)
         return
     app_log.debug('Scheduling %s after %.0fs', self.name, delay)
     self.callback = self.ioloop.call_later(delay, self.run)
Exemplo n.º 8
0
 def flush(self):
     '''
     Write pending updates to the JSON file, if anything has changed.

     The file is re-read, the pending ``self.update`` values are merged on
     top of it, and the merged result is written back and kept as
     ``self.store``. Pending updates and the changed flag are then reset.
     '''
     super(JSONStore, self).flush()
     if not self.changed:
         return
     app_log.debug('Flushing %s', self.path)
     merged = self._read_json()
     merged.update(self.update)
     self._write_json(merged)
     self.store = merged
     self.update = {}
     self.changed = False
Exemplo n.º 9
0
def query(sql, engine, state=None, **kwargs):
    '''
    Read SQL query or database table into a DataFrame. Caches results unless
    state has changed. It always re-runs the query unless state is specified.

    The state can be specified in 4 ways:

    1. A string. This must be as a lightweight SQL query. If the result changes,
       the original SQL query is re-run.
    2. A function. This is called to determine the state of the database.
    3. A list (or tuple) of tables. This list of ["db.table"] names specifies
       which tables to watch for. This is currently experimental.
    4. ``None``: the default. The query is always re-run and not cached.

    :arg sql: query (or table name) passed to ``pd.read_sql``
    :arg engine: database engine passed to ``pd.read_sql``. Its ``.url`` is
        part of the cache key
    :arg state: see above
    :arg kwargs: passed to ``pd.read_sql``. ``params`` is part of the cache key
    :return: the DataFrame. If ``_reload_status=True`` is passed, returns a
        ``(result, reloaded)`` tuple instead
    :raises TypeError: if ``state`` is not one of the 4 supported types
    '''
    # Pass _reload_status = True for testing purposes. This returns a tuple:
    # (result, reloaded) instead of just the result.
    _reload_status = kwargs.pop('_reload_status', False)
    reloaded = False
    _cache = kwargs.pop('_cache', _QUERY_CACHE)
    store_cache = True

    key = (str(sql), json.dumps(kwargs.get('params', {}),
                                sort_keys=True), engine.url)
    cached = _cache.get(key, {})
    current_status = cached.get('status', None) if cached else None
    if isinstance(state, (list, tuple)):
        status = _table_status(engine, tuple(state))
    elif isinstance(state, six.string_types):
        status = pd.read_sql(state, engine).to_dict(orient='list')
    elif callable(state):
        status = state()
    elif state is None:
        # Create a new status every time, so that the query is always re-run
        status = object()
        store_cache = False
    else:
        # Interpolate the message here: TypeError does not %-format its arguments
        raise TypeError(
            'gramex.cache.query(state=) must be a table list, query or fn, not %s' %
            repr(state))

    # Only use the cache if an entry exists. Otherwise a state that evaluates to
    # None would equal the default current_status (None) and fail on the lookup.
    if cached and status == current_status:
        result = cached['data']
    else:
        app_log.debug(
            'gramex.cache.query: %s. engine: %s. state: %s. kwargs: %s', sql,
            engine, state, kwargs)
        result = pd.read_sql(sql, engine, **kwargs)
        if store_cache:
            _cache[key] = {
                'data': result,
                'status': status,
            }
        reloaded = True

    return (result, reloaded) if _reload_status else result
Exemplo n.º 10
0
 def purge(self):
     '''
     Flush pending changes, delete every key that ``self.purge_keys`` returns
     for ``self.store``, and write the JSON file if anything was deleted.
     '''
     self.flush()
     removed = 0
     for key in self.purge_keys(self.store):
         del self.store[key]
         removed += 1
     if not removed:
         return
     app_log.debug('Purging %s', self.path)
     self._write_json(self.store)
Exemplo n.º 11
0
    def get(self, *path_args, **path_kwargs):
        '''
        Serve model metadata, cached data, the model file, or a prediction.

        The response depends on which underscore-prefixed query argument is
        present, checked in this order:

        - ``_params``: JSON with the stored ``transform`` and ``model`` configs
        - ``_cache``: the loaded data as JSON records
        - ``_download``: the model file as an attachment
        - ``_model``: JSON of the ``params`` option
        - otherwise: the remaining (non-underscore) arguments are turned into
          a DataFrame and passed to ``self._predict``. If there are no such
          arguments (or they cannot form a DataFrame), the HTML template is
          rendered instead.

        The parent handler's ``get`` is always called at the end.
        '''
        if '_params' in self.args:
            params = {
                'opts': self.config_store.load('transform'),
                'params': self.config_store.load('model')
            }
            self.write(json.dumps(params, indent=2))
        elif '_cache' in self.args:
            self.write(self.load_data().to_json(orient='records'))
        else:
            self._check_model_path()
            if '_download' in self.args:
                self.set_header('Content-Type', 'application/octet-stream')
                self.set_header(
                    'Content-Disposition',
                    f'attachment; filename={op.basename(self.model_path)}')
                with open(self.model_path, 'rb') as fout:
                    self.write(fout.read())
            elif '_model' in self.args:
                self.write(json.dumps(self.get_opt('params'), indent=2))

            else:
                try:
                    data_args = {
                        k: v
                        for k, v in self.args.items() if not k.startswith('_')
                    }
                    # Wrap scalars in a list so that every column has a length
                    data_args = {
                        k: [v] if not isinstance(v, list) else v
                        for k, v in data_args.items()
                    }
                    data = pd.DataFrame.from_dict(data_args)
                except Exception as err:
                    # Most exceptions have no .msg attribute. Log the exception text
                    app_log.debug(str(err))
                    data = []
                if len(data) > 0:
                    self.set_header('Content-Type', 'application/json')
                    # Never pass the target column to the model as a feature
                    data = data.drop([self.get_opt('target_col')],
                                     axis=1,
                                     errors='ignore')
                    # Run the prediction in the thread pool
                    prediction = yield gramex.service.threadpool.submit(
                        self._predict, data)
                    self.write(
                        json.dumps(prediction, indent=2,
                                   cls=CustomJSONEncoder))
                else:
                    self.set_header('Content-Type', 'text/html')
                    self.render(self.template,
                                handler=self,
                                data=self.load_data())
        super(MLHandler, self).get(*path_args, **path_kwargs)
Exemplo n.º 12
0
 def allowed(self, path):
     '''
     Return whether ``path`` may be served.

     A path is allowed if it matches no ``ignore:`` pattern. The ``allow:``
     patterns are consulted only for the first ``ignore:`` pattern that
     matches: if any of them matches too, the path is allowed, else it is
     disallowed. Override this method for a custom implementation.
     '''
     for ignore in self.ignore:
         if not _match(path, ignore):
             continue
         # An ignore: matched. Any matching allow: overrides it
         if any(_match(path, allow) for allow in self.allow):
             return True
         app_log.debug('%s: Disallow "%s". It matches "%s"', self.name,
                       path, ignore)
         return False
     return True
Exemplo n.º 13
0
 def purge(self):
     '''
     Flush pending changes, decode every stored value from JSON, delete the
     keys that ``self.purge_keys`` returns for the decoded items, and flush
     the store if anything was deleted.
     '''
     self.flush()
     items = {}
     for key, val in self.store.items():
         # val[()] reads the stored value, which is parsed as JSON
         items[key] = json.loads(
             val[()], object_pairs_hook=AttrDict, cls=CustomJSONDecoder)
     removed = 0
     for key in self.purge_keys(items):
         del self.store[key]
         removed += 1
     if not removed:
         return
     app_log.debug('Purging %s', self.path)
     self.store.flush()
Exemplo n.º 14
0
def gramex_update(url):
    '''
    Post event logs to ``url`` and log whether a newer Gramex is available.

    Does nothing (beyond logging) if the eventlog service is not running, or
    if an ``update`` event was recorded within the last day. Otherwise, all
    events since the last update are posted as JSON, the JSON response is
    recorded as a new ``update`` event, and its ``version`` is compared with
    the running version.

    Returns ``{'logs': ..., 'response': ...}`` when the check runs.

    NOTE(review): versions are compared as plain values with ``<`` / ``>``.
    If both are strings, "1.10" sorts before "1.9" -- confirm this is intended.
    '''
    import time
    import requests
    import platform
    from . import services

    eventlog = services.info.eventlog
    if not eventlog:
        return app_log.error(
            'eventlog: service is not running. So Gramex update is disabled')

    run_query = eventlog.query
    last = run_query(
        'SELECT * FROM events WHERE event="update" ORDER BY time DESC LIMIT 1')
    # Wait for one day before updates
    one_day = 24 * 60 * 60
    if last and time.time() < last[0]['time'] + one_day:
        return app_log.debug('Gramex update ran recently. Deferring check.')

    # Every posted log entry also carries the data directory and platform info
    meta = {
        'dir': variables.get('GRAMEXDATA'),
        'uname': platform.uname(),
    }
    if last:
        events = run_query('SELECT * FROM events WHERE time > ? ORDER BY time',
                           (last[0]['time'], ))
    else:
        events = run_query('SELECT * FROM events')
    logs = [dict(log, **meta) for log in events]

    response = requests.post(url, data=json.dumps(logs))
    response.raise_for_status()
    latest = response.json()
    version = latest['version']
    if version > __version__:
        app_log.error(
            'Gramex %s is available. See https://learn.gramener.com/guide/',
            version)
    elif version < __version__:
        app_log.warning(
            'Gramex update: your version %s is ahead of the stable %s',
            __version__, version)
    else:
        app_log.debug('Gramex version %s is up to date', __version__)
    eventlog.add('update', latest)
    return {'logs': logs, 'response': latest}
Exemplo n.º 15
0
def ensure_single_session(handler):
    '''
    Ensure that user in this session is logged out of all other sessions.

    Every other session in ``handler._session_store`` whose user has the same
    ``id`` as the current session's user gets its ``user`` key removed.
    Sessions of other users, the current session and OTP entries (keys
    starting with ``otp:``) are left alone. Does nothing if the current
    session has no user id.
    '''
    # session['user'] may be missing or None. Treat both as "no user"
    user_id = (handler.session.get('user') or {}).get('id')
    if user_id is None:
        return
    current_id = handler.session.get('id')
    for key in handler._session_store.keys():
        # Ignore current session or OTP sessions
        if key == current_id:
            continue
        if isinstance(key, six.text_type) and key.startswith('otp:'):
            continue
        if isinstance(key, six.binary_type) and key.startswith(b'otp:'):
            continue
        other_session = handler._session_store.load(key)
        if other_session is None:
            continue
        other_user = other_session.get('user')
        # Only log out the SAME user. Other users' sessions must stay intact
        if other_user is not None and other_user.get('id') == user_id:
            other_session.pop('user')
            handler._session_store.dump(key, other_session)
            # Fall back to the store key if the session has no 'id' of its own
            app_log.debug('dropped user %s from session %s', user_id,
                          other_session.get('id', key))
Exemplo n.º 16
0
 def _get(self, *args, **kwargs):
     '''
     Yield the result of ``self._get_path`` for the file(s) this request maps to.

     ``self.root`` decides what is served:

     - a list: every item is served one after another (``multipart=True``)
     - a dict of ``{compiled regex: path template}``: the template of the
       first regex matching the URL path is formatted with the match groups,
       the handler defaults and the first value of each URL argument.
       Raises ``HTTPError(NOT_FOUND)`` if no regex matches
     - otherwise, with no positional argument: the root itself
     - otherwise: ``self.pattern`` with ``*`` replaced by the path if a
       pattern is set, else the path under the root directory (or the root
       itself if it is not a directory)

     The ``include_body`` keyword argument (default ``True``) is stored on
     the handler.
     '''
     self.include_body = kwargs.pop('include_body', True)
     first_arg = args[0] if len(args) else ''
     path = urljoin('/', first_arg).lstrip('/')
     root = self.root
     if isinstance(root, list):
         # Concatenate multiple files and serve them one after another
         for path_item in root:
             yield self._get_path(path_item, multipart=True)
     elif isinstance(root, dict):
         # Render path for the first matching regex
         for pattern, filestr in root.items():
             match = pattern.match(path)
             if not match:
                 continue
             # Unknown keys in the template resolve to an empty string
             query = defaultdict(text_type, **self.default)
             query.update(
                 {k: v[0] for k, v in self.args.items() if len(v) > 0})
             query.update(match.groupdict())
             target = Path(filestr.format(*match.groups(), **query)).absolute()
             app_log.debug('%s: %s renders %s', self.name,
                           self.request.path, target)
             yield self._get_path(target)
             break
         else:
             raise HTTPError(NOT_FOUND, '%s matches no path key',
                             self.request.path)
     elif not args:
         # No group has been specified in the pattern. So just serve root
         yield self._get_path(root)
     else:
         # Eliminate parent directory references like `../` in the URL
         path = urljoin('/', path)[1:]
         if self.pattern:
             target = Path(self.pattern.replace('*', path)).absolute()
         elif root.is_dir():
             target = root / path
         else:
             target = root
         yield self._get_path(target)
Exemplo n.º 17
0
    def run_alert(callback=None):
        '''
        Runs the configured alert. If a callback is specified, calls the
        callback with all email arguments. Else sends the email.

        Steps:

        1. Load all datasets and evaluate ``condition(**data)``. A DataFrame
           result becomes ``data['data']``, a dict result is merged into
           ``data``, and any other falsy result stops the alert.
        2. Work out what to loop over. With an ``each:`` key, one email is
           prepared per dict item / list item / DataFrame row. Without it, a
           single email is prepared.
        3. For each item, render the templates into email keyword arguments,
           fetch images and attachments, then call the callback or send the
           email and log the event.

        Returns the list of email keyword argument dicts (one per item,
        including items whose templates failed). Returns ``None`` if the
        condition stopped the alert or the ``each:`` data has an unsupported
        type. If ``callback`` is callable, returns the callback's result for
        the first item that reaches it, without processing further items.
        '''
        app_log.info('alert: %s running', name)
        data = {'config': alert}
        for key, dataset in datasets.items():
            # Allow raw data in lists as-is. Treat dicts as {url: ...}
            data[key] = dataset if isinstance(
                dataset, list) else gramex.data.filter(**dataset)

        result = condition(**data)
        # Avoiding isinstance(result, pd.DataFrame) to avoid importing pandas
        if type(result).__name__ == 'DataFrame':
            data['data'] = result
        elif isinstance(result, dict):
            data.update(result)
        elif not result:
            app_log.debug('alert: %s stopped. condition = %s', name, result)
            return

        # Default: a single email with no index and no row
        each = [(None, None)]
        if 'each' in alert:
            each_data = data[alert['each']]
            if isinstance(each_data, dict):
                each = list(each_data.items())
            elif isinstance(each_data, list):
                each = list(enumerate(each_data))
            elif hasattr(each_data, 'iterrows'):
                each = list(each_data.iterrows())
            else:
                app_log.error(
                    'alert: %s: each: requires data.%s to be a dict/list/DataFrame',
                    name, alert['each'])
                return

        kwargslist = []
        for index, row in each:
            data['index'], data['row'], data['config'] = index, row, alert

            # Generate email content
            kwargs = {}
            # Appended before rendering, so failed items stay in the returned list
            kwargslist.append(kwargs)
            # bodyfile/htmlfile/markdownfile: render the template to get a file
            # path, then render that file as a template. Skipped if the
            # corresponding body/html/markdown template is also specified.
            for key in ['bodyfile', 'htmlfile', 'markdownfile']:
                target = key.replace('file', '')
                if key in templates and target not in templates:
                    path = templates[key].generate(**data).decode('utf-8')
                    tmpl = gramex.cache.open(path, 'template')
                    kwargs[target] = tmpl.generate(**data).decode('utf-8')
            try:
                for key in [
                        'to', 'cc', 'bcc', 'from', 'subject', 'body', 'html',
                        'markdown'
                ]:
                    if key in templates:
                        tmpl = templates[key]
                        if isinstance(tmpl, list):
                            kwargs[key] = []
                            for subtmpl in tmpl:
                                kwargs[key].append(
                                    subtmpl.generate(**data).decode('utf-8'))
                        else:
                            kwargs[key] = tmpl.generate(**data).decode('utf-8')
            except Exception:
                # If any template raises an exception, log it and continue with next email
                # `key` is the template that was being rendered when it failed
                app_log.exception('alert: %s(#%s).%s: Template exception',
                                  name, index, key)
                continue
            headers = {}
            # user: {id: ...} creates an X-Gramex-User header to mimic the user
            if 'user' in alert:
                user = json.dumps(alert['user'],
                                  ensure_ascii=True,
                                  separators=(',', ':'))
                headers['X-Gramex-User'] = tornado.web.create_signed_value(
                    info.app.settings['cookie_secret'], 'user', user)
            # Markdown replaces any html content rendered above
            if 'markdown' in kwargs:
                kwargs['html'] = _markdown_convert(kwargs.pop('markdown'))
            if 'images' in templates:
                kwargs['images'] = {}
                for cid, val in templates['images'].items():
                    urlpath = val.generate(**data).decode('utf-8')
                    urldata = urlfetch(urlpath, info=True, headers=headers)
                    if urldata['content_type'].startswith('image/'):
                        kwargs['images'][cid] = urldata['name']
                    else:
                        # Not an image. Log the start of the fetched content to help debug
                        with io.open(urldata['name'], 'rb') as temp_file:
                            bytestoread = 80
                            first_line = temp_file.read(bytestoread)
                        app_log.error(
                            'alert: %s: %s: %d (%s) not an image: %s\n%r',
                            name, cid, urldata['r'].status_code,
                            urldata['content_type'], urlpath, first_line)
            if 'attachments' in templates:
                kwargs['attachments'] = [
                    urlfetch(attachment.generate(**data).decode('utf-8'),
                             headers=headers)
                    for attachment in templates['attachments']
                ]
            # With a callback, return its result instead of sending the email.
            # This ends the loop at the first item that gets here.
            if callable(callback):
                return callback(**kwargs)
            # Email recipient. TODO: run this in a queue. (Anand)
            mailer.mail(**kwargs)
            # Log the event
            event = {
                'alert':
                name,
                'service':
                service,
                'from':
                mailer.email or '',
                'to':
                '',
                'cc':
                '',
                'bcc':
                '',
                'subject':
                '',
                'datetime':
                datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
            }
            # Override the defaults above with the email fields actually used
            event.update({k: v for k, v in kwargs.items() if k in event})
            event['attachments'] = ', '.join(kwargs.get('attachments', []))
            alert_logger.info(event)
        return kwargslist
Exemplo n.º 18
0
def url(conf):
    '''
    Set up the tornado web app URL handlers.

    ``conf`` maps URL names to specs. Each spec needs a ``handler`` and a
    ``pattern``, and may have ``kwargs``, ``cache`` and ``priority``. For
    every spec, a subclass of the handler is created with ``name``, ``conf``
    and ``cache`` class attributes, and set up with the spec's kwargs. If
    setup fails, a ``SetupFailedHandler`` subclass is used instead, so that
    requests report the error. Specs with no handler, an unknown handler or
    an invalid pattern are logged and skipped.

    Handler entries are cached against the spec, so unchanged specs are
    re-used. Finally, the app's handlers are replaced with the new list.
    '''
    handlers = []
    # Sort the handlers in descending order of priority
    specs = sorted(conf.items(), key=_sort_url_patterns, reverse=True)
    for name, spec in specs:
        _key = cache_key('url', spec)
        if _key in _cache:
            handlers.append(_cache[_key])
            continue
        if 'handler' not in spec:
            # Pass the name. Without it the log shows a literal "%s"
            app_log.error('url: %s: no handler specified', name)
            continue
        app_log.debug('url: %s (%s) %s', name, spec.handler,
                      spec.get('priority', ''))
        urlspec = AttrDict(spec)
        handler = locate(spec.handler, modules=['gramex.handlers'])
        if handler is None:
            app_log.error('url: %s: ignoring missing handler %s', name,
                          spec.handler)
            continue

        # Create a subclass of the handler with additional attributes.
        class_vars = {'name': name, 'conf': spec}
        # If there's a cache section, get the cache method for use by BaseHandler
        if 'cache' in urlspec:
            class_vars['cache'] = _cache_generator(urlspec['cache'], name=name)
        else:
            class_vars['cache'] = None
        # PY27 type() requires the class name to be a string, not unicode
        urlspec.handler = type(str(spec.handler), (handler, ), class_vars)

        # If there's a setup method, call it to initialize the class
        kwargs = urlspec.get('kwargs', {})
        if hasattr(handler, 'setup'):
            try:
                urlspec.handler.setup_default_kwargs()
                urlspec.handler.setup(**kwargs)
            except Exception:
                app_log.exception('url: %s: setup exception in handler %s',
                                  name, spec.handler)
                # Since we can't set up the handler, all requests must report the error instead
                class_vars['exc_info'] = sys.exc_info()
                error_handler = locate('SetupFailedHandler',
                                       modules=['gramex.handlers'])
                urlspec.handler = type(str(spec.handler), (error_handler, ),
                                       class_vars)
                urlspec.handler.setup(**kwargs)

        try:
            handler_entry = tornado.web.URLSpec(
                name=name,
                pattern=_url_normalize(urlspec.pattern),
                handler=urlspec.handler,
                kwargs=kwargs,
            )
        except re.error:
            app_log.error('url: %s: pattern: %s is invalid', name,
                          urlspec.pattern)
            continue
        except Exception:
            app_log.exception('url: %s: invalid', name)
            continue
        _cache[_key] = handler_entry
        handlers.append(handler_entry)

    info.app.clear_handlers()
    info.app.add_handlers('.*$', handlers)
Exemplo n.º 19
0
def init(force_reload=False, **kwargs):
    '''
    Update Gramex configurations and start / restart the instance.

    ``gramex.init()`` can be called any time to refresh configuration files.
    ``gramex.init(key=val)`` adds ``val`` as a configuration layer named
    ``key``. If ``val`` is a Path, it is converted into a PathConfig. (If it is
    Path directory, use ``gramex.yaml``.)

    Services are re-initialised if their configurations have changed. Service
    callbacks are always re-run (even if the configuration hasn't changed.)

    :arg force_reload: if true, re-initialise every service even if its
        configuration is unchanged
    '''
    try:
        setup_secrets(paths['base'] / '.secrets.yaml')
    except Exception as e:
        # Secrets are best-effort. Log the failure and carry on
        app_log.exception(e)

    # Reset variables
    variables.clear()
    variables.update(setup_variables())

    # Initialise configuration layers with provided configurations
    # AttrDicts are updated as-is. Paths are converted to PathConfig
    paths.update(kwargs)
    for key, val in paths.items():
        if isinstance(val, Path):
            if val.is_dir():
                val = val / 'gramex.yaml'
            val = PathConfig(val)
        config_layers[key] = val

    # Locate all config files
    config_files = set()
    for path_config in config_layers.values():
        if hasattr(path_config, '__info__'):
            for pathinfo in path_config.__info__.imports:
                config_files.add(pathinfo.path)
    config_files = list(config_files)

    # Add config file folders to sys.path
    sys.path[:] = _sys_path + [
        str(path.absolute().parent) for path in config_files
    ]

    from . import services
    # gramex.service = gramex.services.info
    globals()['service'] = services.info

    # Override final configurations
    appconfig.clear()
    appconfig.update(+config_layers)
    # --settings.debug => log.root.level = DEBUG
    if appconfig.app.get('settings', {}).get('debug', False):
        appconfig.log.root.level = logging.DEBUG

    # Set up a watch on config files (including imported files)
    if appconfig.app.get('watch', True):
        # Explicit relative import, consistent with `from . import services`
        # above. A bare `from services import ...` fails on Python 3.
        from .services import watcher
        watcher.watch('gramex-reconfig',
                      paths=config_files,
                      on_modified=lambda event: init())

    # Run all valid services. (The "+" before config_chain merges the chain)
    # Services may return callbacks to be run at the end
    for key, val in appconfig.items():
        if key not in conf or conf[key] != val or force_reload:
            if hasattr(services, key):
                app_log.debug('Loading service: %s', key)
                conf[key] = prune_keys(val, {'comment'})
                callback = getattr(services, key)(conf[key])
                if callable(callback):
                    callbacks[key] = callback
            else:
                app_log.error('No service named %s', key)

    # Run the callbacks. Specifically, the app service starts the Tornado ioloop
    for key in (+config_layers).keys():
        if key in callbacks:
            app_log.debug('Running callback: %s', key)
            callbacks[key]()
Exemplo n.º 20
0
 def stop(self):
     '''
     Suspend the task by removing its pending callback from the IOLoop.
     Does nothing if no callback is pending.
     '''
     if self.callback is None:
         return
     app_log.debug('Stopping %s', self.name)
     self.ioloop.remove_timeout(self.callback)
     self.delay = None
     self.callback = None
Exemplo n.º 21
0
 def get(self, *path_args, **path_kwargs):
     '''
     Serve the model file, model parameters, cached data, or run an action.

     The response depends on the query arguments, checked in this order:

     - ``_download``: the model file as an attachment
     - ``_model``: JSON of the model's parameters. For a Pipeline, these
       are the parameters of the first step not named ``transform``. With
       no model, the ``params`` option is used
     - ``_cache``: the stored ``transform`` config (with ``_opts``), the
       stored ``model`` config (with ``_params``), else the loaded data as
       JSON records
     - otherwise: the non-underscore arguments form the input data (falling
       back to the loaded data if empty or invalid). ``_action=predict``
       writes the prediction and ``_action=score`` writes the accuracy
       score against the target column. With input data and no
       ``_action``, the action defaults to ``predict``. With no action,
       the template is rendered (or an empty JSON list if there is no
       template).

     The parent handler's ``get`` is always called at the end.
     '''
     if '_download' in self.args:
         self.set_header('Content-Type', 'application/octet-stream')
         self.set_header(
             'Content-Disposition',
             f'attachment; filename={op.basename(self.model_path)}')
         # Use a context manager so that the file handle is always closed
         with open(self.model_path, 'rb') as handle:
             self.write(handle.read())
     elif '_model' in self.args:
         self._check_model_path()
         if isinstance(self.model, Pipeline):
             # Pick the first step that is not the transformer
             for k, v in self.model.named_steps.items():
                 if k != 'transform':
                     break
             params = v.get_params()
         elif isinstance(self.model, BaseEstimator):
             params = self.model.get_params()
         elif self.model is None:
             params = self.get_opt('params')
         self.write(json.dumps(params, indent=4))
     elif '_cache' in self.args:
         if '_opts' in self.args:
             self.write(json.dumps(self.config_store.load('transform')))
             self.finish()
         elif '_params' in self.args:
             self.write(json.dumps(self.config_store.load('model')))
             self.finish()
         else:
             data = self.load_data()
             if len(data):
                 self.write(data.to_json(orient='records'))
             else:
                 self.write(json.dumps([]))
     else:
         self._check_model_path()
         self.set_header('Content-Type', 'application/json')
         action = self.args.pop('_action', [''])[0]
         try:
             data = pd.DataFrame.from_dict({
                 k: v
                 for k, v in self.args.items() if not k.startswith('_')
             })
             if len(data) > 0 and not action:
                 action = 'predict'
         except Exception as err:
             # Most exceptions have no .msg attribute. Log the exception text
             app_log.debug(str(err))
             data = self.load_data()
         if len(data) == 0:
             data = self.load_data()
         # Separate the target column (if present) from the features
         target_col = self.get_opt('target_col')
         if target_col in data:
             target = data.pop(target_col)
         else:
             target = None
         if action in ('predict', 'score'):
             # Run the prediction in the thread pool
             prediction = yield gramex.service.threadpool.submit(
                 self._predict,
                 data)
             if action == 'predict':
                 self.write(_serialize_prediction(prediction))
             elif action == 'score':
                 score = accuracy_score(target.astype(prediction.dtype),
                                        prediction)
                 self.write(json.dumps({'score': score}, indent=4))
         else:
             if isinstance(self.template, str) and op.isfile(self.template):
                 self.set_header('Content-Type', 'text/html')
                 self.render(self.template,
                             handler=self,
                             data=self.load_data())
             elif self.template:
                 self.set_header('Content-Type', 'text/html')
                 self.render(DEFAULT_TEMPLATE,
                             handler=self,
                             data=self.load_data())
             else:
                 self.set_header('Content-Type', 'application/json')
                 self.write(json.dumps([]))
     super(MLHandler, self).get(*path_args, **path_kwargs)
Exemplo n.º 22
0
    def get(self, *path_args):
        '''
        Proxy the request to ``self.url`` and pass the response back.

        WebSocket upgrade requests are handed to ``WebSocketHandler.get``.
        For other requests:

        1. Request headers are built from ``self.request_headers``. ``*``
           passes all incoming headers, ``True`` passes that incoming header
           as-is, and any other value is formatted with ``handler=self``.
        2. The target URL is ``self.url`` formatted with the path arguments.
           Its query is made of the handler defaults, overridden by the
           URL's own query, overridden by the request arguments.
        3. The optional ``prepare`` function may change the request, and the
           optional ``modify`` function may change the response.
        4. Redirects that point back into the proxied URL are rewritten to
           the handler's own pattern.
        5. The response status, headers and body are passed on, with the
           handler's configured headers taking priority.
        '''
        ws = self.request.headers.get('Upgrade', '') == 'websocket'
        if ws:
            return WebSocketHandler.get(self)
        # Construct HTTP headers
        headers = HTTPHeaders(self.request.headers if self.request_headers.get('*', None) else {})
        for key, val in self.request_headers.items():
            if key == '*':
                continue
            if val is True:
                if key in self.request.headers:
                    headers[key] = self.request.headers[key]
            else:
                headers[key] = str(val).format(handler=self)

        # Update query parameters
        # TODO: use a named capture for path_args? This is not the right method
        parts = urlsplit(self.url.format(*path_args))
        params = {
            key: ([str(v).format(handler=self) for v in val] if isinstance(val, list)
                  else str(val).format(handler=self))
            for key, val in self.default.items()
        }
        params.update(parse_qs(parts.query))
        params.update(self.args)
        query = urlencode(params, doseq=True)
        url = urlunsplit((parts.scheme, parts.netloc, parts.path, query, parts.fragment))

        request = HTTPRequest(
            url=url,
            method=self.request.method,
            headers=headers,
            body=None if self.request.method == 'GET' else self.request.body,
            connect_timeout=self.connect_timeout,
            request_timeout=self.request_timeout,
        )

        if 'prepare' in self.info:
            self.info['prepare'](handler=self, request=request, response=None)

        app_log.debug('%s: proxying %s', self.name, url)
        response = yield self.browser.fetch(request, raise_error=False)

        if response.code in (MOVED_PERMANENTLY, FOUND):
            location = response.headers.get('Location', '')
            # TODO; check if Location: header MATCHES the url, not startswith
            # url: example.org/?x should match Location: example.org/?a=1&x
            # even though location does not start with url.
            if location.startswith(url):
                # Replace the proxied URL prefix (the variable, not the literal
                # text 'url') so that the redirect goes through this handler
                response.headers['Location'] = location.replace(url, self.conf.pattern, 1)

        if 'modify' in self.info:
            self.info['modify'](handler=self, request=request, response=response)

        # Pass on the headers as-is, but override with the handler HTTP headers
        self.set_header('X-Proxy-Url', response.effective_url)
        for header_name, header_value in response.headers.items():
            if header_name not in {'Connection', 'Transfer-Encoding', 'Content-Length'}:
                self.set_header(header_name, header_value)
        # Proxies may send the wrong Content-Length. Correct it, else Tornado raises an error
        if response.body is not None:
            self.set_header('Content-Length', len(response.body))
        for header_name, header_value in self.headers.items():
            self.set_header(header_name, header_value)
        # Pass on HTTP status code and response body as-is
        self.set_status(response.code, reason=response.reason)
        if response.body is not None:
            self.write(response.body)
Exemplo n.º 23
0
 def flush(self):
     '''
     Run the parent class's flush, then flush the underlying store if
     anything has changed, and reset the changed flag.
     '''
     super(HDF5Store, self).flush()
     if not self.changed:
         return
     app_log.debug('Flushing %s', self.path)
     self.store.flush()
     self.changed = False
Exemplo n.º 24
0
 def purge(self):
     '''Log the store path, then run the parent class's purge.'''
     app_log.debug('Purging %s', self.path)
     parent = super(SQLiteStore, self)
     parent.purge()
Exemplo n.º 25
0
 def purge(self):
     '''
     Load every key in the store, then delete the keys that
     ``self.purge_keys`` returns for the loaded items.
     '''
     app_log.debug('Purging %s', self.store)
     # TODO: optimize item retrieval
     items = {}
     for key in self.store.keys():
         items[key] = self.load(key, None)
     for expired in self.purge_keys(items):
         self.store.delete(expired)