def setup_transform(cls, transform):
    cls.transform = {}
    for pattern, trans in transform.items():
        cls.transform[pattern] = {
            'function': build_transform(
                trans, vars=AttrDict((('content', None), ('handler', None))),
                filename='url:%s' % cls.name),
            'headers': trans.get('headers', {}),
            'encoding': trans.get('encoding'),
        }
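
# Usage sketch (hedged): the transform config this expects maps a file pattern
# to a spec. The pattern, expression and header values below are illustrative:
sample_transform = {
    '*.md': {
        'function': 'markdown.markdown(content)',   # content, handler are in scope
        'headers': {'Content-Type': 'text/html'},
        'encoding': 'utf-8',
    },
}
# setup_transform(cls, sample_transform)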
def __init__(self, name, schedule, threadpool, ioloop=None):
    '''
    Create a new task based on a schedule in ioloop (defaults to current).

    The schedule configuration accepts:

    - startup: True to run at startup, '*' to run on every config change
    - minutes, hours, dates, months, weekdays, years: cron schedule
    - thread: True to run in a separate thread
    '''
    self.name = name
    if 'function' not in schedule:
        raise ValueError('schedule %s has no function:' % name)
    if callable(schedule['function']):
        self.function = schedule['function']
    else:
        self.function = build_transform(schedule, vars={}, filename='schedule:%s' % name)
    self.ioloop = ioloop or tornado.ioloop.IOLoop.current()
    self.callback = None

    if schedule.get('thread'):
        fn = self.function

        def on_done(future):
            exception = future.exception(timeout=0)
            if exception:
                app_log.error('%s (thread): %s', name, exception)

        self.function = lambda: threadpool.submit(fn).add_done_callback(on_done)

    # Run on a schedule if any of the schedule periods are specified
    periods = 'minutes hours dates months weekdays years'.split()
    if any(schedule.get(key) for key in periods):
        # Convert all valid values into strings (e.g. 30 => '30'), and ignore any spaces
        cron = (str(schedule.get(key, '*')).replace(' ', '') for key in periods)
        self.cron = CronTab(' '.join(cron))
        self.call_later()
    elif not schedule.get('startup'):
        app_log.warning('schedule:%s has no schedule nor startup', name)

    # Run now if the task is to be run on startup. Don't re-run if the config was reloaded
    startup = schedule.get('startup')
    if startup == '*' or (startup is True and not ioloop_running(self.ioloop)):
        self.function()
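
# Usage sketch (hedged): a schedule entry like this would compile
# 'mymodule.cleanup()' (a hypothetical dotted path) via build_transform, run it
# at :00 and :30 of every hour, and once at startup in the threadpool:
sample_schedule = {
    'function': 'mymodule.cleanup()',
    'minutes': '0,30',
    'startup': True,
    'thread': True,
}
# task = Task('cleanup', sample_schedule, threadpool)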
def _setup(cls, **kwargs):
    override_methods = {
        'open': ['handler'],
        'on_message': ['handler', 'message'],
        'on_close': ['handler'],
        'on_pong': ['handler', 'data'],
        'select_subprotocol': ['handler', 'subprotocols'],
        'get_compression_options': ['handler'],
    }
    for method in override_methods:
        if method in kwargs:
            setattr(cls, method, build_transform(
                kwargs[method],
                vars=OrderedDict((arg, None) for arg in override_methods[method]),
                filename='url:%s.%s' % (cls.name, method)))
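
# Usage sketch (hedged): handler kwargs as they might appear in gramex.yaml.
# The expressions are illustrative; each compiles into a method override:
#   kwargs:
#     on_message: {function: 'handler.write_message(message.upper())'}
#     on_close: {function: 'app_log.info("%s closed", handler.name)'}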
def setup(cls, url, request_headers={}, default={}, prepare=None, modify=None,
          headers={}, connect_timeout=20, request_timeout=20, **kwargs):
    super(ProxyHandler, cls).setup(**kwargs)
    WebSocketHandler._setup(cls, **kwargs)
    cls.url, cls.request_headers, cls.default = url, request_headers, default
    cls.headers = headers
    cls.connect_timeout, cls.request_timeout = connect_timeout, request_timeout
    cls.info = {}
    for key, fn in (('prepare', prepare), ('modify', modify)):
        if fn:
            cls.info[key] = build_transform(
                {'function': fn}, filename='url:%s.%s' % (cls.name, key),
                vars={'handler': None, 'request': None, 'response': None})
    cls.browser = AsyncHTTPClient()
    cls.post = cls.put = cls.delete = cls.patch = cls.options = cls.get
def register_commands(register: Dict[str, str]) -> None:
    '''
    Register new commands to the command list.

    :arg dict register: keys are command names. Values are the Python
        expressions to run to apply the command. The expression can use 3
        variables: ``shape`` (the Shape object to modify), ``spec`` (the
        configuration passed to your command) and ``data``.
    '''
    assert isinstance(register, dict), 'register: must be a dict, not %s' % type(register)
    for key, conf in register.items():
        commands.cmdlist[key] = build_transform(
            {'function': conf},
            vars={'shape': None, 'spec': None, 'data': None},
            iter=False)
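
# Usage sketch (hedged): 'debug' is a hypothetical command name. The expression
# can use shape, spec and data, and is compiled with iter=False:
register_commands({
    'debug': 'print(shape.name, spec, len(data))',
})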
def stack_shapes(collection, change, data, handler):
    '''Stack Shapes if required.'''
    data_len = len(data)
    for shape in collection:
        if shape.name not in change:
            continue
        info = change[shape.name]
        if 'data' in info and info.get('stack') is not None:
            _vars = {'_color': None, 'data': None, 'handler': None}
            if not isinstance(info['data'], dict) or 'function' not in info['data']:
                info['data'] = {'function': '{}'.format(info['data'])}
            args = {'data': data, 'handler': handler}
            data_len = len(build_transform(info['data'], vars=_vars)(**args)[0])
        stack_elements(data_len, shape, stack=info.get('stack'), margin=info.get('margin'))
def apply_transform(data, spec):
    '''Apply a transform on a dataframe.'''
    pandas_transforms = {
        'REPLACE': pd.Series.replace,
        'MAP': pd.Series.map,
        'IN': pd.Series.isin,
        'NOTIN': lambda s, v: ~s.isin(v),
        'CONTAINS': {
            'function': lambda s, v, **ops: s.str.contains(v, **ops),
            'defaults': {'case': False},
        },
        'NOTCONTAINS': {
            'function': lambda s, v, **ops: ~s.str.contains(v, **ops),
            'defaults': {'case': False},
        },
        'LEN': lambda s, _: s.str.len(),
        'LOWER': lambda s, _: s.str.lower(),
        'UPPER': lambda s, _: s.str.upper(),
        'PROPER': lambda s, _: s.str.capitalize(),
        'STARTSWITH': lambda s, v: s.str.startswith(v),
        'ENDSWITH': lambda s, v: s.str.endswith(v),
    }
    # TODO: STRREPLACE
    if spec['type'] == 'function':
        fn = build_transform({'function': spec['expr']}, vars={'data': None},
                             filename='lv: %s' % spec.get('name'))
        fn(data)  # applies on a copy
        return data
    expr = spec['expr']
    func = pandas_transforms[expr['op']]
    kwargs = expr.get('kwargs', {})
    if isinstance(func, dict):
        # Use the function's default kwargs if they are not present in the expr
        for key, val in func.get('defaults', {}).items():
            if key not in kwargs:
                kwargs[key] = val
        func = func['function']
    data[spec['as']] = func(data[expr['col']], expr.get('value'), **kwargs)
    return data
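
# Usage sketch (hedged): a pandas-op spec as this function expects. The 'expr'
# type value is illustrative; anything other than 'function' takes this path:
df = pd.DataFrame({'city': ['Delhi', 'london', 'Tokyo']})
df = apply_transform(df, {
    'type': 'expr',
    'as': 'starts_with_d',
    'expr': {'op': 'STARTSWITH', 'col': 'city', 'value': 'D'},
})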
def test_invalid_change(self):
    fn = build_transform(yaml_parse('function: testlib.dummy.invalid\nargs: []'))
    remove(self.dummy.replace('.py', '.pyc'))
    with io.open(self.dummy, 'w', encoding='utf-8') as handle:
        handle.write('def invalid():\n\tsyntax error\n')
    with assert_raises(SyntaxError):
        fn()
    remove(self.dummy.replace('.py', '.pyc'))
    with io.open(self.dummy, 'w', encoding='utf-8') as handle:
        handle.write('1/0\ndef invalid():\n\treturn 100\n')
    with assert_raises(ZeroDivisionError):
        fn()
    remove(self.dummy.replace('.py', '.pyc'))
    with io.open(self.dummy, 'w', encoding='utf-8') as handle:
        handle.write('def invalid():\n\treturn 100\n')
    eq_(fn(), [100])
def load_data(data_config, handler=None):
    '''Load data using gramex.cache.'''
    if not isinstance(data_config, (dict, AttrDict)):
        raise ValueError('Data argument must be a dict-like object.')
    data = {}
    for key, conf in data_config.items():
        if isinstance(conf, (dict, AttrDict)):
            if 'function' in conf:
                data[key] = build_transform(conf, vars={'handler': None})(handler=handler)[0]
            elif conf.get('ext') in {'yaml', 'yml', 'json'}:
                data[key] = gramex.cache.open(conf.pop('url'), conf.pop('ext'), **dict(conf))
            elif 'url' in conf:
                data[key] = gramex.data.filter(conf.pop('url'), **dict(conf))
        else:
            data[key] = conf
    return data
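
# Usage sketch (hedged): each key becomes one dataset. File names and the
# function expression below are illustrative:
datasets = load_data({
    'sales': {'url': 'sales.csv'},                      # via gramex.data.filter
    'config': {'url': 'app.yaml', 'ext': 'yaml'},       # via gramex.cache.open
    'greeting': {'function': '"Hello, %s" % handler'},  # via build_transform
    'title': 'Quarterly report',                        # non-dict values pass through
})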
def __init__(self, **kwargs):
    self.params = kwargs
    self.url = 'https://stream.twitter.com/1.1/statuses/filter.json'
    self.valid_params = {
        'follow', 'track', 'locations', 'delimited', 'stall_warnings',
        'filter_level', 'language'}
    self.enabled = True
    self.delay = 0

    # Set up writers
    if 'path' in kwargs:
        self.stream = StreamWriter(kwargs['path'], flush=kwargs.get('flush', False))
        self.process_bytes = self.stream.write
    elif 'function' in kwargs:
        self.process_json = build_transform(
            kwargs, vars={'message': {}}, filename='TwitterStream:function')
    elif kwargs.get('driver') == 'sqlalchemy':
        engine = gramex.data.create_engine(kwargs['url'], **kwargs.get('parameters', {}))
        table = gramex.data.get_table(kwargs['table'])
        fields = kwargs['fields']
        for field in list(fields.keys()):
            if field not in table.columns:
                app_log.error('TwitterStream field %s not in table' % field)
                fields.pop(field)
        flatten = flattener(fields=fields)
        self.process_json = lambda tweet: engine.execute(table.insert(flatten(tweet)))

    self.buf = bytearray()
    self.client = tornado.httpclient.HTTPClient()
    while True:
        # Set .enabled to False to temporarily disable the streamer
        if self.enabled:
            params = {key: val.encode('utf-8') for key, val in self.params.items()
                      if key in self.valid_params}
            if 'follow' not in params and 'track' not in params and 'locations' not in params:
                self.enabled = False
                self.delay = 5
                app_log.error('TwitterStream needs follow, track or locations. Disabling')
            else:
                self.fetch_tweets(params)
        # Restart after a delay determined by .delay
        time.sleep(self.delay)
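
# Usage sketch (hedged): three ways to configure the writers above. The path,
# function expression and table details are illustrative:
# TwitterStream(track='gramex', path='tweets.jsonl', flush=True)
# TwitterStream(track='gramex', function='mymodule.save_tweet(message)')
# TwitterStream(track='gramex', driver='sqlalchemy', url='sqlite:///tweets.db',
#               table='tweets', fields={'id': 'id_str', 'text': 'text'})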
def setup(cls, **kwargs):
    super(DataHandler, cls).setup(**kwargs)
    cls.setup_data(kwargs)
    driver = kwargs.get('driver')
    cls.driver_name = driver
    if driver == 'sqlalchemy':
        cls.driver_method = cls._sqlalchemy
        # Create a cached metadata store for SQLAlchemy engines
        cls.meta = sa.MetaData()
    elif driver == 'blaze':
        cls.driver_method = cls._blaze
    else:
        raise NotImplementedError('driver=%s is not supported yet.' % driver)
    posttransform = kwargs.get('posttransform', {})
    cls.posttransform = []
    if 'function' in posttransform:
        cls.posttransform.append(
            build_transform(posttransform, vars=AttrDict(content=None),
                            filename='url:%s' % cls.name))
def check_transform(self, transform, yaml_code, vars=None, cache=True, iter=True):
    fn = build_transform(yaml_parse(yaml_code), vars=vars, cache=cache, iter=iter)
    eqfn(fn, transform)
    return fn
def setup(cls, **kwargs):
    super(FormHandler, cls).setup(**kwargs)
    conf_kwargs = merge(
        AttrDict(kwargs),
        objectpath(gramex_conf, 'handlers.FormHandler', {}),
        'setdefault')
    cls.headers = conf_kwargs.pop('headers', {})
    # Top level formats: key is special. Don't treat it as data
    cls.formats = conf_kwargs.pop('formats', {})
    default_config = conf_kwargs.pop('default', None)
    # Remove other known special keys from dataset configuration
    cls.clear_special_keys(conf_kwargs)
    # If top level has url: then the data spec is at the top level. Else it's a set of sub-keys
    if 'url' in conf_kwargs:
        cls.datasets = AttrDict(data=conf_kwargs)
        cls.single = True
    else:
        if 'modify' in conf_kwargs:
            cls.modify_all = staticmethod(build_transform(
                conf={'function': conf_kwargs.pop('modify', None)},
                vars=cls.function_vars['modify'],
                filename='%s.%s' % (cls.name, 'modify'), iter=False))
        cls.datasets = conf_kwargs
        cls.single = False
    # Apply defaults to each key
    if isinstance(default_config, dict):
        for key in cls.datasets:
            config = cls.datasets[key].get('default', {})
            cls.datasets[key]['default'] = merge(config, default_config, mode='setdefault')
    # Ensure that each dataset is a dict with a url: key at least
    for key, dataset in list(cls.datasets.items()):
        if not isinstance(dataset, dict):
            app_log.error('%s: %s: must be a dict, not %r' % (cls.name, key, dataset))
            del cls.datasets[key]
        elif 'url' not in dataset:
            app_log.error('%s: %s: does not have a url: key' % (cls.name, key))
            del cls.datasets[key]
        # Ensure that id: is a list -- if it exists
        if 'id' in dataset and not isinstance(dataset['id'], list):
            dataset['id'] = [dataset['id']]
        # Convert function: into a data = transform(data) function
        conf = {
            'function': dataset.pop('function', None),
            'args': dataset.pop('args', None),
            'kwargs': dataset.pop('kwargs', None),
        }
        if conf['function'] is not None:
            fn_name = '%s.%s.transform' % (cls.name, key)
            dataset['transform'] = build_transform(
                conf, vars={'data': None, 'handler': None},
                filename=fn_name, iter=False)
        # Convert modify: and prepare: into a data = modify(data) function
        for fn, fn_vars in cls.function_vars.items():
            if fn in dataset:
                dataset[fn] = build_transform(
                    conf={'function': dataset[fn]}, vars=fn_vars,
                    filename='%s.%s.%s' % (cls.name, key, fn), iter=False)
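
# Usage sketch (hedged): FormHandler kwargs as they might appear in gramex.yaml.
# The expressions are illustrative; they compile via build_transform above:
#   kwargs:
#     url: sales.csv
#     function: data.groupby('city').sum()   # becomes dataset['transform']
#     modify: data.head(10)                  # becomes dataset['modify']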
def setup(cls, prepare=None, action=None, delay=None, session_expiry=None,
          session_inactive=None, user_key='user', lookup=None, recaptcha=None,
          **kwargs):
    # Switch SSL certificates if required to access Google, etc
    gramex.service.threadpool.submit(check_old_certs)

    # Set up default redirection based on ?next=...
    if 'redirect' not in kwargs:
        kwargs['redirect'] = AttrDict([('query', 'next'), ('header', 'Referer')])
    super(AuthHandler, cls).setup(**kwargs)

    # Set up logging for login/logout events
    logger = logging.getLogger('gramex.user')
    keys = objectpath(gramex.conf, 'log.handlers.user.keys', [])
    log_info = build_log_info(keys, 'event')
    cls.log_user_event = lambda handler, event: logger.info(log_info(handler, event))

    # Count failed logins
    cls.failed_logins = Counter()
    # Set the delay for failed logins from the delay: parameter, which can be a number or list
    default_delay = [1, 1, 5]
    cls.delay = delay
    if isinstance(cls.delay, list) and not all(
            isinstance(n, (int, float)) for n in cls.delay):
        app_log.warning('%s: Ignoring invalid delay: %r', cls.name, cls.delay)
        cls.delay = default_delay
    elif isinstance(cls.delay, (int, float)) or cls.delay is None:
        cls.delay = default_delay

    # Set up session user key, session expiry and inactive expiry
    cls.session_user_key = user_key
    cls.session_expiry = session_expiry
    cls.session_inactive = session_inactive

    # Set up lookup. Split a copy into cls.lookup_id, which has the ID, and
    # cls.lookup, which has the gramex.data keywords.
    cls.lookup = None
    if lookup is not None:
        cls.lookup = lookup.copy()
        if isinstance(lookup, dict):
            cls.lookup_id = cls.lookup.pop('id', 'user')
        else:
            app_log.error('%s: lookup must be a dict, not %s', cls.name, cls.lookup)

    # Set up prepare
    cls.auth_methods = {}
    if prepare is not None:
        cls.auth_methods['prepare'] = build_transform(
            conf={'function': prepare},
            vars={'handler': None, 'args': None},
            filename='url:%s:prepare' % cls.name, iter=False)

    # Prepare recaptcha
    if recaptcha is not None:
        if 'key' not in recaptcha:
            app_log.error('%s: recaptcha.key missing', cls.name)
        elif 'secret' not in recaptcha:
            app_log.error('%s: recaptcha.secret missing', cls.name)
        else:
            recaptcha.setdefault('action', 'login')
            cls.auth_methods['recaptcha'] = cls.check_recaptcha

    # Set up post-login actions
    cls.actions = []
    if action is not None:
        if not isinstance(action, list):
            action = [action]
        for conf in action:
            cls.actions.append(build_transform(
                conf, vars=AttrDict(handler=None),
                filename='url:%s:%s' % (cls.name, conf.function)))
def load_data(_conf, _default_key: str = None, **kwargs) -> dict:
    '''
    Loads datasets based on configuration and returns a dict of those datasets.

    :arg dataset _conf: The dataset configuration
    :arg str _default_key: Can be ``function``, ``url`` or ``None`` (default).
        If specified, it converts string data configurations into
        ``{_default_key: _conf}``.
    :return: A dict of datasets loaded based on the configuration.

    ``_conf`` is processed as follows:

    - String ``'data.xlsx'`` is loaded via :py:func:`gramex.cache.open` into
      ``{data: ...}`` if ``_default_key == 'url'``
    - String ``'data[0]'`` is evaluated via
      :py:func:`gramex.transforms.build_transform` into ``{data: ...}`` if
      ``_default_key == 'function'``
    - String ``anything`` raises an Exception if ``_default_key`` is None
    - Dict ``{url: ...}`` is loaded with :py:func:`gramex.data.filter` into ``{data: ...}``
    - Dict ``{function: ...}`` is evaluated via
      :py:func:`gramex.transforms.build_transform` into ``{data: ...}``
    - Dict ``{x: ..., y: ...}`` loads the respective datasets into ``x`` and ``y``
      instead of ``data``. Each dataset is processed using the above rules.
    - Any other datatype passed is returned as-is in ``{data: ...}``

    Any keyword arguments passed are also added to the resulting dataset, but
    overwritten only if ``_conf`` loaded a dataset that's not ``None``.
    '''
    def str2conf(data, key):
        '''Convert string configurations to {url: str} or {function: str} based on _default_key'''
        # If data is not a string, return it as-is
        if not isinstance(data, str):
            return data
        # If data is a string, return {_default_key: data} (or raise a TypeError)
        if _default_key is not None:
            return {_default_key: data}
        raise TypeError('%s: must be a dict, not %r' % (key, data))

    data = str2conf(_conf, 'data')
    if not isinstance(data, dict) or 'url' in data or 'function' in data:
        data = {'data': data}
    data = {key: str2conf(conf, key) for key, conf in data.items()}
    for key, conf in data.items():
        if isinstance(conf, dict):
            conf = copy.copy(conf)
            if 'url' in conf:
                if 'transform' in conf:
                    conf['transform'] = build_transform(
                        {'function': conf['transform']},
                        vars={'data': None, 'handler': None},
                        filename='PPTXHandler:data.%s' % key, iter=False)
                data[key] = gramex.data.filter(**conf)
            if 'function' in conf:
                # Let functions use previously defined data variables, including the current one
                _kwargs = {**kwargs, **data}
                _vars = {key: None for key in _kwargs}
                data[key] = build_transform(conf, vars=_vars, iter=False)(**_kwargs)
    # If a dataset returns None, don't overwrite the default kwargs.
    # This allows defaults to pass through if a dataset is specified as None.
    for key, val in data.items():
        if (key not in kwargs) or (val is not None):
            kwargs[key] = val
    return kwargs
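
# Usage sketch (hedged): string configs resolve via _default_key. File names
# and expressions are illustrative:
out = load_data('sales.xlsx', _default_key='url')        # {'data': <sales.xlsx data>}
out = load_data({'x': 'sales.xlsx', 'y': {'function': 'x.head(5)'}},
                _default_key='url')                      # y's function can reference x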
def create_alert(name, alert):
    '''Generate the function to be run by alert() using the alert configuration'''
    # Configure the email service
    if alert.get('service', None) is None:
        if len(info.email) > 0:
            alert['service'] = list(info.email.keys())[0]
            app_log.warning('alert: %s: using first email service: %s', name, alert['service'])
        else:
            app_log.error('alert: %s: define an email: service to use', name)
            return
    service = alert['service']
    mailer = info.email.get(service, None)
    if mailer is None:
        app_log.error('alert: %s: undefined email service: %s', name, service)
        return

    # - Warn if to, cc, bcc exists and is not a string or list of strings. Ignore incorrect
    #   values: if to: [1, '*****@*****.**'], then log a warning about the 1 and drop it,
    #   so to: becomes ['*****@*****.**']
    # Error if to, cc, bcc are all missing, return None
    if not any(key in alert for key in ['to', 'cc', 'bcc']):
        app_log.error('alert: %s: missing to/cc/bcc', name)
        return
    # Ensure that the config has the right types (str, dict, list)
    contentfields = ['body', 'html', 'bodyfile', 'htmlfile', 'markdown', 'markdownfile']
    addr_fields = ['to', 'cc', 'bcc', 'reply_to', 'on_behalf_of', 'from']
    for key in ['subject'] + addr_fields + contentfields:
        if not isinstance(alert.get(key, ''), string_types + (list,)):
            app_log.error('alert: %s.%s: %r must be a list or str', name, key, alert[key])
            return
    if not isinstance(alert.get('images', {}), dict):
        app_log.error('alert: %s.images: %r is not a dict', name, alert['images'])
        return
    if not isinstance(alert.get('attachments', []), list):
        app_log.error('alert: %s.attachments: %r is not a list', name, alert['attachments'])
        return

    # Warn if the subject is missing
    if 'subject' not in alert:
        app_log.warning('alert: %s: missing subject', name)
    # Warn if body, html, bodyfile, htmlfile keys are all missing
    if not any(key in alert for key in contentfields):
        app_log.warning('alert: %s: missing body/html/bodyfile/htmlfile/...', name)

    # Pre-compile data.
    # - `data: {key: [...]}` -- loads data in-place
    # - `data: {key: {url: file}}` -- loads from a file
    # - `data: {key: {url: sqlalchemy-url, table: table}}` -- loads from a database
    # - `data: file` -- same as `data: {data: {url: file}}`
    # - `data: {key: file}` -- same as `data: {key: {url: file}}`
    # - `data: [...]` -- same as `data: {data: [...]}`
    datasets = {}
    if 'data' in alert:
        if isinstance(alert['data'], string_types):
            datasets = {'data': {'url': alert['data']}}
        elif isinstance(alert['data'], list):
            datasets = {'data': alert['data']}
        elif isinstance(alert['data'], dict):
            for key, dataset in alert['data'].items():
                if isinstance(dataset, string_types):
                    datasets[key] = {'url': dataset}
                elif isinstance(dataset, list) or 'url' in dataset:
                    datasets[key] = dataset
                else:
                    app_log.error('alert: %s.data: %s is missing url:', name, key)
        else:
            app_log.error('alert: %s.data: must be a data file or dict. Not %s',
                          name, repr(alert['data']))

    if 'each' in alert and alert['each'] not in datasets:
        app_log.error('alert: %s.each: %s is not in data:', name, alert['each'])
        return

    vars = {key: None for key in datasets}
    vars.update({'config': None, 'args': None})
    condition = build_transform(
        {'function': alert.get('condition', 'True')},
        filename='alert: %s' % name, vars=vars, iter=False)
    alert_logger = logging.getLogger('gramex.alert')

    def load_datasets(data, each):
        '''
        Modify data by loading datasets and filtering by condition.
        Modify each to apply the each: argument, else return (None, None)
        '''
        for key, val in datasets.items():
            # Allow raw data in lists as-is. Treat dicts as {url: ...}
            data[key] = val if isinstance(val, list) else gramex.data.filter(**val)
        result = condition(**data)
        # Avoiding isinstance(result, pd.DataFrame) to avoid importing pandas
        if type(result).__name__ == 'DataFrame':
            data['data'] = result
        elif isinstance(result, dict):
            data.update(result)
        elif not result:
            app_log.debug('alert: %s stopped. condition = %s', name, result)
            return
        if 'each' in alert:
            each_data = data[alert['each']]
            if isinstance(each_data, dict):
                each += list(each_data.items())
            elif isinstance(each_data, list):
                each += list(enumerate(each_data))
            elif hasattr(each_data, 'iterrows'):
                each += list(each_data.iterrows())
            else:
                raise ValueError('alert: %s: each: data.%s must be dict/list/DF, not %s' % (
                    name, alert['each'], type(each_data)))
        else:
            each.append((0, None))

    def create_mail(data):
        '''Return kwargs that can be passed to mailer.mail'''
        mail = {}
        for key in ['bodyfile', 'htmlfile', 'markdownfile']:
            target = key.replace('file', '')
            if key in alert and target not in alert:
                path = _tmpl(alert[key]).generate(**data).decode('utf-8')
                tmpl = gramex.cache.open(path, 'template')
                mail[target] = tmpl.generate(**data).decode('utf-8')
        for key in addr_fields + ['subject', 'body', 'html', 'markdown']:
            if key not in alert:
                continue
            if isinstance(alert[key], list):
                mail[key] = [_tmpl(v).generate(**data).decode('utf-8') for v in alert[key]]
            else:
                mail[key] = _tmpl(alert[key]).generate(**data).decode('utf-8')
        headers = {}
        # user: {id: ...} creates an X-Gramex-User header to mimic the user
        if 'user' in alert:
            user = deepcopy(alert['user'])
            for key, val, node in walk(user):
                node[key] = _tmpl(val).generate(**data).decode('utf-8')
            user = json.dumps(user, ensure_ascii=True, separators=(',', ':'))
            headers['X-Gramex-User'] = tornado.web.create_signed_value(
                info.app.settings['cookie_secret'], 'user', user)
        if 'markdown' in mail:
            mail['html'] = _markdown_convert(mail.pop('markdown'))
        if 'images' in alert:
            mail['images'] = {}
            for cid, val in alert['images'].items():
                urlpath = _tmpl(val).generate(**data).decode('utf-8')
                urldata = urlfetch(urlpath, info=True, headers=headers)
                if urldata['content_type'].startswith('image/'):
                    mail['images'][cid] = urldata['name']
                else:
                    with io.open(urldata['name'], 'rb') as temp_file:
                        bytestoread = 80
                        first_line = temp_file.read(bytestoread)
                    # TODO: let the admin know that the image was not processed
                    app_log.error(
                        'alert: %s: %s: %d (%s) not an image: %s\n%r',
                        name, cid, urldata['r'].status_code,
                        urldata['content_type'], urlpath, first_line)
        if 'attachments' in alert:
            mail['attachments'] = [
                urlfetch(_tmpl(v).generate(**data).decode('utf-8'), headers=headers)
                for v in alert['attachments']]
        return mail

    def run_alert(callback=None, args=None):
        '''
        Runs the configured alert. If a callback is specified, calls the
        callback with all email arguments. Else sends the email.
        If args= is specified, add it as data['args'].
        '''
        app_log.info('alert: %s running', name)
        data, each, fail = {'config': alert, 'args': {} if args is None else args}, [], []
        try:
            load_datasets(data, each)
        except Exception as e:
            app_log.exception('alert: %s data processing failed', name)
            fail.append({'error': e})

        retval = []
        for index, row in each:
            data['index'], data['row'], data['config'] = index, row, alert
            try:
                retval.append(AttrDict(index=index, row=row, mail=create_mail(data)))
            except Exception as e:
                app_log.exception('alert: %s[%s] templating (row=%r)', name, index, row)
                fail.append({'index': index, 'row': row, 'error': e})

        callback = mailer.mail if not callable(callback) else callback
        done = []
        for v in retval:
            try:
                callback(**v.mail)
            except Exception as e:
                fail.append({'index': v.index, 'row': v.row, 'mail': v.mail, 'error': e})
                app_log.exception('alert: %s[%s] delivery (row=%r)', name, v.index, v.row)
            else:
                done.append(v)
                event = {
                    'alert': name, 'service': service, 'from': mailer.email or '',
                    'to': '', 'cc': '', 'bcc': '', 'subject': '',
                    'datetime': datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%SZ'),
                }
                event.update({k: v for k, v in v.mail.items() if k in event})
                event['attachments'] = ', '.join(v.mail.get('attachments', []))
                alert_logger.info(event)

        # Run notifications
        args = {'done': done, 'fail': fail}
        for notification_name in alert.get('notify', []):
            notify = info.alert.get(notification_name)
            if notify is not None:
                notify.run(callback=callback, args=args)
            else:
                app_log.error('alert: %s.notify: alert %s not defined', name, notification_name)
        return args

    return run_alert
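
# Usage sketch (hedged): an alert config roughly as it would appear under
# alert: in gramex.yaml. The service name, file and columns are illustrative:
sample_alert = {
    'service': 'default-email',
    'data': {'sales': {'url': 'sales.csv'}},
    'condition': 'len(sales) > 0',       # skip the alert when there are no rows
    'each': 'sales',                     # one mail per row
    'to': '{{ row["email"] }}',
    'subject': 'Low revenue in {{ row["city"] }}',
    'body': 'Revenue dropped to {{ row["revenue"] }}',
}
# run = create_alert('low-revenue', sample_alert)
# run()  # or run(callback=fn) to capture the mail kwargs instead of sending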
def setup(cls, data=None, model={}, config_dir='', **kwargs):
    cls.slug = slugify(cls.name)
    # Create the config store directory
    if not config_dir:
        config_dir = op.join(gramex.config.variables['GRAMEXDATA'], 'apps', 'mlhandler',
                             cls.slug)
    _mkdir(config_dir)
    cls.config_dir = config_dir
    cls.config_store = cache.JSONStore(op.join(cls.config_dir, 'config.json'), flush=None)
    cls.data_store = op.join(cls.config_dir, 'data.h5')
    cls.template = kwargs.pop('template', DEFAULT_TEMPLATE)
    super(MLHandler, cls).setup(**kwargs)
    try:
        if 'transform' in data:
            data['transform'] = build_transform(
                {'function': data['transform']},
                vars={'data': None, 'handler': None},
                filename='MLHandler:data', iter=False)
            cls._built_transform = staticmethod(data['transform'])
        else:
            cls._built_transform = staticmethod(lambda x: x)
        data = gdata.filter(**data)
        cls.store_data(data)
    except TypeError:
        app_log.warning('MLHandler could not find training data.')
        data = None
        cls._built_transform = staticmethod(lambda x: x)

    default_model_path = op.join(cls.config_dir, slugify(cls.name) + '.pkl')
    cls.model_path = model.pop('path', default_model_path)

    # Store the model kwargs from gramex.yaml into the store
    for key in TRANSFORMS:
        cls.set_opt(key, model.get(key, cls.get_opt(key)))
    # Remove target_col if it appears anywhere in cats or nums
    target_col = cls.get_opt('target_col')
    cls.set_opt('cats', list(set(cls.get_opt('cats')) - {target_col}))
    cls.set_opt('nums', list(set(cls.get_opt('nums')) - {target_col}))

    cls.set_opt('class', model.get('class'))
    cls.set_opt('params', model.get('params', {}))
    if op.exists(cls.model_path):
        # If the pkl exists, load it
        cls.model = joblib.load(cls.model_path)
    elif data is not None:
        mclass = cls.get_opt('class', model.get('class', False))
        params = cls.get_opt('params', {})
        data = cls._filtercols(data)
        data = cls._filterrows(data)
        cls.model = cls._assemble_pipeline(data, mclass=mclass, params=params)

        # Train the model
        target = data[target_col]
        train = data[[c for c in data if c != target_col]]
        gramex.service.threadpool.submit(
            _fit, cls.model, train, target, cls.model_path, cls.name)
    cls.config_store.flush()
def create_alert(name, alert):
    '''Generate the function to be run by alert() using the alert configuration'''
    # Configure the email service
    if alert.get('service', None) is None:
        if len(info.email) > 0:
            alert['service'] = list(info.email.keys())[0]
            app_log.warning('alert: %s: using first email service: %s', name, alert['service'])
        else:
            app_log.error('alert: %s: define an email: service to use', name)
            return
    service = alert['service']
    mailer = info.email.get(service, None)
    if mailer is None:
        app_log.error('alert: %s: undefined email service: %s', name, service)
        return

    # - Warn if to, cc, bcc exists and is not a string or list of strings. Ignore incorrect
    #   values: if to: [1, '*****@*****.**'], then log a warning about the 1 and drop it,
    #   so to: becomes ['*****@*****.**']
    # Error if to, cc, bcc are all missing, return None
    if not any(key in alert for key in ['to', 'cc', 'bcc']):
        app_log.error('alert: %s: missing to/cc/bcc', name)
        return
    # Warn if the subject is missing
    if 'subject' not in alert:
        app_log.warning('alert: %s: missing subject', name)
    # Warn if body, html, bodyfile, htmlfile keys are all missing
    contentfields = ['body', 'html', 'bodyfile', 'htmlfile', 'markdown', 'markdownfile']
    if not any(key in alert for key in contentfields):
        app_log.warning('alert: %s: missing body/html/bodyfile/htmlfile/...', name)

    # Precompile templates
    templates = {}
    for key in ['to', 'cc', 'bcc', 'from', 'subject'] + contentfields:
        if key in alert:
            tmpl = alert[key]
            if isinstance(tmpl, string_types):
                templates[key] = Template(tmpl)
            elif isinstance(tmpl, list):
                templates[key] = [Template(subtmpl) for subtmpl in tmpl]
            else:
                app_log.error('alert: %s: %s: %r must be a list or str', name, key, tmpl)
                return
    if 'images' in alert:
        images = alert['images']
        if isinstance(images, dict):
            templates['images'] = {cid: Template(path) for cid, path in images.items()}
        else:
            app_log.error('alert: %s images: %r is not a dict', name, images)
    if 'attachments' in alert:
        attachments = alert['attachments']
        if isinstance(attachments, list):
            templates['attachments'] = [Template(path) for path in attachments]

    # Pre-compile data.
    # - `data: {key: [...]}` -- loads data in-place
    # - `data: {key: {url: file}}` -- loads from a file
    # - `data: {key: {url: sqlalchemy-url, table: table}}` -- loads from a database
    # - `data: file` -- same as `data: {data: {url: file}}`
    # - `data: {key: file}` -- same as `data: {key: {url: file}}`
    # - `data: [...]` -- same as `data: {data: [...]}`
    datasets = {}
    if 'data' in alert:
        if isinstance(alert['data'], string_types):
            datasets = {'data': {'url': alert['data']}}
        elif isinstance(alert['data'], list):
            datasets = {'data': alert['data']}
        elif isinstance(alert['data'], dict):
            for key, dataset in alert['data'].items():
                if isinstance(dataset, string_types):
                    datasets[key] = {'url': dataset}
                elif isinstance(dataset, list) or 'url' in dataset:
                    datasets[key] = dataset
                else:
                    app_log.error('alert: %s data: %s is missing url:', name, key)
        else:
            app_log.error('alert: %s data: must be a data file or dict. Not %s',
                          name, repr(alert['data']))

    if 'each' in alert and alert['each'] not in datasets:
        app_log.error('alert: %s each: %s is not in data:', name, alert['each'])
        return

    vars = {key: None for key in datasets}
    vars['config'] = None
    condition = build_transform(
        {'function': alert.get('condition', 'True')},
        filename='alert: %s' % name, vars=vars, iter=False)
    alert_logger = logging.getLogger('gramex.alert')

    def run_alert(callback=None):
        '''
        Runs the configured alert. If a callback is specified, calls the
        callback with all email arguments. Else sends the email.
        '''
        app_log.info('alert: %s running', name)
        data = {'config': alert}
        for key, dataset in datasets.items():
            # Allow raw data in lists as-is. Treat dicts as {url: ...}
            data[key] = dataset if isinstance(dataset, list) else gramex.data.filter(**dataset)

        result = condition(**data)
        # Avoiding isinstance(result, pd.DataFrame) to avoid importing pandas
        if type(result).__name__ == 'DataFrame':
            data['data'] = result
        elif isinstance(result, dict):
            data.update(result)
        elif not result:
            app_log.debug('alert: %s stopped. condition = %s', name, result)
            return

        each = [(None, None)]
        if 'each' in alert:
            each_data = data[alert['each']]
            if isinstance(each_data, dict):
                each = list(each_data.items())
            elif isinstance(each_data, list):
                each = list(enumerate(each_data))
            elif hasattr(each_data, 'iterrows'):
                each = list(each_data.iterrows())
            else:
                app_log.error(
                    'alert: %s: each: requires data.%s to be a dict/list/DataFrame',
                    name, alert['each'])
                return

        kwargslist = []
        for index, row in each:
            data['index'], data['row'], data['config'] = index, row, alert
            # Generate email content
            kwargs = {}
            kwargslist.append(kwargs)
            for key in ['bodyfile', 'htmlfile', 'markdownfile']:
                target = key.replace('file', '')
                if key in templates and target not in templates:
                    path = templates[key].generate(**data).decode('utf-8')
                    tmpl = gramex.cache.open(path, 'template')
                    kwargs[target] = tmpl.generate(**data).decode('utf-8')
            try:
                for key in ['to', 'cc', 'bcc', 'from', 'subject', 'body', 'html', 'markdown']:
                    if key in templates:
                        tmpl = templates[key]
                        if isinstance(tmpl, list):
                            kwargs[key] = []
                            for subtmpl in tmpl:
                                kwargs[key].append(subtmpl.generate(**data).decode('utf-8'))
                        else:
                            kwargs[key] = tmpl.generate(**data).decode('utf-8')
            except Exception:
                # If any template raises an exception, log it and continue with the next email
                app_log.exception('alert: %s(#%s).%s: Template exception', name, index, key)
                continue
            headers = {}
            # user: {id: ...} creates an X-Gramex-User header to mimic the user
            if 'user' in alert:
                user = json.dumps(alert['user'], ensure_ascii=True, separators=(',', ':'))
                headers['X-Gramex-User'] = tornado.web.create_signed_value(
                    info.app.settings['cookie_secret'], 'user', user)
            if 'markdown' in kwargs:
                kwargs['html'] = _markdown_convert(kwargs.pop('markdown'))
            if 'images' in templates:
                kwargs['images'] = {}
                for cid, val in templates['images'].items():
                    urlpath = val.generate(**data).decode('utf-8')
                    urldata = urlfetch(urlpath, info=True, headers=headers)
                    if urldata['content_type'].startswith('image/'):
                        kwargs['images'][cid] = urldata['name']
                    else:
                        with io.open(urldata['name'], 'rb') as temp_file:
                            bytestoread = 80
                            first_line = temp_file.read(bytestoread)
                        app_log.error(
                            'alert: %s: %s: %d (%s) not an image: %s\n%r',
                            name, cid, urldata['r'].status_code,
                            urldata['content_type'], urlpath, first_line)
            if 'attachments' in templates:
                kwargs['attachments'] = [
                    urlfetch(attachment.generate(**data).decode('utf-8'), headers=headers)
                    for attachment in templates['attachments']]
            if callable(callback):
                return callback(**kwargs)
            # Email the recipient. TODO: run this in a queue. (Anand)
            mailer.mail(**kwargs)
            # Log the event
            event = {
                'alert': name, 'service': service, 'from': mailer.email or '',
                'to': '', 'cc': '', 'bcc': '', 'subject': '',
                'datetime': datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%SZ'),
            }
            event.update({k: v for k, v in kwargs.items() if k in event})
            event['attachments'] = ', '.join(kwargs.get('attachments', []))
            alert_logger.info(event)
        return kwargslist

    return run_alert
def change_shapes(collection, change, data, handler, **kwargs):
    '''
    Apply changes to a collection of shapes in the context of data.
    ``collection`` is a slide.shapes or group shapes.
    ``change`` is typically a dict of <shape-name>: commands.
    ``data`` is a dictionary passed to the template engine.
    '''
    prs = kwargs.get('prs')
    new_slide = kwargs.get('new_slide')
    copy_slide = kwargs.get('copy_slide', False)
    source_slide = kwargs.get('source_slide')
    dest = prs.slides.add_slide(new_slide) if copy_slide else None
    mapping = {}
    for shape in collection:
        if shape.name not in change:
            copy_slide_elem(shape, dest)
            continue
        spec = change[shape.name]
        if shape.name not in mapping:
            mapping[shape.name] = 0
        if spec.get('data'):
            if not isinstance(spec['data'], dict):
                spec['data'] = {
                    'function': '{}'.format(spec['data'])
                    if not isinstance(spec['data'], six.string_types) else spec['data']
                }
            shape_data = build_transform(
                spec['data'], vars={'data': None, 'handler': None})(
                    data=data, handler=handler)[0]
        else:
            if isinstance(data, (dict, AttrDict)) and 'handler' in data:
                data.pop('handler')
            shape_data = copy.deepcopy(data)
        if isinstance(shape_data, (dict, AttrDict)):
            shape_data['handler'] = handler
        if spec.get('stack'):
            shape_data = shape_data[mapping[shape.name]]
            mapping[shape.name] = mapping[shape.name] + 1
        # If the shape is a group, apply the spec to each sub-shape
        if is_group(shape):
            sub_shapes = SlideShapes(shape.element, collection)
            change_shapes(sub_shapes, spec, shape_data, handler)
        # Add args to shape_data
        if hasattr(handler, 'args'):
            args = {k: v[0] for k, v in handler.args.items() if len(v) > 0}
            shape_data['args'] = args
        # Run the commands in the spec
        for cmd, method in COMMANDS_LIST.items():
            if cmd in spec:
                method(shape, spec, shape_data)
        copy_slide_elem(shape, dest)
    add_new_slide(dest, source_slide)
def setup_error(cls, error):
    '''
    Sample configuration::

        error:
            404:
                path: template.json         # Use a template
                autoescape: false           # with no autoescape
                whitespace: single          # as a single line
                headers:
                    Content-Type: application/json
            500:
                function: module.fn
                args: [=status_code, =kwargs, =handler]
    '''
    if not error:
        return
    if not isinstance(error, dict):
        return app_log.error('url:%s.error is not a dict', cls.name)
    # Compile all error handlers
    cls.error = {}
    for error_code, error_config in error.items():
        try:
            error_code = int(error_code)
            if error_code < 100 or error_code > 1000:
                raise ValueError()
        except ValueError:
            app_log.error('url.%s.error code %s is not a number (100 - 1000)',
                          cls.name, error_code)
            continue
        if not isinstance(error_config, dict):
            return app_log.error('url:%s.error.%d is not a dict', cls.name, error_code)
        # Make a copy of the original. When we add headers, etc., it shouldn't
        # affect the original
        error_config = AttrDict(error_config)
        error_path, error_function = error_config.get('path'), error_config.get('function')
        if error_function:
            if error_path:
                error_config.pop('path')
                app_log.warning('url.%s.error.%d has function: AND path:. Ignoring path:',
                                cls.name, error_code)
            cls.error[error_code] = {
                'function': build_transform(
                    error_config,
                    vars=AttrDict((('status_code', None), ('kwargs', None),
                                   ('handler', None))),
                    filename='url:%s.error.%d' % (cls.name, error_code))
            }
        elif error_path:
            encoding = error_config.get('encoding', 'utf-8')
            cls.error[error_code] = {'function': cls._error_fn(error_code, error_config)}
            mime_type, encoding = mimetypes.guess_type(error_path, strict=False)
            if mime_type:
                error_config.setdefault('headers', {}).setdefault('Content-Type', mime_type)
        else:
            app_log.error('url.%s.error.%d must have a path or function key',
                          cls.name, error_code)
        # Add the error configuration for reference
        if error_code in cls.error:
            cls.error[error_code]['conf'] = error_config
    cls._write_error, cls.write_error = cls.write_error, cls._write_custom_error
def heatgrid(shape, spec, data):
    '''Create a heat grid.'''
    if shape.auto_shape_type != MSO_SHAPE.RECTANGLE:
        raise NotImplementedError()

    spec = copy.deepcopy(spec['heatgrid'])
    top = shape.top
    left = shape.left
    width = shape.width
    pixel_inch = 10000
    default_height = 20
    height = spec.get('cell-height', default_height) * pixel_inch
    parent = shape._parent
    shape.element.delete()

    # Load the config
    handler = data.pop('handler') if 'handler' in data else None
    for key in ['row', 'column', 'value', 'column-order', 'row-order']:
        if key not in spec:
            continue
        if isinstance(spec[key], dict) and 'function' in spec[key]:
            spec[key] = compile_function(spec, key, data, handler)

    # Load the data
    data = compile_function(spec, 'data', data, handler)
    data = data.sort_values(by=[spec['column']])
    rows = spec.get('row-order') or sorted(data[spec['row']].unique().tolist())
    columns = spec.get('column-order') or sorted(data[spec['column']].unique().tolist())

    left_margin = (width * spec.get('left-margin', 0.15))
    padding = spec.get('style', {}).get('padding', 5)
    if not isinstance(padding, dict):
        padding = {'left': padding, 'right': padding, 'top': padding, 'bottom': padding}

    styles = copy.deepcopy(spec.get('style', {}))
    if styles.get('gradient'):
        _min, _max = data[spec['value']].min(), data[spec['value']].max()
    # Compile style elements if required
    for key in ['gradient', 'color', 'fill', 'font-size', 'font-family', 'stroke']:
        if isinstance(styles.get(key), dict) and 'function' in styles[key]:
            prop = compile_function(styles, key, data, handler)
            styles[key] = prop(**{'data': data, 'handler': handler}) if callable(prop) else prop

    # Calculate the cell width based on the config
    _width = (width - left_margin) / float(len(columns)) / pixel_inch
    _width = spec.get('cell-width', _width) * pixel_inch
    # Add column headers to the heat grid
    for idx, column in enumerate(columns):
        txt = parent.add_textbox(
            left + _width * idx + left_margin, top - height, _width, height)
        add_text_to_shape(txt, '{}'.format(column), **styles)

    for index, row in enumerate(rows):
        _data = data[data[spec['row']] == row].dropna()
        _data = pd.merge(
            pd.DataFrame({spec['column']: list(columns)}), _data,
            left_on=spec['column'], right_on=spec['column'],
            how='left').reset_index(drop=True)
        for _idx, _row in _data.iterrows():
            style = copy.deepcopy(styles)
            # Set up the args for callable padding
            _vars = {'handler': None, 'row': None, 'column': None, 'value': None}
            args = {'handler': handler, 'row': row,
                    'column': _row[spec['column']], 'value': _row[spec['value']]}
            # Set the padding if callable
            _pad = copy.deepcopy(padding)
            for key, val in _pad.items():
                if isinstance(val, dict) and 'function' in val:
                    _pad[key] = build_transform(val, vars=_vars)(**args)[0]
            top_pad = _pad.get('top', 5) * pixel_inch
            left_pad = _pad.get('left', 5) * pixel_inch
            right_pad = _pad.get('right', 5) * pixel_inch
            bottom_pad = _pad.get('bottom', 5) * pixel_inch

            # Add the cell
            xaxis = left + (_width * _idx) + left_margin + left_pad
            yaxis = top + (height * index) + (top_pad) * index
            _rect = rect(parent, xaxis, yaxis, _width - left_pad - right_pad,
                         height - top_pad)
            # Add a color gradient to the cell if gradient is set
            if style.get('gradient'):
                grad_txt = scale_data(_row[spec['value']], _min, _max)
                gradient = matplotlib.cm.get_cmap(style['gradient'])
                style['fill'] = matplotlib.colors.to_hex(gradient(grad_txt))
                style['color'] = _color.contrast(style['fill'])
            if np.isnan(_row[spec['value']]) and spec.get('na-color'):
                style['fill'] = spec.get('na-color')
                style['color'] = _color.contrast(style['fill'])
            style['stroke'] = style.get('stroke', style['fill'])
            rect_css(_rect, **style)
            # Add text to the cell if required
            if spec.get('text'):
                _txt = parent.add_textbox(
                    xaxis, yaxis, _width - left_pad - right_pad,
                    height - top_pad - bottom_pad)
                if isinstance(spec['text'], dict) and 'function' in spec['text']:
                    cell_txt = compile_function(spec, 'text', _row, handler)
                else:
                    cell_txt = '{}'.format(_row[spec['value']])
                if pd.isnull(cell_txt) and spec.get('na-text'):
                    cell_txt = spec.get('na-text')
                add_text_to_shape(_txt, cell_txt, **style)
        # Add the row's label on the left side
        txt = parent.add_textbox(
            left, top + (height * index) + top_pad * index,
            _width + left_margin, height)
        add_text_to_shape(txt, row, **styles)
def pptgen(source, target=None, **config):
    '''
    Process a configuration. This loads a Presentation from source, applies the
    (optional) configuration changes and saves it into target.
    '''
    # The config was being overwritten by PPTXHandler, and the data key was
    # being removed from the yaml config. So work on a deep copy.
    handler = config.pop('handler', None)
    _config = copy.deepcopy(config)
    if _config.get('is_formhandler', False):
        data = _config.pop('data')
        _config.pop('is_formhandler')
    else:
        data = AttrDict(load_data(_config.pop('data', {}), handler=handler))

    # Register a `command` if present in the configuration
    register(_config)

    # Load the input template
    prs = Presentation(source)
    # Remove slides that are not required from the presentation
    prs = manage_slides(prs, _config)
    slides = prs.slides

    # Loop through each change configuration
    slides_to_remove = []
    manage_slide_order = collections.defaultdict(list)
    for key, change in _config.items():
        # Apply it to every slide
        slide_data = copy.deepcopy(data)
        if 'data' in change and change['data'] is not None:
            if not isinstance(change['data'], dict):
                change['data'] = {'function': change.pop('data')}
            slide_data = build_transform(change['data'], vars={'data': None})(slide_data)[0]

        for index, slide in enumerate(slides):
            # Restrict to specific slides, if specified
            if not is_slide_allowed(change, slide, index + 1):
                continue
            if change.get('replicate'):
                is_grp = isinstance(slide_data, pd.core.groupby.DataFrameGroupBy)
                if isinstance(slide_data, collections.Iterable):
                    for _slide_data in slide_data:
                        _slide_data = _slide_data[1] if is_grp is True else _slide_data
                        replicate_slides(
                            _slide_data, prs, change, slide, slides_to_remove, index, handler)
                        # Create a dict mapping to order the slides
                        manage_slide_order[index + 1].append(len(prs.slides))
                else:
                    raise NotImplementedError()
            else:
                # Stack shapes if required
                stack_shapes(slide.shapes, change, slide_data, handler)
                change_shapes(slide.shapes, change, slide_data, handler)

    indexes = []
    slides_to_remove = list(set(slides_to_remove))
    for key in sorted(manage_slide_order.keys()):
        indexes.append(manage_slide_order[key])
    matrix = list(map(list, zip(*indexes)))
    for indx_lst in matrix:
        for idx in indx_lst:
            src = prs.slides[idx - 1]
            slides_to_remove.append(idx - 1)
            copy_slide = copy.deepcopy(src)
            new_slide = generate_slide(prs, copy_slide)
            dest = prs.slides.add_slide(new_slide)
            for shape in copy_slide.shapes:
                copy_slide_elem(shape, dest)
            add_new_slide(dest, src)

    removed_status = 0
    for sld_idx in set(slides_to_remove):
        delete_slide(prs, (sld_idx - removed_status))
        for slide_num in manage_slide_order:
            manage_slide_order[slide_num] = [(i - 1) for i in manage_slide_order[slide_num]]
        removed_status += 1

    if target is None:
        return prs
    else:
        prs.save(target)
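
# Usage sketch (hedged): the config keys and shape names below are illustrative;
# each non-data key maps shape names on matching slides to commands:
# pptgen(
#     source='template.pptx', target='output.pptx',
#     data={'sales': {'url': 'sales.csv'}},
#     change={'Title 1': {'text': 'Sales update'}},
# )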