def valid_json(self, key, opt):
    """
    Validate a json option.
    """
    try:
        obj_to_json(opt)
    except Exception:
        raise RecipeSchemaError(
            "{} should be a 'json' field but was passed '{}'."
            .format(key, opt))
    return opt
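# NOTE (added example): a minimal, runnable sketch of what a helper like
# `obj_to_json` might do, assuming it wraps `json.dumps` with an ISO-format
# fallback for dates/datetimes. The real helper lives in
# newslynx.lib.serialize; this stand-in is an assumption, not its source.
import datetime
import json


def obj_to_json(obj):
    # serialize to JSON, falling back to ISO strings for dates/datetimes.
    def _default(o):
        if isinstance(o, (datetime.date, datetime.datetime)):
            return o.isoformat()
        raise TypeError("{} is not JSON serializable".format(type(o)))
    return json.dumps(obj, default=_default)


if __name__ == '__main__':
    print(obj_to_json({'when': datetime.datetime(2015, 1, 1)}))
    # => {"when": "2015-01-01T00:00:00"}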
def test_good_recipe_no_nesting(self):
    grecipe = copy.deepcopy(good_recipe)
    r = recipe_schema.validate(grecipe, sous_chef)
    o = r.get('options', {})

    # make sure name, slug, description, time of day, and interval are
    # extracted and added to the top level of the recipe.
    assert 'slug' in r
    assert 'description' in r
    assert 'name' in r
    assert 'schedule_by' in r
    assert 'minutes' in r

    # make sure user_id is not in the recipe or its options
    assert 'user_id' not in r
    assert 'user_id' not in o

    # make sure the slug gets a new hash
    assert sous_chef['slug'] != r['slug']

    # make sure the interval was parsed to null properly
    assert r['crontab'] is None

    # make sure event counts are parsed to null
    assert 'event_counts' not in o

    # make sure the regex is parsed
    assert isinstance(o['user_regex'], RE_TYPE)

    # make sure the start date is parsed
    assert isinstance(o['start_date'], datetime.datetime)

    # make sure the boolean is parsed
    assert o['filter_bots'] is True

    # make sure the start date is UTC
    assert o['start_date'].tzinfo == pytz.utc

    # make sure min_followers got filled in with its default
    assert o['min_followers'] == 0
    assert o['link_url'] == "http://example.com/some-url"

    # make sure the screen name is a list
    assert isinstance(o['owner_screen_name'], list)

    # make sure the search query is a SearchString
    assert isinstance(o['search_query'], SearchString)

    # make sure the first instance of content_items is a dict
    assert isinstance(o['set_content_items'][0], dict)

    # make sure we can serialize this back to json
    obj_to_json(r)
def update_setting(user, org, level, name_id):
    if level not in ['me', 'orgs']:
        raise NotFoundError(
            'You cannot store settings for \'{}\''.format(level))

    s = fetch_by_id_or_field(Setting, 'name', name_id,
                             org_id=org.id,
                             user_id=user.id,
                             level=level)
    if not s:
        raise NotFoundError(
            'Setting "{}" does not yet exist.'.format(name_id))

    # get the request data
    req_data = request_data()

    name = req_data.get('name')
    value = req_data.get('value')
    json_value = req_data.get('json_value')

    # if it's a json_value, check whether we can parse it as such
    if json_value:
        if isinstance(value, basestring):
            try:
                obj_to_json(value)
            except Exception:
                raise RequestError(
                    "Setting '{}' with value '{}' was declared as a "
                    "'json_value' but could not be parsed as such."
                    .format(name_id, value))

    # upsert / patch values.
    if name:
        s.name = name
    if json_value:
        if not isinstance(json_value, bool):
            json_value = str(json_value).lower() in TRUE_VALUES
        s.json_value = json_value
        s.value = obj_to_json(value)
    else:
        s.value = value

    db.session.add(s)
    db.session.commit()
    return jsonify(s)
def generate():
    try:
        for row in ResultIter(results):
            if stream:
                yield obj_to_json(row) + "\n"
            else:
                yield row
    except ResourceClosedError:
        resp = {'success': True}
        if stream:
            yield obj_to_json(resp) + "\n"
        else:
            yield resp
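# NOTE (added example): a hypothetical sketch of how a generator like the
# one above is typically wired into a Flask streaming response; the route
# name and the stub rows are illustrative assumptions, not part of the
# source.
import json

from flask import Flask, Response

app = Flask(__name__)


def generate():
    # stand-in for the database result iterator above.
    for row in [{'id': 1}, {'id': 2}]:
        yield json.dumps(row) + "\n"


@app.route('/query')
def query():
    # stream newline-delimited JSON rather than buffering the
    # full result set in memory.
    return Response(generate(), mimetype='application/json')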
def gen_content_metric_summaries(org, content_items, metrics):
    for c in content_items:
        _metrics = {}
        for m in metrics:
            if 'summary' in m.content_levels and 'timeseries' not in m.content_levels:
                if not m.faceted:
                    _metrics[m.name] = random_int(1, 1000)
                else:
                    _metrics[m.name] = [{
                        'facet': 'google.com',
                        'value': random_int(1, 1000),
                    }, {
                        'facet': 'twitter.com',
                        'value': random_int(1, 1000)
                    }, {
                        'facet': 'facebook.com',
                        'value': random_int(1, 1000)
                    }]

        cmd_kwargs = {
            'org_id': org.id,
            'content_item_id': c.id,
            'metrics': obj_to_json(_metrics)
        }

        # upsert command
        cmd = """SELECT upsert_content_metric_summary(
                    {org_id},
                    {content_item_id},
                    '{metrics}');
              """.format(**cmd_kwargs)
        db.session.execute(cmd)
    db.session.commit()
def content_summary(obj, org_id=None, metrics_lookup=None,
                    content_item_ids=None, commit=True):
    """
    Ingest Summary Metrics for a content item.
    """
    content_item_id = obj.pop("content_item_id")
    if not content_item_id:
        raise RequestError('Object is missing a "content_item_id"')

    if content_item_id not in content_item_ids:
        raise RequestError(
            "Content Item with ID {} doesn't exist".format(content_item_id))

    cmd_kwargs = {"org_id": org_id, "content_item_id": content_item_id}

    metrics = ingest_util.prepare_metrics(
        obj, metrics_lookup,
        valid_levels=["content_item", "all"],
        check_timeseries=False)

    # upsert command
    cmd = """SELECT upsert_content_metric_summary(
                {org_id},
                {content_item_id},
                '{metrics}')
          """.format(metrics=obj_to_json(metrics), **cmd_kwargs)

    if commit:
        try:
            db.session.execute(cmd)
        except Exception as err:
            raise RequestError(err.message)
    cmd_kwargs["metrics"] = metrics
    return cmd
def org_timeseries(obj, org_id=None, metrics_lookup=None, commit=True):
    """
    Ingest Timeseries Metrics for an organization.
    """
    cmd_kwargs = {"org_id": org_id}

    # parse datetime.
    if "datetime" not in obj:
        cmd_kwargs["datetime"] = dates.floor_now(
            unit="hour", value=1).isoformat()
    else:
        ds = obj.pop("datetime")
        dt = dates.parse_iso(ds)
        cmd_kwargs["datetime"] = dates.floor(
            dt, unit="hour", value=1).isoformat()

    metrics = ingest_util.prepare_metrics(
        obj, metrics_lookup,
        valid_levels=["org", "all"],
        check_timeseries=True)

    # upsert command
    cmd = """SELECT upsert_org_metric_timeseries(
                {org_id},
                '{datetime}',
                '{metrics}')
          """.format(metrics=obj_to_json(metrics), **cmd_kwargs)

    if commit:
        try:
            db.session.execute(cmd)
        except Exception as err:
            raise RequestError(err.message)
    cmd_kwargs["metrics"] = metrics
    return cmd
def gen_content_metric_timeseries(org, content_items, metrics,
                                  n_content_item_timeseries_metrics=1000):
    for _ in xrange(n_content_item_timeseries_metrics):
        _metrics = {}
        for m in metrics:
            if 'timeseries' in m.content_levels:
                _metrics[m.name] = random_int(1, 1000)

        cmd_kwargs = {
            'org_id': org.id,
            'content_item_id': choice(content_items).id,
            'datetime': dates.floor(random_date(1, 7), unit='hour', value=1),
            'metrics': obj_to_json(_metrics)
        }

        # upsert command
        cmd = """SELECT upsert_content_metric_timeseries(
                    {org_id},
                    {content_item_id},
                    '{datetime}',
                    '{metrics}');
              """.format(**cmd_kwargs)
        db_session.execute(cmd)
    db_session.commit()
def format(self, record):
    """
    Return logging information as JSON.
    """
    fields = record.__dict__.copy()
    for k in fields.keys():
        if k not in settings.LOG_JSON_FIELDS:
            fields.pop(k, None)
    return obj_to_json(fields)
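# NOTE (added example): a runnable, minimal stand-in for the formatter
# above. The class name `JSONFormatter` and the field whitelist here are
# assumptions; the real whitelist comes from settings.LOG_JSON_FIELDS.
import json
import logging


class JSONFormatter(logging.Formatter):

    KEEP = ('name', 'levelname', 'msg')

    def format(self, record):
        # keep only whitelisted fields; emit one JSON object per record.
        fields = dict((k, v) for k, v in record.__dict__.items()
                      if k in self.KEEP)
        return json.dumps(fields)


handler = logging.StreamHandler()
handler.setFormatter(JSONFormatter())
log = logging.getLogger('demo')
log.addHandler(handler)
log.warning('recipe failed')
# => {"msg": "recipe failed", "name": "demo", "levelname": "WARNING"}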
def dispatch(self, msg, **kw):
    payload = {
        "text": msg,
        "channel": kw.get('channel', settings.NOTIFY_SLACK_CHANNEL),
        "username": kw.get('username', settings.NOTIFY_SLACK_USERNAME),
        "icon_emoji": kw.get('icon_emoji', settings.NOTIFY_SLACK_EMOJI)
    }
    requests.post(settings.NOTIFY_SLACK_WEBHOOK, data=obj_to_json(payload))
def __init__(self, **kw):
    self.org_id = kw.get('org_id')
    self.name = kw.get('name')
    self.json_value = kw.get('json_value', False)
    if self.json_value:
        v = kw.get('value')
        if not isinstance(v, basestring):
            v = obj_to_json(v)
        self.value = v
    else:
        self.value = str(kw.get('value'))
def cook(self, id, **kw):
    """
    Run a Sous Chef.
    """
    kw, params = self._split_auth_params_from_data(kw, kw_incl='load')
    url = self._format_url('sous-chefs', id, 'cook')

    # add apikey/org when required or set by user.
    params.update({'apikey': self.apikey})
    if 'org' not in kw:
        kw['org'] = self.org

    r = requests.post(url, params=params, data=obj_to_json(kw), stream=True)
    return self._stream(r)
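# NOTE (added example): hypothetical usage of the `cook` method above.
# The collection attribute (`sous_chefs`), the sous-chef slug, and the
# `load` kwarg are illustrative assumptions, not confirmed API surface.
from newslynx.client import API

api = API(apikey='<your-apikey>', org=1)
for result in api.sous_chefs.cook('twitter-list-to-event', load=True):
    print(result)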
def create(self, level='orgs', **kw):
    """
    Create a setting.
    """
    # jsonify value
    if kw.get('json_value', True):
        if not isinstance(kw.get('value'), basestring):
            kw['value'] = obj_to_json(kw['value'])
    kw, params = self._split_auth_params_from_data(kw)
    url = self._format_url(level, 'settings')
    return self._request('POST', url, data=kw, params=params)
def update(self, name_id, **kw):
    """
    Update a setting.
    """
    # jsonify value
    if kw.get('json_value', True):
        if not isinstance(kw.get('value'), basestring):
            kw['value'] = obj_to_json(kw['value'])
    kw, params = self._split_auth_params_from_data(kw)
    url = self._format_url('settings', name_id)
    return self._request('PUT', url, data=kw, params=params)
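# NOTE (added example): hypothetical usage of the settings methods above,
# showing how a non-string value is JSON-encoded before the request. The
# `settings` collection attribute, setting name, and values are
# illustrative assumptions.
from newslynx.client import API

api = API(apikey='<your-apikey>', org=1)
api.settings.create(level='orgs', name='timezone-offsets',
                    value={'UTC': 0}, json_value=True)
api.settings.update('timezone-offsets',
                    value={'UTC': 0, 'EST': -5}, json_value=True)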
def format_params(self, url):
    return obj_to_json({
        'method': 'pos.plusones.get',
        'id': 'p',
        'key': 'p',
        'params': {
            'nolog': True,
            'id': url,
            'source': 'widget',
        },
        'jsonrpc': '2.0',
        'apiVersion': 'v1'
    })
def content_timeseries(obj, org_id=None, metrics_lookup=None,
                       content_item_ids=None, commit=True):
    """
    Ingest Timeseries Metrics for a content item.
    """
    content_item_id = obj.pop('content_item_id')
    if not content_item_id:
        raise RequestError('Object is missing a "content_item_id"')

    if content_item_id not in content_item_ids:
        raise RequestError(
            "Content Item with ID {} doesn't exist".format(content_item_id))

    cmd_kwargs = {"org_id": org_id, "content_item_id": content_item_id}

    # parse datetime.
    if 'datetime' not in obj:
        cmd_kwargs['datetime'] = dates.floor_now(
            unit='hour', value=1).isoformat()
    else:
        ds = obj.pop('datetime')
        dt = dates.parse_iso(ds)
        cmd_kwargs['datetime'] = dates.floor(
            dt, unit='hour', value=1).isoformat()

    metrics = ingest_util.prepare_metrics(
        obj, metrics_lookup,
        valid_levels=['content_item', 'all'],
        check_timeseries=True)

    # upsert command
    cmd = """SELECT upsert_content_metric_timeseries(
                {org_id},
                {content_item_id},
                '{datetime}',
                '{metrics}')
          """.format(metrics=obj_to_json(metrics), **cmd_kwargs)

    if commit:
        try:
            db.session.execute(cmd)
        except Exception as err:
            raise RequestError(err.message)
    cmd_kwargs['metrics'] = metrics
    return cmd
def bulkload(data, **kw):
    """
    Bulk load any data.
    """
    kw['src'] = kw.pop('q_src', kw.pop('src', None))
    if not kw['src']:
        raise ValueError('Missing src.')
    job_id = gen_uuid()

    # set queue defaults
    qkw = dict(
        queued=kw.pop('queued', True),
        job_id=job_id,
        timeout=kw.pop('q_timeout', 1000),
        serializer=kw.pop('q_serializer', 'json'),
        result_ttl=kw.pop('q_result_ttl', 60),
        kwargs_ttl=kw.pop('q_kwargs_ttl', 120),
        name=kw.pop('q_name', 'bulk'),
        max_workers=kw.pop('q_max_workers', MAX_WORKERS),
        job_key_fmt=kw.pop('q_job_key', 'rq:{src}:bulk:'.format(**kw) + "{}"),
        chunk_size=kw.pop('q_chunk_size', MAX_CHUNK_SIZE)
    )
    kw.update({'queued': qkw.get('queued', True)})

    # if this is not a queued job, just run the ingest.
    if not qkw.get('queued'):
        return ingest.source(data, **kw)

    q = queues.get(qkw.pop('name', 'bulk'))

    # Store the data + kwargs in redis temporarily. This makes the
    # enqueuing process much, much more efficient by allowing us to pass
    # only a single key into the queue rather than a massive dump of data.
    # However, it also means that all kwargs must be json serializable.
    job_key = qkw['job_key_fmt'].format(job_id)
    job = {'data': data, 'kw': kw}
    if qkw['serializer'] == 'json':
        job = obj_to_json(job)
    elif qkw['serializer'] == 'pickle':
        job = obj_to_pickle(job)
    rds.set(job_key, job, ex=qkw['kwargs_ttl'])

    q.enqueue(bulkworker, job_id, **qkw)
    return job_id
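# NOTE (added example): a sketch of what the consumer side of the queue
# above might look like. `rds`, `ingest`, `json_to_obj`, and
# `pickle_to_obj` are assumed to be the same module-level objects and
# serializer inverses the producer uses; `bulkworker`'s real body is not
# shown in the source.
def bulkworker(job_id, **qkw):
    # fetch the temporarily-stored payload from redis.
    job_key = qkw['job_key_fmt'].format(job_id)
    raw = rds.get(job_key)
    if raw is None:
        raise ValueError('Job data for {} expired.'.format(job_id))

    # deserialize with the same scheme the producer used.
    if qkw.get('serializer', 'json') == 'json':
        job = json_to_obj(raw)
    else:
        job = pickle_to_obj(raw)

    # hand the data off to the ingester.
    return ingest.source(job['data'], **job['kw'])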
def run(opts, **kw):
    from newslynx.sc import sc_exec
    from newslynx.lib import serialize
    from newslynx.cli.common import load_data
    from newslynx.client import API

    # connect to the api and fetch the org
    kw['apikey'] = opts.apikey
    kw['api_url'] = opts.apiurl
    api = API(
        apikey=opts.apikey,
        org=opts.org,
        api_url=opts.apiurl,
        raise_errors=True)
    try:
        kw['org'] = api.orgs.get(opts.org)
    except Exception:
        log.warning('Cannot connect to the API. Running in dev mode.')
        kw['org'] = {'id': opts.org}

    # parse body file / json string.
    recipe = load_data(opts.recipe)
    if recipe:
        kw.update(recipe)

    res = sc_exec.run(opts.sous_chef, **kw)
    if not res:
        return

    # stream output
    if isgenerator(res):
        for r in res:
            sys.stdout.write(serialize.obj_to_json(r) + "\n")
    else:
        sys.stdout.write(serialize.obj_to_json(res))
def org_timeseries(data, **kw):
    """
    Ingest Timeseries Metrics for an organization.
    """
    # parse kwargs.
    org_id = kw.get('org_id')
    metrics_lookup = kw.get('metrics_lookup', [])
    queued = kw.get('queued', False)

    queries = []
    objects = []

    # standardize the format.
    if not isinstance(data, list):
        data = [data]

    for obj in data:
        obj.pop('org_id')
        metrics = _prepare_metrics(obj, metrics_lookup)
        cmd_kwargs = {
            "org_id": org_id,
            'datetime': _prepare_metric_date(obj)
        }

        # upsert command
        cmd = """SELECT upsert_org_metric_timeseries(
                    {org_id},
                    '{datetime}',
                    '{metrics}')
              """.format(metrics=obj_to_json(metrics), **cmd_kwargs)
        queries.append(cmd)

        cmd_kwargs['metrics'] = metrics
        objects.append(cmd_kwargs)

    # execute queries.
    if len(queries):
        q = " UNION ALL ".join(queries)
        db.session.execute(q)
        db.session.commit()
        db.session.remove()
    if queued:
        return True
    return objects
def content_summary(data, **kw):
    """
    Ingest content summary metrics.
    """
    # parse kwargs.
    org_id = kw.get('org_id')
    content_item_ids = kw.get('content_item_ids', [])
    metrics_lookup = kw.get('metrics_lookup', [])
    queued = kw.get('queued', False)

    queries = []
    objects = []

    # standardize the format.
    if not isinstance(data, list):
        data = [data]

    for obj in data:
        cid = _check_content_item_id(obj, content_item_ids)
        cmd_kwargs = {
            "org_id": org_id,
            "content_item_id": cid
        }
        metrics = _prepare_metrics(obj, metrics_lookup)

        # upsert command
        cmd = """SELECT upsert_content_metric_summary(
                    {org_id},
                    {content_item_id},
                    '{metrics}')
              """.format(metrics=obj_to_json(metrics), **cmd_kwargs)
        queries.append(cmd)

        # build up the list of objects.
        cmd_kwargs['metrics'] = metrics
        objects.append(cmd_kwargs)

    # execute queries.
    if len(queries):
        q = " UNION ALL ".join(queries)
        db.session.execute(q)
        db.session.commit()
        db.session.remove()
    if queued:
        return True
    return objects
def content_timeseries(data, **kw):
    # parse kwargs.
    org_id = kw.get('org_id')
    content_item_ids = kw.get('content_item_ids', [])
    metrics_lookup = kw.get('metrics_lookup', [])
    queued = kw.get('queued', False)

    # standardize the format.
    if not isinstance(data, list):
        data = [data]

    queries = []
    objects = []

    for obj in data:
        cid = _check_content_item_id(obj, content_item_ids)
        metrics = _prepare_metrics(obj, metrics_lookup)
        cmd_kwargs = {
            "org_id": org_id,
            "content_item_id": cid,
            'datetime': _prepare_metric_date(obj)
        }

        # upsert command
        cmd = """SELECT upsert_content_metric_timeseries(
                    {org_id},
                    {content_item_id},
                    '{datetime}',
                    '{metrics}')
              """.format(metrics=obj_to_json(metrics), **cmd_kwargs)
        queries.append(cmd)

        # build up the list of objects.
        cmd_kwargs['metrics'] = metrics
        objects.append(cmd_kwargs)

    # execute queries.
    if len(queries):
        q = " UNION ALL ".join(queries)
        db.session.execute(q)
        db.session.commit()
        db.session.remove()
    if queued:
        return True
    return objects
def update(old_sous_chef, new_sous_chef):
    """
    Given a partial or completely new sous-chef, update the
    sous-chef and re-validate it.
    """
    # if the old sous-chef is a SousChef object, coerce it to and from json.
    if isinstance(old_sous_chef, SousChef):
        old_sous_chef = json_to_obj(obj_to_json(old_sous_chef))

    # pop the id
    old_sous_chef.pop('id', None)

    # update the previous version.
    new_sous_chef = update_nested_dict(
        old_sous_chef, new_sous_chef, overwrite=True)
    return validate(new_sous_chef, None)
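# NOTE (added example): a minimal, runnable sketch of the merge helper
# assumed above. The real `update_nested_dict` lives in `newslynx.util`
# and may differ in detail; this stand-in only illustrates the idea.
def update_nested_dict(old, new, overwrite=False):
    for k, v in new.items():
        if isinstance(v, dict) and isinstance(old.get(k), dict):
            # recurse into nested dicts rather than replacing them wholesale.
            update_nested_dict(old[k], v, overwrite=overwrite)
        elif overwrite or k not in old:
            old[k] = v
    return old


print(update_nested_dict({'options': {'a': 1}},
                         {'options': {'b': 2}},
                         overwrite=True))
# => {'options': {'a': 1, 'b': 2}}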
def update(old_recipe, new_recipe, sous_chef):
    """
    Given a partial or completely new recipe, update the old recipe
    and re-validate it.
    """
    # if the old recipe is a Recipe object, coerce it to and from json.
    if isinstance(old_recipe, Recipe):
        old_recipe = json_to_obj(obj_to_json(old_recipe))

    # format it correctly first.
    _rs = RecipeSchema(new_recipe, sous_chef)
    _rs.format_recipe()
    new_recipe = copy.copy(_rs.recipe)

    # update the previous version.
    new_recipe = update_nested_dict(old_recipe, new_recipe, overwrite=True)

    # revalidate.
    rs = RecipeSchema(new_recipe, sous_chef)
    return rs.validate()
def _request(self, method, url, **kw):
    """
    A wrapper for all request executions.
    """
    if not url.endswith('login') and not self.apikey:
        raise ClientError('You haven\'t set your apikey or logged in yet!')

    # add params to kw
    kw.setdefault('params', {})

    # add apikey/org when required or set by user.
    kw['params'].update({'apikey': self.apikey})
    if 'org' not in kw['params']:
        kw['params']['org'] = self.org

    # the orgs endpoint doesn't require an org
    if url.startswith(self._format_url('orgs')):
        kw['params'].pop('org')

    # dump json
    if kw.get('data'):
        kw['data'] = obj_to_json(kw['data'])

    # execute
    r = Request(method, url, **kw)
    try:
        resp = self._session.send(r.prepare())
        err = None
    except Exception as e:
        err = e
        resp = None

    # handle errors
    self._handle_errors(resp, err)

    # format response
    return self._format_response(resp)
def gen_content_metric_timeseries(org, content_items, metrics,
                                  n_content_item_timeseries_metrics=1000):
    # one datetime per hour over the last week.
    date_list = []
    start = dates.now() - timedelta(days=7)
    for hour in range(1, (7 * 24) + 1):
        date_list.append(start + timedelta(hours=hour))

    for c in content_items:
        last_values = {}
        for i, d in enumerate(date_list):
            _metrics = {}
            for m in metrics:
                if 'timeseries' in m.content_levels:
                    if m.type == 'cumulative':
                        if m.name not in last_values:
                            last_values[m.name] = 0
                        last_values[m.name] += random_int(0, 100)
                        _metrics[m.name] = copy.copy(last_values[m.name])
                    else:
                        _metrics[m.name] = random_int(1, 1000)

            cmd_kwargs = {
                'org_id': org.id,
                'content_item_id': c.id,
                'datetime': d.isoformat(),
                'metrics': obj_to_json(_metrics)
            }

            # upsert command
            cmd = """SELECT upsert_content_metric_timeseries(
                        {org_id},
                        {content_item_id},
                        '{datetime}',
                        '{metrics}');
                  """.format(**cmd_kwargs)
            db.session.execute(cmd)
    db.session.commit()
def org_summary(obj, org_id, metrics_lookup, commit=True):
    """
    Ingest Summary Metrics for an organization.
    """
    cmd_kwargs = {"org_id": org_id}
    metrics = ingest_util.prepare_metrics(
        obj, metrics_lookup,
        valid_levels=["org", "all"],
        check_timeseries=False)

    # upsert command
    cmd = """SELECT upsert_org_metric_summary(
                {org_id},
                '{metrics}')
          """.format(metrics=obj_to_json(metrics), **cmd_kwargs)

    if commit:
        try:
            db.session.execute(cmd)
        except Exception as err:
            raise RequestError(err.message)
    cmd_kwargs["metrics"] = metrics
    return cmd
def gen_org_metric_timeseries(org, metrics, n_org_timeseries_metrics=1000):
    for _ in xrange(n_org_timeseries_metrics):
        _metrics = {}
        for m in metrics:
            if 'timeseries' in m.org_levels:
                if m.type != 'cumulative':
                    _metrics[m.name] = random_int(1, 1000)
                else:
                    # scale cumulative metrics by the iteration counter.
                    _metrics[m.name] = _ * random_int(2, 10)

        cmd_kwargs = {
            'org_id': org.id,
            'datetime': dates.floor(random_date(1, 120), unit='hour', value=1),
            'metrics': obj_to_json(_metrics)
        }

        # upsert command
        cmd = """SELECT upsert_org_metric_timeseries(
                    {org_id},
                    '{datetime}',
                    '{metrics}');
              """.format(**cmd_kwargs)
        db.session.execute(cmd)
    db.session.commit()
def event_tags_to_summary(org):
    """
    Count up impact tag categories + levels assigned to events
    by the content_items they're associated with.
    """
    # build up the list of metrics to compute
    event_tag_metrics = ['total_events', 'total_event_tags']
    case_statements = []
    case_pattern = """
        sum(CASE WHEN {type} = '{value}' THEN 1 ELSE 0 END) AS {name}"""

    for l in IMPACT_TAG_LEVELS:
        kw = {
            'type': 'level',
            'value': l,
            'name': "{}_level_events".format(l)
        }
        case_statements.append(case_pattern.format(**kw))
        event_tag_metrics.append(kw['name'])

    for c in IMPACT_TAG_CATEGORIES:
        kw = {
            'type': 'category',
            'value': c,
            'name': "{}_category_events".format(c)
        }
        case_statements.append(case_pattern.format(**kw))
        event_tag_metrics.append(kw['name'])

    # query formatting kwargs
    qkw = {
        "metrics": ", ".join(event_tag_metrics),
        "case_statements": ",\n".join(case_statements),
        "org_id": org.id,
        "null_metrics": obj_to_json({k: 0 for k in event_tag_metrics})
    }

    q = """
    WITH content_event_tags AS (
        SELECT * FROM (
            SELECT
                events.id as event_id,
                events.org_id,
                content_items_events.content_item_id,
                tags.category,
                tags.level
            from events
            FULL OUTER JOIN content_items_events
                on events.id = content_items_events.event_id
            FULL OUTER JOIN events_tags
                on events.id = events_tags.event_id
            FULL OUTER JOIN tags
                on events_tags.tag_id = tags.id
            WHERE events.org_id = {org_id}
                AND events.status = 'approved'
        ) t
        WHERE content_item_id IS NOT NULL
    ),
    content_event_tag_counts AS (
        SELECT
            org_id,
            content_item_id,
            count(distinct(event_id)) as total_events,
            count(1) as total_event_tags,
            {case_statements}
        FROM content_event_tags
        GROUP BY org_id, content_item_id
    ),
    content_event_metrics AS (
        SELECT
            org_id,
            content_item_id,
            (SELECT row_to_json(_) from (SELECT {metrics}) as _) as metrics
        FROM content_event_tag_counts
    ),
    -- Content Items With Approved Events
    positive_metrics AS (
        SELECT upsert_content_metric_summary(org_id, content_item_id, metrics::text)
        FROM content_event_metrics
    ),
    -- Content Items With No Approved Events
    null_metrics AS (
        SELECT upsert_content_metric_summary(t.org_id, t.content_item_id, '{null_metrics}')
        FROM (
            SELECT org_id, id as content_item_id
            FROM content
            WHERE org_id = {org_id}
                AND id NOT IN (
                    SELECT distinct(content_item_id)
                    FROM content_event_metrics
                )
        ) t
    )
    SELECT * from positive_metrics, null_metrics
    """.format(**qkw)
    db.session.execute(q)
    db.session.commit()
    return True
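# NOTE (added example): a runnable illustration of what the CASE
# fragments above render to. The levels used here are made-up examples;
# the real lists come from IMPACT_TAG_LEVELS / IMPACT_TAG_CATEGORIES in
# the newslynx constants.
case_pattern = """
    sum(CASE WHEN {type} = '{value}' THEN 1 ELSE 0 END) AS {name}"""

for l in ('internal', 'external'):  # assumed example levels
    print(case_pattern.format(type='level', value=l,
                              name="{}_level_events".format(l)))
# =>
#     sum(CASE WHEN level = 'internal' THEN 1 ELSE 0 END) AS internal_level_events
#     sum(CASE WHEN level = 'external' THEN 1 ELSE 0 END) AS external_level_events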
def content_summary_from_events(org, content_item_ids=[]):
    """
    Count up impact tag categories + levels assigned to events
    by the content_items they're associated with.
    """
    if not isinstance(content_item_ids, list):
        content_item_ids = [content_item_ids]
    if not len(content_item_ids):
        content_item_ids = org.content_item_ids

    # build up the list of metrics to compute
    event_tag_metrics = ['total_events', 'total_event_tags']
    case_statements = []
    case_pattern = """
        sum(CASE WHEN {type} = '{value}' THEN 1 ELSE 0 END) AS {name}"""

    for l in IMPACT_TAG_LEVELS:
        kw = {
            'type': 'level',
            'value': l,
            'name': "{}_level_events".format(l)
        }
        case_statements.append(case_pattern.format(**kw))
        event_tag_metrics.append(kw['name'])

    for c in IMPACT_TAG_CATEGORIES:
        kw = {
            'type': 'category',
            'value': c,
            'name': "{}_category_events".format(c)
        }
        case_statements.append(case_pattern.format(**kw))
        event_tag_metrics.append(kw['name'])

    content_ids_filter = ""
    if len(content_item_ids):
        content_ids_filter = "AND content_item_id in ({})"\
            .format(",".join([str(i) for i in content_item_ids]))

    # query formatting kwargs
    qkw = {
        "metrics": ", ".join(event_tag_metrics),
        "case_statements": ",\n".join(case_statements),
        "org_id": org.id,
        "null_metrics": obj_to_json({k: 0 for k in event_tag_metrics}),
        "content_ids_filter": content_ids_filter
    }

    # optionally add in the null-metrics query
    null_q = """
    -- Content Items With No Approved Events
    , null_metrics AS (
        SELECT upsert_content_metric_summary(t.org_id, t.content_item_id, '{null_metrics}')
        FROM (
            SELECT org_id, id as content_item_id
            FROM content
            WHERE org_id = {org_id}
                AND id NOT IN (
                    SELECT distinct(content_item_id)
                    FROM content_event_metrics
                )
        ) t
    )
    """.format(**qkw)

    # add in the null query only when we're not filtering
    # by specific content item ids.
    qkw['null_query'] = ""
    qkw['final_query'] = "select * from positive_metrics"
    if not content_ids_filter:
        qkw['null_query'] = null_q
        qkw['final_query'] = """
            select * from positive_metrics
            UNION ALL
            select * from null_metrics"""

    q = """
    WITH content_event_tags AS (
        SELECT * FROM (
            SELECT
                events.id as event_id,
                events.org_id,
                content_items_events.content_item_id,
                tags.category,
                tags.level
            from events
            FULL OUTER JOIN content_items_events
                on events.id = content_items_events.event_id
            FULL OUTER JOIN events_tags
                on events.id = events_tags.event_id
            FULL OUTER JOIN tags
                on events_tags.tag_id = tags.id
            WHERE events.org_id = {org_id}
                AND events.status = 'approved'
                AND (tags.category IS NOT NULL OR tags.level IS NOT NULL)
        ) t
        WHERE content_item_id IS NOT NULL
        {content_ids_filter}
    ),
    content_event_tag_counts AS (
        SELECT
            org_id,
            content_item_id,
            count(distinct(event_id)) as total_events,
            count(1) as total_event_tags,
            {case_statements}
        FROM content_event_tags
        GROUP BY org_id, content_item_id
    ),
    content_event_metrics AS (
        SELECT
            org_id,
            content_item_id,
            (SELECT row_to_json(_) from (SELECT {metrics}) as _) as metrics
        FROM content_event_tag_counts
    ),
    -- Content Items With Approved Events
    positive_metrics AS (
        SELECT upsert_content_metric_summary(org_id, content_item_id, metrics::text)
        FROM content_event_metrics
    )
    {null_query}
    {final_query}
    """.format(**qkw)
    db.session.execute(q)
    db.session.commit()
    return True
def run(opts, **kwargs):
    # connect to the api
    api = API(
        apikey=opts.apikey,
        org=opts.org,
        api_url=opts.api_url,
        raise_errors=opts.raise_errors)

    # get the collection
    cobj = getattr(api, opts.collection, None)
    if not cobj:
        e = RuntimeError("Error: Collection '{}' does not exist."
                         .format(opts.collection))
        echo_error(e)
        echo("Choose from the following collections:\n\t- {}"
             .format("\n\t- ".join(COLLECTIONS)),
             color=Fore.WHITE)
        sys.exit(1)

    # allow for `-` instead of `_`:
    if opts.method:
        opts.method = opts.method.replace('-', '_')

    mobj = getattr(cobj, opts.method, None)
    if not mobj:
        options = CMD_TREE[opts.collection]
        if opts.method != 'ls':
            e = RuntimeError("Method '{}' does not exist for collection '{}'"
                             .format(opts.method, opts.collection))
            echo_error(e, no_color=opts.no_color)
        else:
            echo("/{}".format(opts.collection),
                 color=Fore.BLUE, no_color=opts.no_color)
        msg = "choose from the following methods:\n\t- {}"\
            .format("\n\t- ".join(options))
        echo(msg, color=Fore.YELLOW, no_color=opts.no_color)
        sys.exit(1)

    # parse body file / json string.
    kwargs.update(load_data(opts.data, opts))

    # execute the method
    try:
        res = mobj(**kwargs)
    except KeyboardInterrupt:
        echo_error("Interrupted by user. Exiting.",
                   color=Fore.YELLOW, no_color=opts.no_color)
        sys.exit(2)  # interrupt
    except Exception as e:
        tb = format_exc()
        echo_error(e, tb, no_color=opts.no_color)
        sys.exit(1)

    # stream output
    if isgenerator(res):
        for r in res:
            sys.stdout.write(serialize.obj_to_json(r) + "\n")
    else:
        sys.stdout.write(serialize.obj_to_json(res))
    sys.exit(0)
def generate():
    for item in resp:
        yield obj_to_json(item) + "\n"
def run(opts, **kwargs):
    from newslynx.lib import serialize
    from newslynx.cli.common import load_data
    # dynamically list collections
    from newslynx.client import API

    # connect to the api
    api = API(
        apikey=opts.apikey,
        org=opts.org,
        api_url=opts.apiurl,
        raise_errors=opts.raise_errors)

    # get the collection
    cobj = None
    if opts.collection:
        cobj = getattr(api, opts.collection.replace('-', '_'), None)
    if not cobj:
        # report options
        collections = [c.replace('_', '-') for c in dir(api) if _keep(c)]
        log.error("Collection '{}' does not exist.".format(opts.collection))
        log.warning("Choose from the following collections:\n\t- {}"
                    .format("\n\t- ".join(collections)))
        sys.exit(1)

    # get the method
    mobj = None
    if opts.method:
        mobj = getattr(cobj, opts.method.replace('-', '_'), None)
    if not mobj:
        # report options
        if opts.method != 'ls':
            log.warning("Method '{}' does not exist for collection '{}'"
                        .format(opts.method, opts.collection))
        # compute the tree here to save on processing time.
        options = [m.replace('_', '-') for m in dir(cobj) if _keep(m)]
        # list the methods for this collection
        msg = "choose from the following methods:\n\t- {}"\
            .format("\n\t- ".join(options))
        log.warning("\n/{}\n".format(opts.collection) + msg)
        sys.exit(0)

    # parse body file / json string.
    d = load_data(opts.data)
    if d:
        kwargs.update(d)

    # execute the method
    try:
        res = mobj(**kwargs)
    except KeyboardInterrupt:
        log.warning("\nInterrupted by user. Exiting...\n")
        sys.exit(2)  # interrupt
    except Exception:
        log.error(format_exc())
        sys.exit(1)

    # stream output
    if isgenerator(res):
        for r in res:
            sys.stdout.write(serialize.obj_to_json(r) + "\n")
    else:
        sys.stdout.write(serialize.obj_to_json(res))
        sys.stdout.write("\n")
    sys.exit(0)