def scalar_plotpoints(cls, pk, dimensions=None, metrics=None, depth=0,
                      period=None, flot_time=False, points_type=dict):
    metrics = metrics or ["hits"]
    if isinstance(metrics, basestring):
        metrics = [metrics]
    period = Period.get(period)
    sparse = cls.whale_driver().retrieve(pk, dimensions, metrics, period=period)
    nonsparse = defaultdict(dict)
    if flot_time:
        points_type = list
    for dim, mets in sparse.items():
        for met, points in mets.items():
            dts = period.datetimes_strs()
            nonsparse[dim][met] = []
            for dt in dts:
                if flot_time:
                    dt_t = to_flot_time(Period.parse_dt_str(dt))
                else:
                    dt_t = dt
                value = points[dt] if dt in points else 0
                nonsparse[dim][met].append([dt_t, float(value)])
            nonsparse[dim][met] = points_type(nonsparse[dim][met])
    if depth > 0:
        for sub in cls.get_subdimensions(pk, dimensions):
            nonsparse = dict(
                nonsparse.items() +
                cls.plotpoints(pk, sub, metrics, depth=depth - 1,
                               period=period, flot_time=flot_time,
                               points_type=points_type).items())
    return nonsparse
def zranked(cls, pk, parent_dimension='_', metric='hits', period=None, at=None,
            start=0, size=10, sort_dir=None, tzoffset=None):
    period, ats, tzoffset = Period.get_days(period, at)
    dt = ats or [Period.convert(cls.now(), tzoffset)]
    return map(try_loads, _ranked(cls.whale_driver(), pk, parent_dimension,
                                  metric, period, dt, start, size,
                                  sort_dir=sort_dir))
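# Illustrative usage sketch, not from the source: assumes the Whale class from
# whale.py is importable, that these are classmethods, and that a Redis instance
# backs whale_driver(). The pk and metric values are hypothetical; events must
# have been stored with rank=True for the rank zsets to exist.
from whale import Whale

top_converters = Whale.zranked('homepage', metric='value/hits', size=5)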
def _store(redis, pk, dimension, metric, period, dt, count, method='set',
           rank=False):
    # Keep a list of graphs per pk
    key = keyify(pk, dimension, Period.get(period).interval, metric)
    # Store pk dimensions
    dimension_key = keyify('dimensions', pk)
    dimension_json = keyify(dimension)
    if dimension_json not in _added_dimensions[dimension_key]:
        redis.sadd(dimension_key, dimension_json)
        _added_dimensions[dimension_key].append(dimension_json)
    # Store dimensional subdimensions
    if dimension != '_':
        subdimension_key = keyify('subdimensions', pk, parent(dimension))
        if dimension_json not in _added_subdimensions[subdimension_key]:
            redis.sadd(subdimension_key, dimension_json)
            _added_subdimensions[subdimension_key].append(dimension_json)
    if method == 'set':
        new_val = float(count)
        redis.hset(key, dt, new_val)
    elif method == 'incr':
        new_val = redis.execute_command('HINCRBYFLOAT', key, dt, float(count))
    if rank and (isinstance(try_loads(pk), list) or dimension != '_'):
        if isinstance(pk, list) and dimension == '_':
            tgt_pk = parent(pk)
            tgt_dimension = dimension
        else:
            tgt_pk = pk
            tgt_dimension = parent(dimension)
        rank_key = keyify('rank', tgt_pk, tgt_dimension,
                          Period.get(period).interval, dt, metric)
        redis.zadd(rank_key, dimension_json, new_val)
    return new_val
def _ranked(redis, pk, parent_dimension, metric, period, ats, start=0, size=10,
            sort_dir=None):
    top, bot = parse_formula(metric)
    rank_keyify = lambda ats, met: keyify('rank', pk, parent_dimension,
                                          Period.get(period).interval, ats, met)
    final_rank_key = rank_keyify(ats, metric)

    def squash_ats(met):
        if len(ats) > 1:
            map(lambda at: redis.zremrangebyscore(rank_keyify(at, met), 0, 0), ats)
            redis.zunionstore(rank_keyify(ats, met),
                              map(lambda at: rank_keyify(at, met), ats))

    squash_ats(top)
    if bot:
        squash_ats(bot)
        top_key, bot_key = rank_keyify(ats, top), rank_keyify(ats, bot)
        redis.execute_command("eval", """
            for key_i, key_n in ipairs(redis.call("zrange", KEYS[2], 0, -1)) do
                local top_s = tonumber(redis.call("zscore", KEYS[1], key_n))
                local bot_s = tonumber(redis.call("zscore", KEYS[2], key_n))
                if top_s and bot_s and bot_s > 0 then
                    redis.call("zadd", KEYS[3], top_s/bot_s, key_n)
                end
            end
            """, 3, top_key, bot_key, final_rank_key)
    redis.zremrangebyscore(final_rank_key, 0, 0)
    return redis.zrange(final_rank_key, start, start + size,
                        desc=not sort_dir or sort_dir.upper() in ['-', 'DESC', 'HIGH'])
def rank_subdimensions_scalar(cls, pk, dimension='_', metric='hits', period=None):
    period = period or Period.default_size()
    d_k = keyify(dimension)
    total = cls.totals(pk, dimension, metric, periods=[period])[str(period)][d_k][metric]
    ranked = dict()

    def info(sub):
        pps = cls.plotpoints(pk, sub, metric, period=period)[sub][metric]
        sub_total = sum(pps.values())
        return {
            'points': pps,
            'total': sub_total,
            'important': sub_total > 10 and (sub_total > (total / 10)) or False,
        }

    for sub in map(maybe_dumps, cls.all_subdimensions(pk, dimension)):
        ranked[sub] = info(sub)
    del(ranked[dimension])
    # Prune parents
    for sub, info in ranked.items():
        children = map(maybe_dumps, cls.get_subdimensions(pk, sub))
        children_total = sum(map(lambda s: ranked[s]['total'], children))
        if info['important'] and (info['total'] - children_total) < (total / 10):
            info['important'] = False
    return ranked
def total(cls, pk, metric, dimension='_', period=None, at=None, index=None,
          tzoffset=None):
    period, ats, tzoffset = Period.get_days(period, at, tzoffset=tzoffset)
    top, bot = parse_formula(metric)
    if not ats and not index:
        index = -1
    if isinstance(index, int):
        pps = cls.plotpoints(pk, dimension, metric, period=period,
                             points_type=list)
        return pps[dimension][metric][index][1]
    else:
        if not bot:
            pps = cls.plotpoints(pk, dimension, metric, period=period)
            ppsm = pps[dimension][metric]
            return sum([ppsm[dt] for dt in ats if dt in ppsm])
        else:
            top_pps = cls.plotpoints(pk, dimension, top, period=period)
            bot_pps = cls.plotpoints(pk, dimension, bot, period=period)
            top_ppsm = top_pps[dimension][top]
            bot_ppsm = bot_pps[dimension][bot]
            top_tot = sum([top_ppsm[dt] for dt in ats if dt in top_ppsm])
            bot_tot = sum([bot_ppsm[dt] for dt in ats if dt in bot_ppsm])
            return bot_tot and top_tot / bot_tot or 0
def rank_subdimensions_scalar(cls, pk, dimension='_', metric='hits', period=None,
                              recursive=True, prune_parents=True, points=False):
    period = period or Period.default_size()
    d_k = keyify(dimension)
    total = cls.cached_totals(pk, dimension, metric, periods=[period])[period][d_k][metric]
    ranked = dict()

    def info(sub):
        pps = cls.plotpoints(pk, sub, metric, period=period)[sub][metric]
        sub_total = sum(pps.values())
        data = {
            'points': pps,
            'score': sub_total,
            'important': sub_total > 10 and (sub_total > (total / 10)) or False,
            'effect': total - sub_total,
            'difference': total - sub_total,
            'value': sub_total,
            'count': sub_total,
            'dimension': sub,
        }
        if not points:
            del data['points']
        return data

    _subs = recursive and cls.all_subdimensions or cls.get_subdimensions
    for sub in map(maybe_dumps, _subs(pk, dimension)):
        ranked[sub] = info(sub)
    # Prune parents
    if recursive and prune_parents:
        for sub, info in ranked.items():
            children = map(maybe_dumps, cls.get_subdimensions(pk, sub))
            children_total = sum(map(lambda s: ranked[s]['score'], children))
            if info['important'] and (info['score'] - children_total) < (total / 10):
                info['important'] = False
    return ranked
def __call__(self, *args, **kwargs):
    from whale import Whale
    from periods import Period
    # Drop a leading Whale (sub)class argument so cache keys stay stable
    if len(args) and (args[0] == Whale or issubclass(args[0], Whale)):
        args = args[1:]
    clear_cache = kwargs.pop("unmemoize", False)
    self.get_cache()
    if "period" in kwargs:
        p = Period.get(kwargs["period"])
        kwargs["period"] = str(p)
        ttl = int(p.interval) / 5
    else:
        ttl = 60
    key_name = self.keyify(args, kwargs)
    if clear_cache:
        self.cache.delete(key_name)
    try:
        return json.loads(self.cache[key_name])
    except KeyError:
        value = self.func(Whale, *args, **kwargs)
        self.cache[key_name] = json.dumps(value)
        self.cache.expire(key_name, ttl)
        return value
    except TypeError:
        # uncachable -- for instance, passing a list as an argument.
        # Better to not cache than to blow up entirely.
        return self.func(Whale, *args, **kwargs)
def totals(cls, pk, dimensions=None, metrics=None, periods=None, at=None):
    if not periods:
        periods = DEFAULT_PERIODS
    if not isinstance(periods, list):
        periods = [periods]
    metrics = metrics or ['hits']
    if not isinstance(metrics, list):
        metrics = [metrics]
    ratios = []
    # Iterate over a copy so ratio metrics can be swapped for their components
    for metric in list(metrics):
        if '/' in metric:
            metrics.remove(metric)
            ratios.append(metric)
            metrics += metric.split('/')
    d = {}
    for p in periods:
        period, ats, tzoffset = Period.get_days(p, at)
        p_data = cls.plotpoints(pk, dimensions, metrics, period=p, at=at)
        p_totals = dict()
        for dim in p_data.keys():
            p_totals[dim] = dict()
            for met, vals in p_data[dim].items():
                p_totals[dim][met] = sum([v for k, v in vals.items() if k in ats])
            for rat in ratios:
                top, bot = parse_formula(rat)
                topt, bott = p_totals[dim][top], p_totals[dim][bot]
                p_totals[dim][rat] = bott and topt / bott or 0
        d[str(p)] = p_totals
    return d
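# Illustrative usage sketch, not from the source: assumes the Whale class from
# whale.py; the pk and dimension values are hypothetical. A ratio metric such
# as 'value/hits' is split into its parts and re-derived per period; the result
# is keyed by period string, then dimension, then metric name.
from whale import Whale

by_period = Whale.totals('homepage', dimensions=['browser', 'chrome'],
                         metrics=['hits', 'value/hits'])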
def total(cls, pk, metric, dimension='_', period=None, at=None, index=None,
          tzoffset=None):
    period, ats, tzoffset = Period.get_days(period, at, tzoffset=tzoffset)
    top, bot = parse_formula(metric)
    dimension = maybe_dumps(dimension)
    if not ats and not index:
        index = -1
    if isinstance(index, int):
        pps = cls.plotpoints(pk, dimension, metric, period=period,
                             points_type=list)
        return pps[dimension][metric][index][1]
    else:
        if not bot:
            pps = cls.plotpoints(pk, dimension, metric, period=period,
                                 tzoffset=tzoffset)
            ppsm = pps[dimension][metric]
            return sum([ppsm[dt] for dt in ats if dt in ppsm])
        else:
            top_pps = cls.plotpoints(pk, dimension, top, period=period,
                                     tzoffset=tzoffset)
            bot_pps = cls.plotpoints(pk, dimension, bot, period=period,
                                     tzoffset=tzoffset)
            top_ppsm = top_pps[dimension][top]
            bot_ppsm = bot_pps[dimension][bot]
            top_tot = sum([top_ppsm[dt] for dt in ats if dt in top_ppsm])
            bot_tot = sum([bot_ppsm[dt] for dt in ats if dt in bot_ppsm])
            return bot_tot and top_tot / bot_tot or 0
def rank_subdimensions_ratio(cls, pk, numerator, denominator='hits',
                             dimension='_', period=None, recursive=True,
                             points=False):
    top, bottom = numerator, denominator
    p_obj, ats, tzoffset = Period.get_days(period)
    p_s = str(p_obj)
    d_k = keyify(dimension)
    top_points = cls.totals(pk, dimension, top, periods=[p_s])
    top_total = p_s in top_points and top_points[p_s][d_k][top] or 0
    bottom_points = cls.totals(pk, dimension, bottom, periods=[p_s])
    bottom_total = p_s in bottom_points and bottom_points[p_s][d_k][bottom] or 0
    ratio_total = bottom_total and float(top_total / bottom_total) or 0
    ranked = dict()

    def info(sub):
        pps = cls.plotpoints(pk, sub, [top, bottom, '%s/%s' % (top, bottom)],
                             period=period)[sub]
        top_pps = pps[top]
        bottom_pps = pps[bottom]
        sub_top_sum = sum({k: v for k, v in top_pps.items() if k in ats}.values())
        sub_bottom_sum = sum({k: v for k, v in bottom_pps.items() if k in ats}.values())
        ratio = sub_bottom_sum and float(sub_top_sum / sub_bottom_sum) or 0
        difference = ratio_total and (ratio - ratio_total) / ratio_total or 0
        important = sub_bottom_sum > 5 and math.fabs(difference) > .1
        data = {
            'points': pps,
            'score': ratio,
            'difference': difference,
            'effect': difference * sub_bottom_sum * ratio_total,
            'value': sub_top_sum,
            'count': sub_bottom_sum,
            'important': important,
            'dimension': sub,
        }
        if not points:
            del data['points']
        return data

    _subs = recursive and cls.all_subdimensions or cls.get_subdimensions
    for sub in map(maybe_dumps, _subs(pk, dimension)):
        ranked[sub] = info(sub)
    return ranked
def plotpoints(cls, pk, dimensions=None, metrics=None, period=None, overall=True):
    metrics = metrics or ['hits', ]
    period = period or Period.default_size()
    sparse = cls.whale_driver().retrieve(pk, dimensions, metrics,
                                         period=period, overall=overall)
    nonsparse = defaultdict(dict)
    for dimensions, metrics in sparse.items():
        for metric, points in metrics.items():
            dts = Period(*period.split('x')).datetimes_strs()
            nonsparse[dimensions][metric] = []
            for dt in dts:
                flot_time = to_flot_time(Period.parse_dt_str(dt))
                value = points[dt] if dt in points else 0
                nonsparse[dimensions][metric].append([flot_time, float(value)])
    return nonsparse
def retrieve(self, pk, dimensions, metrics, period=None, dt=None):
    nested = defaultdict(dict)
    period = str(Period.get(period))
    for dimension in map(maybe_dumps, iterate_dimensions(dimensions)):
        for metric in map(maybe_dumps, metrics):
            hash_key = keyify(pk, dimension, period, metric)
            value_dict = self.hgetall(hash_key)
            nested[dimension][metric] = dict([(k, float(v))
                                              for k, v in value_dict.items()])
    return dict(nested)
def render_divs(cls, pk, metric, dimension='_', period=None, at=None,
                tzoffset=None, format=None, hidden=False):
    period, ats, tzoffset = Period.get_days(period, at, tzoffset=tzoffset)
    top, bot = parse_formula(metric)
    pps = cls.plotpoints(pk, dimension, metric, period=period)
    ppsm = pps[dimension][metric]
    if not format:
        if bot:
            format = 'pct'
        else:
            format = 'grouped'

    def fmt(v):
        import locale
        if v == 'None':
            v = None
        f = format
        if f == 'int':
            f = lambda s: int(float(s or 0))
        elif f == 'float':
            v = v or 0.0
            f = float
        elif f == 'grouped':
            v = v or 0
            f = lambda s: locale.format('%d', int(float(s)), True)
        elif f in ['pct', 'percent', '%', 'ratio']:
            v = min(101, v and float(v) * 100 or 0)
            f = lambda s: '%.2f%%' % s
        elif f in ['cash', 'money', 'usd', '$', 'dollars', 'cents']:
            v = v and float(v) or 0.0
            if f == 'cents':
                v = v / 100.0
            f = locale.currency
        if not f:
            return v
        return callable(f) and f(v) or v

    hidden = hidden and 'style="display: none"' or ''
    rep = lambda s: s.format(
        pk=pk, metric=metric, dimension=dimension, hidden=hidden)
    table = rep('<table {hidden} data-hw-pk="{pk}" data-hw-name="{{name}}" data-hw-dimension="{dimension}" data-metric="{metric}">') + '\n'.join([
        '<tr><td>%s</td><td>%s</td></tr>' % (at.replace(' 00:00:00', ''), fmt(count))
        for at, count in ppsm.items()]) + '</table>'
    return table
def update_count_to(cls, pk, dimensions='_', metrics=None, period=False,
                    at=False, rank=False):
    period = Period.get(period)
    at = at or cls.now()
    dt = period.flatten_str(at)
    pipe = cls.whale_driver().pipeline(transaction=False)
    for (metric, i) in metrics.iteritems():
        _store(pipe, pk, dimensions, metric, period, dt, i, rank=rank)
    pipe.execute()
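# Illustrative usage sketch, not from the source: assumes the Whale class from
# whale.py; the pk, dimension and metric values are hypothetical. The call
# flattens `at` (default: now) into the period's bucket and writes each metric
# via _store on a Redis pipeline; rank=True also updates the rank zsets.
from whale import Whale

Whale.update_count_to('homepage', ['browser', 'chrome'],
                      {'hits': 1, 'value': 19.99}, rank=True)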
def plotpoints(self, categories=None, dimensions=None, metrics=None,
               period=None, depth=0):
    categories = categories or ''
    dimensions = dimensions or json.dumps(list(list()))
    # Convert categories to a list, if it's not
    if type(categories) in [str, unicode]:
        categories = [categories, ]
    metrics = metrics or ['hits', ]
    period = period or Period.default_size()
    sparse = self.driver().retrieve(categories, dimensions, metrics,
                                    period=period, depth=depth)
    nonsparse = defaultdict(dict)
    for dimensions, metrics in sparse.items():
        for metric, points in metrics.items():
            dts = Period(*period.split('x')).datetimes_strs()
            nonsparse[dimensions][metric] = []
            for dt in dts:
                flot_time = to_flot_time(Period.parse_dt_str(dt))
                value = points[dt] if dt in points else 0
                nonsparse[dimensions][metric].append([flot_time, float(value)])
    return nonsparse
def plotpoints(cls, pk, dimensions=None, metrics=None, depth=0, period=None,
               flot_time=False, points_type=dict):
    metrics = metrics or ['hits']
    if isinstance(metrics, basestring):
        metrics = [metrics]
    period = Period.get(period)
    dts = period.datetimes_strs()
    nonsparse = defaultdict(defaultdict)
    # Hardwire time-based metrics for lulz
    time_metrics = {'second': 1, 'minute': 60, 'hour': 3600,
                    'day': 3600 * 24, 'week': 3600 * 24 * 7}
    #for t_m, factor in time_metrics.items():
    #    if t_m in metrics:
    #        metrics.remove(t_m)
    #        for dimension in dimensions:
    #            nonsparse[dimension][t_m] = list()
    #            for dt in dts:
    #                if flot_time:
    #                    dt = to_flot_time(Period.parse_dt_str(dt))
    #                nonsparse[dimension][t_m].append([dt, period.interval / factor])
    #            nonsparse[dimension][t_m] = points_type(nonsparse[dimension][t_m])
    # Pull the plotpoints that exist from Redis
    sparse = cls.whale_driver().retrieve(pk, dimensions, metrics, period=period)
    for dimensions, metrics in sparse.items():
        for metric, points in metrics.items():
            #if metric in time_metrics: continue
            nonsparse[dimensions][metric] = []
            for dt in dts:
                # Keep the original dt for the Redis lookup; only the key of the
                # emitted point is converted to flot time
                if flot_time:
                    dt_t = to_flot_time(Period.parse_dt_str(dt))
                else:
                    dt_t = dt
                value = points[dt] if dt in points else 0
                nonsparse[dimensions][metric].append([dt_t, float(value)])
            nonsparse[dimensions][metric] = points_type(nonsparse[dimensions][metric])
    if depth > 0:
        for sub in cls.get_subdimensions(pk, dimensions):
            nonsparse = dict(nonsparse.items() +
                             cls.plotpoints(pk, sub, metrics, depth=depth - 1,
                                            period=period, flot_time=flot_time,
                                            points_type=points_type).items())
    return nonsparse
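# Illustrative usage sketch, not from the source: assumes the Whale class from
# whale.py and hypothetical pk/dimension values. flot_time=True converts the
# bucket keys to flot-style timestamps, and depth=1 merges in one level of
# subdimension series alongside the requested dimension.
from whale import Whale

series = Whale.plotpoints('homepage', '_', ['hits', 'value'],
                          depth=1, flot_time=True)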
def _retrieve(redis, pk, dimensions, metrics, period=None, dt=None):
    nested = defaultdict(dict)
    interval = Period.get(period).interval
    for dimension in iterate_dimensions(dimensions) + ['_']:
        for metric in metrics:
            if ':' in metric:
                metric_name = metric.split(':')[0]
            else:
                metric_name = metric
            hash_key = keyify(pk, dimension, interval, metric_name)
            value_dict = redis.hgetall(hash_key)
            nested[maybe_dumps(dimension)][maybe_dumps(metric)] = dict([
                (k, float(v)) for k, v in value_dict.items()])
    return dict(nested)
def table_graph():
    from periods import Period
    params = {
        'tzoffset': g('tzoffset', 0.0),
        'period': g('period', str(Period.get(None))),
    }
    debug = g('debug', False)
    table = g('table', '')
    height = g('height', '300px')
    delay = g('delay', 5000)
    hwurl = req.GET.get('hwurl', '/' or req.url.split('table_graph.js')[0])
    include_string = \
        "document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/jquery.min.js'></script>\");" % hwurl
    include_string += \
        "document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/hailwhale.js'></script>\");" % hwurl
    include_string += \
        "document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/d3.js'></script>\");" % hwurl
    include_string += \
        "document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/nvd3.js'></script>\");" % hwurl
    return_string = '''
        appended=false;\n
        function jqinit() {{\n
            if(typeof(jQuery) == 'undefined' || typeof(jQuery.hailwhale) == 'undefined') {{\n
                if(!appended) {{\n
                    appended = true;\n
                    {include_string}\n
                }}\n
                setTimeout(jqinit, 250);\n
            }} else {{\n
                $(function() {{\n
                    init_graphs = function() {{
                        $.hailwhale('{hwurl}').graph_tables('{table}', {options});\n
                    }}
                    setTimeout(init_graphs, {delay});
                    if(ui_loaded_funcs) ui_loaded_funcs.init_graphs = init_graphs;
                }});\n
            }}
        }}
        jqinit();\n
        '''.format(include_string=include_string, table=table, delay=delay,
                   hwurl=hwurl, options=util.maybe_dumps(params))
    return return_string
def reasons_for(cls, pk, formula="value/hits", known_data=None, period=None,
                recursive=True):
    metric, denomenator = parse_formula(formula)
    period = Period.get(period)
    pk_base, decision, option = pk
    base = "_"
    best = worst = None
    ranks = cls.cached_rank(pk, formula=formula, dimension=base, period=period,
                            recursive=recursive, points=False)
    overall = cls.cached_rank([pk_base, decision], formula=formula,
                              dimension=base, period=period,
                              recursive=recursive, points=False)
    parent_score = overall[base]["score"]
    parent_count = overall[base]["count"]
    ranks[base]["effect"] = ranks[base]["count"] * ranks[base]["difference"]

    def delta(info):
        diff = info["score"] - parent_score
        info["value_diff"] = info["value"] - overall[base]["value"]
        info["difference"] += diff
        if math.fabs(diff) > 0 and info["count"] > 0:
            info["effect"] += diff * info["count"]
            info["significance"] = ((0.5 * info["effect"]) ** 2) / parent_count
        else:
            info["effect"] = 0
            info["significance"] = 0
        return info

    known_dimensions = iterate_dimensions(known_data)
    for dim, info in ranks.items():
        ranks[dim] = info = delta(info)
        if try_loads(dim) in known_dimensions and info["important"]:
            best_score = best and ranks[best]["score"]
            worst_score = worst and ranks[worst]["score"]
            if best is None or info["score"] > best_score:
                best = dim
            if worst is None or info["score"] < worst_score:
                worst = dim
    i = {
        "good": best and ranks[best] or {},
        "bad": worst and ranks[worst] or {},
        #'ranks': ranks,
        "base": ranks[base],
        "parent": overall[base],
    }
    i["high"] = i["good"].get("difference", 0)
    i["high_sig"] = i["good"].get("significance", 0) > 4
    i["low"] = i["bad"].get("difference", 0)
    i["low_sig"] = i["bad"].get("significance", 0) > 4
    return i
def rank_subdimensions_scalar(cls, pk, dimension='_', metric='hits', period=None,
                              recursive=True, prune_parents=True, points=False):
    p_obj, ats, tzoffset = Period.get_days(period)
    p_s = str(p_obj)
    d_k = keyify(dimension)
    # sum of all values in metric
    total = cls.totals(pk, dimension, metric, periods=[p_s])[p_s][d_k][metric]
    ranked = dict()

    def info(sub):
        pps = cls.plotpoints(pk, sub, metric, period=period)[sub][metric]
        sub_total = sum(pps.values())
        data = {
            'points': pps,
            'score': sub_total,
            'important': sub_total > 10 and (sub_total > (total / 10)),
            'effect': total - sub_total,
            'difference': total - sub_total,
            'value': sub_total,
            'count': sub_total,
            'dimension': sub,
        }
        if not points:
            del data['points']
        return data

    _subs = recursive and cls.all_subdimensions or cls.get_subdimensions
    for sub in map(maybe_dumps, _subs(pk, dimension)):
        ranked[sub] = info(sub)
    # Prune parents
    if recursive and prune_parents:
        for sub, info in ranked.items():
            children = map(maybe_dumps, cls.get_subdimensions(pk, sub))
            children_total = sum(map(lambda s: ranked[s]['score'], children))
            if info['important'] and (info['score'] - children_total) < (total / 10):
                info['important'] = False
    return ranked
def render_hw_plotpoint_table(cls, pk, metric, dimension='_', period=None,
                              at=None, tzoffset=None, format=None, hidden=False,
                              graph_color=''):
    period, ats, tzoffset = Period.get_days(period, tzoffset=tzoffset)
    top, bot = parse_formula(metric)
    pps = cls.plotpoints(pk, dimension, metric, period=period, tzoffset=tzoffset)
    ppsm = pps[dimension][metric]
    if not format:
        if bot:
            format = 'pct'
        else:
            format = 'grouped'

    def fmt(v):
        import locale
        if v == 'None':
            v = None
        f = format
        if f == 'int':
            f = lambda s: int(float(s or 0))
        elif f == 'float':
            v = v or 0.0
            f = float
        elif f == 'grouped':
            v = v or 0
            f = lambda s: locale.format('%d', int(float(s)), True)
        elif f in ['pct', 'percent', '%', 'ratio']:
            v = min(101, v and float(v) * 100 or 0)
            f = lambda s: '%.2f%%' % s
        elif f in ['cash', 'money', 'usd', '$', 'dollars', 'cents']:
            v = v and float(v) or 0.0
            if f == 'cents':
                v = v / 100.0
            f = locale.currency
        if not f:
            return v
        return callable(f) and f(v) or v

    hidden = hidden and 'style="display: none"' or ''
    rep = lambda s: s.format(pk=pk, metric=metric, dimension=dimension,
                             hidden=hidden, color=graph_color)
    table = rep('<table {hidden} data-hw-pk="{pk}" data-hw-name="{{name}}" data-hw-dimension="{dimension}" data-metric="{metric}" data-hw-color="{color}">') + '\n'.join([
        '<tr><td>%s</td><td>%s</td></tr>' % (at.replace(' 00:00:00', ''), fmt(count))
        for at, count in ppsm.items()]) + '</table>'
    return table
def rank_subdimensions_ratio(cls, pk, numerator, denominator="hits",
                             dimension="_", period=None, recursive=True,
                             points=False):
    top, bottom = numerator, denominator
    period = period or Period.default_size()
    d_k = keyify(dimension)
    top_total = cls.totals(pk, dimension, top, periods=[period])[str(period)][d_k][top]
    bottom_total = cls.totals(pk, dimension, bottom, periods=[period])[str(period)][d_k][bottom]
    ratio_total = bottom_total and float(top_total / bottom_total) or 0
    ranked = dict()

    def info(sub):
        pps = cls.plotpoints(pk, sub, [top, bottom, "%s/%s" % (top, bottom)],
                             period=period)[sub]
        top_pps = pps[top]
        bottom_pps = pps[bottom]
        sub_top_sum = sum(top_pps.values())
        sub_bottom_sum = sum(bottom_pps.values())
        ratio = sub_bottom_sum and float(sub_top_sum / sub_bottom_sum) or 0
        difference = ratio_total and (ratio - ratio_total) / ratio_total or 0
        important = sub_bottom_sum > 5 and math.fabs(difference) > 0.1
        data = {
            "points": pps,
            "score": ratio,
            "difference": difference,
            "effect": difference * sub_bottom_sum * ratio_total,
            "value": sub_top_sum,
            "count": sub_bottom_sum,
            "important": important,
            "dimension": sub,
        }
        if not points:
            del data["points"]
        return data

    _subs = recursive and cls.all_subdimensions or cls.get_subdimensions
    for sub in map(maybe_dumps, _subs(pk, dimension)):
        ranked[sub] = info(sub)
    return ranked
def totals(cls, pk, dimensions=None, metrics=None, periods=None):
    if not periods:
        periods = DEFAULT_PERIODS
    if not isinstance(periods, list):
        periods = [periods]
    metrics = metrics or ['hits']
    if not isinstance(metrics, list):
        metrics = [metrics]
    d = {}
    for p in periods:
        p_data = cls.plotpoints(pk, dimensions, metrics, period=str(p))
        p_totals = dict()
        for dim, mets in p_data.items():
            p_totals[dim] = dict()
            for met, vals in mets.items():
                p_totals[dim][met] = sum([v for k, v in vals.items()
                                          if Period.get(p).flatten(k)])
        d[str(p)] = p_totals
    d['alltime'] = cls.whale_driver().retrieve(pk, dimensions, metrics,
                                               period='all')
    return d
def rank_subdimensions_ratio(cls, pk, numerator, denominator='hits',
                             dimension='_', period=None):
    top, bottom = numerator, denominator
    period = period or Period.default_size()
    d_k = keyify(dimension)
    top_total = cls.totals(pk, dimension, top, periods=[period])[str(period)][d_k][top]
    bottom_total = cls.totals(pk, dimension, bottom, periods=[period])[str(period)][d_k][bottom]
    ratio_total = bottom_total and float(top_total / bottom_total) or 0
    ranked = dict()

    def info(sub):
        pps = cls.plotpoints(pk, sub, [top, bottom], period=period)[sub]
        ratio_points = cls.ratio_plotpoints(pk, top, bottom, sub, period=period)[sub]
        top_pps = pps[top]
        bottom_pps = pps[bottom]
        sub_top_sum = sum(top_pps.values())
        sub_bottom_sum = sum(bottom_pps.values())
        ratio = sub_bottom_sum and float(sub_top_sum / sub_bottom_sum) or 0
        # Guard against a zero overall ratio, as in the later variants
        difference = ratio_total and (ratio - ratio_total) / ratio_total or 0
        important = sub_bottom_sum > 5 and (difference > .1 or -difference > .1)
        return {
            'points': pps,
            'ratio_points': ratio_points,
            'difference': difference,
            'effect': difference * sub_bottom_sum,
            'important': important,
        }

    for sub in map(maybe_dumps, cls.all_subdimensions(pk, dimension)):
        ranked[sub] = info(sub)
    del(ranked[dimension])
    return ranked
def rank_subdimensions_scalar(cls, pk, dimension="_", metric="hits", period=None,
                              recursive=True, prune_parents=True, points=False):
    period = period or Period.default_size()
    d_k = keyify(dimension)
    total = cls.totals(pk, dimension, metric, periods=[period])[period][d_k][metric]
    ranked = dict()

    def info(sub):
        pps = cls.plotpoints(pk, sub, metric, period=period)[sub][metric]
        sub_total = sum(pps.values())
        data = {
            "points": pps,
            "score": sub_total,
            "important": sub_total > 10 and (sub_total > (total / 10)) or False,
            "effect": total - sub_total,
            "difference": total - sub_total,
            "value": sub_total,
            "count": sub_total,
            "dimension": sub,
        }
        if not points:
            del data["points"]
        return data

    _subs = recursive and cls.all_subdimensions or cls.get_subdimensions
    for sub in map(maybe_dumps, _subs(pk, dimension)):
        ranked[sub] = info(sub)
    # Prune parents
    if recursive and prune_parents:
        for sub, info in ranked.items():
            children = map(maybe_dumps, cls.get_subdimensions(pk, sub))
            children_total = sum(map(lambda s: ranked[s]["score"], children))
            if info["important"] and (info["score"] - children_total) < (total / 10):
                info["important"] = False
    return ranked
def scalar_plotpoints(cls, pk, dimensions=None, metrics=None, at=None, depth=0,
                      period=None, flot_time=False, points_type=OrderedDict):
    metrics = metrics or ['hits']
    at = at or times.now()
    if isinstance(metrics, basestring):
        metrics = [metrics]
    p_obj, ats, tzoffset = Period.get_days(period, at)
    p_s = str(p_obj)
    dts = list(p_obj.datetimes_strs(end=Period.parse_dt_str(at)))
    sparse = _retrieve(cls.whale_driver(), pk, dimensions, metrics, period=p_obj)
    nonsparse = defaultdict(dict)
    if flot_time:
        points_type = list
    for dim, mets in sparse.items():
        for met, points in mets.items():
            nonsparse[dim][met] = []
            use_method = False
            met_name = met
            if ':' in met:
                met_name, use_method = met.split(':')
            const_value = False
            if met_name in TIME_MATRIX:
                const_value = float(p_obj.getUnits()[0] / TIME_MATRIX[met_name])
            # Try to parse static metrics too
            elif met_name == '_count':
                const_value = len(dts)
            try:
                const_value = float(met_name)
            except ValueError:
                pass
            last_value = total = 0
            for dt in dts:
                dt_obj = Period.parse_dt_str(dt)
                if met_name == '_days_in_month':
                    from calendar import monthrange
                    const_value = monthrange(dt_obj.year, dt_obj.month)[1]
                if flot_time:
                    dt_t = to_flot_time(dt_obj)
                else:
                    dt_t = dt
                if const_value:
                    value = const_value
                else:
                    value = points[dt] if dt in points else 0
                if use_method == 'count' or not use_method:
                    value = value
                elif use_method in ['+', 'sum', 'add', 'cumulative']:
                    total += value
                    value = total
                elif use_method in ['_', 'set', 'last', 'level']:
                    if not last_value:
                        last_value = value
                    if not value:
                        value = last_value
                    last_value = value
                nonsparse[dim][met].append([dt_t, float(value)])
            nonsparse[dim][met] = points_type(nonsparse[dim][met])
    if depth > 0:
        for sub in cls.get_subdimensions(pk, dimensions):
            nonsparse = dict(
                nonsparse.items() +
                cls.plotpoints(pk, sub, metrics, at=at, depth=depth - 1,
                               period=period, flot_time=flot_time,
                               points_type=points_type).items())
    return nonsparse
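# Illustrative usage sketch, not from the source: assumes these are classmethods
# on the Whale class from whale.py and that the pk/metric names are hypothetical.
# A 'metric:method' suffix chooses how the raw buckets are folded, e.g.
# 'value:+' accumulates a running sum and 'price:_' carries the last seen value
# forward across empty buckets.
from whale import Whale

cumulative = Whale.scalar_plotpoints('homepage', '_', ['hits', 'value:+'],
                                     flot_time=True)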
def yesterday(cls, pk, metric, dimension='_'):
    return cls.total(pk, metric, dimension, Period.all_sizes()[1],
                     at=cls.now() - timedelta(days=1))
def today(cls, pk, metric, dimension='_'):
    return cls.total(pk, metric, dimension, Period.all_sizes()[1], at=cls.now())
""" query_str = """ {"{{dataset_id_field}}": "{{dataset.dataset_id}}", "{{form_meta_timeend}}": { "$gte": "{{period.start}}", "$lte": "{{period.end}}" } } """ aggregate_str = """ {"$group": {"_id": 0, "total": {"$sum": "$value.{{num_using_fp}}"}}} """ dataset_id = "5791793ac29b4d77b20cf1a04d8e7161" dataset = Dataset.find_one(dataset_id) period = Period.month_period(2013, 3) if dataset: fields = Observation.encoding(dataset) fields["dataset"] = dataset fields['dataset_id_field'] = fields[DATASET_ID] fields['period'] = Period.month_period(2013, 3) mapper = Code(Template(mapper_str).render(fields)) reducer = Code(Template(reducer_str).render(fields)) query = json.loads(Template(query_str).render(fields)) query['%(form_meta_timeend)s' % fields]['$gte'] = period.start query['%(form_meta_timeend)s' % fields]['$lte'] = period.end aggregate = json.loads(Template(aggregate_str).render(fields)) results = db.observations.map_reduce(mapper, reducer, 'myresults_fp', query=query) if results.count(): value = results.aggregate(aggregate)
def add_period(self, start, end):
    periods = self.periods
    new = Period(start, end)
    if new not in periods:
        periods.append(new)
def graph():
    from periods import Period
    params = {
        'pk': g('pk', '_', False),
        'dimension': g('dimension', '_', False),
        'metric': g('metric', 'hits', False),
        'depth': g('depth', 0),
        'tzoffset': g('tzoffset', 0.0),
        'period': g('period', str(Period.get(None))),
        'area': g('area', ''),
    }
    pk = params['pk']
    dimension = params['dimension']
    metric = params['metric']
    period = Period.get(params['period'])
    debug = g('debug', False)
    parent_div = g('parent_div', 'hailwhale_graphs')
    table = g('table', False)
    height = g('height', '300px')
    params['title'] = g('title', '')
    if not params['title']:
        pkname = g('pk', '')
        dimname = util.try_loads(g('dimension', 'Overall'))
        dimname = isinstance(dimname, list) and dimname[-1] or dimname
        params['title'] = '%s [%s]' % (util.maybe_dumps(pkname),
                                       util.maybe_dumps(dimname))
    if isinstance(table, basestring):
        table = table.lower() == 'true'
    hwurl = req.GET.get('hwurl', req.url.split('graph.js')[0])
    params['autoupdate'] = g('live', True)
    params['interval'] = g('interval', 6000)
    graph_id = hashlib.md5(str(params)).hexdigest()
    include_string = \
        "document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/jquery.min.js'></script>\");" % hwurl
    if table:
        try:
            columns = int(g('table', 6, int))
        except:
            columns = 6
        pps = Whale.plotpoints(pk, dimension, metric, period=period,
                               depth=params['depth'])
        dates = [p for p in Period.get(period).datetimes_strs()][(-1 * columns - 1):]
        table_str = '''
            $('#{id} .table').html('<table style="width: 100%">
            <tr>
                <th></th>
                <th></th>
                {columns}
            </tr>
        '''.strip().format(id=graph_id, columns=' '.join([
            '<th>%s</th>' % date.replace('00:00:00 ', '') for date in dates]))
        dimensions = pps.keys()
        if '_' in dimensions:
            dimensions.remove('_')
        dimensions = ['_'] + dimensions
        for dimension_counter, dimension in enumerate(dimensions):
            checked = 'off'
            if dimension_counter < 10:
                checked = 'on'
            if dimension == '_':
                if params['depth']:
                    continue
                dimension_name = '<b>Overall</b>'
            else:
                dimension_name = dimension.capitalize()
            table_str += '''
            <tr>
                <td><input id="" style="display: none" type="checkbox" value="{checked}" name="checkbox-{pk}-{dimension}"></td>
                <td>{dimension_name}</td>
                {columns}
            </tr>
            '''.format(pk=pk, dimension=dimension, checked=checked,
                       dimension_name=dimension_name,
                       columns=' '.join(["<td>%s</td>" % int(pps[dimension][metric][date])
                                         for date in dates])).strip()
        table_str += '''</table>');'''
    else:
        table_str = ''
    include_string = \
        "document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/hailwhale.min.js'></script>\");" % hwurl
    return_string = '''
        appended=false;\n
        document.write('<div id="{id}"><div class="graph" style="height: {height}"></div><div class="table"></div></div>');\n
        function jqinit() {{\n
            if(typeof(jQuery) == 'undefined' || typeof(jQuery.hailwhale) == 'undefined') {{\n
                if(!appended) {{\n
                    appended = true;\n
                    {include_string}\n
                }}\n
                setTimeout(jqinit, 250);\n
            }} else {{\n
                $(function() {{\n
                    $.hailwhale('{hwurl}').add_graph('{id} .graph', {options});\n
                    {table_str}
                }});\n
            }}
        }}
        jqinit();\n
        '''.format(parent_div=parent_div, include_string=include_string,
                   hwurl=hwurl, table_str=table_str, height=height, id=graph_id,
                   options=util.maybe_dumps(params))
    return return_string
        (indicator_def['type'], indicator_def['description'], value, numerator,
         denominator))
    print (indicator_def['type'], indicator_def['name'], value, numerator,
           denominator)

    filename = "%(name)s_mvp_indicator_%(start)s_to_%(end)s.csv" % {
        'name': name,
        'start': period.start.strftime('%Y-%m-%d'),
        'end': period.end.strftime('%Y-%m-%d')}
    filename = os.path.join(REPORTS_DIR, filename)

    with open(filename, 'wb') as f:
        csv_writer = csv.writer(
            f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerows(RESULTS)

    print "\nSuccessfully generated %s." % filename


if __name__ == '__main__':
    arguments = sys.argv[1:]

    if len(arguments) < 3:
        print (u"Expected site_name YEAR MONTH\n"
               u"Eg.\n\t python report.py ruhiira 2013 03")
    else:
        name = arguments[0]
        year = int(arguments[1])
        month = int(arguments[2])
        period = Period.month_period(year, month)
        indicator_name = None
        if len(arguments) > 3:
            indicator_name = arguments[3]
        _generate_indicator_export(name, period, indicator_name)
""" query_str = """ {"{{dataset_id_field}}": "{{dataset.dataset_id}}", "{{form_meta_timeend}}": { "$gte": "{{period.start}}", "$lte": "{{period.end}}" } } """ aggregate_str = """ {"$group": {"_id": 0, "total": {"$sum": "$value.{{num_using_fp}}"}}} """ dataset_id = "5791793ac29b4d77b20cf1a04d8e7161" dataset = Dataset.find_one(dataset_id) period = Period.month_period(2013, 3) if dataset: fields = Observation.encoding(dataset) fields["dataset"] = dataset fields['dataset_id_field'] = fields[DATASET_ID] fields['period'] = Period.month_period(2013, 3) mapper = Code(Template(mapper_str).render(fields)) reducer = Code(Template(reducer_str).render(fields)) query = json.loads(Template(query_str).render(fields)) query['%(form_meta_timeend)s' % fields]['$gte'] = period.start query['%(form_meta_timeend)s' % fields]['$lte'] = period.end aggregate = json.loads(Template(aggregate_str).render(fields)) results = db.observations.map_reduce(mapper, reducer, 'myresults_fp',