def rank_subdimensions_scalar(cls, pk, dimension='_', metric='hits', period=None):
    """Rank every subdimension of ``dimension`` by its summed ``metric``.

    Each key produced by ``cls.all_subdimensions(pk, dimension)`` (passed
    through ``maybe_dumps``) gets its plotpoints for ``metric`` fetched and
    summed.  An entry is flagged ``important`` when its sum is above 10 and
    above a tenth of the parent dimension's total.  A flagged entry is then
    un-flagged when its sum minus the sums of its direct children
    (``cls.get_subdimensions``) is below a tenth of the parent total.

    Args:
        cls: Object providing ``totals``, ``plotpoints``,
            ``all_subdimensions`` and ``get_subdimensions``.
        pk: Identifier forwarded unchanged to every ``cls`` lookup.
        dimension: Parent dimension whose subdimensions are ranked.
        metric: Name of the metric to sum.
        period: Period to query; falls back to ``Period.default_size()``.

    Returns:
        dict mapping each subdimension key to a dict with the keys
        ``points``, ``total`` and ``important``.  The entry keyed by
        ``dimension`` itself is never part of the result.
    """
    period = period or Period.default_size()
    d_k = keyify(dimension)
    total = cls.totals(pk, dimension, metric, periods=[period])[str(period)][d_k][metric]

    def describe(sub):
        pps = cls.plotpoints(pk, sub, metric, period=period)[sub][metric]
        sub_total = sum(pps.values())
        return {
            'points': pps,
            'total': sub_total,
            'important': sub_total > 10 and sub_total > (total / 10),
        }

    ranked = dict()
    for sub in map(maybe_dumps, cls.all_subdimensions(pk, dimension)):
        ranked[sub] = describe(sub)

    # Drop the parent's own entry; pop() tolerates it not having been
    # reported, where a bare ``del`` would raise KeyError.
    ranked.pop(dimension, None)

    # Prune parents
    for sub, entry in ranked.items():
        if not entry['important']:
            # Only flagged entries can be un-flagged, so skip the child lookup.
            continue
        children = map(maybe_dumps, cls.get_subdimensions(pk, sub))
        children_total = sum(ranked[child]['total'] for child in children)
        if (entry['total'] - children_total) < (total / 10):
            entry['important'] = False
    return ranked
def rank_subdimensions_scalar(cls, pk, dimension='_', metric='hits', period=None,
                              recursive=True, prune_parents=True, points=False):
    """Rank the subdimensions of ``dimension`` by their summed ``metric``.

    Args:
        cls: Object providing ``cached_totals``, ``plotpoints``,
            ``all_subdimensions`` and ``get_subdimensions``.
        pk: Identifier forwarded unchanged to every ``cls`` lookup.
        dimension: Parent dimension whose subdimensions are ranked.
        metric: Name of the metric to sum.
        period: Period to query; falls back to ``Period.default_size()``.
        recursive: When true the keys come from ``cls.all_subdimensions``,
            otherwise from ``cls.get_subdimensions``.
        prune_parents: When true (and ``recursive`` is true) an important
            entry is un-flagged if its score minus the scores of its direct
            children is below a tenth of the parent total.
        points: When true each entry keeps the raw plotpoints under
            ``points``; otherwise that key is removed.

    Returns:
        dict mapping each subdimension key to a dict with the keys
        ``score``, ``important``, ``effect``, ``difference``, ``value``,
        ``count``, ``dimension`` and optionally ``points``.
    """
    period = period or Period.default_size()
    d_k = keyify(dimension)
    # Index by the period's string form, the same way rank_subdimensions_ratio
    # reads the result of the same totals API.
    total = cls.cached_totals(pk, dimension, metric, periods=[period])[str(period)][d_k][metric]

    def describe(sub):
        pps = cls.plotpoints(pk, sub, metric, period=period)[sub][metric]
        sub_total = sum(pps.values())
        data = {
            'points': pps,
            'score': sub_total,
            'important': sub_total > 10 and sub_total > (total / 10),
            'effect': total - sub_total,
            'difference': total - sub_total,
            'value': sub_total,
            'count': sub_total,
            'dimension': sub,
        }
        if not points:
            del data['points']
        return data

    list_subs = cls.all_subdimensions if recursive else cls.get_subdimensions
    ranked = dict()
    for sub in map(maybe_dumps, list_subs(pk, dimension)):
        ranked[sub] = describe(sub)

    # Prune parents
    if recursive and prune_parents:
        for sub, entry in ranked.items():
            if not entry['important']:
                # Only flagged entries can be un-flagged, so skip the child lookup.
                continue
            children = map(maybe_dumps, cls.get_subdimensions(pk, sub))
            children_total = sum(ranked[child]['score'] for child in children)
            if (entry['score'] - children_total) < (total / 10):
                entry['important'] = False
    return ranked
def plotpoints(cls, pk, dimensions=None, metrics=None, period=None, overall=True):
    """Retrieve sparse series from the whale driver and fill in the gaps.

    Args:
        cls: Object whose ``whale_driver()`` returns the driver to query.
        pk: Identifier forwarded to the driver's ``retrieve``.
        dimensions: Forwarded to ``retrieve`` unchanged.
        metrics: Metrics to fetch; defaults to ``['hits']`` when falsy.
        period: Period string; it is split on ``'x'`` and the pieces are
            passed to ``Period``.  Defaults to ``Period.default_size()``.
        overall: Forwarded to ``retrieve`` unchanged.

    Returns:
        ``defaultdict(dict)`` mapping each dimension key of the driver's
        result to ``{metric: [[flot_time, value], ...]}``, with one pair per
        datetime string of the period.  Datetimes absent from the sparse
        data get the value ``0.0``.
    """
    if not metrics:
        metrics = ['hits',]
    if not period:
        period = Period.default_size()

    driver = cls.whale_driver()
    sparse = driver.retrieve(pk, dimensions, metrics, period=period, overall=overall)

    nonsparse = defaultdict(dict)
    for dim_key, by_metric in sparse.items():
        for metric_name, sparse_points in by_metric.items():
            period_args = period.split('x')
            stamps = Period(*period_args).datetimes_strs()
            nonsparse[dim_key][metric_name] = [
                [
                    to_flot_time(Period.parse_dt_str(stamp)),
                    float(sparse_points[stamp] if stamp in sparse_points else 0),
                ]
                for stamp in stamps
            ]
    return nonsparse
def rank_subdimensions_ratio(
    cls, pk, numerator, denominator="hits", dimension="_", period=None, recursive=True, points=False
):
    """Rank the subdimensions of ``dimension`` by a numerator/denominator ratio.

    The parent ratio is the ``numerator`` total divided by the
    ``denominator`` total.  For every subdimension the same ratio is
    computed from its summed plotpoints and compared with the parent ratio.

    Args:
        cls: Object providing ``totals``, ``plotpoints``,
            ``all_subdimensions`` and ``get_subdimensions``.
        pk: Identifier forwarded unchanged to every ``cls`` lookup.
        numerator: Metric name used as the top of the ratio.
        denominator: Metric name used as the bottom of the ratio.
        dimension: Parent dimension whose subdimensions are ranked.
        period: Period to query; falls back to ``Period.default_size()``.
        recursive: When true the keys come from ``cls.all_subdimensions``,
            otherwise from ``cls.get_subdimensions``.
        points: When true each entry keeps the raw plotpoints under
            ``points``; otherwise that key is removed.

    Returns:
        dict mapping each subdimension key to a dict with the keys
        ``score`` (the ratio), ``difference`` (relative to the parent
        ratio), ``effect``, ``value``, ``count``, ``important``,
        ``dimension`` and optionally ``points``.
    """
    top, bottom = numerator, denominator
    period = period or Period.default_size()
    d_k = keyify(dimension)
    top_total = cls.totals(pk, dimension, top, periods=[period])[str(period)][d_k][top]
    bottom_total = cls.totals(pk, dimension, bottom, periods=[period])[str(period)][d_k][bottom]
    # Convert before dividing: float(a / b) truncates first when both
    # operands are ints under Python 2 division rules.
    ratio_total = float(top_total) / float(bottom_total) if bottom_total else 0

    def describe(sub):
        pps = cls.plotpoints(pk, sub, [top, bottom, "%s/%s" % (top, bottom)], period=period)[sub]
        sub_top_sum = sum(pps[top].values())
        sub_bottom_sum = sum(pps[bottom].values())
        ratio = float(sub_top_sum) / float(sub_bottom_sum) if sub_bottom_sum else 0
        # Relative deviation from the parent ratio; 0 when there is no parent ratio.
        difference = (ratio - ratio_total) / ratio_total if ratio_total else 0
        important = sub_bottom_sum > 5 and math.fabs(difference) > 0.1
        data = {
            "points": pps,
            "score": ratio,
            "difference": difference,
            "effect": difference * sub_bottom_sum * ratio_total,
            "value": sub_top_sum,
            "count": sub_bottom_sum,
            "important": important,
            "dimension": sub,
        }
        if not points:
            del data["points"]
        return data

    list_subs = cls.all_subdimensions if recursive else cls.get_subdimensions
    ranked = dict()
    for sub in map(maybe_dumps, list_subs(pk, dimension)):
        ranked[sub] = describe(sub)
    return ranked
def rank_subdimensions_ratio(cls, pk, numerator, denominator='hits', dimension='_',
                             period=None, recursive=True, points=False):
    """Rank the subdimensions of ``dimension`` by a numerator/denominator ratio.

    The parent ratio is the ``numerator`` total divided by the
    ``denominator`` total, both read from ``cls.cached_totals``.  For every
    subdimension the same ratio is computed from its summed plotpoints and
    compared with the parent ratio.

    Args:
        cls: Object providing ``cached_totals``, ``plotpoints``,
            ``all_subdimensions`` and ``get_subdimensions``.
        pk: Identifier forwarded unchanged to every ``cls`` lookup.
        numerator: Metric name used as the top of the ratio.
        denominator: Metric name used as the bottom of the ratio.
        dimension: Parent dimension whose subdimensions are ranked.
        period: Period to query; falls back to ``Period.default_size()``.
        recursive: When true the keys come from ``cls.all_subdimensions``,
            otherwise from ``cls.get_subdimensions``.
        points: When true each entry keeps the raw plotpoints under
            ``points``; otherwise that key is removed.

    Returns:
        dict mapping each subdimension key to a dict with the keys
        ``score`` (the ratio), ``difference`` (relative to the parent
        ratio), ``effect``, ``value``, ``count``, ``important``,
        ``dimension`` and optionally ``points``.
    """
    top, bottom = numerator, denominator
    period = period or Period.default_size()
    d_k = keyify(dimension)
    top_total = cls.cached_totals(pk, dimension, top, periods=[period])[str(period)][d_k][top]
    bottom_total = cls.cached_totals(pk, dimension, bottom, periods=[period])[str(period)][d_k][bottom]
    # Convert before dividing: float(a / b) truncates first when both
    # operands are ints under Python 2 division rules.
    ratio_total = float(top_total) / float(bottom_total) if bottom_total else 0

    def describe(sub):
        pps = cls.plotpoints(pk, sub, [top, bottom, '%s/%s' % (top, bottom)], period=period)[sub]
        sub_top_sum = sum(pps[top].values())
        sub_bottom_sum = sum(pps[bottom].values())
        ratio = float(sub_top_sum) / float(sub_bottom_sum) if sub_bottom_sum else 0
        # Relative deviation from the parent ratio; 0 when there is no parent ratio.
        difference = (ratio - ratio_total) / ratio_total if ratio_total else 0
        important = sub_bottom_sum > 5 and math.fabs(difference) > .1
        data = {
            'points': pps,
            'score': ratio,
            'difference': difference,
            'effect': difference * sub_bottom_sum * ratio_total,
            'value': sub_top_sum,
            'count': sub_bottom_sum,
            'important': important,
            'dimension': sub,
        }
        if not points:
            del data['points']
        return data

    list_subs = cls.all_subdimensions if recursive else cls.get_subdimensions
    ranked = dict()
    for sub in map(maybe_dumps, list_subs(pk, dimension)):
        ranked[sub] = describe(sub)
    return ranked
def plotpoints(self, categories=None, dimensions=None, metrics=None, period=None, depth=0):
    """Retrieve sparse series from the driver and fill in the gaps.

    Args:
        categories: A single category string or a list of categories; a
            string (or string subclass) is wrapped in a one-element list.
            Defaults to ``''``.
        dimensions: Forwarded to ``retrieve``; defaults to the JSON text of
            an empty list (``'[]'``).
        metrics: Metrics to fetch; defaults to ``['hits']`` when falsy.
        period: Period string; it is split on ``'x'`` and the pieces are
            passed to ``Period``.  Defaults to ``Period.default_size()``.
        depth: Forwarded to ``retrieve`` unchanged.

    Returns:
        ``defaultdict(dict)`` mapping each dimension key of the driver's
        result to ``{metric: [[flot_time, value], ...]}``, with one pair per
        datetime string of the period.  Datetimes absent from the sparse
        data get the value ``0.0``.
    """
    categories = categories or ''
    dimensions = dimensions or json.dumps([])
    # Convert categories to a list, if it's not.  isinstance() also accepts
    # subclasses of the string types, which an exact type() match rejects.
    if isinstance(categories, (str, unicode)):
        categories = [categories,]
    metrics = metrics or ['hits',]
    period = period or Period.default_size()
    sparse = self.driver().retrieve(categories, dimensions, metrics, period=period, depth=depth)

    nonsparse = defaultdict(dict)
    # Distinct loop names keep the ``dimensions``/``metrics`` arguments intact.
    for dimension_key, metric_points in sparse.items():
        for metric, points in metric_points.items():
            dts = Period(*period.split('x')).datetimes_strs()
            series = []
            for dt in dts:
                flot_time = to_flot_time(Period.parse_dt_str(dt))
                value = points[dt] if dt in points else 0
                series.append([flot_time, float(value)])
            nonsparse[dimension_key][metric] = series
    return nonsparse
def rank_subdimensions_scalar(
    cls, pk, dimension="_", metric="hits", period=None, recursive=True, prune_parents=True, points=False
):
    """Rank the subdimensions of ``dimension`` by their summed ``metric``.

    Args:
        cls: Object providing ``totals``, ``plotpoints``,
            ``all_subdimensions`` and ``get_subdimensions``.
        pk: Identifier forwarded unchanged to every ``cls`` lookup.
        dimension: Parent dimension whose subdimensions are ranked.
        metric: Name of the metric to sum.
        period: Period to query; falls back to ``Period.default_size()``.
        recursive: When true the keys come from ``cls.all_subdimensions``,
            otherwise from ``cls.get_subdimensions``.
        prune_parents: When true (and ``recursive`` is true) an important
            entry is un-flagged if its score minus the scores of its direct
            children is below a tenth of the parent total.
        points: When true each entry keeps the raw plotpoints under
            ``points``; otherwise that key is removed.

    Returns:
        dict mapping each subdimension key to a dict with the keys
        ``score``, ``important``, ``effect``, ``difference``, ``value``,
        ``count``, ``dimension`` and optionally ``points``.
    """
    period = period or Period.default_size()
    d_k = keyify(dimension)
    # Index by the period's string form, the same way rank_subdimensions_ratio
    # reads the result of the same totals API.
    total = cls.totals(pk, dimension, metric, periods=[period])[str(period)][d_k][metric]

    def describe(sub):
        pps = cls.plotpoints(pk, sub, metric, period=period)[sub][metric]
        sub_total = sum(pps.values())
        data = {
            "points": pps,
            "score": sub_total,
            "important": sub_total > 10 and sub_total > (total / 10),
            "effect": total - sub_total,
            "difference": total - sub_total,
            "value": sub_total,
            "count": sub_total,
            "dimension": sub,
        }
        if not points:
            del data["points"]
        return data

    list_subs = cls.all_subdimensions if recursive else cls.get_subdimensions
    ranked = dict()
    for sub in map(maybe_dumps, list_subs(pk, dimension)):
        ranked[sub] = describe(sub)

    # Prune parents
    if recursive and prune_parents:
        for sub, entry in ranked.items():
            if not entry["important"]:
                # Only flagged entries can be un-flagged, so skip the child lookup.
                continue
            children = map(maybe_dumps, cls.get_subdimensions(pk, sub))
            children_total = sum(ranked[child]["score"] for child in children)
            if (entry["score"] - children_total) < (total / 10):
                entry["important"] = False
    return ranked
def rank_subdimensions_ratio(cls, pk, numerator, denominator='hits', dimension='_', period=None):
    """Rank every subdimension of ``dimension`` by a numerator/denominator ratio.

    The parent ratio is the ``numerator`` total divided by the
    ``denominator`` total.  For every key produced by
    ``cls.all_subdimensions(pk, dimension)`` the same ratio is computed from
    the summed plotpoints and compared with the parent ratio.

    Args:
        cls: Object providing ``totals``, ``plotpoints``,
            ``ratio_plotpoints`` and ``all_subdimensions``.
        pk: Identifier forwarded unchanged to every ``cls`` lookup.
        numerator: Metric name used as the top of the ratio.
        denominator: Metric name used as the bottom of the ratio.
        dimension: Parent dimension whose subdimensions are ranked.
        period: Period to query; falls back to ``Period.default_size()``.

    Returns:
        dict mapping each subdimension key to a dict with the keys
        ``points``, ``ratio_points``, ``difference`` (relative to the parent
        ratio, 0 when the parent ratio is 0), ``effect`` and ``important``.
        The entry keyed by ``dimension`` itself is never part of the result.
    """
    top, bottom = numerator, denominator
    period = period or Period.default_size()
    d_k = keyify(dimension)
    top_total = cls.totals(pk, dimension, top, periods=[period])[str(period)][d_k][top]
    bottom_total = cls.totals(pk, dimension, bottom, periods=[period])[str(period)][d_k][bottom]
    # Convert before dividing: float(a / b) truncates first when both
    # operands are ints under Python 2 division rules.
    ratio_total = float(top_total) / float(bottom_total) if bottom_total else 0

    def describe(sub):
        pps = cls.plotpoints(pk, sub, [top, bottom], period=period)[sub]
        ratio_points = cls.ratio_plotpoints(pk, top, bottom, sub, period=period)[sub]
        sub_top_sum = sum(pps[top].values())
        sub_bottom_sum = sum(pps[bottom].values())
        ratio = float(sub_top_sum) / float(sub_bottom_sum) if sub_bottom_sum else 0
        # ratio_total is 0 whenever the denominator total is falsy, so guard
        # the division instead of raising ZeroDivisionError.
        difference = (ratio - ratio_total) / ratio_total if ratio_total else 0
        important = sub_bottom_sum > 5 and abs(difference) > .1
        return {
            'points': pps,
            'ratio_points': ratio_points,
            'difference': difference,
            'effect': difference * sub_bottom_sum,
            'important': important,
        }

    ranked = dict()
    for sub in map(maybe_dumps, cls.all_subdimensions(pk, dimension)):
        ranked[sub] = describe(sub)

    # Drop the parent's own entry; pop() tolerates it not having been
    # reported, where a bare ``del`` would raise KeyError.
    ranked.pop(dimension, None)
    return ranked