Пример #1
0
 def scalar_plotpoints(
     cls, pk, dimensions=None, metrics=None, depth=0, period=None, flot_time=False, points_type=dict
 ):
     """Return the stored points for ``pk`` with missing timestamps filled with 0.

     The driver's ``retrieve`` result (dimension -> metric -> points) is
     expanded so that every timestamp from ``period.datetimes_strs()`` has a
     ``[x, value]`` pair; the pairs are then wrapped in ``points_type``.

     :param metrics: a single metric name or a list of them; defaults to
         ``["hits"]``.
     :param depth: when greater than 0, results of ``cls.plotpoints`` for each
         subdimension are merged in (with ``depth - 1``).
     :param flot_time: when true, x values are converted with
         ``to_flot_time`` and the container is forced to ``list``.
     :param points_type: callable applied to each list of ``[x, value]`` pairs.
     :returns: mapping of dimension -> metric -> ``points_type(pairs)``.
     """
     if not metrics:
         metrics = ["hits"]
     if isinstance(metrics, basestring):
         metrics = [metrics]
     period = Period.get(period)
     stored = cls.whale_driver().retrieve(pk, dimensions, metrics, period=period)
     filled = defaultdict(dict)
     # flot_time overrides whatever container the caller asked for.
     container = list if flot_time else points_type
     for dim_key, by_metric in stored.items():
         for metric_name, known in by_metric.items():
             series = []
             for stamp in period.datetimes_strs():
                 if flot_time:
                     x_value = to_flot_time(Period.parse_dt_str(stamp))
                 else:
                     x_value = stamp
                 y_value = known[stamp] if stamp in known else 0
                 series.append([x_value, float(y_value)])
             filled[dim_key][metric_name] = container(series)
     if depth > 0:
         for sub in cls.get_subdimensions(pk, dimensions):
             child = cls.plotpoints(
                 pk, sub, metrics, depth=depth - 1, period=period, flot_time=flot_time, points_type=container
             )
             # Later (child) entries win on duplicate dimension keys.
             filled = dict(filled.items() + child.items())
     return filled
Пример #2
0
 def zranked(cls, pk, parent_dimension='_', metric='hits', period=None,
         at=None, start=0, size=10, sort_dir=None, tzoffset=None):
     """Return ranked members for ``metric`` under ``parent_dimension``.

     The dates come from ``Period.get_days(period, at)``; when that yields no
     dates, the current time (``cls.now()`` converted with the returned
     offset) is used instead.  Each member returned by ``_ranked`` is passed
     through ``try_loads``.
     """
     # NOTE(review): the ``tzoffset`` argument is not forwarded to
     # ``Period.get_days``; the name is rebound to the value that call returns.
     period, ats, tzoffset = Period.get_days(period, at)
     if ats:
         when = ats
     else:
         when = [Period.convert(cls.now(), tzoffset)]
     members = _ranked(cls.whale_driver(), pk, parent_dimension, metric,
                       period, when, start, size, sort_dir=sort_dir)
     return map(try_loads, members)
Пример #3
0
def _store(redis, pk, dimension, metric, period, dt, count, method='set',
        rank=False):
    """Write one data point and register its dimension bookkeeping.

    :param redis: client (or pipeline) exposing ``sadd``, ``hset``,
        ``execute_command`` and ``zadd``.
    :param method: ``'set'`` stores ``float(count)`` at ``dt``; ``'incr'``
        issues ``HINCRBYFLOAT`` with ``float(count)``.
    :param rank: when true (and the point is not a top-level one), the value
        is also written to a per-``dt`` rank sorted set.
    :returns: the stored float for ``'set'``; whatever ``execute_command``
        returns for ``'incr'``.
    :raises ValueError: if ``method`` is neither ``'set'`` nor ``'incr'``.
    """
    # Reject unknown methods before touching Redis; previously they fell
    # through both branches and crashed later on an unbound ``new_val``.
    if method not in ('set', 'incr'):
        raise ValueError("method must be 'set' or 'incr', got %r" % (method,))

    interval = Period.get(period).interval
    # Hash holding the points for this pk/dimension/interval/metric.
    key = keyify(pk, dimension, interval, metric)
    # Store pk dimensions
    dimension_key = keyify('dimensions', pk)
    dimension_json = keyify(dimension)

    # ``_added_dimensions`` / ``_added_subdimensions`` are module-level caches
    # (defined outside this block) used to skip repeated SADD calls.
    if dimension_json not in _added_dimensions[dimension_key]:
        redis.sadd(dimension_key, dimension_json)
        _added_dimensions[dimension_key].append(dimension_json)
    # Store dimensional subdimensions
    if dimension != '_':
        subdimension_key = keyify('subdimensions', pk, parent(dimension))
        if dimension_json not in _added_subdimensions[subdimension_key]:
            redis.sadd(subdimension_key, dimension_json)
            _added_subdimensions[subdimension_key].append(dimension_json)

    if method == 'set':
        new_val = float(count)
        redis.hset(key, dt, new_val)
    else:
        new_val = redis.execute_command('HINCRBYFLOAT', key, dt, float(count))

    if rank and (isinstance(try_loads(pk), list) or dimension != '_'):
        if isinstance(pk, list) and dimension == '_':
            tgt_pk = parent(pk)
            tgt_dimension = dimension
        else:
            tgt_pk = pk
            tgt_dimension = parent(dimension)
        rank_key = keyify('rank', tgt_pk, tgt_dimension, interval, dt, metric)
        # NOTE(review): argument order (member, score) depends on the
        # redis client version in use -- left exactly as in the original.
        redis.zadd(rank_key, dimension_json, new_val)
    return new_val
Пример #4
0
def _ranked(redis, pk, parent_dimension, metric, period, ats, start=0, size=10,
        sort_dir=None):
    """Return up to ``size`` ranked members for ``metric`` starting at ``start``.

    ``metric`` is split by ``parse_formula`` into a numerator and an optional
    denominator.  When several dates are given, their per-date rank sets are
    merged with ``ZUNIONSTORE``; for a ratio, a Lua script writes
    ``numerator / denominator`` scores into the final rank key.

    :param ats: list of date keys the ranking covers.
    :param sort_dir: ``None``/empty, ``'-'``, ``'desc'`` or ``'high'`` (any
        case) select descending order; anything else ascending.
    :returns: the members returned by ``redis.zrange``; an empty list when
        ``size`` is not positive.
    """
    top, bot = parse_formula(metric)
    interval = Period.get(period).interval

    def rank_keyify(at, met):
        return keyify('rank', pk, parent_dimension, interval, at, met)

    final_rank_key = rank_keyify(ats, metric)

    def squash_ats(met):
        # Merge the per-date sets into one set keyed by the whole date list.
        if len(ats) > 1:
            per_at_keys = [rank_keyify(at, met) for at in ats]
            # Explicit loop: map() for side effects is lazy on Python 3.
            for per_at_key in per_at_keys:
                redis.zremrangebyscore(per_at_key, 0, 0)
            redis.zunionstore(rank_keyify(ats, met), per_at_keys)

    squash_ats(top)
    if bot:
        squash_ats(bot)
        top_key, bot_key = rank_keyify(ats, top), rank_keyify(ats, bot)
        redis.execute_command("eval", """
        for key_i, key_n in ipairs(redis.call("zrange", KEYS[2], 0, -1)) do
            local top_s = tonumber(redis.call("zscore", KEYS[1], key_n))
            local bot_s = tonumber(redis.call("zscore", KEYS[2], key_n))
            if top_s and bot_s and bot_s > 0 then
                redis.call("zadd", KEYS[3], top_s/bot_s, key_n)
            end
        end
        """, 3, top_key, bot_key, final_rank_key)
        redis.zremrangebyscore(final_rank_key, 0, 0)

    if size <= 0:
        # A stop index of ``start - 1`` could wrap to -1 and return everything.
        return []
    descending = not sort_dir or sort_dir.upper() in ('-', 'DESC', 'HIGH')
    # ZRANGE's stop index is inclusive, so ``size`` members end at
    # ``start + size - 1`` (the original returned one member too many).
    return redis.zrange(final_rank_key, start, start + size - 1,
                        desc=descending)
Пример #5
0
    def rank_subdimensions_scalar(cls, pk, dimension='_', metric='hits', period=None):
        """Summarise every subdimension of ``dimension`` for a scalar metric.

        Returns a dict keyed by the ``maybe_dumps``-ed subdimension whose
        values hold ``'points'``, ``'total'`` (sum of the points) and
        ``'important'``.  The entry for ``dimension`` itself is removed, and
        a parent loses its ``'important'`` flag when its total minus its
        direct children's totals is below a tenth of the overall total.
        """
        if not period:
            period = Period.default_size()
        d_k = keyify(dimension)
        total = cls.totals(pk, dimension, metric, periods=[period])[str(period)][d_k][metric]

        def describe(sub):
            series = cls.plotpoints(pk, sub, metric, period=period)[sub][metric]
            sub_total = sum(series.values())
            flagged = sub_total > 10 and (sub_total > (total / 10)) or False
            return {
                'points': series,
                'total': sub_total,
                'important': flagged
            }

        ranked = {}
        for name in map(maybe_dumps, cls.all_subdimensions(pk, dimension)):
            ranked[name] = describe(name)
        del ranked[dimension]

        # Prune parents
        for name, entry in ranked.items():
            child_names = map(maybe_dumps, cls.get_subdimensions(pk, name))
            children_total = sum(ranked[child]['total'] for child in child_names)
            if entry['important'] and (entry['total'] - children_total) < (total / 10):
                entry['important'] = False
        return ranked
Пример #6
0
 def total(cls,
           pk,
           metric,
           dimension='_',
           period=None,
           at=None,
           index=None,
           tzoffset=None):
     """Return a single value for ``metric``: one point, a sum, or a ratio.

     :param metric: a metric name or a formula; ``parse_formula`` splits it
         into a numerator and an optional denominator.
     :param at: forwarded to ``Period.get_days`` to obtain the dates summed.
     :param index: when an int, the value of the point at that position is
         returned instead of a sum.  Defaults to the last point (-1) only
         when no dates are available and ``index`` was not given.
     :param tzoffset: forwarded to ``Period.get_days``.
     :returns: the selected point value, the sum over the dates, or
         ``numerator_sum / denominator_sum`` (0 when the denominator is 0).
     """
     # Forward the caller's offset; it was previously hard-coded to None.
     period, ats, tzoffset = Period.get_days(period, at, tzoffset=tzoffset)
     top, bot = parse_formula(metric)
     # ``index is None`` (not ``not index``) so an explicit index of 0 is
     # honoured rather than being replaced by -1.
     if not ats and index is None:
         index = -1
     if isinstance(index, int):
         pps = cls.plotpoints(pk,
                              dimension,
                              metric,
                              period=period,
                              points_type=list)
         return pps[dimension][metric][index][1]

     def summed(met):
         # Sum of the points of ``met`` that fall on the requested dates.
         series = cls.plotpoints(pk, dimension, met, period=period)[dimension][met]
         return sum(series[dt] for dt in ats if dt in series)

     if not bot:
         return summed(metric)
     top_tot = summed(top)
     bot_tot = summed(bot)
     return top_tot / bot_tot if bot_tot else 0
Пример #7
0
    def rank_subdimensions_scalar(cls, pk, dimension='_', metric='hits',
            period=None, recursive=True, prune_parents=True, points=False):
        """Summarise the subdimensions of ``dimension`` for a scalar metric.

        :param recursive: use ``cls.all_subdimensions`` when true, otherwise
            ``cls.get_subdimensions``.
        :param prune_parents: with ``recursive``, clear ``'important'`` on a
            parent whose score minus its direct children's scores is below a
            tenth of the overall total.
        :param points: keep the raw points under ``'points'`` when true.
        :returns: dict keyed by the ``maybe_dumps``-ed subdimension; each
            value carries score/important/effect/difference/value/count/
            dimension (and optionally points).
        """
        if not period:
            period = Period.default_size()
        d_k = keyify(dimension)
        total = cls.cached_totals(pk, dimension, metric, periods=[period])[period][d_k][metric]

        def describe(sub):
            series = cls.plotpoints(pk, sub, metric, period=period)[sub][metric]
            sub_total = sum(series.values())
            summary = {
                'points': series,
                'score': sub_total,
                'important': sub_total > 10 and (sub_total > (total / 10)) or False,
                'effect': total - sub_total,
                'difference': total - sub_total,
                'value': sub_total,
                'count': sub_total,
                'dimension': sub
            }
            if not points:
                summary.pop('points')
            return summary

        if recursive:
            list_subs = cls.all_subdimensions
        else:
            list_subs = cls.get_subdimensions

        ranked = {}
        for name in map(maybe_dumps, list_subs(pk, dimension)):
            ranked[name] = describe(name)

        # Prune parents
        if recursive and prune_parents:
            for name, entry in ranked.items():
                child_names = map(maybe_dumps, cls.get_subdimensions(pk, name))
                children_total = sum(ranked[child]['score'] for child in child_names)
                if entry['important'] and (entry['score'] - children_total) < (total / 10):
                    entry['important'] = False
        return ranked
Пример #8
0
    def __call__(self, *args, **kwargs):
        """Call the wrapped function through a JSON cache.

        A leading ``Whale`` class (or subclass) argument is dropped, and the
        function is always invoked with ``Whale`` as its first argument.  The
        keyword ``unmemoize=True`` deletes the cached entry first.  When a
        ``period`` keyword is given it is normalised to ``str(Period)`` and
        the entry expires after ``interval // 5`` seconds; otherwise after 60.

        :returns: the cached (JSON round-tripped) value, or the freshly
            computed value on a cache miss.
        """
        import inspect
        from whale import Whale
        from periods import Period

        # The original condition parsed as ``(len(args) and args[0] == Whale)
        # or issubclass(args[0], Whale)``, which raised IndexError on empty
        # args and TypeError when the first argument was not a class.
        if args and inspect.isclass(args[0]) and issubclass(args[0], Whale):
            args = args[1:]
        clear_cache = kwargs.pop("unmemoize", False)
        self.get_cache()
        if "period" in kwargs:
            p = Period.get(kwargs["period"])
            kwargs["period"] = str(p)
            # Floor division keeps the TTL an integer on Python 3 as well.
            ttl = int(p.interval) // 5
        else:
            ttl = 60

        key_name = self.keyify(args, kwargs)

        if clear_cache:
            self.cache.delete(key_name)

        try:
            return json.loads(self.cache[key_name])
        except KeyError:
            value = self.func(Whale, *args, **kwargs)
            self.cache[key_name] = json.dumps(value)
            self.cache.expire(key_name, ttl)
            return value
        except TypeError:
            # uncachable -- for instance, passing a list as an argument.
            # Better to not cache than to blow up entirely.
            return self.func(Whale, *args, **kwargs)
Пример #9
0
 def totals(cls, pk, dimensions=None, metrics=None, periods=None, at=None):
     """Sum the points of each metric per period and dimension.

     :param metrics: a metric name or list of names (default ``['hits']``).
         Names containing ``'/'`` are treated as ratios: their components are
         fetched and the ratio of the two sums is reported under the
         original name.
     :param periods: a period or list of periods (default
         ``DEFAULT_PERIODS``).
     :param at: forwarded to ``Period.get_days`` and ``cls.plotpoints``.
     :returns: ``{str(period): {dimension: {metric: total}}}``; a ratio whose
         denominator sums to 0 is reported as 0.
     """
     if not periods:
         periods = DEFAULT_PERIODS
     if not isinstance(periods, list):
         periods = [periods]
     metrics = metrics or ['hits']
     if not isinstance(metrics, list):
         metrics = [metrics]

     # Build new lists instead of removing from ``metrics`` while iterating
     # it: that skipped the element after each ratio and mutated the caller's
     # list.
     ratios = [m for m in metrics if '/' in m]
     wanted = [m for m in metrics if '/' not in m]
     for rat in ratios:
         wanted += rat.split('/')
     # Fetch each metric only once, keeping first-seen order.
     fetch = []
     for name in wanted:
         if name not in fetch:
             fetch.append(name)

     d = {}
     for p in periods:
         period, ats, tzoffset = Period.get_days(p, at)
         p_data = cls.plotpoints(pk, dimensions, fetch, period=p, at=at)
         p_totals = dict()
         for dim, by_metric in p_data.items():
             dim_totals = p_totals[dim] = dict()
             for met, vals in by_metric.items():
                 # Only points that fall on the requested dates count.
                 dim_totals[met] = sum(v for k, v in vals.items() if k in ats)
             for rat in ratios:
                 top, bot = parse_formula(rat)
                 topt, bott = dim_totals[top], dim_totals[bot]
                 dim_totals[rat] = topt / bott if bott else 0
         d[str(p)] = p_totals
     return d
Пример #10
0
 def total(cls, pk, metric, dimension='_', period=None, at=None, index=None,
         tzoffset=None):
     """Return a single value for ``metric``: one point, a sum, or a ratio.

     :param metric: a metric name or a formula; ``parse_formula`` splits it
         into a numerator and an optional denominator.
     :param dimension: passed through ``maybe_dumps`` before use as a key.
     :param at: forwarded to ``Period.get_days`` to obtain the dates summed.
     :param index: when an int, the value of the point at that position is
         returned instead of a sum.  Defaults to the last point (-1) only
         when no dates are available and ``index`` was not given.
     :param tzoffset: forwarded to ``Period.get_days`` and, on the summing
         paths, to ``cls.plotpoints``.
     :returns: the selected point value, the sum over the dates, or
         ``numerator_sum / denominator_sum`` (0 when the denominator is 0).
     """
     period, ats, tzoffset = Period.get_days(period, at, tzoffset=tzoffset)
     top, bot = parse_formula(metric)
     dimension = maybe_dumps(dimension)
     # ``index is None`` (not ``not index``) so an explicit index of 0 is
     # honoured rather than being replaced by -1.
     if not ats and index is None:
         index = -1
     if isinstance(index, int):
         pps = cls.plotpoints(pk, dimension, metric, period=period, points_type=list)
         return pps[dimension][metric][index][1]

     def summed(met):
         # Sum of the points of ``met`` that fall on the requested dates.
         series = cls.plotpoints(pk, dimension, met, period=period,
                                 tzoffset=tzoffset)[dimension][met]
         return sum(series[dt] for dt in ats if dt in series)

     if not bot:
         return summed(metric)
     top_tot = summed(top)
     bot_tot = summed(bot)
     return top_tot / bot_tot if bot_tot else 0
Пример #11
0
 def totals(cls, pk, dimensions=None, metrics=None, periods=None, at=None):
     """Sum the points of each metric per period and dimension.

     :param metrics: a metric name or list of names (default ``['hits']``).
         Names containing ``'/'`` are treated as ratios: their components are
         fetched and the ratio of the two sums is reported under the
         original name.
     :param periods: a period or list of periods (default
         ``DEFAULT_PERIODS``).
     :param at: forwarded to ``Period.get_days`` and ``cls.plotpoints``.
     :returns: ``{str(period): {dimension: {metric: total}}}``; a ratio whose
         denominator sums to 0 is reported as 0.
     """
     if not periods:
         periods = DEFAULT_PERIODS
     if not isinstance(periods, list):
         periods = [periods]
     metrics = metrics or ['hits']
     if not isinstance(metrics, list):
         metrics = [metrics]

     # Partition into fresh lists: removing from ``metrics`` while looping
     # over it skipped the element after each ratio and changed the caller's
     # list in place.
     ratios = []
     plain = []
     for metric in metrics:
         if '/' in metric:
             ratios.append(metric)
         else:
             plain.append(metric)
     # Plain metrics first, then ratio components, without duplicates.
     fetch = []
     for name in plain + [part for rat in ratios for part in rat.split('/')]:
         if name not in fetch:
             fetch.append(name)

     d = {}
     for p in periods:
         period, ats, tzoffset = Period.get_days(p, at)
         p_data = cls.plotpoints(pk, dimensions, fetch, period=p, at=at)
         p_totals = dict()
         for dim, by_metric in p_data.items():
             dim_totals = p_totals[dim] = dict()
             for met, vals in by_metric.items():
                 # Only points that fall on the requested dates count.
                 dim_totals[met] = sum(v for k, v in vals.items() if k in ats)
             for rat in ratios:
                 top, bot = parse_formula(rat)
                 topt, bott = dim_totals[top], dim_totals[bot]
                 dim_totals[rat] = topt / bott if bott else 0
         d[str(p)] = p_totals
     return d
Пример #12
0
    def rank_subdimensions_ratio(cls,
                                 pk,
                                 numerator,
                                 denominator='hits',
                                 dimension='_',
                                 period=None,
                                 recursive=True,
                                 points=False):
        """Score each subdimension by its ``numerator / denominator`` ratio.

        The overall ratio comes from ``cls.totals`` for ``dimension``; each
        subdimension's ratio is computed from its points on the dates given
        by ``Period.get_days(period)`` and compared with the overall one.

        :param recursive: use ``cls.all_subdimensions`` when true, otherwise
            ``cls.get_subdimensions``.
        :param points: keep the raw points under ``'points'`` when true.
        :returns: dict keyed by the ``maybe_dumps``-ed subdimension with
            score/difference/effect/value/count/important/dimension.
        """
        top, bottom = numerator, denominator
        p_obj, ats, tzoffset = Period.get_days(period)
        p_s = str(p_obj)
        d_k = keyify(dimension)

        def overall(met):
            # Total of ``met`` for ``dimension``; 0 when the period is absent.
            by_period = cls.totals(pk, dimension, met, periods=[p_s])
            if p_s in by_period:
                return by_period[p_s][d_k][met] or 0
            return 0

        top_total = overall(top)
        bottom_total = overall(bottom)
        if bottom_total:
            ratio_total = float(top_total / bottom_total) or 0
        else:
            ratio_total = 0

        def on_dates(series):
            # Sum of the values whose timestamp is one of the requested dates.
            return sum(dict((k, v) for k, v in series.items() if k in ats).values())

        def info(sub):
            pps = cls.plotpoints(
                pk, sub, [top, bottom, '%s/%s' % (top, bottom)],
                period=period)[sub]
            top_pps = pps[top]
            bottom_pps = pps[bottom]

            sub_top_sum = on_dates(top_pps)
            sub_bottom_sum = on_dates(bottom_pps)

            if sub_bottom_sum:
                ratio = float(sub_top_sum / sub_bottom_sum) or 0
            else:
                ratio = 0

            # Relative deviation from the overall ratio.
            if ratio_total:
                difference = (ratio - ratio_total) / ratio_total or 0
            else:
                difference = 0
            important = sub_bottom_sum > 5 and math.fabs(difference) > .1

            data = {
                'points': pps,
                'score': ratio,
                'difference': difference,
                'effect': difference * sub_bottom_sum * ratio_total,
                'value': sub_top_sum,
                'count': sub_bottom_sum,
                'important': important,
                'dimension': sub
            }
            if not points:
                data.pop('points')
            return data

        if recursive:
            list_subs = cls.all_subdimensions
        else:
            list_subs = cls.get_subdimensions

        ranked = {}
        for name in map(maybe_dumps, list_subs(pk, dimension)):
            ranked[name] = info(name)
        return ranked
Пример #13
0
 def plotpoints(cls, pk, dimensions=None, metrics=None,
         period=None, overall=True):
     """Return ``[flot_time, value]`` pairs for every timestamp of ``period``.

     The driver's ``retrieve`` result (dimension -> metric -> points) is
     expanded so that timestamps without a stored point get 0.0.  ``period``
     is used as a string of the form accepted by ``Period(*period.split('x'))``.
     """
     if not metrics:
         metrics = ['hits',]
     if not period:
         period = Period.default_size()
     stored = cls.whale_driver().retrieve(pk,dimensions,metrics,
             period=period, overall=overall)
     filled = defaultdict(dict)
     for dim_key, by_metric in stored.items():
         for metric_name, known in by_metric.items():
             stamps = Period(*period.split('x')).datetimes_strs()
             filled[dim_key][metric_name] = [
                 [to_flot_time(Period.parse_dt_str(stamp)),
                  float(known[stamp] if stamp in known else 0)]
                 for stamp in stamps
             ]
     return filled
Пример #14
0
 def retrieve(self, pk, dimensions, metrics, period=None, dt=None):
     """Load the stored hash for every dimension/metric pair.

     Each hash is read with ``self.hgetall`` from the key
     ``keyify(pk, dimension, str(Period), metric)`` and its values are
     converted to ``float``.  ``dt`` is accepted but not used here.

     :returns: plain dict of dimension -> metric -> {field: float}.
     """
     result = defaultdict(dict)
     period_key = str(Period.get(period))
     for dim in [maybe_dumps(d) for d in iterate_dimensions(dimensions)]:
         for met in [maybe_dumps(m) for m in metrics]:
             raw = self.hgetall(keyify(pk, dim, period_key, met))
             result[dim][met] = dict(
                 (field, float(val)) for field, val in raw.items())
     return dict(result)
Пример #15
0
    def render_divs(cls,
                    pk,
                    metric,
                    dimension='_',
                    period=None,
                    at=None,
                    tzoffset=None,
                    format=None,
                    hidden=False):
        """Render the points of ``metric`` as an HTML ``<table>`` string.

        :param format: one of ``'int'``, ``'float'``, ``'grouped'``, a
            percentage name (``'pct'``, ``'percent'``, ``'%'``, ``'ratio'``)
            or a money name (``'cash'``, ``'money'``, ``'usd'``, ``'$'``,
            ``'dollars'``, ``'cents'``).  Defaults to ``'pct'`` when the
            metric is a formula with a denominator, else ``'grouped'``.
            Unknown names leave values unformatted.
        :param hidden: when true the table gets ``style="display: none"``.
        :returns: the table markup, one row per point.
        """
        import locale

        period, ats, tzoffset = Period.get_days(period, at, tzoffset=tzoffset)
        top, bot = parse_formula(metric)
        pps = cls.plotpoints(pk, dimension, metric, period=period)
        ppsm = pps[dimension][metric]
        if not format:
            format = 'pct' if bot else 'grouped'

        def fmt(v):
            # Format one cell value according to ``format``.
            if v == 'None':
                v = None
            f = format
            if f == 'int':
                f = lambda s: int(float(s or 0))
            elif f == 'float':
                v = v or 0.0
                f = float
            elif f == 'grouped':
                v = v or 0
                # format_string replaces the deprecated locale.format.
                f = lambda s: locale.format_string('%d', int(float(s)), True)
            elif f in ['pct', 'percent', '%', 'ratio']:
                # Percentages are capped at 101.
                v = min(101, v and float(v) * 100 or 0)
                f = lambda s: '%.2f%%' % s
            elif f in ['cash', 'money', 'usd', '$', 'dollars', 'cents']:
                v = v and float(v) or 0.0
                if f == 'cents':
                    v = v / 100.0
                f = locale.currency
            if not callable(f):
                # Unknown format name: hand the value back untouched.
                return v
            # Return the formatted value even when it is falsy; the old
            # ``f(v) or v`` turned an 'int' result of 0 back into the raw
            # input.
            return f(v)

        hidden = 'style="display: none"' if hidden else ''
        rep = lambda s: s.format(
            pk=pk, metric=metric, dimension=dimension, hidden=hidden)
        # NOTE(review): pk/metric/dimension are interpolated without HTML
        # escaping -- confirm they never carry untrusted text.
        table = rep(
            '<table {hidden} data-hw-pk="{pk}" data-hw-name="{{name}}" \
                data-hw-dimension="{dimension}" data-metric="{metric}">'
        ) + '\n'.join([
            '<tr><td>%s</td><td>%s</td></tr>' %
            (stamp.replace(' 00:00:00', ''), fmt(count))
            for stamp, count in ppsm.items()
        ]) + '</table>'
        return table
Пример #16
0
 def update_count_to(cls, pk, dimensions='_', metrics=None, period=False,
         at=False, rank=False):
     """Store each ``metric -> value`` pair of ``metrics`` for one timestamp.

     All writes go through a non-transactional pipeline that is executed
     once at the end.

     :param metrics: mapping of metric name to value; ``None`` or an empty
         mapping is a no-op.
     :param at: moment to write at; defaults to ``cls.now()``.  It is
         flattened with ``period.flatten_str``.
     :param rank: forwarded to ``_store``.
     """
     # The default ``metrics=None`` used to crash on ``.iteritems()``.
     if not metrics:
         return
     period = Period.get(period)
     at = at or cls.now()
     dt = period.flatten_str(at)
     pipe = cls.whale_driver().pipeline(transaction=False)
     # ``items()`` works on both Python 2 and 3 dictionaries.
     for metric, value in metrics.items():
         _store(pipe, pk, dimensions, metric, period, dt, value,
                 rank=rank)
     pipe.execute()
Пример #17
0
def _store(redis,
           pk,
           dimension,
           metric,
           period,
           dt,
           count,
           method='set',
           rank=False):
    """Write one data point and register its dimension bookkeeping.

    :param redis: client (or pipeline) exposing ``sadd``, ``hset``,
        ``execute_command`` and ``zadd``.
    :param method: ``'set'`` stores ``float(count)`` at ``dt``; ``'incr'``
        issues ``HINCRBYFLOAT`` with ``float(count)``.
    :param rank: when true (and the point is not a top-level one), the value
        is also written to a per-``dt`` rank sorted set.
    :returns: the stored float for ``'set'``; whatever ``execute_command``
        returns for ``'incr'``.
    :raises ValueError: if ``method`` is neither ``'set'`` nor ``'incr'``.
    """
    # Fail fast on an unknown method: it used to skip both write branches
    # and then crash on the unbound ``new_val`` after Redis was modified.
    if method not in ('set', 'incr'):
        raise ValueError("method must be 'set' or 'incr', got %r" % (method,))

    interval = Period.get(period).interval
    # Hash holding the points for this pk/dimension/interval/metric.
    key = keyify(pk, dimension, interval, metric)
    # Store pk dimensions
    dimension_key = keyify('dimensions', pk)
    dimension_json = keyify(dimension)

    # ``_added_dimensions`` / ``_added_subdimensions`` are module-level caches
    # (defined outside this block) used to skip repeated SADD calls.
    if dimension_json not in _added_dimensions[dimension_key]:
        redis.sadd(dimension_key, dimension_json)
        _added_dimensions[dimension_key].append(dimension_json)
    # Store dimensional subdimensions
    if dimension != '_':
        subdimension_key = keyify('subdimensions', pk, parent(dimension))
        if dimension_json not in _added_subdimensions[subdimension_key]:
            redis.sadd(subdimension_key, dimension_json)
            _added_subdimensions[subdimension_key].append(dimension_json)

    if method == 'set':
        new_val = float(count)
        redis.hset(key, dt, new_val)
    else:
        new_val = redis.execute_command('HINCRBYFLOAT', key, dt, float(count))

    if rank and (isinstance(try_loads(pk), list) or dimension != '_'):
        if isinstance(pk, list) and dimension == '_':
            tgt_pk = parent(pk)
            tgt_dimension = dimension
        else:
            tgt_pk = pk
            tgt_dimension = parent(dimension)
        rank_key = keyify('rank', tgt_pk, tgt_dimension, interval, dt, metric)
        # NOTE(review): argument order (member, score) depends on the
        # redis client version in use -- left exactly as in the original.
        redis.zadd(rank_key, dimension_json, new_val)
    return new_val
Пример #18
0
 def plotpoints(self, categories=None, dimensions=None, metrics=None,
         period=None, depth=0):
     """Return ``[flot_time, value]`` pairs for every timestamp of ``period``.

     ``categories`` may be a single string or a list; ``dimensions`` defaults
     to the JSON text of an empty list.  The driver's ``retrieve`` result
     (dimension -> metric -> points) is expanded so that timestamps without
     a stored point get 0.0.
     """
     if not categories:
         categories = ''
     if not dimensions:
         dimensions = json.dumps([])
     # Convert categories to a list, if it's not
     if type(categories) in [str, unicode]:
         categories = [categories]
     if not metrics:
         metrics = ['hits']
     if not period:
         period = Period.default_size()
     stored = self.driver().retrieve(categories, dimensions, metrics,
                                     period=period, depth=depth)
     filled = defaultdict(dict)
     for dim_key, by_metric in stored.items():
         for metric_name, known in by_metric.items():
             stamps = Period(*period.split('x')).datetimes_strs()
             filled[dim_key][metric_name] = [
                 [to_flot_time(Period.parse_dt_str(stamp)),
                  float(known[stamp] if stamp in known else 0)]
                 for stamp in stamps
             ]
     return filled
Пример #19
0
    def plotpoints(cls, pk, dimensions=None, metrics=None,
            depth=0, period=None, flot_time=False, points_type=dict):
        """Return the stored points for ``pk`` with missing timestamps as 0.

        :param metrics: a metric name or list of names (default ``['hits']``).
        :param depth: when greater than 0, the plotpoints of each
            subdimension of ``dimensions`` are merged in recursively.
        :param flot_time: when true, x values are converted with
            ``to_flot_time``; stored values are still looked up by the
            original timestamp string.
        :param points_type: callable applied to each list of ``[x, value]``
            pairs (``dict`` by default).
        :returns: mapping of dimension -> metric -> ``points_type(pairs)``.
        """
        metrics = metrics or ['hits']
        if isinstance(metrics, basestring):
            metrics = [metrics]
        period = Period.get(period)
        dts = period.datetimes_strs()
        nonsparse = defaultdict(defaultdict)

        # Pull the plotpoints that exist from Redis
        sparse = cls.whale_driver().retrieve(pk, dimensions, metrics, period=period)

        # Loop names are distinct from the parameters: the original reused
        # ``dimensions``/``metrics`` here, so the recursion below ran with
        # the last loop values instead of the caller's arguments.
        for dim, mets in sparse.items():
            for met, points in mets.items():
                series = []
                for dt in dts:
                    # Look the value up by the timestamp string *before*
                    # converting; the original overwrote ``dt`` first, so
                    # every flot_time lookup missed and produced 0.
                    value = points[dt] if dt in points else 0
                    x_value = to_flot_time(Period.parse_dt_str(dt)) if flot_time else dt
                    series.append([x_value, float(value)])
                nonsparse[dim][met] = points_type(series)
        if depth > 0:
            for sub in cls.get_subdimensions(pk, dimensions):
                child = cls.plotpoints(pk, sub, metrics, depth=depth - 1, period=period,
                                       flot_time=flot_time, points_type=points_type)
                # Same result as dict(a.items() + b.items()), but also valid
                # on Python 3; child entries win on duplicate keys.
                merged = dict(nonsparse)
                merged.update(child)
                nonsparse = merged
        return nonsparse
Пример #20
0
def _retrieve(redis, pk, dimensions, metrics, period=None, dt=None):
    """Load the stored hash for every dimension/metric pair, plus ``'_'``.

    For a metric containing ``':'`` only the part before the first colon is
    used in the Redis key, while the full metric is kept as the result key.
    Hash values are converted to ``float``.  ``dt`` is accepted but unused.

    :returns: plain dict of dimension -> metric -> {field: float}, keyed by
        the ``maybe_dumps``-ed dimension and metric.
    """
    interval = Period.get(period).interval
    result = defaultdict(dict)
    for dimension in iterate_dimensions(dimensions) + ['_']:
        for metric in metrics:
            metric_name = metric.split(':')[0] if ':' in metric else metric
            raw = redis.hgetall(keyify(pk, dimension, interval, metric_name))
            parsed = dict((field, float(val)) for field, val in raw.items())
            result[maybe_dumps(dimension)][maybe_dumps(metric)] = parsed
    return dict(result)
Пример #21
0
 def zranked(cls,
             pk,
             parent_dimension='_',
             metric='hits',
             period=None,
             at=None,
             start=0,
             size=10,
             sort_dir=None,
             tzoffset=None):
     """Return ranked members for ``metric`` under ``parent_dimension``.

     Dates are taken from ``Period.get_days(period, at)``; if none come back,
     a single entry built from ``cls.now()`` is used.  The members produced
     by ``_ranked`` are each decoded with ``try_loads``.
     """
     # NOTE(review): the ``tzoffset`` argument is not forwarded to
     # ``Period.get_days``; the name is rebound to the value that call returns.
     period, ats, tzoffset = Period.get_days(period, at)
     if not ats:
         ats = [Period.convert(cls.now(), tzoffset)]
     raw_members = _ranked(cls.whale_driver(), pk, parent_dimension, metric,
                           period, ats, start, size, sort_dir=sort_dir)
     return map(try_loads, raw_members)
Пример #22
0
 def update_count_to(cls,
                     pk,
                     dimensions='_',
                     metrics=None,
                     period=False,
                     at=False,
                     rank=False):
     """Store each ``metric -> value`` pair of ``metrics`` for one timestamp.

     All writes go through a non-transactional pipeline that is executed
     once at the end.

     :param metrics: mapping of metric name to value; ``None`` or an empty
         mapping is a no-op.
     :param at: moment to write at; defaults to ``cls.now()``.  It is
         flattened with ``period.flatten_str``.
     :param rank: forwarded to ``_store``.
     """
     # Nothing to write: the default ``metrics=None`` used to raise
     # AttributeError on ``.iteritems()``.
     if not metrics:
         return
     period = Period.get(period)
     at = at or cls.now()
     dt = period.flatten_str(at)
     pipe = cls.whale_driver().pipeline(transaction=False)
     # ``items()`` works on both Python 2 and 3 dictionaries.
     for metric, value in metrics.items():
         _store(pipe, pk, dimensions, metric, period, dt, value, rank=rank)
     pipe.execute()
Пример #23
0
def table_graph():
    """Build the JavaScript bootstrap snippet that renders table graphs.

    The returned script polls until jQuery and ``jQuery.hailwhale`` are
    available (writing ``<script>`` tags for them once), then schedules
    ``$.hailwhale(hwurl).graph_tables(table, options)`` after ``delay`` ms.

    Values come from ``g(name, default)`` -- presumably a request-parameter
    lookup defined elsewhere in the file; confirm against its definition.

    :returns: the JavaScript source as a string.
    """
    from periods import Period
    # Options serialised into the graph_tables() call.
    params = {
        'tzoffset': g('tzoffset', 0.0),
        'period': g('period', str(Period.get(None))),
    }
    # NOTE(review): ``debug`` and ``height`` are read but never used below.
    debug = g('debug', False)
    table = g('table', '')
    height = g('height', '300px')
    delay = g('delay', 5000)
    # NOTE(review): ``'/' or <expr>`` always evaluates to '/', so the
    # req.url-based fallback is never used.
    hwurl = req.GET.get('hwurl', '/' or req.url.split('table_graph.js')[0])
    # One document.write() per script dependency, each rooted at ``hwurl``.
    include_string = \
"document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/jquery.min.js'></script>\");"%hwurl
    include_string += \
"document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/hailwhale.js'></script>\");"%hwurl
    include_string += \
"document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/d3.js'></script>\");"%hwurl
    include_string += \
"document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/nvd3.js'></script>\");"%hwurl

    # NOTE(review): ``table``, ``hwurl`` and ``delay`` are interpolated into
    # the script without escaping -- confirm they cannot carry untrusted
    # input.  Doubled braces are literal JS braces for str.format().
    return_string = '''
appended=false;\n
function jqinit() {{\n
    if(typeof(jQuery) == 'undefined' || typeof(jQuery.hailwhale) == 'undefined') {{\n
        if(!appended) {{\n
            appended = true;\n
            {include_string}\n
        }}\n
        setTimeout(jqinit, 250);\n
    }} else {{\n
        $(function() {{\n
        init_graphs =function() {{
                $.hailwhale('{hwurl}').graph_tables('{table}', {options});\n
                }}
        setTimeout(init_graphs, {delay});
        if(ui_loaded_funcs)
            ui_loaded_funcs.init_graphs = init_graphs;
        }});\n
    }}
}}
jqinit();\n


    '''.format(include_string=include_string,
               table=table,
               delay=delay,
               hwurl=hwurl,
               options=util.maybe_dumps(params))
    return return_string
Пример #24
0
    def reasons_for(cls, pk, formula="value/hits", known_data=None, period=None, recursive=True):
        """Find the best- and worst-scoring known dimensions for an option.

        :param pk: a three-item sequence ``(pk_base, decision, option)``; the
            ranks of ``pk`` are compared with those of
            ``[pk_base, decision]``.
        :param formula: forwarded to ``cls.cached_rank``.
        :param known_data: expanded with ``iterate_dimensions``; only
            dimensions found there (and flagged important) are candidates.
        :returns: dict with ``good``/``bad`` (rank entries or ``{}``),
            ``base``, ``parent``, ``high``/``low`` (their ``difference``) and
            ``high_sig``/``low_sig`` (``significance > 4``).
        """
        # The parsed parts are not used below; the call is kept so that
        # whatever checking parse_formula performs still happens.
        _metric, _denominator = parse_formula(formula)
        period = Period.get(period)
        pk_base, decision, _option = pk
        base = "_"
        best = worst = None
        ranks = cls.cached_rank(pk, formula=formula, dimension=base, period=period, recursive=recursive, points=False)
        overall = cls.cached_rank(
            [pk_base, decision], formula=formula, dimension=base, period=period, recursive=recursive, points=False
        )
        parent_score = overall[base]["score"]
        parent_count = overall[base]["count"]
        ranks[base]["effect"] = ranks[base]["count"] * ranks[base]["difference"]

        def delta(info):
            # Adjust one rank entry relative to the parent's score, in place.
            diff = info["score"] - parent_score
            info["value_diff"] = info["value"] - overall[base]["value"]
            info["difference"] += diff
            if math.fabs(diff) > 0 and info["count"] > 0:
                info["effect"] += diff * info["count"]
                info["significance"] = ((0.5 * info["effect"]) ** 2) / parent_count
            else:
                info["effect"] = 0
                info["significance"] = 0
            return info

        known_dimensions = iterate_dimensions(known_data)
        for dim, info in ranks.items():
            ranks[dim] = info = delta(info)
            if try_loads(dim) in known_dimensions and info["important"]:
                # Compare against None explicitly: ``score < None`` is always
                # False on Python 2 (so ``worst`` was never set) and raises
                # TypeError on Python 3.
                if best is None or info["score"] > ranks[best]["score"]:
                    best = dim
                if worst is None or info["score"] < ranks[worst]["score"]:
                    worst = dim
        i = {
            "good": ranks[best] if best is not None else {},
            "bad": ranks[worst] if worst is not None else {},
            "base": ranks[base],
            "parent": overall[base],
        }
        i["high"] = i["good"].get("difference", 0)
        i["high_sig"] = i["good"].get("significance", 0) > 4
        i["low"] = i["bad"].get("difference", 0)
        i["low_sig"] = i["bad"].get("significance", 0) > 4
        return i
Пример #25
0
    def rank_subdimensions_ratio(cls, pk, numerator, denominator='hits',
            dimension='_', period=None, recursive=True, points=False):
        """Score each subdimension by its ``numerator / denominator`` ratio.

        The overall ratio is derived from ``cls.totals`` for ``dimension``;
        every subdimension's own ratio is built from its points on the dates
        returned by ``Period.get_days(period)`` and expressed relative to the
        overall ratio.

        :param recursive: pick ``cls.all_subdimensions`` (true) or
            ``cls.get_subdimensions`` (false) to enumerate subdimensions.
        :param points: when false, the ``'points'`` key is removed.
        :returns: dict keyed by the ``maybe_dumps``-ed subdimension.
        """
        top, bottom = numerator, denominator
        p_obj, ats, tzoffset = Period.get_days(period)
        p_s = str(p_obj)
        d_k = keyify(dimension)

        top_points = cls.totals(pk, dimension, top,
                periods=[p_s])
        top_total = 0
        if p_s in top_points:
            top_total = top_points[p_s][d_k][top] or 0

        bottom_points = cls.totals(pk, dimension, bottom,
                periods=[p_s])
        bottom_total = 0
        if p_s in bottom_points:
            bottom_total = bottom_points[p_s][d_k][bottom] or 0

        ratio_total = 0
        if bottom_total:
            ratio_total = float(top_total / bottom_total) or 0

        def summarise(sub):
            series = cls.plotpoints(pk, sub, [top, bottom, '%s/%s' % (top, bottom)], period=period)[sub]
            top_series = series[top]
            bottom_series = series[bottom]

            # Keep only the points that fall on the requested dates.
            top_on_dates = dict((k, v) for k, v in top_series.items() if k in ats)
            sub_top_sum = sum(top_on_dates.values())
            bottom_on_dates = dict((k, v) for k, v in bottom_series.items() if k in ats)
            sub_bottom_sum = sum(bottom_on_dates.values())

            ratio = 0
            if sub_bottom_sum:
                ratio = float(sub_top_sum / sub_bottom_sum) or 0

            difference = 0
            if ratio_total:
                difference = (ratio - ratio_total) / ratio_total or 0
            important = sub_bottom_sum > 5 and math.fabs(difference) > .1

            summary = {
                'points': series,
                'score': ratio,
                'difference': difference,
                'effect': difference * sub_bottom_sum * ratio_total,
                'value': sub_top_sum,
                'count': sub_bottom_sum,
                'important': important,
                'dimension': sub
            }
            if not points:
                summary.pop('points')
            return summary

        list_subs = cls.all_subdimensions if recursive else cls.get_subdimensions

        ranked = {}
        for name in map(maybe_dumps, list_subs(pk, dimension)):
            ranked[name] = summarise(name)
        return ranked
Пример #26
0
def _retrieve(redis, pk, dimensions, metrics, period=None, dt=None):
    """Fetch the stored value hashes for every dimension/metric pair.

    For each dimension produced by ``iterate_dimensions`` (plus the '_'
    dimension) and each metric, the Redis hash at
    ``keyify(pk, dimension, interval, metric_name)`` is read, where
    ``metric_name`` is the part of the metric before any ':'.

    Returns a plain dict ``{maybe_dumps(dimension): {maybe_dumps(metric):
    {field: float(value)}}}``. ``dt`` is accepted but not used.
    """
    interval = Period.get(period).interval
    collected = defaultdict(dict)
    for dim in iterate_dimensions(dimensions) + ['_']:
        for met in metrics:
            # A "name:method" metric is stored under its bare name.
            stored_name = met.split(':')[0] if ':' in met else met
            raw = redis.hgetall(keyify(pk, dim, interval, stored_name))
            collected[maybe_dumps(dim)][maybe_dumps(met)] = {
                field: float(count) for field, count in raw.items()
            }
    return dict(collected)
Пример #27
0
    def rank_subdimensions_scalar(cls,
                                  pk,
                                  dimension='_',
                                  metric='hits',
                                  period=None,
                                  recursive=True,
                                  prune_parents=True,
                                  points=False):
        """Rank the subdimensions of ``dimension`` by their total of ``metric``.

        Returns a dict keyed by the ``maybe_dumps``-ed subdimension. Each value
        carries 'score', 'important', 'effect', 'difference', 'value', 'count'
        and 'dimension', plus 'points' when ``points`` is truthy. A
        subdimension is 'important' when its total exceeds both 10 and a tenth
        of the overall total; with ``recursive`` and ``prune_parents`` set, a
        parent loses that flag when what remains after subtracting its direct
        children falls below a tenth of the overall total.
        """
        p_obj, ats, tzoffset = Period.get_days(period)
        # NOTE(review): the totals lookup is keyed by str(period), not by the
        # resolved p_obj computed above -- confirm this is intended.
        p_s = str(period)
        d_k = keyify(dimension)
        # Sum of all values of the metric over the period.
        by_period = cls.totals(pk, dimension, metric, periods=[p_s])
        total = by_period[p_s][d_k][metric]

        def describe(sub):
            series = cls.plotpoints(pk, sub, metric, period=period)[sub][metric]
            sub_total = sum(series.values())
            remainder = total - sub_total
            data = {
                'score': sub_total,
                'important': sub_total > 10 and (sub_total > (total / 10)),
                'effect': remainder,
                'difference': remainder,
                'value': sub_total,
                'count': sub_total,
                'dimension': sub,
            }
            if points:
                data['points'] = series
            return data

        if recursive:
            list_subs = cls.all_subdimensions
        else:
            list_subs = cls.get_subdimensions
        ranked = dict()
        for sub in map(maybe_dumps, list_subs(pk, dimension)):
            ranked[sub] = describe(sub)

        # Prune parents whose weight is explained by their direct children.
        if recursive and prune_parents:
            for sub, entry in ranked.items():
                child_keys = map(maybe_dumps, cls.get_subdimensions(pk, sub))
                children_total = sum(ranked[c]['score'] for c in child_keys)
                own_share = entry['score'] - children_total
                if entry['important'] and own_share < (total / 10):
                    entry['important'] = False
        return ranked
Пример #28
0
def table_graph():
    """Return the JavaScript bootstrap that graphs HTML tables via hailwhale.

    The script writes the required <script> includes once, polls every 250 ms
    until jQuery and the hailwhale plugin are defined, then schedules
    ``graph_tables`` for the ``table`` selector after ``delay`` milliseconds.

    NOTE(review): ``table`` and ``hwurl`` are interpolated into the script
    unescaped; if ``g`` / ``req.GET`` expose raw request input this is an
    injection risk -- confirm and escape upstream.
    """
    from periods import Period
    options = {
        'tzoffset': g('tzoffset', 0.0),
        'period': g('period', str(Period.get(None))),
    }
    debug = g('debug', False)
    table = g('table', '')
    height = g('height', '300px')
    delay = g('delay', 5000)
    # NOTE(review): `'/' or ...` always evaluates to '/', so the URL-derived
    # fallback is never used -- confirm which default is intended.
    hwurl = req.GET.get('hwurl', '/' or req.url.split('table_graph.js')[0])

    # One document.write() per script, concatenated in load order.
    include_string = ''.join([
        "document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/jquery.min.js'></script>\");" % hwurl,
        "document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/hailwhale.js'></script>\");" % hwurl,
        "document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/d3.js'></script>\");" % hwurl,
        "document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/nvd3.js'></script>\");" % hwurl,
    ])

    template = '''
appended=false;\n
function jqinit() {{\n
    if(typeof(jQuery) == 'undefined' || typeof(jQuery.hailwhale) == 'undefined') {{\n
        if(!appended) {{\n
            appended = true;\n
            {include_string}\n
        }}\n
        setTimeout(jqinit, 250);\n
    }} else {{\n
        $(function() {{\n
        init_graphs =function() {{
                $.hailwhale('{hwurl}').graph_tables('{table}', {options});\n
                }}
        setTimeout(init_graphs, {delay});
        if(ui_loaded_funcs)
            ui_loaded_funcs.init_graphs = init_graphs;
        }});\n
    }}
}}
jqinit();\n


    '''
    return template.format(
        include_string=include_string,
        table=table,
        delay=delay,
        hwurl=hwurl,
        options=util.maybe_dumps(options))
Пример #29
0
 def render_hw_plotpoint_table(cls, pk, metric, dimension='_', period=None, at=None,
         tzoffset=None, format=None, hidden=False, graph_color=''):
     """Render the plotpoints of ``metric`` as an HTML ``<table>`` string.

     Each row holds a plotpoint key (with ' 00:00:00' removed) and its
     formatted value. ``format`` selects the formatter: 'int', 'float',
     'grouped', a percentage alias ('pct', 'percent', '%', 'ratio') or a
     currency alias ('cash', 'money', 'usd', '$', 'dollars', 'cents'). When
     ``format`` is falsy, 'pct' is used if ``parse_formula`` yields a bottom
     part and 'grouped' otherwise. Any other format value leaves the count
     unformatted. ``hidden`` adds an inline ``display: none`` style.
     ``at`` is accepted but not used.
     """
     period, ats, tzoffset = Period.get_days(period, tzoffset=tzoffset)
     top, bot = parse_formula(metric)
     pps = cls.plotpoints(pk, dimension, metric, period=period,
                          tzoffset=tzoffset)
     ppsm = pps[dimension][metric]
     if not format:
         format = 'pct' if bot else 'grouped'

     def fmt(v):
         # Format one plotpoint value according to the selected `format`.
         import locale
         if v == 'None':
             v = None
         f = format
         if f == 'int':
             f = lambda s: int(float(s or 0))
         elif f == 'float':
             v = v or 0.0
             f = float
         elif f == 'grouped':
             v = v or 0
             # format_string is the non-deprecated spelling of locale.format.
             f = lambda s: locale.format_string('%d', int(float(s)), True)
         elif f in ['pct', 'percent', '%', 'ratio']:
             # Percentages are capped at 101.
             v = min(101, v and float(v)*100 or 0)
             f = lambda s: '%.2f%%'%s
         elif f in ['cash', 'money', 'usd', '$', 'dollars', 'cents']:
             v = v and float(v) or 0.0
             if f == 'cents':
                 v = v/100.0
             f = locale.currency
         if not f:
             return v
         # A conditional expression, not `callable(f) and f(v) or v`: the
         # and/or form returned the raw value whenever the formatted result
         # was falsy (e.g. 'int' of 0.4, or of None).
         return f(v) if callable(f) else v

     hidden = hidden and 'style="display: none"' or ''
     rep = lambda s: s.format(pk=pk, metric=metric, dimension=dimension,
     hidden=hidden, color=graph_color)
     # `stamp` avoids rebinding the `at` parameter inside the comprehension.
     table = rep('<table {hidden} data-hw-pk="{pk}" data-hw-name="{{name}}" \
             data-hw-dimension="{dimension}" data-metric="{metric}" \
             data-hw-color="{color}">')+'\n'.join([
         '<tr><td>%s</td><td>%s</td></tr>'%(stamp.replace(' 00:00:00', ''), fmt(count) )
         for stamp, count in ppsm.items()])+'</table>'
     return table
Пример #30
0
    def reasons_for(cls, pk, formula='value/hits', known_data=None, period=None, recursive=True):
        """Find the best and worst known dimensions behind an option's score.

        ``pk`` must unpack into ``(pk_base, decision, option)``. The option's
        ranks (``cls.cached_rank(pk, ...)``) are compared against the ranks of
        its parent ``[pk_base, decision]``; every rank entry is updated in
        place with 'value_diff', 'difference', 'effect' and 'significance'.
        Among the entries that are flagged 'important' and whose dimension is
        in ``iterate_dimensions(known_data)``, the highest and lowest 'score'
        are picked.

        Returns a dict with 'good', 'bad' (rank entries, or {} when none
        qualified), 'base', 'parent', 'high', 'low' (the 'difference' of
        good/bad) and 'high_sig', 'low_sig' (significance > 4).
        """
        # Results unused; the calls still reject malformed input.
        metric, denomenator = parse_formula(formula)
        period = Period.get(period)
        pk_base, decision, option = pk
        base = '_'
        best = worst = None
        ranks = cls.cached_rank(pk, formula=formula, dimension=base,
            period=period, recursive=recursive, points=False)
        overall = cls.cached_rank([pk_base, decision], formula=formula, dimension=base,
            period=period, recursive=recursive, points=False)
        parent_score = overall[base]['score']
        parent_count = overall[base]['count']
        ranks[base]['effect'] = ranks[base]['count'] * ranks[base]['difference']

        def delta(info):
            # Re-express one rank entry relative to the parent's score.
            diff = info['score'] - parent_score
            info['value_diff'] = info['value'] - overall[base]['value']
            info['difference'] += diff
            if math.fabs(diff) > 0  and info['count'] > 0:
                info['effect'] += diff * info['count']
                info['significance'] = ((.5 * info['effect']) ** 2) / parent_count
            else:
                info['effect'] = 0
                info['significance'] = 0
            return info
        known_dimensions = iterate_dimensions(known_data)
        for dim, info in list(ranks.items()):
            ranks[dim] = info = delta(info)
            if try_loads(dim) in known_dimensions and info['important']:
                # Explicit None checks: comparing a score against None never
                # let `worst` be set (always False on Python 2, TypeError on
                # Python 3).
                if best is None or info['score'] > ranks[best]['score']:
                    best = dim
                if worst is None or info['score'] < ranks[worst]['score']:
                    worst = dim
        i = {'good': ranks[best] if best is not None else {},
                'bad': ranks[worst] if worst is not None else {},
                'base': ranks[base],
                'parent': overall[base]}
        i['high'] = i['good'].get('difference', 0)
        i['high_sig'] = i['good'].get('significance', 0) > 4
        i['low'] = i['bad'].get('difference', 0)
        i['low_sig'] = i['bad'].get('significance', 0) > 4
        return i
Пример #31
0
    def rank_subdimensions_ratio(
        cls, pk, numerator, denominator="hits", dimension="_", period=None, recursive=True, points=False
    ):
        """Score each subdimension of ``dimension`` by ``numerator/denominator``.

        The overall ratio is computed from ``cls.totals`` for ``period``
        (``Period.default_size()`` when falsy) and each subdimension's ratio
        is compared against it.

        Returns a dict keyed by the ``maybe_dumps``-ed subdimension. Each value
        carries "score", "difference", "effect", "value", "count", "important"
        and "dimension", plus "points" when ``points`` is truthy.
        """
        top, bottom = numerator, denominator
        period = period or Period.default_size()
        d_k = keyify(dimension)

        def grand_total(name):
            # Period total of one metric for the parent dimension.
            return cls.totals(pk, dimension, name, periods=[period])[str(period)][d_k][name]

        top_total = grand_total(top)
        bottom_total = grand_total(bottom)
        if bottom_total:
            ratio_total = float(top_total / bottom_total) or 0
        else:
            ratio_total = 0

        def info(sub):
            wanted = [top, bottom, "%s/%s" % (top, bottom)]
            pps = cls.plotpoints(pk, sub, wanted, period=period)[sub]
            top_pps, bottom_pps = pps[top], pps[bottom]

            sub_top_sum = sum(top_pps.values())
            sub_bottom_sum = sum(bottom_pps.values())

            if sub_bottom_sum:
                ratio = float(sub_top_sum / sub_bottom_sum) or 0
            else:
                ratio = 0

            # Relative deviation from the overall ratio (0 when there is none).
            if ratio_total:
                difference = (ratio - ratio_total) / ratio_total or 0
            else:
                difference = 0

            data = {
                "score": ratio,
                "difference": difference,
                "effect": difference * sub_bottom_sum * ratio_total,
                "value": sub_top_sum,
                "count": sub_bottom_sum,
                "important": sub_bottom_sum > 5 and math.fabs(difference) > 0.1,
                "dimension": sub,
            }
            if points:
                data["points"] = pps
            return data

        if recursive:
            list_subs = cls.all_subdimensions
        else:
            list_subs = cls.get_subdimensions

        return {sub: info(sub) for sub in map(maybe_dumps, list_subs(pk, dimension))}
Пример #32
0
def _ranked(redis,
            pk,
            parent_dimension,
            metric,
            period,
            ats,
            start=0,
            size=10,
            sort_dir=None):
    """Return a slice of the ranking sorted set for ``metric``.

    ``metric`` is split by ``parse_formula`` into a top and an optional bottom
    part. When more than one ``at`` is given, the per-``at`` rank sets of each
    part are first stripped of zero-score members and unioned into a combined
    set. For a ratio metric, a Lua script then writes ``top/bottom`` scores
    (where the bottom score is > 0) into the final rank key.

    Sorting is descending unless ``sort_dir`` is given and is not one of
    '-', 'DESC', 'HIGH' (case-insensitive).
    """
    top, bot = parse_formula(metric)
    interval = Period.get(period).interval

    def rank_keyify(at, met):
        return keyify('rank', pk, parent_dimension, interval, at, met)

    final_rank_key = rank_keyify(ats, metric)

    def squash_ats(met):
        # Merge the per-`at` sets into one. Explicit loop and list rather
        # than map(): the calls are made for their side effects and must not
        # depend on map() being eager.
        if len(ats) > 1:
            for at in ats:
                redis.zremrangebyscore(rank_keyify(at, met), 0, 0)
            redis.zunionstore(rank_keyify(ats, met),
                              [rank_keyify(at, met) for at in ats])

    squash_ats(top)
    if bot:
        squash_ats(bot)
        top_key, bot_key = rank_keyify(ats, top), rank_keyify(ats, bot)
        redis.execute_command(
            "eval", """
        for key_i, key_n in ipairs(redis.call("zrange", KEYS[2], 0, -1)) do
            local top_s = tonumber(redis.call("zscore", KEYS[1], key_n))
            local bot_s = tonumber(redis.call("zscore", KEYS[2], key_n))
            if top_s and bot_s and bot_s > 0 then
                redis.call("zadd", KEYS[3], top_s/bot_s, key_n)
            end
        end
        """, 3, top_key, bot_key, final_rank_key)
        redis.zremrangebyscore(final_rank_key, 0, 0)
    # NOTE(review): ZRANGE's stop index is inclusive, so this can return
    # size + 1 members -- confirm whether callers rely on that.
    descending = not sort_dir or sort_dir.upper() in ['-', 'DESC', 'HIGH']
    return redis.zrange(final_rank_key,
                        start,
                        start + size,
                        desc=descending)
Пример #33
0
 def totals(cls, pk, dimensions=None, metrics=None, periods=None):
     """Sum the plotpoints of each dimension/metric for each period.

     ``periods`` defaults to ``DEFAULT_PERIODS`` and ``metrics`` to
     ``['hits']``; a non-list value of either is wrapped in a list.

     Returns ``{str(period): {dimension: {metric: total}}}`` plus an
     'alltime' entry holding the driver's raw ``retrieve`` result for
     period 'all'. A plotpoint is counted only when
     ``Period.get(period).flatten(key)`` is truthy.
     """
     periods = periods or DEFAULT_PERIODS
     if not isinstance(periods, list):
         periods = [periods]
     metrics = metrics or ['hits']
     if not isinstance(metrics, list):
         metrics = [metrics]

     d = {}
     for p in periods:
         label = str(p)
         series = cls.plotpoints(pk, dimensions, metrics, period=label)
         d[label] = {
             dim: {
                 met: sum([v for k, v in vals.items()
                           if Period.get(p).flatten(k)])
                 for met, vals in mets.items()
             }
             for dim, mets in series.items()
         }
     d['alltime'] = cls.whale_driver().retrieve(
             pk, dimensions, metrics, period='all')
     return d
Пример #34
0
    def rank_subdimensions_ratio(cls, pk, numerator, denominator='hits', dimension='_', period=None):
        """Compare each subdimension's numerator/denominator ratio to the overall.

        Returns a dict keyed by the ``maybe_dumps``-ed subdimension (the entry
        for ``dimension`` itself is removed). Each value carries 'points',
        'ratio_points', 'difference' (relative deviation from the overall
        ratio, 0 when the overall ratio is 0), 'effect' and 'important'.
        """
        top, bottom = numerator, denominator
        period = period or Period.default_size()
        d_k = keyify(dimension)
        top_total = cls.totals(pk, dimension, top, periods=[period])[str(period)][d_k][top]
        bottom_total = cls.totals(pk, dimension, bottom, periods=[period])[str(period)][d_k][bottom]
        # float() before dividing so integer totals are not floor-divided.
        ratio_total = float(top_total) / bottom_total if bottom_total else 0
        ranked = dict()

        def info(sub):
            pps = cls.plotpoints(pk, sub, [top, bottom], period=period)[sub]
            ratio_points = cls.ratio_plotpoints(pk, top, bottom, sub, period=period)[sub]
            top_pps = pps[top]
            bottom_pps = pps[bottom]

            sub_top_sum = sum(top_pps.values())
            sub_bottom_sum = sum(bottom_pps.values())

            ratio = float(sub_top_sum) / sub_bottom_sum if sub_bottom_sum else 0

            # Guard the division: with no overall ratio (empty or zero
            # denominator) there is nothing to deviate from.
            if ratio_total:
                difference = (ratio - ratio_total) / ratio_total
            else:
                difference = 0

            important = sub_bottom_sum > 5 and abs(difference) > .1

            return {
                'points': pps,
                'ratio_points': ratio_points,
                'difference': difference,
                'effect': difference * sub_bottom_sum,
                'important': important
            }

        for sub in map(maybe_dumps, cls.all_subdimensions(pk, dimension)):
            ranked[sub] = info(sub)
        del ranked[dimension]

        return ranked
Пример #35
0
    def rank_subdimensions_scalar(
        cls, pk, dimension="_", metric="hits", period=None, recursive=True, prune_parents=True, points=False
    ):
        """Rank the subdimensions of ``dimension`` by their total of ``metric``.

        Returns a dict keyed by the ``maybe_dumps``-ed subdimension. Each value
        carries "score", "important", "effect", "difference", "value", "count"
        and "dimension", plus "points" when ``points`` is truthy. A
        subdimension is "important" when its total exceeds both 10 and a tenth
        of the overall total; with ``recursive`` and ``prune_parents`` set, a
        parent loses that flag when what remains after subtracting its direct
        children falls below a tenth of the overall total.
        """
        period = period or Period.default_size()
        d_k = keyify(dimension)
        # totals() keys its result by str(period), so index with the string
        # form; indexing with a non-string period object raised KeyError.
        total = cls.totals(pk, dimension, metric, periods=[period])[str(period)][d_k][metric]
        ranked = dict()

        def info(sub):
            pps = cls.plotpoints(pk, sub, metric, period=period)[sub][metric]
            sub_total = sum(pps.values())
            data = {
                "points": pps,
                "score": sub_total,
                "important": sub_total > 10 and sub_total > (total / 10),
                "effect": total - sub_total,
                "difference": total - sub_total,
                "value": sub_total,
                "count": sub_total,
                "dimension": sub,
            }
            if not points:
                del data["points"]
            return data

        _subs = cls.all_subdimensions if recursive else cls.get_subdimensions
        for sub in map(maybe_dumps, _subs(pk, dimension)):
            ranked[sub] = info(sub)

        # Prune parents whose weight is explained by their direct children.
        if recursive and prune_parents:
            # `entry`, not `info`, so the helper above is not rebound.
            for sub, entry in ranked.items():
                children = map(maybe_dumps, cls.get_subdimensions(pk, sub))
                children_total = sum(ranked[child]["score"] for child in children)
                if entry["important"] and (entry["score"] - children_total) < (total / 10):
                    entry["important"] = False
        return ranked
Пример #36
0
    def reasons_for(cls,
                    pk,
                    formula='value/hits',
                    known_data=None,
                    period=None,
                    recursive=True):
        """Find the best and worst known dimensions behind an option's score.

        ``pk`` must unpack into ``(pk_base, decision, option)``. The option's
        ranks (``cls.cached_rank(pk, ...)``) are compared against the ranks of
        its parent ``[pk_base, decision]``; every rank entry is updated in
        place with 'value_diff', 'difference', 'effect' and 'significance'.
        Among the entries that are flagged 'important' and whose dimension is
        in ``iterate_dimensions(known_data)``, the highest and lowest 'score'
        are picked.

        Returns a dict with 'good', 'bad' (rank entries, or {} when none
        qualified), 'base', 'parent', 'high', 'low' (the 'difference' of
        good/bad) and 'high_sig', 'low_sig' (significance > 4).
        """
        # Results unused; the calls still reject malformed input.
        metric, denomenator = parse_formula(formula)
        period = Period.get(period)
        pk_base, decision, option = pk
        base = '_'
        best = worst = None
        ranks = cls.cached_rank(pk,
                                formula=formula,
                                dimension=base,
                                period=period,
                                recursive=recursive,
                                points=False)
        overall = cls.cached_rank([pk_base, decision],
                                  formula=formula,
                                  dimension=base,
                                  period=period,
                                  recursive=recursive,
                                  points=False)
        parent_score = overall[base]['score']
        parent_count = overall[base]['count']
        ranks[base][
            'effect'] = ranks[base]['count'] * ranks[base]['difference']

        def delta(info):
            # Re-express one rank entry relative to the parent's score.
            diff = info['score'] - parent_score
            info['value_diff'] = info['value'] - overall[base]['value']
            info['difference'] += diff
            if math.fabs(diff) > 0 and info['count'] > 0:
                info['effect'] += diff * info['count']
                info['significance'] = (
                    (.5 * info['effect'])**2) / parent_count
            else:
                info['effect'] = 0
                info['significance'] = 0
            return info

        known_dimensions = iterate_dimensions(known_data)
        for dim, info in list(ranks.items()):
            ranks[dim] = info = delta(info)
            if try_loads(dim) in known_dimensions and info['important']:
                # Explicit None checks: comparing a score against None never
                # let `worst` be set (always False on Python 2, TypeError on
                # Python 3).
                if best is None or info['score'] > ranks[best]['score']:
                    best = dim
                if worst is None or info['score'] < ranks[worst]['score']:
                    worst = dim
        i = {
            'good': ranks[best] if best is not None else {},
            'bad': ranks[worst] if worst is not None else {},
            'base': ranks[base],
            'parent': overall[base]
        }
        i['high'] = i['good'].get('difference', 0)
        i['high_sig'] = i['good'].get('significance', 0) > 4
        i['low'] = i['bad'].get('difference', 0)
        i['low_sig'] = i['bad'].get('significance', 0) > 4
        return i
Пример #37
0
    def scalar_plotpoints(cls,
                          pk,
                          dimensions=None,
                          metrics=None,
                          at=None,
                          depth=0,
                          period=None,
                          flot_time=False,
                          points_type=OrderedDict):
        """Expand the sparse stored values into one point per period step.

        ``metrics`` defaults to ``['hits']`` (a single string is wrapped in a
        list) and ``at`` to ``times.now()``. A metric may be written
        ``name:method``; the method post-processes the series:

        * '+', 'sum', 'add', 'cumulative' -- running total;
        * '_', 'set', 'last', 'level' -- a falsy value repeats the last one;
        * anything else (or no method) -- the value as stored.

        Some metric names produce constants instead of stored values: names in
        ``TIME_MATRIX``, '_count' (number of steps), '_days_in_month', and
        any name that parses as a float.

        Returns ``{dimension: {metric: points}}`` where ``points`` is
        ``points_type`` built from ``[key, float(value)]`` pairs; with
        ``flot_time`` the keys are converted by ``to_flot_time`` and
        ``points_type`` is forced to ``list``. With ``depth`` > 0 the results
        for each subdimension (via ``cls.plotpoints``) are merged in.
        """
        from calendar import monthrange

        metrics = metrics or ['hits']
        at = at or times.now()
        if isinstance(metrics, basestring):
            metrics = [metrics]
        p_obj, ats, tzoffset = Period.get_days(period, at)
        dts = list(p_obj.datetimes_strs(end=Period.parse_dt_str(at)))

        sparse = _retrieve(cls.whale_driver(),
                           pk,
                           dimensions,
                           metrics,
                           period=p_obj)
        nonsparse = defaultdict(dict)
        if flot_time:
            points_type = list
        for dim, mets in sparse.items():
            for met, points in mets.items():
                nonsparse[dim][met] = []
                use_method = False
                met_name = met
                if ':' in met:
                    met_name, use_method = met.split(':')

                const_value = False
                if met_name in TIME_MATRIX:
                    const_value = float(p_obj.getUnits()[0] /
                                        TIME_MATRIX[met_name])
                # Try to parse static metrics too
                elif met_name == '_count':
                    const_value = len(dts)
                try:
                    const_value = float(met_name)
                except (TypeError, ValueError):
                    # Not a numeric literal: keep whatever was chosen above.
                    pass
                last_value = total = 0
                for dt in dts:
                    dt_obj = Period.parse_dt_str(dt)
                    if met_name == '_days_in_month':
                        const_value = monthrange(dt_obj.year, dt_obj.month)[1]
                    if flot_time:
                        dt_t = to_flot_time(dt_obj)
                    else:
                        dt_t = dt
                    if const_value:
                        value = const_value
                    else:
                        value = points[dt] if dt in points else 0
                    if use_method in ['+', 'sum', 'add', 'cumulative']:
                        total += value
                        value = total
                    elif use_method in ['_', 'set', 'last', 'level']:
                        if not last_value:
                            last_value = value
                        if not value:
                            value = last_value
                        last_value = value
                    nonsparse[dim][met].append([dt_t, float(value)])
                nonsparse[dim][met] = points_type(nonsparse[dim][met])

        if depth > 0:
            for sub in cls.get_subdimensions(pk, dimensions):
                sub_points = cls.plotpoints(pk,
                                            sub,
                                            metrics,
                                            at=at,
                                            depth=depth - 1,
                                            period=period,
                                            flot_time=flot_time,
                                            points_type=points_type)
                # Copy then update: same result as concatenating the two
                # items() lists, without relying on items() returning a list.
                merged = dict(nonsparse)
                merged.update(sub_points)
                nonsparse = merged
        return nonsparse
Пример #38
0
 def yesterday(cls, pk, metric, dimension='_'):
     """Return ``cls.total`` of ``metric`` as of one day before ``cls.now()``.

     The period used is the second entry of ``Period.all_sizes()``.
     """
     period = Period.all_sizes()[1]
     one_day_ago = cls.now() - timedelta(days=1)
     return cls.total(pk, metric, dimension, period, at=one_day_ago)
Пример #39
0
 def today(cls, pk, metric, dimension='_'):
     """Return ``cls.total`` of ``metric`` as of ``cls.now()``.

     The period used is the second entry of ``Period.all_sizes()``.
     """
     period = Period.all_sizes()[1]
     moment = cls.now()
     return cls.total(pk, metric, dimension, period, at=moment)
Пример #40
0
    def scalar_plotpoints(cls, pk, dimensions=None, metrics=None, at=None,
            depth=0, period=None, flot_time=False, points_type=OrderedDict):
        """Expand the sparse stored values into one point per period step.

        ``metrics`` defaults to ``['hits']`` (a single string is wrapped in a
        list) and ``at`` to ``times.now()``. A metric may be written
        ``name:method``; the method post-processes the series:

        * '+', 'sum', 'add', 'cumulative' -- running total;
        * '_', 'set', 'last', 'level' -- a falsy value repeats the last one;
        * anything else (or no method) -- the value as stored.

        Some metric names produce constants instead of stored values: names in
        ``TIME_MATRIX``, '_count' (number of steps), '_days_in_month', and
        any name that parses as a float.

        Returns ``{dimension: {metric: points}}`` where ``points`` is
        ``points_type`` built from ``[key, float(value)]`` pairs; with
        ``flot_time`` the keys are converted by ``to_flot_time`` and
        ``points_type`` is forced to ``list``. With ``depth`` > 0 the results
        for each subdimension (via ``cls.plotpoints``) are merged in.
        """
        from calendar import monthrange

        metrics = metrics or ['hits']
        at = at or times.now()
        if isinstance(metrics, basestring):
            metrics = [metrics]
        p_obj, ats, tzoffset = Period.get_days(period,at)
        dts = list(p_obj.datetimes_strs(end=Period.parse_dt_str(at)))

        sparse = _retrieve(cls.whale_driver(), pk, dimensions, metrics, period=p_obj)
        nonsparse = defaultdict(dict)
        if flot_time:
            points_type = list
        for dim, mets in sparse.items():
            for met, points in mets.items():
                nonsparse[dim][met] = []
                use_method = False
                met_name = met
                if ':' in met:
                    met_name, use_method = met.split(':')

                const_value = False
                if met_name in TIME_MATRIX:
                    const_value = float(p_obj.getUnits()[0] /
                            TIME_MATRIX[met_name])
                # Try to parse static metrics too
                elif met_name == '_count':
                    const_value = len(dts)
                try:
                    const_value = float(met_name)
                except (TypeError, ValueError):
                    # Not a numeric literal: keep whatever was chosen above.
                    pass
                last_value = total = 0
                for dt in dts:
                    dt_obj = Period.parse_dt_str(dt)
                    if met_name == '_days_in_month':
                        const_value = monthrange(dt_obj.year, dt_obj.month)[1]
                    if flot_time:
                        dt_t = to_flot_time(dt_obj)
                    else:
                        dt_t = dt
                    if const_value:
                        value = const_value
                    else:
                        value = points[dt] if dt in points else 0
                    if use_method in ['+', 'sum', 'add', 'cumulative']:
                        total += value
                        value = total
                    elif use_method in ['_', 'set', 'last', 'level']:
                        if not last_value:
                            last_value = value
                        if not value:
                            value = last_value
                        last_value = value
                    nonsparse[dim][met].append([dt_t, float(value)])
                nonsparse[dim][met] = points_type(nonsparse[dim][met])

        if depth > 0:
            for sub in cls.get_subdimensions(pk, dimensions):
                sub_points = cls.plotpoints(pk, sub, metrics, at=at, depth=depth - 1,
                        period=period, flot_time=flot_time, points_type=points_type)
                # Copy then update: same result as concatenating the two
                # items() lists, without relying on items() returning a list.
                merged = dict(nonsparse)
                merged.update(sub_points)
                nonsparse = merged
        return nonsparse
Пример #41
0
"""

# Template for the query filter: one dataset, with the form's end time
# between period.start and period.end.
query_str = """
{"{{dataset_id_field}}": "{{dataset.dataset_id}}",
"{{form_meta_timeend}}": {
              "$gte": "{{period.start}}", 
              "$lte": "{{period.end}}"
            }
}
"""
# Template for the aggregation stage: sums the `num_using_fp` value field
# into a single "total".
aggregate_str = """
{"$group": {"_id": 0, "total": {"$sum": "$value.{{num_using_fp}}"}}}
"""
dataset_id = "5791793ac29b4d77b20cf1a04d8e7161"
dataset = Dataset.find_one(dataset_id)
period = Period.month_period(2013, 3)

if dataset:
    # Template context: the dataset's field encoding plus the extra names
    # the templates above refer to.
    fields = Observation.encoding(dataset)
    fields["dataset"] = dataset
    fields['dataset_id_field'] = fields[DATASET_ID]
    fields['period'] = Period.month_period(2013, 3)
    # NOTE(review): mapper_str and reducer_str are not defined in the visible
    # part of this script -- presumably declared above.
    mapper = Code(Template(mapper_str).render(fields))
    reducer = Code(Template(reducer_str).render(fields))
    query = json.loads(Template(query_str).render(fields))
    # Overwrite the rendered (string) bounds with the period's own values.
    query['%(form_meta_timeend)s' % fields]['$gte'] = period.start
    query['%(form_meta_timeend)s' % fields]['$lte'] = period.end
    aggregate = json.loads(Template(aggregate_str).render(fields))
    # The map-reduce output is written to the 'myresults_fp' collection.
    results = db.observations.map_reduce(mapper, reducer, 'myresults_fp', query=query)
    if results.count():
        value = results.aggregate(aggregate)
Пример #42
0
 def add_period(self, start, end):
     """Append ``Period(start, end)`` to ``self.periods`` unless already there."""
     known = self.periods
     candidate = Period(start, end)
     if candidate in known:
         return
     known.append(candidate)
Пример #43
0
 def yesterday(cls, pk, metric, dimension='_'):
     """Return ``cls.total`` of ``metric`` as of one day before ``cls.now()``.

     The period used is the second entry of ``Period.all_sizes()``.
     """
     second_size = Period.all_sizes()[1]
     reference = cls.now() - timedelta(days=1)
     return cls.total(pk, metric, dimension, second_size, at=reference)
Пример #44
0
def graph():
    """Build the JavaScript snippet that embeds one hailwhale graph.

    Parameters are read through ``g`` (NOTE(review): presumably request
    parameters with a default -- confirm against its definition). The
    returned script writes a container div, polls every 250 ms until jQuery
    and the hailwhale plugin are defined, then calls ``add_graph``. When
    ``table`` is enabled it also fills the container's ``.table`` element
    with an HTML table of the most recent plotpoints.
    """
    from periods import Period
    # Options handed to the JavaScript side; also hashed into the graph id.
    params = {
        'pk': g('pk', '_', False),
        'dimension': g('dimension', '_', False),
        'metric': g('metric', 'hits', False),
        'depth': g('depth', 0),
        'tzoffset': g('tzoffset', 0.0),
        'period': g('period', str(Period.get(None))),
        'area': g('area', ''),
    }
    pk = params['pk']
    dimension = params['dimension']
    metric = params['metric']
    period = Period.get(params['period'])
    debug = g('debug', False)
    parent_div = g('parent_div', 'hailwhale_graphs')
    table = g('table', False)
    height = g('height', '300px')
    params['title'] = g('title', '')
    if not params['title']:
        # Default title: "<pk> [<last element of the dimension>]".
        pkname = g('pk', '')
        dimname = util.try_loads(g('dimension', 'Overall'))
        dimname = isinstance(dimname, list) and dimname[-1] or dimname
        params['title'] = '%s [%s]' % (util.maybe_dumps(pkname),
                                       util.maybe_dumps(dimname))
    if isinstance(table, basestring):
        table = table.lower() == 'true'
    hwurl = req.GET.get('hwurl', req.url.split('graph.js')[0])
    params['autoupdate'] = g('live', True)
    params['interval'] = g('interval', 6000)
    # The element id is the MD5 of the stringified parameters.
    graph_id = hashlib.md5(str(params)).hexdigest()
    include_string = \
"document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/jquery.min.js'></script>\");"%hwurl
    if table:
        # Column count comes from the 'table' parameter; any failure to read
        # it as an int falls back to 6.
        try:
            columns = int(g('table', 6, int))
        except:
            columns = 6
        pps = Whale.plotpoints(pk,
                               dimension,
                               metric,
                               period=period,
                               depth=params['depth'])
        # Keep only the last `columns + 1` timestamps of the period.
        dates = [p for p in Period.get(period).datetimes_strs()
                 ][(-1 * columns - 1):]

        # Header row: two empty cells followed by one cell per date.
        table_str = '''
            $('#{id} .table').html('<table style="width: 100%"> <tr> <th></th> <th></th> {columns} </tr>
        '''.strip().format(id=graph_id,
                           columns=' '.join([
                               '<th>%s</th>' % date.replace('00:00:00 ', '')
                               for date in dates
                           ]))

        # Move the overall ('_') dimension to the front.
        dimensions = pps.keys()
        if '_' in dimensions:
            dimensions.remove('_')
            dimensions = ['_'] + dimensions
        for dimension_counter, dimension in enumerate(dimensions):
            # The first ten dimensions get a checkbox value of 'on'.
            checked = 'off'
            if dimension_counter < 10:
                checked = 'on'
            if dimension == '_':
                # The overall row is skipped when a depth was requested.
                if params['depth']:
                    continue
                dimension_name = '<b>Overall</b>'
            else:
                dimension_name = dimension.capitalize()
            table_str += '''
                <tr> <td><input id="" style="display: none" type="checkbox" value="{checked}" name="checkbox-{pk}-{dimension}"></td> <td>{dimension_name}</td> {columns} </tr>
                '''.format(pk=pk,
                           dimension=dimension,
                           checked=checked,
                           dimension_name=dimension_name,
                           columns=' '.join([
                               "<td>%s</td>" %
                               int(pps[dimension][metric][date])
                               for date in dates
                           ])).strip()

        table_str += '''</table>');'''
    else:
        table_str = ''
    # NOTE(review): this assignment replaces the jQuery include built above
    # instead of appending to it -- confirm that is intended.
    include_string = \
"document.write(\"<scr\" + \"ipt type='text/javascript' src='%sjs/hailwhale.min.js'></script>\");"%hwurl

    # Doubled braces are literal braces in the emitted JavaScript.
    return_string = '''
appended=false;\n
document.write('<div id="{id}"><div class="graph" style="height: {height}"></div><div class="table"></div></div>');\n
function jqinit() {{\n
    if(typeof(jQuery) == 'undefined' || typeof(jQuery.hailwhale) == 'undefined') {{\n
        if(!appended) {{\n
            appended = true;\n
            {include_string}\n
        }}\n
        setTimeout(jqinit, 250);\n
    }} else {{\n
        $(function() {{\n
                $.hailwhale('{hwurl}').add_graph('{id} .graph', {options});\n
                {table_str}
        }});\n
    }}
}}
jqinit();\n


    '''.format(parent_div=parent_div,
               include_string=include_string,
               hwurl=hwurl,
               table_str=table_str,
               height=height,
               id=graph_id,
               options=util.maybe_dumps(params))
    return return_string
Пример #45
0
                (indicator_def['type'], indicator_def['description'],
                    value, numerator, denominator))
            print (indicator_def['type'], indicator_def['name'],
                   value, numerator, denominator)

    filename = "%(name)s_mvp_indicator_%(start)s_to_%(end)s.csv" % {
        'name': name,
        'start': period.start.strftime('%Y-%m-%d'),
        'end': period.end.strftime('%Y-%m-%d')}
    filename = os.path.join(REPORTS_DIR, filename)
    with open(filename, 'wb') as f:
        csv_writer = csv.writer(
            f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerows(RESULTS)
    print "\nSuccessfully generated %s." % filename

if __name__ == '__main__':
    # Command line: site_name YEAR MONTH [indicator_name]
    arguments = sys.argv[1:]
    if len(arguments) >= 3:
        name = arguments[0]
        year = int(arguments[1])
        month = int(arguments[2])
        period = Period.month_period(year, month)
        # The optional fourth argument narrows the export to one indicator.
        indicator_name = arguments[3] if len(arguments) > 3 else None
        _generate_indicator_export(name, period, indicator_name)
    else:
        print (u"Expected site_name YEAR MONTH\n"
               u"Eg.\n\t python report.py ruhiira 2013 03")
Пример #46
0
 def today(cls, pk, metric, dimension='_'):
     """Return ``cls.total`` of ``metric`` as of ``cls.now()``.

     The period used is the second entry of ``Period.all_sizes()``.
     """
     second_size = Period.all_sizes()[1]
     reference = cls.now()
     return cls.total(pk, metric, dimension, second_size, at=reference)
Пример #47
0
def graph():
    """Build the JavaScript snippet that embeds a hailwhale graph in a page.

    Request parameters are read through ``g`` and ``req`` (both defined
    elsewhere in this module): ``pk``, ``dimension``, ``metric``, ``depth``,
    ``tzoffset``, ``period``, ``area``, ``title``, ``live``, ``interval``,
    ``table``, ``height`` and ``hwurl``.

    Returns:
        A string of JavaScript that writes a container ``<div>``, loads
        ``hailwhale.min.js`` while jQuery / jQuery.hailwhale are undefined,
        then calls ``$.hailwhale(hwurl).add_graph(...)`` with the collected
        options and, when ``table`` is enabled, fills the container's
        ``.table`` element with an HTML table of the most recent points.
    """
    from periods import Period

    params = {'pk': g('pk', '_', False),
              'dimension': g('dimension', '_', False),
              'metric': g('metric', 'hits', False),
              'depth': g('depth', 0),
              'tzoffset': g('tzoffset', 0.0),
              'period': g('period', str(Period.get(None))),
              'area': g('area', ''),
              }
    pk = params['pk']
    dimension = params['dimension']
    metric = params['metric']
    period = Period.get(params['period'])
    table = g('table', False)
    height = g('height', '300px')

    params['title'] = g('title', '')
    if not params['title']:
        # No explicit title: derive one from the pk and the dimension.
        pkname = g('pk', '')
        dimname = util.try_loads(g('dimension', 'Overall'))
        # For a list-valued dimension use only its last element. An explicit
        # test (rather than ``cond and x or y``) keeps a falsy last element
        # and does not raise on an empty list.
        if isinstance(dimname, list) and dimname:
            dimname = dimname[-1]
        params['title'] = '%s [%s]' % (util.maybe_dumps(pkname),
                                       util.maybe_dumps(dimname))

    # A string value only enables the table when it is exactly "true"
    # (case-insensitive).
    if isinstance(table, basestring):
        table = table.lower() == 'true'

    # NOTE(review): hwurl, height, pk and the dimension names are
    # interpolated into the generated HTML/JavaScript below without any
    # escaping, and they come from the request. Escape them at this boundary
    # if these parameters are not trusted.
    hwurl = req.GET.get('hwurl', req.url.split('graph.js')[0])
    params['autoupdate'] = g('live', True)
    params['interval'] = g('interval', 6000)
    # The element id is derived from the complete option set, so it must be
    # computed only after every params entry has been filled in.
    graph_id = hashlib.md5(str(params)).hexdigest()

    table_str = ''
    if table:
        try:
            columns = int(g('table', 6, int))
        except Exception:
            # Best-effort fallback to the default column count, without
            # swallowing SystemExit / KeyboardInterrupt as a bare except does.
            columns = 6
        pps = Whale.plotpoints(pk, dimension, metric, period=period,
                               depth=params['depth'])
        # Keep the last ``columns + 1`` timestamps of the period.
        dates = list(period.datetimes_strs())[-(columns + 1):]

        header_template = (
            "$('#{id} .table').html('<table style=\"width: 100%\"> "
            "<tr> <th></th> <th></th> {columns} </tr>"
        )
        row_template = (
            '<tr> <td><input id="" style="display: none" type="checkbox" '
            'value="{checked}" name="checkbox-{pk}-{dimension}"></td> '
            '<td>{dimension_name}</td> {columns} </tr>'
        )

        pieces = [header_template.format(
            id=graph_id,
            columns=' '.join(
                ['<th>%s</th>' % date.replace('00:00:00 ', '')
                 for date in dates]))]

        # List the overall ('_') series first when it is present.
        dimensions = list(pps)
        if '_' in dimensions:
            dimensions.remove('_')
            dimensions.insert(0, '_')

        for dimension_counter, dim in enumerate(dimensions):
            if dim == '_':
                # The overall row is skipped when sub-dimensions were
                # requested (non-zero depth).
                if params['depth']:
                    continue
                dimension_name = '<b>Overall</b>'
            else:
                dimension_name = dim.capitalize()
            # Only the first ten dimensions get the value "on".
            checked = 'on' if dimension_counter < 10 else 'off'
            cells = ' '.join(
                ['<td>%s</td>' % int(pps[dim][metric][date])
                 for date in dates])
            pieces.append(row_template.format(
                pk=pk, dimension=dim, checked=checked,
                dimension_name=dimension_name, columns=cells))

        pieces.append("</table>');")
        table_str = ''.join(pieces)

    # Only hailwhale.min.js is injected; the script tag is split ("<scr" +
    # "ipt") so the literal does not appear verbatim in the emitted source.
    include_string = (
        "document.write(\"<scr\" + \"ipt type='text/javascript' "
        "src='%sjs/hailwhale.min.js'></script>\");" % hwurl)

    return_string = '''
appended=false;\n
document.write('<div id="{id}"><div class="graph" style="height: {height}"></div><div class="table"></div></div>');\n
function jqinit() {{\n
    if(typeof(jQuery) == 'undefined' || typeof(jQuery.hailwhale) == 'undefined') {{\n
        if(!appended) {{\n
            appended = true;\n
            {include_string}\n
        }}\n
        setTimeout(jqinit, 250);\n
    }} else {{\n
        $(function() {{\n
                $.hailwhale('{hwurl}').add_graph('{id} .graph', {options});\n
                {table_str}
        }});\n
    }}
}}
jqinit();\n


    '''.format(include_string=include_string,
               hwurl=hwurl, table_str=table_str, height=height,
               id=graph_id,
               options=util.maybe_dumps(params))
    return return_string
Пример #48
0
"""

# Query template: rendered with the encoded field names via
# Template(query_str).render(fields) further down and then parsed as JSON.
# The "$gte"/"$lte" values are overwritten after parsing with period.start
# and period.end.
query_str = """
{"{{dataset_id_field}}": "{{dataset.dataset_id}}",
"{{form_meta_timeend}}": {
              "$gte": "{{period.start}}", 
              "$lte": "{{period.end}}"
            }
}
"""
# Aggregation template: a "$group" stage that sums the
# "value.<num_using_fp>" field into "total"; rendered and JSON-parsed the
# same way as query_str.
aggregate_str = """
{"$group": {"_id": 0, "total": {"$sum": "$value.{{num_using_fp}}"}}}
"""
# Hard-coded dataset id, looked up through Dataset.find_one.
dataset_id = "5791793ac29b4d77b20cf1a04d8e7161"
dataset = Dataset.find_one(dataset_id)
# Period built from year 2013, month 3.
period = Period.month_period(2013, 3)

if dataset:
    fields = Observation.encoding(dataset)
    fields["dataset"] = dataset
    fields['dataset_id_field'] = fields[DATASET_ID]
    fields['period'] = Period.month_period(2013, 3)
    mapper = Code(Template(mapper_str).render(fields))
    reducer = Code(Template(reducer_str).render(fields))
    query = json.loads(Template(query_str).render(fields))
    query['%(form_meta_timeend)s' % fields]['$gte'] = period.start
    query['%(form_meta_timeend)s' % fields]['$lte'] = period.end
    aggregate = json.loads(Template(aggregate_str).render(fields))
    results = db.observations.map_reduce(mapper,
                                         reducer,
                                         'myresults_fp',