Exemplo n.º 1
0
    def dump_now(cls):
        """Flush one batch of buffered Hail hits into Whale.

        Increments the ``hail_number`` counter to claim the set named
        ``hail_<n>``, parses every hit key stored in that set, forwards each
        parseable hit to ``Whale.count_now`` and finally deletes the hit keys
        and the set itself.

        Hits that cannot be parsed are printed and skipped, but they are still
        deleted together with the rest of the batch. Returns ``None``.
        """
        r = cls.hail_driver()
        set_number_name = 'hail_number'
        r.setnx(set_number_name, 0)
        # incr returns the new value, so the batch to flush is the previous one.
        set_number = r.incr(set_number_name) - 1
        set_name = 'hail_%s' % set_number
        try:
            keys_from_hail = r.smembers(set_name)
        except Exception:
            # Best effort: if the set cannot be read, skip this flush.
            return
        if not keys_from_hail:
            r.delete(set_name)
            return

        def get_keys_from_json(k):
            """Return (pk, dimensions, metrics, at) for hit key k, or False."""
            try:
                # The stored class name is unpacked but not needed here; `at`
                # is passed on exactly as it was stored.
                class_name, pk, dimensions, metrics, at = json.loads(r[k])
                return (pk, dimensions, metrics, at)
            except Exception as e:
                print(e)
                return False

        # Parse the whole batch first, then count the hits that parsed.
        keys_to_update = [get_keys_from_json(k) for k in keys_from_hail]
        for packed in keys_to_update:
            if packed:
                pk, dimensions, metrics, at = packed
                Whale.count_now(pk, dimensions, metrics, at=at)

        # Delete the hits. An explicit loop is used because map() is lazy on
        # Python 3 and would silently delete nothing.
        for hit_key in keys_from_hail:
            r.delete(hit_key)
        r.delete(set_name)
Exemplo n.º 2
0
    def dump_now(cls):
        """Flush one batch of buffered Hail hits into Whale.

        The ``hail_number`` counter is incremented and the set called
        ``hail_<previous value>`` is read. Each member of that set is a key
        whose value is a JSON list ``[class_name, pk, dimensions, metrics,
        at]``; every hit that parses is passed to ``Whale.count_now``.
        Afterwards all hit keys and the set are deleted, including hits that
        failed to parse (those are only printed). Returns ``None``.
        """
        # Get the incoming hits from Hail
        r = cls.hail_driver()
        set_number_name = 'hail_number'
        r.setnx(set_number_name, 0)
        # The counter now points at the next batch; flush the one before it.
        set_number = r.incr(set_number_name) - 1
        set_name = 'hail_%s' % set_number
        try:
            keys_from_hail = r.smembers(set_name)
        except Exception:
            # Best effort: a failed read means there is nothing to flush now.
            return
        if not keys_from_hail:
            r.delete(set_name)
            return

        def get_keys_from_json(k):
            """Decode the hit stored under k; return False when it is bad."""
            try:
                class_name, pk, dimensions, metrics, at = json.loads(r[k])
            except Exception as e:
                print(e)
                return False
            return (pk, dimensions, metrics, at)

        keys_to_update = [get_keys_from_json(k) for k in keys_from_hail]
        for packed in keys_to_update:
            if not packed:
                continue
            pk, dimensions, metrics, at = packed
            Whale.count_now(pk, dimensions, metrics, at=at)

        # Delete the hits (a plain loop: map() would be lazy on Python 3).
        for hit_key in keys_from_hail:
            r.delete(hit_key)
        r.delete(set_name)
Exemplo n.º 3
0
    def dump_now(cls):
        """Flush one batch of buffered Hail hits into a Whale instance.

        Increments the ``hail_number`` counter, reads the set named
        ``hail_<previous value>`` and, for every member key, decodes the JSON
        list ``[class_name, categories, dimensions, metrics, t]`` stored under
        it. ``t`` is converted from a timestamp to a ``datetime`` and the hit
        is forwarded to ``whale.count_now``. Finally the hit keys and the set
        are deleted. Hits that fail to decode are printed, skipped, and still
        deleted. Returns ``None``.
        """
        # Get the incoming hits from Hail
        from whale import Whale
        whale = Whale()
        r = cls.driver()
        counter_name = 'hail_number'
        r.setnx(counter_name, 0)
        # incr returns the new value; the batch to flush is the previous one.
        set_number = r.incr(counter_name) - 1
        set_name = 'hail_%s' % set_number
        try:
            keys_from_hail = r.smembers(set_name)
        except Exception:
            # Best effort: if the set cannot be read, skip this flush.
            return
        if not keys_from_hail:
            r.delete(set_name)
            return

        def get_keys_from_json(k):
            """Return (categories, dimensions, metrics, at) for k, or False."""
            try:
                class_name, categories, dimensions, metrics, t = json.loads(r[k])
                at = datetime.datetime.fromtimestamp(float(t))
                return (categories, dimensions, metrics, at)
            except Exception as e:
                print(e)
                return False

        keys_to_update = [get_keys_from_json(k) for k in keys_from_hail]
        for packed in keys_to_update:
            if not packed:
                continue
            categories, dimensions, metrics, at = packed
            whale.count_now(categories, dimensions, metrics, at=at)

        # Delete the hits. This must iterate the original key names: the
        # parsed tuples in keys_to_update are not keys, and deleting those
        # would leave every hit behind in the store.
        for hit_key in keys_from_hail:
            r.delete(hit_key)
        r.delete(set_name)
Exemplo n.º 4
0
class TestHailWHale(unittest.TestCase):
    """Exercises Whale counting, subdimension lookup and crunching."""

    def setUp(self):
        """Build the Hail and Whale instances used by each test."""
        from hail import Hail
        from whale import Whale

        self.hail, self.whale = Hail(), Whale()

    def testGetSubdimensions(self):
        """Both counted dimension names show up in get_subdimensions."""
        self.whale.count_now("test", {"a": 1, "b": 2})
        found = self.whale.get_subdimensions("test")
        for expected in (["a"], ["b"]):
            assert expected in found

    def testGetAllSubdimensions(self):
        """all_subdimensions lists each dimension and each name/value pair."""
        self.whale.count_now("test", {"a": 1, "b": 2})
        found = self.whale.all_subdimensions("test")
        for expected in (["a"], ["a", "1"], ["b"], ["b", "2"]):
            assert expected in found

    def testCrunch(self):
        """Count three values under a fresh dimension, then crunch them."""
        # Unique key for every test
        stamp = str(time.time())
        for label, amount in (("a", 5), ("b", 1), ("c", 15)):
            self.whale.count_now("test_crunch", [stamp, label],
                                 {"value": amount})

        # The crunch result is not inspected in the visible code.
        data = self.whale.crunch("test_crunch", [stamp], "value")
Exemplo n.º 5
0
def count_now():
    """Record a hit in Whale and answer with the string 'OK'.

    The hit time is the request's ``at`` value parsed by dateutil with its
    tzinfo removed, or ``times.now()`` when no ``at`` value is supplied. All
    remaining arguments come from ``default_params()``.
    """
    counter = Whale()
    if g('at', False):
        from dateutil.parser import parse
        # Parse the raw value, then drop the timezone to get a naive datetime.
        when = parse(g('at')).replace(tzinfo=None)
    else:
        when = times.now()
    counter.count_now(at=when, **default_params())
    return 'OK'
Exemplo n.º 6
0
def tracker():
    """Count a hit, log it for a visitor id, and return that id as a string.

    Parameters come from ``default_params()``. When the request has no ``pk``
    but does have ``pixel``, the ``pixel`` value is base64-decoded, decrypted
    with AES-CBC and unpadded to obtain ``pk``. The hit is counted with
    ``whale.count_now`` at ``times.now()``, logged with ``hail.spy_log`` under
    the visitor's ``uid``, and the ``uid`` is written back as a cookie.
    """
    # NOTE(review): Period is not used anywhere in this function.
    from periods import Period
    import random
    params = default_params()
    # LOLOL THIS SHOULD REALLY CHANGE
    # NOTE(review): the key is derived from a hard-coded string and is used
    # both for AES below and as the third argument to the cookie calls
    # (presumably the signing secret -- confirm). It should come from config.
    key = hashlib.sha256('hailwhale_weak_key').digest()
    if 'pk' not in req.GET and 'pixel' in req.GET:
        from Crypto.Cipher import AES
        from base64 import b64encode, b64decode
        from urllib import quote_plus

        mode = AES.MODE_CBC
        # NOTE(review): no IV is passed for CBC mode; presumably the library
        # falls back to a default IV -- confirm, a fixed IV weakens CBC.
        encryptor = AES.new(key, mode)
        text = g('pixel')
        # Marker appended to the plaintext before the padding starts.
        INTERRUPT = u'\u0001'
        # Filler character used to reach a multiple of the block size.
        PAD = u'\u0000'

        # Since you need to pad your data before encryption,
        # create a padding function as well
        # Similarly, create a function to strip off the padding after decryption
        def AddPadding(data, interrupt, pad, block_size):
            # Append the interrupt marker, then enough pad characters to make
            # the total length a multiple of block_size.
            new_data = ''.join([data, interrupt])
            new_data_len = len(new_data)
            remaining_len = block_size - new_data_len
            # The modulo keeps the pad length in the range 0..block_size-1.
            to_pad_len = remaining_len % block_size
            pad_string = pad * to_pad_len
            return ''.join([new_data, pad_string])

        def StripPadding(data, interrupt, pad):
            # Removes ALL trailing pad characters and then ALL trailing
            # interrupt characters, so data that itself ends with the
            # interrupt character would lose it too.
            return data.rstrip(pad).rstrip(interrupt)

        # Pad to 32, encrypt, base64-encode and URL-quote t. This helper is
        # defined here but not called within this function.
        def hw_encoded(t):
            return quote_plus(
                b64encode(encryptor.encrypt(AddPadding(t, INTERRUPT, PAD,
                                                       32))))

        # Inverse direction: base64-decode, decrypt and strip the padding.
        def hw_decoded(t):
            return StripPadding(encryptor.decrypt(b64decode(t)), INTERRUPT,
                                PAD)

        params['pk'] = hw_decoded(text)
    # The value is not used afterwards; presumably this lookup fails early
    # when no 'pk' was supplied or decoded -- confirm params is dict-like.
    pk = params['pk']
    whale = Whale()
    hail = Hail()
    # The return value of count_now is not used.
    val = whale.count_now(at=times.now(), **params)
    #val = whale.count_now(**params)
    uid = g('uid')
    if not uid or uid == '_new':
        # No usable uid in the request: fall back to the 'uid' cookie, with a
        # fresh random number as the default passed to get_cookie.
        default = random.randrange(10**6, 10**9)
        uid = str(req.get_cookie('uid', str(default), key))
    hail.spy_log(uid, params)
    response.set_cookie('uid', uid, key)
    return str(uid)
Exemplo n.º 7
0
def count_now():
    """Record a hit in Whale from the request parameters and return 'OK'.

    ``at`` is parsed with dateutil when the parameters contain a truthy value
    for it; otherwise ``times.now()`` is used. ``pk``, ``metrics`` and
    ``dimensions`` are passed through from ``default_params()``.
    """
    counter = Whale()
    request_args = default_params()
    raw_at = request_args.get("at")
    if raw_at:
        from dateutil.parser import parse
        when = parse(raw_at)
    else:
        when = times.now()
    counter.count_now(
        at=when,
        pk=request_args.get("pk"),
        metrics=request_args.get("metrics"),
        dimensions=request_args.get("dimensions"))
    return 'OK'
Exemplo n.º 8
0
def count_now():
    """Count one hit in Whale using the request parameters; return 'OK'.

    The timestamp is the parsed ``at`` parameter when one is given and
    ``times.now()`` otherwise. ``pk``, ``metrics`` and ``dimensions`` are
    forwarded as keyword arguments.
    """
    tracker_store = Whale()
    supplied = default_params()
    timestamp = supplied.get("at")
    if timestamp:
        from dateutil.parser import parse
        timestamp = parse(timestamp)
    else:
        timestamp = times.now()
    # Collect the pass-through fields in the order they are forwarded.
    forwarded = {}
    for field in ("pk", "metrics", "dimensions"):
        forwarded[field] = supplied.get(field)
    tracker_store.count_now(at=timestamp, **forwarded)
    return 'OK'
Exemplo n.º 9
0
def tracker():
    """Count a hit, log it under a visitor id, and return the id as a string.

    Uses ``default_params()`` as the hit parameters. If the request carries
    ``pixel`` but not ``pk``, the ``pixel`` value is base64-decoded,
    AES-CBC-decrypted and unpadded, and the result becomes ``pk``. The hit is
    then counted via ``whale.count_now`` at ``times.now()``, logged through
    ``hail.spy_log`` and the visitor ``uid`` is stored in a cookie.
    """
    # NOTE(review): Period is imported but never used in this function.
    from periods import Period
    import random
    params = default_params()
    # LOLOL THIS SHOULD REALLY CHANGE
    # NOTE(review): hard-coded key material; it feeds both the AES cipher and
    # the cookie calls below (presumably as the signing secret -- confirm).
    key = hashlib.sha256('hailwhale_weak_key').digest()
    if 'pk' not in req.GET and 'pixel' in req.GET:
        from Crypto.Cipher import AES
        from base64 import b64encode, b64decode
        from urllib import quote_plus

        mode = AES.MODE_CBC
        # NOTE(review): CBC cipher created without an explicit IV -- confirm
        # what the library uses in that case.
        encryptor = AES.new(key, mode)
        text = g('pixel')
        # Marker placed between the payload and its padding.
        INTERRUPT = u'\u0001'
        # Character repeated to fill up the final block.
        PAD = u'\u0000'

        # Since you need to pad your data before encryption,
        # create a padding function as well
        # Similarly, create a function to strip off the padding after decryption
        def AddPadding(data, interrupt, pad, block_size):
            # data + interrupt, extended with pad characters until the length
            # is a multiple of block_size (0..block_size-1 pad characters).
            new_data = ''.join([data, interrupt])
            new_data_len = len(new_data)
            remaining_len = block_size - new_data_len
            to_pad_len = remaining_len % block_size
            pad_string = pad * to_pad_len
            return ''.join([new_data, pad_string])
        def StripPadding(data, interrupt, pad):
            # Strips every trailing pad character, then every trailing
            # interrupt character.
            return data.rstrip(pad).rstrip(interrupt)
        # Encrypting counterpart of hw_decoded; not called in this function.
        def hw_encoded(t):
            return quote_plus(b64encode(encryptor.encrypt(AddPadding(t, INTERRUPT, PAD, 32))))
        # base64-decode, decrypt, then remove the padding.
        def hw_decoded(t):
            return StripPadding(encryptor.decrypt(b64decode(t)), INTERRUPT, PAD)
        params['pk'] = hw_decoded(text)
    # Not used afterwards; presumably kept so a missing 'pk' fails here --
    # confirm params is dict-like.
    pk = params['pk']
    whale = Whale()
    hail = Hail()
    # The result of count_now is assigned but never read.
    val = whale.count_now(at=times.now(), **params)
    #val = whale.count_now(**params)
    uid = g('uid')
    if not uid or uid == '_new':
        # Fall back to the existing 'uid' cookie, defaulting to a new random
        # number when get_cookie has nothing to return.
        default = random.randrange(10**6,10**9)
        uid = str(req.get_cookie('uid', str(default), key))
    hail.spy_log(uid, params)
    response.set_cookie('uid', uid, key)
    return str(uid)
Exemplo n.º 10
0
class TestHailWhale(unittest.TestCase):
    """Tests for Whale counting, plotting, ranking and decision making.

    Each test runs against the Hail and Whale instances built in ``setUp``.
    Most tests derive a unique key from ``time.time()``, presumably so that
    data counted by earlier runs does not interfere.
    """

    def setUp(self):
        """Create the Hail and Whale instances used by every test."""
        from hail import Hail
        from whale import Whale
        self.hail = Hail()
        self.whale = Whale()

    def testGetSubdimensions(self):
        """Counted dimension names are reported by get_subdimensions."""
        t = 'subs_%s' % str(time.time())
        self.whale.count_now(t, {'a': 1, 'b': 2})
        subs = self.whale.get_subdimensions(t)
        assert 'a' in subs
        assert 'b' in subs

    def testGetAllSubdimensions(self):
        """all_subdimensions reports both names and [name, value] pairs."""
        t = 'all_subs_%s' % str(time.time())
        self.whale.count_now(t, {'a': 1, 'b': 2})
        subs = self.whale.all_subdimensions(t)
        assert 'a' in subs
        assert ['a', '1'] in subs
        assert 'b' in subs
        assert ['b', '2'] in subs

    def testPlotpoints(self):
        """Five identical counts add up in the latest plot point."""
        t = str(time.time())

        for i in range(5):
            self.whale.count_now('test_plotpoints', t, {'hits': 1, 'values': 5})
        plotpoints = self.whale.plotpoints('test_plotpoints', t,
                                           ['hits', 'values'], points_type=list)

        self.assertEqual(plotpoints[t]['hits'][-1][1], 5)
        self.assertEqual(plotpoints[t]['values'][-1][1], 25)

    def testPlotpointsDepth(self):
        """plotpoints honours the depth and limit arguments."""
        t = str(time.time())
        self.whale.count_now('test_depth', {t: 'a'})
        self.whale.count_now('test_depth', {t: 'b'})
        self.whale.count_now('test_depth', {t: 'b'})
        self.whale.count_now('test_depth', {t: {'c': 'child'}})
        # Test 1 level deep
        plotpoints = self.whale.plotpoints('test_depth', t, points_type=list,
                                           depth=1)
        self.assertEqual(plotpoints[maybe_dumps([t, 'a'])]['hits'][-1][1], 1)
        self.assertEqual(plotpoints[maybe_dumps([t, 'b'])]['hits'][-1][1], 2)
        self.assertEqual(plotpoints[maybe_dumps([t, 'c'])]['hits'][-1][1], 1)
        self.assertEqual(False, maybe_dumps([t, 'c', 'child']) in plotpoints)
        # Test 2 levels deep
        plotpoints = self.whale.plotpoints('test_depth', t, points_type=list,
                                           depth=2)
        self.assertEqual(True, maybe_dumps([t, 'c', 'child']) in plotpoints)
        self.assertEqual(
            plotpoints[maybe_dumps([t, 'c', 'child'])]['hits'][-1][1], 1)

        # Test ranking and limiting
        plotpoints = self.whale.plotpoints('test_depth', t, points_type=list,
                                           depth=1, limit=2)
        self.assertEqual(plotpoints[maybe_dumps([t, 'b'])]['hits'][-1][1], 2)
        self.assertEqual(True, maybe_dumps([t, 'a']) not in plotpoints)
        self.assertEqual(True, maybe_dumps([t, 'c']) not in plotpoints)

    def testRatioPlotpoints(self):
        """A 'value/hit' metric is plotted as the ratio of the two metrics."""
        t = str(time.time())

        for i in range(5):
            self.whale.count_now('test_ratio', t, {'hit': 1, 'value': 5})

        plotpoints = self.whale.plotpoints(
            'test_ratio', t, ['hit', 'value', 'value/hit'], points_type=list)

        self.assertEqual(plotpoints[t]['hit'][-1][1], 5)
        self.assertEqual(plotpoints[t]['value'][-1][1], 25)

        self.assertEqual(plotpoints[t]['value/hit'][-1][1], 5)

    def testRankSubdimensionsScalar(self):
        """Scalar ranking flags the expected subdimensions as important."""
        t = str(time.time())
        self.whale.count_now('test_rank', [t, 'a', 'asub1'], {'value': 1})
        self.whale.count_now('test_rank', [t, 'a', 'asub2'], {'value': 30})
        self.whale.count_now('test_rank', [t, 'b'], {'value': 80})
        self.whale.count_now('test_rank', [t, 'c'], {'value': 10})
        ranked = self.whale.rank_subdimensions_scalar('test_rank', t, 'value')
        self.assertEqual(ranked[maybe_dumps([t, 'a'])]['important'], False)
        self.assertEqual(
            ranked[maybe_dumps([t, 'a', 'asub1'])]['important'], False)
        self.assertEqual(
            ranked[maybe_dumps([t, 'a', 'asub2'])]['important'], True)
        self.assertEqual(ranked[maybe_dumps([t, 'b'])]['important'], True)
        self.assertEqual(ranked[maybe_dumps([t, 'c'])]['important'], False)

    def testRankSubdimensionsRatio(self):
        """Ratio ranking flags outliers, with and without recursion."""
        t = str(time.time())
        pk = 'test_ratio_rank'
        # OVERALL STATS: 529,994 value, 50,000 visitors, 10.6 value per visitor
        # Not important, too close to overall
        self.whale.count_now(pk, [t, 'a', 'asub1'],
            {'value': 54989, 'visitors': 4999})  # 11 value per visitor
        # Important, high relative ratio
        self.whale.count_now(pk, [t, 'a', 'asub2'],
            {'value': 375000, 'visitors': 25000})  # 15 value per visitor
        # Important, low relative ratio
        self.whale.count_now(pk, [t, 'b'],
            {'value': 100000, 'visitors': 20000})  # 5 value per visitor
        # Not important, not enough visitors
        self.whale.count_now(pk, [t, 'c'],
            {'value': 5, 'visitors': 1})  # 5 value per visitor

        # Query the same pk the data was counted under; a different literal
        # would return nothing and make the one_level check below vacuous.
        one_level = self.whale.rank_subdimensions_ratio(pk, 'value', 'visitors',
            t, recursive=False)

        all_levels = self.whale.rank_subdimensions_ratio(pk, 'value',
                                                         'visitors', t)
        self.assertEqual(True, maybe_dumps([t, 'a', 'asub1']) not in one_level)
        self.assertEqual(
            all_levels[maybe_dumps([t, 'a', 'asub1'])]['important'], False)
        self.assertEqual(
            all_levels[maybe_dumps([t, 'a', 'asub2'])]['important'], True)
        self.assertEqual(all_levels[maybe_dumps([t, 'b'])]['important'], True)
        self.assertEqual(all_levels[maybe_dumps([t, 'c'])]['important'], False)

    def testBasicDecision(self):
        """decide picks the clearly best option from simple counted data."""
        pk = 'test_basic_decision'
        decision = str(time.time())
        # Make a decision, any decision, from no information whatsoever
        good, bad, test = self.whale.weighted_reasons(pk, 'random', [1, 2, 3])
        #_print_reasons(good, bad, test)
        any_one = self.whale.decide_from_reasons(good, bad, test)
        self.assertEqual(True, any_one in [1, 2, 3])

        # OK, now how about something somewhat informed?
        # This will be easy. Slogan A makes us huge profit. Products B and C suck.
        # D looks promising but isn't yet significant
        opts = ['a', 'b', 'c', 'd']
        self.whale.count_now([pk, decision, 'a'], None, dict(dollars=5000, visitors=1000))
        self.whale.count_now([pk, decision, 'b'], None, dict(dollars=0, visitors=2000))
        self.whale.count_now([pk, decision, 'c'], None, dict(dollars=0, visitors=2000))
        self.whale.count_now([pk, decision, 'd'], None, dict(dollars=50, visitors=10))

        good, bad, test = self.whale.weighted_reasons(pk, decision, opts,
                                                      formula='dollars/visitors')
        #_print_reasons(good, bad, test)

        self.assertEqual(True, 'a' in good.keys())
        self.assertEqual(True, 'b' in bad.keys())
        self.assertEqual(True, 'c' in bad.keys())
        self.assertEqual(True, 'd' in test.keys())
        which_one = self.whale.decide(pk, decision, opts, formula='dollars/visitors',
            bad_idea_threshold=0, test_idea_threshold=0)
        self.assertEqual(which_one, 'a')

    def testInformedDecision(self):
        """Known visitor data (country) changes which options look good."""
        pk = 'test_informed_decision'
        decision = str(time.time())

        # A is the clear winner, except when country=UK, in which case B wins
        opts = ['a', 'b', 'c', 'd']
        self.whale.count_now([pk, decision, 'a'], None, dict(dollars=50000, visitors=10000))
        self.whale.count_now([pk, decision, 'b'], None, dict(dollars=0, visitors=2000))
        self.whale.count_now([pk, decision, 'b'], {'country': 'uk'}, dict(dollars=10000, visitors=2000))
        self.whale.count_now([pk, decision, 'c'], None, dict(dollars=0, visitors=7500))
        self.whale.count_now([pk, decision, 'd'], None, dict(dollars=5, visitors=1))

        # Here's a visitor with no info -- 'A' should win by far.
        good, bad, test = self.whale.weighted_reasons(pk, decision, opts,
                                                      formula='dollars/visitors')
        #_print_reasons(good, bad, test)
        self.assertEqual(True, 'a' in good.keys())
        self.assertEqual(True, 'b' in bad.keys())
        self.assertEqual(True, 'c' in bad.keys())
        self.assertEqual(True, 'd' in test.keys())

        # How about when we know the country is "UK"?
        good, bad, test = self.whale.weighted_reasons(pk, decision, opts, formula='dollars/visitors',
            known_data={'country': 'uk'})
        #_print_reasons(good, bad, test)
        self.assertEqual(True, 'a' in good.keys())
        self.assertEqual(True, 'b' in good.keys())
        self.assertEqual(True, 'c' in bad.keys())
        self.assertEqual(True, 'd' in test.keys())
        # Tally 100 decisions; only 'a' and 'b' are expected to be chosen.
        chosen = {'a': 0, 'b': 0}
        for k in range(100):
            choose = self.whale.decide(pk, decision, opts, formula='dollars/visitors',
                known_data={'country': 'uk'}, bad_idea_threshold=0, test_idea_threshold=0)
            chosen[choose] += 1
        self.assertEqual(True, chosen['b'] > 70,
            """A decision made 100 times between weights .15 vs .85 should have around 85 votes for 'b',
                we got %s, which is unlikely enough to fail a test, but not definitely
                indicative of a problem. If this test passes again on the next run, ignore the failure.""" % chosen)

    def testTrickyDecision(self):
        """decide picks the best language per geo from count_decided_now data."""
        pk = 'test_tricky_decision'
        decision = str(time.time())
        opts = ['en', 'sp', 'pt']

        def count(geo, lang, dollars, visitors):
            # Record the outcome of having chosen `lang` for visitors in `geo`.
            self.whale.count_decided_now(pk, decision, lang, geo,
            {'dollars': dollars, 'visitors': visitors})

        def justify(geo):
            # Compute the reasons (kept for debugging), then return the choice.
            #print
            #print 'Picking reasons for ', geo
            good, bad, test = self.whale.weighted_reasons(pk, decision, opts,
                'dollars/visitors', geo)
            #print good.keys(), bad.keys(), test.keys()
            #_print_reasons(good, bad, test)
            return self.whale.decide(pk, decision, opts, 'dollars/visitors', geo,
                bad_idea_threshold=0, test_idea_threshold=0)
        k = 1000
        m = k * k
        # Sure, these results seem predictable to a human
        # But what will our philosopher whale friend make of it?
        count('us', 'en', 1.5 * m, 300 * k)  # $5/visitor, alright!
        count('us', 'sp', 1 * k, 10 * k)  # $.10/visitor, well that is not surprising
        count('us', 'pt', 300, 5 * k)  # $.06/visitor, :(

        count('mx', 'en', 100 * k, 100 * k)  # $1/visitor, this almost works
        count('mx', 'sp', 200 * k, 100 * k)  # $2/visitor aww yah!
        count('mx', 'pt', 200, 10 * k)  # $.02/visitor lol

        count('br', 'en', 300 * k, 100 * k)  # $3/visitor is good
        count('br', 'sp', 150 * k, 50 * k)   # $3/visitor as well
        count('br', 'pt', 500 * k, 50 * k)   # $10 JACKPOT

        self.assertEqual('en', justify('us'))
        self.assertEqual(True, justify('mx') in ['sp', 'en'])
        self.assertEqual('pt', justify('br'))

    def testWhaleCacheWrapper(self):
        """cached_plotpoints keeps its old result until unmemoize is passed."""
        t = str(time.time())
        count = lambda: self.whale.count_now('test_cached', t)
        cached_sum = lambda clear=False: sum(self.whale.cached_plotpoints('test_cached',
                t, period='fivemin', unmemoize=clear)[t]['hits'].values())

        # Set hits to 1
        count()
        self.assertEqual(cached_sum(), 1)

        # Should stay 1 for a while
        for i in range(3):
            count()
            self.assertEqual(cached_sum(), 1)
        self.assertEqual(cached_sum(clear=True), 4)
Exemplo n.º 11
0
def count_now():
    """Record a hit in Whale stamped with the current UTC time; return 'OK'.

    All other arguments for the hit come from ``default_params()``.
    """
    from datetime import datetime
    counter = Whale()
    stamp = datetime.utcnow()
    extra_args = default_params()
    counter.count_now(at=stamp, **extra_args)
    return 'OK'
Exemplo n.º 12
0
def count_now():
    """Record a hit in Whale stamped with ``datetime.now()``; return 'OK'.

    All other arguments for the hit come from ``default_params()``.
    """
    counter = Whale()
    stamp = datetime.now()
    extra_args = default_params()
    counter.count_now(at=stamp, **extra_args)
    return 'OK'
Exemplo n.º 13
0
class TestHailWhale(unittest.TestCase):
    """Tests covering Whale counting, plot points, ranking and decisions.

    ``setUp`` builds the Hail and Whale instances that every test uses. Keys
    are usually derived from ``time.time()``, presumably to keep each run's
    data separate from what earlier runs counted.
    """

    def setUp(self):
        """Create the Hail and Whale instances used by every test."""
        from hail import Hail
        from whale import Whale
        self.hail = Hail()
        self.whale = Whale()

    def testGetSubdimensions(self):
        """get_subdimensions returns the names of the counted dimensions."""
        t = 'subs_%s' % str(time.time())
        self.whale.count_now(t, {'a': 1, 'b': 2})
        subs = self.whale.get_subdimensions(t)
        assert 'a' in subs
        assert 'b' in subs

    def testGetAllSubdimensions(self):
        """all_subdimensions returns names as well as [name, value] pairs."""
        t = 'all_subs_%s' % str(time.time())
        self.whale.count_now(t, {'a': 1, 'b': 2})
        subs = self.whale.all_subdimensions(t)
        assert 'a' in subs
        assert ['a', '1'] in subs
        assert 'b' in subs
        assert ['b', '2'] in subs

    def testPlotpoints(self):
        """Repeated counts accumulate in the most recent plot point."""
        t = str(time.time())

        for i in range(5):
            self.whale.count_now('test_plotpoints', t, {'hits': 1, 'values': 5})
        plotpoints = self.whale.plotpoints('test_plotpoints', t,
                                           ['hits', 'values'], points_type=list)
        self.assertEqual(plotpoints[t]['hits'][-1][1], 5)
        self.assertEqual(plotpoints[t]['values'][-1][1], 25)

    def testPlotpointsDepth(self):
        """plotpoints returns nested dimensions according to depth and limit."""
        t = str(time.time())
        self.whale.count_now('test_depth', {t: 'a'})
        self.whale.count_now('test_depth', {t: 'b'})
        self.whale.count_now('test_depth', {t: 'b'})
        self.whale.count_now('test_depth', {t: {'c': 'child'}})
        # Test 1 level deep
        plotpoints = self.whale.plotpoints('test_depth', t, points_type=list,
                                           depth=1)

        self.assertEqual(plotpoints[maybe_dumps([t, 'a'])]['hits'][-1][1], 1)
        self.assertEqual(plotpoints[maybe_dumps([t, 'b'])]['hits'][-1][1], 2)
        self.assertEqual(plotpoints[maybe_dumps([t, 'c'])]['hits'][-1][1], 1)
        self.assertEqual(False, maybe_dumps([t, 'c', 'child']) in plotpoints)
        # Test 2 levels deep
        plotpoints = self.whale.plotpoints('test_depth', t, points_type=list,
                                           depth=2)

        self.assertEqual(True, maybe_dumps([t, 'c', 'child']) in plotpoints)
        self.assertEqual(
            plotpoints[maybe_dumps([t, 'c', 'child'])]['hits'][-1][1], 1)

        # Test ranking and limiting, i.e. rank the candidates by value and
        # keep only the top `limit` of them.
        plotpoints = self.whale.plotpoints('test_depth', t, points_type=list,
                                           depth=1, limit=2)

        # NOTE(review): this assertion was already disabled; the reason is
        # not recorded here -- confirm whether it can be re-enabled.
        #self.assertEqual(plotpoints[maybe_dumps([t, 'b'])]['hits'][-1][1], 2)
        self.assertEqual(True, maybe_dumps([t, 'a']) not in plotpoints)
        self.assertEqual(True, maybe_dumps([t, 'c']) not in plotpoints)

    def testRatioPlotpoints(self):
        """A 'value/hit' metric is plotted as the ratio of the two metrics."""
        t = str(time.time())

        for i in range(5):
            self.whale.count_now('test_ratio', t, {'hit': 1, 'value': 5})

        plotpoints = self.whale.plotpoints(
            'test_ratio', t, ['hit', 'value', 'value/hit'], points_type=list)

        self.assertEqual(plotpoints[t]['hit'][-1][1], 5)
        self.assertEqual(plotpoints[t]['value'][-1][1], 25)

        self.assertEqual(plotpoints[t]['value/hit'][-1][1], 5)

    def testRankSubdimensionsScalar(self):
        """Scalar ranking marks the expected subdimensions as important."""
        t = str(time.time())
        self.whale.count_now('test_rank', [t, 'a', 'asub1'], {'value': 1})
        self.whale.count_now('test_rank', [t, 'a', 'asub2'], {'value': 30})
        self.whale.count_now('test_rank', [t, 'b'], {'value': 80})
        self.whale.count_now('test_rank', [t, 'c'], {'value': 10})
        ranked = self.whale.rank_subdimensions_scalar('test_rank', t, 'value')

        self.assertEqual(ranked[maybe_dumps([t, 'a'])]['important'], False)
        self.assertEqual(
            ranked[maybe_dumps([t, 'a', 'asub1'])]['important'], False)
        self.assertEqual(
            ranked[maybe_dumps([t, 'a', 'asub2'])]['important'], True)
        self.assertEqual(ranked[maybe_dumps([t, 'b'])]['important'], True)
        self.assertEqual(ranked[maybe_dumps([t, 'c'])]['important'], False)

    def testRankSubdimensionsRatio(self):
        """Ratio ranking marks outliers, with and without recursion."""
        t = str(time.time())
        pk = 'test_ratio_rank'
        # OVERALL STATS: 529,994 value, 50,000 visitors, 10.6 value per visitor
        # Not important, too close to overall
        self.whale.count_now(pk, [t, 'a', 'asub1'],
            {'value': 54989, 'visitors': 4999})  # 11 value per visitor
        # Important, high relative ratio
        self.whale.count_now(pk, [t, 'a', 'asub2'],
            {'value': 375000, 'visitors': 25000})  # 15 value per visitor
        # Important, low relative ratio
        self.whale.count_now(pk, [t, 'b'],
            {'value': 100000, 'visitors': 20000})  # 5 value per visitor
        # Not important, not enough visitors
        self.whale.count_now(pk, [t, 'c'],
            {'value': 5, 'visitors': 1})  # 5 value per visitor

        # Use the pk the data was written under. Querying another name would
        # return no data, so the one_level assertion could never fail.
        one_level = self.whale.rank_subdimensions_ratio(pk, 'value', 'visitors',
            t, recursive=False)

        all_levels = self.whale.rank_subdimensions_ratio(pk, 'value',
                                                         'visitors', t)
        self.assertEqual(True, maybe_dumps([t, 'a', 'asub1']) not in one_level)
        self.assertEqual(
            all_levels[maybe_dumps([t, 'a', 'asub1'])]['important'], False)
        self.assertEqual(
            all_levels[maybe_dumps([t, 'a', 'asub2'])]['important'], True)
        self.assertEqual(all_levels[maybe_dumps([t, 'b'])]['important'], True)
        self.assertEqual(all_levels[maybe_dumps([t, 'c'])]['important'], False)

    def testBasicDecision(self):
        """decide returns the clearly best option from simple counted data."""
        pk = 'test_basic_decision'
        decision = str(time.time())
        # Make a decision, any decision, from no information whatsoever
        good, bad, test = self.whale.weighted_reasons(pk, 'random', [1, 2, 3])
        #_print_reasons(good, bad, test)
        any_one = self.whale.decide_from_reasons(good, bad, test)
        self.assertEqual(True, any_one in [1, 2, 3])

        # OK, now how about something somewhat informed?
        # This will be easy. Slogan A makes us huge profit. Products B and C suck.
        # D looks promising but isn't yet significant
        opts = ['a', 'b', 'c', 'd']
        self.whale.count_now([pk, decision, 'a'], None, dict(dollars=5000, visitors=1000))
        self.whale.count_now([pk, decision, 'b'], None, dict(dollars=0, visitors=2000))
        self.whale.count_now([pk, decision, 'c'], None, dict(dollars=0, visitors=2000))
        self.whale.count_now([pk, decision, 'd'], None, dict(dollars=50, visitors=10))

        good, bad, test = self.whale.weighted_reasons(pk, decision, opts,
                                                      formula='dollars/visitors')
        #_print_reasons(good, bad, test)

        self.assertEqual(True, 'a' in good.keys())
        self.assertEqual(True, 'b' in bad.keys())
        self.assertEqual(True, 'c' in bad.keys())
        self.assertEqual(True, 'd' in test.keys())
        which_one = self.whale.decide(pk, decision, opts, formula='dollars/visitors',
            bad_idea_threshold=0, test_idea_threshold=0)
        self.assertEqual(which_one, 'a')

    def testInformedDecision(self):
        """Passing known_data changes which options are rated as good."""
        pk = 'test_informed_decision'
        decision = str(time.time())

        # A is the clear winner, except when country=UK, in which case B wins
        opts = ['a', 'b', 'c', 'd']
        self.whale.count_now([pk, decision, 'a'], None, dict(dollars=50000, visitors=10000))
        self.whale.count_now([pk, decision, 'b'], None, dict(dollars=0, visitors=2000))
        self.whale.count_now([pk, decision, 'b'], {'country': 'uk'}, dict(dollars=10000, visitors=2000))
        self.whale.count_now([pk, decision, 'c'], None, dict(dollars=0, visitors=7500))
        self.whale.count_now([pk, decision, 'd'], None, dict(dollars=5, visitors=1))

        # Here's a visitor with no info -- 'A' should win by far.
        good, bad, test = self.whale.weighted_reasons(pk, decision, opts,
                                                      formula='dollars/visitors')
        #_print_reasons(good, bad, test)
        self.assertEqual(True, 'a' in good.keys())
        self.assertEqual(True, 'b' in bad.keys())
        self.assertEqual(True, 'c' in bad.keys())
        self.assertEqual(True, 'd' in test.keys())

        # How about when we know the country is "UK"?
        good, bad, test = self.whale.weighted_reasons(pk, decision, opts, formula='dollars/visitors',
            known_data={'country': 'uk'})
        #_print_reasons(good, bad, test)
        self.assertEqual(True, 'a' in good.keys())
        self.assertEqual(True, 'b' in good.keys())
        self.assertEqual(True, 'c' in bad.keys())
        self.assertEqual(True, 'd' in test.keys())
        # Tally 100 decisions; only 'a' and 'b' are expected to be chosen.
        chosen = {'a': 0, 'b': 0}
        for k in range(100):
            choose = self.whale.decide(pk, decision, opts, formula='dollars/visitors',
                known_data={'country': 'uk'}, bad_idea_threshold=0, test_idea_threshold=0)
            chosen[choose] += 1
        self.assertEqual(True, chosen['b'] > 70,
            """A decision made 100 times between weights .15 vs .85 should have around 85 votes for 'b',
                we got %s, which is unlikely enough to fail a test, but not definitely
                indicative of a problem. If this test passes again on the next run, ignore the failure.""" % chosen)

    def testTrickyDecision(self):
        """decide returns the best language for each geo."""
        pk = 'test_tricky_decision'
        decision = str(time.time())
        opts = ['en', 'sp', 'pt']

        def count(geo, lang, dollars, visitors):
            # Record the outcome of having chosen `lang` for visitors in `geo`.
            self.whale.count_decided_now(pk, decision, lang, geo,
            {'dollars': dollars, 'visitors': visitors})

        def justify(geo):
            # Compute the reasons (kept for debugging), then return the choice.
            #print
            #print 'Picking reasons for ', geo
            good, bad, test = self.whale.weighted_reasons(pk, decision, opts,
                'dollars/visitors', geo)
            #print good.keys(), bad.keys(), test.keys()
            #_print_reasons(good, bad, test)
            return self.whale.decide(pk, decision, opts, 'dollars/visitors', geo,
                bad_idea_threshold=0, test_idea_threshold=0)
        k = 1000
        m = k * k
        # Sure, these results seem predictable to a human
        # But what will our philosopher whale friend make of it?
        count('us', 'en', 1.5 * m, 300 * k)  # $5/visitor, alright!
        count('us', 'sp', 1 * k, 10 * k)  # $.10/visitor, well that is not surprising
        count('us', 'pt', 300, 5 * k)  # $.06/visitor, :(

        count('mx', 'en', 100 * k, 100 * k)  # $1/visitor, this almost works
        count('mx', 'sp', 200 * k, 100 * k)  # $2/visitor aww yah!
        count('mx', 'pt', 200, 10 * k)  # $.02/visitor lol

        count('br', 'en', 300 * k, 100 * k)  # $3/visitor is good
        count('br', 'sp', 150 * k, 50 * k)   # $3/visitor as well
        count('br', 'pt', 500 * k, 50 * k)   # $10 JACKPOT

        self.assertEqual('en', justify('us'))
        self.assertEqual(True, justify('mx') in ['sp', 'en'])
        self.assertEqual('pt', justify('br'))

    def testWhaleCacheWrapper(self):
        """cached_plotpoints serves its old result until unmemoize is set."""
        t = str(time.time())
        count = lambda: self.whale.count_now('test_cached', t)
        cached_sum = lambda clear=False: sum(self.whale.cached_plotpoints('test_cached',
                t, period='fivemin', unmemoize=clear)[t]['hits'].values())

        # Set hits to 1
        count()
        self.assertEqual(cached_sum(), 1)

        # Should stay 1 for a while
        for i in range(3):
            count()
            self.assertEqual(cached_sum(), 1)
        self.assertEqual(cached_sum(clear=True), 4)