def test_calculate_bucket(self):
        """Bucketing is deterministic and fills every bucket evenly.

        Uses the experiment returned by ``create_simple_experiment()`` and
        2000 synthetic ``t2_<n>`` users per bucket. Each bucket must end up
        within 10% of an equal share of the users.
        """
        experiment = create_simple_experiment()

        # Plenty of users per bucket keeps the observed share stable enough
        # to compare against the ideal one.
        num_users = experiment.num_buckets * 2000

        hits = collections.Counter()
        for index in range(num_users):
            user_fullname = "t2_%s" % str(index)
            assigned = experiment._calculate_bucket(user_fullname)
            hits[assigned] += 1
            # Asking again for the same user must give the same bucket.
            self.assertEqual(
                assigned, experiment._calculate_bucket(user_fullname))

        for bucket in range(experiment.num_buckets):
            # An even spread puts the same number of users in each bucket.
            ideal = num_users / experiment.num_buckets
            # Compare as a ratio, not a raw difference, so the tolerance
            # keeps its meaning if the number of users changes.
            share = float(hits[bucket]) / ideal
            self.assertAlmostEqual(
                share, 1.0, delta=.10, msg='bucket: %s' % bucket)
Exemplo n.º 2
0
    def test_calculate_bucket_with_seed(self):
        """Check bucketing for an experiment configured with a seed.

        Every user must bucket deterministically, buckets must fill evenly
        (each within 10% of an equal share), and swapping in a different
        seed must move at least one user to a different bucket.
        """
        expected_seed = "itscoldintheoffice"
        start_ts = time.time() - THIRTY_DAYS
        stop_ts = time.time() + THIRTY_DAYS
        experiment = parse_experiment({
            "id": 1,
            "name": "test",
            "owner": "test",
            "type": "r2",
            "version": "1",
            "start_ts": start_ts,
            "stop_ts": stop_ts,
            "experiment": {
                "variants": {
                    "control_1": 10,
                    "control_2": 10,
                },
                "seed": expected_seed,
            },
        })

        # 1000 users per bucket gives a stable per-bucket share to check.
        num_users = experiment.num_buckets * 1000

        hits = collections.Counter()
        seed_moved_someone = False
        for index in range(num_users):
            user_fullname = "t2_%s" % str(index)

            # The configured seed must be in effect on every iteration,
            # i.e. the temporary swap below was undone.
            self.assertEqual(experiment.seed, expected_seed)
            seeded_bucket = experiment._calculate_bucket(user_fullname)
            hits[seeded_bucket] += 1
            # Same user, same seed: same bucket.
            self.assertEqual(
                seeded_bucket, experiment._calculate_bucket(user_fullname))

            # Temporarily bucket the same user under another seed.
            saved_seed = experiment.seed
            experiment.seed = "newstring"
            reseeded_bucket = experiment._calculate_bucket(user_fullname)
            experiment.seed = saved_seed

            # Individual users may land in the same bucket under both
            # seeds, so only record whether any user moved; asserting per
            # user would fail spuriously.
            if seeded_bucket != reseeded_bucket:
                seed_moved_someone = True

        self.assertTrue(seed_moved_someone)

        for bucket in range(experiment.num_buckets):
            # An even spread puts the same number of users in each bucket.
            ideal = num_users / experiment.num_buckets
            # A ratio scales better than a raw difference when the number
            # of users changes.
            share = float(hits[bucket]) / ideal
            self.assertAlmostEqual(
                share, 1.0, delta=.10, msg='bucket: %s' % bucket)
    def test_change_shuffle_version_changes_bucketing(self):
        """Setting ``shuffle_version`` to 2 changes at least one user's bucket.

        Two experiments are parsed from ``get_simple_config()``; the second
        has ``shuffle_version`` set to 2. The test walks synthetic
        ``t2_<n>`` users, checks that the first experiment buckets each one
        deterministically, and stops as soon as one user is bucketed
        differently by the two experiments.
        """
        cfg = get_simple_config()
        experiment_version_1 = parse_experiment(cfg)

        shuffle_cfg = get_simple_config()
        shuffle_cfg['experiment']['shuffle_version'] = 2
        experiment_version_2 = parse_experiment(shuffle_cfg)

        # One differing user is enough, so this is only an upper bound on
        # how many users are tried before giving up.
        num_users = experiment_version_1.num_buckets * 100

        bucketing_changed = False
        for i in range(num_users):
            fullname = "t2_%s" % str(i)
            bucket1 = experiment_version_1._calculate_bucket(fullname)
            # Ensure bucketing is deterministic.
            self.assertEqual(bucket1,
                             experiment_version_1._calculate_bucket(fullname))

            bucket2 = experiment_version_2._calculate_bucket(fullname)
            # Some users land in the same bucket under both versions, so
            # asserting inequality per user would fail spuriously. Instead
            # stop at the first user whose bucket differs.
            if bucket1 != bucket2:
                bucketing_changed = True
                break

        self.assertTrue(
            bucketing_changed,
            msg="changing shuffle_version did not move any user's bucket")
Exemplo n.º 4
0
    def test_distribution_def_odd(self):
        """MultiVariantSet over NUM_BUCKETS_ODD buckets with four variants.

        Adds a ``variant_4`` of size 0.25 to the generated variant config
        and expects 259 buckets per variant and one bucket with no variant.
        """
        variants = generate_variant_config()
        variants.append({"name": "variant_4", "size": 0.25})
        variant_set = MultiVariantSet(
            variants=variants,
            num_buckets=NUM_BUCKETS_ODD,
        )

        # Pre-seed every outcome we expect (None means "no variant"); any
        # other return value raises KeyError in the loop below.
        named_variants = ("variant_1", "variant_2", "variant_3", "variant_4")
        tally = dict.fromkeys(named_variants + (None,), 0)

        for bucket in range(NUM_BUCKETS_ODD):
            tally[variant_set.choose_variant(bucket)] += 1

        self.assertEqual(len(tally), 5)
        for variant_name in named_variants:
            self.assertEqual(tally[variant_name], 259)
        self.assertEqual(tally[None], 1)
    def test_distribution_def_odd(self):
        """SingleVariantSet over NUM_BUCKETS_ODD buckets with two 0.5 variants.

        Expects 518 buckets for each variant and one bucket with no variant.
        """
        variant_set = SingleVariantSet(
            variants=[
                {"name": "variant_1", "size": 0.5},
                {"name": "variant_2", "size": 0.5},
            ],
            num_buckets=NUM_BUCKETS_ODD,
        )

        # None stands for "no variant chosen"; any result outside these
        # three keys raises KeyError while counting.
        tally = {"variant_1": 0, "variant_2": 0, None: 0}
        for bucket in range(NUM_BUCKETS_ODD):
            tally[variant_set.choose_variant(bucket)] += 1

        self.assertEqual(len(tally), 3)
        expectations = (("variant_1", 518), ("variant_2", 518), (None, 1))
        for variant_name, expected_count in expectations:
            self.assertEqual(tally[variant_name], expected_count)
    def test_distribution_single_bucket(self):
        """A 0.001-sized variant gets exactly one of the default buckets.

        With ``variant_1`` at size 0.001 and ``variant_2`` at size 0, the
        test expects one bucket for ``variant_1``, none for ``variant_2``
        and 999 buckets with no variant.
        """
        variant_set = SingleVariantSet(
            variants=[
                {"name": "variant_1", "size": 0.001},
                {"name": "variant_2", "size": 0},
            ],
            num_buckets=NUM_BUCKETS_DEFAULT,
        )

        # None stands for "no variant chosen"; an unexpected result raises
        # KeyError while counting.
        tally = {"variant_1": 0, "variant_2": 0, None: 0}
        for bucket in range(NUM_BUCKETS_DEFAULT):
            tally[variant_set.choose_variant(bucket)] += 1

        self.assertEqual(len(tally), 3)

        expectations = (("variant_1", 1), ("variant_2", 0), (None, 999))
        for variant_name, expected_count in expectations:
            self.assertEqual(tally[variant_name], expected_count)
Exemplo n.º 7
0
    def test_calculate_bucket(self):
        """Bucketing for a two-control r2 experiment is stable and even.

        Parses an inline config, buckets 1000 synthetic ``t2_<n>`` users per
        bucket, and requires every bucket to hold within 10% of an equal
        share of the users.
        """
        start_ts = time.time() - THIRTY_DAYS
        stop_ts = time.time() + THIRTY_DAYS
        experiment = parse_experiment({
            "id": 1,
            "name": "test",
            "owner": "test",
            "type": "r2",
            "version": "1",
            "start_ts": start_ts,
            "stop_ts": stop_ts,
            "experiment": {
                "variants": {
                    "control_1": 10,
                    "control_2": 10,
                },
            },
        })

        # Plenty of users per bucket keeps the observed share stable enough
        # to compare against the ideal one.
        num_users = experiment.num_buckets * 1000

        hits = collections.Counter()
        for index in range(num_users):
            user_fullname = "t2_%s" % str(index)
            assigned = experiment._calculate_bucket(user_fullname)
            hits[assigned] += 1
            # Asking again for the same user must give the same bucket.
            self.assertEqual(
                assigned, experiment._calculate_bucket(user_fullname))

        for bucket in range(experiment.num_buckets):
            # An even spread puts the same number of users in each bucket.
            ideal = num_users / experiment.num_buckets
            # Compare as a ratio, not a raw difference, so the tolerance
            # keeps its meaning if the number of users changes.
            share = float(hits[bucket]) / ideal
            self.assertAlmostEqual(
                share, 1.0, delta=.10, msg='bucket: %s' % bucket)
Exemplo n.º 8
0
def get_users(num_users, logged_in=True):
    """Build ``num_users`` synthetic user dicts.

    Each dict has an ``id`` of the form ``t2_<index>``, the ``logged_in``
    value passed in, and a ``name`` that is the index as a string when
    ``logged_in`` is truthy and ``None`` otherwise.
    """
    return [
        {
            "name": str(index) if logged_in else None,
            "id": "t2_%s" % str(index),
            "logged_in": logged_in,
        }
        for index in range(num_users)
    ]
Exemplo n.º 9
0
def generate_content(num_content, content_type):
    """Build ``num_content`` synthetic content dicts of one type.

    Args:
        num_content: How many items to generate.
        content_type: One of ``"subreddit"``, ``"link"`` or ``"comment"``.

    Returns:
        A list of dicts, each with an ``id`` (``t5_<n>`` for subreddits,
        ``t3_<n>`` for links, ``t1_<n>`` for comments, with ``n`` counting
        up from 0) and a ``type`` equal to ``content_type``.

    Raises:
        ValueError: If ``content_type`` is not one of the known types.
    """
    if content_type == "subreddit":
        id_fmt = "t5_%s"
    elif content_type == "link":
        id_fmt = "t3_%s"
    elif content_type == "comment":
        id_fmt = "t1_%s"
    else:
        # Interpolate the value into the message. Passing it as a second
        # argument to ValueError would leave the "%s" placeholder unfilled.
        # The 1-tuple keeps the formatting safe if content_type is a tuple.
        raise ValueError("Unknown content type: %s" % (content_type,))

    return [dict(id=id_fmt % i, type=content_type) for i in range(num_content)]
    def test_distribution_def_buckets(self):
        """Rollout variant set over the default number of buckets.

        Expects the set from ``create_rollout_variant_set()`` to give
        ``variant_1`` 250 buckets and leave 750 buckets without a variant.
        """
        variant_set = create_rollout_variant_set()

        # None stands for "no variant chosen"; an unexpected result raises
        # KeyError while counting.
        tally = {"variant_1": 0, None: 0}
        for bucket in range(NUM_BUCKETS_DEFAULT):
            tally[variant_set.choose_variant(bucket)] += 1

        self.assertEqual(len(tally), 2)

        for variant_name, expected_count in (("variant_1", 250), (None, 750)):
            self.assertEqual(tally[variant_name], expected_count)
Exemplo n.º 11
0
    def test_distribution_def_buckets(self):
        """Multi-variant set over the default number of buckets.

        Expects the set from ``create_multi_variant_set()`` to split the
        buckets into four groups of 250: three named variants plus the
        buckets that get no variant.
        """
        variant_set = create_multi_variant_set()

        # None stands for "no variant chosen"; an unexpected result raises
        # KeyError while counting.
        tally = dict.fromkeys(("variant_1", "variant_2", "variant_3", None), 0)
        for bucket in range(NUM_BUCKETS_DEFAULT):
            tally[variant_set.choose_variant(bucket)] += 1

        self.assertEqual(len(tally), 4)

        for variant_name in tally:
            self.assertEqual(tally[variant_name], 250)
Exemplo n.º 12
0
 def simulate_percent_loggedout(wanted_percent):
     """Lazily evaluate a logged-out feature flag for synthetic users.

     Parses a ``feature_flag`` config that targets logged-out users and has
     a single ``active`` variant sized ``wanted_percent``. Returns a
     generator yielding, for each ``t2_<n>`` user id, whether the flag
     resolved to ``"active"``. ``num_users`` is not a parameter; it is read
     from the enclosing scope.
     """
     expires = (datetime.utcnow() + THIRTY_DAYS).strftime(ISO_DATE_FMT)
     feature_flag = parse_experiment({
         "id": 1,
         "name": "test_feature",
         "type": "feature_flag",
         "expires": expires,
         "experiment": {
             "targeting": {
                 "logged_in": [False],
             },
             "variants": {
                 "active": wanted_percent,
             },
         },
     })
     # Both generators are lazy: the flag is only evaluated as the caller
     # consumes the result.
     user_ids = ("t2_%s" % str(index) for index in range(num_users))
     return (
         feature_flag.variant(user_id=user_id, logged_in=False) == "active"
         for user_id in user_ids
     )
Exemplo n.º 13
0
 def simulate_percent_loggedout(wanted_percent):
     """Lazily evaluate a logged-out feature flag for synthetic users.

     Parses a ``feature_flag`` config with a start/stop window of thirty
     days either side of now, targeting logged-out users, with a single
     ``active`` variant sized ``wanted_percent``. Returns a generator
     yielding, for each ``t2_<n>`` user id, whether the flag resolved to
     ``"active"``. ``num_users`` is not a parameter; it is read from the
     enclosing scope.
     """
     start_ts = time.time() - THIRTY_DAYS
     stop_ts = time.time() + THIRTY_DAYS
     feature_flag = parse_experiment({
         "id": 1,
         "name": "test_feature",
         "type": "feature_flag",
         "version": "1",
         "start_ts": start_ts,
         "stop_ts": stop_ts,
         "experiment": {
             "targeting": {
                 "logged_in": [False],
             },
             "variants": {
                 "active": wanted_percent,
             },
         },
     })
     # Both generators are lazy: the flag is only evaluated as the caller
     # consumes the result.
     user_ids = ("t2_%s" % str(index) for index in range(num_users))
     return (
         feature_flag.variant(user_id=user_id, logged_in=False) == "active"
         for user_id in user_ids
     )
    def test_distribution_def_odd(self):
        """A full-size rollout variant takes every one of the odd buckets.

        With a single ``variant_1`` of size 1.0 over NUM_BUCKETS_ODD
        buckets, all 1037 buckets must map to it and none may be left
        without a variant.
        """
        variant_set = RolloutVariantSet(
            variants=[{"name": "variant_1", "size": 1.0}],
            num_buckets=NUM_BUCKETS_ODD,
        )

        # None stands for "no variant chosen"; an unexpected result raises
        # KeyError while counting.
        tally = {"variant_1": 0, None: 0}
        for bucket in range(NUM_BUCKETS_ODD):
            tally[variant_set.choose_variant(bucket)] += 1

        self.assertEqual(len(tally), 2)
        for variant_name, expected_count in (("variant_1", 1037), (None, 0)):
            self.assertEqual(tally[variant_name], expected_count)
Exemplo n.º 15
0
    def test_choose_variant(self):
        """Exercise ``_choose_variant`` on several inline r2 experiments.

        The test has three phases:

        1. For every bucket, pick a variant from three experiments, check
           that picking is deterministic, and check that a bucket which has
           a variant in ``three_variants`` keeps that same variant in
           ``three_variants_more``.
        2. For each of those experiments, check that the share of buckets
           per variant equals the percentage reported by
           ``experiment.variants``.
        3. Check the shares for a 50/50 experiment and for a 49/51 one.
        """
        control_only = parse_experiment({
            "id": 1,
            "name": "control_only",
            "owner": "test",
            "type": "r2",
            "version": "1",
            "start_ts": time.time() - THIRTY_DAYS,
            "stop_ts": time.time() + THIRTY_DAYS,
            "experiment": {
                "variants": {
                    "control_1": 10,
                    "control_2": 10,
                }
            }
        })
        three_variants = parse_experiment({
            "id": 1,
            "name": "three_variants",
            "owner": "test",
            "type": "r2",
            "version": "1",
            "start_ts": time.time() - THIRTY_DAYS,
            "stop_ts": time.time() + THIRTY_DAYS,
            "experiment": {
                "variants": {
                    'remove_vote_counters': 5,
                    'control_1': 10,
                    'control_2': 5,
                }
            }
        })
        # Same variant names as three_variants, with every size greater
        # than or equal to its counterpart there.
        three_variants_more = parse_experiment({
            "id": 1,
            "name": "three_variants_more",
            "owner": "test",
            "type": "r2",
            "version": "1",
            "start_ts": time.time() - THIRTY_DAYS,
            "stop_ts": time.time() + THIRTY_DAYS,
            "experiment": {
                "variants": {
                    'remove_vote_counters': 15.6,
                    'control_1': 10,
                    'control_2': 20,
                }
            }
        })

        # counters[experiment name][variant name] -> number of buckets.
        counters = collections.defaultdict(collections.Counter)
        # NOTE(review): the bucket range comes from control_only and is
        # reused for all three experiments; this assumes they share the
        # same num_buckets -- confirm.
        for bucket in range(control_only.num_buckets):
            variant = control_only._choose_variant(bucket)
            # Only buckets that got a variant are counted in this phase.
            if variant:
                counters[control_only.name][variant] += 1
            # Ensure variant-choosing is deterministic.
            self.assertEqual(variant, control_only._choose_variant(bucket))

            variant = three_variants._choose_variant(bucket)
            if variant:
                counters[three_variants.name][variant] += 1
            # Ensure variant-choosing is deterministic.
            self.assertEqual(variant, three_variants._choose_variant(bucket))

            # Remember what three_variants picked for this bucket so it can
            # be compared with what three_variants_more picks below.
            previous_variant = variant
            variant = three_variants_more._choose_variant(bucket)
            if variant:
                counters[three_variants_more.name][variant] += 1
            # Ensure variant-choosing is deterministic.
            self.assertEqual(variant,
                             three_variants_more._choose_variant(bucket))
            # If previously we had a variant, we should still have the same one
            # now.
            if previous_variant:
                self.assertEqual(variant, previous_variant)

        for experiment in (control_only, three_variants, three_variants_more):
            for variant, percentage in iteritems(experiment.variants):
                count = counters[experiment.name][variant]
                # Convert the bucket count to a percentage of all buckets.
                # NOTE(review): under Python 2 the inner division is integer
                # division, so this is exact only when num_buckets is a
                # multiple of 100 -- confirm.
                scaled_percentage = float(count) / (experiment.num_buckets /
                                                    100)
                self.assertEqual(scaled_percentage, percentage)

        # Test boundary conditions around the maximum percentage allowed for
        # variants.
        fifty_fifty = parse_experiment({
            "id": 1,
            "name": "fifty_fifty",
            "owner": "test",
            "type": "r2",
            "version": "1",
            "start_ts": time.time() - THIRTY_DAYS,
            "stop_ts": time.time() + THIRTY_DAYS,
            "experiment": {
                "variants": {
                    'control_1': 50,
                    'control_2': 50,
                }
            }
        })
        almost_fifty_fifty = parse_experiment({
            "id": 1,
            "name": "almost_fifty_fifty",
            "owner": "test",
            "type": "r2",
            "version": "1",
            "start_ts": time.time() - THIRTY_DAYS,
            "stop_ts": time.time() + THIRTY_DAYS,
            "experiment": {
                "variants": {
                    'control_1': 49,
                    'control_2': 51,
                }
            }
        })
        # Unlike the first phase, every result is counted here, including
        # buckets for which no variant is returned.
        for bucket in range(fifty_fifty.num_buckets):
            for experiment in (fifty_fifty, almost_fifty_fifty):
                variant = experiment._choose_variant(bucket)
                counters[experiment.name][variant] += 1

        count = counters[fifty_fifty.name]['control_1']
        scaled_percentage = float(count) / (fifty_fifty.num_buckets / 100)
        self.assertEqual(scaled_percentage, 50)

        count = counters[fifty_fifty.name]['control_2']
        scaled_percentage = float(count) / (fifty_fifty.num_buckets / 100)
        self.assertEqual(scaled_percentage, 50)

        count = counters[almost_fifty_fifty.name]['control_1']
        scaled_percentage = float(count) / (almost_fifty_fifty.num_buckets /
                                            100)
        self.assertEqual(scaled_percentage, 49)

        # control_2 is configured as 51 but expected to receive only 50.
        # NOTE(review): presumably the per-variant share is capped at an
        # even split between the two variants -- confirm against the
        # variant-choosing implementation.
        count = counters[almost_fifty_fifty.name]['control_2']
        scaled_percentage = float(count) / (almost_fifty_fifty.num_buckets /
                                            100)
        self.assertEqual(scaled_percentage, 50)