def test_classifier_runs(Model, EXAMPLE):
    """Smoke-test the full classifier lifecycle.

    Builds one group per example value, scores + adds each value to a
    sampled group, then adds/removes groups and values while checking
    that ``len(classifier)`` tracks the group count.

    Fix: removed a dead ``scores = numpy.zeros(...)`` assignment that
    preceded the removal loop and was never read.
    """
    model = Model.model_load(EXAMPLE['model'])
    values = EXAMPLE['values']
    classifier = Model.Classifier()
    for value in values:
        classifier.append(model.group_create([value]))
    model.classifier_init(classifier)
    groupids = []
    for value in values:
        scores = numpy.zeros(len(classifier), dtype=numpy.float32)
        model.classifier_score(classifier, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        model.classifier_add_value(classifier, groupid, value)
        groupids.append(groupid)
    model.classifier_add_group(classifier)
    assert len(classifier) == len(values) + 1
    for value, groupid in zip(values, groupids):
        model.classifier_remove_value(classifier, groupid, value)
    model.classifier_remove_group(classifier, 0)
    model.classifier_remove_group(classifier, len(classifier) - 1)
    # one group was added and two removed, hence len(values) - 1
    assert len(classifier) == len(values) - 1
    for value in values:
        scores = numpy.zeros(len(classifier), dtype=numpy.float32)
        model.classifier_score(classifier, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        model.classifier_add_value(classifier, groupid, value)
def sample_from_image(image, row_count):
    """Yield ``row_count`` jittered (x, y) samples distributed like the
    inverted image intensity, mapped through ``to_loom_coordinates``.
    """
    # Invert so that dark pixels carry high probability mass.
    inverted = -1.0 * image
    inverted -= inverted.min()
    row_pmf = inverted.sum(axis=1)
    col_pmfs = inverted.copy()
    for pmf in col_pmfs:
        pmf /= (pmf.sum() + 1e-8)  # normalize each row in place
    emitted = 0
    while emitted < row_count:
        row = sample_discrete(row_pmf)
        col = sample_discrete(col_pmfs[row])
        # Jitter within the pixel so samples are not grid-aligned.
        jittered_x = row + numpy.random.random() - 0.5
        jittered_y = col + numpy.random.random() - 0.5
        yield to_loom_coordinates(jittered_x, jittered_y)
        emitted += 1
def sample_from_image(image, sample_count):
    """Yield ``sample_count`` (x, y) points in [-1, 1]^2 distributed like
    the inverted image intensity.
    """
    # Invert so that dark pixels carry high probability mass.
    darkness = -1.0 * image
    darkness -= darkness.min()
    row_pmf = darkness.sum(axis=1)
    col_pmfs = darkness.copy()
    for pmf in col_pmfs:
        pmf /= (pmf.sum() + 1e-8)  # normalize each row in place
    # Linear maps taking pixel indices onto [-1, 1] along each axis.
    x_scale = 2.0 / (image.shape[0] - 1)
    y_scale = 2.0 / (image.shape[1] - 1)
    for _ in xrange(sample_count):
        row = sample_discrete(row_pmf)
        col = sample_discrete(col_pmfs[row])
        yield (row * x_scale - 1.0, col * y_scale - 1.0)
def test_mixture_score(module, EXAMPLE): shared = module.Shared.from_dict(EXAMPLE['shared']) values = EXAMPLE['values'] for value in values: shared.add_value(value) groups = [module.Group.from_values(shared, [value]) for value in values] mixture = module.Mixture() for group in groups: mixture.append(group) mixture.init(shared) def check_score_value(value): expected = [group.score_value(shared, value) for group in groups] actual = numpy.zeros(len(mixture), dtype=numpy.float32) noise = numpy.random.randn(len(actual)) actual += noise mixture.score_value(shared, value, actual) actual -= noise assert_close(actual, expected, err_msg='score_value {}'.format(value)) another = [ mixture.score_value_group(shared, i, value) for i in xrange(len(groups)) ] assert_close( another, expected, err_msg='score_value_group {}'.format(value)) return actual def check_score_data(): expected = sum(group.score_data(shared) for group in groups) actual = mixture.score_data(shared) assert_close(actual, expected, err_msg='score_data') print 'init' for value in values: check_score_value(value) check_score_data() print 'adding' groupids = [] for value in values: scores = check_score_value(value) probs = scores_to_probs(scores) groupid = sample_discrete(probs) groups[groupid].add_value(shared, value) mixture.add_value(shared, groupid, value) groupids.append(groupid) check_score_data() print 'removing' for value, groupid in zip(values, groupids): groups[groupid].remove_value(shared, value) mixture.remove_value(shared, groupid, value) scores = check_score_value(value) check_score_data()
def test_mixture_score(module, EXAMPLE):
    """Check that Mixture scoring agrees with per-Group scoring as values
    are added to and removed from the mixture."""
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']
    for value in values:
        shared.add_value(value)
    groups = [module.Group.from_values(shared, [value]) for value in values]
    mixture = module.Mixture()
    for group in groups:
        mixture.append(group)
    mixture.init(shared)

    def check_score_value(value):
        # Expected scores come from scoring each group independently.
        expected = [group.score_value(shared, value) for group in groups]
        actual = numpy.zeros(len(mixture), dtype=numpy.float32)
        # Pre-fill with noise, then subtract it again after scoring.
        noise = numpy.random.randn(len(actual))
        actual += noise
        mixture.score_value(shared, value, actual)
        actual -= noise
        assert_close(actual, expected, err_msg='score_value {}'.format(value))
        # Per-group scoring must agree with the vectorized path.
        another = [
            mixture.score_value_group(shared, i, value)
            for i in xrange(len(groups))
        ]
        assert_close(another, expected,
                     err_msg='score_value_group {}'.format(value))
        return actual

    def check_score_data():
        # Total data score must equal the sum over all groups.
        expected = sum(group.score_data(shared) for group in groups)
        actual = mixture.score_data(shared)
        assert_close(actual, expected, err_msg='score_data')

    print 'init'
    for value in values:
        check_score_value(value)
    check_score_data()
    print 'adding'
    groupids = []
    for value in values:
        scores = check_score_value(value)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        groups[groupid].add_value(shared, value)
        mixture.add_value(shared, groupid, value)
        groupids.append(groupid)
        check_score_data()
    print 'removing'
    for value, groupid in zip(values, groupids):
        groups[groupid].remove_value(shared, value)
        mixture.remove_value(shared, groupid, value)
        scores = check_score_value(value)
        check_score_data()
def test_mixture_runs(module, EXAMPLE):
    """Smoke-test Mixture add/remove of values and groups.

    Fix: removed a dead ``scores = numpy.zeros(...)`` assignment that
    preceded the removal loop and was never read.
    """
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']
    mixture = module.Mixture()
    for value in values:
        shared.add_value(value)
        mixture.append(module.Group.from_values(shared, [value]))
    mixture.init(shared)
    groupids = []
    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
        groupids.append(groupid)
    mixture.add_group(shared)
    assert len(mixture) == len(values) + 1
    for value, groupid in zip(values, groupids):
        mixture.remove_value(shared, groupid, value)
    mixture.remove_group(shared, 0)
    if module.__name__ == 'distributions.lp.models.dpd':
        raise SkipTest('FIXME known segfault here')
    mixture.remove_group(shared, len(mixture) - 1)
    # one group was added and two removed, hence len(values) - 1
    assert len(mixture) == len(values) - 1
    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
def test_mixture_runs(module, EXAMPLE):
    """Smoke-test Mixture add/remove of values and groups.

    Fix: removed a dead ``scores = numpy.zeros(...)`` assignment that
    preceded the removal loop and was never read.
    """
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']
    mixture = module.Mixture()
    for value in values:
        shared.add_value(value)
        mixture.append(module.Group.from_values(shared, [value]))
    mixture.init(shared)
    groupids = []
    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
        groupids.append(groupid)
    mixture.add_group(shared)
    assert len(mixture) == len(values) + 1
    for value, groupid in zip(values, groupids):
        mixture.remove_value(shared, groupid, value)
    mixture.remove_group(shared, 0)
    mixture.remove_group(shared, len(mixture) - 1)
    # one group was added and two removed, hence len(values) - 1
    assert len(mixture) == len(values) - 1
    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
def _sample_crp(n, alpha):
    """Generate an assignment vector of length ``n`` from a CRP with
    concentration ``alpha``.

    Returns an int array where entry i is the cluster id assigned to
    customer i. Raises ValueError for non-positive ``n`` or ``alpha``.

    Fix: ``np.int`` and ``np.float`` were deprecated in NumPy 1.20 and
    removed in 1.24; use the builtin ``int``/``float`` (the aliases were
    identical to the builtins, so behavior is unchanged).
    """
    if n <= 0:
        raise ValueError("need positive n")
    if alpha <= 0.:
        raise ValueError("need positive alpha")
    counts = np.array([1])
    assignments = np.zeros(n, dtype=int)
    assignments[0] = 0
    for i in xrange(1, n):
        # Seat customer i proportionally to cluster sizes, with weight
        # alpha for opening a new cluster.
        dist = np.append(counts, alpha).astype(float, copy=False)
        dist /= dist.sum()
        choice = sample_discrete(dist)
        if choice == len(counts):
            # new cluster
            counts = np.append(counts, 1)
        else:
            # existing cluster
            counts[choice] += 1
        assignments[i] = choice
    return assignments
def test_classifier_score(Model, EXAMPLE): model = Model.model_load(EXAMPLE['model']) values = EXAMPLE['values'] groups = [model.group_create([value]) for value in values] classifier = Model.Classifier() for group in groups: classifier.append(group) model.classifier_init(classifier) def check_scores(): expected = [model.score_value(group, value) for group in groups] actual = numpy.zeros(len(classifier), dtype=numpy.float32) model.classifier_score(classifier, value, actual) assert_close(actual, expected, err_msg='scores') return actual print 'init' for value in values: check_scores() print 'adding' groupids = [] for value in values: scores = check_scores() probs = scores_to_probs(scores) groupid = sample_discrete(probs) model.group_add_value(groups[groupid], value) model.classifier_add_value(classifier, groupid, value) groupids.append(groupid) print 'removing' for value, groupid in zip(values, groupids): model.group_remove_value(groups[groupid], value) model.classifier_remove_value(classifier, groupid, value) scores = check_scores()
def eval(self, shared):
    """Sample an index from the categorical distribution ``self.ps``.

    ``shared`` is accepted for interface compatibility but unused.
    """
    ps = self.ps
    return sample_discrete(ps)
def eval(self, shared):
    """Sample one of ``self.values`` with probabilities ``self.probs``.

    ``shared`` is accepted for interface compatibility but unused.
    """
    chosen = sample_discrete(self.probs)
    return self.values[chosen]
def sampler_eval(self, sampler):
    """Draw a discrete sample; indices past the betas map to OTHER."""
    drawn = sample_discrete(sampler)
    # The final slot represents all values outside the explicit betas.
    return OTHER if drawn == len(self.betas) else drawn
def test_mixture_score_matches_score_add_value(Model, EXAMPLE, *unused):
    """Verify that Mixture.score_value agrees with Model.score_add_value
    while values are added to and removed from the mixture, tracking
    group ids through packing with a MixtureIdTracker."""
    sample_count = 200
    model = Model()
    model.load(EXAMPLE)
    if Model.__name__ == 'LowEntropy' and sample_count > model.dataset_size:
        raise SkipTest('skipping trivial example')
    assignment_vector = model.sample_assignments(sample_count)
    assignments = dict(enumerate(assignment_vector))
    nonempty_counts = count_assignments(assignments)
    nonempty_group_count = len(nonempty_counts)
    assert_greater(nonempty_group_count, 1, "test is inaccurate")

    def check_counts(mixture, counts, empty_group_count):
        # The mixture must report exactly the expected empty groups,
        # and each of them must have a zero count.
        # print 'counts =', counts
        empty_groupids = frozenset(mixture.empty_groupids)
        assert_equal(len(empty_groupids), empty_group_count)
        for groupid in empty_groupids:
            assert_equal(counts[groupid], 0)

    def check_scores(mixture, counts, empty_group_count):
        # Expected per-group scores come from score_add_value applied
        # to each group's size.
        sample_count = sum(counts)
        nonempty_group_count = len(counts) - empty_group_count
        expected = [
            model.score_add_value(
                group_size,
                nonempty_group_count,
                sample_count,
                empty_group_count)
            for group_size in counts
        ]
        # Pre-fill with noise to check score_value overwrites the buffer.
        noise = numpy.random.randn(len(counts))
        actual = numpy.zeros(len(counts), dtype=numpy.float32)
        actual[:] = noise
        mixture.score_value(model, actual)
        assert_close(actual, expected)
        return actual

    for empty_group_count in [1, 10]:
        print 'empty_group_count =', empty_group_count
        counts = nonempty_counts + [0] * empty_group_count
        numpy.random.shuffle(counts)
        mixture = Model.Mixture()
        id_tracker = MixtureIdTracker()
        print 'init'
        mixture.init(model, counts)
        id_tracker.init(len(counts))
        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)
        print 'adding'
        groupids = []
        for _ in xrange(sample_count):
            check_counts(mixture, counts, empty_group_count)
            scores = check_scores(mixture, counts, empty_group_count)
            probs = scores_to_probs(scores)
            groupid = sample_discrete(probs)
            # Adding to an empty group should create a fresh empty one.
            expected_group_added = (counts[groupid] == 0)
            counts[groupid] += 1
            actual_group_added = mixture.add_value(model, groupid)
            assert_equal(actual_group_added, expected_group_added)
            groupids.append(groupid)
            if actual_group_added:
                id_tracker.add_group()
                counts.append(0)
        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)
        print 'removing'
        for global_groupid in groupids:
            groupid = id_tracker.global_to_packed(global_groupid)
            counts[groupid] -= 1
            expected_group_removed = (counts[groupid] == 0)
            actual_group_removed = mixture.remove_value(model, groupid)
            assert_equal(actual_group_removed, expected_group_removed)
            if expected_group_removed:
                id_tracker.remove_group(groupid)
                # Swap-remove: move the popped tail count into the hole.
                back = counts.pop()
                if groupid < len(counts):
                    counts[groupid] = back
            check_counts(mixture, counts, empty_group_count)
            check_scores(mixture, counts, empty_group_count)
def test_mixture_score_matches_score_add_value(Model, EXAMPLE, *unused):
    """Verify that Mixture.score_value agrees with Model.score_add_value
    while values are added to and removed from the mixture, tracking
    group ids through packing with a MixtureIdTracker."""
    sample_count = 200
    model = Model()
    model.load(EXAMPLE)
    if Model.__name__ == 'LowEntropy' and sample_count > model.dataset_size:
        raise SkipTest('skipping trivial example')
    assignment_vector = model.sample_assignments(sample_count)
    assignments = dict(enumerate(assignment_vector))
    nonempty_counts = count_assignments(assignments)
    nonempty_group_count = len(nonempty_counts)
    assert_greater(nonempty_group_count, 1, "test is inaccurate")

    def check_counts(mixture, counts, empty_group_count):
        # The mixture must report exactly the expected empty groups,
        # and each of them must have a zero count.
        # print 'counts =', counts
        empty_groupids = frozenset(mixture.empty_groupids)
        assert_equal(len(empty_groupids), empty_group_count)
        for groupid in empty_groupids:
            assert_equal(counts[groupid], 0)

    def check_scores(mixture, counts, empty_group_count):
        # Expected per-group scores come from score_add_value applied
        # to each group's size.
        sample_count = sum(counts)
        nonempty_group_count = len(counts) - empty_group_count
        expected = [
            model.score_add_value(group_size,
                                  nonempty_group_count,
                                  sample_count,
                                  empty_group_count)
            for group_size in counts
        ]
        # Pre-fill with noise to check score_value overwrites the buffer.
        noise = numpy.random.randn(len(counts))
        actual = numpy.zeros(len(counts), dtype=numpy.float32)
        actual[:] = noise
        mixture.score_value(model, actual)
        assert_close(actual, expected)
        return actual

    for empty_group_count in [1, 10]:
        print 'empty_group_count =', empty_group_count
        counts = nonempty_counts + [0] * empty_group_count
        numpy.random.shuffle(counts)
        mixture = Model.Mixture()
        id_tracker = MixtureIdTracker()
        print 'init'
        mixture.init(model, counts)
        id_tracker.init(len(counts))
        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)
        print 'adding'
        groupids = []
        for _ in xrange(sample_count):
            check_counts(mixture, counts, empty_group_count)
            scores = check_scores(mixture, counts, empty_group_count)
            probs = scores_to_probs(scores)
            groupid = sample_discrete(probs)
            # Adding to an empty group should create a fresh empty one.
            expected_group_added = (counts[groupid] == 0)
            counts[groupid] += 1
            actual_group_added = mixture.add_value(model, groupid)
            assert_equal(actual_group_added, expected_group_added)
            groupids.append(groupid)
            if actual_group_added:
                id_tracker.add_group()
                counts.append(0)
        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)
        print 'removing'
        for global_groupid in groupids:
            groupid = id_tracker.global_to_packed(global_groupid)
            counts[groupid] -= 1
            expected_group_removed = (counts[groupid] == 0)
            actual_group_removed = mixture.remove_value(model, groupid)
            assert_equal(actual_group_removed, expected_group_removed)
            if expected_group_removed:
                id_tracker.remove_group(groupid)
                # Swap-remove: move the popped tail count into the hole.
                back = counts.pop()
                if groupid < len(counts):
                    counts[groupid] = back
            check_counts(mixture, counts, empty_group_count)
            check_scores(mixture, counts, empty_group_count)