Exemplo n.º 1
0
def test_classifier_runs(Model, EXAMPLE):
    """Smoke test: exercise the full classifier lifecycle on EXAMPLE data.

    Builds a classifier with one singleton group per value, assigns every
    value to a sampled group, then grows and shrinks the group set and
    re-adds the values.  Only checks that nothing crashes and that the
    group count stays consistent.
    """
    model = Model.model_load(EXAMPLE['model'])
    values = EXAMPLE['values']

    classifier = Model.Classifier()
    for value in values:
        classifier.append(model.group_create([value]))
    model.classifier_init(classifier)

    # Gibbs-style assignment: sample a group proportional to its score.
    groupids = []
    for value in values:
        scores = numpy.zeros(len(classifier), dtype=numpy.float32)
        model.classifier_score(classifier, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        model.classifier_add_value(classifier, groupid, value)
        groupids.append(groupid)

    # Grow by one empty group.
    model.classifier_add_group(classifier)
    assert len(classifier) == len(values) + 1

    for value, groupid in zip(values, groupids):
        model.classifier_remove_value(classifier, groupid, value)

    # Shrink: drop the first and the (current) last group.
    model.classifier_remove_group(classifier, 0)
    model.classifier_remove_group(classifier, len(classifier) - 1)
    assert len(classifier) == len(values) - 1

    for value in values:
        scores = numpy.zeros(len(classifier), dtype=numpy.float32)
        model.classifier_score(classifier, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        model.classifier_add_value(classifier, groupid, value)
Exemplo n.º 2
0
def sample_from_image(image, row_count):
    """Yield row_count jittered (x, y) samples drawn from image intensity.

    Darker pixels are more likely (the image is negated before sampling).
    Coordinates are converted via to_loom_coordinates.
    """
    weights = -1.0 * image
    weights -= weights.min()
    row_pmf = weights.sum(axis=1)
    col_pmfs = weights.copy()
    for pmf in col_pmfs:
        # Normalize each row in place; epsilon guards all-zero rows.
        pmf /= (pmf.sum() + 1e-8)

    for _ in xrange(row_count):
        row = sample_discrete(row_pmf)
        col = sample_discrete(col_pmfs[row])
        jittered_x = row + numpy.random.random() - 0.5
        jittered_y = col + numpy.random.random() - 0.5
        yield to_loom_coordinates(jittered_x, jittered_y)
Exemplo n.º 3
0
File: main.py Project: fritzo/loom
def sample_from_image(image, row_count):
    """Yield row_count jittered (x, y) samples drawn from image intensity.

    Darker pixels are more likely (the image is negated first); each
    sample is passed through to_loom_coordinates before being yielded.
    """
    image = -1.0 * image
    image -= image.min()
    # Marginal over rows; per-row conditional pmfs over columns.
    x_pmf = image.sum(axis=1)
    y_pmfs = image.copy()
    for y_pmf in y_pmfs:
        # In-place normalization; epsilon guards all-zero rows.
        y_pmf /= (y_pmf.sum() + 1e-8)

    for _ in xrange(row_count):
        x = sample_discrete(x_pmf)
        y = sample_discrete(y_pmfs[x])
        # Uniform jitter in [-0.5, 0.5) de-quantizes the pixel grid.
        x += numpy.random.random() - 0.5
        y += numpy.random.random() - 0.5
        yield to_loom_coordinates(x, y)
Exemplo n.º 4
0
def sample_from_image(image, sample_count):
    """Yield sample_count (x, y) points scaled into [-1, 1]^2.

    Pixel locations are sampled proportional to (negated) image
    intensity, then mapped linearly onto the unit square.
    """
    weights = -1.0 * image
    weights -= weights.min()
    x_pmf = weights.sum(axis=1)
    y_pmfs = weights.copy()
    for pmf in y_pmfs:
        # Normalize each row in place; epsilon guards all-zero rows.
        pmf /= (pmf.sum() + 1e-8)

    # Linear map from pixel index to [-1, 1].
    x_scale = 2.0 / (weights.shape[0] - 1)
    y_scale = 2.0 / (weights.shape[1] - 1)

    for _ in xrange(sample_count):
        row = sample_discrete(x_pmf)
        col = sample_discrete(y_pmfs[row])
        yield (row * x_scale - 1.0, col * y_scale - 1.0)
Exemplo n.º 5
0
def sample_from_image(image, sample_count):
    """Yield sample_count (x, y) points scaled into [-1, 1]^2.

    Pixel locations are sampled proportional to (negated) image
    intensity, then mapped linearly onto the unit square.
    """
    image = -1.0 * image
    image -= image.min()
    # Marginal over rows; per-row conditional pmfs over columns.
    x_pmf = image.sum(axis=1)
    y_pmfs = image.copy()
    for y_pmf in y_pmfs:
        # In-place normalization; epsilon guards all-zero rows.
        y_pmf /= (y_pmf.sum() + 1e-8)

    # Linear map from pixel index to [-1, 1].
    x_scale = 2.0 / (image.shape[0] - 1)
    y_scale = 2.0 / (image.shape[1] - 1)

    for _ in xrange(sample_count):
        x = sample_discrete(x_pmf)
        y = sample_discrete(y_pmfs[x])
        yield (x * x_scale - 1.0, y * y_scale - 1.0)
Exemplo n.º 6
0
def test_mixture_score(module, EXAMPLE):
    """Check that Mixture scoring agrees with per-Group scoring.

    Verifies mixture.score_value, score_value_group, and score_data
    against the equivalent computations on the individual groups, through
    an init / add-value / remove-value lifecycle.
    """
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']
    for value in values:
        shared.add_value(value)

    # One singleton group per example value.
    groups = [module.Group.from_values(shared, [value]) for value in values]
    mixture = module.Mixture()
    for group in groups:
        mixture.append(group)
    mixture.init(shared)

    def check_score_value(value):
        # Expected scores come from scoring each group independently.
        expected = [group.score_value(shared, value) for group in groups]
        actual = numpy.zeros(len(mixture), dtype=numpy.float32)
        # Pre-fill the buffer with noise, then subtract it after scoring;
        # the assert only passes if score_value accumulates (+=) into the
        # buffer rather than overwriting it.
        noise = numpy.random.randn(len(actual))
        actual += noise
        mixture.score_value(shared, value, actual)
        actual -= noise
        assert_close(actual, expected, err_msg='score_value {}'.format(value))
        # Per-group scoring must agree with the batch result.
        another = [
            mixture.score_value_group(shared, i, value)
            for i in xrange(len(groups))
        ]
        assert_close(
            another,
            expected,
            err_msg='score_value_group {}'.format(value))
        return actual

    def check_score_data():
        # Data score of the mixture equals the sum over its groups.
        expected = sum(group.score_data(shared) for group in groups)
        actual = mixture.score_data(shared)
        assert_close(actual, expected, err_msg='score_data')

    print 'init'
    for value in values:
        check_score_value(value)
    check_score_data()

    print 'adding'
    groupids = []
    for value in values:
        # Gibbs-style assignment: sample a group proportional to score.
        scores = check_score_value(value)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        groups[groupid].add_value(shared, value)
        mixture.add_value(shared, groupid, value)
        groupids.append(groupid)
        check_score_data()

    print 'removing'
    for value, groupid in zip(values, groupids):
        groups[groupid].remove_value(shared, value)
        mixture.remove_value(shared, groupid, value)
        scores = check_score_value(value)
        check_score_data()
def test_mixture_score(module, EXAMPLE):
    """Check that Mixture scoring agrees with per-Group scoring.

    Same contract as the variant above: score_value, score_value_group,
    and score_data are compared against per-group computations through an
    init / add / remove lifecycle.
    """
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']
    for value in values:
        shared.add_value(value)

    # One singleton group per example value.
    groups = [module.Group.from_values(shared, [value]) for value in values]
    mixture = module.Mixture()
    for group in groups:
        mixture.append(group)
    mixture.init(shared)

    def check_score_value(value):
        expected = [group.score_value(shared, value) for group in groups]
        actual = numpy.zeros(len(mixture), dtype=numpy.float32)
        # Noise is added then subtracted so the assert only passes if
        # score_value accumulates (+=) into the output buffer.
        noise = numpy.random.randn(len(actual))
        actual += noise
        mixture.score_value(shared, value, actual)
        actual -= noise
        assert_close(actual, expected, err_msg='score_value {}'.format(value))
        another = [
            mixture.score_value_group(shared, i, value)
            for i in xrange(len(groups))
        ]
        assert_close(another,
                     expected,
                     err_msg='score_value_group {}'.format(value))
        return actual

    def check_score_data():
        # Data score of the mixture equals the sum over its groups.
        expected = sum(group.score_data(shared) for group in groups)
        actual = mixture.score_data(shared)
        assert_close(actual, expected, err_msg='score_data')

    print 'init'
    for value in values:
        check_score_value(value)
    check_score_data()

    print 'adding'
    groupids = []
    for value in values:
        # Gibbs-style assignment: sample a group proportional to score.
        scores = check_score_value(value)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        groups[groupid].add_value(shared, value)
        mixture.add_value(shared, groupid, value)
        groupids.append(groupid)
        check_score_data()

    print 'removing'
    for value, groupid in zip(values, groupids):
        groups[groupid].remove_value(shared, value)
        mixture.remove_value(shared, groupid, value)
        scores = check_score_value(value)
        check_score_data()
Exemplo n.º 8
0
def test_mixture_runs(module, EXAMPLE):
    """Smoke test: exercise the full Mixture lifecycle on EXAMPLE data.

    Builds a mixture with one singleton group per value, assigns every
    value to a sampled group, grows and shrinks the group set, and adds
    the values again.  Only checks that nothing crashes and that the
    group count stays consistent.
    """
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']

    mixture = module.Mixture()
    for value in values:
        shared.add_value(value)
        mixture.append(module.Group.from_values(shared, [value]))
    mixture.init(shared)

    # Gibbs-style assignment: sample a group proportional to its score.
    groupids = []
    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
        groupids.append(groupid)

    # Grow by one empty group.
    mixture.add_group(shared)
    assert len(mixture) == len(values) + 1

    for value, groupid in zip(values, groupids):
        mixture.remove_value(shared, groupid, value)

    mixture.remove_group(shared, 0)
    # Known crash in the dpd model when removing the last group; skip it.
    if module.__name__ == 'distributions.lp.models.dpd':
        raise SkipTest('FIXME known segfault here')
    mixture.remove_group(shared, len(mixture) - 1)
    assert len(mixture) == len(values) - 1

    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
Exemplo n.º 9
0
def test_mixture_runs(module, EXAMPLE):
    """Smoke test: exercise the full Mixture lifecycle on EXAMPLE data.

    Builds a mixture of singleton groups, assigns every value to a
    sampled group, grows and shrinks the group set, then re-adds the
    values.  Only group-count consistency is asserted.
    """
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']

    mixture = module.Mixture()
    for value in values:
        shared.add_value(value)
        mixture.append(module.Group.from_values(shared, [value]))
    mixture.init(shared)

    # Gibbs-style assignment: sample a group proportional to its score.
    groupids = []
    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
        groupids.append(groupid)

    # Grow by one empty group.
    mixture.add_group(shared)
    assert len(mixture) == len(values) + 1
    scores = numpy.zeros(len(mixture), dtype=numpy.float32)

    for value, groupid in zip(values, groupids):
        mixture.remove_value(shared, groupid, value)

    mixture.remove_group(shared, 0)
    # Known crash in the dpd model when removing the last group; skip it.
    if module.__name__ == 'distributions.lp.models.dpd':
        raise SkipTest('FIXME known segfault here')
    mixture.remove_group(shared, len(mixture) - 1)
    assert len(mixture) == len(values) - 1

    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
Exemplo n.º 10
0
def test_mixture_runs(module, EXAMPLE):
    """Smoke test: exercise the full Mixture lifecycle on EXAMPLE data.

    Builds a mixture with one singleton group per value, assigns every
    value to a sampled group, grows and shrinks the group set, and adds
    the values again.  Only checks that nothing crashes and that the
    group count stays consistent.
    """
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']

    mixture = module.Mixture()
    for value in values:
        shared.add_value(value)
        mixture.append(module.Group.from_values(shared, [value]))
    mixture.init(shared)

    # Gibbs-style assignment: sample a group proportional to its score.
    groupids = []
    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
        groupids.append(groupid)

    # Grow by one empty group.
    mixture.add_group(shared)
    assert len(mixture) == len(values) + 1

    for value, groupid in zip(values, groupids):
        mixture.remove_value(shared, groupid, value)

    # Shrink: drop the first and the (current) last group.
    mixture.remove_group(shared, 0)
    mixture.remove_group(shared, len(mixture) - 1)
    assert len(mixture) == len(values) - 1

    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
Exemplo n.º 11
0
def test_mixture_runs(module, EXAMPLE):
    """Smoke test: exercise the full Mixture lifecycle on EXAMPLE data.

    Builds a mixture of singleton groups, assigns every value to a
    sampled group, grows and shrinks the group set, then re-adds the
    values.  Only group-count consistency is asserted.
    """
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']

    mixture = module.Mixture()
    for value in values:
        shared.add_value(value)
        mixture.append(module.Group.from_values(shared, [value]))
    mixture.init(shared)

    # Gibbs-style assignment: sample a group proportional to its score.
    groupids = []
    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
        groupids.append(groupid)

    # Grow by one empty group.
    mixture.add_group(shared)
    assert len(mixture) == len(values) + 1
    scores = numpy.zeros(len(mixture), dtype=numpy.float32)

    for value, groupid in zip(values, groupids):
        mixture.remove_value(shared, groupid, value)

    # Shrink: drop the first and the (current) last group.
    mixture.remove_group(shared, 0)
    mixture.remove_group(shared, len(mixture) - 1)
    assert len(mixture) == len(values) - 1

    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
Exemplo n.º 12
0
def _sample_crp(n, alpha):
    """Generate an assignment vector of length n from a Chinese Restaurant
    Process with concentration parameter alpha.

    Returns an int array where assignments[i] is the cluster index of
    customer i.  Raises ValueError for non-positive n or alpha.
    """
    if n <= 0:
        raise ValueError("need positive n")
    if alpha <= 0.:
        raise ValueError("need positive alpha")
    counts = np.array([1])
    # np.int/np.float aliases were removed from NumPy (>=1.24); the
    # builtins are exactly equivalent here.
    assignments = np.zeros(n, dtype=int)
    assignments[0] = 0
    for i in xrange(1, n):
        # Probability proportional to cluster size, plus alpha for a new one.
        dist = np.append(counts, alpha).astype(float, copy=False)
        dist /= dist.sum()
        choice = sample_discrete(dist)
        if choice == len(counts):
            # new cluster
            counts = np.append(counts, 1)
        else:
            # existing cluster
            counts[choice] += 1
        assignments[i] = choice
    return assignments
Exemplo n.º 13
0
def _sample_crp(n, alpha):
    """Generate an assignment vector of length n from a Chinese Restaurant
    Process with concentration parameter alpha.

    Returns an int array where assignments[i] is the cluster index of
    customer i.  Raises ValueError for non-positive n or alpha.
    """
    if n <= 0:
        raise ValueError("need positive n")
    if alpha <= 0.:
        raise ValueError("need positive alpha")
    counts = np.array([1])
    # np.int/np.float aliases were removed from NumPy (>=1.24); the
    # builtins are exactly equivalent here.
    assignments = np.zeros(n, dtype=int)
    assignments[0] = 0
    for i in xrange(1, n):
        # Probability proportional to cluster size, plus alpha for a new one.
        dist = np.append(counts, alpha).astype(float, copy=False)
        dist /= dist.sum()
        choice = sample_discrete(dist)
        if choice == len(counts):
            # new cluster
            counts = np.append(counts, 1)
        else:
            # existing cluster
            counts[choice] += 1
        assignments[i] = choice
    return assignments
Exemplo n.º 14
0
def test_classifier_score(Model, EXAMPLE):
    """Check that classifier batch scoring agrees with per-group scoring
    through an init / add-value / remove-value lifecycle.
    """
    model = Model.model_load(EXAMPLE['model'])
    values = EXAMPLE['values']

    # One singleton group per example value.
    groups = [model.group_create([value]) for value in values]
    classifier = Model.Classifier()
    for group in groups:
        classifier.append(group)
    model.classifier_init(classifier)

    def check_scores():
        # NOTE: `value` is a closure over the enclosing loop variable, so
        # this scores whichever value the caller's loop is currently on.
        expected = [model.score_value(group, value) for group in groups]
        actual = numpy.zeros(len(classifier), dtype=numpy.float32)
        model.classifier_score(classifier, value, actual)
        assert_close(actual, expected, err_msg='scores')
        return actual

    print 'init'
    for value in values:
        check_scores()

    print 'adding'
    groupids = []
    for value in values:
        # Gibbs-style assignment: sample a group proportional to score.
        scores = check_scores()
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        model.group_add_value(groups[groupid], value)
        model.classifier_add_value(classifier, groupid, value)
        groupids.append(groupid)

    print 'removing'
    for value, groupid in zip(values, groupids):
        model.group_remove_value(groups[groupid], value)
        model.classifier_remove_value(classifier, groupid, value)
        scores = check_scores()
Exemplo n.º 15
0
 def eval(self, shared):
     """Sample and return an index from the stored distribution self.ps."""
     distribution = self.ps
     return sample_discrete(distribution)
Exemplo n.º 16
0
 def eval(self, shared):
     """Draw an index from self.probs and return the matching value."""
     chosen = sample_discrete(self.probs)
     return self.values[chosen]
Exemplo n.º 17
0
 def eval(self, shared):
     """Draw an index from self.probs and return the matching value.

     The `shared` argument is unused by this sampler.
     """
     index = sample_discrete(self.probs)
     return self.values[index]
Exemplo n.º 18
0
 def eval(self, shared):
     """Sample and return an index from the stored distribution self.ps.

     The `shared` argument is unused by this sampler.
     """
     return sample_discrete(self.ps)
Exemplo n.º 19
0
 def sampler_eval(self, sampler):
     """Sample an index; the index one past the known betas maps to OTHER."""
     index = sample_discrete(sampler)
     return OTHER if index == len(self.betas) else index
def test_mixture_score_matches_score_add_value(Model, EXAMPLE, *unused):
    """Check mixture.score_value against model.score_add_value.

    Samples an assignment vector, then replays add/remove operations on a
    Mixture while asserting that batch scores and empty-group bookkeeping
    match values computed directly from the counts.
    """
    sample_count = 200
    model = Model()
    model.load(EXAMPLE)

    if Model.__name__ == 'LowEntropy' and sample_count > model.dataset_size:
        raise SkipTest('skipping trivial example')

    assignment_vector = model.sample_assignments(sample_count)
    assignments = dict(enumerate(assignment_vector))
    nonempty_counts = count_assignments(assignments)
    nonempty_group_count = len(nonempty_counts)
    assert_greater(nonempty_group_count, 1, "test is inaccurate")

    def check_counts(mixture, counts, empty_group_count):
        # Empty-group tracking must match the zero entries of counts.
        # print 'counts =', counts
        empty_groupids = frozenset(mixture.empty_groupids)
        assert_equal(len(empty_groupids), empty_group_count)
        for groupid in empty_groupids:
            assert_equal(counts[groupid], 0)

    def check_scores(mixture, counts, empty_group_count):
        # Expected scores computed directly from group sizes.
        sample_count = sum(counts)
        nonempty_group_count = len(counts) - empty_group_count
        expected = [
            model.score_add_value(
                group_size,
                nonempty_group_count,
                sample_count,
                empty_group_count)
            for group_size in counts
        ]
        # Pre-fill with noise to confirm score_value overwrites the buffer.
        noise = numpy.random.randn(len(counts))
        actual = numpy.zeros(len(counts), dtype=numpy.float32)
        actual[:] = noise
        mixture.score_value(model, actual)
        assert_close(actual, expected)
        return actual

    for empty_group_count in [1, 10]:
        print 'empty_group_count =', empty_group_count
        counts = nonempty_counts + [0] * empty_group_count
        numpy.random.shuffle(counts)
        mixture = Model.Mixture()
        # Maps stable global group ids to the mixture's packed indices.
        id_tracker = MixtureIdTracker()

        print 'init'
        mixture.init(model, counts)
        id_tracker.init(len(counts))
        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)

        print 'adding'
        groupids = []
        for _ in xrange(sample_count):
            check_counts(mixture, counts, empty_group_count)
            scores = check_scores(mixture, counts, empty_group_count)
            probs = scores_to_probs(scores)
            groupid = sample_discrete(probs)
            # Adding to an empty group should report a group creation.
            expected_group_added = (counts[groupid] == 0)
            counts[groupid] += 1
            actual_group_added = mixture.add_value(model, groupid)
            assert_equal(actual_group_added, expected_group_added)
            groupids.append(groupid)
            if actual_group_added:
                id_tracker.add_group()
                counts.append(0)

        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)

        print 'removing'
        for global_groupid in groupids:
            groupid = id_tracker.global_to_packed(global_groupid)
            counts[groupid] -= 1
            # Emptying a group should report a group removal.
            expected_group_removed = (counts[groupid] == 0)
            actual_group_removed = mixture.remove_value(model, groupid)
            assert_equal(actual_group_removed, expected_group_removed)
            if expected_group_removed:
                id_tracker.remove_group(groupid)
                # Mirror swap-remove: move the last count into the hole.
                back = counts.pop()
                if groupid < len(counts):
                    counts[groupid] = back
            check_counts(mixture, counts, empty_group_count)
            check_scores(mixture, counts, empty_group_count)
Exemplo n.º 21
0
def test_mixture_score_matches_score_add_value(Model, EXAMPLE, *unused):
    """Check mixture.score_value against model.score_add_value.

    Samples an assignment vector, then replays add/remove operations on a
    Mixture while asserting that batch scores and empty-group bookkeeping
    match values computed directly from the counts.
    """
    sample_count = 200
    model = Model()
    model.load(EXAMPLE)

    if Model.__name__ == 'LowEntropy' and sample_count > model.dataset_size:
        raise SkipTest('skipping trivial example')

    assignment_vector = model.sample_assignments(sample_count)
    assignments = dict(enumerate(assignment_vector))
    nonempty_counts = count_assignments(assignments)
    nonempty_group_count = len(nonempty_counts)
    assert_greater(nonempty_group_count, 1, "test is inaccurate")

    def check_counts(mixture, counts, empty_group_count):
        # Empty-group tracking must match the zero entries of counts.
        # print 'counts =', counts
        empty_groupids = frozenset(mixture.empty_groupids)
        assert_equal(len(empty_groupids), empty_group_count)
        for groupid in empty_groupids:
            assert_equal(counts[groupid], 0)

    def check_scores(mixture, counts, empty_group_count):
        # Expected scores computed directly from group sizes.
        sample_count = sum(counts)
        nonempty_group_count = len(counts) - empty_group_count
        expected = [
            model.score_add_value(group_size, nonempty_group_count,
                                  sample_count, empty_group_count)
            for group_size in counts
        ]
        # Pre-fill with noise to confirm score_value overwrites the buffer.
        noise = numpy.random.randn(len(counts))
        actual = numpy.zeros(len(counts), dtype=numpy.float32)
        actual[:] = noise
        mixture.score_value(model, actual)
        assert_close(actual, expected)
        return actual

    for empty_group_count in [1, 10]:
        print 'empty_group_count =', empty_group_count
        counts = nonempty_counts + [0] * empty_group_count
        numpy.random.shuffle(counts)
        mixture = Model.Mixture()
        # Maps stable global group ids to the mixture's packed indices.
        id_tracker = MixtureIdTracker()

        print 'init'
        mixture.init(model, counts)
        id_tracker.init(len(counts))
        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)

        print 'adding'
        groupids = []
        for _ in xrange(sample_count):
            check_counts(mixture, counts, empty_group_count)
            scores = check_scores(mixture, counts, empty_group_count)
            probs = scores_to_probs(scores)
            groupid = sample_discrete(probs)
            # Adding to an empty group should report a group creation.
            expected_group_added = (counts[groupid] == 0)
            counts[groupid] += 1
            actual_group_added = mixture.add_value(model, groupid)
            assert_equal(actual_group_added, expected_group_added)
            groupids.append(groupid)
            if actual_group_added:
                id_tracker.add_group()
                counts.append(0)

        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)

        print 'removing'
        for global_groupid in groupids:
            groupid = id_tracker.global_to_packed(global_groupid)
            counts[groupid] -= 1
            # Emptying a group should report a group removal.
            expected_group_removed = (counts[groupid] == 0)
            actual_group_removed = mixture.remove_value(model, groupid)
            assert_equal(actual_group_removed, expected_group_removed)
            if expected_group_removed:
                id_tracker.remove_group(groupid)
                # Mirror swap-remove: move the last count into the hole.
                back = counts.pop()
                if groupid < len(counts):
                    counts[groupid] = back
            check_counts(mixture, counts, empty_group_count)
            check_scores(mixture, counts, empty_group_count)