Пример #1
0
def test_log_stirling1_row():
    """Compare log_stirling1_row slopes with exactly computed reference rows.

    Reference rows are built with exact integer arithmetic from the
    recurrence ``row[n][k] = (n - 1) * row[n-1][k] + row[n-1][k-1]``,
    bordered by 0 on the left and 1 on the right. For every n up to MAX_N
    the consecutive differences of the log of the reference row are
    compared with those of ``log_stirling1_row(n)`` using ``tol=0.5``.
    """
    import sys

    require_cython()
    from distributions.lp.special import log_stirling1_row
    MAX_N = 128

    # Progress is written with sys.stdout.write instead of the Python-2-only
    # ``print x,`` statement; the emitted text is the same, and the function
    # now also parses on Python 3.
    emit = sys.stdout.write

    rows = [[1]]
    for n in range(1, MAX_N + 1):
        prev = rows[-1]
        middle = [(n - 1) * prev[k] + prev[k - 1] for k in range(1, n)]
        rows.append([0] + middle + [1])

    for n in range(1, MAX_N + 1):
        emit('Row {}:'.format(n))
        # Column 0 of the reference row is 0 (its log would be -inf), so both
        # rows are compared from column 1 onwards.
        row_py = numpy.log(numpy.array(rows[n][1:], dtype=numpy.double))
        row_cpp = log_stirling1_row(n)[1:]
        assert_equal(len(row_py), len(row_cpp))

        # Only the slopes need to be accurate, so the absolute values at the
        # ends of the row are deliberately not compared.
        diff_py = numpy.diff(row_py)
        diff_cpp = numpy.diff(row_cpp)
        for k, (dx_py, dx_cpp) in enumerate(zip(diff_py, diff_cpp), 1):
            # Label the pair of columns before asserting, so a failure shows
            # which slope was being checked.
            emit(' %d-%d' % (k, k + 1))
            assert_close(dx_py, dx_cpp, tol=0.5)
        emit('\n')
Пример #2
0
def test_chisq_draw():
    """Run _test_chisq_draw on the hp chi-squared sampler.

    The degrees-of-freedom values are 1.5 ** i for i from -10 to 10.
    """
    require_cython()
    from distributions.hp.random import sample_chisq

    # The matching check against scipy.stats.chi2.rvs stays disabled:
    # scipy.stats is assumed to be correct.
    for exponent in range(-10, 11):
        _test_chisq_draw(sample_chisq, 1.5 ** exponent)
def test_chisq_draw():
    """Exercise sample_chisq through _test_chisq_draw over a range of nu.

    nu takes the 21 values 1.5 ** -10, ..., 1.5 ** 10.
    """
    require_cython()
    import distributions.hp.random as hp_random

    degrees_of_freedom = (1.5 ** power for power in range(-10, 11))
    for nu in degrees_of_freedom:
        # scipy.stats.chi2.rvs is assumed correct, so only the hp sampler
        # is checked here.
        _test_chisq_draw(hp_random.sample_chisq, nu)
def test_seed():
    """Re-seeding the hp generator with the same seed repeats its draws.

    Seeds with 0, takes ten draws, seeds with 0 again, takes ten more, and
    asserts that the two sequences are equal.
    """
    require_cython()
    import distributions.hp.random as hp_random

    def draws_after_seed(seed_value, count=10):
        # Reset the generator, then collect `count` consecutive draws.
        hp_random.seed(seed_value)
        # range() rather than the Python-2-only xrange().
        return [hp_random.random() for _ in range(count)]

    values1 = draws_after_seed(0)
    values2 = draws_after_seed(0)
    assert_equal(values1, values2)
def test_log_sum_exp():
    """Check lp log_sum_exp against numpy.logaddexp.reduce.

    Score lists of length 0 through 19 are drawn from a standard normal.
    For the empty list the expected result is 0.0.
    """
    require_cython()
    from distributions.lp.random import log_sum_exp

    # range() rather than the Python-2-only xrange().
    for size in range(20):
        scores = numpy.random.normal(size=size).tolist()
        # numpy is only consulted for non-empty input; the empty case is
        # pinned to 0.0.
        expected = numpy.logaddexp.reduce(scores) if size else 0.0
        actual = log_sum_exp(scores)
        assert_close(actual, expected, err_msg='log_sum_exp')
Пример #6
0
def test_log_sum_exp():
    """Compare log_sum_exp with a numpy reference on random score lists.

    Lists of 0 to 19 normally distributed scores are used; an empty list is
    expected to give 0.0, anything else numpy.logaddexp.reduce(scores).
    """
    require_cython()
    import distributions.lp.random as lp_random

    # Iterate with range(): xrange() does not exist on Python 3.
    for size in range(20):
        scores = numpy.random.normal(size=size).tolist()
        if size:
            expected = numpy.logaddexp.reduce(scores)
        else:
            expected = 0.0
        actual = lp_random.log_sum_exp(scores)
        assert_close(actual, expected, err_msg='log_sum_exp')
Пример #7
0
def test_seed():
    """Check that seed(0) makes the hp generator's output repeatable.

    Two runs of ten draws each, both preceded by seed(0), must be equal.
    """
    require_cython()
    from distributions.hp.random import random as global_rng
    from distributions.hp.random import seed

    runs = []
    for _attempt in range(2):
        seed(0)
        # range() rather than the Python-2-only xrange().
        runs.append([global_rng() for _draw in range(10)])

    assert_equal(runs[0], runs[1])
Пример #8
0
def test_sample_prob_from_scores():
    """Check sample_prob_from_scores with assert_samples_match_scores.

    Random normal score lists of length 1 through 9 are tried; for each one
    a zero-argument sampler bound to that list is handed to the checker.
    """
    require_cython()
    from distributions.lp.random import sample_prob_from_scores

    for size in range(1, 10):
        scores = numpy.random.normal(size=size).tolist()
        # The lambda is consumed inside this iteration, so it always sees
        # the score list created just above.
        assert_samples_match_scores(lambda: sample_prob_from_scores(scores))
Пример #9
0
def test_normal_draw():
    """Run _test_normal_draw on hp sample_normal over a mean/variance grid.

    Means are -2.0 .. 2.0 in unit steps; variances are 10 ** -3 .. 10 ** 3.
    """
    require_cython()
    from distributions.hp.random import sample_normal

    means = [float(i) for i in range(-2, 3)]
    variances = [10.0 ** exponent for exponent in range(-3, 4)]

    # The matching check with scipy_normal_draw stays disabled: scipy.stats
    # is assumed to be correct.
    for mean in means:
        for variance in variances:
            _test_normal_draw(sample_normal, mean, variance)
Пример #10
0
def test_sample_prob_from_scores():
    """Validate sample_prob_from_scores on score lists of length 1 to 9.

    Each list is drawn from a standard normal, and a closure that samples
    from it is passed to assert_samples_match_scores.
    """
    require_cython()
    import distributions.lp.random as lp_random

    def make_sampler(score_list):
        # Bind the score list explicitly so each sampler owns its input.
        def draw():
            return lp_random.sample_prob_from_scores(score_list)
        return draw

    for length in range(1, 10):
        random_scores = numpy.random.normal(size=length).tolist()
        assert_samples_match_scores(make_sampler(random_scores))
Пример #11
0
def test_normal_draw():
    """Exercise sample_normal through _test_normal_draw.

    Every combination of mean in {-2, -1, 0, 1, 2} (as floats) and variance
    in {10 ** -3, ..., 10 ** 3} is visited, means varying slowest.
    """
    require_cython()
    import distributions.hp.random as hp_random

    grid = itertools.product(
        (1.0 * i for i in range(-2, 3)),
        (10.0 ** i for i in range(-3, 4)))
    for mean, variance in grid:
        # scipy.stats is assumed correct, so only the hp sampler is checked.
        _test_normal_draw(hp_random.sample_normal, mean, variance)
Пример #12
0
def test_prob_from_scores():
    """Check that prob_from_scores agrees with sample_prob_from_scores.

    For score lists of length 1 through 99, `size` samples are drawn. Each
    sampled index must lie in [0, size), and the probability returned with
    the sample must be close to prob_from_scores for that index.
    """
    require_cython()
    from distributions.lp.random import prob_from_scores
    from distributions.lp.random import sample_prob_from_scores

    for size in range(1, 100):
        scores = numpy.random.normal(size=size).tolist()
        # range() rather than the Python-2-only xrange().
        for _ in range(size):
            sample, prob1 = sample_prob_from_scores(scores)
            assert 0 <= sample < size
            prob2 = prob_from_scores(sample, scores)
            assert_close(
                prob1,
                prob2,
                err_msg='sample_prob_from_scores != prob_from_scores')
Пример #13
0
def test_prob_from_scores():
    """Cross-check the two ways of obtaining a sample's probability.

    sample_prob_from_scores returns an index together with a probability;
    prob_from_scores is then asked for the probability of that same index
    and the two values are compared with assert_close. This is repeated
    `size` times for each list length from 1 to 99.
    """
    require_cython()
    import distributions.lp.random as lp_random

    mismatch = 'sample_prob_from_scores != prob_from_scores'
    for size in range(1, 100):
        scores = numpy.random.normal(size=size).tolist()
        # Iterate with range(): xrange() does not exist on Python 3.
        for _ in range(size):
            sample, prob1 = lp_random.sample_prob_from_scores(scores)
            # The sampled index must address an existing score.
            assert 0 <= sample < size
            prob2 = lp_random.prob_from_scores(sample, scores)
            assert_close(prob1, prob2, err_msg=mismatch)
Пример #14
0
def test_sample_discrete():
    """Check lp sample_discrete on a few fixed float32 weight vectors.

    Each case pairs a weight list with the index the test expects back.
    """
    require_cython()
    from distributions.lp.random import sample_discrete

    cases = [
        ([.5], 0),
        ([1.], 0),
        ([1e-3], 0),
        ([1 - 1e-3, 1e-3], 0),
        ([1e-3, 1 - 1e-3], 1),
    ]
    for weights, expected_index in cases:
        probs = numpy.array(weights, dtype=numpy.float32)
        assert_equal(sample_discrete(probs), expected_index)
Пример #15
0
def test_sample_pair_from_urn():
    """Check that sample_pair_from_urn produces every ordered pair.

    The sample count is doubled until the estimated probability of missing
    at least one ordered pair of distinct items is at most TEST_FAIL_PROB.
    That many pairs are then drawn; each must consist of two different
    items, and every possible pair must have been seen at least once.
    """
    require_cython()
    from distributions.lp.random import sample_pair_from_urn
    TEST_FAIL_PROB = 1e-5
    ITEM_COUNT = 10

    # Materialize a real list: range() is lazy on Python 3, while the
    # original Python 2 code handed the sampler a list.
    items = list(range(ITEM_COUNT))
    counts = {(i, j): 0 for i in items for j in items if i != j}
    pair_count = len(counts)

    def test_fail_prob(sample_count):
        '''
        Let X1,...,XK ~iid uniform({1, ..., N = pair_count})
        and for n in {1,..,N} let Cn = sum_k (1 if Xk = n else 0).
        Then for each n,

            P(Cn = 0) = ((N-1) / N)^K
            P(Cn > 0) = 1 - ((N-1) / N)^K
            P(test fails) = 1 - P(for all n, Cn > 0)
                          ~ 1 - (1 - ((N-1) / N)^K)^N
        '''
        item_fail_prob = ((pair_count - 1.0) / pair_count) ** sample_count
        return 1 - (1 - item_fail_prob) ** pair_count

    sample_count = 1
    while test_fail_prob(sample_count) > TEST_FAIL_PROB:
        sample_count *= 2
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('pair_count = {}'.format(pair_count))
    print('sample_count = {}'.format(sample_count))

    # range() rather than the Python-2-only xrange().
    for _ in range(sample_count):
        i, j = sample_pair_from_urn(items)
        assert i != j
        counts[i, j] += 1

    # values() rather than the Python-2-only itervalues().
    assert_less(0, min(counts.values()))
def test_sample_pair_from_urn():
    """Draw pairs from a 10-item urn until all ordered pairs should appear.

    The number of draws is the smallest power of two for which the
    estimated chance of some pair never appearing is at most
    TEST_FAIL_PROB. The test asserts that both members of each drawn pair
    differ and that no pair's count is left at zero.
    """
    require_cython()
    import distributions.lp.random as lp_random
    TEST_FAIL_PROB = 1e-5
    ITEM_COUNT = 10

    # list(...) keeps the argument a concrete list on Python 3 as well.
    items = list(range(ITEM_COUNT))
    counts = {}
    for first in items:
        for second in items:
            if first != second:
                counts[first, second] = 0
    pair_count = len(counts)

    def test_fail_prob(sample_count):
        '''
        Let X1,...,XK ~iid uniform({1, ..., N = pair_count})
        and for n in {1,..,N} let Cn = sum_k (1 if Xk = n else 0).
        Then for each n,

            P(Cn = 0) = ((N-1) / N)^K
            P(Cn > 0) = 1 - ((N-1) / N)^K
            P(test fails) = 1 - P(for all n, Cn > 0)
                          ~ 1 - (1 - ((N-1) / N)^K)^N
        '''
        item_fail_prob = ((pair_count - 1.0) / pair_count) ** sample_count
        test_fail_prob = 1 - (1 - item_fail_prob) ** pair_count
        return test_fail_prob

    sample_count = 1
    while test_fail_prob(sample_count) > TEST_FAIL_PROB:
        sample_count *= 2
    # print(...) with one argument is valid and identical on Python 2 and 3,
    # unlike the bare print statement.
    print('pair_count = {}'.format(pair_count))
    print('sample_count = {}'.format(sample_count))

    # range() and dict.values() replace the Python-2-only xrange() and
    # dict.itervalues().
    for _ in range(sample_count):
        i, j = lp_random.sample_pair_from_urn(items)
        assert i != j
        counts[i, j] += 1

    assert_less(0, min(counts.values()))
Пример #17
0
def test_sample_discrete():
    """Assert the index returned by sample_discrete for fixed weights.

    Weights are passed as float32 numpy arrays; three single-entry vectors
    and two two-entry vectors are checked in turn.
    """
    require_cython()
    import distributions.lp.random as lp_random

    def check(weights, expected_index):
        # Convert to float32 before sampling, then compare the drawn index.
        probs = numpy.array(weights, dtype=numpy.float32)
        assert_equal(lp_random.sample_discrete(probs), expected_index)

    check([.5], 0)
    check([1.], 0)
    check([1e-3], 0)
    check([1 - 1e-3, 1e-3], 0)
    check([1e-3, 1 - 1e-3], 1)
    assert_equal,
    assert_less,
    assert_greater,
    assert_is_instance,
)
from distributions.dbg.random import sample_discrete
from goftests import discrete_goodness_of_fit
from distributions.tests.util import (
    require_cython,
    seed_all,
    assert_hasattr,
    assert_close,
)
from distributions.dbg.random import scores_to_probs
import distributions.dbg.clustering
require_cython()
import distributions.lp.clustering
from distributions.lp.clustering import count_assignments
from distributions.lp.mixture import MixtureIdTracker

# Clustering implementations keyed by a '<package>.<name>' label, where the
# label mirrors the distributions.<package>.clustering.<name> attribute.
MODELS = {
    'dbg.LowEntropy': distributions.dbg.clustering.LowEntropy,
    'lp.PitmanYor': distributions.lp.clustering.PitmanYor,
    'lp.LowEntropy': distributions.lp.clustering.LowEntropy,
}

# NOTE(review): presumably lets slow tests be skipped when set to True; the
# code that reads it is not visible here -- confirm.
SKIP_EXPENSIVE_TESTS = False
# NOTE(review): presumably the number of samples drawn per statistical
# check -- confirm against usage.
SAMPLE_COUNT = 2000
# NOTE(review): presumably the lowest acceptable goodness-of-fit value --
# confirm against usage.
MIN_GOODNESS_OF_FIT = 1e-3

Пример #19
0
    assert_equal,
    assert_less,
    assert_greater,
    assert_is_instance,
)
from distributions.dbg.random import sample_discrete
from goftests import discrete_goodness_of_fit
from distributions.tests.util import (
    require_cython,
    seed_all,
    assert_hasattr,
    assert_close,
)
from distributions.dbg.random import scores_to_probs
import distributions.dbg.clustering
require_cython()
import distributions.lp.clustering
from distributions.lp.clustering import count_assignments
from distributions.lp.mixture import MixtureIdTracker

# Clustering implementations keyed by a '<package>.<name>' label, where the
# label mirrors the distributions.<package>.clustering.<name> attribute.
MODELS = {
    'dbg.LowEntropy': distributions.dbg.clustering.LowEntropy,
    'lp.PitmanYor': distributions.lp.clustering.PitmanYor,
    'lp.LowEntropy': distributions.lp.clustering.LowEntropy,
}

# NOTE(review): presumably lets slow tests be skipped when set to True; the
# code that reads it is not visible here -- confirm.
SKIP_EXPENSIVE_TESTS = False
# NOTE(review): presumably the number of samples drawn per statistical
# check -- confirm against usage.
SAMPLE_COUNT = 2000
# NOTE(review): presumably the lowest acceptable goodness-of-fit value --
# confirm against usage.
MIN_GOODNESS_OF_FIT = 1e-3