Пример #1
0
def check_dpm(impl, data_count, beta0):
    '''
    Goodness-of-fit check for the sampler of component model `impl`.

    A model is built from a histogram of `data_count` random integers drawn
    from [0, 50), `SAMPS` values are sampled from it, and the empirical
    frequencies of the `TOPN` most probable values are compared against
    exp(pred_prob) using `mgof`.  The value returned by `mgof` must exceed
    `THRESH`.
    '''
    check_cm(impl)

    # Sufficient statistics: histogram entries keyed by stringified index.
    raw_hist = histogram(np.random.randint(50, size=data_count))
    data = {str(idx): obs for idx, obs in enumerate(raw_hist)}

    # The mass left over after beta0 is split evenly across the keys.
    betas = {
        str(idx): (1 - beta0) / len(data)
        for idx in range(len(data))
    }

    hp = {'gamma': 1., 'alpha': 1., 'beta0': beta0, 'betas': betas}
    cm = ComponentModel(impl, ss={'counts': data}, hp=hp)
    samples = cm.sample_data(SAMPS)

    # Samples equal to -1 are tallied separately from the rest
    # (presumably -1 marks an "unobserved value" draw; confirm against
    # ComponentModel).
    regular = [y for y in samples if y != -1]
    counts = list(histogram(regular))
    log_probs = [cm.pred_prob(x) for x in range(max(samples) + 1)]
    probs = list(np.exp(log_probs))
    counts.append(sum(1 for y in samples if y == -1))
    probs.append(np.exp(cm.pred_prob(-1)))

    # The scored values should account for (almost) all probability mass.
    assert_less(1 - sum(probs), THRESH)

    # Keep only the TOPN (prob, count) pairs, highest probability first.
    ranked = sorted(zip(probs, counts), reverse=True)
    probs, counts = zip(*ranked[:TOPN])
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
Пример #2
0
def check_probs(a, b):
    '''
    Check that implementations `a` and `b` assign identical scores.

    Model `a` realizes its hyperparameters, which are then handed to model
    `b`.  `DPS` data points are sampled from `a` up front; before each one
    is added to both models, their data_prob and pred_prob values must
    agree.
    '''
    check_cm(a)
    check_cm(b)

    model_a = ComponentModel(a)
    model_a.realize_hp()
    model_b = ComponentModel(b, hp=model_a.dump_hp())

    # All points are drawn before any data is added to either model.
    points = [model_a.sample_data() for _ in range(DPS)]
    for point in points:
        assert_almost_equal(model_a.data_prob(), model_b.data_prob())
        assert_almost_equal(
            model_a.pred_prob(point),
            model_b.pred_prob(point))
        model_a.add_data(point)
        model_b.add_data(point)
Пример #3
0
def test_crp_equals_pyp():
    '''
    Check that 'PYP' with d = 0 scores data the same as 'CRP'.

    For each alpha, 1000 points are sampled from the CRP model one at a
    time; the predictive probability of each point must match between the
    two models before it is added to both, and the final data_prob values
    must match as well.
    '''
    draws = 1000
    for alpha in (1., 5., 10.):
        crp = ComponentModel('CRP', hp={'alpha': alpha})
        pyp = ComponentModel('PYP', hp={'alpha': alpha, 'd': 0.})
        for _ in range(draws):
            point = crp.sample_data()
            assert_almost_equal(crp.pred_prob(point), pyp.pred_prob(point))
            crp.add_data(point)
            pyp.add_data(point)
        assert_almost_equal(crp.data_prob(), pyp.data_prob())
Пример #4
0
def check_dpm(impl, data_count, beta0):
    '''
    Compare samples drawn from component model `impl` with its pred_prob.

    The model is seeded with a histogram of `data_count` random integers
    from [0, 50).  `SAMPS` samples are drawn, and their frequencies are
    tested against exp(pred_prob) for the `TOPN` most probable values via
    `mgof`; the result must be greater than `THRESH`.
    '''
    check_cm(impl)

    hist = histogram(np.random.randint(50, size=data_count))
    data = dict((str(key), obs) for key, obs in enumerate(hist))
    # Each key receives an equal share of the mass not assigned to beta0.
    betas = dict(
        (str(key), (1 - beta0) / len(data))
        for key, _ in enumerate(data))

    cm = ComponentModel(
        impl,
        ss={'counts': data},
        hp={
            'gamma': 1.,
            'alpha': 1.,
            'beta0': beta0,
            'betas': betas,
        })
    samples = cm.sample_data(SAMPS)

    # Values other than -1 are histogrammed; -1 draws get one extra slot at
    # the end (the meaning of -1 is defined by ComponentModel; not shown
    # here).
    counts = list(histogram([s for s in samples if s != -1]))
    upper = max(samples) + 1
    probs = list(np.exp([cm.pred_prob(value) for value in range(upper)]))
    counts.append(len([s for s in samples if s == -1]))
    probs.append(np.exp(cm.pred_prob(-1)))

    # Nearly all probability mass must be covered by the scored values.
    assert_less(1 - sum(probs), THRESH)

    # Truncate to the TOPN pairs with the highest probability.
    top_pairs = sorted(zip(probs, counts), reverse=True)[:TOPN]
    probs, counts = zip(*top_pairs)
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
Пример #5
0
def check_sums(name):
    '''
    Check that sequential pred_prob scores sum to data_prob.

    `COUNT` points are sampled from a fresh model; each is scored with
    pred_prob and then added, and the accumulated score must match the
    model's data_prob afterwards.
    '''
    check_cm(name)
    model = ComponentModel(name)
    model.realize_hp()

    # Sample everything first, while the model still holds no added data.
    draws = [model.sample_data() for _ in range(COUNT)]

    total = 0.
    for draw in draws:
        # Score against the data seen so far, then absorb the point.
        total += model.pred_prob(draw)
        model.add_data(draw)

    assert_almost_equal(total, model.data_prob())
Пример #6
0
def check_sums(name):
    '''
    Verify the chain rule for component model `name`: the pred_prob of each
    point given the points added before it, summed over `COUNT` sampled
    points, must equal data_prob of the full set.
    '''
    check_cm(name)
    cm = ComponentModel(name)
    cm.realize_hp()

    def score_then_add(point):
        # pred_prob must be read before the point is added to the model.
        log_prob = cm.pred_prob(point)
        cm.add_data(point)
        return log_prob

    points = [cm.sample_data() for _ in range(COUNT)]
    accumulated = 0.
    for point in points:
        accumulated += score_then_add(point)
    assert_almost_equal(accumulated, cm.data_prob())
Пример #7
0
def check_nich(impl, data_count, mean, std):
    '''
    Goodness-of-fit check for the sampler of component model `impl`.

    When `data_count` is non-zero the model is seeded with the count, mean
    and variance of `data_count` normal draws (`mean`, `std`); otherwise it
    is built with ss=None.  `SAMPS` samples are binned, each bin is scored
    by integrating exp(pred_prob) over its range, and the `TOPN` most
    probable bins are tested with `mgof` against `THRESH`.
    '''
    check_cm(impl)

    if data_count:
        observed = np.random.normal(mean, std, size=data_count)
        ss = {
            'count': data_count,
            'mean': observed.mean(),
            'variance': observed.var(),
        }
    else:
        ss = None

    cm = ComponentModel(impl, ss=ss)
    samples = cm.sample_data(SAMPS)
    counts, bin_ranges = bin_samples(samples)

    def density(x):
        return np.exp(cm.pred_prob(x))

    # Numerical quadrature is not ideal, but for now it is the easiest way
    # to score bins and seems to work.
    probs = []
    for lower, upper in bin_ranges:
        integral = quad(density, lower, upper, epsabs=0., epsrel=1e-6)
        probs.append(integral[0])

    # The bins should cover (almost) all of the probability mass.
    assert_less(1 - sum(probs), THRESH)

    # Keep the TOPN (prob, count) pairs, highest probability first.
    ranked = sorted(zip(probs, counts), reverse=True)
    probs, counts = zip(*ranked[:TOPN])
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
Пример #8
0
def check_nich(impl, data_count, mean, std):
    '''
    Compare samples from component model `impl` with its predictive density.

    The model optionally starts from summary statistics (count, mean,
    variance) of `data_count` draws from a normal distribution with the
    given `mean` and `std`.  Binned sample counts are checked against
    per-bin probabilities obtained by integrating exp(pred_prob), using
    `mgof` on the `TOPN` most probable bins.
    '''
    check_cm(impl)

    ss = None
    if data_count:
        # Random data is only drawn when there is something to seed with.
        data = np.random.normal(mean, std, size=data_count)
        ss = dict(count=data_count, mean=data.mean(), variance=data.var())

    cm = ComponentModel(impl, ss=ss)
    counts, bin_ranges = bin_samples(cm.sample_data(SAMPS))

    def pdf(x):
        return np.exp(cm.pred_prob(x))

    def bin_mass(bounds):
        # Quadrature is an unfortunate choice, but for now it is the
        # easiest way to score a bin and seems to work.
        low, high = bounds
        return quad(pdf, low, high, epsabs=0., epsrel=1e-6)[0]

    probs = [bin_mass(bounds) for bounds in bin_ranges]
    assert_less(1 - sum(probs), THRESH)

    # Sort pairs by descending probability and truncate to TOPN.
    best = sorted(zip(probs, counts), reverse=True)[:TOPN]
    probs, counts = zip(*best)
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
Пример #9
0
def add_remove_add(name, raw_hps, raw_ss0=None):
    '''
    Exercise add_data, remove_data, pred_prob and data_prob together.

    For every hyperparameter setting in `raw_hps`:
      1. sample 20 points, scoring each with pred_prob before adding it,
         and check that the summed score matches data_prob;
      2. remove the points in shuffled order and check that the sufficient
         statistics match a model built from `raw_ss0`;
      3. add the points back in shuffled order and check that the
         sufficient statistics match those dumped after step 1.
    '''
    point_count = 20

    for raw_hp in raw_hps:
        cm = ComponentModel(name, hp=raw_hp, ss=raw_ss0)
        cm.realize_hp()

        points = []
        total = 0
        for _ in range(point_count):
            point = cm.sample_data()
            points.append(point)
            total += cm.pred_prob(point)
            cm.add_data(point)

        # Snapshot of the fully loaded statistics, for the final comparison.
        loaded = ComponentModel(name, ss=cm.dump_ss())
        assert_close(
            total,
            cm.data_prob(),
            err_msg='p(x1,...,xn) != p(x1) p(x2|x1) p(xn|...)')

        # Removal order should not matter, so shuffle before removing.
        random.shuffle(points)
        for point in points:
            cm.remove_data(point)

        baseline = ComponentModel(name, ss=raw_ss0)
        assert_close(cm.ss, baseline.ss, err_msg='ss + data - data != ss')

        # Likewise for the order in which the points are added back.
        random.shuffle(points)
        for point in points:
            cm.add_data(point)

        assert_close(cm.ss, loaded.ss, err_msg='ss - data + data != ss')
Пример #10
0
def add_remove_add(name, raw_hps, raw_ss0=None):
    '''
    Round-trip test of add_data / remove_data, also covering pred_prob and
    data_prob.

    Each entry of `raw_hps` is used to build a model (starting from
    `raw_ss0`), which then has 20 sampled points added, removed in random
    order, and added again in random order.  The sufficient statistics are
    compared against reference models after the removal and after the
    re-addition.
    '''
    n_points = 20

    for raw_hp in raw_hps:
        cm = ComponentModel(name, hp=raw_hp, ss=raw_ss0)
        cm.realize_hp()

        def apply_shuffled(action, items):
            # Shuffle in place, then feed every item to the bound method.
            random.shuffle(items)
            for item in items:
                action(item)

        data = []
        score = 0
        for _ in range(n_points):
            dp = cm.sample_data()
            data.append(dp)
            # Predictive score is taken before the point joins the model.
            score += cm.pred_prob(dp)
            cm.add_data(dp)

        cm_all = ComponentModel(name, ss=cm.dump_ss())
        chain_rule_msg = 'p(x1,...,xn) != p(x1) p(x2|x1) p(xn|...)'
        assert_close(score, cm.data_prob(), err_msg=chain_rule_msg)

        apply_shuffled(cm.remove_data, data)
        cm0 = ComponentModel(name, ss=raw_ss0)
        assert_close(cm.ss, cm0.ss, err_msg='ss + data - data != ss')

        apply_shuffled(cm.add_data, data)
        assert_close(cm.ss, cm_all.ss, err_msg='ss - data + data != ss')