示例#1
0
def check_summarize(name):
    """Feed ``summarize`` an incrementally growing list of samples.

    After the ``check_cm(name)`` call, a default ``ComponentModel(name)`` is
    sampled ``COUNT`` times, one value per iteration.  ``summarize(name, ...)``
    is invoked after every single draw, so it receives the same list object
    at lengths 1 through ``COUNT``.
    """
    check_cm(name)
    model = ComponentModel(name)
    drawn = []
    for _ in range(COUNT):
        drawn.append(model.sample_data())
        # NOTE(review): summarize runs on every prefix, not only on the
        # final list -- preserved as written; confirm this is intended.
        summarize(name, drawn)
示例#2
0
def check_dpm(impl, data_count, beta0):
    """Compare samples from a model against its own predictive probabilities.

    Steps, as performed below:

    1. ``check_cm(impl)`` is called first (defined elsewhere).
    2. ``data_count`` random integers in ``[0, 50)`` are histogrammed and
       turned into a ``{str(index): count}`` mapping used as the ``'counts'``
       sufficient statistic.
    3. Every key receives the same ``betas`` weight
       ``(1 - beta0) / number_of_keys``.
    4. ``SAMPS`` values are sampled; values equal to ``-1`` are tallied as one
       extra trailing category scored with ``pred_prob(-1)``.
    5. The probabilities must sum to within ``THRESH`` of 1, and the value
       returned by ``mgof`` on the ``TOPN`` most probable categories must
       exceed ``THRESH``.

    :param impl: model identifier passed to ``check_cm``/``ComponentModel``.
    :param data_count: number of random observations to histogram.
    :param beta0: stored as hyperparameter ``'beta0'``; the remaining mass
        ``1 - beta0`` is split evenly across ``betas``.
    """
    check_cm(impl)
    observed = histogram(np.random.randint(50, size=data_count))
    counts_by_key = {str(i): obs for i, obs in enumerate(observed)}
    # The division stays inside the comprehension so an empty histogram
    # never divides by zero, exactly as before.
    betas = {key: (1 - beta0) / len(counts_by_key) for key in counts_by_key}
    hp = {
        'gamma': 1.,
        'alpha': 1.,
        'beta0': beta0,
        'betas': betas,
    }
    cm = ComponentModel(impl, ss={'counts': counts_by_key}, hp=hp)
    samples = cm.sample_data(SAMPS)

    # Non-negative samples are histogrammed; -1 is appended as a final bin.
    counts = list(histogram([y for y in samples if y != -1]))
    probs = list(np.exp([cm.pred_prob(x) for x in range(max(samples) + 1)]))
    counts.append(sum(1 for y in samples if y == -1))
    probs.append(np.exp(cm.pred_prob(-1)))
    assert_less(1 - sum(probs), THRESH)

    # Keep only the TOPN categories with the highest predicted probability.
    probs, counts = zip(*sorted(zip(probs, counts), reverse=True)[:TOPN])
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
示例#3
0
def check_generate(name):
    """Sample from a ``BasicDistribution`` built from generated parameters.

    After ``check_cm(name)``, a ``ComponentModel(name)`` has ``realize_hp()``
    called on it, its ``generate_post()`` result is passed as ``pm`` to
    ``BasicDistribution(name, ...)``, and one ``sample_data()`` call is made
    on that distribution.  Nothing is asserted about the sampled value.
    """
    check_cm(name)
    model = ComponentModel(name)
    model.realize_hp()
    distribution = BasicDistribution(name, pm=model.generate_post())
    distribution.sample_data()
示例#4
0
def check_ss_io(name):
    """Check that dumped sufficient statistics survive a reload.

    A ``ComponentModel`` rebuilt from ``cm.dump_ss()`` must dump statistics
    equal to the original's.  The comparison is made twice: right after
    ``realize_hp()`` and again after adding one sampled data point.
    """
    check_cm(name)
    cm = ComponentModel(name)
    cm.realize_hp()

    def assert_round_trip():
        # Rebuild from the dump, then compare the two dumps.
        reloaded = ComponentModel(name, ss=cm.dump_ss())
        assert_equal(reloaded.dump_ss(), cm.dump_ss())

    assert_round_trip()
    cm.add_data(cm.sample_data())
    assert_round_trip()
示例#5
0
def check_ss_io(name):
    """Round-trip the model's sufficient statistics through ``dump_ss``.

    The dump of a model reconstructed via ``ComponentModel(name, ss=dump)``
    is asserted equal to the source model's dump, once before and once after
    a single sampled value is added with ``add_data``.
    """
    check_cm(name)
    source = ComponentModel(name)
    source.realize_hp()

    # Stage 1: statistics as they stand after realize_hp().
    rebuilt = ComponentModel(name, ss=source.dump_ss())
    assert_equal(rebuilt.dump_ss(), source.dump_ss())

    # Stage 2: statistics after one data point has been added.
    source.add_data(source.sample_data())
    rebuilt = ComponentModel(name, ss=source.dump_ss())
    assert_equal(rebuilt.dump_ss(), source.dump_ss())
示例#6
0
def check_summarize(name):
    """Run ``summarize`` after each of ``COUNT`` successive draws.

    ``check_cm(name)`` is called first.  Samples from a default
    ``ComponentModel(name)`` accumulate in one list, and that list is handed
    to ``summarize(name, ...)`` after every append.
    """
    check_cm(name)
    sampler = ComponentModel(name)
    collected = []
    for _ in range(COUNT):
        collected.append(sampler.sample_data())
        # NOTE(review): called once per iteration on the partial list;
        # left inside the loop to match existing behavior -- confirm intent.
        summarize(name, collected)
示例#7
0
def check_generate(name):
    """Exercise ``generate_post`` -> ``BasicDistribution`` -> ``sample_data``.

    Calls ``check_cm(name)``, realizes hyperparameters on a fresh
    ``ComponentModel(name)``, and uses its ``generate_post()`` output as the
    ``pm`` argument of ``BasicDistribution``.  A single sample is drawn and
    discarded; no assertion is made on it.
    """
    check_cm(name)
    component = ComponentModel(name)
    component.realize_hp()
    generated = component.generate_post()
    BasicDistribution(name, pm=generated).sample_data()
示例#8
0
def check_dd(impl, data_count, D):
    """Run ``_check_discrete`` on a model seeded with random counts.

    ``data_count`` random integers in ``[0, D)`` are histogrammed with
    ``bin_count=D`` and supplied as the ``'counts'`` sufficient statistic;
    ``D`` is also passed as parameter ``p={'D': D}``.  ``realize_hp()`` is
    called before the model is handed to ``_check_discrete``.

    :param impl: model identifier passed to ``check_cm``/``ComponentModel``.
    :param data_count: number of random integers to draw.
    :param D: exclusive upper bound of the draws and the histogram bin count.
    """
    check_cm(impl)
    draws = np.random.randint(D, size=data_count)
    suffstats = {'counts': histogram(draws, bin_count=D)}
    cm = ComponentModel(impl, ss=suffstats, p={'D': D})
    cm.realize_hp()
    _check_discrete(cm)
示例#9
0
def check_gp(impl, data_count, lam):
    """Run ``_check_discrete`` on a model seeded with Poisson-derived stats.

    ``data_count`` values are drawn from ``np.random.poisson(lam)`` and
    reduced to the sufficient statistics ``'n'``, ``'sum'`` and
    ``'log_prod'`` that are passed to ``ComponentModel``.

    :param impl: model identifier passed to ``check_cm``/``ComponentModel``.
    :param data_count: number of Poisson draws; stored as ``'n'``.
    :param lam: rate passed to ``np.random.poisson``.
    """
    check_cm(impl)
    draws = np.random.poisson(lam, size=data_count)
    total = np.sum(draws)
    # NOTE(review): np.log gives -inf for any zero draw, which makes
    # 'log_prod' -inf -- confirm this matches what the model expects.
    log_total = np.sum(np.log(draws))
    ss = {'n': data_count, 'sum': total, 'log_prod': log_total}
    _check_discrete(ComponentModel(impl, ss=ss))
示例#10
0
def check_sums(name):
    """Check that sequential ``pred_prob`` values add up to ``data_prob``.

    ``COUNT`` values are sampled up front.  Each is scored with
    ``pred_prob`` *before* being added to the model; the running total is
    then asserted almost equal to the model's final ``data_prob()``.
    """
    check_cm(name)
    cm = ComponentModel(name)
    cm.realize_hp()
    # All samples are drawn before any of them is added to the model.
    drawn = [cm.sample_data() for _ in range(COUNT)]
    running_total = 0.
    for point in drawn:
        # Score first, then absorb the point (order matters).
        running_total += cm.pred_prob(point)
        cm.add_data(point)
    assert_almost_equal(running_total, cm.data_prob())
示例#11
0
def check_sample_post_seed(name):
    """Check that ``sample_post`` is reproducible under a fixed seed.

    Two independent runs each call ``seed(0)``, build a fresh
    ``ComponentModel(name)`` and draw ``COUNT`` ``sample_post()`` values.
    The runs are compared element by element with
    ``assert_array_almost_equal``.
    """
    check_cm(name)

    def seeded_posts():
        # Reseed, then draw from a brand-new model.
        seed(0)
        model = ComponentModel(name)
        return [model.sample_post() for _ in range(COUNT)]

    first_run = seeded_posts()
    second_run = seeded_posts()
    for left, right in zip(first_run, second_run):
        assert_array_almost_equal(left, right)
示例#12
0
def check_sums(name):
    """Verify ``data_prob`` equals the accumulated one-step ``pred_prob``.

    After ``check_cm(name)`` and ``realize_hp()``, ``COUNT`` samples are
    drawn.  For each sample, ``pred_prob`` is evaluated on the model's
    current state and accumulated, after which the sample is added.  The
    accumulated score must almost equal ``data_prob()`` at the end.
    """
    check_cm(name)
    model = ComponentModel(name)
    model.realize_hp()
    samples = [model.sample_data() for _ in range(COUNT)]
    accumulated = 0.
    for sample in samples:
        accumulated += model.pred_prob(sample)  # scored before insertion
        model.add_data(sample)
    assert_almost_equal(accumulated, model.data_prob())
示例#13
0
def check_gp(impl, data_count, lam):
    """Build a model from Poisson draws and pass it to ``_check_discrete``.

    The sufficient statistics handed to ``ComponentModel`` are the draw
    count (``'n'``), the sum of the draws (``'sum'``) and the sum of their
    natural logs (``'log_prod'``).

    :param impl: model identifier passed to ``check_cm``/``ComponentModel``.
    :param data_count: number of values drawn from the Poisson distribution.
    :param lam: Poisson rate used for the draws.
    """
    check_cm(impl)
    poisson_draws = np.random.poisson(lam, size=data_count)
    ss = {}
    ss['n'] = data_count
    ss['sum'] = np.sum(poisson_draws)
    # NOTE(review): a zero among the draws makes np.log return -inf here;
    # verify that is acceptable for 'log_prod'.
    ss['log_prod'] = np.sum(np.log(poisson_draws))
    cm = ComponentModel(impl, ss=ss)
    _check_discrete(cm)
示例#14
0
def check_sample_post_seed(name):
    """Assert that reseeding with 0 reproduces the ``sample_post`` stream.

    ``seed(0)`` is called before each of two fresh ``ComponentModel(name)``
    instances draws ``COUNT`` posterior samples; corresponding samples must
    be almost equal as arrays.
    """
    check_cm(name)
    runs = []
    for _ in range(2):
        seed(0)
        model = ComponentModel(name)
        runs.append([model.sample_post() for _ in range(COUNT)])
    baseline, repeat = runs
    for expected, actual in zip(baseline, repeat):
        assert_array_almost_equal(expected, actual)
示例#15
0
def check_probs(a, b):
    """Check that two implementations agree on ``data_prob`` and ``pred_prob``.

    Model ``a`` realizes its hyperparameters and model ``b`` is constructed
    with ``a``'s dumped hyperparameters.  ``DPS`` data points sampled from
    ``a`` are fed to both models one at a time; before each point is added,
    both probabilities are asserted almost equal across the two models.

    :param a: identifier of the first implementation (the sampling source).
    :param b: identifier of the second implementation.
    """
    check_cm(a)
    check_cm(b)
    model_a = ComponentModel(a)
    model_a.realize_hp()
    model_b = ComponentModel(b, hp=model_a.dump_hp())
    points = [model_a.sample_data() for _ in range(DPS)]
    for point in points:
        # Compare on the current state, then advance both models together.
        assert_almost_equal(model_a.data_prob(), model_b.data_prob())
        assert_almost_equal(model_a.pred_prob(point), model_b.pred_prob(point))
        model_a.add_data(point)
        model_b.add_data(point)
示例#16
0
def check_sample_data_seed(name):
    """Check that ``sample_data`` is reproducible under a fixed seed.

    Twice over: ``seed(0)`` is called, a fresh ``ComponentModel(name)``
    realizes its hyperparameters, and 10 data samples are drawn.  The two
    sequences are compared pairwise with ``assert_almost_equal``.
    """
    check_cm(name)
    n = 10

    def seeded_draws():
        # Seeding precedes model construction and realize_hp().
        seed(0)
        model = ComponentModel(name)
        model.realize_hp()
        return [model.sample_data() for _ in range(n)]

    first_run = seeded_draws()
    second_run = seeded_draws()
    for left, right in zip(first_run, second_run):
        assert_almost_equal(left, right)
示例#17
0
def check_sample_data_seed(name):
    """Assert that reseeding with 0 reproduces the ``sample_data`` stream.

    Each of two runs reseeds with ``seed(0)``, constructs a new
    ``ComponentModel(name)``, calls ``realize_hp()`` and collects 10
    samples.  Samples at the same position must be almost equal.
    """
    check_cm(name)
    n = 10
    sequences = []
    for _ in range(2):
        seed(0)
        cm = ComponentModel(name)
        cm.realize_hp()
        sequences.append([cm.sample_data() for _ in range(n)])
    baseline, repeat = sequences
    for expected, actual in zip(baseline, repeat):
        assert_almost_equal(expected, actual)
示例#18
0
def check_ss(a, b):
    """Check that two implementations keep matching sufficient statistics.

    Model ``b`` shares model ``a``'s realized hyperparameters.  Their dumped
    statistics must be equal initially (``assert_equal``) and remain close
    (``assert_close``) after every ``add_data`` and, afterwards, every
    ``remove_data`` of the same ``DPS`` sampled points.

    :param a: identifier of the first implementation (the sampling source).
    :param b: identifier of the second implementation.
    """
    check_cm(a)
    check_cm(b)
    first = ComponentModel(a)
    first.realize_hp()
    second = ComponentModel(b, hp=first.dump_hp())
    points = [first.sample_data() for _ in range(DPS)]

    # Before any data: exact equality is required.
    assert_equal(first.dump_ss(), second.dump_ss())

    # Insert every point into both models, comparing after each step.
    for point in points:
        first.add_data(point)
        second.add_data(point)
        assert_close(first.dump_ss(), second.dump_ss())

    # Remove the points again in the same order, comparing after each step.
    for point in points:
        first.remove_data(point)
        second.remove_data(point)
        assert_close(first.dump_ss(), second.dump_ss())
示例#19
0
def check_exchangeable(name):
    """Check that ``data_prob`` does not depend on insertion order.

    ``COUNT`` sampled values are added in one random order, removed again in
    that same order (after which ``data_prob()`` must be almost 0), and then
    added in a second random order.  The ``data_prob()`` values recorded
    after the two full insertions must be almost equal.
    """
    check_cm(name)
    cm = ComponentModel(name)
    cm.realize_hp()
    values = [cm.sample_data() for _ in range(COUNT)]
    # Both orderings are drawn before any data is added.
    first_order = permutation(COUNT)
    second_order = permutation(COUNT)
    first_pass = [values[first_order[k]] for k in range(COUNT)]
    second_pass = [values[second_order[k]] for k in range(COUNT)]

    for value in first_pass:
        cm.add_data(value)
    prob_first = cm.data_prob()

    for value in first_pass:
        cm.remove_data(value)
    assert_almost_equal(cm.data_prob(), 0.)

    for value in second_pass:
        cm.add_data(value)
    prob_second = cm.data_prob()
    assert_almost_equal(prob_first, prob_second)
示例#20
0
def check_exchangeable(name):
    """Assert that two insertion orders give the same ``data_prob``.

    The same ``COUNT`` samples are inserted under permutation ``p1``, taken
    out again under ``p1`` (``data_prob()`` is then asserted almost 0), and
    re-inserted under permutation ``p2``.  The probabilities after the two
    complete insertions are asserted almost equal.
    """
    check_cm(name)
    model = ComponentModel(name)
    model.realize_hp()
    samples = [model.sample_data() for _ in range(COUNT)]
    order_a = permutation(COUNT)
    order_b = permutation(COUNT)

    def each_in(order):
        # Yield the samples in the sequence dictated by ``order``.
        for position in range(COUNT):
            yield samples[order[position]]

    for sample in each_in(order_a):
        model.add_data(sample)
    prob_a = model.data_prob()

    for sample in each_in(order_a):
        model.remove_data(sample)
    assert_almost_equal(model.data_prob(), 0.)

    for sample in each_in(order_b):
        model.add_data(sample)
    prob_b = model.data_prob()
    assert_almost_equal(prob_a, prob_b)
示例#21
0
def check_nich(impl, data_count, mean, std):
    """Compare binned samples against integrated predictive probabilities.

    When ``data_count`` is truthy, that many normal draws are reduced to
    ``'count'``, ``'mean'`` and ``'variance'`` sufficient statistics;
    otherwise the model is built with ``ss=None``.  ``SAMPS`` samples are
    binned with ``bin_samples`` and each bin is scored by integrating
    ``exp(pred_prob(x))`` over its range.  The bin probabilities must sum to
    within ``THRESH`` of 1 and the value returned by ``mgof`` on the
    ``TOPN`` most probable bins must exceed ``THRESH``.

    :param impl: model identifier passed to ``check_cm``/``ComponentModel``.
    :param data_count: number of normal draws; falsy means no prior data.
    :param mean: mean of the normal draws.
    :param std: standard deviation of the normal draws.
    """
    check_cm(impl)
    if data_count:
        data = np.random.normal(mean, std, size=data_count)
        ss = {
            'count': data_count,
            'mean': data.mean(),
            'variance': data.var(),
        }
    else:
        ss = None
    cm = ComponentModel(impl, ss=ss)
    samples = cm.sample_data(SAMPS)
    counts, bin_ranges = bin_samples(samples)

    def pdf(x):
        return np.exp(cm.pred_prob(x))

    # Use of quadrature is unfortunate, but for now it is the easiest way
    # to score bins and seems to work.
    probs = [
        quad(pdf, low, high, epsabs=0., epsrel=1e-6)[0]
        for low, high in bin_ranges
    ]
    assert_less(1 - sum(probs), THRESH)
    ranked = sorted(zip(probs, counts), reverse=True)[:TOPN]
    probs, counts = zip(*ranked)
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
示例#22
0
def check_dpm(impl, data_count, beta0):
    """Compare samples from a model against its own predictive probabilities.

    A histogram of ``data_count`` random integers in ``[0, 50)`` becomes the
    ``'counts'`` sufficient statistic, keyed by ``str(index)``.  Every key
    gets an equal ``betas`` weight of ``(1 - beta0) / number_of_keys``.
    ``SAMPS`` values are then sampled; ``-1`` values form one extra trailing
    category scored with ``pred_prob(-1)``.  The probabilities must sum to
    within ``THRESH`` of 1, and the value returned by ``mgof`` on the
    ``TOPN`` most probable categories must exceed ``THRESH``.

    :param impl: model identifier passed to ``check_cm``/``ComponentModel``.
    :param data_count: number of random observations to histogram.
    :param beta0: stored as hyperparameter ``'beta0'``; the remaining mass
        ``1 - beta0`` is split evenly across ``betas``.
    """
    check_cm(impl)
    observed = histogram(np.random.randint(50, size=data_count))
    data = {str(i): obs for i, obs in enumerate(observed)}
    # Division stays inside the comprehension so that an empty histogram
    # does not trigger a division by zero.
    betas = {key: (1 - beta0) / len(data) for key in data}
    hp = {'gamma': 1., 'alpha': 1., 'beta0': beta0, 'betas': betas}
    ss = {'counts': data}
    cm = ComponentModel(impl, ss=ss, hp=hp)
    samples = cm.sample_data(SAMPS)

    # Non-negative samples are histogrammed; -1 is appended as a final bin.
    counts = list(histogram([y for y in samples if y != -1]))
    probs = list(np.exp([cm.pred_prob(x) for x in range(max(samples) + 1)]))
    counts.append(sum(1 for y in samples if y == -1))
    probs.append(np.exp(cm.pred_prob(-1)))
    assert_less(1 - sum(probs), THRESH)

    # Keep only the TOPN categories with the highest predicted probability.
    probs, counts = zip(*sorted(zip(probs, counts), reverse=True)[:TOPN])
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
示例#23
0
def test_vectorize():
    """Check ``add_pred_probs`` against per-model ``pred_prob`` calls.

    For every name in ``MODELS``: ``COMPS`` models sharing one realized set
    of hyperparameters are each loaded with ``DPS`` of their own samples.
    Then, for a fresh sample ``y`` drawn from each model in turn,
    ``mod.add_pred_probs(hp, ss, y, scores)`` is called on a zeroed score
    array and every entry is asserted almost equal to the matching model's
    ``pred_prob(y)``.
    """
    for name in MODELS:
        check_cm(name)
        cm0 = ComponentModel(name)
        cm0.realize_hp()
        hp0 = cm0.dump_hp()
        cms = [ComponentModel(name, hp=hp0) for _ in range(COMPS)]
        for cm in cms:
            dps = [cm.sample_data() for _ in range(DPS)]
            for dp in dps:
                cm.add_data(dp)

        mod = cms[0].mod
        hp = cms[0].hp
        ss = [component.ss for component in cms]
        for sampler in cms:
            y = sampler.sample_data()
            scores = numpy.zeros(COMPS)
            mod.add_pred_probs(hp, ss, y, scores)
            # Distinct name for the inner loop variable: the original reused
            # ``cm`` here, rebinding the outer loop's variable.
            for component, score in zip(cms, scores):
                assert_almost_equal(score, component.pred_prob(y))
示例#24
0
def test_vectorize():
    """Verify the batched ``add_pred_probs`` matches individual ``pred_prob``.

    Per model name in ``MODELS``, ``COMPS`` component models are created
    with identical hyperparameters and each absorbs ``DPS`` samples it drew
    itself.  Each model then supplies one new sample ``y``; the scores that
    ``mod.add_pred_probs`` writes into a zero-initialized array of length
    ``COMPS`` are asserted almost equal to ``pred_prob(y)`` of the
    corresponding model.
    """
    for name in MODELS:
        check_cm(name)
        template = ComponentModel(name)
        template.realize_hp()
        shared_hp = template.dump_hp()
        cms = [ComponentModel(name, hp=shared_hp) for _ in range(COMPS)]
        for cm in cms:
            dps = [cm.sample_data() for _ in range(DPS)]
            for dp in dps:
                cm.add_data(dp)

        mod = cms[0].mod
        hp = cms[0].hp
        ss = [member.ss for member in cms]
        for source in cms:
            y = source.sample_data()
            scores = numpy.zeros(COMPS)
            mod.add_pred_probs(hp, ss, y, scores)
            # The inner loop uses its own variable name so it no longer
            # rebinds the outer loop's model variable.
            for member, score in zip(cms, scores):
                assert_almost_equal(score, member.pred_prob(y))
示例#25
0
def check_nich(impl, data_count, mean, std):
    """Goodness-of-fit check of sampled data against ``pred_prob``.

    With a truthy ``data_count``, normal draws are summarized into
    ``'count'``/``'mean'``/``'variance'`` statistics for the model; with a
    falsy one the model receives ``ss=None``.  ``SAMPS`` samples are binned
    by ``bin_samples``; each bin's probability is the numerical integral of
    ``exp(pred_prob(x))`` across the bin.  Asserts that the probabilities
    sum to within ``THRESH`` of 1 and that ``mgof`` on the ``TOPN`` most
    probable bins returns more than ``THRESH``.

    :param impl: model identifier passed to ``check_cm``/``ComponentModel``.
    :param data_count: number of normal draws; falsy means no prior data.
    :param mean: mean of the normal draws.
    :param std: standard deviation of the normal draws.
    """
    check_cm(impl)
    ss = None
    if data_count:
        draws = np.random.normal(mean, std, size=data_count)
        ss = {'count': data_count, 'mean': draws.mean(), 'variance': draws.var()}
    cm = ComponentModel(impl, ss=ss)
    counts, bin_ranges = bin_samples(cm.sample_data(SAMPS))

    def density(x):
        return np.exp(cm.pred_prob(x))

    # Use of quadrature is unfortunate, but for now it is the easiest way
    # to score bins and seems to work.
    probs = []
    for lower, upper in bin_ranges:
        integral = quad(density, lower, upper, epsabs=0., epsrel=1e-6)
        probs.append(integral[0])
    assert_less(1 - sum(probs), THRESH)
    top_pairs = sorted(zip(probs, counts), reverse=True)[:TOPN]
    probs, counts = zip(*top_pairs)
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
示例#26
0
def check_hp_io(name):
    """Check that dumped hyperparameters survive a reload.

    After ``realize_hp()``, a second ``ComponentModel`` is built from
    ``cm.dump_hp()``; its own ``dump_hp()`` must equal the original's.
    """
    check_cm(name)
    cm = ComponentModel(name)
    cm.realize_hp()
    reloaded = ComponentModel(name, hp=cm.dump_hp())
    assert_equal(reloaded.dump_hp(), cm.dump_hp())
示例#27
0
def check_dd(impl, data_count, D):
    """Seed a model with a random histogram and run ``_check_discrete``.

    The ``'counts'`` statistic is a ``D``-bin histogram of ``data_count``
    random integers from ``[0, D)``; ``D`` is also forwarded as
    ``p={'D': D}``.  Hyperparameters are realized before the check runs.

    :param impl: model identifier passed to ``check_cm``/``ComponentModel``.
    :param data_count: number of random integers to draw.
    :param D: exclusive upper bound of the draws and the histogram bin count.
    """
    check_cm(impl)
    random_values = np.random.randint(D, size=data_count)
    counts = histogram(random_values, bin_count=D)
    model = ComponentModel(impl, ss={'counts': counts}, p={'D': D})
    model.realize_hp()
    _check_discrete(model)
示例#28
0
def check_summarize_N(name):
    """Call ``summarize`` once on a batch of ``COUNT`` samples.

    The batch comes from a single ``sample_data(COUNT)`` call on a default
    ``ComponentModel(name)``, made after ``check_cm(name)``.
    """
    check_cm(name)
    batch = ComponentModel(name).sample_data(COUNT)
    summarize(name, batch)
示例#29
0
def check_hp_io(name):
    """Round-trip the model's hyperparameters through ``dump_hp``.

    A model constructed with ``hp=`` set to the realized model's dump is
    asserted to dump hyperparameters equal to the realized model's.
    """
    check_cm(name)
    original = ComponentModel(name)
    original.realize_hp()
    dumped = original.dump_hp()
    restored = ComponentModel(name, hp=dumped).dump_hp()
    assert_equal(restored, original.dump_hp())
示例#30
0
def check_summarize_N(name):
    """Summarize ``COUNT`` samples drawn in one ``sample_data`` call.

    ``check_cm(name)`` runs first; the result of ``sample_data(COUNT)`` on a
    default ``ComponentModel(name)`` is passed straight to ``summarize``.
    """
    check_cm(name)
    model = ComponentModel(name)
    summarize(name, model.sample_data(COUNT))
示例#31
0
def check_hp(a, b):
    """Check that two implementations dump equal default hyperparameters.

    Both models are constructed with no extra arguments (and without
    ``realize_hp()``), then their ``dump_hp()`` results are asserted equal.

    :param a: identifier of the first implementation.
    :param b: identifier of the second implementation.
    """
    check_cm(a)
    check_cm(b)
    # Construct both models before dumping either, as in the original order.
    first, second = ComponentModel(a), ComponentModel(b)
    assert_equal(first.dump_hp(), second.dump_hp())