def check_summarize(name):
    """Feed COUNT individually drawn samples of model `name` to summarize.

    check_cm(name) runs first; a fresh ComponentModel is then sampled one
    value at a time and the collected list is handed to summarize().
    """
    check_cm(name)
    model = ComponentModel(name)
    draws = [model.sample_data() for _ in range(COUNT)]
    summarize(name, draws)
def check_dpm(impl, data_count, beta0):
    """Compare sampled values of `impl` against its predictive probabilities.

    Count statistics are built from `data_count` random integers drawn from
    [0, 50). Every observed key receives an equal share of `1 - beta0` as
    its beta. SAMPS values are then sampled; the value -1 is tallied
    separately from the non-negative values (presumably it marks an
    unobserved value -- confirm against the model).

    Asserts that the predictive probabilities sum to within THRESH of one
    and that the value returned by mgof on the TOPN most probable entries
    exceeds THRESH.
    """
    check_cm(impl)
    observed = histogram(np.random.randint(50, size=data_count))
    counts_by_key = {str(i): obs for i, obs in enumerate(observed)}
    # The division stays inside the comprehension so that an empty
    # histogram never triggers a division by zero.
    betas = {
        str(i): (1 - beta0) / len(counts_by_key)
        for i in range(len(counts_by_key))
    }
    hp = {
        'gamma': 1.,
        'alpha': 1.,
        'beta0': beta0,
        'betas': betas,
    }
    cm = ComponentModel(impl, ss={'counts': counts_by_key}, hp=hp)

    samples = cm.sample_data(SAMPS)
    counts = list(histogram([y for y in samples if y != -1]))
    log_probs = [cm.pred_prob(x) for x in range(max(samples) + 1)]
    probs = list(np.exp(log_probs))

    # The -1 outcome goes last in both lists so the pairs stay aligned.
    counts.append(len([y for y in samples if y == -1]))
    probs.append(np.exp(cm.pred_prob(-1)))
    assert_less(1 - sum(probs), THRESH)

    ranked = sorted(zip(probs, counts), reverse=True)
    probs, counts = zip(*ranked[:TOPN])
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
def check_generate(name):
    """Sample from a BasicDistribution built from generated parameters.

    After check_cm(name), a ComponentModel has realize_hp() called on it and
    the result of generate_post() is passed as `pm` to BasicDistribution,
    from which one value is sampled. Nothing is asserted; the check passes
    if no call raises.
    """
    check_cm(name)
    model = ComponentModel(name)
    model.realize_hp()
    distribution = BasicDistribution(name, pm=model.generate_post())
    distribution.sample_data()
def check_ss_io(name):
    """Assert that dumped sufficient statistics survive a reload.

    The round trip (dump_ss -> new ComponentModel(ss=...) -> dump_ss) is
    checked twice: once right after realize_hp(), and once after a single
    sampled value has been added to the model.
    """
    check_cm(name)
    cm = ComponentModel(name)
    cm.realize_hp()

    def assert_round_trip():
        # Dump, rebuild from the dump, and compare against a fresh dump.
        rebuilt = ComponentModel(name, ss=cm.dump_ss())
        assert_equal(rebuilt.dump_ss(), cm.dump_ss())

    assert_round_trip()
    cm.add_data(cm.sample_data())
    assert_round_trip()
def check_dd(impl, data_count, D):
    """Run _check_discrete on `impl` seeded with random count statistics.

    `data_count` integers are drawn from [0, D) and binned by histogram()
    with bin_count=D; the result is supplied as the 'counts' statistic and
    D is passed through the `p` argument.
    """
    check_cm(impl)
    draws = np.random.randint(D, size=data_count)
    stats = {'counts': histogram(draws, bin_count=D)}
    model = ComponentModel(impl, ss=stats, p={'D': D})
    model.realize_hp()
    _check_discrete(model)
def check_gp(impl, data_count, lam):
    """Run _check_discrete on `impl` seeded with Poisson-distributed data.

    `data_count` values are drawn from a Poisson distribution with rate
    `lam`; their count, sum, and summed logarithm are supplied as the 'n',
    'sum' and 'log_prod' statistics.
    """
    check_cm(impl)
    draws = np.random.poisson(lam, size=data_count)
    total = np.sum(draws)
    # NOTE(review): np.log gives -inf for any zero draw, which Poisson
    # sampling can produce -- confirm this is the statistic the model expects.
    log_total = np.sum(np.log(draws))
    stats = {
        'n': data_count,
        'sum': total,
        'log_prod': log_total,
    }
    model = ComponentModel(impl, ss=stats)
    _check_discrete(model)
def check_sums(name):
    """Assert that summed pred_prob values equal the final data_prob.

    COUNT values are sampled up front. Each value's pred_prob is taken
    before that value is added to the model, and the running total is
    compared with data_prob() once all values are in.
    """
    check_cm(name)
    model = ComponentModel(name)
    model.realize_hp()
    draws = [model.sample_data() for _ in range(COUNT)]

    running_total = 0.
    for draw in draws:
        # Score first, then add: the score must not include the value itself.
        running_total += model.pred_prob(draw)
        model.add_data(draw)
    assert_almost_equal(running_total, model.data_prob())
def check_sample_post_seed(name):
    """Assert that sample_post() is reproducible under a fixed seed.

    Two fresh models are each created right after seed(0) and asked for
    COUNT sample_post() values; the two sequences must match element-wise.
    """
    check_cm(name)

    def seeded_post_values():
        # Reseed before constructing the model so both runs start alike.
        seed(0)
        model = ComponentModel(name)
        return [model.sample_post() for _ in range(COUNT)]

    first_run = seeded_post_values()
    second_run = seeded_post_values()
    for first, second in zip(first_run, second_run):
        assert_array_almost_equal(first, second)
def check_probs(a, b):
    """Assert that implementations `a` and `b` report matching probabilities.

    Model `b` is built with the hyperparameters dumped from model `a`.
    DPS values are sampled from `a`; before each value is added to both
    models, their data_prob() and pred_prob(value) results are compared.
    """
    check_cm(a)
    check_cm(b)
    model_a = ComponentModel(a)
    model_a.realize_hp()
    model_b = ComponentModel(b, hp=model_a.dump_hp())

    points = [model_a.sample_data() for _ in range(DPS)]
    for point in points:
        assert_almost_equal(model_a.data_prob(), model_b.data_prob())
        assert_almost_equal(model_a.pred_prob(point), model_b.pred_prob(point))
        model_a.add_data(point)
        model_b.add_data(point)
def check_sample_data_seed(name, n=10):
    """Assert that sample_data() is reproducible under a fixed seed.

    Two fresh models are each created right after seed(0), have
    realize_hp() called, and are asked for `n` samples; the two sequences
    must match element-wise.

    Args:
        name: model name passed to check_cm and ComponentModel.
        n: number of samples drawn per run (defaults to 10).
    """
    check_cm(name)

    def seeded_draws():
        # Reseed before constructing the model so both runs start alike.
        seed(0)
        model = ComponentModel(name)
        model.realize_hp()
        return [model.sample_data() for _ in range(n)]

    first_run = seeded_draws()
    second_run = seeded_draws()
    for first, second in zip(first_run, second_run):
        assert_almost_equal(first, second)
def check_ss(a, b):
    """Assert that implementations `a` and `b` keep matching statistics.

    Model `b` is built with the hyperparameters dumped from model `a`. The
    dumped statistics must be equal initially, and close (assert_close)
    after every add_data and every remove_data of the same DPS values.
    """
    check_cm(a)
    check_cm(b)
    model_a = ComponentModel(a)
    model_a.realize_hp()
    model_b = ComponentModel(b, hp=model_a.dump_hp())

    points = [model_a.sample_data() for _ in range(DPS)]
    assert_equal(model_a.dump_ss(), model_b.dump_ss())

    for point in points:
        model_a.add_data(point)
        model_b.add_data(point)
        assert_close(model_a.dump_ss(), model_b.dump_ss())

    for point in points:
        model_a.remove_data(point)
        model_b.remove_data(point)
        assert_close(model_a.dump_ss(), model_b.dump_ss())
def check_exchangeable(name):
    """Assert that data_prob() does not depend on insertion order.

    COUNT sampled values are added in one random order, removed again
    (after which data_prob() must be 0), and re-added in a second random
    order; the two data_prob() results must agree.
    """
    check_cm(name)
    model = ComponentModel(name)
    model.realize_hp()
    draws = [model.sample_data() for _ in range(COUNT)]
    # Both orderings are drawn before any data is added, as in the
    # sequence of random calls this check has always used.
    first_order = permutation(COUNT)
    second_order = permutation(COUNT)

    for index in first_order:
        model.add_data(draws[index])
    first_prob = model.data_prob()

    for index in first_order:
        model.remove_data(draws[index])
    assert_almost_equal(model.data_prob(), 0.)

    for index in second_order:
        model.add_data(draws[index])
    second_prob = model.data_prob()

    assert_almost_equal(first_prob, second_prob)
def check_nich(impl, data_count, mean, std):
    """Compare binned samples of `impl` against its integrated density.

    When `data_count` is non-zero, that many normal(mean, std) draws are
    summarised into 'count', 'mean' and 'variance' statistics; otherwise
    the model is built with ss=None. SAMPS values are sampled and binned,
    and each bin's probability is obtained by integrating
    exp(pred_prob(x)) over the bin range.

    Asserts that the bin probabilities sum to within THRESH of one and
    that the value returned by mgof on the TOPN most probable bins
    exceeds THRESH.
    """
    check_cm(impl)
    ss = None
    if data_count:
        data = np.random.normal(mean, std, size=data_count)
        ss = {
            'count': data_count,
            'mean': data.mean(),
            'variance': data.var(),
        }
    cm = ComponentModel(impl, ss=ss)
    samples = cm.sample_data(SAMPS)
    counts, bin_ranges = bin_samples(samples)

    # Quadrature is an unfortunate choice, but for now it is the easiest
    # way to score bins and seems to work.
    def pdf(x):
        return np.exp(cm.pred_prob(x))

    probs = []
    for lower, upper in bin_ranges:
        integral = quad(pdf, lower, upper, epsabs=0., epsrel=1e-6)
        probs.append(integral[0])
    assert_less(1 - sum(probs), THRESH)

    ranked = sorted(zip(probs, counts), reverse=True)
    probs, counts = zip(*ranked[:TOPN])
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
def check_dpm(impl, data_count, beta0):
    """Compare sampled values of `impl` against its predictive probabilities.

    Count statistics are built from `data_count` random integers drawn from
    [0, 50). Every observed key receives an equal share of `1 - beta0` as
    its beta. SAMPS values are then sampled; the value -1 is tallied
    separately from the non-negative values (presumably it marks an
    unobserved value -- confirm against the model).

    Asserts that the predictive probabilities sum to within THRESH of one
    and that the value returned by mgof on the TOPN most probable entries
    exceeds THRESH.
    """
    check_cm(impl)
    observed = histogram(np.random.randint(50, size=data_count))
    counts_by_key = {str(i): obs for i, obs in enumerate(observed)}
    # The division stays inside the comprehension so that an empty
    # histogram never triggers a division by zero.
    betas = {
        str(i): (1 - beta0) / len(counts_by_key)
        for i in range(len(counts_by_key))
    }
    hp = {
        'gamma': 1.,
        'alpha': 1.,
        'beta0': beta0,
        'betas': betas,
    }
    cm = ComponentModel(impl, ss={'counts': counts_by_key}, hp=hp)

    samples = cm.sample_data(SAMPS)
    counts = list(histogram([y for y in samples if y != -1]))
    log_probs = [cm.pred_prob(x) for x in range(max(samples) + 1)]
    probs = list(np.exp(log_probs))

    # The -1 outcome goes last in both lists so the pairs stay aligned.
    counts.append(len([y for y in samples if y == -1]))
    probs.append(np.exp(cm.pred_prob(-1)))
    assert_less(1 - sum(probs), THRESH)

    ranked = sorted(zip(probs, counts), reverse=True)
    probs, counts = zip(*ranked[:TOPN])
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
def test_vectorize():
    """Check add_pred_probs against per-model pred_prob for every model.

    For each name in MODELS, COMPS models sharing one set of dumped
    hyperparameters are each loaded with DPS of their own samples. Then,
    for a value sampled from each model in turn, add_pred_probs is given a
    zero-initialised array of length COMPS, and every entry is expected to
    match the corresponding model's pred_prob for that value.
    """
    for name in MODELS:
        check_cm(name)
        template = ComponentModel(name)
        template.realize_hp()
        shared_hp = template.dump_hp()
        cms = [ComponentModel(name, hp=shared_hp) for _ in range(COMPS)]

        for cm in cms:
            # Draw all points before adding any, so every sample comes from
            # the model's state prior to this batch.
            dps = [cm.sample_data() for _ in range(DPS)]
            for dp in dps:
                cm.add_data(dp)

        mod = cms[0].mod
        hp = cms[0].hp
        ss = [cm.ss for cm in cms]

        # Distinct names for the two loops: the inner loop used to rebind
        # the outer loop variable, which obscured which model produced `y`.
        for source in cms:
            y = source.sample_data()
            scores = numpy.zeros(COMPS)
            mod.add_pred_probs(hp, ss, y, scores)
            for target, score in zip(cms, scores):
                assert_almost_equal(score, target.pred_prob(y))
def check_nich(impl, data_count, mean, std):
    """Compare binned samples of `impl` against its integrated density.

    When `data_count` is non-zero, that many normal(mean, std) draws are
    summarised into 'count', 'mean' and 'variance' statistics; otherwise
    the model is built with ss=None. SAMPS values are sampled and binned,
    and each bin's probability is obtained by integrating
    exp(pred_prob(x)) over the bin range.

    Asserts that the bin probabilities sum to within THRESH of one and
    that the value returned by mgof on the TOPN most probable bins
    exceeds THRESH.
    """
    check_cm(impl)
    ss = None
    if data_count:
        data = np.random.normal(mean, std, size=data_count)
        ss = {
            'count': data_count,
            'mean': data.mean(),
            'variance': data.var(),
        }
    cm = ComponentModel(impl, ss=ss)
    samples = cm.sample_data(SAMPS)
    counts, bin_ranges = bin_samples(samples)

    # Quadrature is an unfortunate choice, but for now it is the easiest
    # way to score bins and seems to work.
    def pdf(x):
        return np.exp(cm.pred_prob(x))

    probs = []
    for lower, upper in bin_ranges:
        integral = quad(pdf, lower, upper, epsabs=0., epsrel=1e-6)
        probs.append(integral[0])
    assert_less(1 - sum(probs), THRESH)

    ranked = sorted(zip(probs, counts), reverse=True)
    probs, counts = zip(*ranked[:TOPN])
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
def check_hp_io(name):
    """Assert that dumped hyperparameters survive a reload.

    After realize_hp(), the model's dump_hp() output is used to build a
    second ComponentModel, whose own dump_hp() must equal a fresh dump
    from the first model.
    """
    check_cm(name)
    model = ComponentModel(name)
    model.realize_hp()
    rebuilt = ComponentModel(name, hp=model.dump_hp())
    assert_equal(rebuilt.dump_hp(), model.dump_hp())
def check_dd(impl, data_count, D):
    """Run _check_discrete on `impl` seeded with random count statistics.

    `data_count` integers are drawn from [0, D) and binned by histogram()
    with bin_count=D; the result is supplied as the 'counts' statistic and
    D is passed through the `p` argument.
    """
    check_cm(impl)
    draws = np.random.randint(D, size=data_count)
    stats = {'counts': histogram(draws, bin_count=D)}
    model = ComponentModel(impl, ss=stats, p={'D': D})
    model.realize_hp()
    _check_discrete(model)
def check_summarize_N(name):
    """Feed one batch of COUNT samples of model `name` to summarize.

    Unlike drawing values one at a time, the batch is requested with a
    single sample_data(COUNT) call on a fresh ComponentModel.
    """
    check_cm(name)
    model = ComponentModel(name)
    batch = model.sample_data(COUNT)
    summarize(name, batch)
def check_hp(a, b):
    """Assert that implementations `a` and `b` dump equal hyperparameters.

    Both models are constructed from their name alone, with no
    hyperparameters supplied, before their dump_hp() outputs are compared.
    """
    check_cm(a)
    check_cm(b)
    model_a = ComponentModel(a)
    model_b = ComponentModel(b)
    assert_equal(model_a.dump_hp(), model_b.dump_hp())