def testCustomTrainer():
    # raise(SkipTest('for quick results.'))
    "Train a proposal to invert a QMR model with rare diseases, using a custom trainer"
    ripl = get_ripl()
    ripl.assume("d0", "(scope_include (quote D) 0 (bernoulli 0.01))", label="D0")
    ripl.assume("d1", "(scope_include (quote D) 1 (bernoulli 0.01))", label="D1")
    ripl.assume("d2", "(scope_include (quote D) 2 (bernoulli 0.005))", label="D2")
    ripl.assume("joint", "(+ (* d0 4) (* d1 2) d2)", label="pid")
    ripl.observe("(scope_include (quote S) 0 (bernoulli (- 1.0001 (pow 0.5 (+ d0 d1)))))", 1.0)
    ripl.observe("(scope_include (quote S) 1 (bernoulli (- 1.0001 (pow 0.5 (+ d0 d2)))))", 1.0)
    ripl.observe("(scope_include (quote S) 2 (bernoulli (- 1.0001 (pow 0.5 (+ d1 d2)))))", 1.0)
    ripl.register_proposal_program_class("LogisticRegressionProposalProgram",
                                         LogisticRegressionProposalProgram)
    ripl.register_trainer_src("QMR_highprior", QMR_highprior)
    # Posterior for d2 is Bernoulli with p(d2=1) = 0.5
    proposal_src = """
    [declare {
      "name":"logreg",
      "class":"LogisticRegressionProposalProgram",
      "conditioned":[["S",0], ["S",1], ["S",2]],
      "target":[["D",0], ["D",1], ["D",2]],
      "trainer":"QMR_highprior",
      "num_samples":1000}]
    """
    ripl.execute_program(proposal_src)
    predictions = collectSamples(ripl, "pid", infer="(custommh logreg aux 1 5)")
    ans = [(0, 0.000001), (1, 0.003), (2, 0.006), (3, 0.25),
           (4, 0.006), (5, 0.25), (6, 0.5), (7, 0.004)]
    return reportKnownDiscrete(ans, predictions)
def testCategorical1(seed):
    # A simple test that checks the interface of categorical and its
    # simulate method
    ripl = get_ripl(seed=seed)
    ripl.assume("x", "(categorical (simplex 0.1 0.2 0.3 0.4) (array 1 2 3 4))")
    ripl.assume("y", "(categorical (simplex 0.2 0.6 0.2) (array 1 2 3))")
    ripl.predict("(+ x y)", label="pid")
    predictions = collectSamples(ripl, "pid")
    ans = [(2, 0.1 * 0.2),
           (3, 0.1 * 0.6 + 0.2 * 0.2),
           (4, 0.1 * 0.2 + 0.2 * 0.6 + 0.3 * 0.2),
           (5, 0.2 * 0.2 + 0.3 * 0.6 + 0.4 * 0.2),
           (6, 0.3 * 0.2 + 0.4 * 0.6),
           (7, 0.4 * 0.2)]
    return reportKnownDiscrete(ans, predictions)
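# The `ans` table above is the convolution of the two independent categorical
# distributions over x and y.  The sketch below (a hypothetical helper, not
# part of the test suite) shows that arithmetic explicitly.
def _expected_categorical_sum_distribution():
    px = {1: 0.1, 2: 0.2, 3: 0.3, 4: 0.4}
    py = {1: 0.2, 2: 0.6, 3: 0.2}
    sums = {}
    for vx, wx in px.items():
        for vy, wy in py.items():
            sums[vx + vy] = sums.get(vx + vy, 0.0) + wx * wy
    return sorted(sums.items())  # [(2, 0.02), (3, 0.1), ..., (7, 0.08)]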
def testHMMObservationZero(seed):
    ripl = get_ripl(seed=seed)
    ripl.assume("f", """
(make_lazy_hmm
 (simplex 0.5 0.5)
 (matrix (array (array 0.7 0.3)
                (array 0.3 0.7)))
 (matrix (array (array 0.9 0.2)
                (array 0.1 0.8))))
""")
    ripl.observe("(f 0)", "integer<0>")
    ripl.predict("(f 1)", label="pid")
    predictions = collectSamples(ripl, "pid")
    ans = [(0, 0.69/1.1), (1, 0.41/1.1)]
    return reportKnownDiscrete(ans, predictions)
def testBinomial3(seed):
    # A simple test that checks the binomial enumerate method
    ripl = get_ripl(seed=seed)
    b = 0.7
    p1 = 0.3
    p2 = 0.4
    n = 4
    ripl.assume("p", "(tag 0 1 (if (flip %f) %f %f))" % (b, p1, p2))
    ripl.predict("(tag 0 0 (binomial %d p))" % n, label="pid")
    predictions = collectSamples(ripl, "pid",
        infer="(repeat %s (do (mh 0 1 1) (gibbs 0 0 1)))" % default_num_transitions_per_sample())
    ans = [(x, b * scipy.stats.binom.pmf(x, n, p1)
              + (1 - b) * scipy.stats.binom.pmf(x, n, p2))
           for x in range(n + 1)]
    assert_almost_equal(sum([xx[1] for xx in ans]), 1)
    return reportKnownDiscrete(ans, predictions)
def testSprinkler1(seed):
    # Classic Bayes-net example, with no absorbing when proposing to 'rain'
    ripl = get_ripl(seed=seed)
    ripl.assume("rain", "(bernoulli 0.2)", label="pid")
    ripl.assume("sprinkler", "(if rain (bernoulli 0.01) (bernoulli 0.4))")
    ripl.assume("grassWet", """
(if rain
    (if sprinkler (bernoulli 0.99) (bernoulli 0.8))
    (if sprinkler (bernoulli 0.9) (bernoulli 0.00001)))
""")
    ripl.observe("grassWet", True)
    predictions = collectSamples(ripl, "pid")
    ans = [(True, .3577), (False, .6423)]
    return reportKnownDiscrete(ans, predictions)
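# The expected values for the sprinkler tests come from exact enumeration of
# the Bayes net, summing out 'sprinkler'.  A minimal illustrative sketch of
# that computation (a hypothetical helper, not part of the test suite):
def _sprinkler_posterior():
    # Returns P(rain | grassWet = true) and its complement.
    def p_wet(rain, sprinkler):
        if rain:
            return 0.99 if sprinkler else 0.8
        return 0.9 if sprinkler else 0.00001
    def p_sprinkler(rain):
        return 0.01 if rain else 0.4
    weights = {}
    for rain in [True, False]:
        p_r = 0.2 if rain else 0.8
        total = 0.0
        for sprinkler in [True, False]:
            p_s = p_sprinkler(rain) if sprinkler else 1 - p_sprinkler(rain)
            total += p_s * p_wet(rain, sprinkler)
        weights[rain] = p_r * total
    z = sum(weights.values())
    return {k: v / z for k, v in weights.items()}  # ~{True: 0.3577, False: 0.6423}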
def checkEnumerativeGibbsXOR1(in_parallel, seed):
    # Tests that an XOR chain mixes with enumerative gibbs.
    # Note that with RESET=True, this will seem to mix with MH.
    # The next test accounts for that.
    ripl = get_ripl(seed=seed)
    ripl.assume("x", "(tag 0 0 (bernoulli 0.001))", label="pid")
    ripl.assume("y", "(tag 0 0 (bernoulli 0.001))")
    ripl.assume("noisy_true", "(lambda (pred noise) (flip (if pred 1.0 noise)))")
    ripl.observe("(noisy_true (= (+ x y) 1) .000001)", "true")
    infer = "(gibbs 0 0 %s %s)" % \
        (default_num_transitions_per_sample(), in_parallel)
    predictions = collectSamples(ripl, "pid", infer=infer)
    ans = [(True, .5), (False, .5)]
    return reportKnownDiscrete(ans, predictions)
def testBinomial2(seed):
    # A simple test that checks the binomial logdensity
    ripl = get_ripl(seed=seed)
    b = 0.7
    p1 = 0.3
    p2 = 0.4
    n = 4
    ripl.assume("p", "(if (flip %f) %f %f)" % (b, p1, p2))
    ripl.predict("(binomial %d p)" % n, label="pid")
    predictions = collectSamples(ripl, "pid")
    ans = [(x, b * scipy.stats.binom.pmf(x, n, p1)
              + (1 - b) * scipy.stats.binom.pmf(x, n, p2))
           for x in range(n + 1)]
    assert_almost_equal(sum([xx[1] for xx in ans]), 1)
    return reportKnownDiscrete(ans, predictions)
def test_foreign_aaa_infer(seed):
    # Same as test.inference_quality.micro.test_misc_aaa.testMakeBetaBernoulli1
    builtins = builtin.builtInSPs()
    ripl = get_ripl(seed=seed)
    ripl.bind_foreign_sp("test_beta_bernoulli", builtins["make_uc_beta_bernoulli"])
    ripl.assume("a", "(normal 10.0 1.0)")
    ripl.assume("f", "(test_beta_bernoulli a a)")
    ripl.predict("(f)", label="pid")
    for _ in range(20):
        ripl.observe("(f)", "true")
    predictions = collectSamples(ripl, "pid")
    ans = [(False, .25), (True, .75)]
    return reportKnownDiscrete(ans, predictions)
def testHPYLanguageModel1(seed):
    # Nice model from http://www.cs.berkeley.edu/~jordan/papers/teh-jordan-bnp.pdf.
    # Checks that it learns that 1 follows 0
    ripl = get_ripl(seed=seed)
    loadPYMem(ripl)
    # 5 letters for now
    ripl.assume("G_init", "(make_sym_dir_cat 0.5 5)")
    # globally shared parameters for now
    ripl.assume("alpha", "(gamma 1.0 1.0)")
    ripl.assume("d", "(uniform_continuous 0.0 0.01)")
    # G(letter1 letter2 letter3) ~ pymem(alpha,d,G(letter2 letter3))
    ripl.assume("G", """
(mem (lambda (context)
  (if (is_pair context)
      (pymem alpha d (G (rest context)))
      (pymem alpha d G_init))))
""")
    ripl.assume("noisy_true", "(lambda (pred noise) (flip (if pred 1.0 noise)))")
    atoms = [0, 1, 2, 3, 4] * 5
    for i in range(1, len(atoms)):
        ripl.observe("""
(noisy_true
 (eq ((G (list atom<%d>))) atom<%d>)
 0.001)
""" % (atoms[i - 1], atoms[i]), "true")
    ripl.predict("((G (list atom<0>)))", label="pid")

    raise SkipTest("Skipping testHPYLanguageModel because it's slow and I "
                   "don't know how fast it is expected to converge. "
                   "Issue https://app.asana.com/0/9277419963067/9801332616429")
    predictions = collectSamples(ripl, "pid")
    ans = [(0, 0.03), (1, 0.88), (2, 0.03), (3, 0.03), (4, 0.03)]
    return reportKnownDiscrete(ans, predictions)
def testMemArray(seed):
    # Same as testMem2 but when the arguments are arrays
    ripl = get_ripl(seed=seed)
    ripl.assume("f", "(mem (lambda (arg) (categorical (simplex 0.4 0.6) (array 1 2))))")
    ripl.assume("x", "(f (array 1 2))")
    ripl.assume("y", "(f (array 1 2))")
    ripl.assume("w", "(f (array 3 4))")
    ripl.assume("z", "(f (array 3 4))")
    ripl.assume("q", "(categorical (simplex 0.1 0.9) (array 1 2))")
    ripl.predict('(add x y w z q)', label="pid")
    predictions = collectSamples(ripl, "pid")
    ans = [(5, 0.4 * 0.4 * 0.1), (6, 0.4 * 0.4 * 0.9),
           (7, 0.4 * 0.6 * 0.1 * 2), (8, 0.4 * 0.6 * 0.9 * 2),
           (9, 0.6 * 0.6 * 0.1), (10, 0.6 * 0.6 * 0.9)]
    return reportKnownDiscrete(ans, predictions)
def checkDirCatObjectVariation(maker_form, seed):
    # Testing for Issue #452.
    r = get_ripl(seed=seed)
    r.assume("x1", "(flip)")
    r.assume("x2", "(flip)")
    r.assume("x", "(array x1 x2)")
    r.assume("f", maker_form)
    r.observe("(f)", "true")
    r.observe("(f)", "true")
    r.observe("(f)", "true")
    predictions = collectSamples(r, "x", infer="mixes_slowly")
    ans = [
        ([True, True], 1),
        ([True, False], 0.25),
        ([False, True], 0.25),
        ([False, False], 0),
    ]
    return reportKnownDiscrete(ans, predictions)
def testUserDensityBound(seed):
    r = get_ripl(seed=seed)
    r.set_mode("venture_script")
    r.execute_program("""
assume f = (small) ~> {
  if (small) { uniform_continuous(0, 1) }
  else { uniform_continuous(0, 10) }};
assume small = flip(0.5);
observe f(small) = 0.2;
""")
    predictions = collectSamples(r, "small",
                                 infer="rejection(default, all, 0, 1)")
    return reportKnownDiscrete([(True, 10), (False, 1)], predictions)
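# The 10:1 weights above follow from the likelihood ratio at the observed
# point: uniform_continuous(0, 1) has density 1 at 0.2, while
# uniform_continuous(0, 10) has density 0.1 there, and the prior on `small`
# is symmetric.  A tiny illustrative check (a hypothetical helper, not part
# of the test suite):
def _user_density_bound_expectation():
    prior_small = 0.5
    like_small = 1.0      # density of U(0, 1) at 0.2
    like_not_small = 0.1  # density of U(0, 10) at 0.2
    return (prior_small * like_small) / \
        (prior_small * like_small + (1 - prior_small) * like_not_small)
    # == 10.0 / 11.0, i.e. posterior odds of 10:1 in favor of small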
def testMem2(seed):
    # Ensures that all (f 1) and (f 2) are the same
    ripl = get_ripl(seed=seed)
    ripl.assume("f", "(mem (lambda (arg) (categorical (simplex 0.4 0.6) (array 1 2))))")
    ripl.assume("x", "(f 1)")
    ripl.assume("y", "(f 1)")
    ripl.assume("w", "(f 2)")
    ripl.assume("z", "(f 2)")
    ripl.assume("q", "(categorical (simplex 0.1 0.9) (array 1 2))")
    ripl.predict('(add x y w z q)', label="pid")
    predictions = collectSamples(ripl, "pid")
    ans = [(5, 0.4 * 0.4 * 0.1), (6, 0.4 * 0.4 * 0.9),
           (7, 0.4 * 0.6 * 0.1 * 2), (8, 0.4 * 0.6 * 0.9 * 2),
           (9, 0.6 * 0.6 * 0.1), (10, 0.6 * 0.6 * 0.9)]
    return reportKnownDiscrete(ans, predictions)
def testOccasionalRejectionScope(seed):
    # Like the previous test but in a custom scope, because Lite
    # special-cases the default scope when computing the
    # number-of-blocks correction.
    #
    # Note: The "frob" scope registers as always having two blocks, even
    # though one of them will, at runtime, end up having no
    # unconstrained random choices.
    raise SkipTest("Issue #495 was apparently never actually fixed.")
    r = get_ripl(seed=seed)
    r.execute_program("""
(assume cluster_id (tag "frob" 0 (flip)))
(assume cluster (mem (lambda (id) (tag "frob" 1 (normal 0 1)))))
(observe (cluster cluster_id) 1)
""")
    infer = '(do (force cluster_id true) (gibbs "frob" one 1 false))'
    predictions = collectSamples(r, address="cluster_id", infer=infer)
    ans = [(True, 0.5), (False, 0.5)]
    return reportKnownDiscrete(ans, predictions)
def testMem3(seed):
    # Same as testMem2 but with booby traps
    ripl = get_ripl(seed=seed)
    ripl.assume("f", "(mem (lambda (arg) (categorical (simplex 0.4 0.6) (array 1 2))))")
    ripl.assume("g", "((lambda () (mem (lambda (y) (f (add y 1))))))")
    ripl.assume("x", "(f ((if (bernoulli 0.5) (lambda () 1) (lambda () 1))))")
    ripl.assume("y", "(g ((lambda () 0)))")
    ripl.assume("w", "((lambda () (f 2)))")
    ripl.assume("z", "(g 1)")
    ripl.assume("q", "(categorical (simplex 0.1 0.9) (array 1 2))")
    ripl.predict('(add x y w z q)', label="pid")
    predictions = collectSamples(ripl, "pid")
    ans = [(5, 0.4 * 0.4 * 0.1), (6, 0.4 * 0.4 * 0.9),
           (7, 0.4 * 0.6 * 0.1 * 2), (8, 0.4 * 0.6 * 0.9 * 2),
           (9, 0.6 * 0.6 * 0.1), (10, 0.6 * 0.6 * 0.9)]
    return reportKnownDiscrete(ans, predictions)
def testSprinkler2(seed):
    # Classic Bayes-net example, absorbing at 'sprinkler' when proposing
    # to 'rain'. This test needs more iterations than most others,
    # because it mixes badly.
    ripl = get_ripl(seed=seed)
    ripl.assume("rain", "(bernoulli 0.2)", label="pid")
    ripl.assume("sprinkler", "(bernoulli (if rain 0.01 0.4))")
    ripl.assume("grassWet", """
(bernoulli
 (if rain
     (if sprinkler 0.99 0.8)
     (if sprinkler 0.9 0.00001)))
""")
    ripl.observe("grassWet", True)
    predictions = collectSamples(ripl, "pid", infer="mixes_slowly")
    ans = [(True, .3577), (False, .6423)]
    return reportKnownDiscrete(ans, predictions)
def checkEnumerativeGibbsXOR3(in_parallel, seed):
    # A regression catching a mysterious math domain error.
    ripl = get_ripl(seed=seed)
    ripl.assume("x", "(tag 0 0 (bernoulli 0.0015))", label="pid")
    ripl.assume("y", "(tag 0 0 (bernoulli 0.0005))")
    ripl.assume("noisy_true",
                "(lambda (pred noise) (tag 0 0 (flip (if pred 1.0 noise))))")
    # This predict is the difference between this test and
    # testEnumerativeGibbsXOR2, and currently causes a mystery math
    # domain error.
    ripl.predict("(noisy_true (= (+ x y) 1) .000001)")
    ripl.observe("(noisy_true (= (+ x y) 1) .000001)", "true")
    infer = "(gibbs 0 0 %s %s)" % \
        (default_num_transitions_per_sample(), in_parallel)
    predictions = collectSamples(ripl, "pid", infer=infer)
    ans = [(True, .75), (False, .25)]
    return reportKnownDiscrete(ans, predictions)
def checkMakeBetaBernoulli4(maker, seed):
    if inParallel() and "make_suff_stat_bernoulli" in maker \
       and backend_name() == "puma":
        raise SkipTest("The Lite SP interface in Puma is not thread-safe, "
                       "and make_suff_stat_bernoulli comes from Lite.")
    ripl = get_ripl(seed=seed)
    ripl.assume("a", "(normal 10.0 1.0)")
    ripl.assume("f", """
(if (lt a 10.0)
    ({0} a a)
    ({0} a a))""".format(maker))
    ripl.predict("(f)", label="pid")
    for _ in range(20):
        ripl.observe("(f)", "true")
    predictions = collectSamples(ripl, "pid")
    ans = [(False, .25), (True, .75)]
    return reportKnownDiscrete(ans, predictions)
def testLogCategoricalAbsorb(seed):
    # A simple test that checks the interface of log categorical and its
    # simulate and log density methods
    ripl = get_ripl(seed=seed)
    if backend_name() == "puma":
        # XXX Puma's log_categorical demands a simplex argument, as that's
        # the best representation the backend has for an array of numbers.
        # Lite's simplex, however, checks boundary conditions, so can't be
        # used here.
        ripl.assume("x", "(simplex (log .1) (log .9))")
        ripl.assume("y", "(simplex (log .55) (log .45))")
    else:
        ripl.assume("x", "(array (log .1) (log .9))")
        ripl.assume("y", "(array (log .55) (log .45))")
    ripl.assume("b", "(flip)", label="b")
    ripl.observe("(log_categorical (if b x y) (array 10 100))", "100")
    predictions = collectSamples(ripl, "b")
    ans = [(False, 0.333), (True, 0.667)]
    return reportKnownDiscrete(ans, predictions)
def checkMakeBetaBernoulli1(maker, hyper, seed):
    if rejectionSampling() and hyper == "(normal 10.0 1.0)":
        raise SkipTest("Too slow. Tightening the rejection bound is Issue #468.")
    if inParallel() and "make_suff_stat_bernoulli" in maker \
       and backend_name() == "puma":
        raise SkipTest("The Lite SP interface in Puma is not thread-safe, "
                       "and make_suff_stat_bernoulli comes from Lite.")
    ripl = get_ripl(seed=seed)
    ripl.assume("a", hyper)
    ripl.assume("f", "(%s a a)" % maker)
    ripl.predict("(f)", label="pid")
    for _ in range(20):
        ripl.observe("(f)", "true")
    predictions = collectSamples(ripl, "pid")
    ans = [(False, .25), (True, .75)]
    return reportKnownDiscrete(ans, predictions)
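# The 1:3 false:true split expected by the beta-bernoulli checks is the
# Beta-Bernoulli posterior predictive: with symmetric pseudocounts a (around
# 10 in the callers of these checks) and 20 observed successes,
# P(next flip = true) = (a + 20) / (2a + 20) ~= 0.75.  A minimal sketch of
# that arithmetic (a hypothetical helper, illustrative only):
def _beta_bernoulli_posterior_predictive(a=10.0, successes=20, failures=0):
    # Posterior is Beta(a + successes, a + failures); its mean is the
    # predictive probability of the next success.
    return (a + successes) / (a + a + successes + failures)  # 0.75 for a = 10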
def testHMMSP1(seed):
    ripl = get_ripl(seed=seed)
    ripl.assume("f", """
(make_lazy_hmm
 (simplex 0.5 0.5)
 (matrix (array (array 0.7 0.3)
                (array 0.3 0.7)))
 (matrix (array (array 0.9 0.2)
                (array 0.1 0.8))))
""")
    ripl.observe("(f 1)", "integer<0>")
    ripl.observe("(f 2)", "integer<0>")
    ripl.observe("(f 3)", "integer<1>")
    ripl.observe("(f 4)", "integer<0>")
    ripl.observe("(f 5)", "integer<0>")
    ripl.predict("(f 6)", label="pid")
    ripl.predict("(f 7)")
    ripl.predict("(f 8)")
    predictions = collectSamples(ripl, "pid")
    ans = [(0, 0.6528), (1, 0.3472)]
    return reportKnownDiscrete(ans, predictions)
def testBasicParticleFilter1(seed):
    # A sanity test for particle filtering (discrete)
    P = 10
    N = default_num_samples()
    predictions = []
    os = list(zip(range(1, 6), [False, False, True, False, False]))
    rng = random.Random(seed)
    for _ in range(N):
        ripl = initBasicPFripl1(rng.randint(1, 2**31 - 1))
        for t, val in os:
            ripl.infer("(resample %d)" % P)
            ripl.predict("(f %d)" % t)
            ripl.infer("(mh 0 %d 5)" % t)
            ripl.observe("(g %d)" % t, val)
        ripl.infer("(resample 1)")
        ripl.predict("(g 6)", label="pid")
        predictions.append(ripl.report("pid"))
    ans = [(0, 0.6528), (1, 0.3472)]
    return reportKnownDiscrete(ans, predictions)
def testOccasionalRejectionBrushScope(seed):
    # Another version, this time requiring correct computation of the
    # correction on a custom scope (which is carefully arranged to avoid
    # creating blocks where some principal node might be in the brush).
    #
    # This particular arrangement of blocks is chosen to falsify the
    # heuristic present at the time of writing in both Lite and Puma's
    # correction computation, which is to add the number of blocks the
    # scope had remaining in the pre-proposal trace with the number of
    # blocks that gained root nodes in the proposal.  This heuristic is
    # wrong if a block that is not empty in the pre-proposal trace gains
    # a new node due to the proposal, which is what happens here, when
    # `flip2` proposes to move from True to False.
    r = get_ripl(seed=seed)
    r.execute_program("""
(assume flip1 (tag "frob" 1 (flip)))
(assume flip2 (tag "frob" 2 (flip)))
(assume flip2_or_flip3 (if flip2 true (tag "frob" 1 (flip))))
(observe (exactly (or flip1 flip2_or_flip3)) true)
""")
    infer = '(gibbs "frob" one %s false)' % default_num_transitions_per_sample()
    predictions = collectSamples(r, address="flip2", infer=infer,
                                 num_samples=default_num_samples(10))
    # TODO Would be nice to do the power analysis to pick the number of
    # samples.  Not sure exactly what distribution the expected bug
    # produces, but empirically it looks like it might be 2:1
    # True:False.  (The incorrect computation being singled out
    # overcorrects, which I think means more rejections of True->False
    # moves than are justified.)  A reasonable fallback might be "Pick
    # the closest distribution given by comparably small integer ratios
    # that is skewed in the expected direction".
    ans = [(True, 4.0 / 7), (False, 3.0 / 7)]
    return reportKnownDiscrete(ans, predictions)
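# The 4/7 : 3/7 split above can be confirmed by brute-force enumeration of
# the three flips (flip3 only matters when flip2 is false).  An illustrative
# sketch (a hypothetical helper, not part of the test suite):
def _brush_scope_posterior():
    from itertools import product
    weight_true = 0.0
    weight_false = 0.0
    for flip1, flip2, flip3 in product([True, False], repeat=3):
        flip2_or_flip3 = True if flip2 else flip3
        if flip1 or flip2_or_flip3:  # the observed constraint
            if flip2:
                weight_true += 1.0 / 8
            else:
                weight_false += 1.0 / 8
    z = weight_true + weight_false
    return weight_true / z, weight_false / z  # (4/7, 3/7)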
def test_serialize_forget(seed):
    with tempfile.NamedTemporaryFile(prefix='serialized.ripl') as f:
        v1 = get_ripl(seed=seed)
        v1.assume('is_tricky', '(flip 0.2)')
        v1.assume('theta', '(if is_tricky (beta 1.0 1.0) 0.5)')
        v1.assume('flip_coin', '(lambda () (flip theta))')
        for i in range(10):
            v1.observe('(flip_coin)', 'true', label='y{}'.format(i))
        v1.infer("(incorporate)")
        v1.save(f.name)

        v2 = get_ripl()
        v2.load(f.name)

        for i in range(10):
            v2.forget('y{}'.format(i))
        v2.predict('is_tricky', label='pid')

        infer = "(mh default one %s)" % default_num_transitions_per_sample()
        samples = collectStateSequence(v2, 'pid', infer=infer)
        ans = [(False, 0.8), (True, 0.2)]
        return reportKnownDiscrete(ans, samples)
def testHMMSP4(seed):
    ripl = get_ripl(seed=seed)
    ripl.assume("z", "(flip)")
    ripl.assume("f", """
(make_lazy_hmm
 (simplex 0.5 0.5)
 (matrix (array (array 0.7 0.3)
                (array 0.3 0.7)))
 (if z
     (matrix (array (array 0.9 0.2)
                    (array 0.1 0.8)))
     (matrix (array (array 0.8 0.8)
                    (array 0.2 0.2)))))
""")
    ripl.observe("(f 1)", "integer<0>")
    ripl.observe("(f 2)", "integer<0>")
    ripl.observe("(f 3)", "integer<1>")
    ripl.observe("(f 4)", "integer<0>")
    ripl.observe("(f 5)", "integer<0>")
    ripl.predict("z", label="pid")
    predictions = collectSamples(ripl, "pid")
    ans = [(True, 0.2952), (False, 0.7048)]
    return reportKnownDiscrete(ans, predictions)
def testFlip2(seed):
    ripl = get_ripl(seed=seed)
    ripl.predict("(bernoulli 0.5)", label="pid")
    predictions = collectSamples(ripl, "pid")
    return reportKnownDiscrete([[True, 0.5], [False, 0.5]], predictions)