def check_loggamma_ks(shape, seed):
    # Draw values with simulateLogGamma from a RandomState seeded with
    # `seed`, exponentiate them, and report them against the CDF of
    # scipy.stats.gamma(shape) via reportKnownContinuous.
    rng = numpy.random.RandomState(seed)
    count = default_num_samples()
    log_draws = []
    for _ in xrange(count):
        log_draws.append(simulateLogGamma(shape, rng))
    # The simulator's outputs are exponentiated before the comparison.
    draws = np.exp(np.array(log_draws))
    return reportKnownContinuous(scipy.stats.gamma(shape).cdf, draws)
def testOneSample(seed):
    # Repeatedly call gp._gp_sample at a single test input, given three
    # fixed observations, and report the draws against a Gaussian with
    # mean expect_mu and standard deviation sqrt(expect_sig).
    rng = npr.RandomState(seed)
    xs_observed = np.array([1.3, -2.0, 0.0])
    ys_observed = np.array([5.0, 2.3, 8.0])
    query_x = 1.4
    expect_mu = 4.6307
    expect_sig = 0.0027
    sigma = 2.1
    ell = 1.8
    observed = OrderedDict(zip(xs_observed, ys_observed))
    mean_fn = gp.mean_const(0.)
    cov_fn = cov.scale(sigma**2, cov.se(ell**2))
    count = default_num_samples(4)
    draws = []
    for _ in xrange(count):
        # Only one input is queried, so keep the first entry of the result.
        outcome = gp._gp_sample(mean_fn, cov_fn, observed, [query_x], rng)
        draws.append(outcome[0])
    draws = np.array(draws)
    assert draws.shape == (count, )
    return reportKnownGaussian(expect_mu, np.sqrt(expect_sig), draws)
def testCollectSmoke3(seed):
    # Run an inference program that repeatedly collects a labelled
    # (normal 0 1) expression into a dataset, then report the 'label'
    # column against a standard Gaussian.
    ripl = get_ripl(seed=seed)
    template = """
(let ((d (empty)))
  (do (repeat %s
       (bind (collect (labelled (normal 0 1) label)) (curry into d)))
      (return d)))"""
    program = template % default_num_samples()
    dataset = ripl.infer(program)
    predictions = extract_from_dataset(dataset, 'label')
    return reportKnownGaussian(0.0, 1.0, predictions)
def checkResamplingSmoke(mode, seed):
    # Resample to default_num_samples() particles using the resample
    # variant named by `mode`, sample (normal 0 1) across the engine, and
    # report the "value" entries against a standard Gaussian.
    particle_count = default_num_samples()
    ripl = get_ripl(seed=seed)
    command = "(resample%s %s)" % (mode, particle_count)
    ripl.infer(command)
    engine = ripl.sivm.core_sivm.engine
    parsed = ripl._ensure_parsed_expression("(normal 0 1)")
    predictions = []
    for stack_dict in engine.sample_all(parsed):
        predictions.append(stack_dict["value"])
    return reportKnownGaussian(0, 1, predictions)
def collectLikelihoodWeighted(ripl, address):
    # Run (likelihood_weight) default_num_samples() times.  After each
    # run, record the value reported at `address` together with the first
    # entry of the engine model's log_weights.
    # Returns a (values, log weights) pair of equally long lists.
    pairs = []
    for _ in range(default_num_samples()):
        ripl.infer("(likelihood_weight)")
        value = ripl.report(address)
        log_weight = ripl.sivm.core_sivm.engine.model.log_weights[0]
        pairs.append((value, log_weight))
    values = [pair[0] for pair in pairs]
    log_weights = [pair[1] for pair in pairs]
    return (values, log_weights)
def testEnumerateCoupledChoices2(seed):
    # Second illustration of Issue #462 (its second manifestation).
    #
    # When enumeration gathers the candidate values before detaching
    # and combines them as an independent product, it makes up
    # combinations that are merely distinct representations of what is
    # semantically one option.  All possibilities do get considered (as
    # in this variant), but some of them end up overweighted.
    #
    # Concretely: start from three calls to the same CRP holding
    # distinct values (the force statements arrange that).  Enumeration
    # then invents 4^3 combinations of tables to try, although only 5
    # differ up to renumbering of the tables: (1, 1, 1), (1, 1, 2),
    # (1, 2, 1), (1, 2, 2), and (1, 2, 3).  Those cannot be
    # overrepresented evenly, which leads to the wrong posterior.
    raise SkipTest(
        "Fails due to https://github.com/probcomp/Venturecxx/issues/462")
    # The statements below are unreachable while the skip above is in
    # place; they are the body of the test proper.
    ripl = get_ripl(seed=seed)
    ripl.assume("crp", "(make_crp 1)")
    for name in ("result1", "result2", "result3"):
        ripl.assume(name, "(crp)")
    all_distinct = (
        "(and (not (eq result1 result2))"
        "(and (not (eq result2 result3))"
        "(not (eq result1 result3))))")
    ripl.predict(all_distinct, label="pid")
    force_distinct_then_gibbs = """(do (force result1 atom<1>)
    (force result2 atom<2>)
    (force result3 atom<3>)
    (gibbs default all 1))"""
    from_prior = collectSamples(
        ripl, "pid", infer="reset_to_prior",
        num_samples=default_num_samples(4))
    # A single Gibbs step from any starting state should land in the
    # posterior, which here coincides with the prior.
    after_gibbs = collectSamples(
        ripl, "pid", infer=force_distinct_then_gibbs,
        num_samples=default_num_samples(4))
    return reportSameDiscrete(from_prior, after_gibbs)
def testResamplingSmoke4(seed):
    # Check that limiting the number of processes doesn't screw up
    # inference too much.
    #
    # Resamples to n particles with the process limit set to half of n,
    # checks that n predictions come back, and reports them against a
    # standard Gaussian.
    n = default_num_samples()
    r = get_ripl(seed=seed)
    # Floor division keeps the process limit an integer in the command
    # text; with true division it would be formatted as e.g. "5.0".
    process_limit = n // 2
    r.infer("(resample_multiprocess %s %s)" % (n, process_limit))
    predictions = r.sample_all("(normal 0 1)")
    eq_(n, len(predictions))
    return reportKnownGaussian(0, 1, predictions)
def testEnumerateCoupledChoices1(seed):
    # Test case for the first problem identified in Issue #462.
    #
    # Suppose enumeration collects all candidate value sets at once, at
    # the start of the enumeration run.  If the options available to a
    # later choice depend on what an earlier choice took (e.g., for the
    # made SP of make_crp), this goes wrong: a dependent product is
    # needed, not an independent one.
    #
    # This example is enough to expose the problem.  When the CRP has
    # only one extant table at the time enumeration is invoked (the
    # force calls arrange that), each node considers just that table
    # and one new table.  Enumeration therefore considers 8 options,
    # and in none of them are all three nodes assigned to distinct
    # tables.
    raise SkipTest(
        "Fails due to https://github.com/probcomp/Venturecxx/issues/462")
    # The statements below are unreachable while the skip above is in
    # place; they are the body of the test proper.
    ripl = get_ripl(seed=seed)
    ripl.assume("crp", "(make_crp 1)")
    for name in ("result1", "result2", "result3"):
        ripl.assume(name, "(crp)")
    all_distinct = (
        "(and (not (eq result1 result2))"
        "(and (not (eq result2 result3))"
        "(not (eq result1 result3))))")
    ripl.predict(all_distinct, label="pid")
    force_same_then_gibbs = """(do (force result1 atom<1>)
    (force result2 atom<1>)
    (force result3 atom<1>)
    (gibbs default all 1))"""
    from_prior = collectSamples(
        ripl, "pid", infer="reset_to_prior",
        num_samples=default_num_samples(4))
    # A single Gibbs step from any starting state should land in the
    # posterior, which here coincides with the prior.
    after_gibbs = collectSamples(
        ripl, "pid", infer=force_same_then_gibbs,
        num_samples=default_num_samples(4))
    return reportSameDiscrete(from_prior, after_gibbs)
def try_at_five(maker):
    # Build a model whose "obs" procedure is made by `maker` applied to
    # (mu 1 1 1), observe (obs) to be 5, and collect samples of mu under
    # MH.
    # NOTE: `seed` is not a parameter; it is read from an outer scope
    # that is not visible in this block.
    ripl = get_ripl(seed=seed)
    ripl.assume("mu", "(normal 0 1)")
    maker_expr = "(%s mu 1 1 1)" % maker
    ripl.assume("obs", maker_expr)
    ripl.observe("(obs)", 5)
    transitions = default_num_transitions_per_sample()
    mh_command = "(mh default all %d)" % transitions
    return collectSamples(
        ripl, "mu", infer=mh_command, num_samples=default_num_samples(2))
def testCollectSmoke1(seed):
    # Assume x ~ (normal 0 1), then run an inference program that
    # alternates one MH step with collecting x into a dataset.  The 'x'
    # column is reported against a standard Gaussian.
    ripl = get_ripl(seed=seed)
    ripl.assume("x", "(normal 0 1)")
    template = """
(let ((d (empty)))
  (do (repeat %s
       (do (mh default one 1)
           (bind (collect x) (curry into d))))
      (return d)))"""
    program = template % default_num_samples()
    dataset = ripl.infer(program)
    predictions = extract_from_dataset(dataset, 'x')
    return reportKnownGaussian(0.0, 1.0, predictions)
def check_loggamma_ks_quad(shape, seed):
    # Draw values with simulateLogGamma and report them against a CDF
    # obtained by numerically integrating exp(logDensityLogGamma) from
    # -infinity with scipy.integrate.quad.
    rng = numpy.random.RandomState(seed)
    count = default_num_samples()
    draws = []
    for _ in xrange(count):
        draws.append(simulateLogGamma(shape, rng))

    def density(x):
        return exp(logDensityLogGamma(x, shape))

    def integrated_cdf(x):
        # quad returns (integral, error estimate); only the integral is used.
        integral, _error = scipy.integrate.quad(density, -float('inf'), x)
        return integral

    # np.vectorize lets the scalar CDF be applied to array arguments.
    return reportKnownContinuous(np.vectorize(integrated_cdf), draws)
def extract_sample(maker, params, index, seed):
    # For each of default_num_samples(5) rounds: assume "made" as `maker`
    # applied to `params`, sample a list of index+1 applications of
    # "made", keep the last element, and forget "made" again.
    # Returns the list of kept elements.
    ripl = get_ripl(seed=seed)
    ripl.assume("maker", maker)
    applications = [v.app(v.sym("made")) for _ in range(index+1)]
    list_expr = v.app(v.sym("list"), *applications)
    results = []
    for _ in range(default_num_samples(5)):
        ripl.assume("made", v.app(v.sym("maker"), *params))
        last_value = ripl.sample(list_expr)[-1]
        # Forget "made" so the next round can assume it afresh.
        ripl.forget("made")
        results.append(last_value)
    return results
def testExecuteSmoke(seed):
    # Smoke test for execute_program: clear the ripl, run a small
    # program (including comment lines), sample x, and repeat.  With
    # prior (normal 0 1) and an observation of 2 under (normal x 1), the
    # samples are reported against a Gaussian with mean 1 and standard
    # deviation sqrt(0.5).
    ripl = get_ripl(seed=seed)
    # The line breaks matter here: the program contains ';' comments.
    template = """[assume x (normal 0 1)]
      ;; An observation
      [observe (normal x 1) 2] ; with an end-of-line comment
      [infer (mh default one %s)]"""
    predictions = []
    for _ in range(default_num_samples()):
        ripl.clear()
        program = template % default_num_transitions_per_sample()
        ripl.execute_program(program)
        predictions.append(ripl.sample("x"))
    return reportKnownGaussian(1, math.sqrt(0.5), predictions)
def extract_cross_sample(maker, params, index1, index2, combiner, seed):
    # For each of default_num_samples(5) rounds: assume "made" as `maker`
    # applied to `params`, sample a list of applications of "made" long
    # enough to cover both indices, and forget "made" again.
    # Returns the list of combiner(element at index1, element at index2).
    ripl = get_ripl(seed=seed)
    ripl.assume("maker", maker)
    highest = max(index1, index2)
    applications = [v.app(v.sym("made")) for _ in range(highest+1)]
    list_expr = v.app(v.sym("list"), *applications)
    results = []
    for _ in range(default_num_samples(5)):
        ripl.assume("made", v.app(v.sym("maker"), *params))
        values = ripl.sample(list_expr)
        # Forget "made" so the next round can assume it afresh.
        ripl.forget("made")
        results.append(combiner(values[index1], values[index2]))
    return results
def testObserveThroughRef(seed):
    # Observe the first of two coin flips through a ref/deref pair and
    # report the predicted second flip against the expected discrete
    # distribution.
    ripl = get_ripl(seed=seed)
    ripl.assume("coin", "(make_beta_bernoulli 1 1)")
    ripl.assume("items", "(list (ref (coin)) (ref (coin)))")
    ripl.observe("(deref (first items))", True)
    ripl.predict("(deref (second items))", label="pid")
    sample_count = default_num_samples(5)
    predictions = collectSamples(ripl, "pid", num_samples=sample_count)
    expected = [(False, 0.333), (True, 0.666)]
    return reportKnownDiscrete(expected, predictions)
def testGPMean1(seed):
    # Predict a GP built by (make_gp zero sq_exp) at the single input 0
    # and report the first component of each prediction against a
    # standard Gaussian.
    ripl = get_ripl(seed=seed)
    prep_ripl(ripl)
    ripl.assume('gp', '(make_gp zero sq_exp)')
    ripl.predict("(gp (array 0))", label="pid")
    sample_count = default_num_samples(2)
    predictions = collectSamples(ripl, "pid", num_samples=sample_count)
    first_components = []
    for prediction in predictions:
        first_components.append(prediction[0])
    return reportKnownGaussian(0, 1, first_components)
def testResampling1(seed):
    # Each round: clear the ripl, resample to 10 particles, build the
    # model x ~ (normal 0 1) observed as 2 under (normal x 1), resample
    # down to 1 particle, and sample x.  The samples are reported against
    # a Gaussian with mean 1 and standard deviation sqrt(0.5).
    particle_count = 10
    ripl = get_ripl(seed=seed)
    predictions = []
    for _ in range(default_num_samples()):
        ripl.clear()
        ripl.infer("(resample %d)" % particle_count)
        ripl.assume("x", "(normal 0 1)")
        ripl.observe("(normal x 1)", 2)
        ripl.infer("(resample 1)")
        predictions.append(ripl.sample("x"))
    return reportKnownGaussian(1, math.sqrt(0.5), predictions)
def testMVNormalRandomWalkSoundness(seed):
    # Exercises the subtlety about block proposals and delta kernels
    # that is described in the "joint-delta-kernels" footnote of
    # doc/on-latents.md.
    ripl = get_ripl(seed=seed)
    ripl.assume("mean", "(multivariate_normal (array 0) (id_matrix 1))")
    ripl.assume("y", "(multivariate_normal mean (id_matrix 1))")
    collected = collectSamples(
        ripl, "y", infer="(mh default all 50)",
        num_samples=default_num_samples(10))
    # Keep the first component of each collected value.
    predictions = []
    for vector in collected:
        predictions.append(vector[0])
    return reportKnownGaussian(0, math.sqrt(2), predictions)
def testEnumerateCoupledChoices3(seed):
    # Third illustration of Issue #462 (its second manifestation).
    #
    # Enumerating a single choice should not depend on that choice's
    # initial value, but because of #462 it does.  The setup enumerates
    # one of two choices from a CRP.  If the two start out distinct,
    # enumeration considers three options, the last two of which are
    # equivalent: "become the same as the other point", "remain the
    # same as I was", and "become a unique snowflake".  That makes it
    # overweight the state where the choices are distinct by 2:1.
    ripl = get_ripl(seed=seed)
    ripl.assume("crp", "(make_crp 1)")
    ripl.assume("result1", "(crp)")
    ripl.assume("result2", "(crp)")
    ripl.predict("(eq result1 result2)", label="pid")
    force_same_then_gibbs = """(do (force result1 atom<1>)
    (force result2 atom<1>)
    (gibbs default one 1))"""
    force_distinct_then_gibbs = """(do (force result1 atom<1>)
    (force result2 atom<2>)
    (gibbs default one 1))"""
    # Starting from equal atoms happens to compute the exact posterior
    # here.  It equals the prior and is 50-50 on whether the atoms are
    # the same, so it serves as the reference.
    reference = collectSamples(
        ripl, "pid", infer=force_same_then_gibbs,
        num_samples=default_num_samples(6))
    candidate = collectSamples(
        ripl, "pid", infer=force_distinct_then_gibbs,
        num_samples=default_num_samples(6))
    return reportSameDiscrete(reference, candidate)
def testModelSwitchingSmoke(seed):
    # Define an inference procedure that creates a fresh model with
    # new_model, does its assume/observe/mh/sample work inside it via
    # in_model, and returns the sampled value.  Repeated calls of
    # (normal_through_model 0 1) are reported against a standard
    # Gaussian.
    ripl = get_ripl(seed=seed, persistent_inference_trace=True)
    template = """
[define normal_through_model (lambda (mu sigma)
  (do (m <- (new_model))
      (res <- (in_model m
        (do (assume x (normal 0 ,(* (sqrt 2) sigma)))
            (assume y (normal x ,(* (sqrt 2) sigma)))
            (observe y (* 2 mu))
            (mh default one %s)
            (sample x))))
      (return (first res))))]
  """
    ripl.execute_program(template % default_num_transitions_per_sample())
    predictions = []
    for _ in range(default_num_samples()):
        predictions.append(ripl.infer("(normal_through_model 0 1)"))
    return reportKnownGaussian(0.0, 1.0, predictions)
def testModelForkingSmoke(seed):
    # Assume p ~ (beta 1 1), then define an inference procedure that
    # forks the model with fork_model, adds a-1 true and b-1 false flip
    # observations inside the fork, runs MH, and returns a sample of p.
    # Repeated calls of (beta_through_model 3 2) are reported against
    # the CDF of stats.beta(3,2).
    ripl = get_ripl(seed=seed, persistent_inference_trace=True)
    template = """
[assume p (beta 1 1)]

[define beta_through_model (lambda (a b)
  (do (m <- (fork_model))
      (res <- (in_model m
        (do (repeat (- a 1) (observe (flip p) true))
            (repeat (- b 1) (observe (flip p) false))
            (mh default one %s)
            (sample p))))
      (return (first res))))]
  """
    ripl.execute_program(template % default_num_transitions_per_sample())
    predictions = []
    for _ in range(default_num_samples()):
        predictions.append(ripl.infer("(beta_through_model 3 2)"))
    expected_cdf = stats.beta(3,2).cdf
    return reportKnownContinuous(expected_cdf, predictions)
def checkForEachParticleCustomMH(mode, seed):
    # Define a custom MH kernel "drift_mh" with a symmetric (normal x 1)
    # proposal and apply it to every particle with for_each_particle.
    # With prior (normal 0 1) and an observation of 2 under (normal x 1),
    # the per-particle samples of x are reported against a Gaussian with
    # mean 1 and standard deviation 0.5**0.5.
    particle_count = max(2, default_num_samples())
    ripl = get_ripl(seed=seed, persistent_inference_trace=True)
    # The backslash right after the opening quotes drops the leading
    # newline from the program text.
    drift_mh_source = """\
(lambda (scope block)
  (mh_correct
   (on_subproblem default all
     (symmetric_local_proposal
      (lambda (x) (normal x 1))))))
"""
    ripl.define("drift_mh", drift_mh_source)
    ripl.assume("x", "(normal 0 1)")
    ripl.observe("(normal x 1)", 2)
    ripl.infer("(resample%s %s)" % (mode, particle_count))
    remaining = default_num_transitions_per_sample()
    while remaining > 0:
        ripl.infer("(for_each_particle (drift_mh default all))")
        remaining -= 1
    predictions = ripl.infer("(for_each_particle (sample x))")
    return reportKnownGaussian(1, 0.5**0.5, predictions)
def testResampling2(seed):
    # Same as testResampling1 except for one additional resample step,
    # which is supposed to be harmless.  Uses 20 particles and four times
    # default_num_samples() rounds.
    particle_count = 20
    ripl = get_ripl(seed=seed)
    resample_many = "(resample %d)" % particle_count
    predictions = []
    for _ in range(4 * default_num_samples()):
        ripl.clear()
        ripl.infer(resample_many)
        ripl.assume("x", "(normal 0 1)")
        ripl.observe("(normal x 1)", 2)
        ripl.infer("(incorporate)")
        # The additional resample/incorporate pair under test.
        ripl.infer(resample_many)
        ripl.infer("(incorporate)")
        ripl.infer("(resample 1)")
        predictions.append(ripl.sample("x"))
    return reportKnownGaussian(1, math.sqrt(0.5), predictions)
def testBasicParticleFilter2(seed):
    # A sanity test for particle filtering (continuous)
    #
    # Each of the default_num_samples() rounds builds a fresh ripl with
    # initBasicPFripl2 and, for every (time, value) pair, resamples to P
    # particles, predicts (f t), runs MH, and observes (g t).  It then
    # resamples down to one particle and records a prediction of (f 4).
    P = 10
    N = default_num_samples()
    predictions = []
    # Materialized as a list because it is iterated once per round; a
    # lazy zip would be exhausted after the first round.  The name also
    # avoids shadowing the standard `os` module.
    observations = list(zip(range(0, 5), [1, 2, 3, 4, 5]))
    rng = random.Random(seed)
    for _ in range(N):
        # Each ripl gets its own seed drawn from the seeded generator.
        ripl = initBasicPFripl2(rng.randint(1, 2**31 - 1))
        for t, val in observations:
            ripl.infer("(resample %d)" % P)
            ripl.predict("(f %d)" % t)
            ripl.infer("(mh 0 %d 5)" % t)
            ripl.observe("(g %d)" % t, val)
        ripl.infer("(resample 1)")
        ripl.predict("(f 4)", label="pid")
        predictions.append(ripl.report("pid"))
    return reportKnownGaussian(390 / 89.0, math.sqrt(55 / 89.0), predictions)
def testBasicParticleFilter1(seed):
    # A sanity test for particle filtering (discrete)
    #
    # Each of the default_num_samples() rounds builds a fresh ripl with
    # initBasicPFripl1 and, for every (time, value) pair, resamples to P
    # particles, predicts (f t), runs MH, and observes (g t).  It then
    # resamples down to one particle and records a prediction of (g 6).
    P = 10
    N = default_num_samples()
    predictions = []
    # Materialized as a list because it is iterated once per round; a
    # lazy zip would be exhausted after the first round.  The name also
    # avoids shadowing the standard `os` module.
    observations = list(zip(range(1, 6), [False, False, True, False, False]))
    rng = random.Random(seed)
    for _ in range(N):
        # Each ripl gets its own seed drawn from the seeded generator.
        ripl = initBasicPFripl1(rng.randint(1, 2**31 - 1))
        for t, val in observations:
            ripl.infer("(resample %d)" % P)
            ripl.predict("(f %d)" % t)
            ripl.infer("(mh 0 %d 5)" % t)
            ripl.observe("(g %d)" % t, val)
        ripl.infer("(resample 1)")
        ripl.predict("(g 6)", label="pid")
        predictions.append(ripl.report("pid"))
    ans = [(0, 0.6528), (1, 0.3472)]
    return reportKnownDiscrete(ans, predictions)
def testOccasionalRejectionBrushScope(seed):
    # Another version, this one requiring the correction to be computed
    # correctly on a custom scope (carefully arranged so that no block
    # is created where some principal node might be in the brush).
    #
    # This arrangement of blocks was picked to falsify the heuristic
    # that, at the time of writing, both Lite and Puma used in their
    # correction computation: add the number of blocks the scope had
    # remaining in the pre-proposal trace to the number of blocks that
    # gained root nodes in the proposal.  The heuristic is wrong when a
    # block that is not empty in the pre-proposal trace gains a new node
    # because of the proposal.  That is what happens here when `flip2`
    # proposes to move from True to False.
    ripl = get_ripl(seed=seed)
    model = """
(assume flip1 (tag "frob" 1 (flip)))
(assume flip2 (tag "frob" 2 (flip)))
(assume flip2_or_flip3
  (if flip2 true (tag "frob" 1 (flip))))
(observe (exactly (or flip1 flip2_or_flip3)) true)
"""
    ripl.execute_program(model)
    transitions = default_num_transitions_per_sample()
    gibbs_command = '(gibbs "frob" one %s false)' % transitions
    sample_count = default_num_samples(10)
    predictions = collectSamples(
        ripl, address="flip2", infer=gibbs_command, num_samples=sample_count)
    # TODO A power analysis to choose the number of samples would be
    # nice.  It is unclear exactly what distribution the expected bug
    # produces, but empirically it looks like it might be 2:1
    # True:False.  (The incorrect computation being singled out
    # overcorrects, which probably means more rejections of True->False
    # moves than are justified.)  A reasonable fallback might be "Pick
    # the closest distribution given by comparably small integer ratios
    # that is skewed in the expected direction".
    expected = [(True, 4.0 / 7), (False, 3.0 / 7)]
    return reportKnownDiscrete(expected, predictions)
def myCollectSamples(ripl, method):
    # Collect default_num_samples(4) samples of the "pid" directive,
    # using the inference command that inferCommand builds for `method`.
    sample_count = default_num_samples(4)
    command = inferCommand(method)
    return collectSamples(
        ripl, "pid", num_samples=sample_count, infer=command)
def samples(infer):
    # Collect default_num_samples() samples of "datum_0" with
    # collectIidSamples under the given inference command.
    # NOTE: `r` is not a parameter; it is read from an outer scope that
    # is not visible in this block.
    sample_count = default_num_samples()
    return collectIidSamples(
        r, address="datum_0", num_samples=sample_count, infer=infer)
def checkForEachParticleIsIndependent(mode, prop, seed):
    # Resample to at least two particles with the resample variant named
    # by `mode`, sample (normal 0 1) once per particle, and return
    # whatever `prop` makes of the list of per-particle samples.
    particle_count = max(2, default_num_samples())
    ripl = get_ripl(seed=seed)
    resample_command = "(resample%s %s)" % (mode, particle_count)
    ripl.infer(resample_command)
    per_particle = ripl.infer("(for_each_particle (sample (normal 0 1)))")
    return prop(per_particle)
def checkForEachParticleSmoke2(mode):
    # Resample to at least two particles with the resample variant named
    # by `mode`, then check that for_each_particle returns 5 once per
    # particle and that on_particle 1 returns a single 5.
    particle_count = max(2, default_num_samples())
    ripl = get_ripl()
    ripl.infer("(resample%s %s)" % (mode, particle_count))
    expected = [5] * particle_count
    eq_(expected, ripl.infer("(for_each_particle (return 5))"))
    eq_(5, ripl.infer("(on_particle 1 (return 5))"))