Exemplo n.º 1
0
    def test_chain_bernoulli(self):
        """Check DiscreteKameleon against independent Bernoulli variables.

        Runs the sampler on a distribution of independent Bernoulli variables
        and asserts that the norm of the difference between the empirical
        chain mean and the true parameters is within 0.2 of zero.
        """
        dim = 5
        ps = rand(dim)
        ps /= norm(ps)
        target = Bernoulli(ps)

        # samples drawn from the target are handed to the sampler as history
        history = target.sample(100).samples

        kernel = HypercubeKernel(0.2)

        # positional arguments after the history: threshold=0.8, spread=0.2
        sampler = DiscreteKameleon(target, kernel, history, 0.8, 0.2)

        initial = zeros(target.dimension, dtype=numpy.bool8)
        params = MCMCParams(start=initial, num_iterations=1000)
        chain = MCMCChain(sampler, params)

        chain.run()
        deviation = norm(mean(chain.samples, 0) - ps)
        self.assertAlmostEqual(deviation, 0, delta=0.2)
Exemplo n.º 2
0
def main():
    """Run a StandardMetropolis chain on the ozone posterior and dump it.

    Builds an OzonePosterior under a Gaussian prior, attaches statistics and
    chain-storing outputs, resumes from the last stored chain if the storing
    output can load one, runs the chain and dumps it to
    ``<home>/sample_ozone_posterior_average_serial/final_chain``.
    """
    Log.set_loglevel(logging.DEBUG)

    prior = Gaussian(Sigma=eye(2) * 100)

    posterior = OzonePosterior(prior,
                               logdet_alg="scikits",
                               solve_method="scikits")

    # fixed diagonal proposal covariance for the two sampled parameters
    proposal_cov = diag([4.000000000000000e-05, 1.072091680000000e+02])
    mcmc_sampler = StandardMetropolis(posterior, scale=1.0, cov=proposal_cov)

    start = asarray([-11.35, -13.1])
    mcmc_params = MCMCParams(start=start, num_iterations=5000)
    chain = MCMCChain(mcmc_sampler, mcmc_params)

    chain.append_mcmc_output(StatisticsOutput(print_from=1, lag=1))

    home = expanduser("~")
    folder = os.sep.join([home, "sample_ozone_posterior_average_serial"])
    store_chain_output = StoreChainOutput(folder)
    chain.append_mcmc_output(store_chain_output)

    # continue a previously stored chain instead of starting over, if any
    loaded = store_chain_output.load_last_stored_chain()
    if loaded is None:
        logging.info("Running chain from scratch")
    else:
        logging.info("Running chain from iteration %d", loaded.iteration)
        chain = loaded

    chain.run()

    # context manager closes the file even when dump() raises
    with open(folder + os.sep + "final_chain", "w") as f:
        dump(chain, f)
def main():
    # define the MCMC target distribution
    # possible distributions are in kameleon_mcmc.distribution: Banana, Flower, Ring
#    distribution = Banana(dimension=2, bananicity=0.03, V=100.0)
    distribution = Ring()
    
    # create instance of kameleon sampler that learns the length scale
    # can be replaced by any other sampler in kameleon_mcmc.mcmc.samplers
    kernel = GaussianKernel(sigma=5)
    mcmc_sampler = KameleonWindowLearnScale(distribution, kernel, stop_adapt=inf, nu2=0.05)
    
    # mcmc chain and its parameters
    start = asarray([0,-3])
    mcmc_params = MCMCParams(start=start, num_iterations=30000)
    chain = MCMCChain(mcmc_sampler, mcmc_params)
    
    # plot every iteration and print some statistics
    chain.append_mcmc_output(PlottingOutput(distribution, plot_from=2000))
    chain.append_mcmc_output(StatisticsOutput())
    
    # run cmcm
    chain.run()
    
    # print empirical quantiles
    burnin=10000
    print distribution.emp_quantiles(chain.samples[burnin:])
    
    Visualise.visualise_distribution(distribution, chain.samples)
def main():
    """Gibbs-sample a rotated 2D Gaussian through its full conditionals."""
    # diagonal covariance with entries 30 and 1 ...
    cov = eye(2)
    cov[0, 0] = 30
    cov[1, 1] = 1
    # ... expressed in a basis rotated by -pi/4
    rotation = MatrixTools.rotation_matrix(-pi / 4)
    cov = rotation.T.dot(cov).dot(rotation)

    gaussian = Gaussian(Sigma=cov)
    conditionals = GaussianFullConditionals(gaussian, [0., 0.])

    sampler = Gibbs(conditionals)

    params = MCMCParams(start=zeros(conditionals.dimension),
                        num_iterations=20000)
    chain = MCMCChain(sampler, params)

    # both outputs use a lag of 100
    statistics = StatisticsOutput(plot_times=True, lag=100)
    chain.append_mcmc_output(statistics)
    plotting = PlottingOutput(distribution=gaussian,
                              plot_from=1,
                              colour_by_likelihood=False,
                              num_samples_plot=0,
                              lag=100)
    chain.append_mcmc_output(plotting)
    chain.run()
def sample_gibbs(distribution, num_samples=1000):
    """Run a Gibbs chain on ``distribution`` from a random boolean start.

    The start state has ``distribution.dimension`` entries, each drawn as
    ``rand() < 0.5``. The chain runs for ``num_samples`` iterations and its
    ``samples`` attribute is returned.
    """
    sampler = Gibbs(distribution)
    initial_bits = [rand() < 0.5 for _ in range(distribution.dimension)]
    initial = asarray(initial_bits, dtype=numpy.bool8)
    params = MCMCParams(start=initial, num_iterations=num_samples)
    gibbs_chain = MCMCChain(sampler, params)
    gibbs_chain.run()
    return gibbs_chain.samples
def main():
    d = 5
    ps = rand(d)
    ps /= norm(ps)
    distribution = Bernoulli(ps)

    num_history = 100
    Z = distribution.sample(num_history).samples
    threshold = 0.8
    spread = 0.2

    gamma = 0.2
    kernel = HypercubeKernel(gamma)

    mcmc_sampler = DiscreteKameleon(distribution, kernel, Z, threshold, spread)

    start = zeros(distribution.dimension, dtype=numpy.bool8)
    mcmc_params = MCMCParams(start=start, num_iterations=1000)
    chain = MCMCChain(mcmc_sampler, mcmc_params)

    chain.append_mcmc_output(StatisticsOutput(plot_times=True))
    chain.append_mcmc_output(DiscretePlottingOutput(plot_from=0, lag=100))

    chain.run()
    print "ps", ps
    print "empirical", mean(chain.samples, 0)
Exemplo n.º 7
0
def main():
    distribution = Banana()
    #     distribution = Flower(amplitude=6, frequency=6, variance=1, radius=10, dimension=8)
    #     Visualise.visualise_distribution(distribution)
    show()
    #
    sigma = 5
    print "using sigma", sigma
    kernel = GaussianKernel(sigma=sigma)

    mcmc_sampler = KameleonWindowLearnScale(distribution,
                                            kernel,
                                            stop_adapt=inf)

    start = asarray([0, -5.])
    mcmc_params = MCMCParams(start=start, num_iterations=30000)
    chain = MCMCChain(mcmc_sampler, mcmc_params)

    chain.append_mcmc_output(
        PlottingOutput(distribution,
                       plot_from=3000,
                       colour_by_likelihood=False,
                       num_samples_plot=0))
    chain.append_mcmc_output(StatisticsOutput(plot_times=False))
    chain.run()

    print distribution.emp_quantiles(chain.samples[10000:])
def main():
    """Run DiscreteKameleon on a random 5-dimensional Hopfield network.

    The chain history is produced by a preliminary Gibbs run via
    ``sample_gibbs``.
    """
    dim = 5
    bias = randn(dim)
    noise = randn(dim, dim)
    # symmetric weight matrix with a zeroed diagonal
    weights = noise + noise.T
    fill_diagonal(weights, zeros(dim))
    hopfield = Hopfield(weights, bias)
    initial_bits = [rand() < 0.5 for _ in range(dim)]
    conditionals = HopfieldFullConditionals(full_target=hopfield,
                                            current_state=initial_bits)

    print("Running Gibbs to produce chain history")
    gibbs_samples = sample_gibbs(conditionals, num_samples=2000)
    # drop the first 1500 samples, then keep 500 of the rest in random order
    history = gibbs_samples[1500:].astype(numpy.bool8)
    shuffled = permutation(len(history))
    history = history[shuffled[:500]]
    print("done")

    kernel = HypercubeKernel(0.2)

    # positional arguments after the history: threshold=0.8, spread=0.2
    sampler = DiscreteKameleon(hopfield, kernel, history, 0.8, 0.2)

    initial = zeros(conditionals.dimension, dtype=numpy.bool8)
    params = MCMCParams(start=initial, num_iterations=10000)
    chain = MCMCChain(sampler, params)

    statistics = StatisticsOutput(plot_times=True)
    chain.append_mcmc_output(statistics)
    plotting = DiscretePlottingOutput(plot_from=0, lag=500)
    chain.append_mcmc_output(plotting)

    chain.run()
def main():
    """Run a Metropolis chain on the Russian-Roulette ozone posterior via SGE.

    Posterior estimates are computed through an SGEComputationEngine. The
    chain resumes from the last stored chain if the storing output can load
    one, and is finally dumped to
    ``<home>/sample_ozone_posterior_rr_sge/final_chain``.
    """
    Log.set_loglevel(logging.DEBUG)

    prior = Gaussian(Sigma=eye(2) * 100)
    num_estimates = 1000

    home = expanduser("~")
    folder = os.sep.join([home, "sample_ozone_posterior_rr_sge"])

    # cluster admin set project jump for me to exclusively allocate nodes
    parameter_prefix = ""  # #$ -P jump"

    cluster_parameters = BatchClusterParameters(
        foldername=folder,
        memory=7.8,
        loglevel=logging.DEBUG,
        parameter_prefix=parameter_prefix,
        max_walltime=60 * 60 * 24 - 1)

    computation_engine = SGEComputationEngine(cluster_parameters,
                                              check_interval=10)

    rr_instance = RussianRoulette(1e-3, block_size=400)

    posterior = OzonePosteriorRREngine(rr_instance=rr_instance,
                                       computation_engine=computation_engine,
                                       num_estimates=num_estimates,
                                       prior=prior)

    posterior.logdet_method = "shogun_estimate"

    # fixed diagonal proposal covariance for the two sampled parameters
    proposal_cov = diag([4.000000000000000e-05, 1.072091680000000e+02])
    mcmc_sampler = StandardMetropolis(posterior, scale=1.0, cov=proposal_cov)

    start = asarray([-11.55, -10.1])
    mcmc_params = MCMCParams(start=start, num_iterations=5000)
    chain = MCMCChain(mcmc_sampler, mcmc_params)

    #    chain.append_mcmc_output(PlottingOutput(None, plot_from=1, lag=1))
    chain.append_mcmc_output(StatisticsOutput(print_from=1, lag=1))

    store_chain_output = StoreChainOutput(folder, lag=1)
    chain.append_mcmc_output(store_chain_output)

    # continue a previously stored chain instead of starting over, if any
    loaded = store_chain_output.load_last_stored_chain()
    if loaded is None:
        logging.info("Running chain from scratch")
    else:
        logging.info("Running chain from iteration %d", loaded.iteration)
        chain = loaded

    chain.run()

    # context manager closes the file even when dump() raises
    with open(folder + os.sep + "final_chain", "w") as f:
        dump(chain, f)
def main():
    """Run a Metropolis chain on the averaged ozone posterior estimate.

    Uses a SlurmComputationEngine when the ``sbatch`` command exists and a
    SerialComputationEngine otherwise. The chain resumes from the last stored
    chain if the storing output can load one, and is finally dumped to
    ``<home>/sample_ozone_posterior_average_slurm/final_chain``.
    """
    Log.set_loglevel(logging.DEBUG)

    modulename = "sample_ozone_posterior_average_slurm"
    if not FileSystem.cmd_exists("sbatch"):
        engine = SerialComputationEngine()
    else:
        # previously also tried: "#SBATCH --partition=intel-ivy,wrkstn,compute"
        johns_slurm_hack = "#SBATCH --partition=intel-ivy,compute"

        # working folder of the batch engine; distinct from the chain output
        # folder assigned further below
        folder = os.sep + os.sep.join(["nfs", "data3", "ucabhst", modulename])
        batch_parameters = BatchClusterParameters(
            foldername=folder,
            max_walltime=24 * 60 * 60,
            resubmit_on_timeout=False,
            memory=3,
            parameter_prefix=johns_slurm_hack)
        engine = SlurmComputationEngine(batch_parameters,
                                        check_interval=1,
                                        do_clean_up=True)

    prior = Gaussian(Sigma=eye(2) * 100)
    num_estimates = 100

    posterior = OzonePosteriorAverageEngine(computation_engine=engine,
                                            num_estimates=num_estimates,
                                            prior=prior)
    posterior.logdet_method = "shogun_estimate"

    # fixed diagonal proposal covariance for the two sampled parameters
    proposal_cov = diag([4.000000000000000e-05, 1.072091680000000e+02])
    mcmc_sampler = StandardMetropolis(posterior, scale=1.0, cov=proposal_cov)

    start = asarray([-11.35, -13.1])
    mcmc_params = MCMCParams(start=start, num_iterations=2000)
    chain = MCMCChain(mcmc_sampler, mcmc_params)

    chain.append_mcmc_output(StatisticsOutput(print_from=1, lag=1))

    home = expanduser("~")
    folder = os.sep.join([home, modulename])
    store_chain_output = StoreChainOutput(folder)
    chain.append_mcmc_output(store_chain_output)

    # continue a previously stored chain instead of starting over, if any
    loaded = store_chain_output.load_last_stored_chain()
    if loaded is None:
        logging.info("Running chain from scratch")
    else:
        logging.info("Running chain from iteration %d", loaded.iteration)
        chain = loaded

    chain.run()

    # context manager closes the file even when dump() raises
    with open(folder + os.sep + "final_chain", "w") as f:
        dump(chain, f)
def run_sm_chain(hopfield, start, num_iterations):
    """Run a StandardMetropolisDiscrete chain on ``hopfield`` and return it.

    ``start`` is copied element-wise and converted to a boolean array before
    being used as the initial state. The sampler is created with a spread of
    0.03 and statistics are reported with a lag of 1000.
    """
    initial = list(start)
    sampler = StandardMetropolisDiscrete(hopfield, 0.03)
    mcmc_params = MCMCParams(start=asarray(initial, dtype=numpy.bool8),
                             num_iterations=num_iterations)
    sm_chain = MCMCChain(sampler, mcmc_params)
    statistics = StatisticsOutput(plot_times=True, lag=1000)
    sm_chain.append_mcmc_output(statistics)
    sm_chain.run()

    return sm_chain
def run_kameleon_chain(Z, hopfield, start, num_iterations):
    """Run a DiscreteKameleon chain on ``hopfield`` with history ``Z``.

    Uses a HypercubeKernel with gamma 0.2, threshold 0.8 and spread 0.03.
    Returns the chain after ``num_iterations`` iterations from ``start``.
    """
    hypercube_kernel = HypercubeKernel(0.2)
    # positional arguments after the history: threshold=0.8, spread=0.03
    kameleon = DiscreteKameleon(hopfield, hypercube_kernel, Z, 0.8, 0.03)
    mcmc_params = MCMCParams(start=start, num_iterations=num_iterations)
    kameleon_chain = MCMCChain(kameleon, mcmc_params)
    kameleon_chain.run()

    return kameleon_chain
def run_gibbs_chain(hopfield, start, num_iterations):
    """Run a Gibbs chain on the full conditionals of ``hopfield`` and return it.

    The chain runs for ``num_iterations * hopfield.dimension`` iterations,
    starting from an element-wise copy of ``start`` converted to a boolean
    array. Statistics are reported with a lag of 1000.
    """
    dim = hopfield.dimension
    initial = list(start)
    conditionals = HopfieldFullConditionals(full_target=hopfield,
                                            current_state=initial)
    gibbs = Gibbs(conditionals)
    mcmc_params = MCMCParams(start=asarray(initial, dtype=numpy.bool8),
                             num_iterations=num_iterations * dim)
    gibbs_chain = MCMCChain(gibbs, mcmc_params)
    statistics = StatisticsOutput(plot_times=True, lag=1000)
    gibbs_chain.append_mcmc_output(statistics)
    gibbs_chain.run()

    return gibbs_chain
Exemplo n.º 14
0
def main():
    distribution = Banana(dimension=8)

    sigma = 5
    print "using sigma", sigma
    kernel = GaussianKernel(sigma=sigma)

    mcmc_sampler = Kameleon(distribution, kernel,
                            distribution.sample(100).samples)

    start = zeros(distribution.dimension)
    mcmc_params = MCMCParams(start=start, num_iterations=20000)
    chain = MCMCChain(mcmc_sampler, mcmc_params)

    chain.append_mcmc_output(StatisticsOutput(plot_times=True))
    chain.run()
def main():
    """Run a short Metropolis chain on the Russian-Roulette ozone posterior.

    Posterior estimates are computed with a SerialComputationEngine. The
    chain resumes from the last stored chain if the storing output can load
    one, and is finally dumped to
    ``<home>/sample_ozone_posterior_rr_sge/final_chain``.
    """
    Log.set_loglevel(logging.DEBUG)

    prior = Gaussian(Sigma=eye(2) * 100)
    num_estimates = 2

    home = expanduser("~")
    folder = os.sep.join([home, "sample_ozone_posterior_rr_sge"])

    computation_engine = SerialComputationEngine()

    rr_instance = RussianRoulette(1e-3, block_size=10)

    posterior = OzonePosteriorRREngine(rr_instance=rr_instance,
                                       computation_engine=computation_engine,
                                       num_estimates=num_estimates,
                                       prior=prior)

    posterior.logdet_method = "shogun_estimate"

    # fixed diagonal proposal covariance for the two sampled parameters
    proposal_cov = diag([4.000000000000000e-05, 1.072091680000000e+02])
    mcmc_sampler = StandardMetropolis(posterior, scale=1.0, cov=proposal_cov)

    start = asarray([-11.35, -13.1])
    mcmc_params = MCMCParams(start=start, num_iterations=200)
    chain = MCMCChain(mcmc_sampler, mcmc_params)

    #    chain.append_mcmc_output(PlottingOutput(None, plot_from=1, lag=1))
    chain.append_mcmc_output(StatisticsOutput(print_from=1, lag=1))

    store_chain_output = StoreChainOutput(folder, lag=50)
    chain.append_mcmc_output(store_chain_output)

    # continue a previously stored chain instead of starting over, if any
    loaded = store_chain_output.load_last_stored_chain()
    if loaded is None:
        logging.info("Running chain from scratch")
    else:
        logging.info("Running chain from iteration %d", loaded.iteration)
        chain = loaded

    chain.run()

    # context manager closes the file even when dump() raises
    with open(folder + os.sep + "final_chain", "w") as f:
        dump(chain, f)
def all_tests(gaussian1,gaussian2,n=200):
    """Apply kernel two-sample tests to oracle and Gibbs samples.

    Draws ``n`` oracle samples from each Gaussian and runs one Gibbs chain per
    Gaussian (``2000 + n`` iterations, burn-in 2000). A GaussianKernel whose
    bandwidth comes from the median heuristic on the pooled oracle samples is
    then used to call ``TwoSampleTest`` with the methods 'vanilla', 'block'
    and 'wild'.

    Returns a tuple ``(vanillap, blockp, wildp)`` of 2x2 arrays, one per
    method. Row 0/1 uses the oracle/Gibbs samples of ``gaussian1`` as first
    argument; column 0/1 uses the oracle/Gibbs samples of ``gaussian2`` as
    second argument.
    """
    oracle_samples1 = gaussian1.sample(n=n).samples
    oracle_samples2 = gaussian2.sample(n=n).samples

    targets = (gaussian1, gaussian2)
    conditionals = [GaussianFullConditionals(g, list(g.mu)) for g in targets]
    samplers = [Gibbs(c) for c in conditionals]

    # both chains share one parameter object
    params = MCMCParams(start=zeros(2), num_iterations=2000+n, burnin=2000)

    gibbs_samples = []
    for sampler in samplers:
        chain = MCMCChain(sampler, params)
        chain.run()
        gibbs_samples.append(chain.get_samples_after_burnin())

    pooled = concatenate((oracle_samples1, oracle_samples2), axis=0)
    kernel = GaussianKernel(sigma=GaussianKernel.get_sigma_median_heuristic(pooled))

    first_samples = (oracle_samples1, gibbs_samples[0])
    second_samples = (oracle_samples2, gibbs_samples[1])

    methods = ('vanilla', 'block', 'wild')
    tables = [empty((2, 2)) for _ in methods]
    # same call order as writing the calls out: method by method, row by row
    for method, table in zip(methods, tables):
        for row, first in enumerate(first_samples):
            for col, second in enumerate(second_samples):
                table[row, col] = kernel.TwoSampleTest(first, second,
                                                       method=method)

    return tuple(tables)
Exemplo n.º 17
0
def main():
    distribution = Banana(dimension=8, bananicity=0.1, V=100.0)

    sigma = 5
    print "using sigma", sigma
    kernel = GaussianKernel(sigma=sigma)

    mcmc_sampler = KameleonWindow(distribution, kernel)

    start = zeros(distribution.dimension)
    mcmc_params = MCMCParams(start=start, num_iterations=80000)
    chain = MCMCChain(mcmc_sampler, mcmc_params)

    #    chain.append_mcmc_output(PlottingOutput(distribution, plot_from=3000))
    chain.append_mcmc_output(StatisticsOutput(plot_times=True))
    chain.run()

    print distribution.emp_quantiles(chain.samples)
Exemplo n.º 18
0
def main():
    """Run StandardMetropolis on a 2D Banana target and dump the chain.

    The finished chain is dumped to ``std_metropolis_chain_gaussian.bin`` in
    the current working directory.
    """
    distribution = Banana(dimension=2, bananicity=0.03, V=100.0)

    mcmc_sampler = StandardMetropolis(distribution)

    start = asarray([0., -2.])
    mcmc_params = MCMCParams(start=start, num_iterations=10000)
    chain = MCMCChain(mcmc_sampler, mcmc_params)

    chain.append_mcmc_output(StatisticsOutput(plot_times=True, lag=1000))
    #     chain.append_mcmc_output(PlottingOutput(distribution, plot_from=1, num_samples_plot=0,
    #                                             colour_by_likelihood=False))

    chain.run()

    # context manager closes the file even when dump() raises
    with open("std_metropolis_chain_gaussian.bin", 'w') as f:
        dump(chain, f)
Exemplo n.º 19
0
def main():
    """Gibbs-sample a random 5-dimensional Hopfield network."""
    dim = 5
    bias = randn(dim)
    noise = randn(dim, dim)
    # symmetric weight matrix with a zeroed diagonal
    weights = noise + noise.T
    fill_diagonal(weights, zeros(dim))
    hopfield = Hopfield(weights, bias)
    initial_bits = [rand() < 0.5 for _ in range(dim)]
    conditionals = HopfieldFullConditionals(full_target=hopfield,
                                            current_state=initial_bits)

    sampler = Gibbs(conditionals)

    initial = asarray(initial_bits, dtype=numpy.bool8)
    params = MCMCParams(start=initial, num_iterations=10000)
    chain = MCMCChain(sampler, params)

    # both outputs use a lag of 100
    statistics = StatisticsOutput(plot_times=True, lag=100)
    chain.append_mcmc_output(statistics)
    plotting = DiscretePlottingOutput(plot_from=0, lag=100)
    chain.append_mcmc_output(plotting)
    chain.run()
Exemplo n.º 20
0
def main():
    d = 5
    ps = rand(d)
    ps /= norm(ps)
    print "ps", ps
    full_target = Bernoulli(ps)
    current_state = [0. for _ in range(d)]
    distribution = BernoulliFullConditionals(full_target, current_state)

    mcmc_sampler = Gibbs(distribution)

    mcmc_params = MCMCParams(start=asarray(current_state, dtype=numpy.bool8),
                             num_iterations=10000)
    chain = MCMCChain(mcmc_sampler, mcmc_params)

    chain.append_mcmc_output(StatisticsOutput(plot_times=True, lag=1000))
    chain.append_mcmc_output(DiscretePlottingOutput(plot_from=0, lag=1000))
    chain.run()

    print "marginals:", ps
    print "estimated:", mean(chain.samples, 0)
Exemplo n.º 21
0
    distribution = Banana(dimension=8, bananicity=0.03, V=100)
    sigma = GaussianKernel.get_sigma_median_heuristic(
        distribution.sample(1000).samples)
    sigma = 10
    print "using sigma", sigma
    kernel = GaussianKernel(sigma=sigma)

    burnin = 20000
    num_iterations = 40000

    mcmc_sampler = KameleonWindowLearnScale(distribution,
                                            kernel,
                                            stop_adapt=burnin)
    mean_est = zeros(distribution.dimension, dtype="float64")
    cov_est = 1.0 * eye(distribution.dimension)
    cov_est[0, 0] = distribution.V
    #mcmc_sampler = AdaptiveMetropolisLearnScale(distribution, mean_est=mean_est, cov_est=cov_est)
    #mcmc_sampler = AdaptiveMetropolis(distribution, mean_est=mean_est, cov_est=cov_est)
    #mcmc_sampler = StandardMetropolis(distribution)

    start = zeros(distribution.dimension, dtype="float64")
    mcmc_params = MCMCParams(start=start,
                             num_iterations=num_iterations,
                             burnin=burnin)

    mcmc_chain = MCMCChain(mcmc_sampler, mcmc_params)
    mcmc_chain.append_mcmc_output(StatisticsOutput())

    experiment = SingleChainExperiment(mcmc_chain, experiment_dir)
    experiment.run()
Exemplo n.º 22
0
def main():
    numTrials = 500
    n = 200
    Sigma1 = eye(2)
    Sigma1[0, 0] = 30.0
    Sigma1[1, 1] = 1.0
    theta = -pi / 4
    U = MatrixTools.rotation_matrix(theta)
    Sigma1 = U.T.dot(Sigma1).dot(U)
    print Sigma1
    gaussian1 = Gaussian(Sigma=Sigma1)
    gaussian2 = Gaussian(mu=array([1., 0.]), Sigma=Sigma1)

    oracle_samples1 = gaussian1.sample(n=n).samples
    oracle_samples2 = gaussian2.sample(n=n).samples

    print 'mean1:', mean(oracle_samples1, 0)
    print 'mean2:', mean(oracle_samples2, 0)
    plot(oracle_samples1[:, 0], oracle_samples1[:, 1], 'b*')
    plot(oracle_samples2[:, 0], oracle_samples2[:, 1], 'r*')
    show()
    distribution1 = GaussianFullConditionals(gaussian1, list(gaussian1.mu))
    distribution2 = GaussianFullConditionals(gaussian2, list(gaussian2.mu))

    H0_samples = zeros(numTrials)
    HA_samples = zeros(numTrials)
    mcmc_sampler1 = Gibbs(distribution1)
    mcmc_sampler2 = Gibbs(distribution2)
    burnin = 9000
    thin = 5
    start = zeros(2)
    mcmc_params = MCMCParams(start=start,
                             num_iterations=burnin + thin * n,
                             burnin=burnin)
    sigma = GaussianKernel.get_sigma_median_heuristic(
        concatenate((oracle_samples1, oracle_samples2), axis=0))
    print 'using bandwidth: ', sigma
    kernel = GaussianKernel(sigma=sigma)

    for ii in arange(numTrials):
        start = time.time()
        print 'trial:', ii

        oracle_samples1 = gaussian1.sample(n=n).samples
        oracle_samples1a = gaussian1.sample(n=n).samples
        oracle_samples2 = gaussian2.sample(n=n).samples

        #         chain1 = MCMCChain(mcmc_sampler1, mcmc_params)
        #         chain1.run()
        #         gibbs_samples1 = chain1.get_samples_after_burnin()
        #         gibbs_samples1 = gibbs_samples1[thin*arange(n)]
        #
        #         chain1a = MCMCChain(mcmc_sampler1, mcmc_params)
        #         chain1a.run()
        #         gibbs_samples1a = chain1a.get_samples_after_burnin()
        #         gibbs_samples1a = gibbs_samples1a[thin*arange(n)]
        #
        #         chain2 = MCMCChain(mcmc_sampler2, mcmc_params)
        #         chain2.run()
        #         gibbs_samples2 = chain2.get_samples_after_burnin()
        #         gibbs_samples2 = gibbs_samples2[thin*arange(n)]

        #         H0_samples[ii]=kernel.estimateMMD(gibbs_samples1,gibbs_samples1a)
        #         HA_samples[ii]=kernel.estimateMMD(gibbs_samples1,gibbs_samples2)
        #
        H0_samples[ii] = kernel.estimateMMD(oracle_samples1, oracle_samples1a)
        HA_samples[ii] = kernel.estimateMMD(oracle_samples1, oracle_samples2)
        end = time.time()
        print 'time elapsed: ', end - start

    f = open("/home/dino/git/mmdIIDTrueSamples.dat", "w")
    dump(H0_samples, f)
    dump(HA_samples, f)
    dump(gaussian1, f)
    dump(gaussian2, f)
    f.close()
    return None
def create_ground_truth():
    """Load, or create and store, ground-truth samples of a Hopfield network.

    First tries to load the samples from ``Z.bin`` and the network from
    ``hopfield.bin``. If that raises IOError, the network is loaded from
    ``hopfield.bin`` or, failing that, randomly generated with dimension 50.
    A Gibbs chain is then run for 10,000,000 iterations, the first 100,000
    samples are discarded and every 2000th remaining sample is kept.

    The network is written to ``hopfield.bin``; the chain and the samples are
    written to ``chain.bin`` and ``Z.bin``, where an IOError is only reported
    by a printed message.

    Returns:
        Tuple ``(Z, hopfield)``: the boolean samples and the network.
    """
    filename_chain = "chain.bin"
    filename_Z = "Z.bin"
    filename_hopfield = "hopfield.bin"

    # NOTE(review): load() presumably unpickles; only use with trusted files.
    # All files are handled by context managers so that they are closed even
    # when load()/dump() raises.
    try:
        with open(filename_Z, "r") as f:
            Z = load(f)

        with open(filename_hopfield, "r") as f:
            hopfield = load(f)
        print("Loaded existing ground truth samples and hopfield netword.")
    except IOError:
        print("No existing ground truth samples. Creating.")

        # the network to sample from
        try:
            with open(filename_hopfield, "r") as f:
                hopfield = load(f)
            d = hopfield.dimension
            print("Loaded hopfield network")
        except IOError:
            d = 50
            b = randn(d)
            V = randn(d, d)
            # symmetric weight matrix with a zeroed diagonal
            W = V + V.T
            fill_diagonal(W, zeros(d))
            hopfield = Hopfield(W, b)

        # dump hopfield network
        with open(filename_hopfield, "w") as f:
            dump(hopfield, f)

        # iterations
        num_iterations = 10000000
        warm_up = 100000
        thin = 2000

        current_state = [rand() < 0.5 for _ in range(d)]
        distribution = HopfieldFullConditionals(full_target=hopfield,
                                                current_state=current_state,
                                                schedule="random_permutation")
        mcmc_sampler = Gibbs(distribution)
        #         spread = .0001
        #         mcmc_sampler = StandardMetropolisDiscrete(hopfield, spread)

        mcmc_params = MCMCParams(start=asarray(current_state,
                                               dtype=numpy.bool8),
                                 num_iterations=num_iterations)
        chain = MCMCChain(mcmc_sampler, mcmc_params)

        chain.append_mcmc_output(StatisticsOutput(plot_times=True, lag=1000))
        # chain.append_mcmc_output(StoreChainOutput(".", lag=100000))

        #     chain.append_mcmc_output(DiscretePlottingOutput(plot_from=0, lag=100))
        chain.run()

        # dump chain; failing to save it is reported but not fatal
        try:
            with open(filename_chain, "w") as f:
                dump(chain, f)
        except IOError:
            print("Could not save MCMC chain")

        # warmup and thin
        Z = chain.samples[(warm_up):]
        Z = Z[arange(len(Z), step=thin)]
        Z = Z.astype(numpy.bool8)

        # dump ground truth samples; failing to save is reported but not fatal
        try:
            with open(filename_Z, "w") as f:
                dump(Z, f)
        except IOError:
            print("Could not save Z")

    return Z, hopfield