Example #1
    def test_unbiased_checkerboard(self):
        """
        Test that checkerboard problem returns both valid solutions.
        """

        sampler = GibbsSampler()
        checkerboard = IsingModel(J={(0, 1): 1, (1, 2): 1, (2, 3): 1, (3, 0): 1}, h={})
        result = sampler.sample(checkerboard, 10000)

        solutions = [s.as_tuple for s in result]
        assert (-1, 1, -1, 1) in solutions
        assert (1, -1, 1, -1) in solutions
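For reference, here is a minimal standalone sketch of the single-spin Gibbs update such a sampler performs, assuming the energy convention E(s) = Σ J_ij s_i s_j + Σ h_i s_i that these tests imply; the project's IsingModel and GibbsSampler APIs are not reproduced here.

# Standalone sketch, not the project's GibbsSampler.
import numpy as np

def gibbs_ising(J, h, n_spins, n_sweeps, beta=1.0, rng=None):
    rng = rng or np.random.default_rng()
    s = rng.choice([-1, 1], size=n_spins)
    neighbors = {i: [] for i in range(n_spins)}
    for (i, j), w in J.items():
        neighbors[i].append((j, w))
        neighbors[j].append((i, w))
    for _ in range(n_sweeps):
        for i in range(n_spins):
            # Local field felt by spin i; setting s_i = +1 costs +field energy.
            field = h.get(i, 0) + sum(w * s[j] for j, w in neighbors[i])
            p_up = 1.0 / (1.0 + np.exp(2.0 * beta * field))
            s[i] = 1 if rng.random() < p_up else -1
    return tuple(s)

# Both ground states of the unbiased ring should appear across restarts.
J = {(0, 1): 1, (1, 2): 1, (2, 3): 1, (3, 0): 1}
print({gibbs_ising(J, {}, 4, 100) for _ in range(20)})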
Example #2
def main():
    """
    Main program.
    """
    data = initialize()

    g = GibbsSampler(sequences=data['sequences'],
                     motif_width=data['width'])

    g.find_motif(iterations=data['iterations'])

    print_sequences(data['sequences'], data['width'])
    return
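For context, here is a sketch of the classic Gibbs motif-sampling step: hold one sequence out, build a profile from the remaining motifs, then resample the held-out sequence's motif start from the profile scores. The GibbsSampler above presumably wraps a loop like this; the function below is an assumption, not its actual internals.

# Hypothetical illustration of one Gibbs motif-sampling sweep.
import numpy as np

def gibbs_motif_step(sequences, positions, width, rng, alphabet='ACGT'):
    """Resample each sequence's motif start position once."""
    idx = {c: i for i, c in enumerate(alphabet)}
    for held_out in range(len(sequences)):
        # Pseudocount profile built from all motifs except the held-out one.
        profile = np.ones((len(alphabet), width))
        for j, seq in enumerate(sequences):
            if j == held_out:
                continue
            for k, c in enumerate(seq[positions[j]:positions[j] + width]):
                profile[idx[c], k] += 1
        profile /= profile.sum(axis=0)
        # Score every candidate start in the held-out sequence ...
        seq = sequences[held_out]
        scores = np.array([
            np.prod([profile[idx[c], k] for k, c in enumerate(seq[s:s + width])])
            for s in range(len(seq) - width + 1)
        ])
        # ... and sample the new start proportionally to its score.
        positions[held_out] = rng.choice(len(scores), p=scores / scores.sum())
    return positions

Iterating this step until the positions stabilize is the whole algorithm; find_motif(iterations=...) above presumably runs a fixed number of such sweeps.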
Example #3
class Solver:
    def __init__(self, train_file_name, test_file_name):
        self.train_file = train_file_name

    # Calculate the log of the posterior probability of a given sentence
    #  with a given part-of-speech labeling.
    def posterior(self, model, sentence, label):
        if model == "Simple":
            simple_instance = Simplified(sentence)
            return simple_instance.calc_posterior(self.trainer_builder, label)
        elif model == "Complex":
            return self.gibbs_instance.calculate_posterior(
                sentence, [item.upper() for item in label])
        elif model == "HMM":
            viterbi_instance = Viterbi(sentence)
            return viterbi_instance.calc_posterior(self.trainer_builder, label)
        else:
            print("Unknown algo!")

    # Do the training!
    #
    def train(self, data):
        self.trainer_builder = Trainer_builder(self.train_file).get_trainer()

    # Functions for each algorithm.
    #
    def simplified(self, sentence):
        simple_instance = Simplified(sentence)
        return simple_instance.get_most_probable_tags(self.trainer_builder)

    def complex_mcmc(self, sentence):
        self.gibbs_instance = GibbsSampler(self.trainer_builder)
        return self.gibbs_instance.do_gibbs(sentence)

    def hmm_viterbi(self, sentence):
        viterbi_instance = Viterbi(sentence)
        self.hmm_tags = viterbi_instance.get_most_probable_tags(
            self.trainer_builder)[0]
        return self.hmm_tags

    # This solve() method is called by label.py, so you should keep the interface the
    #  same, but you can change the code itself.
    # It should return a list of part-of-speech labelings of the sentence, one
    #  part of speech per word.
    #
    def solve(self, model, sentence):
        if model == "Simple":
            return self.simplified(sentence)
        elif model == "Complex":
            return self.complex_mcmc(sentence)
        elif model == "HMM":
            output = self.hmm_viterbi(sentence)
            return output
        else:
            print("Unknown algo!")
Example #4
def main():
    model_width = 4
    model_height = 4
    temperature = 1
    decimals = 2

    builder = RandomBuilder(model_width, model_height)
    model = builder.generate(decimals=decimals)

    # First produce a histogram with the D-Wave sampler
    sampler = DWaveSampler()
    results_dwave = sample_and_histogram(sampler, model, temperature)

    # Then with the Gibbs sampler
    sampler = GibbsSampler(n_variables=model_height * model_width)
    results_gibbs = sample_and_histogram(sampler, model, temperature)

    save_experiment("histogram", {
        'width': model_width,
        'height': model_height,
        'model_decimals': decimals,
        'temperature': temperature,
        'dwave_samples': results_dwave.raw_data,
        'gibbs_samples': results_gibbs.raw_data
    })
Example #5
    def __init__(self, f, X, y, x_range, eval_only, extra, options):
        self.f = f
        self.x_range = x_range
        self.options = options
        self.well_defined = X.shape[0] > 0
        self.solver = GibbsSampler(X, y, options)
        self.eval_only = eval_only
        self.opt_n = options['opt_n']
        self.dx = options['dx']
        if eval_only:
            self.newX = extra
        else:
            self.n_bo = extra
            self.opt_n = np.maximum(self.opt_n, self.n_bo * 2)

        self.max_value = self.options['max_value']
        self.n_bo_top_percent = self.options['n_bo_top_percent']
Example #6
    def test_biased_checkerboard(self):
        """
        Test that biased checkerboard (at one qubit) returns only one solution.
        """

        sampler = GibbsSampler()
        biased_checkerboard = IsingModel(J={(0, 1): 1, (1, 2): 1, (2, 3): 1, (3, 0): 1}, h={0: 2})
        result = sampler.sample(biased_checkerboard, 10000)

        solutions = [s.as_tuple for s in result]
        assert (-1, 1, -1, 1) in solutions
        assert len(solutions) == 1

        # Enforce the opposite checkerboard
        biased_checkerboard = IsingModel(J={(0, 1): 1, (1, 2): 1, (2, 3): 1, (3, 0): 1}, h={0: -2})
        result = sampler.sample(biased_checkerboard, 10000)

        solutions = [s.as_tuple for s in result]
        assert (1, -1, 1, -1) in solutions
        assert len(solutions) == 1
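As a quick check on the bias term, under the Ising energy convention E(s) = Σ J_ij s_i s_j + Σ h_i s_i implied by these tests, the field h = {0: 2} separates the two checkerboards by 4 units of energy:

# Worked check of the two checkerboard energies under h = {0: 2}.
J = {(0, 1): 1, (1, 2): 1, (2, 3): 1, (3, 0): 1}
h = {0: 2}

def energy(s, J, h):
    return (sum(w * s[i] * s[j] for (i, j), w in J.items())
            + sum(w * s[i] for i, w in h.items()))

print(energy((-1, 1, -1, 1), J, h))  # -4 + (-2) = -6: favored
print(energy((1, -1, 1, -1), J, h))  # -4 + 2 = -2: suppressed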
Example #7
class bo(object):
    def __init__(self, f, X, y, x_range, eval_only, extra, options):
        self.f = f
        self.t = options['t']
        self.x_range = x_range
        self.options = options
        self.well_defined = X.shape[0] > 0
        self.solver = GibbsSampler(X, y, options)
        self.eval_only = eval_only
        self.opt_n = options['opt_n']
        self.dx = options['dx']
        if eval_only:
            self.newX = extra
        else:
            self.n_bo = extra
            self.opt_n = np.maximum(self.opt_n, self.n_bo * 2)

        if 'ziw' in options:
            self.max_value = self.options['max_value'] + np.random.normal() * 0.05
        else:
            self.max_value = self.options['max_value']

        self.n_bo_top_percent = self.options['n_bo_top_percent']

    def learn(self):
        self.gp, self.z, self.k = self.solver.run(self.options['gibbs_iter'])

    def run(self):
        if self.eval_only:
            ynew = [self.f(x) for x in self.newX]
            return ynew

        # return random inputs if X is empty
        if not self.well_defined:
            xnew = np.random.uniform(self.x_range[0], self.x_range[1],
                                     (self.n_bo, self.dx))
            acfnew = [self.max_value] * self.n_bo
            return xnew, acfnew, self.solver.z, self.solver.k, self.max_value

        # learn and optimize
        self.learn()
        # initialization
        xnew = np.empty((self.n_bo, self.dx))
        xnew[0] = np.random.uniform(self.x_range[0], self.x_range[1])
        # optimize group by group
        all_cat = np.unique(self.z)
        for a in np.random.permutation(all_cat):
            active = self.z == a
            # acquisition: lower confidence bound restricted to the active group
            af = lambda x: MAX_VALUE(x, xnew[0], active, self.max_value, self.gp)
            xnew[:, active], acfnew = global_minimize(
                af, self.x_range[:, active], self.opt_n, self.n_bo,
                self.n_bo_top_percent)
        return xnew, acfnew, self.z, self.k, self.max_value
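global_minimize is not shown in this example. A plausible sketch consistent with its call signature (acquisition function, per-dimension box bounds, number of candidate evaluations, number of returned points, top-percent exploitation mix) is random-search minimization; this is an assumption, not the project's implementation.

# Hypothetical sketch of global_minimize consistent with the call above.
import numpy as np

def global_minimize(af, x_range, opt_n, n_return, top_percent, rng=None):
    """Minimize acquisition af over a box by random search.

    x_range is a (2, d) array of lower/upper bounds. Returns n_return
    points: a top_percent fraction taken from the best candidates, the
    rest drawn at random, plus the matching acquisition values.
    """
    rng = rng or np.random.default_rng()
    d = x_range.shape[1]
    cand = rng.uniform(x_range[0], x_range[1], (opt_n, d))
    vals = np.array([af(x) for x in cand])
    order = np.argsort(vals)
    n_top = max(1, int(np.ceil(n_return * top_percent)))
    picks = list(order[:n_top])
    # Fill the remainder with random exploration points.
    while len(picks) < n_return:
        picks.append(rng.integers(opt_n))
    picks = np.array(picks[:n_return])
    return cand[picks], vals[picks]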
Example #8
parser.add_argument("--resume", help = "Specify if you want to resume sampling", action = "store_true")
parser.add_argument("-y", "--yaml", help = "Specify yaml file")
args = parser.parse_args()


yaml_file = args.yaml
with open(yaml_file) as yml:
    cdict = yaml.load(yml, Loader=yaml.FullLoader)

Ldict = {key: cdict[key] for key in cdict
         if key in LikelihoodFg.__init__.__code__.co_varnames}
Lclass = LikelihoodFg(**Ldict)

Gdict = {key: cdict[key] for key in cdict
         if key in GibbsSampler.__init__.__code__.co_varnames}

Nstep = 30000
gibbs_sampler = GibbsSampler(Nstep, Lclass, args.resume, **Gdict)
gibbs_sampler.run()


def get_lag(id, kmax, accepted):
    """Autocorrelation of chain `id` at lags 1..kmax."""
    a = accepted[:, id]
    a_avg = np.mean(a)
    N = len(a)
    denom = np.sum((a - a_avg) ** 2)
    lag = []
    for k in range(1, kmax + 1):
        num = 0.0
        for i in range(0, N - k):
            num += (a[i] - a_avg) * (a[i + k] - a_avg)
        lag.append(num / denom)  # normalize so the lag-0 autocorrelation is 1
    return np.array(lag)
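A typical use of this autocorrelation, sketched below, is the integrated autocorrelation time τ ≈ 1 + 2 Σ_k ρ(k), which says how many correlated Gibbs steps are worth one independent draw; the truncation rule here is a simple heuristic, not a tuned estimator.

# Sketch: effective sample size from the autocorrelation computed above.
def integrated_autocorr_time(chain_id, accepted, kmax=200):
    rho = get_lag(chain_id, kmax, accepted)
    # Truncate the sum at the first negative estimate (a common heuristic).
    stop = np.argmax(rho < 0) if np.any(rho < 0) else len(rho)
    return 1.0 + 2.0 * np.sum(rho[:stop])

# n_eff = N / tau: roughly this many independent draws in a chain of length N.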
Example #9
    def complex_mcmc(self, sentence):
        self.gibbs_instance = GibbsSampler(self.trainer_builder)
        return self.gibbs_instance.do_gibbs(sentence)
Example #10
import numpy as np

import isd  # provides Ensemble, take_time and crosscorr used below
from gibbs import GibbsSampler
from csb.io import dump, load


def create_posterior(filename):
    """Execute a config script that defines `posterior` and return it."""
    namespace = {}
    with open(filename) as script:
        exec(script.read(), namespace)
    return namespace['posterior']


n_particles = (100, 200)[0]
isdfile = 'posterior_betagal_{}.py'.format(n_particles)

posterior = create_posterior(isdfile)
diameter = posterior.priors[0].forcefield.d[0, 0]
gibbs = GibbsSampler(posterior, stepsize=1e-2)
samples = isd.Ensemble(posterior.params)

# run Gibbs sampler
with isd.take_time('Gibbs sampling'):
    while len(samples) < 500:
        with isd.take_time('Gibbs step'):
            next(gibbs)
        samples.update(posterior.params)
        cc = []
        for image in posterior.likelihoods:
            cc.append(isd.crosscorr(image.data, image.mock.get()))
        print(np.round([np.mean(cc), np.min(cc), np.max(cc)], 3) * 100)

Example #11
'''
This is an example using 3 input ensembles of Adenylate Kinase (pdb_chainID: 1AKE_A, 4AKE_A, 1ANK_A).
'''
import numpy as np
from gibbs import load_coordinates, GibbsSampler
from matplotlib import pyplot as plt

input_coordinate = load_coordinates('1AKE_A', '4AKE_A', '1ANK_A')

# Run the Gibbs sampler using 2 priors
gibb = GibbsSampler(input_coordinate, prior=2)
gibb.run(500)

print('Number of domains =', np.unique(gibb.membership).shape[0])
print('Membership:', gibb.membership)
print('Log likelihood =', gibb.log_likelihood)

plt.plot(gibb.membership)
plt.title('Membership')
plt.show()
Example #12
def main():
    res = []

    for width in range(1, 2):
        for height in range(width, 3):
            model_width = width * 2
            model_height = height * 2
            decimals = 2
            temperature = 1
            runs = 2
            print("{} x {}".format(model_width, model_height))

            # Create the model
            builder = RandomBuilder(model_width, model_height)
            model = builder.generate(decimals=decimals)

            # Compute partition function of this model
            bruteforcer = BruteforceSampler()
            Z = bruteforcer.partition_function(model, temperature)

            # First produce a graph with D-Wave
            sampler = DWaveSampler()
            results_dwave_prob = [
                sampler.sample(model, 10000, temperature) for _ in range(runs)
            ]

            embedding = builder.embedding_two()
            results_dwave_two = [
                sampler.sample(model, 10000, temperature, embedding=embedding)
                for _ in range(runs)
            ]

            embedding = builder.embedding_four()
            results_dwave_four = [
                sampler.sample(model, 10000, temperature, embedding=embedding)
                for _ in range(runs)
            ]

            sampler = GibbsSampler(n_variables=model_width * model_height)
            results_gibbs = [
                sampler.sample(model, 10000, temperature) for _ in range(runs)
            ]

            def compute_kl(r_list):
                return [r.KL_divergence(Z, temperature) for r in r_list]

            kl_gibbs = compute_kl(results_gibbs)
            kl_dwave_prob = compute_kl(results_dwave_prob)
            kl_dwave_two = compute_kl(results_dwave_two)
            kl_dwave_four = compute_kl(results_dwave_four)

            print("KL divergence: {}".format(kl_gibbs))
            print("KL divergence: {}".format(kl_dwave_prob))
            print("KL divergence: {}".format(kl_dwave_two))
            print("KL divergence: {}".format(kl_dwave_four))

            res += [{
                'type': 'gibbs',
                'width': model_width,
                'height': model_height,
                'kl': kl
            } for kl in kl_gibbs]
            res += [{
                'type': 'dwaveP',
                'width': model_width,
                'height': model_height,
                'kl': kl
            } for kl in kl_dwave_prob]
            res += [{
                'type': 'dwave2',
                'width': model_width,
                'height': model_height,
                'kl': kl
            } for kl in kl_dwave_two]
            res += [{
                'type': 'dwave4',
                'width': model_width,
                'height': model_height,
                'kl': kl
            } for kl in kl_dwave_four]

            df = pandas.DataFrame.from_dict(res)
            # Compute the timestamp once so both files share the same name.
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            df.to_csv("data_kl_{}.csv".format(timestamp))
            df.to_json("data_kl_{}.json".format(timestamp))
            save_experiment("kl", {
                'result': res,
            })
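For reference, the quantity compared here is presumably the KL divergence between the empirical distribution of the samples and the model's Boltzmann distribution, D(p̂ ∥ p) = Σ_s p̂(s) log(p̂(s)/p(s)) with p(s) = exp(−E(s)/T)/Z. A standalone sketch follows; the project's KL_divergence method may differ in details.

# Hedged sketch: KL divergence of empirical samples from a Boltzmann law.
import math
from collections import Counter

def kl_from_boltzmann(samples, energy, Z, temperature):
    """D(p_hat || p) over observed states; p(s) = exp(-E(s)/T) / Z."""
    counts = Counter(samples)  # samples are hashable states, e.g. spin tuples
    n = len(samples)
    kl = 0.0
    for state, c in counts.items():
        p_hat = c / n
        p = math.exp(-energy(state) / temperature) / Z
        kl += p_hat * math.log(p_hat / p)
    return kl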
Example #13
def main():
    ### Parse command line options
    usage = "usage: %prog [options] datafile"
    cmdline_parser = OptionParser(usage=usage)
    cmdline_parser.add_option('-o',
                              '--output',
                              dest='output_filename',
                              metavar='FILE',
                              default='results.pkl',
                              help='Serialize results to pickle FILE')
    cmdline_parser.add_option('-m',
                              '--model',
                              dest='model',
                              metavar='FILE',
                              default='model',
                              help='Choose model to use')
    cmdline_parser.add_option('-v',
                              '--verbose',
                              dest='loglevel',
                              default=logging.WARNING,
                              action='store_const',
                              const=logging.DEBUG,
                              help='Debug logging level mode')
    cmdline_parser.add_option('-i',
                              '--iterations',
                              dest='iterations',
                              type='int',
                              default=25000,
                              help='Number of Gibbs iterations')
    cmdline_parser.add_option('-b',
                              '--burnin',
                              dest='burnin',
                              type='int',
                              default=500,
                              help='Number of burn-in iterations')
    cmdline_parser.add_option('-s',
                              '--subsample',
                              dest='subsample',
                              type='int',
                              default=10,
                              help='Subsample rate')
    cmdline_parser.add_option('-t',
                              '--start',
                              dest='start',
                              type='int',
                              default=None,
                              help='start')
    cmdline_parser.add_option('-e',
                              '--end',
                              dest='end',
                              type='int',
                              default=None,
                              help='end')
    cmdline_parser.add_option('-g',
                              '--visualize',
                              dest='visualize',
                              action='store_true',
                              help='Visualize intermediate results')
    cmdline_parser.add_option('-G',
                              '--visualize-priors',
                              dest='visualize_priors',
                              action='store_true',
                              help='Visualize prior distributions')
    cmdline_parser.add_option(
        '-p',
        '--parameter-file',
        dest='parameter_filename',
        help='Use known parameters in file (i.e. simulated file).')
    options, args = cmdline_parser.parse_args()

    logging.basicConfig(level=options.loglevel,
                        format='%(asctime)s %(message)s')
    data_filename = args[0]

    gibbs_iters = options.iterations
    burnin = options.burnin
    subsample = options.subsample
    model_module = __import__(options.model)

    known_params = None  # None when no parameter file is given
    if options.parameter_filename is not None:
        with open(options.parameter_filename, 'rb') as f:
            known_params = pickle.load(f)

    # Build model and load data
    data = load_as_frame(data_filename, start=options.start, end=options.end)
    model = model_module.define_model(data, known_params)

    # Setup gibbs sampler
    sampler = GibbsSampler(model=model,
                           iterations=gibbs_iters,
                           burnin=burnin,
                           subsample=subsample)
    if options.visualize:
        sampler.add_visualizer(model_module.visualize_gibbs)
    if options.visualize_priors:
        model_module.visualize_priors(model.priors)

    # Begin sampling
    start_time = time.time()
    sampler.run()
    gibbs_time = time.time() - start_time
    print "Gibbs sampler ran %d.1 minutes" % (gibbs_time / 60.)

    # Write out results
    results = {}
    results['options'] = options
    results['variable_names'] = model.variable_names
    results['known_params'] = model.known_params
    results['hyper_params'] = model.hyper_params
    results['filename'] = data_filename
    results['data'] = data
    results['gibbs_results'] = sampler.results()
    with open(options.output_filename, 'wb') as f:
        pickle.dump(results, f)
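As context for the iterations, burnin and subsample options, the standard post-processing they name is: drop the first burnin draws, then keep every subsample-th one. A one-line sketch (the GibbsSampler presumably applies this internally):

# Burn-in and thinning for a chain stored as a sequence of draws.
def thin(chain, burnin, subsample):
    return chain[burnin::subsample]

# Example: 25000 iterations, burnin=500, subsample=10 -> 2450 retained draws.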