def test_verify_AIS(self):
    model = RBM(input_size=self.input_size, hidden_size=self.hidden_size)
    model.W.set_value(self.W)
    model.b.set_value(self.b)
    model.c.set_value(self.c)

    # Brute force
    print "Computing lnZ using brute force (i.e. summing the free energy of all possible $v$)..."
    V = theano.shared(value=cartesian([(0, 1)] * self.input_size, dtype=config.floatX))
    brute_force_lnZ = logsumexp(-model.free_energy(V), 0)
    f_brute_force_lnZ = theano.function([], brute_force_lnZ)

    params_bak = [param.get_value() for param in model.parameters]

    print "Approximating lnZ using AIS..."
    import time
    start = time.time()

    try:
        experiment_path = tempfile.mkdtemp()
        result = compute_AIS(model, M=self.nb_samples, betas=self.betas, seed=1234,
                             experiment_path=experiment_path, force=True)
        logcummean_Z = result['logcummean_Z']
        logcumstd_Z_down = result['logcumstd_Z_down']
        logcumstd_Z_up = result['logcumstd_Z_up']
        std_lnZ = result['std_lnZ']

        print "{0} sec".format(time.time() - start)

        import pylab as plt
        plt.gca().set_xmargin(0.1)
        plt.errorbar(range(1, self.nb_samples + 1), logcummean_Z,
                     yerr=[std_lnZ, std_lnZ], fmt='or')
        plt.errorbar(range(1, self.nb_samples + 1), logcummean_Z,
                     yerr=[logcumstd_Z_down, logcumstd_Z_up], fmt='ob')
        plt.plot([1, self.nb_samples], [f_brute_force_lnZ()] * 2, '--g')
        plt.ticklabel_format(useOffset=False, axis='y')
        plt.show()

        AIS_logZ = logcummean_Z[-1]

        # AIS must leave the model's parameters untouched.
        assert_array_equal(params_bak[0], model.W.get_value())
        assert_array_equal(params_bak[1], model.b.get_value())
        assert_array_equal(params_bak[2], model.c.get_value())

        print "Absolute diff:", np.abs(AIS_logZ - f_brute_force_lnZ())
        assert_almost_equal(AIS_logZ, f_brute_force_lnZ(), decimal=2)
    finally:
        shutil.rmtree(experiment_path)
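# The identity behind the brute-force reference above: for a binary RBM,
# $\ln Z = \ln \sum_{v \in \{0,1\}^{input\_size}} e^{-F(v)}$, i.e. a logsumexp of the
# negative free energy over all $2^{input\_size}$ visible configurations. The AIS
# estimate `logcummean_Z[-1]` should agree with this value within its error bars.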
def test_gradients_auto_vs_manual(self):
    rng = np.random.RandomState(42)
    batch_size = 5
    input_size = 10
    model = iRBM(input_size=input_size, hidden_size=32, beta=1.01, CDk=1,
                 rng=np.random.RandomState(42))

    W = rng.rand(model.hidden_size, model.input_size).astype(theano.config.floatX)
    model.W = theano.shared(value=W, name='W', borrow=True)
    b = rng.rand(model.hidden_size).astype(theano.config.floatX)
    model.b = theano.shared(value=b, name='b', borrow=True)
    c = rng.rand(model.input_size).astype(theano.config.floatX)
    model.c = theano.shared(value=c, name='c', borrow=True)
    params = [model.W, model.b, model.c]

    chain_start = T.matrix('start')
    chain_end = T.matrix('end')

    chain_start_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
    chain_end_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
    chain_start.tag.test_value = chain_start_value
    chain_end.tag.test_value = chain_end_value

    ### Computing gradients using automatic differentiation ###
    cost = T.mean(model.free_energy(chain_start)) - T.mean(model.free_energy(chain_end))
    gparams_auto = T.grad(cost, params, consider_constant=[chain_end])

    ### Computing gradients manually ###
    h = RBM.sample_h_given_v(model, chain_start, return_probs=True)
    _h = RBM.sample_h_given_v(model, chain_end, return_probs=True)
    icdf = model.icdf_z_given_v(chain_start)
    _icdf = model.icdf_z_given_v(chain_end)

    if model.penalty == "softplus_bi":
        penalty = model.beta * T.nnet.sigmoid(model.b)
    elif model.penalty == "softplus0":
        penalty = model.beta * T.nnet.sigmoid(0)

    grad_W = (T.dot(chain_end.T, _h*_icdf) - T.dot(chain_start.T, h*icdf)).T / batch_size
    grad_b = T.mean((_h-penalty)*_icdf - (h-penalty)*icdf, axis=0)
    grad_c = T.mean(chain_end - chain_start, axis=0)

    gparams_manual = [grad_W, grad_b, grad_c]
    grad_W.name, grad_b.name, grad_c.name = "grad_W", "grad_b", "grad_c"

    for gparam_auto, gparam_manual in zip(gparams_auto, gparams_manual):
        param1 = gparam_auto.eval({chain_start: chain_start_value, chain_end: chain_end_value})
        param2 = gparam_manual.eval({chain_start: chain_start_value, chain_end: chain_end_value})
        assert_array_almost_equal(param1, param2, err_msg=gparam_manual.name,
                                  decimal=5)  # decimal=5 needed for float32
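# A sketch of the reasoning behind the manual gradients above (assuming
# `icdf_z_given_v` returns $p(z \ge i \mid v)$ for each hidden unit $i$): every
# unit's contrastive statistic is weighted by the probability that the unit is
# included in the model, and `penalty` is the derivative of the per-unit energy
# penalty $\beta\,\mathrm{softplus}(b_i)$, namely $\beta\,\sigma(b_i)$
# (the "softplus_bi" case).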
def __init__(self, input_size, hidden_size, beta=1, penalty="softplus_bi", *args, **kwargs):
    RBM.__init__(self, input_size, hidden_size, *args, **kwargs)
    self.penalty = penalty
    self.beta = theano.shared(np.array(beta, dtype=theano.config.floatX), name="beta")
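# A minimal instantiation sketch (sizes are illustrative; both oRBM and iRBM
# expose this constructor signature):
#
#   model = oRBM(input_size=784, hidden_size=32, beta=1.01, penalty="softplus_bi")
#   model.beta.get_value()  # `beta` is a Theano shared variable, so it can be changed in place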
def get_base_rate(self, base_rate_type="uniform"):
    base_rate, annealable_params = RBM.get_base_rate(self, base_rate_type)
    #annealable_params.append(self.beta)  # Seems to work better without annealing self.beta (see unit tests)

    if base_rate_type == "uniform":
        def compute_lnZ(self):
            # Since biases and weights are all 0, the visible side contributes
            # $2^{input\_size}$ equally likely states and the hidden side contributes
            # $\sum_{z=1}^H \sum_{h \in \{0,1\}^z} \exp(-\beta z \ln 2)$.
            r = T.exp((1-self.beta) * T.log(2))  # Ratio of the geometric series
            lnZ = T.log((r - r**(self.hidden_size+1)) / (1-r))  # Partial sum of the geometric series
            return (self.input_size * T.log(2) +  # $\ln(2^{input\_size})$
                    lnZ)  # $\ln\big(\sum_{z=1}^H \sum_{h \in \{0,1\}^z} \exp(-\beta z \ln 2)\big)$

    elif base_rate_type == "c":
        def compute_lnZ(self):
            # Since the hidden biases (but not the visible ones) and the weights are all 0.
            r = T.exp((1-self.beta) * T.log(2))  # Ratio of the geometric series
            lnZ = T.log((r - r**(self.hidden_size+1)) / (1-r))  # Partial sum of the geometric series
            return (lnZ +  # $\ln\big(\sum_{z=1}^H \sum_{h \in \{0,1\}^z} \exp(-\beta z \ln 2)\big)$
                    T.sum(T.nnet.softplus(self.c)))

    elif base_rate_type == "b":
        raise NotImplementedError()

    import types
    base_rate.compute_lnZ = types.MethodType(compute_lnZ, base_rate)
    return base_rate, annealable_params
def get_base_rate(self, base_rate_type="uniform"):
    base_rate, annealable_params = RBM.get_base_rate(self, base_rate_type)
    #annealable_params.append(self.beta)  # Seems to work better without annealing self.beta (see unit tests)

    if base_rate_type == "uniform":
        def compute_lnZ(self):
            # Since biases and weights are all 0, the visible side contributes
            # $2^{input\_size}$ equally likely states and the hidden side contributes
            # $\sum_{z=1}^\infty \sum_{h \in \{0,1\}^z} \exp(-\beta z \ln 2)$.
            r = T.exp((1-self.beta) * T.log(2))  # Ratio of the geometric series
            lnZ = T.log(r / (1-r))  # Limit of the infinite geometric series (requires $r < 1$)
            return (self.input_size * T.log(2) +  # $\ln(2^{input\_size})$
                    lnZ)  # $\ln\big(\sum_{z=1}^\infty \sum_{h \in \{0,1\}^z} \exp(-\beta z \ln 2)\big)$

    elif base_rate_type == "c":
        def compute_lnZ(self):
            # Since the hidden biases (but not the visible ones) and the weights are all 0.
            r = T.exp((1-self.beta) * T.log(2))  # Ratio of the geometric series
            lnZ = T.log(r / (1-r))  # Limit of the infinite geometric series (requires $r < 1$)
            return (lnZ +  # $\ln\big(\sum_{z=1}^\infty \sum_{h \in \{0,1\}^z} \exp(-\beta z \ln 2)\big)$
                    T.sum(T.nnet.softplus(self.c)))

    elif base_rate_type == "b":
        raise NotImplementedError()

    import types
    base_rate.compute_lnZ = types.MethodType(compute_lnZ, base_rate)
    return base_rate, annealable_params
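# Both compute_lnZ variants above reduce the hidden-side sum to a geometric series
# with ratio $r = 2^{1-\beta}$:
# $\sum_{z=1}^H \sum_{h \in \{0,1\}^z} e^{-\beta z \ln 2} = \sum_{z=1}^H 2^z\,2^{-\beta z} = \sum_{z=1}^H r^z.$
# The oRBM version uses the partial sum $(r - r^{H+1})/(1-r)$; the iRBM version takes
# the limit $H \to \infty$, giving $r/(1-r)$, which converges only for $r < 1$, i.e. $\beta > 1$.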
def __getstate__(self):
    state = {}
    state.update(RBM.__getstate__(self))
    state["oRBM_version"] = 1

    # Hyper parameters
    state["beta"] = self.beta.get_value()
    state["penalty"] = self.penalty
    return state
class Test_RBM(unittest.TestCase):
    def setUp(self):
        self.input_size = 4
        self.hidden_size = 3
        self.batch_size = 100

        rng = np.random.RandomState(42)
        self.W = rng.randn(self.hidden_size, self.input_size).astype(config.floatX)
        self.b = rng.randn(self.hidden_size).astype(config.floatX)
        self.c = rng.randn(self.input_size).astype(config.floatX)

        self.model = RBM(input_size=self.input_size, hidden_size=self.hidden_size)
        self.model.W.set_value(self.W)
        self.model.b.set_value(self.b)
        self.model.c.set_value(self.c)

    def test_free_energy(self):
        v = T.matrix('v')
        h = T.matrix('h')
        logsumexp_E = theano.function([v, h], -logsumexp(-self.model.E(v, h)))

        v1 = np.random.rand(1, self.input_size).astype(config.floatX)
        H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)
        Fv = logsumexp_E(v1, H)  # Marginalization over $\bh$

        v = T.matrix('v')
        free_energy = theano.function([v], self.model.free_energy(v))
        assert_array_almost_equal(free_energy(v1), [Fv])

        v2 = np.tile(v1, (self.batch_size, 1))
        assert_array_almost_equal(free_energy(v2), [Fv] * self.batch_size)

    def test_marginalize_over_v(self):
        v = T.matrix('v')
        h = T.matrix('h')
        E = theano.function([v, h], -logsumexp(-self.model.E(v, h)))

        h1 = np.random.rand(1, self.hidden_size).astype(config.floatX)
        V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
        expected_energy = E(V, h1)

        h = T.matrix('h')
        marginalize_over_v = theano.function([h], self.model.marginalize_over_v(h))
        assert_array_almost_equal(marginalize_over_v(h1), [expected_energy])

        h2 = np.tile(h1, (self.batch_size, 1))
        assert_array_almost_equal(marginalize_over_v(h2), [expected_energy] * self.batch_size)

    def test_compute_lnZ(self):
        v = T.matrix('v')
        h = T.matrix('h')
        lnZ = theano.function([v, h], logsumexp(-self.model.E(v, h)))

        V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
        H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

        lnZ_using_free_energy = theano.function([v], logsumexp(-self.model.free_energy(v)))
        assert_equal(lnZ_using_free_energy(V), lnZ(V, H))

        lnZ_using_marginalize_over_v = theano.function([h], logsumexp(-self.model.marginalize_over_v(h)))
        assert_almost_equal(lnZ_using_marginalize_over_v(H), lnZ(V, H), decimal=6)

    def test_base_rate(self):
        # All binary combinations for V and H.
        V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
        H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

        base_rates = []
        # Add the uniform base rate, i.e. all parameters of the model are set to 0.
        base_rates.append(self.model.get_base_rate())
        # Add the base rate where visible biases are the ones from the model.
        base_rates.append(self.model.get_base_rate('c'))
        # Add the base rate where hidden biases are the ones from the model.
        base_rates.append(self.model.get_base_rate('b'))  # Not implemented

        for base_rate, annealable_params in base_rates:
            base_rate_lnZ = base_rate.compute_lnZ().eval().astype(config.floatX)

            brute_force_lnZ = logsumexp(-base_rate.E(V, H)).eval()
            assert_almost_equal(brute_force_lnZ.astype(config.floatX), base_rate_lnZ, decimal=6)

            theano_lnZ = logsumexp(-base_rate.free_energy(V), axis=0).eval()
            assert_almost_equal(theano_lnZ.astype(config.floatX), base_rate_lnZ, decimal=6)

            theano_lnZ = logsumexp(-base_rate.marginalize_over_v(H)).eval()
            assert_almost_equal(theano_lnZ.astype(config.floatX), base_rate_lnZ, decimal=6)

    @npt.dec.slow
    def test_binomial_from_uniform_cpu(self):
        # Test using numpy
        rng = np.random.RandomState(42)
        probs = rng.rand(10)

        seed = 1337
        nb_samples = 1000000

        rng = np.random.RandomState(seed)
        success1 = np.zeros(len(probs))
        for i in range(nb_samples):
            success1 += rng.binomial(n=1, p=probs)

        rng = np.random.RandomState(seed)
        success2 = np.zeros(len(probs))
        for i in range(nb_samples):
            success2 += (rng.rand(len(probs)) < probs).astype('int')

        success1 = success1 / nb_samples
        success2 = success2 / nb_samples
        assert_array_almost_equal(success1, success2)

        # Test using Theano's default RandomStreams
        theano_rng = RandomStreams(1337)
        rng_bin = theano_rng.binomial(size=probs.shape, n=1, p=probs, dtype=theano.config.floatX)
        success1 = np.zeros(len(probs))
        for i in range(nb_samples):
            success1 += rng_bin.eval()

        theano_rng = RandomStreams(1337)
        rng_bin = theano_rng.uniform(size=probs.shape, dtype=theano.config.floatX) < probs
        success2 = np.zeros(len(probs))
        for i in range(nb_samples):
            success2 += rng_bin.eval()

        assert_array_almost_equal(success1 / nb_samples, success2 / nb_samples)

        # Test using Theano's sandbox MRG RandomStreams
        theano_rng = MRG_RandomStreams(1337)
        success1 = theano_rng.binomial(size=probs.shape, n=1, p=probs, dtype=theano.config.floatX)
        theano_rng = MRG_RandomStreams(1337)
        success2 = theano_rng.uniform(size=probs.shape, dtype=theano.config.floatX) < probs
        assert_array_equal(success1.eval(), success2.eval())

    def test_gradients_auto_vs_manual(self):
        rng = np.random.RandomState(42)
        batch_size = 5
        input_size = 10
        rbm = RBM(input_size=input_size, hidden_size=32, CDk=1, rng=np.random.RandomState(42))

        W = (rng.rand(rbm.hidden_size, rbm.input_size) > 0.5).astype(theano.config.floatX)
        rbm.W = theano.shared(value=W, name='W', borrow=True)
        b = (rng.rand(rbm.hidden_size) > 0.5).astype(theano.config.floatX)
        rbm.b = theano.shared(value=b, name='b', borrow=True)
        c = (rng.rand(rbm.input_size) > 0.5).astype(theano.config.floatX)
        rbm.c = theano.shared(value=c, name='c', borrow=True)
        params = [rbm.W, rbm.b, rbm.c]

        chain_start = T.matrix('start')
        chain_end = T.matrix('end')

        chain_start_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
        chain_end_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
        chain_start.tag.test_value = chain_start_value
        chain_end.tag.test_value = chain_end_value

        ### Computing gradients using automatic differentiation ###
        cost = T.mean(rbm.free_energy(chain_start)) - T.mean(rbm.free_energy(chain_end))
        gparams_auto = T.grad(cost, params, consider_constant=[chain_end])

        ### Computing gradients manually ###
        h = rbm.sample_h_given_v(chain_start, return_probs=True)
        _h = rbm.sample_h_given_v(chain_end, return_probs=True)

        grad_W = (T.dot(chain_end.T, _h) - T.dot(chain_start.T, h)).T / batch_size
        grad_b = T.mean(_h - h, 0)
        grad_c = T.mean(chain_end - chain_start, 0)

        gparams_manual = [grad_W, grad_b, grad_c]
        grad_W.name, grad_b.name, grad_c.name = "grad_W", "grad_b", "grad_c"

        for gparam_auto, gparam_manual in zip(gparams_auto, gparams_manual):
            param1 = gparam_auto.eval({chain_start: chain_start_value, chain_end: chain_end_value})
            param2 = gparam_manual.eval({chain_start: chain_start_value, chain_end: chain_end_value})
            assert_array_almost_equal(param1, param2, err_msg=gparam_manual.name)
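# The manual gradients checked above are the standard RBM contrastive statistics:
# for the cost $\frac{1}{N}\sum_n F(v^{(0)}_n) - \frac{1}{N}\sum_n F(v^{(k)}_n)$
# (chain start vs. chain end), with $\hat{h} = \sigma(Wv + b)$,
# $\nabla_W = \mathbb{E}_{v^{(k)}}[\hat{h} v^\top] - \mathbb{E}_{v^{(0)}}[\hat{h} v^\top]$,
# $\nabla_b = \mathbb{E}_{v^{(k)}}[\hat{h}] - \mathbb{E}_{v^{(0)}}[\hat{h}]$ and
# $\nabla_c = \mathbb{E}[v^{(k)}] - \mathbb{E}[v^{(0)}]$,
# which are exactly grad_W, grad_b and grad_c in test_gradients_auto_vs_manual.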
def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    # Check that at least one of --view or --save has been given.
    if not args.view and not args.save:
        parser.error("At least one of the following options must be chosen: --view or --save")

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    if not os.path.isfile(pjoin(experiment_path, "model.pkl")):
        parser.error('Cannot find model for experiment: {0}!'.format(experiment_path))

    if not os.path.isfile(pjoin(experiment_path, "hyperparams.json")):
        parser.error('Cannot find hyperparams for experiment: {0}!'.format(experiment_path))

    # Load the experiment's hyperparameters
    hyperparams = utils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))

    with Timer("Loading model"):
        if hyperparams["model"] == "rbm":
            from iRBM.models.rbm import RBM
            model_class = RBM
        elif hyperparams["model"] == "orbm":
            from iRBM.models.orbm import oRBM
            model_class = oRBM
        elif hyperparams["model"] == "irbm":
            from iRBM.models.irbm import iRBM
            model_class = iRBM

        # Load the actual model.
        model = model_class.load(pjoin(experiment_path, "model.pkl"))

    rng = np.random.RandomState(args.seed)

    # Sample from uniform
    # TODO: sample from a Bernoulli distribution parametrized with the visible biases
    chain_start = (rng.rand(args.nb_samples, model.input_size) > 0.5).astype(theano.config.floatX)

    with Timer("Building sampling function"):
        v0 = theano.shared(np.asarray(chain_start, dtype=theano.config.floatX))
        v1 = model.gibbs_step(v0)
        gibbs_step = theano.function([], updates={v0: v1})

        if args.full_gibbs_step:
            print "Using z=K"
            # Use z=K for the first Gibbs step.
            from iRBM.models.rbm import RBM
            h0 = RBM.sample_h_given_v(model, v0)
            v1 = RBM.sample_v_given_h(model, h0)
            v0.set_value(v1.eval())

    with Timer("Sampling"):
        for k in range(args.cdk):
            gibbs_step()

    samples = v0.get_value()

    if args.save:
        np.savez(args.out, samples)

    if args.view:
        if hyperparams["dataset"] == "binarized_mnist":
            image_shape = (28, 28)
        elif hyperparams["dataset"] == "caltech101_silhouettes28":
            image_shape = (28, 28)
        else:
            raise ValueError("Unknown dataset: {0}".format(hyperparams["dataset"]))

        data = vizu.concatenate_images(samples, shape=image_shape, border_size=1, clim=(0, 1))
        plt.imshow(data, cmap=plt.cm.gray, interpolation='nearest')
        plt.show()
def __setstate__(self, state):
    RBM.__setstate__(self, state)

    # Hyper parameters
    self.beta = theano.shared(state['beta'], name="beta")
    self.penalty = state['penalty']
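# Serialization round-trip sketch (hypothetical `model`; relies only on the
# __getstate__/__setstate__ pair above): `beta` is stored as a plain array and
# rebuilt as a fresh Theano shared variable on load.
#
#   state = model.__getstate__()
#   restored = oRBM.__new__(oRBM)  # hypothetical: however the loader builds the instance
#   restored.__setstate__(state)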
def test_gradients_auto_vs_manual(self):
    rng = np.random.RandomState(42)
    batch_size = 5
    input_size = 10
    model = oRBM(input_size=input_size, hidden_size=32, CDk=1, rng=np.random.RandomState(42))

    W = rng.rand(model.hidden_size, model.input_size).astype(theano.config.floatX)
    model.W = theano.shared(value=W, name='W', borrow=True)
    b = rng.rand(model.hidden_size).astype(theano.config.floatX)
    model.b = theano.shared(value=b, name='b', borrow=True)
    c = rng.rand(model.input_size).astype(theano.config.floatX)
    model.c = theano.shared(value=c, name='c', borrow=True)
    params = [model.W, model.b, model.c]

    chain_start = T.matrix('start')
    chain_end = T.matrix('end')

    chain_start_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
    chain_end_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
    chain_start.tag.test_value = chain_start_value
    chain_end.tag.test_value = chain_end_value

    ### Computing gradients using automatic differentiation ###
    cost = T.mean(model.free_energy(chain_start)) - T.mean(model.free_energy(chain_end))
    gparams_auto = T.grad(cost, params, consider_constant=[chain_end])

    ### Computing gradients manually ###
    h = RBM.sample_h_given_v(model, chain_start, return_probs=True)
    _h = RBM.sample_h_given_v(model, chain_end, return_probs=True)
    icdf = model.icdf_z_given_v(chain_start)
    _icdf = model.icdf_z_given_v(chain_end)

    if model.penalty == "softplus_bi":
        penalty = model.beta * T.nnet.sigmoid(model.b)
    elif model.penalty == "softplus0":
        penalty = model.beta * T.nnet.sigmoid(0)

    grad_W = (T.dot(chain_end.T, _h * _icdf) - T.dot(chain_start.T, h * icdf)).T / batch_size
    grad_b = T.mean((_h - penalty) * _icdf - (h - penalty) * icdf, axis=0)
    grad_c = T.mean(chain_end - chain_start, axis=0)

    gparams_manual = [grad_W, grad_b, grad_c]
    grad_W.name, grad_b.name, grad_c.name = "grad_W", "grad_b", "grad_c"

    for gparam_auto, gparam_manual in zip(gparams_auto, gparams_manual):
        param1 = gparam_auto.eval({chain_start: chain_start_value, chain_end: chain_end_value})
        param2 = gparam_manual.eval({chain_start: chain_start_value, chain_end: chain_end_value})
        assert_array_almost_equal(param1, param2, err_msg=gparam_manual.name)
def model_factory(model_name, input_size, hyperparams):
    # Set the learning rate method that will be used.
    if hyperparams["ConstantLearningRate"] is not None:
        infos = hyperparams["ConstantLearningRate"].split()
        lr = float(infos[0])
        lr_method = ConstantLearningRate(lr=lr)
    elif hyperparams["ADAGRAD"] is not None:
        infos = hyperparams["ADAGRAD"].split()
        lr = float(infos[0])
        eps = float(infos[1]) if len(infos) > 1 else 1e-6
        lr_method = ADAGRAD(lr=lr, eps=eps)
    else:
        raise ValueError("The update rule is mandatory!")

    # Set the regularization method that will be used.
    regularization_method = NoRegularization()
    if hyperparams["L1Regularization"] is not None and hyperparams["L1Regularization"] != 0:
        lambda_factor = float(hyperparams["L1Regularization"])
        regularization_method = L1Regularization(lambda_factor)
    elif hyperparams["L2Regularization"] is not None and hyperparams["L2Regularization"] != 0:
        lambda_factor = float(hyperparams["L2Regularization"])
        regularization_method = L2Regularization(lambda_factor)

    # Set the contrastive divergence method to use.
    CD_method = ContrastiveDivergence()
    if hyperparams["PCD"]:
        CD_method = PersistentCD(input_size, nb_particles=hyperparams['batch_size'])

    rng = np.random.RandomState(hyperparams["seed"])

    # Build the model
    if model_name == "rbm":
        from iRBM.models.rbm import RBM
        model = RBM(input_size=input_size,
                    hidden_size=hyperparams["size"],
                    learning_rate=lr_method,
                    regularization=regularization_method,
                    CD=CD_method,
                    CDk=hyperparams["cdk"],
                    rng=rng)
    elif model_name == "orbm":
        from iRBM.models.orbm import oRBM
        model = oRBM(input_size=input_size,
                     hidden_size=hyperparams["size"],
                     beta=hyperparams["beta"],
                     learning_rate=lr_method,
                     regularization=regularization_method,
                     CD=CD_method,
                     CDk=hyperparams["cdk"],
                     rng=rng)
    elif model_name == "irbm":
        from iRBM.models.irbm import iRBM
        model = iRBM(input_size=input_size,
                     hidden_size=hyperparams["size"],
                     beta=hyperparams["beta"],
                     learning_rate=lr_method,
                     regularization=regularization_method,
                     CD=CD_method,
                     CDk=hyperparams["cdk"],
                     rng=rng)

    return model
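# Usage sketch for model_factory (values are illustrative only; the dict keys match
# exactly what the function reads above, and the classes it uses are assumed to be
# imported at the top of this module):
if __name__ == "__main__":
    example_hyperparams = {"ConstantLearningRate": "0.01", "ADAGRAD": None,
                           "L1Regularization": None, "L2Regularization": None,
                           "PCD": True, "batch_size": 64, "seed": 1234,
                           "size": 32, "cdk": 1, "beta": 1.01}
    # Builds an iRBM with constant learning rate, no regularization and PCD.
    model = model_factory("irbm", input_size=784, hyperparams=example_hyperparams)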