def _cost(self, model, data): """ A fake cost that we differentiate symbolically to derive the SML update rule. Parameters ---------- model : Model data : Batch in get_data_specs format Returns ------- cost : 0-d Theano tensor The fake cost """ if not hasattr(self, 'sampler'): self.sampler = BlockGibbsSampler( rbm=model, particles=0.5 + np.zeros( (self.nchains, model.get_input_dim())), rng=model.rng, steps=self.nsteps) # compute negative phase updates sampler_updates = self.sampler.updates() # Compute SML cost pos_v = data neg_v = self.sampler.particles ml_cost = (model.free_energy(pos_v).mean() - model.free_energy(neg_v).mean()) return ml_cost
def _cost(self, model, data): """ A fake cost that we differentiate symbolically to derive the SML update rule. Parameters ---------- model : Model data : Batch in get_data_specs format Returns ------- cost : 0-d Theano tensor The fake cost """ if not hasattr(self, 'sampler'): self.sampler = BlockGibbsSampler( rbm=model, particles=0.5+np.zeros((self.nchains, model.get_input_dim())), rng=model.rng, steps=self.nsteps) # compute negative phase updates sampler_updates = self.sampler.updates() # Compute SML cost pos_v = data neg_v = self.sampler.particles ml_cost = (model.free_energy(pos_v).mean() - model.free_energy(neg_v).mean()) return ml_cost
def _cost(self, model, data): if not hasattr(self,'sampler'): self.sampler = BlockGibbsSampler( rbm=model, particles=0.5+np.zeros((self.nchains,model.get_input_dim())), rng=model.rng, steps=self.nsteps) # compute negative phase updates sampler_updates = self.sampler.updates() # Compute SML cost pos_v = data neg_v = self.sampler.particles ml_cost = (model.free_energy(pos_v).mean()- model.free_energy(neg_v).mean()) return ml_cost
class SML(Cost):
    """
    Stochastic Maximum Likelihood

    See "On the convergence of Markovian stochastic algorithms with
    rapidly decreasing ergodicity rates" by Laurent Younes (1998)

    Also known as Persistent Contrastive Divergence (PCD)

    See "Training restricted boltzmann machines using approximations to
    the likelihood gradient" by Tijmen Tieleman (2008)

    The number of particles fits the batch size.

    Parameters
    ----------
    batch_size: int
        Batch size of the training algorithm
    nsteps: int
        Number of steps made by the block Gibbs sampler between each epoch
    """

    def __init__(self, batch_size, nsteps):
        super(SML, self).__init__()
        # One persistent chain per example in a batch.
        self.nchains = batch_size
        self.nsteps = nsteps

    def get_gradients(self, model, data, **kwargs):
        """
        Returns
        -------
        gradients : OrderedDict
            Maps each model parameter to its SML gradient estimate.
        updates : OrderedDict
            Theano updates that advance the persistent Gibbs chains.
        """
        cost = self._cost(model, data, **kwargs)

        params = list(model.get_params())
        # Hold the particles constant: SML only differentiates through
        # the free-energy terms, not through the sampling procedure.
        grads = T.grad(cost, params,
                       disconnected_inputs='ignore',
                       consider_constant=[self.sampler.particles])
        gradients = OrderedDict(izip(params, grads))

        # Advance the persistent chains alongside each parameter update.
        updates = OrderedDict()
        updates.update(self.sampler.updates())
        return gradients, updates

    def _cost(self, model, data):
        """
        A fake cost that we differentiate symbolically to derive the SML
        update rule.

        Parameters
        ----------
        model : Model
        data : Batch in get_data_specs format

        Returns
        -------
        cost : 0-d Theano tensor
            The fake cost
        """
        # Build the persistent chains once, initialised at 0.5.
        if not hasattr(self, 'sampler'):
            self.sampler = BlockGibbsSampler(
                rbm=model,
                particles=0.5 + np.zeros(
                    (self.nchains, model.get_input_dim())),
                rng=model.rng,
                steps=self.nsteps)

        # Compute SML cost: mean free energy of the data minus mean
        # free energy of the negative-phase particles. (A previous dead
        # store of self.sampler.updates() was removed; get_gradients
        # fetches the updates itself.)
        pos_v = data
        neg_v = self.sampler.particles
        return (model.free_energy(pos_v).mean() -
                model.free_energy(neg_v).mean())

    def expr(self, model, data):
        """SML has no tractable objective to monitor; return None."""
        return None

    def get_data_specs(self, model):
        """Request batches in the model's input space and source."""
        return (model.get_input_space(), model.get_input_source())
class SML(Cost):
    """
    Stochastic Maximum Likelihood, also known as Persistent
    Constrastive Divergence (PCD).

    References
    ----------
    Laurent Younes (1998), "On the convergence of Markovian stochastic
    algorithms with rapidly decreasing ergodicity rates".

    Tijmen Tieleman (2008), "Training restricted boltzmann machines
    using approximations to the likelihood gradient".
    """

    def __init__(self, batch_size, nsteps):
        """
        The number of particles fits the batch size.

        Parameters
        ---------
        batch_size: int
            batch size of the training algorithm
        nsteps: int
            number of steps made by the block Gibbs sampler between
            each epoch
        """
        super(SML, self).__init__()
        self.nchains = batch_size
        self.nsteps = nsteps

    def get_gradients(self, model, data, **kwargs):
        # Differentiate the fake cost with the chain particles held
        # constant to obtain the SML gradient estimates.
        fake_cost = self._cost(model, data, **kwargs)

        params = list(model.get_params())
        grad_exprs = T.grad(fake_cost, params,
                            disconnected_inputs='ignore',
                            consider_constant=[self.sampler.particles])
        gradients = OrderedDict(izip(params, grad_exprs))

        updates = OrderedDict()
        sampler_updates = self.sampler.updates()
        updates.update(sampler_updates)
        return gradients, updates

    def _cost(self, model, data):
        # Instantiate the persistent chains on first call, with every
        # particle starting at 0.5.
        if not hasattr(self, 'sampler'):
            init_particles = 0.5 + np.zeros((self.nchains,
                                             model.get_input_dim()))
            self.sampler = BlockGibbsSampler(rbm=model,
                                             particles=init_particles,
                                             rng=model.rng,
                                             steps=self.nsteps)

        # compute negative phase updates
        sampler_updates = self.sampler.updates()

        # Compute SML cost
        data_term = model.free_energy(data).mean()
        chain_term = model.free_energy(self.sampler.particles).mean()
        return data_term - chain_term

    def expr(self, model, data):
        # There is no tractable objective to report for SML.
        return None

    def get_data_specs(self, model):
        return (model.get_input_space(), model.get_input_source())
# Synthetic training set: 500 examples of 20 Gaussian visible units.
# NOTE(review): data_rng is presumably a numpy RandomState defined
# earlier in the file — not visible in this chunk; confirm.
data = data_rng.normal(size=(500, 20)).astype(theano.config.floatX)

# Experiment configuration.
conf = {
    'nvis': 20,          # number of visible units
    'nhid': 30,          # number of hidden units
    'rbm_seed': 1,       # seed for the sampler's RNG
    'batch_size': 100,
    'base_lr': 1e-4,     # initial learning rate
    'anneal_start': 1,   # iteration at which lr annealing starts
    'pcd_steps': 1,      # Gibbs steps per PCD update
}

# Gaussian-visible / binary-hidden RBM.
rbm = GaussianBinaryRBM(nvis=conf['nvis'], nhid=conf['nhid'],
                        irange=0.5,
                        energy_function_class = GRBM_Type_1)
rng = numpy.random.RandomState(seed=conf.get('rbm_seed', 42))

# Persistent Gibbs chains, initialised from the first 100 examples.
sampler = BlockGibbsSampler(rbm, data[0:100], rng,
                            steps=conf['pcd_steps'])

minibatch = tensor.matrix()

# SGD with learning-rate annealing drives the PCD parameter updates.
optimizer = SGDOptimizer(rbm, conf['base_lr'], conf['anneal_start'])
updates = training_updates(visible_batch=minibatch, model=rbm,
                           sampler=sampler, optimizer=optimizer)

# Reconstruction error is only a monitoring proxy; PCD does not
# optimise it directly.
proxy_cost = rbm.reconstruction_error(minibatch, rng=sampler.s_rng)
train_fn = theano.function([minibatch], proxy_cost, updates=updates)

# Compiled free-energy evaluator for monitoring.
vis = tensor.matrix('vis')
free_energy_fn = theano.function([vis], rbm.free_energy_given_v(vis))

#utils.debug.setdebug()
# Accumulator for reconstruction errors, filled later in the script
# (continuation not visible in this chunk).
recon = []