def training_updates(visible_batch, model, sampler, optimizer):
    """
    Combine together updates from various sources for RBM training.

    Parameters
    ----------
    visible_batch : tensor_like
        Theano symbolic representing a minibatch on the visible units,
        with the first dimension indexing training examples and the
        second indexing data dimensions.
    model : object
        An instance of `RBM` or a derived class, or one implementing
        the RBM interface.
    sampler : object
        An instance of `Sampler` or a derived class, or one implementing
        the sampler interface.
    optimizer : object
        An instance of `Optimizer` or a derived class, or one
        implementing the optimizer interface (typically an
        `SGDOptimizer`).
    """
    pos_v = visible_batch
    neg_v = sampler.particles
    grads = model.ml_gradients(pos_v, neg_v)
    ups = optimizer.updates(gradients=grads)
    # Add the sampler's updates (negative phase particles, etc.).
    safe_update(ups, sampler.updates())
    return ups
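# All of the snippets here rely on `safe_update` from pylearn2.utils.
# A minimal sketch of its behaviour, assuming the standard pylearn2
# semantics: merge `dict_from` into `dict_to`, refusing to silently
# overwrite an existing key, so that gradient updates and sampler
# updates can never clobber each other.
def safe_update(dict_to, dict_from):
    for key, val in dict_from.items():
        if key in dict_to:
            raise KeyError(key)
        dict_to[key] = val
    return dict_to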
def updates(self, gradients):
    """
    Return symbolic updates to apply given a set of gradients
    on the parameters being optimized.

    Parameters
    ----------
    gradients : list of tensor_likes
        List of symbolic gradients for the parameters contained
        in self.params, in the same order as in self.params.

    Returns
    -------
    updates : dict
        A dictionary with the shared variables in self.params as keys
        and a symbolic expression of how they are to be updated each
        SGD step as values.

    Notes
    -----
    `cost_updates` is a convenient helper function that takes all
    necessary gradients with respect to a given symbolic cost.
    """
    ups = {}
    # Add the learning rate/iteration updates
    l_ups, learn_rates = self.learning_rate_updates(gradients)
    safe_update(ups, l_ups)
    # Get the updates from sgd_updates, a PyLearn library function.
    p_up = dict(self.sgd_updates(self.params, gradients, learn_rates))
    # Add the things in p_up to ups
    safe_update(ups, p_up)
    # Clip the values if needed.
    # We do not want the clipping values to force an upcast
    # of the update: updates should have the same type as params
    for param, (p_min, p_max) in self.clipping_values.iteritems():
        p_min = tensor.as_tensor(p_min)
        p_max = tensor.as_tensor(p_max)
        dtype = param.dtype
        if p_min.dtype != dtype:
            p_min = tensor.cast(p_min, dtype)
        if p_max.dtype != dtype:
            p_max = tensor.cast(p_max, dtype)
        ups[param] = tensor.clip(ups[param], p_min, p_max)
    # Return the updates dictionary.
    return ups
def updates(self, gradients):
    """
    Return symbolic updates to apply given a set of gradients
    on the parameters being optimized.

    Parameters
    ----------
    gradients : list of tensor_likes
        List of symbolic gradients for the parameters contained
        in self.params, in the same order as in self.params.

    Returns
    -------
    updates : dict
        A dictionary with the shared variables in self.params as keys
        and a symbolic expression of how they are to be updated each
        SGD step as values.

    Notes
    -----
    `cost_updates` is a convenient helper function that takes all
    necessary gradients with respect to a given symbolic cost.
    """
    ups = {}
    # Add the learning rate/iteration updates
    l_ups, learn_rates = self.learning_rate_updates()
    safe_update(ups, l_ups)
    # Get the updates from sgd_updates, a PyLearn library function.
    p_up = dict(sgd_updates(self.params, gradients, learn_rates))
    # Add the things in p_up to ups
    safe_update(ups, p_up)
    # Clip the values if needed.
    # We do not want the clipping values to force an upcast
    # of the update: updates should have the same type as params
    for param, (p_min, p_max) in self.clipping_values.iteritems():
        p_min = tensor.as_tensor(p_min)
        p_max = tensor.as_tensor(p_max)
        dtype = param.dtype
        if p_min.dtype != dtype:
            p_min = tensor.cast(p_min, dtype)
        if p_max.dtype != dtype:
            p_max = tensor.cast(p_max, dtype)
        ups[param] = tensor.clip(ups[param], p_min, p_max)
    # Return the updates dictionary.
    return ups
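# A self-contained sketch of the clipping step above, showing why the
# bounds are cast to the parameter's dtype before `tensor.clip`: as the
# comment in `updates` notes, clipping a float32 update against float64
# bounds would otherwise upcast the update. The variable names below are
# illustrative, not part of the library.
import numpy
import theano
import theano.tensor as tensor

W = theano.shared(numpy.zeros((5, 5), dtype='float32'), name='W')
new_W = W - numpy.float32(0.01) * W     # some symbolic update, float32
p_min = tensor.cast(tensor.as_tensor(-1.0), W.dtype)
p_max = tensor.cast(tensor.as_tensor(1.0), W.dtype)
clipped = tensor.clip(new_W, p_min, p_max)
assert clipped.dtype == W.dtype         # no upcast to float64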
def training_updates(visible_batch, model, sampler, optimizer):
    """
    Combine together updates from various sources for RBM training.

    Parameters
    ----------
    visible_batch : tensor_like
        Theano symbolic representing a minibatch on the visible units,
        with the first dimension indexing training examples and the
        second indexing data dimensions.
    model : object
        An instance of `RBM` or a derived class, or one implementing
        the RBM interface.
    sampler : object
        An instance of `Sampler` or a derived class, or one implementing
        the sampler interface.
    optimizer : object
        An instance of `_Optimizer` or a derived class, or one
        implementing the optimizer interface (typically an
        `_SGDOptimizer`).

    Returns
    -------
    ups : dict
        A dictionary mapping shared variables to their update
        expressions, combining the optimizer's gradient updates
        with the sampler's updates.
    """
    # TODO: the Optimizer object got deprecated, and this is the only
    # functionality that requires it. We moved the Optimizer here with
    # an _ before its name. We should figure out how best to refactor
    # the code. Optimizer was problematic because people kept using
    # SGDOptimizer instead of training_algorithms.sgd.

    # Compute negative phase updates.
    sampler_updates = sampler.updates()
    # Compute SML gradients.
    pos_v = visible_batch
    # neg_v = sampler_updates[sampler.particles]
    neg_v = sampler.particles
    grads = model.ml_gradients(pos_v, neg_v)
    # Build the updates dictionary combining gradient and sampler updates.
    ups = optimizer.updates(gradients=grads)
    safe_update(ups, sampler_updates)
    return ups
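# A hedged sketch of how `training_updates` is typically consumed:
# compile the merged updates dictionary into a Theano training function.
# `rbm`, `sampler`, and `optimizer` are assumed to be pre-built
# instances, and `dataset_batches` is a hypothetical iterator of numpy
# arrays; none of these names come from the library itself.
import theano
import theano.tensor as T

v = T.matrix('v')
ups = training_updates(visible_batch=v, model=rbm,
                       sampler=sampler, optimizer=optimizer)
train_fn = theano.function([v], updates=ups)
for batch in dataset_batches:
    train_fn(batch)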
def get_monitoring_channels(self, data):
    """
    Notes
    -----
    Monitors quantities related to the approximate posterior
    parameters phi and the conditional and prior parameters theta.
    """
    space, source = self.get_monitoring_data_specs()
    space.validate(data)

    rval = OrderedDict()

    X = data
    # Sample the noise variable epsilon and push X through the encoder
    # to get the approximate posterior parameters phi.
    epsilon_shape = (X.shape[0], self.nhid)
    epsilon = self.sample_from_epsilon(shape=epsilon_shape)
    phi = self.encode_phi(X)
    # Reparametrized sample from q(z | x), then decode to theta and
    # reconstruct the input.
    z = self.sample_from_q_z_given_x(epsilon=epsilon, phi=phi)
    theta = self.decode_theta(z)
    X_r = self.means_from_theta(theta)

    rval["reconstruction_mse"] = T.sqr(X - X_r).mean()

    # Collect the channels contributed by the posterior, conditional
    # and prior components.
    posterior_channels = \
        self.posterior.monitoring_channels_from_conditional_params(phi)
    safe_update(rval, posterior_channels)
    conditional_channels = \
        self.conditional.monitoring_channels_from_conditional_params(theta)
    safe_update(rval, conditional_channels)
    prior_channels = self.prior.monitoring_channels_from_prior_params()
    safe_update(rval, prior_channels)

    return rval
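# A sketch of turning the monitoring channels into a callable, assuming
# `vae` is an instance of the class defining get_monitoring_channels
# (the name is illustrative):
import theano
import theano.tensor as T

X = T.matrix('X')
channels = vae.get_monitoring_channels(X)    # OrderedDict of scalars
monitor_fn = theano.function([X], list(channels.values()))
names = list(channels.keys())                # e.g. 'reconstruction_mse'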
def get_lr_scalers(self):
    rval = OrderedDict()
    if self.encoder is not None:
        safe_update(rval, self.encoder.get_lr_scalers())
    return rval
def get_lr_scalers(self):
    rval = self.prior.get_lr_scalers()
    safe_update(rval, self.conditional.get_lr_scalers())
    safe_update(rval, self.posterior.get_lr_scalers())
    return rval
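# Sketch of how these scalers are consumed: pylearn2's training
# algorithms multiply the global learning rate by each parameter's
# entry, defaulting to 1 for parameters without a scaler. The loop
# below is illustrative (`model`, `params`, `grads`, and
# `learning_rate` are assumed to exist), not the library's exact
# implementation.
scalers = model.get_lr_scalers()
updates = OrderedDict()
for param, grad in zip(params, grads):
    scaled_lr = learning_rate * scalers.get(param, 1.)
    updates[param] = param - scaled_lr * grad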