import numpy as np
import theano.tensor as T
from theano import config, function

# NOTE: the three imports below assume a pylearn2-style package layout;
# adjust the module paths to wherever Monitor, TrainingAlgorithm and
# BatchIterator actually live in this codebase.
from pylearn2.monitor import Monitor
from pylearn2.training_algorithms.training_algorithm import TrainingAlgorithm
from pylearn2.utils.iteration import BatchIterator


class UnsupervisedExhaustiveSGD(TrainingAlgorithm):
    def __init__(self, learning_rate, cost, batch_size=None,
                 monitoring_batches=None, monitoring_dataset=None,
                 termination_criterion=None, update_callbacks=None):
        self.learning_rate = float(learning_rate)
        self.cost = cost
        self.batch_size = batch_size
        self.monitoring_dataset = monitoring_dataset
        self.monitoring_batches = monitoring_batches
        self.termination_criterion = termination_criterion
        self._register_update_callbacks(update_callbacks)
        self.first = True

    def setup(self, model, dataset):
        self.model = model
        self.monitor = Monitor.get_monitor(model)
        # TODO: monitoring batch size ought to be configurable
        # separately from training batch size, e.g. if you would rather
        # monitor on one somewhat big batch but update on many small
        # batches.
        self.monitor.set_dataset(dataset=self.monitoring_dataset,
                                 batches=self.monitoring_batches,
                                 batch_size=self.batch_size)
        X = T.matrix(name="%s[X]" % self.__class__.__name__)
        cost_value = self.cost(model, X)
        if cost_value.name is None:
            cost_value.name = 'sgd_cost(' + X.name + ')'
        self.monitor.add_channel(name=cost_value.name, ipt=X,
                                 val=cost_value)
        params = model.get_params()
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i
        grads = dict(zip(params, T.grad(cost_value, params)))
        for param in grads:
            if grads[param].name is None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)'
                                     % {'costname': cost_value.name,
                                        'paramname': param.name})
        learning_rate = T.scalar('sgd_learning_rate')
        updates = dict(zip(params, [param - learning_rate * grads[param]
                                    for param in params]))
        for param in updates:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'
        model.censor_updates(updates)
        for param in updates:
            # Censoring may replace an update expression with a new, unnamed
            # one, so check the expression's name rather than the expression
            # itself (the original tested `updates[param] is None`, which
            # could never fire and would crash on `.name` if it did).
            if updates[param].name is None:
                updates[param].name = ('censor(sgd_update(' + param.name +
                                       '))')
        self.sgd_update = function([X, learning_rate], updates=updates,
                                   name='sgd_update')
        self.params = params
        num_examples = dataset.get_design_matrix().shape[0]
        self.slice_iterator = BatchIterator(num_examples, self.batch_size)

    def train(self, dataset):
        if not hasattr(self, 'sgd_update'):
            raise Exception("train called without first calling setup")
        model = self.model
        if self.batch_size is None:
            try:
                batch_size = model.force_batch_size
            except AttributeError:
                raise ValueError("batch_size unspecified in both training "
                                 "procedure and model")
        else:
            batch_size = self.batch_size
            if hasattr(model, "force_batch_size"):
                assert (model.force_batch_size <= 0 or
                        batch_size == model.force_batch_size), (
                    # TODO: more informative assertion error
                    "invalid force_batch_size attribute"
                )
        for param in self.params:
            value = param.get_value(borrow=True)
            if np.any(np.isnan(value)) or np.any(np.isinf(value)):
                raise Exception("NaN or Inf in " + param.name)
        if self.first:
            self.first = False
            self.monitor()
        design_matrix = dataset.get_design_matrix()
        # TODO: add support for reshuffling examples.
        for batch_slice in self.slice_iterator:
            batch = np.cast[config.floatX](design_matrix[batch_slice])
            self.sgd_update(batch, self.learning_rate)
            self.monitor.batches_seen += 1
            self.monitor.examples_seen += batch_size
        self.slice_iterator.reset()
        self.monitor()
        for callback in self.update_callbacks:
            callback(self)
        if self.termination_criterion is None:
            return True
        else:
            return self.termination_criterion(self.model)
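
# Minimal usage sketch (hypothetical: `model`, `dataset` and `some_cost`
# stand in for whatever Model, Dataset and cost objects the surrounding
# codebase provides, so this is illustrative rather than runnable as-is):
#
#     algorithm = UnsupervisedExhaustiveSGD(learning_rate=0.01,
#                                           cost=some_cost,
#                                           batch_size=100,
#                                           monitoring_batches=10,
#                                           monitoring_dataset=dataset)
#     algorithm.setup(model, dataset)
#     # train() makes one exhaustive pass over the dataset per call and
#     # returns True until the termination criterion (if any) is met.
#     while algorithm.train(dataset):
#         pass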