Example #1
    def setup(self, model, dataset):
        self.model = model
        self.monitor = Monitor.get_monitor(model)
        # TODO: monitoring batch size ought to be configurable
        # separately from training batch size, e.g. if you would rather
        # monitor on one somewhat big batch but update on many small
        # batches.
        self.monitor.set_dataset(dataset=self.monitoring_dataset,
                                 batches=self.monitoring_batches,
                                 batch_size=self.batch_size)
        X = T.matrix(name="%s[X]" % self.__class__.__name__)
        cost_value = self.cost(model, X)
        if cost_value.name is None:
            cost_value.name = 'sgd_cost(' + X.name + ')'
        self.monitor.add_channel(name=cost_value.name, ipt=X, val=cost_value)
        params = model.get_params()
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i
        grads = dict(zip(params, T.grad(cost_value, params)))
        for param in grads:
            if grads[param].name is None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                     {'costname': cost_value.name,
                                      'paramname': param.name})
        learning_rate = T.scalar('sgd_learning_rate')
        updates = dict(zip(params, [param - learning_rate * grads[param]
                                    for param in params]))
        for param in updates:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'
        # Let the model modify the updates to respect its constraints; any
        # update expression it replaces loses its name, so rename those here.
        model.censor_updates(updates)
        for param in updates:
            if updates[param].name is None:
                updates[param].name = 'censor(sgd_update(' + param.name + '))'

        self.sgd_update = function([X, learning_rate], updates=updates,
                                   name='sgd_update')
        self.params = params
        num_examples = dataset.get_design_matrix().shape[0]
        self.slice_iterator = BatchIterator(num_examples, self.batch_size)
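
The setup code above builds a BatchIterator that is not shown in either example. The sketch below is a minimal assumed implementation, consistent only with how the iterator is used here: it is constructed from the number of examples and a batch size, yields consecutive slice objects over the design matrix, and offers reset() so the same iterator can be reused every epoch.

class BatchIterator(object):
    """Minimal sketch of the slice iterator assumed by the SGD code above."""

    def __init__(self, num_examples, batch_size):
        self.num_examples = num_examples
        self.batch_size = batch_size
        self._position = 0

    def __iter__(self):
        return self

    def next(self):
        # Yield the next [start, stop) slice, or stop at the end of the data.
        if self._position >= self.num_examples:
            raise StopIteration()
        start = self._position
        stop = min(start + self.batch_size, self.num_examples)
        self._position = stop
        return slice(start, stop)

    __next__ = next  # make the sketch work under both Python 2 and 3

    def reset(self):
        # Rewind so the next epoch starts again from the first example.
        self._position = 0
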
Example #2
# Imports implied by this snippet (NumPy, Theano, and old-style pylearn2);
# the pylearn2 module paths below are assumptions and may differ by version.
# BatchIterator is assumed to be in scope (see the sketch after Example #1).
import numpy as np
import theano.tensor as T
from theano import config, function
from pylearn2.monitor import Monitor
from pylearn2.training_algorithms.training_algorithm import TrainingAlgorithm


class UnsupervisedExhaustiveSGD(TrainingAlgorithm):
    def __init__(self, learning_rate, cost, batch_size=None,
                 monitoring_batches=None, monitoring_dataset=None,
                 termination_criterion=None, update_callbacks=None):
        self.learning_rate = float(learning_rate)
        self.cost = cost
        self.batch_size = batch_size
        self.monitoring_dataset = monitoring_dataset
        self.monitoring_batches = monitoring_batches
        self.termination_criterion = termination_criterion
        self._register_update_callbacks(update_callbacks)
        self.first = True

    def setup(self, model, dataset):
        self.model = model
        self.monitor = Monitor.get_monitor(model)
        # TODO: monitoring batch size ought to be configurable
        # separately from training batch size, e.g. if you would rather
        # monitor on one somewhat big batch but update on many small
        # batches.
        self.monitor.set_dataset(dataset=self.monitoring_dataset,
                                 batches=self.monitoring_batches,
                                 batch_size=self.batch_size)
        X = T.matrix(name="%s[X]" % self.__class__.__name__)
        cost_value = self.cost(model, X)
        if cost_value.name is None:
            cost_value.name = 'sgd_cost(' + X.name + ')'
        self.monitor.add_channel(name=cost_value.name, ipt=X, val=cost_value)
        params = model.get_params()
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i
        grads = dict(zip(params, T.grad(cost_value, params)))
        for param in grads:
            if grads[param].name is None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                     {'costname': cost_value.name,
                                      'paramname': param.name})
        learning_rate = T.scalar('sgd_learning_rate')
        updates = dict(zip(params, [param - learning_rate * grads[param]
                                    for param in params]))
        for param in updates:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'
        # Let the model modify the updates to respect its constraints; any
        # update expression it replaces loses its name, so rename those here.
        model.censor_updates(updates)
        for param in updates:
            if updates[param].name is None:
                updates[param].name = 'censor(sgd_update(' + param.name + '))'

        self.sgd_update = function([X, learning_rate], updates=updates,
                                   name='sgd_update')
        self.params = params
        num_examples = dataset.get_design_matrix().shape[0]
        self.slice_iterator = BatchIterator(num_examples, self.batch_size)

    def train(self, dataset):
        if not hasattr(self, 'sgd_update'):
            raise Exception("train called without first calling setup")
        model = self.model
        if self.batch_size is None:
            try:
                batch_size = model.force_batch_size
            except AttributeError:
                raise ValueError("batch_size unspecified in both training "
                                 "procedure and model")
        else:
            batch_size = self.batch_size
            if hasattr(model, "force_batch_size"):
                assert (model.force_batch_size <= 0 or
                        batch_size == model.force_batch_size), (
                            "batch_size %s does not match "
                            "model.force_batch_size %s" %
                            (batch_size, model.force_batch_size)
                        )
        for param in self.params:
            value = param.get_value(borrow=True)
            if np.any(np.isnan(value)) or np.any(np.isinf(value)):
                raise Exception("NaN in " + param.name)
        if self.first:
            self.monitor()
        self.first = False
        design_matrix = dataset.get_design_matrix()
        # TODO: add support for reshuffling examples.
        for batch_slice in self.slice_iterator:
            batch = np.cast[config.floatX](design_matrix[batch_slice])
            self.sgd_update(batch, self.learning_rate)
            self.monitor.batches_seen += 1
            self.monitor.examples_seen += batch_size
        self.slice_iterator.reset()
        self.monitor()
        for callback in self.update_callbacks:
            callback(self)
        if self.termination_criterion is None:
            return True
        else:
            return self.termination_criterion(self.model)
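
For context, a complete run with this class follows the pattern its methods imply: construct the algorithm, call setup() once, then call train() repeatedly for as long as it returns True. The sketch below is illustrative only; my_cost, my_model, my_dataset, and the MaxEpochs criterion are hypothetical placeholders rather than part of the examples above.

class MaxEpochs(object):
    """Hypothetical termination criterion: allow `limit` passes over the data."""

    def __init__(self, limit):
        self.limit = limit
        self.count = 0

    def __call__(self, model):
        # train() keeps going while this returns True.
        self.count += 1
        return self.count < self.limit


# my_model, my_dataset, and my_cost are placeholders; my_cost must be a
# callable mapping (model, symbolic_X) to a scalar Theano cost expression.
algorithm = UnsupervisedExhaustiveSGD(learning_rate=0.01,
                                      cost=my_cost,
                                      batch_size=100,
                                      monitoring_dataset=my_dataset,
                                      monitoring_batches=1,
                                      termination_criterion=MaxEpochs(10))
algorithm.setup(my_model, my_dataset)

# Each call to train() makes one full pass over the dataset and reports
# whether training should continue.
while algorithm.train(my_dataset):
    pass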