def simple_train_sgd(self, trainset, epoch, epoch_end):
    # train with SGD
    print 'Train %s with SGD' % self.__class__
    minibatch_idx_overall = utils.generate_minibatch_idx(
        trainset.shape[0], self.minibatch_size)
    while epoch < epoch_end:
        costs_epoch = []
        costs_by_step_epoch = []
        for k, use_idx in enumerate(minibatch_idx_overall):
            if self.verbose:
                sys.stdout.write('\rTraining minibatches %d/%d' % (
                    k, len(minibatch_idx_overall)))
                sys.stdout.flush()
            minibatch_data = trainset[use_idx, :]
            minibatch_mask = utils.generate_masks_deep_orderless_nade(
                minibatch_data.shape, self.rng_numpy)
            if 0:
                # this is deep NADE
                cost = self.train_fn(minibatch_data, minibatch_mask)
            else:
                # len(results) == 2
                results = self.train_fn(minibatch_data, minibatch_mask)
                cost = results[0]
                # results[1] has shape (1, k): one cost per mean-field step
                costs_by_step = results[1].flatten()
            costs_epoch.append(cost)
            costs_by_step_epoch.append(costs_by_step)
            # now linearly decrease the learning rate
            current_lr = self.learning_rate.get_value()
            new_lr = current_lr - numpy.float32(self.lr_decrease)
            self.learning_rate.set_value(new_lr)
        cost_epoch_avg = numpy.mean(costs_epoch)
        cost_by_step_avg = numpy.asarray(costs_by_step_epoch).mean(axis=0)
        self.costs_steps.append(cost_by_step_avg)
        self.costs.append(cost_epoch_avg)
        print '\rTraining %d/%d epochs, cost %.2f, costs by step %s lr %.5f' % (
            epoch, epoch_end, cost_epoch_avg,
            numpy.round(cost_by_step_avg, 2), current_lr)
        if epoch != 0 and (epoch + 1) % self.valid_freq == 0:
            # periodically save learning curves and run the monitoring suite
            numpy.savetxt(self.save_model_path + 'epoch_costs_by_step.txt',
                          self.costs_steps)
            numpy.savetxt(self.save_model_path + 'epoch_costs.txt',
                          self.costs)
            if self.channel:
                self.channel.save()
            self.sample_nade_v0(epoch)
            self.make_plots(self.costs)
            self.visualize_filters(epoch)
            self.LL(epoch, save_nothing=False)
            self.inpainting(epoch, self.k)
            self.save_model(epoch)
        epoch += 1
    # end of training
    print
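
# ---------------------------------------------------------------------------
# Hedged sketch (not from the original source): the training loop above leans
# on two helpers from utils.py whose definitions are not shown here. The
# versions below only illustrate the behaviour the loop assumes -- index
# chunking and orderless-NADE mask sampling -- and their names are
# hypothetical stand-ins for the real helpers.
def sketch_generate_minibatch_idx(n_examples, minibatch_size):
    # Chunk [0, n_examples) into consecutive index arrays of minibatch_size;
    # the last chunk may be smaller. The loop indexes trainset with these.
    idx = numpy.arange(n_examples)
    return [idx[i:i + minibatch_size]
            for i in range(0, n_examples, minibatch_size)]


def sketch_generate_masks_deep_orderless_nade(shape, rng_numpy):
    # One binary mask per example: draw the number of observed inputs
    # uniformly from [0, D], then mark that many random positions with 1
    # (observed / conditioned on); the zeros are the inputs to be predicted.
    B, D = shape
    masks = numpy.zeros((B, D), dtype=floatX)
    for i in range(B):
        n_observed = rng_numpy.randint(0, D + 1)
        observed = rng_numpy.choice(D, size=n_observed, replace=False)
        masks[i, observed] = 1.
    return masks
# ---------------------------------------------------------------------------
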
def build_theano_fn_nade_k_rbm(self):
    # this is the variational RBM version of NADE-k
    self.x = T.fmatrix('inputs')
    self.x.tag.test_value = numpy.random.binomial(
        n=1, p=0.5,
        size=(self.minibatch_size, self.n_visible)).astype(floatX)
    self.m = T.fmatrix('masks')
    self.m.tag.test_value = numpy.random.binomial(
        n=1, p=0.5,
        size=(self.minibatch_size, self.n_visible)).astype(floatX)
    # overwrite the random test values with real data from the trainset
    t = self.trainset[:self.minibatch_size]
    self.x.tag.test_value = t
    self.m.tag.test_value = utils.generate_masks_deep_orderless_nade(
        t.shape, self.rng_numpy)
    # params of the first layer
    self.W1 = utils.build_weights(n_row=self.n_visible, n_col=self.n_hidden,
                                  style=self.init_weights, name='W1',
                                  rng_numpy=self.rng_numpy)
    self.Wflags = utils.build_weights(n_row=self.n_visible,
                                      n_col=self.n_hidden,
                                      style=self.init_weights, name='Wflags',
                                      rng_numpy=self.rng_numpy)
    self.b1 = utils.build_bias(size=self.n_hidden, name='b_1')
    self.c = utils.build_bias(size=self.n_visible, name='c')
    if self.tied_weights:
        print 'W1 and V are tied'
        self.V = self.W1
        self.params = [self.W1, self.Wflags, self.b1, self.c]
    else:
        print 'W1 and V are untied'
        self.V = utils.build_weights(n_row=self.n_visible,
                                     n_col=self.n_hidden,
                                     style=self.init_weights, name='V',
                                     rng_numpy=self.rng_numpy)
        self.params = [self.W1, self.Wflags, self.b1, self.c, self.V]
    if self.n_layers == 2:
        self.W2 = utils.build_weights(n_row=self.n_hidden,
                                      n_col=self.n_hidden,
                                      style=self.init_weights, name='W2',
                                      rng_numpy=self.rng_numpy)
        self.b2 = utils.build_bias(size=self.n_hidden, name='b_2')
        self.params += [self.W2, self.b2]
    # (B, k, D): mean-field reconstructions, one slice per iteration step
    self.mf = theano.shared(
        value=numpy.zeros(
            (self.minibatch_size, self.k, self.n_visible)).astype(floatX),
        name='mean_field_v')
    cost, costs_by_step = self.get_nade_k_rbm_cost_theano(
        self.x, self.m, self.k)
    #L2_cost = T.sum(self.W1**2) + T.sum(self.Wflags**2)
    #reg_cost = cost + self.l2 * L2_cost
    # L2 weight decay on all weight matrices (biases are left unpenalized)
    for param in self.params:
        if param.ndim == 2:
            cost += T.sum(param ** 2) * constantX(self.l2)
    # get gradients
    self.learning_rate = theano.shared(numpy.float32(self.lr),
                                       name='learning_rate')
    updates = OrderedDict()
    consider_constant = None
    if self.sgd_type == 0:
        print 'use momentum sgd'
        which_type = 0
    elif self.sgd_type == 1:
        print 'use adadelta sgd'
        which_type = 1
    else:
        raise NotImplementedError()
    updates = utils.build_updates_with_rules(
        cost, self.params, consider_constant, updates,
        self.learning_rate, self.lr_decrease, self.momentum,
        floatX, which_type)
    # compile training functions
    print 'compiling fns ...'
    self.train_fn = theano.function(inputs=[self.x, self.m],
                                    outputs=[cost, costs_by_step],
                                    updates=updates,
                                    name='train_fn')
    self.sampling_fn = self.get_nade_k_rbm_sampling_fn_theano(self.k)
    self.compute_LL_with_ordering_fn = self.get_nade_k_rbm_LL_theano(self.k)
    # this is built later
    self.inpainting_fn = None
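
# ---------------------------------------------------------------------------
# Hedged sketch (not from the original source): build_updates_with_rules is
# defined in utils.py and also covers adadelta (which_type == 1),
# consider_constant, and the lr/momentum bookkeeping. The hypothetical helper
# below shows only the classic momentum-SGD rule it is assumed to apply for
# which_type == 0: v <- momentum * v - lr * grad; param <- param + v.
def sketch_momentum_sgd_updates(cost, params, learning_rate, momentum):
    updates = OrderedDict()
    grads = T.grad(cost, params)
    for param, grad in zip(params, grads):
        # one velocity accumulator per parameter, same shape, zero-initialised
        velocity = theano.shared(
            numpy.zeros(param.get_value().shape, dtype=floatX),
            name='velocity_%s' % param.name)
        new_velocity = constantX(momentum) * velocity - learning_rate * grad
        updates[velocity] = new_velocity
        updates[param] = param + new_velocity
    return updates
# ---------------------------------------------------------------------------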