def __init__(self, config, name):
    super(AE, self).__init__(config, name)
    # dimension of hidden layer
    self.hDim = int(self.readField(config, name, "hidden_dimension"))
    # dimension of visible layer
    self.vDim = int(self.readField(config, name, "visible_dimension"))
    # bias for hidden layer
    if self.hDim > 0:
        self.b1 = gp.zeros(self.hDim)
    # bias for visible layer
    if self.vDim > 0:
        self.b2 = gp.zeros(self.vDim)
    # init weights: uniform between +-sqrt(6)/sqrt(v+h+1)
    if self.hDim * self.vDim > 0:
        gp.seed_rand()
        r = gp.sqrt(6) / gp.sqrt(self.hDim + self.vDim + 1)
        # gp.rand gives uniform [0, 1); scale and shift to [-r, r) as the
        # comment above describes (the original used randn, which would not
        # produce a uniform distribution)
        self.W1 = gp.rand(self.vDim, self.hDim) * 2 * r - r
        self.W2 = gp.rand(self.hDim, self.vDim) * 2 * r - r
        self.initUpdate()
        self.initHyperParam(config, name)
def test_rnn_on_nn(add_noise=False):
    print 'Testing RnnOnNeuralNet, ' + ('with' if add_noise else 'without') + ' noise'
    n_cases = 5
    net = create_default_rnn_on_nn(add_noise=add_noise)
    print net

    x = gnp.randn(n_cases, net.in_dim)
    t = gnp.randn(n_cases, net.out_dim)

    seed = 8
    gnp.seed_rand(seed)

    net.load_target(t)
    net.clear_gradient()
    net.forward_prop(x, add_noise=add_noise, compute_loss=True, is_test=False)
    net.backward_prop()
    backprop_grad = net.get_grad_vec()

    f = fdiff_grad_generator(net, x, None, add_noise=add_noise, seed=seed)
    fdiff_grad = finite_difference_gradient(f, net.get_param_vec())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, ' Backpropagation Gradient')
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed
def test_compare_ais_with_ruslan():
    "Loads Ruslan's RBM and compares AIS results"
    ref_file = "test/rbm-for-ais-test.mat"
    epsilon = 0.2

    gp.seed_rand(int(time.time()))
    mdata = scipy.io.loadmat(ref_file)
    ref_logpf = mdata['logZZ_est'][0, 0]
    ref_logpf_low = mdata['logZZ_est_down'][0, 0]
    ref_logpf_high = mdata['logZZ_est_up'][0, 0]
    n_hid = int(mdata['numhid'][0, 0])

    X, TX = rbm.util.load_ruslan_mnist()
    myrbm = rbm.rbm.RestrictedBoltzmannMachine(100, 784, n_hid, 0)
    rbm.util.load_ruslan_parameters(myrbm, ref_file)

    ais = rbm.ais.AnnealedImportanceSampler(myrbm)
    ais.init_using_dataset(X)
    betas = np.concatenate((np.linspace(0.0, 0.5, 500, endpoint=False),
                            np.linspace(0.5, 0.9, 4000, endpoint=False),
                            np.linspace(0.9, 1.0, 10000)))
    logpf, logpf_low, logpf_high = ais.log_partition_function(betas=betas, ais_runs=100)

    print "Test: log Z = %g (%g, %g)" % (logpf, logpf_low, logpf_high)
    print "Reference: log Z = %g (%g, %g)" % (ref_logpf, ref_logpf_low, ref_logpf_high)

    assert abs(logpf - ref_logpf) < epsilon
    assert abs(logpf_low - ref_logpf_low) < epsilon
    assert abs(logpf_high - ref_logpf_high) < epsilon
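# Note on the beta schedule used above: the three linspace segments give
# 500 + 4000 + 10000 = 14500 annealing temperatures, with spacing that gets
# finer as beta approaches 1.0; the same schedule appears again in the AIS
# script further down in this section.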
def test_databias_loss_with_net(add_noise, loss_type, **kwargs):
    print 'Testing Loss <' + loss_type + '> with network, ' \
            + ('with noise' if add_noise else 'without noise') + ', ' \
            + ', '.join([str(k) + '=' + str(v) for k, v in kwargs.iteritems()])
    n_cases = 5
    n_datasets = 3
    seed = 8
    dropout_rate = 0.5 if add_noise else 0

    net = create_databias_net(dropout_rate)
    net.set_loss(loss_type)
    print net

    x = gnp.randn(n_cases, net.in_dim)
    s = np.arange(n_cases) % n_datasets
    net.load_target(s, K=n_datasets, **kwargs)

    if add_noise:
        gnp.seed_rand(seed)
    net.clear_gradient()
    net.forward_prop(x, add_noise=add_noise, compute_loss=True)
    net.backward_prop()
    backprop_grad = net.get_grad_vec()

    f = fdiff_grad_generator(net, x, None, add_noise=add_noise, seed=seed)
    fdiff_grad = finite_difference_gradient(f, net.get_param_vec())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, ' Backpropagation Gradient')
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed
def initRandom(self):
    gp.seed_rand()
    r = gp.sqrt(6) / gp.sqrt(self.hDim + self.vDim + 1)
    # uniform init in [-r, r), matching the constructor above
    self.W1 = gp.rand(self.vDim, self.hDim) * 2 * r - r
    self.W2 = gp.rand(self.hDim, self.vDim) * 2 * r - r
    self.initUpdate()
    self.initHyperParam(self.config, self.name)
def test_autoencoder(add_noise=False):
    print 'Testing AutoEncoder ' + ('with noise' if add_noise else 'without noise')
    n_cases = 5
    seed = 8
    dropout_rate = 0.5 if add_noise else 0

    autoencoder = create_autoencoder(dropout_rate)
    print autoencoder

    x = gnp.randn(n_cases, autoencoder.in_dim)

    if add_noise:
        gnp.seed_rand(seed)
    autoencoder.clear_gradient()
    autoencoder.forward_prop(x, add_noise=add_noise, compute_loss=True, is_test=False)
    autoencoder.backward_prop()
    backprop_grad = autoencoder.get_grad_vec()

    f = fdiff_grad_generator(autoencoder, x, None, add_noise=add_noise, seed=seed)
    fdiff_grad = finite_difference_gradient(f, autoencoder.get_param_vec())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, ' Backpropagation Gradient')
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed
def f(w):
    if add_noise and seed is not None:
        gnp.seed_rand(seed)
    w_0 = net.get_param_vec()
    net.set_param_from_vec(w)
    net.forward_prop(x, add_noise=add_noise, compute_loss=True)
    loss = net.get_loss()
    net.set_param_from_vec(w_0)
    return loss
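# The gradient checks in this section call finite_difference_gradient(f, w0),
# whose definition is not included here.  A minimal sketch of what such a
# helper could look like, using central differences; the name, signature and
# eps below are assumptions, not the project's actual helper:
def finite_difference_gradient_sketch(f, w0, eps=1e-5):
    """Numerically estimate the gradient of the scalar function f at w0."""
    import numpy as np
    w0 = np.asarray(w0, dtype=float)
    grad = np.zeros_like(w0)
    for i in range(w0.size):
        w_plus = w0.copy()
        w_minus = w0.copy()
        w_plus[i] += eps
        w_minus[i] -= eps
        # central difference: (f(w + eps*e_i) - f(w - eps*e_i)) / (2*eps)
        grad[i] = (f(w_plus) - f(w_minus)) / (2 * eps)
    return grad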
def get_morphing_figure(dataset='mnist', mode='input_space'):
    imsz = [28, 28] if dataset == 'mnist' else [48, 48]
    net = get_model(dataset=dataset, mode=mode)
    plt.figure()
    gnp.seed_rand(8)
    vis.generation_on_a_line(net, n_points=24, imsz=imsz, nrows=10,
            h_seeds=net.sample_hiddens(5))
    if not os.path.exists('figs'):
        os.makedirs('figs')
    plt.savefig('figs/morphing_%s_%s.pdf' % (dataset, mode), bbox_inches='tight')
def f(w):
    if add_noise:
        # this makes sure the same units are dropped out every time this
        # function is called
        gnp.seed_rand(seed)
    layer.params.set_param_from_vec(w)
    layer.forward_prop(x, compute_loss=True, is_test=False)
    if layer.sparsity_weight == 0:
        return layer.loss_value
    else:
        return layer.loss_value + layer._sparsity_objective
def test_neuralnet(add_noise=False, loss_after_nonlin=False, use_batch_normalization=False):
    print 'Testing NeuralNet, ' + ('with noise' if add_noise else 'without noise') \
            + ', ' + ('with BN' if use_batch_normalization else 'without BN')
    n_cases = 5
    seed = 8
    dropout_rate = 0.5 if add_noise else 0

    net = create_neuralnet(dropout_rate, loss_after_nonlin=loss_after_nonlin,
            use_batch_normalization=use_batch_normalization)
    print net

    x = gnp.randn(n_cases, net.in_dim)
    t = gnp.randn(n_cases, net.out_dim)
    if net.loss.target_should_be_one_hot():
        new_t = np.zeros(t.shape)
        new_t[np.arange(t.shape[0]), t.argmax(axis=1)] = 1
        t = gnp.garray(new_t)
    elif net.loss.target_should_be_normalized():
        t = t - t.min(axis=1)[:, gnp.newaxis] + 1
        t /= t.sum(axis=1)[:, gnp.newaxis]
    net.load_target(t)

    if add_noise:
        gnp.seed_rand(seed)
    net.forward_prop(x, add_noise=add_noise, compute_loss=True, is_test=False)
    net.clear_gradient()
    net.backward_prop()
    backprop_grad = net.get_grad_vec()

    f = fdiff_grad_generator(net, x, None, add_noise=add_noise, seed=seed)
    fdiff_grad = finite_difference_gradient(f, net.get_param_vec())

    eps = _BN_GRAD_CHECK_EPS if use_batch_normalization else _GRAD_CHECK_EPS
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, ' Backpropagation Gradient', eps=eps,
            use_rel_err=use_batch_normalization)
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed
def test_rnn_hybrid(add_noise=False, has_input=True):
    print 'Testing RNN hybrid, ' + ('with' if add_noise else 'without') + ' noise, ' \
            + ('with' if has_input else 'without') + ' input'
    n_cases = 5
    net = create_default_rnn_hybrid(add_noise=add_noise, has_input=has_input)
    print net

    x = gnp.randn(n_cases, net.in_dim) if has_input else None
    t = gnp.randn(n_cases, net.out_dim)
    net.load_target(t)

    seed = 8
    gnp.seed_rand(seed)
    net.clear_gradient()
    """
    if not has_input:
        import ipdb
        ipdb.set_trace()
    """
    net.forward_prop(X=x, T=n_cases, add_noise=add_noise, compute_loss=True, is_test=False)
    net.backward_prop()
    backprop_grad = net.get_grad_vec()

    f = fdiff_grad_generator(net, x, None, add_noise=add_noise, seed=seed, T=n_cases)
    fdiff_grad = finite_difference_gradient(f, net.get_param_vec())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, ' Backpropagation Gradient')
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed
def run_all_tests():
    gnp.seed_rand(int(time.time()))
    n_success = 0
    n_tests = 0

    test_list = [test_all_nonlin, test_all_loss, test_all_layer,
            test_all_neuralnet, test_all_stacked_net, test_all_y_net,
            test_all_autoencoder, test_all_rnn]

    for batch_test in test_list:
        success_in_batch, tests_in_batch = batch_test()
        n_success += success_in_batch
        n_tests += tests_in_batch

    print ''
    print '==================='
    print 'All tests finished: %d/%d success, %d failed' % (
            n_success, n_tests, n_tests - n_success)
    print ''
def run_all_tests():
    gnp.seed_rand(int(time.time()))
    n_success = 0
    n_tests = 0

    test_list = [test_all_generative_mmd_loss, test_all_diff_kernel_mmd_loss,
            test_all_diff_kernel_per_example_mmd_loss]

    for batch_test in test_list:
        success_in_batch, tests_in_batch = batch_test()
        n_success += success_in_batch
        n_tests += tests_in_batch

    print ''
    print '==================='
    print 'All tests finished: %d/%d success, %d failed' % (
            n_success, n_tests, n_tests - n_success)
    print ''
def test_stacked_net_gradient(add_noise=False):
    print 'Testing StackedNeuralNet'
    in_dim = 3
    out_dim = [5, 2, 2]
    n_cases = 5
    seed = 8
    dropout_rate = 0.5 if add_noise else 0

    stacked_net = create_stacked_net(in_dim, out_dim, dropout_rate)
    print stacked_net

    x = gnp.randn(n_cases, in_dim)
    t1 = gnp.randn(n_cases, out_dim[0])
    t3 = gnp.randn(n_cases, out_dim[2])
    stacked_net.load_target(t1, None, t3)

    if add_noise:
        gnp.seed_rand(seed)
    stacked_net.clear_gradient()
    stacked_net.forward_prop(x, add_noise=add_noise, compute_loss=True, is_test=False)
    stacked_net.backward_prop()
    backprop_grad = stacked_net.get_grad_vec()

    f = fdiff_grad_generator(stacked_net, x, None, add_noise=add_noise, seed=seed)
    fdiff_grad = finite_difference_gradient(f, stacked_net.get_param_vec())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, ' Backpropagation Gradient')
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed
def linear_classifier_discrimination(model, data, C_range=[1], verbose=True, samples=None):
    """
    Compute the logistic regression classification accuracy.
    """
    import sklearn.linear_model as lm

    n_examples = data.shape[0]
    if samples is None:
        gnp.seed_rand(8)
        samples = model.generate_samples(n_samples=n_examples).asarray()

    x = np.r_[data, samples]
    t = np.r_[np.zeros(n_examples, dtype=np.int), np.ones(samples.shape[0], dtype=np.int)]

    best_acc = 0
    best_classifier = None
    for C in C_range:
        t_start = time.time()
        lr = lm.LogisticRegression(C=C, dual=False, random_state=8)
        lr.fit(x, t)
        acc = (lr.predict(x) == t).mean()
        if verbose:
            print 'C=%g acc=%.4f' % (C, acc),
        if acc > best_acc:
            best_acc = acc
            best_classifier = lr
            if verbose:
                print '*',
        else:
            if verbose:
                print ' ',
        if verbose:
            print 'time=%.2f' % (time.time() - t_start)
    return best_acc, best_classifier
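# Hedged usage sketch for linear_classifier_discrimination: when `samples` is
# passed explicitly the `model` argument is never used, so the discriminator
# can be run on any real/generated pair of arrays (the variable names below
# are illustrative only, not from the original code):
#
#     real_data = np.random.rand(500, 784)
#     fake_data = np.random.rand(500, 784)
#     acc, clf = linear_classifier_discrimination(
#             None, real_data, C_range=[0.01, 0.1, 1.0, 10.0], samples=fake_data)
#     # accuracy near 0.5 means the classifier cannot tell samples from data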
def test_y_net_gradient(add_noise=False):
    print 'Testing YNeuralNet ' + ('with noise' if add_noise else 'without noise')
    in_dim = 3
    out_dim = [2, 2, 2]
    n_cases = 5
    seed = 8
    dropout_rate = 0.5 if add_noise else 0

    ynet = create_y_net(in_dim, out_dim, dropout_rate)
    print ynet

    x = gnp.randn(n_cases, in_dim)
    t1 = gnp.randn(n_cases, out_dim[0])
    t2 = gnp.randn(n_cases, out_dim[1])
    t3 = gnp.randn(n_cases, out_dim[2])
    ynet.load_target([None, t1], t2, t3)

    if add_noise:
        gnp.seed_rand(seed)
    ynet.clear_gradient()
    ynet.forward_prop(x, add_noise=add_noise, compute_loss=True, is_test=False)
    ynet.backward_prop()
    backprop_grad = ynet.get_grad_vec()

    f = fdiff_grad_generator(ynet, x, None, add_noise=add_noise, seed=seed)
    fdiff_grad = finite_difference_gradient(f, ynet.get_param_vec())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, ' Backpropagation Gradient')
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed
def __init__(self, ind, schedule):
    gpu.seed_rand(seed=None)

    self.logging = schedule["logging"]

    self.psize = 0
    cuts = [0]
    self.stack = schedule["stack"]
    for layer in self.stack:
        ltype = layer["type"]
        units = layer["units"]
        l = ltype.__new__(ltype)
        l.__init__(shape=(ind, units), **layer)
        self.psize += l.size
        self.append(l)
        cuts.append(l.size)
        ind = units

    self.params = gzeros(self.psize)
    self.cuts = np.cumsum(cuts)
    for layer, (c1, c2) in izip(self, izip(self.cuts[:-1], self.cuts[1:])):
        layer.p = self.params[c1:c2]

    if "score" in schedule:
        self._score = schedule["score"]
    else:
        print("You may have a problem: _score_ is NONE")
        self._score = None
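# Worked illustration of the parameter slicing above (the layer sizes are made
# up): two layers with l.size == 20 and l.size == 12 give cuts == [0, 20, 32]
# after np.cumsum, so the first layer gets params[0:20] and the second gets
# params[20:32]; each layer's `p` is a view into the single flat self.params
# vector.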
def seed(val):
    if gpu.GPU:
        gp.seed_rand(val)
    else:
        np.random.seed(val)
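# Hedged usage sketch for seed(): calling it again with the same value makes
# the active backend reproduce the same draws (CPU path shown; on the GPU path
# the same holds for gnumpy draws instead):
#
#     seed(1234)
#     a = np.random.rand(3)
#     seed(1234)
#     b = np.random.rand(3)
#     # a == b elementwise when gpu.GPU is False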
def train_rbm(tcfg, print_cost=False):
    """Trains and returns an RBM using the specified
    RestrictedBoltzmannMachineTrainingConfiguration"""
    # seed RNGs
    gp.seed_rand(tcfg.seed)

    # build RBM
    rbm = RestrictedBoltzmannMachine(tcfg.batch_size, tcfg.n_vis, tcfg.n_hid,
                                     tcfg.n_gibbs_steps, tcfg.init_weight_sigma,
                                     tcfg.init_bias_sigma)

    # initialize momentums
    weights_update = 0
    bias_vis_update = 0
    bias_hid_update = 0

    # train
    for epoch in range(tcfg.epochs):
        seen_epoch_samples = 0
        if print_cost:
            pl_bit = 0
            pl_sum = 0
            rc_sum = 0

        for x in draw_slices(tcfg.X, tcfg.batch_size, kind='sequential',
                             samples_are='rows', stop=True):
            #print >>stderr, "%d / %d (epoch: %d / %d)\r" % (seen_epoch_samples,
            #                                                tcfg.X.shape[0],
            #                                                epoch, tcfg.epochs),

            # binarize x
            if tcfg.binarize_data:
                x = sample_binomial(x)

            # perform weight update on the freshly built rbm instance
            if tcfg.use_pcd:
                weights_step, bias_vis_step, bias_hid_step = rbm.pcd_update(x)
            else:
                weights_step, bias_vis_step, bias_hid_step = rbm.cd_update(x)

            if epoch >= tcfg.use_final_momentum_from_epoch:
                momentum = tcfg.final_momentum
            else:
                momentum = tcfg.initial_momentum

            weights_update = momentum * weights_update + \
                tcfg.step_rate * (weights_step - tcfg.weight_cost * rbm.weights)
            bias_vis_update = momentum * bias_vis_update + tcfg.step_rate * bias_vis_step
            bias_hid_update = momentum * bias_hid_update + tcfg.step_rate * bias_hid_step

            rbm.weights += weights_update
            rbm.bias_vis += bias_vis_update
            rbm.bias_hid += bias_hid_update

            seen_epoch_samples += tcfg.batch_size

            if print_cost:
                # calculate part of pseudo-likelihood
                pl_sum += gp.sum(rbm.pseudo_likelihood_for_bit(x > 0.5, pl_bit))
                pl_bit = (pl_bit + 1) % tcfg.X.shape[1]

                # calculate part of reconstruction cost
                rc_sum += gp.sum(rbm.reconstruction_cross_entropy(x > 0.5))

        #############################################
        # end of batch loop

        # save parameters
        save_parameters(rbm, epoch)

        # plot weights and current state of PCD chains
        plot_weights(rbm, epoch)
        if tcfg.use_pcd:
            plot_pcd_chains(rbm, epoch)

        if print_cost:
            # calculate pseudo likelihood and reconstruction cost
            pl = pl_sum / seen_epoch_samples * tcfg.X.shape[1]
            rc = rc_sum / seen_epoch_samples
            print "Epoch %02d: reconstruction cost=%f, pseudo likelihood=%f" % \
                (epoch, rc, pl)

    return rbm
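# train_rbm() only reads attributes off `tcfg`, so any object exposing the
# fields referenced above will do.  A hypothetical minimal configuration
# (all names and values below are illustrative, not from the original project):
#
#     class TCfg(object):
#         seed = 42
#         batch_size = 100
#         n_vis, n_hid = 784, 500
#         n_gibbs_steps = 1
#         init_weight_sigma, init_bias_sigma = 0.01, 0.0
#         epochs = 10
#         use_pcd = False
#         binarize_data = True
#         initial_momentum, final_momentum = 0.5, 0.9
#         use_final_momentum_from_epoch = 5
#         step_rate = 0.1
#         weight_cost = 0.0002
#         X = gp.rand(60000, 784)   # placeholder training data, rows are samples
#
#     trained_rbm = train_rbm(TCfg(), print_cost=True)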
def test_layer(add_noise=False, no_loss=False, loss_after_nonlin=False,
        sparsity_weight=0, use_batch_normalization=False):
    print 'Testing layer ' + ('with noise' if add_noise else 'without noise') \
            + ', ' + ('without loss' if no_loss else 'with loss') \
            + ', ' + ('without sparsity' if sparsity_weight == 0 else 'with sparsity') \
            + ', ' + ('without batch normalization' if not use_batch_normalization
                    else 'with batch normalization')

    in_dim = 4
    out_dim = 3
    n_cases = 3
    sparsity = 0.1

    x = gnp.randn(n_cases, in_dim)
    t = gnp.randn(n_cases, out_dim)

    if no_loss:
        loss = None
    else:
        loss = ls.get_loss_from_type_name(ls.LOSS_NAME_SQUARED)
        loss.load_target(t)
        loss.set_weight(2.5)

    seed = 8
    dropout_rate = 0.5 if add_noise else 0
    nonlin_type = ly.NONLIN_NAME_SIGMOID if sparsity_weight > 0 \
            else ly.NONLIN_NAME_TANH

    layer = ly.Layer(in_dim, out_dim, nonlin_type=nonlin_type,
            dropout=dropout_rate, sparsity=sparsity,
            sparsity_weight=sparsity_weight, loss=loss,
            loss_after_nonlin=loss_after_nonlin,
            use_batch_normalization=use_batch_normalization)
    if sparsity_weight > 0:
        # disable smoothing over minibatches
        layer._sparsity_smoothing = 1.0

    w_0 = layer.params.get_param_vec()

    if add_noise:
        gnp.seed_rand(seed)
    layer.params.clear_gradient()
    layer.forward_prop(x, compute_loss=True, is_test=False)
    layer.backward_prop()
    backprop_grad = layer.params.get_grad_vec()

    def f(w):
        if add_noise:
            # this makes sure the same units are dropped out every time this
            # function is called
            gnp.seed_rand(seed)
        layer.params.set_param_from_vec(w)
        layer.forward_prop(x, compute_loss=True, is_test=False)
        if layer.sparsity_weight == 0:
            return layer.loss_value
        else:
            return layer.loss_value + layer._sparsity_objective

    fdiff_grad = finite_difference_gradient(f, w_0)

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, ' Backpropagation Gradient',
            eps=_GRAD_CHECK_EPS if not use_batch_normalization else _BN_GRAD_CHECK_EPS,
            use_rel_err=use_batch_normalization)
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed
from util import Util
import msvcrt
import logging as log
import copy
from scipy import sparse
from sklearn import metrics
from gnumpy_RBM import RBM
from SDA import SDA
import operator
import time

gpu.board_id_to_use = 0
print 'USING GPU' + str(gpu.board_id_to_use)
gpu.expensive_check_probability = 0
log.basicConfig(filename='C:/net/log_GPU{0}.txt'.format(gpu.board_id_to_use),
                format='%(message)s', level=log.DEBUG)
gpu.seed_rand(1234)


class Deep_net:
    '''
    How to use this deep net:
    1. Load data into an array:
       data = [train + cv data without labels,
               labels for train and cv data,
               test set without labels]
       If your train and cv sets are separate, stack them and use the result as
       the array's first element.
    2. Set the train, cross validation and test set sizes (this test set has
       labels and is different from the test set above) by setting the
       set_sizes variable, e.g. set_sizes = [0.8, 0.2, 0] for an 80% train set
       and a 20% cross validation set; shuffle the set beforehand if you want
       to randomize the samples.
    3. What kind of problem do you use the net on?
       problem = 'classification' will use logistic units, softmax and will
       print the misclassification error.
       problem = 'regression' will use rectified linear units, a linear output
       unit and will print the root mean squared error.
       If you need probabilities with regression, make sure to set
       clip_values = 1 to clip the values into a probability.
    In case of the other parameters do this:
# the snippet below assumes the usual aliases `import numpy as np` and
# `import gnumpy as gp` from the surrounding module
import numpy as np
import gnumpy as gp

from ml.rbm.util import sample_binomial
from ml.common.util import myrand as mr

# parameters
n_rows = 1000
n_cols = 784
n_iters = 60000 / n_rows * 3


def write_bytes_to_file(file, data):
    for x in data:
        file.write(chr(int(x)))

# use gnumpy rng
gp.seed_rand(1)
with open("rng_gnumpy.dat", "wb") as file:
    for i in range(n_iters):
        print "%d / %d\r" % (i, n_iters),
        gx = gp.rand((n_rows, n_cols))
        x = gp.as_numpy_array(gx)
        fx = np.reshape(x, -1)
        bx = np.floor(fx * 256)
        write_bytes_to_file(file, bx)

# use lcg rng
mr.seed(1)
with open("rng_lcg.dat", "wb") as file:
    for i in range(n_iters):
        print "%d / %d\r" % (i, n_iters),
        gx = mr.rand((n_rows, n_cols))
def mnist_mmd_input_space(n_hids=[10, 64, 256, 256, 1024], sigma=[2, 5, 10, 20, 40, 80],
        learn_rate=2, momentum=0.9):
    """
    n_hids: number of hidden units on all layers (top-down) in the generative network.
    sigma: a list of scales used for the kernel
    learn_rate, momentum: parameters for the learning process

    return: KDE log_likelihood on validation set.
    """
    gnp.seed_rand(8)
    x_train, x_val, x_test = mnistio.load_data()

    print ''
    print 'Training data: %d x %d' % x_train.shape

    in_dim = n_hids[0]
    out_dim = x_train.shape[1]

    net = gen.StochasticGenerativeNet(in_dim, out_dim)
    for i in range(1, len(n_hids)):
        net.add_layer(n_hids[i], nonlin_type=ly.NONLIN_NAME_RELU, dropout=0)
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)

    # place holder loss
    net.set_loss(ls.LOSS_NAME_MMDGEN, loss_after_nonlin=True, sigma=80, loss_weight=1000)

    print ''
    print '========'
    print 'Training'
    print '========'
    print ''
    print net
    print ''

    mmd_learner = gen.StochasticGenerativeNetLearner(net)
    mmd_learner.load_data(x_train)

    output_base = OUTPUT_BASE_DIR + '/mnist/input_space'

    #sigma = [2,5,10,20,40,80]
    sigma_weights = [1, 1, 1, 1, 1, 1]
    #learn_rate = 1
    #momentum = 0.9
    minibatch_size = 1000
    n_sample_update_iters = 1
    max_iters = 40000
    i_checkpoint = 2000

    output_dir = output_base + '/nhids_%s_sigma_%s_lr_%s_m_%s' % (
            '_'.join([str(nh) for nh in n_hids]),
            '_'.join([str(s) for s in sigma]),
            str(learn_rate), str(momentum))

    print ''
    print '>>>> output_dir = %s' % output_dir
    print ''

    mmd_learner.set_output_dir(output_dir)

    #net.set_loss(ls.LOSS_NAME_MMDGEN_MULTISCALE, loss_after_nonlin=True, sigma=sigma, scale_weight=sigma_weights, loss_weight=1000)
    net.set_loss(ls.LOSS_NAME_MMDGEN_SQRT_GAUSSIAN, loss_after_nonlin=True, sigma=sigma,
            scale_weight=sigma_weights, loss_weight=1000)
    print '**********************************'
    print net.loss
    print '**********************************'
    print ''

    def f_checkpoint(i_iter, w):
        mmd_learner.save_checkpoint('%d' % i_iter)

    mmd_learner.train_sgd(minibatch_size=minibatch_size,
            n_samples_per_update=minibatch_size,
            n_sample_update_iters=n_sample_update_iters,
            learn_rate=learn_rate, momentum=momentum, weight_decay=0,
            learn_rate_schedule={10000: learn_rate / 10.0},
            momentum_schedule={10000: 1 - (1 - momentum) / 10.0},
            learn_rate_drop_iters=0, decrease_type='linear',
            adagrad_start_iter=0, max_iters=max_iters, iprint=100,
            i_exe=i_checkpoint, f_exe=f_checkpoint)

    mmd_learner.save_model()

    print ''
    print '===================='
    print 'Evaluating the model'
    print '===================='
    print ''

    log_prob, std, sigma = ev.kde_eval_mnist(net, x_val, verbose=False)
    test_log_prob, test_std, _ = ev.kde_eval_mnist(net, x_test, sigma_range=[sigma],
            verbose=False)

    print 'Validation: %.2f (%.2f)' % (log_prob, std)
    print 'Test      : %.2f (%.2f)' % (test_log_prob, test_std)
    print ''

    write_config(output_dir + '/params_and_results.cfg', {
        'n_hids': n_hids,
        'sigma': sigma,
        'sigma_weights': sigma_weights,
        'learn_rate': learn_rate,
        'momentum': momentum,
        'minibatch_size': minibatch_size,
        'n_sample_update_iters': n_sample_update_iters,
        'max_iters': max_iters,
        'i_checkpoint': i_checkpoint,
        'val_log_prob': log_prob,
        'val_std': std,
        'test_log_prob': test_log_prob,
        'test_std': test_std})

    print '>>>> output_dir = %s' % output_dir
    print ''

    return log_prob
def tfd_mmd_code_space(ae_n_hids=[512, 512, 128], ae_dropout=[0.1, 0.1, 0.1],
        ae_learn_rate=1e-1, ae_momentum=0, mmd_n_hids=[10, 64, 256, 256, 1024],
        mmd_sigma=[1, 2, 5, 10, 20, 40], mmd_learn_rate=1e-1, mmd_momentum=0.9):
    """
    ae_n_hids: #hid for the encoder, bottom-up
    ae_dropout: the amount of dropout for each layer in the encoder, same order
    ae_learn_rate, ae_momentum: .
    mmd_n_hids: #hid for the generative net, top-down
    mmd_sigma: scale of the kernel
    mmd_learn_rate, mmd_momentum: .

    Return KDE log_likelihood on the validation set.
    """
    gnp.seed_rand(8)
    x_train, x_val, x_test = load_tfd_fold(0)

    common_output_base = OUTPUT_BASE_DIR + '/tfd/code_space'
    output_base = common_output_base + '/aeh_%s_dr_%s_aelr_%s_aem_%s_nh_%s_s_%s_lr_%s_m_%s' % (
            cat_list(ae_n_hids), cat_list(ae_dropout), str(ae_learn_rate),
            str(ae_momentum), cat_list(mmd_n_hids), cat_list(mmd_sigma),
            str(mmd_learn_rate), str(mmd_momentum))

    #######################
    # Auto-encoder training
    #######################

    n_dims = x_train.shape[1]
    h_dim = ae_n_hids[-1]

    encoder = nn.NeuralNet(n_dims, h_dim)
    for i in range(len(ae_n_hids) - 1):
        encoder.add_layer(ae_n_hids[i], nonlin_type=ly.NONLIN_NAME_SIGMOID,
                dropout=ae_dropout[i])
    encoder.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=ae_dropout[-1])

    decoder = nn.NeuralNet(h_dim, n_dims)
    for i in range(len(ae_n_hids) - 1)[::-1]:
        decoder.add_layer(ae_n_hids[i], nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)
    decoder.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)
    decoder.set_loss(ls.LOSS_NAME_BINARY_CROSSENTROPY, loss_weight=1)

    autoenc = nn.AutoEncoder(encoder=encoder, decoder=decoder)

    print ''
    print autoenc
    print ''

    learn_rate = ae_learn_rate
    final_momentum = ae_momentum

    max_iters = 15000
    #max_iters = 200

    nn_pretrainer = learner.AutoEncoderPretrainer(autoenc)
    nn_pretrainer.load_data(x_train)
    nn_pretrainer.pretrain_network(learn_rate=1e-1, momentum=0.5, weight_decay=0,
            minibatch_size=100, max_grad_norm=10, max_iters=max_iters, iprint=100)

    nn_learner = learner.Learner(autoenc)
    nn_learner.set_output_dir(output_base + '/ae')
    nn_learner.load_data(x_train, x_train)

    def f_checkpoint(i_iter, w):
        nn_learner.save_checkpoint('%d' % i_iter)

    nn_learner.train_sgd(learn_rate=learn_rate, momentum=0, weight_decay=0,
            minibatch_size=100, learn_rate_schedule=None,
            momentum_schedule={50: 0.5, 200: final_momentum},
            max_grad_norm=10, learn_rate_drop_iters=0, decrease_type='linear',
            adagrad_start_iter=0, max_iters=max_iters, iprint=100,
            i_exe=2000, f_exe=f_checkpoint)
    nn_learner.save_checkpoint('best')

    ##################
    # Training MMD net
    ##################

    n_hids = mmd_n_hids
    in_dim = n_hids[0]
    out_dim = autoenc.encoder.out_dim

    net = gen.StochasticGenerativeNetWithAutoencoder(in_dim, out_dim, autoenc)
    for i in range(1, len(n_hids)):
        net.add_layer(n_hids[i], nonlin_type=ly.NONLIN_NAME_RELU, dropout=0)
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)

    print ''
    print '========'
    print 'Training'
    print '========'
    print ''
    print net
    print ''

    mmd_learner = gen.StochasticGenerativeNetLearner(net)
    mmd_learner.load_data(x_train)

    sigma = mmd_sigma
    sigma_weights = [1] * len(sigma)
    learn_rate = mmd_learn_rate
    momentum = mmd_momentum

    minibatch_size = 1000
    n_sample_update_iters = 1
    max_iters = 48000
    #max_iters = 200
    i_checkpoint = 2000

    mmd_learner.set_output_dir(output_base + '/mmd')

    #net.set_loss(ls.LOSS_NAME_MMDGEN_MULTISCALE, loss_after_nonlin=True, sigma=sigma, scale_weight=sigma_weights, loss_weight=1000)
    net.set_loss(ls.LOSS_NAME_MMDGEN_SQRT_GAUSSIAN, loss_after_nonlin=True, sigma=sigma,
            scale_weight=sigma_weights, loss_weight=1000)
    print '**********************************'
    print net.loss
    print '**********************************'
    print ''

    def f_checkpoint(i_iter, w):
        mmd_learner.save_checkpoint('%d' % i_iter)

    mmd_learner.train_sgd(minibatch_size=minibatch_size,
            n_samples_per_update=minibatch_size,
            n_sample_update_iters=n_sample_update_iters,
            learn_rate=learn_rate, momentum=momentum, weight_decay=0,
            learn_rate_schedule={10000: learn_rate / 10.0},
            momentum_schedule={10000: 1 - (1 - momentum) / 10.0},
            learn_rate_drop_iters=0, decrease_type='linear',
            adagrad_start_iter=0, max_iters=max_iters, iprint=100,
            i_exe=i_checkpoint, f_exe=f_checkpoint)
    mmd_learner.save_model()

    # Evaluation
    print ''
    print '===================='
    print 'Evaluating the model'
    print '===================='
    print ''

    x_val = load_tfd_all_folds('val')
    x_test = load_tfd_all_folds('test')

    log_prob, std, sigma = ev.kde_eval_tfd(net, x_val, verbose=False)
    test_log_prob, test_std, _ = ev.kde_eval_tfd(net, x_test, sigma_range=[sigma],
            verbose=False)

    print 'Validation: %.2f (%.2f)' % (log_prob, std)
    print 'Test      : %.2f (%.2f)' % (test_log_prob, test_std)
    print ''

    write_config(output_base + '/params_and_results.cfg', {
        'ae_n_hids': ae_n_hids,
        'ae_dropout': ae_dropout,
        'ae_learn_rate': ae_learn_rate,
        'ae_momentum': ae_momentum,
        'mmd_n_hids': mmd_n_hids,
        'mmd_sigma': mmd_sigma,
        'mmd_sigma_weights': sigma_weights,
        'mmd_learn_rate': mmd_learn_rate,
        'mmd_momentum': mmd_momentum,
        'mmd_minibatch_size': minibatch_size,
        'mmd_n_sample_update_iters': n_sample_update_iters,
        'mmd_max_iters': max_iters,
        'mmd_i_checkpoint': i_checkpoint,
        'val_log_prob': log_prob,
        'val_std': std,
        'test_log_prob': test_log_prob,
        'test_std': test_std})

    print '>>>> output_dir = %s' % output_base
    print ''

    return log_prob
def seed_rand(seed):
    global _gnumpy_loaded
    random.seed(seed)
    numpy.random.seed(seed * 7)
    if _gnumpy_loaded:
        gp.seed_rand(seed * 13)
rather than opening a terminal and running it from there. The latter is much
more tedious, unless you are interested in inspecting the hidden states of the
RNN using the visualize function. But even then, it may be more convenient to
save the parameters by providing hf with save_freq = 5 (for instance), do the
learning without an interactive session, and then load the parameters from an
interactive session with r.hf.load(); load will know where the file is saved.
"""

# 0: make sure the experiment is 100% reproducible
seed = 20
import gnumpy
gnumpy.seed_rand(seed)
import numpy
numpy.random.seed(seed)

# 1: choose a data object. To change the problem,
# simply replace pathology.add with anything else;
# e.g., pathology.mult or pathology.xor; choose whatever you want.
#
# All problems (other than xor) are initially set to T=200. Feel free
# to edit opt.d.seq.pathological and create other problem variants: for
# example, change T, the batch size, or even invent a few problems of your own.
import opt.d.seq.pathological as pathology
# -*- coding: utf-8 -*-
import time

import numpy as np
import gnumpy as gp

from ml.apps.rbm import mnist_rbm_config as cfg
import ml.rbm.util as rbmutil
from ml.rbm.rbm import RestrictedBoltzmannMachine
from ml.rbm.ais import AnnealedImportanceSampler

# AIS parameters
gp.seed_rand(int(time.time()))
epoch = cfg.epochs - 1
#epoch = 9
ais_runs = 100
ais_gibbs_steps = 1
#ais_betas = np.linspace(0.0, 1.0, 1000)
ais_betas = np.concatenate((np.linspace(0.0, 0.5, 500, endpoint=False),
                            np.linspace(0.5, 0.9, 4000, endpoint=False),
                            np.linspace(0.9, 1.0, 10000)))
ais_base_samples = 10000
ais_base_chains = 1000
ais_base_gibbs_steps_between_samples = 1000
#ais_iterations = 10
ais_iterations = 1

# debug
check_base_rbm_partition_function = False
#np.seterr(all='raise')