def test_random_feature_mmd_loss_approximation(sigma=[1, 10],
        scale_weight=[0.5, 1], n_features=3):
    print 'Testing random feature MMD loss approximation error'
    n_dims = 2
    n_target = 5
    n_pred = 5

    target = gnp.rand(n_target, n_dims)
    pred = gnp.rand(n_pred, n_dims)

    rand_mmd = ls.get_loss_from_type_name(ls.LOSS_NAME_RANDOM_FEATURE_MMDGEN,
            sigma=sigma, scale_weight=scale_weight, n_features=n_features)
    rand_mmd.load_target(target)
    print rand_mmd

    mmd = ls.get_loss_from_type_name(ls.LOSS_NAME_MMDGEN_MULTISCALE_PAIR,
            sigma=sigma, scale_weight=scale_weight)
    mmd.load_target(target)

    rand_loss, rand_grad = rand_mmd.compute_loss_and_grad(pred, compute_grad=True)
    true_loss, true_grad = mmd.compute_loss_and_grad(pred, compute_grad=True)

    test_passed = test_vec_pair(rand_grad.asarray().ravel(), 'Approximate Gradient',
            true_grad.asarray().ravel(), '       True Gradient', error_thres=1e-2)
    test_passed = test_vec_pair(np.array([rand_loss]), 'Approximate Loss',
            np.array([true_loss]), '       True Loss', error_thres=1e-2) \
            and test_passed
    print ''
    return test_passed

def test_databias_loss(loss_type, **kwargs):
    print 'Testing Loss <' + loss_type + '> ' \
            + ', '.join([str(k) + '=' + str(v) for k, v in kwargs.iteritems()])

    n_cases = 5
    n_datasets = 3
    in_dim = 2

    x = gnp.randn(n_cases, in_dim)
    s = np.arange(n_cases) % n_datasets

    loss = ls.get_loss_from_type_name(loss_type)
    loss.load_target(s, K=n_datasets, **kwargs)

    def f(w):
        return loss.compute_loss_and_grad(w.reshape(x.shape),
                compute_grad=True)[0]

    backprop_grad = loss.compute_loss_and_grad(
            x, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, x.asarray().ravel())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, '  Backpropagation Gradient')
    print ''
    return test_passed

def test_linear_time_minibatch_mmd_loss(sigma=1.0, minibatch_size=100):
    print 'Testing linear time minibatch MMD loss'
    n_dims = 3
    n_target = 10
    n_pred = 10

    target = gnp.randn(n_target, n_dims)
    pred = gnp.randn(n_pred, n_dims)

    mmd = ls.get_loss_from_type_name(ls.LOSS_NAME_LINEAR_TIME_MINIBATCH_MMDGEN,
            sigma=sigma, minibatch_size=minibatch_size)
    mmd.load_target(target)
    print mmd

    def f(w):
        return mmd.compute_loss_and_grad(w.reshape(pred.shape),
                compute_grad=False)[0]

    backprop_grad = mmd.compute_loss_and_grad(
            pred, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, pred.asarray().ravel())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, '  Backpropagation Gradient')
    print ''
    return test_passed

def test_linear_time_mmd_loss(sigma=1.0, use_modified_loss=False,
        use_absolute_value=False):
    print 'Testing linear time MMD loss, sigma=%s' % str(sigma)
    n_dims = 3
    n_target = 4
    n_pred = 4

    target = gnp.randn(n_target, n_dims)
    pred = gnp.randn(n_pred, n_dims)

    mmd = ls.get_loss_from_type_name(ls.LOSS_NAME_LINEAR_TIME_MMDGEN,
            sigma=sigma, use_modified_loss=use_modified_loss,
            use_absolute_value=use_absolute_value)
    mmd.load_target(target)

    def f(w):
        return mmd.compute_loss_and_grad(w.reshape(pred.shape),
                compute_grad=False)[0]

    backprop_grad = mmd.compute_loss_and_grad(
            pred, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, pred.asarray().ravel())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, '  Backpropagation Gradient')
    print ''
    return test_passed

def test_pair_mmd_loss_multiscale(sigma=[1, 10], scale_weight=None):
    print 'Testing generative pair multi-scale MMD loss'
    n_dims = 3
    n_target = 5
    n_pred = 4

    target = gnp.randn(n_target, n_dims)
    pred = gnp.randn(n_pred, n_dims)

    mmd = ls.get_loss_from_type_name(ls.LOSS_NAME_MMDGEN_MULTISCALE_PAIR,
            sigma=sigma, scale_weight=scale_weight)
    mmd.load_target(target)
    print mmd

    def f(w):
        return mmd.compute_loss_and_grad(w.reshape(pred.shape),
                compute_grad=False)[0]

    backprop_grad = mmd.compute_loss_and_grad(
            pred, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, pred.asarray().ravel())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, '  Backpropagation Gradient')
    print ''
    return test_passed

def test_random_feature_mmd_loss(sigma=[1, 10], scale_weight=[0.5, 1],
        n_features=3):
    print 'Testing random feature MMD loss'
    n_dims = 2
    n_target = 5
    n_pred = 5

    target = gnp.randn(n_target, n_dims)
    pred = gnp.randn(n_pred, n_dims)

    mmd = ls.get_loss_from_type_name(ls.LOSS_NAME_RANDOM_FEATURE_MMDGEN,
            sigma=sigma, scale_weight=scale_weight, n_features=n_features)
    mmd.load_target(target)
    print mmd

    def f(w):
        return mmd.compute_loss_and_grad(w.reshape(pred.shape),
                compute_grad=False)[0]

    backprop_grad = mmd.compute_loss_and_grad(
            pred, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, pred.asarray().ravel())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, '  Backpropagation Gradient')
    print ''
    return test_passed

def test_batch_normalization_layer():
    print 'Testing Batch Normalization layer'
    in_dim = 3
    n_cases = 5

    x = gnp.randn(n_cases, in_dim) * 2 + 3
    t = gnp.randn(n_cases, in_dim) * 2

    loss = ls.get_loss_from_type_name(ls.LOSS_NAME_SQUARED)
    loss.load_target(t)

    bn_layer = ly.BatchNormalizationLayer(in_dim)
    bn_layer.params.gamma = gnp.rand(in_dim)
    bn_layer.params.beta = gnp.rand(in_dim)

    w_0 = bn_layer.params.get_param_vec()

    y = bn_layer.forward_prop(x, is_test=False)
    _, loss_grad = loss.compute_not_weighted_loss_and_grad(y, True)
    bn_layer.backward_prop(loss_grad)
    backprop_grad = bn_layer.params.get_grad_vec()

    def f(w):
        bn_layer.params.set_param_from_vec(w)
        y = bn_layer.forward_prop(x, is_test=False)
        return loss.compute_not_weighted_loss_and_grad(y)[0]

    fdiff_grad = finite_difference_gradient(f, w_0)

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, '  Backpropagation Gradient',
            eps=_BN_GRAD_CHECK_EPS, use_rel_err=True)
    print ''
    return test_passed

def test_diff_kernel_mmd_loss(sigma=[1], scale_weight=[1], loss_name=None):
    assert loss_name is not None
    print 'Testing differentiable kernel MMD loss <%s>' % loss_name
    n_dims = 3
    n_target = 5
    n_pred = 4

    target = gnp.randn(n_target, n_dims)
    pred = gnp.randn(n_pred, n_dims)

    mmd = ls.get_loss_from_type_name(loss_name, sigma=sigma,
            scale_weight=scale_weight)
    mmd.load_target(target)
    print mmd

    def f(w):
        return mmd.compute_loss_and_grad(w.reshape(pred.shape),
                compute_grad=False)[0]

    backprop_grad = mmd.compute_loss_and_grad(
            pred, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, pred.asarray().ravel())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, '  Backpropagation Gradient')
    print ''
    return test_passed

def test_diff_kernel_per_example_mmd_loss(sigma=[1], scale_weight=[1],
        pred_per_example=1, target_per_example=[1], loss_name=None):
    assert loss_name is not None
    print 'Testing differentiable kernel per example MMD loss <%s>' % loss_name

    if len(target_per_example) == 1:
        target_per_example = target_per_example * 3

    n_dims = 3
    n_target = sum(target_per_example)
    n_pred = len(target_per_example) * pred_per_example

    pred = gnp.randn(n_pred, n_dims)
    target = []
    for i_target in target_per_example:
        target.append(gnp.randn(i_target, n_dims))

    mmd = ls.get_loss_from_type_name(loss_name, sigma=sigma,
            scale_weight=scale_weight, pred_per_example=pred_per_example)
    mmd.load_target(target)
    print mmd

    def f(w):
        return mmd.compute_loss_and_grad(w.reshape(pred.shape),
                compute_grad=False)[0]

    backprop_grad = mmd.compute_loss_and_grad(
            pred, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, pred.asarray().ravel())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, '  Backpropagation Gradient')
    print ''
    return test_passed

def test_generative_multi_scale_mmd_loss(sigma=[1, 10], scale_weight=None):
    print 'Testing generative multi-scale MMD loss, sigma=%s' % str(sigma)
    n_dims = 3
    n_target = 5
    n_pred = 4

    target = gnp.randn(n_target, n_dims)
    pred = gnp.randn(n_pred, n_dims)

    mmd = ls.get_loss_from_type_name(ls.LOSS_NAME_MMDGEN_MULTISCALE,
            sigma=sigma, scale_weight=scale_weight)
    mmd.load_target(target)

    def f(w):
        return mmd.compute_loss_and_grad(w.reshape(pred.shape),
                compute_grad=False)[0]

    backprop_grad = mmd.compute_loss_and_grad(
            pred, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, pred.asarray().ravel())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, '  Backpropagation Gradient')
    print ''
    return test_passed

def test_generative_mmd_loss(sigma=1):
    print 'Testing generative MMD loss, sigma=%g' % sigma
    n_dims = 3
    n_target = 5
    n_pred = 4

    target = gnp.randn(n_target, n_dims)
    pred = gnp.randn(n_pred, n_dims)

    mmd = ls.get_loss_from_type_name(ls.LOSS_NAME_MMDGEN, sigma=sigma)
    mmd.load_target(target)

    def f(w):
        return mmd.compute_loss_and_grad(w.reshape(pred.shape),
                compute_grad=False)[0]

    backprop_grad = mmd.compute_loss_and_grad(
            pred, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, pred.asarray().ravel())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, '  Backpropagation Gradient')
    print ''
    return test_passed

def test_layer(add_noise=False, no_loss=False, loss_after_nonlin=False,
        sparsity_weight=0, use_batch_normalization=False):
    print 'Testing layer ' + ('with noise' if add_noise else 'without noise') \
            + ', ' + ('without loss' if no_loss else 'with loss') \
            + ', ' + ('without sparsity' if sparsity_weight == 0 else 'with sparsity') \
            + ', ' + ('without batch normalization' if not use_batch_normalization
                    else 'with batch normalization')

    in_dim = 4
    out_dim = 3
    n_cases = 3
    sparsity = 0.1

    x = gnp.randn(n_cases, in_dim)
    t = gnp.randn(n_cases, out_dim)

    if no_loss:
        loss = None
    else:
        loss = ls.get_loss_from_type_name(ls.LOSS_NAME_SQUARED)
        loss.load_target(t)
        loss.set_weight(2.5)

    seed = 8
    dropout_rate = 0.5 if add_noise else 0
    nonlin_type = ly.NONLIN_NAME_SIGMOID if sparsity_weight > 0 \
            else ly.NONLIN_NAME_TANH

    layer = ly.Layer(in_dim, out_dim, nonlin_type=nonlin_type,
            dropout=dropout_rate, sparsity=sparsity,
            sparsity_weight=sparsity_weight, loss=loss,
            loss_after_nonlin=loss_after_nonlin,
            use_batch_normalization=use_batch_normalization)

    if sparsity_weight > 0:
        # disable smoothing over minibatches
        layer._sparsity_smoothing = 1.0

    w_0 = layer.params.get_param_vec()

    if add_noise:
        gnp.seed_rand(seed)
    layer.params.clear_gradient()
    layer.forward_prop(x, compute_loss=True, is_test=False)
    layer.backward_prop()
    backprop_grad = layer.params.get_grad_vec()

    def f(w):
        if add_noise:
            # this makes sure the same units are dropped out every time this
            # function is called
            gnp.seed_rand(seed)
        layer.params.set_param_from_vec(w)
        layer.forward_prop(x, compute_loss=True, is_test=False)
        if layer.sparsity_weight == 0:
            return layer.loss_value
        else:
            return layer.loss_value + layer._sparsity_objective

    fdiff_grad = finite_difference_gradient(f, w_0)

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, '  Backpropagation Gradient',
            eps=_GRAD_CHECK_EPS if not use_batch_normalization
                    else _BN_GRAD_CHECK_EPS,
            use_rel_err=use_batch_normalization)
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed

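# ---------------------------------------------------------------------------
# The two helpers below are *sketches*, not part of the original test module:
# the real finite_difference_gradient and test_vec_pair utilities used by the
# tests above are defined elsewhere in this repo. These minimal versions are
# reconstructed purely from how they are called here, so the default
# thresholds and the output format are assumptions.
# ---------------------------------------------------------------------------

def finite_difference_gradient(f, x, eps=1e-5):
    # Central-difference approximation of the gradient of the scalar function
    # f at x, which is assumed to be a 1-D numpy array (the tests above pass
    # in flattened arrays via .asarray().ravel()). Returns an array of the
    # same size, with grad[i] = (f(x + eps*e_i) - f(x - eps*e_i)) / (2*eps).
    grad = np.zeros(x.size)
    for i in range(x.size):
        x_plus = x.copy()
        x_minus = x.copy()
        x_plus[i] += eps
        x_minus[i] -= eps
        grad[i] = (f(x_plus) - f(x_minus)) / (2 * eps)
    return grad


def test_vec_pair(v1, name1, v2, name2, error_thres=1e-4, eps=None,
        use_rel_err=False):
    # Compare two vectors, print the maximum (optionally relative) error and
    # whether it falls below the threshold, and return the pass/fail result.
    # When given, `eps` overrides the default threshold; the 1e-4 default is
    # an assumption, not the value used in the original repo.
    err = np.abs(v1 - v2).max()
    if use_rel_err:
        err /= max(np.abs(v1).max(), np.abs(v2).max(), 1e-20)
    thres = eps if eps is not None else error_thres
    passed = err < thres
    print '%s vs %s: max %serr %g [%s]' % (name1.strip(), name2.strip(),
            'relative ' if use_rel_err else '', err,
            'PASSED' if passed else 'FAILED')
    return passed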