def test_skip_linear_transform(self, BatchNormLayer):
    input_shape = (20, 30, 40)
    # random input tensor, beta, gamma
    input = (np.random.randn(*input_shape).astype(theano.config.floatX) +
             np.random.randn(1, 30, 1).astype(theano.config.floatX))
    beta = np.random.randn(30).astype(theano.config.floatX)
    gamma = np.random.randn(30).astype(theano.config.floatX)
    # create layers without beta or gamma
    layer1 = BatchNormLayer(input_shape, beta=None, gamma=gamma)
    layer2 = BatchNormLayer(input_shape, beta=beta, gamma=None)
    # check that one parameter is missing (3 of the usual 4:
    # beta, gamma, mean, inv_std)
    assert len(layer1.get_params()) == 3
    assert len(layer2.get_params()) == 3
    # call get_output_for()
    result1 = layer1.get_output_for(theano.tensor.constant(input),
                                    deterministic=False).eval()
    result2 = layer2.get_output_for(theano.tensor.constant(input),
                                    deterministic=False).eval()
    # compute expected results
    mean = input.mean(axis=(0, 2))
    std = np.sqrt(input.var(axis=(0, 2)) + layer1.epsilon)
    exp_result = (input - mean[None, :, None]) / std[None, :, None]
    exp_result1 = exp_result * gamma[None, :, None]  # no beta
    exp_result2 = exp_result + beta[None, :, None]   # no gamma
    # compare expected results to actual results
    tol = {'atol': 1e-5, 'rtol': 1e-6}
    assert np.allclose(result1, exp_result1, **tol)
    assert np.allclose(result2, exp_result2, **tol)
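# Note: the tests in this section receive ``BatchNormLayer`` as a pytest
# fixture rather than importing it directly. A minimal sketch of such a
# fixture, assuming the plain Theano implementation in lasagne.layers is
# the class under test (the real suite may parametrize over several
# implementations):
import pytest

@pytest.fixture
def BatchNormLayer():
    from lasagne.layers import BatchNormLayer
    return BatchNormLayer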
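# test_get_output_for below takes ``deterministic``, ``use_averages`` and
# ``update_averages`` arguments, which suggests it is driven by pytest
# parametrization. A plausible sketch covering every combination (an
# assumption; the exact values and ordering in the real suite may differ),
# where None means "use the layer's default behavior":
@pytest.mark.parametrize('update_averages', [None, False, True])
@pytest.mark.parametrize('use_averages', [None, False, True])
@pytest.mark.parametrize('deterministic', [False, True])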
def test_get_output_for(self, BatchNormLayer, deterministic,
                        use_averages, update_averages):
    input_shape = (20, 30, 40)
    # random input tensor, beta, gamma, mean, inv_std and alpha
    input = (np.random.randn(*input_shape).astype(theano.config.floatX) +
             np.random.randn(1, 30, 1).astype(theano.config.floatX))
    beta = np.random.randn(30).astype(theano.config.floatX)
    gamma = np.random.randn(30).astype(theano.config.floatX)
    mean = np.random.randn(30).astype(theano.config.floatX)
    inv_std = np.random.rand(30).astype(theano.config.floatX)
    alpha = np.random.rand()
    # create layer (with default axes: normalize over all but second axis)
    layer = BatchNormLayer(input_shape, beta=beta, gamma=gamma, mean=mean,
                           inv_std=inv_std, alpha=alpha)
    # call get_output_for()
    kwargs = {'deterministic': deterministic}
    if use_averages is not None:
        kwargs['batch_norm_use_averages'] = use_averages
    else:
        use_averages = deterministic
    if update_averages is not None:
        kwargs['batch_norm_update_averages'] = update_averages
    else:
        update_averages = not deterministic
    result = layer.get_output_for(theano.tensor.constant(input),
                                  **kwargs).eval()
    # compute expected results and expected updated parameters
    input_mean = input.mean(axis=(0, 2))
    input_inv_std = 1 / np.sqrt(input.var(axis=(0, 2)) + layer.epsilon)
    if use_averages:
        use_mean, use_inv_std = mean, inv_std
    else:
        use_mean, use_inv_std = input_mean, input_inv_std
    bcast = (np.newaxis, slice(None), np.newaxis)
    exp_result = (input - use_mean[bcast]) * use_inv_std[bcast]
    exp_result = exp_result * gamma[bcast] + beta[bcast]
    if update_averages:
        new_mean = (1 - alpha) * mean + alpha * input_mean
        new_inv_std = (1 - alpha) * inv_std + alpha * input_inv_std
    else:
        new_mean, new_inv_std = mean, inv_std
    # compare expected results to actual results
    tol = {'atol': 1e-5, 'rtol': 1e-6}
    assert np.allclose(layer.mean.get_value(), new_mean, **tol)
    assert np.allclose(layer.inv_std.get_value(), new_inv_std, **tol)
    assert np.allclose(result, exp_result, **tol)
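# For context: in normal use, the flags exercised above are not passed to
# get_output_for() directly; they propagate through
# lasagne.layers.get_output(). A sketch, assuming a hypothetical
# ``network`` built from lasagne layers:
import lasagne

def build_train_and_test_outputs(network):
    # training pass: batch statistics are used, running averages updated
    train_out = lasagne.layers.get_output(network, deterministic=False)
    # inference pass: the stored running averages are used instead
    test_out = lasagne.layers.get_output(network, deterministic=True)
    return train_out, test_out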