def batch_normalize(tensor_in, epsilon=1e-5, convnet=False, decay=0.9,
                    scale_after_normalization=True):
  """Batch normalization.

  Args:
    tensor_in: input `Tensor`, 4D shape: [batch, in_height, in_width, in_depth].
    epsilon: A small float added to variance to avoid division by zero.
    convnet: Whether this is for convolutional net use. If `True`, moments
      will sum across axes [0, 1, 2]. Otherwise, only [0].
    decay: Decay rate for the exponential moving average.
    scale_after_normalization: Whether to scale after normalization.
  """
  shape = tensor_in.get_shape().as_list()

  with vs.variable_scope("batch_norm"):
    gamma = vs.get_variable(
        "gamma", [shape[-1]],
        initializer=init_ops.random_normal_initializer(1.0, 0.02))
    beta = vs.get_variable(
        "beta", [shape[-1]],
        initializer=init_ops.constant_initializer(0.0))
    ema = moving_averages.ExponentialMovingAverage(decay=decay)
    if convnet:
      assign_mean, assign_var = nn.moments(tensor_in, [0, 1, 2])
    else:
      assign_mean, assign_var = nn.moments(tensor_in, [0])
    ema_assign_op = ema.apply([assign_mean, assign_var])
    ema_mean, ema_var = ema.average(assign_mean), ema.average(assign_var)

    def update_mean_var():
      """Internal function that updates mean and variance during training."""
      with ops.control_dependencies([ema_assign_op]):
        return array_ops_.identity(assign_mean), array_ops_.identity(assign_var)

    is_training = array_ops_.squeeze(ops.get_collection("IS_TRAINING"))
    mean, variance = control_flow_ops.cond(is_training, update_mean_var,
                                           lambda: (ema_mean, ema_var))
    return nn.batch_norm_with_global_normalization(
        tensor_in, mean, variance, beta, gamma, epsilon,
        scale_after_normalization=scale_after_normalization)
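# Usage sketch (illustrative; not part of the original source). batch_normalize
# reads a scalar boolean tensor from the "IS_TRAINING" graph collection to
# switch between batch statistics (training) and the stored averages
# (inference), so the caller is expected to register that tensor itself.
# Assumes a TensorFlow 1.x-style API; the placeholder and wrapper names below
# are hypothetical.
import tensorflow as tf

def _example_batch_normalize_usage(images):
  is_training = tf.placeholder(tf.bool, shape=[], name="is_training")
  tf.add_to_collection("IS_TRAINING", is_training)
  # 4D input [batch, in_height, in_width, in_depth]; normalize over [0, 1, 2].
  return batch_normalize(images, convnet=True), is_training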
def _testBatchNormGradient(self, param_index, tag, scale_after_normalization,
                           err_tolerance=1e-11):
  x_shape = [3, 5, 4, 5]
  param_shape = [5]
  np.random.seed(1)  # Make it reproducible.
  x_val = np.random.random_sample(x_shape).astype(np.float64)
  m_val = np.random.random_sample(param_shape).astype(np.float64)
  v_val = np.random.random_sample(param_shape).astype(np.float64)
  beta_val = np.random.random_sample(param_shape).astype(np.float64)
  gamma_val = np.random.random_sample(param_shape).astype(np.float64)
  with self.test_session():
    x = constant_op.constant(x_val, name="x")
    m = constant_op.constant(m_val, name="m")
    v = constant_op.constant(v_val, name="v")
    beta = constant_op.constant(beta_val, name="beta")
    gamma = constant_op.constant(gamma_val, name="gamma")
    epsilon = 0.001
    # If scale_after_normalization is False, backprop for gamma will be 0.
    # gamma is unchanged.
    output = nn.batch_norm_with_global_normalization(
        x, m, v, beta, gamma, epsilon, scale_after_normalization)
    all_params = [x, m, v, beta, gamma]
    all_shapes = [x_shape, param_shape, param_shape, param_shape, param_shape]
    err = gc.ComputeGradientError(all_params[param_index],
                                  all_shapes[param_index], output, x_shape)
    print("Batch normalization %s gradient %s scale err = " %
          (tag, "with" if scale_after_normalization else "without"), err)
    self.assertLess(err, err_tolerance)
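# Illustrative driver methods (not shown in this excerpt) for the gradient
# helper above: param_index selects which of [x, m, v, beta, gamma] gets its
# gradient checked numerically, and tag only labels the printed output. The
# method names below are assumptions, not the original test names.
def testBatchNormInputGradient(self):
  self._testBatchNormGradient(0, "x", True)

def testBatchNormGammaGradient(self):
  # With scale_after_normalization=False the backprop into gamma is expected
  # to be zero (see the comment inside _testBatchNormGradient).
  self._testBatchNormGradient(4, "gamma", False)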
def testBatchNorm(self):
  x_shape = [3, 5, 4, 2]
  param_shape = [2]
  x_val = np.random.random_sample(x_shape).astype(np.float32)
  m_val = np.random.random_sample(param_shape).astype(np.float32)
  v_val = np.random.random_sample(param_shape).astype(np.float32)
  beta_val = np.random.random_sample(param_shape).astype(np.float32)
  gamma_val = np.random.random_sample(param_shape).astype(np.float32)
  for use_gpu in [True, False]:
    with self.test_session(use_gpu=use_gpu) as sess:
      x = constant_op.constant(x_val, name="x")
      m = constant_op.constant(m_val, name="m")
      v = constant_op.constant(v_val, name="v")
      beta = constant_op.constant(beta_val, name="beta")
      gamma = constant_op.constant(gamma_val, name="gamma")
      epsilon = 0.001
      for scale_after_normalization in [True, False]:
        bn = nn.batch_norm_with_global_normalization(
            x, m, v, beta, gamma, epsilon, scale_after_normalization)
        on = self._opsBatchNorm(x, m, v, beta, gamma, epsilon,
                                scale_after_normalization)
        np_batch_norm = self._npBatchNorm(x_val, m_val, v_val, beta_val,
                                          gamma_val, epsilon,
                                          scale_after_normalization)
        tf_batch_norm, ops_batch_norm = sess.run([bn, on])
        self.assertAllClose(np_batch_norm, tf_batch_norm, atol=0.000001)
        self.assertAllClose(np_batch_norm, ops_batch_norm, atol=0.000001)
        self.assertAllClose(tf_batch_norm, ops_batch_norm, atol=0.000001)
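# testBatchNorm above compares against self._npBatchNorm, which is not included
# in this excerpt. A minimal NumPy sketch of such a reference, assuming the
# standard global-normalization formula (gamma is applied only when
# scale_after_normalization is True):
def _np_batch_norm_reference(x, m, v, beta, gamma, epsilon,
                             scale_after_normalization):
  y = (x - m) / np.sqrt(v + epsilon)
  if scale_after_normalization:
    y = y * gamma
  return y + beta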
def batch_normalize(tensor_in, epsilon=1e-5, convnet=False, decay=0.9,
                    scale_after_normalization=True):
  """Batch normalization.

  Args:
    tensor_in: input `Tensor`, 4D shape: [batch, in_height, in_width, in_depth].
    epsilon: A small float added to variance to avoid division by zero.
    convnet: Whether this is for convolutional net use. If `True`, moments
      will sum across axes `[0, 1, 2]`. Otherwise, only `[0]`.
    decay: Decay rate for exponential moving average.
    scale_after_normalization: Whether to scale after normalization.

  Returns:
    A batch-normalized `Tensor`.
  """
  shape = tensor_in.get_shape().as_list()

  with vs.variable_scope("batch_norm"):
    gamma = vs.get_variable(
        "gamma", [shape[-1]],
        initializer=init_ops.random_normal_initializer(1., 0.02))
    beta = vs.get_variable(
        "beta", [shape[-1]],
        initializer=init_ops.constant_initializer(0.))
    moving_mean = vs.get_variable(
        "moving_mean",
        shape=[shape[-1]],
        initializer=init_ops.zeros_initializer,
        trainable=False)
    moving_var = vs.get_variable(
        "moving_var",
        shape=[shape[-1]],
        initializer=init_ops.ones_initializer,
        trainable=False)

    def _update_mean_var():
      """Internal function that updates mean and variance during training."""
      axis = [0, 1, 2] if convnet else [0]
      mean, var = nn.moments(tensor_in, axis)
      update_moving_mean = moving_averages.assign_moving_average(
          moving_mean, mean, decay)
      update_moving_var = moving_averages.assign_moving_average(
          moving_var, var, decay)
      with ops.control_dependencies([update_moving_mean, update_moving_var]):
        return array_ops_.identity(mean), array_ops_.identity(var)

    is_training = array_ops_.squeeze(ops.get_collection("IS_TRAINING"))
    mean, variance = control_flow_ops.cond(is_training, _update_mean_var,
                                           lambda: (moving_mean, moving_var))
    return nn.batch_norm_with_global_normalization(
        tensor_in, mean, variance, beta, gamma, epsilon,
        scale_after_normalization=scale_after_normalization)
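# This variant keeps the statistics in dedicated, non-trainable moving_mean /
# moving_var variables updated with moving_averages.assign_moving_average. As a
# plain-Python sketch of the recurrence that update applies elementwise on each
# training step (bias-correction options ignored):
def _ema_update_sketch(moving, batch_value, decay=0.9):
  return decay * moving + (1 - decay) * batch_value

# e.g. starting from the zeros-initialized moving_mean with a batch mean of 1.0:
# _ema_update_sketch(0.0, 1.0)  ->  ~0.1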