def call(self, x, mask=None):
    if self.mode == 0 or self.mode == 2:
        assert self.built, 'Layer must be built before being called'
        input_shape = self.input_spec[0].shape

        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        if self.mode == 2:
            x_normed, mean, std = K.normalize_batch_in_training(
                x, self.gamma, self.beta, reduction_axes,
                epsilon=self.epsilon)
        else:
            # mode 0
            # Note: the trailing `and False` disables the shared-layer check,
            # so the exception below is never raised.
            if self.called_with not in {None, x} and False:
                raise Exception('You are attempting to share a '
                                'same `BatchNormalization` layer across '
                                'different data flows. '
                                'This is not possible. '
                                'You should use `mode=2` in '
                                '`BatchNormalization`, which has '
                                'a similar behavior but is shareable '
                                '(see docs for a description of '
                                'the behavior).')
            self.called_with = x
            x_normed, mean, std = K.normalize_batch_in_training(
                x, self.gamma, self.beta, reduction_axes,
                epsilon=self.epsilon)

            self.updates = [K.moving_average_update(self.running_mean, mean, self.momentum),
                            K.moving_average_update(self.running_std, std, self.momentum)]

            if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
                x_normed_running = K.batch_normalization(
                    x, self.running_mean, self.running_std,
                    self.beta, self.gamma,
                    epsilon=self.epsilon)
            else:
                # need broadcasting
                broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
                broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                x_normed_running = K.batch_normalization(
                    x, broadcast_running_mean, broadcast_running_std,
                    broadcast_beta, broadcast_gamma,
                    epsilon=self.epsilon)

            # pick the normalized form of x corresponding to the training phase
            x_normed = K.in_train_phase(x_normed, x_normed_running)

    elif self.mode == 1:
        # sample-wise normalization
        m = K.mean(x, axis=-1, keepdims=True)
        std = K.sqrt(K.var(x, axis=-1, keepdims=True) + self.epsilon)
        x_normed = (x - m) / (std + self.epsilon)
        x_normed = self.gamma * x_normed + self.beta

    return x_normed
def update_erm():
    normed_training, mean, variance = K.normalize_batch_in_training(
        x=inputs, beta=None, gamma=None,
        reduction_axes=reduction_axes)
    self.add_update(
        [K.moving_average_update(self.values, mean, self.momentum)],
        inputs=inputs)
    return self.values
def call(self, inputs, training=None):
    inputs, spk_id = inputs
    spk_id = K.cast(K.flatten(spk_id)[0], 'int32')

    def normalize_inference():
        return K.normalize_batch_in_training(
            inputs, self.gamma[spk_id], self.beta[spk_id],
            [0, 1], epsilon=self.epsilon)[0]

    normed_training, mean, variance = K.normalize_batch_in_training(
        inputs, self.gamma[spk_id], self.beta[spk_id],
        [0, 1], epsilon=self.epsilon)

    sample_size = K.shape(inputs)[1]
    sample_size = K.cast(sample_size, dtype=K.dtype(inputs))
    variance *= sample_size / (sample_size - (1.0 + self.epsilon))

    self.add_update([
        K.moving_average_update(self.moving_mean, mean, self.momentum),
        K.moving_average_update(self.moving_variance, variance, self.momentum)
    ], inputs)

    # Pick the normalized form corresponding to the training phase.
    return K.in_train_phase(normed_training, normalize_inference,
                            training=training)
def call(self, x, mask=None):
    if self.mode == 0 or self.mode == 2:
        assert self.built, 'Layer must be built before being called'
        input_shape = K.int_shape(x)

        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        x_normed, mean, std = K.normalize_batch_in_training(
            x, self.gamma, self.beta, reduction_axes,
            epsilon=self.epsilon)

        if self.mode == 0:
            self.add_update([
                K.moving_average_update(self.running_mean, mean, self.momentum),
                K.moving_average_update(self.running_std, std, self.momentum)
            ], x)

            if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
                x_normed_running = K.batch_normalization(
                    x, self.running_mean, self.running_std,
                    self.beta, self.gamma,
                    epsilon=self.epsilon)
            else:
                # need broadcasting
                broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
                broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                x_normed_running = K.batch_normalization(
                    x, broadcast_running_mean, broadcast_running_std,
                    broadcast_beta, broadcast_gamma,
                    epsilon=self.epsilon)

            # pick the normalized form of x corresponding to the training phase
            x_normed = K.in_train_phase(x_normed, x_normed_running)

    elif self.mode == 1:
        # sample-wise normalization
        m = K.mean(x, axis=-1, keepdims=True)
        std = K.sqrt(K.var(x, axis=-1, keepdims=True) + self.epsilon)
        x_normed = (x - m) / (std + self.epsilon)
        x_normed = self.gamma * x_normed + self.beta
    else:
        return None

    return x_normed
def call(self, inputs, training=None):
    input_shape = K.int_shape(inputs)
    # Prepare broadcasting shape.
    ndim = len(input_shape)
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]

    normed, mean, variance = K.normalize_batch_in_training(
        inputs, self.gamma, self.beta, reduction_axes,
        epsilon=self.epsilon)
    return normed  # K.in_train_phase(normed,
def call(self, x, mask=None):
    output = K.conv2d(x, self.W, strides=self.subsample,
                      border_mode=self.border_mode,
                      dim_ordering=self.dim_ordering,
                      filter_shape=self.W_shape)

    # added for batch normalization
    input_shape = K.int_shape(output)
    axis = 1
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[axis] = input_shape[axis]

    output_normed, mean, std = K.normalize_batch_in_training(
        output, self.gamma, self.beta, reduction_axes,
        epsilon=self.epsilon)

    self.add_update([K.moving_average_update(self.running_mean, mean, self.momentum),
                     K.moving_average_update(self.running_std, std, self.momentum)],
                    output)

    if sorted(reduction_axes) == list(range(K.ndim(output)))[:-1]:
        output_normed_running = K.batch_normalization(
            output, self.running_mean, self.running_std,
            self.beta, self.gamma,
            epsilon=self.epsilon)
    else:
        # need broadcasting
        broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
        broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
        broadcast_beta = K.reshape(self.beta, broadcast_shape)
        broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        output_normed_running = K.batch_normalization(
            output, broadcast_running_mean, broadcast_running_std,
            broadcast_beta, broadcast_gamma,
            epsilon=self.epsilon)

    # pick the normalized form of output corresponding to the training phase
    output_normed = K.in_train_phase(output_normed, output_normed_running)

    if self.bias:
        if self.dim_ordering == 'th':
            output_normed += K.reshape(self.b, (1, self.nb_filter, 1, 1))
        elif self.dim_ordering == 'tf':
            output_normed += K.reshape(self.b, (1, 1, 1, self.nb_filter))
        else:
            raise ValueError('Invalid dim_ordering:', self.dim_ordering)

    output = self.activation(output_normed)
    return output
def call(self, x, mask=None):
    assert self.built, 'Layer must be built before being called'
    input_shape = K.int_shape(x)

    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    x_normed, mean, std = K.normalize_batch_in_training(
        x, self.gamma, self.beta, reduction_axes,
        epsilon=self.epsilon)
    return x_normed
def batch_norm(inputs, gamma, beta, dims, ind):
    """Normalize batch and update moving averages for mean and std.

    Input:
        inputs: (batchsize, n_points, k, n_features * 2) - edge_features
        gamma:  weight - gamma for batch normalization
        beta:   weight - beta for batch normalization
        dims:   list - dimensions along which to normalize
        ind:    int - indicating which weights to use

    Returns:
        During training:
            normed: (batchsize, n_points, k, n_features * 2) - normalized
                    batch of data using the actual batch for normalization
        Else:
            normed_moving: same, but using the updated average values
    """
    # Calculate normalized data, mean and std for batch
    normed, batch_mean, batch_var = K.normalize_batch_in_training(
        x=inputs, gamma=gamma, beta=beta, reduction_axes=dims)

    # Update the moving averages
    self.add_update([
        K.moving_average_update(self.moving_mean[ind], batch_mean, 0.9),
        K.moving_average_update(self.moving_var[ind], batch_var, 0.9)])

    # Calculate normalization using the averages
    normed_moving = K.batch_normalization(
        x=inputs, mean=self.moving_mean[ind], var=self.moving_var[ind],
        beta=beta, gamma=gamma)

    # If training return normed, else normed_moving
    return K.in_train_phase(normed, normed_moving)
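# --- Hedged usage sketch (not from the original source). `batch_norm` above
# references `self`, so it presumably lives inside a custom layer's `call`.
# Assuming hypothetical per-block weight lists `self.gammas` / `self.betas`
# and a hypothetical `compute_edge_features` helper, the surrounding call
# might look roughly like this:
def call(self, inputs, training=None):
    # edge_features: (batchsize, n_points, k, n_features * 2)
    edge_features = compute_edge_features(inputs)  # hypothetical helper
    # Normalize over the batch, point and neighbour axes, keeping the feature axis.
    return batch_norm(edge_features,
                      gamma=self.gammas[0], beta=self.betas[0],
                      dims=[0, 1, 2], ind=0)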
def normalize_inference():
    normed_inference, mean, variance = K.normalize_batch_in_training(
        inputs, self.gamma, self.beta, reduction_axes,
        epsilon=self.epsilon)
    if needs_broadcasting:
        # In this case we must explicitly broadcast all parameters.
        broadcast_mean = K.reshape(mean, broadcast_shape)
        broadcast_variance = K.reshape(variance, broadcast_shape)
        if self.center:
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
        else:
            broadcast_beta = None
        if self.scale:
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        else:
            broadcast_gamma = None
        return K.batch_normalization(
            inputs,
            broadcast_mean,
            broadcast_variance,
            broadcast_beta,
            broadcast_gamma,
            epsilon=self.epsilon)
    else:
        return K.batch_normalization(
            inputs,
            mean,
            variance,
            self.beta,
            self.gamma,
            epsilon=self.epsilon)
def call(self, inputs, training=None):
    orig_inputs = inputs
    # Create a fake input by concatenating reverse-complemented pairs
    # along the length dimension.
    inputs = K.concatenate(
        tensors=[inputs[:, :, :int(self.num_input_chan / 2)],
                 inputs[:, :, int(self.num_input_chan / 2):][:, :, ::-1]],
        axis=1)
    input_shape = K.int_shape(inputs)
    # Prepare broadcasting shape.
    ndim = len(input_shape)
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    # Broadcasting is needed when the reduction axes are not simply the
    # leading axes before the last (non-broadcast) dimension.
    needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

    def normalize_inference():
        if needs_broadcasting:
            # In this case we must explicitly broadcast all parameters.
            broadcast_moving_mean = K.reshape(self.moving_mean,
                                              broadcast_shape)
            broadcast_moving_variance = K.reshape(self.moving_variance,
                                                  broadcast_shape)
            if self.center:
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
            else:
                broadcast_beta = None
            if self.scale:
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            else:
                broadcast_gamma = None
            return K.batch_normalization(
                inputs,
                broadcast_moving_mean,
                broadcast_moving_variance,
                broadcast_beta,
                broadcast_gamma,
                # axis=self.axis,
                epsilon=self.epsilon)
        else:
            return K.batch_normalization(
                inputs,
                self.moving_mean,
                self.moving_variance,
                self.beta,
                self.gamma,
                # axis=self.axis,
                epsilon=self.epsilon)

    # If the learning phase is *static* and set to inference:
    if training in {0, False}:
        normed_inputs = normalize_inference()
    else:
        # If the learning phase is either dynamic or set to training:
        normed_training, mean, variance = K.normalize_batch_in_training(
            inputs, self.gamma, self.beta, reduction_axes,
            epsilon=self.epsilon)

        if K.backend() != 'cntk':
            sample_size = K.prod([K.shape(inputs)[axis]
                                  for axis in reduction_axes])
            sample_size = K.cast(sample_size, dtype=K.dtype(inputs))
            # sample variance - unbiased estimator of population variance
            variance *= sample_size / (sample_size - (1.0 + self.epsilon))

        self.add_update([K.moving_average_update(self.moving_mean,
                                                 mean,
                                                 self.momentum),
                         K.moving_average_update(self.moving_variance,
                                                 variance,
                                                 self.momentum)],
                        inputs)

        normed_inputs = K.in_train_phase(normed_training,
                                         normalize_inference,
                                         training=training)

    true_normed_inputs = K.concatenate(
        tensors=[normed_inputs[:, :self.input_len, :],
                 normed_inputs[:, self.input_len:, :][:, :, ::-1]],
        axis=2)
    return true_normed_inputs
def call(self, inputs, training=None):
    input_shape = K.int_shape(inputs)
    # Prepare broadcasting shape.
    ndim = len(input_shape)
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    # Determines whether broadcasting is needed.
    needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

    def normalize_inference():
        if needs_broadcasting:
            # In this case we must explicitly broadcast all parameters.
            broadcast_moving_mean = K.reshape(self.moving_mean,
                                              broadcast_shape)
            broadcast_moving_variance = K.reshape(self.moving_variance,
                                                  broadcast_shape)
            if self.center:
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
            else:
                broadcast_beta = None
            if self.scale:
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            else:
                broadcast_gamma = None
            return my_batch_normalization(inputs,
                                          broadcast_moving_mean,
                                          broadcast_moving_variance,
                                          broadcast_beta,
                                          broadcast_gamma,
                                          axis=self.axis,
                                          epsilon=self.epsilon)
        else:
            return my_batch_normalization(inputs,
                                          self.moving_mean,
                                          self.moving_variance,
                                          self.beta,
                                          self.gamma,
                                          axis=self.axis,
                                          epsilon=self.epsilon)

    # If the learning phase is *static* and set to inference:
    if training in {0, False}:
        return normalize_inference()

    # If the learning phase is either dynamic, or set to training:
    normed_training, mean, variance = K.normalize_batch_in_training(
        inputs, self.gamma, self.beta, reduction_axes,
        epsilon=self.epsilon)

    if K.backend() != 'cntk':
        sample_size = K.prod([K.shape(inputs)[axis]
                              for axis in reduction_axes])
        sample_size = K.cast(sample_size, dtype=K.dtype(inputs))
        if K.backend() == 'tensorflow' and sample_size.dtype != 'float32':
            sample_size = K.cast(sample_size, dtype='float32')
        # sample variance - unbiased estimator of population variance
        variance *= sample_size / (sample_size - (1.0 + self.epsilon))

    self.add_update([
        K.moving_average_update(self.moving_mean, mean, self.momentum),
        K.moving_average_update(self.moving_variance, variance, self.momentum)
    ], inputs)

    # Pick the normalized form corresponding to the training phase.
    return K.in_train_phase(normed_training, normalize_inference,
                            training=training)
def evaluate_layer(self, layer, weights, x, training=None):
    old_weights = weights
    weights = reshape_params(layer["weights_shapes"], weights)
    if layer["type"] == "dense":
        x = K.dot(x, weights[0])
        if layer["use_bias"]:
            x = x + weights[1]
        x = get_activation(layer["activation"])(x)
    elif layer["type"] == "conv1d":
        x = K.conv1d(x, weights[0],
                     strides=layer["strides"][0],
                     padding=layer["padding"],
                     data_format="channels_last",
                     dilation_rate=layer["dilation_rate"][0])
        if layer["use_bias"]:
            x = x + weights[1]
        x = get_activation(layer["activation"])(x)
    elif layer["type"] == "feature_transform":
        x = x * weights[0] + weights[1]
    elif layer["type"] == "lhuc":
        x = x * weights[0]
    elif layer["type"] == "renorm":
        dim = K.cast(K.shape(x)[-1], K.floatx())
        x = K.l2_normalize(x, axis=-1) * K.sqrt(dim)
    elif layer["type"] == "batchnorm":
        x = K.normalize_batch_in_training(x, weights[0], weights[1], [0, 1],
                                          epsilon=layer["epsilon"])[0]
    elif layer["type"] == "standard-batchnorm":
        moving_mean = self.moving_means[layer["name"]]
        moving_var = self.moving_vars[layer["name"]]

        def normalize_training():
            return K.batch_normalization(x, self.mean, self.variance,
                                         weights[1], weights[0],
                                         epsilon=layer["epsilon"])

        def normalize_inference():
            return K.batch_normalization(x, moving_mean, moving_var,
                                         weights[1], weights[0],
                                         epsilon=layer["epsilon"])

        if training is True:
            x = K.in_train_phase(normalize_training, normalize_inference)
        else:
            x = normalize_inference()
    elif layer["type"] == "batch-renorm":
        moving_mean = self.moving_means[layer["name"]]
        moving_var = self.moving_vars[layer["name"]]

        def normalize_training():
            r_max = 3.
            d_max = 5.
            std = K.sqrt(self.variance + layer["epsilon"])
            moving_std = K.sqrt(moving_var + layer["epsilon"])
            r = K.stop_gradient(K.clip(std / moving_std, 1. / r_max, r_max))
            d = K.stop_gradient(
                K.clip((self.mean - moving_mean) / moving_std, -d_max, d_max))
            # r = tf.Print(r, [r, K.shape(r)], "r")
            # d = tf.Print(d, [d, K.shape(d)], "d")
            x_norm = (x - self.mean) / std * r + d
            return weights[0] * x_norm + weights[1]
            # inv = tf.rsqrt(self.variance + layer["epsilon"]) * weights[0] * r
            # return x * inv - self.mean * inv + weights[0] * d + weights[1]

        def normalize_inference():
            return K.batch_normalization(x, moving_mean, moving_var,
                                         weights[1], weights[0],
                                         epsilon=layer["epsilon"])

        if training is True:
            x = K.in_train_phase(normalize_training, normalize_inference)
        else:
            x = normalize_inference()
    elif layer["type"] == "activation":
        x = get_activation(layer["activation"])(x)
    return x
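# --- Hedged sketch (not from the original source). `evaluate_layer` expects
# each `layer` to be a dict describing one layer; the keys used below are
# taken from the branches above, but the concrete values and the shape of
# `weights_shapes` are illustrative assumptions only (the `reshape_params`
# helper is not shown in the source):
example_layers = [
    {"type": "conv1d", "weights_shapes": [(5, 40, 128), (128,)],
     "strides": [1], "padding": "same", "dilation_rate": [1],
     "use_bias": True, "activation": "relu"},
    {"type": "batchnorm", "weights_shapes": [(128,), (128,)],
     "epsilon": 1e-3},
    {"type": "dense", "weights_shapes": [(128, 10), (10,)],
     "use_bias": True, "activation": "softmax"},
]
# x = self.evaluate_layer(example_layers[0], flat_conv_weights, x, training=True)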
def call(self, x, training=True):
    output = K.conv2d(x, self.W, strides=self.subsample,
                      padding=self.border_mode,
                      data_format=self.data_format)

    # batch normalization
    input_shape = K.int_shape(output)
    # Prepare broadcasting shape.
    ndim = len(input_shape)
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    # Determines whether broadcasting is needed.
    needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

    def normalize_inference():
        if needs_broadcasting:
            # In this case we must explicitly broadcast all parameters.
            broadcast_moving_mean = K.reshape(self.moving_mean,
                                              broadcast_shape)
            broadcast_moving_variance = K.reshape(self.moving_variance,
                                                  broadcast_shape)
            if self.center:
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
            else:
                broadcast_beta = None
            if self.scale:
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            else:
                broadcast_gamma = None
            return K.batch_normalization(
                output,
                broadcast_moving_mean,
                broadcast_moving_variance,
                broadcast_beta,
                broadcast_gamma,
                epsilon=self.epsilon)
        else:
            return K.batch_normalization(
                output,
                self.moving_mean,
                self.moving_variance,
                self.beta,
                self.gamma,
                epsilon=self.epsilon)

    # If the learning phase is *static* and set to inference:
    if training in {0, False}:
        output_normed = normalize_inference()
    else:
        # If the learning phase is either dynamic, or set to training:
        normed_training, mean, variance = K.normalize_batch_in_training(
            output, self.gamma, self.beta, reduction_axes,
            epsilon=self.epsilon)

        self.add_update([K.moving_average_update(self.moving_mean,
                                                 mean,
                                                 self.momentum),
                         K.moving_average_update(self.moving_variance,
                                                 variance,
                                                 self.momentum)],
                        output)

        # Pick the normalized form corresponding to the training phase.
        output_normed = K.in_train_phase(normed_training,
                                         normalize_inference,
                                         training=training)

    if self.bias:
        if self.dim_ordering == 'th':
            output_normed += K.reshape(self.b, (1, self.nb_filter, 1, 1))
        elif self.dim_ordering == 'tf':
            output_normed += K.reshape(self.b, (1, 1, 1, self.nb_filter))
        else:
            raise ValueError('Invalid dim_ordering:', self.dim_ordering)

    output = self.activation(output_normed)
    return output
def call(self, inputs, training=None):
    input_shape = K.int_shape(inputs)
    # Prepare broadcasting shape.
    ndim = len(input_shape)
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    # Determines whether broadcasting is needed.
    needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

    def normalize_inference():
        normed_inference, mean, variance = K.normalize_batch_in_training(
            inputs, self.gamma, self.beta, reduction_axes,
            epsilon=self.epsilon)
        if needs_broadcasting:
            # In this case we must explicitly broadcast all parameters.
            broadcast_mean = K.reshape(mean, broadcast_shape)
            broadcast_variance = K.reshape(variance, broadcast_shape)
            if self.center:
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
            else:
                broadcast_beta = None
            if self.scale:
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            else:
                broadcast_gamma = None
            return K.batch_normalization(
                inputs,
                broadcast_mean,
                broadcast_variance,
                broadcast_beta,
                broadcast_gamma,
                epsilon=self.epsilon)
        else:
            return K.batch_normalization(
                inputs,
                mean,
                variance,
                self.beta,
                self.gamma,
                epsilon=self.epsilon)

    # If the learning phase is *static* and set to inference:
    if training in {0, False}:
        return normalize_inference()

    # If the learning phase is either dynamic, or set to training:
    # In standard BN, mean and variance during training would be collected
    # and added to a moving average that's used at inference time.
    # In AdaBN we don't do this, as mean and var will be set on the test data.
    normed_training, mean, variance = K.normalize_batch_in_training(
        inputs, self.gamma, self.beta, reduction_axes,
        epsilon=self.epsilon)

    if K.backend() != 'cntk':
        sample_size = K.prod([K.shape(inputs)[axis]
                              for axis in reduction_axes])
        sample_size = K.cast(sample_size, dtype=K.dtype(inputs))
        # sample variance - unbiased estimator of population variance
        variance *= sample_size / (sample_size - (1.0 + self.epsilon))

    # self.add_update([K.moving_average_update(self.moving_mean,
    #                                          mean,
    #                                          self.momentum),
    #                  K.moving_average_update(self.moving_variance,
    #                                          variance,
    #                                          self.momentum)],
    #                 inputs)

    # Pick the normalized form corresponding to the training phase.
    return K.in_train_phase(normed_training,      # in training
                            normalize_inference,  # not in training
                            training=training)
def normalize_inference():
    return K.normalize_batch_in_training(
        inputs, self.gamma[spk_id], self.beta[spk_id],
        [0, 1], epsilon=self.epsilon)[0]
def call(self, inputs, training=None):
    input_shape = K.int_shape(inputs)
    # Prepare broadcasting shape.
    ndim = len(input_shape)
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    # Determines whether broadcasting is needed.
    needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

    def normalize_inference():
        if needs_broadcasting:
            # In this case we must explicitly broadcast all parameters.
            broadcast_moving_mean = K.reshape(self.moving_mean,
                                              broadcast_shape)
            broadcast_moving_variance = K.reshape(self.moving_variance,
                                                  broadcast_shape)
            if self.center:
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
            else:
                broadcast_beta = None
            if self.scale:
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            else:
                broadcast_gamma = None
            return K.batch_normalization(
                inputs,
                broadcast_moving_mean,
                broadcast_moving_variance,
                broadcast_beta,
                broadcast_gamma,
                epsilon=self.epsilon)
        else:
            return K.batch_normalization(
                inputs,
                self.moving_mean,
                self.moving_variance,
                self.beta,
                self.gamma,
                epsilon=self.epsilon)

    # If the learning phase is *static* and set to inference:
    if training in {0, False}:
        return normalize_inference()

    # If the learning phase is either dynamic, or set to training:
    normed_training, mean, variance = K.normalize_batch_in_training(
        inputs, self.gamma, self.beta, reduction_axes,
        epsilon=self.epsilon)

    self.add_update([K.moving_average_update(self.moving_mean,
                                             mean,
                                             self.momentum),
                     K.moving_average_update(self.moving_variance,
                                             variance,
                                             self.momentum)],
                    inputs)

    # Pick the normalized form corresponding to the training phase.
    return K.in_train_phase(normed_training, normalize_inference,
                            training=training)
def call(self, x, mask=None):
    if self.mode == 0 or self.mode == 2:
        assert self.built, 'Layer must be built before being called'
        input_shape = K.int_shape(x)

        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        # mean_batch, var_batch = K.moments(x, reduction_axes, shift=None, keep_dims=False)
        normed, mean_batch, var_batch = K.normalize_batch_in_training(
            x, self.gamma, self.beta, reduction_axes,
            epsilon=self.epsilon)

        std_batch = K.sqrt(var_batch + self.epsilon)

        r_max_value = K.get_value(self.r_max)
        r = std_batch / (K.sqrt(self.running_std + self.epsilon))
        r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value))

        d_max_value = K.get_value(self.d_max)
        d = (mean_batch - self.running_mean) / K.sqrt(self.running_std + self.epsilon)
        d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value))

        if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
            x_normed_batch = (x - mean_batch) / std_batch
            x_normed = (x_normed_batch * r + d) * self.gamma + self.beta
        else:
            # need broadcasting
            broadcast_mean = K.reshape(mean_batch, broadcast_shape)
            broadcast_std = K.reshape(std_batch, broadcast_shape)
            broadcast_r = K.reshape(r, broadcast_shape)
            broadcast_d = K.reshape(d, broadcast_shape)
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)

            x_normed_batch = (x - broadcast_mean) / broadcast_std
            x_normed = (x_normed_batch * broadcast_r + broadcast_d) * broadcast_gamma + broadcast_beta

        # explicit update to moving mean and standard deviation
        self.add_update([
            K.moving_average_update(self.running_mean, mean_batch, self.momentum),
            K.moving_average_update(self.running_std, std_batch ** 2, self.momentum)
        ], x)

        # update r_max and d_max
        t_val = K.get_value(self.t)
        r_val = self.r_max_value / (
            1 + (self.r_max_value - 1) * np.exp(-t_val))
        d_val = self.d_max_value / (1 + (
            (self.d_max_value / 1e-3) - 1) * np.exp(-(2 * t_val)))
        t_val += float(self.t_delta)

        self.add_update([
            K.update(self.r_max, r_val),
            K.update(self.d_max, d_val),
            K.update(self.t, t_val)
        ], x)

        if self.mode == 0:
            if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
                x_normed_running = K.batch_normalization(
                    x, self.running_mean, self.running_std,
                    self.beta, self.gamma,
                    epsilon=self.epsilon)
            else:
                # need broadcasting
                broadcast_running_mean = K.reshape(self.running_mean,
                                                   broadcast_shape)
                broadcast_running_std = K.reshape(self.running_std,
                                                  broadcast_shape)
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                x_normed_running = K.batch_normalization(
                    x, broadcast_running_mean, broadcast_running_std,
                    broadcast_beta, broadcast_gamma,
                    epsilon=self.epsilon)

            # pick the normalized form of x corresponding to the training phase;
            # for batch renormalization, inference time remains the same as batchnorm
            x_normed = K.in_train_phase(x_normed, x_normed_running)

    elif self.mode == 1:
        # sample-wise normalization
        m = K.mean(x, axis=self.axis, keepdims=True)
        std = K.sqrt(
            K.var(x, axis=self.axis, keepdims=True) + self.epsilon)
        x_normed_batch = (x - m) / (std + self.epsilon)

        r_max_value = K.get_value(self.r_max)
        r = std / (self.running_std + self.epsilon)
        r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value))

        d_max_value = K.get_value(self.d_max)
        d = (m - self.running_mean) / (self.running_std + self.epsilon)
        d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value))

        x_normed = ((x_normed_batch * r) + d) * self.gamma + self.beta

        # update r_max and d_max
        t_val = K.get_value(self.t)
        r_val = self.r_max_value / (
            1 + (self.r_max_value - 1) * np.exp(-t_val))
        d_val = self.d_max_value / (1 + (
            (self.d_max_value / 1e-3) - 1) * np.exp(-(2 * t_val)))
        t_val += float(self.t_delta)

        self.add_update([
            K.update(self.r_max, r_val),
            K.update(self.d_max, d_val),
            K.update(self.t, t_val)
        ], x)

    return x_normed
def call(self, x, mask=None):
    input_shape = x.get_shape().as_list()
    if self.dim_ordering == 'th':
        rows = input_shape[2]
        cols = input_shape[3]
    elif self.dim_ordering == 'tf':
        rows = input_shape[1]
        cols = input_shape[2]
    else:
        raise ValueError('Invalid dim_ordering:', self.dim_ordering)

    rows = 2 * rows
    cols = 2 * cols

    if self.dim_ordering == 'th':
        outputShape = (self.batch_size, 3, rows, cols)  # 32 = input_shape[0]
    elif self.dim_ordering == 'tf':
        outputShape = (self.batch_size, rows, cols, 3)
    # print "output Shape (outputShape):", outputShape

    height_factor = 2
    width_factor = 2
    if self.dim_ordering == 'th':
        new_height = x.shape[2].value * height_factor
        new_width = x.shape[3].value * width_factor
        x = tf.transpose(x, [0, 2, 3, 1])
        x = tf.image.resize_nearest_neighbor(x, (new_height, new_width))
        output = tf.transpose(x, [0, 3, 1, 2])
    elif self.dim_ordering == 'tf':
        new_height = x.shape[1].value * height_factor
        new_width = x.shape[2].value * width_factor
        output = tf.image.resize_nearest_neighbor(x, (new_height, new_width))
    else:
        raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
    # print "output Shape:", output

    # added for batch normalization
    input_shape = K.int_shape(output)
    axis = 1
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[axis] = input_shape[axis]

    output_normed, mean, std = K.normalize_batch_in_training(
        output, self.gamma, self.beta, reduction_axes,
        epsilon=self.epsilon)

    self.add_update([
        K.moving_average_update(self.running_mean, mean, self.momentum),
        K.moving_average_update(self.running_std, std, self.momentum)
    ], output)

    if sorted(reduction_axes) == list(range(K.ndim(output)))[:-1]:
        output_normed_running = K.batch_normalization(output,
                                                      self.running_mean,
                                                      self.running_std,
                                                      self.beta,
                                                      self.gamma,
                                                      epsilon=self.epsilon)
    else:
        # need broadcasting
        broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
        broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
        broadcast_beta = K.reshape(self.beta, broadcast_shape)
        broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        output_normed_running = K.batch_normalization(
            output, broadcast_running_mean, broadcast_running_std,
            broadcast_beta, broadcast_gamma,
            epsilon=self.epsilon)

    # pick the normalized form of output corresponding to the training phase
    output_normed = K.in_train_phase(output_normed, output_normed_running)

    if self.bias:
        if self.dim_ordering == 'th':
            output_normed += K.reshape(self.b, (1, self.nb_filter, 1, 1))
        elif self.dim_ordering == 'tf':
            output_normed += K.reshape(self.b, (1, 1, 1, self.nb_filter))
        else:
            raise ValueError('Invalid dim_ordering:', self.dim_ordering)

    output = self.activation(output_normed)
    return output
def call(self, inputs, training=None):
    return K.normalize_batch_in_training(
        inputs, self.gamma, self.beta, [0, 1],
        epsilon=self.epsilon)[0]
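# --- Hedged note (not from the original source): the variant above always
# normalizes with the statistics of the current batch and keeps no moving
# averages, so inference behaves exactly like training. A minimal sketch of
# making it phase-aware, assuming the layer also owned `moving_mean` /
# `moving_variance` weights and a `momentum` attribute, might look like this:
def call(self, inputs, training=None):
    normed, mean, variance = K.normalize_batch_in_training(
        inputs, self.gamma, self.beta, [0, 1], epsilon=self.epsilon)
    self.add_update([
        K.moving_average_update(self.moving_mean, mean, self.momentum),
        K.moving_average_update(self.moving_variance, variance, self.momentum)
    ], inputs)
    # At inference, reuse the moving statistics instead of the batch ones.
    return K.in_train_phase(
        normed,
        lambda: K.batch_normalization(inputs, self.moving_mean,
                                      self.moving_variance, self.beta,
                                      self.gamma, epsilon=self.epsilon),
        training=training)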
def call(self, inputs, training=None):
    input_shape = K.int_shape(inputs)
    # Prepare broadcasting shape.
    ndim = len(input_shape)
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    # Determines whether broadcasting is needed.
    needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

    def normalize_inference():
        if needs_broadcasting:
            # In this case we must explicitly broadcast all parameters.
            broadcast_moving_mean = K.reshape(self.moving_mean,
                                              broadcast_shape)
            broadcast_moving_variance = K.reshape(self.moving_variance,
                                                  broadcast_shape)
            if self.center:
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
            else:
                broadcast_beta = None
            if self.scale:
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            else:
                broadcast_gamma = None
            return K.batch_normalization(inputs,
                                         broadcast_moving_mean,
                                         broadcast_moving_variance,
                                         broadcast_beta,
                                         broadcast_gamma,
                                         epsilon=self.epsilon)
        else:
            return K.batch_normalization(inputs,
                                         self.moving_mean,
                                         self.moving_variance,
                                         self.beta,
                                         self.gamma,
                                         epsilon=self.epsilon)

    # If the learning phase is *static* and set to inference:
    if training in {0, False}:
        return normalize_inference()

    # If the learning phase is either dynamic, or set to training:
    normed_training, mean, variance = K.normalize_batch_in_training(
        inputs, self.gamma, self.beta, reduction_axes,
        epsilon=self.epsilon)

    self.add_update([
        K.moving_average_update(self.moving_mean, mean, self.momentum),
        K.moving_average_update(self.moving_variance, variance, self.momentum)
    ], inputs)

    # Pick the normalized form corresponding to the training phase.
    return K.in_train_phase(normed_training, normalize_inference,
                            training=training)