def call(self, inputs, training=None):
    assert self.built, 'Layer must be built before being called'
    input_shape = K.int_shape(inputs)

    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    mean_batch, var_batch = K.moments(inputs, reduction_axes,
                                      shift=None, keep_dims=False)
    std_batch = K.sqrt(var_batch + self.epsilon)

    r_max_value = K.get_value(self.r_max)
    r = std_batch / (K.sqrt(self.running_variance + self.epsilon))
    r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value))

    d_max_value = K.get_value(self.d_max)
    d = (mean_batch - self.running_mean) / K.sqrt(self.running_variance +
                                                  self.epsilon)
    d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value))

    if sorted(reduction_axes) == list(range(K.ndim(inputs)))[:-1]:
        x_normed_batch = (inputs - mean_batch) / std_batch
        x_normed = (x_normed_batch * r + d) * self.gamma + self.beta
    else:
        # need broadcasting
        broadcast_mean = K.reshape(mean_batch, broadcast_shape)
        broadcast_std = K.reshape(std_batch, broadcast_shape)
        broadcast_r = K.reshape(r, broadcast_shape)
        broadcast_d = K.reshape(d, broadcast_shape)
        broadcast_beta = K.reshape(self.beta, broadcast_shape)
        broadcast_gamma = K.reshape(self.gamma, broadcast_shape)

        x_normed_batch = (inputs - broadcast_mean) / broadcast_std
        x_normed = (x_normed_batch * broadcast_r +
                    broadcast_d) * broadcast_gamma + broadcast_beta

    # explicit update to moving mean and standard deviation
    self.add_update([
        K.moving_average_update(self.running_mean, mean_batch, self.momentum),
        K.moving_average_update(self.running_variance, std_batch ** 2,
                                self.momentum)
    ], inputs)

    # update r_max and d_max
    t_val = K.get_value(self.t)
    r_val = self.r_max_value / (1 + (self.r_max_value - 1) * np.exp(-t_val))
    d_val = self.d_max_value / (1 + ((self.d_max_value / 1e-3) - 1) *
                                np.exp(-(2 * t_val)))
    t_val += float(self.t_delta)

    self.add_update([
        K.update(self.r_max, r_val),
        K.update(self.d_max, d_val),
        K.update(self.t, t_val)
    ], inputs)

    if training in {0, False}:
        return x_normed
    else:
        def normalize_inference():
            if sorted(reduction_axes) == list(range(K.ndim(inputs)))[:-1]:
                x_normed_running = K.batch_normalization(
                    inputs, self.running_mean, self.running_variance,
                    self.beta, self.gamma,
                    epsilon=self.epsilon)
                return x_normed_running
            else:
                # need broadcasting
                broadcast_running_mean = K.reshape(self.running_mean,
                                                   broadcast_shape)
                broadcast_running_std = K.reshape(self.running_variance,
                                                  broadcast_shape)
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                x_normed_running = K.batch_normalization(
                    inputs, broadcast_running_mean, broadcast_running_std,
                    broadcast_beta, broadcast_gamma,
                    epsilon=self.epsilon)
                return x_normed_running

        # pick the normalized form of inputs corresponding to the training phase
        # (for batch renormalization, inference remains the same as batchnorm)
        x_normed = K.in_train_phase(x_normed, normalize_inference,
                                    training=training)
        return x_normed
def call(self, inputs, training=None):
    def augmented():
        return tf.image.rgb_to_grayscale(inputs)
    return K.in_train_phase(augmented, augmented, training=training)
def call(self, x, mask=None):
    if self.mode == 0 or self.mode == 2:
        assert self.built, 'Layer must be built before being called'
        input_shape = K.int_shape(x)

        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        # mean_batch, var_batch = K.moments(x, reduction_axes,
        #                                   shift=None, keep_dims=False)
        normed, mean_batch, var_batch = K.normalize_batch_in_training(
            x, self.gamma, self.beta, reduction_axes,
            epsilon=self.epsilon)

        std_batch = K.sqrt(var_batch + self.epsilon)

        r_max_value = K.get_value(self.r_max)
        r = std_batch / (K.sqrt(self.running_std + self.epsilon))
        r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value))

        d_max_value = K.get_value(self.d_max)
        d = (mean_batch - self.running_mean) / K.sqrt(self.running_std +
                                                      self.epsilon)
        d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value))

        if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
            x_normed_batch = (x - mean_batch) / std_batch
            x_normed = (x_normed_batch * r + d) * self.gamma + self.beta
        else:
            # need broadcasting
            broadcast_mean = K.reshape(mean_batch, broadcast_shape)
            broadcast_std = K.reshape(std_batch, broadcast_shape)
            broadcast_r = K.reshape(r, broadcast_shape)
            broadcast_d = K.reshape(d, broadcast_shape)
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)

            x_normed_batch = (x - broadcast_mean) / broadcast_std
            x_normed = (x_normed_batch * broadcast_r +
                        broadcast_d) * broadcast_gamma + broadcast_beta

        # explicit update to moving mean and standard deviation
        self.add_update([
            K.moving_average_update(self.running_mean, mean_batch,
                                    self.momentum),
            K.moving_average_update(self.running_std, std_batch ** 2,
                                    self.momentum)
        ], x)

        # update r_max and d_max
        t_val = K.get_value(self.t)
        r_val = self.r_max_value / (1 + (self.r_max_value - 1) *
                                    np.exp(-t_val))
        d_val = self.d_max_value / (1 + ((self.d_max_value / 1e-3) - 1) *
                                    np.exp(-(2 * t_val)))
        t_val += float(self.t_delta)

        self.add_update([
            K.update(self.r_max, r_val),
            K.update(self.d_max, d_val),
            K.update(self.t, t_val)
        ], x)

        if self.mode == 0:
            if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
                x_normed_running = K.batch_normalization(
                    x, self.running_mean, self.running_std,
                    self.beta, self.gamma,
                    epsilon=self.epsilon)
            else:
                # need broadcasting
                broadcast_running_mean = K.reshape(self.running_mean,
                                                   broadcast_shape)
                broadcast_running_std = K.reshape(self.running_std,
                                                  broadcast_shape)
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                x_normed_running = K.batch_normalization(
                    x, broadcast_running_mean, broadcast_running_std,
                    broadcast_beta, broadcast_gamma,
                    epsilon=self.epsilon)

            # pick the normalized form of x corresponding to the training phase
            # (for batch renormalization, inference remains the same as batchnorm)
            x_normed = K.in_train_phase(x_normed, x_normed_running)

    elif self.mode == 1:
        # sample-wise normalization
        m = K.mean(x, axis=self.axis, keepdims=True)
        std = K.sqrt(K.var(x, axis=self.axis, keepdims=True) + self.epsilon)
        x_normed_batch = (x - m) / (std + self.epsilon)

        r_max_value = K.get_value(self.r_max)
        r = std / (self.running_std + self.epsilon)
        r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value))

        d_max_value = K.get_value(self.d_max)
        d = (m - self.running_mean) / (self.running_std + self.epsilon)
        d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value))

        x_normed = ((x_normed_batch * r) + d) * self.gamma + self.beta

        # update r_max and d_max
        t_val = K.get_value(self.t)
        r_val = self.r_max_value / (1 + (self.r_max_value - 1) *
                                    np.exp(-t_val))
        d_val = self.d_max_value / (1 + ((self.d_max_value / 1e-3) - 1) *
                                    np.exp(-(2 * t_val)))
        t_val += float(self.t_delta)

        self.add_update([
            K.update(self.r_max, r_val),
            K.update(self.d_max, d_val),
            K.update(self.t, t_val)
        ], x)

    return x_normed
def call(self, inputs, training=None):
    input_shape = K.int_shape(inputs)
    # Prepare broadcasting shape.
    ndim = len(input_shape)
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    # Determines whether broadcasting is needed.
    needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

    def normalize_inference():
        if needs_broadcasting:
            # In this case we must explicitly broadcast all parameters.
            broadcast_moving_mean = K.reshape(self.moving_mean,
                                              broadcast_shape)
            broadcast_moving_variance = K.reshape(self.moving_variance,
                                                  broadcast_shape)
            if self.center:
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
            else:
                broadcast_beta = None
            if self.scale:
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            else:
                broadcast_gamma = None
            return tf.nn.batch_normalization(  # K.batch_normalization(
                inputs,
                broadcast_moving_mean,
                broadcast_moving_variance,
                broadcast_beta,
                broadcast_gamma,
                # axis=self.axis,
                self.epsilon)  # epsilon=self.epsilon)
        else:
            return tf.nn.batch_normalization(  # K.batch_normalization(
                inputs,
                self.moving_mean,
                self.moving_variance,
                self.beta,
                self.gamma,
                # axis=self.axis,
                self.epsilon)  # epsilon=self.epsilon)

    # If the learning phase is *static* and set to inference:
    if training in {0, False}:
        return normalize_inference()

    # If the learning phase is either dynamic, or set to training:
    normed_training, mean, variance = _regular_normalize_batch_in_training(
        # K.normalize_batch_in_training(
        inputs, self.gamma, self.beta, reduction_axes,
        epsilon=self.epsilon)

    if K.backend() != 'cntk':
        sample_size = K.prod([K.shape(inputs)[axis]
                              for axis in reduction_axes])
        sample_size = K.cast(sample_size, dtype=K.dtype(inputs))
        # sample variance - unbiased estimator of population variance
        variance *= sample_size / (sample_size - (1.0 + self.epsilon))

    self.add_update([K.moving_average_update(self.moving_mean,
                                             mean,
                                             self.momentum),
                     K.moving_average_update(self.moving_variance,
                                             variance,
                                             self.momentum)],
                    inputs)

    # Pick the normalized form corresponding to the training phase.
    return K.in_train_phase(normed_training,
                            normalize_inference,
                            training=training)
def call(self, x, mask=None):
    y_pred = viterbi_decode(x, self.U, self.b_start, self.b_end)
    nb_classes = self.input_spec[0].shape[2]
    y_pred_one_hot = K.one_hot(y_pred, nb_classes)
    return K.in_train_phase(x, y_pred_one_hot)
def PadSymmetricInTestPhase():
    pad = Lambda(lambda x: K.in_train_phase(
        x,
        tf.pad(x, tf.constant([[0, 0], [2, 2], [2, 2], [0, 0]]), 'SYMMETRIC')))
    pad.uses_learning_phase = True
    return pad
def __call__(self, loss):
    reg = -0.5 * K.mean(1 + self.p - K.exp(self.p), axis=None)
    return K.in_train_phase(loss + self.weight * reg, loss)
def one_zero(x):
    return K.in_train_phase(K.zeros_like(x), K.ones_like(x))
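# Illustrative usage sketch (not from the original source; layer and tensor
# names are assumptions): wrapping `one_zero` in a Lambda yields a tensor of
# zeros while training and ones at inference, because K.in_train_phase reads
# K.learning_phase() when no `training` argument is given.
from keras import backend as K
from keras.layers import Input, Lambda
from keras.models import Model

inp = Input(shape=(4,))
phase_flag = Lambda(one_zero)(inp)   # zeros during fit(), ones during predict()
model = Model(inp, phase_flag)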
def call(self, x, training=None):
    def noised():
        return x + K.clip(self.amount * K.random_normal(K.shape(x)),
                          self.lower, self.upper)
    return K.in_train_phase(noised, x, training=training)
def call(self, x, training=None):
    def noised():
        Ni = K.shape(x)[0]
        Nipd = K.shape(x)[2]
        return x + K.random_uniform((Ni, Nipd),
                                    self.lower, self.upper)[:, None, :]
    return K.in_train_phase(noised, x, training=training)
def call(self, inputs):
    if K.dtype(inputs) != 'int32':
        inputs = K.cast(inputs, 'int32')
    _embeddings = K.in_train_phase(
        K.dropout(self.embeddings, self.dropout_rate,
                  noise_shape=[self.input_dim, 1]),
        self.embeddings) if self.dropout_rate > 0 else self.embeddings
    out = K.gather(_embeddings, inputs)
    return out
def call(self, inputs, training=None):
    # These were moved here from build() because tf2 eager was not
    # tracking gradients:
    repeated_gamma = K.reshape(
        K.tile(K.expand_dims(self.gamma, -1), [1, self.n]),
        [-1],
    )
    repeated_beta = K.reshape(
        K.tile(K.expand_dims(self.beta, -1), [1, self.n]),
        [-1],
    )
    repeated_moving_mean = K.reshape(
        K.tile(K.expand_dims(self.moving_mean, -1), [1, self.n]),
        [-1],
    )
    repeated_moving_variance = K.reshape(
        K.tile(K.expand_dims(self.moving_variance, -1), [1, self.n]),
        [-1],
    )

    def unrepeat(w):
        n = 1
        if self.h == 'C4':
            n *= 4
        elif self.h == 'D4':
            n *= 8
        elif self.h == 'Z2':
            n *= 1
        else:
            raise ValueError('Wrong h: %s' % self.h)
        return K.mean(K.reshape(w, (K.int_shape(w)[0] // n, n)), -1)

    input_shape = K.int_shape(inputs)
    # Prepare broadcasting shape.
    ndim = len(input_shape)
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    # Determines whether broadcasting is needed.
    needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

    def normalize_inference():
        if needs_broadcasting:
            # In this case we must explicitly broadcast all parameters.
            broadcast_moving_mean = K.reshape(repeated_moving_mean,
                                              broadcast_shape)
            broadcast_moving_variance = K.reshape(repeated_moving_variance,
                                                  broadcast_shape)
            broadcast_beta = K.reshape(repeated_beta, broadcast_shape)
            broadcast_gamma = K.reshape(repeated_gamma, broadcast_shape)
            return K.batch_normalization(inputs,
                                         broadcast_moving_mean,
                                         broadcast_moving_variance,
                                         broadcast_beta,
                                         broadcast_gamma,
                                         epsilon=self.epsilon)
        else:
            return K.batch_normalization(inputs,
                                         repeated_moving_mean,
                                         repeated_moving_variance,
                                         repeated_beta,
                                         repeated_gamma,
                                         epsilon=self.epsilon)

    def _get_training_value(training, trainable_flag):
        """
        Return a flag indicating whether a layer should be called in
        training or inference mode.

        Modified from https://git.io/JUGHX

        training: the setting passed when the layer is called.
        trainable_flag: flag indicating whether the layer is trainable.
        """
        if training is None:
            training = K.learning_phase()
        if isinstance(training, int):
            training = bool(training)
        # If the layer is not trainable, override the value passed from
        # the model.
        if trainable_flag is False:
            training = False
        return training

    # If the learning phase is *static* and set to inference:
    training_val = _get_training_value(training, self.trainable)
    if training_val is False:
        return normalize_inference()

    # If the learning phase is either dynamic, or set to training:
    normed_training, mean, variance = K.normalize_batch_in_training(
        inputs, repeated_gamma, repeated_beta, reduction_axes,
        epsilon=self.epsilon)

    if K.backend() != 'cntk':
        sample_size = K.prod([K.shape(inputs)[axis]
                              for axis in reduction_axes])
        sample_size = K.cast(sample_size, dtype=K.dtype(inputs))
        # sample variance - unbiased estimator of population variance
        variance *= sample_size / (sample_size - (1.0 + self.epsilon))

    self.add_update([
        K.moving_average_update(self.moving_mean,
                                unrepeat(mean),
                                self.momentum),
        K.moving_average_update(self.moving_variance,
                                unrepeat(variance),
                                self.momentum)
    ], inputs)

    # Pick the normalized form corresponding to the training phase.
    return K.in_train_phase(normed_training,
                            normalize_inference,
                            training=training)
def call(self, inputs, training=None):
    input_shape = K.int_shape(inputs)  # .shape
    ndim = len(input_shape)  # 4
    reduction_axes = list(range(ndim))  # if ndim == 4, this is [0, 1, 2, 3]
    del reduction_axes[self.axis]  # --> [0, 1, 2] when self.axis == -1
    input_dim = input_shape[self.axis] // 2

    mu = K.mean(inputs, axis=reduction_axes)  # real mu, imag mu
    broadcast_mu_shape = [1] * len(input_shape)  # [1, 1, 1, 1]
    broadcast_mu_shape[self.axis] = input_shape[self.axis]  # [1, 1, 1, input_shape[self.axis]]
    broadcast_mu = K.reshape(mu, broadcast_mu_shape)  # mu shape is [1, 1, 1, 2]

    # Subtract the real mean from the real parts and the imaginary mean from
    # the imaginary parts; centred_squared == (x - E(x))^2.
    if self.center:
        input_centred = inputs - broadcast_mu
    else:
        input_centred = inputs
    centred_squared = input_centred ** 2

    # for Conv2D
    centred_squared_real = centred_squared[:, :, :, :input_dim]  # real
    centred_squared_imag = centred_squared[:, :, :, input_dim:]  # imag
    centred_real = input_centred[:, :, :, :input_dim]  # real
    centred_imag = input_centred[:, :, :, input_dim:]  # imag

    if self.scale:
        Vrr = K.mean(centred_squared_real, axis=reduction_axes) + self.epsilon
        Vii = K.mean(centred_squared_imag, axis=reduction_axes) + self.epsilon
        # Vri contains the real/imaginary covariance for each feature map.
        Vri = K.mean(centred_real * centred_imag, axis=reduction_axes)
    elif self.center:
        Vrr = None
        Vii = None
        Vri = None
    else:
        raise ValueError('Error. Both scale and center in batchnorm are set '
                         'to False.')

    # 1. Apply batch normalization to the real and imaginary parts of the
    #    complex inputs.
    # 2. If training is True, update moving_mean, moving_Vrr, moving_Vii and
    #    moving_Vri, subject to self.center and self.scale.
    input_bn = complex_batchnorm(input_centred, Vrr, Vii, Vri,
                                 self.beta, self.gamma_rr, self.gamma_ri,
                                 self.gamma_ii, self.scale, self.center,
                                 axis=self.axis)

    if training in {0, False}:
        return input_bn
    else:
        # training is True
        update_list = []
        if self.center:
            update_list.append(K.moving_average_update(self.moving_mean,
                                                       mu, self.momentum))
        if self.scale:
            update_list.append(K.moving_average_update(self.moving_Vrr,
                                                       Vrr, self.momentum))
            update_list.append(K.moving_average_update(self.moving_Vii,
                                                       Vii, self.momentum))
            update_list.append(K.moving_average_update(self.moving_Vri,
                                                       Vri, self.momentum))
        self.add_update(update_list, inputs)

        def normalize_inference():
            if self.center:
                inference_centred = inputs - K.reshape(self.moving_mean,
                                                       broadcast_mu_shape)
            else:
                inference_centred = inputs
            return complex_batchnorm(inference_centred,
                                     self.moving_Vrr, self.moving_Vii,
                                     self.moving_Vri, self.beta,
                                     self.gamma_rr, self.gamma_ri,
                                     self.gamma_ii, self.scale, self.center,
                                     axis=self.axis)

        # Pick the normalized form corresponding to the training phase.
        return K.in_train_phase(input_bn, normalize_inference,
                                training=training)
def _apply_dropout(self, inputs):
    dropped = K.dropout(inputs, self.dropout)
    return K.in_train_phase(dropped, inputs)
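# Illustrative variant (an assumption, not the original helper): K.in_train_phase
# also accepts a zero-argument callable, which defers building the dropout op
# to the training branch only. `rate` stands in for self.dropout here.
def _apply_dropout_lazy(inputs, rate, training=None):
    def dropped():
        return K.dropout(inputs, rate)
    return K.in_train_phase(dropped, inputs, training=training)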
def call(self, inputs, initial_state=None, initial_readout=None,
         ground_truth=None, mask=None, training=None):
    # input shape: `(samples, time (padded with zeros), input_dim)`
    # note that the .build() method of subclasses MUST define
    # self.input_spec and self.state_spec with complete input shapes.
    if type(mask) is list:
        mask = mask[0]
    if self.model is None:
        raise Exception('Empty RecurrentModel.')

    num_req_states = self.num_states
    if self.readout:
        num_actual_states = num_req_states - 1
    else:
        num_actual_states = num_req_states

    if type(inputs) is list:
        inputs_list = inputs[:]
        inputs = inputs_list.pop(0)
        initial_states = inputs_list[:num_actual_states]
        if len(initial_states) > 0:
            if self._is_optional_input_placeholder(initial_states[0]):
                initial_states = self.get_initial_state(inputs)
        inputs_list = inputs_list[num_actual_states:]
        if self.readout:
            initial_readout = inputs_list.pop(0)
            if self.teacher_force:
                ground_truth = inputs_list.pop()
    else:
        if initial_state is not None:
            if not isinstance(initial_state, (list, tuple)):
                initial_states = [initial_state]
            else:
                initial_states = list(initial_state)
            if self._is_optional_input_placeholder(initial_states[0]):
                initial_states = self.get_initial_state(inputs)
        elif self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_state(inputs)

    if self.readout:
        if initial_readout is None or self._is_optional_input_placeholder(
                initial_readout):
            output_shape = K.int_shape(_to_list((self.model.output))[0])
            output_ndim = len(output_shape)
            input_ndim = K.ndim(inputs)
            initial_readout = K.zeros_like(inputs)
            slices = [slice(None)] + [0] * (input_ndim - 1)
            initial_readout = initial_readout[slices]  # (batch_size,)
            initial_readout = K.reshape(initial_readout,
                                        (-1,) + (1,) * (output_ndim - 1))
            initial_readout = K.tile(initial_readout,
                                     (1,) + tuple(output_shape[1:]))
        initial_states.append(initial_readout)
        if self.teacher_force:
            if ground_truth is None or self._is_optional_input_placeholder(
                    ground_truth):
                raise Exception('ground_truth must be provided for '
                                'RecurrentModel with teacher_force=True.')
            if K.backend() == 'tensorflow':
                with tf.control_dependencies(None):
                    counter = K.zeros((1,))
            else:
                counter = K.zeros((1,))
            counter = K.cast(counter, 'int32')
            initial_states.insert(-1, counter)
            initial_states[-2]
            initial_states.insert(-1, ground_truth)
            num_req_states += 2

    if len(initial_states) != num_req_states:
        raise ValueError('Layer requires ' + str(num_req_states) +
                         ' states but was passed ' +
                         str(len(initial_states)) + ' initial states.')

    input_shape = K.int_shape(inputs)
    if self.unroll and input_shape[1] is None:
        raise ValueError('Cannot unroll a RNN if the '
                         'time dimension is undefined. \n'
                         '- If using a Sequential model, '
                         'specify the time dimension by passing '
                         'an `input_shape` or `batch_input_shape` '
                         'argument to your first layer. If your '
                         'first layer is an Embedding, you can '
                         'also use the `input_length` argument.\n'
                         '- If using the functional API, specify '
                         'the time dimension by passing a `shape` '
                         'or `batch_shape` argument to your Input layer.')

    preprocessed_input = self.preprocess_input(inputs, training=None)
    constants = self.get_constants(inputs, training=None)
    if self.decode:
        initial_states.insert(0, inputs)
        preprocessed_input = K.zeros((1, self.output_length, 1))
        input_length = self.output_length
    else:
        input_length = input_shape[1]

    if self.uses_learning_phase:
        with learning_phase_scope(0):
            last_output_test, outputs_test, states_test, updates = rnn(
                self.step,
                preprocessed_input,
                initial_states,
                go_backwards=self.go_backwards,
                mask=mask,
                constants=constants,
                unroll=self.unroll,
                input_length=input_length)
        with learning_phase_scope(1):
            last_output_train, outputs_train, states_train, updates = rnn(
                self.step,
                preprocessed_input,
                initial_states,
                go_backwards=self.go_backwards,
                mask=mask,
                constants=constants,
                unroll=self.unroll,
                input_length=input_length)
        last_output = K.in_train_phase(last_output_train, last_output_test,
                                       training=training)
        outputs = K.in_train_phase(outputs_train, outputs_test,
                                   training=training)
        states = []
        for state_train, state_test in zip(states_train, states_test):
            states.append(K.in_train_phase(state_train, state_test,
                                           training=training))
    else:
        last_output, outputs, states, updates = rnn(
            self.step,
            preprocessed_input,
            initial_states,
            go_backwards=self.go_backwards,
            mask=mask,
            constants=constants,
            unroll=self.unroll,
            input_length=input_length)

    states = list(states)
    if self.decode:
        states.pop(0)
    if self.readout:
        states.pop()
        if self.teacher_force:
            states.pop()
            states.pop()
    if len(updates) > 0:
        self.add_update(updates)
    if self.stateful:
        updates = []
        for i in range(len(states)):
            updates.append((self.states[i], states[i]))
        self.add_update(updates, inputs)

    # Properly set learning phase
    if 0 < self.dropout + self.recurrent_dropout:
        last_output._uses_learning_phase = True
        outputs._uses_learning_phase = True

    if self.return_sequences:
        y = outputs
    else:
        y = last_output
    if self.return_states:
        return [y] + states
    else:
        return y
def call(self, inputs, training=None):
    input_shape = K.int_shape(inputs)
    # Prepare broadcasting shape.
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    if self.axis != 0:
        del reduction_axes[0]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    mean_instance = K.mean(inputs, reduction_axes, keepdims=True)
    variance_instance = K.var(inputs, reduction_axes, keepdims=True)

    mean_layer = K.mean(mean_instance, self.axis, keepdims=True)
    temp = variance_instance + K.square(mean_instance)
    variance_layer = (K.mean(temp, self.axis, keepdims=True) -
                      K.square(mean_layer))

    def training_phase():
        mean_batch = K.mean(mean_instance, axis=0, keepdims=True)
        variance_batch = (K.mean(temp, axis=0, keepdims=True) -
                          K.square(mean_batch))

        mean_batch_reshaped = K.flatten(mean_batch)
        variance_batch_reshaped = K.flatten(variance_batch)

        if K.backend() != 'cntk':
            sample_size = K.prod([K.shape(inputs)[axis]
                                  for axis in reduction_axes])
            sample_size = K.cast(sample_size, dtype=K.dtype(inputs))
            # sample variance - unbiased estimator of population variance
            variance_batch_reshaped *= (sample_size /
                                        (sample_size - (1.0 + self.epsilon)))

        self.add_update([K.moving_average_update(self.moving_mean,
                                                 mean_batch_reshaped,
                                                 self.momentum),
                         K.moving_average_update(self.moving_variance,
                                                 variance_batch_reshaped,
                                                 self.momentum)])

        return normalize_func(mean_batch, variance_batch)

    def inference_phase():
        mean_batch = self.moving_mean
        variance_batch = self.moving_variance
        return normalize_func(mean_batch, variance_batch)

    def normalize_func(mean_batch, variance_batch):
        mean_batch = K.reshape(mean_batch, broadcast_shape)
        variance_batch = K.reshape(variance_batch, broadcast_shape)

        mean_weights = K.softmax(self.mean_weights, axis=0)
        variance_weights = K.softmax(self.variance_weights, axis=0)

        mean = (mean_weights[0] * mean_instance +
                mean_weights[1] * mean_layer +
                mean_weights[2] * mean_batch)
        variance = (variance_weights[0] * variance_instance +
                    variance_weights[1] * variance_layer +
                    variance_weights[2] * variance_batch)

        outputs = (inputs - mean) / (K.sqrt(variance + self.epsilon))

        if self.scale:
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            outputs = outputs * broadcast_gamma

        if self.center:
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
            outputs = outputs + broadcast_beta

        return outputs

    if training in {0, False}:
        return inference_phase()

    return K.in_train_phase(training_phase,
                            inference_phase,
                            training=training)
def call(self, x, training=None):
    batch_size, length, input_dim = x.shape.as_list()

    # NOTE Get padding
    is_padding_value = tf.equal(x, self.padding_value)
    is_padding = tf.reduce_all(is_padding_value, axis=-1, keepdims=True)
    is_padding = tf.to_float(is_padding)
    pad_mask = tf.reshape(is_padding, [-1])
    non_pad_indices = tf.to_int32(tf.where(pad_mask < self.epsilon))

    # Reshape x to [batch_size*length, hidden_size] to remove padding
    x = tf.reshape(x, [-1, input_dim])
    x = tf.gather_nd(x, indices=non_pad_indices)

    # Reshape x from 2 dimensions to 3 dimensions.
    x.set_shape([None, input_dim])
    x = tf.expand_dims(x, axis=0)
    # print('x / expand_dims: {}'.format(x.shape))

    output = K.dot(x, self.kernel_filter) + self.bias_filter
    if self.activation is not None:
        output = self.activation(output)

    # Dropout
    if 0.0 < self.rate < 1.0:
        noise_shape = self._get_noise_shape(output)

        def dropped_inputs():
            return K.dropout(x=output,
                             level=self.rate,
                             noise_shape=noise_shape,
                             seed=self.seed)

        output = K.in_train_phase(dropped_inputs, output, training=training)

    # Dense
    # output = tf.einsum('ijk,kl->ijl', output, self.kernel_hidden)
    output = K.dot(output, self.kernel_hidden) + self.bias_hidden
    if self.activation is not None:
        output = self.activation(output)

    output = tf.squeeze(output, axis=0)
    scatter_shape = (-1, self.hidden_size)
    output = tf.scatter_nd(indices=non_pad_indices,
                           updates=output,
                           shape=scatter_shape)

    out_shape = (-1, length, self.hidden_size)
    output = tf.reshape(output, out_shape)
    return output
def call(self, layer, inputs, *args, **kwargs):
    output = K.in_train_phase(
        K.switch(K.random_uniform([]) > self.rate,
                 layer(inputs, *args, **kwargs),
                 inputs),
        layer(inputs, *args, **kwargs))
    return output
def call(self, inputs, **kwargs):
    x = tf.ones_like(inputs)
    y = tf.zeros_like(inputs)
    return K.in_train_phase(x, y, training=kwargs.get('training', None))
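# Minimal demonstration sketch (standalone tensors assumed, not part of the
# layer above): passing `training` explicitly overrides the global learning
# phase, so either branch of K.in_train_phase can be forced.
import numpy as np
import tensorflow as tf
from keras import backend as K

data = tf.constant(np.zeros((2, 3), dtype='float32'))
forced_train = K.in_train_phase(tf.ones_like(data), tf.zeros_like(data),
                                training=True)    # evaluates to all ones
forced_test = K.in_train_phase(tf.ones_like(data), tf.zeros_like(data),
                               training=False)    # evaluates to all zeros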