def call(self, inputs, states, training=None):
    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            K.ones_like(inputs),
            self.dropout,
            training=training,
            count=2)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            K.ones_like(states[1]),
            self.recurrent_dropout,
            training=training,
            count=2)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    if 0 < self.dropout < 1.:
        inputs_f = inputs * dp_mask[0]
        inputs_c = inputs * dp_mask[1]
    else:
        inputs_f = inputs
        inputs_c = inputs

    if 0 < self.recurrent_dropout < 1.:
        h_tm1_f = h_tm1 * rec_dp_mask[0]
        h_tm1_c = h_tm1 * rec_dp_mask[1]
    else:
        h_tm1_f = h_tm1
        h_tm1_c = h_tm1

    x_f = self.input_conv(inputs_f, self.kernel_f, self.bias_f,
                          padding=self.padding)
    x_c = self.input_conv(inputs_c, self.kernel_c, self.bias_c,
                          padding=self.padding)
    h_f = self.recurrent_conv(h_tm1_f, self.recurrent_kernel_f)
    h_c = self.recurrent_conv(h_tm1_c, self.recurrent_kernel_c)

    f = self.recurrent_activation(x_f + h_f)
    cf = self.recurrent_activation(x_f + h_f - self.beta)
    ci = self.activation(x_c + h_c)
    c = f * c_tm1 + (1. - cf) * ci
    h = c

    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h, c]
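# --- Helpers assumed by every cell in this file ------------------------------
# `_generate_dropout_mask` / `_generate_dropout_ones` are not defined here;
# they mirror the private helpers in Keras 2.x's keras.layers.recurrent. This
# is a minimal sketch under that assumption, not the canonical source. Note:
# with count=1 the stock helper returns a single tensor; cells below that
# index `mask[0]` with count=1 assume a variant that always returns a list
# (or they wrap the result in a list themselves, as one cell does).
import numpy
import tensorflow as tf
from keras import activations
from keras import backend as K


def _generate_dropout_ones(inputs, dims):
    # a (batch, dims) tensor of ones, borrowing the batch size of `inputs`
    # (assumes 2-D inputs; the conv cells use K.ones_like directly instead)
    return K.tile(K.ones_like(inputs[:, :1]), [1, dims])


def _generate_dropout_mask(ones, rate, training=None, count=1):
    # one (or `count`) dropout mask(s), active only in the training phase
    def dropped_inputs():
        return K.dropout(ones, rate)

    if count > 1:
        return [K.in_train_phase(dropped_inputs, ones, training=training)
                for _ in range(count)]
    return K.in_train_phase(dropped_inputs, ones, training=training)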
def call(self, inputs, states, training=None):
    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
            self.dropout,
            training=training,
            count=1)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_masks is None):
        _recurrent_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.recurrent_dropout,
            training=training,
            count=1)
        self._recurrent_masks = _recurrent_mask

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_masks = self._recurrent_masks

    h_tm1 = states[0]  # previous state

    if 0. < self.dropout < 1.:
        inputs *= dp_mask[0]
    if 0. < self.recurrent_dropout < 1.:
        h_tm1 *= rec_dp_masks[0]

    h = K.dot(inputs, self.kernel)
    # element-wise (not matrix) recurrence, IndRNN-style
    h = h + (h_tm1 * self.recurrent_kernel)
    if self.use_bias:
        h = K.bias_add(h, self.bias)
    h = self.activation(h)

    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h]
def call(self, inputs, states, training=None):
    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            K.ones_like(inputs),
            self.dropout,
            training=training,
            count=1)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            K.ones_like(states[0]),
            self.recurrent_dropout,
            training=training,
            count=1)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state

    if 0 < self.dropout < 1.:
        inputs = inputs * dp_mask[0]
    if 0 < self.recurrent_dropout < 1.:
        h_tm1 = h_tm1 * rec_dp_mask[0]

    u1 = self.input_conv(inputs, self.kernel, self.bias,
                         padding=self.padding)
    u2 = self.recurrent_conv(h_tm1, self.recurrent_kernel)
    u = self.recurrent_activation(u1 + u2)
    h = (1 - u) * h_tm1 + u * inputs

    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h]
def call(self, inputs, states, training=None):
    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
            self.dropout,
            training=training,
            count=1)
    if (0 < self.recurrent_dropout < 1 and
            self._nested_recurrent_masks is None):
        _nested_recurrent_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.recurrent_dropout,
            training=training,
            count=self.depth)
        self._nested_recurrent_masks = _nested_recurrent_mask

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_masks = self._nested_recurrent_masks

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1:self.depth + 1]  # previous carry states

    if 0. < self.dropout < 1.:
        inputs *= dp_mask[0]

    h, c = self.nested_recurrence(inputs,
                                  hidden_state=h_tm1,
                                  cell_states=c_tm1,
                                  recurrent_masks=rec_dp_masks,
                                  current_depth=0)

    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, c
def call(self, inputs, states, training=None):
    samples, inFeatures = states[0].shape  # (unused)
    h_tm1 = states[0]  # previous state
    time_step = states[1]

    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
            self.dropout,
            training=training)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.recurrent_dropout,
            training=training)

    dp_mask = self._dropout_mask
    rec_dp_mask = self._recurrent_dropout_mask

    if dp_mask is not None:
        inputs *= dp_mask
    if rec_dp_mask is not None:
        h_tm1 *= rec_dp_mask

    if self.split_method:
        # Update state, module-by-module
        h_mod = []
        unitsPerMod = self.units // self.clock_numPeriods

        # note: both branches close over the loop variables i, s and hModule,
        # which are rebound on every iteration below
        def if_true():
            hModule = K.dot(h_tm1[:, s:], self.rec_kernel_c_mod[i]) + \
                      K.dot(inputs, self.kernel_c_mod[i])
            if self.use_bias:
                hModule = K.bias_add(hModule, self.bias_mod[i])
            if self.recurrent_activation is not None:
                hModule = self.recurrent_activation(hModule)
            return hModule

        def if_false():
            return hModule

        for i, period in enumerate(self.clock_periods):
            s = i * unitsPerMod
            e = (i + 1) * unitsPerMod
            hModule = h_tm1[:, s:e]
            # update module i only when the step is a multiple of its period
            h_mod.append(
                tf.cond(K.equal(K.tf.mod(time_step[0][0], period), 0),
                        if_true, if_false))
        hidden = K.concatenate(h_mod)
    else:
        # Update state all at once, then keep only the scheduled updates
        h = K.dot(inputs, self.kernel) + \
            K.dot(h_tm1, self.recurrent_kernel_c * self.cw_mask)
        if self.bias is not None:
            h = K.bias_add(h, self.bias)
        if self.recurrent_activation is not None:
            h = self.recurrent_activation(h)
        h = K.switch(K.equal(K.tf.mod(time_step, self.cw_periods), 0),
                     h, h_tm1)
        hidden = h

    # Calculate output
    output = K.dot(hidden, self.recurrent_kernel_o)
    if self.activation is not None:
        output = self.activation(output)

    # Properly set learning phase on output tensor.
    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            output._uses_learning_phase = True
    return output, [hidden, time_step + 1]
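# Illustration only (plain numpy): which clockwork modules fire at each step.
# `periods` stands in for self.cw_periods / self.clock_periods above; a
# module's units are rewritten only when t % period == 0, which is exactly
# what K.switch(K.equal(K.tf.mod(time_step, self.cw_periods), 0), h, h_tm1)
# computes, carrying h_tm1 through otherwise.
import numpy as np

periods = np.array([1, 2, 4, 8])      # assumed one period per module
for t in range(5):
    print(t, (t % periods) == 0)      # boolean update mask per module
# 0 [ True  True  True  True]
# 1 [ True False False False]
# 2 [ True  True False False]
# 3 [ True False False False]
# 4 [ True  True  True False]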
def call(self, inputs, states, training=None):
    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            K.ones_like(inputs),
            self.dropout,
            training=training,
            count=4)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            K.ones_like(states[0]),
            self.recurrent_dropout,
            training=training,
            count=4)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    if self.implementation == 1:
        if 0 < self.dropout < 1.:
            inputs_i = inputs * dp_mask[0]
            inputs_f = inputs * dp_mask[1]
            inputs_c = inputs * dp_mask[2]
            inputs_o = inputs * dp_mask[3]
        else:
            inputs_i = inputs
            inputs_f = inputs
            inputs_c = inputs
            inputs_o = inputs
        x_i = K.dot(inputs_i, self.kernel_i)
        x_f = K.dot(inputs_f, self.kernel_f)
        x_c = K.dot(inputs_c, self.kernel_c)
        x_o = K.dot(inputs_o, self.kernel_o)
        if self.use_bias:
            x_i = K.bias_add(x_i, self.bias_i)
            x_f = K.bias_add(x_f, self.bias_f)
            x_c = K.bias_add(x_c, self.bias_c)
            x_o = K.bias_add(x_o, self.bias_o)
        if 0 < self.recurrent_dropout < 1.:
            h_tm1_i = h_tm1 * rec_dp_mask[0]
            h_tm1_f = h_tm1 * rec_dp_mask[1]
            h_tm1_c = h_tm1 * rec_dp_mask[2]
            h_tm1_o = h_tm1 * rec_dp_mask[3]
        else:
            h_tm1_i = h_tm1
            h_tm1_f = h_tm1
            h_tm1_c = h_tm1
            h_tm1_o = h_tm1
        i = self.recurrent_activation(
            x_i + K.dot(h_tm1_i, self.recurrent_kernel_i))
        f = self.recurrent_activation(
            x_f + K.dot(h_tm1_f, self.recurrent_kernel_f))
        c = f * c_tm1 + i * self.activation(
            x_c + K.dot(h_tm1_c, self.recurrent_kernel_c))
        o = self.recurrent_activation(
            x_o + K.dot(h_tm1_o, self.recurrent_kernel_o))
    else:
        if 0. < self.dropout < 1.:
            inputs *= dp_mask[0]
        z = K.dot(inputs, self.kernel)
        if 0. < self.recurrent_dropout < 1.:
            h_tm1 *= rec_dp_mask[0]
        z += K.dot(h_tm1, self.recurrent_kernel)
        if self.use_bias:
            z = K.bias_add(z, self.bias)

        z0 = z[:, :self.units]
        z1 = z[:, self.units: 2 * self.units]
        z2 = z[:, 2 * self.units: 3 * self.units]
        z3 = z[:, 3 * self.units:]

        i = self.recurrent_activation(z0)
        f = self.recurrent_activation(z1)
        c = f * c_tm1 + i * self.activation(z2)
        o = self.recurrent_activation(z3)

    h = o * self.activation(c)
    # apply the output projection
    h = K.dot(h, self.projection)
    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h, c]
def call(self, inputs, states, time, constants=None, training=None, **kwargs):
    old_vertices, neighbors, mapping, reverse_mapping = constants
    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            K.ones_like(inputs),
            self.dropout,
            training=training,
            count=4)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            K.ones_like(states[0]),
            self.recurrent_dropout,
            training=training,
            count=4)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state
    h_tm2 = kwargs['previous_state'][0]
    c_tm2 = kwargs['previous_state'][1]

    # gather the neighbor rows for the vertex processed at this time step
    previous_position = reverse_mapping[:, time]
    batch_idx = tf.convert_to_tensor(
        numpy.arange(K.int_shape(previous_position)[0]), dtype=tf.int32)
    previous_position_4_gather = K.tf.stack(
        [batch_idx, previous_position], axis=-1)
    ng_rows = K.tf.gather_nd(neighbors, previous_position_4_gather)

    def sum_rows(input_ng):
        # average the hidden states of a vertex's neighbors
        ng_row, batch_map = input_ng
        ng_num = K.sum(ng_row, axis=-1)
        batch_ng = K.tf.where(K.equal(ng_row, 1))[:, 0]
        current_positions = K.gather(batch_map, batch_ng)

        def sum_unknown(input_time):
            return tf.cond(input_time < time, lambda: h_tm1, lambda: h_tm2)

        tmp_states = K.map_fn(sum_unknown, current_positions,
                              dtype=tf.float32)
        tmp_states = K.tf.div_no_nan(K.sum(tmp_states, axis=[0, 1]), ng_num)
        return tmp_states

    ngs = K.map_fn(sum_rows, (ng_rows, mapping), dtype=tf.float32)

    if self.implementation == 1:
        if 0 < self.dropout < 1.:
            inputs_i = inputs * dp_mask[0]
            inputs_f = inputs * dp_mask[1]
            inputs_c = inputs * dp_mask[2]
            inputs_o = inputs * dp_mask[3]
        else:
            inputs_i = inputs
            inputs_f = inputs
            inputs_c = inputs
            inputs_o = inputs
        x_i = K.dot(inputs_i, self.W_i)
        x_f = K.dot(inputs_f, self.W_f)
        x_f_avg = K.dot(inputs_f, self.W_f)
        x_c = K.dot(inputs_c, self.W_c)
        x_o = K.dot(inputs_o, self.W_o)
        if self.use_bias:
            x_i = K.bias_add(x_i, self.bias_i)
            x_f = K.bias_add(x_f, self.bias_f)
            x_f_avg = K.bias_add(x_f_avg, self.bias_f)
            x_c = K.bias_add(x_c, self.bias_c)
            x_o = K.bias_add(x_o, self.bias_o)
        if 0 < self.recurrent_dropout < 1.:
            h_tm1_i = h_tm1 * rec_dp_mask[0]
            h_tm1_f = h_tm1 * rec_dp_mask[1]
            h_tm1_c = h_tm1 * rec_dp_mask[2]
            h_tm1_o = h_tm1 * rec_dp_mask[3]
        else:
            h_tm1_i = h_tm1
            h_tm1_f = h_tm1
            h_tm1_c = h_tm1
            h_tm1_o = h_tm1
        i = x_i + K.dot(h_tm1_i, self.U_i) + K.dot(ngs, self.Un_i)
        f_avg = x_f_avg + K.dot(h_tm1, self.Un_f)
        f = x_f + K.dot(h_tm1_f, self.U_f)
        c = x_c + K.dot(h_tm1_c, self.U_c) + K.dot(ngs, self.Un_c)
        o = x_o + K.dot(h_tm1_o, self.U_o) + K.dot(ngs, self.Un_o)
        i = self.recurrent_activation(i)
        f_avg = self.recurrent_activation(f_avg)
        f = self.recurrent_activation(f)
        o = self.recurrent_activation(o)
        c = self.activation(c)
    else:
        if 0. < self.dropout < 1.:
            inputs *= dp_mask[0]
        z = K.dot(inputs, self.W)
        if 0. < self.recurrent_dropout < 1.:
            h_tm1 *= rec_dp_mask[0]
        if self.use_bias:
            z = K.bias_add(z, self.bias)
        i = z[:, :self.units]
        f_avg = z[:, self.units: 2 * self.units]
        f = z[:, self.units: 2 * self.units]
        c = z[:, 2 * self.units: 3 * self.units]
        o = z[:, 3 * self.units:]
        i += K.dot(h_tm1, self.U_i) + K.dot(ngs, self.Un_i)
        f_avg += K.dot(h_tm1, self.Un_f)
        f += K.dot(h_tm1, self.U_f)
        o += K.dot(h_tm1, self.U_o) + K.dot(ngs, self.Un_o)
        c += K.dot(h_tm1, self.U_c) + K.dot(ngs, self.Un_c)
        i = self.recurrent_activation(i)
        f_avg = self.recurrent_activation(f_avg)
        f = self.recurrent_activation(f)
        o = self.recurrent_activation(o)
        c = self.activation(c)

    def sum_memories(input_ng):
        # average the forget-gated carry states of the neighbors
        ng_row, batch_map = input_ng
        ng_num = K.sum(ng_row, axis=-1)
        batch_ng = K.tf.where(K.equal(ng_row, 1))[:, 0]
        current_positions = K.gather(batch_map, batch_ng)

        def sum_unknown_memories(input_time):
            return tf.cond(input_time < time,
                           lambda: f_avg * c_tm1,
                           lambda: f_avg * c_tm2)

        tmp_states = K.map_fn(sum_unknown_memories, current_positions,
                              dtype=tf.float32)
        tmp_states = K.tf.div_no_nan(K.sum(tmp_states, axis=[0, 1]), ng_num)
        return tmp_states

    memory = K.map_fn(sum_memories, (ng_rows, mapping), dtype=tf.float32)
    memory += f * c_tm1 + i * c
    h = o * self.activation(memory)
    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h, memory]
def call(self, inputs, states, training=None):
    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            K.ones_like(inputs),
            self.dropout,
            training=training,
            count=4)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            K.ones_like(states[1]),
            self.recurrent_dropout,
            training=training,
            count=4)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    if 0 < self.dropout < 1.:
        inputs_i = inputs * dp_mask[0]
        inputs_f = inputs * dp_mask[1]
        inputs_c = inputs * dp_mask[2]
        inputs_o = inputs * dp_mask[3]
    else:
        inputs_i = inputs
        inputs_f = inputs
        inputs_c = inputs
        inputs_o = inputs

    if 0 < self.recurrent_dropout < 1.:
        h_tm1_i = h_tm1 * rec_dp_mask[0]
        h_tm1_f = h_tm1 * rec_dp_mask[1]
        h_tm1_c = h_tm1 * rec_dp_mask[2]
        h_tm1_o = h_tm1 * rec_dp_mask[3]
    else:
        h_tm1_i = h_tm1
        h_tm1_f = h_tm1
        h_tm1_c = h_tm1
        h_tm1_o = h_tm1

    x_i = self.input_conv(inputs_i, w=self.kernel_i, w_1x1=self.kernel_1x1_i,
                          b=self.bias_i, padding=self.padding)
    x_f = self.input_conv(inputs_f, w=self.kernel_f, w_1x1=self.kernel_1x1_f,
                          b=self.bias_f, padding=self.padding)
    x_c = self.input_conv(inputs_c, w=self.kernel_c, w_1x1=self.kernel_1x1_c,
                          b=self.bias_c, padding=self.padding)
    x_o = self.input_conv(inputs_o, w=self.kernel_o, w_1x1=self.kernel_1x1_o,
                          b=self.bias_o, padding=self.padding)
    h_i = self.recurrent_conv(h_tm1_i, self.recurrent_kernel_i,
                              self.recurrent_kernel_1x1_i)
    h_f = self.recurrent_conv(h_tm1_f, self.recurrent_kernel_f,
                              self.recurrent_kernel_1x1_f)
    h_c = self.recurrent_conv(h_tm1_c, self.recurrent_kernel_c,
                              self.recurrent_kernel_1x1_c)
    h_o = self.recurrent_conv(h_tm1_o, self.recurrent_kernel_o,
                              self.recurrent_kernel_1x1_o)

    i = self.recurrent_activation(x_i + h_i)
    f = self.recurrent_activation(x_f + h_f)
    c = f * c_tm1 + i * self.activation(x_c + h_c)
    o = self.recurrent_activation(x_o + h_o)
    h = o * self.activation(c)

    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h, c]
def call(self, inputs, states, training=None):
    h_tm1 = states[0]  # previous memory

    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            K.ones_like(inputs),
            self.dropout,
            training=training,
            count=3)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            K.ones_like(h_tm1),
            self.recurrent_dropout,
            training=training,
            count=3)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    if self.implementation == 1:
        if 0. < self.dropout < 1.:
            inputs_z = inputs * dp_mask[0]
            inputs_r = inputs * dp_mask[1]
            inputs_h = inputs * dp_mask[2]
        else:
            inputs_z = inputs
            inputs_r = inputs
            inputs_h = inputs

        x_z = K.dot(inputs_z, self.kernel_z)
        x_r = K.dot(inputs_r, self.kernel_r)
        x_h = K.dot(inputs_h, self.kernel_h)
        if self.use_bias:
            x_z = K.bias_add(x_z, self.input_bias_z)
            x_r = K.bias_add(x_r, self.input_bias_r)
            x_h = K.bias_add(x_h, self.input_bias_h)

        if 0. < self.recurrent_dropout < 1.:
            h_tm1_z = h_tm1 * rec_dp_mask[0]
            h_tm1_r = h_tm1 * rec_dp_mask[1]
            h_tm1_h = h_tm1 * rec_dp_mask[2]
        else:
            h_tm1_z = h_tm1
            h_tm1_r = h_tm1
            h_tm1_h = h_tm1

        recurrent_z = K.dot(h_tm1_z, self.recurrent_kernel_z)
        recurrent_r = K.dot(h_tm1_r, self.recurrent_kernel_r)
        if self.reset_after and self.use_bias:
            recurrent_z = K.bias_add(recurrent_z, self.recurrent_bias_z)
            recurrent_r = K.bias_add(recurrent_r, self.recurrent_bias_r)

        z = self.recurrent_activation(x_z + recurrent_z)
        r = self.recurrent_activation(x_r + recurrent_r)

        # reset gate applied after/before matrix multiplication
        if self.reset_after:
            recurrent_h = K.dot(h_tm1_h, self.recurrent_kernel_h)
            if self.use_bias:
                recurrent_h = K.bias_add(recurrent_h, self.recurrent_bias_h)
            recurrent_h = r * recurrent_h
        else:
            recurrent_h = K.dot(r * h_tm1_h, self.recurrent_kernel_h)

        hh = self.activation(x_h + recurrent_h)
    else:
        if 0. < self.dropout < 1.:
            inputs *= dp_mask[0]
        # inputs projected by all gate matrices at once
        matrix_x = K.dot(inputs, self.kernel)
        if self.use_bias:
            # biases: bias_z_i, bias_r_i, bias_h_i
            matrix_x = K.bias_add(matrix_x, self.input_bias)
        x_z = matrix_x[:, :self.units]
        x_r = matrix_x[:, self.units:2 * self.units]
        x_h = matrix_x[:, 2 * self.units:]

        if 0. < self.recurrent_dropout < 1.:
            h_tm1 *= rec_dp_mask[0]

        if self.reset_after:
            # hidden state projected by all gate matrices at once
            matrix_inner = K.dot(h_tm1, self.recurrent_kernel)
            if self.use_bias:
                matrix_inner = K.bias_add(matrix_inner, self.recurrent_bias)
        else:
            # hidden state projected separately for update/reset and new
            matrix_inner = K.dot(h_tm1,
                                 self.recurrent_kernel[:, :2 * self.units])

        recurrent_z = matrix_inner[:, :self.units]
        recurrent_r = matrix_inner[:, self.units:2 * self.units]

        z = self.recurrent_activation(x_z + recurrent_z)
        r = self.recurrent_activation(x_r + recurrent_r)

        if self.reset_after:
            recurrent_h = r * matrix_inner[:, 2 * self.units:]
        else:
            recurrent_h = K.dot(r * h_tm1,
                                self.recurrent_kernel[:, 2 * self.units:])

        hh = self.activation(x_h + recurrent_h)

    # previous and candidate state mixed by update gate
    h = z * h_tm1 + (1 - z) * hh
    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h]
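# Usage sketch: any of these cells plugs into keras.layers.RNN as long as it
# follows the cell contract (a `call(inputs, states)` returning
# (output, new_states), plus a `state_size` attribute). `CustomGRUCell` is a
# hypothetical name standing in for the GRU-style cell defined above, and its
# constructor arguments are assumed, not taken from this file.
from keras.layers import Dense, Input, RNN
from keras.models import Model

x = Input(shape=(20, 32))                                     # (timesteps, features)
cell = CustomGRUCell(64, dropout=0.1, recurrent_dropout=0.1)  # assumed ctor
y = RNN(cell)(x)                                              # steps the cell over time
model = Model(x, Dense(1)(y))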
def call(self, inputs, states, training=None):
    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
            self.dropout,
            training=training,
            count=4)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.recurrent_dropout,
            training=training,
            count=4)
    if 0 < self.zoneout_c < 1 and self._zoneout_mask_c is None:
        self._zoneout_mask_c = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.zoneout_c,
            training=training,
            count=1)
    if 0 < self.zoneout_h < 1 and self._zoneout_mask_h is None:
        self._zoneout_mask_h = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.zoneout_h,
            training=training,
            count=1)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    if self.implementation == 1:
        if 0 < self.dropout < 1.:
            inputs_i = inputs * dp_mask[0]
            inputs_f = inputs * dp_mask[1]
            inputs_c = inputs * dp_mask[2]
            inputs_o = inputs * dp_mask[3]
        else:
            inputs_i = inputs
            inputs_f = inputs
            inputs_c = inputs
            inputs_o = inputs
        x_i = K.dot(inputs_i, self.kernel_i)
        x_f = K.dot(inputs_f, self.kernel_f)
        x_c = K.dot(inputs_c, self.kernel_c)
        x_o = K.dot(inputs_o, self.kernel_o)
        if self.use_bias:
            x_i = K.bias_add(x_i, self.bias_i)
            x_f = K.bias_add(x_f, self.bias_f)
            x_c = K.bias_add(x_c, self.bias_c)
            x_o = K.bias_add(x_o, self.bias_o)
        if 0 < self.recurrent_dropout < 1.:
            h_tm1_i = h_tm1 * rec_dp_mask[0]
            h_tm1_f = h_tm1 * rec_dp_mask[1]
            h_tm1_c = h_tm1 * rec_dp_mask[2]
            h_tm1_o = h_tm1 * rec_dp_mask[3]
        else:
            h_tm1_i = h_tm1
            h_tm1_f = h_tm1
            h_tm1_c = h_tm1
            h_tm1_o = h_tm1
        # layer normalization on each gate pre-activation
        i = self.recurrent_activation(
            self.ln(x_i + K.dot(h_tm1_i, self.recurrent_kernel_i)))
        f = self.recurrent_activation(
            self.ln(x_f + K.dot(h_tm1_f, self.recurrent_kernel_f)))
        c = f * c_tm1 + i * self.activation(
            self.ln(x_c + K.dot(h_tm1_c, self.recurrent_kernel_c)))
        o = self.recurrent_activation(
            self.ln(x_o + K.dot(h_tm1_o, self.recurrent_kernel_o)))

    h = o * self.activation(self.ln(c))

    if 0 < (self.dropout + self.recurrent_dropout +
            self.zoneout_c + self.zoneout_h):
        if training is None:
            h._uses_learning_phase = True

    # zoneout on hidden and carry states (see the sketch after this cell)
    if 0 < self.zoneout_h < 1:
        h = K.in_train_phase(K.dropout(h - h_tm1, self.zoneout_h),
                             h - h_tm1)
        h = h * (1. - self.zoneout_h) + h_tm1
    if 0 < self.zoneout_c < 1:
        c = K.in_train_phase(K.dropout(c - c_tm1, self.zoneout_c),
                             c - c_tm1)
        c = c * (1. - self.zoneout_c) + c_tm1
    return h, [h, c]
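# Why the zoneout lines above work: K.dropout(x, z) zeroes entries with
# probability z and scales survivors by 1/(1-z), so applying it to the delta
# (h - h_tm1) and multiplying back by (1-z) leaves h on surviving units and
# h_tm1 on dropped units, i.e. zoneout. A plain-numpy check of the algebra:
import numpy as np

rng = np.random.default_rng(0)
z = 0.3
h_tm1 = rng.normal(size=5)
h_new = rng.normal(size=5)

keep = rng.random(5) >= z                                # dropout survivors
delta = np.where(keep, (h_new - h_tm1) / (1 - z), 0.0)   # K.dropout(h - h_tm1, z)
h = delta * (1 - z) + h_tm1
assert np.allclose(h, np.where(keep, h_new, h_tm1))      # exact zoneout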
def call(self, inputs, states, training=None):
    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
            self.dropout,
            training=training,
            count=2)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.recurrent_dropout,
            training=training,
            count=2)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    if self.implementation == 1:
        if 0 < self.dropout < 1.:
            inputs_f = inputs * dp_mask[0]
            inputs_c = inputs * dp_mask[1]
        else:
            inputs_f = inputs
            inputs_c = inputs
        x_f = K.dot(inputs_f, self.kernel_f)
        x_c = K.dot(inputs_c, self.kernel_c)
        if self.use_bias:
            x_f = K.bias_add(x_f, self.bias_f)
            x_c = K.bias_add(x_c, self.bias_c)
        if 0 < self.recurrent_dropout < 1.:
            h_tm1_f = h_tm1 * rec_dp_mask[0]
            h_tm1_c = h_tm1 * rec_dp_mask[1]
        else:
            h_tm1_f = h_tm1
            h_tm1_c = h_tm1
        # a single forget gate drives both remembering and updating
        f = self.recurrent_activation(
            x_f + K.dot(h_tm1_f, self.recurrent_kernel_f))
        c = f * c_tm1 + (1. - f) * self.activation(
            x_c + K.dot(h_tm1_c, self.recurrent_kernel_c))
    else:
        if 0. < self.dropout < 1.:
            inputs *= dp_mask[0]
        z = K.dot(inputs, self.kernel)
        if 0. < self.recurrent_dropout < 1.:
            h_tm1 *= rec_dp_mask[0]
        z += K.dot(h_tm1, self.recurrent_kernel)
        if self.use_bias:
            z = K.bias_add(z, self.bias)
        z0 = z[:, :self.units]
        z1 = z[:, self.units:2 * self.units]
        f = self.recurrent_activation(z0)
        c = f * c_tm1 + (1. - f) * self.activation(z1)

    h = c
    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h, c]
def call(self, inputs, states, training=None):
    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = [_generate_dropout_mask(
            K.ones_like(inputs),
            self.dropout,
            training=training,
            count=1)]
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = [_generate_dropout_mask(
            K.ones_like(states[1]),
            self.recurrent_dropout,
            training=training,
            count=1)]

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state (the cell returns [c, c],
                       # so both states are identical)

    # The input, forget, and output gates of the parent ConvLSTM cell are
    # removed here; only the candidate path is kept, with a fixed retention
    # ratio standing in for the learned forget gate.
    if 0 < self.dropout < 1.:
        inputs_c = inputs * dp_mask[0]
    else:
        inputs_c = inputs
    if 0 < self.recurrent_dropout < 1.:
        h_tm1_c = h_tm1 * rec_dp_mask[0]
    else:
        h_tm1_c = h_tm1

    x_c = self.input_conv(inputs_c, self.kernel_c, self.bias_c,
                          padding=self.padding)
    h_c = self.recurrent_conv(h_tm1_c, self.recurrent_kernel_c)

    f = self._retention_ratio
    c = f * c_tm1 + (1 - f) * self.activation(x_c + h_c)

    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            c._uses_learning_phase = True
    return c, [c, c]
def call(self, inputs, states, training=None):
    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    seriesNum = inputs.shape[2]
    dataDim = inputs.shape[3]
    channels = inputs.shape[1]

    # split the input into the intra-series ("inner") and inter-series
    # ("inter") streams
    if channels == 1:
        inputs_inner = tf.reshape(inputs, (-1, seriesNum, dataDim))
        inputs_inter = tf.reshape(inputs, (-1, seriesNum, dataDim))
    else:
        inputs_inner = inputs[:, 0:1, :, :]
        inputs_inner = tf.reshape(inputs_inner, (-1, seriesNum, dataDim))
        inputs_inter = inputs[:, 1:2, :, :]
        inputs_inter = tf.reshape(inputs_inter, (-1, seriesNum, dataDim))

    h_tm1_inner = h_tm1[:, 0:1, :, :]
    h_tm1_inter = h_tm1[:, 1:2, :, :]
    c_tm1_inner = c_tm1[:, 0:1, :, :]
    c_tm1_inter = c_tm1[:, 1:2, :, :]
    h_tm1_inner = tf.reshape(h_tm1_inner, (-1, seriesNum, self.units))
    h_tm1_inter = tf.reshape(h_tm1_inter, (-1, seriesNum, self.units))
    c_tm1_inner = tf.reshape(c_tm1_inner, (-1, seriesNum, self.units))
    c_tm1_inter = tf.reshape(c_tm1_inter, (-1, seriesNum, self.units))

    # K.dotSelf is a custom backend helper, not part of stock Keras
    inputs_inter = K.dotSelf(self.S_kernel, inputs_inter)

    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            K.ones_like(inputs_inner),
            self.dropout,
            training=training,
            count=8)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            K.ones_like(c_tm1_inter),
            self.recurrent_dropout,
            training=training,
            count=8)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    if 0 < self.dropout < 1.:
        inner_inputs_i = inputs_inner * dp_mask[0]
        inner_inputs_f = inputs_inner * dp_mask[1]
        inner_inputs_c = inputs_inner * dp_mask[2]
        inner_inputs_o = inputs_inner * dp_mask[3]
        inter_inputs_i = inputs_inter * dp_mask[4]
        inter_inputs_f = inputs_inter * dp_mask[5]
        inter_inputs_c = inputs_inter * dp_mask[6]
        inter_inputs_o = inputs_inter * dp_mask[7]
    else:
        inner_inputs_i = inputs_inner
        inner_inputs_f = inputs_inner
        inner_inputs_c = inputs_inner
        inner_inputs_o = inputs_inner
        inter_inputs_i = inputs_inter
        inter_inputs_f = inputs_inter
        inter_inputs_c = inputs_inter
        inter_inputs_o = inputs_inter

    if 0 < self.recurrent_dropout < 1.:
        inner_h_tm1_i = h_tm1_inner * rec_dp_mask[0]
        inner_h_tm1_f = h_tm1_inner * rec_dp_mask[1]
        inner_h_tm1_c = h_tm1_inner * rec_dp_mask[2]
        inner_h_tm1_o = h_tm1_inner * rec_dp_mask[3]
        inter_h_tm1_i = h_tm1_inter * rec_dp_mask[4]
        inter_h_tm1_f = h_tm1_inter * rec_dp_mask[5]
        inter_h_tm1_c = h_tm1_inter * rec_dp_mask[6]
        inter_h_tm1_o = h_tm1_inter * rec_dp_mask[7]
    else:
        inner_h_tm1_i = h_tm1_inner
        inner_h_tm1_f = h_tm1_inner
        inner_h_tm1_c = h_tm1_inner
        inner_h_tm1_o = h_tm1_inner
        inter_h_tm1_i = h_tm1_inter
        inter_h_tm1_f = h_tm1_inter
        inter_h_tm1_c = h_tm1_inter
        inter_h_tm1_o = h_tm1_inter

    x_i_inner = K.dot(inner_inputs_i, self.inner_kernel_i)
    x_f_inner = K.dot(inner_inputs_f, self.inner_kernel_f)
    x_o_inner = K.dot(inner_inputs_o, self.inner_kernel_o)
    x_c_inner = K.dot(inner_inputs_c, self.inner_kernel_c)
    x_i_inter = K.dot(inter_inputs_i, self.inter_kernel_i)
    x_f_inter = K.dot(inter_inputs_f, self.inter_kernel_f)
    x_o_inter = K.dot(inter_inputs_o, self.inter_kernel_o)
    x_c_inter = K.dot(inter_inputs_c, self.inter_kernel_c)

    h_i_inner = K.dot(inner_h_tm1_i, self.inner_recurrent_kernel_i)
    h_f_inner = K.dot(inner_h_tm1_f, self.inner_recurrent_kernel_f)
    h_o_inner = K.dot(inner_h_tm1_o, self.inner_recurrent_kernel_o)
    h_c_inner = K.dot(inner_h_tm1_c, self.inner_recurrent_kernel_c)
    h_i_inter = K.dot(inter_h_tm1_i, self.inter_recurrent_kernel_i)
    h_f_inter = K.dot(inter_h_tm1_f, self.inter_recurrent_kernel_f)
    h_o_inter = K.dot(inter_h_tm1_o, self.inter_recurrent_kernel_o)
    h_c_inter = K.dot(inter_h_tm1_c, self.inter_recurrent_kernel_c)

    if self.use_bias:
        # biases are added directly (K.bias_add is not used on these
        # 3-D pre-activations)
        x_i_inner = x_i_inner + self.inner_bias_i
        x_f_inner = x_f_inner + self.inner_bias_f
        x_o_inner = x_o_inner + self.inner_bias_o
        x_c_inner = x_c_inner + self.inner_bias_c
        x_i_inter = x_i_inter + self.inter_bias_i
        x_f_inter = x_f_inter + self.inter_bias_f
        x_o_inter = x_o_inter + self.inter_bias_o
        x_c_inter = x_c_inter + self.inter_bias_c

    inner_i = self.recurrent_activation(x_i_inner + h_i_inner)
    inner_f = self.recurrent_activation(x_f_inner + h_f_inner)
    inner_o = self.recurrent_activation(x_o_inner + h_o_inner)
    inner_c = inner_f * c_tm1_inner + inner_i * self.activation(
        x_c_inner + h_c_inner)
    inner_h = inner_o * self.activation(inner_c)

    inter_i = self.recurrent_activation(x_i_inter + h_i_inter)
    inter_f = self.recurrent_activation(x_f_inter + h_f_inter)
    inter_o = self.recurrent_activation(x_o_inter + h_o_inter)
    inter_c = inter_f * c_tm1_inter + inter_i * self.activation(
        x_c_inter + h_c_inter)
    inter_h = inter_o * self.activation(inter_c)

    inner_h = tf.reshape(inner_h, (-1, 1, inner_h.shape[1], inner_h.shape[2]))
    inter_h = tf.reshape(inter_h, (-1, 1, inter_h.shape[1], inter_h.shape[2]))
    inner_c = tf.reshape(inner_c, (-1, 1, inner_c.shape[1], inner_c.shape[2]))
    inter_c = tf.reshape(inter_c, (-1, 1, inter_c.shape[1], inter_c.shape[2]))

    # stack the two streams back into the channel dimension
    h = tf.concat([inner_h, inter_h], 1)
    c = tf.concat([inner_c, inter_c], 1)

    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h, c]
def call(self, inputs, states, training=None): """We need to reimplmenet `call` entirely rather than reusing that from `GRUCell` since there are lots of differences. Args: inputs: One tensor which is stacked by 3 inputs (x, m, s) x and m are of shape (n_batch * input_dim). s is of shape (n_batch, 1). states: states and other values from the previous step. (h_tm1, x_keep_tm1, s_prev_tm1) """ # Get inputs and states input_x = inputs[:, :self.true_input_dim] # inputs x, m, s input_m = inputs[:, self.true_input_dim:-1] input_s = inputs[:, -1:] # Need to add broadcast for time_stamp if using theano backend. if K.backend() == 'theano': input_s = K.pattern_broadcast(input_s, [False, True]) h_tm1, x_keep_tm1, s_prev_tm1 = states # previous memory ([n_batch * self.units]) # previous input x ([n_batch * input_dim]) # and the subtraction term (of delta_t^d in Equation (2)) # ([n_batch * input_dim]) input_1m = K.cast_to_floatx(1.) - input_m input_d = input_s - s_prev_tm1 # Get dropout if 0. < self.dropout < 1. and self._dropout_mask is None: self._dropout_mask = _generate_dropout_mask(K.ones_like(input_x), self.dropout, training=training, count=3) if (0. < self.recurrent_dropout < 1. and self._recurrent_dropout_mask is None): self._recurrent_dropout_mask = _generate_dropout_mask( K.ones_like(h_tm1), self.recurrent_dropout, training=training, count=3) dp_mask = self._dropout_mask rec_dp_mask = self._recurrent_dropout_mask if self.feed_masking: if 0. < self.dropout < 1. and self._masking_dropout_mask is None: self._masking_dropout_mask = _generate_dropout_mask( K.ones_like(input_m), self.dropout, training=training, count=3) m_dp_mask = self._masking_dropout_mask # Compute decay if any if self.input_decay is not None: gamma_di = input_d * self.input_decay_kernel if self.use_decay_bias: gamma_di = K.bias_add(gamma_di, self.input_decay_bias) gamma_di = self.input_decay(gamma_di) if self.hidden_decay is not None: gamma_dh = K.dot(input_d, self.hidden_decay_kernel) if self.use_decay_bias: gamma_dh = K.bias_add(gamma_dh, self.hidden_decay_bias) gamma_dh = self.hidden_decay(gamma_dh) if self.feed_masking and self.masking_decay is not None: gamma_dm = input_d * self.masking_decay_kernel if self.use_decay_bias: gamma_dm = K.bias_add(gamma_dm, self.masking_decay_bias) gamma_dm = self.masking_decay(gamma_dm) # Get the imputed or decayed input if needed # and `x_keep_t` for the next time step if self.input_decay is not None: x_keep_t = K.switch(input_m, input_x, x_keep_tm1) x_t = K.switch(input_m, input_x, gamma_di * x_keep_t) elif self.x_imputation == 'forward': x_t = K.switch(input_m, input_x, x_keep_tm1) x_keep_t = x_t elif self.x_imputation == 'zero': x_t = K.switch(input_m, input_x, K.zeros_like(input_x)) x_keep_t = x_t elif self.x_imputation == 'raw': x_t = input_x x_keep_t = x_t else: raise ValueError('No input decay or invalid x_imputation ' '{}.'.format(self.x_imputation)) # Get decayed hidden if needed if self.hidden_decay is not None: h_tm1d = gamma_dh * h_tm1 else: h_tm1d = h_tm1 # Get decayed masking if needed if self.feed_masking: m_t = input_1m if self.masking_decay is not None: m_t = gamma_dm * m_t # Apply the dropout if 0. < self.dropout < 1.: x_z, x_r, x_h = x_t * dp_mask[0], x_t * dp_mask[1], x_t * dp_mask[2] if self.feed_masking: m_z, m_r, m_h = (m_t * m_dp_mask[0], m_t * m_dp_mask[1], m_t * m_dp_mask[2]) else: x_z, x_r, x_h = x_t, x_t, x_t if self.feed_masking: m_z, m_r, m_h = m_t, m_t, m_t if 0. 
< self.recurrent_dropout < 1.: h_tm1_z, h_tm1_r = ( h_tm1d * rec_dp_mask[0], h_tm1d * rec_dp_mask[1], ) else: h_tm1_z, h_tm1_r = h_tm1d, h_tm1d # Get z_t, r_t, hh_t z_t = K.dot(x_z, self.kernel_z) + K.dot(h_tm1_z, self.recurrent_kernel_z) r_t = K.dot(x_r, self.kernel_r) + K.dot(h_tm1_r, self.recurrent_kernel_r) hh_t = K.dot(x_h, self.kernel_h) if self.feed_masking: z_t += K.dot(m_z, self.masking_kernel_z) r_t += K.dot(m_r, self.masking_kernel_r) hh_t += K.dot(m_h, self.masking_kernel_h) if self.use_bias: z_t = K.bias_add(z_t, self.input_bias_z) r_t = K.bias_add(r_t, self.input_bias_r) hh_t = K.bias_add(hh_t, self.input_bias_h) z_t = self.recurrent_activation(z_t) r_t = self.recurrent_activation(r_t) if 0. < self.recurrent_dropout < 1.: h_tm1_h = r_t * h_tm1d * rec_dp_mask[2] else: h_tm1_h = r_t * h_tm1d hh_t = self.activation(hh_t + K.dot(h_tm1_h, self.recurrent_kernel_h)) # get h_t h_t = z_t * h_tm1 + (1 - z_t) * hh_t if 0. < self.dropout + self.recurrent_dropout: if training is None: h_t._uses_learning_phase = True # get s_prev_t s_prev_t = K.switch(input_m, K.tile(input_s, [1, self.state_size[-1]]), s_prev_tm1) return h_t, [h_t, x_keep_t, s_prev_t]
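# Sketch of assembling the stacked GRU-D input described in the docstring
# above: values x, observation mask m (1 = observed), and timestamp s,
# concatenated along the feature axis. Shapes and data are illustrative only.
import numpy as np

n_batch, n_steps, input_dim = 4, 10, 3
x = np.random.randn(n_batch, n_steps, input_dim)               # raw values
m = (np.random.rand(n_batch, n_steps, input_dim) > 0.2) * 1.0  # mask
s = np.cumsum(np.random.rand(n_batch, n_steps, 1), axis=1)     # timestamps
inputs = np.concatenate([x, m, s], axis=-1)  # (n_batch, n_steps, 2*input_dim + 1)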
def call(self, inputs, states, training=None):
    h_tm1 = states[0]  # previous memory

    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            K.ones_like(inputs),
            self.dropout,
            training=training,
            count=5)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            K.ones_like(h_tm1),
            self.recurrent_dropout,
            training=training,
            count=4)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    if self.implementation == 1:
        if 0. < self.dropout < 1.:
            inputs_z = inputs * dp_mask[0]
            inputs_r = inputs * dp_mask[1]
            inputs_h = inputs * dp_mask[2]
            inputs_myl = inputs * dp_mask[3]
            inputs_myl2 = inputs * dp_mask[4]
        else:
            inputs_z = inputs
            inputs_r = inputs
            inputs_h = inputs
            inputs_myl = inputs
            inputs_myl2 = inputs

        x_z = K.dot(inputs_z, self.kernel_z)
        x_r = K.dot(inputs_r, self.kernel_r)
        x_h = K.dot(inputs_h, self.kernel_h)
        x_myl = K.dot(inputs_myl, self.kernel_myl)
        x_myl2 = K.dot(inputs_myl2, self.kernel_myl2)
        if self.use_bias:
            x_z = K.bias_add(x_z, self.input_bias_z)
            x_r = K.bias_add(x_r, self.input_bias_r)
            x_h = K.bias_add(x_h, self.input_bias_h)
            x_myl = K.bias_add(x_myl, self.input_bias_myl)
            x_myl2 = K.bias_add(x_myl2, self.input_bias_myl2)

        if 0. < self.recurrent_dropout < 1.:
            h_tm1_z = h_tm1 * rec_dp_mask[0]
            h_tm1_r = h_tm1 * rec_dp_mask[1]
            h_tm1_h = h_tm1 * rec_dp_mask[2]
            h_tm1_myl = h_tm1 * rec_dp_mask[3]
        else:
            h_tm1_z = h_tm1
            h_tm1_r = h_tm1
            h_tm1_myl = h_tm1
            h_tm1_h = h_tm1

        recurrent_z = K.dot(h_tm1_z, self.recurrent_kernel_z)
        recurrent_r = K.dot(h_tm1_r, self.recurrent_kernel_r)
        recurrent_myl = K.dot(h_tm1_myl, self.recurrent_kernel_myl)
        if self.reset_after and self.use_bias:
            recurrent_z = K.bias_add(recurrent_z, self.recurrent_bias_z)
            recurrent_r = K.bias_add(recurrent_r, self.recurrent_bias_r)
            recurrent_myl = K.bias_add(recurrent_myl, self.recurrent_bias_myl)

        z = self.recurrent_activation(x_z + recurrent_z)
        r = self.recurrent_activation(x_r + recurrent_r)
        myl = self.recurrent_activation(x_myl + recurrent_myl)

        # reset gate applied after/before matrix multiplication
        if self.reset_after:  # MyGRU takes this branch
            recurrent_h = K.dot(h_tm1_h, self.recurrent_kernel_h)
            if self.use_bias:
                recurrent_h = K.bias_add(recurrent_h, self.recurrent_bias_h)
            recurrent_h = r * recurrent_h
        else:  # default
            recurrent_h = K.dot(r * h_tm1_h, self.recurrent_kernel_h)

        hh = self.activation(x_h + recurrent_h) + myl * x_myl2
    else:
        if 0. < self.dropout < 1.:
            inputs *= dp_mask[0]
        # inputs projected by all gate matrices at once
        matrix_x = K.dot(inputs, self.kernel)
        if self.use_bias:
            # biases: bias_z_i, bias_r_i, bias_h_i
            matrix_x = K.bias_add(matrix_x, self.input_bias)
        x_z = matrix_x[:, :self.units]
        x_r = matrix_x[:, self.units:2 * self.units]
        x_h = matrix_x[:, 2 * self.units:3 * self.units]
        x_myl = matrix_x[:, 3 * self.units:4 * self.units]
        x_myl2 = matrix_x[:, 4 * self.units:5 * self.units]

        if 0. < self.recurrent_dropout < 1.:
            h_tm1 *= rec_dp_mask[0]

        if self.reset_after:
            # hidden state projected by all gate matrices at once
            matrix_inner = K.dot(h_tm1, self.recurrent_kernel)
            if self.use_bias:
                matrix_inner = K.bias_add(matrix_inner, self.recurrent_bias)
        else:
            # hidden state projected separately for update/reset and new
            matrix_inner = K.dot(h_tm1,
                                 self.recurrent_kernel[:, :2 * self.units])

        recurrent_z = matrix_inner[:, :self.units]
        recurrent_r = matrix_inner[:, self.units:2 * self.units]
        if self.reset_after:
            recurrent_myl = matrix_inner[:, self.units * 3:4 * self.units]
        else:
            recurrent_myl = K.dot(
                h_tm1,
                self.recurrent_kernel[:, 3 * self.units:4 * self.units])

        z = self.recurrent_activation(x_z + recurrent_z)
        r = self.recurrent_activation(x_r + recurrent_r)
        myl = self.recurrent_activation(x_myl + recurrent_myl)

        if self.reset_after:
            recurrent_h = r * matrix_inner[:, 2 * self.units:3 * self.units]
        else:
            recurrent_h = K.dot(
                r * h_tm1,
                self.recurrent_kernel[:, 2 * self.units:3 * self.units])
        hh = self.activation(x_h + recurrent_h) + myl * x_myl2

    # previous and candidate state mixed by update gate
    h = z * h_tm1 + (1 - z) * hh
    # todo why
    # h = (1 - z) * h_tm1 + z * hh
    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True

    # t-cells: a stack of input-less GRU steps applied to h
    for i in range(self.t_cell_num):
        h_tm1 = h
        if (0 < self.t_recurrent_dropout < 1 and
                self.t__recurrent_dropout_mask[i] is None):
            self.t__recurrent_dropout_mask[i] = _generate_dropout_mask(
                K.ones_like(h_tm1),
                self.t_recurrent_dropout,
                training=training,
                count=3)
        rec_dp_mask = self.t__recurrent_dropout_mask[i]

        if self.implementation == 1:
            if 0. < self.t_recurrent_dropout < 1.:
                h_tm1_z = h_tm1 * rec_dp_mask[0]
                h_tm1_r = h_tm1 * rec_dp_mask[1]
                h_tm1_h = h_tm1 * rec_dp_mask[2]
            else:
                h_tm1_z = h_tm1
                h_tm1_r = h_tm1
                h_tm1_h = h_tm1

            recurrent_z = K.dot(h_tm1_z, self.t_recurrent_kernel_z[i])
            recurrent_r = K.dot(h_tm1_r, self.t_recurrent_kernel_r[i])
            if self.reset_after and self.use_bias:
                recurrent_z = K.bias_add(recurrent_z,
                                         self.t_recurrent_bias_z[i])
                recurrent_r = K.bias_add(recurrent_r,
                                         self.t_recurrent_bias_r[i])

            z = self.recurrent_activation(recurrent_z)
            r = self.recurrent_activation(recurrent_r)

            # reset gate applied after/before matrix multiplication
            if self.reset_after:  # TGRU takes this branch
                recurrent_h = K.dot(h_tm1_h, self.t_recurrent_kernel_h[i])
                if self.use_bias:
                    recurrent_h = K.bias_add(recurrent_h,
                                             self.t_recurrent_bias_h[i])
                recurrent_h = r * recurrent_h
            else:  # default
                recurrent_h = K.dot(r * h_tm1_h, self.t_recurrent_kernel_h[i])
            hh = self.activation(recurrent_h)
        else:
            if 0. < self.t_recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]
            if self.reset_after:
                # hidden state projected by all gate matrices at once
                matrix_inner = K.dot(h_tm1, self.t_recurrent_kernel[i])
                if self.use_bias:
                    matrix_inner = K.bias_add(matrix_inner,
                                              self.t_recurrent_bias[i])
            else:
                # hidden state projected separately for update/reset and new
                matrix_inner = K.dot(
                    h_tm1, self.t_recurrent_kernel[i][:, :2 * self.units])
            recurrent_z = matrix_inner[:, :self.units]
            recurrent_r = matrix_inner[:, self.units:2 * self.units]
            z = self.recurrent_activation(recurrent_z)
            r = self.recurrent_activation(recurrent_r)
            if self.reset_after:
                recurrent_h = r * matrix_inner[:, 2 * self.units:3 * self.units]
            else:
                recurrent_h = K.dot(
                    r * h_tm1,
                    self.t_recurrent_kernel[i][:, 2 * self.units:3 * self.units])
            hh = self.activation(recurrent_h)

        # previous and candidate state mixed by update gate
        h = z * h_tm1 + (1 - z) * hh
        # todo why
        # h = (1 - z) * h_tm1 + z * hh
        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
    return h, [h]
def call(self, states, training=None):
    h_tm1 = states  # previous memory

    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            K.ones_like(h_tm1),
            self.recurrent_dropout,
            training=training,
            count=3)

    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    if self.implementation == 1:
        if 0. < self.recurrent_dropout < 1.:
            h_tm1_z = h_tm1 * rec_dp_mask[0]
            h_tm1_r = h_tm1 * rec_dp_mask[1]
            h_tm1_h = h_tm1 * rec_dp_mask[2]
        else:
            h_tm1_z = h_tm1
            h_tm1_r = h_tm1
            h_tm1_h = h_tm1

        recurrent_z = K.dot(h_tm1_z, self.recurrent_kernel_z)
        recurrent_r = K.dot(h_tm1_r, self.recurrent_kernel_r)
        if self.reset_after and self.use_bias:
            recurrent_z = K.bias_add(recurrent_z, self.recurrent_bias_z)
            recurrent_r = K.bias_add(recurrent_r, self.recurrent_bias_r)

        z = self.recurrent_activation(recurrent_z)
        r = self.recurrent_activation(recurrent_r)

        # reset gate applied after/before matrix multiplication
        if self.reset_after:  # TGRU takes this branch
            recurrent_h = K.dot(h_tm1_h, self.recurrent_kernel_h)
            if self.use_bias:
                recurrent_h = K.bias_add(recurrent_h, self.recurrent_bias_h)
            recurrent_h = r * recurrent_h
        else:  # default
            recurrent_h = K.dot(r * h_tm1_h, self.recurrent_kernel_h)
        hh = self.activation(recurrent_h)
    else:
        if 0. < self.recurrent_dropout < 1.:
            h_tm1 *= rec_dp_mask[0]
        if self.reset_after:
            # hidden state projected by all gate matrices at once
            matrix_inner = K.dot(h_tm1, self.recurrent_kernel)
            if self.use_bias:
                matrix_inner = K.bias_add(matrix_inner, self.recurrent_bias)
        else:
            # hidden state projected separately for update/reset and new
            matrix_inner = K.dot(h_tm1,
                                 self.recurrent_kernel[:, :2 * self.units])
        recurrent_z = matrix_inner[:, :self.units]
        recurrent_r = matrix_inner[:, self.units:2 * self.units]
        z = self.recurrent_activation(recurrent_z)
        r = self.recurrent_activation(recurrent_r)
        if self.reset_after:
            recurrent_h = r * matrix_inner[:, 2 * self.units:3 * self.units]
        else:
            recurrent_h = K.dot(
                r * h_tm1,
                self.recurrent_kernel[:, 2 * self.units:3 * self.units])
        hh = self.activation(recurrent_h)

    # previous and candidate state mixed by update gate
    h = z * h_tm1 + (1 - z) * hh
    # todo why
    # h = (1 - z) * h_tm1 + z * hh
    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h
def call(self, inputs, states, training=None):
    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
            self.dropout,
            training=training,
            count=8)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        _recurrent_dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.recurrent_dropout,
            training=training,
            count=8)
        self._recurrent_dropout_mask = _recurrent_dropout_mask

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    if self.implementation == 1:
        if 0 < self.dropout < 1.:
            inputs_0 = inputs * dp_mask[0]
            inputs_1 = inputs * dp_mask[1]
            inputs_2 = inputs * dp_mask[2]
            inputs_3 = inputs * dp_mask[3]
            inputs_4 = inputs * dp_mask[4]
            inputs_5 = inputs * dp_mask[5]
            inputs_6 = inputs * dp_mask[6]
            inputs_7 = inputs * dp_mask[7]
        else:
            inputs_0 = inputs
            inputs_1 = inputs
            inputs_2 = inputs
            inputs_3 = inputs
            inputs_4 = inputs
            inputs_5 = inputs
            inputs_6 = inputs
            inputs_7 = inputs
        x_0 = K.dot(inputs_0, self.kernel_0)
        x_1 = K.dot(inputs_1, self.kernel_1)
        x_2 = K.dot(inputs_2, self.kernel_2)
        x_3 = K.dot(inputs_3, self.kernel_3)
        x_4 = K.dot(inputs_4, self.kernel_4)
        x_5 = K.dot(inputs_5, self.kernel_5)
        x_6 = K.dot(inputs_6, self.kernel_6)
        x_7 = K.dot(inputs_7, self.kernel_7)
        if self.use_bias:
            x_0 = K.bias_add(x_0, self.bias_0)
            x_1 = K.bias_add(x_1, self.bias_1)
            x_2 = K.bias_add(x_2, self.bias_2)
            x_3 = K.bias_add(x_3, self.bias_3)
            x_4 = K.bias_add(x_4, self.bias_4)
            x_5 = K.bias_add(x_5, self.bias_5)
            x_6 = K.bias_add(x_6, self.bias_6)
            x_7 = K.bias_add(x_7, self.bias_7)
        if 0 < self.recurrent_dropout < 1.:
            h_tm1_0 = h_tm1 * rec_dp_mask[0]
            h_tm1_1 = h_tm1 * rec_dp_mask[1]
            h_tm1_2 = h_tm1 * rec_dp_mask[2]
            h_tm1_3 = h_tm1 * rec_dp_mask[3]
            h_tm1_4 = h_tm1 * rec_dp_mask[4]
            h_tm1_5 = h_tm1 * rec_dp_mask[5]
            h_tm1_6 = h_tm1 * rec_dp_mask[6]
            h_tm1_7 = h_tm1 * rec_dp_mask[7]
        else:
            h_tm1_0 = h_tm1
            h_tm1_1 = h_tm1
            h_tm1_2 = h_tm1
            h_tm1_3 = h_tm1
            h_tm1_4 = h_tm1
            h_tm1_5 = h_tm1
            h_tm1_6 = h_tm1
            h_tm1_7 = h_tm1

        # First Layer
        layer1_0 = self.recurrent_activation(
            x_0 + K.dot(h_tm1_0, self.recurrent_kernel_0))
        layer1_1 = self.cell_activation(
            x_1 + K.dot(h_tm1_1, self.recurrent_kernel_1))
        layer1_2 = self.recurrent_activation(
            x_2 + K.dot(h_tm1_2, self.recurrent_kernel_2))
        layer1_3 = self.cell_activation(
            x_3 * K.dot(h_tm1_3, self.recurrent_kernel_3))
        layer1_4 = self.activation(
            x_4 + K.dot(h_tm1_4, self.recurrent_kernel_4))
        layer1_5 = self.recurrent_activation(
            x_5 + K.dot(h_tm1_5, self.recurrent_kernel_5))
        layer1_6 = self.activation(
            x_6 + K.dot(h_tm1_6, self.recurrent_kernel_6))
        layer1_7 = self.recurrent_activation(
            x_7 + K.dot(h_tm1_7, self.recurrent_kernel_7))

        # Second Layer
        layer2_0 = self.activation(layer1_0 * layer1_1)
        layer2_1 = self.activation(layer1_2 + layer1_3)
        layer2_2 = self.activation(layer1_4 * layer1_5)
        layer2_3 = self.recurrent_activation(layer1_6 + layer1_7)

        # Inject the Cell
        layer2_0 = self.activation(layer2_0 + c_tm1)

        # Third Layer
        layer3_0_pre = layer2_0 * layer2_1
        c = layer3_0_pre  # create a new cell
        layer3_0 = layer3_0_pre
        layer3_1 = self.activation(layer2_2 + layer2_3)

        # Final Layer
        h = self.activation(layer3_0 * layer3_1)
        if self.projection_units is not None:
            h = self.projection_activation(K.dot(h, self.projection_kernel))
    else:
        if 0. < self.dropout < 1.:
            inputs *= dp_mask[0]
        z = K.dot(inputs, self.kernel)
        if 0. < self.recurrent_dropout < 1.:
            h_tm1 *= rec_dp_mask[0]
        zr = K.dot(h_tm1, self.recurrent_kernel)
        if self.use_bias:
            zr = K.bias_add(zr, self.bias)

        z0 = z[:, :self.units]
        z1 = z[:, self.units: 2 * self.units]
        z2 = z[:, 2 * self.units: 3 * self.units]
        z3 = z[:, 3 * self.units: 4 * self.units]
        z4 = z[:, 4 * self.units: 5 * self.units]
        z5 = z[:, 5 * self.units: 6 * self.units]
        z6 = z[:, 6 * self.units: 7 * self.units]
        z7 = z[:, 7 * self.units:]

        zr0 = zr[:, :self.units]
        zr1 = zr[:, self.units: 2 * self.units]
        zr2 = zr[:, 2 * self.units: 3 * self.units]
        zr3 = zr[:, 3 * self.units: 4 * self.units]
        zr4 = zr[:, 4 * self.units: 5 * self.units]
        zr5 = zr[:, 5 * self.units: 6 * self.units]
        zr6 = zr[:, 6 * self.units: 7 * self.units]
        zr7 = zr[:, 7 * self.units:]

        # First Layer
        layer1_0 = self.recurrent_activation(z0 + zr0)
        layer1_1 = self.cell_activation(z1 + zr1)
        layer1_2 = self.recurrent_activation(z2 + zr2)
        layer1_3 = self.cell_activation(z3 * zr3)
        layer1_4 = self.activation(z4 + zr4)
        layer1_5 = self.recurrent_activation(z5 + zr5)
        layer1_6 = self.activation(z6 + zr6)
        layer1_7 = self.recurrent_activation(z7 + zr7)

        # Second Layer
        layer2_0 = self.activation(layer1_0 * layer1_1)
        layer2_1 = self.activation(layer1_2 + layer1_3)
        layer2_2 = self.activation(layer1_4 * layer1_5)
        layer2_3 = self.recurrent_activation(layer1_6 + layer1_7)

        # Inject the Cell
        layer2_0 = self.activation(layer2_0 + c_tm1)

        # Third Layer
        layer3_0_pre = layer2_0 * layer2_1
        c = layer3_0_pre
        layer3_0 = layer3_0_pre
        layer3_1 = self.activation(layer2_2 + layer2_3)

        # Final Layer
        h = self.activation(layer3_0 * layer3_1)
        if self.projection_units is not None:
            h = self.projection_activation(K.dot(h, self.projection_kernel))

    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h, c]
def call(self, inputs, states, training=None):
    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(
                inputs, K.shape(inputs)[-1] + self.annotation_units),
            self.dropout,
            training=training,
            count=4)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.recurrent_dropout,
            training=training,
            count=4)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    # attention mechanism

    # repeat the hidden state to the length of the sequence
    _stm = K.repeat(h_tm1, self.annotation_timesteps)

    # multiply the weight matrix with the repeated (current) hidden state
    _Wxstm = K.dot(_stm, self.kernel_w)

    # calculate the attention probabilities
    et = K.dot(activations.tanh(_Wxstm + self._uh),
               K.expand_dims(self.kernel_v))
    at = K.exp(et)
    at_sum = K.sum(at, axis=1)
    at_sum_repeated = K.repeat(at_sum, self.annotation_timesteps)
    at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

    # calculate the context vector
    context = K.squeeze(K.batch_dot(at, self.annotations, axes=1), axis=1)

    # append the context vector to the inputs
    inputs = K.concatenate([inputs, context])

    if self.implementation == 1:
        if 0 < self.dropout < 1.:
            inputs_i = inputs * dp_mask[0]
            inputs_f = inputs * dp_mask[1]
            inputs_c = inputs * dp_mask[2]
            inputs_o = inputs * dp_mask[3]
        else:
            inputs_i = inputs
            inputs_f = inputs
            inputs_c = inputs
            inputs_o = inputs
        x_i = K.dot(inputs_i, self.kernel_i)
        x_f = K.dot(inputs_f, self.kernel_f)
        x_c = K.dot(inputs_c, self.kernel_c)
        x_o = K.dot(inputs_o, self.kernel_o)
        if self.use_bias:
            x_i = K.bias_add(x_i, self.bias_i)
            x_f = K.bias_add(x_f, self.bias_f)
            x_c = K.bias_add(x_c, self.bias_c)
            x_o = K.bias_add(x_o, self.bias_o)
        if 0 < self.recurrent_dropout < 1.:
            h_tm1_i = h_tm1 * rec_dp_mask[0]
            h_tm1_f = h_tm1 * rec_dp_mask[1]
            h_tm1_c = h_tm1 * rec_dp_mask[2]
            h_tm1_o = h_tm1 * rec_dp_mask[3]
        else:
            h_tm1_i = h_tm1
            h_tm1_f = h_tm1
            h_tm1_c = h_tm1
            h_tm1_o = h_tm1
        i = self.recurrent_activation(
            x_i + K.dot(h_tm1_i, self.recurrent_kernel_i))
        f = self.recurrent_activation(
            x_f + K.dot(h_tm1_f, self.recurrent_kernel_f))
        c = f * c_tm1 + i * self.activation(
            x_c + K.dot(h_tm1_c, self.recurrent_kernel_c))
        o = self.recurrent_activation(
            x_o + K.dot(h_tm1_o, self.recurrent_kernel_o))
    else:
        if 0. < self.dropout < 1.:
            inputs *= dp_mask[0]
        z = K.dot(inputs, self.kernel)
        if 0. < self.recurrent_dropout < 1.:
            h_tm1 *= rec_dp_mask[0]
        z += K.dot(h_tm1, self.recurrent_kernel)
        if self.use_bias:
            z = K.bias_add(z, self.bias)
        z0 = z[:, :self.units]
        z1 = z[:, self.units: 2 * self.units]
        z2 = z[:, 2 * self.units: 3 * self.units]
        z3 = z[:, 3 * self.units:]
        i = self.recurrent_activation(z0)
        f = self.recurrent_activation(z1)
        c = f * c_tm1 + i * self.activation(z2)
        o = self.recurrent_activation(z3)

    h = o * self.activation(c)
    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h, c]
def call(self, inputs, states, training=None):
    h_tm1 = states[0]  # previous memory

    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
            self.dropout,
            training=training,
            count=3)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.recurrent_dropout,
            training=training,
            count=3)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    if self.implementation == 1:
        if 0. < self.dropout < 1.:
            inputs_z = inputs * dp_mask[0]
            inputs_r = inputs * dp_mask[1]
            inputs_h = inputs * dp_mask[2]
        else:
            inputs_z = inputs
            inputs_r = inputs
            inputs_h = inputs
        x_z = K.dot(inputs_z, self.kernel_z)
        x_r = K.dot(inputs_r, self.kernel_r)
        x_h = K.dot(inputs_h, self.kernel_h)
        if self.use_bias:
            x_z = K.bias_add(x_z, self.bias_z)
            x_r = K.bias_add(x_r, self.bias_r)
            x_h = K.bias_add(x_h, self.bias_h)

        if 0. < self.recurrent_dropout < 1.:
            h_tm1_z = h_tm1 * rec_dp_mask[0]
            h_tm1_r = h_tm1 * rec_dp_mask[1]
            h_tm1_h = h_tm1 * rec_dp_mask[2]
        else:
            h_tm1_z = h_tm1
            h_tm1_r = h_tm1
            h_tm1_h = h_tm1

        recurrent_z = K.dot(h_tm1_z, self.recurrent_kernel_z)
        recurrent_r = K.dot(h_tm1_r, self.recurrent_kernel_r)

        # layer-normalize the gate pre-activations, then scale/center
        a_z = self.ln(x_z + recurrent_z)
        a_r = self.ln(x_r + recurrent_r)
        if self.scale:
            a_z *= self.gamma_z
            a_r *= self.gamma_r
        if self.center:
            a_z += self.beta_z
            a_r += self.beta_r
        z = self.recurrent_activation(a_z)
        r = self.recurrent_activation(a_r)

        recurrent_h = K.dot(r * h_tm1_h, self.recurrent_kernel_h)
        a_h = self.ln(x_h + recurrent_h)
        if self.scale:
            a_h *= self.gamma_h
        if self.center:
            a_h += self.beta_h
        hh = self.activation(a_h)
    # ignore implementation 2

    h = z * h_tm1 + (1 - z) * hh
    if 0 < self.dropout + self.recurrent_dropout + self.zoneout:
        if training is None:
            h._uses_learning_phase = True
    if 0 < self.zoneout < 1:
        h = K.in_train_phase(K.dropout(h - h_tm1, self.zoneout), h - h_tm1)
        h = h * (1. - self.zoneout) + h_tm1
    return h, [h]