def call(self, inputs, states, training=None):
    """Advance the cell by one timestep.

    Computes ``activation(dot(inputs, kernel) + h_prev * recurrent_kernel [+ bias])``
    where the recurrent term is an elementwise product.

    # Arguments
        inputs: input tensor for the current timestep.
        states: list whose first entry is the previous hidden state.
        training: forwarded to the dropout-mask generator. When it is `None`
            and any dropout rate is positive, the output is flagged as
            depending on the learning phase.

    # Returns
        Tuple `(h, [h])`: the new hidden state, as output and as next state.
    """
    input_dropout_on = 0 < self.dropout < 1
    recurrent_dropout_on = 0 < self.recurrent_dropout < 1

    # Masks are only built while the cached attribute is still None.
    if input_dropout_on and self._dropout_mask is None:
        input_ones = _generate_dropout_ones(inputs, K.shape(inputs)[-1])
        self._dropout_mask = _generate_dropout_mask(
            input_ones, self.dropout, training=training, count=1)
    if recurrent_dropout_on and self._recurrent_masks is None:
        state_ones = _generate_dropout_ones(inputs, self.units)
        self._recurrent_masks = _generate_dropout_mask(
            state_ones, self.recurrent_dropout, training=training, count=1)

    input_mask = self._dropout_mask
    state_mask = self._recurrent_masks
    previous_h = states[0]

    # NOTE(review): both masks are requested with count=1 and then indexed
    # with [0] -- confirm the generator returns a sequence in that case.
    if input_dropout_on:
        inputs = inputs * input_mask[0]
    if recurrent_dropout_on:
        previous_h = previous_h * state_mask[0]

    # Input projection plus elementwise (not matrix) recurrent contribution.
    pre_activation = K.dot(inputs, self.kernel)
    pre_activation = pre_activation + (previous_h * self.recurrent_kernel)
    if self.use_bias:
        pre_activation = K.bias_add(pre_activation, self.bias)
    h = self.activation(pre_activation)

    if training is None and 0 < self.dropout + self.recurrent_dropout:
        h._uses_learning_phase = True
    return h, [h]
def call(self, inputs, states, training=None):
    """Advance the nested cell by one timestep.

    Prepares the dropout masks, splits `states` into the hidden state and
    the per-depth carry states, and delegates the actual recurrence to
    `self.nested_recurrence` starting at depth 0.

    # Arguments
        inputs: input tensor for the current timestep.
        states: list; entry 0 is the previous hidden state and entries
            `1 .. self.depth` are the previous carry states.
        training: forwarded to the dropout-mask generator. When it is `None`
            and any dropout rate is positive, the output is flagged as
            depending on the learning phase.

    # Returns
        Tuple `(h, c)` exactly as returned by `self.nested_recurrence`.
    """
    input_dropout_on = 0 < self.dropout < 1
    recurrent_dropout_on = 0 < self.recurrent_dropout < 1

    # Masks are only built while the cached attribute is still None.
    if input_dropout_on and self._dropout_mask is None:
        input_ones = _generate_dropout_ones(inputs, K.shape(inputs)[-1])
        self._dropout_mask = _generate_dropout_mask(
            input_ones, self.dropout, training=training, count=1)
    if recurrent_dropout_on and self._nested_recurrent_masks is None:
        state_ones = _generate_dropout_ones(inputs, self.units)
        # One recurrent mask is requested per nesting level.
        self._nested_recurrent_masks = _generate_dropout_mask(
            state_ones, self.recurrent_dropout,
            training=training, count=self.depth)

    input_mask = self._dropout_mask
    recurrent_masks = self._nested_recurrent_masks

    previous_h = states[0]
    previous_carries = states[1:self.depth + 1]

    # NOTE(review): the input mask is requested with count=1 and then
    # indexed with [0] -- confirm the generator returns a sequence here.
    if input_dropout_on:
        inputs = inputs * input_mask[0]

    h, c = self.nested_recurrence(inputs,
                                  hidden_state=previous_h,
                                  cell_states=previous_carries,
                                  recurrent_masks=recurrent_masks,
                                  current_depth=0)

    if training is None and 0 < self.dropout + self.recurrent_dropout:
        h._uses_learning_phase = True
    return h, c
def call(self, inputs, states, training=None):
    """Advance the attention LSTM cell by one timestep.

    A context vector is computed from `self.annotations` using weights
    derived from the previous hidden state. It is concatenated to `inputs`
    and the result is fed through a standard four-gate LSTM update.

    # Arguments
        inputs: input tensor for the current timestep.
        states: list `[h_prev, c_prev]`.
        training: forwarded to the dropout-mask generator. When it is `None`
            and any dropout rate is positive, the output is flagged as
            depending on the learning phase.

    # Returns
        Tuple `(h, [h, c])`.
    """
    input_dropout_on = 0 < self.dropout < 1
    recurrent_dropout_on = 0 < self.recurrent_dropout < 1

    # The input mask covers the inputs *and* the appended context vector.
    if input_dropout_on and self._dropout_mask is None:
        input_ones = _generate_dropout_ones(
            inputs, K.shape(inputs)[-1] + self.annotation_units)
        self._dropout_mask = _generate_dropout_mask(
            input_ones, self.dropout, training=training, count=4)
    if recurrent_dropout_on and self._recurrent_dropout_mask is None:
        state_ones = _generate_dropout_ones(inputs, self.units)
        self._recurrent_dropout_mask = _generate_dropout_mask(
            state_ones, self.recurrent_dropout, training=training, count=4)

    dp_mask = self._dropout_mask
    rec_dp_mask = self._recurrent_dropout_mask

    h_prev = states[0]  # previous memory state
    c_prev = states[1]  # previous carry state

    # --- attention mechanism ---
    # Repeat the hidden state along the annotation sequence length.
    tiled_h = K.repeat(h_prev, self.annotation_timesteps)
    # Multiply the weight matrix with the repeated (current) hidden state.
    projected_h = K.dot(tiled_h, self.kernel_w)
    # Unnormalised attention scores.
    scores = K.dot(activations.tanh(projected_h + self._uh),
                   K.expand_dims(self.kernel_v))
    # Normalise exp(scores) by their sum over axis 1.
    weights = K.exp(scores)
    weights_total = K.repeat(K.sum(weights, axis=1),
                             self.annotation_timesteps)
    weights = weights / weights_total
    # Context vector: weighted combination of the annotations.
    context = K.squeeze(
        K.batch_dot(weights, self.annotations, axes=1), axis=1)
    # Append the context vector to the inputs.
    merged = K.concatenate([inputs, context])

    if self.implementation == 1:
        # One separate projection (and dropout mask) per gate.
        if input_dropout_on:
            in_i, in_f, in_c, in_o = [merged * dp_mask[k] for k in range(4)]
        else:
            in_i = in_f = in_c = in_o = merged
        x_i = K.dot(in_i, self.kernel_i)
        x_f = K.dot(in_f, self.kernel_f)
        x_c = K.dot(in_c, self.kernel_c)
        x_o = K.dot(in_o, self.kernel_o)
        if self.use_bias:
            x_i = K.bias_add(x_i, self.bias_i)
            x_f = K.bias_add(x_f, self.bias_f)
            x_c = K.bias_add(x_c, self.bias_c)
            x_o = K.bias_add(x_o, self.bias_o)

        if recurrent_dropout_on:
            h_i, h_f, h_c, h_o = [h_prev * rec_dp_mask[k] for k in range(4)]
        else:
            h_i = h_f = h_c = h_o = h_prev

        i = self.recurrent_activation(
            x_i + K.dot(h_i, self.recurrent_kernel_i))
        f = self.recurrent_activation(
            x_f + K.dot(h_f, self.recurrent_kernel_f))
        c = f * c_prev + i * self.activation(
            x_c + K.dot(h_c, self.recurrent_kernel_c))
        o = self.recurrent_activation(
            x_o + K.dot(h_o, self.recurrent_kernel_o))
    else:
        # Fused projection: one matmul per side, then slice the four gates.
        if input_dropout_on:
            merged = merged * dp_mask[0]
        z = K.dot(merged, self.kernel)
        if recurrent_dropout_on:
            h_prev = h_prev * rec_dp_mask[0]
        z = z + K.dot(h_prev, self.recurrent_kernel)
        if self.use_bias:
            z = K.bias_add(z, self.bias)

        z_i = z[:, :self.units]
        z_f = z[:, self.units: 2 * self.units]
        z_c = z[:, 2 * self.units: 3 * self.units]
        z_o = z[:, 3 * self.units:]

        i = self.recurrent_activation(z_i)
        f = self.recurrent_activation(z_f)
        c = f * c_prev + i * self.activation(z_c)
        o = self.recurrent_activation(z_o)

    h = o * self.activation(c)
    if training is None and 0 < self.dropout + self.recurrent_dropout:
        h._uses_learning_phase = True
    return h, [h, c]
def call(self, inputs, states, training=None):
    """Advance the forget-gate-only cell by one timestep.

    With forget gate `f` and candidate `g`, the update is
    `c = f * c_prev + (1 - f) * g` and the output is `h = c`.

    # Arguments
        inputs: input tensor for the current timestep.
        states: list `[h_prev, c_prev]`.
        training: forwarded to the dropout-mask generator. When it is `None`
            and any dropout rate is positive, the output is flagged as
            depending on the learning phase.

    # Returns
        Tuple `(h, [h, c])`.
    """
    input_dropout_on = 0 < self.dropout < 1
    recurrent_dropout_on = 0 < self.recurrent_dropout < 1

    # Two masks per side: one for the forget gate, one for the candidate.
    if input_dropout_on and self._dropout_mask is None:
        input_ones = _generate_dropout_ones(inputs, K.shape(inputs)[-1])
        self._dropout_mask = _generate_dropout_mask(
            input_ones, self.dropout, training=training, count=2)
    if recurrent_dropout_on and self._recurrent_dropout_mask is None:
        state_ones = _generate_dropout_ones(inputs, self.units)
        self._recurrent_dropout_mask = _generate_dropout_mask(
            state_ones, self.recurrent_dropout, training=training, count=2)

    dp_mask = self._dropout_mask
    rec_dp_mask = self._recurrent_dropout_mask

    h_prev = states[0]  # previous memory state
    c_prev = states[1]  # previous carry state

    if self.implementation == 1:
        # Separate projections per gate.
        if input_dropout_on:
            in_f = inputs * dp_mask[0]
            in_c = inputs * dp_mask[1]
        else:
            in_f = in_c = inputs
        x_f = K.dot(in_f, self.kernel_f)
        x_c = K.dot(in_c, self.kernel_c)
        if self.use_bias:
            x_f = K.bias_add(x_f, self.bias_f)
            x_c = K.bias_add(x_c, self.bias_c)

        if recurrent_dropout_on:
            h_f = h_prev * rec_dp_mask[0]
            h_c = h_prev * rec_dp_mask[1]
        else:
            h_f = h_c = h_prev

        f = self.recurrent_activation(
            x_f + K.dot(h_f, self.recurrent_kernel_f))
        candidate = self.activation(
            x_c + K.dot(h_c, self.recurrent_kernel_c))
        c = f * c_prev + (1. - f) * candidate
    else:
        # Fused projection, then slice out the two pre-activations.
        if input_dropout_on:
            inputs = inputs * dp_mask[0]
        z = K.dot(inputs, self.kernel)
        if recurrent_dropout_on:
            h_prev = h_prev * rec_dp_mask[0]
        z = z + K.dot(h_prev, self.recurrent_kernel)
        if self.use_bias:
            z = K.bias_add(z, self.bias)

        z_f = z[:, :self.units]
        z_c = z[:, self.units:2 * self.units]

        f = self.recurrent_activation(z_f)
        c = f * c_prev + (1. - f) * self.activation(z_c)

    # The carry state doubles as the output.
    h = c
    if training is None and 0 < self.dropout + self.recurrent_dropout:
        h._uses_learning_phase = True
    return h, [h, c]
def call(self, inputs, states, training=None):
    """Advance the layer-normalised LSTM cell (with zoneout) by one timestep.

    Every gate pre-activation and the carry state are passed through
    `self.ln` before their activation. After the LSTM update, zoneout is
    optionally applied to `h` and `c` relative to their previous values.

    # Arguments
        inputs: input tensor for the current timestep.
        states: list `[h_prev, c_prev]`.
        training: forwarded to the dropout-mask generator and to the zoneout
            train/inference switch. When it is `None` and any dropout or
            zoneout rate is positive, the returned output is flagged as
            depending on the learning phase.

    # Returns
        Tuple `(h, [h, c])`.

    # Raises
        NotImplementedError: if `self.implementation` is not 1. Only the
            per-gate implementation exists in this cell.
    """
    if self.implementation != 1:
        # The original fell through to an UnboundLocalError on `o`.
        raise NotImplementedError(
            'Only implementation=1 is supported by this cell.')

    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
            self.dropout,
            training=training,
            count=4)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.recurrent_dropout,
            training=training,
            count=4)
    # NOTE(review): the two zoneout masks below are populated but never read
    # in this method; kept because other code may rely on the attributes.
    if 0 < self.zoneout_c < 1 and self._zoneout_mask_c is None:
        self._zoneout_mask_c = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.zoneout_c,
            training=training,
            count=1)
    if 0 < self.zoneout_h < 1 and self._zoneout_mask_h is None:
        self._zoneout_mask_h = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.zoneout_h,
            training=training,
            count=1)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    if 0 < self.dropout < 1.:
        inputs_i = inputs * dp_mask[0]
        inputs_f = inputs * dp_mask[1]
        inputs_c = inputs * dp_mask[2]
        inputs_o = inputs * dp_mask[3]
    else:
        inputs_i = inputs
        inputs_f = inputs
        inputs_c = inputs
        inputs_o = inputs
    x_i = K.dot(inputs_i, self.kernel_i)
    x_f = K.dot(inputs_f, self.kernel_f)
    x_c = K.dot(inputs_c, self.kernel_c)
    x_o = K.dot(inputs_o, self.kernel_o)
    if self.use_bias:
        x_i = K.bias_add(x_i, self.bias_i)
        x_f = K.bias_add(x_f, self.bias_f)
        x_c = K.bias_add(x_c, self.bias_c)
        x_o = K.bias_add(x_o, self.bias_o)

    if 0 < self.recurrent_dropout < 1.:
        h_tm1_i = h_tm1 * rec_dp_mask[0]
        h_tm1_f = h_tm1 * rec_dp_mask[1]
        h_tm1_c = h_tm1 * rec_dp_mask[2]
        h_tm1_o = h_tm1 * rec_dp_mask[3]
    else:
        h_tm1_i = h_tm1
        h_tm1_f = h_tm1
        h_tm1_c = h_tm1
        h_tm1_o = h_tm1

    i = self.recurrent_activation(
        self.ln(x_i + K.dot(h_tm1_i, self.recurrent_kernel_i)))
    f = self.recurrent_activation(
        self.ln(x_f + K.dot(h_tm1_f, self.recurrent_kernel_f)))
    c = f * c_tm1 + i * self.activation(
        self.ln(x_c + K.dot(h_tm1_c, self.recurrent_kernel_c)))
    o = self.recurrent_activation(
        self.ln(x_o + K.dot(h_tm1_o, self.recurrent_kernel_o)))

    h = o * self.activation(self.ln(c))

    # Zoneout: drop elements of the state *change* during training, then
    # rescale and add the previous state back. `training` is passed through
    # so an explicit flag is honoured, as it already is for the masks above.
    if 0 < self.zoneout_h < 1:
        h = K.in_train_phase(K.dropout(h - h_tm1, self.zoneout_h),
                             h - h_tm1,
                             training=training)
        h = h * (1. - self.zoneout_h) + h_tm1
    if 0 < self.zoneout_c < 1:
        c = K.in_train_phase(K.dropout(c - c_tm1, self.zoneout_c),
                             c - c_tm1,
                             training=training)
        c = c * (1. - self.zoneout_c) + c_tm1

    # Flag the tensor that is actually returned. Setting the attribute
    # before zoneout would leave it on a tensor that `h` no longer names.
    if 0 < (self.dropout + self.recurrent_dropout +
            self.zoneout_c + self.zoneout_h):
        if training is None:
            h._uses_learning_phase = True
    return h, [h, c]
def call(self, inputs, states, training=None):
    """Advance the layer-normalised GRU cell (with zoneout) by one timestep.

    Each gate pre-activation is passed through `self.ln`, then optionally
    scaled by `gamma_*` and shifted by `beta_*`, before its activation.
    After the GRU update, zoneout is optionally applied to `h` relative to
    the previous hidden state.

    # Arguments
        inputs: input tensor for the current timestep.
        states: list whose first entry is the previous hidden state.
        training: forwarded to the dropout-mask generator and to the zoneout
            train/inference switch. When it is `None` and any dropout or
            zoneout rate is positive, the returned output is flagged as
            depending on the learning phase.

    # Returns
        Tuple `(h, [h])`.

    # Raises
        NotImplementedError: if `self.implementation` is not 1. Only the
            per-gate implementation exists in this cell.
    """
    if self.implementation != 1:
        # The original fell through to an UnboundLocalError on `z`.
        raise NotImplementedError(
            'Only implementation=1 is supported by this cell.')

    h_tm1 = states[0]  # previous memory

    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
            self.dropout,
            training=training,
            count=3)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.recurrent_dropout,
            training=training,
            count=3)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    if 0. < self.dropout < 1.:
        inputs_z = inputs * dp_mask[0]
        inputs_r = inputs * dp_mask[1]
        inputs_h = inputs * dp_mask[2]
    else:
        inputs_z = inputs
        inputs_r = inputs
        inputs_h = inputs
    x_z = K.dot(inputs_z, self.kernel_z)
    x_r = K.dot(inputs_r, self.kernel_r)
    x_h = K.dot(inputs_h, self.kernel_h)
    if self.use_bias:
        x_z = K.bias_add(x_z, self.bias_z)
        x_r = K.bias_add(x_r, self.bias_r)
        x_h = K.bias_add(x_h, self.bias_h)

    if 0. < self.recurrent_dropout < 1.:
        h_tm1_z = h_tm1 * rec_dp_mask[0]
        h_tm1_r = h_tm1 * rec_dp_mask[1]
        h_tm1_h = h_tm1 * rec_dp_mask[2]
    else:
        h_tm1_z = h_tm1
        h_tm1_r = h_tm1
        h_tm1_h = h_tm1

    # Update (z) and reset (r) gates.
    recurrent_z = K.dot(h_tm1_z, self.recurrent_kernel_z)
    recurrent_r = K.dot(h_tm1_r, self.recurrent_kernel_r)
    a_z = self.ln(x_z + recurrent_z)
    a_r = self.ln(x_r + recurrent_r)
    if self.scale:
        a_z *= self.gamma_z
        a_r *= self.gamma_r
    if self.center:
        a_z += self.beta_z
        a_r += self.beta_r
    z = self.recurrent_activation(a_z)
    r = self.recurrent_activation(a_r)

    # Candidate state; the reset gate scales the recurrent input.
    recurrent_h = K.dot(r * h_tm1_h, self.recurrent_kernel_h)
    a_h = self.ln(x_h + recurrent_h)
    if self.scale:
        a_h *= self.gamma_h
    if self.center:
        a_h += self.beta_h
    hh = self.activation(a_h)

    h = z * h_tm1 + (1 - z) * hh

    # Zoneout: drop elements of the state *change* during training, then
    # rescale and add the previous state back. `training` is passed through
    # so an explicit flag is honoured, as it already is for the masks above.
    if 0 < self.zoneout < 1:
        h = K.in_train_phase(K.dropout(h - h_tm1, self.zoneout),
                             h - h_tm1,
                             training=training)
        h = h * (1. - self.zoneout) + h_tm1

    # Flag the tensor that is actually returned. Setting the attribute
    # before zoneout would leave it on a tensor that `h` no longer names.
    if 0 < self.dropout + self.recurrent_dropout + self.zoneout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h]
def call(self, inputs, states, training=None):
    """Advance the clockwork cell by one timestep.

    The hidden state is either updated module-by-module
    (`self.split_method` truthy) or all at once and then gated. In both
    paths an update only takes effect where the current time step is
    divisible by the relevant clock period; elsewhere the previous value is
    kept. The output is a projection of the resulting hidden state.

    # Arguments
        inputs: input tensor for the current timestep.
        states: list `[h_prev, time_step]`.
        training: forwarded to the dropout-mask generator. When it is `None`
            and any dropout rate is positive, the output is flagged as
            depending on the learning phase.

    # Returns
        Tuple `(output, [hidden, time_step + 1])`.
    """
    h_tm1 = states[0]  # previous state
    time_step = states[1]

    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
            self.dropout,
            training=training)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.recurrent_dropout,
            training=training)

    dp_mask = self._dropout_mask
    rec_dp_mask = self._recurrent_dropout_mask

    if dp_mask is not None:
        inputs *= dp_mask
    if rec_dp_mask is not None:
        h_tm1 *= rec_dp_mask

    if self.split_method:
        # Update state module-by-module.
        h_mod = []
        units_per_mod = self.units // self.clock_numPeriods

        for i, period in enumerate(self.clock_periods):
            start = i * units_per_mod
            stop = start + units_per_mod
            previous = h_tm1[:, start:stop]

            # Loop variables are bound as defaults so each branch function
            # is self-contained and does not rely on late-bound closures.
            def refreshed(i=i, start=start):
                # Module i reads the state from column `start` to the end.
                module = (K.dot(h_tm1[:, start:], self.rec_kernel_c_mod[i]) +
                          K.dot(inputs, self.kernel_c_mod[i]))
                if self.use_bias:
                    module = K.bias_add(module, self.bias_mod[i])
                if self.recurrent_activation is not None:
                    module = self.recurrent_activation(module)
                return module

            def unchanged(previous=previous):
                return previous

            is_active = K.equal(K.tf.mod(time_step[0][0], period), 0)
            h_mod.append(tf.cond(is_active, refreshed, unchanged))
        hidden = K.concatenate(h_mod)
    else:
        # Update state all at once, then only keep the updates that are due.
        h = K.dot(inputs, self.kernel) + K.dot(
            h_tm1, self.recurrent_kernel_c * self.cw_mask)
        if self.bias is not None:
            h = K.bias_add(h, self.bias)
        if self.recurrent_activation is not None:
            h = self.recurrent_activation(h)

        h = K.switch(K.equal(K.tf.mod(time_step, self.cw_periods), 0),
                     h, h_tm1)
        hidden = h

    # Calculate output.
    output = K.dot(hidden, self.recurrent_kernel_o)
    if self.activation is not None:
        output = self.activation(output)

    # Properly set learning phase on output tensor.
    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            output._uses_learning_phase = True
    return output, [hidden, time_step + 1]
def call(self, inputs, states, training=None):
    """Advance the NAS cell by one timestep.

    Eight input projections and eight recurrent projections are computed,
    either one matmul per branch (`implementation == 1`) or by slicing two
    fused matmuls. They are then combined through a fixed three-layer stack
    of additions, multiplications and activations. The previous carry state
    is injected in the second layer. An optional projection is applied to
    the output.

    # Arguments
        inputs: input tensor for the current timestep.
        states: list `[h_prev, c_prev]`.
        training: forwarded to the dropout-mask generator. When it is `None`
            and any dropout rate is positive, the output is flagged as
            depending on the learning phase.

    # Returns
        Tuple `(h, [h, c])`.
    """
    input_dropout_on = 0 < self.dropout < 1
    recurrent_dropout_on = 0 < self.recurrent_dropout < 1

    if input_dropout_on and self._dropout_mask is None:
        input_ones = _generate_dropout_ones(inputs, K.shape(inputs)[-1])
        self._dropout_mask = _generate_dropout_mask(
            input_ones, self.dropout, training=training, count=8)
    if recurrent_dropout_on and self._recurrent_dropout_mask is None:
        state_ones = _generate_dropout_ones(inputs, self.units)
        self._recurrent_dropout_mask = _generate_dropout_mask(
            state_ones, self.recurrent_dropout, training=training, count=8)

    dp_mask = self._dropout_mask  # dropout matrices for input units
    rec_dp_mask = self._recurrent_dropout_mask  # ... for recurrent units

    h_prev = states[0]  # previous memory state
    c_prev = states[1]  # previous carry state

    # NOTE(review): implementation 1 adds the bias to the input projection
    # while the fused path adds it to the recurrent projection. Branch 3
    # multiplies the two projections, so the two implementations are not
    # numerically equivalent there -- confirm which one is intended.
    if self.implementation == 1:
        kernels = (self.kernel_0, self.kernel_1, self.kernel_2,
                   self.kernel_3, self.kernel_4, self.kernel_5,
                   self.kernel_6, self.kernel_7)
        recurrent_kernels = (
            self.recurrent_kernel_0, self.recurrent_kernel_1,
            self.recurrent_kernel_2, self.recurrent_kernel_3,
            self.recurrent_kernel_4, self.recurrent_kernel_5,
            self.recurrent_kernel_6, self.recurrent_kernel_7)

        # Input side: per-branch dropout mask, projection, optional bias.
        xs = [K.dot(inputs * dp_mask[k] if input_dropout_on else inputs,
                    kernels[k])
              for k in range(8)]
        if self.use_bias:
            biases = (self.bias_0, self.bias_1, self.bias_2, self.bias_3,
                      self.bias_4, self.bias_5, self.bias_6, self.bias_7)
            xs = [K.bias_add(x, b) for x, b in zip(xs, biases)]

        # Recurrent side: per-branch dropout mask and projection.
        rs = [K.dot(h_prev * rec_dp_mask[k] if recurrent_dropout_on
                    else h_prev,
                    recurrent_kernels[k])
              for k in range(8)]
    else:
        # Fused path: one matmul per side, then slice into eight branches.
        if input_dropout_on:
            inputs = inputs * dp_mask[0]
        z = K.dot(inputs, self.kernel)

        if recurrent_dropout_on:
            h_prev = h_prev * rec_dp_mask[0]
        zr = K.dot(h_prev, self.recurrent_kernel)
        if self.use_bias:
            zr = K.bias_add(zr, self.bias)

        u = self.units
        xs = ([z[:, :u]] +
              [z[:, k * u: (k + 1) * u] for k in range(1, 7)] +
              [z[:, 7 * u:]])
        rs = ([zr[:, :u]] +
              [zr[:, k * u: (k + 1) * u] for k in range(1, 7)] +
              [zr[:, 7 * u:]])

    # First layer (branch 3 multiplies, every other branch adds).
    layer1_0 = self.recurrent_activation(xs[0] + rs[0])
    layer1_1 = self.cell_activation(xs[1] + rs[1])
    layer1_2 = self.recurrent_activation(xs[2] + rs[2])
    layer1_3 = self.cell_activation(xs[3] * rs[3])
    layer1_4 = self.activation(xs[4] + rs[4])
    layer1_5 = self.recurrent_activation(xs[5] + rs[5])
    layer1_6 = self.activation(xs[6] + rs[6])
    layer1_7 = self.recurrent_activation(xs[7] + rs[7])

    # Second layer
    layer2_0 = self.activation(layer1_0 * layer1_1)
    layer2_1 = self.activation(layer1_2 + layer1_3)
    layer2_2 = self.activation(layer1_4 * layer1_5)
    layer2_3 = self.recurrent_activation(layer1_6 + layer1_7)

    # Inject the cell
    layer2_0 = self.activation(layer2_0 + c_prev)

    # Third layer; its first product is also the new carry state.
    c = layer2_0 * layer2_1
    layer3_1 = self.activation(layer2_2 + layer2_3)

    # Final layer
    h = self.activation(c * layer3_1)

    if self.projection_units is not None:
        h = self.projection_activation(K.dot(h, self.projection_kernel))

    if training is None and 0 < self.dropout + self.recurrent_dropout:
        h._uses_learning_phase = True
    return h, [h, c]
def call(self, inputs, states, training=None):
    """Run a single NAS-cell step.

    Builds eight input-side and eight recurrent-side pre-activations. With
    `implementation == 1` each one has its own matmul; otherwise two fused
    matmuls are split into eight column blocks. The pre-activations are
    then merged through the cell's fixed three-layer combination stack,
    which mixes in the previous carry state. The output is optionally
    projected.

    # Arguments
        inputs: input tensor for the current timestep.
        states: list `[h_prev, c_prev]`.
        training: forwarded to the dropout-mask generator. When it is `None`
            and any dropout rate is positive, the output is flagged as
            depending on the learning phase.

    # Returns
        Tuple `(h, [h, c])`.
    """
    drop_inputs = 0 < self.dropout < 1
    drop_recurrent = 0 < self.recurrent_dropout < 1

    if drop_inputs and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
            self.dropout, training=training, count=8)
    if drop_recurrent and self._recurrent_dropout_mask is None:
        self._recurrent_dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.recurrent_dropout, training=training, count=8)

    input_masks = self._dropout_mask  # dropout matrices for input units
    state_masks = self._recurrent_dropout_mask  # ... for recurrent units

    previous_h = states[0]  # previous memory state
    previous_c = states[1]  # previous carry state

    # NOTE(review): the per-branch path puts the bias on the input
    # projection, the fused path puts it on the recurrent projection. The
    # multiplicative branch 3 therefore differs between the two paths --
    # confirm which placement is intended.
    if self.implementation == 1:
        input_kernels = (self.kernel_0, self.kernel_1, self.kernel_2,
                         self.kernel_3, self.kernel_4, self.kernel_5,
                         self.kernel_6, self.kernel_7)
        state_kernels = (
            self.recurrent_kernel_0, self.recurrent_kernel_1,
            self.recurrent_kernel_2, self.recurrent_kernel_3,
            self.recurrent_kernel_4, self.recurrent_kernel_5,
            self.recurrent_kernel_6, self.recurrent_kernel_7)

        # Input projections, each with its own mask when dropout is active.
        from_input = []
        for k in range(8):
            branch_in = inputs * input_masks[k] if drop_inputs else inputs
            from_input.append(K.dot(branch_in, input_kernels[k]))
        if self.use_bias:
            branch_biases = (self.bias_0, self.bias_1, self.bias_2,
                             self.bias_3, self.bias_4, self.bias_5,
                             self.bias_6, self.bias_7)
            from_input = [K.bias_add(p, b)
                          for p, b in zip(from_input, branch_biases)]

        # Recurrent projections, likewise masked per branch.
        from_state = []
        for k in range(8):
            branch_h = (previous_h * state_masks[k] if drop_recurrent
                        else previous_h)
            from_state.append(K.dot(branch_h, state_kernels[k]))
    else:
        # Fused matmuls; a single mask per side is used on this path.
        if drop_inputs:
            inputs = inputs * input_masks[0]
        fused_input = K.dot(inputs, self.kernel)

        if drop_recurrent:
            previous_h = previous_h * state_masks[0]
        fused_state = K.dot(previous_h, self.recurrent_kernel)
        if self.use_bias:
            fused_state = K.bias_add(fused_state, self.bias)

        width = self.units
        from_input = [fused_input[:, :width]]
        from_state = [fused_state[:, :width]]
        for k in range(1, 7):
            from_input.append(fused_input[:, k * width:(k + 1) * width])
            from_state.append(fused_state[:, k * width:(k + 1) * width])
        from_input.append(fused_input[:, 7 * width:])
        from_state.append(fused_state[:, 7 * width:])

    # First layer: branch 3 is a product, all others are sums.
    layer1_0 = self.recurrent_activation(from_input[0] + from_state[0])
    layer1_1 = self.cell_activation(from_input[1] + from_state[1])
    layer1_2 = self.recurrent_activation(from_input[2] + from_state[2])
    layer1_3 = self.cell_activation(from_input[3] * from_state[3])
    layer1_4 = self.activation(from_input[4] + from_state[4])
    layer1_5 = self.recurrent_activation(from_input[5] + from_state[5])
    layer1_6 = self.activation(from_input[6] + from_state[6])
    layer1_7 = self.recurrent_activation(from_input[7] + from_state[7])

    # Second layer
    layer2_0 = self.activation(layer1_0 * layer1_1)
    layer2_1 = self.activation(layer1_2 + layer1_3)
    layer2_2 = self.activation(layer1_4 * layer1_5)
    layer2_3 = self.recurrent_activation(layer1_6 + layer1_7)

    # Inject the cell
    layer2_0 = self.activation(layer2_0 + previous_c)

    # Third layer: the first product doubles as the new carry state.
    c = layer2_0 * layer2_1
    layer3_1 = self.activation(layer2_2 + layer2_3)

    # Final layer
    h = self.activation(c * layer3_1)

    if self.projection_units is not None:
        h = self.projection_activation(K.dot(h, self.projection_kernel))

    if training is None and 0 < self.dropout + self.recurrent_dropout:
        h._uses_learning_phase = True
    return h, [h, c]