def preprocess_input(self, inputs, training=None):
    if self.implementation == 0:
        input_shape = K.int_shape(inputs)
        input_dim = input_shape[2]
        timesteps = input_shape[1]
        # pre-compute the input projections for the GRU gates
        # (z: update, r: reset, h: candidate)
        x_z = _time_distributed_dense(inputs, self.kernel_z, self.bias_z,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_r = _time_distributed_dense(inputs, self.kernel_r, self.bias_r,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_h = _time_distributed_dense(inputs, self.kernel_h, self.bias_h,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        return K.concatenate([x_z, x_r, x_h], axis=2)
    else:
        return inputs
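# NOTE: every snippet here leans on Keras's private helper
# `_time_distributed_dense`, which projects all timesteps through one dense
# map at once and was removed from Keras in later releases. A minimal sketch
# of what it computes, following the shape conventions of these callers
# (standard `keras.backend` calls; an approximation, not the exact upstream
# source):
from keras import backend as K

def _time_distributed_dense(x, w, b=None, dropout=None, input_dim=None,
                            output_dim=None, timesteps=None, training=None):
    """Apply `y . w + b` to every temporal slice y of x."""
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.int_shape(w)[1]
    if dropout is not None and 0. < dropout < 1.:
        # use the same dropout mask at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)
    # collapse batch and time, apply the dense map, then restore the 3D shape
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    return K.reshape(x, (-1, timesteps, output_dim))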
def preprocess_input(self, inputs, training=None):
    if self.implementation == 0:
        input_shape = K.int_shape(inputs)
        input_dim = input_shape[2]
        timesteps = input_shape[1]
        x_w = _time_distributed_dense(inputs, self.kernel_w, None,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_f = _time_distributed_dense(inputs, self.kernel_f, self.bias_f,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_r = _time_distributed_dense(inputs, self.kernel_r, self.bias_r,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_f = self.recurrent_activation(x_f)
        x_r = self.recurrent_activation(x_r)
        if self.kernel_dim == 4:
            x_p = _time_distributed_dense(inputs, self.kernel_p, None,
                                          self.dropout, input_dim, self.units,
                                          timesteps, training=training)
            return K.concatenate([x_w, x_f, x_r, x_p], axis=2)
        else:
            return K.concatenate([x_w, x_f, x_r], axis=2)
    else:
        return inputs
def preprocess_input(self, inputs, training=None):
    '''
    We have to override this preprocessing step because, on the CPU, we do
    the weight-input multiplications in the internals of the GRU as
    separate, smaller matrix multiplications and concatenate them after.
    Therefore, before this happens, we split off the attention and then add
    it back afterwards.
    '''
    if self.implementation == 0:
        attention = inputs[:, :, 0]  # Shape: (samples, knowledge_length)
        inputs = inputs[:, :, 1:]  # Shape: (samples, knowledge_length, word_dim)
        input_shape = self.input_spec[0].shape
        input_dim = input_shape[2] - 1
        timesteps = input_shape[1]
        x_z = _time_distributed_dense(inputs, self.kernel_z, self.bias_z,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_r = _time_distributed_dense(inputs, self.kernel_r, self.bias_r,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_h = _time_distributed_dense(inputs, self.kernel_h, self.bias_h,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        # Add the attention back in its original place.
        return K.concatenate([K.expand_dims(attention, 2), x_z, x_r, x_h], axis=2)
    else:
        return inputs
def preprocess_input(self, x, training=None):
    input_dim = self.input_spec[1].shape[2]
    timesteps = self.input_spec[1].shape[1]
    # the layer receives a list of inputs; the sequence to project is the second one
    x = x[1]
    x_z = _time_distributed_dense(x, self.W_z, self.b_z, self.dropout_W,
                                  input_dim, self.output_dim, timesteps)
    x_r = _time_distributed_dense(x, self.W_r, self.b_r, self.dropout_W,
                                  input_dim, self.output_dim, timesteps)
    x_h = _time_distributed_dense(x, self.W_h, self.b_h, self.dropout_W,
                                  input_dim, self.output_dim, timesteps)
    return K.concatenate([x_z, x_r, x_h], axis=2)
def step(self, x_input, states):
    input_shape = self.input_spec[0].shape
    en_seq = states[-1]
    _, [h, c] = super(pointerLayer, self).step(x_input, states[:-1])
    # vt * tanh(W1*e + W2*d)
    dec_seq = K.repeat(h, input_shape[1])
    Eij = _time_distributed_dense(en_seq, self.W1, output_dim=1)
    Dij = _time_distributed_dense(dec_seq, self.W2, output_dim=1)
    U = self.vt * tanh(Eij + Dij)
    U = K.squeeze(U, 2)
    # probability tensor over the input positions
    pointer = softmax(U)
    return pointer, [h, c]
def preprocess_input(self, inputs, training=None):
    if self.implementation == 0:
        input_shape = K.int_shape(inputs)
        input_dim = input_shape[2]
        timesteps = input_shape[1]
        x_i = _time_distributed_dense(inputs, self.kernel_i, self.bias_i,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_f = _time_distributed_dense(inputs, self.kernel_f, self.bias_f,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_c = _time_distributed_dense(inputs, self.kernel_c, self.bias_c,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_o = _time_distributed_dense(inputs, self.kernel_o, self.bias_o,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_m = _time_distributed_dense(inputs, self.kernel_m, self.bias_m,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        return K.concatenate([x_i, x_f, x_c, x_o, x_m], axis=2)
    else:
        return inputs
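# With implementation == 0 the five per-gate projections above are
# concatenated along the last axis, so the matching `step` has to slice the
# (batch, 5 * units) block back apart in the same i/f/c/o/m order. A
# standalone sketch of that slicing (hypothetical helper, NumPy for clarity):
import numpy as np

def split_gates(x, units):
    """x: (batch, 5 * units) -> tuple of five (batch, units) gate inputs."""
    return tuple(x[:, k * units:(k + 1) * units] for k in range(5))

x_i, x_f, x_c, x_o, x_m = split_gates(np.zeros((2, 5 * 4)), units=4)
assert x_m.shape == (2, 4)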
def step(self, x_input, states):
    input_shape = self.input_spec[0].shape
    en_seq = states[-1]
    _, [h, c] = super(PointerLSTM, self).step(x_input, states[:-1])
    # vt * tanh(W1*e + W2*d)
    dec_seq = K.repeat(h, input_shape[1])
    Eij = _time_distributed_dense(en_seq, self.W1, output_dim=1)
    Dij = _time_distributed_dense(dec_seq, self.W2, output_dim=1)
    U = self.vt * tanh(Eij + Dij)
    U = K.squeeze(U, 2)
    # make probability tensor
    pointer = softmax(U)
    return pointer, [h, c]
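# The score built in `step` above is the pointer-network attention of
# Vinyals et al.: u_i = v^T tanh(W1 @ e_i + W2 @ d), followed by a softmax
# over encoder positions. The code uses an attention dimension of 1, so `vt`
# degenerates to a scalar; a standalone NumPy sketch of the general form
# (all names illustrative):
import numpy as np

def pointer_scores(en_seq, dec_h, W1, W2, v):
    """en_seq: (timesteps, hidden), dec_h: (hidden,) -> (timesteps,) probabilities."""
    u = np.tanh(en_seq @ W1 + dec_h @ W2)  # (timesteps, att_dim); decoder term broadcasts
    scores = u @ v                         # (timesteps,)
    e = np.exp(scores - scores.max())      # numerically stable softmax
    return e / e.sum()

rng = np.random.default_rng(0)
T, H, A = 5, 8, 8
p = pointer_scores(rng.normal(size=(T, H)), rng.normal(size=H),
                   rng.normal(size=(H, A)), rng.normal(size=(H, A)),
                   rng.normal(size=A))
assert np.isclose(p.sum(), 1.0)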
def call(self, x):
    self.x_seq = x
    self._uxpb = _time_distributed_dense(self.x_seq, self.U_a, b=self.b_a,
                                         input_dim=self.input_dim,
                                         timesteps=self.timesteps,
                                         output_dim=self.units)
    return super(AttentionDecoder, self).call(x)
def step(self, x_input, states):
    input_shape = self.input_spec[0].shape
    en_seq = states[-1]
    _, [h, c] = super(PointerLSTM, self).step(x_input, states[:-1])
    en_shape = K.int_shape(en_seq)
    en_input_dim = en_shape[2]
    en_timesteps = en_shape[1]
    # vt * tanh(W1*e + W2*d)
    dec_seq = K.repeat(h, input_shape[1])
    Eij = _time_distributed_dense(en_seq, self.W1, input_dim=en_input_dim,
                                  timesteps=en_timesteps, output_dim=1)
    Dij = _time_distributed_dense(dec_seq, self.W2, output_dim=1)
    U = self.vt * tanh(Eij + Dij)
    U = K.squeeze(U, 2)
    # make probability tensor
    pointer = softmax(U)
    return pointer, [h, c]
def call(self, x):
    # store the whole sequence so we can "attend" to it at each timestep
    self.x_seq = x
    # apply a dense layer over the time dimension of the sequence;
    # do it here because it doesn't depend on any previous steps,
    # therefore we can save computation time:
    self._uxpb = _time_distributed_dense(self.x_seq, self.U_a, b=self.b_a,
                                         input_dim=self.input_dim,
                                         timesteps=self.timesteps,
                                         output_dim=self.units)
    return super(AttentionDecoder, self).call(x)
def call(self, x):
    # x is the hidden state of the encoder.
    self.x_seq = x
    # attention model:
    # a_ij = softmax(V_a^T tanh(W_a \cdot s_{t-1} + U_a \cdot h_t))
    # apply a dense layer over the time dimension of the sequence
    # (this gives the U_a \cdot h_t part).
    self._uxpb = _time_distributed_dense(self.x_seq, self.U_a, b=self.b_a,
                                         input_dim=self.input_dim,
                                         timesteps=self.timesteps,
                                         output_dim=self.output_dim)
    x = self._uxpb
    return super(AttentionPointer, self).call(x)
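# Precomputing `U_a . h_t` in `call` pays off because that term depends only
# on the encoder sequence; per decoding step only `W_a . s_{t-1}` changes.
# A NumPy sketch of the full additive-attention score (names illustrative):
import numpy as np

def additive_attention(h_seq, s_prev, W_a, U_a, v_a):
    """h_seq: (timesteps, enc_dim), s_prev: (dec_dim,) -> (timesteps,) weights."""
    uxpb = h_seq @ U_a                           # precomputable once per sequence
    scores = np.tanh(s_prev @ W_a + uxpb) @ v_a  # (timesteps,)
    e = np.exp(scores - scores.max())            # numerically stable softmax
    return e / e.sum()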