def QRNcell():
    xq = Input(batch_shape=(batch_size, embedding_dim * 2))
    # Split into context and query
    xt = Lambda(lambda x, dim: x[:, :dim], arguments={'dim': embedding_dim},
                output_shape=lambda s: (s[0], s[1] // 2))(xq)
    qt = Lambda(lambda x, dim: x[:, dim:], arguments={'dim': embedding_dim},
                output_shape=lambda s: (s[0], s[1] // 2))(xq)

    h_tm1 = Input(batch_shape=(batch_size, embedding_dim))

    # Update gate z_t
    zt = Dense(1, activation='sigmoid',
               bias_initializer=Constant(2.5))(multiply([xt, qt]))
    zt = Lambda(lambda x, dim: K.repeat_elements(x, dim, axis=1),
                arguments={'dim': embedding_dim})(zt)
    # Candidate hidden state
    ch = Dense(embedding_dim, activation='tanh')(concatenate([xt, qt], axis=-1))
    # Reset gate r_t
    rt = Dense(1, activation='sigmoid')(multiply([xt, qt]))
    rt = Lambda(lambda x, dim: K.repeat_elements(x, dim, axis=1),
                arguments={'dim': embedding_dim})(rt)
    ht = add([multiply([zt, ch, rt]),
              multiply([Lambda(lambda x: 1 - x, output_shape=lambda s: s)(zt), h_tm1])])

    return RecurrentModel(input=xq, output=ht,
                          initial_states=[h_tm1], final_states=[ht],
                          return_sequences=True)
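# A minimal usage sketch: wraps the RecurrentModel returned by QRNcell() around
# a sequence of concatenated context/query vectors, the same way the other
# examples in this section call their RNN layers. It assumes the globals
# batch_size and embedding_dim used inside QRNcell() are defined and that
# Input/Model are imported from keras; timesteps is an illustrative value.
timesteps = 7
qrn = QRNcell()
seq = Input(batch_shape=(batch_size, timesteps, embedding_dim * 2))
out = qrn(seq)  # return_sequences=True: one hidden state per timestep
model = Model(seq, out)
model.compile(loss='mse', optimizer='sgd')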
def sru(input, initial_state=None, depth=1, dropout=0.2, recurrent_dropout=0.2,
        return_sequences=False, **kwargs):
    units = K.int_shape(input)[-1]
    input_masked = Masking(mask_value=0.)(input)
    mask = Lambda(lambda x, mask: mask, output_shape=lambda s: s[:2])(input_masked)
    W = Dense(units * 3)

    def drop(x, p):
        shape = K.shape(x)
        noise_shape = (shape[0], 1, shape[2])
        return Dropout(p, noise_shape=noise_shape).call(x)

    input_dropped = Lambda(drop, arguments={'p': dropout},
                           output_shape=lambda s: s)(input_masked)
    ones = Lambda(lambda x: x * 0. + 1., output_shape=lambda s: s)(input)
    dropped_ones = Dropout(recurrent_dropout)(ones)
    xfr = W(input_dropped)
    ixfrd = concatenate([input, xfr, dropped_ones])
    ixfrd = Lambda(lambda x: x[0], mask=lambda x, _: x[1],
                   output_shape=lambda s: s[0])([ixfrd, mask])

    recurrent_input = Input((units * 5,))

    def unpack(x, n):
        return [Lambda(lambda x, i: x[:, units * i: units * (i + 1)],
                       arguments={'i': i},
                       output_shape=lambda s: (s[0], units))(x) for i in range(n)]

    x_t, x_p_t, f_t, r_t, drop = unpack(recurrent_input, 5)
    f_t = Activation('sigmoid')(f_t)
    r_t = Activation('sigmoid')(r_t)
    inv = Lambda(lambda x: 1. - x, output_shape=lambda s: s)
    c_tm1 = Input((units, ))
    c_t = c_tm1
    h_t = x_t
    for _ in range(depth):
        c_t = add([multiply([f_t, c_t]), multiply([inv(f_t), x_p_t])])
        c_t = multiply([c_t, drop])
        h_t = add([multiply([r_t, Activation('tanh')(c_t)]),
                   multiply([inv(r_t), h_t])])
        xfr = W(h_t)
        x_p_t, f_t, r_t = unpack(xfr, 3)

    rnn = RecurrentModel(recurrent_input, h_t, c_tm1, c_t,
                         return_sequences=return_sequences, **kwargs)
    output = rnn(ixfrd, initial_state=initial_state)
    return output
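# A minimal usage sketch for sru(): it is applied to a zero-padded sequence
# tensor. Shapes are illustrative; Input/Model are assumed to be imported from
# keras as in the surrounding examples.
seq = Input((20, 128))                        # 20 timesteps, 128 features
y = sru(seq, depth=2, return_sequences=True)
model = Model(seq, y)
model.compile(loss='mse', optimizer='adam')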
def RWA(input_dim, output_dim):
    x = Input((input_dim, ))
    h_tm1 = Input((output_dim, ))
    n_tm1 = Input((output_dim, ))
    d_tm1 = Input((output_dim, ))

    x_h = concatenate([x, h_tm1])

    u = Dense(output_dim)(x)
    g = Dense(output_dim, activation='tanh')(x_h)

    a = Dense(output_dim, use_bias=False)(x_h)
    e_a = Lambda(lambda x: K.exp(x))(a)

    z = multiply([u, g])
    nt = add([n_tm1, multiply([z, e_a])])
    dt = add([d_tm1, e_a])
    dt = Lambda(lambda x: 1.0 / x)(dt)
    ht = multiply([nt, dt])
    ht = Activation('tanh')(ht)

    return RecurrentModel(input=x,
                          output=ht,
                          initial_states=[h_tm1, n_tm1, d_tm1],
                          final_states=[ht, nt, dt],
                          state_initializer=[initializers.random_normal(stddev=1.0)])
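# A minimal usage sketch: the RecurrentModel returned by RWA() is used like any
# other Keras recurrent layer. Shapes are illustrative; Input/Model are assumed
# to be imported from keras as in the other examples.
rwa = RWA(input_dim=5, output_dim=10)
seq = Input((7, 5))
y = rwa(seq)                                  # last hidden state, shape (None, 10)
model = Model(seq, y)
model.compile(loss='mse', optimizer='sgd')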
def RHN(input_dim, hidden_dim, depth):
    # Wrapped model
    inp = Input(batch_shape=(batch_size, input_dim))
    state = Input(batch_shape=(batch_size, hidden_dim))
    drop_mask = Input(batch_shape=(batch_size, hidden_dim))
    # To avoid an all-zero mask causing the gradient to vanish
    inverted_drop_mask = Lambda(lambda x: 1.0 - x, output_shape=lambda s: s)(drop_mask)
    drop_mask_2 = Lambda(lambda x: x + 0., output_shape=lambda s: s)(inverted_drop_mask)
    dropped_state = multiply([state, inverted_drop_mask])
    y, new_state = RHNCell(units=hidden_dim,
                           recurrence_depth=depth,
                           kernel_initializer=weight_init,
                           kernel_regularizer=l2(weight_decay),
                           kernel_constraint=max_norm(gradient_clip),
                           bias_initializer=Constant(transform_bias),
                           recurrent_initializer=weight_init,
                           recurrent_regularizer=l2(weight_decay),
                           recurrent_constraint=max_norm(gradient_clip))([inp, dropped_state])
    return RecurrentModel(input=inp,
                          output=y,
                          initial_states=[state, drop_mask],
                          final_states=[new_state, drop_mask_2])
def test_model():
    x = Input((5, ))
    h_tm1 = Input((10, ))
    h = add([Dense(10)(x), Dense(10, use_bias=False)(h_tm1)])
    h = Activation('tanh')(h)

    a = Input((7, 5))

    rnn = RecurrentModel(input=x, output=h, initial_states=h_tm1, final_states=h)
    b = rnn(a)

    model = Model(a, b)
    model.compile(loss='mse', optimizer='sgd')
    model.fit(np.random.random((32, 7, 5)), np.random.random((32, 10)))
    model.predict(np.zeros((32, 7, 5)))
def test_readout():
    x = Input((5, ))
    y_tm1 = Input((5, ))
    h_tm1 = Input((5, ))
    h = add([Dense(5)(add([x, y_tm1])), Dense(5, use_bias=False)(h_tm1)])
    h = Activation('tanh')(h)

    rnn = RecurrentModel(input=x, initial_states=h_tm1, output=h, final_states=h,
                         readout_input=y_tm1)

    a = Input((7, 5))
    b = rnn(a)

    model = Model(a, b)
    model.compile(loss='mse', optimizer='sgd')
    model.fit(np.random.random((32, 7, 5)), np.random.random((32, 5)))
    model.predict(np.zeros((32, 7, 5)))
def build(self):
    patch = Input((self.patch_size, self.patch_width), name="InputPatch")
    memory_tm1 = Input(batch_shape=self.memory_shape_batch, name="Memory")
    memory_t = memory_tm1

    # conv = self.combine_nodes(patch, working_width)
    # first_node = Lambda(lambda x: x[:,:self.patch_data_width])(flat_patch)

    patch_without_memory_addr = Lambda(
        lambda x: x[:, :, :self.patch_data_width:])(patch)
    flat_patch = Reshape([self.patch_size * self.patch_data_width])(patch_without_memory_addr)

    working_memory = Dense(self.working_width, activation='relu')(flat_patch)

    # conv = self.combine_nodes(patch, self.working_width)
    # working_memory = concatenate([working_memory, conv])
    # working_memory = Dense(self.working_width, activation='relu')(working_memory)

    pre_memory = working_memory

    use_memory = False
    if use_memory:
        # ------- Memory operations --------- #
        primary_address = Lambda(
            lambda x: x[:, 3, self.patch_data_width:])(patch)
        print(primary_address)
        address = self.generate_address(primary_address, patch, name="address_read1")
        read1 = self.read(memory_t, address)

        # Turn batch dimension from None to batch_size
        batched_working_memory = Lambda(lambda x: K.reshape(
            x, [self.batch_size, self.working_width]))(working_memory)
        batched_working_memory = concatenate(
            [batched_working_memory, read1], batch_size=self.batch_size)
        batched_working_memory = Dense(
            self.working_width, activation='relu')(batched_working_memory)

        erase_word = Dense(self.word_size, name="DenseEraseWord",
                           activation='relu')(batched_working_memory)
        # address = self.generate_address(batched_working_memory, patch, name="address_erase")
        erase_word = Lambda(lambda x: K.ones_like(x))(erase_word)
        memory_t = self.erase(memory_t, primary_address, erase_word)

        write_word = Dense(self.word_size, name="DenseWriteWord",
                           activation='relu')(batched_working_memory)
        # address = self.generate_address(batched_working_memory, patch, name="address_write")
        memory_t = self.write(memory_t, primary_address, write_word)

        # address = self.generate_address(batched_working_memory, patch, name="address_read2")
        # read2 = self.read(memory_t, address)

        # working_memory = concatenate([batched_working_memory, read1])
        working_memory = Dense(self.working_width,
                               activation="relu")(batched_working_memory)

    return RecurrentModel(
        input=patch,
        output=working_memory,
        return_sequences=True,
        stateful=True,
        initial_states=[memory_tm1],
        final_states=[memory_t],
        state_initializer=[initializers.random_normal(stddev=1.0)])
from recurrentshop import *
from keras.layers import Input, Dense, Activation, add
import keras
import numpy as np

x_t = Input(shape=(5, ))     # The input to the RNN at time t
h_tm1 = Input(shape=(20, ))  # Previous hidden state

# Compute new hidden state
h_t = add([Dense(20)(x_t), Dense(20, use_bias=False)(h_tm1)])

# tanh activation
h_t = Activation('tanh')(h_t)

y_t = Dense(5, activation='softmax')(h_t)

# Build the RNN
# RecurrentModel is a standard Keras `Recurrent` layer.
# RecurrentModel also accepts arguments such as unroll, return_sequences, etc.
rnn = RecurrentModel(input=x_t, initial_states=[h_tm1], output=y_t, final_states=[h_t])
# return_sequences is False by default,
# so it only returns the last h_t state

# Build a Keras Model using our RNN layer
# input dimensions are (Time_steps, Depth)
x = Input(shape=(4, 5))
y = rnn(x)
model = keras.models.Model(x, y)

# Run the RNN over a random sequence
# Don't forget the batch shape when calling the model!
out = model.predict(np.random.random((1, 4, 5)))
def test_memory_rnn_gradient(self):
    # Data setup
    memory_size = 20
    word_size = 4
    batch_size = 1
    patch_size = 10
    patch_width = memory_size + 5
    sequence_length = 10

    header = ExperimentHeader(params={
        "word_size": word_size,
        "memory_size": memory_size,
        "patch_size": patch_size,
        "patch_width": patch_width
    })
    experiment = Experiment("test_memory_cell", header, Args(batch_size))
    pb = NTMBase(experiment)

    patch = Input((patch_size, patch_width), name="patch")
    memory_tm1 = Input((memory_size, word_size), name="memory")
    memory_t = memory_tm1

    flat_patch = Reshape((patch_size * patch_width, ))(patch)

    write_word = Dense(word_size)(flat_patch)
    erase_word = Dense(word_size)(flat_patch)

    ptr = Dense(patch_size)(flat_patch)
    address = pb.resolve_address(ptr, patch)
    memory_t = pb.erase(memory_t, address, erase_word)

    ptr = Dense(patch_size)(flat_patch)
    address = pb.resolve_address(ptr, patch)
    memory_t = pb.write(memory_t, address, write_word)

    ptr = Dense(patch_size)(flat_patch)
    address = pb.resolve_address(ptr, patch)
    read = pb.read(memory_t, address)

    out = Dense(3)(read)

    rnn = RecurrentModel(input=patch, output=out,
                         initial_states=[memory_tm1], final_states=[memory_t])

    a = Input((sequence_length, patch_size, patch_width), name="patch_seq")
    b = rnn(a)
    model = Model(a, b)
    model.compile(loss='mse', optimizer='sgd')

    model.fit(
        {
            "patch_seq": np.random.random(
                (batch_size, sequence_length, patch_size, patch_width)),
            # "memory": np.random.random((batch_size, memory_size, word_size)),
        },
        np.random.random((batch_size, 3)))

    model.predict({
        "patch_seq": np.zeros(
            (batch_size, sequence_length, patch_size, patch_width)),
        # "memory": np.zeros((batch_size, memory_size, word_size)),
    })
att_scores = add([W_xt, W_ht])
att_mask = Activation(K.softmax, name='att_mask')(att_scores)
lstms_input = dot([att_mask, X_t], axes=(1, 1))

cells = [LSTMCell(attentive_lstm_dim) for _ in range(attentive_lstm_depth)]
lstms_output, h, c = lstms_input, h_tm1, c_tm1
for cell in cells:
    lstms_output, h, c = cell([lstms_output, h, c])

attentive_lstm = RecurrentModel(input=X_t,
                                output=lstms_output,
                                initial_states=[h_tm1, c_tm1],
                                final_states=[h, c],
                                readout_input=readout_input,
                                return_states=False,
                                return_sequences=True)

#--- Full Model ---#
fmap_seq = TimeDistributed(inception)(input_patche_seq)
lstm_out1 = attentive_lstm(fmap_seq)
lstm_out2 = LSTM(8, activation='tanh')(lstm_out1)
hazard = Dense(1, activation='linear')(lstm_out2)

model = Model(input_patche_seq, hazard)
model.compile(loss='mean_squared_error', optimizer='adadelta')
# model.compile(loss=partial_likelihood, optimizer='adadelta')

#----------------------------------------------------------------------
# Fit:
# Per-timestep input to the cell (matches the feature dimension of the
# sequence input below).
cell_input = layers.Input(shape=(10, ))
# Readout input.
readout_input = layers.Input(shape=(100, ))
# Internal inputs for the LSTM cell.
last_state = layers.Input(shape=(100, ))
last_output = layers.Input(shape=(100, ))

# Create the LSTM layer; the readout (previous output) is concatenated with
# the step input before being fed to the cell, so that readout_input is part
# of the step graph.
fused_inputs = layers.concatenate([cell_input, readout_input])
lstm1_o, lstm1_h, lstm1_c = LSTMCell(100)([fused_inputs, last_state, last_output])

# Build the RNN.
rnn = RecurrentModel(input=cell_input,
                     output=lstm1_o,
                     initial_states=[last_state, last_output],
                     final_states=[lstm1_h, lstm1_c],
                     readout_input=readout_input)

# Main sequence input.
sequence_input = layers.Input(shape=(50, 10))
# Initial readout input.
initial_readout = layers.Input(shape=(100, ))
rnn_output = rnn(sequence_input, initial_readout=initial_readout)

# Build the Keras model.
model = Model(inputs=[sequence_input, initial_readout], outputs=rnn_output)
opt = optimizers.SGD(lr=0.001, momentum=0.9)
model.compile(loss="mean_squared_error", optimizer=opt)
def assemble_model_recurrent(input_shape, num_filters, num_classes,
                             normalization=LayerNorm, norm_kwargs=None,
                             weight_norm=False, num_outputs=1,
                             weight_decay=0.0005, init='he_normal'):
    from recurrentshop import RecurrentModel
    assert (num_outputs == 1)
    if norm_kwargs is None:
        norm_kwargs = {}

    # Inputs
    model_input = Input(batch_shape=input_shape, name='model_input')
    input_t = Input(batch_shape=(input_shape[0], ) + input_shape[2:])
    hidden_input_t = Input(batch_shape=(input_shape[0], num_filters)
                           + input_shape[3:])

    # Common convolution kwargs.
    convolution_kwargs = {'filters': num_filters,
                          'kernel_size': 3,
                          'ndim': 2,
                          'padding': 'same',
                          'weight_norm': weight_norm,
                          'kernel_initializer': init}

    # GRU input.
    x_t = Convolution(**convolution_kwargs,
                      kernel_regularizer=_l2(weight_decay),
                      activation='relu',
                      name=_unique('conv_x'))(input_t)
    if normalization is not None:
        x_t = normalization(**norm_kwargs)(x_t)

    # GRU block.
    gate_replace_x = Convolution(**convolution_kwargs,
                                 kernel_regularizer=_l2(weight_decay),
                                 activation='sigmoid',
                                 name=_unique('conv_gate_replace'))(x_t)
    #if normalization is not None:
        #gate_replace_x = normalization(**norm_kwargs)(gate_replace_x)
    gate_replace_h = Convolution(**convolution_kwargs,
                                 kernel_regularizer=_l2(weight_decay),
                                 activation='sigmoid',
                                 name=_unique('conv_gate_replace'))(hidden_input_t)
    #if normalization is not None:
        #gate_replace_h = normalization(**norm_kwargs)(gate_replace_h)
    gate_replace = merge_add([gate_replace_x, gate_replace_h])
    gate_read_x = Convolution(**convolution_kwargs,
                              kernel_regularizer=_l2(weight_decay),
                              activation='sigmoid',
                              name=_unique('conv_gate_read'))(x_t)
    #if normalization is not None:
        #gate_read_x = normalization(**norm_kwargs)(gate_read_x)
    gate_read_h = Convolution(**convolution_kwargs,
                              kernel_regularizer=_l2(weight_decay),
                              activation='sigmoid',
                              name=_unique('conv_gate_read'))(hidden_input_t)
    #if normalization is not None:
        #gate_read_h = normalization(**norm_kwargs)(gate_read_h)
    gate_read = merge_add([gate_read_x, gate_read_h])
    hidden_read_t = merge_multiply([gate_read, hidden_input_t])
    #if normalization is not None:
        #hidden_read_t = normalization(**norm_kwargs)(hidden_read_t)
    mix_t_pre = merge_concatenate([x_t, hidden_read_t], axis=1)
    mix_t = Convolution(**convolution_kwargs,
                        kernel_regularizer=_l2(weight_decay),
                        activation='tanh',
                        name=_unique('conv_mix'))(mix_t_pre)
    #if normalization is not None:
        #mix_t = normalization(**norm_kwargs)(mix_t)
    lambda_inputs = [mix_t, hidden_input_t, gate_replace]
    hidden_t = Lambda(function=lambda ins: ins[2] * ins[0] + (1 - ins[2]) * ins[1],
                      output_shape=lambda x: x[0])(lambda_inputs)

    # GRU output.
    out_t = Convolution(**convolution_kwargs,
                        kernel_regularizer=_l2(weight_decay),
                        activation='relu',
                        name=_unique('conv_out'))(hidden_t)
    class_convolution_kwargs = copy.copy(convolution_kwargs)
    class_convolution_kwargs['filters'] = num_classes
    out_t = Convolution(**class_convolution_kwargs,
                        kernel_regularizer=_l2(weight_decay),
                        activation='linear',
                        name=_unique('conv_out'))(hidden_t)
    #if normalization is not None:
        #out_t = normalization(**norm_kwargs)(out_t)

    # Classifier.
    out_t = Permute((2, 3, 1))(out_t)
    if num_classes == 1:
        out_t = Activation('sigmoid')(out_t)
    else:
        out_t = Activation(_softmax)(out_t)
    out_t = Permute((3, 1, 2))(out_t)

    # Make it a recurrent block.
    #
    # NOTE: a bidirectional 'stateful' GRU has states passed between blocks
    # of the reverse path in non-temporal order. Only the forward pass is
    # stateful in sequential/temporal order.
    cobject = {LayerNorm.__name__: LayerNorm}
    output_layer = Bidirectional_(RecurrentModel(input=input_t,
                                                 initial_states=[hidden_input_t],
                                                 output=out_t,
                                                 final_states=[hidden_t],
                                                 stateful=True,
                                                 return_sequences=True),
                                  merge_mode='sum',
                                  custom_objects=cobject)
    output_layer.name = 'output_0'
    model = Model(inputs=model_input, outputs=output_layer(model_input))
    return model
import numpy as np

from recurrentshop import RecurrentModel
from keras.models import Model
from keras.layers import *

x = Input((5, ))
h_tm1 = Input((10, ))
h = add([Dense(10)(x), Dense(10, use_bias=False)(h_tm1)])
h = Activation('tanh')(h)

a = Input((7, 5))

rnn = RecurrentModel(input=x, output=h, initial_states=h_tm1, final_states=h)
b = rnn(a)

model = Model(a, b)
model.compile(loss='mse', optimizer='sgd')
model.fit(np.random.random((32, 7, 5)), np.random.random((32, 10)))
model.predict(np.zeros((32, 7, 5)))

rnn = RecurrentModel(input=x, output=h, initial_states=h_tm1, final_states=h,
                     state_initializer='random_normal')
b = rnn(a)

model = Model(a, b)
model.compile(loss='mse', optimizer='sgd')
model.fit(np.random.random((32, 7, 5)), np.random.random((32, 10)))
model.predict(np.zeros((32, 7, 5)))
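# A small variation on the example above: state_initializer also accepts a
# list with one initializer per initial state, as the RWA and NTM examples
# earlier in this section use. The stddev value here is illustrative.
from keras import initializers

rnn = RecurrentModel(input=x, output=h, initial_states=h_tm1, final_states=h,
                     state_initializer=[initializers.random_normal(stddev=0.5)])
b = rnn(a)

model = Model(a, b)
model.compile(loss='mse', optimizer='sgd')
model.predict(np.zeros((32, 7, 5)))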