def test_merge_mask_2d():
    """Fit legacy ``merge`` models built on masked 2D inputs.

    Three merges are exercised: ``sum`` of two masked inputs, ``concat`` of
    two masked inputs, and ``concat`` of one masked and one unmasked input.
    """

    def rand(*shape):
        # Random 0/1 integer array of the requested shape.
        return np.asarray(np.random.random(shape) > 0.5, dtype='int32')

    # Raw inputs and their masked counterparts.
    input_a = layers.Input(shape=(3, ))
    input_b = layers.Input(shape=(3, ))
    masked_a = layers.Masking(mask_value=0)(input_a)
    masked_b = layers.Masking(mask_value=0)(input_b)

    # Build all three merges up front, then train one model per merge.
    merged_sum = legacy_layers.merge([masked_a, masked_b], mode='sum')
    merged_concat = legacy_layers.merge([masked_a, masked_b],
                                        mode='concat',
                                        concat_axis=1)
    merged_concat_mixed = legacy_layers.merge([masked_a, input_b],
                                              mode='concat',
                                              concat_axis=1)

    # (merged tensor, width of the target the model is fitted against)
    cases = (
        (merged_sum, 3),
        (merged_concat, 6),
        (merged_concat_mixed, 6),
    )
    for merged, target_width in cases:
        model = models.Model([input_a, input_b], [merged])
        model.compile(loss='mse', optimizer='sgd')
        model.fit([rand(2, 3), rand(2, 3)], [rand(2, target_width)],
                  epochs=1)
def initialize_critic_model(self):
    """Build and compile the critic network.

    Layers: Masking (mask value 0.) over inputs of shape
    ``(self.lookback, self.ob_dim)`` -> GRU(16, tanh, zero-initialised
    kernel) -> Dense(1, linear). The model is compiled with mean squared
    error and Adam at ``self.learning_rate``.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(
        layers.Masking(mask_value=0.,
                       input_shape=(self.lookback, self.ob_dim)))
    # The Masking layer already fixes the input shape. The GRU used to be
    # given ``input_dim`` as a (lookback, ob_dim) tuple, but ``input_dim``
    # is a scalar feature size, so the argument is dropped.
    model.add(
        layers.GRU(16, activation='tanh', kernel_initializer='zeros'))
    model.add(layers.Dense(1, activation='linear'))
    model.compile(loss='mean_squared_error',
                  optimizer=optimizers.Adam(lr=self.learning_rate))
    return model
def initialize_actor_model(self):
    """Build the (uncompiled) actor network.

    Layers: Masking (mask value 0.) over inputs of shape
    ``(self.lookback, self.ob_dim)`` -> GRU(16, tanh, zero-initialised
    kernel) -> Dense(``self.ac_dim``, softmax).

    Returns:
        The ``Sequential`` model; it is not compiled here.
    """
    model = Sequential()
    model.add(
        layers.Masking(mask_value=0.,
                       input_shape=(self.lookback, self.ob_dim)))
    # The Masking layer already fixes the input shape. ``input_dim`` is a
    # scalar feature size, so the former (lookback, ob_dim) tuple argument
    # is dropped.
    model.add(
        layers.GRU(16, activation='tanh', kernel_initializer='zeros'))
    model.add(layers.Dense(self.ac_dim, activation='softmax'))
    return model
def build_copy_model(model, has_masking=False):
    """Rebuild `model` up to its attention layer and expose the coefficients.

    The LSTM and attention layers of the copy are initialised with the
    weights of the corresponding layers of `model`. When `has_masking` is
    true a Masking layer is inserted first and the source layer indices
    shift by one.

    Returns:
        A model mapping the sequence input to the attention coefficients.
    """
    sequence_in = layers.Input(shape=(None, 2), name='input')

    if has_masking:
        lstm_input = layers.Masking(name='Mask')(sequence_in)
        offset = 1
    else:
        lstm_input = sequence_in
        offset = 0

    lstm_weights = model.layers[offset + 1].get_weights()
    lstm_out = layers.LSTM(5,
                           return_sequences=True,
                           name='LSTM',
                           weights=lstm_weights)(lstm_input)

    attention_weights = model.layers[offset + 2].get_weights()
    attention = AttentionLayer(return_coefficients=True,
                               weights=attention_weights)
    _, coefficients = attention(lstm_out)

    copied = models.Model(inputs=[sequence_in], outputs=[coefficients])
    copied.summary()
    return copied
def create_model(MAXLEN, LAYERS, ENCODE_LENGTH, HIDDEN_SIZE):
    """Build and compile a GRU encoder/decoder over masked sequences.

    The encoder GRU condenses a ``(MAXLEN, ENCODE_LENGTH)`` input into one
    vector of size HIDDEN_SIZE; that vector is repeated MAXLEN times and
    fed through LAYERS stacked decoder GRUs, followed by dense projections
    and a sigmoid output of width ENCODE_LENGTH.

    Returns:
        The compiled model (binary cross-entropy, Adam, accuracy metric).
    """
    print('Build model...')
    recurrent = layers.GRU

    # Encoder. For variable-length input one would use
    # input_shape=(None, num_feature) instead of a fixed MAXLEN.
    input_tensor = layers.Input(shape=(MAXLEN, ENCODE_LENGTH))
    x = layers.Masking(mask_value=0.0)(input_tensor)
    x = recurrent(HIDDEN_SIZE, activation='relu')(x)
    x = layers.Dense(HIDDEN_SIZE)(x)

    # The decoder sees the encoder summary at every time step.
    x = layers.RepeatVector(MAXLEN)(x)

    # Decoder: LAYERS stacked recurrent layers, each emitting full sequences.
    for _ in range(LAYERS):
        x = recurrent(HIDDEN_SIZE, activation='tanh',
                      return_sequences=True)(x)

    # Per-step projections down to the output encoding.
    x = layers.Dense(HIDDEN_SIZE)(x)
    x = layers.Dense(ENCODE_LENGTH)(x)
    pred = layers.Activation('sigmoid')(x)

    model = Model(inputs=input_tensor, outputs=pred)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()
    return model
def test_TimeDistributed_with_masking_layer():
    """Check that masks propagate through TimeDistributed(Masking) and a
    following TimeDistributed(Dense).

    The model is fitted once, then the masks computed by both layers are
    evaluated and compared with the expected "any feature non-zero" mask.
    """
    model = Sequential()
    model.add(
        wrappers.TimeDistributed(layers.Masking(mask_value=0., ),
                                 input_shape=(None, 4)))
    model.add(wrappers.TimeDistributed(layers.Dense(5)))
    # Compiling once is enough; the original compiled twice with
    # identical arguments.
    model.compile(optimizer='rmsprop', loss='mse')

    # Non-zero integer data; zero out the tail of the first four samples
    # so that those time steps are masked.
    model_input = np.random.randint(low=1, high=5, size=(10, 3, 4))
    for i in range(4):
        model_input[i, i:, :] = 0.
    model.fit(model_input,
              np.random.random((10, 3, 5)),
              epochs=1,
              batch_size=6)

    # ``compute_mask`` takes (inputs, mask=None). The stray
    # ``compute_mask=True`` keyword is not part of that signature and is
    # removed, matching the sibling Sequential-downstream test.
    mask_outputs = [model.layers[0].compute_mask(model.input)]
    mask_outputs += [
        model.layers[1].compute_mask(model.layers[1].input, mask_outputs[-1])
    ]
    func = K.function([model.input], mask_outputs)
    mask_outputs_val = func([model_input])

    expected_mask = np.any(model_input, axis=-1)
    assert np.array_equal(mask_outputs_val[0], expected_mask)
    assert np.array_equal(mask_outputs_val[1], expected_mask)
def buildLSTMModel(input_size, max_output_seq_len, hidden_size):
    """Build and compile an LSTM encoder/decoder regression model.

    Layers: Masking (mask value 0) over ``(max_output_seq_len, input_size)``
    -> LSTM(hidden_size) -> Dense(hidden_size, relu) -> RepeatVector ->
    LSTM(hidden_size, sequences) -> TimeDistributed(Dense(1, linear)).

    Args:
        input_size: number of features per time step.
        max_output_seq_len: number of time steps of input and output.
        hidden_size: width of the recurrent and dense hidden layers.

    Returns:
        The compiled model (MSE loss, Adam optimizer).
    """
    model = km.Sequential()
    model.add(
        kl.Masking(mask_value=0,
                   input_shape=(max_output_seq_len, input_size)))
    # The layer width is passed positionally so the call works with both
    # the legacy ``output_dim`` and the current ``units`` parameter name.
    # ``input_dim`` is dropped: the Masking layer already fixes the shape.
    model.add(kl.LSTM(hidden_size, return_sequences=False))
    model.add(kl.Dense(hidden_size, activation='relu'))
    # Feed the encoded vector to the decoder at every output step.
    model.add(kl.RepeatVector(max_output_seq_len))
    model.add(kl.LSTM(hidden_size, return_sequences=True))
    model.add(kl.TimeDistributed(kl.Dense(1, activation="linear")))
    model.compile(loss='mse', optimizer='adam')
    return model
def _build_model(self):
    """Build and compile the two-input network for deep Q-learning.

    One branch runs a Conv1D stack over the state input; the other runs a
    masked GRU over a ``(self.statement_size, 1)`` sequence. Both branches
    end in 64 units, are added together and mapped to ``self.action_size``
    outputs.
    """
    # Convolutional branch over the state.
    state_in = KL.Input(shape=(self.state_size))
    conv_specs = (
        # (filters, kernel_size, strides)
        (64, 72, 8),
        (64, 12, 4),
        (128, 7, 3),
        (128, 3, 3),
        (256, 3, 1),
    )
    features = state_in
    for filters, kernel_size, stride in conv_specs:
        features = KL.Conv1D(filters,
                             kernel_size,
                             strides=stride,
                             activation='relu')(features)
    features = KL.MaxPool1D(3)(features)
    features = KL.Flatten()(features)
    features = KL.Dropout(0.3)(features)
    features = KL.Dense(64, activation='relu')(features)

    # Recurrent branch over the second input.
    sequence_in = KL.Input(shape=(self.statement_size, 1))
    recurrent = KL.Masking()(sequence_in)
    recurrent = KL.GRU(64)(recurrent)

    # Merge both branches and produce one output per action.
    head = KL.Add()([features, recurrent])
    head = KL.Dense(128, activation='relu')(head)
    head = KL.Dense(self.action_size)(head)

    model = Model([state_in, sequence_in], head)
    model.compile(loss=self._huber_loss,
                  optimizer=Adam(lr=self.learning_rate))
    return model
def rnn_autoencoder(window_size, n_features):
    """Build and compile a composite LSTM autoencoder with two decoders.

    A masked LSTM encoder feeds two structurally identical decoders: one
    labelled "reconstruction" and one labelled "prediction". Both emit
    ``window_size`` steps of ``n_features`` values.

    Args:
        window_size: number of time steps of the input and of both outputs.
        n_features: number of features per time step.

    Returns:
        The compiled model (Adam, MSE) with outputs [decoder1, decoder2].
    """
    n_in = window_size
    n_out = window_size

    # Encoder.
    visible = layers.Input(shape=(n_in, n_features))
    masked = layers.Masking(mask_value=0.)(visible)
    encoder = layers.LSTM(128, activation='relu')(masked)

    # Reconstruction decoder.
    decoder1 = layers.RepeatVector(n_in)(encoder)
    decoder1 = layers.LSTM(128, activation='relu',
                           return_sequences=True)(decoder1)
    decoder1 = layers.TimeDistributed(layers.Dense(n_features))(decoder1)

    # Prediction decoder.
    decoder2 = layers.RepeatVector(n_out)(encoder)
    decoder2 = layers.LSTM(128, activation='relu',
                           return_sequences=True)(decoder2)
    decoder2 = layers.TimeDistributed(layers.Dense(n_features))(decoder2)

    model = models.Model(inputs=visible, outputs=[decoder1, decoder2])
    model.summary()
    model.compile(optimizer='adam', loss='mse')

    # Plotting is best-effort. Catch ``Exception`` rather than using a bare
    # ``except`` so that KeyboardInterrupt/SystemExit still propagate.
    try:
        keras.utils.plot_model(model,
                               show_shapes=True,
                               to_file='composite_lstm_autoencoder.png')
    except Exception:
        print('>>>> plot not working!')
    return model
def test_sequential_as_downstream_of_masking_layer():
    """Check mask propagation through a TimeDistributed Sequential that
    follows a Masking layer."""
    inputs = layers.Input(shape=(3, 4))
    masked = layers.Masking(mask_value=0., input_shape=(3, 4))(inputs)

    # Per-time-step sub-model applied through TimeDistributed.
    step_model = Sequential()
    step_model.add(layers.Dense(5, input_shape=(4, )))
    step_model.add(layers.Activation('relu'))
    outputs = layers.wrappers.TimeDistributed(step_model)(masked)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='rmsprop', loss='mse')

    # Non-zero integer data with the tail of the first four samples zeroed.
    model_input = np.random.randint(low=1, high=5, size=(10, 3, 4))
    for sample in range(4):
        model_input[sample, sample:, :] = 0.
    model.fit(model_input,
              np.random.random((10, 3, 5)),
              epochs=1,
              batch_size=6)

    masking_layer = model.layers[1]
    wrapped_layer = model.layers[2]
    first_mask = masking_layer.compute_mask(masking_layer.input)
    second_mask = wrapped_layer.compute_mask(wrapped_layer.input, first_mask)

    func = K.function([model.input], [first_mask, second_mask])
    mask_outputs_val = func([model_input])

    assert np.array_equal(mask_outputs_val[0], np.any(model_input, axis=-1))
    assert np.array_equal(mask_outputs_val[1], np.any(model_input, axis=-1))
def addPreAttentionLayer(self, merged_input):
    """Scale every timestep of merged_input by a learned attention score.

    Args:
        merged_input: 3-dimensional Tensor, where the first dimension
            corresponds to the batch size, the second to the sequence
            timesteps and the last one to the concatenation of features.

    Returns:
        3-dimensional Tensor of the same dimension as merged_input, with
        a Masking layer (mask value 0.0) re-applied on top.
    """
    # The configuration may carry the literal string 'None'.
    activation = self.params.get('attentionActivation', None)
    activation = None if activation == 'None' else activation

    n_features = K.int_shape(merged_input)[-1]

    # Swap the timestep and feature axes for the scoring layers.
    transposed = layers.Permute((2, 1))(merged_input)
    scores = layers.TimeDistributed(
        layers.Dense(self.max_sentece_length, activation=activation),
        name='attention_matrix_score')(transposed)

    # Calculate a single score for each timestep.
    scores = layers.Lambda(lambda x: K.mean(x, axis=1),
                           name='attention_vector_score')(scores)

    # Repeat the score vector so it matches the transposed input's shape.
    scores = layers.RepeatVector(n_features)(scores)

    weighted = layers.multiply([scores, transposed])
    weighted = layers.Permute((2, 1))(weighted)

    # Re-add the mask after attention. This risks masking elements that
    # the attention scores themselves turned into zeros.
    return layers.Masking(mask_value=0.0)(weighted)
def assemble_rnn(params, final_reshape=True):
    """Assemble a recurrent model of the form X-[H1-H2-...-HN]-Y.

    The hidden layers H are optional and are deserialized from
    ``params['hidden_layers']``; each entry supplies ``name`` and
    ``config`` and may supply ``dropout`` and ``batch_norm``.

    Args:
        params: dict with ``input_shape``, ``hidden_layers`` and
            ``output_shape``.
        final_reshape: when true, reshape the dense output to
            ``params['output_shape']``.

    Returns:
        The (uncompiled) ``KerasModel``.
    """
    inputs = layers.Input(shape=params['input_shape'])
    tensor = layers.Masking(mask_value=0.0)(inputs)

    for spec in params['hidden_layers']:
        hidden = layers.deserialize({
            'class_name': spec['name'],
            'config': spec['config']
        })
        tensor = hidden(tensor)

        # Optional regularisation after each hidden layer.
        dropout_rate = spec.get('dropout')
        if dropout_rate is not None:
            tensor = layers.Dropout(dropout_rate)(tensor)
        batch_norm_kwargs = spec.get('batch_norm')
        if batch_norm_kwargs is not None:
            tensor = layers.BatchNormalization(**batch_norm_kwargs)(tensor)

    # Dense output sized to the flattened output shape.
    output_shape = params['output_shape']
    outputs = layers.Dense(np.prod(output_shape))(tensor)
    if final_reshape:
        outputs = layers.Reshape(output_shape)(outputs)

    return KerasModel(inputs=inputs, outputs=outputs)
def build_input(input_id, input_maxlen):
    """Create one named input branch: Input -> Masking -> Dropout.

    The dropout rate is read from the module-level ``drop_prob``.

    Returns:
        A tuple ``(input tensor, tensor after masking and dropout)``.
    """
    entry = layers.Input(shape=(input_maxlen, 6), name=f'res_{input_id}')

    masking = layers.Masking(mask_value=0, name=f'mask-{input_id}')
    dropout = layers.Dropout(rate=drop_prob, name=f'dropout-{input_id}')

    return entry, dropout(masking(entry))
def get_model(shape, class_num):
    """Build and compile a masked LSTM classifier.

    Args:
        shape: sequence whose first two entries give the input shape.
        class_num: number of softmax output classes.

    Returns:
        The compiled model (Adam 1e-3, categorical cross-entropy, metrics
        'accuracy' and ``acc_top3``).
    """
    input_shape = (shape[0], shape[1])

    model = models.Sequential()
    model.add(layers.Masking(mask_value=0, input_shape=input_shape))
    model.add(layers.LSTM(128, input_shape=input_shape))
    model.add(layers.Dense(class_num, activation='softmax'))

    model.compile(Adam(1e-3),
                  'categorical_crossentropy',
                  metrics=['accuracy', acc_top3])
    return model
def multitask_rnn_2(window_size, n_features):
    """Build a multi-task LSTM model with three heads.

    A two-layer masked LSTM encoder feeds a reconstruction decoder
    ('decoder1_output'), a forecasting decoder ('decoder2_output') and a
    two-class softmax outcome predictor ('predictor_output'). Loss weights
    come from the module-level ``args.weight``.

    Returns:
        ``(model, model_predictor)``: the compiled three-output model and a
        model exposing only the predictor head on the same inputs.
    """

    def relu_lstm(units, sequences):
        # Every recurrent layer in this model is a relu LSTM.
        return layers.LSTM(units,
                           activation='relu',
                           return_sequences=sequences)

    n_in = window_size

    # Encoder.
    visible = layers.Input(shape=(n_in, n_features))
    masked = layers.Masking(mask_value=0.)(visible)
    encoder = relu_lstm(128, True)(masked)
    encoder = relu_lstm(128, False)(encoder)

    # Reconstruction decoder.
    decoder1 = layers.RepeatVector(n_in)(encoder)
    decoder1 = relu_lstm(128, True)(decoder1)
    decoder1 = relu_lstm(128, True)(decoder1)
    decoder1 = layers.TimeDistributed(Dense(n_features),
                                      name='decoder1_output')(decoder1)

    # Hidden sequence shared by the forecasting decoder and the predictor.
    pred_hidden = layers.RepeatVector(n_in)(encoder)
    pred_hidden = relu_lstm(128, True)(pred_hidden)

    # Forecasting decoder.
    decoder2 = relu_lstm(64, True)(pred_hidden)
    decoder2 = layers.TimeDistributed(Dense(1),
                                      name='decoder2_output')(decoder2)

    # Outcome predictor.
    predictor = relu_lstm(64, False)(pred_hidden)
    predictor = layers.Dense(64, activation='relu')(predictor)
    predictor = layers.Dense(2, activation='softmax',
                             name='predictor_output')(predictor)

    model = models.Model(inputs=visible,
                         outputs=[decoder1, decoder2, predictor])
    model.summary()
    keras.utils.plot_model(model,
                           show_shapes=True,
                           to_file='multitask_rnn_v3.png')

    losses_by_output = {
        'decoder1_output': 'mse',
        'decoder2_output': 'mse',
        'predictor_output': 'categorical_crossentropy'
    }
    weights_by_output = {
        'decoder1_output': args.weight,
        'decoder2_output': 1 - args.weight,
        'predictor_output': 1 - args.weight
    }
    model.compile(optimizer='adam',
                  loss=losses_by_output,
                  loss_weights=weights_by_output)

    model_predictor = models.Model(inputs=model.inputs, outputs=predictor)
    return model, model_predictor
def build(self):
    """Build and compile the seeded two-pass LSTM model into ``self.model``.

    The seed hits are encoded twice by LSTMs; each encoding is passed as
    the second entry of ``initial_state`` (the initial cell state, per the
    original comments) of one of the sequence LSTMs, while the first entry
    (initial hidden state) is all zeros. Each target layer's hit list is
    first reduced to a fixed-size vector by a TimeDistributed LSTM.
    """

    def encode_seed(masked_seed):
        # LSTM encoding of the seed followed by a tanh activation.
        encoded = layers.LSTM(self.hidden_size)(masked_seed)
        return layers.Activation('tanh')(encoded)

    # Seed encoders.
    self.seed_input = layers.Input(shape=(n_seed_layers, 1))
    seeds = layers.Masking(mask_value=-1)(self.seed_input)
    seeds_forward = encode_seed(seeds)
    seeds_backward = encode_seed(seeds)

    # All-zero tensor shaped like the seed encoding.
    zeros = layers.Lambda(lambda x: K.zeros_like(x))(seeds_forward)

    # Per-layer hit lists -> fixed representation.
    self.hit_input = layers.Input((n_target_layers, max_layer_hits, 1))
    hits = layers.TimeDistributed(
        layers.Masking(mask_value=-1))(self.hit_input)
    hits = layers.TimeDistributed(layers.LSTM(self.in_size))(hits)
    hits = layers.Activation('tanh')(hits)

    # First pass over the layer sequence, then a go_backwards pass on top.
    forward_pass = layers.LSTM(self.hidden_size, return_sequences=True)
    forward_lstm = forward_pass(hits, initial_state=[zeros, seeds_forward])
    backward_pass = layers.LSTM(self.hidden_size,
                                return_sequences=True,
                                go_backwards=True)
    backward_lstm = backward_pass(forward_lstm,
                                  initial_state=[zeros, seeds_backward])

    output = layers.TimeDistributed(layers.Dense(
        self.hidden_size))(backward_lstm)
    output = layers.TimeDistributed(layers.Dense(1))(output)

    self.model = models.Model(inputs=[self.seed_input, self.hit_input],
                              outputs=[output])
    self.model.compile(loss='mse',
                       optimizer='adam',
                       sample_weight_mode='temporal')
def test_globalpooling_1d_supports_masking():
    """GlobalAveragePooling1D must average over unmasked steps only."""
    model = Sequential()
    for layer in (layers.Masking(mask_value=0., input_shape=(3, 4)),
                  layers.GlobalAveragePooling1D()):
        model.add(layer)
    model.compile(loss='mae', optimizer='adam')

    # Sample 0 keeps only its first time step; the rest is masked out.
    model_input = np.random.randint(low=1, high=5, size=(2, 3, 4))
    model_input[0, 1:, :] = 0

    output = model.predict(model_input)
    only_unmasked_step = model_input[0, 0, :]
    assert np.array_equal(output[0], only_unmasked_step)
def _build_gru(self):
    """Build and compile a masked two-layer GRU regressor.

    Input shape is ``(100, self.feature_size)``; the last GRU has a single
    unit. Compiled with Adam (lr 0.001) and MSE loss.
    """
    inp = KL.Input(shape=(100, self.feature_size))

    hidden = KL.Masking()(inp)
    hidden = KL.GRU(32, return_sequences=True)(hidden)
    out = KL.GRU(1)(hidden)

    model = keras.Model(inp, out)
    model.compile(optimizer=keras.optimizers.Adam(lr=0.001), loss='mse')
    return model
def train_model(gen_train, gen_valid, idx):
    """Train a masked bidirectional-GRU tagger and persist the results.

    Writes the best weights ("track_<idx>_weights.h5"), the final model
    ("model_tracker_<idx>.h5") and the pickled training history
    ("history_track_<idx>.pkl").

    Args:
        gen_train: training generator; its ``len`` gives steps per epoch.
        gen_valid: validation generator; its ``len`` gives validation steps.
        idx: integer used in the output file names.
    """
    model = models.Sequential()
    model.add(layers.InputLayer(input_shape=(None, 4)))
    model.add(layers.Masking(mask_value=0., input_shape=(None, 4)))
    model.add(layers.BatchNormalization())
    # Two identical bidirectional GRU stages, each followed by batch norm.
    for _ in range(2):
        model.add(
            layers.Bidirectional(layers.GRU(16, return_sequences=True),
                                 merge_mode='ave'))
        model.add(layers.BatchNormalization())
    model.add(layers.Dense(16))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))
    model.add(layers.Dense(2, activation='sigmoid'))
    model.summary()

    early_stopping = callbacks.EarlyStopping(
        monitor="val_symmetric_accuracy",
        patience=1,
        min_delta=0.001,
        mode='max')
    checkpoint = callbacks.ModelCheckpoint(
        filepath="track_%i_weights.h5" % idx,
        monitor="val_symmetric_accuracy",
        save_best_only=True,
        save_weights_only=True)
    reduce_lr = callbacks.ReduceLROnPlateau(
        monitor="val_symmetric_accuracy",
        factor=0.5,
        mode='max',
        min_delta=0.001,
        patience=1)

    model.compile(optimizer=optimizers.Adam(lr=0.002),
                  loss=losses.binary_crossentropy,
                  metrics=[symmetric_accuracy, mask_accuracy])

    history = model.fit_generator(
        gen_train,
        steps_per_epoch=len(gen_train),
        epochs=50,
        callbacks=[early_stopping, checkpoint, reduce_lr],
        validation_data=gen_valid,
        validation_steps=len(gen_valid))

    model.save('model_tracker_%i.h5' % idx)
    with open("history_track_%i.pkl" % idx, "wb") as history_file:
        pickle.dump(history.history, history_file)
def create_branches(a, b):
    """Run `a` and `b` through the same stack of layer instances.

    Every layer object is applied to both tensors, so the two branches use
    the same layers. Hyper-parameters are read from the module-level
    ``params`` dict.

    Returns:
        The pair of transformed tensors ``(a, b)``.
    """

    def apply_to_both(layer, left, right):
        # One layer instance, called once per branch.
        return layer(left), layer(right)

    a, b = apply_to_both(layers.Masking(0), a, b)

    vocabulary_size = 20**params["k_mer"]
    a, b = apply_to_both(
        layers.Embedding(vocabulary_size, params["d_embed"]), a, b)
    a, b = apply_to_both(
        layers.LSTM(params["units"], return_sequences=True), a, b)
    a, b = apply_to_both(layers.Dropout(params["p_dropout"]), a, b)
    a, b = apply_to_both(layers.Flatten(), a, b)

    # Dense encoder stack, each layer followed by dropout.
    for width in params["d_encode"]:
        a, b = apply_to_both(layers.Dense(width), a, b)
        a, b = apply_to_both(layers.Dropout(params["p_dropout"]), a, b)

    return a, b
def LSTM_classifier(timesteps, features):
    """Build and compile a masked two-layer LSTM classifier with 11 classes.

    Every hidden layer is followed by an 'elu' activation; the output layer
    is a softmax named "OutputLayer". Compiled with categorical
    cross-entropy, Adam (lr 1e-3) and the accuracy metric.
    """
    model = Sequential()
    model.add(
        layers.Masking(mask_value=0., input_shape=(timesteps, features)))

    # Recurrent part.
    model.add(layers.LSTM(128, return_sequences=True))
    model.add(layers.Activation('elu'))
    model.add(layers.LSTM(128))
    model.add(layers.Activation('elu'))

    # Dense part.
    for width in (256, 128):
        model.add(layers.Dense(width))
        model.add(layers.Activation('elu'))

    model.add(layers.Dense(11, activation='softmax', name="OutputLayer"))
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=1e-3),
                  metrics=['accuracy'])
    return model
def ctc_model(input_dim, nb_labels, padding_value, regression=True):
    """Build and compile a CTC model: masking, optional regressor, three
    bidirectional GRU layers and a per-step softmax.

    Args:
        input_dim: number of features per time step.
        nb_labels: size of the softmax output.
        padding_value: value treated as padding by the Masking layer.
        regression: when true, apply ``regressor(input_dim)`` after masking.

    Returns:
        The ``CTCModel`` compiled with Adam (lr 1e-2).
    """
    # The regressor is instantiated even when it ends up unused.
    reg = regressor(input_dim)

    features_in = models.Input(batch_shape=(None, None, input_dim))
    hidden = layers.Masking(mask_value=padding_value)(features_in)
    if regression:
        hidden = reg(hidden)

    # Progressively narrower bidirectional GRU stack.
    for units in (128, 64, 32):
        hidden = layers.Bidirectional(
            layers.GRU(units, return_sequences=True, dropout=0.1))(hidden)

    posteriors = layers.TimeDistributed(
        layers.Dense(nb_labels, activation='softmax'))(hidden)

    model = CTCModel([features_in], [posteriors])
    model.compile(optimizer=optimizers.Adam(lr=1e-2))
    return model
def buildMultiInputLSTM():
    """Build and compile an LSTM fed by one masked main input and three
    embedded auxiliary inputs (legacy ``merge``/``input=`` Keras API).

    Returns:
        The compiled model (rmsprop, binary cross-entropy) with inputs
        [mainInput, auxInput1, auxInput2, auxInput3] and output mainOutput.
    """
    mainInput = kl.Input(shape=(MAX_STEP, 2), name='mainInput')
    masked_main = kl.Masking(mask_value=0)(mainInput)

    # Three auxiliary index sequences, each with its own masked embedding.
    aux_inputs = [
        kl.Input(shape=(MAX_STEP, ), name='auxInput%d' % number)
        for number in (1, 2, 3)
    ]
    aux_embedded = [
        kl.Embedding(output_dim=2, input_dim=MAX_STEP,
                     mask_zero=True)(aux_input) for aux_input in aux_inputs
    ]

    merged = kl.merge([masked_main] + aux_embedded, mode='concat')
    lstm_out = kl.LSTM(128)(merged)
    mainOutput = kl.Dense(MAX_STEP, activation='softmax',
                          name='mainOutput')(lstm_out)

    model = km.Model(input=[mainInput] + aux_inputs, output=[mainOutput])
    model.compile(optimizer='rmsprop', loss='binary_crossentropy')
    return model
def get_model(num_classes=2, cudnn=False, masking=False):
    """Build a model graph of a predefined structure.

    A strided Conv1D front end is followed by five GRU layers that
    alternate direction (the 'rev' layers are wrapped in time-axis flips),
    a final GRU that returns only its last state, and a softmax classifier.
    The model is returned uncompiled.

    :param num_classes: number of output classes.
    :param cudnn: Use CuDNN layer.
    :param masking: Allow use of masking (to process sequences of varying
        length).
    """
    conv_filters = 96
    conv_window_size = 11
    stride = 5
    gru_size = 96

    grulayer = layers.CuDNNGRU if cudnn else layers.GRU
    logger.info('Building model with: {}.'.format(grulayer))

    def time_flip():
        # New Lambda layer that reverses the time axis.
        return layers.Lambda(lambda x: x[:, ::-1, :], )

    model = Sequential()
    model.add(
        layers.Conv1D(conv_filters,
                      conv_window_size,
                      strides=stride,
                      padding='same',
                      input_shape=(None, 1)))
    if masking:
        model.add(layers.Masking(mask_value=__mask_value__))

    directions = ('rev', 'fwd', 'rev', 'fwd', 'rev')
    for position, direction in enumerate(directions):
        is_reversed = direction == 'rev'
        if is_reversed:
            model.add(time_flip())
        model.add(
            grulayer(gru_size,
                     return_sequences=True,
                     name="gru_{}_{}".format(position, direction)))
        if is_reversed:
            # Restore the original time order for the next layer.
            model.add(time_flip())

    model.add(grulayer(gru_size, return_sequences=False,
                       name="gru_labeller"))
    model.add(layers.Dense(num_classes, activation='softmax',
                           name='classify'))
    return model
def test_masking(self):
    """Apply a default Masking layer to the value of a Normal variable."""
    shape = [100, 10, 5]
    x = Normal(loc=tf.zeros(shape), scale=tf.ones(shape))

    masking = layers.Masking()
    y = masking(x.value())
def _build_model(self):
    """Build and compile the order-history attention model.

    Inputs:
        - product: query product.
        - orders: products ordered in the past ``max_days`` days, with
          ``max_products_per_day`` slots per day.

    The query product and every previously ordered product are embedded by
    the same sub-model; their batch dot product, arranged per day, is
    scaled by a per-day attention vector, fed to a bidirectional LSTM and
    a stack of dense layers, and ends in one sigmoid unit.

    Returns:
        The compiled model (binary cross-entropy, ``self.optimizer``).
    """
    n_days = self.max_days
    n_slots = self.max_products_per_day

    product = layers.Input(shape=(1, ), dtype='int32', name='product')
    orders = layers.Input(shape=(n_days, n_slots), name='orders')

    # Sub-models.
    product_emb_model = self._build_product_embedding_submodel()
    days_attn_model = self._build_days_attention_submodel()

    # Embedding of the query product.
    product_emb = product_emb_model(product)

    # Flatten the orders into one long sequence, mask the zero entries and
    # embed every ordered product with the same sub-model.
    orders_emb = layers.Reshape((n_days * n_slots, 1))(orders)
    orders_emb = layers.Masking(mask_value=0.0,
                                name='mask_zero')(orders_emb)
    orders_emb = layers.TimeDistributed(product_emb_model,
                                        name='orders_emb')(orders_emb)

    # Dot product between the query product and each ordered product
    # (see https://github.com/fchollet/keras/issues/6151 for a batch_dot
    # example), reshaped back to one row per day.
    dot_products = lambda x: K.batch_dot(x[0], x[1], axes=(1, 2))
    sim = layers.Lambda(dot_products,
                        name='sim')([product_emb, orders_emb])
    sim = layers.Reshape((n_days, n_slots))(sim)

    # One attention value per day, repeated across that day's slots.
    days_attn = days_attn_model(product_emb)
    # Plain int captured by the lambda; fixes serialization issues.
    repeats = int(self.max_products_per_day)
    repeat_per_slot = lambda x: K.repeat_elements(K.expand_dims(x),
                                                  repeats, 2)
    repeated_days_attn = layers.Lambda(
        repeat_per_slot, name='repeated_days_attn')(days_attn)

    # Scale each element of the sequence by the attention value.
    scaled_sim = layers.multiply([sim, repeated_days_attn],
                                 name='scaled_sim')

    hidden = layers.Bidirectional(layers.LSTM(self.lstm_units),
                                  merge_mode='concat',
                                  name='lstm')(scaled_sim)

    layer_units = self._hidden_layer_units(self.hidden_layers,
                                           2 * self.lstm_units, 1)
    for number, units in enumerate(layer_units, start=1):
        hidden = layers.Dense(units,
                              activation=self.hidden_layers_activation,
                              name='hidden_{}'.format(number))(hidden)

    prediction = layers.Dense(1, activation='sigmoid',
                              name='prediction')(hidden)

    model = models.Model(inputs=[product, orders], outputs=prediction)
    model.compile(loss='binary_crossentropy', optimizer=self.optimizer)
    return model
N, T, D, H = 2, 3, 4, 5 x = np.random.uniform(size=(N, T, D)) x[0, -1:, :] = np.nan x[1, -2:, :] = np.nan h0 = np.random.uniform(size=(N, H)) hr = np.random.uniform(size=(N, H)) rnn_cell = RNNCell(in_features=D, units=H) brnn = BidirectionalRNN(rnn_cell, h0=h0, hr=hr) out = brnn.forward(x) keras_x = layers.Input(shape=(T, D), name='x') keras_h0 = layers.Input(shape=(H, ), name='h0') keras_hr = layers.Input(shape=(H, ), name='hr') keras_x_masked = layers.Masking(mask_value=0.)(keras_x) keras_rnn = layers.RNN(layers.SimpleRNNCell(H), return_sequences=True) keras_brnn = layers.Bidirectional(keras_rnn, merge_mode='concat', name='brnn')( keras_x_masked, initial_state=[keras_h0, keras_hr]) keras_model = keras.Model(inputs=[keras_x, keras_h0, keras_hr], outputs=keras_brnn) keras_model.get_layer('brnn').set_weights([ brnn.forward_rnn.kernel, brnn.forward_rnn.recurrent_kernel, brnn.forward_rnn.bias, brnn.backward_rnn.kernel, brnn.backward_rnn.recurrent_kernel, brnn.backward_rnn.bias ]) keras_out = keras_model.predict_on_batch([np.nan_to_num(x), h0, hr]) nan_indices = np.where(np.any(np.isnan(x), axis=2)) keras_out[nan_indices[0], nan_indices[1], :] = np.nan print('Relative error (<1e-5 will be fine): {}'.format(
def test_merge():
    """Exercise the legacy ``merge`` function and ``Merge`` layer.

    Covers the built-in modes 'sum', 'mul', 'concat', 'ave' and 'max',
    lambda and function modes with custom output shapes, custom output
    masks, and function modes that take extra arguments. Most cases also
    round-trip the model through ``get_config``/``from_config``.
    """

    def make_inputs(shapes):
        # Fresh pair of Input tensors matching `shapes` (batch axis dropped).
        first = layers.Input(shape=shapes[0][1:])
        second = layers.Input(shape=shapes[1][1:])
        return first, second

    def check_output_shape(model, shapes, data):
        # The statically computed shape must match the predicted shape.
        expected_output_shape = model.compute_output_shape(shapes)
        actual_output_shape = model.predict(data).shape
        assert expected_output_shape == actual_output_shape

    def rebuild_from_config(model):
        # Round-trip through the config and compile the rebuilt model.
        config = model.get_config()
        rebuilt = models.Model.from_config(config)
        rebuilt.compile('rmsprop', 'mse')
        return rebuilt

    def check_output_mask(model, placeholders, mask_values, expected):
        # Evaluate the last layer's output mask on concrete mask inputs.
        mask_output = model.layers[-1]._output_mask(placeholders)
        computed = K.function(placeholders, [mask_output])(mask_values)[0]
        assert np.all(computed == expected)

    input_shapes = [(3, 2), (3, 2)]
    inputs = [np.random.random(shape) for shape in input_shapes]

    # Built-in modes through the functional API and the Merge layer.
    for mode in ['sum', 'mul', 'concat', 'ave', 'max']:
        print(mode)
        input_a, input_b = make_inputs(input_shapes)
        merged = legacy_layers.merge([input_a, input_b], mode=mode)
        model = models.Model([input_a, input_b], merged)
        model.compile('rmsprop', 'mse')
        check_output_shape(model, input_shapes, inputs)
        model = rebuild_from_config(model)

        # Merge layer used directly (#2460).
        merged = legacy_layers.Merge(mode=mode)([input_a, input_b])
        model = models.Model([input_a, input_b], merged)
        model.compile('rmsprop', 'mse')
        check_output_shape(model, input_shapes, inputs)

    # Lambda mode with a lambda output_shape.
    input_a, input_b = make_inputs(input_shapes)
    merged = legacy_layers.merge(
        [input_a, input_b],
        mode=lambda tup: K.concatenate([tup[0], tup[1]]),
        output_shape=lambda tup: tup[0][:-1] + (tup[0][-1] + tup[1][-1], ))
    model = models.Model([input_a, input_b], merged)
    check_output_shape(model, input_shapes, inputs)
    model = rebuild_from_config(model)

    # Function mode with a function output_shape.
    def fn_mode(tup):
        x, y = tup
        return K.concatenate([x, y], axis=1)

    def fn_output_shape(tup):
        s1, s2 = tup
        return (s1[0], s1[1] + s2[1]) + s1[2:]

    input_a, input_b = make_inputs(input_shapes)
    merged = legacy_layers.merge([input_a, input_b],
                                 mode=fn_mode,
                                 output_shape=fn_output_shape)
    model = models.Model([input_a, input_b], merged)
    check_output_shape(model, input_shapes, inputs)
    model = rebuild_from_config(model)

    # Function mode with a function output_mask.
    # A time dimension is required for masking.
    input_shapes = [(4, 3, 2), (4, 3, 2)]
    inputs = [np.random.random(shape) for shape in input_shapes]

    def fn_output_mask(tup):
        x_mask, y_mask = tup
        return K.concatenate([x_mask, y_mask])

    input_a, input_b = make_inputs(input_shapes)
    a = layers.Masking()(input_a)
    b = layers.Masking()(input_b)
    merged = legacy_layers.merge([a, b],
                                 mode=fn_mode,
                                 output_shape=fn_output_shape,
                                 output_mask=fn_output_mask)
    model = models.Model([input_a, input_b], merged)
    check_output_shape(model, input_shapes, inputs)
    model = rebuild_from_config(model)

    mask_inputs = (np.zeros(input_shapes[0][:-1]),
                   np.ones(input_shapes[1][:-1]))
    expected_mask_output = np.concatenate(mask_inputs, axis=-1)
    mask_input_placeholders = [
        K.placeholder(shape=input_shape[:-1]) for input_shape in input_shapes
    ]
    check_output_mask(model, mask_input_placeholders, mask_inputs,
                      expected_mask_output)

    # Lambda mode with a lambda output_mask.
    input_a, input_b = make_inputs(input_shapes)
    a = layers.Masking()(input_a)
    b = layers.Masking()(input_b)
    merged = legacy_layers.merge(
        [a, b],
        mode=lambda tup: K.concatenate([tup[0], tup[1]], axis=1),
        output_shape=lambda tup:
        (tup[0][0], tup[0][1] + tup[1][1]) + tup[0][2:],
        output_mask=lambda tup: K.concatenate([tup[0], tup[1]]))
    model = models.Model([input_a, input_b], merged)
    check_output_shape(model, input_shapes, inputs)
    model = rebuild_from_config(model)
    check_output_mask(model, mask_input_placeholders, mask_inputs,
                      expected_mask_output)

    # Function mode that takes extra arguments.
    input_shapes = [(3, 2), (3, 2)]
    inputs = [np.random.random(shape) for shape in input_shapes]

    def fn_mode(tup, a, b):
        x, y = tup
        return x * a + y * b

    input_a, input_b = make_inputs(input_shapes)
    merged = legacy_layers.merge([input_a, input_b],
                                 mode=fn_mode,
                                 output_shape=lambda s: s[0],
                                 arguments={
                                     'a': 0.7,
                                     'b': 0.3
                                 })
    model = models.Model([input_a, input_b], merged)
    output = model.predict(inputs)

    # The rebuilt (uncompiled) model must reproduce the same predictions.
    config = model.get_config()
    model = models.Model.from_config(config)
    assert np.all(model.predict(inputs) == output)
return acc def label_id(pred): max_pro = -1 label = 0 for i in range(0, len(pred)): if pred[i] > max_pro: max_pro = pred[i] label = i return label if __name__ == "__main__": if FLAGS.do_train: text_input = Input(shape=(None, 768,), dtype='float32', name='text') l_mask = layers.Masking(mask_value=-99.)(text_input) # Which we encoded in a single vector via a LSTM encoded_text = layers.LSTM(100, )(l_mask) out_dense = layers.Dense(30, activation='relu')(encoded_text) # And we add a softmax classifier on top out = layers.Dense(len(FLAGS.labels), activation='softmax')(out_dense) # At model instantiation, we specify the input and the output: model = Model(text_input, out) model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc']) model.summary() df_train = read_csv(FLAGS.raw_data_dir, FLAGS.sup_train_file) df_val = read_csv(FLAGS.raw_data_dir, FLAGS.sup_dev_file) print(df_val.head(20))
def Transfer_wide_Beta_GRU(X_train,Y_train,Var):
    """Build a wide residual bidirectional-GRU model with pretrained paths.

    Three trainable residual paths (32, 2 and 64 hidden units) and two
    frozen, pretrained models loaded from disk all consume the same intro
    block (masking, dropout, dense, batch norm). Their outputs are
    concatenated and mapped to a softmax over ``Y_train.shape[-1]`` classes.

    Known problem noted by the original author: sub-models used as layers
    have trouble with masking, see
    https://github.com/keras-team/keras/issues/3524 and
    https://github.com/keras-team/keras/issues/6541. The probable fix is to
    copy weights with get_weights/set_weights instead.

    Args:
        X_train: training inputs; only ``shape[1]`` and ``shape[2]`` are read.
        Y_train: training targets; only ``shape[-1]`` is read.
        Var: settings object providing ``activationF``, ``Kr``, ``Ar``,
            ``dropout``, ``Dense_Unit``, ``residual_blocks`` and
            ``mask_value``.

    Returns:
        The assembled (uncompiled) model.
    """

    # Locations of the pretrained models: architecture JSON plus HDF5 weights.
    class ASQSModel:
        destination=('C:/Users/310122653/Documents/GitHub/DNN/Results/')
        modelname='4_TEst_to_get_weightsmodel'
        modelweights='4_TEst_to_get_weightsmodel_weigths'

    class ASISModel:
        destination=('C:/Users/310122653/Documents/GitHub/DNN/Results/')
        modelname='4_TEst_to_get_weightsmodel'
        modelweights='4_TEst_to_get_weightsmodel_weigths'

    class ASCTWModel:
        destination=('C:/Users/310122653/Documents/PhD/Article_4_(MMC)/Results/')
        modelname=''
        modelweights=''

    class QSISModel:
        destination=('C:/Users/310122653/Documents/PhD/Article_4_(MMC)/Results/')
        modelname=''
        modelweights=''

    class QSCTWModel:
        destination=('C:/Users/310122653/Documents/PhD/Article_4_(MMC)/Results/')
        modelname=''
        modelweights=''

    class ISCTWModel:
        destination=('C:/Users/310122653/Documents/PhD/Article_4_(MMC)/Results/')
        modelname=''
        modelweights=''

    def ModelLaden(destination,modelname):
        """Load a model architecture from ``destination + modelname + '.json'``.

        Weights are not loaded here; callers do that afterwards.
        """
        # The context manager closes the file even if reading fails.
        with open(destination+modelname+'.json', 'r') as json_file:
            loaded_model_json = json_file.read()
        loaded_model = model_from_json(loaded_model_json)
        return loaded_model

    def Block_unit(X_train,Var,hidden_units):
        """Return a residual block: two BiGRUs, dropout, dense, skip add."""
        def unit(x):
            ident = x
            x=layers.Bidirectional(GRU(hidden_units, activation=Var.activationF, return_sequences=True,
                                       kernel_regularizer=regularizers.l2(Var.Kr),
                                       activity_regularizer=regularizers.l2(Var.Ar),
                                       kernel_constraint=max_norm(max_value=3.), dropout=Var.dropout, recurrent_dropout=Var.dropout))(x)
            x=layers.Bidirectional(GRU(hidden_units, return_sequences=True,
                                       kernel_regularizer=regularizers.l2(Var.Kr),
                                       activity_regularizer=regularizers.l2(Var.Ar),
                                       kernel_constraint=max_norm(max_value=3.), dropout=Var.dropout, recurrent_dropout=Var.dropout))(x)
            # The bidirectional output is 2 * hidden_units wide; the noise
            # shape has 1 on the time axis.
            x=layers.Dropout(Var.dropout, noise_shape=(None, 1, hidden_units*2))(x)
            x=layers.Dense(Var.Dense_Unit, activation=Var.activationF, kernel_constraint=max_norm(max_value=3.))(x)
            # Residual connection around the block.
            x=layers.add([ident,x])
            return x
        return unit

    def cake(Var, hidden_units):
        """Return a stack of ``Var.residual_blocks`` residual blocks."""
        def unit(x):
            for _ in range(Var.residual_blocks):
                x=Block_unit(X_train,Var,hidden_units)(x)
            return x
        return unit

    # def extraingredient(Var,loaded_model):
    #     def BiUnit(x,loaded_model):
    #         ident = x
    #         x=loaded_model(x)
    #         return x
    #     return BiUnit

    # Intro block shared by all paths.
    inp = Input(shape=(X_train.shape[1],X_train.shape[2]))
    # i = inp
    i=layers.Masking(mask_value=Var.mask_value,input_shape=(X_train.shape[1],X_train.shape[2]))(inp)
    i=layers.Dropout(Var.dropout/2, noise_shape=(None, 1, X_train.shape[2]))(i)
    i=layers.Dense(Var.Dense_Unit, activation=Var.activationF, kernel_constraint=max_norm(max_value=3.))(i)
    intro_out=BatchNormalization(axis=1)(i)

    # Trainable residual paths ("Pfad" is German for "path"), each made of
    # two stacked cakes of a given width.
    Pfad1 = cake(Var,32)(intro_out)
    Pfad1 = cake(Var,32)(Pfad1)

    Pfad2 = cake(Var,2)(intro_out)
    Pfad2 = cake(Var,2)(Pfad2)

    Pfad3 = cake(Var,64)(intro_out)
    Pfad3 = cake(Var,64)(Pfad3)

    # Frozen pretrained path 4 (ASQS), applied through the model's inner
    # functional-API model.
    loaded_model=ModelLaden(ASQSModel.destination,ASQSModel.modelname)
    loaded_model.load_weights(ASQSModel.destination+ASQSModel.modelweights+'.h5')
    loaded_model.trainable = False
    # loaded_model.support_masking=True
    # for layer in loaded_model.layers[1:6]: #0-6 is all 0:4 leaves one dense at the end which is needed for the targets (when using advancedmodel3 or 4 [see build_model])
    #     layer.trainable=False
    loaded_model.build(input_shape=(X_train.shape[1],X_train.shape[2]))
    loaded_model_functional_api = loaded_model.model
    Pfad4 = loaded_model_functional_api(intro_out)

    # Frozen pretrained path 5 (ASIS), applied directly as a layer.
    loaded_model=ModelLaden(ASISModel.destination,ASISModel.modelname)
    loaded_model.load_weights(ASISModel.destination+ASISModel.modelweights+'.h5')
    loaded_model.trainable = False
    Pfad5 = loaded_model(intro_out)

    # loaded_model=ModelLaden(ASCTWModel.destination,ASCTWModel.modelname)
    # loaded_model.load_weights(ASCTWModel.destination+ASCTWModel.modelweights+'.h5')
    # loaded_model.trainable = False
    # Pfad6 = loaded_model(intro_out)
    #
    # loaded_model=ModelLaden(QSISModel.destination,QSISModel.modelname)
    # loaded_model.load_weights(QSISModel.destination+QSISModel.modelweights+'.h5')
    # loaded_model.trainable = False
    # Pfad7 = loaded_model(intro_out)
    #
    # loaded_model=ModelLaden(QSCTWModel.destination,QSCTWModel.modelname)
    # loaded_model.load_weights(QSCTWModel.destination+QSCTWModel.modelweights+'.h5')
    # loaded_model.trainable = False
    # Pfad8 = loaded_model(intro_out)

    # Merge all paths and classify.
    i = layers.concatenate([Pfad1, Pfad2, Pfad3, Pfad4, Pfad5])
    # Outro_out=layers.Bidirectional(GRU(Var.hidden_units, return_sequences=True, kernel_constraint=max_norm(max_value=3.), dropout=Var.dropout, recurrent_dropout=Var.dropout))(i)
    # Outro_out = Dense(Y_train.shape[-1],activation='softmax', kernel_constraint=max_norm(max_value=3.))(Outro_out)
    Outro_out = Dense(Y_train.shape[-1],activation='softmax', kernel_constraint=max_norm(max_value=3.))(i)

    model = Model(inputs=inp,outputs=Outro_out)
    return model