def build_generator(self):
    noise = Input((self.latent_dim, ))
    noise2 = Dense(1024)(noise)
    noise2 = LeakyReLU()(noise2)
    # seems to break it
    # noise2 = Dropout(0.2)(noise2)
    noise2 = Dense(128 * 8 * 8)(noise2)
    # seems to break it
    # noise2 = BatchNormalization()(noise2)
    noise2 = Reshape((8, 8, 128))(noise2)

    label = Input((self.num_labels, ))
    label2 = Dense(1024, activation='tanh')(label)
    label2 = Dense(8 * 8 * 128)(label2)
    label2 = BatchNormalization()(label2)
    label2 = Reshape((8, 8, 128))(label2)

    model = Concatenate()([noise2, label2])
    model = UpSampling2D(size=(2, 2))(model)
    model = Conv2D(128, (5, 5), activation='relu', padding='same')(model)
    model = UpSampling2D(size=(2, 2))(model)
    model = Conv2D(64, (5, 5), activation='relu', padding='same')(model)
    model = UpSampling2D(size=(2, 2))(model)
    model = Conv2D(3, (5, 5), activation='tanh', padding='same')(model)

    model = Model([noise, label], model)
    model.summary()
    return model
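# Hypothetical usage sketch (not from the original source): sample Gaussian
# noise and one-hot labels and run the generator built above. The 8x8 feature
# maps are upsampled three times, so the output is a batch of 64x64 RGB images
# in [-1, 1] from the final tanh layer.
# generator = self.build_generator()
# noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
# labels = np.eye(self.num_labels)[np.random.randint(0, self.num_labels, batch_size)]
# fake_images = generator.predict([noise, labels])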
def create_conditional_model(**kwargs):
    #
    # Parse settings
    #
    dropout = kwargs.get("dropout", -1.)
    leaky_relu = kwargs.get("leaky_relu", 0.2)
    name = kwargs.get("name", "model")
    num_outputs = kwargs.get("num_outputs")
    num_conditions = kwargs.get("num_conditions")
    num_observables = kwargs.get("num_observables")
    use_batch_norm = kwargs.get("batch_norm", False)
    verbose = kwargs.get("verbose", True)

    use_dropout = dropout > 0

    data_layers = kwargs.get("data_layers", (10, ))
    condition_layers = kwargs.get("condition_layers", (10, ))
    combined_layers = kwargs.get("combined_layers", (20, 20))

    #
    # Print stage
    #
    if verbose:
        print(
            f"Creating model with {num_observables} observables and {num_conditions} conditions"
        )

    #
    # Create input layers
    #
    data_input = Input((num_observables, ))
    condition_input = Input((num_conditions, ))

    #
    # Create initially separate layers for the condition and data
    #
    model_data = data_input
    for layer_size in data_layers:
        model_data = Dense(layer_size)(model_data)
        model_data = LeakyReLU(leaky_relu)(model_data)
        if use_batch_norm:
            model_data = BatchNormalization()(model_data)
        if use_dropout:
            model_data = Dropout(dropout)(model_data)

    model_condition = condition_input
    for layer_size in condition_layers:
        model_condition = Dense(layer_size)(model_condition)
        model_condition = LeakyReLU(leaky_relu)(model_condition)
        if use_batch_norm:
            model_condition = BatchNormalization()(model_condition)
        if use_dropout:
            model_condition = Dropout(dropout)(model_condition)

    #
    # Concatenate the condition and data latent states
    #
    model = Concatenate()([model_data, model_condition])

    #
    # Create final model layers
    #
    for layer_size in combined_layers:
        model = Dense(layer_size)(model)
        model = LeakyReLU(leaky_relu)(model)
        if use_batch_norm:
            model = BatchNormalization()(model)
        if use_dropout:
            model = Dropout(dropout)(model)

    #
    # Compile model
    #
    model = Dense(num_outputs, activation="linear")(model)
    model = Model(name=name,
                  inputs=[data_input, condition_input],
                  outputs=[model])
    model.compile(loss="mse", optimizer=Adam())
    if verbose:
        model.summary()

    #
    # return model
    #
    return model
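# Hypothetical usage sketch (illustrative sizes, not from the original source):
# a small conditional regression model built from the factory above. Training
# passes the data and condition arrays as a two-element input list.
# regression_model = create_conditional_model(num_observables=4,
#                                             num_conditions=2,
#                                             num_outputs=1,
#                                             dropout=0.1,
#                                             combined_layers=(32, 32),
#                                             name="conditional_regressor")
# regression_model.fit([X_data, X_condition], y_target, epochs=10, batch_size=64)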
def create_critic_generator_wgan(**kwargs):
    #
    # Parse settings
    #
    dropout = kwargs.get("dropout", -1.)
    GAN_noise_size = kwargs.get("GAN_noise_size", 3)
    leaky_relu = kwargs.get("leaky_relu", 0.2)
    num_observables = kwargs.get("num_observables")
    num_conditions = kwargs.get("num_conditions")
    verbose = kwargs.get("verbose", True)
    use_batch_norm = kwargs.get("batch_norm", False)

    use_dropout = dropout > 0

    critic_data_layers = kwargs.get("critic_data_layers", (10, ))
    critic_condition_layers = kwargs.get("critic_condition_layers", (10, ))
    critic_combined_layers = kwargs.get("critic_combined_layers", (20, 20))

    generator_noise_layers = kwargs.get("generator_noise_layers", (20, ))
    generator_condition_layers = kwargs.get("generator_condition_layers", (10, ))
    generator_combined_layers = kwargs.get("generator_combined_layers", (20, 20))

    #
    # Print stage
    #
    if verbose:
        print(
            f"Creating WGAN with {num_observables} observables and {num_conditions} conditions"
        )

    #
    # Create input layers
    #
    data_input = Input((num_observables, ))
    condition_input = Input((num_conditions, ))
    noise_input = Input((GAN_noise_size, ))

    #
    # Create initially separate layers for the condition and data (critic)
    #
    critic_data = data_input
    for layer_size in critic_data_layers:
        critic_data = Dense(layer_size)(critic_data)
        critic_data = LeakyReLU(leaky_relu)(critic_data)
        if use_dropout:
            critic_data = Dropout(dropout)(critic_data)

    critic_condition = condition_input
    for layer_size in critic_condition_layers:
        critic_condition = Dense(layer_size)(critic_condition)
        critic_condition = LeakyReLU(leaky_relu)(critic_condition)
        if use_dropout:
            critic_condition = Dropout(dropout)(critic_condition)

    #
    # Concatenate the condition and data latent states (critic)
    #
    critic = Concatenate()([critic_data, critic_condition])

    #
    # Create final critic layers
    #
    for layer_size in critic_combined_layers:
        critic = Dense(layer_size)(critic)
        critic = LeakyReLU(leaky_relu)(critic)
        if use_dropout:
            critic = Dropout(dropout)(critic)

    #
    # Compile critic model
    #
    critic = Dense(1, activation="linear")(critic)
    critic = Model(name="Critic",
                   inputs=[data_input, condition_input],
                   outputs=[critic])
    critic.compile(loss=wasserstein_loss,
                   optimizer=RMSprop(learning_rate=5e-5, rho=0))
    if verbose:
        critic.summary()

    #
    # Create initially separate layers for the noise and data (generator)
    #
    generator_noise = noise_input
    for layer_size in generator_noise_layers:
        generator_noise = Dense(layer_size)(generator_noise)
        generator_noise = LeakyReLU(leaky_relu)(generator_noise)
        if use_batch_norm:
            generator_noise = BatchNormalization()(generator_noise)

    generator_condition = condition_input
    for layer_size in generator_condition_layers:
        generator_condition = Dense(layer_size)(generator_condition)
        generator_condition = LeakyReLU(leaky_relu)(generator_condition)
        if use_batch_norm:
            generator_condition = BatchNormalization()(generator_condition)

    #
    # Concatenate the condition and noise latent states (generator)
    #
    generator = Concatenate()([generator_noise, generator_condition])

    #
    # Create final generator layers
    #
    for layer_size in generator_combined_layers:
        generator = Dense(layer_size)(generator)
        generator = LeakyReLU(leaky_relu)(generator)
        if use_batch_norm:
            generator = BatchNormalization()(generator)

    #
    # Compile generator model
    #
    generator = Dense(num_observables, activation="linear")(generator)
    generator = Model(name="Generator",
                      inputs=[noise_input, condition_input],
                      outputs=[generator])
    if verbose:
        generator.summary()

    #
    # Create and compile GAN
    #
    GAN = critic([generator([noise_input, condition_input]), condition_input])
    GAN = Model([noise_input, condition_input], GAN, name="GAN")
    critic.trainable = False
    GAN.compile(loss=wasserstein_loss,
                optimizer=RMSprop(learning_rate=5e-5, rho=0))
    if verbose:
        GAN.summary()

    #
    # return critic, generator, GAN
    #
    return critic, generator, GAN
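# `wasserstein_loss` is referenced above but not defined in this snippet. A
# minimal sketch, assuming the usual Keras formulation where real/fake labels
# are +1/-1 and the critic output is unbounded (use tensorflow.keras.backend
# instead if the rest of the code is built on tf.keras):
from keras import backend as K

def wasserstein_loss(y_true, y_pred):
    # Mean of label * critic score; minimising this approximates the
    # Wasserstein-1 distance when the critic is kept (approximately) Lipschitz.
    return K.mean(y_true * y_pred)

# Hypothetical call, with illustrative sizes:
# critic, generator, GAN = create_critic_generator_wgan(num_observables=4,
#                                                       num_conditions=2,
#                                                       GAN_noise_size=3)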
def create_conditional_discriminator(**kwargs):
    #
    # Parse settings
    #
    dropout = kwargs.get("dropout", -1.)
    leaky_relu = kwargs.get("leaky_relu", 0.2)
    num_categories = kwargs.get("num_categories")
    num_conditions = kwargs.get("num_conditions")
    num_observables = kwargs.get("num_observables")
    use_batch_norm = kwargs.get("batch_norm", False)
    verbose = kwargs.get("verbose", True)

    use_dropout = dropout > 0

    data_layers = kwargs.get("data_layers", (10, ))
    condition_layers = kwargs.get("condition_layers", (10, ))
    combined_layers = kwargs.get("combined_layers", (20, 20))

    #
    # Print stage
    #
    if verbose:
        print(
            f"Creating discriminator with {num_observables} observables and {num_conditions} conditions"
        )

    #
    # Create input layers
    #
    data_input = Input((num_observables, ))
    condition_input = Input((num_conditions, ))

    #
    # Create initially separate layers for the condition and data
    #
    discriminator_data = data_input
    for layer_size in data_layers:
        discriminator_data = Dense(layer_size)(discriminator_data)
        discriminator_data = LeakyReLU(leaky_relu)(discriminator_data)
        if use_batch_norm:
            discriminator_data = BatchNormalization()(discriminator_data)
        if use_dropout:
            discriminator_data = Dropout(dropout)(discriminator_data)

    discriminator_condition = condition_input
    for layer_size in condition_layers:
        discriminator_condition = Dense(layer_size)(discriminator_condition)
        discriminator_condition = LeakyReLU(leaky_relu)(discriminator_condition)
        if use_batch_norm:
            discriminator_condition = BatchNormalization()(discriminator_condition)
        if use_dropout:
            discriminator_condition = Dropout(dropout)(discriminator_condition)

    #
    # Concatenate the condition and data latent states
    #
    discriminator = Concatenate()([discriminator_data, discriminator_condition])

    #
    # Create final discriminator layers
    #
    for layer_size in combined_layers:
        discriminator = Dense(layer_size)(discriminator)
        discriminator = LeakyReLU(leaky_relu)(discriminator)
        if use_batch_norm:
            discriminator = BatchNormalization()(discriminator)
        if use_dropout:
            discriminator = Dropout(dropout)(discriminator)

    #
    # Compile discriminator model
    #
    discriminator = Dense(num_categories, activation="sigmoid")(discriminator)
    discriminator = Model(name="Discriminator",
                          inputs=[data_input, condition_input],
                          outputs=[discriminator])
    if num_categories == 1:
        discriminator.compile(loss="binary_crossentropy", optimizer=Adam())
    else:
        discriminator.compile(loss="categorical_crossentropy", optimizer=Adam())
    if verbose:
        discriminator.summary()

    #
    # return discriminator
    #
    return discriminator
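# Hypothetical usage sketch (illustrative sizes, not from the original source):
# a binary real/fake discriminator conditioned on two auxiliary variables; with
# num_categories == 1 the factory compiles it with binary cross-entropy.
# discriminator = create_conditional_discriminator(num_observables=4,
#                                                  num_conditions=2,
#                                                  num_categories=1,
#                                                  dropout=0.1)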
def specify(self):
    """Specifies a multi-task BiLSTM-CRF for sequence tagging using Keras.

    Implements a hybrid bidirectional long short-term memory network-conditional
    random field (BiLSTM-CRF) multi-task model for sequence tagging.
    """
    # specify any shared layers outside the for loop
    # word-level embedding layer
    if self.embeddings is None:
        word_embeddings = Embedding(
            input_dim=len(self.datasets[0].type_to_idx['word']) + 1,
            output_dim=self.config.word_embed_dim,
            mask_zero=True,
            name="word_embedding_layer")
    else:
        word_embeddings = Embedding(
            input_dim=self.embeddings.num_embed,
            output_dim=self.embeddings.dimension,
            mask_zero=True,
            weights=[self.embeddings.matrix],
            trainable=self.config.fine_tune_word_embeddings,
            name="word_embedding_layer")

    # character-level embedding layer
    char_embeddings = Embedding(
        input_dim=len(self.datasets[0].type_to_idx['char']) + 1,
        output_dim=self.config.char_embed_dim,
        mask_zero=True,
        name="char_embedding_layer")

    # char-level BiLSTM
    char_BiLSTM = TimeDistributed(
        Bidirectional(LSTM(constants.UNITS_CHAR_LSTM // 2)),
        name='character_BiLSTM')

    # word-level BiLSTMs
    word_BiLSTM_1 = Bidirectional(
        LSTM(units=constants.UNITS_WORD_LSTM // 2,
             return_sequences=True,
             dropout=self.config.dropout_rate['input'],
             recurrent_dropout=self.config.dropout_rate['recurrent']),
        name="word_BiLSTM_1")
    word_BiLSTM_2 = Bidirectional(
        LSTM(units=constants.UNITS_WORD_LSTM // 2,
             return_sequences=True,
             dropout=self.config.dropout_rate['input'],
             recurrent_dropout=self.config.dropout_rate['recurrent']),
        name="word_BiLSTM_2")

    # get all unique tag types across all datasets
    all_tags = [ds.type_to_idx['tag'] for ds in self.datasets]
    all_tags = set(x for l in all_tags for x in l)

    # feedforward before CRF, maps each time step to a vector
    dense_layer = TimeDistributed(
        Dense(len(all_tags), activation=self.config.activation),
        name='dense_layer')

    # specify model, taking into account the shared layers
    for dataset in self.datasets:
        # word-level embedding
        word_ids = Input(shape=(None, ), dtype='int32', name='word_id_inputs')
        word_embed = word_embeddings(word_ids)

        # character-level embedding
        char_ids = Input(shape=(None, None), dtype='int32', name='char_id_inputs')
        char_embed = char_embeddings(char_ids)

        # character-level BiLSTM + dropout. Spatial dropout applies the same
        # dropout mask to all timesteps, which is necessary to implement
        # variational dropout (https://arxiv.org/pdf/1512.05287.pdf)
        char_embed = char_BiLSTM(char_embed)
        if self.config.variational_dropout:
            LOGGER.info('Used variational dropout')
            char_embed = SpatialDropout1D(
                self.config.dropout_rate['output'])(char_embed)

        # concatenate word- and char-level embeddings + dropout
        model = Concatenate()([word_embed, char_embed])
        model = Dropout(self.config.dropout_rate['output'])(model)

        # word-level BiLSTM + dropout
        model = word_BiLSTM_1(model)
        if self.config.variational_dropout:
            model = SpatialDropout1D(self.config.dropout_rate['output'])(model)
        model = word_BiLSTM_2(model)
        if self.config.variational_dropout:
            model = SpatialDropout1D(self.config.dropout_rate['output'])(model)

        # feedforward before CRF
        model = dense_layer(model)

        # CRF output layer
        crf = CRF(len(dataset.type_to_idx['tag']), name='crf_classifier')
        output_layer = crf(model)

        # fully specified model
        # https://github.com/keras-team/keras/blob/bf1378f39d02b7d0b53ece5458f9275ac8208046/keras/utils/multi_gpu_utils.py
        with tf.device('/cpu:0'):
            model = Model(inputs=[word_ids, char_ids], outputs=[output_layer])

        self.models.append(model)
        print(model.summary())
def create_model(words,
                 chars,
                 max_len,
                 n_words,
                 n_tags,
                 max_len_char,
                 n_pos,
                 n_chars,
                 embedding_mats,
                 use_word=True,
                 use_pos=False,
                 embedding_matrix=None,
                 embed_dim=70,
                 trainable=True,
                 input_dropout=False,
                 stack_lstm=1,
                 epochs=100,
                 early_stopping=True,
                 patience=20,
                 min_delta=0.0001,
                 use_char=False,
                 crf=False,
                 add_random_embedding=True,
                 pretrained_embed_dim=300,
                 stack_cross=False,
                 stack_double=False,
                 rec_dropout=0.1,
                 validation_split=0.1,
                 output_dropout=False,
                 optimizer='rmsprop',
                 pos_dropout=None,
                 char_dropout=False,
                 all_spatial_dropout=True,
                 print_summary=True,
                 verbose=2):
    X_tr, X_te, y_tr, y_te, pos_tr, pos_te = words
    X_char_tr, X_char_te, _, _ = chars

    all_input_embeds = []
    all_inputs = []
    train_data = []

    if use_word and not add_random_embedding and embedding_matrix is None:
        raise ValueError('Cannot use word without embedding')

    if use_word:
        input = Input(shape=(max_len, ))
        if add_random_embedding:
            input_embed = Embedding(input_dim=n_words + 2,
                                    output_dim=embed_dim,
                                    input_length=max_len)(input)
            all_input_embeds.append(input_embed)
        if embedding_matrix is not None:
            input_embed = Embedding(input_dim=n_words + 2,
                                    output_dim=pretrained_embed_dim,
                                    input_length=max_len,
                                    weights=[embedding_mats[embedding_matrix]],
                                    trainable=trainable)(input)
            all_input_embeds.append(input_embed)
        all_inputs.append(input)
        train_data.append(X_tr)

    if use_pos:
        pos_input = Input(shape=(max_len, ))
        pos_embed = Embedding(input_dim=n_pos + 1,
                              output_dim=10,
                              input_length=max_len)(pos_input)
        if pos_dropout is not None:
            pos_embed = Dropout(pos_dropout)(pos_embed)
        all_input_embeds.append(pos_embed)
        all_inputs.append(pos_input)
        train_data.append(pos_tr)

    if use_char:
        # input and embeddings for characters
        char_in = Input(shape=(max_len, max_len_char, ))
        emb_char = TimeDistributed(
            Embedding(input_dim=n_chars + 2,
                      output_dim=20,
                      input_length=max_len_char))(char_in)
        # character LSTM to get word encodings by characters
        char_enc = TimeDistributed(
            Bidirectional(
                LSTM(units=10,
                     return_sequences=False,
                     recurrent_dropout=0.5)))(emb_char)
        if char_dropout:
            char_enc = SpatialDropout1D(0.3)(char_enc)
        all_input_embeds.append(char_enc)
        all_inputs.append(char_in)
        train_data.append(
            np.array(X_char_tr).reshape(
                (len(X_char_tr), max_len, max_len_char)))

    if len(all_inputs) > 1:
        model = Concatenate()(all_input_embeds)
        if use_char and all_spatial_dropout:
            model = SpatialDropout1D(0.3)(model)
    else:
        model = all_input_embeds[0]
        all_input_embeds = all_input_embeds[0]
        all_inputs = all_inputs[0]
        train_data = train_data[0]

    if input_dropout:
        model = Dropout(0.1)(model)

    if stack_double:
        front = LSTM(units=100, return_sequences=True,
                     recurrent_dropout=rec_dropout)(model)
        front = LSTM(units=100, return_sequences=True,
                     recurrent_dropout=rec_dropout)(front)
        back = LSTM(units=100, return_sequences=True,
                    recurrent_dropout=rec_dropout, go_backwards=True)(model)
        model = LSTM(units=100, return_sequences=True,
                     recurrent_dropout=rec_dropout, go_backwards=True)(back)

    if stack_cross:
        front = LSTM(units=100, return_sequences=True,
                     recurrent_dropout=rec_dropout)(model)
        front = LSTM(units=100, return_sequences=True,
                     recurrent_dropout=rec_dropout)(front)
        back = LSTM(units=100, return_sequences=True,
                    recurrent_dropout=rec_dropout, go_backwards=True)(model)
        back = LSTM(units=100, return_sequences=True,
                    recurrent_dropout=rec_dropout, go_backwards=True)(back)
        model = concatenate([back, front])

    for i in range(stack_lstm):
        model = Bidirectional(
            LSTM(units=100, return_sequences=True,
                 recurrent_dropout=rec_dropout))(model)

    if output_dropout:
        model = Dropout(0.1)(model)

    if crf:
        model = TimeDistributed(Dense(50, activation="relu"))(
            model)  # a dense layer as suggested by neuralNer
        crf = CRF(n_tags + 1)
        loss = crf_loss
        metric = crf_accuracy
        monitor = 'val_crf_accuracy'
        out = crf(model)
    else:
        out = TimeDistributed(Dense(n_tags + 1, activation="softmax"))(
            model)  # softmax output layer
        loss = "categorical_crossentropy"
        metric = 'accuracy'
        monitor = 'val_acc'

    model = Model(all_inputs, out)
    model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

    if early_stopping:
        es = [
            EarlyStopping(monitor=monitor,
                          mode='max',
                          verbose=1,
                          patience=patience,
                          restore_best_weights=True,
                          min_delta=min_delta)
        ]
    else:
        es = None

    if print_summary:
        print(model.summary())

    history = model.fit(train_data,
                        np.array(y_tr),
                        batch_size=32,
                        epochs=epochs,
                        validation_split=validation_split,
                        verbose=verbose,
                        callbacks=es)
    hist = pd.DataFrame(history.history)

    return model, hist
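# Hypothetical usage sketch: `words` and `chars` are tuples packed exactly as
# they are unpacked at the top of create_model(); the arrays and sizes below
# are placeholders for the caller's own preprocessed splits.
# words = (X_tr, X_te, y_tr, y_te, pos_tr, pos_te)
# chars = (X_char_tr, X_char_te, y_char_tr, y_char_te)
# model, hist = create_model(words, chars, max_len=75, n_words=10000, n_tags=17,
#                            max_len_char=10, n_pos=18, n_chars=100,
#                            embedding_mats={}, use_char=True, crf=True)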
import numpy as np

# Assuming tf.keras here; with standalone Keras, import from `keras` instead.
from tensorflow.keras.layers import Concatenate, Dense, Input
from tensorflow.keras.models import Model, Sequential

x = np.array([1, 2, 3])
y = np.array([1, 2, 3])

first = Sequential()
first.add(Dense(1, input_shape=(1, ), activation='sigmoid'))
second = Sequential()
second.add(Dense(1, input_shape=(1, ), activation='sigmoid'))

# Concatenate is a layer, not a model container, so it cannot wrap Sequential
# models or have layers .add()-ed to it. Merge the two branch outputs with the
# functional API instead.
first_input = Input(shape=(1, ))
second_input = Input(shape=(1, ))
merged = Concatenate()([first(first_input), second(second_input)])
merged = Dense(2, activation="relu")(merged)
merged = Dense(1, activation="linear")(merged)

result = Model(inputs=[first_input, second_input], outputs=merged)
result.summary()

# result.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
# result.fit([x, x], [y, y], epochs=100, batch_size=1)
# loss, acc = result.evaluate([x, x], [y, y], batch_size=1)
# print("acc : ", acc)