def create_model(args, learning_rate, l1):
    hidden_layers = [int(n) for n in args.hidden_layers.split(',')]
    inputs = Input(shape=[N_FEATURES])
    hidden = inputs
    if hidden_layers != [-1]:
        for size in hidden_layers:
            hidden = Dense(size, kernel_regularizer=L1L2(l1=l1),
                           bias_regularizer=L1L2(l1=l1))(hidden)
            hidden = BatchNormalization()(hidden)
            hidden = ReLU()(hidden)
    outputs = Dense(1)(hidden)
    model = Model(inputs=inputs, outputs=outputs)

    # I know this is ugly, but the sgd arg was only added later, so older
    # networks do not have args.optimizer (they were optimized with Adam).
    try:
        if args.optimizer == "sgd":
            optimizer = SGD(learning_rate=learning_rate, momentum=0.99, nesterov=True)
        elif args.optimizer == "adam":
            optimizer = Adam(learning_rate=learning_rate)
        else:
            # unrecognised optimizer name: fall back to Adam instead of
            # leaving `optimizer` unbound
            optimizer = Adam(learning_rate=learning_rate)
    except AttributeError:
        optimizer = Adam(learning_rate=learning_rate)

    model.compile(optimizer=optimizer,
                  loss='mse',
                  metrics=[RootMeanSquaredError(), MeanAbsoluteError(), RSquare()])
    return model
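# Hedged usage sketch for create_model above: N_FEATURES and the Keras/optimizer
# imports from that snippet are assumed to be in scope, and the argparse fields
# shown here (hidden_layers, optimizer) carry illustrative values only.
import argparse

example_args = argparse.Namespace(hidden_layers="64,32", optimizer="adam")
example_model = create_model(example_args, learning_rate=1e-3, l1=1e-4)
example_model.summary()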
def _set_metric(self):
    """Set the metrics for the model based on the experiment type or a
    user-supplied list of metrics."""
    from tensorflow.keras.metrics import MeanAbsoluteError

    if self.task in ['binary', 'multiclass', 'multilabel']:
        return ['acc']
    elif self.task == 'continuous':
        return [MeanAbsoluteError(), 'acc']
def train(model, x_train, y_train, x_valid, y_valid, epochs=10, patience=3):
    adam = Adam()
    mae = MeanAbsoluteError()
    stop = EarlyStopping(monitor='val_loss', patience=patience)
    model.compile(optimizer=adam, loss=mean_squared_error, metrics=[mae])
    history = model.fit(x_train, y_train,
                        epochs=epochs,
                        callbacks=[stop],
                        validation_data=(x_valid, y_valid))
    return history
def from_saved(cls, folder): with open(os.path.join(folder, "args.pickle"), "rb") as f: args = argparse.Namespace(**pickle.load(f)) # loads dict and converts it to namespace with open(os.path.join(folder,'model.json')) as f: json_string = json.load(f) model = tf.keras.models.model_from_json(json_string, custom_objects=None) model.load_weights(os.path.join(folder, 'weights.h5')) model.compile( loss ='mse', metrics = [RootMeanSquaredError(), MeanAbsoluteError(), RSquare()] ) return cls(model, args)
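# Sketch of the on-disk layout that from_saved above expects; the helper name
# is hypothetical, `model` is a built Keras model and `args` is the
# argparse.Namespace used to create it.
import json
import os
import pickle

def save_for_from_saved(model, args, folder):
    os.makedirs(folder, exist_ok=True)
    with open(os.path.join(folder, "args.pickle"), "wb") as f:
        pickle.dump(vars(args), f)              # stored as a plain dict
    with open(os.path.join(folder, "model.json"), "w") as f:
        json.dump(model.to_json(), f)           # architecture only, no weights
    model.save_weights(os.path.join(folder, "weights.h5"))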
def create_ensemble(models):
    if len(models) == 1:
        return models[0]
    else:
        inputs = Input(shape=[N_FEATURES])
        predictions = [model(inputs) for model in models]
        outputs = average(predictions)
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(loss='mse',
                      metrics=[RootMeanSquaredError(), MeanAbsoluteError(), RSquare()])
        return model
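# Minimal sketch of create_ensemble on two placeholder regressors; N_FEATURES
# and the Keras imports from the snippet above are assumed to be in scope, and
# the members are untrained stand-ins used only to show the wiring.
def _toy_member():
    x = Input(shape=[N_FEATURES])
    y = Dense(1)(Dense(8, activation='relu')(x))
    m = Model(inputs=x, outputs=y)
    m.compile(loss='mse')
    return m

ensemble = create_ensemble([_toy_member(), _toy_member()])
ensemble.summary()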
def evaluate(): # So you don't have to retrain every time you want to evaluate model = load(build_fcnn, name=save_name) model.named_steps['nn'].model.compile(optimizer=Adam(learning_rate=LR), loss=MeanSquaredError(), metrics=[MeanAbsoluteError()]) # Get loss(mse) and mae score = model.score(X_test, Y_test) print(score) # Plot predictions vs real values df = create_pred_dataframe(X_test, station, model) df.plot() plt.show()
def __init__(self, hparams, name, log_dir): self.univariate = hparams.get('UNIVARIATE', True) self.batch_size = int(hparams.get('BATCH_SIZE', 32)) self.epochs = int(hparams.get('EPOCHS', 500)) self.patience = int(hparams.get('PATIENCE', 15)) self.val_frac = hparams.get('VAL_FRAC', 0.15) self.T_x = int(hparams.get('T_X', 32)) self.metrics = [ MeanSquaredError(name='mse'), RootMeanSquaredError(name='rmse'), MeanAbsoluteError(name='mae'), MeanAbsolutePercentageError(name='mape') ] self.standard_scaler = StandardScaler() self.forecast_start = datetime.datetime.today() model = None super(NNModel, self).__init__(model, self.univariate, name, log_dir=log_dir)
def __init__(self, rnn_layer_sizes=[128], layer_normalize=[True], dropouts=[0.1], show_summary=True, patience=3, epochs=1000, batch_size=128, lr=0.001, loss='MSE', max_seq_len=128, embedding_size=200, monitor_loss='val_loss', metrics=[ MeanSquaredError(name='MSE'), MeanAbsoluteError(name='MAE'), MeanSquaredLogarithmicError(name='MSLE'), ]): self.lr = lr self.batch_size = batch_size self.rnn_layer_sizes = rnn_layer_sizes self.layer_normalize = layer_normalize self.dropouts = dropouts self.max_seq_len = max_seq_len self.show_summary = show_summary self.patience = patience self.epochs = epochs self.loss = loss self.embedding_size = embedding_size self.monitor_loss = monitor_loss self.metrics = metrics self.earlystop = tensorflow.keras.callbacks.EarlyStopping( monitor=self.monitor_loss, patience=self.patience, verbose=1, restore_best_weights=True, mode='min') self.unk_token = '[unk]' self.pad_token = '[pad]'
def train(): # Compile keras model to ready for training keras_model = build_fcnn[station]() keras_model.compile(optimizer=Adam(learning_rate=LR), loss=MeanSquaredError(), metrics=[MeanAbsoluteError()]) keras_model.summary() # Define automated pipeline model = Pipeline([ ('vectorizer', PumpdataVectorizer(station)), # Subtracts mean and scales by std on each feature ('standarizer', StandardScaler()), ('nn', kerasEstimator(keras_model, EPHOCS, BATCH_SIZE, val_split=0.07)) ]) model.fit(X_train, Y_train) plot_train(model.named_steps['nn'].history) save(model, filename=save_name)
def build_bbox_separable_model( input_size=(56, 56, 3), n_conv_blocks=3, base_conv_n_filters=16, n_dense_layers=2, dense_size=256, dropout_rate=0.25, loss=MeanSquaredError(), optimizer=Adam(), metrics=[MeanAbsoluteError(), MeanBBoxIoU(x2y2=False)]): model_in = Input(shape=input_size) model = model_in for i in range(n_conv_blocks): model = SeparableConv2D(base_conv_n_filters * (2**i), (3, 3), padding='same', activation='relu', name="block-{}_conv_0".format(i))(model) model = SeparableConv2D(base_conv_n_filters * (2**i), (3, 3), padding='same', activation='relu', name="block-{}_conv_1".format(i))(model) model = MaxPooling2D((2, 2), strides=(2, 2), name="block-{}_pool".format(i))(model) model = Flatten()(model) for i in range(n_dense_layers): model = Dense(dense_size, activation='relu', name="dense-{}".format(i))(model) model = Dropout(dropout_rate)(model) model_out = Dense(4, activation='sigmoid', name="output")(model) model = Model(model_in, model_out) model.compile(loss=loss, optimizer=optimizer, metrics=metrics) return model
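# Shape-check sketch for build_bbox_separable_model: random inputs stand in
# for real 56x56 crops, and only predict() is called so the custom MeanBBoxIoU
# metric is not exercised here.
import numpy as np

bbox_model = build_bbox_separable_model()
dummy_imgs = np.random.rand(2, 56, 56, 3).astype("float32")
print(bbox_model.predict(dummy_imgs, verbose=0).shape)  # (2, 4): one box per image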
def compile_model(model):
    model.compile(optimizer=Adam(),
                  loss=MeanSquaredError(),
                  metrics=[MeanAbsoluteError()])
def fit(self, patience=20, lr_decay=0.5, loss='mse', label=''): results = [] now = time.time() base_path = '../result/{}/'.format(self.hyper['model']) log_path = base_path + "l{}_{}_{}_e{}_{}_{}/".format( self.hyper['num_conv_layers_intra'], self.hyper['num_conv_layers_inter'], self.hyper['num_fc_layers'], self.hyper['units_embed'], label, time.strftime('%b%d_%H_%M_%S', time.localtime(now))) self.hyper['patience'] = patience self.hyper['lr_decay'] = lr_decay for trial in range(int(self.hyper['fold'])): # Make folder now = time.time() trial_path = log_path + 'trial_{:02d}/'.format(trial) # Reset model self.model.model = model_from_json( self.model.model.to_json(), custom_objects=self.model.custom_objects) self.model.compile( optimizer='adam', loss=loss, lr=0.00015, clipnorm=0.5, metric=[MeanAbsoluteError(), RootMeanSquaredError()]) self.hyper = {**self.hyper, **self.model.hyper} # Shuffle, split and normalize data self.dataset.shuffle() self.dataset.split(batch=32, valid_ratio=0.1, test_ratio=0.1) self.hyper = {**self.hyper, **self.dataset.hyper} # Define callbacks callbacks = [ TensorBoard(log_dir=trial_path, write_graph=False, histogram_freq=0, write_images=False), EarlyStopping(patience=patience, restore_best_weights=True), ReduceLROnPlateau(factor=lr_decay, patience=patience // 2), TerminateOnNaN() ] # Train model self.model.model.fit(self.dataset.train, steps_per_epoch=self.dataset.train_step, validation_data=self.dataset.valid, validation_steps=self.dataset.valid_step, epochs=1500, callbacks=callbacks, verbose=2) # Save current state self.model.model.save_weights(trial_path + 'best_weights.h5') self.model.model.save(trial_path + 'best_model.h5') self.hyper['training_time'] = '{:.2f}'.format(time.time() - now) # Evaluate model train_loss = self.model.model.evaluate( self.dataset.train, steps=self.dataset.train_step, verbose=0) valid_loss = self.model.model.evaluate( self.dataset.valid, steps=self.dataset.valid_step, verbose=0) test_loss = self.model.model.evaluate(self.dataset.test, steps=self.dataset.test_step, verbose=0) results.append([ train_loss[1], valid_loss[1], test_loss[1], train_loss[2], valid_loss[2], test_loss[2] ]) # Save trial results with open(trial_path + 'hyper.csv', 'w') as file: writer = csv.DictWriter(file, fieldnames=list(self.hyper.keys())) writer.writeheader() writer.writerow(self.hyper) with open(trial_path + 'result.csv', 'w') as file: writer = csv.writer(file, delimiter=',') writer.writerow([ 'train_mae', 'valid_mae', 'test_mae', 'train_rmse', 'valid_rmse', 'test_rmse' ]) writer.writerow(np.array(results[-1]) * self.hyper['std']) self.dataset.save(trial_path + 'data_split.npz') clear_session() # Save cross-validated results header = [ 'train_mae', 'valid_mae', 'test_mae', 'train_rmse', 'valid_rmse', 'test_rmse' ] results = np.array(results) * self.hyper['std'] results = [np.mean(results, axis=0), np.std(results, axis=0)] with open(log_path + "results.csv", "w") as csvfile: writer = csv.writer(csvfile, delimiter=",") writer.writerow(header) for r in results: writer.writerow(r) print('{}-fold cross-validation result'.format(self.hyper['fold'])) print('RMSE {}+-{}, {}+-{}, {}+-{}'.format( results[0][3], results[1][3], results[0][4], results[1][4], results[0][5], results[1][5]))
from utils import * from training import * from data_generators import * from model_builders import * ######################### import os from tensorflow.keras.metrics import MeanAbsoluteError exp_name = "x2y2_cnn" data_dir = "../../data/" results_dir = "../results/" tensorboard_dir = "../tensorboard_logs/" bboxs_csv = os.path.join(data_dir, "bboxs_x2y2.csv") splits_csv = os.path.join(data_dir, "splits.csv") imgs_dir = os.path.join(data_dir, "Img_Resize/") train_df, val_df, _ = get_train_val_test_dfs(bboxs_csv, splits_csv) train_datagen = get_bboxs_generator(train_df, imgs_dir=imgs_dir) val_datagen = get_bboxs_generator(val_df, imgs_dir=imgs_dir) model = build_bbox_model(metrics=[MeanAbsoluteError(), MeanBBoxIoU(x2y2=True)]) run_experiment(model, exp_name, train_datagen, val_datagen, results_dir=results_dir, tensorboard_logdir=tensorboard_dir)
train_data, test_data, train_targets, test_targets = train_test_split( data, target, test_size=0.1) model = Sequential([ Dense(units=128, activation=relu, input_shape=(train_data.shape[1], )), Dense(units=64, activation=relu), BatchNormalization(), Dense(units=32, activation=relu), Dense(units=32, activation=relu), Dense(units=1) ]) model.compile(optimizer=SGD(), loss=MeanSquaredError(), metrics=[MeanAbsoluteError()]) class MetricLossCallback(Callback): def on_train_batch_end(self, batch, logs=None): if batch % 2 == 0: print(f'[Train] After batch {batch} - loss {logs["loss"]}') def on_test_batch_end(self, batch, logs=None): print(f'[Test] After batch {batch} - loss {logs["loss"]}') def on_epoch_end(self, epoch, logs=None): print( f'Epoch {epoch} avg loss is: {logs["loss"]} MAE is {logs["mean_absolute_error"]}' )
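# Sketch of wiring MetricLossCallback into training; epochs/batch_size are
# illustrative values, and train_data/train_targets come from the split above.
history = model.fit(train_data, train_targets,
                    epochs=3, batch_size=64,
                    validation_split=0.1,
                    callbacks=[MetricLossCallback()],
                    verbose=0)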
def build_model(): """ Description: Building dummy model Args: None Returns: None """ print("Building dummy model....") #main input is the length of the amino acid in the protein sequence (700,) main_input = Input(shape=(700, ), dtype='float32', name='main_input') #Embedding Layer used as input to the neural network embed = Embedding(output_dim=21, input_dim=21, input_length=700)(main_input) #secondary input is the protein profile features auxiliary_input = Input(shape=(700, 21), name='aux_input') #get shape of input layers print("Protein Sequence shape: ", main_input.get_shape()) print("Protein Profile shape: ", auxiliary_input.get_shape()) #concatenate 2 input layers concat = Concatenate(axis=-1)([embed, auxiliary_input]) ######## 1x1D-Convolutional Layers with BatchNormalization, Dropout and MaxPooling ######## conv_layer1 = Conv1D(16, 7, kernel_regularizer="l2", padding='same')(concat) batch_norm = BatchNormalization()(conv_layer1) # conv2D_act = activations.relu(batch_norm) conv2D_act = ReLU()(batch_norm) conv_dropout = Dropout(0.2)(conv2D_act) ############################################################################################ #Final Dense layer with 8 nodes for the 8 output classifications main_output = Dense(8, activation='softmax', name='main_output')(conv_dropout) #create model from inputs and outputs model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output]) #use Adam optimizer adam = Adam(lr=0.00015) #compile model using adam optimizer and the cateogorical crossentropy loss function model.compile(optimizer=adam, loss={'main_output': 'categorical_crossentropy'}, metrics=[ 'accuracy', MeanSquaredError(), FalseNegatives(), FalsePositives(), TrueNegatives(), TruePositives(), MeanAbsoluteError(), Recall(), Precision(), AUC() ]) #print model summary model.summary() return model
def build_model(): #main input is the length of the amino acid in the protein sequence (700,) main_input = Input(shape=(700, ), dtype='float32', name='main_input') #Embedding Layer used as input to the neural network embed = Embedding(output_dim=21, input_dim=21, input_length=700)(main_input) #secondary input is the protein profile features auxiliary_input = Input(shape=(700, 21), name='aux_input') #get shape of input layers print("Protein Sequence shape: ", main_input.get_shape()) print("Protein Profile shape: ", auxiliary_input.get_shape()) #concatenate 2 input layers concat = Concatenate(axis=-1)([embed, auxiliary_input]) #3x1D Convolutional Hidden Layers with BatchNormalization and MaxPooling conv_layer1 = Conv1D(64, 7, kernel_regularizer="l2", padding='same')(concat) batch_norm = BatchNormalization()(conv_layer1) conv2D_act = activations.relu(batch_norm) conv_dropout = Dropout(0.5)(conv2D_act) # ave_pool_1 = AveragePooling1D(2, 1, padding='same')(conv_dropout) max_pool_1D_1 = MaxPooling1D(pool_size=2, strides=1, padding='same')(conv_dropout) conv_layer2 = Conv1D(128, 7, padding='same')(concat) batch_norm = BatchNormalization()(conv_layer2) conv2D_act = activations.relu(batch_norm) conv_dropout = Dropout(0.5)(conv2D_act) # ave_pool_2 = AveragePooling1D(2, 1, padding='same')(conv_dropout) max_pool_1D_2 = MaxPooling1D(pool_size=2, strides=1, padding='same')(conv_dropout) conv_layer3 = Conv1D(256, 7, kernel_regularizer="l2", padding='same')(concat) batch_norm = BatchNormalization()(conv_layer3) conv2D_act = activations.relu(batch_norm) conv_dropout = Dropout(0.5)(conv2D_act) max_pool_1D_3 = MaxPooling1D(pool_size=2, strides=1, padding='same')(conv_dropout) # ave_pool_3 = AveragePooling1D(2, 1, padding='same')(conv_dropout) #concatenate convolutional layers conv_features = Concatenate(axis=-1)( [max_pool_1D_1, max_pool_1D_2, max_pool_1D_3]) #output node is 1D convolutional layer with 8 filters for the 8 different categories main_output = Conv1D(8, 7, padding='same', activation='softmax', name='main_output')(conv_features) #create model from inputs and outputs model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output]) #use Adam optimizer adam = Adam(lr=0.0003) #compile model using adam optimizer and the cateogorical crossentropy loss function model.compile(optimizer=adam, loss={'main_output': 'categorical_crossentropy'}, metrics=[ 'accuracy', MeanSquaredError(), FalseNegatives(), FalsePositives(), TrueNegatives(), TruePositives(), MeanAbsoluteError(), Recall(), Precision() ]) model.summary() #set earlyStopping and checkpoint callback earlyStopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min') checkpoint_path = "checkpoints/3xConv_cnn_" + str( datetime.date(datetime.now())) + ".h5" checkpointer = ModelCheckpoint(filepath=checkpoint_path, verbose=1, save_best_only=True, monitor='val_acc', mode='max') return model
imgs_dir=imgs_dir, out_image_size=(img_size, img_size)) del train_df, val_df gc.collect() model = build_bbox_model( input_size=(img_size, img_size, 3), n_conv_blocks=n_conv_blocks, base_conv_n_filters=base_conv_n_filters, n_dense_layers=2, dense_size=dense_size, dropout_rate=0.30, loss=MeanSquaredError(), optimizer=Adam(), metrics=[ MeanAbsoluteError(), MeanBBoxIoU(x2y2=True) ]) run_experiment(model, exp_name, train_datagen, val_datagen, results_dir=results_dir, tensorboard_logdir=tensorboard_dir) del train_datagen, val_datagen, model gc.collect() tf.keras.backend.clear_session() gc.collect()
def model_builder(self, filter_size: int = 5, seed_val: int = 123, **kwargs) -> tf.keras.Sequential: """ Build and compile a 1D-CNN depending on the given hyper params (parameters_hyper.yaml). Kwargs require a dict like below. { "conv1_length": int, "conv2_length": int, "extra_conv_layer": bool, "conv3_length": int, "dense1_length": int } """ he_norm = he_normal(seed=seed_val) bias_val = zeros() model = models.Sequential() model.add( layers.Conv1D(filters=kwargs["conv1_length"], kernel_size=filter_size, strides=1, padding="same", use_bias=True, input_shape=self.data.shape[1:], kernel_initializer=he_norm, bias_initializer=bias_val, activation="relu")) model.add(layers.MaxPool1D()) model.add( layers.Conv1D(filters=kwargs["conv2_length"], kernel_size=ceil(filter_size / 2), strides=1, padding="same", use_bias=True, kernel_initializer=he_norm, bias_initializer=bias_val, activation="relu")) model.add(layers.MaxPool1D()) if kwargs["extra_conv_layer"]: model.add( layers.Conv1D(filters=kwargs["conv3_length"], kernel_size=ceil(filter_size / 2), strides=1, padding="same", use_bias=True, kernel_initializer=he_norm, bias_initializer=bias_val, activation="relu")) model.add(layers.MaxPool1D()) model.add(layers.Flatten()) model.add( layers.Dense(units=kwargs["dense1_length"], use_bias=True, kernel_initializer=he_norm, bias_initializer=bias_val, activation="relu")) model.add( layers.Dense(units=1, use_bias=True, kernel_initializer=he_norm, bias_initializer=bias_val, activation="relu")) model.compile(optimizer=Adam( learning_rate=parameters.General_Params().initial_lr), loss=MeanAbsolutePercentageError(name="MAPE"), metrics=[ MeanAbsoluteError(name="MAE"), RootMeanSquaredError(name="RMSE") ]) return model
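# Hedged example of the kwargs contract documented in the docstring above;
# the values are placeholders and `tuner` stands for an instance of the class
# that owns model_builder (it must expose self.data).
hyper_params = {
    "conv1_length": 32,
    "conv2_length": 64,
    "extra_conv_layer": True,
    "conv3_length": 128,
    "dense1_length": 64,
}
cnn = tuner.model_builder(filter_size=5, seed_val=123, **hyper_params)
cnn.summary()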
def test_check_metric_serialization_mae():
    check_metric_serialization(MeanAbsoluteError(), (2, 2), (2, 2))
    check_metric_serialization(MeanAbsoluteError(name="hello"), (2, 2), (2, 2))
    check_metric_serialization(MeanAbsoluteError(), (2, 2, 2), (2, 2, 2))
    check_metric_serialization(MeanAbsoluteError(), (2, 2, 2), (2, 2, 2), (2, 2, 1))
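# check_metric_serialization is project-specific; at its core it exercises the
# standard Keras config round trip sketched below for MeanAbsoluteError.
from tensorflow.keras.metrics import MeanAbsoluteError

original = MeanAbsoluteError(name="hello")
restored = MeanAbsoluteError.from_config(original.get_config())
assert restored.name == original.name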
from keras.preprocessing.image import ImageDataGenerator from keras.models import load_model from tensorflow.keras.losses import BinaryCrossentropy, Huber from tensorflow.keras.metrics import BinaryAccuracy, MeanAbsoluteError from rpn.generation import rpn_generator, RPNconfig from rpn.rpn import make_cls_wrapper, make_reg_wrapper, ThresholdedRegularizer, ClsMetricWrapper, RegMetricWrapper import pandas as pd if __name__ == '__main__': seed = 42 rpn_config = RPNconfig.load_json('versions/RPN_v8/rpn_config.json') cls_loss = make_cls_wrapper(BinaryCrossentropy(from_logits=True)) reg_loss = make_reg_wrapper(Huber()) cls_acc = ClsMetricWrapper(BinaryAccuracy(), name='acc') reg_mae = RegMetricWrapper(MeanAbsoluteError(), name='mae') rpn = load_model('versions/RPN_v8/configs/best.h5', custom_objects={ 'ThresholdedRegularizer': ThresholdedRegularizer, 'reg_processer': reg_loss, 'cls_processer': cls_loss }) rpn.compile(optimizer=rpn.optimizer, loss=rpn.loss, metrics={ 'bbox_reg': reg_mae, 'bbox_cls_log': cls_acc }) test_data = pd.read_json('../dataset/test.json') test_generator = ImageDataGenerator(rescale=1. / 255).flow_from_dataframe(
model = Sequential([ Conv2D(filters=16, kernel_size=(3, 3), activation=relu, input_shape=(28, 28, 1)), MaxPooling2D(pool_size=(3, 3)), Flatten(), Dense(units=10, activation=softmax) ]) model.summary() # Compile the model optimizer = Adam(learning_rate=0.002) accuracy = SparseCategoricalAccuracy() mae = MeanAbsoluteError() model.compile(optimizer=optimizer, loss=SparseCategoricalCrossentropy(), metrics=[accuracy, mae]) print(model.optimizer) print(model.optimizer.lr) print(model.loss) print(model.metrics) # Load data fashion_mnist_data = tf.keras.datasets.fashion_mnist (train_imgs, train_lbls), (test_imgs, test_lbls) = fashion_mnist_data.load_data() print(train_imgs.shape)
def build_model_hpconfig(args): """ Description: Building models for hyperparameter Tuning Args: args: input arguments Returns: model (keras model) """ #parsing and assigning hyperparameter variables from argparse conv1_filters = int(args.conv1_filters) conv2_filters = int(args.conv2_filters) conv3_filters = int(args.conv3_filters) window_size = int(args.window_size) kernel_regularizer = args.kernel_regularizer max_pool_size = int(args.pool_size) conv_dropout = float(args.conv_dropout) conv1d_initializer = args.conv_weight_initializer recurrent_layer1 = int(args.recurrent_layer1) recurrent_layer2 = int(args.recurrent_layer2) recurrent_dropout = float(args.recurrent_dropout) after_recurrent_dropout = float(args.after_recurrent_dropout) recurrent_recurrent_dropout = float(args.recurrent_recurrent_dropout) recurrent_initalizer = args.recurrent_weight_initializer optimizer = args.optimizer learning_rate = float(args.learning_rate) bidirection = args.bidirection recurrent_layer = str(args.recurrent_layer) dense_dropout = float(args.dense_dropout) dense_1 = int(args.dense_1) dense_initializer = args.dense_weight_initializer train_data = str(args.train_input_data) #main input is the length of the amino acid in the protein sequence (700,) main_input = Input(shape=(700, ), dtype='float32', name='main_input') #Embedding Layer used as input to the neural network embed = Embedding(output_dim=21, input_dim=21, input_length=700)(main_input) #secondary input is the protein profile features auxiliary_input = Input(shape=(700, 21), name='aux_input') #get shape of input layers print("Protein Sequence shape: ", main_input.get_shape()) print("Protein Profile shape: ", auxiliary_input.get_shape()) #concatenate input layers concat = Concatenate(axis=-1)([embed, auxiliary_input]) #3x1D Convolutional Hidden Layers with BatchNormalization, Dropout and MaxPooling conv_layer1 = Conv1D(conv1_filters, window_size, kernel_regularizer=kernel_regularizer, padding='same', kernel_initializer=conv1d_initializer)(concat) batch_norm = BatchNormalization()(conv_layer1) conv_act = activations.relu(batch_norm) conv_dropout = Dropout(conv_dropout)(conv_act) max_pool_1D_1 = MaxPooling1D(pool_size=max_pool_size, strides=1, padding='same')(conv_dropout) conv_layer2 = Conv1D(conv2_filters, window_size, padding='same', kernel_initializer=conv1d_initializer)(concat) batch_norm = BatchNormalization()(conv_layer2) conv_act = activations.relu(batch_norm) conv_dropout = Dropout(conv_dropout)(conv_act) max_pool_1D_2 = MaxPooling1D(pool_size=max_pool_size, strides=1, padding='same')(conv_dropout) conv_layer3 = Conv1D(conv3_filters, window_size, kernel_regularizer=kernel_regularizer, padding='same', kernel_initializer=conv1d_initializer)(concat) batch_norm = BatchNormalization()(conv_layer3) conv_act = activations.relu(batch_norm) conv_dropout = Dropout(conv_dropout)(conv_act) max_pool_1D_3 = MaxPooling1D(pool_size=max_pool_size, strides=1, padding='same')(conv_dropout) #concat pooling layers conv_features = Concatenate(axis=-1)( [max_pool_1D_1, max_pool_1D_2, max_pool_1D_3]) print("Shape of convolutional output: ", conv_features.get_shape()) conv_features = Dense(600, activation='relu')(conv_features) ######## Recurrent Layers ######## if (recurrent_layer == 'lstm'): if (bidirection): print('Entering LSTM Layers') #Creating Bidirectional LSTM layers lstm_f1 = Bidirectional( LSTM(recurrent_layer1, return_sequences=True, activation='tanh', recurrent_activation='sigmoid', dropout=recurrent_dropout, recurrent_dropout=recurrent_recurrent_dropout, 
kernel_initializer=recurrent_initalizer))(conv_features) lstm_f2 = Bidirectional( LSTM(recurrent_layer2, return_sequences=True, activation='tanh', recurrent_activation='sigmoid', dropout=recurrent_dropout, recurrent_dropout=recurrent_recurrent_dropout, kernel_initializer=recurrent_initalizer))(lstm_f1) #concatenate LSTM with convolutional layers concat_features = Concatenate(axis=-1)( [lstm_f1, lstm_f2, conv_features]) concat_features = Dropout(after_recurrent_dropout)(concat_features) print('Concatenated LSTM layers') else: #Creating unidirectional LSTM Layers lstm_f1 = LSTM( recurrent_layer1, return_sequences=True, activation='tanh', recurrent_activation='sigmoid', dropout=recurrent_dropout, recurrent_dropout=recurrent_recurrent_dropout, kernel_initializer=recurrent_initalizer)(conv_features) lstm_f2 = LSTM(recurrent_layer2, return_sequences=True, activation='tanh', recurrent_activation='sigmoid', dropout=recurrent_dropout, recurrent_dropout=recurrent_recurrent_dropout, kernel_initializer=recurrent_initalizer)(lstm_f1) #concatenate LSTM with convolutional layers concat_features = Concatenate(axis=-1)( [lstm_f1, lstm_f2, conv_features]) concat_features = Dropout(after_recurrent_dropout)(concat_features) elif (recurrent_layer == 'gru'): if (bidirection): #Creating Bidirectional GRU layers gru_f1 = Bidirectional( GRU(recurrent_layer1, return_sequences=True, activation='tanh', recurrent_activation='sigmoid', dropout=recurrent_dropout, recurrent_dropout=recurrent_recurrent_dropout, kernel_initializer=recurrent_initalizer))(conv_features) gru_f2 = Bidirectional( GRU(recurrent_layer2, return_sequences=True, activation='tanh', recurrent_activation='sigmoid', dropout=recurrent_dropout, recurrent_dropout=recurrent_recurrent_dropout, kernel_initializer=recurrent_initalizer))(gru_f1) #concatenate LSTM with convolutional layers concat_features = Concatenate(axis=-1)( [gru_f1, gru_f2, conv_features]) concat_features = Dropout(after_recurrent_dropout)(concat_features) else: #Creating unidirectional GRU Layers gru_f1 = GRU( recurrent_layer1, return_sequences=True, activation='tanh', recurrent_activation='sigmoid', dropout=recurrent_dropout, recurrent_dropout=recurrent_recurrent_dropout, kernel_initializer=recurrent_initalizer)(conv_features) gru_f2 = GRU(recurrent_layer1, return_sequences=True, activation='tanh', recurrent_activation='sigmoid', dropout=recurrent_dropout, recurrent_dropout=recurrent_recurrent_dropout, kernel_initializer=recurrent_initalizer)(gru_f1) #concatenate LSTM with convolutional layers concat_features = Concatenate(axis=-1)( [gru_f1, gru_f2, conv_features]) concat_features = Dropout(after_recurrent_dropout)(concat_features) else: print('Only LSTM and GRU recurrent layers are used in this model') return #Dense Fully-Connected DNN layers fc_dense1 = Dense(dense_1, activation='relu', kernel_initializer=dense_initializer)(concat_features) fc_dense1_dropout = Dropout(dense_dropout)(fc_dense1) #Final Output layer with 8 nodes for the 8 output classifications main_output = Dense(8, activation='softmax', name='main_output')(fc_dense1_dropout) #create model from inputs and outputs model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output]) #Set optimizer to be used with the model, default is Adam if optimizer == 'adam': optimizer = Adam(lr=learning_rate, name='adam') elif optimizer == 'sgd': optimizer = SGD(lr=0.01, momentum=0.0, nesterov=False, name='SGD') elif optimizer == 'rmsprop': optimizer = RMSprop(learning_rate=learning_rate, centered=True, name='RMSprop') elif 
optimizer == 'adagrad': optimizer = Adagrad(learning_rate=learning_rate, name='Adagrad') elif optimizer == 'adamax': optimizer = Adamax(learning_rate=learning_rate, name='Adamax') else: optimizer = 'adam' optimizer = Adam(lr=learning_rate, name='adam') #compile model using optimizer and the cateogorical crossentropy loss function model.compile(optimizer=optimizer, loss={'main_output': 'categorical_crossentropy'}, metrics=[ 'accuracy', MeanSquaredError(), FalseNegatives(), FalsePositives(), TrueNegatives(), TruePositives(), MeanAbsoluteError(), Recall(), Precision() ]) #get summary of model including its layers and num parameters model.summary() return model
def build_model(): """ Description: Building DCBGRU model Args: None Returns: None """ #main input is the length of the amino acid in the protein sequence (700,) main_input = Input(shape=(700, ), dtype='float32', name='main_input') #Embedding Layer used as input to the neural network embed = Embedding(output_dim=21, input_dim=21, input_length=700)(main_input) #secondary input is the protein profile features auxiliary_input = Input(shape=(700, 21), name='aux_input') #get shape of input layers print("Protein Sequence shape: ", main_input.get_shape()) print("Protein Profile shape: ", auxiliary_input.get_shape()) #concatenate 2 input layers concat = Concatenate(axis=-1)([embed, auxiliary_input]) #3x1D Convolutional Hidden Layers with BatchNormalization, Dropout and MaxPooling conv_layer1 = Conv1D(16, 7, kernel_regularizer="l2", padding='same')(concat) batch_norm = BatchNormalization()(conv_layer1) conv_act = activations.relu(batch_norm) conv_dropout = Dropout(0.2)(conv_act) max_pool_1D_1 = MaxPooling1D(pool_size=2, strides=1, padding='same')(conv_dropout) conv_layer2 = Conv1D(32, 7, padding='same')(concat) batch_norm = BatchNormalization()(conv_layer2) conv_act = activations.relu(batch_norm) conv_dropout = Dropout(0.2)(conv_act) max_pool_1D_2 = MaxPooling1D(pool_size=2, strides=1, padding='same')(conv_dropout) conv_layer3 = Conv1D(64, 7, kernel_regularizer="l2", padding='same')(concat) batch_norm = BatchNormalization()(conv_layer3) conv_act = activations.relu(batch_norm) conv_dropout = Dropout(0.2)(conv_act) max_pool_1D_3 = MaxPooling1D(pool_size=2, strides=1, padding='same')(conv_dropout) ############################################################################################ #concatenate convolutional layers conv_features = Concatenate(axis=-1)( [max_pool_1D_1, max_pool_1D_2, max_pool_1D_3]) #dense layer before GRU's gru_dense = Dense(600, activation='relu', name="after_cnn_dense")(conv_features) ######## Recurrent Unidirectional Long-Short-Term-Memory Layers ######## gru_f1 = Bidirectional( GRU(200, return_sequences=True, activation='tanh', recurrent_activation='sigmoid', dropout=0.5, recurrent_dropout=0.5))(gru_dense) gru_f2 = Bidirectional( GRU(200, return_sequences=True, activation='tanh', recurrent_activation='sigmoid', dropout=0.5, recurrent_dropout=0.5))(gru_f1) ############################################################################################ #concatenate GRU with convolutional layers concat_features = Concatenate(axis=-1)([gru_f1, gru_f2, gru_dense]) concat_features = Dropout(0.4)(concat_features) #Dense Fully-Connected DNN layers after_gru_dense = Dense(600, activation='relu')(concat_features) after_gru_dense_dropout = Dropout(0.3)(after_gru_dense) #Final Dense layer with 8 nodes for the 8 output classifications main_output = Dense(8, activation='softmax', name='main_output')(after_gru_dense_dropout) #create model from inputs and outputs model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output]) #use Adam optimizer adam = Adam(lr=0.00015) #compile model using adam optimizer and the cateogorical crossentropy loss function model.compile(optimizer=adam, loss={'main_output': 'categorical_crossentropy'}, metrics=[ 'accuracy', MeanSquaredError(), FalseNegatives(), FalsePositives(), TrueNegatives(), TruePositives(), MeanAbsoluteError(), Recall(), Precision() ]) #print model summary model.summary() return model
def build_model(): """ Description: Building PSP-CD model Args: None Returns: None """ #main input is the length of the amino acid in the protein sequence (700,) main_input = Input(shape=(700, ), dtype='float32', name='main_input') #Embedding Layer used as input to the neural network embed = Embedding(output_dim=21, input_dim=21, input_length=700)(main_input) #secondary input is the protein profile features auxiliary_input = Input(shape=(700, 21), name='aux_input') #get shape of input layers print("Protein Sequence shape: ", main_input.get_shape()) print("Protein Profile shape: ", auxiliary_input.get_shape()) #concatenate 2 input layers concat_features = Concatenate(axis=-1)([embed, auxiliary_input]) ############################################################################################ #Dense Fully-Connected DNN layers dense_1 = Dense(512, activation='relu')(concat_features) dense_1_dropout = Dropout(0.3)(dense_1) dense_2 = Dense(256, activation='relu')(dense_1_dropout) dense_2_dropout = Dropout(0.3)(dense_2) dense_3 = Dense(128, activation='relu')(dense_2_dropout) dense_3_dropout = Dropout(0.3)(dense_3) dense_4 = Dense(64, activation='relu')(dense_3_dropout) dense_4_dropout = Dropout(0.3)(dense_4) dense_5 = Dense(32, activation='relu')(dense_4_dropout) dense_5_dropout = Dropout(0.3)(dense_5) dense_6 = Dense(16, activation='relu')(dense_5_dropout) dense_6_dropout = Dropout(0.3)(dense_6) #Final Dense layer with 8 nodes for the 8 output classifications main_output = Dense(8, activation='softmax', name='main_output')(dense_6_dropout) #create model from inputs and outputs model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output]) #use Adam optimizer adam = Adam(lr=0.00015) #compile model using adam optimizer and the cateogorical crossentropy loss function model.compile(optimizer=adam, loss={'main_output': 'categorical_crossentropy'}, metrics=[ 'accuracy', MeanSquaredError(), FalseNegatives(), FalsePositives(), TrueNegatives(), TruePositives(), MeanAbsoluteError(), Recall(), Precision() ]) #print model summary model.summary() return model
def build_model():
    #main input is the length of the amino acid in the protein sequence (700,)
    main_input = Input(shape=(700, ), dtype='float32', name='main_input')

    #Embedding Layer used as input to the neural network
    embed = Embedding(output_dim=21, input_dim=21, input_length=700)(main_input)

    #secondary input is the protein profile features
    auxiliary_input = Input(shape=(700, 21), name='aux_input')
    #auxiliary_input = Masking(mask_value=0)(auxiliary_input)

    #get shape of input layers
    print("Protein Sequence shape: ", main_input.get_shape())
    print("Protein Profile shape: ", auxiliary_input.get_shape())

    #concatenate 2 input layers
    concat = Concatenate(axis=-1)([embed, auxiliary_input])

    #dropout rate for the fully-connected layers (assumed value)
    dense_dropout = 0.3

    ######## Recurrent Bi-Directional Long-Short-Term-Memory Layers ########
    lstm_f1 = Bidirectional(
        LSTM(400, return_sequences=True, activation='tanh',
             recurrent_activation='sigmoid', dropout=0.5,
             recurrent_dropout=0.5))(concat)

    lstm_f2 = Bidirectional(
        LSTM(300, return_sequences=True, activation='tanh',
             recurrent_activation='sigmoid', dropout=0.5,
             recurrent_dropout=0.5))(lstm_f1)

    #concatenate LSTM layers with the embedded/profile input features
    concat_features = Concatenate(axis=-1)([lstm_f1, lstm_f2, concat])
    concat_features = Dropout(0.4)(concat_features)

    #Dense Fully-Connected DNN layers
    dense_1 = Dense(300, activation='relu')(concat_features)
    dense_1_dropout = Dropout(dense_dropout)(dense_1)
    dense_2 = Dense(100, activation='relu')(dense_1_dropout)
    dense_2_dropout = Dropout(dense_dropout)(dense_2)
    dense_3 = Dense(50, activation='relu')(dense_2_dropout)
    dense_3_dropout = Dropout(dense_dropout)(dense_3)
    dense_4 = Dense(16, activation='relu')(dense_3_dropout)
    dense_4_dropout = Dropout(dense_dropout)(dense_4)

    #Final Dense layer with 8 nodes for the 8 output classifications
    main_output = Dense(8, activation='softmax', name='main_output')(dense_4_dropout)

    #create model from inputs and outputs
    model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output])

    #use Adam optimizer
    #Adam is fast but tends to over-fit; SGD is slow but gives great results,
    #sometimes RMSProp works best; SWA can easily improve quality, AdaTune
    adam = Adam(lr=0.0003)

    #compile model using adam optimizer and the categorical crossentropy loss function
    model.compile(optimizer=adam,
                  loss={'main_output': 'categorical_crossentropy'},
                  metrics=['accuracy', MeanSquaredError(), FalseNegatives(),
                           FalsePositives(), TrueNegatives(), TruePositives(),
                           MeanAbsoluteError(), Recall(), Precision()])
    model.summary()

    #set earlyStopping and checkpoint callback
    earlyStopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min')
    checkpoint_path = "/blstm_3x1Dconv_dnn_" + str(datetime.date(datetime.now())) + ".h5"
    checkpointer = ModelCheckpoint(filepath=checkpoint_path, verbose=1,
                                   save_best_only=True, monitor='val_acc', mode='max')

    return model
y_prediction = model.predict([
    cases_test,
    interventions_test.reshape(interventions_test.shape[0], 1, n_steps_in * 5)
], verbose=2)
y_prediction = y_prediction.reshape(y_prediction.shape[0], n_steps_out)
prediction = std_scaler.inverse_transform(y_prediction)
# prediction = y_prediction
y_test = std_scaler.inverse_transform(y[test])
# y_test = y[test]

plt.scatter(range(len(y_test)), y_test)
plt.scatter(range(len(prediction)), prediction)
plt.legend(['true', 'prediction'])
plt.show()

mae_metric = MeanAbsoluteError()
mape_metric = MeanAbsolutePercentageError()
mse = mean_squared_error(y_test, prediction)
rmse = sqrt(mse)
mae = mae_metric(y_test, prediction).numpy()
# stateful metric: accumulate with update_state(), then read result();
# compare the unscaled prediction against the unscaled targets
mape_metric.update_state(y_test, prediction)
mape = mape_metric.result().numpy()
dc = coeff_determination_numpy(y_test, prediction)

print('mse loss for fold {} is {:,}'.format(fold_no, round(mse)))
print('rmse loss for fold {} is {:,}'.format(fold_no, round(rmse)))
print('mae loss for fold {} is {:,}'.format(fold_no, round(mae)))
print('mape loss for fold {} is {:,}'.format(fold_no, round(mape)))
print('determination coefficient for fold {} is {:,}'.format(fold_no, dc))

loss_per_fold = np.append(loss_per_fold, [[mse, rmse, mae, mape, dc]], axis=0)
if mse <= np.min(loss_per_fold[:, 0]):
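# Keras metrics are stateful: update_state() accumulates and result() reads the
# running value, as in this toy example with MeanAbsolutePercentageError.
demo_metric = MeanAbsolutePercentageError()
demo_metric.update_state([100.0, 200.0], [110.0, 180.0])
print(demo_metric.result().numpy())  # 10.0 (both errors are 10%)
demo_metric.reset_states()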
if os.path.isdir(aux_exp_dir): shutil.rmtree(aux_exp_dir) if os.path.isdir(aux_tensorboard_dir): shutil.rmtree(aux_tensorboard_dir) train_df, val_df, _ = get_train_val_test_dfs(bboxs_csv, splits_csv) train_datagen = BBoxsGenerator(train_df, imgs_dir=imgs_dir, out_image_size=(img_size, img_size), resize=(img_size!=224)) val_datagen = BBoxsGenerator(val_df, imgs_dir=imgs_dir, out_image_size=(img_size, img_size), resize=(img_size!=224)) del train_df, val_df gc.collect() model = build_bbox_model(input_size=(img_size, img_size, 3), n_conv_blocks=n_conv_blocks, base_conv_n_filters=base_conv_n_filters, n_dense_layers=2, dense_size=dense_size, dropout_rate=0.30, loss=MeanSquaredError(), optimizer=Adamax(), metrics=[MeanAbsoluteError(), MeanBBoxIoU(x2y2=True)]) run_experiment(model, exp_name, train_datagen, val_datagen, results_dir=results_dir, tensorboard_logdir=tensorboard_dir, generator_queue_size=50, generator_workers=8, use_multiprocessing=False) del train_datagen, val_datagen, model gc.collect() tf.keras.backend.clear_session() gc.collect() ### Temporary ### sys.exit(0) ###########
def _training_loop( self, filepath: str, train_gen: train_ts_generator, # can name of function be type? val_gen: train_ts_generator, epochs: int = 100, steps_per_epoch: int = 50, early_stopping: int = True, stopping_patience: int = 5, stopping_delta: int = 1, ) -> typing.Tuple[tf.Tensor, int]: """ util function iterates over batches, updates gradients, records metrics, writes to tb, checkpoints, early stopping """ # set up metrics to track during training batch_loss_avg = Mean() epoch_loss_avg = Mean() eval_loss_avg = Mean() eval_mae = MeanAbsoluteError() eval_rmse = RootMeanSquaredError() # set up early stopping callback early_stopping_cb = EarlyStopping(patience=stopping_patience, active=early_stopping, delta=stopping_delta) # setup table for unscaling self._lookup_table = build_tf_lookup(self._ts_obj.target_means) # Iterate over epochs. best_metric = math.inf for epoch in range(epochs): logger.info(f"Start of epoch {epoch}") start_time = time.time() for batch, (x_batch_train, cat_labels, y_batch_train) in enumerate(train_gen): # compute loss with tf.GradientTape(persistent=True) as tape: mu, scale = self._model(x_batch_train, training=True) # softplus parameters scale = softplus(scale) if self._ts_obj.count_data: mu = softplus(mu) mu, scale = unscale(mu, scale, cat_labels, self._lookup_table) loss_value = self._loss_fn(y_batch_train, (mu, scale)) # sgd if self._tb: tf.summary.scalar("train_loss", loss_value, epoch * steps_per_epoch + batch) batch_loss_avg(loss_value) epoch_loss_avg(loss_value) grads = tape.gradient(loss_value, self._model.trainable_weights) self._optimizer.apply_gradients( zip(grads, self._model.trainable_weights)) # Log 5x per epoch. if batch % (steps_per_epoch // 5) == 0 and batch != 0: logger.info( f"Epoch {epoch}: Avg train loss over last {(steps_per_epoch // 5)} steps: {batch_loss_avg.result()}" ) batch_loss_avg.reset_states() # Run each epoch batches times epoch_loss_avg_result = epoch_loss_avg.result() if batch == steps_per_epoch: logger.info( f"Epoch {epoch} took {round(time.time() - start_time, 0)}s : Avg train loss: {epoch_loss_avg_result}" ) break # validation if val_gen is not None: logger.info(f"End of epoch {epoch}, validating...") start_time = time.time() for batch, (x_batch_val, cat_labels, y_batch_val) in enumerate(val_gen): # compute loss, doesn't need to be persistent bc not updating weights with tf.GradientTape() as tape: # treat as training -> reset lstm states inbetween each batch mu, scale = self._model(x_batch_val, training=True) # softplus parameters mu, scale = self._softplus(mu, scale) # unscale parameters mu, scale = unscale(mu, scale, cat_labels, self._lookup_table) # calculate loss loss_value = self._loss_fn(y_batch_val, (mu, scale)) # log validation metrics (avg loss, avg MAE, avg RMSE) eval_mae(y_batch_val, mu) eval_rmse(y_batch_val, mu) eval_loss_avg(loss_value) if batch == steps_per_epoch: break # logging eval_mae_result = eval_mae.result() logger.info( f"Validation took {round(time.time() - start_time, 0)}s") logger.info( f"Epoch {epoch}: Val loss on {steps_per_epoch} steps: {eval_loss_avg.result()}" ) logger.info( f"Epoch {epoch}: Val MAE: {eval_mae_result}, RMSE: {eval_rmse.result()}" ) if self._tb: tf.summary.scalar("val_loss", eval_loss_avg.result(), epoch) tf.summary.scalar("val_mae", eval_mae_result, epoch) tf.summary.scalar("val_rmse", eval_rmse.result(), epoch) new_metric = eval_mae_result # early stopping if early_stopping_cb(eval_mae_result): break # reset metric states eval_loss_avg.reset_states() eval_mae.reset_states() 
eval_rmse.reset_states() else: if early_stopping_cb(epoch_loss_avg_result): break new_metric = epoch_loss_avg_result # update best_metric and save new checkpoint if improvement if new_metric < best_metric: best_metric = new_metric if filepath is not None: self._checkpointer.save(file_prefix=filepath) else: self.save_weights("model_best_weights.h5") # reset epoch loss metric epoch_loss_avg.reset_states() # load in best weights before returning if not using checkpointer if filepath is None: self.load_weights("model_best_weights.h5") os.remove("model_best_weights.h5") return best_metric, epoch + 1
def build_model():
    #main input is the length of the amino acid in the protein sequence (700,)
    main_input = Input(shape=(700, ), dtype='float32', name='main_input')

    #Embedding Layer used as input to the neural network
    embed = Embedding(output_dim=21, input_dim=21, input_length=700)(main_input)

    #secondary input is the protein profile features
    auxiliary_input = Input(shape=(700, 21), name='aux_input')

    #get shape of input layers
    print("Protein Sequence shape: ", main_input.get_shape())
    print("Protein Profile shape: ", auxiliary_input.get_shape())

    #concatenate 2 input layers
    concat = Concatenate(axis=-1)([embed, auxiliary_input])

    #learning rate and dense-layer dropout (assumed values)
    lr = 0.0003
    dense_dropout = 0.3

    #3x1D Convolutional Hidden Layers with BatchNormalization and MaxPooling
    conv_layer1 = Convolution1D(64, 7, kernel_regularizer="l2", padding='same')(concat)
    batch_norm = BatchNormalization()(conv_layer1)
    conv2D_act = activations.relu(batch_norm)
    conv_dropout = Dropout(0.5)(conv2D_act)
    max_pool_2D_1 = MaxPooling1D(pool_size=2, strides=1, padding='same')(conv_dropout)

    conv_layer2 = Convolution1D(128, 7, padding='same')(concat)
    batch_norm = BatchNormalization()(conv_layer2)
    conv2D_act = activations.relu(batch_norm)
    conv_dropout = Dropout(0.5)(conv2D_act)
    max_pool_2D_2 = MaxPooling1D(pool_size=2, strides=1, padding='same')(conv_dropout)

    conv_layer3 = Convolution1D(256, 7, kernel_regularizer="l2", padding='same')(concat)
    batch_norm = BatchNormalization()(conv_layer3)
    conv2D_act = activations.relu(batch_norm)
    conv_dropout = Dropout(0.5)(conv2D_act)
    max_pool_2D_3 = MaxPooling1D(pool_size=2, strides=1, padding='same')(conv_dropout)

    #concatenate convolutional layers
    conv_features = Concatenate(axis=-1)([max_pool_2D_1, max_pool_2D_2, max_pool_2D_3])

    #Dense Fully-Connected DNN layers
    dense_1 = Dense(300, activation='relu')(conv_features)
    dense_1_dropout = Dropout(dense_dropout)(dense_1)
    dense_2 = Dense(100, activation='relu')(dense_1_dropout)
    dense_2_dropout = Dropout(dense_dropout)(dense_2)
    dense_3 = Dense(50, activation='relu')(dense_2_dropout)
    dense_3_dropout = Dropout(dense_dropout)(dense_3)
    dense_4 = Dense(16, activation='relu')(dense_3_dropout)
    dense_4_dropout = Dropout(dense_dropout)(dense_4)

    #Final Dense layer with 8 nodes for the 8 output classifications
    main_output = Dense(8, activation='softmax', name='main_output')(dense_4_dropout)

    #create model from inputs and outputs
    model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output])

    #use Adam optimizer
    #Adam is fast but tends to over-fit; SGD is slow but gives great results,
    #sometimes RMSProp works best; SWA can easily improve quality, AdaTune
    adam = Adam(lr=lr)

    #compile model using adam optimizer and the categorical crossentropy loss function
    model.compile(optimizer=adam,
                  loss={'main_output': 'categorical_crossentropy'},
                  metrics=['accuracy', MeanSquaredError(), FalseNegatives(),
                           FalsePositives(), TrueNegatives(), TruePositives(),
                           MeanAbsoluteError(), Recall(), Precision()])
    model.summary()

    #set earlyStopping and checkpoint callback
    earlyStopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min')
    checkpoint_path = "/3x1DConv_dnn_" + str(datetime.date(datetime.now())) + ".h5"
    checkpointer = ModelCheckpoint(filepath=checkpoint_path, verbose=1,
                                   save_best_only=True, monitor='val_acc', mode='max')

    return model
def build_model_hpconfig(args): #parsing and assigning hyperparameter variables from argparse conv1_filters=int(args.conv1_filters) conv2_filters=int(args.conv2_filters) conv3_filters=int(args.conv3_filters) window_size=int(args.window_size) kernel_regularizer = args.kernel_regularizer conv_dropout=float(args.conv2d_dropout) pool_size = int(args.pool_size) conv2d_activation=args.conv2d_activation conv2d_dropout=float(args.conv2d_dropout) recurrent_layer1 = int(args.recurrent_layer1) recurrent_layer2 = int(args.recurrent_layer2) recurrent_dropout = float(args.recurrent_dropout) after_recurrent_dropout = float(args.after_recurrent_dropout) recurrent_recurrent_dropout = float(args.recurrent_recurrent_dropout) optimizer=args.optimizer learning_rate = float(args.learning_rate) bidirection = args.bidirection recurrent_layer = args.recurrent_layer dense_dropout = float(args.dense_dropout) dense_1 = int(args.dense_1) dense_2 = int(args.dense_2) dense_3 = int(args.dense_3) dense_4 = int(args.dense_4) #main input is the length of the amino acid in the protein sequence (700,) main_input = Input(shape=(700,), dtype='float32', name='main_input') #Embedding Layer used as input to the neural network embed = Embedding(output_dim=21, input_dim=21, input_length=700)(main_input) #secondary input is the protein profile features auxiliary_input = Input(shape=(700,21), name='aux_input') #concatenate input layers concat = Concatenate(axis=-1)([embed, auxiliary_input]) conv_layer1 = Convolution1D(conv1_filters, window_size, kernel_regularizer = "l2", padding='same')(concat) batch_norm = BatchNormalization()(conv_layer1) conv2D_act = activations.relu(batch_norm) conv_dropout = Dropout(conv_dropout)(conv2D_act) # ave_pool_1 = AveragePooling1D(3, 1, padding='same')(conv_dropout) max_pool_1D_1 = MaxPooling1D(pool_size=pool_size, strides=1, padding='same')(conv_dropout) conv_layer2 = Convolution1D(conv2_filters, window_size, padding='same')(concat) batch_norm = BatchNormalization()(conv_layer2) conv2D_act = activations.relu(batch_norm) conv_dropout = Dropout(conv_dropout)(conv2D_act) # ave_pool_2 = AveragePooling1D(3, 1, padding='same')(conv_dropout) max_pool_1D_2 = MaxPooling1D(pool_size=pool_size, strides=1, padding='same')(conv_dropout) conv_layer3 = Convolution1D(conv3_filters, window_size,kernel_regularizer = "l2", padding='same')(concat) batch_norm = BatchNormalization()(conv_layer3) conv2D_act = activations.relu(batch_norm) conv_dropout = Dropout(conv_dropout)(conv2D_act) max_pool_1D_3 = MaxPooling1D(pool_size=pool_size, strides=1, padding='same')(conv_dropout) #concat pooling layers conv_features = Concatenate(axis=-1)([max_pool_1D_1, max_pool_1D_2, max_pool_1D_3]) ######## Recurrent Layers ######## if (recurrent_layer == 'lstm'): if (bidirection): #Creating Bidirectional LSTM layers lstm_f1 = Bidirectional(LSTM(recurrent_layer1,return_sequences=True,activation = 'tanh', recurrent_activation='sigmoid',dropout=recurrent_dropout, recurrent_dropout=recurrent_recurrent_dropout))(conv_features) lstm_f2 = Bidirectional(LSTM(recurrent_layer2, return_sequences=True,activation = 'tanh',recurrent_activation='sigmoid',dropout=recurrent_dropout,recurrent_dropout=recurrent_recurrent_dropout))(lstm_f1) #concatenate LSTM with convolutional layers concat_features = Concatenate(axis=-1)([lstm_f1, lstm_f2, conv2_features]) concat_features = Dropout(after_recurrent_dropout)(concat_features) else: #Creating unidirectional LSTM Layers lstm_f1 = LSTM(recurrent_layer1,return_sequences=True,activation = 'tanh', 
recurrent_activation='sigmoid',dropout=recurrent_dropout,recurrent_dropout=recurrent_recurrent_dropout)(conv_features) lstm_f2 = LSTM(recurrent_layer2, return_sequences=True,activation = 'tanh',recurrent_activation='sigmoid',dropout=recurrent_dropout,recurrent_dropout=recurrent_recurrent_dropout)(lstm_f1) #concatenate LSTM with convolutional layers concat_features = Concatenate(axis=-1)([lstm_f1, lstm_f2, conv_features]) concat_features = Dropout(after_recurrent_dropout)(concat_features) elif (recurrent_layer == 'gru'): if (bidirection): #Creating Bidirectional GRU layers gru_f1 = Bidirectional(GRU(recurrent_layer1,return_sequences=True,activation = 'tanh', recurrent_activation='sigmoid',dropout=recurrent_dropout,recurrent_dropout=recurrent_recurrent_dropout))(conv_features) gru_f2 = Bidirectional(GRU(recurrent_layer2, return_sequences=True,activation = 'tanh',recurrent_activation='sigmoid',dropout=recurrent_dropout,recurrent_dropout=recurrent_recurrent_dropout))(gru_f1) #concatenate LSTM with convolutional layers concat_features = Concatenate(axis=-1)([gru_f1, gru_f2, conv_features]) concat_features = Dropout(after_recurrent_dropout)(concat_features) else: #Creating unidirectional GRU Layers gru_f1 = GRU(recurrent_layer1,return_sequences=True,activation = 'tanh', recurrent_activation='sigmoid',dropout=recurrent_dropout,recurrent_dropout=recurrent_recurrent_dropout)(conv_features) gru_f2 = GRU(recurrent_layer1, return_sequences=True,activation = 'tanh',recurrent_activation='sigmoid',dropout=recurrent_dropout,recurrent_dropout=recurrent_recurrent_dropout)(gru_f1) #concatenate LSTM with convolutional layers concat_features = Concatenate(axis=-1)([gru_f1, gru_f2, conv_features]) concat_features = Dropout(after_recurrent_dropout)(concat_features) else: print('Only LSTM and GRU recurrent layers are used in this model') return #Dense Fully-Connected DNN layers # concat_features = Flatten()(concat_features) fc_dense1 = Dense(dense_1, activation='relu')(concat_features) fc_dense1_dropout = Dropout(dense_dropout)(fc_dense1) fc_dense2 = Dense(dense_2, activation='relu')(fc_dense1_dropout) fc_dense2_dropout = Dropout(dense_dropout)(fc_dense2) fc_dense3 = Dense(dense_3, activation='relu')(fc_dense2_dropout) fc_dense3_dropout = Dropout(dense_dropout)(fc_dense3) #Final Output layer with 8 nodes for the 8 output classifications # main_output = Dense(8, activation='softmax', name='main_output')(concat_features) main_output = Dense(8, activation='softmax', name='main_output')(fc_dense3_dropout) #create model from inputs and outputs model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output]) #Set optimizer to be used with the model, default is Adam if optimizer == 'adam': optimizer = Adam(lr=learning_rate, name='adam') elif optimizer == 'sgd': optimizer = SGD(lr=0.01, momentum=0.0, nestero=False, name='SGD') elif optimizer == 'rmsprop': optimizer = RMSprop(learning_rate=learning_rate, centered = True, name='RMSprop') elif optimizer == 'adagrad': optimizer = Adagrad(learning_rate = learning_rate, name='Adagrad') elif optimizer == 'adamax': optimizer = Adamax(learning_rate=learning_rate, name='Adamax') else: optimizer = 'adam' optimizer = Adam(lr=learning_rate, name='adam') #Nadam & Ftrl optimizers #use Adam optimizer #optimizer = Adam(lr=0.003) #Adam is fast, but tends to over-fit #SGD is low but gives great results, sometimes RMSProp works best, SWA can easily improve quality, AdaTune #compile model using optimizer and the cateogorical crossentropy loss function model.compile(optimizer = 
optimizer, loss={'main_output': 'categorical_crossentropy'}, metrics=['accuracy', MeanSquaredError(), FalseNegatives(), FalsePositives(), TrueNegatives(), TruePositives(), MeanAbsoluteError(), Recall(), Precision()]) #get summary of model including its layers and num parameters model.summary() #set early stopping and checkpoints for model earlyStopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min') checkpoint_path = BUCKET_PATH + "/checkpoints/" + str(datetime.date(datetime.now())) +\ '_' + str((datetime.now().strftime('%H:%M'))) + ".h5" checkpointer = ModelCheckpoint(filepath=checkpoint_path,verbose=1,save_best_only=True, monitor='val_acc', mode='max') return model