def test_mpo_num_parameters(dummy_data):
    # Disable the redefined-outer-name violation in this function
    # pylint: disable=redefined-outer-name
    data, _ = dummy_data
    output_dim = data.shape[1]
    num_nodes = int(math.log(data.shape[1], 8))
    bond_dim = 8

    model = Sequential()
    model.add(
        DenseMPO(output_dim,
                 num_nodes=num_nodes,
                 bond_dim=bond_dim,
                 use_bias=True,
                 activation='relu',
                 input_shape=(data.shape[1],)))

    in_leg_dim = math.ceil(data.shape[1]**(1. / num_nodes))
    out_leg_dim = math.ceil(output_dim**(1. / num_nodes))

    # num_params = num_edge_node_params + num_middle_node_params + bias_params
    expected_num_parameters = (2 * in_leg_dim * bond_dim * out_leg_dim) + (
        (num_nodes - 2) * in_leg_dim * bond_dim * bond_dim * out_leg_dim
    ) + output_dim

    np.testing.assert_equal(expected_num_parameters, model.count_params())
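# The parameter-count tests in this file rely on a `dummy_data` fixture that
# is not shown in this excerpt. A minimal sketch of what it could look like,
# assuming a feature dimension of 64 (a power of 8 for the MPO test and a
# perfect cube for the entangler test):
import numpy as np
import pytest


@pytest.fixture
def dummy_data():
    # 10 samples, 64 features; the labels are unused by these tests.
    data = np.random.rand(10, 64).astype(np.float32)
    labels = np.random.randint(2, size=(10,))
    return data, labels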
def test_entangler_asymmetric_num_parameters_output_shape(
        num_legs, num_levels, leg_dims):
    leg_dim, out_leg_dim = leg_dims
    data_shape = (leg_dim**num_legs,)

    model = Sequential()
    model.add(
        DenseEntangler(out_leg_dim**num_legs,
                       num_legs=num_legs,
                       num_levels=num_levels,
                       use_bias=True,
                       activation='relu',
                       input_shape=data_shape))

    primary = leg_dim
    secondary = out_leg_dim
    if leg_dim > out_leg_dim:
        primary, secondary = secondary, primary

    expected_num_parameters = (num_levels - 1) * (num_legs - 1) * (
        primary**4) + (num_legs - 2) * primary**3 * secondary + (
            primary**2 * secondary**2) + out_leg_dim**num_legs

    np.testing.assert_equal(expected_num_parameters, model.count_params())

    data = np.random.randint(10, size=(10, data_shape[0]))
    out = model(data)
    np.testing.assert_equal(out.shape, (data.shape[0], out_leg_dim**num_legs))
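# This test is presumably driven by a pytest parametrization that is not
# shown here; one plausible (assumed) decoration would be:
#
#   @pytest.mark.parametrize('num_legs,num_levels,leg_dims',
#                            [(2, 1, (2, 4)), (3, 2, (2, 4))])
#
# Worked check of the formula for num_legs=3, num_levels=2, leg_dim=2,
# out_leg_dim=4 (so primary=2, secondary=4):
#   (2 - 1) * (3 - 1) * 2**4  =  32
#   (3 - 2) * 2**3 * 4        =  32
#   2**2 * 4**2               =  64
#   4**3                      =  64   (bias)
#   total                     = 192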
def build(self):
    model = Sequential()
    model.add(
        Conv2D(filters=self.filters,
               kernel_size=self.kernel_size,
               input_shape=self.input_shape,
               activation=self.activation))
    model.add(MaxPooling2D(pool_size=self.pool_size))
    model.add(Dropout(self.dropout))
    model.add(
        Conv2D(filters=self.filters * 2,
               kernel_size=self.kernel_size,
               activation=self.activation))
    model.add(MaxPooling2D(pool_size=self.pool_size))
    model.add(Dropout(self.dropout))
    model.add(
        Conv2D(filters=self.filters * 4,
               kernel_size=self.kernel_size,
               activation=self.activation))
    model.add(MaxPooling2D(pool_size=self.pool_size))
    model.add(Dropout(self.dropout))
    model.add(Flatten())
    model.add(Dense(self.num_labels, activation='softmax'))
    model.compile(loss=self.loss,
                  optimizer=self.optimizer,
                  metrics=[self.metrics])
    num_params = model.count_params()
    print('# network parameters: ' + str(num_params))
    model.summary()
    self.model = model
def compile(self, **params):
    # params = {'units': 100, 'num_epochs': 5, 'batch_size': 32,
    #           'dropout': 0., 'optimizer': 'RMSprop', 'lrate': 0.}
    self.num_layers = params['num_layers']
    self.units = params['units']
    self.batch_size = params['batch_size']
    self.timesteps = params['timesteps']
    self.input_dim = params['input_dim']
    self.output_dim = params['output_dim']
    self.optimizer = params['optimizer']
    self.seed = params['seed']
    # Note: self.dropout and self.stateful are used below but not read from
    # params here; they are assumed to be set elsewhere (e.g. in __init__).

    if self.optimizer == 'RMSprop':
        optim = RMSprop(lr=params['lrate'])
    else:
        optim = Adam(lr=params['lrate'])

    # Create a new model or load an existing one
    if params['load']:
        print("Loading existing model " + params['load_name'])
        model = load_model(params['load_name'], custom_objects={'rmse': rmse})
    else:
        model = Sequential()
        if self.num_layers > 1:
            # Stacked LSTMs: all but the last layer return full sequences.
            for i in range(self.num_layers - 1):
                model.add(
                    LSTM(self.units,
                         input_shape=(self.timesteps, self.input_dim),
                         dropout=self.dropout,
                         stateful=self.stateful,
                         return_sequences=True))
            model.add(
                LSTM(self.units,
                     input_shape=(self.timesteps, self.input_dim),
                     dropout=self.dropout,
                     stateful=self.stateful))
        else:
            model.add(
                LSTM(self.units,
                     input_shape=(self.timesteps, self.input_dim),
                     dropout=self.dropout,
                     stateful=self.stateful))
        if self.dropout > 0.:
            model.add(Dropout(self.dropout))
        model.add(Dense(self.output_dim))

    # Compile model: set loss function and optimizer
    model.compile(optimizer=optim, loss=rmse)
    #print(model.summary())

    # Print parameter count
    num_params = model.count_params()
    print('# network parameters: ' + str(num_params))

    self.model = model
    return model
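# Hypothetical usage of the compile() method above. `wrapper` is a
# placeholder for whatever class owns the method; the keys mirror the
# lookups performed in the body:
params = {
    'num_layers': 2, 'units': 100, 'batch_size': 32,
    'timesteps': 24, 'input_dim': 8, 'output_dim': 1,
    'optimizer': 'RMSprop', 'lrate': 1e-3, 'seed': 0,
    'load': False,
}
model = wrapper.compile(**params)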
def build(self):
    model = Sequential()
    model.add(
        LSTM(units=self.units,
             input_shape=self.input_shape,
             dropout=self.dropout))
    model.add(Dense(self.num_labels, activation='softmax'))
    model.compile(loss=self.loss,
                  optimizer=self.optimizer,
                  metrics=[self.metrics])
    num_params = model.count_params()
    print('# network parameters: ' + str(num_params))
    model.summary()
    self.model = model
def test_decomp_num_parameters(dummy_data):
    # Disable the redefined-outer-name violation in this function
    # pylint: disable=redefined-outer-name
    data, _ = dummy_data
    output_dim = 256
    decomp_size = 128

    model = Sequential()
    model.add(
        DenseDecomp(output_dim,
                    decomp_size=decomp_size,
                    use_bias=True,
                    activation='relu',
                    input_shape=(data.shape[1],)))

    # num_params = a_params + b_params + bias_params
    expected_num_parameters = (data.shape[1] * decomp_size) + (
        decomp_size * output_dim) + output_dim

    np.testing.assert_equal(expected_num_parameters, model.count_params())
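# Sanity check by hand, assuming the 64-feature fixture sketched earlier:
#   a_params    = 64 * 128  =  8192
#   b_params    = 128 * 256 = 32768
#   bias_params = 256
#   total       = 41216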
def test_expander_num_parameters(dummy_data):
    # Disable the redefined-outer-name violation in this function
    # pylint: disable=redefined-outer-name
    data, _ = dummy_data
    exp_base = 2
    num_nodes = 3

    model = Sequential()
    model.add(
        DenseExpander(exp_base=exp_base,
                      num_nodes=num_nodes,
                      use_bias=True,
                      activation='relu',
                      input_shape=(data.shape[-1],)))
    output_dim = data.shape[-1] * (exp_base**num_nodes)

    # num_params = (num_nodes * num_node_params) + num_bias_params
    expected_num_parameters = (num_nodes * data.shape[-1] * data.shape[-1] *
                               exp_base) + output_dim

    np.testing.assert_equal(expected_num_parameters, model.count_params())
def test_entangler_num_parameters(dummy_data):
    # Disable the redefined-outer-name violation in this function
    # pylint: disable=redefined-outer-name
    data, _ = dummy_data
    num_legs = 3
    num_levels = 3
    leg_dim = round(data.shape[-1]**(1. / num_legs))
    assert leg_dim**num_legs == data.shape[-1]

    model = Sequential()
    model.add(
        DenseEntangler(leg_dim**num_legs,
                       num_legs=num_legs,
                       num_levels=num_levels,
                       use_bias=True,
                       activation='relu',
                       input_shape=(data.shape[1],)))

    # num_params = entangler_node_params + bias_params
    expected_num_parameters = num_levels * (leg_dim**4) + leg_dim**num_legs

    np.testing.assert_equal(expected_num_parameters, model.count_params())
train_faces, train_emotions, val_faces, val_emotions = load_fer2013()
num_samples, num_classes = train_emotions.shape

train_faces /= 255.
val_faces /= 255.

# Define the model here, CHANGEME
# (input_shape is assumed to be defined earlier in the script.)
model = Sequential()
model.add(Flatten(input_shape=input_shape))
model.add(Dense(num_classes, activation="softmax"))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# log the number of total parameters
config.total_params = model.count_params()

model.fit(train_faces,
          train_emotions,
          batch_size=config.batch_size,
          epochs=config.num_epochs,
          verbose=1,
          callbacks=[
              Perf(val_faces),
              WandbCallback(data_type="image",
                            labels=[
                                "Angry", "Disgust", "Fear", "Happy",
                                "Sad", "Surprise", "Neutral"
                            ])
          ],
          validation_data=(val_faces, val_emotions))
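# The Perf callback used above is not defined in this excerpt. A minimal
# sketch, assuming it simply times a full predict() pass over the held-out
# faces at the end of each epoch:
import time

from tensorflow.keras.callbacks import Callback


class Perf(Callback):
    """Logs how long inference over the given data takes per epoch."""

    def __init__(self, testdata):
        super().__init__()
        self.testdata = testdata

    def on_epoch_end(self, epoch, logs=None):
        start = time.time()
        self.model.predict(self.testdata)
        print("prediction time: {:.3f}s".format(time.time() - start))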
model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),
          steps_per_epoch=int(np.ceil(x_train.shape[0] / float(batch_size))),
          epochs=args.epochs,
          validation_data=(x_test, y_test),
          workers=4)

training_time = time.process_time() - start

# File to save model and summary - encodes some of the parameters
learning_rate = '{:0.6f}'.format(backend.eval(model.optimizer.lr))
base_name = '{}_lr={}_udl={:04d}_e={:03d}'.format(test_name, learning_rate,
                                                  args.units_dense_layer,
                                                  args.epochs)

# Save model
model.save(base_name + '_model.h5')

# Save training history
with open(base_name + '_history.json', 'w') as f:
    json.dump(model.history.history, f)

# Save a summary of the results
with open(base_name + '_summary.txt', 'w') as f:
    f.write('Training time: {}\n'.format(training_time))
    f.write('Total parameters: {}\n'.format(model.count_params()))
    f.write('Optimizer: {}\n'.format(type(model.optimizer).__name__))
    f.write('Learning rate: {}\n'.format(learning_rate))
    f.write('Validation accuracy: {}\n'.format(
        model.history.history['val_accuracy']))
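# Assumed setup for the `datagen` used above: a standard Keras
# ImageDataGenerator with light augmentation (the exact transforms are a
# guess, not taken from the original script):
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(rotation_range=10,
                             width_shift_range=0.1,
                             height_shift_range=0.1)
datagen.fit(x_train)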
def GenModel(data_objects, model_params):
    # Load the data to check dimensionality
    x_train, y_train = data_objects["x_train"], data_objects["y_train"]
    print("Input data shape:", x_train.shape)

    # Model-controlled parameters:
    F_modeltype = data_objects["F_modeltype"]

    #######################################################################
    # Number of block layers
    BLOCK_LAYERS = model_params["BLOCK_LAYERS"]

    # Alternative block type setup:
    BLOCK1_TYPE = model_params["BLOCK1_TYPE"]
    BLOCK2_TYPE = model_params["BLOCK2_TYPE"]
    BLOCK3_TYPE = model_params["BLOCK3_TYPE"]
    BLOCK4_TYPE = model_params["BLOCK4_TYPE"]

    FC_BLOCK1 = model_params["FC_BLOCK1"]
    FC_BLOCK2 = model_params["FC_BLOCK2"]
    FC_BLOCK3 = model_params["FC_BLOCK3"]
    FC_BLOCK4 = model_params["FC_BLOCK4"]

    # CNN-related params
    DROPOUT_RATE = model_params["DROPOUT_RATE"]
    #FULLY_CONNECTED = model_params["FULLY_CONNECTED"]
    NUM_FILTERS = 0  #model_params["NUM_FILTERS"]
    KERNEL_SIZE = 0  #model_params["KERNEL_SIZE"]
    KERNEL_STRIDE = 0  #model_params["KERNEL_STRIDE"]
    POOL_STRIDE = 0  #model_params["POOL_STRIDE"]
    POOL_SIZE = 0  #model_params["POOL_SIZE"]
    PADDING = 0  #used for analysis only

    input_dim = (x_train.shape[1], x_train.shape[2])

    # Print the configuration to be trained:
    print("Hyper params:")
    print("================================" * 3)
    print("================================" * 3)
    print("\nArchitecture:")
    print("Blocks: ", model_params["BLOCK_LAYERS"])
    print("")
    print("Block types: ", model_params["BLOCK1_TYPE"],
          model_params["BLOCK2_TYPE"], model_params["BLOCK3_TYPE"],
          model_params["BLOCK4_TYPE"])
    print("Hidden units: ", model_params["FC_BLOCK1"],
          model_params["FC_BLOCK2"], model_params["FC_BLOCK3"],
          model_params["FC_BLOCK4"])
    print("Dropout: ", model_params["DROPOUT_RATE"])
    print("\nTraining:")
    print("- batch_size: ", model_params["batch_size"])
    print("- optimizer: ", model_params["optimizer"])
    print("- learningrate: ", model_params["learningrate"])
    print("================================" * 3)
    print("================================" * 3)

    #######################################################################
    #################### Config analysis ##################################
    #######################################################################
    """
    ##### Helper functions:
    def Get_outdim_conv(Inputdim, paddingdim, kerneldim, stridedim, filters):
        out_dim = (int((Inputdim + 2 * paddingdim - kerneldim) / stridedim + 1),
                   filters)
        print("Conv:", out_dim)
        return out_dim

    def Get_outdim_maxpool(Inputdim, pooldim, stridedim, filters):
        out_dim = (int((Inputdim - pooldim) / stridedim + 1), filters)
        print("Maxpool:", out_dim)
        return out_dim

    ##### Pre-model building analysis/validation
    if F_modeltype == "CNN":
        # List of blocks that can be added to the model
        blocks_available = []
        print("Pre-build analysis:")
        for block in range(1, BLOCK_LAYERS + 1):
            print("-----------")
            if block == 1:
                newdims = Get_outdim_conv(Inputdim=input_dim[0],
                                          paddingdim=PADDING,
                                          kerneldim=KERNEL_SIZE,
                                          stridedim=KERNEL_STRIDE,
                                          filters=NUM_FILTERS)
                newdims = Get_outdim_maxpool(Inputdim=newdims[0],
                                             pooldim=POOL_SIZE,
                                             stridedim=POOL_STRIDE,
                                             filters=NUM_FILTERS)
            if block > 1:
                newdims = Get_outdim_conv(Inputdim=newdims[0],
                                          paddingdim=PADDING,
                                          kerneldim=KERNEL_SIZE,
                                          stridedim=KERNEL_STRIDE,
                                          filters=NUM_FILTERS)
                newdims = Get_outdim_maxpool(Inputdim=newdims[0],
                                             pooldim=POOL_SIZE,
                                             stridedim=POOL_STRIDE,
                                             filters=NUM_FILTERS)
            if newdims[0] < 1:
                print("Block ", block, " fail")
                if block == 1:
                    print("First block fail - need a different configuration/bounds")
                blocks_available.append(0)
            if newdims[0] > 0:
                print("Block", block, "pass: "******
    """

    #######################################################################
    def Make_block(
            BLOCK_TYPE,
            BLOCK,
            DROPOUT_RATE,
            #FULLY_CONNECTED,
            NUM_FILTERS,
            KERNEL_SIZE,
            KERNEL_STRIDE,
            POOL_STRIDE,
            POOL_SIZE,
            PADDING,
            input_dim,
            model_type=None,
            end_seq=None):
        """
        if model_type == "CNN":
            # First layer: include input dim
            if BLOCK_TYPE == 1 and BLOCK == 1:
                model.add(Conv1D(NUM_FILTERS,
                                 kernel_size=KERNEL_SIZE,
                                 padding='valid',
                                 activation='relu',
                                 strides=KERNEL_STRIDE,
                                 use_bias=True,
                                 input_shape=input_dim))
                model.add(MaxPooling1D(pool_size=POOL_SIZE,
                                       strides=POOL_STRIDE))
            # No need to specify input dim
            if BLOCK_TYPE == 1 and BLOCK > 1:
                model.add(Conv1D(NUM_FILTERS,
                                 kernel_size=KERNEL_SIZE,
                                 padding='valid',
                                 activation='relu',
                                 strides=KERNEL_STRIDE,
                                 use_bias=True,
                                 input_shape=input_dim))
                model.add(MaxPooling1D(pool_size=POOL_SIZE,
                                       strides=POOL_STRIDE))
            if BLOCK_TYPE == 2 and BLOCK == 1:
                model.add(Conv1D(NUM_FILTERS,
                                 kernel_size=KERNEL_SIZE,
                                 padding='valid',
                                 activation='relu',
                                 strides=KERNEL_STRIDE,
                                 use_bias=True,
                                 input_shape=input_dim))
                model.add(MaxPooling1D(pool_size=POOL_SIZE,
                                       strides=POOL_STRIDE))
                model.add(BatchNormalization())
            if BLOCK_TYPE == 2 and BLOCK > 1:
                model.add(Conv1D(NUM_FILTERS,
                                 kernel_size=KERNEL_SIZE,
                                 padding='valid',
                                 activation='relu',
                                 strides=KERNEL_STRIDE,
                                 use_bias=True,
                                 input_shape=input_dim))
                model.add(MaxPooling1D(pool_size=POOL_SIZE,
                                       strides=POOL_STRIDE))
                model.add(BatchNormalization())
            if BLOCK_TYPE == 3 and BLOCK == 1:
                model.add(Conv1D(NUM_FILTERS,
                                 kernel_size=KERNEL_SIZE,
                                 padding='valid',
                                 activation='relu',
                                 strides=KERNEL_STRIDE,
                                 use_bias=True,
                                 input_shape=input_dim))
                model.add(MaxPooling1D(pool_size=POOL_SIZE,
                                       strides=POOL_STRIDE))
                model.add(Dropout(DROPOUT_RATE))
            if BLOCK_TYPE == 3 and BLOCK > 1:
                model.add(Conv1D(NUM_FILTERS,
                                 kernel_size=KERNEL_SIZE,
                                 padding='valid',
                                 activation='relu',
                                 strides=KERNEL_STRIDE,
                                 use_bias=True,
                                 input_shape=input_dim))
                model.add(MaxPooling1D(pool_size=POOL_SIZE,
                                       strides=POOL_STRIDE))
                model.add(Dropout(DROPOUT_RATE))
            if BLOCK_TYPE == 4 and BLOCK == 1:
                model.add(Conv1D(NUM_FILTERS,
                                 kernel_size=KERNEL_SIZE,
                                 padding='valid',
                                 activation='relu',
                                 strides=KERNEL_STRIDE,
                                 use_bias=True,
                                 input_shape=input_dim))
                model.add(MaxPooling1D(pool_size=POOL_SIZE,
                                       strides=POOL_STRIDE))
                model.add(BatchNormalization())
                model.add(Dropout(DROPOUT_RATE))
            if BLOCK_TYPE == 4 and BLOCK > 1:
                model.add(Conv1D(NUM_FILTERS,
                                 kernel_size=KERNEL_SIZE,
                                 padding='valid',
                                 activation='relu',
                                 strides=KERNEL_STRIDE,
                                 use_bias=True,
                                 input_shape=input_dim))
                model.add(MaxPooling1D(pool_size=POOL_SIZE,
                                       strides=POOL_STRIDE))
                model.add(BatchNormalization())
                model.add(Dropout(DROPOUT_RATE))
        """
        if model_type == "LSTM":
            if BLOCK == 1:
                FULLY_CONNECTED = FC_BLOCK1
            if BLOCK == 2:
                FULLY_CONNECTED = FC_BLOCK2
            if BLOCK == 3:
                FULLY_CONNECTED = FC_BLOCK3
            if BLOCK == 4:
                FULLY_CONNECTED = FC_BLOCK4

            # For continuation of the recurrent sequences
            if not end_seq:
                print("type", BLOCK_TYPE, "Seq = T")
                # First layer: include input dim
                # Basic LSTM with recurrent dropout
                if BLOCK_TYPE == 1:
                    model.add(
                        LSTM(FULLY_CONNECTED,
                             implementation=2,
                             input_shape=input_dim,
                             recurrent_dropout=DROPOUT_RATE,
                             return_sequences=True))
                # LSTM with batchNorm and recurrent dropout
                if BLOCK_TYPE == 2:
                    model.add(
                        LSTM(FULLY_CONNECTED,
                             implementation=2,
                             input_shape=input_dim,
                             recurrent_dropout=DROPOUT_RATE,
                             return_sequences=True))
                    model.add(BatchNormalization())
                # LSTM with no dropout
                if BLOCK_TYPE == 3:
                    model.add(
                        CuDNNLSTM(FULLY_CONNECTED,
                                  #implementation=2,
                                  input_shape=input_dim,
                                  return_sequences=True))
                # LSTM with batchNorm and no dropout
                if BLOCK_TYPE == 4:
                    model.add(
                        CuDNNLSTM(FULLY_CONNECTED,
                                  #implementation=2,
                                  input_shape=input_dim,
                                  return_sequences=True))
                    model.add(BatchNormalization())

            # For ending the sequence
            if end_seq:
                print("type", BLOCK_TYPE, "Seq = F")
                # First layer: include input dim
                # Basic LSTM with recurrent dropout
                if BLOCK_TYPE == 1:
                    model.add(
                        LSTM(FULLY_CONNECTED,
                             implementation=2,
                             recurrent_dropout=DROPOUT_RATE,
                             input_shape=input_dim,
                             return_sequences=False))
                # LSTM with batchNorm and recurrent dropout
                if BLOCK_TYPE == 2:
                    model.add(
                        LSTM(FULLY_CONNECTED,
                             implementation=2,
                             recurrent_dropout=DROPOUT_RATE,
                             input_shape=input_dim,
                             return_sequences=False))
                    model.add(BatchNormalization())
                # LSTM with no dropout
                if BLOCK_TYPE == 3:
                    model.add(
                        CuDNNLSTM(FULLY_CONNECTED,
                                  #implementation=2,
                                  input_shape=input_dim,
                                  return_sequences=False))
                # LSTM with batchNorm and no dropout
                if BLOCK_TYPE == 4:
                    model.add(
                        CuDNNLSTM(FULLY_CONNECTED,
                                  #implementation=2,
                                  input_shape=input_dim,
                                  return_sequences=False))
                    model.add(BatchNormalization())
        return

    #######################################################################
    ########## Architecture build #########################################
    #######################################################################
    model = Sequential()  ####### input_shape=input_dim

    if F_modeltype == "LSTM":
        # Store number of blocks
        blocks_available = BLOCK_LAYERS
        print(F_modeltype)

        for block in range(0, BLOCK_LAYERS):
            block = block + 1
            print("Adding block", block, "of", BLOCK_LAYERS)

            # Figure out whether to end the sequence or not
            Last_block = False
            if block == BLOCK_LAYERS:
                print("last block: ", block, "=", BLOCK_LAYERS)
                Last_block = True

            if block == 1:
                Make_block(BLOCK1_TYPE,
                           block,
                           DROPOUT_RATE,
                           #FULLY_CONNECTED,
                           NUM_FILTERS,
                           KERNEL_SIZE,
                           KERNEL_STRIDE,
                           POOL_STRIDE,
                           POOL_SIZE,
                           PADDING,
                           input_dim,
                           model_type=F_modeltype,
                           end_seq=Last_block)
            if block == 2:
                Make_block(BLOCK2_TYPE,
                           block,
                           DROPOUT_RATE,
                           #FULLY_CONNECTED,
                           NUM_FILTERS,
                           KERNEL_SIZE,
                           KERNEL_STRIDE,
                           POOL_STRIDE,
                           POOL_SIZE,
                           PADDING,
                           input_dim,
                           model_type=F_modeltype,
                           end_seq=Last_block)
            if block == 3:
                Make_block(BLOCK3_TYPE,
                           block,
                           DROPOUT_RATE,
                           #FULLY_CONNECTED,
                           NUM_FILTERS,
                           KERNEL_SIZE,
                           KERNEL_STRIDE,
                           POOL_STRIDE,
                           POOL_SIZE,
                           PADDING,
                           input_dim,
                           model_type=F_modeltype,
                           end_seq=Last_block)
            if block == 4:
                Make_block(BLOCK4_TYPE,
                           block,
                           DROPOUT_RATE,
                           #FULLY_CONNECTED,
                           NUM_FILTERS,
                           KERNEL_SIZE,
                           KERNEL_STRIDE,
                           POOL_STRIDE,
                           POOL_SIZE,
                           PADDING,
                           input_dim,
                           model_type=F_modeltype,
                           end_seq=Last_block)
        print("Done.")
        model.add(Dense(1))  #, dtype='float32'
        # Only the softmax is advised to be float32

    #print(model.summary())
    if BLOCK_LAYERS > 1:
        print("Total number of params:", model.count_params())

    #################### TESTING ##########################################
    """
    # Print model overview
    print(model.summary())

    # Compiling the model, creating the callbacks
    model.compile(loss='mae', optimizer='Nadam', metrics=['mae'])
    trainhist = model.fit(x_train, y_train,
                          validation_split=0.2,
                          epochs=10,
                          batch_size=batch_size)
    scores = model.evaluate(x_test, y_test, verbose=1, batch_size=512)
    mae_test = scores[1]  #/(24.0*3600)
    mae_test = mae_test / (24.0 * 3600)
    mae_test
    """
    return model, blocks_available
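# Hypothetical call to GenModel with placeholder data and settings; the key
# names follow the lookups performed inside the function, and x_train/y_train
# stand in for real arrays of shape (samples, timesteps, features):
data_objects = {
    "x_train": x_train,
    "y_train": y_train,
    "F_modeltype": "LSTM",
}
model_params = {
    "BLOCK_LAYERS": 2,
    "BLOCK1_TYPE": 1, "BLOCK2_TYPE": 2, "BLOCK3_TYPE": 1, "BLOCK4_TYPE": 1,
    "FC_BLOCK1": 64, "FC_BLOCK2": 32, "FC_BLOCK3": 16, "FC_BLOCK4": 8,
    "DROPOUT_RATE": 0.2,
    "batch_size": 256, "optimizer": "Nadam", "learningrate": 1e-3,
}
model, blocks_available = GenModel(data_objects, model_params)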
model.add(
    Conv2D(32,
           kernel_size=(3, 3),
           activation='relu',
           input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

print()
print("Model parameters = %d" % model.count_params())
print()
print(model.summary())
print()

model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

model.save("mnist_cnn_base_model.h5")
def compute(self, config, budget, working_directory, *args, **kwargs):
    """
    Simple example for a compute function using a feed forward network.
    It is trained on the MNIST dataset.
    The input parameter "config" (dictionary) contains the sampled
    configurations passed by the bohb optimizer.
    """
    self.train_dataset, self.steps_per_epoch, self.valid_dataset = (
        generate_datasets())

    model = Sequential()
    model.add(
        Conv2D(config['num_filters_1'],
               kernel_size=(3, 3),
               activation='relu',
               input_shape=self.input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    if config['num_conv_layers'] > 1:
        model.add(
            Conv2D(config['num_filters_2'],
                   kernel_size=(3, 3),
                   activation='relu',
                   input_shape=self.input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(config['dropout_rate']))
    model.add(Flatten())
    model.add(Dense(config['num_fc_units'], activation='relu'))
    model.add(Dropout(config['dropout_rate']))
    model.add(Dense(self.num_classes, activation='softmax'))

    if config['optimizer'] == 'Adam':
        optimizer = keras.optimizers.Adam(lr=config['lr'])
    else:
        optimizer = keras.optimizers.SGD(lr=config['lr'],
                                         momentum=config['sgd_momentum'])

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=optimizer,
                  metrics=['accuracy'])

    model.fit(self.train_dataset,
              steps_per_epoch=self.steps_per_epoch,
              epochs=int(budget),
              verbose=0,
              validation_data=self.valid_dataset)

    train_score = model.evaluate(self.train_dataset, steps=1, verbose=0)
    val_score = model.evaluate(self.valid_dataset, steps=1, verbose=0)
    #test_score = model.evaluate(self.x_test, self.y_test, verbose=0)

    #import IPython; IPython.embed()
    return ({
        'loss': 1 - val_score[1],  # remember: HpBandSter always minimizes!
        'info': {
            #'test accuracy': float(test_score[1]),
            'train accuracy': float(train_score[1]),
            'validation accuracy': float(val_score[1]),
            'number of parameters': float(model.count_params()),
        }
    })
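# A plausible configuration as BOHB might sample it for the worker above;
# the exact search space is not shown in this excerpt, but these are the
# keys the compute() body reads:
config = {
    'num_filters_1': 16,
    'num_filters_2': 32,
    'num_conv_layers': 2,
    'dropout_rate': 0.3,
    'num_fc_units': 128,
    'optimizer': 'Adam',
    'lr': 1e-3,
    # 'sgd_momentum' is only read when optimizer == 'SGD'.
}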
def fit_model(X_train, y_train, no_features, learning_rate, l2, epochs,
              val_frac, architecture, units, layers, activation, verbose):
    """Neural network architecture/model to train the mixers.

    Args:
        X_train, y_train: Features and target values of the training set
        no_features (int): Number of features
        learning_rate (float): Learning rate of the gradient descent
        l2 (float): Regularization constant
        epochs (int): Number of iterations
        val_frac (float): Fraction of the training set used to validate
            the model
        architecture (string): Type of the architecture
        units (int): Number of units in the first hidden layer
        layers (int): Number of layers in the NN
        verbose (int)

    Returns:
        history: History of the algorithm
    """
    # Optimizer
    opt = keras.optimizers.Adagrad(learning_rate=learning_rate)
    # Initializer
    ini = keras.initializers.GlorotUniform()
    # Regularizer
    reg = keras.regularizers.l2(l2)

    # Architecture of the neural network
    if architecture == 'deep':
        model = Sequential()
        model.add(
            Dense(units,
                  input_dim=no_features,
                  kernel_initializer=ini,
                  kernel_regularizer=reg,
                  activation=activation))
        l = 1
        while l < layers:
            model.add(
                Dense(units,
                      kernel_initializer=ini,
                      kernel_regularizer=reg,
                      activation=activation))
            l = l + 1
        model.add(
            Dense(1,
                  kernel_initializer=ini,
                  kernel_regularizer=reg,
                  activation='linear'))
    elif architecture == 'cascade':
        model = Sequential()
        model.add(
            Dense(units,
                  input_dim=no_features,
                  kernel_initializer=ini,
                  kernel_regularizer=reg,
                  activation=activation))
        l = 1
        while l < layers and units >= 2:
            # Halve the width each layer; integer division keeps Dense's
            # unit count an int.
            units = units // 2
            model.add(
                Dense(units,
                      kernel_initializer=ini,
                      kernel_regularizer=reg,
                      activation=activation))
            l = l + 1
        model.add(
            Dense(1,
                  kernel_initializer=ini,
                  kernel_regularizer=reg,
                  activation='linear'))

    # Compile
    model.compile(loss='mse', optimizer=opt, metrics=['mse', 'mae', 'mape'])
    model.summary()

    # TensorBoard
    #log_dir = 'logs/fit/'
    #tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
    #early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=100)

    # Fit
    history = model.fit(X_train,
                        y_train,
                        epochs=epochs,
                        validation_split=val_frac,
                        verbose=verbose)
    return history, model, model.count_params()
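# Hypothetical call: a 3-layer 'cascade' starting at 64 units narrows to 32
# and then 16 before the linear output head (X_train/y_train are
# placeholders for the real training arrays):
history, model, n_params = fit_model(X_train, y_train,
                                     no_features=X_train.shape[1],
                                     learning_rate=0.05, l2=1e-4, epochs=100,
                                     val_frac=0.2, architecture='cascade',
                                     units=64, layers=3, activation='relu',
                                     verbose=0)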
def compute(self, config, budget, working_directory, *args, **kwargs):
    """
    Simple example for a compute function using a feed forward network.
    It is trained on the MNIST dataset.
    The input parameter "config" (dictionary) contains the sampled
    configurations passed by the bohb optimizer.
    """
    model = Sequential()
    model.add(
        ConvLSTM2D_2(config['num_filters_1'],
                     kernel_size=(3, self.x_batch.shape[3]),
                     activation='relu',
                     input_shape=self.input_shape,
                     return_sequences=True))
    model.add(keras.layers.BatchNormalization())
    if config['num_conv_layers'] > 1:
        model.add(
            ConvLSTM2D_2(config['num_filters_2'],
                         kernel_size=(3, 1),
                         activation='relu',
                         return_sequences=True))
        model.add(keras.layers.BatchNormalization())
    if config['num_conv_layers'] > 2:
        model.add(
            ConvLSTM2D_2(config['num_filters_3'],
                         kernel_size=(3, 1),
                         activation='relu',
                         return_sequences=True))
        model.add(keras.layers.BatchNormalization())
    model.add(Dropout(config['dropout_rate']))
    model.add(TimeDistributed(Flatten()))
    model.add(
        TimeDistributed(Dense(config['num_fc_units'], activation='relu')))
    model.add(Dropout(config['dropout_rate']))
    model.add(
        TimeDistributed(Dense(self.num_classes, activation='softmax')))

    if config['optimizer'] == 'Adam':
        optimizer = keras.optimizers.Adam(lr=config['lr'])
    else:
        optimizer = keras.optimizers.SGD(lr=config['lr'],
                                         momentum=config['sgd_momentum'])

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=optimizer,
                  metrics=['accuracy'])

    model.fit(self.x_train,
              self.y_train,
              batch_size=self.batch_size,
              epochs=int(budget),
              verbose=0,
              validation_data=(self.x_validation, self.y_validation))

    train_score = model.evaluate(self.x_train, self.y_train, verbose=0)
    val_score = model.evaluate(self.x_validation, self.y_validation,
                               verbose=0)

    #import IPython; IPython.embed()
    return ({
        'loss': 1 - val_score[1],  # remember: HpBandSter always minimizes!
        'info': {
            'train accuracy': train_score[1],
            'validation accuracy': val_score[1],
            'number of parameters': model.count_params(),
        }
    })
yNN = best_model.predict(x_test)
errTest = np.linalg.norm(yNN.reshape(yNN.size) - y_test)**2 / n_test

best_params = best_model.trainable_variables

# We compute the path norm (this should be encapsulated)
path_norm = np.abs(best_params[0])
for i in range(2, len(best_params), 2):
    # We only multiply the weight matrices;
    # we don't consider the biases in this case.
    path_norm = np.matmul(path_norm, np.abs(best_params[i]))
path_norm = np.sum(path_norm) / d

print('error with best model')
print("mean squared error of train data in NN: %.3e" % errTrain)
print("mean squared error of validation data in NN: %.3e" % errVal)
print("mean squared error of test data in NN: %.3e" % errTest)
print("path norm of the best model: %.3e" % path_norm)

# Change the output filename here
log_os = open('summary_pathnormreg_0210', "a")
log_os.write('%d\t%.3e\t' % (args.d, args.reg))
log_os.write('%d\t%d\t' % (args.epoch, Nsamples))
log_os.write('%d\t' % model.count_params())
log_os.write('%.3e\t%.3e\t%.3e\t%.3e\t' %
             (errTrain, errVal, errTest, path_norm))
log_os.write('\n')
log_os.close()
def build_and_train_rnn(hype_space, log_for_tensorboard=True):
    train_x, train_y = return_dataset(train_data_path,
                                      hype_space['normalize_data'])
    test_x, test_y = return_dataset(test_data_path,
                                    hype_space['normalize_data'])

    model = Sequential()
    model.add(InputLayer(input_shape=(600, 1)))
    if hype_space['depth'] == 1:
        model.add(
            return_cell_type(hype_space['cell_type'])(
                int(hype_space['hidden_dim'])))
    if hype_space['depth'] == 2:
        model.add(
            return_cell_type(hype_space['cell_type'])(
                int(hype_space['hidden_dim']), return_sequences=True))
        model.add(
            return_cell_type(hype_space['cell_type'])(
                int(hype_space['hidden_dim'])))
    if hype_space['fc_dim']:
        model.add(Dense(int(hype_space['fc_dim']), activation='relu'))
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    model_uuid = str(uuid.uuid4())[:5]

    # TensorBoard logging callback:
    callbacks = []
    if log_for_tensorboard:
        log_path = os.path.join(TENSORBOARD_DIR, model_uuid)
        print("Tensorboard log files will be saved to: {}".format(log_path))
        if not os.path.exists(log_path):
            os.makedirs(log_path)
        tb_callback = tf.keras.callbacks.TensorBoard(log_dir=log_path,
                                                     write_graph=True)
        tb_callback.set_model(model)
        callbacks.append(tb_callback)

    # Train net:
    history = model.fit(train_x,
                        train_y,
                        batch_size=int(hype_space['batch_size']),
                        epochs=EPOCHS,
                        shuffle=True,
                        verbose=0,
                        callbacks=callbacks).history

    # Test net:
    print(f'History keys: {history.keys()}')
    score = model.evaluate(test_x, test_y, verbose=0)
    model_name = "model_{:.4f}_{}".format(score[1], model_uuid)
    print("Model name: {}".format(model_name))

    result = {
        'train_accuracy': history['accuracy'][-1],
        'test_accuracy': score[1],
        'loss': score[0],  # minimize test loss as the hypertuning objective
        # Misc:
        'model_name': model_name,
        'space': hype_space,
        'history': history,
        'status': STATUS_OK,
        'model_param_num': model.count_params()
    }
    # print("RESULT:")
    # print_json(result)
    return model, model_name, result
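# A plausible hyperopt search space for build_and_train_rnn (assumed; the
# real space is defined elsewhere in the project, but these are the keys
# the function reads):
from hyperopt import hp

hype_space = {
    'normalize_data': hp.choice('normalize_data', [False, True]),
    'cell_type': hp.choice('cell_type', ['LSTM', 'GRU']),
    'depth': hp.choice('depth', [1, 2]),
    'hidden_dim': hp.quniform('hidden_dim', 32, 256, 32),
    'fc_dim': hp.choice('fc_dim', [0, 64, 128]),
    'batch_size': hp.quniform('batch_size', 16, 128, 16),
}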