# Rebuilds a model from its serialized config, training config, and weights.
# This is the unpickling half of the common "make Keras models picklable"
# recipe; saving_utils lives in TensorFlow's private keras.saving module.
from tensorflow.python.keras.layers import deserialize
from tensorflow.python.keras.saving import saving_utils


def unpack(model, training_config, weights):
    restored_model = deserialize(model)
    if training_config is not None:
        restored_model.compile(
            **saving_utils.compile_args_from_training_config(training_config))
    restored_model.set_weights(weights)
    return restored_model
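# A minimal usage sketch for `unpack`, following the pickling recipe it
# usually pairs with. The helper name `make_keras_picklable` is illustrative,
# not from the original snippet; `saving_utils.model_metadata` supplies the
# training config in TF 2.x.
from tensorflow.python.keras.layers import serialize
from tensorflow.keras.models import Model


def make_keras_picklable():
    def __reduce__(self):
        model_metadata = saving_utils.model_metadata(self)
        training_config = model_metadata.get("training_config", None)
        weights = self.get_weights()
        # pickle will call unpack(serialized_model, training_config, weights)
        return unpack, (serialize(self), training_config, weights)

    Model.__reduce__ = __reduce__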
def change_nodes(self, new_nodes):
    # Rebuild the wrapped layer with a new (smaller) number of units,
    # e.g. new_nodes = len(self.weights[1]) - number_of_nodes.
    serialized = layers.serialize(self.tf_layer)
    serialized["config"]["units"] = new_nodes
    self.tf_layer = layers.deserialize(serialized)
    # Load back the old weights, sliced down to the new width.
    self.save_weights([self.weights[0][:, :new_nodes],
                       self.weights[1][:new_nodes]])
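# Standalone sketch of the serialize/edit/deserialize trick change_nodes
# relies on, using a plain Dense layer; the input width of 4 is an assumed
# value for illustration.
from tensorflow.keras import layers

dense = layers.Dense(10)
dense.build((None, 4))
kernel, bias = dense.get_weights()

cfg = layers.serialize(dense)
cfg["config"]["units"] = 5          # shrink the layer to 5 units
smaller = layers.deserialize(cfg)
smaller.build((None, 4))
smaller.set_weights([kernel[:, :5], bias[:5]])  # keep the first 5 columns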
def parse_model(self, model):
    """Reads in a TensorFlow model."""
    # First clear all bookkeeping state.
    self.layers = {}
    self.connected_layers = {}
    self.connected_inputs = []
    self.connected_outputs = []
    self.inputs = []
    self.outputs = []
    # Iterate through all layers of the TensorFlow model.
    for layer in model.layers:
        # A serialize/deserialize round trip yields a copy of the layer
        # that is disconnected from the original model's graph.
        disconnected_layer = layers.deserialize(layers.serialize(layer))
        weights = layer.get_weights()
        # Add the layer to the self.layers dict.
        self.layers[layer.name] = Layer(disconnected_layer,
                                        name=layer.name, type="Layer")
        # Save the weights to the layer (NOTE: this doesn't mean they're
        # loaded into the TensorFlow layer, they're only stored on the
        # custom Layer object).
        self.layers[layer.name].weights = weights
        # If there is a single input (or none), wrap it in a list so it
        # is iterable.
        if isinstance(layer._inbound_nodes[0].inbound_layers, list):
            input_layers = layer._inbound_nodes[0].inbound_layers
        else:
            input_layers = [layer._inbound_nodes[0].inbound_layers]
        # If input_layers is empty, the layer has to be an input.
        if len(input_layers) < 1:
            self.layers[layer.name] = Layer(layer.input,
                                            name=layer.name, type="Input")
            self.inputs.append(layer.name)
        # Otherwise iterate through the inputs and register them.
        for inbound in input_layers:
            self.set_input(inbound.name, layer.name)
    self.connect_layers()
    # Any layer that feeds nothing downstream is an output.
    for layer in self.layers:
        if layer not in list(self.connections_out.keys()):
            _, in_conv = self.find_value(layer, self.connections_in)
            if not in_conv:
                self.layers[layer].type = "Output"
                self.outputs.append(self.layers[layer])
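# Small demonstration of the two mechanisms parse_model builds on: the
# serialize/deserialize round trip clones a layer without its graph
# connections, and _inbound_nodes (a private Keras attribute, stable in
# TF 2.x but not a public API) exposes the wiring between layers.
from tensorflow.keras import layers, models

inp = layers.Input(shape=(8,))
out = layers.Dense(3, name="head")(layers.Dense(16, name="body")(inp))
demo_model = models.Model(inp, out)

for layer in demo_model.layers:
    clone = layers.deserialize(layers.serialize(layer))  # disconnected copy
    inbound = layer._inbound_nodes[0].inbound_layers
    inbound = inbound if isinstance(inbound, list) else [inbound]
    print(layer.name, "<-", [l.name for l in inbound])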
from tensorflow.keras.layers import deserialize


def MultiHead(layer, n_heads=None):
    """Replicates a template layer into n_heads independent copies that all
    consume the same input and return a list of outputs."""
    if isinstance(layer, (list, tuple)):
        layers_ = list(layer)
    else:
        if n_heads is None:
            raise ValueError("n_heads is missing.")
        layer_info = {
            'class_name': layer.__class__.__name__,
            'config': layer.get_config()
        }
        n_heads, original_name = int(n_heads), layer_info['config']['name']
        layers_ = []
        for i in range(n_heads):
            # Each head needs a unique name before deserialization.
            layer_info['config']['name'] = f'{original_name}_multihead_{i}'
            layers_.append(deserialize(layer_info))

    def __inner__(in_, *args, **kwargs):
        return [l(in_, *args, **kwargs) for l in layers_]

    return __inner__
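# Usage sketch for MultiHead: three independently weighted copies of one
# Dense template applied to the same input, then concatenated. The shapes
# are assumed values for illustration.
from tensorflow.keras import layers, models

inp = layers.Input(shape=(32,))
heads = MultiHead(layers.Dense(8, name="head"), n_heads=3)(inp)
model = models.Model(inp, layers.Concatenate()(heads))  # 3 parallel heads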
import datetime

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import (Input, callbacks, datasets, layers, models,
                              optimizers)

# `import_from_path` and `get_filename` are project-local helpers used below.


def train_model(learning_algorithm, dataset, hidden_layers, batch_dim,
                learning_rate, seed):
    """Trains a neural network with tf.keras using automatic differentiation.

    Keyword arguments:
    learning_algorithm -- either 'EBP' for error backpropagation (with
        softmax and cross-entropy loss) or 'BrainProp'
    dataset -- either 'MNIST', 'CIFAR10' or 'CIFAR100'
    hidden_layers -- list of layers for the network (accepts 'Dense(n)',
        'Conv2D(n_filters, (ksize_x, ksize_y))' and any other layer with
        full input)
    batch_dim -- minibatch size
    learning_rate -- learning rate used for training
    seed -- integer, seed used for reproducible results
    """
    save_plots = True
    print("Experiment begins, training on {} with {}".format(
        dataset, learning_algorithm))
    np.random.seed(seed)
    tf.random.set_seed(seed)

    if dataset == 'MNIST':
        (train_images, train_labels), (test_images, test_labels) = \
            datasets.mnist.load_data()
        if len(np.shape(train_images)) < 4:
            train_images = tf.expand_dims(train_images, -1).numpy()
            test_images = tf.expand_dims(test_images, -1).numpy()
    elif dataset == 'CIFAR10':
        (train_images, train_labels), (test_images, test_labels) = \
            datasets.cifar10.load_data()
    elif dataset == 'CIFAR100':
        (train_images, train_labels), (test_images, test_labels) = \
            datasets.cifar100.load_data(label_mode='fine')
    else:
        raise ValueError(
            "Unknown dataset. Choose either 'MNIST', 'CIFAR10' or 'CIFAR100'.")

    if tf.reduce_max(train_images) > 1:
        train_images = train_images / 255.0
    if tf.reduce_max(test_images) > 1:
        test_images = test_images / 255.0

    image_shape = np.shape(train_images)[1:]
    n_classes = tf.cast(tf.reduce_max(train_labels) + 1, dtype=tf.int32)
    n_batches = len(train_images) // batch_dim
    train_labels = tf.keras.utils.to_categorical(train_labels, n_classes,
                                                 dtype='float32')
    test_labels = tf.keras.utils.to_categorical(test_labels, n_classes,
                                                dtype='float32')

    # Prepare the architecture and optimizer for the selected algorithm.
    if learning_algorithm == 'EBP':
        output_activation_function = 'softmax'
        loss = 'categorical_crossentropy'
        metric = 'accuracy'
        output_layer = layers.Dense
    elif learning_algorithm == 'BrainProp':
        output_activation_function = 'linear'
        metric = 'accuracy'
        brainprop = import_from_path('brainprop', file_path="brainprop.py")
        loss = brainprop.BrainPropLoss(batch_size=batch_dim,
                                       n_classes=n_classes, replicas=1)
        output_layer = brainprop.BrainPropLayer
    else:
        raise ValueError(
            "Unknown learning algorithm. Choose between 'EBP' and 'BrainProp'.")

    optimizer = optimizers.SGD(learning_rate=learning_rate, momentum=0.)
    bias = False
    initializer = tf.random_normal_initializer(mean=0., stddev=0.01)
    regularizer = None
    pad = 'same'

    model = models.Sequential()
    model.add(Input(shape=image_shape))
    flatten_layer = 0  # a Flatten layer is needed between 4-dim inputs and dense layers
    for hidden_layer in hidden_layers:
        if hidden_layer.__class__.__name__ == 'Dense' and flatten_layer < 1:
            model.add(layers.Flatten())
            flatten_layer += 1
        if hidden_layer.__class__.__name__ == 'Conv2D' and flatten_layer > 0:
            raise ValueError(
                "Please do not add convolutional layers after dense layers.")
        config = hidden_layer.get_config()
        layer = layers.deserialize({
            'class_name': hidden_layer.__class__.__name__,
            'config': config
        })
        # Override these attributes before the layer is built.
        layer.use_bias = bias
        layer.kernel_initializer = initializer
        layer.kernel_regularizer = regularizer
        if hidden_layer.__class__.__name__ == 'Conv2D':
            layer.padding = pad
        model.add(layer)

    last_layer = output_layer(n_classes,
                              activation=output_activation_function,
                              use_bias=bias,
                              kernel_regularizer=regularizer,
                              kernel_initializer=initializer)
    model.add(last_layer)
    model.summary()
    model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

    epochs = 500  # upper bound only; early stopping will act much earlier
    lr_schedule = None
    terminate_on_NaN = callbacks.TerminateOnNaN()
    earlystopping = callbacks.EarlyStopping(monitor='val_accuracy',
                                            min_delta=0.001,
                                            patience=10,
                                            verbose=1,
                                            mode='max',
                                            baseline=None,
                                            restore_best_weights=False)
    callbacks_list = list(
        filter(None, [lr_schedule, terminate_on_NaN, earlystopping]))

    tic_training = datetime.datetime.now()
    history = model.fit(train_images,
                        train_labels,
                        batch_size=batch_dim,
                        epochs=epochs,
                        validation_data=(test_images, test_labels),
                        shuffle=True,
                        verbose=2,
                        callbacks=callbacks_list)
    toc_training = datetime.datetime.now()
    elapsed = (toc_training - tic_training).seconds // 60
    print("Training, elapsed: {} minute{}.".format(
        elapsed, 's' if elapsed != 1 else ''))

    if save_plots:
        # Save a plot of the accuracy as a function of the epochs.
        filename_plot = get_filename('accuracy.png', dataset,
                                     learning_algorithm)
        n_epochs = len(history.history['accuracy'])
        plt.figure()
        plt.title("{} - {}".format(learning_algorithm, dataset), fontsize=16)
        plt.plot(history.history['accuracy'], label='accuracy', linewidth=2)
        plt.plot(history.history['val_accuracy'],
                 label='validation accuracy', linewidth=2)
        maximum_val_accuracy = np.max(history.history['val_accuracy'])
        argmax_val_accuracy = np.argmax(history.history['val_accuracy'])
        plt.plot([argmax_val_accuracy, argmax_val_accuracy],
                 [-0.4, maximum_val_accuracy], '--', color='green',
                 linewidth=1)
        plt.plot(argmax_val_accuracy, maximum_val_accuracy, 'ks',
                 markersize=7,
                 label='maximum = {:.5}'.format(maximum_val_accuracy))
        plt.xticks(list(plt.xticks()[0]) + [argmax_val_accuracy])
        plt.gca().get_xticklabels()[-1].set_color("white")
        plt.gca().get_xticklabels()[-1].set_fontweight('bold')
        plt.gca().get_xticklabels()[-1].set_bbox(
            dict(facecolor='green', edgecolor='white', alpha=0.8))
        plt.xlabel('Epoch', fontsize=14)
        plt.ylabel('Accuracy', fontsize=14)
        plt.xlim([-0.4, (n_epochs - .5)])
        plt.ylim([0.0, 1.05])
        plt.legend(loc='lower right', fontsize=12)
        print("Saving the accuracy plot as '{}'".format(filename_plot))
        plt.savefig(filename_plot, dpi=300, bbox_inches='tight')
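# Hypothetical driver for train_model; the argument values are only
# illustrative of what the function expects.
if __name__ == "__main__":
    train_model(learning_algorithm='EBP',
                dataset='MNIST',
                hidden_layers=[layers.Conv2D(32, (3, 3), activation='relu'),
                               layers.Dense(128, activation='relu')],
                batch_dim=128,
                learning_rate=0.01,
                seed=42)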
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers as kl
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dense, Layer

#%% serialize a layer to its {'class_name', 'config'} dict
l = kl.Dense(10, kernel_regularizer="l2")
kl.serialize(l)

#%% rebuild a layer from a minimal config
l1 = kl.deserialize({'class_name': 'Dense', 'config': {'units': 5}})
l1.get_config()

#%% the generic deserializer underlying layer deserialization
from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object

deserialize_keras_object({
    'class_name': 'Dense',
    'config': {
        'units': 5
    }
}, module_objects=globals())


#%% a custom layer with a single trainable scalar
class ConstantMultiple(Layer):

    def __init__(self, init_val: float = 1, regularizer=None, **kwargs):
        super().__init__(**kwargs)
        self.init_val = init_val
        self.regularizer = regularizers.get(regularizer)
        # Use the resolved regularizer (the original passed the raw argument)
        # and initialize the scalar from init_val, which was otherwise unused.
        self.c = self.add_weight(
            name="c",
            shape=(),
            initializer=tf.keras.initializers.Constant(self.init_val),
            regularizer=self.regularizer)

    def call(self, inputs):
        # Scale the input by the learned scalar; the original snippet ends
        # before call(), so this body is the natural reading of the name.
        return self.c * inputs
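#%% usage sketch for ConstantMultiple (assuming the call() added above)
x = tf.ones((2, 3))
scale = ConstantMultiple(init_val=2.0, regularizer="l2")
print(scale(x))  # every element multiplied by c, initialized to init_val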
def reset_layers(self):
    for layer in self.layers:
        if self.layers[layer].type != "Input":
            # A serialize/deserialize round trip rebuilds the layer from its
            # config, discarding the built weights.
            self.layers[layer].tf_layer = layers.deserialize(
                layers.serialize(self.layers[layer].tf_layer))
        else:
            self.layers[layer].tf_layer._name = self.layers[layer].name
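# Why reset_layers works: the round trip returns an unbuilt layer whose
# weights are freshly initialized at the next build.
from tensorflow.keras import layers

d = layers.Dense(4)
d.build((None, 2))
fresh = layers.deserialize(layers.serialize(d))
print(fresh.built)  # False: the old weights are gone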
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model


def add_model_output(modelIn, mode=None, num_add=None, activation=None):
    """
    This function modifies the last dense layer in the passed Keras model.
    The modification includes adding units and optionally changing the
    activation function.

    Parameters
    ----------
    modelIn : keras model
        Keras model to be modified.
    mode : string
        Mode to modify the layer. It could be:
        'abstain' for adding an arbitrary number of units for the abstention
        optimization strategy.
        'qtl' for quantile regression, which needs the outputs to be tripled.
        'het' for heteroscedastic regression, which needs the outputs to be
        doubled.
    num_add : integer
        Number of units to add. This only applies to the 'abstain' mode.
    activation : string
        String with Keras specification of activation function (e.g. 'relu',
        'sigmoid', 'softmax', etc.)

    Return
    ----------
    modelOut : keras model
        Keras model after the last dense layer has been modified as
        specified. If no mode is specified, the same model is returned. If
        the mode is not one of 'abstain', 'qtl' or 'het', an exception is
        raised.
    """
    if mode is None:
        return modelIn

    numlayers = len(modelIn.layers)
    # Find the last dense layer.
    i = -1
    while 'dense' not in (modelIn.layers[i].name) and ((i + numlayers) > 0):
        i -= 1
    # Minimal verification of the validity of the layer found.
    assert ((i + numlayers) >= 0)
    assert ('dense' in modelIn.layers[i].name)

    # Compute the new output size.
    if mode == 'abstain':
        assert num_add is not None
        new_output_size = modelIn.layers[i].output_shape[-1] + num_add
    elif mode == 'qtl':  # for quantile UQ
        new_output_size = 3 * modelIn.layers[i].output_shape[-1]
    elif mode == 'het':  # for heteroscedastic UQ
        new_output_size = 2 * modelIn.layers[i].output_shape[-1]
    else:
        raise Exception('ERROR ! Type of mode specified for adding outputs '
                        'to the model: ' + mode + ' not implemented... Exiting')

    # Recover the current layer options.
    config = modelIn.layers[i].get_config()
    # Update the number of units.
    config['units'] = new_output_size
    # Update the activation function if requested.
    if activation is not None:
        config['activation'] = activation
    # Bias initialization seems to help het and qtl.
    if mode == 'het' or mode == 'qtl':
        config['bias_initializer'] = 'ones'
    # Create the new Dense layer.
    reconstructed_layer = Dense.from_config(config)
    # Connect the new Dense layer to the one-before-last layer.
    additional = reconstructed_layer(modelIn.layers[i - 1].output)

    # If the layer to replace is not the last layer, re-add the remaining
    # layers on top (deserialize already builds each layer from its config,
    # so the original's extra from_config call was redundant).
    if i < -1:
        for j in range(i + 1, 0):
            config_j = modelIn.layers[j].get_config()
            reconstructed_layer = layers.deserialize({
                'class_name': modelIn.layers[j].__class__.__name__,
                'config': config_j
            })
            additional = reconstructed_layer(additional)

    modelOut = Model(modelIn.input, additional)
    return modelOut
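# Usage sketch for add_model_output on a small model; 'het' doubles the
# final Dense layer's units. The layer names and shapes are illustrative.
from tensorflow.keras import layers, models

base = models.Sequential([
    layers.Input(shape=(16,)),
    layers.Dense(8, activation='relu', name='dense_hidden'),
    layers.Dense(1, name='dense_out'),
])
het_model = add_model_output(base, mode='het')
het_model.summary()  # last layer now has 2 units, bias initialized to ones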
from tensorflow.keras.layers import deserialize


def add_layer_by_type(input_layer, layer_type, **kwargs):
    # Accept either a layer class or a string class name; the string path
    # goes through the Keras deserializer.
    if isinstance(layer_type, type):
        return layer_type(**kwargs)(input_layer)
    return deserialize({"class_name": layer_type, "config": kwargs})(input_layer)
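# Usage sketch: both paths build the same layer on the same input.
from tensorflow.keras import layers

inp = layers.Input(shape=(4,))
x = add_layer_by_type(inp, layers.Dense, units=8)  # pass the class directly
y = add_layer_by_type(inp, "Dense", units=8)       # route through deserialize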