def load(kmdl, path):
    """Load weights from the HDF5 file at ``path`` into ``kmdl`` in two passes.

    Pass 1 delegates to ``kmdl.load_weights(path, True, True)``; the two
    positional flags are forwarded unchanged (presumably ``by_name`` and
    ``skip_mismatch`` -- confirm against the Keras version in use).

    Pass 2 re-reads the file and, for every saved layer whose name matches
    exactly one model layer of exact type ``Conv2dLhcf`` or ``Conv2dLhcr``,
    pairs saved weights with the layer's variables. Pairing is done by
    substring match on each of the first three ``Conv2dLhcf.VAR_NAMES``.
    All pairs are applied in one ``KB.batch_set_value`` call.

    Args:
        kmdl: model object exposing ``load_weights`` and ``layers``.
        path: path of the HDF5 weight file.

    Raises:
        AssertionError: if a saved layer name matches more than one model
            layer, or if a variable-name fragment does not match exactly one
            model variable / matches more than one saved weight.
    """
    # load the parts which have identical names and shapes:
    # std->std; lhc-formable->lhc-formable
    kmdl.load_weights(path, True, True)

    from tensorflow.python.keras.saving.hdf5_format import (
        _legacy_weights,
        load_attributes_from_hdf5_group,
        preprocess_weights_for_loading,
    )

    def _as_text(value):
        # HDF5 attributes may come back as bytes or as str depending on the
        # h5py version; only decode when the value supports it.
        return value.decode('utf8') if hasattr(value, 'decode') else value

    # The context manager guarantees the file is closed even when one of the
    # steps below raises.
    with h5py.File(path, 'r') as file0:
        root = file0
        if 'layer_names' not in root.attrs and 'model_weights' in root:
            # Full-model save: the weights live in a sub-group.
            root = root['model_weights']

        if 'keras_version' in root.attrs:
            original_keras_version = _as_text(root.attrs['keras_version'])
        else:
            original_keras_version = '1'
        if 'backend' in root.attrs:
            original_backend = _as_text(root.attrs['backend'])
        else:
            original_backend = None

        layer_names = load_attributes_from_hdf5_group(root, 'layer_names')

        # Reverse index: layer name -> list of model layers with that name.
        index = {}
        for layer in kmdl.layers:
            if layer.name:
                index.setdefault(layer.name, []).append(layer)

        # load the remaining parts
        weight_value_tuples = []
        for name in layer_names:
            g = root[name]
            weight_names = load_attributes_from_hdf5_group(g, 'weight_names')
            weight_values = [
                np.asarray(g[weight_name]) for weight_name in weight_names
            ]

            candidates = index.get(name, [])
            if len(candidates) == 0:
                continue
            assert len(candidates) == 1
            layer = candidates[0]

            # Exact type check kept on purpose: subclasses are not handled.
            if type(layer) not in (Conv2dLhcf, Conv2dLhcr):
                continue

            weight_values = preprocess_weights_for_loading(
                layer, weight_values, original_keras_version,
                original_backend)
            wdict = dict(zip(weight_names, weight_values))
            symbolic_weights = _legacy_weights(layer)
            sdict = dict(zip((s.name for s in symbolic_weights),
                             symbolic_weights))

            for pname in Conv2dLhcf.VAR_NAMES[:3]:
                symb = [var for key, var in sdict.items() if pname in key]
                wght = [val for key, val in wdict.items() if pname in key]
                assert len(symb) == 1 and len(wght) <= 1
                if len(wght) == 1:
                    weight_value_tuples.append((symb[0], wght[0]))

        KB.batch_set_value(weight_value_tuples)
def _load_state_dict(self, model, weight_dict):
    """Copy the arrays stored in ``weight_dict`` into the weights of ``model``.

    For every layer that has weights, the values are looked up in
    ``weight_dict`` by each weight's ``name``, passed through
    ``preprocess_weights_for_loading`` (using the running ``keras_version``
    and ``K.backend()``), and queued. All assignments are then applied in a
    single ``K.batch_set_value`` call.

    Args:
        model: object exposing ``layers``.
        weight_dict: mapping from weight name to an array-like value.

    Raises:
        ValueError: if the number of preprocessed values differs from the
            number of trainable + non-trainable weights of a layer.
    """
    source_version = keras_version
    source_backend = K.backend()

    assignments = []
    for position, layer in enumerate(model.layers):
        names = [weight.name for weight in layer.weights]
        if not names:
            # Nothing to load for weightless layers.
            continue

        values = [np.asarray(weight_dict[name]) for name in names]
        variables = layer.trainable_weights + layer.non_trainable_weights
        values = preprocess_weights_for_loading(
            layer, values, source_version, source_backend)

        if len(values) != len(variables):
            raise ValueError(
                'Layer #{idx} (named "{name}" in the current model) '
                'was found to correspond to layer {name} in the save file. '
                'However the new layer {name} expects {expected} weights, '
                'but the saved weights have {found} elements.'.format(
                    idx=str(position),
                    name=layer.name,
                    expected=str(len(variables)),
                    found=str(len(values)),
                )
            )
        assignments.extend(zip(variables, values))

    K.batch_set_value(assignments)
def test_preprocess_weights_for_loading_for_model(layer):
    """Check that model-level preprocessing restores the original weights.

    The weights of a one-layer ``Sequential`` model are passed through
    ``convert_weights`` (defined elsewhere; presumably produces the legacy
    layout -- confirm) and then through ``preprocess_weights_for_loading``
    with ``original_keras_version='1'``. The result must match the model's
    own weights element-wise.

    Args:
        layer: the layer under test (supplied by the test parametrisation).
    """
    model = Sequential([layer])
    weights1 = model.get_weights()
    weights2 = preprocess_weights_for_loading(
        model, convert_weights(layer, weights1), original_keras_version='1')
    # zip() stops at the shorter sequence, so a truncated result would pass
    # the element-wise check vacuously; compare the lengths explicitly.
    assert len(weights1) == len(weights2)
    assert all(np.allclose(x, y, 1e-5) for x, y in zip(weights1, weights2))
def test_preprocess_weights_for_loading(layer):
    """Check that layer-level preprocessing restores the original weights.

    The layer's weights are passed through ``convert_weights`` (defined
    elsewhere; presumably produces the legacy layout -- confirm) and then
    through ``preprocess_weights_for_loading`` with
    ``original_keras_version='1'``. The result must match the layer's own
    weights element-wise.

    Args:
        layer: the layer under test (supplied by the test parametrisation).
    """
    # A model is needed to initialize weights.
    _ = Sequential([layer])
    weights1 = layer.get_weights()
    weights2 = preprocess_weights_for_loading(
        layer, convert_weights(layer, weights1), original_keras_version='1')
    # zip() stops at the shorter sequence, so a truncated result would pass
    # the element-wise check vacuously; compare the lengths explicitly.
    assert len(weights1) == len(weights2)
    assert all(np.allclose(x, y, 1e-5) for x, y in zip(weights1, weights2))
def test_preprocess_weights_for_loading_rnn_should_be_idempotent(
        self, layer_class, layer_args):
    """Preprocessing a layer's own weights must leave them unchanged.

    Builds ``layer_class(**layer_args)``, reads its weights, and runs them
    through ``hdf5_format.preprocess_weights_for_loading`` without an
    original Keras version. The returned weights must equal the input.

    Args:
        layer_class: layer class to instantiate.
        layer_args: keyword arguments for the constructor; its
            ``'input_shape'`` entry (if any) is also used to build the layer.
    """
    with self.cached_session():
        layer = layer_class(**layer_args)
        layer.build(input_shape=layer_args.get('input_shape'))
        weights1 = layer.get_weights()
        weights2 = hdf5_format.preprocess_weights_for_loading(
            layer, weights1)
        # zip() truncates silently; make a length mismatch fail loudly.
        self.assertEqual(len(weights1), len(weights2))
        for x, y in zip(weights1, weights2):
            self.assertAllClose(x, y, rtol=1e-05)
def test_preprocess_weights_for_loading_rnn_should_be_idempotent(
        self, layer_class, layer_args):
    """Verify that weight preprocessing is a no-op for a layer's own weights.

    The layer is constructed from ``layer_class`` and ``layer_args``, built
    with the ``'input_shape'`` entry of ``layer_args`` (``None`` if absent),
    and its weights are fed to
    ``hdf5_format.preprocess_weights_for_loading``. The output is compared
    with the input, both in count and in value.

    Args:
        layer_class: layer class to instantiate.
        layer_args: dict of constructor keyword arguments.
    """
    with self.cached_session():
        layer = layer_class(**layer_args)
        layer.build(input_shape=layer_args.get('input_shape'))
        weights1 = layer.get_weights()
        weights2 = hdf5_format.preprocess_weights_for_loading(
            layer, weights1)
        # Without this check a shorter result would be accepted, because
        # zip() only iterates over the common prefix.
        self.assertEqual(len(weights1), len(weights2))
        for expected, actual in zip(weights1, weights2):
            self.assertAllClose(expected, actual, rtol=1e-05)
def test_preprocess_weights_for_loading_rnn_should_be_idempotent(
        layer_class, args):
    """Loading weights from a RNN class to itself should not convert them.

    Args:
        layer_class: layer class to instantiate.
        args: keyword arguments passed to the ``layer_class`` constructor.
    """
    # layer can be instantiated only for supported backends
    layer = layer_class(**args)
    # A model is needed to initialize weights.
    _ = Sequential([layer])
    weights1 = layer.get_weights()
    weights2 = preprocess_weights_for_loading(layer, weights1)
    # zip() stops at the shorter sequence, so a truncated result would pass
    # the element-wise check vacuously; compare the lengths explicitly.
    assert len(weights1) == len(weights2)
    assert all(np.allclose(x, y, 1e-5) for x, y in zip(weights1, weights2))
def _load_weights_from_tiledb(
    self,
    model_array_results: Mapping[str, Any],
    model: tf.keras.Model,
    original_keras_version: Optional[str],
    original_backend: Optional[str],
) -> None:
    """Assign the weights stored in ``model_array_results`` to ``model``.

    The number of stored layers that have weight names must equal the number
    of model layers that have weights. Stored values are then read per model
    layer index, passed through ``preprocess_weights_for_loading``, and
    applied in a single ``backend.batch_set_value`` call.

    NOTE(review): the stored entries are deserialised with ``pickle.loads``;
    this is only safe if the array contents are trusted.

    Args:
        model_array_results: mapping with ``"layer_name"``,
            ``"weight_names"`` and ``"weight_values"`` entries; the latter
            two are indexed with ``.item(k)`` and hold pickled payloads.
        model: the model whose layers receive the weights.
        original_keras_version: forwarded to the preprocessing helper.
        original_backend: forwarded to the preprocessing helper.

    Raises:
        ValueError: if the layer counts differ, or if a layer expects a
            different number of weights than were stored for it.
    """

    def variables_of(layer):
        return layer.trainable_weights + layer.non_trainable_weights

    expected_layer_count = sum(
        1 for layer in model.layers if variables_of(layer)
    )

    # Stored layers count only when they carry at least one weight name.
    stored_layer_names = [
        name
        for position, name in enumerate(model_array_results["layer_name"])
        if pickle.loads(model_array_results["weight_names"].item(position))
    ]

    if len(stored_layer_names) != expected_layer_count:
        raise ValueError(
            f"You are trying to load a weight file with {len(stored_layer_names)} "
            f"layers into a model with {expected_layer_count} layers"
        )

    assignments: List[Tuple[tf.Variable, np.ndarray]] = []
    for position, layer in enumerate(model.layers):
        variables = variables_of(layer)
        stored_values = preprocess_weights_for_loading(
            layer,
            pickle.loads(model_array_results["weight_values"].item(position)),
            original_keras_version,
            original_backend,
        )
        if len(stored_values) != len(variables):
            raise ValueError(
                f'Layer #{position} (named "{layer.name}" in the current model) was found '
                f"to correspond to layer {layer} in the save file. However the new "
                f"layer {layer.name} expects {len(variables)} weights, "
                f"but the saved weights have {len(stored_values)} elements"
            )
        assignments.extend(zip(variables, stored_values))

    backend.batch_set_value(assignments)
def test_weight_preprocessing(self):
    """Smoke-test ``preprocess_weights_for_loading`` with version-'1' weights.

    Each case builds a layer, feeds it a list of random arrays, and calls
    ``hdf5_format.preprocess_weights_for_loading`` with
    ``original_keras_version='1'``. Nothing is asserted about the result;
    the test only requires that the calls complete. The same is then done
    for a ``Sequential`` model and a functional ``Model``.
    """
    input_dim = 3
    output_dim = 3
    size = 2

    def random_triples(repeats):
        # Fresh random arrays of shapes (input_dim, output_dim),
        # (output_dim, output_dim) and (output_dim,), repeated `repeats`
        # times in that order.
        shapes = (
            (input_dim, output_dim),
            (output_dim, output_dim),
            (output_dim,),
        )
        return [
            np.random.random(shape)
            for _ in range(repeats)
            for shape in shapes
        ]

    cases = [
        (
            keras.layers.Bidirectional(keras.layers.SimpleRNN(2)),
            [np.random.random((2, 1)), np.random.random((2, 1))],
            (None, 3, 2),
        ),
        (
            keras.layers.TimeDistributed(keras.layers.Dense(1)),
            [np.random.random((2, 1)), np.random.random((1,))],
            (None, 3, 2),
        ),
        (
            keras.layers.Conv1D(output_dim, size, use_bias=False),
            [np.random.random((output_dim, input_dim, size, 1))],
            (None, 4, input_dim),
        ),
        (
            keras.layers.Conv2D(output_dim, size, use_bias=False,
                                data_format='channels_first'),
            [np.random.random((output_dim, input_dim, size, size))],
            (None, input_dim, 4, 4),
        ),
        (
            keras.layers.Conv2DTranspose(output_dim, size, use_bias=False,
                                         data_format='channels_first'),
            [np.random.random((output_dim, input_dim, size, size))],
            (None, input_dim, 4, 4),
        ),
        (
            keras.layers.Conv2DTranspose(output_dim, size, use_bias=False,
                                         data_format='channels_last'),
            [np.random.random((size, size, input_dim, output_dim))],
            (None, 4, 4, input_dim),
        ),
        (
            keras.layers.Conv3D(output_dim, size, use_bias=False,
                                data_format='channels_first'),
            [np.random.random((output_dim, input_dim, size, size, size))],
            (None, input_dim, 4, 4, 4),
        ),
        (
            keras.layers.GRUV1(output_dim),
            random_triples(3),
            (None, 4, input_dim),
        ),
        (
            keras.layers.LSTMV1(output_dim),
            random_triples(4),
            (None, 4, input_dim),
        ),
    ]

    for layer, weights, input_shape in cases:
        layer.build(input_shape)
        hdf5_format.preprocess_weights_for_loading(
            layer, weights, original_keras_version='1')

    # Model-level entry points: Sequential first, then functional.
    sequential_model = keras.models.Sequential(
        [keras.layers.Dense(2, input_dim=2)])
    hdf5_format.preprocess_weights_for_loading(
        sequential_model, sequential_model.weights,
        original_keras_version='1')

    inputs = keras.Input((2,))
    outputs = keras.layers.Dense(2)(inputs)
    functional_model = keras.models.Model(inputs, outputs)
    hdf5_format.preprocess_weights_for_loading(
        functional_model, functional_model.weights,
        original_keras_version='1')
def test_weight_preprocessing(self):
    """Run ``preprocess_weights_for_loading`` over a set of layers and models.

    Every case pairs a layer with a list of random arrays and an input shape.
    The layer is built and the arrays are passed to
    ``hdf5_format.preprocess_weights_for_loading`` with
    ``original_keras_version='1'``. The return values are discarded; the
    test passes as long as no call raises. A ``Sequential`` model and a
    functional ``Model`` are exercised the same way at the end.
    """
    input_dim = 3
    output_dim = 3
    size = 2
    rand = np.random.random

    cases = []

    def add_case(layer, weights, input_shape):
        cases.append((layer, weights, input_shape))

    add_case(
        keras.layers.Bidirectional(keras.layers.SimpleRNN(2)),
        [rand((2, 1)), rand((2, 1))],
        (None, 3, 2),
    )
    add_case(
        keras.layers.TimeDistributed(keras.layers.Dense(1)),
        [rand((2, 1)), rand((1,))],
        (None, 3, 2),
    )
    add_case(
        keras.layers.Conv1D(output_dim, size, use_bias=False),
        [rand((output_dim, input_dim, size, 1))],
        (None, 4, input_dim),
    )
    add_case(
        keras.layers.Conv2D(output_dim, size, use_bias=False,
                            data_format='channels_first'),
        [rand((output_dim, input_dim, size, size))],
        (None, input_dim, 4, 4),
    )
    add_case(
        keras.layers.Conv2DTranspose(output_dim, size, use_bias=False,
                                     data_format='channels_first'),
        [rand((output_dim, input_dim, size, size))],
        (None, input_dim, 4, 4),
    )
    add_case(
        keras.layers.Conv2DTranspose(output_dim, size, use_bias=False,
                                     data_format='channels_last'),
        [rand((size, size, input_dim, output_dim))],
        (None, 4, 4, input_dim),
    )
    add_case(
        keras.layers.Conv3D(output_dim, size, use_bias=False,
                            data_format='channels_first'),
        [rand((output_dim, input_dim, size, size, size))],
        (None, input_dim, 4, 4, 4),
    )

    # The recurrent cases take the same three shapes repeated several times.
    triple = ((input_dim, output_dim), (output_dim, output_dim),
              (output_dim,))
    add_case(
        keras.layers.GRU(output_dim),
        [rand(shape) for _ in range(3) for shape in triple],
        (None, 4, input_dim),
    )
    add_case(
        keras.layers.LSTM(output_dim),
        [rand(shape) for _ in range(4) for shape in triple],
        (None, 4, input_dim),
    )

    for layer, weights, input_shape in cases:
        layer.build(input_shape)
        hdf5_format.preprocess_weights_for_loading(
            layer, weights, original_keras_version='1')

    seq = keras.models.Sequential([keras.layers.Dense(2, input_dim=2)])
    hdf5_format.preprocess_weights_for_loading(
        seq, seq.weights, original_keras_version='1')

    x = keras.Input((2,))
    y = keras.layers.Dense(2)(x)
    func = keras.models.Model(x, y)
    hdf5_format.preprocess_weights_for_loading(
        func, func.weights, original_keras_version='1')
def load_weights_from_hdf5_group_by_name_mapping(f, layers, name_mapping, skip_mismatch=False):
    """Implements name-based weight loading.

    (instead of topological weight loading).

    Layers that have no matching name are skipped.

    Args:
        f: A pointer to a HDF5 group.
        layers: a list of target layers.
        name_mapping: name mapping dict.
            NOTE(review): this argument is accepted but never read by the
            body; saved layer names are matched against ``layer.name``
            directly. Confirm the intended mapping direction before wiring
            it in.
        skip_mismatch: Boolean, whether to skip loading of layers
            where there is a mismatch in the number of weights,
            or a mismatch in the shape of the weights.

    Raises:
        ValueError: in case of mismatch between provided layers
            and weights file and skip_mismatch=False.
    """

    def _as_text(value):
        # HDF5 attributes may be bytes or str depending on the h5py version.
        return value.decode('utf8') if hasattr(value, 'decode') else value

    if 'keras_version' in f.attrs:
        original_keras_version = _as_text(f.attrs['keras_version'])
    else:
        original_keras_version = '1'
    if 'backend' in f.attrs:
        original_backend = _as_text(f.attrs['backend'])
    else:
        original_backend = None

    # New file format.
    layer_names = load_attributes_from_hdf5_group(f, 'layer_names')

    # Reverse index of layer name to list of layers with name.
    index = {}
    for layer in layers:
        if layer.name:
            index.setdefault(layer.name, []).append(layer)

    # We batch weight value assignments in a single backend call
    # which provides a speedup in TensorFlow.
    weight_value_tuples = []
    for k, name in enumerate(layer_names):
        g = f[name]
        weight_names = load_attributes_from_hdf5_group(g, 'weight_names')
        saved_values = [
            np.asarray(g[weight_name]) for weight_name in weight_names
        ]

        for layer in index.get(name, []):
            symbolic_weights = _legacy_weights(layer)
            # Every layer sharing this name starts from the values as saved.
            # A fresh list is passed so one layer's preprocessing (which may
            # rewrite list slots) is never fed into the next layer.
            weight_values = preprocess_weights_for_loading(
                layer, list(saved_values), original_keras_version,
                original_backend)

            if len(weight_values) != len(symbolic_weights):
                if skip_mismatch:
                    logging.warning(
                        'Skipping loading of weights for layer {}'.format(
                            layer.name) +
                        ' due to mismatch in number of weights '
                        '({} vs {}).'.format(
                            len(symbolic_weights), len(weight_values)))
                    continue
                raise ValueError(
                    'Layer #' + str(k) + ' (named "' + layer.name +
                    '") expects ' + str(len(symbolic_weights)) +
                    ' weight(s), but the saved weights' + ' have ' +
                    str(len(weight_values)) + ' element(s).')

            # Set values.
            for symbolic_weight, weight_value in zip(symbolic_weights,
                                                     weight_values):
                expected_shape = backend.int_shape(symbolic_weight)
                if expected_shape != weight_value.shape:
                    if skip_mismatch:
                        # Only this weight is skipped; the remaining weights
                        # of the layer are still considered.
                        logging.warning(
                            'Skipping loading of weights for layer {}'.format(
                                layer.name) +
                            ' due to mismatch in shape ({} vs {}).'.format(
                                symbolic_weight.shape, weight_value.shape))
                        continue
                    raise ValueError(
                        'Layer #' + str(k) + ' (named "' + layer.name +
                        '"), weight ' + str(symbolic_weight) +
                        ' has shape {}'.format(expected_shape) +
                        ', but the saved weight has shape ' +
                        str(weight_value.shape) + '.')
                weight_value_tuples.append((symbolic_weight, weight_value))

    backend.batch_set_value(weight_value_tuples)
def load_weights_from_hdf5_group_by_name(f, layers, partial_loading=False, verbose=0):
    """Implements name-based weight loading.

    (instead of topological weight loading).

    Layers that have no matching name are skipped.

    # Arguments
        f: A pointer to a HDF5 group.
        layers: a list of target layers.
        partial_loading: if true, the saved weights of the layers named
            'conv1/conv', 'conv2_block1_1_conv', 'conv1/bn' and
            'conv2_block1_0_bn' are padded so that the relevant axis grows
            from 64 to the size of the first weight of the matching model
            layer. Padding is skipped when that size is already 64 or when
            the model has no layer with that name.
            NOTE(review): the code assumes the saved size is 64 and the
            model size is larger; a smaller model size is not handled.
        verbose: when greater than 0, the collected (variable, value) pairs
            are printed before they are applied (debugging aid).

    # Returns
        List with one entry (the layer name) per model layer that received
        a non-empty set of weights.

    # Raises
        ValueError: in case of mismatch between provided layers
            and weights file.
    """

    def _as_text(value):
        # HDF5 attributes may be bytes or str depending on the h5py version.
        return value.decode('utf8') if hasattr(value, 'decode') else value

    if 'keras_version' in f.attrs:
        original_keras_version = _as_text(f.attrs['keras_version'])
    else:
        original_keras_version = '1'
    if 'backend' in f.attrs:
        original_backend = _as_text(f.attrs['backend'])
    else:
        original_backend = None

    # New file format.
    layer_names = _load_attributes_from_hdf5_group(f, 'layer_names')

    # Reverse index of layer name to list of layers with name.
    index = {}
    for layer in layers:
        if layer.name:
            index.setdefault(layer.name, []).append(layer)

    # We batch weight value assignments in a single backend call
    # which provides a speedup in TensorFlow.
    weight_value_tuples = []
    loaded_layer = []
    for k, name in enumerate(layer_names):
        g = f[name]
        weight_names = [_as_text(n) for n in g.attrs['weight_names']]
        saved_values = [g[weight_name] for weight_name in weight_names]
        targets = index.get(name, [])

        # extra channel for conv1 and bn_data
        if partial_loading and targets:
            if name == 'conv1/conv':
                ref_shape = targets[0].weights[0].shape
                if ref_shape[-1] != 64:
                    add_channel = ref_shape[-1] - 64
                    _zeros = np.zeros(list(ref_shape[:-1]) + [add_channel])
                    saved_values[0] = np.concatenate(
                        [np.array(saved_values[0]), _zeros], axis=-1)
            elif name == 'conv2_block1_1_conv':
                ref_shape = targets[0].weights[0].shape
                if ref_shape[-2] != 64:
                    add_channel = ref_shape[-2] - 64
                    _zeros = np.zeros(
                        list(ref_shape[:-2]) + [add_channel, ref_shape[-1]])
                    saved_values[0] = np.concatenate(
                        [np.array(saved_values[0]), _zeros], axis=-2)
            elif name in ('conv1/bn', 'conv2_block1_0_bn'):
                ref_shape = targets[0].weights[0].shape
                if ref_shape[-1] != 64:
                    add_channel = ref_shape[-1] - 64
                    _zeros = np.zeros(add_channel)
                    _ones = np.ones(add_channel)
                    # Padding pattern kept as in the original code:
                    # weights 0 and 2 get zeros, weights 1 and 3 get ones.
                    saved_values[0] = np.concatenate([saved_values[0], _zeros])
                    saved_values[1] = np.concatenate([saved_values[1], _ones])
                    saved_values[2] = np.concatenate([saved_values[2], _zeros])
                    saved_values[3] = np.concatenate([saved_values[3], _ones])

        for layer in targets:
            symbolic_weights = layer.weights
            # Each layer sharing this name is preprocessed from the saved
            # (and possibly padded) values, never from another layer's
            # already-preprocessed output.
            weight_values = preprocess_weights_for_loading(
                layer, list(saved_values), original_keras_version,
                original_backend)
            if len(weight_values) != len(symbolic_weights):
                raise ValueError('Layer #' + str(k) + ' (named "' +
                                 layer.name + '") expects ' +
                                 str(len(symbolic_weights)) +
                                 ' weight(s), but the saved weights' +
                                 ' have ' + str(len(weight_values)) +
                                 ' element(s).')
            # Set values.
            weight_value_tuples.extend(zip(symbolic_weights, weight_values))
            if len(weight_values) != 0:
                loaded_layer.append(name)

    # for debugging purpose
    if verbose > 0:
        print(weight_value_tuples)

    K.batch_set_value(weight_value_tuples)
    return loaded_layer
def test_load_layers():
    """Load version-'1' weights into wrapped Conv2D and ConvLSTM2D layers.

    A model made of ``TimeDistributed(Conv2D)`` followed by
    ``Bidirectional(ConvLSTM2D)`` is built. All-zero weights in the old
    layouts are converted with ``preprocess_weights_for_loading`` and
    assigned in one ``K.batch_set_value`` call. The test then checks that
    the evaluated model weights equal the converted arrays.
    """
    from keras.layers import ConvLSTM2D, TimeDistributed
    from keras.layers import Bidirectional, Conv2D, Input
    from keras.models import Model

    if K.backend() in ('tensorflow', 'cntk'):
        input_shape = (10, 20, 20, 1)
    else:
        input_shape = (10, 1, 20, 20)
    inputs = Input(shape=input_shape)
    td_conv = TimeDistributed(Conv2D(15, (5, 5)))(inputs)
    bi_conv = Bidirectional(ConvLSTM2D(10, (3, 3)),
                            merge_mode='concat')(td_conv)
    model = Model(inputs=inputs, outputs=bi_conv)

    pending_assignments = []

    # TimeDistributed Conv2D layer
    # use 'channels_first' data format to check that
    # the function is being called correctly for Conv2D
    # old: (filters, stack_size, kernel_rows, kernel_cols)
    # new: (kernel_rows, kernel_cols, stack_size, filters)
    legacy_td_weights = [np.zeros((15, 1, 5, 5)), np.zeros((15,))]
    td_layer = model.layers[1]
    td_layer.layer.data_format = 'channels_first'
    converted_td_weights = preprocess_weights_for_loading(
        td_layer, legacy_td_weights, original_keras_version='1')
    td_variables = td_layer.weights
    assert len(td_variables) == len(converted_td_weights)
    pending_assignments.extend(zip(td_variables, converted_td_weights))

    # Bidirectional ConvLSTM2D layer
    # old ConvLSTM2D took a list of 12 weight tensors,
    # returns a list of 3 concatenated larger tensors.
    legacy_bi_weights = []
    for _direction in range(2):  # bidirectional
        for _ in range(4):
            legacy_bi_weights.extend([
                np.zeros((3, 3, 15, 10)),  # kernel
                np.zeros((3, 3, 10, 10)),  # recurrent kernel
                np.zeros((10,)),  # bias
            ])
    bi_layer = model.layers[2]
    converted_bi_weights = preprocess_weights_for_loading(
        bi_layer, legacy_bi_weights, original_keras_version='1')
    bi_variables = bi_layer.weights
    assert len(bi_variables) == len(converted_bi_weights)
    pending_assignments.extend(zip(bi_variables, converted_bi_weights))

    K.batch_set_value(pending_assignments)

    # The model must now hold exactly the converted arrays.
    for slot in range(2):
        assert np.all(
            K.eval(model.layers[1].weights[slot]) ==
            converted_td_weights[slot])
    for slot in range(6):
        assert np.all(
            K.eval(model.layers[2].weights[slot]) ==
            converted_bi_weights[slot])