def test_load_layers():
    from keras.layers import ConvLSTM2D, TimeDistributed, Bidirectional, Conv2D, Input
    from keras.models import Model

    if K.backend() == 'tensorflow' or K.backend() == 'cntk':
        inputs = Input(shape=(10, 20, 20, 1))
    else:
        inputs = Input(shape=(10, 1, 20, 20))
    td_conv = TimeDistributed(Conv2D(15, (5, 5)))(inputs)
    bi_convlstm2d = Bidirectional(ConvLSTM2D(10, (3, 3)),
                                  merge_mode='concat')(td_conv)
    model = Model(inputs=inputs, outputs=bi_convlstm2d)

    weight_value_tuples = []

    # TimeDistributed Conv2D layer
    # use 'channels_first' data format to check that the function is being
    # called correctly for Conv2D
    # old: (filters, stack_size, kernel_rows, kernel_cols)
    # new: (kernel_rows, kernel_cols, stack_size, filters)
    weight_tensor_td_conv_old = list()
    weight_tensor_td_conv_old.append(np.zeros((15, 1, 5, 5)))
    weight_tensor_td_conv_old.append(np.zeros((15,)))
    td_conv_layer = model.layers[1]
    td_conv_layer.layer.data_format = 'channels_first'
    weight_tensor_td_conv_new = topology.preprocess_weights_for_loading(
        td_conv_layer,
        weight_tensor_td_conv_old,
        original_keras_version='1')
    symbolic_weights = td_conv_layer.weights
    assert len(symbolic_weights) == len(weight_tensor_td_conv_new)
    weight_value_tuples += zip(symbolic_weights, weight_tensor_td_conv_new)

    # Bidirectional ConvLSTM2D layer
    # the old ConvLSTM2D took a list of 12 weight tensors per direction;
    # preprocessing returns a list of 3 concatenated, larger tensors per
    # direction (kernel, recurrent kernel, bias).
    weight_tensor_bi_convlstm_old = []
    for j in range(2):  # bidirectional
        for i in range(4):
            weight_tensor_bi_convlstm_old.append(np.zeros((3, 3, 15, 10)))  # kernel
            weight_tensor_bi_convlstm_old.append(np.zeros((3, 3, 10, 10)))  # recurrent kernel
            weight_tensor_bi_convlstm_old.append(np.zeros((10,)))  # bias

    bi_convlstm_layer = model.layers[2]
    weight_tensor_bi_convlstm_new = topology.preprocess_weights_for_loading(
        bi_convlstm_layer,
        weight_tensor_bi_convlstm_old,
        original_keras_version='1')

    symbolic_weights = bi_convlstm_layer.weights
    assert len(symbolic_weights) == len(weight_tensor_bi_convlstm_new)
    weight_value_tuples += zip(symbolic_weights, weight_tensor_bi_convlstm_new)

    K.batch_set_value(weight_value_tuples)

    assert np.all(K.eval(model.layers[1].weights[0]) == weight_tensor_td_conv_new[0])
    assert np.all(K.eval(model.layers[1].weights[1]) == weight_tensor_td_conv_new[1])
    assert np.all(K.eval(model.layers[2].weights[0]) == weight_tensor_bi_convlstm_new[0])
    assert np.all(K.eval(model.layers[2].weights[1]) == weight_tensor_bi_convlstm_new[1])
    assert np.all(K.eval(model.layers[2].weights[2]) == weight_tensor_bi_convlstm_new[2])
    assert np.all(K.eval(model.layers[2].weights[3]) == weight_tensor_bi_convlstm_new[3])
    assert np.all(K.eval(model.layers[2].weights[4]) == weight_tensor_bi_convlstm_new[4])
    assert np.all(K.eval(model.layers[2].weights[5]) == weight_tensor_bi_convlstm_new[5])
def test_gru_legacy_interface():
    old_layer = keras.layers.GRU(input_shape=[3, 5], output_dim=2, name='d')
    new_layer = keras.layers.GRU(2, input_shape=[3, 5], name='d')
    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())
    preprocess_weights_for_loading(
        new_layer,
        [np.random.random(x) for x in [(5, 2), (2, 2), (2,)] * 3],
        original_keras_version='1')

    old_layer = keras.layers.GRU(2,
                                 init='normal',
                                 inner_init='glorot_uniform',
                                 inner_activation='hard_sigmoid',
                                 W_regularizer='l1',
                                 U_regularizer='l1',
                                 b_regularizer='l1',
                                 dropout_W=0.1,
                                 dropout_U=0.1,
                                 name='GRU')
    new_layer = keras.layers.GRU(2,
                                 kernel_initializer='normal',
                                 recurrent_initializer='glorot_uniform',
                                 recurrent_activation='hard_sigmoid',
                                 kernel_regularizer='l1',
                                 recurrent_regularizer='l1',
                                 bias_regularizer='l1',
                                 dropout=0.1,
                                 recurrent_dropout=0.1,
                                 name='GRU')
    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())
def test_preprocess_weights_for_loading(layer):
    # A model is needed to initialize weights.
    _ = Sequential([layer])
    weights1 = layer.get_weights()
    weights2 = topology.preprocess_weights_for_loading(
        layer, convert_weights(layer, weights1),
        original_keras_version='1')
    assert all([np.allclose(x, y, 1e-5)
                for (x, y) in zip(weights1, weights2)])
def test_preprocess_weights_for_loading_for_model(layer):
    model = Sequential([layer])
    weights1 = model.get_weights()
    weights2 = topology.preprocess_weights_for_loading(
        model, convert_weights(layer, weights1),
        original_keras_version='1')
    assert all([np.allclose(x, y, 1e-5)
                for (x, y) in zip(weights1, weights2)])
def test_preprocess_weights_for_loading_rnn_should_be_idempotent(layer_class,
                                                                 layer_args):
    """Loading weights from an RNN class to itself should not convert
    the weights.
    """
    # layer can be instantiated only for supported backends
    layer = layer_class(**layer_args)
    # A model is needed to initialize weights.
    _ = Sequential([layer])
    weights1 = layer.get_weights()
    weights2 = topology.preprocess_weights_for_loading(layer, weights1)
    assert all([np.allclose(x, y, 1e-5)
                for (x, y) in zip(weights1, weights2)])
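# A minimal sketch (an assumption, not the original suite's exact wiring) of
# how the parametrized tests above might be driven with pytest; the real
# suite derives the layer list from the classes each backend supports.
import pytest
from keras.layers import GRU, LSTM

@pytest.mark.parametrize('layer_class, layer_args', [
    (GRU, {'units': 2, 'input_shape': (3, 5)}),
    (LSTM, {'units': 2, 'input_shape': (3, 5)}),
])
def test_rnn_weights_are_idempotent(layer_class, layer_args):
    # delegate to the idempotency check defined above
    test_preprocess_weights_for_loading_rnn_should_be_idempotent(
        layer_class, layer_args)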
def convert(model_path):
    cudnn_model = load_model(model_path)
    model = create_model(
        cudnn_model.get_layer(index=0).input_shape,
        cudnn_model.get_layer(index=-1).output_shape,
        False)
    cudnn_weights = cudnn_model.get_weights()
    weights = preprocess_weights_for_loading(model, cudnn_weights, '1')
    model.set_weights(weights)
    opt = Adam(lr=params['rate_1'])
    model.compile(opt, loss='categorical_crossentropy', metrics=['accuracy'])
    model.save(model_path[:-3] + '_cpu.h5')
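# Hypothetical invocation of convert() above; the file name is a placeholder,
# and create_model() / params come from the surrounding project (not shown).
convert('cudnn_model.h5')  # writes the CPU-loadable 'cudnn_model_cpu.h5'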
def load_weights_from_hdf5_group_by_name(f, layers):
    if 'keras_version' in f.attrs:
        original_keras_version = f.attrs['keras_version'].decode('utf8')
    else:
        original_keras_version = '1'
    if 'backend' in f.attrs:
        original_backend = f.attrs['backend'].decode('utf8')
    else:
        original_backend = None

    # New file format.
    layer_names = [n.decode('utf8') for n in f.attrs['layer_names']]

    # Reverse index of layer name (base name before any '-' suffix)
    # to list of layers with name.
    index = {}
    for layer in layers:
        if layer.name:
            index.setdefault(layer.name.split('-')[0], []).append(layer)

    # We batch weight value assignments in a single backend call
    # which provides a speedup in TensorFlow.
    weight_value_tuples = []
    for k, name in enumerate(layer_names):
        g = f[name]
        weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
        weight_values = [g[weight_name] for weight_name in weight_names]

        for layer in index.get(name, []):
            symbolic_weights = layer.weights
            weight_values = preprocess_weights_for_loading(
                layer,
                weight_values,
                original_keras_version,
                original_backend)
            if len(weight_values) != len(symbolic_weights):
                raise ValueError('Layer #' + str(k) +
                                 ' (named "' + layer.name +
                                 '") expects ' +
                                 str(len(symbolic_weights)) +
                                 ' weight(s), but the saved weights' +
                                 ' have ' + str(len(weight_values)) +
                                 ' element(s).')
            # Set values.
            for i in range(len(weight_values)):
                weight_value_tuples.append((symbolic_weights[i],
                                            weight_values[i]))
    K.batch_set_value(weight_value_tuples)
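# A minimal usage sketch for the loader above, assuming an existing Keras
# `model` and a weights file written by Keras ('weights.h5' is a placeholder).
# Full-model save files nest the weight groups under 'model_weights'.
import h5py

with h5py.File('weights.h5', 'r') as f:
    group = f['model_weights'] if 'model_weights' in f else f
    load_weights_from_hdf5_group_by_name(group, model.layers)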
def write_keras_hdf5_from_file(model, hdf5_in, hdf5_out):
    '''
    Generate an HDF5 file with trained model parameters given a Keras definition

    Parameters
    ----------
    model : Keras model
        Keras deep learning model
    hdf5_in : string
        Fully qualified file name of Keras HDF5 file
    hdf5_out : string
        Fully qualified file name of SAS-compatible HDF5 file

    '''
    # open input/output files
    try:
        f_in = h5py.File(hdf5_in, 'r')
    except IOError:
        sys.exit('File ' + hdf5_in + ' does not exist')

    try:
        f_out = h5py.File(hdf5_out, 'w')
    except IOError:
        sys.exit('File ' + hdf5_out + ' could not be created')

    if 'keras_version' in f_in.attrs:
        original_keras_version = f_in.attrs['keras_version'].decode('utf8')
    else:
        original_keras_version = '1'
    if 'backend' in f_in.attrs:
        original_backend = f_in.attrs['backend'].decode('utf8')
    else:
        original_backend = None

    try:
        image_data_format = K.image_data_format()

        # determine layers with weights
        filtered_layers = []
        for layer in model.layers:
            weights = layer.weights
            if weights:
                filtered_layers.append(layer)

        layer_names = [n.decode('utf8') for n in f_in.attrs['layer_names']]
        filtered_layer_names = []
        for name in layer_names:
            g = f_in[name]
            weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
            if weight_names:
                filtered_layer_names.append(name)
        layer_names = filtered_layer_names
        if len(layer_names) != len(filtered_layers):
            raise ValueError('You are trying to load a weight file '
                             'containing ' + str(len(layer_names)) +
                             ' layers into a model with ' +
                             str(len(filtered_layers)) + ' layers.')

        # determine permutation vector associated with flattening layer
        # (if it exists)
        flatten_layer_index = -1
        index = 0
        for layer in model.layers:
            if layer.__class__.__name__.lower() == 'flatten':
                flatten_layer_index = index
                break
            index = index + 1

        if flatten_layer_index != -1:
            layer = model.layers[flatten_layer_index]
            permute_layer_name = model.layers[flatten_layer_index + 1].name
            if image_data_format == 'channels_first':
                C, H, W = (layer.input_shape)[1:]
            else:
                H, W, C = (layer.input_shape)[1:]
            N = (layer.output_shape)[1]
            perm_index = [0] * N
            if image_data_format == 'channels_last':
                ii = 0
                for cc in range(C):
                    for hh in range(H):
                        for ww in range(W):
                            perm_index[ii] = hh * W * C + ww * C + cc
                            ii = ii + 1
            else:
                for nn in range(N):
                    perm_index[nn] = nn
        else:
            perm_index = []

        # let Keras read weights, reformat, and write to SAS-compatible file
        for k, name in enumerate(layer_names):
            g_in = f_in[name]
            g_out = f_out.create_group(name)
            new_weight_names = []

            weight_names = [n.decode('utf8')
                            for n in g_in.attrs['weight_names']]
            weight_values = [g_in[weight_name] for weight_name in weight_names]
            layer = filtered_layers[k]
            symbolic_weights = layer.weights
            weight_values = preprocess_weights_for_loading(
                layer,
                weight_values,
                original_keras_version,
                original_backend)
            if len(weight_values) != len(symbolic_weights):
                raise ValueError('Layer #' + str(k) +
                                 ' (named "' + layer.name +
                                 '" in the current model) was found to '
                                 'correspond to layer ' + name +
                                 ' in the saved file. '
                                 'However the new layer ' + layer.name +
                                 ' expects ' + str(len(symbolic_weights)) +
                                 ' weights, but the saved weights have ' +
                                 str(len(weight_values)) + ' elements.')

            # read/write weights
            for ii in range(len(weight_names)):
                tensor_in = np.zeros(weight_values[ii].shape,
                                     dtype=weight_values[ii].dtype)
                weight_values[ii].read_direct(tensor_in)

                # permute axes as needed to conform to SAS deep
                # learning "channels first" format
                if (image_data_format == 'channels_first') or (not perm_index):
                    # format: (C, fdim1, fdim2, fdim3) ==> (C, fdim3, fdim1, fdim2)
                    if len(tensor_in.shape) == 4:
                        tensor_out = np.transpose(tensor_in, (0, 3, 1, 2))
                    else:
                        tensor_out = tensor_in.copy()
                else:  # "channels last" format
                    # this is a vector - nothing to permute
                    if len(tensor_in.shape) == 1:
                        tensor_out = tensor_in.copy()
                    else:
                        # permute Conv2D tensor to "channels_first" format
                        if layer.__class__.__name__ == 'Conv2D':
                            tensor_out = np.transpose(tensor_in, (3, 2, 0, 1))
                        # have to account for neuron ordering in first dense
                        # layer following flattening operation
                        elif layer.__class__.__name__ == 'Dense':
                            if layer.name == permute_layer_name:
                                tensor_out = np.zeros(tensor_in.shape)
                                for jj in range(tensor_out.shape[0]):
                                    tensor_out[jj, :] = tensor_in[perm_index[jj], :]
                            else:
                                # not following flattening, just copy
                                tensor_out = tensor_in.copy()
                            # mimic Caffe layout
                            tensor_out = np.transpose(tensor_out, (1, 0))

                # save weight in format amenable to SAS
                dset_name = generate_dataset_name(layer, ii)
                new_weight_names.append(dset_name)
                g_out.create_dataset(dset_name, data=tensor_out)

            # update weight names
            g_out.attrs['weight_names'] = new_weight_names

    except ValueError as err_msg:
        print(err_msg)

    finally:
        # close files
        f_out.close()
        f_in.close()
def write_keras_hdf5_from_file(model, hdf5_in, hdf5_out):
    '''
    Generate an HDF5 file with trained model parameters given a Keras definition

    Parameters
    ----------
    model : Keras model
        Keras deep learning model
    hdf5_in : string
        Fully qualified file name of Keras HDF5 file
    hdf5_out : string
        Fully qualified file name of SAS-compatible HDF5 file

    '''
    # open input/output files
    if os.path.isfile(hdf5_in):
        f_in = h5py.File(hdf5_in, 'r')
        try:
            f_out = h5py.File(hdf5_out, 'w')
        except IOError:
            raise DLPyError('The specified file cannot be written: ' + hdf5_out)
    else:
        raise DLPyError('The specified file does not exist: ' + hdf5_in)

    if 'keras_version' in f_in.attrs:
        original_keras_version = f_in.attrs['keras_version'].decode('utf8')
    else:
        original_keras_version = '1'
    if 'backend' in f_in.attrs:
        original_backend = f_in.attrs['backend'].decode('utf8')
    else:
        original_backend = None

    try:
        image_data_format = K.image_data_format()

        # navigate to correct HDF5 group
        if 'layer_names' in f_in.attrs.keys():
            root_group = f_in
        elif 'layer_names' in f_in['model_weights'].attrs.keys():
            root_group = f_in['model_weights']
        else:
            raise DLPyError('Cannot read HDF5 file correctly')

        # determine layers with weights
        filtered_layers = []
        for layer in model.layers:
            weights = layer.weights
            if weights:
                filtered_layers.append(layer)

        layer_names = [n.decode('utf8')
                       for n in root_group.attrs['layer_names']]
        filtered_layer_names = []
        for name in layer_names:
            g = root_group[name]
            weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
            if weight_names:
                filtered_layer_names.append(name)
        layer_names = filtered_layer_names
        if len(layer_names) != len(filtered_layers):
            raise ValueError('You are trying to load a weight file '
                             'containing ' + str(len(layer_names)) +
                             ' layers into a model with ' +
                             str(len(filtered_layers)) + ' layers.')

        # determine permutation vector associated with flattening layer
        # (if it exists)
        flatten_layer_index = -1
        index = 0
        for layer in model.layers:
            if layer.__class__.__name__.lower() == 'flatten':
                flatten_layer_index = index
                break
            index = index + 1

        if flatten_layer_index != -1:
            layer = model.layers[flatten_layer_index]
            permute_layer_name = model.layers[flatten_layer_index + 1].name
            if image_data_format == 'channels_first':
                C, H, W = (layer.input_shape)[1:]
            else:
                H, W, C = (layer.input_shape)[1:]
            N = (layer.output_shape)[1]
            perm_index = [0] * N
            if image_data_format == 'channels_last':
                ii = 0
                for cc in range(C):
                    for hh in range(H):
                        for ww in range(W):
                            perm_index[ii] = hh * W * C + ww * C + cc
                            ii = ii + 1
            else:
                for nn in range(N):
                    perm_index[nn] = nn
        else:
            perm_index = []
            permute_layer_name = None

        f_out.attrs['layer_names'] = [l.replace('/', '_').encode('utf8')
                                      for l in layer_names]

        # let Keras read weights, reformat, and write to SAS-compatible file
        for k, name in enumerate(layer_names):
            g_in = root_group[name]
            g_out = f_out.create_group(name.replace('/', '_'))
            new_weight_names = []

            weight_names = [n.decode('utf8')
                            for n in g_in.attrs['weight_names']]
            weight_values = [g_in[weight_name] for weight_name in weight_names]
            layer = filtered_layers[k]
            symbolic_weights = layer.weights
            weight_values = preprocess_weights_for_loading(
                layer,
                weight_values,
                original_keras_version,
                original_backend)
            if len(weight_values) != len(symbolic_weights):
                raise ValueError('Layer #' + str(k) +
                                 ' (named "' + layer.name +
                                 '" in the current model) was found to '
                                 'correspond to layer ' + name +
                                 ' in the saved file. '
                                 'However the new layer ' + layer.name +
                                 ' expects ' + str(len(symbolic_weights)) +
                                 ' weights, but the saved weights have ' +
                                 str(len(weight_values)) + ' elements.')

            if layer.__class__.__name__.lower() == 'batchnormalization':
                bn_gamma = np.ones(weight_values[0].shape,
                                   dtype=weight_values[0].dtype)
                bn_beta = np.zeros(weight_values[0].shape,
                                   dtype=weight_values[0].dtype)
                layer_config = layer.get_config()

                # if scale = False and center = True
                if not layer_config['scale'] and layer_config['center']:
                    weight_values.insert(0, bn_gamma)
                    weight_names.insert(0, layer.name.replace('/', '_') + '/' + 'gamma:0')
                # if scale = True and center = False
                elif layer_config['scale'] and not layer_config['center']:
                    weight_values.insert(1, bn_beta)
                    weight_names.insert(1, layer.name.replace('/', '_') + '/' + 'beta:0')
                # if scale = False and center = False
                elif not layer_config['scale'] and not layer_config['center']:
                    weight_values = [bn_gamma, bn_beta] + weight_values
                    weight_names = [layer.name.replace('/', '_') + '/' + 'gamma:0',
                                    layer.name.replace('/', '_') + '/' + 'beta:0'] + weight_names

                # add epsilon to variance values to avoid divide by zero
                if 'epsilon' in layer_config.keys():
                    for ii, wgt_name in enumerate(weight_names):
                        if 'moving_variance' in wgt_name:
                            weight_values[ii] = weight_values[ii] + (
                                layer_config['epsilon'] *
                                np.ones(weight_values[ii].shape,
                                        dtype=weight_values[ii].dtype))

            # read/write weights
            for ii in range(len(weight_names)):
                if type(weight_values[ii]) == np.ndarray:
                    tensor_in = weight_values[ii]
                else:
                    tensor_in = np.zeros(weight_values[ii].shape,
                                         dtype=weight_values[ii].dtype)
                    weight_values[ii].read_direct(tensor_in)

                # permute axes as needed to conform to SAS deep
                # learning "channels first" format
                if image_data_format == 'channels_first':
                    # format: (C, fdim1, fdim2, fdim3) ==> (C, fdim3, fdim1, fdim2)
                    if len(tensor_in.shape) == 4:
                        tensor_out = np.transpose(tensor_in, (0, 3, 1, 2))
                    else:
                        tensor_out = tensor_in.copy()
                else:  # "channels last" format
                    # this is a vector - nothing to permute
                    if len(tensor_in.shape) == 1:
                        tensor_out = tensor_in.copy()
                    else:
                        # permute Conv2D tensor to "channels_first" format
                        if layer.__class__.__name__ == 'Conv2D':
                            tensor_out = np.transpose(tensor_in, (3, 2, 0, 1))
                        # have to account for neuron ordering in first dense
                        # layer following flattening operation
                        elif layer.__class__.__name__ == 'Dense':
                            if (permute_layer_name is not None) and (layer.name == permute_layer_name):
                                tensor_out = np.zeros(tensor_in.shape)
                                for jj in range(tensor_out.shape[0]):
                                    tensor_out[jj, :] = tensor_in[perm_index[jj], :]
                            else:
                                # not following flattening, just copy
                                tensor_out = tensor_in.copy()
                            # mimic Caffe layout
                            tensor_out = np.transpose(tensor_out, (1, 0))

                # save weight in format amenable to SAS
                dset_name = generate_dataset_name(layer, ii)
                new_weight_names.append(dset_name)
                g_out.create_dataset(dset_name, data=tensor_out)

            # update weight names
            g_out.attrs['weight_names'] = new_weight_names

    except ValueError as err_msg:
        print(err_msg)

    finally:
        # close files
        f_out.close()
        f_in.close()
def test_lstm_legacy_interface():
    old_layer = keras.layers.LSTM(input_shape=[3, 5], output_dim=2, name='d')
    new_layer = keras.layers.LSTM(2, input_shape=[3, 5], name='d')
    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())
    preprocess_weights_for_loading(
        new_layer,
        [np.random.random(x) for x in [(5, 2), (2, 2), (2,)] * 4],
        original_keras_version='1')

    old_layer = keras.layers.LSTM(input_shape=[3, 5], output_dim=2, name='d',
                                  consume_less='mem')
    new_layer = keras.layers.LSTM(2, input_shape=[3, 5], name='d',
                                  implementation=1)
    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())

    old_layer = keras.layers.LSTM(input_dim=5, input_length=3, output_dim=2,
                                  name='d', consume_less='mem')
    new_layer = keras.layers.LSTM(2, input_shape=[3, 5], name='d',
                                  implementation=1)
    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())

    old_layer = keras.layers.LSTM(input_dim=5, output_dim=2, name='d',
                                  consume_less='mem')
    new_layer = keras.layers.LSTM(2, input_shape=[None, 5], name='d',
                                  implementation=1)
    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())

    old_layer = keras.layers.LSTM(input_shape=[3, 5], output_dim=2, name='d',
                                  consume_less='gpu')
    new_layer = keras.layers.LSTM(2, input_shape=[3, 5], name='d',
                                  implementation=2)
    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())

    old_layer = keras.layers.LSTM(2,
                                  init='normal',
                                  inner_init='glorot_uniform',
                                  forget_bias_init='one',
                                  inner_activation='hard_sigmoid',
                                  W_regularizer='l1',
                                  U_regularizer='l1',
                                  b_regularizer='l1',
                                  dropout_W=0.1,
                                  dropout_U=0.1,
                                  name='LSTM')
    new_layer = keras.layers.LSTM(2,
                                  kernel_initializer='normal',
                                  recurrent_initializer='glorot_uniform',
                                  unit_forget_bias=True,
                                  recurrent_activation='hard_sigmoid',
                                  kernel_regularizer='l1',
                                  recurrent_regularizer='l1',
                                  bias_regularizer='l1',
                                  dropout=0.1,
                                  recurrent_dropout=0.1,
                                  name='LSTM')
    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())

    old_layer = keras.layers.LSTM(2,
                                  init='normal',
                                  inner_init='glorot_uniform',
                                  forget_bias_init='zero',
                                  inner_activation='hard_sigmoid',
                                  W_regularizer='l1',
                                  U_regularizer='l1',
                                  b_regularizer='l1',
                                  dropout_W=0.1,
                                  dropout_U=0.1,
                                  name='LSTM')
    new_layer = keras.layers.LSTM(2,
                                  kernel_initializer='normal',
                                  recurrent_initializer='glorot_uniform',
                                  unit_forget_bias=True,
                                  recurrent_activation='hard_sigmoid',
                                  kernel_regularizer='l1',
                                  recurrent_regularizer='l1',
                                  bias_regularizer='l1',
                                  dropout=0.1,
                                  recurrent_dropout=0.1,
                                  name='LSTM')
    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())
def load_weights_from_hdf5_group_by_name_assume_weight_order(
        f, layers, layer_name_map={}, skip_mismatch=False, verbose=False):
    """Adapted from the function with the same name in keras.engine.topology.

    Adds warnings when a layer in the HDF5 file fails to match any layer in
    the argument `layers`, and optionally prints all assigned weights' names.

    Implements name-based weight loading (instead of topological weight
    loading). Layers that have no matching name are skipped.

    # Arguments
        f: A pointer to an HDF5 group.
        layers: A list of target layers.
        layer_name_map: Optional dict mapping layer names in `layers`
            to layer names in the HDF5 file.
        skip_mismatch: Boolean, whether to skip loading of layers
            where there is a mismatch in the number of weights,
            or a mismatch in the shape of the weights.

    # Raises
        ValueError: in case of mismatch between provided layers
            and weights file and skip_mismatch=False.
    """
    if 'keras_version' in f.attrs:
        original_keras_version = f.attrs['keras_version'].decode('utf8')
    else:
        original_keras_version = '1'
    if 'backend' in f.attrs:
        original_backend = f.attrs['backend'].decode('utf8')
    else:
        original_backend = None

    # New file format.
    layer_names = [n.decode('utf8') for n in f.attrs['layer_names']]

    # Reverse index of layer name to list of layers with name.
    index = {}
    for layer in layers:
        if layer.name:
            if layer.name in layer_name_map:
                index.setdefault(layer_name_map[layer.name], []).append(layer)
            else:
                index.setdefault(layer.name, []).append(layer)

    # We batch weight value assignments in a single backend call
    # which provides a speedup in TensorFlow.
    weight_value_tuples = []
    for k, name in enumerate(layer_names):
        g = f[name]
        weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
        weight_values = [g[weight_name] for weight_name in weight_names]

        for layer in index.get(name, []):
            symbolic_weights = layer.weights
            weight_values = preprocess_weights_for_loading(
                layer,
                weight_values,
                original_keras_version,
                original_backend)
            if len(weight_values) != len(symbolic_weights):
                if skip_mismatch:
                    warnings.warn('Skipping loading of weights for layer {}'.format(layer.name) +
                                  ' due to mismatch in number of weights' +
                                  ' ({} vs {}).'.format(len(symbolic_weights),
                                                        len(weight_values)))
                    continue
                else:
                    raise ValueError('Layer #' + str(k) +
                                     ' (named "' + layer.name +
                                     '") expects ' +
                                     str(len(symbolic_weights)) +
                                     ' weight(s), but the saved weights' +
                                     ' have ' + str(len(weight_values)) +
                                     ' element(s).')
            # Set values.
            for i in range(len(weight_values)):
                if skip_mismatch:
                    if K.int_shape(symbolic_weights[i]) != weight_values[i].shape:
                        warnings.warn('Skipping loading of weights for layer {}'.format(layer.name) +
                                      ' due to mismatch in shape' +
                                      ' ({} vs {}).'.format(symbolic_weights[i].shape,
                                                            weight_values[i].shape))
                        continue
                weight_value_tuples.append((symbolic_weights[i],
                                            weight_values[i]))

    if len(weight_value_tuples) == 0:
        warnings.warn('No layer is loaded.')
        # return

    weights_in_layers = []
    for layer in layers:
        if layer.weights:
            weights_in_layers += layer.weights

    weights_to_be_assigned = [x for x, _ in weight_value_tuples]
    for wil in weights_in_layers:
        if wil not in weights_to_be_assigned:
            if verbose:
                warnings.warn('%s is not loaded.' % wil.name)

    if verbose:
        print('Weight value tuples:')
        from pprint import pprint
        pprint(weight_value_tuples)

    K.batch_set_value(weight_value_tuples)
def load_trainable_weights_only(filepath, layers):
    """Implements name-based weight loading (instead of topological weight
    loading) for frozen weights only: layers that are trainable are skipped,
    so only the weights of non-trainable layers are loaded from file.

    # Arguments
        filepath: Path to the HDF5 save file.
        layers: A list of target layers.

    # Raises
        ValueError: in case of mismatch between provided layers
            and weights file.
    """
    f = h5py.File(filepath, mode='r')
    if 'layer_names' not in f.attrs and 'model_weights' in f:
        f = f['model_weights']

    if 'keras_version' in f.attrs:
        original_keras_version = f.attrs['keras_version'].decode('utf8')
    else:
        original_keras_version = '1'
    if 'backend' in f.attrs:
        original_backend = f.attrs['backend'].decode('utf8')
    else:
        original_backend = None

    # New file format.
    layer_names = [n.decode('utf8') for n in f.attrs['layer_names']]

    # Reverse index of layer name to list of (non-trainable) layers with name.
    index = {}
    for layer in layers:
        if not layer.trainable:
            index.setdefault(layer.name, []).append(layer)

    # We batch weight value assignments in a single backend call
    # which provides a speedup in TensorFlow.
    weight_value_tuples = []
    for k, name in enumerate(layer_names):
        g = f[name]
        weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
        weight_values = [g[weight_name] for weight_name in weight_names]

        for layer in index.get(name, []):
            symbolic_weights = layer.weights
            weight_values = preprocess_weights_for_loading(
                layer,
                weight_values,
                original_keras_version,
                original_backend)
            if len(weight_values) != len(symbolic_weights):
                raise ValueError('Layer #' + str(k) +
                                 ' (named "' + layer.name +
                                 '") expects ' +
                                 str(len(symbolic_weights)) +
                                 ' weight(s), but the saved weights' +
                                 ' have ' + str(len(weight_values)) +
                                 ' element(s).')
            # Set values.
            for i in range(len(weight_values)):
                weight_value_tuples.append((symbolic_weights[i],
                                            weight_values[i]))
    K.batch_set_value(weight_value_tuples)
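# Hedged usage sketch for load_trainable_weights_only(): freeze the layers
# whose weights should be restored from file, then load. The model, layer
# slice, and file name are placeholders.
for frozen in model.layers[:-2]:
    frozen.trainable = False  # only these frozen layers are loaded from file
load_trainable_weights_only('pretrained_weights.h5', model.layers)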
def write_keras_hdf5_from_file(model, rnn_support, hdf5_in, hdf5_out):
    '''
    Generate an HDF5 file with trained model parameters given a Keras definition

    Parameters
    ----------
    model : Keras model
        Keras deep learning model
    rnn_support : boolean
        Indicates whether importing RNN models is supported
    hdf5_in : string
        Fully qualified file name of Keras HDF5 file
    hdf5_out : string
        Fully qualified file name of SAS-compatible HDF5 file

    Returns
    -------
    boolean or None
        For RNN models, indicates whether the model must run on GPU

    '''
    # open input/output files
    if os.path.isfile(hdf5_in):
        f_in = h5py.File(hdf5_in, 'r')
        try:
            f_out = h5py.File(hdf5_out, 'w')
        except IOError:
            raise DLPyError('The specified file cannot be written: ' + hdf5_out)
    else:
        raise DLPyError('The specified file does not exist: ' + hdf5_in)

    if 'keras_version' in f_in.attrs:
        original_keras_version = f_in.attrs['keras_version'].decode('utf8')
    else:
        original_keras_version = '1'
    if 'backend' in f_in.attrs:
        original_backend = f_in.attrs['backend'].decode('utf8')
    else:
        original_backend = None

    model_type = None
    use_gpu = None
    try:
        # determine model type
        # NOTE: must check ALL RNN layers to determine
        # whether model must run on GPU
        gpu_layers = []
        cpu_layers = []
        for layer in model.layers:
            class_name, sublayers = remove_layer_wrapper(layer)
            for tlayer in sublayers:
                # check for RNN layers
                if class_name in rnn_layer_classes:
                    model_type = 'RNN'
                    image_data_format = None
                    if class_name in rnn_gpu_layer_classes:
                        gpu_layers.append(True)
                    elif class_name in rnn_cpu_layer_classes:
                        cpu_layers.append(True)

        # verify that model is supported by SAS Deep Learning
        if model_type == 'RNN':
            if rnn_support:
                if (len(gpu_layers) > 0) and (len(cpu_layers) == 0):
                    use_gpu = True
                elif (len(gpu_layers) == 0) and (len(cpu_layers) > 0):
                    use_gpu = False
                elif (len(gpu_layers) > 0) and (len(cpu_layers) > 0):
                    raise DLPyError('A mixture of CPU and GPU layers was detected. '
                                    'This is not supported by SAS Deep Learning.')
            else:
                raise DLPyError('RNN model detected: your Viya deployment does not support '
                                'importing an RNN model.')

        if model_type is None:
            found_cnn_layer = False
            for layer in model.layers:
                class_name, sublayers = remove_layer_wrapper(layer)
                for tlayer in sublayers:
                    # check for CNN layers
                    if class_name in conv_layer_classes:
                        model_type = 'CNN'
                        image_data_format = K.image_data_format()
                        found_cnn_layer = True
                if found_cnn_layer:
                    break

        if model_type is None:
            raise DLPyError('Only RNN and CNN models are currently supported.')

        # navigate to correct HDF5 group
        if 'layer_names' in f_in.attrs.keys():
            root_group = f_in
        elif 'layer_names' in f_in['model_weights'].attrs.keys():
            root_group = f_in['model_weights']
        else:
            raise DLPyError('Cannot read HDF5 file correctly')

        # determine layers with weights
        filtered_layers = []
        for layer in model.layers:
            weights = layer.weights
            if weights:
                filtered_layers.append(layer)

        layer_names = [n.decode('utf8')
                       for n in root_group.attrs['layer_names']]
        filtered_layer_names = []
        for name in layer_names:
            g = root_group[name]
            weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
            if weight_names:
                filtered_layer_names.append(name)
        layer_names = filtered_layer_names
        if len(layer_names) != len(filtered_layers):
            raise ValueError('You are trying to load a weight file '
                             'containing ' + str(len(layer_names)) +
                             ' layers into a model with ' +
                             str(len(filtered_layers)) + ' layers.')

        # determine permutation vector associated with flattening layer
        # (if it exists)
        if model_type == 'CNN':
            flatten_layer_index = -1
            index = 0
            for layer in model.layers:
                if layer.__class__.__name__.lower() == 'flatten':
                    flatten_layer_index = index
                    break
                index = index + 1

            if flatten_layer_index != -1:
                layer = model.layers[flatten_layer_index]
                permute_layer_name = model.layers[flatten_layer_index + 1].name
                if image_data_format == 'channels_first':
                    C, H, W = (layer.input_shape)[1:]
                else:
                    H, W, C = (layer.input_shape)[1:]
                N = (layer.output_shape)[1]
                perm_index = [0] * N
                if image_data_format == 'channels_last':
                    ii = 0
                    for cc in range(C):
                        for hh in range(H):
                            for ww in range(W):
                                perm_index[ii] = hh * W * C + ww * C + cc
                                ii = ii + 1
                else:
                    for nn in range(N):
                        perm_index[nn] = nn
            else:
                perm_index = []
                permute_layer_name = None
        else:
            perm_index = []
            permute_layer_name = None

        # populate attributes with layer names
        attrib_layer_names = []
        for name in layer_names:
            layer = model.get_layer(name=name)
            class_name, sublayers = remove_layer_wrapper(layer)
            for tlayer in sublayers:
                attrib_layer_names.append(tlayer.name)

        f_out.attrs['layer_names'] = [replace_forward_slash(l).encode('utf8')
                                      for l in attrib_layer_names]

        # let Keras read weights, reformat, and write to SAS-compatible file
        for k, name in enumerate(layer_names):
            g_in = root_group[name]
            layer = filtered_layers[k]

            weight_names = [n.decode('utf8')
                            for n in g_in.attrs['weight_names']]
            weight_values = [g_in[weight_name] for weight_name in weight_names]
            symbolic_weights = layer.weights

            # create CPU-compatible layer
            cpu_layer = create_cpu_compatible_layer(layer, model_type)

            # use Keras to load/preprocess weights
            weight_values = preprocess_weights_for_loading(
                cpu_layer,
                weight_values,
                original_keras_version,
                original_backend)
            if len(weight_values) != len(symbolic_weights):
                raise ValueError('Layer #' + str(k) +
                                 ' (named "' + layer.name +
                                 '" in the current model) was found to '
                                 'correspond to layer ' + name +
                                 ' in the saved file. '
                                 'However the new layer ' + layer.name +
                                 ' expects ' + str(len(symbolic_weights)) +
                                 ' weights, but the saved weights have ' +
                                 str(len(weight_values)) + ' elements.')

            if layer.__class__.__name__.lower() == 'batchnormalization':
                bn_gamma = np.ones(weight_values[0].shape,
                                   dtype=weight_values[0].dtype)
                bn_beta = np.zeros(weight_values[0].shape,
                                   dtype=weight_values[0].dtype)
                layer_config = layer.get_config()

                # if scale = False and center = True
                if not layer_config['scale'] and layer_config['center']:
                    weight_values.insert(0, bn_gamma)
                    weight_names.insert(0, replace_forward_slash(layer.name) + '/' + 'gamma:0')
                # if scale = True and center = False
                elif layer_config['scale'] and not layer_config['center']:
                    weight_values.insert(1, bn_beta)
                    weight_names.insert(1, replace_forward_slash(layer.name) + '/' + 'beta:0')
                # if scale = False and center = False
                elif not layer_config['scale'] and not layer_config['center']:
                    weight_values = [bn_gamma, bn_beta] + weight_values
                    weight_names = [replace_forward_slash(layer.name) + '/' + 'gamma:0',
                                    replace_forward_slash(layer.name) + '/' + 'beta:0'] + weight_names

                # add epsilon to variance values to avoid divide by zero
                if 'epsilon' in layer_config.keys():
                    for ii, wgt_name in enumerate(weight_names):
                        if 'moving_variance' in wgt_name:
                            weight_values[ii] = weight_values[ii] + (
                                layer_config['epsilon'] *
                                np.ones(weight_values[ii].shape,
                                        dtype=weight_values[ii].dtype))

            # read/write weights
            class_name, sublayers = remove_layer_wrapper(layer)
            for tlayer in sublayers:
                g_out = f_out.create_group(replace_forward_slash(tlayer.name))
                new_weight_names = []
                wgt_idx = 0
                for ii, wgt_name in enumerate(weight_names):
                    if tlayer.name in wgt_name:
                        if type(weight_values[ii]) == np.ndarray:
                            tensor_in = weight_values[ii]
                        else:
                            tensor_in = np.zeros(weight_values[ii].shape,
                                                 dtype=weight_values[ii].dtype)
                            weight_values[ii].read_direct(tensor_in)

                        # permute axes as needed to conform to SAS deep
                        # learning "channels first" format
                        if (image_data_format is not None) and (image_data_format == 'channels_first'):
                            # format: (C, fdim1, fdim2, fdim3) ==> (C, fdim3, fdim1, fdim2)
                            if len(tensor_in.shape) == 4:
                                tensor_out = np.transpose(tensor_in, (0, 3, 1, 2))
                            else:
                                tensor_out = tensor_in.copy()
                        else:
                            # "channels last" format or not an image processing
                            # problem; process RNN layers first
                            if class_name in rnn_layer_classes:
                                cpu_class_name, cpu_sublayers = remove_layer_wrapper(cpu_layer)
                                if (len(tensor_in.shape) == 1) and (class_name != cpu_class_name):
                                    tensor_out = np.tile(0.5 * tensor_in, 2)
                                else:
                                    tensor_out = tensor_in.copy()
                            # not an RNN layer, but this is a vector - nothing
                            # to permute
                            elif len(tensor_in.shape) == 1:
                                tensor_out = tensor_in.copy()
                            else:
                                # permute Conv2D tensor to "channels_first" format
                                if class_name == 'conv2d':
                                    tensor_out = np.transpose(tensor_in, (3, 2, 0, 1))
                                # have to account for neuron ordering in first dense
                                # layer following flattening operation
                                elif class_name == 'dense':
                                    if (permute_layer_name is not None) and (tlayer.name == permute_layer_name):
                                        tensor_out = np.zeros(tensor_in.shape)
                                        for jj in range(tensor_out.shape[0]):
                                            tensor_out[jj, :] = tensor_in[perm_index[jj], :]
                                    else:
                                        # not following flattening, just copy
                                        tensor_out = tensor_in.copy()
                                    # mimic Caffe layout
                                    tensor_out = np.transpose(tensor_out, (1, 0))

                        # save weight in format amenable to SAS
                        dset_name = generate_dataset_name(tlayer, wgt_idx)
                        wgt_idx = wgt_idx + 1
                        new_weight_names.append(dset_name)
                        g_out.create_dataset(dset_name, data=tensor_out)

                # update weight names
                g_out.attrs['weight_names'] = new_weight_names

    except ValueError as err_msg:
        print(err_msg)

    finally:
        # close files
        f_out.close()
        f_in.close()

    return use_gpu
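# Illustrative call of the RNN-aware converter above; `model` and the file
# names are placeholders. The returned flag reports whether the imported
# weights require GPU (CuDNN) RNN layers on the SAS side.
use_gpu = write_keras_hdf5_from_file(model, rnn_support=True,
                                     hdf5_in='keras_model.h5',
                                     hdf5_out='sas_model.h5')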
def _load_weights(weights, layers):
    """Implements topological (order-based) weight loading.

    # Arguments
        weights: A dict of weights as returned by reduce_model.
        layers: A list of target layers.

    # Raises
        ValueError: in case of mismatch between provided layers
            and weights file.
    """
    if 'keras_version' in weights:
        original_keras_version = weights['keras_version'].decode('utf8')
    else:
        original_keras_version = '1'
    if 'backend' in weights:
        original_backend = weights['backend'].decode('utf8')
    else:
        original_backend = None

    filtered_layers = []
    for layer in layers:
        lweights = layer.weights
        if lweights:
            filtered_layers.append(layer)

    layer_names = [n.decode('utf8') for n in weights['layer_names']]
    filtered_layer_names = []
    for name in layer_names:
        g = weights[name]
        weight_names = g['weight_names']
        if weight_names:
            filtered_layer_names.append(name)
    layer_names = filtered_layer_names
    if len(layer_names) != len(filtered_layers):
        raise ValueError('You are trying to load a weight file '
                         'containing ' + str(len(layer_names)) +
                         ' layers into a model with ' +
                         str(len(filtered_layers)) + ' layers.')

    # We batch weight value assignments in a single backend call
    # which provides a speedup in TensorFlow.
    weight_value_tuples = []
    for k, name in enumerate(layer_names):
        g = weights[name]
        weight_names = g['weight_names']
        weight_values = [g[weight_name] for weight_name in weight_names]
        layer = filtered_layers[k]
        symbolic_weights = layer.weights
        weight_values = topology.preprocess_weights_for_loading(
            layer,
            weight_values,
            original_keras_version,
            original_backend)
        if len(weight_values) != len(symbolic_weights):
            raise ValueError('Layer #' + str(k) +
                             ' (named "' + layer.name +
                             '" in the current model) was found to '
                             'correspond to layer ' + name +
                             ' in the save file. '
                             'However the new layer ' + layer.name +
                             ' expects ' + str(len(symbolic_weights)) +
                             ' weights, but the saved weights have ' +
                             str(len(weight_values)) + ' elements.')
        weight_value_tuples += zip(symbolic_weights, weight_values)
    K.batch_set_value(weight_value_tuples)
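# _load_weights() takes a plain dict mirroring the Keras HDF5 layout (the
# docstring says it comes from reduce_model, which is not shown here). A
# hand-built example with illustrative names and shapes, assuming `model` is
# a Sequential with a single Dense(2) layer taking 5 inputs:
import numpy as np

weights = {
    'keras_version': b'2.2.4',
    'backend': b'tensorflow',
    'layer_names': [b'dense_1'],
    'dense_1': {
        'weight_names': ['dense_1/kernel:0', 'dense_1/bias:0'],
        'dense_1/kernel:0': np.zeros((5, 2)),
        'dense_1/bias:0': np.zeros((2,)),
    },
}
_load_weights(weights, model.layers)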