def test_apply_complex_gradient(input_layer, batch_size):
    with DEFAULT_TF_GRAPH.as_default():
        machine = Linear(input_layer)
        model = Model(inputs=[input_layer], outputs=machine.predictions)
        if tensorflow.__version__ >= '1.14':
            optimizer = ComplexValuesOptimizer(model, machine.predictions_jacobian,
                                               name='optimizer', lr=1.0)
        else:
            optimizer = ComplexValuesOptimizer(model, machine.predictions_jacobian,
                                               lr=1.0)
        complex_vector_t = K.placeholder(shape=(model.count_params() // 2, 1),
                                         dtype=tensorflow.complex64)
        predictions_function = K.function(inputs=[input_layer],
                                          outputs=[machine.predictions])
        sample = numpy.random.choice(
            2, (batch_size,) + K.int_shape(input_layer)[1:]) * 2 - 1
        predictions_before = predictions_function([sample])[0]
        updates = optimizer.apply_complex_gradient(complex_vector_t)
        apply_gradients_function = K.function(
            inputs=[input_layer, complex_vector_t],
            outputs=[machine.predictions],
            updates=[updates])
        real_vector = numpy.random.normal(size=(model.count_params() // 2, 1, 2))
        complex_vector = real_vector[..., 0] + 1j * real_vector[..., 1]
        apply_gradients_function([sample, complex_vector])
        predictions_after = predictions_function([sample])[0]
        diff = predictions_after - predictions_before
        manual_diff = sample.reshape((batch_size, -1)) @ complex_vector
        diff_norm = numpy.linalg.norm(diff - manual_diff)
        res_norm = numpy.linalg.norm(manual_diff)
        assert (diff_norm / res_norm) < 1e-5
def assert_n_params(inp, out, expected_size):
    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer="adam", loss="mean_squared_error")
    print(model.count_params())
    assert model.count_params() == expected_size
    # for test coverage:
    model.predict([x, a, e])
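# Usage sketch for assert_n_params (not from the original suite): a Dense(1)
# head on a 10-feature input has 10 weights + 1 bias = 11 parameters. Left
# commented out because the trailing model.predict([x, a, e]) call above relies
# on module-level fixtures (x, a, e) that this sketch does not provide.
# inp = Input(shape=(10,))
# out = Dense(1)(inp)
# assert_n_params(inp, out, 11)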
def test_compute_wave_function_gradient_covariance_inverse_multiplication(
        input_layer, batch_size, diag_shift, iterative):
    with DEFAULT_TF_GRAPH.as_default():
        machine = Linear(input_layer)
        model = Model(inputs=[input_layer], outputs=machine.predictions)
        if tensorflow.__version__ >= '1.14':
            optimizer = ComplexValuesStochasticReconfiguration(
                model, machine.predictions_jacobian, diag_shift=diag_shift,
                conjugate_gradient_tol=1e-6, iterative_solver=iterative,
                iterative_solver_max_iterations=None, name='optimizer')
        else:
            optimizer = ComplexValuesStochasticReconfiguration(
                model, machine.predictions_jacobian, diag_shift=diag_shift,
                conjugate_gradient_tol=1e-6, iterative_solver=iterative,
                iterative_solver_max_iterations=None)
        complex_vector_t = K.placeholder(shape=(model.count_params() // 2, 1),
                                         dtype=tensorflow.complex64)
        jacobian_minus_mean = machine.manual_jacobian - tensorflow.reduce_mean(
            machine.manual_jacobian, axis=0, keepdims=True)
        manual_s = tensorflow.eye(model.count_params() // 2,
                                  dtype=tensorflow.complex64) * diag_shift
        manual_s += tensorflow.matmul(
            jacobian_minus_mean, jacobian_minus_mean,
            adjoint_a=True) / tensorflow.cast(batch_size, tensorflow.complex64)
        manual_res_t = pinv(manual_s, complex_vector_t)
        res_t = optimizer.compute_wave_function_gradient_covariance_inverse_multiplication(
            complex_vector_t, jacobian_minus_mean)
        res_function = K.function(inputs=[input_layer, complex_vector_t],
                                  outputs=[res_t])
        manual_res_function = K.function(inputs=[input_layer, complex_vector_t],
                                         outputs=[manual_res_t])
        sample = numpy.random.choice(
            2, (batch_size,) + K.int_shape(input_layer)[1:]) * 2 - 1
        real_vector = numpy.random.normal(size=(model.count_params() // 2, 1, 2))
        complex_vector = real_vector[..., 0] + 1j * real_vector[..., 1]
        res = res_function([sample, complex_vector])[0]
        manual_res = manual_res_function([sample, complex_vector])[0]
        diff_norm = numpy.linalg.norm(res - manual_res)
        res_norm = numpy.linalg.norm(manual_res)
        assert (diff_norm / res_norm) < 1e-5
def get_imp_model(model_dim, n_layers, n_heads, dff, time_dim, n_signals):
    opt = Adam(CustomSchedule(model_dim), beta_1=0.9, beta_2=0.98, epsilon=1e-9)
    # opt = Adam(learning_rate=1e-2, clipnorm=1.0)
    imp_input = tfl.Input(shape=(time_dim, n_signals))
    x = Encoder(num_layers=n_layers, d_model=model_dim, time_dim=time_dim,
                m_dim=n_signals, num_heads=n_heads, dff=dff,
                max_time_step=time_dim, rate=0.1,
                imputation_mode=True)(imp_input)  # (batch, time, measure, d_model)
    # x = tfl.Reshape((time_dim, model_dim * n_signals))(x)  # (batch, time, measure * d_model)
    output_layer = tfl.Dense(1)(x)  # (batch, time, measure, 1)
    imp_model = Model(inputs=imp_input, outputs=output_layer)
    imp_model.compile(optimizer=opt, loss=imputation_rmse_loss)
    print("The number of parameters in the model: {:,d}".format(
        imp_model.count_params()))
    return imp_model
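# A hypothetical invocation of get_imp_model; Encoder, CustomSchedule and
# imputation_rmse_loss are defined elsewhere in this codebase, and the
# dimensions below are illustrative placeholders, not the project's settings.
# imp_model = get_imp_model(model_dim=64, n_layers=2, n_heads=4, dff=128,
#                           time_dim=48, n_signals=12)
# imp_model.summary()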
def train_lstm():
    # Create symbolic vars
    x = Input(shape=(None, in_dim), dtype='float32', name='input')
    # Create network
    # fw_cell = LSTM(hidden_units_size, return_sequences=False,
    #                implementation=2)(x)
    fw_cell = CuDNNLSTM(hidden_units_size, return_sequences=False)(x)
    h3 = Dense(classes, activation='softmax', use_bias=False)(fw_cell)
    model = Model(inputs=x, outputs=h3)
    validate_lstm_in_out(model)
    start = timer.perf_counter()
    model.compile(optimizer='Adam', loss='categorical_crossentropy')
    end = timer.perf_counter()
    print('>>> Model compilation took {:.1f} seconds'.format(end - start))
    # Print parameter count
    params = model.count_params()
    print('# network parameters: ' + str(params))
    # Start training
    batch_time = []
    batch_loss = []
    train_start = timer.perf_counter()
    for i in range(nb_batches):
        batch_start = timer.perf_counter()
        loss = model.train_on_batch(x=bX, y=to_categorical(bY, num_classes=classes))
        batch_end = timer.perf_counter()
        batch_time.append(batch_end - batch_start)
        batch_loss.append(loss)
    train_end = timer.perf_counter()
    print_results(batch_loss, batch_time, train_start, train_end)
def get_siamese_model(input_shape):
    '''
    https://github.com/akshaysharma096/Siamese-Networks/blob/master/Few%20Shot%20Learning%20-%20V1.ipynb
    '''
    input_A = Input(input_shape)
    input_B = Input(input_shape)

    # build conv_net to use in each siamese 'leg'
    conv_net = Sequential()
    # First layer; TODO: look into how to set an arbitrary kernel_initializer
    conv_net.add(Conv2D(filters=16, kernel_size=(4, 4), padding='same',
                        activation='relu',
                        kernel_initializer=RandomNormal(mean=0, stddev=0.01)))
    conv_net.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
    # Second layer
    conv_net.add(Conv2D(filters=64, kernel_size=(4, 4), padding='same',
                        activation='relu',
                        kernel_initializer=RandomNormal(mean=0, stddev=0.01)))
    conv_net.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
    # Third layer
    conv_net.add(Conv2D(filters=32, kernel_size=(4, 4), padding='same',
                        activation='relu',
                        kernel_initializer=RandomNormal(mean=0, stddev=0.01)))
    conv_net.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
    conv_net.add(Flatten())
    # conv_net.add(Dense(units=1024 * 2 * 2, activation="sigmoid",
    conv_net.add(Dense(units=512, activation="sigmoid",
                       kernel_initializer=RandomNormal(mean=0, stddev=0.01),
                       bias_initializer=RandomNormal(mean=0.5, stddev=0.01)))

    # call the convnet Sequential model on each of the input tensors so params will be shared
    encoded_A = conv_net(input_A)
    encoded_B = conv_net(input_B)
    # layer to merge two encoded inputs with the l1 distance between them
    L1_layer = Lambda(lambda tensors: K.backend.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([encoded_A, encoded_B])
    prediction = Dense(units=1, activation='sigmoid',
                       bias_initializer=RandomNormal(mean=0.5, stddev=0.01))(L1_distance)
    siamese_net = Model(inputs=[input_A, input_B], outputs=prediction)
    optimizer = Adam(0.001)
    siamese_net.compile(loss="binary_crossentropy", optimizer=optimizer)
    siamese_net.count_params()
    return siamese_net
def make_model(filters=160, blocks=8, kernels=(5, 1), rate=0.001,
               freeze_batch_norm=False):
    input = Input(shape=(NUM_INPUT_CHANNELS, 8, 8), name='input')
    # initial convolution
    x = get_conv(filters=filters, kernel_size=kernels[0])(input)
    # residual blocks
    for i in range(blocks):
        x = get_residual_block(x, freeze_batch_norm, i)
    # value tower
    vt = Flatten()(x)
    vt = get_dense(40, regu=0.02)(vt)
    vt = Dropout(rate=0.5)(vt)
    vt = get_norm(freeze_batch_norm, 'batchnorm-vt')(vt)
    vt = get_dense(20, regu=0.04)(vt)
    vt = Dropout(rate=0.5)(vt)
    value = Dense(1, activation=tf.nn.tanh, name='value',
                  kernel_initializer=initializers.glorot_normal(),
                  bias_initializer=initializers.zeros(),
                  bias_regularizer=l2(0.2),
                  kernel_regularizer=l2(0.4),
                  activity_regularizer=l2(0.1))(vt)
    # policy tower
    px = get_conv(filters=8 * 8, activation=None, kernel_size=kernels[1])(x)
    pf = Flatten()(px)
    policy = Softmax(name='policy')(pf)
    model = Model(inputs=input, outputs=[value, policy])
    losses = {
        'value': 'mean_squared_error',
        'policy': 'categorical_crossentropy'
    }
    weights = {'value': 1.0, 'policy': 1.0}
    optimizer = Adam(rate)
    model.compile(optimizer=optimizer, loss=losses, loss_weights=weights,
                  metrics=[])
    print('Model parameters: %d' % model.count_params())
    return model
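# Quick smoke test for make_model; the filter/block counts below are
# illustrative, and the helpers (get_conv, get_residual_block, get_dense,
# get_norm) plus NUM_INPUT_CHANNELS and numpy (np) are assumed from this module.
if __name__ == '__main__':
    smoke_model = make_model(filters=64, blocks=4)
    value_out, policy_out = smoke_model.predict(
        np.zeros((1, NUM_INPUT_CHANNELS, 8, 8), dtype=np.float32))
    print(value_out.shape, policy_out.shape)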
def main_model(self):
    self.config()
    # -------------- real inputs
    rec_node_feature = Input(shape=(self.residue_num, self.node_feature_length), dtype='float32')
    lig_node_feature = Input(shape=(self.residue_num, self.node_feature_length), dtype='float32')
    rec_intra_edge_feature = Input(shape=(self.residue_num, self.residue_num, self.edge_feature_length), dtype='float32')
    lig_intra_edge_feature = Input(shape=(self.residue_num, self.residue_num, self.edge_feature_length), dtype='float32')
    inter_edge_feature = Input(shape=(self.residue_num, self.residue_num, self.edge_feature_length), dtype='float32')
    unbound_rec_edge_feature = Input(shape=(self.residue_num, self.residue_num, self.edge_feature_length), dtype='float32')
    unbound_lig_edge_feature = Input(shape=(self.residue_num, self.residue_num, self.edge_feature_length), dtype='float32')
    unbound_rec_node_feature = Input(shape=(self.residue_num, self.node_feature_length), dtype='float32')
    unbound_lig_node_feature = Input(shape=(self.residue_num, self.node_feature_length), dtype='float32')

    intra_energy_module = self.folding_stablility_module('intra_model')
    inter_energy_module = self.folding_stablility_module('inter_model')
    score_rec_intra = intra_energy_module([rec_node_feature, rec_node_feature, rec_intra_edge_feature])
    score_lig_intra = intra_energy_module([lig_node_feature, lig_node_feature, lig_intra_edge_feature])
    score_rec_unbound = intra_energy_module([unbound_rec_node_feature, unbound_rec_node_feature, unbound_rec_edge_feature])
    score_lig_unbound = intra_energy_module([unbound_lig_node_feature, unbound_lig_node_feature, unbound_lig_edge_feature])
    score_inter = inter_energy_module([rec_node_feature, lig_node_feature, inter_edge_feature])

    def score_get(v):
        # bound intra + inter terms minus the unbound intra terms
        return v[0] + v[1] + v[2] - v[3] - v[4]

    score = Lambda(score_get)([score_rec_intra, score_lig_intra, score_inter,
                               score_rec_unbound, score_lig_unbound])
    Final_GCN = Model(inputs=[rec_node_feature, lig_node_feature,
                              rec_intra_edge_feature, lig_intra_edge_feature,
                              inter_edge_feature,
                              unbound_rec_node_feature, unbound_lig_node_feature,
                              unbound_rec_edge_feature, unbound_lig_edge_feature],
                      outputs=score)
    Final_GCN.summary()
    print(Final_GCN.count_params())
    return Final_GCN
for hid_dim_1 in dim_hidden_layres:
    print('========', 'hid_dim_0:', hid_dim_0, '; hid_dim_1:', hid_dim_1, '========')
    model = CNNModel(hid_dim_0=hid_dim_0, hid_dim_1=hid_dim_1)
    model = model.build()
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
                  metrics=['acc'])
    callbacks = [
        EarlyStopping(patience=3),
        ModelCheckpoint(filepath=os.path.join(
            'models', 'CNN', 'model_{}_{}.h5'.format(hid_dim_0, hid_dim_1)),
            save_best_only=True),
    ]
    n_param = model.count_params()
    model.fit(x=x_train, y=y_train, batch_size=128, epochs=100,
              callbacks=callbacks, validation_split=0.2)
    acc = accuracy_score(y_test, model.predict(x_test).argmax(axis=-1))
    df_accuracy = pd.concat([
        df_accuracy,
        pd.DataFrame([[hid_dim_0, hid_dim_1, n_param, acc]],
                     columns=['hid_dim_0', 'hid_dim_1', 'n_param', 'accuracy'])
    ])
# -
                      output_channels=input_sz[-1])
generator = Model(g_input, g_output, name='generator')

# Build GAN
# ---------------------------------------------------------
# The generator and discriminator are both in keras' trainable=True mode,
# which means that batchnorm and dropout will be applied as intended.
# We will update only the generator's weights during training of this model.
g_output_gan = generator(g_input)
d_output_gan = discriminator([g_output_gan, g_input])
gan = Model(g_input, [g_output_gan, d_output_gan], name='gan')

# Display setup details
# ---------------------------------------------------------
print_setup(tf.__version__, tf.executing_eagerly(), args,
            discriminator.count_params(), generator.count_params(),
            gan.count_params())

# Training loop
# --------------------------------------------------------
# Initialize metrics
train_metrics = Metrics(metrics_csv_pth)
start_time = datetime.datetime.now()
# PatchGAN discriminator labels, arrays of ones or zeroes
real = np.ones((batch_size,) + discriminator_output_sz)   # real => 1
fake = np.zeros((batch_size,) + discriminator_output_sz)  # fake => 0
for epoch in range(epochs):
    # update training results webpage
    gen_checkpoint(gan, check_loader, epoch, checkpoints_pth)
                   bias_initializer=RandomNormal(mean=0.5, stddev=0.01)))

# call the convnet Sequential model on each of the input tensors so params will be shared
encoded_A = conv_net(input_A)
encoded_B = conv_net(input_B)
# layer to merge two encoded inputs with the l1 distance between them
L1_layer = Lambda(lambda tensors: K.backend.abs(tensors[0] - tensors[1]))
L1_distance = L1_layer([encoded_A, encoded_B])
prediction = Dense(units=1, activation='sigmoid',
                   bias_initializer=RandomNormal(mean=0.5, stddev=0.01))(L1_distance)
siamese_net = Model(inputs=[input_A, input_B], outputs=prediction)
optimizer = Adam(0.001)
siamese_net.compile(loss="binary_crossentropy", optimizer=optimizer)
siamese_net.count_params()
print('Model Building Finished')


def get_pair_train_data(label_list, img_list, replace=False):
    labels = np.random.choice(nb_class, size=2, replace=replace)
    label_pair = [label_list[labels[0]], label_list[labels[1]]]
    img_pair = [img_list[labels[0]], img_list[labels[1]]]
    return label_pair, img_pair


print('Training Loop Started')
n_iter = 10
def generate_model(self, hp_schema, comp_schema, regression=True, model_tag=None):
    '''
    Method for generating the tensorflow graph employed as linear model

    Args:
        - hp_schema: a dictionary, it stores the schema for the hyperparameters
          Example: {'dropout': float specifying the amount of input features masked to zero,
                    'regularizer': keras regularizer object, specifies the type of
                    regularization applied to the weights matrix; this allows
                    lasso, ridge or elastic-net models}
        - comp_schema: a dictionary, it stores the schema for compiling the model
          Example: {optimizer: string or keras optimizer, optimization algorithm employed,
                    loss: string or keras loss, loss minimized by the optimizer,
                    metrics: list of strings or keras metrics, additional metrics
                    computed for monitoring convergence}
        - regression: a boolean, it specifies whether the model targets a
          regression or a classification task
        - model_tag: a string, specifies the model identifier applied to each layer

    Returns:
        - None
    '''
    if model_tag is None:
        model_tag = 'reg' if regression else 'clas'
    setattr(self, 'hp_schema', hp_schema)
    setattr(self, 'model_tag', model_tag)
    input = Input(shape=(self.X_train[1],), name='input_{}'.format(model_tag))
    dropout_input = Dropout(rate=self.hp_schema['dropout'],
                            name='dropout_input_{}'.format(model_tag))(input)
    matmul = Dense(units=self.y_train[1],
                   name='matmul_{}'.format(model_tag),
                   kernel_regularizer=self.hp_schema['regularizer'])(dropout_input)
    if regression:
        link = Activation('linear',
                          name='identity_link_{}'.format(model_tag))(matmul)
    elif not regression and self.y_train[1] > 1:
        link = Activation('softmax',
                          name='softmax_link_{}'.format(model_tag))(matmul)
    else:
        link = Activation('sigmoid',
                          name='sigmoid_link_{}'.format(model_tag))(matmul)
    model = Model(inputs=input, outputs=link)
    model.compile(optimizer=comp_schema['optimizer'], loss=comp_schema['loss'],
                  metrics=comp_schema['metrics'])
    setattr(self, '_model', model)
    setattr(self, 'n_parameters', model.count_params())
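# Illustrative schemas for the linear model above; the regularizer and metric
# choices are assumptions for demonstration, not the project's defaults, and
# some_instance stands in for an object carrying X_train/y_train shape tuples.
# from tensorflow.keras import regularizers
# example_hp_schema = {'dropout': 0.1,
#                      'regularizer': regularizers.l1_l2(l1=1e-4, l2=1e-4)}
# example_comp_schema = {'optimizer': 'adam', 'loss': 'mean_squared_error',
#                        'metrics': ['mae']}
# some_instance.generate_model(example_hp_schema, example_comp_schema,
#                              regression=True)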
def generate_model(self, hp_schema, comp_schema, sequence_len, masked=0.0,
                   prob=False, model_tag=None):
    '''
    Method for generating the tensorflow graph employed as bifurcating model

    Args:
        - hp_schema: a dictionary of dictionaries, it stores the schemas for the
          hyperparameters of each component of the model
        - comp_schema: a dictionary, it stores the schema for compiling the model
          Example: {optimizer: string or keras optimizer, optimization algorithm employed,
                    losses: string or keras loss, losses minimized by the optimizer,
                    metrics: list of strings or keras metrics, additional metrics
                    computed for monitoring convergence,
                    loss_weights: list of floats, weights applied to the two
                    losses of the model}
        - sequence_len: an integer specifying the maximum length of the input sequence
        - masked: a float, it specifies the value that will be masked
        - prob: a boolean, whether the model will employ dropout at estimation
          time, allowing for uncertainty estimation

    Returns:
        - None
    '''
    if model_tag is None:
        model_tag = 'bifurcating'
    setattr(self, 'masked', masked)
    setattr(self, 'prob', prob)
    setattr(self, 'em_schema', hp_schema['em_schema'])
    setattr(self, 'td_schema', hp_schema['td_schema'])
    setattr(self, 're_schema', hp_schema['re_schema'])
    setattr(self, 'fc_schema', hp_schema['fc_schema'])
    setattr(self, 'model_tag', model_tag)

    # inputs
    features_input = Input(shape=(self.X_feat_train[1],), name='features_input')
    features_reshape = Reshape(
        target_shape=(sequence_len, self.X_feat_train[1] // sequence_len),
        name='reshape_layer')(features_input)
    context_input = Input(shape=(self.X_cont_train[1],), name='context_input')

    # embedding creation
    embedding = self._generate_embedding_block(
        input_tensor=context_input,
        input_dim=self.em_schema['input_dim'],
        output_dim=self.em_schema['output_dim'],
        input_length=self.X_cont_train[1],
        tag='shared')

    # temporal fusion of embedding and raw features:
    # repeat the result of the embedding for each timestep
    embedding = RepeatVector(sequence_len,
                             name='embedding_temporalization_layer')(embedding)
    if masked is not None:
        features_padded = Lambda(
            self.__apply_special_masking,
            name='re_padding_layer',
        )([features_reshape, embedding])
        features_padded = Masking(mask_value=masked,
                                  name='masking_layer')(features_padded)
    else:
        features_padded = K.concatenate([features_reshape, embedding])

    time_distributed = self._generate_time_distriuted_block(
        input_tensor=features_padded,
        layers=self.td_schema['layers'],
        activation=self.td_schema['activation'],
        dropout=self.td_schema['dropout'],
        tag='shared',
        prob=prob)
    recurrent = self._generate_recurrent_block(
        input_tensor=time_distributed,
        layers=self.re_schema['layers'],
        tag='shared')

    # regression head
    dense_reg = self._generate_fully_connected_block(
        input_tensor=recurrent,
        layers=self.fc_schema['layers'],
        activation=self.fc_schema['activation'],
        dropout=self.fc_schema['dropout'],
        tag='reg',
        prob=prob,
    )
    output_reg = Dense(units=self.y_reg_train[1],
                       name='output_dense_reg')(dense_reg)
    output_reg = Activation('linear', name='output_linear_reg')(output_reg)

    # classification head
    dense_clas = self._generate_fully_connected_block(
        input_tensor=recurrent,
        layers=self.fc_schema['layers'],
        activation=self.fc_schema['activation'],
        dropout=self.fc_schema['dropout'],
        tag='clas',
        prob=prob)
    output_clas = Dense(units=self.y_clas_train[1],
                        name='output_dense_clas')(dense_clas)
    output_clas = Activation('sigmoid', name='output_sigmoid_clas')(output_clas)

    # build and compile the model
    model = Model(inputs=[features_input, context_input],
                  outputs=[output_reg, output_clas])
    model.compile(optimizer=comp_schema['optimizer'],
                  loss=comp_schema['losses'],
                  metrics=comp_schema['metrics'],
                  loss_weights=comp_schema['loss_weights'])
    setattr(self, '_model', model)
    setattr(self, 'n_parameters', model.count_params())
def generate_model(self, hp_schema, comp_schema, regression=True, prob=False,
                   model_tag=None):
    '''
    Method for generating the tensorflow graph employed as multilayer perceptron

    Args:
        - hp_schema: a dictionary, it stores the schema for the hyperparameters
          Example: {'layers': list of integers specifying the number of layers
                    and hidden units ([100, 100, 100]),
                    'dropout': float specifying the amount of hidden units masked to zero,
                    'activation': string specifying the activation function
                    applied between each layer}
        - comp_schema: a dictionary, it stores the schema for compiling the model
          Example: {optimizer: string or keras optimizer, optimization algorithm employed,
                    loss: string or keras loss, loss minimized by the optimizer,
                    metrics: list of strings or keras metrics, additional metrics
                    computed for monitoring convergence}
        - regression: a boolean, it specifies whether the model targets a
          regression or a classification task
        - prob: a boolean, whether the model will employ dropout at estimation
          time, allowing for uncertainty estimation
        - model_tag: a string, specifies the model identifier applied to each layer

    Returns:
        - None
    '''
    setattr(self, 'hp_schema', hp_schema)
    setattr(self, 'prob', prob)
    if model_tag is None:
        model_tag = 'reg' if regression else 'clas'
    setattr(self, 'model_tag', model_tag)
    input = Input(shape=(self.X_train[1],), name='input_{}'.format(model_tag))
    fc_block = self._generate_fully_connected_block(
        input,
        self.hp_schema['layers'],
        self.hp_schema['activation'],
        self.hp_schema['dropout'],
        model_tag,
        prob,
    )
    act = Dense(units=self.y_train[1], name='act_{}'.format(model_tag))(fc_block)
    if regression:
        act = Activation('linear',
                         name='identity_activation_{}'.format(model_tag))(act)
    elif not regression and self.y_train[1] > 1:
        act = Activation('softmax',
                         name='softmax_activation_{}'.format(model_tag))(act)
    else:
        act = Activation('sigmoid',
                         name='sigmoid_activation_{}'.format(model_tag))(act)
    model = Model(inputs=input, outputs=act)
    model.compile(optimizer=comp_schema['optimizer'], loss=comp_schema['loss'],
                  metrics=comp_schema['metrics'])
    setattr(self, '_model', model)
    setattr(self, 'n_parameters', model.count_params())
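# Illustrative schemas for the MLP variant above; the layer sizes, activation
# and metric choices are placeholder assumptions, and some_instance again
# stands in for an object carrying X_train/y_train shape tuples.
# example_hp_schema = {'layers': [100, 100, 100], 'activation': 'relu',
#                      'dropout': 0.2}
# example_comp_schema = {'optimizer': 'adam', 'loss': 'binary_crossentropy',
#                        'metrics': ['acc']}
# some_instance.generate_model(example_hp_schema, example_comp_schema,
#                              regression=False, prob=True)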
                              strides=2,
                              name='TransposedConv_Deconv_id_%d_adapt_layer_%d'
                                   % (self.couche_id, abs(dim_noeud)))(couche)
        dim_noeud += 1
    Llayer_fin.append(couche)

if len(Llayer_fin) > 1:
    couche_fin = Concatenate(axis=-1)(Llayer_fin)
else:
    couche_fin = Llayer_fin[0]
couche_fin = Conv2D(filters=3, kernel_size=2, padding='SAME',
                    activation='linear',
                    name='Convolution_fin_k2_f3')(couche_fin)
model = Model(inputs=G_Layer.couches_graph[0].couche, outputs=couche_fin,
              name='Debruiteur')
print("Nb param : ", model.count_params())
model.compile(
    optimizer=tf.keras.optimizers.SGD(
        learning_rate=hp.Choice('lr', [1., 0.1, 0.01, 0.001, 10**-4, 10**-5],
                                default=0.01),
        momentum=hp.Choice('momentum',
                           [1., 0.1, 0.01, 0.001, 10**-4, 10**-5, 0.],
                           default=0),
        nesterov=False),
    loss='MSE',
    metrics=[])  # custom_accuracy_fct(model.count_params(), 7e4), 'accuracy'
#pd.DataFrame(history.history).to_csv("history/%s.csv" % (saved_model))'''

# In[6]:

'''if df_his is None:
    df = pd.DataFrame(history.history)
    df.to_csv("history_%s.csv" % (saved_model), header=True)
else:
    df = pd.concat([df_his, pd.DataFrame(history.history)]).reset_index()
    df.to_csv("history_%s.csv" % (saved_model), header=True)'''

from tensorflow.keras.models import load_model
import time

model = load_model("models/%s" % (saved_model))
# , custom_objects={'Attention': Attention(params["seq_length"])}
print(model.count_params())
model.summary()

start_time = time.time()
predict = scaler_y.inverse_transform(model.predict(X_test))
print("--- %s seconds ---" % (time.time() - start_time))
# exit()
y_true = scaler_y.inverse_transform(y_test)

from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_squared_log_error


def NRMSD(y_true, y_pred):
    rmsd = np.sqrt(mean_squared_error(y_true.flatten(), y_pred.flatten()))
    y_min = min(y_true)
    y_max = max(y_true)
class HLDRVGG(object):
    def __init__(self, input_dimension, output_dimension, number_of_classes=2,
                 optimizer=None, dual_outputs=False,
                 loss_threshold=defaultLossThreshold, patience=defaultPatience,
                 dropout_rate=defaultDropoutRate, reg='HLDR',
                 max_relu_bound=None, adv_penalty=0.01, unprotected=False,
                 freezeWeights=False, verbose=False):
        self.buildModel(input_dimension, output_dimension,
                        number_of_classes=number_of_classes,
                        dual_outputs=dual_outputs,
                        loss_threshold=loss_threshold, patience=patience,
                        dropout_rate=dropout_rate,
                        max_relu_bound=max_relu_bound, adv_penalty=adv_penalty,
                        unprotected=unprotected, optimizer=optimizer, reg=reg,
                        freezeWeights=freezeWeights, verbose=verbose)

    def buildModel(self, input_dimension, output_dimension, number_of_classes=2,
                   optimizer=None, dual_outputs=False,
                   loss_threshold=defaultLossThreshold,
                   patience=defaultPatience, dropout_rate=defaultDropoutRate,
                   max_relu_bound=None, adv_penalty=0.01, unprotected=False,
                   reg='HLDR', freezeWeights=False, verbose=False):
        self.input_dimension, self.output_dimension = input_dimension, np.copy(output_dimension)
        self.advPenalty = np.copy(adv_penalty)
        self.loss_threshold, self.number_of_classes = np.copy(loss_threshold), np.copy(number_of_classes)
        self.dropoutRate, self.max_relu_bound = np.copy(dropout_rate), np.copy(max_relu_bound)
        self.patience = np.copy(patience)
        self.dualOutputs = dual_outputs
        self.image_size = 32
        self.num_channels = 3
        self.num_labels = np.copy(number_of_classes)
        self.penaltyCoeff = np.copy(adv_penalty)
        # decide an activation function
        self.chosenActivation = "relu" if max_relu_bound is not None else "tanh"
        if verbose:
            print("input dimension: %s" % str(self.input_dimension))

        # define the benign input layer and the adversarial input layer
        # (the latter is only used in training)
        self.inputLayer = layers.Input(shape=self.input_dimension)
        self.advInputLayer = layers.Input(shape=self.input_dimension)
        previousLayer = self.inputLayer
        previousAdvLayer = self.advInputLayer

        # define the hidden layers and their benign/adversarial outputs
        self.hiddenLayers = dict()
        self.hiddenAdvLayers = dict()
        self.poolingLayers = dict()
        self.hiddenModelOutputs, self.hiddenAdvModelOutputs = dict(), dict()

        # VGG-16 convolutional stack, written as a data-driven loop; each entry
        # is (filters, pooling index to apply after this conv or None,
        # dropout after this conv?, dropout after that pool?). The dropout
        # placement reproduces the original layer-by-layer construction,
        # including the dropout applied only after the first pooling layer.
        conv_stack = [
            (64, None, True, False),    # layer 0
            (64, 0, False, True),       # layer 1 + pool 0
            (128, None, True, False),   # layer 2
            (128, 1, True, False),      # layer 3 + pool 1
            (256, None, True, False),   # layer 4
            (256, None, True, False),   # layer 5
            (256, 2, False, False),     # layer 6 + pool 2
            (512, None, True, False),   # layer 7
            (512, None, True, False),   # layer 8
            (512, 3, False, False),     # layer 9 + pool 3
            (512, None, True, False),   # layer 10
            (512, None, True, False),   # layer 11
            (512, 4, False, False),     # layer 12 + pool 4
        ]
        for layerIdx, (nFilters, poolIdx, dropAfterConv, dropAfterPool) in enumerate(conv_stack):
            self.hiddenLayers[layerIdx] = layers.Conv2D(
                nFilters, (3, 3), kernel_regularizer=regularizers.l2(5e-5),
                activation=self.chosenActivation, padding='same')
            # the adversarial branch shares conv weights with the benign branch
            self.hiddenAdvLayers[layerIdx] = self.hiddenLayers[layerIdx]
            previousLayer = self.hiddenLayers[layerIdx](previousLayer)
            previousAdvLayer = self.hiddenAdvLayers[layerIdx](previousAdvLayer)
            # batch norm is NOT shared between the two branches
            previousLayer = layers.BatchNormalization()(previousLayer)
            previousAdvLayer = layers.BatchNormalization()(previousAdvLayer)
            self.hiddenModelOutputs[layerIdx] = previousLayer
            self.hiddenAdvModelOutputs[layerIdx] = previousAdvLayer
            if dropAfterConv and self.dropoutRate > 0:
                previousLayer = Dropout(dropout_rate)(previousLayer)
                previousAdvLayer = Dropout(dropout_rate)(previousAdvLayer)
            if poolIdx is not None:
                self.poolingLayers[poolIdx] = layers.MaxPool2D((2, 2), padding='same')
                previousLayer = self.poolingLayers[poolIdx](previousLayer)
                previousAdvLayer = self.poolingLayers[poolIdx](previousAdvLayer)
                if dropAfterPool and self.dropoutRate > 0:
                    previousLayer = Dropout(dropout_rate)(previousLayer)
                    previousAdvLayer = Dropout(dropout_rate)(previousAdvLayer)
        # dense layers
        previousLayer = layers.Flatten()(previousLayer)
        previousAdvLayer = layers.Flatten()(previousAdvLayer)
        if self.dropoutRate > 0:
            previousLayer = Dropout(dropout_rate)(previousLayer)
            previousAdvLayer = Dropout(dropout_rate)(previousAdvLayer)
        self.penultimateDenseLayer = layers.Dense(
            512, activation=self.chosenActivation,
            kernel_regularizer=regularizers.l2(5e-5))
        self.hiddenLayers[13] = self.penultimateDenseLayer
        self.hiddenAdvLayers[13] = self.penultimateDenseLayer
        previousLayer = self.penultimateDenseLayer(previousLayer)
        previousAdvLayer = self.penultimateDenseLayer(previousAdvLayer)
        previousLayer = layers.BatchNormalization()(previousLayer)
        previousAdvLayer = layers.BatchNormalization()(previousAdvLayer)
        self.hiddenModelOutputs[13] = previousLayer
        self.hiddenAdvModelOutputs[13] = previousAdvLayer
        if self.dropoutRate > 0:
            previousLayer = Dropout(dropout_rate)(previousLayer)
            previousAdvLayer = Dropout(dropout_rate)(previousAdvLayer)

        # add the output layer; size constrained by dimensionality of inputs
        # self.logitsLayer = layers.Dense(output_dimension, activation=None, name='logitsLayer')
        # self.penultimateLayer = self.logitsActivity = self.logitsLayer(previousLayer)
        # self.penultimateAdvLayer = advLogitsActivity = self.logitsLayer(previousAdvLayer)
        self.outputLayer = layers.Dense(output_dimension, activation='softmax',
                                        name='outputlayer')  # layers.Activation('softmax')
        self.outputActivity = self.outputLayer(previousLayer)
        self.advOutputActivity = self.outputLayer(previousAdvLayer)
        # set up the logits layer (not just breaking apart the output layer,
        # because we want to be able to read in old pretrained models, so we
        # just invert): softmax^-1(X) at coordinate i = log(X_i) + log(sum_j exp(X_j))
        self.logitsActivity = K.log(self.outputActivity)  # + K.log(K.sum(K.exp(self.outputActivity)))
        self.advLogitsActivity = K.log(self.advOutputActivity)  # + K.log(K.sum(K.exp(self.advOutputActivity)))
        self.hiddenModelOutputs[14] = self.logitsActivity
        self.hiddenAdvLayers[14] = self.advLogitsActivity

        # set up the models with which we can see states of hidden layers
        numberOfHiddenLayers = len(self.hiddenLayers)
        # collect adversarial projections and benign projections
        benProjs = layers.concatenate([
            layers.Flatten()(self.hiddenModelOutputs[curLayer])
            for curLayer in range(numberOfHiddenLayers)
        ])
        advProjs = layers.concatenate([
            layers.Flatten()(self.hiddenAdvModelOutputs[curLayer])
            for curLayer in range(numberOfHiddenLayers)
        ])
        self.benProjs, self.advProjs = benProjs, advProjs

        # define our custom loss function depending on how the initializer wants
        # to regularize (i.e., the "reg" argument); for 'HLDR' this is
        # cross_entropy + sum over layers of abs(benign_projection - adv_projection)
        self.unprotected = unprotected
        self.reg = reg
        if reg == 'HLDR':
            if not unprotected:
                def customLossWrapper(benProjs, advProjs, penaltyCoeff=self.penaltyCoeff):
                    def customLoss(y_true, y_pred):
                        return ((1 - penaltyCoeff) * K.categorical_crossentropy(y_true, y_pred)
                                + penaltyCoeff * K.sum(K.abs(benProjs - advProjs))
                                / tf.cast(K.sum(K.abs(benProjs)), tf.float32))
                        # return (1-penaltyCoeff)*K.categorical_crossentropy(y_true, y_pred) + penaltyCoeff*K.sum(K.abs(benProjs - advProjs))/(tf.cast(tf.shape(benProjs)[0], tf.float32))
                    return customLoss
            else:
                # if we are using an unprotected model, don't force the machine
                # to calculate this too
                def customLossWrapper(benProjs, advProjs, penaltyCoeff=self.penaltyCoeff):
                    def customLoss(y_true, y_pred):
                        return K.categorical_crossentropy(y_true, y_pred)
                    return customLoss
        elif reg == 'HLRDivlayer':
            if not unprotected:
                # numerators = K.abs(layerWiseBenProjs - layerWiseAdvProjs)
                summands = [
                    K.sum(K.abs(self.hiddenLayers[curLayer].output
                                - self.hiddenAdvLayers[curLayer].output))
                    / K.sum(K.abs(self.hiddenLayers[curLayer].output))
                    for curLayer in range(numberOfHiddenLayers)
                ]

                def customLossWrapper(summands, penaltyCoeff=self.penaltyCoeff):
                    def customLoss(y_true, y_pred):
                        return ((1 - penaltyCoeff) * K.categorical_crossentropy(y_true, y_pred)
                                + penaltyCoeff * K.sum(summands))
                    return customLoss
            else:
                def customLossWrapper(benProjs, advProjs, penaltyCoeff=self.penaltyCoeff):
                    def customLoss(y_true, y_pred):
                        return K.categorical_crossentropy(y_true, y_pred)
                    return customLoss
        elif reg == 'FIM':
            # dS = tf.gradients(self.outputLayer, self.inputLayer)
            # dS_2 = tf.matmul(dS, tf.reshape(dS, (dS.shape[1], dS.shape[0])))
            # eigs = tf.linalg.eigvals(dS_2)
            ps = tf.divide(tf.ones(shape=(tf.shape(self.outputActivity))),
                           tf.where(self.outputActivity > 0, self.outputActivity,
                                    1e16 * tf.ones_like(self.outputActivity)))

            def customLossWrapper(benProjs, advProjs, penaltyCoeff=self.penaltyCoeff):
                def customLoss(y_true, y_pred):
                    return ((1 - penaltyCoeff) * K.categorical_crossentropy(y_true, y_pred)
                            + penaltyCoeff * K.sum(ps))
                return customLoss
        elif reg == 'logEigen':
            ps = tf.divide(tf.ones(shape=(tf.shape(self.outputActivity))),
                           tf.ones_like(self.outputActivity)
                           - tf.math.log(tf.where(self.outputActivity > 0,
                                                  self.outputActivity,
                                                  1e16 * tf.ones_like(self.outputActivity))))

            def customLossWrapper(benProjs, advProjs, penaltyCoeff=self.penaltyCoeff):
                def customLoss(y_true, y_pred):
                    return ((1 - penaltyCoeff) * K.categorical_crossentropy(y_true, y_pred)
                            + penaltyCoeff * K.sum(ps))
                return customLoss
        elif reg == 'logEigenlogits':
            ps = tf.divide(tf.ones(shape=(tf.shape(self.logitsActivity))),
                           tf.ones_like(self.logitsActivity)
                           + tf.math.log(tf.where(self.logitsActivity > 0,
                                                  self.logitsActivity,
                                                  1e16 * tf.ones_like(self.logitsActivity))))

            def customLossWrapper(benProjs, advProjs, penaltyCoeff=self.penaltyCoeff):
                def customLoss(y_true, y_pred):
                    return ((1 - penaltyCoeff) * K.categorical_crossentropy(y_true, y_pred)
                            + penaltyCoeff * K.sum(ps))
                return customLoss
        elif reg == 'logitFIM':
            ps = tf.divide(tf.ones(shape=(tf.shape(self.logitsActivity))),
                           tf.where(self.logitsActivity > 0, self.logitsActivity,
                                    1e16 * tf.ones_like(self.logitsActivity)))

            def customLossWrapper(benProjs, advProjs, penaltyCoeff=self.penaltyCoeff):
                def customLoss(y_true, y_pred):
                    return ((1 - penaltyCoeff) * K.categorical_crossentropy(y_true, y_pred)
                            + penaltyCoeff * K.sum(ps))
                return customLoss
        else:
            def customLossWrapper(benProjs, advProjs, penaltyCoeff=self.penaltyCoeff):
                def customLoss(y_true, y_pred):
                    return K.categorical_crossentropy(y_true, y_pred)
                return customLoss

        self.sgd = tf.keras.optimizers.Nadam()  # Adadelta(learning_rate=self.learning_rate)
        self.reduceLR = None

        # set up data augmentation
        self.generator = ImageDataGenerator(
            featurewise_center=False,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            featurewise_std_normalization=False,  # divide inputs by std of the dataset
            samplewise_std_normalization=False,  # divide each input by its std
            zca_whitening=False,  # apply ZCA whitening
            rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=False,  # randomly flip images
            vertical_flip=False)

        # convert self.hiddenAdvLayers to a list for the model compilation;
        # ascending order of keys is order of layers. outputsList is built so
        # that the first entry is the true output (i.e. prediction) layer, and
        # each subsequent (i, i+1)-th pair of entries is the pair of
        # hiddenAdvLayer, hiddenBenignLayer activations; this is useful for
        # calculating the MAE between benignly and adversarially induced
        # hidden states.
        outputsList = [self.outputActivity]
        oneSideOutputsList = [self.outputActivity]
        for curHiddenLayer in range(len(self.hiddenAdvModelOutputs))[:-1]:
            oneSideOutputsList.append(self.hiddenModelOutputs[curHiddenLayer])
            outputsList.append(self.hiddenAdvModelOutputs[curHiddenLayer])
            outputsList.append(self.hiddenModelOutputs[curHiddenLayer])
        mainOutputList = [self.outputActivity]
        if self.dualOutputs:
            mainOutputList.append(self.logitsActivity)
            # mainOutputList.append(self.advOutputActivity)
            # mainOutputList.append(self.advLogitsActivity)

        # instantiate and compile the model
        self.customLossWrapper = customLossWrapper
        self.model = Model(inputs=[self.inputLayer, self.advInputLayer],
                           outputs=mainOutputList, name='HLDR_vgg_16')
        # if we want to use this as a frozen model
        if freezeWeights:
            for curWeights in range(len(self.model.layers)):
                self.model.layers[curWeights].trainable = False
        if reg == 'HLRlayer':
            self.model.compile(loss=customLossWrapper(summands, self.penaltyCoeff),
                               metrics=['acc'], optimizer=self.sgd)
        else:
            self.model.compile(loss=customLossWrapper(benProjs, advProjs, self.penaltyCoeff),
                               metrics=['acc'], optimizer=self.sgd)

        # set up the models with which we can see states of hidden layers
        self.hiddenModel = Model(inputs=[self.inputLayer, self.advInputLayer],
                                 outputs=outputsList, name='hidden_HLDR_vgg_16')
        self.hiddenOneSideModel = Model(
            inputs=[self.inputLayer, self.advInputLayer],
            outputs=oneSideOutputsList, name='hidden_oneside_HLDR_vgg_16')
        self.hiddenJointLatentModel = Model(
            inputs=[self.inputLayer, self.advInputLayer],
            outputs=[benProjs], name='hiddenJointLatentModel')
        self.logitModel = Model(inputs=[self.inputLayer, self.advInputLayer],
                                outputs=[self.logitsActivity],
                                name='hiddenLogitModel')

        # double check weight trainability bug
        allVars = self.model.variables
        trainableVars = self.model.trainable_variables
        allVarNames = [self.model.variables[i].name
                       for i in range(len(self.model.variables))]
        trainableVarNames = [self.model.trainable_variables[i].name
                             for i in range(len(self.model.trainable_variables))]
        nonTrainableVars = np.setdiff1d(allVarNames, trainableVarNames)
        if verbose:
            self.model.summary()
        if len(nonTrainableVars) > 0:
            print('the following variables are set to non-trainable; '
                  'ensure that this is correct before publishing!!!!')
            print(nonTrainableVars)

        # set data statistics to default values
        self.mean = 0
        self.stddev = 1

    # this routine is used to collect statistics on training data, as well as
    # to preprocess the training data by normalizing,
    # i.e. centering and dividing by standard deviation
    def normalize(self, inputData, storeStats=False):
        if storeStats:
            self.mean = np.mean(inputData)
            self.stddev = np.std(inputData)
        outputData = (inputData - self.mean) / (self.stddev + 0.0000001)
        return outputData

    # routine to get a pointer to the optimizer of this model
    def getOptimizer(self):
        return self.sgd

    def getVGGWeights(self):
        return self.model.get_weights().copy()

    def getParameterCount(self):
        return self.model.count_params()

    # handle data augmentation with multiple inputs (example found on
    # https://stackoverflow.com/questions/49404993/keras-how-to-use-fit-generator-with-multiple-inputs
    # so thanks to loannis and Julian)
    def multiInputDataGenerator(self, X1, X2, Y, batch_size):
        genX1 = self.generator.flow(X1, Y, batch_size=batch_size)
        genX2 = self.generator.flow(X2, Y, batch_size=batch_size)
        while True:
            X1g = genX1.next()
            X2g = genX2.next()
            yield [X1g[0], X2g[0]], X1g[1]

    # the adversarialOrder parameter tells us if we're doing adversarial
    # training, so we know whether to normalize to the first or second argument
    def train(self, inputTrainingData, trainingTargets, inputValidationData,
              validationTargets, training_epochs=1, normed=False,
              monitor='val_loss', patience=defaultPatience, model_path=None,
              keras_batch_size=None, dataAugmentation=False, adversarialOrder=0):
        # if a path isn't provided by the caller, just use the current time
        # for restoring best weights from fit
        if model_path is None:
            model_path = os.path.join(
                '/tmp/models/', 'hlr_vgg16_' + str(int(round(time.time() * 1000))))
        # if the data are not normalized, normalize them
        trainingData, validationData = [[], []], [[], []]
        if not normed:
            # don't store stats from the adversarially attacked data
            if adversarialOrder == 0:
                trainingData[0] = self.normalize(inputTrainingData[0], storeStats=True)
                trainingData[1] = self.normalize(inputTrainingData[1], storeStats=False)
            else:
                trainingData[1] = self.normalize(inputTrainingData[1], storeStats=True)
                trainingData[0] = self.normalize(inputTrainingData[0], storeStats=False)
            # also don't store stats from validation data
            validationData[0] = self.normalize(inputValidationData[0], storeStats=False)
            validationData[1] = self.normalize(inputValidationData[1], storeStats=False)
        else:
            trainingData[0] = inputTrainingData[0]
            trainingData[1] = inputTrainingData[1]
            validationData[0] = inputValidationData[0]
            validationData[1] = inputValidationData[1]

        # collect our callbacks
        earlyStopper = EarlyStopping(monitor=monitor, mode='min',
                                     patience=patience, verbose=1,
                                     min_delta=defaultLossThreshold)
        checkpoint = ModelCheckpoint(model_path, verbose=1, monitor=monitor,
                                     save_weights_only=True,
                                     save_best_only=True, mode='auto')
        callbackList = [earlyStopper, checkpoint]
        # history = self.model.fit(trainingData, trainingTargets, epochs=training_epochs, batch_size=keras_batch_size,
        #                          validation_split=validation_split, callbacks=[earlyStopper, self.reduce_lr])

        # handle data augmentation: without it, only the non-augmenting flags
        # are set; with it, random rotations and shifts are enabled
        if not dataAugmentation:
            self.generator = ImageDataGenerator(
                featurewise_center=False,  # set input mean to 0 over the dataset
                samplewise_center=False,  # set each sample mean to 0
                featurewise_std_normalization=False,  # divide inputs by std of the dataset
                samplewise_std_normalization=False,  # divide each input by its std
                zca_whitening=False,  # apply ZCA whitening
                horizontal_flip=False,  # randomly flip images
                vertical_flip=False)
        else:
            self.generator = ImageDataGenerator(
                featurewise_center=False,  # set input mean to 0 over the dataset
                samplewise_center=False,  # set each sample mean to 0
                featurewise_std_normalization=False,  # divide inputs by std of the dataset
                samplewise_std_normalization=False,  # divide each input by its std
                zca_whitening=False,  # apply ZCA whitening
                rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
                width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
                height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
                horizontal_flip=False,  # randomly flip images
                vertical_flip=False)
        self.generator.fit(trainingData[0])
        history = self.model.fit(
            self.multiInputDataGenerator(trainingData[0], trainingData[1],
                                         trainingTargets, keras_batch_size),
            steps_per_epoch=trainingData[0].shape[0] // keras_batch_size,
            epochs=training_epochs,
            validation_data=(validationData, validationTargets),
            callbacks=callbackList, verbose=1)  # self.reduce_lr
        if (not np.isnan(history.history['loss']).any()
                and not np.isinf(history.history['loss']).any()):
            self.model.load_weights(model_path)
        loss, acc = history.history['loss'], history.history['val_acc']
        return loss, acc, model_path

    def evaluate(self, inputData, targets, batchSize=None):
        evalData = [self.normalize(inputData[0], storeStats=False),
                    self.normalize(inputData[1], storeStats=False)]
        fullEval = self.model.evaluate(evalData, targets, batch_size=batchSize)
        return fullEval

    # this method reads our models from the disk at the specified path + name
    # of component of the class + h5, and reads all other parameters from a
    # pickle file
    def readModelFromDisk(self, pathToFile, verbose=False):
        # rebuild the model
        self.buildModel(self.input_dimension, self.output_dimension,
                        self.number_of_classes,
                        loss_threshold=self.loss_threshold,
                        patience=self.patience,
                        dropout_rate=self.dropoutRate,
                        max_relu_bound=self.max_relu_bound,
                        adv_penalty=self.advPenalty,
                        unprotected=self.unprotected,
                        reg=self.reg, verbose=verbose)
        # set the vgg weights
        self.model.load_weights(pathToFile)
        # read in the pickle box
        pickleBox = pickle.load(open(pathToFile + '_pickle', 'rb'))
        # self.bottleneckLayer = pickleBox['bottleneckLayer']
        # self.hiddenEncodingLayer = pickleBox['hiddenEncodingLayer']
        # self.inputLayer = pickleBox['inputLayer']
        self.reg = pickleBox['reg']
        self.chosenActivation = pickleBox['chosenActivation']
        self.mean, self.stddev = pickleBox['scaleMean'], pickleBox['scaleSTD']
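# A hypothetical instantiation of HLDRVGG; the CIFAR-10-style dimensions and
# the reg/adv_penalty values below are illustrative only, not defaults taken
# from the original project.
# net = HLDRVGG(input_dimension=(32, 32, 3), output_dimension=10,
#               number_of_classes=10, reg='HLDR', adv_penalty=0.01,
#               verbose=False)
# print('parameter count:', net.getParameterCount())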
class G_Controleur:
    prec_echec = True

    def __init__(self, hparam):
        self.couche_id = 0
        self.couches_graph = []
        self.nb_liens = 0
        self.hp = hparam
        self.graph = Digraph("graph", format='png')
        self.nb_couches = {
            "conv": {"min": 4, "max": 20, "choix": None},
            "deconv": {"min": 0, "max": 5, "choix": None},
            "pool": {"min": 0, "max": 5, "choix": None},
            "add": {"min": 0, "max": 30, "choix": None},
            "dense": {"min": 0, "max": 10, "choix": None},
        }
        liste_classes = [G_Conv, G_Deconv, G_Pool, G_Add, G_Dense]
        G_Input(self)
        # Create the number of layers of each type chosen by Keras Tuner
        for (name, v), Class in zip(self.nb_couches.items(), liste_classes):
            v["choix"] = self.hp.Int("nb_" + name, min_value=v["min"],
                                     max_value=v["max"])
            for _ in range(v["choix"]):
                Class(self)
        G_Output(self)
        self.couches_graph[0].eval()
        self.lier(0, 1, forcer=True)
        for n in self.couches_graph:
            n.link()
        self.afficher("post_liaison")
        # Feed the input to the root nodes
        for n in self.couches_graph:
            if len(n.parent) == 0:
                print("Theoretical link between %d and %d"
                      % (self.couches_graph[0].couche_id, n.couche_id))
                self.lier(self.couches_graph[0].couche_id, n.couche_id,
                          forcer=True)
                n.couche_input = self.couches_graph[0].couche
        self.afficher("ajout_input")
        # Link to the output
        index_output = len(self.couches_graph) - 1
        while (self.couches_graph[index_output].__class__.__name__ != "G_Output"
               and index_output >= 0):
            index_output -= 1
        if index_output < 0:
            raise Exception("Output missing")
        for n in self.couches_graph:
            if len(n.enfant) == 0 and n.__class__.__name__ != "G_Output":
                self.lier(n.couche_id,
                          self.couches_graph[index_output].couche_id,
                          forcer=True)
        self.afficher("fin_liaisons")
        # Evaluate the layers that can be evaluated
        self.eval_rec(self.couches_graph[:])
        # Append a final layer that restores the right output size
        couche_fin = Conv2D(filters=3, kernel_size=2, padding='SAME',
                            activation='linear', name='conv_fin_k2_f3')(
                                self.couches_graph[index_output].couche_output)
        # Display the graphs
        dossier_fichier = os.path.abspath(os.path.dirname(__file__))
        for img in os.listdir(dossier_fichier):
            if img.endswith(".png"):
                os.system("display ./" + img)
        # Build the model
        self.model = Model(inputs=self.couches_graph[0].couche_output,
                           outputs=couche_fin, name='Debruiteur')
        self.model.compile(
            optimizer=tf.keras.optimizers.SGD(
                learning_rate=self.hp.Choice(
                    'lr', [1., 0.1, 0.01, 0.001, 10**-4, 10**-5], default=0.01),
                momentum=self.hp.Choice(
                    'momentum', [1., 0.1, 0.01, 0.001, 10**-4, 10**-5, 0.],
                    default=0),
                nesterov=False),
            loss='MSE',
            metrics=[custom_accuracy])
        os.system("cp -r /content/Bayesian_optimization '/content/drive/My Drive'")
        max_param = -1
        if os.path.isfile("/content/Graph_TF/max_param.txt") == True:
            with open("/content/Graph_TF/max_param.txt", "r") as f:
                for i, l in enumerate(f):
                    if i == 0:
                        max_param = int(l.strip())
                    elif i == 1:
                        prec_echec = l.strip()
            retour_ancienne_exec = int(prec_echec)
            max_param = retour_ancienne_exec if (
                max_param == -1 and max_param < retour_ancienne_exec
            ) and G_Controleur.prec_echec == True else max_param
            G_Controleur.prec_echec = False
        if self.model.count_params() >= max_param and max_param != -1:
            print("Too many parameters for this model (%d) and a previous "
                  "failure with %d (-1 = unbounded)"
                  % (self.model.count_params(), max_param))
            global trop_param
            trop_param = True
            return
        with open("/content/Graph_TF/max_param.txt", "w") as f:
            f.write(str(max_param) + "\n")
            f.write(str(self.model.count_params()) + "\n")

    def clean(self):
        del self.couche_id
        del self.nb_liens
        for c in self.couches_graph:
            c.clean()
        del self.graph

    def eval_rec(self, nodes):
        if len(nodes) == 0:
            return
        index = 0
        while index < len(nodes) and nodes[index].eval() == False:
            index += 1
        if index == len(nodes):
            raise Exception("Impossible model")
        nodes.pop(index)
        return self.eval_rec(nodes)

    def new_id(self):
        self.couche_id += 1
        return self.couche_id - 1

    def add_couche(self, couche):
        self.couches_graph.append(couche)

    def afficher(self, name):
        print("%d nodes and %d links against %d at most, i.e. %f pct and %d "
              "links per node on average"
              % (len(self.couches_graph), self.nb_liens,
                 len(self.couches_graph) * (len(self.couches_graph) - 1),
                 10**-2 * int(10000 * self.nb_liens /
                              (len(self.couches_graph)
                               * (len(self.couches_graph) - 1))),
                 self.nb_liens / len(self.couches_graph)))
        self.graph.render("./graph_%s" % name)

    def lier(self, couche_id_1, couche_id_2, forcer=False, adapt=False):
        if adapt == False and (
                (self.couches_graph[couche_id_1].invisible_adapt == True
                 and "Output" not in self.couches_graph[couche_id_1].__class__.__name__)
                or (self.couches_graph[couche_id_2].invisible_adapt == True
                    and "Output" not in self.couches_graph[couche_id_2].__class__.__name__)):
            return
        lien = False
        # Checks: not the current layer itself, the source is not an Output,
        # the destination is not an Input, and the destination is not already
        # a parent of the source layer
        verifications_ok = (couche_id_1 != couche_id_2
                            and "Output" not in self.couches_graph[couche_id_1].__class__.__name__)
        verifications_ok = (verifications_ok
                            and "Input" not in self.couches_graph[couche_id_2].__class__.__name__)
        verifications_ok = (verifications_ok
                            and couche_id_2 not in self.couches_graph[couche_id_1].parents)
        if verifications_ok == False:
            return
        # Compute the size difference: number of size reductions
        # (pool = -1; deconv = +1); dimension < 0 => globally shrinks the size
        tailles_source, parents_1 = self.couches_graph[couche_id_1].get_size_parent_list_parents()
        tailles_dest_enfants, enfants_2 = self.couches_graph[couche_id_2].get_size_enfant_list_enfants()
        tailles_dest_parents, parents_2 = self.couches_graph[couche_id_2].get_size_parent_list_parents()
        # The path up to the root on the current-layer side plus the path down
        # to the leaves must not contain too many pooling layers in total, and
        # the output dimensions of the current layer must match the input
        # dimensions of the target layer
        taille_si_lie = 0
        if len(tailles_source) != 0:
            taille_si_lie += min(tailles_source)
        if len(tailles_dest_enfants) != 0:
            taille_si_lie += min(tailles_dest_enfants)
        if taille_si_lie < -8:
            # Too many pooling layers if the two layers were linked
            return
        self.couches_graph[couche_id_2].tmp_parents = self.couches_graph[couche_id_2].parents
        self.couches_graph[couche_id_2].tmp_parents.append(
            self.couches_graph[couche_id_1].couche_id)
        self.couches_graph[couche_id_2].tmp_parents = list(
            dict.fromkeys(self.couches_graph[couche_id_2].parents))
        verif_boucle = self.couches_graph[couche_id_2].test_actualiser_enfant(couche_id_1)
        if verif_boucle == False:
            return
        # Check whether the sizes are compatible
        verification_taille = False
        if len(self.couches_graph[couche_id_1].parent) == 0:
            verification_taille = True
        elif (len(self.couches_graph[couche_id_2].parent) == 0
              and len(self.couches_graph[couche_id_2].enfant) == 0):
            verification_taille = True
        else:
            # Difference between the size at the output of the first layer
            # and the size at the input of the second
            diff_taille = tailles_source[0] - (
                tailles_dest_parents[0]
                + self.couches_graph[couche_id_2].couche_pool
                - self.couches_graph[couche_id_2].couche_deconv)
            if diff_taille == 0:
                verification_taille = True
            else:
                # Insert invisible adapter layers (pool/deconv) to bridge the gap
                couche_adapt = self.couches_graph[couche_id_1]
                if diff_taille > 0:
                    for i in range(diff_taille):
                        adapt = graph_layer.G_Pool(self)
                        self.couches_graph[adapt.couche_id].invisible_adapt = True
                        self.lier(couche_adapt.couche_id,
                                  self.couches_graph[adapt.couche_id].couche_id,
                                  forcer=True, adapt=True)
                        couche_adapt = self.couches_graph[adapt.couche_id]
                else:
                    for i in range(-diff_taille):
                        adapt = graph_layer.G_Deconv(self)
                        self.couches_graph[adapt.couche_id].invisible_adapt = True
                        self.lier(couche_adapt.couche_id,
                                  self.couches_graph[adapt.couche_id].couche_id,
                                  forcer=True, adapt=True)
                        couche_adapt = self.couches_graph[adapt.couche_id]
                self.lier(self.couches_graph[couche_adapt.couche_id].couche_id,
                          self.couches_graph[couche_id_2].couche_id,
                          adapt=True, forcer=True)
                return
        test_boucle = self.couches_graph[couche_id_2].test_actualiser_enfant(couche_id_1)
        if test_boucle == False:
            return
        choix_lien = False
        if verification_taille == True and forcer == False:
            choix_lien = self.hp.Choice(
                "lien_%s%d_%s%d"
                % (self.couches_graph[couche_id_1].__class__.__name__,
                   self.couches_graph[couche_id_1].couche_id_type,
                   self.couches_graph[couche_id_2].__class__.__name__,
                   self.couches_graph[couche_id_2].couche_id_type),
                [True, False], default=False)
        else:
            choix_lien = True
        # debugging hooks
        if couche_id_2 == 3:
            break_pt2 = -2
        if couche_id_1 == 4 and couche_id_2 == 1:
            self.afficher("breakpoint")
            break_pt = -1
        if verification_taille == True and choix_lien == True:
            self.nb_liens += 1
            self.graph.edge(str(self.couches_graph[couche_id_1].couche_id),
                            str(self.couches_graph[couche_id_2].couche_id))
            self.couches_graph[couche_id_2].parent.append(
                self.couches_graph[couche_id_1].couche_id)
            self.couches_graph[couche_id_2].parents.append(
                self.couches_graph[couche_id_1].couche_id)
            self.couches_graph[couche_id_2].parents = list(
                dict.fromkeys(self.couches_graph[couche_id_2].parents))
            self.couches_graph[couche_id_2].actualiser_enfant()
            self.couches_graph[couche_id_1].enfant.append(
                self.couches_graph[couche_id_2].couche_id)