                  epochs=adv_numberOfEpochs)

# Adversarial training: alternate between updating the classifier ("model")
# and the adversary ("advmodel") one batch at a time
losses = {"L_f": [], "L_r": [], "L_f - L_r": []}
batch_size = 128
num_epochs = 200

for i in range(num_epochs):
    print i

    # Evaluate the combined objective and record its components on the test set
    l = DRf.evaluate(test_x, [test_y, df_Convert_v2_Test])
    losses["L_f - L_r"].append(l[0][None][0])
    losses["L_f"].append(l[1][None][0])
    losses["L_r"].append(-l[2][None][0])
    print(losses["L_r"][-1] / lam)
    plot_losses(i, losses, lam, num_epochs)

    # Fit "model": unfreeze the classifier, freeze the adversary
    model.trainable = True
    advmodel.trainable = False
    model.compile(loss=[make_loss_model(c=1.0)], optimizer=opt_model, metrics=['accuracy'])
    DRf.compile(loss=[make_loss_model(c=1.0), make_loss_advmodel(c=-lam)], optimizer=opt_DRf)
    DfR.compile(loss=[make_loss_advmodel(c=1.0)], optimizer=opt_DfR)

    indices = np.random.permutation(len(train_x))[:batch_size]
    # model.train_on_batch(train_x.iloc[indices], train_y.iloc[indices])
    DRf.train_on_batch(train_x.iloc[indices],
                       [train_y.iloc[indices], df_Convert_v2.iloc[indices]])

    # Fit "advmodel"
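# --- Hedged sketch (not from the script): the loss factories make_loss_model and
# make_loss_advmodel are defined elsewhere. In this kind of adversarial setup they
# typically just return a cross-entropy scaled by the constant c, so that
# make_loss_advmodel(c=-lam) becomes the penalty term of L_f - lam*L_r.
from keras import backend as K

def make_loss_model(c):
    def loss_model(y_true, y_pred):
        # Classifier binary cross-entropy, scaled by c
        return c * K.binary_crossentropy(y_true, y_pred)
    return loss_model

def make_loss_advmodel(c):
    def loss_advmodel(z_true, z_pred):
        # Adversary cross-entropy on the binned nuisance target, scaled by c
        return c * K.categorical_crossentropy(z_true, z_pred)
    return loss_advmodel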
        nJets_binned_bkg, callbacks=[earlystop2], epochs=adv_numberOfEpochs)

# Adversarial training
num_epochs = 200
for i in range(num_epochs):
    print 'Adversarial training epoch: ', i+1

    l = DRf.evaluate([test_x, test_x.dM_Go_LSP], [test_y, nJets_binned_test])
    losses["L_f - L_r"].append(l[0][None][0])
    losses["L_f"].append(l[1][None][0])
    losses["L_r"].append(-l[2][None][0])
    print(losses["L_r"][-1] / lam)
    plot_losses(i, losses, lam, num_epochs, 'Losses_nJets_PAT_noLO_HT_lambda'+str(lam))

    # Fit "model"
    model.trainable = True
    advmodel.trainable = False
    model.compile(loss=[make_loss_model(c=1.0)], optimizer=opt_model, metrics=['accuracy'])
    DRf.compile(loss=[make_loss_model(c=1.0), make_loss_advmodel(c=-lam)], optimizer=opt_DRf)
    DfR.compile(loss=[make_loss_advmodel(c=1.0)], optimizer=opt_DfR)

    indices = np.random.permutation(len(train_x))[:batch_size]
    DRf.train_on_batch([train_x.iloc[indices], train_x.dM_Go_LSP.iloc[indices]],
                       [train_y.iloc[indices], nJets_binned.iloc[indices]])

    # Fit "advmodel"
    if lam >= 0.0:
        model.trainable = False
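# --- Hedged sketch (not from the script): the composite models DRf and DfR are built
# earlier. The usual wiring, assuming a classifier "model" and an adversary "advmodel"
# that takes the classifier output as its input, looks roughly like this (the extra
# dM_Go_LSP input used in the parameterized version above is omitted for brevity;
# n_features is assumed to be defined earlier):
from keras.models import Model
from keras.layers import Input

inputs = Input(shape=(n_features,))

# DRf: trains the classifier while penalizing whatever the adversary can recover
DRf = Model(inputs=[inputs], outputs=[model(inputs), advmodel(model(inputs))])

# DfR: trains only the adversary on the (frozen) classifier output
DfR = Model(inputs=[inputs], outputs=[advmodel(model(inputs))])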
print 'Epoch mean loss: ', round(np.mean(epoch_losses), 5)

for (x_batch, y_batch) in val_data_batched:
    classifier_model.compile(loss=make_disco_loss(x_batch.numpy()[:, decorr_var_col]),
                             optimizer=opt_model, metrics=['accuracy'])
    val_loss, val_acc = classifier_model.evaluate(x_batch, y_batch, batch_size=batchSize)
    val_epoch_losses.append(val_loss)
    tf.keras.backend.clear_session()
print 'Epoch validation mean loss: ', round(np.mean(val_epoch_losses), 5)

losses["L_t"].append(np.mean(epoch_losses))
losses["L_v"].append(np.mean(val_epoch_losses))
plot_losses(epoch, losses, lam, numberOfEpochs, 'Losses_DisCo_dilepton_lambda_' + str(lam))
tf.keras.backend.clear_session()

epoch_end_time = time.time()
print 'Epoch time elapsed: ', np.round((epoch_end_time - epoch_start_time), 3)
print "End of epoch: ", epoch + 1
print '-------------'

print ' - Test set ROC AUC: ', round(roc_auc_score(test_y, classifier_model.predict(test_x)), 4)

# Save the model
save_path = '/work/kimmokal/susyDNN/models/'
save_name = 'susyDNN_DisCo_dilepton_nJets_lambda' + str(lam) + '_' + str(mass_point)
classifier_model.save(save_path + save_name + '.h5')
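# --- Hedged sketch (not from the script): make_disco_loss builds a per-batch loss that
# closes over the decorrelation variable of that batch, which is why the model is
# recompiled for every batch (a plain Keras loss only sees y_true and y_pred).
# One plausible formulation is binary cross-entropy plus a distance-correlation penalty,
# as in the DisCo method. Here lam is a keyword argument, whereas the real factory
# presumably picks it up from the enclosing scope:
import tensorflow as tf

def make_disco_loss(decorr_values, lam=1.0):
    z = tf.constant(decorr_values, dtype=tf.float32)

    def disco_loss(y_true, y_pred):
        y_true = tf.cast(y_true, y_pred.dtype)
        bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)

        x = tf.reshape(y_pred, [-1])
        y = tf.reshape(z, [-1])

        # Pairwise absolute-distance matrices of the classifier output
        # and the decorrelation variable
        a = tf.abs(x[:, None] - x[None, :])
        b = tf.abs(y[:, None] - y[None, :])

        # Double centering
        A = a - tf.reduce_mean(a, axis=0, keepdims=True) \
              - tf.reduce_mean(a, axis=1, keepdims=True) + tf.reduce_mean(a)
        B = b - tf.reduce_mean(b, axis=0, keepdims=True) \
              - tf.reduce_mean(b, axis=1, keepdims=True) + tf.reduce_mean(b)

        # Sample distance correlation between classifier output and decorrelation variable
        dcov2_xy = tf.maximum(tf.reduce_mean(A * B), 0.0)
        dcov2_xx = tf.reduce_mean(A * A)
        dcov2_yy = tf.reduce_mean(B * B)
        dcorr = tf.sqrt(dcov2_xy / (tf.sqrt(dcov2_xx * dcov2_yy) + 1e-12))

        return tf.reduce_mean(bce) + lam * dcorr

    return disco_loss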
                   hist_dnn_output_njet_6to8)
js_distances["JS1"].append(js1)
js_distances["JS2"].append(js2)
print 'DNN output Jensen-Shannon distance (nJet = [6,7,8] vs. nJet = [4,5]): ' + str(js1)
print 'DNN output Jensen-Shannon distance (nJet >= 9 vs. nJet = [6,7,8]): ' + str(js2)

### Save losses and plot
l = DRf.evaluate(test_x, [test_y, nJets_binned_test], batch_size=512)
losses["L_f - L_r"].append(l[0][None][0])
losses["L_f"].append(l[1][None][0])
losses["L_r"].append(-l[2][None][0])
print("Loss: " + str(losses["L_r"][-1] / lam))

plot_losses(i, losses, lam, num_epochs,
            'Losses_adversarial_' + model_name + '_lambda_' + str(lam))
plot_jensenshannon(i, js_distances, lam, num_epochs,
                   'JS_distance_' + model_name + '_lambda_' + str(lam))
plot_inefficiencies(i, inefficiencies_compressed, inefficiencies_uncompressed, lam, num_epochs,
                    'Inefficiencies_' + model_name + '_lambda' + str(lam))

roc_aoc = 1 - round(roc_auc_score(test_y, model.predict(test_x)), 4)
print 'ROC area over curve: ' + str(roc_aoc)

# Save the metrics for the best models to a .csv file
if (js1 <= 0.08 and js2 <= 0.08 and sig_uncompressed_ineff <= 0.7
        and sig_compressed_ineff <= 0.8 and bkg_uncompressed_ineff <= 0.1):
    metrics_path = save_path + model_name + '_best_metrics.txt'
    metrics_to_csv(metrics_path, i + 1, js1, js2,
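# --- Hedged sketch (not from the script): js1 and js2 compare the normalized DNN output
# histograms between neighbouring nJet regions. A minimal version of such a comparison,
# assuming the scores are plain NumPy arrays binned over the [0, 1] output range:
import numpy as np
from scipy.spatial.distance import jensenshannon

def dnn_output_js_distance(scores_region_a, scores_region_b, n_bins=20):
    bins = np.linspace(0.0, 1.0, n_bins + 1)
    hist_a, _ = np.histogram(scores_region_a, bins=bins, density=True)
    hist_b, _ = np.histogram(scores_region_b, bins=bins, density=True)
    # jensenshannon returns the JS distance (square root of the divergence)
    return jensenshannon(hist_a, hist_b, base=2)

# e.g. js1 = dnn_output_js_distance(scores_njet_6to8, scores_njet_4to5)  # names assumed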
print 'Epoch mean loss: ', round(np.mean(epoch_losses), 5)

for (x_batch, y_batch) in val_data_batched:
    classifier_model.compile(loss=make_disco_loss(x_batch.numpy()[:, decorr_var_col]),
                             optimizer=opt_model, metrics=['accuracy'])
    val_loss, val_acc = classifier_model.evaluate(x_batch, y_batch, batch_size=batchSize)
    val_epoch_losses.append(val_loss)
    tf.keras.backend.clear_session()
print 'Epoch validation mean loss: ', round(np.mean(val_epoch_losses), 5)

losses["L_t"].append(np.mean(epoch_losses))
losses["L_v"].append(np.mean(val_epoch_losses))
plot_losses(epoch, losses, lam, numberOfEpochs, 'Losses_DisCo_all_bkg_lambda_' + str(lam))
tf.keras.backend.clear_session()

epoch_end_time = time.time()
print 'Epoch time elapsed: ', np.round((epoch_end_time - epoch_start_time), 3)
print "End of epoch: ", epoch + 1
print '-------------'

print ' - Test set ROC AUC: ', round(roc_auc_score(test_y, classifier_model.predict(test_x)), 4)

# Save the model
save_path = '/work/kimmokal/susyDNN/models/'
save_name = 'susyDNN_DisCo_all_bkg_nJets_lambda' + str(lam) + '_' + str(mass_point)
classifier_model.save(save_path + save_name + '.h5')
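# --- Hedged sketch (not from the script): the validation loop above mirrors the per-batch
# training loop that precedes it. Under the same recompile-per-batch pattern, the training
# step could look roughly like this (train_data_batched is an assumed name):
epoch_losses = []
for (x_batch, y_batch) in train_data_batched:
    classifier_model.compile(loss=make_disco_loss(x_batch.numpy()[:, decorr_var_col]),
                             optimizer=opt_model, metrics=['accuracy'])
    history = classifier_model.fit(x_batch, y_batch, batch_size=batchSize,
                                   epochs=1, verbose=0)
    epoch_losses.append(history.history['loss'][-1])
    tf.keras.backend.clear_session()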
print "Signal Inefficiency Uncompressed: " + str(Signal_Uncompressed_Ineff) print "Bkg Inefficiency Uncompressed: " + str(Bkg_Uncompressed_Ineff) #///////////////////////# ### Save losses and plot l = DRf.evaluate(test_x, [test_y, nJets_binned_test]) losses["L_f - L_r"].append(l[0][None][0]) losses["L_f"].append(l[1][None][0]) losses["L_r"].append(-l[2][None][0]) print("Loss L_r: " + str(losses["L_r"][-1] / lam)) print("Loss L_f: " + str(losses["L_f"][-1])) print(" ") if (i == num_epochs - 1): plot_losses(i, losses, lam, num_epochs, 'Losses_adversarial_reduced_bkg_lambda' + str(lam)) plot_jensenshannon(i, js_distances, lam, num_epochs, 'JS_distance_reduced_bkg_lambda' + str(lam)) plot_Inefficiencies(i, inefficiencies_Compressed, inefficiencies_Uncompressed, lam, num_epochs, 'Inefficiencies_reduced_bkg_lambda' + str(lam)) save_name = 'susyDNN_adv_model_reduced_bkg_lambda_' + str(lam) + "_" + str( i) model.save(save_path2 + save_name + '.h5') print '- - - - - - -' print ' - second test set roc auc: ', round( roc_auc_score(test_y, model.predict(test_x)), 4) # Check decorrelation for test set
        nJets_binned_bkg, callbacks=[earlystop2], epochs=adv_numberOfEpochs)

# Adversarial training
num_epochs = 200
for i in range(num_epochs):
    print 'Adversarial training epoch: ', i+1

    l = DRf.evaluate(test_x, [test_y, nJets_binned_test])
    losses["L_f - L_r"].append(l[0][None][0])
    losses["L_f"].append(l[1][None][0])
    losses["L_r"].append(-l[2][None][0])
    print(losses["L_r"][-1] / lam)
    plot_losses(i, losses, lam, num_epochs, 'Losses_nJets_dilepton_lambda'+str(lam))

    # Fit "model"
    model.trainable = True
    advmodel.trainable = False
    model.compile(loss=[make_loss_model(c=1.0)], optimizer=opt_model, metrics=['accuracy'])
    DRf.compile(loss=[make_loss_model(c=1.0), make_loss_advmodel(c=-lam)], optimizer=opt_DRf)
    DfR.compile(loss=[make_loss_advmodel(c=1.0)], optimizer=opt_DfR)

    indices = np.random.permutation(len(train_x))[:batch_size]
    DRf.train_on_batch(train_x.iloc[indices],
                       [train_y.iloc[indices], nJets_binned.iloc[indices]])

    # Fit "advmodel"
    if lam >= 0.0:
        model.trainable = False
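        # Hedged sketch (not from the original excerpt, which breaks off here): the step
        # that typically follows unfreezes the adversary and trains it on the binned
        # nJets target, mirroring the classifier step above (possibly on background-only
        # events, as the earlier nJets_binned_bkg pre-training fit suggests)
        advmodel.trainable = True
        DfR.compile(loss=[make_loss_advmodel(c=1.0)], optimizer=opt_DfR)
        indices = np.random.permutation(len(train_x))[:batch_size]
        DfR.train_on_batch(train_x.iloc[indices], nJets_binned.iloc[indices])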