def feature_permutation(feats, g, sample, labels, model, bkg_rej_full, train_labels, training, n_classes, n_reps, output_dir):
    '''
    Takes a pretrained model and saves the permutation importance of a feature
    or a group of features through saving_results().

    The given features are shuffled n_reps times; after each shuffle the model
    is re-evaluated and the background rejection returned by valid_results()
    is recorded. The importance is the ratio bkg_rej_full / bkg_rej, i.e. how
    much the rejection degrades once the features carry no information.

    Parameters:
        feats        : feature name (str) or list of feature names (group).
        g            : group index; if g >= 0 the result is named
                       'group_<g>', otherwise it is named after the first
                       feature in feats.
        sample       : validation sample passed to copy_sample().
        labels       : validation labels forwarded to valid_results().
        model        : pretrained model exposing predict().
        bkg_rej_full : background rejection obtained without any shuffling.
        train_labels : forwarded unchanged to valid_results().
        training     : forwarded unchanged to valid_results().
        n_classes    : number of classes; only 2 and 6 are supported.
        n_reps       : number of shuffling repetitions.
        output_dir   : base output directory; results are written to its
                       'permutation_importance' subdirectory.

    Returns:
        None. The tuple (name, importance mean, importance std, bkg_rej) is
        handed to saving_results() together with the output file name.

    Raises:
        ValueError: if n_classes is neither 2 nor 6.
    '''
    # Fail fast: previously an unsupported n_classes only surfaced as a
    # NameError on bkg_rej after the first (expensive) model prediction.
    if n_classes not in (2, 6):
        raise ValueError('feature_permutation supports n_classes of 2 or 6, got {}'.format(n_classes))

    # Converts the feature into a list to homogenize the format (groups are
    # already given as a list). This must happen BEFORE the name is derived:
    # indexing a bare string with [0] would only yield its first character.
    if isinstance(feats, str):
        feats = [feats]

    # All the results will be saved in the permutation_importance subdirectory
    output_dir += '/permutation_importance'

    # The importance of each variable will be saved in a different file
    name = 'group_{}'.format(g) if g >= 0 else feats[0]
    fname = output_dir + '/' + name + '_importance'
    create_path(output_dir)

    # Initialize bkg_rej: one row of n_reps results for binary classification,
    # one row per class otherwise
    if n_classes == 2:
        bkg_rej = np.empty((1, n_reps))
    else:
        bkg_rej = np.empty((n_classes, n_reps))
        # Column vector so that it broadcasts against the (n_classes, n_reps) array
        bkg_rej_full = np.reshape(bkg_rej_full, (n_classes, 1))

    # Permutation of the given features n_reps times
    features = ' + '.join(feats)
    print('\nPERMUTATION OF : ' + features)
    shuffled_sample = copy_sample(sample, feats)
    for k in range(n_reps):
        # k is forwarded to shuffling_sample (presumably as a seed/repetition index -- TODO confirm)
        shuffling_sample(shuffled_sample, feats, k)
        probs = model.predict(shuffled_sample, batch_size=20000, verbose=2)
        # Background rejection with the feature(s) shuffled
        bkg_rej[:, k] = valid_results(shuffled_sample, labels, probs, train_labels, training,
                                      output_dir, 'OFF', False, True)

    # Computation of the importance of the features
    importance = bkg_rej_full / bkg_rej
    imp_mean = np.mean(importance, axis=1)
    imp_std = np.std(importance, axis=1)
    imp_tup = name, imp_mean, imp_std, bkg_rej
    saving_results(imp_tup, fname)
args.n_valid, input_data, args.n_tracks, args.n_classes, valid_batch_size, args.valid_cuts, scaler, t_scaler, shuffle='OFF') valid_probs = model.predict(valid_gen, verbose=args.verbose) else: valid_probs = model.predict(valid_sample, batch_size=valid_batch_size, verbose=args.verbose) bkg_rej = valid_results(valid_sample, valid_labels, valid_probs, train_labels, training, args.output_dir, args.plotting, args.sep_bkg, args.runDiffPlots) if '.pkl' in args.results_out: if args.feature_removal == 'ON': args.results_out = args.output_dir[0:args.output_dir.rfind( '/')] + '/' + args.results_out.split('/')[-1] try: pickle.dump({removed_feature: bkg_rej}, open(args.results_out, 'ab')) except IOError: print('FILE ACCESS CONFLICT FOR', removed_feature, '--> SKIPPING FILE ACCESS\n') feature_ranking(args.output_dir, args.results_out, scalars, images,
parser.add_argument('--cross_valid', default='OFF') parser.add_argument('--plotting', default='ON') parser.add_argument('--input', default='') parser.add_argument('--output_dir', default='outputs') parser.add_argument('--scaler_file', default='scaler.pkl') parser.add_argument('--checkpoint', default='') parser.add_argument('--result_file', default='') args = parser.parse_args() # OBTAINING PERFORMANCE FROM EXISTING VALIDATION RESULTS if '.pkl' in args.result_file: result_file = args.output_dir + '/' + args.result_file if os.path.isfile(result_file): print('\nLOADING VALIDATION RESULTS FROM', result_file, '\n') sample, labels, probs = pickle.load(open(result_file, 'rb')) valid_results(sample, labels, probs, [], None, args.output_dir, args.plotting) sys.exit() # PROGRAM ARGUMENTS VERIFICATIONS for key in ['n_train', 'n_valid', 'batch_size']: vars(args)[key] = int(vars(args)[key]) #for key, val in vars(args).items(): vars(args)[key]= int(val) if type(val)==float else val #for key, val in vars(args).items(): exec(key + '= val') if args.weight_type not in ['flattening', 'match2s', 'match2b']: args.weight_type = None if '.h5' not in args.checkpoint and args.n_epochs < 1: print( '\nERROR: weight file required with n_epochs < 1 -> exiting program\n') sys.exit() if args.cross_valid == 'ON' and args.n_folds <= 1: print(