예제 #1
0
def feature_permutation(feats, g, sample, labels, model, bkg_rej_full, train_labels, training, n_classes, n_reps,
                       output_dir):
    '''
    Takes a pretrained model and computes the permutation importance of a feature or a
    group of features, then hands the result to saving_results().

    The importance is the ratio bkg_rej_full / bkg_rej, where bkg_rej is the value returned
    by valid_results() on a sample in which the given features have been shuffled. The
    shuffling is repeated n_reps times and the mean and standard deviation of the ratio
    over the repetitions are stored.

    Parameters
    ----------
    feats        : feature name (str) or list of feature names to permute together.
    g            : group index; if g >= 0 the output is named 'group_<g>', otherwise it is
                   named after the first feature.
    sample       : validation sample, copied through copy_sample() before shuffling.
    labels       : forwarded to valid_results().
    model        : object exposing predict(sample, batch_size=..., verbose=...).
    bkg_rej_full : reference value obtained without shuffling (numerator of the ratio);
                   reshaped to (n_classes, 1) in the 6-class case.
    train_labels : forwarded to valid_results().
    training     : forwarded to valid_results().
    n_classes    : number of classes; only 2 and 6 are supported.
    n_reps       : number of shuffling repetitions.
    output_dir   : base output directory; results go to its 'permutation_importance'
                   subdirectory.

    Returns
    -------
    None. The tuple (name, imp_mean, imp_std, bkg_rej) is passed to saving_results()
    together with the output file name (presumably pickled there -- confirm in
    saving_results).

    Raises
    ------
    ValueError if n_classes is neither 2 nor 6.
    '''
    # All the results will be saved in the permutation_importance subdirectory:
    output_dir += '/permutation_importance'
    # Converts the feature into a list to homogenize the format (groups are already given
    # as a list). This must happen BEFORE the name is derived: indexing a bare string with
    # [0] would yield only its first character instead of the feature name.
    if isinstance(feats, str):
        feats = [feats]
    # The importance of each variable will be saved in a different file:
    name = 'group_{}'.format(g) if g >= 0 else feats[0]
    fname = output_dir + '/' + name + '_importance'
    create_path(output_dir)
    # Initialize bkg_rej (one column per repetition)
    if n_classes == 2:
        bkg_rej = np.empty((1, n_reps))
    elif n_classes == 6:
        bkg_rej = np.empty((n_classes, n_reps))
        # Column vector so that the division below broadcasts over the repetitions
        bkg_rej_full = np.reshape(bkg_rej_full, (n_classes, 1))
    else:
        # Fail early with a clear message instead of an unbound-variable error after
        # the (expensive) predictions have already been run.
        raise ValueError('feature_permutation supports n_classes of 2 or 6, got {}'.format(n_classes))
    # Permutation of the given features n_reps times
    features = ' + '.join(feats)
    print('\nPERMUTATION OF : ' + features)
    shuffled_sample = copy_sample(sample, feats)
    for k in range(n_reps):
        # k is forwarded to shuffling_sample (presumably as the shuffling seed -- confirm)
        shuffling_sample(shuffled_sample, feats, k)
        probs = model.predict(shuffled_sample, batch_size=20000, verbose=2)
        # Background rejection with the selected feature(s) shuffled
        bkg_rej[:, k] = valid_results(shuffled_sample, labels, probs,
                                      train_labels, training, output_dir, 'OFF', False, True)
    # Computation of the importance of the features
    importance = bkg_rej_full / bkg_rej
    # Statistics are taken along axis 1, i.e. over the repetitions
    imp_mean, imp_std = np.mean(importance, axis=1), np.std(importance, axis=1)
    imp_tup = name, imp_mean, imp_std, bkg_rej
    saving_results(imp_tup, fname)
예제 #2
0
                                    # NOTE(review): these are the trailing arguments of a call whose
                                    # opening line is outside this excerpt (presumably the call that
                                    # builds valid_gen -- TODO confirm).
                                    args.n_valid,
                                    input_data,
                                    args.n_tracks,
                                    args.n_classes,
                                    valid_batch_size,
                                    args.valid_cuts,
                                    scaler,
                                    t_scaler,
                                    shuffle='OFF')
        # Predict from the generator object
        valid_probs = model.predict(valid_gen, verbose=args.verbose)
    else:
        # Otherwise predict directly from valid_sample in batches of valid_batch_size
        valid_probs = model.predict(valid_sample,
                                    batch_size=valid_batch_size,
                                    verbose=args.verbose)
# Evaluate the validation predictions; the value returned by valid_results is kept as
# bkg_rej (presumably the background rejection -- TODO confirm against valid_results).
bkg_rej = valid_results(valid_sample, valid_labels, valid_probs, train_labels,
                        training, args.output_dir, args.plotting, args.sep_bkg,
                        args.runDiffPlots)
# Only act when the requested results output is a pickle file
if '.pkl' in args.results_out:
    if args.feature_removal == 'ON':
        # Re-root the results file: keep only its base name and place it in the parent of
        # output_dir (everything before the last '/').
        # NOTE(review): if output_dir contains no '/', rfind returns -1 and the slice
        # simply drops the last character of output_dir.
        args.results_out = args.output_dir[0:args.output_dir.rfind(
            '/')] + '/' + args.results_out.split('/')[-1]
        try:
            # Append a {removed_feature: bkg_rej} record to the file ('ab' mode).
            # NOTE(review): the handle returned by open() is never closed explicitly;
            # consider wrapping it in a `with` block.
            pickle.dump({removed_feature: bkg_rej},
                        open(args.results_out, 'ab'))
        except IOError:
            # Best effort: a failed file access is reported and skipped, not fatal
            print('FILE ACCESS CONFLICT FOR', removed_feature,
                  '--> SKIPPING FILE ACCESS\n')
        # NOTE(review): the feature_ranking call continues past the end of this excerpt
        feature_ranking(args.output_dir,
                        args.results_out,
                        scalars,
                        images,
# Remaining command-line options, registered in order as (flag, default) pairs,
# followed by the actual parsing of the command line.
for _flag, _default in (
        ('--cross_valid', 'OFF'),
        ('--plotting', 'ON'),
        ('--input', ''),
        ('--output_dir', 'outputs'),
        ('--scaler_file', 'scaler.pkl'),
        ('--checkpoint', ''),
        ('--result_file', ''),
):
    parser.add_argument(_flag, default=_default)
args = parser.parse_args()

# OBTAINING PERFORMANCE FROM EXISTING VALIDATION RESULTS
# When --result_file names a pickle, the stored (sample, labels, probs) tuple is read back
# from output_dir and passed to valid_results instead of running the model.
if '.pkl' in args.result_file:
    result_file = args.output_dir + '/' + args.result_file
    if os.path.isfile(result_file):
        print('\nLOADING VALIDATION RESULTS FROM', result_file, '\n')
        # NOTE: unpickling can execute arbitrary code; only load trusted result files.
        # The context manager guarantees the file handle is closed after loading.
        with open(result_file, 'rb') as results_handle:
            sample, labels, probs = pickle.load(results_handle)
        valid_results(sample, labels, probs, [], None, args.output_dir,
                      args.plotting)
    # The program stops here whether or not the result file was found
    sys.exit()

# PROGRAM ARGUMENTS VERIFICATIONS
# Sample sizes and batch size are forced to integers, in place, in the args namespace
for key in ('n_train', 'n_valid', 'batch_size'):
    _namespace = vars(args)
    _namespace[key] = int(_namespace[key])
# Any unrecognized weight type is replaced by None
if args.weight_type not in ('flattening', 'match2s', 'match2b'):
    args.weight_type = None
# With fewer than one epoch requested, an '.h5' checkpoint must be supplied
if '.h5' not in args.checkpoint:
    if args.n_epochs < 1:
        print('\nERROR: weight file required with n_epochs < 1 -> exiting program\n')
        sys.exit()
if args.cross_valid == 'ON' and args.n_folds <= 1:
    print(