import numpy as np
from hep_ml import reweight
from sklearn.model_selection import train_test_split

# shape_data and flatten_feature are project-specific helpers assumed to be in scope


def reweight_evt_dics(evt_dic_mc, evt_dic_rd):
    """
    reweight the MC event dictionary for training
    """
    evt_dic_mc_copy = evt_dic_mc.copy()
    evt_dic_rd_copy = evt_dic_rd.copy()

    shape_data(evt_dic_mc_copy)
    shape_data(evt_dic_rd_copy)
    flatten_feature(evt_dic_mc_copy, 'track')
    flatten_feature(evt_dic_rd_copy, 'track')

    mc_array = np.c_[evt_dic_mc_copy['track'], evt_dic_mc_copy['event']]
    # note: this split is not used below; the reweighter is fitted and
    # evaluated on the full MC sample
    mc_train, mc_test = train_test_split(mc_array, test_size=0.4)

    rd_array = np.c_[evt_dic_rd_copy['track'], evt_dic_rd_copy['event']]

    reweighter = reweight.GBReweighter(n_estimators=200,
                                       learning_rate=0.1,
                                       max_depth=3,
                                       min_samples_leaf=30)
    reweighter.fit(mc_array, rd_array)
    gb_weights_test = reweighter.predict_weights(mc_array)

    return gb_weights_test
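
# Hypothetical usage sketch (commented out; the event-dictionary layout with
# 'track' and 'event' arrays is assumed from the function body above):
# mc_weights = reweight_evt_dics(evt_dic_mc, evt_dic_rd)
# mc_weights can then be passed as per-event sample_weight when training on MC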
Example #2
def reweightermodel(ioriginal, itarget, ioriginal_weights, itarget_weights,
                    args):
    numpy.random.seed(args[5])  # fix the numpy random seed for reproducibility
    reweighter_base = reweight.GBReweighter(n_estimators=args[0],
                                            learning_rate=args[1],
                                            max_depth=args[2],
                                            min_samples_leaf=args[3],
                                            gb_args={'subsample': args[4]})
    reweighter = reweight.FoldingReweighter(reweighter_base,
                                            random_state=args[5],
                                            n_folds=3,
                                            verbose=False)
    reweighter.fit(ioriginal, itarget, ioriginal_weights, itarget_weights)
    return reweighter
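
# Hypothetical usage sketch with toy 1-D samples; the positional args encode
# (n_estimators, learning_rate, max_depth, min_samples_leaf, subsample, seed):
import numpy
toy_original = numpy.random.normal(0.0, 1.0, size=(5000, 1))
toy_target = numpy.random.normal(0.2, 1.1, size=(5000, 1))
toy_args = [50, 0.1, 3, 200, 0.8, 2019]
folding_reweighter = reweightermodel(toy_original, toy_target,
                                     numpy.ones(len(toy_original)),
                                     numpy.ones(len(toy_target)),
                                     toy_args)
toy_weights = folding_reweighter.predict_weights(toy_original)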
Example #3
### Gradient boosted Reweighter
##reweighter = reweight.GBReweighter(n_estimators=50, learning_rate=0.1,
##                                   max_depth=3, min_samples_leaf=1000,
##                                   gb_args={'subsample': 0.4})
##reweighter.fit(original_train, target_train)
##gb_weights_test = reweighter.predict_weights(original_test)
##
### Validate reweighting rule on the test part comparing 1d projections
##draw_distributions(original_test, target_test, gb_weights_test,
##                   'gb_weights_test.png')
##
# Folding Reweighter
# define base reweighter
reweighter_base = reweight.GBReweighter(n_estimators=50,
                                        learning_rate=0.1,
                                        max_depth=2,
                                        min_samples_leaf=1000,
                                        gb_args={'subsample': 0.4})
reweighter = reweight.FoldingReweighter(reweighter_base, n_folds=2)
# no need to divide data into train/test parts
reweighter.fit(original, target, target_weight=target_sWeights)
folding_weights = reweighter.predict_weights(original)
# cast the sWeights array to float
cast_target_sWeights = target_sWeights.astype(float)
draw_distributions_weighted(original, target, folding_weights,
                            cast_target_sWeights, 'FoldingReweight.png')
#draw_distributions(original, target, folding_weights,
#                   'FoldingReweight.png')
Example #4
# print 'After binned re-weighting'
# draw_distributions(original_test.iloc[:,:-1], target_test.iloc[:,:-1], bins_weights_test, target_weights_test)
# **********************************************

# *********Gradient Boosted Re-weighting********

# This is currently the best config for Run1
# reweighter = reweight.GBReweighter(n_estimators=200, learning_rate=0.1,
#                                    max_depth=3, min_samples_leaf=50,
#                                    gb_args={'subsample': 0.2,
#                                             'random_state': 42})

reweighter = reweight.GBReweighter(n_estimators=100,
                                   learning_rate=0.2,
                                   max_depth=4,
                                   min_samples_leaf=50,
                                   gb_args={
                                       'subsample': 0.5,
                                       'random_state': 42
                                   })

# reweighter = reweight.GBReweighter(n_estimators=50, learning_rate=0.1,
#                                    max_depth=3, min_samples_leaf=100,
#                                    gb_args={'subsample': 0.5,
#                                             'random_state': 42})

# reweighter.fit(original_train.iloc[:, :-1], target_train.iloc[:, :-1],
#                original_weights_train, target_weights_train)
reweighter.fit(original.iloc[:, :-1], target.iloc[:, :-1], original_weights,
               target_weights)

# gb_weights_test = reweighter.predict_weights(original_test.iloc[:, :-1])
Example #5
#bins_weights = bins_reweighter.predict_weights(original)
## validate reweighting rule on the test part comparing 1d projections
#draw_distributions(original, target, bins_weights, target_weights)

## ==================== GB reweighter ====================
"""

the following set are used for the low statistic case
feel free to increase the n_estimators (number of trees) and min_samples_leaf (minimal number of evnts in the leaf) if you have enough statistics
usually set to be n_estimators = 200 , min_samples_leaf=1000 ; 

"""

reweighter = reweight.GBReweighter(n_estimators=70,
                                   learning_rate=0.1,
                                   max_depth=3,
                                   min_samples_leaf=100,
                                   gb_args={'subsample': 0.7})
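# For reference, a commented sketch of the higher-statistics configuration
# mentioned in the docstring above (n_estimators=200, min_samples_leaf=1000);
# the remaining settings are assumed to carry over and may need retuning:
# reweighter = reweight.GBReweighter(n_estimators=200,
#                                    learning_rate=0.1,
#                                    max_depth=3,
#                                    min_samples_leaf=1000,
#                                    gb_args={'subsample': 0.7})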
reweighter.fit(original, target, original_weights, target_weights)

gb_weights_test = reweighter.predict_weights(original)
gb_weights_used = reweighter.predict_weights(used)
print(type(gb_weights_used))
#reweighting done

#show the weight results
#validate reweighting vars on the test part comparing 1d projections

draw_distributions(original, target, gb_weights_test, target_weights)

# save the weights to a ROOT file
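# A minimal sketch of one way to do this, assuming the uproot package is
# available; the file, tree, and branch names are illustrative only:
import uproot
with uproot.recreate('gb_weights.root') as fout:
    fout['weights'] = {'gb_weight': gb_weights_used}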
Example #6
def reweightermodel(original, target, original_weights, target_weights, args):
    reweighter_base = reweight.GBReweighter(
        n_estimators=args[0], learning_rate=args[1], max_depth=args[2],
        min_samples_leaf=args[3], gb_args={'subsample': args[4]})
    reweighter = reweight.FoldingReweighter(reweighter_base, random_state=2019,
                                            n_folds=2, verbose=True)
    reweighter.fit(original, target, original_weights, target_weights)
    return reweighter
Example #7
def run():
    args = getArgs().parse_args()

    ##
    ## TRAINING + VALIDATION OF BDT
    ##

    original = readData(args.original)
    target = readData(args.target)

    original_weights = np.ones(len(original))
    target_weights = np.ones(len(target))

    # divide original samples into training and test parts
    original_train, original_test = train_test_split(original)
    # divide target samples into training and test parts
    target_train, target_test = train_test_split(target)

    original_weights_train = np.ones(len(original_train))
    original_weights_test = np.ones(len(original_test))

    columns = [
        'hs_pt', 'wp_pt', 'wm_pt', 'met', 'hs_abseta', 'wp_abseta',
        'wm_abseta', 'dRWW'
    ]

    print('train', len(original_train))
    print('test', len(original_test))

    # create output folder
    try:
        makedirs(args.outputdir)
    except OSError:
        pass
    # draw full distributions
    drawDistributions(original, target, original_weights, columns,
                      join(args.outputdir, 'total.png'))
    # draw train distributions
    drawDistributions(original_train, target_train, original_weights_train,
                      columns, join(args.outputdir, 'train.png'))
    # draw test distributions
    drawDistributions(original_test, target_test, original_weights_test,
                      columns, join(args.outputdir, 'test_before.png'))

    # gradient boosted reweighting
    reweighter = reweight.GBReweighter(n_estimators=200,
                                       learning_rate=0.1,
                                       max_depth=4,
                                       min_samples_leaf=1000,
                                       gb_args={'subsample': 0.4})
    reweighter.fit(original_train, target_train)
    gb_weights_test = reweighter.predict_weights(original_test)
    # validate reweighting rule on the test part comparing 1d projections
    drawDistributions(original_test, target_test, gb_weights_test, columns,
                      join(args.outputdir, 'test_bdt.png'))

    ##
    ## REWEIGHTING THE SIMULATED SIGNAL SAMPLE
    ##
    if args.result:
        result = readData(args.result, clean=False)
        result_weights = np.ones(len(result))
        print('result', len(result))
        # reweight result
        gb_weights_result = reweighter.predict_weights(result)

        # plot result comparing 1d projections
        drawDistributions(result, target_test, result_weights, columns,
                          join(args.outputdir, 'result_before.png'))
        drawDistributions(result, target_test, gb_weights_result, columns,
                          join(args.outputdir, 'result_bdt.png'))
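
# Hypothetical entry point (not part of the original excerpt), assuming this
# script is meant to be run from the command line:
if __name__ == '__main__':
    run()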
# draw_distributions('n100gbr4_validate_KuPT.png',
#                    [columns[3]], original_test, target_test, gb_weights_test,
#                    filename_as_title=True,
#                    # yscale=('log',),
#                    nrows=1, ncols=1, hist_settings=hist_settings)

######################
# Folding Reweighter #
######################

# Define base reweighter
reweighter_base = reweight.GBReweighter(
    n_estimators=120,
    learning_rate=0.1,
    max_depth=3,
    min_samples_leaf=5000,
)

reweighter = reweight.FoldingReweighter(reweighter_base, n_folds=2)

# No need to divide data into train/test parts
reweighter.fit(original, target, target_weight=target_weights)

# Predict weights for the input file
folding_weights = reweighter.predict_weights(toReweight)

draw_distributions(
    'GBR4_validate.png',
    columns,
    toReweight,
  weightBase[i] = probabilityToWeight(baseProba[i][:,1])
  weightLbfgs[i] = probabilityToWeight(lbfgsProba[i][:,1])
  weightAda[i] = probabilityToWeightAda(adaProba[i][:,1])
  weightSgd[i] = probabilityToWeight(sgdProba[i][:,1])

"""# Gradient Boosting Reweighter"""

#Gradient boosted reweighter
gb_weights_test = np.empty(sets,dtype=object)

for i in range(sets):

  # These parameters can be tuned to optimise performance
  gb_reweighter = reweight.GBReweighter(n_estimators=100, 
                                        learning_rate=0.1, max_depth=32, min_samples_leaf=500, 
                                        gb_args={'subsample': 0.4, 'max_features' : 6, 'min_samples_split' : 201})
  gb_reweighter.fit(original_train[i], target_train[i])

  gb_weights_test[i] = gb_reweighter.predict_weights(original_test[i])

#Check weighted distributions on the test splits
draw_distributions(original_test[1], target_test[1], gb_weights_test[1])

#Folding Reweighter
folding_weights = np.empty(sets,dtype=object)

for i in range(sets):
  #Gradient boosted decision tree as base
  reweighter_gb = reweight.GBReweighter(learning_rate=0.1, n_estimators=64,
                                        max_depth=32, min_samples_leaf=200,
                                        gb_args={'subsample': 0.4})