示例#1
0
def reweightermodel(ioriginal, itarget, ioriginal_weights, itarget_weights,
                    args):
    """Fit a 3-fold gradient-boosted reweighter and return it.

    ``args`` is read by position:
        args[0] -> n_estimators of the GBReweighter
        args[1] -> learning_rate
        args[2] -> max_depth
        args[3] -> min_samples_leaf
        args[4] -> 'subsample' entry of gb_args
        args[5] -> seed, used both for numpy's global RNG and as the
                   FoldingReweighter random_state

    The four data arguments are forwarded unchanged to
    ``FoldingReweighter.fit`` in the order (original, target,
    original weights, target weights).
    """
    seed = args[5]
    # Seed numpy's global generator before any estimator is constructed.
    numpy.random.seed(seed)

    gb_options = {
        'n_estimators': args[0],
        'learning_rate': args[1],
        'max_depth': args[2],
        'min_samples_leaf': args[3],
        'gb_args': {'subsample': args[4]},
    }
    folded = reweight.FoldingReweighter(
        reweight.GBReweighter(**gb_options),
        random_state=seed,
        n_folds=3,
        verbose=False,
    )
    folded.fit(ioriginal, itarget, ioriginal_weights, itarget_weights)
    return folded
示例#2
0
##                   'original_weights_test.png')
##
### Gradient boosted Reweighter
##reweighter = reweight.GBReweighter(n_estimators=50, learning_rate=0.1,
##                                   max_depth=3, min_samples_leaf=1000,
##                                   gb_args={'subsample': 0.4})
##reweighter.fit(original_train, target_train)
##gb_weights_test = reweighter.predict_weights(original_test)
##
### Validate reweighting rule on the test part comparing 1d projections
##draw_distributions(original_test, target_test, gb_weights_test,
##                   'gb_weights_test.png')
##
# Folding reweighter: a GBReweighter wrapped in a FoldingReweighter with
# n_folds=2, fitted on the full samples and used to weight `original`.
# Define the base reweighter that FoldingReweighter wraps.
reweighter_base = reweight.GBReweighter(n_estimators=50,
                                        learning_rate=0.1,
                                        max_depth=2,
                                        min_samples_leaf=1000,
                                        gb_args={'subsample': 0.4})
reweighter = reweight.FoldingReweighter(reweighter_base, n_folds=2)
# No explicit train/test split is made here: the same `original` sample is
# passed to both fit() and predict_weights(). The target sample is weighted
# by target_sWeights during the fit.
reweighter.fit(original, target, target_weight=target_sWeights)
folding_weights = reweighter.predict_weights(original)
# Cast the sWeights array to float before handing it to the plotting helper
# (the fit above received the un-cast array).
cast_target_sWeights = target_sWeights.astype(float)
draw_distributions_weighted(original, target, folding_weights,
                            cast_target_sWeights, 'FoldingReweight.png')
#draw_distributions(original, target, folding_weights,
#                   'FoldingReweight.png')
示例#3
0
# Give every target entry a unit weight (one float64 per row of `target`).
weights_target = numpy.ones(dtype='float64', shape=len(target))

## Train the BDT reweighter (GBReweighter wrapped in a 2-fold FoldingReweighter).

print '... starting the reweighting'

# The same seed is used for numpy's global RNG and for the folding
# reweighter's random_state.
rnd_seed = 123456
numpy.random.seed(rnd_seed)  # seed numpy's global random generator
reweighter_base = reweight.GBReweighter(n_estimators=100,
                                        learning_rate=0.1,
                                        max_depth=4,
                                        min_samples_leaf=400,
                                        gb_args={'subsample': 0.5})
reweighter = reweight.FoldingReweighter(reweighter_base,
                                        random_state=rnd_seed,
                                        n_folds=2,
                                        verbose=True)
# `origin` and `weights_origin` are defined outside this excerpt.
reweighter.fit(origin, target, weights_origin, weights_target)

print '... reweighting fit done'

# Predict weights for the origin sample. The third argument is a callable
# that averages its input along axis 0 -- presumably the vote function that
# combines the per-fold predictions; confirm against the hep_ml API.
ws = reweighter.predict_weights(origin, weights_origin,
                                lambda x: numpy.mean(x, axis=0))
# Scale the predicted weights by transfer_factor (defined outside this excerpt).
weights = numpy.multiply(ws, transfer_factor)
# Ratio of the number of target rows to the sum of the scaled weights.
factor = float(float(len(target.index)) / weights.sum())

# Summary printout; the value after "+/-" is sqrt(sum of squared weights).
print " == Summary of the reweighting =="
print " ================================"
print "  - The transfer factor                                                 = ", transfer_factor
print "  - The sum of target weights                                           = ", weights_target.sum(
), "+/-", math.sqrt(numpy.square(weights_target).sum())
示例#4
0
def reweightermodel(original, target, original_weights, target_weights, args):
    """Fit a 2-fold gradient-boosted reweighter and return it.

    ``args`` is read by position: args[0] is n_estimators, args[1] is
    learning_rate, args[2] is max_depth, args[3] is min_samples_leaf and
    args[4] is the 'subsample' entry of gb_args. The folding reweighter
    always uses random_state=2019, n_folds=2 and verbose=True.
    """
    n_estimators = args[0]
    learning_rate = args[1]
    max_depth = args[2]
    min_samples_leaf = args[3]
    subsample = args[4]

    base = reweight.GBReweighter(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
        gb_args={'subsample': subsample},
    )
    folded = reweight.FoldingReweighter(
        base,
        random_state=2019,
        n_folds=2,
        verbose=True,
    )
    folded.fit(original, target, original_weights, target_weights)
    return folded
                                        gb_args={'subsample': 0.4, 'max_features' : 6, 'min_samples_split' : 201})
  gb_reweighter.fit(original_train[i], target_train[i])

  gb_weights_test[i] = gb_reweighter.predict_weights(original_test[i])

# Check the weighted distributions on one of the test splits (index 1).
draw_distributions(original_test[1], target_test[1], gb_weights_test[1])

# Folding reweighter: one folded reweighter per dataset, fitted on the full
# samples. folding_weights[i] holds the weights predicted for dataset i.
folding_weights = np.empty(sets, dtype=object)

# Columns removed from both samples before they are given to the reweighter.
excluded_columns = ['p3', 'p4', 'p3_phi', 'p4_phi']

for i in range(sets):
  # Build each feature table once: the original-sample table is needed for
  # both fit() and predict_weights(), so converting it a second time would
  # only repeat the drop/to_numpy/tolist work.
  original_features = allCollisions[i, 0].drop(excluded_columns, axis=1).to_numpy().tolist()
  target_features = allCollisions[i, 1].drop(excluded_columns, axis=1).to_numpy().tolist()

  # Gradient boosted decision trees as the base reweighter, wrapped in a
  # 5-fold FoldingReweighter.
  reweighter_gb = reweight.GBReweighter(learning_rate=0.1, n_estimators=64, max_depth=32,
                                        min_samples_leaf=200,
                                        gb_args={'subsample': 0.4})
  folding_gb = reweight.FoldingReweighter(reweighter_gb, n_folds=5)

  # Give the full datasets to the reweighter.
  folding_gb.fit(original_features, target_features)

  # Calculate a weight for each point of the original sample.
  folding_weights[i] = folding_gb.predict_weights(original_features)

# Plot the first dataset with its folding weights applied.
draw_distributions(allCollisions[0, 0], allCollisions[0, 1], folding_weights[0])

"""# Model Evaluation"""