def BuildTransferFactorModel(data_3b, data_4b, category, tag):
    """Fit the transfer-factor model on ``HH_m`` and save it under ``tag``.

    Args:
        data_3b: First sample handed to ``data.preparedataformodel``
            (presumably the 3b-tagged events -- confirm with that helper).
        data_4b: Second sample handed to ``data.preparedataformodel``
            (presumably the 4b-tagged events -- confirm with that helper).
        category: Not used by this function.
        tag: Label used in the log line and forwarded to
            ``SaveTransferFactorModel``.

    Returns:
        Tuple ``(originalcr_weights, transferfactor)`` taken from the output
        of ``data.preparedataformodel``.
    """
    # Single-argument, parenthesised print gives the same output on
    # Python 2 and Python 3 (the bare print statement is Python-2 only).
    print("[INFO] Processing predicted model for %s" % (tag))
    #######################################
    ##Prepare data to create the model
    #######################################
    variablescr = ['HH_m']
    (originalcr, targetcr, originalcr_weights, targetcr_weights,
     transferfactor) = data.preparedataformodel(data_3b, data_4b, variablescr)
    # The return value of the fit is discarded here.
    # NOTE(review): presumably the helper only reports/validates the fit or
    # updates its arguments in place -- confirm against data.fittransferfactormodel.
    data.fittransferfactormodel(originalcr, targetcr, originalcr_weights,
                                targetcr_weights, transferfactor)
    #plotter.Draw1DHistosComparison(originalcr, targetcr, variablescr, originalcr_weights,True,"%s_model"%tag)
    ########################################
    ## Save training information
    ########################################
    SaveTransferFactorModel(variablescr, transferfactor, tag)

    return originalcr_weights, transferfactor
# Example no. 2
# 0
def BuildReweightingModel(data_3b, data_4b, category, tag):
    """Fit the folding BDT reweighter for ``category`` and run the ROC AUC check.

    The input variables and the hyperparameter list depend on ``category``
    ('GGF', 'VBF', or anything else for the reduced fallback set).

    Args:
        data_3b: First sample handed to ``data.preparedataformodel``
            (presumably the 3b-tagged events -- confirm with that helper).
        data_4b: Second sample handed to ``data.preparedataformodel``
            (presumably the 4b-tagged events -- confirm with that helper).
        category: Selects the variable list and hyperparameters.
        tag: Label used in the log line.

    Returns:
        Tuple ``(foldingcr_weights, reweightermodel, transferfactor)`` as
        returned by ``data.fitreweightermodel``.
    """
    # Single-argument, parenthesised print gives the same output on
    # Python 2 and Python 3 (the bare print statement is Python-2 only).
    print("[INFO] Processing predicted model for %s" % (tag))
    ############################################################################
    ##Let's slice data one more time to have the inputs for the bdt reweighting#
    ############################################################################
    # modelargs is forwarded untouched to data.fitreweightermodel; the meaning
    # of its five entries is defined by that helper.
    if category == 'GGF':
        variablescr = [
            'H1_pt', 'H2_pt', 'H1_eta', 'H2_eta', 'nJet', 'H1_bb_deltaR',
            'H2_bb_deltaR', 'H1_bb_deltaPhi', 'H2_bb_deltaPhi'
        ]
        modelargs = [100, 0.1, 5, 300, 0.5]
    elif category == 'VBF':
        variablescr = [
            'H1_pt', 'H2_pt', 'H1_eta', 'H2_eta', 'nJet', 'JJ_j1_qgl',
            'JJ_j2_qgl', 'JJ_m', 'j1j2_deltaEta'
        ]
        modelargs = [80, 0.1, 5, 200, 0.5]
    else:
        variablescr = ['H1_pt', 'H2_pt', 'H1_eta', 'H2_eta', 'nJet']
        # Previously modelargs was left unassigned on this branch, so any
        # other category died with UnboundLocalError at the fit call below.
        # NOTE(review): the GGF hyperparameters are reused as the fallback --
        # confirm these are the intended values for the default category.
        modelargs = [100, 0.1, 5, 300, 0.5]
    originalcr, targetcr, originalcr_weights, targetcr_weights = data.preparedataformodel(
        data_3b, data_4b, variablescr)
    #plotter.Draw1DHistosComparison(originalcr, targetcr, variablescr, originalcr_weights,True,"original_%s"%tag)
    #######################################
    ##Folding Gradient Boosted Reweighter
    #######################################
    foldingcr_weights, reweightermodel, transferfactor = data.fitreweightermodel(
        originalcr, targetcr, originalcr_weights, targetcr_weights, modelargs)
    #plotter.Draw1DHistosComparison(originalcr, targetcr, variablescr, foldingcr_weights,True,"model_%s"%tag)
    ########################################
    ##GB ROC AUC
    ########################################
    # Called for its side effects only; any return value is ignored.
    bdtreweighter.roc_auc_measurement(originalcr, targetcr, originalcr_weights,
                                      foldingcr_weights)
    ########################################
    ##Update 3b dataframe for modeling
    ########################################
    return foldingcr_weights, reweightermodel, transferfactor
def BuildReweightingModel(data_3b, data_4b, category, tag, bkgclassifierparams,
                          bkgparams):
    """Fit the folding BDT reweighter for ``category`` and run its checks.

    Steps, in order: pick the input variables for the category, prepare the
    model inputs, fit the reweighter with ``bkgparams``, then run the KS
    comparison and the classifier-based ROC AUC comparison between the
    original weights and the fitted weights.

    Args:
        data_3b: First sample handed to ``data.preparedataformodel``
            (presumably the 3b-tagged events -- confirm with that helper).
        data_4b: Second sample handed to ``data.preparedataformodel``
            (presumably the 4b-tagged events -- confirm with that helper).
        category: 'GGF'/'GGF1'/'GGF2' or 'VBF'/'VBF1'/'VBF2' select dedicated
            variable lists; any other value falls back to ``['HH_m']``.
        tag: Label used in the log line and passed to the discrimination test.
        bkgclassifierparams: Forwarded to ``bdtreweighter.discrimination_test``.
        bkgparams: Hyperparameters forwarded to ``data.fitreweightermodel``.

    Returns:
        Tuple ``(foldingcr_weights, reweightermodel, transferfactor)``; the
        first two come from ``data.fitreweightermodel``, the last from
        ``data.preparedataformodel``.
    """
    # Single-argument, parenthesised prints give the same output on
    # Python 2 and Python 3 (the bare print statement is Python-2 only).
    print("[INFO] Processing predicted model for %s" % (tag))
    modelargs = bkgparams
    ############################################################################
    ##Let's slice data one more time to have the inputs for the bdt reweighting
    ############################################################################
    if category in ('GGF', 'GGF1', 'GGF2'):
        variablescr = [
            'H1_b1_ptRegressed', 'H1_b2_ptRegressed', 'H2_b1_ptRegressed',
            'H2_b2_ptRegressed', 'H1_m', 'H2_m', 'HH_m', 'H1_pt', 'H2_pt',
            'h1h2_deltaEta', 'H1_bb_deltaR', 'H2_bb_deltaR',
            'abs_costh_H1_ggfcm', 'HH_btag_b3_bres', 'abs_costh_H1_b1_h1cm',
            'sum_3b_bscore', 'sum_4b_pt', 'HH_pt'
        ]
    elif category in ('VBF', 'VBF1', 'VBF2'):
        variablescr = [
            'H1_b1_ptRegressed', 'H1_b2_ptRegressed', 'H2_b1_ptRegressed',
            'H2_b2_ptRegressed', 'H1_m', 'H2_m', 'HH_m', 'H1_pt', 'H2_pt',
            'h1h2_deltaEta', 'h1h2_deltaPhi', 'JJ_m', 'j1j2_deltaEta',
            'GGFKiller'
        ]
    else:
        variablescr = ['HH_m']
    #######################################
    ##Prepare data to create the model
    #######################################
    (originalcr, targetcr, originalcr_weights, targetcr_weights,
     transferfactor) = data.preparedataformodel(data_3b, data_4b, variablescr)
    #plotter.Draw1DHistosComparison(originalcr, targetcr, variablescr, originalcr_weights,True,"%s_original"%tag)

    #######################################
    ##Grid search or main fit
    #######################################
    print("--------------------------Best-Hyperparameters-------------------------------------")
    print("-----------------------------------------------------------------------------------")
    # Two spaces on purpose: the Python 2 statement `print "Parameters: ", x`
    # emitted the literal's trailing space plus the item separator.
    print("Parameters:  %s" % (modelargs,))
    print("-----------------------------------------------------------------------------------")
    #######################################
    ##Folding Gradient Boosted Reweighter (3-fold BDT reweighter)
    #######################################
    foldingcr_weights, reweightermodel = data.fitreweightermodel(
        originalcr, targetcr, originalcr_weights, targetcr_weights,
        transferfactor, modelargs)
    #plotter.Draw1DHistosComparison(originalcr, targetcr, variablescr, foldingcr_weights,True,"%s_model"%tag)

    ########################################
    ## KS Test (as the developers of the method do), currently used for optimization/check of the parameters
    ########################################
    ksresult_original = bdtreweighter.ks_test(originalcr, targetcr,
                                              variablescr, originalcr_weights)
    ksresult_model = bdtreweighter.ks_test(originalcr, targetcr, variablescr,
                                           foldingcr_weights)
    # The comparison result is not used by this function; the call is kept
    # because the helper may report the outcome itself.
    bdtreweighter.ks_comparison(variablescr, ksresult_original,
                                ksresult_model)
    ###########################################################################################
    ## GB ROC AUC Test Study (Very slow test, needs to train a classifier in cross-validation)
    ############################################################################################
    roc_auc_nowts = bdtreweighter.discrimination_test(originalcr, targetcr,
                                                      originalcr_weights,
                                                      bkgclassifierparams,
                                                      "%s" % tag, "original",
                                                      False)
    roc_auc_wts = bdtreweighter.discrimination_test(originalcr, targetcr,
                                                    foldingcr_weights,
                                                    bkgclassifierparams,
                                                    "%s" % tag, "model", False)
    # As above: the returned verdict is not consumed here.
    bdtreweighter.discrimination_comparison(roc_auc_nowts, roc_auc_wts)

    return foldingcr_weights, reweightermodel, transferfactor