# Module-level imports used below. The analysis helpers (Reader, trainScaler,
# applyScaler, sandbox, Collector, Plotter) are defined elsewhere in this
# repository and are assumed to be imported at the top of the file.
import os
import cPickle
from glob import glob


def run(samples, channel, era, use, train, short, datacard=False, add_nominal=False):
    # Import the requested model backend.
    if use == "xgb":
        from XGBModel import XGBObject as modelObject
        parameters = "conf/parameters_xgb.json"
    if use == "keras":
        from KerasModel import KerasObject as modelObject
        parameters = "conf/parameters_keras.json"

    # Create an instance of Reader.
    read = Reader(channel=channel, config_file=samples, folds=2, era=era)
    target_names = read.config["target_names"]
    variables = read.config["variables"]

    models_folder = era + "/models"
    if not os.path.exists(models_folder):
        os.makedirs(models_folder)
    modelname = "{0}/{1}.{2}".format(models_folder, channel, use)

    scaler = None
    if train:
        print "Training new model"
        print "Loading training set"
        # Load the samples for training. This returns the input files merged
        # (ggH+qqH+...), randomized and split into two folds.
        trainSet = read.getSamplesForTraining()

        # Fit a StandardScaler from sklearn on the training set, pickle it
        # for later use and finally apply it to the training set.
        print "Fit Scaler to training set...",
        scaler = trainScaler(trainSet, variables)
        print " done. Dumping for later."
        with open("{0}/StandardScaler.{1}.pkl".format(models_folder, channel), "wb") as FSO:
            cPickle.dump(scaler, FSO, 2)
        scaler = [scaler, scaler]  # Hotfix, since KIT uses 2 scalers (one per fold).
        trainSet = applyScaler(scaler, trainSet, variables)

        # Load the model and start the training.
        model = modelObject(parameter_file=parameters, variables=variables,
                            target_names=target_names)
        model.train(trainSet)  # done in KerasModel.py
        model.save(modelname)

    # Prediction starts here: load the scaler and the trained model.
    elif not datacard:
        # TODO: Maybe not needed to check. Just load what is there.
        if os.path.exists("{0}/StandardScaler.{1}.pkl".format(models_folder, channel)):
            print "Loading Scaler"
            scaler = []
            if glob("{0}/{1}_*_keras_preprocessing.pickle".format(models_folder, channel)):
                # Per-fold preprocessing pickles written by the Keras training.
                with open("{0}/{1}_fold0_keras_preprocessing.pickle".format(models_folder, channel), "rb") as FSO:
                    scaler.append(cPickle.load(FSO))
                with open("{0}/{1}_fold1_keras_preprocessing.pickle".format(models_folder, channel), "rb") as FSO:
                    scaler.append(cPickle.load(FSO))
            else:
                # Fall back to the single StandardScaler, used for both folds.
                with open("{0}/StandardScaler.{1}.pkl".format(models_folder, channel), "rb") as FSO:
                    tmp = cPickle.load(FSO)
                scaler = [tmp, tmp]

        print "Loading model and predicting."
        model = modelObject(filename=modelname)
        read.variables = model.variables
        variables = model.variables

    if not datacard:
        outpath = read.config["outpath"] + "/predictions_" + era
        predictions = {}
        print "Predicting samples"

        # Only used by DESY.
        if add_nominal:
            print "Predicting Nominal"
            for sample, sampleConfig in read.get(what="nominal", for_prediction=True):
                sandbox(channel, model, scaler, sample, variables,
                        "nom_" + sampleConfig["histname"], outpath,
                        sampleConfig, read.modifyDF)

        # Iterate over the nominal samples.
        for sample, sampleConfig in read.get(what="full", add_jec=not short, for_prediction=True):
            if "data" in sampleConfig["histname"]:
                sandbox(channel, model, scaler, sample, variables,
                        "NOMINAL_ntuple_Data", outpath,
                        sampleConfig, read.modifyDF)
            elif "full" in sampleConfig["histname"]:
                sandbox(channel, model, scaler, sample, variables,
                        "NOMINAL_ntuple_" + sampleConfig["histname"].split("_")[0],
                        outpath, sampleConfig, read.modifyDF)
            else:
                splName = sampleConfig["histname"].split("_")
                sandbox(channel, model, scaler, sample, variables,
                        "_".join(splName[1:]) + "_ntuple_" + splName[0],
                        outpath, sampleConfig, read.modifyDF)

        # Shape files.
        if not short:
            print "Predicting shapes"
            for sample, sampleConfig in read.get(what="tes", for_prediction=True):
                sandbox(channel, model, scaler, sample, variables,
                        sampleConfig["histname"], outpath,
                        sampleConfig, read.modifyDF)

    if "hephy.at" in os.environ["HOME"]:
        from Tools.Datacard.produce import Datacard, makePlot
        from Tools.CutObject.CutObject import Cut
        from Tools.FakeFactor.FakeFactor import FakeFactor

        Datacard.use_config = era + "/datacard_conf"
        D = Datacard(channel=channel,
                     variable="predicted_prob",
                     era=era,
                     real_est="mc",
                     add_systematics=not short,
                     debug=True,
                     use_cutfile="conf/cuts_{0}.json".format(era))
        FakeFactor.fractions = "{0}/datacard_conf/fractions/htt_ff_fractions_{0}.root".format(era)
        D.create(era + "/" + use)
        makePlot(channel, "ML", era + "/" + use, era, era + "/plots")
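
# -----------------------------------------------------------------------------
# trainScaler() and applyScaler() are called above but defined elsewhere in the
# repository. The sketch below only illustrates the assumed behaviour -- an
# sklearn StandardScaler fitted on the input variables of per-fold pandas
# DataFrames -- and is not the actual implementation; the names
# _trainScalerSketch/_applyScalerSketch are hypothetical.
import pandas as pd
from sklearn.preprocessing import StandardScaler


def _trainScalerSketch(trainSet, variables):
    # Fit a single scaler on the training variables of all folds merged; the
    # calling code above then duplicates it into a two-entry list
    # ("hotfix since KIT uses 2 scalers").
    merged = pd.concat([fold[variables] for fold in trainSet])
    scaler = StandardScaler()
    scaler.fit(merged)
    return scaler


def _applyScalerSketch(scalers, dataset, variables):
    # Transform the input variables of each fold with the scaler assigned to
    # that fold and return the modified folds.
    for fold, scaler in zip(dataset, scalers):
        fold[variables] = scaler.transform(fold[variables])
    return dataset
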
def run(samples, channel, use, train, short, preprocess_chain=[]):
    # Import the requested model backend.
    if use == "xgb":
        from XGBModel import XGBObject as modelObject
        parameters = "conf/parameters_xgb.json"
    if use == "keras":
        from KerasModel import KerasObject as modelObject
        parameters = "conf/parameters_keras.json"

    read = Reader(channel=channel, config_file=samples, folds=2)
    target_names = read.config["target_names"]
    variables = read.config["variables"]

    if not os.path.exists("models"):
        os.mkdir("models")
    modelname = "models/{0}.{1}".format(channel, use)

    scaler = None
    if train:
        print "Training new model"
        print "Loading training set"
        trainSet = read.getSamplesForTraining()

        # Fit the scaler on the training set, pickle it for later use and
        # apply it before training.
        print "Fit Scaler to training set...",
        scaler = trainScaler(trainSet, variables)
        print " done. Dumping for later."
        with open("models/StandardScaler.{0}.pkl".format(channel), "wb") as FSO:
            cPickle.dump(scaler, FSO, 2)
        trainSet = applyScaler(scaler, trainSet, variables)

        model = modelObject(parameter_file=parameters, variables=variables,
                            target_names=target_names)
        model.train(trainSet)
        model.save(modelname)
    else:
        if os.path.exists("models/StandardScaler.{0}.pkl".format(channel)):
            print "Loading Scaler"
            with open("models/StandardScaler.{0}.pkl".format(channel), "rb") as FSO:
                scaler = cPickle.load(FSO)
        print "Loading model and predicting."
        model = modelObject(filename=modelname)

    # Collect the predictions of all sample groups and write them out as
    # datacard input.
    where = ""
    coll = Collector(channel=channel, var_name="pred_prob",
                     target_names=target_names, path=use,
                     recreate=True, rebin=False)

    print "Predicting simulation"
    for sample, sampleName in read.get(what="nominal"):
        pred = model.predict(applyScaler(scaler, sample, variables), where)
        coll.addPrediction(pred, sample, sampleName)

    print "Adding looser samples to predictions"
    for sample, sampleName in read.get(what="more"):
        pred = model.predict(applyScaler(scaler, sample, variables), where)
        coll.addPrediction(pred, sample, sampleName)

    print "Predicting data"
    for sample, sampleName in read.get(what="data"):
        pred = model.predict(applyScaler(scaler, sample, variables), where)
        coll.addPrediction(pred, sample, sampleName)

    if not short:
        print "Predicting TES shapes"
        for sample, sampleName in read.get(what="tes"):
            pred = model.predict(applyScaler(scaler, sample, variables), where)
            coll.addPrediction(pred, sample, sampleName)

        print "Predicting JES shapes"
        for sample, sampleName in read.get(what="jec"):
            pred = model.predict(applyScaler(scaler, sample, variables), where)
            coll.addPrediction(pred, sample, sampleName)

    coll.createDC(writeAll=True)

    plot = Plotter(channel=channel, naming=read.processes, path=use)
    plot.makePlots()
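
# -----------------------------------------------------------------------------
# Hypothetical usage sketch: the entry point below does not appear in this
# section, and the argument names and defaults are assumptions chosen to match
# the signature of the second run() variant above.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Train the classifier or predict with it")
    parser.add_argument("--config", default="conf/samples.json",
                        help="sample configuration file (assumed path)")
    parser.add_argument("--channel", default="mt",
                        help="analysis channel, e.g. mt/et/tt")
    parser.add_argument("--model", dest="use", default="keras",
                        choices=["keras", "xgb"], help="model backend")
    parser.add_argument("--train", action="store_true",
                        help="train a new model instead of loading one")
    parser.add_argument("--short", action="store_true",
                        help="skip the systematic-shape predictions")
    args = parser.parse_args()

    run(args.config, args.channel, args.use, args.train, args.short)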