# Fit every configured classifier on the training sample, then hand the fitted
# models to savemodels together with the "output" location and the run suffix.
trainedmodels = fit(names, classifiers, X_train, y_train)
savemodels(names, trainedmodels, "output", suffix)

# Each optional stage below runs only when its integer switch equals 1.
if doimportance == 1:
    importanceplotall(mylistvariables, names, trainedmodels, suffix)

if docrossvalidation == 1:
    # 10 is passed as the fold-count argument — presumably k for k-fold CV;
    # TODO confirm against cross_validation_mse.
    df_scores = cross_validation_mse(
        names, classifiers, X_train, y_train, 10, ncores
    )
    plot_cross_validation_mse(names, df_scores, suffix)

if doRoCLearning == 1:
    confusion(mylistvariables, names, classifiers, suffix, X_train, y_train, 5)
    precision_recall(
        mylistvariables, names, classifiers, suffix, X_train, y_train, 5
    )
    plot_learning_curves(
        names, classifiers, suffix, X_train, y_train, 100, 12000, 300
    )

if dotesting == 1:
    filenametest_set_ML = "output/testsample%sMLdecision.pkl" % (suffix)
    ntuplename = "fTreeFlagged%s" % (optionClassification)
    test_setML = test(names, trainedmodels, X_test, test_set)
    # Persist the frame returned by test(), i.e. the one carrying the model
    # decisions, rather than the input frame: the output file is the
    # "MLdecision" sample, so it must not depend on test() mutating its
    # argument in place.
    test_setML.to_pickle(filenametest_set_ML)

if doBoundary == 1:
    # Decision boundaries in the original feature space.
    mydecisionboundaries = decisionboundaries(
        names, trainedmodels, suffix, X_train, y_train
    )
    # Repeat after projecting the training sample with
    # GetPCADataFrameAndPC(X_train, 2).
    X_train_2PC, pca = GetPCADataFrameAndPC(X_train, 2)
    # NOTE(review): this rebinds trainedmodels to the models fitted on the
    # projected sample; any later code reading trainedmodels sees those, not
    # the models fitted on the full feature set — confirm this is intended.
    trainedmodels = fit(names, classifiers, X_train_2PC, y_train)
    mydecisionboundaries = decisionboundaries(
        names, trainedmodels, suffix + "PCAdecomposition", X_train_2PC, y_train
    )
# NOTE(review): this fragment starts mid-script; the writeTree call below may
# belong to a conditional that opens before the visible code — confirm.
writeTree(filenameMC_ML_root, ntuplename, dataframeMCML)

# Each optional stage below runs only when its integer switch equals 1.
if docrossvalidation == 1:
    # Stays an empty list when MLtype matches neither branch.
    df_scores = []
    if MLtype == "Regression":
        df_scores = cross_validation_mse_continuous(
            names, classifiers, X_train, y_train, 5, ncores
        )
    if MLtype == "BinaryClassification":
        df_scores = cross_validation_mse(
            names, classifiers, X_train, y_train, 5, ncores
        )
    plot_cross_validation_mse(names, df_scores, suffix, plotdir)

if doLearningCurve == 1:
    # confusion(mylistvariables,names,classifiers,suffix,X_train,y_train,5)
    plot_learning_curves(
        names, classifiers, suffix, plotdir, X_train, y_train, 10
    )

if doROCcurve == 1:
    precision_recall(
        mylistvariables, names, classifiers, suffix, X_train, y_train, 5, plotdir
    )

if doOptimisation == 1:
    # The significance study is only reached for the HFmeson / Ds
    # configuration; every other combination aborts the run here.
    if not (MLsubtype == "HFmeson" and optionanalysis == "Ds"):
        print("==================ERROR==================")
        print(
            "Optimisation is not implemented for this classification problem. The code is going to fail"
        )
        # Exit with a non-zero status so callers can detect the failure;
        # a bare sys.exit() would report success (status 0).
        sys.exit(1)
    studysignificance(
        optionanalysis,
        varmin[0],
        varmax[0],
        test_set,
        names,
        myvariablesy,
        suffix,
        plotdir,
    )
if (dotesting == 1): filenametest_set_ML = output + "/testsample%sMLdecision.pkl" % (suffix) filenametest_set_ML_root = output + "/testsample%sMLdecision.root" % ( suffix) ntuplename = getTreeName(optionClassification) + "Tested" test_setML = test(names, trainedmodels, test_set, mylistvariables, myvariablesy) test_setML.to_pickle(filenametest_set_ML) writeTree(filenametest_set_ML_root, ntuplename, test_setML) if (doRoCLearning == 1): # confusion(mylistvariables,names,classifiers,suffix,X_train,y_train,5) precision_recall(mylistvariables, names, classifiers, suffix, X_train, y_train, 5, plotdir) plot_learning_curves(names, classifiers, suffix, plotdir, X_train, y_train, 500, nevents, 4000) if (doOptimisation == 1): if not ((classtype == "HFmeson") & (optionClassification == "Ds")): print("==================ERROR==================") print( "Optimisation is not implemented for this classification problem. The code is going to fail" ) sys.exit() studysignificance(optionClassification, varmin[0], varmax[0], test_set, names, myvariablesy, suffix, plotdir) if (doBinarySearch == 1): namesCV, classifiersCV, param_gridCV, changeparameter = getgridsearchparameters( optionClassification) grid_search_models, grid_search_bests = do_gridsearch(
writeTree(filenameMC_ML_root, ntuplename, dataframeMCML) if (docrossvalidation == 1): df_scores = [] if (MLtype == "Regression"): df_scores = cross_validation_mse_continuous(names, classifiers, X_train, y_train, 5, ncores) if (MLtype == "BinaryClassification"): df_scores = cross_validation_mse(names, classifiers, X_train, y_train, 5, ncores) plot_cross_validation_mse(names, df_scores, suffix, plotdir) if (doLearningCurve == 1): # confusion(mylistvariables,names,classifiers,suffix,X_train,y_train,5) plot_learning_curves(names, classifiers, suffix, plotdir, X_train, y_train, 10, yAxis, threshold) plot_learning_curves(names, classifiers, suffix, plotdir, X_train, y_train, 10, 'sig', threshold) plot_learning_curves(names, classifiers, suffix, plotdir, X_train, y_train, 10, 'bkg', threshold) if (doROCcurve == 1): precision_recall(mylistvariables, names, classifiers, suffix, X_train, y_train, 5, plotdir) if (doOptimisation == 1): print("\nDoing significance optimization") if (dotraining and dotesting and doapplytodata): if ((MLsubtype == "HFmeson") and (optionanalysis == "Ds")): studysignificance(optionanalysis, varmin[0], varmax[0], test_set, dataframeDataML, names, myvariablesy, suffix,