# --- Diagnostic plots -------------------------------------------------------
# Split train_set_ptsel into two frames (signal, background); var_signal is
# handed to splitdataframe_sigbkg as the splitting key.
train_set_ptsel_sig, train_set_ptsel_bkg = splitdataframe_sigbkg(
    train_set_ptsel, var_signal
)
vardistplot(
    train_set_ptsel_sig, train_set_ptsel_bkg, mylistvariablesall, "plots"
)
scatterplot(
    train_set_ptsel_sig,
    train_set_ptsel_bkg,
    mylistvariablesx,
    mylistvariablesy,
    "plots",
)
correlationmatrix(train_set_ptsel_sig, "plots", "signal")
correlationmatrix(train_set_ptsel_bkg, "plots", "background")

# --- Optional preprocessing of the training features ------------------------
# Each step rebinds X_train, so everything below sees the transformed frame.
if doStandard == 1:
    X_train = GetDataFrameStandardised(X_train)

if doPCA == 1:
    n_pca = 9  # value forwarded to GetPCADataFrameAndPC
    X_train, pca = GetPCADataFrameAndPC(X_train, n_pca)
    plotvariancePCA(pca, "plots")

# --- Training ---------------------------------------------------------------
if dotraining == 1:
    trainedmodels = fit(names, classifiers, X_train, y_train)
    savemodels(names, trainedmodels, "output", suffix)

# --- Post-training studies --------------------------------------------------
# NOTE(review): trainedmodels is only assigned in the dotraining branch above;
# confirm it is defined elsewhere when doimportance == 1 and dotraining != 1.
if doimportance == 1:
    importanceplotall(mylistvariables, names, trainedmodels, suffix)

if docrossvalidation == 1:
    # presumably 10 is the number of folds -- TODO confirm against
    # cross_validation_mse's signature
    df_scores = cross_validation_mse(
        names, classifiers, X_train, y_train, 10, ncores
    )
    plot_cross_validation_mse(names, df_scores, suffix)

if doRoCLearning == 1:
    # presumably 5 is the number of cross-validation folds -- TODO confirm
    confusion(mylistvariables, names, classifiers, suffix, X_train, y_train, 5)
# --- Diagnostic plots -------------------------------------------------------
# Split train_set into two frames (signal, background); myvariablesy is the
# key handed to splitdataframe_sigbkg. All plots are written under plotdir.
train_set_ptsel_sig, train_set_ptsel_bkg = splitdataframe_sigbkg(
    train_set, myvariablesy
)
vardistplot(
    train_set_ptsel_sig, train_set_ptsel_bkg, mylistvariablesall, plotdir
)
scatterplot(
    train_set_ptsel_sig,
    train_set_ptsel_bkg,
    mylistvariablesx,
    mylistvariablesy,
    plotdir,
)
correlationmatrix(train_set_ptsel_sig, plotdir, "signal")
correlationmatrix(train_set_ptsel_bkg, plotdir, "background")

# --- Optional preprocessing of the training features ------------------------
# Each step rebinds X_train, so everything below sees the transformed frame.
if doStandard == 1:
    X_train = GetDataFrameStandardised(X_train)

if doPCA == 1:
    n_pca = 5  # value forwarded to GetPCADataFrameAndPC
    X_train, pca = GetPCADataFrameAndPC(X_train, n_pca)
    plotvariancePCA(pca, plotdir)

# --- Classifier registration ------------------------------------------------
# Concatenation (not in-place extension) is kept deliberately so that any
# other reference to the original classifiers/names objects is left untouched.
if activateScikitModels == 1:
    classifiersScikit, namesScikit = getclassifiers()
    classifiers = classifiers + classifiersScikit
    names = names + namesScikit

if activateKerasModels == 1:
    # The DNN factory receives the current number of feature columns, i.e.
    # the count after any standardisation/PCA applied above.
    classifiersDNN, namesDNN = getclassifiersDNN(len(X_train.columns))
    classifiers = classifiers + classifiersDNN
    names = names + namesDNN

# --- Training ---------------------------------------------------------------
if dotraining == 1:
    trainedmodels = fit(names, classifiers, X_train, y_train)
    savemodels(names, trainedmodels, output, suffix)