    # diagonal of a perfectly calibrated classifier
    ax3.plot([0, 1], [0, 1], color='gray', linestyle='--')
    ax3.set_xlabel('Mean Predicted Value')
    ax3.set_ylabel('Fraction of Positives')

    fig.tight_layout(pad=0)
    if outputfile:
        fig.savefig(outputfile)
    else:
        plt.show()


if __name__ == '__main__':
    df = read_data('signal.csv', 'background.csv')
    df = drop_useless(df)

    print(80 * '=')
    print('{:^80}'.format('GaussianNB'))
    print(80 * '=')

    gnb = GaussianNB()

    # GaussianNB cannot handle missing values, so drop every column
    # that contains NaNs before fitting
    df_nb_label = df.dropna(axis=1)['label']
    df_nb = df.dropna(axis=1).drop('label', axis=1)
    print('{} remaining features after dropping columns with NaNs.'.format(
        len(df_nb.columns)
    ))

    nb_aucs = classifier_crossval_performance(
        df_nb.values, df_nb_label.values, classifier=gnb
    )
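    # A hedged sketch of how the result might be summarised; it assumes
    # nb_aucs holds one ROC AUC score per cross-validation fold (the body
    # of classifier_crossval_performance is not shown in this listing):
    print('ROC AUC: {:.3f} +/- {:.3f}'.format(nb_aucs.mean(), nb_aucs.std()))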
    # StratifiedKFold keeps the signal/background ratio equal across folds;
    # the (y, n_folds=...) call signature predates scikit-learn 0.18 and is
    # rewritten here for the current API
    cval = StratifiedKFold(n_splits=n_folds, shuffle=True)
    with Parallel(n_jobs=n_jobs) as pool:
        performances = pool(
            delayed(evaluate)(X, y, train, test)
            for train, test in cval.split(X, y)
        )
    # pd.Panel was removed in pandas 1.0; concatenating the per-fold
    # results into a DataFrame indexed by fold number carries the same
    # information
    performances = pd.concat(dict(enumerate(performances)))
    return performances


if __name__ == '__main__':
    data = drop_useless(read_data('./signal.csv', './background.csv'))

    classifiers = {
        'RandomForest': RandomForestClassifier(
            n_estimators=100, criterion='entropy', n_jobs=2,
        ),
        # 'ExtraTrees': ExtraTreesClassifier(
        #     n_estimators=100, criterion='entropy', n_jobs=-1
        # ),
        # GradientBoostingClassifier with exponential loss optimises the
        # same objective as AdaBoost
        'AdaBoost': GradientBoostingClassifier(
            n_estimators=100, loss='exponential',
        ),
        'NaiveBayes': GaussianNB(),
    }
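
    # A hedged sketch of how the classifier dictionary would presumably be
    # used, assuming the function whose tail appears above is
    # classifier_crossval_performance and accepts a classifier keyword.
    # The NaN handling mirrors the GaussianNB run in the previous listing;
    # the rest of the script is not shown here:
    X = data.dropna(axis=1).drop('label', axis=1).values
    y = data.dropna(axis=1)['label'].values
    for name, classifier in sorted(classifiers.items()):
        print(80 * '=')
        print('{:^80}'.format(name))
        print(80 * '=')
        classifier_crossval_performance(X, y, classifier=classifier)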