def cluster_nn(X_train, y_train, X_test, y_test, savedir, ds, cluster_type):
    """Train the baseline ANN on cluster-derived features and score it.

    Fits A3's baseline neural network on the (cluster-feature) training data,
    predicts the test set, and writes a confusion-matrix plot named
    '<ds>-<cluster_type>-ann' into ``savedir`` as a side effect.

    Args:
        X_train, y_train: training features (cluster assignments/distances)
            and labels.
        X_test, y_test: held-out test features and labels.
        savedir: directory where the confusion matrix image is saved.
        ds: dataset name used in plot/log labels.
        cluster_type: clustering algorithm name used in plot/log labels.

    Returns:
        The F1 score of the ANN's predictions on the test set.
    """
    logging.info(
        'Running neural net with {} clusters as features'.format(cluster_type))
    model = A3.baseline_ann(X_train, y_train, ds)
    predictions = model.predict(X_test)
    util.confusionMatrix('{}-{}-ann'.format(ds, cluster_type),
                         y_test, predictions, savedir)
    return f1_score(y_test, predictions)
def scoreModel(classifiers, X, y, testX, testy, scoring, outputDir, params,
               scoreType='baseline', dsname=''):
    """Fit each named classifier, plot its curves, and score it on the test set.

    For every classifier name: build it via A1.getClfParams, apply any tuned
    hyperparameters from ``params``, plot a learning curve (plus a max_iter
    validation curve for iterative models), refit a scaler+classifier pipeline
    on the full training set, and record its test-set F1 score and confusion
    matrix. Finally a bar chart of all F1 scores is saved.

    Args:
        classifiers: iterable of classifier-name strings accepted by
            A1.getClfParams (e.g. 'kernelSVM', 'ann').
        X, y: training features and labels.
        testX, testy: held-out test features and labels.
        scoring: scoring-metric name forwarded to the curve-plotting helpers.
        outputDir: directory where all plots are written.
        params: dict mapping classifier name -> hyperparameter dict (keys may
            carry the 'classifier__' pipeline prefix), or None for defaults.
        scoreType: label embedded in plot names/titles (default 'baseline').
        dsname: dataset name; currently unused in this function.

    Returns:
        dict mapping classifier name -> fitted sklearn Pipeline.
    """
    fitClassifiers = {}
    scores = []
    names = []
    for classifier in classifiers:
        clf, _ = A1.getClfParams(classifier)
        if params is not None:
            # Remove classifier prefix from params so they can be set
            # directly on the bare estimator (not the pipeline).
            p = {k.replace('classifier__', ''): v
                 for k, v in params[classifier].items()}
            clf.set_params(**p)

        print('{}: Generating {} learning curve'
              .format(classifier, scoreType))
        print('{}: hyperparameters: '.format(classifier), clf.get_params())
        util.plot_learning_curve(classifier, clf, X, y, scoring,
                                 savedir=outputDir, scoreType=scoreType)

        # SVM and ANN need a training epoch graph
        if classifier in ('kernelSVM', 'ann'):
            util.plotValidationCurve(clf, X, y, scoring=scoring,
                                     paramName='max_iter',
                                     paramRange=range(100, 2000, 100),
                                     savedir=outputDir,
                                     clfName='{}-{}'.format(classifier,
                                                            scoreType),
                                     cv=3)

        # To score the model, fit with given parameters and predict
        print('{}: Retraining with best parameters on entire training set'
              .format(classifier))
        pipeline = Pipeline(steps=[('scaler', StandardScaler()),
                                   ('classifier', clf)])
        start_time = timeit.default_timer()
        pipeline.fit(X, y)
        total_time = timeit.default_timer() - start_time
        # BUG FIX: this message previously hard-coded 'Training ANN took ...'
        # even though it is printed for every classifier in the loop.
        print('Training {} took {} seconds'.format(classifier, total_time))

        ypred = pipeline.predict(testX)
        fitClassifiers[classifier] = pipeline
        scores.append(f1_score(testy, ypred))
        names.append(classifier)

        # Generate confusion matrix
        print('{}: Scoring predictions against test set'.format(classifier))
        util.confusionMatrix(classifier, testy, ypred, savedir=outputDir,
                             scoreType=scoreType)
        plt.close('all')

    util.plotBarScores(scores, names, '', outputDir, phaseName=scoreType)
    plt.close('all')
    return fitClassifiers
def dr_ann(X_train, y_train, X_test, y_test, dr_steps, savedir, ds,
           cluster=None):
    """Compare the baseline ANN against ANNs trained after each DR transform.

    Trains A3's baseline neural net, then one ANN per dimensionality-reduction
    step in ``dr_steps``, scoring each on the test set with F1. Confusion
    matrices and a bar chart of all scores are saved to ``savedir`` as side
    effects; nothing is returned.

    Args:
        X_train, y_train: training features and labels.
        X_test, y_test: held-out test features and labels.
        dr_steps: iterable of fitted/unfitted DR transformer instances; each
            is labelled by its lowercased class name.
        savedir: directory where plots are written.
        ds: dataset name used in plot/log labels.
        cluster: optional name of the clustering algorithm whose output these
            features came from; used only in labels (may be None).
    """
    c = (' with clustering from {}'.format(cluster)
         if cluster is not None else '')

    logging.info('ANN: Running baseline neural net' + c)
    base_model = A3.baseline_ann(X_train, y_train, ds)
    base_preds = base_model.predict(X_test)
    util.confusionMatrix('{}{}-baseline'.format(ds, cluster), y_test,
                         base_preds, savedir)

    scores = [f1_score(y_test, base_preds)]
    score_names = ['baseline']

    for reducer in dr_steps:
        drname = reducer.__class__.__name__.lower()
        score_names.append(drname)
        logging.info('ANN: Running neural net with {} dimension reduction'.
                     format(drname) + c)
        # Get trained ann with dr
        reduced_model = A3.dr_ann(X_train, y_train, reducer, ds)
        preds = reduced_model.predict(X_test)
        util.confusionMatrix('{}-{}{}'.format(ds, drname, cluster), y_test,
                             preds, savedir)
        scores.append(f1_score(y_test, preds))

    logging.info('ANN {} F1 Scores: {}'.format(c, scores))
    util.plotBarScores(scores, score_names, ds, savedir,
                       phaseName='{}-{}'.format(ds, cluster))
    plt.close('all')
samples_predictions.append(prediction) # para calculo de taxa de erro if (actual_class == predicted_class): true_positives += 1 else: false_positives += 1 error_rate = util.errorRate(true_positives, false_positives) error_rates.append(error_rate) predictions.append(samples_predictions) print("confusion matrix") confusionMatrix = util.confusionMatrix(predictions) print(np.array_str(confusionMatrix, precision=6, suppress_small=True)) print("") print("precision by class") precision_by_class = confusionMatrix.diagonal() / float( patternSpace * repetitions) print(precision_by_class) print("") print("precision average %s" % np.mean(precision_by_class)) print("error rate average %s" % util.errorRateAverage(error_rates)) print("") # erro rate for each repetition for i, rate in enumerate(error_rates):
# stratified cross validation rskf = RepeatedStratifiedKFold(n_splits=10, n_repeats=30, random_state=42) #skf = StratifiedKFold(n_splits=10, random_state=42) print("======================== FOU =============================") #fou_h = parzen.bandwidth_estimator(fou) fou_h = 1.9952 #print("fou best bandwidth: {0}".format(fou_h)) fou_predictions, fou_error_rates = parzen.runClassifier(rskf, fou, target, fou_h) print("fou confusion matrix") fouConfusionMatrix = util.confusionMatrix(fou_predictions) print(np.array_str(fouConfusionMatrix, precision=6, suppress_small=True)) print("") print("fou precision by class") fou_precision_by_class = fouConfusionMatrix.diagonal() / float(patternSpace * repetitions) print(fou_precision_by_class) print("") print("fou precision average %s" % np.mean(fou_precision_by_class)) print("fou error rate average %s" % util.errorRateAverage(fou_error_rates)) print("") # erro rate for each repetition print("error rates") for i, rate in enumerate(fou_error_rates):