Example #1
0
def cluster_nn(X_train, y_train, X_test, y_test, savedir, ds, cluster_type):
    """Train the baseline ANN on cluster-derived features and report its F1.

    Fits A3's baseline neural net on the training split, saves a confusion
    matrix named '<ds>-<cluster_type>-ann' into ``savedir``, and returns the
    F1 score of the predictions on the test split.
    """
    logging.info(
        'Running neural net with {} clusters as features'.format(cluster_type))
    model = A3.baseline_ann(X_train, y_train, ds)
    predictions = model.predict(X_test)
    util.confusionMatrix('{}-{}-ann'.format(ds, cluster_type), y_test,
                         predictions, savedir)
    return f1_score(y_test, predictions)
Example #2
0
def scoreModel(classifiers, X, y, testX, testy, scoring,
               outputDir, params, scoreType='baseline',
               dsname=''):
    """Fit, score, and plot diagnostics for each named classifier.

    For every classifier name: build the estimator via ``A1.getClfParams``,
    apply any tuned hyperparameters from ``params``, plot a learning curve
    (plus a ``max_iter`` validation curve for the iterative kernelSVM/ann
    models), refit a StandardScaler+classifier pipeline on the full training
    set, and score F1 on the test set with a saved confusion matrix.

    Args:
        classifiers: iterable of classifier names understood by A1.getClfParams.
        X, y: training features and labels.
        testX, testy: held-out test features and labels.
        scoring: scoring metric passed to the curve plotters.
        outputDir: directory where all plots are saved.
        params: dict mapping classifier name -> tuned hyperparameters whose
            keys carry a 'classifier__' pipeline prefix, or None for defaults.
        scoreType: label embedded in plot names (default 'baseline').
        dsname: unused here; kept for interface compatibility with callers.

    Returns:
        dict mapping classifier name -> fitted sklearn Pipeline.
    """
    fitClassifiers = {}
    scores = []
    names = []
    for classifier in classifiers:
        clf, _ = A1.getClfParams(classifier)
        if params is not None:
            # Strip the pipeline prefix so the params apply to the bare
            # estimator rather than a Pipeline step.
            p = {k.replace('classifier__', ''): v
                 for k, v in params[classifier].items()}
            clf.set_params(**p)

        print('{}: Generating {} learning curve'
              .format(classifier, scoreType))
        print('{}: hyperparameters: '.format(classifier), clf.get_params())
        util.plot_learning_curve(classifier, clf, X,
                                 y, scoring,
                                 savedir=outputDir,
                                 scoreType=scoreType)

        # SVM and ANN are iterative, so they also get a training-epoch
        # (max_iter) validation curve.
        if classifier in ('kernelSVM', 'ann'):
            util.plotValidationCurve(clf, X, y,
                                     scoring=scoring,
                                     paramName='max_iter',
                                     paramRange=range(100, 2000, 100),
                                     savedir=outputDir,
                                     clfName='{}-{}'.format(classifier, scoreType),
                                     cv=3)

        # To score the model, fit with given parameters and predict.
        print('{}: Retraining with best parameters on entire training set'
              .format(classifier))
        pipeline = Pipeline(steps=[('scaler', StandardScaler()),
                                   ('classifier', clf)])
        start_time = timeit.default_timer()
        pipeline.fit(X, y)
        total_time = timeit.default_timer() - start_time
        # Fix: the message previously hard-coded 'ANN' even though this loop
        # trains every classifier in the list.
        print('Training {} took {} seconds'.format(classifier, total_time))
        ypred = pipeline.predict(testX)
        fitClassifiers[classifier] = pipeline
        scores.append(f1_score(testy, ypred))
        names.append(classifier)

        # Generate confusion matrix against the held-out test set.
        print('{}: Scoring predictions against test set'
              .format(classifier))
        util.confusionMatrix(classifier, testy, ypred,
                             savedir=outputDir,
                             scoreType=scoreType)

        # Release matplotlib figures created by the plotting helpers.
        plt.close('all')

    util.plotBarScores(scores, names, '', outputDir, phaseName=scoreType)
    plt.close('all')
    return fitClassifiers
Example #3
0
def dr_ann(X_train,
           y_train,
           X_test,
           y_test,
           dr_steps,
           savedir,
           ds,
           cluster=None):
    """Compare a baseline ANN against ANNs trained after dimension reduction.

    Trains A3's baseline neural net, then one net per dimension-reduction
    step in ``dr_steps``, saving a confusion matrix for each and a final bar
    chart of F1 scores.

    Args:
        X_train, y_train: training features and labels.
        X_test, y_test: held-out test features and labels.
        dr_steps: iterable of fitted/unfitted DR transformer instances; each
            is labeled by its lowercased class name.
        savedir: directory where plots are saved.
        ds: dataset name used in plot names and logging.
        cluster: optional name of a clustering step whose output feeds the
            features; used only for labeling.
    """
    if cluster is not None:
        c = ' with clustering from {}'.format(cluster)
        tag = cluster
    else:
        c = ''
        # Fix: previously ``cluster`` (None) was formatted directly into the
        # plot names, producing files like 'dsNone-baseline'.
        tag = ''
    logging.info('ANN: Running baseline neural net' + c)
    baseline = A3.baseline_ann(X_train, y_train, ds)
    ypred = baseline.predict(X_test)
    util.confusionMatrix('{}{}-baseline'.format(ds, tag), y_test, ypred,
                         savedir)

    scores = [f1_score(y_test, ypred)]
    score_names = ['baseline']
    for dr_step in dr_steps:
        drname = dr_step.__class__.__name__.lower()
        score_names.append(drname)
        logging.info('ANN: Running neural net with {} dimension reduction'.
                     format(drname) + c)
        # Get a trained ANN that includes the dimension-reduction step.
        ann = A3.dr_ann(X_train, y_train, dr_step, ds)
        ypred = ann.predict(X_test)
        util.confusionMatrix('{}-{}{}'.format(ds, drname, tag), y_test,
                             ypred, savedir)
        scores.append(f1_score(y_test, ypred))

    logging.info('ANN {} F1 Scores: {}'.format(c, scores))
    util.plotBarScores(scores,
                       score_names,
                       ds,
                       savedir,
                       phaseName='{}-{}'.format(ds, tag))
    plt.close('all')
Example #4
0
        # Record this sample's prediction for later aggregation.
        samples_predictions.append(prediction)

        # Tally hits/misses for the error-rate computation.
        # NOTE(review): despite the names, this counts overall correct vs.
        # incorrect predictions, not per-class TP/FP — confirm against
        # util.errorRate's contract.
        if (actual_class == predicted_class):
            true_positives += 1
        else:
            false_positives += 1

    error_rate = util.errorRate(true_positives, false_positives)

    error_rates.append(error_rate)

    predictions.append(samples_predictions)

print("confusion matrix")
confusionMatrix = util.confusionMatrix(predictions)
print(np.array_str(confusionMatrix, precision=6, suppress_small=True))
print("")

print("precision by class")
# Per-class precision: diagonal counts normalized by patternSpace *
# repetitions — presumably the total samples per class, which assumes a
# balanced design; TODO confirm against where patternSpace is defined.
precision_by_class = confusionMatrix.diagonal() / float(
    patternSpace * repetitions)
print(precision_by_class)
print("")

print("precision average %s" % np.mean(precision_by_class))
print("error rate average %s" % util.errorRateAverage(error_rates))
print("")

# Error rate for each repetition (loop body truncated in this excerpt).
for i, rate in enumerate(error_rates):
Example #5
0
# Stratified cross validation: 10 folds repeated 30 times with a fixed seed
# for reproducibility.
rskf = RepeatedStratifiedKFold(n_splits=10, n_repeats=30, random_state=42)
#skf = StratifiedKFold(n_splits=10, random_state=42)


print("======================== FOU =============================")

# Parzen-window bandwidth for the FOU feature view, hard-coded from a prior
# run of the (commented-out) estimator below.
#fou_h = parzen.bandwidth_estimator(fou)
fou_h = 1.9952
#print("fou best bandwidth: {0}".format(fou_h))

# Run the Parzen-window classifier over every CV split.
fou_predictions, fou_error_rates = parzen.runClassifier(rskf, fou, target, fou_h)

print("fou confusion matrix")
fouConfusionMatrix = util.confusionMatrix(fou_predictions)
print(np.array_str(fouConfusionMatrix, precision=6, suppress_small=True))
print("")

print("fou precision by class")
# Per-class precision: diagonal counts normalized by patternSpace *
# repetitions — presumably the total samples per class, which assumes a
# balanced design; TODO confirm against where patternSpace is defined.
fou_precision_by_class = fouConfusionMatrix.diagonal() / float(patternSpace * repetitions)
print(fou_precision_by_class)
print("")

print("fou precision average %s" % np.mean(fou_precision_by_class))
print("fou error rate average %s" % util.errorRateAverage(fou_error_rates))
print("")

# Error rate for each repetition (loop body truncated in this excerpt).
print("error rates")
for i, rate in enumerate(fou_error_rates):