Example #1
def evaluate(df, modelv="gnb", race="W", census=False, report=True, roc=True, pr=True):
    """ Run model evaluations for a specified model and race class """

    # get model
    models = joblib.load(DIR + "/data/models/models_binary_%s%s.joblib" % (modelv, model_string))
    model = models[race]

    # get data
    df = prep_data(df)
    tes = joblib.load(DIR + "/data/models/transformers_binary.joblib")

    # transform data
    for col in [ "first_name", "last_name", "middle_name"]:
        te = tes[race][col]
        df[col] = te.transform(df[col])
        df[col] = df[col].fillna(0)

    tmpa = np.where(df.race_code == race, True, False)  # True where the row matches the evaluated race class
    df = df.fillna(0)

    # run specified evaluation visualizer
    if report:
        visualizer = ClassificationReport(model, classes=model.classes_, support=True)
        visualizer.score(df[MODEL_COLS], tmpa)
        visualizer.show()

    if roc:
        visualizer = ROCAUC(model, classes=["W", "not-W"])
        visualizer.score(df[MODEL_COLS], tmpa)
        visualizer.show()
    if pr:
        viz = PrecisionRecallCurve(model, is_fitted=True, classes=["W", "not-W"])
        viz.score(df[MODEL_COLS], tmpa)
        viz.show()
Example #2
def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(
        rfe, scoring='f1_weighted', n_jobs=4
    )
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision recall curve
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False,
                                   classes=class_names, size=(1080, 720))
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png')  # size is a visualizer option, not a show() kwarg

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(
        rfe, classes=class_names
    )
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080,720))
    viz_RA.fit(X, y)
    viz_RA.score(X_test, y_test)  # score on the held-out set, consistent with the other plots
    viz_RA.show(outpath=outdir + '/RA.png')

    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)

    return f1
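
A minimal invocation sketch for eva_model, assuming its own imports (sklearn's svm, RFE, f1_score, the Yellowbrick visualizers, pandas, and matplotlib.pyplot as plt) are already in scope; the breast-cancer dataset and the hyperparameters below are placeholders, not from the original code:

# Hypothetical usage of eva_model; dataset and hyperparameters are placeholders.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

data = load_breast_cancer(as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42)

f1 = eva_model(c=1.0, n=10, X=X_train, y=y_train,
               X_test=X_test, y_test=y_test,
               class_names=list(data.target_names), outdir='.')
print('weighted F1: %.3f' % f1)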
Example #3
def draw_binary(outpath=None):
    _, ax = plt.subplots(figsize=(9,6))

    X_train, X_test, y_train, y_test = load_binary(split=True)

    oz = PrecisionRecallCurve(RidgeClassifier(), ax=ax)
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    oz.poof(outpath=outpath)
Example #4
def precision_recall_f1(model, classes, X_train, Y_train, X_test, Y_test):
    from yellowbrick.classifier import PrecisionRecallCurve
    viz = PrecisionRecallCurve(model,
                               per_class=True,
                               iso_f1_curves=True,
                               fill_area=False,
                               micro=False)
    viz.fit(X_train, Y_train)
    viz.score(X_test, Y_test)
    viz.poof()
Example #5
def precision_recall(model, classes, X_train, Y_train, X_test, Y_test):
    from yellowbrick.classifier import PrecisionRecallCurve

    # Create the visualizer, fit, score, and poof it
    viz = PrecisionRecallCurve(model)
    viz.fit(X_train, Y_train)
    viz.score(X_test, Y_test)
    viz.poof()
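
Since precision_recall() expects a ready-made train/test split, here is a minimal calling sketch; the breast-cancer data and GaussianNB are stand-ins chosen for illustration:

# Hypothetical caller for precision_recall(); dataset and model are placeholders.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2, random_state=42)
precision_recall(GaussianNB(), classes=[0, 1],
                 X_train=X_train, Y_train=Y_train,
                 X_test=X_test, Y_test=Y_test)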
Example #6
def draw_multiclass(outpath=None, simple=True):
    _, ax = plt.subplots(figsize=(9,6))

    X_train, X_test, y_train, y_test = load_multiclass()

    if simple:
        oz = PrecisionRecallCurve(RandomForestClassifier(), ax=ax)
    else:
        oz = PrecisionRecallCurve(MultinomialNB(), ax=ax, per_class=True,
                                  iso_f1_curves=True, fill_area=False, micro=False)

    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    oz.poof(outpath=outpath)
Example #7
def eval_models(df,
                race="W",
                models=["gnb", "rf", "xgb"],
                census=False,
                report=False,
                roc=False,
                pr=False,
                cpe=False):
    """ Run evaluation on a set of models and a single race class """

    df = prep_data(df)
    tes = joblib.load(DIR + "/data/models/transformers_binary.joblib")

    for col in ["first_name", "last_name", "middle_name"]:
        te = tes[race][col]
        df[col] = te.transform(df[col])
        df[col] = df[col].fillna(0)

    tmpa = np.where(df.race_code == race, True, False)
    df = df.fillna(0)

    for modelv in models:

        # load into a fresh name so the `models` parameter is not rebound mid-loop
        model_dict = joblib.load(DIR + "/data/models/models_binary_%s%s.joblib" %
                                 (modelv, model_string))
        model = model_dict[race]

        model.target_type_ = "binary"

        if report:
            visualizer = ClassificationReport(model,
                                              classes=model.classes_,
                                              support=True)
            visualizer.score(df[MODEL_COLS], tmpa)
            visualizer.show()

        if roc:
            visualizer = ROCAUC(model, classes=["W", "not-W"])
            visualizer.score(df[MODEL_COLS], tmpa)
            visualizer.show()
        if pr:
            viz = PrecisionRecallCurve(model,
                                       is_fitted=True,
                                       classes=["W", "not-W"])
            viz.score(df[MODEL_COLS], tmpa)
            viz.show()

        if cpe:
            viz = ClassPredictionError(model)
            viz.score(df[MODEL_COLS], tmpa)
            viz.show()
Example #8
def plot_precision_recall_curve_1(X_train, y_train, X_test, y_test, model):
    """
    Plot the precision-recall curve for a model.

    :param X_train: training set
    :param y_train: training set target
    :param X_test: test set
    :param y_test: test set target
    :param model: model to analyze performance for
    :return: None; displays the precision-recall curve plot
    """
    viz = PrecisionRecallCurve(model)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    viz.show()
Example #9
def PRCurve(model, X_train, X_test, y_train, y_test):
    '''
    A function to visualize Precision Recall Curve.
    Returns average precision score of the model.
    Data to fit must be training i.e. X_train, y_train.
    Data score will be estimated on X_test, y_test.
    '''
    viz = PrecisionRecallCurve(model)
    viz.fit(X_train, y_train)
    avg_prec = viz.score(X_test, y_test)
    plt.legend(labels=['Binary PR Curve', "AP=%.3f" % avg_prec], loc='lower right', prop={'size': 14})
    plt.xlabel(xlabel='Recall', size=14)
    plt.ylabel(ylabel='Precision', size=14)
    plt.title(label='Precision Recall Curve', size=16)
    return avg_prec
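
A minimal usage sketch for PRCurve; the logistic-regression model and the split names below are assumptions for illustration, not from the original code:

# Hypothetical call to PRCurve; model choice and split names are placeholders.
from sklearn.linear_model import LogisticRegression

ap = PRCurve(LogisticRegression(max_iter=1000), X_train, X_test, y_train, y_test)
print("Average precision: %.3f" % ap)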
Example #10
y_pred_thresh = rf.predict_proba(X_test)[:, 1] > 0.85  # 0.85 as threshold

print(classification_report(y_test, y_pred_thresh))

### Precision-Recall curve

from scikitplot.metrics import plot_precision_recall

rf_probas = rf.predict_proba(X_test)  # scikitplot expects per-class probability columns
plot_precision_recall(y_test, rf_probas)

from yellowbrick.classifier import PrecisionRecallCurve

viz = PrecisionRecallCurve(rf)
viz.fit(X_train, y_train)
viz.score(X_test, y_test)
viz.poof()

# Discrimination Threshold - probability or score at which the positive class is chosen over the negative class

from yellowbrick.classifier import DiscriminationThreshold

viz = DiscriminationThreshold(rf)
viz.fit(X_train, y_train)
viz.poof()

# Average Precision

from sklearn.metrics import average_precision_score

average_precision_score(y_test, rf_probas[:, 1])  # positive-class probabilities
Example #11
df_abalone.columns = ['sex', 'length', 'diameter', 'height', 'whole weight', 'shucked weight', 'viscera weight', 'shell weight', 'rings']
# print(df_abalone.head())

sns.countplot(data=df_abalone, x='sex', hue='rings', palette='gist_heat')

# plt.show()

# print(df_abalone.describe())
# df_abalone.info()

le = preprocessing.LabelEncoder()
df_abalone['sex'] = le.fit_transform(df_abalone['sex'])
# print(df_abalone.head())

cols = [col for col in df_abalone.columns if col != "rings"]  # use !=, not identity comparison with a literal
# print(cols)
data = df_abalone[cols]
target = df_abalone['rings']
data_train, data_test, target_train, target_test = train_test_split(data, target, test_size=0.20, random_state=10)

data_train.info()

logReg = LogisticRegression()
pred = logReg.fit(data_train, target_train).predict(data_test)
# print(pred)
print("Logistic Regression accuracy: ", accuracy_score(target_test, pred, normalize=True))

visualizer = PrecisionRecallCurve(logReg)
visualizer.fit(data_train, target_train)
visualizer.score(data_test, target_test)
visualizer.show()
Example #12
visualizer = ClassPredictionError(model, classes=classes)
visualizer.fit(X_fclass_train, y_train)
visualizer.score(X_fclass_test, y_test)
visualizer.poof(outpath="bag_class_errorf_classIF.png")

visualizer = DiscriminationThreshold(model)
visualizer.fit(X_fclass_train,
               y_train)  # Fit the training data to the visualizer
visualizer.score(X_fclass_test, y_test)
visualizer.poof(outpath="bag_descrimination_thresholdf_classIF.png")

# Create the visualizer, fit, score, and poof it
viz = PrecisionRecallCurve(model)
viz.fit(X_fclass_train, y_train)
viz.score(X_fclass_test, y_test)
viz.poof(outpath="bag_precision_recall_curvef_classIF.png")

#KNeighborsClassifier with f_classif features
model = KNeighborsClassifier()
model.fit(X_fclass_train, y_train)

visualizer = ClassificationReport(model, classes=classes)
visualizer.fit(X_fclass_train, y_train)  # Fit the visualizer and the model
visualizer.score(X_fclass_test, y_test)  # Evaluate the model on the test data
visualizer.poof(outpath="kneear_classification_report_fclassIF.png")

visualizer = ClassPredictionError(model, classes=classes)
visualizer.fit(X_fclass_train, y_train)
visualizer.score(X_fclass_test, y_test)
visualizer.poof(outpath="kneear_class_error_fclassIF.png")
Example #13
    # Run model with 4-fold cross validation. Report mean accuracy.
    scores = cross_val_score(mlp, X_train, y_train, cv=4)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # Plot ROC, AUC.
    classes = ["Normal", "Pre-Ictal", "Seizure"]
    visualizer = ROCAUC(mlp, classes=classes)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    ROC_title = "ROCAUC_{}.png".format(animal_id)
    g = visualizer.poof(outpath=ROC_title)  # Save plot with a unique title

    # Plot the precision-recall curve.
    viz = PrecisionRecallCurve(mlp)
    viz.fit(X_train, y_train)  # Fit the training data to the visualizer
    viz.score(X_test, y_test)  # Evaluate the model on the test data
    PR_title = "PR_{}.png".format(animal_id)
    viz.poof(outpath=PR_title)  # Save plot with a unique title

    # Plot loss curve aka cost function.
    loss_values = mlp.loss_curve_
    plt.plot(loss_values)
    Loss_title = "Loss_{}".format(animal_id)
    plt.savefig(Loss_title)  # save before show(); show() clears the current figure
    plt.show()
sys.stdout.close()

Example #14
def plot_pr(model, X_train, y_train, X_valid, y_valid):
    visualizer = PrecisionRecallCurve(model)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_valid, y_valid)
    visualizer.poof()
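
A minimal calling sketch for plot_pr; the classifier and the train/validation split below are placeholders chosen for illustration:

# Hypothetical usage of plot_pr; all names below are placeholders.
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.25, random_state=0)
plot_pr(RandomForestClassifier(n_estimators=200, random_state=0),
        X_train, y_train, X_valid, y_valid)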