Example No. 1
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import svm
from sklearn.feature_selection import RFE
from sklearn.metrics import f1_score
from yellowbrick.classifier import (ClassificationReport, ClassPredictionError,
                                    ConfusionMatrix, PrecisionRecallCurve, ROCAUC)
from yellowbrick.model_selection import LearningCurve


def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(
        rfe, scoring='f1_weighted', n_jobs=4
    )
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision recall curve
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False, classes=class_names,
                                   size=(1080, 720))  # size belongs on the visualizer, not show()
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png')

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(
        rfe, classes=class_names
    )
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080,720))
    viz_RA.fit(X, y)
    viz_RA.score(X_test, y_test)  # evaluate on the held-out test set, like the other plots
    viz_RA.show(outpath=outdir + '/RA.png')

    rfe.fit(X, y)
    y_predict = rfe.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)

    return f1
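
A minimal, hypothetical driver for eva_model is sketched below; the synthetic data, feature names, class names, and output directory are assumptions, not part of the original example. Note that X must be a pandas DataFrame, since the function reads X.columns.

# Hypothetical usage sketch for eva_model on synthetic data.
import os
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X_arr, y_arr = make_classification(n_samples=500, n_features=20, n_informative=8,
                                   n_classes=3, random_state=0)
X_df = pd.DataFrame(X_arr, columns=[f'feat_{i}' for i in range(20)])
X_tr, X_te, y_tr, y_te = train_test_split(X_df, y_arr, stratify=y_arr, random_state=0)

os.makedirs('plots', exist_ok=True)  # eva_model writes its PNGs into outdir
f1 = eva_model(c=1.0, n=10, X=X_tr, y=y_tr, X_test=X_te, y_test=y_te,
               class_names=['c0', 'c1', 'c2'], outdir='plots')
print(f'weighted F1: {f1:.3f}')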
Example No. 2
    def class_prediction_error(self) -> None:
        """Plot the support (number of training samples) for each class in the
        fitted classification model as a stacked bar chart. Each bar is segmented
        to show the proportion of predictions (including false negatives and false
        positives, like a confusion matrix) for each class. You can use a
        ClassPredictionError to visualize which classes your classifier is having
        a particularly difficult time with and, more importantly, what incorrect
        answers it is giving on a per-class basis.
        """
        visualizer = ClassPredictionError(self.trained_model)
        visualizer.fit(self.X_train, self.y_train)
        visualizer.score(self.X_test, self.y_test)
        save_path = f"{self.plots_dir}/class_prediction_error_{self.model_id}.png"
        visualizer.show(outpath=save_path)
        if not LOCAL:
            upload_to_s3(save_path,
                         f'plots/class_prediction_error_{self.model_id}.png',
                         bucket=S3_BUCKET_NAME)
        plt.clf()
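
The method above lives inside a larger reporting class; a minimal skeleton of the attributes and module-level names it relies on might look like the sketch below. Everything here is read off the method body, but the class name, the upload_to_s3 stub, and the LOCAL/S3_BUCKET_NAME values are assumptions.

# Hypothetical skeleton inferred from class_prediction_error above.
import matplotlib.pyplot as plt
from yellowbrick.classifier import ClassPredictionError

LOCAL = True                      # when False, plots are also pushed to S3
S3_BUCKET_NAME = "my-model-plots"

def upload_to_s3(local_path, s3_key, bucket):
    """Stub standing in for the project's real S3 uploader."""

class ModelEvaluator:
    def __init__(self, trained_model, X_train, y_train, X_test, y_test,
                 plots_dir, model_id):
        self.trained_model = trained_model
        self.X_train, self.y_train = X_train, y_train
        self.X_test, self.y_test = X_test, y_test
        self.plots_dir = plots_dir
        self.model_id = model_id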
Example No. 3
from sklearn.naive_bayes import MultinomialNB
from yellowbrick.classifier import ClassPredictionError, ConfusionMatrix, ROCAUC


def draw_plots():
    # get_baseline_split, smote, adasyn, text_augmentation,
    # get_fully_processed_books_df and get_selected_genres are
    # project-local helpers from the surrounding repository.
    classifier = MultinomialNB(alpha=0.01)

    for technique in ["base", "SMOTE", "ADASYN", "text-aug"]:
        X_train, X_test, y_train, y_test = get_baseline_split(representation="bow")
        if technique == "base":
            X_plot_train, X_plot_test, y_plot_train, y_plot_test = X_train, X_test, y_train, y_test
        elif technique == "SMOTE":
            X_plot_train, y_plot_train = smote.run(X_train, y_train)
            X_plot_test, y_plot_test = X_test, y_test
        elif technique == "ADASYN":
            X_plot_train, y_plot_train = adasyn.run(X_train, y_train)
            X_plot_test, y_plot_test = X_test, y_test
        elif technique == "text-aug":
            X_plot_train, X_plot_test, y_plot_train, y_plot_test = text_augmentation.run(
                books_df=get_fully_processed_books_df(),
                representation="bow")
        else:
            raise ValueError(f"unknown technique: {technique}")

        # ROC micro average
        viz_roc = ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=False)
        viz_roc.fit(X_plot_train, y_plot_train)  # Fit the training data to the viz_roc
        viz_roc.score(X_plot_test, y_plot_test)  # Evaluate the model on the test data
        viz_roc.show()  # Finalize and show the figure

        # ROC - Per Class
        viz_roc = ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=True)
        viz_roc.fit(X_plot_train, y_plot_train)  # Fit the training data to the viz_roc
        viz_roc.score(X_plot_test, y_plot_test)  # Evaluate the model on the test data
        viz_roc.show()  # Finalize and show the figure

        # Class Prediction Error
        viz_pred_err = ClassPredictionError(classifier, classes=get_selected_genres())
        viz_pred_err.fit(X_plot_train, y_plot_train)
        viz_pred_err.score(X_plot_test, y_plot_test)
        viz_pred_err.show()

        # Confusion Matrix
        cm = ConfusionMatrix(classifier, classes=get_selected_genres())
        cm.fit(X_plot_train, y_plot_train)
        cm.score(X_plot_test, y_plot_test)
        cm.show()
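
draw_plots leans entirely on project-local helpers whose code is not shown. Purely as a reading aid, a hypothetical stand-in for get_baseline_split under a bag-of-words representation could look like this; the column names and vectorizer choice are assumptions, not the project's actual code.

# Hypothetical stand-in for the project-local get_baseline_split helper.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

def get_baseline_split(representation="bow"):
    books_df = get_fully_processed_books_df()  # project-local loader
    if representation != "bow":
        raise ValueError(f"unsupported representation: {representation}")
    X = CountVectorizer().fit_transform(books_df["text"])
    return train_test_split(X, books_df["genre"], stratify=books_df["genre"],
                            random_state=42)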
Example No. 4
def class_prediction_errors(X, y, estimator, **kwargs):
    visualizer = ClassPredictionError(
        estimator,
        classes=['Reach, 1 Reach, or L/R Reach',
                 'Null, Multiple Reaches, Or Multiple Arms'],
        cmap="YlGn", size=(600, 360), **kwargs)
    visualizer.fit(X, y)
    visualizer.score(X, y)  # note: scored on the same data it was fit on
    visualizer.show()
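
A hedged usage sketch for the helper above; the toy data and the logistic-regression estimator are illustrative assumptions.

# Hypothetical call to class_prediction_errors with a binary toy problem.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X_demo, y_demo = make_classification(n_samples=300, n_features=10, n_classes=2,
                                     random_state=0)
class_prediction_errors(X_demo, y_demo, LogisticRegression(max_iter=1000))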
Example No. 5
# In[34]:


from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from yellowbrick.classifier import ClassPredictionError


# In[35]:


# X_train, X_test, y_train and y_test come from earlier cells of this notebook
classes = ['Exited', 'Not Exited']
clf = RandomForestClassifier(n_estimators=200, random_state=200)
visualizer = ClassPredictionError(clf, classes=classes)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()


# In[36]:


svclassifier = SVC(kernel='rbf')
visualizer = ClassPredictionError(svclassifier)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()

