def get_roc(self, on="test"):
    visualizer = ROCAUC(self.pipe)
    if on == "test":
        visualizer.score(self._X_test, self._y_test)
    elif on == "train":
        visualizer.score(self._X_train, self._y_train)
    elif on == "all":
        visualizer.score(self.X, self.y)

    visualizer.poof()  # poof() was renamed show() in Yellowbrick 1.0
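The method above assumes `self.pipe` is already fitted, which is why it calls `score()` without `fit()`. For reference, a minimal self-contained sketch of the same Yellowbrick fit/score/show pattern; the dataset and estimator here are illustrative stand-ins, not from the original source:

# Minimal standalone sketch of the ROCAUC workflow (dataset and model are
# illustrative stand-ins, assuming scikit-learn and Yellowbrick 1.x).
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ROCAUC

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

visualizer = ROCAUC(LogisticRegression(max_iter=5000))
visualizer.fit(X_train, y_train)   # fit the wrapped estimator
visualizer.score(X_test, y_test)   # compute ROC curves and AUC on the test set
visualizer.show()                  # show() replaced poof() in Yellowbrick 1.0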
Example #2
from sklearn.linear_model import LogisticRegression
from yellowbrick.classifier import ROCAUC

def base_model(X_train, y_train, X_test, y_test):
    model = LogisticRegression(multi_class='auto', solver='lbfgs')
    visualizer = ROCAUC(model, classes=['dancehall', 'reggae', 'soca', 'pop'])

    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.show()
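A hedged driver for base_model: the four genre labels suggest a 4-class problem, so synthetic stand-in data replaces the original (unavailable) dataset.

# Hypothetical call to base_model; the data below is a synthetic stand-in.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=400, n_classes=4, n_informative=8,
                           random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    random_state=0)
base_model(X_train, y_train, X_test, y_test)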
Example #3
def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(
        rfe, scoring='f1_weighted', n_jobs=4
    )
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision recall curve
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False, classes=class_names)
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png', size=(1080, 720))

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(
        rfe, classes=class_names
    )
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080, 720))
    viz_RA.fit(X, y)
    viz_RA.score(X, y)  # note: scored on the training data, unlike the plots above
    viz_RA.show(outpath=outdir + '/RA.png')

    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)

    return f1
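Because eva_model indexes `X.columns`, it expects a pandas DataFrame for X. A hedged driver with synthetic stand-in data (all names and sizes are illustrative, and the module-level imports of the snippet's source are assumed):

# Hypothetical driver for eva_model; synthetic stand-in data.
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

Xa, ya = make_classification(n_samples=300, n_features=40, n_informative=10,
                             random_state=0)
X_df = pd.DataFrame(Xa, columns=[f"f{i}" for i in range(40)])
X_tr, X_te, y_tr, y_te = train_test_split(X_df, ya, test_size=0.3,
                                          random_state=0)

f1 = eva_model(c=1.0, n=10, X=X_tr, y=y_tr, X_test=X_te, y_test=y_te,
               class_names=["neg", "pos"], outdir=".")
print(f"weighted F1: {f1:.3f}")
Example #4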
def ROC_Curve(Model, X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.15)
    visualizer = ROCAUC(Model)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.show()
Example #5
def evaluate(df, modelv="gnb", race="W", census=False, report=True, roc=True, pr=True):
    """ Run model evaluations for a specified model and race class """

    # get model
    models = joblib.load(DIR + "/data/models/models_binary_%s%s.joblib" % (modelv, model_string))
    model = models[race]

    # get data
    df = prep_data(df)
    tes = joblib.load(DIR + "/data/models/transformers_binary.joblib")

    # transform data
    for col in [ "first_name", "last_name", "middle_name"]:
        te = tes[race][col]
        df[col] = te.transform(df[col])
        df[col] = df[col].fillna(0)

    tmpa = np.where(df.race_code == race, True, False)
    df = df.fillna(0)

    # run specified evaluation visualizer
    if report:
        visualizer = ClassificationReport(model, classes=model.classes_, support=True)
        visualizer.score(df[MODEL_COLS], tmpa)
        visualizer.show() 

    if roc:
        visualizer = ROCAUC(model, classes=["W", "not-W"])
        visualizer.score(df[MODEL_COLS], tmpa)
        visualizer.show()
    if pr:
        viz = PrecisionRecallCurve(model, is_fitted=True, classes=["W", "not-W"])
        viz.score(df[MODEL_COLS], tmpa)
        viz.show()
Example #6
def roc(model, data_type="music", features_nr=705):
    classes = ["{}".format(data_type), "no_{}".format(data_type)]
    from yellowbrick.classifier import ROCAUC

    data = load_data(how_many=4, last=True, data_type=data_type)
    data = data.astype({'class': str})

    features = data.columns[:features_nr]
    X = data[features]
    y = data["class"]

    # Instantiate the visualizer with the classification model
    visualizer = ROCAUC(model, classes=classes)

    visualizer.score(X, y)  # Evaluate the already-fitted model on the data
    visualizer.poof()  # Draw the figure; poof() was renamed show() in Yellowbrick 1.0
Example #7
def plotting_ROC_curve_yellowbrick(model, labels, X_train, y_train, X_test,
                                   y_test):
    st.subheader('ROC Curve')
    visualizer = ROCAUC(model, classes=labels)
    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.show()
    st.pyplot()
    return
Example #8
def log_roc_auc_chart(classifier,
                      X_train,
                      X_test,
                      y_train,
                      y_test,
                      experiment=None):
    """Log ROC-AUC chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            exp = neptune.create_experiment()

            log_roc_auc_chart(rfc, X_train, X_test, y_train, y_test, experiment=exp)
    """
    assert is_classifier(
        classifier), 'classifier should be sklearn classifier.'
    exp = _validate_experiment(experiment)

    try:
        fig, ax = plt.subplots()
        visualizer = ROCAUC(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='ROC-AUC')
        plt.close(fig)
    except Exception as e:
        print('Did not log ROC-AUC chart. Error {}'.format(e))
Example #9
def evaluation(estimator, X, Y, x, y):

    classes = [Y[1], Y[0]]
    f, (ax, ax1, ax2) = plt.subplots(1, 3, figsize=(18, 6))

    #Confusion Matrix
    cmm = ConfusionMatrix(model=estimator,
                          ax=ax1,
                          classes=classes,
                          label_encoder={
                              0.0: 'Negativo',
                              1.0: 'Positivo'
                          })
    cmm.score(x, y)

    #ROCAUC
    viz = ROCAUC(model=estimator, ax=ax2)
    viz.fit(X, Y)
    viz.score(x, y)

    #Learning Curve
    cv_strategy = StratifiedKFold(n_splits=3)
    sizes = np.linspace(0.3, 1.0, 10)
    visualizer = LearningCurve(estimator,
                               ax=ax,
                               cv=cv_strategy,
                               scoring='roc_auc',
                               train_sizes=sizes,
                               n_jobs=4)
    visualizer.fit(X, Y)

    cmm.poof()
    viz.poof()
    visualizer.poof()
    plt.show()
Example #10
def classification_sanity_check(model,
                                X_train,
                                X_test,
                                y_train,
                                y_test,
                                classes=None):
    visualizer = ROCAUC(model, micro=False, macro=False, classes=classes)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.poof()
Example #11
def ROC_AUC(model, classes, X_train, Y_train, X_test, Y_test):
    from yellowbrick.classifier import ROCAUC

    # Instantiate the visualizer with the classification model
    visualizer = ROCAUC(model, classes=classes)

    visualizer.fit(X_train, Y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, Y_test)  # Evaluate the model on the test data
    visualizer.poof()
Example #12
    def get_roc(self, on="test"):
        """
        Produces aAUC/ROC curve graph made through the yellowbrick package

        Input
        -----
        on : string (default=test)
            Determines which set of data to score and create a ROC graph on.
            Default is 'test', meaning it will make a ROC graph of the test results. 
            'train' and 'all' are alternative values. 
        """
        visualizer = ROCAUC(self.pipe)
        if on == "test":
            visualizer.score(self._X_test, self._y_test)
        elif on == "train":
            visualizer.score(self._X_train, self._y_train)
        elif on == "all":
            visualizer.score(self._X, self._y)

        visualizer.poof()
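A hedged usage sketch, assuming `clf` is an instance of the enclosing class whose `pipe` has already been fitted:

# Hypothetical usage of get_roc; `clf` stands for a fitted instance of the
# enclosing class.
clf.get_roc()            # ROC curves for the held-out test split (default)
clf.get_roc(on="train")  # ROC curves for the training split
clf.get_roc(on="all")    # ROC curves for the full dataset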
Example #13
    def roc_curve(self, classes) -> None:
        visualizer = ROCAUC(self.trained_model, classes=classes)
        visualizer.fit(self.X_train,
                       self.y_train)  # Fit the training data to the visualizer
        visualizer.score(self.X_test,
                         self.y_test)  # Evaluate the model on the test data
        save_dir = f"{self.plots_dir}/roc_curve_{self.model_id}.png"
        visualizer.show(outpath=save_dir)
        if not LOCAL:
            upload_to_s3(save_dir,
                         f'plots/roc_curve_{self.model_id}.png',
                         bucket=S3_BUCKET_NAME)
        plt.clf()
Example #14
def create_roc_auc_chart(classifier, X_train, X_test, y_train, y_test):
    """Create ROC-AUC chart.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/roc_auc'] = npt_utils.create_roc_auc_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(
        classifier), 'classifier should be sklearn classifier.'

    chart = None

    try:
        fig, ax = plt.subplots()
        visualizer = ROCAUC(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
        plt.close(fig)
    except Exception as e:
        print('Did not create ROC-AUC chart. Error {}'.format(e))

    return chart
Example #15
def eval_models(df,
                race="W",
                models=["gnb", "rf", "xgb"],
                census=False,
                report=False,
                roc=False,
                pr=False,
                cpe=False):
    """ Run evaluation on a set of models and a single race class """

    df = prep_data(df)
    tes = joblib.load(DIR + "/data/models/transformers_binary.joblib")

    for col in ["first_name", "last_name", "middle_name"]:
        te = tes[race][col]
        df[col] = te.transform(df[col])
        df[col] = df[col].fillna(0)

    tmpa = np.where(df.race_code == race, True, False)
    df = df.fillna(0)

    for modelv in models:

        # Use a separate name so the `models` parameter being iterated
        # is not shadowed inside the loop.
        model_dict = joblib.load(DIR + "/data/models/models_binary_%s%s.joblib" %
                                 (modelv, model_string))
        model = model_dict[race]

        model.target_type_ = "binary"

        if report:
            visualizer = ClassificationReport(model,
                                              classes=model.classes_,
                                              support=True)
            visualizer.score(df[MODEL_COLS], tmpa)
            visualizer.show()

        if roc:
            visualizer = ROCAUC(model, classes=["W", "not-W"])
            visualizer.score(df[MODEL_COLS], tmpa)
            visualizer.show()
        if pr:
            viz = PrecisionRecallCurve(model,
                                       is_fitted=True,
                                       classes=["W", "not-W"])
            viz.score(df[MODEL_COLS], tmpa)
            viz.show()

        if cpe:
            viz = ClassPredictionError(model)
            viz.score(df[MODEL_COLS], tmpa)
            viz.show()
Example #16
def rocauc(X, y, model, outpath, **kwargs):
    # Create a new figure and axes
    _, ax = plt.subplots()

    # Instantiate the classification model and visualizer
    visualizer = ROCAUC(model, ax=ax, **kwargs)

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)

    # Save to disk
    visualizer.poof(outpath=outpath)
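Extra keyword arguments are forwarded to ROCAUC, so a hedged call might look like this (iris is an illustrative stand-in dataset, and the module-level imports of the snippet's source are assumed):

# Hypothetical call to the rocauc helper above.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)
rocauc(X, y, LogisticRegression(max_iter=1000), "rocauc_iris.png",
       micro=False)  # micro is forwarded to ROCAUC via **kwargs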
Example #17
def evaluate_model(clf_, X_tr, X_te, y_tr, y_te, cls_rpt_tr=False, show=True, cls_labels=None, binary=False):
    """Takes any classifier, train/test data for X/y, labels for graph (optional).
    Will output (if show) a Sklearn Classification Report and Confusion Matrix
    along with a Yellowbrick ROC/AUC curve and Feature Importance graph (if a tree).
    Otherwise will return training/test predictions."""
    
    import sklearn.metrics as metrics
    import matplotlib.pyplot as plt
    from yellowbrick.classifier import ROCAUC
    
    ## Fit and predict 
    y_hat_trn, y_hat_tes = fit_n_pred(clf_, X_tr, X_te, y_tr)
    
    if show:
        ## Classification Report / Scores
        if cls_rpt_tr:
            print('Classification Report Train')
            print(metrics.classification_report(y_tr,y_hat_trn))
        else:
            print('Classification Report Test')
            print(metrics.classification_report(y_te,y_hat_tes))

        ## Confusion Matrix
        fig, ax = plt.subplots(figsize=(10,5), ncols=2)
        
        # plot_confusion_matrix was removed in scikit-learn 1.2; newer code
        # would use ConfusionMatrixDisplay.from_estimator instead.
        metrics.plot_confusion_matrix(clf_, X_te, y_te, cmap="YlOrRd",
                                      normalize='true', ax=ax[0])
        ax[0].set(title='Confusion Matrix Test Data')
        ax[0].grid(False)        

        roc = ROCAUC(clf_, classes=cls_labels, ax=ax[1])
        roc.fit(X_tr, y_tr)
        roc.score(X_te, y_te)
        roc.finalize()
            
        plt.tight_layout()
        plt.show()
        
        if binary:
            try:
                imps = plot_importance(clf_, X_tr)
            except Exception:
                imps = None
        
    else:
        return y_hat_trn, y_hat_tes
Example #18
def evaluate_model(model, x_test, y_test, coef_):
    # Note: X_train and y_train are read from the enclosing (global) scope.
    prediction = model.predict(x_test)
    acc_test = accuracy_score(y_test, prediction)
    acc_train = accuracy_score(y_train, model.predict(X_train))
    print("\n")
    print("Accuracy Score(Test): " + str(acc_test))
    print("Accuracy Score(Train): " + str(acc_train))
    print("Difference between train and test accuracy = {0}".format(abs(acc_test-acc_train)))
    print("Roc Auc Score: "+ str(roc_auc_score(y_test,prediction)))
    print("\n")
    print("Classification Report:")
    print(classification_report(y_test,prediction))
    # confusion matrix
    plt.figure()
    cm = confusion_matrix(y_test,prediction)
    sns.heatmap(cm,annot=True,cmap="YlGnBu",fmt="d")
    plt.title("Confusion Matrix(1:Churned, 0:Not Churned)")
    plt.show()
    
    # roc-curve
    plt.figure()
    visualizer = ROCAUC(model, classes=["Not Churn", "Churn"])
    
    visualizer.fit(X_train, y_train)        # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)        # Evaluate the model on the test data
    visualizer.show()                       # Finalize and show the figure    
    plt.show()
    
    
    if coef_:
        feature_imp = {}

        for idx, col_name in enumerate(X_train.columns):
            feature_imp[col_name] = model.coef_[0][idx]

        feature_imp = pd.DataFrame(feature_imp.items(), columns=["Feature", "Feature Importance"])
        feature_imp.set_index("Feature", inplace=True)

        ax = feature_imp.plot(kind="bar", fontsize=10, color="red")

        ax.set_title("Feature Importance", fontdict={"fontsize": 12, "fontweight": "bold"})
        ax.set_ylabel("Coef_")

        plt.show()
Example #19
def yellowbrick_visualizations(model, classes, X_tr, y_tr, X_te, y_te):
    visualizer = ConfusionMatrix(model, classes=classes)
    visualizer.fit(X_tr, y_tr)
    visualizer.score(X_te, y_te)
    visualizer.show()

    visualizer = ClassificationReport(model, classes=classes, support=True)
    visualizer.fit(X_tr, y_tr)
    visualizer.score(X_te, y_te)
    visualizer.show()

    visualizer = ROCAUC(model, classes=classes)
    visualizer.fit(X_tr, y_tr)
    visualizer.score(X_te, y_te)
    visualizer.show()
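A hedged driver for yellowbrick_visualizations; the dataset and estimator are illustrative stand-ins:

# Hypothetical driver; assumes the ConfusionMatrix, ClassificationReport and
# ROCAUC imports from yellowbrick.classifier used by the function above.
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)

yellowbrick_visualizations(RandomForestClassifier(random_state=0),
                           ["malignant", "benign"],
                           X_tr, y_tr, X_te, y_te)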
Example #20
    def plot_classifier_metrics(self):

        fig, axes = plt.subplots(2, 2, figsize=(12, 8))

        visualgrid = [
            ConfusionMatrix(self.clf, ax=axes[0][0]),
            ClassificationReport(self.clf, ax=axes[0][1]),
            ROCAUC(self.clf, ax=axes[1][0]),
        ]
        fig.delaxes(axes[1, 1])
        for viz in visualgrid:
            viz.fit(self.X_train, self.y_train)
            viz.score(self.X_test, self.y_test)
            viz.finalize()
        plt.savefig('../docs/metrics_classifier.png')
        plt.show()
Example #21
    def __init__(self,
                 X_train,
                 X_test,
                 y_train,
                 y_test,
                 labels,
                 model,
                 viz_selection,
                 upsampled=False):
        """
        Class for yellowbrick classifier visualizer

        Args:
            X_train: numpy ndarray of model features training data values
            X_test: numpy ndarray of model features test data values
            y_train: numpy ndarray of model target variable training data values
            y_test: numpy ndarray of model target variable test data values
            labels: list of class labels for binary classification
            model: sklearn estimator for classification
            viz_selection: string value used to reference yellowbrick classification visualizer
            upsampled: binary value to determine to which subdirectory output image should be saved

        """

        self.labels = labels
        self.model = model
        self.viz_selection = viz_selection
        self.upsampled = upsampled
        self.X_train, self.X_test, self.y_train, self.y_test = X_train, X_test, y_train, y_test

        if self.viz_selection == 'ClassificationReport':
            self.visualizer = ClassificationReport(self.model,
                                                   classes=self.labels,
                                                   support=True)
        elif self.viz_selection == 'ROCAUC':
            # support=True is a ClassificationReport option, not a ROCAUC one
            self.visualizer = ROCAUC(self.model,
                                     classes=self.labels)
        elif self.viz_selection == 'PrecisionRecallCurve':
            self.visualizer = PrecisionRecallCurve(self.model)
        elif self.viz_selection == 'ConfusionMatrix':
            self.visualizer = ConfusionMatrix(model, classes=self.labels)
        else:
            print(
                "Error: viz_selection does not match accepted values. "
                "See the Visualizer class for accepted values."
            )
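The snippet shows only __init__. A hedged usage sketch, inferring the class name `Visualizer` from the error message above and following the standard Yellowbrick fit/score/show workflow (none of these calls appear in the source):

# Hypothetical usage; the class name and workflow are inferred, and the
# breast cancer data is an illustrative stand-in for a binary problem.
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    *load_breast_cancer(return_X_y=True), test_size=0.2, random_state=0)

viz = Visualizer(X_train, X_test, y_train, y_test,
                 labels=["malignant", "benign"],
                 model=LogisticRegression(max_iter=5000),
                 viz_selection="ROCAUC")
viz.visualizer.fit(viz.X_train, viz.y_train)
viz.visualizer.score(viz.X_test, viz.y_test)
viz.visualizer.show()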
Example #23
    def generate_roc_auc(self, X_train, y_train, X_test, y_test, **kwargs):
        """
        Given the training and testing sets, computes the ROC AUC metrics
        for the given model and returns a ROC AUC plotly figure.
        
        :param X_train: the training feature set.
        :param y_train: the training target set.
        :param X_test: the testing feature set.
        :param y_test: the testing target set.
        """
        
        visualizer = ROCAUC(self.model, classes=self.classes)

        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        
        roc_data = visualizer.tpr
        
        layout = go.Layout(yaxis = dict(
                                scaleratio = 1,
                                range=[-.1, 1]
                            ),
                            xaxis=dict(
                                range=[-.1, 1],
                                #scaleratio = 1
                            ),
                        )

        fig = go.Figure(layout=layout)

        # Note: x below is an even spacing over the curve's points, an
        # approximation; the true false positive rates are in visualizer.fpr.
        for tr in roc_data.keys():
            trace = go.Scatter(
                x = [i / len(roc_data[tr]) for i in range(len(roc_data[tr]))],
                y = roc_data[tr],
                name = f'{tr}' if type(tr) != int else f'{self.classes[tr]}',
                line = dict(shape = 'hv')
            )
            fig.add_trace(trace)

        lin_line = go.Scatter(
            x = [0,1],
            y = [0,1],
            name = 'linear_line',
            line = dict(dash='dash')
        )

        fig.add_trace(lin_line)
        return fig
Example #24
def rocauc(dataset):
    if dataset == "binary":
        X, y = load_occupancy()
        model = GaussianNB()
    elif dataset == "multiclass":
        X, y = load_game()
        X = OrdinalEncoder().fit_transform(X)
        model = RidgeClassifier()
    else:
        raise ValueError("uknown dataset")

    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)
    oz = ROCAUC(model, ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    savefig(oz, "rocauc_{}".format(dataset))
Example #25
def showROC():
    # Load the classification data set
    data = load_data('occupancy')

    # Specify the features of interest and the classes of the target
    features = ["temperature", "relative humidity", "light", "C02", "humidity"]
    classes = ['unoccupied', 'occupied']

    # Extract the numpy arrays from the data frame
    # (DataFrame.as_matrix() was removed in pandas 1.0; use to_numpy())
    X = data[features].to_numpy()
    y = data.occupancy.to_numpy()

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    # Instantiate the classification model and visualizer
    logistic = LogisticRegression()
    visualizer = ROCAUC(logistic)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.poof()  # Draw/show the figure
Example #26
def rocauc(ax):
    from yellowbrick.classifier import ROCAUC
    from sklearn.linear_model import LogisticRegression

    # Specify the features of interest and the classes of the target
    features = ["temperature", "relative humidity", "light", "C02", "humidity"]
    target = "occupancy"
    classes = ['unoccupied', 'occupied']

    # Load the data
    splits = load_data('occupancy', cols=features, target=target, tts=True)
    X_train, X_test, y_train, y_test = splits

    estimator = LogisticRegression()
    visualizer = ROCAUC(estimator, ax=ax)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    return visualizer
Example #27
y_pred = model.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
import seaborn as sn
import matplotlib.pyplot as plt
cmET = confusion_matrix(y_test, y_pred)
sn.heatmap(cmET,
           cmap='Blues_r',
           annot=True,
           xticklabels='1234',
           yticklabels='1234')
plt.xlabel('Predicted Label')
plt.ylabel('Actual Label')
plt.show()

#ROCAUC Plot
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder
from yellowbrick.classifier import ROCAUC

# Encode the non-numeric columns
X = OrdinalEncoder().fit_transform(X)
y = LabelEncoder().fit_transform(y)

# Instantiate the classification model and visualizer
# (the class labels belong in the `classes` argument; `Damage` is not a
# ROCAUC parameter)
visualizer = ROCAUC(model, classes=[1, 2, 3, 4])

visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.show()  # Finalize and render the figure
model.fit(X_train, y_train)

#######################################################
# STEP 5: Accuracy check
#########################################################

from sklearn import metrics
prediction_test = model.predict(X_test)
##Check accuracy on test dataset. 
print ("Accuracy = ", metrics.accuracy_score(y_test, prediction_test))

from yellowbrick.classifier import ROCAUC
print("Classes in the image are: ", np.unique(Y))

#ROC curve for RF
roc_auc=ROCAUC(model, classes=[0, 1, 2, 3])  #Create object
roc_auc.fit(X_train, y_train)
roc_auc.score(X_test, y_test)
roc_auc.show()

##########################################################
#STEP 6: SAVE MODEL FOR FUTURE USE
###########################################################
##You can store the model for future use. In fact, this is how you do machine learning:
##train on training images, validate on test images, and deploy the model on unknown images.
#
#
##Save the trained model as pickle string to disk for future use
model_name = "sandstone_model"
pickle.dump(model, open(model_name, 'wb'))
#
Example #29

datasets = DatasetMixin()
credit = datasets.load_data('credit')
credit_keys = credit.dtype.names
datatype = credit.dtype[0]
ncols = len(credit_keys)
categorical_names = ['edu','married']
y_name = 'default'
credit_data = None
for j in range(0,ncols):
    if credit_keys[j] in categorical_names:
        credit_data = add_categorical(credit_data,credit[credit_keys[j]],datatype)
    elif credit_keys[j] == y_name:
        y = credit[y_name].astype(int)
    else:
        credit_data = add_column(credit_data,credit[credit_keys[j]])

datashape = credit_data.shape
nrows = datashape[0]
# z-score standardization: subtract the column means, divide by column stds
cmeans = np.mean(credit_data, 0)
repmeans = numpy.matlib.repmat(cmeans, nrows, 1)
mydata = credit_data - repmeans
sstds = np.std(mydata, 0)
repstds = numpy.matlib.repmat(sstds, nrows, 1)
mydata = np.divide(mydata, repstds)

visualizer = ROCAUC(LinearSVC())
visualizer.fit(mydata, y)
visualizer.score(mydata, y)  # note: scored on the training data (no holdout split)
visualizer.poof()
Example #30

from sklearn.model_selection import cross_val_score

cv_scores = cross_val_score(
                estimator = clasificador,
                X         = X_train,
                y         = y_train,
                scoring   = 'neg_root_mean_squared_error',
                cv        = 5
             )

print(f"Métricas validación cruzada: {cv_scores}")
print(f"Média métricas de validación cruzada: {cv_scores.mean()}")
cv_scores = pd.DataFrame(cv_scores)

prediccion = clasificador.predict(X_test)

from yellowbrick.classifier import ROCAUC
visualizer = ROCAUC(clasificador, classes=[0, 1, 2, 3, 4, 5, 6, 7 , 8, 9])

visualizer.fit(X_train, y_train)        # Fit the training data to the visualizer
visualizer.score(X_test, y_test)        # Evaluate the model on the test data
visualizer.show()       


y_pred_proba = clasificador.predict_proba(X_test)

from sklearn.metrics import roc_auc_score
score = roc_auc_score(y_test,y_pred_proba,multi_class="ovr")
Example #31
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from yellowbrick.classifier import ROCAUC

if __name__ == '__main__':
    # Load the classification data set
    data = pd.read_csv("../../../examples/data/occupancy/occupancy.csv")

    features = ["temperature", "relative humidity", "light", "C02", "humidity"]
    classes = ['unoccupied', 'occupied']

    # Extract the numpy arrays from the data frame
    # (DataFrame.as_matrix() was removed in pandas 1.0; use to_numpy())
    X = data[features].to_numpy()
    y = data.occupancy.to_numpy()

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the classification model and visualizer
    logistic = LogisticRegression()
    visualizer = ROCAUC(logistic)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.poof(outpath="images/rocauc.png")  # Draw and save the figure