Example #1
def yellow_brick_validation_curve(model, x, y, cpu_count, cv_count, param,
                                  scoring_metric):
    """

    """
    from yellowbrick.model_selection import LearningCurve
    from sklearn.model_selection import StratifiedKFold

    # Create the learning curve visualizer
    cv = StratifiedKFold(n_splits=cv_count)
    # Validation Curve

    mpl.rcParams['axes.prop_cycle'] = cycler('color', ['purple', 'darkblue'])

    fig = plt.gcf()
    fig.set_size_inches(10, 10)
    ax = plt.subplot(411)

    viz = ValidationCurve(model,
                          n_jobs=cpu_count,
                          ax=ax,
                          param_name=param,
                          param_range=np.arange(1, 11),
                          cv=cv,
                          scoring=scoring_metric)

    # Fit and show the visualizer
    viz.fit(x, y)
    viz.show()
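
For context, a call to this helper might look like the following; the estimator and synthetic data are illustrative assumptions, not part of the original example:

# Hypothetical usage; RandomForestClassifier and the synthetic data are assumptions.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=500, n_features=10, random_state=42)
yellow_brick_validation_curve(
    RandomForestClassifier(random_state=42),
    X, y,
    cpu_count=-1,              # passed to n_jobs; -1 uses all cores
    cv_count=5,                # number of stratified folds
    param="max_depth",         # swept over np.arange(1, 11) inside the helper
    scoring_metric="f1_weighted",
)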
Example #2
def validation_curve_classifier(path="images/validation_curve_classifier.png"):
    data = pd.read_csv(os.path.join(FIXTURES, "game", "game.csv"))

    target = "outcome"
    features = [col for col in data.columns if col != target]

    X = pd.get_dummies(data[features])
    y = data[target]

    _, ax = plt.subplots()
    cv = StratifiedKFold(12)
    param_range = np.logspace(-6, -1, 12)

    oz = ValidationCurve(
        SVC(),
        ax=ax,
        param_name="gamma",
        param_range=param_range,
        logx=True,
        cv=cv,
        scoring="f1_weighted",
        n_jobs=8,
    )
    oz.fit(X, y)
    oz.poof(outpath=path)
Example #3
def validation_curve_classifier_alt(
        path="images/validation_curve_classifier_alt.png"):
    data = pd.read_csv(os.path.join(FIXTURES, "game", "game.csv"))

    target = "outcome"
    features = [col for col in data.columns if col != target]

    X = pd.get_dummies(data[features])
    y = data[target]

    _, ax = plt.subplots()
    cv = StratifiedKFold(4)
    param_range = np.arange(3, 20, 2)

    oz = ValidationCurve(
        KNeighborsClassifier(),
        ax=ax,
        param_name="n_neighbors",
        param_range=param_range,
        cv=cv,
        scoring="f1_weighted",
        n_jobs=8,
    )
    oz.fit(X, y)
    oz.poof(outpath=path)
Example #4
def plot_validation_curve(final_X, final_Y):

    viz = ValidationCurve(DecisionTreeClassifier(),
                          param_name="max_depth",
                          param_range=np.arange(1, 30),
                          cv=10,
                          scoring="accuracy")
    viz.fit(final_X, final_Y)
    viz.poof()
Example #5
def validation():
    X, y = load_energy()
    oz = ValidationCurve(
        DecisionTreeRegressor(),
        param_name="max_depth",
        param_range=np.arange(1, 11),
        cv=10,
        scoring="r2",
        ax=newfig(),
    )
    oz.fit(X, y)
    savefig(oz, "validation_curve")
Example #6
def draw_validation_curve(self,
                          param_name,
                          param_range,
                          cv,
                          logx=False,
                          scoring="accuracy",
                          n_jobs=5):
    visualizer = ValidationCurve(self.model,
                                 param_name=param_name,
                                 param_range=param_range,
                                 logx=logx,
                                 cv=cv,
                                 scoring=scoring,
                                 n_jobs=n_jobs)
    visualizer.fit(self.training_data, self.training_labels)
    visualizer.poof()
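
A hypothetical harness for this method, since the surrounding class is not shown; the wrapper class and synthetic data below are assumptions made for illustration:

# Hypothetical wrapper; only the attributes the method reads are defined.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier

class ModelWrapper:
    draw_validation_curve = draw_validation_curve  # reuse the method above

    def __init__(self, model, X, y):
        self.model = model
        self.training_data = X
        self.training_labels = y

X, y = make_classification(n_samples=400, random_state=1)
ModelWrapper(DecisionTreeClassifier(), X, y).draw_validation_curve(
    "max_depth", np.arange(1, 11), cv=5)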
Example #7
def validation_curve_sklearn_example(
        path="images/validation_curve_sklearn_example.png"):
    digits = load_digits()
    X, y = digits.data, digits.target

    _, ax = plt.subplots()

    param_range = np.logspace(-6, -1, 5)
    oz = ValidationCurve(SVC(),
                         ax=ax,
                         param_name="gamma",
                         param_range=param_range,
                         logx=True,
                         cv=10,
                         scoring="accuracy",
                         n_jobs=4)
    oz.fit(X, y)
    oz.poof(outpath=path)
Example #8
def generate_validation_curve(model, clf_name, param_name, param_range,
                              scoring, cv, dataset_name, X_train, y_train):
    if 'svm' in clf_name or 'nn' == clf_name:
        train_scores, test_scores = validation_curve(model,
                                                     X_train,
                                                     y_train,
                                                     param_name=param_name,
                                                     param_range=param_range,
                                                     scoring="accuracy",
                                                     n_jobs=8)
        train_scores_mean = np.mean(train_scores, axis=1)
        train_scores_std = np.std(train_scores, axis=1)
        test_scores_mean = np.mean(test_scores, axis=1)
        test_scores_std = np.std(test_scores, axis=1)

        plt.title("Validation Curve with {}".format(clf_name))
        plt.xlabel(param_name)
        plt.ylabel("Score")
        plt.semilogx(param_range,
                     train_scores_mean,
                     label="Training score",
                     marker='o',
                     color="#0272a2")
        plt.semilogx(param_range,
                     test_scores_mean,
                     label="Cross-validation score",
                     marker='o',
                     color="#9fc377")
        plt.legend(loc="best")
        plt.savefig("results/{}_model_complexity_{}_{}.png".format(
            clf_name, dataset_name, param_name))
        plt.clf()

    else:
        viz = ValidationCurve(model,
                              param_name=param_name,
                              param_range=param_range,
                              scoring=scoring,
                              cv=cv)
        viz.fit(X_train, y_train)
        viz.show("results/{}_model_complexity_{}_{}.png".format(
            clf_name, dataset_name, param_name))
        plt.clf()
Example #9
def validation_curve(model, X, y):
    from yellowbrick.model_selection import ValidationCurve
    from sklearn.model_selection import StratifiedKFold
    # Create the validation curve visualizer
    cv = StratifiedKFold(12)
    # Note: np.logspace treats its endpoints as exponents, so
    # logspace(30, 300) would span 10**30 to 10**300 -- far outside any
    # sensible n_estimators range. Use a linearly spaced integer range.
    param_range = np.linspace(30, 300, num=50, dtype=int)

    viz = ValidationCurve(
        model,
        param_name="n_estimators",
        param_range=param_range,
        cv=cv,
        scoring="f1_weighted",
        n_jobs=8,
    )

    viz.fit(X, y)
    viz.poof()
Example #10
def validation_curve(model, x, y, param, rang, cv):
    """
    
    :param model: Modelo a ser avaliado.
    :param x: Variáveis independentes de treino.
    :param y: Variavel dependente de treino.
    :param param: Parametro do modelo a ser avaliado.
    :param rang: Espaço de hipotese do parametro que esta sendo avaliado.
    :param cv: quantidade de splits para a cross validação.
    :return: Viz das curvas de validação.
    """
    viz = ValidationCurve(model,
                          param_name=param,
                          param_range=rang,
                          cv=cv,
                          scoring="roc_auc",
                          n_jobs=-1)

    viz.fit(x, y)
    viz.show()
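
A hypothetical call, assuming a binary target so the hard-coded roc_auc scorer applies; the model and data are assumptions:

# Hypothetical usage; the estimator and synthetic binary data are assumptions.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

X, y = make_classification(n_samples=300, random_state=0)  # binary target
validation_curve(GradientBoostingClassifier(random_state=0),
                 X, y,
                 param="max_depth",
                 rang=np.arange(1, 6),
                 cv=5)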
Example #11
def validation_curve_classifier_knn(path="images/validation_curve_classifier_knn.png"):
    X, y = load_game()
    X = OneHotEncoder().fit_transform(X)

    _, ax = plt.subplots()
    cv = StratifiedKFold(4)
    param_range = np.arange(3, 20, 2)

    print("warning: generating the KNN validation curve can take a very long time!")

    oz = ValidationCurve(
        KNeighborsClassifier(),
        ax=ax,
        param_name="n_neighbors",
        param_range=param_range,
        cv=cv,
        scoring="f1_weighted",
        n_jobs=8,
    )
    oz.fit(X, y)
    oz.show(outpath=path)
Example #12
def validation_curve_classifier_svc(path="images/validation_curve_classifier_svc.png"):
    X, y = load_game()
    X = OneHotEncoder().fit_transform(X)

    _, ax = plt.subplots()
    cv = StratifiedKFold(12)
    param_range = np.logspace(-6, -1, 12)

    print("warning: generating the SVC validation curve can take a very long time!")

    oz = ValidationCurve(
        SVC(),
        ax=ax,
        param_name="gamma",
        param_range=param_range,
        logx=True,
        cv=cv,
        scoring="f1_weighted",
        n_jobs=8,
    )
    oz.fit(X, y)
    oz.show(outpath=path)
Example #13
def validation_curve_regressor(path="images/validation_curve_regressor.png"):

    data = pd.read_csv(os.path.join(FIXTURES, "energy", "energy.csv"))

    targets = ["heating load", "cooling load"]
    features = [col for col in data.columns if col not in targets]

    X = data[features]
    y = data[targets[1]]

    _, ax = plt.subplots()
    param_range = np.arange(1, 11)

    oz = ValidationCurve(
        DecisionTreeRegressor(),
        ax=ax,
        param_name="max_depth",
        param_range=param_range,
        cv=10,
        scoring="r2",
        n_jobs=8,
    )
    oz.fit(X, y)
    oz.poof(outpath=path)
Example #14
    visualizer.show()

    # Learning curve
    visualizer = LearningCurve(model, scoring='f1_weighted')
    visualizer.fit(X_train, y_train)
    visualizer.show()

    # Cross-validation scores
    visualizer = CVScores(model, cv=5, scoring='f1_weighted')
    visualizer.fit(X_train, y_train)
    visualizer.show()

    # Feature importances
    visualizer = FeatureImportances(model)
    visualizer.fit(X_train, y_train)
    visualizer.show()

    # Recursive feature elimination
    visualizer = RFECV(model, cv=5, scoring='f1_weighted')
    visualizer.fit(X_train, y_train)
    visualizer.show()

    # Validation curve for max_depth
    visualizer = ValidationCurve(model,
                                 param_name="max_depth",
                                 param_range=np.arange(1, 11),
                                 cv=5,
                                 scoring="f1_weighted")
    visualizer.fit(X_train, y_train)
    visualizer.show()
Example #15
    print("Balanced_accuracy:{:.4f}".format(svf))
    #print("accuracy:{:.4f}".format(svf_acc))
   
data_score = pd.DataFrame(columns=['Commodity', 'score'])
data_score['Commodity'] = y_location_trains.columns
data_score['score'] = scores
print(data_score)
data_score.to_csv('/Users/monalisa/Downloads/mmai823-project-master/out/SVF_scores.csv')

# A validation curve works better for SVM

from sklearn.model_selection import StratifiedKFold
from matplotlib import pyplot as plt
%matplotlib inline

plt.tight_layout()
cv = StratifiedKFold(12)
param_range = np.logspace(-6, -1, 12)

viz = ValidationCurve(
    SVC(), param_name="gamma", param_range=param_range,
    logx=True, cv=cv, scoring="roc_auc", n_jobs=8,
)

viz.fit(X_train, training_scores_encoded)
viz.show()




# The head of this call was dropped in extraction; it was most likely a
# seaborn point plot of the cross-validated RMSE scores:
sns.pointplot(y='RMSE',
              data=test_scores,
              scale=.3,
              join=False,
              errwidth=2)
plt.title('Cross Validation Results')
plt.tight_layout()
plt.gcf().set_size_inches(10, 5)
plt.show()

# Train & validation curves with yellowbrick
fig, ax = plt.subplots(figsize=(16, 9))
val_curve = ValidationCurve(
    KNeighborsRegressor(),
    param_name='n_neighbors',
    param_range=n_neighbors,
    cv=5,
    scoring=rmse_score,
    #                       n_jobs=-1,
    ax=ax)
val_curve.fit(X, y)
val_curve.poof()
fig.tight_layout()
plt.show()

fig, ax = plt.subplots(figsize=(16, 9))
l_curve = LearningCurve(
    KNeighborsRegressor(n_neighbors=best_k),
    train_sizes=np.arange(.1, 1.01, .1),
    scoring=rmse_score,
    cv=5,
    #                       n_jobs=-1,
    ax=ax)
l_curve.fit(X, y)
l_curve.poof()

# ## Hyperparameter Tuning with logistic regression

# ### Validation Curve with C (metric: accuracy)

# In[59]:

# Plotting
from yellowbrick.model_selection import ValidationCurve
param_range = np.arange(0.001, 10)

viz = ValidationCurve(LogisticRegression(solver='liblinear'),
                      param_name='C',
                      param_range=param_range,
                      cv=5,
                      scoring="accuracy")

viz.fit(X_train, y_train)
viz.show()

# ### Validation Curve with C (metric: AUC score)

# In[60]:

from sklearn.metrics import roc_auc_score, roc_curve, confusion_matrix, classification_report, auc

C = [0.1, 0.5, 1, 1.5, 1.75, 2]
train_results = []
test_results = []
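
The example is cut off here; below is a minimal sketch of the loop these lists appear to set up, assuming the usual train/test split variables (the loop body is a reconstruction, not the original code):

# Assumed completion: record train vs. test AUC for each value of C.
for c in C:
    lr = LogisticRegression(solver='liblinear', C=c)
    lr.fit(X_train, y_train)
    train_results.append(roc_auc_score(y_train, lr.predict_proba(X_train)[:, 1]))
    test_results.append(roc_auc_score(y_test, lr.predict_proba(X_test)[:, 1]))

plt.plot(C, train_results, marker='o', label='Train AUC')
plt.plot(C, test_results, marker='o', label='Test AUC')
plt.xlabel('C')
plt.ylabel('AUC')
plt.legend(loc='best')
plt.show()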
Example #18
# %%
class DTClassifier():
    def hyperParameterTuning(self,x_train,y_train):
        param_grid = {'max_depth': range(1, 21), 'min_samples_leaf': range(1, 21)}
        tuned = GridSearchCV(estimator=DecisionTreeClassifier(random_state=rs), param_grid=param_grid, cv=10)
        tuned.fit(x_train, y_train)
        return tuned.best_params_


# %%
dt = DTClassifier()


# %%
viz = ValidationCurve(
    DecisionTreeClassifier(), param_name="max_depth",
    param_range=np.arange(1, 21), cv=10, scoring="f1_weighted"
)

# Fit and show the visualizer
viz.fit(x_data, y_data)
viz.show()


# %%
viz = ValidationCurve(
    DecisionTreeClassifier(), param_name="min_samples_leaf",
    param_range=np.arange(1, 21), cv=10, scoring="f1_weighted"
)

# Fit and show the visualizer
viz.fit(x_data, y_data)
viz.show()
Example #19
# (The opening line of this plt.subplots call was lost in extraction;
# nrows=2 is inferred from the ax[index] indexing in the loop below.)
fig, axes = plt.subplots(nrows=2,
                         ncols=2,
                         figsize=(9, 9),
                         sharex=True)

for ind, model in enumerate(models):
    model.fit(x_train, y_train)
    preds = model.predict(x_test)
    for index, ax in enumerate(axes):
        residuals_plot(model, x_test, preds, hist=False, ax=ax[index])
        prediction_error(model, x_test, preds, ax=ax)

# Do some scoring on XGB estimators
# Validation curve
viz = ValidationCurve(XGBRegressor(objective="reg:squarederror"),
                      param_name="max_depth",
                      param_range=np.arange(1, 11),
                      cv=5,
                      scoring="r2")
viz.fit(x_train, y_train)
viz.show()

# Learning curve
model = XGBRegressor(objective="reg:squarederror")
viz_2 = LearningCurve(model, scoring="r2")
viz_2.fit(x_train, y_train)
viz_2.show()

model = RFECV(LassoCV(), cv=5, scoring='r2')
model.fit(x_train, y_train)
model.show()
"""
Example #20
case_name = "mg_sizing_dataset_with_loc"
df = pd.read_csv("results/" + case_name + ".csv", sep=";|,", engine="python", index_col='index')
#df = df.loc[df['off-grid'] == 1]
X = df[features]
scaler.fit(X)
X = scaler.transform(X)
# X = pd.DataFrame(scaler.transform(X), index=X.index, columns=X.columns)
targets = ["PV","BAT","RBAT","INV","GEN","NPV"]
y = df[targets]
param_range = np.arange(1, 30, 1)
cv = KFold(n_splits=12, random_state=40, shuffle=True)

viz = ValidationCurve(
    KNeighborsRegressor(), param_name="n_neighbors", param_range=param_range, scoring="r2", cv=cv, n_jobs=8
)

viz.fit(X, y)
viz.show()

visualizer = LearningCurve(KNeighborsRegressor(), scoring='r2', random_state=2, cv=cv, shuffle=True)

visualizer.fit(X, y)
visualizer.show()

vis = CVScores(KNeighborsRegressor(), cv=cv, scoring='r2')

vis.fit(X, y)        # Fit the data to the visualizer
vis.show()