Example #1
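# `grid_visualization` is not defined in this excerpt; presumably it holds the
# grid search's mean CV scores reshaped to (n_neighbors, weights). A minimal
# sketch; the use of cv_results_ and the reshape order are assumptions:
scores = grid_search.cv_results_['mean_test_score']
grid_visualization = scores.reshape(max_neighbors, 2)[::-1]  # rows reversed to match yticks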
plt.figure(figsize=(2, max_neighbors))
sb.heatmap(grid_visualization, cmap='Blues')
plt.xticks(np.arange(2) + 0.5, grid_search.param_grid['weights'])
plt.yticks(
    np.arange(max_neighbors) + 0.5,
    grid_search.param_grid['n_neighbors'][::-1])
plt.xlabel('weights')
plt.ylabel('n_neighbors')

# In[6]:
# Draw the learning curve
X, y = arr_in, arr_out
title1 = "Learning Curve (K Nearest Neighbors)"
cv = StratifiedKFold(n_splits=3)
estimator = KNeighborsClassifier()
plot_learning_curve(estimator, title1, X, y, ylim=None, cv=cv, n_jobs=4)

# In[7]:
# Draw the validation curve
title2 = "Validation Curve with K Nearest Neighbors"
xlabel = "n_neighbors"
ylabel = "Score"
plot_validation_curve(estimator,
                      title2,
                      xlabel,
                      ylabel,
                      X,
                      y,
                      param_name='n_neighbors',
                      ylim=None,
                      cv=cv,
                      n_jobs=4)
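# None of these snippets define plot_learning_curve / plot_validation_curve.
# A minimal sketch of what they presumably wrap, using
# sklearn.model_selection.learning_curve and validation_curve; the styling
# and the default param_range below are assumptions, not the original helpers:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve, validation_curve


def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, n_jobs=1,
                        scoring=None, train_sizes=np.linspace(0.1, 1.0, 5)):
    # Mean train/CV scores as a function of the number of training samples.
    sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, scoring=scoring,
        train_sizes=train_sizes)
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    plt.plot(sizes, train_scores.mean(axis=1), 'o-', label="Training score")
    plt.plot(sizes, test_scores.mean(axis=1), 'o-', label="Cross-validation score")
    plt.legend(loc="best")
    return plt


def plot_validation_curve(estimator, title, xlabel, ylabel, X, y, param_name,
                          param_range=np.arange(1, 11), ylim=None, cv=None,
                          n_jobs=1):
    # Mean train/CV scores as a function of one hyperparameter.
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range,
        cv=cv, n_jobs=n_jobs)
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.plot(param_range, train_scores.mean(axis=1), 'o-', label="Training score")
    plt.plot(param_range, test_scores.mean(axis=1), 'o-', label="Cross-validation score")
    plt.legend(loc="best")
    return plt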
Example #2
 """
     LOGISTIC REGRESSION
 """
 
 clf = LogisticRegression()
 
 """
     Here we plot learning curves to see the performance of the algorithm in function of number of samples we train on. 
     This provides an insight if we could increase the cv-accuracy when increasing the sample size.
 """
 scoring_type = "f1"
 cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
 #plot_learning_curve(clf, "Learning Curve", train_array, train_labels, ylim=None, 
 #cv=cv, n_jobs=4, scoring = scoring_type , train_sizes=np.linspace(0.1, 1.0, 15))
 plot_learning_curve(clf, "Learning Curve", X_train, y_train, ylim=None, cv=cv, n_jobs=4, scoring = scoring_type , train_sizes=np.linspace(0.01, 0.1, 15))#0.001, 0.02, 15)
 plt.show()
 
 """
     Here we fit the model to the train sets. We immediately predict the relevant probabilities per line of being a Default.
 """
 model = clf.fit(X_train, y_train)
 y_cv_predict = model.predict(X_cv)
 y_cv_PD = model.predict_proba(X_cv)
 
 """
     As we resample the dataset to a 50/50 representation of default and non-defaults, we need to apply a correction as the
     algorithm is expecting to be assessed on a 50/50 training set. We apply the correction as described in
     Dal Pozzolo, A., “Calibrating Probability with Undersampling for Unbalanced Classification”
 """
 y_cv_PD_corr = copy(y_cv_PD)
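# The excerpt ends before the correction itself. Dal Pozzolo et al. map the
# balanced-set posterior ps back to the population posterior via
# p = beta*ps / (beta*ps - ps + 1), where beta is the rate at which
# majority-class samples were kept during undersampling. A minimal sketch;
# `beta` (n_defaults / n_non_defaults for a 50/50 resample) is an assumption
# about how this codebase computes that rate:
ps = y_cv_PD_corr[:, 1]                     # P(default) from the balanced model
y_cv_PD_corr[:, 1] = beta * ps / (beta * ps - ps + 1)
y_cv_PD_corr[:, 0] = 1.0 - y_cv_PD_corr[:, 1]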
Example #3
clf = GridSearchCV(boosted, parameters, cv=5, scoring='f1_weighted')
clf.fit(training_data, training_target)
print(clf.best_params_)
# best params: n_estimators=200, min_samples_split=34
params = clf.best_params_
clf = GradientBoostingClassifier(**(params))
clf.fit(training_data, training_target)
print(classification_report(test_target, clf.predict(test_data)))
# score = 0.97 CHECK
title = "Learning Curve for Boosted Decision Tree - Breast Cancer"
estimator = GradientBoostingClassifier(**(params))
plot_learning_curve(estimator,
                    title,
                    training_data,
                    training_target,
                    ylim=(0, 1.1),
                    cv=5)
plt.show()

### DIGITS DATASET ###

digits = load_digits()
training_data, test_data, training_target, test_target = train_test_split(
    digits.data, digits.target, test_size=.3, random_state=1)

parameters = {
    'n_estimators': [25, 50, 100, 200],
    'min_samples_split': list(range(2, 50, 2))
}
print('Training Boosted Decision Tree on Digits dataset with parameters: ' +
      str(parameters))
Example #4
model.add(Conv2D(64, (3, 3), input_shape=x.shape[1:]))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())

model.add(Dense(64))
model.add(Activation("relu"))

model.add(Dense(1))
model.add(Activation("sigmoid"))

model.compile(loss="binary_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])

history = model.fit(x, y, batch_size=8, epochs=5, validation_split=0.1)

model.save("weather_predictor.model")

plot_learning_curve(history)
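# Here plot_learning_curve receives a Keras History object rather than a
# scikit-learn estimator, so it must be a different, custom helper. A minimal
# sketch of what it presumably does; the metric keys assume tf.keras 2.x
# with metrics=["accuracy"]:
import matplotlib.pyplot as plt


def plot_learning_curve(history):
    # Training vs. validation accuracy per epoch.
    plt.plot(history.history["accuracy"], label="train accuracy")
    plt.plot(history.history["val_accuracy"], label="validation accuracy")
    plt.xlabel("epoch")
    plt.ylabel("accuracy")
    plt.legend(loc="best")
    plt.show()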
Example #5
plt.yticks(
    np.arange(len(grid_search.param_grid['learning_rate'])) + 0.5,
    grid_search.param_grid['learning_rate'][::-1])
plt.xlabel('n_estimators')
plt.ylabel('learning_rate')

# In[6]:
# Draw the learning curve
X, y = arr_in, arr_out
title1 = "Learning Curves (Adaboost)"

cv = ShuffleSplit(n_splits=100, test_size=0.33, random_state=0)
ADB = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=grid_search.best_params_['n_estimators'],
    learning_rate=grid_search.best_params_['learning_rate'],
    random_state=0)
plot_learning_curve(ADB, title1, X, y, ylim=(0.4, 1.01), cv=cv, n_jobs=4)

# In[7]:
# Draw the validation curve
title2 = "Validation Curve with AdaBoost"
xlabel = "n_estimators"
ylabel = "Score"
ADB = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    learning_rate=grid_search.best_params_['learning_rate'],
    random_state=0)
plot_validation_curve(ADB,
                      title2,
                      xlabel,
                      ylabel,
                      X,
                      y,
                      param_name='n_estimators',
                      ylim=None,
                      cv=cv,
                      n_jobs=4)
Example #6
plt.figure(figsize=((size[1] - 1) / 2, max_d / 2))
sb.heatmap(grid_visualization, cmap='Blues')
plt.xticks(
    np.arange(size[1] - 1) + 0.5, grid_search.param_grid['min_samples_split'])
plt.yticks(np.arange(max_d) + 0.5, grid_search.param_grid['max_depth'][::-1])
plt.xlabel('min_samples_split')
plt.ylabel('max_depth')

# In[6]:
# Draw the learning curve
X, y = arr_in, arr_out
title1 = "Learning Curves (Decision Tree Classifier)"
cv = ShuffleSplit(n_splits=100, test_size=0.33, random_state=0)
estimator = DecisionTreeClassifier()
plot_learning_curve(estimator, title1, X, y, ylim=(0.4, 1.01), cv=cv, n_jobs=4)

# In[7]:
# Draw the validation curve

title2 = "Validation Curve with Decision Tree Classifier "
xlabel = "max_depth"
ylabel = "Score"
plot_validation_curve(estimator,
                      title2,
                      xlabel,
                      ylabel,
                      X,
                      y,
                      param_name='max_depth',
                      ylim=None,
                      cv=cv,
                      n_jobs=4)