Example #1
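# `grid_visualization` is not defined in this excerpt; presumably it holds the
# grid search's mean CV scores reshaped to (n_neighbors, weights). A minimal
# sketch; the use of cv_results_ and the reshape order are assumptions:
scores = grid_search.cv_results_['mean_test_score']
grid_visualization = scores.reshape(max_neighbors, 2)[::-1]  # rows reversed to match yticks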
plt.figure(figsize=(2, max_neighbors))
sb.heatmap(grid_visualization, cmap='Blues')
plt.xticks(np.arange(2) + 0.5, grid_search.param_grid['weights'])
plt.yticks(
    np.arange(max_neighbors) + 0.5,
    grid_search.param_grid['n_neighbors'][::-1])
plt.xlabel('weights')
plt.ylabel('n_neighbors')

# In[6]:
# Draw the learning curve
X, y = arr_in, arr_out
title1 = "Learning Curve (K Nearest Neighbors)"
cv = StratifiedKFold(n_splits=3)
estimator = KNeighborsClassifier()
plot_learning_curve(estimator, title1, X, y, ylim=None, cv=cv, n_jobs=4)

# In[7]:
# Draw the validation curve
title2 = "Validation Curve with K Nearest Neighbors"
xlabel = "n_neighbors"
ylabel = "Score"
plot_validation_curve(estimator,
                      title2,
                      xlabel,
                      ylabel,
                      X,
                      y,
                      param_name='n_neighbors',
                      ylim=None,
                      cv=cv,
                      n_jobs=4)
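# None of these snippets define plot_learning_curve / plot_validation_curve.
# A minimal sketch of what they presumably wrap, using
# sklearn.model_selection.learning_curve and validation_curve; the styling
# and the default param_range below are assumptions, not the original helpers:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve, validation_curve


def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, n_jobs=1,
                        scoring=None, train_sizes=np.linspace(0.1, 1.0, 5)):
    # Mean train/CV scores as a function of the number of training samples.
    sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, scoring=scoring,
        train_sizes=train_sizes)
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    plt.plot(sizes, train_scores.mean(axis=1), 'o-', label="Training score")
    plt.plot(sizes, test_scores.mean(axis=1), 'o-', label="Cross-validation score")
    plt.legend(loc="best")
    return plt


def plot_validation_curve(estimator, title, xlabel, ylabel, X, y, param_name,
                          param_range=np.arange(1, 11), ylim=None, cv=None,
                          n_jobs=1):
    # Mean train/CV scores as a function of one hyperparameter.
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range,
        cv=cv, n_jobs=n_jobs)
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.plot(param_range, train_scores.mean(axis=1), 'o-', label="Training score")
    plt.plot(param_range, test_scores.mean(axis=1), 'o-', label="Cross-validation score")
    plt.legend(loc="best")
    return plt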
Example #2
 """
     LOGISTIC REGRESSION
 """
 
 clf = LogisticRegression()
 
 """
     Here we plot learning curves to see the performance of the algorithm in function of number of samples we train on. 
     This provides an insight if we could increase the cv-accuracy when increasing the sample size.
 """
 scoring_type = "f1"
 cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
 #plot_learning_curve(clf, "Learning Curve", train_array, train_labels, ylim=None, 
 #cv=cv, n_jobs=4, scoring = scoring_type , train_sizes=np.linspace(0.1, 1.0, 15))
 plot_learning_curve(clf, "Learning Curve", X_train, y_train, ylim=None, cv=cv, n_jobs=4, scoring = scoring_type , train_sizes=np.linspace(0.01, 0.1, 15))#0.001, 0.02, 15)
 plt.show()
 
 """
     Here we fit the model to the train sets. We immediately predict the relevant probabilities per line of being a Default.
 """
 model = clf.fit(X_train, y_train)
 y_cv_predict = model.predict(X_cv)
 y_cv_PD = model.predict_proba(X_cv)
 
 """
     As we resample the dataset to a 50/50 representation of default and non-defaults, we need to apply a correction as the
     algorithm is expecting to be assessed on a 50/50 training set. We apply the correction as described in
     Dal Pozzolo, A., “Calibrating Probability with Undersampling for Unbalanced Classification”
 """
 y_cv_PD_corr = copy(y_cv_PD)
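# The excerpt ends before the correction itself. Dal Pozzolo et al. map the
# balanced-set posterior ps back to the population posterior via
# p = beta*ps / (beta*ps - ps + 1), where beta is the rate at which
# majority-class samples were kept during undersampling. A minimal sketch;
# `beta` (n_defaults / n_non_defaults for a 50/50 resample) is an assumption
# about how this codebase computes that rate:
ps = y_cv_PD_corr[:, 1]                     # P(default) from the balanced model
y_cv_PD_corr[:, 1] = beta * ps / (beta * ps - ps + 1)
y_cv_PD_corr[:, 0] = 1.0 - y_cv_PD_corr[:, 1]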
Example #3
clf = GridSearchCV(boosted, parameters, cv=5, scoring='f1_weighted')
clf.fit(training_data, training_target)
print(clf.best_params_)
# best params: n_estimators=200, min_samples_split=34
params = clf.best_params_
clf = GradientBoostingClassifier(**(params))
clf.fit(training_data, training_target)
print(classification_report(test_target, clf.predict(test_data)))
# score = 0.97 CHECK
title = "Learning Curve for Boosted Decision Tree - Breast Cancer"
estimator = GradientBoostingClassifier(**(params))
plot_learning_curve(estimator,
                    title,
                    training_data,
                    training_target,
                    ylim=(0, 1.1),
                    cv=5)
plt.show()

### DIGITS DATASET ###

digits = load_digits()
training_data, test_data, training_target, test_target = train_test_split(
    digits.data, digits.target, test_size=.3, random_state=1)

parameters = {
    'n_estimators': [25, 50, 100, 200],
    'min_samples_split': list(range(2, 50, 2))
}
print('Training Boosted Decision Tree on Digits dataset with parameters: ' +
      str(parameters))
Example #4
model.add(Conv2D(64, (3, 3), input_shape=x.shape[1:]))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())

model.add(Dense(64))
model.add(Activation("relu"))

model.add(Dense(1))
model.add(Activation("sigmoid"))

model.compile(loss="binary_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])

history = model.fit(x, y, batch_size=8, epochs=5, validation_split=0.1)

model.save("weather_predictor.model")

plot_learning_curve(history)
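# Here plot_learning_curve receives a Keras History object rather than a
# scikit-learn estimator, so it must be a different, custom helper. A minimal
# sketch of what it presumably does; the metric keys assume tf.keras 2.x
# with metrics=["accuracy"]:
import matplotlib.pyplot as plt


def plot_learning_curve(history):
    # Training vs. validation accuracy per epoch.
    plt.plot(history.history["accuracy"], label="train accuracy")
    plt.plot(history.history["val_accuracy"], label="validation accuracy")
    plt.xlabel("epoch")
    plt.ylabel("accuracy")
    plt.legend(loc="best")
    plt.show()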
Example #5
plt.yticks(
    np.arange(len(grid_search.param_grid['learning_rate'])) + 0.5,
    grid_search.param_grid['learning_rate'][::-1])
plt.xlabel('n_estimators')
plt.ylabel('learning_rate')

# In[6]:
# Draw the learning curve
X, y = arr_in, arr_out
title1 = "Learning Curves (Adaboost)"

cv = ShuffleSplit(n_splits=100, test_size=0.33, random_state=0)
ADB = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=grid_search.best_params_['n_estimators'],
    learning_rate=grid_search.best_params_['learning_rate'],
    random_state=0)
plot_learning_curve(ADB, title1, X, y, ylim=(0.4, 1.01), cv=cv, n_jobs=4)

# In[7]:
# Draw the validation curve
title2 = "Validation Curve with AdaBoost"
xlabel = "n_estimators"
ylabel = "Score"
ADB = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    learning_rate=grid_search.best_params_['learning_rate'],
    random_state=0)
plot_validation_curve(ADB,
                      title2,
                      xlabel,
                      ylabel,
                      X,
                      y,
                      param_name='n_estimators',
                      ylim=None,
                      cv=cv,
                      n_jobs=4)
Example #6
plt.figure(figsize=((size[1] - 1) / 2, max_d / 2))
sb.heatmap(grid_visualization, cmap='Blues')
plt.xticks(
    np.arange(size[1] - 1) + 0.5, grid_search.param_grid['min_samples_split'])
plt.yticks(np.arange(max_d) + 0.5, grid_search.param_grid['max_depth'][::-1])
plt.xlabel('min_samples_split')
plt.ylabel('max_depth')

# In[6]:
# Draw the learning curve
X, y = arr_in, arr_out
title1 = "Learning Curves (Decision Tree Classifier)"
cv = ShuffleSplit(n_splits=100, test_size=0.33, random_state=0)
estimator = DecisionTreeClassifier()
plot_learning_curve(estimator, title1, X, y, ylim=(0.4, 1.01), cv=cv, n_jobs=4)

# In[7]:
# Draw the validation curve

title2 = "Validation Curve with Decision Tree Classifier "
xlabel = "max_depth"
ylabel = "Score"
plot_validation_curve(estimator,
                      title2,
                      xlabel,
                      ylabel,
                      X,
                      y,
                      param_name='max_depth',
                      ylim=None,
                      cv=cv,
                      n_jobs=4)