def validation_curve_classifier(path="images/validation_curve_classifier.png"):
    data = pd.read_csv(os.path.join(FIXTURES, "game", "game.csv"))

    target = "outcome"
    features = [col for col in data.columns if col != target]

    X = pd.get_dummies(data[features])
    y = data[target]

    _, ax = plt.subplots()
    cv = StratifiedKFold(12)
    param_range = np.logspace(-6, -1, 12)

    oz = ValidationCurve(
        SVC(), ax=ax, param_name="gamma", param_range=param_range,
        logx=True, cv=cv, scoring="f1_weighted", n_jobs=8,
    )
    oz.fit(X, y)
    oz.poof(outpath=path)
def yellow_brick_validation_curve(model, x, y, cpu_count, cv_count, param, scoring_metric):
    """Plot a validation curve for ``model`` over ``param`` using Yellowbrick."""
    from yellowbrick.model_selection import ValidationCurve
    from sklearn.model_selection import StratifiedKFold

    # Create the cross-validation strategy for the validation curve visualizer
    cv = StratifiedKFold(n_splits=cv_count)

    # Validation Curve
    mpl.rcParams['axes.prop_cycle'] = cycler('color', ['purple', 'darkblue'])
    fig = plt.gcf()
    fig.set_size_inches(10, 10)
    ax = plt.subplot(411)

    viz = ValidationCurve(model, n_jobs=cpu_count, ax=ax, param_name=param,
                          param_range=np.arange(1, 11), cv=cv, scoring=scoring_metric)

    # Fit and show the visualizer
    viz.fit(x, y)
    viz.show()
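# A minimal usage sketch for the helper above, assuming X_train and y_train already
# hold a feature matrix and label vector (these names, the estimator, and the chosen
# hyperparameter are illustrative only, not part of the original snippet):
from sklearn.ensemble import RandomForestClassifier

yellow_brick_validation_curve(
    model=RandomForestClassifier(),
    x=X_train,
    y=y_train,
    cpu_count=4,            # forwarded to ValidationCurve as n_jobs
    cv_count=5,             # number of StratifiedKFold splits
    param="max_depth",      # hyperparameter swept over np.arange(1, 11)
    scoring_metric="f1_weighted",
)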
def validation_curve_classifier_alt(path="images/validation_curve_classifier_alt.png"):
    data = pd.read_csv(os.path.join(FIXTURES, "game", "game.csv"))

    target = "outcome"
    features = [col for col in data.columns if col != target]

    X = pd.get_dummies(data[features])
    y = data[target]

    _, ax = plt.subplots()
    cv = StratifiedKFold(4)
    param_range = np.arange(3, 20, 2)

    oz = ValidationCurve(
        KNeighborsClassifier(), ax=ax, param_name="n_neighbors",
        param_range=param_range, cv=cv, scoring="f1_weighted", n_jobs=8,
    )
    oz.fit(X, y)
    oz.poof(outpath=path)
def plot_validation_curve(final_X, final_Y):
    viz = ValidationCurve(DecisionTreeClassifier(), param_name="max_depth",
                          param_range=np.arange(1, 30), cv=10, scoring="accuracy")
    viz.fit(final_X, final_Y)
    viz.poof()
def validation():
    X, y = load_energy()
    oz = ValidationCurve(
        DecisionTreeRegressor(),
        param_name="max_depth",
        param_range=np.arange(1, 11),
        cv=10,
        scoring="r2",
        ax=newfig(),
    )
    oz.fit(X, y)
    savefig(oz, "validation_curve")
def validation_curve_sklearn_example(path="images/validation_curve_sklearn_example.png"):
    digits = load_digits()
    X, y = digits.data, digits.target

    _, ax = plt.subplots()
    param_range = np.logspace(-6, -1, 5)

    oz = ValidationCurve(
        SVC(), ax=ax, param_name="gamma", param_range=param_range,
        logx=True, cv=10, scoring="accuracy", n_jobs=4,
    )
    oz.fit(X, y)
    oz.poof(outpath=path)
def draw_validation_curve(self, param_name, param_range, cv, logx=False,
                          scoring="accuracy", n_jobs=5):
    visualizer = ValidationCurve(self.model, param_name=param_name,
                                 param_range=param_range, logx=logx, cv=cv,
                                 scoring=scoring, n_jobs=n_jobs)
    visualizer.fit(self.training_data, self.training_labels)
    visualizer.poof()
def generate_validation_curve(model, clf_name, param_name, param_range, scoring,
                              cv, dataset_name, X_train, y_train):
    if 'svm' in clf_name or 'nn' == clf_name:
        train_scores, test_scores = validation_curve(model, X_train, y_train,
                                                     param_name=param_name,
                                                     param_range=param_range,
                                                     scoring="accuracy", n_jobs=8)
        train_scores_mean = np.mean(train_scores, axis=1)
        train_scores_std = np.std(train_scores, axis=1)
        test_scores_mean = np.mean(test_scores, axis=1)
        test_scores_std = np.std(test_scores, axis=1)

        plt.title("Validation Curve with {}".format(clf_name))
        plt.xlabel(param_name)
        plt.ylabel("Score")
        plt.semilogx(param_range, train_scores_mean, label="Training score",
                     marker='o', color="#0272a2")
        plt.semilogx(param_range, test_scores_mean, label="Cross-validation score",
                     marker='o', color="#9fc377")
        plt.legend(loc="best")
        plt.savefig("results/{}_model_complexity_{}_{}.png".format(
            clf_name, dataset_name, param_name))
        plt.clf()
    else:
        viz = ValidationCurve(model, param_name=param_name, param_range=param_range,
                              scoring=scoring, cv=cv)
        viz.fit(X_train, y_train)
        viz.show("results/{}_model_complexity_{}_{}.png".format(
            clf_name, dataset_name, param_name))
        plt.clf()
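# A hypothetical call to the helper above, assuming X_train/y_train already exist and
# a results/ directory is present for the saved figure (the estimator, names, and
# parameter range below are illustrative only):
from sklearn.svm import SVC

generate_validation_curve(
    model=SVC(), clf_name="svm_rbf", param_name="gamma",
    param_range=np.logspace(-6, -1, 6), scoring="accuracy", cv=5,
    dataset_name="digits", X_train=X_train, y_train=y_train,
)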
def validation_curve_regressor(path="images/validation_curve_regressor.png"):
    data = pd.read_csv(os.path.join(FIXTURES, "energy", "energy.csv"))

    targets = ["heating load", "cooling load"]
    features = [col for col in data.columns if col not in targets]

    X = data[features]
    y = data[targets[1]]

    _, ax = plt.subplots()
    param_range = np.arange(1, 11)

    oz = ValidationCurve(
        DecisionTreeRegressor(), ax=ax, param_name="max_depth",
        param_range=param_range, cv=10, scoring="r2", n_jobs=8,
    )
    oz.fit(X, y)
    oz.poof(outpath=path)
def validation_curve(model, X, y):
    from yellowbrick.model_selection import ValidationCurve
    from sklearn.model_selection import StratifiedKFold

    # Create the validation curve visualizer
    cv = StratifiedKFold(12)
    # param_range = np.linspace(30, 300, num=50, dtype=np.float64)
    # Sweep n_estimators from 30 to 300 on a log scale; np.logspace takes exponents,
    # so pass log10 of the bounds rather than the raw values
    param_range = np.logspace(np.log10(30), np.log10(300), num=100, dtype=np.int32)

    viz = ValidationCurve(
        model, param_name="n_estimators", param_range=param_range,
        logx=True, cv=cv, scoring="f1_weighted", n_jobs=8,
    )
    viz.fit(X, y)
    viz.poof()
def validation_curve(model, x, y, param, rang, cv):
    """
    :param model: Model to be evaluated.
    :param x: Independent training variables.
    :param y: Dependent training variable.
    :param param: Model hyperparameter to be evaluated.
    :param rang: Hypothesis space (range of values) for the parameter under evaluation.
    :param cv: Number of splits for cross-validation.
    :return: Visualization of the validation curves.
    """
    viz = ValidationCurve(model, param_name=param, param_range=rang, cv=cv,
                          scoring="roc_auc", n_jobs=-1)
    viz.fit(x, y)
    viz.show()
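# A minimal usage sketch of the helper above; since it scores with "roc_auc" it assumes
# a binary classification problem. X_train, y_train, and the estimator/parameter choices
# below are illustrative, not part of the original snippet:
from sklearn.linear_model import LogisticRegression

validation_curve(
    LogisticRegression(solver="liblinear"),
    X_train, y_train,
    param="C",
    rang=np.logspace(-3, 2, 10),
    cv=5,
)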
def validation_curve_classifier_knn(path="images/validation_curve_classifier_knn.png"):
    X, y = load_game()
    X = OneHotEncoder().fit_transform(X)

    _, ax = plt.subplots()
    cv = StratifiedKFold(4)
    param_range = np.arange(3, 20, 2)

    print("warning: generating the KNN validation curve can take a very long time!")

    oz = ValidationCurve(
        KNeighborsClassifier(), ax=ax, param_name="n_neighbors",
        param_range=param_range, cv=cv, scoring="f1_weighted", n_jobs=8,
    )
    oz.fit(X, y)
    oz.show(outpath=path)
def validation_curve_classifier_svc(path="images/validation_curve_classifier_svc.png"):
    X, y = load_game()
    X = OneHotEncoder().fit_transform(X)

    _, ax = plt.subplots()
    cv = StratifiedKFold(12)
    param_range = np.logspace(-6, -1, 12)

    print("warning: generating the SVC validation curve can take a very long time!")

    oz = ValidationCurve(
        SVC(), ax=ax, param_name="gamma", param_range=param_range,
        logx=True, cv=cv, scoring="f1_weighted", n_jobs=8,
    )
    oz.fit(X, y)
    oz.show(outpath=path)
# ### Validation Curve with C (metric: accuracy)

# In[59]:

# Plotting
from yellowbrick.model_selection import ValidationCurve

param_range = np.arange(0.001, 10)
viz = ValidationCurve(LogisticRegression(solver='liblinear'), param_name='C',
                      param_range=param_range, cv=5, scoring="accuracy")
viz.fit(X_train, y_train)
viz.show()


# ### Validation Curve with C (metric: AUC score)

# In[60]:

from sklearn.metrics import roc_auc_score, roc_curve, confusion_matrix, classification_report, auc

C = [0.1, 0.5, 1, 1.5, 1.75, 2]
train_results = []
test_results = []

for c in C:
    model = LogisticRegression(C=c, solver='liblinear', random_state=0)
    model.fit(X_train, y_train)
    train_pred = model.predict_proba(X_train)[:, 1]
    tuned.fit(x_train, y_train)
    return tuned.best_params_


# %%
dt = DTClassifier()

# %%
viz = ValidationCurve(
    DecisionTreeClassifier(), param_name="max_depth",
    param_range=np.arange(1, 21), cv=10, scoring="f1_weighted"
)

# Fit and show the visualizer
viz.fit(x_data, y_data)
viz.show()

# %%
viz = ValidationCurve(
    DecisionTreeClassifier(), param_name="min_samples_leaf",
    param_range=np.arange(1, 21), cv=10, scoring="f1_weighted"
)

# Fit and show the visualizer
viz.fit(x_data, y_data)
viz.show()

# %%
print("Balanced_accuracy:{:.4f}".format(svf)) #print("accuracy:{:.4f}".format(svf_acc)) data_score = pd.DataFrame(columns=['Commodity', 'score']) data_score['Commodity'] = y_location_trains.columns data_score['score'] = scores print(data_score) df_score.to_csv('/Users/monalisa/Downloads/mmai823-project-master/out/SVF_scores.csv') # Vizualization Curve is better for SVM from sklearn.model_selection import StratifiedKFold from matplotlib import pyplot as plt %matplotlib inline plt.tight_layout() cv = StratifiedKFold(12) param_range = np.logspace(-6, -1, 12) viz = ValidationCurve( SVC(), param_name="gamma", param_range=param_range, logx=True, cv=cv, scoring="roc_auc", n_jobs=8, ) viz.fit(X_train, training_scores_encoded) viz.show()
plt.title('Cross Validation Results')
plt.tight_layout()
plt.gcf().set_size_inches(10, 5)
plt.show()

# Train & validation curves with yellowbrick
fig, ax = plt.subplots(figsize=(16, 9))
val_curve = ValidationCurve(
    KNeighborsRegressor(),
    param_name='n_neighbors',
    param_range=n_neighbors,
    cv=5,
    scoring=rmse_score,
    # n_jobs=-1,
    ax=ax)
val_curve.fit(X, y)
val_curve.poof()
fig.tight_layout()
plt.show()

fig, ax = plt.subplots(figsize=(16, 9))
l_curve = LearningCurve(
    KNeighborsRegressor(n_neighbors=best_k),
    train_sizes=np.arange(.1, 1.01, .1),
    scoring=rmse_score,
    cv=5,
    # n_jobs=-1,
    ax=ax)
l_curve.fit(X, y)
l_curve.poof()
fig.tight_layout()
for ind, model in enumerate(models):
    model.fit(x_train, y_train)
    preds = model.predict(x_test)
    for index, ax in enumerate(axes):
        residuals_plot(model, x_test, preds, hist=False, ax=ax[index])
        prediction_error(model, x_test, preds, ax=ax)

# Do some scoring on XGB estimators
# Validation curve
viz = ValidationCurve(XGBRegressor(objective="reg:squarederror"),
                      param_name="max_depth", param_range=np.arange(1, 11),
                      cv=5, scoring="r2")
viz.fit(x_train, y_train)
viz.show()

# Learning curve
model = XGBRegressor(objective="reg:squarederror")
viz_2 = LearningCurve(model, scoring="r2")
viz_2.fit(x_train, y_train)
viz_2.show()

model = RFECV(LassoCV(), cv=5, scoring='r2')
model.fit(x_train, y_train)
model.show()

"""
Section: 5 Time-Series Algorithms
"""
case_name = "mg_sizing_dataset_with_loc" df = pd.read_csv("results/" + case_name + ".csv", sep=";|,", engine="python", index_col='index') #df = df.loc[df['off-grid'] == 1] X = df[features] scaler.fit(X) X = scaler.transform(X) # X = pd.DataFrame(scaler.transform(X), index=X.index, columns=X.columns) targets = ["PV","BAT","RBAT","INV","GEN","NPV"] y = df[targets] cv = StratifiedKFold(12) param_range = np.arange(1, 30, 1) cv = KFold(n_splits=12, random_state=40, shuffle=True) viz = ValidationCurve( KNeighborsRegressor(), param_name="n_neighbors", param_range=param_range, scoring="r2", cv=cv, n_jobs=8 ) viz.fit(X, y) viz.show() visualizer = LearningCurve(KNeighborsRegressor(), scoring='r2', random_state=2, cv=cv, shuffle=True) visualizer.fit(X, y) visualizer.show() vis = CVScores(KNeighborsRegressor(), cv=cv, scoring='r2') vis.fit(X, y) # Fit the data to the visualizer vis.show()