# Score the fitted grid-search model on both splits and log the result row.
score_train = grid.score(X_train, y_train)
score_test = grid.score(X_test, y_test)

# One row per repetition: best hyperparameters plus the two accuracies and
# the seed used for this k-fold split.
best_p = grid.best_params_
bp = pd.DataFrame(best_p, index=[i])
bp['accuracy_train'] = score_train
bp['accuracy_test'] = score_test
bp['random_state_k_fold'] = i * 42

# FIX: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported replacement and is behaviorally equivalent here.
df = pd.concat([df, bp], ignore_index=True)

# create folder and save
save_output.function_save_output(df, name_clf)
# Designate distributions to sample hyperparameters from:
# C on a log2 grid, PCA retained-variance fractions for dimensionality reduction.
C_range = np.power(2, np.arange(-10, 8, dtype=float))
n_features_to_test = [0.85, 0.9, 0.95]

# SVR with a min-max-scaled regression target (inverse-transformed on predict).
clf = TransformedTargetRegressor(regressor=SVR(kernel='linear'),
                                 transformer=MinMaxScaler())

# SVR pipeline: feature scaling -> optional PCA -> regressor.
steps = [('scaler', StandardScaler()),
         ('red_dim', PCA()),
         ('clf', clf)]
pipeline = Pipeline(steps)

# Two grids: with PCA reduction, and with the reduction step disabled.
parameteres = [
    {'scaler': [StandardScaler()],
     'red_dim': [PCA(random_state=42)],
     'red_dim__n_components': list(n_features_to_test),
     'clf__regressor__C': list(C_range)},
    {'scaler': [StandardScaler()],
     'red_dim': [None],
     'clf__regressor__C': list(C_range)},
]

results = GSCV.function_GSCV(data_train, labels_train, data_test, labels_test,
                             pipeline, parameteres)

# create folder and save
save_output.function_save_output(results, name_clf)
#implmentation of steps scaler = scaler_ #pca = PCA(n_components=n_comp_pca, whiten=whiten_, random_state=random_state_PCA) svm = SVC(kernel='rbf', probability=True, random_state=random_state_clf) steps = [('scaler', scaler), ('clf', svm)] pipeline = Pipeline(steps) df_score_value, df_mean_std = score_cv_3_classes.function_score_cv( public_data, public_labels, pipeline, RS_outer_KF) df_tot = pd.concat([df_best_params, df_score_value, df_mean_std], axis=1, ignore_index=False) return df_tot for j in range(1, 2): df_MMS = create_score_csv_default_HP(MinMaxScaler(), 2 * j) save_output.function_save_output(df_MMS, 'MMS', name, 2 * j) df_STDS = create_score_csv_default_HP(StandardScaler(), 2 * j) save_output.function_save_output(df_STDS, 'STDS', name, 2 * j) df_RBT = create_score_csv_default_HP(RobustScaler(), 2 * j) save_output.function_save_output(df_RBT, 'RBT', name, 2 * j) df_NONE = create_score_csv_default_HP(None, 2 * j) save_output.function_save_output(df_NONE, 'NONE', name, 2 * j)
# Shared pipeline: scaler -> PCA -> linear SVM with a fixed seed.
steps = [('scaler', MinMaxScaler()),
         ('red_dim', PCA()),
         ('clf', SVC(kernel='linear', probability=True, random_state=503))]
pipeline = Pipeline(steps)

# MMS
parameteres_1 = [{'scaler': [MinMaxScaler()],
                  'red_dim': [PCA(random_state=42)],
                  'red_dim__n_components': list(n_features_to_test),
                  'clf__C': list(C_range),
                  'clf__class_weight': [None, 'balanced']}]

# Five nested-CV repetitions, seeded 2, 4, 6, 8, 10 (equivalent to j*2 for j in 1..5).
for j in range(1, 6):
    seed = j * 2
    results, best_estimators_dict = nested_cv.function_nested_cv(
        public_data, public_labels, pipeline, parameteres_1, seed)
    # create folder and save
    save_output.function_save_output(results, dim_reduction, name_1, seed)

# RBTS
parameteres_2 = [{'scaler': [RobustScaler()],
                  'red_dim': [PCA(random_state=42)],
                  'red_dim__n_components': list(n_features_to_test),
                  'clf__C': list(C_range),
                  'clf__class_weight': [None, 'balanced']}]

for j in range(1, 6):
    seed = j * 2
    results, best_estimators_dict = nested_cv.function_nested_cv(
        public_data, public_labels, pipeline, parameteres_2, seed)
    # create folder and save
    save_output.function_save_output(results, dim_reduction, name_2, seed)
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler

scalers_to_test = [StandardScaler(), RobustScaler(), MinMaxScaler()]

# Designate distributions to sample hyperparameters from:
# C on a log2 grid (2^-10 .. 2^8), PCA retained-variance fractions.
C_range = np.power(2, np.arange(-10, 9, dtype=float))
n_features_to_test = [0.85, 0.9, 0.95]

# SVM pipeline: min-max scaling -> dimensionality reduction -> linear SVC.
steps = [('scaler', MinMaxScaler()),
         ('red_dim', PCA()),
         ('clf', SVC(kernel='linear', probability=True))]
pipeline = Pipeline(steps)

# Four alternatives for the reduction step: PCA, two univariate filters
# (ANOVA F-test and mutual information, both keeping the top 10%), and none.
parameteres = [
    {'scaler': [MinMaxScaler()],
     'red_dim': [PCA(random_state=42)],
     'red_dim__n_components': list(n_features_to_test),
     'clf__C': list(C_range),
     'clf__class_weight': [None, 'balanced']},
    {'scaler': [MinMaxScaler()],
     'red_dim': [SelectPercentile(f_classif, percentile=10)],
     'clf__C': list(C_range),
     'clf__class_weight': [None, 'balanced']},
    {'scaler': [MinMaxScaler()],
     'red_dim': [SelectPercentile(mutual_info_classif, percentile=10)],
     'clf__C': list(C_range),
     'clf__class_weight': [None, 'balanced']},
    {'scaler': [MinMaxScaler()],
     'red_dim': [None],
     'clf__C': list(C_range),
     'clf__class_weight': [None, 'balanced']},
]

# Single repetition (j = 1), so the nested CV is seeded with 2.
for j in range(1, 2):
    results, best_estimators_dict = nested_cv.function_nested_cv(
        public_data, public_labels, pipeline, parameteres, j * 2)
    # create folder and save
    save_output.function_save_output(results, name, j * 2)
# MMS: min-max-scaled variant of the grid.
parameteres_1 = [{
    'scaler': [MinMaxScaler()],
    'red_dim': [PCA(random_state=42)],
    'red_dim__n_components': list(n_features_to_test),
    'clf__C': list(C_range),
    'clf__class_weight': [None, 'balanced'],
}]

results_1 = nested_cv.function_nested_cv(public_data, public_labels,
                                         pipeline, parameteres_1)

# create folder and save
save_output.function_save_output(results_1, dim_reduction, name_1)

# RBTS: robust-scaled variant of the same grid.
parameteres_2 = [{
    'scaler': [RobustScaler()],
    'red_dim': [PCA(random_state=42)],
    'red_dim__n_components': list(n_features_to_test),
    'clf__C': list(C_range),
    'clf__class_weight': [None, 'balanced'],
}]

results_2 = nested_cv.function_nested_cv(public_data, public_labels,
                                         pipeline, parameteres_2)

# create folder and save