# Feature matrix and target labels as plain arrays.
X = df_bands.values
y = df_meta['Megaclasse'].values

# sample data
# Keep a stratified 30% subsample; the remaining 70% is discarded.
X, _, y, _ = train_test_split(
    X, y, train_size=.3, shuffle=True, stratify=y, random_state=random_state
)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

## Final model - Parameter tuning
pipelines, param_grids = check_pipelines(
    [anomally_detection, classifiers_1], random_state=0, n_runs=1
)
fit_params = check_fit_params(pre_fit_params)
model_search = ModelSearchCV(
    pipelines,
    param_grids,
    scoring=scorers,
    refit='accuracy',
    n_jobs=-1,
    cv=cv,
    verbose=1,
)
model_search.fit(X, y, **fit_params)

# Persist the fitted search. The context manager guarantees the file is
# flushed and closed even if pickling raises part-way through.
with open(RESULTS_PATH + 'final_pipeline_parameter_tuning.pkl', 'wb') as pkl_file:
    pickle.dump(model_search, pkl_file)
# 'selection_strategy': ['combined', 'minority', 'majority'],
# 'truncation_factor': [-.5,0,.5],
# 'deformation_factor': [0,.5,1],
# 'k_neighbors_filter': [3,5]
# })
#]

# A single classifier with an empty parameter grid.
classifiers = [
    (
        'randomforestclassifier',
        RandomForestClassifier(n_estimators=100, random_state=random_state),
        {},
    ),
]

# Pipeline stages; the oversampling stage is currently commented out.
objects_list = [
    noise_objs,
    data_filters,
    #oversamplers,
    classifiers,
]
pipelines, param_grid = check_pipelines(objects_list, random_state, 1)

# Collect the `<step>__filters` fit parameters. The step is identified by the
# second '|'-separated component of each pipeline name.
fit_params = {}
for clf_name in dict(pipelines):
    clf_name_split = clf_name.split('|')
    step_name = clf_name_split[1]
    if step_name in ('DenoisedGeometricSMOTE', 'no_filter'):
        # Neither of these steps is given a `filters` fit parameter.
        continue
    fit_params[f'{step_name}__filters'] = (
        [single_filter] if step_name == 'singlefilter' else filts
    )

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
model_search = ModelSearchCV(
    pipelines, param_grid, n_jobs=-1, cv=cv, verbose=1
)
model_search.fit(X, y, **fit_params)
df_meta = df.drop(df.columns[df.columns.str.startswith('X201')|df.columns.str.startswith('ND')], axis=1) df_bands = df.drop(columns=df_meta.columns) # normalize znorm = StandardScaler() df_bands = pd.DataFrame(znorm.fit_transform(df_bands.values), columns=df_bands.columns, index=df_bands.index) X = df_bands.values y = df_meta['Label'].values ids = df_meta['Object'].values cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0) ## Experiment 1 (feature selection) pipelines_feature, param_grid_feature = check_pipelines( [feature_selection, classifiers_1], random_state=0, n_runs=1 ) model_search_feature = ModelSearchCV(pipelines_feature, param_grid_feature, n_jobs=-1, cv=cv, verbose=1) model_search_feature.fit(X,y) df_results_feature = report_model_search_results(model_search_feature)\ .sort_values('mean_test_score', ascending=False) #df_results_feature.to_csv('results_feature_selection.csv') pickle.dump(model_search_feature, open('gini_feature_selection.pkl','wb')) rfc = RandomForestClassifier(
'multi_class': ['ovr', 'multinomial'],
'penalty': ['l2', 'none']
}),
]

# setup scorers
def geometric_mean_macro(X, y):
    """Return `geometric_mean_score` of the two arguments with average='macro'.

    NOTE(review): this function is wrapped with `make_scorer` below, which
    calls its score function as (y_true, y_pred). `X` and `y` are therefore
    presumably the true and predicted labels, not features and targets --
    confirm before relying on the parameter names.
    """
    return geometric_mean_score(X, y, average='macro')

# Register the custom scorer under the same string key that is listed in
# `scorers`; presumably this is how the search resolves it by name -- confirm.
SCORERS['geometric_mean_macro'] = make_scorer(geometric_mean_macro)
scorers = ['accuracy', 'f1_macro', 'geometric_mean_macro']

pipelines, param_grid = check_pipelines([classifiers], random_state=random_state, n_runs=1)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)

# run experiment
# All three metrics are computed; `refit='accuracy'` names the metric used for
# the final refit.
model_search = ModelSearchCV(pipelines, param_grid, scoring=scorers, refit='accuracy', n_jobs=-1, cv=cv, verbose=1)
model_search.fit(X, y)
pickle.dump(model_search,
# Feature matrix and target labels as plain arrays.
X = df_bands.values
y = df_meta['Megaclasse'].values

# sample data
# Keep a stratified 10% subsample; the remaining 90% is discarded.
X, _, y, _ = train_test_split(
    X, y, train_size=.1, shuffle=True, stratify=y, random_state=random_state
)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

## Experiment 1 (feature selection)
pipelines_feature, param_grid_feature = check_pipelines(
    [feature_selection, classifiers], random_state=0, n_runs=1
)
model_search_feature = ModelSearchCV(
    pipelines_feature,
    param_grid_feature,
    scoring=scorers,
    refit='accuracy',
    n_jobs=-1,
    cv=cv,
    verbose=1,
)
model_search_feature.fit(X, y)

# Rank the evaluated pipelines, best mean test accuracy first, and export.
df_results_feature = report_model_search_results(model_search_feature)\
    .sort_values('mean_test_accuracy', ascending=False)
df_results_feature.to_csv('results_feature_selection.csv')

# Persist the fitted search. The context manager guarantees the file is
# flushed and closed even if pickling raises part-way through.
with open('model_search_feature_selection.pkl', 'wb') as pkl_file:
    pickle.dump(model_search_feature, pkl_file)
axis=1) df_bands = df.drop(columns=df_meta.columns) # normalize znorm = StandardScaler() df_bands = pd.DataFrame(znorm.fit_transform(df_bands.values), columns=df_bands.columns, index=df_bands.index) X = df_bands.values y = df_meta['Label'].values ids = df_meta['Object'].values cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0) ## Experiment 1 (feature selection) pipelines_feature, param_grid_feature = check_pipelines( [feature_selection, classifiers_1], random_state=0, n_runs=1) model_search_feature = ModelSearchCV(pipelines_feature, param_grid_feature, n_jobs=-1, cv=cv, verbose=1) model_search_feature.fit(X, y) df_results_feature = report_model_search_results(model_search_feature)\ .sort_values('mean_test_score', ascending=False) df_results_feature.to_csv('results_feature_selection.csv') pickle.dump(model_search_feature, open('model_search_feature_selection.pkl', 'wb')) ## Experiment 2 (anomally detection)