        (X.shape[0], 12, X.shape[-1] // 12))
    return X_reshaped


# prepare data
X_cubes = make_data_cubes(X, order)
X_arrays = make_data_arrays(X, order)
(X_train, X_test,
 X_train_arrays, X_test_arrays,
 X_train_cubes, X_test_cubes,
 y_train, y_test) = train_test_split(
    X, X_arrays, X_cubes, y, test_size=.20, random_state=random_state)

# set up experiments
## Random Forest
rfc = RandomForestClassifier(n_estimators=100, random_state=random_state)
rfc.fit(X_train, y_train)
y_pred = rfc.predict(X_test)
rfc_results = reports(y_test, y_pred)
with open(RESULTS_PATH + 'rfc_results.pkl', 'wb') as f:
    pickle.dump(rfc_results, f)
with open(MODELS_PATH + 'random_forest.pkl', 'wb') as f:
    pickle.dump(rfc, f)

## HybridSN
cnn = PixelBasedHybridSpectralNet(X_train_cubes[0].shape, y.max() + 1,
                                  MODELS_PATH + 'pixelbased_hybridsn.hdf5')
cnn.fit(X_train_cubes, y_train, epochs=200)
y_pred = cnn.predict(X_test_cubes)
cnn_results = reports(y_test, y_pred)
with open(RESULTS_PATH + 'HybridSN_results.pkl', 'wb') as f:
    pickle.dump(cnn_results, f)

## ResNet50
resnet = PixelBasedResNet50(X_train_arrays[0].shape, y.max() + 1,
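# ---------------------------------------------------------------------------- #
# `reports` is used throughout these scripts but not defined in this fragment.
# Below is a minimal sketch of the shape its return value appears to have,
# inferred from how it is consumed elsewhere here (a confusion matrix with
# 'UA'/'PA'/'Total' margins at index 1, a metrics object at index -1).
# `reports_sketch`, its exact metrics, and its margins are assumptions, not
# the project's implementation.
# ---------------------------------------------------------------------------- #
import pandas as pd
from sklearn.metrics import (accuracy_score, classification_report,
                             cohen_kappa_score)

def reports_sketch(y_true, y_pred, label_names=None):
    # confusion matrix with 'Total' margins; the real helper also appears to
    # add 'UA'/'PA' (user's/producer's accuracy) and to format counts with
    # thousands separators
    cm = pd.crosstab(pd.Series(y_true, name='True'),
                     pd.Series(y_pred, name='Predicted'),
                     margins=True, margins_name='Total')
    if label_names is not None:
        cm = cm.rename(index=label_names, columns=label_names)
    scores = pd.Series({'Accuracy': accuracy_score(y_true, y_pred),
                        'Kappa': cohen_kappa_score(y_true, y_pred)},
                       name='Score')
    return classification_report(y_true, y_pred), cm, scores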
df = df.dropna()
X, y, obj = (df.drop(columns=['X', 'Y', 'Object', 'Label']).values,
             df.Label.values,
             df.Object.values)

## Encode Labels
label_map = dict(enumerate(np.unique(y)))
inverse_label_map = {v: k for k, v in label_map.items()}  # build once, not per element
y = np.fromiter((inverse_label_map[label] for label in y), dtype=int)

## preprocess data
scaler = StandardScaler()
X = scaler.fit_transform(X)

## generate transfer map
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25)
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)
y_pred = rfc.predict(X_test)
cm = reports(y_test, y_pred, {i: i for i in range(len(label_map))})[1]
cmun = cm.unstack().reset_index()
# drop the accuracy/total margins, then the diagonal
cmun = cmun[~cmun[['level_0', 'level_1']].isin(['UA', 'PA', 'Total']).values.any(axis=1)]
cmun = cmun[cmun['level_0'] != cmun['level_1']]
# counts come formatted with thousands separators; cast back to int
cmun[0] = cmun[0].apply(lambda x: x.replace(',', '')).astype(int)
# for each class, keep only the class it is most often confused with
cmun = cmun.sort_values(0, ascending=False).drop_duplicates(['level_0'], keep='first')
transfer_map = dict(zip(cmun['level_0'], cmun['level_1']))

random_state = 0

## classifiers for filters
filts = (
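# Toy illustration of the transfer-map rule above, with made-up counts: for
# each true class, keep the off-diagonal class it is most often confused with.
import pandas as pd

example_cm = pd.DataFrame([[50, 3, 7],
                           [2, 60, 1],
                           [9, 4, 40]])
stacked = example_cm.stack().reset_index()                   # columns: level_0, level_1, 0
stacked = stacked[stacked['level_0'] != stacked['level_1']]  # drop the diagonal
stacked = stacked.sort_values(0, ascending=False).drop_duplicates('level_0')
print(dict(zip(stacked['level_0'], stacked['level_1'])))     # -> {2: 0, 0: 2, 1: 0}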
        df_test[norm_pca_cols].values)
    autoencoders[label] = autoencoder

from sklearn.preprocessing import StandardScaler

mse_cols = ['0.0_mse', '1.0_mse', '2.0_mse', '4.0_mse',
            '5.0_mse', '6.0_mse', '7.0_mse', '8.0_mse']

# assign each pixel the class whose autoencoder reconstructs it best:
# once on standardized reconstruction errors, once on the raw errors
y_pred1 = pd.DataFrame(StandardScaler().fit_transform(df_test[mse_cols]),
                       columns=np.unique(y_train),
                       index=df_test.index).idxmin(axis=1)
y_pred2 = pd.DataFrame(df_test[mse_cols].values,
                       columns=np.unique(y_train),
                       index=df_test.index).idxmin(axis=1)
df_test['y_pred1'] = y_pred1
df_test['y_pred2'] = y_pred2

mlp = MLPEncoderClassifier(autoencoders.values(),
                           int(np.unique(y_train).max()) + 1)
mlp.fit(X_train, y_train)
df_test['mlp_pred'] = mlp.predict(df_test[norm_pca_cols].values)
df_test.to_csv(PROCESSED_PATH + 'autoencoder_mlp_classifier_results.csv')
reports(df_test['Megaclasse'], df_test['mlp_pred'])
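# Tiny demonstration of the minimum-reconstruction-error rule used for y_pred2
# (values made up): each column holds one class-specific autoencoder's MSE, and
# idxmin(axis=1) picks, per row, the class whose autoencoder reconstructs best.
import pandas as pd

demo = pd.DataFrame({'class_0': [0.10, 0.80],
                     'class_1': [0.30, 0.05]})
print(demo.idxmin(axis=1))  # -> class_0 for the first sample, class_1 for the second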
indices, y_pre = [], []
skf = StratifiedKFold(n_splits=n_splits_cnn, shuffle=True,
                      random_state=random_state)
# predict in chunks to keep the patch tensor small enough for memory
for i, (_, split_indices) in enumerate(
        skf.split(X_coords, np.zeros(X_coords.shape[0])), start=1):
    print(f'Prediction progress: {(i / n_splits_cnn) * 100:.0f}%')
    X_split = X_coords[split_indices]
    X_patches = get_patches(X_split, X_lookup, window_size)
    indices.append(X_split)
    y_pre.append(ConvNet.predict(X_patches))

y_pred = pd.DataFrame(
    data=np.concatenate(
        [np.expand_dims(np.concatenate(y_pre), 1), np.concatenate(indices)],
        axis=1),
    columns=['y_pred', 'y', 'x'])
df = df.join(y_pred.set_index(['y', 'x']), on=['y', 'x'])
df.to_csv(PROCESSED_PATH + 'classification_results.csv')

df_holdout = df[~df['train_set']].dropna()  # score on the held-out pixels only
reports(df_holdout['Megaclasse'], df_holdout['y_pred'],
        {i: i for i in df['Megaclasse'].unique()})[-1]
plt.imshow(df[~df['train_set']].pivot(index='y', columns='x', values='y_pred'))
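# `get_patches` is project code not shown in this fragment. A hedged sketch of
# what it plausibly does, given the call above (the name, the padding mode, and
# the (y, x) coordinate ordering are assumptions): cut a window_size x
# window_size neighbourhood around each coordinate out of the band-stacked image.
import numpy as np

def get_patches_sketch(coords, image, window_size):
    pad = window_size // 2
    padded = np.pad(image, ((pad, pad), (pad, pad), (0, 0)), mode='reflect')
    return np.stack([padded[y:y + window_size, x:x + window_size]
                     for y, x in coords.astype(int)])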
df_meta = df[['x', 'y', 'Megaclasse']]

# keep only the 70 highest-ranked features
features = pd.read_csv(FEATURE_RANK_PATH).iloc[:70, 0]
# normalize 4-token column names like 'prefix_band_extra_date' down to 'band_date'
cols_mapper = df.columns.to_series()\
    .apply(lambda x: x.split('_')[1] + '_' + x.split('_')[-1]
           if len(x.split('_')) == 4 else x)\
    .to_dict()
df_bands = df.rename(columns=cols_mapper)[features.to_list()]

# get data in simple format
X = df_bands.values
y = df_meta.Megaclasse.values.astype(int)

# make predictions
y_pred = clf.predict(X)
cross_spatial_results = reports(y, y_pred)
with open(RESULTS_PATH + 'near_final_cross_spatial_results.pkl', 'wb') as f:
    pickle.dump(cross_spatial_results, f)

# rfc
y_pred = rfc.predict(X)
cross_spatial_results = reports(y, y_pred)
with open(RESULTS_PATH + 'near_final_RF_cross_spatial_results.pkl', 'wb') as f:
    pickle.dump(cross_spatial_results, f)

# ---------------------------------------------------------------------------- #
# Cross Temporal Validation
# ---------------------------------------------------------------------------- #
# read data
df = pd.read_csv(DATA_PATH + '2020_01_RS_1_n_features_320.csv')
df = df.dropna()
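# What the cols_mapper rule above does, on hypothetical column names: 4-token
# names collapse to 'token1_token4'; anything else passes through unchanged.
import pandas as pd

demo_cols = pd.Series(['prefix_B02_extra_202001', 'Megaclasse'])
print(demo_cols.apply(lambda s: s.split('_')[1] + '_' + s.split('_')[-1]
                      if len(s.split('_')) == 4 else s).to_list())
# -> ['B02_202001', 'Megaclasse']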
        file)).sort_values(['X', 'Y'])
    # slice to the training rows first, then filter on the selection flag, so
    # the boolean mask is built from (and aligned with) the sliced frame
    df = df.iloc[train_id]
    try:
        df = df.loc[df[pixel_selection_col].astype(float) == 1.0]
    except KeyError:
        df = df.loc[df['cluster_status'].astype(float) == 1.0]
    X = df[band_cols].values
    y = df['Label'].values
    print('Training Random Forest...')
    rf = RandomForestClassifier(n_estimators=100, random_state=random_state)
    rf.fit(X, y)
    y_pred = label_encoder.transform(rf.predict(X_test))
    models[file] = reports(label_encoder.transform(y_test), y_pred, target_names)

# baseline: train on all training pixels, with no pixel selection
file = 'no_selection'
print(f'Starting experiment {file}...')
df = pd.read_csv(MERGED_CSV).sort_values(['X', 'Y']).dropna().iloc[train_id]
X = df[band_cols].values
y = df['Label'].values
print('Training Random Forest...')
rf = RandomForestClassifier(n_estimators=100, random_state=random_state)
rf.fit(X, y)
y_pred = label_encoder.transform(rf.predict(X_test))
models[file] = reports(label_encoder.transform(y_test), y_pred, target_names)

scores = {}
for name, results in models.items():
    scores[name] = results[-1]
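# One way to lay the collected experiment scores side by side (this assumes
# each `results[-1]` is a pandas Series of metrics, as it is elsewhere in
# these scripts; the real aggregation step may differ):
import pandas as pd

scores_df = pd.DataFrame(scores)  # columns: experiments, rows: metrics
print(scores_df.T.sort_index())   # one row per experiment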
scaler = StandardScaler()
scaler.fit(X_train)
# assign the transformed values back; `transform(..., copy=False)` returns the
# result rather than guaranteeing an in-place update, so the original calls
# discarded their output
X_train = pd.DataFrame(scaler.transform(X_train.values),
                       index=X_train.index, columns=X_train.columns)
X_test = pd.DataFrame(scaler.transform(X_test.values),
                      index=X_test.index, columns=X_test.columns)

scores = []
for method in features_selected.columns:
    rfc = RandomForestClassifier(n_estimators=100, random_state=0)
    features = features_selected[method]
    _X_tr = X_train[features[features].index]  # keep only the selected features
    _y_tr = y_train.copy()
    rfc.fit(_X_tr, _y_tr)
    _X_te = X_test[features[features].index]
    _y_te = y_test.copy()
    _y_pred = rfc.predict(_X_te)
    scores.append(reports(_y_te, _y_pred)[-1].rename({'Score': method}))

# number of features each method selected, joined with its scores
pd.DataFrame(features_selected[features_selected].count(),
             columns=['# features used'])\
    .join(pd.concat(scores, axis=1).T)\
    .sort_values('# features used', ascending=False)\
    .rename(index={'Selected': 'Intersect'})\
    .to_csv('feature_selection_results.csv')

################################################################################
## define noise introduction procedure
## define filters
## define classifiers
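# A minimal sketch of a label-noise introduction procedure matching the header
# above (uniform random flips at a fixed rate; the function name, the flip
# distribution, and the rate parameter are assumptions, not the project's
# actual procedure):
import numpy as np

def add_label_noise_sketch(y, noise_rate, random_state=0):
    rng = np.random.default_rng(random_state)
    y_noisy = np.asarray(y).copy()
    flip = rng.random(y_noisy.shape[0]) < noise_rate
    # note: a flipped label may land on its original class by chance
    y_noisy[flip] = rng.choice(np.unique(y_noisy), flip.sum())
    return y_noisy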
    index='x', columns='y', values='Megaclasse').values  # entire study area's ground truth
train_gt = df_pre_train.pivot(
    index='x', columns='y', values='Megaclasse').values  # training area's ground truth
test_gt = df_test.pivot(
    index='x', columns='y', values='Megaclasse').values  # test area's ground truth
train_rgb  # training area
ps_rgb = pivot_rgb(df_selected, xy_cols, rgb_cols)  # selected pixels
ps_gt = df_selected.pivot(
    index='x', columns='y', values='Megaclasse').values  # selected pixels' ground truth
test_rgb  # test area
test_gt  # test area's ground truth
pred_labels = df_final.pivot(index='x', columns='y', values='y_pred').values  # predictions

reports(df_final['y_true'], df_final['y_pred'], labels)[-1]

rgbrgb = pivot_rgb(df_final, xy_cols, rgb_cols)
plot_image(
    [
        # np.moveaxis(total_rgb, 0, 1), np.moveaxis(train_rgb, 0, 1), np.moveaxis(test_rgb, 0, 1),
        # np.moveaxis(total_gt, 0, 1), np.moveaxis(train_gt, 0, 1), np.moveaxis(test_gt, 0, 1),
        # np.moveaxis(train_rgb, 0, 1), np.moveaxis(ps_rgb, 0, 1), np.moveaxis(ps_gt, 0, 1),
        np.moveaxis(test_rgb, 0, 1),
        np.moveaxis(test_gt, 0, 1),
        pred_labels
    ],
    # num_rows=4, figsize=(80, 20)
)
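# `plot_image` is a project helper not shown in this fragment. A hedged sketch
# of a grid plotter with the same call shape (the signature and defaults are
# assumptions): lay RGB arrays and label rasters out on a grid of subplots.
import matplotlib.pyplot as plt
import numpy as np

def plot_image_sketch(images, num_rows=1, figsize=(20, 5)):
    num_cols = int(np.ceil(len(images) / num_rows))
    fig, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    axes = np.ravel(axes)
    for ax in axes:
        ax.axis('off')  # hide unused panels too
    for ax, img in zip(axes, images):
        ax.imshow(img)
    plt.show()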