continue r2 = np.zeros(len(im_sampling_plot_agc_gdf_dict)) feat_corr = OrderedDict() for feat_i, feat_key in enumerate(feats_of_interest): ref_feat = ref_im_sampling_plot_agc_gdf['feats'][feat_key] feat = im_calib_plot_gdf['feats'][feat_key] (slope, intercept, r2, p, stde) = stats.linregress(ref_feat, feat) feat_corr[feat_key] = r2 # print(f'slope: {slope}, intercept: {intercept}') if False: pyplot.figure(feat_i) pyplot.subplot(2, 2, image_i) xlabel = f'WV3 Oct 2017 - {feat_key}' ylabel = f'{image_key} - {feat_key}' vis.scatter_ds( pd.DataFrame(data=np.array([ref_feat, feat]).transpose(), columns=[xlabel, ylabel])) image_feat_corr[f'+{image_key}'] = feat_corr image_feat_corr_df = pd.DataFrame.from_dict(image_feat_corr) logger.info('Correlation of features between WV3 Oct 2017 and...') logger.info('\n' + image_feat_corr_df.to_string()) logger.info('Average correlation of features over images') logger.info('\n' + image_feat_corr_df.mean(axis=1).to_string()) logger.info('Average correlation of features over images') logger.info('\n' + image_feat_corr_df.mean(axis=1).to_string()) ## run the temporal calibration accuracy test with univariate model and log(mean(R/pan) feature calib_feat_keys = ['log(mean(R/pan))'] model_data_dict = {} for image_key, im_sampling_plot_agc_gdf in im_sampling_plot_agc_gdf_dict.items(
## write per-plant and per-plot ABC/AGC etc files agc_plot_est.write_abc_plant_file(out_file_name=plant_abc_file_name) agc_plot_est.write_agc_plot_file(out_file_name=plot_agc_file_name) ## write out surrogate map with open(surrogate_file_name, 'w', newline='') as outfile: writer = DictWriter(outfile, list(agc_plot_est.abc_aggregator.master_surrogate_dict.values())[100].keys()) writer.writeheader() writer.writerows(list(agc_plot_est.abc_aggregator.master_surrogate_dict.values())) ## plot relationships between plant volume and C stocks f1 = pyplot.figure('Relation between plant vol. and C stocks') f1.set_size_inches(10, 4, forward=True) ax = pyplot.subplot(1, 2, 1, aspect='equal') vis.scatter_ds(agc_plot_est.plot_summary_agc_df, x_col='VolHa', y_col='AbcHa', xfn=lambda x: x / 1000., yfn=lambda y: y / 1000., x_label='Biomass volume ($10^3$ m$^{3}$ ha$^{-1}$)', y_label='ABC (t C ha$^{-1}$)') ax.set_title('(a)') ax = pyplot.subplot(1, 2, 2, aspect='equal') vis.scatter_ds(agc_plot_est.plot_summary_agc_df, x_col='VolHa', y_col='AgcHa', xfn=lambda x: x / 1000., yfn=lambda y: y / 1000., x_label='Biomass volume ($10^3$ m$^{3}$ ha$^{-1}$)', y_label='AGC (t C ha$^{-1}$)') ax.set_title('(b)') f1.tight_layout() pyplot.pause(0.2) f1.savefig(root_path.joinpath('data/outputs/plots/vol_vs_agc_scatter.png'), dpi=300) f2 = pyplot.figure('Relation between Litter C and ABC') f2.set_size_inches(5, 4, forward=True) vis.scatter_ds(agc_plot_est.plot_summary_agc_df, x_col='LitterCHa', y_col='AbcHa', xfn=lambda x: x / 1000., yfn=lambda y: y / 1000., x_label='Litter C (t C ha$^{-1}$)', y_label='ABC (t C ha$^{-1}$)') f2.tight_layout()
# --- fragment: closes a pd.DataFrame.from_dict(..., orient='index') call started above ---
orient='index')

# append to im_plot_data_gdf
for key in ['AbcHa2', 'AgcHa2']:
    im_plot_agc_gdf[('data', key)] = carbon_polynorm_df[key]

# fix stratum labels: rename 'Degraded' -> 'Severe' and 'Intact' -> 'Pristine'
im_plot_agc_gdf.loc[im_plot_agc_gdf['data']['Stratum'] == 'Degraded', ('data', 'Stratum')] = 'Severe'
im_plot_agc_gdf.loc[im_plot_agc_gdf['data']['Stratum'] == 'Intact', ('data', 'Stratum')] = 'Pristine'

# make an example scatter plot of feature vs AGC/ABC
# (x-axis is log10 of the '(mean(pan/R))' feature via xfn)
pyplot.figure()
vis.scatter_ds(im_plot_agc_gdf, x_col=('feats', '(mean(pan/R))'), y_col=('data', 'AgcHa'),
               class_col=('data', 'Stratum'), xfn=lambda x: np.log10(x), do_regress=True)

## select and analyse best features for predicting AGC with linear regression
# TODO - experiment with cv vals in fs and eval below - has a big effect on what is selected and how it is scored.
y = im_plot_agc_gdf['data']['AgcHa']
selected_feats_df, selected_scores = fs.forward_selection(
    im_plot_agc_gdf['feats'], y, max_num_feats=25, cv=5, score_fn=None)

# calculate scores of selected features with LOOCV
# i counts 0..n-1; the slice [:, :i + 1] scores the best 1..n selected features.
selected_loocv_scores = []
num_feats = range(0, len(selected_scores))
for i in num_feats:
    # NOTE(review): call below is truncated in this fragment — remaining args follow past it.
    scores, predicted = fs.score_model(selected_feats_df.to_numpy()[:, :i + 1], y,
# fix stratum labels: rename 'Degraded' -> 'Severe' and 'Intact' -> 'Pristine'
im_plot_agc_gdf.loc[im_plot_agc_gdf['data']['Stratum'] == 'Degraded', ('data', 'Stratum')] = 'Severe'
im_plot_agc_gdf.loc[im_plot_agc_gdf['data']['Stratum'] == 'Intact', ('data', 'Stratum')] = 'Pristine'

# make an example scatter plot of feature vs AGC/ABC
# global matplotlib styling for all subsequent figures
pyplot.rcParams["font.family"] = "arial"
pyplot.rcParams["font.size"] = "12"
pyplot.rcParams["font.style"] = "normal"
pyplot.rcParams['legend.fontsize'] = 'medium'
pyplot.rcParams['figure.titlesize'] = 'medium'
pyplot.figure()
# Here the feature is already log-scaled ('log(mean(pan/R))'), so xfn is identity.
vis.scatter_ds(im_plot_agc_gdf, x_col=('feats', 'log(mean(pan/R))'), y_col=('data', 'AgcHa'),
               class_col=('data', 'Stratum'), xfn=lambda x: x, do_regress=True)

## select and analyse best features for predicting AGC with linear regression
# TODO - experiment with cv vals in fs and eval below - has a big effect on what is selected and how it is scored.
# eg, cv=10, selects ~45 features and gets R2~.93, cv=5 selects ~10 features with R2~.88
y = im_plot_agc_gdf['data']['AgcHa']
# selected_feats_df, selected_scores = fs.fcr(im_plot_agc_gdf['feats'], y, max_num_feats=None, dist_fn=None, score_fn=None)
cv = 5
selected_feats_df, selected_scores = fs.forward_selection(
    im_plot_agc_gdf['feats'], y, max_num_feats=25, cv=cv, score_fn=None)

# calculate scores of selected features with LOOCV
# NOTE(review): this variant counts 1..n (vs 0..n-1 elsewhere) — presumably the
# loop body slices [:, :i] rather than [:, :i + 1]; confirm against the continuation.
selected_loocv_scores = []
num_feats = range(1, len(selected_scores) + 1)