def create_attribute_data(self):
    """
    Create the attribute data which has both stand level attributes
    as well as species data

    Parameters
    ----------
    None

    Returns
    -------
    None
    """
    parser = self.parameter_parser

    # Lazily build and cache the plot ID string on first use
    if not hasattr(self, 'id_str'):
        self.id_str = self._get_id_string()

    # Pull the attribute records and dump them to the stand
    # attribute CSV file
    attribute_table = self.plot_db.get_attribute_data(self.id_str)
    utilities.rec2csv(attribute_table, parser.stand_attribute_file)

    # Emit the metadata that describes each attribute field
    self.create_attribute_metadata(attribute_table.dtype.names)
def create_hex_attribute_file(self):
    """
    Create the file containing hexagon IDs and continuous stand
    attributes for forested Annual plots to be used in Riemann
    accuracy diagnostics

    Parameters
    ----------
    None

    Returns
    -------
    None
    """
    parser = self.parameter_parser

    # The DB proc expects the year lists as comma-delimited strings
    plot_years = ','.join(str(year) for year in parser.plot_years)
    image_years = ','.join(str(year) for year in parser.image_years)

    # Fetch the crosswalk of plot IDs to hex IDs
    hex_attributes = self.plot_db.get_hex_attributes(
        parser.riemann_assessment_year, plot_years, image_years)

    # Make sure the Riemann output directory exists, then write
    out_file = parser.hex_attribute_file
    out_dir = os.path.dirname(out_file)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    utilities.rec2csv(hex_attributes, out_file)
def create_validation_attribute_file(self):
    """
    Create the file containing structure and species attributes for
    the plots to be used in the validation accuracy diagnostics

    Parameters
    ----------
    None

    Returns
    -------
    None
    """
    parser = self.parameter_parser

    # Retrieve the attribute records for the validation plots
    validation_attributes = self.plot_db.get_validation_attributes()

    # Ensure the validation output folder exists before writing the
    # validation attribute CSV
    out_dir = parser.validation_output_folder
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    utilities.rec2csv(
        validation_attributes, parser.validation_attribute_file)
def run_diagnostic(self):
    """
    Filter the dependent nn_index file down to records at or above
    the index threshold and write them to the outlier file.
    """
    # Load the dependent nn_index records
    records = utilities.csv2rec(self.nn_index_file)

    # Keep only records whose average position meets the threshold
    outliers = records[records.AVERAGE_POSITION >= self.index_threshold]

    # Persist the outlier records
    utilities.rec2csv(outliers, self.nn_index_outlier_file)
def write_hex_stats(self, data, id_field, stat_fields, min_plots_per_hex,
                    out_file):
    """
    Group records by hex ID, compute the requested statistics, drop
    hexes with too few plots, and write the result to CSV.
    """
    # Summarize by hex ID using the supplied statistic fields
    grouped = mlab.rec_groupby(data, (id_field,), stat_fields)

    # Enforce the minimum plot count per hex
    grouped = grouped[grouped.PLOT_COUNT >= min_plots_per_hex]

    # Persist the summary
    utilities.rec2csv(grouped, out_file)
def write_hex_stats(self, data, id_field, stat_fields, min_plots_per_hex,
                    out_file):
    """
    Summarize *data* per hexagon and write the filtered summary.

    Hexagons with fewer than min_plots_per_hex plots are excluded
    from the output file.
    """
    # Per-hex summary of the requested statistic fields
    summary = mlab.rec_groupby(data, (id_field,), stat_fields)
    # Keep only sufficiently populated hexagons, then write out
    keep = summary.PLOT_COUNT >= min_plots_per_hex
    utilities.rec2csv(summary[keep], out_file)
def create_area_estimates(self):
    """
    Create the observed area estimates file which stores plot
    based estimates of stand variables

    Parameters
    ----------
    None

    Returns
    -------
    None
    """
    # Set an alias for the parameter parser
    p = self.parameter_parser

    # NOTE: the former 'eslf_only' handling derived from
    # p.exclusion_codes was removed because the DB proc no longer
    # supports restricting to plots with ESLF codes.

    # Pull the area expansion data plus nonforest and nonsampled
    # hectare totals
    area_estimate_table, nf_hectares, ns_hectares = \
        self.plot_db.get_area_estimates(p.regional_assessment_year)

    # Build two zeroed sentinel records to prepend to the table:
    # ID -10002 carries the nonsampled hectares and ID -10001
    # carries the nonforest hectares
    id_field = p.summary_level + 'ID'
    sentinel_recs = np.recarray(2, dtype=area_estimate_table.dtype)
    for record in sentinel_recs:
        for field_name in sentinel_recs.dtype.names:
            setattr(record, field_name, 0.0)
    setattr(sentinel_recs[0], id_field, -10002)
    setattr(sentinel_recs[0], 'HECTARES', ns_hectares)
    setattr(sentinel_recs[1], id_field, -10001)
    setattr(sentinel_recs[1], 'HECTARES', nf_hectares)
    area_estimate_table = np.hstack((sentinel_recs, area_estimate_table))

    # Ensure the output directory exists, then write the CSV
    area_estimate_file = p.area_estimate_file
    aa_dir = os.path.dirname(area_estimate_file)
    if not os.path.exists(aa_dir):
        os.makedirs(aa_dir)
    utilities.rec2csv(area_estimate_table, area_estimate_file)
def create_species_plot_count_file(self):
    """
    Create a CSV of per-species plot counts for this model.

    For imagery models a single count table is written.  For
    non-imagery models, a second ID string is built with Ecoplot
    membership toggled (added if absent, removed if present), a
    second count table is retrieved, and the two tables are
    left-outer-joined on SPP_LAYER so counts with and without
    Ecoplots appear side by side.

    Returns
    -------
    None
    """
    p = self.parameter_parser

    # Store list of plot IDs into a string if this variable hasn't
    # yet been created
    if not hasattr(self, 'id_str'):
        self.id_str = self._get_id_string()

    spp_plot_table = self.plot_db.get_species_plot_counts(self.id_str)
    spp_plot_file = p.model_directory + '/' + p.model_type + \
        '_spp_plot_counts.csv'

    if p.model_type in p.imagery_model_types:
        utilities.rec2csv(spp_plot_table, spp_plot_file)
    else:
        # Create a second ID string for non-imagery models with
        # Ecoplot membership toggled.  Work on a copy so the shared
        # p.plot_types list is not mutated as a side effect (the
        # previous code aliased and modified it in place).
        if 'ecoplot' not in p.plot_types:
            plot_types_w_eco = list(p.plot_types)
            plot_types_w_eco.append('ecoplot')
            id_str2 = self._get_id_string(','.join(plot_types_w_eco))
            id_eco = 2
        else:
            plot_types_wo_eco = list(p.plot_types)
            plot_types_wo_eco.remove('ecoplot')
            id_str2 = self._get_id_string(','.join(plot_types_wo_eco))
            id_eco = 1

        spp_plot_table2 = self.plot_db.get_species_plot_counts(id_str2)

        # Join the plot counts w/ Ecoplots to the plot counts
        # w/o Ecoplots
        if id_eco == 1:
            joined_spp_plot_table = mlab.rec_join(
                'SPP_LAYER', spp_plot_table, spp_plot_table2,
                'leftouter')
        else:
            joined_spp_plot_table = mlab.rec_join(
                'SPP_LAYER', spp_plot_table2, spp_plot_table,
                'leftouter')
        utilities.rec2csv(joined_spp_plot_table, spp_plot_file)
def create_species_plot_count_file(self):
    """
    Write the per-species plot count CSV for this model.

    Imagery models get a single count table.  Non-imagery models
    get two tables — one with and one without Ecoplots — joined on
    SPP_LAYER via a left outer join.

    Returns
    -------
    None
    """
    p = self.parameter_parser

    # Lazily build and cache the plot ID string
    if not hasattr(self, 'id_str'):
        self.id_str = self._get_id_string()

    spp_plot_table = self.plot_db.get_species_plot_counts(self.id_str)
    spp_plot_file = p.model_directory + '/' + p.model_type + \
        '_spp_plot_counts.csv'

    if p.model_type in p.imagery_model_types:
        utilities.rec2csv(spp_plot_table, spp_plot_file)
    else:
        # Toggle Ecoplot membership on a COPY of p.plot_types; the
        # previous implementation mutated the shared list in place,
        # changing p.plot_types for all subsequent callers.
        if 'ecoplot' not in p.plot_types:
            # Ecoplots absent: build a second ID string with them
            toggled = list(p.plot_types) + ['ecoplot']
            id_eco = 2
        else:
            # Ecoplots present: build a second ID string without them
            toggled = [t for t in p.plot_types if t != 'ecoplot']
            id_eco = 1
        id_str2 = self._get_id_string(','.join(toggled))

        spp_plot_table2 = self.plot_db.get_species_plot_counts(id_str2)

        # Join so the with-Ecoplot counts come first
        if id_eco == 1:
            joined_spp_plot_table = mlab.rec_join(
                'SPP_LAYER', spp_plot_table, spp_plot_table2,
                'leftouter')
        else:
            joined_spp_plot_table = mlab.rec_join(
                'SPP_LAYER', spp_plot_table2, spp_plot_table,
                'leftouter')
        utilities.rec2csv(joined_spp_plot_table, spp_plot_file)
def create_ordination_matrices(self):
    """
    Create the species and environmental matrices needed for
    ordination modeling.  Write these files out to the location as
    specified in the parameter file

    Parameters
    ----------
    None

    Returns
    -------
    None
    """
    parser = self.parameter_parser

    # Comma-delimited parameter strings expected by the DB procs
    plot_years = ','.join(str(year) for year in parser.plot_years)
    image_years = ','.join(str(year) for year in parser.image_years)
    ordination_variables = \
        ','.join(parser.get_ordination_variable_names())

    # Lazily build and cache the plot ID string
    if not hasattr(self, 'id_str'):
        self.id_str = self._get_id_string()

    # Species matrix
    spp_table = self.plot_db.get_species_matrix(
        self.id_str, 'ORDINATION', parser.lump_table)
    utilities.rec2csv(spp_table, parser.species_matrix_file)

    # Environmental matrix
    env_table = self.plot_db.get_environmental_matrix(
        self.id_str, plot_years, image_years, ordination_variables)
    utilities.rec2csv(env_table, parser.environmental_matrix_file)
def create_ordination_matrices(self):
    """
    Build and write the species and environmental matrices used by
    ordination modeling, at the paths given in the parameter file.

    Parameters
    ----------
    None

    Returns
    -------
    None
    """
    p = self.parameter_parser

    # The DB procs take comma-delimited string arguments
    plot_years = ','.join(map(str, p.plot_years))
    image_years = ','.join(map(str, p.image_years))
    ordination_variables = ','.join(p.get_ordination_variable_names())

    # Build the plot ID string once and cache it on the instance
    if not hasattr(self, 'id_str'):
        self.id_str = self._get_id_string()

    # Retrieve and write the species matrix
    spp_file = p.species_matrix_file
    spp_table = self.plot_db.get_species_matrix(
        self.id_str, 'ORDINATION', p.lump_table)
    utilities.rec2csv(spp_table, spp_file)

    # Retrieve and write the environmental matrix
    env_file = p.environmental_matrix_file
    env_table = self.plot_db.get_environmental_matrix(
        self.id_str, plot_years, image_years, ordination_variables)
    utilities.rec2csv(env_table, env_file)
def run_diagnostic(self):
    """
    Run the Riemann accuracy diagnostic: write observed and
    predicted stand attributes at the plot/pixel scale and at each
    hexagon resolution, then compute GMFR/AC and KS statistics for
    every (level, k) pairing into the hex statistics file.

    Fixes relative to the previous revision:
    - The observed/predicted ID alignment check used
      ``np.all(ids1 != ids2)`` which only fired when EVERY ID
      differed; it now raises on ANY mismatch.
    - ``stats_fh`` is now closed when the diagnostic finishes.
    """
    # Shortcut to the parameter parser
    p = self.parameter_parser

    # ID field
    id_field = p.summary_level + 'ID'

    # Root directory for Riemann files
    root_dir = p.riemann_output_folder

    # Read in hex input file
    obs_data = utilities.csv2rec(self.hex_attribute_file)

    # Get the hexagon levels and ensure that the fields exist in the
    # hex_attribute file
    hex_resolutions = p.riemann_hex_resolutions
    hex_fields = [x[0] for x in hex_resolutions]
    for field in hex_fields:
        if field not in obs_data.dtype.names:
            err_msg = 'Field ' + field + ' does not exist in the '
            err_msg += 'hex_attribute file'
            raise ValueError(err_msg)

    # Create the directory structure based on the hex levels
    hex_levels = ['hex_' + str(x[1]) for x in hex_resolutions]
    all_levels = ['plot_pixel'] + hex_levels
    for level in all_levels:
        sub_dir = os.path.join(root_dir, level)
        if not os.path.exists(sub_dir):
            os.makedirs(sub_dir)

    # Get the values of k
    k_values = p.riemann_k_values

    # Create a dictionary of plot ID to image year (or model_year for
    # non-imagery models) for these plots
    if p.model_type in p.imagery_model_types:
        id_x_year = dict((x[id_field], x.IMAGE_YEAR) for x in obs_data)
    else:
        id_x_year = dict((x[id_field], p.model_year) for x in obs_data)

    # Create a PredictionRun instance
    pr = prediction_run.PredictionRun(p)

    # Get the neighbors and distances for these IDs
    pr.calculate_neighbors_at_ids(id_x_year, id_field=id_field)

    # Create the lookup of id_field to LOC_ID for the hex plots
    nsa_id_dict = dict((x[id_field], x.LOC_ID) for x in obs_data)

    # Create a dictionary between id_field and no_self_assign_field
    # for the model plots
    env_file = p.environmental_matrix_file
    env_data = utilities.csv2rec(env_file)
    model_nsa_id_dict = dict(
        (getattr(x, id_field), x.LOC_ID) for x in env_data)

    # Stitch the two dictionaries together; 'plot_id' avoids
    # shadowing the builtin 'id'
    for plot_id in sorted(model_nsa_id_dict.keys()):
        if plot_id not in nsa_id_dict:
            nsa_id_dict[plot_id] = model_nsa_id_dict[plot_id]

    # Get the stand attribute metadata and retrieve only the
    # continuous accuracy attributes
    stand_metadata_file = p.stand_metadata_file
    mp = xsmp.XMLStandMetadataParser(stand_metadata_file)
    attrs = [
        x.field_name for x in mp.attributes
        if x.field_type == 'CONTINUOUS' and x.accuracy_attr == 1]

    # Subset the attributes for fields that are in the
    # hex_attribute file
    attrs = [x for x in attrs if x in obs_data.dtype.names]
    plot_pixel_obs = mlab.rec_keep_fields(obs_data, [id_field] + attrs)

    # Write out the plot_pixel observed file
    file_name = 'plot_pixel_observed.csv'
    output_file = os.path.join(root_dir, 'plot_pixel', file_name)
    utilities.rec2csv(plot_pixel_obs, output_file)

    # Iterate over values of k
    for k in k_values:
        # Construct the output file name
        file_name = '_'.join(('plot_pixel', 'predicted', 'k' + str(k)))
        file_name += '.csv'
        output_file = os.path.join(root_dir, 'plot_pixel', file_name)
        out_fh = open(output_file, 'w')

        # For the plot/pixel scale, retrieve the independent predicted
        # data for this value of k.  Even though attributes are being
        # returned from this function, we want to use the attribute
        # list that we've already found above.
        prediction_generator = pr.calculate_predictions_at_k(
            k=k, id_field=id_field, independent=True,
            nsa_id_dict=nsa_id_dict)

        # Write out the field names
        out_fh.write(id_field + ',' + ','.join(attrs) + '\n')

        # Write out the predictions for this k
        for plot_prediction in prediction_generator:
            # Write this record to the predicted attribute file
            pr.write_predicted_record(plot_prediction, out_fh, attrs=attrs)

        # Close this file
        out_fh.close()

    # Create the fields for which to extract statistics at the hexagon
    # levels
    mean_fields = [(id_field, len, 'PLOT_COUNT')]
    mean_fields.extend([(x, np.mean, x) for x in attrs])
    mean_fields = tuple(mean_fields)

    sd_fields = [(id_field, len, 'PLOT_COUNT')]
    sd_fields.extend([(x, np.std, x) for x in attrs])
    sd_fields = tuple(sd_fields)

    stat_sets = {
        'mean': mean_fields,
        'std': sd_fields,
    }

    # For each hexagon level, associate the plots with their hexagon ID
    # and find observed and predicted statistics for each hexagon
    for hex_resolution in hex_resolutions:
        (hex_id_field, hex_distance) = hex_resolution[0:2]
        min_plots_per_hex = hex_resolution[3]
        prefix = 'hex_' + str(hex_distance)

        # Create a crosswalk between the id_field and the hex_id_field
        id_x_hex = mlab.rec_keep_fields(obs_data, [id_field, hex_id_field])

        # Iterate over all sets of statistics and write a unique file
        # for each set.  (.items() rather than the Py2-only
        # .iteritems() — works on both Python 2 and 3.)
        for (stat_name, stat_fields) in stat_sets.items():
            # Get the output file name
            obs_out_file = \
                '_'.join((prefix, 'observed', stat_name)) + '.csv'
            obs_out_file = os.path.join(root_dir, prefix, obs_out_file)

            # Write out the observed file
            self.write_hex_stats(obs_data, hex_id_field, stat_fields,
                                 min_plots_per_hex, obs_out_file)

        # Iterate over values of k for the predicted values
        for k in k_values:
            # Open the plot_pixel predicted file for this value of k
            # and join the hex_id_field to the recarray
            prd_file = 'plot_pixel_predicted_k' + str(k) + '.csv'
            prd_file = os.path.join(root_dir, 'plot_pixel', prd_file)
            prd_data = utilities.csv2rec(prd_file)
            prd_data = mlab.rec_join(id_field, prd_data, id_x_hex)

            # Iterate over all sets of statistics and write a unique
            # file for each set
            for (stat_name, stat_fields) in stat_sets.items():
                # Get the output file name
                prd_out_file = '_'.join(
                    (prefix, 'predicted', 'k' + str(k),
                     stat_name)) + '.csv'
                prd_out_file = os.path.join(root_dir, prefix, prd_out_file)

                # Write out the predicted file
                self.write_hex_stats(prd_data, hex_id_field, stat_fields,
                                     min_plots_per_hex, prd_out_file)

    # Calculate the ECDF and AC statistics
    # For ECDF and AC, it is a paired comparison between the observed
    # and predicted data.  We do this at each value of k and for each
    # hex resolution level.

    # Open the stats file
    stats_file = p.hex_statistics_file
    stats_fh = open(stats_file, 'w')
    header_fields = ['LEVEL', 'K', 'VARIABLE', 'STATISTIC', 'VALUE']
    stats_fh.write(','.join(header_fields) + '\n')

    # Create a list of RiemannComparison instances which store the
    # information needed to do comparisons between observed and
    # predicted files for any level or value of k
    compare_list = []
    for hex_resolution in hex_resolutions:
        (hex_id_field, hex_distance) = hex_resolution[0:2]
        prefix = 'hex_' + str(hex_distance)
        obs_file = '_'.join((prefix, 'observed', 'mean')) + '.csv'
        obs_file = os.path.join(root_dir, prefix, obs_file)
        for k in k_values:
            prd_file = '_'.join(
                (prefix, 'predicted', 'k' + str(k), 'mean')) + '.csv'
            prd_file = os.path.join(root_dir, prefix, prd_file)
            r = RiemannComparison(
                prefix, obs_file, prd_file, hex_id_field, k)
            compare_list.append(r)

    # Add the plot_pixel comparisons to this list
    prefix = 'plot_pixel'
    obs_file = 'plot_pixel_observed.csv'
    obs_file = os.path.join(root_dir, prefix, obs_file)
    for k in k_values:
        prd_file = 'plot_pixel_predicted_k' + str(k) + '.csv'
        prd_file = os.path.join(root_dir, prefix, prd_file)
        r = RiemannComparison(prefix, obs_file, prd_file, id_field, k)
        compare_list.append(r)

    # Do all the comparisons
    for c in compare_list:
        # Open the observed file
        obs_data = utilities.csv2rec(c.obs_file)

        # Open the predicted file
        prd_data = utilities.csv2rec(c.prd_file)

        # Ensure that the IDs between the observed and predicted
        # data line up.  BUG FIX: the previous check raised only when
        # ALL IDs differed (np.all), silently accepting partial
        # misalignment; np.any raises on the first mismatch.
        ids1 = getattr(obs_data, c.id_field)
        ids2 = getattr(prd_data, c.id_field)
        if np.any(ids1 != ids2):
            err_msg = 'IDs do not match between observed and '
            err_msg += 'predicted data'
            raise ValueError(err_msg)

        for attr in attrs:
            arr1 = getattr(obs_data, attr)
            arr2 = getattr(prd_data, attr)
            rv = RiemannVariable(arr1, arr2)

            gmfr_stats = rv.gmfr_statistics()
            for stat in ('gmfr_a', 'gmfr_b', 'ac', 'ac_sys', 'ac_uns'):
                stat_line = '%s,%d,%s,%s,%.4f\n' % (
                    c.prefix.upper(), c.k, attr, stat.upper(),
                    gmfr_stats[stat])
                stats_fh.write(stat_line)

            ks_stats = rv.ks_statistics()
            for stat in ('ks_max', 'ks_mean'):
                stat_line = '%s,%d,%s,%s,%.4f\n' % (
                    c.prefix.upper(), c.k, attr, stat.upper(),
                    ks_stats[stat])
                stats_fh.write(stat_line)

    # Close the statistics file (previously leaked)
    stats_fh.close()
def run_diagnostic(self):
    """
    Run the Riemann accuracy diagnostic: write observed and
    predicted stand attributes at the plot/pixel scale and at each
    hexagon resolution, then compute GMFR/AC and KS statistics for
    every (level, k) pairing into the hex statistics file.

    Fixes relative to the previous revision:
    - The observed/predicted ID alignment check used
      ``np.all(ids1 != ids2)`` which only fired when EVERY ID
      differed; it now raises on ANY mismatch.
    - ``stats_fh`` is now closed when the diagnostic finishes.
    """
    # Shortcut to the parameter parser
    p = self.parameter_parser

    # ID field
    id_field = p.summary_level + 'ID'

    # Root directory for Riemann files
    root_dir = p.riemann_output_folder

    # Read in hex input file
    obs_data = utilities.csv2rec(self.hex_attribute_file)

    # Get the hexagon levels and ensure that the fields exist in the
    # hex_attribute file
    hex_resolutions = p.riemann_hex_resolutions
    hex_fields = [x[0] for x in hex_resolutions]
    for field in hex_fields:
        if field not in obs_data.dtype.names:
            err_msg = 'Field ' + field + ' does not exist in the '
            err_msg += 'hex_attribute file'
            raise ValueError(err_msg)

    # Create the directory structure based on the hex levels
    hex_levels = ['hex_' + str(x[1]) for x in hex_resolutions]
    all_levels = ['plot_pixel'] + hex_levels
    for level in all_levels:
        sub_dir = os.path.join(root_dir, level)
        if not os.path.exists(sub_dir):
            os.makedirs(sub_dir)

    # Get the values of k
    k_values = p.riemann_k_values

    # Create a dictionary of plot ID to image year (or model_year for
    # non-imagery models) for these plots
    if p.model_type in p.imagery_model_types:
        id_x_year = dict((x[id_field], x.IMAGE_YEAR) for x in obs_data)
    else:
        id_x_year = dict((x[id_field], p.model_year) for x in obs_data)

    # Create a PredictionRun instance
    pr = prediction_run.PredictionRun(p)

    # Get the neighbors and distances for these IDs
    pr.calculate_neighbors_at_ids(id_x_year, id_field=id_field)

    # Create the lookup of id_field to LOC_ID for the hex plots
    nsa_id_dict = dict((x[id_field], x.LOC_ID) for x in obs_data)

    # Create a dictionary between id_field and no_self_assign_field
    # for the model plots
    env_file = p.environmental_matrix_file
    env_data = utilities.csv2rec(env_file)
    model_nsa_id_dict = dict(
        (getattr(x, id_field), x.LOC_ID) for x in env_data)

    # Stitch the two dictionaries together; 'plot_id' avoids
    # shadowing the builtin 'id'
    for plot_id in sorted(model_nsa_id_dict.keys()):
        if plot_id not in nsa_id_dict:
            nsa_id_dict[plot_id] = model_nsa_id_dict[plot_id]

    # Get the stand attribute metadata and retrieve only the
    # continuous accuracy attributes
    stand_metadata_file = p.stand_metadata_file
    mp = xsmp.XMLStandMetadataParser(stand_metadata_file)
    attrs = [
        x.field_name for x in mp.attributes
        if x.field_type == 'CONTINUOUS' and x.accuracy_attr == 1]

    # Subset the attributes for fields that are in the
    # hex_attribute file
    attrs = [x for x in attrs if x in obs_data.dtype.names]
    plot_pixel_obs = mlab.rec_keep_fields(obs_data, [id_field] + attrs)

    # Write out the plot_pixel observed file
    file_name = 'plot_pixel_observed.csv'
    output_file = os.path.join(root_dir, 'plot_pixel', file_name)
    utilities.rec2csv(plot_pixel_obs, output_file)

    # Iterate over values of k
    for k in k_values:
        # Construct the output file name
        file_name = '_'.join(('plot_pixel', 'predicted', 'k' + str(k)))
        file_name += '.csv'
        output_file = os.path.join(root_dir, 'plot_pixel', file_name)
        out_fh = open(output_file, 'w')

        # For the plot/pixel scale, retrieve the independent predicted
        # data for this value of k.  Even though attributes are being
        # returned from this function, we want to use the attribute
        # list that we've already found above.
        prediction_generator = pr.calculate_predictions_at_k(
            k=k, id_field=id_field, independent=True,
            nsa_id_dict=nsa_id_dict)

        # Write out the field names
        out_fh.write(id_field + ',' + ','.join(attrs) + '\n')

        # Write out the predictions for this k
        for plot_prediction in prediction_generator:
            # Write this record to the predicted attribute file
            pr.write_predicted_record(plot_prediction, out_fh, attrs=attrs)

        # Close this file
        out_fh.close()

    # Create the fields for which to extract statistics at the hexagon
    # levels
    mean_fields = [(id_field, len, 'PLOT_COUNT')]
    mean_fields.extend([(x, np.mean, x) for x in attrs])
    mean_fields = tuple(mean_fields)

    sd_fields = [(id_field, len, 'PLOT_COUNT')]
    sd_fields.extend([(x, np.std, x) for x in attrs])
    sd_fields = tuple(sd_fields)

    stat_sets = {
        'mean': mean_fields,
        'std': sd_fields,
    }

    # For each hexagon level, associate the plots with their hexagon ID
    # and find observed and predicted statistics for each hexagon
    for hex_resolution in hex_resolutions:
        (hex_id_field, hex_distance) = hex_resolution[0:2]
        min_plots_per_hex = hex_resolution[3]
        prefix = 'hex_' + str(hex_distance)

        # Create a crosswalk between the id_field and the hex_id_field
        id_x_hex = mlab.rec_keep_fields(obs_data, [id_field, hex_id_field])

        # Iterate over all sets of statistics and write a unique file
        # for each set.  (.items() rather than the Py2-only
        # .iteritems() — works on both Python 2 and 3.)
        for (stat_name, stat_fields) in stat_sets.items():
            # Get the output file name
            obs_out_file = \
                '_'.join((prefix, 'observed', stat_name)) + '.csv'
            obs_out_file = os.path.join(root_dir, prefix, obs_out_file)

            # Write out the observed file
            self.write_hex_stats(obs_data, hex_id_field, stat_fields,
                                 min_plots_per_hex, obs_out_file)

        # Iterate over values of k for the predicted values
        for k in k_values:
            # Open the plot_pixel predicted file for this value of k
            # and join the hex_id_field to the recarray
            prd_file = 'plot_pixel_predicted_k' + str(k) + '.csv'
            prd_file = os.path.join(root_dir, 'plot_pixel', prd_file)
            prd_data = utilities.csv2rec(prd_file)
            prd_data = mlab.rec_join(id_field, prd_data, id_x_hex)

            # Iterate over all sets of statistics and write a unique
            # file for each set
            for (stat_name, stat_fields) in stat_sets.items():
                # Get the output file name
                prd_out_file = '_'.join(
                    (prefix, 'predicted', 'k' + str(k),
                     stat_name)) + '.csv'
                prd_out_file = os.path.join(root_dir, prefix, prd_out_file)

                # Write out the predicted file
                self.write_hex_stats(prd_data, hex_id_field, stat_fields,
                                     min_plots_per_hex, prd_out_file)

    # Calculate the ECDF and AC statistics
    # For ECDF and AC, it is a paired comparison between the observed
    # and predicted data.  We do this at each value of k and for each
    # hex resolution level.

    # Open the stats file
    stats_file = p.hex_statistics_file
    stats_fh = open(stats_file, 'w')
    header_fields = ['LEVEL', 'K', 'VARIABLE', 'STATISTIC', 'VALUE']
    stats_fh.write(','.join(header_fields) + '\n')

    # Create a list of RiemannComparison instances which store the
    # information needed to do comparisons between observed and
    # predicted files for any level or value of k
    compare_list = []
    for hex_resolution in hex_resolutions:
        (hex_id_field, hex_distance) = hex_resolution[0:2]
        prefix = 'hex_' + str(hex_distance)
        obs_file = '_'.join((prefix, 'observed', 'mean')) + '.csv'
        obs_file = os.path.join(root_dir, prefix, obs_file)
        for k in k_values:
            prd_file = '_'.join(
                (prefix, 'predicted', 'k' + str(k), 'mean')) + '.csv'
            prd_file = os.path.join(root_dir, prefix, prd_file)
            r = RiemannComparison(
                prefix, obs_file, prd_file, hex_id_field, k)
            compare_list.append(r)

    # Add the plot_pixel comparisons to this list
    prefix = 'plot_pixel'
    obs_file = 'plot_pixel_observed.csv'
    obs_file = os.path.join(root_dir, prefix, obs_file)
    for k in k_values:
        prd_file = 'plot_pixel_predicted_k' + str(k) + '.csv'
        prd_file = os.path.join(root_dir, prefix, prd_file)
        r = RiemannComparison(prefix, obs_file, prd_file, id_field, k)
        compare_list.append(r)

    # Do all the comparisons
    for c in compare_list:
        # Open the observed file
        obs_data = utilities.csv2rec(c.obs_file)

        # Open the predicted file
        prd_data = utilities.csv2rec(c.prd_file)

        # Ensure that the IDs between the observed and predicted
        # data line up.  BUG FIX: the previous check raised only when
        # ALL IDs differed (np.all), silently accepting partial
        # misalignment; np.any raises on the first mismatch.
        ids1 = getattr(obs_data, c.id_field)
        ids2 = getattr(prd_data, c.id_field)
        if np.any(ids1 != ids2):
            err_msg = 'IDs do not match between observed and '
            err_msg += 'predicted data'
            raise ValueError(err_msg)

        for attr in attrs:
            arr1 = getattr(obs_data, attr)
            arr2 = getattr(prd_data, attr)
            rv = RiemannVariable(arr1, arr2)

            gmfr_stats = rv.gmfr_statistics()
            for stat in ('gmfr_a', 'gmfr_b', 'ac', 'ac_sys', 'ac_uns'):
                stat_line = '%s,%d,%s,%s,%.4f\n' % (
                    c.prefix.upper(), c.k, attr, stat.upper(),
                    gmfr_stats[stat])
                stats_fh.write(stat_line)

            ks_stats = rv.ks_statistics()
            for stat in ('ks_max', 'ks_mean'):
                stat_line = '%s,%d,%s,%s,%.4f\n' % (
                    c.prefix.upper(), c.k, attr, stat.upper(),
                    ks_stats[stat])
                stats_fh.write(stat_line)

    # Close the statistics file (previously leaked)
    stats_fh.close()