def _get_attribute_data(self):
    """
    Load the stand attribute table and the names of the attributes for
    which predictions will be made.

    Per the original note on this method, it is meant to be called once,
    with the caller keeping the results in instance-level variables.

    Parameters
    ----------
    None

    Returns
    -------
    stand_attr_data : numpy recarray
        Recarray with all stand attributes
    attrs : list of strs
        Field names of the attributes flagged in the metadata as
        CONTINUOUS accuracy attributes
    """
    parser = self.parameter_parser

    # Stand attribute table, read into a recarray
    attr_table = utilities.csv2rec(parser.stand_attribute_file)

    # Walk the stand attribute metadata and keep only the continuous
    # accuracy attributes
    metadata = xsmp.XMLStandMetadataParser(parser.stand_metadata_file)
    continuous_names = []
    for attribute in metadata.attributes:
        if attribute.field_type != 'CONTINUOUS':
            continue
        if attribute.accuracy_attr != 1:
            continue
        continuous_names.append(attribute.field_name)

    return attr_table, continuous_names
def _create_histograms(self):
    """
    Draw one observed-vs-predicted area histogram per report attribute.

    Reads the regional accuracy (area estimate) file and the stand
    attribute metadata, then asks ``mplf.draw_histogram`` to render a
    histogram file for every attribute that is an accuracy attribute, is
    flagged for the report (project_attr) and is not a species variable.

    Parameters
    ----------
    None

    Returns
    -------
    histogram_files : list of strs
        Names of the histogram files requested, one per attribute
        (``<attr>_histogram.png``)

    Raises
    ------
    ValueError
        If the observed and predicted series for an attribute do not
        carry exactly the same bin names
    """
    # Open the area estimate file into a recarray
    ae_data = utilities.csv2rec(self.regional_accuracy_file)

    # Read in the stand attribute metadata
    mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

    # Subset the attributes to those that are accuracy attributes,
    # are identified to go into the report, and are not species variables
    attrs = [
        attr.field_name for attr in mp.attributes
        if attr.accuracy_attr == 1 and attr.project_attr == 1
        and attr.species_attr == 0
    ]

    # Iterate over the attributes and create a histogram file of each
    histogram_files = []
    for attr in attrs:

        # Metadata for this attribute
        metadata = mp.get_attribute(attr)

        # Get the observed and predicted data for this attribute
        obs_vals = self._get_histogram_data(ae_data, attr, 'OBSERVED')
        prd_vals = self._get_histogram_data(ae_data, attr, 'PREDICTED')

        # Set the areas for the observed and predicted data
        obs_area = obs_vals.AREA
        prd_area = prd_vals.AREA

        # Both series share one set of bin labels on the plot, so any
        # difference (in a single name or in the number of bins) is an
        # error.  The previous check only fired when *every* name differed.
        bin_names = obs_vals.BIN_NAME
        if not np.array_equal(bin_names, prd_vals.BIN_NAME):
            err_msg = 'Bin names are not the same for ' + attr
            raise ValueError(err_msg)

        # Create the output file name
        output_file = attr.lower() + '_histogram.png'

        # Create the histogram
        mplf.draw_histogram(
            [obs_area, prd_area], bin_names, metadata,
            output_type=mplf.FILE, output_file=output_file)

        # Add this to the list of histogram files
        histogram_files.append(output_file)

    # Return the list of histograms just created
    return histogram_files
def _create_scatterplots(self):
    """
    Draw one observed-vs-predicted scatterplot per report attribute.

    The observed records are first limited to the IDs that also occur in
    the predicted file.  A scatterplot file is then requested from
    ``mplf.draw_scatterplot`` for every attribute that is continuous, is an
    accuracy attribute, is flagged for the report and is not a species
    variable.

    Parameters
    ----------
    None

    Returns
    -------
    scatter_files : list of strs
        Names of the scatterplot files requested, one per attribute
        (``<attr>_scatter.png``)
    """
    observed = utilities.csv2rec(self.observed_file)
    predicted = utilities.csv2rec(self.predicted_file)

    # Keep only the observed rows whose ID also appears in the predictions
    observed_ids = getattr(observed, self.id_field)
    predicted_ids = getattr(predicted, self.id_field)
    observed = observed[np.in1d(observed_ids, predicted_ids)]

    metadata_parser = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

    def is_report_attribute(candidate):
        # Continuous, reportable accuracy attributes that are not species
        return (
            candidate.field_type == 'CONTINUOUS' and
            candidate.project_attr == 1 and
            candidate.accuracy_attr == 1 and
            candidate.species_attr == 0
        )

    field_names = [
        candidate.field_name for candidate in metadata_parser.attributes
        if is_report_attribute(candidate)
    ]

    scatter_files = []
    for field_name in field_names:
        field_metadata = metadata_parser.get_attribute(field_name)

        # Observed and predicted columns for this attribute
        observed_column = getattr(observed, field_name)
        predicted_column = getattr(predicted, field_name)

        png_name = field_name.lower() + '_scatter.png'
        mplf.draw_scatterplot(
            observed_column, predicted_column, field_metadata,
            output_type=mplf.FILE, output_file=png_name)
        scatter_files.append(png_name)

    return scatter_files
def join_attributes(raster, raster_join_field, attribute_file,
                    attribute_join_field, attribute_metadata_file):
    """
    Join attributes to a raster

    Only a subset of the variables in the attribute file is joined: every
    field whose metadata has PROJECT_ATTR = 0 is passed to the
    geoprocessor as a field to drop.

    The join itself is best-effort: if the geoprocessor raises, the
    exception information is printed and the function returns normally.
    Interpreter-level exits (KeyboardInterrupt, SystemExit) are not
    intercepted.

    Parameters:
    -----------
    raster : str
        raster to join attributes to
    raster_join_field : str
        name of join field in raster
    attribute_file: str
        name of file with attributes to join to raster
    attribute_join_field: str
        name of join field in attribute file
    attribute_metadata_file: str
        name of file with attribute metadata to decide what
        variables to drop from join file

    Returns:
    --------
    None
    """
    model_dir = get_path(raster)
    gp = geoprocessor.Geoprocessor(model_dir)

    # create list of attributes to drop from join file - we only want a
    # subset of all variables joined to the NN grids (PROJECT_ATTR = 1), so
    # we need to specify all variables that have PROJECT_ATTR = 0 in the
    # metadata
    mp = xsmp.XMLStandMetadataParser(attribute_metadata_file)
    drop_fields = [
        x.field_name for x in mp.attributes if x.project_attr == 0]

    try:
        gp.join_attributes(
            raster, raster_join_field, attribute_file,
            attribute_join_field, drop_fields=drop_fields)
    except Exception:
        # Deliberately non-fatal: report the failure and carry on.  Catching
        # Exception (rather than everything) lets Ctrl-C and sys.exit()
        # still stop the program.
        print(sys.exc_info())
def _create_story(self):
    """
    Build the 'Data Dictionary' section of the report.

    The section consists of a title banner, a two-column table listing
    each reportable non-species accuracy attribute with its description
    (plus units and a nested table of codes when the metadata has them),
    and a closing paragraph about the species fields.

    Parameters
    ----------
    None

    Returns
    -------
    story : list
        Reportlab flowables for this section, in the order they were added
    """
    # Import the report styles
    styles = report_styles.get_report_styles()

    # The section starts with a (portrait) page break
    story = self._make_page_break([], self.PORTRAIT)

    # Section title banner
    banner_para = p.Paragraph(
        '<strong>Data Dictionary</strong>', styles['section_style'])
    banner = p.Table([[banner_para]], colWidths=[7.5 * u.inch])
    banner_commands = [
        ('TOPPADDING', (0, 0), (-1, -1), 6),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
        ('BACKGROUND', (0, 0), (-1, -1), '#957348'),
        ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('GRID', (0, 0), (-1, -1), 0.25, colors.black),
    ]
    banner.setStyle(p.TableStyle(banner_commands))
    story.append(banner)
    story.append(p.Spacer(0, 0.1 * u.inch))

    # Stand attribute metadata, limited to accuracy attributes that go
    # into the report and are not species variables
    parser = xsmp.XMLStandMetadataParser(self.stand_metadata_file)
    field_names = [
        candidate.field_name for candidate in parser.attributes
        if candidate.accuracy_attr == 1 and candidate.project_attr == 1
        and candidate.species_attr == 0
    ]

    def build_code_table(codes):
        # Nested table with one (code value, code description) row per code
        code_rows = []
        for code in codes:
            value_para = p.Paragraph(code.code_value, styles['code_style'])
            meaning = self.txt_to_html(code.description)
            meaning_para = p.Paragraph(meaning, styles['code_style'])
            code_rows.append([value_para, meaning_para])
        code_table = p.Table(
            code_rows, colWidths=[0.75 * u.inch, 4.5 * u.inch])
        code_commands = [
            ('TOPPADDING', (0, 0), (-1, -1), 3),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 3),
            ('BACKGROUND', (0, 0), (-1, -1), '#f7f7ea'),
            ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('GRID', (0, 0), (-1, -1), 0.25, colors.white),
        ]
        code_table.setStyle(p.TableStyle(code_commands))
        return code_table

    # One row per attribute: [field name, description (+ codes)]
    dictionary_rows = []
    for name in field_names:
        info = parser.get_attribute(name)
        shown_name = info.field_name
        unit_label = info.units
        blurb = info.description

        name_para = p.Paragraph(shown_name, styles['body_style_10'])

        # Units are appended to the description unless flagged as 'none'
        if unit_label != 'none':
            blurb = blurb + ' (' + unit_label + ')'
        blurb_para = p.Paragraph(blurb, styles['body_style_10'])

        # Description first; the code table (if any) is stacked beneath it
        stacked = [[blurb_para]]
        if info.codes:
            stacked.append([build_code_table(info.codes)])

        detail_table = p.Table(stacked, colWidths=[5.25 * u.inch])
        detail_commands = [
            ('TOPPADDING', (0, 0), (-1, 0), 0),
            ('BOTTOMPADDING', (0, -1), (-1, -1), 0),
            ('LEFTPADDING', (0, 0), (-1, -1), 0),
            ('RIGHTPADDING', (0, 0), (-1, -1), 0),
            ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ]
        detail_table.setStyle(p.TableStyle(detail_commands))

        dictionary_rows.append([name_para, detail_table])

    # Wrap all rows into the master reportlab table
    master = p.Table(
        dictionary_rows, colWidths=[1.6 * u.inch, 5.4 * u.inch])
    master_commands = [
        ('TOPPADDING', (0, 0), (0, -1), 5),
        ('BOTTOMPADDING', (0, 0), (0, -1), 5),
        ('GRID', (0, 0), (-1, -1), 1, colors.white),
        ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('BACKGROUND', (0, 0), (-1, -1), '#f1efe4'),
    ]
    master.setStyle(p.TableStyle(master_commands))
    story.append(master)

    # Summary of the species information attached to ArcInfo grids.  The
    # species codes are not enumerated here.
    spp_str = """
        Individual species abundances are attached to ArcInfo grids that
        LEMMA distributes. For this model, fields designate species codes
        based on the <link color="#0000ff"
        href="http://plants.usda.gov/">USDA PLANTS database</link> from the
        year 2000, and values represent species """

    # The sentence ending depends on the model type; other model types
    # leave the sentence as it is
    model_type = self.model_type
    if model_type in ['sppsz', 'sppba']:
        spp_str += " basal area (m^2/ha)."
    elif model_type in ['trecov', 'wdycov']:
        spp_str += " percent cover."

    species_para = p.Paragraph(spp_str, styles['body_style'])
    story.append(p.Spacer(0, 0.1 * u.inch))
    story.append(species_para)

    return story
def run_diagnostic(self):
    """
    Run the Riemann accuracy diagnostic at plot/pixel and hexagon scales.

    Steps, in order:

    1. Read the hex attribute file and verify that every hexagon ID field
       named in ``riemann_hex_resolutions`` is present.
    2. Create the ``plot_pixel`` and ``hex_<distance>`` output folders
       under ``riemann_output_folder``.
    3. Write the plot/pixel observed file and, for each k, the
       independent predicted file.
    4. For each hexagon resolution, write observed and (per k) predicted
       mean and standard deviation files via ``self.write_hex_stats``.
    5. For every level / k pairing and every attribute, write the GMFR
       and KS statistics to ``hex_statistics_file``.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a hexagon ID field is missing from the hex attribute file, or
        if the IDs of an observed file and its paired predicted file are
        not identical
    """
    # Shortcut to the parameter parser
    p = self.parameter_parser

    # ID field
    id_field = p.summary_level + 'ID'

    # Root directory for Riemann files
    root_dir = p.riemann_output_folder

    # Read in hex input file
    obs_data = utilities.csv2rec(self.hex_attribute_file)

    # Get the hexagon levels and ensure that the fields exist in the
    # hex_attribute file
    hex_resolutions = p.riemann_hex_resolutions
    hex_fields = [x[0] for x in hex_resolutions]
    for field in hex_fields:
        if field not in obs_data.dtype.names:
            err_msg = 'Field ' + field + ' does not exist in the '
            err_msg += 'hex_attribute file'
            raise ValueError(err_msg)

    # Create the directory structure based on the hex levels
    hex_levels = ['hex_' + str(x[1]) for x in hex_resolutions]
    all_levels = ['plot_pixel'] + hex_levels
    for level in all_levels:
        sub_dir = os.path.join(root_dir, level)
        if not os.path.exists(sub_dir):
            os.makedirs(sub_dir)

    # Get the values of k
    k_values = p.riemann_k_values

    # Create a dictionary of plot ID to image year (or model_year for
    # non-imagery models) for these plots
    if p.model_type in p.imagery_model_types:
        id_x_year = dict((x[id_field], x.IMAGE_YEAR) for x in obs_data)
    else:
        id_x_year = dict((x[id_field], p.model_year) for x in obs_data)

    # Create a PredictionRun instance
    pr = prediction_run.PredictionRun(p)

    # Get the neighbors and distances for these IDs
    pr.calculate_neighbors_at_ids(id_x_year, id_field=id_field)

    # Create the lookup of id_field to LOC_ID for the hex plots
    nsa_id_dict = dict((x[id_field], x.LOC_ID) for x in obs_data)

    # Create a dictionary between id_field and no_self_assign_field
    # for the model plots
    env_file = p.environmental_matrix_file
    env_data = utilities.csv2rec(env_file)
    model_nsa_id_dict = dict(
        (getattr(x, id_field), x.LOC_ID) for x in env_data)

    # Stitch the two dictionaries together; entries from the hex plots
    # take precedence over entries from the model plots
    for plot_id in sorted(model_nsa_id_dict):
        if plot_id not in nsa_id_dict:
            nsa_id_dict[plot_id] = model_nsa_id_dict[plot_id]

    # Get the stand attribute metadata and retrieve only the
    # continuous accuracy attributes
    stand_metadata_file = p.stand_metadata_file
    mp = xsmp.XMLStandMetadataParser(stand_metadata_file)
    attrs = [
        x.field_name for x in mp.attributes
        if x.field_type == 'CONTINUOUS' and x.accuracy_attr == 1
    ]

    # Subset the attributes for fields that are in the
    # hex_attribute file
    attrs = [x for x in attrs if x in obs_data.dtype.names]
    plot_pixel_obs = mlab.rec_keep_fields(obs_data, [id_field] + attrs)

    # Write out the plot_pixel observed file
    file_name = 'plot_pixel_observed.csv'
    output_file = os.path.join(root_dir, 'plot_pixel', file_name)
    utilities.rec2csv(plot_pixel_obs, output_file)

    # Iterate over values of k
    for k in k_values:

        # Construct the output file name
        file_name = '_'.join(('plot_pixel', 'predicted', 'k' + str(k)))
        file_name += '.csv'
        output_file = os.path.join(root_dir, 'plot_pixel', file_name)

        # The context manager closes the file even if a prediction fails
        with open(output_file, 'w') as out_fh:

            # For the plot/pixel scale, retrieve the independent predicted
            # data for this value of k.  Even though attributes are being
            # returned from this function, we want to use the attribute
            # list that we've already found above.
            prediction_generator = pr.calculate_predictions_at_k(
                k=k, id_field=id_field, independent=True,
                nsa_id_dict=nsa_id_dict)

            # Write out the field names
            out_fh.write(id_field + ',' + ','.join(attrs) + '\n')

            # Write out the predictions for this k
            for plot_prediction in prediction_generator:
                pr.write_predicted_record(
                    plot_prediction, out_fh, attrs=attrs)

    # Create the fields for which to extract statistics at the hexagon
    # levels.  Each entry is (input field, function, output field).
    count_field = (id_field, len, 'PLOT_COUNT')
    stat_sets = {
        'mean': tuple([count_field] + [(x, np.mean, x) for x in attrs]),
        'std': tuple([count_field] + [(x, np.std, x) for x in attrs]),
    }

    # For each hexagon level, associate the plots with their hexagon ID
    # and find observed and predicted statistics for each hexagon
    for hex_resolution in hex_resolutions:
        (hex_id_field, hex_distance) = hex_resolution[0:2]
        # NOTE(review): the minimum plot count is read from index 3, not
        # index 2 - confirm against the layout of riemann_hex_resolutions
        min_plots_per_hex = hex_resolution[3]
        prefix = 'hex_' + str(hex_distance)

        # Create a crosswalk between the id_field and the hex_id_field
        id_x_hex = mlab.rec_keep_fields(obs_data, [id_field, hex_id_field])

        # Iterate over all sets of statistics and write a unique file
        # for each set
        for (stat_name, stat_fields) in stat_sets.items():

            # Get the output file name
            obs_out_file = \
                '_'.join((prefix, 'observed', stat_name)) + '.csv'
            obs_out_file = os.path.join(root_dir, prefix, obs_out_file)

            # Write out the observed file
            self.write_hex_stats(
                obs_data, hex_id_field, stat_fields, min_plots_per_hex,
                obs_out_file)

        # Iterate over values of k for the predicted values
        for k in k_values:

            # Open the plot_pixel predicted file for this value of k
            # and join the hex_id_field to the recarray
            prd_file = 'plot_pixel_predicted_k' + str(k) + '.csv'
            prd_file = os.path.join(root_dir, 'plot_pixel', prd_file)
            prd_data = utilities.csv2rec(prd_file)
            prd_data = mlab.rec_join(id_field, prd_data, id_x_hex)

            # Iterate over all sets of statistics and write a unique file
            # for each set
            for (stat_name, stat_fields) in stat_sets.items():

                # Get the output file name
                prd_out_file = '_'.join(
                    (prefix, 'predicted', 'k' + str(k), stat_name)) + '.csv'
                prd_out_file = os.path.join(root_dir, prefix, prd_out_file)

                # Write out the predicted file
                self.write_hex_stats(
                    prd_data, hex_id_field, stat_fields,
                    min_plots_per_hex, prd_out_file)

    # Calculate the ECDF and AC statistics
    # For ECDF and AC, it is a paired comparison between the observed
    # and predicted data.  We do this at each value of k and for each
    # hex resolution level.

    # Create a list of RiemannComparison instances which store the
    # information needed to do comparisons between observed and predicted
    # files for any level or value of k
    compare_list = []
    for hex_resolution in hex_resolutions:
        (hex_id_field, hex_distance) = hex_resolution[0:2]
        prefix = 'hex_' + str(hex_distance)
        obs_file = '_'.join((prefix, 'observed', 'mean')) + '.csv'
        obs_file = os.path.join(root_dir, prefix, obs_file)
        for k in k_values:
            prd_file = '_'.join(
                (prefix, 'predicted', 'k' + str(k), 'mean')) + '.csv'
            prd_file = os.path.join(root_dir, prefix, prd_file)
            r = RiemannComparison(
                prefix, obs_file, prd_file, hex_id_field, k)
            compare_list.append(r)

    # Add the plot_pixel comparisons to this list
    prefix = 'plot_pixel'
    obs_file = 'plot_pixel_observed.csv'
    obs_file = os.path.join(root_dir, prefix, obs_file)
    for k in k_values:
        prd_file = 'plot_pixel_predicted_k' + str(k) + '.csv'
        prd_file = os.path.join(root_dir, prefix, prd_file)
        r = RiemannComparison(prefix, obs_file, prd_file, id_field, k)
        compare_list.append(r)

    # Open the stats file; the context manager guarantees it is flushed
    # and closed (it was previously left open)
    stats_file = p.hex_statistics_file
    header_fields = ['LEVEL', 'K', 'VARIABLE', 'STATISTIC', 'VALUE']
    with open(stats_file, 'w') as stats_fh:
        stats_fh.write(','.join(header_fields) + '\n')

        # Do all the comparisons
        for c in compare_list:

            # Open the observed file
            obs_data = utilities.csv2rec(c.obs_file)

            # Open the predicted file
            prd_data = utilities.csv2rec(c.prd_file)

            # Ensure that the IDs between the observed and predicted
            # data line up.  The statistics below pair records by
            # position, so a single differing ID (or a length mismatch)
            # is an error; the previous check only fired when every ID
            # differed.
            ids1 = getattr(obs_data, c.id_field)
            ids2 = getattr(prd_data, c.id_field)
            if not np.array_equal(ids1, ids2):
                err_msg = 'IDs do not match between observed and '
                err_msg += 'predicted data'
                raise ValueError(err_msg)

            for attr in attrs:
                arr1 = getattr(obs_data, attr)
                arr2 = getattr(prd_data, attr)
                rv = RiemannVariable(arr1, arr2)

                gmfr_stats = rv.gmfr_statistics()
                for stat in ('gmfr_a', 'gmfr_b', 'ac', 'ac_sys', 'ac_uns'):
                    stat_line = '%s,%d,%s,%s,%.4f\n' % (
                        c.prefix.upper(), c.k, attr, stat.upper(),
                        gmfr_stats[stat])
                    stats_fh.write(stat_line)

                ks_stats = rv.ks_statistics()
                for stat in ('ks_max', 'ks_mean'):
                    stat_line = '%s,%d,%s,%s,%.4f\n' % (
                        c.prefix.upper(), c.k, attr, stat.upper(),
                        ks_stats[stat])
                    stats_fh.write(stat_line)
def setUp(self):
    """
    Create the stand metadata parser used by the tests, reading the
    sample file at data/stand_attr.xml (a path relative to the working
    directory).
    """
    self.xmp = xsmp.XMLStandMetadataParser('data/stand_attr.xml')
def run_diagnostic(self):
    """
    Write observed and predicted area histograms for every attribute.

    For each field of the observed area data (other than HECTARES and ID
    fields) the observed and predicted values are binned, weighted by
    their HECTARES columns.  Continuous fields use seven bins;
    other fields are binned categorically, labelled from the metadata
    codes when present.  'Unsampled' and 'Nonforest' classes are inserted
    into both series and one line per bin is written to
    ``self.statistics_file`` with the columns VARIABLE, DATASET, BIN_NAME
    and AREA.

    Fields with no metadata are reported on stdout and skipped; fields
    missing from either dataset are skipped silently.

    Parameters
    ----------
    None

    Returns
    -------
    None
    """
    # Read in the observed data from the area estimate file
    (obs_area, obs_nf_hectares, obs_ns_hectares) = \
        self.get_observed_estimates()

    # Get the predicted data arrays; the predicted data carries no
    # unsampled area
    (prd_area, prd_nf_hectares) = self.get_predicted_estimates()
    prd_ns_hectares = 0.0

    # Get the weights of the two datasets
    obs_weights = obs_area.HECTARES
    prd_weights = prd_area.HECTARES

    # Get a metadata parser
    mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

    # Open the output file and print out the header line.  The context
    # manager guarantees the file is flushed and closed (it was previously
    # left open).
    header_fields = ['VARIABLE', 'DATASET', 'BIN_NAME', 'AREA']
    with open(self.statistics_file, 'w') as stats_fh:
        stats_fh.write(','.join(header_fields) + '\n')

        # Iterate over all fields and print out the area histogram
        # statistics
        for v in obs_area.dtype.names:

            # Skip over the HECTARES field
            if v == 'HECTARES':
                continue

            # Get the metadata for this field.  Exception (rather than a
            # bare except) keeps Ctrl-C and sys.exit() working.
            try:
                fm = mp.get_attribute(v)
            except Exception:
                err_msg = v + ' is missing metadata.'
                print(err_msg)
                continue

            # Skip over ID fields
            if fm.field_type == 'ID':
                continue

            # Get the actual data
            try:
                obs_vals = getattr(obs_area, v)
                prd_vals = getattr(prd_area, v)
            except AttributeError:
                continue

            obs_vw = histogram.VariableVW(obs_vals, obs_weights)
            prd_vw = histogram.VariableVW(prd_vals, prd_weights)

            # Figure out how to bin the data based on field type
            if fm.field_type == 'CONTINUOUS':
                bins = histogram.bin_continuous([obs_vw, prd_vw], bins=7)
            else:
                if fm.codes:
                    class_names = {}
                    for code in fm.codes:
                        class_names[code.code_value] = code.label
                else:
                    class_names = None
                bins = histogram.bin_categorical(
                    [obs_vw, prd_vw], class_names=class_names)

            bins[0].name = 'OBSERVED'
            bins[1].name = 'PREDICTED'

            # Handle special cases of nonsampled and nonforest area
            self.insert_class(bins[0], 'Unsampled', obs_ns_hectares)
            self.insert_class(bins[0], 'Nonforest', obs_nf_hectares)
            self.insert_class(bins[1], 'Unsampled', prd_ns_hectares)
            self.insert_class(bins[1], 'Nonforest', prd_nf_hectares)

            # One output line per bin of each series
            for series in bins:
                for (i, count) in enumerate(series.bin_counts):
                    out_data = [
                        '%s' % v,
                        '%s' % series.name,
                        '"%s"' % series.bin_names[i],
                        '%.3f' % count,
                    ]
                    stats_fh.write(','.join(out_data) + '\n')
def _create_story(self):
    """
    Build the vegetation class error matrix section of the report.

    The section is laid out on a landscape page and contains a title
    banner, the observed-by-predicted error matrix read from
    ``self.vc_errmatrix_file`` (correct cells shaded dark gray, fuzzy
    correct cells light gray), an explanation of the shading, and the
    definition of every VEGCLASS code from the stand metadata.

    Parameters
    ----------
    None

    Returns
    -------
    story : list
        Reportlab flowables for this section, in the order they were added
    """
    # Import the report styles
    styles = report_styles.get_report_styles()

    # The section starts with a (landscape) page break
    story = self._make_page_break([], self.LANDSCAPE)

    # This class is somewhat of a hack, in that it likely only works on
    # rotated paragraphs which fit into the desired cell area
    class RotatedParagraph(p.Paragraph):
        def wrap(self, availHeight, availWidth):
            # Wrap using the text width and the canvas leading, then
            # report the two dimensions swapped
            text_width = self.canv.stringWidth(self.text)
            h, w = p.Paragraph.wrap(self, text_width, self.canv._leading)
            return w, h

        def draw(self):
            canvas = self.canv
            canvas.rotate(90)
            canvas.translate(0.0, -10.0)
            p.Paragraph.draw(self)

    # Section title banner
    title_str = (
        '<strong>Local-Scale Accuracy Assessment: '
        'Error Matrix for Vegetation Classes at Plot '
        'Locations</strong>'
    )
    banner_para = p.Paragraph(title_str, styles['section_style'])
    banner = p.Table([[banner_para]], colWidths=[10.0 * u.inch])
    banner_commands = [
        ('TOPPADDING', (0, 0), (-1, -1), 3),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 3),
        ('BACKGROUND', (0, 0), (-1, -1), '#957348'),
        ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('GRID', (0, 0), (-1, -1), 0.25, colors.black),
    ]
    banner.setStyle(p.TableStyle(banner_commands))
    story.append(banner)
    story.append(p.Spacer(0, 0.1 * u.inch))

    # Read in the vegclass error matrix: an OBSERVED label column (which
    # is dropped), eleven predicted-class columns and three summary columns
    column_names = ['OBSERVED']
    column_names += ['P_' + str(number) for number in range(1, 12)]
    column_names += ['TOTAL', 'CORRECT', 'FUZZY_CORRECT']
    vc_data = mlab.csv2rec(
        self.vc_errmatrix_file, skiprows=1, names=column_names)
    vc_data = mlab.rec_drop_fields(vc_data, ['OBSERVED'])

    # Get the class names from the stand attribute metadata
    parser = xsmp.XMLStandMetadataParser(self.stand_metadata_file)
    vc_codes = parser.get_attribute('VEGCLASS').codes

    summary_labels = ('Total', '% Correct', '% FCorrect')

    # First header row: two empty cells, the 'Predicted Class' label, then
    # empty cells which the SPAN style below merges with the label
    predicted_para = p.Paragraph(
        '<strong>Predicted Class</strong>', styles['body_style_10_center'])
    span_row = ['', '', predicted_para] + [''] * (len(vc_data) - 1)

    # Second header row: the predicted class labels and the summary
    # labels, with line breaks inserted so they fit the narrow columns
    label_row = ['', '']
    for code in vc_codes:
        broken = code.label.replace('-', '-<br/>')
        label_row.append(p.Paragraph(broken, styles['body_style_10_right']))
    for summary in summary_labels:
        broken = summary.replace(' ', '<br/>')
        label_row.append(p.Paragraph(broken, styles['body_style_10_right']))

    vegclass_table = [span_row, label_row]

    # Rows/columns up to and including this index hold plot counts;
    # anything beyond holds percentages
    format_break = 11

    # Cells which should be blank
    blank_cells = [
        (11, 12), (11, 13), (12, 11), (12, 13), (13, 11), (13, 12)]

    # Add the data rows
    for (row_index, record) in enumerate(vc_data):
        cells = []
        for (col_index, value) in enumerate(record):
            is_count = (
                row_index <= format_break and col_index <= format_break)
            if (row_index, col_index) in blank_cells:
                text = ''
            elif is_count:
                text = '%d' % int(value)
            else:
                text = '%.1f' % float(value)
            cells.append(p.Paragraph(text, styles['body_style_10_right']))

        # The rotated 'Observed Class' label sits in the first data row
        # only; the SPAN style below stretches it over the rest
        if row_index == 0:
            side_label = RotatedParagraph(
                '<strong>Observed Class</strong>',
                styles['body_style_10_center'])
        else:
            side_label = ''
        cells.insert(0, side_label)

        # Row label: the class label, or a summary label for the rows
        # after the last class
        if row_index < len(vc_codes):
            row_label = vc_codes[row_index].label
        else:
            row_label = summary_labels[row_index - len(vc_codes)]
        cells.insert(1, p.Paragraph(row_label, styles['body_style_10_right']))

        vegclass_table.append(cells)

    # Column widths (in inches): side label, row label, one column per
    # class, then the three summary columns
    inch_widths = [0.3, 0.85] + [0.56] * len(vc_codes) + [0.66] * 3
    widths = [width * u.inch for width in inch_widths]

    # Convert the vegclass table into a reportlab table
    matrix = p.Table(vegclass_table, colWidths=widths)
    matrix_commands = [
        ('SPAN', (0, 0), (1, 1)),
        ('SPAN', (0, 2), (0, -1)),
        ('SPAN', (2, 0), (-1, 0)),
        ('BACKGROUND', (0, 0), (-1, -1), '#f1efe4'),
        ('GRID', (0, 0), (-1, -1), 1, colors.white),
        ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('VALIGN', (0, 2), (0, -1), 'MIDDLE'),
        ('VALIGN', (2, 1), (-1, 1), 'MIDDLE'),
        ('TOPPADDING', (0, 0), (-1, -1), 2),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 3),
    ]
    matrix.setStyle(p.TableStyle(matrix_commands))

    # Shade the truly correct cells (the diagonal).  Class numbers are
    # 1-based and offset by one for the two label rows/columns.
    correct_commands = []
    for class_number in range(1, len(vc_codes) + 1):
        cell = (class_number + 1, class_number + 1)
        correct_commands.append(('BACKGROUND', cell, cell, '#aaaaaa'))
    matrix.setStyle(p.TableStyle(correct_commands))

    # Shade the fuzzy correct cells.  Each key is paired with the listed
    # class numbers; the same +1 offset applies.
    fuzzy = {
        1: [2],
        2: [1, 3, 5, 8],
        3: [2, 4, 5],
        4: [3, 6, 7],
        5: [2, 3, 6, 8],
        6: [4, 5, 7, 9],
        7: [4, 6, 10, 11],
        8: [2, 5, 9],
        9: [6, 8, 10],
        10: [7, 9, 11],
        11: [7, 10],
    }
    fuzzy_commands = []
    for (class_number, neighbors) in sorted(fuzzy.items()):
        for neighbor in neighbors:
            cell = (class_number + 1, neighbor + 1)
            fuzzy_commands.append(('BACKGROUND', cell, cell, '#dddddd'))
    matrix.setStyle(p.TableStyle(fuzzy_commands))

    # Add this table to the story
    story.append(matrix)
    story.append(p.Spacer(0, 0.1 * u.inch))

    # Explanation of the cell shading
    cell_str = """
        Cell values are model plot counts. Dark gray cells represent
        plots where the observed class matches the predicted class and
        are included in the percent correct. Light gray cells represent
        cases where the observed and predicted differ slightly (within
        +/- one class) based on canopy cover, hardwood proportion or
        average stand diameter, and are included in the percent fuzzy
        correct.
    """
    story.append(p.Paragraph(cell_str, styles['body_style_9']))
    story.append(p.Spacer(0, 0.1 * u.inch))

    # Heading for the vegetation class definitions
    head_str = '''
        <strong>Vegetation Class (VEGCLASS) Definitions</strong> --
        CANCOV (canopy cover of all live trees), BAH_PROP (proportion of
        hardwood basal area), and QMD_DOM (quadratic mean diameter of
        all dominant and codominant trees).
    '''
    story.append(p.Paragraph(head_str, styles['body_style_9']))
    story.append(p.Spacer(0, 0.1 * u.inch))

    # Print out the vegclass code definitions
    for code in vc_codes:
        definition = self.txt_to_html(code.description)
        doc_str = '<strong>' + code.label + ':</strong> ' + definition
        story.append(p.Paragraph(doc_str, styles['body_style_9']))

    return story
def run_diagnostic(self):
    """
    Write local accuracy statistics for every continuous accuracy
    variable.

    The observed records are limited to the IDs present in the predicted
    file.  For each observed field whose metadata marks it as a
    CONTINUOUS accuracy attribute, one line is written to
    ``self.statistics_file`` with the columns VARIABLE, PEARSON_R,
    SPEARMAN_R, RMSE, NORMALIZED_RMSE, BIAS_PERCENTAGE and R_SQUARE.

    All statistics are reported as 0.0 when every observed value is zero;
    the two correlations are reported as 0.0 when every predicted value
    is zero.  Fields with no metadata are reported on stdout and skipped.

    Parameters
    ----------
    None

    Returns
    -------
    None
    """
    # Read the observed and predicted files into numpy recarrays.  This is
    # done before the stats file is opened so that a failed read does not
    # leave a truncated stats file behind.
    obs = utilities.csv2rec(self.observed_file)
    prd = utilities.csv2rec(self.predicted_file)

    # Subset the observed data just to the IDs that are in the
    # predicted file
    obs_keep = np.in1d(
        getattr(obs, self.id_field), getattr(prd, self.id_field))
    obs = obs[obs_keep]

    # Read in the stand attribute metadata
    mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

    header = [
        'VARIABLE',
        'PEARSON_R',
        'SPEARMAN_R',
        'RMSE',
        'NORMALIZED_RMSE',
        'BIAS_PERCENTAGE',
        'R_SQUARE',
    ]

    # Open the stats file and print out the header line.  The context
    # manager closes the file even if a statistic raises.
    with open(self.statistics_file, 'w') as stats_fh:
        stats_fh.write(','.join(header) + '\n')

        # For each variable, calculate the statistics
        for v in obs.dtype.names:

            # Get the metadata for this field.  Exception (rather than a
            # bare except) keeps Ctrl-C and sys.exit() working.
            try:
                fm = mp.get_attribute(v)
            except Exception:
                err_msg = v + ' is missing metadata.'
                print(err_msg)
                continue

            # Only continue if this is a continuous accuracy variable
            if fm.field_type != 'CONTINUOUS' or fm.accuracy_attr == 0:
                continue

            obs_vals = getattr(obs, v)
            prd_vals = getattr(prd, v)

            if np.all(obs_vals == 0.0):
                # Nothing observed: report zeros for every statistic
                pearson_r = 0.0
                spearman_r = 0.0
                rmse = 0.0
                std_rmse = 0.0
                bias = 0.0
                r2 = 0.0
            else:
                if np.all(prd_vals == 0.0):
                    # Nothing predicted: the correlations are reported
                    # as zero rather than computed
                    pearson_r = 0.0
                    spearman_r = 0.0
                else:
                    pearson_r = statistics.pearson_r(obs_vals, prd_vals)
                    spearman_r = statistics.spearman_r(obs_vals, prd_vals)
                rmse = statistics.rmse(obs_vals, prd_vals)
                std_rmse = rmse / obs_vals.mean()
                bias = statistics.bias_percentage(obs_vals, prd_vals)
                r2 = statistics.r2(obs_vals, prd_vals)

            # Print this out to the stats file
            out_list = [
                v,
                '%.6f' % pearson_r,
                '%.6f' % spearman_r,
                '%.6f' % rmse,
                '%.6f' % std_rmse,
                '%.6f' % bias,
                '%.6f' % r2,
            ]
            stats_fh.write(','.join(out_list) + '\n')
def run_diagnostic(self):
    """
    Write presence/absence accuracy statistics for every species variable.

    The observed records are limited to the IDs present in the predicted
    file.  For each observed field whose metadata marks it as a
    CONTINUOUS species attribute, a ``statistics.BinaryErrorMatrix`` is
    built from the observed and predicted values.  Its four counts and
    derived measures are written as one line of ``self.statistics_file``
    (columns SPECIES, OP_PP, OP_PA, OA_PP, OA_PA, PREVALENCE, SENSITIVITY,
    FALSE_NEGATIVE_RATE, SPECIFICITY, FALSE_POSITIVE_RATE,
    PERCENT_CORRECT, ODDS_RATIO, KAPPA).

    Fields with no metadata are reported on stdout and skipped.

    Parameters
    ----------
    None

    Returns
    -------
    None
    """
    # Read the observed and predicted files into numpy recarrays
    obs = utilities.csv2rec(self.observed_file)
    prd = utilities.csv2rec(self.predicted_file)

    # Subset the observed data just to the IDs that are in the
    # predicted file
    obs_keep = np.in1d(
        getattr(obs, self.id_field), getattr(prd, self.id_field))
    obs = obs[obs_keep]

    # Read in the stand attribute metadata
    mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

    header = [
        'SPECIES',
        'OP_PP',
        'OP_PA',
        'OA_PP',
        'OA_PA',
        'PREVALENCE',
        'SENSITIVITY',
        'FALSE_NEGATIVE_RATE',
        'SPECIFICITY',
        'FALSE_POSITIVE_RATE',
        'PERCENT_CORRECT',
        'ODDS_RATIO',
        'KAPPA',
    ]

    # Open the stats file and print out the header line.  The context
    # manager closes the file even if a statistic raises.
    with open(self.statistics_file, 'w') as stats_fh:
        stats_fh.write(','.join(header) + '\n')

        # For each variable, calculate the statistics
        for v in obs.dtype.names:

            # Get the metadata for this field.  Exception (rather than a
            # bare except) keeps Ctrl-C and sys.exit() working.
            try:
                fm = mp.get_attribute(v)
            except Exception:
                err_msg = v + ' is missing metadata.'
                print(err_msg)
                continue

            # Only continue if this is a continuous species variable
            if fm.field_type != 'CONTINUOUS' or fm.species_attr == 0:
                continue

            obs_vals = getattr(obs, v)
            prd_vals = getattr(prd, v)

            # Create a binary error matrix from the obs and prd data
            stats = statistics.BinaryErrorMatrix(obs_vals, prd_vals)
            counts = stats.counts()

            # Build the list of items for printing
            out_list = [
                v,
                '%d' % counts[0, 0],
                '%d' % counts[0, 1],
                '%d' % counts[1, 0],
                '%d' % counts[1, 1],
                '%.4f' % stats.prevalence(),
                '%.4f' % stats.sensitivity(),
                '%.4f' % stats.false_negative_rate(),
                '%.4f' % stats.specificity(),
                '%.4f' % stats.false_positive_rate(),
                '%.4f' % stats.percent_correct(),
                '%.4f' % stats.odds_ratio(),
                '%.4f' % stats.kappa(),
            ]
            stats_fh.write(','.join(out_list) + '\n')
def _create_story(self):
    """
    Build the report section on species accuracy at plot locations.

    The section consists of a page break, a section title, an
    explanation of the kappa coefficient and the abbreviations used,
    one table row per species (name, prevalence, a 2x2 block of
    observed/predicted plot counts, kappa) read from
    self.species_accuracy_file, and a closing note on rare species.

    Parameters
    ----------
    None

    Returns
    -------
    story : list
        The page elements created for this section, in display order
    """

    def _count_table(cells):
        # Wrap a 2x2 list of paragraphs in the inner table used for the
        # OP/PP, OP/PA, OA/PP, OA/PA block (header and data rows share
        # the same layout)
        inner = p.Table(cells, colWidths=[0.75 * u.inch, 0.75 * u.inch])
        inner.setStyle(
            p.TableStyle([
                ('GRID', (0, 0), (-1, -1), 1, colors.white),
                ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
                ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                ('TOPPADDING', (0, 0), (-1, -1), 2),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 2),
            ]))
        return inner

    # Set up an empty list to hold the story
    story = []

    # Import the report styles
    styles = report_styles.get_report_styles()
    left_style = styles['body_style_10']
    right_style = styles['body_style_10_right']

    # Create a page break
    story = self._make_page_break(story, self.PORTRAIT)

    # Section title
    title_str = '<strong>Local-Scale Accuracy Assessment:<br/>'
    title_str += 'Species Accuracy at Plot Locations'
    title_str += '</strong>'
    para = p.Paragraph(title_str, styles['section_style'])
    t = p.Table([[para]], colWidths=[7.5 * u.inch])
    t.setStyle(
        p.TableStyle([
            ('TOPPADDING', (0, 0), (-1, -1), 6),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
            ('BACKGROUND', (0, 0), (-1, -1), '#957348'),
            ('ALIGNMENT', (0, 0), (-1, -1), 'LEFT'),
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('GRID', (0, 0), (-1, -1), 0.25, colors.black),
        ]))
    story.append(t)
    story.append(p.Spacer(0, 0.2 * u.inch))

    # Kappa explanation
    kappa_str = '''
        Cohen's kappa coefficient (Cohen, 1960) is a statistical measure
        of reliability, accounting for agreement occurring by chance.
        The equation for kappa is:
    '''
    para = p.Paragraph(kappa_str, styles['body_style'])
    story.append(para)
    story.append(p.Spacer(0, 0.05 * u.inch))

    kappa_str = '''
        kappa = (Pr(a) - Pr(e)) / (1.0 - Pr(e))
    '''
    para = p.Paragraph(kappa_str, styles['indented'])
    story.append(para)
    story.append(p.Spacer(0, 0.05 * u.inch))

    kappa_str = '''
        where Pr(a) is the relative observed agreement among
        raters, and Pr(e) is the probability that agreement is
        due to chance.<br/><br/>

        <strong>Abbreviations Used:</strong><br/>
        OP/PP = Observed Present / Predicted Present<br/>
        OA/PP = Observed Absent / Predicted Present
        (errors of commission)<br/>
        OP/PA = Observed Present / Predicted Absent
        (errors of omission)<br/>
        OA/PA = Observed Absent / Predicted Absent
    '''
    para = p.Paragraph(kappa_str, styles['body_style'])
    story.append(para)
    story.append(p.Spacer(0, 0.2 * u.inch))

    # Header row: species name, prevalence, count block, kappa
    spp_str = '<strong>Species PLANTS Code<br/>'
    spp_str += 'Scientific Name / Common Name</strong>'
    header_cells = [
        [
            p.Paragraph('<strong>OP/PP</strong>', right_style),
            p.Paragraph('<strong>OP/PA</strong>', right_style),
        ],
        [
            p.Paragraph('<strong>OA/PP</strong>', right_style),
            p.Paragraph('<strong>OA/PA</strong>', right_style),
        ],
    ]
    header_row = [
        p.Paragraph(spp_str, left_style),
        p.Paragraph('<strong>Species prevalence</strong>', left_style),
        _count_table(header_cells),
        p.Paragraph('<strong>Kappa coefficient</strong>', left_style),
    ]

    # Create a list of lists to hold the species accuracy information
    species_table = [header_row]

    # Open the species accuracy file into a recarray
    spp_data = utilities.csv2rec(self.species_accuracy_file)

    # Read in the stand attribute metadata
    mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

    # Read in the report metadata if it exists
    if self.report_metadata_file:
        rmp = xrmp.XMLReportMetadataParser(self.report_metadata_file)
    else:
        rmp = None

    # Subset the attributes to just species
    attrs = [
        attr.field_name for attr in mp.attributes
        if attr.species_attr == 1 and 'NOTALY' not in attr.field_name
    ]

    # Iterate over the species and print out the statistics
    for spp in attrs:

        # Get the scientific and common names from the report metadata
        # if it exists; otherwise, just use the species symbol
        if rmp is not None:
            try:
                # Strip off any suffix if it exists
                spp_plain = spp.split('_')[0]
                spp_info = rmp.get_species(spp_plain)
                spp_str = spp_info.spp_symbol + '<br/>'
                spp_str += spp_info.scientific_name + ' / '
                spp_str += spp_info.common_name
            except IndexError:
                # Fall back to the bare attribute name when the lookup
                # fails with an IndexError
                spp_str = spp
        else:
            spp_str = spp

        # Get the statistical information (first matching record)
        data = spp_data[spp_data.SPECIES == spp][0]
        counts = [data.OP_PP, data.OP_PA, data.OA_PP, data.OA_PA]

        # Capture the plot counts in an inner table laid out as
        # [[OP/PP, OP/PA], [OA/PP, OA/PA]] to match the header block
        count_paras = [p.Paragraph('%d' % c, right_style) for c in counts]
        count_cells = [count_paras[0:2], count_paras[2:4]]

        # Push this row to the master species table
        species_table.append([
            p.Paragraph(spp_str, left_style),
            p.Paragraph('%.4f' % data.PREVALENCE, right_style),
            _count_table(count_cells),
            p.Paragraph('%.4f' % data.KAPPA, right_style),
        ])

    # Style this into a reportlab table and add to the story
    col_widths = [(x * u.inch) for x in [4.0, 0.75, 1.5, 0.75]]
    t = p.Table(species_table, colWidths=col_widths)
    t.setStyle(
        p.TableStyle([
            ('BACKGROUND', (0, 0), (-1, -1), '#f1efe4'),
            ('GRID', (0, 0), (-1, -1), 2, colors.white),

            # Column 0: species name
            ('TOPPADDING', (0, 0), (0, -1), 2),
            ('BOTTOMPADDING', (0, 0), (0, -1), 2),
            ('LEFTPADDING', (0, 0), (0, -1), 6),
            ('RIGHTPADDING', (0, 0), (0, -1), 6),
            ('ALIGNMENT', (0, 0), (0, -1), 'LEFT'),
            ('VALIGN', (0, 0), (0, -1), 'TOP'),

            # Column 1: prevalence
            ('TOPPADDING', (1, 0), (1, -1), 2),
            ('BOTTOMPADDING', (1, 0), (1, -1), 2),
            ('LEFTPADDING', (1, 0), (1, -1), 6),
            ('RIGHTPADDING', (1, 0), (1, -1), 6),
            ('ALIGNMENT', (1, 0), (1, -1), 'RIGHT'),
            ('VALIGN', (1, 0), (1, 0), 'TOP'),
            ('VALIGN', (1, 1), (1, -1), 'MIDDLE'),

            # Column 2: inner count table (no padding so the inner grid
            # fills the cell)
            ('TOPPADDING', (2, 0), (2, -1), 0),
            ('BOTTOMPADDING', (2, 0), (2, -1), 0),
            ('LEFTPADDING', (2, 0), (2, -1), 0),
            ('RIGHTPADDING', (2, 0), (2, -1), 0),
            ('ALIGNMENT', (2, 0), (2, -1), 'LEFT'),
            ('VALIGN', (2, 0), (2, -1), 'TOP'),

            # Column 3: kappa
            ('TOPPADDING', (3, 0), (3, -1), 2),
            ('BOTTOMPADDING', (3, 0), (3, -1), 2),
            ('LEFTPADDING', (3, 0), (3, -1), 6),
            ('RIGHTPADDING', (3, 0), (3, -1), 6),
            ('ALIGNMENT', (3, 0), (3, -1), 'RIGHT'),
            ('VALIGN', (3, 0), (3, 0), 'TOP'),
            ('VALIGN', (3, 1), (3, -1), 'MIDDLE'),
        ]))
    story.append(t)
    story.append(p.Spacer(0, 0.1 * u.inch))

    rare_species_str = """
        Note that some very rare species do not appear in this accuracy
        report, because these species were not included when building
        the initial ordination model.  The full set of species is
        available upon request.
    """
    para = p.Paragraph(rare_species_str, styles['body_style'])
    story.append(para)

    # Return this story
    return story