Example #1
    def create_attribute_data(self):
        """
        Create the attribute data, which contains both stand-level attributes
        and species data

        Parameters
        ----------
        None

        Returns
        -------
        None
        """

        p = self.parameter_parser

        # Store the list of plot IDs as a string if it hasn't been
        # created yet
        if not hasattr(self, 'id_str'):
            self.id_str = self._get_id_string()

        # Get the attribute data
        attribute_table = self.plot_db.get_attribute_data(self.id_str)
        attribute_file = p.stand_attribute_file
        utilities.rec2csv(attribute_table, attribute_file)
        field_names = attribute_table.dtype.names
        self.create_attribute_metadata(field_names)
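Every example on this page writes its output through utilities.rec2csv, whose source is not shown here. A minimal sketch of the assumed behavior (NumPy recarray in, headered CSV out); the real helper may handle quoting and type formatting differently:

    def rec2csv(recarray, csv_file):
        """Write a recarray to CSV: a header row, then one row per record."""
        with open(csv_file, 'w') as fh:
            fh.write(','.join(recarray.dtype.names) + '\n')
            for rec in recarray:
                # rec.tolist() converts a record to a tuple of Python scalars
                fh.write(','.join(str(v) for v in rec.tolist()) + '\n')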
Example #2
    def create_hex_attribute_file(self):
        """
        Create the file containing hexagon IDs and continuous stand
        attributes for forested Annual plots to be used in Riemann
        accuracy diagnostics

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        p = self.parameter_parser

        # Format list parameters as comma-delimited strings
        plot_years = ','.join([str(x) for x in p.plot_years])
        image_years = ','.join([str(x) for x in p.image_years])

        # Get the crosswalk of plot IDs to Hex IDs and write it out
        hex_attributes = self.plot_db.get_hex_attributes(
            p.riemann_assessment_year, plot_years, image_years)

        hex_attribute_file = p.hex_attribute_file
        riemann_dir = os.path.dirname(hex_attribute_file)
        if not os.path.exists(riemann_dir):
            os.makedirs(riemann_dir)
        utilities.rec2csv(hex_attributes, hex_attribute_file)
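The year lists are formatted as comma-delimited strings with a plain join over stringified integers; a worked example with hypothetical values:

    plot_years = [2000, 2001, 2002]   # hypothetical values
    image_years = [2000, 2002]
    print(','.join([str(x) for x in plot_years]))   # -> '2000,2001,2002'
    print(','.join([str(x) for x in image_years]))  # -> '2000,2002'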
Example #3
    def create_validation_attribute_file(self):
        """
        Create the file containing structure and species
        attributes for the plots to be used in the validation
        accuracy diagnostics

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        p = self.parameter_parser

        # Get the attributes for the validation plots
        validation_attributes = self.plot_db.get_validation_attributes()

        # Write these data out to the validation_attribute_file
        validation_attribute_file = p.validation_attribute_file
        validation_dir = p.validation_output_folder
        if not os.path.exists(validation_dir):
            os.makedirs(validation_dir)
        utilities.rec2csv(validation_attributes, validation_attribute_file)
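The exists()/makedirs() guard used in this and the previous example is racy if two processes create the folder at the same time; on Python 3.2+ the same intent can be expressed in one call (a sketch, not what the original code does):

    import os

    def ensure_dir(path):
        """Create path (and any missing parents) if it does not exist."""
        os.makedirs(path, exist_ok=True)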
Example #4
    def run_diagnostic(self):

        # Read in the dependent nn_index_file
        in_data = utilities.csv2rec(self.nn_index_file)

        # Subset the observed data to just those values above the
        # index threshold
        in_data = in_data[in_data.AVERAGE_POSITION >= self.index_threshold]

        # Write out the resulting recarray
        utilities.rec2csv(in_data, self.nn_index_outlier_file)
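The subsetting step relies on recarray boolean masking: comparing a field against a scalar yields a boolean array that then indexes the records. A self-contained toy run with hypothetical values (the field name matches the example):

    import numpy as np

    in_data = np.rec.fromrecords(
        [(1, 2.5), (2, 8.0), (3, 11.2)], names=['ID', 'AVERAGE_POSITION'])
    index_threshold = 10.0

    # Keep only the records at or above the threshold
    outliers = in_data[in_data.AVERAGE_POSITION >= index_threshold]
    print(outliers.ID)  # -> [3]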
Example #5
    def write_hex_stats(self, data, id_field, stat_fields, min_plots_per_hex,
            out_file):

        # Summarize the observed output
        stats = mlab.rec_groupby(data, (id_field,), stat_fields)

        # Filter so that the minimum number of plots per hex is maintained
        stats = stats[stats.PLOT_COUNT >= min_plots_per_hex]

        # Write out the file
        utilities.rec2csv(stats, out_file)
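mlab.rec_groupby belongs to the record-array helpers that were removed from matplotlib in 3.0. A minimal stand-in, assuming stat_fields follows mlab's (input_field, function, output_name) convention used in these examples:

    import numpy as np

    def rec_groupby(data, group_fields, stat_fields):
        """Group a recarray on group_fields; apply (field, func, name) specs."""
        keys = sorted(set(tuple(rec[f] for f in group_fields) for rec in data))
        rows = []
        for key in keys:
            mask = np.ones(len(data), dtype=bool)
            for f, v in zip(group_fields, key):
                mask &= (data[f] == v)
            subset = data[mask]
            rows.append(key + tuple(func(subset[f]) for f, func, _ in stat_fields))
        names = list(group_fields) + [name for _, _, name in stat_fields]
        return np.rec.fromrecords(rows, names=names)

    # e.g. rec_groupby(data, ('HEX_ID',),
    #                  (('PLOT_ID', len, 'PLOT_COUNT'), ('BA', np.mean, 'BA')))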
Example #6
    def create_area_estimates(self):
        """
        Create the observed area estimates file, which stores plot-based
        estimates of stand variables

        Parameters
        ----------
        None

        Returns
        -------
        None
        """

        # Set an alias for the parameter parser
        p = self.parameter_parser

        # Get the 'eslf_only' flag from the exclusion codes
        # Removed since the DB proc no longer has an option for
        # using only plots with ESLF codes
        # if 'eslf_only' in p.exclusion_codes:
        #     eslf_only = 0
        # else:
        #     eslf_only = 1

        # Get the area expansion data
        area_estimate_table, nf_hectares, ns_hectares = \
            self.plot_db.get_area_estimates(p.regional_assessment_year)

        # Create nonforest and nonsampled records to be concatenated with the
        # existing area_estimate_table recarray.  The nonforest record
        # has an ID of -10001 and the nonsampled record has an ID of -10002
        id_field = p.summary_level + 'ID'
        new_recs = np.recarray(2, dtype=area_estimate_table.dtype)
        for f in new_recs.dtype.names:
            for rec in new_recs:
                setattr(rec, f, 0.0)
        setattr(new_recs[0], id_field, -10002)
        setattr(new_recs[0], 'HECTARES', ns_hectares)
        setattr(new_recs[1], id_field, -10001)
        setattr(new_recs[1], 'HECTARES', nf_hectares)
        area_estimate_table = np.hstack((new_recs, area_estimate_table))

        # Write out to a CSV file
        area_estimate_file = p.area_estimate_file
        aa_dir = os.path.dirname(area_estimate_file)
        if not os.path.exists(aa_dir):
            os.makedirs(aa_dir)
        utilities.rec2csv(area_estimate_table, area_estimate_file)
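The sentinel-record block above can be written more compactly with np.zeros, which initializes every field to zero and removes the nested setattr loop. A sketch using a hypothetical dtype (the real one comes from area_estimate_table):

    import numpy as np

    dtype = [('ECOID', 'i8'), ('HECTARES', 'f8'), ('BA', 'f8')]  # hypothetical
    new_recs = np.zeros(2, dtype=dtype).view(np.recarray)
    new_recs[0].ECOID = -10002     # nonsampled sentinel ID
    new_recs[0].HECTARES = 150.0   # stand-in for ns_hectares
    new_recs[1].ECOID = -10001     # nonforest sentinel ID
    new_recs[1].HECTARES = 900.0   # stand-in for nf_hectares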
Example #7
    def create_species_plot_count_file(self):
        p = self.parameter_parser

        # Store the list of plot IDs as a string if it hasn't been
        # created yet
        if not hasattr(self, 'id_str'):
            self.id_str = self._get_id_string()

        spp_plot_table = self.plot_db.get_species_plot_counts(self.id_str)
        spp_plot_file = p.model_directory + '/' + p.model_type + \
                            '_spp_plot_counts.csv'
        if p.model_type in p.imagery_model_types:
            utilities.rec2csv(spp_plot_table, spp_plot_file)
        else:
            # Create 2 ID strings for non-imagery models, one with inventory
            # and Ecoplots and one with inventory plots only
            try:
                ecoplot_index = p.plot_types.index('ecoplot')
            except ValueError:
                # If Ecoplots are not already in the list, create another ID
                # string with them included (copy first so p.plot_types is
                # not mutated in place)
                plot_types_w_eco = list(p.plot_types)
                plot_types_w_eco.append('ecoplot')
                plot_types_w_eco_str = ','.join(plot_types_w_eco)
                id_str2 = self._get_id_string(plot_types_w_eco_str)
                id_eco = 2
            else:
                # If Ecoplots are already in the list, create another ID
                # string without them included (again copying first)
                plot_types_wo_eco = list(p.plot_types)
                plot_types_wo_eco.remove('ecoplot')
                plot_types_wo_eco_str = ','.join(plot_types_wo_eco)
                id_str2 = self._get_id_string(plot_types_wo_eco_str)
                id_eco = 1

            spp_plot_table2 = self.plot_db.get_species_plot_counts(id_str2)

            # Join the plot counts w/ Ecoplots to the plot counts w/o Ecoplots
            if id_eco == 1:
                joined_spp_plot_table = mlab.rec_join('SPP_LAYER',
                                                      spp_plot_table,
                                                      spp_plot_table2,
                                                      'leftouter')
            else:
                joined_spp_plot_table = mlab.rec_join('SPP_LAYER',
                                                      spp_plot_table2,
                                                      spp_plot_table,
                                                      'leftouter')
            utilities.rec2csv(joined_spp_plot_table, spp_plot_file)
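The try/except around list.index() is really a membership test, and without the list() copies above the two branches would mutate p.plot_types in place. A standalone sketch of the same split, with hypothetical plot-type names:

    def split_plot_types(plot_types):
        """Return (types with 'ecoplot', types without it), leaving the
        input list unchanged."""
        with_eco = list(plot_types)
        without_eco = list(plot_types)
        if 'ecoplot' in without_eco:
            without_eco.remove('ecoplot')
        else:
            with_eco.append('ecoplot')
        return with_eco, without_eco

    # split_plot_types(['periodic', 'annual'])
    #   -> (['periodic', 'annual', 'ecoplot'], ['periodic', 'annual'])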
Example #8
    def create_ordination_matrices(self):
        """
        Create the species and environmental matrices needed for ordination
        modeling.  Write these files out to the locations specified in the
        parameter file

        Parameters
        ----------
        None

        Returns
        -------
        None
        """

        p = self.parameter_parser

        # Format list parameters as comma-delimited strings
        plot_years = ','.join([str(x) for x in p.plot_years])
        image_years = ','.join([str(x) for x in p.image_years])
        ordination_variables = ','.join(p.get_ordination_variable_names())

        # Store the list of plot IDs as a string if it hasn't been
        # created yet
        if not hasattr(self, 'id_str'):
            self.id_str = self._get_id_string()

        # Get the species matrix and write it out
        spp_table = self.plot_db.get_species_matrix(self.id_str, 'ORDINATION',
                                                    p.lump_table)
        spp_file = p.species_matrix_file
        utilities.rec2csv(spp_table, spp_file)

        # Get the environmental matrix and write it out
        env_table = self.plot_db.get_environmental_matrix(
            self.id_str, plot_years, image_years, ordination_variables)
        env_file = p.environmental_matrix_file
        utilities.rec2csv(env_table, env_file)
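The hasattr() guard seen here and in Examples #1 and #7 is a lazy-caching idiom: compute the ID string once on first use, then reuse the stored value. A stripped-down sketch of the pattern:

    class IDStringCache(object):
        def _get_id_string(self):
            return '1,2,3'  # stand-in for the real database-derived ID list

        def id_string(self):
            if not hasattr(self, 'id_str'):
                self.id_str = self._get_id_string()  # computed only once
            return self.id_str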
Example #9
    def run_diagnostic(self):

        # Shortcut to the parameter parser
        p = self.parameter_parser

        # ID field
        id_field = p.summary_level + 'ID'

        # Root directory for Riemann files
        root_dir = p.riemann_output_folder

        # Read in hex input file
        obs_data = utilities.csv2rec(self.hex_attribute_file)

        # Get the hexagon levels and ensure that the fields exist in the
        # hex_attribute file
        hex_resolutions = p.riemann_hex_resolutions
        hex_fields = [x[0] for x in hex_resolutions]
        for field in hex_fields:
            if field not in obs_data.dtype.names:
                err_msg = 'Field ' + field + ' does not exist in the '
                err_msg += 'hex_attribute file'
                raise ValueError(err_msg)

        # Create the directory structure based on the hex levels
        hex_levels = ['hex_' + str(x[1]) for x in hex_resolutions]
        all_levels = ['plot_pixel'] + hex_levels
        for level in all_levels:
            sub_dir = os.path.join(root_dir, level)
            if not os.path.exists(sub_dir):
                os.makedirs(sub_dir)

        # Get the values of k
        k_values = p.riemann_k_values

        # Create a dictionary of plot ID to image year (or model_year for
        # non-imagery models) for these plots
        if p.model_type in p.imagery_model_types:
            id_x_year = dict((x[id_field], x.IMAGE_YEAR) for x in obs_data)
        else:
            id_x_year = dict((x[id_field], p.model_year) for x in obs_data)

        # Create a PredictionRun instance
        pr = prediction_run.PredictionRun(p)

        # Get the neighbors and distances for these IDs
        pr.calculate_neighbors_at_ids(id_x_year, id_field=id_field)

        # Create the lookup of id_field to LOC_ID for the hex plots
        nsa_id_dict = dict((x[id_field], x.LOC_ID) for x in obs_data)

        # Create a dictionary between id_field and no_self_assign_field
        # for the model plots
        env_file = p.environmental_matrix_file
        env_data = utilities.csv2rec(env_file)
        model_nsa_id_dict = dict(
            (getattr(x, id_field), x.LOC_ID) for x in env_data)

        # Stitch the two dictionaries together
        for id in sorted(model_nsa_id_dict.keys()):
            if id not in nsa_id_dict:
                nsa_id_dict[id] = model_nsa_id_dict[id]

        # Get the stand attribute metadata and retrieve only the
        # continuous accuracy attributes
        stand_metadata_file = p.stand_metadata_file
        mp = xsmp.XMLStandMetadataParser(stand_metadata_file)
        attrs = [
            x.field_name for x in mp.attributes
            if x.field_type == 'CONTINUOUS' and x.accuracy_attr == 1
        ]

        # Subset the attributes for fields that are in the
        # hex_attribute file
        attrs = [x for x in attrs if x in obs_data.dtype.names]
        plot_pixel_obs = mlab.rec_keep_fields(obs_data, [id_field] + attrs)

        # Write out the plot_pixel observed file
        file_name = 'plot_pixel_observed.csv'
        output_file = os.path.join(root_dir, 'plot_pixel', file_name)
        utilities.rec2csv(plot_pixel_obs, output_file)

        # Iterate over values of k
        for k in k_values:

            # Construct the output file name
            file_name = '_'.join(('plot_pixel', 'predicted', 'k' + str(k)))
            file_name += '.csv'
            output_file = os.path.join(root_dir, 'plot_pixel', file_name)
            out_fh = open(output_file, 'w')

            # For the plot/pixel scale, retrieve the independent predicted
            # data for this value of k.  Even though attributes are being
            # returned from this function, we want to use the attribute list
            # that we've already found above.
            prediction_generator = pr.calculate_predictions_at_k(
                k=k,
                id_field=id_field,
                independent=True,
                nsa_id_dict=nsa_id_dict)

            # Write out the field names
            out_fh.write(id_field + ',' + ','.join(attrs) + '\n')

            # Write out the predictions for this k
            for plot_prediction in prediction_generator:

                # Write this record to the predicted attribute file
                pr.write_predicted_record(plot_prediction, out_fh, attrs=attrs)

            # Close this file
            out_fh.close()

        # Create the fields for which to extract statistics at the hexagon
        # levels
        mean_fields = [(id_field, len, 'PLOT_COUNT')]
        mean_fields.extend([(x, np.mean, x) for x in attrs])
        mean_fields = tuple(mean_fields)

        sd_fields = [(id_field, len, 'PLOT_COUNT')]
        sd_fields.extend([(x, np.std, x) for x in attrs])
        sd_fields = tuple(sd_fields)

        stat_sets = {
            'mean': mean_fields,
            'std': sd_fields,
        }

        # For each hexagon level, associate the plots with their hexagon ID
        # and find observed and predicted statistics for each hexagon
        for hex_resolution in hex_resolutions:

            (hex_id_field, hex_distance) = hex_resolution[0:2]
            min_plots_per_hex = hex_resolution[3]
            prefix = 'hex_' + str(hex_distance)

            # Create a crosswalk between the id_field and the hex_id_field
            id_x_hex = mlab.rec_keep_fields(obs_data, [id_field, hex_id_field])

            # Iterate over all sets of statistics and write a unique file
            # for each set
            for (stat_name, stat_fields) in stat_sets.items():

                # Get the output file name
                obs_out_file = \
                    '_'.join((prefix, 'observed', stat_name)) + '.csv'
                obs_out_file = os.path.join(root_dir, prefix, obs_out_file)

                # Write out the observed file
                self.write_hex_stats(obs_data, hex_id_field, stat_fields,
                                     min_plots_per_hex, obs_out_file)

            # Iterate over values of k for the predicted values
            for k in k_values:

                # Open the plot_pixel predicted file for this value of k
                # and join the hex_id_field to the recarray
                prd_file = 'plot_pixel_predicted_k' + str(k) + '.csv'
                prd_file = os.path.join(root_dir, 'plot_pixel', prd_file)
                prd_data = utilities.csv2rec(prd_file)
                prd_data = mlab.rec_join(id_field, prd_data, id_x_hex)

                # Iterate over all sets of statistics and write a unique file
                # for each set
                for (stat_name, stat_fields) in stat_sets.items():

                    # Get the output file name
                    prd_out_file = '_'.join((prefix, 'predicted', 'k' + str(k),
                                             stat_name)) + '.csv'
                    prd_out_file = os.path.join(root_dir, prefix, prd_out_file)

                    # Write out the predicted file
                    self.write_hex_stats(prd_data, hex_id_field, stat_fields,
                                         min_plots_per_hex, prd_out_file)

        # Calculate the ECDF and AC statistics
        # For ECDF and AC, it is a paired comparison between the observed
        # and predicted data.  We do this at each value of k and for each
        # hex resolution level.

        # Open the stats file
        stats_file = p.hex_statistics_file
        stats_fh = open(stats_file, 'w')
        header_fields = ['LEVEL', 'K', 'VARIABLE', 'STATISTIC', 'VALUE']
        stats_fh.write(','.join(header_fields) + '\n')

        # Create a list of RiemannComparison instances which store the
        # information needed to do comparisons between observed and predicted
        # files for any level or value of k
        compare_list = []
        for hex_resolution in hex_resolutions:
            (hex_id_field, hex_distance) = hex_resolution[0:2]
            prefix = 'hex_' + str(hex_distance)
            obs_file = '_'.join((prefix, 'observed', 'mean')) + '.csv'
            obs_file = os.path.join(root_dir, prefix, obs_file)
            for k in k_values:
                prd_file = '_'.join(
                    (prefix, 'predicted', 'k' + str(k), 'mean')) + '.csv'
                prd_file = os.path.join(root_dir, prefix, prd_file)
                r = RiemannComparison(prefix, obs_file, prd_file, hex_id_field,
                                      k)
                compare_list.append(r)

        # Add the plot_pixel comparisons to this list
        prefix = 'plot_pixel'
        obs_file = 'plot_pixel_observed.csv'
        obs_file = os.path.join(root_dir, prefix, obs_file)
        for k in k_values:
            prd_file = 'plot_pixel_predicted_k' + str(k) + '.csv'
            prd_file = os.path.join(root_dir, prefix, prd_file)
            r = RiemannComparison(prefix, obs_file, prd_file, id_field, k)
            compare_list.append(r)

        # Do all the comparisons
        for c in compare_list:

            # Open the observed file
            obs_data = utilities.csv2rec(c.obs_file)

            # Open the predicted file
            prd_data = utilities.csv2rec(c.prd_file)

            # Ensure that the IDs between the observed and predicted
            # data line up
            ids1 = getattr(obs_data, c.id_field)
            ids2 = getattr(prd_data, c.id_field)
            if np.any(ids1 != ids2):
                err_msg = 'IDs do not match between observed and '
                err_msg += 'predicted data'
                raise ValueError(err_msg)

            for attr in attrs:
                arr1 = getattr(obs_data, attr)
                arr2 = getattr(prd_data, attr)
                rv = RiemannVariable(arr1, arr2)

                gmfr_stats = rv.gmfr_statistics()
                for stat in ('gmfr_a', 'gmfr_b', 'ac', 'ac_sys', 'ac_uns'):
                    stat_line = '%s,%d,%s,%s,%.4f\n' % (c.prefix.upper(), c.k,
                                                        attr, stat.upper(),
                                                        gmfr_stats[stat])
                    stats_fh.write(stat_line)

                ks_stats = rv.ks_statistics()
                for stat in ('ks_max', 'ks_mean'):
                    stat_line = '%s,%d,%s,%s,%.4f\n' % (c.prefix.upper(), c.k,
                                                        attr, stat.upper(),
                                                        ks_stats[stat])
                    stats_fh.write(stat_line)

        # Close the stats file
        stats_fh.close()
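Of the statistics written at the end, only the ECDF comparison has a shape hinted at by its stat names. A hedged sketch of what RiemannVariable.ks_statistics might compute, assuming ks_max is the largest vertical gap between the observed and predicted ECDFs and ks_mean the average gap over the pooled sample; the real class may differ:

    import numpy as np

    def ks_statistics(obs, prd):
        """Max and mean vertical distance between two empirical CDFs."""
        pooled = np.sort(np.concatenate((obs, prd)))
        ecdf_o = np.searchsorted(np.sort(obs), pooled, 'right') / float(len(obs))
        ecdf_p = np.searchsorted(np.sort(prd), pooled, 'right') / float(len(prd))
        diffs = np.abs(ecdf_o - ecdf_p)
        return {'ks_max': diffs.max(), 'ks_mean': diffs.mean()}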