Exemplo n.º 1
0
def draw_scatterplot(x, y, metadata, output_type=SCREEN, output_file=None,
    **kwargs):
    """
    Draw an observed-vs-predicted scatterplot with a 1:1 reference line.

    The current pyplot figure is cleared and resized (3.4 x 3.0 at 250 dpi),
    the points are plotted, and the correlation coefficient, normalized
    RMSE (RMSE divided by the mean of ``x``) and R-square are annotated in
    the upper-left corner of the axes.  Both axes share the same limits so
    that the 1:1 line runs corner to corner.

    Parameters
    ----------
    x : array-like
        Values plotted on the x axis (labelled "Observed").  Must provide
        ``min()``, ``max()`` and ``mean()`` methods.
    y : array-like
        Values plotted on the y axis (labelled "Predicted").  Must provide
        ``min()`` and ``max()`` methods.
    metadata : object
        Supplies ``field_name``, ``short_description`` and ``units`` for
        the title and axis labels.  A ``units`` value of ``'none'``
        suppresses the unit suffix on the labels.
    output_type : optional
        ``SCREEN`` turns interactive mode on; any other value turns it
        off.  ``FILE`` additionally saves the figure to ``output_file``.
    output_file : optional
        Destination handed to ``pl.savefig``; only used when
        ``output_type`` is ``FILE``.
    **kwargs
        Forwarded to ``pl.scatter``.  ``s``, ``c``, ``edgecolor`` and
        ``linewidth`` are already set here, so passing them again raises
        a duplicate-keyword ``TypeError``.
    """
    import matplotlib.ticker as ticker

    text_size = 4.5

    # Unpack the metadata information
    variable = metadata.field_name
    short_desc = metadata.short_description
    units = metadata.units

    # Interactive mode is only wanted when drawing to the screen
    if output_type == SCREEN:
        pl.ion()
    else:
        pl.ioff()

    # Start from a clean figure of fixed size
    pl.clf()
    fig = pl.gcf()
    fig.set_figwidth(3.4)
    fig.set_figheight(3.0)
    fig.set_dpi(250)

    # Find the overall min and max across both axes
    x_min, y_min = x.min(), y.min()
    x_max, y_max = x.max(), y.max()
    abs_min = x_min if x_min < y_min else y_min
    abs_max = x_max if x_max > y_max else y_max

    # Draw the scatterplot data and title
    pl.scatter(x, y, s=2, c='b', edgecolor='k', linewidth=0.25, **kwargs)
    pl.title(variable + ' : ' + short_desc, size=text_size)
    ax = pl.gca()

    # Calculate correlation coefficient, normalized RMSE and r_square
    this_corr = statistics.pearson_r(x, y)
    this_rmse = statistics.rmse(x, y) / x.mean()
    this_r2 = statistics.r2(x, y)

    # Draw the annotation text on the figure (positions are in axes
    # coordinates, so they are independent of the data range)
    pl.text(0.89, 0.93,
        '1:1', transform=ax.transAxes, size=text_size, rotation=45)
    annotations = [
        (0.93, 'Correlation coefficient:  %.4f' % (this_corr)),
        (0.89, 'Normalized RMSE: %.4f' % (this_rmse)),
        (0.85, 'R-square: %.4f' % (this_r2)),
    ]
    for y_pos, label in annotations:
        pl.text(0.05, y_pos, label, transform=ax.transAxes, size=text_size)

    # Draw the 1:1 line and format the x and y axes
    pl.plot([abs_min, abs_max], [abs_min, abs_max], 'k-', linewidth=0.5)
    ylabel_str = 'Predicted ' + variable
    xlabel_str = 'Observed ' + variable
    if units != 'none':
        unit_suffix = ' (' + units + ')'
        ylabel_str += unit_suffix
        xlabel_str += unit_suffix
    pl.ylabel(ylabel_str, size=text_size)
    pl.xlabel(xlabel_str, size=text_size)

    # Give every formatter slot its own instance: a formatter is bound to
    # the Axis it is installed on, so one instance should not be shared
    # between the x and y axes.
    for each_axis in (ax.xaxis, ax.yaxis):
        each_axis.set_major_formatter(ticker.OldScalarFormatter())
        each_axis.set_minor_formatter(ticker.OldScalarFormatter())

    pl.xticks(size=4)
    pl.yticks(size=4)

    # Use identical limits on both axes, padded by 1% of the data span
    pad = 0.01 * (abs_max - abs_min)
    pl.xlim(abs_min - pad, abs_max + pad)
    pl.ylim(abs_min - pad, abs_max + pad)

    # Position the main axis within the figure
    frame_x = 0.125
    frame_width = 0.855
    frame_y = 0.100
    frame_height = 0.830
    ax.set_position([frame_x, frame_y, frame_width, frame_height])
    ax.axesPatch.set_linewidth(0.2)
    for spine in ax.spines.values():
        spine.set_linewidth(0.2)

    # Set fill and edge for the figure
    fig.figurePatch.set_edgecolor('k')
    fig.figurePatch.set_linewidth(2.0)

    # Draw and output to file if requested
    pl.draw()
    if output_type == FILE:
        pl.savefig(output_file, dpi=250, edgecolor='k')
Exemplo n.º 2
0
    def run_diagnostic(self):
        """
        Write per-variable accuracy statistics to ``self.statistics_file``.

        Reads ``self.observed_file`` and ``self.predicted_file``, keeps only
        the observed rows whose ``self.id_field`` value occurs in the
        predicted data, and writes one CSV row of statistics (Pearson r,
        Spearman r, RMSE, RMSE normalized by the observed mean, bias
        percentage and R-square) for every field that the stand metadata
        marks as a continuous accuracy attribute.

        Fields without metadata are reported on stdout and skipped.  When
        all observed values are zero every statistic is written as 0.0;
        when only the predicted values are all zero the two correlation
        statistics are written as 0.0.

        NOTE(review): only the observed records are subset; the predicted
        records are used as read.  This presumably relies on both files
        holding the same IDs in the same order -- confirm against callers.
        """
        header = [
            'VARIABLE',
            'PEARSON_R',
            'SPEARMAN_R',
            'RMSE',
            'NORMALIZED_RMSE',
            'BIAS_PERCENTAGE',
            'R_SQUARE',
        ]

        # The context manager guarantees the stats file is closed even if
        # reading the inputs or computing a statistic raises.
        with open(self.statistics_file, 'w') as stats_fh:
            stats_fh.write(','.join(header) + '\n')

            # Read the observed and predicted files into numpy recarrays
            obs = utilities.csv2rec(self.observed_file)
            prd = utilities.csv2rec(self.predicted_file)

            # Subset the observed data just to the IDs that are in the
            # predicted file
            obs_keep = np.in1d(
                getattr(obs, self.id_field), getattr(prd, self.id_field))
            obs = obs[obs_keep]

            # Read in the stand attribute metadata
            mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

            # For each variable, calculate the statistics
            for v in obs.dtype.names:

                # Get the metadata for this field.  Catch Exception rather
                # than everything so that KeyboardInterrupt and SystemExit
                # still propagate.
                try:
                    fm = mp.get_attribute(v)
                except Exception:
                    # Parenthesized form prints the same text under
                    # Python 2 and also parses under Python 3
                    print(v + ' is missing metadata.')
                    continue

                # Only continue if this is a continuous accuracy variable
                if fm.field_type != 'CONTINUOUS' or fm.accuracy_attr == 0:
                    continue

                obs_vals = getattr(obs, v)
                prd_vals = getattr(prd, v)

                # Default every statistic to zero and only compute the ones
                # that are meaningful for the data at hand
                pearson_r = spearman_r = 0.0
                rmse = std_rmse = bias = r2 = 0.0
                if not np.all(obs_vals == 0.0):
                    # Correlations are left at zero when every prediction
                    # is zero
                    if not np.all(prd_vals == 0.0):
                        pearson_r = statistics.pearson_r(obs_vals, prd_vals)
                        spearman_r = statistics.spearman_r(
                            obs_vals, prd_vals)
                    rmse = statistics.rmse(obs_vals, prd_vals)
                    std_rmse = rmse / obs_vals.mean()
                    bias = statistics.bias_percentage(obs_vals, prd_vals)
                    r2 = statistics.r2(obs_vals, prd_vals)

                # Print this out to the stats file
                stat_values = (pearson_r, spearman_r, rmse, std_rmse, bias, r2)
                out_list = [v] + ['%.6f' % stat for stat in stat_values]
                stats_fh.write(','.join(out_list) + '\n')
    def run_diagnostic(self):
        """
        Write per-variable accuracy statistics to ``self.statistics_file``.

        Reads ``self.observed_file`` and ``self.predicted_file``, keeps only
        the observed rows whose ``self.id_field`` value occurs in the
        predicted data, and writes one CSV row of statistics (Pearson r,
        Spearman r, RMSE, RMSE normalized by the observed mean, bias
        percentage and R-square) for every field that the stand metadata
        marks as a continuous accuracy attribute.

        Fields without metadata are reported on stdout and skipped.  When
        all observed values are zero every statistic is written as 0.0;
        when only the predicted values are all zero the two correlation
        statistics are written as 0.0.

        NOTE(review): only the observed records are subset; the predicted
        records are used as read.  This presumably relies on both files
        holding the same IDs in the same order -- confirm against callers.
        """
        header = [
            'VARIABLE',
            'PEARSON_R',
            'SPEARMAN_R',
            'RMSE',
            'NORMALIZED_RMSE',
            'BIAS_PERCENTAGE',
            'R_SQUARE',
        ]

        # The context manager guarantees the stats file is closed even if
        # reading the inputs or computing a statistic raises.
        with open(self.statistics_file, 'w') as stats_fh:
            stats_fh.write(','.join(header) + '\n')

            # Read the observed and predicted files into numpy recarrays
            obs = utilities.csv2rec(self.observed_file)
            prd = utilities.csv2rec(self.predicted_file)

            # Subset the observed data just to the IDs that are in the
            # predicted file
            obs_keep = np.in1d(
                getattr(obs, self.id_field), getattr(prd, self.id_field))
            obs = obs[obs_keep]

            # Read in the stand attribute metadata
            mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

            # For each variable, calculate the statistics
            for v in obs.dtype.names:

                # Get the metadata for this field.  Catch Exception rather
                # than everything so that KeyboardInterrupt and SystemExit
                # still propagate.
                try:
                    fm = mp.get_attribute(v)
                except Exception:
                    # Parenthesized form prints the same text under
                    # Python 2 and also parses under Python 3
                    print(v + ' is missing metadata.')
                    continue

                # Only continue if this is a continuous accuracy variable
                if fm.field_type != 'CONTINUOUS' or fm.accuracy_attr == 0:
                    continue

                obs_vals = getattr(obs, v)
                prd_vals = getattr(prd, v)

                # Default every statistic to zero and only compute the ones
                # that are meaningful for the data at hand
                pearson_r = spearman_r = 0.0
                rmse = std_rmse = bias = r2 = 0.0
                if not np.all(obs_vals == 0.0):
                    # Correlations are left at zero when every prediction
                    # is zero
                    if not np.all(prd_vals == 0.0):
                        pearson_r = statistics.pearson_r(obs_vals, prd_vals)
                        spearman_r = statistics.spearman_r(
                            obs_vals, prd_vals)
                    rmse = statistics.rmse(obs_vals, prd_vals)
                    std_rmse = rmse / obs_vals.mean()
                    bias = statistics.bias_percentage(obs_vals, prd_vals)
                    r2 = statistics.r2(obs_vals, prd_vals)

                # Print this out to the stats file
                stat_values = (pearson_r, spearman_r, rmse, std_rmse, bias, r2)
                out_list = [v] + ['%.6f' % stat for stat in stat_values]
                stats_fh.write(','.join(out_list) + '\n')