def draw_scatterplot(x, y, metadata, output_type=SCREEN, output_file=None,
                     **kwargs):
    """
    Draw a scatterplot of two paired series with a 1:1 reference line and
    annotated agreement statistics.

    Parameters
    ----------
    x : array-like
        Values drawn along the x axis (labelled 'Observed').  Must provide
        min(), max() and mean() methods -- presumably a numpy array.
    y : array-like
        Values drawn along the y axis (labelled 'Predicted').  Must provide
        min() and max() methods.
    metadata : object
        Supplies the ``field_name``, ``short_description`` and ``units``
        attributes used for the title and axis labels.
    output_type : optional
        SCREEN switches pylab interactive mode on; any other value
        switches it off.  FILE additionally saves the figure.
    output_file : optional
        Destination handed to ``pl.savefig`` when output_type is FILE.
    **kwargs
        Forwarded unchanged to ``pl.scatter``.
    """
    field = metadata.field_name
    description = metadata.short_description
    unit_name = metadata.units

    # Interactive mode is only wanted when drawing to the screen
    if output_type == SCREEN:
        pl.ion()
    else:
        pl.ioff()

    # Start from a clean figure with fixed physical size and resolution
    pl.clf()
    figure = pl.gcf()
    figure.set_figwidth(3.4)
    figure.set_figheight(3.0)
    figure.set_dpi(250)

    # Shared extent of both series, so that both axes cover the same span
    x_low, y_low = x.min(), y.min()
    lower = x_low if x_low < y_low else y_low
    x_high, y_high = x.max(), y.max()
    upper = x_high if x_high > y_high else y_high

    text_size = 4.5

    # Points and title
    pl.scatter(x, y, s=2, c='b', edgecolor='k', linewidth=0.25, **kwargs)
    pl.title(field + ' : ' + description, size=text_size)

    # Agreement statistics; the RMSE is normalized by the mean of x
    corr = statistics.pearson_r(x, y)
    norm_rmse = statistics.rmse(x, y) / x.mean()
    r_square = statistics.r2(x, y)

    # Annotations are placed in axes-relative coordinates
    axes = pl.gca()
    pl.text(0.89, 0.93, '1:1', transform=axes.transAxes, size=text_size,
            rotation=45)
    stat_lines = (
        (0.93, 'Correlation coefficient: %.4f', corr),
        (0.89, 'Normalized RMSE: %.4f', norm_rmse),
        (0.85, 'R-square: %.4f', r_square),
    )
    for y_pos, template, value in stat_lines:
        pl.text(0.05, y_pos, template % (value), transform=axes.transAxes,
                size=text_size)

    # 1:1 reference line across the shared extent
    pl.plot([lower, upper], [lower, upper], 'k-', linewidth=0.5)

    # Axis labels, with units appended unless they are the literal 'none'
    y_label = 'Predicted ' + field
    x_label = 'Observed ' + field
    if unit_name != 'none':
        unit_suffix = ' (' + unit_name + ')'
        y_label += unit_suffix
        x_label += unit_suffix
    pl.ylabel(y_label, size=text_size)
    pl.xlabel(x_label, size=text_size)

    # One formatter instance is shared by all major and minor ticks
    import matplotlib.ticker as ticker
    formatter = ticker.OldScalarFormatter()
    for tick_axis in (axes.xaxis, axes.yaxis):
        tick_axis.set_major_formatter(formatter)
        tick_axis.set_minor_formatter(formatter)
    pl.xticks(size=4)
    pl.yticks(size=4)

    # Pad both axes by 1% of the shared extent on each side
    padding = 0.01 * (upper - lower)
    pl.xlim(lower - padding, upper + padding)
    pl.ylim(lower - padding, upper + padding)

    # Position the main axis within the figure: [x, y, width, height]
    axes.set_position([0.125, 0.100, 0.855, 0.830])
    axes.axesPatch.set_linewidth(0.2)
    for spine in axes.spines.values():
        spine.set_linewidth(0.2)

    # Edge styling for the figure itself
    figure.figurePatch.set_edgecolor('k')
    figure.figurePatch.set_linewidth(2.0)

    # Render, and write to disk only when file output was requested
    pl.draw()
    if output_type == FILE:
        pl.savefig(output_file, dpi=250, edgecolor='k')
def run_diagnostic(self):
    """
    Write per-variable accuracy statistics (observed vs. predicted) to
    ``self.statistics_file`` as comma-separated text.

    Reads ``self.observed_file`` and ``self.predicted_file`` with
    ``utilities.csv2rec``, keeps only observed records whose
    ``self.id_field`` value also occurs in the predicted data, and looks
    up each observed field in the metadata parsed from
    ``self.stand_metadata_file``.  One row is written per field whose
    metadata has field_type 'CONTINUOUS' and a non-zero accuracy_attr.

    Fields without metadata are reported on stdout and skipped.  When all
    observed values are zero, every statistic is written as 0.0; when
    only the predicted values are all zero, the two correlation
    coefficients are written as 0.0 and the rest are computed.

    The output file is managed by a ``with`` block so it is closed even
    if reading the inputs or computing a statistic raises.
    """
    header = [
        'VARIABLE',
        'PEARSON_R',
        'SPEARMAN_R',
        'RMSE',
        'NORMALIZED_RMSE',
        'BIAS_PERCENTAGE',
        'R_SQUARE',
    ]

    with open(self.statistics_file, 'w') as stats_fh:
        stats_fh.write(','.join(header) + '\n')

        # Read the observed and predicted files into numpy recarrays
        obs = utilities.csv2rec(self.observed_file)
        prd = utilities.csv2rec(self.predicted_file)

        # Subset the observed data just to the IDs that are in the
        # predicted file
        # NOTE(review): prd itself is neither subset nor reordered, so the
        # element-wise statistics below presumably rely on both files
        # listing the same IDs in the same order -- TODO confirm.
        obs_keep = np.in1d(
            getattr(obs, self.id_field), getattr(prd, self.id_field))
        obs = obs[obs_keep]

        # Read in the stand attribute metadata
        mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

        # For each variable, calculate the statistics
        for v in obs.dtype.names:

            # Get the metadata for this field.  Catch Exception rather
            # than using a bare except so that KeyboardInterrupt and
            # SystemExit still propagate.
            try:
                fm = mp.get_attribute(v)
            except Exception:
                err_msg = v + ' is missing metadata.'
                print(err_msg)
                continue

            # Only continue if this is a continuous accuracy variable
            if fm.field_type != 'CONTINUOUS' or fm.accuracy_attr == 0:
                continue

            obs_vals = getattr(obs, v)
            prd_vals = getattr(prd, v)

            if np.all(obs_vals == 0.0):
                # Nothing observed: report zeros instead of dividing by a
                # zero mean below
                pearson_r = 0.0
                spearman_r = 0.0
                rmse = 0.0
                std_rmse = 0.0
                bias = 0.0
                r2 = 0.0
            else:
                if np.all(prd_vals == 0.0):
                    # Correlations are reported as zero when every
                    # prediction is zero
                    pearson_r = 0.0
                    spearman_r = 0.0
                else:
                    pearson_r = statistics.pearson_r(obs_vals, prd_vals)
                    spearman_r = statistics.spearman_r(obs_vals, prd_vals)
                rmse = statistics.rmse(obs_vals, prd_vals)
                # RMSE normalized by the mean of the observed values
                std_rmse = rmse / obs_vals.mean()
                bias = statistics.bias_percentage(obs_vals, prd_vals)
                r2 = statistics.r2(obs_vals, prd_vals)

            # Print this out to the stats file
            out_list = [
                v,
                '%.6f' % pearson_r,
                '%.6f' % spearman_r,
                '%.6f' % rmse,
                '%.6f' % std_rmse,
                '%.6f' % bias,
                '%.6f' % r2,
            ]
            stats_fh.write(','.join(out_list) + '\n')
def run_diagnostic(self):
    """
    Write per-variable accuracy statistics (observed vs. predicted) to
    ``self.statistics_file`` as comma-separated text.

    Reads ``self.observed_file`` and ``self.predicted_file`` with
    ``utilities.csv2rec``, keeps only observed records whose
    ``self.id_field`` value also occurs in the predicted data, and looks
    up each observed field in the metadata parsed from
    ``self.stand_metadata_file``.  One row is written per field whose
    metadata has field_type 'CONTINUOUS' and a non-zero accuracy_attr.

    Fields without metadata are reported on stdout and skipped.  When all
    observed values are zero, every statistic is written as 0.0; when
    only the predicted values are all zero, the two correlation
    coefficients are written as 0.0 and the rest are computed.

    The output file is managed by a ``with`` block so it is closed even
    if reading the inputs or computing a statistic raises.
    """
    header = [
        'VARIABLE',
        'PEARSON_R',
        'SPEARMAN_R',
        'RMSE',
        'NORMALIZED_RMSE',
        'BIAS_PERCENTAGE',
        'R_SQUARE',
    ]

    with open(self.statistics_file, 'w') as stats_fh:
        stats_fh.write(','.join(header) + '\n')

        # Read the observed and predicted files into numpy recarrays
        obs = utilities.csv2rec(self.observed_file)
        prd = utilities.csv2rec(self.predicted_file)

        # Subset the observed data just to the IDs that are in the
        # predicted file
        # NOTE(review): prd itself is neither subset nor reordered, so the
        # element-wise statistics below presumably rely on both files
        # listing the same IDs in the same order -- TODO confirm.
        obs_keep = np.in1d(
            getattr(obs, self.id_field), getattr(prd, self.id_field))
        obs = obs[obs_keep]

        # Read in the stand attribute metadata
        mp = xsmp.XMLStandMetadataParser(self.stand_metadata_file)

        # For each variable, calculate the statistics
        for v in obs.dtype.names:

            # Get the metadata for this field.  Catch Exception rather
            # than using a bare except so that KeyboardInterrupt and
            # SystemExit still propagate.
            try:
                fm = mp.get_attribute(v)
            except Exception:
                err_msg = v + ' is missing metadata.'
                print(err_msg)
                continue

            # Only continue if this is a continuous accuracy variable
            if fm.field_type != 'CONTINUOUS' or fm.accuracy_attr == 0:
                continue

            obs_vals = getattr(obs, v)
            prd_vals = getattr(prd, v)

            if np.all(obs_vals == 0.0):
                # Nothing observed: report zeros instead of dividing by a
                # zero mean below
                pearson_r = 0.0
                spearman_r = 0.0
                rmse = 0.0
                std_rmse = 0.0
                bias = 0.0
                r2 = 0.0
            else:
                if np.all(prd_vals == 0.0):
                    # Correlations are reported as zero when every
                    # prediction is zero
                    pearson_r = 0.0
                    spearman_r = 0.0
                else:
                    pearson_r = statistics.pearson_r(obs_vals, prd_vals)
                    spearman_r = statistics.spearman_r(obs_vals, prd_vals)
                rmse = statistics.rmse(obs_vals, prd_vals)
                # RMSE normalized by the mean of the observed values
                std_rmse = rmse / obs_vals.mean()
                bias = statistics.bias_percentage(obs_vals, prd_vals)
                r2 = statistics.r2(obs_vals, prd_vals)

            # Print this out to the stats file
            out_list = [
                v,
                '%.6f' % pearson_r,
                '%.6f' % spearman_r,
                '%.6f' % rmse,
                '%.6f' % std_rmse,
                '%.6f' % bias,
                '%.6f' % r2,
            ]
            stats_fh.write(','.join(out_list) + '\n')