def _write_xy_provenance(cfg, cubes, plot_path, title, *attrs):
    """Save the data behind an X-Y plot and log its provenance.

    ``cubes`` may be a single cube or a cube list; a copy of it is written
    to the netCDF path derived from ``plot_path``. Every element of
    ``attrs`` must provide a ``'filename'`` item whose ``'|'``-separated
    parts are recorded as ancestors.
    """
    cubes = cubes.copy()
    if isinstance(cubes, iris.cube.Cube):
        # Wrap a single cube so that it can be iterated over below
        cubes = iris.cube.CubeList([cubes])

    ancestors = [
        path for attr in attrs for path in attr['filename'].split('|')
    ]

    netcdf_path = mlr.get_new_path(cfg, plot_path)
    io.iris_save(cubes, netcdf_path)

    # Caption mentions the title only if one is given
    long_name = ' and '.join(cube.long_name for cube in cubes)
    caption = f"Line plot of {long_name}"
    caption += f" for {title}." if title else '.'

    record = dict(
        ancestors=ancestors,
        authors=['schlund_manuel'],
        caption=caption,
        plot_file=plot_path,
        plot_types=['line'],
        references=['schlund20jgr'],
    )
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, record)
def _create_regression_file(tas_cube, cube, dataset_name, cfg,
                            description=None):
    """Write the regression data of one dataset to a netCDF file.

    The data of ``cube`` is linearly regressed against the data of
    ``tas_cube``. The regression statistics are stored as attributes of the
    saved cube, which carries the ``tas`` data as auxiliary coordinate. For
    the variables ``rtmt`` and ``rtnt`` an ``ECS`` attribute is added.

    Returns the path of the written netCDF file.
    """
    var = cube.var_name
    reg = stats.linregress(tas_cube.data, cube.data)

    # Cube attributes; user-defined output attributes may override them
    attrs = {
        'dataset': dataset_name,
        'regression_r_value': reg.rvalue,
        'regression_slope': reg.slope,
        'regression_interception': reg.intercept,
        'feedback_parameter': reg.slope,
    }
    attrs.update(cfg.get('output_attributes', {}))

    filename = f'{var}_regression_{dataset_name}'
    if description is not None:
        attrs['description'] = description
        filename += f"_{description.replace(' ', '_')}"
    if var in ('rtmt', 'rtnt'):
        attrs['ECS'] = -reg.intercept / (2.0 * reg.slope)

    # Assemble cube: variable data with tas as auxiliary coordinate
    tas_metadata = extract_variables(cfg, as_iris=True)['tas']
    tas_coord = iris.coords.AuxCoord(tas_cube.data, **tas_metadata)
    var_metadata = extract_variables(cfg, as_iris=True)[var]
    regression_cube = iris.cube.Cube(
        cube.data,
        attributes=attrs,
        aux_coords_and_dims=[(tas_coord, 0)],
        **var_metadata,
    )

    netcdf_path = get_diagnostic_filename(filename, cfg)
    io.iris_save(regression_cube, netcdf_path)
    return netcdf_path
def plot_cdf(cfg, psi_cube, ecs_cube, obs_cube):
    """Plot cumulative distribution function of ECS.

    The CDF is always written to a netCDF file and logged with a provenance
    record. The figure itself is only created if ``cfg['write_plots']`` is
    set. The confidence level is read from ``cfg['confidence_level']``
    (default: 0.66).
    """
    confidence_level = cfg.get('confidence_level', 0.66)
    obs_dataset = obs_cube.attributes['dataset']

    # Calculate PDF and CDF
    (ecs_lin, ecs_pdf) = ec.gaussian_pdf(psi_cube.data, ecs_cube.data,
                                         np.mean(obs_cube.data),
                                         np.std(obs_cube.data))
    ecs_cdf = ec.cdf(ecs_lin, ecs_pdf)

    # Save CDF as netCDF file
    filename = 'cdf_{}'.format(obs_dataset)
    netcdf_path = get_diagnostic_filename(filename, cfg)
    ecs_coord = iris.coords.AuxCoord(ecs_lin,
                                     **ih.convert_to_iris(ECS_ATTRS))
    cube = iris.cube.Cube(ecs_cdf,
                          var_name='cdf',
                          long_name='Cumulative distribution function',
                          units='1')
    cube.add_aux_coord(ecs_coord, 0)
    io.iris_save(cube, netcdf_path)

    # Provenance record
    project = _get_project(cfg)
    caption = (
        "The CDF for ECS. The horizontal dot-dashed lines show the {}% "
        "confidence limits. The orange histograms show the prior "
        "distributions that arise from equal weighting of the {} models in "
        "0.5 K bins.".format(int(confidence_level * 100), project))
    provenance_record = get_provenance_record(
        caption, ['mean'], ['other'], _get_ancestor_files(cfg, obs_dataset))

    # Figure (optional)
    if cfg['write_plots']:
        AXES.plot(ecs_lin,
                  ecs_cdf,
                  color='black',
                  linewidth=2.0,
                  label='Emergent constraint')
        AXES.hist(ecs_cube.data,
                  bins=6,
                  range=(2.0, 5.0),
                  cumulative=True,
                  density=True,
                  color='orange',
                  label='{} models'.format(project))

        # Lower and upper confidence limit
        for limit in ((1.0 - confidence_level) / 2.0,
                      (1.0 + confidence_level) / 2.0):
            AXES.axhline(limit, color='black', linestyle='dashdot')

        # Appearance
        AXES.set_title('CDF of emergent constraint')
        AXES.set_xlabel('ECS / K')
        AXES.set_ylabel('CDF')
        legend = AXES.legend(loc='upper left')

        provenance_record['plot_file'] = _save_fig(cfg, filename, legend)

    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, provenance_record)
def su(grouped_data, cfg):
    """Su et al. (2014) constraint.

    For every dataset the similarity metric ``cfg['metric']`` is evaluated
    against the reference cube. Two-dimensional cubes are additionally
    plotted and saved together with a provenance record.

    Returns the tuple ``(diag_data, var_attrs, attrs)``.
    """
    metric = cfg['metric']
    logger.info("Found metric '%s' for Su et al. (2014) constraint", metric)

    # Collect the cubes of all datasets and select the reference cube
    (var_name, reference_datasets) = _get_su_variable(grouped_data)
    cube_dict = _get_su_cube_dict(grouped_data, var_name, reference_datasets)
    ref_cube = cube_dict[reference_datasets]

    # Attributes of the resulting diagnostic variable
    if metric == 'regression_slope':
        short_name = 'alpha'
    else:
        short_name = 'rho'
    var_attrs = {
        'short_name': short_name,
        'long_name': f"Error in vertically-resolved tropospheric "
                     f"zonal-average {ref_cube.long_name} between 40°N and "
                     f"45°S expressed as {metric.replace('_', ' ')} between "
                     f"model data and observations",
        'units': '1',
    }
    attrs = {
        'plot_xlabel': f'Model performance in {ref_cube.long_name} [1]',
        'plot_title': 'Su et al. (2014) constraint',
        'provenance_authors': ['schlund_manuel'],
        'provenance_domains': ['trop', 'midlat'],
        'provenance_realms': ['atmos'],
        'provenance_references': ['su14jgr'],
        'provenance_statistics': ['corr'],
        'provenance_themes': ['EC'],
    }

    # Evaluate the constraint for every dataset
    diag_data = {}
    for (dataset_name, cube) in cube_dict.items():
        logger.info("Processing dataset '%s'", dataset_name)

        # Only 2D cubes are plotted (and saved with provenance)
        if cube.ndim == 2:
            iris.quickplot.contourf(cube)
            filename = f"su_{dataset_name.replace('|', '_')}"
            plot_path = get_plot_filename(filename, cfg)
            plt.savefig(plot_path, **cfg['savefig_kwargs'])
            logger.info("Wrote %s", plot_path)
            plt.close()

            netcdf_path = get_diagnostic_filename(filename, cfg)
            io.iris_save(cube, netcdf_path)
            provenance_record = ec.get_provenance_record(
                {'su': attrs}, ['su'],
                caption=f'{cube.long_name} for {dataset_name}.',
                plot_type='zonal',
                plot_file=plot_path,
                ancestors=cube.attributes['ancestors'].split('|'))
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(netcdf_path, provenance_record)

        diag_data[dataset_name] = _similarity_metric(cube, ref_cube, metric)

    return (diag_data, var_attrs, attrs)
def postprocess_mean(cfg, cube, data):
    """Postprocess mean prediction cube.

    The cube is passed through ``_collapse_regular_cube`` and
    ``_convert_units``, saved to a new path derived from
    ``data['filename']`` and logged with that file as its only ancestor.
    """
    logger.info("Postprocessing mean prediction cube %s",
                cube.summary(shorten=True))

    processed_cube = _collapse_regular_cube(cfg, cube)
    _convert_units(cfg, processed_cube)

    output_path = mlr.get_new_path(cfg, data['filename'])
    io.iris_save(processed_cube, output_path)
    logger.info("Mean prediction: %s %s", processed_cube.data,
                processed_cube.units)

    ancestors = [data['filename']]
    _write_provenance(cfg, "Postprocessed", processed_cube, output_path,
                      ancestors)
def _plot(cfg, cube, dataset_name, tcr):
    """Create scatterplot of temperature anomaly vs. time.

    Parameters
    ----------
    cfg : dict
        Recipe configuration. ``cfg['plot']`` (default: ``True``) switches
        the plot on or off; ``cfg['savefig_kwargs']`` is forwarded to
        :func:`matplotlib.pyplot.savefig`.
    cube : iris.cube.Cube
        Temperature anomaly data. One point per element of the first
        dimension is drawn; the cube is also saved as netCDF file.
    dataset_name : str
        Name of the dataset (used for title, file names and caption).
    tcr : float
        TCR value, drawn as horizontal line and printed in the plot.

    Returns
    -------
    tuple
        ``(netcdf_path, provenance_record)``, or ``(None, None)`` if
        plotting is disabled via ``cfg['plot']``.

    """
    if not cfg.get('plot', True):
        return (None, None)
    logger.debug("Plotting temperature anomaly vs. time for '%s'",
                 dataset_name)
    (_, axes) = plt.subplots()

    # Plot data
    x_data = np.arange(cube.shape[0])
    y_data = cube.data
    axes.scatter(x_data, y_data, color='b', marker='o')

    # Plot lines
    line_kwargs = {'color': 'k', 'linewidth': 1.0, 'linestyle': '--'}
    axes.axhline(tcr, **line_kwargs)
    axes.axvline(START_YEAR_IDX, **line_kwargs)
    axes.axvline(END_YEAR_IDX, **line_kwargs)

    # Appearance
    units_str = (cube.units.symbol
                 if cube.units.origin is None else cube.units.origin)
    axes.set_title(dataset_name)
    axes.set_xlabel('Years after experiment start')
    axes.set_ylabel(f'Temperature anomaly / {units_str}')
    # The x-axis spans the data with a margin of one year on each side.
    # (This used to be a second ``set_ylim`` call that was immediately
    # overwritten by the fixed y-limits below and thus had no effect.)
    axes.set_xlim([x_data[0] - 1, x_data[-1] + 1])
    axes.set_ylim([-1.0, 7.0])
    axes.text(0.0, tcr + 0.1, 'TCR = {:.1f} {}'.format(tcr, units_str))

    # Save cube
    netcdf_path = get_diagnostic_filename(dataset_name, cfg)
    io.iris_save(cube, netcdf_path)

    # Save plot
    plot_path = get_plot_filename(dataset_name, cfg)
    plt.savefig(plot_path, **cfg['savefig_kwargs'])
    logger.info("Wrote %s", plot_path)
    plt.close()

    # Provenance
    provenance_record = get_provenance_record(
        f"Time series of the global mean surface air temperature anomaly "
        f"(relative to the linear fit of the pre-industrial control run) of "
        f"{dataset_name} for the 1% CO2 increase per year experiment. The "
        f"horizontal dashed line indicates the transient climate response "
        f"(TCR) defined as the 20 year average temperature anomaly centered "
        f"at the time of CO2 doubling (vertical dashed lines).")
    provenance_record.update({
        'plot_file': plot_path,
        'plot_types': ['times'],
    })

    return (netcdf_path, provenance_record)
def main(cfg, input_data=None, description=None):
    """Run the diagnostic.

    For every tag of the grouped input data the multi-model mean of the
    ``label`` datasets is calculated and saved. Depending on the
    configuration and the available data, the prediction error
    (``mmm_error_type`` given in ``cfg``) and the residuals (reference
    dataset available) are saved as well.
    """
    cfg = deepcopy(cfg)  # do not alter the configuration of the caller
    defaults = {
        'dtype': 'float64',
        'mlr_model_name': 'MMM',
        'weighted_samples': {
            'area_weighted': True,
            'time_weighted': True
        },
    }
    for (option, default) in defaults.items():
        cfg.setdefault(option, default)

    # Get data
    grouped_data = get_grouped_data(cfg, input_data=input_data)
    description = '' if description is None else f'_for_{description}'

    # Loop over all tags
    for (tag, datasets) in grouped_data.items():
        logger.info("Processing label '%s'", tag)

        # Label datasets and (if available) reference dataset
        label_datasets = select_metadata(datasets, var_type='label')
        (ref_dataset, pred_name) = get_reference_dataset(datasets, tag)
        if pred_name is None:
            pred_name = cfg.get('prediction_name')
        cube_attrs = {'tag': tag, 'prediction_name': pred_name}

        # Multi-model mean
        logger.info("Calculating multi-model mean")
        mmm_cube = get_mmm_cube(cfg, label_datasets)
        add_general_attributes(mmm_cube, **cube_attrs)
        mmm_path = get_diagnostic_filename(
            f"mmm_{tag}_prediction{description}", cfg)
        io.iris_save(mmm_cube, mmm_path)
        write_provenance(
            cfg, mmm_path, [d['filename'] for d in label_datasets],
            f"Predicted {mmm_cube.long_name} of MMM model "
            f"{cfg['mlr_model_name']}.")

        # Prediction error (only if requested)
        if 'mmm_error_type' in cfg:
            save_error(cfg, label_datasets, mmm_path, **cube_attrs)

        # Residuals (only if a reference dataset is available)
        if ref_dataset is not None:
            save_residuals(cfg, mmm_cube, ref_dataset, label_datasets,
                           **cube_attrs)
def _calculate_lower_error_bound(cfg, squared_error_cube, basepath):
    """Calculate lower error bound.

    The squared error cube is collapsed with ``power=2`` first; the square
    root is taken afterwards. The result is saved next to ``basepath``
    (suffix ``_lower_bound``) and logged with provenance.
    """
    logger.debug("Calculating lower error bound")

    bound_cube = _collapse_regular_cube(cfg, squared_error_cube, power=2)
    bound_cube.data = np.ma.sqrt(bound_cube.data)
    mlr.square_root_metadata(bound_cube)
    _convert_units(cfg, bound_cube)
    bound_cube.attributes['error_type'] = 'lower_bound'

    output_path = basepath.replace('.nc', '_lower_bound.nc')
    io.iris_save(bound_cube, output_path)
    logger.info("Lower bound of error: %s %s", bound_cube.data,
                bound_cube.units)

    _write_provenance(cfg, 'Lower bound of', bound_cube, output_path,
                      _get_ancestors(squared_error_cube))
def plot_pdf(cfg, psi_cube, ecs_cube, obs_cube):
    """Plot probability density function of ECS.

    The PDF is always written to a netCDF file and logged with a provenance
    record. The figure itself is only created if ``cfg['write_plots']`` is
    set.
    """
    obs_dataset = obs_cube.attributes['dataset']

    # Calculate PDF
    obs_mean = np.mean(obs_cube.data)
    obs_std = np.std(obs_cube.data)
    (ecs_lin, ecs_pdf) = ec.gaussian_pdf(psi_cube.data, ecs_cube.data,
                                         obs_mean, obs_std)

    # Save PDF as netCDF file
    filename = 'pdf_{}'.format(obs_dataset)
    netcdf_path = get_diagnostic_filename(filename, cfg)
    ecs_coord = iris.coords.AuxCoord(ecs_lin,
                                     **ih.convert_to_iris(ECS_ATTRS))
    cube = iris.cube.Cube(ecs_pdf,
                          var_name='pdf',
                          long_name='Probability density function',
                          units='K-1')
    cube.add_aux_coord(ecs_coord, 0)
    io.iris_save(cube, netcdf_path)

    # Provenance record
    project = _get_project(cfg)
    caption = (
        "The PDF for ECS. The orange histograms show the prior distributions "
        "that arise from equal weighting of the {} models in 0.5 K bins.".
        format(project))
    provenance_record = get_provenance_record(
        caption, ['mean'], ['other'], _get_ancestor_files(cfg, obs_dataset))

    # Figure (optional)
    if cfg['write_plots']:
        AXES.plot(ecs_lin,
                  ecs_pdf,
                  color='black',
                  linewidth=2.0,
                  label='Emergent constraint')
        AXES.hist(ecs_cube.data,
                  bins=6,
                  range=(2.0, 5.0),
                  density=True,
                  color='orange',
                  label='{} models'.format(project))

        # Appearance
        AXES.set_title('PDF of emergent constraint')
        AXES.set_xlabel('ECS / K')
        AXES.set_ylabel('Probability density')
        legend = AXES.legend(loc='upper left')

        provenance_record['plot_file'] = _save_fig(cfg, filename, legend)

    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, provenance_record)
def write_data(cfg, ecs_cube, tcr_cube):
    """Write netcdf file.

    The ECS data is attached to ``tcr_cube`` (in place) as auxiliary
    coordinate along dimension 0, the ``provenance`` attribute of
    ``tcr_cube`` is dropped and the cube is saved. Returns the path of the
    written file.
    """
    ecs_coord = iris.coords.AuxCoord(
        ecs_cube.data,
        var_name=ecs_cube.var_name,
        long_name=ecs_cube.long_name,
        units=ecs_cube.units,
    )
    tcr_cube.add_aux_coord(ecs_coord, 0)
    tcr_cube.attributes.pop('provenance', None)

    netcdf_path = get_diagnostic_filename('ch09_fig09_42b', cfg)
    io.iris_save(tcr_cube, netcdf_path)
    return netcdf_path
def _calculate_upper_error_bound(cfg, squared_error_cube, basepath):
    """Calculate upper error bound.

    The square root of a copy of the squared error cube is taken first; the
    result is collapsed afterwards. The bound is saved next to ``basepath``
    (suffix ``_upper_bound``) and logged with provenance.
    """
    logger.debug("Calculating upper error bound")

    error_cube = squared_error_cube.copy()
    error_cube.data = np.ma.sqrt(error_cube.data)
    mlr.square_root_metadata(error_cube)

    bound_cube = _collapse_regular_cube(cfg, error_cube)
    _convert_units(cfg, bound_cube)
    bound_cube.attributes['error_type'] = 'upper_bound'

    output_path = basepath.replace('.nc', '_upper_bound.nc')
    io.iris_save(bound_cube, output_path)
    logger.info("Upper bound of error: %s %s", bound_cube.data,
                bound_cube.units)

    _write_provenance(cfg, 'Upper bound of', bound_cube, output_path,
                      _get_ancestors(squared_error_cube))
def _create_table(table, cfg, description=None):
    """Create summary table containing all climate feedback parameters.

    The table is written as netCDF file and rendered as PDF figure (both
    named ``summary_table``, optionally extended by ``description``);
    a provenance record is written for them.
    """
    logger.debug("Creating summary table")
    (cell_data, row_labels, col_labels, col_units) = _dict_to_array(table)

    filename = 'summary_table'
    if description is not None:
        filename += f"_{description.replace(' ', '_')}"

    # netCDF file
    cubes = _get_cube_list_for_table(cell_data, row_labels, col_labels,
                                     col_units)
    netcdf_path = get_diagnostic_filename(filename, cfg)
    for cube in cubes:
        cube.attributes.update(cfg.get('output_attributes', {}))
    io.iris_save(cubes, netcdf_path)

    # Cell texts and column headers ("<symbol> / <units>") of the figure
    format_cell = np.vectorize('{:.2f}'.format)
    cell_text = format_cell(cell_data)
    header_labels = [
        f"{NICE_SYMBOLS.get(label, label)} / "
        f"{NICE_UNITS.get(col_units[idx], 'unknown')}"
        for (idx, label) in enumerate(col_labels)
    ]

    # Figure that only consists of the table
    (_, axes) = plt.subplots()
    axes.axis('off')
    mpl_table = axes.table(
        cellText=cell_text,
        rowLabels=row_labels,
        colLabels=header_labels,
        loc='center',
        fontsize=8.0,
    )
    mpl_table.scale(1.7, 1.7)

    plot_path = os.path.join(cfg['plot_dir'], filename + '.pdf')
    plt.savefig(plot_path, bbox_inches='tight', orientation='landscape')
    logger.info("Wrote %s", plot_path)
    plt.close()

    # Provenance
    caption = (
        'Forcing, Feedback and Equilibrium Climate Sensitivity (ECS) values. '
        'SW = short wave, LW = long wave, cs = clear sky, CRE = cloud '
        'radiative effect (similar to Andrews et al., Geophys. Res. Lett., '
        '39, 2012).')
    ancestors = [d['filename'] for d in cfg['input_data'].values()]
    _write_provenance(netcdf_path, plot_path, caption, ancestors, cfg)
def _write_provenance(cfg, data_frame, plot_path, title, ancestors,
                      **cube_kwargs):
    """Write provenance information.

    ``data_frame`` is converted to a cube (``cube_kwargs`` are forwarded to
    the conversion), saved to the netCDF path derived from ``plot_path``
    and logged as boxplot data.
    """
    netcdf_path = mlr.get_new_path(
        cfg, plot_path) if False else None  # placeholder, replaced below
    cube = ec.pandas_object_to_cube(data_frame, **cube_kwargs)
    netcdf_path = mlr.get_new_path(cfg, plot_path)
    io.iris_save(cube, netcdf_path)

    record = dict(
        ancestors=ancestors,
        authors=['schlund_manuel'],
        caption=f"Boxplot of {title}.",
        plot_file=plot_path,
        plot_types=['box'],
        references=['schlund20jgr'],
    )
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, record)
def save_residuals(cfg, mmm_cube, ref_dataset, label_datasets, **cube_attrs):
    """Save residuals.

    The residual cube of ``mmm_cube`` and the cube loaded from
    ``ref_dataset`` is saved next to the MMM prediction file (read from
    the ``filename`` attribute of ``mmm_cube``); the label datasets and the
    reference dataset are logged as ancestors.
    """
    logger.info("Calculating residuals")
    (ref_cube, _) = _load_cube(cfg, ref_dataset)
    res_cube = get_residual_cube(mmm_cube, ref_cube)
    add_general_attributes(res_cube, **cube_attrs)

    # Residual file name is derived from the MMM prediction file name
    res_path = mmm_cube.attributes['filename'].replace(
        '_prediction', '_prediction_residual')
    io.iris_save(res_cube, res_path)

    ancestors = [d['filename'] for d in label_datasets]
    ancestors.append(ref_dataset['filename'])

    caption_parts = [
        f"Residuals of predicted {res_cube.long_name} of MMM model "
        f"{cfg['mlr_model_name']}"
    ]
    if 'prediction_name' in cube_attrs:
        caption_parts.append(
            f" for prediction {cube_attrs['prediction_name']}")
    caption_parts.append('.')
    write_provenance(cfg, res_path, ancestors, ''.join(caption_parts))
def _calculate_real_error(cfg, ref_cube, cov_cube, basepath):
    """Calculate real error using covariance.

    The covariance cube is collapsed, the square root of the result is
    taken and name and units are adapted accordingly. The error is saved
    next to ``basepath`` (suffix ``_real``) and logged with provenance.
    """
    logger.debug("Calculating real error using covariance")

    error_cube = _collapse_covariance_cube(cfg, cov_cube, ref_cube)
    error_cube.data = np.ma.sqrt(error_cube.data)

    # Metadata: covariance -> error
    error_cube.var_name = cov_cube.var_name.replace('_cov', '_error')
    error_cube.long_name = cov_cube.long_name.replace(
        '(covariance)', '(error)')
    error_cube.units = error_cube.units.root(2)
    _convert_units(cfg, error_cube)
    error_cube.attributes['error_type'] = 'real_error'

    output_path = basepath.replace('.nc', '_real.nc')
    io.iris_save(error_cube, output_path)
    logger.info("Real error (using covariance): %s %s", error_cube.data,
                error_cube.units)

    _write_provenance(cfg, 'Real', error_cube, output_path,
                      _get_ancestors(cov_cube))
def _write_map_provenance(cfg, cube, plot_path, title, *attrs):
    """Save the data behind a map plot and log its provenance.

    A copy of ``cube`` is written to the netCDF path derived from
    ``plot_path``. Every element of ``attrs`` must provide a ``'filename'``
    item whose ``'|'``-separated parts are recorded as ancestors.
    """
    cube = cube.copy()
    ancestors = [
        path for attr in attrs for path in attr['filename'].split('|')
    ]

    netcdf_path = mlr.get_new_path(cfg, plot_path)
    io.iris_save(cube, netcdf_path)

    record = dict(
        ancestors=ancestors,
        authors=['schlund_manuel'],
        caption=f"Geographical distribution of {cube.long_name} for "
                f"{title}.",
        plot_file=plot_path,
        plot_types=['geo'],
        references=['schlund20jgr'],
    )
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, record)
def _write_ecs_regression(cfg, tas_cube, rtnt_cube, reg_stats, dataset_name):
    """Write Gregory regression cube.

    Both input cubes are modified in place (names get an anomaly suffix,
    attributes are replaced by the regression statistics) and saved
    together in one netCDF file. Returns the path of that file.
    """
    attrs = {
        'anomaly': 'relative to piControl run',
        'regression_r_value': reg_stats.rvalue,
        'regression_slope': reg_stats.slope,
        'regression_interception': reg_stats.intercept,
        'Climate Feedback Parameter': reg_stats.slope,
        'ECS': -reg_stats.intercept / (2.0 * reg_stats.slope),
    }
    attrs.update(cfg.get('output_attributes', {}))

    anomaly_cubes = (tas_cube, rtnt_cube)
    for anomaly_cube in anomaly_cubes:
        anomaly_cube.var_name += '_anomaly'
        anomaly_cube.long_name += ' Anomaly'
        anomaly_cube.attributes = attrs
    cubes = iris.cube.CubeList(anomaly_cubes)

    netcdf_path = get_diagnostic_filename('ecs_regression_' + dataset_name,
                                          cfg)
    io.iris_save(cubes, netcdf_path)
    return netcdf_path
def save_error(cfg, label_datasets, mmm_path, **cube_attrs):
    """Save estimated error of MMM.

    Nothing is saved (a warning is logged instead) if less than two label
    datasets are given. Raises ``NotImplementedError`` for error types
    other than ``'loo'``.
    """
    n_datasets = len(label_datasets)
    if n_datasets < 2:
        logger.warning(
            "Estimating MMM prediction error not possible, at least 2 'label' "
            "datasets are needed, only %i is given", n_datasets)
        return

    error_type = cfg['mmm_error_type']
    allowed_error_types = ['loo']
    logger.info("Calculating error using error type '%s'", error_type)
    if error_type != 'loo':
        raise NotImplementedError(
            f"mmm_error_type '{error_type}' is currently not supported, "
            f"supported types are {allowed_error_types}")
    err_cube = get_loo_error_cube(cfg, label_datasets)
    add_general_attributes(err_cube, **cube_attrs)

    # Error file name is derived from the MMM prediction file name
    err_path = mmm_path.replace('_prediction', '_squared_prediction_error')
    io.iris_save(err_cube, err_path)

    ancestors = [d['filename'] for d in label_datasets]
    caption = (f"{err_cube.long_name} of MMM model "
               f"{cfg['mlr_model_name']} using error type {error_type}.")
    write_provenance(cfg, err_path, ancestors, caption)
def _create_feedback_file(feedback_cube, dataset_name, cfg, description=None):
    """Save feedback parameter plot vs. remaining dimensions.

    ``feedback_cube`` is modified in place (names, units and attributes are
    replaced) and written to a netCDF file. Returns the tuple
    ``(netcdf_path, caption)``.
    """
    var = feedback_cube.var_name
    nice_var_name = VAR_NAMES.get(var, var)
    nice_long_name = LONG_NAMES.get(var, var)

    filename = '{}_vs_{}_{}'.format(nice_var_name, '-'.join(COORDS['rad']),
                                    dataset_name)
    attrs = {
        'dataset': dataset_name,
        'radiation_variable': var,
    }
    attrs.update(cfg.get('output_attributes', {}))
    if description is not None:
        attrs['description'] = description
        filename += f"_{description.replace(' ', '_')}"

    # Metadata of the cube
    feedback_cube.var_name = nice_var_name
    feedback_cube.long_name = nice_long_name
    feedback_cube.units = UNITS.get(var, 'unknown')
    feedback_cube.attributes = attrs

    # Write cube
    netcdf_path = get_diagnostic_filename(filename, cfg)
    io.iris_save(feedback_cube, netcdf_path)

    # Caption
    rad_coords = ' and '.join(COORDS['rad'])
    if NDIMS.get('tas') == 1:
        tas_dependence = 'global mean'
    else:
        tas_dependence = '{}-dependent'.format(' and '.join(COORDS['tas']))
    description_suffix = '' if description is None else f' for {description}'
    caption = (
        'Dependence of {} on {} for {}. The calculation follows Andrews et '
        'al., Geophys. Res. Lett., 39 (2012): The {} is defined as the '
        'slope of the linear regression between {}-dependent {} TOA radiance '
        'and the {} surface temperature anomaly{} of the abrupt 4x CO2 '
        'experiment.'.format(
            nice_long_name,
            rad_coords,
            dataset_name,
            nice_long_name,
            rad_coords,
            FEEDBACK_PARAMETERS.get(var, var),
            tas_dependence,
            description_suffix,
        ))
    return (netcdf_path, caption)
def write_data(cfg, hist_cubes, pi_cubes, ecs_cube):
    """Write netcdf file.

    For every dataset in ``hist_cubes`` the ECS value and the historical
    and piControl data are collected; the ECS values form the data of the
    saved cube, everything else is attached as auxiliary coordinate.
    Raises ``ValueError`` if no ECS data is found for a dataset. Returns
    the path of the written file.
    """
    datasets = []
    (data_ecs, data_hist, data_pi) = ([], [], [])
    for dataset in list(hist_cubes):
        ecs = ecs_cube.extract(iris.Constraint(dataset=dataset))
        if ecs is None:
            raise ValueError(f"No ECS data for '{dataset}' available")
        datasets.append(dataset)
        data_ecs.append(ecs.data)
        data_hist.append(hist_cubes[dataset].data)
        data_pi.append(pi_cubes[dataset].data)

    # Auxiliary coordinates (all along the dataset dimension)
    dataset_coord = iris.coords.AuxCoord(datasets, long_name='dataset')
    tas_hist_coord = iris.coords.AuxCoord(
        data_hist,
        attributes={'exp': 'historical'},
        **extract_variables(cfg, as_iris=True)['tas'])
    tas_picontrol_coord = iris.coords.AuxCoord(
        data_pi,
        attributes={'exp': 'piControl'},
        **extract_variables(cfg, as_iris=True)['tas'])
    aux_coords = [dataset_coord, tas_hist_coord, tas_picontrol_coord]

    # Create and save cube
    cube = iris.cube.Cube(
        data_ecs,
        var_name='ecs',
        long_name='Equilibrium Climate Sensitivity (ECS)',
        aux_coords_and_dims=[(coord, 0) for coord in aux_coords])
    path = get_diagnostic_filename('ch09_fig09_42a', cfg)
    io.iris_save(cube, path)
    return path
def plot_ecs_regression(cfg, dataset_name, tas_cube, rtnt_cube, reg_stats):
    """Plot linear regression used to calculate ECS.

    Nothing is done (``(None, None)`` is returned) if ``cfg['write_plots']``
    is not set. Otherwise, the scatterplot including the regression line is
    created, the plotted data is saved as netCDF file and the tuple
    ``(netcdf_path, provenance_record)`` is returned.
    """
    if not cfg['write_plots']:
        return (None, None)
    ecs = -reg_stats.intercept / (2 * reg_stats.slope)

    # Regression line (two points are sufficient for a straight line)
    x_reg = np.linspace(-1.0, 9.0, 2)
    y_reg = reg_stats.slope * x_reg + reg_stats.intercept

    # Plot options: data points first, regression line second
    text = r'r = {:.2f}, $\lambda$ = {:.2f}, F = {:.2f}, ECS = {:.2f}'.format(
        reg_stats.rvalue, -reg_stats.slope, reg_stats.intercept, ecs)
    points_kwargs = {
        'linestyle': 'none',
        'markeredgecolor': 'b',
        'markerfacecolor': 'none',
        'marker': 's',
    }
    line_kwargs = {
        'color': 'k',
        'linestyle': '-',
    }
    save_kwargs = {
        'bbox_inches': 'tight',
        'orientation': 'landscape',
    }
    axes_functions = {
        'set_title': dataset_name,
        'set_xlabel': 'tas / ' + tas_cube.units.origin,
        'set_ylabel': 'rtnt / ' + rtnt_cube.units.origin,
        'set_xlim': [0.0, 8.0],
        'set_ylim': [-2.0, 10.0],
        'text': {
            'args': [0.05, 0.9, text],
            'kwargs': {
                'transform': 'transAxes'
            },
        },
    }

    # Plot data
    plot_path = get_plot_filename(dataset_name, cfg)
    plot.scatterplot(
        [tas_cube.data, x_reg],
        [rtnt_cube.data, y_reg],
        plot_path,
        plot_kwargs=[points_kwargs, line_kwargs],
        save_kwargs=save_kwargs,
        axes_functions=axes_functions,
    )

    # Write netcdf file for every plot
    tas_coord = iris.coords.AuxCoord(
        tas_cube.data, **extract_variables(cfg, as_iris=True)['tas'])
    attrs = {
        'model': dataset_name,
        'regression_r_value': reg_stats.rvalue,
        'regression_slope': reg_stats.slope,
        'regression_interception': reg_stats.intercept,
        'Climate Feedback Parameter': -reg_stats.slope,
        'ECS': ecs,
    }
    cube = iris.cube.Cube(rtnt_cube.data,
                          attributes=attrs,
                          aux_coords_and_dims=[(tas_coord, 0)],
                          **extract_variables(cfg, as_iris=True)['rtnt'])
    netcdf_path = get_diagnostic_filename('ecs_regression_' + dataset_name,
                                          cfg)
    io.iris_save(cube, netcdf_path)

    # Provenance
    provenance_record = get_provenance_record(
        f"Scatterplot between TOA radiance and global mean surface "
        f"temperature anomaly for 150 years of the abrupt 4x CO2 experiment "
        f"including linear regression to calculate ECS for {dataset_name}.")
    provenance_record['plot_file'] = plot_path
    provenance_record['plot_types'] = ['scatter']

    return (netcdf_path, provenance_record)
def plot_emergent_relationship(cfg, psi_cube, ecs_cube, lambda_cube, obs_cube):
    """Plot emergent relationship.

    The ECS data (with the psi data attached as auxiliary coordinate) is
    always written to a netCDF file and logged with a provenance record.
    The figure itself is only created if ``cfg['write_plots']`` is set.
    """
    obs_dataset = obs_cube.attributes['dataset']
    filename = 'emergent_relationship_{}'.format(obs_dataset)

    # Save data as netCDF file
    psi_coord = iris.coords.AuxCoord(psi_cube.data,
                                     **ih.convert_to_iris(PSI_ATTRS))
    cube = ecs_cube.copy()
    cube.add_aux_coord(psi_coord, 0)
    netcdf_path = get_diagnostic_filename(filename, cfg)
    io.iris_save(cube, netcdf_path)

    # Provenance record
    caption = (
        "Emergent relationship between ECS and the psi metric. The black dot-"
        "dashed line shows the best-fit linear regression across the model "
        "ensemble, with the prediction error for the fit given by the black "
        "dashed lines. The vertical blue lines show the observational "
        "constraint from the {} observations: the mean (dot-dashed line) and "
        "the mean plus and minus one standard deviation (dashed lines).".
        format(obs_dataset))
    provenance_record = get_provenance_record(
        caption, ['mean', 'corr', 'var'], ['scatter'],
        _get_ancestor_files(cfg, obs_dataset))

    # Figure (optional)
    if cfg['write_plots']:
        obs_mean = np.mean(obs_cube.data)
        obs_std = np.std(obs_cube.data)

        # Regression line
        lines = ec.regression_surface(psi_cube.data,
                                      ecs_cube.data,
                                      n_points=1000)
        logger.info("Found emergent relationship with slope %.2f (R2 = %.2f)",
                    lines['coef'], lines['R2'])

        # Model points
        for model in psi_cube.coord('dataset').points:
            _plot_model_point(model, psi_cube, ecs_cube, lambda_cube)

        # Freeze the axes limits before the lines are drawn
        AXES.set_xlim(auto=False)
        AXES.set_ylim(auto=False)

        # Regression line and its error
        AXES.plot(lines['x'],
                  lines['y'],
                  color='black',
                  linestyle='dashdot',
                  label='Linear regression')
        for error_key in ('y_minus_err', 'y_plus_err'):
            AXES.plot(lines['x'],
                      lines[error_key],
                      color='black',
                      linestyle='dashed')

        # Observational constraint and its standard deviation
        AXES.axvline(obs_mean,
                     color='blue',
                     linestyle='dashdot',
                     label='Observational constraint')
        for x_value in (obs_mean - obs_std, obs_mean + obs_std):
            AXES.axvline(x_value, color='blue', linestyle='dashed')

        # Appearance
        AXES.set_title('Emergent relationship fit')
        AXES.set_xlabel(r'$\Psi$ / K')
        AXES.set_ylabel('ECS / K')
        legend = AXES.legend(loc='upper left')

        provenance_record['plot_file'] = _save_fig(cfg, filename, legend)

    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, provenance_record)
def _estimate_real_error(cfg, squared_error_cube, cov_est_dataset, basepath):
    """Estimate real error using estimated covariance.

    The covariance structure is estimated from the cube loaded from
    ``cov_est_dataset['filename']``. Raises ``ValueError`` if that cube has
    less than 2 dimensions or less dimensions than the error cube, or if
    not all dimensions of the error cube are collapsed. The result is saved
    next to ``basepath`` (suffix ``_estimated``) and logged with provenance.
    """
    cov_est_path = cov_est_dataset['filename']
    logger.debug(
        "Estimating real error using estimated covariance from "
        "'prediction_input' dataset %s", cov_est_path)
    cov_est_cube = iris.load_cube(cov_est_path)

    # Check dimensions
    if cov_est_cube.ndim < 2:
        raise ValueError(
            f"Expected at least 2D 'prediction_input' dataset for covariance "
            f"structure estimation, got {cov_est_cube.ndim:d}D dataset")
    if cov_est_cube.ndim < squared_error_cube.ndim:
        raise ValueError(
            f"Expected number of dimensions of 'prediction_input' dataset "
            f"used for covariance structure estimation to be greater than or "
            f"equal the number of dimensions of the errors datasets, got "
            f"{cov_est_cube.ndim:d} and {squared_error_cube.ndim}")

    # Check if all dimensions are collapsed
    (weights, units, coords) = _get_all_weights(cfg, squared_error_cube)
    if len(coords) < squared_error_cube.ndim:
        raise ValueError(
            f"Estimating real error using 'prediction_input' dataset for "
            f"covariance structure estimation is only possible if all "
            f"{squared_error_cube.ndim:d} dimensions of the error cube are "
            f"collapsed, got only {len(coords):d} ({coords})")

    # Estimate error
    shapes_match = cov_est_cube.shape == squared_error_cube.shape
    if shapes_match:
        error = _estim_cov_identical_shape(squared_error_cube, cov_est_cube,
                                           weights)
    else:
        error = _estim_cov_differing_shape(cfg, squared_error_cube,
                                           cov_est_cube, weights)

    # Create cube: the collapse operation is a dummy that only provides a
    # cube with suitable metadata, its data is replaced right afterwards
    with warnings.catch_warnings():
        warnings.filterwarnings(
            'ignore',
            message="Collapsing spatial coordinate 'latitude' without "
                    "weighting",
            category=UserWarning,
            module='iris',
        )
        error_cube = squared_error_cube.collapsed(coords, iris.analysis.MEAN)
    error_cube.data = error
    mlr.square_root_metadata(error_cube)
    error_cube.units *= units
    _convert_units(cfg, error_cube)
    error_cube.attributes['error_type'] = 'estimated_real_error'

    # Save cube
    output_path = basepath.replace('.nc', '_estimated.nc')
    io.iris_save(error_cube, output_path)
    logger.info("Estimated real error (using estimated covariance): %s %s",
                error_cube.data, error_cube.units)

    # Provenance
    ancestors = list(_get_ancestors(squared_error_cube))
    ancestors.append(cov_est_dataset['filename'])
    _write_provenance(cfg, 'Estimated', error_cube, output_path, ancestors)
def write_data(cfg, cube):
    """Write netcdf file.

    The ``provenance`` attribute of ``cube`` is dropped (in place) before
    the cube is saved to a file named after its ``var_name``. Returns the
    path of the written file.
    """
    cube.attributes.pop('provenance', None)
    output_path = get_diagnostic_filename(cube.var_name, cfg)
    io.iris_save(cube, output_path)
    return output_path
def plot_emergent_relationship(cfg, x_data, y_data, x_ref, x_ref_err, y_mean):
    """Plot emergent relationship.

    Draws the data points of every group, the linear regression including
    its error band, the reference value ``x_ref`` with its error and the
    horizontal line ``y_mean``. The figure and the underlying data are
    saved and logged with a provenance record.
    """
    (feature, feature_units, label, label_units) = _get_tags(x_data, y_data)
    logger.info("Plotting emergent relationship between '%s' and '%s'", label,
                feature)
    (_, axes) = plt.subplots()

    # Data points (one plot call per group)
    for group in x_data.index:
        plot_kwargs = _get_plot_kwargs(cfg,
                                       'plot_emergent_relationship',
                                       group=group)
        plot_kwargs['linestyle'] = 'none'
        plot_kwargs['label'] = group
        axes.plot(x_data.loc[group], y_data.loc[group], **plot_kwargs)

    # Freeze the axes limits before the regression line is drawn
    axes.set_xlim(auto=False)
    axes.set_ylim(auto=False)

    # Regression line and error band
    lines = ec.regression_line(x_data.values.squeeze(),
                               y_data.values.squeeze())
    lines['x'] = np.squeeze(lines['x'])
    axes.plot(lines['x'],
              lines['y'],
              color='orange',
              linestyle='-',
              label='Linear regression')
    axes.fill_between(lines['x'],
                      lines['y_minus_err'],
                      lines['y_plus_err'],
                      color='orange',
                      alpha=0.2)

    # Reference value, its error range and the mean of y
    ref_value = x_ref.values.squeeze()
    ref_error = x_ref_err.values.squeeze()
    axes.axvline(ref_value,
                 color='k',
                 linestyle=':',
                 label='Observational constraint')
    axes.axvspan(ref_value - ref_error,
                 ref_value + ref_error,
                 color='k',
                 alpha=0.1)
    axes.axhline(y_mean, color='k', linestyle=':')

    # Appearance
    axes.set_title(f"Emergent relationship between {label} and {feature}")
    axes.set_xlabel(f"{feature} [{feature_units}]")
    axes.set_ylabel(f"{label} [{label_units}]")
    _process_pyplot_kwargs(cfg, 'plot_emergent_relationship')
    plt.legend(**cfg['legend_kwargs'])

    # Statistics text: lower part of the plot for positive correlations,
    # upper part otherwise
    text = rf"$R^2$ = {lines['rvalue']**2:.2f}, p = {lines['pvalue']:.3f}"
    text_y_pos = 0.05 if lines['rvalue'] > 0.0 else 0.95
    axes.text(0.6, text_y_pos, text, transform=axes.transAxes)

    # Save plot
    plot_path = get_plot_filename(f'{label}_vs_{feature}', cfg)
    savefig_kwargs = get_savefig_kwargs(cfg)
    plt.savefig(plot_path, **savefig_kwargs)
    logger.info("Wrote %s", plot_path)
    plt.close()

    # Provenance
    cube = _get_ec_cube(x_data, y_data)
    netcdf_path = get_diagnostic_filename(f'{label}_vs_{feature}', cfg)
    io.iris_save(cube, netcdf_path)
    record = dict(
        ancestors=_get_ec_ancestors(cfg),
        authors=['schlund_manuel'],
        caption=f"Emergent relationship between {label} and {feature}.",
        plot_file=plot_path,
        plot_types=['scatter'],
        references=['schlund20jgr'],
        themes=['EC'],
    )
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, record)
def zhai(grouped_data, cfg):
    """Zhai et al. (2015) constraint.

    For every dataset the slope of the linear regression of
    ``mblc_fraction`` against ``tos`` is calculated for two latitude bands
    (20 to 40 and -40 to -20) and averaged. Each regression is plotted and
    saved together with a provenance record.

    Returns the tuple ``(diag_data, var_attrs, attrs)``.
    """
    # Attributes of the resulting diagnostic variable
    var_attrs = {
        'short_name': 'mblc_sst_response',
        'long_name': 'Response of seasonal Marine Boundary Layer Cloud (MBLC) '
                     'fraction to change in Sea Surface Temperature (SST) ',
        'units': '% K-1',
    }
    attrs = {
        'plot_xlabel': r'Response of MBLC fraction to SST changes '
                       r'[% K$^{-1}$]',
        'plot_title': 'Zhai et al. (2015) constraint',
        'provenance_authors': ['schlund_manuel'],
        'provenance_domains': ['trop', 'shmidlat'],
        'provenance_realms': ['atmos'],
        'provenance_references': ['zhai15grl'],
        'provenance_statistics': ['mean'],
        'provenance_themes': ['EC'],
    }

    # Both hemispheres are considered separately
    n_h = (20.0, 40.0)
    s_h = (-40.0, -20.0)

    # Calculate constraint
    diag_data = {}
    for (dataset_name, datasets) in grouped_data.items():
        logger.info("Processing dataset '%s'", dataset_name)
        _check_variables(datasets, {'cl', 'wap', 'tos'})

        slopes = []
        for lat_constraint in (n_h, s_h):
            if lat_constraint == n_h:
                hem = 'Northern hemisphere'
                filename = f'zhai_{dataset_name}_nh'
            else:
                hem = 'Southern hemisphere'
                filename = f'zhai_{dataset_name}_sh'
            data_frame = _get_zhai_data_frame(datasets, lat_constraint)

            # MBLC fraction response to SST changes
            reg = linregress(data_frame['tos'].values,
                             data_frame['mblc_fraction'].values)
            slopes.append(reg.slope)

            # Plot regression
            axes = sns.regplot(x='tos', y='mblc_fraction', data=data_frame)
            axes.text(
                0.05,
                0.05,
                rf"$\alpha={reg.slope:.3f}$ %/K ($R^2={reg.rvalue**2:.2f}$, "
                rf"$p={reg.pvalue:.4f}$)",
                transform=axes.transAxes)
            plot_path = get_plot_filename(filename, cfg)
            axes.set_title(f'{dataset_name} ({hem})')
            plt.savefig(plot_path, **cfg['savefig_kwargs'])
            logger.info("Wrote %s", plot_path)
            plt.close()

            # Save plotted data
            netcdf_path = get_diagnostic_filename(filename, cfg)
            tos_cube = ec.pandas_object_to_cube(
                data_frame['tos'],
                var_name='tos',
                standard_name='sea_surface_temperature',
                units='K',
                attributes={'region': hem})
            mblc_cube = ec.pandas_object_to_cube(
                data_frame['mblc_fraction'],
                var_name='mblc_fraction',
                long_name='Marine Boundary Layer Cloud fraction',
                units='%',
                attributes={'region': hem})
            cubes = iris.cube.CubeList([tos_cube, mblc_cube])
            io.iris_save(cubes, netcdf_path)

            # Provenance
            provenance_record = ec.get_provenance_record(
                {'zhai': attrs}, ['zhai'],
                caption=f"Regression plot of 'mblc_fraction' vs 'tos' ({hem})",
                plot_type='scatter',
                plot_file=plot_path,
                ancestors=[d['filename'] for d in datasets])
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(netcdf_path, provenance_record)

        # Mean over both hemispheres
        diag_data[dataset_name] = np.mean(slopes)

    return (diag_data, var_attrs, attrs)
def plot_individual_scatterplots(training_data, pred_input_data, attributes,
                                 basename, cfg):
    """Plot individual scatterplots for the different groups.

    Plot scatterplots for all pairs of ``(feature, label)`` data (Separate
    plot for each group). For every plot, the plotted data is additionally
    saved as netCDF file and logged with a provenance record.

    Parameters
    ----------
    training_data : pandas.DataFrame
        Training data (features, label).
    pred_input_data : pandas.DataFrame
        Prediction input data (mean and error).
    attributes : dict
        Plot attributes for the different features and the label data.
    basename : str
        Basename for the name of the file.
    cfg : dict
        Recipe configuration. The optional item ``savefig_kwargs`` is
        forwarded to :func:`matplotlib.pyplot.savefig`; unless it already
        sets ``bbox_extra_artists``, the legend is passed via that keyword.

    """
    logger.info("Plotting individual scatterplots")
    label = training_data.y.columns[0]
    groups = get_groups(training_data)

    # Iterate over features
    for feature in training_data.x.columns:
        (x_data, y_data) = get_xy_data_without_nans(training_data, feature,
                                                    label)

        # Individual plots
        for (idx, group) in enumerate(groups):
            try:
                x_sub_data = x_data.loc[group]
                y_sub_data = y_data.loc[group]
                index_droplevel = 1
            except KeyError:
                # Group is not part of the index: use the full data
                x_sub_data = x_data
                y_sub_data = y_data
                index_droplevel = [0, 2]
            axes = _create_scatterplot(x_sub_data,
                                       y_sub_data,
                                       numbers_as_markers=cfg.get(
                                           'numbers_as_markers', False),
                                       color=COLORS_ALL[idx],
                                       label=group)
            axes = _create_pred_input_plot(
                pred_input_data['mean'][feature].values,
                pred_input_data['error'][feature].values,
                axes,
                alpha=0.4,
                color=COLORS[0],
                label='Observation')
            set_plot_appearance(axes,
                                attributes,
                                plot_title=feature,
                                plot_xlabel=feature,
                                plot_ylabel=label,
                                plot_xlim=feature,
                                plot_ylim=label)
            legend = plt.legend(**LEGEND_KWARGS)
            filename = (f"scatterplot_{basename}_{feature}_"
                        f"{group.replace(', ', '-')}")
            plot_path = get_plot_filename(filename, cfg)

            # ``bbox_extra_artists`` is the documented savefig keyword for
            # additional artists (the formerly used ``additional_artists``
            # is not a documented savefig parameter). Work on a copy so
            # that the configuration is not altered, and do not override a
            # user-defined value.
            savefig_kwargs = dict(cfg.get('savefig_kwargs', {}))
            savefig_kwargs.setdefault('bbox_extra_artists', [legend])
            plt.savefig(plot_path, **savefig_kwargs)
            logger.info("Wrote %s", plot_path)
            plt.close()

            # Provenance
            cubes = iris.cube.CubeList([
                pandas_object_to_cube(
                    x_sub_data,
                    index_droplevel=index_droplevel,
                    var_name=feature,
                    long_name=attributes[feature]['plot_xlabel'],
                    units=attributes[feature]['units']),
                pandas_object_to_cube(
                    y_sub_data,
                    index_droplevel=index_droplevel,
                    var_name=label,
                    long_name=attributes[label]['plot_ylabel'],
                    units=attributes[label]['units']),
            ])
            netcdf_path = get_diagnostic_filename(filename, cfg)
            io.iris_save(cubes, netcdf_path)
            provenance_record = get_provenance_record(
                attributes, [feature, label],
                caption=get_caption(attributes, feature, label, group=group),
                plot_type='scatter',
                plot_file=plot_path)
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(netcdf_path, provenance_record)
def test_iris_save(mock_logger, mock_save, source, output):
    """Test iris save function.

    ``io.iris_save`` must call the (mocked) save function exactly once with
    the expected output and path and must log exactly one info message.
    """
    expected_calls = [mock.call(output, PATH)]

    io.iris_save(source, PATH)

    assert mock_save.call_args_list == expected_calls
    mock_logger.info.assert_called_once()