def test_validation(self):
    """Check that each malformed argument tuple makes crps_ensemble raise ValueError."""
    bad_argument_sets = [
        ([0, 1], 0),
        (0, 0, [0, 1], (0, 1, 1)),
    ]
    for bad_args in bad_argument_sets:
        # Callable form of assertRaises; equivalent to the context-manager form.
        self.assertRaises(ValueError, crps_ensemble, *bad_args)
def crps(forecast, truth):
    r"""Continuous ranked probability score.

    .. math::

        CRPS = \int_{-\infty}^{\infty} [P(x) - P_a(x)]^2 dx

    Where :math:`P(x)` is the predicted cumulative probability distribution
    and :math:`P_a(x)` is the true cumulative probability distribution, which
    is typically represented by the Heaviside function.

    Args:
        forecast (iris.cube.Cube): Ensemble forecast. The ensemble dimension
            is taken to be the last axis (``axis=-1`` is passed on).
        truth (iris.cube.Cube): Same as forecast but without the
            `ensemble_member` coordinate.

    Returns:
        Whatever ``crps_ensemble`` returns for these inputs (unweighted,
        members not assumed to be pre-sorted).
    """
    # The original computed the score and then discarded it with a bare
    # ``return``; hand the result back to the caller instead.
    return crps_ensemble(
        observations=truth,
        forecasts=forecast,
        weights=None,
        issorted=False,
        axis=-1,
    )
def test_basic_consistency(self):
    """Batched scoring must match scoring every observation on its own."""
    per_case = [
        crps_ensemble(single_obs, members)
        for single_obs, members in zip(self.obs, self.forecasts)
    ]
    expected = np.array(per_case)

    batched = crps_ensemble(self.obs, self.forecasts)
    assert_allclose(batched, expected)

    # Same data, ensemble dimension moved to the leading axis.
    transposed = crps_ensemble(self.obs, self.forecasts.T, axis=0)
    assert_allclose(transposed, expected)

    # Using the observations themselves as the forecast gives a zero score.
    self_scored = crps_ensemble(self.obs, self.obs)
    assert_allclose(self_scored, np.zeros(10))
def test_crps_beyond_bounds(self):
    """An observation 0.1 beyond 0 or 1 must add exactly 0.1 to the score."""
    members = np.random.random(size=(100,))
    # (observation outside the unit interval, nearest bound of the interval)
    for outside, bound in [(-0.1, 0), (+1.1, 1)]:
        self.assertAlmostEqual(
            crps_ensemble(outside, members),
            0.1 + crps_ensemble(bound, members),
        )
def test_issorted(self):
    """The ``issorted`` flag must not change the score of sorted members."""
    members = np.random.random((10, ))
    observation = np.random.random()
    ordered = np.sort(members)

    presorted_score = crps_ensemble(observation, ordered, issorted=True)
    # Unsorted input scored normally vs. sorted input flagged as sorted.
    self.assertEqual(crps_ensemble(observation, members), presorted_score)

    # Sorted input with and without the flag.
    unflagged_score = crps_ensemble(observation, ordered, issorted=False)
    self.assertEqual(
        unflagged_score,
        crps_ensemble(observation, ordered, issorted=True),
    )
def test_nan_forecasts_consistency(self):
    """Both implementations must agree when some forecasts are all-NaN."""
    rng = np.random.RandomState(123)
    # Blank out the whole ensemble for a random subset of observations.
    missing = rng.rand(*self.obs.shape) > 0.5
    self.forecasts[missing] = np.nan

    fast = crps_ensemble(self.obs, self.forecasts)
    reference = _crps_ensemble_vectorized(self.obs, self.forecasts)
    assert_allclose(fast, reference)

    # Forecasts shaped like the observations (first member only).
    first_member = self.forecasts[:, 0]
    assert_allclose(
        crps_ensemble(self.obs, first_member),
        _crps_ensemble_vectorized(self.obs, first_member),
    )
def test_weight_normalization(self):
    """Uniform weights of any scale must reproduce the unweighted score."""
    observation = np.random.random()
    members = np.random.random((10, ))
    unweighted = crps_ensemble(observation, members)

    uniform = np.ones_like(members)
    for member_weights in (uniform, 0.1 * np.ones_like(members)):
        weighted = crps_ensemble(observation, members, member_weights)
        self.assertAlmostEqual(unweighted, weighted)

    # Weights whose length differs from the ensemble size are rejected.
    with self.assertRaises(ValueError):
        crps_ensemble(observation, members, np.ones(5))
def test_issorted(self):
    """Scores must be identical whether or not ``issorted`` is used."""
    raw = np.random.random((10,))
    obs_value = np.random.random()
    raw_sorted = np.sort(raw)

    # Unsorted members scored normally vs. sorted members flagged as sorted.
    score_unsorted = crps_ensemble(obs_value, raw)
    score_flagged = crps_ensemble(obs_value, raw_sorted, issorted=True)
    self.assertEqual(score_unsorted, score_flagged)

    # Sorted members scored with and without the flag.
    score_unflagged = crps_ensemble(obs_value, raw_sorted, issorted=False)
    score_flagged_again = crps_ensemble(obs_value, raw_sorted, issorted=True)
    self.assertEqual(score_unflagged, score_flagged_again)
def test_basic_consistency(self):
    """Scoring all observations at once must equal scoring them one by one."""
    one_by_one = []
    for obs_value, ensemble in zip(self.obs, self.forecasts):
        one_by_one.append(crps_ensemble(obs_value, ensemble))
    expected = np.array(one_by_one)

    # Ensemble dimension on the default (last) axis.
    assert_allclose(crps_ensemble(self.obs, self.forecasts), expected)
    # Ensemble dimension on the first axis.
    assert_allclose(
        crps_ensemble(self.obs, self.forecasts.T, axis=0), expected)
    # Observations used as their own forecast score zero.
    assert_allclose(crps_ensemble(self.obs, self.obs), np.zeros(10))
def test_weight_normalization(self):
    """Rescaling uniform weights must leave the score unchanged."""
    obs_value = np.random.random()
    ensemble = np.random.random((10,))
    baseline = crps_ensemble(obs_value, ensemble)

    candidate_weights = [np.ones_like(ensemble), 0.1 * np.ones_like(ensemble)]
    for w in candidate_weights:
        self.assertAlmostEqual(baseline, crps_ensemble(obs_value, ensemble, w))

    # mismatched dimensions
    self.assertRaises(
        ValueError, lambda: crps_ensemble(obs_value, ensemble, np.ones(5)))
def CRPSS(self, y_true, y_pred):
    """Return the ensemble CRPS, or its skill score against a constant benchmark.

    Args:
        y_true: Observations passed to ``ps.crps_ensemble``.
        y_pred: Ensemble forecasts passed to ``ps.crps_ensemble``.

    Returns:
        * ``self.benchmark is False``: the forecast CRPS (averaged when
          ``self.return_mean`` is truthy).
        * ``self.benchmark`` is a real number: ``(bench - fc) / bench`` where
          ``bench`` is the CRPS of a constant forecast equal to the benchmark.
        * any other benchmark value: ``None`` (unchanged from the original,
          which fell off the end of the function).
    """
    import numbers

    fc_score = ps.crps_ensemble(y_true, y_pred, weights=self.weights)
    if self.return_mean:
        fc_score = fc_score.mean()

    if self.benchmark is False:
        return fc_score

    # isinstance (rather than an exact type() match) also accepts numpy
    # scalars such as np.float64 / np.int64. bool is excluded explicitly
    # because it subclasses int and was never treated as a number here.
    is_number = (isinstance(self.benchmark, numbers.Real)
                 and not isinstance(self.benchmark, bool))
    if is_number:
        # Force a float array: zeros_like(y_true) would silently truncate a
        # fractional benchmark whenever y_true has an integer dtype.
        reference = np.full_like(y_true, self.benchmark, dtype=float)
        bench = ps.crps_ensemble(y_true, reference, weights=self.weights)
        if self.return_mean:
            bench = bench.mean()
        return (bench - fc_score) / bench

    # NOTE(review): other benchmark types are not handled in the visible code.
    return None
def crps(truth, ensemble):
    '''
    Calculate mean CRPS of an ensemble time series

    Parameters
    ----------
    truth: <np.array>
        A 1-D array of truth time series
        Dimension: [n]
    ensemble: <np.array>
        A 2-D array of ensemble time series
        Dimension: [n, N], where N is ensemble size; n is time series length

    Returns
    ----------
    crps: <float>
        Time-series-mean CRPS

    Require
    ----------
    import properscoring as ps
    '''
    # Score each time step against its own row of ensemble members.
    per_step = [
        ps.crps_ensemble(obs_t, ensemble[t, :])
        for t, obs_t in enumerate(truth)
    ]
    return np.asarray(per_step).mean()
def bnn_rules(self, model, X, y, samples):
    """Compute CRPS, log score and DSS of sampled model predictions.

    Args:
        model: Object whose ``evaluate(X, samples)`` result can be reshaped
            to ``(samples, X.shape[0])``.
        X: Inputs; ``X.shape[0]`` is the number of cases.
        y: Targets, iterated case by case (``y.squeeze()`` is used for CRPS).
        samples: Number of predictive samples drawn per case.

    Returns:
        ``(scores, scores_l)``: two dicts keyed ``'CRPS'``, ``'LS'`` and
        ``'DSS'``; the first holds the mean of each score, the second the
        per-case arrays.
    """
    predictions = model.evaluate(X, samples)
    # One row per case, one column per sample.
    sampled = predictions.reshape(samples, X.shape[0]).T

    # Log score from a KDE of the samples; densities are floored at 0.001 so
    # the logarithm stays finite.
    densities = np.array([
        gaussian_kde(sampled[j]).pdf(target)
        for j, target in enumerate(y)
    ])
    log_scores = -np.log(densities.clip(0.001))

    # The original first computed an empirical ps.crps_ensemble score and then
    # overwrote it with this Gaussian CRPS; only the Gaussian value was ever
    # used, so the redundant computation is dropped.
    crps_scores = np.array([
        ps.crps_gaussian(target, mu=sampled[j].mean(), sig=sampled[j].std())
        for j, target in enumerate(y.squeeze())
    ])

    dss_scores = np.array([
        sc.dss_norm(target, loc=sampled[j].mean(), scale=sampled[j].std())
        for j, target in enumerate(y)
    ])

    scores = {
        'CRPS': crps_scores.mean(),
        'LS': log_scores.mean(),
        'DSS': dss_scores.mean(),
    }
    scores_l = {
        'CRPS': crps_scores,
        'LS': log_scores,
        'DSS': dss_scores,
    }
    return scores, scores_l
def test_crps_degenerate_ensemble(self):
    """With all members equal, the score is the absolute offset of the observation."""
    centre = np.random.random()
    members = centre * np.ones((10, ))
    for offset in (-np.pi, 0.0, +np.pi):
        expected = np.abs(offset * 1.0**2)
        self.assertAlmostEqual(crps_ensemble(centre + offset, members), expected)
def test_crps_weight_examples(self):
    """Check weighted scores against hand-computed values.

    Each case is ``(observation, members, weights, expected score)``.
    """
    third = [1./3, 1./3, 1./3]
    quarter = [0.25, 0.25, 0.25, 0.25]
    cases = [
        # Simplest test.
        (1., [0, 2], [0.5, 0.5], 0.5),
        # Out-of-order analogues.
        (1., [2, 0], [0.5, 0.5], 0.5),
        # Test non-equal weighting.
        (1., [0, 2], [0.8, 0.2], 0.64 + 0.04),
        # Test non-equal weighting + non-equal distances.
        (1.5, [0, 2], [0.8, 0.2], 0.64 * 1.5 + 0.04 * 0.5),
        # Test distances > 1.
        (1., [0, 3], [0.5, 0.5], 0.75),
        # Test distances > 1.
        (1., [-1, 3], [0.5, 0.5], 1),
        # Test weight = 0.
        (1., [0, 2], [1, 0], 1),
        # Test 3 analogues, observation aligned.
        (1., [0, 1, 2], third, 2./9),
        # Test 3 analogues, observation not aligned.
        (1.5, [0, 1, 2], third, 1./9 + 4./9 * 0.5 + 1./9 * 0.5),
        # Test 3 analogues, observation below range.
        (-1., [0, 1, 2], third, 1 + 1./9 + 4./9),
        # Test 3 analogues, observation above range.
        (2.5, [0, 1, 2], third, 4./9 + 1./9 + 0.5 * 1),
        # Test 4 analogues, observation aligned.
        (1., [0, 1, 2, 3], quarter, 3./8),
        # Test 4 analogues, observation not aligned.
        (1.5, [0, 1, 2, 3], quarter,
         1./16 + 0.5 * 4./16 + 0.5 * 4./16 + 1./16),
    ]
    for observation, members, member_weights, want in cases:
        got = crps_ensemble(observation, members, member_weights)
        self.assertAlmostEqual(got, want)
def test_crps_weight_examples(self):
    """Weighted scores must match worked examples.

    Every row is ``(x, ensemble, weights, expected)``.
    """
    equal3 = [1. / 3, 1. / 3, 1. / 3]
    equal4 = [0.25, 0.25, 0.25, 0.25]
    worked_examples = (
        # Simplest test.
        (1., [0, 2], [0.5, 0.5], 0.5),
        # Out-of-order analogues.
        (1., [2, 0], [0.5, 0.5], 0.5),
        # Test non-equal weighting.
        (1., [0, 2], [0.8, 0.2], 0.64 + 0.04),
        # Test non-equal weighting + non-equal distances.
        (1.5, [0, 2], [0.8, 0.2], 0.64 * 1.5 + 0.04 * 0.5),
        # Test distances > 1.
        (1., [0, 3], [0.5, 0.5], 0.75),
        # Test distances > 1.
        (1., [-1, 3], [0.5, 0.5], 1),
        # Test weight = 0.
        (1., [0, 2], [1, 0], 1),
        # Test 3 analogues, observation aligned.
        (1., [0, 1, 2], equal3, 2. / 9),
        # Test 3 analogues, observation not aligned.
        (1.5, [0, 1, 2], equal3, 1. / 9 + 4. / 9 * 0.5 + 1. / 9 * 0.5),
        # Test 3 analogues, observation below range.
        (-1., [0, 1, 2], equal3, 1 + 1. / 9 + 4. / 9),
        # Test 3 analogues, observation above range.
        (2.5, [0, 1, 2], equal3, 4. / 9 + 1. / 9 + 0.5 * 1),
        # Test 4 analogues, observation aligned.
        (1., [0, 1, 2, 3], equal4, 3. / 8),
        # Test 4 analogues, observation not aligned.
        (1.5, [0, 1, 2, 3], equal4,
         1. / 16 + 0.5 * 4. / 16 + 0.5 * 4. / 16 + 1. / 16),
    )
    for example in worked_examples:
        x, ensemble, weights, expected = example
        self.assertAlmostEqual(crps_ensemble(x, ensemble, weights), expected)
def crps(truth, ensemble):
    '''
    Calculate mean CRPS of an ensemble time series

    Parameters
    ----------
    truth: <np.array>
        A 1-D array of truth time series
        Dimension: [n]
    ensemble: <np.array>
        A 2-D array of ensemble time series
        Dimension: [n, N], where N is ensemble size; n is time series length

    Returns
    ----------
    crps: <float>
        Time-series-mean CRPS

    Require
    ----------
    import properscoring as ps
    '''
    step_scores = []
    for t, truth_t in enumerate(truth):
        # One score per time step, using that step's ensemble members.
        step_scores.append(ps.crps_ensemble(truth_t, ensemble[t, :]))
    mean_score = np.asarray(step_scores).mean()
    return mean_score
def test_crps_degenerate_ensemble(self):
    """A constant ensemble must score the absolute distance to the observation."""
    value = np.random.random()
    constant_members = value * np.ones((10,))
    deltas = [-np.pi, 0.0, +np.pi]
    for delta in deltas:
        score = crps_ensemble(value + delta, constant_members)
        self.assertAlmostEqual(score, np.abs(delta * 1.0 ** 2))
def eval_crps(z_values, z_true):
    """Score each row of ``z_values`` against the matching entry of ``z_true``.

    ``z_values`` is promoted to at least 2-D, and each row is passed to
    ``ps.crps_ensemble`` as the forecast for ``z_true[row]``.

    Returns:
        1-D float array with one score per row of ``z_values``.
    """
    rows = np.atleast_2d(z_values)
    scores = np.zeros(rows.shape[0])
    for row, members in enumerate(rows):
        scores[row] = ps.crps_ensemble(z_true[row], members)
    return scores
def compute_crps_ps(
        self,
        threshs,
):
    """Return the CRPS of ``self.xfs`` against ``self.xa`` via properscoring.

    ``threshs`` is accepted but not used by this method.
    """
    # Imported locally so the dependency is only needed when this is called.
    from properscoring import crps_ensemble

    return crps_ensemble(self.xa, self.xfs)
def test_xr_crps_ensemble_dask(a_dask, b_dask):
    """xr_crps_ensemble must equal crps_ensemble and keep dask chunking."""
    actual = xr_crps_ensemble(a_dask, b_dask)
    reference_values = crps_ensemble(a_dask, b_dask)
    expected = xr.DataArray(reference_values, coords=a_dask.coords)

    # test for numerical identity of xr_crps and crps
    assert_identical(actual, expected)
    # the xarray wrapper returns a chunked result ...
    assert actual.chunks is not None
    # ... while the plain crps_ensemble result is not chunked
    assert expected.chunks is None
def test_crps_consistency(self):
    """Integrating threshold Brier scores over thresholds must give the CRPS."""
    obs = np.random.RandomState(123).rand(100)
    forecasts = np.random.RandomState(456).rand(100, 100)
    thresholds = np.linspace(0, 1, num=10000)

    brier_by_threshold = threshold_brier_score(obs, forecasts, thresholds)
    # Rectangle-rule integration over the evenly spaced thresholds.
    step = thresholds[1] - thresholds[0]
    integrated = brier_by_threshold.sum(1) * step

    assert_allclose(integrated, crps_ensemble(obs, forecasts), atol=1e-4)
def test_crps_toy_examples_nan(self):
    """Each of these NaN-containing argument sets must yield a NaN score."""
    nan = np.nan
    nan_cases = (
        (nan, 0),
        (0, nan),
        (0, [nan, nan]),
        (0, [1], [nan]),
        (0, [nan], [1]),
        (nan, [1], [1]),
    )
    for call_args in nan_cases:
        score = crps_ensemble(*call_args)
        self.assertTrue(np.isnan(score))
def calculate_scores(observations, forecasts):
    """
    Given an array of forecasts (rows = samples, columns = dates),
    return the CRPS computed across the sample axis (``axis=0``).
    """
    from properscoring import crps_ensemble

    # A Brier-score computation was sketched here in the original but left
    # disabled, so only the CRPS is returned.
    return crps_ensemble(observations, forecasts, axis=0)
def test_crps_ensemble_dask(o_dask, f_prob_dask, keep_attrs):
    """xskillscore crps_ensemble on dask input: values, chunking and attrs."""
    actual = crps_ensemble(o_dask, f_prob_dask, keep_attrs=keep_attrs)

    raw_reference = properscoring.crps_ensemble(o_dask, f_prob_dask, axis=0)
    expected = xr.DataArray(raw_reference, coords=o_dask.coords).mean()

    # test for numerical identity of xskillscore crps and properscoring crps
    assert_allclose(actual, expected)
    # test that xskillscore crps_ensemble returns chunks
    assert actual.chunks is not None
    # show that properscoring crps_ensemble returns no chunks
    assert expected.chunks is None

    # Attributes are carried over only when requested.
    wanted_attrs = o_dask.attrs if keep_attrs else {}
    assert actual.attrs == wanted_attrs
def prob_analysis_runs(month_day, runs, horizons,
                       base_folder='/a2/uaren/travis', ):
    """Compute per-horizon CRPS time series for each run and write them to HDF5.

    For every ``(month, day)`` entry of ``month_day`` (the year is fixed to
    2014) the truth ``'ci'`` field is loaded, and for each run and horizon the
    CRPS from ``ps.crps_ensemble`` is averaged over axes 1 and 2 and stored as
    one column of a DataFrame indexed by the truth times. The DataFrame is
    saved as ``crps.h5`` (key ``'crps'``) in the latest run folder under
    ``<base_folder>/results/<year>/<month>/<day>/<run>``.
    """
    for this_month_day in month_day:
        print(this_month_day)
        year = 2014
        month = this_month_day[0]
        day = this_month_day[1]

        truth_path = os.path.join(
            base_folder, f'data/{year:04}/{month:02}/{day:02}/data.nc')
        truth = xr.open_dataset(truth_path)['ci']
        truth = letkf_io.add_crop_attributes(truth)
        truth = return_error_domain(truth).load()
        full_index = truth.time.to_pandas().index

        for run in runs:
            crps_df = pd.DataFrame(index=full_index, columns=horizons)
            print(run)

            full_day = letkf_io.return_day(
                year, month, day, run, base_folder)
            full_day = letkf_io.add_crop_attributes(full_day)
            full_day = return_error_domain(full_day)['ci'].load()

            for horizon in horizons:
                horizon_day = return_horizon(full_day, horizon)
                # Only score times present in both truth and forecast.
                shared_times = np.intersect1d(
                    full_index, horizon_day.time.to_pandas().index)
                horizon_day = horizon_day.sel(time=shared_times)
                horizon_truth = truth.sel(time=shared_times)

                # Axis 1 of the forecast is moved last, where crps_ensemble
                # expects the ensemble dimension by default.
                pointwise = ps.crps_ensemble(
                    horizon_truth.values,
                    horizon_day.values.transpose([0, 2, 3, 1]))
                crps_df[horizon] = pd.Series(
                    pointwise.mean(axis=(1, 2)), index=shared_times)

            run_folder = os.path.join(
                base_folder, 'results',
                f'{year:04}', f'{month:02}', f'{day:02}', run)
            run_folder = letkf_io.find_latest_run(run_folder)
            crps_df.to_hdf(os.path.join(run_folder, 'crps.h5'), 'crps')
def setUp(self):
    """Build seeded Gaussian fixtures and a reference CRPS from a dense ensemble.

    Stores ``self.mu``, ``self.sig`` and ``self.obs`` (all shape ``(2, 3)``)
    and ``self.expected``, the ensemble CRPS of 1000 normal quantiles.
    """
    np.random.seed(1983)
    shape = (2, 3)
    self.mu = np.random.normal(size=shape)
    self.sig = np.square(np.random.normal(size=shape))
    self.obs = np.random.normal(loc=self.mu, scale=self.sig, size=shape)

    n = 1000
    # Mid-points of n equal-probability bins on (0, 1).
    probabilities = np.linspace(0. + 0.5 / n, 1. - 0.5 / n, n)
    # convert to the corresponding normal deviates
    deviates = special.ndtri(probabilities)
    forecasts = deviates.reshape(-1, 1, 1) * self.sig + self.mu
    self.expected = crps_ensemble(self.obs, forecasts, axis=0)
def test_crps_ensemble_api_and_inputs(o, f_prob, keep_attrs, input_type,
                                      chunk_bool):
    """Test that crps_ensemble keeps attributes, chunking, input types and
    equals properscoring.crps_ensemble."""
    o, f_prob = modify_inputs(o, f_prob, input_type, chunk_bool)
    actual = crps_ensemble(o, f_prob, keep_attrs=keep_attrs)

    # properscoring allows only DataArrays
    if input_type == "DataArray":
        reference = properscoring.crps_ensemble(o, f_prob, axis=0)
        expected = xr.DataArray(reference, coords=o.coords).mean()
        # test for numerical identity of xskillscore crps and properscoring crps
        assert_allclose(actual, expected)

    # test that returns chunks
    assert_chunk(actual, chunk_bool)
    # test that attributes are kept
    assert_keep_attrs(actual, o, keep_attrs)
    # test that input types equal output types
    assign_type_input_output(actual, o)
def test_crps_nans(self):
    """NaN members must be ignored; a NaN observation must give a NaN score."""
    members = np.random.random((10,))
    padded_members = np.r_[members, [np.nan] * 3]
    member_weights = np.random.rand(10)
    # Extra weights line up with the NaN members, so they should not matter.
    padded_weights = np.r_[member_weights, np.random.rand(3)]
    observation = np.random.random()

    self.assertEqual(
        crps_ensemble(observation, members),
        crps_ensemble(observation, padded_members),
    )
    self.assertAlmostEqual(
        crps_ensemble(observation, members, member_weights),
        crps_ensemble(observation, padded_members, padded_weights),
    )
    for ensemble in (members, padded_members):
        self.assertTrue(np.isnan(crps_ensemble(np.nan, ensemble)))
def test_pdf_derived_weights(self):
    """Weights taken from a pdf on a grid must reproduce the expected CRPS."""
    # One way of evaluating the CRPS given a pdf is to simply evaluate
    # the pdf at a set of points (fcsts) and set weights=pdf(fcsts).
    # This tests that that method works.
    def frozen_pdf(*args, **kwdargs):
        return stats.norm(*args, **kwdargs).pdf

    pdfs = np.vectorize(frozen_pdf)(loc=self.mu, scale=self.sig)

    # Grid spanning +/- 4 standard deviations around each mean.
    unit_grid = np.linspace(-4., 4., 500)
    fcsts = (self.mu[..., np.newaxis]
             + self.sig[..., np.newaxis]
             * unit_grid[np.newaxis, np.newaxis, :])

    weights = np.empty_like(fcsts)
    for cell in np.ndindex(pdfs.shape):
        weights[cell] = pdfs[cell](fcsts[cell])

    actual = crps_ensemble(self.obs, fcsts, weights)
    np.testing.assert_allclose(actual, self.expected, rtol=1e-4)
def test_crps_nans(self):
    """Appending NaN members must not change the score; NaN observations give NaN."""
    vec = np.random.random((10, ))
    nan_tail = [np.nan] * 3
    vec_with_nans = np.r_[vec, nan_tail]
    weights = np.random.rand(10)
    weights_with_nans = np.r_[weights, np.random.rand(3)]
    x = np.random.random()

    # Unweighted: exact equality is expected.
    clean_score = crps_ensemble(x, vec)
    self.assertEqual(clean_score, crps_ensemble(x, vec_with_nans))

    # Weighted: equality up to rounding.
    weighted_clean = crps_ensemble(x, vec, weights)
    weighted_padded = crps_ensemble(x, vec_with_nans, weights_with_nans)
    self.assertAlmostEqual(weighted_clean, weighted_padded)

    # A missing observation always yields a missing score.
    self.assertTrue(np.isnan(crps_ensemble(np.nan, vec)))
    self.assertTrue(np.isnan(crps_ensemble(np.nan, vec_with_nans)))
def test_crps_toy_examples(self):
    """Both implementations must match small hand-computed examples.

    Each case is ``(observation, ensemble, expected score)``.
    """
    cases = (
        (0, 0, 0.0),
        (0, 1, 1.0),
        (-1, 0, 1.0),
        (0, [-1], 1.0),
        (0, [0], 0.0),
        (0, [1], 1.0),
        (0, [0, 0], 0.0),
        (0, [0, 1], 0.25),
        (0, [1, 0], 0.25),
        (0, [1, 1], 1.0),
        (2, [0, 1], 1.25),
        (0, [-1, 1], 0.5),
        (0, [0, 0, 1], 1.0 / 9),
        (1, [0, 0, 1], 4.0 / 9),
        (0, [-1, 0, 0, 1], 1.0 / 8),
    )
    for observation, members, want in cases:
        for scorer in (crps_ensemble, _crps_ensemble_vectorized):
            self.assertAlmostEqual(scorer(observation, members), want)
def test_high_dimensional_consistency(self):
    """Both implementations must agree on 2-D observations with 5 members."""
    obs = np.random.randn(10, 20)
    forecasts = np.random.randn(10, 20, 5)
    fast = crps_ensemble(obs, forecasts)
    reference = _crps_ensemble_vectorized(obs, forecasts)
    assert_allclose(fast, reference)
truth = data[fc_step * lead_time:-(max_forecast_steps - fc_step * lead_time):tres_factor] else: truth = data[fc_step * lead_time::tres_factor] # compute ensemble mean error mse_ensmean_netens = compute_mse(y_pred_ensmean_netens, truth) ensvar_mean_netens = np.mean(y_pred_ensvar_netens_2d, axis=(1, 2)) * norm_std ** 2 res_mse_netens.append(mse_ensmean_netens) res_ensvar_netens.append(ensvar_mean_netens) mse_ensmean_member = np.array([compute_mse(y_pred_netens[:,i], truth) for i in range(n_ens)]) res_mse_netens_permember.append(mse_ensmean_member) # compute crps (per forecast and per gridpoint). the crps_ensemble function # takes in arbitrary dimensions, and treats each point as a single forecast. # so what we get if we feed in our data is a crps score per time per gridpoint crps_netens = properscoring.crps_ensemble(truth, y_pred_netens, axis=1) # compute area mean crps crps_netens = np.mean(crps_netens, axis=(1,2)) res_crps_netens.append(crps_netens) # make next forecast step for all members for i in range(n_ens): y_pred_netens[:,i] = network_ensemble_all[i].predict(y_pred_netens[:,i]) res_mse_netens = np.array(res_mse_netens) res_ensvar_netens = np.array(res_ensvar_netens) res_mse_netens_permember = np.array(res_mse_netens_permember) res_crps_netens = np.array(res_crps_netens) out = {'leadtime': leadtimes, 'mse_ensmean_netens': res_mse_netens, 'spread_netens': res_ensvar_netens,
def calculate_crps(
        df: "classes.BeliefsDataFrame") -> "classes.BeliefsDataFrame":
    """Compute the continuous ranked probability score for a BeliefsDataFrame
    with a probabilistic (or deterministic) forecast (event_value column) and
    observation (reference_value column).

    This function supports a probabilistic observation, too.

    Raises
    ------
    ValueError
        If ``df`` contains more than one (event_start, source) group.

    References
    ----------
    Hans Hersbach. Decomposition of the Continuous Ranked Probability Score
    for Ensemble Prediction Systems in Weather and Forecasting, Volume 15,
    No. 5, pages 559-570, 2000.
    https://journals.ametsoc.org/doi/pdf/10.1175/1520-0434%282000%29015%3C0559%3ADOTCRP%3E2.0.CO%3B2
    """
    if len(df.groupby(level=["event_start", "source"])) > 1:
        # The two literals are concatenated, so the first needs a trailing
        # space; the original message read "...forecast.BeliefsDataFrame...".
        raise ValueError(
            "Expected BeliefsDataFrame must describe a single observation and forecast. "
            "BeliefsDataFrame cannot contain multiple events or sources.")

    # Split DataFrame into forecast (event_value) and observation (reference_value)
    df_forecast = df.dropna(subset=["event_value"])["event_value"]
    df_observation = df.dropna(subset=["reference_value"])["reference_value"]

    # Obtain the distributions
    pdf_p_forecast, pdf_v_forecast = get_pdfs_from_beliefsdataframe(
        df_forecast)
    pdf_p_observation, pdf_v_observation = get_pdfs_from_beliefsdataframe(
        df_observation)

    # Check if we have both a forecast and an observation
    if pdf_p_forecast.size == 0 or pdf_p_observation.size == 0:
        crps = np.nan
    else:
        cdf_p_observation = pdf_p_observation.cumsum()
        cdf_p_forecast = pdf_p_forecast.cumsum()
        crpss = []

        # Loop over steps in cumulative probability (in case of a
        # deterministic observation, this is a single step)
        previous_cp_observation = 0
        for cp_observation, v_observation in zip(cdf_p_observation,
                                                 pdf_v_observation):
            # Obtain the normalized pdf for this step
            cdf_p_forecast_i, cdf_v_forecast_i = partial_cdf(
                cdf_p_forecast,
                pdf_v_forecast,
                (previous_cp_observation, cp_observation),
            )
            pdf_p_forecast_i = np.concatenate(
                ([cdf_p_forecast_i[0]], np.diff(cdf_p_forecast_i)))

            # Calculate the continuous ranked probability score for this step
            # (i.e. how well does the forecast describe this possible outcome
            # for the observation)
            crpss.append(
                ps.crps_ensemble(v_observation, cdf_v_forecast_i,
                                 pdf_p_forecast_i))

            # Set the left cp bound for the next step
            previous_cp_observation = cp_observation

        # Calculate the weighted sum of scores over all possible outcomes
        # for the observation.
        crps = np.dot(crpss, pdf_p_observation)

    # List the expected observation as the reference for determining
    # percentage scores
    df_score = get_expected_belief(df_observation.to_frame())
    df_score = df_score.droplevel("cumulative_probability")

    # And of course return the score as well
    df_score["crps"] = crps
    return df_score
# Script: CRPS of rainfarm-generated samples against real samples.
plotdir = 'plots_generated_rainfarm'
# Portable replacement for shelling out to `mkdir -p`.
os.makedirs(plotdir, exist_ok=True)

reals = np.load('/climstorage/sebastian/pr_disagg/data/real_samples.npy')
reals_dsum = np.sum(reals, axis=1)

# NOTE(review): pickle.load can execute arbitrary code; only load files that
# this project produced itself.
with open('data/spectral_slopes.pkl', 'rb') as slopes_file:
    alpha, beta = pickle.load(slopes_file)

# compute statistics over many generated samples;
# we compute the area mean
n_sample = 10000
n_fake_per_real = 1000

crps_amean_all_rainfarm = []
for i in trange(n_sample):
    real = reals[i]
    dsum = reals_dsum[i]
    # Ensemble of generated fields for this sample; members on axis 0.
    generated = np.array([
        downscale_spatiotemporal(dsum, alpha, beta, 24)
        for _ in range(n_fake_per_real)
    ])
    crps = properscoring.crps_ensemble(real, generated, axis=0)
    # Average over axes 1 and 2 of the score array.
    crps_amean_all_rainfarm.append(crps.mean(axis=(1, 2)))

crps_amean_all_rainfarm = np.array(crps_amean_all_rainfarm)

# Context manager so the results file is flushed and closed deterministically.
with open('data/crps_results_rainfarm.pkl', 'wb') as results_file:
    pickle.dump((crps_amean_all_rainfarm), results_file)
def test_nan_observations_consistency(self):
    """Both implementations must agree when some observations are NaN."""
    rng = np.random.RandomState(123)
    missing = rng.rand(*self.obs.shape) > 0.5
    self.obs[missing] = np.nan

    fast = crps_ensemble(self.obs, self.forecasts)
    reference = _crps_ensemble_vectorized(self.obs, self.forecasts)
    assert_allclose(fast, reference)
def test_crps_toy_examples_skipna(self):
    """A NaN member is skipped regardless of its position in the ensemble."""
    cases = (
        (0, [np.nan, 1]),
        (0, [1, np.nan]),
        (1, [np.nan, 0]),
        (1, [0, np.nan]),
    )
    for observation, members in cases:
        self.assertEqual(crps_ensemble(observation, members), 1)
def crps(self,
         mod_col: str = 'modeled',
         obs_col: str = 'observed',
         member_col: str = 'member',
         valid_time_col: str = 'valid_time',
         lead_time_col: str = 'lead_time',
         gage_col: str = 'gage',
         weights=None):
    """
    Calculate CRPS (continuous ranked probability score) using the
    properscoring package.

    See :py:func:`crps_ensemble() <crps_ensemble>` in :py:mod:`properscoring`.

    Grouping is not necessary because CRPS returns a value per forecast.
    Grouping would happen when computing CRPSS.

    The Eval object generally wants one observation per modeled data point,
    that is overkill for this function (since the ensemble takes one
    observation) but we handle it in a consistent manner with the rest of
    Evaluation.

    This function is set up to identify the ensemble dimension in the
    following way:

    1. if both "valid_time_col" and "member_col" are present in the columns,
       lead times are pivoted and members unstacked into the columns
       (time-lagged ensemble with members).
    2. else if "valid_time_col" is present, lead times are pivoted into the
       columns. This is the time-lagged ensembles way.
    3. else if "member_col" is present, members are unstacked into the
       columns, which is the standard ensemble forecast way.

    Args:
        mod_col: str = 'modeled': Column name of modelled data.
        obs_col: str = 'observed': Column name of observed data.
        member_col: str = 'member': Column name giving the members.
        valid_time_col: str = 'valid_time': Column name of the valid time.
        lead_time_col: str = 'lead_time': Column name of the lead time.
        gage_col: str = 'gage': Column name identifying the gage.
        weights: Passed through unchanged to ``ps.crps_ensemble``.

    Returns:
        DataFrame with a single ``'crps'`` column, indexed like the reduced
        observations: CRPS for each ensemble forecast against the
        observations.

    Raises:
        ValueError: If ``self.data`` is not a pandas DataFrame.
    """
    # Grouping is not necessary because CRPS returns a value per forecast.
    if isinstance(self.data, pd.DataFrame):
        # This is a bit hackish to get the indices columns: every column
        # that is neither the modeled nor the observed column.
        indices = list(
            set(self.data.columns.tolist()) - set([mod_col, obs_col]))
        data = self.data.set_index(indices)
        modeled = data[mod_col]
        observed = data[obs_col]

        if valid_time_col in indices and member_col in indices:
            # Time-lagged ensemble WITH members
            mm = modeled.reset_index()
            mm = mm.set_index([valid_time_col, gage_col, member_col])
            mm = mm.pivot(columns=lead_time_col)
            # NOTE(review): the level name is hard-coded as 'member' rather
            # than member_col — confirm this is intended.
            mm = mm.unstack(level='member')
            mm = mm.sort_index()
            # NOTE(review): this value of oo is overwritten two statements
            # below without being read.
            oo = observed.reset_index().set_index(
                [valid_time_col, gage_col, member_col])
            inds_avg = list(
                set(indices) - set([lead_time_col, member_col]))
            oo = observed.mean(axis=0, level=inds_avg).to_frame()
            oo = oo.reset_index().set_index([valid_time_col, gage_col]).sort_index()
            # Modeled rows and observations must line up one-to-one.
            assert mm.index.equals(oo.index)
            modeled = mm
            observed = oo[obs_col]

        elif valid_time_col in indices:
            # Time-lagged ensemble WITHOUT members
            # This may be a bit too in the business of the modeled data.
            mm = modeled.reset_index()
            drop_inds = list(
                set(indices) - set([valid_time_col, gage_col, lead_time_col]))
            mm = mm.drop(columns=drop_inds)
            mm = mm.set_index([valid_time_col, gage_col])
            # Expand lead times across the columns, across which the CRPS is
            # calculated for each valid time, gage
            mm = mm.pivot(columns=lead_time_col).sort_index()
            oo = observed.mean(axis=0, level=[valid_time_col, gage_col])
            oo = oo.reset_index().set_index([valid_time_col, gage_col]).sort_index()
            assert mm.index.equals(oo.index)
            modeled = mm
            observed = oo[obs_col]

        elif member_col in indices:
            # A "regular" member-only ensemble.
            inds_m_member = list(set(indices) - set([member_col]))
            # Expand the members across the columns - across which the CRPS
            # is calculated for each reference_time, lead_time, gage
            # NOTE(review): level name hard-coded as 'member' here as well.
            mm = modeled.unstack(level='member')
            mm = mm.reset_index().set_index(inds_m_member).sort_index()
            # Remove the member dimension from the obs. Could check the mean
            # matches the values.
            oo = observed.mean(axis=0, level=inds_m_member)
            oo = oo.reset_index().set_index(inds_m_member).sort_index()
            assert mm.index.equals(oo.index)
            modeled = mm
            observed = oo[obs_col]

        # If no branch above matched, modeled/observed are still the raw
        # columns selected from the indexed data.
        result_np = ps.crps_ensemble(observed.to_numpy(), modeled.to_numpy(), weights=weights)
        result_pd = pd.DataFrame(result_np, columns=['crps'], index=observed.index)
        return result_pd

    else:
        raise ValueError('Xarray not currently implemented for CRPS')
# 'time' is init titme, change to init time fc['time'] = fc['time'] + fc['fhour'] if ifhour > 0: fc = fc[:-ifhour] truth = truth_all[ifhour:] else: truth = truth_all assert(len(fc)==len(truth)) # remove ctrl member fc_ens = fc.isel(ens=range(1,1+n_ens)) fc_ensmean = fc_ens.mean('ens') mse_per_fc = ((truth-fc_ensmean)**2).mean(('lat','lon')) spread_per_fc = fc_ens.std('ens').mean(('lat','lon')) mse_ctrl_per_fc = ((fc.isel(ens=0)-truth)**2).mean(('lat','lon')) crps_per_fc = properscoring.crps_ensemble(truth, fc_ens, axis=1).mean(axis=(1,2)) leadtimes.append(fc['fhour'].values/np.timedelta64(1,'h')) res_mse.append(mse_per_fc) res_ctrl_mse.append(mse_ctrl_per_fc) res_ensvar.append(spread_per_fc) res_crps.append(crps_per_fc) out = {'leadtime': leadtimes, 'mse_ensmean': res_mse, 'spread': res_ensvar, 'crps': res_crps, 'mse_ctrl': res_ctrl_mse,
def crps(regressor, X, y_true):
    """Return the mean ensemble CRPS of ``regressor.sample_y(X)`` against ``y_true``.

    Each element of ``y_true`` is scored against the matching element of the
    sampled predictions, and the scores are averaged.
    """
    sampled_predictions = regressor.sample_y(X)
    per_case_scores = [
        crps_ensemble(observed, members)
        for observed, members in zip(y_true, sampled_predictions)
    ]
    return np.mean(per_case_scores)
import numpy as np
import properscoring as ps
from scipy.stats import norm

# Example: CRPS skill score of a standard-normal forecast relative to a
# constant zero forecast.
obs = [-2, -1, 0, 1, 2]
# Baseline: a forecast of 0 for every observation, averaged over observations.
baseline_score = ps.crps_ensemble(obs, [0, 0, 0, 0, 0]).mean()
# Forecast: Gaussian with mu=0, sig=1 for every observation.
forecast_score = ps.crps_gaussian(obs, mu=0, sig=1).mean()
# Relative reduction of the score with respect to the baseline.
skill = (baseline_score - forecast_score) / baseline_score
print(skill)