def test_crossval(self):
    """Check the t* cross-validation task.

    Two things are verified: running ``tasks.crossval_t_stars`` must not
    alter the ``mu_star`` stored in each glacier's ``local_mustar`` file,
    and the bias written to ``crossval_tstars.csv`` must match the bias
    obtained from ``PastMassBalance`` with ``use_bias_for_run`` off
    (with it on, the modelled mean must match the observed mean).
    """

    def read_mustars(glacier_dirs):
        """Return the first ``mu_star`` of each ``local_mustar`` file."""
        values = []
        for gd in glacier_dirs:
            local = pd.read_csv(gd.get_filepath('local_mustar'))
            values.append(local['mu_star'].values[0])
        return values

    gdirs = up_to_distrib()

    # A previous cross-validation may have touched the calibration output,
    # so redo the calibration chain before taking the reference values
    tasks.compute_ref_t_stars(gdirs)
    for task in (tasks.local_mustar, tasks.apparent_mb):
        workflow.execute_entity_task(task, gdirs)

    # Reference values, taken before the cross-validation
    refmustars = read_mustars(gdirs)

    tasks.crossval_t_stars(gdirs)
    cv_path = os.path.join(cfg.PATHS['working_dir'], 'crossval_tstars.csv')
    df = pd.read_csv(cv_path, index_col=0)

    # The cross-validation must not have broken anything
    mustars = read_mustars(gdirs)
    np.testing.assert_allclose(refmustars, mustars)

    # Mass-balance checks
    from oggm.core.massbalance import PastMassBalance

    def mean_balances(gd, heights, widths):
        """Return the column means of observed ('ref') and modelled
        ('mine') annual balance, using the current cfg.PARAMS."""
        model = PastMassBalance(gd)
        table = gd.get_ref_mb_data().ANNUAL_BALANCE.to_frame(name='ref')
        for yr in table.index:
            table.loc[yr, 'mine'] = model.get_specific_mb(heights, widths,
                                                          year=yr)
        return table.mean()

    for rid in df.index:
        gdir = [g for g in gdirs if g.rgi_id == rid][0]
        h, w = gdir.get_inversion_flowline_hw()

        # Without bias correction the model offset is the stored bias
        cfg.PARAMS['use_bias_for_run'] = False
        mm = mean_balances(gdir, h, w)
        np.testing.assert_allclose(df.loc[rid].bias,
                                   mm['mine'] - mm['ref'], atol=1e-3)

        # With bias correction the means have to agree
        cfg.PARAMS['use_bias_for_run'] = True
        mm = mean_balances(gdir, h, w)
        np.testing.assert_allclose(mm['mine'], mm['ref'], atol=1e-3)
gdirs = workflow.init_glacier_regions(rgidf) # Cross-validation file = path.join(cfg.PATHS['working_dir'], 'crossval_tstars.csv') cvdf = pd.read_csv(file, index_col=0) for gd in gdirs: t_cvdf = cvdf.loc[gd.rgi_id] heights, widths = gd.get_inversion_flowline_hw() # Mass-balance model with cross-validated parameters instead mb_mod = PastMassBalance(gd, mu_star=t_cvdf.cv_mustar, bias=t_cvdf.cv_bias, prcp_fac=t_cvdf.cv_prcp_fac) # Mass-blaance timeseries, observed and simulated refmb = gd.get_ref_mb_data().copy() refmb['OGGM'] = mb_mod.get_specific_mb(heights, widths, year=refmb.index) # Compare their standard deviation std_ref = refmb.ANNUAL_BALANCE.std() rcor = np.corrcoef(refmb.OGGM, refmb.ANNUAL_BALANCE)[0, 1] if std_ref == 0: # I think that such a thing happens with some geodetic values std_ref = refmb.OGGM.std() rcor = 1 # Store the scores cvdf.loc[gd.rgi_id, 'CV_MB_BIAS'] = (refmb.OGGM.mean() - refmb.ANNUAL_BALANCE.mean()) cvdf.loc[gd.rgi_id, 'CV_MB_SIGMA_BIAS'] = (refmb.OGGM.std() / std_ref) cvdf.loc[gd.rgi_id, 'CV_MB_COR'] = rcor mb_mod = PastMassBalance(gd, mu_star=t_cvdf.interp_mustar, bias=t_cvdf.cv_bias,
def minor_xval_statistics(gdirs):
    """Collect per-glacier cross-validation biases and mass-balance series.

    Reads ``crossval_tstars.csv`` from ``cfg.PATHS['working_dir']`` and, for
    every glacier directory, runs ``PastMassBalance`` three times over the
    years of the reference mass-balance data: with the cross-validated
    parameters, with the interpolated ``mu_star`` (and cross-validated bias
    and precipitation factor), and with the calibrated t* parameters.

    The standard-deviation ratio and correlation that earlier versions
    wrote into the local copy of the cross-validation table were never read
    or returned, so they are no longer computed.

    Parameters
    ----------
    gdirs : list
        the glacier directories to process. Each ``rgi_id`` must be present
        in the index of ``crossval_tstars.csv``.

    Returns
    -------
    xval : pd.DataFrame
        one row per glacier, indexed by ``RGIId``, with the three mean
        biases (model minus observation) and the t*/mu* values read from
        the cross-validation file.
    mbdf : pd.DataFrame
        annual mass balance with two-level columns
        ``(rgi_id, 'measured' | 'calibrated' | 'crossvalidated')``; years
        without any value are dropped.
    """
    cv_path = os.path.join(cfg.PATHS['working_dir'], 'crossval_tstars.csv')
    cvdf = pd.read_csv(cv_path, index_col=0)

    xval_columns = ['RGIId', 'Name', 'tstar_bias', 'xval_bias',
                    'interp_bias', 'mustar', 'tstar', 'xval_mustar',
                    'xval_tstar', 'interp_mustar']
    mb_columns = ['ANNUAL_BALANCE', 'OGGM_tstar', 'OGGM_cv']
    mb_labels = ['measured', 'calibrated', 'crossvalidated']

    # Mass balances of every glacier, on a common yearly index
    mbdf = pd.DataFrame([], index=np.arange(1850, 2050))
    # One dict per glacier; DataFrame.append was removed in pandas 2.0, so
    # the statistics table is built once after the loop
    rows = []

    for gd in gdirs:
        t_cvdf = cvdf.loc[gd.rgi_id]
        heights, widths = gd.get_inversion_flowline_hw()

        # Observed mass balance
        refmb = gd.get_ref_mb_data().copy()
        ref_mean = refmb.ANNUAL_BALANCE.mean()

        # Mass-balance model with cross-validated parameters
        mb_mod = PastMassBalance(gd, mu_star=t_cvdf.cv_mustar,
                                 bias=t_cvdf.cv_bias,
                                 prcp_fac=t_cvdf.cv_prcp_fac)
        refmb['OGGM_cv'] = mb_mod.get_specific_mb(heights, widths,
                                                  year=refmb.index)
        xbias = refmb.OGGM_cv.mean() - ref_mean

        # Mass-balance model with interpolated mu_star
        mb_mod = PastMassBalance(gd, mu_star=t_cvdf.interp_mustar,
                                 bias=t_cvdf.cv_bias,
                                 prcp_fac=t_cvdf.cv_prcp_fac)
        refmb['OGGM_mu_interp'] = mb_mod.get_specific_mb(heights, widths,
                                                         year=refmb.index)
        ibias = refmb.OGGM_mu_interp.mean() - ref_mean

        # Mass-balance model with best guess tstar
        mb_mod = PastMassBalance(gd, mu_star=t_cvdf.mustar,
                                 bias=t_cvdf.bias,
                                 prcp_fac=t_cvdf.prcp_fac)
        refmb['OGGM_tstar'] = mb_mod.get_specific_mb(heights, widths,
                                                     year=refmb.index)
        tbias = refmb.OGGM_tstar.mean() - ref_mean

        # 1. statistics
        rows.append({'Name': gd.name,
                     'RGIId': gd.rgi_id,
                     'tstar_bias': tbias,
                     'xval_bias': xbias,
                     'interp_bias': ibias,
                     'mustar': t_cvdf.mustar,
                     'tstar': t_cvdf.tstar,
                     'xval_mustar': t_cvdf.cv_mustar,
                     'xval_tstar': t_cvdf.cv_tstar,
                     'interp_mustar': t_cvdf.interp_mustar})

        # 2. mass balance timeseries
        # Selecting the columns keeps the array 2-D (n_years, 3) even when
        # there is a single reference year; dstack(...).squeeze() would
        # collapse that case to 1-D and break the DataFrame construction.
        mbdf_add = pd.DataFrame(
            refmb[mb_columns].values,
            columns=pd.MultiIndex.from_product([[gd.rgi_id], mb_labels]),
            index=refmb.index)
        mbdf = pd.concat([mbdf, mbdf_add], axis=1)

    # Concatenating onto the initially column-less frame may leave plain
    # tuples as column labels; normalise them. Skipped when nothing was
    # added because from_tuples cannot handle an empty list.
    if len(mbdf.columns) > 0:
        mbdf.columns = pd.MultiIndex.from_tuples(mbdf.columns)
    mbdf = mbdf.dropna(how='all')

    xval = pd.DataFrame(rows, columns=xval_columns)
    xval.index = xval.RGIId

    return xval, mbdf
def quick_crossval(gdirs, xval, major=0):
    """Run a quick leave-one-out cross-validation of the reference t*.

    Follows ``climate.quick_crossval_t_stars`` but is minimized for
    performance: for each glacier in ``ref_tstars.csv`` the t* is
    redistributed from all *other* reference glaciers, the mass balance is
    modelled with the resulting parameters and compared with the reference
    mass-balance data.

    ``tasks.compute_ref_t_stars`` is deliberately NOT repeated for every
    left-out glacier. Strictly it should be, because a neighbouring glacier
    with several candidate t* may have used the left-out glacier to pick
    one. It is very time consuming, though, and the original author judged
    the influence to be very small (only 40 of the 253 reference glaciers
    have more than one possible t*).

    Parameters
    ----------
    gdirs : list
        glacier directories; must contain every glacier listed in
        ``ref_tstars.csv`` (an ``IndexError`` is raised otherwise).
    xval : pd.DataFrame
        result table, modified in place: one row is appended with the
        current climate parameters from ``cfg.PARAMS`` and the scores
        averaged over all glaciers ('std_quot', 'bias', 'rmse', 'core').
    major : bool or int, optional
        if truthy, only ``xval`` is updated. If falsy (default), the
        cross-validated and interpolated values are also stored per glacier
        and written to ``crossval_tstars.csv`` in the working directory.

    Returns
    -------
    pd.DataFrame
        ``xval`` with the new row appended.
    """
    ref_path = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
    full_ref_df = pd.read_csv(ref_path, index_col=0)

    # Per-glacier scores, turned into a DataFrame once after the loop
    scores = []

    for rid in full_ref_df.index:
        # the glacier to look at
        gdir = [g for g in gdirs if g.rgi_id == rid][0]

        # the reference glaciers
        tmp_ref_df = full_ref_df.loc[full_ref_df.index != rid]

        # before the cross-val store the info about "real" mustar
        rdf = pd.read_csv(gdir.get_filepath('local_mustar'))
        full_ref_df.loc[rid, 'mustar'] = rdf['mu_star'].values[0]

        # redistribute t_star from the remaining reference glaciers
        with utils.DisableLogger():
            tasks.distribute_t_stars([gdir], ref_df=tmp_ref_df)

        # read crossvalidated values
        rdf = pd.read_csv(gdir.get_filepath('local_mustar'))

        # Mass-balance model with the cross-validated parameters
        heights, widths = gdir.get_inversion_flowline_hw()
        mb_mod = PastMassBalance(gdir,
                                 mu_star=rdf['mu_star'].values[0],
                                 bias=rdf['bias'].values[0],
                                 prcp_fac=rdf['prcp_fac'].values[0])

        # Mass-balance timeseries, observed and simulated
        refmb = gdir.get_ref_mb_data().copy()
        refmb['OGGM'] = mb_mod.get_specific_mb(heights, widths,
                                               year=refmb.index)

        # store single glacier results
        error = refmb.OGGM - refmb.ANNUAL_BALANCE
        bias = refmb.OGGM.mean() - refmb.ANNUAL_BALANCE.mean()
        # Square BEFORE averaging: sqrt(mean(error)**2) would just be the
        # absolute mean error, not the root-mean-square error.
        rmse = np.sqrt(np.mean(error**2))
        rcor = np.corrcoef(refmb.OGGM, refmb.ANNUAL_BALANCE)[0, 1]

        ref_std = refmb.ANNUAL_BALANCE.std()
        # unclear how to treat this best: with constant observations the
        # ratio and the correlation are undefined, so count it as a match
        if ref_std == 0:
            ref_std = refmb.OGGM.std()
            rcor = 1

        scores.append({'std_oggm': refmb.OGGM.std(),
                       'std_ref': ref_std,
                       'bias': bias,
                       'rmse': rmse,
                       'core': rcor})

        if not major:
            # store cross validated values
            full_ref_df.loc[rid, 'cv_tstar'] = int(rdf['t_star'].values[0])
            full_ref_df.loc[rid, 'cv_mustar'] = rdf['mu_star'].values[0]
            full_ref_df.loc[rid, 'cv_bias'] = rdf['bias'].values[0]
            full_ref_df.loc[rid, 'cv_prcp_fac'] = rdf['prcp_fac'].values[0]

    tmpdf = pd.DataFrame(
        scores, columns=['std_oggm', 'std_ref', 'rmse', 'core', 'bias'])

    # and store mean values (appends one row to the caller's table)
    std_quot = np.mean(tmpdf.std_oggm / tmpdf.std_ref)
    xval.loc[len(xval.index)] = {
        'prcpsf': cfg.PARAMS['prcp_scaling_factor'],
        'tliq': cfg.PARAMS['temp_all_liq'],
        'tmelt': cfg.PARAMS['temp_melt'],
        'tgrad': cfg.PARAMS['temp_default_gradient'],
        'std_quot': std_quot,
        'bias': tmpdf['bias'].mean(),
        'rmse': tmpdf['rmse'].mean(),
        'core': tmpdf['core'].mean()
    }

    if major:
        return xval

    for rid in full_ref_df.index:
        # the glacier to look at
        target = full_ref_df.loc[full_ref_df.index == rid]

        # the reference glaciers
        tmp_ref_df = full_ref_df.loc[full_ref_df.index != rid]

        # Compute the distance. Converted to a plain array so that all
        # indexing below is positional, independent of the RGI-id labels.
        distances = np.asarray(utils.haversine(target.lon.values[0],
                                               target.lat.values[0],
                                               tmp_ref_df.lon,
                                               tmp_ref_df.lat))

        # Take the closest glaciers.
        # NOTE(review): the slice keeps the 9 nearest although the original
        # comment said 10; left at 9 so results do not change - confirm
        # which one is intended.
        aso = np.argsort(distances)[0:9]
        amin = tmp_ref_df.iloc[aso]

        # inverse squared-distance weighted mean of the calibrated mu*
        weights = 1. / distances[aso]**2
        interp = np.average(amin.mustar, weights=weights)
        full_ref_df.loc[rid, 'interp_mustar'] = interp

    # write
    out_path = os.path.join(cfg.PATHS['working_dir'], 'crossval_tstars.csv')
    full_ref_df.to_csv(out_path)
    # alternative: do not write csv file, but store the needed values
    # within xval_minor_statistics

    return xval
def __init__(self, gdir, magicc_ts=None, dt_per_dt=1, dp_per_dt=0,
             mu_star=None, bias=None, y0=None, halfsize=15,
             filename='climate_historical', input_filesuffix='',
             **kwargs):
    """Initialize.

    Builds a ``MagiccConstantMassBalance`` for the glacier and derives two
    yearly series from the global mean temperature series ``magicc_ts``:
    a precipitation factor series (``self.prcp_fac_ts``) and a temperature
    bias series (``self.temp_bias_ts``). The temperature series is shifted
    by a constant, found with a root search, such that the mean specific
    mass balance of this model over the reference window equals that of
    ``PastMassBalance(gdir)`` over the same years.

    Parameters
    ----------
    gdir : GlacierDirectory
        the glacier directory
    magicc_ts : pd.Series
        the GMT time series (required, indexed by year)
    dt_per_dt : float or array of 12 values, optional, default 1
        the local climate change signal, in units of °C per °C. If 12
        values are given, they are applied per month.
    dp_per_dt : float or array of 12 values, optional, default 0
        the change of the precipitation factor per °C of GMT anomaly
        (relative to the reference window), applied as a fraction of the
        calibrated precipitation factor. If 12 values are given, they are
        applied per month.
    mu_star : float, optional
        set to the alternative value of mu* you want to use
        (the default is to use the calibrated value)
    bias : float, optional
        set to the alternative value of the annual bias [mm we yr-1]
        you want to use (the default is to use the calibrated value)
    y0 : int, optional, default: tstar
        the year at the center of the period of interest. The default
        is to use tstar as center.
    halfsize : int, optional
        the half-size of the time window (window size = 2 * halfsize + 1)
    filename : str, optional
        set to a different BASENAME if you want to use alternative climate
        data.
    input_filesuffix : str
        the file suffix of the input climate file
    **kwargs
        passed on to ``MagiccConstantMassBalance``

    Raises
    ------
    InvalidParamsError
        if ``magicc_ts`` is missing, or if ``y0`` is not given and cannot
        be obtained from the underlying constant model.
    """

    if magicc_ts is None:
        raise InvalidParamsError('Need a magicc ts!')

    super(MagiccMassBalance, self).__init__()
    self.mbmod = MagiccConstantMassBalance(
        gdir, mu_star=mu_star, bias=bias, y0=y0, halfsize=halfsize,
        filename=filename, input_filesuffix=input_filesuffix, **kwargs)

    self.valid_bounds = self.mbmod.valid_bounds
    self.hemisphere = gdir.hemisphere

    # y0=None is the documented default, but the window arithmetic below
    # needs a number.
    # NOTE(review): presumably MagiccConstantMassBalance resolves the
    # default to tstar and exposes it as ``y0`` - confirm.
    if y0 is None:
        y0 = getattr(self.mbmod, 'y0', None)
        if y0 is None:
            raise InvalidParamsError('Could not determine y0: please set '
                                     'it explicitly.')

    # Set ys and ye
    self.ys = int(magicc_ts.index[0])
    self.ye = int(magicc_ts.index[-1])

    # Reference window (label-based slice of the year index)
    window = slice(y0 - halfsize, y0 + halfsize)

    # Correct for dp_per_dt signal: anomaly relative to the window mean of
    # the unscaled GMT series
    ref_t = magicc_ts.loc[window].mean()
    if len(np.atleast_1d(dp_per_dt)) == 12:
        # monthly factors: one column per month
        prcp_ts = (magicc_ts - ref_t).values[:, np.newaxis] * dp_per_dt
        prcp_ts = pd.DataFrame(data=prcp_ts, index=magicc_ts.index,
                               columns=np.arange(1, 13))
    else:
        prcp_ts = (magicc_ts - ref_t) * dp_per_dt

    # We correct the original factor - don't forget to also scale the diff
    self.prcp_fac_ts = self.mbmod.prcp_fac + self.mbmod.prcp_fac * prcp_ts

    # Correct for dt_per_dt signal
    if len(np.atleast_1d(dt_per_dt)) == 12:
        # monthly factors: one column per month
        magicc_ts = pd.DataFrame(
            data=magicc_ts.values[:, np.newaxis] * dt_per_dt,
            index=magicc_ts.index, columns=np.arange(1, 13))
    else:
        magicc_ts = magicc_ts * dt_per_dt

    years = magicc_ts.loc[window].index.values

    # OK now check the bias to apply based on y0 and halfsize
    fls = gdir.read_pickle('model_flowlines')
    mb_ref = PastMassBalance(gdir)
    mb_ref = mb_ref.get_specific_mb(fls=fls, year=years).mean()

    def to_minimize(temp_bias):
        # get_specific_mb is evaluated with the candidate series set on
        # self, so it has to be assigned before each evaluation
        self.temp_bias_ts = magicc_ts - temp_bias
        mb_mine = self.get_specific_mb(fls=fls, year=years).mean()
        return mb_mine - mb_ref

    # Root search for the constant offset, bracketed in [-10, 10]
    temp_bias = optimize.brentq(to_minimize, -10, 10, xtol=1e-5)
    self.temp_bias_ts = magicc_ts - temp_bias