Exemplo n.º 1
0
    def test_crossval(self):
        """Cross-validation must not alter mu* and must report a consistent bias.

        After running ``tasks.crossval_t_stars`` the test checks that

        * the ``mu_star`` stored in every glacier's 'local_mustar' file is
          the same as before the cross-validation, and
        * for every glacier listed in 'crossval_tstars.csv', the mean
          difference between modelled and reference mass balance equals the
          ``bias`` column when ``use_bias_for_run`` is off, and is (almost)
          zero when it is on.
        """

        gdirs = up_to_distrib()

        # A previous cross-validation may have been run on these
        # directories, so redo the calibration first
        tasks.compute_ref_t_stars(gdirs)
        workflow.execute_entity_task(tasks.local_mustar, gdirs)
        workflow.execute_entity_task(tasks.apparent_mb, gdirs)

        def read_mustars():
            # First mu_star entry of each glacier's 'local_mustar' file
            values = []
            for gd in gdirs:
                local = pd.read_csv(gd.get_filepath('local_mustar'))
                values.append(local['mu_star'].values[0])
            return values

        # State before the cross-validation
        mustars_before = read_mustars()

        tasks.crossval_t_stars(gdirs)
        cv_path = os.path.join(cfg.PATHS['working_dir'],
                               'crossval_tstars.csv')
        df = pd.read_csv(cv_path, index_col=0)

        # The process must not have broken anything
        np.testing.assert_allclose(mustars_before, read_mustars())

        # Mass-balance checks
        from oggm.core.massbalance import PastMassBalance

        def mean_balances(gdir, h, w):
            # Mean of the reference ('ref') and modelled ('mine') specific
            # mass balance over the reference years. The model is created
            # here so that it sees the current cfg.PARAMS settings.
            mbmod = PastMassBalance(gdir)
            mbdf = gdir.get_ref_mb_data().ANNUAL_BALANCE.to_frame(name='ref')
            for yr in mbdf.index:
                mbdf.loc[yr, 'mine'] = mbmod.get_specific_mb(h, w, year=yr)
            return mbdf.mean()

        for rid in df.index:
            gdir = [g for g in gdirs if g.rgi_id == rid][0]
            h, w = gdir.get_inversion_flowline_hw()

            # Without the bias the model must be off by the reported bias
            cfg.PARAMS['use_bias_for_run'] = False
            mm = mean_balances(gdir, h, w)
            np.testing.assert_allclose(df.loc[rid].bias,
                                       mm['mine'] - mm['ref'],
                                       atol=1e-3)

            # With the bias the model must match the observations
            cfg.PARAMS['use_bias_for_run'] = True
            mm = mean_balances(gdir, h, w)
            np.testing.assert_allclose(mm['mine'], mm['ref'], atol=1e-3)
Exemplo n.º 2
0
gdirs = workflow.init_glacier_regions(rgidf)

# Cross-validation
file = path.join(cfg.PATHS['working_dir'], 'crossval_tstars.csv')
cvdf = pd.read_csv(file, index_col=0)
for gd in gdirs:
    t_cvdf = cvdf.loc[gd.rgi_id]
    heights, widths = gd.get_inversion_flowline_hw()
    # Mass-balance model with cross-validated parameters instead
    mb_mod = PastMassBalance(gd,
                             mu_star=t_cvdf.cv_mustar,
                             bias=t_cvdf.cv_bias,
                             prcp_fac=t_cvdf.cv_prcp_fac)
    # Mass-balance timeseries, observed and simulated
    refmb = gd.get_ref_mb_data().copy()
    refmb['OGGM'] = mb_mod.get_specific_mb(heights, widths, year=refmb.index)
    # Compare their standard deviation
    std_ref = refmb.ANNUAL_BALANCE.std()
    rcor = np.corrcoef(refmb.OGGM, refmb.ANNUAL_BALANCE)[0, 1]
    if std_ref == 0:
        # I think that such a thing happens with some geodetic values
        std_ref = refmb.OGGM.std()
        rcor = 1
    # Store the scores
    cvdf.loc[gd.rgi_id,
             'CV_MB_BIAS'] = (refmb.OGGM.mean() - refmb.ANNUAL_BALANCE.mean())
    cvdf.loc[gd.rgi_id, 'CV_MB_SIGMA_BIAS'] = (refmb.OGGM.std() / std_ref)
    cvdf.loc[gd.rgi_id, 'CV_MB_COR'] = rcor
    mb_mod = PastMassBalance(gd,
                             mu_star=t_cvdf.interp_mustar,
                             bias=t_cvdf.cv_bias,
Exemplo n.º 3
0
def minor_xval_statistics(gdirs):
    """Collect per-glacier cross-validation statistics and mass-balance series.

    Reads 'crossval_tstars.csv' from the working directory and, for every
    glacier directory, computes the specific mass balance over the years of
    the reference data with three parameter sets taken from that file:
    the cross-validated ones (``cv_*``), the interpolated mu*
    (``interp_mustar``) and the calibrated ones (``mustar``, ``bias``,
    ``prcp_fac``).

    Parameters
    ----------
    gdirs : list
        the glacier directories to process. Each ``rgi_id`` must be present
        in the index of 'crossval_tstars.csv'.

    Returns
    -------
    xval : pd.DataFrame
        one row per glacier, indexed by RGIId, with the mean model-minus-
        reference bias of the three runs and the associated mu* / t* values
    mbdf : pd.DataFrame
        mass-balance time series with (rgi_id, kind) MultiIndex columns,
        where kind is 'measured', 'calibrated' or 'crossvalidated'. Years
        without any value are dropped.
    """
    # to store mass balances of every glacier
    mbdf = pd.DataFrame([], index=np.arange(1850, 2050))

    # Cross-validation
    cv_path = os.path.join(cfg.PATHS['working_dir'], 'crossval_tstars.csv')
    cvdf = pd.read_csv(cv_path, index_col=0)

    # dataframe output: rows are collected in a list and the frame is built
    # once after the loop (DataFrame.append no longer exists in pandas >= 2)
    xval_columns = [
        'RGIId', 'Name', 'tstar_bias', 'xval_bias',
        'interp_bias', 'mustar', 'tstar', 'xval_mustar',
        'xval_tstar', 'interp_mustar'
    ]
    xval_rows = []

    for gd in gdirs:
        t_cvdf = cvdf.loc[gd.rgi_id]
        heights, widths = gd.get_inversion_flowline_hw()

        # Observed mass-balance
        refmb = gd.get_ref_mb_data().copy()

        # Mass-balance model with cross-validated parameters instead
        mb_mod = PastMassBalance(gd,
                                 mu_star=t_cvdf.cv_mustar,
                                 bias=t_cvdf.cv_bias,
                                 prcp_fac=t_cvdf.cv_prcp_fac)
        refmb['OGGM_cv'] = mb_mod.get_specific_mb(heights,
                                                  widths,
                                                  year=refmb.index)
        # Compare their standard deviation
        std_ref = refmb.ANNUAL_BALANCE.std()
        rcor = np.corrcoef(refmb.OGGM_cv, refmb.ANNUAL_BALANCE)[0, 1]
        if std_ref == 0:
            # I think that such a thing happens with some geodetic values
            std_ref = refmb.OGGM_cv.std()
            rcor = 1
        # Store the scores
        xbias = refmb.OGGM_cv.mean() - refmb.ANNUAL_BALANCE.mean()
        cvdf.loc[gd.rgi_id, 'CV_MB_BIAS'] = xbias
        cvdf.loc[gd.rgi_id,
                 'CV_MB_SIGMA_BIAS'] = (refmb.OGGM_cv.std() / std_ref)
        cvdf.loc[gd.rgi_id, 'CV_MB_COR'] = rcor

        # Mass-balance model with interpolated mu_star
        mb_mod = PastMassBalance(gd,
                                 mu_star=t_cvdf.interp_mustar,
                                 bias=t_cvdf.cv_bias,
                                 prcp_fac=t_cvdf.cv_prcp_fac)
        refmb['OGGM_mu_interp'] = mb_mod.get_specific_mb(heights,
                                                         widths,
                                                         year=refmb.index)
        ibias = refmb.OGGM_mu_interp.mean() - refmb.ANNUAL_BALANCE.mean()
        cvdf.loc[gd.rgi_id, 'INTERP_MB_BIAS'] = ibias

        # Mass-balance model with best guess tstar
        mb_mod = PastMassBalance(gd,
                                 mu_star=t_cvdf.mustar,
                                 bias=t_cvdf.bias,
                                 prcp_fac=t_cvdf.prcp_fac)
        refmb['OGGM_tstar'] = mb_mod.get_specific_mb(heights,
                                                     widths,
                                                     year=refmb.index)
        tbias = refmb.OGGM_tstar.mean() - refmb.ANNUAL_BALANCE.mean()
        cvdf.loc[gd.rgi_id, 'tstar_MB_BIAS'] = tbias

        # Pandas DataFrame Output
        #
        # 1. statistics
        xval_rows.append({
            'Name': gd.name,
            'RGIId': gd.rgi_id,
            'tstar_bias': tbias,
            'xval_bias': xbias,
            'interp_bias': ibias,
            'mustar': t_cvdf.mustar,
            'tstar': t_cvdf.tstar,
            'xval_mustar': t_cvdf.cv_mustar,
            'xval_tstar': t_cvdf.cv_tstar,
            'interp_mustar': t_cvdf.interp_mustar
        })

        #
        # 2. mass balance timeseries
        # column_stack always yields a (n_years, 3) array, also for a
        # glacier with a single year of reference data
        mbarray = np.column_stack(
            (refmb.ANNUAL_BALANCE, refmb.OGGM_tstar, refmb.OGGM_cv))

        mbdf_add = pd.DataFrame(
            mbarray,
            columns=[[gd.rgi_id, gd.rgi_id, gd.rgi_id],
                     ['measured', 'calibrated', 'crossvalidated']],
            index=refmb.index)
        mbdf = pd.concat([mbdf, mbdf_add], axis=1)

    # Nothing to convert (and from_tuples would raise) without any glacier
    if not mbdf.columns.empty:
        mbdf.columns = pd.MultiIndex.from_tuples(mbdf.columns)

    mbdf = mbdf.dropna(how='all')

    xval = pd.DataFrame(xval_rows, columns=xval_columns)
    xval.index = xval.RGIId

    return xval, mbdf
Exemplo n.º 4
0
def quick_crossval(gdirs, xval, major=0):
    """Leave-one-out cross-validation of t* over the reference glaciers.

    Follows climate.quick_crossval_t_stars, but minimized for performance.
    For every glacier listed in 'ref_tstars.csv' (working directory) the
    t* is redistributed from all *other* reference glaciers, the resulting
    mass-balance model is compared with the reference mass-balance data and
    the scores are averaged over all glaciers.

    Parameters
    ----------
    gdirs : list
        the glacier directories. Every glacier listed in 'ref_tstars.csv'
        must be among them.
    xval : pd.DataFrame
        results table. One row is added in place with the keys 'prcpsf',
        'tliq', 'tmelt', 'tgrad' (current values from cfg.PARAMS) and the
        mean scores 'std_quot', 'bias', 'rmse' and 'core'.
    major : bool or int, optional
        if true, only the summary row is computed. Otherwise the
        cross-validated values and the interpolated mu* of every glacier
        are also stored and written to 'crossval_tstars.csv' in the working
        directory.

    Returns
    -------
    pd.DataFrame
        ``xval``, with the new row added
    """
    full_ref_df = pd.read_csv(os.path.join(cfg.PATHS['working_dir'],
                                           'ref_tstars.csv'),
                              index_col=0)

    # per-glacier scores, turned into a DataFrame after the loop
    score_rows = []

    for rid in full_ref_df.index:

        # the glacier to look at
        gdir = [g for g in gdirs if g.rgi_id == rid][0]

        # the reference glaciers
        tmp_ref_df = full_ref_df.loc[full_ref_df.index != rid]

        # before the cross-val store the info about "real" mustar
        rdf = pd.read_csv(gdir.get_filepath('local_mustar'))
        full_ref_df.loc[rid, 'mustar'] = rdf['mu_star'].values[0]

        # redistribute t_star
        with utils.DisableLogger():
            # tasks.compute_ref_t_stars should be done again (on all other
            # glaciers) for every crossvalidation step.
            # This will/might have an influence if one of the surrounding
            # glaciers of the current glacier has more than one t_star:
            # if so, the currently crossvalidated glacier was probably
            # used to select one t_star for this surrounding glacier.
            #
            # But: compute_ref_t_stars is very time consuming. And the
            # influence is probably very small. Also only 40 out of the 253
            # reference glaciers do have more than one possible t_star.
            # It is therefore deliberately skipped here.
            tasks.distribute_t_stars([gdir], ref_df=tmp_ref_df)

        # read crossvalidated values
        rdf = pd.read_csv(gdir.get_filepath('local_mustar'))

        # ----
        # --- MASS-BALANCE MODEL
        heights, widths = gdir.get_inversion_flowline_hw()
        mb_mod = PastMassBalance(gdir,
                                 mu_star=rdf['mu_star'].values[0],
                                 bias=rdf['bias'].values[0],
                                 prcp_fac=rdf['prcp_fac'].values[0])

        # Mass-balance timeseries, observed and simulated
        refmb = gdir.get_ref_mb_data().copy()
        refmb['OGGM'] = mb_mod.get_specific_mb(heights,
                                               widths,
                                               year=refmb.index)

        # store single glacier results
        error = refmb.OGGM - refmb.ANNUAL_BALANCE
        bias = refmb.OGGM.mean() - refmb.ANNUAL_BALANCE.mean()
        # root of the mean of the *squared* errors (squaring the mean
        # instead would only give the absolute bias)
        rmse = np.sqrt(np.mean(error**2))
        rcor = np.corrcoef(refmb.OGGM, refmb.ANNUAL_BALANCE)[0, 1]

        ref_std = refmb.ANNUAL_BALANCE.std()

        # unclear how to treat this best
        if ref_std == 0:
            ref_std = refmb.OGGM.std()
            rcor = 1

        score_rows.append({
            'std_oggm': refmb.OGGM.std(),
            'std_ref': ref_std,
            'bias': bias,
            'rmse': rmse,
            'core': rcor
        })

        if not major:
            # store cross validated values
            full_ref_df.loc[rid, 'cv_tstar'] = int(rdf['t_star'].values[0])
            full_ref_df.loc[rid, 'cv_mustar'] = rdf['mu_star'].values[0]
            full_ref_df.loc[rid, 'cv_bias'] = rdf['bias'].values[0]
            full_ref_df.loc[rid, 'cv_prcp_fac'] = rdf['prcp_fac'].values[0]

    tmpdf = pd.DataFrame(
        score_rows, columns=['std_oggm', 'std_ref', 'rmse', 'core', 'bias'])

    # and store mean values
    std_quot = np.mean(tmpdf.std_oggm / tmpdf.std_ref)

    xval.loc[len(xval.index)] = {
        'prcpsf': cfg.PARAMS['prcp_scaling_factor'],
        'tliq': cfg.PARAMS['temp_all_liq'],
        'tmelt': cfg.PARAMS['temp_melt'],
        'tgrad': cfg.PARAMS['temp_default_gradient'],
        'std_quot': std_quot,
        'bias': tmpdf['bias'].mean(),
        'rmse': tmpdf['rmse'].mean(),
        'core': tmpdf['core'].mean()
    }

    if major:
        return xval

    for rid in full_ref_df.index:
        # the glacier to look at
        target = full_ref_df.loc[full_ref_df.index == rid]
        # the reference glaciers
        tmp_ref_df = full_ref_df.loc[full_ref_df.index != rid]

        # Compute the distance. Work on a plain array so that the positions
        # returned by argsort are applied positionally, whatever container
        # utils.haversine returns.
        distances = np.asarray(
            utils.haversine(target.lon.values[0], target.lat.values[0],
                            tmp_ref_df.lon, tmp_ref_df.lat))

        # Take the closest reference glaciers
        # NOTE(review): the slice keeps 9 glaciers although the original
        # comment announced 10 - confirm against tasks.distribute_t_stars
        # before changing it.
        aso = np.argsort(distances)[0:9]
        amin = tmp_ref_df.iloc[aso]
        # inverse squared distance weighting
        weights = 1. / distances[aso]**2
        interp = np.average(amin.mustar, weights=weights)
        full_ref_df.loc[rid, 'interp_mustar'] = interp

    # write
    out_path = os.path.join(cfg.PATHS['working_dir'], 'crossval_tstars.csv')
    full_ref_df.to_csv(out_path)
    # alternative: do not write csv file, but store the needed values
    # within xval_minor_statistics

    return xval
Exemplo n.º 5
0
    def __init__(self,
                 gdir,
                 magicc_ts=None,
                 dt_per_dt=1,
                 dp_per_dt=0,
                 mu_star=None,
                 bias=None,
                 y0=None,
                 halfsize=15,
                 filename='climate_historical',
                 input_filesuffix='',
                 **kwargs):
        """Initialize

        Parameters
        ----------
        gdir : GlacierDirectory
            the glacier directory
        magicc_ts : pd.Series
            the GMT time series (required, despite the None default)
        dt_per_dt : float or sequence of 12 floats, optional, default 1
            the local climate change signal, in units of °C per °C. With
            12 values, one factor per column 1..12 is applied.
        dp_per_dt : float or sequence of 12 floats, optional, default 0
            factor multiplied with the anomaly of ``magicc_ts`` (relative
            to its mean over the reference window); the product is applied
            as a relative change of the precipitation factor. With 12
            values, one factor per column 1..12 is applied.
        mu_star : float, optional
            set to the alternative value of mu* you want to use
            (the default is to use the calibrated value)
        bias : float, optional
            set to the alternative value of the annual bias [mm we yr-1]
            you want to use (the default is to use the calibrated value)
        y0 : int, optional, default: tstar
            the year at the center of the period of interest. The default
            is to use tstar as center.
        halfsize : int, optional
            the half-size of the time window (window size = 2 * halfsize + 1)
        filename : str, optional
            set to a different BASENAME if you want to use alternative climate
            data.
        input_filesuffix : str
            the file suffix of the input climate file
        **kwargs
            passed on to MagiccConstantMassBalance

        Raises
        ------
        InvalidParamsError
            if ``magicc_ts`` is not given
        """

        if magicc_ts is None:
            raise InvalidParamsError('Need a magicc ts!')

        super(MagiccMassBalance, self).__init__()
        self.mbmod = MagiccConstantMassBalance(
            gdir,
            mu_star=mu_star,
            bias=bias,
            y0=y0,
            halfsize=halfsize,
            filename=filename,
            input_filesuffix=input_filesuffix,
            **kwargs)

        self.valid_bounds = self.mbmod.valid_bounds
        self.hemisphere = gdir.hemisphere

        # First and last year covered by the time series
        self.ys = int(magicc_ts.index[0])
        self.ye = int(magicc_ts.index[-1])

        # Reference window (label based, both ends included)
        # NOTE(review): y0=None cannot get past this arithmetic although the
        # docstring promises a tstar default - confirm how callers use it.
        win_start = y0 - halfsize
        win_end = y0 + halfsize

        # Precipitation signal: scale the anomaly of the (still unscaled)
        # series with respect to its mean over the reference window
        ref_t = magicc_ts.loc[win_start:win_end].mean()
        anomaly = magicc_ts - ref_t
        if len(np.atleast_1d(dp_per_dt)) == 12:
            # twelve factors: one column per factor
            prcp_ts = pd.DataFrame(
                data=anomaly.values[:, np.newaxis] * dp_per_dt,
                index=magicc_ts.index,
                columns=np.arange(1, 13))
        else:
            prcp_ts = anomaly * dp_per_dt

        # We correct the original factor - the difference is scaled by the
        # factor as well
        base_fac = self.mbmod.prcp_fac
        self.prcp_fac_ts = base_fac + base_fac * prcp_ts

        # Temperature signal: from here on magicc_ts is the scaled series
        if len(np.atleast_1d(dt_per_dt)) == 12:
            scaled = magicc_ts.values[:, np.newaxis] * dt_per_dt
            magicc_ts = pd.DataFrame(data=scaled,
                                     index=magicc_ts.index,
                                     columns=np.arange(1, 13))
        else:
            magicc_ts = magicc_ts * dt_per_dt

        years = magicc_ts.loc[win_start:win_end].index.values

        # Find the constant temperature offset for which the mean specific
        # mass balance of this model over the window equals that of
        # PastMassBalance
        fls = gdir.read_pickle('model_flowlines')
        mb_ref = PastMassBalance(gdir)
        mb_ref = mb_ref.get_specific_mb(fls=fls, year=years).mean()

        def to_minimize(temp_bias):
            # get_specific_mb is evaluated with the trial temp_bias_ts set
            self.temp_bias_ts = magicc_ts - temp_bias
            mb_mine = self.get_specific_mb(fls=fls, year=years).mean()
            return mb_mine - mb_ref

        temp_bias = optimize.brentq(to_minimize, -10, 10, xtol=1e-5)
        self.temp_bias_ts = magicc_ts - temp_bias