Example #1
    def test_download_demo_files(self):

        f = utils.get_demo_file('Hintereisferner.shp')
        self.assertTrue(os.path.exists(f))

        sh = salem.read_shapefile(f)
        self.assertTrue(hasattr(sh, 'geometry'))

        # Data files
        cfg.initialize()

        lf, df = utils.get_wgms_files()
        self.assertTrue(os.path.exists(df))

        lf = utils.get_glathida_file()
        self.assertTrue(os.path.exists(lf))
Example #2
    def test_download_demo_files(self):

        f = utils.get_demo_file('Hintereisferner.shp')
        self.assertTrue(os.path.exists(f))

        sh = salem.read_shapefile(f)
        self.assertTrue(hasattr(sh, 'geometry'))

        # Data files
        cfg.initialize()

        lf, df = utils.get_wgms_files()
        self.assertTrue(os.path.exists(lf))

        lf = utils.get_glathida_file()
        self.assertTrue(os.path.exists(lf))
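
The two variants above differ only in which of the two returned paths they assert. A merged sketch of the check (same oggm test fixtures assumed, combining both assertions):

        lf, df = utils.get_wgms_files()
        self.assertTrue(os.path.exists(lf))
        self.assertTrue(os.path.exists(df))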
Example #3
File: itmix.py Project: alexjarosch/oggm
def _prepare_inv(gdirs):

    # Get test glaciers (all glaciers with thickness data)
    fpath = utils.get_glathida_file()

    try:
        gtd_df = pd.read_csv(fpath).sort_values(by=["RGI_ID"])
    except AttributeError:
        gtd_df = pd.read_csv(fpath).sort(columns=["RGI_ID"])
    dfids = gtd_df["RGI_ID"].values

    print("GTD Glac before", len(dfids))
    ref_gdirs = []
    for gdir in gdirs:
        if gdir.rgi_id not in dfids:
            continue
        if gdir.glacier_type == "Ice cap":
            continue
        if gdir.terminus_type in [
            "Marine-terminating",
            "Lake-terminating",
            "Dry calving",
            "Regenerated",
            "Shelf-terminating",
        ]:
            continue
        ref_gdirs.append(gdir)

    print("GTD Glac after", len(ref_gdirs))

    ref_rgiids = [gdir.rgi_id for gdir in ref_gdirs]
    gtd_df = gtd_df.set_index("RGI_ID").loc[ref_rgiids]

    # Account for area differences between glathida and rgi
    ref_area_km2 = np.asarray([gdir.rgi_area_km2 for gdir in ref_gdirs])
    gtd_df.VOLUME = gtd_df.MEAN_THICKNESS * gtd_df.GTD_AREA * 1e-3
    ref_cs = gtd_df.VOLUME.values / (gtd_df.GTD_AREA.values ** 1.375)
    ref_volume_km3 = ref_cs * ref_area_km2 ** 1.375
    ref_thickness_m = ref_volume_km3 / ref_area_km2 * 1000.0

    gtd_df["ref_area_km2"] = ref_area_km2
    gtd_df["ref_volume_km3"] = ref_volume_km3
    gtd_df["ref_thickness_m"] = ref_thickness_m
    gtd_df["ref_gdirs"] = ref_gdirs

    return gtd_df
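
The try/except around read_csv is a pandas compatibility shim: pandas older than 0.17 has no DataFrame.sort_values, so the first call raises AttributeError and the code falls back to the long-deprecated DataFrame.sort (removed in pandas 0.20). On modern pandas only the first branch is needed; a minimal sketch, assuming pandas >= 0.17:

import pandas as pd

def read_glathida_sorted(fpath):
    # sort_values always exists on modern pandas, so no fallback is needed
    return pd.read_csv(fpath).sort_values(by='RGI_ID')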
Example #4
File: itmix.py Project: anton-ub/oggm
def _prepare_inv(gdirs):

    # Get test glaciers (all glaciers with thickness data)
    fpath = utils.get_glathida_file()

    try:
        gtd_df = pd.read_csv(fpath).sort_values(by=['RGI_ID'])
    except AttributeError:
        gtd_df = pd.read_csv(fpath).sort(columns=['RGI_ID'])
    dfids = gtd_df['RGI_ID'].values

    print('GTD Glac before', len(dfids))
    ref_gdirs = []
    for gdir in gdirs:
        if gdir.rgi_id not in dfids:
            continue
        if gdir.glacier_type == 'Ice cap':
            continue
        if gdir.terminus_type in [
                'Marine-terminating', 'Lake-terminating', 'Dry calving',
                'Regenerated', 'Shelf-terminating'
        ]:
            continue
        ref_gdirs.append(gdir)

    print('GTD Glac after', len(ref_gdirs))

    ref_rgiids = [gdir.rgi_id for gdir in ref_gdirs]
    gtd_df = gtd_df.set_index('RGI_ID').loc[ref_rgiids]

    # Account for area differences between glathida and rgi
    ref_area_km2 = np.asarray([gdir.rgi_area_km2 for gdir in ref_gdirs])
    gtd_df.VOLUME = gtd_df.MEAN_THICKNESS * gtd_df.GTD_AREA * 1e-3
    ref_cs = gtd_df.VOLUME.values / (gtd_df.GTD_AREA.values**1.375)
    ref_volume_km3 = ref_cs * ref_area_km2**1.375
    ref_thickness_m = ref_volume_km3 / ref_area_km2 * 1000.

    gtd_df['ref_area_km2'] = ref_area_km2
    gtd_df['ref_volume_km3'] = ref_volume_km3
    gtd_df['ref_thickness_m'] = ref_thickness_m
    gtd_df['ref_gdirs'] = ref_gdirs

    return gtd_df
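
The "account for area differences" block in both versions is a per-glacier volume-area scaling: the constant c in V = c * A**1.375 is calibrated from the GlaThiDa volume and area, then evaluated at the RGI area. A self-contained sketch of the arithmetic (exponent taken from the code above; the numbers are made up for illustration):

GAMMA = 1.375  # volume-area scaling exponent used in the snippets above

def rescale_to_rgi_area(mean_thickness_m, gtd_area_km2, rgi_area_km2):
    # calibrate c from the GlaThiDa volume, then apply it to the RGI area
    gtd_volume_km3 = mean_thickness_m * gtd_area_km2 * 1e-3
    c = gtd_volume_km3 / gtd_area_km2 ** GAMMA
    ref_volume_km3 = c * rgi_area_km2 ** GAMMA
    ref_thickness_m = ref_volume_km3 / rgi_area_km2 * 1000.
    return ref_volume_km3, ref_thickness_m

# hypothetical glacier: 120 m mean thickness over 10 km2 in GlaThiDa,
# but 12 km2 in the RGI -> roughly (1.54 km3, 128.5 m)
print(rescale_to_rgi_area(120., 10., 12.))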
Example #5
def optimize_inversion_params(gdirs):
    """Optimizes fs and fd based on GlaThiDa thicknesses.

    We use the glacier averaged thicknesses provided by GlaThiDa and correct
    them for differences in area with RGI, using a glacier specific volume-area
    scaling formula.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """

    # Do we even need to do this?
    if not cfg.PARAMS['optimize_inversion_params']:
        log.info('User did not want to optimize the inversion params')
        return

    # Get test glaciers (all glaciers with thickness data)
    fpath = utils.get_glathida_file()
    try:
        gtd_df = pd.read_csv(fpath).sort_values(by=['RGI_ID'])
    except AttributeError:
        gtd_df = pd.read_csv(fpath).sort(columns=['RGI_ID'])
    dfids = gtd_df['RGI_ID'].values

    ref_gdirs = [gdir for gdir in gdirs if gdir.rgi_id in dfids]
    ref_rgiids = [gdir.rgi_id for gdir in ref_gdirs]
    gtd_df = gtd_df.set_index('RGI_ID').loc[ref_rgiids]

    # Account for area differences between glathida and rgi
    gtd_df['RGI_AREA'] = [gdir.rgi_area_km2 for gdir in ref_gdirs]
    ref_area_km2 = gtd_df.RGI_AREA.values
    gtd_df.VOLUME = gtd_df.MEAN_THICKNESS * gtd_df.GTD_AREA * 1e-3
    ref_cs = gtd_df.VOLUME.values / (gtd_df.GTD_AREA.values**1.375)
    ref_volume_km3 = ref_cs * ref_area_km2**1.375
    ref_thickness_m = ref_volume_km3 / ref_area_km2 * 1000.

    # Minimize volume or thick RMSD?
    optim_t = cfg.PARAMS['optimize_thick']
    if optim_t:
        ref_data = ref_thickness_m
        tol = 0.1
    else:
        ref_data = ref_volume_km3
        tol = 1.e-4

    if cfg.PARAMS['invert_with_sliding']:
        # Optimize with both params
        log.info('Compute the inversion parameters.')

        def to_optimize(x):
            tmp_ref = np.zeros(len(ref_gdirs))
            glen_a = cfg.A * x[0]
            fs = cfg.FS * x[1]
            for i, gdir in enumerate(ref_gdirs):
                v, a = invert_parabolic_bed(gdir, glen_a=glen_a,
                                            fs=fs, write=False)
                if optim_t:
                    tmp_ref[i] = v / a
                else:
                    tmp_ref[i] = v * 1e-9
            return utils.rmsd(tmp_ref, ref_data)

        opti = optimization.minimize(to_optimize, [1., 1.],
                                     bounds=((0.01, 10), (0.01, 10)),
                                     tol=tol)
        # Check results and save.
        glen_a = cfg.A * opti['x'][0]
        fs = cfg.FS * opti['x'][1]
    else:
        # Optimize without sliding
        log.info('Compute the inversion parameter.')

        def to_optimize(x):
            tmp_ref = np.zeros(len(ref_gdirs))
            glen_a = cfg.A * x[0]
            for i, gdir in enumerate(ref_gdirs):
                v, a = invert_parabolic_bed(gdir, glen_a=glen_a,
                                            fs=0., write=False)
                if optim_t:
                    tmp_ref[i] = v / a
                else:
                    tmp_ref[i] = v * 1e-9
            return utils.rmsd(tmp_ref, ref_data)
        opti = optimization.minimize(to_optimize, [1.],
                                     bounds=((0.01, 10),),
                                     tol=tol)
        # Check results and save.
        glen_a = cfg.A * opti['x'][0]
        fs = 0.

    # This is for the stats
    oggm_volume_m3 = np.zeros(len(ref_gdirs))
    rgi_area_m2 = np.zeros(len(ref_gdirs))
    for i, gdir in enumerate(ref_gdirs):
        v, a = invert_parabolic_bed(gdir, glen_a=glen_a, fs=fs,
                                    write=False)
        oggm_volume_m3[i] = v
        rgi_area_m2[i] = a
    assert np.allclose(rgi_area_m2 * 1e-6, ref_area_km2)

    # This is for each glacier
    out = dict()
    out['glen_a'] = glen_a
    out['fs'] = fs
    out['factor_glen_a'] = opti['x'][0]
    try:
        out['factor_fs'] = opti['x'][1]
    except IndexError:
        out['factor_fs'] = 0.
    for gdir in gdirs:
        gdir.write_pickle(out, 'inversion_params')

    # This is for the working dir
    # Simple stats
    out['vol_rmsd'] = utils.rmsd(oggm_volume_m3 * 1e-9, ref_volume_km3)
    out['thick_rmsd'] = utils.rmsd(oggm_volume_m3 * 1e-9 / ref_area_km2 * 1000,
                                   ref_thickness_m)

    log.info('Optimized glen_a and fs with a factor {factor_glen_a:.2f} and '
             '{factor_fs:.2f} for a thick RMSD of '
             '{thick_rmsd:.1f} and a volume RMSD of '
             '{vol_rmsd:.3f}'.format(**out))

    df = pd.DataFrame(out, index=[0])
    fpath = os.path.join(cfg.PATHS['working_dir'],
                         'inversion_optim_params.csv')
    df.to_csv(fpath)

    # All results
    df = dict()
    df['ref_area_km2'] = ref_area_km2
    df['ref_volume_km3'] = ref_volume_km3
    df['oggm_volume_km3'] = oggm_volume_m3 * 1e-9
    df['vas_volume_km3'] = 0.034*(df['ref_area_km2']**1.375)

    rgi_id = [gdir.rgi_id for gdir in ref_gdirs]
    df = pd.DataFrame(df, index=rgi_id)
    fpath = os.path.join(cfg.PATHS['working_dir'],
                         'inversion_optim_results.csv')
    df.to_csv(fpath)

    # return value for tests
    return out
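
The to_optimize closures reduce the calibration to a bounded one- or two-parameter minimisation of an RMSD. A stripped-down sketch of that pattern with scipy (the model and rmsd below are synthetic stand-ins for the bed inversion and for oggm's utils.rmsd, not oggm code):

import numpy as np
from scipy import optimize as optimization

def rmsd(a, b):
    # the definition one would expect from oggm's utils.rmsd
    return np.sqrt(np.mean((np.asarray(a) - np.asarray(b)) ** 2))

ref_data = np.array([1.0, 2.0, 3.0])           # made-up reference volumes

def model(factor):
    # stand-in for the parabolic bed inversion
    return factor * np.array([1.2, 1.9, 3.3])

def to_optimize(x):
    return rmsd(model(x[0]), ref_data)

opti = optimization.minimize(to_optimize, [1.], bounds=((0.01, 10),),
                             tol=1e-4)
print(opti['x'][0])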
Example #6
File: itmix.py Project: anton-ub/oggm
def get_rgi_df(reset=False):
    """This function prepares a kind of `fake` RGI file, with the updated
    geometries for ITMIX.
    """

    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers
    RGI_DIR = utils.get_rgi_dir()

    df_rgi_file = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_shp.pkl')
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_links.pkl')
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        divides = []
        rgidf = []
        _rgi_ids_for_overwrite = []
        for i, row in df_itmix.iterrows():

            log.info('Prepare RGI df for ' + row.name)

            # read the rgi region
            rgi_shp = find_path(RGI_DIR, row['rgi_reg'] + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row.T['rgi_parts_ids']
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()

            # use the ITMIX shape where possible
            if row.name in [
                    'Hellstugubreen', 'Freya', 'Aqqutikitsoq', 'Brewster',
                    'Kesselwandferner', 'NorthGlacier', 'SouthGlacier',
                    'Tasman', 'Unteraar', 'Washmawapta', 'Columbia'
            ]:
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp = salem.read_shapefile(shf)
                if row.name == 'Unteraar':
                    shp = shp.iloc[[-1]]
                if 'LineString' == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0],
                            'geometry'] = shpg.Polygon(shp.iloc[0].geometry)
                if shp.iloc[0].geometry.type == 'MultiLineString':
                    # Columbia
                    geometry = shp.iloc[0].geometry
                    parts = list(geometry)
                    for p in parts:
                        assert p.type == 'LineString'
                    exterior = shpg.Polygon(parts[0])
                    # let's assume that all other polygons are in fact interiors
                    interiors = []
                    for p in parts[1:]:
                        assert exterior.contains(p)
                        interiors.append(p)
                    geometry = shpg.Polygon(parts[0], interiors)
                    assert 'Polygon' in geometry.type
                    shp.loc[shp.index[0], 'geometry'] = geometry

                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], 'geometry'] = shp
                sel.loc[sel.index[0], 'Area'] = area_km2
            elif row.name == 'Urumqi':
                # ITMIX Urumqi is in fact two glaciers
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp2 = salem.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k],
                                   'geometry'].contains(shp.centroid)
                    sel.loc[sel.index[k], 'geometry'] = shp
                    sel.loc[sel.index[k], 'Area'] = area_km2
                assert len(sel) == 2
            elif len(rgi_parts) > 1:
                # Ice-caps. Make divides
                # First we gather all the parts:
                sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
                # Make the multipolygon for the record
                multi = shpg.MultiPolygon([g for g in sel.geometry])
                # update the RGI attributes. We take a dummy rgi ID
                new_area = np.sum(sel.Area)
                found = False
                for i in range(len(sel)):
                    tsel = sel.iloc[[i]].copy()
                    if 'Multi' in tsel.loc[tsel.index[0], 'geometry'].type:
                        continue
                    else:
                        found = True
                        sel = tsel
                        break
                if not found:
                    raise RuntimeError()

                inif = 0.
                add = 1e-5
                if row.name == 'Devon':
                    inif = 0.001
                    add = 1e-4
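                # buffer outwards until the parts overlap and buffer()
                # dissolves the multipolygon into a single Polygon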
                while True:
                    buff = multi.buffer(inif)
                    if 'Multi' in buff.type:
                        inif += add
                    else:
                        break
                x, y = multi.centroid.xy
                if 'Multi' in buff.type:
                    raise RuntimeError
                sel.loc[sel.index[0], 'geometry'] = buff
                sel.loc[sel.index[0], 'Area'] = new_area
                sel.loc[sel.index[0], 'CenLon'] = np.asarray(x)[0]
                sel.loc[sel.index[0], 'CenLat'] = np.asarray(y)[0]

                # Divides db
                div_sel = dict()
                for k, v in sel.iloc[0].iteritems():
                    if k == 'geometry':
                        div_sel[k] = multi
                    elif k == 'RGIId':
                        div_sel['RGIID'] = v
                    else:
                        div_sel[k] = v
                divides.append(div_sel)
            else:
                pass

            # add glacier name to the entity
            name = ['I:' + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'W-' + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

            # Add divides to the original one
            adf = pd.DataFrame(divides)
            adf.to_pickle(cfg.PATHS['itmix_divs'])

        log.info('N glaciers ITMIX: {}'.format(len(rgidf)))

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        wgms_df = wgms_df.loc[~wgms_df.RGI_ID.isin(_rgi_ids_for_overwrite)]

        log.info('N glaciers WGMS: {}'.format(len(wgms_df)))
        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace('/', 'or').replace('.',
                                                       '').replace(' ', '-')
            name = ['W:' + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Glacier name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        _rgi_ids_for_overwrite.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        gtd_df = gtd_df.loc[~gtd_df.RGI_ID.isin(_rgi_ids_for_overwrite)]
        log.info('N glaciers GTD: {}'.format(len(gtd_df)))

        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace('/',
                                        'or').replace('.',
                                                      '').replace(' ', '-')
            name = ['G:' + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Glacier name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)

    return rgidf
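
The ITMIX branch above repairs outlines delivered as (Multi)LineStrings by closing them into polygons. A standalone illustration of the same shapely call (shpg is shapely.geometry, as in the snippets; the coordinates are made up):

import shapely.geometry as shpg

line = shpg.LineString([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)])
poly = shpg.Polygon(line)   # exterior ring built from the line's coordinates
assert poly.is_valid
assert poly.area == 1.0     # unit square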
Example #7
def get_rgi_df(reset=False):
    """This function prepares a kind of `fake` RGI file, with the updated
    geometries for ITMIX.
    """

    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers
    RGI_DIR = utils.get_rgi_dir()

    df_rgi_file = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_shp.pkl')
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_links.pkl')
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        divides = []
        rgidf = []
        _rgi_ids_for_overwrite = []
        for i, row in df_itmix.iterrows():

            log.info('Prepare RGI df for ' + row.name)

            # read the rgi region
            rgi_shp = find_path(RGI_DIR, row['rgi_reg'] + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row.T['rgi_parts_ids']
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()

            # use the ITMIX shape where possible
            if row.name in ['Hellstugubreen', 'Freya', 'Aqqutikitsoq',
                            'Brewster', 'Kesselwandferner', 'NorthGlacier',
                            'SouthGlacier', 'Tasman', 'Unteraar',
                            'Washmawapta', 'Columbia']:
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp = salem.read_shapefile(shf)
                if row.name == 'Unteraar':
                    shp = shp.iloc[[-1]]
                if 'LineString' == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0], 'geometry'] = shpg.Polygon(shp.iloc[0].geometry)
                if shp.iloc[0].geometry.type == 'MultiLineString':
                    # Columbia
                    geometry = shp.iloc[0].geometry
                    parts = list(geometry)
                    for p in parts:
                        assert p.type == 'LineString'
                    exterior = shpg.Polygon(parts[0])
                    # let's assume that all other polygons are in fact interiors
                    interiors = []
                    for p in parts[1:]:
                        assert exterior.contains(p)
                        interiors.append(p)
                    geometry = shpg.Polygon(parts[0], interiors)
                    assert 'Polygon' in geometry.type
                    shp.loc[shp.index[0], 'geometry'] = geometry

                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], 'geometry'] = shp
                sel.loc[sel.index[0], 'Area'] = area_km2
            elif row.name == 'Urumqi':
                # ITMIX Urumqi is in fact two glaciers
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp2 = salem.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k], 'geometry'].contains(shp.centroid)
                    sel.loc[sel.index[k], 'geometry'] = shp
                    sel.loc[sel.index[k], 'Area'] = area_km2
                assert len(sel) == 2
            elif len(rgi_parts) > 1:
                # Ice-caps. Make divides
                # First we gather all the parts:
                sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
                # Make the multipolygon for the record
                multi = shpg.MultiPolygon([g for g in sel.geometry])
                # update the RGI attributes. We take a dummy rgi ID
                new_area = np.sum(sel.Area)
                found = False
                for i in range(len(sel)):
                    tsel = sel.iloc[[i]].copy()
                    if 'Multi' in tsel.loc[tsel.index[0], 'geometry'].type:
                        continue
                    else:
                        found = True
                        sel = tsel
                        break
                if not found:
                    raise RuntimeError()

                inif = 0.
                add = 1e-5
                if row.name == 'Devon':
                    inif = 0.001
                    add = 1e-4
                while True:
                    buff = multi.buffer(inif)
                    if 'Multi' in buff.type:
                        inif += add
                    else:
                        break
                x, y = multi.centroid.xy
                if 'Multi' in buff.type:
                    raise RuntimeError
                sel.loc[sel.index[0], 'geometry'] = buff
                sel.loc[sel.index[0], 'Area'] = new_area
                sel.loc[sel.index[0], 'CenLon'] = np.asarray(x)[0]
                sel.loc[sel.index[0], 'CenLat'] = np.asarray(y)[0]

                # Divides db
                div_sel = dict()
                for k, v in sel.iloc[0].iteritems():
                    if k == 'geometry':
                        div_sel[k] = multi
                    elif k == 'RGIId':
                        div_sel['RGIID'] = v
                    else:
                        div_sel[k] = v
                divides.append(div_sel)
            else:
                pass

            # add glacier name to the entity
            name = ['I:' + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'W-' + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

            # Add divides to the original one
            adf = pd.DataFrame(divides)
            adf.to_pickle(cfg.PATHS['itmix_divs'])

        log.info('N glaciers ITMIX: {}'.format(len(rgidf)))

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        wgms_df = wgms_df.loc[~ wgms_df.RGI_ID.isin(_rgi_ids_for_overwrite)]

        log.info('N glaciers WGMS: {}'.format(len(wgms_df)))
        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['W:' + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Glacier name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        _rgi_ids_for_overwrite.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        gtd_df = gtd_df.loc[~ gtd_df.RGI_ID.isin(_rgi_ids_for_overwrite)]
        log.info('N glaciers GTD: {}'.format(len(gtd_df)))

        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['G:' + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Glacier name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)

    return rgidf
Example #8
File: itmix.py Project: Enaith/oggm
def get_rgi_df(reset=False):
    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers

    df_rgi_file = os.path.expanduser('~/itmix_rgi_shp.pkl')
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_links.pkl')
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        rgidf = []
        _rgi_ids = []
        for i, row in df_itmix.iterrows():
            # read the rgi region
            rgi_shp = os.path.join(RGI_DIR, "*",
                                   row['rgi_reg'] + '_rgi50_*.shp')
            rgi_shp = list(glob.glob(rgi_shp))[0]
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row.T['rgi_parts_ids']
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
            _rgi_ids.extend(rgi_parts)

            # use the ITMIX shape where possible
            if row.name in ['Hellstugubreen', 'Freya', 'Aqqutikitsoq',
                            'Brewster', 'Kesselwandferner', 'NorthGlacier',
                            'SouthGlacier', 'Tasman', 'Unteraar',
                            'Washmawapta']:
                for shf in glob.glob(itmix_cfg.itmix_data_dir + '*/*/*_' +
                                             row.name + '*.shp'):
                    pass
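                # the loop leaves shf pointing at the last matching file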
                shp = salem.utils.read_shapefile(shf)
                if row.name == 'Unteraar':
                    shp = shp.iloc[[-1]]
                if 'LineString' == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0], 'geometry'] = shpg.Polygon(shp.iloc[0].geometry)
                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], 'geometry'] = shp
                sel.loc[sel.index[0], 'Area'] = area_km2
            elif row.name == 'Urumqi':
                # ITMIX Urumqi is in fact two glaciers
                for shf in glob.glob(itmix_cfg.itmix_data_dir + '*/*/*_' +
                                             row.name + '*.shp'):
                    pass
                shp2 = salem.utils.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k], 'geometry'].contains(shp.centroid)
                    sel.loc[sel.index[k], 'geometry'] = shp
                    sel.loc[sel.index[k], 'Area'] = area_km2
                assert len(sel) == 2
            else:
                pass

            # add glacier name to the entity
            name = ['I:' + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'W-' + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        print('N WGMS before: {}'.format(len(wgms_df)))
        wgms_df = wgms_df.loc[~ wgms_df.RGI_ID.isin(_rgi_ids)]
        print('N WGMS after: {}'.format(len(wgms_df)))

        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = os.path.join(RGI_DIR, "*",
                                   reg + '_rgi50_*.shp')
            rgi_shp = list(glob.glob(rgi_shp))[0]
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['W:' + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Glacier name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        _rgi_ids.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        print('N GTD before: {}'.format(len(gtd_df)))
        gtd_df = gtd_df.loc[~ gtd_df.RGI_ID.isin(_rgi_ids)]
        print('N GTD after: {}'.format(len(gtd_df)))

        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = os.path.join(RGI_DIR, "*",
                                   reg + '_rgi50_*.shp')
            rgi_shp = list(glob.glob(rgi_shp))[0]
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['G:' + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Glacier name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)

    return rgidf
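
Examples #6, #7 and #10 use a find_path helper that is not shown; this example spells the same lookup out with glob. A plausible reconstruction under that assumption (the real itmix.py may differ):

import glob
import os

def find_path(search_dir, pattern):
    # hypothetical helper inferred from the glob calls in this example:
    # pick the first file matching pattern one directory level down
    return sorted(glob.glob(os.path.join(search_dir, '*', pattern)))[0]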
Example #9
def optimize_inversion_params(gdirs):
    """Optimizes fs and fd based on GlaThiDa thicknesses.

    We use the glacier averaged thicknesses provided by GlaThiDa and correct
    them for differences in area with RGI, using a glacier specific volume-area
    scaling formula.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """

    # Do we even need to do this?
    if not cfg.PARAMS['optimize_inversion_params']:
        log.info('User did not want to optimize the inversion params')
        return

    # Get test glaciers (all glaciers with thickness data)
    fpath = utils.get_glathida_file()
    try:
        gtd_df = pd.read_csv(fpath).sort_values(by=['RGI_ID'])
    except AttributeError:
        gtd_df = pd.read_csv(fpath).sort(columns=['RGI_ID'])

    dfids = gtd_df['RGI_ID'].values

    ref_gdirs = [gdir for gdir in gdirs if gdir.rgi_id in dfids]
    if len(ref_gdirs) == 0:
        raise RuntimeError('No reference GlaThiDa glaciers. Maybe something '
                           'went wrong with the link list?')
    ref_rgiids = [gdir.rgi_id for gdir in ref_gdirs]
    gtd_df = gtd_df.set_index('RGI_ID').loc[ref_rgiids]

    # Account for area differences between glathida and rgi
    gtd_df['RGI_AREA'] = [gdir.rgi_area_km2 for gdir in ref_gdirs]
    ref_area_km2 = gtd_df.RGI_AREA.values
    ref_area_m2 = ref_area_km2 * 1e6
    gtd_df.VOLUME = gtd_df.MEAN_THICKNESS * gtd_df.GTD_AREA * 1e-3
    ref_cs = gtd_df.VOLUME.values / (gtd_df.GTD_AREA.values**1.375)
    ref_volume_km3 = ref_cs * ref_area_km2**1.375
    ref_thickness_m = ref_volume_km3 / ref_area_km2 * 1000.

    # Minimize volume or thick RMSD?
    optim_t = cfg.PARAMS['optimize_thick']
    if optim_t:
        ref_data = ref_thickness_m
        tol = 0.1
    else:
        ref_data = ref_volume_km3
        tol = 1.e-4

    if cfg.PARAMS['invert_with_sliding']:
        # Optimize with both params
        log.info('Compute the inversion parameters.')

        def to_optimize(x):
            tmp_ref = np.zeros(len(ref_gdirs))
            glen_a = cfg.A * x[0]
            fs = cfg.FS * x[1]
            for i, gdir in enumerate(ref_gdirs):
                v, a = mass_conservation_inversion(gdir, glen_a=glen_a,
                                                   fs=fs, write=False)
                if optim_t:
                    tmp_ref[i] = v / a
                else:
                    tmp_ref[i] = v * 1e-9
            return utils.rmsd(tmp_ref, ref_data)

        opti = optimization.minimize(to_optimize, [1., 1.],
                                     bounds=((0.01, 10), (0.01, 10)),
                                     tol=tol)
        # Check results and save.
        glen_a = cfg.A * opti['x'][0]
        fs = cfg.FS * opti['x'][1]
    else:
        # Optimize without sliding
        log.info('Compute the inversion parameter.')

        def to_optimize(x):
            tmp_ref = np.zeros(len(ref_gdirs))
            glen_a = cfg.A * x[0]
            for i, gdir in enumerate(ref_gdirs):
                v, a = mass_conservation_inversion(gdir, glen_a=glen_a,
                                                   fs=0., write=False)
                if optim_t:
                    tmp_ref[i] = v / a
                else:
                    tmp_ref[i] = v * 1e-9
            return utils.rmsd(tmp_ref, ref_data)
        opti = optimization.minimize(to_optimize, [1.],
                                     bounds=((0.01, 10),),
                                     tol=tol)
        # Check results and save.
        glen_a = cfg.A * opti['x'][0]
        fs = 0.

    # This is for the stats
    oggm_volume_m3 = np.zeros(len(ref_gdirs))
    rgi_area_m2 = np.zeros(len(ref_gdirs))
    for i, gdir in enumerate(ref_gdirs):
        v, a = mass_conservation_inversion(gdir, glen_a=glen_a, fs=fs,
                                           write=False)
        oggm_volume_m3[i] = v
        rgi_area_m2[i] = a
    assert np.allclose(rgi_area_m2 * 1e-6, ref_area_km2)

    # This is for each glacier
    out = dict()
    out['glen_a'] = glen_a
    out['fs'] = fs
    out['factor_glen_a'] = opti['x'][0]
    try:
        out['factor_fs'] = opti['x'][1]
    except IndexError:
        out['factor_fs'] = 0.
    for gdir in gdirs:
        gdir.write_pickle(out, 'inversion_params')

    # This is for the working dir
    # Simple stats
    out['vol_rmsd'] = utils.rmsd(oggm_volume_m3 * 1e-9, ref_volume_km3)
    out['thick_rmsd'] = utils.rmsd(oggm_volume_m3 / ref_area_m2,
                                   ref_thickness_m)

    log.info('Optimized glen_a and fs with a factor {factor_glen_a:.2f} and '
             '{factor_fs:.2f} for a thick RMSD of '
             '{thick_rmsd:.1f} m and a volume RMSD of '
             '{vol_rmsd:.3f} km3'.format(**out))

    df = pd.DataFrame(out, index=[0])
    fpath = os.path.join(cfg.PATHS['working_dir'],
                         'inversion_optim_params.csv')
    df.to_csv(fpath)

    # All results
    df = dict()
    df['ref_area_km2'] = ref_area_km2
    df['ref_volume_km3'] = ref_volume_km3
    df['oggm_volume_km3'] = oggm_volume_m3 * 1e-9
    df['vas_volume_km3'] = 0.034*(df['ref_area_km2']**1.375)

    rgi_id = [gdir.rgi_id for gdir in ref_gdirs]
    df = pd.DataFrame(df, index=rgi_id)
    fpath = os.path.join(cfg.PATHS['working_dir'],
                         'inversion_optim_results.csv')
    df.to_csv(fpath)

    # return value for tests
    return out
Example #10
File: itmix.py Project: alexjarosch/oggm
def get_rgi_df(reset=False):
    """This function prepares a kind of `fake` RGI file, with the updated
    geometries for ITMIX.
    """

    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers
    RGI_DIR = utils.get_rgi_dir()

    df_rgi_file = os.path.join(DATA_DIR, "itmix", "itmix_rgi_shp.pkl")
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, "itmix", "itmix_rgi_links.pkl")
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        divides = []
        rgidf = []
        _rgi_ids_for_overwrite = []
        for i, row in df_itmix.iterrows():

            log.info("Prepare RGI df for " + row.name)

            # read the rgi region
            rgi_shp = find_path(RGI_DIR, row["rgi_reg"] + "_rgi50_*.shp")
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row.T["rgi_parts_ids"]
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()

            # use the ITMIX shape where possible
            if row.name in [
                "Hellstugubreen",
                "Freya",
                "Aqqutikitsoq",
                "Brewster",
                "Kesselwandferner",
                "NorthGlacier",
                "SouthGlacier",
                "Tasman",
                "Unteraar",
                "Washmawapta",
                "Columbia",
            ]:
                shf = find_path(SEARCHD, "*_" + row.name + "*.shp")
                shp = salem.utils.read_shapefile(shf)
                if row.name == "Unteraar":
                    shp = shp.iloc[[-1]]
                if "LineString" == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0], "geometry"] = shpg.Polygon(shp.iloc[0].geometry)
                if shp.iloc[0].geometry.type == "MultiLineString":
                    # Columbia
                    geometry = shp.iloc[0].geometry
                    parts = list(geometry)
                    for p in parts:
                        assert p.type == "LineString"
                    exterior = shpg.Polygon(parts[0])
                    # let's assume that all other polygons are in fact interiors
                    interiors = []
                    for p in parts[1:]:
                        assert exterior.contains(p)
                        interiors.append(p)
                    geometry = shpg.Polygon(parts[0], interiors)
                    assert "Polygon" in geometry.type
                    shp.loc[shp.index[0], "geometry"] = geometry

                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], "geometry"] = shp
                sel.loc[sel.index[0], "Area"] = area_km2
            elif row.name == "Urumqi":
                # ITMIX Urumqi is in fact two glaciers
                shf = find_path(SEARCHD, "*_" + row.name + "*.shp")
                shp2 = salem.utils.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k], "geometry"].contains(shp.centroid)
                    sel.loc[sel.index[k], "geometry"] = shp
                    sel.loc[sel.index[k], "Area"] = area_km2
                assert len(sel) == 2
            elif len(rgi_parts) > 1:
                # Ice-caps. Make divides
                # First we gather all the parts:
                sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
                # Make the multipolygon for the record
                multi = shpg.MultiPolygon([g for g in sel.geometry])
                # update the RGI attributes. We take a dummy rgi ID
                new_area = np.sum(sel.Area)
                found = False
                for i in range(len(sel)):
                    tsel = sel.iloc[[i]].copy()
                    if "Multi" in tsel.loc[tsel.index[0], "geometry"].type:
                        continue
                    else:
                        found = True
                        sel = tsel
                        break
                if not found:
                    raise RuntimeError()

                inif = 0.0
                add = 1e-5
                if row.name == "Devon":
                    inif = 0.001
                    add = 1e-4
                while True:
                    buff = multi.buffer(inif)
                    if "Multi" in buff.type:
                        inif += add
                    else:
                        break
                x, y = multi.centroid.xy
                if "Multi" in buff.type:
                    raise RuntimeError
                sel.loc[sel.index[0], "geometry"] = buff
                sel.loc[sel.index[0], "Area"] = new_area
                sel.loc[sel.index[0], "CenLon"] = np.asarray(x)[0]
                sel.loc[sel.index[0], "CenLat"] = np.asarray(y)[0]

                # Divides db
                div_sel = dict()
                for k, v in sel.iloc[0].iteritems():
                    if k == "geometry":
                        div_sel[k] = multi
                    elif k == "RGIId":
                        div_sel["RGIID"] = v
                    else:
                        div_sel[k] = v
                divides.append(div_sel)
            else:
                pass

            # add glacier name to the entity
            name = ["I:" + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = "W-" + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = "G-" + name[z]
            sel.loc[:, "Name"] = name
            rgidf.append(sel)

            # Add divides to the original one
            adf = pd.DataFrame(divides)
            adf.to_pickle(cfg.PATHS["itmix_divs"])

        log.info("N glaciers ITMIX: {}".format(len(rgidf)))

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        wgms_df = wgms_df.loc[~wgms_df.RGI_ID.isin(_rgi_ids_for_overwrite)]

        log.info("N glaciers WGMS: {}".format(len(wgms_df)))
        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split("-")[1].split(".")[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + "_rgi50_*.shp")
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace("/", "or").replace(".", "").replace(" ", "-")
            name = ["W:" + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = "G-" + name[z]
            for n in name:
                if len(n) > 48:
                    raise RuntimeError("Glacier name too long: " + n)
            sel.loc[:, "Name"] = name
            rgidf.append(sel)

        _rgi_ids_for_overwrite.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        gtd_df = gtd_df.loc[~gtd_df.RGI_ID.isin(_rgi_ids_for_overwrite)]
        log.info("N glaciers GTD: {}".format(len(gtd_df)))

        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split("-")[1].split(".")[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + "_rgi50_*.shp")
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace("/", "or").replace(".", "").replace(" ", "-")
            name = ["G:" + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise RuntimeError("Glacier name too long: " + n)
            sel.loc[:, "Name"] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)

    return rgidf