# Imports assumed by the snippets below; they were stripped from the
# original extract. Module-level names such as DATA_DIR, SEARCHD, RGI_DIR,
# itmix_cfg, invert_parabolic_bed and mass_conservation_inversion are
# project-level definitions that come from elsewhere in OGGM / the ITMIX
# scripts.
import os
import glob
import logging

import numpy as np
import pandas as pd
import salem
import shapely.geometry as shpg
from scipy import optimize as optimization

from oggm import cfg, utils

log = logging.getLogger(__name__)


def test_download_demo_files(self):

    f = utils.get_demo_file('Hintereisferner.shp')
    self.assertTrue(os.path.exists(f))

    sh = salem.read_shapefile(f)
    self.assertTrue(hasattr(sh, 'geometry'))

    # Data files
    cfg.initialize()

    lf, df = utils.get_wgms_files()
    self.assertTrue(os.path.exists(df))

    lf = utils.get_glathida_file()
    self.assertTrue(os.path.exists(lf))
def test_download_demo_files(self):

    f = utils.get_demo_file('Hintereisferner.shp')
    self.assertTrue(os.path.exists(f))

    sh = salem.read_shapefile(f)
    self.assertTrue(hasattr(sh, 'geometry'))

    # Data files
    cfg.initialize()

    lf, df = utils.get_wgms_files()
    self.assertTrue(os.path.exists(lf))

    lf = utils.get_glathida_file()
    self.assertTrue(os.path.exists(lf))
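# The test methods above assume a unittest.TestCase context. A minimal
# standalone sketch with the same calls, using plain asserts instead of the
# TestCase API (the function name is ours, not from the original tests):
def check_data_files():
    f = utils.get_demo_file('Hintereisferner.shp')
    assert os.path.exists(f)
    cfg.initialize()
    assert os.path.exists(utils.get_glathida_file())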
def _prepare_inv(gdirs):

    # Get test glaciers (all glaciers with thickness data)
    fpath = utils.get_glathida_file()
    try:
        gtd_df = pd.read_csv(fpath).sort_values(by=["RGI_ID"])
    except AttributeError:
        gtd_df = pd.read_csv(fpath).sort(columns=["RGI_ID"])
    dfids = gtd_df["RGI_ID"].values

    print("GTD Glac before", len(dfids))
    ref_gdirs = []
    for gdir in gdirs:
        if gdir.rgi_id not in dfids:
            continue
        if gdir.glacier_type == "Ice cap":
            continue
        if gdir.terminus_type in ["Marine-terminating", "Lake-terminating",
                                  "Dry calving", "Regenerated",
                                  "Shelf-terminating"]:
            continue
        ref_gdirs.append(gdir)

    print("GTD Glac after", len(ref_gdirs))

    ref_rgiids = [gdir.rgi_id for gdir in ref_gdirs]
    gtd_df = gtd_df.set_index("RGI_ID").loc[ref_rgiids]

    # Account for area differences between glathida and rgi
    ref_area_km2 = np.asarray([gdir.rgi_area_km2 for gdir in ref_gdirs])
    gtd_df.VOLUME = gtd_df.MEAN_THICKNESS * gtd_df.GTD_AREA * 1e-3
    ref_cs = gtd_df.VOLUME.values / (gtd_df.GTD_AREA.values ** 1.375)
    ref_volume_km3 = ref_cs * ref_area_km2 ** 1.375
    ref_thickness_m = ref_volume_km3 / ref_area_km2 * 1000.0

    gtd_df["ref_area_km2"] = ref_area_km2
    gtd_df["ref_volume_km3"] = ref_volume_km3
    gtd_df["ref_thickness_m"] = ref_thickness_m
    gtd_df["ref_gdirs"] = ref_gdirs

    return gtd_df
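# A small worked example of the area correction above, using the
# volume-area scaling V = c * A**1.375 hard-coded in these snippets.
# The numbers are made up for illustration only:
def _vas_example():
    mean_thickness_m = 100.0  # GlaThiDa mean thickness (m)
    gtd_area_km2 = 10.0       # glacier area reported by GlaThiDa (km2)
    rgi_area_km2 = 12.0       # area of the same glacier in the RGI (km2)

    gtd_volume_km3 = mean_thickness_m * gtd_area_km2 * 1e-3  # m * km2 -> km3
    c = gtd_volume_km3 / gtd_area_km2 ** 1.375  # glacier-specific constant
    ref_volume_km3 = c * rgi_area_km2 ** 1.375  # rescaled to the RGI area
    ref_thickness_m = ref_volume_km3 / rgi_area_km2 * 1000.0  # km -> m
    return ref_volume_km3, ref_thickness_m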
def optimize_inversion_params(gdirs):
    """Optimizes fs and fd based on GlaThiDa thicknesses.

    We use the glacier averaged thicknesses provided by GlaThiDa and correct
    them for differences in area with RGI, using a glacier specific
    volume-area scaling formula.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """

    # Do we even need to do this?
    if not cfg.PARAMS['optimize_inversion_params']:
        log.info('User did not want to optimize the inversion params')
        return

    # Get test glaciers (all glaciers with thickness data)
    fpath = utils.get_glathida_file()
    try:
        gtd_df = pd.read_csv(fpath).sort_values(by=['RGI_ID'])
    except AttributeError:
        gtd_df = pd.read_csv(fpath).sort(columns=['RGI_ID'])
    dfids = gtd_df['RGI_ID'].values

    ref_gdirs = [gdir for gdir in gdirs if gdir.rgi_id in dfids]
    ref_rgiids = [gdir.rgi_id for gdir in ref_gdirs]
    gtd_df = gtd_df.set_index('RGI_ID').loc[ref_rgiids]

    # Account for area differences between glathida and rgi
    gtd_df['RGI_AREA'] = [gdir.rgi_area_km2 for gdir in ref_gdirs]
    ref_area_km2 = gtd_df.RGI_AREA.values
    gtd_df.VOLUME = gtd_df.MEAN_THICKNESS * gtd_df.GTD_AREA * 1e-3
    ref_cs = gtd_df.VOLUME.values / (gtd_df.GTD_AREA.values**1.375)
    ref_volume_km3 = ref_cs * ref_area_km2**1.375
    ref_thickness_m = ref_volume_km3 / ref_area_km2 * 1000.

    # Minimize volume or thick RMSD?
    optim_t = cfg.PARAMS['optimize_thick']
    if optim_t:
        ref_data = ref_thickness_m
        tol = 0.1
    else:
        ref_data = ref_volume_km3
        tol = 1.e-4

    if cfg.PARAMS['invert_with_sliding']:
        # Optimize with both params
        log.info('Compute the inversion parameters.')

        def to_optimize(x):
            tmp_ref = np.zeros(len(ref_gdirs))
            glen_a = cfg.A * x[0]
            fs = cfg.FS * x[1]
            for i, gdir in enumerate(ref_gdirs):
                v, a = invert_parabolic_bed(gdir, glen_a=glen_a,
                                            fs=fs, write=False)
                if optim_t:
                    tmp_ref[i] = v / a
                else:
                    tmp_ref[i] = v * 1e-9
            return utils.rmsd(tmp_ref, ref_data)

        opti = optimization.minimize(to_optimize, [1., 1.],
                                     bounds=((0.01, 10), (0.01, 10)),
                                     tol=tol)
        # Check results and save.
        glen_a = cfg.A * opti['x'][0]
        fs = cfg.FS * opti['x'][1]
    else:
        # Optimize without sliding
        log.info('Compute the inversion parameter.')

        def to_optimize(x):
            tmp_ref = np.zeros(len(ref_gdirs))
            glen_a = cfg.A * x[0]
            for i, gdir in enumerate(ref_gdirs):
                v, a = invert_parabolic_bed(gdir, glen_a=glen_a,
                                            fs=0., write=False)
                if optim_t:
                    tmp_ref[i] = v / a
                else:
                    tmp_ref[i] = v * 1e-9
            return utils.rmsd(tmp_ref, ref_data)

        opti = optimization.minimize(to_optimize, [1.],
                                     bounds=((0.01, 10),),
                                     tol=tol)
        # Check results and save.
        glen_a = cfg.A * opti['x'][0]
        fs = 0.

    # This is for the stats
    oggm_volume_m3 = np.zeros(len(ref_gdirs))
    rgi_area_m2 = np.zeros(len(ref_gdirs))
    for i, gdir in enumerate(ref_gdirs):
        v, a = invert_parabolic_bed(gdir, glen_a=glen_a, fs=fs,
                                    write=False)
        oggm_volume_m3[i] = v
        rgi_area_m2[i] = a
    assert np.allclose(rgi_area_m2 * 1e-6, ref_area_km2)

    # This is for each glacier
    out = dict()
    out['glen_a'] = glen_a
    out['fs'] = fs
    out['factor_glen_a'] = opti['x'][0]
    try:
        out['factor_fs'] = opti['x'][1]
    except IndexError:
        out['factor_fs'] = 0.
    for gdir in gdirs:
        gdir.write_pickle(out, 'inversion_params')

    # This is for the working dir
    # Simple stats
    out['vol_rmsd'] = utils.rmsd(oggm_volume_m3 * 1e-9, ref_volume_km3)
    # km3 / km2 -> km, * 1000 -> m, so that both sides are thicknesses in m
    out['thick_rmsd'] = utils.rmsd(oggm_volume_m3 * 1e-9 / ref_area_km2 * 1000,
                                   ref_thickness_m)

    log.info('Optimized glen_a and fs with a factor {factor_glen_a:.2f} and '
             '{factor_fs:.2f} for a thick RMSD of '
             '{thick_rmsd:.1f} and a volume RMSD of '
             '{vol_rmsd:.3f}'.format(**out))

    df = pd.DataFrame(out, index=[0])
    fpath = os.path.join(cfg.PATHS['working_dir'],
                         'inversion_optim_params.csv')
    df.to_csv(fpath)

    # All results
    df = dict()
    df['ref_area_km2'] = ref_area_km2
    df['ref_volume_km3'] = ref_volume_km3
    df['oggm_volume_km3'] = oggm_volume_m3 * 1e-9
    df['vas_volume_km3'] = 0.034 * (df['ref_area_km2']**1.375)
    rgi_id = [gdir.rgi_id for gdir in ref_gdirs]
    df = pd.DataFrame(df, index=rgi_id)
    fpath = os.path.join(cfg.PATHS['working_dir'],
                         'inversion_optim_results.csv')
    df.to_csv(fpath)

    # return value for tests
    return out
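# A hedged usage sketch for optimize_inversion_params(): the PARAMS keys are
# those read by the function above, but the gdirs list must come from a full
# OGGM preprocessing run, which is assumed here.
def _example_optimize(gdirs):
    cfg.PARAMS['optimize_inversion_params'] = True
    cfg.PARAMS['optimize_thick'] = False      # minimize the volume RMSD
    cfg.PARAMS['invert_with_sliding'] = False
    out = optimize_inversion_params(gdirs)
    log.info('glen_a factor: %.2f, volume RMSD: %.3f km3',
             out['factor_glen_a'], out['vol_rmsd'])
    return out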
def get_rgi_df(reset=False):
    """This function prepares a kind of `fake` RGI file, with the updated
    geometries for ITMIX.
    """

    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers
    RGI_DIR = utils.get_rgi_dir()

    df_rgi_file = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_shp.pkl')
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_links.pkl')
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        divides = []
        rgidf = []
        _rgi_ids_for_overwrite = []
        for i, row in df_itmix.iterrows():

            log.info('Prepare RGI df for ' + row.name)

            # read the rgi region
            rgi_shp = find_path(RGI_DIR, row['rgi_reg'] + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row.T['rgi_parts_ids']
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()

            # use the ITMIX shape where possible
            if row.name in ['Hellstugubreen', 'Freya', 'Aqqutikitsoq',
                            'Brewster', 'Kesselwandferner', 'NorthGlacier',
                            'SouthGlacier', 'Tasman', 'Unteraar',
                            'Washmawapta', 'Columbia']:
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp = salem.read_shapefile(shf)
                if row.name == 'Unteraar':
                    shp = shp.iloc[[-1]]
                if 'LineString' == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0], 'geometry'] = shpg.Polygon(shp.iloc[0].geometry)
                if shp.iloc[0].geometry.type == 'MultiLineString':
                    # Columbia
                    geometry = shp.iloc[0].geometry
                    parts = list(geometry)
                    for p in parts:
                        assert p.type == 'LineString'
                    exterior = shpg.Polygon(parts[0])
                    # let's assume that all other polygons are in fact interiors
                    interiors = []
                    for p in parts[1:]:
                        assert exterior.contains(p)
                        interiors.append(p)
                    geometry = shpg.Polygon(parts[0], interiors)
                    assert 'Polygon' in geometry.type
                    shp.loc[shp.index[0], 'geometry'] = geometry

                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], 'geometry'] = shp
                sel.loc[sel.index[0], 'Area'] = area_km2
            elif row.name == 'Urumqi':
                # ITMIX Urumqi is in fact two glaciers
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp2 = salem.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k], 'geometry'].contains(shp.centroid)
                    sel.loc[sel.index[k], 'geometry'] = shp
                    sel.loc[sel.index[k], 'Area'] = area_km2
                assert len(sel) == 2
            elif len(rgi_parts) > 1:
                # Ice-caps. Make divides
                # First we gather all the parts:
                sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
                # Make the multipolygon for the record
                multi = shpg.MultiPolygon([g for g in sel.geometry])
                # update the RGI attributes. We take a dummy rgi ID
                new_area = np.sum(sel.Area)
                found = False
                for i in range(len(sel)):
                    tsel = sel.iloc[[i]].copy()
                    if 'Multi' in tsel.loc[tsel.index[0], 'geometry'].type:
                        continue
                    else:
                        found = True
                        sel = tsel
                        break
                if not found:
                    raise RuntimeError()

                inif = 0.
                add = 1e-5
                if row.name == 'Devon':
                    inif = 0.001
                    add = 1e-4
                while True:
                    buff = multi.buffer(inif)
                    if 'Multi' in buff.type:
                        inif += add
                    else:
                        break
                x, y = multi.centroid.xy
                if 'Multi' in buff.type:
                    raise RuntimeError
                sel.loc[sel.index[0], 'geometry'] = buff
                sel.loc[sel.index[0], 'Area'] = new_area
                sel.loc[sel.index[0], 'CenLon'] = np.asarray(x)[0]
                sel.loc[sel.index[0], 'CenLat'] = np.asarray(y)[0]

                # Divides db
                div_sel = dict()
                for k, v in sel.iloc[0].iteritems():
                    if k == 'geometry':
                        div_sel[k] = multi
                    elif k == 'RGIId':
                        div_sel['RGIID'] = v
                    else:
                        div_sel[k] = v
                divides.append(div_sel)
            else:
                pass

            # add glacier name to the entity
            name = ['I:' + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'W-' + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Add divides to the original one
        adf = pd.DataFrame(divides)
        adf.to_pickle(cfg.PATHS['itmix_divs'])

        log.info('N glaciers ITMIX: {}'.format(len(rgidf)))

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        wgms_df = wgms_df.loc[~wgms_df.RGI_ID.isin(_rgi_ids_for_overwrite)]

        log.info('N glaciers WGMS: {}'.format(len(wgms_df)))
        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['W:' + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        _rgi_ids_for_overwrite.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        gtd_df = gtd_df.loc[~gtd_df.RGI_ID.isin(_rgi_ids_for_overwrite)]
        log.info('N glaciers GTD: {}'.format(len(gtd_df)))

        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['G:' + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)
    return rgidf
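# find_path() is called throughout but not defined in these snippets. A
# plausible minimal sketch (recursive glob expected to yield exactly one
# match) is given here as an assumption, not as the original helper:
def find_path(search_dir, pattern):
    paths = glob.glob(os.path.join(search_dir, '**', pattern),
                      recursive=True)
    assert len(paths) == 1, 'Expected exactly one match for ' + pattern
    return paths[0]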
def get_rgi_df(reset=False):

    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers
    df_rgi_file = os.path.expanduser('~/itmix_rgi_shp.pkl')
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_links.pkl')
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        rgidf = []
        _rgi_ids = []
        for i, row in df_itmix.iterrows():
            # read the rgi region
            rgi_shp = os.path.join(RGI_DIR, "*",
                                   row['rgi_reg'] + '_rgi50_*.shp')
            rgi_shp = list(glob.glob(rgi_shp))[0]
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row.T['rgi_parts_ids']
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
            _rgi_ids.extend(rgi_parts)

            # use the ITMIX shape where possible
            if row.name in ['Hellstugubreen', 'Freya', 'Aqqutikitsoq',
                            'Brewster', 'Kesselwandferner', 'NorthGlacier',
                            'SouthGlacier', 'Tasman', 'Unteraar',
                            'Washmawapta']:
                # this loop just keeps the last glob match
                for shf in glob.glob(itmix_cfg.itmix_data_dir + '*/*/*_' +
                                     row.name + '*.shp'):
                    pass
                shp = salem.utils.read_shapefile(shf)
                if row.name == 'Unteraar':
                    shp = shp.iloc[[-1]]
                if 'LineString' == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0], 'geometry'] = shpg.Polygon(shp.iloc[0].geometry)
                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], 'geometry'] = shp
                sel.loc[sel.index[0], 'Area'] = area_km2
            elif row.name == 'Urumqi':
                # ITMIX Urumqi is in fact two glaciers
                # this loop just keeps the last glob match
                for shf in glob.glob(itmix_cfg.itmix_data_dir + '*/*/*_' +
                                     row.name + '*.shp'):
                    pass
                shp2 = salem.utils.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k], 'geometry'].contains(shp.centroid)
                    sel.loc[sel.index[k], 'geometry'] = shp
                    sel.loc[sel.index[k], 'Area'] = area_km2
                assert len(sel) == 2
            else:
                pass

            # add glacier name to the entity
            name = ['I:' + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'W-' + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        print('N WGMS before: {}'.format(len(wgms_df)))
        wgms_df = wgms_df.loc[~wgms_df.RGI_ID.isin(_rgi_ids)]
        print('N WGMS after: {}'.format(len(wgms_df)))
        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = os.path.join(RGI_DIR, "*", reg + '_rgi50_*.shp')
            rgi_shp = list(glob.glob(rgi_shp))[0]
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['W:' + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        _rgi_ids.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        print('N GTD before: {}'.format(len(gtd_df)))
        gtd_df = gtd_df.loc[~gtd_df.RGI_ID.isin(_rgi_ids)]
        print('N GTD after: {}'.format(len(gtd_df)))
        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = os.path.join(RGI_DIR, "*", reg + '_rgi50_*.shp')
            rgi_shp = list(glob.glob(rgi_shp))[0]
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['G:' + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)
    return rgidf
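# The `for shf in glob.glob(...): pass` idiom above keeps only the *last*
# match of the pattern. A clearer near-equivalent (sorting makes the choice
# deterministic, which the original does not guarantee):
def _last_match(pattern):
    matches = sorted(glob.glob(pattern))
    assert matches, 'No file found for ' + pattern
    return matches[-1]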
def optimize_inversion_params(gdirs):
    """Optimizes fs and fd based on GlaThiDa thicknesses.

    We use the glacier averaged thicknesses provided by GlaThiDa and correct
    them for differences in area with RGI, using a glacier specific
    volume-area scaling formula.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """

    # Do we even need to do this?
    if not cfg.PARAMS['optimize_inversion_params']:
        log.info('User did not want to optimize the inversion params')
        return

    # Get test glaciers (all glaciers with thickness data)
    fpath = utils.get_glathida_file()
    try:
        gtd_df = pd.read_csv(fpath).sort_values(by=['RGI_ID'])
    except AttributeError:
        gtd_df = pd.read_csv(fpath).sort(columns=['RGI_ID'])
    dfids = gtd_df['RGI_ID'].values

    ref_gdirs = [gdir for gdir in gdirs if gdir.rgi_id in dfids]
    if len(ref_gdirs) == 0:
        raise RuntimeError('No reference GlaThiDa glaciers. Maybe something '
                           'went wrong with the link list?')
    ref_rgiids = [gdir.rgi_id for gdir in ref_gdirs]
    gtd_df = gtd_df.set_index('RGI_ID').loc[ref_rgiids]

    # Account for area differences between glathida and rgi
    gtd_df['RGI_AREA'] = [gdir.rgi_area_km2 for gdir in ref_gdirs]
    ref_area_km2 = gtd_df.RGI_AREA.values
    ref_area_m2 = ref_area_km2 * 1e6
    gtd_df.VOLUME = gtd_df.MEAN_THICKNESS * gtd_df.GTD_AREA * 1e-3
    ref_cs = gtd_df.VOLUME.values / (gtd_df.GTD_AREA.values**1.375)
    ref_volume_km3 = ref_cs * ref_area_km2**1.375
    ref_thickness_m = ref_volume_km3 / ref_area_km2 * 1000.

    # Minimize volume or thick RMSD?
    optim_t = cfg.PARAMS['optimize_thick']
    if optim_t:
        ref_data = ref_thickness_m
        tol = 0.1
    else:
        ref_data = ref_volume_km3
        tol = 1.e-4

    if cfg.PARAMS['invert_with_sliding']:
        # Optimize with both params
        log.info('Compute the inversion parameters.')

        def to_optimize(x):
            tmp_ref = np.zeros(len(ref_gdirs))
            glen_a = cfg.A * x[0]
            fs = cfg.FS * x[1]
            for i, gdir in enumerate(ref_gdirs):
                v, a = mass_conservation_inversion(gdir, glen_a=glen_a,
                                                   fs=fs, write=False)
                if optim_t:
                    tmp_ref[i] = v / a
                else:
                    tmp_ref[i] = v * 1e-9
            return utils.rmsd(tmp_ref, ref_data)

        opti = optimization.minimize(to_optimize, [1., 1.],
                                     bounds=((0.01, 10), (0.01, 10)),
                                     tol=tol)
        # Check results and save.
        glen_a = cfg.A * opti['x'][0]
        fs = cfg.FS * opti['x'][1]
    else:
        # Optimize without sliding
        log.info('Compute the inversion parameter.')

        def to_optimize(x):
            tmp_ref = np.zeros(len(ref_gdirs))
            glen_a = cfg.A * x[0]
            for i, gdir in enumerate(ref_gdirs):
                v, a = mass_conservation_inversion(gdir, glen_a=glen_a,
                                                   fs=0., write=False)
                if optim_t:
                    tmp_ref[i] = v / a
                else:
                    tmp_ref[i] = v * 1e-9
            return utils.rmsd(tmp_ref, ref_data)

        opti = optimization.minimize(to_optimize, [1.],
                                     bounds=((0.01, 10),),
                                     tol=tol)
        # Check results and save.
        glen_a = cfg.A * opti['x'][0]
        fs = 0.

    # This is for the stats
    oggm_volume_m3 = np.zeros(len(ref_gdirs))
    rgi_area_m2 = np.zeros(len(ref_gdirs))
    for i, gdir in enumerate(ref_gdirs):
        v, a = mass_conservation_inversion(gdir, glen_a=glen_a, fs=fs,
                                           write=False)
        oggm_volume_m3[i] = v
        rgi_area_m2[i] = a
    assert np.allclose(rgi_area_m2 * 1e-6, ref_area_km2)

    # This is for each glacier
    out = dict()
    out['glen_a'] = glen_a
    out['fs'] = fs
    out['factor_glen_a'] = opti['x'][0]
    try:
        out['factor_fs'] = opti['x'][1]
    except IndexError:
        out['factor_fs'] = 0.

    for gdir in gdirs:
        gdir.write_pickle(out, 'inversion_params')

    # This is for the working dir
    # Simple stats
    out['vol_rmsd'] = utils.rmsd(oggm_volume_m3 * 1e-9, ref_volume_km3)
    out['thick_rmsd'] = utils.rmsd(oggm_volume_m3 / ref_area_m2,
                                   ref_thickness_m)

    log.info('Optimized glen_a and fs with a factor {factor_glen_a:.2f} and '
             '{factor_fs:.2f} for a thick RMSD of '
             '{thick_rmsd:.1f} m and a volume RMSD of '
             '{vol_rmsd:.3f} km3'.format(**out))

    df = pd.DataFrame(out, index=[0])
    fpath = os.path.join(cfg.PATHS['working_dir'],
                         'inversion_optim_params.csv')
    df.to_csv(fpath)

    # All results
    df = dict()
    df['ref_area_km2'] = ref_area_km2
    df['ref_volume_km3'] = ref_volume_km3
    df['oggm_volume_km3'] = oggm_volume_m3 * 1e-9
    df['vas_volume_km3'] = 0.034 * (df['ref_area_km2']**1.375)
    rgi_id = [gdir.rgi_id for gdir in ref_gdirs]
    df = pd.DataFrame(df, index=rgi_id)
    fpath = os.path.join(cfg.PATHS['working_dir'],
                         'inversion_optim_results.csv')
    df.to_csv(fpath)

    # return value for tests
    return out
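# utils.rmsd() is the cost function minimized above. A sketch of the usual
# root-mean-square deviation it presumably computes:
def _rmsd_sketch(v1, v2):
    v1, v2 = np.asarray(v1), np.asarray(v2)
    return np.sqrt(np.mean((v1 - v2) ** 2))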
def get_rgi_df(reset=False):
    """This function prepares a kind of `fake` RGI file, with the updated
    geometries for ITMIX.
    """

    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers
    RGI_DIR = utils.get_rgi_dir()

    df_rgi_file = os.path.join(DATA_DIR, "itmix", "itmix_rgi_shp.pkl")
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, "itmix", "itmix_rgi_links.pkl")
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        divides = []
        rgidf = []
        _rgi_ids_for_overwrite = []
        for i, row in df_itmix.iterrows():

            log.info("Prepare RGI df for " + row.name)

            # read the rgi region
            rgi_shp = find_path(RGI_DIR, row["rgi_reg"] + "_rgi50_*.shp")
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row.T["rgi_parts_ids"]
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()

            # use the ITMIX shape where possible
            if row.name in ["Hellstugubreen", "Freya", "Aqqutikitsoq",
                            "Brewster", "Kesselwandferner", "NorthGlacier",
                            "SouthGlacier", "Tasman", "Unteraar",
                            "Washmawapta", "Columbia"]:
                shf = find_path(SEARCHD, "*_" + row.name + "*.shp")
                shp = salem.utils.read_shapefile(shf)
                if row.name == "Unteraar":
                    shp = shp.iloc[[-1]]
                if "LineString" == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0], "geometry"] = shpg.Polygon(shp.iloc[0].geometry)
                if shp.iloc[0].geometry.type == "MultiLineString":
                    # Columbia
                    geometry = shp.iloc[0].geometry
                    parts = list(geometry)
                    for p in parts:
                        assert p.type == "LineString"
                    exterior = shpg.Polygon(parts[0])
                    # let's assume that all other polygons are in fact interiors
                    interiors = []
                    for p in parts[1:]:
                        assert exterior.contains(p)
                        interiors.append(p)
                    geometry = shpg.Polygon(parts[0], interiors)
                    assert "Polygon" in geometry.type
                    shp.loc[shp.index[0], "geometry"] = geometry

                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], "geometry"] = shp
                sel.loc[sel.index[0], "Area"] = area_km2
            elif row.name == "Urumqi":
                # ITMIX Urumqi is in fact two glaciers
                shf = find_path(SEARCHD, "*_" + row.name + "*.shp")
                shp2 = salem.utils.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k], "geometry"].contains(shp.centroid)
                    sel.loc[sel.index[k], "geometry"] = shp
                    sel.loc[sel.index[k], "Area"] = area_km2
                assert len(sel) == 2
            elif len(rgi_parts) > 1:
                # Ice-caps. Make divides
                # First we gather all the parts:
                sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
                # Make the multipolygon for the record
                multi = shpg.MultiPolygon([g for g in sel.geometry])
                # update the RGI attributes. We take a dummy rgi ID
                new_area = np.sum(sel.Area)
                found = False
                for i in range(len(sel)):
                    tsel = sel.iloc[[i]].copy()
                    if "Multi" in tsel.loc[tsel.index[0], "geometry"].type:
                        continue
                    else:
                        found = True
                        sel = tsel
                        break
                if not found:
                    raise RuntimeError()

                inif = 0.0
                add = 1e-5
                if row.name == "Devon":
                    inif = 0.001
                    add = 1e-4
                while True:
                    buff = multi.buffer(inif)
                    if "Multi" in buff.type:
                        inif += add
                    else:
                        break
                x, y = multi.centroid.xy
                if "Multi" in buff.type:
                    raise RuntimeError
                sel.loc[sel.index[0], "geometry"] = buff
                sel.loc[sel.index[0], "Area"] = new_area
                sel.loc[sel.index[0], "CenLon"] = np.asarray(x)[0]
                sel.loc[sel.index[0], "CenLat"] = np.asarray(y)[0]

                # Divides db
                div_sel = dict()
                for k, v in sel.iloc[0].iteritems():
                    if k == "geometry":
                        div_sel[k] = multi
                    elif k == "RGIId":
                        div_sel["RGIID"] = v
                    else:
                        div_sel[k] = v
                divides.append(div_sel)
            else:
                pass

            # add glacier name to the entity
            name = ["I:" + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = "W-" + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = "G-" + name[z]
            sel.loc[:, "Name"] = name
            rgidf.append(sel)

        # Add divides to the original one
        adf = pd.DataFrame(divides)
        adf.to_pickle(cfg.PATHS["itmix_divs"])

        log.info("N glaciers ITMIX: {}".format(len(rgidf)))

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        wgms_df = wgms_df.loc[~wgms_df.RGI_ID.isin(_rgi_ids_for_overwrite)]

        log.info("N glaciers WGMS: {}".format(len(wgms_df)))
        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split("-")[1].split(".")[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + "_rgi50_*.shp")
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace("/", "or").replace(".", "").replace(" ", "-")
            name = ["W:" + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = "G-" + name[z]
            for n in name:
                if len(n) > 48:
                    raise RuntimeError("Name too long: " + n)
            sel.loc[:, "Name"] = name
            rgidf.append(sel)

        _rgi_ids_for_overwrite.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        gtd_df = gtd_df.loc[~gtd_df.RGI_ID.isin(_rgi_ids_for_overwrite)]
        log.info("N glaciers GTD: {}".format(len(gtd_df)))

        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split("-")[1].split(".")[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + "_rgi50_*.shp")
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace("/", "or").replace(".", "").replace(" ", "-")
            name = ["G:" + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise RuntimeError("Name too long: " + n)
            sel.loc[:, "Name"] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)
    return rgidf
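# The ice-cap branch above grows a buffer until the MultiPolygon parts fuse
# into a single Polygon. A standalone toy illustration of that loop (two
# squares separated by a small gap; the tolerances are made up):
def _fuse_parts_example():
    a = shpg.Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
    b = shpg.Polygon([(1.001, 0), (2, 0), (2, 1), (1.001, 1)])
    multi = shpg.MultiPolygon([a, b])
    inif, add = 0., 1e-3
    buff = multi.buffer(inif)
    while 'Multi' in buff.geom_type:
        inif += add
        buff = multi.buffer(inif)
    return buff  # a single Polygon once the parts have merged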