def test_download_rgi(self):

    tmp = cfg.PATHS['rgi_dir']
    cfg.PATHS['rgi_dir'] = os.path.join(TEST_DIR, 'rgi_extract')

    of = utils.get_rgi_dir()
    of = os.path.join(of, '01_rgi50_Alaska', '01_rgi50_Alaska.shp')
    self.assertTrue(os.path.exists(of))

    cfg.PATHS['rgi_dir'] = tmp
def test_download_rgi(self):

    cfg.initialize()

    tmp = cfg.PATHS['rgi_dir']
    cfg.PATHS['rgi_dir'] = TEST_DIR

    of = utils.get_rgi_dir()
    of = os.path.join(of, '01_rgi50_Alaska', '01_rgi50_Alaska.shp')
    self.assertTrue(os.path.exists(of))

    cfg.PATHS['rgi_dir'] = tmp
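# The two tests above exercise the pattern every script below relies on:
# point cfg.PATHS['rgi_dir'] at a writable directory, let get_rgi_dir()
# download and extract the RGI archive on first call, then build paths to
# the regional shapefiles. A minimal standalone sketch (the temporary
# directory is an illustrative assumption):
import os
import tempfile

from oggm import cfg, utils

cfg.initialize()
cfg.PATHS['rgi_dir'] = os.path.join(tempfile.gettempdir(), 'rgi_demo')

rgi_dir = utils.get_rgi_dir()  # downloads and extracts on first call
alaska_shp = os.path.join(rgi_dir, '01_rgi50_Alaska', '01_rgi50_Alaska.shp')
print(os.path.exists(alaska_shp))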
def get_rgi_df(reset=False):
    """This function prepares a kind of `fake` RGI file, with the updated
    geometries for ITMIX.
    """

    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers
    RGI_DIR = utils.get_rgi_dir()
    df_rgi_file = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_shp.pkl')
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_links.pkl')
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        divides = []
        rgidf = []
        _rgi_ids_for_overwrite = []
        for i, row in df_itmix.iterrows():

            log.info('Prepare RGI df for ' + row.name)

            # read the rgi region
            rgi_shp = find_path(RGI_DIR, row['rgi_reg'] + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row.T['rgi_parts_ids']
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()

            # use the ITMIX shape where possible
            if row.name in ['Hellstugubreen', 'Freya', 'Aqqutikitsoq',
                            'Brewster', 'Kesselwandferner', 'NorthGlacier',
                            'SouthGlacier', 'Tasman', 'Unteraar',
                            'Washmawapta', 'Columbia']:
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp = salem.read_shapefile(shf)
                if row.name == 'Unteraar':
                    shp = shp.iloc[[-1]]
                if 'LineString' == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0], 'geometry'] = shpg.Polygon(
                        shp.iloc[0].geometry)
                if shp.iloc[0].geometry.type == 'MultiLineString':
                    # Columbia
                    geometry = shp.iloc[0].geometry
                    parts = list(geometry)
                    for p in parts:
                        assert p.type == 'LineString'
                    exterior = shpg.Polygon(parts[0])
                    # let's assume that all other polygons are in fact
                    # interiors
                    interiors = []
                    for p in parts[1:]:
                        assert exterior.contains(p)
                        interiors.append(p)
                    geometry = shpg.Polygon(parts[0], interiors)
                    assert 'Polygon' in geometry.type
                    shp.loc[shp.index[0], 'geometry'] = geometry

                assert len(shp) == 1
                # geometry is in a metric projection here: m^2 -> km^2
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], 'geometry'] = shp
                sel.loc[sel.index[0], 'Area'] = area_km2
            elif row.name == 'Urumqi':
                # ITMIX Urumqi is in fact two glaciers
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp2 = salem.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k], 'geometry'].contains(
                        shp.centroid)
                    sel.loc[sel.index[k], 'geometry'] = shp
                    sel.loc[sel.index[k], 'Area'] = area_km2
                assert len(sel) == 2
            elif len(rgi_parts) > 1:
                # Ice-caps. Make divides
                # First we gather all the parts:
                sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
                # Make the multipolygon for the record
                multi = shpg.MultiPolygon([g for g in sel.geometry])
                # update the RGI attributes. We take a dummy rgi ID
                new_area = np.sum(sel.Area)
                found = False
                for i in range(len(sel)):
                    tsel = sel.iloc[[i]].copy()
                    if 'Multi' in tsel.loc[tsel.index[0], 'geometry'].type:
                        continue
                    else:
                        found = True
                        sel = tsel
                        break
                if not found:
                    raise RuntimeError()

                inif = 0.
                add = 1e-5
                if row.name == 'Devon':
                    inif = 0.001
                    add = 1e-4
                while True:
                    buff = multi.buffer(inif)
                    if 'Multi' in buff.type:
                        inif += add
                    else:
                        break
                x, y = multi.centroid.xy
                if 'Multi' in buff.type:
                    raise RuntimeError
                sel.loc[sel.index[0], 'geometry'] = buff
                sel.loc[sel.index[0], 'Area'] = new_area
                sel.loc[sel.index[0], 'CenLon'] = np.asarray(x)[0]
                sel.loc[sel.index[0], 'CenLat'] = np.asarray(y)[0]

                # Divides db
                div_sel = dict()
                for k, v in sel.iloc[0].iteritems():
                    if k == 'geometry':
                        div_sel[k] = multi
                    elif k == 'RGIId':
                        div_sel['RGIID'] = v
                    else:
                        div_sel[k] = v
                divides.append(div_sel)
            else:
                # single-part glacier: keep the RGI outline as is
                pass

            # add glacier name to the entity
            name = ['I:' + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'W-' + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Add divides to the original one
        adf = pd.DataFrame(divides)
        adf.to_pickle(cfg.PATHS['itmix_divs'])

        log.info('N glaciers ITMIX: {}'.format(len(rgidf)))

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        wgms_df = wgms_df.loc[~wgms_df.RGI_ID.isin(_rgi_ids_for_overwrite)]

        log.info('N glaciers WGMS: {}'.format(len(wgms_df)))
        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['W:' + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        _rgi_ids_for_overwrite.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        gtd_df = gtd_df.loc[~gtd_df.RGI_ID.isin(_rgi_ids_for_overwrite)]
        log.info('N glaciers GTD: {}'.format(len(gtd_df)))
        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['G:' + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)
    return rgidf
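# A minimal usage sketch for get_rgi_df() above. DATA_DIR, the logger and
# the pickled ITMIX link file are assumptions inherited from the
# surrounding script; the first call builds and caches the dataframe,
# reset=True forces a rebuild:
rgidf = get_rgi_df()
rgidf = get_rgi_df(reset=True)
print('{} glaciers in the merged ITMIX/WGMS/GlaThiDa dataframe'.format(len(rgidf)))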
utils.mkdir(cfg.PATHS['topo_dir'])
utils.mkdir(cfg.PATHS['cru_dir'])
utils.mkdir(cfg.PATHS['rgi_dir'])

# Use multiprocessing?
cfg.PARAMS['use_multiprocessing'] = True
cfg.CONTINUE_ON_ERROR = True

# Other params
cfg.PARAMS['border'] = 80
cfg.PARAMS['temp_use_local_gradient'] = False
cfg.PARAMS['invert_with_sliding'] = False
cfg.PARAMS['bed_shape'] = 'mixed'

# Download RGI files
rgi_dir = utils.get_rgi_dir()
rgi_shp = list(glob(os.path.join(rgi_dir, '*', rgi_reg + '_rgi50_*.shp')))
assert len(rgi_shp) == 1
rgidf = gpd.read_file(rgi_shp[0])

log.info('Number of glaciers: {}'.format(len(rgidf)))

# Download other files if needed
_ = utils.get_cru_file(var='tmp')
_ = utils.get_cru_file(var='pre')
_ = utils.get_demo_file('Hintereisferner.shp')

# Go - initialize working directories
# gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)
gdirs = workflow.init_glacier_regions(rgidf)
utils.mkdir(cfg.PATHS['cru_dir'])
utils.mkdir(cfg.PATHS['rgi_dir'])
utils.mkdir(cfg.PATHS['tmp_dir'])

# Use multiprocessing?
cfg.PARAMS['use_multiprocessing'] = True
cfg.CONTINUE_ON_ERROR = False

# Read in the Benchmark RGI file
rgi_pkl_path = utils.aws_file_download('rgi_benchmark.pkl')
rgidf = pd.read_pickle(rgi_pkl_path)

# Remove glaciers causing issues
rgidf = rgidf.loc[~rgidf['RGIId'].isin(['RGI50-11.00291', 'RGI50-03.02479'])]

utils.get_rgi_dir()

log.info('Number of glaciers: {}'.format(len(rgidf)))

# Go - initialize working directories
# gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)
gdirs = workflow.init_glacier_regions(rgidf)

# Prepro tasks
task_list = [
    tasks.glacier_masks,
    tasks.compute_centerlines,
    tasks.compute_downstream_lines,
    tasks.catchment_area,
    tasks.initialize_flowlines,
    tasks.catchment_width_geom,
cfg.PARAMS['baseline_climate'] = baseline

# Use multiprocessing?
cfg.PARAMS['use_multiprocessing'] = True

# Set to True for operational runs - here we want all glaciers to run
cfg.PARAMS['continue_on_error'] = False

if baseline == 'HISTALP':
    # Other params: see https://oggm.org/2018/08/10/histalp-parameters/
    cfg.PARAMS['baseline_y0'] = 1850
    cfg.PARAMS['prcp_scaling_factor'] = 1.75
    cfg.PARAMS['temp_melt'] = -1.75

# Get the reference glacier ids (they are different for each RGI version)
rgi_dir = utils.get_rgi_dir(version=rgi_version)
df, _ = utils.get_wgms_files()
rids = df['RGI{}0_ID'.format(rgi_version[0])]

# We can't do Antarctica
rids = [rid for rid in rids if not ('-19.' in rid)]

# For HISTALP only RGI reg 11
if baseline == 'HISTALP':
    rids = [rid for rid in rids if '-11.' in rid]

# Make a new dataframe with those (this takes a while)
log.info('Reading the RGI shapefiles...')
rgidf = utils.get_rgi_glacier_entities(rids, version=rgi_version)
log.info('For RGIV{} we have {} candidate reference '
         'glaciers.'.format(rgi_version, len(rgidf)))
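# The id filters above are plain substring checks on RGI ids. A tiny
# self-contained illustration with made-up ids:
rids = ['RGI60-11.00897', 'RGI60-19.00123', 'RGI60-01.00570']
rids = [rid for rid in rids if '-19.' not in rid]  # drop Antarctica
rids = [rid for rid in rids if '-11.' in rid]      # keep only the Alps
print(rids)  # -> ['RGI60-11.00897']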
def initialization_selection():
    # -------------
    # Initialization
    # -------------
    cfg.initialize()

    # working directories
    cfg.PATHS['working_dir'] = mbcfg.PATHS['working_dir']

    cfg.PATHS['rgi_version'] = mbcfg.PARAMS['rgi_version']

    # We are running the calibration ourselves
    cfg.PARAMS['run_mb_calibration'] = True

    # No need for intersects since this has an effect on the inversion only
    cfg.PARAMS['use_intersects'] = False

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # Set to True for operational runs - maybe also here?
    cfg.PARAMS['continue_on_error'] = True

    # Set negative flux filtering to False (should be standard soon)
    cfg.PARAMS['filter_for_neg_flux'] = False

    # Correct negative fluxes with flowline mus
    cfg.PARAMS['correct_for_neg_flux'] = True

    # Use glacier-wide mu_star in order to find t_star: it's faster!
    cfg.PARAMS['tstar_search_glacierwide'] = True

    # Pre-download other files which will be needed later
    # _ = cru.get_cru_file(var='tmp')
    # _ = cru.get_cru_file(var='pre')

    rgi_dir = utils.get_rgi_dir(version=cfg.PATHS['rgi_version'])

    # Get the reference glacier ids (they are different for each RGI version)
    df, _ = utils.get_wgms_files()
    rids = df['RGI{}0_ID'.format(cfg.PATHS['rgi_version'])]

    # Make a new dataframe with those (this takes a while)
    rgidf = []
    for reg in df['RGI_REG'].unique():
        if reg == '19':
            continue  # we have no climate data in Antarctica
        if mbcfg.PARAMS['region'] is not None \
                and reg != mbcfg.PARAMS['region']:
            continue

        fn = '*' + reg + '_rgi{}0_*.shp'.format(cfg.PATHS['rgi_version'])
        fs = list(sorted(glob(os.path.join(rgi_dir, '*', fn))))[0]
        sh = gpd.read_file(fs)
        rgidf.append(sh.loc[sh.RGIId.isin(rids)])
    rgidf = pd.concat(rgidf)
    rgidf.crs = sh.crs  # for geolocalisation

    # reduce Europe to the HISTALP area (exclude Pyrenees, etc.)
    rgidf = rgidf.loc[(rgidf.CenLon >= 4) & (rgidf.CenLon < 20) &
                      (rgidf.CenLat >= 43) & (rgidf.CenLat < 47)]

    # and set standard HISTALP values
    cfg.PARAMS['prcp_scaling_factor'] = 1.75
    cfg.PARAMS['temp_all_liq'] = 2.0
    cfg.PARAMS['temp_melt'] = -1.75
    cfg.PARAMS['temp_default_gradient'] = -0.0065

    # We have to check which of them actually have enough mb data.
    # Let OGGM do it:
    gdirs = workflow.init_glacier_regions(rgidf)

    # We need to know which period we have data for
    cfg.PARAMS['baseline_climate'] = 'HISTALP'
    execute_entity_task(tasks.process_histalp_data, gdirs)

    gdirs = utils.get_ref_mb_glaciers(gdirs)

    # Keep only these
    rgidf = rgidf.loc[rgidf.RGIId.isin([g.rgi_id for g in gdirs])]

    # Save
    rgidf.to_file(os.path.join(cfg.PATHS['working_dir'],
                               'mb_ref_glaciers.shp'))

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    # Go - initialize working directories
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)

    return gdirs
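# A hedged usage sketch for the helper above (mbcfg is the calibration
# script's own config module, inherited from the function body): the
# returned glacier directories are ready for the calibration tasks.
gdirs = initialization_selection()
print('Calibrating mass balance on {} reference glaciers'.format(len(gdirs)))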
cfg.PATHS['working_dir'] = WORKING_DIR
cfg.PATHS['topo_dir'] = os.path.join(DATA_DIR, 'topo')
cfg.PATHS['rgi_dir'] = os.path.join(DATA_DIR, 'rgi')
utils.mkdir(WORKING_DIR)
utils.mkdir(cfg.PATHS['topo_dir'])
utils.mkdir(cfg.PATHS['rgi_dir'])

# Use multiprocessing?
cfg.PARAMS['use_multiprocessing'] = False
cfg.PARAMS['border'] = 20
cfg.CONTINUE_ON_ERROR = False

# Read in the RGI file
rgisel = os.path.join(WORKING_DIR, 'rgi_selection.shp')
if not os.path.exists(rgisel):
    rgi_dir = utils.get_rgi_dir()
    regions = ['{:02d}'.format(int(p)) for p in range(1, 20)]
    files = [glob.glob(os.path.join(rgi_dir, '*', r + '_rgi50_*.shp'))[0]
             for r in regions]
    rgidf = []
    for fs in files:
        sh = salem.read_shapefile(os.path.join(rgi_dir, fs), cached=True)
        percs = np.asarray([0, 25, 50, 75, 100])
        idppercs = np.round(percs * 0.01 * (len(sh) - 1)).astype(int)

        rgidf.append(sh.sort_values(by='Area').iloc[idppercs])
        rgidf.append(sh.sort_values(by='CenLon').iloc[idppercs])
        rgidf.append(sh.sort_values(by='CenLat').iloc[idppercs])
    rgidf = gpd.GeoDataFrame(pd.concat(rgidf))
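# The selection above keeps, per region, the glaciers sitting at the
# 0/25/50/75/100th percentiles of Area, CenLon and CenLat. A self-contained
# sketch of the index arithmetic (the region size is an illustrative
# assumption):
import numpy as np

n = 1000  # pretend the regional shapefile has 1000 entries
percs = np.asarray([0, 25, 50, 75, 100])
idppercs = np.round(percs * 0.01 * (n - 1)).astype(int)
print(idppercs)  # -> [  0 250 500 749 999]: first row, quartiles, last row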
def mb_calibration(rgi_version, baseline):
    """Run the mass balance calibration for the VAS model. RGI version and
    baseline climate must be given.

    :param rgi_version: str, RGI version (e.g. '61')
    :param baseline: str, baseline climate 'HISTALP' or 'CRU'
    """
    # initialize OGGM and set up the run parameters
    vascaling.initialize(logging_level='WORKFLOW')

    # local paths (where to write the OGGM run output)
    # dirname = 'VAS_ref_mb_{}_RGIV{}'.format(baseline, rgi_version)
    # wdir = utils.gettempdir(dirname, home=True, reset=True)
    # utils.mkdir(wdir, reset=True)
    wdir = os.environ['WORKDIR']
    cfg.PATHS['working_dir'] = wdir

    # we are running the calibration ourselves
    cfg.PARAMS['run_mb_calibration'] = True

    # we are using which baseline data?
    cfg.PARAMS['baseline_climate'] = baseline

    # no need for intersects since this has an effect on the inversion only
    cfg.PARAMS['use_intersects'] = False

    # use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    if baseline == 'HISTALP':
        # other params: see https://oggm.org/2018/08/10/histalp-parameters/
        # cfg.PARAMS['prcp_scaling_factor'] = 1.75
        # cfg.PARAMS['temp_melt'] = -1.75
        cfg.PARAMS['prcp_scaling_factor'] = 2.5
        cfg.PARAMS['temp_melt'] = -0.5
    elif baseline == 'CRU':
        # using the parameters from Marzeion et al. (2012)
        cfg.PARAMS['prcp_scaling_factor'] = 2.5
        cfg.PARAMS['temp_melt'] = 1
        cfg.PARAMS['temp_all_solid'] = 3

    # the next step is to get all the reference glaciers,
    # i.e. glaciers with mass balance measurements.

    # get the reference glacier ids (they are different for each RGI version)
    rgi_dir = utils.get_rgi_dir(version=rgi_version)
    df, _ = utils.get_wgms_files()
    rids = df['RGI{}0_ID'.format(rgi_version[0])]

    # we can't do Antarctica
    rids = [rid for rid in rids if not ('-19.' in rid)]

    # for HISTALP only RGI reg 11.01 (Alps)
    # NOTE: the `or True` makes this filter apply to all baselines
    if baseline == 'HISTALP' or True:
        rids = [rid for rid in rids if '-11' in rid]

    debug = False
    if debug:
        print("==================================\n"
              "DEBUG MODE: only RGI60-11.00897\n"
              "==================================")
        rids = [rid for rid in rids if '-11.00897' in rid]
        cfg.PARAMS['use_multiprocessing'] = False

    # make a new dataframe with those (this takes a while)
    print('Reading the RGI shapefiles...')
    rgidf = utils.get_rgi_glacier_entities(rids, version=rgi_version)
    print('For RGIV{} we have {} candidate reference '
          'glaciers.'.format(rgi_version, len(rgidf)))

    # initialize the glacier regions
    gdirs = workflow.init_glacier_directories(rgidf, reset=False, force=True)
    workflow.execute_entity_task(gis.define_glacier_region, gdirs)
    workflow.execute_entity_task(gis.glacier_masks, gdirs)

    # we need to know which period we have data for
    print('Process the climate data...')
    if baseline == 'CRU':
        execute_entity_task(tasks.process_cru_data, gdirs, print_log=False)
    elif baseline == 'HISTALP':
        # some glaciers are not in the Alps
        gdirs = [gdir for gdir in gdirs if gdir.rgi_subregion == '11-01']
        # cfg.PARAMS['continue_on_error'] = True
        execute_entity_task(tasks.process_histalp_data, gdirs,
                            print_log=False, y0=1850)
        # cfg.PARAMS['continue_on_error'] = False
    else:
        execute_entity_task(tasks.process_custom_climate_data, gdirs,
                            print_log=False)

    # get reference glaciers with mass balance measurements
    gdirs = utils.get_ref_mb_glaciers(gdirs)

    # keep only these glaciers
    rgidf = rgidf.loc[rgidf.RGIId.isin([g.rgi_id for g in gdirs])]

    # save to file
    rgidf.to_file(os.path.join(wdir, 'mb_ref_glaciers.shp'))
    print('For RGIV{} and {} we have {} reference '
          'glaciers'.format(rgi_version, baseline, len(rgidf)))

    # sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    # newly initialize glacier directories
    gdirs = workflow.init_glacier_directories(rgidf, reset=False, force=True)
    workflow.execute_entity_task(gis.define_glacier_region, gdirs)
    workflow.execute_entity_task(gis.glacier_masks, gdirs)

    # run climate tasks
    vascaling.compute_ref_t_stars(gdirs)
    execute_entity_task(vascaling.local_t_star, gdirs)

    # we store the associated params
    mb_calib = gdirs[0].read_pickle('climate_info')['mb_calib_params']
    with open(os.path.join(wdir, 'mb_calib_params.json'), 'w') as fp:
        json.dump(mb_calib, fp)
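# A hedged usage sketch for the calibration entry point above. The RGI
# version string and the WORKDIR value are illustrative assumptions:
import os

os.environ.setdefault('WORKDIR', '/tmp/vas_mb_calib')  # must be a valid dir
mb_calibration(rgi_version='61', baseline='HISTALP')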