def __read_list(self, list_events_info, years_vector, first_year):
    """Pick the events of the list created in method __create_list that
    enter the hazard set.

    At most one event is kept per year: among consecutive events assigned
    to the same year, the one with the lowest minimum spei value wins.
    Events starting in November or December are assigned to the next year.

    Parameters
    ----------
    list_events_info : sequence
        one entry per event, indexed as
        (start, end, min_spei, sum_spei, sum_spei_thr)
    years_vector : np.array
        years; the position of a year gives its index in the outputs
    first_year : int
        year the running comparison is initialised with

    Returns
    -------
    tuple of five np.array of length self.n_years
        minimum spei, spei sum, thresholded spei sum, start date and end
        date of the selected event of each year (0 where nothing was set)
    """
    n_years = self.n_years
    min_per_year = np.zeros((n_years))
    sum_per_year = np.zeros((n_years))
    sum_thr_per_year = np.zeros((n_years))
    start_per_year = np.zeros((n_years))
    end_per_year = np.zeros((n_years))

    # running state: year of the last accepted event and its minimum spei
    current_year = first_year
    current_min = 0

    for event in list_events_info:
        start, end = event[0], event[1]
        min_spei, sum_spei, sum_spei_thr = event[2], event[3], event[4]

        stamp = pd.to_datetime(start)
        event_year = stamp.year
        if stamp.month > 10:
            # late-season events count for the following year
            event_year += 1

        # within the current year only a strictly lower minimum replaces
        # the event stored so far
        if current_year == event_year and not min_spei < current_min:
            continue

        slot = np.where(years_vector == event_year)
        min_per_year[slot] = min_spei
        sum_per_year[slot] = sum_spei
        sum_thr_per_year[slot] = sum_spei_thr
        start_per_year[slot] = datetime64_to_ordinal(start)
        end_per_year[slot] = datetime64_to_ordinal(end)

        current_min = min_spei
        current_year = event_year

    return (min_per_year, sum_per_year, sum_thr_per_year,
            start_per_year, end_per_year)
def test_datetime64_to_ordinal(self):
    """Check datetime64_to_ordinal for a single date and a list of dates"""
    # single datetime64: the ordinal must map back to the calendar date
    single = np.datetime64('1999-12-26T06:00:00.000000000')
    single_ordinal = u_dt.datetime64_to_ordinal(single)
    self.assertEqual(u_dt.date_to_str(single_ordinal), '1999-12-26')

    # list of datetime64: one ordinal per entry, in the same order
    stamps = [
        np.datetime64('1999-12-26T06:00:00.000000000'),
        np.datetime64('2000-12-26T06:00:00.000000000'),
    ]
    ordinals = u_dt.datetime64_to_ordinal(stamps)
    self.assertEqual(u_dt.date_to_str(ordinals[0]), '1999-12-26')
    self.assertEqual(u_dt.date_to_str(ordinals[1]), '2000-12-26')
def _read_one_nc(file_name, centroids, intensity_thres):
    """Read a single WISC footprint.

    Assumes a time dimension of length 1. The centroids are only validated
    by size against the latitude/longitude grid of the file; on a mismatch
    a warning is logged and the file is omitted. This function does not
    itself check for footprints with duplicate timestamps.

    Parameters
    ----------
    file_name : str
        Absolute or relative path to *.nc
    centroids : Centroids
        Centr. instance that matches the coordinates used in the *.nc, only
        validated by size.
    intensity_thres : float
        Intensity threshold for storage in m/s.

    Returns
    -------
    new_haz : StormEurope or None
        Hazard instance for one single storm, or None if the centroids size
        does not match the dimensions of the file.
    """
    # the context manager closes the file on every exit path: the early
    # return below, the normal return and any exception raised while reading
    with xr.open_dataset(file_name) as ncdf:
        if centroids.size != (ncdf.sizes['latitude'] * ncdf.sizes['longitude']):
            LOGGER.warning(('Centroids size doesn\'t match NCDF dimensions. '
                            'Omitting file %s.'), file_name)
            return None

        # xarray does not penalise repeated assignments, see
        # http://xarray.pydata.org/en/stable/data-structures.html
        stacked = ncdf.max_wind_gust.stack(intensity=('latitude', 'longitude', 'time'))
        # values not above the threshold are set to 0 so they are not stored
        stacked = stacked.where(stacked > intensity_thres)
        stacked = stacked.fillna(0)

        # fill in values from netCDF
        new_haz = StormEurope()
        new_haz.event_name = [ncdf.storm_name]
        new_haz.date = np.array([datetime64_to_ordinal(ncdf.time.data[0])])
        new_haz.intensity = sparse.csr_matrix(stacked)
        new_haz.ssi_wisc = np.array([float(ncdf.ssi)])

    # fill in default values
    new_haz.centroids = centroids
    new_haz.event_id = np.array([1])
    new_haz.frequency = np.array([1])
    # fraction is 1 wherever an intensity is stored
    new_haz.fraction = new_haz.intensity.copy().tocsr()
    new_haz.fraction.data.fill(1)
    new_haz.orig = np.array([True])

    return new_haz
def from_icon_grib(cls, run_datetime, event_date=None, model_name='icon-eu-eps',
                   description=None, grib_dir=None, delete_raw_data=True,
                   intensity_thres=None):
    """Create new StormEurope object from DWD icon weather forecast footprints.

    New files are available for 24 hours on https://opendata.dwd.de,
    old files can be processed if they are already stored in grib_dir.
    One event is one full day in UTC. Current setup works for runs
    starting at 00H and 12H. Otherwise the aggregation is inaccurate,
    because of the given file structure with 1-hour, 3-hour and
    6-hour maxima provided.

    The frequency for one event is 1/(number of ensemble members)

    Parameters
    ----------
    run_datetime : datetime
        The starting timepoint of the forecast run of the icon model
    event_date : datetime, optional
        one day within the forecast period, only this day (00H-24H) will be
        included in the hazard
    model_name : str, optional
        select the name of the icon model to be downloaded. Must match the
        url on https://opendata.dwd.de (see download_icon_grib for further
        info)
    description : str, optional
        description of the events, defaults to a combination of model_name
        and run_datetime
    grib_dir : str, optional
        path to folder, where grib files are or should be stored
    delete_raw_data : bool, optional
        select if downloaded raw data in .grib.bz2 file format should be
        stored on the computer or removed
    intensity_thres : float, optional
        Intensity threshold for storage in m/s. Default: class attribute
        StormEurope.intensity_thres (same as used by WISC SSI calculations)

    Returns
    -------
    haz : StormEurope
        StormEurope object with data from DWD icon weather forecast footprints.

    Raises
    ------
    ValueError
        if event_date cannot be selected from the downloaded forecast
    """
    intensity_thres = cls.intensity_thres if intensity_thres is None else intensity_thres

    haz = cls()
    if not (run_datetime.hour == 0 or run_datetime.hour == 12):
        LOGGER.warning('The event definition is inaccuratly implemented '
                       + 'for starting times, which are not 00H or 12H.')

    # download files, if they don't already exist
    file_names = download_icon_grib(run_datetime, model_name=model_name,
                                    download_dir=grib_dir)

    # create centroids
    nc_centroids_file = download_icon_centroids_file(model_name, grib_dir)
    haz.centroids = haz._centroids_from_nc(nc_centroids_file)

    # read intensity from files
    for ind_i, file_i in enumerate(file_names):
        # strip the 4-character compression suffix to get the grib path
        gripfile_path_i = Path(file_i[:-4])
        with open(file_i, 'rb') as source, open(gripfile_path_i, 'wb') as dest:
            dest.write(bz2.decompress(source.read()))
        ds_i = xr.open_dataset(gripfile_path_i, engine='cfgrib')
        if ind_i == 0:
            stacked = ds_i
        else:
            stacked = xr.concat([stacked, ds_i], 'valid_time')

    # create intensity matrix with max for each full day
    stacked = stacked.assign_coords(
        date=('valid_time', stacked["valid_time"].dt.floor("D").values))
    if event_date:
        try:
            stacked = stacked.sel(valid_time=event_date.strftime(
                '%Y-%m-%d')).groupby('date').max()
        except KeyError as err:
            # keep the KeyError as explicit cause of the user-facing error
            raise ValueError('Extraction of date and coordinates failed. '
                             'This is most likely because '
                             'the selected event_date {} is not contained'
                             ' in the weather forecast selected by '
                             'run_datetime {}. Please adjust event_date'
                             ' or run_datetime.'.format(
                                 event_date.strftime('%Y-%m-%d'),
                                 run_datetime.strftime('%Y-%m-%d %H:%M'))) from err
        considered_dates = np.datetime64(event_date)
    else:
        time_covered_step = stacked['valid_time'].diff('valid_time')
        time_covered_day = time_covered_step.groupby('date').sum()
        # forecast run should cover at least 18 hours of a day
        considered_dates_bool = time_covered_day >= np.timedelta64(18, 'h')
        stacked = stacked.groupby('date').max().sel(
            date=considered_dates_bool)
        considered_dates = stacked['date'].values
    # one event per combination of day and ensemble member
    stacked = stacked.stack(date_ensemble=('date', 'number'))
    stacked = stacked.where(stacked > intensity_thres)
    stacked = stacked.fillna(0)

    # fill in values from netCDF
    haz.intensity = sparse.csr_matrix(stacked.gust.T)
    haz.event_id = np.arange(stacked.date_ensemble.size) + 1

    # fill in default values
    haz.units = 'm/s'
    haz.fraction = haz.intensity.copy().tocsr()
    haz.fraction.data.fill(1)
    haz.orig = np.ones_like(haz.event_id) * False
    haz.orig[(stacked.number == 1).values] = True
    haz.date = np.repeat(np.array(datetime64_to_ordinal(considered_dates)),
                         np.unique(stacked.number).size)
    haz.event_name = [
        date_i + '_ens' + str(ens_i)
        for date_i, ens_i in zip(
            date_to_str(haz.date), stacked.number.values)
    ]
    haz.frequency = np.divide(np.ones_like(haz.event_id),
                              np.unique(stacked.number).size)
    if not description:
        description = ('icon weather forecast windfield '
                       + 'for run startet at '
                       + run_datetime.strftime('%Y%m%d%H'))
    haz.tag = TagHazard(HAZ_TYPE,
                        'Hazard set not saved, too large to pickle',
                        description=description)
    haz.check()

    # delete generated .grib2 and .4cc40.idx files
    for file_i in file_names:
        gripfile_path_i = Path(file_i[:-4])
        # remove every matching index file; a missing index file must not
        # abort the cleanup after the hazard has been built
        idxfile_paths_i = list(gripfile_path_i.parent.glob(
            str(gripfile_path_i.name) + '.*.idx'))
        gripfile_path_i.unlink()
        for idxfile_path_i in idxfile_paths_i:
            idxfile_path_i.unlink()

    if delete_raw_data:
        # delete downloaded .bz2 files
        delete_icon_grib(run_datetime, model_name=model_name,
                         download_dir=grib_dir)

    return haz
def from_cosmoe_file(cls, fp_file, run_datetime, event_date=None,
                     model_name='COSMO-2E', description=None,
                     intensity_thres=None):
    """Create a new StormEurope object with gust footprint from weather forecast.

    The function is designed for the COSMO ensemble model used by
    the COSMO Consortium http://www.cosmo-model.org/ and postprocessed to
    a netcdf file using fieldextra. One event is one full day in UTC.
    Works for MeteoSwiss model output of
    COSMO-1E (11 members, resolution 1.1 km, forecast period 33-45 hours)
    COSMO-2E (21 members, resolution 2.2 km, forecast period 5 days)

    The frequency of each event is informed by their probability in the
    ensemble forecast and is equal to 1/11 or 1/21 for COSMO-1E or COSMO-2E,
    respectively.

    Parameters
    ----------
    fp_file : str
        string directing to one netcdf file
    run_datetime : datetime
        The starting timepoint of the forecast run of the cosmo model
    event_date : datetime, optional
        one day within the forecast period, only this day (00H-24H) will be
        included in the hazard
    model_name : str, optional
        provide the name of the COSMO model, for the description
        (e.g., 'COSMO-1E', 'COSMO-2E')
    description : str, optional
        description of the events, defaults to a combination of model_name
        and run_datetime
    intensity_thres : float, optional
        Intensity threshold for storage in m/s. Default: class attribute
        StormEurope.intensity_thres (same as used by WISC SSI calculations)

    Returns
    -------
    haz : StormEurope
        StormEurope object with data from COSMO ensemble file.

    Raises
    ------
    ValueError
        if event_date cannot be selected from the data in fp_file
    """
    intensity_thres = cls.intensity_thres if intensity_thres is None else intensity_thres

    haz = cls()
    # create centroids
    haz.centroids = cls._centroids_from_nc(fp_file)

    # read intensity from file
    # The opened dataset is managed by the context manager so that the file
    # is closed on every exit path, including the ValueError below. The
    # dataset returned by assign_coords is a new object, so closing that one
    # is not relied upon to release the file.
    with xr.open_dataset(fp_file) as nc_file:
        ncdf = nc_file.assign_coords(
            date=('time', nc_file["time"].dt.floor("D").values))

        if event_date:
            try:
                stacked = ncdf.sel(time=event_date.strftime(
                    '%Y-%m-%d')).groupby('date').max().stack(intensity=('y_1', 'x_1'))
            except KeyError as err:
                # keep the KeyError as explicit cause of the user-facing error
                raise ValueError('Extraction of date and coordinates failed. '
                                 'This is most likely because '
                                 'the selected event_date {} is not contained'
                                 ' in the weather forecast selected by '
                                 'fp_file {}. Please adjust event_date'
                                 ' or fp_file.'.format(
                                     event_date.strftime('%Y-%m-%d'),
                                     fp_file)) from err
            considered_dates = np.datetime64(event_date)
        else:
            time_covered_step = ncdf['time'].diff('time')
            time_covered_day = time_covered_step.groupby('date').sum()
            # forecast run should cover at least 18 hours of a day
            considered_dates_bool = time_covered_day >= np.timedelta64(18, 'h')
            stacked = ncdf.groupby('date').max().sel(
                date=considered_dates_bool).stack(intensity=('y_1', 'x_1'))
            considered_dates = stacked['date'].values
        # one event per combination of day and ensemble member
        stacked = stacked.stack(date_ensemble=('date', 'epsd_1'))
        stacked = stacked.where(stacked.VMAX_10M > intensity_thres)
        stacked = stacked.fillna(0)

        # fill in values from netCDF
        haz.intensity = sparse.csr_matrix(stacked.VMAX_10M.T)
        haz.event_id = np.arange(stacked.date_ensemble.size) + 1

        # fill in default values
        haz.units = 'm/s'
        haz.fraction = haz.intensity.copy().tocsr()
        haz.fraction.data.fill(1)
        haz.orig = np.ones_like(haz.event_id) * False
        haz.orig[(stacked.epsd_1 == 0).values] = True
        haz.date = np.repeat(np.array(datetime64_to_ordinal(considered_dates)),
                             np.unique(ncdf.epsd_1).size)
        # member labels in the event names start at 1 (epsd_1 value + 1)
        haz.event_name = [
            date_i + '_ens' + str(ens_i)
            for date_i, ens_i in zip(
                date_to_str(haz.date), stacked.epsd_1.values + 1)
        ]
        haz.frequency = np.divide(np.ones_like(haz.event_id),
                                  np.unique(ncdf.epsd_1).size)

    if not description:
        description = (model_name + ' weather forecast windfield '
                       + 'for run startet at '
                       + run_datetime.strftime('%Y%m%d%H'))
    haz.tag = TagHazard(HAZ_TYPE,
                        'Hazard set not saved, too large to pickle',
                        description=description)

    haz.check()
    return haz
def read_cosmoe_file(self, fp_file, run_datetime, event_date=None,
                     model_name='COSMO-2E', description=None):
    """Clear instance and read gust footprint from weather forecast into it.

    The function is designed for the COSMO ensemble model used by
    the COSMO Consortium http://www.cosmo-model.org/ and postprocessed to
    a netcdf file using fieldextra. One event is one full day in UTC.
    Works for MeteoSwiss model output of
    COSMO-1E (11 members, resolution 1.1 km, forecast period 33-45 hours)
    COSMO-2E (21 members, resolution 2.2 km, forecast period 5 days)

    Intensities not above self.intensity_thres are stored as 0.

    Parameters:
        fp_file (str): string directing to one netcdf file
        run_datetime (datetime): The starting timepoint of the forecast run
            of the cosmo model
        event_date (datetime, optional): one day within the forecast
            period, only this day (00H-24H) will be included in the hazard
        model_name (str, optional): provide the name of the COSMO model,
            for the description (e.g., 'COSMO-1E', 'COSMO-2E')
        description (str, optional): description of the events, defaults
            to a combination of model_name and run_datetime

    Raises:
        ValueError: if event_date cannot be selected from the data in
            fp_file
    """
    self.clear()
    # create centroids
    self.centroids = self._centroids_from_nc(fp_file)

    # read intensity from file
    # The opened dataset is managed by the context manager so that the file
    # is closed on every exit path, including the ValueError below. The
    # dataset returned by assign_coords is a new object, so closing that one
    # is not relied upon to release the file.
    with xr.open_dataset(fp_file) as nc_file:
        # pass plain values for the new coordinate, as from_cosmoe_file does
        ncdf = nc_file.assign_coords(
            date=('time', nc_file["time"].dt.floor("D").values))

        if event_date:
            try:
                stacked = ncdf.sel(time=event_date.strftime(
                    '%Y-%m-%d')).groupby('date').max().stack(intensity=('y_1', 'x_1'))
            except KeyError as err:
                # keep the KeyError as explicit cause of the user-facing error
                raise ValueError('Extraction of date and coordinates failed. '
                                 'This is most likely because '
                                 'the selected event_date {} is not contained'
                                 ' in the weather forecast selected by '
                                 'fp_file {}. Please adjust event_date'
                                 ' or fp_file.'.format(
                                     event_date.strftime('%Y-%m-%d'),
                                     fp_file)) from err
            considered_dates = np.datetime64(event_date)
        else:
            time_covered_step = ncdf['time'].diff('time')
            time_covered_day = time_covered_step.groupby('date').sum()
            # forecast run should cover at least 18 hours of a day
            considered_dates_bool = time_covered_day >= np.timedelta64(18, 'h')
            stacked = ncdf.groupby('date').max().sel(
                date=considered_dates_bool).stack(intensity=('y_1', 'x_1'))
            considered_dates = stacked['date'].values
        # one event per combination of day and ensemble member
        stacked = stacked.stack(date_ensemble=('date', 'epsd_1'))
        stacked = stacked.where(stacked.VMAX_10M > self.intensity_thres)
        stacked = stacked.fillna(0)

        # fill in values from netCDF
        self.intensity = sparse.csr_matrix(stacked.VMAX_10M.T)
        self.event_id = np.arange(stacked.date_ensemble.size) + 1

        # fill in default values
        self.units = 'm/s'
        self.fraction = self.intensity.copy().tocsr()
        self.fraction.data.fill(1)
        self.orig = np.ones_like(self.event_id) * False
        self.orig[(stacked.epsd_1 == 0).values] = True
        self.date = np.repeat(
            np.array(datetime64_to_ordinal(considered_dates)),
            np.unique(ncdf.epsd_1).size)
        # member labels in the event names start at 1 (epsd_1 value + 1)
        self.event_name = [
            date_i + '_ens' + str(ens_i)
            for date_i, ens_i in zip(
                date_to_str(self.date), stacked.epsd_1.values + 1)
        ]
        self.frequency = np.divide(np.ones_like(self.event_id),
                                   np.unique(ncdf.epsd_1).size)

    if not description:
        description = (model_name + ' weather forecast windfield '
                       + 'for run startet at '
                       + run_datetime.strftime('%Y%m%d%H'))
    self.tag = TagHazard(HAZ_TYPE,
                         'Hazard set not saved, too large to pickle',
                         description=description)

    self.check()