def test_category_pass(self):
    """Test category computation."""
    max_sus_wind = np.array([25, 30, 35, 40, 45, 45, 45, 45, 35, 25])
    max_sus_wind_unit = 'kn'
    cat = tc.set_category(max_sus_wind, max_sus_wind_unit)
    self.assertEqual(0, cat)

    max_sus_wind = np.array([25, 25, 25, 30, 30, 30, 30, 30, 25, 25, 20])
    max_sus_wind_unit = 'kn'
    cat = tc.set_category(max_sus_wind, max_sus_wind_unit)
    self.assertEqual(-1, cat)

    max_sus_wind = np.array(
        [80, 90, 100, 115, 120, 125, 130, 120, 110, 80, 75, 80, 65])
    max_sus_wind_unit = 'kn'
    cat = tc.set_category(max_sus_wind, max_sus_wind_unit)
    self.assertEqual(4, cat)

    max_sus_wind = np.array([
        28.769475, 34.52337, 40.277265, 46.03116, 51.785055, 51.785055,
        51.785055, 51.785055, 40.277265, 28.769475
    ])
    max_sus_wind_unit = 'mph'
    cat = tc.set_category(max_sus_wind, max_sus_wind_unit)
    self.assertEqual(0, cat)

    max_sus_wind = np.array([
        12.86111437, 12.86111437, 12.86111437, 15.43333724, 15.43333724,
        15.43333724, 15.43333724, 15.43333724, 12.86111437, 12.86111437,
        10.2888915
    ])
    max_sus_wind_unit = 'm/s'
    cat = tc.set_category(max_sus_wind, max_sus_wind_unit)
    self.assertEqual(-1, cat)

    max_sus_wind = np.array([
        148.16, 166.68, 185.2, 212.98, 222.24, 231.5, 240.76, 222.24,
        203.72, 148.16, 138.9, 148.16, 120.38
    ])
    max_sus_wind_unit = 'km/h'
    cat = tc.set_category(max_sus_wind, max_sus_wind_unit)
    self.assertEqual(4, cat)
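# A hedged sketch of what test_category_pass exercises: assuming the usual
# Saffir-Simpson thresholds in knots, [34, 64, 83, 96, 113, 137], and the
# standard unit conversions (assumptions, not read from tc.set_category
# itself), the expected categories above can be reproduced in a few lines.
# The helper name is hypothetical; boundary handling may differ from the
# tested implementation.
def _category_sketch(max_sus_wind, wind_unit='kn'):
    """Return a category from -1 (tropical depression) to 5 for peak wind."""
    to_kn = {'kn': 1.0, 'mph': 0.868976, 'm/s': 1.943844, 'km/h': 0.539957}
    max_wind_kn = np.max(max_sus_wind) * to_kn[wind_unit]
    # index of the first threshold above the peak wind, shifted by one:
    # < 34 kn -> -1, 34-63 kn -> 0 (tropical storm), ..., >= 137 kn -> 5
    return int(np.searchsorted([34, 64, 83, 96, 113, 137], max_wind_kn)) - 1

# e.g. _category_sketch(np.array([25, 45, 35])) == 0 matches the first
# assertion above.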
def _subset_to_track(msg, index, provider, timestamp_origin, name, id_no):
    """Subroutine to process one BUFR subset into one xr.Dataset."""
    sig = np.array(msg['significance'].get_values(index), dtype='int')
    lat = np.array(msg['latitude'].get_values(index), dtype='float')
    lon = np.array(msg['longitude'].get_values(index), dtype='float')
    wnd = np.array(msg['wind_10m'].get_values(index), dtype='float')
    pre = np.array(msg['pressure'].get_values(index), dtype='float')

    sid = msg['storm_id'].get_values(index)[0].decode().strip()

    timestep_int = np.array(msg['timestamp'].get_values(index)).squeeze()
    timestamp = timestamp_origin + timestep_int.astype('timedelta64[h]')

    try:
        track = xr.Dataset(
            data_vars={
                'max_sustained_wind': ('time', np.squeeze(wnd)),
                'central_pressure': ('time', np.squeeze(pre) / 100),
                'ts_int': ('time', timestep_int),
                'lat': ('time', lat[sig == 1]),
                'lon': ('time', lon[sig == 1]),
            },
            coords={
                'time': timestamp,
            },
            attrs={
                'max_sustained_wind_unit': 'm/s',
                'central_pressure_unit': 'mb',
                'name': name,
                'sid': sid,
                'orig_event_flag': False,
                'data_provider': provider,
                'id_no': (int(id_no) + index / 100),
                'ensemble_number': msg['ens_number'].get_values(index)[0],
                'is_ensemble': msg['ens_type'].get_values(index)[0] != 0,
                'forecast_time': timestamp_origin,
            }
        )
    except ValueError as err:
        LOGGER.warning(
            'Could not process track %s subset %d, error: %s',
            sid, index, err
        )
        return None

    track = track.dropna('time')
    if track.sizes['time'] == 0:
        return None

    # can only make latlon coords after dropna
    track = track.set_coords(['lat', 'lon'])
    track['time_step'] = track.ts_int - track.ts_int.shift(
        {'time': 1}, fill_value=0)
    track = track.drop_vars(['ts_int'])

    track['radius_max_wind'] = ('time', np.full_like(
        track.time, np.nan, dtype=float))
    track['environmental_pressure'] = ('time', np.full_like(
        track.time, DEF_ENV_PRESSURE, dtype=float))

    # according to specs always num-num-letter
    track['basin'] = ('time', np.full_like(track.time, BASINS[sid[2]],
                                           dtype=object))
    if sid[2] == 'X':
        LOGGER.info(
            'Undefined basin %s for track name %s ensemble no. %d',
            sid[2], track.attrs['name'], track.attrs['ensemble_number'])

    cat_name = CAT_NAMES[set_category(
        max_sus_wind=track.max_sustained_wind.values,
        wind_unit=track.max_sustained_wind_unit,
        saffir_scale=SAFFIR_MS_CAT,
    )]
    track.attrs['category'] = cat_name
    return track
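# A self-contained illustration (hypothetical values, not part of the module)
# of the shift-and-subtract used for 'time_step' above: shifting ts_int one
# position along 'time' with fill_value=0 and subtracting yields the
# per-step forecast-hour increments.
def _time_step_demo():
    """Sketch of the ts_int arithmetic in _subset_to_track."""
    demo = xr.Dataset({'ts_int': ('time', np.array([0, 6, 12, 24]))})
    step = demo.ts_int - demo.ts_int.shift({'time': 1}, fill_value=0)
    return step.values  # array([ 0,  6,  6, 12])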
def read_one_bufr_tc(self, file, id_no=None, fcast_rep=None):
    """Read a single BUFR TC track file.

    Parameters:
        file (str, filelike): Path object, string, or file-like object.
        id_no (int): Numerical ID; optional. Else use date + random int.
        fcast_rep (int): Of the form 1xx000, indicating the delayed
            replicator containing the forecast values; optional.
    """
    decoder = pybufrkit.decoder.Decoder()

    if hasattr(file, 'read'):
        bufr = decoder.process(file.read())
    elif hasattr(file, 'read_bytes'):
        bufr = decoder.process(file.read_bytes())
    elif os.path.isfile(file):
        with open(file, 'rb') as i:
            bufr = decoder.process(i.read())
    else:
        raise FileNotFoundError('Check file argument')

    text_data = FlatTextRenderer().render(bufr)

    # setup parsers and querents
    meparser = pybufrkit.mdquery.MetadataExprParser()
    meta_query = pybufrkit.mdquery.MetadataQuerent(meparser).query

    timestamp_origin = dt.datetime(
        meta_query(bufr, '%year'), meta_query(bufr, '%month'),
        meta_query(bufr, '%day'), meta_query(bufr, '%hour'),
        meta_query(bufr, '%minute'),
    )
    timestamp_origin = np.datetime64(timestamp_origin)

    orig_centre = meta_query(bufr, '%originating_centre')
    if orig_centre == 98:
        provider = 'ECMWF'
    else:
        provider = 'BUFR code ' + str(orig_centre)

    # Collect the data lines between the section 4 and section 5 markers.
    section4_lines = []
    with StringIO(text_data) as input_data:
        # Skip text before the beginning of the interesting block.
        for line in input_data:
            if line.startswith('<<<<<< section 4 >>>>>>'):
                break
        # Read text until the end of the block.
        for line in input_data:
            if line.startswith('<<<<<< section 5 >>>>>>'):
                break
            section4_lines.append(line)
    section4_lines = [li for li in section4_lines
                      if li.startswith(" ") or li.startswith("##")]

    # One row per descriptor: [id, code, value]; '######'-prefixed lines are
    # subset markers and get id 0.
    rows = []
    for item in section4_lines:
        if item.startswith("######"):
            rows.append([0, item.split()[1], item.split()[2]])
        else:
            rows.append([int(item.split()[0]), item.split()[1],
                         item.split()[-1]])
    df_ = pd.DataFrame(rows, columns=['id', 'code', 'Data'])

    def label_en(row, co):
        if row['code'] == co:
            return int(row['Data'])
        return np.nan

    # Forward-fill the subset number and the meteorological significance
    # (descriptor 008005) onto the descriptor rows that belong to them.
    df_['subset'] = df_.apply(lambda row: label_en(row, co='subset'), axis=1)
    df_['subset'] = df_['subset'].ffill()
    df_['model_sgn'] = df_.apply(
        lambda row: label_en(row, co='008005'), axis=1)
    df_['model_sgn'] = df_['model_sgn'].ffill()
    df_['model_sgn'] = df_['model_sgn'].bfill()

    for names, group in df_.groupby("subset"):
        pcen = list(group.query('code in ["010051"]')['Data'].values)
        latc = list(group.query(
            'code in ["005002"] and model_sgn in [1]')['Data'].values)
        lonc = list(group.query(
            'code in ["006002"] and model_sgn in [1]')['Data'].values)
        latm = list(group.query(
            'code in ["005002"] and model_sgn in [3]')['Data'].values)
        lonm = list(group.query(
            'code in ["006002"] and model_sgn in [3]')['Data'].values)
        wind = list(group.query('code in ["011012"]')['Data'].values)
        vhr = list(group.query('code in ["004024"]')['Data'].values)

        # 'None' strings mark missing values; pressure is converted Pa -> mb.
        wind = [np.nan if value == 'None' else float(value) for value in wind]
        pre = [np.nan if value == 'None' else float(value) / 100
               for value in pcen]
        lonm = [np.nan if value == 'None' else float(value) for value in lonm]
        lonc = [np.nan if value == 'None' else float(value) for value in lonc]
        latm = [np.nan if value == 'None' else float(value) for value in latm]
        latc = [np.nan if value == 'None' else float(value) for value in latc]
        vhr = [np.nan if value == 'None' else int(value) for value in vhr]

        timestep_int = np.array(vhr).squeeze()
        timestamp = timestamp_origin + timestep_int.astype('timedelta64[h]')

        year = list(group.query('code in ["004001"]')['Data'].values)
        month = list(group.query('code in ["004002"]')['Data'].values)
        day = list(group.query('code in ["004003"]')['Data'].values)
        hour = list(group.query('code in ["004004"]')['Data'].values)
        storm_name = list(group.query('code in ["001027"]')['Data'].values)
        storm_id = list(group.query('code in ["001025"]')['Data'].values)
        frcst_type = list(group.query('code in ["001092"]')['Data'].values)

        # Distance between the storm centre (significance 1) and the location
        # of maximum wind (significance 3), scaled by ~111 km per degree.
        max_radius = np.sqrt(
            np.square(np.array(latc) - np.array(latm))
            + np.square(np.array(lonc) - np.array(lonm))) * 111

        date_object = '%04d%02d%02d%02d' % (
            int(year[0]), int(month[0]), int(day[0]), int(hour[0]))
        date_object = dt.datetime.strptime(date_object, "%Y%m%d%H")

        track = xr.Dataset(
            data_vars={
                'max_sustained_wind': ('time', wind[1:]),
                'central_pressure': ('time', pre[1:]),
                'ts_int': ('time', timestep_int),
                'max_radius': ('time', max_radius[1:]),
                'lat': ('time', latc[1:]),
                'lon': ('time', lonc[1:]),
                'environmental_pressure': ('time', np.full_like(
                    timestamp, DEF_ENV_PRESSURE, dtype=float)),
                'radius_max_wind': ('time', np.full_like(
                    timestamp, np.nan, dtype=float)),
            },
            coords={'time': timestamp},
            attrs={
                'max_sustained_wind_unit': 'm/s',
                'central_pressure_unit': 'mb',
                'name': storm_name[0].strip("'"),
                'sid': storm_id[0].split("'")[1],
                'orig_event_flag': False,
                'data_provider': provider,
                'id_no': 'NA',
                'ensemble_number': int(names),
                'is_ensemble': frcst_type[0] != '0',
                'forecast_time': date_object,
            })

        track = track.set_coords(['lat', 'lon'])
        track['time_step'] = track.ts_int - track.ts_int.shift(
            {'time': 1}, fill_value=0)

        track.attrs['basin'] = BASINS[storm_id[0].split("'")[1][2].upper()]

        cat_name = CAT_NAMES[set_category(
            max_sus_wind=track.max_sustained_wind.values,
            wind_unit=track.max_sustained_wind_unit,
            saffir_scale=SAFFIR_MS_CAT)]
        track.attrs['category'] = cat_name

        if track.sizes['time'] == 0:
            LOGGER.debug('Dropping empty track, subset %s', names)
        else:
            self.append(track)
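# A miniature of the section-4 parsing in read_one_bufr_tc (hypothetical
# rows, not real BUFR output): 'subset' marker rows are forward-filled onto
# the descriptor rows that follow them, so groupby('subset') later recovers
# one group per ensemble member.
def _section4_ffill_demo():
    """Sketch of the subset forward-fill; returns row values per subset."""
    df_demo = pd.DataFrame(
        [[0, 'subset', '1'], [1, '010051', '99800'],
         [0, 'subset', '2'], [1, '010051', 'None']],
        columns=['id', 'code', 'Data'])
    df_demo['subset'] = df_demo.apply(
        lambda row: int(row['Data']) if row['code'] == 'subset' else np.nan,
        axis=1)
    df_demo['subset'] = df_demo['subset'].ffill()
    # expected: {1.0: ['1', '99800'], 2.0: ['2', 'None']}
    return {name: list(group['Data'])
            for name, group in df_demo.groupby('subset')}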
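# A worked example of the max_radius estimate above: the centre-to-max-wind
# offset in degrees is scaled by roughly 111 km per degree, a flat-earth
# approximation with no cos(lat) correction for longitude. Coordinates are
# hypothetical.
def _max_radius_demo():
    """Sketch of the degree-offset-to-km conversion used for max_radius."""
    latc, lonc = 14.2, 130.5  # storm centre (deg)
    latm, lonm = 14.6, 131.1  # location of maximum wind (deg)
    return np.sqrt((latc - latm) ** 2 + (lonc - lonm) ** 2) * 111  # ~80 km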
def test_apply_decay_pass(self):
    """Test _apply_land_decay against MATLAB reference."""
    v_rel = {
        6: 0.0038950967656296597,
        1: 0.0038950967656296597,
        2: 0.0038950967656296597,
        3: 0.0038950967656296597,
        4: 0.0038950967656296597,
        5: 0.0038950967656296597,
        7: 0.0038950967656296597
    }
    p_rel = {
        6: (1.0499941, 0.007978940084158488),
        1: (1.0499941, 0.007978940084158488),
        2: (1.0499941, 0.007978940084158488),
        3: (1.0499941, 0.007978940084158488),
        4: (1.0499941, 0.007978940084158488),
        5: (1.0499941, 0.007978940084158488),
        7: (1.0499941, 0.007978940084158488)
    }
    tc_track = tc.TCTracks()
    tc_track.read_processed_ibtracs_csv(TC_ANDREW_FL)
    tc_track.data[0]['orig_event_flag'] = False
    extent = tc_track.get_extent()
    land_geom = climada.util.coordinates.get_land_geometry(
        extent=extent, resolution=10
    )
    tc.track_land_params(tc_track.data[0], land_geom)
    tc_synth._apply_land_decay(tc_track.data, v_rel, p_rel, land_geom,
                               s_rel=True, check_plot=False)

    p_ref = np.array([
        1.010000000000000, 1.009000000000000, 1.008000000000000,
        1.006000000000000, 1.003000000000000, 1.002000000000000,
        1.001000000000000, 1.000000000000000, 1.000000000000000,
        1.001000000000000, 1.002000000000000, 1.005000000000000,
        1.007000000000000, 1.010000000000000, 1.010000000000000,
        1.010000000000000, 1.010000000000000, 1.010000000000000,
        1.010000000000000, 1.007000000000000, 1.004000000000000,
        1.000000000000000, 0.994000000000000, 0.981000000000000,
        0.969000000000000, 0.961000000000000, 0.947000000000000,
        0.933000000000000, 0.922000000000000, 0.930000000000000,
        0.937000000000000, 0.951000000000000, 0.947000000000000,
        0.943000000000000, 0.948000000000000, 0.946000000000000,
        0.941000000000000, 0.937000000000000, 0.955000000000000,
        0.9741457117, 0.99244068917, 1.00086729492, 1.00545853355,
        1.00818354609, 1.00941850023, 1.00986192053, 1.00998400565
    ]) * 1e3

    self.assertTrue(np.allclose(
        p_ref, tc_track.data[0].central_pressure.values))

    v_ref = np.array([
        0.250000000000000, 0.300000000000000, 0.300000000000000,
        0.350000000000000, 0.350000000000000, 0.400000000000000,
        0.450000000000000, 0.450000000000000, 0.450000000000000,
        0.450000000000000, 0.450000000000000, 0.450000000000000,
        0.450000000000000, 0.400000000000000, 0.400000000000000,
        0.400000000000000, 0.400000000000000, 0.450000000000000,
        0.450000000000000, 0.500000000000000, 0.500000000000000,
        0.550000000000000, 0.650000000000000, 0.800000000000000,
        0.950000000000000, 1.100000000000000, 1.300000000000000,
        1.450000000000000, 1.500000000000000, 1.250000000000000,
        1.300000000000000, 1.150000000000000, 1.150000000000000,
        1.150000000000000, 1.150000000000000, 1.200000000000000,
        1.250000000000000, 1.250000000000000, 1.200000000000000,
        0.9737967353, 0.687255951, 0.4994850556, 0.3551480462,
        0.2270548036, 0.1302099557, 0.0645385918, 0.0225325851
    ]) * 1e2

    self.assertTrue(np.allclose(
        v_ref, tc_track.data[0].max_sustained_wind.values))

    cat_ref = tc.set_category(tc_track.data[0].max_sustained_wind.values,
                              tc_track.data[0].max_sustained_wind_unit)
    self.assertEqual(cat_ref, tc_track.data[0].category)
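# The coefficient dicts in test_apply_decay_pass repeat one fitted value for
# keys 1 through 7 (presumably one entry per storm-category bucket, an
# assumption about the keys' meaning); equivalent dicts can be built with a
# comprehension:
def _decay_coefs_sketch():
    """Build v_rel/p_rel equivalents of the literals used above."""
    v_rel = {cat: 0.0038950967656296597 for cat in range(1, 8)}
    p_rel = {cat: (1.0499941, 0.007978940084158488) for cat in range(1, 8)}
    return v_rel, p_rel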