def add_dist2coast(self):
    """Drop bias records that are too close to land, using a
    distance-to-coast lookup table.

    NOTE(review): this method references `bias` and
    `distance_to_land_threshold`, neither of which is defined in this
    scope — as written it raises NameError; presumably they were meant
    to be attributes or parameters. Confirm intent before use.
    NOTE(review): `breakpoint()` below is a debugger leftover.
    """
    # 0.04-degree lookup grids matching the dist2coast table
    # (lons: -179.98..180.02, lats: -89.98..89.98)
    lons = [round(x * 0.04 - 179.98, 2) for x in range(9000)]
    lats = [round(y * 0.04 - 89.98, 2) for y in range(4500)]

    dist2coast_table_name = 'dist2coast_na_sfmr'
    Dist2Coast = utils.get_class_by_tablename(self.engine,
                                              dist2coast_table_name)

    # SFMR-vs-SMAP-prediction validation rows within the period
    validation_tablename = utils.gen_validation_tablename(
        self, 'sfmr', 'smap_prediction')
    Validation = utils.get_class_by_tablename(self.engine,
                                              validation_tablename)

    validation_query = self.session.query(Validation).filter(
        Validation.sfmr_datetime > self.period[0],
        Validation.sfmr_datetime < self.period[1])
    validation_count = validation_query.count()

    # NOTE(review): this loop only prints progress; validation_row is
    # never used — looks like unfinished code.
    for validation_idx, validation_row in enumerate(validation_query):
        print(f'\r{validation_idx+1}/{validation_count}', end='')

    # NOTE(review): indices_to_drop is not reset per source, so
    # indices from one source carry over to the next drop() call —
    # verify this is intended.
    indices_to_drop = []
    for src in self.sources:
        length = len(bias[src])
        for i in range(length):
            print(f'\r{i+1}/{length}', end='')
            # Snap the SFMR location onto the 0.04-degree lookup grid
            lookup_lon, lookup_lon_idx = \
                utils.get_nearest_element_and_index(
                    lons, bias[src]['sfmr_lon'][i]-360)
            lookup_lat, lookup_lat_idx = \
                utils.get_nearest_element_and_index(
                    lats, bias[src]['sfmr_lat'][i])
            # Exact-match query with a small tolerance window
            dist_query = self.session.query(Dist2Coast).filter(
                Dist2Coast.lon > lookup_lon - 0.01,
                Dist2Coast.lon < lookup_lon + 0.01,
                Dist2Coast.lat > lookup_lat - 0.01,
                Dist2Coast.lat < lookup_lat + 0.01,
            )
            if dist_query.count() != 1:
                self.logger.error('Dist not found')
                breakpoint()
                exit(1)
            # Mark points farther from the coast than the threshold
            if dist_query[0].dist2coast > distance_to_land_threshold:
                indices_to_drop.append(i)
        utils.delete_last_lines()
        print('Done')
        bias[src].drop(indices_to_drop, inplace=True)
def extract(self):
    """Extract data for every IBTrACS TC record inside the period.

    Records on land or not on an exact hour are skipped. Consecutive
    records of the same storm are processed as a pair; otherwise the
    record is processed alone.
    """
    # Get IBTrACS table
    table_name = self.CONFIG['ibtracs']['table_name'][self.basin]
    IBTrACS = utils.get_class_by_tablename(self.engine, table_name)
    tc_query = self.session.query(IBTrACS).filter(
        IBTrACS.date_time >= self.period[0],
        IBTrACS.date_time <= self.period[1])
    total = tc_query.count()

    # Traverse TC records
    for idx, tc in enumerate(tc_query):
        try:
            converted_lon = utils.longitude_converter(
                tc.lon, '360', '-180')
            # Skip records whose center is over land
            if bool(globe.is_land(tc.lat, converted_lon)):
                continue
            # Only keep records that fall exactly on the hour
            if tc.date_time.minute or tc.date_time.second:
                continue
            if idx < total - 1:
                next_tc = tc_query[idx + 1]
                if tc.sid == next_tc.sid:
                    # This record and the next belong to the same TC
                    self.extract_between_two_tc_records(tc, next_tc)
                else:
                    # Next record is a different TC
                    success = self.extract_detail(tc)
                    self.info_after_extracting_detail(tc, success, True)
            else:
                # Last record of the query
                success = self.extract_detail(tc)
                self.info_after_extracting_detail(tc, success, True)
        except Exception:
            # FIX: removed breakpoint()/exit(msg) debug leftovers;
            # log the full traceback and propagate the error instead
            self.logger.exception('Error while extracting TC record')
            raise
def _add_cwind_station_dis2coast(self):
    """Attach a distance-to-coast value to every cwind station row."""
    self.logger.info(('Adding column of distance to coast to table ' +
                      'of cwind station'))

    dis2coast_col = Column('distance_to_coast', Float())
    station_table = utils.get_class_by_tablename(
        self.engine, cwind.CwindStation.__tablename__)

    # Add the column only when it does not exist yet
    if not hasattr(station_table, dis2coast_col.name):
        utils.add_column(self.engine, cwind.CwindStation.__tablename__,
                         dis2coast_col)

    # Do NOT directly query cwind.CwindStation
    # Beacause due to some reason, its new column's value cannot
    # be added
    stations = self.session.query(station_table)
    station_count = stations.count()

    for i, station in enumerate(stations):
        print(f'\r{station.id} ({i+1}/{station_count})', end='')
        station.distance_to_coast = self._distance_from_coast(
            station.latitude, station.longitude)
        self.session.commit()

    utils.delete_last_lines()
    print()
def if_station_on_land(self):
    """Report every ISD station whose closest grid point is on land.

    Prints one line per on-land station; prints a summary line when
    all stations turn out to be at sea.
    """
    ISDStation = utils.get_class_by_tablename(self.engine,
                                              'isd_scs_stations')
    Grid = utils.get_class_by_tablename(self.engine, 'grid')
    all_stn_in_sea = True
    for stn in self.session.query(ISDStation):
        # Index of the grid point nearest to the station
        y, x = utils.get_latlon_index_of_closest_grib_point(
            stn.lat, stn.lon, self.grid_lats, self.grid_lons)
        # NOTE(review): first() may return None if (x, y) is missing
        # from the grid table — would raise AttributeError below
        pt = self.session.query(Grid).filter(Grid.x == x,
                                             Grid.y == y).first()
        if pt.land:
            all_stn_in_sea = False
            print((f"""{stn.station_id} is on land, with comment: """
                   f"""{stn.comment} """
                   f"""\tstn_lon: {stn.lon}\tstn_lat: {stn.lat}"""
                   f"""\tgrid_lon: {pt.lon}\tgrid_lat: {pt.lat}"""))
    if all_stn_in_sea:
        # FIX: dropped needless f-prefix (no placeholders)
        print('All ISD stations are in sea.')
def compare_with_isd(self):
    """Compare wind speed from different data sources with ISD's
    wind speed.
    """
    # Get ISD windspd
    isd_manager = isd.ISDManager(self.CONFIG, self.period,
                                 self.region, self.db_root_passwd,
                                 work_mode='')
    # Download ISD csvs in period
    isd_csv_paths = isd_manager.download_and_read_scs_data()

    # Get IBTrACS table
    table_name = self.CONFIG['ibtracs']['table_name']['scs']
    IBTrACS = utils.get_class_by_tablename(self.engine, table_name)

    # FIX: join() replaces the manual index-checking accumulation
    # loop; result is e.g. 'ERA5 and SMAP and CCMP'
    sources_str = ' and '.join(src.upper() for src in self.sources)

    # Filter TCs during period
    for tc in self.session.query(IBTrACS).filter(
            IBTrACS.date_time >= self.period[0],
            IBTrACS.date_time <= self.period[1]).yield_per(
                self.CONFIG['database']['batch_size']['query']):
        # Skip records without an R34 NE-quadrant radius
        if tc.r34_ne is None:
            continue
        # Skip records whose center is over land
        if bool(globe.is_land(tc.lat, tc.lon)):
            continue
        # Draw windspd from CCMP, ERA5, Interium
        # and several satellites
        success = self.get_concurrent_data(isd_csv_paths, tc)
        if success:
            print((f"""Comparing {sources_str} with ISD record """
                   f"""when TC {tc.name} existed on """
                   f"""{tc.date_time}"""))
        else:
            print((f"""Skiping comparsion of {sources_str} with """
                   f"""ISD record when TC {tc.name} existed """
                   f"""on {tc.date_time}"""))
    print('Done')
def compare_ccmp_with_ibtracs(self):
    """Compare CCMP wind with each IBTrACS record inside the period."""
    IBTrACS = utils.get_class_by_tablename(
        self.engine, self.CONFIG['ibtracs']['table_name']['scs'])
    batch_size = self.CONFIG['database']['batch_size']['query']

    # Stream TC records of the period and compare them one by one
    tc_records = self.session.query(IBTrACS).filter(
        IBTrACS.date_time >= self.period[0],
        IBTrACS.date_time <= self.period[1]).yield_per(batch_size)
    for tc in tc_records:
        self.compare_ccmp_with_one_tc_record(tc)
def _find_sid(self, tc_info):
    """Fill tc_info.sid by looking the storm up in the IBTrACS table.

    Prints a message and leaves tc_info untouched when no row matches
    the (year, basin, name) triple.
    """
    TCTable = utils.get_class_by_tablename(
        self.engine, self.CONFIG['ibtracs']['table_name'])

    # Match on year, basin and upper-cased storm name
    matches = self.session.query(TCTable).filter(
        extract('year', TCTable.date_time) == tc_info.year,
        TCTable.basin == tc_info.basin,
        TCTable.name == tc_info.name.upper())

    if not matches.count():
        print((f'SID not found: {tc_info.year} {tc_info.basin} ' +
               f'{tc_info.name}'))
        return

    tc_info.sid = matches.first().sid
def simulate_smap_windspd(self):
    """Simulate SMAP wind speed between consecutive records of the
    selected TCs.

    If fewer than two matching records fall inside the period, the
    period is widened by 3 hours on each side; exits when still too
    few.
    """
    # FIX: dropped needless f-prefix (no placeholders)
    self.logger.info('Comparing wind speed from different sources')

    # Get IBTrACS table
    table_name = self.CONFIG['ibtracs']['table_name'][self.basin]
    IBTrACS = utils.get_class_by_tablename(self.engine, table_name)
    query_obj = self.session.query(IBTrACS).filter(
        IBTrACS.date_time >= self.period[0],
        IBTrACS.date_time <= self.period[1])
    in_expression = IBTrACS.name.in_(self.tc_names)
    tc_query = query_obj.filter(in_expression)
    total = tc_query.count()

    # Expand period by 3 hours on both sides when too few records
    if total < 2:
        self.logger.warning('Expand period')
        margin = datetime.timedelta(hours=3)  # was seconds=3600 * 3
        query_obj = self.session.query(IBTrACS).filter(
            IBTrACS.date_time >= self.period[0] - margin,
            IBTrACS.date_time <= self.period[1] + margin)
        in_expression = IBTrACS.name.in_(self.tc_names)
        tc_query = query_obj.filter(in_expression)
        total = tc_query.count()

        if total < 2:
            self.logger.error('Too few TCs')
            exit(1)

    # Traverse TC records during period
    for idx, tc in enumerate(tc_query):
        if tc.name not in self.tc_names:
            continue
        converted_lon = utils.longitude_converter(tc.lon, '360', '-180')
        # Skip records whose center is over land
        if bool(globe.is_land(tc.lat, converted_lon)):
            continue
        # FIX: removed unused `success = False` assignment
        if idx < total - 1:
            next_tc = tc_query[idx + 1]
            # Only simulate between two records of the same storm
            if tc.sid == next_tc.sid:
                # Pair must overlap the requested period
                if (tc.date_time >= self.period[1]
                        or next_tc.date_time <= self.period[0]):
                    continue
                print(f'Simulating {tc.date_time} - {next_tc.date_time}')
                self.simulate_between_two_tcs(tc, next_tc)
    print('Done')
def get_era5_table_names(self, vars_mode):
    """Return the names of ERA5 tables for all TC records in the
    period that are near a grid subset and have all four R34 radii.

    :param vars_mode: passed through to self.get_era5_table_class
    :return: list of ERA5 table names
    """
    table_names = []
    # Get TC table
    tc_table_name = self.CONFIG['ibtracs']['table_name']
    TCTable = utils.get_class_by_tablename(self.engine, tc_table_name)

    # Loop all rows of TC table within period
    for row in self.session.query(TCTable).filter(
            TCTable.date_time >= self.period[0],
            TCTable.date_time <= self.period[1]).yield_per(
                self.CONFIG['database']['batch_size']['query']):
        tc_datetime = row.date_time

        # Get hit result and range of ERA5 data matrix near TC center
        hit, lat1, lat2, lon1, lon2 = \
            utils.get_subset_range_of_grib(
                row.lat, row.lon, self.lat_grid_points,
                self.lon_grid_points, self.edge, vars_mode='era5',
                spatial_resolution=self.spa_resolu)
        if not hit:
            continue

        # FIX: `quadrant` avoids shadowing the builtin `dir`; any()
        # replaces the manual flag-and-break loop. Skip records that
        # miss any R34 quadrant radius.
        r34 = {'nw': row.r34_nw, 'sw': row.r34_sw,
               'se': row.r34_se, 'ne': row.r34_ne}
        if any(r34[quadrant] is None
               for quadrant in ('nw', 'sw', 'se', 'ne')):
            continue

        # Get name, sqlalchemy Table class and python original class
        # of ERA5 table
        table_name, sa_table, ERA5Table = self.get_era5_table_class(
            vars_mode, row.sid, tc_datetime)
        table_names.append(table_name)

    return table_names
def draw_coverage(self):
    """Draw satellite coverage for every entire hour in the period.

    Hours in which no satellite provides at least a 2x2 set of points
    are logged and skipped.
    """
    start, end = self.period[0], self.period[1]
    # `hourly_dt` is a chronological list of entire hours covering
    # the whole subperiod, e.g. 2019-10-30 12:34:11 to
    # 2019-11-01 10:29:56 yields hours from 2019-10-30 12:00:00
    # through 2019-11-01 11:00:00
    hourly_dt = self.gen_hourly_dt_in_subperiod(start, end)

    for i in range(len(hourly_dt) - 1):
        this_hour = hourly_dt[i]
        next_hour = hourly_dt[i + 1]

        coverage = dict()
        all_satels_null = True
        for satel_name in self.satel_names:
            tablename = utils.gen_satel_era5_tablename(
                satel_name, this_hour)
            SatelERA5 = utils.get_class_by_tablename(
                self.engine, tablename)

            coverage[satel_name] = self.get_satel_coverage(
                satel_name, SatelERA5, this_hour, next_hour)

            valid_pts_num, lons, lats, windspd = coverage[satel_name]
            if len(lons) >= 2 and len(lats) >= 2:
                # FIX: this assignment was commented out, leaving the
                # flag permanently True (every hour skipped) and the
                # `if` body empty; restored so hours with data are
                # actually drawn
                all_satels_null = False

        if all_satels_null:
            self.logger.info((f"""All satellites have no data """
                              f"""from {this_hour} to """
                              f"""{next_hour}"""))
            continue

        self.draw_coverage_of_all_satels(this_hour, next_hour,
                                         coverage)
def __init__(self, CONFIG, period, basin, passwd):
    """Set up database access, query TCs in the period, then run
    rapid-intensification detection."""
    self.CONFIG = CONFIG
    self.period = period
    self.basin = basin
    self.db_root_passwd = passwd
    self.engine = None
    self.session = None
    self.logger = logging.getLogger(__name__)

    # Binds self.engine and self.session
    utils.setup_database(self, Base)

    # Query IBTrACS records of this basin falling inside the period
    IBTrACS = utils.get_class_by_tablename(
        self.engine, self.CONFIG['ibtracs']['table_name'][self.basin])
    self.tc_query = self.session.query(IBTrACS).filter(
        IBTrACS.date_time >= self.period[0],
        IBTrACS.date_time <= self.period[1])
    self.tc_query_num = self.tc_query.count()

    self.detect_rapid_intensification()
def _get_target_datetime(self):
    """Get major datetime dictionary and minor datetime dictionary.

    Splits each qualifying TC record's datetime into the major dict
    (hour in self.main_hours) or the minor dict (any other hour) and
    stores them on self.dt_major / self.dt_minor.
    """
    tc_table_name = self.CONFIG['ibtracs']['table_name']
    TCTable = utils.get_class_by_tablename(self.engine, tc_table_name)

    dt_major = dict()
    dt_minor = dict()

    for row in self.session.query(TCTable).filter(
            TCTable.date_time >= self.period[0],
            TCTable.date_time <= self.period[1]).yield_per(
                self.CONFIG['database']['batch_size']['query']):
        # FIX: any() over quadrant names replaces the manual
        # flag-and-break loop that shadowed the builtin `dir`.
        # Skip records missing any R34 quadrant radius.
        r34 = {'nw': row.r34_nw, 'sw': row.r34_sw,
               'se': row.r34_se, 'ne': row.r34_ne}
        if any(r34[quadrant] is None
               for quadrant in ('nw', 'sw', 'se', 'ne')):
            continue

        year, month = row.date_time.year, row.date_time.month
        day, hour = row.date_time.day, row.date_time.hour
        if hour in self.main_hours:
            self._update_major_datetime_dict(dt_major, year, month,
                                             day, hour)
        else:
            self._update_minor_datetime_dict(dt_minor, year, month,
                                             day, hour)

    self.dt_major = dt_major
    self.dt_minor = dt_minor
def download_tc(self):
    """Download CCMP files for the days on which TCs existed during
    the period, one file per calendar day."""
    self.logger.info((f"""Downloading CCMP files which containing """
                      f"""TCs during period"""))

    IBTrACS = utils.get_class_by_tablename(
        self.engine, self.CONFIG['ibtracs']['table_name']['scs'])

    fetched_dates = set()

    # Stream TC records of the period
    tc_records = self.session.query(IBTrACS).filter(
        IBTrACS.date_time >= self.period[0],
        IBTrACS.date_time <= self.period[1]).yield_per(
            self.CONFIG['database']['batch_size']['query'])

    for tc in tc_records:
        # Skip records without an R34 NE-quadrant radius
        if tc.r34_ne is None:
            continue
        dt_cursor = tc.date_time
        # Each day's CCMP file only needs downloading once
        if dt_cursor.date() in fetched_dates:
            continue
        file_path = self.download_ccmp_on_one_day(dt_cursor)
        fetched_dates.add(dt_cursor.date())
        self.files_path.append(file_path)
def matchup_smap_sfmr(self):
    """Match SMAP and SFMR data around TC.

    NOTE(review): `max_center_dist`, `max_center_temporal_dist` and
    `max_intensity_change` are never defined — the threshold
    assignments were left as commented-out placeholders below, so
    this method raises NameError as written. Confirm/restore the
    intended values before use.
    """
    center_datetime = dict()
    center_lonlat = dict()

    # Get table class of sfmr brief info
    SFMRInfo = utils.get_class_by_tablename(
        self.engine, self.CONFIG['sfmr']['table_names']['brief_info'])

    # SFMR flights overlapping the period
    sfmr_info_query = self.session.query(SFMRInfo).filter(
        SFMRInfo.start_datetime < self.period[1],
        SFMRInfo.end_datetime > self.period[0])

    # Traverse SFMR files
    for sfmr_info in sfmr_info_query:
        tc_name = sfmr_info.hurr_name
        sfmr_path = (f"""{self.CONFIG['sfmr']['dirs']['hurr']}"""
                     f"""{sfmr_info.start_datetime.year}"""
                     f"""/{tc_name}/{sfmr_info.filename}""")

        # SFMR track was closest to TC center
        # when SFMR SWS reached its peak
        center_datetime['sfmr'] = self.time_of_sfmr_peak_wind(sfmr_path)
        if center_datetime['sfmr'] is None:
            continue

        # Find where was TC center when SFMR SWS reached its peak
        center_lonlat['sfmr'] = self.lonlat_of_tc_center(
            tc_name, center_datetime['sfmr'])

        # "TC center of SFMR" means "TC center when SFMR SWS reached
        # its peak". "TC center of SMAP" means "TC center when and
        # where SMAP is enough close to SFMR track".

        # Farthest permitted spatial distance between
        # "TC center of SFMR" and "TC center of SMAP"
        # max_center_spatial_dist =

        # "region of center cells" within circle area with radius of
        # "max_center_spat_dist" around "TC center of SFMR"
        center_cells = self.cells_around_tc_center(
            center_lonlat['sfmr'], max_center_dist)

        # Farthest permitted temporal distance between
        # "TC center of SFMR" and "TC center of SMAP"
        # max_center_temporal_dist =

        # Check the existence of SMAP data in "region of center
        # cells" within temporal window
        exist, center_datetime['smap'], center_lonlat['smap'] = \
            self.cover_tc_center(center_cells,
                                 center_datetime['sfmr'],
                                 max_center_temporal_dist)
        if not exist:
            continue

        # Extract lon, lat and wind speed of SMAP
        # NOTE(review): smap_pts is never used afterwards — verify
        smap_pts = self.extract_smap(center_datetime['smap'],
                                     center_lonlat['smap'])

        # Largest permitted change in intensity
        # max_intensity_change =

        # To avoid cases where TC had changed too much, we need to
        # estimate the change in intensity between SMAP and SFMR
        intensity_change = self.intensity_change_between_shift(
            tc_name, center_datetime)
        if intensity_change > max_intensity_change:
            continue

        # Study region around TC center
        # square_edge =

        # Resample SFMR SWS
        sfmr_track, resampled_sfmr_pts = self.resample_sfmr(
            sfmr_path, center_datetime['sfmr'], center_lonlat['sfmr'])

        # Calculate shift of SFMR
        # NOTE(review): `shift` is computed but not passed to
        # do_shift — presumably do_shift should receive it; confirm
        shift = self.cal_shift(center_lonlat)

        # Shift SFMR track and resampled SFMR SWS
        sfmr_track, resampled_sfmr_pts = self.do_shift(
            sfmr_track, resampled_sfmr_pts)

        self.record_matchup(sfmr_track, resampled_sfmr_pts)
def read_tc_oriented(self, vars_mode, file_path):
    """Read ERA5 reanalysis data around each TC record from a grib
    file, bulk-insert it into per-TC ERA5 tables and compare with
    IBTrACS.

    :param vars_mode: one of 'threeD', 'surface_wind',
        'surface_all_vars' — selects table layout and unique key
    :param file_path: path of the ERA5 grib file to read
    """
    # load grib file
    grbs = pygrib.open(file_path)

    # Get TC table and count its row number
    tc_table_name = self.CONFIG['ibtracs']['table_name']
    TCTable = utils.get_class_by_tablename(self.engine, tc_table_name)
    tc_query = self.session.query(TCTable)
    total = tc_query.count()
    del tc_query
    count = 0
    info = f'Reading reanalysis data of TC records'
    self.logger.info(info)

    # Loop all row of TC table
    for row in self.session.query(TCTable).yield_per(
            self.CONFIG['database']['batch_size']['query']):
        # Get TC datetime
        tc_datetime = row.date_time

        # Get hit result and range of ERA5 data matrix near
        # TC center
        hit, lat1, lat2, lon1, lon2 = \
            utils.get_subset_range_of_grib(
                row.lat, row.lon, self.lat_grid_points,
                self.lon_grid_points, self.edge, vars_mode='era5',
                spatial_resolution=self.spa_resolu)
        if not hit:
            continue

        count += 1
        print(f'\r{info} {count}/{total}', end='')

        # Skip records missing any R34 quadrant radius
        dirs = ['nw', 'sw', 'se', 'ne']
        r34 = dict()
        r34['nw'], r34['sw'], r34['se'], r34['ne'] = \
            row.r34_nw, row.r34_sw, row.r34_se, row.r34_ne
        skip_compare = False
        for dir in dirs:
            if r34[dir] is None:
                skip_compare = True
                break
        if skip_compare:
            continue

        # Get name, sqlalchemy Table class and python original class
        # of ERA5 table
        table_name, sa_table, ERA5Table = self.get_era5_table_class(
            vars_mode, row.sid, tc_datetime)
        # Create entity of ERA5 table
        era5_table_entity = self._gen_whole_era5_table_entity(
            vars_mode, ERA5Table, lat1, lat2, lon1, lon2)

        # Record number of successfully reading data matrix of ERA5
        # grib file near TC center
        read_hit_count = 0

        # Loop all messages of grib file which consists of
        # all variables in all pressure levels
        for m in range(grbs.messages):
            grb = grbs.message(m + 1)

            # Generate datetime of message and compare it with TC's
            # NOTE(review): dataTime '0' is padded to '000', giving an
            # 11-char string against format '%Y%m%d%H%M%S' — relies on
            # strptime's lenient non-padded parsing; confirm this
            # yields the intended datetimes
            grb_date, grb_time = str(grb.dataDate), str(grb.dataTime)
            if grb_time == '0':
                grb_time = '000'
            grb_datetime = datetime.datetime.strptime(
                f'{grb_date}{grb_time}', '%Y%m%d%H%M%S')
            if tc_datetime != grb_datetime:
                continue

            # extract corresponding data matrix in ERA5 reanalysis
            read_hit = self._read_grb_matrix(vars_mode,
                                             era5_table_entity,
                                             grb, lat1, lat2, lon1,
                                             lon2)
            if read_hit:
                read_hit_count += 1

        # Skip this turn of loop if not getting data matrix
        if not read_hit_count:
            continue

        # When ERA5 table doesn't exists, sa_table is None.
        # So need to create it.
        if sa_table is not None:
            # Create table of ERA5 data cube
            sa_table.create(self.engine)
            self.session.commit()

        # Write extracted data matrix into DB
        start = time.process_time()
        if vars_mode == 'threeD':
            utils.bulk_insert_avoid_duplicate_unique(
                era5_table_entity,
                int(self.CONFIG['database']['batch_size']['insert']
                    / 10),
                ERA5Table, ['x_y_z'], self.session,
                check_self=True)
        elif vars_mode == 'surface_wind' \
                or vars_mode == 'surface_all_vars':
            utils.bulk_insert_avoid_duplicate_unique(
                era5_table_entity,
                int(self.CONFIG['database']['batch_size']['insert']
                    / 10),
                ERA5Table, ['x_y'], self.session,
                check_self=True)
        end = time.process_time()

        self.logger.debug((f'Bulk inserting ERA5 data into ' +
                           f'{table_name} in {end-start:2f} s'))

        self.compare_ibtracs_era5(vars_mode, row, ERA5Table,
                                  draw=True, draw_map=True,
                                  draw_bar=False)

    utils.delete_last_lines()
    print('Done')
def _compare_with_cwind(self, ccmp_file_path):
    """Compare CCMP wind with coastal (cwind) station observations
    for one CCMP daily file, print MAEs and plot absolute error
    against distance to coast.

    :param ccmp_file_path: path of the daily CCMP NetCDF file; the
        date is parsed from fixed character positions of the filename

    NOTE(review): returns (None, None) from inside the station loop
    when any station lacks a data table, silently abandoning the
    comparison; also divides by len(...) which raises
    ZeroDivisionError when no matchups were found — confirm both are
    acceptable.
    """
    file = ccmp_file_path.split('/')[-1]
    # Date encoded at fixed offsets in the CCMP filename
    base_datetime = datetime.datetime(year=int(file[19:23]),
                                      month=int(file[23:25]),
                                      day=int(file[25:27]),
                                      hour=0, minute=0, second=0)

    dis2coast_array = []
    wspd_absolute_error = []
    wdir_absolute_error = []

    vars = netCDF4.Dataset(ccmp_file_path).variables
    ccmp_lat = vars['latitude']
    ccmp_lon = vars['longitude']

    # Pad and roll latitude so the grid covers the full -90..90 range
    lat_padding = np.zeros(92)
    ccmp_lat = np.append(ccmp_lat, lat_padding, axis=0)
    ccmp_lat = np.roll(ccmp_lat, 46, axis=0)

    cwind_station_class = utils.get_class_by_tablename(
        self.engine, cwind.CwindStation.__tablename__)

    cwind_station_query = self.session.query(cwind_station_class)
    total = cwind_station_query.count()
    count = 0

    for stn in cwind_station_query:
        count += 1
        info = f'Comparing CCMP with cwind station {stn.id}'
        print(f'\r{info} ({count}/{total})', end='')

        # extract cwind speed and direction
        cwind_data_table_name = f'cwind_{stn.id}'
        CwindData = utils.get_class_by_tablename(
            self.engine, cwind_data_table_name)
        if CwindData is None:
            return None, None

        # self.hours maps CCMP time-slice index -> hour offset
        # (assumed — TODO confirm against where self.hours is set)
        for h in self.hours:
            target_datetime = (base_datetime +
                               datetime.timedelta(hours=self.hours[h]))
            cwind_match = self.session.query(CwindData).\
                filter_by(datetime=target_datetime).first()
            if cwind_match is None:
                continue

            # Apply the same padding/roll as for latitude
            map_padding = np.zeros((92, 1440))

            uwnd = vars['uwnd'][h, :, :]
            vwnd = vars['vwnd'][h, :, :]
            uwnd = np.append(uwnd, map_padding, axis=0)
            vwnd = np.append(vwnd, map_padding, axis=0)
            uwnd = np.roll(uwnd, 46, axis=0)
            vwnd = np.roll(vwnd, 46, axis=0)

            # CCMP wind interpolated near the station
            ccmp_wspd, ccmp_wdir = self._ccmp_near_cwind(
                stn, ccmp_lat, ccmp_lon, uwnd, vwnd)
            if ccmp_wspd is None or ccmp_wdir is None:
                continue

            cwind_wspd = cwind_match.wspd_10
            cwind_wdir = cwind_match.wdir

            dis2coast_array.append(stn.distance_to_coast)
            wspd_absolute_error.append(abs(cwind_wspd - ccmp_wspd))
            wdir_absolute_error.append(abs(cwind_wdir - ccmp_wdir))

    utils.delete_last_lines()
    print('Done')

    print('MAE of wind speed: ' + str(sum(wspd_absolute_error) /
                                      len(wspd_absolute_error)))
    print('MAE of wind direction: ' + str(sum(wdir_absolute_error) /
                                          len(wdir_absolute_error)))

    dis2coast_array = np.array(dis2coast_array)
    wspd_absolute_error = np.array(wspd_absolute_error)
    wdir_absolute_error = np.array(wdir_absolute_error)

    # Speed error vs distance to coast
    plt.subplot(2, 1, 1)
    ax_1 = sns.regplot(x=dis2coast_array, y=wspd_absolute_error,
                       color='b')
    plt.xlabel('Distance to coast (km)')
    plt.ylabel('Wind speed absolute_error (m/s)')
    plt.grid(True)

    # Direction error vs distance to coast
    # NOTE(review): y-label repeats 'Wind speed' — probably meant
    # wind direction; left unchanged here
    plt.subplot(2, 1, 2)
    ax_2 = sns.regplot(x=dis2coast_array, y=wdir_absolute_error,
                       color='g')
    plt.xlabel('Distance to coast (km)')
    plt.ylabel('Wind speed absolute_error (m/s)')
    plt.grid(True)

    plt.tight_layout()
    fig_path = (f'{self.CONFIG["result"]["dirs"]["fig"]}' +
                f'ccmp_cwind_absolute_error_dis2coast.png')
    os.makedirs(os.path.dirname(fig_path), exist_ok=True)
    plt.savefig(fig_path)
    plt.show()