def main(args):
    weather = ERA5Land(args.ERA5)
    nasa_firms = NASAFIRMS(args.nasa_firms, args.nasa_firms_type)
    print(weather.shape)
    print(nasa_firms.shape)

    # Time span selection
    date_range = get_intersection_range(weather.time, nasa_firms.acq_date)
    weather = weather[weather.time.isin(date_range)]
    nasa_firms = nasa_firms[nasa_firms.acq_date.isin(date_range)]
    print(weather.shape)
    print(nasa_firms.shape)

    # Merge
    if args.type_of_merged == "departements":
        # Drop columns that are redundant with the weather dataset
        nasa_firms = nasa_firms.drop(["nom"], axis=1)
        merged_data = merge_datasets_by_departements(
            weather, "time", "code", nasa_firms, "acq_date", "code", "left"
        )
        to_drop = [
            "acq_date",
            "latitude_y",
            "longitude_y",
            "bright_t31",
            "frp",
            "acq_date_time",
            "confidence",
        ]
    else:
        # Drop columns that are redundant with the weather dataset
        nasa_firms = nasa_firms.drop(["code", "nom"], axis=1)
        merged_data = merge_datasets_by_closest_weather_point(
            weather, "time", nasa_firms, "acq_date"
        )
        to_drop = [
            "closest_weather_point",
            "acq_date",
            "latitude_y",
            "longitude_y",
            "bright_t31",
            "frp",
            "acq_date_time",
            "confidence",
            "weather_lat",
            "weather_lon",
        ]

    # Label rows: detections with confidence >= 60 are positives
    final_data = merged_data.copy()
    where = merged_data["confidence"] >= 60
    final_data.loc[where, "Statut"] = 1
    final_data.loc[~where, "Statut"] = 0
    final_data["Statut"] = final_data["Statut"].astype(int)

    # Drop unnecessary columns
    final_data = final_data.drop(to_drop, axis=1)
    print(final_data)
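# A minimal sketch of the CLI entrypoint this script would need; the flag names
# and defaults below are assumptions inferred from the attributes accessed in
# main() above (args.ERA5, args.nasa_firms, args.nasa_firms_type,
# args.type_of_merged), not the repository's actual parser.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Merge ERA5-Land weather with NASA FIRMS fire detections"
    )
    parser.add_argument("--ERA5", type=str, default=None, help="Path to the ERA5-Land source")
    parser.add_argument("--nasa_firms", type=str, default=None, help="Path to the NASA FIRMS source")
    parser.add_argument("--nasa_firms_type", type=str, default="csv", help="NASA FIRMS source format (assumed default)")
    parser.add_argument(
        "--type_of_merged",
        type=str,
        default="departements",
        help="Merge strategy: 'departements' or closest-weather-point",
    )
    main(parser.parse_args())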
def main(args):
    weather = NOAAWeather(args.weather)
    history = BDIFFHistory(args.wildfire)

    # Time span selection
    date_range = get_intersection_range(weather.DATE, history.date)
    weather = weather[weather.DATE.isin(date_range)]
    history = history[history.date.isin(date_range)]

    # Merge
    df = merge_datasets_by_departements(
        weather, "DATE", "code", history, "date", "Département", "left"
    )

    # Label data: rows without a matching wildfire record get Statut = 0
    df.Statut = 1 - df.Statut.isna().astype(int)

    df = df.filter(
        items=[
            "DATE",
            "code",
            "nom",
            "LATITUDE",
            "LONGITUDE",
            "ELEVATION",
            "DEWP",
            "DEWP_ATTRIBUTES",
            "FRSHTT",
            "GUST",
            "MAX",
            "MIN",
            "MXSPD",
            "PRCP",
            "SLP",
            "SLP_ATTRIBUTES",
            "SNDP",
            "STP",
            "STP_ATTRIBUTES",
            "TEMP",
            "TEMP_ATTRIBUTES",
            "VISIB",
            "VISIB_ATTRIBUTES",
            "WDSP",
            "WDSP_ATTRIBUTES",
            "Statut",
        ]
    )
    print(df)
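# Illustrative sketch (not part of the pipeline): after the left merge above,
# rows with no matching wildfire record carry NaN in `Statut`, so
# `1 - df.Statut.isna().astype(int)` maps matched rows to 1 and unmatched to 0.
import pandas as pd

demo = pd.DataFrame({"Statut": [float("nan"), 1.0, float("nan")]})
demo.Statut = 1 - demo.Statut.isna().astype(int)
print(demo.Statut.tolist())  # [0, 1, 0]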
def __init__(
    self, era_source_path=None, viirs_source_path=None, fwi_source_path=None
):
    """Build the merged ERA5/FWI/VIIRS dataframe.

    Args:
        era_source_path (str, optional): ERA5 data source path. Defaults to None.
        viirs_source_path (str, optional): VIIRS data source path. Defaults to None.
        fwi_source_path (str, optional): FWI data source path. Defaults to None.
    """
    weather = ERA5T(era_source_path)  # ERA5Land(era_source_path)
    nasa_firms = NASAFIRMS_VIIRS(viirs_source_path)

    # Time span selection
    date_range = get_intersection_range(weather.time, nasa_firms.acq_date)
    weather = weather[weather.time.isin(date_range)]
    nasa_firms = nasa_firms[nasa_firms.acq_date.isin(date_range)]

    # Keep only vegetation wildfires and drop thermal anomalies with low confidence
    where = (nasa_firms["confidence"] != "l") & (nasa_firms["type"] == 0)
    nasa_firms = nasa_firms[where]

    # Get the FWI dataset for year 2019 (September 1st is missing)
    if fwi_source_path is None:
        days = [
            x.strftime("%Y%m%d")
            for x in pd.date_range(start="2019-01-01", end="2019-12-31")
        ]
        days.remove("20190901")
        fwi_df = GwisFwi(days_list=days)
    else:
        # Load the FWI dataset from disk
        fwi_df = pd.read_csv(fwi_source_path)

    fwi_df["day"] = pd.to_datetime(fwi_df["day"], format="%Y%m%d", errors="coerce")

    # Group the FWI dataframe by day and department and compute min, max, mean, std
    agg_fwi_df = (
        fwi_df.groupby(["day", "departement"])[cfg.FWI_VARS]
        .agg(["min", "max", "mean", "std"])
        .reset_index()
    )
    agg_fwi_df.columns = ["day", "departement"] + [
        x[0] + "_" + x[1] for x in agg_fwi_df.columns if x[1] != ""
    ]

    # Group the weather dataframe by day and department and compute min, max, mean, std
    agg_wth_df = (
        weather.groupby(["time", "nom"])[cfg.WEATHER_ERA5T_VARS]
        .agg(["min", "max", "mean", "std"])
        .reset_index()
    )
    agg_wth_df.columns = ["day", "departement"] + [
        x[0] + "_" + x[1] for x in agg_wth_df.columns if x[1] != ""
    ]

    # Merge FWI and weather together
    mid_df = pd.merge(agg_fwi_df, agg_wth_df, on=["day", "departement"], how="inner")

    # Count fires by day and department
    fires_count = (
        nasa_firms.groupby(["acq_date", "nom"])["confidence"]
        .count()
        .to_frame()
        .reset_index()
    )
    fires_count = fires_count.rename({"confidence": "fires"}, axis=1)

    # Merge the fire counts in, keeping days/departments with no fires
    final_df = pd.merge(
        mid_df,
        fires_count,
        left_on=["day", "departement"],
        right_on=["acq_date", "nom"],
        how="left",
    ).drop(["acq_date", "nom"], axis=1)

    # Fill rows with no fires with 0
    final_df["fires"] = final_df["fires"].fillna(0)
    super().__init__(final_df)
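# Illustrative sketch of the column flattening used above: aggregating with
# several statistics yields a two-level column index, which the list
# comprehension collapses into names such as "fwi_mean". The column "fwi" is a
# stand-in for whatever cfg.FWI_VARS actually contains.
import pandas as pd

df = pd.DataFrame(
    {
        "day": ["2019-01-01", "2019-01-01", "2019-01-02"],
        "departement": ["Aisne", "Aisne", "Aisne"],
        "fwi": [1.0, 3.0, 2.0],
    }
)
agg = (
    df.groupby(["day", "departement"])[["fwi"]]
    .agg(["min", "max", "mean", "std"])
    .reset_index()
)
# Keep the group keys, then join each (variable, statistic) pair with "_"
agg.columns = ["day", "departement"] + [
    x[0] + "_" + x[1] for x in agg.columns if x[1] != ""
]
print(agg.columns.tolist())  # ['day', 'departement', 'fwi_min', 'fwi_max', 'fwi_mean', 'fwi_std']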
def main(args):
    weather = ERA5Land(args.ERA5)
    nasa_firms = NASAFIRMS_VIIRS(args.nasa_firms, args.nasa_firms_type)
    print(weather.shape)
    print(nasa_firms.shape)

    # Time span selection
    date_range = get_intersection_range(weather.time, nasa_firms.acq_date)
    weather = weather[weather.time.isin(date_range)]
    nasa_firms = nasa_firms[nasa_firms.acq_date.isin(date_range)]
    print(weather.shape)
    print(nasa_firms.shape)

    # Keep only vegetation wildfires and drop thermal anomalies with low confidence
    where = (nasa_firms["confidence"] != "l") & (nasa_firms["type"] == 0)
    nasa_firms = nasa_firms[where]

    # Merge
    if args.type_of_merged == "departements":
        # Drop columns that are redundant with the weather dataset
        nasa_firms = nasa_firms.drop(["nom"], axis=1)
        merged_data = merge_datasets_by_departements(
            weather, "time", "code", nasa_firms, "acq_date", "code", "left"
        )
        to_drop = [
            "acq_date",
            "latitude_y",
            "longitude_y",
            "bright_ti4",
            "confidence",
            "bright_ti5",
            "frp",
            "type",
            "acq_date_time",
        ]
    else:
        # Drop columns that are redundant with the weather dataset
        nasa_firms = nasa_firms.drop(["code", "nom"], axis=1)
        merged_data = merge_by_proximity(
            nasa_firms, "acq_date", weather, "time", "right"
        )
        to_drop = [
            "latitude_x",
            "longitude_x",
            "closest_lat",
            "closest_lon",
            "acq_date",
            "bright_ti4",
            "confidence",
            "bright_ti5",
            "frp",
            "type",
            "acq_date_time",
        ]

    # Label rows: any matched fire detection is a positive
    final_data = merged_data.copy()
    where = merged_data["confidence"].isna()
    final_data.loc[~where, "Statut"] = 1
    final_data.loc[where, "Statut"] = 0
    final_data["Statut"] = final_data["Statut"].astype(int)

    # Drop unnecessary columns
    final_data = final_data.drop(to_drop, axis=1)
    final_data = final_data.rename(
        columns={"latitude_y": "latitude", "longitude_y": "longitude"}
    )
    print(final_data)
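# Illustrative sketch of the VIIRS filtering above: in FIRMS VIIRS data the
# confidence field is categorical ("l"/"n"/"h" for low/nominal/high) and
# type 0 marks presumed vegetation fires, so the mask keeps only nominal- and
# high-confidence vegetation detections.
import pandas as pd

demo = pd.DataFrame({"confidence": ["l", "n", "h", "n"], "type": [0, 0, 0, 2]})
where = (demo["confidence"] != "l") & (demo["type"] == 0)
print(demo[where])  # keeps only the ("n", 0) and ("h", 0) rows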
def _test_get_intersection_range(self, s1, s2, expected_len):
    date_range = utils.get_intersection_range(s1, s2)
    self.assertIsInstance(date_range, pd.DatetimeIndex)
    self.assertEqual(len(date_range), expected_len)
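# A hypothetical concrete test built on the helper above, assuming
# utils.get_intersection_range returns the daily range common to both series:
# these two series overlap on 2019-01-02 and 2019-01-03, hence length 2.
def test_get_intersection_range(self):
    s1 = pd.Series(pd.date_range("2019-01-01", "2019-01-03"))
    s2 = pd.Series(pd.date_range("2019-01-02", "2019-01-04"))
    self._test_get_intersection_range(s1, s2, 2)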