def main(args):
    """Build a labelled fire dataset from ERA5-Land weather and NASA FIRMS detections."""
    weather = ERA5Land(args.ERA5)
    nasa_firms = NASAFIRMS(args.nasa_firms, args.nasa_firms_type)
    print(weather.shape)
    print(nasa_firms.shape)

    # Restrict both datasets to the dates they have in common
    date_range = get_intersection_range(weather.time, nasa_firms.acq_date)
    weather = weather[weather.time.isin(date_range)]
    nasa_firms = nasa_firms[nasa_firms.acq_date.isin(date_range)]

    print(weather.shape)
    print(nasa_firms.shape)

    # Merge either per departement or by nearest weather grid point
    if args.type_of_merged == "departements":
        # "nom" duplicates a weather-dataset column; drop it before merging
        nasa_firms = nasa_firms.drop(["nom"], axis=1)
        merged_data = merge_datasets_by_departements(
            weather, "time", "code", nasa_firms, "acq_date", "code", "left"
        )
        to_drop = [
            "acq_date",
            "latitude_y",
            "longitude_y",
            "bright_t31",
            "frp",
            "acq_date_time",
            "confidence",
        ]
    else:
        # "code" and "nom" duplicate weather-dataset columns; drop them before merging
        nasa_firms = nasa_firms.drop(["code", "nom"], axis=1)
        merged_data = merge_datasets_by_closest_weather_point(
            weather, "time", nasa_firms, "acq_date"
        )
        to_drop = [
            "closest_weather_point",
            "acq_date",
            "latitude_y",
            "longitude_y",
            "bright_t31",
            "frp",
            "acq_date_time",
            "confidence",
        ]
        to_drop += ["weather_lat", "weather_lon"]

    # Label: 1 where a detection with confidence >= 60 matched the row, else 0
    # (NaN confidence compares False, so unmatched rows get 0 as before)
    final_data = merged_data.copy()
    final_data["Statut"] = (merged_data["confidence"] >= 60).astype(int)

    # Remove columns made redundant by the merge
    final_data = final_data.drop(to_drop, axis=1)

    print(final_data)
def main(args):
    """Merge NOAA weather records with BDIFF wildfire history and label fire days."""
    weather = NOAAWeather(args.weather)
    history = BDIFFHistory(args.wildfire)

    # Restrict both datasets to their common date span
    date_range = get_intersection_range(weather.DATE, history.date)
    weather = weather[weather.DATE.isin(date_range)]
    history = history[history.date.isin(date_range)]

    # Left-merge the fire history onto the weather rows per departement
    df = merge_datasets_by_departements(
        weather, "DATE", "code", history, "date", "Département", "left"
    )

    # Rows that matched a fire record get Statut = 1, the rest 0
    df["Statut"] = df["Statut"].notna().astype(int)

    # Keep only the columns of interest
    df = df.filter(
        items=[
            "DATE",
            "code",
            "nom",
            "LATITUDE",
            "LONGITUDE",
            "ELEVATION",
            "DEWP",
            "DEWP_ATTRIBUTES",
            "FRSHTT",
            "GUST",
            "MAX",
            "MIN",
            "MXSPD",
            "PRCP",
            "SLP",
            "SLP_ATTRIBUTES",
            "SNDP",
            "STP",
            "STP_ATTRIBUTES",
            "TEMP",
            "TEMP_ATTRIBUTES",
            "VISIB",
            "VISIB_ATTRIBUTES",
            "WDSP",
            "WDSP_ATTRIBUTES",
            "Statut",
        ]
    )

    print(df)
# Example 3
    def __init__(
        self, era_source_path=None, viirs_source_path=None, fwi_source_path=None
    ):
        """Build the merged ERA5/FWI/VIIRS dataframe and hand it to the parent class.

        ERA5T weather and FWI variables are aggregated per day and departement
        (min, max, mean, std), joined on (day, departement), then enriched with
        the daily per-departement count of VIIRS detections ("fires", 0 when
        no detection matched).

        Args:
            era_source_path (str, optional): Era5 data source path. Defaults to None.
            viirs_source_path (str, optional): Viirs data source path. Defaults to None.
            fwi_source_path (str, optional): Fwi data source path. Defaults to None.
        """
        weather = ERA5T(era_source_path)  # ERA5Land(era_source_path)
        nasa_firms = NASAFIRMS_VIIRS(viirs_source_path)

        # Time span selection: keep only dates present in both datasets
        date_range = get_intersection_range(weather.time, nasa_firms.acq_date)
        weather = weather[weather.time.isin(date_range)]
        nasa_firms = nasa_firms[nasa_firms.acq_date.isin(date_range)]

        # Keep only vegetation wildfires and remove thermal anomalies with low confidence
        where = (nasa_firms["confidence"] != "l") & (nasa_firms["type"] == 0)
        nasa_firms = nasa_firms[where]

        # Get FWI dataset for year 2019 (1st september missing)
        if fwi_source_path is None:
            days = [
                x.strftime("%Y%m%d")
                for x in pd.date_range(start="2019-01-01", end="2019-12-31")
            ]
            days.remove("20190901")
            fwi_df = GwisFwi(days_list=days)
        else:
            fwi_df = pd.read_csv(fwi_source_path)

        # Parse "day" as datetime; unparseable values become NaT (errors="coerce")
        fwi_df["day"] = pd.to_datetime(fwi_df["day"], format="%Y%m%d", errors="coerce")

        # Group fwi dataframe by day and department and compute min, max, mean, std
        agg_fwi_df = (
            fwi_df.groupby(["day", "departement"])[cfg.FWI_VARS]
            .agg(["min", "max", "mean", "std"])
            .reset_index()
        )
        # Flatten the (variable, statistic) MultiIndex into "<var>_<stat>" names;
        # the two grouping keys keep their plain names
        agg_fwi_df.columns = ["day", "departement"] + [
            x[0] + "_" + x[1] for x in agg_fwi_df.columns if x[1] != ""
        ]

        # Group weather dataframe by day and department and compute min, max, mean, std
        agg_wth_df = (
            weather.groupby(["time", "nom"])[cfg.WEATHER_ERA5T_VARS]
            .agg(["min", "max", "mean", "std"])
            .reset_index()
        )
        # Same flattening; ("time", "nom") are renamed to ("day", "departement")
        # so both aggregates share join keys
        agg_wth_df.columns = ["day", "departement"] + [
            x[0] + "_" + x[1] for x in agg_wth_df.columns if x[1] != ""
        ]

        # Merge fwi and weather together (inner: keep only days/departements in both)
        mid_df = pd.merge(
            agg_fwi_df, agg_wth_df, on=["day", "departement"], how="inner"
        )

        # Count fires by day and department
        fires_count = (
            nasa_firms.groupby(["acq_date", "nom"])["confidence"]
            .count()
            .to_frame()
            .reset_index()
        )
        fires_count = fires_count.rename({"confidence": "fires"}, axis=1)

        # Merge fires (left: keep every weather/fwi row even without detections)
        final_df = pd.merge(
            mid_df,
            fires_count,
            left_on=["day", "departement"],
            right_on=["acq_date", "nom"],
            how="left",
        ).drop(["acq_date", "nom"], axis=1)

        # Fill lines with no fires with 0
        final_df["fires"] = final_df["fires"].fillna(0)
        super().__init__(final_df)
def main(args):
    """Build a labelled fire dataset from ERA5-Land weather and VIIRS detections.

    Depending on ``args.type_of_merged`` the two sources are merged either per
    departement or by spatial proximity; rows with a matched detection are
    labelled ``Statut = 1``, the rest ``0``.
    """
    weather = ERA5Land(args.ERA5)
    nasa_firms = NASAFIRMS_VIIRS(args.nasa_firms, args.nasa_firms_type)
    print(weather.shape)
    print(nasa_firms.shape)

    # Time span selection: keep only dates present in both datasets
    date_range = get_intersection_range(weather.time, nasa_firms.acq_date)
    weather = weather[weather.time.isin(date_range)]
    nasa_firms = nasa_firms[nasa_firms.acq_date.isin(date_range)]

    print(weather.shape)
    print(nasa_firms.shape)

    # Keep only vegetation wildfires and remove thermal anomalies with low confidence
    where = (nasa_firms["confidence"] != "l") & (nasa_firms["type"] == 0)
    nasa_firms = nasa_firms[where]

    # Merge
    if args.type_of_merged == "departements":
        # drop redundant columns with weather datasets
        nasa_firms = nasa_firms.drop(["nom"], axis=1)
        merged_data = merge_datasets_by_departements(
            weather, "time", "code", nasa_firms, "acq_date", "code", "left"
        )
        # NOTE(review): this branch drops the "_y" coordinate pair, so the
        # surviving coordinates carry the "_x" suffix (presumably pandas'
        # default left/right merge suffixes — confirm against the helper).
        to_drop = [
            "acq_date",
            "latitude_y",
            "longitude_y",
            "bright_ti4",
            "confidence",
            "bright_ti5",
            "frp",
            "type",
            "acq_date_time",
        ]

    else:
        # drop redundant columns with weather datasets
        nasa_firms = nasa_firms.drop(["code", "nom"], axis=1)
        merged_data = merge_by_proximity(
            nasa_firms, "acq_date", weather, "time", "right"
        )
        # Here the "_x" pair is dropped, so the survivors carry "_y".
        to_drop = [
            "latitude_x",
            "longitude_x",
            "closest_lat",
            "closest_lon",
            "acq_date",
            "bright_ti4",
            "confidence",
            "bright_ti5",
            "frp",
            "type",
            "acq_date_time",
        ]

    # Label: 1 where a detection matched this row (left/right merge leaves
    # NaN confidence on unmatched rows), 0 otherwise
    final_data = merged_data.copy()
    final_data["Statut"] = merged_data["confidence"].notna().astype(int)

    # drop unnecessary columns
    final_data = final_data.drop(to_drop, axis=1)
    # Strip the merge suffix from the surviving coordinate columns. The
    # previous code renamed only the "_y" pair, which is a no-op in the
    # "departements" branch (those columns were just dropped) and left
    # "latitude_x"/"longitude_x" suffixed there; mapping both suffixes makes
    # both branches end up with plain "latitude"/"longitude". In each branch
    # only one pair survives, so no name collision can occur.
    final_data = final_data.rename(
        columns={
            "latitude_x": "latitude",
            "longitude_x": "longitude",
            "latitude_y": "latitude",
            "longitude_y": "longitude",
        }
    )

    print(final_data)
# Example 5
 def _test_get_intersection_range(self, s1, s2, expected_len):
     """Assert the intersection of two date series is a DatetimeIndex of the expected length."""
     result = utils.get_intersection_range(s1, s2)
     self.assertIsInstance(result, pd.DatetimeIndex)
     self.assertEqual(len(result), expected_len)