def preprocess(monthly=True):
    """Preprocess BOKU NDVI (1000m) for Kenya, regridded to the VCI grid.

    Resamples to monthly ("M") when `monthly` is True, otherwise to
    weekly timesteps anchored on Mondays ("W-MON").
    """
    regrid = get_data_path() / "interim/VCI_preprocessed/data_kenya.nc"
    preprocessor = BokuNDVIPreprocessor(get_data_path(), resolution="1000")
    resample = "M" if monthly else "W-MON"
    preprocessor.preprocess(subset_str="kenya", regrid=regrid, resample_time=resample)
def process_vci(subset_str: str = "kenya"):
    """Preprocess VCI monthly, regridded onto the preprocessed ERA5-Land grid.

    Raises AssertionError if the ERA5-Land reference file is missing.
    """
    data_path = get_data_path()
    regrid_path = (
        data_path / f"interim/reanalysis-era5-land_preprocessed/data_{subset_str}.nc"
    )
    assert regrid_path.exists(), f"{regrid_path} not available"

    processor = VHIPreprocessor(get_data_path(), "VCI")
    processor.preprocess(
        subset_str=subset_str,
        resample_time="M",
        upsampling=False,
        regrid=regrid_path,
    )
def process_vci_2018():
    """Preprocess VCI for Kenya at monthly resolution (no regridding)."""
    vhi = VHIPreprocessor(get_data_path(), "VCI")
    vhi.preprocess(subset_str="kenya", resample_time="M", upsampling=False)
def main(target_var, all_vars):
    """Engineer data then fit models on three nested variable subsets.

    Subsets: autoregressive only (no static), autoregressive + dynamic
    (no static), autoregressive + dynamic (with static features). Each
    fitted model directory is renamed after the run.
    """
    # RUN engineer
    engineer(target_var=target_var)

    autoregressive = [target_var]  # 'VCI3M'
    dynamic = ["precip", "t2m", "pet", "E", "SMroot", "SMsurf"]

    experiments = [
        (autoregressive, False),
        (autoregressive + dynamic, False),
        (autoregressive + dynamic, True),
    ]
    for vars_to_include, static_bool in experiments:
        print(
            f'\n{"-" * 10}\nRunning experiment with: {vars_to_include} with static: {static_bool} for {target_var}\n{"-" * 10}'
        )

        # FIT models: ignore every variable not explicitly included
        vars_to_exclude = [v for v in all_vars if v not in vars_to_include]
        parsimonious()
        if static_bool:
            lstm(vars_to_exclude, static="features")
            ealstm(vars_to_exclude, static="features")
        else:
            lstm(vars_to_exclude, static=None)

        # RENAME model directories
        data_dir = get_data_path()
        rename_model_experiment_file(
            data_dir, vars_to_include, static=static_bool, target_var=target_var
        )
def linear_nn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    explain=False,
    static="features",
    ignore_vars=None,
    num_epochs=50,
    early_stopping=5,
    layer_sizes=None,
    predict_delta=False,
    spatial_mask=None,
    include_latlons=False,
):
    """Train, evaluate and save a LinearNetwork model.

    `layer_sizes` defaults to [100]; using None as the sentinel avoids
    the mutable-default-argument pitfall (a shared list object reused
    across calls). Behavior for callers is unchanged.
    If `explain` is True, SHAP values are computed and saved after training.
    """
    if layer_sizes is None:
        layer_sizes = [100]

    predictor = LinearNetwork(
        layer_sizes=layer_sizes,
        data_folder=get_data_path(),
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        static=static,
        ignore_vars=ignore_vars,
        predict_delta=predict_delta,
        spatial_mask=spatial_mask,
        include_latlons=include_latlons,
    )
    predictor.train(num_epochs=num_epochs, early_stopping=early_stopping)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

    if explain:
        _ = predictor.explain(save_shap_values=True)
def earnn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    pretrained=True,
    ignore_vars=None,
):
    """Load (or train from scratch) an EALSTM, then explain the 2018_3 test file.

    When `pretrained` is False the model is trained, evaluated and saved
    first; otherwise the saved model is loaded from disk.
    """
    data_path = get_data_path()

    if pretrained:
        predictor = load_model(data_path / f"models/{experiment}/ealstm/model.pt")
    else:
        predictor = EARecurrentNetwork(
            hidden_size=128,
            data_folder=data_path,
            experiment=experiment,
            include_pred_month=include_pred_month,
            surrounding_pixels=surrounding_pixels,
            ignore_vars=ignore_vars,
        )
        predictor.train(num_epochs=50, early_stopping=5)
        predictor.evaluate(save_preds=True)
        predictor.save_model()

    test_file = data_path / f"features/{experiment}/test/2018_3"
    assert test_file.exists()
    all_explanations_for_file(test_file, predictor, batch_size=100)
def regression(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    explain=False,
    static="features",
    ignore_vars=None,
    predict_delta=False,
    spatial_mask=None,
    include_latlons=False,
):
    """Train and evaluate a LinearRegression baseline; optionally compute SHAP."""
    model = LinearRegression(
        get_data_path(),
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        static=static,
        ignore_vars=ignore_vars,
        predict_delta=predict_delta,
        spatial_mask=spatial_mask,
        include_latlons=include_latlons,
    )
    model.train()
    # mostly to test it works
    model.evaluate(save_preds=True)

    if explain:
        model.explain(save_shap_values=True)
def export_s5():
    """Export hourly SEAS5 total_precipitation forecasts for 1993-2014.

    No maximum leadtime is imposed; 20 CDS requests run in parallel.
    The pressure levels are passed through even though the exporter is
    built with pressure_level=False, matching the original behavior.
    """
    exporter = S5Exporter(
        data_folder=get_data_path(),
        granularity="hourly",
        pressure_level=False,
    )
    exporter.export(
        variable="total_precipitation",
        min_year=1993,
        max_year=2014,
        min_month=1,
        max_month=12,
        max_leadtime=None,
        pressure_levels=[200, 500, 925],
        n_parallel_requests=20,
    )
def gbdt(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    pretrained=True,
    explain=False,
    static="features",
    ignore_vars=None,
    # predict_delta=False,
    spatial_mask=None,
    include_latlons=False,
):
    """Initialise, train, evaluate and save a GBDT model.

    `pretrained` and `explain` are accepted for signature parity with
    the other model runners but are not used by this function.
    """
    data_path = get_data_path()

    model = GBDT(
        data_folder=data_path,
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        static=static,
        ignore_vars=ignore_vars,
        spatial_mask=spatial_mask,
        include_latlons=include_latlons,
    )
    model.train(early_stopping=5)
    model.evaluate(save_preds=True)
    model.save_model()
def process_gleam(subset_str: str = "kenya"):
    """Preprocess GLEAM monthly, regridded onto the preprocessed ERA5-Land grid.

    NOTE(review): the original body validated the regrid path but never
    invoked any preprocessor, so the function was a no-op. The
    GLEAMPreprocessor call below completes the pipeline, mirroring the
    Kenya-only `process_gleam` variant elsewhere in this file —
    confirm the intended resample/upsampling settings.
    """
    data_path = get_data_path()
    regrid_path = (
        data_path / f"interim/reanalysis-era5-land_preprocessed/data_{subset_str}.nc"
    )
    assert regrid_path.exists(), f"{regrid_path} not available"

    processor = GLEAMPreprocessor(data_path)
    processor.preprocess(
        subset_str=subset_str, regrid=regrid_path, resample_time="M", upsampling=False
    )
def run_models(target_var: str):
    """Run the persistence baseline and an LSTM for `target_var`, then
    rename the models directory to a target-specific experiment name."""
    parsimonious()

    # -------
    # LSTM (the EALSTM variant was commented out in the original run)
    # -------
    rnn(
        experiment="one_month_forecast",
        include_pred_month=True,
        surrounding_pixels=None,
        explain=False,
        static=None,  # "features"
        ignore_vars=None,
        num_epochs=50,  # 50
        early_stopping=5,  # 5
        hidden_size=256,
        predict_delta=False,
        normalize_y=True,
        include_prev_y=False,
        include_latlons=False,
    )

    # rename the output file
    data_path = get_data_path()
    _rename_directory(
        from_path=data_path / "models" / "one_month_forecast",
        to_path=data_path
        / "models"
        / f"one_month_forecast_adede_only_target_{target_var}",
        with_datetime=False,
    )
def export_era5(variables):
    """Export each of `variables` from ERA5 at hourly granularity.

    The ERA5 exporter downloads data under weird names; `name2var`
    records the mapping between shorthand and downloaded variable names.
    It is reference-only and not consumed by the export loop.
    """
    exporter = ERA5Exporter(get_data_path())

    name2var = {
        "precip": "precip",
        "total_precipitation": "total_precipitation",
        "evaporation": "e",
        "mean_eastward_turbulent_surface_stress": "metss",
        "mean_northward_turbulent_surface_stress": "mntss",
        "potential_evaporation": "pev",
        "slhf": "surface_latent_heat_flux",
        "sp": "surface_pressure",
        "sshf": "surface_sensible_heat_flux",
        "ssrc": "surface_net_solar_radiation_clear_sky",
        "stl1": "soil_temperature_level_1",
        "strc": "surface_net_thermal_radiation_clear_sky",
        "swvl1": "volumetric_soil_water_layer_1",
        "swvl2": "volumetric_soil_water_layer_2",
        "swvl3": "volumetric_soil_water_layer_3",
        "swvl4": "volumetric_soil_water_layer_4",
        "t2m": "2m_temperature",
        "u10": "10m_u_component_of_wind",
        "v10": "10m_v_component_of_wind",
        "p84.162": "vertical_integral_of_divergence_of_moisture_flux",
        "VCI": "VCI",
    }

    for variable in variables:
        exporter.export(variable=variable, granularity="hourly", break_up=True)
def export_s5(region_str="kenya"):
    """Export monthly SEAS5 forecasts for `region_str`.

    Bug fix: the original body referenced `variables`, `max_year`,
    `min_month` and `max_month` without ever defining them, raising
    NameError on the first call. They are now defined explicitly,
    matching the ranges used by the hourly SEAS5 exporter in this file
    (total_precipitation, 1993-2014, all months) — confirm these are the
    intended defaults for the monthly export.
    """
    granularity = "monthly"
    pressure_level = False
    exporter = S5Exporter(
        data_folder=get_data_path(),
        granularity=granularity,
        pressure_level=pressure_level,
    )
    variables = ["total_precipitation"]
    min_year = 1993
    max_year = 2014
    min_month = 1
    max_month = 12
    max_leadtime = None
    pressure_levels = None  # [200, 500, 925]
    n_parallel_requests = 1

    for variable in variables:
        print(f"\n\nWORKING ON: {variable}\n\n")
        exporter.export(
            variable=variable,
            min_year=min_year,
            max_year=max_year,
            min_month=min_month,
            max_month=max_month,
            max_leadtime=max_leadtime,
            pressure_levels=pressure_levels,
            n_parallel_requests=n_parallel_requests,
            region_str=region_str,
            break_up=False,
        )
def process_esa_cci_landcover():
    """Preprocess ESA CCI landcover, regridded to the Kenya VCI grid."""
    data_path = get_data_path()
    reference = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    assert reference.exists(), f"{reference} not available"

    ESACCIPreprocessor(data_path).preprocess(subset_str="kenya", regrid=reference)
def preprocess_era5():
    """Preprocess monthly-mean ERA5, regridded to the Kenya VCI grid."""
    data_path = get_data_path()
    reference = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    assert reference.exists(), f"{reference} not available"

    ERA5MonthlyMeanPreprocessor(data_path).preprocess(
        subset_str="kenya", regrid=reference
    )
def persistence(experiment="one_month_forecast"):
    """Evaluate the Persistence baseline (no training needed).

    Runs without a spatial mask. The original body assigned the Kenya
    ASAL mask path ("interim/boundaries_preprocessed/kenya_asal_mask.nc")
    and then immediately overwrote it with None; the dead assignment is
    removed, keeping the effective behavior (no mask) unchanged.
    """
    data_path = get_data_path()
    spatial_mask = None
    predictor = Persistence(data_path, experiment=experiment, spatial_mask=spatial_mask)
    predictor.evaluate(save_preds=True)
def preprocess_asal_mask():
    """Build the Kenya ASAL mask on the preprocessed CHIRPS grid."""
    data_path = get_data_path()
    reference = data_path / "interim/chirps_preprocessed/data_kenya.nc"
    assert reference.exists(), f"{reference} not available"

    KenyaASALMask(data_path).preprocess(reference_nc_filepath=reference)
def models(target_var: str = "VCI1M"):
    """Run persistence, LSTM and EALSTM for `target_var`, then rename
    the models directory to a BOKU-specific experiment name.

    All variables are used (ignore_vars is None).
    """
    # NO IGNORE VARS
    ignore_vars = None

    # -------------
    # persistence
    # -------------
    parsimonious()

    # -------------
    # LSTM
    # -------------
    rnn(
        experiment="one_month_forecast",
        include_pred_month=True,
        surrounding_pixels=None,
        explain=False,
        static="features",
        ignore_vars=ignore_vars,
        num_epochs=50,
        early_stopping=5,
        hidden_size=256,
        include_latlons=True,
    )

    # -------------
    # EALSTM
    # -------------
    earnn(
        experiment="one_month_forecast",
        include_pred_month=True,
        surrounding_pixels=None,
        pretrained=False,
        explain=False,
        static="features",
        ignore_vars=ignore_vars,
        num_epochs=50,
        early_stopping=5,
        hidden_size=256,
        static_embedding_size=64,
        include_latlons=True,
    )

    # rename the output file
    data_path = get_data_path()
    _rename_directory(
        from_path=data_path / "models" / "one_month_forecast",
        to_path=data_path
        / "models"
        / f"one_month_forecast_BOKU_{target_var}_adede_only_vars",
    )
def preprocess_era5(subset_str: str = "kenya"):
    """Preprocess monthly-mean ERA5 for `subset_str` on its native grid.

    Regridding is deliberately disabled here (regrid=None); the
    ERA5-Land reference-grid variant remains commented out upstream.
    """
    data_path = get_data_path()
    regrid_path = None

    ERA5MonthlyMeanPreprocessor(data_path).preprocess(
        subset_str=subset_str, regrid=regrid_path
    )
def process_precip_2018():
    """Preprocess CHIRPS precipitation serially, regridded to the Kenya VCI grid."""
    data_path = get_data_path()
    reference = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    assert reference.exists(), f"{reference} not available"

    CHIRPSPreprocessor(data_path).preprocess(
        subset_str="kenya", regrid=reference, parallel=False
    )
def move_features_dir(target_var):
    """Rename the features dir to a BOKU/target-specific experiment name."""
    data_path = get_data_path()
    src = data_path / "features" / "one_month_forecast"
    dst = (
        data_path
        / "features"
        / f"one_month_forecast_BOKU_{target_var}_adede_only_vars"
    )
    _rename_directory(from_path=src, to_path=dst)
def preprocess_boku_ndvi():
    """Preprocess BOKU NDVI weekly (Monday-anchored), regridded to the Kenya VCI grid."""
    data_path = get_data_path()
    reference = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    assert reference.exists(), f"{reference} not available"

    processor = BokuNDVIPreprocessor(data_path)
    processor.preprocess(subset_str="kenya", resample_time="W-MON", regrid=reference)
def engineer(pred_months=3, target_var="VCI1M"):
    """Engineer train/test data for the one_month_forecast experiment.

    Test years are 2016-2018; static features are not processed. The
    local is named `eng` so it does not shadow this function's own name.
    """
    eng = Engineer(
        get_data_path(), experiment="one_month_forecast", process_static=False
    )
    eng.engineer(
        test_year=[y for y in range(2016, 2019)],
        target_variable=target_var,
        pred_months=pred_months,
        expected_length=pred_months,
    )
def process_seas5():
    """Preprocess SEAS5 forecasts monthly, regridded to the Kenya CHIRPS grid."""
    data_path = get_data_path()
    reference = data_path / "interim/chirps_preprocessed/chirps_kenya.nc"
    assert reference.exists(), f"{reference} not available"

    S5Preprocessor(data_path).preprocess(
        subset_str="kenya", regrid=reference, resample_time="M", upsampling=False
    )
def move_features_dir(target_var, adede_only=False, experiment_name=None):
    """Rename the features dir; the default name encodes the target and
    whether only the Adede (P + VCI) variable subset was used."""
    data_path = get_data_path()

    if experiment_name is None:
        subset = "only_P_VCI" if adede_only else "ALL"
        experiment_name = f"one_month_forecast_BOKU_{target_var}_our_vars_{subset}"

    _rename_directory(
        from_path=data_path / "features" / "one_month_forecast",
        to_path=data_path / "features" / experiment_name,
    )
def export_era5POS():
    """Export 2m temperature and hourly precipitation via the ERA5 POS exporter."""
    exporter = ERA5ExporterPOS(get_data_path())
    for variable in (
        "air_temperature_at_2_metres",
        "precipitation_amount_1hour_Accumulation",
    ):
        exporter.export(variable=variable)
def preprocess_era5_hourly():
    """Preprocess hourly ERA5 to weekly timesteps for Kenya.

    NOTE(review): the VCI regrid path is asserted to exist but is not
    passed to `preprocess` — presumably only a sanity check here;
    confirm against the preprocessor's defaults.
    """
    data_path = get_data_path()
    reference = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    assert reference.exists(), f"{reference} not available"

    processor = ERA5HourlyPreprocessor(data_path)
    # W-MON = weekly, anchored on Mondays (same as the Atzberger NDVI data)
    processor.preprocess(subset_str="kenya", resample_time="W-MON")
def earnn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    pretrained=False,
    explain=False,
    static="features",
    ignore_vars=None,
    num_epochs=50,
    early_stopping=5,
    static_embedding_size=10,
    hidden_size=128,
    predict_delta=False,
    spatial_mask=None,
    include_latlons=False,
    normalize_y=True,
    include_prev_y=True,
    include_yearly_aggs=True,  # new
    clear_nans=True,
    weight_observations=False,
    pred_month_static=False,
):
    """Train (or load) a fully-configurable EALSTM.

    When `pretrained` is False the network is built with every option
    forwarded, trained, evaluated and saved; otherwise a saved model is
    loaded from disk. When `explain` is True, SHAP explanations are
    computed for the 2018_3 test file.
    """
    data_path = get_data_path()

    if pretrained:
        predictor = load_model(data_path / f"models/{experiment}/ealstm/model.pt")
    else:
        predictor = EARecurrentNetwork(
            hidden_size=hidden_size,
            data_folder=data_path,
            experiment=experiment,
            include_pred_month=include_pred_month,
            surrounding_pixels=surrounding_pixels,
            static=static,
            static_embedding_size=static_embedding_size,
            ignore_vars=ignore_vars,
            predict_delta=predict_delta,
            spatial_mask=spatial_mask,
            include_latlons=include_latlons,
            normalize_y=normalize_y,
            include_prev_y=include_prev_y,
            include_yearly_aggs=include_yearly_aggs,
            clear_nans=clear_nans,
            weight_observations=weight_observations,
            pred_month_static=pred_month_static,
        )
        predictor.train(num_epochs=num_epochs, early_stopping=early_stopping)
        predictor.evaluate(save_preds=True)
        predictor.save_model()

    if explain:
        test_file = data_path / f"features/{experiment}/test/2018_3"
        assert test_file.exists()
        all_explanations_for_file(test_file, predictor, batch_size=100)
def process_gleam():
    """Preprocess GLEAM monthly, regridded to the Kenya VCI grid."""
    data_path = get_data_path()
    reference = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    assert reference.exists(), f"{reference} not available"

    GLEAMPreprocessor(data_path).preprocess(
        subset_str="kenya", regrid=reference, resample_time="M", upsampling=False
    )
def preprocess_era5_hourly(subset_str: str = "kenya"):
    """Preprocess hourly ERA5 to weekly timesteps for `subset_str`.

    NOTE(review): the ERA5-Land regrid path is asserted to exist but is
    not passed to `preprocess` — presumably only a sanity check here;
    confirm against the preprocessor's defaults.
    """
    data_path = get_data_path()
    reference = (
        data_path / f"interim/reanalysis-era5-land_preprocessed/data_{subset_str}.nc"
    )
    assert reference.exists(), f"{reference} not available"

    processor = ERA5HourlyPreprocessor(data_path)
    # W-MON = weekly, anchored on Mondays (same as the Atzberger NDVI data)
    processor.preprocess(subset_str=subset_str, resample_time="W-MON")