def test_yearsplit(self, tmp_path):
    """Check that the training split stops before the held-out test year.

    A small dataset is built with ``_make_dataset(size=(2, 2))`` and handed
    to ``Engineer._train_test_split`` with 2001 as the only test year. Every
    timestamp left in the returned training data must fall strictly before
    2001-01-01.
    """
    _setup(tmp_path)

    # Only the first element of the returned triple is used here.
    full_ds, _, _ = _make_dataset(size=(2, 2))

    split_options = {
        "years": [2001],
        "target_variable": "VHI",
        "pred_months": 11,
        "expected_length": 11,
    }
    train_split = Engineer(tmp_path)._train_test_split(full_ds, **split_options)

    train_times = train_split.time.values
    test_year_start = np.datetime64("2001-01-01")
    all_before_test_year = (train_times < test_year_start).all()

    assert (
        all_before_test_year
    ), "Got years greater than the test year in the training set!"
# wrong shapes!
# datasets = engineer._get_preprocessed_files()
# ds_list = [xr.open_dataset(ds) for ds in datasets]
# dims_list = [[dim for dim in ds.dims] for ds in ds_list]
# variable_list = [
#     [var for var in ds.variables if var not in dims_list[i]][0]
#     for i, ds in enumerate(ds_list)
# ]
# da_list = [ds[variable_list[i]] for i, ds in enumerate(ds_list)]
# # ds = engineer._make_dataset()
# years = [1990]
# train_ds, test_dict
# train_ds = engineer._train_test_split(
#     ds, years, target_variable="VHI", pred_months=3, expected_length=3
# )
# xy_test, min_test_date = engineer.stratify_xy(
#     ds,
#     years[0],
#     target_variable="VHI",
#     target_month=7,
#     pred_months=3,
#     expected_length=3,
# )
# train_dict = engineer._stratify_training_data(
#     train_ds, target_variable, pred_months, expected_length
# )
# # years = [1990]