def hyper_pytorch_ml(udf_data: UdfData):
    """Apply a pre-trained PyTorch machine learning model to a hypercube

    The model must be a PyTorch model whose prediction (forward) method
    accepts a torch.autograd.Variable as input.

    Args:
        udf_data (UdfData): The UDF data object that contains hypercubes and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be
        used to store the resulting data.
    """
    cube = udf_data.get_datacube_list()[0]
    # This is the input data of the model
    input_data = torch.autograd.Variable(torch.Tensor(cube.array.values))
    # Get the first model
    mlm = udf_data.get_ml_model_list()[0]
    m = mlm.get_model()
    # Predict the data
    pred = m(input_data)
    result = xarray.DataArray(data=pred.detach().numpy(),
                              dims=cube.array.dims,
                              coords=cube.array.coords,
                              name=cube.id + "_pytorch")
    # Create the new data cube from the prediction
    result_cube = DataCube(array=result)
    # Insert the new hypercube in the input object
    udf_data.set_datacube_list([result_cube])
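# A minimal sketch (not part of the original module) of a PyTorch model that would
# satisfy the contract of hyper_pytorch_ml above: its forward method accepts a
# tensor/Variable and returns a tensor of the same shape. The class name and layer
# size are illustrative assumptions only.
import torch

class MinimalLinearModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # One input feature, one output feature, so shapes pass through unchanged
        self.linear = torch.nn.Linear(1, 1)

    def forward(self, x):
        # Flatten to (n, 1), predict, then restore the original shape
        shape = x.shape
        return self.linear(x.reshape(-1, 1)).reshape(shape)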
def test_rct_stats(self):
    """Test the raster collection tile statistics UDF"""
    dir = os.path.dirname(openeo_udf.functions.__file__)
    file_name = os.path.join(dir, "datacube_statistics.py")
    udf_code = UdfCodeModel(language="python", source=open(file_name, "r").read())

    temp = create_datacube(name="temp", value=1, dims=("t", "x", "y"), shape=(3, 3, 3))
    udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp])

    run_user_code(code=udf_code.source, data=udf_data)

    result = udf_data.to_dict()
    self.assertEqual(len(result["datacubes"]), 0)
    self.assertEqual(len(result["structured_data_list"]), 1)
    self.assertEqual(result["structured_data_list"][0]["type"], "dict")
    self.assertEqual(result["structured_data_list"][0]["data"]["temp"],
                     {'max': 1.0, 'mean': 1.0, 'min': 1.0, 'sum': 27.0})
def hyper_ndvi(udf_data: UdfData):
    """Compute the NDVI based on RED and NIR hypercubes

    Hypercubes with ids "red" and "nir" are required. The NDVI computation will be
    applied to all hypercube dimensions.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well
        as hypercubes and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be
        used to store the resulting data.
    """
    red = None
    nir = None

    # Iterate over each cube and find the red and nir cubes by id
    for cube in udf_data.get_datacube_list():
        if "red" in cube.id.lower():
            red = cube
        if "nir" in cube.id.lower():
            nir = cube
    if red is None:
        raise Exception("Red hypercube is missing in input")
    if nir is None:
        raise Exception("Nir hypercube is missing in input")

    ndvi = (nir.array - red.array) / (nir.array + red.array)
    ndvi.name = "NDVI"

    hc = DataCube(array=ndvi)
    udf_data.set_datacube_list([hc, ])
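# A hedged usage sketch of hyper_ndvi, mirroring the test setup elsewhere in this
# section (create_datacube is the test helper used there): two cubes whose ids
# contain "red" and "nir" are replaced by a single NDVI cube.
hc_red = create_datacube(name="red", value=1, dims=("t", "y", "x"), shape=(3, 3, 3))
hc_nir = create_datacube(name="nir", value=3, dims=("t", "y", "x"), shape=(3, 3, 3))
udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[hc_red, hc_nir])
hyper_ndvi(udf_data)
# With constant values 1 and 3, every NDVI pixel is (3 - 1) / (3 + 1) = 0.5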
def test_pytorch_linear_nn(self):
    """Test linear pytorch model training and UDF application"""
    model = SimpleNetwork()
    MachineLearningPytorchTestCase.train_pytorch_model(model=model)

    dir = os.path.dirname(openeo_udf.functions.__file__)
    file_name = os.path.join(dir, "datacube_pytorch_ml.py")
    udf_code = UdfCodeModel(language="python", source=open(file_name, "r").read())

    temp = create_datacube(name="temp", value=1, dims=("x", "y"), shape=(2, 2))

    ml = MachineLearnModelConfig(
        framework="pytorch",
        name="linear_model",
        description="A pytorch model that adds two numbers in range of [1,1]",
        path="/tmp/simple_linear_nn_pytorch.pt")
    udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp], ml_model_list=[ml])

    run_user_code(code=udf_code.source, data=udf_data)
    pprint.pprint(udf_data.to_dict())
def apply_timeseries_generic(udf_data: UdfData, callback: Callable = apply_timeseries):
    """
    Implements the UDF contract by calling a user provided time series transformation
    function (apply_timeseries). Multiple bands are currently handled separately; another
    approach could provide a dataframe with a timeseries for each band.

    :param udf_data: The UDF data object containing the data cubes to transform
    :param callback: A function that maps a pandas.Series and the user context to a
        transformed pandas.Series of the same length
    :return: The modified udf_data object
    """
    # The list of tiles that were created
    tile_results = []

    # Iterate over each cube
    for cube in udf_data.get_datacube_list():
        array3d = []
        # Use rollaxis to make the time dimension the last one
        for time_x_slice in numpy.rollaxis(cube.array.values, 1):
            time_x_result = []
            for time_slice in time_x_slice:
                series = pandas.Series(time_slice)
                transformed_series = callback(series, udf_data.user_context)
                time_x_result.append(transformed_series)
            array3d.append(time_x_result)

        # We need to create a new 3D array with the correct shape for the computed aggregate
        result_tile = numpy.rollaxis(numpy.asarray(array3d), 1)
        assert result_tile.shape == cube.array.shape

        # Create the new raster collection cube
        rct = DataCube(xarray.DataArray(result_tile))
        tile_results.append(rct)

    # Insert the new tiles as list of raster collection tiles in the input object.
    # The new tiles will replace the original input tiles.
    udf_data.set_datacube_list(tile_results)
    return udf_data
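# A sketch (illustrative only, not shipped with the module) of a user-provided
# callback compatible with apply_timeseries_generic: it receives one pandas.Series
# per pixel plus the user context and must return a series of the same length.
# The "factor" context key is an assumption made up for this example.
import pandas

def example_scale_timeseries(series: pandas.Series, context: dict) -> pandas.Series:
    factor = (context or {}).get("factor", 1.0)
    return series * factor

# It would be wired in as:
# apply_timeseries_generic(udf_data, callback=example_scale_timeseries)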
def fct_buffer(udf_data: UdfData):
    """Compute a buffer of size 10 around features

    This function creates a buffer around all features in the provided feature collection
    tiles. The resulting geopandas.GeoDataFrame contains the new geometries and a copy
    of the original attribute data.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be
        used to store the resulting data.
    """
    fct_list = []

    # Iterate over each tile
    for tile in udf_data.feature_collection_list:
        # Buffer all features
        gseries = tile.data.buffer(distance=10)
        # Create a new GeoDataFrame that includes the buffered geometry and the attribute data
        new_data = tile.data.set_geometry(gseries)
        # Create the new feature collection tile
        fct = FeatureCollection(id=tile.id + "_buffer", data=new_data,
                                start_times=tile.start_times, end_times=tile.end_times)
        fct_list.append(fct)

    # Insert the new tiles as list of feature collection tiles in the input object.
    # The new tiles will replace the original input tiles.
    udf_data.set_feature_collection_list(fct_list)
def unused_test_DataCube_ndvi_message_pack(self):
    """Test the DataCube NDVI computation with the message pack protocol"""
    # TODO: Reactivate this test
    dir = os.path.dirname(openeo_udf.functions.__file__)
    file_name = os.path.join(dir, "datacube_ndvi.py")
    udf_code = UdfCodeModel(language="python", source=open(file_name, "r").read())

    hc_red = create_datacube(name="red", value=1, dims=("t", "y", "x"), shape=(3, 3, 3))
    hc_nir = create_datacube(name="nir", value=3, dims=("t", "y", "x"), shape=(3, 3, 3))
    udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[hc_red, hc_nir])

    udf_request = UdfRequestModel(data=udf_data.to_dict(), code=udf_code)
    udf_request = base64.b64encode(msgpack.packb(udf_request.dict(), use_bin_type=True))
    response = self.app.post('/udf_message_pack', data=udf_request,
                             headers={"Content-Type": "application/base64"})
    self.assertEqual(response.status_code, 200)

    blob = base64.b64decode(response.content)
    udf_data = msgpack.unpackb(blob, raw=False)
    self.checkDataCubeNdvi(udf_data=udf_data)
def run_model_test(self, model):
    MachineLearningTestCase.train_sklearn_model(model=model)

    dir = os.path.dirname(openeo_udf.functions.__file__)
    file_name = os.path.join(dir, "datacube_sklearn_ml.py")
    udf_code = UdfCodeModel(language="python", source=open(file_name, "r").read())

    red = create_datacube(name="red", value=1, dims=("t", "x", "y"), shape=(2, 2, 2))
    nir = create_datacube(name="nir", value=1, dims=("t", "x", "y"), shape=(2, 2, 2))

    ml = MachineLearnModelConfig(
        framework="sklearn",
        name="random_forest",
        description="A sklearn model that adds two numbers in range of [1,1]",
        path="/tmp/rf_add_model.pkl.xz")
    udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[red, nir], ml_model_list=[ml])
    pprint.pprint(udf_data.to_dict())

    run_user_code(code=udf_code.source, data=udf_data)
    result = udf_data.to_dict()
    self.assertAlmostEqual(2.0, result['datacubes'][0]['data'][0][0][0], 2)
def hyper_min_median_max(udf_data: UdfData):
    """Compute the min, median and max of the time dimension of a hyper cube

    Hypercubes with time dimensions are required. The min, median and max reduction of
    the time axis will be applied to all hypercube dimensions.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well
        as hypercubes and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be
        used to store the resulting data.
    """
    # Iterate over each cube and reduce the time dimension
    cube_list = []
    for cube in udf_data.get_datacube_list():
        min = cube.array.min(dim="t")
        median = cube.array.median(dim="t")
        max = cube.array.max(dim="t")
        min.name = cube.id + "_min"
        median.name = cube.id + "_median"
        max.name = cube.id + "_max"
        cube_list.append(DataCube(array=min))
        cube_list.append(DataCube(array=median))
        cube_list.append(DataCube(array=max))

    udf_data.set_datacube_list(cube_list)
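# A hedged usage sketch of the reduction above, reusing the create_datacube test
# helper defined elsewhere in this suite: one input cube with a "t" dimension
# yields three output cubes named temp_min, temp_median and temp_max.
temp = create_datacube(name="temp", value=1, dims=("t", "y", "x"), shape=(3, 3, 3))
udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp])
hyper_min_median_max(udf_data)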
def test_sklearn_extra_tree_message_pack_md5_hash(self):
    """Test extra tree training and UDF application with the message pack protocol and
    the machine learning model uploaded to the UDF md5 hash based storage system"""
    model = ExtraTreesRegressor(n_estimators=100,
                                max_depth=7,
                                max_features="log2",
                                min_samples_split=2,
                                min_samples_leaf=1,
                                verbose=0)
    model_path = MachineLearningTestCase.train_sklearn_model(model=model)

    request_model = RequestStorageModel(
        uri=model_path,
        title="This is a test model",
        description="This is the test description.")

    response = self.app.post('/storage', json=request_model.dict())
    print(response.content)
    self.assertEqual(response.status_code, 200)

    md5_hash = response.content.decode("ascii").strip().replace("\"", "")

    dir = os.path.dirname(openeo_udf.functions.__file__)
    file_name = os.path.join(dir, "datacube_sklearn_ml.py")
    udf_code = UdfCodeModel(language="python", source=open(file_name, "r").read())

    red = create_datacube(name="red", value=1, dims=("t", "x", "y"), shape=(2, 2, 2))
    nir = create_datacube(name="nir", value=1, dims=("t", "x", "y"), shape=(2, 2, 2))

    ml = MachineLearnModelConfig(
        framework="sklearn",
        name="random_forest",
        description="A sklearn model that adds two numbers in range of [1,1]",
        md5_hash=md5_hash)
    udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[red, nir], ml_model_list=[ml])
    pprint.pprint(udf_data.to_dict())

    run_user_code(code=udf_code.source, data=udf_data)
    result = udf_data.to_dict()
    self.assertAlmostEqual(2.0, result['datacubes'][0]['data'][0][0][0], 2)

    # The message pack variant of this check is currently disabled:
    # result = self.send_msgpack_request(data=udf_data, code=udf_code)
    # self.assertAlmostEqual(2.0, result['datacubes'][0]['data'][0][0][0], 2)

    response = self.app.delete(f'/storage/{md5_hash}')
    self.assertEqual(response.status_code, 200)
def rct_sklearn_ml(udf_data: UdfData):
    """Apply a pre-trained sklearn machine learning model on RED and NIR tiles

    The model must be a sklearn model that has a prediction method: m.predict(X).
    The prediction method must accept a pandas.DataFrame as input.

    Tiles with ids "red" and "nir" are required. The machine learning model will be
    applied to all spatio-temporal pixels of the two input raster collections.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be
        used to store the resulting data.
    """
    red = None
    nir = None

    # Iterate over each cube
    for cube in udf_data.get_datacube_list():
        if "red" in cube.id.lower():
            red = cube
        if "nir" in cube.id.lower():
            nir = cube
    if red is None:
        raise Exception("Red data cube is missing in input")
    if nir is None:
        raise Exception("Nir data cube is missing in input")

    # We need to reshape the data for prediction into one dimensional arrays
    three_dim_shape = red.array.shape
    one_dim_shape = numpy.prod(three_dim_shape)

    red_reshape = red.array.values.reshape(one_dim_shape)
    nir_reshape = nir.array.values.reshape(one_dim_shape)

    # This is the input data of the model. It must be trained with a DataFrame using the same names.
    X = pandas.DataFrame()
    X["red"] = red_reshape
    X["nir"] = nir_reshape

    # Get the first model
    mlm = udf_data.get_ml_model_list()[0]
    m = mlm.get_model()
    # Predict the data
    pred = m.predict(X)
    # Reshape the one dimensional predicted values to three dimensions based on the input shape
    pred_reshape = pred.reshape(three_dim_shape)

    result = xarray.DataArray(data=pred_reshape,
                              dims=red.array.dims,
                              coords=red.array.coords,
                              name=red.id + "_sklearn")
    # Create the new raster collection cube
    h = DataCube(array=result)
    # Insert the new hypercube in the input object. The new cube will
    # replace the original input cubes.
    udf_data.set_datacube_list([h, ])
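# A minimal sketch (an assumption, not the project's actual training code) of how a
# model compatible with rct_sklearn_ml could be produced: it must be fitted on a
# DataFrame with the same column names ("red", "nir") that the UDF builds, and the
# path matches the one used by the tests in this section.
import joblib
import numpy
import pandas
from sklearn.ensemble import ExtraTreesRegressor

X = pandas.DataFrame({
    "red": numpy.random.uniform(0, 1, 1000),
    "nir": numpy.random.uniform(0, 1, 1000),
})
y = X["red"] + X["nir"]  # toy target: the sum of both bands
model = ExtraTreesRegressor(n_estimators=100).fit(X, y)
joblib.dump(model, "/tmp/rf_add_model.pkl.xz")  # the .xz suffix enables LZMA compression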
def run_user_code(code: str, data: UdfData) -> UdfData:
    module = load_module_from_string(code)
    functions = {t[0]: t[1] for t in module.items() if callable(t[1])}

    for func in functions.items():
        try:
            sig = signature(func[1])
        except ValueError:
            continue
        params = sig.parameters
        params_list = [t[1] for t in sig.parameters.items()]
        if (func[0] == 'apply_timeseries'
                and 'series' in params and 'context' in params
                and 'pandas.core.series.Series' in str(params['series'].annotation)
                and 'pandas.core.series.Series' in str(sig.return_annotation)):
            # This is a UDF that transforms pandas series
            from .udf_wrapper import apply_timeseries_generic
            return apply_timeseries_generic(data, func[1])
        elif ((func[0] == 'apply_hypercube' or func[0] == 'apply_datacube')
                and 'cube' in params and 'context' in params
                and 'openeo_udf.api.datacube.DataCube' in str(params['cube'].annotation)
                and 'openeo_udf.api.datacube.DataCube' in str(sig.return_annotation)):
            # Found a datacube mapping function
            if len(data.get_datacube_list()) != 1:
                raise ValueError(
                    "The provided UDF expects exactly one datacube, but %s were provided."
                    % len(data.get_datacube_list()))
            result_cube = func[1](data.get_datacube_list()[0], data.user_context)
            if not isinstance(result_cube, DataCube):
                raise ValueError(
                    "The provided UDF did not return a DataCube, but got: %s"
                    % result_cube)
            data.set_datacube_list([result_cube])
            break
        elif len(params_list) == 1 and (
                params_list[0].annotation == 'openeo_udf.api.udf_data.UdfData'
                or params_list[0].annotation == UdfData):
            # Found a generic UDF function
            func[1](data)
            break

    return data
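# An illustrative UDF (a sketch, not part of this module) that run_user_code would
# dispatch to its datacube-mapping branch: the function name, the "cube" and
# "context" parameters, and the DataCube annotations are exactly what the signature
# check above looks for.
from openeo_udf.api.datacube import DataCube

def apply_datacube(cube: DataCube, context: dict) -> DataCube:
    # Double all values and return a new cube
    return DataCube(array=cube.get_array() * 2)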
def rct_stats(udf_data: UdfData):
    """Compute univariate statistics for each hypercube

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be
        used to store the resulting data.
    """
    # The dictionary that stores the statistical data
    stats = {}
    # Iterate over each raster collection cube and compute statistical values
    for cube in udf_data.get_datacube_list():
        # Make sure to cast the values to floats, otherwise they are not serializable
        stats[cube.id] = dict(sum=float(cube.array.sum()),
                              mean=float(cube.array.mean()),
                              min=float(cube.array.min()),
                              max=float(cube.array.max()))
    # Create the structured data object
    sd = StructuredData(description="Statistical data sum, min, max and mean "
                                    "for each raster collection cube as dict",
                        data=stats,
                        type="dict")
    # Remove all collections and set the StructuredData list
    udf_data.del_datacube_list()
    udf_data.del_feature_collection_list()
    udf_data.set_structured_data_list([sd, ])
def test_timeseries_wrapper(self):
    temp = create_datacube(name="temp", value=1, shape=(3, 3, 3))
    udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp])
    from openeo_udf.api.udf_wrapper import apply_timeseries_generic
    rcts = udf_data.get_datacube_list
    apply_timeseries_generic(udf_data)
    self.assertEqual(rcts, udf_data.get_datacube_list)
def hyper_map_fabs(udf_data: UdfData):
    """Compute the absolute values of each hyper cube in the provided data

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well
        as hypercubes and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be
        used to store the resulting data.
    """
    # Iterate over each cube
    cube_list = []
    for cube in udf_data.get_datacube_list():
        result = numpy.fabs(cube.array)
        result.name = cube.id + "_fabs"
        cube_list.append(DataCube(array=result))

    udf_data.set_datacube_list(cube_list)
def not_implemented_yet_test_sampling(self):
    """Test the feature collection sampling UDF"""
    dir = os.path.dirname(openeo_udf.functions.__file__)
    file_name = os.path.join(dir, "datacube_sampling.py")
    udf_code = UdfCodeModel(language="python", source=open(file_name, "r").read())

    temp = create_datacube(name="temp", value=1, shape=(3, 3, 3))
    udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp])

    run_user_code(code=udf_code.source, data=udf_data)
    result = udf_data.to_dict()

    self.assertEqual(len(result["feature_collection_tiles"]), 1)
    self.assertEqual(len(result["feature_collection_tiles"][0]["data"]["features"]), 1)
    self.assertEqual(result["feature_collection_tiles"][0]["data"]["features"][0]["properties"],
                     {'temp': 4})
def run_udf(code: str, epsg_code: str, datacube_list: List[DataCube]) -> UdfData:
    """Run the user defined code (UDF) and create the required input for the function

    :param code: The UDF code
    :param epsg_code: The EPSG code of the projection
    :param datacube_list: The list of data cubes the UDF should be applied to
    :return: The resulting udf data object
    """
    data = UdfData(proj={"EPSG": epsg_code}, datacube_list=datacube_list)
    return run_user_code(code=code, data=data)
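# A hedged usage sketch of run_udf with an inline generic UDF: the single UdfData
# annotated parameter is what run_user_code's dispatch matches, and create_datacube
# is the test helper used elsewhere in this section.
code = '''
from openeo_udf.api.udf_data import UdfData

def my_udf(udf_data: UdfData):
    # Modify udf_data in place, e.g. drop all cubes
    udf_data.del_datacube_list()
'''
cube = create_datacube(name="temp", value=1, dims=("t", "x", "y"), shape=(2, 2, 2))
result = run_udf(code=code, epsg_code="4326", datacube_list=[cube])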
def run_legacy_user_code(dict_data: Dict) -> Dict:
    """Run the user defined python code on legacy data

    Args:
        dict_data: the udf request object with code and legacy data organized in a dictionary

    Returns:
        The resulting UDF data converted to a dictionary
    """
    code = dict_data["code"]["source"]
    data = UdfData.from_dict(dict_data["data"])
    result_data = run_user_code(code, data)

    return result_data.to_dict()
def hyper_ndvi(udf_data: UdfData):
    """Compute the NDVI based on the RED and NIR bands of a hypercube

    A 4-dimensional hypercube with the id "hypercube1" is required, with the second
    dimension containing the bands "B04" (red) and "B08" (nir). The NDVI computation
    will be applied to all hypercube dimensions.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well
        as hypercubes and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be
        used to store the resulting data.
    """
    hyper_cube = None

    # Check if the required hyper cube is present in the list of hyper cubes
    for cube in udf_data.get_hypercube_list():
        if "hypercube1" in cube.id.lower():
            hyper_cube = cube
    if hyper_cube is None:
        raise Exception("Hyper cube is missing in input")

    red = hyper_cube.get_array().loc[:, "B04", :, :]
    nir = hyper_cube.get_array().loc[:, "B08", :, :]

    ndvi = (nir - red) / (nir + red)
    ndvi.name = "NDVI"

    hc = HyperCube(array=ndvi)
    udf_data.set_hypercube_list([hc, ])
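# A sketch of building a 4-D hypercube this UDF can consume; the dimension order
# (t, band, y, x) and the dimension name "band" are assumptions for illustration.
# Only the band labels "B04"/"B08" in the second dimension and the id "hypercube1"
# are required by the function above.
import numpy
import xarray

data = numpy.ones((2, 2, 3, 3))
array = xarray.DataArray(data,
                         dims=("t", "band", "y", "x"),
                         coords={"band": ["B04", "B08"]},
                         name="hypercube1")
hyper_cube = HyperCube(array=array)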
def test_DataCube_map_fabs(self):
    """Test the DataCube mapping of the numpy fabs function"""
    dir = os.path.dirname(openeo_udf.functions.__file__)
    file_name = os.path.join(dir, "datacube_map_fabs.py")
    udf_code = UdfCodeModel(language="python", source=open(file_name, "r").read())

    temp = create_datacube(name="temp", value=1, dims=("t", "x", "y"), shape=(3, 3, 3))
    udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp])

    run_user_code(code=udf_code.source, data=udf_data)
    self.checkDataCubeMapFabs(udf_data=udf_data)
def test_DataCube_reduce_min_median_max(self):
    """Test the DataCube min, median, max reduction"""
    dir = os.path.dirname(openeo_udf.functions.__file__)
    file_name = os.path.join(dir, "datacube_reduce_time_min_median_max.py")
    udf_code = UdfCodeModel(language="python", source=open(file_name, "r").read())

    temp = create_datacube(name="temp", value=1, dims=("t", "y", "x"), shape=(3, 3, 3))
    udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp])

    run_user_code(code=udf_code.source, data=udf_data)
    self.check_DataCube_min_median_max(udf_data=udf_data)
def run_udf_model_user_code(
        udf_model: 'openeo_udf.server.data_model.udf_schemas.UdfRequestModel') -> UdfData:
    """Run the user defined python code

    Args:
        udf_model: the udf request object with code and data collection

    Returns:
        The resulting UDF data object
    """
    code = udf_model.code
    data = UdfData.from_udf_data_model(udf_model.data)
    result_data = run_user_code(code.source, data)

    return result_data
def send_msgpack_request(self, data: UdfData, code: UdfCodeModel) -> Dict:
    return self.send_json_request(data=data, code=code)
    # TODO: Implement the code below; it is unreachable until the early return
    # above is removed.
    udf_request = UdfRequestModel(data=data.to_dict(), code=code)
    udf_request = base64.b64encode(msgpack.packb(udf_request.dict(), use_bin_type=True))
    response = self.app.post('/udf_message_pack', data=udf_request,
                             headers={"Content-Type": "application/base64"})
    self.assertEqual(response.status_code, 200)
    blob = base64.b64decode(response.content)
    result = msgpack.unpackb(blob, raw=False)
    return result
def test_DataCube_ndvi(self):
    """Test the DataCube NDVI computation"""
    dir = os.path.dirname(openeo_udf.functions.__file__)
    file_name = os.path.join(dir, "datacube_ndvi.py")
    udf_code = UdfCodeModel(language="python", source=open(file_name, "r").read())

    hc_red = create_datacube(name="red", value=1, dims=("t", "y", "x"), shape=(3, 3, 3))
    hc_nir = create_datacube(name="nir", value=3, dims=("t", "y", "x"), shape=(3, 3, 3))
    udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[hc_red, hc_nir])

    run_user_code(code=udf_code.source, data=udf_data)
    self.checkDataCubeNdvi(udf_data=udf_data)
def fct_sampling(udf_data: UdfData):
    """Sample any number of raster collection tiles with a single feature collection (the
    first if several are provided) and store the sampled values in the input feature
    collection. Each time-slice of a raster collection is stored as a separate column in
    the feature collection. Hence, the number of attribute columns is equal to
    (number_of_raster_tiles * number_of_xy_slices), with one row per feature.

    The feature collection that contains the sample attributes and the original data
    is stored in the input data object.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be
        used to store the resulting data.
    """
    if not udf_data.feature_collection_list:
        raise Exception("A single feature collection is required as input")

    if len(udf_data.feature_collection_list) > 1:
        raise Exception("Only a single feature collection is supported as input")

    # Get the first feature collection
    fct = udf_data.feature_collection_list[0]
    features = fct.data

    # Iterate over each raster cube
    for cube in udf_data.get_datacube_list():

        # Compute the number and names of the attribute columns
        num_slices = len(cube.data)
        columns = {}
        column_names = []
        for slice in range(num_slices):
            column_name = cube.id + "_%i" % slice
            column_names.append(column_name)
            columns[column_name] = []

        # Sample the raster data with each point
        for feature in features.geometry:
            # Check if the feature is a point
            if feature.type == 'Point':
                x = feature.x
                y = feature.y
                # TODO: That needs to be implemented
                # values = cube.sample(top=y, left=x)
                values = [0, 0, 0]
                # Store the values in column specific arrays
                if values:
                    for column_name, value in zip(column_names, values):
                        columns[column_name].append(value)
                else:
                    for column_name in column_names:
                        columns[column_name].append(math.nan)
            else:
                raise Exception("Only points are allowed for sampling")

        # Attach the sampled attribute data to the GeoDataFrame
        for column_name in column_names:
            features[column_name] = columns[column_name]

    # Create the output feature collection
    fct = FeatureCollection(id=fct.id + "_sample", data=features,
                            start_times=fct.start_times, end_times=fct.end_times)

    # Insert the new tiles as list of feature collection tiles in the input object.
    # The new tiles will replace the original input tiles.
    udf_data.set_feature_collection_list([fct, ])
    # Remove the raster collection tiles
    udf_data.del_datacube_list()
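# A hedged sketch of the vector input fct_sampling expects: a GeoDataFrame of
# points wrapped in the project's FeatureCollection (the id and attribute names
# are illustrative assumptions).
import geopandas
from shapely.geometry import Point

gdf = geopandas.GeoDataFrame({"label": ["a", "b"]},
                             geometry=[Point(0.5, 0.5), Point(1.5, 1.5)])
fct = FeatureCollection(id="points", data=gdf)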
def udf_cropcalendars(udf_data: UdfData):
    """Determine crop calendar events (e.g. harvest) from the provided time series
    using a trained neural network"""
    context_param_var = udf_data.user_context
    print(context_param_var)

    ts_dict = udf_data.get_structured_data_list()[0].data
    if not ts_dict:  # workaround if ts_dict is empty
        return
    ts_df = timeseries_json_to_pandas(ts_dict)
    ts_df.index = pd.to_datetime(ts_df.index).date

    # Calculate the CropSAR curve
    ts_df_cropsar = get_cropsar_TS(ts_df,
                                   context_param_var.get('unique_ids_fields'),
                                   context_param_var.get('metrics_order'),
                                   context_param_var.get('fAPAR_rescale_Openeo'))
    # Rescale the CropSAR values
    ts_df_cropsar = rescale_cropSAR(ts_df_cropsar,
                                    context_param_var.get('fAPAR_range_normalization'),
                                    context_param_var.get('unique_ids_fields'),
                                    'cropSAR')

    # Rescale the metrics based on the rescaling factor of the metric
    def rescale_metrics(df, rescale_factor, fAPAR_range, unique_ids_fields, metric_suffix):
        columns = [item + '_{}'.format(str(metric_suffix)) for item in unique_ids_fields]
        df[columns] = df.loc[:, ts_df.columns.isin(columns)] * rescale_factor
        df[columns] = 2 * (df[columns] - fAPAR_range[0]) / (fAPAR_range[1] - fAPAR_range[0]) - 1
        return df

    #### USE THE FUNCTIONS TO DETERMINE THE CROP CALENDAR DATES

    ### EVENT 1: HARVEST DETECTION
    NN_model_dir = context_param_var.get('path_harvest_model')
    amount_metrics_model = len(context_param_var.get('metrics_crop_event')) \
        * context_param_var.get('window_values')

    #### Prepare the dataframes (reformatting and rescaling) in the right format
    # to allow the use of the trained NN
    ts_df_prepro = rename_df_columns(ts_df,
                                     context_param_var.get('unique_ids_fields'),
                                     context_param_var.get('metrics_order'))
    ts_df_prepro = VHVV_calc_rescale(ts_df_prepro,
                                     context_param_var.get('unique_ids_fields'),
                                     context_param_var.get('VH_VV_range_normalization'))

    #### Rescale the fAPAR to [0, 1] and convert it to values between -1 and 1
    ts_df_prepro = rescale_metrics(ts_df_prepro,
                                   context_param_var.get('fAPAR_rescale_Openeo'),
                                   context_param_var.get('fAPAR_range_normalization'),
                                   context_param_var.get('unique_ids_fields'),
                                   'fAPAR')

    ro_s = {
        'ascending': context_param_var.get('RO_ascending_selection_per_field'),
        'descending': context_param_var.get('RO_descending_selection_per_field')
    }

    #### Merge the CropSAR time series with the dataframe containing the S1 metrics
    date_range = pd.date_range(ts_df_cropsar.index[0], ts_df_cropsar.index[-1]).date
    # Set the index axis to the same frequency
    ts_df_prepro = ts_df_prepro.reindex(date_range)
    # The CropSAR columns must come first in the merged dataframe to ensure the
    # correct position for applying the NN model
    ts_df_prepro = pd.concat([ts_df_cropsar, ts_df_prepro], axis=1)

    ### Create windows in the time series to extract the metrics and store each
    # window in a separate row in the dataframe
    ts_df_input_NN = prepare_df_NN_model(ts_df_prepro,
                                         context_param_var.get('window_values'),
                                         context_param_var.get('unique_ids_fields'),
                                         ro_s,
                                         context_param_var.get('metrics_crop_event'))

    ### Apply the trained NN model on the window extracts
    df_NN_prediction = apply_NN_model_crop_calendars(
        ts_df_input_NN,
        amount_metrics_model,
        context_param_var.get('thr_detection'),
        context_param_var.get('crop_calendar_event'),
        NN_model_dir)
    df_crop_calendars_result = create_crop_calendars_fields(
        df_NN_prediction,
        context_param_var.get('unique_ids_fields'),
        context_param_var.get('index_window_above_thr'))
    print(df_crop_calendars_result)

    # Return the predicted crop calendar events as a dict (JSON format)
    udf_data.set_structured_data_list([
        StructuredData(description="crop calendar json",
                       data=df_crop_calendars_result.to_dict(),
                       type="dict")
    ])
    return udf_data
def send_json_request(self, data: UdfData, code: UdfCodeModel) -> Dict:
    udf_request = UdfRequestModel(data=data.to_dict(), code=code)
    result = run_legacy_user_code(dict_data=udf_request.dict())
    return result