Example #1
# Imports needed to run this UDF standalone (class paths as used elsewhere in this listing).
import torch
import xarray

from openeo_udf.api.datacube import DataCube
from openeo_udf.api.udf_data import UdfData


def hyper_pytorch_ml(udf_data: UdfData):
    """Apply a pre-trained PyTorch machine learning model on a hypercube

    The model must be a PyTorch model whose prediction call accepts a torch.autograd.Variable as input.

    Args:
        udf_data (UdfData): The UDF data object that contains hypercubes and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    cube = udf_data.get_datacube_list()[0]

    # The input tensor of the model, built from the cube values
    input_tensor = torch.autograd.Variable(torch.Tensor(cube.array.values))
    # Get the first model
    mlm = udf_data.get_ml_model_list()[0]
    m = mlm.get_model()
    # Predict the data
    pred = m(input_tensor)
    result = xarray.DataArray(data=pred.detach().numpy(),
                              dims=cube.array.dims,
                              coords=cube.array.coords,
                              name=cube.id + "_pytorch")
    # Create the new data cube
    result_cube = DataCube(array=result)
    # Insert the new hypercube into the input object.
    udf_data.set_datacube_list([result_cube])
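For reference, a minimal sketch of a model satisfying this contract; the class name SimpleNetwork is borrowed from the test in Example #4, while the layer size is an assumption (it must match the length of the cube's last dimension):

import torch


class SimpleNetwork(torch.nn.Module):

    def __init__(self):
        super().__init__()
        # assumed in/out features; must equal the cube's last dimension length
        self.linear = torch.nn.Linear(3, 3)

    def forward(self, x):
        # torch.nn.Linear applies to the last dimension of the input tensor
        return self.linear(x)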
Example #2
    def test_rct_stats(self):
        """Test the raster collection tile statistics UDF"""

        dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(dir, "datacube_statistics.py")
        udf_code = UdfCodeModel(language="python",
                                source=open(file_name, "r").read())

        temp = create_datacube(name="temp",
                               value=1,
                               dims=("t", "x", "y"),
                               shape=(3, 3, 3))
        udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp])

        run_user_code(code=udf_code.source, data=udf_data)
        result = udf_data.to_dict()

        self.assertEqual(len(result["datacubes"]), 0)
        self.assertEqual(len(result["structured_data_list"]), 1)
        self.assertEqual(result["structured_data_list"][0]["type"], "dict")
        self.assertEqual(result["structured_data_list"][0]["data"]["temp"], {
            'max': 1.0,
            'mean': 1.0,
            'min': 1.0,
            'sum': 27.0
        })
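create_datacube is a test helper from the library; this is only a rough sketch of what it presumably builds, consistent with the assertions above (a 3x3x3 cube of ones sums to 27.0):

import numpy
import xarray

from openeo_udf.api.datacube import DataCube


def create_datacube(name, value, dims=("t", "x", "y"), shape=(3, 3, 3)):
    # A constant-valued cube; the array name presumably becomes the cube id
    array = xarray.DataArray(numpy.full(shape, value, dtype=float),
                             dims=dims, name=name)
    return DataCube(array=array)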
Example #3
# Imports needed to run this UDF standalone (class paths as used elsewhere in this listing).
from openeo_udf.api.datacube import DataCube
from openeo_udf.api.udf_data import UdfData


def hyper_ndvi(udf_data: UdfData):
    """Compute the NDVI based on RED and NIR hypercubes

    Hypercubes with ids "red" and "nir" are required. The NDVI computation will be applied
    to all hypercube dimensions.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well as hypercubes
        and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    red = None
    nir = None

    # Iterate over each data cube
    for cube in udf_data.get_datacube_list():
        if "red" in cube.id.lower():
            red = cube
        if "nir" in cube.id.lower():
            nir = cube
    if red is None:
        raise Exception("Red hypercube is missing in input")
    if nir is None:
        raise Exception("Nir hypercube is missing in input")

    ndvi = (nir.array - red.array) / (nir.array + red.array)
    ndvi.name = "NDVI"

    hc = DataCube(array=ndvi)
    udf_data.set_datacube_list([hc, ])
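A quick numeric check against the constant cubes used in the NDVI test below (Example #24): with red == 1 and nir == 3 everywhere,

red_value, nir_value = 1.0, 3.0
ndvi_value = (nir_value - red_value) / (nir_value + red_value)
assert ndvi_value == 0.5  # every pixel of the resulting NDVI cube is 0.5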
Example #4
    def test_pytorch_linear_nn(self):
        """Test linear pytorch model training and UDF application"""

        model = SimpleNetwork()

        MachineLearningPytorchTestCase.train_pytorch_model(model=model)

        dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(dir, "datacube_pytorch_ml.py")
        udf_code = UdfCodeModel(language="python",
                                source=open(file_name, "r").read())

        temp = create_datacube(name="temp",
                               value=1,
                               dims=("x", "y"),
                               shape=(2, 2))

        ml = MachineLearnModelConfig(
            framework="pytorch",
            name="linear_model",
            description=
            "A pytorch model that adds two numbers in range of [1,1]",
            path="/tmp/simple_linear_nn_pytorch.pt")
        udf_data = UdfData(proj={"EPSG": 4326},
                           datacube_list=[temp],
                           ml_model_list=[ml])
        run_user_code(code=udf_code.source, data=udf_data)
        pprint.pprint(udf_data.to_dict())
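The test only pretty-prints the result; a follow-up assertion in the style of the sklearn tests (Examples #8 and #10) could pin the expected prediction, assuming the model indeed learned to add its inputs:

        result = udf_data.to_dict()
        # every input pixel is 1, so predictions should be close to 2
        # self.assertAlmostEqual(2.0, result["datacubes"][0]["data"][0][0], 2)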
Example #5
# Imports needed to run this wrapper standalone (class paths as used elsewhere
# in this listing); apply_timeseries, the default callback, must be defined or
# imported alongside this wrapper (a compatible sketch follows this example).
from typing import Callable

import numpy
import pandas
import xarray

from openeo_udf.api.datacube import DataCube
from openeo_udf.api.udf_data import UdfData


def apply_timeseries_generic(udf_data: UdfData, callback: Callable = apply_timeseries):
    """
    Implements the UDF contract by calling a user-provided time series transformation function (apply_timeseries).
    Multiple bands are currently handled separately; another approach could provide a dataframe with a time series
    for each band.

    :param udf_data: The UDF data object containing the data cubes to transform
    :param callback: The per-series transformation function, defaulting to apply_timeseries
    :return: The modified udf_data object
    """
    # The list of tiles that were created
    tile_results = []

    # Iterate over each cube
    for cube in udf_data.get_datacube_list():
        array3d = []
        # Roll the second axis to the front so that we can iterate over it
        for time_x_slice in numpy.rollaxis(cube.array.values, 1):
            time_x_result = []
            for time_slice in time_x_slice:
                series = pandas.Series(time_slice)
                transformed_series = callback(series, udf_data.user_context)
                time_x_result.append(transformed_series)
            array3d.append(time_x_result)

        # Build a new 3D array with the original shape from the transformed slices
        result_tile = numpy.rollaxis(numpy.asarray(array3d), 1)
        assert result_tile.shape == cube.array.shape
        # Create the new raster collection cube
        rct = DataCube(xarray.DataArray(result_tile))
        tile_results.append(rct)
    # Insert the new tiles as list of raster collection tiles in the input object. The new tiles will
    # replace the original input tiles.
    udf_data.set_datacube_list(tile_results)
    return udf_data
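The wrapper's default callback must be supplied by the user; run_user_code (Example #12) recognizes it by name and annotations: it must be called apply_timeseries and be annotated to take and return a pandas.Series. A minimal sketch of a compatible callback (the smoothing body is an illustrative assumption):

import pandas


def apply_timeseries(series: pandas.Series, context: dict) -> pandas.Series:
    # Any Series -> Series transform qualifies; here, a light rolling mean
    return series.rolling(window=2, min_periods=1).mean()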
Example #6
# Imports needed to run this UDF standalone (the FeatureCollection path is an
# assumption based on the class name used below).
from openeo_udf.api.feature_collection import FeatureCollection
from openeo_udf.api.udf_data import UdfData


def fct_buffer(udf_data: UdfData):
    """Compute a buffer of size 10 around all features

    This function creates a buffer around all features in the provided feature collection tiles.
    The resulting geopandas.GeoDataFrame contains the new geometries and a copy of the original attribute data.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    fct_list = []

    # Iterate over each tile
    for tile in udf_data.feature_collection_list:
        # Buffer all features
        gseries = tile.data.buffer(distance=10)
        # Create a new GeoDataFrame that includes the buffered geometry and the attribute data
        new_data = tile.data.set_geometry(gseries)
        # Create the new feature collection tile
        fct = FeatureCollection(id=tile.id + "_buffer",
                                data=new_data,
                                start_times=tile.start_times,
                                end_times=tile.end_times)
        fct_list.append(fct)
    # Insert the new tiles as list of feature collection tiles in the input object. The new tiles will
    # replace the original input tiles.
    udf_data.set_feature_collection_list(fct_list)
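The buffer step in isolation, on a one-point GeoDataFrame (a standalone illustration; geopandas and shapely are assumed to be installed):

import geopandas
from shapely.geometry import Point

gdf = geopandas.GeoDataFrame({"attr": [42]}, geometry=[Point(0, 0)])
buffered = gdf.set_geometry(gdf.buffer(distance=10))
# "attr" is preserved; the geometry is now a polygon of radius 10
print(buffered.geometry.iloc[0].area)  # roughly pi * 10**2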
Example #7
    def unused_test_DataCube_ndvi_message_pack(self):
        """Test the DataCube NDVI computation with the message pack protocol"""
        # TODO: Reactivate this test

        dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(dir, "datacube_ndvi.py")
        udf_code = UdfCodeModel(language="python",
                                source=open(file_name, "r").read())

        hc_red = create_datacube(name="red",
                                 value=1,
                                 dims=("t", "y", "x"),
                                 shape=(3, 3, 3))
        hc_nir = create_datacube(name="nir",
                                 value=3,
                                 dims=("t", "y", "x"),
                                 shape=(3, 3, 3))
        udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[hc_red, hc_nir])

        udf_request = UdfRequestModel(data=udf_data.to_dict(), code=udf_code)
        udf_request = base64.b64encode(
            msgpack.packb(udf_request.dict(), use_bin_type=True))
        response = self.app.post(
            '/udf_message_pack',
            data=udf_request,
            headers={"Content-Type": "application/base64"})
        self.assertEqual(response.status_code, 200)
        blob = base64.b64decode(response.content)
        udf_data = msgpack.unpackb(blob, raw=False)

        self.checkDataCubeNdvi(udf_data=udf_data)
Example #8
    def run_model_test(self, model):

        MachineLearningTestCase.train_sklearn_model(model=model)
        dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(dir, "datacube_sklearn_ml.py")

        udf_code = UdfCodeModel(language="python",
                                source=open(file_name, "r").read())

        red = create_datacube(name="red",
                              value=1,
                              dims=("t", "x", "y"),
                              shape=(2, 2, 2))
        nir = create_datacube(name="nir",
                              value=1,
                              dims=("t", "x", "y"),
                              shape=(2, 2, 2))

        ml = MachineLearnModelConfig(
            framework="sklearn",
            name="random_forest",
            description=
            "A sklearn model that adds two numbers in range of [1,1]",
            path="/tmp/rf_add_model.pkl.xz")

        udf_data = UdfData(proj={"EPSG": 4326},
                           datacube_list=[red, nir],
                           ml_model_list=[ml])
        pprint.pprint(udf_data.to_dict())

        run_user_code(code=udf_code.source, data=udf_data)
        result = udf_data.to_dict()
        self.assertAlmostEqual(2.0, result['datacubes'][0]['data'][0][0][0], 2)
Example #9
# Imports needed to run this UDF standalone (class paths as used elsewhere in this listing).
from openeo_udf.api.datacube import DataCube
from openeo_udf.api.udf_data import UdfData


def hyper_min_median_max(udf_data: UdfData):
    """Compute the min, median and max along the time dimension of a hypercube

    Hypercubes with a time dimension are required. The min, median and max reduction of the time axis will be
    applied to all hypercube dimensions.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well as hypercubes
        and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    # Iterate over each data cube
    cube_list = []
    for cube in udf_data.get_datacube_list():
        cube_min = cube.array.min(dim="t")
        cube_median = cube.array.median(dim="t")
        cube_max = cube.array.max(dim="t")

        cube_min.name = cube.id + "_min"
        cube_median.name = cube.id + "_median"
        cube_max.name = cube.id + "_max"

        cube_list.append(DataCube(array=cube_min))
        cube_list.append(DataCube(array=cube_median))
        cube_list.append(DataCube(array=cube_max))

    udf_data.set_datacube_list(cube_list)
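A plain-xarray sanity check mirroring the constant cube used in the reduction test (Example #21), where min, median and max all collapse to 1:

import numpy
import xarray

arr = xarray.DataArray(numpy.ones((3, 3, 3)), dims=("t", "y", "x"))
assert float(arr.min(dim="t").sum()) == 9.0    # a 3x3 plane of ones
assert float(arr.median(dim="t").sum()) == 9.0
assert float(arr.max(dim="t").sum()) == 9.0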
Example #10
    def test_sklearn_extra_tree_message_pack_md5_hash(self):
        """Test extra tree training and UDF application with the message pack protocol, with the machine
        learning model uploaded to the UDF md5-hash-based storage system"""
        model = ExtraTreesRegressor(n_estimators=100,
                                    max_depth=7,
                                    max_features="log2",
                                    min_samples_split=2,
                                    min_samples_leaf=1,
                                    verbose=0)
        model_path = MachineLearningTestCase.train_sklearn_model(model=model)

        request_model = RequestStorageModel(
            uri=model_path,
            title="This is a test model",
            description="This is the test description.")

        response = self.app.post('/storage', json=request_model.dict())
        print(response.content)
        self.assertEqual(response.status_code, 200)

        md5_hash = response.content.decode("ascii").strip().replace("\"", "")

        dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(dir, "datacube_sklearn_ml.py")

        udf_code = UdfCodeModel(language="python",
                                source=open(file_name, "r").read())

        red = create_datacube(name="red",
                              value=1,
                              dims=("t", "x", "y"),
                              shape=(2, 2, 2))
        nir = create_datacube(name="nir",
                              value=1,
                              dims=("t", "x", "y"),
                              shape=(2, 2, 2))

        ml = MachineLearnModelConfig(
            framework="sklearn",
            name="random_forest",
            description=
            "A sklearn model that adds two numbers in range of [1,1]",
            md5_hash=md5_hash)

        udf_data = UdfData(proj={"EPSG": 4326},
                           datacube_list=[red, nir],
                           ml_model_list=[ml])
        pprint.pprint(udf_data.to_dict())

        run_user_code(code=udf_code.source, data=udf_data)
        result = udf_data.to_dict()
        self.assertAlmostEqual(2.0, result['datacubes'][0]['data'][0][0][0], 2)

        #result = self.send_msgpack_request(data=udf_data, code=udf_code)
        #self.assertAlmostEqual(2.0, result['datacubes'][0]['data'][0][0][0], 2)

        response = self.app.delete(f'/storage/{md5_hash}')
        self.assertEqual(response.status_code, 200)
Example #11
# Imports needed to run this UDF standalone (class paths as used elsewhere in this listing).
import numpy
import pandas
import xarray

from openeo_udf.api.datacube import DataCube
from openeo_udf.api.udf_data import UdfData


def rct_sklearn_ml(udf_data: UdfData):
    """Apply a pre-trained sklearn machine learning model on RED and NIR tiles

    The model must be a sklearn model that has a prediction method: m.predict(X)
    The prediction method must accept a pandas.DataFrame as input.

    Tiles with ids "red" and "nir" are required. The machine learning model will be applied to all spatio-temporal
    pixels of the two input raster collections.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    red = None
    nir = None

    # Iterate over each cube
    for cube in udf_data.get_datacube_list():
        if "red" in cube.id.lower():
            red = cube
        if "nir" in cube.id.lower():
            nir = cube
    if red is None:
        raise Exception("Red data cube is missing in input")
    if nir is None:
        raise Exception("Nir data cube is missing in input")

    # We need to reshape the data for prediction into one dimensional arrays
    three_dim_shape = red.array.shape
    one_dim_shape = numpy.prod(three_dim_shape)

    red_reshape = red.array.values.reshape(one_dim_shape)
    nir_reshape = nir.array.values.reshape(one_dim_shape)

    # This is the input data of the model. It must be trained with a DataFrame using the same names.
    X = pandas.DataFrame()
    X["red"] = red_reshape
    X["nir"] = nir_reshape

    # Get the first model
    mlm = udf_data.get_ml_model_list()[0]
    m = mlm.get_model()
    # Predict the data
    pred = m.predict(X)
    # Reshape the one dimensional predicted values to three dimensions based on the input shape
    pred_reshape = pred.reshape(three_dim_shape)

    result = xarray.DataArray(data=pred_reshape, dims=red.array.dims,
                              coords=red.array.coords, name=red.id + "_sklearn")
    # Create the new raster collection cube
    h = DataCube(array=result)
    # Insert the new hypercubes in the input object. The new tiles will
    # replace the original input tiles.
    udf_data.set_datacube_list([h, ])
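The reshape round trip used above, shown in isolation:

import numpy

a = numpy.arange(8).reshape((2, 2, 2))
flat = a.reshape(numpy.prod(a.shape))  # one-dimensional view, shape (8,)
back = flat.reshape(a.shape)           # back to (2, 2, 2)
assert (a == back).all()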
Example #12
# Imports needed to run this dispatcher standalone (class paths as used elsewhere
# in this listing); load_module_from_string is expected to come from the
# surrounding openeo_udf module and to return a dict of the executed globals.
from inspect import signature

from openeo_udf.api.datacube import DataCube
from openeo_udf.api.udf_data import UdfData


def run_user_code(code: str, data: UdfData) -> UdfData:
    module = load_module_from_string(code)

    functions = {t[0]: t[1] for t in module.items() if callable(t[1])}

    for func in functions.items():
        try:
            sig = signature(func[1])
        except ValueError:
            continue
        params = sig.parameters
        params_list = [t[1] for t in sig.parameters.items()]
        if (func[0] == 'apply_timeseries' and 'series' in params
                and 'context' in params and 'pandas.core.series.Series' in str(
                    params['series'].annotation)
                and 'pandas.core.series.Series' in str(sig.return_annotation)):
            #this is a UDF that transforms pandas series
            from .udf_wrapper import apply_timeseries_generic
            return apply_timeseries_generic(data, func[1])
        elif ((func[0] == 'apply_hypercube' or func[0] == 'apply_datacube')
              and 'cube' in params and 'context' in params
              and 'openeo_udf.api.datacube.DataCube' in str(
                  params['cube'].annotation)
              and 'openeo_udf.api.datacube.DataCube' in str(
                  sig.return_annotation)):
            #found a datacube mapping function
            if len(data.get_datacube_list()) != 1:
                raise ValueError(
                    "The provided UDF expects exactly one datacube, but %s were provided."
                    % len(data.get_datacube_list()))
            result_cube = func[1](data.get_datacube_list()[0],
                                  data.user_context)
            if not isinstance(result_cube, DataCube):
                raise ValueError(
                    "The provided UDF did not return a DataCube, but got: %s" %
                    result_cube)
            data.set_datacube_list([result_cube])
            break
        elif len(params_list) == 1 and (
                params_list[0].annotation == 'openeo_udf.api.udf_data.UdfData'
                or params_list[0].annotation == UdfData):
            #found a generic UDF function
            func[1](data)
            break

    return data
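For reference, a UDF that this dispatcher routes through the datacube-mapping branch: it must be named apply_hypercube or apply_datacube and annotate both the cube parameter and the return value with DataCube. The doubling body is an illustrative assumption:

from openeo_udf.api.datacube import DataCube


def apply_datacube(cube: DataCube, context: dict) -> DataCube:
    # Any DataCube -> DataCube transform qualifies; here we just double the values
    return DataCube(array=cube.array * 2.0)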
Example #13
# Imports needed to run this UDF standalone (the StructuredData path is an
# assumption based on the class name used below).
from openeo_udf.api.structured_data import StructuredData
from openeo_udf.api.udf_data import UdfData


def rct_stats(udf_data: UdfData):
    """Compute univariate statistics for each hypercube

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    # The dictionary that stores the statistical data
    stats = {}
    # Iterate over each raster collection cube and compute statistical values
    for cube in udf_data.get_datacube_list():
        # make sure to cast the values to floats, otherwise they are not serializable
        stats[cube.id] = dict(sum=float(cube.array.sum()),
                              mean=float(cube.array.mean()),
                              min=float(cube.array.min()),
                              max=float(cube.array.max()))
    # Create the structured data object
    sd = StructuredData(description="Statistical data sum, min, max and mean "
                        "for each raster collection cube as dict",
                        data=stats,
                        type="dict")
    # Remove all collections and set the StructuredData list
    udf_data.del_datacube_list()
    udf_data.del_feature_collection_list()
    udf_data.set_structured_data_list([
        sd,
    ])
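For the constant 3x3x3 cube of ones from Example #2, the structured data produced by this UDF is:

expected = {"temp": {"sum": 27.0, "mean": 1.0, "min": 1.0, "max": 1.0}}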
Example #14
    def test_timeseries_wrapper(self):

        temp = create_datacube(name="temp", value=1, shape=(3, 3, 3))
        udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp])

        from openeo_udf.api.udf_wrapper import apply_timeseries_generic
        # get_datacube_list is stored as a bound method; bound methods of the
        # same object compare equal, so the assertion below only checks that
        # the accessor is unchanged by the default transformation
        rcts = udf_data.get_datacube_list
        apply_timeseries_generic(udf_data)

        self.assertEqual(rcts, udf_data.get_datacube_list)
Example #15
# Imports needed to run this UDF standalone (class paths as used elsewhere in this listing).
import numpy

from openeo_udf.api.datacube import DataCube
from openeo_udf.api.udf_data import UdfData


def hyper_map_fabs(udf_data: UdfData):
    """Compute the absolute values of each hypercube in the provided data

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well as hypercubes
        and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    # Iterate over each data cube
    cube_list = []
    for cube in udf_data.get_datacube_list():
        result = numpy.fabs(cube.array)
        result.name = cube.id + "_fabs"
        cube_list.append(DataCube(array=result))
    udf_data.set_datacube_list(cube_list)
Example #16
    def not_implemented_yet_test_sampling(self):
        """Test the feature collection sampling UDF"""

        dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(dir, "datacube_sampling.py")
        udf_code = UdfCodeModel(language="python",
                                source=open(file_name, "r").read())

        temp = create_datacube(name="temp", value=1, shape=(3, 3, 3))
        udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp])

        run_user_code(code=udf_code.source, data=udf_data)
        result = udf_data.to_dict()

        self.assertEqual(len(result["feature_collection_tiles"]), 1)
        self.assertEqual(
            len(result["feature_collection_tiles"][0]["data"]["features"]), 1)
        self.assertEqual(
            result["feature_collection_tiles"][0]["data"]["features"][0]
            ["properties"], {'temp': 4})
Example #17
# Imports needed to run this helper standalone (the run_user_code path is an
# assumption; the class paths are used elsewhere in this listing).
from typing import List

from openeo_udf.api.datacube import DataCube
from openeo_udf.api.run_code import run_user_code
from openeo_udf.api.udf_data import UdfData


def run_udf(code: str, epsg_code: str,
            datacube_list: List[DataCube]) -> UdfData:
    """Run the user defined code (UDF) and create the required input for the function

    :param code: The UDF code
    :param epsg_code: The EPSG code of the projection
    :param datacube_list: The list of data cubes to process
    :return: The resulting UDF data object
    """

    data = UdfData(proj={"EPSG": epsg_code}, datacube_list=datacube_list)

    return run_user_code(code=code, data=data)
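A hypothetical usage sketch, reusing the create_datacube test helper and the statistics UDF file from the examples above (the relative file path is an assumption):

temp = create_datacube(name="temp", value=1, dims=("t", "x", "y"), shape=(3, 3, 3))
result = run_udf(code=open("datacube_statistics.py").read(),
                 epsg_code="4326", datacube_list=[temp])
print(result.to_dict())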
Example #18
# Imports needed to run this helper standalone (the run_user_code path is an
# assumption, as in Example #17).
from typing import Dict

from openeo_udf.api.run_code import run_user_code
from openeo_udf.api.udf_data import UdfData


def run_legacy_user_code(dict_data: Dict) -> Dict:
    """Run the user defined python code on legacy data

    Args:
        dict_data: the udf request object with code and legacy data organized in a dictionary

    Returns:
        The resulting UdfData object serialized back into a dictionary

    """
    code = dict_data["code"]["source"]
    data = UdfData.from_dict(dict_data["data"])
    result_data = run_user_code(code, data)

    return result_data.to_dict()
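The layout that dict_data is expected to have, inferred from the lookups above; the code source here is an illustrative stub:

from openeo_udf.api.udf_data import UdfData

udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[])  # empty cube list for brevity
legacy_request = {
    "code": {"language": "python", "source": "def my_udf(udf_data): pass"},
    "data": udf_data.to_dict(),
}
result_dict = run_legacy_user_code(legacy_request)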
Example #19
# Imports for this legacy variant of the NDVI UDF (the HyperCube path is an
# assumption based on the class name used below).
from openeo_udf.api.hypercube import HyperCube
from openeo_udf.api.udf_data import UdfData


def hyper_ndvi(udf_data: UdfData):
    """Compute the NDVI based on the RED and NIR bands of a hypercube

    A four-dimensional hypercube is required whose second dimension contains the bands "B04" (red) and "B08" (nir).
    The NDVI computation will be applied to all hypercube dimensions.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well as hypercubes
        and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    hyper_cube = None

    # Check if required hyper cube is present in list of hyper cubes
    for cube in udf_data.get_hypercube_list():
        if "hypercube1" in cube.id.lower():
            hyper_cube = cube
    if hyper_cube is None:
        raise Exception("Hyper cube is missing in input")

    red = hyper_cube.get_array().loc[:, "B04", :, :]
    nir = hyper_cube.get_array().loc[:, "B08", :, :]

    ndvi = (nir - red) / (nir + red)
    ndvi.name = "NDVI"

    hc = HyperCube(array=ndvi)
    udf_data.set_hypercube_list([
        hc,
    ])
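The band selection used above, demonstrated on a plain four-dimensional xarray with a band coordinate:

import numpy
import xarray

arr = xarray.DataArray(numpy.ones((2, 2, 3, 3)),
                       dims=("t", "band", "y", "x"),
                       coords={"band": ["B04", "B08"]})
red = arr.loc[:, "B04", :, :]  # selects the band and drops that dimension
assert red.dims == ("t", "y", "x")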
Example #20
    def test_DataCube_map_fabs(self):
        """Test the DataCube mapping of the numpy fabs function"""

        dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(dir, "datacube_map_fabs.py")
        udf_code = UdfCodeModel(language="python",
                                source=open(file_name, "r").read())

        temp = create_datacube(name="temp",
                               value=1,
                               dims=("t", "x", "y"),
                               shape=(3, 3, 3))
        udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp])
        run_user_code(code=udf_code.source, data=udf_data)
        self.checkDataCubeMapFabs(udf_data=udf_data)
Example #21
    def test_DataCube_reduce_min_median_max(self):
        """Test the DataCube min, median, max reduction"""

        dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(dir, "datacube_reduce_time_min_median_max.py")
        udf_code = UdfCodeModel(language="python",
                                source=open(file_name, "r").read())

        temp = create_datacube(name="temp",
                               value=1,
                               dims=("t", "y", "x"),
                               shape=(3, 3, 3))
        udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp])
        run_user_code(code=udf_code.source, data=udf_data)
        self.check_DataCube_min_median_max(udf_data=udf_data)
Example #22
# Imports needed to run this helper standalone (the run_user_code path is an
# assumption, as in Example #17).
from openeo_udf.api.run_code import run_user_code
from openeo_udf.api.udf_data import UdfData


def run_udf_model_user_code(
    udf_model: 'openeo_udf.server.data_model.udf_schemas.UdfRequestModel'
) -> UdfData:
    """Run the user defined python code

    Args:
        udf_model: the udf request object with code and data collection

    Returns:
        The resulting UdfData object

    """
    code = udf_model.code
    data = UdfData.from_udf_data_model(udf_model.data)
    result_data = run_user_code(code.source, data)

    return result_data
Example #23
    def send_msgpack_request(self, data: UdfData, code: UdfCodeModel) -> Dict:

        # NOTE: currently short-circuits to the JSON request; the message pack
        # path below is unreachable until it is reactivated.
        return self.send_json_request(data=data, code=code)

        # TODO: Implement the code below

        udf_request = UdfRequestModel(data=data.to_dict(), code=code)
        udf_request = base64.b64encode(
            msgpack.packb(udf_request.dict(), use_bin_type=True))
        response = self.app.post(
            '/udf_message_pack',
            data=udf_request,
            headers={"Content-Type": "application/base64"})
        self.assertEqual(response.status_code, 200)
        blob = base64.b64decode(response.content)
        result = msgpack.unpackb(blob, raw=False)
        return result
Example #24
    def test_DataCube_ndvi(self):
        """Test the DataCube NDVI computation"""

        dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(dir, "datacube_ndvi.py")
        udf_code = UdfCodeModel(language="python",
                                source=open(file_name, "r").read())

        hc_red = create_datacube(name="red",
                                 value=1,
                                 dims=("t", "y", "x"),
                                 shape=(3, 3, 3))
        hc_nir = create_datacube(name="nir",
                                 value=3,
                                 dims=("t", "y", "x"),
                                 shape=(3, 3, 3))
        udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[hc_red, hc_nir])

        run_user_code(code=udf_code.source, data=udf_data)
        self.checkDataCubeNdvi(udf_data=udf_data)
Example #25
# Imports needed to run this UDF standalone (the FeatureCollection path is an
# assumption based on the class name used below).
import math

from openeo_udf.api.feature_collection import FeatureCollection
from openeo_udf.api.udf_data import UdfData


def fct_sampling(udf_data: UdfData):
    """Sample any number of raster collection tiles with a single feature collection and store the sampled
    values in the input feature collection. Each time slice of a raster collection is stored as a separate
    column in the feature collection. Hence, the resulting attribute table has number_of_features rows and
    (number_of_raster_tiles * number_of_time_slices) sample columns.

    A single feature collection that contains the sample attributes and the original attribute data is stored
    in the input data object.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the resulting
        data.

    """

    if not udf_data.feature_collection_list:
        raise Exception("A single feature collection is required as input")

    if len(udf_data.feature_collection_list) > 1:
        raise Exception(
            "Only a single feature collection is supported as input")

    # Get the first feature collection
    fct = udf_data.feature_collection_list[0]
    features = fct.data

    # Iterate over each raster cube
    for cube in udf_data.get_datacube_list():

        # Compute the number and names of the attribute columns
        num_slices = len(cube.array)
        columns = {}
        column_names = []
        for slice_index in range(num_slices):
            column_name = cube.id + "_%i" % slice_index
            column_names.append(column_name)
            columns[column_name] = []

        # Sample the raster data with each point
        for feature in features.geometry:
            # Check if the feature is a point
            if feature.type == 'Point':
                x = feature.x
                y = feature.y
                # TODO: That needs to be implemented
                # values = cube.sample(top=y, left=x)

                values = [0, 0, 0]

                # Store the values in column specific arrays
                if values:
                    for column_name, value in zip(column_names, values):
                        columns[column_name].append(value)
                else:
                    for column_name in column_names:
                        columns[column_name].append(math.nan)
            else:
                raise Exception("Only points are allowed for sampling")
        # Attach the sampled attribute data to the GeoDataFrame
        for column_name in column_names:
            features[column_name] = columns[column_name]
    # Create the output feature collection
    fct = FeatureCollection(id=fct.id + "_sample",
                            data=features,
                            start_times=fct.start_times,
                            end_times=fct.end_times)
    # Insert the new tiles as list of feature collection tiles in the input object. The new tiles will
    # replace the original input tiles.
    udf_data.set_feature_collection_list([
        fct,
    ])
    # Remove the raster collection tiles
    udf_data.del_datacube_list()
Example #26
# Partial imports for this UDF: pandas and the timeseries helper from the openeo
# client (assumed import path); the crop-calendar helpers used below
# (get_cropsar_TS, rescale_cropSAR, rename_df_columns, VHVV_calc_rescale,
# prepare_df_NN_model, apply_NN_model_crop_calendars,
# create_crop_calendars_fields) are project specific and must be provided by the
# surrounding module.
import pandas as pd

from openeo.rest.conversions import timeseries_json_to_pandas
from openeo_udf.api.structured_data import StructuredData
from openeo_udf.api.udf_data import UdfData


def udf_cropcalendars(udf_data: UdfData):
    context_param_var = udf_data.user_context
    print(context_param_var)
    ts_dict = udf_data.get_structured_data_list()[0].data
    if not ts_dict:  # workaround in case ts_dict is empty
        return
    ts_df = timeseries_json_to_pandas(ts_dict)
    ts_df.index = pd.to_datetime(ts_df.index).date

    # function to calculate the cropsar curve
    ts_df_cropsar = get_cropsar_TS(
        ts_df, context_param_var.get('unique_ids_fields'),
        context_param_var.get('metrics_order'),
        context_param_var.get('fAPAR_rescale_Openeo'))
    # rescale cropsar values
    ts_df_cropsar = rescale_cropSAR(
        ts_df_cropsar, context_param_var.get('fAPAR_range_normalization'),
        context_param_var.get('unique_ids_fields'), 'cropSAR')

    # function to rescale the metrics based
    # on the rescaling factor of the metric
    def rescale_metrics(df, rescale_factor, fAPAR_range, unique_ids_fields,
                        metric_suffix):
        df[[
            item + '_{}'.format(str(metric_suffix))
            for item in unique_ids_fields
        ]] = df.loc[:,
                    ts_df.columns.isin([
                        item + '_{}'.format(str(metric_suffix))
                        for item in unique_ids_fields
                    ])] * rescale_factor
        df[[
            item + '_{}'.format(str(metric_suffix))
            for item in unique_ids_fields
        ]] = 2 * (df[[
            item + '_{}'.format(str(metric_suffix))
            for item in unique_ids_fields
        ]] - fAPAR_range[0]) / (fAPAR_range[1] - fAPAR_range[0]) - 1
        return df

    #### USE THE FUNCTIONS TO DETERMINE THE CROP CALENDAR DATES

    ### EVENT 1: HARVEST DETECTION
    NN_model_dir = context_param_var.get('path_harvest_model')
    amount_metrics_model = len(context_param_var.get(
        'metrics_crop_event')) * context_param_var.get('window_values')

    #### PREPARE THE DATAFRAMES (REFORMATTING AND RESCALING) IN THE
    # RIGHT FORMAT TO ALLOW THE USE OF THE TRAINED NN
    ts_df_prepro = rename_df_columns(
        ts_df, context_param_var.get('unique_ids_fields'),
        context_param_var.get('metrics_order'))

    ts_df_prepro = VHVV_calc_rescale(
        ts_df_prepro, context_param_var.get('unique_ids_fields'),
        context_param_var.get('VH_VV_range_normalization'))

    #### rescale the fAPAR to 0 and 1 and convert
    # it to values between -1 and 1
    ts_df_prepro = rescale_metrics(
        ts_df_prepro, context_param_var.get('fAPAR_rescale_Openeo'),
        context_param_var.get('fAPAR_range_normalization'),
        context_param_var.get('unique_ids_fields'), 'fAPAR')

    ro_s = {
        'ascending': context_param_var.get('RO_ascending_selection_per_field'),
        'descending':
        context_param_var.get('RO_descending_selection_per_field')
    }

    #### now merge the cropsar ts file with the other
    # df containing the S1 metrics
    date_range = pd.date_range(ts_df_cropsar.index[0],
                               ts_df_cropsar.index[-1]).date
    ts_df_prepro = ts_df_prepro.reindex(
        date_range)  # put the index on the same daily frequency
    ts_df_prepro = pd.concat(
        [ts_df_cropsar, ts_df_prepro], axis=1
    )  # the cropsar columns must come first so the NN model sees them in the expected position

    ### create windows in the time series to extract the metrics
    # and store each window in a separate row in the dataframe
    ts_df_input_NN = prepare_df_NN_model(
        ts_df_prepro, context_param_var.get('window_values'),
        context_param_var.get('unique_ids_fields'), ro_s,
        context_param_var.get('metrics_crop_event'))

    ### apply the trained NN model on the window extracts
    df_NN_prediction = apply_NN_model_crop_calendars(
        ts_df_input_NN, amount_metrics_model,
        context_param_var.get('thr_detection'),
        context_param_var.get('crop_calendar_event'), NN_model_dir)
    df_crop_calendars_result = create_crop_calendars_fields(
        df_NN_prediction, context_param_var.get('unique_ids_fields'),
        context_param_var.get('index_window_above_thr'))
    print(df_crop_calendars_result)
    # return the predicted crop calendar events as a dict  (json format)
    udf_data.set_structured_data_list([
        StructuredData(description="crop calendar json",
                       data=df_crop_calendars_result.to_dict(),
                       type="dict")
    ])
    return udf_data
Example #27
    def send_json_request(self, data: UdfData, code: UdfCodeModel) -> Dict:

        udf_request = UdfRequestModel(data=data.to_dict(), code=code)
        result = run_legacy_user_code(dict_data=udf_request.dict())
        return result