예제 #1
0
    def test_complex_case(self):
        # Evaluates one long valid-pixel style expression that mixes flag
        # attribute lookups with numeric comparisons, for three input states.
        expr = (
            '(not quality_flags.invalid'
            ' and not pixel_classif_flags.IDEPIX_CLOUD'
            ' and not pixel_classif_flags.IDEPIX_CLOUD_BUFFER'
            ' and not pixel_classif_flags.IDEPIX_CLOUD_SHADOW'
            ' and not pixel_classif_flags.IDEPIX_SNOW_ICE'
            ' and not (c2rcc_flags.Rtosa_OOS and conc_chl > 1.0)'
            ' and not c2rcc_flags.Rtosa_OOR'
            ' and not c2rcc_flags.Rhow_OOR'
            ' and not (c2rcc_flags.Cloud_risk and immersed_cyanobacteria == 0)'
            ' and floating_vegetation == 0'
            ' and conc_chl > 0.01'
            ' and not (floating_cyanobacteria == 1 or chl_pitarch > 500))')

        def zeroed_flags(type_name, flag_names):
            # The namedtuple *class* itself is used as a simple attribute
            # container: each field name is overwritten with a one-element
            # zero array so that ``<flags>.<name>`` resolves to an array.
            flags = namedtuple(type_name, flag_names)
            for flag_name in flag_names:
                setattr(flags, flag_name, np.array([0]))
            return flags

        quality_flags = zeroed_flags('quality_flags', ['invalid'])
        pixel_classif_flags = zeroed_flags(
            'pixel_classif_flags',
            ['IDEPIX_CLOUD', 'IDEPIX_CLOUD_BUFFER', 'IDEPIX_CLOUD_SHADOW',
             'IDEPIX_SNOW_ICE'])
        c2rcc_flags = zeroed_flags(
            'c2rcc_flags',
            ['Rtosa_OOS', 'Rtosa_OOR', 'Rhow_OOR', 'Cloud_risk'])

        namespace = {
            'np': np,
            'quality_flags': quality_flags,
            'pixel_classif_flags': pixel_classif_flags,
            'c2rcc_flags': c2rcc_flags,
            'immersed_cyanobacteria': np.array([0]),
            'floating_cyanobacteria': np.array([0]),
            'floating_vegetation': np.array([0]),
            'conc_chl': np.array([0]),
            'chl_pitarch': np.array([0]),
        }

        def assert_result(expected_value):
            # Re-evaluate the expression against the current namespace state.
            actual_value = compute_array_expr(expr, namespace=namespace)
            npt.assert_array_almost_equal(actual_value,
                                          np.array([expected_value]))

        # conc_chl is 0, so the ``conc_chl > 0.01`` term is not satisfied.
        assert_result(0)

        # With conc_chl raised above 0.01 and all flags zero, the result is 1.
        namespace['conc_chl'] = np.array([0.2])
        assert_result(1)

        # A non-zero cloud shadow flag switches the result back to 0.
        pixel_classif_flags.IDEPIX_CLOUD_SHADOW = np.array([0.2])
        assert_result(0)
예제 #2
0
    def test_valid_exprs(self):
        # Table-driven check of several expressions evaluated against the
        # same namespace of two six-element arrays plus the np/xr modules.
        namespace = {
            'a': np.array([0.1, 0.3, 0.1, 0.7, 0.4, 0.9]),
            'b': np.array([0.2, 0.1, 0.3, 0.2, 0.4, 0.8]),
            'np': np,
            'xr': xr,
        }

        almost_equal = np.testing.assert_array_almost_equal
        exactly_equal = np.testing.assert_equal

        # (expression, assertion function, expected values), in the order
        # in which they are evaluated.
        cases = [
            ('a + 1', almost_equal,
             [1.1, 1.3, 1.1, 1.7, 1.4, 1.9]),
            ('a * b', almost_equal,
             [0.02, 0.03, 0.03, 0.14, 0.16, 0.72]),
            ('max(a, b)', almost_equal,
             [0.2, 0.3, 0.3, 0.7, 0.4, 0.9]),
            ('a > b', exactly_equal,
             [False, True, False, True, False, True]),
            ('a == b', exactly_equal,
             [False, False, False, False, True, False]),
            # This odd-looking form is what translating SNAP conditional
            # expressions to Python produces.
            ('a > 0.35 if a else b', exactly_equal,
             [0.2, 0.1, 0.3, 0.7, 0.4, 0.9]),
            # ... and this is what is actually meant by it.
            ('where(a > 0.35, a, b)', exactly_equal,
             [0.2, 0.1, 0.3, 0.7, 0.4, 0.9]),
        ]

        for expr, check, expected in cases:
            value = compute_array_expr(expr, namespace=namespace)
            check(value, np.array(expected))
예제 #3
0
    def test_valid_exprs(self):
        # Evaluates arithmetic and comparison expressions over two
        # six-element arrays and compares against hand-computed results.
        a = np.array([0.1, 0.3, 0.1, 0.7, 0.4, 0.9])
        b = np.array([0.2, 0.1, 0.3, 0.2, 0.4, 0.8])
        namespace = {'a': a, 'b': b, 'np': np}

        # Floating-point results: compared approximately.
        approximate_cases = (
            ('a + 1', [1.1, 1.3, 1.1, 1.7, 1.4, 1.9]),
            ('a * b', [0.02, 0.03, 0.03, 0.14, 0.16, 0.72]),
            ('max(a, b)', [0.2, 0.3, 0.3, 0.7, 0.4, 0.9]),
        )
        for expr, expected in approximate_cases:
            value = compute_array_expr(expr, namespace=namespace)
            np.testing.assert_array_almost_equal(value, np.array(expected))

        # Boolean results: compared exactly.
        exact_cases = (
            ('a > b', [False, True, False, True, False, True]),
            ('a == b', [False, False, False, False, True, False]),
        )
        for expr, expected in exact_cases:
            value = compute_array_expr(expr, namespace=namespace)
            np.testing.assert_equal(value, np.array(expected))
예제 #4
0
파일: compute.py 프로젝트: dzelge/xcube
def compute_dataset(dataset: xr.Dataset,
                    processed_variables: NameDictPairList = None,
                    errors: str = 'raise') -> xr.Dataset:
    """
    Derive a new dataset from *dataset* by evaluating per-variable expressions.

    Each processed variable is described by a properties mapping: for a
    variable that already exists in *dataset* this is its ``attrs``, overlaid
    with any properties passed in via *processed_variables*; for a new
    variable it is just the passed-in properties. Two properties are
    interpreted:

    * ``expression``: a Python expression that is evaluated in a namespace
      holding ``NaN``, ``PI``, ``np``, ``xr`` and the data variables of
      *dataset* (flag variables are wrapped in a ``MaskSet``), as well as
      the results of previously processed variables. The result becomes the
      variable's new value.
    * ``valid_pixel_expression``: a Python expression evaluated in the same
      namespace whose result is used as a mask; the variable is replaced by
      ``var.where(valid_mask)``.

    The full properties mapping is copied into the ``attrs`` of the computed
    or masked array whenever the array has an ``attrs`` attribute.

    :param dataset: A dataset.
    :param processed_variables: Optional list of variables that will be
           processed in the order given. Each entry is resolved to a
           (name, properties) pair. If omitted, all data variables of
           *dataset* are processed, ordered by ``_get_var_sort_key``.
    :param errors: How to deal with errors while evaluating expressions;
           passed through to ``compute_array_expr``.
           May be one of "raise", "warn", or "ignore".
    :return: A copy of *dataset* to which every ``xr.DataArray`` found in
             the evaluation namespace has been assigned.
    :raise ValueError: if ``valid_pixel_expression`` is given for a variable
           that neither exists in *dataset* nor was produced by ``expression``.
    """
    if processed_variables:
        processed_variables = to_resolved_name_dict_pairs(processed_variables,
                                                          dataset,
                                                          keep=True)
    else:
        sort_key = functools.partial(_get_var_sort_key, dataset)
        processed_variables = [
            (name, None) for name in sorted(dataset.data_vars, key=sort_key)
        ]

    # Evaluation namespace: a few constants and modules first ...
    namespace = {'NaN': np.nan, 'PI': math.pi, 'np': np, 'xr': xr}
    # ... then every data variable, with flag variables exposed as mask sets.
    for name in dataset.data_vars:
        data_var = dataset[name]
        namespace[name] = (MaskSet(data_var)
                           if MaskSet.is_flag_var(data_var)
                           else data_var)

    for var_name, var_props in processed_variables:
        if var_name in dataset.data_vars:
            # Existing variable: its own attributes form the base,
            # explicitly given properties take precedence.
            var = dataset[var_name]
            merged_props = dict(var.attrs)
            if var_props:
                merged_props.update(var_props)
            var_props = merged_props
        else:
            # Variable that has yet to be computed.
            var = None
            if var_props is None:
                var_props = {}

        expression = var_props.get('expression')
        if expression:
            computed_array = compute_array_expr(expression,
                                                namespace=namespace,
                                                result_name=f'{var_name!r}',
                                                errors=errors)
            if computed_array is not None:
                if hasattr(computed_array, 'attrs'):
                    computed_array.attrs.update(var_props)
                    var = computed_array
                namespace[var_name] = computed_array

        valid_pixel_expression = var_props.get('valid_pixel_expression')
        if not valid_pixel_expression:
            continue

        # A mask can only be applied to a variable that actually exists.
        if var is None:
            raise ValueError(f'undefined variable {var_name!r}')

        valid_mask = compute_array_expr(
            valid_pixel_expression,
            namespace=namespace,
            result_name=f'valid mask for {var_name!r}',
            errors=errors)
        if valid_mask is None:
            continue

        masked_var = var.where(valid_mask)
        if hasattr(masked_var, 'attrs'):
            masked_var.attrs.update(var_props)
        namespace[var_name] = masked_var

    # Only data arrays are written back; constants, modules and mask sets
    # living in the namespace are skipped.
    computed_dataset = dataset.copy()
    for name, value in namespace.items():
        if not isinstance(value, xr.DataArray):
            continue
        computed_dataset[name] = value

    return computed_dataset
예제 #5
0
def evaluate_dataset(dataset: xr.Dataset,
                     processed_variables: NameDictPairList = None,
                     errors: str = 'raise') -> xr.Dataset:
    """
    Compute new variables or mask existing variables in *dataset*
    by the evaluation of Python expressions, that may refer to other
    existing or new variables.
    Returns a new dataset that contains the old and new variables,
    where both may now be masked.

    Expressions may be given by attributes of existing variables in
    *dataset* or passed via the *processed_variables* argument
    which is a sequence of variable name / attributes tuples.

    Two types of expression attributes are recognized in the attributes:

    1. The attribute ``expression`` generates
       a new variable computed from its attribute value.
    2. The attribute ``valid_pixel_expression`` masks out
       invalid variable values.

    In both cases the attribute value must be a string that forms
    a valid Python expression that can reference any other preceding
    variables by name.
    The expression can also reference any flags defined by another
    variable according to their CF attributes ``flag_meaning``
    and ``flag_values``.

    Invalid variable values may be masked out using the value of the
    ``valid_pixel_expression`` attribute, which should form
    a Boolean Python expression. The variable is replaced by
    ``var.where(valid_mask)``, i.e. values are masked wherever the
    expression returns zero or false.

    If the attributes contain a truthy ``load`` entry, ``load()`` is
    called on the computed and/or masked array, provided the array
    offers such a method.

    Other attributes will be stored as variable metadata as-is.

    :param dataset: A dataset.
    :param processed_variables: Optional list of variable
        name-attributes pairs that will be processed in the given order.
        If omitted, all data variables of *dataset* are processed,
        ordered by ``_get_var_sort_key``.
    :param errors: How to deal with errors while evaluating expressions.
           May be one of "raise", "warn", or "ignore".
    :return: new dataset with computed variables
    :raise ValueError: if ``valid_pixel_expression`` is given for a variable
        that neither exists in *dataset* nor was produced by ``expression``.
    """

    if processed_variables:
        processed_variables = to_resolved_name_dict_pairs(processed_variables,
                                                          dataset,
                                                          keep=True)
    else:
        var_names = list(dataset.data_vars)
        var_names = sorted(var_names,
                           key=functools.partial(_get_var_sort_key, dataset))
        processed_variables = [(var_name, None) for var_name in var_names]

    # Initialize namespace with some constants and modules
    namespace = dict(NaN=np.nan, PI=math.pi, np=np, xr=xr)
    # Now add all mask sets and variables
    for var_name in dataset.data_vars:
        var = dataset[var_name]
        if MaskSet.is_flag_var(var):
            namespace[var_name] = MaskSet(var)
        else:
            namespace[var_name] = var

    for var_name, var_props in processed_variables:
        if var_name in dataset.data_vars:
            # Existing variable: start from its own attributes and let
            # explicitly passed properties override them.
            var = dataset[var_name]
            if var_props:
                var_props_temp = var_props
                var_props = dict(var.attrs)
                var_props.update(var_props_temp)
            else:
                var_props = dict(var.attrs)
        else:
            # Computed variable
            var = None
            if var_props is None:
                var_props = dict()

        do_load = var_props.get('load', False)

        expression = var_props.get('expression')
        if expression:
            # Compute new variable
            computed_array = compute_array_expr(expression,
                                                namespace=namespace,
                                                result_name=f'{var_name!r}',
                                                errors=errors)
            if computed_array is not None:
                if hasattr(computed_array, 'attrs'):
                    var = computed_array
                    var.attrs.update(var_props)
                # The result is not necessarily an xarray object (hence the
                # 'attrs' check above), so only call load() where it exists
                # instead of failing with an AttributeError.
                if do_load and hasattr(computed_array, 'load'):
                    computed_array.load()
                namespace[var_name] = computed_array

        valid_pixel_expression = var_props.get('valid_pixel_expression')
        if valid_pixel_expression:
            # Compute new mask for existing variable
            if var is None:
                raise ValueError(f'undefined variable {var_name!r}')
            valid_mask = compute_array_expr(
                valid_pixel_expression,
                namespace=namespace,
                result_name=f'valid mask for {var_name!r}',
                errors=errors)
            if valid_mask is not None:
                masked_var = var.where(valid_mask)
                if hasattr(masked_var, 'attrs'):
                    masked_var.attrs.update(var_props)
                # Same guard as for the computed array, for consistency.
                if do_load and hasattr(masked_var, 'load'):
                    masked_var.load()
                namespace[var_name] = masked_var

    # Write back only the data arrays; constants, modules and mask sets
    # in the namespace are not part of the result.
    computed_dataset = dataset.copy()
    for name, value in namespace.items():
        if isinstance(value, xr.DataArray):
            computed_dataset[name] = value

    return computed_dataset