def test_complex_case(self):
    """
    Evaluate one long boolean validity expression that combines flag
    attribute lookups with numeric comparisons, and check the result
    for three successive states of the namespace.
    """
    expr = ('(not quality_flags.invalid'
            ' and not pixel_classif_flags.IDEPIX_CLOUD'
            ' and not pixel_classif_flags.IDEPIX_CLOUD_BUFFER'
            ' and not pixel_classif_flags.IDEPIX_CLOUD_SHADOW'
            ' and not pixel_classif_flags.IDEPIX_SNOW_ICE'
            ' and not (c2rcc_flags.Rtosa_OOS and conc_chl > 1.0)'
            ' and not c2rcc_flags.Rtosa_OOR'
            ' and not c2rcc_flags.Rhow_OOR'
            ' and not (c2rcc_flags.Cloud_risk and immersed_cyanobacteria == 0)'
            ' and floating_vegetation == 0'
            ' and conc_chl > 0.01'
            ' and not (floating_cyanobacteria == 1 or chl_pitarch > 500))')

    def make_flags(type_name, flag_names):
        # The flags live as attributes on the namedtuple *class* itself,
        # each one initialised to a one-element zero array.
        flags = namedtuple(type_name, flag_names)
        for flag_name in flag_names:
            setattr(flags, flag_name, np.array([0]))
        return flags

    quality_flags = make_flags('quality_flags', ['invalid'])
    pixel_classif_flags = make_flags('pixel_classif_flags', [
        'IDEPIX_CLOUD',
        'IDEPIX_CLOUD_BUFFER',
        'IDEPIX_CLOUD_SHADOW',
        'IDEPIX_SNOW_ICE',
    ])
    c2rcc_flags = make_flags('c2rcc_flags',
                             ['Rtosa_OOS', 'Rtosa_OOR', 'Rhow_OOR', 'Cloud_risk'])

    namespace = {
        'np': np,
        'quality_flags': quality_flags,
        'pixel_classif_flags': pixel_classif_flags,
        'c2rcc_flags': c2rcc_flags,
        'immersed_cyanobacteria': np.array([0]),
        'floating_cyanobacteria': np.array([0]),
        'floating_vegetation': np.array([0]),
        'conc_chl': np.array([0]),
        'chl_pitarch': np.array([0]),
    }

    def check(expected):
        # Re-evaluate the expression against the current namespace state.
        result = compute_array_expr(expr, namespace=namespace)
        npt.assert_array_almost_equal(result, np.array([expected]))

    # State 1: everything is zero.
    check(0)

    # State 2: only conc_chl is raised.
    namespace['conc_chl'] = np.array([0.2])
    check(1)

    # State 3: additionally a non-zero cloud shadow flag.
    pixel_classif_flags.IDEPIX_CLOUD_SHADOW = np.array([0.2])
    check(0)
def test_valid_exprs(self):
    """
    Evaluate a series of valid expressions over two small arrays ``a``
    and ``b`` and compare each result with its expected array.
    """
    namespace = {
        'a': np.array([0.1, 0.3, 0.1, 0.7, 0.4, 0.9]),
        'b': np.array([0.2, 0.1, 0.3, 0.2, 0.4, 0.8]),
        'np': np,
        'xr': xr,
    }

    # Floating point results, compared approximately.
    approx_cases = [
        ('a + 1', [1.1, 1.3, 1.1, 1.7, 1.4, 1.9]),
        ('a * b', [0.02, 0.03, 0.03, 0.14, 0.16, 0.72]),
        ('max(a, b)', [0.2, 0.3, 0.3, 0.7, 0.4, 0.9]),
    ]
    for expr, expected in approx_cases:
        value = compute_array_expr(expr, namespace=namespace)
        np.testing.assert_array_almost_equal(value, np.array(expected))

    # Results compared with assert_equal.
    exact_cases = [
        ('a > b', [False, True, False, True, False, True]),
        ('a == b', [False, False, False, False, True, False]),
        # This weirdo expression is a result of translating
        # SNAP conditional expressions to Python.
        ('a > 0.35 if a else b', [0.2, 0.1, 0.3, 0.7, 0.4, 0.9]),
        # We actually mean
        ('where(a > 0.35, a, b)', [0.2, 0.1, 0.3, 0.7, 0.4, 0.9]),
    ]
    for expr, expected in exact_cases:
        value = compute_array_expr(expr, namespace=namespace)
        np.testing.assert_equal(value, np.array(expected))
def test_valid_exprs(self):
    """
    Evaluate arithmetic and comparison expressions over two small
    arrays ``a`` and ``b`` and compare each result with its expected array.
    """
    namespace = {
        'a': np.array([0.1, 0.3, 0.1, 0.7, 0.4, 0.9]),
        'b': np.array([0.2, 0.1, 0.3, 0.2, 0.4, 0.8]),
        'np': np,
    }

    # Floating point results, compared approximately.
    approx_cases = [
        ('a + 1', [1.1, 1.3, 1.1, 1.7, 1.4, 1.9]),
        ('a * b', [0.02, 0.03, 0.03, 0.14, 0.16, 0.72]),
        ('max(a, b)', [0.2, 0.3, 0.3, 0.7, 0.4, 0.9]),
    ]
    for expr, expected in approx_cases:
        value = compute_array_expr(expr, namespace=namespace)
        np.testing.assert_array_almost_equal(value, np.array(expected))

    # Boolean results, compared with assert_equal.
    exact_cases = [
        ('a > b', [False, True, False, True, False, True]),
        ('a == b', [False, False, False, False, True, False]),
    ]
    for expr, expected in exact_cases:
        value = compute_array_expr(expr, namespace=namespace)
        np.testing.assert_equal(value, np.array(expected))
def compute_dataset(dataset: xr.Dataset,
                    processed_variables: NameDictPairList = None,
                    errors: str = 'raise') -> xr.Dataset:
    """
    Derive a new dataset from *dataset* by evaluating per-variable
    Python expressions and return it.

    Two variable attributes drive the processing:

    * ``expression``: if given, it must be a valid Python expression. Its
      result becomes the variable's new value. The expression may reference
      the constants ``NaN`` and ``PI``, the modules ``np`` and ``xr``, all
      variables of *dataset*, and any variables computed earlier in the
      processing order. Variables recognized by ``MaskSet.is_flag_var`` are
      made available as ``MaskSet`` objects so that expressions can refer to
      the flags they define by their CF attributes ``flag_meanings`` and
      ``flag_values``.
    * ``valid_pixel_expression``: if given, it must be a Boolean Python
      expression. The variable is masked with ``var.where(valid_mask)``.
      NOTE(review): this function does not read ``_FillValue`` itself;
      xarray's ``where`` fills with NaN by default. Confirm whether
      ``_FillValue`` is applied elsewhere (e.g. when writing).

    All variable attributes, including the two above, are stored on the
    resulting variable as metadata.

    :param dataset: A dataset.
    :param processed_variables: Optional list of variables that will be
        processed in the order given. It is resolved against *dataset* by
        ``to_resolved_name_dict_pairs``. If omitted or empty, all data
        variables of *dataset* are processed, ordered by
        ``_get_var_sort_key``.
    :param errors: How to deal with errors while evaluating expressions.
        May be one of "raise", "warn", or "ignore". The value is passed
        through to ``compute_array_expr``.
    :return: new dataset with computed variables
    :raise ValueError: if a ``valid_pixel_expression`` is given for a
        variable that neither exists in *dataset* nor has been computed.
    """
    if processed_variables:
        processed_variables = to_resolved_name_dict_pairs(processed_variables,
                                                          dataset,
                                                          keep=True)
    else:
        sort_key = functools.partial(_get_var_sort_key, dataset)
        processed_variables = [(name, None)
                               for name in sorted(dataset.data_vars, key=sort_key)]

    # Initialize namespace with some constants and modules
    namespace = {'NaN': np.nan, 'PI': math.pi, 'np': np, 'xr': xr}
    # Now add all mask sets and variables
    for name in dataset.data_vars:
        data_var = dataset[name]
        namespace[name] = MaskSet(data_var) if MaskSet.is_flag_var(data_var) else data_var

    for var_name, var_props in processed_variables:
        if var_name in dataset.data_vars:
            # Existing variable: its own attributes form the base,
            # explicitly passed properties take precedence.
            var = dataset[var_name]
            props = dict(var.attrs)
            if var_props:
                props.update(var_props)
        else:
            # Computed variable
            var = None
            props = dict() if var_props is None else var_props

        expression = props.get('expression')
        if expression:
            # Compute new variable
            computed_array = compute_array_expr(expression,
                                                namespace=namespace,
                                                result_name=f'{var_name!r}',
                                                errors=errors)
            if computed_array is not None:
                if hasattr(computed_array, 'attrs'):
                    var = computed_array
                    var.attrs.update(props)
                namespace[var_name] = computed_array

        valid_pixel_expression = props.get('valid_pixel_expression')
        if not valid_pixel_expression:
            continue

        # Compute new mask for existing variable
        if var is None:
            raise ValueError(f'undefined variable {var_name!r}')
        valid_mask = compute_array_expr(valid_pixel_expression,
                                        namespace=namespace,
                                        result_name=f'valid mask for {var_name!r}',
                                        errors=errors)
        if valid_mask is None:
            continue
        masked_var = var.where(valid_mask)
        if hasattr(masked_var, 'attrs'):
            masked_var.attrs.update(props)
        namespace[var_name] = masked_var

    # Only data arrays from the namespace end up in the result;
    # constants, modules and mask sets are left out.
    computed_dataset = dataset.copy()
    data_arrays = {name: value
                   for name, value in namespace.items()
                   if isinstance(value, xr.DataArray)}
    for name, data_array in data_arrays.items():
        computed_dataset[name] = data_array
    return computed_dataset
def evaluate_dataset(dataset: xr.Dataset,
                     processed_variables: NameDictPairList = None,
                     errors: str = 'raise') -> xr.Dataset:
    """
    Compute new variables or mask existing variables in *dataset*
    by the evaluation of Python expressions that may refer to other
    existing or new variables.
    Returns a new dataset that contains the old and new variables,
    where both may now be masked.

    Expressions may be given by attributes of existing variables in
    *dataset* or passed via the *processed_variables* argument,
    which is a sequence of variable name / attributes tuples.

    The following attributes are recognized:

    1. The attribute ``expression`` generates
       a new variable computed from its attribute value.
    2. The attribute ``valid_pixel_expression`` masks out
       invalid variable values.
    3. The attribute ``load``, if true, calls ``load()`` on the computed
       and/or masked variable. Results that offer no ``load()`` method
       (i.e. expression results that are not xarray objects) are left as-is.

    For 1. and 2. the attribute value must be a string that forms
    a valid Python expression that can reference any other preceding
    variables by name. The constants ``NaN`` and ``PI`` and the modules
    ``np`` and ``xr`` are available too.
    The expression can also reference any flags defined by another
    variable according to their CF attributes ``flag_meanings``
    and ``flag_values``.

    Invalid variable values may be masked out using the value of the
    ``valid_pixel_expression`` attribute whose value should form
    a Boolean Python expression. The variable is masked with
    ``var.where(valid_mask)``.
    NOTE(review): this function does not read ``_FillValue`` itself;
    xarray's ``where`` fills with NaN by default. Confirm whether
    ``_FillValue`` is applied elsewhere (e.g. when writing).

    All attributes are stored as variable metadata as-is.

    :param dataset: A dataset.
    :param processed_variables: Optional list of variable
        name-attributes pairs that will be processed in the given order.
        If omitted or empty, all data variables of *dataset* are
        processed, ordered by ``_get_var_sort_key``.
    :param errors: How to deal with errors while evaluating expressions.
        May be one of "raise", "warn", or "ignore". The value is passed
        through to ``compute_array_expr``.
    :return: new dataset with computed variables
    :raise ValueError: if a ``valid_pixel_expression`` is given for a
        variable that neither exists in *dataset* nor has been computed.
    """
    if processed_variables:
        processed_variables = to_resolved_name_dict_pairs(processed_variables,
                                                          dataset,
                                                          keep=True)
    else:
        var_names = sorted(dataset.data_vars,
                           key=functools.partial(_get_var_sort_key, dataset))
        processed_variables = [(var_name, None) for var_name in var_names]

    # Initialize namespace with some constants and modules
    namespace = dict(NaN=np.nan, PI=math.pi, np=np, xr=xr)
    # Now add all mask sets and variables
    for var_name in dataset.data_vars:
        var = dataset[var_name]
        if MaskSet.is_flag_var(var):
            namespace[var_name] = MaskSet(var)
        else:
            namespace[var_name] = var

    for var_name, var_props in processed_variables:
        if var_name in dataset.data_vars:
            # Existing variable: its own attributes form the base,
            # explicitly passed properties take precedence.
            var = dataset[var_name]
            merged_props = dict(var.attrs)
            if var_props:
                merged_props.update(var_props)
            var_props = merged_props
        else:
            # Computed variable
            var = None
            if var_props is None:
                var_props = dict()

        do_load = var_props.get('load', False)

        expression = var_props.get('expression')
        if expression:
            # Compute new variable
            computed_array = compute_array_expr(expression,
                                                namespace=namespace,
                                                result_name=f'{var_name!r}',
                                                errors=errors)
            if computed_array is not None:
                if hasattr(computed_array, 'attrs'):
                    var = computed_array
                    var.attrs.update(var_props)
                # An expression may evaluate to something that is not an
                # xarray object (the 'attrs' check above allows for that);
                # such results have nothing to load, so do not fail on them.
                if do_load and hasattr(computed_array, 'load'):
                    computed_array.load()
                namespace[var_name] = computed_array

        valid_pixel_expression = var_props.get('valid_pixel_expression')
        if valid_pixel_expression:
            # Compute new mask for existing variable
            if var is None:
                raise ValueError(f'undefined variable {var_name!r}')
            valid_mask = compute_array_expr(valid_pixel_expression,
                                            namespace=namespace,
                                            result_name=f'valid mask for {var_name!r}',
                                            errors=errors)
            if valid_mask is not None:
                masked_var = var.where(valid_mask)
                if hasattr(masked_var, 'attrs'):
                    masked_var.attrs.update(var_props)
                if do_load and hasattr(masked_var, 'load'):
                    masked_var.load()
                namespace[var_name] = masked_var

    # Only data arrays from the namespace end up in the result;
    # constants, modules and mask sets are left out.
    computed_dataset = dataset.copy()
    for name, value in namespace.items():
        if isinstance(value, xr.DataArray):
            computed_dataset[name] = value

    return computed_dataset