# Implied by the code below: `np` is NumPy, `xr` is xarray, and `hd` is the hdmedians package.
import numpy as np
import xarray as xr
import hdmedians as hd


def create_hdmedians_multiple_band_mosaic(dataset_in, clean_mask=None, no_data=-9999,
                                           intermediate_product=None, operation="median", **kwargs):
    assert clean_mask is not None, "A boolean mask for clean_mask must be supplied."
    assert operation in ['median', 'medoid'], "Only median and medoid operations are supported."

    # Mask out no-data values and unclean (e.g. cloudy) pixels.
    dataset_in_filtered = dataset_in.where((dataset_in != no_data) & clean_mask)

    # Stack the bands into a single array of shape (band, time, latitude, longitude).
    band_list = list(dataset_in_filtered.data_vars)
    arrays = [dataset_in_filtered[band] for band in band_list]
    stacked_data = np.stack(arrays)
    bands_shape, time_slices_shape, lat_shape, lon_shape = stacked_data.shape
    reshaped_stack = stacked_data.reshape(bands_shape, time_slices_shape,
                                          lat_shape * lon_shape)  # Reshape to remove lat/lon
    hdmedians_result = np.zeros((bands_shape, lat_shape * lon_shape))  # Build zeroes array across time slices.

    # For each pixel, find the geomedian or geomedoid of its band values across time.
    for x in range(reshaped_stack.shape[2]):
        try:
            hdmedians_result[:, x] = hd.nangeomedian(
                reshaped_stack[:, :, x], axis=1) if operation == "median" else hd.nanmedoid(
                    reshaped_stack[:, :, x], axis=1)
        except ValueError:
            # No valid observations for this pixel - fall back to the no-data value.
            no_data_pixel_stack = reshaped_stack[:, :, x]
            no_data_pixel_stack[np.isnan(no_data_pixel_stack)] = no_data
            hdmedians_result[:, x] = np.full((bands_shape), no_data) if operation == "median" else hd.nanmedoid(
                no_data_pixel_stack, axis=1)

    # Rebuild an xarray.Dataset with the original latitude/longitude coordinates.
    output_dict = {
        value: (('latitude', 'longitude'), hdmedians_result[index, :].reshape(lat_shape, lon_shape))
        for index, value in enumerate(band_list)
    }
    dataset_out = xr.Dataset(output_dict,
                             coords={'latitude': dataset_in['latitude'],
                                     'longitude': dataset_in['longitude']},
                             attrs=dataset_in.attrs)
    # `nan_to_num` (defined elsewhere in this module) fills nan cells with `no_data` in place.
    nan_to_num(dataset_out, no_data)
    # return dataset_out
    return dataset_out.astype(kwargs.get('dtype', 'int32'))
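# Minimal sketch (illustration only, not part of the module) of why nans are filled before the
# final `astype` above: integer dtypes cannot represent nan, and casting nan to an integer in
# NumPy produces unpredictable garbage values, so masked cells are set to `no_data` first.
# Assumes only numpy and xarray; `fillna` stands in for the module's `nan_to_num` helper.
import numpy as np
import xarray as xr

ds = xr.Dataset({'red': (('latitude', 'longitude'),
                         np.array([[100, -9999], [250, 300]], dtype=np.int16))})
masked = ds.where(ds != -9999)          # masking promotes int16 -> float64 and inserts nan
filled = masked.fillna(-9999)           # replace nan with the no-data value
restored = filled.astype(np.int16)      # now safe to cast back to an integer dtype
print(masked['red'].dtype, restored['red'].dtype)  # float64 int16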
def create_hdmedians_multiple_band_mosaic(dataset_in, clean_mask=None, no_data=-9999, dtype=None,
                                           intermediate_product=None, operation="median", **kwargs):
    """
    Calculates the geomedian or geomedoid using a multi-band processing method.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        A dataset retrieved from the Data Cube; should contain:
        coordinates: time, y, x (in that order)
        variables: variables to be mosaicked (e.g. red, green, and blue bands)
    clean_mask: np.ndarray
        An ndarray of the same shape as `dataset_in` - True where values should be kept.
        If no clean mask is specified, then all values are kept during compositing.
    no_data: int or float
        The no data value.
    dtype: str or numpy.dtype
        A string denoting a Python datatype name (e.g. int, float) or a NumPy dtype (e.g.
        np.int16, np.float32) to convert the data to.
    operation: str in ['median', 'medoid']
        The compositing method: geomedian ('median') or geomedoid ('medoid').

    Returns
    -------
    dataset_out: xarray.Dataset
        Composited data with the format:
        coordinates: y, x
        variables: same as dataset_in
    """
    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)
    assert operation in ['median', 'medoid'], "Only median and medoid operations are supported."

    band_list = list(dataset_in.data_vars)
    dataset_in_dtypes = None
    if dtype is None:
        # Save dtypes because masking with Dataset.where() converts to float64.
        dataset_in_dtypes = {}
        for band in band_list:
            dataset_in_dtypes[band] = dataset_in[band].dtype

    # Mask out clouds and scan lines.
    dataset_in = dataset_in.where((dataset_in != no_data) & clean_mask)

    arrays = [dataset_in[band] for band in band_list]
    stacked_data = np.stack(arrays)
    bands_shape, time_slices_shape, lat_shape, lon_shape = stacked_data.shape
    # Reshape to remove lat/lon
    reshaped_stack = stacked_data.reshape(bands_shape, time_slices_shape,
                                          lat_shape * lon_shape)
    # Build zeroes array across time slices.
    hdmedians_result = np.zeros((bands_shape, lat_shape * lon_shape))

    # For each pixel (lat/lon combination), find the geomedian or geomedoid across time.
    for x in range(reshaped_stack.shape[2]):
        try:
            hdmedians_result[:, x] = hd.nangeomedian(
                reshaped_stack[:, :, x], axis=1) if operation == "median" else hd.nanmedoid(
                    reshaped_stack[:, :, x], axis=1)
        except ValueError:
            # If all bands have nan values across time, the geomedians are nans.
            hdmedians_result[:, x] = np.full((bands_shape), np.nan)

    output_dict = {
        value: (('y', 'x'), hdmedians_result[index, :].reshape(lat_shape, lon_shape))
        for index, value in enumerate(band_list)
    }
    dataset_out = xr.Dataset(output_dict,
                             coords={'y': dataset_in['y'], 'x': dataset_in['x']},
                             attrs=dataset_in.attrs)
    dataset_out = restore_or_convert_dtypes(dtype, band_list, dataset_in_dtypes, dataset_out, no_data)
    return dataset_out
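# Runnable sketch (illustration only, not part of the module) of the per-pixel computation the
# mosaic functions above perform: stack the bands, flatten the spatial dimensions, and take the
# geometric median of each pixel's (band x time) stack with hdmedians. Assumes numpy, xarray,
# and hdmedians are installed; the dataset here is synthetic.
import numpy as np
import xarray as xr
import hdmedians as hd

times, lats, lons = 4, 3, 3
rng = np.random.default_rng(0)
synthetic = xr.Dataset(
    {band: (('time', 'latitude', 'longitude'), rng.uniform(0.0, 1.0, (times, lats, lons)))
     for band in ('red', 'green', 'blue')},
    coords={'time': np.arange(times), 'latitude': np.arange(lats), 'longitude': np.arange(lons)})

stacked = np.stack([synthetic[band].values for band in synthetic.data_vars])  # (band, time, lat, lon)
bands, t, lat, lon = stacked.shape
reshaped = stacked.reshape(bands, t, lat * lon)                               # (band, time, pixel)
composite = np.zeros((bands, lat * lon))
for px in range(lat * lon):
    composite[:, px] = hd.nangeomedian(reshaped[:, :, px], axis=1)            # one value per band
print(composite.reshape(bands, lat, lon).shape)                               # (3, 3, 3)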
# Implied by the tests below: `hd` is the hdmedians package and the assertion helpers come
# from numpy.testing.
import numpy as np
import hdmedians as hd
from numpy.testing import assert_array_almost_equal, assert_equal


def test_nanmedoid_two_obs():
    data = np.array([[1.0, np.nan, 1.0],
                     [2.0, 1.0, 1.0]])
    m = hd.nanmedoid(data, axis=0)
    r = np.array([2.0, 1.0, 1.0])
    assert_array_almost_equal(m, r, decimal=3)
def test_nanmedoid_axis_one_indexonly():
    data = np.array([[1.0, np.nan, 1.0],
                     [2.0, 1.0, 1.0]])
    m = hd.nanmedoid(data, axis=1, indexonly=True)
    assert_equal(m, 0)
def test_nanmedoid_axis_one():
    data = np.array([[1.0, np.nan, 1.0],
                     [2.0, 1.0, 1.0]])
    m = hd.nanmedoid(data, axis=1)
    r = np.array([1.0, 2.0])
    assert_array_almost_equal(m, r, decimal=3)
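# Reference sketch (illustration only; `naive_nanmedoid_axis1` is not an hdmedians function):
# the medoid is the actual observation with the smallest summed distance (Euclidean distance is
# assumed here) to all other observations, and - as the tests above imply - any observation
# containing a nan is ignored.
import numpy as np
import hdmedians as hd

def naive_nanmedoid_axis1(data):
    """Medoid over the columns of `data`, dropping columns that contain nan."""
    obs = data[:, ~np.isnan(data).any(axis=0)]                    # keep only complete observations
    pairwise = np.linalg.norm(obs[:, :, None] - obs[:, None, :], axis=0)
    return obs[:, np.argmin(pairwise.sum(axis=0))]                # column with smallest total distance

data = np.array([[1.0, np.nan, 1.0],
                 [2.0, 1.0, 1.0]])
print(naive_nanmedoid_axis1(data), np.asarray(hd.nanmedoid(data, axis=1)))   # both [1. 2.]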
def create_hdmedians_multiple_band_mosaic(dataset_in, clean_mask=None, no_data=-9999, dtype=None,
                                           intermediate_product=None, operation="median", **kwargs):
    """
    Calculates the geomedian or geomedoid using a multi-band processing method.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        A dataset retrieved from the Data Cube; should contain:
        coordinates: time, latitude, longitude (in that order)
        variables: variables to be mosaicked (e.g. red, green, and blue bands)
    clean_mask: np.ndarray
        An ndarray of the same shape as `dataset_in` - True where values should be kept.
        If no clean mask is specified, then all values are kept during compositing.
    no_data: int or float
        The no data value.
    dtype: str or numpy.dtype
        A string denoting a Python datatype name (e.g. int, float) or a NumPy dtype (e.g.
        np.int16, np.float32) to convert the data to.
    operation: str in ['median', 'medoid']
        The compositing method: geomedian ('median') or geomedoid ('medoid').

    Returns
    -------
    dataset_out: xarray.Dataset
        Composited data with the format:
        coordinates: latitude, longitude
        variables: same as dataset_in
    """
    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)
    assert operation in ['median', 'medoid'], "Only median and medoid operations are supported."

    # print("dataset_in:", dataset_in)
    # print("sum dataset_in:", dataset_in.sum())
    # log_strs = kwargs.get('log_strs', None)

    # Save dtypes because masking with Dataset.where() converts to float64.
    band_list = list(dataset_in.data_vars)
    dataset_in_dtypes = {}
    for band in band_list:
        dataset_in_dtypes[band] = dataset_in[band].dtype

    # Mask out clouds and scan lines.
    dataset_in = dataset_in.where((dataset_in != no_data) & clean_mask)
    # if log_strs is not None:
    #     log_strs.append("sum of dataset_in no_data:" + str(dataset_in.where(dataset_in == no_data).sum()))
    # print("filtered dataset_in:", dataset_in)
    # print("sum filtered dataset_in:", dataset_in.sum())

    arrays = [dataset_in[band] for band in band_list]
    stacked_data = np.stack(arrays)
    bands_shape, time_slices_shape, lat_shape, lon_shape = stacked_data.shape
    # Reshape to remove lat/lon
    reshaped_stack = stacked_data.reshape(bands_shape, time_slices_shape,
                                          lat_shape * lon_shape)
    # Build zeroes array across time slices.
    hdmedians_result = np.zeros((bands_shape, lat_shape * lon_shape))

    # For each pixel (lat/lon combination), find the geomedian or geomedoid across time.
    for x in range(reshaped_stack.shape[2]):
        try:
            # if log_strs is not None:
            #     log_strs.append("reshaped_stack[:, :, {}]" + str(reshaped_stack[:, :, x]))
            hdmedians_result[:, x] = hd.nangeomedian(
                reshaped_stack[:, :, x], axis=1) if operation == "median" else hd.nanmedoid(
                    reshaped_stack[:, :, x], axis=1)
        except ValueError as e:
            # if log_strs is not None:
            #     log_strs.append("ValueError! args:" + str(e.args))
            #     log_strs.append("~np.isnan(reshaped_stack[:, :, x]): " + str(~np.isnan(reshaped_stack[:, :, x])))
            #     log_strs.append("~np.isnan(reshaped_stack[:, :, x]).any(axis=1): " + str(~np.isnan(reshaped_stack[:, :, x]).any(axis=1)))
            #     log_strs.append("ngood:" + str(np.count_nonzero(~np.isnan(reshaped_stack[:, :, x]).any(axis=1))))
            # If all bands have nan values across time, the geomedians are nans.
            hdmedians_result[:, x] = np.full((bands_shape), np.nan)
            # nan_pixel_stack = reshaped_stack[:, :, x]
            # nan_pixel_stack[np.isnan(nan_pixel_stack)] = no_data
            # hdmedians_result[:, x] = np.full((bands_shape), no_data) if operation == "median" else hd.nanmedoid(
            #     no_data_pixel_stack, axis=1)

    output_dict = {
        value: (('latitude', 'longitude'), hdmedians_result[index, :].reshape(lat_shape, lon_shape))
        for index, value in enumerate(band_list)
    }
    dataset_out = xr.Dataset(output_dict,
                             coords={'latitude': dataset_in['latitude'],
                                     'longitude': dataset_in['longitude']})
    # if log_strs is not None:
    #     log_strs.append("dataset_in:" + str(dataset_in))
    #     log_strs.append("sum of dataset_in no_data:" + str(dataset_in.where(dataset_in == no_data).sum()))
    #     log_strs.append("before conversions - dataset_out:" + str(dataset_out))
    #     log_strs.append("before conversions - sum of dataset_out no_data:" + str(dataset_out.where(dataset_out == no_data).sum()))
    # utilities.nan_to_num(dataset_out, no_data)
    dataset_out = restore_or_convert_dtypes(dtype, band_list, dataset_in_dtypes, dataset_out, no_data)
    # if dtype is not None:
    #     # Integer types can't represent nan.
    #     if np.issubdtype(dtype, np.integer):  # This also works for Python int type.
    #         utilities.nan_to_num(dataset_out, no_data)
    #     convert_to_dtype(dataset_out, dtype)
    # else:  # Restore dtypes to state before masking.
    #     for band in band_list:
    #         # print("dataset_in_dtypes[band]:", dataset_in_dtypes[band])
    #         band_dtype = dataset_in_dtypes[band]
    #         if np.issubdtype(band_dtype, np.integer):
    #             utilities.nan_to_num(dataset_out[band], no_data)
    #         dataset_out[band] = dataset_out[band].astype(band_dtype)
    # print("dataset_out:", dataset_out)
    # if log_strs is not None:
    #     log_strs.append("dataset_in_dtypes:" + str(dataset_in_dtypes))
    #     log_strs.append("after conversions - dataset_out:" + str(dataset_out))
    #     log_strs.append("after conversions - sum of dataset_out no_data:" + str(dataset_out.where(dataset_out == no_data).sum()))
    return dataset_out
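# Sketch (illustration only) of the difference between the two `operation` modes above: the
# geometric median is a synthesised point, whereas the medoid is always one of the actual
# observations, so hd.nanmedoid returns one of the pixel's original time slices. Assumes
# numpy and hdmedians are installed; the pixel stack is synthetic.
import numpy as np
import hdmedians as hd

pixel_stack = np.array([[0.10, 0.30, 0.20],   # band 1 over three time slices
                        [0.50, 0.40, 0.60]])  # band 2 over the same three time slices
geomedian = np.asarray(hd.nangeomedian(pixel_stack, axis=1))
medoid = np.asarray(hd.nanmedoid(pixel_stack, axis=1))
print(geomedian)  # typically does not coincide with any single time slice
print(medoid)     # always equal to one of the columns of pixel_stack
assert any(np.allclose(medoid, pixel_stack[:, i]) for i in range(pixel_stack.shape[1]))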