def build_da_without_coords(index, cube, file, one_sorted_station_list: bool) -> xr.DataArray:
    dim_names = [k for k in cube.__dataclass_fields__.keys() if cube[k] is not None]
    constant_meta_names = [k for k in cube.__dataclass_fields__.keys() if cube[k] is None]
    dims = {k: len(cube[k]) for k in dim_names}

    data = OnDiskArray(file.name, index, cube, one_sorted_station_list)
    lock = LOCK
    data = TdlpackBackendArray(data, lock)
    data = indexing.LazilyIndexedArray(data)
    da = xr.DataArray(data, dims=dim_names)

    # xarray reads chunking hints from the 'preferred_chunks' encoding key.
    if 'station' in da.dims:
        da.encoding['preferred_chunks'] = {'station': -1}
    else:
        da.encoding['preferred_chunks'] = {'y': -1, 'x': -1}

    da.name = index.name.iloc[0]
    for meta_name in constant_meta_names:
        if meta_name in index.columns:
            da.attrs[meta_name] = index[meta_name].iloc[0]
            da.encoding[f'tdlp_{meta_name}'] = da.attrs[meta_name]

    return da
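# A minimal, self-contained sketch of the same lazy-wrapping chain used above.
# NumpyIndexingAdapter stands in for the OnDiskArray/TdlpackBackendArray pair
# (an assumption made purely for illustration); LazilyIndexedArray and
# NumpyIndexingAdapter are internal xarray names that can move between versions.
import numpy as np
import xarray as xr
from xarray.core import indexing

backing = indexing.NumpyIndexingAdapter(np.arange(12).reshape(3, 4))
lazy = indexing.LazilyIndexedArray(backing)      # indexing is deferred until access
da = xr.DataArray(lazy, dims=('y', 'x'))
da.encoding['preferred_chunks'] = {'y': -1, 'x': -1}  # same chunking hint as above
print(da[0, :2].values)                          # data is only materialized here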
def test_lazily_indexed_array(self):
    original = np.random.rand(10, 20, 30)
    x = indexing.NumpyIndexingAdapter(original)
    v = Variable(['i', 'j', 'k'], original)
    lazy = indexing.LazilyIndexedArray(x)
    v_lazy = Variable(['i', 'j', 'k'], lazy)
    I = ReturnItem()

    # test orthogonally applied indexers
    indexers = [I[:], 0, -2, I[:3], [0, 1, 2, 3], [0], np.arange(10) < 5]
    for i in indexers:
        for j in indexers:
            for k in indexers:
                if isinstance(j, np.ndarray) and j.dtype.kind == 'b':
                    j = np.arange(20) < 5
                if isinstance(k, np.ndarray) and k.dtype.kind == 'b':
                    k = np.arange(30) < 5
                expected = np.asarray(v[i, j, k])
                for actual in [v_lazy[i, j, k],
                               v_lazy[:, j, k][i],
                               v_lazy[:, :, k][:, j][i]]:
                    self.assertEqual(expected.shape, actual.shape)
                    self.assertArrayEqual(expected, actual)
                    assert isinstance(actual._data, indexing.LazilyIndexedArray)

    # test sequentially applied indexers
    indexers = [(3, 2), (I[:], 0), (I[:2], -1), (I[:4], [0]), ([4, 5], 0),
                ([0, 1, 2], [0, 1]), ([0, 3, 5], I[:2])]
    for i, j in indexers:
        expected = np.asarray(v[i][j])
        actual = v_lazy[i][j]
        self.assertEqual(expected.shape, actual.shape)
        self.assertArrayEqual(expected, actual)
        assert isinstance(actual._data, indexing.LazilyIndexedArray)
        assert isinstance(actual._data.array, indexing.NumpyIndexingAdapter)
def test_sub_array(self):
    original = indexing.LazilyIndexedArray(np.arange(10))
    wrapped = indexing.MemoryCachedArray(original)
    child = wrapped[B[:5]]
    self.assertIsInstance(child, indexing.MemoryCachedArray)
    self.assertArrayEqual(child, np.arange(5))
    self.assertIsInstance(child.array, indexing.NumpyIndexingAdapter)
    self.assertIsInstance(wrapped.array, indexing.LazilyIndexedArray)
def test_sub_array(self) -> None:
    original = indexing.LazilyIndexedArray(np.arange(10))
    wrapped = indexing.MemoryCachedArray(original)
    child = wrapped[B[:5]]
    assert isinstance(child, indexing.MemoryCachedArray)
    assert_array_equal(child, np.arange(5))
    assert isinstance(child.array, indexing.NumpyIndexingAdapter)
    assert isinstance(wrapped.array, indexing.LazilyIndexedArray)
def test_vectorized_lazily_indexed_array(self) -> None:
    original = np.random.rand(10, 20, 30)
    x = indexing.NumpyIndexingAdapter(original)
    v_eager = Variable(["i", "j", "k"], x)
    lazy = indexing.LazilyIndexedArray(x)
    v_lazy = Variable(["i", "j", "k"], lazy)
    arr = ReturnItem()

    def check_indexing(v_eager, v_lazy, indexers):
        for indexer in indexers:
            actual = v_lazy[indexer]
            expected = v_eager[indexer]
            assert expected.shape == actual.shape
            assert isinstance(
                actual._data,
                (
                    indexing.LazilyVectorizedIndexedArray,
                    indexing.LazilyIndexedArray,
                ),
            )
            assert_array_equal(expected, actual)
            v_eager = expected
            v_lazy = actual

    # test orthogonal indexing
    indexers = [(arr[:], 0, 1), (Variable("i", [0, 1]),)]
    check_indexing(v_eager, v_lazy, indexers)

    # vectorized indexing
    indexers = [
        (Variable("i", [0, 1]), Variable("i", [0, 1]), slice(None)),
        (slice(1, 3, 2), 0),
    ]
    check_indexing(v_eager, v_lazy, indexers)

    indexers = [
        (slice(None, None, 2), 0, slice(None, 10)),
        (Variable("i", [3, 2, 4, 3]), Variable("i", [3, 2, 1, 0])),
        (Variable(["i", "j"], [[0, 1], [1, 2]]),),
    ]
    check_indexing(v_eager, v_lazy, indexers)

    indexers = [
        (Variable("i", [3, 2, 4, 3]), Variable("i", [3, 2, 1, 0])),
        (Variable(["i", "j"], [[0, 1], [1, 2]]),),
    ]
    check_indexing(v_eager, v_lazy, indexers)
def test_lazily_indexed_array(self):
    original = np.random.rand(10, 20, 30)
    x = indexing.NumpyIndexingAdapter(original)
    v = Variable(['i', 'j', 'k'], original)
    lazy = indexing.LazilyIndexedArray(x)
    v_lazy = Variable(['i', 'j', 'k'], lazy)
    I = ReturnItem()  # noqa: E741  # allow ambiguous name

    # test orthogonally applied indexers
    indexers = [I[:], 0, -2, I[:3], [0, 1, 2, 3], [0], np.arange(10) < 5]
    for i in indexers:
        for j in indexers:
            for k in indexers:
                if isinstance(j, np.ndarray) and j.dtype.kind == 'b':
                    j = np.arange(20) < 5
                if isinstance(k, np.ndarray) and k.dtype.kind == 'b':
                    k = np.arange(30) < 5
                expected = np.asarray(v[i, j, k])
                for actual in [
                        v_lazy[i, j, k],
                        v_lazy[:, j, k][i],
                        v_lazy[:, :, k][:, j][i]
                ]:
                    assert expected.shape == actual.shape
                    assert_array_equal(expected, actual)
                    assert isinstance(actual._data, indexing.LazilyIndexedArray)

                    # make sure actual.key is appropriate type
                    if all(isinstance(k, native_int_types + (slice, ))
                           for k in v_lazy._data.key.tuple):
                        assert isinstance(v_lazy._data.key, indexing.BasicIndexer)
                    else:
                        assert isinstance(v_lazy._data.key, indexing.OuterIndexer)

    # test sequentially applied indexers
    indexers = [(3, 2), (I[:], 0), (I[:2], -1), (I[:4], [0]), ([4, 5], 0),
                ([0, 1, 2], [0, 1]), ([0, 3, 5], I[:2])]
    for i, j in indexers:
        expected = np.asarray(v[i][j])
        actual = v_lazy[i][j]
        assert expected.shape == actual.shape
        assert_array_equal(expected, actual)
        assert isinstance(actual._data, indexing.LazilyIndexedArray)
        assert isinstance(actual._data.array, indexing.NumpyIndexingAdapter)
def test_lazily_indexed_array(self):
    x = indexing.NumpyIndexingAdapter(np.random.rand(10, 20, 30))
    lazy = indexing.LazilyIndexedArray(x)
    I = ReturnItem()

    # test orthogonally applied indexers
    indexers = [I[:], 0, -2, I[:3], [0, 1, 2, 3], np.arange(10) < 5]
    for i in indexers:
        for j in indexers:
            for k in indexers:
                expected = np.asarray(x[i, j, k])
                for actual in [lazy[i, j, k],
                               lazy[:, j, k][i],
                               lazy[:, :, k][:, j][i]]:
                    self.assertEqual(expected.shape, actual.shape)
                    self.assertArrayEqual(expected, actual)

    # test sequentially applied indexers
    indexers = [(3, 2), (I[:], 0), (I[:2], -1), (I[:4], [0]), ([4, 5], 0),
                ([0, 1, 2], [0, 1]), ([0, 3, 5], I[:2])]
    for i, j in indexers:
        expected = np.asarray(x[i][j])
        actual = lazy[i][j]
        self.assertEqual(expected.shape, actual.shape)
        self.assertArrayEqual(expected, actual)
def test_wrapper(self):
    original = indexing.LazilyIndexedArray(np.arange(10))
    wrapped = indexing.MemoryCachedArray(original)
    self.assertArrayEqual(wrapped, np.arange(10))
    self.assertIsInstance(wrapped.array, indexing.NumpyIndexingAdapter)
def decode_cf_variable(var, concat_characters=True, mask_and_scale=True,
                       decode_times=True, decode_endianness=True):
    """
    Decodes a variable which may hold CF encoded information.

    This includes variables that have been masked and scaled, which
    hold CF style time variables (this is almost always the case if
    the dataset has been serialized) and which have strings encoded
    as character arrays.

    Parameters
    ----------
    var : Variable
        A variable holding potentially CF encoded information.
    concat_characters : bool
        Should character arrays be concatenated to strings, for
        example: ['h', 'e', 'l', 'l', 'o'] -> 'hello'
    mask_and_scale : bool
        Lazily scale (using scale_factor and add_offset) and mask
        (using _FillValue).
    decode_times : bool
        Decode cf times ('hours since 2000-01-01') to np.datetime64.
    decode_endianness : bool
        Decode arrays from non-native to native endianness.

    Returns
    -------
    out : Variable
        A variable holding the decoded equivalent of var
    """
    # use _data instead of data so as not to trigger loading data
    var = as_variable(var)
    data = var._data
    dimensions = var.dims
    attributes = var.attrs.copy()
    encoding = var.encoding.copy()

    original_dtype = data.dtype

    if concat_characters:
        if (data.dtype.kind == 'S' and data.dtype.itemsize == 1
                and data.shape[-1] != 0):
            dimensions = dimensions[:-1]
            data = CharToStringArray(data)

    if mask_and_scale:
        if 'missing_value' in attributes:
            # missing_value is deprecated, but we still want to support it as
            # an alias for _FillValue.
            if ('_FillValue' in attributes and not utils.equivalent(
                    attributes['_FillValue'], attributes['missing_value'])):
                raise ValueError("Discovered conflicting _FillValue "
                                 "and missing_value. Consider opening "
                                 "the offending dataset using "
                                 "decode_cf=False, correcting the "
                                 "attributes, and decoding explicitly "
                                 "using xarray.conventions.decode_cf(ds).")
            attributes['_FillValue'] = attributes.pop('missing_value')

        fill_value = np.array(pop_to(attributes, encoding, '_FillValue'))
        if fill_value.size > 1:
            warnings.warn("variable has multiple fill values {0}, decoding "
                          "all values to NaN.".format(str(fill_value)),
                          RuntimeWarning, stacklevel=3)
        scale_factor = pop_to(attributes, encoding, 'scale_factor')
        add_offset = pop_to(attributes, encoding, 'add_offset')
        if ((fill_value is not None and not np.any(pd.isnull(fill_value)))
                or scale_factor is not None or add_offset is not None):
            if fill_value.dtype.kind in ['U', 'S']:
                dtype = object
            else:
                dtype = float
            data = MaskedAndScaledArray(data, fill_value, scale_factor,
                                        add_offset, dtype)

    if decode_times and 'units' in attributes:
        if 'since' in attributes['units']:
            # datetime
            units = pop_to(attributes, encoding, 'units')
            calendar = pop_to(attributes, encoding, 'calendar')
            data = DecodedCFDatetimeArray(data, units, calendar)
        elif attributes['units'] in TIME_UNITS:
            # timedelta
            units = pop_to(attributes, encoding, 'units')
            data = DecodedCFTimedeltaArray(data, units)

    if decode_endianness and not data.dtype.isnative:
        # do this last, so it's only done if we didn't already unmask/scale
        data = NativeEndiannessArray(data)
        original_dtype = data.dtype

    if 'dtype' in encoding:
        if original_dtype != encoding['dtype']:
            warnings.warn("CF decoding is overwriting dtype")
    else:
        encoding['dtype'] = original_dtype

    if 'dtype' in attributes and attributes['dtype'] == 'bool':
        del attributes['dtype']
        data = BoolTypeArray(data)

    return Variable(dimensions, indexing.LazilyIndexedArray(data),
                    attributes, encoding=encoding)
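# Hedged usage sketch for the function above: a hypothetical int16 variable
# carrying standard CF attributes (_FillValue, scale_factor, add_offset).
# Decoding wraps the data lazily; masking and scaling only happen when the
# result is actually read.
import numpy as np
from xarray import Variable

raw = Variable(('time',),
               np.array([0, 1, 2, -999], dtype='int16'),
               {'_FillValue': -999, 'scale_factor': 0.5, 'add_offset': 10.0})
decoded = decode_cf_variable(raw)
print(np.asarray(decoded))  # materializes now: [10.  10.5 11.   nan]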
def test_wrapper(self) -> None:
    original = indexing.LazilyIndexedArray(np.arange(10))
    wrapped = indexing.MemoryCachedArray(original)
    assert_array_equal(wrapped, np.arange(10))
    assert isinstance(wrapped.array, indexing.NumpyIndexingAdapter)
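# A minimal sketch of the behavior the two test_wrapper variants assert, using
# xarray's internal indexing module (names may differ between versions):
# MemoryCachedArray keeps its wrapped array lazy until first access, then
# replaces it with an in-memory NumpyIndexingAdapter.
import numpy as np
from xarray.core import indexing

lazy = indexing.LazilyIndexedArray(np.arange(10))
cached = indexing.MemoryCachedArray(lazy)
print(type(cached.array))   # still the lazy wrapper, nothing loaded yet
np.asarray(cached)          # first access materializes and caches the values
print(type(cached.array))   # now a NumpyIndexingAdapter, as the tests check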
def test_lazily_indexed_array(self) -> None:
    original = np.random.rand(10, 20, 30)
    x = indexing.NumpyIndexingAdapter(original)
    v = Variable(["i", "j", "k"], original)
    lazy = indexing.LazilyIndexedArray(x)
    v_lazy = Variable(["i", "j", "k"], lazy)
    arr = ReturnItem()

    # test orthogonally applied indexers
    indexers = [arr[:], 0, -2, arr[:3], [0, 1, 2, 3], [0], np.arange(10) < 5]
    for i in indexers:
        for j in indexers:
            for k in indexers:
                if isinstance(j, np.ndarray) and j.dtype.kind == "b":
                    j = np.arange(20) < 5
                if isinstance(k, np.ndarray) and k.dtype.kind == "b":
                    k = np.arange(30) < 5
                expected = np.asarray(v[i, j, k])
                for actual in [
                    v_lazy[i, j, k],
                    v_lazy[:, j, k][i],
                    v_lazy[:, :, k][:, j][i],
                ]:
                    assert expected.shape == actual.shape
                    assert_array_equal(expected, actual)
                    assert isinstance(actual._data, indexing.LazilyIndexedArray)

                    # make sure actual.key is appropriate type
                    if all(
                        isinstance(k, (int, slice)) for k in v_lazy._data.key.tuple
                    ):
                        assert isinstance(v_lazy._data.key, indexing.BasicIndexer)
                    else:
                        assert isinstance(v_lazy._data.key, indexing.OuterIndexer)

    # test sequentially applied indexers
    indexers = [
        (3, 2),
        (arr[:], 0),
        (arr[:2], -1),
        (arr[:4], [0]),
        ([4, 5], 0),
        ([0, 1, 2], [0, 1]),
        ([0, 3, 5], arr[:2]),
    ]
    for i, j in indexers:
        expected_b = v[i][j]
        actual = v_lazy[i][j]
        assert expected_b.shape == actual.shape
        assert_array_equal(expected_b, actual)

        # test transpose
        if actual.ndim > 1:
            order = np.random.choice(actual.ndim, actual.ndim)
            order = np.array(actual.dims)
            transposed = actual.transpose(*order)
            assert_array_equal(expected_b.transpose(*order), transposed)
            assert isinstance(
                actual._data,
                (
                    indexing.LazilyVectorizedIndexedArray,
                    indexing.LazilyIndexedArray,
                ),
            )

        assert isinstance(actual._data, indexing.LazilyIndexedArray)
        assert isinstance(actual._data.array, indexing.NumpyIndexingAdapter)
def open_store_variable(var):
    'Turn CDMRemote variable into something like a numpy.ndarray'
    data = indexing.LazilyIndexedArray(var)
    return Variable(var.dimensions, data,
                    {a: getattr(var, a) for a in var.ncattrs()})
def open_store_variable(self, name, var):
    """Turn CDMRemote variable into something like a numpy.ndarray."""
    data = indexing.LazilyIndexedArray(CDMArrayWrapper(name, self))
    return Variable(var.dimensions, data,
                    {a: getattr(var, a) for a in var.ncattrs()})