예제 #1
0
def build_da_without_coords(index, cube, file,
                            one_sorted_station_list: bool) -> xr.DataArray:
    """Build a lazily indexed, coordinate-free DataArray for one variable.

    Fields of ``cube`` whose value is not ``None`` become the dimensions of
    the array.  Fields whose value is ``None`` are treated as constant
    metadata: when ``index`` has a column of the same name, the first value
    of that column is stored in ``attrs`` and, under a ``tdlp_`` prefix, in
    ``encoding``.

    Parameters
    ----------
    index
        Table-like object exposing ``.name``, ``.columns`` and column access
        with ``.iloc`` (presumably a pandas DataFrame -- confirm with caller).
    cube
        Dataclass instance that also supports ``cube[field_name]`` lookup.
    file
        Object whose ``.name`` is handed to ``OnDiskArray``.
    one_sorted_station_list : bool
        Forwarded unchanged to ``OnDiskArray``.

    Returns
    -------
    xr.DataArray
        Array named after the first entry of ``index.name``.
    """
    dim_names = []
    constant_meta_names = []
    for field_name in cube.__dataclass_fields__:
        if cube[field_name] is not None:
            dim_names.append(field_name)
        else:
            constant_meta_names.append(field_name)

    data = OnDiskArray(file.name, index, cube, one_sorted_station_list)
    data = TdlpackBackendArray(data, LOCK)
    data = indexing.LazilyIndexedArray(data)
    da = xr.DataArray(data, dims=dim_names)

    # xarray looks up the key 'preferred_chunks'; the former spelling
    # 'preffered_chunks' was never picked up.  -1 asks for one chunk
    # spanning the whole dimension.
    if 'station' in da.dims:
        da.encoding['preferred_chunks'] = {'station': -1}
    else:
        da.encoding['preferred_chunks'] = {'y': -1, 'x': -1}

    da.name = index.name.iloc[0]
    for meta_name in constant_meta_names:
        if meta_name in index.columns:
            value = index[meta_name].iloc[0]
            da.attrs[meta_name] = value
            da.encoding[f'tdlp_{meta_name}'] = value

    return da
예제 #2
0
 def test_lazily_indexed_array(self):
     """Compare lazily indexed variables against eagerly indexed ones."""
     raw = np.random.rand(10, 20, 30)
     adapter = indexing.NumpyIndexingAdapter(raw)
     v = Variable(['i', 'j', 'k'], raw)
     v_lazy = Variable(['i', 'j', 'k'],
                       indexing.LazilyIndexedArray(adapter))
     take = ReturnItem()

     def resize_mask(key, size):
         # A boolean mask is rebuilt so its length equals the axis length.
         if isinstance(key, np.ndarray) and key.dtype.kind == 'b':
             return np.arange(size) < 5
         return key

     # test orthogonally applied indexers
     indexers = [
         take[:], 0, -2, take[:3], [0, 1, 2, 3], [0],
         np.arange(10) < 5,
     ]
     for i in indexers:
         for j in indexers:
             j = resize_mask(j, 20)
             for k in indexers:
                 k = resize_mask(k, 30)
                 expected = np.asarray(v[i, j, k])
                 # The same selection, applied in one, two and three steps.
                 candidates = [
                     v_lazy[i, j, k],
                     v_lazy[:, j, k][i],
                     v_lazy[:, :, k][:, j][i],
                 ]
                 for actual in candidates:
                     self.assertEqual(expected.shape, actual.shape)
                     self.assertArrayEqual(expected, actual)
                     assert isinstance(actual._data,
                                       indexing.LazilyIndexedArray)

     # test sequentially applied indexers
     key_pairs = [
         (3, 2),
         (take[:], 0),
         (take[:2], -1),
         (take[:4], [0]),
         ([4, 5], 0),
         ([0, 1, 2], [0, 1]),
         ([0, 3, 5], take[:2]),
     ]
     for first, second in key_pairs:
         expected = np.asarray(v[first][second])
         actual = v_lazy[first][second]
         self.assertEqual(expected.shape, actual.shape)
         self.assertArrayEqual(expected, actual)
         assert isinstance(actual._data, indexing.LazilyIndexedArray)
         assert isinstance(actual._data.array,
                           indexing.NumpyIndexingAdapter)
예제 #3
0
 def test_sub_array(self):
     """Slice a MemoryCachedArray and check the wrappers on both sides."""
     lazy_source = indexing.LazilyIndexedArray(np.arange(10))
     cached = indexing.MemoryCachedArray(lazy_source)
     piece = cached[B[:5]]
     self.assertIsInstance(piece, indexing.MemoryCachedArray)
     # The value comparison comes before the checks on ``.array``; keep
     # this order.
     self.assertArrayEqual(piece, np.arange(5))
     self.assertIsInstance(piece.array, indexing.NumpyIndexingAdapter)
     self.assertIsInstance(cached.array, indexing.LazilyIndexedArray)
예제 #4
0
 def test_sub_array(self) -> None:
     """Slice a MemoryCachedArray and check the wrappers on both sides."""
     lazy_source = indexing.LazilyIndexedArray(np.arange(10))
     cached = indexing.MemoryCachedArray(lazy_source)
     piece = cached[B[:5]]
     assert isinstance(piece, indexing.MemoryCachedArray)
     # The value comparison comes before the checks on ``.array``; keep
     # this order.
     assert_array_equal(piece, np.arange(5))
     assert isinstance(piece.array, indexing.NumpyIndexingAdapter)
     assert isinstance(cached.array, indexing.LazilyIndexedArray)
예제 #5
0
    def test_vectorized_lazily_indexed_array(self) -> None:
        """Check lazy against eager results under chained indexers."""
        raw = np.random.rand(10, 20, 30)
        adapter = indexing.NumpyIndexingAdapter(raw)
        v_eager = Variable(["i", "j", "k"], adapter)
        v_lazy = Variable(["i", "j", "k"],
                          indexing.LazilyIndexedArray(adapter))
        arr = ReturnItem()
        lazy_types = (
            indexing.LazilyVectorizedIndexedArray,
            indexing.LazilyIndexedArray,
        )

        def check_indexing(eager, lazy, keys):
            # Every key is applied to the result of the previous key, so
            # the keys of one group form a chain.
            for key in keys:
                lazy = lazy[key]
                eager = eager[key]
                assert eager.shape == lazy.shape
                assert isinstance(lazy._data, lazy_types)
                assert_array_equal(eager, lazy)

        key_groups = [
            # test orthogonal indexing
            [(arr[:], 0, 1), (Variable("i", [0, 1]), )],
            # vectorized indexing
            [
                (Variable("i", [0, 1]), Variable("i", [0, 1]), slice(None)),
                (slice(1, 3, 2), 0),
            ],
            [
                (slice(None, None, 2), 0, slice(None, 10)),
                (Variable("i", [3, 2, 4, 3]), Variable("i", [3, 2, 1, 0])),
                (Variable(["i", "j"], [[0, 1], [1, 2]]), ),
            ],
            [
                (Variable("i", [3, 2, 4, 3]), Variable("i", [3, 2, 1, 0])),
                (Variable(["i", "j"], [[0, 1], [1, 2]]), ),
            ],
        ]
        # Each group starts again from the full, un-indexed variables.
        for keys in key_groups:
            check_indexing(v_eager, v_lazy, keys)
    def test_lazily_indexed_array(self):
        """Compare lazily indexed variables against eagerly indexed ones."""
        raw = np.random.rand(10, 20, 30)
        adapter = indexing.NumpyIndexingAdapter(raw)
        v = Variable(['i', 'j', 'k'], raw)
        v_lazy = Variable(['i', 'j', 'k'],
                          indexing.LazilyIndexedArray(adapter))
        take = ReturnItem()
        basic_types = native_int_types + (slice, )

        def resize_mask(key, size):
            # A boolean mask is rebuilt so its length equals the axis length.
            if isinstance(key, np.ndarray) and key.dtype.kind == 'b':
                return np.arange(size) < 5
            return key

        # test orthogonally applied indexers
        indexers = [
            take[:], 0, -2, take[:3], [0, 1, 2, 3], [0],
            np.arange(10) < 5,
        ]
        for i in indexers:
            for j in indexers:
                j = resize_mask(j, 20)
                for k in indexers:
                    k = resize_mask(k, 30)
                    expected = np.asarray(v[i, j, k])
                    # The same selection in one, two and three steps.
                    candidates = [
                        v_lazy[i, j, k],
                        v_lazy[:, j, k][i],
                        v_lazy[:, :, k][:, j][i],
                    ]
                    for actual in candidates:
                        assert expected.shape == actual.shape
                        assert_array_equal(expected, actual)
                        assert isinstance(actual._data,
                                          indexing.LazilyIndexedArray)

                        # make sure the key is of the appropriate type
                        # NOTE(review): this inspects the key of ``v_lazy``,
                        # not of ``actual`` -- confirm that is intended.
                        key = v_lazy._data.key
                        if all(isinstance(part, basic_types)
                               for part in key.tuple):
                            key_type = indexing.BasicIndexer
                        else:
                            key_type = indexing.OuterIndexer
                        assert isinstance(key, key_type)

        # test sequentially applied indexers
        key_pairs = [
            (3, 2),
            (take[:], 0),
            (take[:2], -1),
            (take[:4], [0]),
            ([4, 5], 0),
            ([0, 1, 2], [0, 1]),
            ([0, 3, 5], take[:2]),
        ]
        for first, second in key_pairs:
            expected = np.asarray(v[first][second])
            actual = v_lazy[first][second]
            assert expected.shape == actual.shape
            assert_array_equal(expected, actual)
            assert isinstance(actual._data, indexing.LazilyIndexedArray)
            assert isinstance(actual._data.array,
                              indexing.NumpyIndexingAdapter)
예제 #7
0
 def test_lazily_indexed_array(self):
     """Compare a LazilyIndexedArray against the adapter it wraps."""
     x = indexing.NumpyIndexingAdapter(np.random.rand(10, 20, 30))
     lazy = indexing.LazilyIndexedArray(x)
     take = ReturnItem()

     # test orthogonally applied indexers
     # NOTE(review): the boolean mask has length 10 but is also applied to
     # the axes of length 20 and 30 -- confirm this is intended.
     indexers = [
         take[:], 0, -2, take[:3], [0, 1, 2, 3],
         np.arange(10) < 5,
     ]
     for i in indexers:
         for j in indexers:
             for k in indexers:
                 expected = np.asarray(x[i, j, k])
                 # The same selection, applied in one, two and three steps.
                 candidates = [
                     lazy[i, j, k],
                     lazy[:, j, k][i],
                     lazy[:, :, k][:, j][i],
                 ]
                 for actual in candidates:
                     self.assertEqual(expected.shape, actual.shape)
                     self.assertArrayEqual(expected, actual)

     # test sequentially applied indexers
     key_pairs = [
         (3, 2),
         (take[:], 0),
         (take[:2], -1),
         (take[:4], [0]),
         ([4, 5], 0),
         ([0, 1, 2], [0, 1]),
         ([0, 3, 5], take[:2]),
     ]
     for first, second in key_pairs:
         expected = np.asarray(x[first][second])
         actual = lazy[first][second]
         self.assertEqual(expected.shape, actual.shape)
         self.assertArrayEqual(expected, actual)
예제 #8
0
 def test_wrapper(self):
     """Wrap a lazy array in MemoryCachedArray and check what it holds."""
     lazy_source = indexing.LazilyIndexedArray(np.arange(10))
     cached = indexing.MemoryCachedArray(lazy_source)
     # The value comparison comes before the check on ``.array``; keep
     # this order.
     self.assertArrayEqual(cached, np.arange(10))
     self.assertIsInstance(cached.array, indexing.NumpyIndexingAdapter)
예제 #9
0
def decode_cf_variable(var,
                       concat_characters=True,
                       mask_and_scale=True,
                       decode_times=True,
                       decode_endianness=True):
    """
    Decodes a variable which may hold CF encoded information.

    This includes variables that have been masked and scaled, which
    hold CF style time variables (this is almost always the case if
    the dataset has been serialized) and which have strings encoded
    as character arrays.

    Parameters
    ----------
    var : Variable
        A variable holding potentially CF encoded information.
    concat_characters : bool
        Should character arrays be concatenated to strings, for
        example: ['h', 'e', 'l', 'l', 'o'] -> 'hello'
    mask_and_scale: bool
        Lazily scale (using scale_factor and add_offset) and mask
        (using _FillValue).
    decode_times : bool
        Decode cf times ('hours since 2000-01-01') to np.datetime64.
    decode_endianness : bool
        Decode arrays from non-native to native endianness.

    Returns
    -------
    out : Variable
        A variable holding the decoded equivalent of var

    Raises
    ------
    ValueError
        If ``mask_and_scale`` is set and the variable carries both
        ``_FillValue`` and ``missing_value`` attributes that are not
        equivalent.
    """
    # use _data instead of data so as not to trigger loading data
    var = as_variable(var)
    data = var._data
    dimensions = var.dims
    # Work on copies so the attrs/encoding of the input stay untouched.
    attributes = var.attrs.copy()
    encoding = var.encoding.copy()

    original_dtype = data.dtype

    if concat_characters:
        # Single-byte string arrays lose their last dimension, which is
        # folded into the strings.
        if (data.dtype.kind == 'S' and data.dtype.itemsize == 1
                and data.shape[-1] != 0):
            dimensions = dimensions[:-1]
            data = CharToStringArray(data)

    if mask_and_scale:
        if 'missing_value' in attributes:
            # missing_value is deprecated, but we still want to support it as
            # an alias for _FillValue.
            if ('_FillValue' in attributes and not utils.equivalent(
                    attributes['_FillValue'], attributes['missing_value'])):
                # One single message: a stray comma used to split this text
                # into two separate exception arguments.
                raise ValueError(
                    "Discovered conflicting _FillValue "
                    "and missing_value.  Consider "
                    "opening the offending dataset using "
                    "decode_cf=False, correcting the attributes "
                    "and decoding explicitly using "
                    "xarray.conventions.decode_cf(ds)")
            attributes['_FillValue'] = attributes.pop('missing_value')

        # Wrapping in np.array gives ``size`` and ``dtype`` even when
        # pop_to returned a scalar.
        fill_value = np.array(pop_to(attributes, encoding, '_FillValue'))
        if fill_value.size > 1:
            warnings.warn("variable has multiple fill values {0}, decoding "
                          "all values to NaN.".format(str(fill_value)),
                          RuntimeWarning,
                          stacklevel=3)
        scale_factor = pop_to(attributes, encoding, 'scale_factor')
        add_offset = pop_to(attributes, encoding, 'add_offset')
        if ((fill_value is not None and not np.any(pd.isnull(fill_value)))
                or scale_factor is not None or add_offset is not None):
            # String fill values keep the data as objects; anything else is
            # decoded to float.
            if fill_value.dtype.kind in ['U', 'S']:
                dtype = object
            else:
                dtype = float
            data = MaskedAndScaledArray(data, fill_value, scale_factor,
                                        add_offset, dtype)

    if decode_times and 'units' in attributes:
        if 'since' in attributes['units']:
            # datetime
            units = pop_to(attributes, encoding, 'units')
            calendar = pop_to(attributes, encoding, 'calendar')
            data = DecodedCFDatetimeArray(data, units, calendar)
        elif attributes['units'] in TIME_UNITS:
            # timedelta
            units = pop_to(attributes, encoding, 'units')
            data = DecodedCFTimedeltaArray(data, units)

    if decode_endianness and not data.dtype.isnative:
        # do this last, so it's only done if we didn't already unmask/scale
        data = NativeEndiannessArray(data)
        original_dtype = data.dtype

    # Record the on-disk dtype in the encoding, unless one is already there.
    if 'dtype' in encoding:
        if original_dtype != encoding['dtype']:
            warnings.warn("CF decoding is overwriting dtype")
    else:
        encoding['dtype'] = original_dtype

    if 'dtype' in attributes and attributes['dtype'] == 'bool':
        del attributes['dtype']
        data = BoolTypeArray(data)

    return Variable(dimensions,
                    indexing.LazilyIndexedArray(data),
                    attributes,
                    encoding=encoding)
예제 #10
0
 def test_wrapper(self) -> None:
     """Wrap a lazy array in MemoryCachedArray and check what it holds."""
     lazy_source = indexing.LazilyIndexedArray(np.arange(10))
     cached = indexing.MemoryCachedArray(lazy_source)
     # The value comparison comes before the check on ``.array``; keep
     # this order.
     assert_array_equal(cached, np.arange(10))
     assert isinstance(cached.array, indexing.NumpyIndexingAdapter)
예제 #11
0
    def test_lazily_indexed_array(self) -> None:
        """Compare lazily indexed variables against eagerly indexed ones.

        Covers orthogonal indexers applied in one or several steps,
        indexers applied one after another, and a transpose of the lazily
        indexed result.
        """
        original = np.random.rand(10, 20, 30)
        x = indexing.NumpyIndexingAdapter(original)
        v = Variable(["i", "j", "k"], original)
        lazy = indexing.LazilyIndexedArray(x)
        v_lazy = Variable(["i", "j", "k"], lazy)
        arr = ReturnItem()
        # test orthogonally applied indexers
        indexers = [
            arr[:], 0, -2, arr[:3], [0, 1, 2, 3], [0],
            np.arange(10) < 5
        ]
        for i in indexers:
            for j in indexers:
                for k in indexers:
                    # The boolean mask in ``indexers`` has length 10; rebuild
                    # it with the length of the axis it is applied to.
                    if isinstance(j, np.ndarray) and j.dtype.kind == "b":
                        j = np.arange(20) < 5
                    if isinstance(k, np.ndarray) and k.dtype.kind == "b":
                        k = np.arange(30) < 5
                    expected = np.asarray(v[i, j, k])
                    # The same selection in one, two and three steps.
                    for actual in [
                            v_lazy[i, j, k],
                            v_lazy[:, j, k][i],
                            v_lazy[:, :, k][:, j][i],
                    ]:
                        assert expected.shape == actual.shape
                        assert_array_equal(expected, actual)
                        assert isinstance(actual._data,
                                          indexing.LazilyIndexedArray)

                        # make sure the key is of the appropriate type
                        # NOTE(review): this inspects the key of ``v_lazy``,
                        # not of ``actual`` -- confirm that is intended.
                        if all(
                                isinstance(part, (int, slice))
                                for part in v_lazy._data.key.tuple):
                            assert isinstance(v_lazy._data.key,
                                              indexing.BasicIndexer)
                        else:
                            assert isinstance(v_lazy._data.key,
                                              indexing.OuterIndexer)

        # test sequentially applied indexers
        indexers = [
            (3, 2),
            (arr[:], 0),
            (arr[:2], -1),
            (arr[:4], [0]),
            ([4, 5], 0),
            ([0, 1, 2], [0, 1]),
            ([0, 3, 5], arr[:2]),
        ]
        for i, j in indexers:
            expected_b = v[i][j]
            actual = v_lazy[i][j]
            assert expected_b.shape == actual.shape
            assert_array_equal(expected_b, actual)

            # test transpose
            if actual.ndim > 1:
                # The order is the variable's own dimension order.  A random
                # draw that was overwritten right away has been removed.
                order = np.array(actual.dims)
                transposed = actual.transpose(*order)
                assert_array_equal(expected_b.transpose(*order), transposed)
                # NOTE(review): this checks ``actual._data`` rather than
                # ``transposed._data`` -- confirm that is intended.
                assert isinstance(
                    actual._data,
                    (
                        indexing.LazilyVectorizedIndexedArray,
                        indexing.LazilyIndexedArray,
                    ),
                )

            assert isinstance(actual._data, indexing.LazilyIndexedArray)
            assert isinstance(actual._data.array,
                              indexing.NumpyIndexingAdapter)
예제 #12
0
 def open_store_variable(var):
     """Turn CDMRemote variable into something like a numpy.ndarray.

     The variable is wrapped in a LazilyIndexedArray and returned as a
     Variable that carries its dimensions and every attribute listed by
     ``var.ncattrs()``.
     """
     lazy_data = indexing.LazilyIndexedArray(var)
     dims = var.dimensions
     attrs = {}
     for attr_name in var.ncattrs():
         attrs[attr_name] = getattr(var, attr_name)
     return Variable(dims, lazy_data, attrs)
예제 #13
0
 def open_store_variable(self, name, var):
     """Turn CDMRemote variable into something like a numpy.ndarray.

     The data is a LazilyIndexedArray around ``CDMArrayWrapper(name, self)``;
     dimensions and attributes are read from ``var``.
     """
     lazy_data = indexing.LazilyIndexedArray(CDMArrayWrapper(name, self))
     dims = var.dimensions
     attrs = {}
     for attr_name in var.ncattrs():
         attrs[attr_name] = getattr(var, attr_name)
     return Variable(dims, lazy_data, attrs)