Exemplo n.º 1
0
    def test_lookup(self):
        # Table-driven check of scalar ``lookup`` on a length-4 index.
        # Each row pairs the stored positions with the values expected when
        # probing -1, 0, 1, 2, 3 and 4 in that order; the expectations use
        # -1 for every probe that is not one of the stored positions.
        cases = [
            ([2, 3], [-1, -1, -1, 0, 1, -1]),
            ([], [-1, -1, -1, -1, -1, -1]),
            ([0, 1, 2, 3], [-1, 0, 1, 2, 3, -1]),
            ([0, 2, 3], [-1, 0, -1, 1, 2, -1]),
        ]
        for kind in ['integer', 'block']:
            for positions, expected in cases:
                stored = np.array(positions, dtype=np.int32)
                idx = sparray._make_index(4, stored, kind=kind)
                for probe, want in zip(range(-1, 5), expected):
                    self.assertEqual(idx.lookup(probe), want)
Exemplo n.º 2
0
    def test_lookup_array(self):
        # Vectorized counterpart of ``lookup``: for every requested position
        # the expectations below use the position's offset among the stored
        # points, or -1 when the position is not stored (this includes
        # negative and past-the-end positions).  Both index kinds are checked
        # against the same expectations.
        for kind in ['integer', 'block']:
            idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind)

            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
            exp = np.array([-1, -1, 0], dtype=np.int32)
            self.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
            exp = np.array([-1, 0, -1, 1], dtype=np.int32)
            self.assert_numpy_array_equal(res, exp)

            # An index with no stored points: every lookup must miss.
            idx = _make_index(4, np.array([], dtype=np.int32), kind=kind)
            res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32))
            exp = np.array([-1, -1, -1, -1], dtype=np.int32)
            # Without this comparison the empty-index case verifies nothing.
            self.assert_numpy_array_equal(res, exp)

            # Fully populated index: in-range positions map to themselves.
            idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32),
                              kind=kind)
            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
            exp = np.array([-1, 0, 2], dtype=np.int32)
            self.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
            exp = np.array([-1, 2, 1, 3], dtype=np.int32)
            self.assert_numpy_array_equal(res, exp)

            # Index with a gap at position 1.
            idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32),
                              kind=kind)
            res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32))
            exp = np.array([1, -1, 2, 0], dtype=np.int32)
            self.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32))
            exp = np.array([-1, -1, 1, -1], dtype=np.int32)
            self.assert_numpy_array_equal(res, exp)
Exemplo n.º 3
0
    def test_block_internal(self):
        # Each row: the positions stored in a length-4 block index, followed
        # by the ``blocs`` and ``blengths`` arrays expected for it.  The
        # expected ``npoints`` is simply the number of stored positions.
        cases = [
            ([2, 3], [2], [2]),
            ([], [], []),
            ([0, 1, 2, 3], [0], [4]),
            ([0, 2, 3], [0, 2], [1, 2]),
        ]
        for positions, exp_blocs, exp_blengths in cases:
            stored = np.array(positions, dtype=np.int32)
            idx = _make_index(4, stored, kind='block')

            self.assertIsInstance(idx, BlockIndex)
            self.assertEqual(idx.npoints, len(positions))
            tm.assert_numpy_array_equal(
                idx.blocs, np.array(exp_blocs, dtype=np.int32))
            tm.assert_numpy_array_equal(
                idx.blengths, np.array(exp_blengths, dtype=np.int32))
Exemplo n.º 4
0
    def test_block_internal(self):
        # Verifies the internals (type, npoints, blocs, blengths) of block
        # indexes built over a length-4 range.

        def int32(values):
            # All index arrays in this test are int32.
            return np.array(values, dtype=np.int32)

        def check(positions, exp_blocs, exp_blengths):
            idx = _make_index(4, int32(positions), kind='block')
            self.assertIsInstance(idx, BlockIndex)
            self.assertEqual(idx.npoints, len(positions))
            tm.assert_numpy_array_equal(idx.blocs, int32(exp_blocs))
            tm.assert_numpy_array_equal(idx.blengths, int32(exp_blengths))

        # one run of two points at the end
        check([2, 3], [2], [2])
        # nothing stored
        check([], [], [])
        # everything stored: a single run covering the whole range
        check([0, 1, 2, 3], [0], [4])
        # a gap at position 1 yields two runs
        check([0, 2, 3], [0, 2], [1, 2])
Exemplo n.º 5
0
    def test_lookup(self):
        # Scalar ``lookup`` on a length-4 index, for both index kinds.
        # Each row lists the stored positions and then the results expected
        # for the probes -1, 0, 1, 2, 3, 4 (in that order); the expectations
        # use -1 wherever the probe is not a stored position.
        expectations = [
            ([2, 3], (-1, -1, -1, 0, 1, -1)),
            ([], (-1, -1, -1, -1, -1, -1)),
            ([0, 1, 2, 3], (-1, 0, 1, 2, 3, -1)),
            ([0, 2, 3], (-1, 0, -1, 1, 2, -1)),
        ]
        for kind in ['integer', 'block']:
            for positions, expected in expectations:
                idx = _make_index(4, np.array(positions, dtype=np.int32),
                                  kind=kind)
                # enumerate from -1 so the counter is the probed position
                for probe, want in enumerate(expected, -1):
                    self.assertEqual(idx.lookup(probe), want)
Exemplo n.º 6
0
    def shift(self, periods, freq=None, axis=0):
        """
        Return a shifted version of this object.

        Parameters
        ----------
        periods : int
            Number of positions to move by; ``0`` returns ``self.copy()``.
        freq : optional
            When given (and the fill value is null), the index itself is
            shifted via ``self.index.shift(periods, freq)`` and the sparse
            values are reused unchanged.
        axis : int, default 0
            Only forwarded to the dense ``shift`` used for non-null fill
            values.

        Returns
        -------
        A new object built with ``self._constructor`` (or the re-sparsified
        dense result when the fill value is not null).
        """
        if periods == 0:
            return self.copy()

        fill = self.fill_value

        # no special handling of fill values yet: round-trip through dense
        if not isnull(fill):
            dense_shifted = self.to_dense().shift(periods, freq=freq,
                                                  axis=axis)
            return dense_shifted.to_sparse(fill_value=fill, kind=self.kind)

        if freq is not None:
            moved_index = self.index.shift(periods, freq)
            result = self._constructor(self.sp_values,
                                       sparse_index=self.sp_index,
                                       index=moved_index,
                                       fill_value=fill)
            return result.__finalize__(self)

        # Move the stored positions and keep only those still inside
        # [0, length); searchsorted gives the slice bounds of the survivors.
        positions = self.sp_index.to_int_index()
        moved = positions.indices + periods
        lo, hi = moved.searchsorted([0, positions.length])

        kept = moved[lo:hi]
        shifted_sp_index = _make_index(len(self), kept, self.sp_index)

        kept_values = self.sp_values[lo:hi].copy()
        arr = self.values._simple_new(kept_values, shifted_sp_index,
                                      fill_value=np.nan)
        return self._constructor(arr, index=self.index).__finalize__(self)
Exemplo n.º 7
0
    def shift(self, periods, freq=None, axis=0):
        """
        Shift the data by ``periods`` positions.

        Three strategies are used, in this order:

        * ``periods == 0``: a plain copy is returned.
        * non-null ``fill_value``: the data is densified, shifted there
          (``freq`` and ``axis`` are forwarded) and converted back to sparse
          with the same ``fill_value`` and ``kind``.
        * ``freq`` given: only the index is shifted; sparse values and
          sparse index are passed through to the constructor as they are.

        Otherwise the stored positions are offset by ``periods`` and the ones
        that leave the range ``[0, length)`` are dropped together with their
        values; the original index is kept.
        """
        if periods == 0:
            return self.copy()

        # no special handling of fill values yet
        has_real_fill = not isnull(self.fill_value)
        if has_real_fill:
            as_dense = self.to_dense()
            shifted = as_dense.shift(periods, freq=freq, axis=axis)
            return shifted.to_sparse(fill_value=self.fill_value,
                                     kind=self.kind)

        if freq is not None:
            kwargs = dict(sparse_index=self.sp_index,
                          index=self.index.shift(periods, freq),
                          fill_value=self.fill_value)
            return self._constructor(self.sp_values,
                                     **kwargs).__finalize__(self)

        as_int = self.sp_index.to_int_index()
        offset_indices = as_int.indices + periods
        # first/last slots whose shifted position is still within range
        first, last = offset_indices.searchsorted([0, as_int.length])
        in_range = slice(first, last)

        new_sp_index = _make_index(len(self), offset_indices[in_range],
                                   self.sp_index)

        arr = self.values._simple_new(self.sp_values[in_range].copy(),
                                      new_sp_index, fill_value=np.nan)
        rebuilt = self._constructor(arr, index=self.index)
        return rebuilt.__finalize__(self)
Exemplo n.º 8
0
    def test_make_block_boundary(self):
        # Every second position is stored, so no two stored positions are
        # adjacent; the test expects each one to be reported as its own
        # block of length one, for both even and odd total lengths.
        for length in (5, 10, 100, 101):
            stored = np.arange(0, length, 2, dtype=np.int32)
            idx = _make_index(length, stored, kind='block')

            expected_blocs = np.arange(0, length, 2, dtype=np.int32)
            expected_blengths = np.ones(len(expected_blocs), dtype=np.int32)
            tm.assert_numpy_array_equal(idx.blocs, expected_blocs)
            tm.assert_numpy_array_equal(idx.blengths, expected_blengths)
Exemplo n.º 9
0
    def test_int_internal(self):
        # For an integer index the stored positions are expected back
        # unchanged in ``indices`` and ``npoints`` is expected to equal
        # their count.
        for positions in ([2, 3], [], [0, 1, 2, 3]):
            stored = np.array(positions, dtype=np.int32)
            idx = _make_index(4, stored, kind='integer')

            self.assertIsInstance(idx, IntIndex)
            self.assertEqual(idx.npoints, len(positions))
            tm.assert_numpy_array_equal(
                idx.indices, np.array(positions, dtype=np.int32))
Exemplo n.º 10
0
    def test_int_internal(self):
        # Verifies type, ``npoints`` and ``indices`` of integer indexes built
        # over a length-4 range.

        def check(positions):
            idx = _make_index(4, np.array(positions, dtype=np.int32),
                              kind='integer')
            self.assertIsInstance(idx, IntIndex)
            self.assertEqual(idx.npoints, len(positions))
            # a fresh array is built for the comparison rather than reusing
            # the one handed to _make_index
            expected = np.array(positions, dtype=np.int32)
            tm.assert_numpy_array_equal(idx.indices, expected)

        check([2, 3])
        check([])
        check([0, 1, 2, 3])
Exemplo n.º 11
0
    def test_make_block_boundary(self):
        # Storing positions 0, 2, 4, ... leaves a gap after each point; the
        # expected result is one block per point, each of length one.
        sizes = [5, 10, 100, 101]
        for size in sizes:
            points = np.arange(0, size, 2, dtype=np.int32)
            idx = _make_index(size, points, kind='block')

            exp = np.arange(0, size, 2, dtype=np.int32)
            ones = np.ones(len(exp), dtype=np.int32)
            tm.assert_numpy_array_equal(idx.blocs, exp)
            tm.assert_numpy_array_equal(idx.blengths, ones)
Exemplo n.º 12
0
    def test_lookup_array(self):
        # Vectorized counterpart of ``lookup``: for every requested position
        # the expectations below use the position's offset among the stored
        # points, or -1 when the position is not stored (this includes
        # negative and past-the-end positions).  Both index kinds are checked
        # against the same expectations.
        for kind in ['integer', 'block']:
            idx = sparray._make_index(4,
                                      np.array([2, 3], dtype=np.int32),
                                      kind=kind)

            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
            exp = np.array([-1, -1, 0], dtype=np.int32)
            self.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
            exp = np.array([-1, 0, -1, 1], dtype=np.int32)
            self.assert_numpy_array_equal(res, exp)

            # An index with no stored points: every lookup must miss.
            idx = sparray._make_index(4,
                                      np.array([], dtype=np.int32),
                                      kind=kind)
            res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32))
            exp = np.array([-1, -1, -1, -1], dtype=np.int32)
            # Without this comparison the empty-index case verifies nothing.
            self.assert_numpy_array_equal(res, exp)

            # Fully populated index: in-range positions map to themselves.
            idx = sparray._make_index(4,
                                      np.array([0, 1, 2, 3], dtype=np.int32),
                                      kind=kind)
            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
            exp = np.array([-1, 0, 2], dtype=np.int32)
            self.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
            exp = np.array([-1, 2, 1, 3], dtype=np.int32)
            self.assert_numpy_array_equal(res, exp)

            # Index with a gap at position 1.
            idx = sparray._make_index(4,
                                      np.array([0, 2, 3], dtype=np.int32),
                                      kind=kind)
            res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32))
            exp = np.array([1, -1, 2, 0], dtype=np.int32)
            self.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32))
            exp = np.array([-1, -1, 1, -1], dtype=np.int32)
            self.assert_numpy_array_equal(res, exp)
Exemplo n.º 13
0
def _concat_sparse(to_concat, axis=0, typs=None):
    """
    Concatenate a sequence of sparse and/or dense arrays, each of a single
    dtype.

    Parameters
    ----------
    to_concat : array of arrays
    axis : axis to provide concatenation
    typs : set of to_concat dtypes; computed with ``com.get_dtype_kinds``
        when not supplied

    Returns
    -------
    a single array, preserving the combined dtypes: a SparseArray when the
    inputs are only sparse / float / integer kinds, otherwise an object
    ndarray
    """

    from pandas.sparse.array import SparseArray, _make_index

    if typs is None:
        typs = com.get_dtype_kinds(to_concat)

    if len(typs) == 1:
        # Fast path: every input is sparse.  When they also agree on the
        # fill_value the sparse pieces can be stitched together directly,
        # without densifying.
        distinct_fills = set(arr.fill_value for arr in to_concat)
        if len(distinct_fills) == 1:
            value_parts = []
            position_parts = []
            total = 0
            for arr in to_concat:
                int_index = arr.sp_index.to_int_index()
                value_parts.append(arr.sp_values)
                # positions are relative to each input; offset them by the
                # combined length of everything placed before it
                position_parts.append(int_index.indices + total)
                total += int_index.length

            head = to_concat[0]
            merged_index = _make_index(total,
                                       np.concatenate(position_parts),
                                       kind=head.sp_index)
            return SparseArray(np.concatenate(value_parts),
                               sparse_index=merged_index,
                               fill_value=head.fill_value)

    # input may be sparse / dense mixed and may have different fill_value
    # input must contain sparse at least 1
    sparse_inputs = [arr for arr in to_concat if com.is_sparse(arr)]

    def densify(arr):
        # coerce to native type, flattened (and lifted to 2-D for axis > 0)
        if isinstance(arr, SparseArray):
            arr = arr.get_values()
        flat = arr.ravel()
        return np.atleast_2d(flat) if axis > 0 else flat

    dense = np.concatenate([densify(arr) for arr in to_concat], axis=axis)

    if typs - set(['sparse', 'f', 'i']):
        # something other than sparse / numeric is present:
        # coerce to object
        return dense.astype('object')

    # sparsify if inputs are sparse and dense numerics
    # first sparse input's fill_value and SparseIndex is used
    return SparseArray(dense.ravel(),
                       fill_value=sparse_inputs[0].fill_value,
                       kind=sparse_inputs[0].sp_index)
Exemplo n.º 14
0
def _concat_sparse(to_concat, axis=0, typs=None):
    """
    Join sparse/dense arrays (each holding a single dtype) into one array.

    Parameters
    ----------
    to_concat : array of arrays
    axis : axis to provide concatenation
    typs : set of to_concat dtypes (derived from ``to_concat`` if None)

    Returns
    -------
    a single array, preserving the combined dtypes

    Notes
    -----
    If every input is sparse with one shared fill_value, the result is
    assembled from the sparse values and offset sparse positions.  In all
    other cases the inputs are densified and concatenated, then either
    re-sparsified (only sparse / 'f' / 'i' kinds present) or cast to object.
    """

    from pandas.sparse.array import SparseArray, _make_index

    def to_dense_piece(piece):
        # coerce to native type
        if isinstance(piece, SparseArray):
            piece = piece.get_values()
        piece = piece.ravel()
        if axis > 0:
            return np.atleast_2d(piece)
        return piece

    if typs is None:
        typs = com.get_dtype_kinds(to_concat)

    all_one_kind = len(typs) == 1
    # concat input as it is if all inputs are sparse
    # and have the same fill_value
    if all_one_kind and len(set(c.fill_value for c in to_concat)) == 1:
        int_indexes = [c.sp_index.to_int_index() for c in to_concat]

        # running start offset of each input within the combined array
        starts = []
        running = 0
        for int_index in int_indexes:
            starts.append(running)
            running += int_index.length

        all_values = np.concatenate([c.sp_values for c in to_concat])
        all_indices = np.concatenate(
            [int_index.indices + start
             for int_index, start in zip(int_indexes, starts)])
        sp_index = _make_index(running, all_indices,
                               kind=to_concat[0].sp_index)

        return SparseArray(all_values, sparse_index=sp_index,
                           fill_value=to_concat[0].fill_value)

    # input may be sparse / dense mixed and may have different fill_value
    # input must contain sparse at least 1
    sparses = [c for c in to_concat if com.is_sparse(c)]

    # densify and regular concat
    pieces = [to_dense_piece(c) for c in to_concat]
    combined = np.concatenate(pieces, axis=axis)

    only_numeric_or_sparse = not (typs - set(['sparse', 'f', 'i']))
    if only_numeric_or_sparse:
        # sparsify if inputs are sparse and dense numerics
        # first sparse input's fill_value and SparseIndex is used
        template = sparses[0]
        return SparseArray(combined.ravel(), fill_value=template.fill_value,
                           kind=template.sp_index)

    # coerce to object if needed
    return combined.astype('object')