def test_take_filling_all_nan(self):
    # take() on an all-NaN SparseArray: the result is all-NaN both with and
    # without fill_value=True, and out-of-range positions raise IndexError.
    all_nan = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan])

    for take_kwargs in ({}, {'fill_value': True}):
        taken = all_nan.take(np.array([1, 0, -1]), **take_kwargs)
        wanted = SparseArray([np.nan, np.nan, np.nan])
        tm.assert_sp_array_equal(taken, wanted)

    out_of_bounds = [
        ([1, -6], {}),
        ([1, 5], {}),
        ([1, 5], {'fill_value': True}),
    ]
    for positions, take_kwargs in out_of_bounds:
        with tm.assertRaises(IndexError):
            all_nan.take(np.array(positions), **take_kwargs)
def _check_op(op, first, second):
    # Apply ``op`` to every sparse/dense pairing of ``first`` and ``second``
    # and to a scalar, checking each result is a SparseArray that agrees
    # with the same op applied to the dense values.
    sparse_res = op(first, second)
    dense_based = SparseArray(op(first.values, second.values),
                              fill_value=first.fill_value)
    tm.assert_isinstance(sparse_res, SparseArray)
    assert_almost_equal(sparse_res.values, dense_based.values)

    # sparse on the left, dense on the right
    right_dense_res = op(first, second.values)
    tm.assert_isinstance(right_dense_res, SparseArray)
    assert_sp_array_equal(sparse_res, right_dense_res)

    # dense on the left, sparse on the right
    left_dense_res = op(first.values, second)
    tm.assert_isinstance(left_dense_res, SparseArray)
    assert_sp_array_equal(sparse_res, left_dense_res)

    # sparse with a scalar
    scalar_res = op(first, 4)
    tm.assert_isinstance(scalar_res, SparseArray)

    # ignore this if the actual op raises (e.g. pow)
    try:
        dense_scalar = op(first.values, 4)
        fill_scalar = op(first.fill_value, 4)
        assert_almost_equal(scalar_res.fill_value, fill_scalar)
        assert_almost_equal(scalar_res.values, dense_scalar)
    except ValueError:
        pass
def test_float_same_index_comparison(self):
    # when sp_index are the same
    for kind in ('integer', 'block'):
        # (left dense values, right dense values, extra constructor kwargs)
        cases = [
            (np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]),
             np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan]),
             {}),
            (np.array([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.]),
             np.array([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.]),
             {'fill_value': 0}),
        ]
        for values, rvalues, extra in cases:
            lhs = SparseArray(values, kind=kind, **extra)
            rhs = SparseArray(rvalues, kind=kind, **extra)
            self._check_comparison_ops(lhs, rhs, values, rvalues)
def test_cumsum(self):
    # SparseArray.cumsum() against the dense cumsum, without NaN and with
    # a NaN under two different fill values.
    dense = np.arange(10).astype(float)
    tm.assert_sp_array_equal(SparseArray(dense).cumsum(),
                             SparseArray(dense.cumsum()))

    # TODO: gh-12855 - return a SparseArray here
    dense[5] = np.nan
    with_fill_two = SparseArray(dense, fill_value=2).cumsum()
    self.assertNotIsInstance(with_fill_two, SparseArray)
    tm.assert_numpy_array_equal(with_fill_two, dense.cumsum())

    with_nan_fill = SparseArray(dense, fill_value=np.nan).cumsum()
    wanted = SparseArray(
        np.array([0, 1, 3, 6, 10, np.nan, 16, 23, 31, 40]))
    tm.assert_sp_array_equal(with_nan_fill, wanted)
def test_numpy_mean(self):
    # np.mean on a SparseArray, plus rejection of the dtype/out arguments.
    dense = np.arange(10).astype(float)
    self.assertEqual(np.mean(SparseArray(dense)), 4.5)

    dense[5] = np.nan
    mean_with_nan = np.mean(SparseArray(dense))
    self.assertEqual(mean_with_nan, 40.0 / 9)

    tm.assertRaisesRegexp(ValueError,
                          "the 'dtype' parameter is not supported",
                          np.mean, SparseArray(dense), dtype=np.int64)

    tm.assertRaisesRegexp(ValueError,
                          "the 'out' parameter is not supported",
                          np.mean, SparseArray(dense), out=mean_with_nan)
def test_astype(self):
    # Writing into the astype('f8') result must not show up in the source
    # array; conversions to i8 with NaN present raise ValueError.
    as_float = self.arr.astype('f8')
    as_float.sp_values[:3] = 27
    leaked = self.arr.sp_values[:3] == 27
    self.assertFalse(leaked.any())

    nan_fill_msg = "unable to coerce current fill_value nan to int64 dtype"
    with tm.assertRaisesRegexp(ValueError, nan_fill_msg):
        self.arr.astype('i8')

    nan_filled = SparseArray([0, np.nan, 0, 1])
    with tm.assertRaisesRegexp(ValueError, nan_fill_msg):
        nan_filled.astype('i8')

    zero_filled = SparseArray([0, np.nan, 0, 1], fill_value=0)
    with tm.assertRaisesRegexp(ValueError, "Cannot convert NA to integer"):
        zero_filled.astype('i8')
def test_getslice(self):
    # Slicing the sparse array must match a SparseArray built from the
    # equivalently sliced dense values.
    without_tail = self.arr[:-3]
    tm.assert_sp_array_equal(without_tail,
                             SparseArray(self.arr.values[:-3]))

    last_four = self.arr[-4:]
    tm.assert_sp_array_equal(last_four,
                             SparseArray(self.arr.values[-4:]))

    # two corner cases from Series
    from_minus_twelve = self.arr[-12:]
    tm.assert_sp_array_equal(from_minus_twelve, SparseArray(self.arr))

    up_to_minus_twelve = self.arr[:-12]
    tm.assert_sp_array_equal(up_to_minus_twelve,
                             SparseArray(self.arr.values[:0]))
def test_constructor_dtype(self):
    # Each row: (input data, constructor kwargs, expected dtype,
    # expected fill_value). An expected fill of None means "must be NaN".
    cases = [
        ([np.nan, 1, 2, np.nan], {}, np.float64, None),
        ([np.nan, 1, 2, np.nan], {'fill_value': 0}, np.float64, 0),
        ([0, 1, 2, 4], {'dtype': np.float64}, np.float64, None),
        ([0, 1, 2, 4], {'dtype': np.int64}, np.int64, 0),
        ([0, 1, 2, 4], {'fill_value': 0, 'dtype': np.int64}, np.int64, 0),
        ([0, 1, 2, 4], {'dtype': None}, np.int64, 0),
        ([0, 1, 2, 4], {'fill_value': 0, 'dtype': None}, np.int64, 0),
    ]
    for data, ctor_kwargs, want_dtype, want_fill in cases:
        built = SparseArray(data, **ctor_kwargs)
        self.assertEqual(built.dtype, want_dtype)
        if want_fill is None:
            self.assertTrue(np.isnan(built.fill_value))
        else:
            self.assertEqual(built.fill_value, want_fill)
def test_numpy_cumsum(self):
    # np.cumsum on a SparseArray against the dense cumsum, plus rejection
    # of the dtype/out arguments.
    dense = np.arange(10).astype(float)
    summed = np.cumsum(SparseArray(dense))
    tm.assert_sp_array_equal(summed, SparseArray(dense.cumsum()))

    # TODO: gh-12855 - return a SparseArray here
    dense[5] = np.nan
    summed = np.cumsum(SparseArray(dense, fill_value=2))
    self.assertNotIsInstance(summed, SparseArray)
    tm.assert_numpy_array_equal(summed, dense.cumsum())

    summed = np.cumsum(SparseArray(dense, fill_value=np.nan))
    wanted = SparseArray(
        np.array([0, 1, 3, 6, 10, np.nan, 16, 23, 31, 40]))
    tm.assert_sp_array_equal(summed, wanted)

    tm.assertRaisesRegexp(ValueError,
                          "the 'dtype' parameter is not supported",
                          np.cumsum, SparseArray(dense), dtype=np.int64)

    # the most recent cumsum result is passed as the ``out`` argument
    tm.assertRaisesRegexp(ValueError,
                          "the 'out' parameter is not supported",
                          np.cumsum, SparseArray(dense), out=summed)
def test_numpy_cumsum(self):
    # np.cumsum on SparseArrays built with several fill values must give
    # the same sparse result; dtype/out arguments are rejected.
    # NOTE(review): the unsupported-parameter checks are assumed to sit
    # inside the data loop - confirm against the original layout.
    without_nan = np.array([1, 2, 3, 4, 5], dtype=float)
    without_nan_expected = SparseArray(without_nan.cumsum())

    with_nan = np.array([1, 2, np.nan, 4, 5], dtype=float)
    with_nan_expected = SparseArray(
        np.array([1.0, 3.0, np.nan, 7.0, 12.0]))

    scenarios = [(with_nan, with_nan_expected),
                 (without_nan, without_nan_expected)]
    for data, expected in scenarios:
        for ctor_kwargs in ({}, {'fill_value': np.nan}, {'fill_value': 2}):
            out = np.cumsum(SparseArray(data, **ctor_kwargs))
            tm.assert_sp_array_equal(out, expected)

        tm.assertRaisesRegexp(ValueError,
                              "the 'dtype' parameter is not supported",
                              np.cumsum, SparseArray(data), dtype=np.int64)

        tm.assertRaisesRegexp(ValueError,
                              "the 'out' parameter is not supported",
                              np.cumsum, SparseArray(data), out=out)
def test_to_dense(self):
    # to_dense() must return the original dense values for each input,
    # both with the default fill value and with fill_value=0.
    dense_inputs = (
        [1, np.nan, np.nan, 3, np.nan],
        [1, np.nan, 0, 3, 0],
        [np.nan, np.nan, np.nan, np.nan, np.nan],
    )
    for raw in dense_inputs:
        vals = np.array(raw)
        for ctor_kwargs in ({}, {'fill_value': 0}):
            roundtrip = SparseArray(vals, **ctor_kwargs).to_dense()
            tm.assert_numpy_array_equal(roundtrip, vals)

    # see gh-14647
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        SparseArray(vals).to_dense(fill=2)
def test_append_na(self):
    # Appending a slice, a scalar and a slice to a SparseList must
    # reassemble into the SparseArray of the full data.
    data = self.na_data
    chunks = SparseList()
    for piece in (data[:5], data[5], data[6:]):
        chunks.append(piece)

    assert_sp_array_equal(chunks.to_array(), SparseArray(data))
def test_float_scalar_comparison(self):
    # Compare a float SparseArray against the scalars 1, 0 and 3 for every
    # index kind and for default, 0 and 2 fill values.
    values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])

    for kind in ['integer', 'block']:
        for ctor_kwargs in ({}, {'fill_value': 0}, {'fill_value': 2}):
            sparse = SparseArray(values, kind=kind, **ctor_kwargs)
            for scalar in (1, 0, 3):
                self._check_comparison_ops(sparse, scalar, values, scalar)
def test_append_zero(self):
    # Same as the NaN variant but with fill_value=0 on both the list and
    # the expected array.
    data = self.zero_data
    chunks = SparseList(fill_value=0)
    for piece in (data[:5], data[5], data[6:]):
        chunks.append(piece)

    assert_sp_array_equal(chunks.to_array(),
                          SparseArray(data, fill_value=0))
def test_getslice_tuple(self):
    # Indexing with a one-element tuple containing a slice must match the
    # dense result; a two-element tuple raises IndexError.
    dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])

    nan_filled = SparseArray(dense)
    tm.assert_sp_array_equal(nan_filled[4:, ], SparseArray(dense[4:, ]))

    sparse = SparseArray(dense, fill_value=0)
    tm.assert_sp_array_equal(sparse[4:, ],
                             SparseArray(dense[4:, ], fill_value=0))

    with tm.assertRaises(IndexError):
        sparse[4:, :]

    with tm.assertRaises(IndexError):
        # check numpy compat
        dense[4:, :]
def test_generator_warnings(self):
    # Iterating a SparseArray must not record any warning while
    # (Pending)DeprecationWarning are forced to always be shown.
    sp_arr = SparseArray([1, 2, 3])
    with warnings.catch_warnings(record=True) as caught:
        for category in (DeprecationWarning, PendingDeprecationWarning):
            warnings.filterwarnings(action='always', category=category)
        for _ in sp_arr:
            pass
        assert len(caught) == 0
def test_append_na(self):
    # The whole SparseList round trip runs under an expected FutureWarning.
    with tm.assert_produces_warning(FutureWarning):
        data = self.na_data
        chunks = SparseList()
        for piece in (data[:5], data[5], data[6:]):
            chunks.append(piece)

        tm.assert_sp_array_equal(chunks.to_array(), SparseArray(data))
def test_numpy_sum(self):
    # np.sum on a SparseArray, with and without a NaN entry, plus
    # rejection of the dtype/out arguments.
    dense = np.arange(10).astype(float)
    self.assertEqual(np.sum(SparseArray(dense)), 45.0)

    dense[5] = np.nan
    for fill in (2, np.nan):
        total = np.sum(SparseArray(dense, fill_value=fill))
        self.assertEqual(total, 40.0)

    tm.assertRaisesRegexp(ValueError,
                          "the 'dtype' parameter is not supported",
                          np.sum, SparseArray(dense), dtype=np.int64)

    # the last computed sum is passed as the ``out`` argument
    tm.assertRaisesRegexp(ValueError,
                          "the 'out' parameter is not supported",
                          np.sum, SparseArray(dense), out=total)
def test_append_zero(self):
    # Zero-filled SparseList round trip under an expected FutureWarning.
    with tm.assert_produces_warning(FutureWarning):
        data = self.zero_data
        chunks = SparseList(fill_value=0)
        for piece in (data[:5], data[5], data[6:]):
            chunks.append(piece)

        tm.assert_sp_array_equal(chunks.to_array(),
                                 SparseArray(data, fill_value=0))
def test_astype_all(self):
    # astype() to each numeric type must set both the array dtype and the
    # sp_values dtype, and keep the dense values equal to a dense astype.
    dense = np.array([1, 2, 3])
    sparse = SparseArray(dense, fill_value=1)

    target_types = (np.float64, np.float32, np.int64,
                    np.int32, np.int16, np.int8)
    for target in target_types:
        converted = sparse.astype(target)
        self.assertEqual(converted.dtype, target)
        self.assertEqual(converted.sp_values.dtype, target)
        tm.assert_numpy_array_equal(converted.values, dense.astype(target))
def test_copy(self):
    # Appending to a copy of a SparseList must leave the source list's
    # chunk count untouched, while the copy reassembles the full data.
    arr = self.na_data
    exp_sparr = SparseArray(arr)

    splist = SparseList()
    splist.append(arr[:5])
    splist.append(arr[5])

    cp = splist.copy()
    cp.append(arr[6:])

    # assertEqual rather than the deprecated assertEquals alias, which
    # newer unittest versions no longer provide.
    self.assertEqual(splist.nchunks, 2)
    assert_sp_array_equal(cp.to_array(), exp_sparr)
def test_constructor_spindex_dtype(self):
    # Build SparseArrays from explicit data plus an IntIndex and verify
    # the values, dtype and fill_value for several dtype/fill_value args.
    def verify(built, wanted, want_dtype, nan_fill):
        # nan_fill=True expects a NaN fill_value, otherwise a fill of 0.
        tm.assert_sp_array_equal(built, wanted)
        self.assertEqual(built.dtype, want_dtype)
        if nan_fill:
            self.assertTrue(np.isnan(built.fill_value))
        else:
            self.assertEqual(built.fill_value, 0)

    verify(SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2])),
           SparseArray([np.nan, 1, 2, np.nan]),
           np.float64, True)

    verify(SparseArray(data=[0, 1, 2, 3],
                       sparse_index=IntIndex(4, [0, 1, 2, 3]),
                       dtype=np.int64),
           SparseArray([0, 1, 2, 3], dtype=np.int64),
           np.int64, True)

    verify(SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]),
                       fill_value=0, dtype=np.int64),
           SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64),
           np.int64, False)

    verify(SparseArray(data=[0, 1, 2, 3],
                       sparse_index=IntIndex(4, [0, 1, 2, 3]),
                       dtype=None),
           SparseArray([0, 1, 2, 3], dtype=None),
           np.int64, True)

    verify(SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]),
                       fill_value=0, dtype=None),
           SparseArray([0, 1, 2, 0], fill_value=0, dtype=None),
           np.int64, False)
def test_copy(self):
    # Appending to a copy must not change the source list's chunk count;
    # the whole scenario runs under an expected FutureWarning.
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        data = self.na_data
        wanted = SparseArray(data)

        source = SparseList()
        for piece in (data[:5], data[5]):
            source.append(piece)

        duplicate = source.copy()
        duplicate.append(data[6:])
        self.assertEqual(source.nchunks, 2)
        tm.assert_sp_array_equal(duplicate.to_array(), wanted)
def test_astype_bool(self):
    # astype(bool) on an int64 SparseDataFrame must give bool columns with
    # a False fill value.
    # The builtin ``bool`` replaces the ``np.bool`` alias, which was only
    # ever an alias of the builtin and is missing from some NumPy releases.
    sparse = pd.SparseDataFrame({'A': SparseArray([0, 2, 0, 4],
                                                  fill_value=0,
                                                  dtype=np.int64),
                                 'B': SparseArray([0, 5, 0, 7],
                                                  fill_value=0,
                                                  dtype=np.int64)},
                                default_fill_value=0)
    self.assertEqual(sparse['A'].dtype, np.int64)
    self.assertEqual(sparse['B'].dtype, np.int64)

    res = sparse.astype(bool)
    exp = pd.SparseDataFrame({'A': SparseArray([False, True, False, True],
                                               dtype=bool,
                                               fill_value=False),
                              'B': SparseArray([False, True, False, True],
                                               dtype=bool,
                                               fill_value=False)},
                             default_fill_value=False)
    tm.assert_sp_frame_equal(res, exp)
    self.assertEqual(res['A'].dtype, bool)
    self.assertEqual(res['B'].dtype, bool)
def test_constructor_float32(self):
    # GH 10648
    # float32 input must keep its dtype through construction and both
    # dense conversions.
    source = np.array([1., np.nan, 3], dtype=np.float32)
    sparse = SparseArray(source, dtype=np.float32)

    self.assertEqual(sparse.dtype, np.float32)
    tm.assert_numpy_array_equal(sparse.sp_values, np.array([1, 3]))
    tm.assert_numpy_array_equal(sparse.sp_values, np.asarray(sparse))
    tm.assert_numpy_array_equal(sparse.sp_index.indices, np.array([0, 2]))

    for densified in (sparse.to_dense(), sparse.values):
        self.assertEqual(densified.dtype, np.float32)
        self.assert_numpy_array_equal(densified, source)
def test_append_zero(self):
    # Zero-filled SparseList round trip under an expected FutureWarning,
    # compared without checking dtype.
    with tm.assert_produces_warning(FutureWarning):
        data = self.zero_data
        chunks = SparseList(fill_value=0)
        for piece in (data[:5], data[5], data[6:]):
            chunks.append(piece)

        # list always produces int64, but SA constructor
        # is platform dtype aware
        combined = chunks.to_array()
        wanted = SparseArray(data, fill_value=0)
        tm.assert_sp_array_equal(combined, wanted, check_dtype=False)
def test_constructor_bool(self):
    # GH 10648
    # bool input with fill_value=False: only the True entries are stored
    # and the dense conversions keep the bool dtype.
    flags = np.array([False, False, True, True, False, False])
    sparse = SparseArray(flags, fill_value=False, dtype=bool)

    self.assertEqual(sparse.dtype, bool)
    tm.assert_numpy_array_equal(sparse.sp_values, np.array([True, True]))
    tm.assert_numpy_array_equal(sparse.sp_values, np.asarray(sparse))
    tm.assert_numpy_array_equal(sparse.sp_index.indices, np.array([2, 3]))

    for densified in (sparse.to_dense(), sparse.values):
        self.assertEqual(densified.dtype, bool)
        tm.assert_numpy_array_equal(densified, flags)
def test_set_fill_value(self):
    # Assigning fill_value: compatible scalars are accepted, scalars that
    # do not fit the array dtype and non-scalars raise ValueError.
    arr = SparseArray([1., np.nan, 2.], fill_value=np.nan)
    arr.fill_value = 2
    self.assertEqual(arr.fill_value, 2)

    arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64)
    arr.fill_value = 2
    self.assertEqual(arr.fill_value, 2)

    # values that cannot be held as int64 are rejected, not coerced
    msg = "unable to set fill_value 3\\.1 to int64 dtype"
    with tm.assertRaisesRegexp(ValueError, msg):
        arr.fill_value = 3.1

    msg = "unable to set fill_value nan to int64 dtype"
    with tm.assertRaisesRegexp(ValueError, msg):
        arr.fill_value = np.nan

    # builtin ``bool`` instead of the ``np.bool`` alias, which is missing
    # from some NumPy releases
    arr = SparseArray([True, False, True], fill_value=False, dtype=bool)
    arr.fill_value = True
    self.assertTrue(arr.fill_value)

    # non-bool values are rejected, not coerced
    msg = "unable to set fill_value 0 to bool dtype"
    with tm.assertRaisesRegexp(ValueError, msg):
        arr.fill_value = 0

    msg = "unable to set fill_value nan to bool dtype"
    with tm.assertRaisesRegexp(ValueError, msg):
        arr.fill_value = np.nan

    # invalid
    msg = "fill_value must be a scalar"
    for val in [[1, 2, 3], np.array([1, 2]), (1, 2, 3)]:
        with tm.assertRaisesRegexp(ValueError, msg):
            arr.fill_value = val
def test_consolidate(self):
    # consolidate(inplace=False) returns a single-chunk list and leaves
    # the source at three chunks; consolidate() collapses the source too.
    data = self.na_data
    wanted = SparseArray(data)

    chunks = SparseList()
    for piece in (data[:5], data[5], data[6:]):
        chunks.append(piece)

    merged = chunks.consolidate(inplace=False)
    self.assertEqual(merged.nchunks, 1)
    self.assertEqual(chunks.nchunks, 3)
    assert_sp_array_equal(merged.to_array(), wanted)

    chunks.consolidate()
    self.assertEqual(chunks.nchunks, 1)
    assert_sp_array_equal(chunks.to_array(), wanted)
def test_consolidate(self):
    # Same consolidate checks, run under an expected FutureWarning.
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        data = self.na_data
        wanted = SparseArray(data)

        chunks = SparseList()
        for piece in (data[:5], data[5], data[6:]):
            chunks.append(piece)

        merged = chunks.consolidate(inplace=False)
        self.assertEqual(merged.nchunks, 1)
        self.assertEqual(chunks.nchunks, 3)
        tm.assert_sp_array_equal(merged.to_array(), wanted)

        chunks.consolidate()
        self.assertEqual(chunks.nchunks, 1)
        tm.assert_sp_array_equal(chunks.to_array(), wanted)