def test_dataframe_to_string():
    """Check the text rendering of a DataFrame.

    Three scenarios are exercised: truncated rows via ``str(df)``,
    truncated columns via ``to_string(ncols=...)``, and a frame that
    holds a column built from a masked array.
    """
    # All checks run with the formatting options set to 5 rows / 8 columns.
    with set_options(formatting={'nrows': 5, 'ncols': 8}):
        # --- Basic: six rows are rendered with a row limit of five ---
        frame = DataFrame([('a', [1, 2, 3, 4, 5, 6]),
                           ('b', [11, 12, 13, 14, 15, 16])])
        text = str(frame)
        print(text)
        assert text.splitlines()[-1] == '[1 more rows]'

        # --- Skipped columns: four columns rendered with ncols=3 ---
        frame = DataFrame([('a', [1, 2, 3, 4, 5, 6]),
                           ('b', [11, 12, 13, 14, 15, 16]),
                           ('c', [11, 12, 13, 14, 15, 16]),
                           ('d', [11, 12, 13, 14, 15, 16])])
        text = frame.to_string(ncols=3)
        print(text)
        footer = text.splitlines()
        assert footer[-2] == '[1 more rows]'
        assert footer[-1] == '[1 more columns]'

        # --- Masked column ---
        frame = DataFrame([('a', [1, 2, 3, 4, 5, 6]),
                           ('b', [11, 12, 13, 14, 15, 16])])

        data = np.arange(6)
        # Bits 0, 2, 3 and 5 of the mask byte are set, which lines up with
        # the valid positions asserted further down.
        mask = np.zeros(1, dtype=np.uint8)
        mask[0] = 0b00101101

        masked = Series.from_masked_array(data, mask)
        assert masked.null_count == 2
        frame['c'] = masked

        # Check the data held by the masked series.
        values = list(masked)
        valid_positions = [0, 2, 3, 5]
        dense = masked.to_array()
        np.testing.assert_equal(data[valid_positions], dense)

        # Valid positions carry the original values ...
        for pos in valid_positions:
            assert data[pos] == values[pos]
        # ... and every other position iterates as None.
        for pos, value in enumerate(values):
            if pos not in valid_positions:
                assert value is None

        got = frame.to_string(nrows=None)
        print(got)
        # Expected tokens: the header "a b c", then one row per index.
        # Rows 1 and 4 have no token for column c (the null entries).
        expect = ''' a b c 0 1 11 0 1 2 12 2 3 13 2 3 4 14 3 4 5 15 5 6 16 5 '''
        # values should match despite whitespace difference
        assert got.split() == expect.split()
def test_scan_boolean():
    """Check ``cumsum`` on a boolean Series and on its int32 cast."""
    values = Series([0, -1, -300, 23, 4, -3, 0, 0, 100])

    # Scanning the boolean comparison result directly: the test expects a
    # boolean result that turns True at the first positive entry.
    bool_scan = (values > 0).cumsum()
    bool_expect = pd.Series(
        [False, False, False, True, True, True, True, True, True])
    assert_eq(bool_expect, bool_scan)

    # After casting to int32 the scan is expected to count positives.
    int_scan = (values > 0).astype(np.int32).cumsum()
    int_expect = pd.Series([0, 0, 0, 1, 2, 2, 2, 2, 3])
    assert_eq(int_expect, int_scan)
def test_nonmatching_index_setitem(nrows):
    """Assign a NumPy array as a column on a frame indexed by column 'b'.

    ``nrows`` is the number of rows to generate; it is presumably supplied
    by a parametrization or fixture that is not visible in this block.
    """
    np.random.seed(0)
    upper_bound = 2147483647

    gdf = DataFrame()
    gdf['a'] = np.random.randint(upper_bound, size=nrows)
    gdf['b'] = np.random.randint(upper_bound, size=nrows)
    gdf = gdf.set_index('b')

    test_values = np.random.randint(upper_bound, size=nrows)
    gdf['c'] = test_values
    assert len(test_values) == len(gdf['c'])

    # The new column must equal the raw values re-indexed with the
    # frame's own index.
    actual = gdf['c'].to_pandas()
    expected = Series(test_values).set_index(gdf._index).to_pandas()
    assert actual.equals(expected)
def test_cummin(dtype, nelem):
    """Compare ``cummin`` against pandas for a bare and a named series.

    ``dtype`` and ``nelem`` select the element type and length of the
    random input produced by ``gen_rand``.
    """
    # int8 input is generated with low=-2, high=2 to keep data in range.
    rand_kwargs = {'low': -2, 'high': 2} if dtype == np.int8 else {}
    data = gen_rand(dtype, nelem, **rand_kwargs)

    # float32 is compared to fewer decimal places than the other dtypes.
    if dtype == np.float32:
        decimal = 4
    else:
        decimal = 6

    # series
    gs = Series(data)
    ps = pd.Series(data)
    np.testing.assert_array_almost_equal(
        gs.cummin(), ps.cummin(), decimal=decimal)

    # dataframe series (named series)
    gdf = DataFrame()
    gdf['a'] = Series(data)
    pdf = pd.DataFrame()
    pdf['a'] = pd.Series(data)
    np.testing.assert_array_almost_equal(
        gdf.a.cummin(), pdf.a.cummin(), decimal=decimal)
def test_series_indexing():
    """Slice a Series repeatedly and compare each result against NumPy."""
    source = np.arange(20)
    current = Series(source)

    # Each step slices the result of the previous one; the expected array
    # is the equivalent slice taken straight from the source data.
    steps = (
        (slice(None, 12), source[:12]),            # [:12]
        (slice(3, None), source[3:12]),            # [3:]
        (slice(None, None, 2), source[3:12:2]),    # [::2], index with stride
    )
    for key, expected in steps:
        current = current[key]
        assert current.null_count == 0
        np.testing.assert_equal(current.to_array(), expected)
def test_cummin_masked():
    """Check ``cummin`` on data that contains a null entry."""
    data = [1, 2, None, 4, 5]

    # Float dtypes: the result is compared directly with pandas.
    for float_type in ('float32', 'float64'):
        gs = Series(data).astype(float_type)
        ps = pd.Series(data).astype(float_type)
        assert_eq(gs.cummin(), ps.cummin())

    # Integer dtypes: compared with a hand-written expectation that holds
    # -1 at the position of the null entry.
    for int_type in ('int8', 'int16', 'int32', 'int64'):
        expected = pd.Series([1, 1, -1, 1, 1]).astype(int_type)
        gs = Series(data).astype(int_type)
        assert_eq(gs.cummin(), expected)
def test_series_basic():
    """Build a Series from a buffer, then grow it with two appends."""
    # Make series from buffer
    pieces = [np.arange(10, dtype=np.float64)]
    series = Series(pieces[0])
    assert len(series) == 10
    np.testing.assert_equal(series.to_array(), np.hstack(pieces))

    # Append a new buffer, then append again to the already-extended
    # series; after each step the content must equal the concatenation of
    # every piece seen so far.
    for chunk, expected_len in ((np.arange(5), 15), (np.arange(3), 18)):
        pieces.append(chunk)
        series = series.append(chunk)
        assert len(series) == expected_len
        np.testing.assert_equal(series.to_array(), np.hstack(pieces))
def test_series_indexing(i1, i2, i3):
    """Index a Series with three chained keys and compare against NumPy.

    ``i1``, ``i2`` and ``i3`` are applied one after another; whatever their
    type, the results are expected to match ``[:12]``, ``[3:12]`` and
    ``[3:12:2]`` of the source array respectively.
    """
    a1 = np.arange(20)
    series = Series(a1)

    # Chained indexing: each key is applied to the previous result.
    current = series
    for key, expected in ((i1, a1[:12]), (i2, a1[3:12]), (i3, a1[3:12:2])):
        current = current[key]
        assert current.null_count == 0
        np.testing.assert_equal(current.to_array(), expected)

    # Integer indexing: element access is checked when the first key is a
    # range (Python ints) or an ndarray whose dtype is listed in
    # ``index_dtypes`` (NumPy integers).
    scalar_keys = isinstance(i1, range) or (
        isinstance(i1, np.ndarray) and i1.dtype in index_dtypes)
    if scalar_keys:
        for i in i1:
            assert series[i] == a1[i]
def test_series_init_none():
    """Check the string form of an empty Series.

    Two construction paths are covered:
    1. no initializer at all, and
    2. ``None`` passed as the initializer.
    """
    expect = '<empty Series of dtype=float64>'

    for ctor_args in ((), (None,)):
        empty = Series(*ctor_args)
        got = empty.to_string()
        print(got)
        # values should match despite whitespace difference
        assert got.split() == expect.split()
def test_dataframe_boolean_mask_Series(gdf):
    """Index a DataFrame with boolean Series masks and check row counts.

    ``gdf`` is supplied from outside this block (presumably a pytest
    fixture); its contents are not visible here.
    """
    # Each entry pairs a mask with the number of rows the selection is
    # expected to contain.
    cases = [
        (Series([True, False, True, False]), 2),
        (Series([True, True, True, True]), 4),
        (Series([True, True, True, True, True, True, True, True]), 8),
        # More likely to trigger an undefined memory read
        # NOTE(review): the note sits next to the single-element masks in
        # the original; confirm which mask it refers to.
        (Series([True]), 1),
        (Series([False]), 0),
        (Series([False, False, False, False]), 0),
    ]

    # Keep the original phase order: every mask is built first, then every
    # selection is taken, and only afterwards are the shapes asserted.
    selections = [gdf[mask] for mask, _ in cases]

    for selected, (_, expected_rows) in zip(selections, cases):
        assert selected.shape[0] == expected_rows
def test_series_shape_empty():
    """Check that an empty Series reports the same shape as pandas."""
    # An explicit dtype is passed because the default dtype of an empty
    # pandas Series depends on the pandas version, and constructing one
    # without a dtype emits a DeprecationWarning on pandas 1.x. The shape
    # comparison itself does not depend on the dtype.
    ps = pd.Series(dtype='float64')
    cs = Series([])
    assert ps.shape == cs.shape
def test_series_shape():
    """Check that a four-element Series reports the same shape as pandas."""
    values = [1, 2, 3, 4]
    expected_shape = pd.Series(values).shape
    actual_shape = Series(values).shape
    assert expected_shape == actual_shape
def test_series_append():
    """Check ``Series.append`` for repeated and mixed-dtype appends."""
    pieces = [np.arange(10, dtype=np.float64)]
    series = Series(pieces[0])

    # Append a new buffer, then append again to the already-extended
    # series; the content must track the concatenation of all pieces.
    for chunk, expected_len in ((np.arange(5), 15), (np.arange(3), 18)):
        pieces.append(chunk)
        series = series.append(chunk)
        assert len(series) == expected_len
        np.testing.assert_equal(series.to_array(), np.hstack(pieces))

    # Appending different dtype: int32 and float64 buffers are combined in
    # both orders and compared with NumPy's own concatenation.
    ints = np.array([1, 2, 3], dtype=np.int32)
    floats = np.array([4.5, 5.5, 6.5], dtype=np.float64)
    for first, second in ((ints, floats), (floats, ints)):
        series = Series(first).append(second)
        np.testing.assert_equal(series.to_array(),
                                np.hstack([first, second]))
def test_series_basic():
    """Build a Series from a float64 buffer and read it back."""
    # Make series from buffer
    buffer = np.arange(10, dtype=np.float64)
    series = Series(buffer)

    assert len(series) == 10

    round_trip = series.to_array()
    expected = np.hstack([buffer])
    np.testing.assert_equal(round_trip, expected)
np.testing.assert_equal(series.to_array(), np.hstack([a6, a5])) index_dtypes = [np.int64, np.int32, np.int16, np.int8, np.uint64, np.uint32, np.uint16, np.uint8] @pytest.mark.parametrize( 'i1, i2, i3', ([(slice(None, 12), slice(3, None), slice(None, None, 2)), (range(12), range(3, 12), range(0, 9, 2)), (np.arange(12), np.arange(3, 12), np.arange(0, 9, 2)), (list(range(12)), list(range(3, 12)), list(range(0, 9, 2))), (pd.Series(range(12)), pd.Series(range(3, 12)), pd.Series(range(0, 9, 2))), (Series(range(12)), Series(range(3, 12)), Series(range(0, 9, 2))), ([i in range(12) for i in range(20)], [i in range(3, 12) for i in range(12)], [i in range(0, 9, 2) for i in range(9)]), (np.array([i in range(12) for i in range(20)], dtype=bool), np.array([i in range(3, 12) for i in range(12)], dtype=bool), np.array([i in range(0, 9, 2) for i in range(9)], dtype=bool))] + [(np.arange(12, dtype=t), np.arange(3, 12, dtype=t), np.arange(0, 9, 2, dtype=t)) for t in index_dtypes]), ids=(['slice', 'range', 'numpy.array', 'list', 'pandas.Series', 'Series', 'list[bool]', 'numpy.array[bool]'] + ['numpy.array[%s]' % t.__name__ for t in index_dtypes])) def test_series_indexing(i1, i2, i3): a1 = np.arange(20) series = Series(a1) # Indexing