示例#1
0
def test_dataframe_to_string():
    """Check DataFrame text rendering: truncation footers and null cells."""
    with set_options(formatting={'nrows': 5, 'ncols': 8}):
        # Basic case: six rows against nrows=5 must end with a row footer
        frame = DataFrame([('a', [1, 2, 3, 4, 5, 6]),
                           ('b', [11, 12, 13, 14, 15, 16])])
        rendered = str(frame)
        print(rendered)
        assert rendered.splitlines()[-1] == '[1 more rows]'

        # Skipped columns: four columns rendered with ncols=3
        frame = DataFrame([('a', [1,  2,  3,  4,  5,  6]),
                           ('b', [11, 12, 13, 14, 15, 16]),
                           ('c', [11, 12, 13, 14, 15, 16]),
                           ('d', [11, 12, 13, 14, 15, 16])])
        rendered = frame.to_string(ncols=3)
        print(rendered)
        lines = rendered.splitlines()
        assert lines[-2] == '[1 more rows]'
        assert lines[-1] == '[1 more columns]'

        # Masked column
        frame = DataFrame([('a', [1, 2, 3, 4, 5, 6]),
                           ('b', [11, 12, 13, 14, 15, 16])])

        data = np.arange(6)
        # Bits 0, 2, 3 and 5 are set; the checks below expect exactly
        # those positions to hold values and the other two to be null.
        mask = np.array([0b00101101], dtype=np.uint8)

        masked = Series.from_masked_array(data, mask)
        assert masked.null_count == 2
        frame['c'] = masked

        # Verify the column contents before looking at the rendering
        values = list(masked)
        valid_positions = [0, 2, 3, 5]
        dense = masked.to_array()
        np.testing.assert_equal(data[valid_positions], dense)
        # valid positions carry the source values
        for pos in valid_positions:
            assert data[pos] == values[pos]
        # every other position is null
        for pos, value in enumerate(values):
            if pos not in valid_positions:
                assert value is None

        got = frame.to_string(nrows=None)
        print(got)
        expect = '''
  a b  c
0 1 11 0
1 2 12
2 3 13 2
3 4 14 3
4 5 15
5 6 16 5
'''
        # compare token by token so whitespace differences are ignored
        assert got.split() == expect.split()
示例#2
0
def test_scan_boolean():
    """Cumulative sum of a boolean Series, raw and after an int32 cast."""
    values = Series([0, -1, -300, 23, 4, -3, 0, 0, 100])

    # Boolean input: the expected result is itself boolean
    bool_expected = pd.Series([False, False, False, True, True,
                               True, True, True, True])
    bool_actual = (values > 0).cumsum()
    assert_eq(bool_expected, bool_actual)

    # Same comparison cast to int32: the expected result is a running count
    count_expected = pd.Series([0, 0, 0, 1, 2, 2, 2, 2, 3])
    count_actual = (values > 0).astype(np.int32).cumsum()
    assert_eq(count_expected, count_actual)
示例#3
0
def test_nonmatching_index_setitem(nrows):
    """Assign a plain array as a column after the frame's index was reset.

    ``nrows`` is the number of rows generated for every column.
    """
    np.random.seed(0)
    upper = 2147483647  # exclusive upper bound for the random integers

    gdf = DataFrame()
    for column in ('a', 'b'):
        gdf[column] = np.random.randint(upper, size=nrows)
    gdf = gdf.set_index('b')

    test_values = np.random.randint(upper, size=nrows)
    gdf['c'] = test_values

    assert len(test_values) == len(gdf['c'])
    # The stored column must equal the raw values re-labelled with the
    # frame's own index.
    expected = Series(test_values).set_index(gdf._index).to_pandas()
    assert gdf['c'].to_pandas().equals(expected)
示例#4
0
def test_cummin(dtype, nelem):
    """Compare ``cummin`` with pandas for a bare and a frame-held Series.

    ``dtype`` selects the element type of the random data and ``nelem``
    its length.
    """
    # int8 gets an explicit narrow range to keep data in range
    limits = {'low': -2, 'high': 2} if dtype == np.int8 else {}
    data = gen_rand(dtype, nelem, **limits)

    # float32 is compared with fewer decimals than the other dtypes
    decimal = 4 if dtype == np.float32 else 6

    # series
    gpu_series, pandas_series = Series(data), pd.Series(data)
    np.testing.assert_array_almost_equal(
        gpu_series.cummin(), pandas_series.cummin(), decimal=decimal)

    # dataframe series (named series)
    gdf = DataFrame()
    gdf['a'] = Series(data)
    pdf = pd.DataFrame()
    pdf['a'] = pd.Series(data)
    np.testing.assert_array_almost_equal(
        gdf.a.cummin(), pdf.a.cummin(), decimal=decimal)
示例#5
0
def test_series_indexing():
    """Chain three slices on a Series and compare each step with NumPy."""
    source = np.arange(20)
    # Each step indexes the previous result; the last one uses a stride.
    steps = (
        (slice(None, 12), source[:12]),
        (slice(3, None), source[3:12]),
        (slice(None, None, 2), source[3:12:2]),
    )

    current = Series(source)
    for key, expected in steps:
        current = current[key]
        assert current.null_count == 0
        np.testing.assert_equal(current.to_array(), expected)
示例#6
0
def test_cummin_masked():
    """Check ``cummin`` on data containing a null, per float and int dtype."""
    data = [1, 2, None, 4, 5]

    # Float dtypes are compared directly against pandas
    for float_name in ('float32', 'float64'):
        gpu_series = Series(data).astype(float_name)
        pandas_series = pd.Series(data).astype(float_name)
        assert_eq(gpu_series.cummin(), pandas_series.cummin())

    # Integer dtypes are compared against a fixed expectation that has
    # -1 in the position of the null input
    for int_name in ('int8', 'int16', 'int32', 'int64'):
        expected = pd.Series([1, 1, -1, 1, 1]).astype(int_name)
        gpu_series = Series(data).astype(int_name)
        assert_eq(gpu_series.cummin(), expected)
示例#7
0
def test_series_basic():
    """Build a Series from an array, then append two more arrays to it."""
    # Make series from buffer
    chunks = [np.arange(10, dtype=np.float64)]
    series = Series(chunks[0])
    assert len(series) == 10
    np.testing.assert_equal(series.to_array(), np.hstack(chunks))

    # Append one array at a time; after each append the length and the
    # contents must match everything added so far.
    for extra, total in ((np.arange(5), 15), (np.arange(3), 18)):
        series = series.append(extra)
        chunks.append(extra)
        assert len(series) == total
        np.testing.assert_equal(series.to_array(), np.hstack(chunks))
示例#8
0
def test_series_indexing(i1, i2, i3):
    """Chain three indexers on a Series and compare each step with NumPy.

    ``i1``, ``i2`` and ``i3`` are applied in turn, each to the result of
    the previous one; they are expected to select the same elements as
    ``[:12]``, ``[3:]`` and ``[::2]`` respectively.
    """
    source = np.arange(20)
    series = Series(source)

    # Indexing (the last step is the strided one)
    steps = (
        (i1, source[:12]),
        (i2, source[3:12]),
        (i3, source[3:12:2]),
    )
    current = series
    for key, expected in steps:
        current = current[key]
        assert current.null_count == 0
        np.testing.assert_equal(current.to_array(), expected)

    # Integer indexing: element-wise lookup with Python ints (range) or
    # NumPy integers (arrays whose dtype is listed in index_dtypes)
    python_ints = isinstance(i1, range)
    numpy_ints = isinstance(i1, np.ndarray) and i1.dtype in index_dtypes
    if python_ints or numpy_ints:
        for position in i1:
            assert series[position] == source[position]
示例#9
0
def test_series_init_none():
    """An empty Series renders the same with no argument and with None."""
    expect = '<empty Series of dtype=float64>'

    def check_empty_rendering(series):
        # Print and compare the rendering token by token, so values
        # match despite whitespace differences.
        got = series.to_string()
        print(got)
        assert got.split() == expect.split()

    # 1: without initializing
    check_empty_rendering(Series())

    # 2: Using `None` as a initializer
    check_empty_rendering(Series(None))
示例#10
0
def test_dataframe_boolean_mask_Series(gdf):
    """Index ``gdf`` with boolean Series masks and check the row counts."""
    masks = [
        Series([True, False, True, False]),
        Series([True, True, True, True]),
        Series([True] * 8),
        Series([True]),  # More likely to trigger an undefined memory read
        Series([False]),
        Series([False] * 4),
    ]
    # Apply every mask before asserting anything
    selections = [gdf[mask] for mask in masks]

    expected_rows = [2, 4, 8, 1, 0, 0]
    for selected, nrows in zip(selections, expected_rows):
        assert selected.shape[0] == nrows
示例#11
0
def test_series_shape_empty():
    """An empty Series reports the same shape as an empty pandas Series."""
    expected_shape = pd.Series().shape
    actual_shape = Series([]).shape

    assert expected_shape == actual_shape
示例#12
0
def test_series_shape():
    """A four-element Series reports the same shape as its pandas twin."""
    values = [1, 2, 3, 4]
    expected_shape = pd.Series(values).shape
    actual_shape = Series(values).shape

    assert expected_shape == actual_shape
示例#13
0
def test_series_append():
    """Append arrays to a Series, including arrays of a different dtype."""
    chunks = [np.arange(10, dtype=np.float64)]
    series = Series(chunks[0])

    # Append one array at a time; after each append the length and the
    # contents must match everything added so far.
    for extra, total in ((np.arange(5), 15), (np.arange(3), 18)):
        series = series.append(extra)
        chunks.append(extra)
        assert len(series) == total
        np.testing.assert_equal(series.to_array(), np.hstack(chunks))

    # Appending different dtype, in both orders
    ints = np.array([1, 2, 3], dtype=np.int32)
    floats = np.array([4.5, 5.5, 6.5], dtype=np.float64)
    for head, tail in ((ints, floats), (floats, ints)):
        series = Series(head).append(tail)
        np.testing.assert_equal(series.to_array(), np.hstack([head, tail]))
示例#14
0
def test_series_basic():
    """A Series built from an array keeps its length and contents."""
    # Make series from buffer
    buffer = np.arange(10, dtype=np.float64)
    expected = np.hstack([buffer])

    series = Series(buffer)
    assert len(series) == 10
    np.testing.assert_equal(series.to_array(), expected)
示例#15
0
    np.testing.assert_equal(series.to_array(), np.hstack([a6, a5]))


# NumPy integer types used to build index arrays for the indexing test:
# signed first, then unsigned, each from 64 bits down to 8 bits.
index_dtypes = [
    np.int64, np.int32, np.int16, np.int8,
    np.uint64, np.uint32, np.uint16, np.uint8,
]


@pytest.mark.parametrize(
    'i1, i2, i3',
    ([(slice(None, 12), slice(3, None), slice(None, None, 2)),
      (range(12), range(3, 12), range(0, 9, 2)),
      (np.arange(12), np.arange(3, 12), np.arange(0, 9, 2)),
      (list(range(12)), list(range(3, 12)), list(range(0, 9, 2))),
      (pd.Series(range(12)), pd.Series(range(3, 12)),
       pd.Series(range(0, 9, 2))),
      (Series(range(12)), Series(range(3, 12)), Series(range(0, 9, 2))),
      ([i in range(12) for i in range(20)],
       [i in range(3, 12) for i in range(12)],
       [i in range(0, 9, 2) for i in range(9)]),
      (np.array([i in range(12) for i in range(20)], dtype=bool),
       np.array([i in range(3, 12) for i in range(12)], dtype=bool),
       np.array([i in range(0, 9, 2) for i in range(9)], dtype=bool))]
     + [(np.arange(12, dtype=t), np.arange(3, 12, dtype=t),
         np.arange(0, 9, 2, dtype=t)) for t in index_dtypes]),
    ids=(['slice', 'range', 'numpy.array', 'list', 'pandas.Series',
          'Series', 'list[bool]', 'numpy.array[bool]']
         + ['numpy.array[%s]' % t.__name__ for t in index_dtypes]))
def test_series_indexing(i1, i2, i3):
    a1 = np.arange(20)
    series = Series(a1)
    # Indexing