def test_valid_str_indexing(self):
        ''' Ensure that str indexing returns the expected Array '''
        # Each pair is (key, values the selected Array is expected to hold).
        expected_by_key = (
            ('a', [1, 2, 3]),
            ('b', ['a', 'b', 'c']),
        )
        for key, values in expected_by_key:
            # The selection must be an Array and must not be a DataFrame.
            assert isinstance(self.x[key], Array)
            assert not isinstance(self.x[key], DataFrame)
            assert self.x[key].equals(Array(values))
 def test_empty_from_shape(self):
     ''' Ensure that a (0, 0) shape produces an empty DataFrame '''
     empty_shape = (0, 0)
     frame = DataFrame.from_shape(empty_shape)
     assert isinstance(frame, DataFrame)
     assert frame.shape == empty_shape
     # names and dtypes must each be empty and equal to an empty Array.
     for attribute in ('names', 'dtypes'):
         assert len(getattr(frame, attribute)) == 0
         assert getattr(frame, attribute).equals(Array([]))
Пример #3
0
class TestArray:
    ''' Tests for ``identical`` on Arrays that differ in values type or length '''
    # Fixtures: x is the int baseline, y holds the same numbers as floats,
    # z has one more element than x and w has one fewer.
    x = Array([1, 2, 3])
    y = Array([1.0, 2.0, 3.0])
    z = Array([1, 2, 3, 4])
    w = Array([1, 2])

    def test_int_dtype(self):
        ''' x must be identical to itself and to none of the other fixtures '''
        assert identical(self.x, self.x) is True
        for other in (self.y, self.z, self.w):
            assert identical(self.x, other) is False

    def test_float_dtype(self):
        ''' The float fixture must be identical to itself '''
        outcome = identical(self.y, self.y)
        assert outcome is True
    def test_valid_int_indexing(self):
        ''' Ensure that positive and negative int indexing return the expected Array '''
        # Each pair is (index, values the selected Array is expected to hold);
        # the asserts expect -1 and -2 to select the same data as 1 and 0.
        expected_by_index = (
            (0, [1, 2, 3]),
            (1, ['a', 'b', 'c']),
            (-1, ['a', 'b', 'c']),
            (-2, [1, 2, 3]),
        )
        for index, values in expected_by_index:
            # The selection must be an Array and must not be a DataFrame.
            assert isinstance(self.x[index], Array)
            assert not isinstance(self.x[index], DataFrame)
            assert self.x[index].equals(Array(values))
    def test_from_columns_basic(self):
        ''' Ensure from_columns builds the expected DataFrame, with and without names '''

        def check_frame(frame, expected_names):
            # Checks shared by both construction variants.
            assert isinstance(frame, DataFrame)
            assert frame.shape == (2, 3)
            assert frame.names.equals(Array(expected_names))
            assert frame.dtypes.equals(Array([int, str, float]))
            for row in range(frame.nrow):
                for col in range(frame.ncol):
                    # self.x is indexed column-first, the frame row-first.
                    assert frame[row, col] == self.x[col][row]

        # Without explicit names the asserts expect C0, C1, C2.
        check_frame(DataFrame.from_columns(self.x), ['C0', 'C1', 'C2'])

        # With explicit names the frame must report exactly those names.
        check_frame(DataFrame.from_columns(self.x, self.names), self.names)
    def test_from_numpy_basic(self):
        ''' Ensure from_numpy builds the expected DataFrame, with and without names '''

        def check_frame(frame, expected_names):
            # Checks shared by both construction variants.
            assert isinstance(frame, DataFrame)
            assert frame.shape == self.x.shape
            assert frame.names.equals(Array(expected_names))
            assert all(frame.dtypes == int)
            for row in range(frame.nrow):
                for col in range(frame.ncol):
                    # Frame and source are indexed the same way here.
                    assert frame[row, col] == self.x[row, col]

        # Without explicit names the asserts expect C0 through C3.
        check_frame(DataFrame.from_numpy(self.x), ['C0', 'C1', 'C2', 'C3'])

        # With explicit names the frame must report exactly those names.
        check_frame(DataFrame.from_numpy(self.x, self.names), self.names)
 def test_from_shape_basic(self):
     ''' Ensure from_shape builds a DataFrame of the requested shape filled with None '''
     requested = (2, 3)
     frame = DataFrame.from_shape(requested)
     assert isinstance(frame, DataFrame)
     assert frame.shape == requested
     # The asserts expect the names C0, C1, C2.
     assert frame.names.equals(Array(['C0', 'C1', 'C2']))
     # Every dtype entry is expected to compare equal to NoneType.
     assert all(frame.dtypes == type(None))
     for row in range(frame.nrow):
         for col in range(frame.ncol):
             assert frame[row, col] is None
 def test_from_numpy_with_nan(self):
     ''' Ensure from_numpy turns NaN entries into None and keeps the others '''
     frame = DataFrame.from_numpy(self.y)
     assert isinstance(frame, DataFrame)
     assert frame.shape == self.y.shape
     assert frame.names.equals(Array(['C0', 'C1']))
     assert all(frame.dtypes == float)
     for row in range(frame.nrow):
         for col in range(frame.ncol):
             if not np.isnan(self.y[row, col]):
                 # Regular values must be carried over unchanged.
                 assert frame[row, col] == self.y[row, col]
             else:
                 # NaN in the source is expected to surface as None.
                 assert frame[row, col] is None
Пример #9
0
class TestUnique:
    ''' Tests for ``unique`` on int, str and None-containing Arrays '''
    # Integer fixtures: all distinct, one repeated value, two repeated values.
    x1 = Array([1, 2, 3, 4, 5, 6, 7, 8])
    x2 = Array([1, 1, 1, 1, 1, 1, 1, 1])
    x3 = Array([1, 1, 1, 1, 2, 2, 2, 2])

    # String fixtures: all distinct, then the same three values with repeats.
    y1 = Array(['', ' ', 'a'])
    y2 = Array(['', ' ', 'a', 'a', 'a', '', '', ' '])

    # Integer fixture containing None twice.
    z1 = Array([1, 2, None, 4, None])

    def test_already_unique_x1(self):
        ''' An all-distinct int Array must come back equal to the input '''
        source = self.x1
        result = unique(source)
        assert isinstance(result, Array)
        assert result.dtype == source.dtype
        assert len(result) == len(source)
        assert source.equals(result)
        assert all(source == result)

    def test_not_unique_x2(self):
        ''' A single repeated int must collapse to one element '''
        source = self.x2
        result = unique(source)
        assert isinstance(result, Array)
        assert result.dtype == source.dtype
        assert len(result) == 1
        assert result[0] == 1

    def test_not_unique_x3(self):
        ''' Two repeated ints must collapse to two elements, in the asserted order '''
        source = self.x3
        result = unique(source)
        assert isinstance(result, Array)
        assert result.dtype == source.dtype
        assert len(result) == 2
        for position, expected in enumerate((1, 2)):
            assert result[position] == expected

    def test_already_unique_y1(self):
        ''' An all-distinct str Array must come back equal to the input '''
        source = self.y1
        result = unique(source)
        assert isinstance(result, Array)
        assert result.dtype == source.dtype
        assert len(result) == len(source)
        assert source.equals(result)
        assert all(source == result)

    def test_not_unique_y2(self):
        ''' Repeated strings must collapse to three elements, in the asserted order '''
        source = self.y2
        result = unique(source)
        assert isinstance(result, Array)
        assert result.dtype == source.dtype
        assert len(result) == 3
        for position, expected in enumerate(('', ' ', 'a')):
            assert result[position] == expected

    def test_unique_with_none(self):
        ''' None must be kept once; the asserts expect it in the last position '''
        source = self.z1
        result = unique(source)
        assert isinstance(result, Array)
        assert result.dtype == source.dtype
        assert len(result) == 4
        for position, expected in enumerate((1, 2, 4)):
            assert result[position] == expected
        assert result[3] is None
Пример #10
0
class TestBasicArrayIndexing:
    ''' Indexing tests for a small int Array built from ``range(n)`` '''
    n = 5
    y = Array(range(n))

    def test_valid_object_creation(self):
        ''' Ensure the fixture has the expected length, dtype and values '''
        assert len(self.y) == self.n
        assert self.y.dtype is int
        for position in range(self.n):
            assert self.y[position] == position

    def test_valid_int_indexing(self):
        ''' Ensure every in-range index, positive or negative, yields an int '''
        forward = list(range(self.n))
        backward = [-1, -2, -3, -4, -5]
        for position in forward + backward:
            assert isinstance(self.y[position], int)

    def test_invalid_int_indexing(self):
        ''' Ensure out-of-range int indices raise IndexError '''
        for out_of_range in (5, 500, -6, -7):
            with pytest.raises(IndexError):
                self.y[out_of_range]

    def test_invalid_indexing_by_type(self):
        ''' Ensure str and float keys raise TypeError '''
        # Array cannot be indexed using str (first four keys) or float (last key)
        for bad_key in ('a', '', ' ', 'invalid', 34.45):
            with pytest.raises(TypeError):
                self.y[bad_key]
Пример #11
0
class TestEmptyArray:
    ''' Creation and indexing tests for an Array built from an empty list '''
    x = Array([])

    def test_valid_object_creation(self):
        ''' Ensure the empty fixture is an Array of length 0 with NoneType dtype '''
        empty = self.x
        assert isinstance(empty, Array)
        assert len(empty) == 0
        assert empty.dtype is type(None)

    def test_invalid_indexing_for_empty_column(self):
        ''' Ensure any int or list-of-int index into an empty Array raises IndexError '''
        for index in (0, 1, 100, -1, [0], [0, 1, 2]):
            with pytest.raises(IndexError):
                self.x[index]

    def test_invalid_indexing_by_type(self):
        ''' Ensure str and float keys raise TypeError '''
        # Array cannot be indexed using str or float
        for bad_key in ('a', 34.45):
            with pytest.raises(TypeError):
                self.x[bad_key]

    def test_valid_indexing_slice(self):
        ''' Ensure slicing an empty Array still returns an Array '''
        # Explicit slice objects for [:], [1:], [:2] and [1:2].
        windows = (slice(None), slice(1, None), slice(None, 2), slice(1, 2))
        for window in windows:
            assert isinstance(self.x[window], Array)
    def test_empty_pandas(self):
        ''' Ensure that empty pandas DataFrames convert to an empty DataFrame '''
        # Constructor arguments for three spellings of an empty pandas frame:
        # no argument, an empty list, an empty dict.
        for pandas_args in ((), ([],), ({},)):
            frame = DataFrame.from_pandas(pd.DataFrame(*pandas_args))
            assert isinstance(frame, DataFrame)
            assert frame.shape == (0, 0)
            # names and dtypes must each be empty and equal to an empty Array.
            for attribute in ('names', 'dtypes'):
                assert len(getattr(frame, attribute)) == 0
                assert getattr(frame, attribute).equals(Array([]))
Пример #13
0
class TestArrayMissing:
    ''' Tests for ``is_na``, ``is_none`` and ``is_missing`` on Arrays '''
    # x1 holds no None; x2 holds None at positions 3 and 5.
    x1 = Array([1, 2, 3, 4, 5, 6, 7])
    x2 = Array([1, 2, 3, None, 5, None, 7])

    # y1 holds the *string* 'None' (the tests expect it not to be flagged);
    # y2 holds a real None at position 3.
    y1 = Array(['a', 'b', 'c', 'None', 'd'])
    y2 = Array(['a', 'b', 'c', None, 'd'])

    def test_no_missing_values(self):
        ''' Ensure all three detectors flag nothing on Arrays without None '''
        detectors = (is_na, is_none, is_missing)
        for values in (self.x1, self.y1):
            for detect in detectors:
                assert isinstance(detect(values), Array)

            for detect in detectors:
                assert len(detect(values)) == len(values)

            for detect in detectors:
                assert detect(values).dtype is bool

            for detect in detectors:
                assert not any(detect(values))

            for detect in detectors:
                assert not all(detect(values))

            # Every flag must be the literal False, not merely falsy.
            for detect in detectors:
                assert all(flag is False for flag in detect(values))

            # No flag may itself be None.
            for detect in detectors:
                assert any(flag is None for flag in detect(values)) is False

    def test_missing_values(self):
        ''' Ensure all three detectors flag exactly the None positions they are checked at '''
        detectors = (is_na, is_none, is_missing)
        # Each pair is (fixture, positions at which the fixture holds None).
        fixtures = (
            (self.x2, (3, 5)),
            (self.y2, (3,)),
        )
        for values, none_positions in fixtures:
            for detect in detectors:
                assert isinstance(detect(values), Array)

            for detect in detectors:
                assert len(detect(values)) == len(values)

            for detect in detectors:
                assert detect(values).dtype is bool

            # Something is flagged, but not everything.
            for detect in detectors:
                assert any(detect(values))

            for detect in detectors:
                assert not all(detect(values))

            # The flag at each None position must be the literal True.
            for detect in detectors:
                for position in none_positions:
                    assert detect(values)[position] is True

            # No flag may itself be None.
            for detect in detectors:
                assert any(flag is None for flag in detect(values)) is False
Пример #14
0
class TestArrayIsIn:
    ''' Tests for ``Array.isin`` on an empty Array and a small int Array '''
    x = Array([])
    y = Array([1, 2, 3])

    def test_empty1(self):
        ''' Ensure isin on an empty Array returns an equally empty Array for any list '''
        candidate_lists = (
            [1],
            [1, 2, 3, None],
            [None],
            [''],
            [' '],
            [' ', 'a', None],
            [1.0, -1.0],
        )
        for candidates in candidate_lists:
            flags = self.x.isin(candidates)
            assert isinstance(flags, Array)
            assert len(flags) == len(self.x)
            # An empty result may report either bool or NoneType as its dtype.
            assert flags.dtype is bool or flags.dtype is type(None)

    def test_errors(self):
        ''' Ensure scalar, str and None arguments to isin raise ValueError '''
        # Look here (June 14, 2017): string is iterable.
        # The str arguments below are still expected to raise ValueError.
        rejected_arguments = (1, 'a', '', 'wefjkewfk', ' ', None, 1.0, -1.0)
        for array in (self.x, self.y):
            for argument in rejected_arguments:
                with pytest.raises(ValueError):
                    array.isin(argument)

    def test_integer_array(self):
        ''' Ensure isin on an int Array flags only the elements the asserts expect '''
        nothing_matches = (False, False, False)
        # Each pair is (candidates, expected flag per element of y).
        # The asserts expect 1.0 and [1] NOT to match the int 1.
        cases = (
            ([1], (True, False, False)),
            ([0], nothing_matches),
            (['a'], nothing_matches),
            ([1.0], nothing_matches),
            ([[1]], nothing_matches),
        )
        for candidates, expected_flags in cases:
            output = self.y.isin(candidates)
            assert isinstance(output, Array)
            assert len(output) == len(self.y)
            assert output.dtype is bool or output.dtype is type(None)
            # Each flag must be the exact bool singleton, not merely truthy/falsy.
            for position, expected in enumerate(expected_flags):
                assert output[position] is expected