def test_valid_str_indexing(self):
    ''' Ensure that indexing by column name yields the matching Array '''
    expected_by_name = (
        ('a', [1, 2, 3]),
        ('b', ['a', 'b', 'c']),
    )
    for name, values in expected_by_name:
        # A single column must come back as an Array, never as a DataFrame.
        assert isinstance(self.x[name], Array)
        assert not isinstance(self.x[name], DataFrame)
        assert self.x[name].equals(Array(values))
def test_empty_from_shape(self):
    ''' Ensure a (0, 0) shape produces a frame with no names and no dtypes '''
    frame = DataFrame.from_shape((0, 0))
    assert isinstance(frame, DataFrame)
    assert frame.shape == (0, 0)
    # Both metadata attributes are expected to be empty Arrays.
    for attribute in ('names', 'dtypes'):
        assert len(getattr(frame, attribute)) == 0
        assert getattr(frame, attribute).equals(Array([]))
class TestArray:
    ''' Checks of identical() on Arrays that differ in dtype or length '''

    x = Array([1, 2, 3])
    y = Array([1.0, 2.0, 3.0])
    z = Array([1, 2, 3, 4])
    w = Array([1, 2])

    def test_int_dtype(self):
        ''' An int Array matches itself only, not the float/longer/shorter ones '''
        assert identical(self.x, self.x) is True
        for other in (self.y, self.z, self.w):
            assert identical(self.x, other) is False

    def test_float_dtype(self):
        ''' A float Array is identical to itself '''
        assert identical(self.y, self.y) is True
def test_valid_int_indexing(self):
    ''' Ensure that int indexing works correctly '''
    first_column = [1, 2, 3]
    second_column = ['a', 'b', 'c']
    # Negative positions count from the end, so -1 / -2 mirror 1 / 0.
    cases = (
        (0, first_column),
        (1, second_column),
        (-1, second_column),
        (-2, first_column),
    )
    for position, values in cases:
        assert isinstance(self.x[position], Array)
        assert not isinstance(self.x[position], DataFrame)
        assert self.x[position].equals(Array(values))
def test_from_columns_basic(self):
    ''' Ensure from_columns works with default and with explicit names '''
    # Each case: (extra positional args, names the frame should report).
    cases = (
        ((), ['C0', 'C1', 'C2']),
        ((self.names,), self.names),
    )
    for extra_args, expected_names in cases:
        xdf = DataFrame.from_columns(self.x, *extra_args)
        assert isinstance(xdf, DataFrame)
        assert xdf.shape == (2, 3)
        assert xdf.names.equals(Array(expected_names))
        assert xdf.dtypes.equals(Array([int, str, float]))
        for row in range(xdf.nrow):
            for col in range(xdf.ncol):
                # self.x is indexed column first, then row.
                assert xdf[row, col] == self.x[col][row]
def test_from_numpy_basic(self):
    ''' Ensure from_numpy works with default and with explicit names '''
    # Each case: (extra positional args, names the frame should report).
    cases = (
        ((), ['C0', 'C1', 'C2', 'C3']),
        ((self.names,), self.names),
    )
    for extra_args, expected_names in cases:
        xdf = DataFrame.from_numpy(self.x, *extra_args)
        assert isinstance(xdf, DataFrame)
        assert xdf.shape == self.x.shape
        assert xdf.names.equals(Array(expected_names))
        assert all(xdf.dtypes == int)
        for row in range(xdf.nrow):
            for col in range(xdf.ncol):
                assert xdf[row, col] == self.x[row, col]
def test_from_shape_basic(self):
    ''' Ensure from_shape yields a None-filled frame with default names '''
    expected_shape = (2, 3)
    frame = DataFrame.from_shape(expected_shape)
    assert isinstance(frame, DataFrame)
    assert frame.shape == expected_shape
    assert frame.names.equals(Array(['C0', 'C1', 'C2']))
    none_type = type(None)
    assert all(frame.dtypes == none_type)
    # Every cell of a freshly shaped frame is expected to be None.
    for row in range(frame.nrow):
        for col in range(frame.ncol):
            assert frame[row, col] is None
def test_from_numpy_with_nan(self):
    ''' Ensure NaN cells of the numpy input are exposed as None '''
    ydf = DataFrame.from_numpy(self.y)
    assert isinstance(ydf, DataFrame)
    assert ydf.shape == self.y.shape
    assert ydf.names.equals(Array(['C0', 'C1']))
    assert all(ydf.dtypes == float)
    for row in range(ydf.nrow):
        for col in range(ydf.ncol):
            if not np.isnan(self.y[row, col]):
                # Ordinary values must survive the conversion unchanged.
                assert ydf[row, col] == self.y[row, col]
                continue
            assert ydf[row, col] is None
class TestUnique:
    ''' Checks of unique() on int, str and None-containing Arrays '''

    x1 = Array([1, 2, 3, 4, 5, 6, 7, 8])
    x2 = Array([1, 1, 1, 1, 1, 1, 1, 1])
    x3 = Array([1, 1, 1, 1, 2, 2, 2, 2])
    y1 = Array(['', ' ', 'a'])
    y2 = Array(['', ' ', 'a', 'a', 'a', '', '', ' '])
    z1 = Array([1, 2, None, 4, None])

    def test_already_unique_x1(self):
        ''' An int Array without duplicates comes back unchanged '''
        result = unique(self.x1)
        assert isinstance(result, Array)
        assert result.dtype == self.x1.dtype
        assert len(result) == len(self.x1)
        assert self.x1.equals(result)
        assert all(self.x1 == result)

    def test_not_unique_x2(self):
        ''' A constant int Array collapses to a single element '''
        result = unique(self.x2)
        assert isinstance(result, Array)
        assert result.dtype == self.x2.dtype
        assert len(result) == 1
        assert result[0] == 1

    def test_not_unique_x3(self):
        ''' Two distinct int values are kept in order of first appearance '''
        expected = (1, 2)
        result = unique(self.x3)
        assert isinstance(result, Array)
        assert result.dtype == self.x3.dtype
        assert len(result) == len(expected)
        for position, value in enumerate(expected):
            assert result[position] == value

    def test_already_unique_y1(self):
        ''' A str Array without duplicates comes back unchanged '''
        result = unique(self.y1)
        assert isinstance(result, Array)
        assert result.dtype == self.y1.dtype
        assert len(result) == len(self.y1)
        assert self.y1.equals(result)
        assert all(self.y1 == result)

    def test_not_unique_y2(self):
        ''' Repeated strings, including '' and ' ', are reduced to one each '''
        expected = ('', ' ', 'a')
        result = unique(self.y2)
        assert isinstance(result, Array)
        assert result.dtype == self.y2.dtype
        assert len(result) == len(expected)
        for position, value in enumerate(expected):
            assert result[position] == value

    def test_unique_with_none(self):
        ''' None is kept once and is expected in the last position '''
        result = unique(self.z1)
        assert isinstance(result, Array)
        assert result.dtype == self.z1.dtype
        assert len(result) == 4
        for position, value in enumerate((1, 2, 4)):
            assert result[position] == value
        assert result[3] is None
class TestBasicArrayIndexing:
    ''' Checks of scalar indexing on a small int Array '''

    n = 5
    y = Array(range(n))

    def test_valid_object_creation(self):
        ''' Ensure object is created correctly '''
        assert len(self.y) == self.n
        assert self.y.dtype is int
        for position in range(self.n):
            assert self.y[position] == position

    def test_valid_int_indexing(self):
        ''' Ensure in-range positive and negative positions return an int '''
        for position in range(self.n):
            assert isinstance(self.y[position], int)
        for position in (-1, -2, -3, -4, -5):
            assert isinstance(self.y[position], int)

    def test_invalid_int_indexing(self):
        ''' Ensure out-of-range positions raise IndexError '''
        for position in (5, 500, -6, -7):
            with pytest.raises(IndexError):
                self.y[position]

    def test_invalid_indexing_by_type(self):
        ''' Ensure str and float keys raise TypeError '''
        # Array cannot be indexed using str
        for key in ('a', '', ' ', 'invalid'):
            with pytest.raises(TypeError):
                self.y[key]
        # Array cannot be indexed using float
        with pytest.raises(TypeError):
            self.y[34.45]
class TestEmptyArray:
    ''' Checks of construction and indexing on an empty Array '''

    x = Array([])

    def test_valid_object_creation(self):
        ''' Ensure object is created correctly '''
        assert isinstance(self.x, Array)
        assert len(self.x) == 0
        none_type = type(None)
        assert self.x.dtype is none_type

    def test_invalid_indexing_for_empty_column(self):
        ''' Ensure we see errors in indexing an empty Array object '''
        # Scalar positions first, then list-of-positions keys.
        for key in (0, 1, 100, -1, [0], [0, 1, 2]):
            with pytest.raises(IndexError):
                self.x[key]

    def test_invalid_indexing_by_type(self):
        ''' Ensure str and float keys raise TypeError '''
        with pytest.raises(TypeError):
            # Array cannot be indexed using str
            self.x['a']
        with pytest.raises(TypeError):
            # Array cannot be indexed using float
            self.x[34.45]

    def test_valid_indexing_slice(self):
        ''' Ensure slicing an empty Array still returns an Array '''
        # Equivalent to x[:], x[1:], x[:2] and x[1:2] respectively.
        windows = (
            slice(None),
            slice(1, None),
            slice(None, 2),
            slice(1, 2),
        )
        for window in windows:
            assert isinstance(self.x[window], Array)
def test_empty_pandas(self):
    ''' Ensure every flavour of empty pandas frame converts to a (0, 0) frame '''
    # Positional args for pd.DataFrame: no data, an empty list, an empty dict.
    for pandas_args in ((), ([],), ({},)):
        frame = DataFrame.from_pandas(pd.DataFrame(*pandas_args))
        assert isinstance(frame, DataFrame)
        assert frame.shape == (0, 0)
        for attribute in ('names', 'dtypes'):
            assert len(getattr(frame, attribute)) == 0
            assert getattr(frame, attribute).equals(Array([]))
class TestArrayMissing:
    ''' Checks of is_na / is_none / is_missing with and without None values '''

    x1 = Array([1, 2, 3, 4, 5, 6, 7])
    x2 = Array([1, 2, 3, None, 5, None, 7])
    # 'None' below is a str, so y1 is expected to have no missing values.
    y1 = Array(['a', 'b', 'c', 'None', 'd'])
    y2 = Array(['a', 'b', 'c', None, 'd'])

    def test_no_missing_values(self):
        ''' Ensure all three detectors report only False without None values '''
        detectors = (is_na, is_none, is_missing)
        for arr in (self.x1, self.y1):
            for detect in detectors:
                assert isinstance(detect(arr), Array)
            for detect in detectors:
                assert len(detect(arr)) == len(arr)
            for detect in detectors:
                assert detect(arr).dtype is bool
            for detect in detectors:
                assert not any(detect(arr))
            for detect in detectors:
                assert not all(detect(arr))
            for detect in detectors:
                assert all(flag is False for flag in detect(arr))
            # The result itself must never contain None.
            for detect in detectors:
                assert any(flag is None for flag in detect(arr)) is False

    def test_missing_values(self):
        ''' Ensure all three detectors flag exactly the None positions checked '''
        detectors = (is_na, is_none, is_missing)
        # Each case: (array, positions that hold None in that array).
        cases = (
            (self.x2, (3, 5)),
            (self.y2, (3,)),
        )
        for arr, missing_positions in cases:
            for detect in detectors:
                assert isinstance(detect(arr), Array)
            for detect in detectors:
                assert len(detect(arr)) == len(arr)
            for detect in detectors:
                assert detect(arr).dtype is bool
            for detect in detectors:
                assert any(detect(arr))
            for detect in detectors:
                assert not all(detect(arr))
            for detect in detectors:
                for position in missing_positions:
                    assert detect(arr)[position] is True
            # The result itself must never contain None.
            for detect in detectors:
                assert any(flag is None for flag in detect(arr)) is False
class TestArrayIsIn:
    ''' Checks of Array.isin on an empty Array and on a small int Array '''

    x = Array([])
    y = Array([1, 2, 3])

    def test_empty1(self):
        ''' Ensure isin on an empty Array returns an equally empty Array '''
        none_type = type(None)
        candidate_lists = (
            [1],
            [1, 2, 3, None],
            [None],
            [''],
            [' '],
            [' ', 'a', None],
            [1.0, -1.0],
        )
        for candidates in candidate_lists:
            output = self.x.isin(candidates)
            assert isinstance(output, Array)
            assert len(output) == len(self.x)
            assert (output.dtype is bool) or (output.dtype is none_type)

    def test_errors(self):
        ''' Ensure non-list arguments to isin raise ValueError '''
        # NOTE (June 14, 2017): a str is itself iterable, yet these tests
        # still expect isin to reject it with ValueError.
        rejected = (1, 'a', '', 'wefjkewfk', ' ', None, 1.0, -1.0)
        for arr in (self.x, self.y):
            for argument in rejected:
                with pytest.raises(ValueError):
                    arr.isin(argument)

    def test_integer_array(self):
        ''' Ensure isin on an int Array flags only the matching int element '''
        none_type = type(None)
        nothing = (False, False, False)
        # Each case: (candidates passed to isin, expected flag per element).
        cases = (
            ([1], (True, False, False)),
            ([0], nothing),
            (['a'], nothing),
            ([1.0], nothing),
            ([[1]], nothing),
        )
        for candidates, expected in cases:
            output = self.y.isin(candidates)
            assert isinstance(output, Array)
            assert len(output) == len(self.y)
            assert (output.dtype is bool) or (output.dtype is none_type)
            for position, flag in enumerate(expected):
                assert output[position] is flag