def test_intcolumn():
    """Exercise IntColumn: shared numeric/copy checks, int coercion,
    rejection of non-numeric values, and the underlying dtype."""
    _test_numericcolumn(IntColumn)
    _test_copying(IntColumn)

    # Floats and numeric strings are expected to end up as truncated ints.
    matrix = DataMatrix(length=2)
    matrix.col = IntColumn
    matrix.col = 1.9, '2.9'
    check_col(matrix.col, [1, 2])

    # A non-numeric string must raise TypeError for every assignment form.
    @raises(TypeError)
    def assign_single_cell():
        matrix.col[0] = 'x'

    assign_single_cell()

    @raises(TypeError)
    def assign_whole_column():
        matrix.col = 'x'

    assign_whole_column()

    @raises(TypeError)
    def assign_slice():
        matrix.col[:-1] = 'x'

    assign_slice()

    # The backing sequence is expected to be a 64-bit integer array.
    ok_(matrix.col._seq.dtype == np.int64)
    check_integrity(matrix)
def check_mixedcolumn_sorting():
    """Shuffle a mixed-type column and verify the order produced by sorting.

    The expected order asserted here is: numbers (including infinities)
    ascending, then strings, then None values, then NaN values.
    """
    unsorted = DataMatrix(length=24)
    unsorted.c = [
        1, '1', 2, '2',
        1.1, '1.1', 2.1, '2.1',
        INF, -INF, 'inf', '-inf',
        NAN, NAN, 'nan', 'nan',
        None, None, None, None,
        'alpha', 'beta', 'None', ''
    ]
    unsorted.c = ops.shuffle(unsorted.c)
    ordered = ops.sort(unsorted, by=unsorted.c)
    expected = [
        -INF, -INF, 1, 1,
        1.1, 1.1, 2, 2,
        2.1, 2.1, INF, INF,
        '', 'None', 'alpha', 'beta',
        None, None, None, None,
        NAN, NAN, NAN, NAN,
    ]
    check_col(ordered.c, expected)
def test_weight():
    """Check ops.weight: with weights 1, 0, 2 the rows 'a', 'b', 'c' are
    expected to become 'a', 'c', 'c'."""
    source = DataMatrix(length=3)
    source.a = 'a', 'b', 'c'
    source.b = 1, 0, 2
    weighted = ops.weight(source.b)
    check_col(weighted.a, ['a', 'c', 'c'])
    check_col(weighted.b, [1, 2, 2])
def check_str_operations():
    """Check that in-place addition on a MixedColumn of strings
    concatenates element-wise."""
    matrix = DataMatrix(length=2, default_col_type=MixedColumn)
    matrix.col = 'a', 'b'
    check_col(matrix.col, ['a', 'b'])
    # Each row receives its own suffix.
    matrix.col += 'c', 'd'
    check_col(matrix.col, ['ac', 'bd'])
    check_integrity(matrix)
def test_weight():
    """Verify that weighting by column ``b`` drops the zero-weight row and
    duplicates the row whose weight is 2."""
    original = DataMatrix(length=3)
    original.a = 'a', 'b', 'c'
    original.b = 1, 0, 2
    result = ops.weight(original.b)
    check_col(result.a, ['a', 'c', 'c'])
    check_col(result.b, [1, 2, 2])
def check_int_operations():
    """Check that IntColumn values stay integers after assignment of
    floats and after in-place multiplication by a float."""
    matrix = DataMatrix(length=2, default_col_type=IntColumn)
    matrix.col = 1.5, 2.5
    check_col(matrix.col, [1, 2])
    # 1 * 1.5 and 2 * 1.5 are expected to be truncated to 1 and 3.
    matrix.col *= 1.5
    check_col(matrix.col, [1, 3])
    check_integrity(matrix)
def check_str_operations():
    """Verify element-wise string concatenation via ``+=`` on a
    MixedColumn."""
    table = DataMatrix(length=2, default_col_type=MixedColumn)
    table.col = 'a', 'b'
    check_col(table.col, ['a', 'b'])
    table.col += 'c', 'd'
    check_col(table.col, ['ac', 'bd'])
    check_integrity(table)
def test_fullfactorial():
    """Check ops.fullfactorial: two levels in ``a`` (the empty string is
    expected to be skipped) crossed with three levels in ``b`` gives six
    rows."""
    design = DataMatrix(length=3)
    design.a = 'a', 'b', ''
    design.b = 0, 1, 2
    crossed = ops.fullfactorial(design)
    check_col(crossed.a, ['a', 'b', 'a', 'b', 'a', 'b'])
    check_col(crossed.b, [0, 0, 1, 1, 2, 2])
def test_fullfactorial():
    """Verify that a full-factorial expansion of columns ``a`` and ``b``
    yields every combination, with ``a`` varying fastest."""
    factors = DataMatrix(length=3)
    factors.a = 'a', 'b', ''
    factors.b = 0, 1, 2
    expanded = ops.fullfactorial(factors)
    check_col(expanded.a, ['a', 'b', 'a', 'b', 'a', 'b'])
    check_col(expanded.b, [0, 0, 1, 1, 2, 2])
def test_reduce_():
    """Check that series.reduce_ collapses each series row to one value.

    The expected results (2 and 3) match the per-row mean of the input.
    """
    matrix = DataMatrix(length=2)
    matrix.series = SeriesColumn(depth=3)
    matrix.series[0] = 1, 2, 3
    matrix.series[1] = 2, 3, 4
    matrix.col = series.reduce_(matrix.series)
    check_col(matrix.col, [2, 3])
    check_integrity(matrix)
def test_mixedcolumn():
    """Exercise MixedColumn: shared numeric/copy checks, type coercion of
    numeric strings, and assignment through a set-based selection."""
    _test_numericcolumn(MixedColumn)
    _test_copying(MixedColumn)

    matrix = DataMatrix(length=4)
    # Numeric strings become numbers; other strings and None are kept.
    matrix.col = '1.1', '1', 'x', None
    check_col(matrix.col, [1.1, 1, 'x', None])
    # Select the rows equal to 1 or None and overwrite them pair-wise.
    matrix.col[matrix.col == {1, None}] = 'a', 'b'
    check_col(matrix.col, [1.1, 'a', 'x', 'b'])
def check_intcolumn_sorting():
    """Shuffle an IntColumn filled with ints, floats, and numeric strings
    and verify that sorting yields the truncated integers in ascending
    order."""
    unsorted = DataMatrix(length=8, default_col_type=IntColumn)
    unsorted.c = [
        1, '1', 2, '2',
        1.1, '1.1', 2.1, '2.8',
    ]
    unsorted.c = ops.shuffle(unsorted.c)
    ordered = ops.sort(unsorted, by=unsorted.c)
    expected = [
        1, 1, 1, 1, 2, 2, 2, 2
    ]
    check_col(ordered.c, expected)
def check_int_operations():
    """Check integer truncation on assignment and in-place multiplication
    for IntColumn, and that multiplying by a string raises TypeError."""
    matrix = DataMatrix(length=2, default_col_type=IntColumn)
    matrix.col = 1.5, 2.5
    check_col(matrix.col, [1, 2])
    matrix.col *= 2.5
    check_col(matrix.col, [2, 4])
    # A non-numeric operand is not acceptable for an integer column.
    with pytest.raises(TypeError):
        matrix.col *= 'x'
    check_integrity(matrix)
def check_select(col_type):
    """Check comparison-based selection and the |, &, ^ combinators.

    col_type -- the column class used as the DataMatrix default column type.
    """
    dm = DataMatrix(length=2, default_col_type=col_type)
    dm.col = 1, 2

    below_two = dm.col < 2
    check_col(below_two.col, [1])

    equal_two = dm.col == 2
    check_col(equal_two.col, [2])

    either = (dm.col == 1) | (dm.col == 2)
    check_col(either.col, [1,2])

    both = (dm.col == 1) & (dm.col == 2)
    check_col(both.col, [])

    exclusive = (dm.col == 1) ^ (dm.col == 2)
    check_col(exclusive.col, [1,2])

    check_integrity(dm)
def test_tuple_split():
    """Check ops.tuple_split: splitting on column ``a`` by the values 'a'
    and 'b' returns one DataMatrix per requested value."""
    source = DataMatrix(length=4)
    source.a = 'a', 'a', 'b', 'b'
    source.b = 0, 1, 2, 3
    part_a, part_b = ops.tuple_split(source.a, 'a', 'b')
    # First part holds the 'a' rows ...
    check_col(part_a.a, ['a', 'a'])
    check_col(part_a.b, [0, 1])
    # ... and the second part the 'b' rows.
    check_col(part_b.a, ['b', 'b'])
    check_col(part_b.b, [2, 3])
def test_sort():
    """Check ops.sort on a single column (other columns untouched) and on
    a whole DataMatrix (all columns reordered together)."""
    matrix = DataMatrix(length=2)
    matrix.a = 'b', 'a'
    matrix.b = 1, 0

    # Sorting only column ``a`` must leave column ``b`` as it was.
    matrix.a = ops.sort(matrix.a)
    check_col(matrix.a, ['a', 'b'])
    check_col(matrix.b, [1, 0])

    # Sorting the matrix by ``b`` moves the rows of ``a`` along.
    by_b = ops.sort(matrix, by=matrix.b)
    check_col(by_b.a, ['b', 'a'])
    check_col(by_b.b, [0, 1])
def test_sort():
    """Verify column-level sorting followed by matrix-level sorting with
    ``by=``."""
    table = DataMatrix(length=2)
    table.a = 'b', 'a'
    table.b = 1, 0

    table.a = ops.sort(table.a)
    check_col(table.a, ['a', 'b'])
    # Column ``b`` was not involved in the sort above.
    check_col(table.b, [1, 0])

    reordered = ops.sort(table, by=table.b)
    check_col(reordered.a, ['b', 'a'])
    check_col(reordered.b, [0, 1])
def test_replace():
    """Check ops.replace on a default column, a FloatColumn, and a
    SeriesColumn, including replacing NaN and replacing with NaN."""
    matrix = DataMatrix(length=3)
    matrix.a = 0, 1, 2
    matrix.c = FloatColumn
    matrix.c = np.nan, 1, 2
    matrix.s = SeriesColumn(depth=3)
    matrix.s[0] = 0, 1, 2
    matrix.s[1] = np.nan, 1, 2
    matrix.s[2] = np.nan, 1, 2

    # The float and series columns share the same NaN-centred mapping.
    nan_mapping = {np.nan: 100, 2: np.nan}
    matrix.a = ops.replace(matrix.a, {0: 100, 2: 200})
    matrix.c = ops.replace(matrix.c, nan_mapping)
    matrix.s = ops.replace(matrix.s, nan_mapping)

    check_col(matrix.a, [100, 1, 200])
    check_col(matrix.c, [100, 1, np.nan])
    check_series(matrix.s, [
        [0, 1, np.nan],
        [100, 1, np.nan],
        [100, 1, np.nan],
    ])
def test_split():
    """Check that ops.split yields one (value, DataMatrix) pair per unique
    value of the split column, in the order 'a' then 'b'.

    The generator is advanced with the built-in ``next()`` rather than the
    Python-2-only ``.next()`` method, so the test runs on Python 3 as well.
    """
    dm = DataMatrix(length=4)
    dm.a = 'a', 'a', 'b', 'b'
    dm.b = 0, 1, 2, 3
    g = ops.split(dm.a)

    # First group: rows where a == 'a'.
    val, dm = next(g)
    eq_(val, 'a')
    check_col(dm.a, ['a', 'a'])
    check_col(dm.b, [0, 1])

    # Second group: rows where a == 'b'.
    val, dm = next(g)
    eq_(val, 'b')
    check_col(dm.a, ['b', 'b'])
    check_col(dm.b, [2, 3])
def test_intcolumn():
    """Exercise row access, selection, and concatenation for IntColumn,
    plus selection by non-int values and by type."""
    check_getrow(IntColumn)
    check_select(IntColumn)
    check_concat(IntColumn, invalid=0)

    # Selecting with values that are not ints.
    matrix = DataMatrix(length=2, default_col_type=IntColumn)
    matrix.col = 1, 2
    selection = matrix.col == '1.1'  # Floored to 1
    check_col(selection.col, [1])
    selection = matrix.col == ''
    check_col(selection.col, [])
    selection = matrix.col != ''
    check_col(selection.col, [1, 2])

    # Ordering comparisons against a non-numeric string are rejected.
    @raises(TypeError)
    def compare_with_empty_string():
        matrix.col > ''

    compare_with_empty_string()

    # Selecting by type: every value counts as int, none as float or str.
    matrix = DataMatrix(length=2, default_col_type=IntColumn)
    matrix.col = 1, 2
    eq_(len(matrix.col == int), 2)
    eq_(len(matrix.col != int), 0)
    eq_(len(matrix.col == float), 0)
    eq_(len(matrix.col != float), 2)
    eq_(len(matrix.col == str), 0)
    eq_(len(matrix.col != str), 2)
def test_intcolumn():
    """Check assignment, invalid-value rejection, resizing, uniqueness,
    and dtype for IntColumn."""
    matrix = DataMatrix(length=2)

    # Assignment by scalar, by sequence, and by slice.
    matrix.col = IntColumn
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = 2, 3
    check_col(matrix.col, [2, 3])
    matrix.col[:-1] = 4
    check_col(matrix.col, [4, 3])

    # Non-numeric strings are refused, for one cell and for a full slice.
    @raises(TypeError)
    def assign_cell():
        matrix.col[0] = "test"

    assign_cell()

    @raises(TypeError)
    def assign_all():
        matrix.col[:] = "test"

    assign_all()

    # Shrink to nothing, then grow again.
    matrix.length = 0
    matrix.length = 4

    # Unique values.
    matrix.col = 1, 2, 1, 2
    ok_(sorted(matrix.col.unique) == [1, 2])

    # Underlying storage type.
    ok_(matrix.col._seq.dtype == np.int64)
    check_integrity(matrix)
def test_shuffle_horiz():
    """Check ops.shuffle_horiz on a whole DataMatrix and on selected columns.

    Shuffling is random, so each of the first two checks retries until the
    expected permutation of the first row shows up. Only AssertionError (a
    failed check) triggers a retry; a bare ``except`` would also swallow
    KeyboardInterrupt and genuine errors, making the loop impossible to
    interrupt and hiding real failures.
    """
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    dm.c = '-', '-'

    # Shuffle all columns until the first row reads (0, '-', 'a').
    while True:
        dm2 = ops.shuffle_horiz(dm)
        try:
            check_row(dm2[0], [0, '-', 'a'])
        except AssertionError:
            continue
        break

    # Shuffle only a and b until they are swapped; c must keep its place.
    while True:
        dm2 = ops.shuffle_horiz(dm.a, dm.b)
        try:
            check_row(dm2[0], [0, 'a', '-'])
        except AssertionError:
            continue
        break

    # Repeated shuffles of a and b must never affect column c.
    for _ in range(1000):
        dm2 = ops.shuffle_horiz(dm.a, dm.b)
        check_col(dm.c, ['-', '-'])
def check_floatcolumn_sorting():
    """Shuffle a FloatColumn filled with mixed input and verify sorting.

    Assigning non-numeric values is expected to emit a UserWarning. The
    expected order is finite numbers and infinities ascending, followed by
    NaN for everything that is not a number.
    """
    unsorted = DataMatrix(length=24, default_col_type=FloatColumn)
    with pytest.warns(UserWarning):
        unsorted.c = [
            1, '1', 2, '2',
            1.1, '1.1', 2.1, '2.1',
            INF, -INF, 'inf', '-inf',
            NAN, NAN, 'nan', 'nan',
            None, None, None, None,
            'alpha', 'beta', 'None', ''
        ]
    unsorted.c = ops.shuffle(unsorted.c)
    ordered = ops.sort(unsorted, by=unsorted.c)
    expected = [
        -INF, -INF, 1, 1,
        1.1, 1.1, 2, 2,
        2.1, 2.1, INF, INF,
        NAN, NAN, NAN, NAN,
        NAN, NAN, NAN, NAN,
        NAN, NAN, NAN, NAN,
    ]
    check_col(ordered.c, expected)
def test_shuffle_horiz():
    """Check ops.shuffle_horiz on a whole DataMatrix, on two columns, and
    on a single column.

    Shuffling is random, so the first two checks retry until the expected
    permutation of the first row appears. Only AssertionError (a failed
    check) triggers a retry; a bare ``except`` would also swallow
    KeyboardInterrupt and genuine errors inside an endless loop.
    """
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    dm.c = '-', '-'

    # Shuffle all columns until the first row reads (0, '-', 'a').
    while True:
        dm2 = ops.shuffle_horiz(dm)
        try:
            check_row(dm2[0], [0, '-', 'a'])
        except AssertionError:
            continue
        break

    # Shuffle only a and b until they are swapped; c must keep its place.
    while True:
        dm2 = ops.shuffle_horiz(dm.a, dm.b)
        try:
            check_row(dm2[0], [0, 'a', '-'])
        except AssertionError:
            continue
        break

    # Repeated shuffles of a and b must never affect column c.
    for _ in range(1000):
        dm2 = ops.shuffle_horiz(dm.a, dm.b)
        check_col(dm.c, ['-', '-'])

    # Shuffling a single column should simply run without raising.
    ops.shuffle_horiz(dm.a)
def check_float_operations():
    """Check in-place multiplication on a FloatColumn, including
    multiplication by infinity and NaN."""
    matrix = DataMatrix(length=2, default_col_type=FloatColumn)
    matrix.col = 1, 2
    check_col(matrix.col, [1, 2])
    # Fractional results are preserved in a float column.
    matrix.col *= 2.5
    check_col(matrix.col, [2.5, 5])
    # Special values propagate row by row.
    matrix.col *= np.inf, np.nan
    check_col(matrix.col, [np.inf, np.nan])
    check_integrity(matrix)
def check_concat(col_type, invalid):
    """Check concatenation of two DataMatrix objects with the << operator.

    col_type -- column class used as default column type for both inputs.
    invalid  -- the filler value expected where a column is missing from
                one of the inputs.
    """
    upper = DataMatrix(length=2, default_col_type=col_type)
    upper.col1 = 1, 2
    upper.col_shared = 3, 4

    lower = DataMatrix(length=2, default_col_type=col_type)
    lower.col2 = 5, 6
    lower.col_shared = 7, 8

    stacked = upper << lower
    # Columns present in only one input are padded with ``invalid``.
    check_col(stacked.col1, [1, 2, invalid, invalid])
    check_col(stacked.col_shared, [3, 4, 7, 8])
    check_col(stacked.col2, [invalid, invalid, 5, 6])
def check_concat(col_type, invalid):
    """Verify that ``dm1 << dm2`` appends the rows of dm2 below dm1.

    col_type -- default column type for both matrices.
    invalid  -- value expected in rows where a column did not exist in the
                originating matrix.
    """
    first = DataMatrix(length=2, default_col_type=col_type)
    first.col1 = 1, 2
    first.col_shared = 3, 4

    second = DataMatrix(length=2, default_col_type=col_type)
    second.col2 = 5, 6
    second.col_shared = 7, 8

    combined = first << second
    check_col(combined.col1, [1, 2, invalid, invalid])
    check_col(combined.col_shared, [3, 4, 7, 8])
    check_col(combined.col2, [invalid, invalid, 5, 6])
def test_mixedcolumn():
    """Check assignment, resizing, and uniqueness for the default
    (mixed) column type, which also accepts plain strings."""
    matrix = DataMatrix(length=2)

    # Assignment by scalar, sequence, partial slice, and full slice.
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = 2, 3
    check_col(matrix.col, [2, 3])
    matrix.col[:-1] = 4
    check_col(matrix.col, [4, 3])
    matrix.col[:] = "test"
    check_col(matrix.col, ["test", "test"])

    # Shrink to nothing, then grow again.
    matrix.length = 0
    matrix.length = 4

    # Unique values.
    matrix.col = 1, 2, 1, 2
    ok_(sorted(matrix.col.unique) == [1, 2])
    check_integrity(matrix)
def test_shuffle():
    """Check ops.shuffle on a single column and on a whole DataMatrix.

    Shuffling is random, so each part retries until the reversed order
    appears. Only AssertionError (a failed check) triggers a retry; a bare
    ``except`` would also swallow KeyboardInterrupt and genuine errors
    inside an endless loop.
    """
    # Shuffling one column must leave the other column untouched.
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    while True:
        dm.a = ops.shuffle(dm.a)
        check_col(dm.b, [0, 1])
        try:
            check_col(dm.a, ['b', 'a'])
        except AssertionError:
            continue
        break

    # Shuffling the whole matrix must keep rows together.
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    while True:
        dm = ops.shuffle(dm)
        try:
            check_col(dm.a, ['b', 'a'])
            check_col(dm.b, [1, 0])
        except AssertionError:
            continue
        break
def test_shuffle():
    """Verify column-level and matrix-level shuffling with ops.shuffle.

    Because the shuffle is random, both parts loop until the swapped order
    is observed. The retry is limited to AssertionError (a failed check)
    instead of a bare ``except``, so Ctrl-C and unexpected errors are no
    longer silently swallowed by the endless loop.
    """
    # Column-level shuffle: only ``a`` may change, ``b`` stays as assigned.
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    while True:
        dm.a = ops.shuffle(dm.a)
        check_col(dm.b, [0, 1])
        try:
            check_col(dm.a, ['b', 'a'])
        except AssertionError:
            continue
        break

    # Matrix-level shuffle: both columns are reordered in lockstep.
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    while True:
        dm = ops.shuffle(dm)
        try:
            check_col(dm.a, ['b', 'a'])
            check_col(dm.b, [1, 0])
        except AssertionError:
            continue
        break
def test_floatcolumn():
    """Check assignment, string-to-NaN conversion, resizing, uniqueness,
    and dtype for FloatColumn."""
    matrix = DataMatrix(length=2)

    # Assignment by scalar, by sequence, and by slice.
    matrix.col = FloatColumn
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = 2, 3
    check_col(matrix.col, [2, 3])
    matrix.col[:-1] = 4
    check_col(matrix.col, [4, 3])

    # A non-numeric string turns every cell into NaN.
    matrix.col[:] = "test"
    for cell in matrix.col:
        ok_(np.isnan(cell))

    # Shrink to nothing, then grow again.
    matrix.length = 0
    matrix.length = 4

    # Unique values.
    matrix.col = 1, 2, 1, 2
    ok_(sorted(matrix.col.unique) == [1, 2])

    # Underlying storage type.
    ok_(matrix.col._seq.dtype == np.float64)
    check_integrity(matrix)
def _test_numericcolumn(cls):
    """Run checks shared by all numeric-capable column types.

    cls -- the column class under test; it is assigned to ``dm.col`` to
           create the column before values are set.
    """
    # Initialise and overwrite with a single value.
    matrix = DataMatrix(length=2)
    matrix.col = cls
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = 2
    check_col(matrix.col, [2, 2])

    # Initialise and overwrite with a sequence.
    matrix = DataMatrix(length=2)
    matrix.col = cls
    matrix.col = 1, 2
    check_col(matrix.col, [1, 2])
    matrix.col = 3, 4
    check_col(matrix.col, [3, 4])

    # Assign through slices.
    matrix = DataMatrix(length=3)
    matrix.col = cls
    matrix.col = 1
    matrix.col[1:] = 2
    check_col(matrix.col, [1, 2, 2])
    matrix.col[:-1] = 4, 3
    check_col(matrix.col, [4, 3, 2])

    # Assign using a DataMatrix selection as the index.
    matrix = DataMatrix(length=10)
    matrix.x = range(10)
    matrix.y = FloatColumn
    matrix = matrix.x != {3, 6}
    matrix.y[matrix.x > 3] = 10
    matrix.y[matrix.x >= 8] = 11
    check_col(matrix.y, [np.nan] * 3 + [10] * 3 + [11] * 2)

    # Shrink to nothing, then grow again.
    matrix = DataMatrix(length=4)
    matrix.length = 0
    matrix.length = 4

    # Unique values, then overwrite a selection.
    matrix.col = 1, 2, 1, 2
    assert sorted(matrix.col.unique) == [1,2]
    matrix.col[matrix.col == 2] = 0, 0
    check_col(matrix.col, [1, 0, 1, 0])
    check_integrity(matrix)

    # Return types of the different indexing forms.
    matrix = DataMatrix(length=5)
    matrix.col = cls
    matrix.col = 1, 2, 3, 4, 5

    # A single int index gives a plain number.
    single = matrix.col[2]
    assert isinstance(single, (int, float))
    assert single == 3

    # A tuple of ints gives a column of the same class.
    picked = matrix.col[1, 3]
    assert isinstance(picked, cls)
    check_col(picked, [2, 4])

    # A slice gives a column of the same class.
    sliced = matrix.col[1:-1]
    assert isinstance(sliced, cls)
    check_col(sliced, [2, 3, 4])

    # The array view is only checked for non-mixed columns.
    if cls != MixedColumn:
        values = matrix.col.array
        assert isinstance(values, np.ndarray)
        assert values.shape == (5,)
        assert all(values == [1, 2, 3, 4, 5])
def test_floatcolumn():
    """Exercise row access, selection, and concatenation for FloatColumn,
    plus selection by strings, NaN, infinity, and by type."""
    check_getrow(FloatColumn)
    check_select(FloatColumn)
    check_concat(FloatColumn, invalid=np.nan)

    # Selecting with values that are not plain numbers.
    matrix = DataMatrix(length=4, default_col_type=FloatColumn)
    matrix.col = 1, 2, np.nan, np.inf
    selection = matrix.col == '1'
    check_col(selection.col, [1])

    # Comparing with a non-numeric string warns and behaves like NaN.
    with pytest.warns(UserWarning):
        selection = matrix.col == ''
    check_col(selection.col, [np.nan])
    with pytest.warns(UserWarning):
        selection = matrix.col != ''
    check_col(selection.col, [1, 2, np.inf])

    selection = matrix.col == np.nan
    check_col(selection.col, [np.nan])
    selection = matrix.col != np.nan
    check_col(selection.col, [1, 2, np.inf])

    selection = matrix.col == np.inf
    check_col(selection.col, [np.inf])
    selection = matrix.col != np.inf
    check_col(selection.col, [1, 2, np.nan])

    # Ordering against a non-numeric string warns and then fails.
    with pytest.warns(UserWarning):
        with pytest.raises(TypeError):
            matrix.col > ''

    # Selecting by type: every value counts as float, none as str or int.
    matrix = DataMatrix(length=2, default_col_type=FloatColumn)
    matrix.col = 1, 2
    assert len(matrix.col == float) == 2
    assert len(matrix.col != float) == 0
    assert len(matrix.col == str) == 0
    assert len(matrix.col != str) == 2
    assert len(matrix.col == int) == 0
    assert len(matrix.col != int) == 2
def test_bin_split():
    """Check ops.bin_split for 2, 3, and 1 bins, and that asking for more
    bins than rows raises ValueError."""
    matrix = DataMatrix(length=4)
    matrix.a = range(4)
    # Shuffle first so that the bins have to come from sorted values.
    matrix = ops.shuffle(matrix)

    low, high = ops.bin_split(matrix.a, 2)
    check_col(low.a, [0, 1])
    check_col(high.a, [2, 3])

    # With uneven division the surplus row ends up in the last bin.
    first, second, third = ops.bin_split(matrix.a, 3)
    check_col(first.a, [0])
    check_col(second.a, [1])
    check_col(third.a, [2, 3])

    only, = ops.bin_split(matrix.a, 1)
    check_col(only.a, [0, 1, 2, 3])

    with pytest.raises(ValueError):
        x, = ops.bin_split(matrix.a, 5)
def check_nan_sort():
    """Check that sorting FloatColumns places NaN values last, both when
    sorting a whole DataMatrix and when sorting individual columns."""
    matrix = DataMatrix(length=3, default_col_type=FloatColumn)
    matrix.col1 = 2, np.nan, 1
    matrix.col2 = 1, 2, np.nan

    # Sort the whole matrix by col1; col2 follows the row order.
    matrix = operations.sort(matrix, by=matrix.col1)
    check_col(matrix.col1, [1, 2, np.nan])
    check_col(matrix.col2, [np.nan, 1, 2])

    # Sort the whole matrix by col2; col1 follows the row order.
    matrix = operations.sort(matrix, by=matrix.col2)
    check_col(matrix.col1, [2, np.nan, 1])
    check_col(matrix.col2, [1, 2, np.nan])

    # Sort each column on its own.
    matrix.col1 = operations.sort(matrix.col1)
    matrix.col2 = operations.sort(matrix.col2)
    check_col(matrix.col1, [1, 2, np.nan])
    check_col(matrix.col2, [1, 2, np.nan])
    check_integrity(matrix)
def check_select(col_type):
    """Check the selection forms supported by a column.

    Covers plain comparisons, the |, &, ^ combinators, pair-wise comparison
    with a sequence, set-based multi-matching, and lambda-based matching.

    col_type -- the column class used as the DataMatrix default column type.
    """
    dm = DataMatrix(length=2, default_col_type=col_type)
    dm.col = 1, 2

    # Plain comparisons.
    picked = dm.col < 2
    check_col(picked.col, [1])
    picked = dm.col == 2
    check_col(picked.col, [2])

    # Logical combinations of selections.
    picked = (dm.col == 1) | (dm.col == 2)  # or
    check_col(picked.col, [1, 2])
    picked = (dm.col == 1) & (dm.col == 2)  # and
    check_col(picked.col, [])
    picked = (dm.col == 1) ^ (dm.col == 2)  # xor
    check_col(picked.col, [1, 2])

    # A sequence of matching length is compared row by row.
    picked = dm.col == (1, 3)
    check_col(picked.col, [1])

    # A set matches when the cell equals any of its members.
    picked = dm.col == {2, 3, 4}
    check_col(picked.col, [2])
    picked = dm.col != {1, 3, 4}
    check_col(picked.col, [2])

    # A callable is used as a per-cell predicate.
    picked = dm.col == (lambda x: x == 2)
    check_col(picked.col, [2])
    picked = dm.col != (lambda x: x == 2)
    check_col(picked.col, [1])

    check_integrity(dm)
def _test_numericcolumn(cls):
    """Run checks shared by all numeric-capable column types.

    cls -- the column class under test; it is assigned to ``dm.col`` to
           create the column before values are set.
    """
    # Initialise and overwrite with a single value.
    matrix = DataMatrix(length=2)
    matrix.col = cls
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = 2
    check_col(matrix.col, [2, 2])

    # Initialise and overwrite with a sequence.
    matrix = DataMatrix(length=2)
    matrix.col = cls
    matrix.col = 1, 2
    check_col(matrix.col, [1, 2])
    matrix.col = 3, 4
    check_col(matrix.col, [3, 4])

    # Assign through slices.
    matrix = DataMatrix(length=3)
    matrix.col = cls
    matrix.col = 1
    matrix.col[1:] = 2
    check_col(matrix.col, [1, 2, 2])
    matrix.col[:-1] = 4, 3
    check_col(matrix.col, [4, 3, 2])

    # Shrink to nothing, then grow again.
    matrix = DataMatrix(length=4)
    matrix.length = 0
    matrix.length = 4

    # Unique values, then overwrite a selection.
    matrix.col = 1, 2, 1, 2
    ok_(sorted(matrix.col.unique) == [1, 2])
    matrix.col[matrix.col == 2] = 0, 0
    check_col(matrix.col, [1, 0, 1, 0])
    check_integrity(matrix)

    # Return types of the different indexing forms.
    matrix = DataMatrix(length=5)
    matrix.col = cls
    matrix.col = 1, 2, 3, 4, 5

    # A single int index gives a plain number.
    single = matrix.col[2]
    ok_(isinstance(single, (int, float)))
    eq_(single, 3)

    # A tuple of ints gives a column of the same class.
    picked = matrix.col[1, 3]
    ok_(isinstance(picked, cls))
    check_col(picked, [2, 4])

    # A slice gives a column of the same class.
    sliced = matrix.col[1:-1]
    ok_(isinstance(sliced, cls))
    check_col(sliced, [2, 3, 4])

    # The array view is only checked for non-mixed columns.
    if cls != MixedColumn:
        values = matrix.col.array
        ok_(isinstance(values, np.ndarray))
        eq_(values.shape, (5, ))
        ok_(all(values == [1, 2, 3, 4, 5]))
def test_floatcolumn():
    """Exercise row access, selection, and concatenation for FloatColumn,
    plus selection by strings, NaN, infinity, and by type."""
    check_getrow(FloatColumn)
    check_select(FloatColumn)
    check_concat(FloatColumn, invalid=np.nan)

    # Selecting with values that are not plain numbers.
    matrix = DataMatrix(length=4, default_col_type=FloatColumn)
    matrix.col = 1, 2, np.nan, np.inf
    selection = matrix.col == '1'
    check_col(selection.col, [1])

    # An empty string is expected to behave like NaN in comparisons.
    selection = matrix.col == ''
    check_col(selection.col, [np.nan])
    selection = matrix.col != ''
    check_col(selection.col, [1, 2, np.inf])

    selection = matrix.col == np.nan
    check_col(selection.col, [np.nan])
    selection = matrix.col != np.nan
    check_col(selection.col, [1, 2, np.inf])

    selection = matrix.col == np.inf
    check_col(selection.col, [np.inf])
    selection = matrix.col != np.inf
    check_col(selection.col, [1, 2, np.nan])

    # Ordering comparisons against a non-numeric string are rejected.
    @raises(TypeError)
    def compare_with_empty_string():
        matrix.col > ''

    compare_with_empty_string()

    # Selecting by type: every value counts as float, none as str or int.
    matrix = DataMatrix(length=2, default_col_type=FloatColumn)
    matrix.col = 1, 2
    eq_(len(matrix.col == float), 2)
    eq_(len(matrix.col != float), 0)
    eq_(len(matrix.col == str), 0)
    eq_(len(matrix.col != str), 2)
    eq_(len(matrix.col == int), 0)
    eq_(len(matrix.col != int), 2)
def test_split():
    """Check ops.split in generator form (no values given) and in tuple
    form (explicit values given)."""
    source = DataMatrix(length=4)
    source.a = 'a', 'a', 'b', 'b'
    source.b = 0, 1, 2, 3

    # Without values: a generator of (value, sub-matrix) pairs.
    groups = ops.split(source.a)
    key, part = next(groups)
    assert key == 'a'
    check_col(part.a, ['a', 'a'])
    check_col(part.b, [0, 1])
    key, part = next(groups)
    assert key == 'b'
    check_col(part.a, ['b', 'b'])
    check_col(part.b, [2, 3])

    # With values: one sub-matrix per requested value.
    source = DataMatrix(length=4)
    source.a = 'a', 'a', 'b', 'b'
    source.b = 0, 1, 2, 3
    part_a, part_b = ops.split(source.a, 'a', 'b')
    check_col(part_a.a, ['a', 'a'])
    check_col(part_a.b, [0, 1])
    check_col(part_b.a, ['b', 'b'])
    check_col(part_b.b, [2, 3])
def check_nan_sort():
    """Verify NaN handling when sorting: NaN is expected at the end, for
    matrix-level sorts with ``by=`` and for column-level sorts alike."""
    table = DataMatrix(length=3, default_col_type=FloatColumn)
    table.col1 = 2, np.nan, 1
    table.col2 = 1, 2, np.nan

    # Matrix-level sort keyed on col1.
    table = operations.sort(table, by=table.col1)
    check_col(table.col1, [1, 2, np.nan])
    check_col(table.col2, [np.nan, 1, 2])

    # Matrix-level sort keyed on col2.
    table = operations.sort(table, by=table.col2)
    check_col(table.col1, [2, np.nan, 1])
    check_col(table.col2, [1, 2, np.nan])

    # Column-level sorts, each column independently.
    table.col1 = operations.sort(table.col1)
    table.col2 = operations.sort(table.col2)
    check_col(table.col1, [1, 2, np.nan])
    check_col(table.col2, [1, 2, np.nan])
    check_integrity(table)
def check_operations(col_type):
    """Check arithmetic on a column, with scalar and per-row operands.

    Covers the in-place operators (+=, -=, *=, /=) and the reflected forms
    where the column is the right-hand operand (+, -, *, /, **, %).
    Numeric strings are used as operands in the in-place part.

    col_type -- the column class used as the DataMatrix default column type.
    """
    matrix = DataMatrix(length=2, default_col_type=col_type)
    matrix.col = 1, '2'

    # In-place addition.
    matrix.col += 1
    check_col(matrix.col, [2, 3])
    matrix.col += 1, '2'
    check_col(matrix.col, [3, 5])

    # In-place subtraction.
    matrix.col -= 1
    check_col(matrix.col, [2, 4])
    matrix.col -= 1, '2'
    check_col(matrix.col, [1, 2])

    # In-place multiplication.
    matrix.col *= 2
    check_col(matrix.col, [2, 4])
    matrix.col *= 1, '2'
    check_col(matrix.col, [2, 8])

    # In-place division.
    matrix.col /= 2
    check_col(matrix.col, [1, 4])
    matrix.col /= 1, '2'
    check_col(matrix.col, [1, 2])

    # Reflected addition.
    matrix.col = 1 + matrix.col
    check_col(matrix.col, [2, 3])
    matrix.col = (1, 2) + matrix.col
    check_col(matrix.col, [3, 5])

    # Reflected subtraction.
    matrix.col = 5 - matrix.col
    check_col(matrix.col, [2, 0])
    matrix.col = (3, 1) - matrix.col
    check_col(matrix.col, [1, 1])

    # Reflected multiplication.
    matrix.col = 2 * matrix.col
    check_col(matrix.col, [2, 2])
    matrix.col = (1, 2) * matrix.col
    check_col(matrix.col, [2, 4])

    # Reflected division.
    matrix.col = 4 / matrix.col
    check_col(matrix.col, [2, 1])
    matrix.col = (4, 2) / matrix.col
    check_col(matrix.col, [2, 2])

    # Reflected power.
    matrix.col = 2 ** matrix.col
    check_col(matrix.col, [4, 4])
    matrix.col = (2, 4) ** matrix.col
    check_col(matrix.col, [16, 256])

    # Reflected modulo.
    matrix.col = 17 % matrix.col
    check_col(matrix.col, [1, 17])
    matrix.col = (2, 16) % matrix.col
    check_col(matrix.col, [0, 16])

    check_integrity(matrix)
def test_bin_split():
    """Check ops.bin_split for 2, 3, and 1 bins, and that asking for more
    bins than rows raises ValueError."""
    matrix = DataMatrix(length=4)
    matrix.a = range(4)
    # Shuffle first so that the bins have to come from sorted values.
    matrix = ops.shuffle(matrix)

    low, high = ops.bin_split(matrix.a, 2)
    check_col(low.a, [0, 1])
    check_col(high.a, [2, 3])

    # With uneven division the surplus row ends up in the last bin.
    first, second, third = ops.bin_split(matrix.a, 3)
    check_col(first.a, [0])
    check_col(second.a, [1])
    check_col(third.a, [2, 3])

    only, = ops.bin_split(matrix.a, 1)
    check_col(only.a, [0, 1, 2, 3])

    @raises(ValueError)
    def split_into_too_many_bins():
        x, = ops.bin_split(matrix.a, 5)

    split_into_too_many_bins()
def check_sort(col_type):
    """Check operations.sort at matrix level, at column level with a
    separate key column, and at column level without a key.

    col_type -- the column class used as the DataMatrix default column type.
    """
    matrix = DataMatrix(length=3, default_col_type=col_type)
    matrix.col1 = 3, 2, 1
    matrix.col2 = 1, 2, 3

    # Matrix-level sort keyed on col1, then on col2.
    matrix = operations.sort(matrix, by=matrix.col1)
    check_col(matrix.col1, [1, 2, 3])
    check_col(matrix.col2, [3, 2, 1])
    matrix = operations.sort(matrix, by=matrix.col2)
    check_col(matrix.col1, [3, 2, 1])
    check_col(matrix.col2, [1, 2, 3])

    # Column-level sort of col2, ordered by the values of col1.
    matrix.col2 = operations.sort(matrix.col2, by=matrix.col1)
    check_col(matrix.col2, [3, 2, 1])

    # Column-level sorts, each column by its own values.
    matrix.col1 = operations.sort(matrix.col1)
    matrix.col2 = operations.sort(matrix.col2)
    check_col(matrix.col1, [1, 2, 3])
    check_col(matrix.col2, [1, 2, 3])
    check_integrity(matrix)
def check_operations(col_type):
    """Check the in-place operators +=, -=, *=, /= and //= on a column,
    each with a scalar and/or a per-row operand.

    col_type -- the column class used as the DataMatrix default column type.
    """
    matrix = DataMatrix(length=2, default_col_type=col_type)
    matrix.col = 1, 2

    # Addition.
    matrix.col += 1
    check_col(matrix.col, [2, 3])
    matrix.col += 1, 2
    check_col(matrix.col, [3, 5])

    # Subtraction.
    matrix.col -= 1
    check_col(matrix.col, [2, 4])
    matrix.col -= 1, 2
    check_col(matrix.col, [1, 2])

    # Multiplication, including a fractional factor.
    matrix.col *= 2
    check_col(matrix.col, [2, 4])
    matrix.col *= 1.5, 3
    check_col(matrix.col, [3, 12])

    # True division.
    matrix.col /= 3
    check_col(matrix.col, [1, 4])
    matrix.col /= 1, 2
    check_col(matrix.col, [1, 2])

    # Floor division.
    matrix.col //= 1.5, 2.5
    check_col(matrix.col, [0, 0])

    check_integrity(matrix)
def test_split():
    """Check ops.split without values, with explicit values, and with
    several columns at once."""
    source = DataMatrix(length=4)
    source.a = 'a', 'a', 'b', 'b'
    source.b = 0, 1, 2, 3

    # Without values: a generator of (value, sub-matrix) pairs.
    groups = ops.split(source.a)
    key, part = next(groups)
    assert key == 'a'
    check_col(part.a, ['a', 'a'])
    check_col(part.b, [0, 1])
    key, part = next(groups)
    assert key == 'b'
    check_col(part.a, ['b', 'b'])
    check_col(part.b, [2, 3])

    # With values: one sub-matrix per requested value.
    source = DataMatrix(length=4)
    source.a = 'a', 'a', 'b', 'b'
    source.b = 0, 1, 2, 3
    part_a, part_b = ops.split(source.a, 'a', 'b')
    check_col(part_a.a, ['a', 'a'])
    check_col(part_a.b, [0, 1])
    check_col(part_b.a, ['b', 'b'])
    check_col(part_b.b, [2, 3])

    # With multiple columns: one group per combination of values.
    source = DataMatrix(length=8)
    source.A = 0, 0, 1, 1, 0, 0, 1, 1
    source.B = 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b'
    source.C = 'x', 'x', 'x', 'x', 'y', 'y', 'y', 'y'

    # Two columns give four groups of two rows each.
    groups = ops.split(source.A, source.B)
    expected_pairs = [(0, 'a'), (0, 'b'), (1, 'a'), (1, 'b')]
    for want_a, want_b in expected_pairs:
        val1, val2, sdm = next(groups)
        assert val1 == want_a
        assert val2 == want_b
        assert (len(sdm) == 2)

    # Three columns give eight groups of one row each.
    groups = ops.split(source.A, source.B, source.C)
    expected_triples = [
        (0, 'a', 'x'), (0, 'a', 'y'),
        (0, 'b', 'x'), (0, 'b', 'y'),
        (1, 'a', 'x'), (1, 'a', 'y'),
        (1, 'b', 'x'), (1, 'b', 'y'),
    ]
    for want_a, want_b, want_c in expected_triples:
        val1, val2, val3, sdm = next(groups)
        assert val1 == want_a
        assert val2 == want_b
        assert val3 == want_c
        assert (len(sdm) == 1)
def test_floatcolumn():
    """Exercise FloatColumn: shared numeric/copy checks, float coercion,
    NaN and infinity handling, and the underlying dtype."""
    _test_numericcolumn(FloatColumn)
    _test_copying(FloatColumn)

    # Floats and numeric strings are converted to floats.
    matrix = DataMatrix(length=2)
    matrix.col = FloatColumn
    matrix.col = 1.9, '2.9'
    check_col(matrix.col, [1.9, 2.9])

    # Each entry is (value to assign, expected column content).
    cases = [
        # NaN in its various spellings; unparsable input also becomes NaN.
        ('nan', [np.nan, np.nan]),
        (None, [np.nan, np.nan]),
        (np.nan, [np.nan, np.nan]),
        ('x', [np.nan, np.nan]),
        # Infinity as a string and as a float.
        ('inf', [np.inf, np.inf]),
        (np.inf, [np.inf, np.inf]),
        # Per-row mixtures of NaN and infinity.
        (('nan', 'inf'), [np.nan, np.inf]),
        ((np.inf, np.nan), [np.inf, np.nan]),
        (('x', None), [np.nan, np.nan]),
    ]
    for assigned, expected in cases:
        matrix.col = assigned
        check_col(matrix.col, expected)

    # The backing sequence is expected to be a 64-bit float array.
    ok_(matrix.col._seq.dtype == np.float64)
    check_integrity(matrix)
def test_seriescolumn():
    """Exercise SeriesColumn: copying, the various assignment forms,
    resizing of length and depth, and the return type of every indexing
    form."""
    _test_copying(SeriesColumn(depth=1))
    matrix = DataMatrix(length=2)
    matrix.col = SeriesColumn(depth=3)

    # One value for every cell.
    matrix.col = 1
    check_series(matrix.col, [[1, 1, 1], [1, 1, 1]])
    # One value per row.
    matrix.col = 2, 3
    check_series(matrix.col, [[2, 2, 2], [3, 3, 3]])
    # One value for a single row.
    matrix.col[0] = 4
    check_series(matrix.col, [[4, 4, 4], [3, 3, 3]])
    # A full series for a single row.
    matrix.col[1] = 5, 6, 7
    check_series(matrix.col, [[4, 4, 4], [5, 6, 7]])
    # The same series for every row.
    matrix.col.setallrows([8, 9, 10])
    check_series(matrix.col, [[8, 9, 10], [8, 9, 10]])
    # The first sample of every row.
    matrix.col[:, 0] = 1
    check_series(matrix.col, [[1, 9, 10], [1, 9, 10]])
    # Every sample of the first row.
    matrix.col[0, :] = 2
    check_series(matrix.col, [[2, 2, 2], [1, 9, 10]])
    # Every sample of every row.
    matrix.col[:, :] = 3
    check_series(matrix.col, [[3, 3, 3], [3, 3, 3]])

    # Changing the length and the depth; new samples are NaN.
    matrix.length = 0
    check_series(matrix.col, [])
    matrix.length = 3
    matrix.col = 1, 2, 3
    matrix.col.depth = 1
    check_series(matrix.col, [[1], [2], [3]])
    matrix.col.depth = 3
    check_series(matrix.col, [[1, NAN, NAN], [2, NAN, NAN], [3, NAN, NAN]])
    check_integrity(matrix)

    # A sequence matching the length is spread over rows; a sequence
    # matching the depth is spread over samples.
    matrix = DataMatrix(length=2)
    matrix.col = SeriesColumn(depth=3)
    matrix.col = 1, 2
    check_series(matrix.col, [[1, 1, 1], [2, 2, 2]])
    matrix.col = 3, 4, 5
    check_series(matrix.col, [[3, 4, 5]] * 2)
    matrix.col.depth = 2
    matrix.col[:] = 1, 2
    check_series(matrix.col, [[1, 1], [2, 2]])
    matrix.col[:, :] = 3, 4
    check_series(matrix.col, [[3, 4], [3, 4]])

    # Return types of the different indexing forms.
    matrix = DataMatrix(length=4)
    matrix.col = SeriesColumn(depth=5)
    matrix.col = [
        [1, 2, 3, 4, 5],
        [6, 7, 8, 9, 10],
        [11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20],
    ]

    # (int, int) -> float
    cell = matrix.col[2, 2]
    eq_(cell, 13)
    eq_(type(cell), float)

    # (int) -> array
    row = matrix.col[2]
    ok_(all(row == np.array([11, 12, 13, 14, 15])))
    eq_(type(row), np.ndarray)

    # (int, slice) -> array
    row_part = matrix.col[2, 1:-1]
    ok_(all(row_part == np.array([12, 13, 14])))
    eq_(type(row_part), np.ndarray)

    # (int, (int, int)) -> array
    row_pick = matrix.col[2, (1, 3)]
    ok_(all(row_pick == np.array([12, 14])))
    eq_(type(row_pick), np.ndarray)

    # (slice) -> SeriesColumn
    rows = matrix.col[1:-1]
    check_series(rows, [
        [6, 7, 8, 9, 10],
        [11, 12, 13, 14, 15],
    ])

    # (slice, int) -> FloatColumn
    sample = matrix.col[1:-1, 2]
    ok_(isinstance(sample, FloatColumn))
    check_col(sample, [8, 13])

    # ((int, int), int) -> FloatColumn
    sample = matrix.col[(1, 3), 2]
    ok_(isinstance(sample, FloatColumn))
    check_col(sample, [8, 18])

    # (slice, slice) -> SeriesColumn
    block = matrix.col[1:-1, 1:-1]
    ok_(isinstance(block, _SeriesColumn))
    check_series(block, [
        [7, 8, 9],
        [12, 13, 14],
    ])

    # ((int, int), slice) -> SeriesColumn
    block = matrix.col[(1, 3), 1:-1]
    ok_(isinstance(block, _SeriesColumn))
    check_series(block, [
        [7, 8, 9],
        [17, 18, 19],
    ])

    # ((int, int), (int, int)) -> SeriesColumn
    block = matrix.col[(1, 3), (1, 3)]
    ok_(isinstance(block, _SeriesColumn))
    check_series(block, [
        [7, 9],
        [17, 19],
    ])
def check_sort(col_type):
    """Verify sorting of a DataMatrix by a key column, sorting of one
    column by another, and plain sorting of individual columns.

    col_type -- the column class used as the DataMatrix default column type.
    """
    table = DataMatrix(length=3, default_col_type=col_type)
    table.col1 = 3, 2, 1
    table.col2 = 1, 2, 3

    # Whole-matrix sort, first keyed on col1 and then on col2.
    table = operations.sort(table, by=table.col1)
    check_col(table.col1, [1, 2, 3])
    check_col(table.col2, [3, 2, 1])
    table = operations.sort(table, by=table.col2)
    check_col(table.col1, [3, 2, 1])
    check_col(table.col2, [1, 2, 3])

    # Sort only col2, using col1 as the ordering key.
    table.col2 = operations.sort(table.col2, by=table.col1)
    check_col(table.col2, [3, 2, 1])

    # Sort each column by its own values.
    table.col1 = operations.sort(table.col1)
    table.col2 = operations.sort(table.col2)
    check_col(table.col1, [1, 2, 3])
    check_col(table.col2, [1, 2, 3])
    check_integrity(table)