示例#1
0
def test_intcolumn():
    """Run the shared numeric and copy checks on IntColumn, then check
    integer coercion, rejection of non-numeric input, and the dtype."""
    _test_numericcolumn(IntColumn)
    _test_copying(IntColumn)

    # Float-like input (including numeric strings) is expected to be
    # truncated to integers on assignment.
    matrix = DataMatrix(length=2)
    matrix.col = IntColumn
    matrix.col = (1.9, '2.9')
    check_col(matrix.col, [1, 2])

    # A non-numeric string must raise TypeError, whether it is assigned to
    # a single cell, to the whole column, or to a slice.
    @raises(TypeError)
    def assign_to_cell():
        matrix.col[0] = 'x'

    @raises(TypeError)
    def assign_to_column():
        matrix.col = 'x'

    @raises(TypeError)
    def assign_to_slice():
        matrix.col[:-1] = 'x'

    for attempt in (assign_to_cell, assign_to_column, assign_to_slice):
        attempt()

    # The backing sequence is expected to hold 64-bit integers.
    ok_(matrix.col._seq.dtype == np.int64)
    check_integrity(matrix)
示例#2
0
def check_mixedcolumn_sorting():
    """Shuffle a column mixing numbers, strings, None and NaN, sort the
    matrix by it, and compare the result with the expected ordering."""
    dm = DataMatrix(length=24)
    unsorted_values = [
        1, '1', 2, '2', 1.1, '1.1', 2.1, '2.1', INF, -INF, 'inf', '-inf', NAN,
        NAN, 'nan', 'nan', None, None, None, None, 'alpha', 'beta', 'None', ''
    ]
    # Expected order: numeric values ascending (numeric strings are expected
    # to be converted), then the remaining strings, then None, then NaN.
    expected = (
        [-INF, -INF, 1, 1, 1.1, 1.1, 2, 2, 2.1, 2.1, INF, INF]
        + ['', 'None', 'alpha', 'beta']
        + [None] * 4
        + [NAN] * 4
    )
    dm.c = unsorted_values
    dm.c = ops.shuffle(dm.c)
    dm = ops.sort(dm, by=dm.c)
    check_col(dm.c, expected)
def test_weight():
    """Check ops.weight(): with weights 1, 0, 2 the test expects row 'a'
    once, row 'b' dropped, and row 'c' twice."""
    source = DataMatrix(length=3)
    source.a = ('a', 'b', 'c')
    source.b = (1, 0, 2)
    weighted = ops.weight(source.b)
    check_col(weighted.a, ['a', 'c', 'c'])
    check_col(weighted.b, [1, 2, 2])
def check_str_operations():
	"""Check that += on a MixedColumn of strings concatenates row by row."""
	matrix = DataMatrix(length=2, default_col_type=MixedColumn)
	matrix.col = ('a', 'b')
	check_col(matrix.col, ['a', 'b'])
	# Each row receives its own suffix from the right-hand sequence.
	matrix.col += ('c', 'd')
	check_col(matrix.col, ['ac', 'bd'])
	check_integrity(matrix)
def test_weight():
	"""Check ops.weight(): with weights 1, 0, 2 the test expects row 'a'
	once, row 'b' dropped, and row 'c' twice."""
	source = DataMatrix(length=3)
	source.a = ('a', 'b', 'c')
	source.b = (1, 0, 2)
	weighted = ops.weight(source.b)
	check_col(weighted.a, ['a', 'c', 'c'])
	check_col(weighted.b, [1, 2, 2])
def check_int_operations():
	"""Check integer truncation on assignment and after in-place
	multiplication of an IntColumn."""
	matrix = DataMatrix(length=2, default_col_type=IntColumn)
	matrix.col = (1.5, 2.5)
	check_col(matrix.col, [1, 2])
	# 1 * 1.5 and 2 * 1.5 are expected to come back as the integers 1 and 3.
	matrix.col *= 1.5
	check_col(matrix.col, [1, 3])
	check_integrity(matrix)
def check_str_operations():
	"""Check that += on a MixedColumn of strings concatenates row by row."""
	matrix = DataMatrix(length=2, default_col_type=MixedColumn)
	matrix.col = ('a', 'b')
	check_col(matrix.col, ['a', 'b'])
	# Each row receives its own suffix from the right-hand sequence.
	matrix.col += ('c', 'd')
	check_col(matrix.col, ['ac', 'bd'])
	check_integrity(matrix)
def test_fullfactorial():
    """Check ops.fullfactorial(): two levels of `a` (the empty string is
    expected to be ignored) crossed with three levels of `b` give six rows."""
    design = DataMatrix(length=3)
    design.a = ('a', 'b', '')
    design.b = (0, 1, 2)
    crossed = ops.fullfactorial(design)
    check_col(crossed.a, ['a', 'b', 'a', 'b', 'a', 'b'])
    check_col(crossed.b, [0, 0, 1, 1, 2, 2])
def test_fullfactorial():
	"""Check ops.fullfactorial(): two levels of `a` (the empty string is
	expected to be ignored) crossed with three levels of `b` give six rows."""
	design = DataMatrix(length=3)
	design.a = ('a', 'b', '')
	design.b = (0, 1, 2)
	crossed = ops.fullfactorial(design)
	check_col(crossed.a, ['a', 'b', 'a', 'b', 'a', 'b'])
	check_col(crossed.b, [0, 0, 1, 1, 2, 2])
def test_reduce_():
    """Reduce each row of a depth-3 series column to a single value."""
    dm = DataMatrix(length=2)
    dm.series = SeriesColumn(depth=3)
    samples = [(1, 2, 3), (2, 3, 4)]
    for row_index, row_values in enumerate(samples):
        dm.series[row_index] = row_values
    # The expected values equal the per-row mean; presumably that is the
    # default reduction of series.reduce_() - confirm against its signature.
    dm.col = series.reduce_(dm.series)
    check_col(dm.col, [2, 3])
    check_integrity(dm)
示例#11
0
def test_mixedcolumn():
    """Run the shared numeric and copy checks on MixedColumn, then check
    value conversion and assignment through a set-based selection."""
    _test_numericcolumn(MixedColumn)
    _test_copying(MixedColumn)

    matrix = DataMatrix(length=4)
    # Numeric strings are expected to become numbers; 'x' and None stay.
    matrix.col = ('1.1', '1', 'x', None)
    check_col(matrix.col, [1.1, 1, 'x', None])

    # Comparing against a set selects the rows matching any member; the
    # two selected rows then receive 'a' and 'b' respectively.
    one_or_none = matrix.col == {1, None}
    matrix.col[one_or_none] = ('a', 'b')
    check_col(matrix.col, [1.1, 'a', 'x', 'b'])
示例#12
0
def check_intcolumn_sorting():
	"""Shuffle an IntColumn filled with int-like values and sort by it."""
	dm = DataMatrix(length=8, default_col_type=IntColumn)
	# Floats and numeric strings are expected to be truncated to 1 or 2.
	int_like_values = [1, '1', 2, '2', 1.1, '1.1', 2.1, '2.8']
	dm.c = int_like_values
	dm.c = ops.shuffle(dm.c)
	dm = ops.sort(dm, by=dm.c)
	check_col(dm.c, [1] * 4 + [2] * 4)
def check_int_operations():
	"""Check integer truncation on assignment and in-place multiplication,
	and that multiplying an IntColumn by a string raises TypeError."""
	matrix = DataMatrix(length=2, default_col_type=IntColumn)
	matrix.col = (1.5, 2.5)
	check_col(matrix.col, [1, 2])
	# 1 * 2.5 is expected to truncate to 2, and 2 * 2.5 to 4.
	matrix.col *= 2.5
	check_col(matrix.col, [2, 4])
	with pytest.raises(TypeError):
		matrix.col *= 'x'
	check_integrity(matrix)
def check_select(col_type):
	"""Check row selection by comparison and by combined comparisons.

	col_type is the column class used as the matrix's default column type.
	"""
	dm = DataMatrix(length=2, default_col_type=col_type)
	dm.col = (1, 2)
	# Single comparisons
	check_col((dm.col < 2).col, [1])
	check_col((dm.col == 2).col, [2])
	# Combined selections: or, and, xor
	either = (dm.col == 1) | (dm.col == 2)
	check_col(either.col, [1, 2])
	both = (dm.col == 1) & (dm.col == 2)
	check_col(both.col, [])
	exactly_one = (dm.col == 1) ^ (dm.col == 2)
	check_col(exactly_one.col, [1, 2])
	check_integrity(dm)
def test_tuple_split():
	"""Check ops.tuple_split(): one sub-matrix per requested value."""
	source = DataMatrix(length=4)
	source.a = ('a', 'a', 'b', 'b')
	source.b = (0, 1, 2, 3)
	rows_a, rows_b = ops.tuple_split(source.a, 'a', 'b')
	# Rows where a == 'a'
	check_col(rows_a.a, ['a', 'a'])
	check_col(rows_a.b, [0, 1])
	# Rows where a == 'b'
	check_col(rows_b.a, ['b', 'b'])
	check_col(rows_b.b, [2, 3])
def test_sort():
	"""Check ops.sort() on a single column and on a whole matrix."""
	dm = DataMatrix(length=2)
	dm.a = ('b', 'a')
	dm.b = (1, 0)
	# Sorting one column reorders that column only; b keeps its order.
	dm.a = ops.sort(dm.a)
	check_col(dm.a, ['a', 'b'])
	check_col(dm.b, [1, 0])
	# Sorting the matrix by b reorders whole rows.
	sorted_by_b = ops.sort(dm, by=dm.b)
	check_col(sorted_by_b.a, ['b', 'a'])
	check_col(sorted_by_b.b, [0, 1])
def test_sort():
    """Check ops.sort() on a single column and on a whole matrix."""
    dm = DataMatrix(length=2)
    dm.a = ('b', 'a')
    dm.b = (1, 0)
    # Sorting one column reorders that column only; b keeps its order.
    dm.a = ops.sort(dm.a)
    check_col(dm.a, ['a', 'b'])
    check_col(dm.b, [1, 0])
    # Sorting the matrix by b reorders whole rows.
    sorted_by_b = ops.sort(dm, by=dm.b)
    check_col(sorted_by_b.a, ['b', 'a'])
    check_col(sorted_by_b.b, [0, 1])
def test_replace():
    """Check ops.replace() on a default column, a FloatColumn and a
    SeriesColumn, including NaN as both a key and a replacement value."""
    dm = DataMatrix(length=3)
    dm.a = (0, 1, 2)
    dm.c = FloatColumn
    dm.c = (np.nan, 1, 2)
    dm.s = SeriesColumn(depth=3)
    dm.s[0] = (0, 1, 2)
    dm.s[1] = (np.nan, 1, 2)
    dm.s[2] = (np.nan, 1, 2)

    int_mapping = {0: 100, 2: 200}
    dm.a = ops.replace(dm.a, int_mapping)
    # The same NaN <-> number mapping is applied to both float-like columns.
    dm.c = ops.replace(dm.c, {np.nan: 100, 2: np.nan})
    dm.s = ops.replace(dm.s, {np.nan: 100, 2: np.nan})

    check_col(dm.a, [100, 1, 200])
    check_col(dm.c, [100, 1, np.nan])
    expected_series = [
        [0, 1, np.nan],
        [100, 1, np.nan],
        [100, 1, np.nan],
    ]
    check_series(dm.s, expected_series)
def test_split():
	"""Check that ops.split() yields one (value, sub-matrix) pair per
	unique value of the split column, in order of the values."""
	dm = DataMatrix(length=4)
	dm.a = 'a', 'a', 'b', 'b'
	dm.b = 0, 1, 2, 3
	g = ops.split(dm.a)
	# Use the next() builtin rather than g.next(): the .next() method only
	# exists on Python 2 iterators, whereas next() works on Python 2.6+
	# and Python 3.
	val, sub = next(g)
	eq_(val, 'a')
	check_col(sub.a, ['a', 'a'])
	check_col(sub.b, [0, 1])
	val, sub = next(g)
	eq_(val, 'b')
	check_col(sub.a, ['b', 'b'])
	check_col(sub.b, [2, 3])
def test_intcolumn():
    """Check row access, selection and concatenation for IntColumn, plus
    comparisons against non-int values and against types."""
    check_getrow(IntColumn)
    check_select(IntColumn)
    check_concat(IntColumn, invalid=0)

    # Selections with non-int reference values
    matrix = DataMatrix(length=2, default_col_type=IntColumn)
    matrix.col = (1, 2)
    floored = matrix.col == '1.1'  # '1.1' is expected to be floored to 1
    check_col(floored.col, [1])
    # The empty string is expected to match nothing ...
    check_col((matrix.col == '').col, [])
    # ... so its negation matches every row.
    check_col((matrix.col != '').col, [1, 2])

    # Ordering comparisons against a non-numeric string must fail.
    @raises(TypeError)
    def compare_with_empty_string():
        matrix.col > ''

    compare_with_empty_string()

    # Selecting by type: every value in an IntColumn counts as int only.
    matrix = DataMatrix(length=2, default_col_type=IntColumn)
    matrix.col = (1, 2)
    for selector, n_matching in ((int, 2), (float, 0), (str, 0)):
        eq_(len(matrix.col == selector), n_matching)
        eq_(len(matrix.col != selector), 2 - n_matching)
示例#21
0
def test_intcolumn():
    """Check assignment, invalid input, resizing, uniqueness and dtype of
    an IntColumn."""
    matrix = DataMatrix(length=2)

    # Assignment by scalar, by sequence and by slice
    matrix.col = IntColumn
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = (2, 3)
    check_col(matrix.col, [2, 3])
    matrix.col[:-1] = 4
    check_col(matrix.col, [4, 3])

    # Non-numeric strings must be rejected with TypeError.
    @raises(TypeError)
    def assign_str_to_cell():
        matrix.col[0] = "test"

    @raises(TypeError)
    def assign_str_to_slice():
        matrix.col[:] = "test"

    assign_str_to_cell()
    assign_str_to_slice()

    # Shrinking to zero rows and growing again must not break the column.
    matrix.length = 0
    matrix.length = 4

    # Unique values
    matrix.col = (1, 2, 1, 2)
    ok_(sorted(matrix.col.unique) == [1, 2])

    # The backing sequence is expected to hold 64-bit integers.
    ok_(matrix.col._seq.dtype == np.int64)
    check_integrity(matrix)
def test_shuffle_horiz():
	"""Check ops.shuffle_horiz() on a whole matrix and on selected columns.

	Shuffling is random, so each target permutation is awaited in a retry
	loop: the loop keeps shuffling until check_row() accepts the first row.
	"""
	dm = DataMatrix(length=2)
	dm.a = 'a', 'b'
	dm.b = 0, 1
	dm.c = '-', '-'
	# Whole-matrix shuffle: wait until row 0 reads (0, '-', 'a').
	# Only ordinary exceptions mean "not this permutation yet"; a bare
	# ``except`` would also swallow KeyboardInterrupt and SystemExit and
	# make this unbounded loop impossible to interrupt.
	while True:
		dm2 = ops.shuffle_horiz(dm)
		try:
			check_row(dm2[0], [0, '-', 'a'])
			break
		except Exception:
			pass
	# Shuffle restricted to columns a and b: column c must keep its place.
	while True:
		dm2 = ops.shuffle_horiz(dm.a, dm.b)
		try:
			check_row(dm2[0], [0, 'a', '-'])
			break
		except Exception:
			pass
	# Repeated shuffles must leave column c of the source matrix intact.
	# NOTE(review): this checks dm.c (the source), not the shuffled result;
	# confirm whether the result's column c was meant to be checked too.
	for _ in range(1000):
		ops.shuffle_horiz(dm.a, dm.b)
		check_col(dm.c, ['-', '-'])
示例#23
0
def check_floatcolumn_sorting():
    """Shuffle a FloatColumn filled with numbers, numeric strings, None and
    non-numeric strings, sort by it, and compare with the expected order."""
    dm = DataMatrix(length=24, default_col_type=FloatColumn)
    mixed_values = [
        1, '1', 2, '2', 1.1, '1.1', 2.1, '2.1', INF, -INF, 'inf', '-inf',
        NAN, NAN, 'nan', 'nan', None, None, None, None, 'alpha', 'beta',
        'None', ''
    ]
    # Assigning these values to a FloatColumn is expected to emit a
    # UserWarning.
    with pytest.warns(UserWarning):
        dm.c = mixed_values
    dm.c = ops.shuffle(dm.c)
    dm = ops.sort(dm, by=dm.c)
    # Twelve numeric values in ascending order; the other twelve entries
    # are all expected to end up as NaN, sorted last.
    numeric_part = [-INF, -INF, 1, 1, 1.1, 1.1, 2, 2, 2.1, 2.1, INF, INF]
    check_col(dm.c, numeric_part + [NAN] * 12)
def test_shuffle_horiz():
    """Check ops.shuffle_horiz() on a whole matrix, on selected columns and
    on a single column.

    Shuffling is random, so each target permutation is awaited in a retry
    loop: the loop keeps shuffling until check_row() accepts the first row.
    """
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    dm.c = '-', '-'
    # Whole-matrix shuffle: wait until row 0 reads (0, '-', 'a').
    # Only ordinary exceptions mean "not this permutation yet"; a bare
    # ``except`` would also swallow KeyboardInterrupt and SystemExit and
    # make this unbounded loop impossible to interrupt.
    while True:
        dm2 = ops.shuffle_horiz(dm)
        try:
            check_row(dm2[0], [0, '-', 'a'])
            break
        except Exception:
            pass
    # Shuffle restricted to columns a and b: column c must keep its place.
    while True:
        dm2 = ops.shuffle_horiz(dm.a, dm.b)
        try:
            check_row(dm2[0], [0, 'a', '-'])
            break
        except Exception:
            pass
    # Repeated shuffles must leave column c of the source matrix intact.
    # NOTE(review): this checks dm.c (the source), not the shuffled result;
    # confirm whether the result's column c was meant to be checked too.
    for _ in range(1000):
        ops.shuffle_horiz(dm.a, dm.b)
        check_col(dm.c, ['-', '-'])
    # A single column is accepted too; this call only has to not raise.
    ops.shuffle_horiz(dm.a)
def check_float_operations():
	"""Check in-place multiplication of a FloatColumn by a scalar and by a
	sequence containing inf and NaN."""
	matrix = DataMatrix(length=2, default_col_type=FloatColumn)
	matrix.col = (1, 2)
	check_col(matrix.col, [1, 2])
	matrix.col *= 2.5
	check_col(matrix.col, [2.5, 5])
	# Row-wise factors: the first row becomes inf, the second NaN.
	matrix.col *= (np.inf, np.nan)
	check_col(matrix.col, [np.inf, np.nan])
	check_integrity(matrix)
def check_concat(col_type, invalid):
    """Check concatenation (<<) of two matrices with partly shared columns.

    col_type is the default column type of both matrices; invalid is the
    filler value expected in rows where a column did not exist.
    """
    upper = DataMatrix(length=2, default_col_type=col_type)
    upper.col1 = (1, 2)
    upper.col_shared = (3, 4)
    lower = DataMatrix(length=2, default_col_type=col_type)
    lower.col2 = (5, 6)
    lower.col_shared = (7, 8)
    stacked = upper << lower
    filler = [invalid] * 2
    # Columns present in only one operand are padded with the filler.
    check_col(stacked.col1, [1, 2] + filler)
    check_col(stacked.col_shared, [3, 4, 7, 8])
    check_col(stacked.col2, filler + [5, 6])
def check_concat(col_type, invalid):
	"""Check concatenation (<<) of two matrices with partly shared columns.

	col_type is the default column type of both matrices; invalid is the
	filler value expected in rows where a column did not exist.
	"""
	upper = DataMatrix(length=2, default_col_type=col_type)
	upper.col1 = (1, 2)
	upper.col_shared = (3, 4)
	lower = DataMatrix(length=2, default_col_type=col_type)
	lower.col2 = (5, 6)
	lower.col_shared = (7, 8)
	stacked = upper << lower
	filler = [invalid] * 2
	# Columns present in only one operand are padded with the filler.
	check_col(stacked.col1, [1, 2] + filler)
	check_col(stacked.col_shared, [3, 4, 7, 8])
	check_col(stacked.col2, filler + [5, 6])
示例#28
0
def test_mixedcolumn():
    """Check assignment, resizing and uniqueness on a default column of a
    DataMatrix, which accepts both numbers and strings."""
    matrix = DataMatrix(length=2)

    # Assignment by scalar, by sequence and by slice
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = (2, 3)
    check_col(matrix.col, [2, 3])
    matrix.col[:-1] = 4
    check_col(matrix.col, [4, 3])
    # Strings are accepted as-is.
    matrix.col[:] = "test"
    check_col(matrix.col, ["test", "test"])

    # Shrinking to zero rows and growing again must not break the column.
    matrix.length = 0
    matrix.length = 4

    # Unique values
    matrix.col = (1, 2, 1, 2)
    ok_(sorted(matrix.col.unique) == [1, 2])
    check_integrity(matrix)
def test_shuffle():
    """Check ops.shuffle() on a single column and on a whole matrix.

    Shuffling is random, so each case retries until the reversed order
    shows up.
    """
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    # Column shuffle: only column a may change, b must stay (0, 1).
    # Only ordinary exceptions mean "not reversed yet"; a bare ``except``
    # would also swallow KeyboardInterrupt and SystemExit and make this
    # unbounded loop impossible to interrupt.
    while True:
        dm.a = ops.shuffle(dm.a)
        check_col(dm.b, [0, 1])
        try:
            check_col(dm.a, ['b', 'a'])
            break
        except Exception:
            pass
    dm = DataMatrix(length=2)
    dm.a = 'a', 'b'
    dm.b = 0, 1
    # Matrix shuffle: rows move as a whole, so a and b reverse together.
    while True:
        dm = ops.shuffle(dm)
        try:
            check_col(dm.a, ['b', 'a'])
            check_col(dm.b, [1, 0])
            break
        except Exception:
            pass
def test_shuffle():
	"""Check ops.shuffle() on a single column and on a whole matrix.

	Shuffling is random, so each case retries until the reversed order
	shows up.
	"""
	dm = DataMatrix(length=2)
	dm.a = 'a', 'b'
	dm.b = 0, 1
	# Column shuffle: only column a may change, b must stay (0, 1).
	# Only ordinary exceptions mean "not reversed yet"; a bare ``except``
	# would also swallow KeyboardInterrupt and SystemExit and make this
	# unbounded loop impossible to interrupt.
	while True:
		dm.a = ops.shuffle(dm.a)
		check_col(dm.b, [0, 1])
		try:
			check_col(dm.a, ['b', 'a'])
			break
		except Exception:
			pass
	dm = DataMatrix(length=2)
	dm.a = 'a', 'b'
	dm.b = 0, 1
	# Matrix shuffle: rows move as a whole, so a and b reverse together.
	while True:
		dm = ops.shuffle(dm)
		try:
			check_col(dm.a, ['b', 'a'])
			check_col(dm.b, [1, 0])
			break
		except Exception:
			pass
示例#31
0
def test_floatcolumn():
    """Check assignment, invalid input, resizing, uniqueness and dtype of a
    FloatColumn."""
    matrix = DataMatrix(length=2)

    # Assignment by scalar, by sequence and by slice
    matrix.col = FloatColumn
    matrix.col = 1
    check_col(matrix.col, [1, 1])
    matrix.col = (2, 3)
    check_col(matrix.col, [2, 3])
    matrix.col[:-1] = 4
    check_col(matrix.col, [4, 3])

    # A non-numeric string is expected to be stored as NaN in every row.
    matrix.col[:] = "test"
    for cell in matrix.col:
        ok_(np.isnan(cell))

    # Shrinking to zero rows and growing again must not break the column.
    matrix.length = 0
    matrix.length = 4

    # Unique values
    matrix.col = (1, 2, 1, 2)
    ok_(sorted(matrix.col.unique) == [1, 2])

    # The backing sequence is expected to hold 64-bit floats.
    ok_(matrix.col._seq.dtype == np.float64)
    check_integrity(matrix)
示例#32
0
def _test_numericcolumn(cls):
	"""Shared checks for a column class.

	cls is the column class under test; it is assigned to `dm.col` to
	create the column. The checks cover assignment by scalar, sequence and
	slice, assignment through a selection, resizing, uniqueness, the types
	returned by indexing, and (except for MixedColumn) the `array` property.
	"""

	# Test init and change by single value
	dm = DataMatrix(length=2)
	dm.col = cls
	dm.col = 1
	check_col(dm.col, [1, 1])
	dm.col = 2
	check_col(dm.col, [2, 2])
	# Test init and change by sequence
	dm = DataMatrix(length=2)
	dm.col = cls
	dm.col = 1, 2
	check_col(dm.col, [1, 2])
	dm.col = 3, 4
	check_col(dm.col, [3, 4])
	# Test setting by slice
	dm = DataMatrix(length=3)
	dm.col = cls
	dm.col = 1
	dm.col[1:] = 2
	check_col(dm.col, [1, 2, 2])
	dm.col[:-1] = 4, 3
	check_col(dm.col, [4, 3, 2])
	# Test setting by DataMatrix (a selection used as the index).
	# Note that this part always uses a FloatColumn for `y`, regardless
	# of cls.
	dm = DataMatrix(length=10)
	dm.x = range(10)
	dm.y = FloatColumn
	# Drop the rows where x is 3 or 6, leaving 8 rows
	dm = dm.x != {3, 6}
	dm.y[dm.x > 3] = 10
	# Overwrites the 10s for the last two rows (x = 8 and x = 9)
	dm.y[dm.x >= 8] = 11
	check_col(dm.y, [np.nan] * 3 + [10] * 3 + [11] * 2)
	# Test shortening and lengthening
	dm = DataMatrix(length=4)
	dm.length = 0
	dm.length = 4
	# Check uniqueness (this `col` is created on a fresh matrix without
	# assigning cls first, so it has the matrix's default column type)
	dm.col = 1, 2, 1, 2
	assert sorted(dm.col.unique) == [1,2]
	# Assign through a selection: both rows equal to 2 become 0
	dm.col[dm.col == 2] = 0, 0
	check_col(dm.col, [1, 0, 1, 0])
	check_integrity(dm)
	# Check if numericcolumns return right type
	dm = DataMatrix(length=5)
	dm.col = cls
	dm.col = 1, 2, 3, 4, 5
	# Single int index -> scalar (int or float)
	val = dm.col[2]
	assert isinstance(val, (int, float))
	assert val == 3
	# Tuple of ints -> column of type cls
	val = dm.col[1, 3]
	assert isinstance(val, cls)
	check_col(val, [2, 4])
	# Slice -> column of type cls
	val = dm.col[1:-1]
	assert isinstance(val, cls)
	check_col(val, [2, 3, 4])
	# Check array setting and getting (skipped for MixedColumn)
	if cls != MixedColumn:
		a = dm.col.array
		assert isinstance(a, np.ndarray)
		assert a.shape == (5,)
		assert all(a == [1, 2, 3, 4, 5])
def test_floatcolumn():
    """Check row access, selection and concatenation for FloatColumn, plus
    comparisons against strings, NaN, inf and types."""
    check_getrow(FloatColumn)
    check_select(FloatColumn)
    check_concat(FloatColumn, invalid=np.nan)

    # Selections with non-float reference values
    matrix = DataMatrix(length=4, default_col_type=FloatColumn)
    matrix.col = (1, 2, np.nan, np.inf)
    check_col((matrix.col == '1').col, [1])

    # The empty string is expected to be treated like NaN, with a
    # UserWarning.
    with pytest.warns(UserWarning):
        nan_like = matrix.col == ''
    check_col(nan_like.col, [np.nan])
    with pytest.warns(UserWarning):
        not_nan_like = matrix.col != ''
    check_col(not_nan_like.col, [1, 2, np.inf])

    # NaN and inf can be selected explicitly.
    check_col((matrix.col == np.nan).col, [np.nan])
    check_col((matrix.col != np.nan).col, [1, 2, np.inf])
    check_col((matrix.col == np.inf).col, [np.inf])
    check_col((matrix.col != np.inf).col, [1, 2, np.nan])

    # Ordering comparisons against a non-numeric string warn and fail.
    with pytest.warns(UserWarning), pytest.raises(TypeError):
        matrix.col > ''

    # Selecting by type: every value in a FloatColumn counts as float only.
    matrix = DataMatrix(length=2, default_col_type=FloatColumn)
    matrix.col = (1, 2)
    for selector, n_matching in ((float, 2), (str, 0), (int, 0)):
        assert len(matrix.col == selector) == n_matching
        assert len(matrix.col != selector) == 2 - n_matching
def test_bin_split():
    """Check ops.bin_split(): bins come back in ascending order of the
    split column, and asking for more bins than rows raises ValueError."""
    dm = DataMatrix(length=4)
    dm.a = range(4)
    # Shuffle first, so the split cannot rely on the original row order.
    dm = ops.shuffle(dm)

    lower_half, upper_half = ops.bin_split(dm.a, 2)
    check_col(lower_half.a, [0, 1])
    check_col(upper_half.a, [2, 3])

    # With three bins over four rows, the last bin takes the extra row.
    first, second, third = ops.bin_split(dm.a, 3)
    check_col(first.a, [0])
    check_col(second.a, [1])
    check_col(third.a, [2, 3])

    (everything,) = ops.bin_split(dm.a, 1)
    check_col(everything.a, [0, 1, 2, 3])

    # Five bins cannot be filled from four rows.
    with pytest.raises(ValueError):
        (x,) = ops.bin_split(dm.a, 5)
示例#35
0
def check_nan_sort():
    """Check that sorting FloatColumns places NaN after all numbers."""
    dm = DataMatrix(length=3, default_col_type=FloatColumn)
    dm.col1 = (2, np.nan, 1)
    dm.col2 = (1, 2, np.nan)

    # Sort whole rows by col1, then by col2; the other column follows.
    dm = operations.sort(dm, by=dm.col1)
    check_col(dm.col1, [1, 2, np.nan])
    check_col(dm.col2, [np.nan, 1, 2])
    dm = operations.sort(dm, by=dm.col2)
    check_col(dm.col1, [2, np.nan, 1])
    check_col(dm.col2, [1, 2, np.nan])

    # Sort each column on its own.
    dm.col1 = operations.sort(dm.col1)
    dm.col2 = operations.sort(dm.col2)
    ascending_nan_last = [1, 2, np.nan]
    check_col(dm.col1, ascending_nan_last)
    check_col(dm.col2, ascending_nan_last)
    check_integrity(dm)
def check_select(col_type):
    """Check row selection by comparison, combined comparisons, sequences,
    sets and callables.

    col_type is the column class used as the matrix's default column type.
    """
    dm = DataMatrix(length=2, default_col_type=col_type)
    dm.col = (1, 2)

    # Single comparisons
    check_col((dm.col < 2).col, [1])
    check_col((dm.col == 2).col, [2])

    # Combined selections: or, and, xor
    either = (dm.col == 1) | (dm.col == 2)
    check_col(either.col, [1, 2])
    both = (dm.col == 1) & (dm.col == 2)
    check_col(both.col, [])
    exactly_one = (dm.col == 1) ^ (dm.col == 2)
    check_col(exactly_one.col, [1, 2])

    # A sequence of matching length is compared pair-wise with the rows.
    check_col((dm.col == (1, 3)).col, [1])

    # A set matches rows equal to any of its members.
    check_col((dm.col == {2, 3, 4}).col, [2])
    check_col((dm.col != {1, 3, 4}).col, [2])

    # A callable is used as a per-value predicate.
    check_col((dm.col == (lambda x: x == 2)).col, [2])
    check_col((dm.col != (lambda x: x == 2)).col, [1])
    check_integrity(dm)
示例#37
0
def _test_numericcolumn(cls):
    """Shared checks for a column class.

    cls is the column class under test; it is assigned to `dm.col` to
    create the column. The checks cover assignment by scalar, sequence and
    slice, resizing, uniqueness, the types returned by indexing, and
    (except for MixedColumn) the `array` property.
    """

    # Test init and change by single value
    dm = DataMatrix(length=2)
    dm.col = cls
    dm.col = 1
    check_col(dm.col, [1, 1])
    dm.col = 2
    check_col(dm.col, [2, 2])
    # Test init and change by sequence
    dm = DataMatrix(length=2)
    dm.col = cls
    dm.col = 1, 2
    check_col(dm.col, [1, 2])
    dm.col = 3, 4
    check_col(dm.col, [3, 4])
    # Test setting by slice
    dm = DataMatrix(length=3)
    dm.col = cls
    dm.col = 1
    dm.col[1:] = 2
    check_col(dm.col, [1, 2, 2])
    dm.col[:-1] = 4, 3
    check_col(dm.col, [4, 3, 2])
    # Test shortening and lengthening
    dm = DataMatrix(length=4)
    dm.length = 0
    dm.length = 4
    # Check uniqueness (this `col` is created on a fresh matrix without
    # assigning cls first, so it has the matrix's default column type)
    dm.col = 1, 2, 1, 2
    ok_(sorted(dm.col.unique) == [1, 2])
    # Assign through a selection: both rows equal to 2 become 0
    dm.col[dm.col == 2] = 0, 0
    check_col(dm.col, [1, 0, 1, 0])
    check_integrity(dm)
    # Check if numericcolumns return right type
    dm = DataMatrix(length=5)
    dm.col = cls
    dm.col = 1, 2, 3, 4, 5
    # Single int index -> scalar (int or float)
    val = dm.col[2]
    ok_(isinstance(val, (int, float)))
    eq_(val, 3)
    # Tuple of ints -> column of type cls
    val = dm.col[1, 3]
    ok_(isinstance(val, cls))
    check_col(val, [2, 4])
    # Slice -> column of type cls
    val = dm.col[1:-1]
    ok_(isinstance(val, cls))
    check_col(val, [2, 3, 4])
    # Check array setting and getting (skipped for MixedColumn)
    if cls != MixedColumn:
        a = dm.col.array
        ok_(isinstance(a, np.ndarray))
        eq_(a.shape, (5, ))
        ok_(all(a == [1, 2, 3, 4, 5]))
def test_floatcolumn():
    """Check row access, selection and concatenation for FloatColumn, plus
    comparisons against strings, NaN, inf and types."""
    check_getrow(FloatColumn)
    check_select(FloatColumn)
    check_concat(FloatColumn, invalid=np.nan)

    # Selections with non-float reference values
    matrix = DataMatrix(length=4, default_col_type=FloatColumn)
    matrix.col = (1, 2, np.nan, np.inf)
    check_col((matrix.col == '1').col, [1])
    # The empty string is expected to be treated like NaN.
    check_col((matrix.col == '').col, [np.nan])
    check_col((matrix.col != '').col, [1, 2, np.inf])
    # NaN and inf can be selected explicitly.
    check_col((matrix.col == np.nan).col, [np.nan])
    check_col((matrix.col != np.nan).col, [1, 2, np.inf])
    check_col((matrix.col == np.inf).col, [np.inf])
    check_col((matrix.col != np.inf).col, [1, 2, np.nan])

    # Ordering comparisons against a non-numeric string must fail.
    @raises(TypeError)
    def compare_with_empty_string():
        matrix.col > ''

    compare_with_empty_string()

    # Selecting by type: every value in a FloatColumn counts as float only.
    matrix = DataMatrix(length=2, default_col_type=FloatColumn)
    matrix.col = (1, 2)
    for selector, n_matching in ((float, 2), (str, 0), (int, 0)):
        eq_(len(matrix.col == selector), n_matching)
        eq_(len(matrix.col != selector), 2 - n_matching)
示例#39
0
def test_split():
    """Check ops.split() both as a generator over unique values and with
    explicitly requested values."""
    source = DataMatrix(length=4)
    source.a = ('a', 'a', 'b', 'b')
    source.b = (0, 1, 2, 3)

    # Without values: yields (value, sub-matrix) pairs.
    groups = ops.split(source.a)
    expected_groups = (
        ('a', ['a', 'a'], [0, 1]),
        ('b', ['b', 'b'], [2, 3]),
    )
    for expected_val, expected_a, expected_b in expected_groups:
        val, sub = next(groups)
        assert val == expected_val
        check_col(sub.a, expected_a)
        check_col(sub.b, expected_b)

    # With values: returns one sub-matrix per requested value.
    source = DataMatrix(length=4)
    source.a = ('a', 'a', 'b', 'b')
    source.b = (0, 1, 2, 3)
    rows_a, rows_b = ops.split(source.a, 'a', 'b')
    check_col(rows_a.a, ['a', 'a'])
    check_col(rows_a.b, [0, 1])
    check_col(rows_b.a, ['b', 'b'])
    check_col(rows_b.b, [2, 3])
示例#40
0
def check_nan_sort():
	"""Check that sorting FloatColumns places NaN after all numbers."""
	dm = DataMatrix(length=3, default_col_type=FloatColumn)
	dm.col1 = (2, np.nan, 1)
	dm.col2 = (1, 2, np.nan)

	# Sort whole rows by col1, then by col2; the other column follows.
	dm = operations.sort(dm, by=dm.col1)
	check_col(dm.col1, [1, 2, np.nan])
	check_col(dm.col2, [np.nan, 1, 2])
	dm = operations.sort(dm, by=dm.col2)
	check_col(dm.col1, [2, np.nan, 1])
	check_col(dm.col2, [1, 2, np.nan])

	# Sort each column on its own.
	dm.col1 = operations.sort(dm.col1)
	dm.col2 = operations.sort(dm.col2)
	ascending_nan_last = [1, 2, np.nan]
	check_col(dm.col1, ascending_nan_last)
	check_col(dm.col2, ascending_nan_last)
	check_integrity(dm)
def check_operations(col_type):
	"""Check arithmetic on a column, in place and with the column as the
	right-hand operand.

	col_type is the column class used as the matrix's default column type.
	Every step operates on the result of the previous one, so the expected
	values only hold if the statements run in this exact order.
	"""

	dm = DataMatrix(length=2, default_col_type=col_type)
	# Numeric strings are mixed in on purpose, both here and in the
	# sequence operands below
	dm.col = 1, '2'
	# In-place operations, alternating a scalar and a per-row sequence
	dm.col += 1
	check_col(dm.col, [2, 3])
	dm.col += 1, '2'
	check_col(dm.col, [3, 5])
	dm.col -= 1
	check_col(dm.col, [2, 4])
	dm.col -= 1, '2'
	check_col(dm.col, [1, 2])
	dm.col *= 2
	check_col(dm.col, [2, 4])
	dm.col *= 1, '2'
	check_col(dm.col, [2, 8])
	dm.col /= 2
	check_col(dm.col, [1, 4])
	dm.col /= 1, '2'
	check_col(dm.col, [1, 2])
	# Right-hand operations (the column is the right operand)
	dm.col = 1 + dm.col
	check_col(dm.col, [2, 3])
	dm.col = (1, 2) + dm.col
	check_col(dm.col, [3, 5])
	dm.col = 5 - dm.col
	check_col(dm.col, [2, 0])
	dm.col = (3, 1) - dm.col
	check_col(dm.col, [1, 1])
	dm.col = 2 * dm.col
	check_col(dm.col, [2, 2])
	dm.col = (1, 2) * dm.col
	check_col(dm.col, [2, 4])
	dm.col = 4 / dm.col
	check_col(dm.col, [2, 1])
	dm.col = (4, 2) / dm.col
	check_col(dm.col, [2, 2])
	dm.col = 2 ** dm.col
	check_col(dm.col, [4, 4])
	dm.col = (2, 4) ** dm.col
	check_col(dm.col, [16, 256])
	dm.col = 17 % dm.col
	check_col(dm.col, [1, 17])
	dm.col = (2, 16) % dm.col
	check_col(dm.col, [0, 16])
	check_integrity(dm)
def test_bin_split():
	"""Check ops.bin_split(): bins come back in ascending order of the
	split column, and asking for more bins than rows raises ValueError."""
	dm = DataMatrix(length=4)
	dm.a = range(4)
	# Shuffle first, so the split cannot rely on the original row order.
	dm = ops.shuffle(dm)

	lower_half, upper_half = ops.bin_split(dm.a, 2)
	check_col(lower_half.a, [0, 1])
	check_col(upper_half.a, [2, 3])

	# With three bins over four rows, the last bin takes the extra row.
	first, second, third = ops.bin_split(dm.a, 3)
	check_col(first.a, [0])
	check_col(second.a, [1])
	check_col(third.a, [2, 3])

	(everything,) = ops.bin_split(dm.a, 1)
	check_col(everything.a, [0, 1, 2, 3])

	# Five bins cannot be filled from four rows.
	@raises(ValueError)
	def split_into_too_many_bins():
		(x,) = ops.bin_split(dm.a, 5)

	split_into_too_many_bins()
示例#43
0
def check_sort(col_type):
	"""Check operations.sort() on whole matrices and on single columns.

	col_type is the column class used as the matrix's default column type.
	"""
	ascending = [1, 2, 3]
	descending = [3, 2, 1]
	dm = DataMatrix(length=3, default_col_type=col_type)
	dm.col1 = (3, 2, 1)
	dm.col2 = (1, 2, 3)

	# Sorting the matrix by one column reorders whole rows.
	dm = operations.sort(dm, by=dm.col1)
	check_col(dm.col1, ascending)
	check_col(dm.col2, descending)
	dm = operations.sort(dm, by=dm.col2)
	check_col(dm.col1, descending)
	check_col(dm.col2, ascending)

	# Sorting one column by another reorders only the sorted column.
	dm.col2 = operations.sort(dm.col2, by=dm.col1)
	check_col(dm.col2, descending)

	# Sorting each column by itself.
	dm.col1 = operations.sort(dm.col1)
	dm.col2 = operations.sort(dm.col2)
	check_col(dm.col1, ascending)
	check_col(dm.col2, ascending)
	check_integrity(dm)
def check_operations(col_type):
	"""Check in-place arithmetic on a column with scalar and per-row
	sequence operands.

	col_type is the column class used as the matrix's default column type.
	Every step operates on the result of the previous one, so the expected
	values only hold if the statements run in this exact order.
	"""

	dm = DataMatrix(length=2, default_col_type=col_type)
	dm.col = 1, 2
	dm.col += 1
	check_col(dm.col, [2, 3])
	dm.col += 1, 2
	check_col(dm.col, [3, 5])
	dm.col -= 1
	check_col(dm.col, [2, 4])
	dm.col -= 1, 2
	check_col(dm.col, [1, 2])
	dm.col *= 2
	check_col(dm.col, [2, 4])
	# 2 * 1.5 = 3 and 4 * 3 = 12
	dm.col *= 1.5, 3
	check_col(dm.col, [3, 12])
	dm.col /= 3
	check_col(dm.col, [1, 4])
	dm.col /= 1, 2
	check_col(dm.col, [1, 2])
	# Floor division: 1 // 1.5 and 2 // 2.5 are both 0
	dm.col //= 1.5, 2.5
	check_col(dm.col, [0, 0])
	check_integrity(dm)
def test_split():
    """Check ops.split() as a generator over one column, with explicitly
    requested values, and as a generator over several columns."""
    source = DataMatrix(length=4)
    source.a = ('a', 'a', 'b', 'b')
    source.b = (0, 1, 2, 3)

    # Without values: yields (value, sub-matrix) pairs.
    groups = ops.split(source.a)
    expected_groups = (
        ('a', ['a', 'a'], [0, 1]),
        ('b', ['b', 'b'], [2, 3]),
    )
    for expected_val, expected_a, expected_b in expected_groups:
        val, sub = next(groups)
        assert val == expected_val
        check_col(sub.a, expected_a)
        check_col(sub.b, expected_b)

    # With values: returns one sub-matrix per requested value.
    source = DataMatrix(length=4)
    source.a = ('a', 'a', 'b', 'b')
    source.b = (0, 1, 2, 3)
    rows_a, rows_b = ops.split(source.a, 'a', 'b')
    check_col(rows_a.a, ['a', 'a'])
    check_col(rows_a.b, [0, 1])
    check_col(rows_b.a, ['b', 'b'])
    check_col(rows_b.b, [2, 3])

    # With multiple columns: yields one tuple per combination of values,
    # with the last column varying fastest.
    source = DataMatrix(length=8)
    source.A = (0, 0, 1, 1, 0, 0, 1, 1)
    source.B = ('a', 'b', 'a', 'b', 'a', 'b', 'a', 'b')
    source.C = ('x', 'x', 'x', 'x', 'y', 'y', 'y', 'y')

    # Two columns: four combinations of two rows each.
    groups = ops.split(source.A, source.B)
    for level_a in (0, 1):
        for level_b in ('a', 'b'):
            val1, val2, sdm = next(groups)
            assert val1 == level_a
            assert val2 == level_b
            assert (len(sdm) == 2)

    # Three columns: eight combinations of one row each.
    groups = ops.split(source.A, source.B, source.C)
    for level_a in (0, 1):
        for level_b in ('a', 'b'):
            for level_c in ('x', 'y'):
                val1, val2, val3, sdm = next(groups)
                assert val1 == level_a
                assert val2 == level_b
                assert val3 == level_c
                assert (len(sdm) == 1)
示例#46
0
def test_floatcolumn():
    """Run the shared numeric and copy checks on FloatColumn, then check
    float coercion, NaN and inf handling, and the dtype."""
    _test_numericcolumn(FloatColumn)
    _test_copying(FloatColumn)

    # Numeric strings are expected to be converted to floats.
    matrix = DataMatrix(length=2)
    matrix.col = FloatColumn
    matrix.col = (1.9, '2.9')
    check_col(matrix.col, [1.9, 2.9])

    all_nan = [np.nan, np.nan]
    all_inf = [np.inf, np.inf]
    # Each entry is (assigned value, expected column content).
    cases = (
        # NaN: by name, None, the float itself, or any non-numeric string
        ('nan', all_nan),
        (None, all_nan),
        (np.nan, all_nan),
        ('x', all_nan),
        # inf: by name or the float itself
        ('inf', all_inf),
        (np.inf, all_inf),
        # Per-row sequences mixing NaN and inf
        (('nan', 'inf'), [np.nan, np.inf]),
        ((np.inf, np.nan), [np.inf, np.nan]),
        (('x', None), [np.nan, np.nan]),
    )
    for assigned, expected in cases:
        matrix.col = assigned
        check_col(matrix.col, expected)

    # The backing sequence is expected to hold 64-bit floats.
    ok_(matrix.col._seq.dtype == np.float64)
    check_integrity(matrix)
示例#47
0
def test_seriescolumn():
    """Check SeriesColumn: copying, assignment by scalar, sequence and
    two-dimensional index, changing length and depth, and the type
    returned by each kind of index.

    The column is indexed as [row] or [row, sample]. The steps mutate the
    same column in sequence, so each expected value depends on the
    statements before it.
    """

    _test_copying(SeriesColumn(depth=1))
    dm = DataMatrix(length=2)
    dm.col = SeriesColumn(depth=3)
    # Set all rows to a single value
    dm.col = 1
    check_series(dm.col, [[1, 1, 1], [1, 1, 1]])
    # Set rows to different single values
    dm.col = 2, 3
    check_series(dm.col, [[2, 2, 2], [3, 3, 3]])
    # Set one row to a single value
    dm.col[0] = 4
    check_series(dm.col, [[4, 4, 4], [3, 3, 3]])
    # Set one row to different single values
    dm.col[1] = 5, 6, 7
    check_series(dm.col, [[4, 4, 4], [5, 6, 7]])
    # Set all rows to different single values
    dm.col.setallrows([8, 9, 10])
    check_series(dm.col, [[8, 9, 10], [8, 9, 10]])
    # Set the first value in all rows
    dm.col[:, 0] = 1
    check_series(dm.col, [[1, 9, 10], [1, 9, 10]])
    # Set all values in the first row
    dm.col[0, :] = 2
    check_series(dm.col, [[2, 2, 2], [1, 9, 10]])
    # Set all values
    dm.col[:, :] = 3
    check_series(dm.col, [[3, 3, 3], [3, 3, 3]])
    # Test shortening and lengthening
    dm.length = 0
    check_series(dm.col, [])
    dm.length = 3
    dm.col = 1, 2, 3
    # Reducing the depth truncates each row; increasing it again pads
    # the new samples with NAN
    dm.col.depth = 1
    check_series(dm.col, [[1], [2], [3]])
    dm.col.depth = 3
    check_series(dm.col, [[1, NAN, NAN], [2, NAN, NAN], [3, NAN, NAN]])
    check_integrity(dm)
    # Test how an assigned sequence is interpreted: a sequence as long as
    # the number of rows gives one value per row, whereas a sequence as
    # long as the depth is used as the content of every row
    dm = DataMatrix(length=2)
    dm.col = SeriesColumn(depth=3)
    dm.col = 1, 2
    check_series(dm.col, [[1, 1, 1], [2, 2, 2]])
    dm.col = 3, 4, 5
    check_series(dm.col, [[3, 4, 5]] * 2)
    # With depth equal to length, [:] assigns per row and [:, :] per sample
    dm.col.depth = 2
    dm.col[:] = 1, 2
    check_series(dm.col, [[1, 1], [2, 2]])
    dm.col[:, :] = 3, 4
    check_series(dm.col, [[3, 4], [3, 4]])
    # Check if series return right type
    dm = DataMatrix(length=4)
    dm.col = SeriesColumn(depth=5)
    dm.col = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20]]
    # (int, int) -> float
    val = dm.col[2, 2]
    eq_(val, 13)
    eq_(type(val), float)
    # (int) -> array
    val = dm.col[2]
    ok_(all(val == np.array([11, 12, 13, 14, 15])))
    eq_(type(val), np.ndarray)
    # (int, slice) -> array
    val = dm.col[2, 1:-1]
    ok_(all(val == np.array([12, 13, 14])))
    eq_(type(val), np.ndarray)
    # (int, (int, int)) -> array
    val = dm.col[2, (1, 3)]
    ok_(all(val == np.array([12, 14])))
    eq_(type(val), np.ndarray)
    # (slice) -> SeriesColumn
    val = dm.col[1:-1]
    check_series(val, [
        [6, 7, 8, 9, 10],
        [11, 12, 13, 14, 15],
    ])
    # (slice, int) -> FloatColumn
    val = dm.col[1:-1, 2]
    ok_(isinstance(val, FloatColumn))
    check_col(val, [8, 13])
    # ((int, int), int) -> FloatColumn
    val = dm.col[(1, 3), 2]
    ok_(isinstance(val, FloatColumn))
    check_col(val, [8, 18])
    # (slice, slice) -> SeriesColumn
    val = dm.col[1:-1, 1:-1]
    ok_(isinstance(val, _SeriesColumn))
    check_series(val, [
        [7, 8, 9],
        [12, 13, 14],
    ])
    # ((int, int), slice) -> SeriesColumn
    val = dm.col[(1, 3), 1:-1]
    ok_(isinstance(val, _SeriesColumn))
    check_series(val, [
        [7, 8, 9],
        [17, 18, 19],
    ])
    # ((int, int), (int, int)) -> SeriesColumn
    val = dm.col[(1, 3), (1, 3)]
    ok_(isinstance(val, _SeriesColumn))
    check_series(val, [
        [7, 9],
        [17, 19],
    ])
示例#48
0
def check_sort(col_type):
    """Check operations.sort() on whole matrices and on single columns.

    col_type is the column class used as the matrix's default column type.
    """
    ascending = [1, 2, 3]
    descending = [3, 2, 1]
    dm = DataMatrix(length=3, default_col_type=col_type)
    dm.col1 = (3, 2, 1)
    dm.col2 = (1, 2, 3)

    # Sorting the matrix by one column reorders whole rows.
    dm = operations.sort(dm, by=dm.col1)
    check_col(dm.col1, ascending)
    check_col(dm.col2, descending)
    dm = operations.sort(dm, by=dm.col2)
    check_col(dm.col1, descending)
    check_col(dm.col2, ascending)

    # Sorting one column by another reorders only the sorted column.
    dm.col2 = operations.sort(dm.col2, by=dm.col1)
    check_col(dm.col2, descending)

    # Sorting each column by itself.
    dm.col1 = operations.sort(dm.col1)
    dm.col2 = operations.sort(dm.col2)
    check_col(dm.col1, ascending)
    check_col(dm.col2, ascending)
    check_integrity(dm)