def test_seriesStatByIndex(self):
    """
    Check seriesStatByIndex and the per-statistic shortcut methods against
    hand-computed values.

    A single record holding arange(12) is first grouped by a flat index
    with three groups of four, then by levels 0 and 1 of a 12 x 3
    multi-level index.
    """
    record = [((1,), arange(12))]
    groups = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
    data = Series(self.sc.parallelize(record), index=groups)

    # (statistic name, equivalent shortcut method, expected per-group values)
    cases = [
        ('sum', data.seriesSumByIndex, array([6, 22, 38])),
        ('mean', data.seriesMeanByIndex, array([1.5, 5.5, 9.5])),
        ('min', data.seriesMinByIndex, array([0, 4, 8])),
        ('max', data.seriesMaxByIndex, array([3, 7, 11])),
        ('count', data.seriesCountByIndex, array([4, 4, 4])),
        ('median', data.seriesMedianByIndex, array([1.5, 5.5, 9.5])),
    ]

    # Generic entry point, selecting the statistic by name.
    for stat, _, expected in cases:
        actual = data.seriesStatByIndex(stat).values().first()
        assert_true(array_equal(actual, expected))

    # Named shortcuts must give the same answers.
    for _, shortcut, expected in cases:
        actual = shortcut().values().first()
        assert_true(array_equal(actual, expected))

    # Multi-level index: one row per level, transposed so that each of the
    # 12 positions carries a 3-element key.
    levels = [
        [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
        [0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
        [0, 1, 0, 1, 2, 3, 0, 1, 0, 1, 2, 3],
    ]
    data.index = array(levels).T

    expectedSums = array([1, 14, 13, 38])
    expectedKeys = array([[0, 0], [0, 1], [1, 0], [1, 1]])

    # Deferred calls keep the original call/assert interleaving.
    aggregations = [
        lambda: data.seriesStatByIndex('sum', level=[0, 1]),
        lambda: data.seriesSumByIndex(level=[0, 1]),
    ]
    for aggregate in aggregations:
        result = aggregate()
        assert_true(array_equal(result.values().first(), expectedSums))
        assert_true(array_equal(result.index, expectedKeys))
def test_index_setter_getter(self):
    """
    Check the Series index property: its value when none is supplied,
    assignment of a new index of matching length, and rejection of
    indices whose length differs from the current one.
    """
    dataLocal = [
        ((1,), array([1.0, 2.0, 3.0])),
        ((2,), array([2.0, 2.0, 4.0])),
        ((3,), array([4.0, 2.0, 1.0])),
    ]
    data = Series(self.sc.parallelize(dataLocal))

    # No index was passed to the constructor; the test expects 0..2.
    assert_true(array_equal(data.index, array([0, 1, 2])))

    data.index = [3, 2, 1]
    # array_equal works whether the index comes back as a list or as an
    # ndarray; a bare `==` against a list would yield an element-wise array
    # (ambiguous truth value) in the ndarray case.
    assert_true(array_equal(data.index, array([3, 2, 1])))

    def setIndex(data, idx):
        # Property assignment is a statement, so wrap it in a callable
        # that assert_raises can invoke.
        data.index = idx

    # Both a scalar and a two-element list are expected to be rejected
    # for this three-element series.
    assert_raises(ValueError, setIndex, data, 5)
    assert_raises(ValueError, setIndex, data, [1, 2])
def test_index_setter_getter(self):
    """
    Exercise the getter and setter of the Series index property.

    Verifies the index reported when none is given, that a same-length
    replacement is accepted and read back, and that replacements of a
    different length raise ValueError.
    """
    dataLocal = [
        ((1,), array([1.0, 2.0, 3.0])),
        ((2,), array([2.0, 2.0, 4.0])),
        ((3,), array([4.0, 2.0, 1.0]))
    ]
    data = Series(self.sc.parallelize(dataLocal))

    # Constructed without an explicit index; expected to read back as 0..2.
    assert_true(array_equal(data.index, array([0, 1, 2])))

    data.index = [3, 2, 1]
    # Compare with array_equal, like the assertion above, so the check does
    # not depend on whether the index is stored as a list or an ndarray
    # (list == ndarray would produce an array, not a single boolean).
    assert_true(array_equal(data.index, array([3, 2, 1])))

    def setIndex(data, idx):
        # Callable wrapper around the assignment, needed by assert_raises.
        data.index = idx

    # Wrong-length replacements: a scalar and a two-element list.
    assert_raises(ValueError, setIndex, data, 5)
    assert_raises(ValueError, setIndex, data, [1, 2])
def test_selectByIndex(self):
    """
    Check selectByIndex on a single arange(12) record, first with a flat
    index and then with a 12 x 3 multi-level index, covering the squeeze,
    returnMask, level, val and filter arguments.
    """
    record = [((1,), arange(12))]
    flatIndex = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
    data = Series(self.sc.parallelize(record), index=flatIndex)

    # Flat index: picking value 1 keeps positions 4..7 and their labels.
    selected = data.selectByIndex(1)
    assert_true(array_equal(selected.values().first(), array([4, 5, 6, 7])))
    assert_true(array_equal(selected.index, array([1, 1, 1, 1])))

    # With squeeze=True the test expects a fresh 0..3 index.
    selected = data.selectByIndex(1, squeeze=True)
    assert_true(array_equal(selected.index, array([0, 1, 2, 3])))

    # Multi-level index: one row per level, transposed to one key per position.
    levels = [
        [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
        [0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
        [0, 1, 0, 1, 2, 3, 0, 1, 0, 1, 2, 3],
    ]
    data.index = array(levels).T

    # Single level, also requesting the selection mask.
    selected, mask = data.selectByIndex(0, level=2, returnMask=True)
    expectedKeys = array([[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]])
    expectedMask = array([1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0])
    assert_true(array_equal(selected.values().first(), array([0, 2, 6, 8])))
    assert_true(array_equal(selected.index, expectedKeys))
    assert_true(array_equal(mask, expectedMask))

    # Same selection with squeeze=True: the selected level is expected to
    # be dropped from the resulting index.
    selected = data.selectByIndex(0, level=2, squeeze=True)
    expectedKeys = array([[0, 0], [0, 1], [1, 0], [1, 1]])
    assert_true(array_equal(selected.values().first(), array([0, 2, 6, 8])))
    assert_true(array_equal(selected.index, expectedKeys))

    # One value per level across two levels.
    selected = data.selectByIndex([1, 0], level=[0, 1])
    assert_true(array_equal(selected.values().first(), array([6, 7])))
    assert_true(array_equal(selected.index, array([[1, 0, 0], [1, 0, 1]])))

    # A list of accepted values for one of the levels.
    selected = data.selectByIndex(val=[0, [2, 3]], level=[0, 2])
    assert_true(array_equal(selected.values().first(), array([4, 5])))
    assert_true(array_equal(selected.index, array([[0, 1, 2], [0, 1, 3]])))

    # With filter=True the expected result is the positions whose level-1
    # value is NOT 1.
    selected = data.selectByIndex(1, level=1, filter=True)
    expectedKeys = array([[0, 0, 0], [0, 0, 1], [1, 0, 0], [1, 0, 1]])
    assert_true(array_equal(selected.values().first(), array([0, 1, 6, 7])))
    assert_true(array_equal(selected.index, expectedKeys))
def test_seriesAggregateByIndex(self):
    """
    Check seriesAggregateByIndex with the builtin sum on a single
    arange(12) record, grouped first by a flat index and then by levels
    0 and 1 of a 12 x 3 multi-level index.
    """
    dataLocal = [((1,), arange(12))]
    index = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
    data = Series(self.sc.parallelize(dataLocal), index=index)

    # Flat index: three groups of four consecutive values.
    # (A leftover debug print of the result was removed here.)
    result = data.seriesAggregateByIndex(sum)
    assert_true(array_equal(result.values().first(), array([6, 22, 38])))
    assert_true(array_equal(result.index, array([0, 1, 2])))

    # Multi-level index: one row per level, transposed to one key per position.
    index = [
        [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
        [0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
        [0, 1, 0, 1, 2, 3, 0, 1, 0, 1, 2, 3]
    ]
    data.index = array(index).T

    # Aggregating over levels 0 and 1 yields one sum per distinct
    # (level 0, level 1) pair.
    result = data.seriesAggregateByIndex(sum, level=[0, 1])
    assert_true(array_equal(result.values().first(), array([1, 14, 13, 38])))
    assert_true(array_equal(result.index, array([[0, 0], [0, 1], [1, 0], [1, 1]])))
def test_seriesAggregateByIndex(self):
    """
    Verify that seriesAggregateByIndex applies a user-supplied function
    (here the builtin sum) per index group, for both a flat index and a
    selection of levels from a multi-level index.
    """
    dataLocal = [((1, ), arange(12))]
    index = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
    data = Series(self.sc.parallelize(dataLocal), index=index)

    # The stray debug `print` of the result that used to follow this call
    # was dropped: it only added noise to the test output.
    result = data.seriesAggregateByIndex(sum)
    assert_true(array_equal(result.values().first(), array([6, 22, 38])))
    assert_true(array_equal(result.index, array([0, 1, 2])))

    # Three index levels, given one row per level and transposed so each
    # of the 12 positions gets a 3-element key.
    index = [[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
             [0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
             [0, 1, 0, 1, 2, 3, 0, 1, 0, 1, 2, 3]]
    data.index = array(index).T

    # Group by the first two levels only.
    result = data.seriesAggregateByIndex(sum, level=[0, 1])
    assert_true(
        array_equal(result.values().first(), array([1, 14, 13, 38])))
    assert_true(
        array_equal(result.index, array([[0, 0], [0, 1], [1, 0], [1, 1]])))
def test_selectByIndex(self):
    """
    Exercise selectByIndex on one arange(12) record.

    Covers selection on a flat index (with and without squeeze) and on a
    12 x 3 multi-level index using the level, returnMask, squeeze, val and
    filter arguments; expected values and indices are hand-computed.
    """
    def check(series, values=None, index=None):
        # Compare the first record's values and/or the index, in that order.
        if values is not None:
            assert_true(array_equal(series.values().first(), array(values)))
        if index is not None:
            assert_true(array_equal(series.index, array(index)))

    source = [((1,), arange(12))]
    labels = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
    data = Series(self.sc.parallelize(source), index=labels)

    # Flat index.
    check(data.selectByIndex(1), values=[4, 5, 6, 7], index=[1, 1, 1, 1])
    check(data.selectByIndex(1, squeeze=True), index=[0, 1, 2, 3])

    # Switch to a multi-level index (rows are levels before the transpose).
    perLevel = [
        [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
        [0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
        [0, 1, 0, 1, 2, 3, 0, 1, 0, 1, 2, 3]
    ]
    data.index = array(perLevel).T

    # Selecting on level 2 and asking for the mask as well.
    picked, mask = data.selectByIndex(0, level=2, returnMask=True)
    check(picked,
          values=[0, 2, 6, 8],
          index=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]])
    assert_true(array_equal(mask, array([1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0])))

    # squeeze=True: the resulting index is expected to lose level 2.
    check(data.selectByIndex(0, level=2, squeeze=True),
          values=[0, 2, 6, 8],
          index=[[0, 0], [0, 1], [1, 0], [1, 1]])

    # Two levels, one value each.
    check(data.selectByIndex([1, 0], level=[0, 1]),
          values=[6, 7],
          index=[[1, 0, 0], [1, 0, 1]])

    # Two levels, the second accepting either of two values.
    check(data.selectByIndex(val=[0, [2, 3]], level=[0, 2]),
          values=[4, 5],
          index=[[0, 1, 2], [0, 1, 3]])

    # filter=True: expected result keeps the positions whose level-1
    # value differs from 1.
    check(data.selectByIndex(1, level=1, filter=True),
          values=[0, 1, 6, 7],
          index=[[0, 0, 0], [0, 0, 1], [1, 0, 0], [1, 0, 1]])
def test_seriesStatByIndex(self):
    """
    Verify per-index-group statistics on a single arange(12) record.

    Each statistic is requested both through seriesStatByIndex(name) and
    through its dedicated series<Stat>ByIndex method, first on a flat
    index and then (sum only) on levels 0 and 1 of a multi-level index.
    """
    def firstValues(series):
        # Values of the first (and only) record.
        return series.values().first()

    source = [((1, ), arange(12))]
    labels = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
    data = Series(self.sc.parallelize(source), index=labels)

    # Hand-computed results for groups [0..3], [4..7], [8..11].
    byName = [
        ('sum', array([6, 22, 38])),
        ('mean', array([1.5, 5.5, 9.5])),
        ('min', array([0, 4, 8])),
        ('max', array([3, 7, 11])),
        ('count', array([4, 4, 4])),
        ('median', array([1.5, 5.5, 9.5])),
    ]
    for name, want in byName:
        assert_true(
            array_equal(firstValues(data.seriesStatByIndex(name)), want))

    # Dedicated methods, listed in the same order as the names above.
    byMethod = [
        data.seriesSumByIndex,
        data.seriesMeanByIndex,
        data.seriesMinByIndex,
        data.seriesMaxByIndex,
        data.seriesCountByIndex,
        data.seriesMedianByIndex,
    ]
    for method, (_, want) in zip(byMethod, byName):
        assert_true(array_equal(firstValues(method()), want))

    # Replace the flat index with a three-level one (rows are levels
    # before the transpose).
    perLevel = [[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
                [0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
                [0, 1, 0, 1, 2, 3, 0, 1, 0, 1, 2, 3]]
    data.index = array(perLevel).T

    wantSums = array([1, 14, 13, 38])
    wantKeys = array([[0, 0], [0, 1], [1, 0], [1, 1]])

    grouped = data.seriesStatByIndex('sum', level=[0, 1])
    assert_true(array_equal(firstValues(grouped), wantSums))
    assert_true(array_equal(grouped.index, wantKeys))

    grouped = data.seriesSumByIndex(level=[0, 1])
    assert_true(array_equal(firstValues(grouped), wantSums))
    assert_true(array_equal(grouped.index, wantKeys))