def test_head(self):
    """Expect ``head(2)`` on a Series built from a LazyResult expression to give ``[1, 2]``."""
    int64 = np.dtype(np.int64)
    lazy_values = LazyResult(np.array([1, 2, 3]), int64, 1)
    # NOTE(review): the index covers two rows while the data holds three
    # elements -- confirm this mismatch is intentional.
    subject = Series(lazy_values.expr, int64, RangeIndex(0, 2, 1))

    first_two = subject.head(2)

    np.testing.assert_array_equal(np.array([1, 2]), first_two)
def test_std(self):
    """Expect ``std()`` over the int64 values 1..4 to evaluate to 1.2909944487358056."""
    values = np.array([1, 2, 3, 4])
    subject = Series(values, np.dtype(np.int64), RangeIndex(0, 4, 1))

    # The aggregation may come back unevaluated, hence evaluate_if_necessary.
    actual = evaluate_if_necessary(subject.std())

    np.testing.assert_equal(1.2909944487358056, actual)
def test_count(self):
    """Expect ``count()`` over a three-element int64 Series to evaluate to 3."""
    values = np.array([1, 2, 3])
    subject = Series(values, np.dtype(np.int64), RangeIndex(0, 3, 1))

    actual = evaluate_if_necessary(subject.count())

    np.testing.assert_equal(3, actual)
def test_mean(self):
    """Expect ``mean()`` over the int64 values 1..4 to evaluate to 2.5."""
    values = np.array([1, 2, 3, 4])
    subject = Series(values, np.dtype(np.int64), RangeIndex(0, 4, 1))

    actual = evaluate_if_necessary(subject.mean())

    np.testing.assert_equal(2.5, actual)
def test_describe(self):
    """Expect ``describe`` with min/max/mean/std on ``[1, 2, 3]`` to give ``[1, 3, 2, 1]`` as float64."""
    values = np.array([1, 2, 3])
    subject = Series(values, np.dtype(np.int64), RangeIndex(0, 3, 1))
    # One expected entry per requested aggregation, in request order.
    wanted = np.array([1, 3, 2, 1], np.float64)

    summary = subject.describe(['min', 'max', 'mean', 'std'])

    np.testing.assert_equal(wanted, evaluate_if_necessary(summary))
def test_comparison(self):
    """Expect ``series < 3`` to give a boolean Series marking the elements below 3.

    The input holds the int64 values 1..4, so the expected mask is
    ``[True, True, False, False]`` over the same RangeIndex.
    """
    data = np.array([1, 2, 3, 4])
    series = Series(data, np.dtype(np.int64), RangeIndex(0, 4, 1))

    # np.bool_ instead of the np.bool alias: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24; both name the same boolean dtype.
    expected_result = Series(np.array([True, True, False, False]),
                             np.dtype(np.bool_),
                             RangeIndex(0, 4, 1))
    result = series < 3

    test_equal_series(expected_result, result)
def test_getitem_series(self):
    """Expect indexing with the mask ``series != 2`` to keep values 1 and 3 at index labels 0 and 2."""
    int64 = np.dtype(np.int64)
    subject = Series(np.array([1, 2, 3]), int64, RangeIndex(0, 3, 1))
    surviving_labels = Index(np.array([0, 2]), np.dtype(np.int64))
    wanted = Series(np.array([1, 3]), np.dtype(np.int64), surviving_labels)

    mask = subject != 2
    filtered = subject[mask]

    test_equal_series(wanted, filtered)
def test_element_wise_operation(self):
    """Expect multiplying a Series by the scalar 2 to double every element and keep the index."""
    subject = Series(np.array([1, 2, 3]), np.dtype(np.int64), RangeIndex(0, 3, 1))
    wanted = Series(np.array([2, 4, 6]), np.dtype(np.int64), RangeIndex(0, 3, 1))

    doubled = subject * 2

    test_equal_series(wanted, doubled)
def test_getitem_slice(self):
    """Expect ``series[:2]`` on a LazyResult-backed Series to give the first two rows."""
    int64_weld = numpy_to_weld_type('int64')
    lazy_values = LazyResult(np.array([1, 2, 3]), int64_weld, 1)
    subject = Series(lazy_values.expr, np.dtype(np.int64), RangeIndex(0, 3, 1))
    wanted = Series(np.array([1, 2]), np.dtype(np.int64), RangeIndex(0, 2, 1))

    leading_rows = subject[:2]

    test_equal_series(wanted, leading_rows)
def test_agg(self):
    """Expect ``agg(['min', 'max'])`` to give a float64 Series labelled by aggregation name.

    For the int64 values 1..4 the expected data is ``[1., 4.]`` with a
    string Index ``['min', 'max']``.
    """
    data = np.array([1, 2, 3, 4])
    series = Series(data, np.dtype(np.int64), RangeIndex(0, 4, 1))

    # np.str_ instead of the np.str alias: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24; both resolve to the same string dtype.
    expected_index = Index(np.array(['min', 'max'], dtype=np.str_),
                           np.dtype(np.str_))
    expected_result = Series(np.array([1., 4.], dtype=np.float64),
                             np.dtype('float64'),
                             expected_index)
    result = series.agg(['min', 'max'])

    test_equal_series(expected_result, result)
def test_array_operation(self):
    """Expect adding two equally indexed int64 Series to sum them element by element."""
    subject = Series(np.array([1, 2, 3]), np.dtype(np.int64), RangeIndex(0, 3, 1))
    wanted = Series(np.array([3, 5, 7]), np.dtype(np.int64), RangeIndex(0, 3, 1))

    addend = Series(np.array([2, 3, 4]), np.dtype(np.int64), RangeIndex(0, 3, 1))
    total = subject + addend

    test_equal_series(wanted, total)
def test_map_weld_code(self):
    """Expect ``map`` with a Weld template that adds the scalar ``2L`` to shift each element by 2."""
    subject = Series(np.array([1, 3, 4]), np.dtype(np.int64), RangeIndex(0, 3, 1))

    # The template carries %(self)s and %(scalar)s placeholders; only
    # 'scalar' is supplied through the mapping passed to map().
    template = "map(%(self)s, |e| e + %(scalar)s)"
    substitutions = {'scalar': '2L'}
    shifted = subject.map(template, substitutions)

    wanted = Series(np.array([3, 5, 6]), np.dtype(np.int64), RangeIndex(0, 3, 1))

    test_equal_series(wanted, shifted)
def test_bitwise_and(self):
    """Expect ``&`` between two boolean Series to be True only where both operands are True.

    The two operands cover all four True/False combinations, so the
    expected result is ``[True, False, False, False]``.
    """
    # np.bool_ instead of the np.bool alias: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24; both name the same boolean dtype.
    data = np.array([True, True, False, False])
    series = Series(data, np.dtype(np.bool_), RangeIndex(0, 4, 1))

    data_other = np.array([True, False, True, False])
    series_other = Series(data_other, np.dtype(np.bool_), RangeIndex(0, 4, 1))

    expected_result = Series(np.array([True, False, False, False]),
                             np.dtype(np.bool_),
                             RangeIndex(0, 4, 1))
    result = series & series_other

    test_equal_series(expected_result, result)
def test_map_weld_cudf(self):
    """Expect ``map`` with a Weld ``cudf`` template calling ``udf_add`` to shift each element by 2.

    The shared library ``cudf/udf_c.so`` next to this test file is loaded
    through ``WeldObject.load_binary`` before the template is applied.
    """
    import os

    # Build the path from the absolute directory of this file. Plain string
    # concatenation onto dirname(__file__) yields '/cudf/udf_c.so' (a path at
    # the filesystem root) whenever __file__ has no directory component.
    test_dir = os.path.dirname(os.path.abspath(__file__))
    WeldObject.load_binary(os.path.join(test_dir, 'cudf', 'udf_c.so'))

    series = Series(np.array([1, 3, 4]), np.dtype(np.int64), RangeIndex(0, 3, 1))

    weld_template = "cudf[udf_add, vec[i64]](%(self)s, %(scalar)s)"
    mapping = {'scalar': '2L'}
    result = series.map(weld_template, mapping)

    expected_result = Series(np.array([3, 5, 6]), np.dtype(np.int64), RangeIndex(0, 3, 1))

    test_equal_series(expected_result, result)
def _aggregate(self, operation):
    """Apply ``operation`` to the grouped data and wrap the result in a DataFrame.

    Parameters
    ----------
    operation
        Forwarded unchanged to ``weld_groupby_aggregate`` as the aggregation
        to perform.

    Returns
    -------
    DataFrame
        One Series per entry of ``self.columns``, all sharing a new index
        built from the grouping columns: an ``Index`` when grouping by a
        single column, otherwise a ``MultiIndex``.
    """
    # The aggregation helper receives the Weld types as strings.
    by_weld_types = [str(numpy_to_weld_type(by_type)) for by_type in self.by_types]
    column_weld_types = [str(numpy_to_weld_type(column_type))
                         for column_type in self.columns_types]
    aggregated_data = weld_groupby_aggregate(self.expr,
                                             by_weld_types,
                                             column_weld_types,
                                             operation)

    # NOTE(review): the third argument of weld_get_column is True for the
    # grouping columns and False for the aggregated columns; presumably it
    # selects the key side vs the value side of the result -- confirm.
    if len(self.by) == 1:
        new_index = Index(weld_get_column(aggregated_data, 0, True),
                          self.by_types[0],
                          self.by[0])
    else:
        # range() instead of the Python-2-only xrange keeps this usable on
        # both Python 2 and Python 3.
        arrays = [
            LazyResult(weld_get_column(aggregated_data, position, True),
                       numpy_to_weld_type(self.by_types[position]),
                       1)
            for position in range(len(self.by))
        ]
        new_index = MultiIndex.from_arrays(arrays, self.by)

    # OrderedDict so the columns are stored in the order of self.columns.
    new_data = OrderedDict()
    for position, column_name in enumerate(self.columns):
        new_data[column_name] = Series(weld_get_column(aggregated_data, position, False),
                                       self.columns_types[position],
                                       new_index,
                                       column_name)

    return DataFrame(new_data, new_index)