def groups(self):
    """Return the group mapping, building and caching it on first use.

    If ``self._groups`` is already set (not ``None``) it is returned as is.
    Otherwise the mapping is produced by ``tseries.groupby`` from
    ``self._group_axis`` and ``self.grouper``, stored on ``self._groups``
    and then returned.
    """
    if self._groups is not None:
        return self._groups

    group_axis = self._group_axis
    # A fresh GroupDict is handed over as ``output`` -- presumably the
    # container tseries.groupby fills in; confirm against its signature.
    self._groups = tseries.groupby(group_axis, self.grouper,
                                   output=GroupDict())
    return self._groups
def bench_groupby():
    """Print timings for several group-by code paths over the same data.

    The input is 10000 integer labels stored with object dtype, each mapped
    to ``label // 10`` through a plain dict. Every candidate is timed with
    ``_timeit`` over 200 runs and reported as "ms per iteration".

    The "isnull" figure is the time of ``isnull(arrmap(...))`` minus the
    time of ``arrmap(...)`` alone, i.e. the extra cost attributed to
    ``isnull``.
    """
    n_runs = 200
    labels = np.arange(10000).astype(object)
    values = np.random.randn(10000)
    bucket_of = dict(zip(labels, labels // 10))

    def per_iteration_ms(func):
        # Presumably _timeit returns seconds per iteration (the messages
        # say "ms per iteration" after scaling by 1000) -- TODO confirm.
        return _timeit(func, n=n_runs) * 1000

    def run_nocython():
        return groupby_nocython(labels, bucket_of.get)

    def run_arrmap():
        return tseries.arrmap(labels, bucket_of.get)

    def run_isnull():
        return isnull(tseries.arrmap(labels, bucket_of.get))

    def run_groupby():
        return tseries.groupby(labels, bucket_of.get)

    def run_groupby_indices():
        return tseries.groupby_indices(labels, bucket_of.get)

    def run_group_means():
        # Explicit loop kept on purpose: this body is the workload being
        # timed, so its shape is left exactly as it was.
        positions = tseries.groupby_indices(labels, bucket_of.get)
        means = {}
        for key, locs in positions.iteritems():
            means[key] = np.mean(values.take(locs))
        return means

    # Single-argument print(...) emits the same text as the bare Python 2
    # print statement.
    print('no cython: %.2f ms per iteration'
          % per_iteration_ms(run_nocython))

    arrmap_ms = per_iteration_ms(run_arrmap)
    print('arrmap: %.2f ms per iteration' % arrmap_ms)

    print('isnull: %.2f ms per iteration'
          % (per_iteration_ms(run_isnull) - arrmap_ms))

    print('groupby: %.2f ms per iteration'
          % per_iteration_ms(run_groupby))

    print('groupby_inds: %.2f ms per iteration'
          % per_iteration_ms(run_groupby_indices))

    print('test: %.2f ms per iteration'
          % per_iteration_ms(run_group_means))
def test_groupby():
    """Check tseries.groupby on a mapping that sends some labels to NaN.

    Labels 1..7 are grouped by ``mapping.get``; the result must hold
    ``[1, 2]`` under 2, ``[5, 6]`` under 3 and ``[3, 4, 7]`` under a NaN
    key.
    """
    mapping = Series({1: 2., 2: 2.,
                      3: np.NaN, 4: np.NaN,
                      5: 3., 6: 3.,
                      7: np.NaN})
    index = Index([1, 2, 3, 4, 5, 6, 7])
    expected = {2: [1, 2],
                3: [5, 6],
                np.NaN: [3, 4, 7]}

    def compare_with_null(d1, d2):
        # NaN keys are pulled aside and compared to each other at the end;
        # presumably because NaN != NaN makes a keyed lookup unreliable.
        left_nulls = None
        right_nulls = None

        for key, members in d1.iteritems():
            if _isnan(key):
                left_nulls = members
                continue
            assert key in d2
            assert np.array_equal(members, d2[key])

        for key, members in d2.iteritems():
            if _isnan(key):
                right_nulls = members
                continue
            assert key in d1

        # Only compare the NaN buckets when at least one side has one.
        if not (left_nulls is None and right_nulls is None):
            assert np.array_equal(left_nulls, right_nulls)

    grouped = tseries.groupby(index, mapping.get)
    compare_with_null(grouped, expected)
def test_groupby():
    """Group labels 1..7 through ``mapping.get`` and verify the buckets.

    Expected outcome: 2 -> [1, 2], 3 -> [5, 6], and the labels whose mapped
    value is NaN (3, 4, 7) gathered under a NaN key.
    """
    nan = np.NaN
    mapping = Series({1: 2., 2: 2., 3: nan, 4: nan, 5: 3., 6: 3., 7: nan})
    index = Index([1, 2, 3, 4, 5, 6, 7])
    expected = {2: [1, 2], 3: [5, 6], nan: [3, 4, 7]}

    def compare_with_null(d1, d2):
        # The NaN-keyed entry of each dict is remembered separately and the
        # two are compared directly after both passes.
        nulls_1 = None
        nulls_2 = None

        for k, v in d1.iteritems():
            if not _isnan(k):
                assert k in d2
                assert np.array_equal(v, d2[k])
            else:
                nulls_1 = v

        for k, v in d2.iteritems():
            if not _isnan(k):
                assert k in d1
            else:
                nulls_2 = v

        if nulls_1 is None and nulls_2 is None:
            # Neither side has a NaN bucket: nothing left to compare.
            return
        assert np.array_equal(nulls_1, nulls_2)

    grouped = tseries.groupby(index, mapping.get)
    compare_with_null(grouped, expected)