def rec_groupby(a, keynames, *fun_fields_name):
    '''Group a record array by key fields and aggregate other fields.

    A special version of np_groupby for record arrays, somewhat similar
    to the function found in matplotlib.mlab.rec_groupby.

    This is basically a wrapper around np_groupby that automatically
    generates lambdas like the ones in the np_groupby doc string.  That
    same call would look like this using rec_groupby:

        rec_groupby(a, ['m', 'n'], (np.mean, 'o', 'mean_o'),
                                   (np.std, 'o', 'std_o'),
                                   (np.min, 'p', 'min_p'))

    and the second function could be written as:

        def compute_some_thing(x):
            o, p = x['o'], x['p']
            return np.mean(o) / np.std(o) * np.min(p)

        rec_groupby(a, ['m', 'n'],
                    (compute_some_thing, ['o', 'p'], 'its_complicated'))

    In general, this function is faster than matplotlib.mlab, but not as
    fast as pandas and probably misses some corner cases for each :)

    Parameters
    ----------
    a : record array to group.
    keynames : a single field name or a list-like of field names used as
        the grouping key.
    *fun_fields_name : any number of ``(function, fields, output_name)``
        triples.  ``function`` and ``fields`` are handed to
        ``_outfielder``; ``output_name`` labels the resulting column.

    Returns
    -------
    Whatever ``np_groupby`` returns when called with the key view, ``a``,
    the wrapped functions and ``names=keynames + output names``.

    Raises
    ------
    ValueError
        If an entry of ``fun_fields_name`` does not unpack into exactly
        three items.
    '''
    keynames = list(keynames) if islistlike(keynames) else [keynames]
    keyarr = fields_view(a, keynames)
    # The 3-way unpacking both validates each spec and extracts its parts.
    # No separate zip(*fun_fields_name) step is needed, and that step used
    # to raise ValueError when no specs were supplied at all.
    # NOTE(review): with zero specs the call is now forwarded to np_groupby
    # with only the key names -- confirm np_groupby accepts that.
    functions = [_outfielder(fun, fields) for fun, fields, _ in fun_fields_name]
    names = [name for _, _, name in fun_fields_name]
    return np_groupby(keyarr, a, *functions, names=keynames + names)
def fields_view(arr, fields):
    '''Return a view onto a subset of a record array's fields, without copying.

    ``fields`` may be a single field name or a list-like of names.  The
    result is a new ``np.ndarray`` that uses ``arr`` as its buffer and keeps
    ``arr``'s shape and strides; only the dtype is narrowed to the
    requested fields.

    Taken from:
    http://stackoverflow.com/questions/15182381/how-to-return-a-view-of-several-columns-in-numpy-structured-array
    '''
    if not islistlike(fields):
        fields = [fields]

    # Copy the requested entries of the source dtype's field table so the
    # view's dtype describes the same fields as they are laid out in ``arr``.
    selected = {}
    for name in fields:
        selected[name] = arr.dtype.fields[name]
    view_dtype = np.dtype(selected)

    # Reusing arr's strides makes consecutive elements of the view step
    # over whole records of the original array.
    return np.ndarray(
        shape=arr.shape,
        dtype=view_dtype,
        buffer=arr,
        offset=0,
        strides=arr.strides,
    )