def compute(self, context, *args, **kwargs):
    """Apply the aggregate function to the first positional argument.

    The first positional argument is converted with ``np.asanyarray``; the
    remaining positional and keyword arguments are forwarded unchanged to
    the aggregate function.

    Keyword arguments consumed here (not forwarded):
        filter: optional mask used to index a 1-dimensional array.
        skip_na: whether missing values should be ignored (default True).

    Raises:
        Exception: if a filter is given for an array with more than one
            dimension.
    """
    row_filter = kwargs.pop('filter', None)
    skip_na = kwargs.pop('skip_na', True)
    data = np.asanyarray(args[0])
    extra_args = args[1:]

    # Inexact (floating point) data can be handled by the nan-aware variant
    # of the function, when one is available.
    use_nan_func = (skip_na and np.issubdtype(data.dtype, np.inexact)
                    and self.nan_func[0] is not None)
    func = self.nan_func[0] if use_nan_func else self.get_compute_func()

    # 0-d (scalar) input has neither ndim == 1 nor ndim > 1 and is passed
    # through untouched.
    if data.ndim == 1:
        if skip_na and not use_nan_func:
            # presumably ispresent returns a boolean mask of non-missing
            # entries -- it is defined outside this block
            present = ispresent(data)
            if row_filter is None:
                row_filter = present
            else:
                # we should *not* use an inplace operation because the
                # filter can be a simple variable
                row_filter = row_filter & present
        if row_filter is not None and row_filter is not True:
            data = data[row_filter]
    elif data.ndim > 1 and row_filter is not None:
        raise Exception("filter argument is not supported on arrays with "
                        "more than 1 dimension")

    return func(data, *extra_args, **kwargs)
def compute(self, func, args, kwargs, filter_value=None):
    """Call *func* on the first element of *args*, after optional filtering.

    Args:
        func: function to apply; replaced by ``self.nan_func[0]`` when
            ``self.skip_na`` is set, the data has an inexact dtype and a
            nan-aware function is available.
        args: sequence whose first element is the array to work on; the
            remaining elements are forwarded to the function.
        kwargs: mapping of keyword arguments forwarded to the function.
        filter_value: optional mask used to index a 1-dimensional array.

    Raises:
        Exception: if a filter is given for an array with more than one
            dimension.
    """
    # the first argument should be the array to work on ('a')
    assert self.arg_names[0] == 'a'

    data = np.asanyarray(args[0])
    extra_args = args[1:]

    nan_aware = (self.skip_na and issubclass(data.dtype.type, np.inexact)
                 and self.nan_func[0] is not None)
    if nan_aware:
        func = self.nan_func[0]

    # 0-d (scalar) input matches neither branch and is passed as is.
    if data.ndim == 1:
        if self.skip_na and not nan_aware:
            # presumably ispresent returns a boolean mask of non-missing
            # entries -- it is defined outside this block
            present = ispresent(data)
            if filter_value is None:
                filter_value = present
            else:
                # we should *not* use an inplace operation because
                # filter_value can be a simple variable
                filter_value = filter_value & present
        if filter_value is not None and filter_value is not True:
            data = data[filter_value]
    elif data.ndim > 1 and filter_value is not None:
        raise Exception("filter argument is not supported on arrays with "
                        "more than 1 dimension")

    return func(data, *extra_args, **kwargs)
def compute(self, context, *args, **kwargs):
    """Run the aggregate function on the first positional argument.

    ``filter`` and ``skip_na`` are popped from *kwargs*; everything else
    (the remaining positional arguments and keyword arguments) is passed on
    to the aggregate function after the (possibly filtered) array.

    Raises:
        Exception: if ``filter`` is given together with an array that has
            more than one dimension.
    """
    filter_value = kwargs.pop('filter', None)
    skip_na = kwargs.pop('skip_na', True)
    array = np.asanyarray(args[0])
    rest = args[1:]

    if (skip_na and np.issubdtype(array.dtype, np.inexact)
            and self.nan_func[0] is not None):
        # inexact data: use the nan-aware function
        nan_handled = True
        reducer = self.nan_func[0]
    else:
        nan_handled = False
        reducer = self.get_compute_func()

    ndim = array.ndim
    if ndim > 1:
        if filter_value is not None:
            raise Exception("filter argument is not supported on arrays "
                            "with more than 1 dimension")
    elif ndim == 1:
        if skip_na and not nan_handled:
            # presumably ispresent returns a boolean mask of non-missing
            # entries -- it is defined outside this block
            if filter_value is None:
                filter_value = ispresent(array)
            else:
                # we should *not* use an inplace operation because
                # filter_value can be a simple variable
                filter_value = filter_value & ispresent(array)
        if not (filter_value is None or filter_value is True):
            array = array[filter_value]
    # ndim == 0 (scalar): nothing to filter

    call_args = (array,) + rest
    return reducer(*call_args, **kwargs)
def na_sum(a, overwrite=False):
    """Return the sum of all elements of *a*, skipping missing values.

    Arrays with an inexact dtype are summed with ``np.nansum``. Any other
    array is multiplied by ``ispresent(a)`` before being summed with
    ``np.sum`` (presumably this zeroes out the missing entries -- ispresent
    is defined outside this block).

    Args:
        a: array to sum; must expose a ``dtype`` attribute.
        overwrite: if True, the multiplication is done in place on *a*
            instead of on a temporary copy (only relevant for arrays which
            do not have an inexact dtype).
    """
    if issubclass(a.dtype.type, np.inexact):
        return np.nansum(a)

    present = ispresent(a)
    if overwrite:
        a *= present
    else:
        a = a * present
    return np.sum(a)
def na_sum(a, overwrite=False):
    """Return the sum of all elements of *a*, skipping missing values.

    Arrays with an inexact dtype are summed with ``nansum``. Any other
    array is multiplied by ``ispresent(a)`` before being summed with
    ``np.sum`` (presumably this zeroes out the missing entries -- ispresent
    is defined outside this block).

    Args:
        a: array to sum; must expose a ``dtype`` attribute.
        overwrite: if True, the multiplication is done in place on *a*
            instead of on a temporary copy (only relevant for arrays which
            do not have an inexact dtype).
    """
    is_inexact = np.issubdtype(a.dtype, np.inexact)
    if not is_inexact:
        mask = ispresent(a)
        if overwrite:
            a *= mask
        else:
            a = a * mask
    sum_func = nansum if is_inexact else np.sum
    return sum_func(a)
def evaluate(self, context):
    """Compute the Gini coefficient of ``self.expr`` in *context*.

    The expression is evaluated with ``expr_eval``, optionally restricted
    by the filter returned by ``self._getfilter(context)`` and, when
    ``self.skip_na`` is set, by ``ispresent`` (presumably a mask of the
    non-missing values -- it is defined outside this block).

    Returns:
        The Gini coefficient of the selected values, or NaN when no value
        is left after filtering. When the selected values sum to zero, a
        message is printed and the result of the division by zero is
        returned as is.
    """
    values = np.asarray(expr_eval(self.expr, context))

    filter_expr = self._getfilter(context)
    if filter_expr is not None:
        filter_values = expr_eval(filter_expr, context)
    else:
        filter_values = True
    if self.skip_na:
        # we should *not* use an inplace operation because filter_values
        # can be a simple variable
        filter_values = filter_values & ispresent(values)
    if filter_values is not True:
        values = values[filter_values]

    n = len(values)
    if n == 0:
        # Every value was filtered out (or was missing): cumsum[-1] below
        # would raise IndexError, so report the problem and return NaN.
        print("gini(%s, filter=%s): expression is all zeros (or nan) "
              "for filter" % (self.expr, filter_expr))
        return float('nan')

    # from Wikipedia:
    # G = 1/n * (n + 1 - 2 * (sum((n + 1 - i) * a[i]) / sum(a[i])))
    #                        i=1..n                    i=1..n
    # but sum((n + 1 - i) * a[i])
    #    i=1..n
    #   = sum((n - i) * a[i] for i in range(n))
    #   = sum(cumsum(a))
    sorted_values = np.sort(values)
    # force float to avoid overflows with integer input expressions
    cumsum = np.cumsum(sorted_values, dtype=float)
    values_sum = cumsum[-1]
    if values_sum == 0:
        print("gini(%s, filter=%s): expression is all zeros (or nan) "
              "for filter" % (self.expr, filter_expr))
    return (n + 1 - 2 * np.sum(cumsum) / values_sum) / n
def compute(self, context, expr, filter=None, skip_na=True):
    """Return the average of *expr*, optionally restricted by *filter*.

    When a filter is given, it is registered through ``self.add_tmp_var``
    and the expression is multiplied by the returned temporary variable
    before being evaluated with ``expr_eval``. The sum of the evaluated
    values is then divided by the number of selected rows.

    Args:
        context: evaluation context passed to the helpers.
        expr: expression to average.
        filter: optional filter; rows where it is false do not count.
        skip_na: if True, rows for which ``ispresent`` is false are not
            counted and the sum is computed with ``na_sum``.

    Returns:
        The average as a float, or NaN when no row is selected.
    """
    # FIXME: either take "contextual filter" into account here (by using
    # self._getfilter), or don't do it in sum & gini
    if filter is None:
        mask = True
    else:
        mask = filter
        tmpvar = self.add_tmp_var(context, filter)
        if getdtype(expr, context) is bool:
            # convert expr to int because mul_bbb is not implemented in
            # numexpr
            expr = BinaryOp('*', expr, 1)
        expr = BinaryOp('*', expr, tmpvar)

    values = np.asarray(expr_eval(expr, context))

    if skip_na:
        # we should *not* use an inplace operation because filter can be a
        # simple variable
        mask = mask & ispresent(values)

    numrows = len(values) if mask is True else np.sum(mask)
    if not numrows:
        return float('nan')

    total = na_sum(values) if skip_na else np.sum(values)
    return total / float(numrows)
def compute(self, context, expr, filter=None, skip_na=True):
    """Compute the Gini coefficient of *expr*.

    Args:
        context: evaluation context, passed to ``self._getfilter`` and
            ``expr_eval``.
        expr: values to compute the coefficient of (converted with
            ``np.asarray``).
        filter: optional filter, combined by ``self._getfilter``; when the
            result is not None it is evaluated with ``expr_eval`` and used
            to select values.
        skip_na: if True, values for which ``ispresent`` is false are
            dropped (presumably the missing values -- ispresent is defined
            outside this block).

    Returns:
        The Gini coefficient of the selected values, or NaN when no value
        is left after filtering. When the selected values sum to zero, a
        message is printed and the result of the division by zero is
        returned as is.
    """
    values = np.asarray(expr)

    filter_expr = self._getfilter(context, filter)
    if filter_expr is not None:
        filter_values = expr_eval(filter_expr, context)
    else:
        filter_values = True
    if skip_na:
        # we should *not* use an inplace operation because filter_values
        # can be a simple variable
        filter_values = filter_values & ispresent(values)
    if filter_values is not True:
        values = values[filter_values]

    n = len(values)
    if n == 0:
        # Every value was filtered out (or was missing): cumsum[-1] below
        # would raise IndexError, so report the problem and return NaN.
        print("gini(%s, filter=%s): expression is all zeros (or nan) "
              "for filter" % (self.args[0], filter))
        return float('nan')

    # from Wikipedia:
    # G = 1/n * (n + 1 - 2 * (sum((n + 1 - i) * a[i]) / sum(a[i])))
    #                        i=1..n                    i=1..n
    # but sum((n + 1 - i) * a[i])
    #    i=1..n
    #   = sum((n - i) * a[i] for i in range(n))
    #   = sum(cumsum(a))
    sorted_values = np.sort(values)
    # force float to avoid overflows with integer input expressions
    cumsum = np.cumsum(sorted_values, dtype=float)
    values_sum = cumsum[-1]
    if values_sum == 0:
        print("gini(%s, filter=%s): expression is all zeros (or nan) "
              "for filter" % (self.args[0], filter))
    return (n + 1 - 2 * np.sum(cumsum) / values_sum) / n
def evaluate(self, context):
    """Return the average of ``self.expr`` evaluated in *context*.

    When ``self.filter`` is set, it is evaluated and stored in a copy of
    the context under a temporary variable name, and the expression is
    multiplied by that variable before being evaluated. The sum of the
    evaluated values is then divided by the number of selected rows.

    Returns:
        The average as a float, or NaN when no row is selected.
    """
    expr = self.expr
    # FIXME: either take "contextual filter" into account here (by using
    # self._getfilter), or don't do it in sum & gini
    if self.filter is None:
        mask = True
    else:
        mask = expr_eval(self.filter, context)
        tmp_varname = get_tmp_varname()
        # work on a copy so that the temporary variable is not added to the
        # caller's context
        context = context.copy()
        context[tmp_varname] = mask
        if getdtype(expr, context) is bool:
            # convert expr to int because mul_bbb is not implemented in
            # numexpr
            expr *= 1
        expr *= Variable(tmp_varname)

    values = np.asarray(expr_eval(expr, context))

    if self.skip_na:
        # we should *not* use an inplace operation because the mask can be
        # a simple variable
        mask = mask & ispresent(values)

    numrows = len(values) if mask is True else np.sum(mask)
    if not numrows:
        return float('nan')

    total = na_sum(values) if self.skip_na else np.sum(values)
    return total / float(numrows)