def sdc_nansum_overload(self):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.nansum.

    Typing-time dispatcher: inspects the type of ``self`` and returns the
    function to be compiled for it.

    * non-array types  -> ``None`` (not handled here)
    * numeric dtypes   -> a ``prange`` loop summing every non-NaN element
    * boolean dtypes   -> whatever ``gen_sum_bool_impl()`` produces
    * any other dtype  -> implicit ``None``

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k nansum

    """
    # Check the container type first: only array types are inspected for
    # ``dtype`` below.
    if not isinstance(self, types.Array):
        return None

    dtype = self.dtype

    if isinstance(dtype, types.Number):
        # dtype-specific NaN predicate, same helper the other reductions in
        # this file use.
        isnan = get_isnan(dtype)

        def sdc_nansum_number_impl(self):
            length = len(self)
            result = 0
            for i in prange(length):
                if not isnan(self[i]):
                    result += self[i]

            return result

        return sdc_nansum_number_impl

    if isinstance(dtype, (types.Boolean, bool)):
        return gen_sum_bool_impl()
def ov_impl(a):
    """
    Pick a NaN-aware min/max style reduction for the array type ``a``.

    ``reduce_op`` is taken from the enclosing scope (not visible in this
    block); the float/complex path only knows the builtins ``min`` and
    ``max`` and fails with ``KeyError`` for anything else.

    Returns ``None`` for non-array types, otherwise the function to compile.
    """
    if not isinstance(a, types.Array):
        return

    if not isinstance(a.dtype, (types.Float, types.Complex)):
        # dtype without NaN handling: plain reduction seeded with a[0].
        def impl(a):
            acc = a[0]
            for k in prange(len(a) - 1):
                acc = reduce_op(acc, a[k + 1])
            return acc

        return impl

    isnan = get_isnan(a.dtype)
    seeds = {
        min: numpy.inf,
        max: -numpy.inf,
    }
    initial_result = seeds[reduce_op]

    def impl(a):
        acc = initial_result
        skipped = 0
        n = len(a)
        for k in prange(n):
            item = a[k]
            if isnan(item):
                skipped += 1
            else:
                acc = reduce_op(acc, item)

        # Every element was NaN (this also covers n == 0).
        if skipped == n:
            return numpy.nan

        return acc

    return impl
def np_nanmean(a):
    """
    Build a parallel mean that skips NaN elements.

    Returns ``None`` when ``a`` is not an array type; otherwise returns the
    implementation, which sums and counts the non-NaN elements in a
    ``prange`` loop and divides the two.
    """
    if not isinstance(a, types.Array):
        return

    isnan = get_isnan(a.dtype)

    def nanmean_impl(a):
        total = 0.0
        n_valid = 0
        for idx in prange(len(a)):
            item = a[idx]
            if not isnan(item):
                total += item
                n_valid += 1
        # np.divide() doesn't raise ZeroDivisionError
        return np.divide(total, n_valid)

    return nanmean_impl
def np_nancumsum(arr, like_pandas=False):
    """
    Build a chunk-parallel cumulative sum that skips NaN elements.

    Returns ``None`` for non-array types. Boolean and integer arrays are
    forwarded to ``cumsum``. For any other dtype the returned implementation
    works in two passes over the chunks from ``parallel_chunks``:

    1. each chunk computes its own running sum and records its total;
    2. each chunk adds the sum of the totals of all preceding chunks.

    With ``like_pandas`` true, a NaN input is written through to the output
    (``partial + NaN``) and does not update the running sum. Otherwise a NaN
    input leaves the running sum unchanged and that running sum is written.
    """
    if not isinstance(arr, types.Array):
        return

    if isinstance(arr.dtype, (types.Boolean, types.Integer)):
        # dtype cannot possibly contain NaN
        return lambda arr, like_pandas=False: cumsum(arr)
    else:
        retty = arr.dtype
        is_nan = get_isnan(retty)
        zero = retty(0)

        def nancumsum_impl(arr, like_pandas=False):
            chunks = parallel_chunks(len(arr))
            partial_sum = numpy.zeros(len(chunks), dtype=retty)
            result = numpy.empty_like(arr)

            # below line is only needed since Literal[bool] var cannot be converted to bool
            # in a prange due to a bug related to absence of BooleanLiterals in Numba
            _like_pandas = True if like_pandas else False

            # Pass 1: per-chunk running sums.
            for i in prange(len(chunks)):
                chunk = chunks[i]
                partial = zero
                for j in range(chunk.start, chunk.stop):
                    if _like_pandas:
                        result[j] = partial + arr[j]
                        # logical (not bitwise) negation of the NaN test
                        if not is_nan(arr[j]):
                            partial = result[j]
                    else:
                        if not is_nan(arr[j]):
                            partial += arr[j]
                        result[j] = partial
                partial_sum[i] = partial

            # Pass 2: shift every chunk by the total of the chunks before it.
            for i in prange(len(chunks)):
                prefix = sum(partial_sum[0:i])
                chunk = chunks[i]
                for j in range(chunk.start, chunk.stop):
                    result[j] += prefix

            return result

        return nancumsum_impl
def np_nancumsum(arr, like_pandas=False):
    """
    Build a chunk-parallel cumulative sum that skips NaN elements.

    Returns ``None`` for non-array types. Boolean and integer arrays are
    forwarded to ``cumsum``. For any other dtype the returned implementation
    first computes a running sum inside every chunk produced by
    ``parallel_chunks`` and then offsets each chunk by the combined total of
    the chunks that precede it.

    With ``like_pandas`` true, a NaN input is written through to the output
    and does not update the running sum. Otherwise a NaN input leaves the
    running sum unchanged and that running sum is written.
    """
    if not isinstance(arr, types.Array):
        return

    if isinstance(arr.dtype, (types.Boolean, types.Integer)):
        # dtype cannot possibly contain NaN
        return lambda arr, like_pandas=False: cumsum(arr)
    else:
        retty = arr.dtype
        is_nan = get_isnan(retty)
        zero = retty(0)

        def nancumsum_impl(arr, like_pandas=False):
            chunks = parallel_chunks(len(arr))
            partial_sum = numpy.zeros(len(chunks), dtype=retty)
            result = numpy.empty_like(arr)

            # Resolve the flag to a plain bool once, outside the parallel
            # loop. NOTE(review): defensive - a literal-typed bool is
            # reportedly not convertible to bool inside prange in some Numba
            # versions; confirm against the Numba version in use.
            _like_pandas = True if like_pandas else False

            # Pass 1: per-chunk running sums.
            for i in prange(len(chunks)):
                chunk = chunks[i]
                partial = zero
                for j in range(chunk.start, chunk.stop):
                    if _like_pandas:
                        result[j] = partial + arr[j]
                        # logical (not bitwise) negation of the NaN test
                        if not is_nan(arr[j]):
                            partial = result[j]
                    else:
                        if not is_nan(arr[j]):
                            partial += arr[j]
                        result[j] = partial
                partial_sum[i] = partial

            # Pass 2: shift every chunk by the total of the chunks before it.
            for i in prange(len(chunks)):
                prefix = sum(partial_sum[0:i])
                chunk = chunks[i]
                for j in range(chunk.start, chunk.stop):
                    result[j] += prefix

            return result

        return nancumsum_impl
def np_nanvar(a):
    """
    Build a parallel variance that skips NaN elements.

    Returns ``None`` when ``a`` is not an array type. The implementation
    takes the mean from ``nanmean`` and then, in a ``prange`` loop,
    accumulates ``real(d * conj(d))`` for the deviation ``d`` of every
    non-NaN element, finally dividing by the number of such elements.
    """
    if not isinstance(a, types.Array):
        return

    isnan = get_isnan(a.dtype)

    def nanvar_impl(a):
        # Mean of the non-NaN elements
        mean = nanmean(a)

        # Accumulate squared magnitudes of the deviations
        sq_dev_sum = 0.0
        n_valid = 0
        for idx in prange(len(a)):
            item = a[idx]
            if not isnan(item):
                dev = (item.item() - mean)
                sq_dev_sum += np.real(dev * np.conj(dev))
                n_valid += 1
        # np.divide() doesn't raise ZeroDivisionError
        return np.divide(sq_dev_sum, n_valid)

    return nanvar_impl
def np_nanprod(a):
    """
    Reimplemented with parfor from numba.np.arraymath.

    Returns ``None`` when ``a`` is not an array type; otherwise returns an
    implementation that multiplies all non-NaN elements in a ``prange``
    loop. Integer arrays accumulate in ``types.intp``, every other dtype
    accumulates in the array's own dtype.
    """
    if not isinstance(a, types.Array):
        return

    elem_type = a.dtype
    retty = types.intp if isinstance(elem_type, types.Integer) else elem_type
    one = retty(1)
    isnan = get_isnan(elem_type)

    def nanprod_impl(a):
        product = one
        for idx in prange(len(a)):
            item = a[idx]
            if not isnan(item):
                product *= item
        return product

    return nanprod_impl
def sdc_isnan_overload(self):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.isnan.

    Returns ``None`` for non-array types. Integer and boolean arrays get an
    implementation that yields an all-``False`` mask; float arrays get an
    element-wise NaN test run in a ``prange`` loop. Any other dtype is
    reported through the type checker.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k isnan

    """
    if not isinstance(self, types.Array):
        return None

    ty_checker = TypeChecker("numpy-like 'isnan'")
    dtype = self.dtype
    isnan = get_isnan(dtype)

    never_nan_types = (types.Integer, types.Boolean, bool)
    if isinstance(dtype, never_nan_types):
        def sdc_isnan_int_impl(self):
            # These dtypes cannot hold NaN: the mask is all False.
            return numpy.zeros(shape=len(self), dtype=numpy.bool_)

        return sdc_isnan_int_impl

    if isinstance(dtype, types.Float):
        def sdc_isnan_float_impl(self):
            n = len(self)
            mask = numpy.empty(shape=n, dtype=numpy.bool_)
            for k in prange(n):
                mask[k] = isnan(self[k])

            return mask

        return sdc_isnan_float_impl

    ty_checker.raise_exc(dtype, 'int or float', 'self.dtype')
def dropna_overload(arr, idx, name):
    """
    Build a chunk-parallel NaN filter over a data array and its index.

    The returned implementation makes two passes over the chunks produced
    by ``parallel_chunks``:

    1. count the non-NaN elements of ``arr`` per chunk (and in total);
    2. copy the surviving ``arr``/``idx`` pairs into freshly allocated
       arrays, each chunk writing at the offset given by the counts of the
       chunks before it.

    The result is ``pandas.Series(result_data, result_index, name)``.
    """
    dtype = arr.dtype
    dtype_idx = idx.dtype
    isnan = get_isnan(dtype)

    def dropna_impl(arr, idx, name):
        chunks = parallel_chunks(len(arr))
        arr_len = numpy.empty(len(chunks), dtype=numpy.int64)
        length = 0

        # Pass 1: how many elements each chunk keeps.
        for i in prange(len(chunks)):
            chunk = chunks[i]
            res = 0
            for j in range(chunk.start, chunk.stop):
                if not isnan(arr[j]):
                    res += 1
            length += res
            arr_len[i] = res

        result_data = numpy.empty(shape=length, dtype=dtype)
        result_index = numpy.empty(shape=length, dtype=dtype_idx)

        # Pass 2: each chunk writes its survivors starting right after the
        # survivors of all preceding chunks.
        for i in prange(len(chunks)):
            chunk = chunks[i]
            current_pos = int(sum(arr_len[0:i]))
            for j in range(chunk.start, chunk.stop):
                if not isnan(arr[j]):
                    result_data[current_pos] = arr[j]
                    result_index[current_pos] = idx[j]
                    current_pos += 1

        return pandas.Series(result_data, result_index, name)

    return dropna_impl
def sdc_fillna_overload(self, inplace=False, value=None):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of fillna.

    Typing-time dispatcher. Returns ``None`` (not handled) unless ``self`` is
    an array or string-array type and ``inplace`` is a literal, omitted, or
    the plain Python value ``False``.

    In-place (``inplace`` is a literal ``True``): the implementation returns
    ``None``; it is a no-op when ``value`` is None/omitted or the dtype is
    integer/boolean, and overwrites NaN elements for float dtypes. Unicode
    data is not supported in-place.

    Otherwise: a new array is returned - a copy when ``value`` is
    None/omitted or the dtype is integer/boolean, a NaN-filled array for
    float dtypes, and a newly allocated string array for unicode data.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k fillna

    """
    if not isinstance(self, (types.Array, StringArrayType)):
        return None

    # Accept a literal/omitted ``inplace`` or the bare default ``False``.
    # The ``or`` must sit outside the isinstance() call: inside it, the
    # (always truthy) type tuple would win and ``inplace is False`` would
    # never be evaluated.
    if not (isinstance(inplace, (types.Literal, types.Omitted)) or inplace is False):
        return None

    dtype = self.dtype
    isnan = get_isnan(dtype)
    if (isinstance(inplace, types.Literal) and inplace.literal_value is True):

        def sdc_fillna_inplace_noop(self, inplace=False, value=None):
            return None

        # Nothing to fill with.
        if isinstance(value, (types.NoneType, types.Omitted)) or value is None:
            return sdc_fillna_inplace_noop

        # Nothing to fill in.
        if isinstance(dtype, (types.Integer, types.Boolean)):
            return sdc_fillna_inplace_noop

        if isinstance(dtype, types.Float):
            def sdc_fillna_inplace_float_impl(self, inplace=False, value=None):
                _value = np.nan if value is None else value
                length = len(self)
                for i in prange(length):
                    if isnan(self[i]):
                        self[i] = _value
                return None

            return sdc_fillna_inplace_float_impl

        if isinstance(dtype, types.UnicodeType):
            # TO-DO: not supported, since no generic setitem for StringArray
            return None

    else:
        def sdc_fillna_noop(self, inplace=False, value=None):
            return copy(self)

        # Nothing to fill with.
        if isinstance(value, (types.NoneType, types.Omitted)) or value is None:
            return sdc_fillna_noop

        # Nothing to fill in.
        if isinstance(dtype, (types.Integer, types.Boolean)):
            return sdc_fillna_noop

        if isinstance(dtype, types.Float):
            def sdc_fillna_impl(self, inplace=False, value=None):
                _value = np.nan if value is None else value
                length = len(self)
                filled_data = numpy.empty(length, dtype=dtype)
                for i in prange(length):
                    if isnan(self[i]):
                        filled_data[i] = _value
                    else:
                        filled_data[i] = self[i]
                return filled_data

            return sdc_fillna_impl

        if isinstance(self.dtype, types.UnicodeType):
            def sdc_fillna_str_impl(self, inplace=False, value=None):
                n = len(self)
                num_chars = 0
                # get total chars in new array
                for i in prange(n):
                    s = self[i]
                    if sdc.hiframes.api.isna(self, i):
                        num_chars += get_utf8_size(value)
                    else:
                        num_chars += get_utf8_size(s)

                filled_data = pre_alloc_string_array(n, num_chars)
                # StringArray doesn't support parallel setitem, thus no prange here
                for i in numpy.arange(n):
                    if sdc.hiframes.api.isna(self, i):
                        filled_data[i] = value
                    else:
                        filled_data[i] = self[i]
                return filled_data

            return sdc_fillna_str_impl
def arg_impl(self):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.argmin/numpy.argmax.

    ``reduce_op`` comes from the enclosing scope (not visible in this
    block) and must be the builtin ``min`` or ``max``.

    Returns ``None`` for non-array types. For integer and float arrays it
    returns an implementation that finds a best value and its position
    inside every chunk from ``parallel_chunks`` and then merges the
    per-chunk results serially. Other dtypes are reported through the type
    checker.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k argmin
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k argmax

    """
    # Check the container type first: only array types are inspected for
    # ``dtype`` below.
    if not isinstance(self, types.Array):
        return None

    ty_checker = TypeChecker("numpy-like 'argmin'/'argmax'")
    dtype = self.dtype

    # A starting value exists only for integer and float dtypes, so only
    # those are accepted; any other dtype (complex included) falls through
    # to the type checker instead of leaving ``initial_result`` unbound.
    if isinstance(dtype, (types.Integer, types.Float)):
        isnan = get_isnan(dtype)
        max_int64 = max_dtype_int_val(numpy_support.from_dtype(numpy.int64))

        # Seed with the value that any element beats: the dtype's largest
        # value when searching with min, its smallest when searching with max.
        if isinstance(dtype, types.Integer):
            initial_result = {
                min: max_dtype_int_val(dtype),
                max: min_dtype_int_val(dtype),
            }[reduce_op]
        else:
            initial_result = {
                min: max_dtype_float_val(dtype),
                max: min_dtype_float_val(dtype),
            }[reduce_op]

        def sdc_argmin_impl(self):
            chunks = parallel_chunks(len(self))
            arr_res = numpy.empty(shape=len(chunks), dtype=dtype)
            arr_pos = numpy.empty(shape=len(chunks), dtype=numpy.int64)

            # Stage 1: best value and its position within every chunk.
            for i in prange(len(chunks)):
                chunk = chunks[i]
                res = initial_result
                pos = max_int64
                for j in range(chunk.start, chunk.stop):
                    if not isnan(self[j]):
                        # Current best is not displaced by this element.
                        if reduce_op(res, self[j]) != self[j]:
                            continue
                        if res == self[j]:
                            # Tie: keep the smaller position.
                            pos = min(pos, j)
                        else:
                            pos = j
                            res = self[j]
                    else:
                        # NaN element: it becomes the chunk result; among
                        # several NaNs the smallest position is kept.
                        if numpy.isnan(res):
                            pos = min(pos, j)
                        else:
                            pos = j
                            res = self[j]
                arr_res[i] = res
                arr_pos[i] = pos

            # Stage 2: serial merge of the per-chunk results, same rules.
            general_res = initial_result
            general_pos = max_int64
            for i in range(len(chunks)):
                if not isnan(arr_res[i]):
                    if reduce_op(general_res, arr_res[i]) != arr_res[i]:
                        continue
                    if general_res == arr_res[i]:
                        general_pos = min(general_pos, arr_pos[i])
                    else:
                        general_pos = arr_pos[i]
                        general_res = arr_res[i]
                else:
                    if numpy.isnan(general_res):
                        general_pos = min(general_pos, arr_pos[i])
                    else:
                        general_pos = arr_pos[i]
                        general_res = arr_res[i]

            return general_pos

        return sdc_argmin_impl

    ty_checker.raise_exc(dtype, 'int or float', 'self.dtype')
def sdc_fillna_overload(self, inplace=False, value=None):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of fillna.

    Typing-time dispatcher. Returns ``None`` (not handled) unless ``self`` is
    an array or string-array type.

    In-place (``inplace`` is a literal or plain ``True``): the implementation
    returns ``None``; it is a no-op for integer/boolean dtypes and
    overwrites NaN elements with ``value`` for every other dtype.

    Otherwise a new array is returned: a newly allocated string array for
    unicode data, a copy for integer/boolean dtypes, and a NaN-filled array
    for every other dtype.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k fillna

    """
    if not isinstance(self, (types.Array, StringArrayType)):
        return None

    dtype = self.dtype
    isnan = get_isnan(dtype)
    if ((isinstance(inplace, types.Literal) and inplace.literal_value == True) or  # noqa
        (isinstance(inplace, bool) and inplace == True)  # noqa
    ):
        if isinstance(dtype, (types.Integer, types.Boolean)):
            def sdc_fillna_inplace_int_impl(self, inplace=False, value=None):
                return None

            return sdc_fillna_inplace_int_impl

        def sdc_fillna_inplace_float_impl(self, inplace=False, value=None):
            length = len(self)
            for i in prange(length):
                if isnan(self[i]):
                    self[i] = value
            return None

        return sdc_fillna_inplace_float_impl

    else:
        if isinstance(self.dtype, types.UnicodeType):
            def sdc_fillna_str_impl(self, inplace=False, value=None):
                n = len(self)
                num_chars = 0
                # Size the buffer from the UTF-8 size of each string, as the
                # other fillna string implementation in this file does,
                # rather than from its character count.
                for i in prange(n):
                    s = self[i]
                    if sdc.hiframes.api.isna(self, i):
                        num_chars += get_utf8_size(value)
                    else:
                        num_chars += get_utf8_size(s)

                filled_data = pre_alloc_string_array(n, num_chars)
                # Fill serially, not in prange, mirroring the other fillna
                # string implementation in this file.
                # NOTE(review): presumably the string array has no
                # parallel-safe setitem - confirm.
                for i in numpy.arange(n):
                    if sdc.hiframes.api.isna(self, i):
                        filled_data[i] = value
                    else:
                        filled_data[i] = self[i]
                return filled_data

            return sdc_fillna_str_impl

        if isinstance(dtype, (types.Integer, types.Boolean)):
            def sdc_fillna_int_impl(self, inplace=False, value=None):
                return copy(self)

            return sdc_fillna_int_impl

        def sdc_fillna_impl(self, inplace=False, value=None):
            length = len(self)
            filled_data = numpy.empty(length, dtype=dtype)
            for i in prange(length):
                if isnan(self[i]):
                    filled_data[i] = value
                else:
                    filled_data[i] = self[i]
            return filled_data

        return sdc_fillna_impl