示例#1
0
def sdc_nansum_overload(self):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.nansum.

    For numeric dtypes the returned implementation adds up every element of
    ``self`` that is not NaN inside a ``prange`` loop.  For boolean dtypes the
    implementation produced by ``gen_sum_bool_impl()`` is returned.  No
    implementation (None) is returned when ``self`` is not an array type.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k nansum
    """

    # Check the argument type before reading ``self.dtype``: only array types
    # are handled here, so nothing else should be inspected for other types.
    if not isinstance(self, types.Array):
        return None

    dtype = self.dtype

    if isinstance(dtype, types.Number):

        def sdc_nansum_number_impl(self):
            length = len(self)
            # Integer seed; the accumulator is a reduction variable of the
            # parallel loop below.
            result = 0
            for i in prange(length):
                if not numpy.isnan(self[i]):
                    result += self[i]

            return result

        return sdc_nansum_number_impl

    if isinstance(dtype, (types.Boolean, bool)):
        return gen_sum_bool_impl()
示例#2
0
    def ov_impl(a):
        """
        Pick a parallel reduction implementation for the array type ``a``.

        ``reduce_op`` is taken from the enclosing scope (not visible here);
        the seed table below only has entries for ``min`` and ``max``.
        Returns None when ``a`` is not an array type.
        """
        if not isinstance(a, types.Array):
            return

        elem_type = a.dtype
        if not isinstance(elem_type, (types.Float, types.Complex)):
            # No NaN handling on this path: seed with the first element and
            # fold in the remaining ones.  a[0] is read unconditionally.
            def plain_impl(a):
                acc = a[0]
                for k in prange(len(a) - 1):
                    acc = reduce_op(acc, a[k + 1])
                return acc
            return plain_impl

        isnan = get_isnan(elem_type)
        # Neutral starting value for the selected reduction.
        seeds = {
            min: numpy.inf,
            max: -numpy.inf,
        }
        initial_result = seeds[reduce_op]

        def nan_skipping_impl(a):
            n = len(a)
            skipped = 0
            acc = initial_result
            for k in prange(n):
                item = a[k]
                if isnan(item):
                    skipped += 1
                else:
                    acc = reduce_op(acc, item)

            # Nothing but NaN was seen (this also holds for a zero-length
            # input, where both counters are 0).
            if skipped == n:
                return numpy.nan

            return acc
        return nan_skipping_impl
示例#3
0
def np_nanmean(a):
    """
    Parallel mean of an array that leaves NaN elements out.

    Returns None when ``a`` is not an array type; otherwise returns the
    implementation, which divides the sum of the non-NaN elements by their
    count.
    """
    if not isinstance(a, types.Array):
        return None

    isnan = get_isnan(a.dtype)

    def nanmean_impl(a):
        n_valid = 0
        total = 0.0
        for pos in prange(len(a)):
            item = a[pos]
            if not isnan(item):
                n_valid += 1
                total += item
        # np.divide() is used because it doesn't raise ZeroDivisionError
        # when no valid element was found
        return np.divide(total, n_valid)

    return nanmean_impl
示例#4
0
def np_nancumsum(arr, like_pandas=False):
    """
    Parallel cumulative sum that skips NaN elements.

    For boolean and integer dtypes the call is forwarded to ``cumsum(arr)``.
    For other dtypes the array is split with ``parallel_chunks`` and scanned
    in two passes: each chunk first computes its local running sum, then the
    total of all preceding chunks is added to every element of the chunk.

    ``like_pandas`` selects what is stored at a NaN position: when true the
    output is ``partial + arr[j]`` (so the NaN is kept in the output) while
    the running sum itself ignores it; when false the running sum so far is
    stored.

    Returns None when ``arr`` is not an array type.
    """
    if not isinstance(arr, types.Array):
        return

    if isinstance(arr.dtype, (types.Boolean, types.Integer)):
        # dtype cannot possibly contain NaN
        return lambda arr, like_pandas=False: cumsum(arr)
    else:
        retty = arr.dtype
        is_nan = get_isnan(retty)
        zero = retty(0)

        def nancumsum_impl(arr, like_pandas=False):
            chunks = parallel_chunks(len(arr))
            partial_sum = numpy.zeros(len(chunks), dtype=retty)
            result = numpy.empty_like(arr)

            # below line is only needed since Literal[bool] var cannot be converted to bool
            # in a prange due to a bug related to absence of BooleanLiterals in Numba
            _like_pandas = True if like_pandas else False
            # First pass: independent running sum inside every chunk.
            for i in prange(len(chunks)):
                chunk = chunks[i]
                partial = zero
                for j in range(chunk.start, chunk.stop):
                    if _like_pandas:
                        result[j] = partial + arr[j]
                        # logical ``not`` (rather than bitwise ``~``) so the
                        # test does not rely on how ``~`` treats a bool
                        if not is_nan(arr[j]):
                            partial = result[j]
                    else:
                        if not is_nan(arr[j]):
                            partial += arr[j]
                        result[j] = partial
                partial_sum[i] = partial

            # Second pass: shift every chunk by the total of the chunks
            # that precede it.
            for i in prange(len(chunks)):
                prefix = sum(partial_sum[0:i])
                chunk = chunks[i]
                for j in range(chunk.start, chunk.stop):
                    result[j] += prefix

            return result

        return nancumsum_impl
示例#5
0
def np_nancumsum(arr, like_pandas=False):
    """
    Parallel cumulative sum that skips NaN elements.

    For boolean and integer dtypes the call is forwarded to ``cumsum(arr)``.
    For other dtypes the array is split with ``parallel_chunks`` and scanned
    in two passes: each chunk first computes its local running sum, then the
    total of all preceding chunks is added to every element of the chunk.

    ``like_pandas`` selects what is stored at a NaN position: when true the
    output is ``partial + arr[j]`` (so the NaN is kept in the output) while
    the running sum itself ignores it; when false the running sum so far is
    stored.

    Returns None when ``arr`` is not an array type.
    """
    if not isinstance(arr, types.Array):
        return

    if isinstance(arr.dtype, (types.Boolean, types.Integer)):
        # dtype cannot possibly contain NaN
        return lambda arr, like_pandas=False: cumsum(arr)
    else:
        retty = arr.dtype
        is_nan = get_isnan(retty)
        zero = retty(0)

        def nancumsum_impl(arr, like_pandas=False):
            chunks = parallel_chunks(len(arr))
            partial_sum = numpy.zeros(len(chunks), dtype=retty)
            result = numpy.empty_like(arr)

            # Turn the flag into a plain bool before entering the parallel
            # loop: a Literal[bool] variable cannot be converted to bool
            # inside a prange (Numba has no BooleanLiterals).
            _like_pandas = True if like_pandas else False
            # First pass: independent running sum inside every chunk.
            for i in prange(len(chunks)):
                chunk = chunks[i]
                partial = zero
                for j in range(chunk.start, chunk.stop):
                    if _like_pandas:
                        result[j] = partial + arr[j]
                        # logical ``not`` (rather than bitwise ``~``) so the
                        # test does not rely on how ``~`` treats a bool
                        if not is_nan(arr[j]):
                            partial = result[j]
                    else:
                        if not is_nan(arr[j]):
                            partial += arr[j]
                        result[j] = partial
                partial_sum[i] = partial

            # Second pass: shift every chunk by the total of the chunks
            # that precede it.
            for i in prange(len(chunks)):
                prefix = sum(partial_sum[0:i])
                chunk = chunks[i]
                for j in range(chunk.start, chunk.stop):
                    result[j] += prefix

            return result

        return nancumsum_impl
示例#6
0
def np_nanvar(a):
    """
    Parallel variance of an array that leaves NaN elements out.

    Returns None when ``a`` is not an array type; otherwise returns the
    implementation, which averages the squared magnitude of each non-NaN
    element's deviation from ``nanmean(a)``.
    """
    if not isinstance(a, types.Array):
        return None

    isnan = get_isnan(a.dtype)

    def nanvar_impl(a):
        # Mean over the non-NaN elements
        mean_val = nanmean(a)

        # Accumulate squared deviations from that mean
        n_valid = 0
        sq_sum = 0.0
        for pos in prange(len(a)):
            item = a[pos]
            if not isnan(item):
                dev = item.item() - mean_val
                # dev * conj(dev) is the squared magnitude; keep its real part
                sq_sum += np.real(dev * np.conj(dev))
                n_valid += 1
        # np.divide() is used because it doesn't raise ZeroDivisionError
        # when no valid element was found
        return np.divide(sq_sum, n_valid)

    return nanvar_impl
示例#7
0
def np_nanprod(a):
    """
    Parallel product of an array that leaves NaN elements out.

    Reimplemented with parfor from numba.np.arraymath.  Returns None when
    ``a`` is not an array type.
    """
    if not isinstance(a, types.Array):
        return None

    # Integer input is accumulated as intp; every other dtype keeps its own type.
    retty = types.intp if isinstance(a.dtype, types.Integer) else a.dtype
    one = retty(1)
    isnan = get_isnan(a.dtype)

    def nanprod_impl(a):
        product = one
        for pos in prange(len(a)):
            item = a[pos]
            if not isnan(item):
                product *= item
        return product

    return nanprod_impl
示例#8
0
def sdc_isnan_overload(self):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.isnan.

    Returns None when ``self`` is not an array type.  Float arrays are tested
    element by element; integer and boolean arrays yield an all-False mask.
    Any other dtype is reported through the type checker.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k isnan
    """

    if not isinstance(self, types.Array):
        return None

    ty_checker = TypeChecker("numpy-like 'isnan'")
    dtype = self.dtype
    isnan = get_isnan(dtype)

    if isinstance(dtype, types.Float):

        def sdc_isnan_float_impl(self):
            n = len(self)
            mask = numpy.empty(shape=n, dtype=numpy.bool_)
            for pos in prange(n):
                mask[pos] = isnan(self[pos])

            return mask

        return sdc_isnan_float_impl

    if isinstance(dtype, (types.Integer, types.Boolean, bool)):

        def sdc_isnan_int_impl(self):
            # The mask is simply all False for these dtypes.
            return numpy.zeros(shape=len(self), dtype=numpy.bool_)

        return sdc_isnan_int_impl

    ty_checker.raise_exc(dtype, 'int or float', 'self.dtype')
示例#9
0
def dropna_overload(arr, idx, name):
    """
    Parallel dropna: build a Series without the NaN elements of ``arr``.

    The returned implementation works in two passes over the chunks given by
    ``parallel_chunks``: the first counts the non-NaN elements per chunk (and
    in total), the second copies the surviving data and the matching index
    values into freshly allocated arrays.  The result is
    ``pandas.Series(result_data, result_index, name)``.
    """
    dtype = arr.dtype
    dtype_idx = idx.dtype
    isnan = get_isnan(dtype)

    def dropna_impl(arr, idx, name):
        chunks = parallel_chunks(len(arr))
        # Number of kept elements per chunk.
        arr_len = numpy.empty(len(chunks), dtype=numpy.int64)
        length = 0

        for i in prange(len(chunks)):
            chunk = chunks[i]
            res = 0
            for j in range(chunk.start, chunk.stop):
                if not isnan(arr[j]):
                    res += 1
            length += res
            arr_len[i] = res

        result_data = numpy.empty(shape=length, dtype=dtype)
        result_index = numpy.empty(shape=length, dtype=dtype_idx)
        for i in prange(len(chunks)):
            chunk = chunks[i]
            # Output offset of this chunk = kept elements of all earlier chunks.
            current_pos = int(sum(arr_len[0:i]))

            for j in range(chunk.start, chunk.stop):
                if not isnan(arr[j]):
                    result_data[current_pos] = arr[j]
                    result_index[current_pos] = idx[j]
                    current_pos += 1

        return pandas.Series(result_data, result_index, name)

    return dropna_impl
示例#10
0
def sdc_fillna_overload(self, inplace=False, value=None):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of fillna.

    The implementation is chosen from the array dtype and the ``inplace``
    flag:

    * ``inplace`` is a literal ``True``: the implementation returns None.
      Float data is rewritten in place; when ``value`` is None/omitted, or
      the dtype is integer or boolean, the implementation does nothing.
      String data is not supported in this mode.
    * otherwise: a new array is returned.  It is ``copy(self)`` when
      ``value`` is None/omitted or the dtype is integer or boolean; for
      float and string data the missing elements are replaced by ``value``.

    Returns None (no implementation) when ``self`` is neither an Array nor a
    StringArrayType, when ``inplace`` is neither a Literal/Omitted type nor
    the plain ``False`` default, or when no branch matches the dtype.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k fillna
    """
    if not isinstance(self, (types.Array, StringArrayType)):
        return None

    # Accept a Literal/Omitted ``inplace`` type as well as the plain False
    # default.  The ``or`` has to sit outside the isinstance() call: placed
    # inside, it would only choose which tuple is passed to isinstance().
    if not (isinstance(inplace, (types.Literal, types.Omitted))
            or inplace is False):
        return None

    dtype = self.dtype
    isnan = get_isnan(dtype)

    if (isinstance(inplace, types.Literal) and inplace.literal_value is True):

        def sdc_fillna_inplace_noop(self, inplace=False, value=None):
            return None

        # Nothing to fill with: leave the data untouched.
        if isinstance(value, (types.NoneType, types.Omitted)) or value is None:
            return sdc_fillna_inplace_noop

        # These dtypes are left untouched as well.
        if isinstance(dtype, (types.Integer, types.Boolean)):
            return sdc_fillna_inplace_noop

        if isinstance(dtype, types.Float):

            def sdc_fillna_inplace_float_impl(self, inplace=False, value=None):
                _value = np.nan if value is None else value
                length = len(self)
                for i in prange(length):
                    if isnan(self[i]):
                        self[i] = _value
                return None

            return sdc_fillna_inplace_float_impl

        if isinstance(dtype, types.UnicodeType):
            # TO-DO: not supported, since no generic setitem for StringArray
            return None

    else:

        def sdc_fillna_noop(self, inplace=False, value=None):
            return copy(self)

        # Nothing to fill with: hand back a copy.
        if isinstance(value, (types.NoneType, types.Omitted)) or value is None:
            return sdc_fillna_noop

        # These dtypes are returned as a plain copy as well.
        if isinstance(dtype, (types.Integer, types.Boolean)):
            return sdc_fillna_noop

        if isinstance(dtype, types.Float):

            def sdc_fillna_impl(self, inplace=False, value=None):
                _value = np.nan if value is None else value
                length = len(self)
                filled_data = numpy.empty(length, dtype=dtype)
                for i in prange(length):
                    if isnan(self[i]):
                        filled_data[i] = _value
                    else:
                        filled_data[i] = self[i]
                return filled_data

            return sdc_fillna_impl

        if isinstance(self.dtype, types.UnicodeType):

            def sdc_fillna_str_impl(self, inplace=False, value=None):
                n = len(self)
                num_chars = 0
                # get total chars in new array
                for i in prange(n):
                    s = self[i]
                    if sdc.hiframes.api.isna(self, i):
                        num_chars += get_utf8_size(value)
                    else:
                        num_chars += get_utf8_size(s)

                filled_data = pre_alloc_string_array(n, num_chars)
                # StringArray doesn't support parallel setitem, thus no prange here
                for i in numpy.arange(n):
                    if sdc.hiframes.api.isna(self, i):
                        filled_data[i] = value
                    else:
                        filled_data[i] = self[i]
                return filled_data

            return sdc_fillna_str_impl
示例#11
0
    def arg_impl(self):
        """
        Intel Scalable Dataframe Compiler Developer Guide
        *************************************************
        Parallel replacement of numpy.argmin/numpy.argmax.

        ``reduce_op`` is taken from the enclosing scope (not visible here);
        the seed tables below have entries for ``min`` and ``max``.  The
        returned implementation finds a candidate value and position inside
        every chunk in parallel and then merges the per-chunk candidates
        sequentially.  Returns None when ``self`` is not an array type;
        non-numeric dtypes are reported through the type checker.

        .. only:: developer
        Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k argmin
        Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k argmax

        """

        # Check the argument type before reading ``self.dtype`` or building
        # any dtype-dependent helper: only array types are handled here.
        if not isinstance(self, types.Array):
            return None

        ty_checker = TypeChecker("numpy-like 'argmin'/'argmax'")
        dtype = self.dtype
        isnan = get_isnan(dtype)
        # Position seed: larger than any real index, so min() replaces it.
        max_int64 = max_dtype_int_val(numpy_support.from_dtype(numpy.int64))
        # Value seed: the opposite extreme of the dtype for the chosen op.
        if isinstance(dtype, types.Integer):
            initial_result = {
                min: max_dtype_int_val(dtype),
                max: min_dtype_int_val(dtype),
            }[reduce_op]
        elif isinstance(dtype, types.Float):
            initial_result = {
                min: max_dtype_float_val(dtype),
                max: min_dtype_float_val(dtype),
            }[reduce_op]
        # NOTE(review): a dtype that is a types.Number but neither Integer nor
        # Float gets no ``initial_result`` above although it passes the check
        # below - confirm whether such input should be rejected explicitly.

        if isinstance(dtype, types.Number):

            def sdc_argmin_impl(self):
                chunks = parallel_chunks(len(self))
                # Per-chunk candidate value and its position.
                arr_res = numpy.empty(shape=len(chunks), dtype=dtype)
                arr_pos = numpy.empty(shape=len(chunks), dtype=numpy.int64)
                for i in prange(len(chunks)):
                    chunk = chunks[i]
                    res = initial_result
                    pos = max_int64
                    for j in range(chunk.start, chunk.stop):
                        if not isnan(self[j]):
                            # Skip elements that do not win the reduction.
                            if reduce_op(res, self[j]) != self[j]:
                                continue
                            # On a tie keep the smallest index.
                            if res == self[j]:
                                pos = min(pos, j)
                            else:
                                pos = j
                                res = self[j]
                        else:
                            # A NaN element takes over the candidate; among
                            # NaN elements the smallest index is kept.
                            if numpy.isnan(res):
                                pos = min(pos, j)
                            else:
                                pos = j
                            res = self[j]

                    arr_res[i] = res
                    arr_pos[i] = pos
                # Merge the per-chunk candidates with the same rules.
                general_res = initial_result
                general_pos = max_int64
                for i in range(len(chunks)):
                    if not isnan(arr_res[i]):
                        if reduce_op(general_res, arr_res[i]) != arr_res[i]:
                            continue
                        if general_res == arr_res[i]:
                            general_pos = min(general_pos, arr_pos[i])
                        else:
                            general_pos = arr_pos[i]
                            general_res = arr_res[i]
                    else:
                        if numpy.isnan(general_res):
                            general_pos = min(general_pos, arr_pos[i])
                        else:
                            general_pos = arr_pos[i]
                        general_res = arr_res[i]
                return general_pos

            return sdc_argmin_impl

        ty_checker.raise_exc(dtype, 'number', 'self.dtype')
示例#12
0
def sdc_fillna_overload(self, inplace=False, value=None):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of fillna.

    The implementation is chosen from the array dtype and the ``inplace``
    flag:

    * ``inplace`` is true (as a literal type or a plain bool): the
      implementation returns None.  Integer and boolean data are left
      untouched; for every other dtype the NaN elements are overwritten
      with ``value`` in place.
    * otherwise: a new array is returned.  Integer and boolean data are
      returned as ``copy(self)``; for string data the missing elements, and
      for the remaining dtypes the NaN elements, are replaced by ``value``.

    Returns None (no implementation) when ``self`` is neither an Array nor a
    StringArrayType.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k fillna
    """
    if not isinstance(self, (types.Array, StringArrayType)):
        return None

    dtype = self.dtype
    isnan = get_isnan(dtype)
    if ((isinstance(inplace, types.Literal) and inplace.literal_value == True)
            or  # noqa
        (isinstance(inplace, bool) and inplace == True)  # noqa
        ):
        if isinstance(dtype, (types.Integer, types.Boolean)):

            def sdc_fillna_inplace_int_impl(self, inplace=False, value=None):
                return None

            return sdc_fillna_inplace_int_impl

        def sdc_fillna_inplace_float_impl(self, inplace=False, value=None):
            length = len(self)
            for i in prange(length):
                if isnan(self[i]):
                    self[i] = value
            return None

        return sdc_fillna_inplace_float_impl

    else:
        if isinstance(self.dtype, types.UnicodeType):

            def sdc_fillna_str_impl(self, inplace=False, value=None):
                n = len(self)
                num_chars = 0
                # get total chars in new array; sized with get_utf8_size()
                # rather than len() - presumably the buffer is measured in
                # UTF-8 bytes, which len() under-counts for non-ASCII text
                for i in prange(n):
                    s = self[i]
                    if sdc.hiframes.api.isna(self, i):
                        num_chars += get_utf8_size(value)
                    else:
                        num_chars += get_utf8_size(s)

                filled_data = pre_alloc_string_array(n, num_chars)
                # StringArray doesn't support parallel setitem, thus no prange here
                for i in numpy.arange(n):
                    if sdc.hiframes.api.isna(self, i):
                        filled_data[i] = value
                    else:
                        filled_data[i] = self[i]
                return filled_data

            return sdc_fillna_str_impl

        if isinstance(dtype, (types.Integer, types.Boolean)):

            def sdc_fillna_int_impl(self, inplace=False, value=None):
                return copy(self)

            return sdc_fillna_int_impl

        def sdc_fillna_impl(self, inplace=False, value=None):
            length = len(self)
            filled_data = numpy.empty(length, dtype=dtype)
            for i in prange(length):
                if isnan(self[i]):
                    filled_data[i] = value
                else:
                    filled_data[i] = self[i]
            return filled_data

        return sdc_fillna_impl