Exemplo n.º 1
0
    def dropna_impl(arr, idx, name):
        """Build a Series from the non-NaN elements of ``arr`` and their labels.

        The input is processed in the chunks returned by ``parallel_chunks``.
        A first pass counts the elements kept in every chunk; a second pass
        copies the kept values (and the matching entries of ``idx``) into
        output buffers, each chunk writing at the offset given by the counts
        of all earlier chunks.

        Args:
            arr: values to filter; every element is tested with ``isnan``.
            idx: labels, read at the same positions as ``arr``.
            name: forwarded as the third positional argument of ``pandas.Series``.

        Returns:
            ``pandas.Series(result_data, result_index, name)`` built from the
            kept values and labels.
        """
        chunks = parallel_chunks(len(arr))
        arr_len = numpy.empty(len(chunks), dtype=numpy.int64)
        length = 0

        # Pass 1: per-chunk count of kept elements; ``length`` accumulates the total.
        for i in prange(len(chunks)):
            chunk = chunks[i]
            res = 0
            for j in range(chunk.start, chunk.stop):
                if not isnan(arr[j]):
                    res += 1
            length += res
            arr_len[i] = res

        # ``dtype`` and ``dtype_idx`` are not defined in this function; they come
        # from the enclosing scope.
        result_data = numpy.empty(shape=length, dtype=dtype)
        result_index = numpy.empty(shape=length, dtype=dtype_idx)

        # Pass 2: copy the kept elements into their final positions.
        for i in prange(len(chunks)):
            chunk = chunks[i]
            # Output offset of this chunk = number of elements kept by earlier chunks.
            current_pos = int(sum(arr_len[0:i]))

            for j in range(chunk.start, chunk.stop):
                if not isnan(arr[j]):
                    result_data[current_pos] = arr[j]
                    result_index[current_pos] = idx[j]
                    current_pos += 1

        return pandas.Series(result_data, result_index, name)
Exemplo n.º 2
0
        def nancumsum_impl(arr, like_pandas=False):
            """Cumulative sum of ``arr`` in which NaNs do not affect later elements.

            The input is processed in the chunks returned by ``parallel_chunks``.
            Pass 1 writes a chunk-local running sum into ``result`` and records
            each chunk's final running value; pass 2 adds to every chunk the sum
            of the final values of all earlier chunks.

            Args:
                arr: input array.
                like_pandas: if true, each output element is ``running + arr[j]``
                    (so a NaN input is propagated to that output position) while
                    the running sum itself skips the NaN. If false, a NaN input
                    adds nothing and the output element is the unchanged running
                    sum.

            Returns:
                An array allocated with ``numpy.empty_like(arr)`` holding the
                cumulative sums.
            """
            chunks = parallel_chunks(len(arr))
            partial_sum = numpy.zeros(len(chunks), dtype=retty)
            result = numpy.empty_like(arr)

            # below line is only needed since Literal[bool] var cannot be converted to bool
            # in a prange due to a bug related to absence of BooleanLiterals in Numba
            _like_pandas = True if like_pandas else False

            # Pass 1: chunk-local cumulative sums.
            for i in prange(len(chunks)):
                chunk = chunks[i]
                partial = zero
                for j in range(chunk.start, chunk.stop):
                    # The NaN test uses logical ``not``: bitwise ``~`` applied to a
                    # plain Python bool is always truthy (~True == -2, ~False == -1).
                    if _like_pandas:
                        result[j] = partial + arr[j]
                        if not is_nan(arr[j]):
                            partial = result[j]
                    else:
                        if not is_nan(arr[j]):
                            partial += arr[j]
                        result[j] = partial
                partial_sum[i] = partial

            # Pass 2: shift every chunk by the total of all chunks before it.
            for i in prange(len(chunks)):
                prefix = sum(partial_sum[0:i])
                chunk = chunks[i]
                for j in range(chunk.start, chunk.stop):
                    result[j] += prefix

            return result
Exemplo n.º 3
0
            def sdc_nanargmin_impl(self):
                """Return the position of the element selected by ``reduce_op``, skipping NaNs.

                Each chunk returned by ``parallel_chunks`` is reduced independently
                to a (best value, position) pair; the pairs are then combined
                sequentially. When equal best values occur, the smallest position
                is kept. If no element is ever accepted the returned position is
                ``max_int64``.
                """
                chunks = parallel_chunks(len(self))
                n_chunks = len(chunks)
                chunk_best = numpy.empty(shape=n_chunks, dtype=dtype)
                chunk_best_pos = numpy.empty(shape=n_chunks, dtype=numpy.int64)

                # Stage 1: one (value, position) winner per chunk, computed in parallel.
                for c in prange(n_chunks):
                    chunk = chunks[c]
                    best = initial_result
                    best_pos = max_int64
                    for k in range(chunk.start, chunk.stop):
                        value = self[k]
                        # Skip elements that do not win the reduction, and NaNs.
                        if reduce_op(best, value) != value or isnan(value):
                            continue
                        if best == value:
                            best_pos = min(best_pos, k)
                        else:
                            best_pos = k
                            best = value
                    chunk_best[c] = best
                    chunk_best_pos[c] = best_pos

                # Stage 2: fold the per-chunk winners into the overall winner.
                overall = initial_result
                overall_pos = max_int64
                for c in range(n_chunks):
                    candidate = chunk_best[c]
                    if reduce_op(overall, candidate) != candidate:
                        continue
                    if overall == candidate:
                        overall_pos = min(overall_pos, chunk_best_pos[c])
                    else:
                        overall_pos = chunk_best_pos[c]
                        overall = candidate

                return overall_pos
Exemplo n.º 4
0
        def nancumsum_impl(arr, like_pandas=False):
            """Cumulative sum of ``arr`` in which NaNs do not affect later elements.

            The input is processed in the chunks returned by ``parallel_chunks``.
            Pass 1 writes a chunk-local running sum into ``result`` and records
            each chunk's final running value; pass 2 adds to every chunk the sum
            of the final values of all earlier chunks.

            Args:
                arr: input array.
                like_pandas: if true, each output element is ``running + arr[j]``
                    (so a NaN input is propagated to that output position) while
                    the running sum itself skips the NaN. If false, a NaN input
                    adds nothing and the output element is the unchanged running
                    sum.

            Returns:
                An array allocated with ``numpy.empty_like(arr)`` holding the
                cumulative sums.
            """
            chunks = parallel_chunks(len(arr))
            partial_sum = numpy.zeros(len(chunks), dtype=retty)
            result = numpy.empty_like(arr)

            # Convert the flag to a plain bool before entering the parallel loop:
            # a Literal[bool] variable cannot be converted to bool inside a prange
            # due to a Numba issue related to the absence of BooleanLiterals.
            _like_pandas = True if like_pandas else False

            # Pass 1: chunk-local cumulative sums.
            for i in prange(len(chunks)):
                chunk = chunks[i]
                partial = zero
                for j in range(chunk.start, chunk.stop):
                    # The NaN test uses logical ``not``: bitwise ``~`` applied to a
                    # plain Python bool is always truthy (~True == -2, ~False == -1).
                    if _like_pandas:
                        result[j] = partial + arr[j]
                        if not is_nan(arr[j]):
                            partial = result[j]
                    else:
                        if not is_nan(arr[j]):
                            partial += arr[j]
                        result[j] = partial
                partial_sum[i] = partial

            # Pass 2: shift every chunk by the total of all chunks before it.
            for i in prange(len(chunks)):
                prefix = sum(partial_sum[0:i])
                chunk = chunks[i]
                for j in range(chunk.start, chunk.stop):
                    result[j] += prefix

            return result
Exemplo n.º 5
0
    def find_idx_impl(arr, idx):
        """Collect, per chunk, the positions at which ``arr`` equals ``idx``.

        Returns a Python list with one typed int64 list per chunk produced by
        ``parallel_chunks``; each inner list holds the matching positions found
        in that chunk, in increasing order.
        """
        chunks = parallel_chunks(len(arr))
        n_chunks = len(chunks)
        # One independent output list per chunk, so parallel iterations never
        # append to the same list.
        matches = [List.empty_list(types.int64) for _ in range(n_chunks)]

        for c in prange(n_chunks):
            span = chunks[c]
            for pos in range(span.start, span.stop):
                if arr[pos] == idx:
                    matches[c].append(pos)

        return matches
Exemplo n.º 6
0
        def apply_converter_to_column_impl(table, col_idx, func):
            """Apply ``func`` to every cell of one column of ``table``.

            The column is selected by ``col_idx_literal_val`` taken from the
            enclosing scope; the ``col_idx`` argument is not read by this body.
            Rows are processed in the chunks returned by ``parallel_chunks``.

            Returns:
                The column allocated by ``df_alloc_column_of_dtype(res_dtype, ...)``
                with one converted value per table row.
            """
            n_rows = len(table)
            chunks = parallel_chunks(n_rows)
            converted = df_alloc_column_of_dtype(res_dtype, n_rows)

            for chunk_no in numba.prange(len(chunks)):
                chunk = chunks[chunk_no]
                # One scratch storage per chunk, reused for every cell of that chunk.
                storage = alloc_res_storage(col_dtype)
                for row in range(chunk.start, chunk.stop):
                    arrow_reader_get_table_cell(table, col_idx_literal_val, row, storage)
                    converted[row] = func(read_from_storage(col_dtype, storage))
                # no release_res_storage as meminfo managed objects are freed automatically

            return converted
Exemplo n.º 7
0
    def impl(self):
        """Compute a rolling-window statistic over the wrapped Series, chunk by chunk.

        For every chunk returned by ``parallel_chunks`` the window state
        ``(nfinite, result)`` is rebuilt from the elements preceding the chunk,
        then advanced across the chunk with ``put`` / ``pop``. Each output value
        is produced by ``result_or_nan(nfinite, min_periods, result)``.

        Returns:
            A ``pandas.Series`` of float64 values that reuses the index and name
            of the input Series.
        """
        series = self._data
        values = series._data
        window = self._window
        min_periods = self._min_periods

        n = len(values)
        out = numpy.empty(n, dtype=float64)

        chunks = parallel_chunks(n)
        for c in prange(len(chunks)):
            chunk = chunks[c]
            nfinite = 0
            result = init_result

            if window == 0:
                # Zero-width window: emit the value of the untouched initial state.
                for pos in range(chunk.start, chunk.stop):
                    out[pos] = result_or_nan(nfinite, min_periods, result)
                continue

            # Warm-up: feed the elements that precede the chunk but fall inside
            # the window of its first position; nothing is written here.
            warmup_start = max(0, chunk.start - window + 1)
            for pos in range(warmup_start, chunk.start):
                nfinite, result = put(values[pos], nfinite, result)

            # Growing phase: only add elements until the window is full
            # (or the chunk ends).
            grow_stop = min(warmup_start + window, chunk.stop)
            for pos in range(chunk.start, grow_stop):
                nfinite, result = put(values[pos], nfinite, result)
                out[pos] = result_or_nan(nfinite, min_periods, result)

            # Sliding phase: add the entering element, drop the one leaving.
            for pos in range(grow_stop, chunk.stop):
                incoming = values[pos]
                outgoing = values[pos - window]
                nfinite, result = put(incoming, nfinite, result)
                nfinite, result = pop(outgoing, nfinite, result, values, pos, window)
                out[pos] = result_or_nan(nfinite, min_periods, result)

        return pandas.Series(out, series._index, name=series._name)
Exemplo n.º 8
0
    def cumsum_impl(arr):
        """Cumulative sum of ``arr`` computed in two passes over parallel chunks.

        Pass 1 stores a chunk-local running sum in ``result`` and records the
        last running value of every chunk; pass 2 adds to each chunk the sum of
        the recorded values of all earlier chunks.

        Returns:
            An array allocated with ``numpy.empty_like(arr)`` holding the
            cumulative sums.
        """
        chunks = parallel_chunks(len(arr))
        n_chunks = len(chunks)
        chunk_totals = numpy.zeros(n_chunks, dtype=retty)
        result = numpy.empty_like(arr)

        # Pass 1: independent running sum inside every chunk.
        for c in prange(n_chunks):
            span = chunks[c]
            running = zero
            for pos in range(span.start, span.stop):
                result[pos] = running + arr[pos]
                # Read the value back from ``result`` so the running sum carries
                # exactly what was stored.
                running = result[pos]
            chunk_totals[c] = running

        # Pass 2: offset every chunk by the totals of the chunks before it.
        for c in prange(n_chunks):
            offset = sum(chunk_totals[0:c])
            span = chunks[c]
            for pos in range(span.start, span.stop):
                result[pos] += offset

        return result
Exemplo n.º 9
0
    def getitem_by_mask_impl(arr, idx):
        """Select the elements of ``arr`` at the positions where ``idx`` is truthy.

        The input is processed in the chunks returned by ``parallel_chunks``:
        a first pass counts the selected elements per chunk, a second pass copies
        them into the output, each chunk writing at the offset given by the
        counts of all earlier chunks.

        ``is_str_arr``, ``is_range`` and ``res_dtype`` are not defined in this
        function; they come from the enclosing scope and select between the
        string-array, range and plain-array code paths.

        Args:
            arr: source of values; indexed with ``arr[j]``, or read through
                ``arr.start`` / ``arr.step`` when ``is_range`` is true.
            idx: mask, read at the same positions as ``arr``.

        Returns:
            A string array built with ``create_str_arr_from_list`` (with NA
            positions set from the collected mask) when ``is_str_arr`` is true,
            otherwise a NumPy array of ``res_dtype``.
        """
        # NOTE(review): the explicit ``== True`` comparisons below have their lint
        # warning suppressed, so they look deliberate — presumably to let the
        # compiler resolve the branch statically; confirm before simplifying.
        chunks = parallel_chunks(len(arr))
        arr_len = numpy.empty(len(chunks), dtype=numpy.int64)
        length = 0

        # Pass 1: per-chunk count of selected elements; ``length`` is the total.
        for i in prange(len(chunks)):
            chunk = chunks[i]
            res = 0
            for j in range(chunk.start, chunk.stop):
                if idx[j]:
                    res += 1
            length += res
            arr_len[i] = res

        # Allocate the output; the string path also needs a mask of NA positions.
        if is_str_arr == True:  # noqa
            result_data = [''] * length
            result_nan_mask = numpy.empty(shape=length, dtype=types.bool_)
        else:
            result_data = numpy.empty(shape=length, dtype=res_dtype)
        # Pass 2: copy the selected elements into their final positions.
        for i in prange(len(chunks)):
            chunk = chunks[i]
            # Output offset of this chunk = number of elements selected by earlier chunks.
            new_start = int(sum(arr_len[0:i]))
            current_pos = new_start

            for j in range(chunk.start, chunk.stop):
                if idx[j]:
                    if is_range == True:  # noqa
                        # Range input: compute the j-th value from start and step.
                        value = arr.start + arr.step * j
                    else:
                        value = arr[j]
                    result_data[current_pos] = value
                    if is_str_arr == True:  # noqa
                        result_nan_mask[current_pos] = isna(arr, j)
                    current_pos += 1

        if is_str_arr == True:  # noqa
            # Convert the list of strings to a string array and restore its NA entries.
            result_data_as_str_arr = create_str_arr_from_list(result_data)
            str_arr_set_na_by_mask(result_data_as_str_arr, result_nan_mask)
            return result_data_as_str_arr
        else:
            return result_data
    def _impl(self, other=None, pairwise=None, ddof=1):
        """Compute a rolling-window covariance-style statistic of two arrays.

        The window state ``(nfinite, result)`` is advanced with ``put_cov`` /
        ``pop_cov`` and turned into an output value by ``cov_result_or_nan``.
        The output has the length of the longer input; positions beyond the
        shorter input are handled by the trailing loops of each chunk.

        ``nan_other`` and ``align_finiteness`` are not defined in this function;
        they come from the enclosing scope.

        Args:
            other: second operand; its ``_data`` is used unless ``nan_other`` is
                true, in which case the main array is paired with itself.
            pairwise: not read by this body.
            ddof: forwarded to ``cov_result_or_nan``.

        Returns:
            A ``pandas.Series`` built from the float64 output array alone (no
            index or name is passed).
        """
        win = self._window
        minp = self._min_periods

        main_series = self._data
        main_arr = main_series._data

        if nan_other == True:  # noqa
            other_arr = main_arr
        else:
            other_arr = other._data

        main_arr_length = len(main_arr)
        other_arr_length = len(other_arr)
        # Pairs (x, y) exist only up to the shorter length; the output spans the longer.
        min_length = min(main_arr_length, other_arr_length)
        length = max(main_arr_length, other_arr_length)
        output_arr = numpy.empty(length, dtype=float64)

        chunks = parallel_chunks(length)
        for i in prange(len(chunks)):
            chunk = chunks[i]
            # Every chunk starts from an empty window state.
            nfinite = 0
            result = (0., 0., 0., 0.)

            if win == 0:
                # Zero-width window: emit the value of the untouched initial state.
                for idx in range(chunk.start, chunk.stop):
                    output_arr[idx] = cov_result_or_nan(nfinite, minp, result, ddof)
                continue

            # Prelude: positions before the chunk that fall inside the window of
            # its first element (clipped to where pairs exist).
            prelude_start = max(0, chunk.start - win + 1)
            prelude_stop = min(chunk.start, min_length)

            # Interlude: positions of the chunk processed with additions only.
            interlude_start = chunk.start
            interlude_stop = min(prelude_start + win, chunk.stop, min_length)

            # Postlude: positions where one pair is added and one is removed.
            postlude_start = min(prelude_start + win, chunk.stop)
            postlude_stop = min(chunk.stop, min_length)

            # Feed the prelude pairs; no output is written for them.
            for idx in range(prelude_start, prelude_stop):
                x, y = main_arr[idx], other_arr[idx]
                nfinite, result = put_cov(x, y, nfinite, result,
                                          align_finiteness=align_finiteness)

            # Add pairs and write one output per position.
            for idx in range(interlude_start, interlude_stop):
                x, y = main_arr[idx], other_arr[idx]
                nfinite, result = put_cov(x, y, nfinite, result,
                                          align_finiteness=align_finiteness)
                output_arr[idx] = cov_result_or_nan(nfinite, minp, result, ddof)

            # Slide: add the pair at idx, remove the pair at idx - win.
            for idx in range(postlude_start, postlude_stop):
                put_x, put_y = main_arr[idx], other_arr[idx]
                pop_x, pop_y = main_arr[idx - win], other_arr[idx - win]
                nfinite, result = put_cov(put_x, put_y, nfinite, result,
                                          align_finiteness=align_finiteness)
                nfinite, result = pop_cov(pop_x, pop_y, nfinite, result,
                                          align_finiteness=align_finiteness)
                output_arr[idx] = cov_result_or_nan(nfinite, minp, result, ddof)

            # Positions at or past min_length and before postlude_start: nothing is
            # added or removed, the current state is written as is.
            last_start = max(min_length, interlude_start)
            for idx in range(last_start, postlude_start):
                output_arr[idx] = cov_result_or_nan(nfinite, minp, result, ddof)

            # Positions at or past min_length, up to min_length + win: only the pair
            # at idx - win is removed from the state.
            last_start = max(min_length, postlude_start)
            last_stop = min(min_length + win, chunk.stop)
            for idx in range(last_start, last_stop):
                x, y = main_arr[idx - win], other_arr[idx - win]
                nfinite, result = pop_cov(x, y, nfinite, result,
                                          align_finiteness=align_finiteness)
                output_arr[idx] = cov_result_or_nan(nfinite, minp, result, ddof)

            # Remaining positions of the chunk are set to NaN.
            for idx in range(last_stop, chunk.stop):
                output_arr[idx] = numpy.nan

        return pandas.Series(output_arr)