def sdc_pandas_dataframe_rolling_corr(self, other=None, pairwise=None):
    """Type-check the arguments of ``DataFrame.rolling().corr()`` and pick an implementation.

    Parameters
    ----------
    self:
        Checked against ``DataFrameRollingType`` via ``TypeChecker.check``.
    other:
        ``None``/omitted, a ``DataFrameType`` or a ``SeriesType``; anything else is
        reported through ``TypeChecker.raise_exc``.
    pairwise:
        ``None``/omitted or a boolean; anything else is reported through
        ``TypeChecker.raise_exc``.

    Returns
    -------
    The implementation produced by one of the ``gen_df_rolling_method*_impl``
    generators, chosen by the type of ``other``.
    """
    ty_checker = TypeChecker('Method rolling.corr().')
    ty_checker.check(self, DataFrameRollingType)

    other_is_valid = other is None or isinstance(
        other, (Omitted, NoneType, DataFrameType, SeriesType))
    if not other_is_valid:
        ty_checker.raise_exc(other, 'DataFrame, Series', 'other')

    pairwise_is_valid = pairwise is None or isinstance(
        pairwise, (bool, Boolean, Omitted, NoneType))
    if not pairwise_is_valid:
        ty_checker.raise_exc(pairwise, 'bool', 'pairwise')

    # Keyword defaults are passed to the generators as source-text strings.
    default_kws = {'other': 'None', 'pairwise': 'None'}

    other_is_absent = other is None or isinstance(other, (Omitted, NoneType))
    if other_is_absent:
        return gen_df_rolling_method_other_none_impl('corr', self, kws=default_kws)

    if isinstance(other, DataFrameType):
        return gen_df_rolling_method_other_df_impl('corr', self, other, kws=default_kws)

    # Remaining accepted case: `other` is a Series.
    return gen_df_rolling_method_impl('corr', self, kws=default_kws)
Пример #2
0
def pd_range_index_getitem_overload(self, idx):
    """Overload resolver for ``RangeIndex[idx]``.

    Returns ``None`` when ``self`` is not a ``RangeIndexType``. ``idx`` must be an
    integer or a slice type; other types are reported through
    ``TypeChecker.raise_exc``.

    Returns
    -------
    An implementation taking ``(self, idx)``: for an integer ``idx`` it returns the
    element at that position (negative positions count from the end, out-of-range
    positions raise ``IndexError``); for a slice it returns a new ``pd.RangeIndex``
    carrying the original name.
    """
    if not isinstance(self, RangeIndexType):
        return None

    ty_checker = TypeChecker('Operator getitem().')

    # TO-DO: extend getitem to support other indexers (Arrays, Lists, etc)
    # for Arrays and Lists it requires Int64Index class as return value
    idx_is_integer = isinstance(idx, types.Integer)
    idx_is_slice = isinstance(idx, types.SliceType)
    if not (idx_is_integer or idx_is_slice):
        ty_checker.raise_exc(idx, 'integer', 'idx')

    if idx_is_integer:
        def pd_range_index_getitem_impl(self, idx):
            size = len(self._data)
            # wrap negative positions around the end of the range
            idx = (size + idx) if idx < 0 else idx
            if (idx < 0 or idx >= size):
                raise IndexError("RangeIndex.getitem: index is out of bounds")
            return self.start + self.step * idx

        return pd_range_index_getitem_impl

    if idx_is_slice:
        def pd_range_index_getitem_impl(self, idx):
            # normalize the slice against the current length, then map the
            # positions back onto the start/step of this range
            norm_start, norm_stop, norm_step = idx.indices(len(self._data))
            new_start = self.start + self.step * norm_start
            new_stop = self.start + self.step * norm_stop
            new_step = self.step * norm_step
            return pd.RangeIndex(new_start, new_stop, new_step, name=self._name)

        return pd_range_index_getitem_impl
Пример #3
0
def pd_int64_index_getitem_overload(self, idx):
    """Overload resolver for ``Int64Index[idx]``.

    Returns ``None`` when ``self`` is not an ``Int64IndexType``. ``idx`` may be an
    integer, a slice, or an array/list whose dtype is integer or boolean; other
    types are reported through ``TypeChecker.raise_exc``.

    Returns
    -------
    An implementation taking ``(self, idx)``: an integer ``idx`` yields a single
    element of the underlying data, every other accepted indexer yields a new
    ``pd.Int64Index`` carrying the original name.
    """
    if not isinstance(self, Int64IndexType):
        return None

    ty_checker = TypeChecker('Operator getitem().')

    idx_is_integer = isinstance(idx, types.Integer)
    # `idx.dtype` is only looked at once idx is known to be an array or a list
    idx_is_supported = (
        idx_is_integer
        or isinstance(idx, types.SliceType)
        or (isinstance(idx, (types.Array, types.List))
            and isinstance(idx.dtype, (types.Integer, types.Boolean)))
    )
    if not idx_is_supported:
        ty_checker.raise_exc(idx, 'integer, slice, integer array or list',
                             'idx')

    if not idx_is_integer:

        def pd_int64_index_getitem_impl(self, idx):
            selected = self._data[idx]
            return pd.Int64Index(selected, name=self._name)

        return pd_int64_index_getitem_impl

    def pd_int64_index_getitem_impl(self, idx):
        size = len(self._data)
        # FIXME_Numba#5801: Numba type unification rules make this float
        idx = types.int64((size + idx) if idx < 0 else idx)
        if (idx < 0 or idx >= size):
            raise IndexError("Int64Index.getitem: index is out of bounds")

        return self._data[idx]

    return pd_int64_index_getitem_impl
Пример #4
0
def pd_int64_index_take_overload(self, indexes):
    """Overload resolver for ``Int64Index.take(indexes)``.

    Returns ``None`` when ``self`` is not an ``Int64IndexType``. ``indexes`` must be
    an array, list, typed list or one of ``sdc_pandas_index_types`` whose dtype is
    an integer or a typed list; other types are reported through
    ``TypeChecker.raise_exc``.

    Returns
    -------
    An implementation taking ``(self, indexes)`` that builds a new ``pd.Int64Index``
    (keeping the original name) from ``numpy_like.take``.
    """
    if not isinstance(self, Int64IndexType):
        return None

    ty_checker = TypeChecker('Method take().')

    container_types = (types.Array, types.List, types.ListType) + sdc_pandas_index_types
    indexes_supported = (
        isinstance(indexes, container_types)
        and isinstance(indexes.dtype, (types.Integer, types.ListType))
    )
    if not indexes_supported:
        ty_checker.raise_exc(indexes,
                             'array/list of integers or integer index',
                             'indexes')

    # separate handling when indexes is nested lists produces with parallel impls
    if isinstance(indexes.dtype, types.ListType):

        def pd_int64_index_take_chunked_impl(self, indexes):
            taken = numpy_like.take(self.values, indexes)
            return pd.Int64Index(taken, name=self._name)

        return pd_int64_index_take_chunked_impl

    # index objects (other than plain arrays) are unwrapped to their values first
    indexes_is_index = isinstance(indexes, sdc_pandas_index_types)
    unwrap_indexes = indexes_is_index and not isinstance(indexes, types.Array)

    def pd_int64_index_take_impl(self, indexes):
        positions = indexes.values if unwrap_indexes == True else indexes  # noqa
        taken = numpy_like.take(self._data, positions)
        return pd.Int64Index(taken, name=self._name)

    return pd_int64_index_take_impl
Пример #5
0
def concurrent_dict_type_fromkeys_ovld(cls, keys, value):
    """Overload resolver for ``ConcurrentDict.fromkeys(keys, value)``.

    Returns ``None`` when ``cls.instance_type`` is not ``ConcurrentDictType``.
    ``keys`` must be a sequence, an array or a ``StringArrayType``; other types are
    reported through ``TypeChecker.raise_exc``.

    Returns
    -------
    An implementation taking ``(cls, keys, value)`` that creates an empty
    ``ConcurrentDict`` keyed by ``keys.dtype`` with values of type ``value`` and
    assigns ``value`` to every key. Arrays and string arrays are filled with a
    ``numba.prange`` loop, other sequences with a plain loop.
    """
    if cls.instance_type is not ConcurrentDictType:
        return

    ty_checker = TypeChecker('Method ConcurrentDict::fromkeys()')

    if not isinstance(keys, (types.Sequence, types.Array, StringArrayType)):
        ty_checker.raise_exc(keys, 'array or sequence', 'keys')

    dict_key_type = keys.dtype
    dict_value_type = value

    if not isinstance(keys, (types.Array, StringArrayType)):
        # generic for all other iterables

        def concurrent_dict_fromkeys_impl(cls, keys, value):
            res = ConcurrentDict.empty(dict_key_type, dict_value_type)
            for k in keys:
                res[k] = value
            return res

        return concurrent_dict_fromkeys_impl

    def concurrent_dict_fromkeys_impl(cls, keys, value):
        res = ConcurrentDict.empty(dict_key_type, dict_value_type)
        for i in numba.prange(len(keys)):
            res[keys[i]] = value
        return res

    return concurrent_dict_fromkeys_impl
Пример #6
0
def concurrent_dict_empty(cls, key_type, value_type):
    """Overload resolver for ``ConcurrentDict.empty(key_type, value_type)``.

    Returns ``None`` when ``cls.instance_type`` is not ``ConcurrentDictType``.

    Parameters
    ----------
    key_type, value_type:
        Must be ``types.NumberClass`` or ``types.TypeRef`` instances; other types
        are reported through ``TypeChecker.raise_exc``.

    Raises
    ------
    TypingError
        When the key is a number class outside ``supported_numeric_key_types``,
        when the key is a type reference whose instance type is neither
        ``types.UnicodeType`` nor ``types.Hashable``, or when the value is a
        number class outside ``supported_numeric_value_types``.

    Returns
    -------
    An implementation taking ``(cls, key_type, value_type)`` that delegates to
    ``hashmap_create``.
    """

    if cls.instance_type is not ConcurrentDictType:
        return

    _func_name = 'Method ConcurrentDictTypeRef::empty().'
    ty_checker = TypeChecker(_func_name)

    supported_key_types = (types.NumberClass, types.TypeRef)
    supported_value_types = (types.NumberClass, types.TypeRef)

    if not isinstance(key_type, supported_key_types):
        ty_checker.raise_exc(key_type,
                             'Numba type of dict keys (e.g. types.int32)',
                             'key_type')
    if not isinstance(value_type, supported_value_types):
        ty_checker.raise_exc(value_type,
                             'Numba type of dict values (e.g. types.int32)',
                             'value_type')

    # The value-type test used to sit inside the second argument of the
    # `isinstance(key_type.instance_type, ...)` call, where the always-truthy
    # tuple short-circuited it away; the key and value checks are now separate.
    unsupported_key = (
        isinstance(key_type, types.NumberClass)
        and key_type.dtype not in supported_numeric_key_types
        or isinstance(key_type, types.TypeRef)
        and not isinstance(key_type.instance_type,
                           (types.UnicodeType, types.Hashable))
    )
    unsupported_value = (
        isinstance(value_type, types.NumberClass)
        and value_type.dtype not in supported_numeric_value_types
    )
    if unsupported_key or unsupported_value:
        error_msg = '{} SDC ConcurrentDict({}, {}) is not supported. '
        raise TypingError(error_msg.format(_func_name, key_type, value_type))

    def concurrent_dict_empty_impl(cls, key_type, value_type):
        return hashmap_create(key_type, value_type)

    return concurrent_dict_empty_impl
Пример #7
0
def concurrent_dict_pop_ovld(self, key, default=None):
    """Overload resolver for ``ConcurrentDict.pop(key, default=None)``.

    Returns ``None`` when ``self`` is not a ``ConcurrentDictType``. ``default`` must
    be absent/None, comparable with the dict value type (per
    ``check_types_comparable``), or an ``Optional`` whose inner type is comparable;
    other types are reported through ``TypeChecker.raise_exc``.

    Returns
    -------
    An implementation taking ``(self, key, default=None)`` that pops ``key`` via
    ``hashmap_pop``. When the key is missing it returns ``default`` cast to the
    value type, or ``None`` if no default was supplied.
    """
    if not isinstance(self, ConcurrentDictType):
        return None

    ty_checker = TypeChecker(f'Method {self}::pop()')

    # default value is expected to be of the same (or safely casted) type as dict's value_type
    no_default = default is None or isinstance(default, (types.NoneType, types.Omitted))
    default_is_acceptable = (
        no_default
        or check_types_comparable(default, self.value_type)
        or (isinstance(default, types.Optional)
            and check_types_comparable(default.type, self.value_type))
    )
    if not default_is_acceptable:
        ty_checker.raise_exc(default,
                             f'{self.value_type} or convertible or None',
                             'default')

    dict_key_type = self.key_type
    dict_value_type = self.value_type
    # the key is cast only when its type differs from the dict key type
    cast_key = key is not dict_key_type

    def concurrent_dict_pop_impl(self, key, default=None):
        _key = key if cast_key == False else _cast(key, dict_key_type)  # noqa
        found, popped = hashmap_pop(self, _key)

        if not found:
            if no_default == False:  # noqa
                return _cast(default, dict_value_type)
            else:
                return None
        return popped

    return concurrent_dict_pop_impl
Пример #8
0
def pd_multi_index_from_product_overload(cls,
                                         iterables,
                                         sortorder=None,
                                         names=None):
    """Overload resolver for ``MultiIndex.from_product(iterables)``.

    Returns ``None`` when ``cls.instance_type`` is not ``MultiIndexType``.

    Parameters
    ----------
    iterables:
        Either a list / typed list / uniform tuple whose dtype is one of the
        supported level data types, or a heterogeneous tuple whose every member is
        one of them; other types are reported through ``TypeChecker.raise_exc``.
    sortorder, names:
        Only the omitted/None value is supported.

    Raises
    ------
    TypingError
        When ``sortorder`` or ``names`` is given.

    Returns
    -------
    An implementation that factorizes every level, derives the codes arrays and
    calls ``sdc_pandas_multi_index_ctor`` with the level names.
    """
    if cls.instance_type is not MultiIndexType:
        return

    _func_name = 'Method from_product()'
    valid_levels_data_types = (sdc_pandas_index_types
                               + sdc_pandas_df_column_types
                               + (types.List, types.ListType))
    ty_checker = TypeChecker(_func_name)

    homogeneous_ok = (
        isinstance(iterables, (types.List, types.ListType, types.UniTuple))
        and isinstance(iterables.dtype, valid_levels_data_types)
    )
    iterables_ok = homogeneous_ok or (
        isinstance(iterables, types.Tuple)
        and all(isinstance(member, valid_levels_data_types) for member in iterables)
    )
    if not iterables_ok:
        ty_checker.raise_exc(iterables, 'list or tuple of tuples ',
                             'iterables')

    sortorder_absent = isinstance(sortorder, (types.Omitted, types.NoneType)) or sortorder is None
    if not sortorder_absent:
        raise TypingError(
            '{} Unsupported parameters. Given sortorder: {}'.format(
                _func_name, sortorder))

    names_absent = isinstance(names, (types.Omitted, types.NoneType)) or names is None
    if not names_absent:
        raise TypingError('{} Unsupported parameters. Given names: {}'.format(
            _func_name, names))

    def pd_multi_index_from_product_impl(cls,
                                         iterables,
                                         sortorder=None,
                                         names=None):

        # TO-DO: support indexes.unique() method and use it here
        factorized = sdc_tuple_map(factorize_level, iterables)

        names_of_levels = sdc_tuple_map(sdc_indexes_get_name, iterables)

        levels = sdc_tuple_map(lambda x: fix_df_index(list(x[0])),
                               factorized)

        # single-element list handed to next_codes_info for every level;
        # its last element is read back below as the size of the result
        cumprod_sizes = [1]
        codes_info = sdc_tuple_map(next_codes_info, factorized,
                                   cumprod_sizes)

        result_size = cumprod_sizes[-1]
        codes = sdc_tuple_map(next_codes_array, codes_info,
                              result_size)

        return sdc_pandas_multi_index_ctor(levels,
                                           codes,
                                           name=names_of_levels)

    return pd_multi_index_from_product_impl
Пример #9
0
def pd_multi_index_equals_overload(self, other):
    """Overload resolver for ``MultiIndex.equals(other)``.

    Returns ``None`` when ``self`` is not a ``MultiIndexType``. ``other`` must be a
    ``MultiIndexType`` with the very same dtype object; anything else is reported
    through ``TypeChecker.raise_exc``.

    Returns
    -------
    An implementation taking ``(self, other)`` that returns ``False`` when the
    number of levels differs or any per-level comparison fails, ``True`` otherwise.
    """
    if not isinstance(self, MultiIndexType):
        return None

    ty_checker = TypeChecker('Method equals().')

    same_kind = isinstance(other, MultiIndexType) and self.dtype is other.dtype
    if not same_kind:
        ty_checker.raise_exc(other, 'pandas MultiIndex', 'other')

    def pd_multi_index_equals_impl(self, other):

        if self.nlevels != other.nlevels:
            return False

        paired_levels = _multi_index_binop_helper(self, other)
        per_level_equal = sdc_tuple_map(
            lambda x: cat_array_equal(*x),
            paired_levels,
        )

        # np.all is not supported for Tuples and below compiles a bit faster
        # than 'np.all(np.array(list(per_level_equal)))'
        for level_equal in per_level_equal:
            if not level_equal:
                return False
        return True

    return pd_multi_index_equals_impl
Пример #10
0
def pd_int64_index_append_overload(self, other):
    """Overload resolver for ``Int64Index.append(other)``.

    Returns ``None`` when ``self`` is not an ``Int64IndexType``. ``other`` must be
    one of ``sdc_pandas_index_types``; anything else is reported through
    ``TypeChecker.raise_exc``.

    Raises
    ------
    TypingError
        When ``check_types_comparable(self, other)`` is false.

    Returns
    -------
    An implementation taking ``(self, other)`` that appends the data of ``other``
    to this index. The result is a ``pd.Int64Index`` when the common dtype found
    by ``find_index_common_dtype`` is ``types.int64``, otherwise the raw array.
    """
    if not isinstance(self, Int64IndexType):
        return None

    _func_name = 'Method append().'
    ty_checker = TypeChecker(_func_name)

    if not isinstance(other, sdc_pandas_index_types):
        ty_checker.raise_exc(other, 'pandas index', 'other')

    if not check_types_comparable(self, other):
        raise TypingError('{} Not allowed for non comparable indexes. \
        Given: self={}, other={}'.format(_func_name, self, other))

    # anything that is not a plain array is unwrapped through `.values`
    unwrap_other = not isinstance(other, types.Array)
    common_dtype = find_index_common_dtype(self, other)[1]
    result_is_plain_array = common_dtype is not types.int64

    def pd_int64_index_append_impl(self, other):
        other_data = other.values if unwrap_other == True else other  # noqa
        appended = hpat_arrays_append(self._data, other_data)
        # this is only needed while some indexes are represented with arrays
        # TO-DO: support pd.Index() overload with dtype arg to create indexes
        if result_is_plain_array == False:  # noqa
            return pd.Int64Index(appended)
        else:
            return appended

    return pd_int64_index_append_impl
Пример #11
0
def sdc_pandas_series_operator_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.binop` implementation

    Note: Currently implemented for numeric Series only.
        Differs from Pandas in returning Series with fixed dtype :obj:`float64`

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op1*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op2*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_binop*

    Parameters
    ----------
    self: :obj:`pandas.Series` or :obj:`scalar`
        First operand; at least one of ``self`` and ``other`` must be a Series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation

    Raises
    ------
    TypingError
        When both operands are Series with not-comparable indexes, or when the
        operands themselves are not comparable.
    """

    # Name used in diagnostics: this is the operator overload for `binop`, so it
    # follows the 'Operator comp_binop().' naming of the comparison overload
    # instead of the copy-pasted 'Method comp_binop().'.
    _func_name = 'Operator binop().'
    ty_checker = TypeChecker(_func_name)
    self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    # this overload is not for string series
    self_is_string_series = self_is_series and isinstance(self.dtype, types.UnicodeType)
    other_is_string_series = other_is_series and isinstance(other.dtype, types.UnicodeType)
    if self_is_string_series or other_is_string_series:
        return None

    if not isinstance(self, (SeriesType, types.Number)):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, (SeriesType, types.Number)):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    operands_are_series = self_is_series and other_is_series
    if operands_are_series:
        series_indexes_comparable = check_types_comparable(self.index, other.index)
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    series_data_comparable = check_types_comparable(self, other)
    if not series_data_comparable:
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    def series_operator_binop_wrapper(self, other):
        return sdc_binop(self, other)

    return series_operator_binop_wrapper
Пример #12
0
def sdc_astype_overload(self, dtype):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.astype.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k astype

    Returns ``None`` when ``self`` is not an array or one of
    ``sdc_pandas_index_types``. When ``dtype`` is not a number class, a function
    type or a string literal, the returned implementation calls
    ``literally(dtype)``. Otherwise the result is a number-to-string
    implementation (``dtype`` is ``str`` or the literal ``'str'``), a numeric
    conversion implementation, or ``None`` when neither applies.
    """

    valid_self_types = (types.Array,) + sdc_pandas_index_types
    if not (isinstance(self, valid_self_types)
            and not isinstance(self, types.NoneType)):
        return None

    accepted_dtype_types = (types.functions.NumberClass, types.Function, types.StringLiteral)
    if not isinstance(dtype, accepted_dtype_types):
        # This branch always returns, so the second identical check (and the
        # TypeChecker it used) that used to follow was unreachable and is gone.
        def impl(self, dtype):
            return literally(dtype)

        return impl

    if (
        (isinstance(dtype, types.Function) and dtype.typing_key == str) or
        (isinstance(dtype, types.StringLiteral) and dtype.literal_value == 'str')
    ):
        def sdc_astype_number_to_string_impl(self, dtype):
            num_bytes = 0
            arr_len = len(self)

            # Get total bytes for new array
            for i in prange(arr_len):
                item = self[i]
                num_bytes += get_utf8_size(str(item))

            data = pre_alloc_string_array(arr_len, num_bytes)

            for i in range(arr_len):
                item = self[i]
                data[i] = str(item)  # TODO: check NA

            return data

        return sdc_astype_number_to_string_impl

    if (isinstance(self, (types.Array, RangeIndexType, Int64IndexType))
            and isinstance(dtype, (types.StringLiteral, types.functions.NumberClass))):
        def sdc_astype_number_impl(self, dtype):
            arr = numpy.empty(len(self), dtype=numpy.dtype(dtype))
            for i in numba.prange(len(self)):
                arr[i] = self[i]

            return arr

        return sdc_astype_number_impl

    # No implementation matches this combination of `self` and `dtype`.
    return None
def hpat_pandas_stringmethods_strip(self, to_strip=None):
    """Type-check the arguments of ``Series.str.strip()`` and return its implementation.

    ``self`` is checked against ``StringMethodsType``. ``to_strip`` must be
    None/omitted or a string type; other types are reported through
    ``TypeChecker.raise_exc``. Returns ``sdc_pandas_series_str_strip_impl``.
    """
    ty_checker = TypeChecker('Method strip().')
    ty_checker.check(self, StringMethodsType)

    to_strip_is_valid = to_strip is None or isinstance(
        to_strip, (NoneType, StringLiteral, UnicodeType, Omitted))
    if not to_strip_is_valid:
        ty_checker.raise_exc(to_strip, 'str', 'to_strip')

    return sdc_pandas_series_str_strip_impl
Пример #14
0
def sdc_pandas_series_operator_comp_binop(self, other):
    """
    Pandas Series operator :attr:`pandas.Series.comp_binop` implementation

    .. only:: developer

    **Test**: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_op7*
              python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_operator_comp_binop*

    Parameters
    ----------
    self: :obj:`pandas.Series` or :obj:`scalar`
        First operand; at least one of ``self`` and ``other`` must be a Series
    other: :obj:`pandas.Series` or :obj:`scalar`
        Series or scalar value to be used as a second argument of binary operation

    Returns
    -------
    :obj:`pandas.Series`
        The result of the operation

    Raises
    ------
    TypingError
        When both operands are Series whose indexes are neither comparable nor
        both None/numeric, or when the operands themselves are not comparable.
    """

    _func_name = 'Operator comp_binop().'
    ty_checker = TypeChecker(_func_name)

    self_is_series = isinstance(self, SeriesType)
    other_is_series = isinstance(other, SeriesType)
    if not (self_is_series or other_is_series):
        return None

    operand_types = (SeriesType, types.Number, types.UnicodeType)
    if not isinstance(self, operand_types):
        ty_checker.raise_exc(self, 'pandas.series or scalar', 'self')

    if not isinstance(other, operand_types):
        ty_checker.raise_exc(other, 'pandas.series or scalar', 'other')

    if self_is_series and other_is_series:
        # indexes that are both None or numeric are accepted even when
        # check_types_comparable rejects them
        none_or_numeric_indexes = (
            (isinstance(self.index, types.NoneType) or check_index_is_numeric(self))
            and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other))
        )
        series_indexes_comparable = (
            check_types_comparable(self.index, other.index)
            or none_or_numeric_indexes
        )
        if not series_indexes_comparable:
            raise TypingError('{} Not implemented for series with not-comparable indexes. \
            Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index))

    if not check_types_comparable(self, other):
        raise TypingError('{} Not supported for not-comparable operands. \
        Given: self={}, other={}'.format(_func_name, self, other))

    def sdc_pandas_series_operator_comp_binop_impl(self, other):
        return self.comp_binop(other)

    return sdc_pandas_series_operator_comp_binop_impl
def hpat_pandas_series_rolling_corr(self, other=None, pairwise=None):
    """Type-check the arguments of ``Series.rolling().corr()`` and return its implementation.

    ``self`` is checked against ``SeriesRollingType``. ``pairwise`` must be
    None/omitted or a boolean; other types are reported through
    ``TypeChecker.raise_exc``. ``other`` is not type-checked here (see the TODO).

    Returns
    -------
    An implementation taking ``(self, other=None, pairwise=None)`` that computes,
    for every position, the correlation (``arr_corr``) of the finite value pairs
    inside the current window, or NaN when fewer than ``min_periods`` pairs are
    available. When ``other`` is absent the series is correlated with itself.
    The result is a ``pandas.Series`` built from the output array only.
    """

    ty_checker = TypeChecker('Method rolling.corr().')
    ty_checker.check(self, SeriesRollingType)

    # TODO: check `other` is Series after a circular import of SeriesType fixed
    # accepted_other = (bool, Omitted, NoneType, SeriesType)
    # if not isinstance(other, accepted_other) and other is not None:
    #     ty_checker.raise_exc(other, 'Series', 'other')

    pairwise_is_valid = pairwise is None or isinstance(
        pairwise, (bool, Boolean, Omitted, NoneType))
    if not pairwise_is_valid:
        ty_checker.raise_exc(pairwise, 'bool', 'pairwise')

    nan_other = other is None or isinstance(other, (Omitted, NoneType))

    def hpat_pandas_rolling_series_corr_impl(self, other=None, pairwise=None):
        win = self._window
        minp = self._min_periods

        main_arr = self._data._data
        main_arr_length = len(main_arr)

        if nan_other == True:  # noqa
            other_arr = main_arr
        else:
            other_arr = other._data

        other_arr_length = len(other_arr)
        length = max(main_arr_length, other_arr_length)
        output_arr = numpy.empty(length, dtype=float64)

        def calc_corr(main, other, minp):
            # align arrays `main` and `other` by size and finiteness
            common_length = min(len(main), len(other))
            valid = (numpy.isfinite(main[:common_length])
                     & numpy.isfinite(other[:common_length]))

            if len(main[valid]) < minp:
                return numpy.nan
            else:
                return arr_corr(main[valid], other[valid])

        # positions before the window is full use everything seen so far
        boundary = min(win, length)
        for i in prange(boundary):
            output_arr[i] = calc_corr(main_arr[:i + 1], other_arr[:i + 1], minp)

        # remaining positions use a window of `win` trailing elements
        for i in prange(boundary, length):
            first = i + 1 - win
            output_arr[i] = calc_corr(main_arr[first:i + 1], other_arr[first:i + 1], minp)

        return pandas.Series(output_arr)

    return hpat_pandas_rolling_series_corr_impl
def sdc_pandas_dataframe_rolling_var(self, ddof=1):
    """Type-check the arguments of ``DataFrame.rolling().var()`` and return its implementation.

    ``self`` is checked against ``DataFrameRollingType``; ``ddof`` must be an
    integer or omitted, other types are reported through
    ``TypeChecker.raise_exc``. The implementation comes from
    ``gen_df_rolling_method_impl`` with ``ddof`` defaulting to ``'1'``.
    """
    ty_checker = TypeChecker('Method rolling.var().')
    ty_checker.check(self, DataFrameRollingType)

    accepted_ddof = (int, Integer, Omitted)
    ddof_is_valid = isinstance(ddof, accepted_ddof)
    if not ddof_is_valid:
        ty_checker.raise_exc(ddof, 'int', 'ddof')

    default_kws = {'ddof': '1'}
    return gen_df_rolling_method_impl('var', self, kws=default_kws)
def hpat_pandas_series_rolling_var(self, ddof=1):
    """Type-check the arguments of ``Series.rolling().var()`` and return its implementation.

    ``self`` is checked against ``SeriesRollingType``; ``ddof`` must be an integer
    or omitted, other types are reported through ``TypeChecker.raise_exc``.
    Returns ``sdc_pandas_series_rolling_var_impl``.
    """
    ty_checker = TypeChecker('Method rolling.var().')
    ty_checker.check(self, SeriesRollingType)

    accepted_ddof = (int, Integer, Omitted)
    ddof_is_valid = isinstance(ddof, accepted_ddof)
    if not ddof_is_valid:
        ty_checker.raise_exc(ddof, 'int', 'ddof')

    return sdc_pandas_series_rolling_var_impl
def hpat_pandas_stringmethods_center(self, width, fillchar=' '):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.center

    Examples
    --------
    .. literalinclude:: ../../../examples/series/str/series_str_center.py
       :language: python
       :lines: 27-
       :caption: Filling left and right side of strings in the Series with an additional character
       :name: ex_series_str_center

    .. command-output:: python ./series/str/series_str_center.py
       :cwd: ../../../examples

    .. seealso::
        :ref:`Series.str.rjust <pandas.Series.str.rjust>`
            Fills the left side of strings with an arbitrary character.
        :ref:`Series.str.ljust <pandas.Series.str.ljust>`
            Fills the right side of strings with an arbitrary character.

    .. todo:: Add support of 32-bit Unicode for `str.center()`

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.center()` implementation.

    ``width`` must be an integer type and ``fillchar`` a string type (or the
    default ``' '``); other types are reported through the type checker.

    .. only:: developer

    Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_center
    """

    ty_checker = TypeChecker('Method center().')
    ty_checker.check(self, StringMethodsType)

    if not isinstance(width, Integer):
        ty_checker.raise_exc(width, 'int', 'width')

    if not isinstance(fillchar, (Omitted, StringLiteral, UnicodeType)):
        # the plain default value is still accepted
        if fillchar != ' ':
            ty_checker.raise_exc(fillchar, 'str', 'fillchar')

    def hpat_pandas_stringmethods_center_impl(self, width, fillchar=' '):
        # remember which items are NA so they can be restored after centering
        na_mask = get_nan_mask(self._data._data)
        size = len(self._data)
        centered = [''] * size
        for pos in numba.prange(size):
            centered[pos] = self._data._data[pos].center(width, fillchar)
        result = str_arr_set_na_by_mask(create_str_arr_from_list(centered), na_mask)

        return pandas.Series(result, self._data._index, name=self._data._name)

    return hpat_pandas_stringmethods_center_impl
Пример #19
0
def pd_multi_index_getitem_overload(self, idx):
    """Overload resolver for ``MultiIndex[idx]``.

    Returns ``None`` when ``self`` is not a ``MultiIndexType``. ``idx`` may be an
    integer, a slice, or an array/list whose dtype is integer or boolean; other
    types are reported through ``TypeChecker.raise_exc``.

    Returns
    -------
    An implementation taking ``(self, idx)`` for integer, slice, boolean-array and
    integer-array indexers. No implementation is returned for list indexers, so
    the function falls through and yields ``None`` for them.
    """
    if not isinstance(self, MultiIndexType):
        return None

    ty_checker = TypeChecker('Operator getitem().')

    idx_is_integer = isinstance(idx, types.Integer)
    idx_is_slice = isinstance(idx, types.SliceType)
    # `idx.dtype` is only looked at once idx is known to be an array or a list
    idx_is_supported = (
        idx_is_integer
        or idx_is_slice
        or (isinstance(idx, (types.Array, types.List))
            and isinstance(idx.dtype, (types.Integer, types.Boolean)))
    )
    if not idx_is_supported:
        ty_checker.raise_exc(idx, 'integer, slice, integer array or list',
                             'idx')

    if idx_is_integer:

        def pd_multi_index_getitem_idx_scalar_impl(self, idx):
            size = len(self)
            # FIXME_Numba#5801: Numba type unification rules make this float
            idx = types.int64((size + idx) if idx < 0 else idx)
            if (idx < 0 or idx >= size):
                raise IndexError("MultiIndex.getitem: index is out of bounds")

            return _multi_index_getitem_impl(self, idx)

        return pd_multi_index_getitem_idx_scalar_impl

    if idx_is_slice:

        def pd_multi_index_getitem_idx_slice_impl(self, idx):
            # levels are reused as is; only the codes arrays are sliced
            sliced_codes = sdc_tuple_map(
                lambda arr_codes, taken_idxs: arr_codes[taken_idxs],
                self._codes, idx)
            return pd.MultiIndex(self._levels, sliced_codes)

        return pd_multi_index_getitem_idx_slice_impl

    idx_is_array = isinstance(idx, types.Array)

    if idx_is_array and isinstance(idx.dtype, types.Boolean):

        def pd_multi_index_getitem_idx_bool_array_impl(self, idx):
            # levels are reused as is; only the codes arrays are filtered
            masked_codes = sdc_tuple_map(
                lambda arr_codes, taken_idxs: numpy_like.getitem_by_mask(
                    arr_codes, taken_idxs), self._codes, idx)
            return pd.MultiIndex(self._levels, masked_codes)

        return pd_multi_index_getitem_idx_bool_array_impl

    if idx_is_array and isinstance(idx.dtype, types.Integer):

        def pd_multi_index_getitem_as_take_impl(self, idx):
            return self.take(idx)

        return pd_multi_index_getitem_as_take_impl
Пример #20
0
def pd_range_index_overload(start=None, stop=None, step=None, dtype=None, copy=False, name=None, fastpath=None):
    """Overload resolver for the ``pd.RangeIndex(...)`` constructor.

    Parameters
    ----------
    start, stop, step:
        Integer types or None/omitted; other types are reported through
        ``TypeChecker.raise_exc``.
    dtype:
        Validated by ``_check_dtype_param_type``.
    copy:
        Only the default (omitted or ``False``) is supported.
    name:
        A string type or None/omitted.
    fastpath:
        Only the default (omitted or None) is supported.

    Raises
    ------
    SDCLimitation
        When ``copy`` or ``fastpath`` is given a non-default value.

    Returns
    -------
    An implementation with the same signature. When ``start``, ``stop`` and
    ``step`` are all absent the implementation raises ``TypeError``; otherwise it
    builds the index with ``init_range_index`` after checking ``dtype`` and
    rejecting a zero step.
    """

    _func_name = 'pd.RangeIndex().'
    ty_checker = TypeChecker(_func_name)

    if not (isinstance(copy, types.Omitted) or copy is False):
        raise SDCLimitation(f"{_func_name} Unsupported parameter. Given 'copy': {copy}")

    # This guard used to test `copy` instead of `fastpath`, so a supplied
    # `fastpath` was never rejected when `copy` was omitted. An explicit None
    # (types.NoneType) stays accepted, as it was before the fix.
    if not (isinstance(fastpath, (types.Omitted, types.NoneType)) or fastpath is None):
        raise SDCLimitation(f"{_func_name} Unsupported parameter. Given 'fastpath': {fastpath}")

    dtype_is_np_int64 = dtype is types.NumberClass(types.int64)
    if not _check_dtype_param_type(dtype):
        ty_checker.raise_exc(dtype, 'int64 dtype', 'dtype')

    # TODO: support ensure_python_int from pandas.core.dtype.common to handle integers as float params
    if not (isinstance(start, (types.NoneType, types.Omitted, types.Integer)) or start is None):
        ty_checker.raise_exc(start, 'number or none', 'start')
    if not (isinstance(stop, (types.NoneType, types.Omitted, types.Integer)) or stop is None):
        ty_checker.raise_exc(stop, 'number or none', 'stop')
    if not (isinstance(step, (types.NoneType, types.Omitted, types.Integer)) or step is None):
        ty_checker.raise_exc(step, 'number or none', 'step')

    if not (isinstance(name, (types.NoneType, types.Omitted, types.StringLiteral, types.UnicodeType)) or name is None):
        ty_checker.raise_exc(name, 'string or none', 'name')

    # With no bounds at all the failure is deferred to the implementation,
    # which raises TypeError when called.
    if ((isinstance(start, (types.NoneType, types.Omitted)) or start is None)
            and (isinstance(stop, (types.NoneType, types.Omitted)) or stop is None)
            and (isinstance(step, (types.NoneType, types.Omitted)) or step is None)):
        def pd_range_index_ctor_dummy_impl(
                start=None, stop=None, step=None, dtype=None, copy=False, name=None, fastpath=None):
            raise TypeError("RangeIndex(...) must be called with integers")

        return pd_range_index_ctor_dummy_impl

    def pd_range_index_ctor_impl(start=None, stop=None, step=None, dtype=None, copy=False, name=None, fastpath=None):

        if not (dtype is None
                or dtype == 'int64'
                or dtype_is_np_int64):
            raise TypeError("Invalid to pass a non-int64 dtype to RangeIndex")

        _start = types.int64(start) if start is not None else types.int64(0)

        # a lone `start` is treated as the stop of a range beginning at zero
        if stop is None:
            _start, _stop = types.int64(0), types.int64(start)
        else:
            _stop = types.int64(stop)

        _step = types.int64(step) if step is not None else types.int64(1)
        if _step == 0:
            raise ValueError("Step must not be zero")

        return init_range_index(range(_start, _stop, _step), name)

    return pd_range_index_ctor_impl
Пример #21
0
def sdc_pandas_dataframe_rolling_apply(self, func, raw=None):
    """Type-check the arguments of ``DataFrame.rolling().apply()`` and return its implementation.

    ``self`` is checked against ``DataFrameRollingType``. ``raw`` must be
    None/omitted or a boolean type; other types are reported through
    ``TypeChecker.raise_exc``. ``func`` is not type-checked here. The
    implementation comes from ``gen_df_rolling_method_impl`` with ``func`` as a
    positional argument and ``raw`` defaulting to ``'None'``.
    """
    ty_checker = TypeChecker('Method rolling.apply().')
    ty_checker.check(self, DataFrameRollingType)

    raw_is_valid = raw is None or isinstance(raw, (Omitted, NoneType, Boolean))
    if not raw_is_valid:
        ty_checker.raise_exc(raw, 'bool', 'raw')

    positional = ['func']
    default_kws = {'raw': 'None'}
    return gen_df_rolling_method_impl('apply', self, args=positional, kws=default_kws)
Пример #22
0
def hpat_pandas_series_rolling_quantile(self,
                                        quantile,
                                        interpolation='linear'):
    """Validate argument types for Series ``rolling.quantile()`` and return its implementation.

    Parameters
    ----------
    self
        Must pass ``TypeChecker.check`` against ``SeriesRollingType``.
    quantile
        Must be an instance of ``Number``; the returned implementation
        additionally raises ``ValueError`` for values outside ``[0, 1]``.
    interpolation
        Must be ``Omitted``, ``StringLiteral``, ``UnicodeType`` or equal to
        ``'linear'``; the returned implementation raises ``ValueError`` for
        any value other than ``'linear'``.

    Returns
    -------
    The implementation function, which yields a ``pandas.Series`` holding one
    quantile per position, reusing the index and name of the input series.
    """
    checker = TypeChecker('Method rolling.quantile().')
    checker.check(self, SeriesRollingType)

    if not isinstance(quantile, Number):
        checker.raise_exc(quantile, 'float', 'quantile')

    interpolation_is_str = isinstance(interpolation,
                                      (Omitted, StringLiteral, UnicodeType))
    if not interpolation_is_str and interpolation != 'linear':
        checker.raise_exc(interpolation, 'str', 'interpolation')

    def hpat_pandas_rolling_series_quantile_impl(self,
                                                 quantile,
                                                 interpolation='linear'):
        if quantile < 0 or quantile > 1:
            raise ValueError('quantile value not in [0, 1]')
        if interpolation != 'linear':
            raise ValueError('interpolation value not "linear"')

        window = self._window
        min_periods = self._min_periods

        series = self._data
        values = series._data
        n_values = len(values)
        result = numpy.empty(n_values, dtype=float64)

        def window_quantile(window_values, q, required):
            # Non-finite entries are dropped before counting and before
            # computing the quantile.
            finite_values = window_values[numpy.isfinite(window_values)]
            if len(finite_values) < required:
                return numpy.nan
            return arr_quantile(finite_values, q)

        # Positions before a full window is available use every value seen
        # so far (a growing prefix of the data).
        warmup = min(window, n_values)
        for pos in prange(warmup):
            result[pos] = window_quantile(values[:pos + 1],
                                          quantile,
                                          min_periods)

        # Remaining positions use the trailing `window` values ending at pos.
        for pos in prange(warmup, n_values):
            result[pos] = window_quantile(values[pos + 1 - window:pos + 1],
                                          quantile,
                                          min_periods)

        return pandas.Series(result, series._index, name=series._name)

    return hpat_pandas_rolling_series_quantile_impl
Пример #23
0
def pd_range_index_getitem_overload(self, idx):
    """Pick a ``getitem`` implementation for a range index from the type of *idx*.

    Returns ``None`` when *self* is not a ``RangeIndexType``.  Supported
    indexers are integers, slices, and arrays/lists whose dtype is integer or
    boolean; any other type is reported through ``TypeChecker.raise_exc``.

    The selected implementation returns:

    * integer -- a single element (negative positions count from the end,
      ``IndexError`` when out of bounds);
    * slice -- a ``pd.RangeIndex`` carrying the original name;
    * integer array/list -- a ``pd.Int64Index`` built from ``_sdc_take``;
    * boolean array/list -- the result of ``getitem_by_mask``.
    """
    if not isinstance(self, RangeIndexType):
        return None

    checker = TypeChecker('Operator getitem().')

    is_scalar_or_slice = isinstance(idx, (types.Integer, types.SliceType))
    is_sequence = isinstance(idx, (types.Array, types.List))
    is_int_or_bool_sequence = (
        is_sequence
        and isinstance(idx.dtype, (types.Integer, types.Boolean)))
    if not (is_scalar_or_slice or is_int_or_bool_sequence):
        checker.raise_exc(idx, 'integer, slice, integer array or list',
                          'idx')

    # Scalar position.
    if isinstance(idx, types.Integer):

        def pd_range_index_getitem_impl(self, idx):
            n_items = len(self._data)
            # FIXME_Numba#5801: Numba type unification rules make this float
            idx = types.int64((n_items + idx) if idx < 0 else idx)
            if idx < 0 or idx >= n_items:
                raise IndexError("RangeIndex.getitem: index is out of bounds")
            return self.start + self.step * idx

        return pd_range_index_getitem_impl

    # Slice: the result is again a range, expressed in the parent's units.
    if isinstance(idx, types.SliceType):

        def pd_range_index_getitem_impl(self, idx):
            lo, hi, stride = idx.indices(len(self._data))
            new_start = self.start + self.step * lo
            new_stop = self.start + self.step * hi
            new_step = self.step * stride
            return pd.RangeIndex(new_start, new_stop, new_step,
                                 name=self._name)

        return pd_range_index_getitem_impl

    # Positional selection by an integer array/list.
    if is_sequence and isinstance(idx.dtype, types.Integer):

        def pd_range_index_getitem_impl(self, idx):
            taken = _sdc_take(self, idx)
            return pd.Int64Index(taken, name=self._name)

        return pd_range_index_getitem_impl

    # Selection by a boolean mask.
    if is_sequence and isinstance(idx.dtype, types.Boolean):

        def pd_range_index_getitem_impl(self, idx):
            return getitem_by_mask(self, idx)

        return pd_range_index_getitem_impl

    return None
Пример #24
0
def sdc_pandas_dataframe_rolling_quantile(self, quantile, interpolation='linear'):
    """Validate argument types for DataFrame ``rolling.quantile()`` and return its implementation.

    Parameters
    ----------
    self
        Must pass ``TypeChecker.check`` against ``DataFrameRollingType``.
    quantile
        Must be an instance of ``Number``.
    interpolation
        Must be ``Omitted``, ``StringLiteral``, ``UnicodeType`` or equal to
        ``'linear'``.

    Returns
    -------
    Whatever ``gen_df_rolling_method_impl`` produces for the ``'quantile'``
    method, with ``quantile`` as positional argument and ``interpolation``
    defaulting to ``"linear"``.
    """
    checker = TypeChecker('Method rolling.quantile().')
    checker.check(self, DataFrameRollingType)

    if not isinstance(quantile, Number):
        checker.raise_exc(quantile, 'float', 'quantile')

    interpolation_is_str = isinstance(interpolation,
                                      (Omitted, StringLiteral, UnicodeType))
    if not interpolation_is_str and interpolation != 'linear':
        checker.raise_exc(interpolation, 'str', 'interpolation')

    return gen_df_rolling_method_impl(
        'quantile',
        self,
        args=['quantile'],
        kws={'interpolation': '"linear"'},
    )
Пример #25
0
def pd_range_index_getitem_overload(self, idx):
    """Pick a ``getitem`` implementation for a range index from the type of *idx*.

    Returns ``None`` when *self* is not a ``RangeIndexType``.  Supported
    indexers are integers, slices, and arrays/lists whose dtype is integer or
    boolean; any other type is reported through ``TypeChecker.raise_exc``.

    The selected implementation returns:

    * integer -- a single element (negative positions count from the end,
      ``IndexError`` when out of bounds);
    * slice -- a ``pd.RangeIndex`` carrying the original name;
    * integer array/list -- the result of ``_sdc_take``;
    * boolean array/list -- the result of ``getitem_by_mask``.
    """
    if not isinstance(self, RangeIndexType):
        return None

    _func_name = 'Operator getitem().'
    ty_checker = TypeChecker(_func_name)

    # Note the precedence: scalar/slice, OR (sequence AND int/bool dtype).
    if not (isinstance(idx, (types.Integer, types.SliceType))
            or (isinstance(idx, (types.Array, types.List))
                and isinstance(idx.dtype, (types.Integer, types.Boolean)))):
        ty_checker.raise_exc(idx, 'integer, slice, integer array or list',
                             'idx')

    if isinstance(idx, types.Integer):

        def pd_range_index_getitem_impl(self, idx):
            range_len = len(self._data)
            # FIXME_Numba#5801: Numba type unification rules make this float.
            # Without the explicit cast, adding the signed length to an
            # unsigned index can be typed as a float, which would make the
            # returned element a float as well.
            idx = types.int64((range_len + idx) if idx < 0 else idx)
            if (idx < 0 or idx >= range_len):
                raise IndexError("RangeIndex.getitem: index is out of bounds")
            return self.start + self.step * idx

        return pd_range_index_getitem_impl

    if isinstance(idx, types.SliceType):

        def pd_range_index_getitem_impl(self, idx):
            # Translate the positional slice into start/stop/step expressed
            # in the units of the parent range.
            fix_start, fix_stop, fix_step = idx.indices(len(self._data))
            return pd.RangeIndex(self.start + self.step * fix_start,
                                 self.start + self.step * fix_stop,
                                 self.step * fix_step,
                                 name=self._name)

        return pd_range_index_getitem_impl

    # returns np.array which is used to represent pandas Int64Index now
    if isinstance(idx, (types.Array, types.List)):

        if isinstance(idx.dtype, types.Integer):

            def pd_range_index_getitem_impl(self, idx):
                return _sdc_take(self, idx)

            return pd_range_index_getitem_impl
        elif isinstance(idx.dtype, types.Boolean):

            def pd_range_index_getitem_impl(self, idx):
                return getitem_by_mask(self, idx)

            return pd_range_index_getitem_impl
Пример #26
0
def pd_int64_index_join_overload(self,
                                 other,
                                 how,
                                 level=None,
                                 return_indexers=False,
                                 sort=False):
    """Validate argument types for ``Int64Index.join()`` and return its implementation.

    Returns ``None`` when *self* is not an ``Int64IndexType``.

    Parameters
    ----------
    other
        Must be an instance of one of ``sdc_pandas_index_types``.
    how
        Must be a ``types.StringLiteral``; only ``'outer'`` is supported.
    level
        Must be omitted / ``None``.
    return_indexers
        Must be omitted, a ``types.BooleanLiteral`` or the Python value
        ``False``.  Its value is resolved here, outside the implementation.
    sort
        Must be omitted, a ``types.Boolean`` or the Python value ``False``;
        it is not used by the implementation.

    Raises
    ------
    SDCLimitation
        If ``how`` is a literal other than ``'outer'``.

    Invalid argument types are reported through ``TypeChecker.raise_exc``.
    """
    if not isinstance(self, Int64IndexType):
        return None

    _func_name = 'Method join().'
    ty_checker = TypeChecker(_func_name)

    if not isinstance(other, sdc_pandas_index_types):
        ty_checker.raise_exc(other, 'pandas index', 'other')

    if not isinstance(how, types.StringLiteral):
        ty_checker.raise_exc(how, 'string', 'how')
    if how.literal_value != 'outer':
        raise SDCLimitation(
            f"{_func_name} Only supporting 'outer' now. Given 'how': {how.literal_value}"
        )

    if not (isinstance(level,
                       (types.Omitted, types.NoneType)) or level is None):
        ty_checker.raise_exc(level, 'None', 'level')

    if not (isinstance(return_indexers, (types.Omitted, types.BooleanLiteral))
            or return_indexers is False):
        ty_checker.raise_exc(return_indexers, 'boolean', 'return_indexers')

    if not (isinstance(sort, (types.Omitted, types.Boolean)) or sort is False):
        ty_checker.raise_exc(sort, 'boolean', 'sort')

    # The check above accepts three forms, but only BooleanLiteral exposes
    # `literal_value`; Omitted carries its default in `value`, and the plain
    # Python default is simply False.
    if isinstance(return_indexers, types.BooleanLiteral):
        _return_indexers = return_indexers.literal_value
    elif isinstance(return_indexers, types.Omitted):
        _return_indexers = return_indexers.value
    else:
        _return_indexers = False

    def pd_int64_index_join_impl(self,
                                 other,
                                 how,
                                 level=None,
                                 return_indexers=False,
                                 sort=False):

        # Explicit comparison against the captured constant is kept as-is.
        if _return_indexers == True:  # noqa
            return sdc_indexes_join_outer(self, other)
        else:
            # NOTE(review): single-element unpacking presumes the helper
            # yields a one-element result on this path -- confirm.
            joined_index, = sdc_indexes_join_outer(self, other)
            return joined_index

    return pd_int64_index_join_impl
Пример #27
0
def concurrent_dict_update_ovld(self, other):
    """Validate argument types for ``ConcurrentDict.update()`` and return its implementation.

    Returns ``None`` unless both *self* and *other* are instances of
    ``ConcurrentDictType``.  When the two types are not the very same type
    object, the mismatch is reported through ``TypeChecker.raise_exc``.

    The returned implementation delegates to ``hashmap_update(self, other)``.
    """
    # Both operands must be checked with isinstance; a bare
    # `(self, ConcurrentDictType)` tuple is always truthy and would let any
    # `self` through.
    if not (isinstance(self, ConcurrentDictType)
            and isinstance(other, ConcurrentDictType)):
        return None

    _func_name = f'Method {self}::update()'
    ty_checker = TypeChecker(_func_name)

    if self is not other:
        ty_checker.raise_exc(other, f'{self}', 'other')

    def concurrent_dict_update_impl(self, other):
        return hashmap_update(self, other)

    return concurrent_dict_update_impl
Пример #28
0
def pd_int64_index_overload(data, dtype=None, copy=False, name=None):
    """Validate argument types for ``pd.Int64Index()`` and return its constructor implementation.

    Parameters
    ----------
    data
        An array/list with an integer dtype, or one of the index types
        ``PositionalIndexType``, ``RangeIndexType``, ``Int64IndexType``.
    dtype
        Must satisfy ``_check_dtype_param_type``.  The returned
        implementation raises ``ValueError`` unless it is ``None``, a signed
        integer dtype, or one of the strings ``'int8'``, ``'int16'``,
        ``'int32'``, ``'int64'``.
    copy
        Omitted, none, boolean type, or the Python value ``False``.
    name
        Omitted, none, string type, or ``None``.

    Invalid argument types are reported through ``TypeChecker.raise_exc``.
    """
    checker = TypeChecker('pd.Int64Index().')

    convertible_indexes = (PositionalIndexType, RangeIndexType, Int64IndexType)
    data_is_int_sequence = (isinstance(data, (types.Array, types.List))
                            and isinstance(data.dtype, types.Integer))
    if not (data_is_int_sequence or isinstance(data, convertible_indexes)):
        checker.raise_exc(data, 'array/list of integers or integer index',
                          'data')

    # Flags computed once here and captured by the implementation below.
    dtype_is_numpy_signed_int = (
        check_signed_integer(dtype)
        or (isinstance(dtype, types.NumberClass)
            and check_signed_integer(dtype.dtype)))
    dtype_is_unicode_str = isinstance(
        dtype, (types.UnicodeType, types.StringLiteral))
    if not _check_dtype_param_type(dtype):
        checker.raise_exc(dtype, 'int64 dtype', 'dtype')

    copy_is_supported = (
        isinstance(copy, (types.NoneType, types.Omitted, types.Boolean))
        or copy is False)
    if not copy_is_supported:
        checker.raise_exc(copy, 'bool', 'copy')

    name_types = (types.NoneType, types.Omitted,
                  types.StringLiteral, types.UnicodeType)
    if not (isinstance(name, name_types) or name is None):
        checker.raise_exc(name, 'string or none', 'name')

    is_data_array = isinstance(data, types.Array)
    is_data_index = isinstance(data, convertible_indexes)
    data_dtype_is_int64 = data.dtype is types.int64

    def pd_int64_index_ctor_impl(data, dtype=None, copy=False, name=None):

        if not (dtype is None or dtype_is_numpy_signed_int
                or (dtype_is_unicode_str
                    and dtype in ('int8', 'int16', 'int32', 'int64'))):
            raise ValueError(
                "Incorrect `dtype` passed: expected signed integer")

        # The explicit comparisons against captured constants are kept
        # verbatim (hence the noqa markers).
        if is_data_array == True:  # noqa
            values = data
        elif is_data_index == True:  # noqa
            values = data.values
        else:
            # using fix_df_index to get array since it handles index=None
            values = fix_df_index(data)._data

        if data_dtype_is_int64 == False:  # noqa
            values = numpy_like.astype(values, dtype=types.int64)
        elif copy:
            values = np.copy(values)
        return init_int64_index(values, name)

    return pd_int64_index_ctor_impl
Пример #29
0
def _sdc_pandas_series_align_overload(series,
                                      other,
                                      size='max',
                                      finiteness=False):
    """Validate argument types for ``_sdc_pandas_series_align()`` and return its implementation.

    Parameters
    ----------
    series, other
        Both must pass ``TypeChecker.check`` against ``SeriesType``.
    size
        A string (Python ``str``, string literal/unicode type, or omitted).
        The implementation raises ``ValueError`` unless it equals ``'max'``
        or ``'min'``.
    finiteness
        A boolean (Python ``bool``, boolean type, or omitted).

    Returns
    -------
    The implementation function.  It produces two ``pandas.Series`` of equal
    length -- the longer input length for ``'max'``, the shorter for
    ``'min'`` -- pre-filled with NaN.  The leading ``min(len)`` positions are
    copied from the inputs; when *finiteness* is true, a position is copied
    only if both inputs are finite there.  Each result keeps the name of its
    source series; no index is passed to the constructor.
    """
    checker = TypeChecker(
        'Function sdc.common_functions._sdc_pandas_series_align().')
    checker.check(series, SeriesType)
    checker.check(other, SeriesType)

    if not isinstance(size, (str, types.StringLiteral, types.UnicodeType,
                             types.Omitted)):
        checker.raise_exc(size, 'str', 'size')

    if not isinstance(finiteness, (bool, types.Boolean, types.Omitted)):
        checker.raise_exc(finiteness, 'bool', 'finiteness')

    def _sdc_pandas_series_align_impl(series,
                                      other,
                                      size='max',
                                      finiteness=False):
        if size != 'max' and size != 'min':
            raise ValueError(
                "Function sdc.common_functions._sdc_pandas_series_align(). "
                "The object size\n expected: 'max' or 'min'")

        left, right = series._data, other._data
        left_len, right_len = len(left), len(right)
        common_len = min(left_len, right_len)
        if size == 'max':
            out_len = max(left_len, right_len)
        else:
            out_len = common_len

        # Start from all-NaN buffers; only the overlapping prefix is filled.
        aligned_left = numpy.repeat([numpy.nan], out_len)
        aligned_right = numpy.repeat([numpy.nan], out_len)

        for i in numba.prange(common_len):
            if not finiteness or (numpy.isfinite(left[i])
                                  and numpy.isfinite(right[i])):
                aligned_left[i] = left[i]
                aligned_right[i] = right[i]
            else:
                aligned_left[i] = numpy.nan
                aligned_right[i] = numpy.nan

        aligned_series = pandas.Series(aligned_left, name=series._name)
        aligned_other_series = pandas.Series(aligned_right, name=other._name)

        return aligned_series, aligned_other_series

    return _sdc_pandas_series_align_impl
Пример #30
0
def sdc_pandas_series_groupby_var(self, ddof=1, *args):
    """Validate argument types for ``GroupBy.var()`` and return its implementation.

    Parameters
    ----------
    self
        Must pass ``TypeChecker.check`` against ``SeriesGroupByType``.
    ddof
        Must be omitted, a Python ``int`` or an integer type; anything else
        is reported through ``TypeChecker.raise_exc``.
    *args
        Not inspected here.

    Returns
    -------
    Whatever ``sdc_pandas_series_groupby_apply_func`` produces for the
    ``'var'`` function with the method signature ``(self, ddof, *args)``,
    default ``ddof=1`` and ``ddof`` forwarded as ``ddof``.
    """
    checker = TypeChecker('GroupBy.var().')
    checker.check(self, SeriesGroupByType)

    if not isinstance(ddof, (types.Omitted, int, types.Integer)):
        checker.raise_exc(ddof, 'int', 'ddof')

    return sdc_pandas_series_groupby_apply_func(
        self,
        'var',                      # name of the applied function
        ['self', 'ddof', '*args'],  # method arguments
        {'ddof': 1},                # default values
        {'ddof': 'ddof'},           # parameters used by the implementation
    )