def df_rolling_method_other_none_codegen(method_name, self, args=None, kws=None):
        """Generate the source of a ``df.rolling`` method implementation for ``other=None``.

        Parameters
        ----------
        method_name
            Name forwarded to ``df_rolling_method_main_codegen``; also used in
            the generated function name and error message unless
            ``rewrite_name`` is truthy.
        self
            Object whose ``data.columns`` is passed to the main code generator.
        args
            Optional list of positional parameter names of the generated
            function; ``None`` is treated as an empty list.
        kws
            Optional mapping of keyword parameters; ``None`` is treated as an
            empty dict. Every key except ``'other'`` is forwarded to the
            rolling method as ``key=key``.

        Returns
        -------
        tuple
            ``(func_text, global_vars)``: the generated source and the globals
            it needs (``pandas`` only).
        """
        # NOTE(review): `rewrite_name` is not bound inside this block; presumably it
        # comes from an enclosing scope that is not visible here -- confirm.
        emitted_name = rewrite_name or method_name
        positional = args or []
        keywords = kws or {}

        signature = ', '.join(['self'] + positional + kwsparams2list(keywords))
        source = [f'def _df_rolling_{emitted_name}_other_none_impl({signature}):']

        if 'pairwise' in keywords:
            # With other=None an unset pairwise is resolved to True, and a true
            # pairwise makes the generated function raise.
            source.extend((
                '  if pairwise is None:',
                '    _pairwise = True',
                '  else:',
                '    _pairwise = pairwise',
                '  if _pairwise:',
                f'    raise ValueError("Method rolling.{emitted_name}(). The object pairwise\\n expected: False")',
            ))

        forwarded = positional + [
            f'{key}={key}' for key in keywords if key != 'other'
        ]
        # NOTE(review): df_rolling_method_codegen in this file passes an extra
        # `self.data.column_loc` argument to the same helper -- confirm which
        # signature of df_rolling_method_main_codegen is current.
        source += df_rolling_method_main_codegen(forwarded,
                                                 self.data.columns,
                                                 method_name)

        return '\n'.join(source), {'pandas': pandas}
Пример #2
0
    def test_range_index_create_defaults(self):
        """Build ``pd.RangeIndex`` with each parameter omitted in turn.

        For every sub-test the generated ``test_impl`` is run both through
        ``self.jit`` and directly, and the two indexes must be equal.
        """
        template = '\n'.join(['def test_impl():', '  return pd.RangeIndex({})'])

        # every parameter has a non-default value; each sub-test drops exactly
        # one of them so that its default is the thing being exercised
        explicit_values = {'start': 2, 'stop': 7, 'step': 2, 'name': "'index'"}

        for omitted in explicit_values:
            with self.subTest(omitted=omitted):
                passed = dict(explicit_values)
                del passed[omitted]

                ctor_args = ', '.join(kwsparams2list(passed))
                test_impl = _make_func_from_text(template.format(ctor_args),
                                                 global_vars={'pd': pd})
                sdc_func = self.jit(test_impl)

                result = sdc_func()
                result_ref = test_impl()
                pd.testing.assert_index_equal(result, result_ref)
Пример #3
0
def _sdc_pandas_groupby_generic_func_codegen(func_name, columns, func_params,
                                             defaults, impl_params):
    """Generate the source of a DataFrame-groupby aggregation implementation.

    Parameters
    ----------
    func_name
        Name of the Series method applied to each group; also part of the
        generated function name.
    columns
        Sized iterable of ``(column_name, column_id)`` pairs; must be
        non-empty because it is unzipped into two sequences.
    func_params
        Parameter names of the generated function; the first one names the
        groupby object.
    defaults
        Passed together with ``func_params`` to ``sigparams2list`` to render
        the signature.
    impl_params
        Passed to ``kwsparams2list`` to render the arguments of the per-group
        method call.

    Returns
    -------
    tuple
        ``(func_text, global_vars)``. The generated code also reads
        ``res_arrays_dtypes``, which is not part of ``global_vars`` here --
        presumably the caller injects it; confirm.
    """
    signature = ', '.join(sigparams2list(func_params, defaults))
    method_args = ', '.join(kwsparams2list(impl_params))

    # expressions (as source text) reaching into the groupby object
    gb = f'{func_params[0]}'
    parent_df = f'{gb}._parent'
    groups = f'{gb}._data'
    sort_flag = f'{gb}._sort'
    column_names, column_ids = tuple(zip(*columns))

    body = [
        f'def _dataframe_groupby_{func_name}_impl({signature}):',
        f'  group_keys = _sdc_asarray([key for key in {groups}])',
        '  res_index_len = len(group_keys)',
        f'  if {sort_flag}:',
        "    argsorted_index = sdc_arrays_argsort(group_keys, kind='mergesort')",
    ]

    # TODO: remove conversion from Numba typed.List to reflected one while creating group_arr_{i}
    # one result array and one prange loop per aggregated column
    for i, col_id in enumerate(column_ids):
        body += [
            f'  result_data_{i} = numpy.empty(res_index_len, dtype=res_arrays_dtypes[{i}])',
            '  for j in numba.prange(res_index_len):',
            f'    column_data_{i} = get_dataframe_data({parent_df}, {col_id})',
            f'    group_arr_{i} = _sdc_take(column_data_{i}, list({groups}[group_keys[j]]))',
            f'    group_series_{i} = pandas.Series(group_arr_{i})',
            f'    idx = argsorted_index[j] if {sort_flag} else j',
            f'    result_data_{i}[idx] = group_series_{i}.{func_name}({method_args})',
        ]

    frame_items = ', '.join(
        f"'{name}': result_data_{i}" for i, name in enumerate(column_names)
    )
    body += [
        f'  if {sort_flag}:',
        '    res_index = _sdc_take(group_keys, argsorted_index)',
        '  else:',
        '    res_index = group_keys',
        f'  return pandas.DataFrame({{{frame_items}}}, index=res_index)',
    ]

    func_text = '\n'.join(body)
    global_vars = {
        'pandas': pandas,
        'numpy': numpy,
        'numba': numba,
        '_sdc_asarray': _sdc_asarray,
        '_sdc_take': _sdc_take,
        'sdc_arrays_argsort': sdc_arrays_argsort,
        'get_dataframe_data': get_dataframe_data
    }

    return func_text, global_vars
Пример #4
0
def get_groupby_params(**kwargs):
    """Render the supported groupby parameters as one comma-separated string.

    Each supported parameter takes its value from ``kwargs`` when given and
    from the built-in default otherwise; unsupported keys in ``kwargs`` are
    ignored. The resulting mapping is rendered by ``kwsparams2list``.
    """
    # only supported groupby parameters are listed here
    supported_defaults = {'by': "'A'", 'sort': 'True'}

    chosen = {
        name: kwargs.get(name, fallback)
        for name, fallback in supported_defaults.items()
    }
    return ', '.join(kwsparams2list(chosen))
Пример #5
0
def df_rolling_method_codegen(method_name, self, args=None, kws=None):
    """Generate the source of a ``df.rolling`` method implementation.

    Parameters
    ----------
    method_name
        Rolling method name; used in the generated function name and passed
        on to ``df_rolling_method_main_codegen``.
    self
        Object whose ``data.columns`` and ``data.column_loc`` are passed to
        the main code generator.
    args
        Optional list of positional parameter names; ``None`` means none.
    kws
        Optional mapping of keyword parameters; ``None`` means none. Every
        key is forwarded to the rolling method as ``key=key``.

    Returns
    -------
    tuple
        ``(func_text, global_vars)`` with ``pandas`` as the only global.
    """
    positional = args or []
    keywords = kws or {}

    signature = ', '.join(['self'] + positional + kwsparams2list(keywords))
    source = [f'def _df_rolling_{method_name}_impl({signature}):']

    forwarded = positional + [f'{key}={key}' for key in keywords]
    source.extend(
        df_rolling_method_main_codegen(forwarded, self.data.columns,
                                       self.data.column_loc, method_name))

    return '\n'.join(source), {'pandas': pandas}
Пример #6
0
def _sdc_pandas_series_groupby_generic_func_codegen(func_name, func_params,
                                                    defaults, impl_params):
    """Generate the source of a Series-groupby aggregation implementation.

    Parameters
    ----------
    func_name
        Name of the Series method applied to each group; also part of the
        generated function name.
    func_params
        Parameter names of the generated function; the first one names the
        groupby object.
    defaults
        Passed together with ``func_params`` to ``sigparams2list`` to render
        the signature.
    impl_params
        Passed to ``kwsparams2list`` to render the arguments of the per-group
        method call.

    Returns
    -------
    tuple
        ``(func_text, global_vars)``. The generated code also reads
        ``res_dtype``, which is not part of ``global_vars`` here --
        presumably the caller injects it; confirm.
    """
    signature = ', '.join(sigparams2list(func_params, defaults))
    method_args = ', '.join(kwsparams2list(impl_params))

    # expressions (as source text) reaching into the groupby object
    gb = f'{func_params[0]}'
    parent_series = f'{gb}._parent'
    groups = f'{gb}._data'
    sort_flag = f'{gb}._sort'

    prologue = [
        f'def _series_groupby_{func_name}_impl({signature}):',
        f'  group_keys = _sdc_asarray([key for key in {groups}])',
        '  res_index_len = len(group_keys)',
        f'  if {sort_flag}:',
        "    argsorted_index = sdc_arrays_argsort(group_keys, kind='mergesort')",
    ]

    # TODO: remove conversion from Numba typed.List to reflected one while creating group_arr_{i}
    aggregation = [
        '  result_data = numpy.empty(res_index_len, dtype=res_dtype)',
        '  for j in numba.prange(res_index_len):',
        f'    group_arr = _sdc_take({parent_series}._data, list({groups}[group_keys[j]]))',
        '    group_series = pandas.Series(group_arr)',
        f'    idx = argsorted_index[j] if {sort_flag} else j',
        f'    result_data[idx] = group_series.{func_name}({method_args})',
    ]

    epilogue = [
        f'  if {sort_flag}:',
        '    res_index = _sdc_take(group_keys, argsorted_index)',
        '  else:',
        '    res_index = group_keys',
        f'  return pandas.Series(data=result_data, index=res_index, name={parent_series}._name)',
    ]

    func_text = '\n'.join(prologue + aggregation + epilogue)
    global_vars = {
        'pandas': pandas,
        'numpy': numpy,
        'numba': numba,
        '_sdc_asarray': _sdc_asarray,
        '_sdc_take': _sdc_take,
        'sdc_arrays_argsort': sdc_arrays_argsort
    }

    return func_text, global_vars
def df_rolling_method_other_df_codegen(method_name,
                                       self,
                                       other,
                                       args=None,
                                       kws=None):
    """Generate the source of a ``df.rolling`` method taking a DataFrame ``other``.

    Columns present in both frames get the rolling method applied to the
    matching pair of series; columns present in only one frame are filled
    with NaN. The result keeps the own columns first, followed by the
    columns only ``other`` has.

    Parameters
    ----------
    method_name
        Rolling method name; used in the generated function name, the error
        message and the generated method call.
    self
        Object whose ``data.columns`` lists the own column names.
    other
        Object whose ``columns`` lists the column names of ``other``.
    args
        Optional list of positional parameter names; ``None`` means none.
    kws
        Optional mapping of keyword parameters; ``None`` means none.

    Returns
    -------
    tuple
        ``(func_text, global_vars)`` with ``numpy``, ``pandas`` and
        ``float64`` as globals.

    Notes
    -----
    Column names are interpolated into generated variable names
    (``result_data_<col>`` etc.), so this assumes they form valid identifier
    suffixes -- TODO confirm against callers.
    """
    positional = args or []
    keywords = kws or {}

    rolling_params = df_rolling_params_codegen()
    method_kws = {key: key for key in keywords}
    signature = ', '.join(['self'] + positional + kwsparams2list(keywords))

    data_columns = {col: idx for idx, col in enumerate(self.data.columns)}
    other_columns = {col: idx for idx, col in enumerate(other.columns)}

    # columns order matters: own columns first, then those only `other` has
    all_columns = list(data_columns)
    all_columns.extend(col for col in other_columns if col not in data_columns)

    source = [f'def _df_rolling_{method_name}_other_df_impl({signature}):']

    if 'pairwise' in keywords:
        # With a DataFrame `other` an unset pairwise is resolved to False, and
        # a true pairwise makes the generated function raise.
        source.extend((
            '  if pairwise is None:',
            '    _pairwise = False',
            '  else:',
            '    _pairwise = pairwise',
            '  if _pairwise:',
            f'    raise ValueError("Method rolling.{method_name}(). The object pairwise\\n expected: False, None")',
        ))

    # a frame without columns contributes length 0
    data_length = 'len(self._data._data[0])' if data_columns else '0'
    other_length = 'len(other._data[0])' if other_columns else '0'
    source.append(f'  length = max([{data_length}, {other_length}])')

    frame_items = []
    for col in all_columns:
        res_data = f'result_data_{col}'
        if col in data_columns and col in other_columns:
            # common column: roll own series against the matching one of `other`
            other_series = f'other_series_{col}'
            method_kws['other'] = other_series
            method_params = ', '.join(positional + kwsparams2list(method_kws))
            source += [
                f'  data_{col} = self._data._data[{data_columns[col]}]',
                f'  other_data_{col} = other._data[{other_columns[col]}]',
                f'  series_{col} = pandas.Series(data_{col})',
                f'  {other_series} = pandas.Series(other_data_{col})',
                f'  rolling_{col} = series_{col}.rolling({rolling_params})',
                f'  result_{col} = rolling_{col}.{method_name}({method_params})',
                f'  {res_data} = result_{col}._data[:length]',
            ]
        else:
            # column exists in only one frame: result is all NaN
            source += [
                f'  {res_data} = numpy.empty(length, dtype=float64)',
                f'  {res_data}[:] = numpy.nan',
            ]
        frame_items.append(f'"{col}": {res_data}')

    frame_body = ', '.join(frame_items)
    source.append(f'  return pandas.DataFrame({{{frame_body}}})')

    func_text = '\n'.join(source)
    global_vars = {'numpy': numpy, 'pandas': pandas, 'float64': float64}

    return func_text, global_vars