Пример #1
0
        def _get_agg_typ(self, grp, args, code):
            """Compute the typing signature of an aggregation over a groupby.

            The aggregation function (given as the code object ``code``) is
            converted to Numba IR once and then type-inferred separately for
            each selected column, with that column's array type as its only
            argument.

            Args:
                grp: groupby type object; this method reads its ``as_index``,
                    ``keys``, ``selection``, ``explicit_select`` and
                    ``df_type`` (``columns`` / ``data``) attributes.
                args: argument types of the call being typed; forwarded
                    unchanged into the returned signature.
                code: code object of the aggregation function.

            Returns:
                ``signature(result_type, *args)`` where ``result_type`` is a
                ``DataFrameType``, or a series type when exactly one column
                was explicitly selected and ``as_index`` is set.
            """
            ir_globals = {'np': np, 'numba': numba, 'sdc': sdc}
            agg_ir = numba.ir_utils.get_ir_of_code(ir_globals, code)

            def _column_type(name):
                # look up the array type of the input column called `name`
                pos = grp.df_type.columns.index(name)
                return grp.df_type.data[pos]

            result_names = []
            result_types = []

            # key columns are part of the output only if not as_index
            if not grp.as_index:
                for key in grp.keys:
                    result_names.append(key)
                    result_types.append(_column_type(key))

            # infer the aggregation's output type for each selected column
            for selected in grp.selection:
                result_names.append(selected)
                in_arr_type = _column_type(selected)
                _typemap, ret_dtype, _calltypes = \
                    numba.typed_passes.type_inference_stage(
                        self.context, agg_ir, (in_arr_type, ), None)
                result_types.append(_get_series_array_type(ret_dtype))

            result = DataFrameType(
                tuple(result_types), None, tuple(result_names))

            # XXX output becomes series if single output and explicitly selected
            single_selected = len(grp.selection) == 1
            if single_selected and grp.explicit_select and grp.as_index:
                result = arr_to_series_type(result_types[0])

            return signature(result, *args)
Пример #2
0
        def generic(self, args, kws):
            """Type a ``pivot_table()`` call and return its signature.

            Args:
                args: six argument types, unpacked as ``(df, values, index,
                    columns, aggfunc, _pivot_values)``. ``values``, ``index``
                    and ``columns`` must be ``types.StringLiteral``;
                    ``aggfunc`` must expose ``literal_value`` and
                    ``_pivot_values`` must expose ``meta``.
                kws: keyword argument types; must be empty.

            Returns:
                ``signature(out_df, *args)`` where ``out_df`` is a
                ``DataFrameType`` with one column per entry of
                ``_pivot_values.meta``, every column having the array type
                inferred for the aggregation of the ``values`` column.

            Raises:
                ValueError: if ``values``, ``index`` or ``columns`` is not a
                    string literal.
            """
            assert not kws
            df, values, index, columns, aggfunc, _pivot_values = args

            if not (isinstance(values, types.StringLiteral)
                    and isinstance(index, types.StringLiteral)
                    and isinstance(columns, types.StringLiteral)):
                # NOTE: the trailing space matters -- adjacent string literals
                # are concatenated verbatim.
                raise ValueError(
                    "pivot_table() only supports string constants for "
                    "'values', 'index' and 'columns' arguments")

            # 'index' and 'columns' are only validated above; the output type
            # depends solely on the 'values' column and the pivot values.
            values_name = values.literal_value

            # get output data type by type-inferring the aggregation function
            # on the array type of the 'values' column
            in_arr_typ = df.data[df.columns.index(values_name)]
            func = get_agg_func(None, aggfunc.literal_value, None)
            f_ir = numba.ir_utils.get_ir_of_code(
                {
                    'np': np,
                    'numba': numba,
                    'sdc': sdc
                }, func.__code__)
            _, out_dtype, _ = numba.typed_passes.type_inference_stage(
                self.context, f_ir, (in_arr_typ, ), None)
            out_arr_typ = _get_series_array_type(out_dtype)

            # one output column (all of the same array type) per pivot value
            pivot_vals = _pivot_values.meta
            n_vals = len(pivot_vals)
            out_df = DataFrameType((out_arr_typ, ) * n_vals, None,
                                   tuple(pivot_vals))

            return signature(out_df, *args)
Пример #3
0
def get_hiframes_dtypes(df):
    """Return the hiframes array types of a pandas dataframe's columns.

    Each column is looked up by name, its dtype is inferred with
    ``_infer_series_dtype`` and converted to an array type with
    ``_get_series_array_type``.

    Returns:
        tuple with one array type per column, in column order.
    """
    return tuple(
        _get_series_array_type(_infer_series_dtype(df[name]))
        for name in df.columns.tolist()
    )