def _box_series_data(dtype, data_typ, val, c):
    """Box the native data array of a Series into a Python array object.

    Parameters
    ----------
    dtype
        Element type of the data. A ``types.BaseTuple`` is first replaced
        by the Numba type of the equivalent aligned NumPy structured dtype.
    data_typ
        Numba type of the native array held in ``val``.
    val
        Native value of the array to box.
    c
        Boxing context; ``c.context``, ``c.builder`` and ``c.pyapi`` are
        used.

    Returns
    -------
    The boxed array object. When the (possibly converted) ``dtype`` is a
    ``types.Record`` the boxed array is additionally cast with
    ``astype("O")`` and the cast result is returned instead.
    """
    if isinstance(dtype, types.BaseTuple):
        # Describe the tuple as a comma-separated structured dtype so the
        # Record handling at the end of this function applies to it.
        np_dtype = np.dtype(
            ','.join(str(t) for t in dtype.types), align=True)
        dtype = numba.numpy_support.from_dtype(np_dtype)

    if dtype == string_type:
        # String data is always boxed as string_array_type, not data_typ.
        arr = box_str_arr(string_array_type, val, c)
    elif dtype == datetime_date_type:
        arr = box_datetime_date_array(data_typ, val, c)
    elif isinstance(dtype, PDCategoricalDtype):
        arr = box_categorical_array(data_typ, val, c)
    elif data_typ == string_array_split_view_type:
        arr = box_str_arr_split_view(data_typ, val, c)
    elif dtype == types.List(string_type):
        arr = box_list(list_string_array_type, val, c)
    else:
        arr = box_array(data_typ, val, c)

    if isinstance(dtype, types.Record):
        pyapi = c.pyapi
        o_cstr = c.context.insert_const_string(c.builder.module, "O")
        o_obj = pyapi.string_from_string(o_cstr)
        record_arr = arr
        arr = pyapi.call_method(record_arr, "astype", (o_obj,))
        # call_method does not consume its arguments, so the "O" string and
        # the pre-cast array are still owned here. Release them now that
        # only the astype result is returned; otherwise both objects leak
        # each time a record Series is boxed.
        pyapi.decref(o_obj)
        pyapi.decref(record_arr)

    return arr
def lower_box_df(context, builder, sig, args):
    """Emit code that builds a ``pandas.DataFrame`` object from columns.

    ``sig.args`` must have even length. The first half are types whose
    ``literal_value`` is the column name; the second half are the types of
    the column arrays. The native column values are taken from the second
    half of ``args``.

    The generated code acquires the GIL, calls ``pandas.DataFrame()`` with
    no arguments, assigns each boxed column with item assignment, releases
    the GIL, and returns the DataFrame object.
    """
    n_sig_args = len(sig.args)
    assert n_sig_args % 2 == 0, "name and column pairs expected"
    half = n_sig_args // 2
    names = [name_typ.literal_value for name_typ in sig.args[:half]]
    native_cols = list(args[half:])
    col_types = list(sig.args[half:])

    pyapi = context.get_python_api(builder)
    env_manager = context.get_env_manager(builder)
    c = numba.pythonapi._BoxContext(context, builder, pyapi, env_manager)
    gil_state = pyapi.gil_ensure()  # hold the GIL for the Python C-API calls

    pandas_name = context.insert_const_string(c.builder.module, "pandas")
    pandas_mod = pyapi.import_module_noblock(pandas_name)
    df_obj = pyapi.call_method(pandas_mod, "DataFrame", ())

    for cname, arr, arr_typ in zip(names, native_cols, col_types):
        # Equivalent of: df[cname] = boxed column
        # TODO: datetime.date, DatetimeIndex?
        if arr_typ == string_array_type:
            col_obj = box_str_arr(arr_typ, arr, c)
        else:
            col_obj = box_array(arr_typ, arr, c)
            # TODO: is incref required?
            context.nrt.incref(builder, arr_typ, arr)

        name_cstr = context.insert_const_string(c.builder.module, cname)
        key_obj = pyapi.string_from_string(name_cstr)
        pyapi.object_setitem(df_obj, key_obj, col_obj)
        # NOTE(review): col_obj is intentionally not decref'd here (the
        # original had that call disabled) -- confirm whether this leaks a
        # reference or guards against a known crash.
        pyapi.decref(key_obj)

    pyapi.decref(pandas_mod)
    pyapi.gil_release(gil_state)  # matches gil_ensure() above
    return df_obj
def box_series(typ, val, c):
    """Box a native Series value as a ``pandas.Series`` object.

    The data is boxed with ``box_str_arr`` when ``typ.dtype`` is
    ``string_type`` and otherwise as a 1-D C-contiguous array of
    ``typ.dtype``. The boxed data is then passed as the single argument to
    ``pandas.Series`` and the resulting object is returned.
    """
    pyapi = c.pyapi
    elem_type = typ.dtype

    if elem_type == string_type:
        data_obj = box_str_arr(typ, val, c)
    else:
        array_type = types.Array(elem_type, 1, 'C')
        data_obj = box_array(array_type, val, c)

    pandas_name = c.context.insert_const_string(c.builder.module, "pandas")
    pandas_mod = pyapi.import_module_noblock(pandas_name)
    series_obj = pyapi.call_method(pandas_mod, "Series", (data_obj,))
    # NOTE(review): data_obj is not decref'd after the call -- confirm
    # whether the boxers return an owned reference that should be released.
    pyapi.decref(pandas_mod)
    return series_obj
def box_dataframe(typ, val, c):
    """Box a native dataframe struct as a ``pandas.DataFrame`` object.

    An empty ``pandas.DataFrame()`` is created and every column named in
    ``typ.columns`` is assigned to it. For each column the generated code
    checks two things at run time: whether the struct's ``parent`` pointer
    is non-null, and whether the column's ``unboxed`` flag equals 0. When
    both hold, the column is fetched from the parent object by attribute
    lookup; otherwise the native column array is boxed. If ``typ.index``
    is not ``types.none`` the boxed index is assigned to ``df.index``.

    Returns the DataFrame object.
    """
    context, builder, pyapi = c.context, c.builder, c.pyapi

    column_names = typ.columns
    n_cols = len(column_names)
    column_types = typ.data
    # TODO: check Categorical
    elem_types = [col_typ.dtype for col_typ in column_types]

    df_struct = cgutils.create_struct_proxy(typ)(
        context, builder, value=val)
    native_cols = [builder.extract_value(df_struct.data, k)
                   for k in range(n_cols)]
    # A non-null parent means the df was unboxed from Python.
    has_parent = cgutils.is_not_null(builder, df_struct.parent)

    pandas_name = context.insert_const_string(builder.module, "pandas")
    pandas_mod = pyapi.import_module_noblock(pandas_name)
    df_obj = pyapi.call_method(pandas_mod, "DataFrame", ())

    per_column = zip(column_names, native_cols, column_types, elem_types)
    for idx, (cname, arr, arr_typ, dtype) in enumerate(per_column):
        # Equivalent of: df[cname] = boxed column
        # TODO: datetime.date, DatetimeIndex?
        name_cstr = context.insert_const_string(builder.module, cname)
        cname_obj = pyapi.string_from_string(name_cstr)

        # If the column was never unboxed, reuse the boxed one from parent.
        unboxed_flag = builder.extract_value(df_struct.unboxed, idx)
        int8_zero = context.get_constant(types.int8, 0)
        not_unboxed = builder.icmp(lc.ICMP_EQ, unboxed_flag, int8_zero)
        use_parent = builder.and_(has_parent, not_unboxed)

        with builder.if_else(use_parent) as (from_parent, from_native):
            with from_parent:
                col_obj = pyapi.object_getattr_string(
                    df_struct.parent, cname)
                pyapi.object_setitem(df_obj, cname_obj, col_obj)

            with from_native:
                col_obj = _box_dataframe_column(dtype, arr_typ, arr, c)
                pyapi.object_setitem(df_obj, cname_obj, col_obj)

        # NOTE(review): col_obj is not decref'd in either branch (the
        # original had that call disabled) -- confirm whether this leaks.
        pyapi.decref(cname_obj)

    # Set df.index if necessary.
    if typ.index != types.none:
        index_obj = box_array(typ.index, df_struct.index, c)
        pyapi.object_setattr_string(df_obj, 'index', index_obj)

    pyapi.decref(pandas_mod)
    return df_obj


def _box_dataframe_column(dtype, arr_typ, arr, c):
    """Box one native dataframe column, choosing the boxer from its types."""
    if dtype == string_type:
        return box_str_arr(arr_typ, arr, c)
    if isinstance(dtype, PDCategoricalDtype):
        return box_categorical_array(arr_typ, arr, c)
    if arr_typ == string_array_split_view_type:
        return box_str_arr_split_view(arr_typ, arr, c)
    if dtype == types.List(string_type):
        # List-of-string columns are boxed as list_string_array_type.
        return box_list(list_string_array_type, arr, c)
    # TODO: is incref required?
    return box_array(arr_typ, arr, c)