def box_categorical_array(typ, val, c):
    """
    Boxes a native categorical array into a pandas Categorical object.

    The categories are taken from ``typ.dtype.categories`` and the codes from
    ``val``; both are handed to ``pandas.Categorical.from_codes``.

    Params:
        typ: Numba type of the categorical array
        val: native value holding the codes
        c: boxing context object (provides ``context``, ``builder``, ``pyapi``)

    Returns:
        Python object returned by ``Categorical.from_codes(codes, categories)``
    """
    pyapi = c.pyapi
    cat_dtype = typ.dtype

    pandas_name = c.context.insert_const_string(c.builder.module, "pandas")
    pandas_mod = pyapi.import_module_noblock(pandas_name)

    # categories list e.g. ['A', 'B', 'C']
    category_objs = _get_cat_obj_items(cat_dtype.categories, c)
    n_categories = len(category_objs)
    categories_list = pyapi.list_new(
        c.context.get_constant(types.intp, n_categories))
    for position, category_obj in enumerate(category_objs):
        slot = c.context.get_constant(types.intp, position)
        # Take an extra reference before storing the item: every entry of
        # category_objs is decref'ed once more at the end of this function.
        pyapi.incref(category_obj)
        pyapi.list_setitem(categories_list, slot, category_obj)

    # Codes are boxed as a 1-d C-contiguous array of the categories' int type.
    codes_int_type = get_categories_int_type(cat_dtype)
    codes_obj = box_array(types.Array(codes_int_type, 1, 'C'), val, c)

    categorical_cls = pyapi.object_getattr_string(pandas_mod, "Categorical")
    result = pyapi.call_method(
        categorical_cls, "from_codes", (codes_obj, categories_list))

    # Release every temporary Python object created above.
    pyapi.decref(categorical_cls)
    pyapi.decref(codes_obj)
    pyapi.decref(categories_list)
    for category_obj in category_objs:
        pyapi.decref(category_obj)
    pyapi.decref(pandas_mod)

    return result
def box_int64_index(typ, val, c):
    """
    Boxes a native Int64Index value into a pandas Int64Index object.

    Params:
        typ: Numba type of the index (``typ.data`` and ``typ.is_named`` are read)
        val: native value
        c: boxing context object (provides ``context``, ``builder``, ``pyapi``)

    Returns:
        Python object returned by ``pandas.Int64Index(data, dtype, copy, name)``
    """
    pyapi = c.pyapi

    pandas_name = c.context.insert_const_string(c.builder.module, "pandas")
    pandas_mod = pyapi.import_module_noblock(pandas_name)

    index_struct = cgutils.create_struct_proxy(typ)(c.context, c.builder, val)
    data_obj = box_array(typ.data, index_struct.data, c)

    # dtype and copy params are not stored so use default values
    dtype_obj = pyapi.make_none()
    copy_obj = pyapi.bool_from_bool(c.context.get_constant(types.bool_, False))

    # The name field is only read from the struct for named indexes.
    name_obj = (
        pyapi.from_native_value(types.unicode_type, index_struct.name)
        if typ.is_named
        else pyapi.make_none()
    )

    result = pyapi.call_method(
        pandas_mod, "Int64Index", (data_obj, dtype_obj, copy_obj, name_obj))

    # Release the temporaries in the order they were created.
    for temp_obj in (data_obj, dtype_obj, copy_obj, name_obj, pandas_mod):
        pyapi.decref(temp_obj)

    return result
def box_dataframe(typ, val, c):
    """
    Boxes a native dataframe value into a pandas DataFrame object.

    A dict mapping each column name to its boxed column array is built and
    passed to ``pandas.DataFrame``; afterwards the ``index`` attribute is set
    when ``typ.index`` is not ``types.none``.

    Params:
        typ: Numba type of the dataframe (``columns``, ``data``, ``index`` are read)
        val: native value
        c: boxing context object (provides ``context``, ``builder``, ``pyapi``)

    Returns:
        Python object returned by ``pandas.DataFrame(columns_dict)``
    """
    context = c.context
    builder = c.builder
    pyapi = c.pyapi

    col_names = typ.columns
    n_cols = len(col_names)
    arr_typs = typ.data
    dtypes = [arr_typ.dtype for arr_typ in arr_typs]  # TODO: check Categorical

    dataframe = cgutils.create_struct_proxy(typ)(context, builder, value=val)
    col_arrs = [builder.extract_value(dataframe.data, pos) for pos in range(n_cols)]

    # df unboxed from Python
    # NOTE: this value is computed but not used anywhere below.
    has_parent = cgutils.is_not_null(builder, dataframe.parent)

    # gil_state = pyapi.gil_ensure()  # acquire GIL

    pandas_name = context.insert_const_string(c.builder.module, "pandas")
    pandas_mod = pyapi.import_module_noblock(pandas_name)
    columns_dict = pyapi.dict_new()

    for col_name, col_arr, col_typ, col_dtype in zip(col_names, col_arrs, arr_typs, dtypes):
        # df['cname'] = boxed_arr
        # TODO: datetime.date, DatetimeIndex?
        name_cstr = context.insert_const_string(c.builder.module, col_name)
        name_obj = pyapi.string_from_string(name_cstr)

        # Pick the boxing routine matching the column's element/array type.
        # TODO: is an NRT incref of the native column required before boxing?
        if col_dtype == string_type:
            boxed_col = box_str_arr(col_typ, col_arr, c)
        elif isinstance(col_typ, Categorical):
            boxed_col = box_Categorical(col_typ, col_arr, c)
        elif col_dtype == types.List(string_type):
            boxed_col = box_list(list_string_array_type, col_arr, c)
        else:
            boxed_col = box_array(col_typ, col_arr, c)

        pyapi.dict_setitem(columns_dict, name_obj, boxed_col)

        pyapi.decref(boxed_col)
        pyapi.decref(name_obj)

    df_obj = pyapi.call_method(pandas_mod, "DataFrame", (columns_dict,))
    pyapi.decref(columns_dict)

    # set df.index if necessary
    if typ.index != types.none:
        index_obj = _box_series_data(typ.index.dtype, typ.index, dataframe.index, c)
        pyapi.object_setattr_string(df_obj, 'index', index_obj)
        pyapi.decref(index_obj)

    pyapi.decref(pandas_mod)

    # pyapi.gil_release(gil_state)  # release GIL

    return df_obj
def _box_index_data(index_typ, val, c):
    """
    Converts the native representation of a Pandas index into the matching
    Python object.

    Params:
        index_typ: Numba type of the native index value; must be one of
            RangeIndexType, StringArrayType, types.Array or types.NoneType
        val: native value
        c: LLVM context object

    Returns:
        Python object the native value is boxed into (``None`` object when
        index_typ is types.none)
    """
    assert isinstance(index_typ, (RangeIndexType, StringArrayType, types.Array, types.NoneType))

    if isinstance(index_typ, RangeIndexType):
        return box_range_index(index_typ, val, c)

    if isinstance(index_typ, types.Array):
        return box_array(index_typ, val, c)

    if isinstance(index_typ, StringArrayType):
        return box_str_arr(string_array_type, val, c)

    # index_typ is types.none
    return c.pyapi.make_none()
def _box_series_data(dtype, data_typ, val, c):
    """
    Boxes the native data of a series (or index) into a Python array object.

    Params:
        dtype: Numba element type of the data; a types.BaseTuple is first
            converted to the corresponding aligned NumPy record type
        data_typ: Numba type of the native container holding the data
        val: native value
        c: boxing context object (provides ``context``, ``builder``, ``pyapi``)

    Returns:
        New reference to the boxed Python object. For record dtypes this is
        the result of calling ``astype("O")`` on the boxed array.
    """
    if isinstance(dtype, types.BaseTuple):
        # Build a comma-separated NumPy dtype string from the tuple's members.
        np_dtype = np.dtype(','.join(str(t) for t in dtype.types), align=True)
        dtype = numba.np.numpy_support.from_dtype(np_dtype)

    if dtype == string_type:
        arr = box_str_arr(string_array_type, val, c)
    elif isinstance(dtype, CategoricalDtypeType):
        arr = box_Categorical(data_typ, val, c)
    elif dtype == types.List(string_type):
        arr = box_list(list_string_array_type, val, c)
    else:
        arr = box_array(data_typ, val, c)

    if isinstance(dtype, types.Record):
        pyapi = c.pyapi
        o_cstr = c.context.insert_const_string(c.builder.module, "O")
        o_obj = pyapi.string_from_string(o_cstr)
        record_arr = arr
        arr = pyapi.call_method(record_arr, "astype", (o_obj, ))
        # Both temporaries are owned by this function: release the "O" string
        # and the originally boxed array now that astype has produced the
        # object that is actually returned (previously both were leaked).
        pyapi.decref(o_obj)
        pyapi.decref(record_arr)

    return arr
def box_Categorical(typ, val, c):
    """
    Boxes a native categorical value into a pandas Categorical object.

    An empty ``pandas.Categorical([])`` is constructed first; its ``_dtype``
    and ``_codes`` attributes are then overwritten with the boxed dtype
    (``typ.pd_dtype``) and the boxed codes array (``typ.codes``).

    NOTE(review): another top-level definition named ``box_Categorical``
    appears later in this file; if both live in the same module scope the
    later one replaces this one -- confirm which is intended.

    Params:
        typ: Numba type of the categorical (``pd_dtype`` and ``codes`` are read)
        val: native value
        c: boxing context object (provides ``context``, ``builder``, ``pyapi``)

    Returns:
        The patched pandas Categorical Python object
    """
    pyapi = c.pyapi

    module_name = c.context.insert_const_string(c.builder.module, "pandas")
    pandas_mod = pyapi.import_module_noblock(module_name)
    categorical_cls = pyapi.object_getattr_string(pandas_mod, "Categorical")

    # Equivalent of ``pandas.Categorical([])``.
    no_values = pyapi.list_new(c.context.get_constant(types.intp, 0))
    ctor_args = pyapi.tuple_pack([no_values])
    result = pyapi.call(categorical_cls, ctor_args)

    # Replace the private dtype and codes of the empty instance.
    dtype_obj = box_CategoricalDtype(typ.pd_dtype, val, c)
    pyapi.object_setattr_string(result, "_dtype", dtype_obj)

    codes_obj = boxing.box_array(typ.codes, val, c)
    pyapi.object_setattr_string(result, "_codes", codes_obj)

    # Release the temporaries, most recently created first.
    for temp_obj in (codes_obj, dtype_obj, ctor_args, no_values,
                     categorical_cls, pandas_mod):
        pyapi.decref(temp_obj)

    return result
def box_Categorical(typ, val, c):
    """
    Boxes a native categorical value into a pandas Categorical object.

    The result is produced by calling ``pandas.Categorical.from_codes`` with
    the positional arguments ``(codes, None, None, dtype)``, where ``codes``
    is the boxed ``typ.codes`` array and ``dtype`` the boxed ``typ.pd_dtype``.

    Params:
        typ: Numba type of the categorical (``pd_dtype`` and ``codes`` are read)
        val: native value
        c: boxing context object (provides ``context``, ``builder``, ``pyapi``)

    Returns:
        Python object returned by ``Categorical.from_codes``
    """
    pyapi = c.pyapi

    module_name = c.context.insert_const_string(c.builder.module, "pandas")
    pandas_mod = pyapi.import_module_noblock(module_name)
    categorical_cls = pyapi.object_getattr_string(pandas_mod, "Categorical")
    from_codes = pyapi.object_getattr_string(categorical_cls, "from_codes")

    dtype_obj = box_CategoricalDtype(typ.pd_dtype, val, c)
    codes_obj = boxing.box_array(typ.codes, val, c)
    none_obj = pyapi.make_none()

    # The same None object fills both middle positional slots.
    call_args = pyapi.tuple_pack([codes_obj, none_obj, none_obj, dtype_obj])
    result = pyapi.call(from_codes, args=call_args)

    # Release the temporaries, most recently created first.
    for temp_obj in (call_args, none_obj, codes_obj, dtype_obj,
                     from_codes, categorical_cls, pandas_mod):
        pyapi.decref(temp_obj)

    return result