Exemplo n.º 1
0
def lower_box_df(context, builder, sig, args):
    """Emit code that builds a ``pandas.DataFrame`` from name/column pairs.

    The first half of ``sig.args`` holds literal column-name types and the
    second half the column array types; ``args`` carries the native values
    in the same layout.  The GIL is acquired while the Python objects are
    created and released before returning.

    Returns the LLVM value of the newly created DataFrame object.
    """
    assert len(sig.args) % 2 == 0, "name and column pairs expected"
    half = len(sig.args) // 2
    names = [name_typ.literal_value for name_typ in sig.args[:half]]
    native_cols = list(args[half:])
    col_types = list(sig.args[half:])

    pyapi = context.get_python_api(builder)
    env_manager = context.get_env_manager(builder)
    c = numba.pythonapi._BoxContext(context, builder, pyapi, env_manager)
    gil_state = pyapi.gil_ensure()

    # df = pandas.DataFrame()
    pandas_name = context.insert_const_string(builder.module, "pandas")
    pandas_mod = pyapi.import_module_noblock(pandas_name)
    res = pyapi.call_method(pandas_mod, "DataFrame", ())

    # df[name] = boxed column, one column at a time
    # TODO: datetime.date, DatetimeIndex?
    for name, native, col_typ in zip(names, native_cols, col_types):
        if col_typ == string_array_type:
            boxed = box_str_arr(col_typ, native, c)
        else:
            boxed = box_array(col_typ, native, c)
            # TODO: is incref required?
            # NOTE(review): presumably compensates for box_array taking
            # over the native reference -- confirm.
            context.nrt.incref(builder, col_typ, native)
        name_cstr = context.insert_const_string(builder.module, name)
        name_obj = pyapi.string_from_string(name_cstr)
        pyapi.object_setitem(res, name_obj, boxed)
        # NOTE(review): `boxed` is not decref'd here (the decref was
        # disabled in the original); this may leak one reference per
        # column -- confirm before enabling.
        pyapi.decref(name_obj)

    pyapi.decref(pandas_mod)
    pyapi.gil_release(gil_state)
    return res
Exemplo n.º 2
0
def _box_series_data(dtype, data_typ, val, c):
    """Box the native data of a series/index into a Python array object.

    Parameters:
        dtype: element type of the data; a ``types.BaseTuple`` is first
            converted to the matching aligned NumPy record type.
        data_typ: Numba type of the native container ``val``.
        val: native value to box.
        c: boxing context providing ``context``, ``builder`` and ``pyapi``.

    Returns the LLVM value of the boxed object.  For record dtypes the
    boxed array is additionally converted with ``astype("O")``.
    """
    if isinstance(dtype, types.BaseTuple):
        np_dtype = np.dtype(
            ','.join(str(t) for t in dtype.types), align=True)
        dtype = numba.numpy_support.from_dtype(np_dtype)

    if dtype == string_type:
        arr = box_str_arr(string_array_type, val, c)
    elif dtype == datetime_date_type:
        arr = box_datetime_date_array(data_typ, val, c)
    elif isinstance(dtype, PDCategoricalDtype):
        arr = box_categorical_array(data_typ, val, c)
    elif data_typ == string_array_split_view_type:
        arr = box_str_arr_split_view(data_typ, val, c)
    elif dtype == types.List(string_type):
        arr = box_list(list_string_array_type, val, c)
    else:
        arr = box_array(data_typ, val, c)

    if isinstance(dtype, types.Record):
        o_cstr = c.context.insert_const_string(c.builder.module, "O")
        o_obj = c.pyapi.string_from_string(o_cstr)
        record_arr = arr
        arr = c.pyapi.call_method(record_arr, "astype", (o_obj,))
        # call_method does not steal its arguments or the callee, so the
        # temporary "O" string and the pre-conversion array must be
        # released here; previously both references were leaked.
        c.pyapi.decref(o_obj)
        c.pyapi.decref(record_arr)

    return arr
Exemplo n.º 3
0
def box_categorical_array(typ, val, c):
    """Box a native categorical array into a ``pandas.Categorical``.

    The category values of ``typ.dtype`` are collected into a Python list,
    the native codes are boxed as a 1D C-contiguous integer array, and
    both are handed to ``pandas.Categorical.from_codes``.

    Returns the LLVM value of the resulting Categorical object.
    """
    pyapi = c.pyapi
    cat_dtype = typ.dtype
    pandas_name = c.context.insert_const_string(c.builder.module, "pandas")
    pandas_mod = pyapi.import_module_noblock(pandas_name)

    # Python list of the categories, e.g. ['A', 'B', 'C']
    category_objs = _get_cat_obj_items(cat_dtype.categories, c)
    n_categories = len(category_objs)
    categories_list = pyapi.list_new(
        c.context.get_constant(types.intp, n_categories))
    for pos, category in enumerate(category_objs):
        slot = c.context.get_constant(types.intp, pos)
        # Each item is incref'd before insertion and released again in
        # the cleanup below, so the list keeps its own reference.
        pyapi.incref(category)
        pyapi.list_setitem(categories_list, slot, category)

    codes_typ = types.Array(get_categories_int_type(cat_dtype), 1, 'C')
    codes_obj = box_array(codes_typ, val, c)

    categorical_cls = pyapi.object_getattr_string(pandas_mod, "Categorical")
    cat_arr = pyapi.call_method(
        categorical_cls, "from_codes", (codes_obj, categories_list))

    # Release every temporary created above.
    pyapi.decref(categorical_cls)
    pyapi.decref(codes_obj)
    pyapi.decref(categories_list)
    for category in category_objs:
        pyapi.decref(category)
    pyapi.decref(pandas_mod)
    return cat_arr
Exemplo n.º 4
0
def box_dataframe(typ, val, c):
    """Box a native dataframe struct into a ``pandas.DataFrame`` object.

    An empty DataFrame is created, then every column named in
    ``typ.columns`` is boxed according to its array type / dtype and
    assigned by name.  When ``typ.index`` is not ``types.none`` the boxed
    index is stored as the ``index`` attribute of the DataFrame.

    Returns the LLVM value of the DataFrame object.
    """
    context, builder, pyapi = c.context, c.builder, c.pyapi

    names = typ.columns
    n_cols = len(names)
    col_types = typ.data
    # TODO: check Categorical
    elem_types = [col_typ.dtype for col_typ in col_types]

    df_struct = cgutils.create_struct_proxy(typ)(context, builder, value=val)
    native_cols = [
        builder.extract_value(df_struct.data, pos) for pos in range(n_cols)
    ]
    # Original note: "df unboxed from Python".  The flag is computed but
    # not used anywhere below.
    has_parent = cgutils.is_not_null(builder, df_struct.parent)

    # df_obj = pandas.DataFrame()
    pandas_name = context.insert_const_string(builder.module, "pandas")
    pandas_mod = pyapi.import_module_noblock(pandas_name)
    df_obj = pyapi.call_method(pandas_mod, "DataFrame", ())

    # df_obj[name] = boxed column
    # TODO: datetime.date, DatetimeIndex?
    columns = zip(names, native_cols, col_types, elem_types)
    for name, native, col_typ, elem_typ in columns:
        name_cstr = context.insert_const_string(builder.module, name)
        name_obj = pyapi.string_from_string(name_cstr)

        if elem_typ == string_type:
            boxed = box_str_arr(col_typ, native, c)
        elif isinstance(elem_typ, PDCategoricalDtype):
            boxed = box_categorical_array(col_typ, native, c)
        elif col_typ == string_array_split_view_type:
            boxed = box_str_arr_split_view(col_typ, native, c)
        elif elem_typ == types.List(string_type):
            boxed = box_list(list_string_array_type, native, c)
        else:
            # TODO: is an NRT incref of the native array required here?
            boxed = box_array(col_typ, native, c)
        pyapi.object_setitem(df_obj, name_obj, boxed)

        # NOTE(review): `boxed` is not decref'd (the decref was disabled
        # in the original); this may leak one reference per column --
        # confirm before enabling.
        pyapi.decref(name_obj)

    # df_obj.index = boxed index, only when the type carries an index
    if typ.index != types.none:
        index_obj = _box_series_data(
            typ.index.dtype, typ.index, df_struct.index, c)
        pyapi.object_setattr_string(df_obj, 'index', index_obj)

    pyapi.decref(pandas_mod)
    return df_obj
Exemplo n.º 5
0
def box_datetime_date_array(typ, val, c):
    """Box a native datetime.date array into a Python object.

    The native value is boxed as a 1D C-contiguous int64 array and passed
    to ``hpat.hiframes.pd_timestamp_ext.int_array_to_datetime_date``; the
    object returned by that call is the result.

    Parameters:
        typ: Numba type of ``val`` (not inspected here).
        val: native value to box.
        c: boxing context providing ``context``, ``builder`` and ``pyapi``.

    Returns the LLVM value of the converted object.
    """
    pyapi = c.pyapi
    ary = box_array(types.Array(types.int64, 1, 'C'), val, c)
    hpat_name = c.context.insert_const_string(c.builder.module, 'hpat')
    hpat_mod = pyapi.import_module_noblock(hpat_name)
    hi_mod = pyapi.object_getattr_string(hpat_mod, 'hiframes')
    pte_mod = pyapi.object_getattr_string(hi_mod, 'pd_timestamp_ext')
    iatdd = pyapi.object_getattr_string(pte_mod, 'int_array_to_datetime_date')
    res = pyapi.call_function_objargs(iatdd, [ary])

    # The module import, the attribute lookups and the boxed array are all
    # new references and the call does not steal its argument, so each
    # temporary is released here; previously all of them were leaked.
    pyapi.decref(iatdd)
    pyapi.decref(pte_mod)
    pyapi.decref(hi_mod)
    pyapi.decref(hpat_mod)
    pyapi.decref(ary)
    return res
Exemplo n.º 6
0
def box_series(typ, val, c):
    """Box a native series value into a ``pandas.Series`` object.

    String series are boxed with ``box_str_arr``; every other dtype is
    boxed as a 1D C-contiguous array of ``typ.dtype``.  The boxed data is
    then passed to ``pandas.Series``.

    Parameters:
        typ: Numba type of the series.
        val: native value to box.
        c: boxing context providing ``context``, ``builder`` and ``pyapi``.

    Returns the LLVM value of the Series object.
    """
    if typ.dtype == string_type:
        arr = box_str_arr(typ, val, c)
    else:
        arr = box_array(types.Array(typ.dtype, 1, 'C'), val, c)
    mod_name = c.context.insert_const_string(c.builder.module, "pandas")
    class_obj = c.pyapi.import_module_noblock(mod_name)
    res = c.pyapi.call_method(class_obj, "Series", (arr, ))
    c.pyapi.decref(class_obj)
    # call_method does not steal its arguments: release our reference to
    # the boxed data (previously leaked).  The Series holds whatever
    # reference it needs.
    c.pyapi.decref(arr)
    return res
Exemplo n.º 7
0
def box_dt_index(typ, val, c):
    """Box a native datetime index into a ``pandas.DatetimeIndex`` object.

    The ``data`` member of the native struct is boxed as an array of type
    ``_dt_index_data_typ`` and passed to ``pandas.DatetimeIndex``.  The
    index name is not boxed.

    Parameters:
        typ: Numba type of the index.
        val: native struct value to box.
        c: boxing context providing ``context``, ``builder`` and ``pyapi``.

    Returns the LLVM value of the DatetimeIndex object.
    """
    mod_name = c.context.insert_const_string(c.builder.module, "pandas")
    pd_class_obj = c.pyapi.import_module_noblock(mod_name)

    dt_index = numba.cgutils.create_struct_proxy(
        typ)(c.context, c.builder, val)

    arr = box_array(_dt_index_data_typ, dt_index.data, c)

    # TODO: support name boxing (pass the boxed name, or None when the
    # index is unnamed, to the constructor)

    res = c.pyapi.call_method(pd_class_obj, "DatetimeIndex", (arr,))

    c.pyapi.decref(pd_class_obj)
    # call_method does not steal its arguments: release our reference to
    # the boxed data (previously leaked).
    c.pyapi.decref(arr)
    return res