示例#1
0
 def resolve_split(self, ary, args, kws):
     """Type a ``split`` call as a series of string lists.

     The return type is a ``SeriesType`` whose dtype is
     ``types.List(string_type)``.  When exactly one positional argument is
     given and it is a ``types.StringLiteral`` whose literal value has
     length one, ``string_array_split_view_type`` is passed as the second
     ``SeriesType`` argument.  ``ary`` and ``kws`` are not inspected.
     """
     elem_type = types.List(string_type)
     single_char_literal = (
         len(args) == 1
         and isinstance(args[0], types.StringLiteral)
         and len(args[0].literal_value) == 1
     )
     if single_char_literal:
         result = SeriesType(elem_type, string_array_split_view_type)
     else:
         result = SeriesType(elem_type)
     return signature(result, *args)
示例#2
0
 def __call__(self, typeinfer):
     """Add candidate list types for ``self.target`` to ``typeinfer``.

     With no item type sets, the target gets ``List(undefined)``.
     Otherwise every combination drawn from the item type sets is unified
     and the target gets a ``List`` of each unified type.
     """
     typevars = typeinfer.typevars
     # The result is unused here; the lookup is kept as in the original.
     # NOTE(review): confirm whether indexing ``typevars`` has side effects.
     oset = typevars[self.target]
     item_type_sets = [typevars[item.name].get() for item in self.items]
     if item_type_sets:
         for combo in itertools.product(*item_type_sets):
             elem_type = typeinfer.context.unify_types(*combo)
             typeinfer.add_type(self.target, types.List(elem_type))
         return
     typeinfer.add_type(self.target, types.List(types.undefined))
示例#3
0
def arr_to_series_type(arr):
    """Map an array type to the corresponding ``SeriesType``.

    Returns ``None`` when ``arr`` is none of the array types handled here.
    """
    if isinstance(arr, types.Array):
        return SeriesType(arr.dtype, arr)
    if arr == string_array_type:
        # StringArray is readonly
        return SeriesType(string_type)
    if arr == list_string_array_type:
        return SeriesType(types.List(string_type))
    if arr == string_array_split_view_type:
        return SeriesType(
            types.List(string_type), string_array_split_view_type)
    return None
示例#4
0
def _get_series_array_type(dtype):
    """Return the default underlying array type for a series of ``dtype``.

    Raises ``ValueError`` for a tuple dtype with a non-numeric member.
    """
    # Series of string lists: the list layout is the default here
    # (a split view is used instead when possible).
    if dtype == types.List(string_type):
        return list_string_array_type

    # Plain strings use the string array.
    if dtype == string_type:
        return string_array_type

    # Categorical data has its own array type.
    if isinstance(dtype, PDCategoricalDtype):
        return CategoricalArray(dtype)

    # Tuples are stored in a record-array layout.
    if isinstance(dtype, types.BaseTuple):
        members_numeric = all(
            isinstance(member, types.Number) for member in dtype.types)
        if not members_numeric:
            # TODO: support more types. what types can be in recarrays?
            raise ValueError(
                "series tuple dtype {} includes non-numerics".format(dtype))
        field_spec = ','.join(str(member) for member in dtype.types)
        record_dtype = np.dtype(field_spec, align=True)
        dtype = numba.numpy_support.from_dtype(record_dtype)

    # TODO: other types?
    # Everything else is a regular 1-D C-contiguous numpy array.
    return types.Array(dtype, 1, 'C')
示例#5
0
    def __init__(self, dmm, fe_type):
        """Define the data model members for a dataframe type.

        ``data`` is a tuple holding one list per distinct column type of
        ``fe_type.data``, in order of first appearance.
        """
        seen = set()
        distinct_col_types = []
        for typ in fe_type.data:
            if typ not in seen:
                seen.add(typ)
                distinct_col_types.append(typ)

        data_member = types.Tuple(
            [types.List(typ) for typ in distinct_col_types])
        members = [
            ('data', data_member),
            ('index', fe_type.index),
            ('columns', types.List(string_type)),
            ('parent', types.pyobject),
        ]
        super(DataFrameModel, self).__init__(dmm, fe_type, members)
示例#6
0
def _box_series_data(dtype, data_typ, val, c):
    """Box the native data of a series into a Python array object.

    The boxing helper is chosen from ``dtype`` and ``data_typ``.  Tuple
    dtypes are first converted to a record dtype, and record data is
    finally converted with ``astype("O")`` on the boxed object.
    """
    if isinstance(dtype, types.BaseTuple):
        field_spec = ','.join(str(member) for member in dtype.types)
        record_dtype = np.dtype(field_spec, align=True)
        dtype = numba.numpy_support.from_dtype(record_dtype)

    # The order of these checks is significant; keep it as is.
    if dtype == string_type:
        boxed = box_str_arr(string_array_type, val, c)
    elif dtype == datetime_date_type:
        boxed = box_datetime_date_array(data_typ, val, c)
    elif isinstance(dtype, PDCategoricalDtype):
        boxed = box_categorical_array(data_typ, val, c)
    elif data_typ == string_array_split_view_type:
        boxed = box_str_arr_split_view(data_typ, val, c)
    elif dtype == types.List(string_type):
        boxed = box_list(list_string_array_type, val, c)
    else:
        boxed = box_array(data_typ, val, c)

    if not isinstance(dtype, types.Record):
        return boxed

    # Record arrays are handed back as object arrays.
    object_code = c.context.insert_const_string(c.builder.module, "O")
    object_code = c.pyapi.string_from_string(object_code)
    return c.pyapi.call_method(boxed, "astype", (object_code,))
示例#7
0
 def codegen(context, builder, sig, args):
     """Allocate a list of strings with ``args[0]`` items and return it.

     The list's size is set to the requested item count right after
     allocation.
     """
     str_list_type = types.List(string_type)
     n_items = args[0]
     instance = numba.targets.listobj.ListInstance.allocate(
         context, builder, str_list_type, n_items)
     instance.size = n_items
     return impl_ret_new_ref(
         context, builder, str_list_type, instance.value)
def _typeof_list(val, c):
    """Return the reflected list type of ``val``, typed from its first item.

    Raises ``ValueError`` when ``val`` is empty or when the first element
    cannot be typed.
    """
    if len(val) == 0:
        raise ValueError("Cannot type empty list")
    elem_type = typeof_impl(val[0], c)
    if elem_type is not None:
        return types.List(elem_type, reflected=True)
    raise ValueError(
        "Cannot type list element of {!r}".format(type(val[0])))
示例#9
0
 def test_subclass_specialization(self):
     """A more specific overload added later wins over a general one."""
     selector = OverloadSelector()
     self.assertTrue(issubclass(types.Sequence, types.Container))
     bool_list = types.List(types.boolean)
     query = (bool_list, bool_list)

     # Only the general (Container, Container) overload is registered.
     selector.append(1, (types.Container, types.Container,))
     self.assertEqual(selector.find(query), 1)

     # After adding (Container, Sequence), that overload is selected.
     selector.append(2, (types.Container, types.Sequence,))
     self.assertEqual(selector.find(query), 2)
示例#10
0
def alloc_str_list(typingctx, n_t=None):
    """Return the signature and code generator for allocating a string list.

    The signature takes an ``intp`` item count and returns
    ``types.List(string_type)``.
    """
    str_list_type = types.List(string_type)

    def codegen(context, builder, sig, args):
        # Allocate for the requested item count, then set the size to it.
        n_items = args[0]
        instance = numba.targets.listobj.ListInstance.allocate(
            context, builder, str_list_type, n_items)
        instance.size = n_items
        return impl_ret_new_ref(
            context, builder, str_list_type, instance.value)

    return str_list_type(types.intp), codegen
示例#11
0
 def test_list(self):
     """Check unification of list types, including one failing pair."""
     i32_list = types.List(i32)
     i32_i64_tuple_list = types.List(types.Tuple([i32, i64]))
     # (first, second, expected unified type)
     unifying_cases = [
         (types.List(types.undefined), i32_list, i32_list),
         (types.List(i16), i32_list, i32_list),
         (types.List(types.Tuple([i32, i16])),
          types.List(types.Tuple([i16, i64])),
          i32_i64_tuple_list),
     ]
     for first, second, expected in unifying_cases:
         self.assert_unify(first, second, expected)

     # A list of integers and a list of tuples do not unify.
     self.assert_unify_failure(
         types.List(i16), types.List(types.Tuple([i16])))
示例#12
0
 def __init__(self, v_list: nt.List(vect_type)):
     """Initialise ``parents``, ``sizes`` and ``ex`` from ``v_list``.

     ``parents`` maps every element of ``v_list`` to itself, ``sizes``
     ends up empty, and ``ex`` holds the first element.  ``v_list[0]`` is
     read unconditionally, so ``v_list`` must not be empty.
     """
     # Build the identity mapping with an explicit loop.
     p = {}
     for x in v_list:
         p[x] = x
     self.parents = p
     # Dict-comprehension form, left disabled by the original author:
     #        self.parents = {x : x for x in v_list}
     # Insert one entry and delete it again below, leaving ``sizes`` empty.
     # NOTE(review): presumably this seeds the dict's key/value types for a
     # typed compiler -- confirm before simplifying.
     self.sizes = {v_list[0]: 0}
     self.ex = v_list[0]
     del self.sizes[v_list[0]]
示例#13
0
def if_arr_to_series_type(typ):
    """Rebuild tuple, list and set types by recursing into their members.

    Tuples (including ``UniTuple``) come back as ``types.Tuple``; lists and
    sets are rebuilt around their converted ``dtype``.  Any other type is
    returned unchanged.
    """
    if isinstance(typ, (types.Tuple, types.UniTuple)):
        members = [if_arr_to_series_type(member) for member in typ.types]
        return types.Tuple(members)
    for container in (types.List, types.Set):
        if isinstance(typ, container):
            return container(if_arr_to_series_type(typ.dtype))
    # TODO: other types that can have Arrays inside?
    return typ
示例#14
0
def to_string_list_typ(typ):
    """Replace the string array type with a list-of-strings type.

    Tuples (including ``UniTuple``) are converted member by member and
    returned as ``types.Tuple``.  Any other type is returned unchanged.
    """
    if typ == string_array_type:
        return types.List(sdc.str_ext.string_type)

    if not isinstance(typ, (types.Tuple, types.UniTuple)):
        return typ

    return types.Tuple(
        [to_string_list_typ(typ.types[pos]) for pos in range(typ.count)])
示例#15
0
    def codegen(context, builder, signature, args):
        """Emit code that builds a dataframe struct from one input tuple.

        ``args[0]`` is read as a tuple value whose first ``n_cols`` entries
        are the column data and whose entry at position ``n_cols`` is the
        index.  ``n_cols``, ``column_names``, ``types_order``,
        ``data_typs_map``, ``data_typs`` and ``index_typ`` come from the
        enclosing scope.  Returns the value of the populated struct.
        """
        in_tup = args[0]
        data_arrs = [builder.extract_value(in_tup, i) for i in range(n_cols)]
        index = builder.extract_value(in_tup, n_cols)
        # Materialise each column name as a constant string value.
        column_strs = [
            numba.cpython.unicode.make_string_from_constant(
                context, builder, string_type, c) for c in column_names
        ]
        # create dataframe struct and store values
        dataframe = cgutils.create_struct_proxy(signature.return_type)(context,
                                                                       builder)

        # One list type per entry of types_order.
        data_list_type = [types.List(typ) for typ in types_order]

        # Build one list per type, holding the columns whose positions are
        # listed in data_typs_map[typ][1].
        data_lists = []
        for typ_id, typ in enumerate(types_order):
            # Despite its name, data_list_typ holds the built list value.
            data_list_typ = context.build_list(
                builder, data_list_type[typ_id],
                [data_arrs[data_id] for data_id in data_typs_map[typ][1]])
            data_lists.append(data_list_typ)

        data_tup = context.make_tuple(builder, types.Tuple(data_list_type),
                                      data_lists)

        col_list_type = types.List(string_type)
        column_list = context.build_list(builder, col_list_type, column_strs)

        dataframe.data = data_tup
        dataframe.index = index
        dataframe.columns = column_list
        # The parent is set to a null Python object.
        dataframe.parent = context.get_constant_null(types.pyobject)

        # increase refcount of stored values
        if context.enable_nrt:
            context.nrt.incref(builder, index_typ, index)
            for var, typ in zip(data_arrs, data_typs):
                context.nrt.incref(builder, typ, var)
            for var in column_strs:
                context.nrt.incref(builder, string_type, var)

        return dataframe._getvalue()
示例#16
0
 def generic(self, args, kws):
     """Type indexing of a string-array split view.

     An integer index yields ``types.List(string_type)``.  A slice, a 1-D
     C-contiguous boolean array or a 1-D C-contiguous ``intp`` array yields
     the split view type itself.  Anything else returns ``None``.
     """
     assert not kws
     [ary, idx] = args
     if not (ary == string_array_split_view_type):
         return None
     if isinstance(idx, types.SliceType):
         return signature(string_array_split_view_type, *args)
     if isinstance(idx, types.Integer):
         return signature(types.List(string_type), *args)
     for index_dtype in (types.bool_, types.intp):
         if idx == types.Array(index_dtype, 1, 'C'):
             return signature(string_array_split_view_type, *args)
     return None
示例#17
0
def if_series_to_unbox(typ):
    """Replace series types with ``UnBoxedSeriesType``, recursing into containers.

    Tuples (including ``UniTuple``) come back as ``types.Tuple``; lists and
    sets are rebuilt around their converted ``dtype``.  Any other type is
    returned unchanged.
    """
    if isinstance(typ, SeriesType):
        return UnBoxedSeriesType(typ.dtype)

    if isinstance(typ, (types.Tuple, types.UniTuple)):
        members = [if_series_to_unbox(member) for member in typ.types]
        return types.Tuple(members)
    if isinstance(typ, types.List):
        inner = if_series_to_unbox(typ.dtype)
        return types.List(inner)
    if isinstance(typ, types.Set):
        inner = if_series_to_unbox(typ.dtype)
        return types.Set(inner)
    # TODO: other types that can have Series inside?
    return typ
示例#18
0
def if_arr_to_series_type(typ):
    """Convert array types to series types, recursing into containers.

    Numpy arrays and the three string array types go through
    ``arr_to_series_type``.  Tuples (including ``UniTuple``) come back as
    ``types.Tuple``; lists and sets are rebuilt around their converted
    ``dtype``.  Any other type is returned unchanged.
    """
    string_array_types = (
        string_array_type,
        list_string_array_type,
        string_array_split_view_type,
    )
    if isinstance(typ, types.Array) or typ in string_array_types:
        return arr_to_series_type(typ)
    if isinstance(typ, (types.Tuple, types.UniTuple)):
        members = [if_arr_to_series_type(member) for member in typ.types]
        return types.Tuple(members)
    if isinstance(typ, types.List):
        return types.List(if_arr_to_series_type(typ.dtype))
    if isinstance(typ, types.Set):
        return types.Set(if_arr_to_series_type(typ.dtype))
    # TODO: other types that can have Arrays inside?
    return typ
示例#19
0
def if_series_to_array_type(typ, replace_boxed=False):
    """Convert series types to array types, recursing into containers.

    ``replace_boxed`` is forwarded unchanged to ``series_to_array_type``
    and to every recursive call.  Any other type is returned unchanged.
    """
    if isinstance(typ, SeriesType):
        return series_to_array_type(typ, replace_boxed)

    if isinstance(typ, (types.Tuple, types.UniTuple)):
        members = [
            if_series_to_array_type(member, replace_boxed)
            for member in typ.types
        ]
        return types.Tuple(members)
    for container in (types.List, types.Set):
        if isinstance(typ, container):
            return container(
                if_series_to_array_type(typ.dtype, replace_boxed))
    # TODO: other types that can have Series inside?
    return typ
示例#20
0
def _infer_series_list_dtype(S):
    """Infer the list dtype of a series from its first non-empty list.

    Values are scanned in positional order.  Returns
    ``types.List(string_type)`` when the first non-empty list starts with a
    ``str``.  Raises ``ValueError`` when a scanned value is not a list, when
    that first element is not a ``str``, or when no non-empty list exists.
    """
    unsupported = "data type for column {} not supported"
    for pos in range(len(S)):
        item = S.iloc[pos]
        if not isinstance(item, list):
            raise ValueError(unsupported.format(S.name))
        if len(item) == 0:
            # Empty lists carry no element type; keep looking.
            continue
        # TODO: support more types
        if isinstance(item[0], str):
            return types.List(string_type)
        raise ValueError(unsupported.format(S.name))
    raise ValueError(unsupported.format(S.name))
示例#21
0
    def test_print_values(self):
        """
        Check the text printed for a single argument of several types.
        """
        pyfunc = print_value

        def check_values(typ, values):
            # Compile for one argument type, then compare what each value
            # prints against ``str(value)`` followed by a newline.
            compiled = compile_isolated(pyfunc, (typ,))
            native = compiled.entry_point
            for value in values:
                with captured_stdout():
                    native(value)
                    self.assertEqual(sys.stdout.getvalue(), str(value) + '\n')

        # Various scalars
        big = 123456789876543210
        scalar_cases = (
            (types.int32, (1, -234)),
            (types.int64, (1, -234, big, -big)),
            (types.uint64, (1, 234, big, 2**63 + 123)),
            (types.boolean, (True, False)),
            (types.float64, (1.5, 100.0**10.0, float('nan'))),
            (types.complex64, (1+1j,)),
            (types.NPTimedelta('ms'), (np.timedelta64(100, 'ms'),)),
        )
        for scalar_type, scalar_values in scalar_cases:
            check_values(scalar_type, scalar_values)

        float32_result = compile_isolated(pyfunc, (types.float32,))
        float32_func = float32_result.entry_point
        with captured_stdout():
            float32_func(1.1)
            # Float32 will lose precision
            printed = sys.stdout.getvalue()
            self.assertTrue(printed.startswith('1.10000002384'))
            self.assertTrue(printed.endswith('\n'))

        # NRT-enabled type
        with self.assertNoNRTLeak():
            x = [1, 3, 5, 7]
            with self.assertRefCount(x):
                check_values(types.List(types.int32), (x,))

        # Array will have to use object mode
        int_array_type = types.Array(types.int32, 1, 'C')
        array_result = compile_isolated(
            pyfunc, (int_array_type,), flags=enable_pyobj_flags)
        array_func = array_result.entry_point
        with captured_stdout():
            array_func(np.arange(10))
            self.assertEqual(sys.stdout.getvalue(),
                             '[0 1 2 3 4 5 6 7 8 9]\n')
示例#22
0
def if_series_to_array_type(typ, replace_boxed=False):
    """Convert series types to array types, recursing into containers.

    Boxed series types are converted only when ``replace_boxed`` is true.
    Tuples (including ``UniTuple``) come back as ``types.Tuple``; lists and
    sets are rebuilt around their converted ``dtype``.  Any other type is
    returned unchanged.
    """
    if isinstance(typ, SeriesType):
        return series_to_array_type(typ, replace_boxed)
    # XXX: Boxed series variable types shouldn't be replaced in hiframes_typed
    # it results in cast error for call dummy_unbox_series
    if replace_boxed and isinstance(typ, BoxedSeriesType):
        return series_to_array_type(typ, replace_boxed)
    if isinstance(typ, (types.Tuple, types.UniTuple)):
        members = [
            if_series_to_array_type(member, replace_boxed)
            for member in typ.types
        ]
        return types.Tuple(members)
    if isinstance(typ, types.List):
        inner = if_series_to_array_type(typ.dtype, replace_boxed)
        return types.List(inner)
    if isinstance(typ, types.Set):
        inner = if_series_to_array_type(typ.dtype, replace_boxed)
        return types.Set(inner)
    # TODO: other types that can have Series inside?
    return typ
示例#23
0
def str_list_to_array_overload(list_typ):
    """Return the implementation that turns a string list into a string array.

    For ``types.List(string_type)`` the returned function copies every
    string into a pre-allocated string array.  For any other type an
    identity function is returned.
    """
    if list_typ == types.List(string_type):

        def str_list_impl(str_list):
            n = len(str_list)
            # First pass: sum the lengths of all strings so the array can
            # be allocated once with the right capacity.
            n_char = 0
            for i in range(n):
                _str = str_list[i]
                n_char += len(_str)
            str_arr = pre_alloc_string_array(n, n_char)
            # Second pass: store each string at position i, then release it.
            for i in range(n):
                _str = str_list[i]
                setitem_string_array(get_offset_ptr(str_arr),
                                     get_data_ptr(str_arr), _str, i)
                # NOTE(review): the strings are freed here, so the input
                # list presumably must not be used afterwards -- confirm.
                del_str(_str)  # XXX assuming str list is not used anymore
            return str_arr

        return str_list_impl

    # Any other input type is passed through unchanged.
    return lambda a: a
示例#24
0
    ("n_estimators", uint32),
    ("step", float32),
    ("loss", string),
    ("use_aggregation", boolean),
    ("split_pure", boolean),
    ("n_jobs", uint32),
    ("n_samples_increment", uint32),
    ("verbose", boolean),
    ("samples", get_type(SamplesCollection)),
    ("iteration", uint32),
]

# Classifier spec: the shared learner fields followed by the
# classifier-specific ones.
spec_amf_classifier = [
    *spec_amf_learner,
    ("n_classes", uint32),
    ("dirichlet", float32),
    ("trees", types.List(get_type(TreeClassifier), reflected=True)),
]

# TODO: we can force pre-compilation when creating the nopython forest


@jitclass(spec_amf_classifier)
class AMFClassifierNoPython(object):
    def __init__(
        self,
        n_classes,
        n_features,
        n_estimators,
        step,
        loss,
        use_aggregation,
示例#25
0
 def test_lists(self):
     """Run the pickling check on a list-of-int32 type."""
     self.check_pickling(types.List(types.int32))
示例#26
0
 def test_disallow_list(self):
     """A list type is rejected both as a key and as a value."""
     for check in (self.assert_disallow_key, self.assert_disallow_value):
         check(types.List(types.intp))
示例#27
0
def box_dataframe(typ, val, c):
    """Box a native dataframe value into a ``pandas.DataFrame`` object.

    An empty ``DataFrame`` is created and filled column by column.  When
    the dataframe has a non-null parent and a column's ``unboxed`` flag is
    zero, the column is read from the parent object.  Otherwise the native
    column is boxed with the helper matching its type.  The index is set
    when ``typ.index`` is not ``types.none``.  Returns the new object.
    """
    context = c.context
    builder = c.builder

    n_cols = len(typ.columns)
    col_names = typ.columns
    arr_typs = typ.data
    dtypes = [a.dtype for a in arr_typs]  # TODO: check Categorical

    dataframe = cgutils.create_struct_proxy(typ)(
        context, builder, value=val)
    col_arrs = [builder.extract_value(dataframe.data, i) for i in range(n_cols)]
    # df unboxed from Python
    has_parent = cgutils.is_not_null(builder, dataframe.parent)

    pyapi = c.pyapi
    # gil_state = pyapi.gil_ensure()  # acquire GIL

    # Import pandas and call DataFrame() with no arguments.
    mod_name = context.insert_const_string(c.builder.module, "pandas")
    class_obj = pyapi.import_module_noblock(mod_name)
    df_obj = pyapi.call_method(class_obj, "DataFrame", ())

    for i, cname, arr, arr_typ, dtype in zip(range(n_cols), col_names, col_arrs, arr_typs, dtypes):
        # df['cname'] = boxed_arr
        # TODO: datetime.date, DatetimeIndex?
        name_str = context.insert_const_string(c.builder.module, cname)
        cname_obj = pyapi.string_from_string(name_str)
        # if column not unboxed, just used the boxed version from parent
        unboxed_val = builder.extract_value(dataframe.unboxed, i)
        not_unboxed = builder.icmp(lc.ICMP_EQ, unboxed_val, context.get_constant(types.int8, 0))
        use_parent = builder.and_(has_parent, not_unboxed)

        with builder.if_else(use_parent) as (then, orelse):
            with then:
                # Reuse the column object from the parent.
                arr_obj = pyapi.object_getattr_string(dataframe.parent, cname)
                pyapi.object_setitem(df_obj, cname_obj, arr_obj)

            with orelse:
                # Box the native column; the order of these checks matters.
                if dtype == string_type:
                    arr_obj = box_str_arr(arr_typ, arr, c)
                elif isinstance(dtype, PDCategoricalDtype):
                    arr_obj = box_categorical_array(arr_typ, arr, c)
                    # context.nrt.incref(builder, arr_typ, arr)
                elif arr_typ == string_array_split_view_type:
                    arr_obj = box_str_arr_split_view(arr_typ, arr, c)
                elif dtype == types.List(string_type):
                    arr_obj = box_list(list_string_array_type, arr, c)
                    # context.nrt.incref(builder, arr_typ, arr)  # TODO required?
                    # pyapi.print_object(arr_obj)
                else:
                    arr_obj = box_array(arr_typ, arr, c)
                    # TODO: is incref required?
                    # context.nrt.incref(builder, arr_typ, arr)
                pyapi.object_setitem(df_obj, cname_obj, arr_obj)

        # NOTE(review): no decref of arr_obj is emitted (the call below is
        # disabled) -- confirm whether a reference is leaked per column.
        # pyapi.decref(arr_obj)
        pyapi.decref(cname_obj)

    # set df.index if necessary
    if typ.index != types.none:
        arr_obj = box_array(typ.index, dataframe.index, c)
        pyapi.object_setattr_string(df_obj, 'index', arr_obj)

    pyapi.decref(class_obj)
    # pyapi.gil_release(gil_state)    # release GIL
    return df_obj
示例#28
0
 def resolve_str(self, ary):
     """Return the type of the series string-methods accessor.

     Asserts that the series dtype is a string or a list of strings.
     """
     allowed_dtypes = (string_type, types.List(string_type))
     assert ary.dtype in allowed_dtypes
     # TODO: add dtype to series_str_methods_type
     return series_str_methods_type
示例#29
0
文件: str_ext.py 项目: W3SS/hpat
 def resolve_split(self, dict, args, kws):
     """Type a ``split`` call as returning a list of strings.

     Asserts that there are no keyword arguments and exactly one
     positional argument.
     """
     assert not kws
     assert len(args) == 1
     return_type = types.List(string_type)
     return signature(return_type, *args)
示例#30
0
文件: typeof.py 项目: zxsted/numba
def _typeof_list(val, c):
    """Return the reflected list type of ``val``, typed from its first item.

    Args:
        val: The Python list to type; only ``val[0]`` is inspected.
        c: The typeof context, passed through to ``typeof_impl``.

    Returns:
        ``types.List(ty, reflected=True)`` where ``ty`` is the type of the
        first element.

    Raises:
        ValueError: If ``val`` is empty, or if ``typeof_impl`` returns
            ``None`` for the first element.
    """
    if len(val) == 0:
        raise ValueError("Cannot type empty list")
    ty = typeof_impl(val[0], c)
    if ty is None:
        # Without this check a ``None`` element type would be wrapped into
        # a list type and only fail later, far from the cause.
        raise ValueError(
            "Cannot type list element of {!r}".format(type(val[0])))
    return types.List(ty, reflected=True)