def resolve_split(self, ary, args, kws):
    """Build the signature of a ``split`` call on ``ary``.

    The result is a ``SeriesType`` whose dtype is a list of strings.  When
    the call has exactly one positional argument and that argument is a
    string literal of length one, the series type is additionally
    parameterized with ``string_array_split_view_type``.
    """
    str_list = types.List(string_type)
    single_char_literal = (
        len(args) == 1
        and isinstance(args[0], types.StringLiteral)
        and len(args[0].literal_value) == 1
    )
    if single_char_literal:
        result = SeriesType(str_list, string_array_split_view_type)
    else:
        result = SeriesType(str_list)
    return signature(result, *args)
def __call__(self, typeinfer):
    """Add list types for ``self.target`` based on the item type candidates.

    With no items the target is typed as a list of ``types.undefined``.
    Otherwise every combination of the items' candidate types is unified
    and a list of the unified type is added for the target.
    """
    var_map = typeinfer.typevars
    # The result is unused, but the lookup is kept: accessing the mapping
    # may itself matter (e.g. create the entry) -- TODO confirm.
    _target_var = var_map[self.target]
    candidate_sets = [var_map[item.name].get() for item in self.items]

    if candidate_sets:
        for combo in itertools.product(*candidate_sets):
            element = typeinfer.context.unify_types(*combo)
            typeinfer.add_type(self.target, types.List(element))
        return

    typeinfer.add_type(self.target, types.List(types.undefined))
def arr_to_series_type(arr):
    """Map an array type to the matching ``SeriesType``.

    Returns ``None`` when ``arr`` is none of the handled array types.
    """
    if isinstance(arr, types.Array):
        return SeriesType(arr.dtype, arr)
    if arr == string_array_type:
        # StringArray is readonly
        return SeriesType(string_type)
    if arr == list_string_array_type:
        return SeriesType(types.List(string_type))
    if arr == string_array_split_view_type:
        return SeriesType(
            types.List(string_type), string_array_split_view_type)
    return None
def _get_series_array_type(dtype):
    """Return the default underlying array type of a series for ``dtype``.

    Raises ``ValueError`` for a tuple dtype that contains a non-numeric
    member.
    """
    # list(list(str)): default data layout is list but split view is used
    # if possible
    if dtype == types.List(string_type):
        return list_string_array_type

    # string array
    if dtype == string_type:
        return string_array_type

    # categorical
    if isinstance(dtype, PDCategoricalDtype):
        return CategoricalArray(dtype)

    # use recarray data layout for series of tuples
    if isinstance(dtype, types.BaseTuple):
        for member in dtype.types:
            if not isinstance(member, types.Number):
                # TODO: support more types. what types can be in recarrays?
                raise ValueError(
                    "series tuple dtype {} includes non-numerics".format(dtype))
        field_spec = ','.join(map(str, dtype.types))
        record_dtype = np.dtype(field_spec, align=True)
        dtype = numba.numpy_support.from_dtype(record_dtype)

    # TODO: other types?
    # regular numpy array
    return types.Array(dtype, 1, 'C')
def __init__(self, dmm, fe_type):
    """Define the struct members of the dataframe data model.

    ``data`` is a tuple holding one list type per distinct column type in
    ``fe_type.data`` (first-seen order); the remaining members are the
    index, the list of column names and the parent Python object.
    """
    seen = set()
    distinct_types = []
    for column_type in fe_type.data:
        if column_type not in seen:
            seen.add(column_type)
            distinct_types.append(column_type)

    data_member = types.Tuple([types.List(typ) for typ in distinct_types])
    members = [
        ('data', data_member),
        ('index', fe_type.index),
        ('columns', types.List(string_type)),
        ('parent', types.pyobject),
    ]
    super(DataFrameModel, self).__init__(dmm, fe_type, members)
def _box_series_data(dtype, data_typ, val, c):
    """Box the native data ``val`` of a series into a Python object.

    The boxing routine is chosen from ``dtype`` / ``data_typ``.  Tuple
    dtypes are first converted to a record dtype, and record data is
    finally converted by calling ``astype("O")`` on the boxed object.
    """
    if isinstance(dtype, types.BaseTuple):
        field_spec = ','.join(str(t) for t in dtype.types)
        record_dtype = np.dtype(field_spec, align=True)
        dtype = numba.numpy_support.from_dtype(record_dtype)

    if dtype == string_type:
        boxed = box_str_arr(string_array_type, val, c)
    elif dtype == datetime_date_type:
        boxed = box_datetime_date_array(data_typ, val, c)
    elif isinstance(dtype, PDCategoricalDtype):
        boxed = box_categorical_array(data_typ, val, c)
    elif data_typ == string_array_split_view_type:
        boxed = box_str_arr_split_view(data_typ, val, c)
    elif dtype == types.List(string_type):
        boxed = box_list(list_string_array_type, val, c)
    else:
        boxed = box_array(data_typ, val, c)

    if not isinstance(dtype, types.Record):
        return boxed

    # record data: call ``boxed.astype("O")`` on the Python side
    const_o = c.context.insert_const_string(c.builder.module, "O")
    py_o = c.pyapi.string_from_string(const_o)
    return c.pyapi.call_method(boxed, "astype", (py_o,))
def codegen(context, builder, sig, args):
    """Allocate a list of strings with ``args[0]`` items and return it.

    The list is allocated for ``args[0]`` items and its size is set to the
    same value.
    """
    count = args[0]
    str_list_type = types.List(string_type)
    instance = numba.targets.listobj.ListInstance.allocate(
        context, builder, str_list_type, count)
    instance.size = count
    return impl_ret_new_ref(context, builder, str_list_type, instance.value)
def _typeof_list(val, c):
    """Return the reflected list type of ``val`` based on its first element.

    Raises ``ValueError`` when the list is empty or when ``typeof_impl``
    yields no type for the first element.
    """
    if len(val) == 0:
        raise ValueError("Cannot type empty list")

    first = val[0]
    element_type = typeof_impl(first, c)
    if element_type is None:
        raise ValueError(
            "Cannot type list element of {!r}".format(type(first)),
        )
    return types.List(element_type, reflected=True)
def test_subclass_specialization(self):
    """A more specific overload added later is preferred by ``find``."""
    selector = OverloadSelector()
    self.assertTrue(issubclass(types.Sequence, types.Container))

    bool_list = types.List(types.boolean)
    query = (bool_list, bool_list)

    selector.append(1, (types.Container, types.Container,))
    self.assertEqual(selector.find(query), 1)

    selector.append(2, (types.Container, types.Sequence,))
    self.assertEqual(selector.find(query), 2)
def alloc_str_list(typingctx, n_t=None):
    """Return the signature and code generator for allocating a string list.

    The signature takes an ``intp`` and returns a list of strings; the
    generated code allocates the list for ``args[0]`` items and sets its
    size to that value.
    """
    str_list_type = types.List(string_type)

    def codegen(context, builder, sig, args):
        count = args[0]
        instance = numba.targets.listobj.ListInstance.allocate(
            context, builder, str_list_type, count)
        instance.size = count
        return impl_ret_new_ref(
            context, builder, str_list_type, instance.value)

    return str_list_type(types.intp), codegen
def test_list(self):
    """Check unification of list types, including one failing pair."""
    unifiable = [
        # (first, second, expected unified type)
        (types.List(types.undefined), types.List(i32), types.List(i32)),
        (types.List(i16), types.List(i32), types.List(i32)),
        (types.List(types.Tuple([i32, i16])),
         types.List(types.Tuple([i16, i64])),
         types.List(types.Tuple([i32, i64]))),
    ]
    for first, second, expected in unifiable:
        self.assert_unify(first, second, expected)

    self.assert_unify_failure(types.List(i16),
                              types.List(types.Tuple([i16])))
def __init__(self, v_list: nt.List(vect_type)):
    """Initialise the structure from the elements of ``v_list``.

    Every element becomes its own parent in ``self.parents``, ``self.sizes``
    ends up empty and ``self.ex`` holds the first element.

    ``v_list`` must be non-empty: ``v_list[0]`` is read unconditionally.
    """
    # Explicit loop: each element starts out as its own parent.
    p = {}
    for x in v_list:
        p[x] = x
    self.parents = p
    # self.parents = {x : x for x in v_list}
    # One entry is inserted and deleted again below, leaving ``sizes``
    # empty.  NOTE(review): presumably this lets the compiler infer the
    # dict's key/value types -- confirm before simplifying to ``{}``.
    self.sizes = {v_list[0]: 0}
    self.ex = v_list[0]
    del self.sizes[v_list[0]]
def if_arr_to_series_type(typ):
    """Rebuild tuple, list and set types by recursing into their members.

    Note that this version has no leaf conversion of its own: any type that
    is not a tuple, list or set is returned unchanged.
    """
    if isinstance(typ, (types.Tuple, types.UniTuple)):
        members = [if_arr_to_series_type(member) for member in typ.types]
        return types.Tuple(members)
    for container in (types.List, types.Set):
        if isinstance(typ, container):
            return container(if_arr_to_series_type(typ.dtype))
    # TODO: other types that can have Arrays inside?
    return typ
def to_string_list_typ(typ):
    """Replace ``string_array_type`` with a list-of-strings type.

    Tuples are processed member by member; every other type is returned
    unchanged.
    """
    if typ == string_array_type:
        return types.List(sdc.str_ext.string_type)

    if isinstance(typ, (types.Tuple, types.UniTuple)):
        converted = [to_string_list_typ(typ.types[pos])
                     for pos in range(typ.count)]
        return types.Tuple(converted)

    return typ
def codegen(context, builder, signature, args):
    """Build the dataframe struct from a tuple of column data and an index.

    ``args[0]`` holds ``n_cols`` column values followed by the index.  The
    columns are grouped into one list per entry of ``types_order`` and the
    column names are stored as a list of string constants.
    """
    packed = args[0]
    column_values = [builder.extract_value(packed, pos)
                     for pos in range(n_cols)]
    index_value = builder.extract_value(packed, n_cols)
    name_values = [
        numba.cpython.unicode.make_string_from_constant(
            context, builder, string_type, name)
        for name in column_names
    ]

    # create dataframe struct and store values
    df_struct = cgutils.create_struct_proxy(
        signature.return_type)(context, builder)

    list_types = [types.List(typ) for typ in types_order]
    grouped_lists = []
    for typ, list_type in zip(types_order, list_types):
        members = [column_values[col_id]
                   for col_id in data_typs_map[typ][1]]
        grouped_lists.append(context.build_list(builder, list_type, members))
    data_tuple = context.make_tuple(
        builder, types.Tuple(list_types), grouped_lists)

    names_list = context.build_list(
        builder, types.List(string_type), name_values)

    df_struct.data = data_tuple
    df_struct.index = index_value
    df_struct.columns = names_list
    df_struct.parent = context.get_constant_null(types.pyobject)

    # increase refcount of stored values
    if context.enable_nrt:
        context.nrt.incref(builder, index_typ, index_value)
        for col_value, col_type in zip(column_values, data_typs):
            context.nrt.incref(builder, col_type, col_value)
        for name_value in name_values:
            context.nrt.incref(builder, string_type, name_value)

    return df_struct._getvalue()
def generic(self, args, kws):
    """Type indexing of a string split-view array.

    An integer index yields a list of strings; a slice, a 1-D C-contiguous
    boolean array or a 1-D C-contiguous ``intp`` array yields another
    split-view array.  Any other combination returns ``None``.
    """
    assert not kws
    ary, idx = args

    if ary == string_array_split_view_type:
        if isinstance(idx, types.Integer):
            return signature(types.List(string_type), *args)

        keeps_view = (
            isinstance(idx, types.SliceType)
            or idx == types.Array(types.bool_, 1, 'C')
            or idx == types.Array(types.intp, 1, 'C')
        )
        if keeps_view:
            return signature(string_array_split_view_type, *args)

    return None
def if_series_to_unbox(typ):
    """Replace ``SeriesType`` with ``UnBoxedSeriesType`` of the same dtype.

    Tuples, lists and sets are processed recursively; every other type is
    returned unchanged.
    """
    if isinstance(typ, SeriesType):
        return UnBoxedSeriesType(typ.dtype)
    if isinstance(typ, (types.Tuple, types.UniTuple)):
        members = [if_series_to_unbox(member) for member in typ.types]
        return types.Tuple(members)
    for container in (types.List, types.Set):
        if isinstance(typ, container):
            return container(if_series_to_unbox(typ.dtype))
    # TODO: other types that can have Series inside?
    return typ
def if_arr_to_series_type(typ):
    """Replace array types with their series types via ``arr_to_series_type``.

    Numpy array types and the three string array layouts are converted;
    tuples, lists and sets are processed recursively; every other type is
    returned unchanged.
    """
    string_layouts = (
        string_array_type,
        list_string_array_type,
        string_array_split_view_type,
    )
    if isinstance(typ, types.Array) or typ in string_layouts:
        return arr_to_series_type(typ)

    if isinstance(typ, (types.Tuple, types.UniTuple)):
        return types.Tuple(
            [if_arr_to_series_type(member) for member in typ.types])

    if isinstance(typ, types.List):
        element = if_arr_to_series_type(typ.dtype)
        return types.List(element)

    if isinstance(typ, types.Set):
        element = if_arr_to_series_type(typ.dtype)
        return types.Set(element)

    # TODO: other types that can have Arrays inside?
    return typ
def if_series_to_array_type(typ, replace_boxed=False):
    """Replace ``SeriesType`` with its array type via ``series_to_array_type``.

    Tuples, lists and sets are processed recursively with the same
    ``replace_boxed`` flag; every other type is returned unchanged.
    """
    def convert(inner):
        # recurse with the caller's ``replace_boxed`` setting
        return if_series_to_array_type(inner, replace_boxed)

    if isinstance(typ, SeriesType):
        return series_to_array_type(typ, replace_boxed)
    if isinstance(typ, (types.Tuple, types.UniTuple)):
        return types.Tuple([convert(member) for member in typ.types])
    if isinstance(typ, types.List):
        return types.List(convert(typ.dtype))
    if isinstance(typ, types.Set):
        return types.Set(convert(typ.dtype))
    # TODO: other types that can have Series inside?
    return typ
def _infer_series_list_dtype(S):
    """Infer the list dtype of series ``S`` from its first non-empty list.

    Returns a list-of-strings type when the first element of the first
    non-empty list is a ``str``.  Raises ``ValueError`` when a value is not
    a list, when that first element is not a ``str``, or when no non-empty
    list is found.
    """
    def unsupported():
        return ValueError(
            "data type for column {} not supported".format(S.name))

    for pos in range(len(S)):
        candidate = S.iloc[pos]
        if not isinstance(candidate, list):
            raise unsupported()
        if len(candidate) == 0:
            # an empty list carries no element information; keep looking
            continue
        # TODO: support more types
        if not isinstance(candidate[0], str):
            raise unsupported()
        return types.List(string_type)

    raise unsupported()
def test_print_values(self):
    """
    Test printing a single argument value.
    """
    pyfunc = print_value

    def check_values(typ, values):
        # compile once per type, then compare the output with str(value)
        compiled = compile_isolated(pyfunc, (typ,))
        cfunc = compiled.entry_point
        for val in values:
            with captured_stdout():
                cfunc(val)
                self.assertEqual(sys.stdout.getvalue(), str(val) + '\n')

    # Various scalars
    scalar_cases = [
        (types.int32, (1, -234)),
        (types.int64, (1, -234,
                       123456789876543210, -123456789876543210)),
        (types.uint64, (1, 234,
                        123456789876543210, 2**63 + 123)),
        (types.boolean, (True, False)),
        (types.float64, (1.5, 100.0**10.0, float('nan'))),
        (types.complex64, (1+1j,)),
        (types.NPTimedelta('ms'), (np.timedelta64(100, 'ms'),)),
    ]
    for typ, values in scalar_cases:
        check_values(typ, values)

    float32_compiled = compile_isolated(pyfunc, (types.float32,))
    float32_func = float32_compiled.entry_point
    with captured_stdout():
        float32_func(1.1)
        # Float32 will lose precision
        got = sys.stdout.getvalue()
        expect = '1.10000002384'
        self.assertTrue(got.startswith(expect))
        self.assertTrue(got.endswith('\n'))

    # NRT-enabled type
    with self.assertNoNRTLeak():
        x = [1, 3, 5, 7]
        with self.assertRefCount(x):
            check_values(types.List(types.int32), (x,))

    # Array will have to use object mode
    arraytype = types.Array(types.int32, 1, 'C')
    array_compiled = compile_isolated(pyfunc, (arraytype,),
                                      flags=enable_pyobj_flags)
    array_func = array_compiled.entry_point
    with captured_stdout():
        array_func(np.arange(10))
        self.assertEqual(sys.stdout.getvalue(),
                         '[0 1 2 3 4 5 6 7 8 9]\n')
def if_series_to_array_type(typ, replace_boxed=False):
    """Replace series types with array types via ``series_to_array_type``.

    ``SeriesType`` is always converted; ``BoxedSeriesType`` only when
    ``replace_boxed`` is true.  Tuples, lists and sets are processed
    recursively; every other type is returned unchanged.
    """
    # XXX: Boxed series variable types shouldn't be replaced in
    # hiframes_typed; it results in cast error for call dummy_unbox_series
    is_series = isinstance(typ, SeriesType)
    is_replaceable_boxed = replace_boxed and isinstance(typ, BoxedSeriesType)
    if is_series or is_replaceable_boxed:
        return series_to_array_type(typ, replace_boxed)

    if isinstance(typ, (types.Tuple, types.UniTuple)):
        members = [if_series_to_array_type(member, replace_boxed)
                   for member in typ.types]
        return types.Tuple(members)

    for container in (types.List, types.Set):
        if isinstance(typ, container):
            return container(
                if_series_to_array_type(typ.dtype, replace_boxed))

    # TODO: other types that can have Series inside?
    return typ
def str_list_to_array_overload(list_typ):
    """Pick the implementation that turns a string list into a string array.

    For a list-of-strings type the returned implementation copies every
    string into a pre-allocated string array and deletes each string
    afterwards; for any other type an identity function is returned.
    """
    if list_typ != types.List(string_type):
        return lambda a: a

    def str_list_impl(str_list):
        count = len(str_list)

        # first pass: total number of characters to allocate
        total_chars = 0
        for pos in range(count):
            item = str_list[pos]
            total_chars += len(item)

        result = pre_alloc_string_array(count, total_chars)

        # second pass: copy each string, then release it
        for pos in range(count):
            item = str_list[pos]
            setitem_string_array(
                get_offset_ptr(result), get_data_ptr(result), item, pos)
            del_str(item)  # XXX assuming str list is not used anymore

        return result

    return str_list_impl
("n_estimators", uint32), ("step", float32), ("loss", string), ("use_aggregation", boolean), ("split_pure", boolean), ("n_jobs", uint32), ("n_samples_increment", uint32), ("verbose", boolean), ("samples", get_type(SamplesCollection)), ("iteration", uint32), ] spec_amf_classifier = spec_amf_learner + [ ("n_classes", uint32), ("dirichlet", float32), ("trees", types.List(get_type(TreeClassifier), reflected=True)), ] # TODO: we can force pre-compilation when creating the nopython forest @jitclass(spec_amf_classifier) class AMFClassifierNoPython(object): def __init__( self, n_classes, n_features, n_estimators, step, loss, use_aggregation,
def test_lists(self):
    """A list-of-int32 type passes the pickling check."""
    self.check_pickling(types.List(types.int32))
def test_disallow_list(self):
    """A list of ``intp`` is checked as both a disallowed key and value."""
    for check in (self.assert_disallow_key, self.assert_disallow_value):
        check(types.List(types.intp))
def box_dataframe(typ, val, c):
    """Box the native dataframe value ``val`` into a ``pandas.DataFrame``.

    An empty ``pandas.DataFrame()`` is created and filled column by column.
    For each column the generated code either reuses the attribute of the
    parent Python object (when a parent exists and the column's ``unboxed``
    flag is 0) or boxes the native array with the routine matching its
    dtype / array type.  The index is set when ``typ.index`` is not
    ``types.none``.

    Parameters: ``typ`` is the dataframe type, ``val`` the native value and
    ``c`` the boxing context providing ``context``, ``builder`` and
    ``pyapi``.  Returns the new DataFrame Python object.
    """
    context = c.context
    builder = c.builder

    n_cols = len(typ.columns)
    col_names = typ.columns
    arr_typs = typ.data
    dtypes = [a.dtype for a in arr_typs]  # TODO: check Categorical

    dataframe = cgutils.create_struct_proxy(typ)(
        context, builder, value=val)
    col_arrs = [builder.extract_value(dataframe.data, i) for i in range(n_cols)]
    # df unboxed from Python
    has_parent = cgutils.is_not_null(builder, dataframe.parent)

    pyapi = c.pyapi
    # gil_state = pyapi.gil_ensure()  # acquire GIL

    # import pandas and call pandas.DataFrame() with no arguments
    mod_name = context.insert_const_string(c.builder.module, "pandas")
    class_obj = pyapi.import_module_noblock(mod_name)
    df_obj = pyapi.call_method(class_obj, "DataFrame", ())

    for i, cname, arr, arr_typ, dtype in zip(range(n_cols), col_names, col_arrs, arr_typs, dtypes):
        # df['cname'] = boxed_arr
        # TODO: datetime.date, DatetimeIndex?
        name_str = context.insert_const_string(c.builder.module, cname)
        cname_obj = pyapi.string_from_string(name_str)
        # if column not unboxed, just used the boxed version from parent
        unboxed_val = builder.extract_value(dataframe.unboxed, i)
        not_unboxed = builder.icmp(lc.ICMP_EQ, unboxed_val, context.get_constant(types.int8, 0))
        use_parent = builder.and_(has_parent, not_unboxed)

        # the branch is decided at run time of the generated code
        with builder.if_else(use_parent) as (then, orelse):
            with then:
                # reuse the parent's attribute named after the column
                arr_obj = pyapi.object_getattr_string(dataframe.parent, cname)
                pyapi.object_setitem(df_obj, cname_obj, arr_obj)

            with orelse:
                # pick the boxing routine; this chain runs at compile time
                if dtype == string_type:
                    arr_obj = box_str_arr(arr_typ, arr, c)
                elif isinstance(dtype, PDCategoricalDtype):
                    arr_obj = box_categorical_array(arr_typ, arr, c)
                    # context.nrt.incref(builder, arr_typ, arr)
                elif arr_typ == string_array_split_view_type:
                    arr_obj = box_str_arr_split_view(arr_typ, arr, c)
                elif dtype == types.List(string_type):
                    arr_obj = box_list(list_string_array_type, arr, c)
                    # context.nrt.incref(builder, arr_typ, arr)  # TODO required?
                    # pyapi.print_object(arr_obj)
                else:
                    arr_obj = box_array(arr_typ, arr, c)
                    # TODO: is incref required?
                    # context.nrt.incref(builder, arr_typ, arr)
                pyapi.object_setitem(df_obj, cname_obj, arr_obj)

        # NOTE(review): arr_obj is not decref'd (the call below is commented
        # out) -- confirm whether a reference is leaked per column.
        # pyapi.decref(arr_obj)
        pyapi.decref(cname_obj)

    # set df.index if necessary
    if typ.index != types.none:
        arr_obj = box_array(typ.index, dataframe.index, c)
        pyapi.object_setattr_string(df_obj, 'index', arr_obj)

    pyapi.decref(class_obj)
    # pyapi.gil_release(gil_state)    # release GIL
    return df_obj
def resolve_str(self, ary):
    """Return the type of the ``str`` attribute of ``ary``.

    Asserts that the dtype of ``ary`` is a string or a list of strings.
    """
    supported_dtypes = (string_type, types.List(string_type))
    assert ary.dtype in supported_dtypes
    # TODO: add dtype to series_str_methods_type
    return series_str_methods_type
def resolve_split(self, dict, args, kws):
    """Build the signature of a ``split`` call returning a list of strings.

    Asserts that no keyword arguments and exactly one positional argument
    are given.
    """
    assert not kws
    assert len(args) == 1
    return_type = types.List(string_type)
    return signature(return_type, *args)
def _typeof_list(val, c):
    """Return the reflected list type of ``val`` based on its first element.

    Raises:
        ValueError: if ``val`` is empty, or if ``typeof_impl`` yields no
            type (``None``) for the first element.
    """
    if len(val) == 0:
        raise ValueError("Cannot type empty list")

    first = val[0]
    element_type = typeof_impl(first, c)
    # Without this check an untypable element would produce a list type
    # whose dtype is None instead of a clear error.
    if element_type is None:
        raise ValueError(
            "Cannot type list element of {!r}".format(type(first)),
        )
    return types.List(element_type, reflected=True)