def _resolve_combine_func(self, ary, args, kws):
    """Type a two-input element-wise callback and wrap its result in a Series.

    The callable type ``args[1]`` is resolved against the element types of
    ``ary`` and ``args[0]``; the returned signature produces a ``SeriesType``
    of the callback's return type and takes ``args`` unchanged.
    """
    dt64_ns = types.NPDatetime('ns')
    # getitem returns Timestamp for dt_index and series(dt64), so the
    # callback is typed with Timestamp elements instead of raw datetime64
    elem_types = tuple(
        pandas_timestamp_type if elem == dt64_ns else elem
        for elem in (ary.dtype, args[0].dtype))
    call_type = args[1].get_call_type(self.context, elem_types, {})
    return signature(SeriesType(call_type.return_type), *args)
def _resolve_combine_func(self, ary, args, kws):
    """Infer the result type of a two-input callback given as a code literal.

    ``args[1].literal_value.code`` is converted to Numba IR and run through
    type inference with the element types of ``ary`` and ``args[0]``; the
    returned signature produces a ``SeriesType`` of the inferred return type.
    """
    dt64_ns = types.NPDatetime('ns')

    # getitem returns Timestamp for dt_index and series(dt64)
    left_elem = ary.dtype
    if left_elem == dt64_ns:
        left_elem = pandas_timestamp_type

    right_elem = args[0].dtype
    if right_elem == dt64_ns:
        right_elem = pandas_timestamp_type

    func_code = args[1].literal_value.code
    func_ir = numba.ir_utils.get_ir_of_code({'np': np}, func_code)
    # only the inferred return type is needed from the three results
    _, inferred_return_type, _ = numba.compiler.type_inference_stage(
        self.context, func_ir, (left_elem, right_elem), None)
    return signature(SeriesType(inferred_return_type), *args)
def test_jit_explicit_signature(self):
    """jit() accepts an explicit datetime/timedelta signature as object or string."""

    def _check_explicit_signature(sig):
        compiled = jit(sig, nopython=True)(add_usecase)
        # Just a sanity check
        operands = (DT(1, 'ms'), TD(2, 'us'))
        self.assertPreciseEqual(compiled(*operands), add_usecase(*operands))

    explicit_signatures = (
        # Signature in object form
        types.NPDatetime('us')(types.NPDatetime('ms'), types.NPTimedelta('us')),
        # Same signature in string form
        "NPDatetime('us')(NPDatetime('ms'), NPTimedelta('us'))",
    )
    for sig in explicit_signatures:
        _check_explicit_signature(sig)
def _gen_csv_reader_py_pyarrow_func_text_core(col_names, col_typs, usecols, sep, skiprows, signature=None): # TODO: support non-numpy types like strings date_inds = ", ".join( str(i) for i, t in enumerate(col_typs) if t.dtype == types.NPDatetime('ns')) typ_strs = ", ".join([ "{}='{}'".format(to_varname(cname), _get_dtype_str(t)) for cname, t in zip(col_names, col_typs) ]) pd_dtype_strs = ", ".join([ "'{}':{}".format(cname, _get_pd_dtype_str(t)) for cname, t in zip(col_names, col_typs) ]) if signature is None: signature = "fname" func_text = "def csv_reader_py({}):\n".format(signature) func_text += " with objmode({}):\n".format(typ_strs) func_text += " df = pandas_read_csv(fname, names={},\n".format( col_names) func_text += " parse_dates=[{}],\n".format(date_inds) func_text += " dtype={{{}}},\n".format(pd_dtype_strs) func_text += " skiprows={},\n".format(skiprows) func_text += " usecols={}, sep='{}')\n".format(usecols, sep) for cname in col_names: func_text += " {} = df['{}'].values\n".format( to_varname(cname), cname) # func_text += " print({})\n".format(cname) return func_text, 'csv_reader_py'
def lower_unbox_df_column(context, builder, sig, args):
    """Lower the unboxing of one dataframe column into a native array value.

    The column name is looked up through ``sig.args[0].col_names`` using the
    constant index in ``sig.args[1]``; the Python object's ``<column>.values``
    is then unboxed as a string array, a datetime64[ns] array or an array of
    ``sig.args[2].dtype`` depending on ``sig.args[2]``.
    """
    # FIXME: last arg should be types.DType?
    pyapi = context.get_python_api(builder)
    c = numba.pythonapi._UnboxContext(context, builder, pyapi)

    # TODO: refcounts?
    col_ind = sig.args[1].value
    col_name = sig.args[0].col_names[col_ind]
    series_obj = c.pyapi.object_getattr_string(args[0], col_name)
    arr_obj = c.pyapi.object_getattr_string(series_obj, "values")

    dtype_typ = sig.args[2]
    # constant dtype codes mark the non-numpy cases (11: str, 12: dt64)
    const_code = dtype_typ.value if isinstance(dtype_typ, types.Const) else None

    if const_code == 11:  # FIXME: str code
        native_val = unbox_str_series(string_array_type, arr_obj, c)
    else:
        if const_code == 12:  # FIXME: dt64 code
            elem_type = types.NPDatetime('ns')
        else:
            elem_type = dtype_typ.dtype
        arr_type = types.Array(dtype=elem_type, ndim=1, layout='C')
        # TODO: error handling like Numba callwrappers.py
        native_val = unbox_array(arr_type, arr_obj, c)

    # release the two temporary Python references obtained above
    c.pyapi.decref(series_obj)
    c.pyapi.decref(arr_obj)
    return native_val.value
def _gen_csv_reader_py_pandas(col_names, col_typs, usecols, sep, typingctx, targetctx, parallel, skiprows):
    """Generate, exec and njit-compile a CSV reader function.

    The generated ``csv_reader_py(fname)`` creates a chunk reader with
    ``csv_file_chunk_reader``, calls ``pd.read_csv`` on it inside an
    ``objmode`` block and returns a tuple holding each column's ``.values``.

    ``typingctx`` and ``targetctx`` are accepted but not used here.

    Returns
    -------
    The ``numba.njit``-wrapped function, which is also appended to
    ``compiled_funcs``.
    """
    # TODO: support non-numpy types like strings
    date_inds = ", ".join(
        str(i) for i, t in enumerate(col_typs)
        if t.dtype == types.NPDatetime('ns'))
    typ_strs = ", ".join(
        "{}='{}'".format(_sanitize_varname(cname), _get_dtype_str(t))
        for cname, t in zip(col_names, col_typs))
    pd_dtype_strs = ", ".join(
        "'{}':{}".format(cname, _get_pd_dtype_str(t))
        for cname, t in zip(col_names, col_typs))

    # The body of the ``with`` block must be indented deeper than the ``with``
    # header, otherwise exec() of the generated text fails.
    lines = [
        "def csv_reader_py(fname):\n",
        "    skiprows = {}\n".format(skiprows),
        "    f_reader = csv_file_chunk_reader(fname._data, {}, skiprows, -1)\n".format(parallel),
        "    with objmode({}):\n".format(typ_strs),
        "        df = pd.read_csv(f_reader, names={},\n".format(col_names),
        "            parse_dates=[{}],\n".format(date_inds),
        "            dtype={{{}}},\n".format(pd_dtype_strs),
        "            usecols={}, sep='{}')\n".format(usecols, sep),
    ]
    # objmode output variables have to be assigned inside the with-block
    lines.extend(
        "        {} = df['{}'].values\n".format(_sanitize_varname(cname), cname)
        for cname in col_names)
    lines.append("    return ({},)\n".format(
        ", ".join(_sanitize_varname(c) for c in col_names)))
    func_text = "".join(lines)
    # print(func_text)

    glbls = globals()  # TODO: fix globals after Numba's #3355 is resolved
    # {'objmode': objmode, 'csv_file_chunk_reader': csv_file_chunk_reader,
    # 'pd': pd, 'np': np}
    loc_vars = {}
    exec(func_text, glbls, loc_vars)
    csv_reader_py = loc_vars['csv_reader_py']

    # TODO: no_cpython_wrapper=True crashes for some reason
    jit_func = numba.njit(csv_reader_py)
    compiled_funcs.append(jit_func)
    return jit_func
def _get_pd_dtype_str(t):
    """Return the source text of the pandas ``dtype`` to request for column type *t*.

    The result is spliced into generated ``read_csv`` calls:

    * categorical dtype -> ``pd.api.types.CategoricalDtype(<categories>)``
    * datetime64[ns] and string arrays -> ``str`` (dates are read as strings)
    * anything else -> ``np.<dtype>``
    """
    dtype = t.dtype

    if isinstance(dtype, PDCategoricalDtype):
        return 'pd.api.types.CategoricalDtype({})'.format(dtype.categories)

    # Emit the builtin ``str`` rather than ``np.str``: ``np.str`` was only an
    # alias of the builtin and has been removed from NumPy (1.24), so the old
    # spelling breaks the generated code on current NumPy.
    if dtype == types.NPDatetime('ns') or t == string_array_type:
        return 'str'

    return 'np.{}'.format(dtype)
def generic(self, args, kws):
    """Type ``datetime64 - timedelta64``.

    Returns a signature yielding a datetime64 in the combined unit, or
    ``None`` when the operands do not match or the units cannot be combined.
    """
    # Guard against unary -
    if len(args) == 1:
        return None

    dt, td = args
    if not (isinstance(dt, types.NPDatetime) and isinstance(td, types.NPTimedelta)):
        return None

    unit = npdatetime.combine_datetime_timedelta_units(dt.unit, td.unit)
    if unit is None:
        return None
    return signature(types.NPDatetime(unit), dt, td)
def generic(self, args, kws):
    """Type scalar indexing of a datetime64[ns] Series as returning a Timestamp.

    Returns ``None`` (no match) for non-Series containers, for unsupported
    indices, and whenever the indexing result is not a datetime64[ns] scalar.
    """
    assert not kws
    ary, idx = args
    if not isinstance(ary, SeriesType):
        return None

    out = get_array_index_type(ary, idx)
    if out is None:
        return None
    # check result to be dt64 since it might be sliced array
    if out.result != types.NPDatetime('ns'):
        return None
    # replace result with Timestamp
    return signature(pandas_timestamp_type, ary, out.index)
def _gen_csv_reader_py_pyarrow_func_text_core(col_names, col_typs, dtype_present, usecols, signature=None): # TODO: support non-numpy types like strings date_inds = ", ".join( str(i) for i, t in enumerate(col_typs) if t.dtype == types.NPDatetime('ns')) return_columns = usecols if usecols and isinstance(usecols[0], str) else col_names nb_objmode_vars = ", ".join([ "{}='{}'".format(to_varname(cname), _get_dtype_str(t)) for cname, t in zip(return_columns, col_typs) ]) pd_dtype_strs = ", ".join([ "'{}':{}".format(cname, _get_pd_dtype_str(t)) for cname, t in zip(return_columns, col_typs) ]) if signature is None: signature = "filepath_or_buffer" func_text = "def csv_reader_py({}):\n".format(signature) func_text += " with objmode({}):\n".format(nb_objmode_vars) func_text += " df = pandas_read_csv(filepath_or_buffer,\n" # pyarrow reads unnamed header as " ", pandas reads it as "Unnamed: N" # during inference from file names should be raplaced with "Unnamed: N" # passing names to pyarrow means that one row is header and should be skipped if col_names and any(map(lambda x: x.startswith('Unnamed: '), col_names)): func_text += " names={},\n".format(col_names) func_text += " skiprows=(skiprows and skiprows + 1) or 1,\n" else: func_text += " names=names,\n" func_text += " skiprows=skiprows,\n" func_text += " parse_dates=[{}],\n".format(date_inds) # Python objects (e.g. str, np.float) could not be jitted and passed to objmode # so they are hardcoded to function # func_text += " dtype={{{}}},\n".format(pd_dtype_strs) if dtype_present else \ # " dtype=dtype,\n" # dtype is hardcoded because datetime should be read as string func_text += " dtype={{{}}},\n".format(pd_dtype_strs) func_text += " usecols=usecols,\n" func_text += " sep=sep,\n" func_text += " delimiter=delimiter,\n" func_text += " )\n" for cname in return_columns: func_text += " {} = df['{}'].values\n".format( to_varname(cname), cname) # func_text += " print({})\n".format(cname) return func_text, 'csv_reader_py'
def generic(self, args, kws):
    """Type a ``pd.DatetimeIndex(...)`` call as a 1-D C-layout datetime64[ns] Series.

    The arguments are bound against the Python signature of
    ``pd_dt_index_stub``; a binding failure is reported as ``ValueError``.
    """
    pysig = numba.utils.pysignature(pd_dt_index_stub)
    try:
        bound = pysig.bind(*args, **kws)
    except TypeError:  # pragma: no cover
        raise ValueError("Unsupported arguments for pd.DatetimeIndex()")

    return_type = SeriesType(types.NPDatetime('ns'), 1, 'C')
    positional_sig = signature(return_type, bound.args)
    return positional_sig.replace(pysig=pysig)
def generic(self, args, kws):
    """Type ``getitem`` when the container and/or the index is a Series.

    Series operands are typed as their underlying arrays, the array rule is
    applied, and array types in the resulting signature are converted back
    to Series types. Scalar datetime64[ns] results taken from a Series are
    reported as Timestamp.

    Returns ``None`` when neither operand is a Series or when the indexing
    combination is not supported.
    """
    assert not kws
    [in_arr, in_idx] = args
    is_arr_series = False
    is_idx_series = False
    is_arr_dt_index = False

    if not isinstance(in_arr, SeriesType) and not isinstance(in_idx, SeriesType):
        return None

    if isinstance(in_arr, SeriesType):
        in_arr = series_to_array_type(in_arr)
        is_arr_series = True
        if in_arr.dtype == types.NPDatetime('ns'):
            is_arr_dt_index = True

    if isinstance(in_idx, SeriesType):
        in_idx = series_to_array_type(in_idx)
        is_idx_series = True

    # TODO: dt_index
    if in_arr == string_array_type:
        sig = GetItemStringArray.generic(self, (in_arr, in_idx), kws)
    else:
        out = get_array_index_type(in_arr, in_idx)
        # get_array_index_type() yields None for unsupported indexing; report
        # "no match" instead of failing on ``out.result``
        sig = None if out is None else signature(out.result, in_arr, out.index)

    if sig is not None:
        arg1 = sig.args[0]
        arg2 = sig.args[1]
        if is_arr_series:
            sig.return_type = if_arr_to_series_type(sig.return_type)
            arg1 = if_arr_to_series_type(arg1)
        if is_idx_series:
            arg2 = if_arr_to_series_type(arg2)
        sig.args = (arg1, arg2)
        # dt_index and Series(dt64) should return Timestamp
        if is_arr_dt_index and sig.return_type == types.NPDatetime('ns'):
            sig.return_type = pandas_timestamp_type
    return sig
def _get_numba_typ_from_pa_typ(pa_typ):
    """Map a pyarrow data type to the corresponding Numba type.

    All supported date and timestamp types map to datetime64[ns].

    Raises
    ------
    ValueError
        If *pa_typ* is not one of the supported Arrow types.
    """
    import pyarrow as pa

    dt64_ns = types.NPDatetime('ns')
    _typ_map = {
        # boolean
        pa.bool_(): types.bool_,
        # signed int types
        pa.int8(): types.int8,
        pa.int16(): types.int16,
        pa.int32(): types.int32,
        pa.int64(): types.int64,
        # unsigned int types
        pa.uint8(): types.uint8,
        pa.uint16(): types.uint16,
        pa.uint32(): types.uint32,
        pa.uint64(): types.uint64,
        # float types (TODO: float16?)
        pa.float32(): types.float32,
        pa.float64(): types.float64,
        # String
        pa.string(): string_type,
        # date
        pa.date32(): dt64_ns,
        pa.date64(): dt64_ns,
    }
    # time (TODO: time32, time64, ...)
    _typ_map.update(
        {pa.timestamp(unit): dt64_ns for unit in ('ns', 'us', 'ms', 's')})

    numba_typ = _typ_map.get(pa_typ)
    if numba_typ is None:
        raise ValueError("Arrow data type {} not supported yet".format(pa_typ))
    return numba_typ
class MaskedConstructor(ConcreteTemplate):
    # Concrete typing cases for api.Masked(value, valid): one signature per
    # supported scalar type, each taking the scalar and a boolean and
    # returning MaskedType of that scalar.
    key = api.Masked

    # datetime64 / timedelta64 resolutions accepted as scalar types
    units = ["ns", "ms", "us", "s"]
    datetime_cases = set(map(types.NPDatetime, units))
    timedelta_cases = set(map(types.NPTimedelta, units))

    cases = [
        nb_signature(MaskedType(scalar_type), scalar_type, types.boolean)
        for scalar_type in (
            types.integer_domain
            | types.real_domain
            | datetime_cases
            | timedelta_cases
            | {types.boolean}
        )
    ]
def generic(self, args, kws):
    """Type the dataframe-column unboxing call ``(df, column index, dtype)``.

    A literal third argument is a dtype code (12: datetime64[ns] array,
    11: string array); any other third argument supplies the element type
    through its ``dtype`` attribute.

    Raises
    ------
    ValueError
        For a literal dtype code other than 11 or 12.
    """
    assert not kws
    assert len(args) == 3
    dtype_typ = args[2]

    if not isinstance(dtype_typ, types.Literal):
        out_typ = types.Array(dtype_typ.dtype, 1, 'C')
    else:
        dtype_code = dtype_typ.literal_value
        if dtype_code == 12:  # FIXME dtype for dt64
            out_typ = types.Array(types.NPDatetime('ns'), 1, 'C')
        elif dtype_code == 11:  # FIXME dtype for str
            out_typ = string_array_type
        else:
            raise ValueError("invalid input dataframe dtype {}".format(dtype_code))

    # FIXME: last arg should be types.DType?
    return signature(out_typ, *args)
def generic(self, args, kws):
    """Type ``datetime64 + timedelta64`` with the operands in either order.

    Returns a signature yielding a datetime64 in the combined unit, or
    ``None`` when the operands do not match or the units cannot be combined.
    """
    # Guard against unary +
    if len(args) == 1:
        return None

    left, right = args
    # find which side is the timedelta; the other one must be the datetime
    if isinstance(right, types.NPTimedelta):
        dt, td = left, right
    elif isinstance(left, types.NPTimedelta):
        dt, td = right, left
    else:
        return None

    if not isinstance(dt, types.NPDatetime):
        return None
    unit = npdatetime.combine_datetime_timedelta_units(dt.unit, td.unit)
    if unit is None:
        return None
    return signature(types.NPDatetime(unit), left, right)
def get_column_read_nodes(c_type, cvar, arrow_readers_var, i):
    """Build the IR statements that read parquet column *i* into *cvar*.

    A small reader function is generated as text, compiled to Numba IR, and
    its single block is returned without its last three statements, followed
    by an assignment of the last ``column*`` target to *cvar*.
    """
    loc = cvar.loc

    src = [
        'def f(arrow_readers):\n',
        ' col_size = get_column_size_parquet(arrow_readers, {})\n'.format(i),
    ]
    # generate strings differently
    if c_type == string_type:
        # pass size for easier allocation and distributed analysis
        src.append(
            ' column = read_parquet_str(arrow_readers, {}, col_size)\n'.format(i))
    else:
        el_type = get_element_type(c_type)
        if el_type == repr(types.NPDatetime('ns')):
            src.append(' column_tmp = np.empty(col_size, dtype=np.int64)\n')
            # TODO: fix alloc
            src.append(' column = sdc.hiframes.api.ts_series_to_arr_typ(column_tmp)\n')
        else:
            src.append(' column = np.empty(col_size, dtype=np.{})\n'.format(el_type))
        src.append(
            ' status = read_parquet(arrow_readers, {}, column, np.int32({}))\n'.format(
                i, _type_to_pq_dtype_number[el_type]))
    func_text = ''.join(src)

    loc_vars = {}
    exec(func_text, {'sdc': sdc, 'np': np}, loc_vars)
    size_func = loc_vars['f']

    ir_globals = {
        'get_column_size_parquet': get_column_size_parquet,
        'read_parquet': read_parquet,
        'read_parquet_str': read_parquet_str,
        'np': np,
        'sdc': sdc,
        'StringArray': StringArray,
    }
    _, f_block = compile_to_numba_ir(size_func, ir_globals).blocks.popitem()
    replace_arg_nodes(f_block, [arrow_readers_var])

    out_nodes = f_block.body[:-3]
    # bind the most recent "column*" target to the destination variable
    for node in reversed(out_nodes):
        if node.target.name.startswith("column"):
            assign = ir.Assign(node.target, cvar, loc)
            break
    out_nodes.append(assign)
    return out_nodes
def _resolve_map_func(self, ary, args, kws):
    """Infer the result type of a one-input callback given as a code literal.

    The code object of ``args[0].literal_value`` is converted to Numba IR and
    run through type inference with the element type of ``ary``; the returned
    signature produces a ``SeriesType`` of the inferred return type.
    """
    # getitem returns Timestamp for dt_index and series(dt64)
    elem_type = ary.dtype
    if elem_type == types.NPDatetime('ns'):
        elem_type = pandas_timestamp_type

    func_literal = args[0].literal_value
    func_code = func_literal.code

    # XXX hack in hiframes_typed to make globals available
    # TODO: use code.co_names to find globals actually used?
    func_globals = getattr(func_literal, 'globals', {'np': np})

    func_ir = numba.ir_utils.get_ir_of_code(func_globals, func_code)
    # only the inferred return type is needed from the three results
    _, inferred_return_type, _ = numba.typed_passes.type_inference_stage(
        self.context, func_ir, (elem_type, ), None)
    return signature(SeriesType(inferred_return_type), *args)
def _get_dtype_str(t):
    """Return the type string for column type *t* used in generated objmode code.

    Categorical and string-array types are registered as attributes on the
    ``types`` module and referred to by attribute name; every other type is
    rendered as ``<dtype>[::1]``.
    """
    dtype = t.dtype

    if isinstance(dtype, PDCategoricalDtype):
        # HACK: add cat type to numba.types
        # FIXME: fix after Numba #3372 is resolved
        cat_arr = CategoricalArray(dtype)
        cat_arr_name = 'CategoricalArray' + str(ir_utils.next_label())
        setattr(types, cat_arr_name, cat_arr)
        return cat_arr_name

    elem_str = 'NPDatetime("ns")' if dtype == types.NPDatetime('ns') else dtype

    if t == string_array_type:
        # HACK: add string_array_type to numba.types
        # FIXME: fix after Numba #3372 is resolved
        types.string_array_type = string_array_type
        return 'string_array_type'

    return '{}[::1]'.format(elem_str)
def iternext_itertuples(context, builder, sig, args, result):
    """Lower ``iternext`` for the itertuples iterator.

    While the stored index is below the length of the first array, yields a
    tuple of the index followed by the element of every array at that index
    (datetime64[ns] elements are converted to Timestamp), then stores the
    incremented index.
    """
    iterty, = sig.args
    it, = args

    # TODO: support string arrays
    iterobj = context.make_helper(builder, iterty, value=it)
    # first array type is implicit int index
    # use len() to support string arrays
    len_sig = signature(types.intp, iterty.array_types[1])
    nitems = context.compile_internal(builder, lambda a: len(a), len_sig, [iterobj.array0])
    # ary = make_array(iterty.array_types[1])(context, builder, value=iterobj.array0)
    # nitems, = cgutils.unpack_tuple(builder, ary.shape, count=1)

    index = builder.load(iterobj.index)
    is_valid = builder.icmp(lc.ICMP_SLT, index, nitems)
    result.set_valid(is_valid)

    dt64_arr_typ = types.Array(types.NPDatetime('ns'), 1, 'C')

    with builder.if_then(is_valid):
        values = [index]  # XXX implicit int index
        for col, arr_typ in enumerate(iterty.array_types[1:]):
            arr_ptr = getattr(iterobj, "array{}".format(col))
            if arr_typ == dt64_arr_typ:
                elem_type = pandas_timestamp_type
                getitem_impl = (
                    lambda a, i: hpat.pd_timestamp_ext.convert_datetime64_to_timestamp(
                        np.int64(a[i])))
            else:
                elem_type = arr_typ.dtype
                getitem_impl = lambda a, i: a[i]
            # arr = make_array(arr_typ)(context, builder, value=arr_ptr)
            # val = _getitem_array1d(context, builder, arr_typ, arr, index,
            #                        wraparound=False)
            getitem_sig = signature(elem_type, arr_typ, types.intp)
            values.append(
                context.compile_internal(builder, getitem_impl, getitem_sig, [arr_ptr, index]))

        result.yield_(context.make_tuple(builder, iterty.yield_type, values))
        builder.store(cgutils.increment_index(builder, index), iterobj.index)
def test_call_notation(self):
    """Calling a type builds a signature from types and casts plain values."""
    int32_ty = types.int32
    double_ty = types.double

    # Function call signature
    for arg_types in ((), (double_ty, ), (double_ty, double_ty)):
        self.assertEqual(int32_ty(*arg_types),
                         typing.signature(int32_ty, *arg_types))

    # Value cast
    self.assertPreciseEqual(int32_ty(42.5), 42)
    self.assertPreciseEqual(double_ty(-5), -5.0)

    year_dt = types.NPDatetime('Y')
    self.assertPreciseEqual(year_dt('1900'), np.datetime64('1900', 'Y'))
    self.assertPreciseEqual(year_dt('NaT'), np.datetime64('NaT', 'Y'))

    seconds_td = types.NPTimedelta('s')
    self.assertPreciseEqual(seconds_td(5), np.timedelta64(5, 's'))
    self.assertPreciseEqual(seconds_td('NaT'), np.timedelta64('NaT', 's'))

    unitless_td = types.NPTimedelta('')
    self.assertPreciseEqual(unitless_td(5), np.timedelta64(5))
    self.assertPreciseEqual(unitless_td('NaT'), np.timedelta64('NaT'))
def test_call_notation(self):
    """Calling a type builds a signature from types and casts plain values.

    The NaT comparisons only run when ``numpy_version`` is below 1.16.
    """
    int32_ty = types.int32
    double_ty = types.double
    # FIXME: workaround for known NumPy 1.16 issue
    check_nat = numpy_version < (1, 16)

    # Function call signature
    for arg_types in ((), (double_ty, ), (double_ty, double_ty)):
        self.assertEqual(int32_ty(*arg_types),
                         typing.signature(int32_ty, *arg_types))

    # Value cast
    self.assertPreciseEqual(int32_ty(42.5), 42)
    self.assertPreciseEqual(double_ty(-5), -5.0)

    year_dt = types.NPDatetime('Y')
    self.assertPreciseEqual(year_dt('1900'), np.datetime64('1900', 'Y'))
    if check_nat:
        self.assertPreciseEqual(year_dt('NaT'), np.datetime64('NaT', 'Y'))

    seconds_td = types.NPTimedelta('s')
    self.assertPreciseEqual(seconds_td(5), np.timedelta64(5, 's'))
    if check_nat:
        self.assertPreciseEqual(seconds_td('NaT'), np.timedelta64('NaT', 's'))

    unitless_td = types.NPTimedelta('')
    self.assertPreciseEqual(unitless_td(5), np.timedelta64(5))
    if check_nat:
        self.assertPreciseEqual(unitless_td('NaT'), np.timedelta64('NaT'))
def is_dt64_series_typ(t):
    """Return whether *t* is a ``SeriesType`` whose dtype is datetime64[ns]."""
    if not isinstance(t, SeriesType):
        return False
    return t.dtype == types.NPDatetime('ns')
def test_atomic_types(self):
    """NPDatetime and NPTimedelta types are run through check_pickling()."""
    for unit in ('M', 'ms'):
        for type_ctor in (types.NPDatetime, types.NPTimedelta):
            self.check_pickling(type_ctor(unit))
from numba.typing import signature from numba.targets.imputils import impl_ret_new_ref, impl_ret_borrowed import numpy as np import hpat from hpat.str_ext import string_type, unicode_to_char_ptr from hpat.str_arr_ext import StringArray, StringArrayPayloadType, construct_string_array from hpat.str_arr_ext import string_array_type from hpat.utils import unliteral_all # from parquet/types.h # boolean, int32, int64, int96, float, double, byte # XXX arrow converts int96 timestamp to int64 _type_to_pq_dtype_number = {'bool_': 0, 'int32': 1, 'int64': 2, 'int96': 3, 'float32': 4, 'float64': 5, repr(types.NPDatetime('ns')): 3, 'int8': 6} def read_parquet(): return 0 def read_parquet_str(): return 0 def read_parquet_str_parallel(): return 0 def read_parquet_parallel():
@numba.njit def lt_f(a, b): return a < b @numba.njit def gt_f(a, b): return a > b series_replace_funcs = { 'sum': _column_sum_impl_basic, 'prod': _column_prod_impl_basic, 'count': _column_count_impl, 'mean': _column_mean_impl, 'max': defaultdict(lambda: _column_max_impl, [(types.NPDatetime('ns'), _column_max_impl_no_isnan)]), 'min': defaultdict(lambda: _column_min_impl, [(types.NPDatetime('ns'), _column_min_impl_no_isnan)]), 'var': _column_var_impl, 'std': _column_std_impl, 'nunique': lambda A: hpat.hiframes.api.nunique(A), 'unique': lambda A: hpat.hiframes.api.unique(A), 'describe': _column_describe_impl, 'fillna_alloc': _column_fillna_alloc_impl, 'fillna_str_alloc': _series_fillna_str_alloc_impl, 'dropna_float': _series_dropna_float_impl, 'dropna_str_alloc': _series_dropna_str_alloc_impl, 'shift': lambda A, shift: hpat.hiframes.api.init_series(hpat.hiframes.rolling.shift(A, shift, False)), 'shift_default': lambda A: hpat.hiframes.api.init_series(hpat.hiframes.rolling.shift(A, 1, False)), 'pct_change': lambda A, shift: hpat.hiframes.api.init_series(hpat.hiframes.rolling.pct_change(A, shift, False)), 'pct_change_default': lambda A: hpat.hiframes.api.init_series(hpat.hiframes.rolling.pct_change(A, 1, False)), 'str_contains_regex': _str_contains_regex_impl,
def test_ufunc_find_matching_loop(self):
    """Exercise numpy_support.ufunc_find_matching_loop() on fake add/mul ufuncs."""
    find_loop = numpy_support.ufunc_find_matching_loop
    np_add = FakeUFunc(_add_types)
    np_mul = FakeUFunc(_mul_types)

    def check(ufunc, input_types, sigs, output_types=()):
        """
        Check that ufunc_find_matching_loop() finds one of the given
        *sigs* for *ufunc*, *input_types* and optional *output_types*.
        """
        accepted_sigs = (sigs, ) if isinstance(sigs, str) else sigs
        loop = find_loop(ufunc, input_types + output_types)
        self.assertTrue(loop)
        self.assertIn(loop.ufunc_sig, accepted_sigs)
        self.assertEqual(len(loop.numpy_inputs), len(loop.inputs))
        self.assertEqual(len(loop.numpy_outputs), len(loop.outputs))
        if output_types:
            self.assertEqual(loop.outputs, list(output_types))
        else:
            # Add explicit outputs and check the result is the same
            loop_explicit = find_loop(ufunc, list(input_types) + loop.outputs)
            self.assertEqual(loop_explicit, loop)
        # Round-tripping inputs and outputs
        loop_rt = find_loop(ufunc, loop.inputs + loop.outputs)
        self.assertEqual(loop_rt, loop)
        return loop

    def check_exact(ufunc, input_types, sigs, output_types=()):
        # same as check(), and the selected loop must take the inputs as given
        loop = check(ufunc, input_types, sigs, output_types)
        self.assertEqual(loop.inputs, list(input_types))

    def check_no_match(ufunc, input_types):
        self.assertIs(find_loop(ufunc, input_types), None)

    td_s = types.NPTimedelta('s')
    td_ms = types.NPTimedelta('ms')
    dt_s = types.NPDatetime('s')
    dt_ms = types.NPDatetime('ms')

    # Exact matching for number types
    exact_number_cases = [
        (types.bool_, '??->?'),
        (types.int8, 'bb->b'),
        (types.uint8, 'BB->B'),
        (types.int64, ('ll->l', 'qq->q')),
        (types.uint64, ('LL->L', 'QQ->Q')),
        (types.float32, 'ff->f'),
        (types.float64, 'dd->d'),
        (types.complex64, 'FF->F'),
        (types.complex128, 'DD->D'),
    ]
    for number_type, sigs in exact_number_cases:
        check_exact(np_add, (number_type, number_type), sigs)

    # Exact matching for datetime64 and timedelta64 types
    check_exact(np_add, (td_s, td_s), 'mm->m', output_types=(td_s, ))
    check_exact(np_add, (td_ms, dt_s), 'mM->M', output_types=(dt_ms, ))
    check_exact(np_add, (dt_s, td_s), 'Mm->M', output_types=(dt_s, ))
    check_exact(np_mul, (td_s, types.int64), 'mq->m', output_types=(td_s, ))
    check_exact(np_mul, (types.float64, td_s), 'dm->m', output_types=(td_s, ))

    # Mix and match number types, with casting
    check(np_add, (types.bool_, types.int8), 'bb->b')
    check(np_add, (types.uint8, types.bool_), 'BB->B')
    check(np_add, (types.int16, types.uint16), 'ii->i')
    check(np_add, (types.complex64, types.float64), 'DD->D')
    check(np_add, (types.float64, types.complex64), 'DD->D')

    # With some timedelta64 arguments as well
    check(np_mul, (td_s, types.int32), 'mq->m', output_types=(td_s, ))
    check(np_mul, (td_s, types.uint32), 'mq->m', output_types=(td_s, ))
    check(np_mul, (td_s, types.float32), 'md->m', output_types=(td_s, ))
    check(np_mul, (types.float32, td_s), 'dm->m', output_types=(td_s, ))

    # No match
    check_no_match(np_add, (dt_s, dt_s))
    # No implicit casting from int64 to timedelta64 (Numpy would allow
    # this).
    check_no_match(np_add, (td_s, types.int64))
def generic(self, args, kws):
    """Type ``getitem`` when the container and/or the index is a Series.

    Series operands are typed as their underlying arrays, the rule matching
    the array kind (string array, list-of-strings array, split view, or a
    regular array) is applied, and array types in the resulting signature
    are converted back to Series types. Scalar datetime64[ns] results taken
    from a Series are reported as Timestamp.

    Returns ``None`` when neither operand is a Series or when the indexing
    combination is not supported.
    """
    assert not kws
    [in_arr, in_idx] = args
    is_arr_series = False
    is_idx_series = False
    is_arr_dt_index = False

    if not isinstance(in_arr, SeriesType) and not isinstance(in_idx, SeriesType):
        return None

    if isinstance(in_arr, SeriesType):
        in_arr = series_to_array_type(in_arr)
        is_arr_series = True
        if in_arr.dtype == types.NPDatetime('ns'):
            is_arr_dt_index = True

    if isinstance(in_idx, SeriesType):
        in_idx = series_to_array_type(in_idx)
        is_idx_series = True

    # TODO: dt_index
    if in_arr == string_array_type:
        # XXX fails due in overload
        # compile_internal version results in symbol not found!
        # sig = self.context.resolve_function_type(
        #     operator.getitem, (in_arr, in_idx), kws)
        # HACK to get avoid issues for now
        if isinstance(in_idx, (types.Integer, types.IntegerLiteral)):
            sig = string_type(in_arr, in_idx)
        else:
            sig = GetItemStringArray.generic(self, (in_arr, in_idx), kws)
    elif in_arr == list_string_array_type:
        # TODO: split view
        # mimic array indexing for list
        if (isinstance(in_idx, types.Array) and in_idx.ndim == 1
                and isinstance(in_idx.dtype, (types.Integer, types.Boolean))):
            sig = signature(in_arr, in_arr, in_idx)
        else:
            sig = numba.typing.collections.GetItemSequence.generic(
                self, (in_arr, in_idx), kws)
    elif in_arr == string_array_split_view_type:
        sig = GetItemStringArraySplitView.generic(self, (in_arr, in_idx), kws)
    else:
        out = get_array_index_type(in_arr, in_idx)
        # get_array_index_type() yields None for unsupported indexing; report
        # "no match" instead of failing on ``out.result``
        sig = None if out is None else signature(out.result, in_arr, out.index)

    if sig is not None:
        arg1 = sig.args[0]
        arg2 = sig.args[1]
        if is_arr_series:
            sig.return_type = if_arr_to_series_type(sig.return_type)
            arg1 = if_arr_to_series_type(arg1)
        if is_idx_series:
            arg2 = if_arr_to_series_type(arg2)
        sig.args = (arg1, arg2)
        # dt_index and Series(dt64) should return Timestamp
        if is_arr_dt_index and sig.return_type == types.NPDatetime('ns'):
            sig.return_type = pandas_timestamp_type
    return sig
def test_ufunc_find_matching_loop(self):
    """Exercise numpy_support.ufunc_find_matching_loop() on fake ufuncs."""
    find_loop = numpy_support.ufunc_find_matching_loop
    np_add = FakeUFunc(_add_types)
    np_mul = FakeUFunc(_mul_types)
    np_isnan = FakeUFunc(_isnan_types)
    np_sqrt = FakeUFunc(_sqrt_types)

    def check(ufunc, input_types, sigs, output_types=()):
        """
        Check that ufunc_find_matching_loop() finds one of the given
        *sigs* for *ufunc*, *input_types* and optional *output_types*.
        """
        accepted_sigs = (sigs, ) if isinstance(sigs, str) else sigs
        loop = find_loop(ufunc, input_types + output_types)
        self.assertTrue(loop)
        failure_msg = (
            "inputs=%s and outputs=%s should have selected "
            "one of %s, got %s"
            % (input_types, output_types, accepted_sigs, loop.ufunc_sig))
        self.assertIn(loop.ufunc_sig, accepted_sigs, failure_msg)
        self.assertEqual(len(loop.numpy_inputs), len(loop.inputs))
        self.assertEqual(len(loop.numpy_outputs), len(loop.outputs))
        if output_types:
            self.assertEqual(loop.outputs, list(output_types))
        else:
            # Add explicit outputs and check the result is the same
            loop_explicit = find_loop(ufunc, list(input_types) + loop.outputs)
            self.assertEqual(loop_explicit, loop)
        # Round-tripping inputs and outputs
        loop_rt = find_loop(ufunc, loop.inputs + loop.outputs)
        self.assertEqual(loop_rt, loop)
        return loop

    def check_exact(ufunc, input_types, sigs, output_types=()):
        """
        Like check(), but also ensure no casting of inputs occurred.
        """
        loop = check(ufunc, input_types, sigs, output_types)
        self.assertEqual(loop.inputs, list(input_types))

    def check_no_match(ufunc, input_types):
        self.assertIs(find_loop(ufunc, input_types), None)

    td_s = types.NPTimedelta('s')
    td_ms = types.NPTimedelta('ms')
    dt_s = types.NPDatetime('s')
    dt_ms = types.NPDatetime('ms')

    # Exact matching for number types
    exact_number_cases = [
        (types.bool_, '??->?'),
        (types.int8, 'bb->b'),
        (types.uint8, 'BB->B'),
        (types.int64, ('ll->l', 'qq->q')),
        (types.uint64, ('LL->L', 'QQ->Q')),
        (types.float32, 'ff->f'),
        (types.float64, 'dd->d'),
        (types.complex64, 'FF->F'),
        (types.complex128, 'DD->D'),
    ]
    for number_type, sigs in exact_number_cases:
        check_exact(np_add, (number_type, number_type), sigs)

    # Exact matching for datetime64 and timedelta64 types
    check_exact(np_add, (td_s, td_s), 'mm->m', output_types=(td_s, ))
    check_exact(np_add, (td_ms, dt_s), 'mM->M', output_types=(dt_ms, ))
    check_exact(np_add, (dt_s, td_s), 'Mm->M', output_types=(dt_s, ))
    check_exact(np_mul, (td_s, types.int64), 'mq->m', output_types=(td_s, ))
    check_exact(np_mul, (types.float64, td_s), 'dm->m', output_types=(td_s, ))

    # Mix and match number types, with casting
    check(np_add, (types.bool_, types.int8), 'bb->b')
    check(np_add, (types.uint8, types.bool_), 'BB->B')
    check(np_add, (types.int16, types.uint16), 'ii->i')
    check(np_add, (types.complex64, types.float64), 'DD->D')
    check(np_add, (types.float64, types.complex64), 'DD->D')

    # Integers, when used together with floating-point numbers,
    # should cast to any real or complex (see #2006)
    int_types = [types.int32, types.uint32, types.int64, types.uint64]
    float_partner_cases = (
        (types.float32, 'ff->f'),
        (types.float64, 'dd->d'),
        (types.complex64, 'FF->F'),
        (types.complex128, 'DD->D'),
    )
    for intty in int_types:
        for partner_type, expected_sig in float_partner_cases:
            check(np_add, (partner_type, intty), expected_sig)

    # However, when used alone, they should cast only to
    # floating-point types of sufficient precision
    # (typical use case: np.sqrt(2) should give an accurate enough value)
    for intty in int_types:
        check(np_sqrt, (intty, ), 'd->d')
        check(np_isnan, (intty, ), 'd->?')

    # With some timedelta64 arguments as well
    check(np_mul, (td_s, types.int32), 'mq->m', output_types=(td_s, ))
    check(np_mul, (td_s, types.uint32), 'mq->m', output_types=(td_s, ))
    check(np_mul, (td_s, types.float32), 'md->m', output_types=(td_s, ))
    check(np_mul, (types.float32, td_s), 'dm->m', output_types=(td_s, ))

    # No match
    check_no_match(np_add, (dt_s, dt_s))
    # No implicit casting from int64 to timedelta64 (Numpy would allow
    # this).
    check_no_match(np_add, (td_s, types.int64))
def resolve_dt(self, ary):
    """Resolve the ``dt`` attribute to ``series_dt_methods_type``.

    Asserts that ``ary`` holds datetime64[ns] values.
    """
    expected_dtype = types.NPDatetime('ns')
    assert ary.dtype == expected_dtype
    return series_dt_methods_type