def _analyze_call(self, lhs, rhs, func_var, args, array_dists):
    """Analyze array distributions in function calls.

    Resolves the callee with ``find_callname`` and dispatches on the
    (function name, module) pair, updating ``array_dists`` — a mutable
    mapping from IR variable name to a ``Distribution`` enum member —
    for the output ``lhs`` and/or the call's arguments.  Branch order
    matters: each recognized case returns immediately, and any call not
    matched by a case falls through to ``_analyze_call_set_REP`` at the
    bottom (which, per its name, presumably forces replication — confirm
    against its definition).

    Args:
        lhs: name of the variable the call result is assigned to.
        rhs: the call expression IR node (``rhs.args`` holds argument vars).
        func_var: variable holding the callee (unused in this body).
        args: list of argument variables of the call.
        array_dists: mapping of variable name -> Distribution, updated in place.
    """
    func_name = ""
    func_mod = ""
    fdef = guard(find_callname, self.func_ir, rhs, self.typemap)
    if fdef is None:
        # unresolvable callee: be conservative and replicate everything
        warnings.warn(
            "function call couldn't be found for distributed analysis")
        self._analyze_call_set_REP(lhs, args, array_dists)
        return
    else:
        func_name, func_mod = fdef

    # allocation calls: freshly allocated arrays start as OneD candidates
    if is_alloc_callname(func_name, func_mod):
        if lhs not in array_dists:
            array_dists[lhs] = Distribution.OneD
        return

    # numpy direct functions
    if isinstance(func_mod, str) and func_mod == 'numpy':
        self._analyze_call_np(lhs, func_name, args, array_dists)
        return

    # handle array.func calls
    if isinstance(func_mod, ir.Var) and is_array(self.typemap, func_mod.name):
        self._analyze_call_array(lhs, func_mod, func_name, args, array_dists)
        return

    # hpat.distributed_api functions
    if isinstance(func_mod, str) and func_mod == 'hpat.distributed_api':
        self._analyze_call_hpat_dist(lhs, func_name, args, array_dists)
        return

    # len(): no effect on distributions
    if func_name == 'len' and func_mod in ('__builtin__', 'builtins'):
        return

    # HDF5 read/write does not constrain distributions
    if hpat.config._has_h5py and (func_mod == 'hpat.pio_api'
                                  and func_name in ['h5read', 'h5write']):
        return

    if fdef == ('quantile', 'hpat.hiframes_api'):
        # quantile doesn't affect input's distribution
        return

    if fdef == ('nunique', 'hpat.hiframes_api'):
        # nunique doesn't affect input's distribution
        return

    if fdef == ('unique', 'hpat.hiframes_api'):
        # doesn't affect distribution of input since input can stay 1D
        if lhs not in array_dists:
            array_dists[lhs] = Distribution.OneD_Var
        # NOTE(review): min() over enum values implements the "meet" —
        # smaller value appears to be the more restrictive distribution;
        # confirm against the Distribution enum ordering.
        new_dist = Distribution(
            min(array_dists[lhs].value, array_dists[rhs.args[0].name].value))
        array_dists[lhs] = new_dist
        return

    if fdef == ('rolling_fixed', 'hpat.hiframes_rolling'):
        self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
        return

    if fdef == ('rolling_variable', 'hpat.hiframes_rolling'):
        # lhs, in_arr, on_arr should have the same distribution
        new_dist = self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
        new_dist = self._meet_array_dists(
            lhs, rhs.args[1].name, array_dists, new_dist)
        array_dists[rhs.args[0].name] = new_dist
        return

    if fdef == ('shift', 'hpat.hiframes_rolling'):
        self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
        return

    if fdef == ('pct_change', 'hpat.hiframes_rolling'):
        self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
        return

    if fdef == ('nlargest', 'hpat.hiframes_api'):
        # output of nlargest is REP
        array_dists[lhs] = Distribution.REP
        return

    if fdef == ('median', 'hpat.hiframes_api'):
        return

    if fdef == ('concat', 'hpat.hiframes_api'):
        # hiframes concat is similar to np.concatenate
        self._analyze_call_np_concatenate(lhs, args, array_dists)
        return

    if fdef == ('isna', 'hpat.hiframes_api'):
        return

    # dummy hiframes functions: output aliases the first argument, so
    # their distributions are met (unified)
    if func_mod == 'hpat.hiframes_api' and func_name in (
            'to_series_type', 'to_arr_from_series', 'ts_series_to_arr_typ',
            'to_date_series_type'):
        self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
        return

    # np.fromfile()
    if fdef == ('file_read', 'hpat.io'):
        return

    if hpat.config._has_ros and fdef == ('read_ros_images_inner', 'hpat.ros'):
        return

    if hpat.config._has_pyarrow and fdef == (
            'read_parquet', 'hpat.parquet_pio'):
        return

    if hpat.config._has_pyarrow and fdef == (
            'read_parquet_str', 'hpat.parquet_pio'):
        # string read creates array in output
        if lhs not in array_dists:
            array_dists[lhs] = Distribution.OneD
        return

    # TODO: fix "numba.extending" in function def
    if hpat.config._has_xenon and fdef == (
            'read_xenon_col', 'numba.extending'):
        # arg 4 is forced to REP — presumably column metadata; verify
        # against the read_xenon_col signature
        array_dists[args[4].name] = Distribution.REP
        return

    if hpat.config._has_xenon and fdef == (
            'read_xenon_str', 'numba.extending'):
        array_dists[args[4].name] = Distribution.REP
        # string read creates array in output
        if lhs not in array_dists:
            array_dists[lhs] = Distribution.OneD
        return

    # ML 'train' calls on known model types: meet the two data arguments
    if func_name == 'train' and isinstance(func_mod, ir.Var):
        if self.typemap[func_mod.name] == hpat.ml.svc.svc_type:
            self._meet_array_dists(args[0].name, args[1].name, array_dists,
                                   Distribution.Thread)
            return
        if self.typemap[func_mod.name] == hpat.ml.naive_bayes.mnb_type:
            self._meet_array_dists(args[0].name, args[1].name, array_dists)
            return

    # ML 'predict' calls: meet output with the input data argument
    if func_name == 'predict' and isinstance(func_mod, ir.Var):
        if self.typemap[func_mod.name] == hpat.ml.svc.svc_type:
            self._meet_array_dists(lhs, args[0].name, array_dists,
                                   Distribution.Thread)
            return
        if self.typemap[func_mod.name] == hpat.ml.naive_bayes.mnb_type:
            self._meet_array_dists(lhs, args[0].name, array_dists)
            return

    # daal4py-style external analysis hook; returns truthy when it
    # handled the call (see _analyze_call_d4p)
    if isinstance(func_mod, ir.Var) and self._analyze_call_d4p(
            lhs, func_name, self.typemap[func_mod.name], args, array_dists):
        return

    # TODO: make sure assert_equiv is not generated unnecessarily
    # TODO: fix assert_equiv for np.stack from df.value
    if fdef == ('assert_equiv', 'numba.array_analysis'):
        return

    # set REP if not found
    self._analyze_call_set_REP(lhs, args, array_dists)
def _analyze_call(self, lhs, rhs, func_var, args, array_dists):
    """Analyze array distributions in function calls.

    Resolves the callee with ``find_callname`` and dispatches on the
    (function name, module) pair, updating ``array_dists`` — a mutable
    mapping from IR variable name to a ``Distribution`` enum member —
    for the output ``lhs`` and/or the call's arguments.  Branch order
    matters: each recognized case returns immediately, and any call not
    matched falls through to ``_analyze_call_set_REP`` at the bottom
    (which, per its name, presumably forces replication — confirm
    against its definition).

    Args:
        lhs: name of the variable the call result is assigned to.
        rhs: the call expression IR node (``rhs.args`` holds argument
            vars, ``rhs.func`` the callee variable).
        func_var: variable holding the callee (unused in this body).
        args: list of argument variables of the call.
        array_dists: mapping of variable name -> Distribution, updated in place.
    """
    func_name = ""
    func_mod = ""
    fdef = guard(find_callname, self.func_ir, rhs, self.typemap)
    if fdef is None:
        # check ObjModeLiftedWith, we assume distribution doesn't change
        # blocks of data are passed in, TODO: document
        func_def = guard(get_definition, self.func_ir, rhs.func)
        if isinstance(func_def, ir.Const) and isinstance(
                func_def.value, numba.dispatcher.ObjModeLiftedWith):
            return
        # otherwise unresolvable: be conservative and replicate
        warnings.warn(
            "function call couldn't be found for distributed analysis")
        self._analyze_call_set_REP(lhs, args, array_dists, fdef)
        return
    else:
        func_name, func_mod = fdef

    # allocation calls: freshly allocated arrays start as OneD candidates
    if is_alloc_callname(func_name, func_mod):
        if lhs not in array_dists:
            array_dists[lhs] = Distribution.OneD
        return

    # numpy direct functions
    if isinstance(func_mod, str) and func_mod == 'numpy':
        self._analyze_call_np(lhs, func_name, args, array_dists)
        return

    # handle array.func calls
    if isinstance(func_mod, ir.Var) and is_array(self.typemap, func_mod.name):
        self._analyze_call_array(lhs, func_mod, func_name, args, array_dists)
        return

    # handle df.func calls
    if isinstance(func_mod, ir.Var) and isinstance(
            self.typemap[func_mod.name], DataFrameType):
        self._analyze_call_df(lhs, func_mod, func_name, args, array_dists)
        return

    # hpat.distributed_api functions
    if isinstance(func_mod, str) and func_mod == 'hpat.distributed_api':
        self._analyze_call_hpat_dist(lhs, func_name, args, array_dists)
        return

    # len(): no effect on distributions
    if func_name == 'len' and func_mod in ('__builtin__', 'builtins'):
        return

    # HDF5 read/write does not constrain distributions
    if hpat.config._has_h5py and (func_mod == 'hpat.io.pio_api'
            and func_name in ('h5read', 'h5write', 'h5read_filter')):
        return

    if hpat.config._has_h5py and (func_mod == 'hpat.io.pio_api'
            and func_name == 'get_filter_read_indices'):
        # filter indices are generated as a distributed (OneD) array
        if lhs not in array_dists:
            array_dists[lhs] = Distribution.OneD
        return

    if fdef == ('quantile', 'hpat.hiframes.api'):
        # quantile doesn't affect input's distribution
        return

    if fdef == ('nunique', 'hpat.hiframes.api'):
        # nunique doesn't affect input's distribution
        return

    if fdef == ('unique', 'hpat.hiframes.api'):
        # doesn't affect distribution of input since input can stay 1D
        if lhs not in array_dists:
            array_dists[lhs] = Distribution.OneD_Var
        # NOTE(review): min() over enum values implements the "meet" —
        # smaller value appears to be the more restrictive distribution;
        # confirm against the Distribution enum ordering.
        new_dist = Distribution(
            min(array_dists[lhs].value, array_dists[rhs.args[0].name].value))
        array_dists[lhs] = new_dist
        return

    if fdef == ('rolling_fixed', 'hpat.hiframes.rolling'):
        self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
        return

    if fdef == ('rolling_variable', 'hpat.hiframes.rolling'):
        # lhs, in_arr, on_arr should have the same distribution
        new_dist = self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
        new_dist = self._meet_array_dists(
            lhs, rhs.args[1].name, array_dists, new_dist)
        array_dists[rhs.args[0].name] = new_dist
        return

    if fdef == ('shift', 'hpat.hiframes.rolling'):
        self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
        return

    if fdef == ('pct_change', 'hpat.hiframes.rolling'):
        self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
        return

    if fdef == ('nlargest', 'hpat.hiframes.api'):
        # output of nlargest is REP
        array_dists[lhs] = Distribution.REP
        return

    if fdef == ('median', 'hpat.hiframes.api'):
        return

    if fdef == ('concat', 'hpat.hiframes.api'):
        # hiframes concat is similar to np.concatenate
        self._analyze_call_np_concatenate(lhs, args, array_dists)
        return

    if fdef == ('isna', 'hpat.hiframes.api'):
        return

    if fdef == ('get_series_name', 'hpat.hiframes.api'):
        return

    # dummy hiframes functions: output aliases the first argument, so
    # their distributions are met (unified)
    if func_mod == 'hpat.hiframes.api' and func_name in (
            'get_series_data', 'get_series_index', 'to_arr_from_series',
            'ts_series_to_arr_typ', 'to_date_series_type',
            'dummy_unbox_series', 'parallel_fix_df_array'):
        # TODO: support Series type similar to Array
        self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
        return

    if fdef == ('init_series', 'hpat.hiframes.api'):
        # lhs, in_arr, and index should have the same distribution
        new_dist = self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
        # optional index argument (arg 1), skipped when it is none
        if len(rhs.args) > 1 and self.typemap[
                rhs.args[1].name] != types.none:
            new_dist = self._meet_array_dists(
                lhs, rhs.args[1].name, array_dists, new_dist)
        array_dists[rhs.args[0].name] = new_dist
        return

    if fdef == ('init_dataframe', 'hpat.hiframes.pd_dataframe_ext'):
        # lhs, data arrays, and index should have the same distribution
        df_typ = self.typemap[lhs]
        n_cols = len(df_typ.columns)
        for i in range(n_cols):
            new_dist = self._meet_array_dists(
                lhs, rhs.args[i].name, array_dists)
        # handle index
        if len(rhs.args) > n_cols and self.typemap[
                rhs.args[n_cols].name] != types.none:
            new_dist = self._meet_array_dists(
                lhs, rhs.args[n_cols].name, array_dists, new_dist)
        # second pass writes the final meet back to every column array
        for i in range(n_cols):
            array_dists[rhs.args[i].name] = new_dist
        return

    if fdef == ('get_dataframe_data', 'hpat.hiframes.pd_dataframe_ext'):
        self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
        return

    if fdef == ('compute_split_view', 'hpat.hiframes.split_impl'):
        self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
        return

    if fdef == ('get_split_view_index', 'hpat.hiframes.split_impl'):
        # just used in str.get() implementation for now so we know it is
        # parallel
        # TODO: handle index similar to getitem to support more cases
        return

    if fdef == ('get_split_view_data_ptr', 'hpat.hiframes.split_impl'):
        return

    if fdef == ('setitem_str_arr_ptr', 'hpat.str_arr_ext'):
        return

    if fdef == ('num_total_chars', 'hpat.str_arr_ext'):
        return

    if fdef == ('_series_dropna_str_alloc_impl_inner',
                'hpat.hiframes.series_kernels'):
        # dropna output length is data-dependent, hence OneD_Var default
        if lhs not in array_dists:
            array_dists[lhs] = Distribution.OneD_Var
        in_dist = array_dists[rhs.args[0].name]
        out_dist = array_dists[lhs]
        out_dist = Distribution(min(out_dist.value, in_dist.value))
        array_dists[lhs] = out_dist
        # output can cause input REP
        if out_dist != Distribution.OneD_Var:
            array_dists[rhs.args[0].name] = out_dist
        return

    if (fdef == ('copy_non_null_offsets', 'hpat.str_arr_ext')
            or fdef == ('copy_data', 'hpat.str_arr_ext')):
        # arg 0 is the destination, arg 1 the source: unify their dists
        out_arrname = rhs.args[0].name
        in_arrname = rhs.args[1].name
        self._meet_array_dists(out_arrname, in_arrname, array_dists)
        return

    if fdef == ('str_arr_item_to_numeric', 'hpat.str_arr_ext'):
        # arg 0 is the destination, arg 2 the source string array
        out_arrname = rhs.args[0].name
        in_arrname = rhs.args[2].name
        self._meet_array_dists(out_arrname, in_arrname, array_dists)
        return

    # np.fromfile()
    if fdef == ('file_read', 'hpat.io.np_io'):
        return

    if hpat.config._has_ros and fdef == ('read_ros_images_inner', 'hpat.ros'):
        return

    if hpat.config._has_pyarrow and fdef == (
            'read_parquet', 'hpat.io.parquet_pio'):
        return

    if hpat.config._has_pyarrow and fdef == (
            'read_parquet_str', 'hpat.io.parquet_pio'):
        # string read creates array in output
        if lhs not in array_dists:
            array_dists[lhs] = Distribution.OneD
        return

    # TODO: fix "numba.extending" in function def
    if hpat.config._has_xenon and fdef == (
            'read_xenon_col', 'numba.extending'):
        # arg 4 is forced to REP — presumably column metadata; verify
        # against the read_xenon_col signature
        array_dists[args[4].name] = Distribution.REP
        return

    if hpat.config._has_xenon and fdef == (
            'read_xenon_str', 'numba.extending'):
        array_dists[args[4].name] = Distribution.REP
        # string read creates array in output
        if lhs not in array_dists:
            array_dists[lhs] = Distribution.OneD
        return

    # ML 'train' calls on known model types: meet the two data arguments
    if func_name == 'train' and isinstance(func_mod, ir.Var):
        if self.typemap[func_mod.name] == hpat.ml.svc.svc_type:
            self._meet_array_dists(args[0].name, args[1].name, array_dists,
                                   Distribution.Thread)
            return
        if self.typemap[func_mod.name] == hpat.ml.naive_bayes.mnb_type:
            self._meet_array_dists(args[0].name, args[1].name, array_dists)
            return

    # ML 'predict' calls: meet output with the input data argument
    if func_name == 'predict' and isinstance(func_mod, ir.Var):
        if self.typemap[func_mod.name] == hpat.ml.svc.svc_type:
            self._meet_array_dists(lhs, args[0].name, array_dists,
                                   Distribution.Thread)
            return
        if self.typemap[func_mod.name] == hpat.ml.naive_bayes.mnb_type:
            self._meet_array_dists(lhs, args[0].name, array_dists)
            return

    # TODO: make sure assert_equiv is not generated unnecessarily
    # TODO: fix assert_equiv for np.stack from df.value
    if fdef == ('assert_equiv', 'numba.array_analysis'):
        return

    # we perform call-analysis from external at the end
    # (hooks registered in DistributedAnalysis._extra_call, keyed by
    # (receiver type, method name); a truthy return means handled)
    if isinstance(func_mod, ir.Var):
        ky = (self.typemap[func_mod.name], func_name)
        if ky in DistributedAnalysis._extra_call:
            if DistributedAnalysis._extra_call[ky](lhs, func_mod, *ky,
                                                   args, array_dists):
                return

    # set REP if not found
    self._analyze_call_set_REP(lhs, args, array_dists, fdef)
def _analyze_call(self, lhs, rhs, func_var, args, array_dists):
    """Analyze array distributions in function calls.

    Resolves the callee with ``find_callname`` and dispatches on the
    (function name, module) pair, updating ``array_dists`` — a mutable
    mapping from IR variable name to a ``Distribution`` enum member —
    for the output ``lhs`` and/or the call's arguments.  Branch order
    matters: each recognized case returns immediately, and any call not
    matched falls through to ``_analyze_call_set_REP`` at the bottom
    (which, per its name, presumably forces replication — confirm
    against its definition).

    Args:
        lhs: name of the variable the call result is assigned to.
        rhs: the call expression IR node.
        func_var: variable holding the callee (unused in this body).
        args: list of argument variables of the call.
        array_dists: mapping of variable name -> Distribution, updated in place.
    """
    func_name = ""
    func_mod = ""
    fdef = guard(find_callname, self.func_ir, rhs, self.typemap)
    if fdef is None:
        # unresolvable callee: be conservative and replicate everything
        warnings.warn(
            "function call couldn't be found for distributed analysis")
        self._analyze_call_set_REP(lhs, args, array_dists)
        return
    else:
        func_name, func_mod = fdef

    # allocation calls: freshly allocated arrays start as OneD candidates
    if is_alloc_callname(func_name, func_mod):
        if lhs not in array_dists:
            array_dists[lhs] = Distribution.OneD
        return

    # numpy direct functions
    if isinstance(func_mod, str) and func_mod == 'numpy':
        self._analyze_call_np(lhs, func_name, args, array_dists)
        return

    # handle array.func calls
    if isinstance(func_mod, ir.Var) and is_array(self.typemap, func_mod.name):
        self._analyze_call_array(lhs, func_mod, func_name, args, array_dists)
        return

    # hpat.distributed_api functions
    if isinstance(func_mod, str) and func_mod == 'hpat.distributed_api':
        self._analyze_call_hpat_dist(lhs, func_name, args, array_dists)
        return

    # len(): no effect on distributions
    if func_name == 'len' and func_mod in ('__builtin__', 'builtins'):
        return

    # HDF5 read/write does not constrain distributions
    if hpat.config._has_h5py and (func_mod == 'hpat.pio_api'
                                  and func_name in ['h5read', 'h5write']):
        return

    if fdef == ('quantile', 'hpat.hiframes_api'):
        # quantile doesn't affect input's distribution
        return

    if fdef == ('nunique', 'hpat.hiframes_api'):
        # nunique doesn't affect input's distribution
        return

    if fdef == ('concat', 'hpat.hiframes_api'):
        # hiframes concat is similar to np.concatenate
        self._analyze_call_np_concatenate(lhs, args, array_dists)
        return

    # np.fromfile()
    if fdef == ('file_read', 'hpat.io'):
        return

    if hpat.config._has_ros and fdef == ('read_ros_images_inner', 'hpat.ros'):
        return

    if hpat.config._has_pyarrow and fdef == (
            'read_parquet', 'hpat.parquet_pio'):
        return

    if hpat.config._has_pyarrow and fdef == (
            'read_parquet_str', 'hpat.parquet_pio'):
        # string read creates array in output
        if lhs not in array_dists:
            array_dists[lhs] = Distribution.OneD
        return

    # TODO: fix "numba.extending" in function def
    if hpat.config._has_xenon and fdef == (
            'read_xenon_col', 'numba.extending'):
        # arg 4 is forced to REP — presumably column metadata; verify
        # against the read_xenon_col signature
        array_dists[args[4].name] = Distribution.REP
        return

    if hpat.config._has_xenon and fdef == (
            'read_xenon_str', 'numba.extending'):
        array_dists[args[4].name] = Distribution.REP
        # string read creates array in output
        if lhs not in array_dists:
            array_dists[lhs] = Distribution.OneD
        return

    # ML 'train' calls on known model types: meet the two data arguments
    if func_name == 'train' and isinstance(func_mod, ir.Var):
        if self.typemap[func_mod.name] == hpat.ml.svc.svc_type:
            self._meet_array_dists(args[0].name, args[1].name, array_dists,
                                   Distribution.Thread)
            return
        if self.typemap[func_mod.name] == hpat.ml.naive_bayes.mnb_type:
            self._meet_array_dists(args[0].name, args[1].name, array_dists)
            return

    # ML 'predict' calls: meet output with the input data argument
    if func_name == 'predict' and isinstance(func_mod, ir.Var):
        if self.typemap[func_mod.name] == hpat.ml.svc.svc_type:
            self._meet_array_dists(lhs, args[0].name, array_dists,
                                   Distribution.Thread)
            return
        if self.typemap[func_mod.name] == hpat.ml.naive_bayes.mnb_type:
            self._meet_array_dists(lhs, args[0].name, array_dists)
            return

    # set REP if not found
    self._analyze_call_set_REP(lhs, args, array_dists)