Exemplo n.º 1
0
    def _analyze_call(self, lhs, rhs, func_var, args, array_dists):
        """Analyze array distributions for one function call.

        The callee is resolved with ``find_callname`` and matched on its
        ``(name, module)`` pair.  Depending on the callee, ``array_dists`` is
        updated in place for ``lhs`` and/or the call's argument variables, or
        is left unchanged.  Calls that cannot be resolved (a warning is
        issued) and calls that match no known case are passed to
        ``_analyze_call_set_REP``.

        ``lhs`` is used directly as a key of ``array_dists``; entries of
        ``args`` and ``rhs.args`` are looked up through their ``.name``.
        ``func_var`` is not read by this method.
        """
        callee = guard(find_callname, self.func_ir, rhs, self.typemap)
        if callee is None:
            warnings.warn(
                "function call couldn't be found for distributed analysis")
            self._analyze_call_set_REP(lhs, args, array_dists)
            return

        func_name, func_mod = callee
        # the module part is either a module path string or the variable the
        # method is called on
        mod_is_str = isinstance(func_mod, str)
        mod_is_var = isinstance(func_mod, ir.Var)

        # allocation calls: output starts as OneD unless already recorded
        if is_alloc_callname(func_name, func_mod):
            if lhs not in array_dists:
                array_dists[lhs] = Distribution.OneD
            return

        # numpy direct functions
        if mod_is_str and func_mod == 'numpy':
            self._analyze_call_np(lhs, func_name, args, array_dists)
            return

        # handle array.func calls
        if mod_is_var and is_array(self.typemap, func_mod.name):
            self._analyze_call_array(
                lhs, func_mod, func_name, args, array_dists)
            return

        # hpat.distributed_api functions
        if mod_is_str and func_mod == 'hpat.distributed_api':
            self._analyze_call_hpat_dist(lhs, func_name, args, array_dists)
            return

        # len()
        if func_name == 'len' and func_mod in ('__builtin__', 'builtins'):
            return

        if (hpat.config._has_h5py and func_mod == 'hpat.pio_api'
                and func_name in ('h5read', 'h5write')):
            return

        # calls that leave array_dists untouched: quantile and nunique don't
        # affect the input's distribution; file_read is np.fromfile()
        if callee in (('quantile', 'hpat.hiframes_api'),
                      ('nunique', 'hpat.hiframes_api'),
                      ('median', 'hpat.hiframes_api'),
                      ('isna', 'hpat.hiframes_api'),
                      ('file_read', 'hpat.io')):
            return

        if callee == ('unique', 'hpat.hiframes_api'):
            # doesn't affect distribution of input since input can stay 1D
            if lhs not in array_dists:
                array_dists[lhs] = Distribution.OneD_Var
            in_name = rhs.args[0].name
            # output takes the smaller enum value of output and input
            array_dists[lhs] = Distribution(
                min(array_dists[lhs].value, array_dists[in_name].value))
            return

        # output is met with the first argument
        if callee in (('rolling_fixed', 'hpat.hiframes_rolling'),
                      ('shift', 'hpat.hiframes_rolling'),
                      ('pct_change', 'hpat.hiframes_rolling')):
            self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
            return

        if callee == ('rolling_variable', 'hpat.hiframes_rolling'):
            # lhs, in_arr, on_arr should have the same distribution
            in_arr = rhs.args[0].name
            merged = self._meet_array_dists(lhs, in_arr, array_dists)
            merged = self._meet_array_dists(
                lhs, rhs.args[1].name, array_dists, merged)
            array_dists[in_arr] = merged
            return

        if callee == ('nlargest', 'hpat.hiframes_api'):
            # output of nlargest is REP
            array_dists[lhs] = Distribution.REP
            return

        if callee == ('concat', 'hpat.hiframes_api'):
            # hiframes concat is similar to np.concatenate
            self._analyze_call_np_concatenate(lhs, args, array_dists)
            return

        # dummy hiframes functions
        if func_mod == 'hpat.hiframes_api' and func_name in (
                'to_series_type', 'to_arr_from_series', 'ts_series_to_arr_typ',
                'to_date_series_type'):
            self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
            return

        if hpat.config._has_ros and callee == (
                'read_ros_images_inner', 'hpat.ros'):
            return

        if hpat.config._has_pyarrow and callee in (
                ('read_parquet', 'hpat.parquet_pio'),
                ('read_parquet_str', 'hpat.parquet_pio')):
            # string read creates array in output
            if func_name == 'read_parquet_str' and lhs not in array_dists:
                array_dists[lhs] = Distribution.OneD
            return

        # TODO: fix "numba.extending" in function def
        if hpat.config._has_xenon and callee in (
                ('read_xenon_col', 'numba.extending'),
                ('read_xenon_str', 'numba.extending')):
            array_dists[args[4].name] = Distribution.REP
            # string read creates array in output
            if func_name == 'read_xenon_str' and lhs not in array_dists:
                array_dists[lhs] = Distribution.OneD
            return

        # train()/predict() on svc and naive bayes model objects
        if mod_is_var and func_name in ('train', 'predict'):
            model_typ = self.typemap[func_mod.name]
            is_svc = model_typ == hpat.ml.svc.svc_type
            if is_svc or model_typ == hpat.ml.naive_bayes.mnb_type:
                # train meets its two inputs, predict meets output and input
                if func_name == 'train':
                    left, right = args[0].name, args[1].name
                else:
                    left, right = lhs, args[0].name
                if is_svc:
                    self._meet_array_dists(left, right, array_dists,
                                           Distribution.Thread)
                else:
                    self._meet_array_dists(left, right, array_dists)
                return

        # a truthy result from the d4p handler ends the analysis of this call
        if mod_is_var and self._analyze_call_d4p(
                lhs, func_name, self.typemap[func_mod.name], args,
                array_dists):
            return

        # TODO: make sure assert_equiv is not generated unnecessarily
        # TODO: fix assert_equiv for np.stack from df.value
        if callee == ('assert_equiv', 'numba.array_analysis'):
            return

        # set REP if not found
        self._analyze_call_set_REP(lhs, args, array_dists)
Exemplo n.º 2
0
    def _analyze_call(self, lhs, rhs, func_var, args, array_dists):
        """Analyze array distributions for one function call.

        The callee is resolved with ``find_callname`` and matched on its
        ``(name, module)`` pair.  Depending on the callee, ``array_dists`` is
        updated in place for ``lhs`` and/or the call's argument variables, or
        is left unchanged.  Calls that match no built-in case are offered to
        the handlers registered in ``DistributedAnalysis._extra_call`` and
        otherwise passed to ``_analyze_call_set_REP``.

        When the callee cannot be resolved, a call to an
        ``ObjModeLiftedWith`` dispatcher is left alone; any other unresolved
        call triggers a warning and goes to ``_analyze_call_set_REP``.

        ``lhs`` is used directly as a key of ``array_dists`` and
        ``typemap``; entries of ``args`` and ``rhs.args`` are looked up
        through their ``.name``.  ``func_var`` is not read by this method.
        """
        fdef = guard(find_callname, self.func_ir, rhs, self.typemap)
        if fdef is None:
            # check ObjModeLiftedWith, we assume distribution doesn't change
            # blocks of data are passed in, TODO: document
            func_def = guard(get_definition, self.func_ir, rhs.func)
            if isinstance(func_def, ir.Const) and isinstance(
                    func_def.value, numba.dispatcher.ObjModeLiftedWith):
                return
            warnings.warn(
                "function call couldn't be found for distributed analysis")
            self._analyze_call_set_REP(lhs, args, array_dists, fdef)
            return

        func_name, func_mod = fdef

        # allocation calls: output starts as OneD unless already recorded
        if is_alloc_callname(func_name, func_mod):
            if lhs not in array_dists:
                array_dists[lhs] = Distribution.OneD
            return

        # numpy direct functions
        if isinstance(func_mod, str) and func_mod == 'numpy':
            self._analyze_call_np(lhs, func_name, args, array_dists)
            return

        # handle array.func calls
        if isinstance(func_mod, ir.Var) and is_array(self.typemap,
                                                     func_mod.name):
            self._analyze_call_array(lhs, func_mod, func_name, args,
                                     array_dists)
            return

        # handle df.func calls
        if isinstance(func_mod, ir.Var) and isinstance(
                self.typemap[func_mod.name], DataFrameType):
            self._analyze_call_df(lhs, func_mod, func_name, args, array_dists)
            return

        # hpat.distributed_api functions
        if isinstance(func_mod, str) and func_mod == 'hpat.distributed_api':
            self._analyze_call_hpat_dist(lhs, func_name, args, array_dists)
            return

        # len()
        if func_name == 'len' and func_mod in ('__builtin__', 'builtins'):
            return

        if hpat.config._has_h5py and (func_mod == 'hpat.io.pio_api'
                                      and func_name in ('h5read', 'h5write',
                                                        'h5read_filter')):
            return

        if hpat.config._has_h5py and (func_mod == 'hpat.io.pio_api' and
                                      func_name == 'get_filter_read_indices'):
            if lhs not in array_dists:
                array_dists[lhs] = Distribution.OneD
            return

        if fdef == ('quantile', 'hpat.hiframes.api'):
            # quantile doesn't affect input's distribution
            return

        if fdef == ('nunique', 'hpat.hiframes.api'):
            # nunique doesn't affect input's distribution
            return

        if fdef == ('unique', 'hpat.hiframes.api'):
            # doesn't affect distribution of input since input can stay 1D
            if lhs not in array_dists:
                array_dists[lhs] = Distribution.OneD_Var

            # output takes the smaller enum value of output and input
            new_dist = Distribution(
                min(array_dists[lhs].value,
                    array_dists[rhs.args[0].name].value))
            array_dists[lhs] = new_dist
            return

        if fdef == ('rolling_fixed', 'hpat.hiframes.rolling'):
            self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
            return

        if fdef == ('rolling_variable', 'hpat.hiframes.rolling'):
            # lhs, in_arr, on_arr should have the same distribution
            new_dist = self._meet_array_dists(lhs, rhs.args[0].name,
                                              array_dists)
            new_dist = self._meet_array_dists(lhs, rhs.args[1].name,
                                              array_dists, new_dist)
            array_dists[rhs.args[0].name] = new_dist
            return

        if fdef == ('shift', 'hpat.hiframes.rolling'):
            self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
            return

        if fdef == ('pct_change', 'hpat.hiframes.rolling'):
            self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
            return

        if fdef == ('nlargest', 'hpat.hiframes.api'):
            # output of nlargest is REP
            array_dists[lhs] = Distribution.REP
            return

        if fdef == ('median', 'hpat.hiframes.api'):
            return

        if fdef == ('concat', 'hpat.hiframes.api'):
            # hiframes concat is similar to np.concatenate
            self._analyze_call_np_concatenate(lhs, args, array_dists)
            return

        if fdef == ('isna', 'hpat.hiframes.api'):
            return

        if fdef == ('get_series_name', 'hpat.hiframes.api'):
            return

        # dummy hiframes functions
        if func_mod == 'hpat.hiframes.api' and func_name in (
                'get_series_data', 'get_series_index', 'to_arr_from_series',
                'ts_series_to_arr_typ', 'to_date_series_type',
                'dummy_unbox_series', 'parallel_fix_df_array'):
            # TODO: support Series type similar to Array
            self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
            return

        if fdef == ('init_series', 'hpat.hiframes.api'):
            # lhs, in_arr, and index should have the same distribution
            new_dist = self._meet_array_dists(lhs, rhs.args[0].name,
                                              array_dists)
            if len(rhs.args) > 1 and self.typemap[
                    rhs.args[1].name] != types.none:
                new_dist = self._meet_array_dists(lhs, rhs.args[1].name,
                                                  array_dists, new_dist)
                array_dists[rhs.args[0].name] = new_dist
            return

        if fdef == ('init_dataframe', 'hpat.hiframes.pd_dataframe_ext'):
            # lhs, data arrays, and index should have the same distribution
            df_typ = self.typemap[lhs]
            n_cols = len(df_typ.columns)
            for i in range(n_cols):
                new_dist = self._meet_array_dists(lhs, rhs.args[i].name,
                                                  array_dists)
            # handle index
            if len(rhs.args) > n_cols and self.typemap[
                    rhs.args[n_cols].name] != types.none:
                index_name = rhs.args[n_cols].name
                if n_cols:
                    new_dist = self._meet_array_dists(lhs, index_name,
                                                      array_dists, new_dist)
                else:
                    # a dataframe type without columns never ran the loop
                    # above, so there is no running distribution to pass on;
                    # meet lhs with the index alone instead of reading an
                    # unbound local
                    self._meet_array_dists(lhs, index_name, array_dists)
            # propagate the final distribution back to every data array
            for i in range(n_cols):
                array_dists[rhs.args[i].name] = new_dist
            return

        if fdef == ('get_dataframe_data', 'hpat.hiframes.pd_dataframe_ext'):
            self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
            return

        if fdef == ('compute_split_view', 'hpat.hiframes.split_impl'):
            self._meet_array_dists(lhs, rhs.args[0].name, array_dists)
            return

        if fdef == ('get_split_view_index', 'hpat.hiframes.split_impl'):
            # just used in str.get() implementation for now so we know it is
            # parallel
            # TODO: handle index similar to getitem to support more cases
            return

        if fdef == ('get_split_view_data_ptr', 'hpat.hiframes.split_impl'):
            return

        if fdef == ('setitem_str_arr_ptr', 'hpat.str_arr_ext'):
            return

        if fdef == ('num_total_chars', 'hpat.str_arr_ext'):
            return

        if fdef == ('_series_dropna_str_alloc_impl_inner',
                    'hpat.hiframes.series_kernels'):
            if lhs not in array_dists:
                array_dists[lhs] = Distribution.OneD_Var
            in_dist = array_dists[rhs.args[0].name]
            out_dist = array_dists[lhs]
            out_dist = Distribution(min(out_dist.value, in_dist.value))
            array_dists[lhs] = out_dist
            # output can cause input REP
            if out_dist != Distribution.OneD_Var:
                array_dists[rhs.args[0].name] = out_dist
            return

        if (fdef == ('copy_non_null_offsets', 'hpat.str_arr_ext')
                or fdef == ('copy_data', 'hpat.str_arr_ext')):
            # first argument is the output array, second the input array
            out_arrname = rhs.args[0].name
            in_arrname = rhs.args[1].name
            self._meet_array_dists(out_arrname, in_arrname, array_dists)
            return

        if fdef == ('str_arr_item_to_numeric', 'hpat.str_arr_ext'):
            # first argument is the output array, third the input array
            out_arrname = rhs.args[0].name
            in_arrname = rhs.args[2].name
            self._meet_array_dists(out_arrname, in_arrname, array_dists)
            return

        # np.fromfile()
        if fdef == ('file_read', 'hpat.io.np_io'):
            return

        if hpat.config._has_ros and fdef == ('read_ros_images_inner',
                                             'hpat.ros'):
            return

        if hpat.config._has_pyarrow and fdef == ('read_parquet',
                                                 'hpat.io.parquet_pio'):
            return

        if hpat.config._has_pyarrow and fdef == ('read_parquet_str',
                                                 'hpat.io.parquet_pio'):
            # string read creates array in output
            if lhs not in array_dists:
                array_dists[lhs] = Distribution.OneD
            return

        # TODO: fix "numba.extending" in function def
        if hpat.config._has_xenon and fdef == ('read_xenon_col',
                                               'numba.extending'):
            array_dists[args[4].name] = Distribution.REP
            return

        if hpat.config._has_xenon and fdef == ('read_xenon_str',
                                               'numba.extending'):
            array_dists[args[4].name] = Distribution.REP
            # string read creates array in output
            if lhs not in array_dists:
                array_dists[lhs] = Distribution.OneD
            return

        if func_name == 'train' and isinstance(func_mod, ir.Var):
            if self.typemap[func_mod.name] == hpat.ml.svc.svc_type:
                self._meet_array_dists(args[0].name, args[1].name, array_dists,
                                       Distribution.Thread)
                return
            if self.typemap[func_mod.name] == hpat.ml.naive_bayes.mnb_type:
                self._meet_array_dists(args[0].name, args[1].name, array_dists)
                return

        if func_name == 'predict' and isinstance(func_mod, ir.Var):
            if self.typemap[func_mod.name] == hpat.ml.svc.svc_type:
                self._meet_array_dists(lhs, args[0].name, array_dists,
                                       Distribution.Thread)
                return
            if self.typemap[func_mod.name] == hpat.ml.naive_bayes.mnb_type:
                self._meet_array_dists(lhs, args[0].name, array_dists)
                return

        # TODO: make sure assert_equiv is not generated unnecessarily
        # TODO: fix assert_equiv for np.stack from df.value
        if fdef == ('assert_equiv', 'numba.array_analysis'):
            return

        # we perform call-analysis from external at the end
        if isinstance(func_mod, ir.Var):
            # handlers are keyed by (type of the receiver, method name); a
            # truthy result from a handler ends the analysis of this call
            ky = (self.typemap[func_mod.name], func_name)
            if ky in DistributedAnalysis._extra_call:
                if DistributedAnalysis._extra_call[ky](lhs, func_mod, *ky,
                                                       args, array_dists):
                    return

        # set REP if not found
        self._analyze_call_set_REP(lhs, args, array_dists, fdef)
Exemplo n.º 3
0
    def _analyze_call(self, lhs, rhs, func_var, args, array_dists):
        """Analyze array distributions for one function call.

        The callee is resolved with ``find_callname`` and matched on its
        ``(name, module)`` pair.  Depending on the callee, ``array_dists`` is
        updated in place for ``lhs`` and/or the call's argument variables, or
        is left unchanged.  Calls that cannot be resolved (a warning is
        issued) and calls that match no known case are passed to
        ``_analyze_call_set_REP``.

        ``lhs`` is used directly as a key of ``array_dists``; entries of
        ``args`` are looked up through their ``.name``.  ``func_var`` is not
        read by this method.
        """
        callee = guard(find_callname, self.func_ir, rhs, self.typemap)
        if callee is None:
            warnings.warn(
                "function call couldn't be found for distributed analysis")
            self._analyze_call_set_REP(lhs, args, array_dists)
            return

        func_name, func_mod = callee
        # the module part is either a module path string or the variable the
        # method is called on
        mod_is_str = isinstance(func_mod, str)
        mod_is_var = isinstance(func_mod, ir.Var)

        # allocation calls: output starts as OneD unless already recorded
        if is_alloc_callname(func_name, func_mod):
            if lhs not in array_dists:
                array_dists[lhs] = Distribution.OneD
            return

        # numpy direct functions
        if mod_is_str and func_mod == 'numpy':
            self._analyze_call_np(lhs, func_name, args, array_dists)
            return

        # handle array.func calls
        if mod_is_var and is_array(self.typemap, func_mod.name):
            self._analyze_call_array(
                lhs, func_mod, func_name, args, array_dists)
            return

        # hpat.distributed_api functions
        if mod_is_str and func_mod == 'hpat.distributed_api':
            self._analyze_call_hpat_dist(lhs, func_name, args, array_dists)
            return

        # len()
        if func_name == 'len' and func_mod in ('__builtin__', 'builtins'):
            return

        if (hpat.config._has_h5py and func_mod == 'hpat.pio_api'
                and func_name in ('h5read', 'h5write')):
            return

        # calls that leave array_dists untouched: quantile and nunique don't
        # affect the input's distribution; file_read is np.fromfile()
        if callee in (('quantile', 'hpat.hiframes_api'),
                      ('nunique', 'hpat.hiframes_api'),
                      ('file_read', 'hpat.io')):
            return

        if callee == ('concat', 'hpat.hiframes_api'):
            # hiframes concat is similar to np.concatenate
            self._analyze_call_np_concatenate(lhs, args, array_dists)
            return

        if hpat.config._has_ros and callee == (
                'read_ros_images_inner', 'hpat.ros'):
            return

        if hpat.config._has_pyarrow and callee in (
                ('read_parquet', 'hpat.parquet_pio'),
                ('read_parquet_str', 'hpat.parquet_pio')):
            # string read creates array in output
            if func_name == 'read_parquet_str' and lhs not in array_dists:
                array_dists[lhs] = Distribution.OneD
            return

        # TODO: fix "numba.extending" in function def
        if hpat.config._has_xenon and callee in (
                ('read_xenon_col', 'numba.extending'),
                ('read_xenon_str', 'numba.extending')):
            array_dists[args[4].name] = Distribution.REP
            # string read creates array in output
            if func_name == 'read_xenon_str' and lhs not in array_dists:
                array_dists[lhs] = Distribution.OneD
            return

        # train()/predict() on svc and naive bayes model objects
        if mod_is_var and func_name in ('train', 'predict'):
            model_typ = self.typemap[func_mod.name]
            is_svc = model_typ == hpat.ml.svc.svc_type
            if is_svc or model_typ == hpat.ml.naive_bayes.mnb_type:
                # train meets its two inputs, predict meets output and input
                if func_name == 'train':
                    left, right = args[0].name, args[1].name
                else:
                    left, right = lhs, args[0].name
                if is_svc:
                    self._meet_array_dists(left, right, array_dists,
                                           Distribution.Thread)
                else:
                    self._meet_array_dists(left, right, array_dists)
                return

        # set REP if not found
        self._analyze_call_set_REP(lhs, args, array_dists)