def predict(self, X):
    """Predict using the linear model.

    Parameters
    ----------
    X : array-like or sparse matrix, shape = (n_samples, n_features)
        Samples.

    Returns
    -------
    C : array, shape = (n_samples,)
        Returns predicted values.
    """
    X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
                    dtype=[np.float64, np.float32])
    good_shape_for_daal = X.ndim <= 1 or X.shape[0] >= X.shape[1]

    if not hasattr(self, 'daal_model_') or \
            sp.issparse(X) or \
            not good_shape_for_daal:
        logging.info(
            "sklearn.linear_model.Lasso.predict: " +
            get_patch_message("sklearn"))
        return self._decision_function(X)
    logging.info(
        "sklearn.linear_model.Lasso.predict: " + get_patch_message("daal"))
    return _daal4py_predict_lasso(self, X)
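# Usage sketch (illustrative, not part of the patched module): with the
# daal4py patches applied, predict dispatches to oneDAL only when a fitted
# daal_model_ exists, X is dense, and X has at least as many rows as
# columns; anything else falls back to stock sklearn. The demo helper
# below is hypothetical.
def _demo_lasso_predict():
    import numpy as np
    from sklearn.linear_model import Lasso

    rng = np.random.RandomState(0)
    X = rng.rand(100, 5)                      # dense, n_samples >= n_features
    y = X @ rng.rand(5)
    model = Lasso(alpha=0.1).fit(X, y)
    pred_daal_eligible = model.predict(X)     # shape satisfies the daal path
    X_wide = rng.rand(3, 5)                   # fewer rows than columns
    pred_fallback = model.predict(X_wide)     # stock sklearn path
    return pred_daal_eligible, pred_fallback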
def fit(self, X, y=None, sample_weight=None):
    """Perform DBSCAN clustering from features, or distance matrix.

    Parameters
    ----------
    X : array-like or sparse matrix, shape (n_samples, n_features), or \
            (n_samples, n_samples)
        Training instances to cluster, or distances between instances if
        ``metric='precomputed'``. If a sparse matrix is provided, it will
        be converted into a sparse ``csr_matrix``.

    sample_weight : array, shape (n_samples,), optional
        Weight of each sample, such that a sample with a weight of at
        least ``min_samples`` is by itself a core sample; a sample with a
        negative weight may inhibit its eps-neighbor from being core.
        Note that weights are absolute, and default to 1.

    y : Ignored
        Not used, present here for API consistency by convention.

    Returns
    -------
    self
    """
    X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32])
    if self.eps <= 0.0:
        raise ValueError("eps must be positive.")
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X)

    _daal_ready = self.algorithm in ['auto', 'brute'] and \
        (self.metric == 'euclidean' or
         (self.metric == 'minkowski' and self.p == 2)) and \
        isinstance(X, np.ndarray)

    if _daal_ready:
        logging.info(
            "sklearn.cluster.DBSCAN.fit: " + get_patch_message("daal"))
        core_ind, assignments = _daal_dbscan(
            X, self.eps, self.min_samples, sample_weight=sample_weight)
        self.core_sample_indices_ = core_ind
        self.labels_ = assignments
        self.components_ = np.take(X, core_ind, axis=0)
        return self
    logging.info(
        "sklearn.cluster.DBSCAN.fit: " + get_patch_message("sklearn"))
    return super().fit(X, y, sample_weight=sample_weight)
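# Usage sketch (illustrative): the oneDAL branch above is taken only for a
# dense float ndarray with algorithm in {'auto', 'brute'} and a Euclidean
# metric (or the equivalent minkowski with p=2). The demo function name is
# hypothetical.
def _demo_dbscan_fit():
    import numpy as np
    from sklearn.cluster import DBSCAN

    X = np.array([[1.0, 2.0], [2.0, 2.0], [2.0, 3.0],
                  [8.0, 7.0], [8.0, 8.0], [25.0, 80.0]])
    # metric='euclidean' on a dense ndarray satisfies _daal_ready.
    labels = DBSCAN(eps=3.0, min_samples=2).fit(X).labels_
    # metric='cosine' fails the metric condition, so stock sklearn runs.
    labels_cosine = DBSCAN(eps=0.5, metric='cosine').fit(X).labels_
    return labels, labels_cosine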
def fit(self, X, y, sample_weight=None, check_input=True):
    """Fit model with coordinate descent.

    Parameters
    ----------
    X : {ndarray, sparse matrix} of (n_samples, n_features)
        Data.

    y : {ndarray, sparse matrix} of shape (n_samples,) or \
            (n_samples, n_targets)
        Target. Will be cast to X's dtype if necessary.

    sample_weight : float or array-like of shape (n_samples,), default=None
        Sample weights.

    check_input : bool, default=True
        Allow to bypass several input checking.
        Don't use this parameter unless you know what you do.

    Notes
    -----
    Coordinate descent is an algorithm that considers each column of
    data at a time, hence it will automatically convert the X input
    to a Fortran-contiguous numpy array if necessary.

    To avoid memory re-allocation it is advised to allocate the
    initial data in memory directly using that format.
    """
    # check X and y
    if check_input:
        X, y = check_X_y(X, y, copy=False, accept_sparse='csc',
                         dtype=[np.float64, np.float32],
                         multi_output=True, y_numeric=True)
        y = check_array(y, copy=False, dtype=X.dtype.type, ensure_2d=False)
    else:
        # only for compliance with sklearn; this check is not required
        # for Intel(R) oneAPI Data Analytics Library
        if isinstance(X, np.ndarray) and not X.flags['F_CONTIGUOUS']:
            raise ValueError("ndarray is not Fortran contiguous")

    if isinstance(X, np.ndarray):
        self.fit_shape_good_for_daal_ = \
            X.ndim <= 1 or X.shape[0] >= X.shape[1]
    else:
        self.fit_shape_good_for_daal_ = False

    if sp.issparse(X) or \
            sample_weight is not None or \
            not self.fit_shape_good_for_daal_ or \
            X.dtype not in [np.float64, np.float32]:
        if hasattr(self, 'daal_model_'):
            del self.daal_model_
        logging.info(
            "sklearn.linear_model.Lasso.fit: " +
            get_patch_message("sklearn"))
        res_new = super(ElasticNet, self).fit(
            X, y, sample_weight=sample_weight, check_input=check_input)
        self._gap = res_new.dual_gap_
        return res_new

    self.n_iter_ = None
    self._gap = None

    # only to pass the tests
    # "check_estimators_fit_returns_self(readonly_memmap=True)" and
    # "check_regressors_train(readonly_memmap=True)"
    if not X.flags.writeable:
        X = np.copy(X)
    if not y.flags.writeable:
        y = np.copy(y)

    logging.info(
        "sklearn.linear_model.Lasso.fit: " + get_patch_message("daal"))
    res = _daal4py_fit_lasso(self, X, y, check_input=check_input)
    if res is None:
        if hasattr(self, 'daal_model_'):
            del self.daal_model_
        logging.info(
            "sklearn.linear_model.Lasso.fit: " +
            get_patch_message("sklearn_after_daal"))
        res_new = super(ElasticNet, self).fit(
            X, y, sample_weight=sample_weight, check_input=check_input)
        self._gap = res_new.dual_gap_
        return res_new
    return res
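# Usage sketch (illustrative): when check_input=False, the fit above skips
# validation and expects a Fortran-contiguous float ndarray, matching
# sklearn's coordinate-descent contract; otherwise it raises ValueError.
# The demo function name is hypothetical.
def _demo_lasso_fit_fortran():
    import numpy as np
    from sklearn.linear_model import Lasso

    rng = np.random.RandomState(0)
    X = np.asfortranarray(rng.rand(50, 4))   # column-major, as advised
    y = X @ rng.rand(4)                      # float64, 1-D target
    return Lasso(alpha=0.01).fit(X, y, check_input=False)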
def _fit(self, X, y, sample_weight=None, check_input=True):
    if sklearn_check_version('1.0'):
        self._check_feature_names(X, reset=True)
    # check X and y
    if check_input:
        X, y = check_X_y(
            X, y,
            copy=False,
            accept_sparse='csc',
            dtype=[np.float64, np.float32],
            multi_output=True,
            y_numeric=True,
        )
        y = check_array(y, copy=False, dtype=X.dtype.type, ensure_2d=False)

    if not sp.issparse(X):
        self.fit_shape_good_for_daal_ = \
            X.ndim <= 1 or X.shape[0] >= X.shape[1]
    else:
        self.fit_shape_good_for_daal_ = False

    _function_name = f"sklearn.linear_model.{self.__class__.__name__}.fit"
    _patching_status = PatchingConditionsChain(_function_name)
    _dal_ready = _patching_status.and_conditions([
        (not sp.issparse(X), "X is sparse. Sparse input is not supported."),
        (self.fit_shape_good_for_daal_,
            "The shape of X does not satisfy oneDAL requirements: "
            "number of features > number of samples."),
        (X.dtype == np.float64 or X.dtype == np.float32,
            f"'{X.dtype}' X data type is not supported. "
            "Only np.float32 and np.float64 are supported."),
        (sample_weight is None, "Sample weights are not supported.")])
    _patching_status.write_log()
    if not _dal_ready:
        if hasattr(self, 'daal_model_'):
            del self.daal_model_
        if sklearn_check_version('0.23'):
            res_new = super(ElasticNet, self).fit(
                X, y, sample_weight=sample_weight, check_input=check_input)
        else:
            res_new = super(ElasticNet, self).fit(
                X, y, check_input=check_input)
        self._gap = res_new.dual_gap_
        return res_new

    self.n_iter_ = None
    self._gap = None

    if not check_input:
        # only for compliance with sklearn; this check is not required
        # for Intel(R) oneAPI Data Analytics Library
        if isinstance(X, np.ndarray) and not X.flags['F_CONTIGUOUS']:
            raise ValueError("ndarray is not Fortran contiguous")

    if sklearn_check_version('1.0'):
        self._normalize = _deprecate_normalize(
            self.normalize,
            default=False,
            estimator_name=self.__class__.__name__)

    # only to pass the tests
    # "check_estimators_fit_returns_self(readonly_memmap=True)" and
    # "check_regressors_train(readonly_memmap=True)"
    if not X.flags.writeable:
        X = np.copy(X)
    if not y.flags.writeable:
        y = np.copy(y)

    if self.__class__.__name__ == "ElasticNet":
        res = _daal4py_fit_enet(self, X, y, check_input=check_input)
    else:
        res = _daal4py_fit_lasso(self, X, y, check_input=check_input)
    if res is None:
        if hasattr(self, 'daal_model_'):
            del self.daal_model_
        logging.info(
            _function_name + ": " + get_patch_message("sklearn_after_daal"))
        if sklearn_check_version('0.23'):
            res_new = super(ElasticNet, self).fit(
                X, y, sample_weight=sample_weight, check_input=check_input)
        else:
            res_new = super(ElasticNet, self).fit(
                X, y, check_input=check_input)
        self._gap = res_new.dual_gap_
        return res_new
    return res
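# Usage sketch (illustrative): the patching decision above is written to the
# log, so enabling INFO-level logging shows whether oneDAL or stock sklearn
# handled the call, together with the first failed condition. Only standard
# library and sklearn names are used; the demo function is hypothetical.
def _demo_patching_log():
    import logging
    import numpy as np
    from sklearn.linear_model import ElasticNet

    logging.getLogger().setLevel(logging.INFO)
    rng = np.random.RandomState(0)
    X = rng.rand(200, 10)
    y = X @ rng.rand(10)
    # Dense float64 input with n_samples >= n_features and no sample
    # weights passes every condition in the chain.
    return ElasticNet(alpha=0.5).fit(X, y)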
def _fit(self, X, y, sample_weight=None, check_input=True):
    # check X and y
    if check_input:
        X, y = check_X_y(
            X, y,
            copy=False,
            accept_sparse='csc',
            dtype=[np.float64, np.float32],
            multi_output=True,
            y_numeric=True,
        )
        y = check_array(y, copy=False, dtype=X.dtype.type, ensure_2d=False)

    if not sp.issparse(X):
        self.fit_shape_good_for_daal_ = \
            X.ndim <= 1 or X.shape[0] >= X.shape[1]
    else:
        self.fit_shape_good_for_daal_ = False

    log_str = "sklearn.linear_model." + self.__class__.__name__ + ".fit: "
    sklearn_ready = sp.issparse(X) or not self.fit_shape_good_for_daal_ or \
        X.dtype not in [np.float64, np.float32] or sample_weight is not None

    if sklearn_ready:
        if hasattr(self, 'daal_model_'):
            del self.daal_model_
        logging.info(log_str + get_patch_message("sklearn"))
        if sklearn_check_version('0.23'):
            res_new = super(ElasticNet, self).fit(
                X, y, sample_weight=sample_weight, check_input=check_input)
        else:
            res_new = super(ElasticNet, self).fit(
                X, y, check_input=check_input)
        self._gap = res_new.dual_gap_
        return res_new

    self.n_iter_ = None
    self._gap = None

    if not check_input:
        # only for compliance with sklearn; this check is not required
        # for Intel(R) oneAPI Data Analytics Library
        if isinstance(X, np.ndarray) and not X.flags['F_CONTIGUOUS']:
            raise ValueError("ndarray is not Fortran contiguous")

    if sklearn_check_version('1.0'):
        self._normalize = _deprecate_normalize(
            self.normalize,
            default=False,
            estimator_name=self.__class__.__name__)

    # only to pass the tests
    # "check_estimators_fit_returns_self(readonly_memmap=True)" and
    # "check_regressors_train(readonly_memmap=True)"
    if not X.flags.writeable:
        X = np.copy(X)
    if not y.flags.writeable:
        y = np.copy(y)

    logging.info(log_str + get_patch_message("daal"))
    if self.__class__.__name__ == "ElasticNet":
        res = _daal4py_fit_enet(self, X, y, check_input=check_input)
    else:
        res = _daal4py_fit_lasso(self, X, y, check_input=check_input)
    if res is None:
        if hasattr(self, 'daal_model_'):
            del self.daal_model_
        logging.info(log_str + get_patch_message("sklearn_after_daal"))
        if sklearn_check_version('0.23'):
            res_new = super(ElasticNet, self).fit(
                X, y, sample_weight=sample_weight, check_input=check_input)
        else:
            res_new = super(ElasticNet, self).fit(
                X, y, check_input=check_input)
        self._gap = res_new.dual_gap_
        return res_new
    return res
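# Usage sketch (illustrative): any one of the fallback triggers above --
# sparse X, a bad shape, an unsupported dtype, or sample weights -- routes
# the call to stock sklearn; otherwise the oneDAL solver is tried first.
# The demo function name is hypothetical.
def _demo_fit_fallbacks():
    import numpy as np
    import scipy.sparse as sp
    from sklearn.linear_model import Lasso

    rng = np.random.RandomState(0)
    X = rng.rand(60, 4)
    y = X @ rng.rand(4)
    daal_eligible = Lasso(alpha=0.1).fit(X, y)            # oneDAL path
    sparse_fallback = Lasso(alpha=0.1).fit(sp.csc_matrix(X), y)
    weighted_fallback = Lasso(alpha=0.1).fit(
        X, y, sample_weight=np.ones(60))
    return daal_eligible, sparse_fallback, weighted_fallback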
def _daal_train_test_split(*arrays, **options):
    n_arrays = len(arrays)
    if n_arrays == 0:
        raise ValueError("At least one array required as input")
    test_size = options.pop('test_size', None)
    train_size = options.pop('train_size', None)
    random_state = options.pop('random_state', None)
    stratify = options.pop('stratify', None)
    shuffle = options.pop('shuffle', True)
    rng = options.pop('rng', 'OPTIMIZED_MT19937')

    available_rngs = ['default', 'MT19937', 'SFMT19937', 'MT2203', 'R250',
                      'WH', 'MCG31', 'MCG59', 'MRG32K3A', 'PHILOX4X32X10',
                      'NONDETERM', 'OPTIMIZED_MT19937']
    if rng not in available_rngs:
        raise ValueError(
            "Wrong random numbers generator is chosen. "
            "Available generators: %s" % str(available_rngs)[1:-1])

    if options:
        raise TypeError("Invalid parameters passed: %s" % str(options))

    arrays = indexable(*arrays)

    n_samples = _num_samples(arrays[0])
    n_train, n_test = _validate_shuffle_split(
        n_samples, test_size, train_size, default_test_size=0.25
    )
    if shuffle is False:
        if stratify is not None:
            raise ValueError(
                "Stratified train/test split is not implemented for "
                "shuffle=False")
        train = np.arange(n_train)
        test = np.arange(n_train, n_train + n_test)
    else:
        if stratify is not None:
            cv = StratifiedShuffleSplit(
                test_size=n_test,
                train_size=n_train,
                random_state=random_state
            )
            train, test = next(cv.split(X=arrays[0], y=stratify))
        else:
            if mkl_random_is_imported and \
                    rng not in ['default', 'OPTIMIZED_MT19937'] and \
                    (isinstance(random_state, int) or random_state is None):
                random_state = mkl_random.RandomState(random_state, rng)
                indexes = random_state.permutation(n_samples)
                test, train = indexes[:n_test], \
                    indexes[n_test:(n_test + n_train)]
            elif rng == 'OPTIMIZED_MT19937' and \
                    (isinstance(random_state, int) or
                     random_state is None) and \
                    platform.system() != 'Windows':
                indexes = np.empty(
                    shape=(n_samples,),
                    dtype=np.int64 if n_train + n_test > 2 ** 31 - 1
                    else np.int32,
                )
                random_state = np.random.RandomState(random_state)
                random_state = random_state.get_state()[1]
                d4p.daal_generate_shuffled_indices([indexes], [random_state])
                test, train = indexes[:n_test], \
                    indexes[n_test:(n_test + n_train)]
            else:
                cv = ShuffleSplit(
                    test_size=n_test,
                    train_size=n_train,
                    random_state=random_state
                )
                train, test = next(cv.split(X=arrays[0], y=stratify))

    res = []
    for arr in arrays:
        fallback = False

        # input format check
        if not isinstance(arr, np.ndarray):
            if pandas_is_imported:
                if not isinstance(arr, pd.core.frame.DataFrame) and \
                        not isinstance(arr, pd.core.series.Series):
                    fallback = True
            else:
                fallback = True

        # dimensions check
        if hasattr(arr, 'ndim'):
            if arr.ndim > 2:
                fallback = True
        else:
            fallback = True

        # data types check
        dtypes = get_dtypes(arr)
        if dtypes is None:
            fallback = True
        else:
            for dtype in dtypes:
                if 'float' not in str(dtype) and 'int' not in str(dtype):
                    fallback = True
                    break

        if fallback:
            logging.info(
                "sklearn.model_selection.train_test_split: " +
                get_patch_message("sklearn"))
            res.append(safe_indexing(arr, train))
            res.append(safe_indexing(arr, test))
        else:
            logging.info(
                "sklearn.model_selection.train_test_split: " +
                get_patch_message("daal"))
            if len(arr.shape) == 2:
                n_cols = arr.shape[1]
                reshape_later = False
            else:
                n_cols = 1
                reshape_later = True

            arr_copy = d4p.get_data(arr)
            if not isinstance(arr_copy, list):
                arr_copy = arr_copy.reshape(
                    (arr_copy.shape[0], n_cols),
                    order='A',
                )
            if isinstance(arr_copy, np.ndarray):
                order = 'C' if arr_copy.flags['C_CONTIGUOUS'] else 'F'
                train_arr = np.empty(
                    shape=(n_train, n_cols),
                    dtype=arr_copy.dtype,
                    order=order,
                )
                test_arr = np.empty(
                    shape=(n_test, n_cols),
                    dtype=arr_copy.dtype,
                    order=order,
                )
                d4p.daal_train_test_split(
                    arr_copy, train_arr, test_arr, [train], [test])
                if reshape_later:
                    train_arr, test_arr = \
                        train_arr.reshape((n_train,)), \
                        test_arr.reshape((n_test,))
            elif isinstance(arr_copy, list):
                train_arr = [
                    np.empty(
                        shape=(n_train,),
                        dtype=el.dtype,
                        order='C' if el.flags['C_CONTIGUOUS'] else 'F',
                    ) for el in arr_copy
                ]
                test_arr = [
                    np.empty(
                        shape=(n_test,),
                        dtype=el.dtype,
                        order='C' if el.flags['C_CONTIGUOUS'] else 'F',
                    ) for el in arr_copy
                ]
                d4p.daal_train_test_split(
                    arr_copy, train_arr, test_arr, [train], [test])
                train_arr = {col: train_arr[i]
                             for i, col in enumerate(arr.columns)}
                test_arr = {col: test_arr[i]
                            for i, col in enumerate(arr.columns)}
            else:
                raise ValueError("Array can't be converted to needed format")

            if pandas_is_imported:
                if isinstance(arr, pd.core.frame.DataFrame):
                    train_arr, test_arr = \
                        pd.DataFrame(train_arr), pd.DataFrame(test_arr)
                if isinstance(arr, pd.core.series.Series):
                    train_arr, test_arr = \
                        train_arr.reshape(n_train), test_arr.reshape(n_test)
                    train_arr, test_arr = \
                        pd.Series(train_arr), pd.Series(test_arr)
                if hasattr(arr, 'index'):
                    train_arr.index = train
                    test_arr.index = test

            res.append(train_arr)
            res.append(test_arr)

    return res