def fit(self, X, y=None):
    """Fit the imputer on X.

    Parameters
    ----------
    X : array-like shape of (n_samples, n_features)
        Input data, where `n_samples` is the number of samples and
        `n_features` is the number of features.
    y : object, default=None
        Not referenced by this method.

    Returns
    -------
    self : object

    Raises
    ------
    ValueError
        If NaN is the missing-value marker and the metric is neither listed
        in ``_NAN_METRICS`` nor callable, or if ``n_neighbors`` is not
        strictly positive.
    """
    nan_is_missing = is_scalar_nan(self.missing_values)

    # NaN entries must survive validation when they are the missing marker,
    # which in turn requires a metric that can cope with them.
    if nan_is_missing:
        metric_supports_nan = (
            self.metric in _NAN_METRICS or callable(self.metric)
        )
        if not metric_supports_nan:
            raise ValueError(
                "The selected metric does not support NaN values")
        finite_policy = "allow-nan"
    else:
        finite_policy = True

    if self.n_neighbors <= 0:
        message = "Expected n_neighbors > 0. Got {}".format(self.n_neighbors)
        raise ValueError(message)

    X = check_array(
        X,
        accept_sparse=False,
        dtype=FLOAT_DTYPES,
        force_all_finite=finite_policy,
        copy=self.copy,
    )

    super()._fit_indicator(X)
    _check_weights(self.weights)

    # Keep the validated training data and its missing-value mask.
    self._fit_X = X
    self._mask_fit_X = _get_mask(self._fit_X, self.missing_values)
    return self
def __init__(
    self,
    n_neighbors=1,
    weights="uniform",
    distance="dtw",
    distance_params=None,
    **kwargs
):
    """Configure the classifier and resolve the distance to a callable.

    Parameters
    ----------
    n_neighbors : default=1
        Forwarded unchanged to the parent constructor.
    weights : default="uniform"
        Passed through ``_check_weights``; the result is stored on
        ``self.weights``.
    distance : str or other, default="dtw"
        A string is resolved with ``distance_factory``; any other value is
        handed to the parent constructor as the metric as-is.
    distance_params : default=None
        Stored as given on ``self.distance_params``; ``self._distance_params``
        holds the same value, or an empty dict when it is ``None``.
    **kwargs
        Extra keyword arguments forwarded to the parent constructor.
    """
    self.distance = distance
    self.distance_params = distance_params
    self._distance_params = {} if distance_params is None else distance_params

    # Only string names go through the factory.
    if isinstance(self.distance, str):
        resolved_metric = distance_factory(metric=self.distance)
    else:
        resolved_metric = distance

    super(KNeighborsTimeSeriesClassifier, self).__init__(
        n_neighbors=n_neighbors,
        algorithm="brute",
        metric=resolved_metric,
        # Extra distance params are not forwarded here (handled in _fit)
        metric_params=None,
        **kwargs
    )
    BaseClassifier.__init__(self)
    self.weights = _check_weights(weights)

    # We need to add is-fitted state when inheriting from scikit-learn
    self._is_fitted = False
def fit(self, X, y=None):
    """Fit the imputer on X.

    Parameters
    ----------
    X : {array-like}, shape (n_samples, n_features)
        Input data, where ``n_samples`` is the number of samples and
        ``n_features`` is the number of features.
    y : object, default=None
        Not referenced by this method.

    Returns
    -------
    self : object
        Returns self.

    Raises
    ------
    ValueError
        If NaN marks missing values and the metric is neither callable nor
        listed in ``_MASKED_METRICS``; if X contains +/- inf; if any column
        has more than ``col_max_missing`` of its values missing; or if fewer
        than ``n_neighbors`` rows remain once rows exceeding
        ``row_max_missing`` are dropped.
    """
    # Decide whether NaN is the missing-value marker.  A membership test
    # against [..., np.nan] only recognises the np.nan object itself
    # (NaN != NaN), so float("nan") or np.float32("nan") would be missed;
    # test the value explicitly instead.
    marker = self.missing_values
    if isinstance(marker, str):
        nan_is_missing = marker == "NaN"
    else:
        try:
            nan_is_missing = bool(np.isnan(marker))
        except (TypeError, ValueError):
            # Non-numeric or non-scalar markers cannot be NaN.
            nan_is_missing = False
    force_all_finite = not nan_is_missing

    # Check data integrity and calling arguments
    if nan_is_missing:
        if self.metric not in _MASKED_METRICS and not callable(self.metric):
            raise ValueError(
                "The selected metric does not support NaN values.")
    X = check_array(X, accept_sparse=False, dtype=np.float64,
                    force_all_finite=force_all_finite, copy=self.copy)
    self.weights = _check_weights(self.weights)

    # Check for +/- inf (NaN may have been allowed through above, inf never is)
    if np.any(np.isinf(X)):
        raise ValueError("+/- inf values are not allowed.")

    # Check if % missing in any column > col_max_missing
    mask = _get_mask(X, self.missing_values)
    if np.any(mask.sum(axis=0) > (X.shape[0] * self.col_max_missing)):
        raise ValueError("Some column(s) have more than {}% missing values"
                         .format(self.col_max_missing * 100))
    # Column means are taken over the observed entries of *all* rows,
    # i.e. before any row is discarded below.
    X_col_means = np.ma.array(X, mask=mask).mean(axis=0).data

    # Check if % missing in any row > row_max_missing
    bad_rows = mask.sum(axis=1) > (mask.shape[1] * self.row_max_missing)
    if np.any(bad_rows):
        warnings.warn(
            "There are rows with more than {0}% missing values. These "
            "rows are not included as donor neighbors."
            .format(self.row_max_missing * 100))

        # Remove rows that have more than row_max_missing % missing
        X = X[~bad_rows, :]

    # Check if sufficient neighboring samples available
    if X.shape[0] < self.n_neighbors:
        raise ValueError("There are only %d samples, but n_neighbors=%d."
                         % (X.shape[0], self.n_neighbors))
    self.fitted_X_ = X
    self.statistics_ = X_col_means

    return self
def __init__(self, n_neighbors=5, *,
             weights='uniform', algorithm='auto', leaf_size=30,
             p=2, metric='minkowski', metric_params=None, n_jobs=None,
             **kwargs):
    """Store the neighbour-search settings and the validated weights.

    Parameters
    ----------
    n_neighbors : default=5
    weights : default='uniform'
        Passed through ``_check_weights``; the result is stored on
        ``self.weights``.
    algorithm, leaf_size, p, metric, metric_params, n_jobs
        Keyword-only; forwarded unchanged to the parent constructor.
    **kwargs
        Extra keyword arguments forwarded to the parent constructor.
    """
    parent_settings = dict(
        n_neighbors=n_neighbors,
        algorithm=algorithm,
        leaf_size=leaf_size,
        metric=metric,
        p=p,
        metric_params=metric_params,
        n_jobs=n_jobs,
    )
    super().__init__(**parent_settings, **kwargs)

    checked_weights = _check_weights(weights)
    self.weights = checked_weights
def __init__(self, n_neighbors=1, weights='uniform', algorithm='brute',
             metric='dtw', metric_params=None, **kwargs):
    """Configure the classifier and resolve ``metric`` to a callable.

    Parameters
    ----------
    n_neighbors : default=1
        Forwarded to the parent constructor.
    weights : default='uniform'
        Passed through ``_check_weights``; the result is stored on
        ``self.weights``.
    algorithm : default='brute'
        Forwarded to the parent constructor.
    metric : str or callable, default='dtw'
        A string must be one of the names in the lookup table below and is
        replaced by the matching distance function.  Any non-string value
        is forwarded to the parent constructor untouched.
    metric_params : default=None
        Forwarded to the parent constructor.  With ``metric='dtwcv'`` a
        warning is issued when this is not ``None``.
    **kwargs
        Extra keyword arguments forwarded to the parent constructor.

    Raises
    ------
    ValueError
        If ``metric`` is a string that is not a recognised name.
    """
    # Name -> distance function.  'dtwcv' is plain DTW plus the
    # parameter-search setup handled further down.
    named_metrics = {
        'dtw': dtw_distance,
        'dtwcv': dtw_distance,
        'ddtw': ddtw_distance,
        'wdtw': wdtw_distance,
        'wddtw': wddtw_distance,
        'lcss': lcss_distance,
        'erp': erp_distance,
        'msm': msm_distance,
        'twe': twe_distance,
        # When mpdist is used, the subsequence length (parameter m) must
        # be set.  Example: knn_mpdist = KNeighborsTimeSeriesClassifier(
        #     metric='mpdist', metric_params={'m':30})
        'mpdist': mpdist,
    }

    self._cv_for_params = False

    if isinstance(metric, str):
        if metric not in named_metrics:
            # The allowed names are generated from the table so the
            # message cannot drift out of date.
            raise ValueError(
                "Unrecognised distance measure: " + metric
                + ". Allowed values are names from ["
                + ",".join(named_metrics)
                + "] or please pass a callable distance measure into the "
                "constructor directly.")
        if metric == 'dtwcv':
            # special case to force loocv grid search cv in training
            if metric_params is not None:
                warnings.warn(
                    "Warning: measure parameters have been specified for "
                    "dtwcv. "
                    "These will be ignored and parameter values will be "
                    "found using LOOCV.")
            self._cv_for_params = True
            # Candidate window values w = 0.00, 0.01, ..., 0.99
            self._param_matrix = {
                'metric_params': [{'w': x / 100} for x in range(0, 100)]}
        metric = named_metrics[metric]

    super(KNeighborsTimeSeriesClassifier, self).__init__(
        n_neighbors=n_neighbors,
        algorithm=algorithm,
        metric=metric,
        metric_params=metric_params,
        **kwargs)
    self.weights = _check_weights(weights)

    # We need to add is-fitted state when inheriting from scikit-learn
    self._is_fitted = False
def __init__(
    self,
    n_neighbors=1,
    weights="uniform",
    distance="dtw",
    distance_params=None,
    **kwargs
):
    """Configure the classifier and resolve ``distance`` to a callable.

    Parameters
    ----------
    n_neighbors : default=1
        Forwarded to the parent constructor.
    weights : default="uniform"
        Passed through ``_check_weights``; the result is stored on
        ``self.weights``.
    distance : str or callable, default="dtw"
        Stored as given on ``self.distance``.  A string must be one of the
        names in the lookup table below and is replaced by the matching
        distance function before being passed to the parent constructor
        as ``metric``.  Any non-string value is forwarded untouched.
    distance_params : default=None
        Stored as given on ``self.distance_params`` and forwarded to the
        parent constructor as ``metric_params``.  With
        ``distance="dtwcv"`` a warning is issued when this is not ``None``.
    **kwargs
        Extra keyword arguments forwarded to the parent constructor.

    Raises
    ------
    ValueError
        If ``distance`` is a string that is not a recognised name.
    """
    # Name -> distance function.  "dtwcv" is plain DTW plus the
    # parameter-search setup handled further down.
    named_distances = {
        # An explicit function is used for euclidean as well.
        "euclidean": euclidean_distance,
        "dtw": dtw_distance,
        "dtwcv": dtw_distance,
        "ddtw": ddtw_distance,
        "wdtw": wdtw_distance,
        "wddtw": wddtw_distance,
        "lcss": lcss_distance,
        "erp": erp_distance,
        "msm": msm_distance,
        "twe": twe_distance,
        # When mpdist is used, the subsequence length (parameter m) must
        # be set.  Example: knn_mpdist = KNeighborsTimeSeriesClassifier(
        #     distance="mpdist", distance_params={"m": 30})
        "mpdist": mpdist,
    }

    self._cv_for_params = False
    self.distance = distance
    self.distance_params = distance_params

    if isinstance(distance, str):
        if distance not in named_distances:
            # The allowed names are generated from the table so the
            # message cannot drift out of date.
            raise ValueError(
                "Unrecognised distance measure: " + distance
                + ". Allowed values are names from ["
                + ",".join(named_distances)
                + "] or please pass a callable distance measure into the "
                "constructor"
            )
        if distance == "dtwcv":
            # special case to force loocv grid search cv in training
            if distance_params is not None:
                warnings.warn(
                    "Warning: measure parameters have been specified for "
                    "dtwcv. "
                    "These will be ignored and parameter values will be "
                    "found using LOOCV."
                )
            self._cv_for_params = True
            # Candidate window values w = 0.00, 0.01, ..., 0.99.  The key
            # is the parent's parameter name, not ``distance_params``.
            self._param_matrix = {
                "metric_params": [{"w": x / 100} for x in range(0, 100)]
            }
        distance = named_distances[distance]

    super(KNeighborsTimeSeriesClassifier, self).__init__(
        n_neighbors=n_neighbors,
        algorithm="brute",
        metric=distance,
        metric_params=distance_params,
        **kwargs
    )
    self.weights = _check_weights(weights)

    # We need to add is-fitted state when inheriting from scikit-learn
    self._is_fitted = False
def _fit(self, X, y=None):
    """Validate the inputs, then fit via ``daal4py_fit`` or the parent ``_fit``.

    ``daal4py_fit`` is used only when every condition collected in the
    ``PatchingConditionsChain`` below holds; otherwise, or when
    ``daal4py_fit`` raises ``RuntimeError``, the parent class ``_fit`` is
    called through ``stock_fit``.

    Parameters
    ----------
    X : object
        Training data, or an instance of ``KDTree``, ``BallTree``,
        ``NeighborsBase`` or ``BaseNeighborsBase`` (those are not passed
        through ``validate_data``).
    y : object, default=None
        Targets.  Validated together with X when given or when the
        estimator tags say ``requires_y``.

    Returns
    -------
    result : object
        ``self`` on the daal4py path, otherwise whatever the parent
        ``_fit`` returns.

    Raises
    ------
    ValueError
        If ``n_neighbors`` is not ``None`` and is ``<= 0``.
    TypeError
        If ``n_neighbors`` is not ``None`` and is not an integral number.
    """
    # 'p' supplied through metric_params wins over the constructor's p;
    # warn the caller about the ignored value.
    if self.metric_params is not None and 'p' in self.metric_params:
        if self.p is not None:
            warnings.warn("Parameter p is found in metric_params. "
                          "The corresponding parameter from __init__ "
                          "is ignored.", SyntaxWarning, stacklevel=2)

    # Both steps below are only performed when sklearn_check_version("1.0")
    # is truthy.
    if hasattr(self, 'weights') and sklearn_check_version("1.0"):
        self.weights = _check_weights(self.weights)

    if sklearn_check_version("1.0"):
        self._check_feature_names(X, reset=True)

    # True when X is a tree / neighbors estimator rather than array data.
    X_incorrect_type = isinstance(
        X, (KDTree, BallTree, NeighborsBase, BaseNeighborsBase))
    single_output = True
    self._daal_model = None
    shape = None
    correct_n_classes = True

    # Estimators whose tags lack "requires_y" are treated as not needing y.
    try:
        requires_y = self._get_tags()["requires_y"]
    except KeyError:
        requires_y = False

    if y is not None or requires_y:
        if not X_incorrect_type or y is None:
            X, y = validate_data(
                self, X, y, accept_sparse="csr", multi_output=True,
                dtype=[np.float64, np.float32])
            single_output = False if y.ndim > 1 and y.shape[1] > 1 else True

        # Remember the caller's target shape; it is restored for
        # regressors at the end of this method.
        shape = y.shape

        if is_classifier(self) or is_regressor(self):
            # Work on a 2-D (n_samples, n_outputs) view of y.
            if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:
                self.outputs_2d_ = False
                y = y.reshape((-1, 1))
            else:
                self.outputs_2d_ = True

            if is_classifier(self):
                check_classification_targets(y)
            # Encode each output column as integer indices into its
            # sorted unique values.
            self.classes_ = []
            self._y = np.empty(y.shape, dtype=int)
            for k in range(self._y.shape[1]):
                classes, self._y[:, k] = np.unique(
                    y[:, k], return_inverse=True)
                self.classes_.append(classes)

            if not self.outputs_2d_:
                self.classes_ = self.classes_[0]
                self._y = self._y.ravel()

            # Fewer than two classes disables the daal4py path below.
            n_classes = len(self.classes_)
            if n_classes < 2:
                correct_n_classes = False
        else:
            self._y = y
    else:
        if not X_incorrect_type:
            X, _ = validate_data(
                self, X, accept_sparse='csr', dtype=[np.float64, np.float32])

    if not X_incorrect_type:
        self.n_samples_fit_ = X.shape[0]
        self.n_features_in_ = X.shape[1]

    # A ValueError from getFPType means no usable dtype; that also
    # disables the daal4py path.
    try:
        fptype = getFPType(X)
    except ValueError:
        fptype = None

    weights = getattr(self, 'weights', 'uniform')

    def stock_fit(self, X, y):
        # Call the parent implementation; y is passed only when
        # sklearn_check_version("0.24") is truthy.
        if sklearn_check_version("0.24"):
            result = super(NeighborsBase, self)._fit(X, y)
        else:
            result = super(NeighborsBase, self)._fit(X)
        return result

    if self.n_neighbors is not None:
        if self.n_neighbors <= 0:
            raise ValueError(
                "Expected n_neighbors > 0. Got %d" %
                self.n_neighbors
            )
        if not isinstance(self.n_neighbors, numbers.Integral):
            raise TypeError(
                "n_neighbors does not take %s value, "
                "enter integer value" %
                type(self.n_neighbors))

    # Each entry pairs a condition with the message describing why the
    # daal4py path is unavailable when that condition is false.
    _patching_status = PatchingConditionsChain(
        "sklearn.neighbors.KNeighborsMixin.kneighbors")
    _dal_ready = _patching_status.and_conditions([
        (self.metric == 'minkowski' and self.p == 2 or self.metric == 'euclidean',
            f"'{self.metric}' (p={self.p}) metric is not supported. "
            "Only 'euclidean' or 'minkowski' with p=2 metrics are supported."),
        (not X_incorrect_type, "X is not Tree or Neighbors instance or array."),
        (weights in ['uniform', 'distance'],
            f"'{weights}' weights is not supported. "
            "Only 'uniform' and 'distance' weights are supported."),
        (self.algorithm in ['brute', 'kd_tree', 'auto', 'ball_tree'],
            f"'{self.algorithm}' algorithm is not supported. "
            "Only 'brute', 'kd_tree', 'auto' and 'ball_tree' "
            "algorithms are supported."),
        (single_output, "Multiple outputs are not supported."),
        (fptype is not None, "Unable to get dtype."),
        (not sp.issparse(X), "X is sparse. Sparse input is not supported."),
        (correct_n_classes, "Number of classes < 2.")])
    _patching_status.write_log()

    if _dal_ready:
        try:
            daal4py_fit(self, X, fptype)
            result = self
        except RuntimeError:
            # Fall back to the parent implementation and log the switch.
            logging.info(
                "sklearn.neighbors.KNeighborsMixin."
                "kneighbors: " + get_patch_message("sklearn_after_daal"))
            result = stock_fit(self, X, y)
    else:
        result = stock_fit(self, X, y)

    # Regressors keep the raw targets (in the caller's original shape)
    # instead of the integer-encoded ones stored above.
    if y is not None and is_regressor(self):
        self._y = y if shape is None else y.reshape(shape)

    return result
def __init__(
    self,
    n_neighbors=1,
    weights="uniform",
    algorithm="brute",
    metric="dtw",
    metric_params=None,
    **kwargs
):
    """Configure the classifier and resolve ``metric`` to a callable.

    Parameters
    ----------
    n_neighbors : default=1
        Forwarded to the parent constructor.
    weights : default="uniform"
        Passed through ``_check_weights``; the result is stored on
        ``self.weights``.
    algorithm : default="brute"
        Forwarded to the parent constructor.  ``"kd_tree"`` and
        ``"ball_tree"`` are rejected.
    metric : str or callable, default="dtw"
        A string must be one of the names in the lookup table below and is
        replaced by the matching distance function.  Any non-string value
        is forwarded to the parent constructor untouched.
    metric_params : default=None
        Forwarded to the parent constructor.  With ``metric="dtwcv"`` a
        warning is issued when this is not ``None``.
    **kwargs
        Extra keyword arguments forwarded to the parent constructor.

    Raises
    ------
    ValueError
        If ``algorithm`` is ``"kd_tree"`` or ``"ball_tree"``, or if
        ``metric`` is a string that is not a recognised name.
    """
    if algorithm == "kd_tree":
        raise ValueError(
            "KNeighborsTimeSeriesClassifier cannot work with kd_tree since kd_tree "
            "cannot be used with a callable distance metric and we do not support "
            "precalculated distances as yet."
        )
    if algorithm == "ball_tree":
        raise ValueError(
            "KNeighborsTimeSeriesClassifier cannot work with ball_tree since "
            "ball_tree has a list of hard coded distances it can use, and cannot "
            "work with 3-D arrays"
        )

    # TODO: add in capacity for euclidean
    # Name -> distance function.  "dtwcv" is plain DTW plus the
    # parameter-search setup handled further down.
    named_metrics = {
        "dtw": dtw_distance,
        "dtwcv": dtw_distance,
        "ddtw": ddtw_distance,
        "wdtw": wdtw_distance,
        "wddtw": wddtw_distance,
        "lcss": lcss_distance,
        "erp": erp_distance,
        "msm": msm_distance,
        "twe": twe_distance,
        # When mpdist is used, the subsequence length (parameter m) must
        # be set.  Example: knn_mpdist = KNeighborsTimeSeriesClassifier(
        #     metric='mpdist', metric_params={'m':30})
        "mpdist": mpdist,
    }

    self._cv_for_params = False

    if isinstance(metric, str):
        if metric not in named_metrics:
            # The allowed names are generated from the table so the
            # message cannot drift out of date.
            raise ValueError(
                "Unrecognised distance measure: " + metric
                + ". Allowed values are names from ["
                + ",".join(named_metrics)
                + "] or please pass a callable distance measure into the "
                "constructor"
            )
        if metric == "dtwcv":
            # special case to force loocv grid search cv in training
            if metric_params is not None:
                warnings.warn(
                    "Warning: measure parameters have been specified for "
                    "dtwcv. "
                    "These will be ignored and parameter values will be "
                    "found using LOOCV."
                )
            self._cv_for_params = True
            # Candidate window values w = 0.00, 0.01, ..., 0.99
            self._param_matrix = {
                "metric_params": [{"w": x / 100} for x in range(0, 100)]
            }
        metric = named_metrics[metric]

    super(KNeighborsTimeSeriesClassifier, self).__init__(
        n_neighbors=n_neighbors,
        algorithm=algorithm,
        metric=metric,
        metric_params=metric_params,
        **kwargs
    )
    self.weights = _check_weights(weights)

    # We need to add is-fitted state when inheriting from scikit-learn
    self._is_fitted = False
def _fit(self, X, y=None):
    """Validate the inputs, then fit via ``daal4py_fit`` or the parent ``_fit``.

    ``daal4py_fit`` is used only when the combined condition near the end
    of this method holds; otherwise, or when ``daal4py_fit`` raises
    ``RuntimeError``, the parent class ``_fit`` is called through
    ``stock_fit``.  The chosen path is reported with ``logging.info``.

    Parameters
    ----------
    X : object
        Training data, or an instance of ``KDTree``, ``BallTree``,
        ``NeighborsBase`` or ``BaseNeighborsBase`` (those are not passed
        through ``validate_data``).
    y : object, default=None
        Targets.  Validated together with X when given or when the
        estimator tags say ``requires_y``.

    Returns
    -------
    result : object
        ``self`` on the daal4py path, otherwise whatever the parent
        ``_fit`` returns.

    Raises
    ------
    ValueError
        If ``n_neighbors`` is not ``None`` and is ``<= 0``.
    TypeError
        If ``n_neighbors`` is not ``None`` and is not an integral number.
    """
    # 'p' supplied through metric_params wins over the constructor's p;
    # warn the caller about the ignored value.
    if self.metric_params is not None and 'p' in self.metric_params:
        if self.p is not None:
            warnings.warn(
                "Parameter p is found in metric_params. "
                "The corresponding parameter from __init__ "
                "is ignored.", SyntaxWarning, stacklevel=2)

    # Only performed when sklearn_check_version("1.0") is truthy.
    if hasattr(self, 'weights') and sklearn_check_version("1.0"):
        self.weights = _check_weights(self.weights)

    # True when X is a tree / neighbors estimator rather than array data.
    X_incorrect_type = isinstance(
        X, (KDTree, BallTree, NeighborsBase, BaseNeighborsBase))
    single_output = True
    self._daal_model = None
    shape = None
    correct_n_classes = True

    # Estimators whose tags lack "requires_y" are treated as not needing y.
    try:
        requires_y = self._get_tags()["requires_y"]
    except KeyError:
        requires_y = False

    if y is not None or requires_y:
        if not X_incorrect_type or y is None:
            X, y = validate_data(self, X, y, accept_sparse="csr", multi_output=True,
                                 dtype=[np.float64, np.float32])
            single_output = False if y.ndim > 1 and y.shape[1] > 1 else True

        # Remember the caller's target shape; it is restored for
        # regressors at the end of this method.
        shape = y.shape

        if is_classifier(self) or is_regressor(self):
            # Work on a 2-D (n_samples, n_outputs) view of y.
            if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:
                self.outputs_2d_ = False
                y = y.reshape((-1, 1))
            else:
                self.outputs_2d_ = True

            if is_classifier(self):
                check_classification_targets(y)
            # Encode each output column as integer indices into its
            # sorted unique values.
            self.classes_ = []
            self._y = np.empty(y.shape, dtype=int)
            for k in range(self._y.shape[1]):
                classes, self._y[:, k] = np.unique(y[:, k], return_inverse=True)
                self.classes_.append(classes)

            if not self.outputs_2d_:
                self.classes_ = self.classes_[0]
                self._y = self._y.ravel()

            # Fewer than two classes disables the daal4py path below.
            n_classes = len(self.classes_)
            if n_classes < 2:
                correct_n_classes = False
        else:
            self._y = y
    else:
        if not X_incorrect_type:
            X, _ = validate_data(self, X, accept_sparse='csr',
                                 dtype=[np.float64, np.float32])

    if not X_incorrect_type:
        self.n_samples_fit_ = X.shape[0]
        self.n_features_in_ = X.shape[1]

    # A ValueError from getFPType means no usable dtype; that also
    # disables the daal4py path.
    try:
        fptype = getFPType(X)
    except ValueError:
        fptype = None

    weights = getattr(self, 'weights', 'uniform')

    def stock_fit(self, X, y):
        # Call the parent implementation; y is passed only when
        # sklearn_check_version("0.24") is truthy.
        if sklearn_check_version("0.24"):
            result = super(NeighborsBase, self)._fit(X, y)
        else:
            result = super(NeighborsBase, self)._fit(X)
        return result

    if self.n_neighbors is not None:
        if self.n_neighbors <= 0:
            raise ValueError("Expected n_neighbors > 0. Got %d" %
                             self.n_neighbors)
        if not isinstance(self.n_neighbors, numbers.Integral):
            raise TypeError("n_neighbors does not take %s value, "
                            "enter integer value" %
                            type(self.n_neighbors))

    # Metric requirement for the daal4py path: euclidean, or minkowski
    # with p == 2.
    condition = (self.metric == 'minkowski' and self.p == 2) or \
        self.metric == 'euclidean'
    # The daal4py path needs array input, supported weights and algorithm,
    # a single output, a known dtype, dense X and at least two classes.
    if not X_incorrect_type and weights in ['uniform', 'distance'] \
        and self.algorithm in ['brute', 'kd_tree', 'auto', 'ball_tree'] \
        and condition \
        and single_output and fptype is not None and not sp.issparse(X) and \
            correct_n_classes:
        try:
            logging.info("sklearn.neighbors.KNeighborsMixin."
                         "kneighbors: " + get_patch_message("daal"))
            daal4py_fit(self, X, fptype)
            result = self
        except RuntimeError:
            # Fall back to the parent implementation and log the switch.
            logging.info("sklearn.neighbors.KNeighborsMixin."
                         "kneighbors: " + get_patch_message("sklearn_after_daal"))
            result = stock_fit(self, X, y)
    else:
        logging.info("sklearn.neighbors.KNeighborsMixin."
                     "kneighbors: " + get_patch_message("sklearn"))
        result = stock_fit(self, X, y)

    # Regressors keep the raw targets (in the caller's original shape)
    # instead of the integer-encoded ones stored above.
    if y is not None and is_regressor(self):
        self._y = y if shape is None else y.reshape(shape)

    return result