Exemplo n.º 1
0
 def fit(self, X, y=None):
     """Fit the wrapped component object on all-numeric input data.

     Arguments:
         X: Input features. Converted to a woodwork structure for the
             numeric check, then to a pandas DataFrame for fitting.
         y: Accepted for interface compatibility; not used by this method.

     Returns:
         self

     Raises:
         ValueError: If ``is_all_numeric`` rejects the converted input.
     """
     ww_features = _convert_to_woodwork_structure(X)
     if is_all_numeric(ww_features):
         # Hand the underlying estimator plain pandas data, not woodwork types.
         features = _convert_woodwork_types_wrapper(ww_features.to_dataframe())
         self._component_obj.fit(features)
         return self
     raise ValueError("PCA input must be all numeric")
Exemplo n.º 2
0
 def fit_transform(self, X, y=None):
     """Fit the wrapped component object on X and return the transformed data.

     Arguments:
         X: Input features. Converted to a woodwork structure for the
             numeric check, then to a pandas DataFrame for fitting.
         y: Optional target, forwarded unchanged to the wrapped object's
             ``fit_transform``.

     Returns:
         pd.DataFrame: The transformed data, indexed like the converted
         input, with columns named ``component_0``, ``component_1``, ...
         (one per output column).

     Raises:
         ValueError: If ``is_all_numeric`` rejects the converted input.
     """
     ww_features = _convert_to_woodwork_structure(X)
     if not is_all_numeric(ww_features):
         raise ValueError("PCA input must be all numeric")

     features = _convert_woodwork_types_wrapper(ww_features.to_dataframe())
     transformed = self._component_obj.fit_transform(features, y)

     # One generated label per column produced by the wrapped object.
     n_outputs = transformed.shape[1]
     component_names = [f"component_{idx}" for idx in range(n_outputs)]
     return pd.DataFrame(
         transformed, index=features.index, columns=component_names)
Exemplo n.º 3
0
    def fit(self, X, y):
        """Validate the inputs and fit the wrapped LDA component object.

        Arguments:
            X: Input features. Converted to a woodwork structure for the
                numeric check, then to a pandas DataFrame for fitting.
            y: Target data. Converted to a pandas Series; its number of
                unique values is used as the class count.

        Returns:
            self

        Raises:
            ValueError: If ``is_all_numeric`` rejects the converted input, or
                if the ``n_components`` parameter is set and exceeds
                ``min(n_classes - 1, n_features)``.
        """
        X = _convert_to_woodwork_structure(X)
        if not is_all_numeric(X):
            raise ValueError("LDA input must be all numeric")
        y = _convert_to_woodwork_structure(y)
        X = _convert_woodwork_types_wrapper(X.to_dataframe())
        y = _convert_woodwork_types_wrapper(y.to_series())
        n_features = X.shape[1]
        n_classes = y.nunique()
        n_components = self.parameters['n_components']
        # Linear discriminant analysis yields at most n_classes - 1
        # discriminant directions, so the upper bound on n_components is
        # min(n_classes - 1, n_features), not min(n_classes, n_features).
        max_components = min(n_classes - 1, n_features)
        if n_components is not None and n_components > max_components:
            raise ValueError(f"n_components value {n_components} is too large")

        self._component_obj.fit(X, y)
        return self
Exemplo n.º 4
0
def _convert_numeric_dataset_pandas(X, y):
    """Convert an all-numeric dataset to pandas data structures.

    Used with data sampler strategies. Feature types are inferred for X and
    y, X is validated with ``is_all_numeric``, and both are then converted
    to their pandas equivalents.

    Arguments:
        X (pd.DataFrame, np.ndarray, ww.DataTable): Data to transform
        y (pd.Series, np.ndarray, ww.DataColumn): Target data

    Returns:
        Tuple(pd.DataFrame, pd.Series): Transformed X and y

    Raises:
        ValueError: If ``is_all_numeric`` rejects X. The message also
            mentions null values; whether nulls are actually detected
            depends on ``is_all_numeric`` (not visible here -- confirm).
    """
    features = infer_feature_types(X)
    if is_all_numeric(features):
        target = infer_feature_types(y)
        features_pd = _convert_woodwork_types_wrapper(features.to_dataframe())
        target_pd = _convert_woodwork_types_wrapper(target.to_series())
        return features_pd, target_pd
    raise ValueError('Values not all numeric or there are null values provided in the dataset')