def _check_coltype(self, X):
    """Validate that each entry of ``self.columns`` can address a column of ``X``.

    String entries are rejected when ``X`` is a numpy array and must be
    present in ``X.columns`` when ``X`` is a pandas DataFrame. Integer
    entries must lie in ``range(n_columns)``, where ``n_columns`` is taken
    from ``X`` converted to an (at least) 2-D numpy array; negative indices
    are therefore rejected.

    Parameters
    ----------
    X : array-like or pandas.DataFrame
        The input data the configured columns refer to.

    Raises
    ------
    ValueError
        If a string column is used with a numpy array, a string column is
        missing from a DataFrame, or an integer column is out of bounds.
    """
    # Computed lazily so that inputs addressed purely by name are never
    # converted to an array, and converted at most once otherwise.
    n_columns = None
    for col in as_list(self.columns):
        if isinstance(col, str):
            if isinstance(X, np.ndarray):
                raise ValueError(
                    f"column {col} is a string but datatype receive is numpy."
                )
            if isinstance(X, pd.DataFrame) and col not in X.columns:
                raise ValueError(f"column {col} is not in {X.columns}")
        if isinstance(col, int):
            if n_columns is None:
                n_columns = np.atleast_2d(np.array(X)).shape[1]
            if col not in range(n_columns):
                # np.shape works for inputs without a ``.shape`` attribute
                # (e.g. nested lists) and equals ``X.shape`` when it exists,
                # so the intended ValueError is raised for every input type.
                raise ValueError(
                    f"column {col} is out of bounds for input shape {np.shape(X)}"
                )
示例#2
0
 def fit(self, X, y=None):
     """Learn the projection required to make the dataset orthogonal to sensitive columns.

     The configured columns are validated and resolved to integer indices
     (stored in ``col_ids_``). Every column of the validated ``X`` then has
     its projection onto each sensitive direction subtracted, and the matrix
     ``projection_`` is obtained as the least-squares solution of
     ``X @ P = X_fair``.

     Parameters
     ----------
     X : array-like or pandas.DataFrame
         Training data; validated with ``check_array``.
     y : ignored
         Not used by this method.

     Returns
     -------
     self
         The fitted estimator, with ``col_ids_`` and ``projection_`` set.
     """
     self._check_coltype(X)
     self.col_ids_ = [
         v if isinstance(v, int) else self._col_idx(X, v)
         for v in as_list(self.columns)
     ]
     X = check_array(X, estimator=self)
     # The projections below are generally non-integer. Writing them into an
     # integer (or boolean) array would silently truncate them, so work on a
     # floating-point copy unless X is already of an inexact dtype, in which
     # case the dtype is kept as-is.
     if np.issubdtype(X.dtype, np.inexact):
         X_fair = X.copy()
     else:
         X_fair = X.astype(float)
     v_vectors = self._make_v_vectors(X, self.col_ids_)
     # Gram-Schmidt process, but only against the sensitive attributes:
     # each column has its component along every sensitive direction removed.
     for i in range(X_fair.shape[1]):
         for v in v_vectors.T:
             X_fair[:, i] = X_fair[:, i] - _vector_projection(X_fair[:, i], v)
     # We want to learn the matrix P such that X P = X_fair;
     # this means we first need to create X_fair in order to learn P.
     self.projection_ = np.linalg.lstsq(X, X_fair, rcond=None)[0]
     return self