Exemplo n.º 1
0
def indicator_matrix(df):
    """Build the indicator matrix of a dataframe.

    The frame is dummy-encoded with ``pd.get_dummies``. Every column of the
    original frame whose dtype is ``int64`` or ``float64`` is then replaced
    in the result by ``util.rescale(column, new_min=0, new_max=1)``.
    """
    scalable_dtypes = ('int64', 'float64')
    encoded = pd.get_dummies(df)

    for name in df.columns:
        series = df[name]
        # Only int64/float64 columns are rescaled; the rest stay as encoded.
        if series.dtype not in scalable_dtypes:
            continue
        encoded[name] = util.rescale(series, new_min=0, new_max=1)

    return encoded
Exemplo n.º 2
0
def indicator_matrix(df):
    """Return the dummy-encoded version of ``df`` with rescaled numeric columns.

    Columns of ``df`` with dtype ``int64`` or ``float64`` are passed through
    ``util.rescale`` (``new_min=0``, ``new_max=1``) and stored in the result
    under their original name; all other columns come from ``pd.get_dummies``.
    """
    result = pd.get_dummies(df)

    # Pick out the columns whose dtype marks them for rescaling.
    numeric_names = [
        name for name in df.columns
        if df[name].dtype in ('int64', 'float64')
    ]
    for name in numeric_names:
        result[name] = util.rescale(df[name], new_min=0, new_max=1)

    return result
Exemplo n.º 3
0
    def _build_indicator_matrix(self):
        """Return the indicator matrix built from ``self.X``.

        ``self.X`` is dummy-encoded, so a cell holds ``1`` where a row takes a
        given value for a variable and ``0`` where it does not. Each column of
        ``self.numerical_variables`` is then written into the result after
        going through ``util.rescale`` with ``new_min=0`` and ``new_max=1``.
        """
        matrix = pd.get_dummies(self.X)

        # Numerical variables are stored under their own name, rescaled.
        numerical = self.numerical_variables
        for name in numerical.columns:
            rescaled = util.rescale(numerical[name], new_min=0, new_max=1)
            matrix[name] = rescaled

        return matrix
Exemplo n.º 4
0
 def _compute(self):
     """Run a correspondence analysis on the indicator matrix and store its outputs.

     Sets ``indicator_matrix``, ``column_projections``, ``row_projections``,
     ``U``, ``W``, ``V`` and ``eigenvalues`` on the instance, then calls
     ``_compute_explained_variance``.
     """

     def corrected(w):
         # Values below 1/Q get the rescaled form; every other value is
         # simply squared.
         # NOTE(review): the textbook Benzécri correction is usually stated
         # for eigenvalues *above* 1/Q -- confirm this condition is intended.
         if w < 1 / self.Q:
             return ((self.Q / (self.Q - 1)) ** 2 * (w - 1 / self.Q) ** 2) ** 2
         return w ** 2

     # Dummy-encode the data, then store each numerical column under its own
     # name after rescaling.
     self.indicator_matrix = pd.get_dummies(self.df)
     for name in self._numerical.columns:
         self.indicator_matrix[name] = util.rescale(self._numerical[name])

     # Correspondence analysis of the indicator matrix.
     analysis = prince.CA(self.indicator_matrix, components=self.components)

     # Copy over the parts of the analysis that are needed later.
     for attribute in ('column_projections', 'row_projections', 'U', 'W', 'V'):
         setattr(self, attribute, getattr(analysis, attribute))

     # Benzécri scores replace the classical eigenvalues.
     self.eigenvalues = [corrected(w) for w in self.W]
     self._compute_explained_variance()