Пример #1
0
 def _fit(self, n_components):
     """Fit a ``Base`` model on the CA reference data set.

     The data are read from the tab-separated file
     ``fanalysis/tests/ca_data.txt`` (first row used as header, first
     column used as index).

     Parameters
     ----------
     n_components : forwarded unchanged to ``Base(n_components=...)``.

     Returns
     -------
     base : Base
         The instance on which ``fit`` has been called.
     """
     df = pd.read_table("fanalysis/tests/ca_data.txt",
                        header=0,
                        index_col=0,
                        delimiter="\t")
     # DataFrame.as_matrix() was removed in pandas 1.0; ``.values`` returns
     # the same ndarray and is available in every pandas version.
     M = df.values
     base = Base(n_components=n_components)
     base.fit(M)
     return base
Пример #2
0
    def fit(self, X, y=None):
        """ Fit the model to X.

        X is first converted to a dummy variables table, on which
        ``Base.fit`` is run. The number of kept components is then capped
        at ``n_categories_ - n_vars_``.

        Parameters
        ----------
        X : array of string, int or float, shape (n_rows, n_vars)
            Training data, where n_rows is the number of rows and n_vars
            is the number of variables.
            X is a data table containing a category in each cell.
            Categories can be coded by strings or numeric values.

        y : None
            y is ignored.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        # Dummy variables table built from X
        dummies = self._binarization(X)

        # Row sums of the dummy table, stored as a column vector, then the
        # generic factorial analysis is fitted on that table
        self.r_ = np.sum(dummies, axis=1).reshape(-1, 1)
        Base.fit(self, dummies, y=None)

        # Cap the number of components and truncate the fitted arrays
        # accordingly (columns beyond the cap are dropped)
        max_components = self.n_categories_ - self.n_vars_
        if self.n_components_ > max_components:
            self.n_components_ = max_components
            for attr in ("eig_", "row_coord_", "col_coord_"):
                setattr(self, attr,
                        getattr(self, attr)[:, :self.n_components_])
            if self.stats:
                for attr in ("row_contrib_", "col_contrib_",
                             "row_cos2_", "col_cos2_"):
                    setattr(self, attr,
                            getattr(self, attr)[:, :self.n_components_])

        # Publish the column labels from their temporary attributes
        # (presumably filled in by _binarization -- not visible here)
        self.col_labels_short_ = self.col_labels_short_temp_
        self.col_labels_ = self.col_labels_temp_

        self.model_ = "mca"

        return self
Пример #3
0
    def _col_topandas_comparison(self, n_components=None, col_labels=False):
        """ Compare the output of the col_topandas method with the
        R FactoMineR output.

        Parameters
        ----------
        n_components : forwarded to ``Base`` and used (after adjustment
            against the reference eigenvalues) to slice the reference
            tables.

        col_labels : bool
            If False, the expected labels are "col0", "col1", ...
            Otherwise the labels are read from
            ``fanalysis/tests/ca_col_labels.txt`` and passed to ``Base``.

        Raises
        ------
        AssertionError
            If the data or the index of the DataFrame returned by
            ``col_topandas`` differ from the reference values.
        """
        df = pd.read_table("fanalysis/tests/ca_data.txt",
                           header=0,
                           index_col=0,
                           delimiter="\t")
        # DataFrame.as_matrix() was removed in pandas 1.0; ``.values`` is
        # the version-independent equivalent.
        M = df.values
        if col_labels == False:
            labels = ["col" + str(x) for x in np.arange(0, M.shape[1])]
            base = Base(n_components=n_components, row_labels=None)
        else:
            labels = np.loadtxt("fanalysis/tests/ca_col_labels.txt",
                                delimiter=" ",
                                dtype=str)
            base = Base(n_components=n_components, col_labels=labels)
        df_Y = base.fit(M).col_topandas()
        # Y is modified in place below, so take an explicit writable copy:
        # ``.values`` may be a read-only view when pandas copy-on-write
        # is active.
        Y = np.array(df_Y.values)
        df_Y_index = df_Y.index.values
        Y_col_coord_temp = Y[:, :base.n_components_]

        eigen_values = np.loadtxt("fanalysis/tests/ca_eig.txt",
                                  delimiter=" ",
                                  dtype=float)
        n_components = self._adjust_n_components(n_components, eigen_values)
        X_col_coord = np.loadtxt("fanalysis/tests/ca_col_coord.txt",
                                 delimiter=" ",
                                 dtype=float)[:, :n_components]
        X_col_contrib = np.loadtxt("fanalysis/tests/ca_col_contrib.txt",
                                   delimiter=" ",
                                   dtype=float)[:, :n_components]
        X_col_cos2 = np.loadtxt("fanalysis/tests/ca_col_cos2.txt",
                                delimiter=" ",
                                dtype=float)[:, :n_components]
        # Reference table: coordinates, contributions and cos2 side by side
        X = np.c_[X_col_coord, X_col_contrib, X_col_cos2]

        # test for data
        # The coordinate columns are passed through _compute_Y before the
        # comparison (presumably to align axis signs -- helper not visible
        # here) and written back into Y.
        Y_col_coord = self._compute_Y(X_col_coord, Y_col_coord_temp,
                                      "col_coord_")
        Y[:, :base.n_components_] = Y_col_coord
        assert_array_almost_equal(X, Y)

        # test for col_labels
        assert_array_equal(labels, df_Y_index)
Пример #4
0
    def transform(self, X, y=None):
        """ Project supplementary rows onto the axes extracted during
        fitting.

        Parameters
        ----------
        X : array of string, int or float, shape (n_rows_sup, n_vars)
            New data, where n_rows_sup is the number of supplementary
            row points and n_vars is the number of variables.
            Each cell of X holds a category, coded either by a string
            or by a numeric value.

        y : None
            y is ignored.

        Returns
        -------
        X_new : array of float, shape (n_rows_sup, n_components_)
            Coordinates of the projections of the supplementary row
            points onto the axes.
        """
        # Indicator table for the supplementary rows: one column per
        # fitted column label, filled with 0 by default
        n_sup = X.shape[0]
        n_labels = len(self.col_labels_)
        indicators = np.zeros(shape=(n_sup, n_labels))
        for row in range(n_sup):
            # Labels of this row: variable prefix + category value
            row_labels = [self.prefixes_[var] + str(X[row, var])
                          for var in range(self.n_vars_)]
            for col in range(n_labels):
                if self.col_labels_[col] in row_labels:
                    indicators[row, col] = 1

        # Delegate the projection itself to the generic implementation
        return Base.transform(self, indicators)
Пример #5
0
    def _fit_transform_comparison(self, n_components=None):
        """ Compare the result of ``fit`` followed by ``transform``, and of
        ``fit_transform``, with the R FactoMineR output.

        Parameters
        ----------
        n_components : if None, ``Base`` is built with its default
            arguments; otherwise it is passed as ``n_components``. It is
            also used (after adjustment against the reference
            eigenvalues) to slice the reference row coordinates.

        Raises
        ------
        AssertionError
            If either set of row coordinates differs from the reference
            values.
        """
        if n_components is None:
            base1 = Base()
            base2 = Base()
        else:
            base1 = Base(n_components=n_components)
            base2 = Base(n_components=n_components)
        eigen_values = np.loadtxt("fanalysis/tests/ca_eig.txt",
                                  delimiter=" ",
                                  dtype=float)
        n_components = self._adjust_n_components(n_components, eigen_values)
        X = np.loadtxt("fanalysis/tests/ca_row_coord.txt",
                       delimiter=" ",
                       dtype=float)[:, :n_components]
        df = pd.read_table("fanalysis/tests/ca_data.txt",
                           header=0,
                           index_col=0,
                           delimiter="\t")
        # DataFrame.as_matrix() was removed in pandas 1.0; ``.values`` is
        # the version-independent equivalent.
        M = df.values

        # Path 1: fit, then transform the training data
        base1.fit(M)
        Y_temp_1 = base1.transform(M)
        Y1 = self._compute_Y(X, Y_temp_1, "row_coord_")
        assert_array_almost_equal(X, Y1)

        # Path 2: fit_transform in a single call
        Y_temp_2 = base2.fit_transform(M)
        Y2 = self._compute_Y(X, Y_temp_2, "row_coord_")
        assert_array_almost_equal(X, Y2)
Пример #6
0
 def __init__(self, n_components=None, row_labels=None, var_labels=None,
              stats=True):
     """Initialise the estimator.

     ``n_components``, ``row_labels`` and ``stats`` are forwarded
     positionally to ``Base.__init__``; ``var_labels`` is only stored on
     the instance as ``self.var_labels``.
     """
     # The third positional argument of Base.__init__ is always None here
     # (presumably the column labels -- confirm against Base.__init__).
     Base.__init__(self, n_components, row_labels, None, stats)
     self.var_labels = var_labels