示例#1
0
x = westdf.loc[:, :].values
x = StandardScaler(with_std=False).fit_transform(x)  #centerlize the data

#SparsePCA transform
transformer = SparsePCA(n_components=3,\
                        alpha=0.1,\
                        normalize_components=True,\
                        random_state=0)
x_transformed = transformer.fit_transform(x)

# for data analysis
x_transformed.shape
transformer.alpha
egienvetors = transformer.components_
transformer.error_
transformer.get_params(deep=True)
np.mean(transformer.components_ == 0)
westspca = pd.DataFrame(data=egienvetors, columns=westdf.columns)
Spca1 = westspca.sort_values(by=[0], axis=1)
Spca2 = westspca.sort_values(by=[1], axis=1)
Spca3 = westspca.sort_values(by=[2], axis=1)
Spca4 = westspca.sort_values(by=[3], axis=1)
Spca5 = westspca.sort_values(by=[4], axis=1)

westtrans = pd.DataFrame(data=x_transformed)
westpcascore = pd.concat([chapter, westtrans], axis=1)
pcascore1 = westpcascore.sort_values(by=[0], axis=0)
pcascore2 = westpcascore.sort_values(by=[1], axis=0)
pcascore3 = westpcascore.sort_values(by=[2], axis=0)
pcascore4 = westpcascore.sort_values(by=[3], axis=0)
pcascore5 = westpcascore.sort_values(by=[4], axis=0)
class SPCA(object):
    def __init__(self,
                 n_components=None,
                 alpha=1,
                 ridge_alpha=0.01,
                 max_iter=1000,
                 tol=1e-8,
                 method='lars',
                 n_jobs=None,
                 U_init=None,
                 V_init=None,
                 verbose=False,
                 random_state=None,
                 normalize_components='deprecated'):
        """
        :param n_components:
        :param alpha:
        :param ridge_alpha:
        :param max_iter:
        :param tol:
        :param method:
        :param n_jobs:
        :param U_init:
        :param V_init:
        :param verbose:
        :param random_state:
        :param normalize_components:
        """
        self.model = SparsePCA(n_components=n_components,
                               alpha=alpha,
                               ridge_alpha=ridge_alpha,
                               max_iter=max_iter,
                               tol=tol,
                               method=method,
                               n_jobs=n_jobs,
                               U_init=U_init,
                               V_init=V_init,
                               verbose=verbose,
                               random_state=random_state,
                               normalize_components=normalize_components)

    def fit(self, x, y):
        self.model.fit(X=x, y=y)

    def transform(self, x):
        self.model.transform(X=x)

    def fit_transform(self, x, y=None):
        return self.model.fit_transform(X=x, y=y)

    def get_params(self):
        return self.model.get_params(deep=True)

    def set_params(self, **params):
        return self.model.set_params(**params)

    def get_attributes(self):
        components = self.model.components_
        error = self.model.error_
        n_iter = self.model.n_iter_
        mean = self.model.mean_
        return components, error, n_iter, mean
示例#3
0
    def btnConvert_click(self):
        msgBox = QMessageBox()

        # Alpha
        try:
            Alpha = np.float(ui.txtAlpha.text())
        except:
            msgBox.setText("Alpha is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Ridge
        try:
            Ridge = np.float(ui.txtRidge.text())
        except:
            msgBox.setText("Ridge Alpha is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Max Iter
        try:
            MaxIter = np.int32(ui.txtMaxIter.text())
        except:
            msgBox.setText("Maximum Iteration is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Tolerance
        try:
            Tol = np.float(ui.txtTole.text())
        except:
            msgBox.setText("Tolerance is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        #Method
        if ui.rbLars.isChecked():
            Method = "lars"
        else:
            Method = "cd"

        # Number of Job
        try:
            njob = np.int32(ui.txtJobs.text())
        except:
            msgBox.setText("Number of jobs is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # OutFile
        OutFile = ui.txtOutFile.text()
        if not len(OutFile):
            msgBox.setText("Please enter out file!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # InFile
        InFile = ui.txtInFile.text()
        if not len(InFile):
            msgBox.setText("Please enter input file!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        if not os.path.isfile(InFile):
            msgBox.setText("Input file not found!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        if ui.rbScale.isChecked() == True and ui.rbALScale.isChecked(
        ) == False:
            msgBox.setText(
                "Subject Level Normalization is just available for Subject Level Analysis!"
            )
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        InData = io.loadmat(InFile)
        OutData = dict()
        OutData["imgShape"] = InData["imgShape"]

        if not len(ui.txtData.currentText()):
            msgBox.setText("Please enter Data variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        try:
            X = InData[ui.txtData.currentText()]

            if ui.cbScale.isChecked() and (not ui.rbScale.isChecked()):
                X = preprocessing.scale(X)
                print("Whole of data is scaled X~N(0,1).")
        except:
            print("Cannot load data")
            return

        try:
            NumFea = np.int32(ui.txtNumFea.text())
        except:
            msgBox.setText("Number of features is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if NumFea < 1:
            msgBox.setText("Number of features must be greater than zero!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        if NumFea > np.shape(X)[1]:
            msgBox.setText("Number of features is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Subject
        if not len(ui.txtSubject.currentText()):
            msgBox.setText("Please enter Subject variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        try:
            Subject = InData[ui.txtSubject.currentText()]
            OutData[ui.txtOSubject.text()] = Subject
        except:
            print("Cannot load Subject ID")
            return

        # Label
        if not len(ui.txtLabel.currentText()):
            msgBox.setText("Please enter Label variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        OutData[ui.txtOLabel.text()] = InData[ui.txtLabel.currentText()]

        # Task
        if ui.cbTask.isChecked():
            if not len(ui.txtTask.currentText()):
                msgBox.setText("Please enter Task variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            OutData[ui.txtOTask.text()] = InData[ui.txtTask.currentText()]

        # Run
        if ui.cbRun.isChecked():
            if not len(ui.txtRun.currentText()):
                msgBox.setText("Please enter Run variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            OutData[ui.txtORun.text()] = InData[ui.txtRun.currentText()]

        # Counter
        if ui.cbCounter.isChecked():
            if not len(ui.txtCounter.currentText()):
                msgBox.setText("Please enter Counter variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            OutData[ui.txtOCounter.text()] = InData[
                ui.txtCounter.currentText()]

        # Matrix Label
        if ui.cbmLabel.isChecked():
            if not len(ui.txtmLabel.currentText()):
                msgBox.setText("Please enter Matrix Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            OutData[ui.txtOmLabel.text()] = InData[ui.txtmLabel.currentText()]

        # Design
        if ui.cbDM.isChecked():
            if not len(ui.txtDM.currentText()):
                msgBox.setText("Please enter Design Matrix variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            OutData[ui.txtODM.text()] = InData[ui.txtDM.currentText()]

        # Coordinate
        if ui.cbCol.isChecked():
            if not len(ui.txtCol.currentText()):
                msgBox.setText("Please enter Coordinator variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            OutData[ui.txtOCol.text()] = InData[ui.txtCol.currentText()]

        # Condition
        if ui.cbCond.isChecked():
            if not len(ui.txtCond.currentText()):
                msgBox.setText("Please enter Condition variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            OutData[ui.txtOCond.text()] = InData[ui.txtCond.currentText()]

        # Number of Scan
        if ui.cbNScan.isChecked():
            if not len(ui.txtScan.currentText()):
                msgBox.setText("Please enter Number of Scan variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            OutData[ui.txtOScan.text()] = InData[ui.txtScan.currentText()]

        Models = dict()
        Models["Name"] = "SPCA"

        if ui.rbALScale.isChecked():
            print("Partition data to subject level ...")
            SubjectUniq = np.unique(Subject)
            X_Sub = list()
            for subj in SubjectUniq:
                if ui.cbScale.isChecked() and ui.rbScale.isChecked():
                    X_Sub.append(
                        preprocessing.scale(
                            X[np.where(Subject == subj)[1], :]))
                    print("Data in subject level is scaled, X_" + str(subj) +
                          "~N(0,1).")
                else:
                    X_Sub.append(X[np.where(Subject == subj)[1], :])
                print("Subject ", subj, " is extracted from data.")

            print("Running SPCA in subject level ...")
            X_Sub_PCA = list()
            lenPCA = len(X_Sub)

            for xsubindx, xsub in enumerate(X_Sub):
                model = SparsePCA(n_components=NumFea,alpha=Alpha,ridge_alpha=Ridge,max_iter=MaxIter,\
                                  tol=Tol, method=Method, n_jobs=njob)
                X_Sub_PCA.append(model.fit_transform(xsub))
                Models["Model" + str(xsubindx + 1)] = str(
                    model.get_params(deep=True))
                print("SPCA: ", xsubindx + 1, " of ", lenPCA, " is done.")

            print("Data integration ... ")
            X_new = None
            for xsubindx, xsub in enumerate(X_Sub_PCA):
                X_new = np.concatenate(
                    (X_new, xsub)) if X_new is not None else xsub
                print("Integration: ", xsubindx + 1, " of ", lenPCA,
                      " is done.")
            OutData[ui.txtOData.text()] = X_new
        else:
            print("Running SPCA ...")
            model = SparsePCA(n_components=NumFea, alpha=Alpha, ridge_alpha=Ridge, max_iter=MaxIter, \
                              tol=Tol, method=Method, n_jobs=njob)
            OutData[ui.txtOData.text()] = model.fit_transform(X)
            Models["Model"] = str(model.get_params(deep=True))

        OutData["ModelParameter"] = Models
        print("Saving ...")
        io.savemat(ui.txtOutFile.text(), mdict=OutData)
        print("DONE.")
        msgBox.setText("Sparse PCA is done.")
        msgBox.setIcon(QMessageBox.Information)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
示例#4
0
class SPCA(object):
    """
    Wrapper for sklearn package.  Performs sparse PCA

    SPCA has 5 methods:
       - fit(waveforms)
       update class instance with ICA fit

       - fit_transform()
       do what fit() does, but additionally return the projection onto ICA space

       - inverse_transform(A)
       inverses the decomposition, returns waveforms for an input A, using Z

       - get_basis()
       returns the basis vectors Z^\dagger

       - get_params()
       returns metadata used for fits.
    """
    def __init__(self, num_components=10,
                 catalog_name='unknown',
                 alpha = 0.1,
                 ridge_alpha = 0.01,
                 max_iter = 2000,
                 tol = 1e-9,
                 n_jobs = 1,
                 random_state = None):

        self._decomposition  = 'Sparse PCA'
        self._num_components = num_components
        self._catalog_name   = catalog_name
        self._alpha          = alpha
        self._ridge_alpha    = ridge_alpha
        self._n_jobs         = n_jobs
        self._max_iter       = max_iter
        self._tol            = tol
        self._random_state   = random_state

        self._SPCA = SparsePCA(n_components=self._num_components,
                              alpha        = self._alpha,
                              ridge_alpha  = self._ridge_alpha,
                              n_jobs       = self._n_jobs,
                              max_iter     = self._max_iter,
                              tol          = self._tol,
                              random_state = self._random_state)

    def fit(self,waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._SPCA.fit(self._waveforms)

    def fit_transform(self,waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._A = self._SPCA.fit_transform(self._waveforms)
        return self._A

    def inverse_transform(self,A):
        # convert basis back to waveforms using fit
        new_waveforms = self._SPCA.inverse_transform(A)
        return new_waveforms

    def get_params(self):
        # TODO know what catalog was used! (include waveform metadata)
        params = self._SPCA.get_params()
        params['num_components'] = params.pop('n_components')
        params['Decompositon'] = self._decomposition
        return params

    def get_basis(self):
        """ Return the SPCA basis vectors (Z^\dagger)"""
        Zt = self._SPCA.components_
        return Zt
示例#5
0
class SPCA(object):
    """
    Wrapper for sklearn package.  Performs sparse PCA

    SPCA has 5 methods:
       - fit(waveforms)
       update class instance with ICA fit

       - fit_transform()
       do what fit() does, but additionally return the projection onto ICA space

       - inverse_transform(A)
       inverses the decomposition, returns waveforms for an input A, using Z

       - get_basis()
       returns the basis vectors Z^\dagger

       - get_params()
       returns metadata used for fits.
    """
    def __init__(self,
                 num_components=10,
                 catalog_name='unknown',
                 alpha=0.1,
                 ridge_alpha=0.01,
                 max_iter=2000,
                 tol=1e-9,
                 n_jobs=1,
                 random_state=None):

        self._decomposition = 'Sparse PCA'
        self._num_components = num_components
        self._catalog_name = catalog_name
        self._alpha = alpha
        self._ridge_alpha = ridge_alpha
        self._n_jobs = n_jobs
        self._max_iter = max_iter
        self._tol = tol
        self._random_state = random_state

        self._SPCA = SparsePCA(n_components=self._num_components,
                               alpha=self._alpha,
                               ridge_alpha=self._ridge_alpha,
                               n_jobs=self._n_jobs,
                               max_iter=self._max_iter,
                               tol=self._tol,
                               random_state=self._random_state)

    def fit(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._SPCA.fit(self._waveforms)

    def fit_transform(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._A = self._SPCA.fit_transform(self._waveforms)
        return self._A

    def inverse_transform(self, A):
        # convert basis back to waveforms using fit
        new_waveforms = self._SPCA.inverse_transform(A)
        return new_waveforms

    def get_params(self):
        # TODO know what catalog was used! (include waveform metadata)
        params = self._SPCA.get_params()
        params['num_components'] = params.pop('n_components')
        params['Decompositon'] = self._decomposition
        return params

    def get_basis(self):
        """ Return the SPCA basis vectors (Z^\dagger)"""
        Zt = self._SPCA.components_
        return Zt