def reduceDataset(self,nr=3,method='PCA'):
     '''Reduce the dimensionality of the dataset using one of the techniques
      provided by the scikit-learn library.
      Methods available:
                         'PCA'
                         'FactorAnalysis'
                         'KPCArbf','KPCApoly'
                         'KPCAcosine','KPCAsigmoid'
                         'IPCA'
                         'FastICADeflation'
                         'FastICAParallel'
                         'All' (applies every technique via dimensionalityReduction)
      Note: 'Isomap', 'LLE', 'LLEmodified' and 'LLEltsa' are listed in
      dimensionalityReduction but are currently commented out there.
     '''
     dataset=self.ModelInputs['Dataset']
     #dataset=self.dataset[Model.in_columns]
     #dataset=self.dataset[['Humidity','TemperatureF','Sea Level PressureIn','PrecipitationIn','Dew PointF','Value']]
     #PCA
     if method=='PCA':
         sklearn_pca = sklearnPCA(n_components=nr)
         reduced = sklearn_pca.fit_transform(dataset)
     #Factor Analysis
     elif method=='FactorAnalysis':
         fa=FactorAnalysis(n_components=nr)
         reduced=fa.fit_transform(dataset)
     #kernel pca with rbf kernel
     elif method=='KPCArbf':
         kpca=KernelPCA(nr,kernel='rbf')
         reduced=kpca.fit_transform(dataset)
     #kernel pca with poly kernel
     elif method=='KPCApoly':
         kpca=KernelPCA(nr,kernel='poly')
         reduced=kpca.fit_transform(dataset)
     #kernel pca with cosine kernel
     elif method=='KPCAcosine':
         kpca=KernelPCA(nr,kernel='cosine')
         reduced=kpca.fit_transform(dataset)
     #kernel pca with sigmoid kernel
     elif method=='KPCAsigmoid':
         kpca=KernelPCA(nr,kernel='sigmoid')
         reduced=kpca.fit_transform(dataset)
      #Incremental PCA
     elif method=='IPCA':
         ipca=IncrementalPCA(nr)
         reduced=ipca.fit_transform(dataset)
     #Fast ICA
     elif method=='FastICAParallel':
         fip=FastICA(nr,algorithm='parallel')
         reduced=fip.fit_transform(dataset)
     elif method=='FastICADeflation':
         fid=FastICA(nr,algorithm='deflation')
         reduced=fid.fit_transform(dataset)
     elif method == 'All':
         self.dimensionalityReduction(nr=nr)
         return self
     
     self.ModelInputs.update({method:reduced})
     self.datasetsAvailable.append(method)
     return self
 def dimensionalityReduction(self,nr=5):
      '''Apply all the dimensionality reduction techniques available in this class.
      Techniques available:
                          'PCA'
                          'FactorAnalysis'
                          'KPCArbf','KPCApoly'
                          'KPCAcosine','KPCAsigmoid'
                          'IPCA'
                          'FastICADeflation'
                          'FastICAParallel'
      Note: 'Isomap', 'LLE', 'LLEmodified' and 'LLEltsa' are currently
      commented out below and are not applied.
      '''
     dataset=self.ModelInputs['Dataset']
     sklearn_pca = sklearnPCA(n_components=nr)
     p_components = sklearn_pca.fit_transform(dataset)
     fa=FactorAnalysis(n_components=nr)
     factors=fa.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='rbf')
     rbf=kpca.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='poly')
     poly=kpca.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='cosine')
     cosine=kpca.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='sigmoid')
     sigmoid=kpca.fit_transform(dataset)
     ipca=IncrementalPCA(nr)
     i_components=ipca.fit_transform(dataset)
      fip=FastICA(nr,algorithm='parallel')
      fid=FastICA(nr,algorithm='deflation')
      ficaP=fip.fit_transform(dataset)  # parallel results go with the 'FastICAParallel' key below
      ficaD=fid.fit_transform(dataset)  # deflation results go with the 'FastICADeflation' key below
     '''isomap=Isomap(n_components=nr).fit_transform(dataset)
     try:
         lle1=LocallyLinearEmbedding(n_components=nr).fit_transform(dataset)
     except ValueError:
         lle1=LocallyLinearEmbedding(n_components=nr,eigen_solver='dense').fit_transform(dataset)
     try:
         
         lle2=LocallyLinearEmbedding(n_components=nr,method='modified').fit_transform(dataset)
     except ValueError:
         lle2=LocallyLinearEmbedding(n_components=nr,method='modified',eigen_solver='dense').fit_transform(dataset) 
     try:
         lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa').fit_transform(dataset)
     except ValueError:
         lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa',eigen_solver='dense').fit_transform(dataset)'''
     values=[p_components,factors,rbf,poly,cosine,sigmoid,i_components,ficaD,ficaP]#,isomap,lle1,lle2,lle3]
     keys=['PCA','FactorAnalysis','KPCArbf','KPCApoly','KPCAcosine','KPCAsigmoid','IPCA','FastICADeflation','FastICAParallel']#,'Isomap','LLE','LLEmodified','LLEltsa']
     self.ModelInputs.update(dict(zip(keys, values)))
      self.datasetsAvailable.extend(keys)
     
     #debug
     #dataset=pd.DataFrame(self.ModelInputs['Dataset'])
     #dataset['Output']=self.ModelOutput
     #self.debug['Dimensionalityreduction']=dataset
     ###
     return self
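
For orientation, here is a minimal usage sketch of the two methods above. It is hypothetical: it assumes a model object whose ModelInputs dict already holds a numeric 'Dataset' array and which initialises datasetsAvailable as a list.

# hypothetical driver code, not part of the original class
model.reduceDataset(nr=3, method='PCA')         # stores ModelInputs['PCA']
model.reduceDataset(nr=3, method='KPCArbf')     # stores ModelInputs['KPCArbf']
model.dimensionalityReduction(nr=5)             # stores one entry per active technique
print(model.datasetsAvailable)                  # e.g. ['PCA', 'KPCArbf', 'PCA', 'FactorAnalysis', ...]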
Example #3
File: core.py  Project: fnielsen/brede
    def ica(self, n_components=None, sources='left'):
        """Return result from independent component analysis.

        X = SA + m

        Sklearn's FastICA implementation is used.

        When sources=left the sources are returned in the first (left) matrix
        and the mixing matrix is returned in the second (right) matrix,
        corresponding to X = SA.

        When sources=right the sources are returned in the second matrix while
        the mixing matrix is returned in the first, corresponding to X = AS.

        Parameters
        ----------
        n_components : int, optional
            Number of ICA components.
        sources : left or right, optional
            Indicates whether the sources should be the left or right matrix.

        Returns
        -------
        first : Matrix
            Estimated source matrix (S) if sources=left.
        second : Matrix
            Estimated mixing matrix (A) if sources=right.
        mean_vector : brede.core.vector.Vector
            Estimated mean vector

        References
        ----------
        http://scikit-learn.org/stable/modules/decomposition.html#ica

        """
        if n_components is None:
            min_shape = min(self.shape[0], len(self._eeg_columns))
            n_components = int(np.ceil(sqrt(float(min_shape) / 2)))

        ica = FastICA(n_components=n_components)

        if sources == 'left':
            sources = Matrix(ica.fit_transform(
                self.ix[:, self._eeg_columns].values),
                index=self.index)
            mixing_matrix = Matrix(ica.mixing_.T, columns=self._eeg_columns)
            mean_vector = Vector(ica.mean_, index=self._eeg_columns)
            return sources, mixing_matrix, mean_vector

        elif sources == 'right':
            sources = Matrix(ica.fit_transform(
                self.ix[:, self._eeg_columns].values.T).T,
                columns=self._eeg_columns)
            mixing_matrix = Matrix(ica.mixing_, index=self.index)
            mean_vector = Vector(ica.mean_, index=self.index)
            return mixing_matrix, sources, mean_vector

        else:
            raise ValueError('Wrong argument to "sources"')
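
The X = SA + m convention documented above maps directly onto scikit-learn's FastICA attributes, as the reconstruction check in Example #38 below also shows. A small self-contained sketch (plain NumPy, not brede; square mixing so the reconstruction is exact):

import numpy as np
from sklearn.decomposition import FastICA

rng = np.random.RandomState(0)
X = rng.random_sample((200, 4))      # observations, shape (n_samples, n_features)
ica = FastICA(n_components=4, random_state=0)
S = ica.fit_transform(X)             # sources S, shape (n_samples, n_components)
A = ica.mixing_.T                    # the 'left' convention used above: A = mixing_.T
m = ica.mean_
assert np.allclose(X, S @ A + m)     # X = SA + m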
Example #4
def mixing_matrix(data, n_components, display=True):
    features, weights, labels = data
    ica = FastICA(n_components=n_components)
    ica.fit_transform(features)
    mixing = ica.mixing_
    if display:
        f, ax = plt.subplots(figsize=(10, 4))
        sns.heatmap(mixing)
        plt.title('Signal Mixing Estimated Matrix')
    return mixing
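
A hypothetical call for the helper above (the weights and labels elements of the data tuple are unpacked but unused, so placeholders suffice; features is a stand-in (n_samples, n_features) array):

mixing = mixing_matrix((features, None, None), n_components=3, display=False)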
Example #5
File: ICA.py  Project: rchau/sleep-eeg
def run_ica(data, comp):
    # data is assumed to be shaped (channels, trials, timepoints); ICA is fit per trial
    ica = FastICA(n_components=comp, whiten=True, max_iter=5000)
    data_out = np.zeros((comp, data.shape[1], data.shape[2]))
    for i in range(data.shape[1]):
        print(i)
        data_out[:, i, :] = ica.fit_transform(data[:, i, :].T).T
    return data_out
Example #6
File: part2.py  Project: rbaxter1/CS7641
    def ica_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
        scl = RobustScaler()
        X_train_scl = scl.fit_transform(X_train)
        X_test_scl = scl.transform(X_test)
        
        ##
        ## ICA
        ##
        ica = FastICA(n_components=X_train_scl.shape[1])
        X_ica = ica.fit_transform(X_train_scl)
        
        ##
        ## Plots
        ##
        ph = plot_helper()

        kurt = kurtosis(X_ica)
        print(kurt)
        
        title = 'Kurtosis (FastICA) for ' + data_set_name
        name = data_set_name.lower() + '_ica_kurt'
        filename = './' + self.out_dir + '/' + name + '.png'
        
        ph.plot_simple_bar(np.arange(1, len(kurt)+1, 1),
                           kurt,
                           np.arange(1, len(kurt)+1, 1).astype('str'),
                           'Feature Index',
                           'Kurtosis',
                           title,
                           filename)
Example #7
File: part2.py  Project: rbaxter1/CS7641
 def best_ica_nba(self):
     dh = data_helper()
     X_train, X_test, y_train, y_test = dh.get_nba_data()
     
     scl = RobustScaler()
     X_train_scl = scl.fit_transform(X_train)
     X_test_scl = scl.transform(X_test)
     
     ica = FastICA(n_components=X_train_scl.shape[1])
     X_train_transformed = ica.fit_transform(X_train_scl, y_train)
     X_test_transformed = ica.transform(X_test_scl)
     
     ## top 2
     kurt = kurtosis(X_train_transformed)
     i = kurt.argsort()[::-1]
     X_train_transformed_sorted = X_train_transformed[:, i]
     X_train_transformed = X_train_transformed_sorted[:,0:2]
     
     kurt = kurtosis(X_test_transformed)
     i = kurt.argsort()[::-1]
     X_test_transformed_sorted = X_test_transformed[:, i]
     X_test_transformed = X_test_transformed_sorted[:,0:2]
     
     # save
     filename = './' + self.save_dir + '/nba_ica_x_train.txt'
     pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)
     
     filename = './' + self.save_dir + '/nba_ica_x_test.txt'
     pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)
     
     filename = './' + self.save_dir + '/nba_ica_y_train.txt'
     pd.DataFrame(y_train).to_csv(filename, header=False, index=False)
     
     filename = './' + self.save_dir + '/nba_ica_y_test.txt'
     pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
Example #8
File: ica_idvclas.py  Project: hejiaz/SRM
def align(movie_data, options, args, lrh):
    print('pICA(scikit-learn)')
    nvoxel = movie_data.shape[0]
    nTR    = movie_data.shape[1]
    nsubjs = movie_data.shape[2]

    align_algo = args.align_algo
    nfeature   = args.nfeature
    randseed    = args.randseed
    if not os.path.exists(options['working_path']):
        os.makedirs(options['working_path'])

    # zscore the data
    bX = np.zeros((nsubjs*nvoxel,nTR))
    for m in range(nsubjs):
        bX[m*nvoxel:(m+1)*nvoxel,:] = stats.zscore(movie_data[:, :, m].T ,axis=0, ddof=1).T
    del movie_data
 
    np.random.seed(randseed)
    A = np.mat(np.random.random((nfeature,nfeature)))

    ica = FastICA(n_components= nfeature, max_iter=500,w_init=A,random_state=randseed)
    St = ica.fit_transform(bX.T)
    ES = St.T
    bW = ica.mixing_

    R = np.zeros((nvoxel,nfeature,nsubjs))
    for m in range(nsubjs):
        R[:,:,m] = bW[m*nvoxel:(m+1)*nvoxel,:]

    niter = 10  
    # initialization when first time run the algorithm
    np.savez_compressed(options['working_path']+align_algo+'_'+lrh+'_'+str(niter)+'.npz',\
                                R = R, G=ES.T, niter=niter)
    return niter
Example #9
File: matrix.py  Project: fnielsen/brede
    def ica(self, n_components=None):
        """Return result from independent component analysis.

        X = SA + m

        Sklearn's FastICA implementation is used.

        Parameters
        ----------
        n_components : int, optional
            Number of ICA components.

        Returns
        -------
        source : Matrix
            Estimated source matrix (S)
        mixing_matrix : Matrix
            Estimated mixing matrix (A)
        mean_vector : brede.core.vector.Vector
            Estimated mean vector

        References
        ----------
        http://scikit-learn.org/stable/modules/decomposition.html#ica

        """
        if n_components is None:
            n_components = int(np.ceil(np.sqrt(float(min(self.shape)) / 2)))

        ica = FastICA(n_components=n_components)
        sources = Matrix(ica.fit_transform(self.values), index=self.index)
        mixing_matrix = Matrix(ica.mixing_.T, columns=self.columns)
        mean_vector = Vector(ica.mean_, index=self.columns)

        return sources, mixing_matrix, mean_vector
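
As a quick worked example of the default heuristic above: for a 100 x 10 matrix, min(self.shape) = 10, sqrt(10 / 2) = sqrt(5) ≈ 2.24, and the ceiling gives n_components = 3.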
def test_inverse_transform():
    # Test FastICA.inverse_transform
    n_features = 10
    n_samples = 100
    n1, n2 = 5, 10
    rng = np.random.RandomState(0)
    X = rng.random_sample((n_samples, n_features))
    expected = {(True, n1): (n_features, n1),
                (True, n2): (n_features, n2),
                (False, n1): (n_features, n2),
                (False, n2): (n_features, n2)}
    for whiten in [True, False]:
        for n_components in [n1, n2]:
            n_components_ = (n_components if n_components is not None else
                             X.shape[1])
            ica = FastICA(n_components=n_components, random_state=rng,
                          whiten=whiten)
            with warnings.catch_warnings(record=True):
                # catch "n_components ignored" warning
                Xt = ica.fit_transform(X)
            expected_shape = expected[(whiten, n_components_)]
            assert_equal(ica.mixing_.shape, expected_shape)
            X2 = ica.inverse_transform(Xt)
            assert_equal(X.shape, X2.shape)

            # reversibility test in non-reduction case
            if n_components == X.shape[1]:
                assert_array_almost_equal(X, X2)
Example #11
def test_inverse_transform():
    """Test FastICA.inverse_transform"""
    rng = np.random.RandomState(0)
    X = rng.random_sample((100, 10))
    n_features = X.shape[1]
    expected = {(True, 5): (n_features, 5),
                (True, 10): (n_features, 10),
                (False, 5): (n_features, 10),
                (False, 10): (n_features, 10)}

    for whiten in [True, False]:
        for n_components in [5, 10]:
            ica = FastICA(n_components=n_components, random_state=rng,
                          whiten=whiten)
            Xt = ica.fit_transform(X)
            expected_shape = expected[(whiten, n_components)]
            assert_equal(ica.mixing_.shape, expected_shape)
            X2 = ica.inverse_transform(Xt)
            assert_equal(X.shape, X2.shape)

            # reversibility test in non-reduction case
            if n_components == X.shape[1]:
                assert_array_almost_equal(X, X2)
Example #12
File: ica.py  Project: FedeMPouzols/Savu
    def filter_frames(self, data):
        logging.debug("I am starting the old componenty vous")
        data = data[0]
        print('The length of the data is ' + str(data.shape))
        sh = data.shape
        newshape = (np.prod(sh[:-1]), sh[-1])
        print "The shape of the data is:"+str(data.shape) + str(newshape)
        data = np.reshape(data, (newshape))
        # data will already be shaped correctly
        logging.debug("Making the matrix")
        ica = FastICA(n_components=self.parameters['number_of_components'],
                      algorithm='parallel',
                      whiten=self.parameters['whiten'],
                      w_init=self.parameters['w_init'],
                      random_state=self.parameters['random_state'])
        logging.debug("Performing the fit")
        data = self.remove_nan_inf(data)  #otherwise the fit flags up an error for obvious reasons
#         print "I'm here"
        S_ = ica.fit_transform(data)
#         print "S_Shape is:"+str(S_.shape)
#         print "self.images_shape:"+str(self.images_shape)
        scores = np.reshape(S_, (self.images_shape))
        eigenspectra = ica.components_
        logging.debug("mange-tout")
        return [scores, eigenspectra]
Example #13
def getHeartRate(window, lastHR):
    # Normalize across the window to have zero-mean and unit variance
    mean = np.mean(window, axis=0)
    std = np.std(window, axis=0)
    normalized = (window - mean) / std

    # Separate into three source signals using ICA
    ica = FastICA()
    srcSig = ica.fit_transform(normalized)

    # Find power spectrum
    powerSpec = np.abs(np.fft.fft(srcSig, axis=0))**2
    freqs = np.fft.fftfreq(WINDOW_SIZE, 1.0 / FPS)

    # Find heart rate
    maxPwrSrc = np.max(powerSpec, axis=1)
    validIdx = np.where((freqs >= MIN_HR_BPM / SEC_PER_MIN) & (freqs <= MAX_HR_BMP / SEC_PER_MIN))
    validPwr = maxPwrSrc[validIdx]
    validFreqs = freqs[validIdx]
    maxPwrIdx = np.argmax(validPwr)
    hr = validFreqs[maxPwrIdx]
    print(hr)

    #plotSignals(normalized, "Normalized color intensity")
    #plotSignals(srcSig, "Source signal strength")
    #plotSpectrum(freqs, powerSpec)

    return hr
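
Note on units: np.fft.fftfreq is called with d=1.0/FPS, so freqs (and hence the returned hr) are in Hz; that is why the BPM bounds are divided by SEC_PER_MIN in the band check, and a caller would multiply hr by 60 to report beats per minute.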
def test_ica(eng):
    t = linspace(0, 10, 100)
    s1 = sin(t)
    s2 = square(sin(2*t))
    x = c_[s1, s2, s1+s2]
    random.seed(0)
    x += 0.001*random.randn(*x.shape)
    x = fromarray(x, engine=eng)

    def normalize_ICA(s, aT):
        a = aT.T
        c = a.sum(axis=0)
        return s*c, (a/c).T

    from sklearn.decomposition import FastICA
    ica = FastICA(n_components=2, fun='cube', random_state=0)
    s1 = ica.fit_transform(x.toarray())
    aT1 = ica.mixing_.T
    s1, aT1 = normalize_ICA(s1, aT1)

    s2, aT2 = ICA(k=2, svd_method='direct', max_iter=200, seed=0).fit(x)
    s2, aT2 = normalize_ICA(s2, aT2)
    tol=1e-1
    assert allclose_sign_permute(s1, s2, atol=tol)
    assert allclose_sign_permute(aT1, aT2, atol=tol)
Example #15
def ica(tx, ty, rx, ry):
    compressor = ICA(whiten=True)  # for some people, whiten needs to be off
    newtx = compressor.fit_transform(tx)
    newrx = compressor.fit_transform(rx)
    em(newtx, ty, newrx, ry, add="wICAtr", times=10)
    km(newtx, ty, newrx, ry, add="wICAtr", times=10)
    nn(newtx, ty, newrx, ry, add="wICAtr")
 def __create_image_obser(self, image_observations) :
     """
     Creation of a space in which the images will be compared (learning stage).
     Firstly PCA is applied in order to reduce the number of features in the
     images. Reduction is done so that 99% of measured variance is covered.
     
     After that, ICA is performed on the coefficients calculated by transforming
     (reducing) the face images with PCA. From the learned ICA components
     basis_images (vectors), original images coefficients and transformation
     for new comming images are extracted.
     """
     pca = PCA()
     pca.fit(image_observations)
     explained = 0
     components_to_take = 0
     for ratio in pca.explained_variance_ratio_:
         components_to_take += 1
         explained += ratio
         if explained > 0.99:
             break
     print("PCA reduces the number of dimensions to: " + str(components_to_take))
     pca = PCA(whiten=True, n_components=components_to_take)
     self.__transformed_images = pca.fit_transform(image_observations)
     self.__transformed_images_mean = np.mean(self.__transformed_images, axis=0)
     self.__transformed_images -= self.__transformed_images_mean
     self.__pca = pca
     
     
     ica = FastICA(whiten=True, max_iter=100000)
     self.__original_images_repres = ica.fit_transform(self.__transformed_images)
     self.__basis_images = ica.mixing_.T
     self.__transformation = ica.components_
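
The explained-variance loop above can be written more compactly with NumPy; a sketch of the same 99% cut, assuming image_observations is an (n_samples, n_features) array:

import numpy as np
from sklearn.decomposition import PCA

pca = PCA().fit(image_observations)
cum = np.cumsum(pca.explained_variance_ratio_)
# first index whose cumulative ratio exceeds 0.99, converted to a component count
components_to_take = int(np.searchsorted(cum, 0.99, side='right')) + 1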
Example #17
def independent_component(x, y):
    clf = FastICA(random_state=1)
    transformed = clf.fit_transform(x.reshape(-1, 1))
    comp = clf.components_[0, 0]
    mm = clf.mixing_[0, 0]
    src_max = transformed.max()
    src_min = transformed.min()
    return [comp, mm, src_max, src_min]
Example #18
def transform(data, n_components=3):
    features, weights, labels = data
    start = time()
    ica = FastICA(n_components=n_components)
    transformed = ica.fit_transform(features)
    elapsed = time() - start
    df = pd.DataFrame(transformed)
    return df, elapsed
Example #19
    def generate_peoples_results_files(self):

        self.np_result = np.c_[self.results[0]['blue'], self.results[0]['green'], self.results[0]['red']]
        list_number = len(self.results[0]['blue'])

        #  ICA
        ica = FastICA(n_components=3, fun='logcosh', max_iter=2000)
        ica_transformed = ica.fit_transform(self.np_result)
        component_all = ica_transformed.ravel('F')  # column-major, so each component is contiguous
        component_1 = component_all[:list_number]
        component_2 = component_all[list_number:(2 * list_number)]
        component_3 = component_all[(2 * list_number):(3 * list_number)]

        #  butter_smooth
        N = 8
        Wn = [1.6 / 30, 4.0 / 30]
        t = np.linspace(1 / 30, list_number / 30, list_number)
        b, a = signal.butter(N, Wn, 'bandpass', analog=False)
        filter_1 = signal.filtfilt(b, a, component_1)
        filter_2 = signal.filtfilt(b, a, component_2)
        filter_3 = signal.filtfilt(b, a, component_3)
        lowess_1 = sm.nonparametric.lowess(filter_1, t, frac=10.0 / list_number)
        lowess_2 = sm.nonparametric.lowess(filter_2, t, frac=10.0 / list_number)
        lowess_3 = sm.nonparametric.lowess(filter_3, t, frac=10.0 / list_number)

        smooths = []
        smooth_1 = lowess_1[:, 1]
        smooth_2 = lowess_2[:, 1]
        smooth_3 = lowess_3[:, 1]
        smooths.append(smooth_1)
        smooths.append(smooth_2)
        smooths.append(smooth_3)

        # FFT and spectrum
        fft_1 = np.fft.fft(smooth_1, 256)
        fft_2 = np.fft.fft(smooth_2, 256)
        fft_3 = np.fft.fft(smooth_3, 256)
        spectrum_1 = list(np.abs(fft_1) ** 2)
        spectrum_2 = list(np.abs(fft_2) ** 2)
        spectrum_3 = list(np.abs(fft_3) ** 2)
        max1 = max(spectrum_1)
        max2 = max(spectrum_2)
        max3 = max(spectrum_3)
        num_spec1 = spectrum_1.index(max(spectrum_1))
        if num_spec1 > (list_number / 2):
            num_spec1 = 256 - num_spec1
        num_spec2 = spectrum_2.index(max(spectrum_2))
        if num_spec2 > (list_number / 2):
            num_spec2 = 256 - num_spec2
        num_spec3 = spectrum_3.index(max(spectrum_3))
        if num_spec3 > (list_number / 2):
            num_spec3 = 256 - num_spec3
        num_spec = [num_spec1, num_spec2, num_spec3]
        max_all = [max1, max2, max3]
        max_num = max_all.index(max(max_all))
        self.heartRate = int(num_spec[max_num] * 1800 / 256) + 1
        return smooths[max_num]
    def _fit_local(self, data):


        from sklearn.decomposition import FastICA
        from numpy import random
        random.seed(self.seed)
        model = FastICA(n_components=self.k, fun="cube", max_iter=self.max_iter, tol=self.tol, random_state=self.seed)
        signals = model.fit_transform(data)
        return signals, model.mixing_.T
def fit_transform_ica(X):
    ica = FastICA(n_components=50, max_iter=2000, tol=0.05, algorithm='parallel', fun='cube', fun_args={'alpha': 1.0}, random_state=42) #26 36 76
    start = time.time()
    X = ica.fit_transform(X)
    end = time.time()
    
    print "Done!\nFit ICA transform time (secs): {:.3f}".format(end - start)

    return X, ica
Example #22
File: hw3.py  Project: jezlax/python
def print_kurtosis(scaled_data):
    # print the kurtosis of the scaled data
    print("Kurtosis of original DF:", kurtosis(scaled_data))

    # print the kurtosis of the ICA-transformed columns
    for i in range(1, len(scaled_data[0]) + 1):
        ica = FastICA(n_components=i)
        ica_fit = ica.fit_transform(scaled_data)

        print("Kurtosis of ICA Transformed data when i=" + str(i) + ":", kurtosis(ica_fit))
Example #23
def ICA(model_data, components = None, transform_data = None):
    t0 = time()
    ica = FastICA(n_components=components)
    if transform_data is None:
        projection = ica.fit_transform(model_data)
    else:
        ica.fit(model_data)
        projection = ica.transform(transform_data)
    print "ICA Time: %0.3f" % (time() - t0)
    return projection
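
A hypothetical call pattern for this wrapper, showing both branches (X_train and X_test are stand-in arrays):

proj_train = ICA(X_train, components=3)                        # fit and project the model data
proj_test = ICA(X_train, components=3, transform_data=X_test)  # fit on train, project held-out data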
Example #24
class ICA(Transform):
    def __init__(self, dependency, n_components=6):
        self.ica = FastICA(n_components)
        self.dependency = dependency
        
    def requires(self):
        return [self.dependency]
        
    def apply(self, data):
        return self.ica.fit_transform(data.T).T
Example #25
def fun_doICA(X, nc):
    '''
    Perform ICA and sort signals by bimodality
    '''
    ica = FastICA(n_components = nc)
    Sest = ica.fit_transform(X)
    A = ica.mixing_
    S = fun_sort_bimod(Sest)
    out = {'S':S, 'A':A}
    return out
Example #26
def pca_ica(mov, components=50, batch=1000, mu=0.5, ica_func='logcosh', show_status=True):
    """Perform iterative PCA/ICA ROI extraction

    Parameters
    ----------
    mov : pyfluo.Movie
        input movie
    components : int
        number of independent components to return
    batch : int
        number of pixels to load into memory simultaneously. More leads to a better fit, but requires more memory
    mu : float
        from 0-1. In spatiotemporal ICA, closer to 1 means more weight on spatial
    ica_func : str 
        cdf for entropy maximization in ICA
    show_status : bool
        show time elapsed while running

    Returns
    -------
    Array of shape (n,y,x) where n is number of components, and y,x correspond to shape of mov

    """
    if show_status:
        p = mup.Process(target=display_time_elapsed)
        p.start()

    eigenseries, eigenframes,_proj = ipca(mov, components, batch)
    # normalize the series

    frame_scale = mu / np.max(eigenframes)
    frame_mean = np.mean(eigenframes, axis = 0)
    n_eigenframes = frame_scale * (eigenframes - frame_mean)

    series_scale = (1-mu) / np.max(eigenseries)
    series_mean = np.mean(eigenseries, axis = 0)
    n_eigenseries = series_scale * (eigenseries - series_mean)

    # build new features from the space/time data
    # and compute ICA on them

    eigenstuff = np.concatenate([n_eigenframes, n_eigenseries])

    ica = FastICA(n_components=components, fun=ica_func)
    joint_ics = ica.fit_transform(eigenstuff)

    # extract the independent frames
    num_frames, h, w = mov.shape
    frame_size = h * w
    ind_frames = joint_ics[:frame_size, :]
    ind_frames = np.reshape(ind_frames.T, (components, h, w))
    
    if show_status:  p.terminate()
    
    return ind_frames  
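
A hypothetical invocation, assuming mov is a pyfluo.Movie with shape (frames, y, x):

rois = pca_ica(mov, components=50, batch=1000, mu=0.5)  # (50, y, x) array of candidate ROIs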
def Get_Result(filename1,filename2,withbeam=True):
    '''filename1: 21cm
    filename2 :foreground or 21cm+fg+beam
    '''

    call('rm *.eps', shell=True)

    Plot = False
    pol = 0
    N = 4
#   withbeam=True
    ############read data#####################
    map1 = tt.ICA.ReadMap(filename1)
    map2 = tt.ICA.ReadMap(filename2)
    if withbeam==True: 
        map = map2
        Freq_num = map.shape[0]
        S = map.T
        del map
    else:
        map=map1[:,pol]+map2[:,pol]
        Freq_num = map.shape[0]
        S=map.T
    ############FastICA#######################
    ica = FastICA(
        n_components=N,
        algorithm='parallel',
        whiten=True,
        fun='logcosh',
        fun_args=None,
        max_iter=200,
        tol=0.0001,
        w_init=None,
        random_state=None)
    S_ = ica.fit_transform(S)
    A_ = ica.mixing_
    ##########################################
#    tt.ICA.GetComponent(N, S_)
#    re=tt.ICA.rebuild(N, A_, S_, 0, Plot=False)
#    residuals=tt.ICA.RESULT(0,N,pol,A_,S_,map1,map2,map3,Plot)
    res = []
#    return (map1[100,pol],map1[100,pol]+map2[100,pol]-re)
    ##########################################

    for i in range(Freq_num):
        res.append(tt.ICA.RESULT(i, N, pol, A_, S_, map1, map2, Plot, withbeam))
        print(res[-1])
        plt.close('all')
    res = np.array(res)
    resx = np.linspace(700, 800, Freq_num, endpoint=True)
#   plt.plot(resx,res,label='freq_%d pixel_%d'%(F,P))
#   plt.show()
    return np.c_[resx,res]
Example #28
def decompose(data, data_cols=None, kind='ICA', n_components=None, iterations=300):
    decompositor = None
    if kind == 'ICA':
        decompositor = FastICA(n_components=n_components, max_iter=iterations)
    elif kind == 'PCA':
        decompositor = PCA(n_components=n_components)
    elif kind == 'Kernel':
        decompositor = KernelPCA(n_components=n_components, max_iter=iterations)
    matrix = data[data_cols].to_numpy() if data_cols is not None else data.to_numpy()
    transformed_data = decompositor.fit_transform(matrix)
    # columns = ['pca{0:0>3}'.format(idx) for idx, value in enumerate(transformed_data, start=0)]
    dataframe = pd.DataFrame(transformed_data, index=data.index)
    dataframe.insert(len(dataframe.columns), 'class', data['class'])
    return dataframe
def calcICA(delta_data, components):

    data = preprocess(delta_data)
    ica = FastICA(n_components=components)
    x_ica = ica.fit_transform(data['cleanMatrix'])
   
    ica_fill = np.ones((delta_data.shape[0],components))*np.nan
    ica_fill[data['cleanind']] = x_ica
    ica_weights = ica.components_.T
    delta_ica = {'transform':ica_fill,
                 'weights' : ica_weights,
                }
    return delta_ica
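
The NaN handling above is a common mask-and-refill idiom: fit on the complete rows only, then scatter the transformed rows back into a NaN-filled array aligned with the original. A generic self-contained sketch (names are illustrative, not from the original):

import numpy as np

X = np.array([[1.0, 2.0], [np.nan, 0.0], [3.0, 4.0]])
clean = ~np.isnan(X).any(axis=1)   # mask of rows without NaNs
out = np.full(X.shape, np.nan)     # output aligned with the original rows
out[clean] = X[clean] * 2          # stand-in for the fitted transform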
def fast_ica(brain, components):
    ica = FastICA(n_components=components)
    S_ = ica.fit_transform(brain)  # Reconstruct signals
    A_ = ica.mixing_  # Get estimated mixing matrix

    return S_

    # outfile = infile.split('.')[0] + 'fast_ica.csv'
    # with open(outfile, 'wb') as s:
    #     writer = csv.writer(s)
    #     writer.writerows(S_)

    # return outfile
Example #31
# S2 = np.sin(2 * time)  # Signal 1 : sinusoidal signal
S2 = np.sign(np.sin(3 * time))  # Signal 2 : square signal
# S_2 = signal.sawtooth(2 * np.pi * time)  # Signal 3: saw tooth signal
# S2 += 0.2 * np.random.normal(size=S2.shape)  # Add noise

S = np.c_[S1, S2]

S /= S.std(axis=0)  # Standardize data

# Mix data
A = np.c_[np.ones([2, 1]), np.random.rand(2, 2)]  # Mixing matrix
X = np.dot(S, A)  # Generate observations

# Compute ICA
ica = FastICA(n_components=2)
S_ica = ica.fit_transform(X)

# #############################################################################
# Plot results

plt.figure()
# plot source signal
ax = plt.subplot(4, 1, 1)
ax.set_title('Source 1')
ax.plot(S1)
ax = plt.subplot(4, 1, 2)
ax.set_title('Source 2')
ax.plot(S2)
# plot mixing signal
ax = plt.subplot(4, 1, 3)
ax.set_title('Observations')
Example #32
S = np.c_[s1, s2, s3]
S += 0.2 * np.random.normal(size=S.shape)  # Add noise

S /= S.std(axis=0)  # Standardize data
# Mix data
A = np.array([[1, 1, 1], [0.5, 2, 1.0], [1.5, 1.0, 2.0]])  # Mixing matrix
X = np.dot(S, A.T)  # Generate observations

###############################################################################
# Now try to recover the sources
# ------------------------------

# compute ICA
ica = FastICA(n_components=3)
S_ = ica.fit_transform(X)  # Get the estimated sources
A_ = ica.mixing_  # Get estimated mixing matrix

# compute PCA
pca = PCA(n_components=3)
H = pca.fit_transform(X)  # estimate PCA sources

plt.figure(figsize=(9, 6))

models = [X, S, S_, H]
names = [
    'Observations (mixed signal)', 'True Sources', 'ICA estimated sources',
    'PCA estimated sources'
]
colors = ['red', 'steelblue', 'orange']
Example #33
def apply_ICA(proj_data, proj_weights=None):
    ica = FastICA(n_components=2, random_state=RANDOM_SEED)
    result = ica.fit_transform(proj_data.copy().T)  # Copy needed because ICA whitens the input matrix
    return result
Example #34
def vis_embeddings(dim_red_method, epochs, sample):
    n_comp = 2

    x_train = epochs.get_data()
    x_train = x_train.transpose(0, 2, 1).reshape(-1, x_train.shape[1])
    x_train = StandardScaler().fit_transform(x_train)
    y_train = get_y_train(sample)

    inds = np.arange(15, 8000, 50)
    x_train = x_train[inds]
    y_train = y_train[inds]

    print('fitting {}'.format(dim_red_method))
    if dim_red_method == 'pca':
        pca = PCA(n_components=n_comp)
        reduced_data = pca.fit_transform(x_train)
    elif dim_red_method == 'ica':
        ica = FastICA(n_components=n_comp)
        reduced_data = ica.fit_transform(x_train)
    elif dim_red_method == 'se':
        se = SpectralEmbedding(n_components=n_comp)
        reduced_data = se.fit_transform(x_train)
    elif dim_red_method == 'tsne':
        pca = PCA(n_components=50)
        pca_data = pca.fit_transform(x_train)
        tsne = TSNE(n_components=n_comp,
                    verbose=1,
                    perplexity=10,
                    learning_rate=200)
        reduced_data = tsne.fit_transform(pca_data)
    else:
        raise ValueError("{} method not implemented".format(dim_red_method))
    print('fitting done')

    if n_comp == 2:
        reduced_data_df = pd.DataFrame(data=reduced_data,
                                       columns=['PC1', 'PC2'])
    elif n_comp == 3:
        reduced_data_df = pd.DataFrame(data=reduced_data,
                                       columns=['PC1', 'PC2', 'PC3'])
    y_train_df = pd.DataFrame(data=y_train, columns=["labels"])
    final_df = pd.concat([reduced_data_df, y_train_df[['labels']]], axis=1)

    if n_comp == 2:
        sns.set()
        palette = sns.color_palette("bright", 8)
        ax = sns.scatterplot(x='PC1',
                             y='PC2',
                             hue='labels',
                             data=final_df,
                             palette=palette,
                             legend='full')
        ax.set(xlabel='PC1',
               ylabel='PC2',
               title='2 component {}'.format(dim_red_method))
        plt.show()
    elif n_comp == 3:
        ax = plt.figure(figsize=(16, 10)).gca(projection='3d')
        ax.scatter(xs=final_df["PC1"],
                   ys=final_df["PC2"],
                   zs=final_df["PC2"],
                   c=final_df["labels"],
                   cmap='tab10')
        ax.set_xlabel('PC1')
        ax.set_ylabel('PC2')
        ax.set_zlabel('PC3')
        plt.show()
Example #35
s3 = signal.sawtooth(2 * np.pi * time)  # Signal 3: saw tooth signal

S = np.c_[s1, s2, s3]
S += 0.2 * np.random.normal(size=S.shape)  # Add noise

S /= S.std(axis=0)  # Standardize data
# Mix data
A = np.array([[1, 1, 1], [0.5, 2, 1.0], [1.5, 1.0, 2.0]])
B = np.array([[1, 3, 2], [2, 0.1, 1.0], [2.0, 1.5, 0.5]])
C = np.array([[1, 2, 0.5], [2, 0.5, 0.5], [0.5, 0.5, 2.0]])
# Mixing matrix

ica = FastICA(n_components=3)

XA = np.dot(S, A.T)  # Generate observations
SA_ = ica.fit_transform(XA)  # Reconstruct signals
A_ = ica.mixing_  # Get estimated mixing matrix

XB = np.dot(S, B.T)  # Generate observations
SB_ = ica.fit_transform(XB)  # Reconstruct signals
B_ = ica.mixing_  # Get estimated mixing matrix

XC = np.dot(S, C.T)  # Generate observations
SC_ = ica.fit_transform(XC)  # Reconstruct signals
C_ = ica.mixing_  # Get estimated mixing matrix

plt.figure()
models = [XA, XB, XC, S, SA_, SB_, SC_]
names = [
    'Observations(mixed signal by A)', 'Observations(mixed signal by B)',
    'Observations(mixed signal by C)', 'True Sources',
Example #36
    def run_and_fit(self,
                    model_string,
                    nr_components,
                    nr_timepoints,
                    nr_neurons,
                    lambd=0):
        np.random.seed(7)
        #X=self.simulate_data(nr_components,nr_timepoints,nr_neurons)
        X = self.simulate_data_w_noise(nr_components,
                                       nr_timepoints,
                                       nr_neurons,
                                       noise_ampl_mult=4)
        if model_string == 'EnsemblePursuit':
            options_dict = {'seed_neuron_av_nr': 10, 'min_assembly_size': 1}

            ep_pt = EnsemblePursuitPyTorch(n_ensembles=nr_components,
                                           lambd=lambd,
                                           options_dict=options_dict)
            U, V = ep_pt.fit_transform(X)
            self.U = U.numpy()
            self.V = V.numpy().T
        if model_string == 'EnsemblePursuitNumpy':
            options_dict = {'seed_neuron_av_nr': 10, 'min_assembly_size': 1}
            ep_np = EnsemblePursuitNumpy(n_ensembles=nr_components,
                                         lambd=lambd,
                                         options_dict=options_dict)
            U, V, self.corrs = ep_np.fit_transform(X)
            self.U = U
            self.V = V.T
        if model_string == 'ICA':
            ica = FastICA(n_components=nr_components, random_state=7)
            self.V = ica.fit_transform(X.T).T
            self.U = ica.mixing_
        if model_string == 'PCA':
            pca = PCA(n_components=nr_components, random_state=7)
            self.V = pca.fit_transform(X.T).T
            self.U = pca.components_.T
        if model_string == 'sparsePCA':
            spca = SparsePCA(n_components=nr_components, random_state=7)
            self.V = spca.fit_transform(X.T).T
            self.U = spca.components_.T
        if model_string == 'NMF':
            X -= X.min(axis=0)
            nmf = NMF(n_components=nr_components,
                      init='nndsvd',
                      random_state=7,
                      alpha=lambd,
                      l1_ratio=0.5)
            self.V = nmf.fit_transform(X.T).T
            self.U = nmf.components_.T
        if model_string == 'LDA':
            X -= X.min(axis=0)
            nmf = LatentDirichletAllocation(n_components=nr_components,
                                            random_state=7)
            self.V = nmf.fit_transform(X.T).T
            self.U = nmf.components_.T
        print('SHPS', self.U.shape, self.V.shape)
        self.orig = X
        self.approx = self.U @ self.V
        print('orig', self.orig.shape)
        print('approx', self.approx.shape)
Example #37
def add_pld_params(model_params,
                   fluxes,
                   pld_intensities,
                   n_pld=9,
                   order=3,
                   add_unity=True,
                   do_pca=True,
                   do_ica=False,
                   do_std=True,
                   pca_cut=False,
                   n_ppm=1.0,
                   start_unity=False,
                   verbose=False):

    # Make a local copy
    pld_intensities = pld_intensities.copy()

    if len(pld_intensities) != n_pld * order:
        pld_intensities = np.vstack(
            [list(pld_intensities**k) for k in range(1, order + 1)])

    # check that the second set is the square of the first set, and so on.
    for k in range(order):
        assert (np.allclose(pld_intensities[:n_pld]**(k + 1),
                            pld_intensities[k * n_pld:(k + 1) * n_pld]))

    if do_pca or do_ica: do_std = True

    stdscaler = StandardScaler()
    pld_intensities = stdscaler.fit_transform(
        pld_intensities.T) if do_std else pld_intensities.T

    if do_pca:
        pca = PCA()

        pld_intensities = pca.fit_transform(pld_intensities)

        evrc = pca.explained_variance_ratio_.cumsum()
        n_pca = np.where(evrc > 1.0 - n_ppm / ppm)[0].min()
        if pca_cut: pld_intensities = pld_intensities[:, :n_pca]

        if verbose: print(evrc, n_pca)

    if do_ica:
        ica = FastICA()

        pld_intensities = ica.fit_transform(pld_intensities)

        # evrc = ica.explained_variance_ratio_.cumsum()
        # n_ica = np.where(evrc > 1.0-n_ppm/ppm)[0].min()
        # if ica_cut: pld_intensities = pld_intensities[:,:n_ica]
        #
        # if verbose: print(evrc, n_ica)

    if add_unity:
        pld_intensities = np.vstack(
            [pld_intensities.T,
             np.ones(pld_intensities.shape[0])]).T

    pld_coeffs = np.linalg.lstsq(
        pld_intensities, fluxes)[0] if not start_unity else np.ones(
            pld_intensities.shape[1]) / pld_intensities.shape[1]

    n_pld_out = n_pca if do_pca and pca_cut else n_pld * order

    for k in range(n_pld_out):
        model_params.add_many(('pld{}'.format(k), pld_coeffs[k], True))

    # if add_unity: model_params.add_many(('pld{}'.format(n_pld_out), pld_coeffs[n_pld_out], True)) # FINDME: Maybe make min,max = 0,2 or = 0.9,1.1
    if add_unity:
        model_params.add_many(
            ('pldBase', pld_coeffs[n_pld_out],
             True))  # FINDME: Maybe make min,max = 0,2 or = 0.9,1.1

    if verbose:
        [
            print('{:5}: {}'.format(val.name, val.value))
            for val in model_params.values() if 'pld' in val.name.lower()
        ]

    return model_params, pld_intensities.T
Example #38
s1 = np.sin(2 * time)  # Signal 1 : sinusoidal signal
s2 = np.sign(np.sin(3 * time))  # Signal 2 : square signal
s3 = signal.sawtooth(2 * np.pi * time)  # Signal 3: saw tooth signal

S = np.c_[s1, s2, s3]
S += 0.2 * np.random.normal(size=S.shape)  # Add noise

S /= S.std(axis=0)  # Standardize data
# Mix data
A = np.array([[1, 1, 1], [0.5, 2, 1.0], [1.5, 1.0, 2.0]])  # Mixing matrix
X = np.dot(S, A.T)  # Generate observations

# Compute ICA
ica = FastICA(n_components=3)
S_ = ica.fit_transform(X)  # Reconstruct signals
A_ = ica.mixing_  # Get estimated mixing matrix

# We can `prove` that the ICA model applies by reverting the unmixing.
assert np.allclose(X, np.dot(S_, A_.T) + ica.mean_)

# For comparison, compute PCA
pca = PCA(n_components=3)
H = pca.fit_transform(X)  # Reconstruct signals based on orthogonal components

###############################################################################
# Plot results

plt.figure()

models = [X, S, S_, H]
Example #39
        gs.fit(labels_EM_PCA.reshape(-1, 1), dataY)
        tmp = pd.DataFrame(gs.cv_results_)
        tmp.to_csv(out + 'QSAR NN EM PCA.csv')
        best_indices = tmp.index[tmp['rank_test_score'] == 1].tolist()
        best_em = best_em.append(
            {
                'Layers': str(tmp.iloc[best_indices[0], 4]),
                'Iterations': tmp.iloc[best_indices[0], 5],
                'Score': tmp.iloc[best_indices[0], 12]
            },
            ignore_index=True)

        # Fit/transform with FastICA
        print("Running FastICA...")
        ica = FastICA(n_components=10, random_state=5)
        dataX_ICA = ica.fit_transform(dataX)

        # Run KM
        print("Running k-means...")
        model = KMeans(n_clusters=km)
        labels_KM_PCA = model.fit_predict(dataX_ICA)

        grid = {
            'NN__hidden_layer_sizes': nn_arch,
            'NN__max_iter': nn_iter,
            'NN__learning_rate_init': [0.016],
            'NN__alpha': [0.316227766]
        }
        mlp = MLPClassifier(activation='relu',
                            early_stopping=True,
                            random_state=5)
Example #40
 def ica(self, whiten = True):
     ica = FastICA(n_components = 5, whiten = whiten)
     self.train = ica.fit_transform(self.train)
Example #41
print("Transforming...")
n_comp = 50

# tSVD
tsvd = TruncatedSVD(n_components=n_comp, random_state=420)
tsvd_results_train = tsvd.fit_transform(train_df)
tsvd_results_test = tsvd.transform(test_df)

# PCA
pca = PCA(n_components=n_comp, random_state=420)
pca2_results_train = pca.fit_transform(train_df)
pca2_results_test = pca.transform(test_df)

# ICA
ica = FastICA(n_components=n_comp, random_state=420)
ica2_results_train = ica.fit_transform(train_df)
ica2_results_test = ica.transform(test_df)

# GRP
grp = GaussianRandomProjection(n_components=n_comp, eps=0.1, random_state=420)
grp_results_train = grp.fit_transform(train_df)
grp_results_test = grp.transform(test_df)

# SRP
srp = SparseRandomProjection(n_components=n_comp,
                             dense_output=True,
                             random_state=420)
srp_results_train = srp.fit_transform(train_df)
srp_results_test = srp.transform(test_df)

############
Example #42
    return n_components_ica


runs = (("data/creditcards_train.arff", "Credit Default", "d1"),
        ("data/htru_train.arff", "Pulsar Detection", "d2"))

for (fname, label, abbrev) in runs:
    X, y, feature_names = load_data(fname)

    # model selection (optimal number of components)
    n_components = optimize_components(X, feature_names, label, abbrev)

    # save as new set of features
    ica = FastICA(n_components=n_components, random_state=SEED)
    start_time = time.perf_counter()
    df = pd.DataFrame(ica.fit_transform(X))
    run_time = time.perf_counter() - start_time
    print(label + ": run time = " + str(run_time))
    print(label + ": iterations until convergence = " + str(ica.n_iter_))
    df.to_pickle(path.join(PKL_DIR, abbrev + "_ica.pickle"))

    # parallel coordinates plot
    visualizer = ParallelCoordinates(sample=0.2, shuffle=True, fast=True)
    visualizer.fit_transform(df, y)
    visualizer.ax.set_xticklabels(visualizer.ax.get_xticklabels(),
                                  rotation=45,
                                  horizontalalignment='right')
    visualizer.finalize()
    plt.savefig(path.join(PLOT_DIR, abbrev + "_ica_parallel.png"),
                bbox_inches='tight')
    visualizer.show()
# shape
print('Shape train: {}\nShape test: {}'.format(train.shape, test.shape))
y_train = train["y"]
y_mean = np.mean(y_train)

#PCA/ICA for dimensionality reduction
n_comp = 10

# PCA
pca = PCA(n_components=n_comp, random_state=42)
pca2_results_train = pca.fit_transform(train.drop(["y"], axis=1))
pca2_results_test = pca.transform(test)

# ICA
ica = FastICA(n_components=n_comp, random_state=42)
ica2_results_train = ica.fit_transform(train.drop(["y"], axis=1))
ica2_results_test = ica.transform(test)

train_cols = [col for col in list(train)]
test_cols = [col for col in list(test)]

print(train_cols)
print(test_cols)

train.drop(train_cols, axis=1, inplace=True)
test.drop(test_cols, axis=1, inplace=True)

# Append decomposition components to datasets
for i in range(1, n_comp + 1):
    train['pca_' + str(i)] = pca2_results_train[:, i - 1]
    test['pca_' + str(i)] = pca2_results_test[:, i - 1]
Example #44
    plt.vlines(0, min_y, max_y, linewidth=2)
    plt.xlim(min_x, max_x)
    plt.ylim(min_y, max_y)
    plt.title(title)
    pml.savefig(f'{file_name}.pdf')
    plt.show()


np.random.seed(2)
N = 100
A = np.array([[2, 3], [2, 1]]) * 0.3  # Mixing matrix

S_uni = (np.random.rand(N, 2) * 2 - 1) * np.sqrt(3)
X_uni = S_uni @ A.T

pca = PCA(whiten=True)
S_pca = pca.fit(X_uni).transform(X_uni)

ica = FastICA()
S_ica = ica.fit_transform(X_uni)
S_ica /= S_ica.std(axis=0)

plot_samples(S_uni, 'Uniform Data', 'ica-uniform-source')

plot_samples(X_uni, 'Uniform Data after Linear Mixing', 'ica-uniform-mixed')

plot_samples(S_pca, 'PCA Applied to Mixed Data from Uniform Source',
             'ica-uniform-PCA')

plot_samples(S_ica, 'ICA Applied to Mixed Data from Uniform Source',
             'ica-uniform-ICA')
                      label='mean_kurtosis')
    line2, = plt.plot(k_arr,
                      kurt_var,
                      color='b',
                      marker='o',
                      label='variance of kurtosis')
    plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})
    plt.ylabel(' kurtosis')
    plt.xlabel('Number of components')
    plt.show()
    return None


kurt(X, y, 20)
ica = FastICA(n_components=11, random_state=0)
ica_2d = ica.fit_transform(X)
X_ica = ica.transform(X)
plt.scatter(ica_2d[:, 0],
            ica_2d[:, 1],
            c=y,
            cmap="RdGy",
            edgecolor="None",
            alpha=1,
            vmin=75,
            vmax=150)
plt.colorbar()
plt.title('ICA Scatter Plot')


def plot_samples(S, axis_list=None):
    plt.scatter(S[:, 0],
x = dataset.iloc[:, 1:-5]
y = dataset.iloc[:, -5:]
#collect the wavelengths in the spectra
wavelengths = dataset.columns[1:-5]
#list of constituents
constituents = list(dataset.columns[-5:])

from sklearn.decomposition import FastICA
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from math import sqrt
from numpy import savetxt

#find out the ICs on the whole dataset
transformer = FastICA(n_components=n_IC, random_state=rdnSeed, max_iter=4000)

IC = transformer.fit_transform(x.T)
M = transformer.mixing_

#set the target variable
target = constituents[
    constituentIndex]  #change the index to fit the model to different constituents
y = np.array(y[target])

#create the dataframe so that we can drop the missing values both in x and y
df = np.append(M, y.reshape(len(y), 1), axis=1)
columns_ = np.append(wavelengths, [target]).transpose()

df = pd.DataFrame(data=df)

#drop the rows that has missing values in any columns and reset the index
df.dropna(inplace=True)
Example #47
        c = 0
        X_inner_trainingset = X_all[X_arr[subtrain[train], ].reshape(
            (numDay - 2) * duration), ]
        Y_inner_trainingset = Y_arr[subtrain[train], ].reshape(
            (numDay - 2) * duration)
        X_validate = X_all[X_arr[subtrain[validate], ], ].reshape(
            duration, 600)
        Y_validate = Y_all[X_arr[subtrain[validate]]].reshape(duration)

        for C in components:
            print(test, subtrain[validate], subtrain[train], C)

            ica = FastICA(n_components=C, max_iter=5000,
                          tol=0.0001)  #tol = 0.001
            X_inner_train = ica.fit_transform(
                X_inner_trainingset
            )  #pull components from ica fit transformation
            X_inner_test = ica.transform(X_validate)

            clf = svm.SVC(kernel='linear',
                          class_weight='balanced',
                          probability=True)
            y_inner_score = clf.fit(
                X_inner_train,
                Y_inner_trainingset).decision_function(X_inner_test)
            fpr, tpr, _ = roc_curve(Y_validate, y_inner_score)
            roc_auc[t, v, c] = auc(fpr, tpr)

            c += 1
        v += 1
Example #48
    def transform(self, graph_file, first_node=None):
        logging.info('loading graph')
        """
        input: csv file of graph; formate: start_node, end_node, weight
        output: graph, a list, the elements are tuples, like [(1, 2, 1) (3, 1, 1) (2, 3, 1)]
        count amount of nodes from G
        """
        self.graph = self.load_graph(graph_file)  # obtain a array of graph

        self.node_count = self.find_node_count(
            self.graph)  # find the number of nodes in graph
        self.edge_count = len(self.graph)
        print("nodes:", self.node_count)
        print("edges:", self.edge_count)
        self.node_range = range(1, self.node_count + 1)

        logging.info('computing distance matrix')
        self.distance_matrix = self.compute_distance_matrix(
            self.graph, self.node_count)
        # self.distance_matrix = self.nomalization_distance_mtrix(distance_matrix=self.distance_matrix) # nomalized distance matrix
        ##############################  adjacency matrix ##########################################
        self.adjacency_matrix = self.get_adjacency_matrix(
            self.graph, self.node_count)
        ###########################################################################
        if first_node is None:
            """self.first_node = randint(0, self.node_count) + 1  # Choose the first pivot from V randomly."""
            self.first_node = randint(1, self.node_count)
        else:
            self.first_node = first_node  # Specify the first pivot.

        logging.info('finding pivots')
        """
        dimensions=m
        choose m pivots according to k-center.         
        """
        #####################################################
        if self.pivot_select == "randomly":
            self.pivot_nodes = self.choose_pivots_randomly(
                dimension=self.dimension, number_nodes=self.node_count)
        #####################################################
        else:
            self.pivot_nodes = self.choose_pivot_points(
                self.graph, self.dimension)  # self.pivot_nodes: a list

        logging.info('drawing graph in high dimensional space')
        """
        note that the number of pivot nodes is the same as dimension.
        format of points:
        G=(V, E)
        |V|=n, dimensions = m = pivots
        d(vi, pj) denotes a distance computed by Dijkstra's algorithm in G.

           p1          p2          p3     ...    pm
        v1 d(v1, p1) d(v1, p2)  d(v1, p3)     d(v1, pm)
        v2  .
        v3  .
        v4  .                                   .
        .   .                                   .
        .   .                                   .
        .   .
        vn d(vn, p1)       ...                d(vn, pm)


        """
        self.points = list(
            map(
                lambda i: tuple(self.distance_matrix[i - 1, p - 1]
                                for p in self.pivot_nodes), self.node_range))

        if self.normalization is True:
            ##############################################################################################################
            self.points = self.nomalization_distance_mtrix(
                distance_matrix=self.points)  # nomalized self.points
            ##############################################################################################################
        logging.info('project into a low dimension using PCA')

        if self.version == "HDE-SV":
            if self.dimension == 2:
                self.transformed_points = np.array(self.points)
        """
        PCA:
            input  array-like:  shape of self.points = (n_sample, n_feature)
            output array-like:  shape of self.transformed_points = (n_sample, n_component)

        """
        if self.version == "HDE":  # PCA denotes that algorithm uses PCA to decomposite original space.
            pca = PCA(n_components=2, copy=True)
            self.transformed_points = pca.fit_transform(self.points)

        if self.version == "HDE-Level":  # PCA denotes that algorithm uses PCA to decomposite original space.
            pca = PCA(n_components=3, copy=True)
            self.transformed_points = pca.fit_transform(self.points)
            pca = PCA(n_components=2, copy=True)
            self.transformed_points = pca.fit_transform(
                self.transformed_points)
        '''
          replaces the initial version, as in the paper. by mty 2017-8-9
        '''
        if self.version == "HDE-PIT":  # PIT denotes that algorithm uses poweriteration to computer eigenvectors for decomposition space.
            X, S = self.covariance(self.points)
            # X = np.array(self.points).T
            # X = X.astype(float)
            U = self.poweriteration(S, epsilon=self.epsilon)
            self.transformed_points = self.decomposition_space(X, U)
            if self.node_count == (self.edge_count +
                                   1):  # determine whether it is a tree.
                FR = FR_Algorithm(number_of_nodes=self.node_count,
                                  initial_temperature=self.initial_temperature,
                                  cooling_factor=self.cooling_factor,
                                  factor_attract=self.factor_attract,
                                  factor_repulsion=self.factor_repulsion)
                # use FR to fine-tune
                self.transformed_points = FR.apply_force_directed_algorithm(
                    iteration=self.fr_iteration,
                    graph=self.graph,
                    coord_decomposition=self.transformed_points)

        if self.version == "HDE-MDS":  # HDE-MDS denotes that algorithm combines with MDS.
            hde_mds = MDS()  # MDS object
            self.transformed_points = hde_mds.fit_transform(self.points)

        if self.version == "Pivot-MDS":  # Pivot-MDS denotes that original version of Pivot MDS.
            pivot_mds = PivotMDS(d=self.distance_matrix,
                                 pivots=self.dimension)  # PivotMDS object
            self.transformed_points = pivot_mds.optimize()

        if self.version == "HDE-FICA":  # FICA denotes that algorithm uses Fast ICA to decomposite original space.
            #  fun, Could be either 'logcosh', 'exp', or 'cube'.
            fica = FastICA(n_components=2)
            # print(np.array(self.points).shape)
            self.transformed_points = fica.fit_transform(self.points)
            # print(np.array(self.transformed_points).shape)
            # FR = FR_Algorithm(number_of_nodes=self.node_count, initial_temperature=self.initial_temperature,
            #                   cooling_factor=self.cooling_factor, factor_attract=self.factor_attract, factor_repulsion=self.factor_repulsion)
            # # use FR to fine-tune
            # self.transformed_points = FR.apply_force_directed_algorithm(iteration=self.fr_iteration, graph=self.graph, coord_decomposition=self.transformed_points)

        if self.version == "HDE-KPCA":  # FPCA denotes that algorithm uses kernel PCA to decomposite original space.
            kpca = KernelPCA(n_components=2,
                             kernel=self.kpca_fun,
                             gamma=self.gamma)
            self.transformed_points = kpca.fit_transform(self.points)

        if self.version == "HDE-NMF":
            nmf = NMF(n_components=2)
            self.transformed_points = nmf.fit_transform(self.points)

        if self.version == "HDE-TruncatedSVD":
            tsvd = TruncatedSVD(n_components=2)
            self.transformed_points = tsvd.fit_transform(self.points)

        if self.version == "HDE-LDA":
            lda = LinearDiscriminantAnalysis(n_components=2)
            y = []
            for i in range(self.node_count):
                y.append(1)
            y = np.array(y)
            lda = lda.fit(self.points, y=y)
            self.transformed_points = lda.transform(self.points)
        if self.version == "HDE-FR":

            pca = PCA(n_components=2, copy=True)
            self.transformed_points = pca.fit_transform(self.points)
            if self.node_count == (self.edge_count + 1):  # determine whether it is a tree
                FR = FR_Algorithm(number_of_nodes=self.node_count,
                                  initial_temperature=self.initial_temperature,
                                  cooling_factor=self.cooling_factor,
                                  factor_attract=self.factor_attract,
                                  factor_repulsion=self.factor_repulsion)
                # use FR to fine-tune
                self.transformed_points = FR.apply_force_directed_algorithm(
                    iteration=self.fr_iteration,
                    graph=self.graph,
                    coord_decomposition=self.transformed_points)

        if self.version == "HDE-FICA-FR":

            fica = FastICA(n_components=2)
            self.transformed_points = fica.fit_transform(self.points)
            if self.node_count == (self.edge_count + 1):  # determine whether it is a tree
                FR = FR_Algorithm(number_of_nodes=self.node_count,
                                  initial_temperature=self.initial_temperature,
                                  cooling_factor=self.cooling_factor,
                                  factor_attract=self.factor_attract,
                                  factor_repulsion=self.factor_repulsion)
                # use FR to fine-tune
                self.transformed_points = FR.apply_force_directed_algorithm(
                    iteration=self.fr_iteration,
                    graph=self.graph,
                    coord_decomposition=self.transformed_points)

        if self.version == "HDE-TSNE-FR":
            # pca = PCA(n_components=10, copy=True)
            # self.transformed_points = pca.fit_transform(self.points)
            tsne = TSNE(learning_rate=self.learning_rate, init=self.init
                        )  # 'init' must be 'pca', 'random', or a numpy array
            self.transformed_points = tsne.fit_transform(self.points)
            if self.node_count == (self.edge_count + 1):  # determine whether it is a tree
                FR = FR_Algorithm(number_of_nodes=self.node_count,
                                  initial_temperature=self.initial_temperature,
                                  cooling_factor=self.cooling_factor,
                                  factor_attract=self.factor_attract,
                                  factor_repulsion=self.factor_repulsion)
                # use FR to fine-tune
                self.transformed_points = FR.apply_force_directed_algorithm(
                    iteration=self.fr_iteration,
                    graph=self.graph,
                    coord_decomposition=self.transformed_points)

        if self.version == "HDE-SPE":
            IP = SpectralEmbedding(n_components=2)
            self.transformed_points = IP.fit_transform(self.distance_matrix)
            # pca = PCA(n_components=2, copy=True)
            # self.transformed_points = pca.fit_transform( self.transformed_points)

        return self.node_count, self.edge_count
Example #49
class SourceMethod:
    def __init__(self, name, itrN, rg=(1, 8), err=0.1, method='pca'):
        import xlrd
        # Read every row of the first sheet of the workbook.
        wb = xlrd.open_workbook(name)
        st = wb.sheet_by_index(0)
        self.xlsdata = []
        for itr in range(st.nrows):
            self.xlsdata.append(st.row_values(itr))
        # Split each row into the sample name (column 0) and the data columns.
        self.rdata = []
        self.sample = []
        for itr in self.xlsdata:
            self.rdata.append(itr[rg[0]:])
            self.sample.append(itr[0])
        self.title = self.xlsdata[0]
        self.data_orig = np.abs(self.rdata[1:])
        self.orig_data = np.array(self.rdata[1:])
        self.dest_err = err
        # Scale each column by its maximum absolute value.
        self.data_max = np.max(np.abs(self.data_orig), axis=0)
        self.data = np.divide(self.data_orig, self.data_max)
        # Record the signs of row 1 so they can be restored after the
        # sign-insensitive factorization.
        self.minus = np.divide(self.orig_data[1, :],
                               np.abs(self.data_orig[1, :]))
        self.itrN = itrN
        self.train()

    def func(self):
        # Placeholder; intentionally does nothing.
        pass

    def train(self):
        # Shift the data up by half the column minima so the factorization
        # sees strictly positive values.
        data_min = np.min(np.abs(self.data), axis=0)
        base = 0.5
        data_sub = np.subtract(self.data, base * data_min)
        for itr in range(1, len(self.data)):
            itr = 3  # NOTE: overrides the loop variable, so 3 components are always used
            # The NMF result is computed but immediately replaced by the
            # FastICA fit below.
            self.components_, self.array = mynmf(data_sub, itr, self.itrN,
                                                 self.dest_err)
            self.method = FastICA(n_components=itr)
            self.array = np.transpose(
                self.method.fit_transform(np.transpose(data_sub)))
            self.components_ = self.method.mixing_
            # Undo the baseline shift, spread evenly over the components.
            self.array = np.add(self.array, data_min * base / itr)
            # Reconstruction, rescaled back to the original units.
            self.for_sta = np.multiply(
                np.abs(np.dot(self.components_, self.array)), self.data_max)

            # Mean Pearson correlation between original and reconstructed columns.
            sum_cof = 0
            for itra in range(len(self.data[0])):
                sum_cof += self.pearson(
                    np.transpose(self.data)[itra],
                    np.transpose(self.for_sta)[itra])
            sum_cof = sum_cof / len(self.data[0])
            print(sum_cof)
            if self.dest_err > 1 - sum_cof:
                break
            if itr > 10:  # unreachable while itr is pinned to 3 above
                break
        self.array = np.multiply(self.array, self.data_max)

        # Restore the original signs recorded in __init__.
        self.data_orig = np.multiply(self.data_orig, self.minus)
        self.for_sta = np.multiply(self.for_sta, self.minus)
        self.array = np.multiply(self.array, self.minus)

        self.data_orig = np.transpose(self.data_orig)
        self.for_sta = np.transpose(self.for_sta)
        self.array = np.transpose(self.array)
        self.data = np.transpose(self.data)
        self.orig_data = np.transpose(self.orig_data)

    def get_par(self):
        # FastICA exposes its component count as the n_components parameter;
        # there is no fitted n_components_ attribute to read.
        return self.array, self.method.n_components, self.method.components_

    def print_sta(self):
        print("Source Number:")
        print(self.components_)  # the mixing matrix estimated by FastICA
        # Normalize each row of the unmixing matrix so the ratios sum to 1.
        ratio_sum = np.transpose([np.sum(self.method.components_, axis=1)])
        print("Mixing ratio matrix:")
        print(np.divide(self.method.components_, ratio_sum))

    def pearson(self, x, y):
        # Pearson correlation:
        # r = sum((x - x̄)(y - ȳ)) / sqrt(sum((x - x̄)²) · sum((y - ȳ)²))
        x_avg = np.average(x)
        y_avg = np.average(y)
        xv = x - x_avg
        yv = y - y_avg
        cof1 = np.sum(xv * yv)

        x2 = np.sum(np.square(xv))
        y2 = np.sum(np.square(yv))
        # Guard against zero variance in either input, which would divide by zero.
        if x2 == 0 or y2 == 0:
            cof = 0
        else:
            cof = cof1 / np.sqrt(x2 * y2)
        return cof

    def plot_sta(self):
        import matplotlib.pyplot as plt
        #plt.style.use('bmh')
        plt.figure(1)
        tick = np.arange(len(self.array))
        width = 0.6 / len(self.array[0])
        cont = 0
        for dt in np.transpose(self.array):
            #plt.plot(tick+width*cont,dt,alpha=0.4,color=list(plt.rcParams['axes.prop_cycle'])[cont]['color'])
            #plt.bar(tick+width*cont,dt,width,alpha=0.2,color=list(plt.rcParams['axes.prop_cycle'])[cont]['color'])
            cont = cont + 1

        plt.figure(2)
        cont = 0
        for dt in np.transpose(self.array):
            #plt.plot(dt,alpha=0.4,color=list(plt.rcParams['axes.prop_cycle'])[cont]['color'])
            cont = cont + 1

        plt.figure(3)
        plt.plot(np.average(self.for_sta, axis=0))

        for itr in range(len(self.data)):
            plt.figure(4 + itr)
            z1 = np.polyfit(self.data[itr], self.for_sta[itr], 1)

            plt.scatter(self.data[itr], self.for_sta[itr])
            mi = min(self.data[itr])
            ma = max(self.data[itr])
            idtv = ma - mi
            x = np.arange(0, ma, 0.01)
            plt.title("$" + str(self.title[itr]) + "$")
            cof = self.pearson(self.data[itr], self.for_sta[itr])
            #plt.text(mi/6+ma/6,idtv*0.001,"$f(x)=%fx+%f;cof=%f$"%(z1[0],z1[1],cof))
            #plt.text(mi/1.9+ma/1.9,idtv*0.05,)

            plt.plot(x, x * z1[0] + z1[1])
        plt.boxplot(np.transpose(self.data_orig - self.for_sta))
        plt.show()
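
# Usage sketch (hypothetical file name and iteration budget, not from the
# original example). The workbook's first column is assumed to hold sample
# names and the remaining columns the measurements.
sm = SourceMethod('samples.xls', itrN=500, err=0.1)
sm.print_sta()
sm.plot_sta()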
Example #50
emis = f["emis"][...]
X = f["X"][...]
f.close()

# Clip emissivities away from 0 and 1 so the log transforms below stay finite.
TOL = 1e-4
emis[emis < TOL] = TOL
emis[emis > 1 - TOL] = 1 - TOL
# Sort everything by the coordinate X.
ix = np.argsort(X)
X = X[ix]
emis = emis[ix, :]

OD = -np.log(1 - emis)  # convert emissivity to optical depth
pcaOD = PCA(whiten=True, n_components=48)  # constructed but never fitted in this fragment

ica = FastICA(n_components=36, max_iter=5000)
ODIR = ica.fit_transform(OD)  # recover independent components
OD2 = ica.inverse_transform(ODIR)  # reconstruct the optical depths
emis2 = 1 - np.exp(-OD2)  # back to emissivity
A_ = ica.mixing_  # estimated mixing matrix

nmf = NMF(n_components=48)
ODNR = nmf.fit_transform(OD)
OD2 = nmf.inverse_transform(ODNR)  # overwrites the ICA reconstruction above
emis2 = 1 - np.exp(-OD2)

# Fit a B-spline (scipy.interpolate.splrep) with N interior knots to one column.
N = 48
knots = np.linspace(X.min(), X.max(), N)[1:-1]
tck = splrep(X, -np.log(emis[:, 350]), t=knots)

t = tck[0]  # knot vector
c = np.zeros((emis.shape[-1], tck[1].size))  # one coefficient row per column
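
# (Hypothetical continuation; the source is truncated here.) With the shared
# knot vector, each column's spline coefficients could be filled the same way:
for k in range(emis.shape[-1]):
    c[k] = splrep(X, -np.log(emis[:, k]), t=knots)[1]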
Example #51
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal

from sklearn.decomposition import PCA, FastICA

# Load data
input_file = 'mixture_of_signals.txt'
X = np.loadtxt(input_file)

# Compute ICA
ica = FastICA(n_components=4)

# Recover the independent source signals
signals_ica = ica.fit_transform(X)

# Get estimated mixing matrix
mixing_mat = ica.mixing_

# Perform PCA
pca = PCA(n_components=4)
signals_pca = pca.fit_transform(X)  # project onto orthogonal principal components

# Specify parameters for output plots
models = [X, signals_ica, signals_pca]
colors = ['blue', 'red', 'black', 'green']

# Plotting input signal
plt.figure()
plt.title('Input signal (mixture)')
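
# (Hypothetical continuation, not from the source; the snippet is truncated
# after the title above.) The usual pattern is to draw each signal set in its
# own figure, reusing the models/colors defined earlier; 'titles' is made up here.
titles = ['Input signal (mixture)', 'Signals separated by ICA',
          'Signals separated by PCA']
for k, (model, title) in enumerate(zip(models, titles)):
    if k > 0:  # the first figure with the mixture title is already open
        plt.figure()
        plt.title(title)
    for sig, color in zip(model.T, colors):
        plt.plot(sig, color=color)
plt.show()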
Example #52
    func = func_new

conv = signal.fftconvolve(func, emo, mode='same', axes=0)
conv /= np.linalg.norm(conv, axis=0)  # normalize each column
conv = conv * 8

#  PCA
X = np.concatenate((func, emo), axis=1)
pca = PCA(n_components=3, svd_solver='randomized')
pca.fit(X.transpose())
pca = pca.components_.transpose()
pca = pca * 10

# ICA
transformer = FastICA(n_components=3, random_state=0)
ica = transformer.fit_transform(X)
ica = ica * 8

# Factor Analysis
transformer = FactorAnalysis(n_components=3, random_state=0)
fa = transformer.fit_transform(X)

# GMM
from sklearn import mixture
gmmodel = mixture.GaussianMixture(n_components=3,
                                  covariance_type='tied',
                                  max_iter=100,
                                  random_state=10).fit(X.transpose())
gmm = gmmodel.means_.transpose()
gmm_samp, gmm_y = gmmodel.sample(118)
Example #53
def perform_feature_engineering(train, test, config):

    # Drop near-constant binary columns: keep a 2-valued column only if its
    # mean reaches the sparsity threshold.
    for c in train.columns:
        if len(train[c].value_counts()) == 2:
            if train[c].mean() < config['SparseThreshold']:
                del train[c]
                del test[c]

    col = list(test.columns)
    if config['ID'] != True:  # drop 'ID' from the feature columns unless config keeps it
        col.remove('ID')

    # tSVD
    if config['tSVD']:
        tsvd = TruncatedSVD(n_components=config['n_comp'])
        tsvd_results_train = tsvd.fit_transform(train[col])
        tsvd_results_test = tsvd.transform(test[col])
        for i in range(1, config['n_comp'] + 1):
            train['tsvd_' + str(i)] = tsvd_results_train[:, i - 1]
            test['tsvd_' + str(i)] = tsvd_results_test[:, i - 1]
    # PCA
    if config['PCA']:
        pca = PCA(n_components=config['n_comp'])
        pca2_results_train = pca.fit_transform(train[col])
        pca2_results_test = pca.transform(test[col])
        for i in range(1, config['n_comp'] + 1):
            train['pca_' + str(i)] = pca2_results_train[:, i - 1]
            test['pca_' + str(i)] = pca2_results_test[:, i - 1]
    # ICA
    if config['ICA']:
        ica = FastICA(n_components=config['n_comp'])
        ica2_results_train = ica.fit_transform(train[col])
        ica2_results_test = ica.transform(test[col])
        for i in range(1, config['n_comp'] + 1):
            train['ica_' + str(i)] = ica2_results_train[:, i - 1]
            test['ica_' + str(i)] = ica2_results_test[:, i - 1]

    # GRP
    if config['GRP']:
        grp = GaussianRandomProjection(n_components=config['n_comp'], eps=0.1)
        grp_results_train = grp.fit_transform(train[col])
        grp_results_test = grp.transform(test[col])
        for i in range(1, config['n_comp'] + 1):
            train['grp_' + str(i)] = grp_results_train[:, i - 1]
            test['grp_' + str(i)] = grp_results_test[:, i - 1]

    # SRP
    if config['SRP']:
        srp = SparseRandomProjection(n_components=config['n_comp'],
                                     dense_output=True,
                                     random_state=420)
        srp_results_train = srp.fit_transform(train[col])
        srp_results_test = srp.transform(test[col])
        for i in range(1, config['n_comp'] + 1):
            train['srp_' + str(i)] = srp_results_train[:, i - 1]
            test['srp_' + str(i)] = srp_results_test[:, i - 1]

    if config['magic']:
        # "Magic" feature: the per-category mean of the target y, keyed on X0.
        magic_mat = train[['ID', 'X0', 'y']]
        magic_mat = magic_mat.groupby(['X0'])['y'].mean()
        magic_mat = pd.DataFrame({
            'X0': magic_mat.index,
            'magic': list(magic_mat)
        })
        mean_magic = magic_mat['magic'].mean()
        train = train.merge(magic_mat, on='X0', how='left')
        test = test.merge(magic_mat, on='X0', how='left')
        test['magic'] = test['magic'].fillna(mean_magic)
    return train, test
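
# Usage sketch (hypothetical values; assumes train/test DataFrames are already
# loaded). The keys below are exactly the ones the function reads above;
# 'ID': True keeps the 'ID' column among the decomposition inputs.
config = {
    'SparseThreshold': 0.01,
    'ID': True,
    'n_comp': 12,
    'tSVD': True, 'PCA': True, 'ICA': True, 'GRP': True, 'SRP': True,
    'magic': False,  # requires 'ID', 'X0' and 'y' columns when True
}
train, test = perform_feature_engineering(train, test, config)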
Example #54
sc = StandardScaler()

ica2 = FastICA(n_components=2)
ica80 = FastICA(n_components=80)

#Dataset 1

data1 = pd.read_csv('dist1.txt', sep=' ')
data1.head()

data1 = data1.dropna(axis='index')
dataset1 = data1.values
data1_std = sc.fit_transform(dataset1)

data1_std_ica2 = ica2.fit_transform(data1_std)
data1_std_ica80 = ica80.fit_transform(data1_std)


# Notebook-style inspections; bare expressions display values in a notebook
# but have no effect in a script.
data1_std_ica2
data1_std_ica80

plt.scatter(data1_std_ica2[:,0], data1_std_ica2[:,1])

dataset1frame80 = pd.DataFrame(data1_std_ica80)
dataset1frame80.head()

dataset1frame80['mean'] = dataset1frame80.mean(axis=1)
dataset1frame80.head()

ica_power1 = dataset1frame80['mean'].values
Example #55
X = np.array(trainFile.data)
Y = np.array(trainFile.labels)

# just like the face recognition, we compute the avg digit image
avg_digit = compute_avg_digits(X, configs.IMAGE_WIDTH)
print "Avg digit computed ..."

# Substract each input with the avg
X_normalized_avg = normalize_with_avg(X, avg_digit)
X_normalized = preprocessing.normalize(X_normalized_avg)
print "Normalize X ..."

# ICA Face
ica = FastICA()
features = ica.fit_transform(X_normalized)
print "Transform done ..."

# split into training and testing
cutoff = len(Y) * 0.75
features_train = np.array(features[:cutoff])
Y_train = np.array(Y[:cutoff])
features_test = np.array(features[cutoff:])
Y_test = np.array(Y[cutoff:])

#Submission
#features_train = np.array(features)
#Y_train = np.array(Y)
#X_test = np.array(testFile.data)
#X_test_normalized_avg = normalize_with_avg(X_test, avg_digit)
#X_test_normalized = preprocessing.normalize(X_test_normalized_avg)
Example #56
def get_dc_feature(df_train,
                   df_test,
                   n_comp=12,
                   id_column=None,
                   label_column=None):
    """
    构造分解特征
    """
    train = df_train.copy()
    test = df_test.copy()

    if id_column:
        train_id = train[id_column]
        test_id = test[id_column]
        train = drop_columns(train, [id_column])
        test = drop_columns(test, [id_column])
    if label_column:
        train_y = train[label_column]
        train = drop_columns(train, [label_column])

    # tSVD
    tsvd = TruncatedSVD(n_components=n_comp, random_state=420)
    tsvd_results_train = tsvd.fit_transform(train)
    tsvd_results_test = tsvd.transform(test)

    # PCA
    pca = PCA(n_components=n_comp, random_state=420)
    pca2_results_train = pca.fit_transform(train)
    pca2_results_test = pca.transform(test)

    # ICA
    ica = FastICA(n_components=n_comp, random_state=420)
    ica2_results_train = ica.fit_transform(train)
    ica2_results_test = ica.transform(test)

    # GRP
    grp = GaussianRandomProjection(n_components=n_comp,
                                   eps=0.1,
                                   random_state=420)
    grp_results_train = grp.fit_transform(train)
    grp_results_test = grp.transform(test)

    # SRP
    srp = SparseRandomProjection(n_components=n_comp,
                                 dense_output=True,
                                 random_state=420)
    srp_results_train = srp.fit_transform(train)
    srp_results_test = srp.transform(test)

    # Append decomposition components to datasets
    for i in range(1, n_comp + 1):
        train['pca_' + str(i)] = pca2_results_train[:, i - 1]
        test['pca_' + str(i)] = pca2_results_test[:, i - 1]

        train['ica_' + str(i)] = ica2_results_train[:, i - 1]
        test['ica_' + str(i)] = ica2_results_test[:, i - 1]

        train['tsvd_' + str(i)] = tsvd_results_train[:, i - 1]
        test['tsvd_' + str(i)] = tsvd_results_test[:, i - 1]

        train['grp_' + str(i)] = grp_results_train[:, i - 1]
        test['grp_' + str(i)] = grp_results_test[:, i - 1]

        train['srp_' + str(i)] = srp_results_train[:, i - 1]
        test['srp_' + str(i)] = srp_results_test[:, i - 1]

    if id_column:
        train[id_column] = train_id
        test[id_column] = test_id
    if label_column:
        train[label_column] = train_y

    return train, test
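
# Usage sketch (hypothetical DataFrame names; drop_columns is the external
# helper the function above already relies on):
train_fe, test_fe = get_dc_feature(df_train, df_test, n_comp=12,
                                   id_column='ID', label_column='y')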
Example #57
def update_EM(X, K, gamma, A, pi, mu, sigma_sqr, threshold=5e-5,
       A_mode='GA', grad_mode='GA',
       max_em_steps=30, n_gd_steps=20):

  if type(X) is not torch.Tensor:
    X = torch.tensor(X)
  X = X.type(DTYPE).to(device)

  N, D = X.shape

  END = lambda dA, dsigma_sqr: (dA + dsigma_sqr) < threshold

  Y = None
  niters = 0
  dA, dsigma_sqr = 10, 10
  ret_time = {'E': [], 'obj': [], 'btls_nIters': []}  # 'btls_nIters' is appended to in the BTLS branch below
  grad_norms, objs = [], []

  if A_mode == 'random':
    A = ortho_group.rvs(D)
    A = to_tensor(A)
  elif A_mode == 'ICA':
    cov = X.T.matmul(X) / len(X)
    cnt = 0
    n_tries = 20
    while cnt < n_tries:
      try:
        ica = FastICA()
        _ = ica.fit_transform(X.cpu())
        Aorig = ica.mixing_

        # avoid numerical instability
        U, ss, V = np.linalg.svd(Aorig)
        ss /= ss[0]
        ss[ss < SINGULAR_SMALL] = SINGULAR_SMALL
        Aorig = (U * ss).dot(V)

        A = np.linalg.inv(Aorig)
        _, ss, _ = np.linalg.svd(A)
        A = to_tensor(A / ss[0])
        cnt = 2*n_tries
      except Exception:  # FastICA can fail to converge; retry up to n_tries times
        cnt += 1
    if cnt != 2*n_tries:
      print('ICA failed. Use random.')
      A = to_tensor(ortho_group.rvs(D))

  while (not END(dA, dsigma_sqr)) and niters < max_em_steps:
    niters += 1
    A_prev, sigma_sqr_prev = A.clone(), sigma_sqr.clone()
    objs += [],

    if TIME: e_start = time()
    Y, w, w_sumN, w_sumNK = E(X, A, pi, mu, sigma_sqr, Y=Y)
    if TIME: ret_time['E'] += time() - e_start,

    # M-step
    if A_mode == 'ICA' or A_mode == 'None':
      pi, mu, sigma_sqr = update_pi_mu_sigma(X, A, w, w_sumN, w_sumNK)
      obj = get_objetive(X, A, pi, mu, sigma_sqr, w)
      objs[-1] += obj,

    if A_mode == 'CF': # closed-form update for A
      if CHECK_OBJ:
        objs[-1] += get_objetive(X, A, pi, mu, sigma_sqr, w),

      for i in range(n_gd_steps):
        cf_start = time()
        if VERBOSE: print(A.view(-1))
        
        pi, mu, sigma_sqr = update_pi_mu_sigma(X, A, w, w_sumN, w_sumNK)

        if TIME: a_start = time()
        if grad_mode == 'CF1':
          A = set_grad_zero(X, A, w, mu, sigma_sqr)
          A = A.T
        elif grad_mode == 'CF2':
          cofs = get_cofactors(A)
          det = torch.det(A)
          if det < 0: # TODO: ignore neg det for now
            cofs = cofs * -1

          newA = A.clone()
          # Use r, c for the matrix-entry indices so the outer gradient-step
          # counter i is not shadowed.
          for r in range(D):
            for c in range(D):
              t1 = (w[:, r] * X[:, c, None]**2 / sigma_sqr[r]).sum() / N
              diff = (Y[r] - A[r, c] * X[:, c])[:, None] - mu[r]
              t2 = (w[:, r] * X[:, c, None] * diff / sigma_sqr[r]).sum() / N
              c1 = t1 * cofs[r, c]
              c2 = t1 * (det - A[r, c] * cofs[r, c]) + t2 * cofs[r, c]
              c3 = t2 * (det - A[r, c] * cofs[r, c]) - cofs[r, c]
              inner = c2**2 - 4 * c1 * c3
              if inner < 0:
                print('Problem solving for A[{},{}]: no real solution.'.format(r, c))
                pdb.set_trace()
              if c1 == 0:
                sol = -c3 / c2
              else:
                sol = (inner**0.5 - c2) / (2 * c1)
              if False:
                # check whether obj improves with each updated entry of A
                curr_A = newA.clone()
                curr_A[r, c] = sol
                curr_obj = get_objetive(X, curr_A, pi, mu, sigma_sqr, w)
              newA[r, c] = sol
          A = newA.double()

        # avoid numerical instability
        U, ss, V = torch.svd(A)
        ss = ss / ss[0]
        ss[ss < SINGULAR_SMALL] = SINGULAR_SMALL
        A = (U * ss).matmul(V)

        if TIME:
          if 'A' not in ret_time: ret_time['A'] = []
          ret_time['A'] += time() - a_start,
          if 'CF' not in ret_time: ret_time['CF'] = []
          ret_time['CF'] += time() - cf_start,

        if CHECK_OBJ:
          if TIME: obj_start = time()
          obj = get_objetive(X, A, pi, mu, sigma_sqr, w)
          if TIME: ret_time['obj'] += time() - obj_start,
          objs[-1] += obj,
          if VERBOSE:
            print('iter {}: obj= {:.5f}'.format(i, obj))
        # pdb.set_trace()
      # pdb.set_trace()

    if A_mode == 'GA': # gradient ascent
      if CHECK_OBJ:
        objs[-1] += get_objetive(X, A, pi, mu, sigma_sqr, w),

      for i in range(n_gd_steps):
        ga_start = time()
        if VERBOSE: print(A.view(-1))
        
        pi, mu, sigma_sqr = update_pi_mu_sigma(X, A, w, w_sumN, w_sumNK)

        if TIME: a_start = time()
        # gradient steps
        grad, y_time = get_grad(X, A, w, mu, sigma_sqr)
        if TIME:
          if 'Y' not in ret_time:
            ret_time['Y'] = []
          ret_time['Y'] += y_time,
        if grad_mode == 'BTLS':
          # backtracking line search
          if TIME: obj_start = time()
          obj = get_objetive(X, A, pi, mu, sigma_sqr, w)
          if TIME: ret_time['obj'] += time() - obj_start,

          beta, t, flag = 0.6, 1, True
          gnorm = torch.norm(grad)
          n_iter, ITER_LIM = 0, 10
          while flag and n_iter < ITER_LIM:
            n_iter += 1
            Ap = A + t * grad
            _, ss, _ = torch.svd(Ap)
            Ap /= ss[0]
            if TIME: obj_start = time()
            obj_p = get_objetive(X, Ap, pi, mu, sigma_sqr, w)
            if TIME: ret_time['obj'] += time() - obj_start,
            t *= beta
            base = obj - 0.5 * t * gnorm
            flag = obj_p < base
          gamma = t
          ret_time['btls_nIters'] += n_iter,
        elif grad_mode == 'perturb':
          # perturb
          perturb = A.std() * 0.1 * torch.randn(A.shape).type(DTYPE).to(device)
          perturbed = A + perturb
          perturbed_grad, _ = get_grad(X, perturbed, w, mu, sigma_sqr)

          grad_diff = torch.norm(grad - perturbed_grad)
          gamma = 1 /(EPS_GRAD + grad_diff) * 0.03

        grad_norms += torch.norm(grad).item(),
        A += gamma * grad

        _, ss, _ = torch.svd(A)
        A /= ss[0]

        if TIME:
          if 'A' not in ret_time: ret_time['A'] = []
          ret_time['A'] += time() - a_start,
          if 'GA' not in ret_time: ret_time['GA'] = []
          ret_time['GA'] += time() - ga_start,

        if CHECK_OBJ:
          if TIME: obj_start = time()
          obj = get_objetive(X, A, pi, mu, sigma_sqr, w)
          if TIME: ret_time['obj'] += time() - obj_start,
          objs[-1] += obj,
          if VERBOSE:
            print('iter {}: obj= {:.5f}'.format(i, obj))
        # pdb.set_trace()
      # pdb.set_trace()

    # Update the convergence deltas so the END(dA, dsigma_sqr) test can fire;
    # A_prev and sigma_sqr_prev are saved at the top of each iteration.
    dA = torch.norm(A - A_prev).item()
    dsigma_sqr = torch.norm(sigma_sqr - sigma_sqr_prev).item()

  if VERBOSE:
    print('#{}: dA={:.3e} / dsigma_sqr={:.3e}'.format(niters, dA, dsigma_sqr))
    print('A:', A.view(-1))

  if TIME:
    for key in ret_time:
      ret_time[key] = np.array(ret_time[key]) if ret_time[key] else 0

  # pdb.set_trace()
  return A, pi, mu, sigma_sqr, grad_norms, objs, ret_time 
Example #58
# thus we need to use mask_strategy='epi' to compute the mask from the
# EPI images
masker = NiftiMasker(smoothing_fwhm=8,
                     memory='nilearn_cache',
                     memory_level=1,
                     mask_strategy='epi',
                     standardize=True)
data_masked = masker.fit_transform(func_filename)

#####################################################################
# Apply ICA

from sklearn.decomposition import FastICA
n_components = 10
ica = FastICA(n_components=n_components, random_state=42)
components_masked = ica.fit_transform(data_masked.T).T

# Normalize estimated components, for thresholding to make sense
components_masked -= components_masked.mean(axis=0)
components_masked /= components_masked.std(axis=0)
# Threshold
import numpy as np
components_masked[np.abs(components_masked) < .8] = 0

# Now invert the masking operation, going back to a full 3D
# representation
component_img = masker.inverse_transform(components_masked)

#####################################################################
# Visualize the results
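
# (Hypothetical continuation; the snippet is truncated here.) One way to show
# the first recovered component map with nilearn:
from nilearn import plotting
from nilearn.image import index_img
plotting.plot_stat_map(index_img(component_img, 0))
plotting.show()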
Example #59
def test_fastica_simple(add_noise, seed):
    # Test the FastICA algorithm on very simple data.
    rng = np.random.RandomState(seed)
    # scipy.stats uses the global RNG:
    n_samples = 1000
    # Generate two sources:
    s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1
    s2 = stats.t.rvs(1, size=n_samples)
    s = np.c_[s1, s2].T
    center_and_norm(s)
    s1, s2 = s

    # Mixing angle
    phi = 0.6
    mixing = np.array([[np.cos(phi), np.sin(phi)], [np.sin(phi),
                                                    -np.cos(phi)]])
    m = np.dot(mixing, s)

    if add_noise:
        m += 0.1 * rng.randn(2, 1000)

    center_and_norm(m)

    # function as fun arg
    def g_test(x):
        return x**3, (3 * x**2).mean(axis=-1)

    algos = ["parallel", "deflation"]
    nls = ["logcosh", "exp", "cube", g_test]
    whitening = [True, False]
    for algo, nl, whiten in itertools.product(algos, nls, whitening):
        if whiten:
            k_, mixing_, s_ = fastica(m.T,
                                      fun=nl,
                                      algorithm=algo,
                                      random_state=rng)
            with pytest.raises(ValueError):
                fastica(m.T, fun=np.tanh, algorithm=algo)
        else:
            pca = PCA(n_components=2, whiten=True, random_state=rng)
            X = pca.fit_transform(m.T)
            k_, mixing_, s_ = fastica(X,
                                      fun=nl,
                                      algorithm=algo,
                                      whiten=False,
                                      random_state=rng)
            with pytest.raises(ValueError):
                fastica(X, fun=np.tanh, algorithm=algo)
        s_ = s_.T
        # Check that the mixing model described in the docstring holds:
        if whiten:
            assert_almost_equal(s_, np.dot(np.dot(mixing_, k_), m))

        center_and_norm(s_)
        s1_, s2_ = s_
        # Check to see if the sources have been estimated
        # in the wrong order
        if abs(np.dot(s1_, s2)) > abs(np.dot(s1_, s1)):
            s2_, s1_ = s_
        s1_ *= np.sign(np.dot(s1_, s1))
        s2_ *= np.sign(np.dot(s2_, s2))

        # Check that we have estimated the original sources
        if not add_noise:
            assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=2)
            assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=2)
        else:
            assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=1)
            assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=1)

    # Test the FastICA class (reusing the last algo/nl from the loop above)
    _, _, sources_fun = fastica(m.T, fun=nl, algorithm=algo, random_state=seed)
    ica = FastICA(fun=nl, algorithm=algo, random_state=seed)
    sources = ica.fit_transform(m.T)
    assert ica.components_.shape == (2, 2)
    assert sources.shape == (1000, 2)

    assert_array_almost_equal(sources_fun, sources)
    assert_array_almost_equal(sources, ica.transform(m.T))

    assert ica.mixing_.shape == (2, 2)

    for fn in [np.tanh, "exp(-.5(x^2))"]:
        ica = FastICA(fun=fn, algorithm=algo)
        with pytest.raises(ValueError):
            ica.fit(m.T)

    with pytest.raises(TypeError):
        FastICA(fun=range(10)).fit(m.T)
Example #60
    
    print('//===========================pca==========================')
    pca = PCA(n)
    traindata_pca = pca.fit_transform(traindata)
    testdata_pca = pca.transform(testdata)
    Faceidentifier(traindata_pca, trainlabel, testdata_pca, testlabel)

    print('//===========================sfa==========================')
    sfa = sfa.SFA()  # NOTE: rebinding shadows the imported sfa module
    traindata_sfa = sfa.fit_transform(traindata.T, conponents=n).T  # 'conponents' is the custom SFA kwarg spelling
    testdata_sfa = sfa.transform(testdata.T).T
    Faceidentifier(traindata_sfa, trainlabel, testdata_sfa, testlabel)

    print('//===========================fastica==========================')
    fastica = FastICA(n)
    traindata_fastica = fastica.fit_transform(traindata)
    testdata_fastica = fastica.transform(testdata)
    Faceidentifier(traindata_fastica, trainlabel, testdata_fastica, testlabel)
    
    for i in range(0,9):
        if i == 0:
            b = 0.1
        elif i == 1:
            b = 0.2
        elif i == 2:
            b = 0.5
        elif i == 3:
            b = 0.8
        elif i == 4:
            b = 1
        elif i == 5: