Example #1
def do_fast_ica(pca_first):
    mo1_cj_inverse = numpy.array(mo1_cj).T
    mo2_cj_inverse = numpy.array(mo2_cj).T
    if pca_first:
        mo1_cj_array = mdp.pca(mo1_cj_inverse, input_dim=4, output_dim=3)
        mo2_cj_array = mdp.pca(mo2_cj_inverse, input_dim=4, output_dim=3)
    else:
        mo1_cj_array = mo1_cj_inverse
        mo2_cj_array = mo2_cj_inverse
    a = mdp.fastica(mo1_cj_array)
    b = mdp.fastica(mo2_cj_array)
    return a, b
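A minimal, self-contained sketch of the same PCA -> FastICA chain (the snippet above depends on the globals mo1_cj and mo2_cj, which are not shown, so the input here is synthetic):

import numpy
import mdp

# four observed channels: a random linear mixture of four source signals
t = numpy.linspace(0, 8 * numpy.pi, 2000)
sources = numpy.c_[numpy.sin(t), numpy.sign(numpy.sin(3 * t)),
                   numpy.cos(2 * t), t % 1.0]
observed = sources.dot(numpy.random.random((4, 4)))  # rows are observations

reduced = mdp.pca(observed, input_dim=4, output_dim=3)  # drop one component
unmixed = mdp.fastica(reduced)                          # estimate the sources
print(unmixed.shape)  # (2000, 3)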
Example #3
 def _pca(self):
     #self.pca_box_surface_area= 2*( self.pca_lengths[0]*self.pca_lengths[1]
     #                + self.pca_lengths[1]*self.pca_lengths[2]
     #                + self.pca_lengths[2]*self.pca_lengths[0]
     #                )
     '''
     2 * (
         1/2. * self.pca_lengths[0] * numpy.sqrt(numpy.square(self.pca_lengths[1]/2) + numpy.square(self.pca_lengths[2]/2)) 
         +
         1/2. * self.pca_lengths[0] * numpy.sqrt(numpy.square(self.pca_lengths[2]/2) + numpy.square(self.pca_lengths[1]/2)) 
         )
     '''
     #self.pca_rhombus =  self.pca_lengths[0] * numpy.sqrt(numpy.square(self.pca_lengths[2]) + numpy.square(self.pca_lengths[1]))
     mins   = [float('inf'), float('inf'), float('inf')]
     maxs   = [float('-inf'),float('-inf'),float('-inf')]
     for x in mdp.pca( numpy.array([[compartment.x, compartment.y, compartment.z] for compartment in self.morphology.compartments]) ):
         for d in xrange(3):
             if x[d] < mins[d]:
                 mins[d]     = x[d]
             if x[d] > maxs[d]:
                 maxs[d]     = x[d]
     self._pca_length_x  = maxs[0] - mins[0]
     self._pca_length_y  = maxs[1] - mins[1]
     self._pca_length_z  = maxs[2] - mins[2]
     
     self._pca_lengths = (
         self._pca_length_x,
         self._pca_length_y,
         self._pca_length_z
     )
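The core computation above, isolated: the extent of a 3-D point cloud along its principal axes. The point cloud here is a synthetic stand-in, not real morphology compartments:

import numpy
import mdp

points = numpy.random.randn(500, 3) * [12.0, 3.0, 0.8]  # elongated cloud
rotated = mdp.pca(points)  # the same points in the principal-axes frame
lengths = rotated.max(axis=0) - rotated.min(axis=0)
print(lengths)  # extents, roughly from longest to shortest axis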
Example #4
def tweet_pca_reduce(tweets_train, tweets_test, output_dim):

    # convert dictionary feature vecs to numpy array
    print '--> Converting dictionaries to NumPy arrays'
    train_arr = numpy.array( [tweet_features.tweet_dict_to_nparr(t) for \
                              (t,s) in tweets_train])

    test_arr = numpy.array( [tweet_features.tweet_dict_to_nparr(t) for \
                             (t,s) in tweets_test])

    # compute principal components over the training set
    print '--> Computing PCA'
    pca_array = mdp.pca( train_arr.transpose(), \
                         svd=True, output_dim=output_dim )

    # project both train and test sets to PC space
    print '--> Projecting feature vectors to PC space'

    train_arr = numpy.dot(train_arr, pca_array)
    test_arr = numpy.dot(test_arr, pca_array)

    # convert projected vecs back to reduced dictionaries
    print '--> Converting NumPy arrays to dictionaries'

    reduced_train = \
        zip( [tweet_features.tweet_nparr_to_dict(v) for v in train_arr], \
             [s for (t,s) in tweets_train] )

    reduced_test  = \
        zip( [tweet_features.tweet_nparr_to_dict(v) for v in test_arr], \
             [s for (t,s) in tweets_test])

    return (reduced_train, reduced_test)
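The projection pattern above, reduced to its shapes with toy data (the tweet_features helpers are not shown): PCA of the transposed matrix yields an (n_features, output_dim) basis onto which both sets are projected:

import numpy
import mdp

train_arr = numpy.random.rand(100, 20)  # (tweets, features)
test_arr = numpy.random.rand(30, 20)

basis = mdp.pca(train_arr.transpose(), svd=True, output_dim=5)  # (20, 5)
train_red = numpy.dot(train_arr, basis)  # (100, 5)
test_red = numpy.dot(test_arr, basis)    # (30, 5)
print(train_red.shape, test_red.shape)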
Example #5
def pca(self):
    import numpy
    import mdp
    m = morphjongleur.model.morphology.Morphology(
      name                  = self.name,
      file_origin           = self.file_origin,
      description           = self.description,
      datetime_recording    = self.datetime_recording
    )
    assert m.number_of_compartments == 0
    pca_cs  = mdp.pca( numpy.array([ [c.x, c.y, c.z] for c in self.compartments ] ) )
    assert self.number_of_compartments == len(pca_cs)
    for i in xrange( self.number_of_compartments ):
        m.add_compartment(
            morphjongleur.model.morphology.Compartment( 
                self._compartments[i].compartment_id, 
                self._compartments[i].compartment_parent_id, 
                self._compartments[i].radius, 
                x=pca_cs[i][0],
                y=pca_cs[i][1],
                z=pca_cs[i][2]
            ) 
        )
    assert self.number_of_compartments == m.number_of_compartments
    return m
Example #6
def eval_func(chromosome):
    """ The evaluation function """
    indices_values = []
    sellTrendVector = []
    buyTrendVector = []

    for gene in chromosome:
        indices_values.append(gene.getResult())
        sellTrendVector.append(gene.getResult()[:tradingGA.sellTrendBeginning])
        buyTrendVector.append(gene.getResult()[:tradingGA.buyTrendBeginning])

    #raw_input("Press ENTER to exit")

    indices_values = indicesNormalizer().normalize(indices_values)
    indices_values = numpy.asarray(indices_values)

    result = mdp.pca(indices_values.T, reduce=True)  #, svd=True)
    sell_center = calculate_centroid_center(result[:4], sellTrendVector)
    buy_center = calculate_centroid_center(result[:4], buyTrendVector)

    #print sell_center, len(sell_center)
    #print buy_center, len(buy_center)

    wynik = numpy.linalg.norm(
        numpy.asarray(sell_center) - numpy.asarray(buy_center))

    return wynik
Example #9
def PCA(data):
    NBD = np.zeros((len(data) - 1, len(data[0]) - 1))
    for k in range(1, len(data), 1):
        row = []
        for k1 in range(1, len(data[0]), 1):
            if (is_number(data[k][k1])):
                row.append(float(data[k][k1]))
        NBD[k - 1] = row
    pca = mdp.pca(NBD, svd=True)
    return pca
Example #10
 def _reduce_dimensions(
     self, vectors,
     output_dim=6
 ):
     """
     Scales image data vectors to lower dimension
     """
     matrix = np.array(vectors, dtype='float32')
     scaled = mdp.pca(matrix, output_dim=output_dim)
     return scaled
Example #11
def simple_pca(a1):

    M = np.zeros((len(a1), len(a1)))

    for i in range(len(a1)):
        for j in range(i, len(a1)):
            M[i, j] = M[j, i] = (a1[i] - a1[j])**2

    import mdp

    return mdp.pca(M, output_dim=2)
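For reference, the same squared-distance matrix can be built without the double loop; a small usage sketch with made-up values (svd=True is added here as a guard, since such a matrix can have a rank-deficient covariance):

import numpy as np
import mdp

a1 = np.array([0.0, 0.9, 1.1, 3.8, 4.0, 7.5])
M = (a1[:, None] - a1[None, :]) ** 2  # pairwise squared differences
coords = mdp.pca(M, output_dim=2, svd=True)
print(coords.shape)  # (6, 2)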
Example #12
def pca_distance( m ):
    # perform PCA, then add random noise scaled to the spread of the data

    comps = mdp.pca( m, output_dim = 2 )
    a = comps[1,0]
    comps[:,0] += jitters( comps[:,0] )
    b = comps[1,0]
    assert a != b
    comps[:,1] += jitters( comps[:,1] )

    return comps
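jitters is not defined in this snippet or the next one; a plausible stand-in (an assumption, not the original implementation) that adds uniform noise scaled to the column's spread:

import numpy as np

def jitters(col, scale=0.01):
    # uniform noise proportional to the column's range
    span = col.max() - col.min()
    return np.random.uniform(-scale * span, scale * span, size=col.shape)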
Example #13
def pca_distance( aDistanceMatrix, dim = 2 ):

    comps = mdp.pca( aDistanceMatrix.M, output_dim = dim )
    a = comps[1,0]
    comps[:,0] += jitters( comps[:,0] )
    b = comps[1,0]
    #assert a != b
    comps[:,1] += jitters( comps[:,1] )

    return comps
Example #15
def getPcaTransformedMatrix(samples, group2samples, type2intersectGenes, selectedvjs, genetype, abs, outfile, options):
    m, rownames = preparePcaMatrix(samples, group2samples, type2intersectGenes, selectedvjs, genetype, abs)
    transformedM = mdp.pca(m, output_dim=4)
    
    #Write to text file
    f = open("%s.txt" %outfile, 'w')
    for i,r in enumerate(transformedM):
        f.write("%s\t%s\t%s\n" %(rownames[i][0], rownames[i][1], "\t".join( [str(c) for c in r] )))
    f.close()

    #Draw plot:
    drawPca(rownames, transformedM, outfile, options)
Example #16
    def Main(self,model):
        # self.model = model
        data = array(model.GetCurrentData()[:])

        k = wx.GetNumberFromUser("PCA Dialog",
                                 "Enter number of principal components",
                                 "k",
                                 1)

        pca_data = mdp.pca(data, output_dim=k)
        # ica_data = r.fastICA(data, k, alg_typ = "deflation", fun = "logcosh", alpha = 1, method = "R", row_norm = 0, maxit = 200, tol = 0.0001, verbose = 1)
        fields = ['Comp%02d' % c for c in range(1, k+1)]
        model.updateHDF('PcaPY', pca_data, fields=fields)
Example #17
def computeClusterCentre(chromosome, trendBeginning):
    indices_values = []
    trendVector = []

    for gene in chromosome:
        indices_values.append(gene.getResult())
        trendVector.append(gene.getResult()[:trendBeginning])

    indices_values = indicesNormalizer().normalize(indices_values)
    indices_values = numpy.asarray(indices_values)

    result = mdp.pca(indices_values.T, reduce=True)
    center = calculate_centroid_center(result[:4], trendVector)

    return center
Example #19
def PCAAlg(x, fps):
    global lastValue
    global counter
    global pca_bpms
    primo = True
    prova = -1
    x = np.transpose(x)
    #print("dopo x: " + str(x))
    y = mdp.pca(x)
    #print("pca: " + str(y))
    secondComponent = y[:, 1]
    #print("second: " + str(secondComponent))
    freqs, pruned = searchFreqs(secondComponent, fps, len(secondComponent))
    prova, index = calcolaProssimaFreqSensata(freqs, pruned)
    #print("pca: " + str(prova))
    pca_bpms.append(prova)
Example #20
    def plot_clusters(self, spikes, noise_cov=None):
        """:spikeplot.cluster: and :spikeplot.cluster_projection: plots

        There will be two plots visualizing the clustering and discrimination
        of the sorting. One showing the clustering of units (scatter plot
        using the first two principal components). The initial cluster
        labels are preserved as colorization in the projected data.

        Additionally there will be a plot showing the projection of each
        cluster coupling onto the vector connecting the corresponding cluster
        means/centers.

        :type spikes: dict
        :param spikes: one set of waveforms per unit {k:[n,samples]}
        :type noise_cov: ndarray
        :param noise_cov: noise covariance matrix compatible with the
            dimension of individual observations in :spikes:
        """

        # prepare data
        tf = sum(self.parameters['cut'])
        # TODO: prewhiten !!!
        data_stacked = pca(sp.vstack(spikes.values()), output_dim=4)
        data = {}
        idx = 0
        for k, v in spikes.items():
            n = v.shape[0]
            data[k] = data_stacked[idx:idx + n]
            idx += n

        # produce scatter plots
        for pcs in [(0, 1), (2, 3)]:
            self.result.append(
                cluster(
                    data,
                    data_dim=pcs,
                    plot_mean=True,
                    title='cluster plot',
                    xlabel='PC%s' % (pcs[0] + 1),
                    ylabel='PC%s' % (pcs[1] + 1)))

        # cluster projection
        self.result.append(cluster_projection(data))
Example #22
    def compute(self,
                waveforms,
                sampling_rate=None,
                output_dim=2,
                start_sample=0,
                num_samples=0):
        """Computes PCA of waveforms concatenated across recording points.
        
        waveforms : ndarray of waveforms, of shape
            (N_spikes, N_recordingpoints, len(waveform))        
        sampling_rate : not used
        output_dim : Number of features (eigenvalues) to return per waveform
        start_sample : Index of first sample in each waveform to slice out
            to use for PCA
        num_samples : Number of samples of each waveform to use for PCA.
            The default is '0', which means to use all samples, regardless
            of the value of `start_sample`.
        
        Returns : pca_mat, a matrix of components. shape: (N_spikes, N_features)
        """
        lenwf = waveforms.shape[2]

        if num_samples > 0:
            # We're not using all samples
            if start_sample < 0 or start_sample >= lenwf:
                # garbage input, use all samples
                print "warning: start_sample must be in [0, %d)" % lenwf
                start_sample = 0

            # slice
            waveforms = waveforms[:, :,
                                  start_sample:(start_sample + num_samples)]

        # reshape into the format PCA expects. Each row is now a concatenation
        # of waveforms from each channel in the group.
        waveforms2 = waveforms.reshape(waveforms.shape[0],
                                       waveforms.shape[1] * waveforms.shape[2])

        # do PCA and return results in (N_spikes, N_features) shape
        pca_mat = mdp.pca(waveforms2, output_dim=output_dim)
        return pca_mat
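A hypothetical usage of compute() with synthetic waveforms, following the shapes from the docstring:

import numpy as np
import mdp

waveforms = np.random.randn(300, 4, 32)  # (N_spikes, N_recordingpoints, len(waveform))
flat = waveforms.reshape(waveforms.shape[0], -1)  # one concatenated row per spike
pca_mat = mdp.pca(flat, output_dim=2)  # (N_spikes, N_features)
print(pca_mat.shape)  # (300, 2)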
Example #24
def reduce_dimensions(myarray):
    # With the standard test input, plain mdp.pca fails with:
    # "Covariance matrix may be singular. Try instantiating the node with svd=True."
    return mdp.pca(myarray, svd=True)
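A sketch of the situation the comment refers to: with more features than observations the covariance matrix is singular, and svd=True lets the decomposition proceed anyway. The data here is made up:

import numpy as np
import mdp

x = np.random.randn(10, 50)  # 10 observations, 50 features -> singular covariance
y = mdp.pca(x, svd=True)     # the default eigensolver would complain here
print(y.shape)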
Example #25
import mdp

# x is matrix of all instances and features
y = mdp.pca(x)

# TODO: check whether mdp.ica yields anything useful
Example #27
def runPCA(paramFile):
  data = np.loadtxt(paramFile)
  y = mdp.pca(data, reduce=True)
  print y
Example #28
def pca(data, singleValueDecomp=True):
    return (mdp.pca(data, svd=singleValueDecomp))
Example #29
    if ideology != prev:
        stats.append([])
        cursor2 = connection.cursor()
        cursor2.execute("select ideology from ideology where id=%s" % ideology)
        idrow = cursor2.fetchone()
        idname = idrow[0]
        if idname == "":
            idname = "[ideology #%d]" % ideology
        ideologies.append(idname)
    stats[-1].append(float(row[2]))
    prev = ideology
    row = cursor.fetchone()


raw = array(stats)
cooked = mdp.pca(raw, output_dim=2) # see http://nullege.com/codes/search/mdp.pca


(xmax, ymax) = cooked.max(0) # max value in each column vector of y, see http://mathesaurus.sourceforge.net/numeric-numpy.html
(xmin, ymin) = cooked.min(0) # And min.  These will be used to interpolate the x,y coordinates for plotting


idmap=Image.new("RGB", (width+240, height+12), (128,128,128))
draw=ImageDraw.Draw(idmap)
for i in range(len(ideologies)):
    ts=draw.textsize(ideologies[i]) # center the name over its coordinates
    x=width*(cooked[i,0]-xmin)/(xmax-xmin)-math.trunc(ts[0]/2)+120
    y=height*(cooked[i,1]-ymin)/(ymax-ymin)-math.trunc(ts[1]/2)+6
    newcolor=[]
    for j in range(3): # generate a random color, one that contrasts w. midtone gray background
        newrand=random.random()+random.random()
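The coordinate mapping used in the drawing loop above, isolated: linearly rescale one PCA coordinate into pixel space (the helper name is illustrative, and the text-centering term is omitted):

def to_pixels(value, vmin, vmax, extent, offset):
    # map value from [vmin, vmax] onto [offset, offset + extent]
    return extent * (value - vmin) / (vmax - vmin) + offset

# e.g. x = to_pixels(cooked[i, 0], xmin, xmax, width, 120)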
Example #30
from mdp.nodes import RBMNode
import mdp
from numpy import *
import time

import read_spro

X = read_spro.load_mfcc_file()

rbm = RBMNode(10, X.shape[1])  # constructed but never used in this snippet

x2 = X.dot(X.T)  # Gram matrix of the loaded MFCC frames

print x2.shape

mdp_pca = mdp.pca(x2)

print X.shape
Example #31
print "read data"
for line in co_occ_file:
    if count < 100:
        count += 1
        line = line.replace("\n", "")
        instance = line.split(" ")
        if (indexes[instance[0]] == -1):
            indexes[instance[0]] = current_index
            current_index += 1
        if (indexes[instance[1]] == -1):
            indexes[instance[1]] = current_index
            current_index += 1
        matrix[indexes[instance[0]], indexes[instance[1]]] = float(instance[2])
    else:
        break

    if count % 10000 == 0:
        print count, "entries processed"

co_occ_file.close()

if current_index != nr_functional_words - 1:
    print "not the same", current_index, nr_functional_words

print "perform pca"
matrix = mdp.pca(matrix, output_dim=30)  # mdp.pca takes the target dimension as a keyword argument
print "pca done, start tsne"

#Y = tsne.tsne(X, no_dims, perplexity)
y = tsne.tsne(matrix, 2, nr_functional_words, perplexity)
Example #32
def featureSelection(trainData, labels, featureSelectionMechanism, numFeatures,
                     minNumSongs, maxNumSongs, trainSongs, featureExtractor):
    """
    Given the feature sets of the examples, returns a reduced feature set.
    
    @param list of training data
    @param list of strings - a list of the unique labels
    @param string - type of feature selection we want to perform
    @param number of features we want to choose
    @param number of min songs for num_songs feature selection
    @param number of max songs for num_songs feature selection
    @return list with a reduced set of features
    """
    informationGains = []
    featureNames = []
    featureLibraryInfo = []
    featureLibrary = []

    # Calculate all of the features (not just those from the example)
    print("Calculate all of the given features")
    allFeatures = Counter()
    featureArray = []
    featureArray = [fs for (fs, label) in trainData]
    for featureSet in featureArray:
        allFeatures.update(featureSet)
    if (numFeatures < len(allFeatures) and numFeatures != 0):
        if (featureSelectionMechanism == "information_gain"):
            selected_features_info = []
            selected_features = []
            print("Using information gain to select features")
            # Loop through all of the features and calculate the information gain for each
            for feature in allFeatures:
                print("Calculating information gain for %s " % feature)
                informationGains.append(
                    informationGain(trainData, allFeatures, feature, labels))
                featureNames.append(feature)
            informationGains = np.array(informationGains)
            sortedargs = np.argsort(informationGains)
            featureNames = [featureNames[i] for i in sortedargs]
            print informationGains
            #informationGains.reverse()
            #featureNames.reverse()
            # Add the top numFeatures to the counter.
            # if requesting too many features change number of requested features.
            if (numFeatures > len(featureNames)):
                numFeatures = len(featureNames)
            for i in range(0, numFeatures):
                selected_features_info.append(informationGains[i])
                selected_features.append(featureNames[i])
            # print featureLibraryInfo
        elif (featureSelectionMechanism == "num_songs"):
            print("Using min/max song metric to select features")
            selected_features = getDict(minNumSongs, maxNumSongs, trainSongs,
                                        featureExtractor)
        else:
            print("Using PCA to select features")
            # Create a matrix with the relevant labels
            allFeaturePairList = list(allFeatures.items())
            allFeatureKeyList = [pair[0] for pair in allFeaturePairList]
            index = 0
            data_features_matrix = np.zeros((len(trainData), len(allFeatures)))
            print("Creating the matrix for PCA input")
            for (features, label) in trainData:
                # Loop through each feature and populate matrix
                for feature in features:
                    data_features_matrix[index][allFeatureKeyList.index(
                        feature)] = allFeatures[feature]
                index = index + 1

            # Run PCA to reduce feature size.
            # print(data_features_matrix)

            print("Using PCA to reduce the number of features")
            reduced_features = mdp.pca(transpose(data_features_matrix),
                                       output_dim=2,
                                       svd=True)
            u1 = reduced_features[:, 1]
            order = np.argsort(u1)[::-1]
            order = order[1:numFeatures]
            print(
                "Populate the selected_features based on representation in first principal component"
            )
            selected_features = [allFeatureKeyList[index] for index in order]
            print(selected_features)
    else:
        for feature in allFeatures:
            featureNames.append(feature)
        selected_features = featureNames

    # Return the selected features regardless of algorithm used.
    return selected_features
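The PCA branch above in isolation: rank features by their weight in a principal component of the transposed examples-by-features matrix. The matrix here is a toy stand-in:

import numpy as np
import mdp

data_features_matrix = np.random.rand(40, 12)  # (examples, features)
reduced = mdp.pca(data_features_matrix.T, output_dim=2, svd=True)
u1 = reduced[:, 1]            # one weight per feature
order = np.argsort(u1)[::-1]  # feature indices, most- to least-weighted
print(order[:5])              # the top features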
Example #33
def pca(data, singleValueDecomp=True):
    return mdp.pca(data, svd=singleValueDecomp)

random.seed(3)  # set the random seed (seeding with a list raises TypeError)

Y = get_headers()
for i in Y:
    F = split()
    for j in K:
        #should return a number between 0 and Y.size()
        #The algorithm should delete a random subset of classes
        keepCols = []
        deletedCols = []
        for jj in K:
            if random.randrange(2) == 1: keepCols.append(jj)  # every column has a 50% chance to be deleted
            else: deletedCols.append(jj)
        #the 'p' means that's a prime
        Xijp = bootstrap(F[keepCols], 1, len(F)*0.75) #option 2 http://climateecology.wordpress.com/2013/08/19/r-vs-python-speed-comparison-for-bootstrapping/
        Cij = mdp.pca(Xijp)

        # arranging the rotation matrix
        Ri = [[0] * len(K) for _ in range(len(Cij))]
        id = 0
        for a in range(len(Cij)):
            aux = 0
            for b in K:
                aux += Cij[a][b]
            if id == a: Ri[a][a] = aux  # fills the diagonal
            id += 1  # "++id" in the original was a no-op

        # It should have the same order but without some columns, so it is ok

Example #35
        cur = con.cursor()
        cur.execute("select * from bu_cat")
        rows = cur.fetchall()
        A = rows[0]
        for row in rows[1:]:
            A = np.vstack([A, list(row)])

except mdb.Error, e:
    print e
    sys.exit(1)

finally:
    if con:
        con.close()

A = mdp.pca(A.astype('float32'), reduce=True)

##distances = pdist(A, cosine)
##distances_2d = squareform(distances)
clusters = hierarchy.linkage(A, method='complete', metric='cosine')
flat_clusters = hierarchy.fcluster(clusters.clip(0, 100000), 0.8, 'inconsistent')
plt.scatter(*np.transpose(A), c=flat_clusters)  # color points by flat cluster label
plt.axis("equal")
title = "threshold: %f, number of clusters: %d" % (thresh, len(set(flat_clusters)))
plt.title(title)
plt.show()
with open('Clusters.dat', 'w+') as f:
    count = 0
    for v in flat_clusters:
        count += 1
        f.write(str(count) + "\t" + str(v) + "\n")
Example #37
abec = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']


correcto = ['P', 'W', '3', '7', '6', '8', '8', 'D', 'Z', 'G', '2', '0', '1', 'R', 'S', '2', '0', '0']
for k in range(len(correcto)):
    im = Image.open("muestras/%s.png" % k)
    pix = im.load()
    w, h = im.size
    x = []

    for i in range(w):
        tmp = []
        for j in range(h):
            if pix[i, j] == (255, 255, 255):
                tmp.append(1)
            else:
                tmp.append(0)
        x.append(tmp)

    y = mdp.pca(np.array(x, dtype=np.float64), output_dim=7)
    y = y.transpose()
    res = []
    for value in y:
        res.append(value.sum() * 1.0e+14)
    y = res
    f.write("%s " % bin(abec.index(correcto[k]))[2:].zfill(7))
    for value in y:
        f.write("%s " % str(value))
    f.write("\n")
    if debug: print "Data for image %s: %s\n" % (k, y)
Example #38
 def do_pca(self,args):
     '''
     PCA -> "pca gaeta_coor_blind50.txt 1,3,6"
     Automatically measures PCA from the coordinates file and shows two interactive plots.
     With the (optional) second argument you can select the columns and the multiplier
     factors to use for the PCA (e.g. "1,3*50,6,8x10,9"). Don't use spaces. "*" and "x"
     mean the same thing.
     Without the second argument it reads the pca_config.txt file.
     (c)Paolo Pancaldi, Massimo Sandal 2009
     '''
     
     # reads the columns of pca
     if self.config['hookedir'][0]=='/':
         slash='/' #a Unix or Unix-like system
     else:
         slash='\\'
     self.my_hooke_dir = self.config['hookedir']+slash
     #self.my_work_dir = os.getcwd()+slash+"pCluster_"+time.strftime("%Y%m%d_%H%M")+slash
     #self.my_curr_dir = os.path.basename(os.getcwd())
     conf=open(self.my_hooke_dir+"pca_config.txt")
     config = conf.readlines()
     conf.close()
     
     self.plot_myCoord = []          # coordinates taken directly from the file created with pCluster
     self.plot_origCoord = []        # coordinates of only the chosen columns, multiplied by the chosen factors
     self.plot_pcaCoord = []         # the two PCA columns
     self.plot_pcaCoordTr = []       # the two PCA columns, transposed
     self.plot_FiltOrigCoord = []    # coordinates of only the points kept by the density filter
     self.plot_FiltPaths = []        # plot paths of only the points kept by the density filter
     self.plot_paths = []            # paths of all the plots
     self.plot_NewPcaCoord = []      # the two PCA columns after density filtering
     self.plot_NewPcaCoordTr=[]      # the two PCA columns after density filtering, transposed
     plot_path_temp = ""
     
     # takes one argument (the file name)
     # and optionally a second one with the columns to work on (e.g. "1,2,3")
     arg = args.split(" ")
     if arg[0]==args:
         file_name=args
     else:
         file_name=arg[0]
         config[0] = arg[1]
     
     # build the array "plot_myCoord" with all the plot coordinates
     # and the array plot_paths with all the plot paths
     nPlotTot = -3 # skip the first 3 header lines of the file
     f=open(file_name)
     rows = f.readlines()
     for row in rows:
         if row[0]!=" " and row[0]!="":
             nPlotTot = nPlotTot+1
             plot_path_temp = row
         if row[0]==" " and row.find('nan')==-1 and row.find("-1.#IND")==-1:
             row = row[row.index(";",2)+2:].split(" ; ")  # ignore the first column with the peak count
             row = [float(i) for i in row]
             
             #0:Mean delta, 1:Median delta, 2:Mean force, 3:Median force, 4:First peak length, 5:Last peak length
             #6:Max delta 7:Min delta 8:Max force 9:Min force 10:Std delta 11:Std force
             if (row[0]<500 and row[1]<500 and row[2]<500 and row[3]<500 and row[4]<500 and row[5]<500 and row[6]<500 and row[7]<500 and row[8]<500 and row[9]<500 and row[10]<500 and row[11]<500):
                 if (row[0]>0 and row[1]>0 and row[2]>0 and row[3]>0 and row[4]>0 and row[5]>0 and row[6]>0 and row[7]>0 and row[8]>0 and row[9]>0 and row[10]>0 and row[11]>0):
                     #row = row[0], row[2], row[3]*3, row[6], row[7]*56, row[8]
                     self.plot_myCoord.append(row)
                     self.plot_paths.append(plot_path_temp)
     f.close()
     
     # build the array with only some columns, already multiplied by their factors
     for row in self.plot_myCoord:
         res=[]
         for cols in config[0].split(","):
             if cols.find("*")!=-1:
                 col = int(cols.split("*")[0])
                 molt = int(cols.split("*")[1])
             elif cols.find("x")!=-1:
                 col = int(cols.split("x")[0])
                 molt = int(cols.split("x")[1])
             else:
                 col = int(cols)
                 molt = 1
             res.append(row[col]*molt)
         self.plot_origCoord.append(res)
     
     # array convert, calculate PCA, transpose
     self.plot_origCoord = np.array(self.plot_origCoord,dtype='float')
     #print self.plot_origCoord.shape
     self.plot_pcaCoord = pca(self.plot_origCoord, output_dim=2)	#other way -> y = mdp.nodes.PCANode(output_dim=2)(array)
     self.plot_pcaCoordTr = np.transpose(self.plot_pcaCoord)
     pca_X=np.array(self.plot_pcaCoordTr[0],dtype='float')
     pca_Y=np.array(self.plot_pcaCoordTr[1],dtype='float')
     
     '''
     # Start section of testing with good plots                                  # 4 TESTING!
     Xsyn_1=[]
     Ysyn_1=[]        
     Xgb1_1=[]
     Ygb1_1=[]
     Xbad_1=[]
     Ybad_1=[]
     goodnamefile=open(file_name.replace("coordinate", "good"),'r')
     goodnames=goodnamefile.readlines()
     nPlotGood = len(goodnames)-2 # drop the first and last lines
     goodnames=[i.split()[0] for i in goodnames[1:]]
     
     for index in range(len(self.plot_paths)):
         if self.plot_paths[index][:-1] in goodnames:
             Xsyn_1.append(pca_X[index])
             Ysyn_1.append(pca_Y[index])
         else:
             Xbad_1.append(pca_X[index])
             Ybad_1.append(pca_Y[index])
     # Stop section of testing with good plots                                   # 4 TESTING!
     '''
     
     # print first plot
     clustplot1=lhc.PlotObject()
     clustplot1.add_set(pca_X,pca_Y)
     #clustplot1.add_set(Xbad_1,Ybad_1) # 4 TESTING!
     #clustplot1.add_set(Xsyn_1,Ysyn_1) # 4 TESTING!
     clustplot1.normalize_vectors()
     clustplot1.styles=['scatter', 'scatter','scatter']
     clustplot1.colors=[None,'red','green']
     clustplot1.destination=0
     self._send_plot([clustplot1])
     self.clustplot1=clustplot1
     
     # density and filter estimation
     kernel = sp.stats.kde.gaussian_kde(sp.c_[pca_X,pca_Y].T)
     tallest = 0
     for i in range(len(pca_X)):
         kern_value = kernel.evaluate([pca_X[i],pca_Y[i]])
         if tallest < kern_value:
                 tallest = float(kern_value)
     if float(config[1]) == 0:
         my_filter = float(tallest / 3.242311147)
     else:
         my_filter = float(config[1])
     '''
     # section useful only for graphic printing
     xmin = pca_X.min()
     xmax = pca_X.max()
     ymin = pca_Y.min()
     ymax = pca_Y.max()
     mX, mY = sp.mgrid[xmin:xmax:100j, ymin:ymax:100j]
     Z = sp.rot90(sp.fliplr(sp.reshape(kernel(sp.c_[mX.ravel(), mY.ravel()].T).T, mX.T.shape)))
     axis_X = np.linspace(xmin,xmax,num=100)
     axis_Y = np.linspace(ymin,ymax,num=100)
     '''
     
     # density filtering:
     # "kernel.evaluate" scores the height of every coordinate, which decides whether to keep it
     filtered_pca_X = []
     filtered_pca_Y = []
     filtered_PcaCoordTr = []
     filtered_PcaCoord = []
     for i in range(len(pca_X)):
         kern_value = kernel.evaluate([pca_X[i],pca_Y[i]])
         if kern_value > my_filter:
             filtered_pca_X.append(pca_X[i])
             filtered_pca_Y.append(pca_Y[i])
     filtered_PcaCoordTr.append(filtered_pca_X)
     filtered_PcaCoordTr.append(filtered_pca_Y)
     filtered_PcaCoord = np.transpose(filtered_PcaCoordTr)
     
     # build the two arrays "plot_FiltOrigCoord" and "plot_FiltPaths" with only the high-density filtered data
     for index in range(len(self.plot_pcaCoord)):
         if self.plot_pcaCoord[index] in filtered_PcaCoord:
             self.plot_FiltOrigCoord.append(self.plot_myCoord[index])
             self.plot_FiltPaths.append(self.plot_paths[index])
     
     '''
     # START PCA#2: USELESS!!!
     
     # build the array with only some columns, already multiplied by their factors
     temp_coord = []
     for row in self.plot_FiltOrigCoord:
         res=[]
         for cols in config[2].split(","):
             if cols.find("*")!=-1:
                 col = int(cols.split("*")[0])
                 molt = int(cols.split("*")[1])
             elif cols.find("x")!=-1:
                 col = int(cols.split("x")[0])
                 molt = int(cols.split("x")[1])
             else:
                 col = int(cols)
                 molt = 1
             res.append(row[col]*molt)
         temp_coord.append(res)
     self.plot_FiltOrigCoord = temp_coord
             
     # recompute the PCA: array convert, calculate PCA, transpose
     self.plot_FiltOrigCoord = np.array(self.plot_FiltOrigCoord,dtype='float')
     #print self.plot_FiltOrigCoord.shape
     self.plot_NewPcaCoord = pca(self.plot_FiltOrigCoord, output_dim=2)	#other way -> y = mdp.nodes.PCANode(output_dim=2)(array)
     self.plot_NewPcaCoordTr = np.transpose(self.plot_NewPcaCoord)
     pca_X2=np.array(self.plot_NewPcaCoordTr[0],dtype='float')
     pca_Y2=np.array(self.plot_NewPcaCoordTr[1],dtype='float')
     
     # Start section of testing with good plots                              # 4 TESTING!
     Xsyn_2=[]
     Ysyn_2=[]
     Xbad_2=[]
     Ybad_2=[]
     for index in range(len(self.plot_FiltPaths)):
         if self.plot_FiltPaths[index][:-1] in goodnames:
             Xsyn_2.append(pca_X2[index])
             Ysyn_2.append(pca_Y2[index])
         else:
             Xbad_2.append(pca_X2[index])
             Ybad_2.append(pca_Y2[index])
     
     # print second plot
     clustplot2=lhc.PlotObject()
     #clustplot2.add_set(pca_X2,pca_Y2)
     clustplot2.add_set(Xbad_2,Ybad_2)                                       # 4 TESTING!
     clustplot2.add_set(Xsyn_2,Ysyn_2)                                       # 4 TESTING!
     clustplot2.normalize_vectors()
     clustplot2.styles=['scatter', 'scatter','scatter']
     clustplot2.colors=[None,'red','green']
     clustplot2.destination=1
     self._send_plot([clustplot2])
     self.clustplot2=clustplot2
     '''
     
     # PRINT density plot
     clustplot2=lhc.PlotObject()
     clustplot2.add_set(filtered_pca_X,filtered_pca_Y)
     clustplot2.normalize_vectors()
     clustplot2.styles=['scatter', 'scatter','scatter']
     clustplot2.colors=[None,'red','green']
     clustplot2.destination=1
     self._send_plot([clustplot2])
     self.clustplot2=clustplot2
     
     # printing results
     config_pca1 = config[0].replace("*", "x").rstrip("\n")
     config_pca2 = config[2].replace("*", "x").rstrip("\n")
     print ""
     print "- START: "+file_name
     print "Curve totali: ", nPlotTot
     #print "Curve totali good: ", nPlotGood                                  # 4 TESTING!
     print "- FILTRO 1: 0-500 e NaN"
     print "Curve totali rimaste: ", len(self.plot_origCoord)
     #print 'Curve good rimaste: ', len(Xsyn_1)                               # 4 TESTING!
     print "- FILTRO 2: PCA:"+config_pca1+" e DENSITA:"+str(my_filter)
     print "Curve totali rimaste: ", len(self.plot_FiltOrigCoord)
     #print 'Curve good rimaste: ', len(Xsyn_2)                               # 4 TESTING!
     print "Piu alta: ", tallest
     #print "- FILTRO 3: 2'PCA:"+config_pca2
     print ""
     
     # -- exporting coordinates and plot of PCA in debug mode! --
     if config[3].find("true")!=-1:
         # 1st PCA: save the plot and build the coordinates file
         self.do_export(file_name.replace("coordinate_", "debug_pca1graph_").replace('.txt','_'+config_pca1) + " 0")
         f = open(file_name.replace("coordinate_", "debug_pca1coor_").replace('.txt','_'+config_pca1+'.txt'),'w')
         for i in range(len(pca_X)):
             f.write (str(i) + "\t" + str(pca_X[i]) + "\t" + str(pca_Y[i]) + "\n")
         f.close()
         # 2nd PCA: save the plot and build the coordinates file
         #self.do_export(file_name.replace("coordinate_", "debug_pca2graph_").replace('.txt','_'+config_pca2) + " 1")
         #f = open(file_name.replace("coordinate_", "debug_pca2coor_").replace('.txt','_'+config_pca2+'.txt'),'w')
         #for i in range(len(pca_X2)):
         #    f.write (str(i) + "\t" + str(pca_X2[i]) + "\t" + str(pca_Y2[i]) + "\n")
         #f.close()
         #DENSITY: save plot
         self.do_export(file_name.replace("coordinate_", "debug_densitygraph_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",",")) + " 1")
         f = open(file_name.replace("coordinate_", "debug_densitycoor_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",",")+'.txt'),'w')
         for i in range(len(filtered_pca_X)):
             f.write (str(i) + "\t" + str(filtered_pca_X[i]) + "\t" + str(filtered_pca_Y[i]) + "\n")
         f.close()
         #ALL GOOD COORDINATES (without NaN and 0<x<500)
         f = open(file_name.replace("coordinate_", "debug_allgoodcoor_"),'w')
         for i in range(len(self.plot_myCoord)):
             for cel in self.plot_myCoord[i]:
                 f.write (" ; " + str(cel))
             f.write ("\n")
         f.close()
     
     # pCLUSTER SAVING!!!
     import shutil
     pcl_name = file_name.replace("coordinate_", "goodplots_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",","))
     if os.path.exists(pcl_name+slash): shutil.rmtree(pcl_name)
     os.mkdir(pcl_name+slash)
     f = open(pcl_name+'.txt','w')
     for i in range(len(self.plot_FiltPaths)):
         myfile = str(self.plot_FiltPaths[i]).rstrip("\n")
         f.write (myfile+"\n")
         shutil.copy2(myfile, pcl_name)
     f.close()
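The density filter at the heart of do_pca, in isolation: score every PCA point with a Gaussian kernel-density estimate and keep only the points above a threshold. The points are synthetic; the magic divisor is the default used above:

import numpy as np
import scipy.stats

pts = np.random.randn(2, 400)            # two rows: pca_X and pca_Y
kernel = scipy.stats.gaussian_kde(pts)
density = kernel(pts)                    # one score per point
my_filter = density.max() / 3.242311147  # same default threshold as above
kept = pts[:, density > my_filter]
print(kept.shape)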
Example #39
def PCA(x):
    # http://mdp-toolkit.sourceforge.net/
    return mdp.pca(x)