示例#1
0
文件: pca.py 项目: zhewang/lcvis
def calculate(ids, matrix, target=None):
    """Project each row of ``matrix`` onto its first two principal components.

    A PCA model is fitted on ``matrix``; every row is projected, its first two
    coordinates are rounded to 6 decimals, and the resulting list of points is
    handed to ``icp.align`` together with ``target``.

    Args:
        ids: Iterable paired with the rows of ``matrix``.  The values are not
            used; ``zip`` simply stops at the shorter of the two inputs.
        matrix: Row-wise samples, used both to fit the PCA and as the rows
            that get projected.
        target: Forwarded unchanged as the second argument of ``icp.align``.

    Returns:
        ``icp.align(points, target).tolist()``.
    """
    results = PCA(matrix)
    data = []
    for _, row in zip(ids, matrix):
        # Project once and reuse both coordinates (previously projected twice).
        projected = results.project(row)
        data.append([round(projected[0], 6), round(projected[1], 6)])

    data = icp.align(data, target)
    return data.tolist()
示例#2
0
    def pca_joints(self, num, data=None):
        """Project ``self.joints`` onto PCA axes fitted separately on two column groups.

        Columns 0-6 and columns 7-13 each get their own PCA model.  The models
        are fitted on ``data`` when it is given, otherwise on ``self.joints``;
        the projected samples are always ``self.joints``.

        ``fracs[num - 1]`` of each model is passed as ``minfrac`` to
        ``project``, i.e. the variance fraction of the ``num``-th component is
        used as the cut-off for which components are kept.

        Returns the two projections concatenated column-wise.
        """
        fit_source = self.joints if data is None else data

        first_model = PCA(fit_source[:, :7])  # joint positions only
        second_model = PCA(fit_source[:, 7:14])

        first_cutoff = first_model.fracs[num - 1]
        first_proj = first_model.project(self.joints[:, :7], minfrac=first_cutoff)

        second_cutoff = second_model.fracs[num - 1]
        second_proj = second_model.project(self.joints[:, 7:14], minfrac=second_cutoff)

        return np.concatenate((first_proj, second_proj), axis=1)
示例#3
0
    def doStuff(self):
        """Load the data, fit a PCA on ``self.array`` and plot component pairs.

        Prints the model's ``mu`` and ``fracs`` and the shape of the
        projection (taken with ``minfrac=0.1``), then shows a 1x3 figure with
        the projected columns plotted pairwise: (0, 1), (0, 2) and (1, 2).
        """
        self.readWFDEIOutput()
        self.makeArray()

        model = PCA(self.array)
        print(model.mu)
        print(model.fracs)

        projected = model.project(self.array, minfrac=0.1)
        print(projected.shape)

        # One panel per pair of the first three projected columns.
        column_pairs = ((0, 1), (0, 2), (1, 2))
        for panel, (x_col, y_col) in enumerate(column_pairs, 1):
            plt.subplot(1, 3, panel)
            plt.plot(projected[:, x_col], projected[:, y_col], 'k+')

        plt.show()
def PCA_on_waveforms(waveforms, minfrac, location):
    """
    Perform principal component analysis on extracted spike waveforms and
    return the projection of the waveforms on the principal component axes.

    Inputs:
        waveforms: Numpy array containing the waveforms; in the form of
            (N_events x N_electrodes x N_spike_time_range_steps). It is
            flattened to one row per event before the PCA is fitted.
        minfrac: Principal component axes that count for a variance greater
            than this value are taken into account.
        location: Object whose ``experiment`` attribute provides
            ``spike_samples_before`` and ``spike_samples_after``; their sum is
            used as the number of samples per waveform.

    Outputs:
        projection: Waveforms projected on the principal component axes
    """
    n_channels = len(waveforms[0])
    experiment = location.experiment
    samples_per_spike = (experiment.spike_samples_before +
                         experiment.spike_samples_after)

    # One flat feature vector per event: channels x samples.
    flattened = waveforms.reshape(len(waveforms),
                                  n_channels * samples_per_spike)
    model = PCA(flattened)
    return model.project(flattened, minfrac)
示例#5
0
def main():
    """Plot the mean 3-D PCA position of word clusters built around seed terms.

    Loads a word2vec model, fits a PCA on every vocabulary vector, then for
    each seed word gathers the seed's vector plus the vectors returned by
    ``model.most_similar``.  Each cluster is projected and the mean of its
    first three projected coordinates is drawn as one labelled marker.

    A seed word missing from the model makes the ``model[...]`` lookup raise.
    """
    import itertools

    print("Loading Word2Vec model...")
    # 4 GB input file, uses about 20 GB of memory when loaded.
    # Uses the model from: http://bio.nlplab.org/
    model = gensim.models.Word2Vec.load_word2vec_format("../../PubMed/BioNLP/wikipedia-pubmed-and-PMC-w2v.bin", binary=True)
    model.init_sims(replace=True)
    vocab = model.index2word

    data_matrix = np.array([model[word] for word in vocab])

    print("Running PCA...")
    pca_results = PCA(data_matrix)

    seed_word_list = ["dopamine", "GABA", "serotonin", "5HT", "acetylcholine" , "glutamate","electrode", "stimulator", "cognitive", "behavioral", "ethological", "genetic", "biochemical", "channel", "concentration", "dynamics", "receptor", "antibody", "fMRI", "calcium", "nucleus", "axon", "soma", "dendrite", "synapse", "fNIRS", "EEG"]

    # seed_word_list = [s.lower() for s in seed_word_list]

    # One cluster per seed: the seed's own vector, then its neighbours' vectors.
    classes = []
    for seed in seed_word_list:
        members = [model[seed]]
        for similar in model.most_similar(seed):
            members.append(model[similar[0]])
        classes.append(members)

    classes_projected = [
        [pca_results.project(vector) for vector in members]
        for members in classes
    ]

    print("Plotting PCA results...")
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_title("Principal Components of Word Vectors")

    marker = itertools.cycle(['o', '^', '*', "s", "h", "8"])
    colorList = ["r", "b", "g", "y", "k", "c", "m", "w"]
    colors = itertools.cycle(colorList)

    # next(iterator) works on Python 2.6+ and 3; iterator.next() is 2-only.
    m = next(marker)
    for i, seed in enumerate(seed_word_list):
        col = next(colors)
        # The marker advances whenever the colour cycle restarts.  This also
        # fires at i == 0, so the first marker of the cycle is skipped on the
        # first pass (behaviour kept as-is).
        if i % len(colorList) == 0:
            m = next(marker)

        # Plot the cluster mean of each of the first three components.
        cluster = classes_projected[i]
        mean_x = np.mean([f[0] for f in cluster])
        mean_y = np.mean([f[1] for f in cluster])
        mean_z = np.mean([f[2] for f in cluster])
        ax.plot([mean_x], [mean_y], [mean_z], marker=m, markersize=21, color=col, label=seed, linestyle="none")

    ax.legend(numpoints=1)
    plt.show()
示例#6
0
def pca(minfrac):
  """Fit a PCA on the first element of every entry in ``vects`` and project them.

  ``vects`` is read from module scope; ``vector[0]`` of each entry is used as
  one training row.  Progress messages are printed between the stages.

  Args:
    minfrac: Passed as the second argument to ``project`` for every row.

  Returns:
    A list with one projection per entry of ``vects``, in the same order.
  """
  matrix = [vector[0] for vector in vects]
  # print(...) with a single argument behaves the same on Python 2 and 3.
  print("Matrix Built")
  training = numpy.array(matrix)
  print("Training...")
  results = PCA(training)
  print("Projecting...")
  return [results.project(vector[0], minfrac) for vector in vects]
示例#7
0
def calculate(ids, matrix, target=None):
    """Project each row of ``matrix`` onto its first two principal components.

    A PCA model is fitted on ``matrix``; every row is projected, its first two
    coordinates are rounded to 6 decimals, and the resulting list of points is
    handed to ``icp.align`` together with ``target``.

    Args:
        ids: Iterable paired with the rows of ``matrix``.  The values are not
            used; ``zip`` simply stops at the shorter of the two inputs.
        matrix: Row-wise samples, used both to fit the PCA and as the rows
            that get projected.
        target: Forwarded unchanged as the second argument of ``icp.align``.

    Returns:
        ``icp.align(points, target).tolist()``.
    """
    results = PCA(matrix)
    data = []
    for _, row in zip(ids, matrix):
        # Project once and reuse both coordinates (previously projected twice).
        projected = results.project(row)
        data.append([round(projected[0], 6), round(projected[1], 6)])

    data = icp.align(data, target)
    return data.tolist()
示例#8
0
def pca(ids, matrix):
    """Fit a PCA on ``matrix``, pickle the model and return 2-D coordinates.

    The fitted model is written to ``./pca_pickle.dat``.  Progress lines
    prefixed with ``timestamp()`` are printed before and after the work.

    Args:
        ids: One identifier per row of ``matrix``; ``zip`` stops at the
            shorter of the two.
        matrix: Row-wise samples used to fit the PCA and then projected.

    Returns:
        A list of ``[pc1, pc2, obj_id]`` entries, with both coordinates
        rounded to 6 decimals.
    """
    print("{}: Calculating PCA...".format(timestamp()))

    results = PCA(matrix)

    # Pickle output is binary, so the file is opened in 'wb'; the context
    # manager closes the handle, which the previous bare open() never did.
    with open('./pca_pickle.dat', 'wb') as pickle_file:
        pickle.dump(results, pickle_file)

    data = []
    for obj_id, row in zip(ids, matrix):
        # Project once and reuse both coordinates.
        projected = results.project(row)
        data.append([round(projected[0], 6), round(projected[1], 6), obj_id])

    print("{}: Done.".format(timestamp()))
    return data
示例#9
0
def draw_pcca_memberships(original_data, pcca, discrete_trajectory, colormap_name="jet"):
    """
    Scatter the PCA projection of the data, coloured by PCCA+ cluster.

    ``pcca.shape[1]`` is taken as the number of clusters and entries above 0.5
    count as membership.  ``discrete_trajectory`` indexes the rows of ``pcca``;
    the resulting per-sample cluster index selects rows of ``original_data``.
    For every cluster the number of selected points is printed and the points
    are drawn on the current pyplot axes using their first two projected
    coordinates.
    """
    projector = PCA(original_data)

    n_clusters = pcca.shape[1]
    colormap = matplotlib.cm.get_cmap(colormap_name, n_clusters + 1)

    # Column index of each True entry, after looking rows up via the trajectory.
    is_member = pcca > 0.5
    assigned_cluster = np.where(is_member[discrete_trajectory])[1]

    for cluster in range(n_clusters):
        selected = original_data[np.where(assigned_cluster == cluster)]
        print('points in cluster ', cluster, ': ', len(selected))
        projected = projector.project(selected)
        plt.scatter(projected[:, 0], projected[:, 1], color=colormap(cluster), alpha=0.5)
    plt.title('pcca')
示例#10
0
def draw_clusters(clusters, plotter=None, colormap_name="jet"):
    """
    Visualize clustered data and cluster membership in a new plot or with an existing axis object.

    Args:
        clusters: Object providing ``getOriginalData()``, ``getClusterIDs()``
            and a ``_map`` attribute that is compared against each cluster id
            to select that cluster's rows.
        plotter: Anything with a ``scatter`` method; ``plt`` is used when it
            is omitted or falsy.
        colormap_name: Name handed to ``matplotlib.cm.get_cmap``.
    """
    plotter = plotter or plt

    # use PCA to be able to visualize the data in two dimensions
    all_data = clusters.getOriginalData()
    pca = PCA(all_data)

    # NOTE(review): a size-dependent alpha (1/sqrt(n), clamped to
    # [0.05, 0.75]) used to be computed here but was never passed to
    # scatter().  The dead computation is removed and the fixed 0.5 that was
    # actually rendered is kept.
    cluster_ids = clusters.getClusterIDs()
    colormap = matplotlib.cm.get_cmap(colormap_name, len(cluster_ids) + 1)
    for index, cluster in enumerate(cluster_ids):
        datapoints = all_data[clusters._map == cluster, :]
        datapoints_transformed = pca.project(datapoints)
        plotter.scatter(datapoints_transformed[:, 0], datapoints_transformed[:, 1], color=colormap(index), alpha=0.5)
 def sample_cluster_2Dmap(self, **kwargs):
     """Scatter-plot the samples on the first two PCA axes of their covariance.

     Keyword Args:
         genelist: Optional list of genes.  It is passed through
             ``self.validate_genelist`` and the result selects rows of
             ``self.array``.  Any other truthy value raises ``TypeError``.
         samplenames: Optional legend labels; there must be exactly one per
             key of ``self.dataindexdic``.
         size: Marker size handed to ``plt.scatter`` (default 50).

     Raises:
         TypeError: ``genelist`` is truthy but not a list.
         ValueError: ``samplenames`` has the wrong number of entries.
     """
     defaults = dict(
         genelist=None,
         samplenames=None,
         size=50,)
     for key, value in defaults.items():
         kwargs.setdefault(key, value)

     genearray = self.array
     genelist = kwargs['genelist']
     if isinstance(genelist, list):
         validatedlist = self.validate_genelist(genelist)
         genearray = self.array.take(validatedlist, axis=0)
     elif genelist:
         # raise('...') raised a plain string, which is itself a TypeError
         # with an unrelated message; raise a real exception instead.
         raise TypeError('genelist should be list of genes')

     keys = list(self.dataindexdic.keys())
     samplenames = list(keys)
     if kwargs['samplenames']:
         if len(kwargs['samplenames']) != len(samplenames):
             raise ValueError('length of samplenames should be {}'.format(len(samplenames)))
         samplenames = kwargs['samplenames']

     covarray = numpy.cov(genearray.T)  # covariance array
     covPCA = PCA(covarray)  # matplotlib.mlab.PCA
     convertedcovs = covPCA.project(covarray)  # converted vector along PC
     data = numpy.array([[x[0] for x in convertedcovs], [x[1] for x in convertedcovs]])

     # auto color picking with sample numbers
     colorlist = cm.rainbow(numpy.linspace(0, 1, len(samplenames)))

     # Columns of `data` are consumed in key order: each key owns as many
     # consecutive columns as it has entries in self.dataindexdic.
     sampleindex = 0
     for key, label, colour in zip(keys, samplenames, colorlist):
         samplenumber = len(self.dataindexdic[key])
         subdata = numpy.take(
             data, range(sampleindex, sampleindex + samplenumber), axis=1)
         plt.scatter(
             subdata[0], subdata[1], color=colour, s=kwargs['size'], label=label)
         sampleindex += samplenumber
     plt.legend(loc='upper left', fontsize=15, scatterpoints=1, bbox_to_anchor=(1, 1))
def main():
    """Interactive loop: read seed words, PCA their neighbours, plot the seeds.

    For every prompt line the unique whitespace-separated words are taken as
    seeds.  Seeds found in the model vocabulary are expanded with
    ``model.most_similar``; a PCA is fitted on the vectors of the derived
    words, and each known seed is drawn at its first two projected
    coordinates.  The loop never terminates on its own.

    A line with no known seed word (including an empty line) is skipped.
    Previously that case divided by zero before the later emptiness check
    could run.
    """
    import itertools

    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    print("Loading Word2Vec model...")
    # 4 GB input file, uses about 20 GB of memory when loaded.
    # Uses the model from: http://bio.nlplab.org/
    model = gensim.models.Word2Vec.load_word2vec_format(
        "../../PubMed/BioNLP/wikipedia-pubmed-and-PMC-w2v.bin", binary=True)
    #model = gensim.models.Word2Vec.load("../../PubMed/derived_from_neuroscience_abstracts/word2vec_model_1")
    model.init_sims(replace=True)
    vocab = model.index2word

    # Membership is tested once per seed word per prompt; a set makes that
    # O(1) instead of a scan over the whole vocabulary list.
    known_words = set(vocab)
    vector_length = len(model[vocab[0]])

    while True:
        seed_string = read_line('\nprompt> ')
        seed_word_list = list(set(
            seed_string.split()))  # set gets the unique elements here

        print("Seed words:")
        for word in seed_word_list:
            print(word)

        known_seeds = [s for s in seed_word_list if s in known_words]
        if not known_seeds:
            continue

        # choose how many words to find to allow numrows > numcols in PCA
        top_vecs = max(
            15, int(1 + float(vector_length) / float(len(known_seeds))))

        print("Finding a bunch of similar words...")
        derived_word_list = []
        for s in known_seeds:
            print("\tSearching for similarities for %s" % s)
            derived_word_list += [
                m[0]
                for m in model.most_similar(positive=[s], topn=top_vecs)
            ]

        if len(derived_word_list) == 0:
            continue

        derived_word_list = list(set(derived_word_list))

        print("Derived words:")
        for word in derived_word_list:
            print(word)

        data_matrix = np.array([model[s] for s in derived_word_list])

        print("Running PCA...")
        pca_results = PCA(data_matrix)
        word_short_list = list(known_seeds)
        projected_vectors = [
            pca_results.project(model[word]) for word in word_short_list
        ]

        print("Plotting PCA results...")
        plt.figure()
        plt.title("Principal Components of Word Vectors")

        plots = []

        marker = itertools.cycle(['o', '^', '*', "s", "h", "8"])
        colorList = ["r", "b", "g", "y", "k", "c", "m", "w"]
        colors = itertools.cycle(colorList)

        m = next(marker)
        for i, point in enumerate(projected_vectors):
            col = next(colors)
            # The marker advances whenever the colour cycle restarts; this
            # also fires at i == 0 (behaviour kept as-is).
            if i % len(colorList) == 0:
                m = next(marker)

            p, = plt.plot([point[0]], [point[1]],
                          marker=m,
                          markersize=21,
                          color=col,
                          linestyle="none")
            plots.append(p)

        plt.legend(plots, word_short_list, loc="upper left", numpoints=1)
        plt.show()
    # Fragment of a larger routine (its header is not visible here): builds
    # the training set for one fold, fits an SVM and prepares the test set.
    # The lambda selects entries by comparing two values for inequality.
    schTrain = getFold(sch, schUserFoldDict, foldid, lambda x,y:x!=y, useNgram)


    # Labels: 1 for every control sample, 0 for every sch sample.
    XTrain, YTrain = randomShuffle(controlTrain + schTrain, [1]*len(controlTrain) + [0]*len(schTrain))

    #findCorrelation(XTrain)  #plots graph of feature correlations

    #[meanFt, varFt] = normFeatParams(XTrain)  #both meanFt and varFt are of length = numberoffeatures
    #XTrain = normFeat(XTrain, meanFt, varFt)

    PCAObject = PCA(np.asarray(XTrain))

    # NOTE(review): if PCA is matplotlib.mlab.PCA, project() already centers
    # its input, so data passed through center() and then project() would be
    # standardised twice - confirm this is intended.
    XTrain = PCAObject.center(XTrain)
    if doPCA:
        # Number of leading components to keep, derived from the variance
        # fractions with a 0.99 threshold.
        numFeatures =  retainPerc(PCAObject.fracs, 0.99)
        XTrain = PCAObject.project(XTrain)[:,0:numFeatures]
        [meanFt, varFt] = normFeatParams(XTrain)  #both meanFt and varFt are of length = numberoffeatures
        XTrain = np.asarray(normFeat(XTrain, meanFt, varFt))
        #print numFeatures, XTrain.shape

    #TODO: SHUFFLE UP THE INPUT

    clf = svm.SVC(kernel='rbf')
    clf.fit(XTrain, YTrain)

    # The test set goes through the same transforms, reusing the PCA model and
    # the normalisation parameters obtained from the training data.
    XTest = controlTest + schTest
    XTest = PCAObject.center(XTest)
    if doPCA:
        XTest = PCAObject.project(XTest)[:,0:numFeatures]
        XTest = np.asarray(normFeat(XTest, meanFt, varFt))
示例#14
0
    # Fragment (enclosing function not visible): split the id -> row mapping
    # into parallel lists and report rows whose length is unexpected.
    for i in matrix_with_id:
        obj_ids.append(i)
        matrix.append(matrix_with_id[i])
        if len(matrix_with_id[i]) != row_length:
            print('row length is not {}'.format(row_length))

    # PCA calculating
    vec = np.array(matrix)
    # Assigning .shape reshapes in place and fails if the element count does
    # not match (len(matrix), row_length).
    vec.shape = (len(matrix), row_length)
    results = PCA(vec)

    data = []

    # One output record per object: first two projected coordinates, the
    # object's "stype" and its id.
    # NOTE(review): project(row) is evaluated twice per row.
    for obj_id, row in zip(obj_ids, matrix):
        obj_type = BandB_sampled[obj_id]["stype"]
        data.append([results.project(row)[0], results.project(row)[1], obj_type, obj_id])

    # Write all records as a single JSON document.
    f_out = open(args.path+'/pca_supernova.json', 'w')
    f_out.write(json.dumps(data))
    f_out.close()

    #matrix = []
    #j = json.load(open('{}/PLV_LINEAR.json'.format(args.path)))

    #metadata = dict((obj["LINEARobjectID"], obj) for obj in j["data"])
    #obj_ids = []

    #row_length = 50

    #with open('{}/object_list.csv'.format(args.path)) as csvfile:
        #objects = csv.reader(csvfile)
示例#15
0
def pca(dim):
    """Return the first ``dim`` PCA coordinates of columns 0-8 of ``data``.

    ``data`` is read from module scope.  The model is fitted on those nine
    columns and the same columns are projected.
    """
    features = data[:, 0:9]
    model = PCA(features)
    projected = model.project(features)
    return projected[:, 0:dim]
示例#16
0
文件: pca.py 项目: hdc-arizona/lcvis
        # Fragment (the enclosing `with open(...) as csvfile` is not visible):
        # read the object list, skipping its first row.
        objects = csv.reader(csvfile)
        next(objects, None)
        for row in objects:
            obj_id = int(row[0])
            period = float(row[1])
            # Only objects with a positive period are used.
            if period > 0:
                v = loadMagData(args.path+'/'+str(obj_id)+'.fit.json')
                # Pad short vectors up to row_length by repeating the first value.
                for i in range(row_length - len(v)):
                    v.append(v[0])
                matrix.append(v)
                obj_ids.append(obj_id)

    vec = np.array(matrix)
    # In-place reshape; fails if the element count does not match.
    vec.shape = (len(matrix), row_length)
    results = PCA(vec)

    # Persist the fitted model and the input matrix as pickles.
    with open('pca_result.dat', 'wb') as f:
        pickle.dump(results, f)

    with open('pca_matrix.dat', 'wb') as f:
        pickle.dump(vec, f)

    data = []

    # One record per object: first two projected coordinates, its "LCtype"
    # metadata entry and its id.
    # NOTE(review): project(row) is evaluated twice per row.
    for obj_id, row in zip(obj_ids, matrix):
        data.append([results.project(row)[0], results.project(row)[1], metadata[obj_id]["LCtype"], obj_id])

    f_out = open(args.path+'/pca.json', 'w')
    f_out.write(json.dumps(data))
    f_out.close()
示例#17
0
import numpy as np
from matplotlib.mlab import PCA

def _parse_features(line):
    """Return the float values of the ``index:value`` tokens on one data line.

    The first two characters of the line are skipped; the remainder is split
    on single spaces and the text after the first ':' of each token is parsed.
    """
    return [float(el[el.index(":") + 1:]) for el in line[2:].strip().split(" ")]


# Collect the training rows; only rows with exactly 86 features are used to
# fit the PCA.
data = []
with open("emotions.train") as train_file:
    for line in train_file:
        row = _parse_features(line)
        if len(row) == 86:
            data.append(row)

results = PCA(np.array(data))

# Archive the model parameters as comma-separated text, one file each.
with open("pca_archive_wt.txt", "w") as archive:
    for v in results.Wt:
        archive.write(",".join([str(float(x)) for x in v]) + "\n")

with open("pca_archive_mu.txt", "w") as archive:
    archive.write(",".join([str(float(x)) for x in results.mu]) + "\n")

with open("pca_archive_sigma.txt", "w") as archive:
    archive.write(",".join([str(float(x)) for x in results.sigma]) + "\n")

# Rewrite the training file with projected features (minfrac 0.001), keeping
# the first two characters of every line and renumbering features from 1.
# NOTE(review): every line is projected here, including lines that were left
# out of the fit for not having 86 features - confirm that is intended.
with open("emotions.train.pca", "w") as fout:
    with open("emotions.train") as train_file:
        for line in train_file:
            projected = results.project(np.array(_parse_features(line)), 0.001)
            fout.write(line[:2] + " ".join([
                str(i + 1) + ":" + str(value)
                for i, value in enumerate(projected)
            ]) + "\n")
示例#18
0
def _parse_features(line):
    """Return the float values of the ``index:value`` tokens on one data line.

    The first two characters of the line are skipped; the remainder is split
    on single spaces and the text after the first ':' of each token is parsed.
    """
    return [float(el[el.index(":") + 1:]) for el in line[2:].strip().split(" ")]


# Collect the training rows into the existing `data` list; only rows with
# exactly 86 features are used to fit the PCA.
with open("emotions.train") as train_file:
    for line in train_file:
        row = _parse_features(line)
        if len(row) == 86:
            data.append(row)

results = PCA(np.array(data))

# Archive the model parameters as comma-separated text, one file each.
with open("pca_archive_wt.txt", "w") as archive:
    for v in results.Wt:
        archive.write(",".join([str(float(x)) for x in v]) + "\n")

with open("pca_archive_mu.txt", "w") as archive:
    archive.write(",".join([str(float(x)) for x in results.mu]) + "\n")

with open("pca_archive_sigma.txt", "w") as archive:
    archive.write(",".join([str(float(x)) for x in results.sigma]) + "\n")

# Rewrite the training file with projected features (minfrac 0.001), keeping
# the first two characters of every line and renumbering features from 1.
# NOTE(review): every line is projected here, including lines that were left
# out of the fit for not having 86 features - confirm that is intended.
with open("emotions.train.pca", "w") as fout:
    with open("emotions.train") as train_file:
        for line in train_file:
            projected = results.project(np.array(_parse_features(line)), 0.001)
            fout.write(line[:2] + " ".join([
                str(i + 1) + ":" + str(value)
                for i, value in enumerate(projected)
            ]) + "\n")
def make_plots(chromosomes, groups, group_labels):
    """Plot a 2-D PCA of per-population mutation spectra and save it as a PDF.

    Counts are read from one text file per chromosome, summed across
    chromosomes, normalised per column so each column sums to 1, and averaged
    over consecutive column pairs (2j, 2j + 1).  A PCA is fitted on the rows
    belonging to the requested populations and each group is drawn with its
    own colour.

    Args:
        chromosomes: Chromosome numbers; the header of the first one's file
            supplies the population label of every column.
        groups: Iterable of groups, each an iterable of population labels
            that must appear in that header.
        group_labels: One label per group; looked up in the module-level
            ``longname`` for the legend and joined with '_' to form the
            output file name.
    """
    with open(
            '../finescale_mut_spectra/derived_each_lineage_chr%i_nosingle.txt' %
            chromosomes[0]) as infile:
        lines = infile.readlines()
    s = lines[0].strip('\n').split(' ')

    # Map each header label to the (0-based) positions where it occurs.
    indices = {}
    for i in range(1, len(s)):
        indices.setdefault(s[i], []).append(i - 1)

    mut_counts = np.zeros((2 * (len(s) - 1), len(lines) - 1))

    for chrom in chromosomes:
        with open('../finescale_mut_spectra/derived_each_lineage_chr' +
                  str(chrom) + '_nosingle.txt') as infile:
            lines = infile.readlines()

        for i in range(len(lines) - 1):
            s = lines[i + 1].strip('\n').split(' ')
            for j in range(len(s) - 1):
                mut_counts[j][i] += int(s[j + 1])

    # `s` now holds the last data row that was parsed (or still the header if
    # the files had no data rows); its width decides how many columns are
    # normalised and averaged, exactly as before.
    n_columns = len(s) - 1
    for j in range(n_columns):
        mut_counts[j] *= 1.0 / mut_counts[j].sum()

    # Integer division: range() rejects the float that `/` yields on Python 3.
    mut_counts = np.array([
        0.5 * (mut_counts[2 * j] + mut_counts[2 * j + 1])
        for j in range(n_columns // 2)
    ])

    group_mut_counts = np.array([
        mut_counts[i]
        for group in groups
        for population in group
        for i in indices[population]
    ])
    myPCA = PCA(group_mut_counts)

    colors = ['blue', 'green', 'red', 'purple', 'black', 'orange']
    for group, group_label, color in zip(groups, group_labels, colors):
        x, y = [], []
        for population in group:
            for ind in indices[population]:
                this_point = myPCA.project(mut_counts[ind])
                x.append(this_point[0])
                y.append(this_point[1])
        plt.scatter(x, y, color=color, label=longname[group_label])
    plt.legend(loc='lower left', ncol=2, prop={'size': 8})
    plt.xticks(())
    plt.yticks(())
    plt.xlabel('PC1 (' + str(int(100 * myPCA.fracs[0])) +
               '% variance explained)')
    plt.ylabel('PC2 (' + str(int(100 * myPCA.fracs[1])) +
               '% variance explained)')
    fig = plt.gcf()
    fig.set_size_inches((4.5, 3.5))
    plt.savefig('_'.join(group_labels) + '_mut_PCA_1kg_nosingle_altlegend.pdf')
    plt.clf()
示例#20
0

# Shuffled k-fold split over X: fit `g` on each training fold and record
# g.bic() on the matching held-out fold.
kf = cross_validation.KFold(len(X), k=folds, shuffle=True)
for train_index, test_index in kf:
    # print("TRAIN: %s TEST: %s" % (train_index, test_index))
    X_train, X_test = X[train_index], X[test_index]

    fits.append(g.fit(X_train))
    scores.append(g.bic(X_test))
# print(x) with a single argument behaves the same on Python 2 and 3.
print(scores)

# Scatter two columns of the PCA projection of all of X into a PNG.
fig = Figure(figsize=(6, 6))
canvas = FigureCanvas(fig)
myPCA = PCA(X)
pcDataPoint = myPCA.project(X)
ax = fig.add_subplot(111)
# NOTE(review): columns 1 and 2 are the 2nd and 3rd projected components
# (0-based indexing) while the file is named "PCA12" - confirm which pair is
# intended.
ax.scatter(pcDataPoint[:, 1], pcDataPoint[:, 2])
canvas.print_figure("PCA12.png", dpi=500)
#  print(scores)
# avg = float(sum(scores)/len(scores))
# for k in range(0,len(scores)):
#    diffs.append((scores[k]-avg)*(scores[k]-avg))
# print diffs

# var = float(sum(diffs)/len(scores))
# scoresavg.append(avg)
# scoresvar.append(var)
# print(scoresavg)
# print(scoresvar)
示例#21
0
    # Fragment (enclosing function not visible): one row per entry of `j`,
    # taken from its "splinedata" field.
    matrix = list(x["splinedata"] for x in j)
    
    # metadata = dict((obj["LINEARobjectID"], obj) for obj in j["data"])
    # obj_ids = []

    # with open('{}/object_list.csv'.format(args.path)) as csvfile:
    #     objects = csv.reader(csvfile)
    #     next(objects, None)
    #     for row in objects:
    #         obj_id = int(row[0])
    #         period = float(row[1])
    #         if period > 0:
    #             v = loadMagData(args.path+'/'+str(obj_id)+'.fit.json')
    #             for i in range(50 - len(v)):
    #                 v.append(v[0])
    #             matrix.append(v)
    #             obj_ids.append(obj_id)

    vec = np.array(matrix)
    # In-place reshape to 20 columns; fails if the element count does not match.
    vec.shape = (len(matrix), 20)
    results = PCA(vec)

    data = []

    # One record per entry: first two projected coordinates plus the entry itself.
    # NOTE(review): project(row) is evaluated twice per row.
    for obj, row in zip(j, matrix):
        data.append([results.project(row)[0], results.project(row)[1], obj])

    # Write all records as a single JSON document in the working directory.
    f_out = open('pca_transients.json', 'w')
    f_out.write(json.dumps(data))
    f_out.close()
示例#22
0
def main():
    """Plot the PCA projection of the seed words, first in 3-D and then in 2-D.

    Loads the word2vec model stored under ``outdir + subdir``, fits a PCA on
    every vocabulary vector and projects the lower-cased ``seed_words``.  Both
    figures use the same marker/colour assignment and label the points with
    the original (not lower-cased) seed words.

    A seed word missing from the model makes the ``model[...]`` lookup raise.
    """
    import itertools

    print("\nLoading Word2Vec model...\n")
    model = gensim.models.Word2Vec.load(outdir + subdir + "word2vec_model")
    model.init_sims(replace=True)

    vocab = model.index2word

    data_matrix = np.array([model[word] for word in vocab])

    print("Running PCA...")
    pca_results = PCA(data_matrix)

    seed_word_list = [s.lower() for s in seed_words]
    vectors = [model[s] for s in seed_word_list]
    projected_vectors = [pca_results.project(v) for v in vectors]

    # Marker/colour per point, computed once and shared by both figures.
    # The marker advances whenever the colour cycle restarts; this also fires
    # at i == 0, so the first marker of the cycle is skipped on the first pass
    # (behaviour kept as-is).
    marker = itertools.cycle(['o', '^', '*', "s", "h", "8"])
    colorList = [
        "r", "b", "g", "y", "k", "c", "m", "w", "HotPink", "Indigo", "Grey"
    ]
    colors = itertools.cycle(colorList)
    styles = []
    m = next(marker)
    for i in range(len(seed_word_list)):
        col = next(colors)
        if i % len(colorList) == 0:
            m = next(marker)
        styles.append((m, col))

    plt.rc('legend', **{'fontsize': 7})
    print("Plotting PCA results in 3D...")
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_title("Principal Components of Word Vectors")
    for (m, col), point, label in zip(styles, projected_vectors, seed_words):
        ax.plot([point[0]], [point[1]],
                [point[2]],
                marker=m,
                markersize=10,
                c=col,
                label=label,
                linestyle="none")
    ax.legend(numpoints=1, loc=5)

    print("Plotting PCA results in 2D...")
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_title("Principal Components of Word Vectors")
    for (m, col), point, label in zip(styles, projected_vectors, seed_words):
        ax.plot([point[0]], [point[1]],
                marker=m,
                markersize=10,
                c=col,
                label=label,
                linestyle="none")
    ax.legend(numpoints=1, loc=5)

    plt.show()
示例#23
0
文件: pca.py 项目: ycchui/uncertainty
# Fit the PCA and derive, for the first two components, each weight divided by
# the matching column's standard deviation.
results = PCA(data)
# Despite the name, axis=0 yields one standard deviation per column.
row_std = np.std(data, axis=0)
# print(x) with a single argument behaves the same on Python 2 and 3.
print(results.Wt.shape)
loading1 = results.Wt[0] / row_std
loading2 = results.Wt[1] / row_std

# Switch into ./pca (created on first use); later relative paths resolve there.
pca_dir = os.path.join(os.getcwd(), 'pca')
if not os.path.exists(pca_dir):
    os.makedirs(pca_dir)
os.chdir(pca_dir)

para_project = results.project(para)
print(para_project.shape)

fig = plt.figure()
fig.set_size_inches(10, 10)

# Left panel: the fitted samples on the first two components.
ax1 = plt.subplot(121, aspect='equal')
plt.plot(results.Y[:, 0], results.Y[:, 1], 'o', color='blue', label='models')
plt.xlabel('PC1')
plt.ylabel('PC2')
centre_spines(ax1)

# Right panel: the loadings; only points that are extreme on either axis get
# a text label.
ax2 = plt.subplot(122, aspect='equal')
plt.plot(loading1, loading2, '^', color='red', label='metrics')
# The extremes do not change inside the loop, so compute them once.
loading1_max, loading1_min = np.max(loading1), np.min(loading1)
loading2_max, loading2_min = np.max(loading2), np.min(loading2)
for label, x, y in zip(t, loading1, loading2):
    if (x == loading1_max or x == loading1_min
            or y == loading2_max or y == loading2_min):
        plt.annotate(label, xy=(x, y))
    #plt.annotate(label, xy=(x,y))
示例#24
0
    # Fragment (enclosing function not visible): one row per entry of `j`,
    # taken from its "splinedata" field.
    matrix = list(x["splinedata"] for x in j)

    # metadata = dict((obj["LINEARobjectID"], obj) for obj in j["data"])
    # obj_ids = []

    # with open('{}/object_list.csv'.format(args.path)) as csvfile:
    #     objects = csv.reader(csvfile)
    #     next(objects, None)
    #     for row in objects:
    #         obj_id = int(row[0])
    #         period = float(row[1])
    #         if period > 0:
    #             v = loadMagData(args.path+'/'+str(obj_id)+'.fit.json')
    #             for i in range(50 - len(v)):
    #                 v.append(v[0])
    #             matrix.append(v)
    #             obj_ids.append(obj_id)

    vec = np.array(matrix)
    # In-place reshape to 20 columns; fails if the element count does not match.
    vec.shape = (len(matrix), 20)
    results = PCA(vec)

    data = []

    # One record per entry: first two projected coordinates plus the entry itself.
    # NOTE(review): project(row) is evaluated twice per row.
    for obj, row in zip(j, matrix):
        data.append([results.project(row)[0], results.project(row)[1], obj])

    # Write all records as a single JSON document in the working directory.
    f_out = open('pca_transients.json', 'w')
    f_out.write(json.dumps(data))
    f_out.close()
示例#25
0
# Fit the PCA and derive, for the first two components, each weight divided by
# the matching column's standard deviation.
results = PCA(data)
# Despite the name, axis=0 yields one standard deviation per column.
row_std = np.std(data, axis=0)
# print(x) with a single argument behaves the same on Python 2 and 3.
print(results.Wt.shape)
loading1 = results.Wt[0] / row_std
loading2 = results.Wt[1] / row_std

# Switch into ./pca (created on first use); later relative paths resolve there.
pca_dir = os.path.join(os.getcwd(), 'pca')
if not os.path.exists(pca_dir):
    os.makedirs(pca_dir)
os.chdir(pca_dir)

para_project = results.project(para)
print(para_project.shape)

fig = plt.figure()
fig.set_size_inches(10, 10)

# Left panel: the fitted samples on the first two components.
ax1 = plt.subplot(121, aspect='equal')
plt.plot(results.Y[:, 0], results.Y[:, 1], 'o', color='blue', label='models')
plt.xlabel('PC1')
plt.ylabel('PC2')
centre_spines(ax1)

# Right panel: the loadings; only points that are extreme on either axis get
# a text label.
ax2 = plt.subplot(122, aspect='equal')
plt.plot(loading1, loading2, '^', color='red', label='metrics')
# The extremes do not change inside the loop, so compute them once.
loading1_max, loading1_min = np.max(loading1), np.min(loading1)
loading2_max, loading2_min = np.max(loading2), np.min(loading2)
for label, x, y in zip(t, loading1, loading2):
    if (x == loading1_max or x == loading1_min
            or y == loading2_max or y == loading2_min):
        plt.annotate(label, xy=(x, y))
示例#26
0
        # Fragment (the enclosing reader setup is not visible): skip the
        # first row of the object list.
        next(objects, None)
        for row in objects:
            obj_id = int(row[0])
            period = float(row[1])
            # Only objects with a positive period are used.
            if period > 0:
                v = loadMagData(args.path+'/'+str(obj_id)+'.fit.json')
                # Pad short vectors up to row_length by repeating the first value.
                for i in range(row_length - len(v)):
                    v.append(v[0])
                # The period is appended as one extra feature, hence the
                # row_length+1 columns below.
                v.append(period)
                matrix.append(v)
                obj_ids.append(obj_id)

    vec = np.array(matrix)
    # In-place reshape; fails if the element count does not match.
    vec.shape = (len(matrix), row_length+1)
    results = PCA(vec)

    # Persist the fitted model and the input matrix as pickles.
    with open('pca_result.dat', 'wb') as f:
        pickle.dump(results, f)

    with open('pca_matrix.dat', 'wb') as f:
        pickle.dump(vec, f)

    data = []

    # One record per object: first two projected coordinates, its "LCtype"
    # metadata entry and its id.
    # NOTE(review): project(row) is evaluated twice per row.
    for obj_id, row in zip(obj_ids, matrix):
        data.append([results.project(row)[0], results.project(row)[1], metadata[obj_id]["LCtype"], obj_id])

    f_out = open(args.path+'/pca_with_period.json', 'w')
    f_out.write(json.dumps(data))
    f_out.close()
示例#27
0
        # Fragment (the enclosing loop header over the id -> row mapping is
        # not visible): collect ids and rows in parallel lists and report
        # rows whose length is unexpected.
        obj_ids.append(i)
        matrix.append(matrix_with_id[i])
        if len(matrix_with_id[i]) != row_length:
            print('row length is not {}'.format(row_length))

    # PCA calculating
    vec = np.array(matrix)
    # In-place reshape; fails if the element count does not match.
    vec.shape = (len(matrix), row_length)
    results = PCA(vec)

    data = []

    # One output record per object: first two projected coordinates, the
    # object's "stype" and its id.
    # NOTE(review): project(row) is evaluated twice per row.
    for obj_id, row in zip(obj_ids, matrix):
        obj_type = BandB_sampled[obj_id]["stype"]
        data.append([
            results.project(row)[0],
            results.project(row)[1], obj_type, obj_id
        ])

    # Write all records as a single JSON document.
    f_out = open(args.path + '/pca_supernova.json', 'w')
    f_out.write(json.dumps(data))
    f_out.close()

    #matrix = []
    #j = json.load(open('{}/PLV_LINEAR.json'.format(args.path)))

    #metadata = dict((obj["LINEARobjectID"], obj) for obj in j["data"])
    #obj_ids = []

    #row_length = 50
示例#28
0
    ax.set_ylim([-15,20])
    ax.set_zlim([-15,15])
    pl.savefig("3D_" +rat + "_" + date1+".png")

    pl.close('all') 
    pl.xlim([-10,20])
    pl.ylim([-15,15])
    pl.scatter(pca.Y[::1,0], pca.Y[::1,1])
    pl.savefig("2D_" + rat + "_" + date1 + ".png")
    
    for date in file[rat]:
        if date != date1: 
            try:
                object = file[rat][date]["valueMatrix"]
                data = np.array(object)
                projectedData = pca.project(data)

                pl.close('all') 
                fig1 = pl.figure()
                ax = Axes3D(fig1)
                ax.scatter(projectedData[::1,0], projectedData[::1,1], projectedData[::1,2], 'bo')
                ax.set_xlim([-10,20])
                ax.set_ylim([-15,20])
                ax.set_zlim([-15,15])
                pl.savefig("3D_" +rat + "_" + date+".png")

                pl.close('all') 
                pl.xlim([-10,20])
                pl.ylim([-15,15])
                pl.scatter(projectedData[::1,0], projectedData[::1,1])
                pl.savefig("2D_" + rat + "_" + date + ".png")