Example #1
import numpy
from time import perf_counter

from sklearn import mixture  # pre-0.20 scikit-learn; DPGMM was removed in 0.20

import dataset  # project-local helper module providing the dataset loaders


def test_DPGMM():

    #
    # random generator
    seed = 1337
    rand_gen = numpy.random.RandomState(seed)

    verbose = True

    #
    # loading a very simple dataset
    dataset_name = 'nltcs'
    train, valid, test = dataset.load_train_val_test_csvs(dataset_name)

    #
    # this is the max number of clusters for a truncated DP
    n_components = 100

    # 'spherical', 'tied', 'diag', 'full'. Defaults to 'diag'.
    cov_type = 'diag'
    n_iters = 1000

    concentration = 1.0
    # a higher alpha means more clusters,
    # as the expected number of clusters is alpha*log(N).
    dpgmm_c = mixture.DPGMM(n_components=n_components,
                            covariance_type=cov_type,
                            random_state=rand_gen,
                            n_iter=n_iters,
                            alpha=concentration,
                            verbose=verbose)

    #
    # fitting to training set
    fit_start_t = perf_counter()
    dpgmm_c.fit(train)
    fit_end_t = perf_counter()

    #
    # getting the cluster assignment
    pred_start_t = perf_counter()
    clustering = dpgmm_c.predict(train)
    pred_end_t = perf_counter()

    print('Clustering')
    print('for instances: ', clustering.shape[0])
    print(clustering)
    print('smallest cluster id', numpy.min(clustering))
    print('largest cluster id', numpy.max(clustering))
    print('fitting done in', (fit_end_t - fit_start_t), 'secs')
    print('prediction done in', (pred_end_t - pred_start_t), 'secs')

    #
    # predicting probabilities
    pred_start_t = perf_counter()
    clustering_p = dpgmm_c.predict_proba(train)
    pred_end_t = perf_counter()
    print('prediction done in', (pred_end_t - pred_start_t), 'secs')
    print(clustering_p.shape[0], clustering_p.shape[1])
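
# Note: DPGMM was deprecated in scikit-learn 0.18 and removed in 0.20; its
# closest modern equivalent is BayesianGaussianMixture with a Dirichlet
# process weight prior. A minimal standalone sketch of the fit above (not a
# drop-in replacement -- the variational treatment and defaults differ, and
# the random data below is just a stand-in for `train`):
def test_BayesianGaussianMixture_sketch():
    from sklearn.mixture import BayesianGaussianMixture

    X = numpy.random.RandomState(1337).rand(500, 16)
    bgm = BayesianGaussianMixture(n_components=100,
                                  covariance_type='diag',
                                  weight_concentration_prior_type='dirichlet_process',
                                  weight_concentration_prior=1.0,
                                  max_iter=1000,
                                  random_state=1337)
    bgm.fit(X)
    clustering = bgm.predict(X)
    print('effective clusters:', len(numpy.unique(clustering)))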
Example #2
def latent_cluster_estimate(SAMObject,
                            n_components=10,
                            X=None,
                            plot=True,
                            alpha=10,
                            covariance_type='diag',
                            which_indices=(0, 1)):
    """
    Use Dirichlet Process GMMs to cluster the latent space by automatically estimating an effective number of clusters.
    ARG SAMObject: The SAMObject to operate on.
    ARG n_components: The number of DPGMM components to use (i.e. the max number of clusters). Some components will switch off.
    ARG X: If None, we'll use the SAMObject's latent space, otherwise the provided one.
    ARG plot: Whether to plot the result or not.
    ARG alpha: The parameter for the stick-breaking process. In theory, large alpha encourages more clusters, although in practice I haven't seen such behaviour.
    ARG covariance_type: See DPGMM from scikit-learn.
    ARG which_indices: If plotting, which indices to plot.
    RETURN Y_: The cluster assignments for each component in the latent space. This is not (0,1,...,n_clusters), but instead it is
               (0,1,...,n_components), so that switched off components will not appear in Y_.
    """
    from sklearn import mixture

    if X is None:
        X = SAMObject._get_latent()
    # Fit a Dirichlet process mixture of Gaussians using five components
    dpgmm = mixture.DPGMM(n_components=n_components,
                          covariance_type=covariance_type,
                          n_iter=5000,
                          alpha=alpha)
    dpgmm.fit(X)
    Y_ = dpgmm.predict(X)

    if plot:
        from scipy import linalg
        import matplotlib as mpl
        from matplotlib import cm
        import itertools
        import numpy as np
        import pylab as pb

        color_iter = cm.rainbow(np.linspace(0, 1, 20))
        myperm = np.random.permutation(color_iter.shape[0])
        color_iter = color_iter[myperm, :]
        marker_iter = itertools.cycle((',', '+', '.', 'o', '*', 'v', 'x', '>'))
        splot = pb.subplot(1, 1, 1)

        for i, (mean, covar, color, marker) in enumerate(
                zip(dpgmm.means_, dpgmm._get_covars(), color_iter,
                    marker_iter)):
            # as the method will not use every component it has access to
            # unless it needs it, we shouldn't plot the redundant components.
            #if not np.any(Y_ == i):
            #    continue
            pb.scatter(X[Y_ == i, which_indices[0]],
                       X[Y_ == i, which_indices[1]],
                       s=40,
                       color=color,
                       marker=marker)

        pb.legend(np.unique(Y_))
        pb.draw()
        pb.show()
    return Y_
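
# Note: Y_ above keeps the original component ids, with gaps where components
# switched off. If consecutive labels (0, 1, ..., n_clusters-1) are needed
# downstream, np.unique can compact them; a minimal sketch:
def compact_labels(Y_):
    import numpy as np
    _, Y_compact = np.unique(Y_, return_inverse=True)
    return Y_compact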
Example #3
 def clustering_DPGMM(self, n_components, alpha):
     model = mixture.DPGMM(n_components=n_components,
                           alpha=alpha,
                           n_iter=1000)
     model.fit(self.embedding_)
     self.label = model.predict(self.embedding_)
     return self.label, model
Example #4
 def select(self):
     from sklearn import mixture
     X = self.input_mtrx[self.collection_ind()]
     self.est = mixture.DPGMM(n_components=3)
     self.est.fit(X)
     labels = self.est.predict(X)
     self.labels[self.collection_ind()] = labels
Example #5
    def cluster_changepoints_level1(self):

        print "Level1 : Clustering changepoints in Z(t)"

        if constants.REMOTE == 1:
            if self.fit_DPGMM:
                print "DPGMM L1 - start"
                # Previously, when L0 was GMM, alpha = 0.4
                print "L1 ", str(
                    len(self.list_of_cp) /
                    constants.DPGMM_DIVISOR_L1), " ALPHA ", 10
                dpgmm = mixture.DPGMM(n_components=int(
                    len(self.list_of_cp) / 6),
                                      covariance_type='diag',
                                      n_iter=1000,
                                      alpha=10,
                                      thresh=1e-7)
                print "DPGMM L1 - end"

            if self.fit_GMM:
                gmm = mixture.GMM(n_components=self.n_components_L1,
                                  covariance_type='full',
                                  n_iter=1000,
                                  thresh=5e-5)
                print "GMM L1 - end"
        elif constants.REMOTE == 2:
            gmm = mixture.GMM(n_components=self.n_components_L1,
                              covariance_type='full')
        else:
            gmm = mixture.GMM(n_components=self.n_components_L1,
                              covariance_type='full')

        if self.fit_GMM:
            gmm.fit(self.change_pts_Z)
            Y_gmm = gmm.predict(self.change_pts_Z)
            Y = Y_gmm

        if self.fit_DPGMM:
            Y = []
            i = 0

            while True:
                print "In DPGMM Fit loop"
                dpgmm.fit(self.change_pts_Z)
                Y = dpgmm.predict(self.change_pts_Z)
                if len(set(Y)) > 1:
                    break
                i += 1
                if i > 100:
                    break

        self.save_cluster_metrics(self.change_pts_Z, Y, 'level1')

        for i in range(len(Y)):
            label = constants.alphabet_map[Y[i] + 1]
            self.map_cp2level1[i] = label
            utils.dict_insert_list(label, i, self.map_level12cp)

        self.generate_l2_cluster_matrices()
Example #6
def simple_stats_with_minutes():
    x = numpy.loadtxt(open(FILENAME, 'rb'),
                      delimiter=",",
                      usecols=(7, 22, 23, 24, 25, 26, 27, 28),
                      skiprows=1)
    dpgmm = mixture.DPGMM(n_iter=100, n_components=25)
    dpgmm.fit(x)
    return _output_results(dpgmm.predict(x))
Example #7
def compute_similarity(F,
                       bound_idxs,
                       dirichlet=False,
                       xmeans=False,
                       k=5,
                       offset=4):
    """Main function to compute the segment similarity of file file_struct.

    Parameters
    ----------
    F: np.ndarray
        Matrix containing one feature vector per row.
    bound_idxs: np.ndarray
        Array with the indices of the segment boundaries.
    dirichlet: boolean
        Whether to use the dirichlet estimator of the number of unique labels.
    xmeans: boolean
        Whether to use the xmeans estimator of the number of unique labels.
    k: int > 0
        If the other two predictors are `False`, use a fixed number of labels.
    offset: int >= 0
        Number of frames to ignore from beginning and end of each segment.

    Returns
    -------
    labels_est: np.ndarray
        Estimated labels, containing integer identifiers.
    """
    # Get the feature segments
    feat_segments = get_feat_segments(F, bound_idxs)

    # Get the 2D-FMCs segments
    fmcs = feat_segments_to_2dfmc_max(feat_segments, offset)
    if len(fmcs) == 0:
        return np.arange(len(bound_idxs) - 1)

    # Compute the labels using kmeans
    if dirichlet:
        k_init = np.min([fmcs.shape[0], k])
        # Only compute the dirichlet method if the fmc shape is small enough
        if fmcs.shape[1] > 500:
            labels_est = compute_labels_kmeans(fmcs, k=k)
        else:
            dpgmm = mixture.DPGMM(n_components=k_init, covariance_type='full')
            # dpgmm = mixture.VBGMM(n_components=k_init, covariance_type='full')
            dpgmm.fit(fmcs)
            k = len(dpgmm.means_)
            labels_est = dpgmm.predict(fmcs)
            # print("Estimated with Dirichlet Process:", k)
    elif xmeans:
        xm = XMeans(fmcs, plot=False)
        k = xm.estimate_K_knee(th=0.01, maxK=8)
        labels_est = compute_labels_kmeans(fmcs, k=k)
        # print("Estimated with Xmeans:", k)
    else:
        labels_est = compute_labels_kmeans(fmcs, k=k)

    return labels_est
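
# Note: `k = len(dpgmm.means_)` above always equals the k_init upper bound
# (the old DPGMM keeps one row in means_ per component, used or not), so the
# effective number of clusters is better read off the assignments; a sketch:
def n_occupied_components(dpgmm, data):
    import numpy as np
    return len(np.unique(dpgmm.predict(data)))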
Example #8
    def fitIGMM(self, obs, IsPlot=0):
        """
        Fitting the Infinite Gaussian Mixture Model and GMM where applicable
        Input Parameters
        ----------
        
        obs:        samples generated under the acquisition function by BGSS
        
        IsPlot:     flag variable for visualization    
        
        
        Returns
        -------
        mean vector: mu_1,...mu_K
        """

        if self.dim <= 2:
            n_init_components = 3
        else:
            n_init_components = int(self.dim * 1.1)

        dpgmm = mixture.DPGMM(n_components=n_init_components,
                              covariance_type="full", min_covar=1e-3)
        dpgmm.fit(obs) 

        # if the DPGMM fit failed (or two means collapsed), fall back to a GMM
        mydist = euclidean_distances(dpgmm.means_, dpgmm.means_)
        np.fill_diagonal(mydist, 99)

        if dpgmm.converged_ is False or np.min(mydist) < (0.01 * self.dim):
            dpgmm = mixture.GMM(n_components=n_init_components,
                                covariance_type="full", min_covar=1e-5)
            dpgmm.fit(obs)  

        # truncate: keep only the heaviest components covering 70% of the
        # total variational weight
        weight = dpgmm.weights_
        weight_sorted = np.sort(weight)[::-1]
        temp_cumsum = np.cumsum(weight_sorted)

        cutpoint = 0
        for idx, val in enumerate(temp_cumsum):
            if val > 0.7:
                cutpoint = weight_sorted[idx]
                break

        ClusterIndex = [idx for idx, val in enumerate(dpgmm.weights_) if val >= cutpoint]
               
        myMeans = dpgmm.means_[ClusterIndex]
        # dpgmm.means_ = dpgmm.means_[ClusterIndex]
        dpgmm.truncated_means_ = dpgmm.means_[ClusterIndex]
                
        if IsPlot == 1 and self.dim <= 2:
            visualization.plot_histogram(self,obs)
            visualization.plot_mixturemodel(dpgmm,self,obs)

        new_X = myMeans.reshape((len(ClusterIndex), -1))
        new_X = new_X.tolist()
        
        return new_X
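
# Note: the 70%-mass truncation above can be written without the explicit
# loop; a minimal NumPy sketch (equivalent up to ties in the sorted weights):
def truncate_by_mass(weights, mass=0.7):
    import numpy as np
    order = np.argsort(weights)[::-1]                       # heaviest first
    keep = np.searchsorted(np.cumsum(weights[order]), mass) + 1
    return np.sort(order[:keep])                            # kept component indices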
Example #9
def clusterize_dirichlet(*args):
    """ Clustering and plotting with Dirichlet process GMM """
    ### Clustering
    try:
        from sklearn import mixture
        from scipy import linalg
        import pylab as pl
        import matplotlib as mpl
        from sklearn.decomposition import PCA
    except ImportError:
        print "You need SciPy, matplotlib and scikit-learn"
        sys.exit(-1)

    models = []
    for arg in args:
        dpgmm = mixture.DPGMM(n_components=15, covariance_type='full')
        dpgmm.fit(arg)
        print dpgmm
        models.append(copy.deepcopy(dpgmm))
        print raw_input("any key to pass")

    ### Plotting
    color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm'])
    for i, (clf, data) in enumerate(zip(models, args)):
        pca = PCA(n_components=2)
        X_r = pca.fit(data).transform(data)
        splot = pl.subplot(len(args), 1, 1 + i)
        pl.scatter(X_r[:, 0], X_r[:, 1])
        #pl.title('PCA of unit types / numbers')
        Y_ = clf.predict(data)
        for i, (mean, covar, color) in enumerate(
                zip(clf.means_, clf._get_covars(), color_iter)):
            v, w = linalg.eigh(covar)
            u = w[0] / linalg.norm(w[0])
            # as the DP will not use every component it has access to
            # unless it needs it, we shouldn't plot the redundant
            # components.
            if not np.any(Y_ == i):
                continue
            pl.scatter(data[Y_ == i, 0], data[Y_ == i, 1], .8, color=color)

            # Plot an ellipse to show the Gaussian component
            angle = np.arctan(u[1] / u[0])
            angle = 180 * angle / np.pi  # convert to degrees
            ell = mpl.patches.Ellipse(mean,
                                      v[0],
                                      v[1],
                                      180 + angle,
                                      color=color)
            ell.set_clip_box(splot.bbox)
            ell.set_alpha(0.5)
            splot.add_artist(ell)
        pl.xlim(0.0, 1.0)
        pl.ylim(0.0, 1.0)
        pl.xticks(())
        pl.yticks(())
        pl.title("Dirichlet process GMM")
    pl.show()
Example #10
def advanced_stats_only():
    x = numpy.loadtxt(open(FILENAME, 'rb'),
                      delimiter=",",
                      usecols=(7, 11, 12, 14, 15, 17, 18, 20, 21, 23, 24, 25,
                               26, 27),
                      skiprows=1)
    dpgmm = mixture.DPGMM(n_iter=100, n_components=25, alpha=1)
    dpgmm.fit(x)
    return create_player_groups(dpgmm.predict(x))
Example #11
def all_relevant_stats():
    x = numpy.loadtxt(open(FILENAME, 'rb'),
                      delimiter=",",
                      usecols=(7, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 22, 23,
                               24, 25, 26, 27, 28),
                      skiprows=1)
    dpgmm = mixture.DPGMM(n_iter=100, n_components=25)
    dpgmm.fit(x)
    return _output_results(dpgmm.predict(x))
Example #12
 def tune(self, pa=None, comm=None, divergence_threshold=1e10, verbose=0):
     """
     Tune the step...
     """
     if pa is None:
         raise RuntimeError('This step method works only in pysmc.')
     ac = self.get_acceptance_rate(comm=comm)
     if ac == -1:
         return False
     self.reset_counters()
     if (self._tuned and ac >= self.adapt_lower_ac_rate
             and ac <= self.adapt_upper_ac_rate):
         return False
     use_mpi = comm is not None
     if use_mpi:
         rank = comm.Get_rank()
         size = comm.Get_size()
     else:
         rank = 0
         size = 1
     pa = pa.gather()
      # Only the root should train the mixture
     if rank == 0:
         pa.resample()
         data = [
             pa.particles[i]['stochastics'][self.stochastic.__name__]
             for i in range(pa.num_particles)
         ]
         data = np.array(data, dtype='float')
         if data.ndim == 1:
             data = np.atleast_2d(data).T
         self._gmm = mixture.DPGMM(n_components=self.n_components,
                                   covariance_type=self.covariance_type,
                                   n_iter=self.n_iter)
         self.gmm.fit(data)
         Y_ = self.gmm.predict(data)
         n_comp = 0
         for i in range(self.n_components):
             if np.any(Y_ == i):
                 n_comp += 1
         self._gmm = mixture.GMM(n_components=n_comp,
                                 covariance_type=self.covariance_type,
                                 n_iter=self.n_iter)
         self.gmm.fit(data)
         Y_ = self.gmm.predict(data)
         if verbose >= 2:
             for i, (mean, covar) in enumerate(
                     zip(self.gmm.means_, self.gmm._get_covars())):
                 if not np.any(Y_ == i):
                     continue
                 print(('\n', mean, covar))
     if use_mpi:
         self._gmm = comm.bcast(self._gmm)
     self.gmm.covars_ = self.gmm._get_covars()
     self._tuned = True
     return True
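
# Note: the tuning step above uses a common pattern with this old API -- fit a
# DPGMM to count how many components the data actually occupies, then refit a
# plain GMM with exactly that many. A minimal standalone sketch of the same
# idea (assumes the pre-0.20 sklearn.mixture API used throughout these
# examples):
def dpgmm_then_gmm(data, n_components=10, covariance_type='diag', n_iter=1000):
    import numpy as np
    from sklearn import mixture
    dp = mixture.DPGMM(n_components=n_components,
                       covariance_type=covariance_type, n_iter=n_iter)
    dp.fit(data)
    n_used = len(np.unique(dp.predict(data)))  # occupied components only
    gmm = mixture.GMM(n_components=n_used,
                      covariance_type=covariance_type, n_iter=n_iter)
    return gmm.fit(data)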
Example #13
def do_dpgmm(mat, n_components):

    log.info("Using the Dirichlet Process Gaussian Mixture Model")
    log.info("Design matrix size %s. Requested components: %s" ,mat.shape, n_components)
    t0 = time.time()
    dpgmm = mixture.DPGMM(n_components=n_components, covariance_type='tied', alpha=0.5)
    dpgmm.fit(mat)
    labels = dpgmm.predict(mat)
    logprobs, responsibilities = dpgmm.eval(mat)
    tf = time.time()
    log.info("Time: %s s.",tf-t0)
    return logprobs, responsibilities
Example #14
def build_dpgmm(k):
    print 'building ' + gmm_type + ' Dirichlet GMM with k<=' + str(
        k) + ' components'
    gmm = mixture.DPGMM(n_components=k,
                        covariance_type=gmm_type,
                        alpha=1,
                        thresh=0.001,
                        min_covar=0.001,
                        n_iter=500,
                        params='wmc',
                        init_params='wmc')
    return gmm
Example #15
def gmm(input_file,Output):
    lvltrace.lvltrace("LVLEntree dans gmm unsupervised")
    print "#########################################################################################################\n"
    print "GMM"
    print "#########################################################################################################\n"
    ncol=tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
    X = data[:,1:]
    y = data[:,0]
    n_samples, n_features = X.shape
    
    # Fit a mixture of gaussians with EM using five components
    gmm = mixture.GMM(n_components=5, covariance_type='spherical', init_params = 'wmc')
    gmm.fit(X)

    # Fit a dirichlet process mixture of gaussians using five components
    dpgmm = mixture.DPGMM(n_components=5, covariance_type='spherical',init_params = 'wmc')
    dpgmm.fit(X)

    color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm', 'y', 'k'])

    for i, (clf, title) in enumerate([(gmm, 'GMM'),
                                      (dpgmm, 'Dirichlet Process GMM')]):
        splot = pl.subplot(2, 1, 1 + i)
        Y_ = clf.predict(X)
        for i, (mean, covar, color) in enumerate(zip(
                                                     clf.means_, clf._get_covars(), color_iter)):
            v, w = linalg.eigh(covar)
            u = w[0] / linalg.norm(w[0])
            # as the DP will not use every component it has access to
            # unless it needs it, we shouldn't plot the redundant
            # components.
            if not np.any(Y_ == i):
                continue
            pl.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)
            
            # Plot an ellipse to show the Gaussian component
            angle = np.arctan(u[1] / u[0])
            angle = 180 * angle / np.pi  # convert to degrees
            ell = mpl.patches.Ellipse(mean, v[0], v[1], 180 + angle, color=color)
            ell.set_clip_box(splot.bbox)
            ell.set_alpha(0.5)
            splot.add_artist(ell)
        pl.xticks(())
        pl.yticks(())
        pl.title(title)
    save = Output + "gmm.png"
    pl.savefig(save)
    lvltrace.lvltrace("LVLSortie dans gmm unsupervised")
Example #16
	def cluster_changepoints(self):
		"""
		Clusters changepoints specified in self.list_of_cp.
		"""

		print "Clustering changepoints..."
		print "L1 ", str(len(self.list_of_cp)/constants.DPGMM_DIVISOR_L1)," ALPHA: ", self.ALPHA_L1

		if constants.REMOTE == 1:
			if self.fit_DPGMM:
				dpgmm = mixture.DPGMM(n_components = int(len(self.list_of_cp)/constants.DPGMM_DIVISOR_L1), covariance_type='diag', n_iter = 10000, alpha = self.ALPHA_L1, thresh= 1e-4)
			if self.fit_GMM:
				gmm = mixture.GMM(n_components = self.n_components_L1, covariance_type='full', n_iter=5000, thresh = 0.01)
		elif constants.REMOTE == 2:
			gmm = mixture.GMM(n_components = self.n_components_L1, covariance_type='full', thresh = 0.01)
		else:
			gmm = mixture.GMM(n_components = self.n_components_L1, covariance_type='full')

		if self.fit_GMM:
			gmm.fit(self.changepoints)
			predictions_gmm = gmm.predict(self.changepoints)
			print "L1: Clusters in GMM",len(set(predictions_gmm))
			predictions = predictions_gmm

		if self.fit_DPGMM:
			predictions = []
			while True:
				print "Inside loop"
				dpgmm.fit(self.changepoints)
				predictions = dpgmm.predict(self.changepoints)
				if len(set(predictions)) > 1:
					break

			print "L1: Clusters in DP-GMM", len(set(predictions))

		for i in range(len(predictions)):
			label = constants.alphabet_map[predictions[i] + 1]
			self.map_cp2cluster[i] = label
			utils.dict_insert_list(label, i, self.map_level1_cp)
			demonstration = self.map_cp2demonstrations[i]
			frm = self.map_cp2frm[i]
			try:
				surgeme = self.map_frm2surgeme[demonstration][frm]
			except KeyError as e:
				print e
				sys.exit()

			utils.print_and_write(("%3d   %s   %s   %3d   %3d\n" % (i, label, demonstration, frm, surgeme)), self.log)
Example #17
File: _tests.py  Project: mazoku/thesis
def fit_mixture(glcm, n_components, max_components=4, type='gmm'):
    print 'preparing data ...',
    data = data_from_glcm(glcm)
    print 'done'

    print 'fitting %s ...' % type,
    if type == 'dpgmm':
        gmm = mixture.DPGMM(n_components=n_components,
                            covariance_type='spherical',
                            alpha=0.1)
    elif type == 'gmm':
        if n_components == 0:
            aics = []
            n_comps = range(1, max_components + 1)
            print 'searching for optimal number of components: ',
            for i in n_comps:
                gmm = mixture.GMM(n_components=i, covariance_type='spherical')
                gmm.fit(data)
                aic = gmm.aic(data)
                print '(%i, %.2f)' % (i, aic),
                aics.append(aic)
            best = n_comps[np.argmin(np.array(aics))]
            print ' -> %i' % best
            gmm = mixture.GMM(n_components=best, covariance_type='spherical')
        else:
            gmm = mixture.GMM(n_components=n_components,
                              covariance_type='spherical')
    else:
        raise ValueError('Wrong mixture type. Allowed values: dpgmm, gmm')

    gmm.fit(data)
    print 'done'
    print 'means:'
    print gmm.means_

    print 'predicting %s ...' % type,
    y_pred = gmm.predict(data)
    glcm_labs = np.zeros(glcm.shape)
    for x, y in zip(data, y_pred):
        glcm_labs[tuple(x)] = y + 1
    print 'done'

    plt.figure()
    plt.subplot(121), plt.imshow(glcm, 'gray', interpolation='nearest')
    plt.subplot(122), plt.imshow(glcm_labs, 'jet', interpolation='nearest')
    plt.show()
Example #18
def classification_dp_gmm(sample=700):
    train_data_set, train_labels, test_data_set, test_labels = generate_train_data(
        sample)
    # Fit a Dirichlet process mixture of Gaussians (two diagonal components)
    # dpgmm = mixture.DPGMM(n_components=5, covariance_type='full', n_iter=20)
    dpgmm = mixture.DPGMM(n_components=2, covariance_type='diag', n_iter=10)
    # dpgmm = mixture.VBGMM(n_components=2, covariance_type='diag', n_iter=30)
    dpgmm.fit(train_data_set)
    # print 'train accuracy'
    y_train_pred = dpgmm.predict(train_data_set)
    train_accuracy = np.mean(y_train_pred.ravel() == train_labels.ravel())
    print train_accuracy,
    print ',',
    # print 'test accuracy'
    y_test_pred = dpgmm.predict(test_data_set)
    test_accuracy = np.mean(y_test_pred.ravel() == test_labels.ravel())
    print test_accuracy
Example #19
def compute_similarity(PCP, bound_idxs, dirichlet=False, xmeans=False, k=5):
    """Main function to compute the segment similarity of file file_struct."""

    # Get PCP segments
    pcp_segments = get_pcp_segments(PCP, bound_idxs)

    # Get the 2d-FMCs segments
    fmcs = pcp_segments_to_2dfmc_max(pcp_segments)
    if len(fmcs) == 0:
        return np.arange(len(bound_idxs) - 1)

    # Compute the labels using kmeans
    if dirichlet:
        k_init = np.min([fmcs.shape[0], k])
        # Only compute the dirichlet method if the fmc shape is small enough
        if fmcs.shape[1] > 500:
            labels_est = compute_labels_kmeans(fmcs, k=k)
        else:
            dpgmm = mixture.DPGMM(n_components=k_init, covariance_type='full')
            #dpgmm = mixture.VBGMM(n_components=k_init, covariance_type='full')
            dpgmm.fit(fmcs)
            k = len(dpgmm.means_)
            labels_est = dpgmm.predict(fmcs)
            #print "Estimated with Dirichlet Process:", k
    elif xmeans:
        xm = XMeans(fmcs, plot=False)
        k = xm.estimate_K_knee(th=0.01, maxK=8)
        labels_est = compute_labels_kmeans(fmcs, k=k)
        #print "Estimated with Xmeans:", k
    else:
        labels_est = compute_labels_kmeans(fmcs, k=k)

    # Plot results
    #plot_pcp_wgt(PCP, bound_idxs)

    return labels_est
Example #20
import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn import mixture
import scipy.io as sio


np.set_printoptions(threshold=np.inf)
mat_contents = sio.loadmat('Euclid.mat')

#print mat_contents.keys()
#print mat_contents['Euclid_matrix'].shape
row = mat_contents['Euclid_matrix'][:, 100].reshape(-1, 1)  # column vector for sklearn

dpgmm = mixture.DPGMM(n_components=6, covariance_type='diag', alpha=10,
                      n_iter=100, verbose=1, thresh=0.0001)
dpgmm.fit(row)

Y = dpgmm.predict(row)

Y_unique = np.unique(Y)
for point in Y_unique:
	print point
	print np.mean(row[Y == point])

print dpgmm.get_params()
print dpgmm.n_components
print dpgmm.precs_


#print Y
Example #21
from sklearn import mixture

import itertools
color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm'])

import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt
import matplotlib as mpl

FILENAME = "mcdonalds-normalized-data-clean.tsv"

# Note: you'll have to remove the last "name" column in the file (or
# some other such thing), so that all the columns are numeric.
X = np.loadtxt(open(FILENAME, "rb"), delimiter="\t", skiprows=1)
dpgmm = mixture.DPGMM(n_components=25)
dpgmm.fit(X)
clusters = dpgmm.predict(X)

classes = [[] for i in range(25)]
for i, c in enumerate(clusters):
    classes[c].append(i)

with open('mcdonalds-normalized-data-names.tsv') as f:
    names = f.read().split('\n')[1:]
with open('chen_out', 'w') as f:
    f.write('\n\n\n'.join('\n'.join(names[i] for i in cc) for cc in classes))

for i, (clf, title) in enumerate([(dpgmm, 'Dirichlet Process GMM')]):
    splot = plt.subplot(1, 1, 1 + i)
    Y_ = clf.predict(X)
Example #22
# Number of samples per component
n_samples = 500

# Generate random sample, two components
np.random.seed(0)
C = np.array([[0., -0.1], [1.7, .4]])
X = np.r_[np.dot(np.random.randn(n_samples, 2), C),
          .7 * np.random.randn(n_samples, 2) + np.array([-6, 3])]

# Fit a mixture of Gaussians with EM using five components
gmm = mixture.GMM(n_components=5, covariance_type='full')
gmm.fit(X)

# Fit a Dirichlet process mixture of Gaussians using five components
dpgmm = mixture.DPGMM(n_components=5, covariance_type='full')
dpgmm.fit(X)
dpgmm.fit(X)

color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm'])

for i, (clf, title) in enumerate([(gmm, 'GMM'),
                                  (dpgmm, 'Dirichlet Process GMM')]):
    splot = pl.subplot(2, 1, 1 + i)
    Y_ = clf.predict(X)
    for i, (mean, covar,
            color) in enumerate(zip(clf.means_, clf._get_covars(), color_iter)):
        v, w = linalg.eigh(covar)
        u = w[0] / linalg.norm(w[0])
        # as the DP will not use every component it has access to
        # unless it needs it, we shouldn't plot the redundant
        # components.
Example #23
    def cluster_changepoints_level2(self):

        print "Level2 : Clustering changepoints in W(t)"

        mkdir_path = constants.PATH_TO_CLUSTERING_RESULTS + self.trial
        os.mkdir(mkdir_path)

        # To put frames of milestones
        os.mkdir(mkdir_path + "/" + "milestones")

        self.file = open(mkdir_path + "/" + self.trial + "clustering.txt",
                         "wb")
        self.metrics_picklefile = mkdir_path + "/" + self.trial + "metrics.p"

        line = self.featfile + "\n\n"
        self.file.write(line)

        line = "L1 Cluster   L2 Cluster   Demonstration   Frame#  CP#   Surgeme\n"
        self.file.write(line)

        print "---Checking data representativeness ---"
        for key in sorted(self.map_level12cp.keys()):
            mkdir_l1_cluster = mkdir_path + "/" + key

            list_of_cp_key = self.map_level12cp[key]

            if self.check_pruning_condition(list_of_cp_key):
                continue

            os.mkdir(mkdir_l1_cluster)
        print "--- ---"

        for key in sorted(self.map_level12cp.keys()):
            matrix = self.l2_cluster_matrices[key]
            list_of_cp_key = self.map_level12cp[key]

            if self.check_pruning_condition(list_of_cp_key):
                self.pruned_L1_clusters.append(key)
                del self.map_level12cp[key]
                print "Pruned"
                for pruned_cp in list_of_cp_key:
                    # print "Pruned: " + str(key) + " " + str(pruned_cp) + " " + str(self.map_cp2demonstrations[pruned_cp])
                    self.list_of_cp.remove(pruned_cp)
                continue

            if constants.REMOTE == 1:
                gmm = mixture.GMM(n_components=min(self.n_components_L2,
                                                   matrix.shape[0]),
                                  covariance_type='full',
                                  n_iter=10000,
                                  thresh=5e-5)
                # Alpha didn't change between using GMM or DP-GMM for L0
                print "L2 ", str(int(np.ceil(len(list_of_cp_key) /
                                             2.0))), " ALPHA ", 1
                dpgmm = mixture.DPGMM(n_components=int(
                    np.ceil(len(list_of_cp_key) / 2.0)),
                                      covariance_type='diag',
                                      n_iter=1000,
                                      alpha=1,
                                      thresh=1e-7)
            elif constants.REMOTE == 2:
                gmm = mixture.GMM(n_components=self.n_components_L2,
                                  covariance_type='full')
            else:
                gmm = mixture.GMM(n_components=self.n_components_L2,
                                  covariance_type='full')

            try:
                if self.fit_GMM:
                    gmm.fit(matrix)
                    Y = gmm.predict(matrix)
                if self.fit_DPGMM:
                    dpgmm.fit(matrix)
                    Y = dpgmm.predict(matrix)

            except ValueError as e:
                print "ValueError"
                continue

            self.save_cluster_metrics(matrix,
                                      Y,
                                      'level2_' + str(key),
                                      level2_mode=True)

            for i in range(len(Y)):

                cp = list_of_cp_key[i]
                l1_cluster = key
                l2_cluster = Y[i]
                milestone = l1_cluster + "_" + str(l2_cluster)
                demonstration = self.map_cp2demonstrations[cp]
                try:
                    frm = self.map_cp2frm[cp]
                    surgeme = self.map_frm2surgeme[demonstration][frm]
                except KeyError as e:
                    print e
                    sys.exit()

                self.map_cp2milestones[cp] = milestone

                self.file.write(
                    "%s             %3d         %s   %3d   %3d    %3d\n" %
                    (l1_cluster, l2_cluster, demonstration, frm, cp, surgeme))

                if constants.REMOTE == 0:
                    self.copy_frames(demonstration, frm, str(l1_cluster),
                                     str(l2_cluster), surgeme)

        if constants.REMOTE == 0:
            self.copy_milestone_frames(matrix, list_of_cp_key, gmm)
Example #24
    def generate_change_points_2(self):
        """
		Generates changespoints by clustering across demonstrations.
		"""
        cp_index = 0
        i = 0
        big_N = None
        map_index2demonstration = {}
        map_index2frm = {}
        size_of_X = self.data_X_size[self.list_of_demonstrations[0]]

        for demonstration in self.list_of_demonstrations:
            print demonstration
            N = self.data_N[demonstration]

            start, end = utils.get_start_end_annotations(
                constants.PATH_TO_DATA + constants.ANNOTATIONS_FOLDER +
                demonstration + "_" + constants.CAMERA + ".p")

            for j in range(N.shape[0]):
                map_index2demonstration[i] = demonstration
                map_index2frm[i] = start + j * self.sr
                i += 1

            big_N = utils.safe_concatenate(big_N, N)

        print "Generated big_N"

        if constants.REMOTE == 1:
            if self.fit_GMM:
                gmm = mixture.GMM(n_components=self.n_components_cp,
                                  covariance_type='full',
                                  thresh=0.01)

            if self.fit_DPGMM:
                # dpgmm = mixture.DPGMM(n_components = 100, covariance_type='diag', n_iter = 10000, alpha = 100, thresh= 2e-4)

                #DO NOT FIDDLE WITH PARAMS WITHOUT CONSENT :)
                avg_len = int(big_N.shape[0] /
                              len(self.list_of_demonstrations))
                DP_GMM_COMPONENTS = int(
                    avg_len / constants.DPGMM_DIVISOR
                )  #tuned with suturing experts only for kinematics
                print "L0 ", DP_GMM_COMPONENTS, "ALPHA: ", constants.ALPHA_ZW_CP
                dpgmm = mixture.DPGMM(n_components=DP_GMM_COMPONENTS,
                                      covariance_type='diag',
                                      n_iter=1000,
                                      alpha=constants.ALPHA_ZW_CP,
                                      thresh=1e-7)

        elif constants.REMOTE == 2:
            gmm = mixture.GMM(n_components=self.n_components_cp,
                              covariance_type='full')
        else:
            gmm = mixture.GMM(n_components=self.n_components_cp,
                              covariance_type='full')

        if self.fit_GMM:
            start = time.time()
            gmm.fit(big_N)
            end = time.time()
            "GMM time taken: ", str(end - start)
            Y_gmm = gmm.predict(big_N)

            print "L0: Clusters in GMM", len(set(Y_gmm))
            Y = Y_gmm

        if self.fit_DPGMM:
            start = time.time()
            dpgmm.fit(big_N)
            end = time.time()
            "DP-GMM time taken: ", str(end - start)
            Y_dpgmm = dpgmm.predict(big_N)

            Y = Y_dpgmm
            print "L0: Clusters in DP-GMM", len(set(Y_dpgmm))

        for w in range(len(Y) - 1):

            if Y[w] != Y[w + 1]:
                change_pt = big_N[w][:size_of_X]
                self.append_cp_array(change_pt)
                self.map_cp2frm[cp_index] = map_index2frm[w]
                self.map_cp2demonstrations[cp_index] = map_index2demonstration[
                    w]
                self.list_of_cp.append(cp_index)

                cp_index += 1

        print "Done with generating change points", len(self.list_of_cp)
Example #25
np.random.seed(0)
X = np.zeros((n_samples, 2))
step = 4 * np.pi / n_samples

for i in xrange(X.shape[0]):
    x = i * step - 6
    X[i, 0] = x + np.random.normal(0, 0.1)
    X[i, 1] = 3 * (np.sin(x) + np.random.normal(0, .2))

color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm'])

for i, (clf, title) in enumerate([
    (mixture.GMM(n_components=10, covariance_type='full',
                 n_iter=100), "Expectation-maximization"),
    (mixture.DPGMM(n_components=10,
                   covariance_type='full',
                   alpha=0.01,
                   n_iter=100), "Dirichlet Process,alpha=0.01"),
    (mixture.DPGMM(n_components=10,
                   covariance_type='diag',
                   alpha=100.,
                   n_iter=100), "Dirichlet Process,alpha=100.")
]):

    clf.fit(X)
    splot = pl.subplot(3, 1, 1 + i)
    Y_ = clf.predict(X)
    for i, (mean, covar,
            color) in enumerate(zip(clf.means_, clf._get_covars(),
                                    color_iter)):
        v, w = linalg.eigh(covar)
        u = w[0] / linalg.norm(w[0])
Example #26
                    labels_dist[j] += 1
        bic.append(gmm.bic(data_use))
        if bic[-1] < lowest_bic:
            lowest_bic = bic[-1]
            best_gmm = gmm
        print n_components, "labels:", labels_unique, "labels_dist", labels_dist
    clf = best_gmm
    for i in range(len(bic)):
        print([i,bic[i],logl[i]])

if method == 'dpgmm':
    alpha_range = [0.001,0.01,0.1,1.,10.,100.,1000.,1e6]
    n_clusters_max = 100
    for alpha in alpha_range:
        # Fit a mixture of Gaussians with a Dirichlet process prior
        dpgmm = mixture.DPGMM(n_components=n_clusters_max,
                              covariance_type='full', alpha=alpha, n_iter=1000)
        dpgmm.fit(data_use)
        labels_predict = dpgmm.predict(data_use)
        labels_unique = np.unique(labels_predict)
        # Counting the number of samples belonging to each cluster
        labels_dist = [0]*len(labels_unique)
        for i in range(len(labels_predict)):
            for j in range(len(labels_unique)):
                if labels_predict[i] == labels_unique[j]:
                    labels_dist[j] += 1
        print(["alpha: ",alpha,"labels: ",labels_unique,"labels_dist: ",\
                labels_dist])
    print(["Upper bound for the number of clusters: ",n_clusters_max])


Example #27
# Number of samples per component
n_samples = 500

# Generate random sample, two components
np.random.seed(0)
C = np.array([[0., -0.1], [1.7, .4]])
X = np.r_[np.dot(np.random.randn(n_samples, 2), C),
          .7 * np.random.randn(n_samples, 2) + np.array([-6, 3])]

# Fit a mixture of Gaussians with EM using five components
gmm = mixture.GMM(n_components=5, covariance_type='full')
gmm.fit(X)

# Fit a Dirichlet process mixture of Gaussians using five components
dpgmm = mixture.DPGMM(n_components=5, covariance_type='full')
dpgmm.fit(X)

color_iter = itertools.cycle(
    ['navy', 'c', 'cornflowerblue', 'gold', 'darkorange'])

for i, (clf, title) in enumerate([(gmm, 'GMM'),
                                  (dpgmm, 'Dirichlet Process GMM')]):
    splot = plt.subplot(2, 1, 1 + i)
    Y_ = clf.predict(X)
    for i, (mean, covar,
            color) in enumerate(zip(clf.means_, clf._get_covars(),
                                    color_iter)):
        v, w = linalg.eigh(covar)
        u = w[0] / linalg.norm(w[0])
        # as the DP will not use every component it has access to
Example #28
File: test.py  Project: tesschin/FYP-Code
lowest_bic = np.inf
bic = []
n_components_range = range(1, 3)
cv_types = ['spherical']
for cv_type in cv_types:
    for n_components in n_components_range:
        # Fit a mixture of Gaussians with EM
        gmm = mixture.GMM(n_components=n_components, covariance_type=cv_type)

        gmm.fit(X)
        bic.append(gmm.bic(X))
        if bic[-1] < lowest_bic:
            lowest_bic = bic[-1]
            best_gmm = gmm

best_gmm = mixture.DPGMM(n_components=4)
best_gmm.fit(X)

elapsed = int(round(time.time() * 1000)) - currTime

print best_gmm
print "ELAPSED: " + str(elapsed)

bic = np.array(bic)
color_iter = itertools.cycle(['k', 'r', 'g', 'b', 'c', 'm', 'y'])
clf = best_gmm
bars = []

# Plot the BIC scores
spl = plt.subplot(2, 1, 1)
for i, (cv_type, color) in enumerate(zip(cv_types, color_iter)):
Example #29
File: _tests.py  Project: mazoku/thesis
def glcm_dpgmm(img):
    # deriving glcm
    mask = (img > 0) * (img < 255)
    glcm = tools.graycomatrix_3D(img, mask=mask)

    # inds = tuple(inds.flatten())

    # processing glcm
    # glcm_gc = skimor.closing(glcm, selem=skimor.disk(1))
    # glcm_go = skimor.opening(glcm, selem=skimor.disk(1))

    # plt.figure()
    # plt.subplot(131), plt.imshow(glcm, 'gray', interpolation='nearest'), plt.title('glcm')
    # plt.subplot(132), plt.imshow(glcm_gc, 'gray', interpolation='nearest'), plt.title('glcm_gc')
    # plt.subplot(133), plt.imshow(glcm_go, 'gray', interpolation='nearest'), plt.title('glcm_go')

    # thresholding glcm
    c_t = 4
    thresh = c_t * np.mean(glcm)
    glcm_t = glcm > thresh
    glcm_to = skimor.binary_closing(glcm_t, selem=skimor.disk(3))
    glcm_to = skimor.binary_opening(glcm_to, selem=skimor.disk(3))
    # tools.blob_from_gcm(glcm_to, img, return_rvs=True, show=True, show_now=False)
    #
    # labs_im, num = skimea.label(glcm_to, return_num=True)
    #
    # labels = np.unique(labs_im)[1:]
    # for l in labels:
    #     tmp = glcm * (labs_im == l)
    #     fit_mixture(tmp, n_components=0, type='gmm')

    # syntetic glcm
    # glcm = np.array([[0,1,1,2,0,0,0,0],
    #                  [1,2,2,3,1,0,0,1],
    #                  [1,3,4,2,1,0,0,0],
    #                  [0,1,3,1,0,0,0,1],
    #                  [1,0,0,0,0,0,1,3],
    #                  [0,2,0,0,0,2,2,1],
    #                  [0,0,0,0,1,3,4,0],
    #                  [0,0,0,0,1,2,0,0]])

    # dpgmm
    glcm_o = glcm.copy()
    # glcm = glcm_go * glcm_to
    # glcm = glcm_go
    # glcm = glcm_gc
    glcm = glcm * glcm_to
    data = data_from_glcm(glcm)

    # fitting DPGMM
    # print 'fitting DPGMM ...'
    # types = ['spherical', 'tied', 'diag', 'full']
    # n_comps = range(2, 11)
    # # n_comps = range(2, 4)
    # aics = np.zeros((len(types), len(n_comps)))
    # bics = np.zeros((len(types), len(n_comps)))
    # scores = np.zeros((len(types), len(n_comps)))
    # for i, type in enumerate(types):
    #     print '\nTYPE:', type
    #     for j, n in enumerate(n_comps):
    #         # dpgmm = mixture.DPGMM(n_components=6, covariance_type='tied', alpha=0.100)
    #         dpgmm = mixture.GMM(n_components=n, covariance_type=type)
    #         dpgmm.fit(data)
    #         aic = dpgmm.aic(data)
    #         bic = dpgmm.bic(data)
    #         score = dpgmm.score(data).mean()
    #         # aics.append(aic)
    #         # bics.append(bic)
    #         # scores.append(score)
    #         aics[i, j] = aic
    #         bics[i, j] = bic
    #         scores[i, j] = score
    #         print 'n_comps=%i, score=%.2f, aic=%.2f, bic=%.2f' % (n, score, aic, bic)
    #
    # plt.figure()
    # color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm', 'y', 'k'])
    # for aic, color in zip(aics, color_iter):
    #     plt.plot(n_comps, aic, color + '-')
    # plt.legend(types)
    # plt.title('aic')
    #
    # plt.figure()
    # color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm', 'y', 'k'])
    # for bic, color in zip(bics, color_iter):
    #     plt.plot(n_comps, bic, color + '-')
    # plt.legend(types)
    # plt.title('bic')
    #
    # plt.figure()
    # color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm', 'y', 'k'])
    # for score, color in zip(scores, color_iter):
    #     plt.plot(n_comps, score, color + '-')
    # plt.legend(types)
    # plt.title('scores')

    print 'fitting DPGMM ...',
    # dpgmm = mixture.GMM(n_components=3, covariance_type='tied')
    dpgmm = mixture.DPGMM(n_components=6, covariance_type='tied', alpha=1.)
    dpgmm.fit(data)
    print 'done'
    print 'means:'
    print dpgmm.means_

    # dpgmm = mixture.GMM(n_components=3, covariance_type='tied')
    # dpgmm.fit(data)
    # print 'n_comps=3, score=%.2f, aic=%.2f, bic=%.2f' % (dpgmm.score(data).mean(), dpgmm.aic(data), dpgmm.bic(data))
    # dpgmm = mixture.GMM(n_components=4, covariance_type='tied')
    # dpgmm.fit(data)
    # print 'n_comps=4, score=%.2f, aic=%.2f, bic=%.2f' % (dpgmm.score(data).mean(), dpgmm.aic(data), dpgmm.bic(data))
    # dpgmm = mixture.GMM(n_components=5, covariance_type='tied')
    # dpgmm.fit(data)
    # print 'n_comps=5, score=%.2f, aic=%.2f, bic=%.2f' % (dpgmm.score(data).mean(), dpgmm.aic(data), dpgmm.bic(data))

    # predicting DPGMM
    print 'predicting DPGMM ...',
    data = data_from_glcm(glcm_o)
    y_pred = dpgmm.predict(data)
    glcm_labs = np.zeros(glcm.shape, dtype=np.uint8)
    for x, y in zip(data, y_pred):
        glcm_labs[tuple(x)] = int(y + 1)
    print 'done'

    # glcm_labs += 10
    inds = np.argsort(dpgmm.means_.mean(axis=1))
    glcm_labs2 = glcm_labs.copy()
    for i, l in enumerate(inds):
        glcm_labs2 = np.where(glcm_labs == l + 1, i + 1, glcm_labs2)
    glcm_labs = glcm_labs2
    # glcm_labs3 = inds[glcm_labs.flatten()].reshape(glcm_labs.shape)

    # plt.figure()
    # plt.subplot(121), plt.imshow(glcm_labs)
    # plt.subplot(122), plt.imshow(glcm_labs2)
    # plt.show()

    labint = dpgmm.predict(np.vstack((range(0, 256), range(0, 256))).T)
    labim = labint[img.flatten()].reshape(img.shape)
    # labim += 10
    labim2 = labim.copy()
    for i, l in enumerate(inds):
        labim2 = np.where(labim == l, i + 1, labim2)
    labim = labim2
    # labim3 = inds[labim.flatten()].reshape(labim.shape)
    # plt.figure()
    # plt.subplot(121), plt.imshow(labim)
    # plt.subplot(122), plt.imshow(labim2)
    # # plt.subplot(133), plt.imshow(labim3)
    # plt.show()

    labim_f = scindifil.median_filter(labim, size=3)
    plt.figure()
    plt.subplot(131), plt.imshow(img, 'gray',
                                 interpolation='nearest'), plt.axis('off')
    plt.subplot(132), plt.imshow(labim, 'jet', interpolation='nearest',
                                 vmin=0), plt.axis('off')
    plt.subplot(133), plt.imshow(labim_f,
                                 'jet',
                                 interpolation='nearest',
                                 vmin=0), plt.axis('off')

    plt.figure()
    plt.subplot(121), plt.imshow(glcm_o,
                                 'jet',
                                 interpolation='nearest',
                                 vmin=0), plt.axis('off')
    for c in dpgmm.means_:
        plt.plot(c[0],
                 c[1],
                 'o',
                 markerfacecolor='w',
                 markeredgecolor='k',
                 markersize=12)
    plt.subplot(122), plt.imshow(glcm_labs,
                                 'jet',
                                 interpolation='nearest',
                                 vmin=0), plt.axis('off')
    for c in dpgmm.means_:
        plt.plot(c[0],
                 c[1],
                 'o',
                 markerfacecolor='w',
                 markeredgecolor='k',
                 markersize=12)

    plt.show()
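
# Note: the rank-remapping of glcm_labs above (the np.where loop) can be done
# with a lookup table instead; a minimal sketch, assuming labels are 1-based
# with 0 as background, as above:
def remap_labels_by_mean_rank(glcm_labs, means):
    import numpy as np
    order = np.argsort(means.mean(axis=1))                  # component id -> rank
    lut = np.zeros(len(order) + 1, dtype=glcm_labs.dtype)   # slot 0 = background
    lut[order + 1] = np.arange(1, len(order) + 1)
    return lut[glcm_labs]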
Example #30
X = np.zeros((n_samples, 2))
step = 4 * np.pi / n_samples

for i in xrange(X.shape[0]):
    x = i * step - 6
    X[i, 0] = x + np.random.normal(0, 0.1)
    X[i, 1] = 3 * (np.sin(x) + np.random.normal(0, .2))


color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm'])


for i, (clf, title) in enumerate([
        (mixture.GMM(n_components=10, covariance_type='full', n_iter=100),
         "Expectation-maximization"),
        (mixture.DPGMM(n_components=10, covariance_type='full', alpha=0.01,
                       n_iter=100),
         "Dirichlet Process, alpha=0.01"),
        (mixture.DPGMM(n_components=10, covariance_type='diag', alpha=100.,
                       n_iter=100),
         "Dirichlet Process, alpha=100.")
        ]):

    clf.fit(X)
    splot = pl.subplot(3, 1, 1 + i)
    Y_ = clf.predict(X)
    for i, (mean, covar, color) in enumerate(zip(
            clf.means_, clf._get_covars(), color_iter)):
        v, w = linalg.eigh(covar)
        u = w[0] / linalg.norm(w[0])
        # as the DP will not use every component it has access to
        # unless it needs it, we shouldn't plot the redundant
        # components.