def __init__(self, initial_means, priors=None, covariance_matrices=None,
             conv_threshold=1e-6, bias=0.1, normalise=False,
             svd_dimensions=None):
    """
    Build an EM clusterer from its starting parameters, its convergence
    threshold and the vector preprocessing options.

    :param initial_means: the means of the gaussian cluster centers;
        converted to a float64 numpy array, and its length is taken as
        the number of clusters
    :type initial_means: [seq of] numpy array or seq of SparseArray
    :param priors: the prior probability for each cluster
    :type priors: numpy array or seq of float
    :param covariance_matrices: the covariance matrix for each cluster
    :type covariance_matrices: [seq of] numpy array
    :param conv_threshold: maximum change in likelihood before deemed
        convergent
    :type conv_threshold: int or float
    :param bias: variance bias used to ensure non-singular covariance
        matrices
    :type bias: float
    :param normalise: should vectors be normalised to length 1
    :type normalise: boolean
    :param svd_dimensions: number of dimensions to use in reducing
        vector dimensionality with SVD
    :type svd_dimensions: int
    """
    # The vector-mangling options are handled by the base class.
    VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

    # Cluster centres, plus the cluster count derived from them.
    self._means = numpy.array(initial_means, numpy.float64)
    self._num_clusters = len(initial_means)

    # Optional starting estimates; stored exactly as given (may be None).
    self._priors, self._covariance_matrices = priors, covariance_matrices

    # Numerical settings, stored as given.
    self._bias, self._conv_threshold = bias, conv_threshold
def cluster(self, vectors, assign_clusters=False, ClusterNum=None,
            DisType='cos', Stype='avg', trace=False):
    """
    Precompute the pairwise distances between ``vectors``, reset the
    dendrogram and delegate the clustering to
    ``VectorSpaceClusterer.cluster``.

    :param vectors: the samples to cluster; must support ``len()`` and
        integer indexing
    :param assign_clusters: forwarded unchanged to
        ``VectorSpaceClusterer.cluster``
    :param ClusterNum: forwarded unchanged to
        ``VectorSpaceClusterer.cluster``
    :param DisType: ``'cos'`` to fill the distance map with
        ``cosine_distance``, ``'euc'`` to fill it with
        ``euclidean_distance``; any other value leaves the distance map
        empty
    :param Stype: forwarded unchanged to ``VectorSpaceClusterer.cluster``
        (presumably the linkage strategy -- confirm against the base
        class)
    :param trace: forwarded unchanged to ``VectorSpaceClusterer.cluster``
    :return: the value returned by ``VectorSpaceClusterer.cluster``, or
        an empty list when ``vectors`` is empty
    """
    # The cached distances belong to one sample set only, so they must be
    # discarded before every clustering run.
    self._distMap.clear()

    count = len(vectors)
    if count == 0:
        # Nothing to cluster.  Returning here also keeps the
        # ``vectors[0]`` lookup below from running on an empty sequence.
        # The dendrogram is left untouched in this case.
        return []

    # Pick the metric once instead of duplicating the pair loop.
    if DisType == 'cos':
        metric = cosine_distance
    elif DisType == 'euc':
        metric = euclidean_distance
    else:
        metric = None  # unrecognised type: no distances are cached

    if metric is not None:
        # Only the upper triangle (i < j) is stored.
        for i in range(count):
            for j in range(i + 1, count):
                self._distMap[(i, j)] = metric(vectors[i], vectors[j])

    # stores the merge order
    self._dendrogram = Dendrogram(
        [numpy.array(vector, numpy.float64) for vector in vectors])

    result = VectorSpaceClusterer.cluster(
        self, vectors, assign_clusters, ClusterNum, Stype, trace)

    # Two-dimensional samples get a visualisation of the result.
    if len(vectors[0]) == 2:
        self.draw_2D(vectors, result)
    return result
def cluster(self, vectors, assign_clusters=False, ClusterNum=None,
            DisType='euc', Stype='mean', trace=False):
    """
    Rebuild the pairwise distance map for ``vectors`` and hand the actual
    clustering over to ``VectorSpaceClusterer.cluster``.

    :param vectors: the samples to cluster
    :param assign_clusters: passed through to the base class
    :param ClusterNum: passed through to the base class
    :param DisType: ``'cos'`` selects ``cosine_distance``, ``'euc'``
        selects ``euclidean_distance``; any other value leaves the
        distance map empty
    :param Stype: passed through to the base class
    :param trace: passed through to the base class
    :return: ``[]`` for empty input, otherwise the base class result
    """
    # The distance map has to be refreshed before each new sample set.
    self._distMap.clear()

    count = len(vectors)
    if not count:
        return []

    if DisType == 'cos':
        metric = cosine_distance
    elif DisType == 'euc':
        metric = euclidean_distance
    else:
        metric = None

    if metric is not None:
        # Store one entry per unordered pair, keyed as (smaller, larger).
        for first in range(count):
            for second in range(first + 1, count):
                self._distMap[(first, second)] = metric(
                    vectors[first], vectors[second])

    result = VectorSpaceClusterer.cluster(
        self, vectors, assign_clusters, ClusterNum, Stype, trace)

    # Two-dimensional samples additionally get their result plotted.
    if len(vectors[0]) == 2:
        draw_2D_cluster(vectors, result)
    return result
def __init__(self, num_means, distance, repeats=1, conv_test=1e-6,
             initial_means=None, normalise=False, svd_dimensions=None,
             rng=None, avoid_empty_clusters=False):
    """
    Configure a k-means clusterer.

    :param num_means: the number of means to use (may use fewer)
    :type num_means: int
    :param distance: measure of distance between two vectors
    :type distance: function taking two vectors and returning a float
    :param repeats: number of randomised clustering trials to use;
        must be at least 1, and must be 1 when ``initial_means`` is given
    :type repeats: int
    :param conv_test: maximum variation in mean differences before
        deemed convergent
    :type conv_test: number
    :param initial_means: set of k initial means; when given, its length
        must equal ``num_means``
    :type initial_means: sequence of vectors
    :param normalise: should vectors be normalised to length 1
    :type normalise: boolean
    :param svd_dimensions: number of dimensions to use in reducing
        vector dimensionality with SVD
    :type svd_dimensions: int
    :param rng: random number generator (or None, in which case a fresh
        ``random.Random()`` is created)
    :type rng: Random
    :param avoid_empty_clusters: include current centroid in computation
        of next one; avoids undefined behavior when clusters become empty
    :type avoid_empty_clusters: boolean
    """
    VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

    self._num_means, self._distance = num_means, distance
    self._max_difference = conv_test

    # Explicit starting means have to match the requested cluster count.
    assert not initial_means or len(initial_means) == num_means
    self._means = initial_means

    # Repeated trials only make sense with randomly drawn starting means.
    assert repeats >= 1
    assert not initial_means or not repeats > 1
    self._repeats = repeats

    self._rng = rng if rng else random.Random()
    self._avoid_empty_clusters = avoid_empty_clusters
def cluster(self, vectors, assign_clusters=False, ClusterNum=None,
            DisType='euc', Stype='mean', trace=False):
    """
    Cache every pairwise distance between the given vectors, then run the
    clustering implemented by ``VectorSpaceClusterer.cluster``.

    :param vectors: the samples to cluster
    :param assign_clusters: handed on to ``VectorSpaceClusterer.cluster``
    :param ClusterNum: handed on to ``VectorSpaceClusterer.cluster``
    :param DisType: distance to cache -- ``'cos'`` for
        ``cosine_distance``, ``'euc'`` for ``euclidean_distance``; with
        any other value nothing is cached
    :param Stype: handed on to ``VectorSpaceClusterer.cluster``
    :param trace: handed on to ``VectorSpaceClusterer.cluster``
    :return: an empty list when there are no vectors, otherwise the
        result of ``VectorSpaceClusterer.cluster``
    """
    # Distances from a previous sample set are stale; drop them first.
    self._distMap.clear()

    total = len(vectors)
    if total == 0:
        return []

    measure = None
    if DisType == 'cos':
        measure = cosine_distance
    elif DisType == 'euc':
        measure = euclidean_distance

    if measure is not None:
        # Walk the index pairs (a, b) with a < b in row-major order.
        index_pairs = ((a, b)
                       for a in range(total)
                       for b in range(a + 1, total))
        for a, b in index_pairs:
            self._distMap[(a, b)] = measure(vectors[a], vectors[b])

    outcome = VectorSpaceClusterer.cluster(
        self, vectors, assign_clusters, ClusterNum, Stype, trace)

    # Show a visualisation when the samples are two-dimensional.
    is_two_dimensional = len(vectors[0]) == 2
    if is_two_dimensional:
        draw_2D_cluster(vectors, outcome)
    return outcome
def cluster(self, vectors, assign_clusters=False, DisType='cos',
            Stype='avg', trace=False):
    """
    Fill the pairwise distance map for ``vectors``, create a fresh
    dendrogram over them and delegate to ``VectorSpaceClusterer.cluster``.

    :param vectors: the samples to cluster
    :param assign_clusters: passed on to ``VectorSpaceClusterer.cluster``
    :param DisType: ``'cos'`` caches ``cosine_distance`` values, ``'euc'``
        caches ``euclidean_distance`` values; any other value caches
        nothing
    :param Stype: passed on to ``VectorSpaceClusterer.cluster``
    :param trace: passed on to ``VectorSpaceClusterer.cluster``
    :return: the value returned by ``VectorSpaceClusterer.cluster``
    """
    # The map must be refreshed every time a different sample set is
    # clustered.
    self._distMap.clear()

    if DisType == 'cos':
        metric = cosine_distance
    elif DisType == 'euc':
        metric = euclidean_distance
    else:
        metric = None

    if metric is not None:
        size = len(vectors)
        for row in range(size):
            for col in range(row + 1, size):
                self._distMap[(row, col)] = metric(
                    vectors[row], vectors[col])

    # stores the merge order
    leaves = [numpy.array(item, numpy.float64) for item in vectors]
    self._dendrogram = Dendrogram(leaves)

    return VectorSpaceClusterer.cluster(
        self, vectors, assign_clusters, Stype, trace)
def __init__(self, num_clusters=None, normalise=True, svd_dimensions=None):
    """
    Initialise the clusterer with an empty distance map.

    :param num_clusters: stored on the instance as ``_num_clusters``
    :param normalise: forwarded to ``VectorSpaceClusterer.__init__``
    :param svd_dimensions: forwarded to ``VectorSpaceClusterer.__init__``
    """
    VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

    # Starts out empty.
    self._distMap = dict()
    self._groups_values = None
    self._num_clusters = num_clusters
def __init__(self, num_clusters=None, normalise=True, svd_dimensions=None):
    """
    Set up the base clusterer and this class's own bookkeeping fields.

    :param num_clusters: kept as ``self._num_clusters``
    :param normalise: handed to ``VectorSpaceClusterer.__init__``
    :param svd_dimensions: handed to ``VectorSpaceClusterer.__init__``
    """
    VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

    # ``_groups_values`` starts unset and ``_distMap`` starts empty.
    self._num_clusters, self._groups_values = num_clusters, None
    self._distMap = {}