def __init__(self, initial_means, priors=None, covariance_matrices=None,
             conv_threshold=1e-6, bias=0.1, normalise=False,
             svd_dimensions=None):
    """
    Set up an EM clusterer from its starting parameters, convergence
    threshold and vector mangling parameters.

    @param  initial_means: the means of the gaussian cluster centers
    @type   initial_means: [seq of] numpy array or seq of SparseArray
    @param  priors: the prior probability for each cluster
    @type   priors: numpy array or seq of float
    @param  covariance_matrices: the covariance matrix for each cluster
    @type   covariance_matrices: [seq of] numpy array
    @param  conv_threshold: maximum change in likelihood before deemed
                convergent
    @type   conv_threshold: int or float
    @param  bias: variance bias used to ensure non-singular covariance
                matrices
    @type   bias: float
    @param  normalise: should vectors be normalised to length 1
    @type   normalise: boolean
    @param  svd_dimensions: number of dimensions to use in reducing
                vector dimensionality with SVD
    @type   svd_dimensions: int
    """
    # The base class receives the vector mangling options.
    VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

    # The means are copied into a float64 array; the cluster count is
    # taken from the length of the sequence the caller supplied.
    means = numpy.array(initial_means, numpy.float64)
    cluster_count = len(initial_means)
    self._means = means
    self._num_clusters = cluster_count

    # Remaining parameters are stored exactly as given; a None prior or
    # covariance is not replaced with a default here.
    self._priors = priors
    self._covariance_matrices = covariance_matrices
    self._conv_threshold = conv_threshold
    self._bias = bias
def __init__(self, vector_names=None, num_clusters=1, normalise=True,
             svd_dimensions=None):
    """
    Initialise the clusterer with an optional list of vector names and
    the requested number of clusters.

    @param  vector_names: names associated with the vectors; stored as
                given (may be None)
    @param  num_clusters: the number of clusters to record on the instance
    @param  normalise: forwarded to VectorSpaceClusterer.__init__
    @param  svd_dimensions: forwarded to VectorSpaceClusterer.__init__
    """
    VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

    # Caller-supplied configuration.
    self._names = vector_names
    self._num_clusters = num_clusters

    # Nothing has been clustered yet, so all derived state starts as None.
    self._dendogram = self._groups_values = self._name_dendogram = None
def __init__(self, params, normalise, vector_names=None,
             svd_dimensions=None, log_it=None):
    """
    Initialise the clusterer from a parameter object.

    @param  params: object providing the attributes min_clusters,
                reassign and reassign_max, which are copied onto the
                instance
    @param  normalise: forwarded to VectorSpaceClusterer.__init__
    @param  vector_names: names associated with the vectors; stored as
                given (may be None)
    @param  svd_dimensions: forwarded to VectorSpaceClusterer.__init__
    @param  log_it: stored unchanged on the instance as log_it
    """
    VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

    # Settings read from the parameter object.
    min_clusters = params.min_clusters
    reassign = params.reassign
    reassign_max = params.reassign_max
    self._num_clusters = min_clusters
    self._iterative_reassign = reassign
    self._max_reassign = reassign_max

    # Derived state starts out empty.
    self._dendogram = self._groups_values = self._name_dendogram = None
    self._reassigned_clusters = {}

    self._names = vector_names
    self.log_it = log_it
def cluster(self, vectors, assign_clusters=False, trace=False):
    """
    Prepare the dendograms for a new run, remember the input vectors and
    delegate the clustering itself to VectorSpaceClusterer.cluster.

    @param  vectors: the vectors to cluster
    @param  assign_clusters: forwarded to VectorSpaceClusterer.cluster
    @param  trace: forwarded to VectorSpaceClusterer.cluster
    @return: whatever VectorSpaceClusterer.cluster returns
    """
    # The dendogram (which stores the merge order) is seeded with a
    # float64 copy of every input vector.
    float_vectors = [numpy.array(item, numpy.float64) for item in vectors]
    self._dendogram = Dendogram(float_vectors)

    # A second dendogram over the names is only built when names exist.
    if self._names:
        self._name_dendogram = Dendogram(self._names)

    self._vectors_to_cluster = vectors
    return VectorSpaceClusterer.cluster(
        self, vectors, assign_clusters, trace)
def cluster(self, vectors, assign_clusters=False, trace=False):
    """
    Reset the dendogram state, record the vectors being clustered and
    hand over to VectorSpaceClusterer.cluster.

    @param  vectors: the vectors to cluster
    @param  assign_clusters: forwarded to VectorSpaceClusterer.cluster
    @param  trace: forwarded to VectorSpaceClusterer.cluster
    @return: whatever VectorSpaceClusterer.cluster returns
    """
    # Build the leaves of the dendogram (the structure that stores the
    # merge order): one float64 array per input vector.
    leaves = []
    for vector in vectors:
        leaves.append(numpy.array(vector, numpy.float64))
    self._dendogram = Dendogram(leaves)

    # Mirror the dendogram over the vector names when any were supplied.
    if self._names:
        self._name_dendogram = Dendogram(self._names)

    self._vectors_to_cluster = vectors
    outcome = VectorSpaceClusterer.cluster(
        self, vectors, assign_clusters, trace)
    return outcome
def cluster(self, vectors, assign_clusters=False, trace=False):
    """
    Report the number of vectors through the message handle (when one is
    set), prepare the dendograms, remember the input vectors and delegate
    to VectorSpaceClusterer.cluster.

    @param  vectors: the vectors to cluster; must support len()
    @param  assign_clusters: forwarded to VectorSpaceClusterer.cluster
    @param  trace: forwarded to VectorSpaceClusterer.cluster
    @return: whatever VectorSpaceClusterer.cluster returns
    """
    if self.msg_handle is not None:
        # Send the vector count as text, then call tile_yield().
        vector_count = str(len(vectors))
        self.msg_handle.dm(vector_count)
        self.msg_handle.tile_yield()

    # The dendogram (which stores the merge order) starts from a float64
    # copy of every input vector.
    float_vectors = [numpy.array(item, numpy.float64) for item in vectors]
    self._dendogram = Dendogram(float_vectors)

    # A name dendogram is only built when names were supplied.
    if self._names:
        self._name_dendogram = Dendogram(self._names)

    self._vectors_to_cluster = vectors
    return VectorSpaceClusterer.cluster(
        self, vectors, assign_clusters, trace)
def cluster(self, vectors, assign_clusters=False, trace=False):
    """
    Remember the input vectors on the instance and delegate the
    clustering to VectorSpaceClusterer.cluster.

    @param  vectors: the vectors to cluster
    @param  assign_clusters: forwarded to VectorSpaceClusterer.cluster
    @param  trace: forwarded to VectorSpaceClusterer.cluster
    @return: whatever VectorSpaceClusterer.cluster returns
    """
    # Keep a reference to the vectors as they were passed in.
    self._vectors_to_cluster = vectors

    outcome = VectorSpaceClusterer.cluster(
        self, vectors, assign_clusters, trace)
    return outcome
def __init__(self, params, normalise, vector_names=None,
             svd_dimensions=None):
    """
    Initialise the clusterer from a parameter object.

    @param  params: object providing the attributes num_clusters and
                max_iterations, which are copied onto the instance
    @param  normalise: forwarded to VectorSpaceClusterer.__init__
    @param  vector_names: names associated with the vectors; stored as
                given (may be None)
    @param  svd_dimensions: forwarded to VectorSpaceClusterer.__init__
    """
    VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

    # Settings read from the parameter object.
    cluster_count = params.num_clusters
    iteration_limit = params.max_iterations
    self._num_clusters = cluster_count
    self._max_iterations = iteration_limit

    self._names = vector_names
    # No grouping has been computed yet.
    self._groups_values = None