Example #1
0
    def incStickCap(self, inc=1):
        """Increase the stick cap by the given number of entries.

        Can be used in collaboration with nllData to keep increasing the
        number of sticks until there is insufficient improvement, indicating
        that the right number has been found.

        :param inc: number of extra sticks to add (default 1).
        """
        self.stickCap += inc

        # One fresh mixture component per new stick, drawn from the prior.
        # (List comprehension instead of map()/xrange() so this works on both
        # Python 2 and Python 3 - xrange does not exist on Python 3.)
        self.n += [gcp.GaussianPrior(self.dims) for _ in range(inc)]

        # Extend the per-stick beta-distribution parameters; ones matches the
        # initialisation used by the constructor.
        self.v = numpy.append(self.v, numpy.ones((inc, 2),
                                                 dtype=numpy.float32), 0)

        if self.z is not None:
            # Give every sample a random (Dirichlet-drawn) assignment over the
            # new sticks, then rescale so each row still sums to one:
            # weight[:, 0] multiplies the old sticks, weight[:, 1] the new.
            self.z = numpy.append(
                self.z,
                numpy.random.mtrand.dirichlet(32.0 * numpy.ones(inc),
                                              size=self.z.shape[0]), 1)
            weight = numpy.random.mtrand.dirichlet(numpy.ones(2),
                                                   size=self.z.shape[0])
            self.z[:, :self.stickCap - inc] *= weight[:, 0].reshape(
                (self.z.shape[0], 1))
            self.z[:, self.stickCap - inc:] *= weight[:, 1].reshape(
                (self.z.shape[0], 1))

        # Grow the caches: no StudentT computed yet for the new sticks, and
        # -1 entries for the expected-log caches, matching the initial v.
        self.nT += [None] * inc
        self.vExpLog = numpy.append(
            self.vExpLog, -1.0 * numpy.ones(inc, dtype=numpy.float32))
        self.vExpNegLog = numpy.append(
            self.vExpNegLog, -1.0 * numpy.ones(inc, dtype=numpy.float32))
Example #2
0
    def __init__(self, dims, stickCap=1):
        """Initialise with the number of dimensions and the cap on the number
        of sticks to have.

        The stick cap should be high enough to represent enough components,
        but not so high that you run out of memory. The better option is to
        leave it at 1 (the default) and use the solve-grow methods, which in
        effect find the right number of sticks. Alternatively, if given a
        single parameter that is itself a DPGMM, this acts as a copy
        constructor.

        :param dims: number of dimensions, or a DPGMM instance to copy.
        :param stickCap: cap on the number of sticks (ignored when copying).
        """
        # NOTE: the original used map()/xrange(), which only behave correctly
        # on Python 2 (Python 3's map() returns an iterator, breaking the
        # later list operations on self.data / self.n / self.nT). List
        # comprehensions are equivalent on both versions.
        if isinstance(dims, DPGMM):
            # Copy constructor - dims is actually another DPGMM to duplicate.
            other = dims
            self.dims = other.dims
            self.stickCap = other.stickCap

            self.data = [x.copy() for x in other.data]

            self.prior = gcp.GaussianPrior(other.prior)
            self.priorT = (gcp.StudentT(other.priorT)
                           if other.priorT is not None else None)
            self.n = [gcp.GaussianPrior(x) for x in other.n]
            self.beta = other.beta.copy()
            self.alpha = other.alpha.copy()
            self.v = other.v.copy()
            self.z = None if other.z is None else other.z.copy()

            self.skip = other.skip
            self.epsilon = other.epsilon

            self.nT = [None if x is None else gcp.StudentT(x)
                       for x in other.nT]
            self.vExpLog = other.vExpLog.copy()
            self.vExpNegLog = other.vExpNegLog.copy()
        else:
            self.dims = dims
            self.stickCap = stickCap

            # A list containing data matrices - used to collate all the
            # samples ready for processing. Before processing they are all
            # appended into a single data matrix, such that this list is of
            # length 1.
            self.data = []

            # The prior over the mixture components.
            self.prior = gcp.GaussianPrior(self.dims)
            self.priorT = None
            # The mixture components, one for each stick.
            self.n = [gcp.GaussianPrior(self.dims)
                      for _ in range(self.stickCap)]
            # The two parameters (typically named alpha & beta) for the Gamma
            # distribution prior over alpha.
            self.beta = numpy.ones(2, dtype=numpy.float32)
            # The parameters for the Gamma distribution that represents the
            # current distribution over alpha - basically beta updated with
            # the current stick configuration.
            self.alpha = numpy.ones(2, dtype=numpy.float32)
            # Each [i, :] of this array holds the two parameters of the beta
            # distribution over the stick-breaking weight for the relevant
            # mixture component.
            self.v = numpy.ones((self.stickCap, 2), dtype=numpy.float32)
            # The matrix of multinomials over stick-assignment for each
            # sample, aligned with the data matrix. In the case of incremental
            # use it will not necessarily be complete.
            self.z = None

            # Number of samples at the start of the data matrix to not bother
            # updating - useful to speed things up with incremental learning.
            self.skip = 0
            # Amount of change below which it stops iterating.
            self.epsilon = 1e-4

            # The cache of stuff kept around for speed...
            # The student-T distribution associated with each Gaussian.
            self.nT = [None] * self.stickCap
            # The expected value of the logarithm of each v.
            self.vExpLog = numpy.empty(self.stickCap, dtype=numpy.float32)
            # The expected value of the logarithm of 1 minus each v.
            self.vExpNegLog = numpy.empty(self.stickCap, dtype=numpy.float32)

            # Need these to always be correct - this matches the
            # initialisation of v.
            self.vExpLog[:] = -1.0
            self.vExpNegLog[:] = -1.0