def __init__(self, X, prior, alpha, K=1, K_max=None):
        """
        Store `alpha` and build the diagonal components, one item per component.

        Parameters
        ----------
        X : matrix
            The data; its shape must unpack into exactly two values, the
            first of which is used as the number of items.
        prior : object
            Forwarded unchanged to `GaussianComponentsDiag`.
        alpha : object
            Stored as `self.alpha`.
        K : int
            Accepted but not used by this initializer.
        K_max : object
            Forwarded unchanged to `GaussianComponentsDiag`.
        """
        self.alpha = alpha

        n_items, _ = X.shape
        # Item i starts out in component i.
        own_component_each = np.arange(n_items)

        self.components = GaussianComponentsDiag(
            X, prior, own_component_each, K_max
        )
示例#2
0
    def __init__(
            self, X, prior, alpha, K, assignments="rand",
            covariance_type="full"
            ):
        """
        Store `alpha`, pick the initial assignments and build the components.

        Parameters
        ----------
        X : matrix
            The data; its shape must unpack into exactly two values, the
            first of which is used as the number of items N.
        prior : object
            Forwarded unchanged to the components class.
        alpha : object
            Stored as `self.alpha`.
        K : int
            Exclusive upper bound of the random labels drawn for "rand"; also
            forwarded to the components class as `K_max`.
        assignments : str or vector
            "rand" draws N labels uniformly from `0 .. K - 1` and relabels
            them so the used labels are consecutive. "each-in-own" uses
            `0 .. N - 1`. Anything else is forwarded as given.
        covariance_type : str
            "full", "diag" or "fixed"; selects the components class.

        Raises
        ------
        AssertionError
            If `covariance_type` is not one of the three values above.
        """
        self.alpha = alpha
        N, _ = X.shape

        # Compare against the keywords only when a string was passed:
        # `array == "rand"` is an element-wise comparison on NumPy arrays and
        # cannot be used as an `if` condition.
        keyword = assignments if isinstance(assignments, str) else None

        # Initial component assignments
        if keyword == "rand":
            assignments = np.random.randint(0, K, N)

            # Make sure we have consecutive values: whenever label k is
            # unused, shift every larger label down by one.
            for k in range(assignments.max()):
                while not np.any(assignments == k):
                    assignments[assignments > k] -= 1
                if assignments.max() == k:
                    break
        elif keyword == "each-in-own":
            assignments = np.arange(N)
        # Otherwise `assignments` is a vector and is used as given.

        if covariance_type == "full":
            self.components = GaussianComponents(X, prior, assignments, K_max=K)
        elif covariance_type == "diag":
            self.components = GaussianComponentsDiag(X, prior, assignments, K_max=K)
        elif covariance_type == "fixed":
            self.components = GaussianComponentsFixedVar(X, prior, assignments, K_max=K)
        else:
            # Raised explicitly so the check is not stripped under `python -O`.
            raise AssertionError("Invalid covariance type.")
class IGMM(object):
    """
    Gibbs sampler for a Dirichlet process mixture model.

    The mixture components are held in a `GaussianComponentsDiag` instance
    stored as `self.components`.
    See artifacts/Mardale, Doust, Couge, Ekpo_PGM_Deliverable_2.pdf for the
    mathematical derivations.
    """

    def __init__(self, X, prior, alpha, K=1, K_max=None):
        """
        Store `alpha` and build the components with one item per component.

        Parameters
        ----------
        X : matrix
            The data; its shape must unpack into exactly two values, the
            first of which is used as the number of items.
        prior : object
            Forwarded unchanged to `GaussianComponentsDiag`.
        alpha : float
            Stored as `self.alpha`; used by `gibbs_sample` as the weight of
            opening a new component.
        K : int
            Accepted but not used by this initializer.
        K_max : object
            Forwarded unchanged to `GaussianComponentsDiag`.
        """
        self.alpha = alpha
        N, _ = X.shape
        # Item i starts out in component i.
        assignments = np.arange(N)

        self.components = GaussianComponentsDiag(X, prior, assignments, K_max)

    def draw(self, p_k):
        """
        Draw an index from the discrete distribution given by `p_k`.

        A uniform number from `random.random()` is reduced by each entry of
        `p_k` in turn; the first index at which it becomes negative is
        returned. If that never happens, `len(p_k) - 1` is returned.
        """
        k_uni = random.random()
        for i, p in enumerate(p_k):
            k_uni -= p
            if k_uni < 0:
                return i
        return len(p_k) - 1

    def gibbs_sample(self, n_iter):
        """
        Run `n_iter` Gibbs sampling sweeps over all items.

        In each sweep every item is removed from its component, a component
        index is drawn for it (an existing component or one new component),
        and the item is added back.

        Parameters
        ----------
        n_iter : int
            Number of sweeps over the data.

        Returns
        -------
        dict
            `{"components": [...]}` with one entry per sweep, each equal to
            `self.components.K - 1` at the end of that sweep.
        """
        record_dict = {"components": []}
        components = self.components

        for _ in range(n_iter):
            for i in range(components.N):
                # Remember the current state so it can be restored cheaply if
                # the item ends up in the same component again.
                k_old = components.assignments[i]
                K_old = components.K
                stats_old = components.cache_component_stats(k_old)

                components.del_item(i)

                K = components.K
                norm = components.N + self.alpha - 1

                # One slot per existing component plus a last slot for a new
                # component. Builtin `float` is used because the `np.float`
                # alias no longer exists in current NumPy.
                log_prob_z = np.zeros(K + 1, float)
                log_prob_z[:K] = (
                    np.log(components.counts[:K] / norm)
                    + components.log_post_pred(i)
                )
                log_prob_z[-1] = (
                    math.log(self.alpha / norm)
                    + components.cached_log_prior[i]
                )
                # Log-sum-exp trick: normalize in log space, then exponentiate.
                prob_z = np.exp(log_prob_z - logsumexp(log_prob_z))

                k = self.draw(prob_z)

                if k == k_old and components.K == K_old:
                    # Same component and none was removed: restore the state.
                    components.restore_component_from_stats(k_old, *stats_old)
                    components.assignments[i] = k_old
                else:
                    components.add_item(i, k)

            record_dict["components"].append(components.K - 1)
        return record_dict
示例#4
0
    def setup_components(self, K, assignments="rand", X=None):
        """
        Setup the `components` attribute.

        See parameters of `FBGMM` for parameters not described below. This
        function is also useful for resetting the `components`, e.g. if you
        want to change the maximum number of possible components.

        Parameters
        ----------
        K : int
            Exclusive upper bound of the random labels drawn for "rand"; also
            passed to the components class as `K_max`.
        assignments : str or vector
            "rand" draws N labels uniformly from `0 .. K - 1`; "each-in-own"
            uses `0 .. N - 1`; anything else is treated as a vector of
            labels. In every case the labels are then shifted down so the
            used labels are consecutive; this is done in place, so a NumPy
            array passed in by the caller is modified.
        X : NxD matrix or None
            The data matrix. If None, then it is assumed that the `components`
            attribute has already been initialized and that this function is
            called to reset the `components`; in this case the data is taken
            from the previous initialization.

        Raises
        ------
        AssertionError
            If `X` is None and `components` has not been set yet, or if
            `self.covariance_type` is not "full", "diag" or "fixed".
        """
        if X is None:
            assert hasattr(self, "components")
            X = self.components.X

        N, _ = X.shape

        # Compare against the keywords only when a string was passed;
        # comparing an array with a string is element-wise in NumPy.
        keyword = assignments if isinstance(assignments, str) else None

        # Initial component assignments
        if keyword == "rand":
            assignments = np.random.randint(0, K, N)
        elif keyword == "each-in-own":
            assignments = np.arange(N)
        # Otherwise `assignments` is a vector and is used as given.

        # Make sure we have consecutive values: whenever label k is unused,
        # shift every larger label down by one.
        for k in range(assignments.max()):
            while not np.any(assignments == k):
                assignments[assignments > k] -= 1
            if assignments.max() == k:
                break

        if self.covariance_type == "full":
            self.components = GaussianComponents(X,
                                                 self.prior,
                                                 assignments,
                                                 K_max=K)
        elif self.covariance_type == "diag":
            self.components = GaussianComponentsDiag(X,
                                                     self.prior,
                                                     assignments,
                                                     K_max=K)
        elif self.covariance_type == "fixed":
            self.components = GaussianComponentsFixedVar(X,
                                                         self.prior,
                                                         assignments,
                                                         K_max=K)
        else:
            # Raised explicitly so the check is not stripped under `python -O`.
            raise AssertionError("Invalid covariance type.")
示例#5
0
    def __init__(
            self, X, prior, alpha, assignments="rand", K=1, K_max=None):
        """
        Store `alpha`, pick the initial assignments and build the components.

        Parameters
        ----------
        X : matrix
            The data; its shape must unpack into exactly two values, the
            first of which is used as the number of items N.
        prior : object
            Forwarded unchanged to `GaussianComponentsDiag`.
        alpha : object
            Stored as `self.alpha`.
        assignments : str or vector
            "rand" draws N labels uniformly from `0 .. K - 1` and relabels
            them so the used labels are consecutive. "one-by-one" gives the
            first item label 0 and every other item label -1. "each-in-own"
            uses `0 .. N - 1`. Anything else is forwarded as given.
        K : int
            Exclusive upper bound of the random labels drawn for "rand".
        K_max : object
            Forwarded unchanged to `GaussianComponentsDiag`.
        """
        self.alpha = alpha
        N, _ = X.shape

        # Compare against the keywords only when a string was passed:
        # `array == "rand"` is an element-wise comparison on NumPy arrays and
        # cannot be used as an `if` condition.
        keyword = assignments if isinstance(assignments, str) else None

        if keyword == "rand":
            assignments = np.random.randint(0, K, N)

            # Make the labels consecutive: whenever label k is unused, shift
            # every larger label down by one.
            for k in range(assignments.max()):
                while not np.any(assignments == k):
                    assignments[assignments > k] -= 1
                if assignments.max() == k:
                    break
        elif keyword == "one-by-one":
            assignments = -1 * np.ones(N, dtype="int")
            assignments[0] = 0  # first data vector belongs to first component
        elif keyword == "each-in-own":
            assignments = np.arange(N)

        self.components = GaussianComponentsDiag(X, prior, assignments, K_max)
示例#6
0
class IGMM(object):
    """
    Gibbs sampler over item-to-component assignments.

    The mixture components are held in a `GaussianComponentsDiag` instance
    stored as `self.components`; `alpha` weights the option of opening a new
    component during sampling.
    """

    def __init__(
            self, X, prior, alpha, assignments="rand", K=1, K_max=None):
        """
        Store `alpha`, pick the initial assignments and build the components.

        Parameters
        ----------
        X : matrix
            The data; its shape must unpack into exactly two values, the
            first of which is used as the number of items N.
        prior : object
            Forwarded unchanged to `GaussianComponentsDiag`.
        alpha : float
            Stored as `self.alpha`.
        assignments : str or vector
            "rand" draws N labels uniformly from `0 .. K - 1` and relabels
            them so the used labels are consecutive. "one-by-one" gives the
            first item label 0 and every other item label -1. "each-in-own"
            uses `0 .. N - 1`. Anything else is forwarded as given.
        K : int
            Exclusive upper bound of the random labels drawn for "rand".
        K_max : object
            Forwarded unchanged to `GaussianComponentsDiag`.
        """
        self.alpha = alpha
        N, _ = X.shape

        # Compare against the keywords only when a string was passed:
        # `array == "rand"` is an element-wise comparison on NumPy arrays and
        # cannot be used as an `if` condition.
        keyword = assignments if isinstance(assignments, str) else None

        if keyword == "rand":
            assignments = np.random.randint(0, K, N)

            # Make the labels consecutive: whenever label k is unused, shift
            # every larger label down by one.
            for k in range(assignments.max()):
                while not np.any(assignments == k):
                    assignments[assignments > k] -= 1
                if assignments.max() == k:
                    break
        elif keyword == "one-by-one":
            assignments = -1 * np.ones(N, dtype="int")
            assignments[0] = 0  # first data vector belongs to first component
        elif keyword == "each-in-own":
            assignments = np.arange(N)

        self.components = GaussianComponentsDiag(X, prior, assignments, K_max)

    def draw(self, p_k):
        """
        Draw an index from the discrete distribution given by `p_k`.

        A uniform number from `random.random()` is reduced by each entry of
        `p_k` in turn; the first index at which it becomes negative is
        returned. If that never happens, `len(p_k) - 1` is returned.
        """
        k_uni = random.random()
        for i, p in enumerate(p_k):
            k_uni -= p
            if k_uni < 0:
                return i
        return len(p_k) - 1

    def gibbs_sample(self, n_iter):
        """
        Run `n_iter` Gibbs sampling sweeps over all items.

        In each sweep every item is removed from its component, a component
        index is drawn for it (an existing component or one new component),
        and the item is added back.

        Parameters
        ----------
        n_iter : int
            Number of sweeps over the data.

        Returns
        -------
        dict
            `{"components": [...]}` with one entry per sweep, each equal to
            `self.components.K - 1` at the end of that sweep.
        """
        record_dict = {"components": []}
        components = self.components

        for _ in range(n_iter):
            for i in range(components.N):
                # Remember the current state so it can be restored cheaply if
                # the item ends up in the same component again.
                k_old = components.assignments[i]
                K_old = components.K
                stats_old = components.cache_component_stats(k_old)

                components.del_item(i)

                K = components.K
                norm = components.N + self.alpha - 1

                # One slot per existing component plus a last slot for a new
                # component. Builtin `float` is used because the `np.float`
                # alias no longer exists in current NumPy.
                log_prob_z = np.zeros(K + 1, float)
                log_prob_z[:K] = (
                    np.log(components.counts[:K] / norm)
                    + components.log_post_pred(i)
                )
                log_prob_z[-1] = (
                    math.log(self.alpha / norm)
                    + components.cached_log_prior[i]
                )
                # Log-sum-exp trick: normalize in log space, then exponentiate.
                prob_z = np.exp(log_prob_z - logsumexp(log_prob_z))

                k = self.draw(prob_z)

                if k == k_old and components.K == K_old:
                    # Same component and none was removed: restore the state.
                    components.restore_component_from_stats(k_old, *stats_old)
                    components.assignments[i] = k_old
                else:
                    components.add_item(i, k)

            record_dict["components"].append(components.K - 1)
        return record_dict