def LMO_err(params, M=2, verbal=False):
        global Nfeval
        params = np.exp(params)
        al, bl = params[:-1], params[-1]  # params[:int(n_params/2)], params[int(n_params/2):] #  [np.exp(e) for e in params]
        if train.x.shape[1] < 5:
            train_L = bl**2 * np.exp(-train_L0 / al**2 / 2) + 1e-4 * EYEN
        else:
            train_L, dev_L = 0, 0
            for i in range(len(al)):
                train_L += train_L0[i] / al[i]**2
            train_L = bl * bl * np.exp(-train_L / 2) + 1e-4 * EYEN

        tmp_mat = train_L @ eig_vec_K
        C = train_L - tmp_mat @ np.linalg.inv(eig_vec_K.T @ tmp_mat / N2 +
                                              inv_eig_val) @ tmp_mat.T / N2
        c = C @ W_nystr_Y * N2
        c_y = c - train.y
        lmo_err = 0
        N = 0
        for ii in range(1):
            permutation = np.random.permutation(train.x.shape[0])
            for i in range(0, train.x.shape[0], M):
                indices = permutation[i:i + M]
                K_i = train_W[np.ix_(indices, indices)] * N2
                C_i = C[np.ix_(indices, indices)]
                c_y_i = c_y[indices]
                b_y = np.linalg.inv(np.eye(M) - C_i @ K_i) @ c_y_i
                lmo_err += b_y.T @ K_i @ b_y
                N += 1
        return lmo_err[0, 0] / M**2
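The minibatch loop above uses np.ix_(indices, indices) to slice the M x M block of the precomputed matrices train_W and C. A minimal standalone sketch of that indexing step, with a made-up symmetric matrix standing in for train_W:

import numpy as np

rng = np.random.default_rng(0)
n, M = 8, 2
W = rng.standard_normal((n, n))
W = W @ W.T  # stand-in for the precomputed kernel matrix

permutation = rng.permutation(n)
indices = permutation[:M]

# np.ix_ builds an open mesh, so this selects the M x M block whose rows
# and columns are both indexed by `indices`
K_i = W[np.ix_(indices, indices)]
assert np.allclose(K_i, W[indices][:, indices])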
Example #2
 def LMO_err(params, M=10):
     np.random.seed(2)
     random.seed(2)
     al, bl = np.exp(params)
     L = bl * bl * np.exp(-L0 / al / al / 2) + 1e-6 * EYEN
     if nystr:
         tmp_mat = L @ eig_vec_K
         C = L - tmp_mat @ np.linalg.inv(eig_vec_K.T @ tmp_mat / N2 + inv_eig_val_K) @ tmp_mat.T / N2
         c = C @ W_nystr_Y * N2
     else:
         LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
         C = L @ LWL_inv @ L / N2
         c = C @ W @ Y * N2
     c_y = c - Y
     lmo_err = 0
     N = 0
     for ii in range(1):
         permutation = np.random.permutation(X.shape[0])
         for i in range(0, X.shape[0], M):
             indices = permutation[i:i + M]
             K_i = W[np.ix_(indices, indices)] * N2
             C_i = C[np.ix_(indices, indices)]
             c_y_i = c_y[indices]
             b_y = np.linalg.inv(np.eye(M) - C_i @ K_i) @ c_y_i
             lmo_err += b_y.T @ K_i @ b_y
             N += 1
     return lmo_err[0, 0] / N / M ** 2
Example #3
 def LMO_err(params, M=2):
     params = np.exp(params)
     al, bl = params[:-1], params[-1]
     L = (bl * bl * np.exp(-L0[0] / al[0] / al[0] / 2) +
          bl * bl * np.exp(-L0[1] / al[1] / al[1] / 2) +
          1e-6 * EYEN)  # l(X,None,al,bl)# +1e-6*EYEN
     if nystr:
         tmp_mat = L @ eig_vec_K
         C = L - tmp_mat @ np.linalg.inv(eig_vec_K.T @ tmp_mat / N2 +
                                         inv_eig_val_K) @ tmp_mat.T / N2
         c = C @ W_nystr_Y * N2
     else:
         LWL_inv = chol_inv(
             L @ W @ L + L / N2 + JITTER * EYEN
         )  # chol_inv(W*N2+L_inv) # chol_inv(L@W@L+L/N2 +JITTER*EYEN)
         C = L @ LWL_inv @ L / N2
         c = C @ W @ Y * N2
     c_y = c - Y
     lmo_err = 0
     N = 0
     for ii in range(1):
         permutation = np.random.permutation(X.shape[0])
         for i in range(0, X.shape[0], M):
             indices = permutation[i:i + M]
             K_i = W[np.ix_(indices, indices)] * N2
             C_i = C[np.ix_(indices, indices)]
             c_y_i = c_y[indices]
             b_y = np.linalg.inv(np.eye(C_i.shape[0]) - C_i @ K_i) @ c_y_i
             # print(I_CW_inv.shape,c_y_i.shape)
             lmo_err += b_y.T @ K_i @ b_y
             N += 1
     return lmo_err[0, 0] / N / M**2
Example #4
    def list_permute(X, Y, k, l, n_permute=400, seed=8273):
        """
        Return a numpy array of HSIC's for each permutation.
        This is an implementation where kernel matrices are pre-computed.
        TODO: can be improved.
        """
        if X.shape[0] != Y.shape[0]:
            raise ValueError(
                'X and Y must have the same number of rows (sample size)')
        n = X.shape[0]

        r = 0
        arr_hsic = np.zeros(n_permute)
        K = k.eval(X, X)
        L = l.eval(Y, Y)
        # set the seed
        rand_state = np.random.get_state()
        np.random.seed(seed)

        while r < n_permute:
            # shuffle the order of X, Y while still keeping the original pairs
            ind = np.random.choice(n, n, replace=False)
            Ks = K[np.ix_(ind, ind)]
            #Xs = X[ind]
            #Ys = Y[ind]
            #Ks2 = k.eval(Xs, Xs)
            #assert np.linalg.norm(Ks - Ks2, 'fro') < 1e-4

            Ls = L[np.ix_(ind, ind)]
            Kmean = np.mean(Ks, 0)
            HK = Ks - Kmean
            HKf = HK.flatten() / (n - 1)
            # shift Ys n-1 times
            for s in range(n - 1):
                if r >= n_permute:
                    break
                Ls = np.roll(Ls, 1, axis=0)
                Ls = np.roll(Ls, 1, axis=1)

                # compute HSIC
                Lmean = np.mean(Ls, 0)
                HL = Ls - Lmean
                # t = trace(KHLH)
                HLf = HL.T.flatten() / (n - 1)
                bhsic = HKf.dot(HLf)

                arr_hsic[r] = bhsic
                r = r + 1
        # reset the seed back
        np.random.set_state(rand_state)
        return arr_hsic
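The commented-out assertion in the loop above is the point of precomputing K: permuting a stored Gram matrix with np.ix_ is equivalent to re-evaluating the kernel on the shuffled data. A small self-contained check of that identity, using a plain Gaussian kernel written inline (gauss_gram below is illustrative, not part of the class this method comes from):

import numpy as np

def gauss_gram(X, sigma=1.0):
    # pairwise squared distances, then a Gaussian kernel
    sq = np.sum(X**2, 1)[:, None] + np.sum(X**2, 1)[None, :] - 2 * X @ X.T
    return np.exp(-sq / (2 * sigma**2))

rng = np.random.default_rng(1)
X = rng.standard_normal((6, 3))
K = gauss_gram(X)

ind = rng.permutation(6)
# reindexing rows and columns of K equals recomputing the kernel on X[ind]
assert np.allclose(K[np.ix_(ind, ind)], gauss_gram(X[ind]))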
Example #5
def scatterMatrixInto(globalMatrix, elementMatrix, locationMap):

    size1 = elementMatrix.shape[0]
    size2 = elementMatrix.shape[1]

    if size1 != size2:
        raise ValueError('Element matrix must be square!')

    if size1 != locationMap.shape[0]:
        raise ValueError(
            'Element matrix and location map size do not correspond! Make sure location map has size #dof.'
        )

    globalMatrix[numpy.ix_(locationMap, locationMap)] = globalMatrix[numpy.ix_(
        locationMap, locationMap)] + elementMatrix
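This is the usual finite-element assembly step: the location map lists the global degrees of freedom an element touches, and np.ix_ addresses exactly that sub-block of the global matrix for the in-place update. A short usage sketch with made-up values:

import numpy

globalMatrix = numpy.zeros((5, 5))
elementMatrix = numpy.array([[2.0, -1.0],
                             [-1.0, 2.0]])
locationMap = numpy.array([1, 3])  # this element couples dofs 1 and 3

scatterMatrixInto(globalMatrix, elementMatrix, locationMap)
# the 2x2 element contribution now sits at rows/columns (1, 3)
print(globalMatrix[numpy.ix_(locationMap, locationMap)])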
Example #6
def gaussian_trig(m, v, i, e=None):
    d = len(m)
    L = len(i)
    e = np.ones((1, L)) if e is None else np.atleast_2d(e)
    ee = np.vstack([e, e]).reshape(1, -1, order='F')

    mi = np.atleast_2d(m[i])
    vi = v[np.ix_(i, i)]
    vii = np.atleast_2d(np.diag(vi))

    M = np.vstack([e * exp(-vii / 2) * sin(mi), e * exp(-vii / 2) * cos(mi)])
    M = M.flatten(order='F')

    lq = -(vii.T + vii) / 2
    q = exp(lq)

    U1 = (exp(lq + vi) - q) * sin(mi.T - mi)
    U2 = (exp(lq - vi) - q) * sin(mi.T + mi)
    U3 = (exp(lq + vi) - q) * cos(mi.T - mi)
    U4 = (exp(lq - vi) - q) * cos(mi.T + mi)

    V = np.vstack(
        [np.hstack([U3 - U4, U1 + U2]),
         np.hstack([(U1 + U2).T, U3 + U4])])
    V = np.vstack([
        np.hstack([V[::2, ::2], V[::2, 1::2]]),
        np.hstack([V[1::2, ::2], V[1::2, 1::2]])
    ])
    V = np.dot(ee.T, ee) * V / 2

    C = np.hstack([np.diag(M[1::2]), -np.diag(M[::2])])
    C = np.hstack([C[:, ::2], C[:, 1::2]])
    C = fill_mat(C, np.zeros((d, 2 * L)), i, None)

    return M, V, C
Example #7
def test_multivariate_normal_logpdf_batches_and_states_shared_cov_masked(D=10):
    # Test broadcasting over B batches, N datapoints, and K means, 1 covariance, with masks
    B = 3
    N = 100
    K = 5
    x = npr.randn(B, N, D)
    mask = npr.rand(B, N, D) < .5
    mu = npr.randn(K, D)
    L = npr.randn(D, D)
    Sigma = np.dot(L, L.T)

    ll1 = multivariate_normal_logpdf(x[:, :, None, :],
                                     mu,
                                     Sigma,
                                     mask=mask[:, :, None, :])
    assert ll1.shape == (B, N, K)

    ll2 = np.empty((B, N, K))
    for b in range(B):
        for n in range(N):
            m = mask[b, n]
            if m.sum() == 0:
                ll2[b, n] = 0
            else:
                for k in range(K):
                    ll2[b, n, k] = mvn.logpdf(x[b, n][m], mu[k][m],
                                              Sigma[np.ix_(m, m)])

    assert np.allclose(ll1, ll2)
Example #8
def loss_cp(self, m, s):
    D0 = np.size(s, 1)
    D1 = D0 + 2 * len(self.angle)
    M = m
    S = s

    ell = self.p
    Q = np.dot(np.vstack([1, ell]), np.array([[1, ell]]))
    Q = fill_mat(Q, np.zeros((D1, D1)), [0, D0], [0, D0])
    Q = fill_mat(ell**2, Q, [D0 + 1], [D0 + 1])

    target = gaussian_trig(self.target, 0 * s, self.angle)[0]
    target = np.hstack([self.target, target])
    i = np.arange(D0)
    m, s, c = gaussian_trig(M, S, self.angle)
    q = np.dot(S[np.ix_(i, i)], c)
    M = np.hstack([M, m])
    S = np.vstack([np.hstack([S, q]), np.hstack([q.T, s])])

    w = self.width if hasattr(self, "width") else [1]
    L = np.array([0])
    S2 = np.array(0)
    for i in range(len(w)):
        self.z = target
        self.W = Q / w[i]**2
        r, s2, c = self.loss_sat(M, S)
        L = L + r
        S2 = S2 + s2

    return L / len(w)
Example #9
 def permute(self, perm):
     """
     Permute the discrete latent states.
     """
     self.log_Ps = self.log_Ps[np.ix_(perm, perm)]
     self.mus = self.mus[perm]
     self.sqrt_Sigmas = self.sqrt_Sigmas[perm]
     self.Ws = self.Ws[perm]
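np.ix_(perm, perm) reorders the rows and columns of the transition matrix consistently, so state i in the new labelling maps to state perm[i] in the old one. A tiny standalone sketch of that row-and-column permutation, outside the class:

import numpy as np

log_Ps = np.log(np.array([[0.8, 0.1, 0.1],
                          [0.2, 0.7, 0.1],
                          [0.1, 0.3, 0.6]]))
perm = np.array([2, 0, 1])

permuted = log_Ps[np.ix_(perm, perm)]
# entry (i, j) of the permuted matrix is entry (perm[i], perm[j]) of the original
assert np.isclose(permuted[0, 1], log_Ps[2, 0])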
Example #10
    def permute(self, perm):
        """
        Permute the discrete latent states.
        """
        self.Ps = self.Ps[np.ix_(perm, perm)]
        self.rs = self.rs[perm]
        self.ps = self.ps[perm]

        # Reset the transition matrix
        self._transition_matrix = None
Example #11
    def permutation_list_mmd2_gram(X1, X2, Y1, Y2, k, kx, n_permute=400):
        """
        Repeatedly mix, permute X,Y and compute MMD^2. This is intended to be
        used to approximate the null distribution.
        """
        Y1Y2 = np.vstack((Y1, Y2))
        Ky1y2y1y2 = k.eval(Y1Y2, Y1Y2)

        rand_state = np.random.get_state()
        np.random.seed()

        ny1y2 = Y1Y2.shape[0]
        ny1 = Y1.shape[0]
        ny2 = Y2.shape[0]
        list_mmd2 = np.zeros(n_permute)

        for r in range(n_permute):
            # print r
            ind = np.random.choice(ny1y2, ny1y2, replace=False)
            # divide into new y1, y2
            indy1 = ind[:ny1]
            # print(indy1)
            indy2 = ind[ny1:]
            Ky1 = Ky1y2y1y2[np.ix_(indy1, indy1)]
            # print(Ky1)
            Ky2 = Ky1y2y1y2[np.ix_(indy2, indy2)]
            Ky1y2 = Ky1y2y1y2[np.ix_(indy1, indy2)]

            weights, _ = WQuadMMDTest.kernel_mean_matching(X1, X2, kx)
            Ky1 = np.matmul(np.matmul(np.diag(weights[:, 0]), Ky1),
                            np.diag(weights[:, 0]))
            Ky1y2 = np.matmul(np.diag(weights[:, 0]), Ky1y2)

            mmd2r, var = WQuadMMDTest.h1_mean_var_gram(Ky1,
                                                       Ky2,
                                                       Ky1y2,
                                                       is_var_computed=False)
            list_mmd2[r] = mmd2r

        np.random.set_state(rand_state)
        return list_mmd2
Example #12
    def sd_values(self, config, pos):
        """Returns the values of a given slater determinant
        on the position specified by pos.

        Args :
                config : electronic configuration
                pos : ndarray shape(N,3)
        Returns:
                values : nd array shape(N,Nbasis)
        """
        mo_vals = self.mo_values(pos)
        return np.linalg.det(mo_vals[np.ix_(config, config)])
Example #13
def test_multivariate_normal_logpdf_simple_masked(D=10):
    # Test single datapoint log pdf with mask
    x = npr.randn(D)
    mask = npr.rand(D) < 0.5
    mask[0] = True
    mu = npr.randn(D)
    L = npr.randn(D, D)
    Sigma = np.dot(L, L.T)

    ll1 = multivariate_normal_logpdf(x, mu, Sigma, mask=mask)
    ll2 = mvn.logpdf(x[mask], mu[mask], Sigma[np.ix_(mask, mask)])
    assert np.allclose(ll1, ll2)
Example #14
    def electron_integrals(*args):
        r"""Compute the one- and two-electron integrals in the molecular orbital basis.

        Args:
            args (array[array[float]]): initial values of the differentiable parameters

        Returns:
            tuple[array[float]]: 1D tuple containing core constant, one- and two-electron integrals
        """
        _, coeffs, _, h_core, repulsion_tensor = generate_scf(mol)(*args)
        one = anp.einsum("qr,rs,st->qt", coeffs.T, h_core, coeffs)
        two = anp.swapaxes(
            anp.einsum("ab,cd,bdeg,ef,gh->acfh", coeffs.T, coeffs.T,
                       repulsion_tensor, coeffs, coeffs),
            1,
            3,
        )
        core_constant = nuclear_energy(mol.nuclear_charges,
                                       mol.coordinates)(*args)

        if core is None and active is None:
            return core_constant, one, two

        for i in core:
            core_constant = core_constant + 2 * one[i][i]
            for j in core:
                core_constant = core_constant + 2 * two[i][j][j][i] - two[i][
                    j][i][j]

        for p in active:
            for q in active:
                for i in core:
                    o = anp.zeros(one.shape)
                    o[p, q] = 1.0
                    one = one + (2 * two[i][p][q][i] - two[i][p][i][q]) * o

        one = one[anp.ix_(active, active)]
        two = two[anp.ix_(active, active, active, active)]

        return core_constant, one, two
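The active-space reduction at the end relies on np.ix_ accepting one index list per axis, which carves an (n_active)^4 block out of the rank-4 two-electron tensor. A toy sketch of just that selection with a made-up tensor (not the chemistry code above):

import numpy as np

two = np.arange(5**4, dtype=float).reshape(5, 5, 5, 5)  # toy rank-4 tensor
active = [1, 3, 4]

two_active = two[np.ix_(active, active, active, active)]
assert two_active.shape == (3, 3, 3, 3)
assert two_active[0, 1, 2, 0] == two[1, 3, 4, 1]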
Example #15
    def permutation_list_mmd2_gram(X, Y, wx, wy, k, n_permute=400, seed=8273):
        """
        Repeatedly mix, permute X,Y and compute MMD^2. This is intended to be
        used to approximate the null distribution.
        """
        XY = np.vstack((X, Y))
        wxy = np.vstack((wx, wy))
        Kxyxy = k.eval(XY, XY)  #np.multiply(np.outer(wxy,wxy),k.eval(XY, XY))

        rand_state = np.random.get_state()
        np.random.seed(seed)

        nxy = XY.shape[0]  #nxy = np.sum(wxy)#
        nx = X.shape[0]  #nx = np.sum(wx)#
        ny = Y.shape[0]  #ny= np.sum(wy)#n
        list_mmd2 = np.zeros(n_permute)

        for r in range(n_permute):
            #print r
            ind = np.random.choice(nxy, nxy,
                                   replace=False)  #len(wxy), len(wxy)
            # divide into new X, Y
            indx = ind[:nx]
            #print(indx)
            indy = ind[nx:]
            Kx = Kxyxy[np.ix_(indx, indx)]
            #print(Kx)
            Ky = Kxyxy[np.ix_(indy, indy)]
            Kxy = Kxyxy[np.ix_(indx, indy)]

            mmd2r, var = QuadMMDTest.h1_mean_var_gram(Kx,
                                                      Ky,
                                                      Kxy,
                                                      wx,
                                                      wy,
                                                      is_var_computed=False)
            list_mmd2[r] = mmd2r

        np.random.set_state(rand_state)
        return list_mmd2
Example #16
    def createNumberingForOrderTemplate(self, p):

        numberOfFields, spaceDim = p.shape

        numbering = []

        for iFieldComponent in range(numberOfFields):
            degreesForFieldComponent = p[iFieldComponent, :] + 1

            if spaceDim == 1:
                numbering.append(numpy.arange(degreesForFieldComponent[0]))

            if spaceDim == 2:
                mapping = numpy.zeros(degreesForFieldComponent, dtype=int)

                pMinusOneR = degreesForFieldComponent[0] - 2
                pMinusOneS = degreesForFieldComponent[1] - 2

                # nodal modes
                mapping[0:2, 0] = [0, 2]
                mapping[0:2, 1] = [1, 3]

                # edge modes
                index = 4
                mapping[2:, 0] = numpy.arange(index, index + pMinusOneR)
                mapping[2:, 1] = numpy.arange(index + pMinusOneR,
                                              index + 2 * pMinusOneR)

                index += 2 * pMinusOneR
                mapping[0, 2:] = numpy.arange(index, index + pMinusOneS)
                mapping[1, 2:] = numpy.arange(index + pMinusOneS,
                                              index + 2 * pMinusOneS)

                index += 2 * pMinusOneS

                volumeModeIndices = numpy.ix_(
                    numpy.arange(2, degreesForFieldComponent[0]),
                    numpy.arange(2, degreesForFieldComponent[1]))
                mapping[volumeModeIndices] = numpy.reshape(
                    numpy.arange(index, index + pMinusOneR * pMinusOneS),
                    (pMinusOneS, pMinusOneR)).T

                numbering.append(mapping)

        return numbering
Example #17
def concat(con, sat, policy, m, s):
    max_u = policy.max_u
    E = len(max_u)
    D = len(m)

    F = D + E
    i, j = np.arange(D), np.arange(D, F)
    M = m
    S = fill_mat(s, np.zeros((F, F)))

    m, s, c = con(policy, m, s)
    M = np.hstack([M, m])
    S = fill_mat(s, S, j, j)
    q = np.matmul(S[np.ix_(i, i)], c)
    S = fill_mat(q, S, i, j)
    S = fill_mat(q.T, S, j, i)

    M, S, R = sat(M, S, j, max_u)
    C = np.hstack([np.eye(D), c]) @ R
    return M, S, C
Example #18
def gaussian_sin(m, v, i, e=None):
    d = len(m)
    L = len(i)
    e = np.ones((1, L)) if e is None else np.atleast_2d(e)

    mi = np.atleast_2d(m[i])
    vi = v[np.ix_(i, i)]
    vii = np.atleast_2d(np.diag(vi))
    M = e * exp(-vii / 2) * sin(mi)
    M = M.flatten()

    lq = -(vii.T + vii) / 2
    q = exp(lq)
    V = ((exp(lq + vi) - q) * cos(mi.T - mi) -
         (exp(lq - vi) - q) * cos(mi.T + mi))
    V = np.dot(e.T, e) * V / 2

    C = np.diag((e * exp(-vii / 2) * cos(mi)).flatten())
    C = fill_mat(C, np.zeros((d, L)), i, None)

    return M, V, C
Example #19
def path_proba_selection(w_s_c_, w_s_d_, k, k_to_keep, new_Lt):
    ''' Utility to update the path probabilities after the selection
    w_s_* (list): The path probabilities starting from the * head
    k (dict): The original number of component on each layer
    k_to_keep (dict): The components selected in the network
    new_Lt (int): The selected number of layers on the common tail.
    --------------------------------------------------------------------------
    returns (tuple of size 2): The paths probabilities starting from each head
    '''
    
    # Deal with both heads
    w = {'d':  w_s_d_.reshape(*np.concatenate([k['d'], k['t']]), order = 'C'),\
         'c':  w_s_c_.reshape(*np.concatenate([k['c'], k['t']]), order = 'C')}
        
    for h in ['c', 'd']:
        original_Lh = len(w[h].shape)
        new_Lh = len(k[h]) + new_Lt
        
        k_to_keep_ht = k_to_keep[h][:new_Lt] + k_to_keep['t']
        assert (len(k_to_keep_ht) == new_Lh)
        
        new_k_idx_grid = np.ix_(*k_to_keep_ht)
                
        # If layer deletion, sum the last components of the paths
        # Not checked
        if original_Lh > new_Lh: 
            deleted_dims = tuple(range(original_Lh)[new_Lt:])
            w_s = w[h][new_k_idx_grid].sum(deleted_dims).flatten(order = 'C')
        else:
            w_s = w[h][new_k_idx_grid].flatten(order = 'C')
        
        # Renormalization
        w_s /= w_s.sum()
        w[h] = w_s
        
    w_s_c = w['c']
    w_s_d = w['d']
                
    return w_s_c, w_s_d
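The key step above is np.ix_(*k_to_keep_ht): unpacking one index list per layer builds an open mesh that selects a sub-hypercube of the path-probability array, one axis per network layer. A minimal standalone sketch of that multi-axis selection (shapes chosen only for illustration):

import numpy as np

# path probabilities over a 3-layer network with 4, 3 and 2 components
w = np.arange(4 * 3 * 2, dtype=float).reshape(4, 3, 2)

# keep components 0 and 2 on layer 1, component 1 on layer 2, both on layer 3
k_to_keep = [[0, 2], [1], [0, 1]]

sub = w[np.ix_(*k_to_keep)]
assert sub.shape == (2, 1, 2)
assert sub[1, 0, 1] == w[2, 1, 1]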
Example #20
def _cv_beta(kernel_x, kernel_y, kernel_x_params, kernel_y_params,
             base_density, X, Y, lmbda, split):

    # fits on k-1 folds of the training dataset and repeats the operation. The output is a tensor beta of shape:
    # beta : k times N times d
    # k is the number of folds, N the number of data point in the k-1 folds and   d is the dimension of the data.

    n_total, d = Y.shape
    n_train = split[0][0].shape[0]
    n_test = split[0][1].shape[0]
    K_X = kernel_x._kernel(kernel_x_params, X, X)

    G = _compute_G(kernel_y_params, kernel_y, Y,
                   K_X) + n_train * lmbda * np.eye(n_total)

    G = np.linalg.inv(G)

    num_folds = 0
    for train_idx, test_idx in split:
        train_idx_tot = d * np.repeat(train_idx, d) + np.tile(
            np.array(range(d)), train_idx.shape[0])
        test_idx_tot = d * np.repeat(test_idx, d) + np.tile(
            np.array(range(d)), test_idx.shape[0])

        h = _compute_h(kernel_y_params, kernel_y, base_density,
                       Y[train_idx, :], K_X[np.ix_(train_idx, train_idx)])
        GG = _compute_G(kernel_y_params, kernel_y, Y[train_idx, :],
                        K_X[np.ix_(train_idx, train_idx)])

        beta = np.matmul(G[np.ix_(train_idx_tot, train_idx_tot)], h)

        h = np.matmul(G[np.ix_(test_idx_tot, train_idx_tot)], h)

        beta_tmp = np.linalg.solve(G[np.ix_(test_idx_tot, test_idx_tot)], h)
        beta -= np.matmul(G[np.ix_(train_idx_tot, test_idx_tot)], beta_tmp)
        beta = beta / lmbda
        beta = np.reshape(beta, [1, -1, d])
        if num_folds == 0:
            betas = 1. * beta
        else:
            betas = np.concatenate([betas, beta], axis=0)
        num_folds += 1

    return betas
Example #21
def propagate(m, s, plant, dynmodel, policy):
    angi = plant.angi
    poli = plant.poli
    dyni = plant.dyni
    difi = plant.difi

    D0 = len(m)
    D1 = D0 + 2 * len(angi)
    D2 = D1 + len(policy.max_u)
    M = np.array(m)
    S = s

    i, j = np.arange(D0), np.arange(D0, D1)
    m, s, c = gaussian_trig(M[i], S[np.ix_(i, i)], angi)
    q = np.matmul(S[np.ix_(i, i)], c)
    M = np.hstack([M, m])
    S = np.vstack([np.hstack([S, q]), np.hstack([q.T, s])])

    i, j = poli, np.arange(D1)
    m, s, c = policy.fcn(M[i], S[np.ix_(i, i)])
    q = np.matmul(S[np.ix_(j, i)], c)
    M = np.hstack([M, m])
    S = np.vstack([np.hstack([S, q]), np.hstack([q.T, s])])

    i, j = np.hstack([dyni, np.arange(D1, D2)]), np.arange(D2)
    m, s, c = dynmodel.fcn(M[i], S[np.ix_(i, i)])
    q = np.matmul(S[np.ix_(j, i)], c)
    M = np.hstack([M, m])
    S = np.vstack([np.hstack([S, q]), np.hstack([q.T, s])])

    P = np.hstack([np.zeros((D0, D2)), np.eye(D0)])
    P = fill_mat(np.eye(len(difi)), P, difi, difi)
    M_next = np.matmul(P, M[:, newaxis]).flatten()
    S_next = P @ S @ P.T
    S_next = (S_next + S_next.T) / 2
    return M_next, S_next
Example #22
def multivariate_normal_logpdf(data, mus, Sigmas, mask=None):
    """
    Compute the log probability density of a multivariate Gaussian distribution.
    This will broadcast as long as data, mus, Sigmas have the same (or at
    least compatible) leading dimensions.
    Parameters
    ----------
    data : array_like (..., D)
        The points at which to evaluate the log density
    mus : array_like (..., D)
        The mean(s) of the Gaussian distribution(s)
    Sigmas : array_like (..., D, D)
        The covariances(s) of the Gaussian distribution(s)
    mask : array_like (..., D) bool
        Optional mask indicating which entries in the data are observed
    Returns
    -------
    lps : array_like (...,)
        Log probabilities under the multivariate Gaussian distribution(s).
    """
    # Check inputs
    D = data.shape[-1]
    assert mus.shape[-1] == D
    assert Sigmas.shape[-2] == Sigmas.shape[-1] == D

    # If there's no mask, we can just use the standard log pdf code
    if mask is None:
        return _multivariate_normal_logpdf(data, mus, Sigmas)

    # Otherwise we need to separate the data into sets with the same mask,
    # since each one will entail a different covariance matrix.
    #
    # First, determine the output shape. Allow mus and Sigmas to
    # have different shapes; e.g. many Gaussians with the same
    # covariance but different means.
    shp1 = np.broadcast(data, mus).shape[:-1]
    shp2 = np.broadcast(data[..., None], Sigmas).shape[:-2]
    assert len(shp1) == len(shp2)
    shp = tuple(max(s1, s2) for s1, s2 in zip(shp1, shp2))

    # Broadcast the data into the full shape
    full_data = np.broadcast_to(data, shp + (D, ))

    # Get the full mask
    assert mask.dtype == bool
    assert mask.shape == data.shape
    full_mask = np.broadcast_to(mask, shp + (D, ))

    # Flatten the mask and get the unique values
    flat_data = flatten_to_dim(full_data, 1)
    flat_mask = flatten_to_dim(full_mask, 1)
    unique_masks, mask_index = np.unique(flat_mask,
                                         return_inverse=True,
                                         axis=0)

    # Initialize the output
    lls = np.nan * np.ones(flat_data.shape[0])

    # Compute the log probability for each mask
    for i, this_mask in enumerate(unique_masks):
        this_inds = np.where(mask_index == i)[0]
        this_D = np.sum(this_mask)
        if this_D == 0:
            lls[this_inds] = 0
            continue

        this_data = flat_data[np.ix_(this_inds, this_mask)]
        this_mus = mus[..., this_mask]
        this_Sigmas = Sigmas[np.ix_(
            *[np.ones(sz, dtype=bool) for sz in Sigmas.shape[:-2]], this_mask,
            this_mask)]

        # Precompute the Cholesky decomposition
        this_Ls = np.linalg.cholesky(this_Sigmas)

        # Broadcast mus and Sigmas to full shape and extract the necessary indices
        this_mus = flatten_to_dim(np.broadcast_to(this_mus, shp + (this_D, )),
                                  1)[this_inds]
        this_Ls = flatten_to_dim(
            np.broadcast_to(this_Ls, shp + (this_D, this_D)), 2)[this_inds]

        # Evaluate the log likelihood
        lls[this_inds] = _multivariate_normal_logpdf(this_data,
                                                     this_mus,
                                                     this_Sigmas,
                                                     Ls=this_Ls)

    # Reshape the output
    assert np.all(np.isfinite(lls))
    return np.reshape(lls, shp)
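The covariance masking above uses a less obvious np.ix_ trick: passing all-True boolean selectors for the leading (batch) axes leaves them intact, while the data mask restricts the last two axes. A small sketch of just that indexing pattern, with made-up shapes:

import numpy as np

K, D = 4, 5
Sigmas = np.stack([np.eye(D) * (k + 1) for k in range(K)])  # K covariance matrices
mask = np.array([True, False, True, False, True])

# all-True selector on the batch axis, boolean mask on the last two axes
sub = Sigmas[np.ix_(np.ones(K, dtype=bool), mask, mask)]
assert sub.shape == (K, mask.sum(), mask.sum())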
Example #23
def DDGMM(y, n_clusters, r, k, init, var_distrib, nj, it = 50, \
          eps = 1E-05, maxstep = 100, seed = None, perform_selec = True):
    ''' Fit a Generalized Linear Mixture of Latent Variables Model (GLMLVM)
    
    y (numobs x p ndarray): The observations containing categorical variables
    n_clusters (int): The number of clusters to look for in the data
    r (list): The dimension of latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y 
    nj (p 1darray): For binary/count data: The maximum values that the variable can take. 
                    For ordinal data: the number of different existing categories for each variable
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increase by less than eps then the algorithm stops
    maxstep (int): The maximum number of optimisation step for each variable
    seed (int): The random state seed to set (Only for numpy generated data for the moment)
    perform_selec (Bool): Whether to perform architecture selection or not
    ------------------------------------------------------------------------------------------------
    returns (dict): The predicted classes, the likelihood through the EM steps
                    and a continuous representation of the data
    '''

    prev_lik = -1E16
    best_lik = -1E16
    tol = 0.01
    max_patience = 1
    patience = 0

    best_k = deepcopy(k)
    best_r = deepcopy(r)

    best_sil = -1
    new_sil = -1

    # Initialize the parameters
    eta = deepcopy(init['eta'])
    psi = deepcopy(init['psi'])
    lambda_bin = deepcopy(init['lambda_bin'])
    lambda_ord = deepcopy(init['lambda_ord'])
    lambda_categ = deepcopy(init['lambda_categ'])

    H = deepcopy(init['H'])
    w_s = deepcopy(
        init['w_s']
    )  # Probability of path s' through the network for all s' in Omega

    numobs = len(y)
    likelihood = []
    it_num = 0
    ratio = 1000
    np.random.seed(seed)

    # Dispatch variables between categories
    y_bin = y[:,
              np.logical_or(var_distrib == 'bernoulli', var_distrib ==
                            'binomial')]
    nj_bin = nj[np.logical_or(var_distrib == 'bernoulli',
                              var_distrib == 'binomial')].astype(int)
    nb_bin = len(nj_bin)

    y_categ = y[:, var_distrib == 'categorical']
    nj_categ = nj[var_distrib == 'categorical'].astype(int)
    nb_categ = len(nj_categ)

    y_ord = y[:, var_distrib == 'ordinal']
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nb_ord = len(nj_ord)

    L = len(k)
    k_aug = k + [1]
    S = np.array([np.prod(k_aug[l:]) for l in range(L + 1)])
    M = M_growth(1, r, numobs)

    assert nb_ord + nb_bin + nb_categ > 0

    # Compute the Gower matrix
    cat_features = np.logical_or(var_distrib == 'categorical',
                                 var_distrib == 'bernoulli')
    dm = gower_matrix(y, cat_features=cat_features)

    while (it_num < it) & ((ratio > eps) | (patience <= max_patience)):
        print(it_num)

        # The clustering layer is the one used to perform the clustering
        # i.e. the layer l such that k[l] == n_clusters
        clustering_layer = np.argmax(np.array(k) == n_clusters)

        #####################################################################################
        ################################# S step ############################################
        #####################################################################################

        #=====================================================================
        # Draw from f(z^{l} | s, Theta) for all s in Omega
        #=====================================================================

        mu_s, sigma_s = compute_path_params(eta, H, psi)
        sigma_s = ensure_psd(sigma_s)
        z_s, zc_s = draw_z_s(mu_s, sigma_s, eta, M)
        '''
        print('mu_s',  np.abs(mu_s[0]).mean())
        print('sigma_s',  np.abs(sigma_s[0]).mean())
        print('z_s0',  np.abs(z_s[0]).mean())
        print('z_s1',  np.abs(z_s[1]).mean(0)[:,0])
        '''

        #========================================================================
        # Draw from f(z^{l+1} | z^{l}, s, Theta) for l >= 1
        #========================================================================

        chsi = compute_chsi(H, psi, mu_s, sigma_s)
        chsi = ensure_psd(chsi)
        rho = compute_rho(eta, H, psi, mu_s, sigma_s, zc_s, chsi)

        # In the following z2 and z1 will denote z^{l+1} and z^{l} respectively
        z2_z1s = draw_z2_z1s(chsi, rho, M, r)

        #=======================================================================
        # Compute the p(y| z1) for all variable categories
        #=======================================================================

        py_zl1 = fy_zl1(lambda_bin, y_bin, nj_bin, lambda_ord, y_ord, nj_ord,
                        lambda_categ, y_categ, nj_categ, z_s[0])

        #========================================================================
        # Draw from p(z1 | y, s) proportional to p(y | z1) * p(z1 | s) for all s
        #========================================================================

        zl1_ys = draw_zl1_ys(z_s, py_zl1, M)

        #####################################################################################
        ################################# E step ############################################
        #####################################################################################

        #=====================================================================
        # Compute conditional probabilities used in the appendix of asta paper
        #=====================================================================

        pzl1_ys, ps_y, p_y = E_step_GLLVM(z_s[0], mu_s[0], sigma_s[0], w_s,
                                          py_zl1)
        #del(py_zl1)

        #=====================================================================
        # Compute p(z^{(l)}| s, y). Equation (5) of the paper
        #=====================================================================

        pz2_z1s = fz2_z1s(t(pzl1_ys, (1, 0, 2)), z2_z1s, chsi, rho, S)
        pz_ys = fz_ys(t(pzl1_ys, (1, 0, 2)), pz2_z1s)

        #=====================================================================
        # Compute MFA expectations
        #=====================================================================

        Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys = \
            E_step_DGMM(zl1_ys, H, z_s, zc_s, z2_z1s, pz_ys, pz2_z1s, S)

        ###########################################################################
        ############################ M step #######################################
        ###########################################################################

        #=======================================================
        # Compute MFA Parameters
        #=======================================================

        w_s = np.mean(ps_y, axis=0)
        eta, H, psi = M_step_DGMM(Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys, ps_y,
                                  H, k)

        #=======================================================
        # Identifiability conditions
        #=======================================================

        # Update eta, H and Psi values
        H = diagonal_cond(H, psi)
        Ez, AT = compute_z_moments(w_s, eta, H, psi)
        eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)

        del (Ez)

        #=======================================================
        # Compute GLLVM Parameters
        #=======================================================

        # We optimize each column separately as it is faster than all column jointly
        # (and more relevant with the independence hypothesis)

        lambda_bin = bin_params_GLLVM(y_bin, nj_bin, lambda_bin, ps_y, pzl1_ys, z_s[0], AT[0],\
                     tol = tol, maxstep = maxstep)

        lambda_ord = ord_params_GLLVM(y_ord, nj_ord, lambda_ord, ps_y, pzl1_ys, z_s[0], AT[0],\
                     tol = tol, maxstep = maxstep)

        lambda_categ = categ_params_GLLVM(y_categ, nj_categ, lambda_categ, ps_y, pzl1_ys, z_s[0], AT[0],\
                     tol = tol, maxstep = maxstep)

        ###########################################################################
        ################## Clustering parameters updating #########################
        ###########################################################################

        new_lik = np.sum(np.log(p_y))
        likelihood.append(new_lik)
        ratio = (new_lik - prev_lik) / abs(prev_lik)
        print(likelihood)

        idx_to_sum = tuple(set(range(1, L + 1)) - set([clustering_layer + 1]))
        psl_y = ps_y.reshape(numobs, *k, order='C').sum(idx_to_sum)

        temp_class = np.argmax(psl_y, axis=1)
        try:
            new_sil = silhouette_score(dm, temp_class, metric='precomputed')
        except ValueError:
            new_sil = -1

        print('Silhouette score:', new_sil)
        if best_sil < new_sil:
            z = (ps_y[..., n_axis] * Ez_ys[clustering_layer]).sum(1)
            best_sil = deepcopy(new_sil)
            classes = deepcopy(temp_class)

            fig = plt.figure(figsize=(8, 8))
            plt.scatter(z[:, 0], z[:, 1])
            plt.show()

        # Refresh the classes only if they provide a better explanation of the data
        if best_lik < new_lik:
            best_lik = deepcopy(new_lik)

        if prev_lik < new_lik:
            patience = 0
            M = M_growth(it_num + 2, r, numobs)
        else:
            patience += 1

        ###########################################################################
        ######################## Parameter selection  #############################
        ###########################################################################

        is_not_min_specif = not (np.all(np.array(k) == n_clusters)
                                 & np.array_equal(r, [2, 1]))

        if look_for_simpler_network(
                it_num) & perform_selec & is_not_min_specif:
            r_to_keep = r_select(y_bin, y_ord, y_categ, zl1_ys, z2_z1s, w_s)

            # If r_l == 0, delete the last l + 1: layers
            new_L = np.sum([len(rl) != 0 for rl in r_to_keep]) - 1

            k_to_keep = k_select(w_s, k, new_L, clustering_layer)

            is_L_unchanged = L == new_L
            is_r_unchanged = np.all(
                [len(r_to_keep[l]) == r[l] for l in range(new_L + 1)])
            is_k_unchanged = np.all(
                [len(k_to_keep[l]) == k[l] for l in range(new_L)])

            is_selection = not (is_r_unchanged & is_k_unchanged
                                & is_L_unchanged)

            assert new_L > 0

            if is_selection:

                eta = [eta[l][k_to_keep[l]] for l in range(new_L)]
                eta = [eta[l][:, r_to_keep[l]] for l in range(new_L)]

                H = [H[l][k_to_keep[l]] for l in range(new_L)]
                H = [H[l][:, r_to_keep[l]] for l in range(new_L)]
                H = [H[l][:, :, r_to_keep[l + 1]] for l in range(new_L)]

                psi = [psi[l][k_to_keep[l]] for l in range(new_L)]
                psi = [psi[l][:, r_to_keep[l]] for l in range(new_L)]
                psi = [psi[l][:, :, r_to_keep[l]] for l in range(new_L)]

                if nb_bin > 0:
                    # Add the intercept:
                    bin_r_to_keep = np.concatenate([[0],
                                                    np.array(r_to_keep[0]) + 1
                                                    ])
                    lambda_bin = lambda_bin[:, bin_r_to_keep]

                if nb_ord > 0:
                    # Intercept coefficients handling is a little more complicated here
                    lambda_ord_intercept = [
                        lambda_ord_j[:-r[0]] for lambda_ord_j in lambda_ord
                    ]
                    Lambda_ord_var = np.stack(
                        [lambda_ord_j[-r[0]:] for lambda_ord_j in lambda_ord])
                    Lambda_ord_var = Lambda_ord_var[:, r_to_keep[0]]
                    lambda_ord = [np.concatenate([lambda_ord_intercept[j], Lambda_ord_var[j]])\
                                  for j in range(nb_ord)]

                if nb_categ > 0:
                    lambda_categ_intercept = [
                        lambda_categ[j][:, 0] for j in range(nb_categ)
                    ]
                    Lambda_categ_var = [
                        lambda_categ_j[:, -r[0]:]
                        for lambda_categ_j in lambda_categ
                    ]
                    Lambda_categ_var = [
                        lambda_categ_j[:, r_to_keep[0]]
                        for lambda_categ_j in Lambda_categ_var
                    ]

                    lambda_categ = [np.hstack([lambda_categ_intercept[j][..., n_axis], Lambda_categ_var[j]])\
                                   for j in range(nb_categ)]

                w = w_s.reshape(*k, order='C')
                new_k_idx_grid = np.ix_(*k_to_keep[:new_L])

                # If layer deletion, sum the last components of the paths
                if L > new_L:
                    deleted_dims = tuple(range(L)[new_L:])
                    w_s = w[new_k_idx_grid].sum(deleted_dims).flatten(
                        order='C')
                else:
                    w_s = w[new_k_idx_grid].flatten(order='C')

                w_s /= w_s.sum()

                k = [len(k_to_keep[l]) for l in range(new_L)]
                r = [len(r_to_keep[l]) for l in range(new_L + 1)]

                k_aug = k + [1]
                S = np.array([np.prod(k_aug[l:]) for l in range(new_L + 1)])
                L = new_L

                patience = 0
                best_r = deepcopy(r)
                best_k = deepcopy(k)

                # Identifiability conditions
                H = diagonal_cond(H, psi)
                Ez, AT = compute_z_moments(w_s, eta, H, psi)
                eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)

            print('New architecture:')
            print('k', k)
            print('r', r)
            print('L', L)
            print('S', S)
            print("w_s", len(w_s))

        prev_lik = deepcopy(new_lik)
        it_num = it_num + 1

    out = dict(likelihood = likelihood, classes = classes, z = z, \
               best_r = best_r, best_k = best_k)
    return (out)
Example #24
def toBlocks(mat, d):
    J11 = mat[np.ix_([0, d - 1], [0, d - 1])]
    J12 = mat[np.ix_([0, d - 1], [d, mat.shape[0] - 1])]
    J22 = mat[np.ix_([d, mat.shape[0] - 1], [d, mat.shape[0] - 1])]
    return J11, J12, J22
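toBlocks uses np.ix_ with two-element index lists, so each returned block is the 2x2 sub-matrix built from the boundary rows and columns of the corresponding partition. A quick usage sketch on a small made-up matrix (with d = 2 the partitions are exactly 2x2, so the blocks are contiguous):

import numpy as np

mat = np.arange(16, dtype=float).reshape(4, 4)
J11, J12, J22 = toBlocks(mat, d=2)

assert np.allclose(J11, mat[np.ix_([0, 1], [0, 1])])
assert np.allclose(J12, mat[np.ix_([0, 1], [2, 3])])
assert np.allclose(J22, mat[np.ix_([2, 3], [2, 3])])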
Example #25
def load_celegans_network(props=np.ones((3, 4))):
    """" This function loads a connectome with a subsample of the entire connectome. The sub-sample
        is given by props. props[i,j] = proportion of neurons of category (i,j) to include
        category i = body position (Head = 0, Middle =1, Tail =2)
        category j = neuron type (Sensory = 0, Motor = 1, Interneuron =2, Poly-type =3)
        Besides names and positions of neurons, it outputs an array of adjacency matrix, for each type of
        connectivity (Synapse, electric junction and NMJ (?))"""

    NeuronTypeCSV = csv.reader(open('data/NeuronType.csv', 'r'),
                               delimiter=',',
                               skipinitialspace=True)
    neuron_info_all = [[] for index in range(4)]
    relevant_indexes = [0, 1, 2, 14]
    # load relevant information (names, numerical position, anatomical position and type)
    for row in NeuronTypeCSV:
        for j0, j in enumerate(relevant_indexes):
            neuron_info_all[j0].append(row[j].strip(' \t\n\r'))

    names_with_zeros = deepcopy(neuron_info_all[0])
    # erase extra zeros in name
    for j in range(279):
        indZero = neuron_info_all[0][j].find('0')
        if (indZero >= 0 and indZero < len(neuron_info_all[0][j]) - 1):
            neuron_info_all[0][j] = neuron_info_all[0][j].replace('0', '')

    names = deepcopy(neuron_info_all[0])
    xpos = np.array(neuron_info_all[1])

    location = neuron_info_all[2]

    issensory = np.zeros(279)
    ismotor = np.zeros(279)
    isinterneuron = np.zeros(279)

    NeuronTypeISM = csv.reader(open('data/NeuronTypeISM.csv', 'r'),
                               delimiter=',',
                               skipinitialspace=True)

    for row in NeuronTypeISM:
        try:
            index = names.index(row[0])
            words = row[2].lower()
            if ('sensory' in words):
                issensory[index] = 1
            if ('motor' in words):
                ismotor[index] = 1
            if ('interneuron' in words):
                isinterneuron[index] = 1
        except:
            pass
    NeuronRemainingTypesISM = csv.reader(open(
        'data/NeuronRemainingTypesISM.csv', 'r'),
                                         delimiter=',',
                                         skipinitialspace=True)
    for row in NeuronRemainingTypesISM:
        try:
            index = neuron_info_all[0].index(row[0])
            words = row[1].lower()
            if ('sensory' in words):
                issensory[index] = 1
            if ('motor' in words):
                ismotor[index] = 1
            if ('interneuron' in words):
                isinterneuron[index] = 1
        except:
            pass

    ConnectomeCSV = csv.reader(open('data/NeuronConnect.csv', 'r'),
                               delimiter=',',
                               skipinitialspace=True)
    As_weighted = np.zeros((3, 279, 279))

    for row in ConnectomeCSV:
        try:
            index1 = names_with_zeros.index(row[0])
            index2 = names_with_zeros.index(row[1])
            if ('S' in row[2] or 'R' in row[2] or 'Sp' in row[2]
                    or 'Rp' in row[2]):
                As_weighted[0, index1,
                            index2] = As_weighted[0, index1, index2] + float(
                                row[3])
            if ('EJ' in row[2]):
                As_weighted[1, index1,
                            index2] = As_weighted[1, index1, index2] + float(
                                row[3])
            if ('NMJ' in row[2]):
                As_weighted[2, index1,
                            index2] = As_weighted[2, index1, index2] + float(
                                row[3])
        except:
            pass
    As = (As_weighted > 0).astype(int)

    ind_type = [[] for _ in range(4)]

    # 0=sensory,motor,interneuron,poly
    ind_type[0] = np.where(
        np.logical_and(
            np.logical_and(issensory.astype(bool), (1 - ismotor).astype(bool)),
            (1 - isinterneuron).astype(bool)))[0]
    ind_type[1] = np.where(
        np.logical_and(
            np.logical_and((1 - issensory).astype(bool), ismotor.astype(bool)),
            (1 - isinterneuron).astype(bool)))[0]
    ind_type[2] = np.where(
        np.logical_and(
            np.logical_and((1 - issensory).astype(bool),
                           (1 - ismotor).astype(bool)),
            isinterneuron.astype(bool)))[0]

    ind_type[3] = np.where(issensory + ismotor + isinterneuron >= 2)[0]

    # Head, Middle, Tail

    ind_pos = [[] for _ in range(3)]
    ind_pos[0] = [i for i, j in enumerate(location) if j == 'H']
    ind_pos[1] = [i for i, j in enumerate(location) if j == 'M']
    ind_pos[2] = [i for i, j in enumerate(location) if j == 'T']

    ind_type_pos_number = np.zeros((3, 4))

    ind_type_pos = [[] for _ in range(3)]

    for j in range(3):
        ind_type_pos[j] = [[] for _ in range(4)]

    for i in range(4):
        for j in range(3):
            ind_type_pos[j][i] = [
                val for val in ind_pos[j] if val in ind_type[i]
            ]
            ind_type_pos_number[j, i] = len(ind_type_pos[j][i])

    ind_neuron_subsampled = [[] for _ in range(3) for _ in range(4)]
    for j in range(3):
        ind_neuron_subsampled[j] = [[] for _ in range(4)]

    for i in range(4):
        for j in range(3):
            try:
                ind_neuron_subsampled[j][i] = np.random.choice(
                    ind_type_pos[j][i],
                    np.floor(ind_type_pos_number[j, i] *
                             props[j, i]).astype(int),
                    replace=False)
            except:
                ind_neuron_subsampled[j][i] = []

    ind_neuron_subsampled = np.sort(
        np.concatenate([
            np.concatenate(ind_neuron_subsampled[j][:], axis=0)
            for j in range(3)
        ]).astype(int))

    As = As[np.ix_(range(3), ind_neuron_subsampled, ind_neuron_subsampled)]
    xpos = np.array(deepcopy(xpos[ind_neuron_subsampled]).astype(float))
    names = [j for j0, j in enumerate(names) if j0 in ind_neuron_subsampled]

    return As, names, xpos
Example #26
plant.odei = odei
plant.angi = angi
plant.poli = poli
plant.dyno = dyno
plant.dyni = dyni
plant.difi = difi

m, s, c = gaussian_trig(mu0, S0, angi)
m = np.hstack([mu0, m])
c = np.dot(S0, c)
s = np.vstack([np.hstack([S0, c]), np.hstack([c.T, s])])

policy = GPModel()
policy.max_u = [10]
policy.p = {
    'inputs': multivariate_normal(m[poli], s[np.ix_(poli, poli)], nc),
    'targets': 0.1 * randn(nc, len(policy.max_u)),
    'hyp': log([1, 1, 1, 0.7, 0.7, 1, 0.01])
}

Loss.fcn = loss_cp
cost = Loss()
cost.p = 0.5
cost.gamma = 1
cost.width = [0.25]
cost.angle = plant.angi
cost.target = np.array([0, 0, 0, np.pi])

start = multivariate_normal(mu0, S0)
x, y, L, latent = rollout(start, policy, plant, cost, H)
policy.fcn = lambda m, s: concat(congp, gaussian_sin, policy, m, s)
Example #27
def boxQP(H, g, lower, upper, x0):
    n = H.shape[0]
    clamped = np.zeros(n)
    free = np.ones(n)
    Hfree = np.zeros(n)
    oldvalue = 0
    result = 0
    nfactor = 0
    clamp = lambda value: np.maximum(lower, np.minimum(upper, value))

    maxIter = 100
    minRelImprove = 1e-8
    minGrad = 1e-8
    stepDec = 0.6
    minStep = 1e-22
    Armijo = 0.1

    if x0.shape[0] == n:
        x = clamp(x0)
    else:
        lu = np.array([lower, upper])
        lu[~np.isfinite(lu)] = np.nan  # ignore infinite bounds in the midpoint
        x = np.nanmean(lu, axis=0)

    value = np.dot(x.T, np.dot(H, x)) + np.dot(x.T, g)

    for iteration in range(maxIter):
        if result != 0:
            break

        if iteration > 1 and (oldvalue - value) < minRelImprove * abs(oldvalue):
            result = 4
            logging.info("[QP info] Improvement smaller than tolerance")
            break

        oldvalue = value

        grad = g + np.dot(H, x)

        old_clamped = clamped
        clamped = np.zeros(n)
        clamped[np.logical_and(x == lower, grad > 0)] = 1
        clamped[np.logical_and(x == upper, grad < 0)] = 1
        free = np.logical_not(clamped)

        if np.all(clamped):
            result = 6
            logging.info("[QP info] All dimensions are clamped")
            break

        if iteration == 0:
            factorize = True
        else:
            factorize = np.any(old_clamped != clamped)

        if factorize:
            try:
                if not np.all(np.allclose(H, H.T)):
                    H = np.triu(H)
                Hfree = np.linalg.cholesky(H[np.ix_(free, free)])
            except LinAlgError:
                eigs, _ = np.linalg.eig(H[np.ix_(free, free)])
                print(eigs)
                result = -1
                logging.info("[QP info] Hessian is not positive definite")
                break
            nfactor += 1

        gnorm = np.linalg.norm(grad[free])
        if gnorm < minGrad:
            result = 5
            logging.info("[QP info] Gradient norm smaller than tolerance")
            break

        grad_clamped = g + np.dot(H, x*clamped)
        search = np.zeros(n)

        y = np.linalg.lstsq(Hfree.T, grad_clamped[free])[0]
        search[free] = -np.linalg.lstsq(Hfree, y)[0] - x[free]
        sdotg = np.sum(search*grad)
        if sdotg >= 0:
            print(f"[QP info] No descent direction found. Should not happen. Grad is {grad}")
            break

        # armijo linesearch
        step = 1
        nstep = 0
        xc = clamp(x + step*search)
        vc = np.dot(xc.T, g) + 0.5*np.dot(xc.T, np.dot(H, xc))
        while (vc - oldvalue) / (step*sdotg) < Armijo:
            step *= stepDec
            nstep += 1
            xc = clamp(x + step * search)
            vc = np.dot(xc.T, g) + 0.5 * np.dot(xc.T, np.dot(H, xc))

            if step < minStep:
                result = 2
                break

        # accept candidate
        x = xc
        value = vc

        # print(f"[QP info] Iteration {iteration}, value of the cost: {vc}")

    if iteration >= maxIter - 1:
        result = 1

    return x, result, Hfree, free
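H[np.ix_(free, free)] works because np.ix_ also accepts boolean masks: the free/clamped logical arrays select the sub-Hessian over the unclamped dimensions before factorizing it. A standalone sketch of boolean indexing with np.ix_ (values made up):

import numpy as np

H = np.diag([4.0, 3.0, 2.0, 1.0])
free = np.array([True, False, True, True])

# boolean masks are converted to the integer indices of their True entries
H_free = H[np.ix_(free, free)]
assert H_free.shape == (3, 3)
assert np.allclose(np.diag(H_free), [4.0, 2.0, 1.0])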
Example #28
 def permute(self, perm):
     """
     Permute the discrete latent states.
     """
     self.log_Ps = self.log_Ps[np.ix_(perm, perm)]
Example #29
 def permute(self, perm):
     self.log_Ps = self.log_Ps[np.ix_(perm, perm)]
     self.weights[-1] = self.weights[-1][:,perm]
     self.biases[-1] = self.biases[-1][perm]
Example #30
def M1DGMM(y, n_clusters, r, k, init, var_distrib, nj, it = 50, \
          eps = 1E-05, maxstep = 100, seed = None, perform_selec = True,\
              dm =  [], max_patience = 1, use_silhouette = True):# dm small hack to remove 
    
    ''' Fit a Generalized Linear Mixture of Latent Variables Model (GLMLVM)
    
    y (numobs x p ndarray): The observations containing mixed variables
    n_clusters (int): The number of clusters to look for in the data
    r (list): The dimension of latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y 
    nj (p 1darray): For binary/count data: The maximum values that the variable can take. 
                    For ordinal data: the number of different existing categories for each variable
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increase by less than eps then the algorithm stops
    maxstep (int): The maximum number of optimisation step for each variable
    seed (int): The random state seed to set (Only for numpy generated data for the moment)
    perform_selec (Bool): Whether to perform architecture selection or not
    use_silhouette (Bool): If True use the silhouette as quality criterion (best for clustering) else use
                            the likelihood (best for data augmentation).
    ------------------------------------------------------------------------------------------------
    returns (dict): The predicted classes, the likelihood through the EM steps
                    and a continuous representation of the data
    '''

    prev_lik = - 1E16
    best_lik = -1E16
    
    best_sil = -1 
    new_sil = -1 
        
    tol = 0.01
    patience = 0
    is_looking_for_better_arch = False
    
    # Initialize the parameters
    eta = deepcopy(init['eta'])
    psi = deepcopy(init['psi'])
    lambda_bin = deepcopy(init['lambda_bin'])
    lambda_ord = deepcopy(init['lambda_ord'])
    lambda_cont = deepcopy(init['lambda_cont'])
    lambda_categ = deepcopy(init['lambda_categ'])

    H = deepcopy(init['H'])
    w_s = deepcopy(init['w_s']) # Probability of path s' through the network for all s' in Omega
   
    numobs = len(y)
    likelihood = []
    silhouette = []
    it_num = 0
    ratio = 1000
    np.random.seed(seed)
    out = {} # Store the full output
        
    # Dispatch variables between categories
    y_bin = y[:, np.logical_or(var_distrib == 'bernoulli',var_distrib == 'binomial')]
    nj_bin = nj[np.logical_or(var_distrib == 'bernoulli',var_distrib == 'binomial')].astype(int)
    nb_bin = len(nj_bin)
        
    y_ord = y[:, var_distrib == 'ordinal']    
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nb_ord = len(nj_ord)
    
    y_categ = y[:, var_distrib == 'categorical']
    nj_categ = nj[var_distrib == 'categorical'].astype(int)
    nb_categ = len(nj_categ)    
    
    y_cont = y[:, var_distrib == 'continuous'].astype(float)
    nb_cont = y_cont.shape[1]
    
    # Set y_cont standard deviation to 1
    y_cont = y_cont / y_cont.std(axis = 0, keepdims = True)
    
    L = len(k)
    k_aug = k + [1]
    S = np.array([np.prod(k_aug[l:]) for l in range(L + 1)])    
    M = M_growth(1, r, numobs)
   
    assert nb_bin + nb_ord + nb_cont + nb_categ > 0 
    if nb_bin + nb_ord + nb_cont + nb_categ != len(var_distrib):
        raise ValueError('Some variable types were not understood,\
                         existing types are: continuous, categorical,\
                         ordinal, binomial and bernoulli')

    # Compute the Gower matrix
    if len(dm) == 0:
        cat_features = np.logical_or(var_distrib == 'categorical', var_distrib == 'bernoulli')
        dm = gower_matrix(y, cat_features = cat_features)
    
               
    # Do not stop the iterations if there are some iterations left or if the likelihood is increasing
    # or if we have not reached the maximum patience and if a new architecture was looked for
    # in the previous iteration
    while ((it_num < it) & (ratio > eps) & (patience <= max_patience)) | is_looking_for_better_arch:
        print(it_num)

        # The clustering layer is the one used to perform the clustering 
        # i.e. the layer l such that k[l] == n_clusters
        
        if not(isnumeric(n_clusters)):
            if n_clusters == 'auto':
                clustering_layer = 0
            else:
                raise ValueError('Please enter an int or "auto" for n_clusters')
        else:
            assert (np.array(k) == n_clusters).any()
            clustering_layer = np.argmax(np.array(k) == n_clusters)

        #####################################################################################
        ################################# S step ############################################
        #####################################################################################

        #=====================================================================
        # Draw from f(z^{l} | s, Theta) for all s in Omega
        #=====================================================================  
        
        mu_s, sigma_s = compute_path_params(eta, H, psi)
        sigma_s = ensure_psd(sigma_s)
        z_s, zc_s = draw_z_s(mu_s, sigma_s, eta, M)
         
        #========================================================================
        # Draw from f(z^{l+1} | z^{l}, s, Theta) for l >= 1
        #========================================================================
        
        chsi = compute_chsi(H, psi, mu_s, sigma_s)
        chsi = ensure_psd(chsi)
        rho = compute_rho(eta, H, psi, mu_s, sigma_s, zc_s, chsi)

        # In the following z2 and z1 will denote z^{l+1} and z^{l} respectively
        z2_z1s = draw_z2_z1s(chsi, rho, M, r)
                   
        #=======================================================================
        # Compute the p(y| z1) for all variable categories
        #=======================================================================
        
        py_zl1 = fy_zl1(lambda_bin, y_bin, nj_bin, lambda_ord, y_ord, nj_ord, \
                        lambda_categ, y_categ, nj_categ, y_cont, lambda_cont, z_s[0])
        
        #========================================================================
        # Draw from p(z1 | y, s) proportional to p(y | z1) * p(z1 | s) for all s
        #========================================================================
                
        zl1_ys = draw_zl1_ys(z_s, py_zl1, M)
                
        #####################################################################################
        ################################# E step ############################################
        #####################################################################################
        
        #=====================================================================
        # Compute the conditional probabilities used in the appendix of the paper
        #=====================================================================
        
        pzl1_ys, ps_y, p_y = E_step_GLLVM(z_s[0], mu_s[0], sigma_s[0], w_s, py_zl1)

        #=====================================================================
        # Compute p(z^{(l)}| s, y). Equation (5) of the paper
        #=====================================================================
        
        pz2_z1s = fz2_z1s(t(pzl1_ys, (1, 0, 2)), z2_z1s, chsi, rho, S)
        pz_ys = fz_ys(t(pzl1_ys, (1, 0, 2)), pz2_z1s)
                
        
        #=====================================================================
        # Compute MFA expectations
        #=====================================================================
        
        Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys = \
            E_step_DGMM(zl1_ys, H, z_s, zc_s, z2_z1s, pz_ys, pz2_z1s, S)


        ###########################################################################
        ############################ M step #######################################
        ###########################################################################
             
        #=======================================================
        # Compute MFA Parameters 
        #=======================================================

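        # Mixture-path weights: average of the posterior path probabilities over the observations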
        w_s = np.mean(ps_y, axis = 0)      
        eta, H, psi = M_step_DGMM(Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys, ps_y, H, k)

        #=======================================================
        # Identifiability conditions
        #======================================================= 

        # Update eta, H and Psi values
        H = diagonal_cond(H, psi)
        Ez, AT = compute_z_moments(w_s, eta, H, psi)
        eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)
        
        del(Ez)
        
        #=======================================================
        # Compute GLLVM Parameters
        #=======================================================
                        
        lambda_bin = bin_params_GLLVM(y_bin, nj_bin, lambda_bin, ps_y, pzl1_ys, z_s[0], AT[0],\
                     tol = tol, maxstep = maxstep)
                 
        lambda_ord = ord_params_GLLVM(y_ord, nj_ord, lambda_ord, ps_y, pzl1_ys, z_s[0], AT[0],\
                     tol = tol, maxstep = maxstep)
            
        lambda_categ = categ_params_GLLVM(y_categ, nj_categ, lambda_categ, ps_y, pzl1_ys, z_s[0], AT[0],\
                     tol = tol, maxstep = maxstep)

        lambda_cont = cont_params_GLLVM(y_cont, lambda_cont, ps_y, pzl1_ys, z_s[0], AT[0],\
                     tol = tol, maxstep = maxstep)

        ###########################################################################
        ################## Clustering parameters updating #########################
        ###########################################################################
          
        new_lik = np.sum(np.log(p_y))
        likelihood.append(new_lik)
        ratio = abs((new_lik - prev_lik)/prev_lik)
        
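        # Marginalise ps_y over every layer except the clustering layer to get the
        # posterior component probabilities used for the class assignment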
        idx_to_sum = tuple(set(range(1, L + 1)) - set([clustering_layer + 1]))
        psl_y = ps_y.reshape(numobs, *k, order = 'C').sum(idx_to_sum) 

        temp_class = np.argmax(psl_y, axis = 1)
        try:
            new_sil = silhouette_score(dm, temp_class, metric = 'precomputed')
        except ValueError:
            new_sil = -1
        silhouette.append(new_sil)
           
        # Store the params according to the silhouette or likelihood
        is_better = (best_sil < new_sil) if use_silhouette else (best_lik < new_lik)
            
        if is_better:
            z = (ps_y[..., n_axis] * Ez_ys[clustering_layer]).sum(1)
            best_sil = deepcopy(new_sil)
            classes = deepcopy(temp_class)
            
            # Store the output
            out['classes'] = deepcopy(classes)
            out['best_z'] = deepcopy(z_s[0])
            out['Ez.y'] = z
            out['best_k'] = deepcopy(k)
            out['best_r'] = deepcopy(r)
            
            out['best_w_s'] = deepcopy(w_s)
            out['lambda_bin'] = deepcopy(lambda_bin)
            out['lambda_ord'] = deepcopy(lambda_ord)
            out['lambda_categ'] = deepcopy(lambda_categ)
            out['lambda_cont'] = deepcopy(lambda_cont)

            out['eta'] = deepcopy(eta)            
            out['mu'] = deepcopy(mu_s)
            out['sigma'] = deepcopy(sigma_s)
            
            out['psl_y'] = deepcopy(psl_y)
            out['ps_y'] = deepcopy(ps_y)

            
        # Keep track of the best likelihood reached so far
        if best_lik < new_lik:
            best_lik = deepcopy(new_lik)
                               
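        # When the likelihood improves, reset the patience counter and grow the number
        # of MC copies for the next iteration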
        if prev_lik < new_lik:
            patience = 0
            M = M_growth(it_num + 2, r, numobs)
        else:
            patience += 1
                          
        ###########################################################################
        ######################## Parameter selection  #############################
        ###########################################################################
        min_nb_clusters = 2
       
        if isnumeric(n_clusters): # To change when add multi mode
            is_not_min_specif = not(np.all(np.array(k) == n_clusters) & np.array_equal(r, [2,1]))
        else:
            is_not_min_specif = not(np.all(np.array(k) == min_nb_clusters) & np.array_equal(r, [2,1]))
        
        is_looking_for_better_arch = look_for_simpler_network(it_num) & perform_selec & is_not_min_specif
        if is_looking_for_better_arch:
            r_to_keep = r_select(y_bin, y_ord, y_categ, y_cont, zl1_ys, z2_z1s, w_s)
            
            # If r_to_keep[l] is empty for some layer l, that layer and all deeper layers are dropped
            new_L = np.sum([len(rl) != 0 for rl in r_to_keep]) - 1
            
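            # k_select proposes, for each kept layer, which mixture components to keep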
            k_to_keep = k_select(w_s, k, new_L, clustering_layer, not(isnumeric(n_clusters)))
    
            is_L_unchanged = (L == new_L)
            is_r_unchanged = np.all([len(r_to_keep[l]) == r[l] for l in range(new_L + 1)])
            is_k_unchanged = np.all([len(k_to_keep[l]) == k[l] for l in range(new_L)])
              
            is_selection = not(is_r_unchanged & is_k_unchanged & is_L_unchanged)
            
            assert new_L > 0
            
            if is_selection:           
                
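                # Prune eta, H and psi: keep only the selected components (k_to_keep)
                # and latent dimensions (r_to_keep) at each layer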
                eta = [eta[l][k_to_keep[l]] for l in range(new_L)]
                eta = [eta[l][:, r_to_keep[l]] for l in range(new_L)]
                
                H = [H[l][k_to_keep[l]] for l in range(new_L)]
                H = [H[l][:, r_to_keep[l]] for l in range(new_L)]
                H = [H[l][:, :, r_to_keep[l + 1]] for l in range(new_L)]
                
                psi = [psi[l][k_to_keep[l]] for l in range(new_L)]
                psi = [psi[l][:, r_to_keep[l]] for l in range(new_L)]
                psi = [psi[l][:, :, r_to_keep[l]] for l in range(new_L)]
                
                if nb_bin > 0:
                    # Add the intercept:
                    bin_r_to_keep = np.concatenate([[0], np.array(r_to_keep[0]) + 1]) 
                    lambda_bin = lambda_bin[:, bin_r_to_keep]
                 
                if nb_ord > 0:
                    # Intercept coefficients handling is a little more complicated here
                    lambda_ord_intercept = [lambda_ord_j[:-r[0]] for lambda_ord_j in lambda_ord]
                    Lambda_ord_var = np.stack([lambda_ord_j[-r[0]:] for lambda_ord_j in lambda_ord])
                    Lambda_ord_var = Lambda_ord_var[:, r_to_keep[0]]
                    lambda_ord = [np.concatenate([lambda_ord_intercept[j], Lambda_ord_var[j]])\
                                  for j in range(nb_ord)]
    
                # To recheck
                if nb_cont > 0:
                    # Add the intercept:
                    cont_r_to_keep = np.concatenate([[0], np.array(r_to_keep[0]) + 1]) 
                    lambda_cont = lambda_cont[:, cont_r_to_keep]  
                    
                if nb_categ > 0:
                    lambda_categ_intercept = [lambda_categ[j][:, 0]  for j in range(nb_categ)]
                    Lambda_categ_var = [lambda_categ_j[:,-r[0]:] for lambda_categ_j in lambda_categ]
                    Lambda_categ_var = [Lambda_categ_var_j[:, r_to_keep[0]] for Lambda_categ_var_j in Lambda_categ_var]

                    lambda_categ = [np.hstack([lambda_categ_intercept[j][..., n_axis], Lambda_categ_var[j]])\
                                   for j in range(nb_categ)]  

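                # Rebuild and renormalise the path probabilities w_s on the pruned architecture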
                w = w_s.reshape(*k, order = 'C')
                new_k_idx_grid = np.ix_(*k_to_keep[:new_L])
                
                # If layer deletion, sum the last components of the paths
                if L > new_L: 
                    deleted_dims = tuple(range(L)[new_L:])
                    w_s = w[new_k_idx_grid].sum(deleted_dims).flatten(order = 'C')
                else:
                    w_s = w[new_k_idx_grid].flatten(order = 'C')
    
                w_s /= w_s.sum()
                
                
                # Refresh the classes: TO RECHECK
                #idx_to_sum = tuple(set(range(1, L + 1)) - set([clustering_layer + 1]))
                #ps_y_tmp = ps_y.reshape(numobs, *k, order = 'C').sum(idx_to_sum)
                #np.argmax(ps_y_tmp[:, k_to_keep[0]], axis = 1)

    
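                # Update the architecture description (k, r, S, L) to the pruned sizes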
                k = [len(k_to_keep[l]) for l in range(new_L)]
                r = [len(r_to_keep[l]) for l in range(new_L + 1)]
                
                k_aug = k + [1]
                S = np.array([np.prod(k_aug[l:]) for l in range(new_L + 1)])    
                L = new_L

                patience = 0
                
                # Identifiability conditions
                H = diagonal_cond(H, psi)
                Ez, AT = compute_z_moments(w_s, eta, H, psi)
                eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)
        
                del(Ez)
                                                
                         
            print('New architecture:')
            print('k', k)
            print('r', r)
            print('L', L)
            print('S', S)
            print('w_s', len(w_s))
            
        prev_lik = deepcopy(new_lik)
        it_num = it_num + 1
        print('Likelihood path:', likelihood)
        print('Silhouette path:', silhouette)
        

    out['likelihood'] = likelihood
    out['silhouette'] = silhouette
    
    return out