Exemplo n.º 1
0
def get_l2_measures(w_matrix, sequences):
    """
    Computes the squared L2-norm between the visible units representing a MRI
    sequence and each hidden unit.

    The rows of ``w_matrix`` are split into ``len(sequences)`` consecutive,
    equally sized slices (one per sequence, in list order) and the
    module-level ``l2`` helper is applied to each slice along axis 0.  When
    the number of rows is not a multiple of ``len(sequences)``, the trailing
    remainder rows are not used.

    Parameters
    ----------
    w_matrix: numpy array.
        Weights matrix of the RBM.
    sequences: list.
        List containing the name of the MRI sequences

    Returns
    -------
    numpy array.
        Array [n_sequences X n_hidden_units] containing the value returned by
        ``l2`` (the squared L2-norm, per the helper's contract) between each
        hidden unit and the MRI sequences

    Raises
    ------
    ZeroDivisionError
        If ``sequences`` is empty.
    """
    n_sequences = len(sequences)
    l2_measures = np.zeros((n_sequences, w_matrix.shape[1]))
    # Floor division keeps the slice size an exact integer without a float
    # round trip.
    n_visibles_voxels = w_matrix.shape[0] // n_sequences

    # `range` rather than the Python 2-only `xrange`, so the function runs
    # unchanged on both Python 2 and Python 3.
    for i in range(n_sequences):
        start = i * n_visibles_voxels
        stop = start + n_visibles_voxels
        l2_measures[i, :] = l2(w_matrix[start:stop, :], axis=0)

    return l2_measures
def L_r(Tt, ut, bf, c):
    """Build the three terms derived from ``Tt``, ``ut``, ``bf`` and ``c``.

    One-dimensional ``Tt`` (and, with it, ``ut``) is first promoted to a
    column vector.  With ``n = Tt.shape[0]`` and ``s = -Tt + c * ut``:

    * if ``bf <= 1e-10`` the result is
      ``(zeros(n, n), bf * ones(n, 1), s / l2(s))``;
    * otherwise the result is
      ``(c * (A - I), -A.dot(hf(Tt, c * ut, bf)), A.dot(s) / l2(s))`` with
      ``A = inv(I - beta * M(Tt, c * ut, bf))``.

    ``l2``, ``M`` and ``hf`` are helpers defined elsewhere in the module.

    Returns
    -------
    tuple
        The three terms described above, in that order.
    """
    if Tt.ndim <= 1:
        # Work with column vectors so the matrix products below line up.
        Tt = np.atleast_2d(Tt).T
        ut = np.atleast_2d(ut).T

    cut = c * ut
    identity = np.eye(Tt.shape[0])
    shifted = -Tt + cut

    if bf <= 1e-10:
        # Bound is (numerically) zero: no matrix term, constant second term.
        return (0 * identity,
                bf * np.ones((identity.shape[0], 1)),
                shifted / l2(shifted))

    m_matrix = M(Tt, cut, bf)
    alpha = -Tt.T.dot(shifted) / (l2(-Tt) * l2(shifted))
    delta = min(l2(-Tt) / bf, 1)
    # beta only departs from 1 when alpha is negative.
    beta = 1 / (1 - alpha * delta) if alpha < 0 else 1
    inverse = np.linalg.inv(identity - beta * m_matrix)

    v = inverse.dot(shifted) / l2(shifted)
    return c * (inverse - identity), -inverse.dot(hf(Tt, cut, bf)), v
Exemplo n.º 3
0
def mean_field_hs(Vs, K):
    r"""
    Iterate the mean-field self-consistency equations until they stop changing.

    Pj(xj) = 1/Z0 *exp(-beta*hj(xj)), where
    hj(xj) = \sum_{<j,jp>} \sum_{xjp \in jp} V(xj,xjp)*Pjp(xjp)

    We assume a Potts model of m variables x0...xj...xm-1 where each
    variable can take on K states 0...i...K-1.  Mean field functions h
    are represented as a matrix hs where each row gives the values
    hj(i).  [Note that i,j are reversed from the usual row-column
    convention.]

    Input is a matrix Vs of pairwise contributions to the hamiltonian
    where Vs[j][jp] is a function V(xj,xjp).  Only the entries with
    j < jp are called: a pair (jp, j) with jp < j is evaluated as
    Vs[jp][j](xjp, xj).

    ``beta``, ``exp``, ``matcopy``, ``l2`` and ``concat`` are taken from the
    enclosing module.  After every sweep the value of
    ``l2(concat(hs), concat(old_hs))`` is printed, and the new ``hs`` is
    printed whenever another sweep is needed.

    Returns the list-of-lists ``hs`` (M rows of K values) once a full sweep
    leaves it exactly equal to the previous one.
    """
    M = len(Vs)
    hs = [[1 for i in range(K)] for j in range(M)]

    def Pj(xj, j):
        # Probability of state xj for variable j under the current hs.
        return exp(-beta * hs[j][xj]) / sum(exp(-beta * hs[j][xjp]) for xjp in range(K))

    old_hs = matcopy(hs)
    while True:
        # hs is updated in place, so later entries of a sweep already see the
        # values written earlier in the same sweep.
        for j in range(M):
            for i in range(K):
                # Pairs (j, jp) with jp > j: state i is V's first argument.
                upper = sum(
                    sum(Vs[j][jp](i, ip) * Pj(ip, jp) for ip in range(K))
                    for jp in range(j + 1, M)
                )
                # Pairs (jp, j) with jp < j: state i is V's second argument.
                # The range must run up to and including j - 1, otherwise the
                # neighbouring pair (j - 1, j) never contributes to hj.
                lower = sum(
                    sum(Vs[jp][j](ip, i) * Pj(ip, jp) for ip in range(K))
                    for jp in range(j)
                )
                hs[j][i] = upper + lower
        print(l2(concat(hs), concat(old_hs)))
        # NOTE: convergence is tested with exact equality, not a tolerance.
        if old_hs == hs:
            break
        old_hs = matcopy(hs)
        print(hs)
    return hs
Exemplo n.º 4
0
Arquivo: hmm.py Projeto: poneill/amic
def baum_welch(obs,L):
    """Given sequence and bs length L, approximate MLE parameters for
    emission probabilities,transition rate a01 (background->site).

    Starting from a random ``a01`` and random emission rows, the loop
    alternates between (1) decoding hidden states with ``viterbi`` under the
    current parameters and (2) re-estimating ``a01``, the start/transition
    tables and the emission table from that decoding.  It stops when the
    start, transition and emission tables and the decoded states are all
    exactly equal to those of the previous round.

    NOTE(review): despite the name, the visible loop relies on the single
    decoded path returned by ``viterbi``; confirm whether that is intended.

    One progress line is printed per non-final round:
    ``iterations a01 l2(start) l2(trans) l2(hidden_states)``.

    Returns the tuple ``(start_p, trans_p, emit_p, hidden_states)``.

    TODO: non-uniform background frequencies"""
    # A real list on both Python 2 and 3, as the helpers received before.
    states = list(range(L + 1))
    a01 = random.random()
    start_p = make_start_p(a01)
    trans_p = make_trans_p(a01)
    emit_p = [simplex_sample(4) for state in states]
    # Random initial labelling; only used for the first convergence check.
    hidden_states = [random.choice(states) for ob in obs]
    iterations = 0
    while True:
        # compute hidden states, given probs (the path probability is unused)
        _prob, hidden_states_new = viterbi(obs, states, start_p, trans_p, emit_p)
        # compute probs, given hidden states
        # first compute a01
        a01_new = estimate_a01(hidden_states_new)
        start_p_new = make_start_p(a01_new)
        trans_p_new = make_trans_p(a01_new)
        emit_p_new = estimate_emit_p(obs, hidden_states_new, states)
        if (start_p_new == start_p and
            trans_p_new == trans_p and
            emit_p_new == emit_p and
            hidden_states_new == hidden_states):
            break
        # Single-argument print(...) behaves the same on Python 2 and 3 and
        # produces one space-separated line, as the comma-chained prints did.
        print("%s %s %s %s %s" % (
            iterations,
            a01,
            l2(start_p, start_p_new),
            l2(concat(trans_p), concat(trans_p_new)),
            l2(hidden_states, hidden_states_new),
        ))
        a01 = a01_new
        start_p = start_p_new
        trans_p = trans_p_new
        emit_p = emit_p_new
        hidden_states = hidden_states_new
        iterations += 1
    return start_p, trans_p, emit_p, hidden_states
Exemplo n.º 5
0
def mean_field_hs():
    """Following derivation on wikipedia's mean field theory page...

    Iterates the single shared mean-field function h until two successive
    iterates differ by less than 10**-10 according to ``l2``.  ``J``, ``G``,
    ``eps``, ``beta``, ``choose``, ``exp`` and ``l2`` are read from the
    enclosing module.  Every intermediate iterate is printed.

    Returns the last accepted iterate: a list of G+1 values where entry i
    stores h(i).
    """
    def V(xj,xjp):
        # Equal, non-zero states get a huge penalty.  (The update loop below
        # only ever calls V with xj != xjp.)
        if xj == xjp > 0:
            return 10**10
        # divide by choose(J,2) since we're summing over pairs
        return (eps[xj] + eps[xjp])/choose(J,2)

    # Self-consistency equation for the mean field approximation:

    # Pj(xj) = 1/Z0 *exp(-beta*hj(xj)), where

    # hj(xj) = \sum_{<j,jp>} \sum_{xjp \in jp} V(xj,xjp)*Pjp(xjp)

    # In this case, the graph is fully connected and all variables xj
    # are exchangeable, so the sum over pairs reduces to a factor (J-1),
    # because each term appears J-1 times.

    # Moreover, due to exchangeability hj is the same for each
    # variable, so we can update a single function h(x) for all
    # variables.  h(x) is a function with G+1 possible input values,
    # so we can represent h as an array of size G+1 such that h[i]
    # stores the value h(i).

    # Initialize it arbitrarily
    h_cur = [1] * (G+1)
    h_next = [0] * (G+1)
    while True:
        # h_cur is fixed during a sweep, so the probabilities P(i) are
        # computed once per sweep instead of once per (i, ip) pair.
        weights = [exp(-beta*h) for h in h_cur]
        z = sum(weights)
        probs = [w/z for w in weights]
        for i in range(G+1):
            terms = [V(i,ip)*probs[ip] for ip in range(G+1) if ip != i]
            h_next[i] = (J-1)*sum(terms)
        if l2(h_next,h_cur) < 10**-10:
            break
        h_cur = h_next[:]
        print(h_cur)
    return h_cur
def Ff(Tt, cut, bf):
    """Return ``-Tt.dot((-Tt + cut).T)`` divided by a scalar scale.

    The scale is ``max(bf, l2(-Tt)) * l2(-Tt + cut)``, where ``l2`` is the
    module-level norm helper.
    """
    shifted = -Tt + cut
    # Original author's note: the denominator is a scalar (one per face), so
    # it is applied to the whole product at once to avoid shape issues.
    scale = max(bf, l2(-Tt)) * l2(shifted)
    product = -Tt.dot(shifted.T)
    return product / scale
def ef(Tt, cut, bf):
    """Return ``bf`` divided by ``l2(-Tt + cut)`` (``l2`` is a module helper)."""
    shifted_norm = l2(-Tt + cut)
    return bf / shifted_norm
def active_sliding(Tt, ut, bf, ct):
    """Tell whether ``l2(-Tt + ct * ut)`` exceeds ``bf`` by more than 1e-10."""
    excess = l2(-Tt + ct * ut) - bf
    return excess > 1e-10
    def fit(self):
        """Update ``self.coef_`` by repeated group-wise passes; return self.

        Each pass:

        1. runs the sparse group lasso selection over the kappa groups
           (``self.groups[0][1:]`` with ``lbda1``/``alpha1``) and then over
           the tau groups (``self.groups[1][1:]`` with ``lbda2``/``alpha2``):
           a group is set to zero when ``discard_group`` says so, otherwise
           it is replaced by ``block_wise_descent_fitting``'s result;
        2. re-estimates beta_0 and then theta_0 (the coefficients indexed by
           the first entry of ``self.groups[0]`` and ``self.groups[1]``) with
           ``fmin_l_bfgs_b`` using a numerically approximated gradient.

        Passes continue while ``l2(self.coef_ - previous coef_)`` is greater
        than ``self.rtol`` and no more than ``self.max_iter`` passes have run.
        """

        def select_groups(groups, lbda, alpha):
            # Decide, group by group, between zeroing and a descent update.
            for gr in groups:
                # 1- Should the group be zero-ed out?
                trial_coefs = self.coef_.copy()
                trial_coefs[gr] = 0.
                drop = discard_group(self.y, self.knots, trial_coefs,
                                     self.splrep, self.proj_matrix, self.u,
                                     lbda, alpha, gr)
                if drop:
                    self.coef_[gr] = 0.
                    continue
                # 2- If the group is not zero-ed out, update each component
                self.coef_[gr] = block_wise_descent_fitting(
                    self.coef_, self.y, self.knots, self.splrep,
                    self.proj_matrix, self.u, lbda, alpha, gr)

        def refit_leading_coef(group, objective):
            # Minimise `objective` over the coefficient named by group[0],
            # holding every other coefficient at its current value.
            index = int(group[0])
            start = self.coef_[index]
            others = get_coef_exclude_ind(self.coef_, index)
            estimate, _value, _info = fmin_l_bfgs_b(
                func=objective,
                x0=start,
                args=(others, index, self.y, self.knots,
                      self.splrep, self.proj_matrix, self.u),
                approx_grad=True)
            self.coef_[index] = estimate

        distance = 1.
        nb_iter = 1
        while distance > self.rtol and nb_iter <= self.max_iter:
            previous_coefs = self.coef_.copy()

            # sparse group lasso selection on kappa, then on tau
            select_groups(self.groups[0][1:], self.lbda1, self.alpha1)
            select_groups(self.groups[1][1:], self.lbda2, self.alpha2)

            # estimate beta_0, then theta_0 (theta_0 sees the new beta_0)
            refit_leading_coef(self.groups[0], gradient_f_beta0)
            refit_leading_coef(self.groups[1], gradient_f_theta0)

            nb_iter += 1
            distance = l2(self.coef_ - previous_coefs)
        return self