def get_l2_measures(w_matrix, sequences):
    """
    Computes the squared L2-norm between the visible units representing an MRI
    sequence and each hidden unit.

    Parameters
    ----------
    w_matrix: numpy array.
        Weights matrix of the RBM.
    sequences: list.
        List containing the names of the MRI sequences.

    Returns
    -------
    numpy array.
        Array [n_sequences X n_hidden_units] containing the squared L2-norm
        between each hidden unit and the MRI sequences.
    """
    l2_measures = np.zeros((len(sequences), w_matrix.shape[1]))
    n_visibles_voxels = int(float(w_matrix.shape[0]) / float(len(sequences)))
    for i in range(len(sequences)):
        # Each sequence owns a contiguous block of visible units (voxels).
        l2_measures[i, :] = \
            l2(w_matrix[i * n_visibles_voxels: (i + 1) * n_visibles_voxels, :],
               axis=0)
    return l2_measures
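# Minimal usage sketch for get_l2_measures.  The weight matrix and sequence
# names below are random placeholders rather than a trained RBM, and the
# stand-in l2 helper (squared L2-norm along an axis, as the docstring above
# describes) is an assumption: the real l2 used in this module may differ.
import numpy as np

def l2(x, axis=None):
    # Hypothetical stand-in: squared L2-norm along the given axis.
    return np.sum(x ** 2, axis=axis)

sequences = ["T1", "T2", "FLAIR"]                 # placeholder MRI sequence names
n_visible, n_hidden = 3 * 125, 64                 # visible units split evenly per sequence
w_matrix = np.random.randn(n_visible, n_hidden)   # stand-in for trained RBM weights

measures = get_l2_measures(w_matrix, sequences)
print(measures.shape)  # (3, 64): one row of per-hidden-unit norms per sequence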
def L_r(Tt, ut, bf, c):
    # Promote 1-D inputs to column vectors.
    if Tt.ndim <= 1:
        Tt = np.atleast_2d(Tt).T
        ut = np.atleast_2d(ut).T
    cut = c * ut
    Id = np.eye(Tt.shape[0])
    # Degenerate case: bf is numerically zero.
    if bf <= 1e-10:
        return 0 * Id, bf * np.ones((Id.shape[0], 1)), (-Tt + cut) / l2(-Tt + cut)
    _M = M(Tt, cut, bf)
    alpha = -Tt.T.dot(-Tt + cut) / (l2(-Tt) * l2(-Tt + cut))
    delta = min(l2(-Tt) / bf, 1)
    if alpha < 0:
        beta = 1 / (1 - alpha * delta)
    else:
        beta = 1
    IdM_inv = np.linalg.inv(Id - beta * _M)
    v = IdM_inv.dot(-Tt + cut) / l2(-Tt + cut)
    return c * (IdM_inv - Id), -IdM_inv.dot(hf(Tt, cut, bf)), v
def mean_field_hs(Vs, K):
    r"""
    Pj(xj) = 1/Z0 * exp(-beta*hj(xj)), where
    hj(xj) = \sum_{<j,jp>} \sum_{xjp \in jp} V(xj,xjp)*Pjp(xjp)

    We assume a Potts model of m variables x0...xj...xm-1 where each variable
    can take on K states 0...i...K-1.  Mean field functions h are represented
    as a matrix hs where each row gives the values hj(i).  [Note that i, j are
    reversed from the usual row-column convention.]  Input is a matrix Vs of
    pairwise contributions to the Hamiltonian where Vs[j][jp] is a function
    V(xj, xjp).
    """
    M = len(Vs)
    jpairs = pairs(range(M))
    hs = [[1 for i in range(K)] for j in range(M)]

    def Pj(xj, j):
        return exp(-beta * hs[j][xj]) / sum(exp(-beta * hs[j][xjp])
                                            for xjp in range(K))

    old_hs = matcopy(hs)
    while True:
        for j in range(M):
            for i in range(K):
                # Sum over neighbours jp > j (via Vs[j][jp]) and jp < j (via Vs[jp][j]).
                hs[j][i] = sum(sum(Vs[j][jp](i, ip) * Pj(ip, jp)
                                   for ip in range(K))
                               for jp in range(j + 1, M)) \
                         + sum(sum(Vs[jp][j](ip, i) * Pj(ip, jp)
                                   for ip in range(K))
                               for jp in range(0, j))
        print(l2(concat(hs), concat(old_hs)))
        if old_hs == hs:
            break
        else:
            old_hs = matcopy(hs)
    print(hs)
    return hs
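# A small self-contained sketch of the self-consistency update described in
# the docstring above, for a hypothetical 3-variable chain with K = 2 states
# and a coupling that penalises neighbouring variables taking the same state.
# The names mean_field_toy, V_toy and beta_toy are illustrative only and are
# not part of the module above.
from math import exp

def mean_field_toy(M=3, K=2, beta_toy=1.0, n_iter=50):
    def V_toy(x, xp):
        # Pairwise contribution: cost 1 when neighbours agree, 0 otherwise.
        return 1.0 if x == xp else 0.0

    def P(hs, x, j):
        # P_j(x) = exp(-beta*h_j(x)) / Z_j
        Z = sum(exp(-beta_toy * hs[j][xp]) for xp in range(K))
        return exp(-beta_toy * hs[j][x]) / Z

    hs = [[1.0] * K for _ in range(M)]
    for _ in range(n_iter):
        # h_j(x) = sum over chain neighbours jp and their states xp of V*P.
        hs = [[sum(V_toy(x, xp) * P(hs, xp, jp)
                   for jp in (j - 1, j + 1) if 0 <= jp < M
                   for xp in range(K))
               for x in range(K)]
              for j in range(M)]
    return hs

# print(mean_field_toy())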
def baum_welch(obs, L):
    """Given a sequence and a binding-site length L, approximate the MLE
    parameters for the emission probabilities and the transition rate a01
    (background -> site).

    TODO: non-uniform background frequencies"""
    states = range(L + 1)
    a01 = random.random()
    start_p = make_start_p(a01)
    trans_p = make_trans_p(a01)
    emit_p = [simplex_sample(4) for state in states]
    hidden_states = [random.choice(states) for ob in obs]
    iterations = 0
    while True:
        # Compute hidden states, given the current probabilities.
        prob, hidden_states_new = viterbi(obs, states, start_p, trans_p, emit_p)
        # Compute probabilities, given the hidden states; first compute a01.
        a01_new = estimate_a01(hidden_states_new)
        start_p_new = make_start_p(a01_new)
        trans_p_new = make_trans_p(a01_new)
        emit_p_new = estimate_emit_p(obs, hidden_states_new, states)
        if (start_p_new == start_p and trans_p_new == trans_p and
                emit_p_new == emit_p and hidden_states_new == hidden_states):
            break
        else:
            print(iterations, a01, l2(start_p, start_p_new),
                  l2(concat(trans_p), concat(trans_p_new)),
                  l2(hidden_states, hidden_states_new))
            a01 = a01_new
            start_p = start_p_new
            trans_p = trans_p_new
            emit_p = emit_p_new
            hidden_states = hidden_states_new
            iterations += 1
    return start_p, trans_p, emit_p, hidden_states
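# Hypothetical usage sketch for baum_welch.  It assumes obs is an
# integer-encoded DNA sequence (values 0..3, matching the 4-column emission
# distributions drawn with simplex_sample above) and that the helpers the
# function calls (viterbi, make_start_p, make_trans_p, estimate_a01,
# estimate_emit_p, simplex_sample, concat, l2) are defined elsewhere in this
# module.
import random

obs = [random.randrange(4) for _ in range(500)]   # placeholder sequence
start_p, trans_p, emit_p, hidden = baum_welch(obs, L=8)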
def mean_field_hs():
    """Following the derivation on Wikipedia's mean field theory page..."""
    def V(xj, xjp):
        if xj == xjp > 0:
            retval = 10 ** 10
        else:
            # Divide by choose(J, 2) since we're summing over pairs,
            # because each term appears J-1 times.
            retval = (eps[xj] + eps[xjp]) / choose(J, 2)
        if random.random() < 0:  # debugging print, effectively disabled
            print("V(%s,%s) = %s" % (xj, xjp, retval))
        return retval

    # The self-consistency equation for the mean field approximation is:
    #   Pj(xj) = 1/Z0 * exp(-beta*hj(xj)), where
    #   hj(xj) = \sum_{<j,jp>} \sum_{xjp \in jp} V(xj,xjp)*Pjp(xjp)
    # In this case, the graph is fully connected and all variables xj are
    # exchangeable, so the sum over pairs reduces to (J-1).  Moreover, due to
    # exchangeability hj is the same for each variable, so we can update a
    # single function h(x) for all variables.  h(x) is a function with G+1
    # possible input values, so we can represent h as an array of size G+1
    # such that h[i] stores the value h(i).

    # Initialize it arbitrarily.
    h_cur = [1] * (G + 1)
    h_next = [0] * (G + 1)

    def P(i):
        """Return probability at time t that x takes on value i."""
        return exp(-beta * h_cur[i]) / sum(exp(-beta * h_cur[ip])
                                           for ip in range(G + 1))

    while True:
        for i in range(G + 1):
            terms = [V(i, ip) * P(ip) for ip in range(G + 1) if not i == ip]
            h_next[i] = (J - 1) * sum(terms)
        if l2(h_next, h_cur) < 10 ** -10:
            break
        h_cur = h_next[:]
    print(h_cur)
    return h_cur
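# Hypothetical setup sketch.  mean_field_hs() above reads module-level names
# rather than arguments; the values below (G, J, eps, beta, plus stand-ins for
# choose and l2) are illustrative assumptions, not values from the original
# module, and would shadow any existing definitions of those names.
import random
from math import exp, comb as choose

G, J, beta = 4, 10, 1.0
eps = [0.0] + [1.0] * G   # per-state energies; state 0 is the "empty" state
l2 = lambda xs, ys: sum((x - y) ** 2 for x, y in zip(xs, ys)) ** 0.5

h = mean_field_hs()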
def Ff(Tt, cut, bf):
    denominator = max(bf, l2(-Tt)) * l2(-Tt + cut)
    # To avoid issues with shapes we multiply the scalar denominator (a scalar
    # for each face) into the Lagrange multiplier.
    numerator = -Tt.dot((-Tt + cut).T)
    return numerator / denominator
def ef(Tt, cut, bf):
    return bf / l2(-Tt + cut)
def active_sliding(Tt, ut, bf, ct):
    return l2(-Tt + ct * ut) - bf > 1e-10
def fit(self):
    distance = 1.
    nb_iter = 1
    while distance > self.rtol and nb_iter <= self.max_iter:
        coefs_old = self.coef_.copy()

        # Sparse group lasso selection on kappa.
        for gr in self.groups[0][1:]:
            # 1- Should the group be zero-ed out?
            tmp_coefs_gr = self.coef_.copy()
            tmp_coefs_gr[gr] = 0.
            if discard_group(self.y, self.knots, tmp_coefs_gr, self.splrep,
                             self.proj_matrix, self.u, self.lbda1,
                             self.alpha1, gr):
                self.coef_[gr] = 0.
            # 2- If the group is not zero-ed out, update each component.
            else:
                self.coef_[gr] = block_wise_descent_fitting(
                    self.coef_, self.y, self.knots, self.splrep,
                    self.proj_matrix, self.u, self.lbda1, self.alpha1, gr)

        # Sparse group lasso selection on tau.
        for gr in self.groups[1][1:]:
            # 1- Should the group be zero-ed out?
            tmp_coefs_gr = self.coef_.copy()
            tmp_coefs_gr[gr] = 0.
            if discard_group(self.y, self.knots, tmp_coefs_gr, self.splrep,
                             self.proj_matrix, self.u, self.lbda2,
                             self.alpha2, gr):
                self.coef_[gr] = 0.
            # 2- If the group is not zero-ed out, update each component.
            else:
                self.coef_[gr] = block_wise_descent_fitting(
                    self.coef_, self.y, self.knots, self.splrep,
                    self.proj_matrix, self.u, self.lbda2, self.alpha2, gr)

        # Estimate beta_0.
        ind_beta0 = int(self.groups[0][0])
        beta0_old = self.coef_[ind_beta0]
        coef_excl_beta0 = get_coef_exclude_ind(self.coef_, ind_beta0)
        beta0_new, ignored1, ignored2 = fmin_l_bfgs_b(
            func=gradient_f_beta0, x0=beta0_old,
            args=(coef_excl_beta0, ind_beta0, self.y, self.knots, self.splrep,
                  self.proj_matrix, self.u),
            approx_grad=True)
        self.coef_[ind_beta0] = beta0_new

        # Estimate theta_0.
        ind_theta0 = int(self.groups[1][0])
        theta0_old = self.coef_[ind_theta0]
        coef_excl_theta0 = get_coef_exclude_ind(self.coef_, ind_theta0)
        theta0_new, ignored3, ignored4 = fmin_l_bfgs_b(
            func=gradient_f_theta0, x0=theta0_old,
            args=(coef_excl_theta0, ind_theta0, self.y, self.knots,
                  self.splrep, self.proj_matrix, self.u),
            approx_grad=True)
        self.coef_[ind_theta0] = theta0_new

        # Update the iteration counter and the convergence distance.
        nb_iter += 1
        distance = l2(self.coef_ - coefs_old)
    return self
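# The two inner loops above follow the usual sparse-group-lasso pattern: first
# test whether a whole group can be zero-ed out, and only otherwise run a
# block-wise update of its components.  The snippet below is a simplified,
# self-contained sketch of that discard-or-update idea for a plain group lasso
# on a linear model with orthonormalised group blocks (X_g.T @ X_g / n = I),
# which makes the block update closed-form; it illustrates the pattern only
# and is not the penalised spline model used by fit() above.
import numpy as np

def group_lasso_step(X_blocks, y, betas, lbda):
    """One pass of block coordinate descent over the groups."""
    n = len(y)
    for g, X_g in enumerate(X_blocks):
        # Partial residual: remove every group's contribution except group g.
        r = y - sum(X_blocks[k] @ betas[k]
                    for k in range(len(X_blocks)) if k != g)
        z = X_g.T @ r / n
        norm_z = np.linalg.norm(z)
        if norm_z <= lbda:
            betas[g] = np.zeros_like(betas[g])      # discard the whole group
        else:
            betas[g] = (1.0 - lbda / norm_z) * z    # closed-form block update
    return betas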