class FastWeightedFusedLassoSolver:
    def __init__(self, y):
        # Pre-cache a sparse LU decomposition of the FL matrix
        from pygfl.utils import get_1d_penalty_matrix
        from scipy.sparse.linalg import factorized
        from scipy.sparse import csc_matrix
        D = get_1d_penalty_matrix(y.shape[0])
        D = np.vstack([D, np.zeros(y.shape[0])])
        D[-1, -1] = 1e-6  # Nugget for full rank matrix
        D = csc_matrix(D)
        self.invD = factorized(D)

        # Setup the fast GFL solver
        from pygfl.solver import TrailSolver
        from pygfl.trails import decompose_graph
        from pygfl.utils import hypercube_edges, chains_to_trails
        from networkx import Graph
        edges = hypercube_edges(y.shape)
        g = Graph()
        g.add_edges_from(edges)
        chains = decompose_graph(g, heuristic='greedy')
        ntrails, trails, breakpoints, edges = chains_to_trails(chains)
        self.solver = TrailSolver()
        self.solver.set_data(y, edges, ntrails, trails, breakpoints)

        from pygfl.easy import solve_gfl
        self.beta = solve_gfl(y)

    def solve(self, lams, rel_tol=1e-6, alpha=2, max_admm_steps=100000):
        '''
        TODO

        # Run ADMM until convergence
        prev = beta.copy()
        for step in range(max_admm_steps):
            # Beta step
            beta[0] = (2 * y[0] + alpha*(z[0] - u[0])) / (2 + alpha)
            beta[-1] = (2 * y[-1] + alpha*(z[-1] - u[-1])) / (2 + alpha)
            beta[1:-1] = (2 * y[1:-1] + alpha * (z[1:-1] - u[1:-1]).reshape((-1,2)).sum(axis=1)) / (2 + 2*alpha)

            # Run the 1D FL for every edge
            for i in range(y.shape[0]-1):
                weighted_fl(2, beta[i:i+2] + u[2*i:2*i+2], weights, lams[i],
                            z_buf, x_buf, a_buf, b_buf, tm_buf, tp_buf)
                z[2*i:2*i+2] = z_buf

            # Update the dual variable
            u += np.repeat(beta, 2)[1:-1] - z

            # Check for convergence (not recommended this way, but it's fast)
            delta = np.linalg.norm(prev - beta)
            if delta <= rel_tol:
                break
            prev = beta.copy()
        return beta
        '''
        pass
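# Usage sketch (illustrative, not part of the original source). It shows what the
# constructor above caches: `self.invD`, the callable returned by scipy's
# `factorized`, solves D x = b against the padded difference matrix, and
# `self.beta` holds the pygfl estimate for the input signal. The synthetic data
# and the assumption that pygfl/networkx are installed are mine.
if __name__ == '__main__':
    import numpy as np
    np.random.seed(0)
    # Noisy piecewise-constant 1D signal
    y = np.concatenate([np.zeros(50), np.ones(50)]) + np.random.normal(0, 0.1, 100)
    fl = FastWeightedFusedLassoSolver(y)
    print(fl.beta[:5])            # smoothed estimate from solve_gfl
    b = np.random.normal(size=y.shape[0])
    x = fl.invD(b)                # solve D x = b via the cached sparse LU factors
    print(np.abs(x).max())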
def __init__(self, alpha=2., inflate=2., maxsteps=100000, converge=1e-6,
             penalty='gfl', max_dp_steps=5000, gamma=1.):
    TrailSolver.__init__(self, alpha, inflate, maxsteps, converge, penalty, max_dp_steps, gamma)
    if penalty != 'gfl':
        raise NotImplementedError('Only regular fused lasso supported for logistic loss.')
def train_gtv(X, y, q, minlam=0.2, maxlam=10., numlam=30, verbose=1, tf_k=0, penalty='gfl', **kwargs):
    if isinstance(q, int):
        q = (q, q)

    grid = generate_grid(X, q)

    # Divide the space into q^2 bins
    data = np.zeros(q[0]*q[1])
    weights = np.zeros(q[0]*q[1])
    i = 0
    for x1_left, x1_right in zip(grid[0][:-1], grid[0][1:]):
        for x2_left, x2_right in zip(grid[1][:-1], grid[1][1:]):
            vals = np.where((X[:,0] >= x1_left) * (X[:,0] < x1_right) * (X[:,1] >= x2_left) * (X[:,1] < x2_right))[0]
            weights[i] = len(vals)
            data[i] = y[vals].mean() if len(vals) > 0 else 0
            i += 1

    # Get the edges for a 2d grid
    edges = hypercube_edges(q)

    ########### Setup the graph
    if penalty == 'gfl':
        g = Graph()
        g.add_edges_from(edges)
        chains = decompose_graph(g, heuristic='greedy')
        ntrails, trails, breakpoints, edges = chains_to_trails(chains)
    elif penalty == 'dp' or penalty == 'gamlasso':
        trails = np.array(edges, dtype='int32').flatten()
        breakpoints = np.array(range(2, len(trails)+1, 2), dtype='int32')
        ntrails = len(breakpoints)

    print('\tSetting up trail solver')
    solver = TrailSolver(maxsteps=30000, penalty=penalty)

    # Set the data and pre-cache any necessary structures
    solver.set_data(data, edges, ntrails, trails, breakpoints, weights=weights)

    print('\tSolving')
    # Grid search to find the best lambda
    results = solver.solution_path(minlam, maxlam, numlam, verbose=verbose)
    results['grid'] = grid
    return results
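# Usage sketch (illustrative only): fit the binned graph-fused-lasso estimator on
# synthetic 2D covariates. It assumes the module-level helpers used above
# (generate_grid, hypercube_edges, decompose_graph, chains_to_trails, TrailSolver)
# are importable; the data below is made up, and only the 'grid' key of the
# returned dict is guaranteed by the code above.
if __name__ == '__main__':
    import numpy as np
    np.random.seed(42)
    X = np.random.uniform(0, 1, size=(500, 2))
    y = (X[:, 0] > 0.5).astype(float) + np.random.normal(0, 0.25, 500)
    results = train_gtv(X, y, q=10, verbose=1)
    print(results['grid'])   # the bin edges used to aggregate the raw responses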
def solve_gfl(data, edges=None, weights=None,
              minlam=0.2, maxlam=1000.0, numlam=30,
              alpha=0.2, inflate=2., converge=1e-6,
              maxsteps=100000, lam=None, verbose=0,
              missing_val=None, full_path=False, loss='normal'):
    '''A very easy-to-use version of GFL solver that just requires the data and the edges.'''
    # Fix no edge cases
    if edges is not None and edges.shape[0] == 0:
        return data

    if verbose:
        print('Decomposing graph into trails')

    if loss == 'binomial':
        flat_data = data[0].flatten()
        nonmissing_flat_data = flat_data, data[1].flatten()
    else:
        flat_data = data.flatten()
        nonmissing_flat_data = flat_data
    if weights is not None:
        weights = weights.flatten()
    if edges is None:
        if loss == 'binomial':
            if verbose:
                print('Using default edge set of a grid of same shape as the data: {0}'.format(data[0].shape))
            edges = hypercube_edges(data[0].shape)
        else:
            if verbose:
                print('Using default edge set of a grid of same shape as the data: {0}'.format(data.shape))
            edges = hypercube_edges(data.shape)
        if missing_val is not None:
            if verbose:
                print('Removing all data points whose data value is {0}'.format(missing_val))
            edges = [(e1, e2) for (e1, e2) in edges
                     if flat_data[e1] != missing_val and flat_data[e2] != missing_val]
            if loss == 'binomial':
                nonmissing_flat_data = (flat_data[flat_data != missing_val],
                                        nonmissing_flat_data[1][flat_data != missing_val])
            else:
                nonmissing_flat_data = flat_data[flat_data != missing_val]

    # Keep initial edges
    init_edges = np.array(edges)

    ########### Setup the graph
    g = Graph()
    g.add_edges_from(edges)
    chains = decompose_graph(g, heuristic='greedy')
    ntrails, trails, breakpoints, edges = chains_to_trails(chains)

    if verbose:
        print('Setting up trail solver')

    ########### Setup the solver
    if loss == 'normal':
        solver = TrailSolver(alpha, inflate, maxsteps, converge)
    elif loss == 'logistic':
        solver = LogisticTrailSolver(alpha, inflate, maxsteps, converge)
    elif loss == 'binomial':
        solver = BinomialTrailSolver(alpha, inflate, maxsteps, converge)
    else:
        raise NotImplementedError('Loss must be normal, logistic, or binomial')

    # Set the data and pre-cache any necessary structures
    solver.set_data(nonmissing_flat_data, edges, ntrails, trails, breakpoints, weights=weights)

    if verbose:
        print('Solving')

    ########### Run the solver
    if lam:
        # Fixed lambda
        beta = solver.solve(lam)
    else:
        # Grid search to find the best lambda
        beta = solver.solution_path(minlam, maxlam, numlam, verbose=max(0, verbose - 1))
        if not full_path:
            beta = beta['best']

    ########### Fix disconnected nodes
    mask = np.ones_like(beta)
    mask[init_edges[:, 0]] = 0
    mask[init_edges[:, 1]] = 0
    beta[mask > 0] = data[mask > 0]

    return beta
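# Usage sketch (illustrative): denoise a noisy piecewise-constant image with the
# easy wrapper above. Per the code, when `edges` is omitted the solver uses a grid
# graph matching data.shape and returns a flattened estimate. The synthetic image
# is mine; the function itself is importable as `from pygfl.easy import solve_gfl`,
# as done elsewhere in these files.
if __name__ == '__main__':
    import numpy as np
    np.random.seed(0)
    truth = np.zeros((20, 20))
    truth[5:15, 5:15] = 2.0
    noisy = truth + np.random.normal(0, 0.5, truth.shape)
    smoothed = solve_gfl(noisy, verbose=1).reshape(noisy.shape)
    print(np.abs(smoothed - truth).mean())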
class SmoothedFdr(object):
    def __init__(self, signal_dist, null_dist, penalties_cross_x=None):
        self.signal_dist = signal_dist
        self.null_dist = null_dist

        if penalties_cross_x is None:
            self.penalties_cross_x = np.dot
        else:
            self.penalties_cross_x = penalties_cross_x

        self.w_iters = []
        self.beta_iters = []
        self.c_iters = []
        self.delta_iters = []

        # ''' Load the graph fused lasso library '''
        # graphfl_lib = cdll.LoadLibrary('libgraphfl.so')
        # self.graphfl_weight = graphfl_lib.graph_fused_lasso_weight_warm
        # self.graphfl_weight.restype = c_int
        # self.graphfl_weight.argtypes = [c_int, ndpointer(c_double, flags='C_CONTIGUOUS'), ndpointer(c_double, flags='C_CONTIGUOUS'),
        #                                 c_int, ndpointer(c_int, flags='C_CONTIGUOUS'), ndpointer(c_int, flags='C_CONTIGUOUS'),
        #                                 c_double, c_double, c_double, c_int, c_double,
        #                                 ndpointer(c_double, flags='C_CONTIGUOUS'), ndpointer(c_double, flags='C_CONTIGUOUS'), ndpointer(c_double, flags='C_CONTIGUOUS')]

        self.solver = TrailSolver()

    def add_step(self, w, beta, c, delta):
        self.w_iters.append(w)
        self.beta_iters.append(beta)
        self.c_iters.append(c)
        self.delta_iters.append(delta)

    def finish(self):
        self.w_iters = np.array(self.w_iters)
        self.beta_iters = np.array(self.beta_iters)
        self.c_iters = np.array(self.c_iters)
        self.delta_iters = np.array(self.delta_iters)

    def reset(self):
        self.w_iters = []
        self.beta_iters = []
        self.c_iters = []
        self.delta_iters = []

    def solution_path(self, data, penalties, dof_tolerance=1e-4,
                      min_lambda=0.20, max_lambda=1.5, lambda_bins=30,
                      converge=0.00001, max_steps=100, m_converge=0.00001,
                      m_max_steps=20, cd_converge=0.00001, cd_max_steps=1000,
                      verbose=0, dual_solver='graph', admm_alpha=1., admm_inflate=2.,
                      admm_adaptive=False, initial_values=None, grid_data=None, grid_map=None):
        '''Follows the solution path of the generalized lasso to find the best lambda value.'''
        lambda_grid = np.exp(np.linspace(np.log(max_lambda), np.log(min_lambda), lambda_bins))
        aic_trace = np.zeros(lambda_grid.shape)  # The AIC score for each lambda value
        aicc_trace = np.zeros(lambda_grid.shape) # The AICc score for each lambda value (correcting for finite sample size)
        bic_trace = np.zeros(lambda_grid.shape)  # The BIC score for each lambda value
        dof_trace = np.zeros(lambda_grid.shape)  # The degrees of freedom of each final solution
        log_likelihood_trace = np.zeros(lambda_grid.shape)
        beta_trace = []
        u_trace = []
        w_trace = []
        c_trace = []
        results_trace = []
        best_idx = None
        best_plateaus = None
        flat_data = data.flatten()
        edges = penalties[3] if dual_solver == 'graph' else None
        if grid_data is not None:
            grid_points = np.zeros(grid_data.shape)
            grid_points[:, :] = np.nan
        for i, _lambda in enumerate(lambda_grid):
            if verbose:
                print('#{0} Lambda = {1}'.format(i, _lambda))

            # Clear out all the info from the previous run
            self.reset()

            # Fit to the final values
            results = self.run(flat_data, penalties, _lambda=_lambda, converge=converge, max_steps=max_steps,
                               m_converge=m_converge, m_max_steps=m_max_steps, cd_converge=cd_converge,
                               cd_max_steps=cd_max_steps, verbose=verbose, dual_solver=dual_solver,
                               admm_alpha=admm_alpha, admm_inflate=admm_inflate, admm_adaptive=admm_adaptive,
                               initial_values=initial_values)

            if verbose:
                print('Calculating degrees of freedom')

            # Create a grid structure out of the vector of betas
            if grid_map is not None:
                grid_points[grid_map != -1] = results['beta'][grid_map[grid_map != -1]]
            else:
                grid_points = results['beta'].reshape(data.shape)

            # Count the number of free parameters in the grid (dof)
            plateaus = calc_plateaus(grid_points, dof_tolerance, edges=edges)
            dof_trace[i] = len(plateaus)
            #dof_trace[i] = (np.abs(penalties.dot(results['beta'])) >= dof_tolerance).sum() + 1 # Use the naive DoF

            if verbose:
                print('Calculating AIC')

            # Get the negative log-likelihood
            log_likelihood_trace[i] = -self._data_negative_log_likelihood(flat_data, results['c'])

            # Calculate AIC = 2k - 2ln(L)
            aic_trace[i] = 2. * dof_trace[i] - 2. * log_likelihood_trace[i]

            # Calculate AICc = AIC + 2k * (k+1) / (n - k - 1)
            aicc_trace[i] = aic_trace[i] + 2 * dof_trace[i] * (dof_trace[i]+1) / (flat_data.shape[0] - dof_trace[i] - 1.)

            # Calculate BIC = -2ln(L) + k * (ln(n) - ln(2pi))
            bic_trace[i] = -2 * log_likelihood_trace[i] + dof_trace[i] * (np.log(len(flat_data)) - np.log(2 * np.pi))

            # Track the best model thus far
            if best_idx is None or bic_trace[i] < bic_trace[best_idx]:
                best_idx = i
                best_plateaus = plateaus

            # Save the final run parameters to use for warm-starting the next iteration
            initial_values = results

            # Save the trace of all the resulting parameters
            beta_trace.append(results['beta'])
            u_trace.append(results['u'])
            w_trace.append(results['w'])
            c_trace.append(results['c'])

            if verbose:
                print('DoF: {0} AIC: {1} AICc: {2} BIC: {3}'.format(dof_trace[i], aic_trace[i], aicc_trace[i], bic_trace[i]))

        if verbose:
            print('Best setting (by BIC): lambda={0} [DoF: {1}, AIC: {2}, AICc: {3} BIC: {4}]'.format(
                lambda_grid[best_idx], dof_trace[best_idx], aic_trace[best_idx], aicc_trace[best_idx], bic_trace[best_idx]))

        return {'aic': aic_trace,
                'aicc': aicc_trace,
                'bic': bic_trace,
                'dof': dof_trace,
                'loglikelihood': log_likelihood_trace,
                'beta': np.array(beta_trace),
                'u': np.array(u_trace),
                'w': np.array(w_trace),
                'c': np.array(c_trace),
                'lambda': lambda_grid,
                'best': best_idx,
                'plateaus': best_plateaus}

    def run(self, data, penalties, _lambda=0.1, converge=0.00001, max_steps=100,
            m_converge=0.00001, m_max_steps=100, cd_converge=0.00001, cd_max_steps=100,
            verbose=0, dual_solver='graph', admm_alpha=1., admm_inflate=2.,
            admm_adaptive=False, initial_values=None):
        '''Runs the Expectation-Maximization algorithm for the data with the given penalty matrix.'''
        delta = converge + 1

        if initial_values is None:
            beta = np.zeros(data.shape)
            prior_prob = np.exp(beta) / (1 + np.exp(beta))
            u = initial_values
        else:
            beta = initial_values['beta']
            prior_prob = initial_values['c']
            u = initial_values['u']

        prev_nll = 0
        cur_step = 0

        while delta > converge and cur_step < max_steps:
            if verbose:
                print('Step #{0}'.format(cur_step))
                print('\tE-step...')

            # Get the likelihood weights vector (E-step)
            post_prob = self._e_step(data, prior_prob)

            if verbose:
                print('\tM-step...')

            # Find beta using an alternating Taylor approximation and convex optimization (M-step)
            beta, u = self._m_step(beta, prior_prob, post_prob, penalties, _lambda,
                                   m_converge, m_max_steps, cd_converge, cd_max_steps,
                                   verbose, dual_solver, admm_adaptive=admm_adaptive,
                                   admm_inflate=admm_inflate, admm_alpha=admm_alpha, u0=u)

            # Get the signal probabilities
            prior_prob = ilogit(beta)
            cur_nll = self._data_negative_log_likelihood(data, prior_prob)
            if dual_solver == 'admm':
                # Get the negative log-likelihood of the data given our new parameters
                cur_nll += _lambda * np.abs(u['r']).sum()

            # Track the change in log-likelihood to see if we've converged
            delta = np.abs(cur_nll - prev_nll) / (prev_nll + converge)

            if verbose:
                print('\tDelta: {0}'.format(delta))

            # Track the step
            self.add_step(post_prob, beta, prior_prob, delta)

            # Increment the step counter
            cur_step += 1

            # Update the negative log-likelihood tracker
            prev_nll = cur_nll

            # DEBUGGING
            if verbose:
                print('\tbeta: [{0:.4f}, {1:.4f}]'.format(beta.min(), beta.max()))
                print('\tprior_prob: [{0:.4f}, {1:.4f}]'.format(prior_prob.min(), prior_prob.max()))
                print('\tpost_prob: [{0:.4f}, {1:.4f}]'.format(post_prob.min(), post_prob.max()))
                if dual_solver != 'graph':
                    print('\tdegrees of freedom: {0}'.format((np.abs(penalties.dot(beta)) >= 1e-4).sum()))

        # Return the results of the run
        return {'beta': beta, 'u': u, 'w': post_prob, 'c': prior_prob}

    def _data_negative_log_likelihood(self, data, prior_prob):
        '''Calculate the negative log-likelihood of the data given the weights.'''
        signal_weight = prior_prob * self.signal_dist.pdf(data)
        null_weight = (1-prior_prob) * self.null_dist.pdf(data)
        return -np.log(signal_weight + null_weight).sum()

    def _e_step(self, data, prior_prob):
        '''Calculate the complete-data sufficient statistics (weights vector).'''
        signal_weight = prior_prob * self.signal_dist.pdf(data)
        null_weight = (1-prior_prob) * self.null_dist.pdf(data)
        post_prob = signal_weight / (signal_weight + null_weight)
        return post_prob

    def _m_step(self, beta, prior_prob, post_prob, penalties, _lambda,
                converge, max_steps, cd_converge, cd_max_steps,
                verbose, dual_solver, u0=None, admm_alpha=1., admm_inflate=2., admm_adaptive=False):
        '''Alternating second-order Taylor-series expansion about the current iterate
        and coordinate descent to optimize beta.'''
        prev_nll = self._m_log_likelihood(post_prob, beta)
        delta = converge + 1
        u = u0
        cur_step = 0
        while delta > converge and cur_step < max_steps:
            if verbose > 1:
                print('\t\tM-Step iteration #{0}'.format(cur_step))
                print('\t\tTaylor approximation...')

            # Cache the exponentiated beta
            exp_beta = np.exp(beta)

            # Form the parameters for our weighted least squares
            if dual_solver != 'admm' and dual_solver != 'graph':
                # weights is a diagonal matrix, represented as a vector for efficiency
                weights = 0.5 * exp_beta / (1 + exp_beta)**2
                y = (1+exp_beta)**2 * post_prob / exp_beta + beta - (1 + exp_beta)
                if verbose > 1:
                    print('\t\tForming dual...')
                x = np.sqrt(weights) * y
                A = (1. / np.sqrt(weights))[:, np.newaxis] * penalties.T
            else:
                weights = (prior_prob * (1 - prior_prob))
                y = beta - (prior_prob - post_prob) / weights
                print(weights)
                print(y)

            if dual_solver == 'cd':
                # Solve the dual via coordinate descent
                u = self._u_coord_descent(x, A, _lambda, cd_converge, cd_max_steps, verbose > 1, u0=u)
            elif dual_solver == 'sls':
                # Solve the dual via sequential least squares
                u = self._u_slsqp(x, A, _lambda, verbose > 1, u0=u)
            elif dual_solver == 'lbfgs':
                # Solve the dual via L-BFGS-B
                u = self._u_lbfgsb(x, A, _lambda, verbose > 1, u0=u)
            elif dual_solver == 'admm':
                # Solve the dual via alternating direction method of multipliers
                #u = self._u_admm_1dfusedlasso(y, weights, _lambda, cd_converge, cd_max_steps, verbose > 1, initial_values=u)
                #u = self._u_admm(y, weights, _lambda, penalties, cd_converge, cd_max_steps, verbose > 1, initial_values=u)
                u = self._u_admm_lucache(y, weights, _lambda, penalties, cd_converge, cd_max_steps,
                                         verbose > 1, initial_values=u, inflate=admm_inflate,
                                         adaptive=admm_adaptive, alpha=admm_alpha)
                beta = u['x']
            elif dual_solver == 'graph':
                u = self._graph_fused_lasso(y, weights, _lambda,
                                            penalties[0], penalties[1], penalties[2], penalties[3],
                                            cd_converge, cd_max_steps, max(0, verbose - 1),
                                            admm_alpha, admm_inflate, initial_values=u)
                beta = u['beta']
                # if np.abs(beta).max() > 20:
                #     beta = np.clip(beta, -20, 20)
                #     u = None
            else:
                raise Exception('Unknown solver: {0}'.format(dual_solver))

            if dual_solver != 'admm' and dual_solver != 'graph':
                # Back out beta from the dual solution
                beta = y - (1. / weights) * penalties.T.dot(u)

            # Get the current log-likelihood
            cur_nll = self._m_log_likelihood(post_prob, beta)

            # Track the convergence
            delta = np.abs(prev_nll - cur_nll) / (prev_nll + converge)

            if verbose > 1:
                print('\t\tM-step delta: {0}'.format(delta))

            # Increment the step counter
            cur_step += 1

            # Update the negative log-likelihood tracker
            prev_nll = cur_nll

        return beta, u

    def _m_log_likelihood(self, post_prob, beta):
        '''Calculate the log-likelihood of the betas given the weights and data.'''
        return (np.log(1 + np.exp(beta)) - post_prob * beta).sum()

    def _graph_fused_lasso(self, y, weights, _lambda, ntrails, trails, breakpoints, edges,
                           converge, max_steps, verbose, alpha, inflate, initial_values=None):
        '''Solve for u using a super fast graph fused lasso library that has an optimized ADMM routine.'''
        if verbose:
            print('\t\tSolving via Graph Fused Lasso')
        # if initial_values is None:
        #     beta = np.zeros(y.shape, dtype='double')
        #     z = np.zeros(breakpoints[-1], dtype='double')
        #     u = np.zeros(breakpoints[-1], dtype='double')
        # else:
        #     beta = initial_values['beta']
        #     z = initial_values['z']
        #     u = initial_values['u']
        # n = y.shape[0]
        # self.graphfl_weight(n, y, weights, ntrails, trails, breakpoints, _lambda, alpha, inflate, max_steps, converge, beta, z, u)
        # return {'beta': beta, 'z': z, 'u': u}
        self.solver.alpha = alpha
        self.solver.inflate = inflate
        self.solver.maxsteps = max_steps
        self.solver.converge = converge
        self.solver.set_data(y, edges, ntrails, trails, breakpoints, weights=weights)
        if initial_values is not None:
            self.solver.beta = initial_values['beta']
            self.solver.z = initial_values['z']
            self.solver.u = initial_values['u']
        self.solver.solve(_lambda)
        return {'beta': self.solver.beta, 'z': self.solver.z, 'u': self.solver.u}

    def _u_admm_lucache(self, y, weights, _lambda, D, converge_threshold, max_steps, verbose,
                        alpha=1.8, initial_values=None, inflate=2., adaptive=False):
        '''Solve for u using alternating direction method of multipliers with a cached LU decomposition.'''
        if verbose:
            print('\t\tSolving u via Alternating Direction Method of Multipliers')

        n = len(y)
        m = D.shape[0]

        a = inflate * _lambda  # step-size parameter

        # Initialize primal and dual variables from warm start
        if initial_values is None:
            # Graph Laplacian
            L = csc_matrix(D.T.dot(D) + csc_matrix(np.eye(n)))

            # Cache the LU decomposition
            lu_factor = sla.splu(L, permc_spec='MMD_AT_PLUS_A')

            x = np.array([y.mean()] * n)  # likelihood term
            z = np.zeros(n)               # slack variable for likelihood
            r = np.zeros(m)               # penalty term
            s = np.zeros(m)               # slack variable for penalty
            u_dual = np.zeros(n)          # scaled dual variable for constraint x = z
            t_dual = np.zeros(m)          # scaled dual variable for constraint r = s
        else:
            lu_factor = initial_values['lu_factor']
            x = initial_values['x']
            z = initial_values['z']
            r = initial_values['r']
            s = initial_values['s']
            u_dual = initial_values['u_dual']
            t_dual = initial_values['t_dual']

        primal_trace = []
        dual_trace = []
        converged = False
        cur_step = 0
        D_full = D
        while not converged and cur_step < max_steps:
            # Update x
            x = (weights * y + a * (z - u_dual)) / (weights + a)
            x_accel = alpha * x + (1 - alpha) * z  # over-relaxation

            # Update constraint term r
            arg = s - t_dual
            local_lambda = (_lambda - np.abs(arg) / 2.).clip(0) if adaptive else _lambda
            r = _soft_threshold(arg, local_lambda / a)
            r_accel = alpha * r + (1 - alpha) * s

            # Projection to constraint set
            arg = x_accel + u_dual + D.T.dot(r_accel + t_dual)
            z_new = lu_factor.solve(arg)
            s_new = D.dot(z_new)
            dual_residual_u = a * (z_new - z)
            dual_residual_t = a * (s_new - s)
            z = z_new
            s = s_new

            # Dual update
            primal_residual_x = x_accel - z
            primal_residual_r = r_accel - s
            u_dual = u_dual + primal_residual_x
            t_dual = t_dual + primal_residual_r

            # Check convergence
            primal_resnorm = np.sqrt((np.array([i for i in primal_residual_x] + [i for i in primal_residual_r])**2).mean())
            dual_resnorm = np.sqrt((np.array([i for i in dual_residual_u] + [i for i in dual_residual_t])**2).mean())
            primal_trace.append(primal_resnorm)
            dual_trace.append(dual_resnorm)
            converged = dual_resnorm < converge_threshold and primal_resnorm < converge_threshold

            if primal_resnorm > 5 * dual_resnorm:
                a *= inflate
                u_dual /= inflate
                t_dual /= inflate
            elif dual_resnorm > 5 * primal_resnorm:
                a /= inflate
                u_dual *= inflate
                t_dual *= inflate

            # Update the step counter
            cur_step += 1

            if verbose and cur_step % 100 == 0:
                print('\t\t\tStep #{0}: dual_resnorm: {1:.6f} primal_resnorm: {2:.6f}'.format(cur_step, dual_resnorm, primal_resnorm))

        return {'x': x, 'r': r, 'z': z, 's': s, 'u_dual': u_dual, 't_dual': t_dual,
                'primal_trace': primal_trace, 'dual_trace': dual_trace, 'steps': cur_step,
                'lu_factor': lu_factor}

    def _u_admm(self, y, weights, _lambda, D, converge_threshold, max_steps, verbose, alpha=1.0, initial_values=None):
        '''Solve for u using alternating direction method of multipliers.'''
        if verbose:
            print('\t\tSolving u via Alternating Direction Method of Multipliers')

        n = len(y)
        m = D.shape[0]

        a = _lambda  # step-size parameter

        # Set up system involving graph Laplacian
        L = D.T.dot(D)
        W_over_a = np.diag(weights / a)
        x_denominator = W_over_a + L
        #x_denominator = sparse.linalg.inv(W_over_a + L)

        # Initialize primal and dual variables
        if initial_values is None:
            x = np.array([y.mean()] * n)
            z = np.zeros(m)
            u = np.zeros(m)
        else:
            x = initial_values['x']
            z = initial_values['z']
            u = initial_values['u']

        primal_trace = []
        dual_trace = []
        converged = False
        cur_step = 0
        while not converged and cur_step < max_steps:
            # Update x
            x_numerator = 1.0 / a * weights * y + D.T.dot(a * z - u)
            x = np.linalg.solve(x_denominator, x_numerator)
            Dx = D.dot(x)

            # Update z
            Dx_relaxed = alpha * Dx + (1 - alpha) * z  # over-relax Dx
            z_new = _soft_threshold(Dx_relaxed + u / a, _lambda / a)
            dual_residual = a * D.T.dot(z_new - z)
            z = z_new
            primal_residual = Dx_relaxed - z

            # Update u
            u = u + a * primal_residual

            # Check convergence
            primal_resnorm = np.sqrt((primal_residual ** 2).mean())
            dual_resnorm = np.sqrt((dual_residual ** 2).mean())
            primal_trace.append(primal_resnorm)
            dual_trace.append(dual_resnorm)
            converged = dual_resnorm < converge_threshold and primal_resnorm < converge_threshold

            # Update step-size parameter based on norm of primal and dual residuals
            # This is the varying penalty extension to standard ADMM
            a *= 2 if primal_resnorm > 10 * dual_resnorm else 0.5

            # Recalculate the x_denominator since we changed the step-size
            # TODO: is this worth it? We're paying a matrix inverse in exchange for varying the step size
            #W_over_a = sparse.dia_matrix(np.diag(weights / a))
            W_over_a = np.diag(weights / a)
            #x_denominator = sparse.linalg.inv(W_over_a + L)

            # Update the step counter
            cur_step += 1

            if verbose and cur_step % 100 == 0:
                print('\t\t\tStep #{0}: dual_resnorm: {1:.6f} primal_resnorm: {2:.6f}'.format(cur_step, dual_resnorm, primal_resnorm))

        dof = np.sum(Dx > converge_threshold) + 1.
        AIC = np.sum((y - x)**2) + 2 * dof

        return {'x': x, 'z': z, 'u': u, 'dof': dof, 'AIC': AIC}

    def _u_admm_1dfusedlasso(self, y, W, _lambda, converge_threshold, max_steps, verbose, alpha=1.0, initial_values=None):
        '''Solve for u using alternating direction method of multipliers.
        Note that this method only works for the 1-D fused lasso case.'''
        if verbose:
            print('\t\tSolving u via Alternating Direction Method of Multipliers (1-D fused lasso)')

        n = len(y)
        m = n - 1

        a = _lambda

        # The D matrix is the first-difference operator. K is the matrix (W + a D^T D)
        # where W is the diagonal matrix of weights. We use a tridiagonal representation
        # of K.
        Kd = np.array([a] + [2*a] * (n-2) + [a]) + W  # diagonal entries
        Kl = np.array([-a] * (n-1))                   # below the diagonal
        Ku = np.array([-a] * (n-1))                   # above the diagonal

        # Initialize primal and dual variables
        if initial_values is None:
            x = np.array([y.mean()] * n)
            z = np.zeros(m)
            u = np.zeros(m)
        else:
            x = initial_values['x']
            z = initial_values['z']
            u = initial_values['u']

        primal_trace = []
        dual_trace = []
        converged = False
        cur_step = 0
        while not converged and cur_step < max_steps:
            # Update x
            out = _1d_fused_lasso_crossprod(a*z - u)
            x = tridiagonal_solve(Kl, Ku, Kd, W * y + out)
            Dx = np.ediff1d(x)

            # Update z
            Dx_hat = alpha * Dx + (1 - alpha) * z  # Over-relaxation
            z_new = _soft_threshold(Dx_hat + u / a, _lambda / a)
            dual_residual = a * _1d_fused_lasso_crossprod(z_new - z)
            z = z_new
            primal_residual = Dx - z
            #primal_residual = Dx_hat - z

            # Update u
            u = (u + a * primal_residual).clip(-_lambda, _lambda)

            # Check convergence
            primal_resnorm = np.sqrt((primal_residual ** 2).mean())
            dual_resnorm = np.sqrt((dual_residual ** 2).mean())
            primal_trace.append(primal_resnorm)
            dual_trace.append(dual_resnorm)
            converged = dual_resnorm < converge_threshold and primal_resnorm < converge_threshold

            # Update step-size parameter based on norm of primal and dual residuals
            a *= 2 if primal_resnorm > 10 * dual_resnorm else 0.5
            Kd = np.array([a] + [2*a] * (n-2) + [a]) + W  # diagonal entries
            Kl = np.array([-a] * (n-1))                   # below the diagonal
            Ku = np.array([-a] * (n-1))                   # above the diagonal

            cur_step += 1

            if verbose and cur_step % 100 == 0:
                print('\t\t\tStep #{0}: dual_resnorm: {1:.6f} primal_resnorm: {2:.6f}'.format(cur_step, dual_resnorm, primal_resnorm))

        dof = np.sum(Dx > converge_threshold) + 1.
        AIC = np.sum((y - x)**2) + 2 * dof

        return {'x': x, 'z': z, 'u': u, 'dof': dof, 'AIC': AIC}

    def _u_coord_descent(self, x, A, _lambda, converge, max_steps, verbose, u0=None):
        '''Solve for u using coordinate descent.'''
        if verbose:
            print('\t\tSolving u via Coordinate Descent')

        u = u0 if u0 is not None else np.zeros(A.shape[1])

        l2_norm_A = (A * A).sum(axis=0)
        r = x - A.dot(u)
        delta = converge + 1
        prev_objective = _u_objective_func(u, x, A)
        cur_step = 0
        while delta > converge and cur_step < max_steps:
            # Update each coordinate one at a time.
            for coord in range(len(u)):
                prev_u = u[coord]
                next_u = prev_u + A.T[coord].dot(r) / l2_norm_A[coord]
                u[coord] = min(_lambda, max(-_lambda, next_u))
                r += A.T[coord] * prev_u - A.T[coord] * u[coord]

            # Track the change in the objective function value
            cur_objective = _u_objective_func(u, x, A)
            delta = np.abs(prev_objective - cur_objective) / (prev_objective + converge)

            if verbose and cur_step % 100 == 0:
                print('\t\t\tStep #{0}: Objective: {1:.6f} CD Delta: {2:.6f}'.format(cur_step, cur_objective, delta))

            # Increment the step counter and update the previous objective value
            cur_step += 1
            prev_objective = cur_objective

        return u

    def _u_slsqp(self, x, A, _lambda, verbose, u0=None):
        '''Solve for u using sequential least squares.'''
        if verbose:
            print('\t\tSolving u via Sequential Least Squares')

        if u0 is None:
            u0 = np.zeros(A.shape[1])

        # Create our box constraints
        bounds = [(-_lambda, _lambda) for u0_i in u0]

        results = minimize(_u_objective_func, u0,
                           args=(x, A),
                           jac=_u_objective_deriv,
                           bounds=bounds,
                           method='SLSQP',
                           options={'disp': False, 'maxiter': 1000})

        if verbose:
            print('\t\t\t{0}'.format(results.message))
            print('\t\t\tFunction evaluations: {0}'.format(results.nfev))
            print('\t\t\tGradient evaluations: {0}'.format(results.njev))
            print('\t\t\tu: [{0}, {1}]'.format(results.x.min(), results.x.max()))

        return results.x

    def _u_lbfgsb(self, x, A, _lambda, verbose, u0=None):
        '''Solve for u using L-BFGS-B.'''
        if verbose:
            print('\t\tSolving u via L-BFGS-B')

        if u0 is None:
            u0 = np.zeros(A.shape[1])

        # Create our box constraints
        bounds = [(-_lambda, _lambda) for _ in u0]

        # Fit
        results = minimize(_u_objective_func, u0, args=(x, A), method='L-BFGS-B', bounds=bounds, options={'disp': verbose})

        return results.x

    def plateau_regression(self, plateaus, data, grid_map=None, verbose=False):
        '''Perform unpenalized 1-d regression for each of the plateaus.'''
        weights = np.zeros(data.shape)

        for i, (level, p) in enumerate(plateaus):
            if verbose:
                print('\tPlateau #{0}'.format(i+1))

            # Get the subset of grid points for this plateau
            if grid_map is not None:
                plateau_data = np.array([data[grid_map[x, y]] for x, y in p])
            else:
                plateau_data = np.array([data[x, y] for x, y in p])

            w = single_plateau_regression(plateau_data, self.signal_dist, self.null_dist)
            for idx in p:
                weights[idx if grid_map is None else grid_map[idx[0], idx[1]]] = w

        posteriors = self._e_step(data, weights)
        weights = weights.flatten()

        return (weights, posteriors)
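# Usage sketch (illustrative, not from the original source). With
# dual_solver='graph', `penalties` is the trail decomposition tuple
# (ntrails, trails, breakpoints, edges) consumed by _graph_fused_lasso above, and
# it is built here exactly the way the other functions in these files build it.
# The z-scores and the scipy frozen distributions are my own stand-ins; any
# objects exposing .pdf() work for signal_dist/null_dist.
if __name__ == '__main__':
    import numpy as np
    import scipy.stats as st
    from networkx import Graph
    from pygfl.trails import decompose_graph
    from pygfl.utils import hypercube_edges, chains_to_trails

    np.random.seed(0)
    z = np.random.normal(0, 1, (30, 30))                     # synthetic z-scores
    z[10:20, 10:20] += np.random.normal(3, 1, (10, 10))      # a block of signal

    g = Graph()
    g.add_edges_from(hypercube_edges(z.shape))
    chains = decompose_graph(g, heuristic='greedy')
    ntrails, trails, breakpoints, edges = chains_to_trails(chains)

    sfdr = SmoothedFdr(signal_dist=st.norm(3, 1), null_dist=st.norm(0, 1))
    path = sfdr.solution_path(z, (ntrails, trails, breakpoints, edges), verbose=1)
    print(path['lambda'][path['best']])                      # BIC-selected lambda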
def smooth_fdr(data, fdr_level, edges=None, initial_values=None, verbose=0,
               null_dist=None, signal_dist=None, num_sweeps=10, missing_val=None):
    flat_data = data.flatten()
    nonmissing_flat_data = flat_data

    if edges is None:
        if verbose:
            print('Using default edge set of a grid of same shape as the data: {0}'.format(data.shape))
        edges = hypercube_edges(data.shape)
        if missing_val is not None:
            if verbose:
                print('Removing all data points whose data value is {0}'.format(missing_val))
            edges = [(e1, e2) for (e1, e2) in edges
                     if flat_data[e1] != missing_val and flat_data[e2] != missing_val]
            nonmissing_flat_data = flat_data[flat_data != missing_val]

    # Decompose the graph into trails
    g = Graph()
    g.add_edges_from(edges)
    chains = decompose_graph(g, heuristic='greedy')
    ntrails, trails, breakpoints, edges = chains_to_trails(chains)

    if null_dist is None:
        # empirical null estimation
        mu0, sigma0 = empirical_null(nonmissing_flat_data, verbose=max(0, verbose - 1))
    elif isinstance(null_dist, GaussianKnown):
        mu0, sigma0 = null_dist.mean, null_dist.stdev
    else:
        mu0, sigma0 = null_dist
    null_dist = GaussianKnown(mu0, sigma0)

    if verbose:
        print('Empirical null: {0}'.format(null_dist))

    # signal distribution estimation
    if verbose:
        print('Running predictive recursion for {0} sweeps'.format(num_sweeps))
    if signal_dist is None:
        grid_x = np.linspace(min(-20, nonmissing_flat_data.min() - 1),
                             max(nonmissing_flat_data.max() + 1, 20), 220)
        pr_results = predictive_recursion(nonmissing_flat_data, num_sweeps, grid_x, mu0=mu0, sig0=sigma0)
        signal_dist = GridDistribution(pr_results['grid_x'], pr_results['y_signal'])

    if verbose:
        print('Smoothing priors via solution path algorithm')

    solver = TrailSolver()
    solver.set_data(flat_data, edges, ntrails, trails, breakpoints)

    results = solution_path_smooth_fdr(flat_data, solver, null_dist, signal_dist, verbose=max(0, verbose - 1))

    results['discoveries'] = calc_fdr(results['posteriors'], fdr_level)
    results['null_dist'] = null_dist
    results['signal_dist'] = signal_dist

    # Reshape everything back to the original data shape
    results['betas'] = results['betas'].reshape(data.shape)
    results['priors'] = results['priors'].reshape(data.shape)
    results['posteriors'] = results['posteriors'].reshape(data.shape)
    results['discoveries'] = results['discoveries'].reshape(data.shape)
    results['beta_iters'] = np.array([x.reshape(data.shape) for x in results['beta_iters']])
    results['prior_iters'] = np.array([x.reshape(data.shape) for x in results['prior_iters']])
    results['posterior_iters'] = np.array([x.reshape(data.shape) for x in results['posterior_iters']])

    return results
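# Usage sketch (illustrative): run FDR smoothing on a grid of synthetic z-scores.
# The keys accessed below ('discoveries', 'posteriors') are the ones the function
# above populates and reshapes; the data and the chosen FDR level are made up.
if __name__ == '__main__':
    import numpy as np
    np.random.seed(0)
    z = np.random.normal(0, 1, (50, 50))
    z[10:25, 10:25] += np.random.normal(2.5, 0.5, (15, 15))   # a block of signal
    results = smooth_fdr(z, fdr_level=0.10, verbose=1)
    print('Discoveries:', int(results['discoveries'].sum()))
    print('Max posterior:', results['posteriors'].max())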
def smooth_fdr_known_dists(data, fdr_level, null_dist, signal_dist, edges=None,
                           initial_values=None, verbose=0, missing_val=None):
    '''FDR smoothing where the null and alternative distributions are known
    (and not necessarily Gaussian). Both must define the function pdf.'''
    flat_data = data.flatten()
    nonmissing_flat_data = flat_data

    if edges is None:
        if verbose:
            print('Using default edge set of a grid of same shape as the data: {0}'.format(data.shape))
        edges = hypercube_edges(data.shape)
        if missing_val is not None:
            if verbose:
                print('Removing all data points whose data value is {0}'.format(missing_val))
            edges = [(e1, e2) for (e1, e2) in edges
                     if flat_data[e1] != missing_val and flat_data[e2] != missing_val]
            nonmissing_flat_data = flat_data[flat_data != missing_val]

    # Decompose the graph into trails
    g = Graph()
    g.add_edges_from(edges)
    chains = decompose_graph(g, heuristic='greedy')
    ntrails, trails, breakpoints, edges = chains_to_trails(chains)

    if verbose:
        print('Smoothing priors via solution path algorithm')

    solver = TrailSolver()
    solver.set_data(flat_data, edges, ntrails, trails, breakpoints)

    results = solution_path_smooth_fdr(flat_data, solver, null_dist, signal_dist, verbose=max(0, verbose - 1))

    results['discoveries'] = calc_fdr(results['posteriors'], fdr_level)
    results['null_dist'] = null_dist
    results['signal_dist'] = signal_dist

    # Reshape everything back to the original data shape
    results['betas'] = results['betas'].reshape(data.shape)
    results['priors'] = results['priors'].reshape(data.shape)
    results['posteriors'] = results['posteriors'].reshape(data.shape)
    results['discoveries'] = results['discoveries'].reshape(data.shape)
    results['beta_iters'] = np.array([x.reshape(data.shape) for x in results['beta_iters']])
    results['prior_iters'] = np.array([x.reshape(data.shape) for x in results['prior_iters']])
    results['posterior_iters'] = np.array([x.reshape(data.shape) for x in results['posterior_iters']])

    return results
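# Usage sketch (illustrative): per the docstring above, the known-distributions
# variant only requires that null_dist and signal_dist expose a .pdf() method, so
# scipy frozen distributions are one option. The data and parameters are made up.
if __name__ == '__main__':
    import numpy as np
    import scipy.stats as st
    np.random.seed(1)
    z = np.random.normal(0, 1, (40, 40))
    z[:10, :10] += 3.0   # signal region drawn from roughly N(3, 1)
    results = smooth_fdr_known_dists(z, 0.05, st.norm(0, 1), st.norm(3, 1), verbose=1)
    print(int(results['discoveries'].sum()))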