# Common imports for the snippets below. The original files import regreg
# both as `rr` and as `R`; `all_close` and `ac` are comparison helpers
# assumed to be defined in the surrounding test modules.
import itertools
import time
from copy import copy

import numpy as np
import numpy.testing as npt
import scipy.optimize
from scipy import sparse
import pylab
import pylab as pl

import regreg.api as rr
import regreg.api as R


def test_multinomial_vs_logistic():
    """
    Test that multinomial regression with two categories is the same
    as logistic regression
    """
    n = 500
    p = 10
    J = 2

    X = np.random.standard_normal(n * p).reshape((n, p))
    counts = np.random.randint(0, 10, n * J).reshape((n, J)) + 2

    mult_x = rr.linear_transform(X, input_shape=(p, J - 1))
    loss = rr.multinomial_deviance.linear(mult_x, counts=counts)
    problem = rr.container(loss)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs1 = solver.composite.coefs

    loss = rr.logistic_deviance.linear(X, successes=counts[:, 0],
                                       trials=np.sum(counts, axis=1))
    problem = rr.container(loss)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs2 = solver.composite.coefs

    loss = rr.logistic_deviance.linear(X, successes=counts[:, 1],
                                       trials=np.sum(counts, axis=1))
    problem = rr.container(loss)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs3 = solver.composite.coefs

    npt.assert_equal(coefs1.shape, (p, J - 1))
    npt.assert_array_almost_equal(coefs1.flatten(), coefs2.flatten(), 5)
    npt.assert_array_almost_equal(coefs1.flatten(), -coefs3.flatten(), 5)
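# Why the test above holds (a standalone pure-numpy sketch, independent of
# regreg): with J = 2 categories and the second category as reference, the
# multinomial log-likelihood sum_ij c_ij * log p_ij, with
# p_i1 = exp(eta_i) / (1 + exp(eta_i)), is exactly the binomial
# log-likelihood with successes = c_i1 and trials = c_i1 + c_i2.
def multinomial_binomial_loglike_check():
    rng = np.random.RandomState(0)
    counts = rng.randint(0, 10, (50, 2)) + 2
    eta = rng.standard_normal(50)
    prob = np.exp(eta) / (1 + np.exp(eta))

    # multinomial form: sum over the two categories
    multinomial = (counts[:, 0] * np.log(prob) +
                   counts[:, 1] * np.log(1 - prob)).sum()

    # binomial form: successes / trials
    successes, trials = counts[:, 0], counts.sum(axis=1)
    binomial = (successes * np.log(prob) +
                (trials - successes) * np.log(1 - prob)).sum()

    np.testing.assert_allclose(multinomial, binomial)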
def test_conjugate_solver():

    # Solve Lagrange problem
    Y = np.random.standard_normal(500)
    Y[100:150] += 7
    Y[250:300] += 14

    loss = R.quadratic.shift(-Y, coef=0.5)
    sparsity = R.l1norm(len(Y), lagrange=1.4)
    D = sparse.csr_matrix((np.identity(500) + np.diag([-1] * 499, k=1))[:-1])
    fused = R.l1norm.linear(D, lagrange=25.5)

    problem = R.container(loss, sparsity, fused)
    solver = R.FISTA(problem)
    solver.fit(max_its=500, tol=1e-10)
    solution = solver.composite.coefs

    # Solve constrained version
    delta1 = np.fabs(D * solution).sum()
    delta2 = np.fabs(solution).sum()
    fused_constraint = R.l1norm.linear(D, bound=delta1)
    sparsity_constraint = R.l1norm(500, bound=delta2)

    constrained_problem = R.container(loss, fused_constraint, sparsity_constraint)
    constrained_solver = R.FISTA(constrained_problem)
    vals = constrained_solver.fit(max_its=500, tol=1e-10)
    constrained_solution = constrained_solver.composite.coefs

    npt.assert_almost_equal(np.fabs(constrained_solution).sum(), delta2, 3)
    npt.assert_almost_equal(np.fabs(D * constrained_solution).sum(), delta1, 3)

    # Solve with (shifted) conjugate function
    loss = R.quadratic.shift(-Y, coef=0.5)
    true_conjugate = R.quadratic.shift(Y, coef=0.5)
    problem = R.container(loss, fused_constraint, sparsity_constraint)
    solver = R.FISTA(problem.conjugate_composite(true_conjugate))
    solver.fit(max_its=500, tol=1e-10)
    conjugate_coefs = problem.conjugate_primal_from_dual(solver.composite.coefs)

    # Solve with generic conjugate function
    loss = R.quadratic.shift(-Y, coef=0.5)
    problem = R.container(loss, fused_constraint, sparsity_constraint)
    solver2 = R.FISTA(problem.conjugate_composite(conjugate_tol=1e-12))
    solver2.fit(max_its=500, tol=1e-10)
    conjugate_coefs_gen = problem.conjugate_primal_from_dual(solver2.composite.coefs)

    d1 = np.linalg.norm(solution - constrained_solution) / np.linalg.norm(solution)
    d2 = np.linalg.norm(solution - conjugate_coefs) / np.linalg.norm(solution)
    d3 = np.linalg.norm(solution - conjugate_coefs_gen) / np.linalg.norm(solution)

    npt.assert_array_less(d1, 0.01)
    npt.assert_array_less(d2, 0.01)
    npt.assert_array_less(d3, 0.01)
def test_lasso_separable():
    """
    This test verifies that the specification of a separable
    penalty yields the same results as having two linear_atoms
    with selector matrices.

    The penalty here is a lasso, i.e. l1 penalty.
    """
    X = np.random.standard_normal((100, 20))
    Y = np.random.standard_normal((100,)) + np.dot(X, np.random.standard_normal(20))

    penalty1 = rr.l1norm(10, lagrange=1.2)
    penalty2 = rr.l1norm(10, lagrange=1.2)
    penalty = rr.separable((20,), [penalty1, penalty2],
                           [slice(0, 10), slice(10, 20)],
                           test_for_overlap=True)

    # ensure code is tested
    print(penalty1.latexify())
    print(penalty.latexify())
    print(penalty.conjugate)
    print(penalty.dual)
    print(penalty.seminorm(np.ones(penalty.shape)))
    print(penalty.constraint(np.ones(penalty.shape), bound=2.))

    pencopy = copy(penalty)
    pencopy.set_quadratic(rr.identity_quadratic(1, 0, 0, 0))
    pencopy.conjugate

    # solve using separable
    loss = rr.quadratic_loss.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs

    # solve using the usual composite
    penalty_all = rr.l1norm(20, lagrange=1.2)
    problem_all = rr.container(loss, penalty_all)
    solver_all = rr.FISTA(problem_all)
    solver_all.fit(min_its=100, tol=1.0e-12)
    coefs_all = solver_all.composite.coefs

    # solve using the selectors
    penalty_s = [rr.linear_atom(p, rr.selector(g, (20,)))
                 for p, g in zip(penalty.atoms, penalty.groups)]
    problem_s = rr.container(loss, *penalty_s)
    solver_s = rr.FISTA(problem_s)
    solver_s.fit(min_its=500, tol=1.0e-12)
    coefs_s = solver_s.composite.coefs

    np.testing.assert_almost_equal(coefs, coefs_all)
    np.testing.assert_almost_equal(coefs, coefs_s)
def test_affine_linear_offset_l1norm():
    """
    Test linear, affine and offset with the l1norm atom
    """
    n = 1000
    p = 10

    X = np.random.standard_normal((n, p))
    Y = 10 * np.random.standard_normal(n)

    coefs = []

    loss = rr.quadratic.affine(X, -Y, coef=0.5)
    sparsity = rr.l1norm(p, lagrange=5.)
    problem = rr.container(loss, sparsity)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs.append(1. * solver.composite.coefs)

    loss = rr.quadratic.affine(X, -Y, coef=0.5)
    sparsity = rr.l1norm.linear(np.eye(p), lagrange=5.)
    problem = rr.container(loss, sparsity)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs.append(1. * solver.composite.coefs)

    loss = rr.quadratic.affine(X, -Y, coef=0.5)
    sparsity = rr.l1norm.affine(np.eye(p), np.zeros(p), lagrange=5.)
    problem = rr.container(loss, sparsity)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs.append(1. * solver.composite.coefs)

    loss = rr.quadratic.affine(X, -Y, coef=0.5)
    sparsity = rr.l1norm.linear(np.eye(p), lagrange=5., offset=np.zeros(p))
    problem = rr.container(loss, sparsity)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs.append(1. * solver.composite.coefs)

    loss = rr.quadratic.affine(X, -Y, coef=0.5)
    sparsity = rr.l1norm.shift(np.zeros(p), lagrange=5.)
    problem = rr.container(loss, sparsity)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs.append(1. * solver.composite.coefs)

    for i, j in itertools.combinations(range(len(coefs)), 2):
        npt.assert_almost_equal(coefs[i], coefs[j])
def test_container(self):
    tests = []
    atom, q, prox_center, L = self.atom, self.q, self.prox_center, self.L
    loss = self.loss

    problem = rr.container(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=self.coef_stop,
               FISTA=self.FISTA, min_its=100)

    tests.append((atom.proximal(q), solver.composite.coefs,
                  'solving atom prox with container\n %s ' % str(self)))

    # write the loss in terms of a quadratic for the smooth loss
    # and a smooth function...
    q = rr.identity_quadratic(L, prox_center, 0, 0)
    lossq = rr.quadratic.shift(prox_center.copy(), coef=0.6 * L)
    lossq.quadratic = rr.identity_quadratic(0.4 * L, prox_center.copy(), 0, 0)
    problem = rr.container(lossq, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, FISTA=self.FISTA, coef_stop=self.coef_stop)

    tests.append((atom.proximal(q),
                  problem.solve(tol=1.e-12, FISTA=self.FISTA,
                                coef_stop=self.coef_stop),
                  'solving prox with container with monotonicity ' +
                  'but loss has identity_quadratic\n %s ' % str(self)))

    d = atom.conjugate
    problem = rr.container(d, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=self.coef_stop,
               FISTA=self.FISTA, min_its=100)

    tests.append((d.proximal(q), solver.composite.coefs,
                  'solving dual prox with container\n %s ' % str(self)))

    if not self.interactive:
        for test in tests:
            yield (all_close,) + test + (self,)
    else:
        for test in tests:
            yield all_close(*(test + (self,)))
def example4(lambda1=10):

    # Example with an initial value for backtracking.

    # In the previous examples you'll see a lot of "Increasing inv_step"
    # iterations; these are trying to find an approximate Lipschitz constant
    # in a backtracking loop. For this problem the Lipschitz constant is just
    # the largest eigenvalue of X^T X, so you can precompute it with a few
    # power iterations.

    n = 100
    p = 1000

    X = np.random.standard_normal(n * p).reshape((n, p))
    Y = 10 * np.random.standard_normal(n)

    # power iterations: v converges to the top eigenvector of X^T X and
    # norm to the corresponding (largest) eigenvalue
    v = np.random.standard_normal(p)
    for i in range(10):
        v = np.dot(X.T, np.dot(X, v))
        norm = np.linalg.norm(v)
        v /= norm

    print("Approximate Lipschitz constant is", norm)

    loss = rr.l2normsq.affine(X, -Y, coef=1.)
    sparsity = rr.l1norm(p, lagrange=lambda1)
    nonnegative = rr.nonnegative(p)

    problem = rr.container(loss, sparsity, nonnegative)
    solver = rr.FISTA(problem)

    # give the approximate Lipschitz constant to the solver
    solver.fit(debug=True, start_inv_step=norm)
    solution = solver.composite.coefs
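# Standalone check of the power-iteration estimate (pure numpy; the
# tolerance below is illustrative): the value `norm` computed above should
# approach the largest eigenvalue of X^T X, i.e. the squared spectral norm
# of X, which is what FISTA needs as a Lipschitz constant for this loss.
def check_power_iteration_estimate():
    rng = np.random.RandomState(0)
    X = rng.standard_normal((100, 1000))
    v = rng.standard_normal(1000)
    for _ in range(100):
        v = np.dot(X.T, np.dot(X, v))
        estimate = np.linalg.norm(v)
        v /= estimate
    exact = np.linalg.norm(X, 2) ** 2  # largest eigenvalue of X^T X
    assert abs(estimate - exact) / exact < 0.05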
def test_group_lasso_separable():
    """
    This test verifies that the specification of a separable
    penalty yields the same results as having two linear_atoms
    with selector matrices.

    The penalty here is a group_lasso, i.e. l2 penalty.
    """
    X = np.random.standard_normal((100, 20))
    Y = np.random.standard_normal((100,)) + np.dot(X, np.random.standard_normal(20))

    penalty1 = rr.l2norm(10, lagrange=.2)
    penalty2 = rr.l2norm(10, lagrange=.2)
    penalty = rr.separable((20,), [penalty1, penalty2],
                           [slice(0, 10), slice(10, 20)])

    # solve using separable
    loss = rr.quadratic_loss.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs

    # solve using the selectors
    penalty_s = [rr.linear_atom(p, rr.selector(g, (20,)))
                 for p, g in zip(penalty.atoms, penalty.groups)]
    problem_s = rr.container(loss, *penalty_s)
    solver_s = rr.FISTA(problem_s)
    solver_s.fit(min_its=200, tol=1.0e-12)
    coefs_s = solver_s.composite.coefs

    np.testing.assert_almost_equal(coefs, coefs_s)
def test_lasso(n=100):

    l1 = 1.
    sparsity = R.l1norm(n, lagrange=l1)

    X = np.random.standard_normal((5000, n))
    Y = np.random.standard_normal((5000,))
    regloss = R.quadratic.affine(-X, Y)

    p = R.container(regloss, sparsity)
    solver = R.FISTA(p)
    solver.debug = True

    t1 = time.time()
    vals1 = solver.fit(max_its=800)
    t2 = time.time()
    dt1 = t2 - t1

    soln = solver.composite.coefs

    time.sleep(5)

    print(soln[:10])
    print(solver.composite.objective(soln))
    print("Times", dt1)
def fused_example():

    x = np.random.standard_normal(500)
    x[100:150] += 7

    sparsity = R.l1norm(500, lagrange=1.3)
    D = (np.identity(500) + np.diag([-1] * 499, k=1))[:-1]
    fused = R.l1norm.linear(D, lagrange=10.5)

    loss = R.quadratic.shift(-x, coef=0.5)
    pen = R.container(loss, sparsity, fused)
    solver = R.FISTA(pen)
    vals = solver.fit()
    soln = solver.composite.coefs

    # solution
    pylab.figure(num=1)
    pylab.clf()
    pylab.plot(soln, c='g')
    pylab.scatter(np.arange(x.shape[0]), x)

    # objective values
    pylab.figure(num=2)
    pylab.clf()
    pylab.plot(vals)
def test_group_lasso_sparse(n=100):

    def selector(p, slice):
        return np.identity(p)[slice]

    # sparse variant of the selector (unused in the dense run below)
    def selector_sparse(p, slice):
        return sparse.csr_matrix(np.identity(p)[slice])

    X = np.random.standard_normal((1000, 500))
    Y = np.random.standard_normal((1000,))
    loss = R.quadratic.affine(X, -Y, coef=0.5)

    penalties = [R.l2norm.linear(selector(500, slice(i * 100, (i + 1) * 100)),
                                 lagrange=.1) for i in range(5)]
    penalties[0].lagrange = 250.
    penalties[1].lagrange = 225.
    penalties[2].lagrange = 150.
    penalties[3].lagrange = 100.

    group_lasso = R.container(loss, *penalties)
    solver = R.FISTA(group_lasso)
    solver.debug = True

    t1 = time.time()
    vals = solver.fit(max_its=2000, min_its=20, tol=1e-8)
    soln1 = solver.composite.coefs
    t2 = time.time()
    dt1 = t2 - t1

    print(soln1[:10])
def group_lasso_example():

    def selector(p, slice):
        return np.identity(p)[slice]

    # each penalty acts on a block of 100 coefficients via a selector matrix;
    # as in test_group_lasso_sparse, the selector enters through l2norm.linear
    penalties = [R.l2norm.linear(selector(500, slice(i * 100, (i + 1) * 100)),
                                 lagrange=.1) for i in range(5)]
    penalties[0].lagrange = 250.
    penalties[1].lagrange = 225.
    penalties[2].lagrange = 150.
    penalties[3].lagrange = 100.

    X = np.random.standard_normal((1000, 500))
    Y = np.random.standard_normal((1000,))
    loss = R.quadratic.affine(X, -Y, coef=0.5)

    group_lasso = R.container(loss, *penalties)
    solver = R.FISTA(group_lasso)
    solver.debug = True
    vals = solver.fit(max_its=2000, min_its=20, tol=1e-10)
    soln = solver.composite.coefs

    # solution
    pylab.figure(num=1)
    pylab.clf()
    pylab.plot(soln, c='g')

    # objective values
    pylab.figure(num=2)
    pylab.clf()
    pylab.plot(vals)
def test_lasso():
    '''
    This test verifies that the l1 prox can be solved
    by a primal/dual specification.

    Obviously, we don't want to solve the l1 prox this way,
    but it verifies that the specification is working correctly.
    '''
    l1 = rr.l1norm(4, lagrange=2.)
    l11 = rr.l1norm(4, lagrange=1.)
    l12 = rr.l1norm(4, lagrange=1.)

    X = np.random.standard_normal((10, 4))
    Y = np.random.standard_normal(10) + 3
    loss = rr.quadratic.affine(X, -Y)

    p1 = rr.container(l11, loss, l12)
    solver1 = rr.FISTA(p1)
    solver1.fit(tol=1.0e-12, min_its=500)

    p2 = rr.separable_problem.singleton(l1, loss)
    solver2 = rr.FISTA(p2)
    solver2.fit(tol=1.0e-12)

    f = p2.objective
    ans = scipy.optimize.fmin_powell(f, np.zeros(4), ftol=1.0e-12)

    print(f(solver2.composite.coefs), f(ans))
    print(f(solver1.composite.coefs), f(ans))

    yield all_close, ans, solver2.composite.coefs, 'singleton solver', None
    yield all_close, solver1.composite.coefs, solver2.composite.coefs, 'container solver', None
def test_lasso_dual():
    """
    Check that the solution of the lasso signal approximator dual composite
    is soft-thresholding
    """
    l1 = .1
    sparsity = R.l1norm(10, lagrange=l1)
    x = np.arange(10) - 5
    loss = R.quadratic.shift(-x, coef=0.5)

    pen = R.simple_problem(loss, sparsity)
    solver = R.FISTA(pen)
    pen.lipschitz = 1
    solver.fit(backtrack=False)
    soln = solver.composite.coefs
    st = np.maximum(np.fabs(x) - l1, 0) * np.sign(x)
    np.testing.assert_almost_equal(soln, st, decimal=3)

    pen = R.simple_problem(loss, sparsity)
    solver = R.FISTA(pen)
    solver.fit(monotonicity_restart=False)
    soln = solver.composite.coefs
    st = np.maximum(np.fabs(x) - l1, 0) * np.sign(x)
    np.testing.assert_almost_equal(soln, st, decimal=3)

    pen = R.container(loss, sparsity)
    solver = R.FISTA(pen)
    solver.fit()
    soln = solver.composite.coefs
    np.testing.assert_almost_equal(soln, st, decimal=3)
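# The closed form being tested above (a pure-numpy sketch, independent of
# regreg): the proximal map of lam * ||.||_1 is coordinatewise
# soft-thresholding, which is what all three solvers should reproduce.
def soft_threshold(z, lam):
    # argmin_b 0.5 * ||b - z||^2 + lam * ||b||_1, solved coordinatewise
    return np.sign(z) * np.maximum(np.fabs(z) - lam, 0)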
def test_multiple_lasso_dual(n=500):
    """
    Check that the solution of the lasso signal approximator dual composite
    is soft-thresholding even when specified with multiple seminorms
    """
    l1 = 1
    sparsity1 = R.l1norm(n, lagrange=l1 * 0.75)
    sparsity2 = R.l1norm(n, lagrange=l1 * 0.25)
    x = np.random.normal(0, 1, n)
    loss = R.quadratic.shift(-x, coef=0.5)

    p = R.dual_problem.fromprimal(loss, sparsity1, sparsity2)
    t1 = time.time()
    solver = R.FISTA(p)
    solver.debug = True
    vals = solver.fit(tol=1.0e-16)
    soln = p.primal
    t2 = time.time()
    print(t2 - t1)

    st = np.maximum(np.fabs(x) - l1, 0) * np.sign(x)
    np.testing.assert_almost_equal(soln, st, decimal=3)

    p = R.container(loss, sparsity1, sparsity2)
    t1 = time.time()
    solver = R.FISTA(p)
    solver.debug = True
    vals = solver.fit(tol=1.0e-16)
    soln = p.primal
    t2 = time.time()
    print(t2 - t1)

    st = np.maximum(np.fabs(x) - l1, 0) * np.sign(x)
    print(soln[:10])
    print(st[:10])
    np.testing.assert_almost_equal(soln, st, decimal=3)
def test_logistic_offset():
    """
    Test the equivalence of binary/count specification in logistic_likelihood
    """
    # form the count version of the problem
    trials = np.random.binomial(5, 0.5, 10) + 1
    successes = np.random.binomial(trials, 0.5, len(trials))
    n = len(successes)
    p = 2 * n

    X = np.hstack([np.ones((n, 1)),
                   np.random.normal(0, 1, n * p).reshape((n, p))])

    loss = rr.logistic_loglike.linear(X, successes=successes, trials=trials)
    # do not penalize the intercept
    weights = np.ones(p + 1)
    weights[0] = 0.
    penalty = rr.quadratic_loss.linear(weights, coef=.1, diag=True)

    prob1 = rr.container(loss, penalty)
    solver1 = rr.FISTA(prob1)
    vals = solver1.fit(tol=1e-12)
    solution1 = solver1.composite.coefs

    # adding a constant offset to the linear predictor should simply shift
    # the intercept: the first column of X is all ones, so it absorbs diff
    diff = 0.1
    loss = rr.logistic_loglike.affine(X, successes=successes, trials=trials,
                                      offset=diff * np.ones(n))
    weights = np.ones(p + 1)
    weights[0] = 0.
    penalty = rr.quadratic_loss.linear(weights, coef=.1, diag=True)

    prob2 = rr.container(loss, penalty)
    solver2 = rr.FISTA(prob2)
    vals = solver2.fit(tol=1e-12)
    solution2 = solver2.composite.coefs

    ind = np.arange(1, p + 1)
    print(solution1[np.arange(5)])
    print(solution2[np.arange(5)])

    npt.assert_array_almost_equal(solution1[ind], solution2[ind], 3)
    npt.assert_almost_equal(solution1[0] - diff, solution2[0], 2)
def example3(lambda1=10):

    # Example using a smooth approximation to the non-negativity constraint.
    # On large problems this might be faster than using the actual constraint.

    n = 100
    p = 1000

    X = np.random.standard_normal(n * p).reshape((n, p))
    Y = 10 * np.random.standard_normal(n)

    loss = rr.l2normsq.affine(X, -Y, coef=1.)
    sparsity = rr.l1norm(p, lagrange=lambda1)
    nonnegative = rr.nonnegative(p)
    smooth_nonnegative = rr.smoothed_atom(nonnegative, epsilon=1e-4)

    problem = rr.container(loss, sparsity, smooth_nonnegative)
    solver = rr.FISTA(problem)
    solver.fit(debug=True)
    solution1 = solver.composite.coefs

    loss = rr.l2normsq.affine(X, -Y, coef=1.)
    sparsity = rr.l1norm(p, lagrange=lambda1)
    nonnegative = rr.nonnegative(p)

    problem = rr.container(loss, sparsity, nonnegative)
    solver = rr.FISTA(problem)
    solver.fit(debug=True)
    solution2 = solver.composite.coefs

    pl.subplot(1, 2, 1)
    pl.hist(solution1, bins=40)

    pl.subplot(1, 2, 2)
    pl.scatter(solution2, solution1)
    pl.xlabel("Constraint")
    pl.ylabel("Smooth constraint")
def lasso_via_dual_split():

    def selector(p, slice):
        return np.identity(p)[slice]

    # split the l1 penalty into five l1 atoms over blocks of beta
    penalties = [R.l1norm.linear(selector(500, slice(i * 100, (i + 1) * 100)),
                                 lagrange=0.2) for i in range(5)]
    x = np.random.standard_normal(500)
    loss = R.quadratic.shift(-x, coef=0.5)
    lasso = R.container(loss, *penalties)
    solver = R.FISTA(lasso)
    solver.fit(tol=1e-8)

    np.testing.assert_almost_equal(np.maximum(np.fabs(x) - 0.2, 0) * np.sign(x),
                                   solver.composite.coefs, decimal=3)
def test_logistic_counts():
    """
    Test the equivalence of binary/count specification in logistic_deviance
    """
    # form the count version of the problem
    trials = np.random.binomial(5, 0.5, 100) + 1
    successes = np.random.binomial(trials, 0.5, len(trials))
    n = len(successes)
    p = 2 * n

    X = np.random.normal(0, 1, n * p).reshape((n, p))
    loss = rr.logistic_deviance.linear(X, successes=successes, trials=trials)
    penalty = rr.quadratic(p, coef=1.)

    prob1 = rr.container(loss, penalty)
    solver1 = rr.FISTA(prob1)
    solver1.fit()
    solution1 = solver1.composite.coefs

    # form the binary version of the problem: expand each count observation
    # into t Bernoulli observations sharing the same row of X
    Ynew = []
    Xnew = []
    for i, (s, t) in enumerate(zip(successes, trials)):
        Ynew.append([1] * s + [0] * (t - s))
        for j in range(t):
            Xnew.append(X[i, :])
    Ynew = np.hstack(Ynew)
    Xnew = np.vstack(Xnew)

    loss = rr.logistic_deviance.linear(Xnew, successes=Ynew)
    penalty = rr.quadratic(p, coef=1.)

    prob2 = rr.container(loss, penalty)
    solver2 = rr.FISTA(prob2)
    solver2.fit()
    solution2 = solver2.composite.coefs

    npt.assert_array_almost_equal(solution1, solution2, 3)
def group_lasso_signal_approx():

    def selector(p, slice):
        return np.identity(p)[slice]

    penalties = [R.l2norm.linear(selector(500, slice(i * 100, (i + 1) * 100)),
                                 lagrange=10.) for i in range(5)]
    x = np.random.standard_normal(500)
    loss = R.quadratic.shift(-x, coef=0.5)
    group_lasso = R.container(loss, *penalties)

    solver = R.FISTA(group_lasso)
    solver.fit()
    a = solver.composite.coefs
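# Closed form for the signal-approximation case above (a pure-numpy sketch):
# with an identity design, each block x_g of the solution is shrunk by block
# soft-thresholding, the proximal map of lam * ||.||_2.
def block_soft_threshold(z, lam):
    # argmin_b 0.5 * ||b - z||^2 + lam * ||b||_2
    norm_z = np.linalg.norm(z)
    if norm_z <= lam:
        return np.zeros_like(z)
    return (1 - lam / norm_z) * z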
def __init__(self, X, initial=None, lagrange=1, rho=1):
    self.X = R.affine_transform(X, None)
    self.atom = R.l1norm(X.shape[1], lagrange=lagrange)
    self.rho = rho
    self.loss = R.quadratic.affine(X, -np.zeros(X.shape[0]), coef=rho / 2.)
    self.lasso = R.container(self.loss, self.atom)
    self.solver = R.FISTA(self.lasso)
    self.beta = np.zeros(self.atom.primal_shape)
    if initial is None:
        self.beta[:] = np.random.standard_normal(self.atom.primal_shape)
    else:
        self.beta[:] = initial
def test_1d_fused_lasso():
    """
    Check the 1d fused lasso solution using an equivalent lasso formulation
    """
    n = 100
    l1 = 1.

    D = (np.identity(n) - np.diag(np.ones(n - 1), -1))[1:]
    extra = np.zeros(n)
    extra[0] = 1.
    D = np.vstack([D, extra])
    D = sparse.csr_matrix(D)
    fused = R.l1norm.linear(D, lagrange=l1)

    X = np.random.standard_normal((2 * n, n))
    Y = np.random.standard_normal((2 * n,))
    loss = R.quadratic.affine(X, -Y, coef=0.5)

    fused_lasso = R.container(loss, fused)
    solver = R.FISTA(fused_lasso)
    vals1 = solver.fit(max_its=25000, tol=1e-10)
    soln1 = solver.composite.coefs

    B = np.array(sparse.tril(np.ones((n, n))).todense())
    X2 = np.dot(X, B)
    loss = R.quadratic.affine(X2, -Y, coef=0.5)
    sparsity = R.l1norm(n, lagrange=l1)
    lasso = R.container(loss, sparsity)
    solver = R.FISTA(lasso)
    solver.fit(tol=1e-10)

    soln2 = np.dot(B, solver.composite.coefs)

    npt.assert_array_almost_equal(soln1, soln2, 3)
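# Why the two formulations agree (a pure-numpy sketch): with B the
# lower-triangular matrix of ones (so beta = B gamma takes cumulative sums)
# and D the augmented difference matrix built above, D B is a permutation of
# the identity, hence ||D beta||_1 = ||gamma||_1 and the fused lasso in beta
# is an ordinary lasso in gamma.
def check_fused_lasso_reparametrization(n=5):
    D = (np.identity(n) - np.diag(np.ones(n - 1), -1))[1:]
    extra = np.zeros(n)
    extra[0] = 1.
    D = np.vstack([D, extra])
    B = np.tril(np.ones((n, n)))
    P = np.dot(D, B)
    # P is a permutation matrix: orthogonal with entries in {0, 1}
    assert np.allclose(np.dot(P, P.T), np.identity(n))
    assert np.allclose(np.unique(P), [0., 1.])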
def test_lasso_via_dual_split():
    """
    Test the lasso by breaking it up into multiple l1 atoms
    over the range of beta
    """
    def selector(p, slice):
        return np.identity(p)[slice]

    penalties = [R.l1norm.linear(selector(500, slice(i * 100, (i + 1) * 100)),
                                 lagrange=0.2) for i in range(5)]
    x = np.random.standard_normal(500)
    loss = R.quadratic.shift(-x, coef=0.5)
    lasso = R.container(loss, *penalties)
    solver = R.FISTA(lasso)
    solver.fit(tol=1e-8)

    npt.assert_array_almost_equal(np.maximum(np.fabs(x) - 0.2, 0) * np.sign(x),
                                  solver.composite.coefs, 3)
def test_admm_l1_seminorm():
    """
    Test ADMM using the l1norm in lagrange form
    """
    p = 1000
    Y = 10 * np.random.normal(0, 1, p)

    loss = R.quadratic.shift(-Y, coef=0.5)
    sparsity = R.l1norm(p, lagrange=5.)

    prob = R.container(loss, sparsity)
    solver = R.admm_problem(prob)
    solver.fit(debug=False, tol=1e-12)
    solution = solver.beta

    npt.assert_array_almost_equal(
        solution,
        np.maximum(np.fabs(Y) - sparsity.lagrange, 0.) * np.sign(Y),
        3)
def test_admm_l1_constraint():
    """
    Test ADMM using the l1norm in bound form
    """
    p = 1000
    Y = 10 * np.random.normal(0, 1, p)

    loss = R.linear(Y, coef=0.5)
    sparsity = R.l1norm(p, bound=5.)
    # exercise the bound setter (numerically a no-op)
    sparsity.bound *= 1.

    prob = R.container(loss, sparsity)
    solver = R.admm_problem(prob)
    solver.fit(debug=False, tol=1e-12)
    solution = solver.beta

    npt.assert_almost_equal(np.fabs(solution).sum(), sparsity.bound, 3)
def test_multiple_lasso():
    """
    Check that the solution of the lasso signal approximator dual problem
    is soft-thresholding even when specified with multiple seminorms
    """
    p = 1000
    l1 = 2
    # two l1 atoms whose lagrange parameters sum to l1 act like a single
    # l1 penalty with lagrange l1
    sparsity1 = R.l1norm(p, lagrange=l1 * 0.75)
    sparsity2 = R.l1norm(p, lagrange=l1 * 0.25)
    x = np.random.normal(0, 1, p)
    loss = R.quadratic.shift(-x, coef=0.5)

    problem = R.container(loss, sparsity1, sparsity2)
    solver = R.FISTA(problem)
    vals = solver.fit(tol=1.0e-10)
    soln = solver.composite.coefs
    st = np.maximum(np.fabs(x) - l1, 0) * np.sign(x)

    npt.assert_array_almost_equal(soln, st, 3)
def example2(lambda1=10):

    # Example with a non-identity X

    n = 100
    p = 1000

    X = np.random.standard_normal(n * p).reshape((n, p))
    Y = 10 * np.random.standard_normal(n)

    loss = rr.l2normsq.affine(X, -Y, coef=1.)
    sparsity = rr.l1norm(p, lagrange=lambda1)
    nonnegative = rr.nonnegative(p)

    problem = rr.container(loss, sparsity, nonnegative)
    solver = rr.FISTA(problem)
    solver.fit(debug=True)
    solution = solver.composite.coefs
def test_l1_seminorm():
    """
    Test using the l1norm in lagrange form
    """
    p = 1000
    Y = 10 * np.random.normal(0, 1, p)

    loss = R.quadratic.shift(-Y, coef=0.5)
    sparsity = R.l1norm(p, lagrange=5.)
    # exercise the lagrange setter (numerically a no-op)
    sparsity.lagrange *= 1.

    problem = R.container(loss, sparsity)
    solver = R.FISTA(problem)
    vals = solver.fit(tol=1e-10, max_its=500)
    solution = solver.composite.coefs

    npt.assert_array_almost_equal(
        solution,
        np.maximum(np.fabs(Y) - sparsity.lagrange, 0.) * np.sign(Y),
        3)
def example1(lambda1=10):

    # Example with X = np.identity(n).
    # Try varying lambda1 to see shrinkage.

    n = 100
    Y = 10 * np.random.standard_normal(n)

    loss = rr.l2normsq.shift(-Y, coef=1.)
    sparsity = rr.l1norm(n, lagrange=lambda1)
    nonnegative = rr.nonnegative(n)

    problem = rr.container(loss, sparsity, nonnegative)
    solver = rr.FISTA(problem)
    solver.fit(debug=True)
    solution = solver.composite.coefs

    pl.plot(Y, color='red', label='Y')
    pl.plot(solution, color='blue', label='beta')
    pl.legend()
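# Closed form for example1 (a sketch under the assumption that
# l2normsq.shift(-Y, coef=1.) represents ||beta - Y||^2): minimizing
# (b - y)^2 + lambda1 * b over b >= 0 coordinatewise gives
# b = max(y - lambda1 / 2, 0), so the fit above can be checked against
#     expected = np.maximum(Y - lambda1 / 2., 0.)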
def test_lasso_dual_from_primal(l1=.1, L=2.):
    """
    Check that the solution of the lasso signal approximator dual composite
    is soft-thresholding, when called from the primal composite object
    """
    sparsity = R.l1norm(500, lagrange=l1)
    x = np.random.normal(0, 1, 500)
    y = np.random.normal(0, 1, 500)

    X = np.random.standard_normal((1000, 500))
    Y = np.random.standard_normal((1000,))
    regloss = R.quadratic.affine(-X, Y)

    p = R.container(regloss, sparsity)
    z = x - y / L
    soln = p.proximal(R.identity_quadratic(L, z, 0, 0))
    st = np.maximum(np.fabs(z) - l1 / L, 0) * np.sign(z)

    print(x[:10])
    print(soln[:10])
    print(st[:10])

    np.testing.assert_almost_equal(soln, st, decimal=3)
def lasso_example():

    l1 = 20.
    # split the penalty into two l1 atoms, each with half the lagrange
    sparsity = R.l1norm(500, lagrange=l1 / 2.)
    sparsity2 = R.l1norm(500, lagrange=l1 / 2.)

    X = np.random.standard_normal((1000, 500))
    Y = np.random.standard_normal((1000,))
    regloss = R.quadratic.affine(X, -Y, coef=0.5)

    p = R.container(regloss, sparsity, sparsity2)
    solver = R.FISTA(p)
    solver.debug = True
    vals = solver.fit(max_its=2000, min_its=100)
    soln = solver.composite.coefs

    # solution
    pylab.figure(num=1)
    pylab.clf()
    pylab.plot(soln, c='g')

    # objective values
    pylab.figure(num=2)
    pylab.clf()
    pylab.plot(vals)
def test_1d_fused_lasso(n=100):

    l1 = 1.
    sparsity1 = R.l1norm(n, lagrange=l1)

    D = (np.identity(n) - np.diag(np.ones(n - 1), -1))[1:]
    extra = np.zeros(n)
    extra[0] = 1.
    D = np.vstack([D, extra])
    D = sparse.csr_matrix(D)
    fused = R.l1norm.linear(D, lagrange=l1)

    X = np.random.standard_normal((2 * n, n))
    Y = np.random.standard_normal((2 * n,))
    loss = R.quadratic.affine(X, -Y, coef=0.5)

    fused_lasso = R.container(loss, fused)
    solver = R.FISTA(fused_lasso)
    solver.debug = True
    vals1 = solver.fit(max_its=25000, tol=1e-12)
    soln1 = solver.composite.coefs

    B = np.array(sparse.tril(np.ones((n, n))).todense())
    X2 = np.dot(X, B)

    # solve the equivalent lasso in the reparametrized basis and compare
    loss = R.quadratic.affine(X2, -Y, coef=0.5)
    sparsity = R.l1norm(n, lagrange=l1)
    lasso = R.container(loss, sparsity)
    solver = R.FISTA(lasso)
    solver.fit(tol=1e-12)
    soln2 = np.dot(B, solver.composite.coefs)

    npt.assert_array_almost_equal(soln1, soln2, 3)
def test_quadratic_for_smooth():
    '''
    This test is a check to ensure that the quadratic part of the
    smooth functions is being used in the proximal step.
    '''
    L = 0.45

    W = np.random.standard_normal(40)
    Z = np.random.standard_normal(40)
    U = np.random.standard_normal(40)

    atomq = rr.identity_quadratic(0.4, U, W, 0)
    atom = rr.l1norm(40, quadratic=atomq, lagrange=0.12)

    # specifying in this way should be the same as if we put 0.5*L below
    loss = rr.quadratic.shift(Z, coef=0.6 * L)
    lq = rr.identity_quadratic(0.4 * L, Z, 0, 0)
    loss.quadratic = lq

    ww = np.random.standard_normal(40)

    # specifying in this way should be the same as if we put 0.5*L below
    loss2 = rr.quadratic.shift(Z, coef=L)

    yield all_close, loss2.objective(ww), loss.objective(ww), 'checking objective', None
    yield all_close, lq.objective(ww, 'func'), loss.nonsmooth_objective(ww), 'checking nonsmooth objective', None
    yield all_close, loss2.smooth_objective(ww, 'func'), 0.5 / 0.3 * loss.smooth_objective(ww, 'func'), 'checking smooth objective func', None
    yield all_close, loss2.smooth_objective(ww, 'grad'), 0.5 / 0.3 * loss.smooth_objective(ww, 'grad'), 'checking smooth objective grad', None

    problem = rr.container(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12)

    problem3 = rr.simple_problem(loss, atom)
    solver3 = rr.FISTA(problem3)
    solver3.fit(tol=1.0e-12, coef_stop=True)

    loss4 = rr.quadratic.shift(Z, coef=0.6 * L)
    problem4 = rr.simple_problem(loss4, atom)
    problem4.quadratic = lq
    solver4 = rr.FISTA(problem4)
    solver4.fit(tol=1.0e-12)

    gg_soln = rr.gengrad(problem, L)

    loss6 = rr.quadratic.shift(Z, coef=0.6 * L)
    loss6.quadratic = lq + atom.quadratic
    atomcp = copy(atom)
    atomcp.quadratic = rr.identity_quadratic(0, 0, 0, 0)
    problem6 = rr.dual_problem(loss6.conjugate,
                               rr.identity(loss6.shape),
                               atomcp.conjugate)
    problem6.lipschitz = L + atom.quadratic.coef
    dsoln2 = problem6.solve(coef_stop=True, tol=1.e-10, max_its=100)

    problem2 = rr.container(loss2, atom)
    solver2 = rr.FISTA(problem2)
    solver2.fit(tol=1.0e-12, coef_stop=True)

    q = rr.identity_quadratic(L, Z, 0, 0)

    yield all_close, problem.objective(ww), atom.nonsmooth_objective(ww) + q.objective(ww, 'func'), '', None

    atom = rr.l1norm(40, quadratic=atomq, lagrange=0.12)
    aq = atom.solve(q)
    for p, msg in zip([solver3.composite.coefs,
                       gg_soln,
                       solver2.composite.coefs,
                       dsoln2,
                       solver.composite.coefs,
                       solver4.composite.coefs],
                      ['simple_problem with loss having no quadratic',
                       'gen grad',
                       'container with loss having no quadratic',
                       'dual problem with loss having a quadratic',
                       'container with loss having a quadratic',
                       'simple_problem having a quadratic']):
        yield all_close, aq, p, msg, None
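# For reference (an assumption worth checking against your regreg version):
# rr.identity_quadratic(c, z, l, k) represents the function
#     x -> (c / 2) * ||x - z||^2 + <l, x> + k,
# which is why splitting the coefficient into 0.6 * L (smooth part) and
# 0.4 * L (quadratic attribute) above reproduces a single quadratic with
# coefficient L, and why the smooth objectives differ by the factor
# 0.5 / 0.3 = L / (0.6 * L).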
def solveit(atom, Z, W, U, linq, L, FISTA, coef_stop):

    p2 = copy(atom)
    p2.quadratic = rr.identity_quadratic(L, Z, 0, 0)

    d = atom.conjugate

    q = rr.identity_quadratic(1, Z, 0, 0)
    yield ac, Z - atom.proximal(q), d.proximal(q), 'testing duality of projections starting from atom %s ' % atom

    q = rr.identity_quadratic(L, Z, 0, 0)

    # use simple_problem.nonsmooth
    p2 = copy(atom)
    p2.quadratic = atom.quadratic + q
    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14, FISTA=FISTA, coef_stop=coef_stop)
    yield ac, atom.proximal(q), solver.composite.coefs, 'solving prox with simple_problem.nonsmooth with monotonicity %s ' % atom

    # use the solve method
    p2.coefs *= 0
    p2.quadratic = atom.quadratic + q
    soln = p2.solve()
    yield ac, atom.proximal(q), soln, 'solving prox with solve method %s ' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, FISTA=FISTA, coef_stop=coef_stop)
    yield ac, atom.proximal(q), solver.composite.coefs, 'solving prox with simple_problem with monotonicity %s ' % atom

    dproblem2 = rr.dual_problem(loss.conjugate,
                                rr.identity(loss.shape),
                                atom.conjugate)
    dcoef2 = dproblem2.solve(coef_stop=coef_stop, tol=1.e-14)
    yield ac, atom.proximal(q), dcoef2, 'solving prox with dual_problem with monotonicity %s ' % atom

    dproblem = rr.dual_problem.fromprimal(loss, atom)
    dcoef = dproblem.solve(coef_stop=coef_stop, tol=1.0e-14)
    yield ac, atom.proximal(q), dcoef, 'solving prox with dual_problem.fromprimal with monotonicity %s ' % atom

    # write the loss in terms of a quadratic for the smooth loss
    # and a smooth function...
    lossq = rr.quadratic.shift(-Z, coef=0.6 * L)
    lossq.quadratic = rr.identity_quadratic(0.4 * L, Z, 0, 0)
    problem = rr.simple_problem(lossq, atom)
    yield ac, atom.proximal(q), problem.solve(coef_stop=coef_stop, FISTA=FISTA, tol=1.0e-12), 'solving prox with simple_problem with monotonicity but loss has identity_quadratic %s ' % atom

    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14, monotonicity_restart=False,
               coef_stop=coef_stop, FISTA=FISTA)
    yield ac, atom.proximal(q), solver.composite.coefs, 'solving prox with simple_problem.nonsmooth with no monotonicity %s ' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, monotonicity_restart=False,
               coef_stop=coef_stop, FISTA=FISTA)
    yield ac, atom.proximal(q), solver.composite.coefs, 'solving prox with simple_problem %s no monotonicity_restart' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.separable_problem.singleton(atom, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)
    yield ac, atom.proximal(q), solver.composite.coefs, 'solving atom prox with separable_atom.singleton %s ' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.container(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)
    yield ac, atom.proximal(q), solver.composite.coefs, 'solving atom prox with container %s ' % atom

    # write the loss in terms of a quadratic for the smooth loss
    # and a smooth function...
    lossq = rr.quadratic.shift(-Z, coef=0.6 * L)
    lossq.quadratic = rr.identity_quadratic(0.4 * L, Z, 0, 0)
    problem = rr.container(lossq, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, FISTA=FISTA, coef_stop=coef_stop)
    yield ac, atom.proximal(q), problem.solve(tol=1.e-12, FISTA=FISTA, coef_stop=coef_stop), 'solving prox with container with monotonicity but loss has identity_quadratic %s ' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, d)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, monotonicity_restart=False,
               coef_stop=coef_stop, FISTA=FISTA)
    yield ac, d.proximal(q), problem.solve(tol=1.e-12, FISTA=FISTA, coef_stop=coef_stop, monotonicity_restart=False), 'solving dual prox with simple_problem no monotonicity %s ' % atom

    problem = rr.container(d, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)
    yield ac, d.proximal(q), solver.composite.coefs, 'solving dual prox with container %s ' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.separable_problem.singleton(d, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)
    yield ac, d.proximal(q), solver.composite.coefs, 'solving dual prox with separable_atom.singleton %s ' % atom
import numpy as np
import pylab
from scipy import sparse

import regreg.api as R

Y = np.random.standard_normal(500)
Y[100:150] += 7
Y[250:300] += 14

loss = R.quadratic.shift(-Y, coef=0.5)
sparsity = R.l1norm(len(Y), lagrange=1.4)
# TODO should make a module to compute typical Ds
D = sparse.csr_matrix((np.identity(500) + np.diag([-1] * 499, k=1))[:-1])
fused = R.l1norm.linear(D, lagrange=25.5)

problem = R.container(loss, sparsity, fused)
solver = R.FISTA(problem)
solver.fit(max_its=100, tol=1e-10)
solution = solver.composite.coefs

delta = np.fabs(D * solution).sum()

sparsity = R.l1norm(len(Y), lagrange=1.4)
fused_constraint = R.l1norm.linear(D, bound=delta)

constrained_problem = R.container(loss, fused_constraint, sparsity)
constrained_solver = R.FISTA(constrained_problem)
constrained_solver.composite.lipschitz = 1.01
vals = constrained_solver.fit(max_its=10, tol=1e-06,
                              backtrack=False,
                              monotonicity_restart=False)
constrained_solution = constrained_solver.composite.coefs