def _report_load_mismatches(prob, label, n, d, col_norm_0, col_norm_last,
                            last_label):
    """Print one failure line per loaded quantity that differs from its reference.

    Compares the norms of the first and last columns and the last label held
    by ``prob`` against the reference values using exact inequality.
    ``label`` is the prefix used in the printed messages.
    """
    if prob._get_A_column_norm(0) != col_norm_0:
        print("%s data load failed (col_norm_0)" % label)
    if prob._get_A_column_norm(d - 1) != col_norm_last:
        print("%s data load failed (col_norm_last)" % label)
    if prob._get_label_i(n - 1) != last_label:
        print("%s labels load failed" % label)


def test_DataLoad():
    """Check that LassoProblem loads dense, float16, CSC and CSR inputs.

    Reference column norms are computed with NumPy from a 10 x 200 matrix of
    consecutive integers; each input format is then loaded into a
    ``blitzl1.LassoProblem`` and compared against those references. Failures
    are reported by printing a message, not by raising.
    """
    n = 10
    d = 200
    # np.float64 replaces the ``np.float`` alias, which NumPy 1.24 removed.
    A = np.arange(n * d, dtype=np.float64).reshape(n, d)
    b = np.arange(n, dtype=np.float64)
    col_norm_0 = np.linalg.norm(A[:, 0])
    col_norm_last = np.linalg.norm(A[:, d - 1])

    # Dense input: a separately allocated array with the same contents as A.
    B = np.arange(n * d, dtype=np.float64).reshape(n, d)
    prob = blitzl1.LassoProblem(B, b)
    _report_load_mismatches(prob, "Dense", n, d, col_norm_0, col_norm_last,
                            b[n - 1])

    # Dense float16 input for both the matrix and the labels.
    A_float16 = np.array(A, dtype=np.float16)
    b_float16 = np.array(b, dtype=np.float16)
    prob = blitzl1.LassoProblem(A_float16, b_float16)
    _report_load_mismatches(prob, "Dense float16", n, d, col_norm_0,
                            col_norm_last, b[n - 1])

    # Sparse CSC input.
    A_csc = sparse.csc_matrix(A)
    prob = blitzl1.LassoProblem(A_csc, b)
    _report_load_mismatches(prob, "CSC", n, d, col_norm_0, col_norm_last,
                            b[n - 1])

    # Sparse CSR input.
    A_csr = sparse.csr_matrix(A)
    prob = blitzl1.LassoProblem(A_csr, b)
    _report_load_mismatches(prob, "CSR", n, d, col_norm_0, col_norm_last,
                            b[n - 1])

    # Sparse CSR float16 input: only the last column norm is checked, and
    # with an absolute slack of 1.0 rather than exact equality.
    A_float16 = sparse.csr_matrix(A, dtype=np.float16)
    prob = blitzl1.LassoProblem(A_float16, b)
    diff = abs(prob._get_A_column_norm(d - 1) - col_norm_last)
    if diff > 1.0:
        print("CSR float16 data load failed (col_norm_last)")
def test_SimpleLasso():
    """Solve a 4 x 4 diagonal lasso problem and compare against fixed values.

    The problem is solved with regularization 1, first without and then with
    an intercept. Each mismatch against the expected coefficients, intercept
    or objective value is reported by printing a message, not by raising.
    """
    blitzl1.set_use_intercept(False)
    blitzl1.set_tolerance(0.0)
    blitzl1.set_verbose(False)

    # Diagonal design matrix diag(1, 1, 2, 2), stored as CSC.
    A = np.eye(4)
    A[3, 3] = 2.0
    A[2, 2] = 2.0
    b = np.array([5., -2., 2., -6.])
    A = sparse.csc_matrix(A)
    prob = blitzl1.LassoProblem(A, b)

    sol = prob.solve(1)
    if not approx_equal(sol.x[0], 4.0) or not approx_equal(sol.x[3], -2.75):
        print("test SimpleLasso basic failed")

    # Same problem again, now with the intercept enabled.
    blitzl1.set_use_intercept(True)
    sol = prob.solve(1)
    if not approx_equal(sol.intercept, -0.25):
        print("test SimpleLasso intercept failed")
    if not approx_equal(sol.objective_value, 9.75):
        print("test SimpleLasso obj failed")

    # Recompute the objective on the Python side: loss plus the L1 norm of x
    # (the regularization weight used above is 1).
    python_obj = sol.evaluate_loss(A, b) + np.linalg.norm(sol.x, ord=1)
    if not approx_equal(sol.objective_value, python_obj):
        print("test SimpleLasso python_obj failed")
def test_SmallLasso():
    """Exercise solving, solution save/load and log-directory output.

    Solves a 5 x 4 problem, round-trips the solution through a file under
    /tmp, then solves again with a log directory and checks the last log
    point that could be read. Failures are reported by printing a message,
    not by raising.
    """
    # Local import: only needed for the log-directory cleanup at the end.
    import shutil

    blitzl1.set_use_intercept(False)
    blitzl1.set_tolerance(0.0)
    blitzl1.set_verbose(False)
    A = np.arange(20).reshape(5, 4)
    b = np.arange(5)
    A = sparse.csc_matrix(A)
    prob = blitzl1.LassoProblem(A, b)
    sol = prob.solve(2)
    if not approx_equal(sol.objective_value, 0.4875):
        print("test SmallLasso obj failed")

    # Save the solution, load it back and compare with the in-memory one.
    save_path = "/tmp/blitzl1_save_test"
    sol.save(save_path)
    sol2 = blitzl1.load_solution(save_path)
    if not np.all(sol.x == sol2.x):
        print("test SmallLasso save_x failed")
    if sol.objective_value != sol2.objective_value:
        print("test SmallLasso save_obj failed")
    os.remove(save_path)

    # Solve with logging enabled, then walk time.<k> / obj.<k> for
    # k = 0, 1, ... until one of the files is missing or unparsable.
    blitzl1.set_tolerance(0.1)
    log_path = "/tmp/blitzl1_log_test/"
    sol = prob.solve(5.0, log_directory=log_path)
    last_time = None
    last_obj = None
    log_point = 0
    while True:
        time_file = "%s/time.%d" % (log_path, log_point)
        obj_file = "%s/obj.%d" % (log_path, log_point)
        try:
            with open(time_file) as handle:
                last_time = float(handle.read())
            with open(obj_file) as handle:
                last_obj = float(handle.read())
        except (IOError, OSError, ValueError):
            break
        log_point += 1

    # A missing log (nothing readable at all) counts as a failure rather than
    # crashing on an unbound variable.
    if last_obj is None or not approx_equal(last_obj, sol.objective_value):
        print("test SmallLasso log_obj failed")
    if last_time is None or last_time <= 0.0:
        print("test SmallLasso log_time failed")

    # Best-effort cleanup; like the previous ``rm -r``, never raises.
    shutil.rmtree(log_path, ignore_errors=True)
def test_StatusMessage():
    """Check the ``status`` string of solutions under three stopping setups.

    The same 10 x 10 problem is solved with tolerance 0.0, with tolerance
    0.1, and with tolerance 0.0 plus a time limit of 0.0; each time the
    solution's ``status`` is compared with the expected message. Failures are
    reported by printing a message, not by raising.
    """
    blitzl1.set_tolerance(0.0)
    blitzl1.set_verbose(False)
    n = 10
    d = 10
    # np.float64 replaces the ``np.float`` alias, which NumPy 1.24 removed.
    A = np.arange(n * d, dtype=np.float64).reshape(n, d)
    b = np.arange(n, dtype=np.float64)
    prob = blitzl1.LassoProblem(A, b)

    sol = prob.solve(2000.)
    if sol.status != "reached machine precision":
        print("test StatusMessage machine precision failed")

    blitzl1.set_tolerance(0.1)
    sol = prob.solve(2000.)
    if sol.status != "reached stopping tolerance":
        print("test StatusMessage stopping tolerance failed")

    # NOTE(review): the 0.0 time limit is not reset before returning; if the
    # setting is module-wide it will affect later solves -- confirm.
    blitzl1.set_tolerance(0.0)
    blitzl1.set_max_time(0.0)
    sol = prob.solve(2000.)
    if sol.status != "reached time limit":
        print("test StatusMessage time limit failed")
import blitzl1
import sys
import os
import numpy as np
from scipy import sparse

# Demo script: build a random 100 x 1000 problem stored as CSC, compute
# lambda_max, solve at one tenth of it, then open an IPython shell so the
# solution can be inspected interactively.
blitzl1.set_verbose(True)
blitzl1.set_tolerance(0.0)

n = 100
d = 1000
A = np.random.randn(n, d)
A = sparse.csc_matrix(A)
b = np.random.randn(n)

prob = blitzl1.LassoProblem(A, b)
lammax = prob.compute_lambda_max()
# Single formatted argument: prints the same text on Python 2 and Python 3.
print("lammax is %s" % (lammax,))
sol = prob.solve(lammax * 0.1)

# Imported late on purpose: IPython is only needed for the interactive shell.
from IPython import embed
embed()
def set_objective(self, X, y, lmbd):
    """Store the problem data and build the blitzl1 problem object.

    Keeps ``X``, ``y`` and ``lmbd`` on the instance, turns the blitzl1
    intercept off, and creates a ``blitzl1.LassoProblem`` from the stored
    data as ``self.problem``.
    """
    self.X = X
    self.y = y
    self.lmbd = lmbd

    # The intercept setting is applied before the problem is constructed.
    blitzl1.set_use_intercept(False)
    problem = blitzl1.LassoProblem(self.X, self.y)
    self.problem = problem
names[start:end] = [(j, k) for k in range(j, p)] tot += Xint_sub.getnnz() start = end end += Xint_sub.shape[1] - 1 indices = np.hstack(indices) indices = indices.astype('int64') Xint = sp.csc_matrix((np.ones(tot, dtype=np.int8), indices, indptr), dtype=np.int8, shape=(n, p * (p + 1) / 2)) # Run blitz on SNPs data with interactions blitzl1.set_tolerance(args.tol) blitzl1.set_verbose(False) blitzl1.set_use_intercept(args.useBias) prob = blitzl1.LassoProblem(Xint, y) # Compute lambda_max t0_lammax = time.time() lammax = prob.compute_lambda_max() t1_lammax = time.time() csv_writer_preproc.writerow([t1_lammax - t0_lammax]) file_preproc.close() # Define the values of lambda for which the solution will be computed lam = [ lammax * pow(10, np.log10(args.lambdaMinRatio) * t / args.nlambda) for t in range(1, args.nlambda + 1) ]
tol=tol, verbose=True, strategy=3, min_ws_size=min_ws_size, screening=0) dur_a5g = time.time() - t0 print("A5G time %.4f" % (dur_a5g)) beta = np.array(a5g_res[0]) gaps = a5g_res[2] times = a5g_res[3] print(beta[beta != 0]) from a5g.utils import primal, dual R = y - X_new.dot(beta) dual_scale = max(alpha, np.max(np.abs(X_new.T.dot(R)))) p_obj = primal(R, beta, alpha) d_obj = dual(y, R / dual_scale, alpha, (y**2).sum()) print(p_obj - d_obj) assert (p_obj - d_obj) < tol t0 = time.time() prob = blitzl1.LassoProblem(X_new, y) blitzl1.set_use_intercept(True) blitzl1.set_tolerance(tol) sol = prob.solve(alpha) print("Blitz time %.3f s" % (time.time() - t0)) beta_blitz = sol.x[sol.x != 0] R = y - X_new.dot(sol.x) p_obj_blitz = 0.5 * (R**2).sum() + alpha * norm(sol.x, ord=1)
# parameters
j_star = np.argmax(np.abs(X.T.dot(y)))
alpha_max = np.linalg.norm(X.T.dot(y), ord=np.inf)
n_alphas = 5
eps = 1e-3
alpha_ratio = eps**(1. / (n_alphas - 1))
# alphas = np.array([alpha_max * (alpha_ratio ** i) for i in range(0, n_alphas)])
max_iter = 5000
tol = 1e-8
scg = NO_SCREENING

X = csc_matrix(X)

# Time our own path solver on a single regularization value, alpha_max / 100.
tic = time.time()
intercept, sp_beta, sp_gap, sp_n_iters, _ = sp_lasso_path(
    X, y.copy(), [alpha_max / 100.], eps=tol, max_iter=max_iter,
    screening=scg, j_star=j_star)
# Single formatted argument: prints the same text on Python 2 and Python 3.
print("our time =  %s" % (time.time() - tic,))

# scikit-learn fit at the same alpha divided by n_samples, with an intercept.
from sklearn import linear_model
clf = linear_model.Lasso(alpha=alpha_max / 100. / n_samples,
                         fit_intercept=True)
clf.fit(X, y)

# blitzl1 solve at the same alpha, with the intercept enabled.
import blitzl1
blitzl1.set_use_intercept(1)
prob = blitzl1.LassoProblem(X, y)
sol = prob.solve(alpha_max / 100.)

# Print the three intercepts side by side for comparison.
print("intercept =  %s %s %s" % (intercept, clf.intercept_, sol.intercept))
def set_objective(self, X, y, lmbd, fit_intercept):
    """Store the problem data and build the blitzl1 problem object.

    Keeps ``X``, ``y`` and ``lmbd`` on the instance, turns the blitzl1
    intercept off, sets the blitzl1 tolerance to 0, and creates a
    ``blitzl1.LassoProblem`` from the stored data as ``self.problem``.

    ``fit_intercept`` is accepted but not read here: the intercept is always
    disabled.
    """
    self.X = X
    self.y = y
    self.lmbd = lmbd

    # Solver settings are applied before the problem is constructed.
    blitzl1.set_use_intercept(False)
    blitzl1.set_tolerance(0)
    problem = blitzl1.LassoProblem(self.X, self.y)
    self.problem = problem