def test_gowl_vs_glasso_duality_gap_3(self):
    """Fit GOWL and graphical lasso on one simulated dataset and compare them.

    Open question carried over from the original author: the duality gap
    goes negative in this case. Should that happen?

    Samples are drawn with a fixed seed from a Gaussian whose precision
    matrix comes from ``generate_theta_star_gowl`` (p=10, three explicit
    blocks). Both estimates are printed/plotted and then handed, together
    with the true precision matrix, to ``_fit_evaluations`` and (after
    ``spectral_clustering`` with K=4) to ``_cluster_evaluations``. This
    method makes no assertions of its own.
    """
    np.random.seed(680)
    p = 10
    n = 100
    lam1 = 0.001  # controls sparsity
    lam2 = 0.01  # encourages equality of coefficients

    # (idx, block_value) pairs; note the idx values are 0, 1 and 3.
    block_specs = ((0, 0.9), (1, -0.9), (3, -0.5))
    requested_blocks = [
        Block(dim=p, idx=idx, block_min_size=2, block_max_size=6, block_value=value)
        for idx, value in block_specs
    ]
    theta_stack, true_blocks, block_pattern = generate_theta_star_gowl(
        p=p, alpha=0.5, noise=0.1, blocks=requested_blocks,
    )

    # NOTE(review): rho is never read below; the call is kept as-is because
    # oscar_weights is opaque from here.
    rho = oscar_weights(lam1, lam2, (p ** 2 - p) / 2)

    true_precision = theta_stack[0]
    true_covariance = np.linalg.inv(true_precision)
    samples = standardize(
        np.random.multivariate_normal(np.zeros(p), true_covariance, n)
    )
    sample_cov = np.cov(samples.T)
    initial_theta = np.linalg.inv(sample_cov)

    gowl = GOWLModel(samples, sample_cov, initial_theta, lam1, lam2,
                     'backtracking', max_iters=100000)
    gowl.fit()
    gowl_precision = gowl.theta_hat

    # NOTE(review): if GraphicalLasso is scikit-learn's estimator, fit()
    # expects a samples-by-features data matrix unless
    # covariance='precomputed'; here it receives the sample covariance.
    # Confirm this is intended.
    glasso = GraphicalLasso(max_iter=200)
    glasso.fit(sample_cov)
    glasso_precision = glasso.get_precision()

    nonzero_count = np.count_nonzero(gowl_precision)
    print('Non zero entries in precision matrix {}'.format(nonzero_count))

    panels = [block_pattern, true_precision, glasso_precision, gowl_precision]
    titles = [f"Blocks: {len(true_blocks)}", 'True Theta', 'GLASSO', 'GOWL']
    plot_multiple_theta_matrices_2d(panels, titles)

    _fit_evaluations(true_precision, glasso_precision, 3, 'GLASSO')
    _fit_evaluations(true_precision, gowl_precision, 3, 'GOWL')

    # Clustering order (GOWL, GLASSO, reference) is kept in case
    # spectral_clustering consumes the global random state.
    gowl_clusters = spectral_clustering(theta=gowl_precision, K=4)
    glasso_clusters = spectral_clustering(theta=glasso_precision, K=4)
    reference_clusters = spectral_clustering(theta=block_pattern, K=4).flatten()

    _cluster_evaluations(reference_clusters, gowl_clusters, 'GOWL')
    _cluster_evaluations(reference_clusters, glasso_clusters, 'GLASSO')
def test_ccgowl_vs_grab_1(self):
    """Compare the CCGOWL and GRAB precision estimates on one simulated dataset.

    Samples are drawn with a fixed seed from a Gaussian whose precision
    matrix comes from ``generate_theta_star_gowl`` (p=10, one block).
    CCGOWL is fit on the standardized samples and GRAB on their sample
    covariance. Both estimates are plotted, handed together with the true
    precision matrix to ``_fit_evaluations``, and clustered with
    ``spectral_clustering`` (K=2) for ``_cluster_evaluations``.

    This method makes no assertions of its own.
    """
    np.random.seed(680)
    p = 10
    n_blocks = 1
    theta_star, true_blocks, theta_blocks = generate_theta_star_gowl(
        p=p, alpha=0.5, noise=0.1, n_blocks=n_blocks,
        block_min_size=2, block_max_size=6,
    )
    theta_star = theta_star[0]
    sigma = np.linalg.inv(theta_star)
    n = 100
    X = np.random.multivariate_normal(np.zeros(p), sigma, n)
    X = standardize(X)
    S = np.cov(X.T)

    lam1 = 0.05263158
    lam2 = 0.05263158
    model = CCGOWLModel(X, lam1, lam2)
    model.fit()
    theta_ccgowl = model.theta_hat

    lmbda = .2
    K = 10
    o_size = .3  # The size of overlap, as an input parameter
    max_iter = 20
    tol = 1e-4
    dual_max_iter = 600
    dual_tol = 1e-4
    # GRAB returns its own block assignment; keep it under a separate name
    # so the simulated ground-truth blocks stay available for the plot title.
    theta_grab, _grab_blocks = grab.BCD(
        S, lmbda=lmbda, K=K, o_size=o_size, max_iter=max_iter, tol=tol,
        dual_max_iter=dual_max_iter, dual_tol=dual_tol,
    )
    theta_grab = np.asarray(theta_grab)

    # Report and evaluate the fitted CCGOWL estimate (not an all-zero
    # placeholder matrix).
    print('Non zero entries in precision matrix {}'.format(np.count_nonzero(theta_ccgowl)))
    plot_multiple_theta_matrices_2d(
        [S, theta_blocks, theta_star, theta_grab, theta_ccgowl],
        ['Sample Covariance', f"Blocks: {len(true_blocks)}", 'True Theta', 'GRAB', 'CCGOWL'],
    )

    _fit_evaluations(theta_star, theta_grab, 1, 'GRAB')
    _fit_evaluations(theta_star, theta_ccgowl, 1, 'CCGOWL')

    y_hat_ccgowl = spectral_clustering(theta=theta_ccgowl, K=2)
    y_hat_grab = spectral_clustering(theta=theta_grab, K=2)
    y_true = spectral_clustering(theta=theta_blocks, K=2).flatten()
    _cluster_evaluations(y_true, y_hat_ccgowl, 'CCGOWL')
    _cluster_evaluations(y_true, y_hat_grab, 'GRAB')
def test_gowl_vs_grab_1(self):
    """Fit GRAB and GOWL on one simulated dataset and compare the estimates.

    Samples are drawn with a fixed seed from a Gaussian whose precision
    matrix comes from ``generate_theta_star_gowl`` (p=10, one block). GRAB
    is run on the sample covariance first, then GOWL is fit. Both estimates
    are plotted, handed together with the true precision matrix to
    ``_fit_evaluations``, and clustered with ``spectral_clustering`` (K=2)
    for ``_cluster_evaluations``.

    This method makes no assertions of its own.
    """
    np.random.seed(680)
    p = 10
    n = 100
    n_blocks = 1
    lam1 = 0.001  # controls sparsity
    lam2 = 0.01  # encourages equality of coefficients

    # The generator's block list is not needed by this test.
    theta_stack, _, block_pattern = generate_theta_star_gowl(
        p=p, alpha=0.5, noise=0.1, n_blocks=n_blocks,
        block_min_size=2, block_max_size=6,
    )
    true_precision = theta_stack[0]
    true_covariance = np.linalg.inv(true_precision)
    samples = standardize(
        np.random.multivariate_normal(np.zeros(p), true_covariance, n)
    )
    sample_cov = np.cov(samples.T)

    # GRAB runs before GOWL, as in the original ordering.
    grab_result, _ = grab.BCD(
        sample_cov,
        lmbda=.2,
        K=10,
        o_size=.3,  # The size of overlap, as an input parameter
        max_iter=20,
        tol=1e-4,
        dual_max_iter=600,
        dual_tol=1e-4,
    )
    grab_precision = np.asarray(grab_result)

    # NOTE(review): test_gowl_vs_glasso_duality_gap_3 passes an extra
    # theta_0 argument right after the sample covariance; confirm which
    # positional signature GOWLModel actually expects.
    gowl = GOWLModel(samples, sample_cov, lam1, lam2, 'backtracking',
                     max_iters=100000)
    gowl.fit()
    gowl_precision = gowl.theta_hat

    nonzero_count = np.count_nonzero(gowl_precision)
    print('Non zero entries in precision matrix {}'.format(nonzero_count))

    panels = [block_pattern, true_precision, grab_precision, gowl_precision]
    titles = [f"1 Block of Size 2", 'True Theta', 'GRAB', 'GOWL']
    plot_multiple_theta_matrices_2d(panels, titles)

    _fit_evaluations(true_precision, grab_precision, 1, 'GRAB')
    _fit_evaluations(true_precision, gowl_precision, 1, 'GOWL')

    # Clustering order (GOWL, GRAB, reference) is kept in case
    # spectral_clustering consumes the global random state.
    gowl_clusters = spectral_clustering(theta=gowl_precision, K=2)
    grab_clusters = spectral_clustering(theta=grab_precision, K=2)
    reference_clusters = spectral_clustering(theta=block_pattern, K=2).flatten()

    _cluster_evaluations(reference_clusters, gowl_clusters, 'GOWL')
    _cluster_evaluations(reference_clusters, grab_clusters, 'GRAB')
def read_stock_data():
    """Load the S&P data table, standardize it and relabel its columns.

    Both CSV files are looked up relative to the directory two levels above
    the current working directory. The data file is read with its first
    column as the index and passed through ``standardize``. Each column is
    then renamed to ``"<V1>/<V2>"`` using the ``V1`` and ``V2`` columns of
    the info file (treated here as company name and sector).

    Returns:
        A tuple ``(data, data.columns)``: the standardized table and its
        new column labels.
    """
    root = pathlib.Path.cwd().parent.parent
    stock_df = pd.read_csv(root / 'data/raw/s_&_p/data.csv', index_col=0)
    info = pd.read_csv(root / 'data/raw/s_&_p/info.csv')

    stock_df = standardize(stock_df)

    # Pair the info rows positionally with the data columns.
    column_labels = []
    for company, sector in zip(info['V1'], info['V2']):
        column_labels.append(f'{company}/{sector}')
    stock_df.columns = column_labels

    return stock_df, stock_df.columns
def load_gene_subset():
    """Load the reduced Golub et al. expression table and gene/pathway labels.

    Both CSV files are looked up relative to the directory two levels above
    the current working directory. From the expression table, the rows
    labelled 0 and 1 and the ``'Unnamed: 0'`` column are dropped; the index
    is converted to numeric and sorted, the values are cast to float, and
    the result is passed through ``standardize``.

    Returns:
        A tuple ``(X, labels)`` where ``X`` is the standardized table and
        ``labels`` maps each entry of the reactome file's ``gene`` column to
        the string ``"<gene>/<pathway>"``.
    """
    root = pathlib.Path.cwd().parent.parent
    expression = pd.read_csv(root / 'data/processed/golub_et_al/AML_ALL_reduced_50.csv')
    reactome = pd.read_csv(root / 'data/processed/golub_et_al/reactome.csv')

    # A gene that appears more than once keeps its last listed pathway.
    labels = {}
    for gene, pathway in zip(reactome['gene'].to_list(), reactome['pathway'].to_list()):
        labels[gene] = f"{gene}/{pathway}"

    trimmed = expression.drop([0, 1], axis=0).drop(['Unnamed: 0'], axis=1)
    trimmed.index = pd.to_numeric(trimmed.index)
    trimmed = trimmed.sort_index().astype(float)

    return standardize(trimmed), labels