dependency_graph_full = mm.load_kernel_laplacian(BASE_PATH + "dependency_full.csv") dependency_graph_sem_reduced = mm.load_kernel_laplacian(BASE_PATH + "dependency.csv") index_sparse = np.ones(num_examples_sparse, dtype=bool) index_sparse = np.concatenate((index_sparse, np.zeros(mm.num_examples() - num_examples_sparse - 1, dtype=bool))) np.random.shuffle(index_sparse) index_big = np.ones(num_examples_big, dtype=bool) index_big = np.concatenate((index_big, np.zeros(mm.num_examples() - num_examples_big - 1, dtype=bool))) np.random.shuffle(index_big) # ****************** Semantic FS and Standard Regression FS ****************** # features = list(k_sem_reduced.columns.values) k_sem_reduced.columns = [f.replace("http://www.i40.com/ontology#", "") for f in features] print("Getting features: ", k_sem_reduced.columns.values) X_sem = mm.get_all_features_except_response(response, index_big, k_sem_reduced) num_features_sem = X_sem.shape[1] print("Semantic reduced features: ", k_sem_reduced.columns.values) X_all = mm.get_all_features_except_response(response, index_big) num_features_all = X_all.shape[1] y_all = mm.get_data().ix[index_big, response] mean_y_all = np.mean(y_all) F, p_vals = f_regression(X_all, y_all) index_reg_reduced = p_vals <= p_val X_reg = X_all.ix[:, index_reg_reduced] num_features_reg = X_reg.shape[1] print("P-value reduced features: ", k_full.columns.values[index_reg_reduced]) for alpha in np.arange(0.1, 2.1, 0.1):
__author__ = 'martin'

from learning.grakelasso import GraKeLasso, ModelManager
import numpy as np

# Hyperparameters for the GraKeLasso evaluation.
lambd = 0.1          # regularization strength
alpha = 1            # kernel/Laplacian mixing parameter
num_examples = 1000  # number of rows sampled for training
response = "TestingProduct"

# *************** Load Data ************** #
mm = ModelManager()
mm.load_data(["../data/test.txt"])
kernel_lap = mm.load_kernel_laplacian("../data/laplacian.csv")
data = mm.get_data()

# Random boolean mask selecting num_examples rows of the dataset.
# NOTE(review): total mask length is mm.num_examples() - 1, one element short
# of the dataset — looks like an off-by-one; confirm against the data loader
# before removing the trailing "- 1".
index_sparse = np.ones(num_examples, dtype=bool)
index_sparse = np.concatenate(
    (index_sparse,
     np.zeros(mm.num_examples() - num_examples - 1, dtype=bool)))
np.random.shuffle(index_sparse)

# Feature matrix (all columns except the response) and response vector.
X_sparse = mm.get_all_features_except_response(response, index_sparse)
# .ix was removed in pandas 1.0; .loc handles boolean-mask rows + column label.
y_sparse = data.loc[index_sparse, response]

# Evaluate GraKeLasso via 10-fold cross-validation.
# .as_matrix() was removed in pandas 1.0; .values is the compatible equivalent.
klasso = GraKeLasso(kernel_lap.values, alpha)
rmse, avg_theta = klasso.cross_val(X_sparse, y_sparse, 10, 10000, lambd)
print("MSE and Coefficient Reduction ", rmse, avg_theta)