def predict(self, X):
    """
    :param X: shape [n_row*n_clm, n_band]
    :return: the dataset restricted to the selected bands, shape [n_row*n_clm, self.n_band]
    """
    # construct affinity matrix
    kwargs = {"metric": "euclidean", "neighborMode": "knn", "weightMode": "heatKernel", "k": 5, 't': 1}
    W = construct_W.construct_W(X, **kwargs)

    # obtain the feature weight matrix
    Weight = NDFS.ndfs(X, W=W, n_clusters=self.n_cluster)

    # rank features in descending order according to their scores
    idx = feature_ranking(Weight)

    # obtain the dataset on the selected features
    selected_features = X[:, idx[0:self.n_band]]
    return selected_features
def main():
    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]

    # perform evaluation on clustering task
    num_fea = 100      # number of selected features
    num_cluster = 20   # number of clusters, usually set to the number of classes in the ground truth

    # obtain the feature weight matrix
    Weight = UDFS.udfs(X, gamma=0.1, n_clusters=num_cluster)

    # rank features in descending order according to their scores
    idx = feature_ranking(Weight)

    # obtain the dataset on the selected features
    selected_features = X[:, idx[0:num_fea]]

    # perform kmeans clustering based on the selected features, repeated 20 times
    nmi_total = 0
    acc_total = 0
    for i in range(0, 20):
        nmi, acc = unsupervised_evaluation.evaluation(X_selected=selected_features, n_clusters=num_cluster, y=y)
        nmi_total += nmi
        acc_total += acc

    # output the average NMI and average ACC
    print('NMI:', float(nmi_total) / 20)
    print('ACC:', float(acc_total) / 20)
def ls_l21_FS(X_train, y, train_index):
    Y = construct_label_matrix_pan(y)
    Y_train = Y[train_index]
    W, obj, value_gamma = ls_l21.proximal_gradient_descent(X_train, Y_train, 0.1, verbose=False)
    idx = feature_ranking(W)
    return idx, W
def ll_l21_FS(X_train, y, train_index):
    Y = construct_label_matrix_pan(y)
    Y_train = Y[train_index]
    Weight, obj, value_gamma = ll_l21.proximal_gradient_descent(X_train, Y_train, 0.1, verbose=False)
    idx = feature_ranking(Weight)
    return idx, Weight
def SKF_ndfs(X, y):
    # construct affinity matrix
    kwargs = {"metric": "euclidean", "neighborMode": "knn", "weightMode": "heatKernel", "k": 5, 't': 1}
    W = construct_W(X, **kwargs)

    # number of clusters, usually set to the number of classes in the ground truth
    num_cluster = len(set(y))

    # obtain the feature weight matrix
    Weight = NDFS.ndfs(X, W=W, n_clusters=num_cluster)
    return sparse_learning.feature_ranking(Weight)
def udfs_score(diheds):
    from numpy import mean
    from skfeature.function.sparse_learning_based import UDFS
    from skfeature.utility.sparse_learning import feature_ranking

    # collect the UDFS feature weight matrix for every fifth trajectory frame
    idx = []
    for i in range(0, len(diheds), 5):
        X = diheds[i]
        score = UDFS.udfs(X, gamma=0.1, n_clusters=20)
        idx.append(score)

    # average the weight matrices, then rank features in descending order of score
    col_mean = mean(idx, axis=0)
    imp_features = feature_ranking(col_mean)
    return col_mean, imp_features
def main():
    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]

    # construct affinity matrix
    kwargs = {"metric": "euclidean", "neighborMode": "knn", "weightMode": "heatKernel", "k": 5, 't': 1}
    W = construct_W.construct_W(X, **kwargs)

    # obtain the feature weight matrix
    Weight = NDFS.ndfs(X, W=W, n_clusters=20)

    # rank features in descending order according to their scores
    idx = feature_ranking(Weight)

    # perform evaluation on clustering task
    num_fea = 100      # number of selected features
    num_cluster = 20   # number of clusters, usually set to the number of classes in the ground truth

    # obtain the dataset on the selected features
    selected_features = X[:, idx[0:num_fea]]

    # perform kmeans clustering based on the selected features, repeated 20 times
    nmi_total = 0
    acc_total = 0
    for i in range(0, 20):
        nmi, acc = unsupervised_evaluation.evaluation(X_selected=selected_features, n_clusters=num_cluster, y=y)
        nmi_total += nmi
        acc_total += acc

    # output the average NMI and average ACC
    print('NMI:', float(nmi_total) / 20)
    print('ACC:', float(acc_total) / 20)
def get_sel_idx(high_th_year, low_th_year, feature_list, sel_feature_num):
    high_risk_th = high_th_year * 365
    low_risk_th = low_th_year * 365
    high_risk_group, low_risk_group = helper.get_risk_group(x, c, s, high_risk_th, low_risk_th)
    # without a validation set
    trn_x, trn_y = helper.get_train(high_risk_group, low_risk_group, is_categori_y=False, seed=self.random_seed)

    W, _, _ = ll_l21.proximal_gradient_descent(trn_x, trn_y, z=0.01, mode='raw')
    sort_idx = feature_ranking(W)
    return sort_idx[:sel_feature_num]
def main():
    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    Y = construct_label_matrix(y)
    n_samples, n_features = X.shape

    # split data into 10 folds (modern sklearn model_selection API)
    ss = model_selection.KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 100            # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss.split(X):
        # obtain the feature weight matrix
        Weight = RFS.rfs(X[train, :], Y[train, :], gamma=0.1)

        # rank features in descending order according to their scores
        idx = feature_ranking(Weight)

        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(selected_features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(selected_features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        print(acc)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print('Accuracy:', float(correct) / 10)
def ndfs_score(diheds):
    from numpy import mean
    from skfeature.function.sparse_learning_based import NDFS
    from skfeature.utility import construct_W
    from skfeature.utility.sparse_learning import feature_ranking

    kwargs = {"metric": "euclidean", "neighborMode": "knn", "weightMode": "heatKernel", "k": 5, 't': 1}

    # collect the NDFS feature weight matrix for every fifth trajectory frame
    idx = []
    for i in range(0, len(diheds), 5):
        X = diheds[i]
        W = construct_W.construct_W(X, **kwargs)
        score = NDFS.ndfs(X, W=W, n_clusters=20)
        idx.append(score)

    # average the weight matrices, then rank features in descending order of score
    col_mean = mean(idx, axis=0)
    imp_features = feature_ranking(col_mean)
    return col_mean, imp_features
def main():
    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]

    # construct affinity matrix
    kwargs = {"metric": "euclidean", "neighborMode": "knn", "weightMode": "heatKernel", "k": 5, 't': 1}
    W = construct_W.construct_W(X, **kwargs)

    # obtain the feature weight matrix
    Weight = NDFS.ndfs(X, W=W, n_clusters=20)

    # rank features in descending order according to their scores
    idx = feature_ranking(Weight)

    # perform evaluation on clustering task
    num_fea = 100      # number of selected features
    num_cluster = 20   # number of clusters, usually set to the number of classes in the ground truth

    # obtain the dataset on the selected features
    selected_features = X[:, idx[0:num_fea]]

    # perform kmeans clustering based on the selected features, repeated 20 times
    nmi_total = 0
    acc_total = 0
    for i in range(0, 20):
        nmi, acc = unsupervised_evaluation.evaluation(X_selected=selected_features, n_clusters=num_cluster, y=y)
        nmi_total += nmi
        acc_total += acc

    # output the average NMI and average ACC
    print('NMI:', float(nmi_total) / 20)
    print('ACC:', float(acc_total) / 20)
def UDFS_FS(X):
    Weight = UDFS.udfs(X)
    idx = feature_ranking(Weight)
    return idx, Weight
def proximal_gradient_descent(X, Y_flat, z, mode="rank", **kwargs):
    """
    This function implements supervised sparse feature selection via l2,1 norm, i.e.,
    min_{W} ||XW-Y||_F^2 + z*||W||_{2,1}

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data, guaranteed to be a numpy array
    Y_flat: {numpy array}, shape (n_samples,)
        input class labels, converted internally to a one-hot-coding label matrix
    z: {float}
        regularization parameter
    kwargs: {dictionary}
        verbose: {boolean}
            True if the user wants to print out the objective function value in each iteration, False if not

    Output
    ------
    W: {numpy array}, shape (n_features, n_classes)
        weight matrix
    obj: {numpy array}, shape (n_iterations,)
        objective function value during iterations
    value_gamma: {numpy array}, shape (n_iterations,)
        suitable step size during iterations

    Reference
    ---------
    Liu, Jun, et al. "Multi-Task Feature Learning Via Efficient l2,1-Norm Minimization." UAI. 2009.
    """

    def init_factor(W_norm, XW, Y, z):
        """
        Initialize the starting point of W, according to the author's code
        """
        n_samples, n_classes = XW.shape
        a = np.inner(np.reshape(XW, n_samples * n_classes), np.reshape(Y, n_samples * n_classes)) - z * W_norm
        b = LA.norm(XW, 'fro')**2
        ratio = a / b
        return ratio

    if 'verbose' not in kwargs:
        verbose = False
    else:
        verbose = kwargs['verbose']

    # convert Y_flat to one-hot encoding
    Y = construct_label_matrix_pan(Y_flat)

    # starting point initialization
    n_samples, n_features = X.shape
    n_samples, n_classes = Y.shape

    # compute X'Y
    XtY = np.dot(np.transpose(X), Y)

    # initialize a starting point
    W = XtY

    # compute XW = X*W
    XW = np.dot(X, W)

    # compute the l2,1 norm of W
    W_norm = calculate_l21_norm(W)

    if W_norm >= 1e-6:
        ratio = init_factor(W_norm, XW, Y, z)
        W = ratio * W
        XW = ratio * XW

    # start the main program: the Armijo-Goldstein line search scheme + accelerated gradient descent
    # initialize step size
    gamma = 1

    # assign Wp with W, and XWp with XW
    XWp = XW
    WWp = np.zeros((n_features, n_classes))
    alphap = 0
    alpha = 1

    # indicates whether the gradient step only changes a little
    flag = False

    max_iter = 1000
    value_gamma = np.zeros(max_iter)
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # step 1: compute the search point S based on Wp and W (with beta)
        beta = (alphap - 1) / alpha
        S = W + beta * WWp

        # step 2: line search for gamma and compute the new approximate solution W
        XS = XW + beta * (XW - XWp)
        # compute X'*XS
        XtXS = np.dot(np.transpose(X), XS)
        # obtain the gradient G
        G = XtXS - XtY
        # copy W and XW to Wp and XWp
        Wp = W
        XWp = XW
        while True:
            # let S take a step in the antigradient direction to get V, then do the L1/L2-norm regularized projection
            V = S - G / gamma
            W = euclidean_projection(V, n_features, n_classes, z, gamma)
            # the difference between the new approximate solution W and the search point S
            V = W - S
            # compute XW = X*W
            XW = np.dot(X, W)
            XV = XW - XS
            r_sum = LA.norm(V, 'fro')**2
            l_sum = LA.norm(XV, 'fro')**2
            # determine whether the gradient step makes little improvement
            if r_sum <= 1e-20:
                flag = True
                break
            # the condition is ||XV||_2^2 <= gamma * ||V||_2^2
            if l_sum < r_sum * gamma:
                break
            else:
                gamma = max(2 * gamma, l_sum / r_sum)
        value_gamma[iter_step] = gamma

        # step 3: update alpha and alphap, and check whether converged
        alphap = alpha
        alpha = (1 + math.sqrt(4 * alpha * alpha + 1)) / 2
        WWp = W - Wp
        XWY = XW - Y

        # calculate obj
        obj[iter_step] = LA.norm(XWY, 'fro')**2 / 2
        obj[iter_step] += z * calculate_l21_norm(W)

        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step + 1, obj[iter_step]))

        if flag is True:
            break

        # determine whether converged
        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step - 1]) < 1e-3:
            break

    if mode == "raw":
        return W, obj, value_gamma
    elif mode == "rank":
        # rank features in descending order according to the weight matrix
        idx = feature_ranking(W).tolist()
        return reverse_argsort(idx, size=X.shape[1])
    else:
        print("Invalid mode {} selected, should be one of \"raw\" or \"rank\"".format(mode))
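# A minimal usage sketch of the least-squares l2,1 solver above (not part of the source):
# it assumes the module-level imports the function relies on (numpy as np, numpy.linalg as LA,
# math, and the skfeature helpers) are in scope; the data shapes, seed, and z value are
# illustrative assumptions only.
import numpy as np

rng = np.random.RandomState(0)
X_demo = rng.rand(50, 20)          # 50 samples, 20 features
y_demo = rng.randint(0, 3, 50)     # flat labels for 3 classes; one-hot encoded inside

# mode="raw" returns the weight matrix plus per-iteration diagnostics
W_demo, obj_demo, gamma_demo = proximal_gradient_descent(X_demo, y_demo, z=0.1, mode="raw")

# rows of W with a large l2 norm correspond to informative features
scores_demo = np.sqrt((W_demo * W_demo).sum(1))

# mode="rank" instead returns, for each feature, its position in the descending ranking
rank_demo = proximal_gradient_descent(X_demo, y_demo, z=0.1, mode="rank")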
X = X.astype(float)
Y = construct_label_matrix(y)
n_samples, n_features = X.shape

# split data into 13 stratified folds
cv = StratifiedKFold(n_splits=13)

# perform evaluation on classification task
num_fea = 11    # number of selected features

for train, test in cv.split(X, y):
    # obtain the feature weight matrix
    Weight = RFS.rfs(X[train, :], Y[train, :], gamma=0.1)

    # rank features in descending order according to their scores
    idx = feature_ranking(Weight)
    X_resampled = X[:, idx[0:num_fea]]

    ad = pd.read_csv(r'D:\Spring2017\Artificial Intelligence\Lymph.csv')
    ad.head()
    X_data = ad.iloc[:, 0:27]
    X_data = pd.DataFrame(X_data)
    X_data = X_data.iloc[:, idx[0:num_fea]]
    print(X_data.columns.values)
    y_resampled = y

    # Decision Tree
    dtc = DecisionTreeClassifier(random_state=0)
    scores = cross_val_score(dtc,
def proximal_gradient_descent(X, Y_flat, z, mode="rank", **kwargs):
    """
    This function implements supervised sparse feature selection via l2,1 norm, i.e.,
    min_{W} sum_{i} log(1+exp(-yi*(W'*x+C))) + z*||W||_{2,1}

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    Y_flat: {numpy array}, shape (n_samples,)
        input class labels, converted internally to a one-hot-coding label matrix
    z: {float}
        regularization parameter
    kwargs: {dictionary}
        verbose: {boolean}
            True if the user wants to print out the objective function value in each iteration, False if not

    Output
    ------
    W: {numpy array}, shape (n_features, n_classes)
        weight matrix
    obj: {numpy array}, shape (n_iterations,)
        objective function value during iterations
    value_gamma: {numpy array}, shape (n_iterations,)
        suitable step size during iterations

    Reference
    ---------
    Liu, Jun, et al. "Multi-Task Feature Learning Via Efficient l2,1-Norm Minimization." UAI. 2009.
    """
    if 'verbose' not in kwargs:
        verbose = False
    else:
        verbose = kwargs['verbose']

    # starting point initialization
    # convert Y_flat to one-hot encoding
    Y = construct_label_matrix_pan(Y_flat)
    n_samples, n_features = X.shape
    n_samples, n_classes = Y.shape

    # the indices of positive samples
    p_flag = (Y == 1)
    # the total number of positive samples
    n_positive_samples = np.sum(p_flag, 0)
    # the total number of negative samples
    n_negative_samples = n_samples - n_positive_samples
    n_positive_samples = n_positive_samples.astype(float)
    n_negative_samples = n_negative_samples.astype(float)

    # initialize a starting point
    W = np.zeros((n_features, n_classes))
    C = np.log(np.divide(n_positive_samples, n_negative_samples))

    # compute XW = X*W
    XW = np.dot(X, W)

    # start the main program: the Armijo-Goldstein line search scheme + accelerated gradient descent
    # the initial guess of the Lipschitz continuous gradient
    gamma = 1.0 / (n_samples * n_classes)

    # assign Wp with W, and XWp with XW
    XWp = XW
    WWp = np.zeros((n_features, n_classes))
    CCp = np.zeros((1, n_classes))

    alphap = 0
    alpha = 1

    # indicates whether the gradient step only changes a little
    flag = False

    max_iter = 1000
    value_gamma = np.zeros(max_iter)
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # step 1: compute the search point S based on Wp and W (with beta)
        beta = (alphap - 1) / alpha
        S = W + beta * WWp
        SC = C + beta * CCp

        # step 2: line search for gamma and compute the new approximate solution W
        XS = XW + beta * (XW - XWp)
        aa = -np.multiply(Y, XS + np.tile(SC, (n_samples, 1)))
        # fun_S is the logistic loss at the search point
        bb = np.maximum(aa, 0)
        fun_S = np.sum(np.log(np.exp(-bb) + np.exp(aa - bb)) + bb) / (n_samples * n_classes)
        # compute prob = [p_1;p_2;...;p_m]
        prob = 1.0 / (1 + np.exp(aa))
        b = np.multiply(-Y, (1 - prob)) / (n_samples * n_classes)
        # compute the gradient of C
        GC = np.sum(b, 0)
        # compute the gradient of W as X'*b
        G = np.dot(np.transpose(X), b)

        # copy W and XW to Wp and XWp
        Wp = W
        XWp = XW
        Cp = C
        while True:
            # let S take a step in the antigradient direction to get V, then do the L1/L2-norm regularized projection
            V = S - G / gamma
            C = SC - GC / gamma
            W = euclidean_projection(V, n_features, n_classes, z, gamma)
            # the difference between the new approximate solution W and the search point S
            V = W - S
            # compute XW = X*W
            XW = np.dot(X, W)
            aa = -np.multiply(Y, XW + np.tile(C, (n_samples, 1)))
            # fun_W is the logistic loss at the new approximate solution
            bb = np.maximum(aa, 0)
            fun_W = np.sum(np.log(np.exp(-bb) + np.exp(aa - bb)) + bb) / (n_samples * n_classes)

            r_sum = (LA.norm(V, 'fro')**2 + LA.norm(C - SC, 2)**2) / 2
            l_sum = fun_W - fun_S - np.sum(np.multiply(V, G)) - np.inner((C - SC), GC)

            # determine whether the gradient step makes little improvement
            if r_sum <= 1e-20:
                flag = True
                break

            # the condition is fun_W <= fun_S + <V, G> + <C-SC, GC> + gamma/2 * (<V,V> + <C-SC, C-SC>)
            if l_sum < r_sum * gamma:
                break
            else:
                gamma = max(2 * gamma, l_sum / r_sum)
        value_gamma[iter_step] = gamma

        # step 3: update alpha and alphap, and check whether converged
        alphap = alpha
        alpha = (1 + math.sqrt(4 * alpha * alpha + 1)) / 2
        WWp = W - Wp
        CCp = C - Cp

        # calculate obj
        obj[iter_step] = fun_W
        obj[iter_step] += z * calculate_l21_norm(W)

        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step + 1, obj[iter_step]))

        if flag is True:
            break

        # determine whether converged
        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step - 1]) < 1e-3:
            break

    if mode == "raw":
        return W, obj, value_gamma
    elif mode == "rank":
        # rank features in descending order according to the weight matrix
        idx = feature_ranking(W).tolist()
        return reverse_argsort(idx, size=X.shape[1])
    else:
        print("Invalid mode {} selected, should be one of \"raw\" or \"rank\"".format(mode))
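# A minimal usage sketch of the logistic l2,1 solver above (not part of the source): same
# calling convention as the least-squares variant; shapes, seed, and z are illustrative
# assumptions, and the module's own imports and helpers are assumed to be in scope.
import numpy as np

rng = np.random.RandomState(1)
X_demo = rng.rand(40, 25)          # 40 samples, 25 features
y_demo = rng.randint(0, 2, 40)     # binary flat labels; one-hot encoded inside

# verbose=True prints the objective value per iteration; mode="raw" returns diagnostics too
W_demo, obj_demo, gamma_demo = proximal_gradient_descent(X_demo, y_demo, z=0.05,
                                                         mode="raw", verbose=True)

# descending feature importance, as used by the feature_ranking-based snippets here
idx_demo = feature_ranking(W_demo)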
def udfs_score(X, y, gamma=.1, **kwargs):
    Weight = UDFS.udfs(X, gamma=gamma, n_clusters=len(np.unique(y)))
    idx = feature_ranking(Weight)
    return idx
def ndfs(X, y=None, mode="rank", **kwargs):
    """
    This function implements unsupervised feature selection using nonnegative spectral analysis, i.e.,
    min_{F,W} Tr(F^T L F) + alpha*(||XW-F||_F^2 + beta*||W||_{2,1}) + gamma/2 * ||F^T F - I||_F^2
    s.t. F >= 0

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    kwargs: {dictionary}
        W: {sparse matrix}, shape (n_samples, n_samples)
            affinity matrix
        alpha: {float}
            parameter alpha in the objective function
        beta: {float}
            parameter beta in the objective function
        gamma: {float}
            a very large number used to force F^T F = I
        F0: {numpy array}, shape (n_samples, n_clusters)
            initialization of the pseudo label matrix F; computed via kmeans if not provided
        n_clusters: {int}
            number of clusters
        verbose: {boolean}
            True if the user wants to print out the objective function value in each iteration, False if not

    Output
    ------
    W: {numpy array}, shape (n_features, n_clusters)
        feature weight matrix

    Reference
    ---------
    Li, Zechao, et al. "Unsupervised Feature Selection Using Nonnegative Spectral Analysis." AAAI. 2012.
    """

    def kmeans_initialization(X, n_clusters):
        """
        This function uses kmeans to initialize the pseudo label

        Input
        -----
        X: {numpy array}, shape (n_samples, n_features)
            input data
        n_clusters: {int}
            number of clusters

        Output
        ------
        F: {numpy array}, shape (n_samples, n_clusters)
            pseudo label matrix
        """
        n_samples, n_features = X.shape
        # explicit defaults kept minimal so this also runs on modern scikit-learn
        kmeans = sklearn.cluster.KMeans(n_clusters=n_clusters, init='k-means++', n_init=10,
                                        max_iter=300, tol=0.0001, random_state=None)
        kmeans.fit(X)
        labels = kmeans.labels_
        Y = np.zeros((n_samples, n_clusters))
        for row in range(0, n_samples):
            Y[row, labels[row]] = 1
        T = np.dot(Y.transpose(), Y)
        F = np.dot(Y, np.sqrt(np.linalg.inv(T)))
        F = F + 0.02 * np.ones((n_samples, n_clusters))
        return F

    def calculate_obj(X, W, F, L, alpha, beta):
        """
        This function calculates the objective function of NDFS
        """
        # Tr(F^T L F)
        T1 = np.trace(np.dot(np.dot(F.transpose(), L), F))
        T2 = np.linalg.norm(np.dot(X, W) - F, 'fro')
        T3 = (np.sqrt((W * W).sum(1))).sum()
        obj = T1 + alpha * (T2 + beta * T3)
        return obj

    # default gamma is 10e8
    if 'gamma' not in kwargs:
        gamma = 10e8
    else:
        gamma = kwargs['gamma']
    # use the default affinity matrix
    if 'W' not in kwargs:
        W = construct_W(X)
    else:
        W = kwargs['W']
    if 'alpha' not in kwargs:
        alpha = 1
    else:
        alpha = kwargs['alpha']
    if 'beta' not in kwargs:
        beta = 1
    else:
        beta = kwargs['beta']
    if 'F0' not in kwargs:
        if 'n_clusters' not in kwargs:
            raise Exception("either F0 or n_clusters should be provided")
        else:
            # initialize F
            n_clusters = kwargs['n_clusters']
            F = kmeans_initialization(X, n_clusters)
    else:
        F = kwargs['F0']
        # infer the number of clusters from the provided initialization
        n_clusters = F.shape[1]
    if 'verbose' not in kwargs:
        verbose = False
    else:
        verbose = kwargs['verbose']

    n_samples, n_features = X.shape

    # initialize D as an identity matrix
    D = np.identity(n_features)
    I = np.identity(n_samples)

    # build the laplacian matrix
    L = np.array(W.sum(1))[:, 0] - W

    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update W
        T = np.linalg.inv(np.dot(X.transpose(), X) + beta * D + 1e-6 * np.eye(n_features))
        W = np.dot(np.dot(T, X.transpose()), F)
        # update D
        temp = np.sqrt((W * W).sum(1))
        temp[temp < 1e-16] = 1e-16
        temp = 0.5 / temp
        D = np.diag(temp)
        # update M
        M = L + alpha * (I - np.dot(np.dot(X, T), X.transpose()))
        M = (M + M.transpose()) / 2
        # update F
        denominator = np.dot(M, F) + gamma * np.dot(np.dot(F, F.transpose()), F)
        temp = np.divide(gamma * F, denominator)
        F = F * np.array(temp)
        temp = np.diag(np.sqrt(np.diag(1 / (np.dot(F.transpose(), F) + 1e-16))))
        F = np.dot(F, temp)

        # calculate the objective function
        obj[iter_step] = np.trace(np.dot(np.dot(F.transpose(), M), F)) + gamma / 4 * np.linalg.norm(
            np.dot(F.transpose(), F) - np.identity(n_clusters), 'fro')
        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step + 1, obj[iter_step]))
        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step - 1]) < 1e-3:
            break

    F = feature_ranking(W)
    if mode == "index":
        return np.array(F, dtype=int)
    elif mode == "raw":
        return W
    else:
        # mode "rank": return, for each feature, its position in the descending ranking
        return reverse_argsort(F, size=X.shape[1])
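# A minimal usage sketch of ndfs above (not part of the source): it builds the affinity
# matrix with the construct_W helper used throughout this section; the data shape and
# n_clusters value are illustrative assumptions.
import numpy as np
from skfeature.utility import construct_W

rng = np.random.RandomState(0)
X_demo = rng.rand(40, 15)          # 40 samples, 15 features

# same affinity kwargs as the other examples in this section
kwargs_demo = {"metric": "euclidean", "neighborMode": "knn",
               "weightMode": "heatKernel", "k": 5, 't': 1}
W_affinity = construct_W.construct_W(X_demo, **kwargs_demo)

# mode="raw" returns the (n_features, n_clusters) feature weight matrix
Weight_demo = ndfs(X_demo, mode="raw", W=W_affinity, n_clusters=4)

# descending importance ranking, as in the surrounding snippets
idx_demo = feature_ranking(Weight_demo)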
def fit(self, X, y):
    if self.name == 'LASSO':
        LASSO = Lasso(alpha=self.params['alpha'], positive=True)
        y_pred_lasso = LASSO.fit(X, y)
        if y_pred_lasso.coef_.ndim == 1:
            coeff = y_pred_lasso.coef_
        else:
            coeff = np.asarray(y_pred_lasso.coef_[0, :])
        idx = np.argsort(-coeff)

    if self.name == 'EN':
        # elastic net with a pure L1 penalty
        enet = ElasticNet(alpha=self.params['alpha'], l1_ratio=1, positive=True)
        y_pred_enet = enet.fit(X, y)
        if y_pred_enet.coef_.ndim == 1:
            coeff = y_pred_enet.coef_
        else:
            coeff = np.asarray(y_pred_enet.coef_[0, :])
        idx = np.argsort(-coeff)

    if self.name == 'RFS':
        W = RFS.rfs(X, construct_label_matrix(y), gamma=self.params['gamma'])
        idx = feature_ranking(W)

    if self.name == 'll_l21':
        # obtain the feature weight matrix
        W, _, _ = ll_l21.proximal_gradient_descent(X, construct_label_matrix(y),
                                                   z=self.params['z'], verbose=False)
        # rank features in descending order according to their scores
        idx = feature_ranking(W)

    if self.name == 'ls_l21':
        # obtain the feature weight matrix
        W, _, _ = ls_l21.proximal_gradient_descent(X, construct_label_matrix(y),
                                                   z=self.params['z'], verbose=False)
        # rank features in descending order according to their scores
        idx = feature_ranking(W)

    if self.tp == 'ITB':
        if self.name == 'MRMR':
            idx = MRMR.mrmr(X, y, n_selected_features=self.params['num_feats'])
        if self.name == 'Relief':
            score = reliefF.reliefF(X, y, k=self.params['k'])
            idx = reliefF.feature_ranking(score)
        if self.name == 'MI':
            idx = np.argsort(mutual_info_classif(X, y, n_neighbors=self.params['n_neighbors']))[::-1]

    return idx
def rfs(X, Y_flat, mode='rank', **kwargs):
    """
    This function implements efficient and robust feature selection via joint l2,1-norms minimization, i.e.,
    min_{W} ||XW - Y||_{2,1} + gamma*||W||_{2,1}

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    Y_flat: {numpy array}, shape (n_samples,)
        input class labels, converted internally to a one-hot-coding label matrix
    kwargs: {dictionary}
        gamma: {float}
            parameter in RFS
        verbose: {boolean}
            True if want to display the objective function value, False if not

    Output
    ------
    W: {numpy array}, shape (n_features, n_classes)
        feature weight matrix

    Reference
    ---------
    Nie, Feiping et al. "Efficient and Robust Feature Selection via Joint l2,1-Norms Minimization." NIPS 2010.
    """

    def calculate_obj(X, Y, W, gamma):
        """
        This function calculates the objective function of rfs
        """
        temp = np.dot(X, W) - Y
        return calculate_l21_norm(temp) + gamma * calculate_l21_norm(W)

    # convert Y_flat to one-hot encoding
    Y = construct_label_matrix_pan(Y_flat)

    # default gamma is 1
    if 'gamma' not in kwargs:
        gamma = 1
    else:
        gamma = kwargs['gamma']
    if 'verbose' not in kwargs:
        verbose = False
    else:
        verbose = kwargs['verbose']

    n_samples, n_features = X.shape
    A = np.zeros((n_samples, n_samples + n_features))
    A[:, 0:n_features] = X
    A[:, n_features:n_features + n_samples] = gamma * np.eye(n_samples)
    D = np.eye(n_features + n_samples)

    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update U as U = D^{-1} A^T (A D^{-1} A^T)^{-1} Y
        D_inv = LA.inv(D)
        temp = LA.inv(np.dot(np.dot(A, D_inv), A.T) + 1e-6 * np.eye(n_samples))  # (A D^{-1} A^T)^{-1}
        U = np.dot(np.dot(np.dot(D_inv, A.T), temp), Y)
        # update D as D_ii = 1 / (2 * ||U(i,:)||)
        D = generate_diagonal_matrix(U)

        obj[iter_step] = calculate_obj(X, Y, U[0:n_features, :], gamma)
        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step + 1, obj[iter_step]))

        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step - 1]) < 1e-3:
            break

    # the first n_features rows of U are the feature weights
    W = U[0:n_features, :]
    if mode == "raw":
        return W
    elif mode == "index":
        return feature_ranking(W)
    elif mode == "rank":
        return reverse_argsort(feature_ranking(W))
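# A minimal usage sketch of rfs above (not part of the source): note that this variant
# takes flat labels and one-hot encodes them internally; shapes, seed, and gamma are
# illustrative assumptions.
import numpy as np

rng = np.random.RandomState(0)
X_demo = rng.rand(30, 12)          # 30 samples, 12 features
y_demo = rng.randint(0, 3, 30)     # flat labels; one-hot encoded inside rfs

# raw (n_features, n_classes) weight matrix
W_demo = rfs(X_demo, y_demo, mode="raw", gamma=0.1)

# indices of features from most to least important
idx_demo = rfs(X_demo, y_demo, mode="index", gamma=0.1)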
def udfs(X, y=None, mode='rank', **kwargs):
    """
    This function implements l2,1-norm regularized discriminative feature selection for unsupervised learning, i.e.,
    min_{W} Tr(W^T M W) + gamma*||W||_{2,1}, s.t. W^T W = I

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    kwargs: {dictionary}
        gamma: {float}
            parameter in the objective function of UDFS (default is 1)
        n_clusters: {int}
            number of clusters
        k: {int}
            number of nearest neighbors
        verbose: {boolean}
            True if want to display the objective function value, False if not

    Output
    ------
    W: {numpy array}, shape (n_features, n_clusters)
        feature weight matrix

    Reference
    ---------
    Yang, Yi et al. "l2,1-Norm Regularized Discriminative Feature Selection for Unsupervised Learning." AAAI 2012.
    """

    def construct_M(X, k, gamma):
        """
        This function constructs the M matrix described in the paper
        """
        n_sample, n_feature = X.shape
        Xt = X.T
        D = pairwise_distances(X)
        # sort the distance matrix D in ascending order
        idx = np.argsort(D, axis=1)
        # choose the k-nearest neighbors for each instance
        idx_new = idx[:, 0:k + 1]
        H = np.eye(k + 1) - 1 / (k + 1) * np.ones((k + 1, k + 1))
        I = np.eye(k + 1)
        Mi = np.zeros((n_sample, n_sample))
        for i in range(n_sample):
            Xi = Xt[:, idx_new[i, :]]
            Xi_tilde = np.dot(Xi, H)
            Bi = np.linalg.inv(np.dot(Xi_tilde.T, Xi_tilde) + gamma * I)
            Si = np.zeros((n_sample, k + 1))
            for q in range(k + 1):
                Si[idx_new[i, q], q] = 1
            Mi = Mi + np.dot(np.dot(Si, np.dot(np.dot(H, Bi), H)), Si.T)
        M = np.dot(np.dot(X.T, Mi), X)
        return M

    def calculate_obj(X, W, M, gamma):
        """
        This function calculates the objective function of UDFS
        """
        return np.trace(np.dot(np.dot(W.T, M), W)) + gamma * calculate_l21_norm(W)

    # default gamma is 0.1
    if 'gamma' not in kwargs:
        gamma = 0.1
    else:
        gamma = kwargs['gamma']
    # default k is 5
    if 'k' not in kwargs:
        k = 5
    else:
        k = kwargs['k']
    if 'n_clusters' not in kwargs:
        n_clusters = 5
    else:
        n_clusters = kwargs['n_clusters']
    if 'verbose' not in kwargs:
        verbose = False
    else:
        verbose = kwargs['verbose']

    # construct M
    n_sample, n_feature = X.shape
    M = construct_M(X, k, gamma)

    D = np.eye(n_feature)
    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update W as the eigenvectors of P corresponding to the first n_clusters smallest eigenvalues
        P = M + gamma * D
        eigen_value, eigen_vector = scipy.linalg.eigh(a=P)
        W = eigen_vector[:, 0:n_clusters]
        # update D as D_ii = 1 / (2 * ||W(i,:)||)
        D = generate_diagonal_matrix(W)

        obj[iter_step] = calculate_obj(X, W, M, gamma)
        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step + 1, obj[iter_step]))

        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step - 1]) < 1e-3:
            break

    if mode == 'raw':
        return W
    elif mode == 'index':
        return feature_ranking(W)
    elif mode == 'rank':
        return reverse_argsort(feature_ranking(W))
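# A minimal usage sketch of udfs above (not part of the source): being unsupervised it
# needs only X; the data shape, gamma, k, and n_clusters values are illustrative assumptions.
import numpy as np

rng = np.random.RandomState(0)
X_demo = rng.rand(60, 10)          # 60 samples, 10 features

# raw (n_features, n_clusters) weight matrix
W_demo = udfs(X_demo, mode="raw", gamma=0.1, k=5, n_clusters=3)

# descending importance indices, matching what the SKF_udfs wrapper below returns
idx_demo = udfs(X_demo, mode="index", gamma=0.1, k=5, n_clusters=3)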
def SKF_udfs(X, y):
    # number of clusters, usually set to the number of classes in the ground truth
    num_cluster = len(set(y))

    Weight = UDFS.udfs(X, gamma=0.1, n_clusters=num_cluster)
    return sparse_learning.feature_ranking(Weight)
def fit(self, X, y):
    idx = []
    if self.tp == 'ITB':
        if self.name == 'MRMR':
            idx = MRMR.mrmr(X, y, n_selected_features=self.params['num_feats'])
    elif self.tp == 'filter':
        if self.name == 'Relief':
            score = reliefF.reliefF(X, y, k=self.params['k'])
            idx = reliefF.feature_ranking(score)
        if self.name == 'Fisher':
            # obtain the score of each feature on the training set
            score = fisher_score.fisher_score(X, y)
            # rank features in descending order according to score
            idx = fisher_score.feature_ranking(score)
        if self.name == 'MI':
            idx = np.argsort(mutual_info_classif(X, y, n_neighbors=self.params['n_neighbors']))[::-1]
    elif self.tp == 'wrapper':
        model_fit = self.model.fit(X, y)
        model = SelectFromModel(model_fit, prefit=True)
        idx = model.get_support(indices=True)
    elif self.tp == 'SLB':
        # one-hot encode the target
        y = construct_label_matrix(y)
        if self.name == 'SMBA':
            scba = fs.SCBA(data=X, alpha=self.params['alpha'], norm_type=self.params['norm_type'],
                           verbose=self.params['verbose'], thr=self.params['thr'],
                           max_iter=self.params['max_iter'], affine=self.params['affine'],
                           normalize=self.params['normalize'], step=self.params['step'],
                           PCA=self.params['PCA'], GPU=self.params['GPU'],
                           device=self.params['device'])
            nrmInd, sInd, repInd, _ = scba.admm()
            if self.params['type_indices'] == 'nrmInd':
                idx = nrmInd
            elif self.params['type_indices'] == 'repInd':
                idx = repInd
            else:
                idx = sInd
        if self.name == 'RFS':
            W = RFS.rfs(X, y, gamma=self.params['gamma'])
            idx = feature_ranking(W)
        if self.name == 'll_l21':
            # obtain the feature weight matrix
            W, _, _ = ll_l21.proximal_gradient_descent(X, y, z=self.params['z'], verbose=False)
            # rank features in descending order according to their scores
            idx = feature_ranking(W)
        if self.name == 'ls_l21':
            # obtain the feature weight matrix
            W, _, _ = ls_l21.proximal_gradient_descent(X, y, z=self.params['z'], verbose=False)
            # rank features in descending order according to their scores
            idx = feature_ranking(W)
        if self.name == 'LASSO':
            LASSO = Lasso(alpha=self.params['alpha'], positive=True)
            y_pred_lasso = LASSO.fit(X, y)
            if y_pred_lasso.coef_.ndim == 1:
                coeff = y_pred_lasso.coef_
            else:
                coeff = np.asarray(y_pred_lasso.coef_[0, :])
            idx = np.argsort(-coeff)
        if self.name == 'EN':
            # elastic net with a pure L1 penalty
            enet = ElasticNet(alpha=self.params['alpha'], l1_ratio=1, positive=True)
            y_pred_enet = enet.fit(X, y)
            if y_pred_enet.coef_.ndim == 1:
                coeff = y_pred_enet.coef_
            else:
                coeff = np.asarray(y_pred_enet.coef_[0, :])
            idx = np.argsort(-coeff)
    return idx
    clf.fit(selected_fea_train, y_train)
    acc.append(accuracy_score(y_test, clf.predict(selected_fea_test)))

    # lasso
    lasso = Lasso(alpha=0.01, random_state=random_state)
    lasso.fit(X_train, y_train)
    weights = lasso.coef_.T
    idx = chi_square.feature_ranking(abs(weights))
    selected_fea_train = X_train[:, idx[0:num_features]]
    selected_fea_test = X_test[:, idx[0:num_features]]
    clf.fit(selected_fea_train, y_train)
    acc.append(accuracy_score(y_test, clf.predict(selected_fea_test)))

    # rfs
    weights = RFS.rfs(X_train, construct_label_matrix(y_train), gamma=0.01)
    idx = sparse_learning.feature_ranking(weights)
    selected_fea_train = X_train[:, idx[0:num_features]]
    selected_fea_test = X_test[:, idx[0:num_features]]
    clf.fit(selected_fea_train, y_train)
    acc.append(accuracy_score(y_test, clf.predict(selected_fea_test)))

    # sgl: group boundaries and weights for sparse group lasso
    idx_group = np.array([[1, 16, np.sqrt(16)], [17, 28, np.sqrt(12)],
                          [29, 60, np.sqrt(32)], [61, 160, np.sqrt(100)]]).T
    idx_group = idx_group.astype(int)
    weights, _, _ = group_fs.group_fs(X_train, y_train, 0.01, 0.01,
def selectFeatureNDFS(filename, num_feature, num_cluster):
    # Load the pickle saved to disk with the samples and ALL the features extracted by TSFresh.
    # THIS IS WHAT WE WORK ON.
    all_features_train = pd.read_pickle(
        "./pickle/feature_complete/TRAIN/{0}_TRAIN_FeatureComplete.pkl".format(filename))
    all_features_test = pd.read_pickle(
        "./pickle/feature_complete/TEST/{0}_TEST_FeatureComplete.pkl".format(filename))

    # Drop columns containing NaN values
    all_features_train = all_features_train.dropna(axis=1)
    all_features_test = all_features_test.dropna(axis=1)

    # Build the affinity matrix W to pass to NDFS
    kwargs = {"metric": "euclidean", "neighborMode": "knn", "weightMode": "heatKernel", "k": 5, 't': 1}
    W = construct_W.construct_W(all_features_train.values, **kwargs)

    # Run NDFS several times so that we really obtain the best features
    dizionarioOccorrenzeFeature = {}
    for i in range(0, 10):
        # Run the NDFS algorithm. We obtain the per-cluster feature weights.
        featurePesate = NDFS.ndfs(all_features_train, n_clusters=20, W=W)
        # rank features in descending order
        idx = feature_ranking(featurePesate)
        # take the chosen number of features
        idxSelected = idx[0:num_feature]
        # update each feature's occurrence count in the dictionary
        for feature in idxSelected:
            if feature in dizionarioOccorrenzeFeature:
                dizionarioOccorrenzeFeature[feature] = dizionarioOccorrenzeFeature[feature] + 1
            else:
                dizionarioOccorrenzeFeature[feature] = 1

    # Sort the dictionary in descending order so that the feature appearing most often comes first.
    # Here we have a list of (featureName, occurrenceCount) tuples.
    dizionarioOccorrenzeFeature_sorted = sorted(dizionarioOccorrenzeFeature.items(), key=lambda kv: -kv[1])

    # Put all the feature names from the dictionary into an array
    featureFrequenti = []
    for key, value in dizionarioOccorrenzeFeature_sorted:
        featureFrequenti.append(key)

    # select the desired number of features
    idxSelected = featureFrequenti[0:num_feature]

    # Extract the names of the chosen features
    nomiFeatureSelezionate = []
    for i in idxSelected:
        nomiFeatureSelezionate.append(all_features_train.columns[i])

    # Build the dataframe with only the selected features
    dataframeFeatureSelezionate = all_features_train.loc[:, nomiFeatureSelezionate]

    # Restrict the test set to the selected features as well
    all_features_test = all_features_test.loc[:, nomiFeatureSelezionate]

    # Extract the known classes
    labelConosciute = estrattoreClassiConosciute.estraiLabelConosciute(
        "./UCRArchive_2018/{0}/{0}_TEST.tsv".format(filename))

    # K-means on the selected features
    print("\nResults with features selected by us with NDFS")
    print("Number of features: {0}".format(all_features_test.shape[1]))
    testFeatureSelection(X_selected=dataframeFeatureSelezionate.values,
                         X_test=all_features_test.values,
                         num_clusters=num_cluster,
                         y=labelConosciute)