def rfs(X, Y, gamma=1, verbose=False, **kwargs):
    """
    This function implements efficient and robust feature selection via joint l2,1-norms minimization:
    min_W ||X W - Y||_2,1 + gamma ||W||_2,1

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    Y: {numpy array}, shape (n_samples, n_classes)
        input class label matrix, each row is a one-hot encoded class label
    gamma: {float}
        regularization parameter in RFS (default is 1)
    verbose: {boolean}
        True if the objective function value should be displayed at each iteration, False otherwise

    Output
    ------
    W: {numpy array}, shape (n_features, n_classes)
        feature weight matrix

    Reference
    ---------
    Nie, Feiping et al. "Efficient and Robust Feature Selection via Joint l2,1-Norms Minimization." NIPS 2010.
    """
    n_samples, n_features = X.shape
    # build the augmented matrix A = [X, gamma*I]
    A = np.zeros((n_samples, n_samples + n_features))
    A[:, 0:n_features] = X
    A[:, n_features:n_features + n_samples] = gamma * np.eye(n_samples)
    D = np.eye(n_features + n_samples)

    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update U as U = D^{-1} A^T (A D^{-1} A^T)^{-1} Y
        D_inv = LA.inv(D)
        temp = LA.inv(np.dot(np.dot(A, D_inv), A.T) + 1e-6 * np.eye(n_samples))  # (A D^{-1} A^T)^{-1}
        U = np.dot(np.dot(np.dot(D_inv, A.T), temp), Y)
        # update D as D_ii = 1 / (2 * ||U(i,:)||)
        D = generate_diagonal_matrix(U)

        obj[iter_step] = calculate_obj(X, Y, U[0:n_features, :], gamma)
        if verbose:
            print('obj at iter ' + str(iter_step + 1) + ': ' + str(obj[iter_step]))

        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step - 1]) < 1e-3:
            break

    # the first n_features rows of U are the feature weights
    W = U[0:n_features, :]
    return W
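# Illustrative usage sketch for the rfs variant above, on synthetic data. It assumes
# numpy is imported as np and that the helpers rfs relies on (generate_diagonal_matrix,
# calculate_obj, LA, math) are in scope, e.g. via the scikit-feature utility imports.
# The function name _demo_rfs is hypothetical.
def _demo_rfs():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.rand(20, 8)                        # 20 samples, 8 features
    labels = rng.randint(0, 3, size=20)        # 3 classes
    Y = np.eye(3)[labels]                      # one-hot class label matrix, shape (20, 3)

    W = rfs(X, Y, gamma=1, verbose=True)       # feature weight matrix, shape (8, 3)

    # rank features by the l2-norm of each row of W (larger norm = more important)
    feature_scores = np.sqrt((W ** 2).sum(axis=1))
    ranking = np.argsort(feature_scores)[::-1]
    return ranking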
def udfs(X, gamma=0.1, k=5, n_clusters=5, verbose=False, **kwargs):
    """
    This function implements l2,1-norm regularized discriminative feature
    selection for unsupervised learning, i.e.,
    min_W Tr(W^T M W) + gamma ||W||_{2,1}, s.t. W^T W = I

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    gamma: {float}
        regularization parameter in the objective function of UDFS (default is 0.1)
    n_clusters: {int}
        number of clusters (default is 5)
    k: {int}
        number of nearest neighbors (default is 5)
    verbose: {boolean}
        True if the objective function value should be displayed at each iteration, False otherwise

    Output
    ------
    W: {numpy array}, shape (n_features, n_clusters)
        feature weight matrix

    Reference
    ---------
    Yang, Yi et al. "l2,1-Norm Regularized Discriminative Feature Selection for Unsupervised Learning." AAAI 2012.
    """
    # construct M
    n_sample, n_feature = X.shape
    M = construct_M(X, k, gamma)

    D = np.eye(n_feature)
    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update W as the eigenvectors of P corresponding to the first n_clusters
        # smallest eigenvalues
        P = M + gamma * D
        eigen_value, eigen_vector = scipy.linalg.eigh(a=P)
        W = eigen_vector[:, 0:n_clusters]
        # update D as D_ii = 1 / (2 * ||W(i,:)||)
        D = generate_diagonal_matrix(W)

        obj[iter_step] = calculate_obj(X, W, M, gamma)
        if verbose:
            print('obj at iter ' + str(iter_step + 1) + ': ' + str(obj[iter_step]))

        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step - 1]) < 1e-3:
            break
    return W
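# Illustrative usage sketch for the udfs variant above, on synthetic unsupervised data.
# Assumes numpy is imported as np and that construct_M, generate_diagonal_matrix,
# calculate_obj, scipy and math are in scope (e.g. the scikit-feature utility imports).
# The function name _demo_udfs is hypothetical.
def _demo_udfs():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.rand(30, 10)                       # 30 samples, 10 features, no labels

    # assume 3 underlying clusters and a 5-nearest-neighbor local structure
    W = udfs(X, gamma=0.1, k=5, n_clusters=3)  # feature weight matrix, shape (10, 3)

    # rank features by the l2-norm of each row of W
    ranking = np.argsort(np.sqrt((W ** 2).sum(axis=1)))[::-1]
    return ranking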
def rfs(X, Y, **kwargs):
    """
    This function implements efficient and robust feature selection via joint l2,1-norms minimization:
    min_W ||X W - Y||_2,1 + gamma ||W||_2,1

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    Y: {numpy array}, shape (n_samples, n_classes)
        input class label matrix, each row is a one-hot encoded class label
    kwargs: {dictionary}
        gamma: {float}
            regularization parameter in RFS (default is 1)
        verbose: {boolean}
            True if the objective function value should be displayed at each iteration, False otherwise

    Output
    ------
    W: {numpy array}, shape (n_features, n_classes)
        feature weight matrix

    Reference
    ---------
    Nie, Feiping et al. "Efficient and Robust Feature Selection via Joint l2,1-Norms Minimization." NIPS 2010.
    """
    # default gamma is 1
    if 'gamma' not in kwargs:
        gamma = 1
    else:
        gamma = kwargs['gamma']
    if 'verbose' not in kwargs:
        verbose = False
    else:
        verbose = kwargs['verbose']

    n_samples, n_features = X.shape
    A = np.zeros((n_samples, n_samples + n_features))
    A[:, 0:n_features] = X
    A[:, n_features:n_features+n_samples] = gamma*np.eye(n_samples)
    D = np.eye(n_features+n_samples)

    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update U as U = D^{-1} A^T (A D^{-1} A^T)^{-1} Y
        D_inv = LA.inv(D)
        temp = LA.inv(np.dot(np.dot(A, D_inv), A.T) + 1e-6*np.eye(n_samples))  # (A D^{-1} A^T)^{-1}
        U = np.dot(np.dot(np.dot(D_inv, A.T), temp), Y)
        # update D as D_ii = 1 / (2 * ||U(i,:)||)
        D = generate_diagonal_matrix(U)

        obj[iter_step] = calculate_obj(X, Y, U[0:n_features, :], gamma)
        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step+1, obj[iter_step]))

        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step-1]) < 1e-3:
            break

    # the first n_features rows of U are the feature weights
    W = U[0:n_features, :]
    return W
def udfs(X, **kwargs):
    """
    This function implements l2,1-norm regularized discriminative feature
    selection for unsupervised learning, i.e.,
    min_W Tr(W^T M W) + gamma ||W||_{2,1}, s.t. W^T W = I

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    kwargs: {dictionary}
        gamma: {float}
            regularization parameter in the objective function of UDFS (default is 0.1)
        n_clusters: {int}
            number of clusters (default is 5)
        k: {int}
            number of nearest neighbors (default is 5)
        verbose: {boolean}
            True if the objective function value should be displayed at each iteration, False otherwise

    Output
    ------
    W: {numpy array}, shape (n_features, n_clusters)
        feature weight matrix

    Reference
    ---------
    Yang, Yi et al. "l2,1-Norm Regularized Discriminative Feature Selection for Unsupervised Learning." AAAI 2012.
    """
    # default gamma is 0.1
    if 'gamma' not in kwargs:
        gamma = 0.1
    else:
        gamma = kwargs['gamma']
    # default k is set to be 5
    if 'k' not in kwargs:
        k = 5
    else:
        k = kwargs['k']
    if 'n_clusters' not in kwargs:
        n_clusters = 5
    else:
        n_clusters = kwargs['n_clusters']
    if 'verbose' not in kwargs:
        verbose = False
    else:
        verbose = kwargs['verbose']

    # construct M
    n_sample, n_feature = X.shape
    M = construct_M(X, k, gamma)

    D = np.eye(n_feature)
    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update W as the eigenvectors of P corresponding to the first n_clusters
        # smallest eigenvalues
        P = M + gamma*D
        eigen_value, eigen_vector = scipy.linalg.eigh(a=P)
        W = eigen_vector[:, 0:n_clusters]
        # update D as D_ii = 1 / (2 * ||W(i,:)||)
        D = generate_diagonal_matrix(W)

        obj[iter_step] = calculate_obj(X, W, M, gamma)
        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step+1, obj[iter_step]))

        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step-1]) < 1e-3:
            break
    return W
def udfs(X, y=None, mode='rank', **kwargs):
    """
    This function implements l2,1-norm regularized discriminative feature
    selection for unsupervised learning, i.e.,
    min_W Tr(W^T M W) + gamma ||W||_{2,1}, s.t. W^T W = I

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: {numpy array}, optional
        not used by this unsupervised method
    mode: {string}
        'raw' returns the feature weight matrix W,
        'index' returns the features ordered by importance,
        'rank' (default) returns the rank of each feature
    kwargs: {dictionary}
        gamma: {float}
            regularization parameter in the objective function of UDFS (default is 0.1)
        n_clusters: {int}
            number of clusters (default is 5)
        k: {int}
            number of nearest neighbors (default is 5)
        verbose: {boolean}
            True if the objective function value should be displayed at each iteration, False otherwise

    Output
    ------
    W: {numpy array}, shape (n_features, n_clusters)
        feature weight matrix (returned when mode='raw'); otherwise the feature
        ordering or ranking derived from it

    Reference
    ---------
    Yang, Yi et al. "l2,1-Norm Regularized Discriminative Feature Selection for Unsupervised Learning." AAAI 2012.
    """
    def construct_M(X, k, gamma):
        """
        This function constructs the M matrix described in the paper
        """
        n_sample, n_feature = X.shape
        Xt = X.T
        D = pairwise_distances(X)
        # sort the distance matrix D in ascending order
        idx = np.argsort(D, axis=1)
        # choose the k-nearest neighbors for each instance
        idx_new = idx[:, 0:k + 1]
        H = np.eye(k + 1) - 1 / (k + 1) * np.ones((k + 1, k + 1))
        I = np.eye(k + 1)
        Mi = np.zeros((n_sample, n_sample))
        for i in range(n_sample):
            Xi = Xt[:, idx_new[i, :]]
            Xi_tilde = np.dot(Xi, H)
            Bi = np.linalg.inv(np.dot(Xi_tilde.T, Xi_tilde) + gamma * I)
            # selection matrix Si picks the k+1 nearest neighbors of instance i
            Si = np.zeros((n_sample, k + 1))
            for q in range(k + 1):
                Si[idx_new[i, q], q] = 1
            Mi = Mi + np.dot(np.dot(Si, np.dot(np.dot(H, Bi), H)), Si.T)
        M = np.dot(np.dot(X.T, Mi), X)
        return M

    def calculate_obj(X, W, M, gamma):
        """
        This function calculates the objective function of UDFS described in the paper
        """
        return np.trace(np.dot(np.dot(W.T, M), W)) + gamma * calculate_l21_norm(W)

    # default gamma is 0.1
    if 'gamma' not in kwargs:
        gamma = 0.1
    else:
        gamma = kwargs['gamma']
    # default k is set to be 5
    if 'k' not in kwargs:
        k = 5
    else:
        k = kwargs['k']
    if 'n_clusters' not in kwargs:
        n_clusters = 5
    else:
        n_clusters = kwargs['n_clusters']
    if 'verbose' not in kwargs:
        verbose = False
    else:
        verbose = kwargs['verbose']

    # construct M
    n_sample, n_feature = X.shape
    M = construct_M(X, k, gamma)

    D = np.eye(n_feature)
    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update W as the eigenvectors of P corresponding to the first n_clusters
        # smallest eigenvalues
        P = M + gamma * D
        eigen_value, eigen_vector = scipy.linalg.eigh(a=P)
        W = eigen_vector[:, 0:n_clusters]
        # update D as D_ii = 1 / (2 * ||W(i,:)||)
        D = generate_diagonal_matrix(W)

        obj[iter_step] = calculate_obj(X, W, M, gamma)
        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step + 1, obj[iter_step]))

        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step - 1]) < 1e-3:
            break

    if mode == 'raw':
        return W
    elif mode == 'index':
        return feature_ranking(W)
    elif mode == 'rank':
        return reverse_argsort(feature_ranking(W))
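# Illustrative usage sketch for the mode-aware udfs variant above. The mode argument
# selects the return format: 'raw' gives the weight matrix, 'index' a feature ordering,
# and 'rank' (the default) per-feature ranks. Assumes pairwise_distances,
# calculate_l21_norm, feature_ranking, reverse_argsort and the other helpers it calls
# are importable. The function name _demo_udfs_modes is hypothetical.
def _demo_udfs_modes():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.rand(30, 10)

    W = udfs(X, mode='raw', gamma=0.1, k=5, n_clusters=3)       # weight matrix, shape (10, 3)
    idx = udfs(X, mode='index', gamma=0.1, k=5, n_clusters=3)   # features ordered by importance
    ranks = udfs(X, mode='rank', gamma=0.1, k=5, n_clusters=3)  # rank of each feature
    return W, idx, ranks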
def rfs(X, Y_flat, mode='rank', **kwargs):
    """
    This function implements efficient and robust feature selection via joint l2,1-norms minimization:
    min_W ||X W - Y||_2,1 + gamma ||W||_2,1

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    Y_flat: {numpy array}, shape (n_samples,)
        input class labels, internally converted to a one-hot encoded label matrix Y
    mode: {string}
        'raw' returns the feature weight matrix W,
        'index' returns the features ordered by importance,
        'rank' (default) returns the rank of each feature
    kwargs: {dictionary}
        gamma: {float}
            regularization parameter in RFS (default is 1)
        verbose: {boolean}
            True if the objective function value should be displayed at each iteration, False otherwise

    Output
    ------
    W: {numpy array}, shape (n_features, n_classes)
        feature weight matrix (returned when mode='raw'); otherwise the feature
        ordering or ranking derived from it

    Reference
    ---------
    Nie, Feiping et al. "Efficient and Robust Feature Selection via Joint l2,1-Norms Minimization." NIPS 2010.
    """
    def calculate_obj(X, Y, W, gamma):
        """
        This function calculates the objective function of rfs
        """
        temp = np.dot(X, W) - Y
        return calculate_l21_norm(temp) + gamma*calculate_l21_norm(W)

    # convert Y_flat to a one-hot encoded label matrix
    Y = construct_label_matrix_pan(Y_flat)

    # default gamma is 1
    if 'gamma' not in kwargs:
        gamma = 1
    else:
        gamma = kwargs['gamma']
    if 'verbose' not in kwargs:
        verbose = False
    else:
        verbose = kwargs['verbose']

    n_samples, n_features = X.shape
    A = np.zeros((n_samples, n_samples + n_features))
    A[:, 0:n_features] = X
    A[:, n_features:n_features+n_samples] = gamma*np.eye(n_samples)
    D = np.eye(n_features+n_samples)

    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update U as U = D^{-1} A^T (A D^{-1} A^T)^{-1} Y
        D_inv = LA.inv(D)
        temp = LA.inv(np.dot(np.dot(A, D_inv), A.T) + 1e-6*np.eye(n_samples))  # (A D^{-1} A^T)^{-1}
        U = np.dot(np.dot(np.dot(D_inv, A.T), temp), Y)
        # update D as D_ii = 1 / (2 * ||U(i,:)||)
        D = generate_diagonal_matrix(U)

        obj[iter_step] = calculate_obj(X, Y, U[0:n_features, :], gamma)
        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step+1, obj[iter_step]))

        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step-1]) < 1e-3:
            break

    # the first n_features rows of U are the feature weights
    W = U[0:n_features, :]

    if mode == 'raw':
        return W
    elif mode == 'index':
        return feature_ranking(W)
    elif mode == 'rank':
        return reverse_argsort(feature_ranking(W))
def rfs(X, y, **kwargs):
    """
    This function implements efficient and robust feature selection via joint l2,1-norms minimization:
    min_W ||X W - Y||_2,1 + gamma ||W||_2,1

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: {numpy array}, shape (n_samples,)
        input class labels, internally converted to a one-hot encoded label matrix Y
    kwargs: {dictionary}
        gamma: {float}
            regularization parameter in RFS (default is 0.1)
        n_selected_features: {int}
            the maximum number of selected features, defaulting to the number of input features;
            accepted by this variant but not used further
        verbose: {boolean}
            True if the objective function value should be displayed at each iteration, False otherwise

    Output
    ------
    scores: {numpy array}, shape (n_features,)
        per-feature importance scores, the squared l2-norm of each row of the feature weight matrix W

    Reference
    ---------
    Nie, Feiping et al. "Efficient and Robust Feature Selection via Joint l2,1-Norms Minimization." NIPS 2010.
    """
    # default gamma is 0.1
    gamma = kwargs.get('gamma', 0.1)
    verbose = kwargs.get('verbose', False)

    n_samples, n_features = X.shape
    # read from kwargs for interface consistency; not used further in this variant
    n_selected_features = kwargs.get('n_selected_features', n_features)

    # convert y to a one-hot encoded label matrix
    Y = construct_label_matrix(y)

    A = np.zeros((n_samples, n_samples + n_features))
    A[:, 0:n_features] = X
    A[:, n_features:n_features + n_samples] = gamma * np.eye(n_samples)
    D = np.eye(n_features + n_samples)

    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update U as U = D^{-1} A^T (A D^{-1} A^T)^{-1} Y
        D_inv = LA.inv(D)
        temp = LA.inv(np.dot(np.dot(A, D_inv), A.T) + 1e-6 * np.eye(n_samples))  # (A D^{-1} A^T)^{-1}
        U = np.dot(np.dot(np.dot(D_inv, A.T), temp), Y)
        # update D as D_ii = 1 / (2 * ||U(i,:)||)
        D = generate_diagonal_matrix(U)

        obj[iter_step] = calculate_obj(X, Y, U[0:n_features, :], gamma)
        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step + 1, obj[iter_step]))

        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step - 1]) < 1e-3:
            break

    # the first n_features rows of U are the feature weights
    W = U[0:n_features, :]
    # per-feature importance score: squared l2-norm of each row of W
    scores = (W * W).sum(1)
    return scores
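# Illustrative usage sketch for the score-returning rfs variant above. Unlike the earlier
# variants it takes flat integer labels, encodes them internally via construct_label_matrix,
# and returns one importance score per feature. Assumes the scikit-feature helpers it calls
# are importable; the function name _demo_rfs_scores is hypothetical.
def _demo_rfs_scores():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.rand(20, 8)
    y = rng.randint(0, 3, size=20)             # flat class labels, shape (20,)

    scores = rfs(X, y, gamma=0.1)              # per-feature scores, shape (8,)
    ranking = np.argsort(scores)[::-1]         # features ranked by importance
    return ranking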