def udfs(X, **kwargs):
    """
    This function implements l2,1-norm regularized discriminative feature selection for unsupervised learning, i.e.,
    min_W Tr(W^T M W) + gamma ||W||_{2,1}, s.t. W^T W = I

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    kwargs: {dictionary}
        gamma: {float}
            parameter in the objective function of UDFS (default is 0.1)
        n_clusters: {int}
            Number of clusters (default is 5)
        k: {int}
            number of nearest neighbor (default is 5)
        verbose: {boolean}
            True if want to display the objective function value, false if not (default is False)

    Output
    ------
    W: {numpy array}, shape(n_features, n_clusters)
        feature weight matrix

    Reference
    ---------
    Yang, Yi et al. "l2,1-Norm Regularized Discriminative Feature Selection for Unsupervised Learning." AAAI 2012.
    """
    # optional parameters and their defaults
    gamma = kwargs.get('gamma', 0.1)
    k = kwargs.get('k', 5)
    n_clusters = kwargs.get('n_clusters', 5)
    verbose = kwargs.get('verbose', False)

    # construct M (construct_M is a helper defined outside this function)
    _, n_feature = X.shape
    M = construct_M(X, k, gamma)

    # D starts as the identity and is re-estimated from W on every iteration
    D = np.eye(n_feature)

    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update W as the eigenvectors of P corresponding to the first n_clusters
        # smallest eigenvalues (scipy.linalg.eigh returns eigenvalues in ascending order)
        P = M + gamma*D
        _, eigen_vector = scipy.linalg.eigh(a=P)
        W = eigen_vector[:, 0:n_clusters]

        # update D as D_ii = 1 / 2 / ||W(i,:)||
        D = generate_diagonal_matrix(W)

        obj[iter_step] = calculate_obj(X, W, M, gamma)
        if verbose:
            # single-argument parenthesised form prints the same text on Python 2 and Python 3
            print('obj at iter ' + str(iter_step+1) + ': ' + str(obj[iter_step]))

        # stop once the objective value changes by less than 1e-3 between iterations
        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step-1]) < 1e-3:
            break
    return W
def erfs(X, Y, **kwargs):
    """
    This function implements efficient and robust feature selection via joint l2,1-norms minimization,
    min_W ||X W - Y||_{2,1} + gamma ||W||_{2,1}
    (X is laid out as samples x features here, so the residual is X W - Y)

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    Y: {numpy array}, shape (n_samples, n_classes)
        input class label matrix, each row is a one-hot-coding class label
    kwargs: {dictionary}
        gamma: {float}
            parameter in RFS (default is 1)
        verbose: {boolean}
            True if want to display the objective function value, false if not (default is False)

    Output
    ------
    W: {numpy array}, shape(n_features, n_classes)
        feature weight matrix

    Reference
    ---------
    Nie, Feiping et al. "Efficient and Robust Feature Selection via Joint l2,1-Norms Minimization" NIPS 2010.
    """
    # optional parameters and their defaults
    gamma = kwargs.get('gamma', 1)
    verbose = kwargs.get('verbose', False)

    n_samples, n_features = X.shape

    # A = [X, gamma*I], shape (n_samples, n_features + n_samples)
    A = np.zeros((n_samples, n_samples + n_features))
    A[:, 0:n_features] = X
    A[:, n_features:n_features+n_samples] = gamma*np.eye(n_samples)

    # D starts as the identity and is re-estimated from U on every iteration
    D = np.eye(n_features+n_samples)

    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update U as U = D^{-1} A^T (A D^-1 A^T)^-1 Y
        D_inv = LA.inv(D)
        # pseudo-inverse is used for (A D^-1 A^T)^-1
        temp = LA.pinv(np.dot(np.dot(A, D_inv), A.T))
        U = np.dot(np.dot(np.dot(D_inv, A.T), temp), Y)

        # update D as D_ii = 1 / 2 / ||U(i,:)||
        D = generate_diagonal_matrix(U)

        # the objective is evaluated on the feature-weight part of U only
        obj[iter_step] = calculate_obj(X, Y, U[0:n_features, :], gamma)
        if verbose:
            # single-argument parenthesised form prints the same text on Python 2 and Python 3
            print('obj at iter ' + str(iter_step+1) + ': ' + str(obj[iter_step]))

        # stop once the objective value changes by less than 1e-3 between iterations
        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step-1]) < 1e-3:
            break

    # the first d rows of U are the feature weights
    W = U[0:n_features, :]
    return W