def auc_selection(X, y, n_features):
    """
    Computes the Area Under the Curve achieved by each feature and selects
    the top ranking features

    Every column of X is scored on its own with roc_auc_score(y, column) and
    the columns are ranked from the highest to the lowest score.

    NOTE(review): the ranking uses the raw score, so a feature whose AUC is
    far below 0.5 is ranked last -- confirm this is the intended behaviour.

    Keyword arguments:
    X -- The feature vectors (2-D array, one column per feature)
    y -- The target vector
    n_features -- n best ranked features

    Returns:
    (X[:, selected], selected), where selected holds the original column
    indexes of the n_features best ranked features, best first, as produced
    by split_tuples.
    """
    if verbose:
        print('\nPerforming Feature Selection based on the AUC from the ROC curves...')

    # (score, column) pairs: a descending sort ranks by score and breaks ties
    # in favour of the higher column index.
    feature_scores = sorted(
        ((roc_auc_score(y, X[:, i]), i) for i in range(X.shape[1])),
        reverse=True)

    # split into score and indexes lists; only the indexes are needed here
    _, feature_indexes = split_tuples(feature_scores)
    selected = feature_indexes[:n_features]

    # return selected features and original index features
    return X[:, selected], selected
def fisher_score_selection(X, y, n_features):
    """
    Computes the Fisher Score of each feature and selects the top ranking
    features

    Every column i of X is scored with fisher_score(X, y, i) and the columns
    are ranked from the highest to the lowest score.

    Keyword arguments:
    X -- The feature vectors (2-D array, one column per feature)
    y -- The target vector
    n_features -- n best ranked features

    Returns:
    (X[:, selected], selected), where selected holds the original column
    indexes of the n_features best ranked features, best first, as produced
    by split_tuples.
    """
    if verbose:
        print('\nPerforming Feature Selection based on the Fisher score ...')

    # (score, column) pairs: a descending sort ranks by score and breaks ties
    # in favour of the higher column index.
    feature_scores = sorted(
        ((fisher_score(X, y, i), i) for i in range(X.shape[1])),
        reverse=True)

    # split into score and indexes lists; only the indexes are needed here
    _, feature_indexes = split_tuples(feature_scores)
    selected = feature_indexes[:n_features]

    # return selected features and original index features
    return X[:, selected], selected
def kruskal_wallis_selection(X, y, n_features):
    """
    Computes the Kruskal-Wallis statistic of each feature and selects the
    top ranking features

    Every column i of X is tested with kw_feature_test(X, y, i); the first
    value it returns is used as the score and the columns are ranked from
    the highest to the lowest score.

    Keyword arguments:
    X -- The feature vectors (2-D array, one column per feature)
    y -- The target vector
    n_features -- n best ranked features

    Returns:
    (X[:, selected], selected), where selected holds the original column
    indexes of the n_features best ranked features, best first, as produced
    by split_tuples.
    """
    if verbose:
        print('\nPerforming Feature Selection based on the Kruskal-Wallis test ...')

    feature_scores = []
    for i in range(X.shape[1]):
        # The second returned value (the p-value) plays no part in the ranking.
        H_kw, _p_value = kw_feature_test(X, y, i)
        feature_scores.append((H_kw, i))

    # Descending sort of (score, column) pairs: ranks by score and breaks
    # ties in favour of the higher column index.
    feature_scores.sort(reverse=True)

    # split into score and indexes lists; only the indexes are needed here
    _, feature_indexes = split_tuples(feature_scores)
    selected = feature_indexes[:n_features]

    # return selected features and original index features
    return X[:, selected], selected
def chi_squared_selection(X, y, n_features):
    """
    Computes the Chi-squared statistic of each feature and selects the top
    ranking features

    Every column i of X is tested with chi2_feature_test(X, y, i); the first
    value it returns is used as the score and the columns are ranked from
    the highest to the lowest score.

    Keyword arguments:
    X -- The feature vectors (2-D array, one column per feature)
    y -- The target vector
    n_features -- n best ranked features

    Returns:
    (X[:, selected], selected), where selected holds the original column
    indexes of the n_features best ranked features, best first, as produced
    by split_tuples.
    """
    if verbose:
        print('\nPerforming Feature Selection based on the Chi2 test ...')

    feature_scores = []
    for i in range(X.shape[1]):
        # The second returned value (the p-value) plays no part in the ranking.
        chi2_stat, _p_value = chi2_feature_test(X, y, i)
        feature_scores.append((chi2_stat, i))

    # Descending sort of (score, column) pairs: ranks by score and breaks
    # ties in favour of the higher column index.
    feature_scores.sort(reverse=True)

    # split into score and indexes lists; only the indexes are needed here
    _, feature_indexes = split_tuples(feature_scores)
    selected = feature_indexes[:n_features]

    # return selected features and original index features
    return X[:, selected], selected
def auc_selection(X, y, n_features):
    """
    Computes the Area Under the Curve achieved by each feature and selects
    the top ranking features

    Every column of X is scored on its own with roc_auc_score(y, column) and
    the columns are ranked from the highest to the lowest score.

    NOTE(review): the ranking uses the raw score, so a feature whose AUC is
    far below 0.5 is ranked last -- confirm this is the intended behaviour.

    Keyword arguments:
    X -- The feature vectors (2-D array, one column per feature)
    y -- The target vector
    n_features -- n best ranked features

    Returns:
    (X[:, selected], selected), where selected holds the original column
    indexes of the n_features best ranked features, best first, as produced
    by split_tuples.
    """
    if verbose:
        print('\nPerforming Feature Selection based on the AUC from the ROC curves...')

    # (score, column) pairs: a descending sort ranks by score and breaks ties
    # in favour of the higher column index.
    feature_scores = sorted(
        ((roc_auc_score(y, X[:, i]), i) for i in range(X.shape[1])),
        reverse=True)

    # split into score and indexes lists; only the indexes are needed here
    _, feature_indexes = split_tuples(feature_scores)
    selected = feature_indexes[:n_features]

    # return selected features and original index features
    return X[:, selected], selected
def fisher_score_selection(X, y, n_features):
    """
    Computes the Fisher Score of each feature and selects the top ranking
    features

    Every column i of X is scored with fisher_score(X, y, i) and the columns
    are ranked from the highest to the lowest score.

    Keyword arguments:
    X -- The feature vectors (2-D array, one column per feature)
    y -- The target vector
    n_features -- n best ranked features

    Returns:
    (X[:, selected], selected), where selected holds the original column
    indexes of the n_features best ranked features, best first, as produced
    by split_tuples.
    """
    if verbose:
        print('\nPerforming Feature Selection based on the Fisher score ...')

    # (score, column) pairs: a descending sort ranks by score and breaks ties
    # in favour of the higher column index.
    feature_scores = sorted(
        ((fisher_score(X, y, i), i) for i in range(X.shape[1])),
        reverse=True)

    # split into score and indexes lists; only the indexes are needed here
    _, feature_indexes = split_tuples(feature_scores)
    selected = feature_indexes[:n_features]

    # return selected features and original index features
    return X[:, selected], selected
def kruskal_wallis_selection(X, y, n_features):
    """
    Computes the Kruskal-Wallis statistic of each feature and selects the
    top ranking features

    Every column i of X is tested with kw_feature_test(X, y, i); the first
    value it returns is used as the score and the columns are ranked from
    the highest to the lowest score.

    Keyword arguments:
    X -- The feature vectors (2-D array, one column per feature)
    y -- The target vector
    n_features -- n best ranked features

    Returns:
    (X[:, selected], selected), where selected holds the original column
    indexes of the n_features best ranked features, best first, as produced
    by split_tuples.
    """
    if verbose:
        print('\nPerforming Feature Selection based on the Kruskal-Wallis test ...')

    feature_scores = []
    for i in range(X.shape[1]):
        # The second returned value (the p-value) plays no part in the ranking.
        H_kw, _p_value = kw_feature_test(X, y, i)
        feature_scores.append((H_kw, i))

    # Descending sort of (score, column) pairs: ranks by score and breaks
    # ties in favour of the higher column index.
    feature_scores.sort(reverse=True)

    # split into score and indexes lists; only the indexes are needed here
    _, feature_indexes = split_tuples(feature_scores)
    selected = feature_indexes[:n_features]

    # return selected features and original index features
    return X[:, selected], selected
def chi_squared_selection(X, y, n_features):
    """
    Computes the Chi-squared statistic of each feature and selects the top
    ranking features

    Every column i of X is tested with chi2_feature_test(X, y, i); the first
    value it returns is used as the score and the columns are ranked from
    the highest to the lowest score.

    Keyword arguments:
    X -- The feature vectors (2-D array, one column per feature)
    y -- The target vector
    n_features -- n best ranked features

    Returns:
    (X[:, selected], selected), where selected holds the original column
    indexes of the n_features best ranked features, best first, as produced
    by split_tuples.
    """
    if verbose:
        print('\nPerforming Feature Selection based on the Chi2 test ...')

    feature_scores = []
    for i in range(X.shape[1]):
        # The second returned value (the p-value) plays no part in the ranking.
        chi2_stat, _p_value = chi2_feature_test(X, y, i)
        feature_scores.append((chi2_stat, i))

    # Descending sort of (score, column) pairs: ranks by score and breaks
    # ties in favour of the higher column index.
    feature_scores.sort(reverse=True)

    # split into score and indexes lists; only the indexes are needed here
    _, feature_indexes = split_tuples(feature_scores)
    selected = feature_indexes[:n_features]

    # return selected features and original index features
    return X[:, selected], selected