def Name_of_the_filter(data, target):
    """Template filter: score features with 'write the name of your filter method'.

    Copy this function, replace every ``Name_of_the_filter`` occurrence with
    the name of the new filter and fill in the marked section so that it
    populates ``Name_of_the_filter_values_feat`` (feature-feature relation)
    and ``Name_of_the_filter_values_class`` (feature-class relation).

    Args:
        data: 2-D collection of samples (rows) by features (columns);
            converted with ``np.array``.
        target: class labels; not used by the template itself, available to
            the ranking code that is filled in.

    Returns:
        A ``Result`` whose ``features``, ``scores``, ``ranks`` and
        ``ranked_features`` fields are filled in. Rank 0 goes to the feature
        with the highest score, and ``ranked_features`` holds the columns
        ordered from highest to lowest score.
    """
    # set up the working arrays and the result container
    feature_values = np.array(data)
    num_features = feature_values.shape[1]
    Name_of_the_filter_mat = np.zeros((num_features, num_features))
    Name_of_the_filter_values_feat = np.zeros(num_features)
    Name_of_the_filter_values_class = np.zeros(num_features)
    Name_of_the_filter_scores = np.zeros(num_features)

    weight_feat = 0.3  # weightage provided to feature-feature correlation
    weight_class = 0.7  # weightage provided to feature-class correlation

    result = Result()
    result.features = feature_values

    ################ write your main feature ranking code here ################

    ###########################################################################

    # combine the two normalized relations into one weighted score per feature
    Name_of_the_filter_values_feat = normalize(Name_of_the_filter_values_feat)
    Name_of_the_filter_values_class = normalize(Name_of_the_filter_values_class)
    class_part = weight_class * Name_of_the_filter_values_class
    feat_part = weight_feat * Name_of_the_filter_values_feat
    Name_of_the_filter_scores = class_part + feat_part

    # column indices sorted from highest to lowest score; its argsort gives
    # the rank of every original feature
    descending_order = np.argsort(-Name_of_the_filter_scores)
    Name_of_the_filter_ranks = np.argsort(descending_order)

    # assign the results to the appropriate fields
    result.scores = Name_of_the_filter_scores
    result.ranks = Name_of_the_filter_ranks
    result.ranked_features = feature_values[:, descending_order]
    return result
def Relief(data, target):
    """Score and rank features with the ReliefF algorithm.

    Args:
        data: 2-D collection of samples (rows) by features (columns).
        target: class labels handed to ``ReliefF`` together with ``data``.

    Returns:
        A ``Result`` with:
            features: ``data`` as a NumPy array.
            scores: the ReliefF feature scores passed through ``normalize``.
            ranks: rank of every original feature, 0 for the highest raw
                ReliefF score.
            ranked_features: the feature columns ordered from highest to
                lowest raw ReliefF score.
    """
    feature_values = np.array(data)
    num_features = feature_values.shape[1]

    # generate the ReliefF scores; all features are kept because only the
    # scores are needed here, not the transformed data
    relief = ReliefF(n_neighbors=5, n_features_to_keep=num_features)
    relief.fit_transform(data, target)
    raw_scores = relief.feature_scores

    # `order` lists column indices best-first; `ranks` is its inverse
    # permutation (rank of each original column). Columns must be reordered
    # with `order` -- indexing with `ranks` would scramble them.
    order = np.argsort(-raw_scores)
    ranks = np.argsort(order)

    # assign the results to the appropriate fields
    result = Result()
    result.features = feature_values
    result.scores = normalize(raw_scores)
    result.ranks = ranks
    result.ranked_features = feature_values[:, order]
    return result
def execute(self):
    """Compute the weighted mutual-information score of every feature.

    Reads ``self.data``, ``self.target``, ``self.num_features`` and the
    ``"weight_class"`` / ``"weight_feature"`` entries of
    ``self.algo_params``. Fills ``self.correlation_matrix``,
    ``self.feature_feature_relation`` and ``self.feature_class_relation``
    by index, and stores ``self.feature_mean`` and the final ``self.scores``.
    """
    self.feature_mean = np.mean(self.data, axis=0)

    # generate the (symmetric) pairwise information matrix; every pair is
    # evaluated once and mirrored instead of being computed for both
    # (i, j) and (j, i)
    for ind_1 in range(self.num_features):
        for ind_2 in range(ind_1 + 1):
            pair_value = self.compute_MI(self.data[:, ind_1],
                                         self.data[:, ind_2])
            self.correlation_matrix[ind_1, ind_2] = pair_value
            self.correlation_matrix[ind_2, ind_1] = pair_value

    for ind in range(self.num_features):
        # -ve because we want to penalize relation with the other features
        self.feature_feature_relation[ind] = -np.sum(
            abs(self.correlation_matrix[ind, :]))
        self.feature_class_relation[ind] = abs(
            self.compute_MI(self.data[:, ind], self.target))

    # produce the scores from the two normalized relations
    self.feature_feature_relation = normalize(self.feature_feature_relation)
    self.feature_class_relation = normalize(self.feature_class_relation)
    self.scores = (
        self.algo_params["weight_class"] * self.feature_class_relation
    ) + (
        self.algo_params["weight_feature"] * self.feature_feature_relation
    )
def execute(self):
    """Compute ReliefF scores for ``self.data`` and store them in ``self.scores``.

    The neighbour count comes from ``self.algo_params["n_neighbors"]``; all
    ``self.num_features`` features are kept. The fitted feature scores are
    passed through ``normalize`` before being stored.
    """
    neighbor_count = self.algo_params["n_neighbors"]
    scorer = ReliefF(
        n_neighbors=neighbor_count,
        n_features_to_keep=self.num_features,
    )

    # only the fitted feature scores are needed; the transformed data that
    # fit_transform returns is discarded
    scorer.fit_transform(self.data, self.target)

    self.scores = normalize(scorer.feature_scores)
def PCC(data, target):
    """Score and rank features with Pearson's Correlation Coefficient (PCC).

    A feature scores high when it is strongly correlated with ``target``
    (weight 0.7) and weakly correlated with the other features (weight 0.3).

    Args:
        data: 2-D collection of samples (rows) by features (columns).
        target: class labels, one per sample.

    Returns:
        A ``Result`` with:
            features: ``data`` as a NumPy array.
            scores: weighted sum of the normalized feature-class and
                feature-feature relations.
            ranks: rank of every original feature, 0 for the highest score.
            ranked_features: the feature columns ordered from highest to
                lowest score.
    """
    feature_values = np.array(data)
    num_features = feature_values.shape[1]
    PCC_mat = np.zeros((num_features, num_features))
    PCC_values_feat = np.zeros(num_features)
    PCC_values_class = np.zeros(num_features)
    weight_feat = 0.3  # weightage provided to feature-feature correlation
    weight_class = 0.7  # weightage provided to feature-class correlation

    # generate the (symmetric) correlation matrix; every pair is evaluated
    # once and mirrored instead of being computed for both (i, j) and (j, i)
    for ind_1 in range(num_features):
        for ind_2 in range(ind_1 + 1):
            pair_value = compute_PCC(feature_values[:, ind_1],
                                     feature_values[:, ind_2])
            PCC_mat[ind_1, ind_2] = pair_value
            PCC_mat[ind_2, ind_1] = pair_value

    for ind in range(num_features):
        # -ve because we want to penalize correlation with other features
        PCC_values_feat[ind] = -np.sum(abs(PCC_mat[ind, :]))
        PCC_values_class[ind] = abs(compute_PCC(feature_values[:, ind], target))

    # produce scores and ranks from the two normalized relations
    PCC_values_feat = normalize(PCC_values_feat)
    PCC_values_class = normalize(PCC_values_class)
    PCC_scores = (weight_class * PCC_values_class) + (weight_feat * PCC_values_feat)

    # `order` lists column indices best-first; its argsort is the rank of
    # each original feature
    order = np.argsort(-PCC_scores)
    PCC_ranks = np.argsort(order)

    # assign the results to the appropriate fields
    result = Result()
    result.features = feature_values
    result.scores = PCC_scores
    result.ranks = PCC_ranks
    result.ranked_features = feature_values[:, order]
    return result
def SCC(data, target):
    """Score and rank features with Spearman's Correlation Coefficient (SCC).

    A feature scores high when it is strongly correlated with ``target``
    (weight 0.7) and weakly correlated with the other features (weight 0.3).

    Args:
        data: 2-D collection of samples (rows) by features (columns).
        target: class labels, one per sample.

    Returns:
        A ``Result`` with:
            features: ``data`` as a NumPy array.
            scores: weighted sum of the normalized feature-class and
                feature-feature relations.
            ranks: rank of every original feature, 0 for the highest score.
            ranked_features: the feature columns ordered from highest to
                lowest score.
    """
    feature_values = np.array(data)
    num_features = feature_values.shape[1]
    SCC_mat = np.zeros((num_features, num_features))
    SCC_values_feat = np.zeros(num_features)
    SCC_values_class = np.zeros(num_features)
    weight_feat = 0.3  # weightage provided to feature-feature correlation
    weight_class = 0.7  # weightage provided to feature-class correlation

    # generate the (symmetric) correlation matrix; every pair is evaluated
    # once and mirrored instead of being computed for both (i, j) and (j, i)
    for ind_1 in range(num_features):
        for ind_2 in range(ind_1 + 1):
            pair_value = compute_SCC(feature_values[:, ind_1],
                                     feature_values[:, ind_2])
            SCC_mat[ind_1, ind_2] = pair_value
            SCC_mat[ind_2, ind_1] = pair_value

    for ind in range(num_features):
        # -ve because we want to penalize correlation with other features
        SCC_values_feat[ind] = -np.sum(abs(SCC_mat[ind, :]))
        # use the magnitude, as PCC does: a strong negative correlation with
        # the class is as informative as a strong positive one
        SCC_values_class[ind] = abs(compute_SCC(feature_values[:, ind], target))

    # produce scores and ranks from the two normalized relations
    SCC_values_feat = normalize(SCC_values_feat)
    SCC_values_class = normalize(SCC_values_class)
    SCC_scores = (weight_class * SCC_values_class) + (weight_feat * SCC_values_feat)

    # `order` lists column indices best-first; its argsort is the rank of
    # each original feature
    order = np.argsort(-SCC_scores)
    SCC_ranks = np.argsort(order)

    # assign the results to the appropriate fields
    result = Result()
    result.features = feature_values
    result.scores = SCC_scores
    result.ranks = SCC_ranks
    result.ranked_features = feature_values[:, order]
    return result
def MI(data, target):
    """Score and rank features with Mutual Information (MI).

    A feature scores high when it shares much information with ``target``
    (weight 0.7) and little with the other features (weight 0.3).

    Args:
        data: 2-D collection of samples (rows) by features (columns).
        target: class labels, one per sample.

    Returns:
        A ``Result`` with:
            features: ``data`` as a NumPy array.
            scores: weighted sum of the normalized feature-class and
                feature-feature relations.
            ranks: rank of every original feature, 0 for the highest score.
            ranked_features: the feature columns ordered from highest to
                lowest score.
    """
    feature_values = np.array(data)
    num_features = feature_values.shape[1]
    MI_mat = np.zeros((num_features, num_features))
    MI_values_feat = np.zeros(num_features)
    MI_values_class = np.zeros(num_features)
    weight_feat = 0.3  # weightage provided to feature-feature correlation
    weight_class = 0.7  # weightage provided to feature-class correlation

    # generate the (symmetric) information matrix; every pair is evaluated
    # once and mirrored instead of being computed for both (i, j) and (j, i)
    for ind_1 in range(num_features):
        for ind_2 in range(ind_1 + 1):
            pair_value = compute_MI(feature_values[:, ind_1],
                                    feature_values[:, ind_2])
            MI_mat[ind_1, ind_2] = pair_value
            MI_mat[ind_2, ind_1] = pair_value

    for ind in range(num_features):
        # -ve because we want to penalize information shared with other features
        MI_values_feat[ind] = -np.sum(abs(MI_mat[ind, :]))
        # magnitude of the feature-class relation, matching the abs() that
        # the feature-feature term above already applies
        MI_values_class[ind] = abs(compute_MI(feature_values[:, ind], target))

    # produce scores and ranks from the two normalized relations
    MI_values_feat = normalize(MI_values_feat)
    MI_values_class = normalize(MI_values_class)
    MI_scores = (weight_class * MI_values_class) + (weight_feat * MI_values_feat)

    # `order` lists column indices best-first; its argsort is the rank of
    # each original feature
    order = np.argsort(-MI_scores)
    MI_ranks = np.argsort(order)

    # assign the results to the appropriate fields
    result = Result()
    result.features = feature_values
    result.scores = MI_scores
    result.ranks = MI_ranks
    result.ranked_features = feature_values[:, order]
    return result