Пример #1
0
def Name_of_the_filter(data, target):
    """Template for a filter-style feature-ranking function.

    Copy this function, rename it after your filter method, and fill in the
    marked section so that it populates ``Name_of_the_filter_values_feat``
    (feature-feature relation) and ``Name_of_the_filter_values_class``
    (feature-class relation). With the section left empty, both arrays are
    still all zeros when they reach ``normalize``.

    Args:
        data: 2-D array-like; converted with ``np.array`` and its columns are
            treated as the features.
        target: class labels. Not used by the template itself; it is
            available to the ranking code you add.

    Returns:
        The ``Result`` object created here, with ``features``, ``scores``,
        ``ranks`` and ``ranked_features`` assigned. ``ranks[i]`` is the
        position of feature ``i`` when features are sorted by descending
        score (0 = highest score); ``ranked_features`` holds the columns of
        ``features`` in that same descending-score order.
    """
    
    # initialize the work arrays (all zeros) and the result structure
    feature_values = np.array(data)
    num_features = feature_values.shape[1]
    Name_of_the_filter_mat = np.zeros((num_features, num_features))
    Name_of_the_filter_values_feat = np.zeros(num_features)
    Name_of_the_filter_values_class = np.zeros(num_features)
    Name_of_the_filter_scores = np.zeros(num_features)
    result = Result()
    result.features = feature_values
    weight_feat = 0.3   # weight given to the feature-feature term
    weight_class = 0.7  # weight given to the feature-class term

    ################ write your main feature ranking code here ################



    ###########################################################################
    

    # combine the two normalized terms into one weighted score per feature
    # NOTE(review): `normalize` is defined elsewhere; presumably it rescales
    # each array to a common range -- confirm against its definition.
    Name_of_the_filter_values_feat = normalize(Name_of_the_filter_values_feat)
    Name_of_the_filter_values_class = normalize(Name_of_the_filter_values_class)
    Name_of_the_filter_scores = (weight_class * Name_of_the_filter_values_class) + (weight_feat * Name_of_the_filter_values_feat)
    # argsort of the descending ordering gives each feature's rank (0 = best)
    Name_of_the_filter_ranks = np.argsort(np.argsort(-Name_of_the_filter_scores))

    # assign the results to the appropriate fields
    result.scores = Name_of_the_filter_scores
    result.ranks = Name_of_the_filter_ranks
    # columns reordered from highest to lowest score
    result.ranked_features = feature_values[:, np.argsort(-Name_of_the_filter_scores)]

    return result
Пример #2
0
def Relief(data, target):
    """Score and rank features with the ReliefF algorithm.

    Args:
        data: 2-D array-like whose columns are the features.
        target: class labels, passed unchanged to ``ReliefF.fit_transform``.

    Returns:
        A ``Result`` object with:
            features: ``data`` converted with ``np.array``.
            scores: ``normalize`` applied to the raw ReliefF feature scores.
            ranks: ``ranks[i]`` is the position of feature ``i`` when the
                features are sorted by descending raw score (0 = best).
            ranked_features: the columns of ``features`` reordered from the
                highest-scoring feature to the lowest.
    """
    # initialize the variables and result structure
    feature_values = np.array(data)
    num_features = feature_values.shape[1]
    result = Result()
    result.features = feature_values

    # generate the ReliefF scores; every feature is kept so that each one
    # receives a score
    relief = ReliefF(n_neighbors=5, n_features_to_keep=num_features)
    relief.fit_transform(data, target)

    # produce scores and ranks
    Relief_scores = normalize(relief.feature_scores)
    # `order` lists feature indices from best to worst; its argsort is the
    # inverse permutation, i.e. the rank of each original feature.
    order = np.argsort(-relief.feature_scores)
    Relief_ranks = np.argsort(order)

    # assign the results to the appropriate fields
    result.scores = Relief_scores
    result.ranks = Relief_ranks
    # Index with the ordering, not with the ranks: indexing by ranks applies
    # the inverse permutation and does not sort the columns by score.
    result.ranked_features = feature_values[:, order]

    return result
Пример #3
0
    def execute(self):
        """Compute mutual-information based feature scores.

        Reads ``self.data`` (columns are features), ``self.target``,
        ``self.num_features`` and the ``"weight_class"`` /
        ``"weight_feature"`` entries of ``self.algo_params``.

        Writes ``self.feature_mean``, fills ``self.correlation_matrix`` in
        place, and assigns ``self.feature_feature_relation``,
        ``self.feature_class_relation`` and ``self.scores``. Returns nothing.
        """
        self.feature_mean = np.mean(self.data, axis=0)

        # generate the (symmetric) pairwise matrix. Each unordered pair is
        # evaluated once and mirrored, instead of being computed twice and
        # overwritten. The higher index is passed first, which is the
        # argument order of the value the full double loop left in place.
        for ind_1 in range(self.num_features):
            for ind_2 in range(ind_1 + 1):
                pair_value = self.compute_MI(self.data[:, ind_1],
                                             self.data[:, ind_2])
                self.correlation_matrix[ind_1, ind_2] = pair_value
                self.correlation_matrix[ind_2, ind_1] = pair_value

        for ind in range(self.num_features):
            # negated: a feature strongly related to the other features is
            # redundant and should be penalised
            self.feature_feature_relation[ind] = -np.sum(
                abs(self.correlation_matrix[ind, :]))
            self.feature_class_relation[ind] = abs(
                self.compute_MI(self.data[:, ind], self.target))

        # combine the two normalized terms into one weighted score per feature
        # NOTE(review): `normalize` is defined elsewhere -- confirm its range.
        self.feature_feature_relation = normalize(
            self.feature_feature_relation)
        self.feature_class_relation = normalize(self.feature_class_relation)
        self.scores = (self.algo_params["weight_class"] *
                       self.feature_class_relation) + (
                           self.algo_params["weight_feature"] *
                           self.feature_feature_relation)
Пример #4
0
    def execute(self):
        """Fit ReliefF on the stored data and keep its normalized scores.

        Reads ``self.algo_params["n_neighbors"]``, ``self.num_features``,
        ``self.data`` and ``self.target``; assigns ``self.scores``.
        Returns nothing.
        """
        neighbor_count = self.algo_params["n_neighbors"]
        # n_features_to_keep is set to the full feature count
        selector = ReliefF(
            n_neighbors=neighbor_count,
            n_features_to_keep=self.num_features,
        )
        selector.fit_transform(self.data, self.target)

        # NOTE(review): `normalize` is defined elsewhere -- confirm its range.
        raw_scores = selector.feature_scores
        self.scores = normalize(raw_scores)
Пример #5
0
def PCC(data, target):
    """Rank features using Pearson's Correlation Coefficient (PCC).

    Each feature's score is a weighted sum of two normalized terms:
    its absolute correlation with ``target`` (weight 0.7) and the negated
    sum of its absolute correlations with all features (weight 0.3), so
    that redundant features are penalised.

    Args:
        data: 2-D array-like whose columns are the features.
        target: class labels, passed to ``compute_PCC`` with each column.

    Returns:
        A ``Result`` object with:
            features: ``data`` converted with ``np.array``.
            scores: the combined score of every feature.
            ranks: ``ranks[i]`` is the position of feature ``i`` when the
                features are sorted by descending score (0 = best).
            ranked_features: the columns of ``features`` reordered from the
                highest-scoring feature to the lowest.
    """
    # initialize the variables and result structure
    feature_values = np.array(data)
    num_features = feature_values.shape[1]
    PCC_mat = np.zeros((num_features, num_features))
    PCC_values_feat = np.zeros(num_features)
    PCC_values_class = np.zeros(num_features)
    result = Result()
    result.features = feature_values
    weight_feat = 0.3  # weight given to feature-feature correlation
    weight_class = 0.7  # weight given to feature-class correlation

    # generate the (symmetric) correlation matrix. Each unordered pair is
    # evaluated once and mirrored, instead of being computed twice and
    # overwritten. The higher index is passed first, which is the argument
    # order of the value the full double loop left in place.
    for ind_1 in range(num_features):
        for ind_2 in range(ind_1 + 1):
            pair_value = compute_PCC(feature_values[:, ind_1],
                                     feature_values[:, ind_2])
            PCC_mat[ind_1, ind_2] = pair_value
            PCC_mat[ind_2, ind_1] = pair_value

    for ind in range(num_features):
        # negated because correlation with other features is redundancy
        PCC_values_feat[ind] = -np.sum(abs(PCC_mat[ind, :]))
        PCC_values_class[ind] = abs(compute_PCC(feature_values[:, ind],
                                                target))

    # combine the two normalized terms into one weighted score per feature
    PCC_values_feat = normalize(PCC_values_feat)
    PCC_values_class = normalize(PCC_values_class)
    PCC_scores = (weight_class * PCC_values_class) + (weight_feat *
                                                      PCC_values_feat)
    # `order` lists feature indices from best to worst; its argsort is the
    # rank of each original feature.
    order = np.argsort(-PCC_scores)
    PCC_ranks = np.argsort(order)

    # assign the results to the appropriate fields
    result.scores = PCC_scores
    result.ranks = PCC_ranks
    result.ranked_features = feature_values[:, order]

    return result
Пример #6
0
def SCC(data, target):
    """Rank features using Spearman's Correlation Coefficient (SCC).

    Each feature's score is a weighted sum of two normalized terms:
    its absolute correlation with ``target`` (weight 0.7) and the negated
    sum of its absolute correlations with all features (weight 0.3), so
    that redundant features are penalised.

    Args:
        data: 2-D array-like whose columns are the features.
        target: class labels, passed to ``compute_SCC`` with each column.

    Returns:
        A ``Result`` object with:
            features: ``data`` converted with ``np.array``.
            scores: the combined score of every feature.
            ranks: ``ranks[i]`` is the position of feature ``i`` when the
                features are sorted by descending score (0 = best).
            ranked_features: the columns of ``features`` reordered from the
                highest-scoring feature to the lowest.
    """
    # initialize the variables and result structure
    feature_values = np.array(data)
    num_features = feature_values.shape[1]
    SCC_mat = np.zeros((num_features, num_features))
    SCC_values_feat = np.zeros(num_features)
    SCC_values_class = np.zeros(num_features)
    result = Result()
    result.features = feature_values
    weight_feat = 0.3  # weight given to feature-feature correlation
    weight_class = 0.7  # weight given to feature-class correlation

    # generate the (symmetric) correlation matrix. Each unordered pair is
    # evaluated once and mirrored, instead of being computed twice and
    # overwritten. The higher index is passed first, which is the argument
    # order of the value the full double loop left in place.
    for ind_1 in range(num_features):
        for ind_2 in range(ind_1 + 1):
            pair_value = compute_SCC(feature_values[:, ind_1],
                                     feature_values[:, ind_2])
            SCC_mat[ind_1, ind_2] = pair_value
            SCC_mat[ind_2, ind_1] = pair_value

    for ind in range(num_features):
        # negated because correlation with other features is redundancy
        SCC_values_feat[ind] = -np.sum(abs(SCC_mat[ind, :]))
        # Use the magnitude: a strong negative correlation with the target
        # is as informative as a strong positive one. Without abs() such a
        # feature would receive the lowest relevance after normalization.
        SCC_values_class[ind] = abs(compute_SCC(feature_values[:, ind],
                                                target))

    # combine the two normalized terms into one weighted score per feature
    SCC_values_feat = normalize(SCC_values_feat)
    SCC_values_class = normalize(SCC_values_class)
    SCC_scores = (weight_class * SCC_values_class) + (weight_feat *
                                                      SCC_values_feat)
    # `order` lists feature indices from best to worst; its argsort is the
    # rank of each original feature.
    order = np.argsort(-SCC_scores)
    SCC_ranks = np.argsort(order)

    # assign the results to the appropriate fields
    result.scores = SCC_scores
    result.ranks = SCC_ranks
    result.ranked_features = feature_values[:, order]

    return result
Пример #7
0
def MI(data, target):
    """Rank features using Mutual Information (MI).

    Each feature's score is a weighted sum of two normalized terms:
    its mutual information with ``target`` (weight 0.7) and the negated sum
    of its absolute mutual information with all features (weight 0.3), so
    that redundant features are penalised.

    Args:
        data: 2-D array-like whose columns are the features.
        target: class labels, passed to ``compute_MI`` with each column.

    Returns:
        A ``Result`` object with:
            features: ``data`` converted with ``np.array``.
            scores: the combined score of every feature.
            ranks: ``ranks[i]`` is the position of feature ``i`` when the
                features are sorted by descending score (0 = best).
            ranked_features: the columns of ``features`` reordered from the
                highest-scoring feature to the lowest.
    """
    # initialize the variables and result structure
    feature_values = np.array(data)
    num_features = feature_values.shape[1]
    MI_mat = np.zeros((num_features, num_features))
    MI_values_feat = np.zeros(num_features)
    MI_values_class = np.zeros(num_features)
    result = Result()
    result.features = feature_values
    weight_feat = 0.3  # weight given to feature-feature information
    weight_class = 0.7  # weight given to feature-class information

    # generate the (symmetric) information matrix. Each unordered pair is
    # evaluated once and mirrored, instead of being computed twice and
    # overwritten. The higher index is passed first, which is the argument
    # order of the value the full double loop left in place.
    for ind_1 in range(num_features):
        for ind_2 in range(ind_1 + 1):
            pair_value = compute_MI(feature_values[:, ind_1],
                                    feature_values[:, ind_2])
            MI_mat[ind_1, ind_2] = pair_value
            MI_mat[ind_2, ind_1] = pair_value

    for ind in range(num_features):
        # negated because information shared with other features is
        # redundancy
        MI_values_feat[ind] = -np.sum(abs(MI_mat[ind, :]))
        MI_values_class[ind] = compute_MI(feature_values[:, ind], target)

    # combine the two normalized terms into one weighted score per feature
    MI_values_feat = normalize(MI_values_feat)
    MI_values_class = normalize(MI_values_class)
    MI_scores = (weight_class * MI_values_class) + (weight_feat *
                                                    MI_values_feat)
    # `order` lists feature indices from best to worst; its argsort is the
    # rank of each original feature.
    order = np.argsort(-MI_scores)
    MI_ranks = np.argsort(order)

    # assign the results to the appropriate fields
    result.scores = MI_scores
    result.ranks = MI_ranks
    result.ranked_features = feature_values[:, order]

    return result