Example #1
def cife(X, y, **kwargs):
    """
    This function implements the CIFE feature selection algorithm.

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data, guaranteed to be discrete
    y: {numpy array}, shape (n_samples,)
        input class labels
    kwargs: {dictionary}
        n_selected_features: {int}
            number of features to select
        dict_features: {dictionary}
            feature-name (header) mapping forwarded to LCSI.lcsi; required
            whenever n_selected_features is given

    Output
    ------
    F: {numpy array}, shape (n_features,)
        index of selected features, F[0] is the most important feature
    J_CMI: {numpy array}, shape: (n_features,)
        corresponding objective function value of selected features
    MIfy: {numpy array}, shape: (n_features,)
        corresponding mutual information between selected features and response
    dictFeatJcmi: {dictionary}
        per-feature J_CMI values, as returned by the LCSI.lcsi variant used here

    Reference
    ---------
    Brown, Gavin et al. "Conditional Likelihood Maximisation: A Unifying Framework for Information Theoretic Feature Selection." JMLR 2012.
    """
    
    if 'n_selected_features' in kwargs:
        n_selected_features = kwargs['n_selected_features']
        dictOfHeader = kwargs['dict_features']
        F, J_CMI, MIfy, dictFeatJcmi = LCSI.lcsi(
            X, y, beta=1, gamma=1,
            n_selected_features=n_selected_features,
            dict_features=dictOfHeader)
    else:
        F, J_CMI, MIfy, dictFeatJcmi = LCSI.lcsi(X, y, beta=1, gamma=1)
    return F, J_CMI, MIfy, dictFeatJcmi
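
A minimal usage sketch (hypothetical toy data; it assumes X is discrete and that the LCSI variant this snippet relies on, one that accepts dict_features, is importable):

import numpy as np

# toy discrete data: 4 samples, 3 features (hypothetical)
X = np.array([[0, 1, 1],
              [1, 0, 1],
              [0, 0, 0],
              [1, 1, 0]])
y = np.array([0, 1, 0, 1])

# this cife variant also expects a feature-index -> header mapping
names = {0: 'f0', 1: 'f1', 2: 'f2'}
F, J_CMI, MIfy, feat_jcmi = cife(
    X, y, n_selected_features=2, dict_features=names)
print(F)  # indices of the selected features, most informative first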
Example #2
from skfeature.function.information_theoretical_based import LCSI


def mrmr(X, y, **kwargs):
    """
    This function implements the MRMR feature selection algorithm.

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data, guaranteed to be discrete
    y: {numpy array}, shape (n_samples,)
        input class labels
    kwargs: {dictionary}
        n_selected_features: {int}
            number of features to select

    Output
    ------
    F: {numpy array}, shape (n_features,)
        index of selected features, F[0] is the most important feature
    J_CMI: {numpy array}, shape: (n_features,)
        corresponding objective function value of selected features
    MIfy: {numpy array}, shape: (n_features,)
        corresponding mutual information between selected features and response

    Reference
    ---------
    Brown, Gavin et al. "Conditional Likelihood Maximisation: A Unifying Framework for Information Theoretic Feature Selection." JMLR 2012.
    """
    if 'n_selected_features' in kwargs:
        n_selected_features = kwargs['n_selected_features']
        F, J_CMI, MIfy = LCSI.lcsi(
            X, y, gamma=0, function_name='MRMR',
            n_selected_features=n_selected_features)
    else:
        F, J_CMI, MIfy = LCSI.lcsi(X, y, gamma=0, function_name='MRMR')
    return F, J_CMI, MIfy
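
Usage is analogous; a brief sketch with hypothetical discrete data (mRMR additionally penalizes redundancy among the features already selected):

import numpy as np

X = np.array([[0, 1, 1], [1, 0, 1], [0, 0, 0], [1, 1, 0]])  # hypothetical
y = np.array([0, 1, 0, 1])

F, J_CMI, MIfy = mrmr(X, y, n_selected_features=2)
print(F)  # selected feature indices, most important first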
Example #3
from skfeature.function.information_theoretical_based import LCSI


def mim(X, y, **kwargs):
    """
    This function implements the MIM feature selection algorithm.

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data, guaranteed to be discrete
    y: {numpy array}, shape (n_samples,)
        input class labels
    kwargs: {dictionary}
        n_selected_features: {int}
            number of features to select

    Output
    ------
    F: {numpy array}, shape (n_features, )
        index of selected features, F[0] is the most important feature

    Reference
    ---------
    Brown, Gavin et al. "Conditional Likelihood Maximisation: A Unifying Framework for Information Theoretic Feature Selection." JMLR 2012.
    """

    if 'n_selected_features' in kwargs:
        n_selected_features = kwargs['n_selected_features']
        # LCSI.lcsi also computes J_CMI and MIfy; MIM only needs the
        # selection order F, which is what this wrapper returns
        F, J_CMI, MIfy = LCSI.lcsi(
            X, y, beta=0, gamma=0,
            n_selected_features=n_selected_features)
    else:
        F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=0, gamma=0)
    return F
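
MIM scores each feature only by its mutual information with y (beta=0, gamma=0 in the LCSI objective), so this wrapper returns just the selection order; a small sketch with hypothetical data:

import numpy as np

X = np.array([[0, 1, 1], [1, 0, 1], [0, 0, 0], [1, 1, 0]])  # hypothetical
y = np.array([0, 1, 0, 1])

idx = mim(X, y, n_selected_features=2)
print(idx)  # feature indices ranked by mutual information with y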
Example #4
def mifs(X, y, mode="rank", **kwargs):
    """
    This function implements the MIFS feature selection algorithm.

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data, guaranteed to be discrete
    y: {numpy array}, shape (n_samples,)
        input class labels
    mode: {string}
        "rank" (default) to return a rank for every feature of X,
        "index" to return only the indices of the selected features
    kwargs: {dictionary}
        n_selected_features: {int}
            number of features to select
        beta: {float}
            weight of the redundancy penalty (default 0.5)

    Output
    ------
    F: {numpy array}, shape (n_features,)
        in "index" mode, indices of the selected features (F[0] is the most
        important feature); in "rank" mode, a rank for every feature of X

    Reference
    ---------
    Brown, Gavin et al. "Conditional Likelihood Maximisation: A Unifying Framework for Information Theoretic Feature Selection." JMLR 2012.
    """

    # trade-off parameter for the redundancy penalty; default 0.5
    beta = kwargs.get('beta', 0.5)
    if 'n_selected_features' in kwargs:
        n_selected_features = kwargs['n_selected_features']
        F, J_CMI, MIfy = LCSI.lcsi(
            X, y, beta=beta, gamma=0,
            n_selected_features=n_selected_features)
    else:
        F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=beta, gamma=0)

    if mode == "index":
        # return the selection order as integer feature indices
        return np.array(F, dtype=int)
    else:
        # "rank" mode: expand the selection order into a rank for every
        # feature of X (reverse_argsort is assumed to be defined elsewhere
        # in the surrounding module)
        return reverse_argsort(F, size=X.shape[1])
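
A short sketch of the two output modes (hypothetical data; "rank" mode depends on the reverse_argsort helper assumed above):

import numpy as np

X = np.array([[0, 1, 1], [1, 0, 1], [0, 0, 0], [1, 1, 0]])  # hypothetical
y = np.array([0, 1, 0, 1])

sel = mifs(X, y, mode="index", n_selected_features=2)  # selected indices
ranks = mifs(X, y)                                     # default "rank" mode
print(sel, ranks)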
Example #5
#            if su_calculation(fp, fi) >= t1[i, 1]:
#                # construct the mask for feature whose su is larger than su(fp,y)
#                idx = s_list[:, 0] != i
#                idx = np.array([idx, idx])
#                idx = np.transpose(idx)
#                # delete the feature by using the mask
#                s_list = s_list[idx]
#                length = len(s_list)//2
#                s_list = s_list.reshape((length, 2))
#    return np.array(F, dtype=int), np.array(SU)
#
#feat_index, sym_arr = fcbf(X_train_data.iloc[:,:5], X_test_data.iloc[:,:5])

#MIM
from skfeature.function.information_theoretical_based import LCSI
F, J_CMI, MIfy = LCSI.lcsi(X_train_data, y_train_data, beta=0, gamma=0)

from sklearn.ensemble import AdaBoostRegressor
regr = AdaBoostRegressor(random_state=0, n_estimators=100)
regr.fit(X_train_data, y_train_data)

imp = regr.feature_importances_

X_train_data.columns[imp > 0]

from chefboost import Chefboost as chef
import pandas as pd

config = {'algorithm': 'C4.5'}
# Chefboost expects the target column to be named "Decision"; copy the frame
# so the original training data is not modified in place
df = X_train_data.copy()
df["Decision"] = y_train_data