def _evaluate_single(data, target_feature):
    """Score every column of ``data`` against ``target_feature`` with MINE.

    A single ``MINE(alpha=0.4, c=15)`` estimator is created and reused for
    all columns.

    Args:
        data: 2-D array-like indexed as ``data[:, i]``; ``data.shape[1]``
            gives the number of columns to score.
        target_feature: Sequence passed as the first argument of
            ``compute_score`` for every column.

    Returns:
        A list with one ``(mic, mas, mev, mcn, mcn_general)`` tuple per
        column of ``data``, in column order.
    """
    estimator = MINE(alpha=0.4, c=15)

    def _stats_for(column):
        # The statistics are only valid after compute_score has run on
        # the current pair, so compute first and read afterwards.
        estimator.compute_score(target_feature, column)
        return (
            estimator.mic(),
            estimator.mas(),
            estimator.mev(),
            estimator.mcn(),
            estimator.mcn_general(),
        )

    n_columns = data.shape[1]
    return [_stats_for(data[:, col]) for col in range(n_columns)]
Пример #2
0
def mine(pair):
    """Compute the MINE statistics for one named pair of variables.

    Args:
        pair: A 4-tuple ``(x, y, x_name, y_name)`` where ``x`` and ``y`` are
            the samples handed to ``MINE.compute_score`` and the two names
            form the key of the result.

    Returns:
        ``{(x_name, y_name): (mic, mas, mcn, mev, tic)}`` on success, or
        ``None`` if the computation raised an ordinary exception
        (best-effort behaviour, e.g. for use in a bulk map over many pairs).

    Raises:
        AssertionError: If ``pair`` is not a tuple of length 4.
    """
    assert len(pair) == 4 and isinstance(pair, tuple)

    try:
        x, y, x_name, y_name = pair

        mine_ = MINE()
        mine_.compute_score(x, y)

        return {
            (x_name, y_name):
            (mine_.mic(), mine_.mas(), mine_.mcn(), mine_.mev(), mine_.tic())
        }
    except Exception:
        # Deliberately best-effort: a failing pair yields None instead of
        # aborting the caller.  ``Exception`` (rather than a bare except)
        # lets KeyboardInterrupt and SystemExit propagate so a long run can
        # still be cancelled.
        return None
Пример #3
0
def mine_matrix(data, mode='all', n_sample=False, frac_sample=False):
    """Build pairwise MINE / Pearson matrices for all columns of ``data``.

    Args:
        data: (numpy or pandas) 2-D table holding all explanatory and
            objective variables, one variable per column. Rows containing
            NaN are dropped before anything else happens.
        mode: (str or list of str) Which matrices to return. Any of
            ``'mic'``, ``'mas'``, ``'mev'``, ``'corr'``, ``'mic_r2'``;
            ``'all'`` selects all five.
        n_sample: (int) Number of rows to draw at random, with replacement.
            ``False`` disables sampling.
        frac_sample: (float in [0, 1]) Fraction of the rows to draw at
            random, with replacement. Cannot be combined with ``n_sample``.

    Returns:
        A list with the requested ``(n_col, n_col)`` arrays, always in the
        fixed order mic, mas, mev, corr, mic_r2 (unrequested ones are
        omitted).

    Raises:
        ValueError: If both ``n_sample`` and ``frac_sample`` are given.
    """
    if mode == 'all':
        mode = ['mic', 'mas', 'mev', 'corr', 'mic_r2']
    elif isinstance(mode, str):
        mode = [mode]

    data = np.array(data)
    # Delete rows that contain missing values.
    data = data[~np.isnan(data).any(axis=1), :]

    if n_sample and frac_sample:
        raise ValueError('n_sample and frac_sample don`t using both')
    if n_sample or frac_sample:
        # ``data`` is an ndarray at this point, so rows are drawn with NumPy
        # (ndarray has no ``.sample`` method).
        n_rows = data.shape[0]
        if n_sample:
            n_draw = n_sample
        else:
            n_draw = int(round(frac_sample * n_rows))
        picked = np.random.choice(n_rows, size=n_draw, replace=True)
        data = data[picked, :]

    n_col = data.shape[1]
    identity = np.eye(N=n_col, dtype=float)

    # The MINE pass is the expensive part; skip it when only 'corr' is wanted.
    need_mine = any(
        name in mode for name in ('mic', 'mas', 'mev', 'mic_r2'))
    if need_mine:
        # Only the strict upper triangle is computed.  The lower triangle and
        # the diagonal stay 0 so that ``M + M.T + I`` below produces a
        # symmetric matrix with a unit diagonal.
        mic = np.zeros((n_col, n_col), dtype=float)  # Nonlinear correlation
        mas = np.zeros((n_col, n_col), dtype=float)  # Linearity
        mev = np.zeros((n_col, n_col), dtype=float)  # Functionality
        for i in range(n_col):
            for j in range(i + 1, n_col):
                mine = MINE()
                mine.compute_score(data[:, i], data[:, j])
                mic[i, j] = mine.mic()
                mas[i, j] = mine.mas()
                mev[i, j] = mine.mev()

    returns = []
    if 'mic' in mode or 'mic_r2' in mode:
        mic_array = mic + mic.T + identity
        if 'mic' in mode:
            returns.append(mic_array)
    if 'mas' in mode:
        returns.append(mas + mas.T + identity)
    if 'mev' in mode:
        returns.append(mev + mev.T + identity)
    if 'corr' in mode or 'mic_r2' in mode:
        # Pearson's correlation coefficient.  ``atleast_2d`` covers the
        # single-column case, where corrcoef returns a scalar.
        corr_array = np.atleast_2d(np.corrcoef(data, rowvar=False))
        # Constant columns give NaN; fill those entries with 1.
        corr_array = np.where(np.isnan(corr_array), 1.0, corr_array)
        if 'corr' in mode:
            returns.append(corr_array)
    if 'mic_r2' in mode:
        # Degree of nonlinearity.
        # NOTE(review): this subtracts Pearson r, not r**2, despite the
        # name ``mic_r2`` - confirm which one is intended.
        returns.append(mic_array - corr_array)
    return returns
Пример #4
0
    def mine_matrix(self, data, n_sample=False, frac_sample=False):
        '''Compute pairwise MINE / Pearson matrices and store them on self.

        The results are saved as the attributes ``mic``, ``mas``, ``mev``,
        ``corr`` and ``mic_r2`` and also returned.

        data: (numpy or pandas) A data frame that contains all explanatory
            and objective variables, one variable per column. Rows with
            missing values are dropped first.
        n_sample : (int) Number of rows to draw at random, with
            replacement. False disables sampling.
        frac_sample: [0 ~ 1] (float) Fraction of the rows to draw at random,
            with replacement. Not used at the same time as n_sample.

        Returns the tuple ``(mic, mas, mev, mic_r2, corr)``; each entry is
        an ``(n_col, n_col)`` array.

        Raises ValueError if both n_sample and frac_sample are given.
        '''
        data = pd.DataFrame(data).copy()
        data = data.dropna()  # Drop rows with missing values

        if n_sample and frac_sample:
            raise ValueError(
                'Please enter a value for `frac` OR `n`, not both')
        if frac_sample:
            data = data.sample(frac=frac_sample, replace=True)
        elif n_sample:
            data = data.sample(n=n_sample, replace=True)

        data = check_array(data, accept_sparse="csc",
                           dtype=float)  # Convert to numpy.ndarray
        n_col = data.shape[1]

        # Only the strict upper triangle is computed.  The lower triangle
        # and the diagonal stay 0 so that ``M + M.T + I`` below produces a
        # symmetric matrix with a unit diagonal.
        mic_ = np.zeros((n_col, n_col), dtype=float)  # Nonlinear correlation
        mas_ = np.zeros((n_col, n_col), dtype=float)  # Linearity
        mev_ = np.zeros((n_col, n_col), dtype=float)  # Functionality
        for i in range(n_col):
            for j in range(i + 1, n_col):
                mine = MINE()
                mine.compute_score(data[:, i], data[:, j])
                mic_[i, j] = mine.mic()
                mas_[i, j] = mine.mas()
                mev_[i, j] = mine.mev()

        identity = np.eye(N=n_col, dtype=float)
        self.mic = mic_ + mic_.T + identity
        self.mas = mas_ + mas_.T + identity
        self.mev = mev_ + mev_.T + identity

        # Pearson's correlation coefficient.  ``atleast_2d`` covers the
        # single-column case, where corrcoef returns a scalar.
        corr = np.atleast_2d(np.corrcoef(data, rowvar=False))
        # Constant columns give NaN; fill those entries with 1.
        self.corr = np.where(np.isnan(corr), 1.0, corr)

        # Degree of nonlinearity.
        # NOTE(review): this subtracts Pearson r, not r**2, despite the
        # name ``mic_r2`` - confirm which one is intended.
        self.mic_r2 = self.mic - self.corr
        return self.mic, self.mas, self.mev, self.mic_r2, self.corr
Пример #5
0
# Score the last `total` observations of the goal series against every
# column of `data` and collect the MINE statistics in one table.
base = goal_data[goal][-total:]
data = data[-total:]

# One row per column of `data`; "O-MAS" holds 1 - MAS.
df = pd.DataFrame(columns=["MIC", "O-MAS", "MEV", "MCN"])

# `iteritems()` was removed in pandas 2.0; `items()` yields the same
# (label, column) pairs and is available in older versions as well.
for index, row in data.items():
    slice_ = row

    mine.compute_score(base, slice_)

    mic = mine.mic()
    mas = 1 - mine.mas()
    mev = mine.mev()
    mcn = mine.mcn_general()

    result = np.array([mic, mas, mev, mcn])
    df.loc[str(index)] = result

print("\n")
print("%s与其他指标遍历的相关系数结果是:" % (var))
print(df)
# Keep the MIC column at hand for any follow-up ranking/export step.
mic = df.MIC
Пример #6
0
class TestFunctions(unittest.TestCase):
    """Check the MINE statistics on simple synthetic relationships.

    Each ``build_*`` method returns an ``(x, y)`` pair of length ``n``; each
    ``test_*`` method scores one pair with ``MINE(alpha=0.6, c=15)`` and
    compares the five statistics against fixed expected values.
    """

    def setUp(self):
        self.mine = MINE(alpha=0.6, c=15)

    def build_const(self, n):
        """Return x evenly spaced on [0, 1] and a constant-zero y."""
        x = np.linspace(0, 1, n)
        y = np.zeros(n)
        return x, y

    def build_linear(self, n):
        """Return x evenly spaced on [0, 1] and y identical to x."""
        x = np.linspace(0, 1, n)
        return x, x

    def build_sine(self, n):
        """Return x evenly spaced on [0, 1] and y = sin(8*pi*x)."""
        x = np.linspace(0, 1, n)
        return x, np.sin(8 * np.pi * x)

    def build_exp(self, n):
        """Return x evenly spaced on [0, 10] and y = 2**x."""
        x = np.linspace(0, 10, n)
        return x, 2**x

    def _check_stats(self, xy, mic, mas, mev, mcn, mcn_general,
                     mas_places=4):
        """Score the pair ``xy`` and compare all five statistics.

        Every statistic is checked to 4 decimal places except MAS, which is
        checked to ``mas_places``.
        """
        x, y = xy
        self.mine.compute_score(x, y)
        assert_almost_equal(self.mine.mic(), mic, 4)
        assert_almost_equal(self.mine.mas(), mas, mas_places)
        assert_almost_equal(self.mine.mev(), mev, 4)
        assert_almost_equal(self.mine.mcn(), mcn, 4)
        assert_almost_equal(self.mine.mcn_general(), mcn_general, 4)

    def test_const(self):
        self._check_stats(self.build_const(1000), 0., 0., 0., 2., 2.)

    # A second, byte-identical ``test_linear`` used to follow this one and
    # silently replaced it; only one definition is kept.
    def test_linear(self):
        self._check_stats(self.build_linear(1000), 1., 0., 1., 2., 2.)

    def test_sine(self):
        self._check_stats(self.build_sine(1000), 1., 0.875, 1., 4., 4.,
                          mas_places=3)

    def test_exp(self):
        self._check_stats(self.build_exp(1000), 1., 0., 1., 2., 2.)
Пример #7
0
class TestFunctions(unittest.TestCase):
    """Regression tests for the MINE statistics on synthetic data.

    A fresh ``MINE(alpha=0.6, c=15)`` is created for every test; each test
    scores one 1000-point ``(x, y)`` pair and checks mic, mas, mev, mcn and
    mcn_general against fixed expected values.
    """

    def setUp(self):
        self.mine = MINE(alpha=0.6, c=15)

    def build_const(self, n):
        """Return x evenly spaced on [0, 1] and a constant-zero y."""
        x = np.linspace(0, 1, n)
        y = np.zeros(n)
        return x, y

    def build_linear(self, n):
        """Return x evenly spaced on [0, 1] and y identical to x."""
        x = np.linspace(0, 1, n)
        return x, x

    def build_sine(self, n):
        """Return x evenly spaced on [0, 1] and y = sin(8*pi*x)."""
        x = np.linspace(0, 1, n)
        return x, np.sin(8*np.pi*x)

    def build_exp(self, n):
        """Return x evenly spaced on [0, 10] and y = 2**x."""
        x = np.linspace(0, 10, n)
        return x, 2**x

    def test_const(self):
        """Constant y: mic, mas and mev are all expected to be 0."""
        x, y = self.build_const(1000)
        self.mine.compute_score(x, y)
        assert_almost_equal(self.mine.mic(), 0., 4)
        assert_almost_equal(self.mine.mas(), 0., 4)
        assert_almost_equal(self.mine.mev(), 0., 4)
        assert_almost_equal(self.mine.mcn(), 2., 4)
        assert_almost_equal(self.mine.mcn_general(), 2., 4)

    # NOTE: this method was previously defined twice with identical bodies;
    # the later definition shadowed the earlier one, so a single copy is
    # equivalent.
    def test_linear(self):
        """y == x: mic and mev are expected to be 1, mas 0."""
        x, y = self.build_linear(1000)
        self.mine.compute_score(x, y)
        assert_almost_equal(self.mine.mic(), 1., 4)
        assert_almost_equal(self.mine.mas(), 0., 4)
        assert_almost_equal(self.mine.mev(), 1., 4)
        assert_almost_equal(self.mine.mcn(), 2., 4)
        assert_almost_equal(self.mine.mcn_general(), 2., 4)

    def test_sine(self):
        """Sine wave: mas is only checked to 3 decimal places."""
        x, y = self.build_sine(1000)
        self.mine.compute_score(x, y)
        assert_almost_equal(self.mine.mic(), 1., 4)
        assert_almost_equal(self.mine.mas(), 0.875, 3)
        assert_almost_equal(self.mine.mev(), 1., 4)
        assert_almost_equal(self.mine.mcn(), 4., 4)
        assert_almost_equal(self.mine.mcn_general(), 4., 4)

    def test_exp(self):
        """y == 2**x: same expected values as the linear case."""
        x, y = self.build_exp(1000)
        self.mine.compute_score(x, y)
        assert_almost_equal(self.mine.mic(), 1., 4)
        assert_almost_equal(self.mine.mas(), 0., 4)
        assert_almost_equal(self.mine.mev(), 1., 4)
        assert_almost_equal(self.mine.mcn(), 2., 4)
        assert_almost_equal(self.mine.mcn_general(), 2., 4)