def _evaluate_single(data, target_feature):
    """Score every column of ``data`` against ``target_feature`` with MINE.

    A single ``MINE(alpha=0.4, c=15)`` estimator is created once and reused
    for all columns.

    Parameters
    ----------
    data : 2-D array indexable as ``data[:, i]`` and exposing ``shape[1]``.
    target_feature : sequence passed as the first argument to
        ``compute_score`` for every column.

    Returns
    -------
    list of tuple
        One ``(mic, mas, mev, mcn, mcn_general)`` tuple per column, in
        column order.
    """
    estimator = MINE(alpha=0.4, c=15)

    def _column_stats(col_idx):
        # compute_score must run before the statistics are read back.
        estimator.compute_score(target_feature, data[:, col_idx])
        return (
            estimator.mic(),
            estimator.mas(),
            estimator.mev(),
            estimator.mcn(),
            estimator.mcn_general(),
        )

    return [_column_stats(col_idx) for col_idx in range(data.shape[1])]
def mine(pair):
    """Compute MINE statistics for one named pair of variables.

    Parameters
    ----------
    pair : tuple
        Exactly ``(x, y, x_name, y_name)``.

    Returns
    -------
    dict or None
        ``{(x_name, y_name): (mic, mas, mcn, mev, tic)}`` on success, or
        ``None`` when the computation raises (best-effort behaviour).

    Raises
    ------
    AssertionError
        If ``pair`` is not a tuple of length 4.
    """
    # Explicit raise instead of `assert` so the check survives `python -O`.
    # AssertionError is kept so existing callers see the same exception type.
    if not (len(pair) == 4 and isinstance(pair, tuple)):
        raise AssertionError("pair must be a 4-tuple (x, y, x_name, y_name)")

    try:
        x, y, x_name, y_name = pair
        estimator = MINE()
        estimator.compute_score(x, y)
        return {
            (x_name, y_name): (
                estimator.mic(),
                estimator.mas(),
                estimator.mcn(),
                estimator.mev(),
                estimator.tic(),
            )
        }
    except Exception:
        # Deliberately best-effort: a failing pair yields None. Unlike a bare
        # `except:`, KeyboardInterrupt / SystemExit still propagate.
        return None
def mine_matrix(data, mode='all', n_sample=False, frac_sample=False):
    """Compute pairwise MINE statistics between all columns of ``data``.

    Parameters
    ----------
    data : array-like (numpy or pandas), 2-D
        Rows are observations, columns are variables. Rows containing NaN
        are dropped before anything else is done.
    mode : str or list of str
        Which matrices to return: any of 'mic', 'mas', 'mev', 'corr',
        'mic_r2'. ``'all'`` selects all five.
    n_sample : int or False
        If truthy, draw this many rows at random (with replacement).
    frac_sample : float or False
        If truthy, draw ``round(frac_sample * n_rows)`` rows at random
        (with replacement). Must not be combined with ``n_sample``.

    Returns
    -------
    list of numpy.ndarray
        The requested ``(n_col, n_col)`` matrices, always in the order
        mic, mas, mev, corr, mic_r2 (entries not requested are omitted).
        The mic/mas/mev matrices are symmetric with 1.0 on the diagonal.

    Raises
    ------
    ValueError
        If both ``n_sample`` and ``frac_sample`` are given.
    """
    if mode == 'all':
        mode = ['mic', 'mas', 'mev', 'corr', 'mic_r2']
    elif isinstance(mode, str):
        mode = [mode]

    data = np.asarray(data, dtype=float)
    data = data[~np.isnan(data).any(axis=1), :]  # drop rows with missing values

    if n_sample and frac_sample:
        raise ValueError('n_sample and frac_sample don`t using both')
    if n_sample or frac_sample:
        # `data` is an ndarray here (no DataFrame.sample available), so the
        # bootstrap sample is drawn by indexing with random row numbers.
        n_rows = data.shape[0]
        if n_sample:
            n_draw = int(n_sample)
        else:
            n_draw = int(round(frac_sample * n_rows))
        picked = np.random.choice(n_rows, size=n_draw, replace=True)
        data = data[picked, :]

    n_col = data.shape[1]

    # Start from the identity so the diagonal is exactly 1.0, then fill each
    # off-diagonal pair once and mirror it. (Pre-filling the lower triangle
    # with 1.0 and adding the transpose plus eye would give 3.0 on the
    # diagonal and statistic + 1 elsewhere.)
    mic_array = np.eye(n_col, dtype=float)
    mas_array = np.eye(n_col, dtype=float)
    mev_array = np.eye(n_col, dtype=float)

    # Skip the pairwise MINE pass entirely when only 'corr' is requested.
    needs_mine = any(key in mode for key in ('mic', 'mas', 'mev', 'mic_r2'))
    if needs_mine:
        for i in range(n_col):
            for j in range(i + 1, n_col):
                estimator = MINE()
                estimator.compute_score(data[:, i], data[:, j])
                mic_array[i, j] = mic_array[j, i] = estimator.mic()
                mas_array[i, j] = mas_array[j, i] = estimator.mas()
                mev_array[i, j] = mev_array[j, i] = estimator.mev()

    returns = []
    if 'mic' in mode:
        returns.append(mic_array)
    if 'mas' in mode:
        returns.append(mas_array)
    if 'mev' in mode:
        returns.append(mev_array)

    if 'corr' in mode or 'mic_r2' in mode:
        # Pearson correlation. atleast_2d keeps a matrix even for a single
        # column, where np.corrcoef collapses to a scalar.
        corr_array = np.atleast_2d(np.corrcoef(data, rowvar=False))
        # Constant columns yield NaN; fill those entries with 1.
        corr_array = np.where(np.isnan(corr_array), 1.0, corr_array)
        if 'corr' in mode:
            returns.append(corr_array)
        if 'mic_r2' in mode:
            # NOTE(review): despite the name, this subtracts Pearson r, not
            # r**2 -- left unchanged; confirm the intended definition.
            returns.append(mic_array - corr_array)

    return returns
def mine_matrix(self, data, n_sample=False, frac_sample=False):
    '''Compute pairwise MINE statistics between all columns of ``data``.

    The results are stored on the instance as ``self.mic``, ``self.mas``,
    ``self.mev``, ``self.corr`` and ``self.mic_r2`` and are also returned.

    data: (numpy or pandas)
        Rows are observations, columns are variables. Rows containing
        missing values are dropped.
    n_sample : (int)
        If truthy, draw this many rows at random with replacement.
    frac_sample: [0 ~ 1] (float)
        If truthy, draw this fraction of the rows at random with
        replacement. Must not be combined with ``n_sample``.

    Returns the tuple ``(mic, mas, mev, mic_r2, corr)`` of
    ``(n_col, n_col)`` arrays. mic/mas/mev are symmetric with 1.0 on the
    diagonal.

    Raises ValueError if both ``n_sample`` and ``frac_sample`` are given.
    '''
    data = pd.DataFrame(data).copy()
    data = data.dropna()  # drop rows with missing values

    if n_sample and frac_sample:
        raise ValueError(
            'Please enter a value for `frac` OR `n`, not both')
    if frac_sample:
        data = data.sample(frac=frac_sample, replace=True)
    elif n_sample:
        data = data.sample(n=n_sample, replace=True)

    # Convert to a float numpy.ndarray
    data = check_array(data, accept_sparse="csc", dtype=float)

    n_col = data.shape[1]

    # Start from the identity so the diagonal is exactly 1.0, then fill each
    # off-diagonal pair once and mirror it. (Pre-filling the lower triangle
    # with 1.0 and adding the transpose plus eye would give 3.0 on the
    # diagonal and statistic + 1 elsewhere.)
    mic_array = np.eye(n_col, dtype=float)
    mas_array = np.eye(n_col, dtype=float)
    mev_array = np.eye(n_col, dtype=float)

    for i in range(n_col):
        for j in range(i + 1, n_col):
            estimator = MINE()
            estimator.compute_score(data[:, i], data[:, j])
            mic_array[i, j] = mic_array[j, i] = estimator.mic()
            mas_array[i, j] = mas_array[j, i] = estimator.mas()
            mev_array[i, j] = mev_array[j, i] = estimator.mev()

    self.mic = mic_array
    self.mas = mas_array
    self.mev = mev_array

    # Pearson correlation. atleast_2d keeps a matrix even for a single
    # column, where np.corrcoef collapses to a scalar.
    corr = np.atleast_2d(np.corrcoef(data, rowvar=False))
    # Constant columns yield NaN; fill those entries with 1.
    self.corr = np.where(np.isnan(corr), 1.0, corr)

    # NOTE(review): despite the name, this subtracts Pearson r, not r**2 --
    # left unchanged; confirm the intended definition.
    self.mic_r2 = self.mic - self.corr

    return self.mic, self.mas, self.mev, self.mic_r2, self.corr
# Score the target series `goal_data[goal]` against every column of `data`
# using the last `total` observations of each, and collect the MINE
# statistics in one row per column.
# NOTE(review): `data` is presumably a pandas DataFrame (it is row-sliced and
# then iterated column by column) -- confirm against the caller.
base = goal_data[goal][-total:]
data = data[-total:]

df = pd.DataFrame(columns=["MIC", "O-MAS", "MEV", "MCN"])

# `.items()` replaces `.iteritems()`, which was removed in pandas 2.0; both
# yield (label, values) pairs.
for index, row in data.items():
    slice_ = row
    mine.compute_score(base, slice_)
    mic = mine.mic()
    mas = 1 - mine.mas()  # stored under "O-MAS" as one minus the MAS value
    mev = mine.mev()
    mcn = mine.mcn_general()
    result = np.array([mic, mas, mev, mcn])
    df.loc[str(index)] = result

print("\n")
print("%s与其他指标遍历的相关系数结果是:" % (var))
print(df)

mic = df.MIC
class TestFunctions(unittest.TestCase):
    """Check MINE statistics on simple functional relationships.

    Each test builds 1000 points, scores them with ``MINE(alpha=0.6, c=15)``
    and compares mic, mas, mev, mcn and mcn_general with expected values.
    """

    def setUp(self):
        self.mine = MINE(alpha=0.6, c=15)

    def build_const(self, n):
        """Return ``n`` points of x in [0, 1] paired with y = 0."""
        x = np.linspace(0, 1, n)
        y = np.zeros(n)
        return x, y

    def build_linear(self, n):
        """Return ``n`` points of x in [0, 1] paired with y = x."""
        x = np.linspace(0, 1, n)
        return x, x

    def build_sine(self, n):
        """Return ``n`` points of x in [0, 1] paired with sin(8*pi*x)."""
        x = np.linspace(0, 1, n)
        return x, np.sin(8 * np.pi * x)

    def build_exp(self, n):
        """Return ``n`` points of x in [0, 10] paired with 2**x."""
        x = np.linspace(0, 10, n)
        return x, 2**x

    def _assert_stats(self, x, y, mic, mas, mev, mcn, mas_decimal=4):
        """Score (x, y) and compare every statistic with its expected value.

        ``mcn`` is the expected value for both mcn() and mcn_general().
        """
        self.mine.compute_score(x, y)
        assert_almost_equal(self.mine.mic(), mic, 4)
        assert_almost_equal(self.mine.mas(), mas, mas_decimal)
        assert_almost_equal(self.mine.mev(), mev, 4)
        assert_almost_equal(self.mine.mcn(), mcn, 4)
        assert_almost_equal(self.mine.mcn_general(), mcn, 4)

    def test_const(self):
        x, y = self.build_const(1000)
        self._assert_stats(x, y, mic=0., mas=0., mev=0., mcn=2.)

    # The original defined test_linear twice with identical bodies; the
    # second definition silently replaced the first, so one copy is kept.
    def test_linear(self):
        x, y = self.build_linear(1000)
        self._assert_stats(x, y, mic=1., mas=0., mev=1., mcn=2.)

    def test_sine(self):
        x, y = self.build_sine(1000)
        # mas is only compared to 3 decimals for the sine case.
        self._assert_stats(x, y, mic=1., mas=0.875, mev=1., mcn=4.,
                           mas_decimal=3)

    def test_exp(self):
        x, y = self.build_exp(1000)
        self._assert_stats(x, y, mic=1., mas=0., mev=1., mcn=2.)
class TestFunctions(unittest.TestCase):
    """Check MINE statistics on simple functional relationships.

    Each test builds 1000 points, scores them with ``MINE(alpha=0.6, c=15)``
    and compares mic, mas, mev, mcn and mcn_general with expected values.
    """

    def setUp(self):
        self.mine = MINE(alpha=0.6, c=15)

    def build_const(self, n):
        """Return ``n`` points of x in [0, 1] paired with y = 0."""
        x = np.linspace(0, 1, n)
        y = np.zeros(n)
        return x, y

    def build_linear(self, n):
        """Return ``n`` points of x in [0, 1] paired with y = x."""
        x = np.linspace(0, 1, n)
        return x, x

    def build_sine(self, n):
        """Return ``n`` points of x in [0, 1] paired with sin(8*pi*x)."""
        x = np.linspace(0, 1, n)
        return x, np.sin(8*np.pi*x)

    def build_exp(self, n):
        """Return ``n`` points of x in [0, 10] paired with 2**x."""
        x = np.linspace(0, 10, n)
        return x, 2**x

    def _assert_stats(self, x, y, mic, mas, mev, mcn, mas_decimal=4):
        """Score (x, y) and compare every statistic with its expected value.

        ``mcn`` is the expected value for both mcn() and mcn_general().
        """
        self.mine.compute_score(x, y)
        assert_almost_equal(self.mine.mic(), mic, 4)
        assert_almost_equal(self.mine.mas(), mas, mas_decimal)
        assert_almost_equal(self.mine.mev(), mev, 4)
        assert_almost_equal(self.mine.mcn(), mcn, 4)
        assert_almost_equal(self.mine.mcn_general(), mcn, 4)

    def test_const(self):
        x, y = self.build_const(1000)
        self._assert_stats(x, y, mic=0., mas=0., mev=0., mcn=2.)

    # The original defined test_linear twice with identical bodies; the
    # second definition silently replaced the first, so one copy is kept.
    def test_linear(self):
        x, y = self.build_linear(1000)
        self._assert_stats(x, y, mic=1., mas=0., mev=1., mcn=2.)

    def test_sine(self):
        x, y = self.build_sine(1000)
        # mas is only compared to 3 decimals for the sine case.
        self._assert_stats(x, y, mic=1., mas=0.875, mev=1., mcn=4.,
                           mas_decimal=3)

    def test_exp(self):
        x, y = self.build_exp(1000)
        self._assert_stats(x, y, mic=1., mas=0., mev=1., mcn=2.)