def setUp(self):
    """
    Create the fixture used by the tests: a two-centre blob data set and a
    DiProPerm object already fitted to it.
    """
    blob_kwargs = dict(n_samples=100, n_features=2, centers=2,
                       cluster_std=2, random_state=20)
    features, labels = make_blobs(**blob_kwargs)

    fitted = DiProPerm(B=1000,
                       separation_stats=['md', 't', 'auc'],
                       clf='md')
    fitted.fit(features, labels)

    # Attributes are only published once the fit has succeeded.
    self.dpp, self.X, self.y = fitted, features, labels
def test_custom_classifier(self):
    """
    A classifier built by the user (via get_training_fun) can be passed
    as ``clf``.
    """
    svc = LinearSVC(max_iter=10000)
    c_grid = {'C': [.0001, .001, .01, 1, 10, 100]}
    train_fun = get_training_fun(clf=svc,
                                 param_grid=c_grid,
                                 metric='roc_auc',
                                 n_splits=5)

    fitted = DiProPerm(B=1000, clf=train_fun)
    fitted.fit(self.X, self.y)

    # Smoke check only: the fit must leave a test_stats_ attribute behind.
    has_stats = hasattr(fitted, 'test_stats_')
    self.assertTrue(has_stats)
def test_custom_test_stats(self):
    """
    User provided custom test statistics.

    Passes a (name, callable) pair through ``custom_test_stats`` and checks
    that the name shows up among the keys of ``test_stats_['md']``.
    """

    def robust_Z(obs_stat, perm_samples):
        """
        Observed statistic minus the median of the permutation samples,
        divided by ``mad(perm_samples)``.
        """
        # NOTE(review): `mad` is defined/imported outside this block --
        # presumably a median-absolute-deviation function; confirm.
        return (obs_stat - np.median(perm_samples)) / mad(perm_samples)

    custom_test_stats = [('robust_Z', robust_Z)]
    dpp = DiProPerm(custom_test_stats=custom_test_stats).fit(self.X, self.y)

    # assertIn reports the missing name and the available keys on failure,
    # whereas assertTrue(x in y) only says "False is not true".
    self.assertIn('robust_Z', dpp.test_stats_['md'].keys())
def test_custom_sep_stats(self):
    """
    User provided custom separation statistics.

    Passes a (name, callable) pair through ``custom_sep_stats`` and checks
    that the name shows up among the keys of ``test_stats_``.
    """

    def median_difference(scores, y):
        """
        Absolute difference between the medians of the scores of the two
        classes found in ``y``.
        """
        y = np.array(y)
        classes = np.unique(y)
        assert len(classes) == 2

        # np.unique returns sorted labels; split the scores by label.
        s0 = scores[y == classes[0]]
        s1 = scores[y == classes[1]]
        return abs(np.median(s0) - np.median(s1))

    custom_sep_stats = [('median_difference', median_difference)]
    dpp = DiProPerm(custom_sep_stats=custom_sep_stats).fit(self.X, self.y)

    # assertIn reports the missing name and the available keys on failure,
    # whereas assertTrue(x in y) only says "False is not true".
    self.assertIn('median_difference', dpp.test_stats_.keys())
from sklearn.datasets import make_blobs
import numpy as np
import matplotlib.pyplot as plt

from diproperm.DiProPerm import DiProPerm

# Toy data: 100 two-dimensional points drawn around two centres.
X, y = make_blobs(n_samples=100, n_features=2, centers=2, cluster_std=2,
                  random_state=20)

# Scatter plot of the raw data, coloured by label, written to data.png.
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.savefig('data.png')
plt.close()

dpp = DiProPerm(B=1000).fit(X, y)

plt.figure(figsize=[12, 5])

# the observed scores
plt.subplot(1, 2, 1)
dpp.plot_observed_scores()

# show histogram of separation statistics
plt.subplot(1, 2, 2)
dpp.plot_perm_sep_stats(stat='md')

# Both panels are written to dpp_plots.png.
plt.savefig('dpp_plots.png')
plt.close()
# NOTE(review): this fragment relies on names created outside the visible
# code -- `csvDataFile`, and the containers `X` and `y` -- TODO confirm that
# they are set up before this point.
csvReader = csv.reader(csvDataFile)
for row in csvReader:
    # Collect column 1 of every row the reader yields.
    y.append(row[1])

y = np.array(y)
X = np.array(X)

# Drop the first entry of y, and the first row and first column of X
# (presumably a header row and an index column -- verify against the CSVs).
y = y[1:]
X = X[1:, 1:]

# Convert the remaining entries to numeric dtypes.
y = y.astype(int)
X = X.astype(float)

# NOTE(review): commented-out pandas-based loading, left as found; remove if
# it is no longer needed.
#X = pd.read_csv("full_playtime_data.csv", index_col=None, header=0, dtype = 'a')
#y = pd.read_csv("full_playtime_response.csv", index_col=None, header=0, dtype = 'a')

# DiProPerm with mean difference classifier, the 'md', 't' and 'auc'
# separation statistics, and 1000 permutation samples.
dpp = DiProPerm(B=1000, separation_stats=['md', 't', 'auc'], clf='md')
dpp.fit(X, y)

# Print the test statistics stored under the 'md' key.
print(dpp.test_stats_['md'])

plt.figure(figsize=[12, 5])

# show histogram of separation statistics
plt.subplot(1, 2, 1)
dpp.plot_perm_sep_stats(stat='md')

# the observed scores
plt.subplot(1, 2, 2)
dpp.plot_observed_scores()
# The title is set after the second subplot is selected, so it applies to the
# current axes (the second panel, unless plot_observed_scores switches axes).
plt.title("all_distances")
plt.show()
def test_parallel(self):
    """
    Smoke test: fitting with ``n_jobs=-1`` runs through and leaves a
    ``test_stats_`` attribute on the result.
    """
    parallel_dpp = DiProPerm(n_jobs=-1)
    fitted = parallel_dpp.fit(self.X, self.y)

    has_stats = hasattr(fitted, 'test_stats_')
    self.assertTrue(has_stats)