示例#1
0
    def setUp(self):
        """
        Build the fixture shared by the tests: simulated two-center blob data
        and a DiProPerm object that has already been fitted to it.
        """
        features, labels = make_blobs(
            n_samples=100,
            n_features=2,
            centers=2,
            cluster_std=2,
            random_state=20,
        )

        # 'md' classifier with the 'md', 't' and 'auc' separation statistics.
        model = DiProPerm(
            B=1000,
            separation_stats=['md', 't', 'auc'],
            clf='md',
        )
        model.fit(features, labels)

        # Expose the fitted object and the data to the individual tests.
        self.dpp, self.X, self.y = model, features, labels
示例#2
0
    def test_custom_classifier(self):
        """
        DiProPerm accepts a user supplied classifier.

        The classifier is produced by get_training_fun from a LinearSVC whose
        C parameter is searched over a grid (roc_auc metric, 5 splits); the
        fit must leave a test_stats_ attribute on the object.
        """
        svc_trainer = get_training_fun(
            clf=LinearSVC(max_iter=10000),
            param_grid={'C': [.0001, .001, .01, 1, 10, 100]},
            metric='roc_auc',
            n_splits=5,
        )

        model = DiProPerm(B=1000, clf=svc_trainer)
        model.fit(self.X, self.y)

        stats_present = hasattr(model, 'test_stats_')
        self.assertTrue(stats_present)
示例#3
0
    def test_custom_test_stats(self):
        """
        User provided custom test statistics.

        Registers a function under the name 'robust_Z' through the
        custom_test_stats argument and checks that, after fitting, that name
        appears among the keys of test_stats_['md'].
        """

        def robust_Z(obs_stat, perm_samples):
            # Center the observed statistic by the median of the permutation
            # samples and scale by `mad` of those samples.
            # NOTE(review): `mad` is not defined in this snippet; presumably a
            # median-absolute-deviation helper imported elsewhere - confirm.
            return (obs_stat - np.median(perm_samples))/mad(perm_samples)

        custom_test_stats = [('robust_Z', robust_Z)]
        dpp = DiProPerm(custom_test_stats=custom_test_stats).fit(self.X, self.y)

        # assertIn names the missing member and the container on failure,
        # whereas assertTrue(a in b) only reports "False is not true".
        self.assertIn('robust_Z', dpp.test_stats_['md'].keys())
示例#4
0
    def test_custom_sep_stats(self):
        """
        User provided custom separation statistics.

        Registers a function under the name 'median_difference' through the
        custom_sep_stats argument and checks that, after fitting, that name
        appears among the keys of test_stats_.
        """

        def median_difference(scores, y):
            """
            Absolute difference between the medians of the scores of the two
            classes found in y.
            """
            y = np.array(y)
            classes = np.unique(y)
            # The statistic is only defined for exactly two classes.
            assert len(classes) == 2
            s0 = scores[y == classes[0]]
            s1 = scores[y == classes[1]]
            return abs(np.median(s0) - np.median(s1))

        custom_sep_stats = [('median_difference', median_difference)]

        dpp = DiProPerm(custom_sep_stats=custom_sep_stats).fit(self.X, self.y)

        # assertIn names the missing member and the container on failure,
        # whereas assertTrue(a in b) only reports "False is not true".
        self.assertIn('median_difference', dpp.test_stats_.keys())
示例#5
0
from sklearn.datasets import make_blobs
import numpy as np
import matplotlib.pyplot as plt

from diproperm.DiProPerm import DiProPerm

# Simulate 100 two-feature samples drawn around two centers; the fixed
# random_state makes the data reproducible.
X, y = make_blobs(n_samples=100,
                  n_features=2,
                  centers=2,
                  cluster_std=2,
                  random_state=20)
# Scatter the first feature against the second, coloured by y, and save the
# picture to data.png.
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.savefig('data.png')
plt.close()

# Fit DiProPerm with B=1000; every other option is left at its default.
dpp = DiProPerm(B=1000).fit(X, y)

# One wide figure holding two side-by-side panels.
plt.figure(figsize=[12, 5])

# left panel: the observed scores
plt.subplot(1, 2, 1)
dpp.plot_observed_scores()

# right panel: the permutation separation statistics for the 'md' statistic
plt.subplot(1, 2, 2)
dpp.plot_perm_sep_stats(stat='md')

# Save both panels to dpp_plots.png and release the figure.
plt.savefig('dpp_plots.png')
plt.close()
示例#6
0
    csvReader = csv.reader(csvDataFile)
    for row in csvReader:
        y.append(row[1])

# Convert the collected values to arrays: drop the first entry of y and the
# first row and first column of X, then cast y to int and X to float.
# NOTE(review): the dropped entries are presumably a CSV header row and an
# index column - confirm against the input files.
y = np.array(y)[1:].astype(int)
X = np.array(X)[1:, 1:].astype(float)

# Disabled pandas-based loading, kept for reference:
#X = pd.read_csv("full_playtime_data.csv", index_col=None, header=0, dtype = 'a')
#y = pd.read_csv("full_playtime_response.csv", index_col=None, header=0, dtype = 'a')

# DiProPerm with the mean difference classifier, 1000 permutation samples and
# the 'md', 't' and 'auc' separation statistics.
dpp = DiProPerm(
    B=1000,
    separation_stats=['md', 't', 'auc'],
    clf='md',
)
dpp.fit(X, y)

# Report the test statistics stored for the 'md' separation statistic.
print(dpp.test_stats_['md'])

# One wide figure holding two side-by-side panels.
plt.figure(figsize=[12, 5])

# Left panel: histogram of the permutation separation statistics ('md').
plt.subplot(1, 2, 1)
dpp.plot_perm_sep_stats(stat='md')

# Right panel: the observed scores, titled "all_distances".
plt.subplot(1, 2, 2)
dpp.plot_observed_scores()
plt.title("all_distances")

plt.show()
示例#7
0
 def test_parallel(self):
     """
     Fitting with n_jobs=-1 must complete and leave a test_stats_ attribute
     on the object returned by fit.
     """
     estimator = DiProPerm(n_jobs=-1)
     fitted = estimator.fit(self.X, self.y)

     stats_present = hasattr(fitted, 'test_stats_')
     self.assertTrue(stats_present)