def test_pval():
    """Check ``get_thresholding_map`` and ``_transform_to_pvals`` on known data.

    Every column of the input holds the integers ``0 .. nsamples - 1`` in a
    shuffled order, so the expected threshold per column at probability ``p``
    is ``nsamples - nsamples * p`` regardless of the shuffle.
    """
    def not_inplace_shuffle(x):
        # random.shuffle works in place; hand back a shuffled copy instead
        x = list(x)
        random.shuffle(x)
        return x

    nsamples = 100000
    nfeatures = 20
    p = 0.001

    # list(...) is required on Python 3, where range objects do not support
    # repetition with `*`; on Python 2 it is a harmless copy.
    x = np.array(list(range(nsamples)) * nfeatures)
    x = x.reshape(nfeatures, nsamples).T
    x = np.apply_along_axis(not_inplace_shuffle, axis=0, arr=x)
    expected_result = [nsamples - nsamples * p] * nfeatures

    thresholds = gct.get_thresholding_map(x, p=p)
    assert_array_equal(thresholds, expected_result)
    # works with datasets too
    dsthresholds = gct.get_thresholding_map(Dataset(x), p=p)
    assert_almost_equal(thresholds, dsthresholds)
    # a far smaller p must be rejected (presumably because this many samples
    # cannot resolve it -- confirm against get_thresholding_map)
    assert_raises(ValueError,
                  gct.get_thresholding_map, x, p=0.00000001)

    # materialise the range so a plain list is passed on Python 2 and 3 alike
    x = list(range(0, 100, 5))
    null_dist = np.repeat(1, 100).astype(float)[None]
    pvals = gct._transform_to_pvals(x, null_dist)
    desired_output = np.array([
        1, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55,
        0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15, 0.1, 0.05,
    ])
    assert_array_almost_equal(desired_output, pvals)
def test_pval():
    """Verify thresholding maps and p-value transformation on known input.

    The input columns are independent shuffles of ``0 .. nsamples - 1``, which
    makes the expected per-column threshold ``nsamples - nsamples * p``.
    """
    def not_inplace_shuffle(x):
        # return a shuffled copy; random.shuffle itself mutates its argument
        x = list(x)
        random.shuffle(x)
        return x

    nsamples, nfeatures, p = 100000, 20, 0.001

    # Python 3 range objects cannot be repeated with `*` (TypeError), so the
    # values are materialised into a list first; Python 2 is unaffected.
    values = list(range(nsamples)) * nfeatures
    x = np.array(values).reshape(nfeatures, nsamples).T
    x = np.apply_along_axis(not_inplace_shuffle, axis=0, arr=x)
    expected_result = [nsamples - nsamples * p] * nfeatures

    thresholds = gct.get_thresholding_map(x, p=p)
    assert_array_equal(thresholds, expected_result)
    # works with datasets too
    dsthresholds = gct.get_thresholding_map(Dataset(x), p=p)
    assert_almost_equal(thresholds, dsthresholds)
    # an extremely small p is expected to raise
    assert_raises(ValueError,
                  gct.get_thresholding_map, x, p=0.00000001)

    # explicit list so the same sequence type is passed on Python 2 and 3
    x = list(range(0, 100, 5))
    null_dist = np.repeat(1, 100).astype(float)[None]
    pvals = gct._transform_to_pvals(x, null_dist)
    desired_output = np.array([
        1, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55,
        0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15, 0.1, 0.05,
    ])
    assert_array_almost_equal(desired_output, pvals)
def test_simple_cluster_level_thresholding():
    """Cross-check cluster-level p-values against a step-by-step computation.

    Random "permutation" accuracies and one random "real" accuracy map are
    converted to feature-wise p-values via ``Nonparametric`` distributions.
    Cluster sizes obtained from thresholding those p-values must match the
    ones obtained through ``get_thresholding_map``, and the p-values returned
    by ``get_cluster_pvals`` must match ``_transform_to_pvals`` applied to a
    manually built sparse cluster-size histogram.
    """
    n_features = 13
    n_perms = 100
    feature_pthr = 0.5  # just for testing
    cluster_pthr = 0.5
    # permutation sample is drawn first, then the "real" one
    perm_acc = np.random.normal(size=(n_perms, n_features))
    real_acc = np.random.normal(size=(1, n_features))

    # Step 1: "fit" one Nonparametric distribution per feature
    from mvpa2.clfs.stats import Nonparametric
    null_dists = [Nonparametric(column) for column in perm_acc.T]

    # a "p" value for every permutation value of every feature, computed
    # feature-wise in one go and element-wise as a slow reference
    perm_p = np.array(
        [d.rcdf(column) for d, column in zip(null_dists, perm_acc.T)]).T
    perm_p_slow = np.array([
        [d.rcdf(value) for d, value in zip(null_dists, row)]
        for row in perm_acc])
    assert_array_equal(perm_p_slow, perm_p)
    assert_equal(perm_p.shape, perm_acc.shape)
    assert np.all(perm_p <= 1)
    assert np.all(perm_p > 0)

    # Step 2: the same transformation for the "real" accuracies
    real_p = np.array(
        [d.rcdf(value) for d, value in zip(null_dists, real_acc[0])])
    real_p = real_p[None, :]
    assert np.all(real_p <= 1)
    assert np.all(real_p > 0)

    skip_if_no_external('scipy')
    # cluster-level machinery is imported only after the scipy check
    from mvpa2.algorithms.group_clusterthr import (
        get_cluster_sizes, _transform_to_pvals, get_cluster_pvals,
        get_thresholding_map, repeat_cluster_vals)

    perm_sizes = get_cluster_sizes(perm_p < feature_pthr)
    real_sizes = get_cluster_sizes(real_p < feature_pthr)

    # The present implementation thresholds the raw values instead.
    # It will be a bit different (since it doesn't account for target value
    # if I got it right), and would work only for accuracies
    threshold_map = get_thresholding_map(perm_acc, feature_pthr)
    assert_equal(perm_sizes, get_cluster_sizes(perm_acc > threshold_map))
    assert_equal(real_sizes, get_cluster_sizes(real_acc > threshold_map))

    # That is how it is done in group_clusterthr atm:
    # store the cluster size histogram for later p-value evaluation in a
    # sparse matrix (max dim is the number of features, i.e. the biggest
    # possible cluster)
    from scipy.sparse import dok_matrix
    size_hist = dok_matrix((1, n_features + 1), dtype=int)
    for size in perm_sizes:
        size_hist[0, size] = perm_sizes[size]

    observed_sizes = repeat_cluster_vals(real_sizes)
    # needs conversion to array for comparisons
    observed_pvals = np.asanyarray(
        _transform_to_pvals(observed_sizes, size_hist.astype('float')))

    # critical cluster-level threshold (without FW correction between
    # clusters) would be the smallest cluster size that passed
    passed = observed_sizes[observed_pvals <= cluster_pthr]
    if len(passed):
        # computed but not asserted on
        min_passed_size = min(passed)

    cluster_ps = get_cluster_pvals(real_sizes, perm_sizes)
    for pval, size in zip(observed_pvals, observed_sizes):
        assert_almost_equal(cluster_ps[size], pval)
def test_simple_cluster_level_thresholding():
    """Compare ``get_cluster_pvals`` with a manual cluster-level computation.

    Feature-wise p-values are derived from per-feature ``Nonparametric``
    distributions fitted on random samples; cluster sizes from thresholded
    p-values are compared with those from ``get_thresholding_map``; finally
    cluster p-values from ``get_cluster_pvals`` are compared with
    ``_transform_to_pvals`` run on a sparse histogram of cluster sizes.
    """
    nfeat, nperm = 13, 100
    p_feature = 0.5  # just for testing
    p_cluster = 0.5
    null_acc = np.random.normal(size=(nperm, nfeat))
    obs_acc = np.random.normal(size=(1, nfeat))

    # Step 1 is to "fit" a Nonparametric for each of the features
    from mvpa2.clfs.stats import Nonparametric
    fitted = [Nonparametric(feature_vals) for feature_vals in null_acc.T]

    # "p" value of each random sample for each feature: vectorised per
    # feature, and value-by-value as a reference
    per_feature = [f.rcdf(vals) for f, vals in zip(fitted, null_acc.T)]
    null_p = np.array(per_feature).T
    per_sample = [
        [f.rcdf(val) for f, val in zip(fitted, sample)]
        for sample in null_acc
    ]
    null_p_slow = np.array(per_sample)
    assert_array_equal(null_p_slow, null_p)
    assert_equal(null_p.shape, null_acc.shape)
    assert np.all(null_p <= 1)
    assert np.all(null_p > 0)

    # 2: apply the same to the observed values
    obs_p = np.array([f.rcdf(val) for f, val in zip(fitted, obs_acc[0])])
    obs_p = obs_p[None, :]
    assert np.all(obs_p <= 1)
    assert np.all(obs_p > 0)

    skip_if_no_external('scipy')
    # Now the cluster-level part
    from mvpa2.algorithms.group_clusterthr import (
        get_cluster_sizes,
        _transform_to_pvals,
        get_cluster_pvals,
        get_thresholding_map,
        repeat_cluster_vals,
    )

    null_mask = null_p < p_feature
    obs_mask = obs_p < p_feature
    null_clusters = get_cluster_sizes(null_mask)
    obs_clusters = get_cluster_sizes(obs_mask)

    # This is how it can be computed within the present implementation.
    # It will be a bit different (since it doesn't account for target value
    # if I got it right), and would work only for accuracies
    thr_map = get_thresholding_map(null_acc, p_feature)
    null_clusters_alt = get_cluster_sizes(null_acc > thr_map)
    obs_clusters_alt = get_cluster_sizes(obs_acc > thr_map)
    assert_equal(null_clusters, null_clusters_alt)
    assert_equal(obs_clusters, obs_clusters_alt)

    # That is how it is done in group_clusterthr atm: the cluster size
    # histogram goes into a sparse matrix for easy consumption (max dim is
    # the number of features, i.e. biggest possible cluster)
    from scipy.sparse import dok_matrix
    histogram = dok_matrix((1, nfeat + 1), dtype=int)
    for cluster_size in null_clusters:
        histogram[0, cluster_size] = null_clusters[cluster_size]

    sizes = repeat_cluster_vals(obs_clusters)
    pvals = _transform_to_pvals(sizes, histogram.astype('float'))
    # needs conversion to array for comparisons
    pvals = np.asanyarray(pvals)

    # critical cluster-level threshold (without FW correction between
    # clusters) would be the minimum over the clusters selected here
    surviving = sizes[pvals <= p_cluster]
    if len(surviving):
        # value is only computed, nothing is asserted about it
        smallest_surviving = min(surviving)

    reference_ps = get_cluster_pvals(obs_clusters, null_clusters)
    for pval, cluster_size in zip(pvals, sizes):
        assert_almost_equal(reference_ps[cluster_size], pval)