-
Notifications
You must be signed in to change notification settings - Fork 0
/
dissimilarity_common.py
170 lines (125 loc) · 5.99 KB
/
dissimilarity_common.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
"""
Experiments of the paper 'The Approximation of the Dissimilarity
Projection' accepted at PRNI2012.
Functions to select prototypes, to compute correlations between
distances in original space and projected space and to produce
paper figures.
Copyright (c) 2012, Emanuele Olivetti
Distributed under the New BSD license (3-clauses)
"""
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform, cdist
from scipy.stats import pearsonr as correlation
from sys import stdout
import nibabel as nib
from dipy.tracking import utils
from dipy.tracking.vox2track import streamline_mapping
from mapping_test import *
from dipy.tracking.utils import length
def furthest_first_traversal(S, k, distance, permutation=True):
    """Farthest-first traversal (fft), a greedy heuristic known to be a
    good sub-optimal solution to the k-center problem.

    See for example:
    Hochbaum, Dorit S. and Shmoys, David B., A Best Possible Heuristic
    for the k-Center Problem, Mathematics of Operations Research, 1985.
    or: http://en.wikipedia.org/wiki/Metric_k-center

    Parameters
    ----------
    S : ndarray, shape (n_objects, ...)
        Dataset to select prototypes from (first axis indexes objects).
    k : int
        Number of prototypes to select.
    distance : callable
        distance(A, B) -> pairwise distance matrix of shape (len(A), len(B)).
    permutation : bool
        If True, work on a randomly shuffled view of S so that the greedy
        choices do not depend on the input ordering.

    Returns
    -------
    ndarray of k indices into the original (unshuffled) S.
    """
    # Do an initial permutation of S, just to be sure that objects in S
    # have no special order. Note that this permutation does not affect
    # the original S (S[idx] is a copy).
    if permutation:
        idx = np.random.permutation(S.shape[0])
        S = S[idx]
    else:
        # np.int was removed in NumPy 1.24; the builtin int is equivalent.
        idx = np.arange(S.shape[0], dtype=int)
    # Greedy loop: start from the first object, then repeatedly add the
    # object whose distance to its nearest selected prototype is largest.
    T = [0]
    while len(T) < k:
        z = distance(S, S[T]).min(1).argmax()
        T.append(z)
    # Map the selected positions back to indices in the original S.
    return idx[T]
def subset_furthest_first(S, k, distance, permutation=True, c=3.0):
    """Stochastic scalable version of the fft algorithm based on a
    random subset of a specific size.

    See: D. Turnbull and C. Elkan, Fast Recognition of Musical Genres
    Using RBF Networks, IEEE Trans Knowl Data Eng, vol. 2005, no. 4,
    pp. 580-584, 17.

    Parameters
    ----------
    S, k, distance, permutation : as in furthest_first_traversal().
    c : float
        Oversampling factor: the subset has ~ c * k * log(k) objects.

    Returns
    -------
    ndarray of k indices into the original S.
    """
    # np.ceil returns a float; slicing and np.arange need an int.
    size = int(max(1, np.ceil(c * k * np.log(k))))
    if permutation:
        idx = np.random.permutation(S.shape[0])[:size]
    else:
        # Must be an ndarray: furthest_first_traversal returns an index
        # array, and a plain range/list cannot be fancy-indexed by it.
        idx = np.arange(size)
    # note: no need to add extra permutation here below:
    return idx[furthest_first_traversal(S[idx], k, distance, permutation=False)]
def compute_correlation(data, distance, prototype_policies, num_prototypes, iterations, verbose=False, size_limit=1000):
    """Compute the Pearson correlation between pairwise distances in the
    original space and Euclidean distances in the dissimilarity
    (prototype-projection) space, for each prototype-selection policy,
    each number of prototypes, and each repetition.

    Returns
    -------
    rho : ndarray, shape (len(prototype_policies), len(num_prototypes), iterations)
        Pearson correlation coefficients.
    """
    # NOTE(review): several of these globals (tracks_t, ids_l, tracks_s,
    # id_t, tracks_n, a_ind) are never assigned here — presumably shared
    # with other modules; only data_original and tracks_subsample are set.
    global tracks_t,ids_l,data_original,tracks_s,id_t,tracks_n,a_ind, tracks_subsample
    print "Computing distance matrix and similarity matrix (original space):",
    data_original = data
    # Cap the dataset used for the full distance matrix: it is O(n^2).
    if data.shape[0] > size_limit:
        print
        print "Datset too big: subsampling to %s entries only!" % size_limit
        data = data[np.random.permutation(data.shape[0])[:size_limit], :]
    od = distance(data, data)
    print od.shape
    # Condensed (upper-triangular) form, to match pdist() output below.
    original_distances = squareform(od)
    #original_distances2 = squareform(od)
    """
    my code
    """
    # Build a candidate pool for the 'fft' policy: keep streamlines longer
    # than 10 (mm? units depend on the tractography — TODO confirm), sort
    # by decreasing length, then filter through streamline_mapping_new_step
    # (imported via `from mapping_test import *`; semantics not visible here).
    affine=utils.affine_for_trackvis(voxel_size=np.array([2,2,2]))
    lengths = list(length(data_original))
    lengths=np.array(lengths)
    temp=np.where(lengths>10)[0]
    l=np.argsort(lengths)[::-1][:len(temp)]
    data_original_temp=data_original[l]
    a= streamline_mapping_new_step(data_original_temp, affine=affine)
    tracks_subsample=data_original_temp[a]
    print len(tracks_subsample)
    """
    my code
    """
    rho = np.zeros((len(prototype_policies), len(num_prototypes),iterations))
    for m, prototype_policy in enumerate(prototype_policies):
        print prototype_policy
        for j, num_proto in enumerate(num_prototypes):
            print "number of prototypes:", num_proto, " - ",
            for k in range(iterations):
                print k,
                stdout.flush()
                if verbose: print("Generating %s prototypes as" % num_proto),
                # Note that we use the original dataset here, not the subsampled one!
                if prototype_policy=='random':
                    if verbose: print("random subset of the initial data.")
                    prototype_idx = np.random.permutation(data_original.shape[0])[:num_proto]
                    prototype = [data_original[i] for i in prototype_idx]
                elif prototype_policy=='sff':
                    prototype_idx = subset_furthest_first(data_original, num_proto, distance)
                    prototype = [data_original[i] for i in prototype_idx]
                elif prototype_policy=='fft':
                    # fft draws from the filtered pool, not the full data.
                    prototype_idx = furthest_first_traversal( tracks_subsample, num_proto, distance)
                    prototype = [ tracks_subsample[i] for i in prototype_idx]
                else:
                    raise Exception
                if verbose: print("Computing dissimilarity matrix.")
                # Project every object onto its distances to the prototypes.
                data_dissimilarity = distance(data, prototype)
                if verbose: print("Computing distance matrix (dissimilarity space).")
                dissimilarity_distances = pdist(data_dissimilarity, metric='euclidean')
                # Pearson correlation between the two condensed distance vectors.
                rho[m,j,k] = correlation(original_distances, dissimilarity_distances)[0]
            print
    return rho
def plot_results(rho, num_prototypes, prototype_policies, color_policies):
    """Plot mean correlation versus number of prototypes, one curve per
    prototype-selection policy, with a shaded +/- 1 std band across
    iterations.

    Parameters
    ----------
    rho : ndarray, shape (len(prototype_policies), len(num_prototypes), iterations)
        Correlations as returned by compute_correlation().
    num_prototypes : sequence of int
        X-axis values.
    prototype_policies : sequence of str
        Legend labels, one per policy.
    color_policies : sequence of str
        Matplotlib format strings, one per policy.
    """
    plt.figure()
    for m, prototype_policy in enumerate(prototype_policies):
        mean = rho[m,:,:].mean(1)
        std = rho[m,:,:].std(1)
        errorbar = std # 3.0 * std / np.sqrt(rho.shape[2])
        plt.plot(num_prototypes, mean, color_policies[m], label=prototype_policies[m], markersize=8.0)
        # Closed polygon for the band: x forward along the lower edge,
        # then backward along the upper edge.
        plt.fill(np.concatenate([num_prototypes, num_prototypes[::-1]]),
                 np.concatenate([mean - errorbar, (mean + errorbar)[::-1]]),
                 alpha=.25, fc='black', ec='None')
    plt.legend(loc='lower right')
    plt.xlabel("number of prototypes $(p)$")
    # Raw string: '\D' and '\P' are invalid escape sequences in a normal
    # literal (DeprecationWarning since Python 3.6); the resulting string
    # is byte-identical to the original.
    plt.ylabel(r"correlation $\rho(d, \Delta_{\Pi}^d)$")
    plt.show()