-
Notifications
You must be signed in to change notification settings - Fork 0
/
metrics_functions.py
308 lines (288 loc) · 12.7 KB
/
metrics_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
from functools import partial
from IPython import embed
import numpy as np
from numpy import random
import time
# from skimage import exposure
from skimage.transform import resize
# from cv2 import resize
import cv2
try:
from cv2 import cv
except ImportError:
cv = None
# print('please install Python binding of OpenCV to compute EMD')
from tools import normalize, match_hist
from IPython import embed
def AUC_Judd(saliency_map, fixation_map, jitter=True):
    '''
    AUC stands for Area Under ROC Curve.
    This measures how well the saliency map of an image predicts the ground truth human fixations on the image.
    ROC curve is created by sweeping through threshold values
    determined by range of saliency map values at fixation locations.
    True positive (tp) rate corresponds to the ratio of saliency map values above threshold
    at fixation locations to the total number of fixation locations.
    False positive (fp) rate corresponds to the ratio of saliency map values above threshold
    at all other locations to the total number of possible other locations (non-fixated image pixels).
    AUC=0.5 is chance level.

    Parameters
    ----------
    saliency_map : real-valued matrix
    fixation_map : binary matrix
        Human fixation map.
    jitter : boolean, optional
        If True (default), a small random number would be added to each pixel of the saliency map.
        Jitter saliency maps that come from saliency models that have a lot of zero values.
        If the saliency map is made with a Gaussian then it does not need to be jittered
        as the values vary and there is not a large patch of the same value.
        In fact, jittering breaks the ordering in the small values!

    Returns
    -------
    AUC : float, between [0,1]
        NaN when fixation_map contains no fixations.
    '''
    saliency_map = np.array(saliency_map, copy=False)
    fixation_map = np.array(fixation_map, copy=False) > 0.5
    # If there are no fixations to predict, return NaN
    if not np.any(fixation_map):
        print('no fixation to predict')
        return np.nan
    # Make the saliency_map the size of the fixation_map
    if saliency_map.shape != fixation_map.shape:
        saliency_map = resize(saliency_map, fixation_map.shape, order=3, mode='constant')
    # Jitter the saliency map slightly to disrupt ties of the same saliency value.
    # BUGFIX: use an out-of-place add — the original `+=` mutated the caller's
    # array (taken with copy=False) and fails on integer-dtype input, which
    # cannot be cast in place to float.
    if jitter:
        saliency_map = saliency_map + random.rand(*saliency_map.shape) * 1e-7
    # Normalize saliency map to have values between [0,1]
    saliency_map = normalize(saliency_map, method='range')
    S = saliency_map.ravel()
    F = fixation_map.ravel()
    S_fix = S[F]  # Saliency map values at fixation locations
    n_fix = len(S_fix)
    n_pixels = len(S)
    # Calculate AUC: thresholds swept over saliency values at fixations, high to low
    thresholds = sorted(S_fix, reverse=True)
    tp = np.zeros(len(thresholds)+2)
    fp = np.zeros(len(thresholds)+2)
    tp[0] = 0; tp[-1] = 1
    fp[0] = 0; fp[-1] = 1
    for k, thresh in enumerate(thresholds):
        above_th = np.sum(S >= thresh)  # Total number of saliency map values above threshold
        tp[k+1] = (k + 1) / float(n_fix)  # Ratio saliency map values at fixation locations above threshold
        fp[k+1] = (above_th - k - 1) / float(n_pixels - n_fix)  # Ratio other saliency map values above threshold
    return np.trapz(tp, fp)  # y, x
def AUC_Borji(saliency_map, fixation_map, n_rep=100, step_size=0.1, rand_sampler=None):
    '''
    Borji variant of AUC: how well a saliency map predicts human fixations.
    The ROC curve is built by sweeping thresholds at a fixed step size up to
    the maximum saliency value. The tp rate is the fraction of fixated
    locations whose saliency exceeds the threshold; the fp rate is the same
    fraction computed over randomly sampled locations (as many as there are
    fixations, drawn uniformly over ALL image pixels), averaged over n_rep
    random draws.

    Parameters
    ----------
    saliency_map : real-valued matrix
    fixation_map : binary matrix
        Human fixation map.
    n_rep : int, optional
        Number of repeats for random sampling of non-fixated locations.
    step_size : int, optional
        Step size for sweeping through saliency map.
    rand_sampler : callable
        S_rand = rand_sampler(S, F, n_rep, n_fix)
        Sample the saliency map at random locations to estimate false positive.
        Return the sampled saliency values, S_rand.shape=(n_fix,n_rep)

    Returns
    -------
    AUC : float, between [0,1]
        NaN when fixation_map contains no fixations.
    '''
    sal = np.array(saliency_map, copy=False)
    fix = np.array(fixation_map, copy=False) > 0.5
    # Nothing to predict -> NaN
    if not np.any(fix):
        print('no fixation to predict')
        return np.nan
    # Resize saliency to the fixation map's shape if needed
    if sal.shape != fix.shape:
        sal = resize(sal, fix.shape, order=3, mode='constant')
    # Rescale saliency to [0,1]
    sal = normalize(sal, method='range')
    S = sal.ravel()
    F = fix.ravel()
    S_fix = S[F]  # saliency at fixated pixels
    n_fix = len(S_fix)
    n_pixels = len(S)
    # Draw n_fix random sample locations for each of the n_rep repeats
    if rand_sampler is None:
        rand_idx = random.randint(0, n_pixels, [n_fix, n_rep])
        # NOTE: samples may include fixated pixels, slightly underestimating AUC
        S_rand = S[rand_idx]
    else:
        S_rand = rand_sampler(S, F, n_rep, n_fix)
    # One AUC per random split, then average
    auc_per_split = np.full(n_rep, np.nan)
    for split in range(n_rep):
        sample = S_rand[:, split]
        thresholds = np.r_[0:np.max(np.r_[S_fix, sample]):step_size][::-1]
        tp = np.zeros(len(thresholds) + 2)
        fp = np.zeros(len(thresholds) + 2)
        tp[-1] = 1
        fp[-1] = 1
        for i, thresh in enumerate(thresholds):
            tp[i + 1] = np.sum(S_fix >= thresh) / float(n_fix)
            fp[i + 1] = np.sum(sample >= thresh) / float(n_fix)
        auc_per_split[split] = np.trapz(tp, fp)
    return np.mean(auc_per_split)
def AUC_shuffled(saliency_map, fixation_map, other_map, n_rep=100, step_size=0.1):
    '''
    Shuffled AUC: like AUC_Borji, but the false-positive sample locations are
    drawn from fixations made ON OTHER IMAGES rather than uniformly over all
    pixels, which penalizes center bias. As many random locations as
    fixations are drawn, averaged over n_rep selections.

    Parameters
    ----------
    saliency_map : real-valued matrix
    fixation_map : binary matrix
        Human fixation map.
    other_map : binary matrix, same shape as fixation_map
        A binary fixation map (like fixation_map) by taking the union of fixations from M other random images
        (Borji uses M=10).
    n_rep : int, optional
        Number of repeats for random sampling of non-fixated locations.
    step_size : int, optional
        Step size for sweeping through saliency map.

    Returns
    -------
    AUC : float, between [0,1]

    Raises
    ------
    ValueError
        If other_map and fixation_map differ in shape.
    '''
    other = np.array(other_map, copy=False) > 0.5
    if other.shape != fixation_map.shape:
        raise ValueError('other_map.shape != fixation_map.shape')

    # Sampler passed to AUC_Borji: for each of the n_rep repeats, pick n_fix
    # of the pixels fixated in the "other images" map, without replacement.
    def sample_other(other_flat, S, F, n_rep, n_fix):
        fixated = np.nonzero(other_flat)[0]
        perms = [random.permutation(row)[:n_fix]
                 for row in np.tile(range(len(fixated)), [n_rep, 1])]
        r = fixated[np.transpose(perms)]
        # NOTE: may include the current image's fixated pixels, slightly
        # underestimating AUC
        return S[r]

    return AUC_Borji(saliency_map, fixation_map, n_rep, step_size,
                     partial(sample_other, other.ravel()))
def NSS(saliency_map, fixation_map):
    '''
    Normalized scanpath saliency of a saliency map,
    defined as the mean value of normalized (i.e., standardized) saliency map at fixation locations.
    You can think of it as a z-score. (Larger value implies better performance.)

    Parameters
    ----------
    saliency_map : real-valued matrix
        If the two maps are different in shape, saliency_map will be resized to match fixation_map.
    fixation_map : binary matrix
        Human fixation map (1 for fixated location, 0 for elsewhere).

    Returns
    -------
    NSS : float
        Can be negative (saliency below average at fixations); 0 is chance.
        NaN when fixation_map contains no fixations.
    '''
    s_map = np.array(saliency_map, copy=False)
    f_map = np.array(fixation_map, copy=False) > 0.5
    # If there are no fixations to predict, return NaN — consistent with the
    # AUC metrics in this module, and avoids np.mean over an empty slice.
    if not np.any(f_map):
        print('no fixation to predict')
        return np.nan
    if s_map.shape != f_map.shape:
        # bi-cubic resize, matching the settings used by every other metric here
        s_map = resize(s_map, f_map.shape, order=3, mode='constant')
    # Normalize saliency map to have zero mean and unit std
    s_map = normalize(s_map, method='standard')
    # Mean saliency value at fixation locations
    return np.mean(s_map[f_map])
def CC(saliency_map1, saliency_map2):
    '''
    Pearson's correlation coefficient between two saliency maps
    (CC=0 for uncorrelated maps, CC=1 for perfect linear correlation).

    Parameters
    ----------
    saliency_map1 : real-valued matrix
        If the two maps are different in shape, saliency_map1 will be resized to match saliency_map2.
    saliency_map2 : real-valued matrix

    Returns
    -------
    CC : float, between [-1,1]
    '''
    a = np.array(saliency_map1, copy=False)
    b = np.array(saliency_map2, copy=False)
    if a.shape != b.shape:
        # bi-cubic resize — what Matlab imresize() does by default
        a = resize(a, b.shape, order=3, mode='constant')
    # Standardize both maps (zero mean, unit std) before correlating
    a = normalize(a, method='standard')
    b = normalize(b, method='standard')
    return np.corrcoef(a.ravel(), b.ravel())[0, 1]
def SIM(saliency_map1, saliency_map2):
    '''
    Similarity between two saliency maps viewed as probability distributions
    (SIM=1 means the distributions are identical).
    Also known as **histogram intersection**.

    Parameters
    ----------
    saliency_map1 : real-valued matrix
        If the two maps are different in shape, saliency_map1 will be resized to match saliency_map2.
    saliency_map2 : real-valued matrix

    Returns
    -------
    SIM : float, between [0,1]
    '''
    m1 = np.array(saliency_map1, copy=False)
    m2 = np.array(saliency_map2, copy=False)
    if m1.shape != m2.shape:
        # bi-cubic resize — what Matlab imresize() does by default
        m1 = resize(m1, m2.shape, order=3, mode='constant')
    # Rescale each map to [0,1], then to a distribution that sums to 1
    for method in ('range', 'sum'):
        m1 = normalize(m1, method=method)
        m2 = normalize(m2, method=method)
    # Histogram intersection: sum of element-wise minima
    return np.sum(np.minimum(m1, m2))
# def EMD(saliency_map1, saliency_map2, sub_sample=1/32.0):
# '''
# Earth Mover's Distance measures the distance between two probability distributions
# by how much transformation one distribution would need to undergo to match another
# (EMD=0 for identical distributions).
# Parameters
# ----------
# saliency_map1 : real-valued matrix
# If the two maps are different in shape, saliency_map1 will be resized to match saliency_map2.
# saliency_map2 : real-valued matrix
# Returns
# -------
# EMD : float, positive
# '''
# map2 = np.array(saliency_map2, copy=False)
# # Reduce image size for efficiency of calculation
# map2 = resize(map2, np.round(np.array(map2.shape)*sub_sample), order=3, mode='constant')
# map1 = resize(saliency_map1, map2.shape, order=3, mode='constant')
# # Histogram match the images so they have the same mass
# map1 = match_hist(map1, *exposure.cumulative_distribution(map2))
# # Normalize the two maps to sum up to 1,
# # so that the score is independent of the starting amount of mass / spread of fixations of the fixation map
# map1 = normalize(map1, method='sum')
# map2 = normalize(map2, method='sum')
# import ipdb; ipdb.set_trace()
# # Compute EMD with OpenCV
# # - http://docs.opencv.org/modules/imgproc/doc/histograms.html#emd
# # - http://stackoverflow.com/questions/5101004/python-code-for-earth-movers-distance
# # - http://stackoverflow.com/questions/12535715/set-type-for-fromarray-in-opencv-for-python
# r, c = map2.shape
# x, y = np.meshgrid(range(c), range(r))
# signature1= np.zeros((r,c, 3 ), np.float32)
# signature2= np.zeros((r,c, 3 ), np.float32)
# # h = signature1.shape[0]
# # w = signature1.shape[1]
# # vis2=cv.CreateMat(h,w,CV_32FC1)
# # vis0=cv.fromarray(vis)
# # signature1 = cv.CreateImage((r,c), 3, cv2.CV_32FC1)
# # signature2 = cv.CreateImage((r,c), 3, cv2.CV_32FC1)
# # signature1 = cv2.CreateMat(r*c, 3, cv2.CV_32FC1)
# # signature2 = cv2.CreateMat(r*c, 3, cv2.CV_32FC1)
# cv.Convert(cv.fromarray(np.c_[map1.ravel(), x.ravel(), y.ravel()]), signature1)
# cv.Convert(cv.fromarray(np.c_[map2.ravel(), x.ravel(), y.ravel()]), signature2)
# return cv.CalcEMD2(signature2, signature1, cv2.CV_DIST_L2)