# thermal is our template image for registration
import os

import cv2
import numpy as np
from imageai.Detection import ObjectDetection
from imutils.video import FPS
from skimage.exposure import adjust_gamma

def ecc_registration(thermal, visible, warp_mode=cv2.MOTION_AFFINE):
    is_thermal_reference = True
    template_img = None
    register_img = None
    if is_thermal_reference:
        template_img = thermal
        register_img = visible
    else:
        template_img = visible
        register_img = thermal
    (h, w) = template_img.shape[:2]
    register_img = cv2.resize(register_img, (w, h))
    # template_img_gray = template_img
    # register_img_gray = register_img
    # frames come from cv2.VideoCapture, so they are BGR
    template_img_gray = cv2.cvtColor(template_img, cv2.COLOR_BGR2GRAY)
    register_img_gray = cv2.cvtColor(register_img, cv2.COLOR_BGR2GRAY)
    # cv2.imshow("Template Image", template_img_gray)
    # cv2.imshow("Register Image", register_img_gray)
    # cv2.waitKey()
    # Preprocessing
    if is_thermal_reference:
        register_img_gray = adjust_gamma(register_img_gray, 0.5)
        # kernel = np.array([[-1, -1, -1],
        #                    [-1, 9, -1],
        #                    [-1, -1, -1]])
        # register_img_gray = cv2.filter2D(register_img_gray, -1, kernel)
    else:
        template_img = adjust_gamma(template_img, 0.5)
        # kernel = np.array([[-1, -1, -1],
        #                    [-1, 9, -1],
        #                    [-1, -1, -1]])
        # template_img = cv2.filter2D(template_img, -1, kernel)
    # Find warp matrix
    warp_matrix = align(template_img_gray, register_img_gray, warp_mode, 50, 1e-3, 2)
    # if warp_mode == cv2.MOTION_HOMOGRAPHY:
    #     # Use warpPerspective for Homography
    #     aligned_img = cv2.warpPerspective(register_img, warp_matrix, (w, h), flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP)
    # else:
    #     # Use warpAffine for Translation, Euclidean and Affine
    #     aligned_img = cv2.warpAffine(register_img, warp_matrix, (w, h), flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP)
    # print(warp_matrix)
    # dst = cv2.addWeighted(thermal, 0.5, aligned_img, 0.5, 0.0)
    # cv2.imshow("Blending Image", dst)
    # cv2.waitKey()
    # cv2.destroyAllWindows()
    return warp_matrix
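
# Illustrative sketch (not part of the original pipeline): ecc_registration() only returns the
# warp matrix; applying it follows the commented-out pattern above. The helper name apply_warp
# and its exact flags are assumptions for demonstration, mirroring how run() warps frames below.
def apply_warp(register_img, warp_matrix, size, warp_mode=cv2.MOTION_AFFINE):
    (w, h) = size
    if warp_mode == cv2.MOTION_HOMOGRAPHY:
        # 3x3 matrix -> perspective warp
        return cv2.warpPerspective(register_img, warp_matrix, (w, h),
                                   flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP)
    # 2x3 matrix -> affine warp (translation / Euclidean / affine)
    return cv2.warpAffine(register_img, warp_matrix, (w, h),
                          flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP)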

def align(ref_img, match_img, warp_mode=cv2.MOTION_AFFINE, max_iterations=300, epsilon_threshold=1e-10,
          pyramid_levels=2, is_video=False):
    if pyramid_levels is None:
        w = ref_img.shape[1]
        nol = int(w / (1280 / 3)) - 1
    else:
        nol = pyramid_levels
    # Initialize the warp matrix to identity
    if warp_mode == cv2.MOTION_HOMOGRAPHY:
        # warp_matrix = np.eye(3, 3, dtype=np.float32)
        warp_matrix = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
    else:
        # warp_matrix = np.eye(2, 3, dtype=np.float32)
        warp_matrix = np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32)
    ref_img = cv2.fastNlMeansDenoising(ref_img, None, 5, 21)
    match_img = cv2.fastNlMeansDenoising(match_img, None, 5, 21)
    kernel = np.ones((11, 11), np.uint8)
    ref_img = cv2.morphologyEx(ref_img, cv2.MORPH_OPEN, kernel)
    match_img = cv2.morphologyEx(match_img, cv2.MORPH_OPEN, kernel)
    ref_img = gradient(ref_img)
    match_img = gradient(match_img)
    # build image pyramids (index 0 is the coarsest level)
    ref_img_pyr = [ref_img]
    match_img_pyr = [match_img]
    for level in range(nol):
        ref_img_pyr[0] = normalize(ref_img_pyr[0])
        ref_img_pyr.insert(0, cv2.resize(ref_img_pyr[0], None, fx=1/2, fy=1/2, interpolation=cv2.INTER_LINEAR))
        match_img_pyr[0] = normalize(match_img_pyr[0])
        match_img_pyr.insert(0, cv2.resize(match_img_pyr[0], None, fx=1/2, fy=1/2, interpolation=cv2.INTER_LINEAR))
    # Terminate the optimizer if either the max iterations or the threshold is reached
    criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, max_iterations, epsilon_threshold)
    # run pyramid ECC, coarse to fine
    for level in range(nol):
        ref_img_grad = ref_img_pyr[level]
        match_img_grad = match_img_pyr[level]
        try:
            cc, warp_matrix = cv2.findTransformECC(ref_img_grad, match_img_grad, warp_matrix, warp_mode, criteria,
                                                   inputMask=None, gaussFiltSize=1)
        except TypeError:
            # older OpenCV builds do not accept inputMask / gaussFiltSize
            cc, warp_matrix = cv2.findTransformECC(ref_img_grad, match_img_grad, warp_matrix, warp_mode, criteria)
        # scale up only the offset by a factor of 2 for the next (larger image) pyramid level;
        # this condition always holds for level in range(nol), so the returned matrix is expressed
        # at the full resolution of the input images
        if level != nol:
            if warp_mode == cv2.MOTION_HOMOGRAPHY:
                warp_matrix = warp_matrix * np.array([[1, 1, 2], [1, 1, 2], [0.5, 0.5, 1]], dtype=np.float32)
            else:
                warp_matrix = warp_matrix * np.array([[1, 1, 2], [1, 1, 2]], dtype=np.float32)
    return warp_matrix
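
# Note on the scale-up above (illustrative, values made up): when moving from one pyramid level to
# the next larger one, only the translation column of the warp matrix needs doubling. For example,
# an affine estimate
#   [[1, 0,  3],
#    [0, 1, -2]]
# at a half-resolution level corresponds to
#   [[1, 0,  6],
#    [0, 1, -4]]
# at full resolution, which is exactly what the element-wise multiply by [[1, 1, 2], [1, 1, 2]] does.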

def gradient(im, ksize=15):
    im = normalize(im)
    grad_x = cv2.Sobel(im, cv2.CV_32FC1, 1, 0, ksize=ksize)
    grad_y = cv2.Sobel(im, cv2.CV_32FC1, 0, 1, ksize=ksize)
    grad = cv2.addWeighted(np.absolute(grad_x), 0.5, np.absolute(grad_y), 0.5, 0)
    return convert_to_img(grad)

def normalize(im, min=None, max=None):
    height, width = im.shape
    norm = np.zeros((height, width), dtype=np.float32)
    if min is not None and max is not None:
        norm = (im - min) / (max - min)
    else:
        cv2.normalize(im, dst=norm, alpha=0.0, beta=1.0, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32FC1)
    norm[norm < 0.0] = 0.0
    norm[norm > 1.0] = 1.0
    return norm

def convert_to_img(img):
    img = np.multiply(np.divide(img - np.min(img), (np.max(img) - np.min(img))), 255)
    img = img.astype(np.uint8)
    return img

def corr2d(a, b):
    a = a - np.mean(a)
    b = b - np.mean(b)
    r = np.sum(a * b) / float(np.sqrt(np.sum(a ** 2) * np.sum(b ** 2)))
    return r
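
# corr2d() above is the 2-D Pearson correlation coefficient: both arrays are mean-centred and the
# result lies in [-1, 1]. Quick sanity check (illustrative only, values made up):
#   a = np.array([[1.0, 2.0], [3.0, 4.0]])
#   corr2d(a, a)    # -> 1.0 (identical images)
#   corr2d(a, -a)   # -> -1.0 (inverted contrast)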

def calculate_correlation_coef(thermal, visible):
    thermal_gray = cv2.cvtColor(thermal, cv2.COLOR_BGR2GRAY)
    visible_gray = cv2.cvtColor(visible, cv2.COLOR_BGR2GRAY)
    return corr2d(visible_gray, thermal_gray)

def fixBorder(frame):
    s = frame.shape
    # Scale the image up 4% without moving the center
    T = cv2.getRotationMatrix2D((s[1] / 2, s[0] / 2), 0, 1.04)
    frame = cv2.warpAffine(frame, T, (s[1], s[0]))
    return frame

def get_initial_transformation_matrix(thermal_cap, visible_cap, frame_count):
    initial_matrix = None
    # frame_num = int(frame_count/4)
    frame_indexes = np.random.choice(range(0, frame_count), size=10, replace=False)
    warp_matrix_list = []
    print()
    print("*************************************")
    print("Get initial transformation matrix....")
    print(frame_indexes)
    print("Processing " + str(len(frame_indexes)) + " selected frames")
    for i, frame_index in enumerate(frame_indexes):
        print("Frame " + str(i + 1) + "/" + str(len(frame_indexes)))
        thermal_cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
        visible_cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
        _, thermal_frame = thermal_cap.read()
        _, visible_frame = visible_cap.read()
        (h, w) = visible_frame.shape[:2]
        thermal_frame = cv2.resize(thermal_frame, (w, h))
        warp_matrix_list.append(ecc_registration(thermal_frame, visible_frame))
    initial_matrix = np.mean(np.array(warp_matrix_list), axis=0)
    print("Initial transformation matrix:")
    print(initial_matrix)
    print("*************************************")
    print()
    # reset both captures to the first frame
    thermal_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
    visible_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
    return initial_matrix

def finetune_registration(detector, custom, thermal, visible, fused, corr_coef_after_registration):
    (h, w) = visible.shape[:2]
    after_registration_object, after_registration_detections = detector.detectCustomObjectsFromImage(
        custom_objects=custom,
        input_type="array",
        input_image=fused,
        output_type="array",
        minimum_percentage_probability=30)
    # cv2.imshow("Bounding Box", cv2.resize(after_registration_object, (800, 500)))
    # cv2.waitKey()
    if len(after_registration_detections) == 0:
        print("No human object detection!")
        return visible
    # get the biggest bounding box (closest object)
    final_bounding_box = after_registration_detections[0]["box_points"]
    max_area = (final_bounding_box[2] - final_bounding_box[0]) * (final_bounding_box[3] - final_bounding_box[1])
    if len(after_registration_detections) > 1:
        for bounding_box in after_registration_detections:
            local_width = bounding_box["box_points"][2] - bounding_box["box_points"][0]
            local_height = bounding_box["box_points"][3] - bounding_box["box_points"][1]
            local_area = local_width * local_height
            if local_area > max_area:
                final_bounding_box = bounding_box["box_points"]
                max_area = local_area
    # expand the bounding box by 50 px on each side, clamped to the image
    x1 = final_bounding_box[0] - 50
    y1 = final_bounding_box[1] - 50
    x2 = final_bounding_box[2] + 50
    y2 = final_bounding_box[3] + 50
    start_point = (x1 if x1 > 0 else 0, y1 if y1 > 0 else 0)
    end_point = (x2 if x2 < w - 1 else w - 1, y2 if y2 < h - 1 else h - 1)
    object_thermal = thermal[start_point[1]:end_point[1], start_point[0]:end_point[0]]
    object_visible = visible[start_point[1]:end_point[1], start_point[0]:end_point[0]]
    # get finetune matrix for the object region
    finetune_matrix = ecc_registration(object_thermal, object_visible, warp_mode=cv2.MOTION_TRANSLATION)
    finetuned_visible = cv2.warpAffine(visible, finetune_matrix, (w, h), flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP)
    # keep the finetuned frame only if it improves the correlation with the thermal frame
    if np.abs(calculate_correlation_coef(fixBorder(thermal), fixBorder(finetuned_visible))) > np.abs(corr_coef_after_registration):
        return finetuned_visible
    return visible

def run(detector, custom, thermal_video_path, visible_video_path, output_video):
    thermal_cap = cv2.VideoCapture(thermal_video_path)
    visible_cap = cv2.VideoCapture(visible_video_path)
    # get thermal and visible video primary info
    video_fps = thermal_cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(thermal_cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print("Video FPS:", video_fps)
    print("Number of frames:", frame_count)
    # common resolution for both thermal and visible
    fps = FPS().start()
    # (w, h) = (2048, 1536)
    (w, h) = int(visible_cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(visible_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # init video writer
    out = cv2.VideoWriter(output_video, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 30, (2 * w, h), True)
    initial_transformation_matrix = get_initial_transformation_matrix(thermal_cap, visible_cap, frame_count)
    # initial_transformation_matrix = np.array([[9.9126685e-01, 5.3021866e-03, 1.8741880e+01], [9.3280076e-04, 1.0312355e+00, -2.4526514e+01]])
    print("Processing thermal and visible videos...")
    print("+++++++++++++++++++++++++++++++++++++")
    index = 1
    while True:
        _, thermal_frame = thermal_cap.read()
        _, visible_frame = visible_cap.read()
        # if index < 89:
        #     index += 1
        #     continue
        if thermal_frame is None or visible_frame is None:
            break
        print("Frame " + str(index) + "/" + str(frame_count))
        # resize thermal to visible resolution
        thermal_frame = cv2.resize(thermal_frame, (w, h))
        # visualize before registration
        print("Correlation Coef Before Registration:", calculate_correlation_coef(fixBorder(thermal_frame), fixBorder(visible_frame)))
        before_registration = cv2.addWeighted(thermal_frame, 0.5, visible_frame, 0.5, 0.0)
        cv2.imshow("Before Registration", cv2.resize(before_registration, (800, 500)))
        cv2.waitKey(1)
        # get registered visible frame
        registered_visible_frame = cv2.warpAffine(visible_frame, initial_transformation_matrix, (w, h), flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP)
        # visualize after applying initial transformation matrix
        after_registration = cv2.addWeighted(thermal_frame, 0.5, registered_visible_frame, 0.5, 0.0)
        corr_coef_after_registration = calculate_correlation_coef(fixBorder(thermal_frame), fixBorder(registered_visible_frame))
        print("Correlation Coef After Initial Registration:", corr_coef_after_registration)
        cv2.imshow("After Initial Registration", cv2.resize(after_registration, (800, 500)))
        cv2.waitKey(1)
        # finetune registration
        finetuned_visible_frame = finetune_registration(detector, custom, thermal_frame, registered_visible_frame, after_registration, corr_coef_after_registration)
        after_finetune = cv2.addWeighted(thermal_frame, 0.5, finetuned_visible_frame, 0.5, 0.0)
        print("Correlation Coef After Finetune Registration:", calculate_correlation_coef(fixBorder(thermal_frame), fixBorder(finetuned_visible_frame)))
        cv2.imshow("After Finetune Registration", cv2.resize(after_finetune, (800, 500)))
        cv2.waitKey(1)
        frame_out = cv2.hconcat([after_finetune, after_registration])
        out.write(frame_out)
        index += 1
        fps.update()
    # cv2.destroyAllWindows()
    print("+++++++++++++++++++++++++++++++++++++")
    fps.stop()
    print("Elapsed time: {:.2f}".format(fps.elapsed()))
    print("Approx. FPS: {:.2f}".format(fps.fps()))
    thermal_cap.release()
    visible_cap.release()
    out.release()
    # cv2.destroyAllWindows()

if __name__ == "__main__":
    # init YOLOv3 object detector for person
    execution_path = os.getcwd()
    detector = ObjectDetection()
    detector.setModelTypeAsYOLOv3()
    detector.setModelPath(os.path.join(execution_path, "yolo.h5"))
    detector.loadModel()
    custom = detector.CustomObjects(person=True)
    # paths to videos
    thermal_video_path = "./440/thermal_440.avi"
    visible_video_path = "./440/visible_440.avi"
    output_video = "./auto_registration_440.avi"
    # run auto registration
    run(detector, custom, thermal_video_path, visible_video_path, output_video)
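
# Usage sketch (assumptions: ImageAI's pretrained "yolo.h5" sits in the working directory and the
# ./440/ directory holds the paired thermal/visible recordings referenced above):
#   python auto_registration.py
# Each frame of the output video is a side-by-side pair written by cv2.hconcat() in run():
# the finetuned fusion on the left and the initial-registration fusion on the right.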