-
Notifications
You must be signed in to change notification settings - Fork 1
/
ping_pong_tracker.py
471 lines (380 loc) · 16.5 KB
/
ping_pong_tracker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
# To force floating point division
from __future__ import division
# Standard libs
import argparse
import os
import sys
# Parallel processing
import multiprocessing
import threading
from collections import deque
# For calculations
import numpy as np
import cv2
# For visualization
from mayavi import mlab
# Utilities
from utils.display import Display
from utils.input import Video
from utils.config import LoadConfig
# Custom
import mean_shift_tracking_frame
from histogram_comparison import histogram_comparison
PING_PONG_DIAMETER = 40 # mm
DEFAULT_FOCAL_LENGTH = 14 # mm
# Main worker of which 2
# instances will be called
# Processes video from each camera
# to work out ball position independently
def main_worker(id, video_file, camera_model, K, D, R, T, measurements, quit_event):
# Setup video displays
video_disp = Display({'name': 'Camera_{}'.format(id)})
# Get input video
video = Video(video_file)
# Setup the undistortion stuff
if camera_model == 'P':
# Harcoded image size as
# this is a test script
img_size = (1920, 1080)
# First create scaled intrinsics because we will undistort
# into region beyond original image region
new_K = cv2.getOptimalNewCameraMatrix(K, D, img_size, 0.35)[0]
# Then calculate new image size according to the scaling
# Unfortunately the Python API doesn't directly provide the
# the new image size. They forgot?
new_img_size = (int(img_size[0] + (new_K[0, 2] - K[0, 2])), int(
img_size[1] + (new_K[1, 2] - K[1, 2])))
# Standard routine of creating a new rectification
# map for the given intrinsics and mapping each
# pixel onto the new map with linear interpolation
map1, map2 = cv2.initUndistortRectifyMap(
K, D, None, new_K, new_img_size, cv2.CV_16SC2)
elif camera_model == 'F':
# Harcoded image size
img_size = (1920, 1080)
# First create scaled intrinsics because we will undistort
# into region beyond original image region. The alpha
# parameter in pinhole model is equivalent to balance parameter here.
new_K = cv2.fisheye.estimateNewCameraMatrixForUndistortRectify(
K, D, img_size, np.eye(3), balance=1)
# Then calculate new image size according to the scaling
# Well if they forgot this in pinhole Python API,
# can't complain about Fisheye model. Note the reversed
# indexing here too.
new_img_size = (int(img_size[0] + (new_K[0, 2] - K[0, 2])), int(
img_size[1] + (new_K[1, 2] - K[1, 2])))
# Standard routine of creating a new rectification
# map for the given intrinsics and mapping each
# pixel onto the new map with linear interpolation
map1, map2 = cv2.fisheye.initUndistortRectifyMap(
K, D, np.eye(3), new_K, new_img_size_1, cv2.CV_16SC2)
# Set up foreground and background separation
fgbg = cv2.createBackgroundSubtractorMOG2()
# Averaging kernel that will be used in opening
kernel = np.ones((6, 6), np.uint8)
# Code commented out because not using
# confidence currently, but could be
# used again with changes later
# # Will be used for histogram comparison
# # (Confidence measure)
# ball_image_file = 'ball_image.jpg'
# ball_image = cv2.imread(ball_image_file)
# 2D ball detection and 3D ball tracking setup
ball_position_frame = None
ball_wc = [0, 0, 0]
while not video.end_reached() and not quit_event.value:
# Get each frame
frame = video.next_frame()
# Undistort the current frame
img_undistorted = cv2.remap(
frame, map1, map2, cv2.INTER_LINEAR, cv2.BORDER_CONSTANT)
# Convert to HSV and threshold range of ball
img_hsv = cv2.cvtColor(img_undistorted, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(img_hsv, np.array(
(15, 190, 200)), np.array((25, 255, 255)))
# Foreground and background separation mask
fgmask = fgbg.apply(img_undistorted)
mask_color_bgs = cv2.bitwise_and(mask, mask, mask=fgmask)
masked_and_opened = cv2.morphologyEx(
mask_color_bgs, cv2.MORPH_OPEN, kernel)
# Hough transform to detect ball (circle)
circles = cv2.HoughCircles(masked_and_opened, cv2.HOUGH_GRADIENT, dp=3,
minDist=2500, param1=300, param2=5, minRadius=3, maxRadius=30)
if circles is not None:
# Make indexing easier and
# convert everything to int
circles = circles[0, :]
circles = np.round(circles).astype("int")
# Take only the first
# (and hopefully largest)
# circle detected
x, y, r = circles[0]
ball_position_frame = [x - r, y - r, 2 * r, 2 * r]
else:
ball_position_frame = None
# Determine the correct ball radius
mask_ball_radius = cv2.bitwise_and(fgmask, fgmask, mask=cv2.inRange(
img_hsv, np.array((10, 150, 180)), np.array((40, 255, 255))))
if ball_position_frame:
x1, y1, w1, h1 = ball_position_frame
ball_crop_temp = mask_ball_radius[(
y1 + h1 // 2 - 50):(y1 + h1 // 2 + 50), (x1 + w1 // 2 - 50):(x1 + w1 // 2 + 50)]
height, width = ball_crop_temp.shape
if height and width:
# Successfully cropped image
ball_crop = ball_crop_temp
cnts = cv2.findContours(
ball_crop.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
center = None
if len(cnts) > 0:
# contour detected
c = max(cnts, key=cv2.contourArea)
ellipse = cv2.fitEllipse(c)
width = min(ellipse[1])
ball_position_frame = [
ball_position_frame[0], ball_position_frame[1], 2 * width, 2 * width]
# Code commented out because not using
# confidence currently, but could be
# used again with changes later
# # Calculate confidence
# confidence = histogram_comparison(ball_image, img_undistorted, ball_position_frame)
# print confidence
if ball_position_frame:
x1, y1, w1, h1 = ball_position_frame
pixels_per_mm = (
K[0, 0] + K[1, 1]) / 2 / DEFAULT_FOCAL_LENGTH
z = PING_PONG_DIAMETER * \
DEFAULT_FOCAL_LENGTH / (w1 / pixels_per_mm)
x = ((x1 - K[0, 2]) /
pixels_per_mm) * z / DEFAULT_FOCAL_LENGTH
y = ((y1 - K[1, 2]) /
pixels_per_mm) * z / DEFAULT_FOCAL_LENGTH
ball_cc = np.array([x, y, z]) / 1000
ball_wc = np.dot(R.T, ball_cc - T.ravel())
# Push measurements to be processed/visualized
measurement = {
'id': id,
'frame_num': video.get_cur_frame_num(),
'ball_ic': ball_position_frame,
'ball_wc': ball_wc
}
measurements.put(measurement)
# Update video display
video_disp.refresh(img_undistorted)
# Add quitting event
if video_disp.can_quit():
break
# Setting this will signal
# the other parallel process
# to exit too.
quit_event.value = 1
def update_visu(measurements_camera_1, measurements_camera_2, visu, quit_event):
# Setup a little local deck of measurements
# so that this thread maybe continued in
# case of 1 or 2 frames out of sync
deck_camera_maxlen = 10
deck_camera_1 = deque(maxlen=deck_camera_maxlen)
deck_camera_2 = deque(maxlen=deck_camera_maxlen)
# Setup a deck to store ball trail
deck_ball_maxlen = 5
deck_ball = deque(maxlen=deck_ball_maxlen)
# Keep track of latest frame processed
latest_frame_processed = 0
# Setup balls last known position as some
# default "known" position; near player 1
last_known_position = [0, 137, 50]
# Rotation for measurements from camera 2
def camera_2_to_camera_1(wc):
# Rotation matrix will be
# -1 0 0
# 0 -1 0
# 0 0 1
# since it is just a flip on the Z-axis
wc[0] = -wc[0]
wc[1] = -wc[1]
# No change to wc[2]
return wc
while not quit_event.value:
deck_camera_1.append(measurements_camera_1.get())
deck_camera_2.append(measurements_camera_2.get())
# Sequentially look for the next frame
# to process from camera 1
for i in range(deck_camera_maxlen):
if deck_camera_1[i]['frame_num'] > latest_frame_processed:
camera_1 = deck_camera_1[i]
break
# Find the same frame in camera 2
for i in range(deck_camera_maxlen):
if deck_camera_2[i]['frame_num'] == camera_1['frame_num']:
camera_2 = deck_camera_2[i]
break
# If ball in image coordinates available
# from either camera update as predicted from either.
# But if both available, predict the average
if camera_1['ball_ic'] and camera_2['ball_ic']:
ball_wc = (camera_1['ball_wc'] +
camera_2_to_camera_1(camera_2['ball_wc'])) / 2
elif camera_1['ball_ic']:
ball_wc = camera_1['ball_wc']
elif camera_2['ball_ic']:
ball_wc = camera_2_to_camera_1(camera_2['ball_wc'])
else:
ball_wc = None
# Store a history of ball measurements
deck_ball.append(ball_wc)
# Update latest frame processed
latest_frame_processed = camera_1['frame_num']
# Process the ball deck to visualize a ball trail
# If there are maxlen continuous (valid) measurements,
# form the entire trail. Or else form part of the trail.
# Also NOTE the reversed indices
visu_scalars = np.zeros(deck_ball_maxlen)
visu_x = np.zeros_like(visu_scalars)
visu_y = np.zeros_like(visu_scalars)
visu_z = np.zeros_like(visu_scalars)
for i in sorted(range(len(deck_ball)), reverse=True):
if deck_ball[i] is None:
break
# The table is in cm units
visu_x[i] = deck_ball[i][0] * 100
visu_y[i] = deck_ball[i][1] * 100
visu_z[i] = deck_ball[i][2] * 100
# Update last known position
last_known_position = deck_ball[i] * 100
# Scale the ball sizes depending
# on number of measurements we got
scaling_template = [16, 12, 10, 6, 0]
visu_scalars[i:] = scaling_template[i:][::-1]
# If no valid measurements, then
# update visu with last known position
# but with smaller representation
if i == len(deck_ball) - 1:
visu_x[i] = last_known_position[0]
visu_y[i] = last_known_position[1]
visu_z[i] = last_known_position[2]
visu_scalars[i] = 4
# Update ball in visu
visu.set(x=visu_x, y=visu_y, z=visu_z, scalars=visu_scalars)
# If thread wants to exit, should ideally
# take down visu window with it. But there
# seems to be some problem with the close
# function in mayavi. Ask user to manually
# close that window
print 'Please close the visualization window too if not done already'
# Argument parser
def get_args():
# Setup argument parser
# and parse the inputs
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument(
'-v1', '--video-1', required=True, help='Video file from Camera 1')
arg_parser.add_argument(
'-v2', '--video-2', required=True, help='Video file from Camera 2')
arg_parser.add_argument(
'-m', '--model', default='P', help='Camera model - Pinhole (P)/Fisheye (F)'
)
args = vars(arg_parser.parse_args())
if not os.path.exists(args['video_1']):
arg_parser.error('{} file not found'.format(args['video_1']))
if not os.path.exists(args['video_2']):
arg_parser.error('{} file not found'.format(args['video_2']))
if args['model'].upper() != 'P' and args['model'].upper() != 'F':
arg_parser.error(
'{} not supported. Enter P for Pinhole or F for Fisheye'.format(
args['model']))
return args
# Setup table and other stuff
# for visualization
def setup_visu():
# Create a Mayavi figure
fig = mlab.figure('Ball tracking visualization')
# Measurements have to be whole numbers
TABLE_WIDTH = 152 # cm
TABLE_LENGTH = 274 # cm
table_structure = np.ones((TABLE_WIDTH, TABLE_LENGTH))
# Draw out the lines on the table
# Creases
table_structure[1:-2, 1] = 0
table_structure[1:-2, -2] = 0
table_structure[1, 1:-2] = 0
table_structure[-2, 1:-2] = 0
# Center line
table_structure[TABLE_WIDTH // 2, 1:-2] = 0
# Representing net
table_structure[1:-2, TABLE_LENGTH // 2 - 1] = 0.4
table_structure[1:-2, TABLE_LENGTH // 2] = 0.4
table_structure[1:-2, TABLE_LENGTH // 2 + 1] = 0.4
# Create the table layout in the figure
table = mlab.imshow(table_structure, colormap='Blues', opacity=0.6)
# Setup a 3D point plotter with
# proper shapes, ie., no. of points
# for the ball and its trail
x = y = z = s = np.zeros(5)
point_plt = mlab.points3d(x, y, z, s, scale_factor=1, color=(1, 0.4, 0))
# Put text to indicate table orientation
mlab.text3d(0, TABLE_LENGTH // 2 + 10, -40, 'Player 1',
scale=10, orient_to_camera=True)
mlab.text3d(0, -(TABLE_LENGTH // 2 + 10), -40,
'Player 2', scale=10, orient_to_camera=True)
return point_plt.mlab_source
def main():
args = get_args()
# Read in intrinsic calibration
load_config_1 = LoadConfig('config/intrinsic_calib_{}_camera_1.npz'.format(
args['model'].lower()), 'calib_camera_1')
intrinsic_1 = load_config_1.load()
K_1 = intrinsic_1['camera_matrix']
D_1 = intrinsic_1['dist_coeffs']
load_config_2 = LoadConfig('config/intrinsic_calib_{}_camera_2.npz'.format(
args['model'].lower()), 'calib_camera_2')
intrinsic_2 = load_config_2.load()
K_2 = intrinsic_2['camera_matrix']
D_2 = intrinsic_2['dist_coeffs']
# Read in extrinsic calibration
load_config_e_1 = LoadConfig('config/extrinsic_calib_{}_camera_1.npz'.format(
args['model'].lower()), 'extrinsic_camera_1')
extrinsic_1 = load_config_e_1.load()
R_1 = cv2.Rodrigues(extrinsic_1['rvec'])[0]
T_1 = extrinsic_1['tvec']
load_config_e_2 = LoadConfig('config/extrinsic_calib_{}_camera_2.npz'.format(
args['model'].lower()), 'extrinsic_camera_2')
extrinsic_2 = load_config_e_2.load()
R_2 = cv2.Rodrigues(extrinsic_2['rvec'])[0]
T_2 = extrinsic_2['tvec']
# Setup visualization
visu = setup_visu()
# Setup atomic queues to send measurement information from
# worker processes back to main process. Also limit queue
# size to maintain sync between frames. Too big queue
# can result in run away condition, while too less may
# not give enough time process keyboard event.
measurements_camera_1 = multiprocessing.Queue(maxsize=3)
measurements_camera_2 = multiprocessing.Queue(maxsize=3)
# Setup a shared variable to share keyboard event
# across the main worker processes and the thread
quit_event = multiprocessing.Value('b', False)
# Setup a thread that calculates final ball position
# and updates that into the visualization
thread_update_visu = threading.Thread(target=update_visu, args=(
measurements_camera_1, measurements_camera_2, visu, quit_event))
thread_update_visu.start()
# Start the two main worker processes
# NOTE: When using Python 3, to maintain compatibility with
# MACOSX, *spawn* may have to be used instead of *fork*
# multiprocessing.set_start_method('spawn')
process_camera_1 = multiprocessing.Process(target=main_worker, args=(
1, args['video_1'], args['model'], K_1, D_1, R_1, T_1, measurements_camera_1, quit_event))
process_camera_2 = multiprocessing.Process(target=main_worker, args=(
2, args['video_2'], args['model'], K_2, D_2, R_2, T_2, measurements_camera_2, quit_event))
process_camera_1.start()
process_camera_2.start()
# Show the visualization interactively
# NOTE: This is a blocking event loop
# And this **has** to be done in main thread
mlab.show()
# Wait for everyone to complete
process_camera_1.join()
process_camera_2.join()
thread_update_visu.join()
if __name__ == '__main__':
main()