-
Notifications
You must be signed in to change notification settings - Fork 0
/
obj_detection.py
1100 lines (852 loc) · 38.1 KB
/
obj_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
Udacity Self Driving Car Nanodegree
Term 1, Project 5 -- Object Detection
Author: Ciaran Murphy
Date: 27th Nov 2017
"""
import os
import glob
import cv2
import sys
import pickle
import tempfile
import shutil
import numpy as np
from argparse import ArgumentParser
from collections import defaultdict
from moviepy.editor import VideoFileClip
from collections import deque
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from scipy.ndimage.measurements import label
from skimage.feature import hog
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from mpl_toolkits.mplot3d import Axes3D
# Training image file lists. They are built once, at import time, from paths that
# are relative to the current working directory.
CAR_FNAMES = glob.glob('training_images/my_images/car1/*')
CAR_FNAMES += glob.glob('training_images/my_images/car2/*')
CAR_FNAMES += glob.glob('training_images/my_images/car3/*')
# NOTE(review): '*/*.png' already matches every sub-directory, and '[!KITTI]*' is a
# glob character class (first character is not one of K, I, T), not "everything
# except the KITTI folder". Files matched by the second pattern therefore appear
# twice in the list -- confirm whether that weighting is intended.
CAR_FNAMES += glob.glob('./training_images/vehicles/*/*.png')
CAR_FNAMES += glob.glob('./training_images/vehicles/[!KITTI]*/*.png')
NOTCAR_FNAMES = glob.glob('training_images/my_images/notcar2/*')
NOTCAR_FNAMES += glob.glob('training_images/my_images/notcar3/*')
NOTCAR_FNAMES += glob.glob('training_images/my_images/notcar4/*')
NOTCAR_FNAMES += glob.glob('training_images/my_images/notcar1/*')
# Same pair of overlapping patterns as for the car images (see the note above).
NOTCAR_FNAMES += glob.glob('./training_images/non-vehicles/[!KITTI]*/*.png')
NOTCAR_FNAMES += glob.glob('./training_images/non-vehicles/*/*.png')
# Cap the notcar list at the number of car files; the entries that survive depend
# on the order in which the lists above were concatenated.
NOTCAR_FNAMES = NOTCAR_FNAMES[:len(CAR_FNAMES)]
#######################################################################################################
#
# Helper functions and classes
#
#######################################################################################################
def parse_args():
    """Parse the command line and derive the extra fields the script relies on.

    Exactly one of `-t/--train` or `-v/--videoin` must be supplied (argparse
    enforces this). After parsing, the namespace is extended as follows:

    * video mode (`-t` absent): `train` is False, a classifier file (`-c`) is
      mandatory, and `scaler_fname` defaults to a name derived from the
      classifier file name.
    * training mode (`-t` present): `train` is True and `trainsize` becomes an
      int, with 'all' mapped to -1. A warning is printed when no `-s` save
      file was given.

    Returns:
        the populated argparse namespace
    Raises:
        SystemExit: with status 2 when video mode is requested without a
            classifier file (usage is printed first)
        ValueError: when the `-t` value is neither 'all' nor an integer
    """
    parser = ArgumentParser()
    ex_group = parser.add_mutually_exclusive_group(required=True)
    ex_group.add_argument('-t', '--train', dest='trainsize',
        help="""Include the training step and use TRAIN value as the number of _notcar_ samples to
        use for training on. Specify 'all' to train on all available data. If not including
        training, must specify a classifier to load from disk (a previously pickled one)
        using the `-c` switch. The number of car samples loaded for training will be porportional
        to the number of notcar samples specified here.""")
    ex_group.add_argument('-v', '--videoin', dest='input_video',
        help="""The input video file""")
    parser.add_argument('-0', '--t0', dest='video_start',
        help="""T0 -- time in seconds to start the video from""")
    parser.add_argument('-1', '--t1', dest='video_end',
        help="""T1 -- time in seconds to end the video at""")
    parser.add_argument('-s', '--save', dest='save_file',
        help="""Where to save the SVM model back to disk (if training)""")
    parser.add_argument('-o', '--videoout', dest='output_video',
        help="""The video output file""")
    parser.add_argument('-c', '--classifier', dest='clf_fname',
        help="""Where to find a previously pickled SVM file to use if not training""")
    parser.add_argument('-d', '--stdscaler', dest='scaler_fname',
        help="""Where to find a previously pickled StandadrScaler for the SVM""")
    parser.add_argument('-g', '--debug', dest='debug', action='store_true',
        help="""Debug mode - things like include all rectangles to output video and
        possibly print more logs.""")
    args = parser.parse_args()

    if args.trainsize is None:
        # Video mode: a previously trained classifier is required.
        args.train = False
        if args.clf_fname is None:
            parser.print_usage()
            # This is a usage error, so report failure to the calling shell
            # (2 is the status argparse itself uses for bad arguments).
            sys.exit(2)
        if args.scaler_fname is None:
            # NOTE(review): this joins the pieces without their dots, so every '.'
            # in the path is dropped ('./m.pkl' -> '/m_scaler.pkl'). It is kept
            # as-is because get_xy() derives the name it saves under in exactly
            # the same way; change both together or not at all.
            args.scaler_fname = ''.join(args.clf_fname.split('.')[:-1]) + '_scaler.pkl'
    else:
        # Training mode: normalise the sample count, -1 meaning "everything".
        args.train = True
        if args.trainsize == 'all':
            args.trainsize = -1
        else:
            args.trainsize = int(args.trainsize)
        if args.save_file is None:
            print('!!! WARNING !!! Trained model will not be stored to disk.')
    return args
def imread(fname, for_prediction=False):
    """Helper function to always load images in a consistent way.

    The image is read with cv2, so the channel order is BGR, and it is
    converted to float32. A warning is printed when the largest pixel value is
    at most 1, i.e. the image does not look like it is in the 0..255 range.

    Args:
        fname: path of the image file to read
        for_prediction: if True, rescale the image to 64x64 pixels
    Returns:
        the image as a float32 array
    Raises:
        IOError: if cv2 could not read the file (cv2.imread returns None
            instead of raising)
    """
    raw = cv2.imread(fname)
    if raw is None:
        # Without this check the failure shows up as an AttributeError on None.
        raise IOError('Could not read image file: {}'.format(fname))
    img = raw.astype(np.float32)
    if int(img.max()) <= 1:
        print('!!! WARNING !!! Image does not apper to be the right scale, found ({}, {}) for {}.'.format(
            img.min(), img.max(), fname))
    if for_prediction is True:
        # ndarray.resize() only reshapes/truncates the underlying buffer, it does
        # not rescale the picture; cv2.resize interpolates to the target size.
        img = cv2.resize(img, (64, 64))
    return img
def imshow(*args, **kwargs):
    """Display an image that follows this script's conventions (BGR, 0..255).

    The first positional argument is the image. A copy of it has its last axis
    reversed and is divided by 255 before being handed on, so the caller's
    array is left untouched. All remaining positional and keyword arguments
    are forwarded unchanged.

    Args:
        *args: the image followed by any extra positional arguments for the
            underlying `imshow` call
        **kwargs: forwarded keyword arguments. The optional `axis` entry is
            consumed here: when present the image is drawn with
            `axis.imshow(...)`, otherwise with `plt.imshow(...)`
    """
    # Reverse the channel axis and rescale on a copy of the input.
    display = args[0].copy()[:, :, ::-1] / 255
    extra = args[1:]
    target = kwargs.pop('axis') if 'axis' in kwargs else plt
    target.imshow(display, *extra, **kwargs)
def imsave(fname, img):
    """Save an image to disk in a consistent way (via cv2.imwrite).

    Args:
        fname: destination file name
        img: the image to write; it is passed to cv2.imwrite unchanged
    Returns:
        the value returned by cv2.imwrite, so that callers can detect a failed
        write instead of it passing silently (OpenCV documents it as a success
        flag)
    """
    return cv2.imwrite(fname, img)
def color_xform(img, tcmap):
    """Convert img from BGR to the target color space.

    Args:
        img: input image in BGR, range 0:255
        tcmap: target color space (RGB, HSV, LUV, HLS, YUV, YCrCb)
    Returns:
        a new image in the target color space
    Raises:
        AssertionError: if `tcmap` is not one of the supported names, or if
            the converted image has no value above 1 (i.e. it does not look
            like 0..255 data)
    """
    assert tcmap in ['RGB', 'HSV', 'LUV', 'HLS', 'YUV', 'YCrCb'], 'Invalid target color space'
    # One entry per accepted name. Previously 'RGB' passed the check above but
    # matched no conversion branch, so the BGR image came back unconverted.
    conversions = {
        'RGB': cv2.COLOR_BGR2RGB,
        'HSV': cv2.COLOR_BGR2HSV,
        'LUV': cv2.COLOR_BGR2LUV,
        'HLS': cv2.COLOR_BGR2HLS,
        'YUV': cv2.COLOR_BGR2YUV,
        'YCrCb': cv2.COLOR_BGR2YCrCb,
    }
    # copy() hands cv2 a fresh C-contiguous array even when img is a strided view.
    img_new = cv2.cvtColor(img.copy(), conversions[tcmap])
    # Report the range of the converted image, which is what is being checked.
    assert img_new.max() > 1, "Pixel value range is not (0, 255), it's {}".format(
        (img_new.min(), img_new.max()))
    return img_new
def draw_labeled_bboxes(img, labels):
    """Take in a labels canvas and use it to overlay bounding rectangles for detected objects.

    For every label 1..N the tight bounding box of the pixels carrying that
    label is computed. A side shorter than 64 pixels is replaced by a
    64 pixel span centred on the original one, then the rectangle is drawn.

    Args:
        img: the image to modify
        labels: a 2-tuple containing the label image and the number of objects
    Returns:
        the image with the boxes added (the result of the last
        cv2.rectangle call, or `img` itself when there are no objects)
    """
    min_side_length = 64
    half_side = min_side_length // 2
    for car_number in range(1, labels[1] + 1):
        # Row and column indices of the pixels that belong to this object
        nonzero = (labels[0] == car_number).nonzero()
        nonzeroy = np.array(nonzero[0])
        nonzerox = np.array(nonzero[1])
        # Plain Python ints: some OpenCV builds reject numpy integer scalars
        # inside the point tuples.
        x1, x2 = int(np.min(nonzerox)), int(np.max(nonzerox))
        y1, y2 = int(np.min(nonzeroy)), int(np.max(nonzeroy))
        if x2 - x1 < min_side_length:
            mid = x1 + ((x2 - x1) // 2)
            # Centre the enlarged span on `mid`. The old code used
            # `x1 + mid - half`, which added the origin twice and pushed the
            # box away from the detection.
            x1 = mid - half_side
            x2 = x1 + min_side_length
        if y2 - y1 < min_side_length:
            mid = y1 + ((y2 - y1) // 2)
            y1 = mid - half_side
            y2 = y1 + min_side_length
        # Draw the box on the image
        img = cv2.rectangle(img, (x1, y1), (x2, y2), (11, 102, 0), 4)
    return img
def video_extract(src_fname, t1, t2, tgt_fname=None):
    """Extract a subset of a video and save it to disk. The ffmpeg_tools way is pretty fast.

    Args:
        src_fname: source video
        t1: start time in seconds
        t2: end time in seconds
        tgt_fname: where to store the clip. If None a temp file will be created and its path returned
    Returns:
        the path of the file the clip was written to
    """
    if tgt_fname is None:
        # mkstemp returns an OPEN descriptor plus the path. Only the path is
        # needed, so close the descriptor here instead of leaking it.
        fd, tgt_fname = tempfile.mkstemp(suffix='.mp4')
        os.close(fd)
    from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
    ffmpeg_extract_subclip(src_fname, t1, t2, targetname=tgt_fname)
    return tgt_fname
class Bunch:
    """Thin wrapper that exposes the entries of a dict as attributes."""

    def __init__(self, adict):
        # Copy every key/value pair straight into the instance namespace.
        vars(self).update(adict)

    def __repr__(self):
        # Looks exactly like the repr of the underlying attribute dict.
        return repr(vars(self))
#######################################################################################################
#
# Pipeline functions
#
#######################################################################################################
def get_template_matches(img, template_fnames, method=cv2.TM_CCOEFF_NORMED):
    """Locate the best match of each template image inside `img`.

    Kept for reference only (the code comes from lesson 9 of the project):
    template matching finds an object only when it shows up with the same
    color, scale and orientation as the template, so it does not carry over
    to real world objects whose appearance varies.

    Args:
        img: the image to search through
        template_fnames: list of template image file names; each one is read
            from disk here with `imread()`
        method: cv2.matchTemplate comparison method. Other options include:
            cv2.TM_CCORR_NORMED, cv2.TM_CCOEFF, cv2.TM_CCORR, cv2.TM_SQDIFF,
            cv2.TM_SQDIFF_NORMED
    Returns:
        one ((left, top), (right, bottom)) box per template, in input order
    """
    # For the squared-difference methods the best match is the minimum of the
    # result map; for every other method it is the maximum.
    lowest_is_best = method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]
    bbox_list = []
    for template_fname in template_fnames:
        patch = imread(template_fname)
        scores = cv2.matchTemplate(img, patch, method)
        # The extreme values themselves are not needed, only where they are.
        _, _, min_loc, max_loc = cv2.minMaxLoc(scores)
        height, width = patch.shape[0], patch.shape[1]
        left, top = min_loc if lowest_is_best else max_loc
        bbox_list.append(((left, top), (left + width, top + height)))
    return bbox_list
def get_colorhist(img, nbins=64, tcmap='HLS', bins_range=(0, 256), vis=False):
    """Build per-channel histograms of an image in the `tcmap` color space.

    Args:
        img: the input image; it must contain a value above 1 (0..255 data)
        nbins: the number of bins in each histogram
        tcmap: target color map. The image is converted to this color space
            with `color_xform()` before the histograms are taken
        bins_range: the value range covered by the bins
        vis: if True, also return the data needed for a visualization
    Returns:
        When `vis` is False: the three channel histograms (bin counts only)
        concatenated into one 1-D vector.
        When `vis` is True: a 5-tuple of the three `np.histogram` results
        (channel 0, 1, 2 -- each a (counts, bin_edges) pair), the concatenated
        feature vector, and the bin centers.
    """
    assert img.max() > 1, "Pixel value range is not (0, 255), it's {}".format((img.min(), img.max()))
    converted = color_xform(img, tcmap)
    channel_hists = [
        np.histogram(converted[:, :, c], bins=nbins, range=bins_range)
        for c in range(3)
    ]
    hist_features = np.concatenate([counts for counts, _ in channel_hists])
    if vis is False:
        return hist_features
    # All three histograms share the same edges, so take them from the first.
    bin_edges = channel_hists[0][1]
    bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2
    assert len(hist_features) > 0, 'Got no color historgram for image'
    ch1_hist, ch2_hist, ch3_hist = channel_hists
    return ch1_hist, ch2_hist, ch3_hist, hist_features, bin_centers
def get_spatial(img, tcmap='LUV', size=(16, 16)):
    """Produce a flat "spatial binning" feature vector for an image.

    The image is converted with `color_xform()`, scaled to `size` with
    cv2.resize and flattened.

    Args:
        img: the input image, in the format `color_xform()` expects
        tcmap: target color map. The image is converted to this color space
            before processing
        size: target size passed to cv2.resize
    Returns:
        one dimensional feature vector of the converted, resized image
    """
    converted = color_xform(img, tcmap)
    assert converted.max() > 1, "Pixel value range is not (0, 255), it's {}".format(
        (converted.min(), converted.max()))
    shrunk = cv2.resize(converted, size)
    features = shrunk.ravel()
    assert len(features) > 0, 'Got no spatial features for image'
    return features
def get_hog(img, orient=12, pix_per_cell=16, cell_per_block=2, channel='all', tcmap='LUV'):
    """Get a Histogram of Oriented Gradients feature vector for an image.

    The image is first converted with `color_xform()`. HOG features are then
    computed with skimage's `hog` (L2-Hys block normalisation, no sqrt
    transform) for one channel, or for all three channels with the per-channel
    vectors concatenated in channel order.

    Args:
        img: the input image
        orient: the number of orientation bins in the output histogram
        pix_per_cell: cell side length in pixels (an int; square cells are used)
        cell_per_block: block side length in cells (an int; square blocks are used)
        channel: which of the 3 color channels to get the HOG for, or 'all'
        tcmap: target color map. The image will be converted to this color space
            before processing
    Returns:
        the HOG features rolled out as a 1-D vector
    """
    assert channel in [0, 1, 2, 'all'], "Invalid channel specified, must be one of [0, 1, 2 'all']"
    img = color_xform(img, tcmap)
    channels = (0, 1, 2) if channel == 'all' else (channel,)
    # The HOG visualisation image was requested before but never used. Leaving
    # the flag at its default skips that extra work and avoids the old
    # `visualise` spelling, which newer scikit-image releases no longer accept.
    # The feature values themselves do not depend on that flag.
    per_channel = [
        hog(
            img[:, :, i],
            orientations=orient,
            pixels_per_cell=(pix_per_cell, pix_per_cell),
            cells_per_block=(cell_per_block, cell_per_block),
            transform_sqrt=False,
            feature_vector=True,
            block_norm="L2-Hys"
        )
        for i in channels
    ]
    return np.hstack(per_channel).ravel()
def pipeline(img_data):
    """Turn images into feature vectors.

    For every image the spatial, color-histogram and HOG features are computed
    with the default settings of `get_spatial()`, `get_colorhist()` and
    `get_hog()`, and joined, in that order, into a single float32 vector.

    Args:
        img_data: an iterable of 3-element entries (filename, image, idx).
            Only the image is used here; it must contain a value above 1
            (0..255 data)
    Returns:
        a list with one 1-D float32 feature vector per input image (the
        vectors are not scaled/normalised here)
    Raises:
        AssertionError: if an image fails the range check or `img_data`
            yields no entries
    """
    features = []
    for _fname, img, _idx in img_data:
        assert img.max() > 1, "Pixel value range is not (0, 255), it's {}".format((img.min(), img.max()))
        parts = (get_spatial(img), get_colorhist(img), get_hog(img))
        features.append(np.concatenate(parts).astype(np.float32))
    assert len(features) > 0, 'Got no features'
    return features
def video_pipeline(img):
    """Callback for moviepy video processing: find cars in one frame and box them.

    Uses the module globals `args` (the parsed command line) and `conf` (the
    `Bunch` of settings and history deques that `main()` builds), and keeps a
    frame counter on the function object itself (`video_pipeline.cnt`).

    Per frame: three sets of search windows (far / mid / near) come from
    `get_window_points()`, every window is classified with `predict()`,
    positive windows are optionally filtered against the boxes of previous
    frames, the surviving windows are accumulated into a heatmap which is
    thresholded (and optionally summed with previous heatmaps), and the
    connected regions of the result are drawn with `draw_labeled_bboxes()`.

    Args:
        img: the input image, coming from moviepy will be in RGB
    Returns:
        a new image with boxes drawn where cars are predicted. In debug mode
        the annotated debug image is returned instead; frames that are skipped
        return the input or the most recently stored frame.
    """
    global args, conf
    if 'cnt' not in video_pipeline.__dict__:
        video_pipeline.cnt = 0
    video_pipeline.cnt += 1

    # NOTE(review): the first 144 frames are passed through without any
    # detection. The reason for this hard-coded number is not visible here.
    if video_pipeline.cnt < 145:
        conf.img_hist.append(img)
        return img

    # In debug mode only every conf.n-th frame is processed; the other frames
    # repeat the last stored image.
    if args.debug and video_pipeline.cnt % conf.n != 0:
        return conf.img_hist[-1]

    fname = 'video_frame_{:04}.png'.format(video_pipeline.cnt)  # a dummy file name needed by pipeline()

    # Convert from RGB which is what moviepy loads in to BGR which is what the
    # rest of this script is using (cv2 default)
    img = img.copy()[:,:,::-1]

    frame_boxes = []    # every window predicted as "car" in this frame
    to_be_painted = []  # the windows that also passed the history filter

    def process_boxes(pstr, img, img_dbg):
        """Classify each window in the enclosing `bboxes` and record the hits."""
        for box in bboxes:
            top_left, bottom_right = box
            sub_img = img[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0], :]
            prediction = predict(sub_img, conf.clf, args.scaler_fname, fname=pstr + fname, vis=False, verbose=True)
            if prediction == 1:
                frame_boxes.append(box)
                img, img_dbg = process_box(img, img_dbg, box, to_be_painted)
        return img, img_dbg

    def process_box(img, img_dbg, box, to_be_painted):
        """Sanity check one positive window against the box history."""
        if conf.enable_boxhist is False:
            to_be_painted.append(box)
            return img, img_dbg

        # Find the center of the new box
        center = (box[0][0] + ((box[1][0] - box[0][0]) // 2), box[0][1] + ((box[1][1] - box[0][1]) // 2))

        # Count the past frames that have at least one box containing the
        # center. The box is accepted only if that is true for hist_size past
        # frames. In debug mode a circle marks the verdict.
        hits = 0
        for pastframe in conf.frame_box_history:
            for pastbox in pastframe:
                if (center[0] > pastbox[0][0] and center[0] < pastbox[1][0]
                        and center[1] > pastbox[0][1] and center[1] < pastbox[1][1]):
                    hits += 1
                    break  # one hit per past frame is enough

        if hits == conf.hist_size and video_pipeline.cnt > conf.hist_size:
            if args.debug:
                img_dbg = cv2.circle(img_dbg, center, 20, (0,200,200), 1)
            to_be_painted.append(box)
        elif args.debug:
            img_dbg = cv2.circle(img_dbg, center, 20, (0,0,200), 1)
        return img, img_dbg

    img_dbg = img.copy()

    #
    # Far bounding boxes
    #
    bboxes = get_window_points(img.shape, conf.far_window_size, conf.far_overlap,
                               start=conf.far_start_pos, end=conf.far_end_pos)
    img, img_dbg = process_boxes('far_', img, img_dbg)

    #
    # Middle bounding boxes
    #
    bboxes = get_window_points(img.shape, conf.mid_window_size, conf.mid_overlap,
                               start=conf.mid_start_pos, end=conf.mid_end_pos)
    img, img_dbg = process_boxes('mid_', img, img_dbg)

    #
    # Near bounding boxes
    #
    bboxes = get_window_points(img.shape, conf.near_window_size, conf.near_overlap,
                               start=conf.near_start_pos, end=conf.near_end_pos)
    img, img_dbg = process_boxes('near_', img, img_dbg)

    # Add the car predictions for this frame to the history
    conf.frame_box_history.append(frame_boxes)

    # Some annoying bug in cv2 that needs this copy workaround :( :(
    img = img.copy()
    img_dbg = img_dbg.copy()

    # For visualizing the process, draw boxes on a debug image
    if args.debug is True:
        # Draw all positive prediction boxes before any filtering (white)
        for box in frame_boxes:
            img_dbg = cv2.rectangle(img_dbg, box[0], box[1], (255, 255, 255), 1)
        # Draw all positive prediction boxes after the history filter (yellow)
        for box in to_be_painted:
            img_dbg = cv2.rectangle(img_dbg, box[0], box[1], (0, 255, 255), 1)

    # Build the heatmap: one vote per accepted window covering a pixel.
    # np.float64 replaces the `np.float` alias, which NumPy has removed; both
    # name the same 64 bit float type.
    heatmap = np.zeros_like(img[:,:,0]).astype(np.float64)
    for box in to_be_painted:
        heatmap[box[0][1]:box[1][1], box[0][0]:box[1][0]] += 1
    if args.debug:
        plt.imsave(conf.dst1 + '{:04}.jpg'.format(video_pipeline.cnt), heatmap, cmap='gray')

    # Drop pixels with fewer votes than the threshold
    heatmap[heatmap < conf.threshold] = 0
    if args.debug:
        plt.imsave(conf.dst2 + '{:04}.jpg'.format(video_pipeline.cnt), heatmap, cmap='gray')

    if conf.enable_heathist:
        # Label the sum of the stored heatmaps rather than this frame alone
        conf.frame_heat_history.append(heatmap)
        labels = label(np.sum(conf.frame_heat_history, axis=0))
    else:
        labels = label(heatmap)
    if args.debug:
        # NOTE(review): this saves the same per-frame heatmap as the dst2 image,
        # not the summed history that was labelled -- confirm that is intended.
        plt.imsave(conf.dst3 + '{:04}.jpg'.format(video_pipeline.cnt), heatmap, cmap='gray')

    if labels[1] > 0:
        img = draw_labeled_bboxes(img, labels)
        img_dbg = draw_labeled_bboxes(img_dbg, labels)

    # Convert back to RGB for moviepy
    img = img[:,:,::-1]
    img_dbg = img_dbg[:,:,::-1]

    if args.debug is True:
        conf.img_hist.append(img_dbg)
        return img_dbg
    else:
        conf.img_hist.append(img)
        return img
#######################################################################################################
#
# Data model functions
#
#######################################################################################################
def load_data(car_or_not='car', ratio=1, length=-1, random=False, sanity=True, vis=False):
    """Load training images from disk through `imread()`.

    The file names come from the module-level CAR_FNAMES / NOTCAR_FNAMES lists
    and are shuffled with numpy's global random state before being cut down to
    the requested amount.

    Args:
        car_or_not: 'car' to load car images, 'notcar' to load images that
            are not of cars
        ratio: scales the number of car images that are loaded (`length * ratio`);
            it has no effect when loading notcar images. E.g. 0.5 means load
            half as many car images, which biases the data towards notcar
        length: how many images to load; a negative value means all that are
            available
        random: if True, return a single randomly chosen entry instead of all
            selected images
        sanity: if True, first read every car and notcar image, check that they
            all agree in shape, dtype and value range, and print a summary
        vis: not used by this function
    Returns:
        a list of [filename, image, index] entries, where index is the position
        of the file in the shuffled selection
    Raises:
        AssertionError: if `car_or_not` is not 'car' or 'notcar'
        Exception: if the sanity check finds an inconsistent image
    """
    assert car_or_not in ['car', 'notcar'], "Invalid option for 'car_or_not'. Chose from ['car', 'notcar']"
    car_fnames = CAR_FNAMES
    notcar_fnames = NOTCAR_FNAMES

    # Verify that all images are of the same shape and dtype
    if sanity:
        print('Checking that all images have the same size, dtype and range...')
        reference = imread(car_fnames[0])
        image_shape = reference.shape
        data_type = reference.dtype
        file_types = defaultdict(lambda: 0)

        def get_minmax(img):
            """Bucket the value range of an image into a nominal (min, max)."""
            lo, hi = img.min(), img.max()
            # Anything non-negative counts as starting at 0
            minval = lo if lo < 0 else 0
            if hi > 1 and hi <= 255:
                maxval = 255
            elif hi <= 1:
                maxval = 1
            else:
                maxval = hi
            return minval, maxval

        first_minval, first_maxval = get_minmax(reference)
        for fname in car_fnames + notcar_fnames:
            file_types[fname.split('.')[-1]] += 1
            cur_img = imread(fname)
            curmin, curmax = get_minmax(cur_img)
            if cur_img.shape != image_shape:
                raise Exception('Not all images have the same shape. {} was of shape {}.'.format(fname, cur_img.shape))
            if cur_img.dtype != data_type:
                raise Exception('Not all images have the same data type. {} was of type {}.'.format(fname, cur_img.dtype))
            if curmin != first_minval or curmax != first_maxval:
                raise Exception(
                    """Not all images have the same dinemsions, got: ({}, {}) for {}, but expected ({}, {})"""
                    .format(curmin, curmax, fname, first_minval, first_maxval))
        print(' All images are consistent!')
        print(' Total number of images: {} cars and {} non-cars'.format(len(car_fnames), len(notcar_fnames)))
        print(' Image size: {}, data type: {}'.format(image_shape, data_type))
        print(' Pixel value range: ({}, {})'.format(first_minval, first_maxval))
        print(' File type counts: {}'.format(str(dict(file_types))))

    # Shuffle, then keep the requested number of file names
    if car_or_not == 'car':
        car_length = len(car_fnames) if length < 0 else length
        fnames = np.random.permutation(car_fnames).tolist()[:int(car_length*ratio)]
    else:
        notcar_length = len(notcar_fnames) if length < 0 else length
        fnames = np.random.permutation(notcar_fnames).tolist()[:notcar_length]

    if random is True:
        idx = np.random.randint(0, len(fnames))
        return [[fnames[idx], imread(fnames[idx]), idx]]
    return [[f, imread(f), i] for i, f in enumerate(fnames)]
def get_xy(clf_fname=None, length=-1):
    """Load image data and extract features and labels for training.

    Car and notcar images are loaded with `load_data()`, run through
    `pipeline()`, stacked (cars first) and standardised with a freshly fitted
    StandardScaler. When `clf_fname` is given, that scaler is pickled next to
    it so the same scaling can be reused for predictions.

    Args:
        clf_fname: the file name where the classifier will be stored. This is used to generate
            a filename for the StandardScaler object to also be stored. If None, the scaler
            is not written to disk.
        length: the amount of data to load per car or notcar, meaning a value of 2500 will
            try to load 2500 car images and 2500 notcar images
    Returns:
        X matrix of scaled features and y vector of labels (1 = car, 0 = not car).
    """
    print('Loading features and labels...')
    car_data = load_data(length=length, car_or_not='car')
    notcar_data = load_data(length=length, car_or_not='notcar', sanity=False)
    car_features = pipeline(car_data)
    notcar_features = pipeline(notcar_data)

    # Create an array stack of feature vectors
    X_not_scaled = np.vstack((car_features, notcar_features)).astype(np.float32)
    scaler = StandardScaler()
    X = scaler.fit_transform(X_not_scaled)

    # Very important that the StandardScaler that's made here is reused for all future predictions
    scaler_fname = None
    if clf_fname is not None:
        # NOTE(review): joining without the dots drops every '.' in the path.
        # Kept as-is because parse_args() derives the default scaler name the
        # same way; change both together or not at all.
        scaler_fname = ''.join(clf_fname.split('.')[:-1]) + '_scaler.pkl'
        with open(scaler_fname, 'wb') as f:
            pickle.dump(scaler, f)

    # Generate the labels
    car_labels = np.ones(len(car_features))
    notcar_labels = np.zeros(len(notcar_features))
    y = np.append(car_labels, notcar_labels)

    print('Loaded, extracted features, scaled and labeled {:,} images, {:,} cars and {:,} not cars'.format(
        len(y), len(car_data), len(notcar_data)))
    # scaler_fname only exists when a classifier file name was supplied; the
    # unconditional print used to crash a training run started without one.
    if scaler_fname is not None:
        print('StandardScaler was pickled to {}'.format(scaler_fname))
    else:
        print('!!! WARNING !!! StandardScaler was not stored to disk.')
    return X, y
def get_svm_model(train=False, X=None, y=None, subset_size=None, model_file=None):
    """Return a Support Vector Machine, either loaded from disk or freshly trained.

    Args:
        train: if True, training will be performed, otherwise a model is
            unpickled from `model_file`
        X: the feature data (training only)
        y: the label data (training only)
        subset_size: if None or negative, use all of the data, otherwise a
            random subset of this size. 80% of the selected data is fitted,
            20% is held back for the score that is printed at the end
        model_file: location of the pickled model to load when not training.
            Unpickling executes whatever the file contains, so only load
            files you trust
    Returns:
        the loaded or trained classifier
    Raises:
        AssertionError: if the arguments required for the chosen mode are
            missing, the model file does not exist, or X and y differ in length
    """
    if train is False:
        assert model_file is not None, 'Must specify a model file if not training'
        assert os.path.exists(model_file), 'Model file does not exist'
        with open(model_file, 'rb') as f:
            clf = pickle.load(f)
        print('Model was loaded from disk at location: {}.'.format(model_file))
        for line in repr(clf).split('\n'):
            print(' ' + line)
        return clf

    assert X is not None, 'Must supply features for training'
    assert y is not None, 'Must supply labels for training'
    assert X.shape[0] == y.shape[0], 'Features and labels must be the same length'

    # Set the amount of data to use for training the classifier.
    if subset_size is None or subset_size < 0:
        subset_size = len(y)
    chosen = np.random.permutation(X.shape[0])[:subset_size]
    X_train, X_test, y_train, y_test = train_test_split(
        X[chosen], y[chosen], test_size=0.2, random_state=42, shuffle=True)

    print('Fitting data...')

    # Flip this switch to run a parameter search instead of the fixed model.
    use_grid_search = False
    # Parameters for GridSearchCV
    # NOTE(review): 0.001 is listed twice for the rbf kernel; possibly 0.01 was meant.
    search_space = [
        {'kernel': ['linear'], 'C': [0.001, 0.01, 1, 10]},
        {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [0.001, 0.001, 1, 10, 100]},
    ]
    clf = (GridSearchCV(SVC(probability=True), search_space, cv=3, verbose=9)
           if use_grid_search
           else SVC(kernel='linear', C=1, probability=True))
    clf.fit(X_train, y_train)
    print(clf)
    if use_grid_search:
        print('Best Params: {}'.format(clf.best_params_))
        print('Best Score: {}'.format(clf.best_score_))
    print('Final score on test set: {}'.format(clf.score(X_test, y_test)))
    print()
    return clf
def predict(img, clf, scaler_pkl, fname='unknown', car=None, vis=False, verbose=False):
    """Extract features and run the classifier on an image.

    The image is resized to 64x64, run through `pipeline()`, scaled with the
    pickled scaler and classified. The scaler is unpickled on the first call
    only and then cached on the function object (`predict.scaler`), so a
    different `scaler_pkl` passed on a later call is not picked up.

    Args:
        img: the image to predict against
        clf: the classifier
        scaler_pkl: the pickled scaler for this model (must be reused from when the data
            was originally fit)
        fname: the filename of the original file containing the image (used for logging)
        car: if known (while testing), whether or not this is a car image
        vis: not used by this function
        verbose: if True, log positive predictions even when `car` is unknown
    Returns:
        The prediction is 1 if the prediction is 'car' or 0 if the prediction is 'not car'
    """
    img_new = cv2.resize(img, (64, 64))
    features = pipeline([[fname, img_new, 0]])

    if 'scaler' not in predict.__dict__:
        # Close the pickle file as soon as the scaler has been read.
        with open(scaler_pkl, 'rb') as f:
            predict.scaler = pickle.load(f)
    features = predict.scaler.transform(features)

    prediction = clf.predict(features)[0]

    # A line is logged only for positive predictions, and only when the truth
    # value was supplied or verbose output was requested.
    if (car is not None or verbose) and prediction == 1:
        if car is None:
            grade = '??????'
        elif car:
            grade = 'CORRECT'
        else:
            grade = 'INCORRECT'
        # The probabilities are only ever printed, so compute them here rather
        # than for every window that is classified.
        probs = clf.predict_proba(features)[0]
        print('{:35}, predicts: {} = {:6} --> {:9}, probabilities: ({}, {})'.format(
            fname, prediction, 'car', grade, probs[0], probs[1]))
    return int(prediction)
#######################################################################################################
#
# Main function
#
#######################################################################################################
def main():
"""Main entry point
"""
global conf
# Create a history deque to track car detection boxes
main.hist_size = 5
main.enable_boxhist = True
# Also create a history of heatmaps. The sum of all heatmaps will be
# used to detect objects instead of using just the heatmap from the current
# image
main.heat_size = 5
main.enable_heathist = True
# Threshold for the heatmap. Any pixels in the heatmap less than this
# value will be forced to zero
main.threshold = 3
# To improve processing times, only process every Nth frame when debugging.
# For all other frames, return the current image with the last known rectangles redrawn
main.n = 2
#
# Far bounding boxes
#
main.far_window_size = (64, 64)
main.far_overlap = 0.25
main.far_start_pos = (700, 400)
main.far_end_pos = (1200, 685)
#
# Middle bounding boxes
#
main.mid_window_size = (128, 128)
main.mid_overlap = 0.3
main.mid_start_pos = (700, 380)
main.mid_end_pos = (1280, 580)
#
# Near bounding boxes
#
main.near_window_size = (192, 192)
main.near_overlap = 0.5
main.near_start_pos = (700, 375)
main.near_end_pos = (1280, 685)
#
main.frame_box_history = deque(maxlen=main.hist_size)
main.frame_heat_history = deque(maxlen=main.heat_size)
main.img_hist = deque(maxlen=main.n)
if args.train is False:
main.clf = get_svm_model(model_file=args.clf_fname)
# The lines below create dirs that will be used to store the
# heat map pipeliine in static images. This is very helpful
# for debugging. Note that everytime this script is called
# these dirs are wiped and recreated from scratch.
if args.debug:
main.dst1 = './output/heat1/'
main.dst2 = './output/heat2/'
main.dst3 = './output/heat3/'
if os.path.exists(main.dst1):
shutil.rmtree(main.dst1)
if os.path.exists(main.dst2):
shutil.rmtree(main.dst2)
if os.path.exists(main.dst3):
shutil.rmtree(main.dst3)
os.makedirs(main.dst1)
os.makedirs(main.dst2)
os.makedirs(main.dst3)
if args.input_video is not None:
assert args.output_video is not None, 'Must specify an output video.'
assert args.clf_fname is not None, 'Must provide an SVM model.'
print('Processing video...')
if args.video_start is not None:
assert args.video_end is not None, 'Must specify an end time as well'
input_video = video_extract(args.input_video, int(args.video_start), int(args.video_end))
else:
input_video = args.input_video
vin = VideoFileClip(input_video)
conf = Bunch(main.__dict__)
vout = vin.fl_image(video_pipeline)
vout.write_videofile(args.output_video, audio=False)
else:
if args.save_file is not None and os.path.exists(args.save_file):
print('!!! WARNING !!! Previous model ({}) to be overwritten.'.format(args.save_file))
X, y = get_xy(clf_fname=args.save_file, length=args.trainsize)