def track_person(self, image):
    image_np_expanded = np.expand_dims(image, axis=0)
    # Actual detection.
    (boxes, scores, classes, num) = self.sess.run(
        [
            self.detection_boxes, self.detection_scores,
            self.detection_classes, self.num_detections
        ],
        feed_dict={self.image_tensor: image_np_expanded})
    locations = vis_util.find_person_custom(
        image, np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32), np.squeeze(scores))
    trackers = self.tracker.update(locations, image)
    for d in trackers:
        d = d.astype(np.int32)
        color = vis_util.STANDARD_COLORS[d[4] % len(vis_util.STANDARD_COLORS)]
        vis_util.draw_bounding_box_on_image_array(
            image,
            d[1],
            d[0],
            d[3],
            d[2],
            color=color,
            use_normalized_coordinates=False)
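# track_person above assumes self.sess and the detection tensors were created
# elsewhere. A minimal setup sketch, assuming a frozen TF1 Object Detection
# API graph (path_to_ckpt and the SORT-style tracker are assumptions; the
# tensor names are the standard ones, also used in Predict further below):
import tensorflow as tf

class PersonTracker:
    def __init__(self, path_to_ckpt, tracker):
        # Load the frozen inference graph once and cache the I/O tensors.
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.compat.v1.GraphDef()
            with tf.io.gfile.GFile(path_to_ckpt, 'rb') as fid:
                od_graph_def.ParseFromString(fid.read())
                tf.import_graph_def(od_graph_def, name='')
        self.sess = tf.compat.v1.Session(graph=detection_graph)
        get = detection_graph.get_tensor_by_name
        self.image_tensor = get('image_tensor:0')
        self.detection_boxes = get('detection_boxes:0')
        self.detection_scores = get('detection_scores:0')
        self.detection_classes = get('detection_classes:0')
        self.num_detections = get('num_detections:0')
        # Any tracker with an update(detections, image) method works here.
        self.tracker = tracker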
def main(img, detector, thresh=0.6, iou=0.2, display=True):
    """ main function. """
    # Read the input image.
    sess = tf.compat.v1.Session()
    frame = cv2.imread(img)
    set_boxes = []
    boxes, scores, classes = detector.detect_image(
        cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    # Non-maximum suppression
    if len(boxes) > 0:
        indices = tf.image.non_max_suppression(boxes,
                                               scores,
                                               20,
                                               iou_threshold=iou,
                                               score_threshold=thresh)
        with sess.as_default():
            indices = indices.eval()
        boxes = np.take(boxes, indices, 0)
        scores = np.take(scores, indices, 0)
        classes = np.take(classes, indices, 0)
        if isinstance(detector, TensorFlowDetector):
            box2color, box2name = filter_boxes(boxes,
                                               classes.astype(np.int32),
                                               scores,
                                               detector.get_class_index(),
                                               min_score_thresh=thresh)
            # Set boxes
            for box in box2color.keys():
                set_boxes.append(BBox(box, box2name[box], box2color[box]))
        elif isinstance(detector, RetinaNetDetector):
            for box, score, label in zip(boxes, scores, classes):
                color = tuple(label_color(label))
                label = f'{detector.label_to_name[int(label)]}: {score}'
                h, w = frame.shape[:2]
                xmin, ymin, xmax, ymax = box
                # Convert pixel coords to normalized [ymin, xmin, ymax, xmax].
                box = (ymin / h, xmin / w, ymax / h, xmax / w)
                set_boxes.append(BBox(box, label, color))
    # Draw boxes
    for box in set_boxes:
        ymin, xmin, ymax, xmax = box.box
        vis_util.draw_bounding_box_on_image_array(
            frame,
            ymin,
            xmin,
            ymax,
            xmax,
            color=box.color,
            display_str_list=box.name if isinstance(
                detector, TensorFlowDetector) else [box.name],
            use_normalized_coordinates=True)
    cv2.imwrite(f"{img.split('.')[0]}_output.jpg", frame)
    if display:
        cv2.imshow('detector', frame)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
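# Evaluating tf.image.non_max_suppression inside a session just to filter
# NumPy arrays is heavyweight. A pure-NumPy greedy NMS would do the same
# filtering without the session round-trip (a sketch, assuming boxes are
# rows of [ymin, xmin, ymax, xmax]):
import numpy as np

def nms_numpy(boxes, scores, max_output=20, iou_threshold=0.2,
              score_threshold=0.6):
    """Greedy NMS over [ymin, xmin, ymax, xmax] boxes; returns kept indices."""
    boxes = np.asarray(boxes, dtype=np.float32)
    scores = np.asarray(scores, dtype=np.float32)
    order = np.argsort(scores)[::-1]
    order = order[scores[order] >= score_threshold]
    keep = []
    while order.size > 0 and len(keep) < max_output:
        i = order[0]
        keep.append(i)
        # Intersection of the top-scoring box with all remaining boxes.
        top_left = np.maximum(boxes[i, :2], boxes[order[1:], :2])
        bottom_right = np.minimum(boxes[i, 2:], boxes[order[1:], 2:])
        wh = np.clip(bottom_right - top_left, 0.0, None)
        inter = wh[:, 0] * wh[:, 1]
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        areas = ((boxes[order[1:], 2] - boxes[order[1:], 0]) *
                 (boxes[order[1:], 3] - boxes[order[1:], 1]))
        iou = inter / (area_i + areas - inter)
        # Keep only boxes that do not overlap the chosen box too much.
        order = order[1:][iou <= iou_threshold]
    return np.asarray(keep, dtype=np.int64)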
def visualize_boxes_and_labels_on_image_array(image,
                                              boxes,
                                              classes,
                                              scores,
                                              category_index,
                                              max_boxes_to_draw=20,
                                              min_score_thresh=.5,
                                              line_thickness=4):
    box_to_display_str_map = collections.defaultdict(list)
    box_to_color_map = collections.defaultdict(str)
    for i in range(min(max_boxes_to_draw, boxes.shape[0])):
        if scores[i] > min_score_thresh:
            box = tuple(boxes[i].tolist())
            if classes[i] in category_index.keys():
                class_name = category_index[classes[i]]['name']
            else:
                class_name = 'N/A'
            # Add only people.
            if class_name == 'person':
                ymin, xmin, ymax, xmax = box
                display_str = '{}: {}%: {}'.format(
                    class_name, int(100 * scores[i]),
                    get_action(image, xmin, ymin, xmax, ymax))
                box_to_display_str_map[box].append(display_str)
                box_to_color_map[box] = vis_util.STANDARD_COLORS[
                    classes[i] % len(vis_util.STANDARD_COLORS)]
    # Draw all boxes onto image.
    for box, color in box_to_color_map.items():
        ymin, xmin, ymax, xmax = box
        vis_util.draw_bounding_box_on_image_array(
            image,
            ymin,
            xmin,
            ymax,
            xmax,
            color=color,
            thickness=line_thickness,
            display_str_list=box_to_display_str_map[box],
            use_normalized_coordinates=True)
    return image
def process(x):
    global COUNT
    if (scores[0][COUNT] > SCORE_THRESHOLD):
        # Box layout (normalized): x = [ymin, xmin, ymax, xmax]
        #   x[0]: top edge, distance down from the top
        #   x[1]: left edge, distance in from the left
        #   x[2]: bottom edge, distance down from the top
        #   x[3]: right edge, distance in from the left
        # Message format: Unix timestamp,Item ID,Classification,X,Y
        to_print = (str(time.time()) + "," + str(COUNT) + "," +
                    category_index.get(int(classes[0][COUNT]),
                                       {'name': 'N/A'})['name'] + "," +
                    str((x[1] + x[3]) / 2) + "," + str((x[0] + x[2]) / 2))
        conn.sendall(to_print.encode('utf-8'))
        if (VISUALIZE):
            # Draw box + label
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8,
                min_score_thresh=SCORE_THRESHOLD)
            # Draw a tiny box marking the detection center.
            vis_util.draw_bounding_box_on_image_array(
                image_np,
                ((x[0] + x[2]) / 2) - 0.005,
                ((x[1] + x[3]) / 2) - 0.005,
                ((x[0] + x[2]) / 2) + 0.005,
                ((x[1] + x[3]) / 2) + 0.005,
                color='blue',  # actually draws red
                thickness=8,
                display_str_list=("center",),  # must be a sequence, not a bare string
                use_normalized_coordinates=True)
    COUNT += 1
def expected_person(image,
                    boxes,
                    classes,
                    scores,
                    category_index,
                    filename,
                    max_boxes_to_draw=200,
                    min_score_thresh=.05,
                    draw_box=True):
    box_to_display_str_map = collections.defaultdict(list)
    box_to_color_map = collections.defaultdict(str)
    if not max_boxes_to_draw:
        max_boxes_to_draw = boxes.shape[0]
    expected_person = 0
    for i in range(min(max_boxes_to_draw, boxes.shape[0])):
        if scores is None or scores[i] > min_score_thresh:
            class_name = category_index[classes[i]]['name']
            if class_name == 'person':
                expected_person += scores[i]
                display_str = '{}: {}%'.format(class_name,
                                               int(100 * scores[i]))
                box = tuple(boxes[i].tolist())
                box_to_display_str_map[box].append(display_str)
                box_to_color_map[box] = 'DarkOrange'
    # Draw all boxes onto image.
    if draw_box:
        for box, display_str in box_to_display_str_map.items():
            ymin, xmin, ymax, xmax = box
            color = 'DarkOrange'
            vis_util.draw_bounding_box_on_image_array(
                image,
                ymin,
                xmin,
                ymax,
                xmax,
                color=color,
                thickness=4,
                display_str_list=display_str,
                use_normalized_coordinates=True)
        image_name = filename.replace(".jpg", "_anno.jpeg")
        im = Image.fromarray(image)
        im.save(image_name)
    return expected_person
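# Hypothetical usage of expected_person, assuming squeezed detection outputs
# (image_np, boxes, classes, scores, category_index) from a run like the
# sess.run snippets elsewhere in this file; the return value is a soft
# person count (sum of per-detection 'person' scores).
soft_count = expected_person(image_np, boxes, classes.astype(np.int32),
                             scores, category_index,
                             filename='frame_0001.jpg')
print('expected persons: %.2f' % soft_count)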
def test_draw_bounding_box_on_image_array(self):
    test_image = self.create_colorful_test_image()
    height_original = test_image.shape[0]
    width_original = test_image.shape[1]
    ymin = 0.25
    ymax = 0.75
    xmin = 0.4
    xmax = 0.6
    visualization_utils.draw_bounding_box_on_image_array(
        test_image, ymin, xmin, ymax, xmax)
    height_final = test_image.shape[0]
    width_final = test_image.shape[1]
    # Drawing in place must not change the image dimensions.
    self.assertEqual(height_original, height_final)
    self.assertEqual(width_original, width_final)
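# The test above relies on a create_colorful_test_image helper that is not
# shown. A minimal stand-in for the same test class, assuming any H x W x 3
# uint8 array will do (the real helper may build a different pattern):
import numpy as np

def create_colorful_test_image(self):
    # Stack three constant channels into a 100 x 200 RGB test image.
    red = np.full((100, 200), 255, dtype=np.uint8)
    green = np.full((100, 200), 128, dtype=np.uint8)
    blue = np.zeros((100, 200), dtype=np.uint8)
    return np.dstack((red, green, blue))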
def draw_bounding_boxes_fn(images, output):
    for img, ops in zip(images, output):
        for op in ops:
            ymin, xmin, ymax, xmax = op[0:4]
            cls_id = np.argmax(op[5:])
            color = colors[cls_id % len(colors)]  # TODO: test len(class_names)
            name = [class_names[cls_id]]
            draw_bounding_box_on_image_array(img,
                                             ymin,
                                             xmin,
                                             ymax,
                                             xmax,
                                             color=color,
                                             display_str_list=name,
                                             use_normalized_coordinates=True)
    return images
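# A hypothetical smoke test for draw_bounding_boxes_fn, assuming module-level
# colors and class_names lists and a YOLO-style row layout
# [ymin, xmin, ymax, xmax, confidence, per-class scores...] (op[4] is unused):
import numpy as np

colors = ['red', 'blue']          # assumed module-level globals
class_names = ['person', 'car']

# One black 416x416 image and one normalized box classified as 'person'.
images = [np.zeros((416, 416, 3), dtype=np.uint8)]
output = [np.array([[0.1, 0.1, 0.6, 0.5, 0.9, 0.8, 0.2]])]
drawn = draw_bounding_boxes_fn(images, output)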
def visualize_ordered_boxes_and_labels_on_image_array(
        image,
        boxes,
        classes,
        scores,
        category_index,
        use_normalized_coordinates=False,
        max_boxes_to_draw=20,
        min_score_thresh=0,
        agnostic_mode=False,
        line_thickness=4,
        groundtruth_box_visualization_color='black',
        skip_scores=False,
        skip_labels=False):
    # Create a display string (and color) for every box location, group any
    # boxes that correspond to the same location.
    categorical_colors = [
        'darkorchid', 'mediumorchid', 'violet', 'plum', 'mediumpurple',
        'royalblue', 'deepskyblue', 'darkturquoise', 'paleturquoise',
        'mediumspringgreen', 'lightseagreen', 'seagreen', 'olivedrab',
        'darkkhaki', 'gold', 'moccasin', 'orange', 'darkorange', 'coral',
        'orangered'
    ]  # 20 colors, one per box index
    box_to_display_str_map = collections.defaultdict(list)
    box_to_color_map = collections.defaultdict(str)
    if not max_boxes_to_draw:
        max_boxes_to_draw = len(boxes)
    for i in range(min(max_boxes_to_draw, len(boxes))):
        if scores is None or scores[i] > min_score_thresh:
            box = tuple(boxes[i])
            if scores is None:
                box_to_color_map[box] = groundtruth_box_visualization_color
            else:
                display_str = ''
                if not skip_labels:
                    if not agnostic_mode:
                        if classes[i] in category_index.keys():
                            class_name = category_index[classes[i]]['name']
                        else:
                            class_name = 'N/A'
                        display_str = str(class_name)
                if not skip_scores:
                    if not display_str:
                        display_str = '{}%'.format(int(100 * scores[i]))
                    else:
                        display_str = '{}: {}%'.format(
                            display_str, int(100 * scores[i]))
                box_to_display_str_map[box].append(display_str)
                if agnostic_mode:
                    box_to_color_map[box] = 'DarkOrange'
                else:
                    # Color by box order, not by class.
                    box_to_color_map[box] = categorical_colors[i]
    # Draw all boxes onto image.
    for box, color in box_to_color_map.items():
        # Boxes arrive as (left, right, top, bottom) in pixels; WIDTH and
        # HEIGHT are module-level globals used to normalize them, so callers
        # should pass use_normalized_coordinates=True.
        left, right, top, bottom = box
        ymin, xmin, ymax, xmax = (top / HEIGHT, left / WIDTH,
                                  bottom / HEIGHT, right / WIDTH)
        vis_util.draw_bounding_box_on_image_array(
            image,
            ymin,
            xmin,
            ymax,
            xmax,
            color=color,
            thickness=line_thickness,
            display_str_list=box_to_display_str_map[box],
            use_normalized_coordinates=use_normalized_coordinates)
    return image
def main(vid_choice,
         detector,
         detect_rate=5,
         thresh=0.6,
         iou=0.2,
         display=True):
    """ main function. """
    # Download video
    vid_path = download_video(vid_choice)
    cap = cv2.VideoCapture(vid_path)
    multi_tracker = cv2.MultiTracker_create()
    out = cv2.VideoWriter(
        f'{vid_choice}.mp4', cv2.VideoWriter_fourcc(*'mp4v'),
        cap.get(cv2.CAP_PROP_FPS),
        (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
         int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))
    progress = tqdm(total=cap.get(cv2.CAP_PROP_FRAME_COUNT))
    sess = tf.compat.v1.Session()
    count = 0
    tracking_boxes = []
    while cap.isOpened():
        progress.update(1)
        success, frame = cap.read()
        if not success:
            print('Video read failed...')
            break
        # Run a new detection:
        if count % detect_rate == 0:
            # Get detections
            boxes, scores, classes = detector.detect_image(
                cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            # Non-maximum suppression
            if len(boxes) > 0:
                indices = tf.image.non_max_suppression(
                    boxes, scores, 20, iou_threshold=iou,
                    score_threshold=thresh)
                with sess.as_default():
                    indices = indices.eval()
                boxes = np.take(boxes, indices, 0)
                scores = np.take(scores, indices, 0)
                classes = np.take(classes, indices, 0)
                tracking_boxes = []
                if isinstance(detector, TensorFlowDetector):
                    box2color, box2name = filter_boxes(
                        boxes,
                        classes.astype(np.int32),
                        scores,
                        detector.get_class_index(),
                        min_score_thresh=thresh)
                    # Get boxes, replace tracking boxes
                    for box in box2color.keys():
                        tracking_boxes.append(
                            BBox(box, box2name[box], box2color[box]))
                elif isinstance(detector, RetinaNetDetector):
                    for box, score, label in zip(boxes, scores, classes):
                        color = tuple(label_color(label))
                        label = f'{detector.label_to_name[int(label)]}: {score}'
                        h, w = frame.shape[:2]
                        xmin, ymin, xmax, ymax = box
                        box = (ymin / h, xmin / w, ymax / h, xmax / w)
                        tracking_boxes.append(BBox(box, label, color))
                # Set the tracker.
                multi_tracker = create_tracker(frame, tracking_boxes)
        else:
            # Use a tracker for the in-between frames:
            # get boxes from the tracker.
            success, boxes = multi_tracker.update(frame)
            meta = [(b.name, b.color) for b in tracking_boxes]
            tracking_boxes = []
            for i, newbox in enumerate(boxes):
                h, w = frame.shape[:2]
                ymin, xmin, ymax, xmax = newbox
                newbox = (ymin / h, xmin / w, ymax / h, xmax / w)
                tracking_boxes.append(BBox(newbox, meta[i][0], meta[i][1]))
        # Draw boxes
        for box in tracking_boxes:
            ymin, xmin, ymax, xmax = box.box
            vis_util.draw_bounding_box_on_image_array(
                frame,
                ymin,
                xmin,
                ymax,
                xmax,
                color=box.color,
                display_str_list=box.name if isinstance(
                    detector, TensorFlowDetector) else [box.name],
                use_normalized_coordinates=True)
        out.write(frame)
        if display:
            cv2.imshow('detector', frame)
            cv2.waitKey(1)
        count += 1
    cap.release()
    out.release()
    # Attach audio: extract the original track, then mux it into the new video.
    subprocess.call(
        f'ffmpeg -i "{vid_path}" -ab 160k -ac 2 -ar 44100 -vn '
        f'{vid_choice}_audio.wav',
        shell=True)
    subprocess.call(
        f'ffmpeg -i "{vid_choice}.mp4" -i {vid_choice}_audio.wav -c:v copy '
        f'-c:a aac -strict experimental -map 0:v:0 -map 1:a:0 '
        f'{vid_choice}_audio.mp4',
        shell=True)
image_np_expanded = np.expand_dims(image_np, axis=0)
# Actual detection.
(boxes, scores, classes, num) = sess.run(
    [detection_boxes, detection_scores, detection_classes, num_detections],
    feed_dict={image_tensor: image_np_expanded})
boxes = np.squeeze(boxes)
scores = np.squeeze(scores)
classes = np.squeeze(classes).astype(np.int32)
num = num.astype(np.int32)
for i in range(num[0]):
    # Keep confident detections of class 1 ('person' in the COCO label map).
    if scores[i] >= 0.5 and classes[i] == 1:
        # Box coordinates are normalized to [0, 1]: [ymin, xmin, ymax, xmax].
        vis_util.draw_bounding_box_on_image_array(
            image=image_np,
            ymin=boxes[i][0],
            xmin=boxes[i][1],
            ymax=boxes[i][2],
            xmax=boxes[i][3],
        )
        print(scores[i])
        print(boxes[i])
Image.fromarray(image_np).save(
    'output_images/' +
    '.'.join(image_path.split('.')[:-1]).split('/')[-1] + '_det.png')
print('time: %.3f' % (time.time() - pre_time))
def Predict(img, detection_graph, sess, MODEL_FOLDER, labels2show,
            threshold=0.7):
    # Grab path to current working directory
    CWD_PATH = os.getcwd()
    # Number of classes the object detector can identify
    NUM_CLASSES = 90
    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_FOLDER,
                                  'mscoco_complete_label_map.pbtxt')
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    # Color scheme for the different classes
    color_map = {
        'person': 'DeepSkyBlue',
        'dog': 'IndianRed',
        'cat': 'yellow',
        'chair': 'Cyan',
        'bottle': 'Orange'
    }
    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Output tensors are the detection boxes, scores, and classes.
    # Each box represents a part of the image where a particular object
    # was detected.
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name(
        'detection_classes:0')
    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    # Threshold of detection
    thresh = threshold
    items = []
    coordinates = []
    # If you want to resize to tune inference:
    # img = cv2.resize(img_org, (300, 300))
    img_expanded = np.expand_dims(img, axis=0)
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes,
         num_detections],
        feed_dict={image_tensor: img_expanded})
    objects = []
    for index, value in enumerate(classes[0]):
        object_dict = {}
        if scores[0, index] > thresh:
            object_dict[(
                category_index.get(value)).get('name')] = scores[0, index]
            objects.append(object_dict)
    # Get all the detected class labels in one list
    for y in objects:
        for keys in y.keys():
            m = list(y.keys())[0]
            items.append(m)
    # Get coordinates of the detected classes
    coordinates = vis_util.return_coordinates(
        img,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=10,
        min_score_thresh=thresh)
    new_items = []
    new_coordinates = []
    display_str_list = []
    for i, item in enumerate(items):
        if item.lower() in labels2show:
            new_items.append(item.lower())
            new_coordinates.append(coordinates[i][:-1])
            display_str_list.append(
                [item + ' : ' + str(int(coordinates[i][-1])) + '%'])
    for i, box in enumerate(new_coordinates):
        ymin, ymax, xmin, xmax = box[0], box[1], box[2], box[3]
        display_str = display_str_list[i]
        color = color_map[new_items[i]]
        vis_util.draw_bounding_box_on_image_array(
            img,
            ymin,
            xmin,
            ymax,
            xmax,
            color=color,
            thickness=4,
            display_str_list=display_str,
            use_normalized_coordinates=False)
    return new_coordinates, new_items, img
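# Note that return_coordinates is not part of the stock visualization_utils;
# it is a community patch. A minimal sketch of the contract Predict expects
# from it: pixel coordinates as [ymin, ymax, xmin, xmax] plus a percentage
# score, in detection order (the signature mirrors the call above).
def return_coordinates(image, boxes, classes, scores, category_index,
                       use_normalized_coordinates=True, line_thickness=10,
                       min_score_thresh=.5):
    height, width = image.shape[:2]
    coordinates = []
    for i in range(boxes.shape[0]):
        if scores[i] > min_score_thresh:
            ymin, xmin, ymax, xmax = boxes[i]
            if use_normalized_coordinates:
                ymin, ymax = ymin * height, ymax * height
                xmin, xmax = xmin * width, xmax * width
            # [ymin, ymax, xmin, xmax, score%] matches the unpacking in Predict.
            coordinates.append([ymin, ymax, xmin, xmax, 100 * scores[i]])
    return coordinates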
def process_current_image(self):
    frame = self.image
    # ------------------------------------------------------------------
    # Update the oTrack variable so tracking stays accurate without
    # touching the raw detection data.
    if (self.init == False) or (time.time() - self.timer > 5):
        # Clear all objects from the tracker when a failure occurs.
        if self.init == False:
            self.oTrack = []
        # Used to remove objects that are already being tracked.
        remove = []
        # Update the tracker information for each object while preventing
        # repeats and keeping box continuity.
        for o in self.obj_Locs:
            for i in range(0, len(self.oTrack)):
                t = self.oTrack[i]
                if (o[0] == t[0]) and \
                        abs(o[1][0] - t[1][0]) < o[1][2] / 2 and \
                        abs(o[1][1] - t[1][1]) < o[1][3] / 2:
                    self.oTrack[i] = o
                    remove.append(o)
                    self.oTrack[i][2] = t[2]
        self.oTrack = self.oTrack + self.obj_Locs
        # Remove duplicate objects.
        for r in remove:
            self.oTrack.remove(r)
        # Create the tracker object.
        self.mTrack = cv2.MultiTracker_create()
        # Add all items to the tracker to track multiple objects at once.
        for o in self.oTrack:
            # tracker_types = ['BOOSTING', 'MIL', 'KCF', 'TLD', 'MEDIANFLOW', 'GOTURN']
            ok = self.mTrack.add(cv2.TrackerKCF_create(), frame, o[1])
        self.init = True
        self.timer = time.time()
    # Track the objects as they move in the frame.
    ok, boxes = self.mTrack.update(frame)
    count = 0
    # Draw the boxes, assign tracking IDs, and update the tracking
    # information with the new locations.
    for newBox in boxes:
        self.oTrack[count][1] = tuple(newBox.tolist())
        if len(self.oTrack[count][2]) == 0:
            self.oTrack[count][2] = comm_util.assignID(self.oTrack[count])
        if ok:
            # Tracking success: newBox is (x, y, w, h) in pixels.
            visualization_utils.draw_bounding_box_on_image_array(
                frame,
                int(newBox[1]),
                int(newBox[0]),
                int(newBox[1] + newBox[3]),
                int(newBox[0] + newBox[2]),
                color='red',
                display_str_list=[self.oTrack[count][2]],
                use_normalized_coordinates=False,
                draw=True)
        else:
            # Tracking failure
            self.init = False
        count = count + 1
    # ------------------------------------------------------------------
    # Publish the new object information with updated locations and
    # tracking numbers.
    self.bboxU_pub.publish(json.dumps(self.oTrack))
    # Publish the image for use in other nodes.
    try:
        self.image_pub.publish(self.bridge.cv2_to_imgmsg(frame, 'bgr8'))
    except CvBridgeError as e:
        print(e)