def init_detection(self):
    rospy.loginfo("Building Graph for object detection")
    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth = True
    with self.detection_graph.as_default():
        self.sess = tf.Session(graph=self.detection_graph, config=config)
        # Define Input and Output tensors
        rospy.loginfo("detection graph context")
        try:
            self.image_tensor = self.detection_graph.get_tensor_by_name(
                'image_tensor:0')
            rospy.loginfo("image_tensor: {}".format(self.image_tensor))
            self.detection_boxes = self.detection_graph.get_tensor_by_name(
                'detection_boxes:0')
            rospy.loginfo("detection_boxes: {}".format(self.detection_boxes))
            self.detection_scores = self.detection_graph.get_tensor_by_name(
                'detection_scores:0')
            rospy.loginfo("detection_scores: {}".format(self.detection_scores))
            self.detection_classes = self.detection_graph.get_tensor_by_name(
                'detection_classes:0')
            rospy.loginfo("detection_classes: {}".format(self.detection_classes))
            self.num_detections = self.detection_graph.get_tensor_by_name(
                'num_detections:0')
            self.fps = FPS2(self.fps_interval).start()
        except Exception:
            rospy.logwarn("Unexpected error: {}".format(sys.exc_info()[0]))
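# --- Hedged sketch: FPS2 ---
# FPS2 is imported from the project's utilities and is not defined in this file.
# The class below is only a plausible stand-in that matches the interface used
# throughout these scripts (start()/update()/stop(), fps(), fps_local(),
# elapsed()), assuming an interval-based moving FPS counter; the details are
# assumptions, not the project's actual implementation.
import datetime

class FPS2:
    def __init__(self, interval=5):
        self._interval = interval      # seconds per windowed (local) fps estimate
        self._start = None
        self._end = None
        self._num_frames = 0
        self._local_start = None
        self._local_frames = 0
        self._local_fps = 0.0

    def start(self):
        self._start = datetime.datetime.now()
        self._local_start = self._start
        return self

    def update(self):
        self._num_frames += 1
        self._local_frames += 1
        elapsed = (datetime.datetime.now() - self._local_start).total_seconds()
        if elapsed >= self._interval:
            # refresh the windowed fps and start a new interval
            self._local_fps = round(self._local_frames / elapsed, 1)
            self._local_start = datetime.datetime.now()
            self._local_frames = 0

    def stop(self):
        self._end = datetime.datetime.now()

    def elapsed(self):
        return (self._end - self._start).total_seconds()

    def fps(self):
        return self._num_frames / self.elapsed()

    def fps_local(self):
        return self._local_fps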
def segmentation(detection_graph):
    vs = WebcamVideoStream(0, 1280, 720).start()
    resize_ratio = 1.0 * 513 / max(vs.real_width, vs.real_height)
    target_size = (int(resize_ratio * vs.real_width),
                   int(resize_ratio * vs.real_height))
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    fps = FPS2(5).start()
    # background_image = cv2.imread('b.jpg')
    # resized_background_image = cv2.resize(background_image, target_size)  # (384,513)
    print("Starting...")
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            while vs.isActive():
                image = cv2.resize(vs.read(), target_size)
                batch_seg_map = sess.run(
                    'SemanticPredictions:0',
                    feed_dict={'ImageTensor:0': [cv2.cvtColor(image, cv2.COLOR_BGR2RGB)]})
                # visualization
                seg_map = batch_seg_map[0]
                seg_map[seg_map != 15] = 0
                # bg_copy = resized_background_image.copy()
                mask = (seg_map == 15)
                # car = render(image)
                car = cv2.stylization(image, sigma_s=60, sigma_r=0.07)
                # gray0 = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
                # gray0 = cv2.cvtColor(gray0, cv2.COLOR_GRAY2RGB)
                print(car.shape)
                car[mask] = image[mask]
                # create_colormap(seg_map).astype(np.uint8)
                seg_image = np.stack((seg_map, seg_map, seg_map),
                                     axis=-1).astype(np.uint8)
                gray = cv2.cvtColor(seg_image, cv2.COLOR_BGR2GRAY)
                thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)[1]
                cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                                   cv2.CHAIN_APPROX_SIMPLE)
                try:
                    cv2.drawContours(
                        car, cnts, -1,
                        (randint(0, 255), randint(0, 255), randint(0, 255)), 2)
                except:
                    pass
                # ir = cv2.resize(car, (vs.real_width, vs.real_height))
                ir = car
                cv2.imshow('segmentation', ir)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                fps.update()
            fps.stop()
            vs.stop()
    cv2.destroyAllWindows()
def segmentation(detection_graph, label_names):
    # fixed input sizes as model needs resize either way
    vs = WebcamVideoStream(VIDEO_INPUT, 640, 480).start()
    resize_ratio = 1.0 * 513 / max(vs.real_width, vs.real_height)
    target_size = (int(resize_ratio * vs.real_width),
                   int(resize_ratio * vs.real_height))
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    fps = FPS2(FPS_INTERVAL).start()
    print("> Starting Segmentation")
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            while vs.isActive():
                image = cv2.resize(vs.read(), target_size)
                batch_seg_map = sess.run(
                    'SemanticPredictions:0',
                    feed_dict={'ImageTensor:0': [cv2.cvtColor(image, cv2.COLOR_BGR2RGB)]})
                # visualization
                seg_map = batch_seg_map[0]
                seg_image = create_colormap(seg_map).astype(np.uint8)
                cv2.addWeighted(seg_image, ALPHA, image, 1 - ALPHA, 0, image)
                vis_text(image, "fps: {}".format(fps.fps_local()), (10, 30))
                # boxes (ymin, xmin, ymax, xmax)
                if BBOX:
                    map_labeled = measure.label(seg_map, connectivity=1)
                    for region in measure.regionprops(map_labeled):
                        if region.area > MINAREA:
                            box = region.bbox
                            p1 = (box[1], box[0])
                            p2 = (box[3], box[2])
                            cv2.rectangle(image, p1, p2, (77, 255, 9), 2)
                            vis_text(image,
                                     label_names[seg_map[tuple(region.coords[0])]],
                                     (p1[0], p1[1] - 10))
                cv2.imshow('segmentation', image)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                fps.update()
            fps.stop()
            vs.stop()
    cv2.destroyAllWindows()
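# --- Hedged sketch: create_colormap and vis_text ---
# Both helpers are imported from elsewhere in the project and are not defined in
# this file. The versions below are plausible stand-ins: create_colormap follows
# the well-known PASCAL VOC bit-shift colormap used in the DeepLab demo, and
# vis_text is assumed to be a thin cv2.putText wrapper matching the calls above.
import numpy as np
import cv2

def create_colormap(seg_map):
    # build the 256-entry PASCAL VOC colormap and index it with the label map
    colormap = np.zeros((256, 3), dtype=int)
    ind = np.arange(256, dtype=int)
    for shift in reversed(range(8)):
        for channel in range(3):
            colormap[:, channel] |= ((ind >> channel) & 1) << shift
        ind >>= 3
    return colormap[seg_map]

def vis_text(image, text, pos):
    # draw overlay text in the same style the detection scripts use
    cv2.putText(image, text, pos, cv2.FONT_HERSHEY_SIMPLEX,
                0.75, (77, 255, 9), 2)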
def detection(detection_graph, category_index):
    # Session Config: Limit GPU Memory Usage
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = allow_memory_growth
    cur_frames = 0
    # Detection
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            # Define input and output Tensors for detection_graph
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for each of the objects.
            # Score is shown on the result image, together with the class label.
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            # fps calculation
            fps = FPS2(fps_interval).start()
            # Start Video Stream
            video_stream = cv2.VideoCapture(video_input)
            print("Press 'q' to Exit")
            while video_stream.isOpened():
                ret, image_np = video_stream.read()
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                # Actual detection.
                (boxes, scores, classes, num) = sess.run(
                    [detection_boxes, detection_scores, detection_classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                if visualize:
                    # Visualization of the results of a detection.
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_np,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=8)
                    cv2.imshow('object_detection', image_np)
                    # Exit Option
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                else:
                    cur_frames += 1
                    for box, score, _class in zip(np.squeeze(boxes), np.squeeze(scores),
                                                  np.squeeze(classes)):
                        if cur_frames % det_interval == 0 and score > det_th:
                            label = category_index[_class]['name']
                            print(label, score, box)
                    if cur_frames >= max_frames:
                        break
                fps.update()
            # End everything
            fps.stop()
            # cv2.VideoCapture has no stop(); release() closes the stream
            video_stream.release()
    cv2.destroyAllWindows()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
def detection(detection_graph, category_index, score, expand):
    outputs = ['num_detections', 'detection_boxes', 'detection_scores',
               'detection_classes', 'detection_masks']
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=log_device)
    config.gpu_options.allow_growth = allow_memory_growth
    cur_frames = 0
    print('Starting detection')
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            # Start Video Stream
            video_stream = WebcamVideoStream(video_input, width, height).start()
            print("Press 'q' to Exit")
            # Get handles to input and output tensors
            tensor_dict = get_tensordict(detection_graph, outputs)
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            if 'detection_masks' in tensor_dict:
                # real_width, real_height = get_image_shape()
                # Reframe is required to translate mask from box coordinates to
                # image coordinates and fit the image size.
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes,
                    video_stream.real_height, video_stream.real_width)
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            if split_model:
                score_out = detection_graph.get_tensor_by_name('Postprocessor/convert_scores:0')
                expand_out = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1:0')
                score_in = detection_graph.get_tensor_by_name('Postprocessor/convert_scores_1:0')
                expand_in = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1_1:0')
            # fps calculation
            fps = FPS2(fps_interval).start()
            cur_frames = 0
            while video_stream.isActive():
                image = video_stream.read()
                fps.update()
                # read video frame and expand dimensions
                if convert_rgb:
                    try:
                        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                        cvt = True
                    except:
                        print("Error converting BGR2RGB")
                        cvt = False
                # detection
                if split_model:
                    (score, expand) = sess.run(
                        [score_out, expand_out],
                        feed_dict={image_tensor: np.expand_dims(image, 0)})
                    output_dict = sess.run(tensor_dict,
                                           feed_dict={score_in: score, expand_in: expand})
                else:
                    output_dict = sess.run(
                        tensor_dict,
                        feed_dict={image_tensor: np.expand_dims(image, 0)})
                # num = int(output_dict['num_detections'][0])
                classes = output_dict['detection_classes'][0].astype(np.uint8)
                boxes = output_dict['detection_boxes'][0]
                scores = output_dict['detection_scores'][0]
                if 'detection_masks' in output_dict:
                    output_dict['detection_masks'] = output_dict['detection_masks'][0]
                # Visualization of the results of a detection.
                if visualize:
                    if convert_rgb and cvt:
                        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image,
                        boxes,
                        classes,
                        scores,
                        category_index,
                        instance_masks=output_dict.get('detection_masks'),
                        use_normalized_coordinates=True,
                        line_thickness=8)
                    if vis_text:
                        cv2.putText(image, "fps: {}".format(fps.fps_local()), (10, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (77, 255, 9), 2)
                    cv2.imshow('object_detection', image)
                    # Exit Option
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                else:
                    # Exit after max frames if no visualization
                    cur_frames += 1
                    for box, score, _class in zip(boxes, scores, classes):
                        if cur_frames % det_interval == 0 and score > det_th:
                            label = category_index[_class]['name']
                            print("label: {}\nscore: {}\nbox: {}".format(label, score, box))
                    if cur_frames >= max_frames:
                        break
            # End everything
            fps.stop()
            video_stream.stop()
    cv2.destroyAllWindows()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
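# --- Hedged sketch: WebcamVideoStream and get_tensordict ---
# Both helpers are imported from the project's utility module and are not
# defined in this file. The code below is a plausible stand-in that matches the
# interface used above (read()/isActive()/stop(), real_width/real_height, and a
# name-to-tensor dict); treat it as an assumption, not the actual implementation.
import threading
import cv2

class WebcamVideoStream:
    def __init__(self, src=0, width=640, height=480):
        self.stream = cv2.VideoCapture(src)
        self.stream.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        self.stream.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
        # actual size delivered by the camera (may differ from the request)
        self.real_width = int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.real_height = int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.grabbed, self.frame = self.stream.read()
        self.stopped = False

    def start(self):
        t = threading.Thread(target=self._update)
        t.daemon = True
        t.start()
        return self

    def _update(self):
        # keep grabbing frames in the background so read() never blocks
        while not self.stopped:
            self.grabbed, self.frame = self.stream.read()
        self.stream.release()

    def read(self):
        return self.frame

    def isActive(self):
        return self.grabbed and not self.stopped

    def stop(self):
        self.stopped = True

def get_tensordict(graph, output_names):
    # map each requested output name to its tensor, if present in the graph
    ops = {op.name for op in graph.get_operations()}
    return {name: graph.get_tensor_by_name(name + ':0')
            for name in output_names if name in ops}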
def segmentation(detection_graph):
    vs = WebcamVideoStream(0, 640, 480).start()
    resize_ratio = 1.0 * 513 / max(vs.real_width, vs.real_height)
    target_size = (int(resize_ratio * vs.real_width),
                   int(resize_ratio * vs.real_height))
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    fps = FPS2(5).start()
    filelist = [file for file in os.listdir('backgrounds') if file.endswith('.jpg')]
    num_files = len(filelist)
    background_image = []
    resized_background_image = []
    for x in filelist:
        # read each background relative to the 'backgrounds' directory
        background_image.append(cv2.imread(os.path.join('backgrounds', x)))
        resized_background_image.append(
            cv2.resize(background_image[-1], target_size))
    # fff = 0
    # background_image = cv2.imread('b.jpg')
    # resized_background_image = cv2.resize(
    #     background_image, target_size)  # (384,513)
    # background_image2 = cv2.imread('b2.jpg')
    # resized_background_image2 = cv2.resize(
    #     background_image2, target_size)  # (384,513)
    # background_image3 = cv2.imread('b3.jpg')
    # resized_background_image3 = cv2.resize(
    #     background_image3, target_size)  # (384,513)
    mike_background_image = cv2.imread('mike.png')
    mike_background_image = cv2.resize(
        mike_background_image,
        (int(resize_ratio * vs.real_width / 3),
         int(resize_ratio * vs.real_height * 3 / 4)))  # (384,513)
    # Uncomment to save output
    # out = cv2.VideoWriter('outpy.avi', cv2.VideoWriter_fourcc(
    #     'M', 'J', 'P', 'G'), 1, (vs.real_height, vs.real_width))  # CHANGE
    print("Starting...")
    cv2.namedWindow(
        'segmentation',
        16)  # 16 means WINDOW_GUI_NORMAL, to disable right click context menu
    cv2.setMouseCallback('segmentation', next_bg)
    global img_num, mike_flag
    img_num = 0
    mike_flag = False
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            while vs.isActive():
                image = cv2.resize(vs.read(), target_size)
                batch_seg_map = sess.run(
                    'SemanticPredictions:0',
                    feed_dict={'ImageTensor:0': [cv2.cvtColor(image, cv2.COLOR_BGR2RGB)]})
                # visualization
                seg_map = batch_seg_map[0]
                seg_map[seg_map != 15] = 0
                bg_copy = resized_background_image[img_num % num_files].copy()
                # if fff == 0:
                #     bg_copy = resized_background_image.copy()
                # elif fff == 1:
                #     bg_copy = resized_background_image2.copy()
                # elif fff == 2:
                #     bg_copy = resized_background_image3.copy()
                mask = (seg_map == 15)
                bg_copy[mask] = image[mask]
                # create_colormap(seg_map).astype(np.uint8)
                seg_image = np.stack((seg_map, seg_map, seg_map),
                                     axis=-1).astype(np.uint8)
                gray = cv2.cvtColor(seg_image, cv2.COLOR_BGR2GRAY)
                thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)[1]
                # OpenCV 3.x findContours returns (image, contours, hierarchy)
                _, cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                                      cv2.CHAIN_APPROX_SIMPLE)
                try:
                    cv2.drawContours(
                        bg_copy, cnts, -1,
                        (randint(0, 255), randint(0, 255), randint(0, 255)), 2)
                except:
                    pass
                if mike_flag:
                    x_offset = 150
                    y_offset = 95
                    bg_copy[y_offset:y_offset + mike_background_image.shape[0],
                            x_offset:x_offset + mike_background_image.shape[1]][
                                mike_background_image != 0] = mike_background_image[
                                    mike_background_image != 0]
                ir = cv2.resize(bg_copy, (vs.real_width, vs.real_height))
                cv2.imshow('segmentation', ir)
                # read the key once so a single press is not consumed by
                # multiple waitKey() calls
                key = cv2.waitKey(1) & 0xFF
                if key == ord('q'):
                    break
                elif key == ord('a'):
                    fff = 0
                elif key == ord('b'):
                    fff = 1
                elif key == ord('c'):
                    fff = 2
                fps.update()
                # out.write(ir)
            fps.stop()
            vs.stop()
            # out.release()
    cv2.destroyAllWindows()
cur_frames = 0
# Detection
with detection_graph.as_default():
    with tf.Session(graph=detection_graph, config=config) as sess:
        # Define input and output Tensors for detection_graph
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # Score is shown on the result image, together with the class label.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        # fps calculation
        fps = FPS2(fps_interval).start()
        # Start Video Stream
        video_stream = WebcamVideoStream(video_input, width, height).start()
        print("Press 'q' to Exit")
        while video_stream.isActive():
            # read a batch of frames from the stream
            image_np_list = []
            for _ in range(batch_size):
                image_np_list.append(video_stream.read())
                # fps calculation
                fps.update()
            image_np_expanded = np.asarray(image_np_list)
            # Actual detection.
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            if visualize:
def detection(detection_graph, category_index, score, expand):
    print("Building Graph")
    # Session Config: allow separate GPU/CPU addressing and limit memory allocation
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=log_device)
    config.gpu_options.allow_growth = allow_memory_growth
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            # Define Input and Output tensors
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            if split_model:
                score_out = detection_graph.get_tensor_by_name('Postprocessor/convert_scores:0')
                expand_out = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1:0')
                score_in = detection_graph.get_tensor_by_name('Postprocessor/convert_scores_1:0')
                expand_in = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1_1:0')
                # Threading
                gpu_worker = SessionWorker("GPU", detection_graph, config)
                cpu_worker = SessionWorker("CPU", detection_graph, config)
                gpu_opts = [score_out, expand_out]
                cpu_opts = [detection_boxes, detection_scores, detection_classes, num_detections]
                gpu_counter = 0
                cpu_counter = 0
            # Start Video Stream, FPS calculation and Tracker
            fps = FPS2(fps_interval).start()
            video_stream = WebcamVideoStream(video_input, width, height).start()
            # tracker = create_tracker(tracker_type)
            tracker = KCF.kcftracker(False, True, False, False)
            real_width = video_stream.real_width
            real_height = video_stream.real_height
            tracker_counter = 0
            track = False
            print("Press 'q' to Exit")
            print('Starting Detection')
            while video_stream.isActive():
                # Detection
                if not (use_tracker and track):
                    if split_model:
                        # split model in separate gpu and cpu session threads
                        if gpu_worker.is_sess_empty():
                            # read video frame, expand dimensions and convert to rgb
                            frame = video_stream.read()
                            frame_expanded = np.expand_dims(
                                cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), axis=0)
                            # put new queue
                            gpu_feeds = {image_tensor: frame_expanded}
                            if visualize:
                                gpu_extras = frame  # for visualization frame
                            else:
                                gpu_extras = None
                            gpu_worker.put_sess_queue(gpu_opts, gpu_feeds, gpu_extras)
                        g = gpu_worker.get_result_queue()
                        if g is None:
                            # gpu thread has no output queue. ok skip, let's check cpu thread.
                            gpu_counter += 1
                        else:
                            # gpu thread has output queue.
                            gpu_counter = 0
                            score, expand, frame = g["results"][0], g["results"][1], g["extras"]
                            if cpu_worker.is_sess_empty():
                                # When cpu thread has no next queue, put new queue.
                                # else, drop gpu queue.
                                cpu_feeds = {score_in: score, expand_in: expand}
                                cpu_extras = frame
                                cpu_worker.put_sess_queue(cpu_opts, cpu_feeds, cpu_extras)
                        c = cpu_worker.get_result_queue()
                        if c is None:
                            # cpu thread has no output queue. ok, nothing to do. continue
                            cpu_counter += 1
                            time.sleep(0.005)
                            continue  # If CPU RESULT has not been set yet, no fps update
                        else:
                            cpu_counter = 0
                            boxes, scores, classes, num, frame = (c["results"][0], c["results"][1],
                                                                  c["results"][2], c["results"][3],
                                                                  c["extras"])
                    else:
                        # default session
                        frame = video_stream.read()
                        frame_expanded = np.expand_dims(
                            cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), axis=0)
                        (boxes, scores, classes, num) = sess.run(
                            [detection_boxes, detection_scores, detection_classes, num_detections],
                            feed_dict={image_tensor: frame_expanded})
                    # reformat detection
                    num = int(num)
                    boxes = np.squeeze(boxes)
                    classes = np.squeeze(classes).astype(np.int32)
                    scores = np.squeeze(scores)
                    # visualize detection
                    vis = visualize_detection(frame, boxes, classes, scores, category_index, fps)
                    if not vis:
                        break
                    # Activate Tracker
                    if use_tracker and num <= num_trackers:
                        tracker_frame = frame
                        track = True
                        first_track = True
                # Tracking
                else:
                    frame = video_stream.read()
                    if first_track:
                        trackers = []
                        tracker_boxes = boxes
                        for box in boxes[~np.all(boxes == 0, axis=1)]:
                            # give each detected box its own tracker instance
                            tracker = KCF.kcftracker(False, True, False, False)
                            tracker.init(conv_detect2track(box, real_width, real_height),
                                         tracker_frame)
                            trackers.append(tracker)
                        first_track = False
                        # print("A: {}".format(boxes[~np.all(boxes == 0, axis=1)]))
                    i = 0
                    for tracker in trackers:
                        tracker_box = tracker.update(frame)
                        # print("B: {}".format(tracker_box))
                        tracker_boxes[i, :] = conv_track2detect(tracker_box, real_width, real_height)
                        i += 1
                        # p1 = (tracker_box[0], tracker_box[1])
                        # p2 = (tracker_box[0] + tracker_box[2], tracker_box[1] + tracker_box[3])
                        # cv2.rectangle(frame, p1, p2, (255, 0, 0), 2)
                        # cv2.imshow('object_detection', frame)
                    # print("C: {}".format(tracker_boxes[~np.all(tracker_boxes == 0, axis=1)]))
                    vis = visualize_detection(frame, tracker_boxes, classes, scores,
                                              category_index, fps)
                    if not vis:
                        break
                    tracker_counter += 1
                    # tracker_frame = frame
                    if tracker_counter >= tracker_frames:
                        track = False
                        tracker_counter = 0
                fps.update()
            # End everything
            if split_model:
                gpu_worker.stop()
                cpu_worker.stop()
            fps.stop()
            video_stream.stop()
    cv2.destroyAllWindows()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
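# --- Hedged sketch: SessionWorker ---
# SessionWorker is imported from the project's threading utilities and is not
# defined in this file. Below is only a minimal, assumed stand-in that matches
# the interface used above: put_sess_queue(opts, feeds, extras),
# get_result_queue() -> {"results": ..., "extras": ...}, is_sess_empty(), stop().
import threading
try:
    import queue            # Python 3
except ImportError:
    import Queue as queue   # Python 2
import tensorflow as tf

class SessionWorker:
    def __init__(self, tag, graph, config):
        self.tag = tag
        self.sess = tf.Session(graph=graph, config=config)
        self.sess_queue = queue.Queue(maxsize=1)
        self.result_queue = queue.Queue(maxsize=1)
        self.running = True
        t = threading.Thread(target=self._execution)
        t.daemon = True
        t.start()

    def _execution(self):
        # run queued sess.run() jobs until stop() is called
        while self.running:
            try:
                job = self.sess_queue.get(timeout=0.05)
            except queue.Empty:
                continue
            results = self.sess.run(job["opts"], feed_dict=job["feeds"])
            self.result_queue.put({"results": results, "extras": job["extras"]})
            self.sess_queue.task_done()

    def is_sess_empty(self):
        return self.sess_queue.empty()

    def put_sess_queue(self, opts, feeds, extras=None):
        self.sess_queue.put({"opts": opts, "feeds": feeds, "extras": extras})

    def get_result_queue(self):
        return None if self.result_queue.empty() else self.result_queue.get()

    def stop(self):
        self.running = False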
def segmentation(detection_graph):
    vs = cv2.VideoCapture('rtsp://192.168.43.233:8080/h264_ulaw.sdp')
    width = int(vs.get(3))
    height = int(vs.get(4))
    fps_v = int(vs.get(5))
    resize_ratio = 1.0 * 513 / max(width, height)
    target_size = (int(resize_ratio * height),
                   int(resize_ratio * width))  # reversed
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    fps = FPS2(5).start()
    background_image = cv2.imread('b.jpg')
    resized_background_image = cv2.resize(background_image, target_size)  # (384,513)
    out = cv2.VideoWriter('outpy.avi',
                          cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                          fps_v, (height, width))
    print("Starting...")
    ret = True
    counting = 0
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            while True:
                ret, frame = vs.read()
                if not ret:
                    break
                frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
                image = cv2.resize(frame, target_size)
                bg_copy = resized_background_image.copy()
                batch_seg_map = sess.run(
                    'SemanticPredictions:0',
                    feed_dict={'ImageTensor:0': [cv2.cvtColor(image, cv2.COLOR_BGR2RGB)]})
                # visualization
                seg_map = batch_seg_map[0]
                seg_map[seg_map != 15] = 0
                # bg_copy = resized_background_image.copy()
                mask = (seg_map == 15)
                bg_copy[mask] = image[mask]
                # create_colormap(seg_map).astype(np.uint8)
                seg_image = np.stack((seg_map, seg_map, seg_map),
                                     axis=-1).astype(np.uint8)
                gray = cv2.cvtColor(seg_image, cv2.COLOR_BGR2GRAY)
                thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)[1]
                major = cv2.__version__.split('.')[0]
                if major == '3':
                    _, cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                                          cv2.CHAIN_APPROX_SIMPLE)
                else:
                    cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                                       cv2.CHAIN_APPROX_SIMPLE)
                try:
                    cv2.drawContours(
                        bg_copy, cnts, -1,
                        (randint(0, 255), randint(0, 255), randint(0, 255)), 2)
                except:
                    pass
                # ir = cv2.resize(bg_copy, (960, 720))
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                fps.update()
                combo_resized = cv2.resize(bg_copy, (height, width))
                cv2.imshow('segmentation', bg_copy)
                # out.write(combo_resized)
                counting += 1
            fps.stop()
            # out.release()
            vs.release()
    cv2.destroyAllWindows()
def detection(detection_graph, category_index, score, expand):
    print("Building Graph")
    # Session Config: allow separate GPU/CPU addressing and limit memory allocation
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=log_device)
    config.gpu_options.allow_growth = allow_memory_growth
    cur_frames = 0
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            # Define Input and Output tensors
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            if split_model:
                score_out = detection_graph.get_tensor_by_name('Postprocessor/convert_scores:0')
                expand_out = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1:0')
                score_in = detection_graph.get_tensor_by_name('Postprocessor/convert_scores_1:0')
                expand_in = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1_1:0')
                # Threading
                gpu_worker = SessionWorker("GPU", detection_graph, config)
                cpu_worker = SessionWorker("CPU", detection_graph, config)
                gpu_opts = [score_out, expand_out]
                cpu_opts = [detection_boxes, detection_scores, detection_classes, num_detections]
                gpu_counter = 0
                cpu_counter = 0
            # Start Video Stream and FPS calculation
            fps = FPS2(fps_interval).start()
            # video_stream = WebcamVideoStream(video_input, width, height).start()
            cap = cv2.VideoCapture(video_input)
            cur_frames = 0
            print("Press 'q' to Exit")
            print('Starting Detection')
            kcf = False
            box_to_color_map = collections.defaultdict(str)
            box_to_display_str_map = collections.defaultdict(list)
            tracker = KCF.kcftracker(False, True, False, False)
            count = 0
            ret1, image = cap.read()
            im_height, im_width = image.shape[:2]
            # downscale by 3; keep integers for cv2.resize and VideoWriter
            im_height = int(im_height / 3)
            im_width = int(im_width / 3)
            print(im_height, im_width)
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter('output.avi', fourcc, 30.0, (im_width, im_height))
            while True:
                count += 1
                if count == 500:
                    break
                if kcf and use_kcf:
                    start = timer()
                    ret1, image = cap.read()
                    image = cv2.resize(image, (im_width, im_height))
                    image_expanded = np.expand_dims(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), axis=0)
                    for box, color in box_to_color_map.items():
                        label = box_to_display_str_map[box]
                        ymin, xmin, ymax, xmax = box
                        xmin = int(xmin * im_width)
                        ymin = int(ymin * im_height)
                        xmax = int(xmax * im_width)
                        ymax = int(ymax * im_height)
                        tracker.init([xmin, ymin, xmax - xmin, ymax - ymin], preframe)
                        boundingbox = tracker.update(image)
                        # list() so the result can be indexed under Python 3
                        boundingbox = list(map(int, boundingbox))
                        cv2.rectangle(image, (boundingbox[0], boundingbox[1]),
                                      (boundingbox[0] + boundingbox[2],
                                       boundingbox[1] + boundingbox[3]), (0, 255, 255), 2)
                        cv2.putText(image, label[0],
                                    (boundingbox[0], boundingbox[1] + boundingbox[3]),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (77, 255, 9), 2)
                        # print('hello')
                    if vis_text:
                        cv2.putText(image, "fps: {}".format(fps.fps_local()), (10, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (77, 255, 9), 2)
                    out.write(image)
                    cv2.imshow('object_detection', image)
                    cv2.waitKey(1)
                    fps.update()
                    kcf = False
                    time_elapsed = timer() - start
                    print("Detection time in frame using KCF: {:.4f} sec fps {:.4f}"
                          .format(time_elapsed, 1 / time_elapsed))
                else:
                    start = timer()
                    kcf = True
                    # actual Detection
                    if split_model:
                        # split model in separate gpu and cpu session threads
                        if gpu_worker.is_sess_empty():
                            # read video frame, expand dimensions and convert to rgb
                            # image = video_stream.read()
                            ret1, image = cap.read()
                            preframe = image
                            image_expanded = np.expand_dims(
                                cv2.cvtColor(image, cv2.COLOR_BGR2RGB), axis=0)
                            # put new queue
                            gpu_feeds = {image_tensor: image_expanded}
                            if visualize:
                                gpu_extras = image  # for visualization frame
                            else:
                                gpu_extras = None
                            gpu_worker.put_sess_queue(gpu_opts, gpu_feeds, gpu_extras)
                        g = gpu_worker.get_result_queue()
                        if g is None:
                            # gpu thread has no output queue. ok skip, let's check cpu thread.
                            gpu_counter += 1
                        else:
                            # gpu thread has output queue.
                            gpu_counter = 0
                            score, expand, image = g["results"][0], g["results"][1], g["extras"]
                            if cpu_worker.is_sess_empty():
                                # When cpu thread has no next queue, put new queue.
                                # else, drop gpu queue.
                                cpu_feeds = {score_in: score, expand_in: expand}
                                cpu_extras = image
                                cpu_worker.put_sess_queue(cpu_opts, cpu_feeds, cpu_extras)
                        c = cpu_worker.get_result_queue()
                        if c is None:
                            # cpu thread has no output queue. ok, nothing to do. continue
                            cpu_counter += 1
                            time.sleep(0.005)
                            continue
                        else:
                            cpu_counter = 0
                            boxes, scores, classes, num, image = (c["results"][0], c["results"][1],
                                                                  c["results"][2], c["results"][3],
                                                                  c["extras"])
                    else:
                        # default session
                        # image = video_stream.read()
                        ret1, image = cap.read()
                        image = cv2.resize(image, (im_width, im_height))
                        preframe = image
                        image_expanded = np.expand_dims(
                            cv2.cvtColor(image, cv2.COLOR_BGR2RGB), axis=0)
                        (boxes, scores, classes, num) = sess.run(
                            [detection_boxes, detection_scores, detection_classes, num_detections],
                            feed_dict={image_tensor: image_expanded})
                    # Visualization of the results of a detection.
                    if visualize:
                        box_to_color_map, box_to_display_str_map = \
                            vis_util.visualize_boxes_and_labels_on_image_array(
                                image,
                                np.squeeze(boxes),
                                np.squeeze(classes).astype(np.int32),
                                np.squeeze(scores),
                                category_index,
                                use_normalized_coordinates=True,
                                line_thickness=8)
                        for box, color in box_to_color_map.items():
                            label = box_to_display_str_map[box]
                            ymin, xmin, ymax, xmax = box
                            xmin = int(xmin * im_width)
                            ymin = int(ymin * im_height)
                            xmax = int(xmax * im_width)
                            ymax = int(ymax * im_height)
                            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 255), 2)
                            cv2.putText(image, label[0], (xmin, ymax),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (77, 255, 9), 2)
                        if vis_text:
                            cv2.putText(image, "fps: {}".format(fps.fps_local()), (10, 30),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.75, (77, 255, 9), 2)
                        out.write(image)
                        cv2.imshow('object_detection', image)
                        # Exit Option
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break
                        time_elapsed = timer() - start
                        print("Detection time in frame using SSD: {:.4f} sec fps {:.4f}"
                              .format(time_elapsed, 1 / time_elapsed))
                    else:
                        cur_frames += 1
                        # Exit after max frames if no visualization
                        for box, score, _class in zip(np.squeeze(boxes), np.squeeze(scores),
                                                      np.squeeze(classes)):
                            if cur_frames % det_interval == 0 and score > det_th:
                                label = category_index[_class]['name']
                                print("label: {}\nscore: {}\nbox: {}".format(label, score, box))
                        if cur_frames >= max_frames:
                            break
                    fps.update()
            # End everything
            if split_model:
                gpu_worker.stop()
                cpu_worker.stop()
            fps.stop()
            out.release()
            cap.release()
            # video_stream.stop()
    cv2.destroyAllWindows()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
def detection(detection_graph, category_index, score, expand):
    print("Building Graph")
    # Session Config: allow separate GPU/CPU addressing and limit memory allocation
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=log_device)
    config.gpu_options.allow_growth = allow_memory_growth
    cur_frames = 0
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            # Define Input and Output tensors
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            if split_model:
                score_out = detection_graph.get_tensor_by_name('Postprocessor/convert_scores:0')
                expand_out = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1:0')
                score_in = detection_graph.get_tensor_by_name('Postprocessor/convert_scores_1:0')
                expand_in = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1_1:0')
            # Start Video Stream and FPS calculation
            fps = FPS2(fps_interval).start()
            video_stream = WebcamVideoStream(video_input, width, height).start()
            cur_frames = 0
            print("Press 'q' to Exit")
            print('Starting Detection')
            while video_stream.isActive():
                # read video frame, convert color and expand dimensions
                image = video_stream.read()
                fps.update()
                if convert_rgb:
                    try:
                        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                        cvt = True
                    except:
                        print("Error converting BGR2RGB")
                        cvt = False
                image_expanded = np.expand_dims(image, axis=0)
                # actual Detection
                if split_model:
                    # Split Detection in two sessions.
                    (score, expand) = sess.run([score_out, expand_out],
                                               feed_dict={image_tensor: image_expanded})
                    (boxes, scores, classes, num) = sess.run(
                        [detection_boxes, detection_scores, detection_classes, num_detections],
                        feed_dict={score_in: score, expand_in: expand})
                else:
                    # default session
                    (boxes, scores, classes, num) = sess.run(
                        [detection_boxes, detection_scores, detection_classes, num_detections],
                        feed_dict={image_tensor: image_expanded})
                # Visualization of the results of a detection.
                if visualize:
                    if convert_rgb and cvt:
                        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=8)
                    if vis_text:
                        cv2.putText(image, "fps: {}".format(fps.fps_local()), (10, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (77, 255, 9), 2)
                    cv2.imshow('object_detection', image)
                    # Exit Option
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                else:
                    # Exit after max frames if no visualization
                    cur_frames += 1
                    for box, score, _class in zip(np.squeeze(boxes), np.squeeze(scores),
                                                  np.squeeze(classes)):
                        if cur_frames % det_interval == 0 and score > det_th:
                            label = category_index[_class]['name']
                            print("label: {}\nscore: {}\nbox: {}".format(label, score, box))
                    if cur_frames >= max_frames:
                        break
            # End everything
            fps.stop()
            video_stream.stop()
    cv2.destroyAllWindows()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
def detection(detection_graph, category_index):
    # Session Config: allow separate GPU/CPU addressing and limit memory allocation
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = allow_memory_growth
    cur_frames = 0
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            # Define Input and Output tensors
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            # Start Video Stream
            video_stream = WebcamVideoStream(video_input, width, height).start()
            print("Press 'q' to Exit")
            # fps calculation
            fps = FPS2(fps_interval).start()
            while video_stream.isActive():
                image_np = video_stream.read()
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                # Actual detection.
                (boxes, scores, classes, num) = sess.run(
                    [detection_boxes, detection_scores, detection_classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                # Visualization of the results of a detection.
                if visualize:
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_np,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=8)
                    # print fps on visualization screen
                    if vis_text:
                        cv2.putText(image_np, "fps: {}".format(fps.fps_local()), (10, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (77, 255, 9), 2)
                    cv2.imshow('object_detection', image_np)
                    # Exit Option
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                else:
                    # Exit after max frames if no visualization
                    cur_frames += 1
                    for box, score, _class in zip(np.squeeze(boxes), np.squeeze(scores),
                                                  np.squeeze(classes)):
                        if cur_frames % det_interval == 0 and score > det_th:
                            label = category_index[_class]['name']
                            print(label, score, box)
                    if cur_frames >= max_frames:
                        break
                fps.update()
            # End everything
            fps.stop()
            video_stream.stop()
    cv2.destroyAllWindows()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
def segmentation(detection_graph):
    vs = cv2.imread('a.jpg')
    resize_ratio = 1.0 * 513 / max(vs.shape)
    target_size = (int(resize_ratio * vs.shape[1]),
                   int(resize_ratio * vs.shape[0]))
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    fps = FPS2(5).start()
    background_image = cv2.imread('b.jpg')
    resized_background_image = cv2.resize(background_image, target_size)  # (384,513)
    print("Starting...")
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image = cv2.resize(vs, target_size)
            batch_seg_map = sess.run(
                'SemanticPredictions:0',
                feed_dict={'ImageTensor:0': [cv2.cvtColor(image, cv2.COLOR_BGR2RGB)]})
            # visualization
            seg_map = batch_seg_map[0]
            seg_map[seg_map != 15] = 0
            bg_copy = resized_background_image.copy()
            mask = (seg_map == 15)
            bg_copy[mask] = image[mask]
            # create_colormap(seg_map).astype(np.uint8)
            seg_image = np.stack((seg_map, seg_map, seg_map),
                                 axis=-1).astype(np.uint8)
            gray = cv2.cvtColor(seg_image, cv2.COLOR_BGR2GRAY)
            thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)[1]
            cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                               cv2.CHAIN_APPROX_SIMPLE)
            try:
                cv2.drawContours(
                    bg_copy, cnts, -1,
                    (randint(0, 255), randint(0, 255), randint(0, 255)), 2)
            except:
                pass
            # ir = cv2.resize(bg_copy, (960, 720))
            # cv2.imshow('segmentation', bg_copy)
            fps.update()
            combo_resized = cv2.resize(bg_copy, (vs.shape[1], vs.shape[0]))
            cv2.imwrite("out.jpg", combo_resized)
    fps.stop()
    cv2.destroyAllWindows()
def segmentation(filename_fg, cv_type, filename_bg=None):
    detection_graph = graph
    print(filename_fg)
    print(cv_type)
    print(filename_bg)
    if cv_type == "fg_video":
        vs = cv2.VideoCapture('uploads/' + filename_fg)
        width = int(vs.get(3))
        height = int(vs.get(4))
        fps_v = int(vs.get(5))
        resize_ratio = 1.0 * 513 / max(width, height)
        target_size = (int(resize_ratio * height),
                       int(resize_ratio * width))  # reversed
        background_image = cv2.imread('uploads/' + filename_bg)
        resized_background_image = cv2.resize(background_image, target_size)  # (384,513)
    elif cv_type == "fg_image":
        vs = cv2.imread('uploads/' + filename_fg)
        width = vs.shape[0]
        height = vs.shape[1]
        resize_ratio = 1.0 * 513 / max(width, height)
        target_size = (int(resize_ratio * height),
                       int(resize_ratio * width))  # reversed
        background_image = cv2.imread('uploads/' + filename_bg)
        resized_background_image = cv2.resize(background_image, target_size)  # (384,513)
    elif cv_type == "bg_video":
        vs = cv2.VideoCapture('uploads/' + filename_fg)
        width = int(vs.get(3))
        height = int(vs.get(4))
        fps_v = int(vs.get(5))
        bgv = cv2.VideoCapture('uploads/' + filename_bg)
        print("in")
        resize_ratio = 1.0 * 513 / max(width, height)
        target_size = (int(resize_ratio * height),
                       int(resize_ratio * width))  # reversed
    elif cv_type == "bw" or cv_type == "crayon" or cv_type == "cartoon":
        vs = cv2.VideoCapture('uploads/' + filename_fg)
        width = int(vs.get(3))
        height = int(vs.get(4))
        fps_v = int(vs.get(5))
        resize_ratio = 1.0 * 513 / max(width, height)
        target_size = (int(resize_ratio * height),
                       int(resize_ratio * width))  # reversed
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    fps = FPS2(5).start()
    if cv_type != "fg_image":
        out = cv2.VideoWriter('outpy.avi',
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                              fps_v, (height, width))
    print("Starting...")
    ret = True
    counting = 0
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            if cv_type == "fg_image":
                image = cv2.resize(vs, target_size)
                batch_seg_map = sess.run(
                    'SemanticPredictions:0',
                    feed_dict={'ImageTensor:0': [cv2.cvtColor(image, cv2.COLOR_BGR2RGB)]})
                seg_map = batch_seg_map[0]
                seg_map[seg_map != 15] = 0
                bg_copy = resized_background_image.copy()
                mask = (seg_map == 15)
                bg_copy[mask] = image[mask]
                seg_image = np.stack((seg_map, seg_map, seg_map),
                                     axis=-1).astype(np.uint8)
                gray = cv2.cvtColor(seg_image, cv2.COLOR_BGR2GRAY)
                thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)[1]
                major = cv2.__version__.split('.')[0]
                if major == '3':
                    _, cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                                          cv2.CHAIN_APPROX_SIMPLE)
                else:
                    cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                                       cv2.CHAIN_APPROX_SIMPLE)
                try:
                    cv2.drawContours(
                        bg_copy, cnts, -1,
                        (randint(0, 255), randint(0, 255), randint(0, 255)), 2)
                except:
                    pass
                fps.update()
                combo_resized = cv2.resize(bg_copy, (vs.shape[1], vs.shape[0]))
                cv2.imwrite("out.jpg", combo_resized)
                fps.stop()
                cv2.destroyAllWindows()
                return
            elif cv_type == "fg_video":
                while True:
                    counting += 1
                    ret, frame = vs.read()
                    if not ret:
                        break
                    frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
                    image = cv2.resize(frame, target_size)
                    bg_copy = resized_background_image.copy()
                    batch_seg_map = sess.run(
                        'SemanticPredictions:0',
                        feed_dict={'ImageTensor:0': [cv2.cvtColor(image, cv2.COLOR_BGR2RGB)]})
                    seg_map = batch_seg_map[0]
                    seg_map[seg_map != 15] = 0
                    mask = (seg_map == 15)
                    bg_copy[mask] = image[mask]
                    seg_image = np.stack((seg_map, seg_map, seg_map),
                                         axis=-1).astype(np.uint8)
                    gray = cv2.cvtColor(seg_image, cv2.COLOR_BGR2GRAY)
                    thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)[1]
                    cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                                       cv2.CHAIN_APPROX_SIMPLE)
                    try:
                        cv2.drawContours(
                            bg_copy, cnts, -1,
                            (randint(0, 255), randint(0, 255), randint(0, 255)), 2)
                    except:
                        pass
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                    fps.update()
                    combo_resized = cv2.resize(bg_copy, (height, width))
                    out.write(combo_resized)
                fps.stop()
                out.release()
                vs.release()
                cv2.destroyAllWindows()
                return
            elif cv_type == "bg_video":
                while True:
                    ret, frame = vs.read()
                    if not ret:
                        break
                    ret, bg_frame = bgv.read()
                    if not ret:
                        break
                    frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
                    image = cv2.resize(frame, target_size)
                    bg_copy = cv2.resize(bg_frame, target_size)
                    batch_seg_map = sess.run(
                        'SemanticPredictions:0',
                        feed_dict={'ImageTensor:0': [cv2.cvtColor(image, cv2.COLOR_BGR2RGB)]})
                    seg_map = batch_seg_map[0]
                    seg_map[seg_map != 15] = 0
                    mask = (seg_map == 15)
                    bg_copy[mask] = image[mask]
                    seg_image = np.stack((seg_map, seg_map, seg_map),
                                         axis=-1).astype(np.uint8)
                    gray = cv2.cvtColor(seg_image, cv2.COLOR_BGR2GRAY)
                    thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)[1]
                    cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                                       cv2.CHAIN_APPROX_SIMPLE)
                    try:
                        cv2.drawContours(
                            bg_copy, cnts, -1,
                            (randint(0, 255), randint(0, 255), randint(0, 255)), 2)
                    except:
                        pass
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                    fps.update()
                    combo_resized = cv2.resize(bg_copy, (height, width))
                    out.write(combo_resized)
                    counting += 1
                fps.stop()
                out.release()
                vs.release()
                bgv.release()
                cv2.destroyAllWindows()
                return
            elif cv_type == "bw":
                while True:
                    counting += 1
                    ret, frame = vs.read()
                    if not ret:
                        break
                    frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
                    image = cv2.resize(frame, target_size)
                    batch_seg_map = sess.run(
                        'SemanticPredictions:0',
                        feed_dict={'ImageTensor:0': [cv2.cvtColor(image, cv2.COLOR_BGR2RGB)]})
                    seg_map = batch_seg_map[0]
                    seg_map[seg_map != 15] = 0
                    mask = (seg_map == 15)
                    gray0 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                    gray0 = cv2.cvtColor(gray0, cv2.COLOR_GRAY2BGR)
                    print(gray0.shape)
                    gray0[mask] = image[mask]
                    seg_image = np.stack((seg_map, seg_map, seg_map),
                                         axis=-1).astype(np.uint8)
                    gray = cv2.cvtColor(seg_image, cv2.COLOR_BGR2GRAY)
                    thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)[1]
                    cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                                       cv2.CHAIN_APPROX_SIMPLE)
                    try:
                        cv2.drawContours(
                            gray0, cnts, -1,
                            (randint(0, 255), randint(0, 255), randint(0, 255)), 2)
                    except:
                        pass
                    ir = gray0
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                    fps.update()
                    ir = cv2.resize(ir, (height, width))
                    out.write(ir)
                fps.stop()
                out.release()
                vs.release()
                cv2.destroyAllWindows()
                return
            elif cv_type == "crayon":
                while True:
                    counting += 1
                    ret, frame = vs.read()
                    if not ret:
                        break
                    frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
                    image = cv2.resize(frame, target_size)
                    batch_seg_map = sess.run(
                        'SemanticPredictions:0',
                        feed_dict={'ImageTensor:0': [cv2.cvtColor(image, cv2.COLOR_BGR2RGB)]})
                    seg_map = batch_seg_map[0]
                    seg_map[seg_map != 15] = 0
                    mask = (seg_map == 15)
                    car = cv2.stylization(image, sigma_s=60, sigma_r=0.07)
                    car[mask] = image[mask]
                    seg_image = np.stack((seg_map, seg_map, seg_map),
                                         axis=-1).astype(np.uint8)
                    gray = cv2.cvtColor(seg_image, cv2.COLOR_BGR2GRAY)
                    thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)[1]
                    cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                                       cv2.CHAIN_APPROX_SIMPLE)
                    try:
                        cv2.drawContours(
                            car, cnts, -1,
                            (randint(0, 255), randint(0, 255), randint(0, 255)), 2)
                    except:
                        pass
                    ir = car
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                    fps.update()
                    ir = cv2.resize(ir, (height, width))
                    out.write(ir)
                fps.stop()
                out.release()
                vs.release()
                cv2.destroyAllWindows()
                return
            elif cv_type == "cartoon":
                while True:
                    counting += 1
                    ret, frame = vs.read()
                    if not ret:
                        break
                    frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
                    image = cv2.resize(frame, target_size)
                    batch_seg_map = sess.run(
                        'SemanticPredictions:0',
                        feed_dict={'ImageTensor:0': [cv2.cvtColor(image, cv2.COLOR_BGR2RGB)]})
                    seg_map = batch_seg_map[0]
                    seg_map[seg_map != 15] = 0
                    mask = (seg_map == 15)
                    gray0 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                    gray0 = cv2.medianBlur(gray0, 5)
                    edges = cv2.adaptiveThreshold(gray0, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                                  cv2.THRESH_BINARY, 9, 9)
                    # 2) Color
                    color = cv2.bilateralFilter(image, 9, 300, 300)
                    # 3) Cartoon
                    cartoon = cv2.bitwise_and(color, color, mask=edges)
                    # gray0 = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
                    # gray0 = cv2.cvtColor(gray0, cv2.COLOR_GRAY2RGB)
                    print(cartoon.shape)
                    cartoon[mask] = image[mask]
                    # create_colormap(seg_map).astype(np.uint8)
                    seg_image = np.stack((seg_map, seg_map, seg_map),
                                         axis=-1).astype(np.uint8)
                    gray = cv2.cvtColor(seg_image, cv2.COLOR_BGR2GRAY)
                    thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)[1]
                    cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                                       cv2.CHAIN_APPROX_SIMPLE)
                    try:
                        cv2.drawContours(
                            cartoon, cnts, -1,
                            (randint(0, 255), randint(0, 255), randint(0, 255)), 2)
                    except:
                        pass
                    ir = cartoon
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                    fps.update()
                    ir = cv2.resize(ir, (height, width))
                    out.write(ir)
                fps.stop()
                out.release()
                vs.release()
                cv2.destroyAllWindows()
                return
def detection(detection_graph, category_index, score, expand):
    print("Building Graph")
    # Session Config: allow separate GPU/CPU addressing and limit memory allocation
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=log_device)
    config.gpu_options.allow_growth = allow_memory_growth
    cur_frames = 0
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            # Define Input and Output tensors
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            if split_model:
                score_out = detection_graph.get_tensor_by_name('Postprocessor/convert_scores:0')
                expand_out = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1:0')
                score_in = detection_graph.get_tensor_by_name('Postprocessor/convert_scores_1:0')
                expand_in = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1_1:0')
                # Threading
                gpu_worker = SessionWorker("GPU", detection_graph, config)
                cpu_worker = SessionWorker("CPU", detection_graph, config)
                gpu_opts = [score_out, expand_out]
                cpu_opts = [detection_boxes, detection_scores, detection_classes, num_detections]
                gpu_counter = 0
                cpu_counter = 0
            # Start Video Stream and FPS calculation
            fps = FPS2(fps_interval).start()
            video_stream = WebcamVideoStream(video_input, width, height).start()
            cur_frames = 0
            print("Press 'q' to Exit")
            print('Starting Detection')
            while video_stream.isActive():
                # actual Detection
                if split_model:
                    # split model in separate gpu and cpu session threads
                    if gpu_worker.is_sess_empty():
                        # read video frame, expand dimensions and convert to rgb
                        image = video_stream.read()
                        image_expanded = np.expand_dims(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), axis=0)
                        # put new queue
                        gpu_feeds = {image_tensor: image_expanded}
                        if visualize:
                            gpu_extras = image  # for visualization frame
                        else:
                            gpu_extras = None
                        gpu_worker.put_sess_queue(gpu_opts, gpu_feeds, gpu_extras)
                    g = gpu_worker.get_result_queue()
                    if g is None:
                        # gpu thread has no output queue. ok skip, let's check cpu thread.
                        gpu_counter += 1
                    else:
                        # gpu thread has output queue.
                        gpu_counter = 0
                        score, expand, image = g["results"][0], g["results"][1], g["extras"]
                        if cpu_worker.is_sess_empty():
                            # When cpu thread has no next queue, put new queue.
                            # else, drop gpu queue.
                            cpu_feeds = {score_in: score, expand_in: expand}
                            cpu_extras = image
                            cpu_worker.put_sess_queue(cpu_opts, cpu_feeds, cpu_extras)
                    c = cpu_worker.get_result_queue()
                    if c is None:
                        # cpu thread has no output queue. ok, nothing to do. continue
                        cpu_counter += 1
                        time.sleep(0.005)
                        continue  # If CPU RESULT has not been set yet, no fps update
                    else:
                        cpu_counter = 0
                        boxes, scores, classes, num, image = c["results"][0], c["results"][1], \
                            c["results"][2], c["results"][3], c["extras"]
                else:
                    # default session
                    image = video_stream.read()
                    image_expanded = np.expand_dims(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), axis=0)
                    boxes, scores, classes, num = sess.run(
                        [detection_boxes, detection_scores, detection_classes, num_detections],
                        feed_dict={image_tensor: image_expanded})
                # Visualization of the results of a detection.
                if visualize:
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=8)
                    if vis_text:
                        cv2.putText(image, "fps: {}".format(fps.fps_local()), (10, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (77, 255, 9), 2)
                    cv2.imshow('object_detection', image)
                    # Exit Option
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                else:
                    cur_frames += 1
                    # Exit after max frames if no visualization
                    for box, score, _class in zip(np.squeeze(boxes), np.squeeze(scores),
                                                  np.squeeze(classes)):
                        if cur_frames % det_interval == 0 and score > det_th:
                            label = category_index[_class]['name']
                            print("label: {}\nscore: {}\nbox: {}".format(label, score, box))
                    if cur_frames >= max_frames:
                        break
                fps.update()
            # End everything
            if split_model:
                # the session workers only exist when the model was split
                gpu_worker.stop()
                cpu_worker.stop()
            fps.stop()
            video_stream.stop()
    cv2.destroyAllWindows()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
def object_detection(video_input, visualize, max_frames, width, height,
                     fps_interval, bbox_thickness, allow_memory_growth,
                     det_intervall, det_th, model_name):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = allow_memory_growth
    cur_frames = 0
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            # Define input and output Tensors for detection_graph
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for each of the objects.
            # Score is shown on the result image, together with the class label.
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            # fps calculation
            fps = FPS2(fps_interval).start()
            # Start Video Stream
            video_stream = WebcamVideoStream(video_input, width, height).start()
            print("Press 'q' to Exit")
            while video_stream.isActive():
                image_np = video_stream.read()
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                # Actual detection.
                (boxes, scores, classes, num) = sess.run(
                    [detection_boxes, detection_scores, detection_classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                if visualize:
                    # Visualization of the results of a detection.
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_np,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        CATEGORY_INDEX,
                        min_score_thresh=MINIMUM_CONFIDENCE,
                        use_normalized_coordinates=True,
                        line_thickness=bbox_thickness)
                    cv2.imshow('object_detection', image_np)
                    # print(boxes)
                    # print bounding corners of boxes when confidence is > minimum confidence
                    # (the ones you're drawing boxes around)
                    # print("NEW FRAME")
                    for i, box in enumerate(np.squeeze(boxes)):
                        if np.squeeze(scores)[i] > MINIMUM_CONFIDENCE:
                            # This uses actual coordinates based on size of image -
                            # remove height and width to use normalized coordinates
                            ymin = box[0] * height
                            xmin = box[1] * width
                            ymax = box[2] * height
                            xmax = box[3] * width
                            # normalized/percentage
                            nymin = box[0] * 100
                            nxmin = box[1] * 100
                            nymax = box[2] * 100
                            nxmax = box[3] * 100
                            # TODO: should pass through image size at the very beginning
                            print('Top left')
                            print("(" + str(nxmin) + "%," + str(nymin) + "%)")
                            print('Bottom right')
                            print("(" + str(nxmax) + "%," + str(nymax) + "%)")
                            print()
                    # Exit Option--BROKEN
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                else:
                    cur_frames += 1
                    for box, score, _class in zip(np.squeeze(boxes), np.squeeze(scores),
                                                  np.squeeze(classes)):
                        if cur_frames % det_intervall == 0 and score > det_th:
                            label = category_index[_class]['name']
                            print(label, score, box)
                    if cur_frames >= max_frames:
                        break
                # fps calculation
                fps.update()
            # End everything
            fps.stop()
            video_stream.stop()
    cv2.destroyAllWindows()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
def main(self):
    rospy.init_node('detector_node')
    rate = rospy.Rate(30)
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=log_device)
    config.gpu_options.allow_growth = allow_memory_growth
    # config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    self.cur_frames = 0
    with self.detection_graph.as_default():
        with tf.Session(graph=self.detection_graph, config=config) as sess:
            # Define Input and Output tensors
            image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = self.detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_scores = self.detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = self.detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = self.detection_graph.get_tensor_by_name('num_detections:0')
            if split_model:
                score_out = self.detection_graph.get_tensor_by_name('Postprocessor/convert_scores:0')
                expand_out = self.detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1:0')
                score_in = self.detection_graph.get_tensor_by_name('Postprocessor/convert_scores_1:0')
                expand_in = self.detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1_1:0')
            # fps calculation
            fps = FPS2(fps_interval).start()
            cur_frames = 0
            while not rospy.is_shutdown():
                if self.image_flag:
                    image_np = self.cv_image
                    image_np = cv2.resize(image_np, (height, width))
                    image_np_expanded = np.expand_dims(image_np, axis=0)
                    # actual Detection
                    if not split_model:
                        (boxes, scores, classes, num) = sess.run(
                            [detection_boxes, detection_scores, detection_classes, num_detections],
                            feed_dict={image_tensor: image_np_expanded})
                    else:
                        # Split Detection in two sessions.
                        (score, expand) = sess.run([score_out, expand_out],
                                                   feed_dict={image_tensor: image_np_expanded})
                        (boxes, scores, classes, num) = sess.run(
                            [detection_boxes, detection_scores, detection_classes, num_detections],
                            feed_dict={score_in: score, expand_in: expand})
                    # Visualization of the results of a detection.
                    if visualize:
                        vis_util.visualize_boxes_and_labels_on_image_array(
                            image_np,
                            np.squeeze(boxes),
                            np.squeeze(classes).astype(np.int32),
                            np.squeeze(scores),
                            self.category_index,
                            use_normalized_coordinates=True,
                            line_thickness=8)
                        if vis_text:
                            cv2.putText(image_np, "fps: {}".format(fps.fps_local()), (10, 30),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.75, (77, 255, 9), 2)
                        image_height = self.cv_image.shape[0]
                        image_width = self.cv_image.shape[1]
                        resize_image = cv2.resize(image_np, (image_width, image_height))
                        pub_image = CvBridge().cv2_to_imgmsg(resize_image, "bgr8")
                        self.image_pub.publish(pub_image)
                    self.cur_frames += 1
                    for box, score, _class in zip(np.squeeze(boxes), np.squeeze(scores),
                                                  np.squeeze(classes)):
                        if self.cur_frames % det_interval == 0 and score > det_th:
                            label = self.category_index[_class]['name']
                            print(label, score, box)
                    # cv2.imshow('object_detection', image_np)
                    # if cv2.waitKey(1) & 0xFF == ord('q'):
                    #     break
                    # else:
                    #     # Exit after max frames if no visualization
                    #     self.cur_frames += 1
                    #     for box, score, _class in zip(np.squeeze(boxes), np.squeeze(scores), np.squeeze(classes)):
                    #         if self.cur_frames % det_interval == 0 and score > det_th:
                    #             label = self.category_index[_class]['name']
                    #             print(label, score, box)
                    #     # if self.cur_frames >= max_frames:
                    #     #     break
                    fps.update()
                rate.sleep()