def draw_bounding_boxes(image, decoded, class_names):
    image = tf.image.convert_image_dtype(image, tf.uint8)
    image = tf.py_func(
        lambda a, b, c, d: utils.draw_bounding_boxes(
            a, b, c, [x.decode() for x in d]),
        [image, decoded.boxes, decoded.class_ids, class_names],
        tf.uint8,
        stateful=False)
    image = tf.image.convert_image_dtype(image, tf.float32)
    return image
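# A brief usage sketch, not from the original source: `decoded` is assumed to
# be a namedtuple-like object with `boxes` and `class_ids` tensors (as implied
# by the attribute accesses above), and class_names a tensor of byte strings,
# hence the x.decode() inside the py_func body. It requires the same `utils`
# module as the function above.
import collections

Decoded = collections.namedtuple('Decoded', ['boxes', 'class_ids'])

decoded = Decoded(boxes=tf.constant([[10.0, 10.0, 120.0, 160.0]]),
                  class_ids=tf.constant([0]))
annotated = draw_bounding_boxes(tf.zeros([300, 300, 3]),
                                decoded,
                                tf.constant([b'person']))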
def main(args):
    video, video_writer, frame_count = init_video(
        args.video_file_path, args.output_video_file_path)
    net_id, runtime, input_binding_info, output_binding_info = create_network(
        args.model_file_path, args.preferred_backends)
    output_tensors = ann.make_output_tensors(output_binding_info)
    labels, process_output, resize_factor = get_model_processing(
        args.model_name, video, input_binding_info)
    labels = dict_labels(labels if args.label_path is None else args.label_path)

    for _ in tqdm(frame_count, desc='Processing frames'):
        frame_present, frame = video.read()
        if not frame_present:
            continue
        # run inference on the frame and draw the detections onto it
        input_tensors = preprocess(frame, input_binding_info)
        inference_output = execute_network(input_tensors, output_tensors,
                                           runtime, net_id)
        detections = process_output(inference_output)
        draw_bounding_boxes(frame, detections, resize_factor, labels)
        video_writer.write(frame)

    print('Finished processing frames')
    video.release()
    video_writer.release()
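# A hypothetical command-line entry point for main() above; the flag names are
# inferred from the attributes it reads (args.video_file_path and friends) and
# may not match the original script's parser.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Run object detection on a video file')
    parser.add_argument('--video_file_path', required=True)
    parser.add_argument('--output_video_file_path', default='output.avi')
    parser.add_argument('--model_file_path', required=True)
    parser.add_argument('--model_name', required=True)
    parser.add_argument('--label_path', default=None)
    parser.add_argument('--preferred_backends', nargs='+',
                        default=['CpuAcc', 'CpuRef'])
    main(parser.parse_args())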
def draw_bounding_boxes(image, classifications, regressions, class_names):
    # decode raw per-level outputs into boxes, merge the levels and apply
    # class-wise non-maximum suppression before drawing
    decoded = []
    for k in classifications:
        decoded.append(utils.boxes_decode(classifications[k], regressions[k]))
    decoded = utils.merge_boxes_decoded(decoded)
    decoded = utils.nms_classwise(decoded, num_classes=len(class_names))

    image = tf.image.convert_image_dtype(image, tf.uint8)
    image = tf.py_func(
        lambda a, b, c, d: utils.draw_bounding_boxes(
            a, b, c, [x.decode() for x in d]),
        [image, decoded.boxes, decoded.class_ids, class_names],
        tf.uint8,
        stateful=False)
    image = tf.image.convert_image_dtype(image, tf.float32)
    return image
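# A sketch of what a class-wise NMS helper like utils.nms_classwise could look
# like; the real helper's signature is not shown, and this version assumes
# flat `boxes`, `scores` and `class_ids` tensors (scores are needed for NMS
# even though the drawing code above only reads boxes and class_ids).
def nms_classwise_sketch(boxes, scores, class_ids, num_classes,
                         max_output_size=100, iou_threshold=0.5):
    keep = []
    for c in range(num_classes):
        # indices of all detections belonging to class c
        mask = tf.where(tf.equal(class_ids, c))[:, 0]
        # suppress overlapping boxes within this class only
        idx = tf.image.non_max_suppression(
            tf.gather(boxes, mask), tf.gather(scores, mask),
            max_output_size, iou_threshold=iou_threshold)
        keep.append(tf.gather(mask, idx))
    # indices (into the original flat tensors) that survive NMS
    return tf.concat(keep, axis=0)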
def runThreads(source=0, FiniteStateMachine=None):
    """
    Dedicated thread for grabbing video frames with a VideoGet object.
    The main thread processes the frames, dispatches detection tasks to a
    remote Ray worker and shows the results in an OpenCV window.
    """
    logger.info("Start video getter -----------------------")
    video_getter = VideoGet(source, (1600, 900)).start()

    logger.info("Creates video window -- XXXXX NO THREAD ---------------------")
    cv2.namedWindow("Video", cv2.WND_PROP_FULLSCREEN)
    cv2.moveWindow("Video", 3000, 0)
    cv2.setWindowProperty("Video", cv2.WND_PROP_FULLSCREEN,
                          cv2.WINDOW_FULLSCREEN)
    frame = video_getter.frame
    # cv2.imshow("Video", frame)
    logger.info("Initiated video getter thread")

    # now using the Ray library
    work_manager = TaskManager.remote(conf)
    work_manager.start.remote()
    logger.info("Instantiated Task Manager")

    # instantiates the GetImages object; loads data from the conf file and
    # generates the language dict
    getimages = GetImages()
    getimages.generateImageDict()
    logger.info("GET IMAGES - generated image dictionary")

    # local variable declarations
    start = time.time()
    period = 0.3
    stackLen = 5
    # stacks registering the last stackLen face counts and smile ratios
    faceStack = deque(maxlen=stackLen)
    smileStack = deque(maxlen=stackLen)
    people = 0
    faces = 0
    smiles = 0
    happiness = 0
    language = 0
    prev_state = None
    FSM_state = None
    detections = None
    bg = None  # background image; stays None until the FSM provides one

    while True:
        if (cv2.waitKey(1) == ord("q")) or video_getter.stopped:
            video_getter.stop()
            ray.shutdown()
            break

        # if an FSM is defined, evolve its states and read the current state
        if FiniteStateMachine is not None:
            logger.debug(
                f"FSM NEXT function call with STATE: {FSM_state}, "
                f"smiles:{happiness}-{smileStack} and people: {faces}-{faceStack}")
            FiniteStateMachine.next(smiles=happiness, people=faces)
            language = FiniteStateMachine.language
            logger.debug(f"FSM language set to {language}")
            FSM_state = FiniteStateMachine.state
            logger.debug(f"FSM actual state is: {FSM_state}")
            if prev_state != FSM_state:
                logger.debug(
                    f"New State {FSM_state} with people={people} and smiles={smiles}")
                prev_state = FSM_state
                # when the state changes, the system looks for another image to show
                logger.debug(
                    f"FSM - Gets new image for state: {FSM_state} and language: {language}")
                new_bg = getimages.getImage(FSM_state, language)
                # if there is no new background image, keep the current one
                if new_bg is not None:
                    bg = new_bg

        # gets the frame from the VideoGet thread; processing happens in the
        # main thread
        frame = video_getter.frame

        # it is not possible to analyze every frame, so we sample one frame
        # every 0.3 seconds or so (~3.3 frames/sec)
        if (time.time() - start) > period:
            start = time.time()
            try:
                logger.info("CALL IMAGE PREDICTION TASK")
                task = ImagePredictionTask(image=frame, result="",
                                           time=start, operation='faces')
                logger.debug("CREATES TASK AND SENDS IT TO REMOTE PROCESS")
                taskRemote = ray.put(task)
                logger.debug("EXECUTES PROCESS TASK IN REMOTE")
                taskRemote = work_manager.process_task.remote(taskRemote)
                logger.debug("GETS TASK RESULT POINTER FROM REMOTE")
                task = ray.get(taskRemote)
                if task is not None:
                    detections = task.result
                    logger.debug(f"Got task result: {detections}")
                    people = get_people(detections)
                    if len(faceStack) >= stackLen:
                        faceStack.popleft()
                    faceStack.append(people)
                    logger.debug(f"Got number of people from detections: {people}")
                    if people > 0:
                        smiles = get_happiness(detections) / people
                        logger.debug(f"Got number of smiles from detections: {smiles}")
                    else:
                        smiles = 0
                    if len(smileStack) >= stackLen:
                        smileStack.popleft()
                    smileStack.append(smiles)
                    # it is more efficient to accumulate measures through time
                    # to dilute momentary detection problems:
                    # faces and happiness are the stack sums averaged by the
                    # stack length
                    faces = np.sum(faceStack) / stackLen
                    happiness = np.sum(smileStack) / stackLen
                    logger.debug(
                        f"Got % of smiles from detections: {smiles} and accumulated: {happiness}")
                else:
                    detections = None
            except Exception as err:
                traceback.print_exc()
                logger.exception(f"FACE DETECTION LOOP ERROR - {err}")
                continue

        logger.info("Show result in Video Window-----------------------")
        if detections is not None:
            frame = draw_bounding_boxes(detections, frame, (255, 0, 0))
        if bg is not None:
            frame = overlay_transparent(bg, frame, -1, 0)

        # shows the new frame
        cv2.imshow("Video", frame)
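# The VideoGet class used above is not shown; this is a plausible minimal
# sketch of such a threaded frame grabber. The attribute and method names
# (frame, stopped, start, stop) are inferred from the call sites above.
import threading

class VideoGetSketch:
    def __init__(self, source=0, size=(1600, 900)):
        self.stream = cv2.VideoCapture(source)
        self.size = size
        grabbed, frame = self.stream.read()
        self.frame = cv2.resize(frame, size) if grabbed else None
        self.stopped = not grabbed

    def start(self):
        # grab frames on a daemon thread so reads never block the main loop
        threading.Thread(target=self._update, daemon=True).start()
        return self

    def _update(self):
        while not self.stopped:
            grabbed, frame = self.stream.read()
            if not grabbed:
                self.stopped = True
            else:
                self.frame = cv2.resize(frame, self.size)

    def stop(self):
        self.stopped = True
        self.stream.release()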
import os  # used by the commented-out dataset configurations below

from tqdm import tqdm
import matplotlib.pyplot as plt
import cv2

from data_loaders.inferred import Inferred
import utils

if __name__ == '__main__':
    # dl = Inferred('pascal', [os.path.expanduser('~/Datasets/pascal/VOCdevkit/VOC2012'), 'trainval'])
    # dl = Inferred('coco', [os.path.expanduser('~/Datasets/coco/instances_train2017.json'),
    #                        os.path.expanduser('~/Datasets/coco/images')])
    dl = Inferred('shapes', ['./tmp', 10, 600])

    for x in tqdm(dl):
        image = cv2.imread(x['image_file'].decode('utf-8'))
        # boxes come in pixel coordinates; normalize by the 600x600 image size
        image = utils.draw_bounding_boxes(
            image, x['boxes'] / [600, 600, 600, 600], x['class_ids'],
            dl.class_names)
        plt.imshow(image)
        plt.show()
        break
def m():
    global output, heatmap_image, total_people, field1_count, field2_count

    frame_rate_calc = 1
    freq = cv2.getTickFrequency()
    videostream = VideoStream(VIDEO_PATH).start()

    color_f1 = (0, 0, 255)
    color_f2 = (255, 0, 0)
    heatmap = np.zeros((720, 1270, 3), dtype=np.uint8)
    ht_color = (191, 255, 1)

    while True:
        t1 = cv2.getTickCount()
        frame1 = videostream.read()
        frame = frame1.copy()
        boxes, classes, scores = generate_detections(frame, interpreter)

        total_people = 0
        field1_count = 0
        field2_count = 0
        for i in range(len(scores)):
            if (scores[i] > THRESHOLD) and (scores[i] <= 1.0):
                # boxes are normalized (ymin, xmin, ymax, xmax); scale to pixels
                ymin = int(max(1, (boxes[i][0] * imH)))
                xmin = int(max(1, (boxes[i][1] * imW)))
                ymax = int(min(imH, (boxes[i][2] * imH)))
                xmax = int(min(imW, (boxes[i][3] * imW)))
                total_people += 1

                center_x = int((xmin + xmax) / 2)
                center_y = int((ymin + ymax) / 2)
                center_coor = (center_x, center_y)
                color_bb = (10, 200, 10)
                cv2.circle(frame, center_coor, 10, color_bb, cv2.FILLED)

                pts_f1 = [[522, 138], [1066, 522], [1200, 270], [580, 30]]
                pts_f2 = [[172, 142], [410, 607], [657, 440], [319, 142]]
                create_polygon(pts_f1, frame, color_f1)
                create_polygon(pts_f2, frame, color_f2)

                # count people whose center falls inside each field
                center_point = Point(center_x, center_y)
                polygon_f1 = Polygon(pts_f1)
                polygon_f2 = Polygon(pts_f2)
                if is_field_contain_center(polygon_f1, center_point):
                    field1_count += 1
                    color_bb = color_f1
                if is_field_contain_center(polygon_f2, center_point):
                    field2_count += 1
                    color_bb = color_f2

                draw_bounding_boxes(frame, classes, xmin, xmax, ymin, ymax,
                                    color_bb, labels)

                # shift already-visited heatmap pixels toward red; otherwise
                # stamp the base heatmap color
                if (heatmap[center_y, center_x][0] != 0) and \
                        (heatmap[center_y, center_x][1] != 0) and \
                        (heatmap[center_y, center_x][2] != 0):
                    b = heatmap[center_y, center_x][0]
                    g = heatmap[center_y, center_x][1]
                    r = heatmap[center_y, center_x][2]
                    b = b - b * 0.5
                    g = g - g * 0.2
                    r = r + r * 0.5
                    cv2.circle(heatmap, center_coor, 10, (b, g, r), cv2.FILLED)
                else:
                    cv2.circle(heatmap, center_coor, 10, ht_color, cv2.FILLED)

        cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)
        frame = cv2.resize(frame, (698, 396))

        t2 = cv2.getTickCount()
        time1 = (t2 - t1) / freq
        frame_rate_calc = 1 / time1

        # blend the heatmap over a copy of the raw frame
        overlay = frame1.copy()
        alpha_background = 0.7
        alpha_heatmap = 0.9
        cv2.addWeighted(overlay, alpha_heatmap, frame1, 1 - alpha_heatmap, 0, frame1)
        cv2.addWeighted(heatmap, alpha_background, frame1, 1 - alpha_background, 0, frame1)
        frame2 = cv2.resize(frame1, (698, 396))

        output = frame.copy()
        heatmap_image = frame2
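# Plausible sketches of two helpers called in m() above; the originals are not
# shown, so the bodies are assumptions based on the call sites (shapely's
# Point/Polygon are already used there).
def create_polygon(pts, frame, color):
    # draw the field boundary as a closed polygon outline
    cv2.polylines(frame, [np.array(pts, dtype=np.int32)], True, color, 2)

def is_field_contain_center(polygon, point):
    # shapely containment test: is the detection center inside the field?
    return polygon.contains(point)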
def main(args):
    client_options = {
        'compression_format': '.jpg',
        'compression_param': cv2.IMREAD_COLOR,
        'flag': 1
    }
    client = NetGear(address=args.address, port=args.port, protocol='tcp',
                     bidirectional_mode=True, pattern=1, receive_mode=True,
                     logging=True, **client_options)

    face_detector = None
    mask_detector = None
    if args.detection_method == 'dnn':
        model_file = "models/res10_300x300_ssd_iter_140000_fp16.caffemodel"
        config_file = "models/deploy.prototxt"
        face_detector = utils.load_cvdnn(model_file, config_file)
    elif args.detection_method == 'haar':
        model_file = 'models/haarcascade_frontalface_alt.xml'
        face_detector = utils.load_haar_cascade(model_file)

    if not args.ignore_masks:
        # Load mask detector
        # Credit for the model: https://github.com/chandrikadeb7/Face-Mask-Detection
        mask_detector = utils.load_masknet("models/mask_detector.model")

    # Keep track of FPS
    frame_counter = 0
    start_time = None

    # Bookkeeping for face tracking
    next_id = count(0)
    objects = {}
    target_coords = []
    target_bb_size = 0
    target = None

    while True:
        # receive frames from the network, sending target data back
        data = client.recv(return_data=(target_coords, target_bb_size))
        if start_time is None:
            start_time = time.time()

        # stop when no frame was received
        if data is None:
            break
        _, frame = data

        if frame_counter % args.frameskip == 0:
            if args.detection_method == 'dnn':
                face_locations = utils.get_face_locations_dnn(frame, face_detector)
            else:
                face_locations = utils.get_face_locations_hog(frame)
            # Update the object positions and other parameters
            utils.update_objects(objects, face_locations, next_id)

        if frame_counter % args.mask_detect_freq == 0:
            # Update the mask status of the faces
            utils.detect_mask(frame, objects, mask_detector)

        # Choose the target: the first tracked face without a mask
        if target not in objects.keys() or objects[target]['has_mask']:
            temp = [i for i, val in objects.items() if not val['has_mask']]
            target = temp[0] if temp else None

        if target is not None:
            target_coords = objects[target]['centroid']
            # Change to coordinates centered on the frame middle
            h, w, _ = frame.shape
            x0, y0 = (w // 2, h // 2)
            target_coords = (target_coords[0] - x0, target_coords[1] - y0)
            # Bounding box size, measured as the length of the box diagonal
            (top, right, bottom, left) = objects[target]['bounding_box']
            target_bb_size = round(np.linalg.norm([bottom - top, right - left]), 2)
        else:
            target_coords = []
            target_bb_size = 0

        if args.show_video:
            # Draw the extra information onto the frame
            frame = utils.draw_bounding_boxes(frame, objects)
            # h, w, _ = frame.shape
            # cv2.circle(frame, (w//2, h//2), radius=5,
            #            color=(0, 255, 0), thickness=-1)

            # Show output; (top, right, bottom, left) follows the
            # face_recognition convention where top < bottom
            if args.track_face and target is not None:
                (top, right, bottom, left) = objects[target]['bounding_box']
                cv2.imshow("Output Frame", frame[top:bottom, left:right])
            else:
                cv2.imshow("Output Frame", frame)

            # check for 'q' key if pressed
            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break

        frame_counter += 1

    elapsed_time = time.time() - start_time
    print(f"avg FPS: {frame_counter/elapsed_time}")
    # close output window
    cv2.destroyAllWindows()
    # safely close client
    client.close()
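# A minimal sketch of a matching sender for the bidirectional NetGear client
# above, assuming vidgear's NetGear/VideoGear APIs; the address, port and
# source values are placeholders.
from vidgear.gears import NetGear, VideoGear

def run_server(address, port, source=0):
    stream = VideoGear(source=source).start()
    server = NetGear(address=address, port=port, protocol='tcp',
                     pattern=1, bidirectional_mode=True, logging=True)
    try:
        while True:
            frame = stream.read()
            if frame is None:
                break
            # in bidirectional mode, send() returns whatever the client passed
            # to recv(return_data=...): here, target coords and bb size
            target_data = server.send(frame)
            if target_data:
                print('client reported target:', target_data)
    finally:
        stream.stop()
        server.close()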
def main(args):
    args.images_dirpath = os.path.abspath(os.path.expanduser(args.images_dirpath))
    args.cfg_path = os.path.abspath(os.path.expanduser(args.cfg_path))
    args.data_path = os.path.abspath(os.path.expanduser(args.data_path))
    args.weights_path = os.path.abspath(os.path.expanduser(args.weights_path))
    args.out_path = os.path.abspath(os.path.expanduser(args.out_path))

    # create output directory (if necessary)
    out_dir = os.path.dirname(args.out_path)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # load images (sort by name)
    image_paths = []
    for path in os.listdir(args.images_dirpath):
        path = os.path.join(args.images_dirpath, path)
        if imghdr.what(path) is None:
            continue
        image_paths.append(path)
    image_paths.sort()
    print('%d input images were loaded.' % len(image_paths))

    # parse the .data file: each line is "<key> = <path>"
    names_path = None
    with open(args.data_path) as f:
        for line in f.readlines():
            key, path = line.split('=')
            key = key.strip()
            path = path.strip()
            if key == 'names':
                names_path = path

    # load names file
    names = []
    print('=-=-=-=-=-=-=-=-=-= names =-=-=-=-=-=-=-=-=-=')
    with open(names_path) as f:
        for i, line in enumerate(f.readlines()):
            line = line.strip()
            names.append(line)
            print('%d: %s' % (i + 1, line))
    print('=-=-=-=-=-=-=-=-=-= names =-=-=-=-=-=-=-=-=-=')

    # generate color map: color_map['name'] = (b, g, r)
    print('=-=-=-=-=-=-=-=-=-= color map =-=-=-=-=-=-=-=-=-=')
    color_map = {}
    for name in names:
        color = (random.randint(0, 255), random.randint(0, 255),
                 random.randint(0, 255))
        color_map[name] = color
        print('color_map[\'%s\']: %s' % (name, str(color)))
    print('=-=-=-=-=-=-=-=-=-= color map =-=-=-=-=-=-=-=-=-=')

    # load detector
    print('loading detector...')
    net = darknet.load_net(args.cfg_path.encode(), args.weights_path.encode(), 0)
    meta = darknet.load_meta(args.data_path.encode())
    print('the detector was successfully loaded.')

    # create movie
    print('creating movie...')
    frame_h, frame_w, _ = cv2.imread(image_paths[0]).shape
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    movie = cv2.VideoWriter(args.out_path, fourcc, args.fps, (frame_w, frame_h))
    pbar = tqdm.tqdm(total=len(image_paths))
    for path in image_paths:
        img = cv2.imread(path)
        bboxes = darknet.detect(net, meta, path.encode())
        img = utils.draw_bounding_boxes(img, bboxes, color_map)
        img = cv2.resize(img, (frame_w, frame_h))
        movie.write(img)
        pbar.update()
    pbar.close()
    movie.release()
    print('output movie was saved as %s .' % args.out_path)
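# The utils.draw_bounding_boxes helper used above is not shown; this sketch
# assumes darknet.detect() returns (name, confidence, (cx, cy, w, h)) tuples
# with pixel-space box centers, which is the common pydarknet format.
def draw_bounding_boxes_sketch(img, bboxes, color_map):
    for name, conf, (cx, cy, w, h) in bboxes:
        # meta names usually come back as bytes; color_map keys are str
        name = name.decode() if isinstance(name, bytes) else name
        pt1 = (int(cx - w / 2), int(cy - h / 2))
        pt2 = (int(cx + w / 2), int(cy + h / 2))
        cv2.rectangle(img, pt1, pt2, color_map[name], 2)
        cv2.putText(img, '%s %.2f' % (name, conf), (pt1[0], pt1[1] - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color_map[name], 1)
    return img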