# Imports assumed by this section; core.utils, core.config, core.yolov4,
# deep_sort, and tools.generate_detections are repo-local modules from the
# surrounding project, not standard packages.
import os
import time
from itertools import combinations

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from absl.flags import FLAGS
from PIL import Image
from tensorflow.compat.v1 import ConfigProto, InteractiveSession
from tensorflow.python.saved_model import tag_constants

import core.utils as utils
from core.config import cfg
from core.yolov4 import filter_boxes
from deep_sort import nn_matching, preprocessing
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools import generate_detections as gdet


def main(_argv):
    # definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture; a numeric string selects a webcam index
    try:
        vid = cv2.VideoCapture(int(video_path))
    except ValueError:
        vid = cv2.VideoCapture(video_path)

    out = None

    # by default VideoCapture returns float instead of int; width and height
    # are computed unconditionally because the frame border drawn later needs
    # them even when no output file is requested
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))

    # get video ready to save locally if flag is set
    if FLAGS.output:
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            # tflite output order differs for tiny yolov3
            if FLAGS.model == 'yolov3' and FLAGS.tiny:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score
        )

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default the tracker would allow every class in the .names file:
        # allowed_classes = list(class_names.values())
        # here it is restricted to people only
        allowed_classes = ['person']

        # loop through objects and use the class index to get the class name,
        # keeping only the classes in the allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)

        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature
                      in zip(bboxes, scores, names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-max suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # call the tracker
        tracker.predict()
        tracker.update(detections)

        colorred = [255, 0, 0]
        colorgreen = [0, 255, 0]

        # update tracks, storing the IDs and coordinates of detected people in a list
        objectboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            objectboxes.append((int(track.track_id), int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))

            # if the info flag is enabled, print details about each track
            if FLAGS.info:
                print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                    str(track.track_id), class_name,
                    (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # lists holding people who are socially distanced and people who are at risk
        distanced_people = []
        risked_people = []
        risked_people_id = []

        # check the Euclidean distance between the box centers of every pair of people
        for (id1, ax1, ay1, ax2, ay2), (id2, bx1, by1, bx2, by2) in combinations(objectboxes, 2):
            (x1, y1, x2, y2) = ((ax1 + ax2) / 2, (ay1 + ay2) / 2, (bx1 + bx2) / 2, (by1 + by2) / 2)
            # change this parameter to change the social distancing threshold (in pixels)
            if np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2) < 75:
                if (id1, ax1, ay1, ax2, ay2) not in risked_people:
                    risked_people.append((id1, ax1, ay1, ax2, ay2))
                if (id2, bx1, by1, bx2, by2) not in risked_people:
                    risked_people.append((id2, bx1, by1, bx2, by2))

        for (idp, x1, y1, x2, y2) in objectboxes:
            if (idp, x1, y1, x2, y2) not in risked_people:
                distanced_people.append((idp, x1, y1, x2, y2))

        # draw red boxes around people who are at risk
        for (person_id, x1, y1, x2, y2) in risked_people:
            cv2.rectangle(frame, (x1, y1), (x2, y2), colorred, 2)
            cv2.rectangle(frame, (x1, y1 - 30), (x1 + (len('person') + len(str(person_id))) * 17, y1), colorred, -1)
            cv2.putText(frame, "person - " + str(person_id), (x1, y1 - 10), 0, 0.75, (255, 255, 255), 2)
            risked_people_id.append(person_id)

        # draw green boxes around people who are socially distanced
        for (person_id, x1, y1, x2, y2) in distanced_people:
            cv2.rectangle(frame, (x1, y1), (x2, y2), colorgreen, 2)
            cv2.rectangle(frame, (x1, y1 - 30), (x1 + (len('person') + len(str(person_id))) * 17, y1), colorgreen, -1)
            cv2.putText(frame, "person - " + str(person_id), (x1, y1 - 10), 0, 0.75, (255, 255, 255), 2)

        # draw a green border around the frame if everyone is socially distanced, red if not
        if len(risked_people_id) == 0:
            cv2.rectangle(frame, (0, 0), (width, height), colorgreen, 4)
        else:
            listToStr = ', '.join([str(elem) for elem in risked_people_id])
            cv2.rectangle(frame, (0, 0), (width, height), colorred, 4)
            cv2.putText(frame, "People at risk: " + listToStr, (0, 30), 0, 1.25, (255, 0, 0), 2)

        # calculate frames per second of running detections
        # fps = 1.0 / (time.time() - start_time)
        # print("FPS: %.2f" % fps)

        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
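# The pairwise distance check above can be factored into a small, testable
# helper. A minimal sketch; the name partition_by_distance and its default
# 75-pixel threshold are illustrative, not part of the original script:
import numpy as np
from itertools import combinations

def partition_by_distance(objectboxes, threshold=75):
    """Split (track_id, x1, y1, x2, y2) boxes into (distanced, risked) lists
    by the Euclidean distance between bounding-box centers."""
    risked = set()
    for a, b in combinations(objectboxes, 2):
        ax, ay = (a[1] + a[3]) / 2, (a[2] + a[4]) / 2
        bx, by = (b[1] + b[3]) / 2, (b[2] + b[4]) / 2
        if np.hypot(ax - bx, ay - by) < threshold:
            risked.update((a, b))
    distanced = [box for box in objectboxes if box not in risked]
    return distanced, list(risked)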
# The second script below additionally assumes the repo-local helpers
# count_objects and crop_objects (e.g. from core.functions import
# count_objects, crop_objects).
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size

    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    out = None

    # for an IP camera:
    # vid = cv2.VideoCapture('rtsp://*****:*****@192.168.1.180:554/cam/realmonitor?channel=1&subtype=1')
    # for a webcam:
    vid = cv2.VideoCapture(0)

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_num += 1
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            if FLAGS.model == 'yolov3' and FLAGS.tiny:
                boxes, pred_conf = filter_boxes(pred[1], pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

        pred_bbox = [
            bboxes,
            scores.numpy()[0],
            classes.numpy()[0],
            valid_detections.numpy()[0]
        ]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment the line below to detect only people)
        # allowed_classes = ['person']

        # count the objects found
        counted_classes = count_objects(pred_bbox, by_class=True, allowed_classes=allowed_classes)
        # loop through the counted classes dict and draw them onto the frame
        image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, counted_classes,
                                allowed_classes=allowed_classes, read_plate=FLAGS.plate)

        crop_rate = 200  # capture images every so many frames (e.g. crop photos every 200 frames)
        crop_path = os.path.join(os.getcwd(), 'detections', 'crop', 'real-time2')
        try:
            os.mkdir(crop_path)
        except FileExistsError:
            pass
        if frame_num % crop_rate == 0:
            crop_objects(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), pred_bbox, crop_path, allowed_classes)

        result = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        cv2.imshow("result", result)

        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
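# Both main() variants above read absl flags defined at module scope in the
# original scripts. A minimal sketch of those definitions and the entry point;
# the default values here are assumptions, not taken from this section:
from absl import app, flags

flags.DEFINE_string('framework', 'tf', "'tf' or 'tflite'")
flags.DEFINE_string('weights', './checkpoints/yolov4-416', 'path to weights file')
flags.DEFINE_integer('size', 416, 'resize images to this square size')
flags.DEFINE_boolean('tiny', False, 'use yolo-tiny weights')
flags.DEFINE_string('model', 'yolov4', "'yolov3' or 'yolov4'")
flags.DEFINE_string('video', './data/video/test.mp4', 'path to input video, or a webcam index')
flags.DEFINE_string('output', None, 'path to output video file')
flags.DEFINE_string('output_format', 'XVID', 'codec used in VideoWriter')
flags.DEFINE_float('iou', 0.45, 'IoU threshold for non-max suppression')
flags.DEFINE_float('score', 0.50, 'detection score threshold')
flags.DEFINE_boolean('dont_show', False, 'suppress the display window')
flags.DEFINE_boolean('info', False, 'print detailed info about each track')
flags.DEFINE_boolean('plate', False, 'run license plate recognition')

if __name__ == '__main__':
    try:
        app.run(main)
    except SystemExit:
        pass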
def pre():
    # create a TF1-compatible session whose GPU memory allocation grows on
    # demand instead of reserving all device memory upfront
    from tensorflow.compat.v1 import ConfigProto
    from tensorflow.compat.v1 import InteractiveSession

    config = ConfigProto()
    config.gpu_options.allow_growth = True
    InteractiveSession(config=config)
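# For reference, the same allow-growth behavior is available through the
# TF2-native API; a sketch, assuming at least one visible GPU:
import tensorflow as tf

def pre_tf2():
    # enable on-demand GPU memory growth for every visible device
    for gpu in tf.config.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(gpu, True)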