def _to_output(self, image, boxes, scores, classes, nums):
    scores = scores[:nums]
    classes = classes[:nums].astype(np.int32)
    boxes = boxes[:nums]
    indexes = scores >= self.score_threshold
    scores = scores[indexes]
    classes = classes[indexes]
    boxes = convert_boxes(image, boxes[indexes])
    features = self.encoder(image, boxes)
    return [
        Detection(bbox, score, self.class_names[clazz], feature)
        for bbox, score, clazz, feature in zip(boxes, scores, classes, features)
    ]
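# convert_boxes is used throughout these scripts but never defined in this
# file. A minimal sketch of what it is assumed to do (the name and details
# are assumptions, not the real helper): scale YOLO's normalized
# [x1, y1, x2, y2] corners to pixels and convert them to the
# [top-left x, top-left y, width, height] format that DeepSORT's Detection
# and its tlwh attribute expect.
import numpy as np

def convert_boxes_sketch(image, boxes):
    height, width = image.shape[:2]
    converted = []
    for x1, y1, x2, y2 in boxes:
        converted.append([x1 * width, y1 * height,
                          (x2 - x1) * width, (y2 - y1) * height])
    return converted

# e.g. one box covering the left half of a 100x200 image:
# convert_boxes_sketch(np.zeros((100, 200, 3)), [[0.0, 0.0, 0.5, 1.0]])
# -> [[0.0, 0.0, 100.0, 100.0]]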
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        print(fps)
        print(width)
        print(height)
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    tot = -1
    coord = {}
    pred = {}
    framelists = []

    while True:
        tot = tot + 1
        _, img = vid.read()
        if img is None:
            # guard before the frame buffer below ever touches the frame
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            continue

        print(len(framelists))
        temp_img = img
        count = 0
        temp_framelists = []
        if len(framelists) == 5:
            framelists.remove(framelists[0])
            for i in range(0, 5):
                img_v = cv2.imread("data/video/raw1/frame%d.jpg" % i)
                temp_framelists.append(img_v)
            temp_framelists.remove(temp_framelists[0])
            temp_framelists.append(temp_img)
            for i in range(0, len(temp_framelists)):
                cv2.imwrite("data/video/raw1/frame%d.jpg" % i, temp_framelists[i])
        else:
            if count < 5:
                if count == 0:
                    cv2.imwrite("data/video/raw1/frame0.jpg", temp_img)
                else:
                    cv2.imwrite("data/video/raw1/frame%d.jpg" % count, temp_img)
                for i in range(0, count + 1):
                    img_v = cv2.imread("data/video/raw1/frame%d.jpg" % i)
                    temp_framelists.append(img_v)
                framelists.append(temp_img)
        if len(framelists) > 0:
            cv2.imwrite("data/video/raw/frame%d.jpg" % tot, framelists[-1])

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        #if 90 < tot < 115:
        #    cv2.putText(img, "warning", (10, 50), cv2.FONT_HERSHEY_SIMPLEX,
        #                fontScale=2.5, thickness=5, color=(255, 0, 0))
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]
        #print(detections)

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        #print(detections)

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        ls = []  # to get the track ids of each frame
        a = framelists[-1]
        #cv2.imwrite("data/video/raw1/frame%d.jpg" % tot, temp_framelists[-1])
        for track in tracker.tracks:
            x_min = []
            x_max = []
            y_max = []
            temp_box = track.to_tlbr()
            ls.append(track.track_id)  # add track ids
            if track.track_id not in coord:
                coord[track.track_id] = []
            else:
                leng = len(coord[track.track_id])
                if leng == 5:
                    coord[track.track_id].remove(coord[track.track_id][0])
            coord[track.track_id].append(list(temp_box))
            for i in range(0, len(coord[track.track_id])):
                if (coord[track.track_id][i][0] > 0
                        and coord[track.track_id][i][2] < 720
                        and coord[track.track_id][i][3] < 720):
                    x_min.append(coord[track.track_id][i][0])
                    x_max.append(coord[track.track_id][i][2])
                    y_max.append(coord[track.track_id][i][3])

            # base - frame series
            base = []
            num_of_frames = len(x_min)
            if num_of_frames >= 3:
                base = list(range(1, num_of_frames + 1))
                #for i in range(0, len(x_min)):
                #    base.append(i + 1)
                model1 = polyfit(base, x_min, 1)
                predict_x_min = poly1d(model1)
                model2 = polyfit(base, x_max, 1)
                predict_x_max = poly1d(model2)
                model3 = polyfit(base, y_max, 1)
                predict_y_max = poly1d(model3)
                tempo_framelists = []
                if ((50 < predict_x_min(60) < 650 or 50 < predict_x_max(60) < 650)
                        and 500 < predict_y_max(60) < 800):
                    cv2.putText(img, "warning", (10, 50), cv2.FONT_HERSHEY_SIMPLEX,
                                fontScale=2.5, thickness=5, color=(255, 0, 0))
                    for i in range(0, len(framelists)):
                        image_vehicle = cv2.imread("data/video/raw1/frame%d.jpg" % i)
                        tempo_framelists.append(image_vehicle)
                    for j in range(0, len(coord[track.track_id])):
                        vehicle = coord[track.track_id][-(j + 1)]
                        crop_img = tempo_framelists[-(j + 1)][
                            int(vehicle[1]) - 10:int(vehicle[3]) + 10,
                            int(vehicle[0]) - 10:int(vehicle[2]) + 10]
                        inp_img = tempo_framelists[-(j + 1)]
                        crop_height = crop_img.shape[0]
                        crop_width = crop_img.shape[1]
                        print(crop_height, crop_width)
                        print(int(vehicle[3]) - int(vehicle[1]),
                              int(vehicle[2]) - int(vehicle[0]))
                        input_dir = os.path.join("data/video/cropped/turn%d" % tot)
                        output_dir = os.path.join("data/video/segment/turn%d" % tot)
                        if not os.path.exists(input_dir):
                            os.mkdir(input_dir)
                            os.mkdir(output_dir)
                        #cv2.imwrite("data/video/cropped/frame%d.jpg" % j, crop_img)
                        #input_image = "data/video/cropped/frame%d.jpg" % j
                        #output_image = "data/video/segment/frame%d.jpg" % j
                        input_folder = "data/video/cropped/turn%d" % tot
                        output_folder = "data/video/segment/turn%d" % tot
                        input_image = input_folder + "/frame%d.jpg" % j
                        output_image = output_folder + "/frame%d.jpg" % j
                        #cv2.imwrite(input_image, crop_img)
                        cv2.imwrite(input_image, inp_img)
                        xmin = str(int(vehicle[0]) - 10)
                        xmax = str(int(vehicle[2]) + 10)
                        ymin = str(int(vehicle[1]) - 10)
                        ymax = str(int(vehicle[3]) + 10)
                        dim = xmin + "," + xmax + "," + ymin + "," + ymax
                        os.system(
                            "python segment.py"
                            " --trained_model=weights/yolact_base_54_800000.pth"
                            " --score_threshold=0.15 --top_k=15"
                            " --image=" + input_image + ":" + output_image
                            + " --dimension=" + dim)
                        #os.system("python segment.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --image=car2.jpg")
                        #os.system("python segment.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --image=car2.jpg:output2.jpg")
                        #crop_img = img[y:y+h, x:x+w]
                        #cv2.imshow("cropped", crop_img)
                        #cv2.waitKey(0)
                        #cv2.imwrite("images/frame%d.jpg" % count, frame)

            #if track.track_id == 13:
            #    print("temp " + str(temp_box))
            #if not track.is_confirmed() or track.time_since_update > 1:
            #    continue
            bbox = track.to_tlbr()
            #if track.track_id == 13:
            #    print("special " + str(bbox))
            #print(bbox)
            #print(track.track_id, bbox)
            #print(track.track_id)
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

        #print("track ids : " + str(ls))
        #print("dict : " + str(coord))

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        #if tot == 6:
        #    cv2.imwrite("data/video/images/000.jpg", framelists[0])
        #    cv2.imwrite("data/video/images/001.jpg", framelists[1])
        #    cv2.imwrite("data/video/images/002.jpg", framelists[2])
        #    cv2.imwrite("data/video/images/003.jpg", framelists[3])
        #    cv2.imwrite("data/video/images/004.jpg", framelists[4])

        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' '
                        + str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
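# The warning above hinges on fitting a line (degree-1 polynomial) to the
# last few box coordinates of a track and extrapolating it to a future
# frame index. A self-contained toy version of that step (the history
# values are made up; the thresholds are the ones used above):
from numpy import polyfit, poly1d

x_min_history = [300.0, 320.0, 345.0, 370.0, 390.0]  # x_min over 5 frames
base = list(range(1, len(x_min_history) + 1))        # frame series 1..5
predict_x_min = poly1d(polyfit(base, x_min_history, 1))
# Extrapolate to frame 60: the slope is ~23 px/frame, so this gives ~1656;
# the caller raises the "warning" overlay when the prediction lands inside
# the danger zone.
print(50 < predict_x_min(60) < 650)  # False for this toy track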
def track(self, img, yolooutput, recognizeoutput):
    #img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    #img_in = tf.expand_dims(img_in, 0)
    #img_in = transform_images(img_in, 416)  # 416 = size
    t1 = time.time()
    boxes, detect_scores, classes, nums = yolooutput
    persons, names, scores = recognizeoutput
    self.person_dict = dict(self.person_dict, **persons)

    names = np.array(names)
    converted_boxes = convert_boxes(img, boxes[0])
    features = self.encoder(img, converted_boxes)
    detections = [
        Detection(bbox, score, class_name, feature)
        for bbox, score, class_name, feature in zip(
            converted_boxes, scores, names, features)
    ]  # if there is an error here, try removing "[0]"

    # initialize color map
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

    # run non-maxima suppression
    boxs = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    classes = np.array([d.class_name for d in detections])
    indices = preprocessing.non_max_suppression(boxs, classes, self.nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    # Call the tracker
    self.tracker.predict()
    self.tracker.update(detections)

    output_data = []
    for track in self.tracker.tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        bbox = track.to_tlbr()
        class_name = track.get_class()
        if class_name in self.person_dict:
            person = self.person_dict[class_name]
            if person:
                person_data = {
                    "key": person.id,
                    "name": person.familyName + " " + person.firstName,
                    "age": person.age,
                    "address": person.address,
                }
                output_data.append(person_data)
        color = colors[int(track.track_id) % len(colors)]
        color = [i * 255 for i in color]
        cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                      (int(bbox[2]), int(bbox[3])), color, 2)
        cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                      (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                       int(bbox[1])), color, -1)
        cv2.putText(img, class_name, (int(bbox[0]), int(bbox[1] - 10)),
                    0, 0.75, (255, 255, 255), 2)

    """if self.output_path:
        self.out.write(img)
        frame_index = frame_index + 1
        list_file.write(str(frame_index) + ' ')
        if len(converted_boxes) != 0:
            for i in range(0, len(converted_boxes)):
                list_file.write(str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' '
                                + str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ')
        list_file.write('\n')"""

    self.output_data = output_data
    return img
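# All of these trackers rely on the same small Detection interface from
# DeepSORT (deep_sort/detection.py in this kind of fork). A stand-in for
# illustration only, not the real class:
import numpy as np

class DetectionSketch:
    def __init__(self, tlwh, confidence, class_name, feature):
        self.tlwh = np.asarray(tlwh, dtype=float)  # [top-left x, top-left y, w, h]
        self.confidence = float(confidence)        # detector score
        self.class_name = class_name               # label string
        self.feature = np.asarray(feature)         # appearance embedding from the encoder

    def to_tlbr(self):
        # [x, y, w, h] -> [x1, y1, x2, y2], the format used for drawing above
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    def get_class(self):
        return self.class_name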
def main():
    class_names = [c.strip() for c in open('./data/labels/coco.names').readlines()]
    yolo = YoloV3(classes=len(class_names))
    yolo.load_weights('./weights/yolov3.tf')

    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.8

    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    vid = cv2.VideoCapture("traffic1.mkv")
    #vid = cv2.VideoCapture("video.webm")
    #vid = VideoCaptureAsync("video.webm")
    #vid = vid.start()

    codec = cv2.VideoWriter_fourcc(*'XVID')
    vid_fps = int(vid.get(cv2.CAP_PROP_FPS))
    vid_width, vid_height = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                             int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    out = cv2.VideoWriter('./data/video/results.avi', codec, vid_fps, (vid_width, vid_height))

    from collections import deque
    pts = [deque(maxlen=30) for _ in range(1000)]

    counter = []
    directory1 = "/home/ecl/Downloads/Limon/Object_Tracking/imgzmq/dataset/"
    result = []
    new_cnt = 0

    while True:
        _, img = vid.read()
        if img is None:
            print('Completed')
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        tracker.predict()
        tracker.update(detections)

        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        #current_count = int(0)
        #count = 0
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]

            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2))
            pts[track.track_id].append(center)
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(img, pts[track.track_id][j - 1], pts[track.track_id][j],
                         color, thickness)

            height, width, _ = img.shape
            cv2.line(img, (0, int(3 * height / 6 + height / 20)),
                     (width, int(3 * height / 6 + height / 20)), (0, 255, 0), thickness=2)
            #cv2.line(img, (0, int(3 * height / 6 - height / 20)),
            #         (width, int(3 * height / 6 - height / 20)), (0, 255, 0), thickness=2)
            cv2.line(img, (220, 460), (1000, 450), (0, 0, 255), 2)

            center_y = int((bbox[1] + bbox[3]) / 2)
            #count = 0
            if (int(3 * height / 6 - height / 20) <= center_y
                    <= int(3 * height / 6 + height / 20)):
                if class_name == 'car' or class_name == 'truck' or class_name == 'person':
                    counter.append(int(track.track_id))
                    directory = r'/home/ecl/Downloads/Limon/Object_Tracking/imgzmq/dataset'
                    for filename in os.listdir(directory):
                        if filename.endswith(".jpg") or filename.endswith(".png"):
                            a1 = os.path.join(directory, filename)
                            b = int(re.search(r'\d+', a1).group())
                            result.append(b)
                        else:
                            continue
                    b1 = max(result) + 1
                    count = 0
                    while True:
                        count += 1
                        print(count)
                        #count = b1
                        ## increase image size and resolution
                        #new_img = img[int(bbox[0]):(int(bbox[2]) + int(bbox[3])), int(bbox[1]):(int(bbox[2]) + int(bbox[3]))]
                        new_img = img[int(bbox[1]):(int(bbox[1]) + int(bbox[3])),
                                      int(bbox[0]):(int(bbox[0]) + int(bbox[2]))]
                        #new_rgb = rgb[int(bbox[1]):(int(bbox[1]) + int(bbox[3])), int(bbox[0]):(int(bbox[0]) + int(bbox[2]))]
                        #new_img = cv2.resize(new_img, (360, 360), interpolation=cv2.INTER_NEAREST)
                        cv2.imwrite(directory1 + f"image{b1}.jpg", new_img)
                        if count > 1:
                            print("break the loop..............")
                            break
                    #current_count += 1

        total_count = len(set(counter))
        #cv2.putText(img, "Current Vehicle Count: " + str(current_count), (0, 80), 0, 1, (0, 0, 255), 2)
        cv2.putText(img, "Total Vehicle Count: " + str(total_count), (0, 130), 0, 1, (0, 0, 255), 2)

        fps = 1. / (time.time() - t1)
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), 0, 1, (0, 0, 255), 2)
        #cv2.resizeWindow('output', 1024, 768)
        cv2.imshow('output', img)
        out.write(img)

        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    out.release()
    cv2.destroyAllWindows()
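# The counting logic above in isolation: a track id is appended to `counter`
# whenever its box center falls inside a horizontal band around the
# counting line, and deduplicated with set() so each track counts once:
def in_counting_band(center_y, height):
    return int(3 * height / 6 - height / 20) <= center_y <= int(3 * height / 6 + height / 20)

counter = []
for track_id, center_y in [(1, 500), (2, 100), (1, 505)]:  # toy (id, center_y) pairs
    if in_counting_band(center_y, height=960):              # band is 432..528 here
        counter.append(track_id)
print(len(set(counter)))  # 1: id 1 hit the band twice but is counted once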
def main(_argv):
    # Definition of the parameters
    right2left_koi = 0
    right2left_til = 0
    left2right_koi = 0
    left2right_til = 0
    font = cv2.FONT_HERSHEY_DUPLEX
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    # by default VideoCapture returns float instead of int; width and height
    # are also needed below for the midline and the on-screen counters, so
    # they are read regardless of FLAGS.output
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))

    out = None
    if FLAGS.output:
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    # midline position variables
    midline_pos_x = int(width / 2) - 3
    midline_pos_y = int(height)

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # draw midline
        cv2.line(img, (midline_pos_x, 0), (midline_pos_x, midline_pos_y), (0, 0, 0), 3)

        screen1_koi = 0
        screen1_til = 0
        screen2_koi = 0
        screen2_til = 0
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            c_curr = (int(bbox[0] + abs(bbox[0] - bbox[2]) / 2),
                      int(bbox[1] + abs(bbox[1] - bbox[3]) / 2))
            center_x = c_curr[0]
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 1)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 17)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 14,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 1)), font, 0.6, (0, 0, 0), 1)

            # store patterns of individual fish
            pattern = get_patterns(c_curr, track.track_id, class_name)

            # Draw the patterns on the screen
            pre_p = c_curr
            for p in pattern[-50::5]:
                cv2.circle(img, p, 3, color, -1)
                if pre_p != c_curr:
                    cv2.line(img, pre_p, p, color, 1)
                pre_p = p

            if len(pattern) >= 2:
                moving2right = center_x > pattern[-2][0]
                on_screen_left = pattern[-2][0] < midline_pos_x
                moving2left = center_x < pattern[-2][0]
                on_screen_right = pattern[-2][0] > midline_pos_x

                if class_name == 'Koi' and on_screen_left:
                    screen1_koi += 1
                    if moving2right and center_x > midline_pos_x:
                        left2right_koi += 1
                if class_name == 'Tilapia' and on_screen_left:
                    screen1_til += 1
                    if moving2right and center_x > midline_pos_x:
                        left2right_til += 1
                if class_name == 'Koi' and on_screen_right:
                    screen2_koi += 1
                    if moving2left and center_x < midline_pos_x:
                        right2left_koi += 1
                if class_name == 'Tilapia' and on_screen_right:
                    screen2_til += 1
                    if moving2left and center_x < midline_pos_x:
                        right2left_til += 1

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # Print the instantaneous numbers detected on each half of the screen
        cv2.putText(img, "Koi : " + str(screen1_koi), (20, 30), font, 0.7, (30, 50, 205), 2)
        cv2.putText(img, "Tla : " + str(screen1_til), (20, 70), font, 0.7, (255, 0, 0), 2)
        cv2.putText(img, "Koi : " + str(screen2_koi), (int(width) - 120, 30), font, 0.7, (30, 50, 205), 2)
        cv2.putText(img, "Tla : " + str(screen2_til), (int(width) - 120, 70), font, 0.7, (255, 0, 0), 2)

        # Print left2right and right2left counts and their totals
        cv2.putText(img, str(right2left_koi) + " <-- Koi",
                    (midline_pos_x - 75, int(height) - 30), font, 0.7, (0, 0, 0), 2)
        cv2.putText(img, str(right2left_til) + " <-- Tla",
                    (midline_pos_x - 75, int(height) - 70), font, 0.7, (0, 0, 0), 2)
        cv2.putText(img, "Koi --> " + str(left2right_koi),
                    (midline_pos_x - 55, 30), font, 0.7, (0, 0, 0), 2)
        cv2.putText(img, "Tla --> " + str(left2right_til),
                    (midline_pos_x - 55, 70), font, 0.7, (0, 0, 0), 2)
        cv2.putText(img, "Total L2R : " + str(left2right_koi + left2right_til),
                    (int(width) - 200, int(height) - 30), font, 0.7, (0, 0, 0), 2)
        cv2.putText(img, "Total R2L : " + str(right2left_koi + right2left_til),
                    (int(width) - 200, int(height) - 70), font, 0.7, (0, 0, 0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (20, int(height) - 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (200, 0, 100), 2)

        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' '
                        + str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        img = cv2.resize(img, (1200, 720))
        cv2.imshow('output', img)

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
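# get_patterns above is not defined in this file; from its use it is assumed
# to append the current center to a per-track history and return it. A
# minimal stand-in built on a dict of lists (the slicing pattern[-50::5]
# used above requires a list, not a deque):
_patterns = {}

def get_patterns_sketch(center, track_id, class_name):
    key = (class_name, track_id)
    _patterns.setdefault(key, []).append(center)
    return _patterns[key][-50:]  # assumed history cap

# the direction test above compares the current x to the previous stored x:
pat = get_patterns_sketch((100, 50), 7, 'Koi')
pat = get_patterns_sketch((120, 52), 7, 'Koi')
print(pat[-1][0] > pat[-2][0])  # True: moving left-to-right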
def main(_argv):
    # PARAMS
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0
    pure_yolo = False
    AVG_PERSON_HEIGHT = 1.7  # meters
    DANGER_THRESHOLD = 3.0   # meters

    # initialize deep sort
    output_name = FLAGS.output
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if output_name:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(output_name, codec, fps, (width + width // 3 + 3, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    setup = False
    while True:
        _, img = vid.read()
        if img is None:
            # guard before copying the frame below
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        image = img.copy()
        h_image, w_image = image.shape[:2]

        if not setup:
            mouse_pts = pu.get_reference_pts_by_ui(image, pu.ui_callback)
            cv2.namedWindow("Worker Monitoring", cv2.WINDOW_NORMAL)
            cv2.resizeWindow("Worker Monitoring", 1000, 700)

            # points of reference and ROI chosen by UI
            ref_pts = np.array(mouse_pts[:4])
            ref_len_pts = np.array(mouse_pts[4:6])
            roi = mouse_pts[6:]

            # length between reference points
            w_dst = max(pu.euclidean(ref_pts[0], ref_pts[2]),
                        pu.euclidean(ref_pts[1], ref_pts[3]))
            ref_len = pu.euclidean(ref_len_pts[0], ref_len_pts[1])

            # calculating parallel vectors of lines
            c_1 = pu.get_perpendicular_vector(mouse_pts[0], mouse_pts[1],
                                              direction='ccw', magnitude=w_dst)
            c_2 = pu.get_perpendicular_vector(mouse_pts[1], mouse_pts[0],
                                              direction='cw', magnitude=w_dst)

            # getting the transformation matrix between the original reference
            # and the perpendicular "corrected" points
            dst = [ref_pts[0], ref_pts[1], c_2, c_1]
            new_M, Ht, borders = pu.get_homography_matrix(ref_pts, dst, roi)
            setup = True

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        obj_pts = []
        obj_classes = []
        if not pure_yolo:
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                class_name = track.get_class()
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]
                cntr_x = (bbox[0] + bbox[2]) / 2
                cntr_y = (bbox[1] + bbox[3]) / 2
                obj_pts.append([cntr_x, cntr_y])
                obj_classes.append(str(class_name))
        else:
            for det in detections:
                bbox = det.to_tlbr()
                class_name = det.get_class()
                cntr_x = (bbox[0] + bbox[2]) / 2
                cntr_y = (bbox[1] + bbox[3]) / 2
                obj_pts.append([cntr_x, cntr_y])
                obj_classes.append(str(class_name))

        if len(obj_pts) == 0:
            temp_canvas = np.zeros((h_image, w_image + w_image // 3 + 3, 3), dtype='uint8')
            temp_canvas[:, w_image // 3 + 3:, :] = image
            cv2.imshow('Worker Monitoring', temp_canvas)
            if cv2.waitKey(1) == ord('q'):
                break
            continue

        obj_pts = np.array(obj_pts, dtype='float32').reshape(-1, 1, 2)

        # transforming the object coordinates and the reference length points
        transformed_obj_pts = cv2.perspectiveTransform(obj_pts, new_M).astype('int').reshape(-1, 2)

        # filtering the points that are not in the ROI
        valid_pts, valid_classes = pu.remove_objects_off_limits(transformed_obj_pts, obj_classes)

        px_per_meter = ref_len / AVG_PERSON_HEIGHT
        indices_in_danger, classes_in_danger = pu.detect_in_danger(
            valid_pts, valid_classes, px_per_meter, DANGER_THRESHOLD)

        final_visualization = pu.visualize(image, borders, valid_pts, obj_pts,
                                           indices_in_danger, Ht)
        cv2.namedWindow("Worker Monitoring", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("Worker Monitoring", 1000, 700)
        cv2.imshow("Worker Monitoring", final_visualization)

        if output_name:
            out.write(final_visualization)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' '
                        + str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            cv2.destroyAllWindows()
            break

    vid.release()
    if output_name:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
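# The danger check above works in a rectified ("bird's eye") plane: ground
# points are warped by a homography, a pixels-per-meter scale comes from a
# reference of known length (AVG_PERSON_HEIGHT), and pairwise distances are
# compared to DANGER_THRESHOLD. A compact sketch with made-up reference
# points, standing in for the pu.* helpers:
import numpy as np
import cv2

src = np.float32([[100, 400], [500, 400], [520, 700], [80, 700]])  # image plane
dst = np.float32([[0, 0], [400, 0], [400, 300], [0, 300]])         # rectified plane
M = cv2.getPerspectiveTransform(src, dst)

feet = np.float32([[[300, 550]], [[350, 560]]])  # bottom centers of two boxes
warped = cv2.perspectiveTransform(feet, M).reshape(-1, 2)

px_per_meter = 170 / 1.7  # assumed: a 1.7 m reference spans 170 warped pixels
dist_m = np.linalg.norm(warped[0] - warped[1]) / px_per_meter
print(dist_m < 3.0)  # whether this pair would be flagged as "in danger"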
def get_bboxes(img):
    # YOLO START
    img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_in = tf.expand_dims(img_in, 0)
    img_in = transform_images(img_in, 416)
    # print("-------------------------------")
    # print(type(img_in))   # <class 'tensorflow.python.framework.ops.EagerTensor'>
    # print(img_in.shape)   # (1, 416, 416, 3)
    # print(img_in.dtype)   # <dtype: 'float32'>
    # print("-------------------------------")

    boxes, scores, classes, nums = yolo.predict(img_in)
    classes = classes[0]
    names = []
    for i in range(len(classes)):
        names.append(class_names[int(classes[i])])
    names = np.array(names)
    converted_boxes = convert_boxes(img, boxes[0])
    features = encoder(img, converted_boxes)
    detections = [
        Detection(bbox, score, class_name, feature)
        for bbox, score, class_name, feature in zip(
            converted_boxes, scores[0], names, features)
    ]

    boxs = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    classes = np.array([d.class_name for d in detections])
    indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]
    # p.print(detections)    // [<deep_sort.detection.Detection object at 0x...>, ...]
    # p.type_(detections)    // TYPE => <class 'list'>
    # p.print(detections[1]) // <deep_sort.detection.Detection object at 0x...>

    tracker.predict()
    tracker.update(detections)

    # Matplotlib has a number of built-in colormaps accessible via matplotlib.cm.get_cmap.
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

    # Current vehicle count
    current_count = int(0)

    # for-loop over the tracker's results (runs once per vehicle in a single frame)
    bboxes = []
    for track in tracker.tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        bbox = track.to_tlbr()  # e.g. [848.789 113.981 901.130 144.326]
        # class_name = track.get_class()  # car (object name)
        # color = colors[int(track.track_id) % len(colors)]  # (0.807, 0.858, 0.611)
        # color = [i * 255 for i in color]                   # [231.0, 203.0, 148.0]
        bboxes.append(bbox)
        # img => frame taken from the video (np ndarray)
        # Draw the bounding box:
        # cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
        # cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
        #               (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
        #               color, -1)
        # cv2.putText(img, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
        #             0, 0.75, (255, 255, 255), 2)
    return bboxes
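# A typical call site for get_bboxes (yolo, class_names, encoder, tracker
# and nms_max_overlap are module-level globals in this file); the video
# path here is an assumption:
import cv2

vid = cv2.VideoCapture('traffic1.mkv')
while True:
    ok, frame = vid.read()
    if not ok:
        break
    for x1, y1, x2, y2 in get_bboxes(frame):
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
    cv2.imshow('tracks', frame)
    if cv2.waitKey(1) == ord('q'):
        break
vid.release()
cv2.destroyAllWindows()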
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    frame_count = 0
    dict = {}
    previous_dict = {}
    speed_dict = {}
    tracked_objects = []
    first_frame_hgt = {}

    while True:
        _, img = vid.read()
        print("The length of tracked objects is " + str(len(tracked_objects)))
        frame_count = frame_count + 1
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        #print("Before, the length of the classes is " + str(len(classes)))
        classes = classes[0]
        #print("the class name is " + str(classes[0]))
        #print("after, the length of the classes is " + str(len(classes)))
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        print("printing the detected class name " + str(classes[0]))
        detections = [detections[i] for i in indices if classes[i] == 'person']

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        previous_dict = dict
        dict = {}
        pt_distance = None
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            if class_name != 'person':
                continue
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            #cv2.putText(img, class_name + "-" + str(track.track_id),
            #            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            dict[class_name + str(track.track_id)] = (int(bbox[0]), int(bbox[1]),
                                                      int(bbox[2]), int(bbox[3]))
            if (class_name + str(track.track_id)) not in tracked_objects:
                tracked_objects.append(class_name + str(track.track_id))
                speed_dict.update({(class_name + str(track.track_id)): []})
                first_frame_hgt[class_name + str(track.track_id)] = int(bbox[3] - bbox[1])
                print("the height calculated is " + str(int(bbox[3] - bbox[1])))
                #class_name + str(track.track_id) + str("_speed") = []

            if (class_name + str(track.track_id)) in dict.keys():
                if (class_name + str(track.track_id)) in previous_dict.keys():
                    a, b, c, d = dict[class_name + str(track.track_id)]
                    current_fr_ctr = a + 1 / 2 * (c - a), b + 1 / 2 * (d - b)
                    a1, b1, c1, d1 = previous_dict[class_name + str(track.track_id)]
                    prv_fr_ctr = a1 + 1 / 2 * (c1 - a1), b1 + 1 / 2 * (d1 - b1)
                    #pt_distance = math.sqrt(sum([(a - b) ** 2 for a, b in zip(dict[(class_name + str(track.track_id))], previous_dict[(class_name + str(track.track_id))])]))
                    # calculating the distance between the bounding box centers
                    # of 2 adjacent frames
                    pt_distance = math.sqrt(
                        sum([(a - b) ** 2 for a, b in zip(current_fr_ctr, prv_fr_ctr)]))
                    frame_rate = 12
                    image_aspect_ratio = 18
                    current_frame_height = int(bbox[3] - bbox[1])
                    # calculating the speed, adjusting for the person's distance
                    # from the camera and the meter-vs-pixel aspect ratio
                    cur_fr_spd = (pt_distance * frame_rate
                                  * first_frame_hgt[class_name + str(track.track_id)]
                                  / current_frame_height / image_aspect_ratio)
                    print(pt_distance)
                    print("the speed calculated is " + str(cur_fr_spd))
                    speed_dict[class_name + str(track.track_id)].append(pt_distance)
                print(class_name + str(track.track_id))
                print(dict[class_name + str(track.track_id)])
                print("speed dict length is "
                      + str(len(speed_dict[class_name + str(track.track_id)])))

            if pt_distance is not None:
                cv2.putText(img,
                            class_name + "-" + str(track.track_id) + "-" + '%.2f' % cur_fr_spd,
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
            else:
                cv2.putText(img, class_name + "-" + str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

        print("the dict size for frame " + str(frame_count) + " is " + str(len(dict)))
        print("the previous dict size for frame " + str(frame_count) + " is "
              + str(len(previous_dict)))

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)

        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' '
                        + str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
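# The speed estimate above, extracted into one function: pixel displacement
# of the box center between consecutive frames, scaled by the frame rate,
# corrected by first-seen height / current height as a rough depth
# correction, and divided by the fixed pixel-to-meter factor the code
# calls image_aspect_ratio:
import math

def frame_speed(curr_center, prev_center, first_height, curr_height,
                frame_rate=12, image_aspect_ratio=18):
    px = math.dist(curr_center, prev_center)  # center displacement in pixels
    return px * frame_rate * first_height / curr_height / image_aspect_ratio

# e.g. 9 px/frame at 12 fps with an unchanged box height -> 6.0
print(frame_speed((100, 200), (109, 200), 80, 80))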
def main(argv):
    # print("location received in main as: ", e)
    global VIOLATION_PERCENTAGE, PROCESSING_STATUS, VIOLATION_FRAME
    violator_count_list = list()

    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    yolo = YoloV3(classes=80)
    yolo.load_weights('./weights/yolov3.tf')
    logging.info('weights loaded')

    class_names = [c.strip() for c in open('./coco.names').readlines()]
    logging.info('classes loaded')

    video_path = 'test.mkv'
    try:
        vid = cv2.VideoCapture(int(FILE_URL))
    except ValueError:
        vid = cv2.VideoCapture(FILE_URL)
    time.sleep(1.0)

    out = None
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print("height: ", height)
    print("width: ", width)
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('./result.avi', codec, fps, (width, height))
    frame_index = -1

    fps = 0.0
    count = 0
    PROCESSING_STATUS = True
    while True:
        _, img = vid.read()
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)

        temp_violators = set()
        temp_total_people = set()

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            class_name1 = track.get_class()
            if class_name1 == "person":
                temp_total_people.add(track.track_id)
                bbox1 = track.to_tlbr()
                x1_c = int(bbox1[0] + (bbox1[2] - bbox1[0]) / 2)
                y1_c = int(bbox1[1] + (bbox1[3] - bbox1[1]) / 2)
                r1 = int(abs(bbox1[3] - bbox1[1]))
                color = (255, 0, 0)
                cv2.line(img, (x1_c, y1_c), (x1_c, y1_c + r1 // 2), (0, 255, 0), 2)
                cv2.circle(img, (x1_c, y1_c), 5, (255, 20, 200), -1)
                scale = r1 / 100
                transparentOverlay(img, dst_circle, (x1_c, y1_c - 5), alphaVal=110,
                                   color=(0, 200, 20), scale=scale)
                for other in tracker.tracks:
                    if not other.is_confirmed() or other.time_since_update > 1:
                        continue
                    if track.track_id == other.track_id:
                        continue
                    class_name2 = other.get_class()
                    if class_name2 == "person":
                        temp_total_people.add(other.track_id)
                        bbox2 = other.to_tlbr()
                        x2_c = int(bbox2[0] + (bbox2[2] - bbox2[0]) / 2)
                        y2_c = int(bbox2[1] + (bbox2[3] - bbox2[1]) / 2)
                        r2 = int(abs(bbox2[3] - bbox2[1]))
                        if (int_circle(x1_c, y1_c, x2_c, y2_c, r1 // 2, r2 // 2) >= 0
                                and abs(y1_c - y2_c) < r1 // 4):
                            temp_violators.add(track.track_id)
                            temp_violators.add(other.track_id)
                            cv2.line(img, (x1_c, y1_c), (x2_c, y2_c), (0, 0, 255), 2)
                            scale1 = r1 / 100
                            transparentOverlay(img, dst_circle, (x1_c, y1_c - 5),
                                               alphaVal=110, color=(0, 0, 255), scale=scale1)
                            scale2 = r2 / 100
                            transparentOverlay(img, dst_circle, (x2_c, y2_c - 5),
                                               alphaVal=110, color=(0, 0, 255), scale=scale2)

        # print fps on screen
        ### Comment below 3 lines to not see live output screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)

        ### Violators calculation
        violators_for_frame = len(temp_violators)
        VIOLATION_PERCENTAGE = violators_for_frame
        print("Violation percentage: ", violators_for_frame)
        violator_count_list.append(int(violators_for_frame))

        ### Call to firebase upload function
        # if violators_for_frame > 20:
        #     social_dist_violation_frame_handler(img)
        #     cv2.imwrite("temp.png", img)
        #     firebase_upload("temp.png")
        #     os.remove("temp.png")

        frame_index = frame_index + 1

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    if len(violator_count_list) == 0:
        mean_violation = 0
    else:
        mean_violation = sum(violator_count_list) / len(violator_count_list)
    PROCESSING_STATUS = False
    out.release()
    cv2.destroyAllWindows()
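# int_circle is not defined in this file; from its use above it is assumed
# to return a value >= 0 exactly when the two circles intersect or touch.
# A standard sketch of such a test:
import math

def int_circle_sketch(x1, y1, x2, y2, r1, r2):
    d = math.hypot(x2 - x1, y2 - y1)
    return (r1 + r2) - d  # >= 0 iff the circles overlap or touch

# two people whose radius-30 "personal space" circles overlap:
print(int_circle_sketch(100, 100, 140, 100, 30, 30) >= 0)  # True (d=40 < 60)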
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    length = 0
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)
    print('Opened video ', FLAGS.video, '. W x H ',
          int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), ' x ',
          int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    length = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))

    dir_prefix = os.path.splitext(FLAGS.video)[0]
    vid_name = dir_prefix.replace('static/', '')
    if os.path.exists(dir_prefix):
        shutil.rmtree(dir_prefix)
    os.mkdir(dir_prefix)
    dir_prefix += '/'

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    start_time = datetime.min
    bike_set = set()
    bike_list = []
    bike_dict = {
        'frame': 0,
        'id': 0,
        'finish_time': start_time,
        'Recognitions': 'default',
        'Recognised_plate': 'default',
        'plate_number': 'XXX',
        # 'appearance_num': 0,
        'Bike_image': None,
        'Full_frame': None,
        'Image_name': 'default',
        'Full_image_name': 'default'
    }

    # These are needed to write image fragments on disk
    img_name = 'default'
    full_img_name = 'default'

    fps = 0.0
    count = 0
    frame_num = -1
    while True:
        _, img = vid.read()
        frame_num += 1
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            if class_name != 'bicycle':
                continue
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            # Routines for bike detection at finish
            if track.track_id not in bike_set:
                bike_set.add(track.track_id)
                input_fps = int(vid.get(cv2.CAP_PROP_FPS))
                dt = start_time + timedelta(seconds=frame_num / input_fps)
                bike_list.append(dict(bike_dict, frame=frame_num,
                                      finish_time=str(dt.time()), id=track.track_id))
            else:
                # width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
                # height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
                for item in bike_list:
                    if item['id'] == track.track_id and 600 < bbox[3] < 800:
                        # Clear previously written images, effectively overwriting them
                        previous_image_name = item['Image_name']
                        previous_full_image_name = item['Full_image_name']
                        if os.path.exists(previous_image_name):
                            os.remove(previous_image_name)
                        if os.path.exists(previous_full_image_name):
                            os.remove(previous_full_image_name)

                        # Edit finish_time
                        input_fps = int(vid.get(cv2.CAP_PROP_FPS))
                        dt = start_time + timedelta(seconds=frame_num / input_fps)
                        item['finish_time'] = str(dt.time())[:-3]

                        # Save images and put them into the bike dict
                        img_name = str(frame_num) + '_' + str(item['id']) + '.jpg'
                        full_img_name = str(frame_num) + '_fullframe.jpg'
                        cv2.imwrite(dir_prefix + img_name,
                                    img[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])])
                        cv2.imwrite(dir_prefix + full_img_name, img)
                        item['Bike_image'] = ('<img src="../static/' + vid_name + '/'
                                              + img_name + '" width="200" >')
                        item['Full_frame'] = ('<img src="../static/' + vid_name + '/'
                                              + full_img_name + '" width="500" >')
                        item['Recognitions'] = ('<img src="../static/' + vid_name + '/'
                                                + 'res_' + img_name + '" width="200" >')
                        item['Recognised_plate'] = ('<img src="../static/' + vid_name + '/'
                                                    + 'plate_' + img_name + '" width="200" >')
                        item['Image_name'] = dir_prefix + img_name
                        item['Full_image_name'] = dir_prefix + full_img_name

                        # Update appearance and frame_num
                        # item['appearance_num'] += 1
                        item['frame'] = frame_num
                        break

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # print fps on screen and to command line
        fps = (fps + (1. / (time.time() - t1))) / 2
        if frame_num % 15 == 0:
            time_left = int((length - frame_num) / fps)

        # if FLAGS.output:
        #     out.write(img)
        #     frame_index = frame_index + 1
        #     list_file.write(str(frame_index) + ' ')
        #     if len(converted_boxes) != 0:
        #         for i in range(0, len(converted_boxes)):
        #             list_file.write(str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1])
        #                             + ' ' + str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ')
        #     list_file.write('\n')

    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()

    df = pd.DataFrame(bike_list)
    df = df.dropna()
    if df.empty:
        print('The resulting dataframe is empty')
    else:
        del df['Image_name']
        del df['Full_image_name']
        df = df.sort_values(by='finish_time', ascending=True)
        df = df.reset_index(drop=True)
        engine = sqlalchemy.create_engine(database_url)
        table_name = 'table_' + vid_name
        df.to_sql(table_name, engine, method='multi')
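# The finish timestamps above are derived purely from the frame index and
# the input fps; the same computation in isolation:
from datetime import datetime, timedelta

start_time = datetime.min
frame_num, input_fps = 451, 30  # toy values
dt = start_time + timedelta(seconds=frame_num / input_fps)
print(str(dt.time()))       # 00:00:15.033333
print(str(dt.time())[:-3])  # 00:00:15.033, the trimmed form stored above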
def main(_argv):
    class_names = [c.strip() for c in open('coco.names').readlines()]
    # class_names = ['car', 'truck', 'bus', 'bicycle', 'motorbike']
    yolo = YoloV3(classes=len(class_names))
    yolo.load_weights('./weights/yolov3.tf')

    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.8

    model_filename = 'mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        vid_fps = int(vid.get(cv2.CAP_PROP_FPS))
        vid_width, vid_height = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                                 int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(FLAGS.output, codec, vid_fps, (vid_width, vid_height))

    from collections import deque
    pts = [deque(maxlen=30) for _ in range(1000)]
    counter = []

    cv2.namedWindow('output', cv2.WINDOW_NORMAL)  # so resizeWindow below has a window to act on
    while True:
        _, img = vid.read()
        if img is None:
            print('Completed')
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        boxs, scores, classes = [], [], []
        f = ['car', 'truck', 'bus', 'bicycle', 'motorbike']
        for d in detections:
            if d.class_name in f:
                boxs.append(d.tlwh)
                scores.append(d.confidence)
                classes.append(d.class_name)
        boxs = np.array(boxs)
        scores = np.array(scores)
        classes = np.array(classes)
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        tracker.predict()
        tracker.update(detections)

        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        for track in tracker.tracks:
            if track.class_name in f:
                # print("new track")
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                class_name = track.get_class()
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]
                cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)

                center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2))
                pts[track.track_id].append(center)
                for j in range(1, len(pts[track.track_id])):
                    if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                        continue
                    thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                    cv2.line(img, pts[track.track_id][j - 1], pts[track.track_id][j],
                             color, thickness)

                height, width, _ = img.shape
                # print("p", height, width)
                # print(int(3 * height / 6 + height / 20))
                oo = [int(x) for x in FLAGS.line_coordinates]
                print(oo)
                cv2.line(img, (oo[0], oo[1]), (oo[2], oo[3]), (0, 255, 0), thickness=2)

                center_y = int((bbox[1] + bbox[3]) / 2)
                if (int(3 * height / 6 - height / 20) <= center_y
                        <= int(3 * height / 6 + height / 20)):
                    counter.append(int(track.track_id))
                    print(int(track.track_id))

        total_count = len(set(counter))
        h, w = img.shape[0:2]
        img[0:70, 0:500] = [0, 0, 0]
        cv2.putText(img, "Total Vehicle Count: " + str(total_count), (7, 56),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.25, (255, 255, 255), 2)

        cv2.resizeWindow('output', 1024, 768)
        cv2.imshow('output', img)
        if out is not None:
            out.write(img)
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()
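# The fading motion trail drawn above, in isolation: per-track centers live
# in a fixed-length deque and older segments get thinner lines. The cv2
# call is commented out so the snippet runs without a frame:
from collections import deque
import numpy as np

pts = deque(maxlen=30)
for center in [(10, 10), (20, 12), (35, 15)]:
    pts.append(center)
for j in range(1, len(pts)):
    thickness = int(np.sqrt(64 / float(j + 1)) * 2)
    # cv2.line(img, pts[j - 1], pts[j], color, thickness)
    print(pts[j - 1], pts[j], thickness)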
def show_frame(self): global running _, frame = self.cap.read() # frame = cv2.flip(frame, 0) self.numframes = self.numframes + 1 print(self.numframes) frame = imutils.resize(frame, width=width_screen - 300) print(self.pause) #cv2image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # Image.fromarray( obj , mode = None ) # obj - Objeto com interface de matriz # mode - Modo a ser usado (será determinado a partir do tipo se None) Consulte: # img1=img # img1 = imutils.resize(img1, width=900) img1 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) img_in = tf.expand_dims(img1, 0) img_in = transform_images(img_in, FLAGS.size) # img_in = Image.fromarray(img_in); # original: shape=(1, 288, 288, 3) # simple_example(MISTURA_COM_CV2).py: shape=(1, 288, 288, 4) boxes, scores, classes, nums = self.yolo.predict(img_in) classes = classes[0] names = [] for i in range(len(classes)): names.append(self.class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes(frame, boxes[0]) features = self.encoder(frame, converted_boxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( converted_boxes, scores[0], names, features) ] # initialize color map #cmap = plt.get_cmap('tab20b') #colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima suppresion boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, self.nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker self.tracker.predict() self.tracker.update(detections) cont_objects_positions_id = 0 cont_objects_positions_x_min = 0 cont_objects_positions_y_min = 0 cont_objects_positions_x_max = 0 cont_objects_positions_y_max = 0 for track in self.tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() #color = colors[int(track.track_id) % len(colors)] #color = [i * 255 for i in color] # Se o id do track estiver no array de ids dos jogadores selecionados o rectagulo irá ser desenhado com uma cor diferente if self.contain(int(track.track_id)): #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 255), 2) #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), # (int(bbox[0]) + (len(str(track.track_id))) * 5, int(bbox[1])), # (255, 0, 255), -1) cv2.ellipse(frame, (int(bbox[0] + ((bbox[2] - bbox[0]) / 2)), int(bbox[3])), (25, 4), 0, 0, 360, (255, 0, 255), 2, 15) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 1) else: #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255) , 2) #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), # (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), (0, 0, 255), -1) cv2.ellipse(frame, (int(bbox[0] + ((bbox[2] - bbox[0]) / 2)), int(bbox[3])), (20, 4), 0, 0, 360, (100, 255, 100), 2, 15) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 1) # cada jogador selecionado irá ter o id registado no array de ids, a posicao do x no array dos x e a posicao do y no array do y # self.objects_positions_id.insert(cont_objects_positions_id,track.track_id) # self.objects_positions_x.insert(cont_objects_positions_x,int(bbox[0])) #self.objects_positions_y.insert(cont_objects_positions_y,int(bbox[1])) 
self.objects_positions_id[ cont_objects_positions_id] = track.track_id self.objects_positions_x_min[cont_objects_positions_x_min] = int( bbox[0]) self.objects_positions_y_min[cont_objects_positions_y_min] = int( bbox[1]) self.objects_positions_x_max[cont_objects_positions_x_max] = int( bbox[2]) self.objects_positions_y_max[cont_objects_positions_y_max] = int( bbox[3]) # Increment the array counters so that each array position corresponds to one player cont_objects_positions_id = cont_objects_positions_id + 1 cont_objects_positions_x_min = cont_objects_positions_x_min + 1 cont_objects_positions_y_min = cont_objects_positions_y_min + 1 cont_objects_positions_x_max = cont_objects_positions_x_max + 1 cont_objects_positions_y_max = cont_objects_positions_y_max + 1 # Creation of the multiple lines def arrayLenght(array): cont = len(array) - 1 while cont >= 0 and array[cont] == 0: # check the index before reading cont = cont - 1 return cont + 1 cont_line_player1_id = 0 cont_line_player2_id = 0 if arrayLenght(self.line_player1) > 0: print(arrayLenght(self.line_player1)) while cont_line_player1_id < arrayLenght(self.line_player1): player1 = 0 player2 = 0 cont = 0 for n in self.objects_positions_id: if int(n) == int(self.line_player1[cont_line_player1_id] ) and player1 == 0: player1 = cont if int(n) == int(self.line_player2[cont_line_player2_id] ) and player2 == 0: player2 = cont cont = cont + 1 if self.line_player1[ cont_line_player1_id] == self.line_player2[ cont_line_player2_id]: x_new_player1 = self.objects_positions_x_min[player1] + ( (self.objects_positions_x_max[player1] - self.objects_positions_x_min[player1]) / 2) x_new_player2 = self.objects_positions_x_min[player2] + ( (self.objects_positions_x_max[player2] - self.objects_positions_x_min[player2]) / 2) cv2.line(frame, (int(x_new_player1), self.objects_positions_y_max[player1]), (int(x_new_player2), self.objects_positions_y_max[player2]), (0, 125, 255), 5) else: x_new_player1 = self.objects_positions_x_min[player1] + ( (self.objects_positions_x_max[player1] - self.objects_positions_x_min[player1]) / 2) x_new_player2 = self.objects_positions_x_min[player2] + ( (self.objects_positions_x_max[player2] - self.objects_positions_x_min[player2]) / 2) cv2.line(frame, (int(x_new_player1), self.objects_positions_y_max[player1]), (int(x_new_player2), self.objects_positions_y_max[player2]), (255, 255, 255), 5) cont_line_player1_id = cont_line_player1_id + 1 cont_line_player2_id = cont_line_player2_id + 1 # creation of the arrows if self.frame_arrow_create[0] != 0: contador_setas = 0 while contador_setas < arrayLenght(self.frame_arrow_create): start_point = ( int(self.coordinates_arrow_x_init[contador_setas]), int(self.coordinates_arrow_y_init[contador_setas])) end_point = ( int(self.coordinates_arrow_x_final[contador_setas]), int(self.coordinates_arrow_y_final[contador_setas])) if int( self.coordinates_arrow_x_final[contador_setas] ) == 0 and int( self.coordinates_arrow_y_final[contador_setas]) == 0: end_point = ( int(self.coordinates_arrow_x_init[contador_setas]), int(self.coordinates_arrow_y_init[contador_setas])) color = (0, 255, 0) thickness = 2 if int(self.numframes) - int( self.frame_arrow_create[contador_setas]) < 25: cv2.arrowedLine(frame, start_point, end_point, color, thickness) contador_setas = contador_setas + 1 # if FLAGS.output: # out.write(img) # cv2.imshow('output', img) # Zoom applied; self.zoom decides the scale applied to the video scale_percent = self.zoom # percent of original size width = int(frame.shape[1] * scale_percent / 100) height = int(frame.shape[0] *
scale_percent / 100) dim = (width, height) # resize image img = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img = Image.fromarray( img ) # Creates an image memory from an object that exports the array interface imgtk = ImageTk.PhotoImage( image=img ) # used to display images (grayscale or true color) in labels, buttons, canvases and text widgets self.lmain.imgtk = imgtk self.lmain.configure(image=imgtk) key = cv2.waitKey(1) self.lmain.bind('<Leave>', self.exit_) self.lmain.bind( '<Button-1>', self.motion ) # when someone clicks on the game screen it immediately triggers the self.motion function if not self.pause: self.lmain.after(5, self.show_frame)
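The show_frame() method above boils down to a standard OpenCV-to-Tkinter display pattern. A minimal, self-contained sketch of just that pattern (the widget name and 5 ms rescheduling interval mirror the method above; the webcam source is an assumption): convert BGR to RGB, wrap the array in a PIL Image, then in an ImageTk.PhotoImage, and keep a reference on the label so the image is not garbage-collected.

import tkinter as tk
import cv2
from PIL import Image, ImageTk

root = tk.Tk()
lmain = tk.Label(root)
lmain.pack()
cap = cv2.VideoCapture(0)

def show_frame():
    ok, frame = cap.read()
    if ok:
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        imgtk = ImageTk.PhotoImage(image=Image.fromarray(rgb))
        lmain.imgtk = imgtk          # keep a reference so Tk does not drop it
        lmain.configure(image=imgtk)
    root.after(5, show_frame)        # reschedule, as in show_frame() above

show_frame()
root.mainloop()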
def nayanam(_argv): # Definition of the parameters max_cosine_distance = 0.5 nn_budget = None nms_max_overlap = 1.0 #initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) yolo = YoloV3(classes=80) yolo.load_weights(PATH_TO_WEIGHTS) print('weights loaded') class_names = [c.strip() for c in open(PATH_TO_CLASSES).readlines()] print('classes loaded') out = None fps = 0.0 count = 0 vid = cv2.VideoCapture(RTSP_URL) while (vid.isOpened()): _, img = vid.read() if img is None: # a dropped RTSP frame comes back as None rather than raising, so test the frame directly print("Empty frame") continue img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_in = tf.expand_dims(img_in, 0) img_in = transform_images(img_in, 416) t1 = time.time() boxes, scores, classes, nums = yolo.predict(img_in) classes = classes[0] names = [] for i in range(len(classes)): names.append(class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes(img, boxes[0]) features = encoder(img, converted_boxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( converted_boxes, scores[0], names, features) ] cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # running NMS boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Deepsort tracker called here tracker.predict() tracker.update(detections) #dump file set here # file = open(PATH_TO_RESULTS,'a+') for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] if VIDEO_DEBUG == 1: cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( img, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(img, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) s = str(track.track_id) + ',' + class_name + ',' + str(int( bbox[0])) + ',' + str(int(bbox[1])) + '\n' # file.write(s) print(s) fps = (fps + (1. / (time.time() - t1))) / 2 print("fps = ", fps) # file.close() if VIDEO_DEBUG == 1: cv2.imshow('output', img) if cv2.waitKey(1) == 27: break signal.signal(signal.SIGINT, user_exit) vid.release() if VIDEO_DEBUG == 1: cv2.destroyAllWindows()
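nayanam() has to survive dropped RTSP frames, which OpenCV signals with a None frame rather than an exception. A minimal sketch of a reconnecting frame generator under that assumption (the retry delay is arbitrary):

import time
import cv2

def frames(rtsp_url, retry_delay=2.0):
    vid = cv2.VideoCapture(rtsp_url)
    while True:
        ok, img = vid.read()
        if not ok or img is None:
            vid.release()
            time.sleep(retry_delay)            # back off, then reconnect
            vid = cv2.VideoCapture(rtsp_url)
            continue
        yield img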
def main(_argv): # Definition of the parameters max_cosine_distance = 0.5 nn_budget = None nms_max_overlap = 1.0 #initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) physical_devices = tf.config.experimental.list_physical_devices('GPU') if len(physical_devices) > 0: tf.config.experimental.set_memory_growth(physical_devices[0], True) if FLAGS.tiny: yolo = YoloV3Tiny(classes=FLAGS.num_classes) else: yolo = YoloV3(classes=FLAGS.num_classes) yolo.load_weights(FLAGS.weights) logging.info('weights loaded') class_names = [c.strip() for c in open(FLAGS.classes).readlines()] logging.info('classes loaded') try: # original code #vid = cv2.VideoCapture(int(FLAGS.video)) # Daum PotPlayer #vid = cv2.VideoCapture('rtsp://172.20.10.4:8554/test') vid = cv2.VideoCapture('rtsp://192.168.0.28:8554/test') # no connection #os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'protocol_whitelist;file,rtp,udp' #vid = cv2.VideoCapture('C:/Users/Jiwon/Desktop/yolov3_deepsort-master/stream.sdp') #vid = cv2.VideoCapture( #'udpsrc port=8400 caps=application/x-rtp,media=(string)video,clock-rate=(int)9000,encoding-name=(string)H264,payload=(int)96!rtph264depay!decodebin!videoconvert!appsink', #cv2.CAP_GSTREAMER) #vid = cv2.VideoCapture("rtspsrc location=rtsp://192.168.0.25/main latency=30 ! decodebin ! nvvidconv ! appsink") #vid = cv2.VideoCapture('udp://@:5000') #vid = cv2.VideoCapture('udpsrc port=5000 ! application/x-rtp, payload=96 ! rtph264depay ! avdec_h264 ! appsink', cv2.CAP_GSTREAMER) #vid = cv2.VideoCapture(1) except: vid = cv2.VideoCapture(FLAGS.video) out = None if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) list_file = open('detection.txt', 'w') frame_index = -1 # code for checking (re-detection bookkeeping) f_cnt = 0 redetect = False fps = 0.0 count = 0 while True: _, img = vid.read() if img is None: logging.warning("Empty Frame") time.sleep(0.1) count += 1 if count < 3: continue else: break img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) #img_in = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) img_in = tf.expand_dims(img_in, 0) img_in = transform_images(img_in, FLAGS.size) t1 = time.time() boxes, scores, classes, nums = yolo.predict(img_in) classes = classes[0] names = [] for i in range(len(classes)): names.append(class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes(img, boxes[0]) features = encoder(img, converted_boxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( converted_boxes, scores[0], names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima suppression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name =
track.get_class() if class_name == "person": if int(track.track_id) == 1: cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2) cv2.rectangle( img, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), (0, 255, 0), -1) cv2.putText(img, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (0, 0, 0), 2) # if (this is the initial MQTT check value, save it) - fix this code here #img_user = img[int(bbox[0]):int(bbox[2]), int(bbox[1]):int(bbox[3])] img_user = img[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])] # bbox is tlbr, so crop rows y1:y2 and columns x1:x2 cv2.imwrite( 'C:/Users/Jiwon/Desktop/re/yolov3_deepsort-master/userface/user.png', img_user) ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN #for det in detections: # bbox = det.to_tlbr() # cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2) # print fps on screen fps = (fps + (1. / (time.time() - t1))) / 2 cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) cv2.imshow('output', img) if FLAGS.output: out.write(img) frame_index = frame_index + 1 list_file.write(str(frame_index) + ' ') if len(converted_boxes) != 0: for i in range(0, len(converted_boxes)): list_file.write( str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' ' + str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ') list_file.write('\n') f_cnt += 1 print("False") if f_cnt > 10: redetect = True f_cnt = 0 #""" if redetect: # https://opencv-python.readthedocs.io/en/latest/doc/24.imageTemplateMatch/imageTemplateMatch.html _, img = vid.read() gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) template = cv2.imread( 'C:/Users/Jiwon/Desktop/re/yolov3_deepsort-master/userface/user.png', 0) w, h = template.shape[::-1] # width and height of the template image (shape is (rows, cols), so reverse it) res = cv2.matchTemplate(gray, template, cv2.TM_SQDIFF) min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res) top_left = min_loc bottom_right = (top_left[0] + w, top_left[1] + h) cv2.rectangle(img, top_left, bottom_right, (255, 0, 0), 1) print("TRUE") # """ # press q to quit if cv2.waitKey(1) == ord('q'): break vid.release() if FLAGS.output: out.release() list_file.close() cv2.destroyAllWindows()
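The re-detection branch above is plain template matching. A minimal sketch of that step, isolating the one subtlety: with cv2.TM_SQDIFF the score is a squared difference, so the best match is at the *minimum* location (min_loc), not the maximum.

import cv2

def redetect(frame_gray, template_gray):
    res = cv2.matchTemplate(frame_gray, template_gray, cv2.TM_SQDIFF)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
    h, w = template_gray.shape                 # numpy shape is (rows, cols)
    top_left = min_loc                         # best match for TM_SQDIFF
    bottom_right = (top_left[0] + w, top_left[1] + h)
    return top_left, bottom_right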
def main(_argv): # Definition of the parameters max_cosine_distance = 0.5 nn_budget = None nms_max_overlap = 1.0 #initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) physical_devices = tf.config.experimental.list_physical_devices('GPU') if len(physical_devices) > 0: tf.config.experimental.set_memory_growth( physical_devices[0], True) ################################### if FLAGS.tiny: yolo = YoloV3Tiny(classes=FLAGS.num_classes) else: yolo = YoloV3(classes=FLAGS.num_classes) yolo.load_weights(FLAGS.weights) logging.info('weights loaded') class_names = [c.strip() for c in open(FLAGS.classes).readlines()] logging.info('classes loaded') try: vid = cv2.VideoCapture(int(FLAGS.video)) except: vid = cv2.VideoCapture(FLAGS.video) out = None if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) list_file = open('detection.txt', 'w') frame_index = -1 fps = 0.0 count = 0 inDanger = 0 while True: _, img = vid.read() if img is None: logging.warning("Empty Frame") time.sleep(0.1) count += 1 if count < 3: continue else: break img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_in = tf.expand_dims(img_in, 0) img_in = transform_images(img_in, FLAGS.size) t1 = time.time() boxes, scores, classes, nums = yolo.predict(img_in) classes = classes[0] names = [] for i in range(len(classes)): names.append(class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes(img, boxes[0]) features = encoder(img, converted_boxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( converted_boxes, scores[0], names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima suppresion boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) inDanger = 0 for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(img, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) #If a person is found calculate the distance from it to all other objects human = "human" if class_name == human: mainIndex = tracker.tracks.index(track) nonhuman_index = [] mainCenterPoint = (int((int(bbox[2]) - int(bbox[0])) / 2 + int(bbox[0])), int((int(bbox[3]) - int(bbox[1])) / 2 + int(bbox[1]))) centerPoints = [] lengths = [] diagonals = [] for track in tracker.tracks: class_type = track.get_class() 
bbox = track.to_tlbr() #diagonal = math.sqrt((int(bbox[0]) - int(bbox[2]))**2 + (int(bbox[1]) - int(bbox[3]))**2) centerPoint = (int((int(bbox[2]) - int(bbox[0])) / 2 + int(bbox[0])), int((int(bbox[3]) - int(bbox[1])) / 2 + int(bbox[1]))) length = math.sqrt( ((mainCenterPoint[0] - centerPoint[0]))**2 + (mainCenterPoint[1] - centerPoint[1])**2) diagonal = abs(int(bbox[0]) - int(bbox[2])) # actually the bbox width, used as a size proxy diagonals.append(diagonal) centerPoints.append(centerPoint) lengths.append(length) if class_type != human: nonhuman_index.append(tracker.tracks.index(track)) #Normalizing radii using the diagonal length of each bbox if not (diagonals == []): cp_diagonals = list( diagonals ) # make a copy of the diagonals array cp_diagonals.sort() normDiag = cp_diagonals[-1] else: normDiag = 0 if normDiag > 0 and not (diagonals == []): max_radius = normDiag / 2 normalizedDiags = [i / normDiag for i in diagonals] radii = [i * max_radius for i in normalizedDiags] else: normalizedDiags = [i * 0 for i in diagonals] radii = normalizedDiags for track in nonhuman_index: if lengths[track] < 150: inDanger += 1 break if (radii != [] and centerPoints != [] and lengths != []): #print(not radii == [] and not centerPoints == [] and not lengths == []) #for track in range(0,len(tracker.tracks)): for track in nonhuman_index: try: if lengths[track] <= 150 and track != mainIndex: cv2.line(img, mainCenterPoint, centerPoints[track], (255, 0, 0), 1) cv2.circle(img, mainCenterPoint, int(radii[mainIndex]), (0, 0, 255), 2) cv2.circle(img, centerPoints[track], int(radii[track]), (0, 255, 0), 2) except: continue ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN #for det in detections: # bbox = det.to_tlbr() # cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2) # print fps on screen fps = (fps + (1. / (time.time() - t1))) / 2 cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) cv2.putText(img, "People in DANGER: {}".format(inDanger), (0, 60), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) cv2.imshow('output', img) if FLAGS.output: out.write(img) frame_index = frame_index + 1 list_file.write(str(frame_index) + ' ') if len(converted_boxes) != 0: for i in range(0, len(converted_boxes)): list_file.write( str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' ' + str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ') list_file.write('\n') # press q to quit if cv2.waitKey(1) == ord('q'): break vid.release() if FLAGS.output: out.release() list_file.close() cv2.destroyAllWindows()
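The danger check above reduces to comparing bbox-center distances against a pixel threshold. A minimal sketch of that computation (the 150 px threshold matches the code above; calibrating it to the camera geometry is left open):

import math

def center(bbox):                              # bbox = (x1, y1, x2, y2)
    return ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)

def in_danger(person_bbox, object_bboxes, threshold=150):
    px, py = center(person_bbox)
    return any(
        math.hypot(px - ox, py - oy) < threshold
        for ox, oy in map(center, object_bboxes))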
def main(_argv): # Definition of the parameters max_cosine_distance = 0.5 nn_budget = None nms_max_overlap = 1.0 #initialize deep sort see github deep sort for more information model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) """ A nearest neighbor distance metric that, for each target, returns the closest distance to any sample that has been observed so far. """ metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # multi target tracker tracker = Tracker(metric) # Return an identifiable list of physical devices visible to the host runtime physical_devices = tf.config.experimental.list_physical_devices('GPU') if len(physical_devices) > 0: tf.config.experimental.set_memory_growth(physical_devices[0], True) # enable memory growth for physical devices # utilised to identify type of YoloV3 used if FLAGS.tiny: yolo = YoloV3Tiny(classes=FLAGS.num_classes) else: yolo = YoloV3(classes=FLAGS.num_classes) # load pre-trained weights # pre-trained from open sources, many from public repos on github. yolo.load_weights(FLAGS.weights) logging.info('weights loaded') # array contains name of classes (flags) class_names = [c.strip() for c in open(FLAGS.classes).readlines()] logging.info('classes loaded') # capture a video from the camera or a video file, files for our demonstrations. try: vid = cv2.VideoCapture(int(FLAGS.video)) except: vid = cv2.VideoCapture(FLAGS.video) # output video is empty out = None if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) list_file = open('detection.txt', 'w') frame_index = -1 _, img = vid.read() h, w, c = img.shape h_numStep = 12 # number of boxes in a column w_numStep = 20 # number of boxes in a row #make matrix-array M of categories of different areas 1=food area, etc. 
M = [[1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5], [1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5], [1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 8, 8], [2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 8, 8, 8, 8], [2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8], [2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 7, 7], [2, 2, 2, 2, 2, 2, 2, 2, 4, 6, 6, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7], [2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 7, 7, 7, 7], [2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 7, 7, 7, 7, 7], [2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 7, 7, 7, 7, 7], [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7], [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7]] # store the total time that customers stay in box[i][j] total_time_engage = [[0 for i in range(w_numStep + 1)] for j in range(h_numStep + 1)] # store the time that customer k is stationary in box[i][j] stationary_time = [[[0 for i in range(w_numStep + 1)] for j in range(h_numStep + 1)] for k in range(100000)] # store the positions of single customer x_single_tracking = [] y_single_tracking = [] # single customer's trackingID single_trackingID = 34 # store the current position of customer max_trackID = 0 x_trackID = [-1] * 1000000 y_trackID = [-1] * 1000000 # file store the total_time_engage file = 'total_time_engage.txt' fps = 0.0 count = 0 while True: _, img = vid.read() if img is None: logging.warning("Empty Frame") time.sleep(0.1) count += 1 if count < 3: continue else: break # convert an image from one color space to another img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # return a tensor with a length 1 axis inserted at index 0 img_in = tf.expand_dims(img_in, 0) # resize the image to 416x416 # remember resolution has to be able to work with it # tensorflow.image.resize: resize image to size img_in = transform_images(img_in, FLAGS.size) # return the number of seconds passed since epoch t1 = time.time() time_finish_last_tracking = t1 boxes, scores, classes, nums = yolo.predict(img_in) classes = classes[0] names = [] for i in range(len(classes)): names.append(class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes(img, boxes[0]) features = encoder(img, converted_boxes) # detections detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( converted_boxes, scores[0], names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima suppresion boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Pass detections to the deepsort object and obtain the track information # predicts and updates via detection tracker.predict() tracker.update(detections) # draw horizontal boxes y_step = int(h / h_numStep) y_start = 0 while True: y_end = y_start + y_step cv2.rectangle(img, (0, y_start), (int(w), y_end), (0, 0, 0), 1) y_start = y_end if y_start >= int(h): break # finish drawing here # draw vertical boxes x_step = int(w / w_numStep) x_start = 0 while True: x_end = x_start + x_step cv2.rectangle(img, (x_start, 0), (x_end, int(h)), (0, 0, 0), 1) x_start = x_end if x_start >= int(w): break # finish drawing here time_step = time.time() - 
time_finish_last_tracking for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() # get the corrected/predicted bounding box class_name = track.get_class( ) # get the class name of particular object color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] # identify center of a boundary box x_cent = int(bbox[0] + (bbox[2] - bbox[0]) / 2) y_cent = int(bbox[1] + (bbox[3] - bbox[1]) / 2) # draw detection on frame cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) # draw rectangle cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(img, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) # insert objectName and objectID # display the area each person is in # cv # update the stationary_time and total_time_engage array if class_name == "person": x_pos = int(x_cent / x_step) y_pos = int(y_cent / y_step) #print(str(track.track_id) + ": [" + str(y_pos) + ", " + str(x_pos) + "]") if track.track_id > max_trackID: max_trackID = track.track_id x_trackID[track.track_id] = y_pos y_trackID[track.track_id] = x_pos stationary_time[track.track_id][y_pos][x_pos] += time_step total_time_engage[y_pos][x_pos] += time_step # track a single person if class_name == "person" and track.track_id == single_trackingID: x_single_tracking.append(x_pos) y_single_tracking.append(y_pos) for track_index in range(max_trackID + 1): if x_trackID[track_index] != -1: print("customerID " + str(track_index) + ": [" + str(x_trackID[track_index]) + "," + str(y_trackID[track_index]) + "] in " + market_section(M[ x_trackID[track_index]][y_trackID[track_index]])) with open(file, 'w') as filetostore: for i in range(h_numStep): for j in range(w_numStep): filetostore.write( "{:.2f}".format(total_time_engage[i][j]) + " ") filetostore.write("\n") ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN #for det in detections: # bbox = det.to_tlbr() # cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2) time_finish_last_tracking = time.time() # print fps on screen fps = (fps + (1. 
/ (time.time() - t1))) / 2 cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) cv2.imshow('output', img) if FLAGS.output: out.write(img) frame_index = frame_index + 1 list_file.write(str(frame_index) + ' ') if len(converted_boxes) != 0: for i in range(0, len(converted_boxes)): list_file.write( str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' ' + str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ') list_file.write('\n') # press q to quit if cv2.waitKey(1) == ord('q'): break f = open("total_time_engage.txt", "rt") f.close() # insert data into the database # initialise track arrays track_time = [0] * 10000000 track_customerID = [0] * 10000000 track_area = ["" for x in range(10000000)] x_single = [0] * 10000000 y_single = [0] * 10000000 # organise data to be inserted track_index = -1 for k in range(1000): for h in range(h_numStep): for w in range(w_numStep): if stationary_time[k][h][w] != 0: track_index += 1 track_time[track_index] = stationary_time[k][h][w] track_customerID[track_index] = k track_area[track_index] = str(h) + ', ' + str(w) x_tmp = -1 y_tmp = -1 single_track_index = -1 for k in range(len(x_single_tracking)): if x_single_tracking[k] != x_tmp and y_single_tracking[k] != y_tmp: single_track_index += 1 x_single[single_track_index] = x_single_tracking[k] y_single[single_track_index] = y_single_tracking[k] x_tmp = x_single[single_track_index] y_tmp = y_single[single_track_index] single_tracking_areas = "" for k in range(single_track_index): single_tracking_areas += '[' + str(x_single[k]) + ',' + str( y_single[k]) + '] , ' # connect and insert the appropriate data in primary_table for k in range(track_index + 1): try: conn = mariadb.connect(user="******", password="******", host="localhost", database="trackingDB") cur = conn.cursor() mySql_insert_query = """INSERT INTO primary_table(trackID, customerID, area) VALUES (%s, %s, %s) """ recordTuple = (k, track_customerID[k], track_area[k]) cur.execute(mySql_insert_query, recordTuple) conn.commit() except mariadb.Error as error: print("Failed to insert record into the primary_table {}".format( error)) finally: if (conn.is_connected()): cur.close() conn.close() # connect and insert the appropriate data in "engaged" table for k in range(track_index + 1): try: conn = mariadb.connect(user="******", password="******", host="localhost", database="trackingDB") cur = conn.cursor() mySql_insert_query = """INSERT INTO engaged(trackID, engagement_time) VALUES (%s, %s) """ recordTuple = (k, track_time[k]) cur.execute(mySql_insert_query, recordTuple) conn.commit() except mariadb.Error as error: print("Failed to insert record into the engaged table {}".format( error)) finally: if (conn.is_connected()): cur.close() conn.close() # connect and insert the appropriate data in "total_areas" table try: conn = mariadb.connect(user="******", password="******", host="localhost", database="trackingDB") cur = conn.cursor() mySql_insert_query = """INSERT INTO total_areas(customerID, all_areas_visited) VALUES (%s, %s) """ recordTuple = (single_trackingID, single_tracking_areas) cur.execute(mySql_insert_query, recordTuple) conn.commit() except mariadb.Error as error: print("Failed to insert record into the total_areas table {}".format( error)) finally: if (conn.is_connected()): cur.close() conn.close() # plot the graph fig = plt.figure(1) fig.suptitle('Engagement time on different areas', fontsize=20) ax = plt.axes(projection='3d') ax = plt.axes(projection='3d') # Data for a 
three-dimensional line x = np.arange(w_numStep - 1, -1, -1) y = np.linspace(0, h_numStep - 1, h_numStep) X, Y = np.meshgrid(x, y) Z = [[0 for j in range(w_numStep)] for i in range(h_numStep)] for i in range(h_numStep): for j in range(w_numStep): Z[i][j] = total_time_engage[i][j] Z = np.array(Z) # Plot the surface. ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap='viridis', edgecolor='none') ax.set_xlabel('width') ax.set_ylabel('height') ax.set_zlabel('time') ax.view_init(35, 80) # get the current axes of the figure frame = plt.gca() # clear the x and y axis tick locations frame.axes.get_xaxis().set_ticks([]) frame.axes.get_yaxis().set_ticks([]) #Plots the figure fig2 = plt.figure(2) fig2_title = 'Walking pattern of a single customer( trackingID = ' + str( single_trackingID) + ')' fig2.suptitle(fig2_title, fontsize=15) plt.plot(x_single_tracking, y_single_tracking, 'ro') plt.axis([0, w_numStep, h_numStep, 0]) frame.axes.get_xaxis().set_ticks([]) frame.axes.get_yaxis().set_ticks([]) fig.savefig('engage_level.jpg') fig2.savefig('single_tracking.jpg') plt.show() vid.release() if FLAGS.output: out.release() list_file.close() cv2.destroyAllWindows()
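The dwell-time bookkeeping above maps a detection center to a grid cell with int(x_cent / x_step). A minimal sketch of that mapping with one defensive addition (the clamping is an assumption, not in the original): centers on the right or bottom frame border can otherwise produce an index one past the last row or column of M.

def grid_cell(x_cent, y_cent, w, h, w_numStep=20, h_numStep=12):
    x_step, y_step = w // w_numStep, h // h_numStep
    x_pos = min(x_cent // x_step, w_numStep - 1)   # clamp border centers
    y_pos = min(y_cent // y_step, h_numStep - 1)
    return y_pos, x_pos                            # (row, col), matching M[row][col]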
def main(_argv): region = load_ROI() # Definition of the parameters max_cosine_distance = 0.3 #Default = 0.5 nn_budget = None nms_max_overlap = 0.8 #Default = 0.5 #initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) physical_devices = tf.config.experimental.list_physical_devices('GPU') if len(physical_devices) > 0: tf.config.experimental.set_memory_growth(physical_devices[0], True) if FLAGS.tiny: yolo = YoloV3Tiny(classes=FLAGS.num_classes) else: yolo = YoloV3(classes=FLAGS.num_classes) video_name = os.path.splitext(FLAGS.video)[-2] weights = 'weights/yolov3_sang.tf' yolo.load_weights(weights) logging.info('weights loaded') class_names = [c.strip() for c in open(FLAGS.classes).readlines()] logging.info('classes loaded') #WRITE RESULT result = "tracking_result/{}_track.txt".format(video_name) file_out = open(result,'w') path = os.getcwd() path = str(os.path.split(os.path.split(path)[0])[0]) #vid_path = os.path.join(path,"Data/{}/{}.mp4".format(video_name,video_name)) vid_path = os.path.join(path,"data/test_data/{}.mp4".format(video_name)) vid = cv2.VideoCapture(vid_path) out = None if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_index = -1 fps = 0.0 count = 0 while True: _, img = vid.read() if img is None: break img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_in = tf.expand_dims(img_in, 0) img_in = transform_images(img_in, FLAGS.size) t1 = time.time() boxes, scores, classes, nums = yolo.predict(img_in) classes = classes[0] names = [] for i in range(len(classes)): names.append(class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes(img, boxes[0]) features = encoder(img, converted_boxes) detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima suppresion boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) frame_index = frame_index + 1 if frame_index % 100 == 0: print('FRAME: ',frame_index) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 1) #cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1) #cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) x_cen = int((int(bbox[2]) + int(bbox[0]))/2) y_cen = int((int(bbox[3]) + int(bbox[1]))/2) if is_in_region((int(bbox[0]), 
int(bbox[1])),(int(bbox[2]), int(bbox[3])),region) == False: # outside the ROI, so delete the track track.delete_track() cv2.putText(img,"FRAME: "+ str(frame_index),(0,45),cv2.FONT_HERSHEY_COMPLEX_SMALL,1,(0,255,0),2) # write the TRACKING_RESULT file in the CountMovement format bb_width = int(bbox[2]) - int(bbox[0]) bb_height = int(bbox[3]) - int(bbox[1]) diagonal = math.sqrt(bb_height**2 + bb_width**2) file_out.write("{},{},{},{},{},{},{},{},{}\n".format(frame_index,track.track_id,x_cen,y_cen,diagonal,-1.0,class_to_classNumber(str(class_name)),bb_width,bb_height)) ### raw YOLO detections are drawn below; comment out to hide them for det in detections: bbox = det.to_tlbr() cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(0,255,0), 1) # print fps on screen fps = ( fps + (1./(time.time()-t1)) ) / 2 cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) cv2.imshow('output', img) if FLAGS.output: out.write(img) # press q to quit if cv2.waitKey(1) == ord('q'): break vid.release() if FLAGS.output: out.release() file_out.close() cv2.destroyAllWindows()
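is_in_region() itself is not shown, so as an assumption, here is a minimal sketch of one way such a check is commonly written with cv2.pointPolygonTest: a box counts as inside the ROI polygon only when both of its corners are inside or on the boundary.

import cv2
import numpy as np

def is_in_region(top_left, bottom_right, region):
    poly = np.array(region, dtype=np.int32).reshape(-1, 1, 2)
    return all(
        cv2.pointPolygonTest(poly, (float(x), float(y)), False) >= 0
        for x, y in (top_left, bottom_right))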
def main(_argv): # set present path home = os.getcwd() # Definition of the parameters max_cosine_distance = 0.5 nn_budget = None nms_max_overlap = 1.0 #initialize deep sort # model_filename = 'weights/mars-small128.pb' model_filename = os.path.join(home, "weights", "arcface_weights.h5") encoder = gdet.create_box_encoder(model_filename, batch_size=128) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) physical_devices = tf.config.experimental.list_physical_devices('GPU') for physical_device in physical_devices: tf.config.experimental.set_memory_growth(physical_device, True) if FLAGS.tiny: yolo = YoloV3Tiny(classes=FLAGS.num_classes) else: yolo = YoloV3(classes=FLAGS.num_classes) yolo.load_weights(FLAGS.weights) logging.info('weights loaded') class_names = [c.strip() for c in open(FLAGS.classes).readlines()] logging.info('classes loaded') times = [] # build the face database face_db = dict() db_path = FLAGS.database for name in os.listdir(db_path): name_path = os.path.join(db_path, name) name_db = [] for i in os.listdir(name_path): if i.split(".")[1] != "jpg": continue id_path = os.path.join(name_path, i) img = cv2.imread(id_path) # img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # img_in = tf.expand_dims(img_in, 0) # img_in = transform_images(img_in, FLAGS.size) # boxes, scores, classes, nums = yolo.predict(img_in) boxes = np.asarray([[0, 0, img.shape[0], img.shape[1]]]) scores = np.asarray([[1]]) converted_boxes = convert_boxes(img, boxes, scores) features = encoder(img, converted_boxes) if features.shape[0] == 0: continue for f in range(features.shape[0]): name_db.append(features[f, :]) name_db = np.asarray(name_db) face_db[name] = dict({"used": False, "db": name_db}) try: vid = cv2.VideoCapture(int(FLAGS.video)) except: vid = cv2.VideoCapture(FLAGS.video) out = None if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) list_file = open('detection.txt', 'w') frame_index = -1 fps = 0.0 count = 0 detection_list = [] while True: _, img = vid.read() if img is None: logging.warning("Empty Frame") time.sleep(0.1) count += 1 if count < 3: continue else: break img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_in = tf.expand_dims(img_in, 0) img_in = transform_images(img_in, FLAGS.size) t1 = time.time() boxes, scores, classes, nums = yolo.predict(img_in) # print(boxes, scores, classes, nums) # time.sleep(5) t2 = time.time() times.append(t2 - t1) print(f'yolo predict time : {t2-t1}') times = times[-20:] t3 = time.time() ############# classes = classes[0] names = [] for i in range(len(classes)): names.append(class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes(img, boxes[0], scores[0]) features = encoder(img, converted_boxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( converted_boxes, scores[0], names, features) ] t4 = time.time() print(f'feature generation time : {t4-t3}') #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima suppression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices =
preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] t5 = time.time() # Call the tracker tracker.predict() # tracker.update(detections) tracker.update(detections, face_db, FLAGS.max_face_threshold) t6 = time.time() print(f'tracking time : {t6-t5}') frame_index = frame_index + 1 for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() face_name = track.get_face_name() color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id)) + len(str(face_name))) * 23, int(bbox[1])), color, -1) # cv2.putText(img, class_name + face_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) cv2.putText( img, class_name + "-" + str(track.track_id) + "-" + face_name, (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) # cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) # print(class_name + "-" + str(track.track_id)) # detection_list.append(dict({"frame_no": str(frame_index), "id": str(track.track_id), "x": str(int(bbox[0])), "y": str(int(bbox[1])), "width": str(int(bbox[2])-int(bbox[0])), "height": str(int(bbox[3])-int(bbox[1]))})) if face_name != "": detection_list.append( dict({ "frame_no": str(frame_index), "id": str(face_name), "x": str(int(bbox[0])), "y": str(int(bbox[1])), "width": str(int(bbox[2]) - int(bbox[0])), "height": str(int(bbox[3]) - int(bbox[1])) })) ####### fps = (fps + (1. / (time.time() - t1))) / 2 # img = draw_outputs(img, (boxes, scores, classes, nums), class_names) # img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30), # cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (20, 20, 255), 2) if FLAGS.output: out.write(img) # frame_index = frame_index + 1 # list_file.write(str(frame_index)+' ') # if len(converted_boxes) != 0: # for i in range(0,len(converted_boxes)): # list_file.write(str(converted_boxes[i][0]) + ' '+str(converted_boxes[i][1]) + ' '+str(converted_boxes[i][2]) + ' '+str(converted_boxes[i][3]) + ' ') # list_file.write('\n') cv2.imshow('output', img) if cv2.waitKey(1) == ord('q'): break cv2.destroyAllWindows() frame_list = sorted(detection_list, key=lambda x: (int(x["frame_no"]), int(x["id"]))) # pprint.pprint(frame_list) f = open(FLAGS.eval, "w") for a in frame_list: f.write(a["frame_no"] + " " + a["id"] + " " + a["x"] + " " + a["y"] + " " + a["width"] + " " + a["height"] + "\n") # close the file f.close()
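The tracker above consults face_db, a name -> {"used", "db"} dict of embedding matrices. A minimal sketch of the cosine-distance matching such a lookup implies (an illustration, not the repository's tracker code; the 0.5 cutoff mirrors max_cosine_distance):

import numpy as np

def match_face(feature, face_db, max_dist=0.5):
    best_name, best_dist = '', max_dist
    f = feature / np.linalg.norm(feature)
    for name, entry in face_db.items():
        db = entry['db'] / np.linalg.norm(entry['db'], axis=1, keepdims=True)
        dist = float(np.min(1.0 - db @ f))  # cosine distance to the closest sample
        if dist < best_dist:
            best_name, best_dist = name, dist
    return best_name, best_dist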
def main(_argv): # Definition of the parameters max_cosine_distance = 0.5 nn_budget = None nms_max_overlap = 1 #initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric, max_age=40) physical_devices = tf.config.experimental.list_physical_devices('GPU') if len(physical_devices) > 0: tf.config.experimental.set_memory_growth(physical_devices[0], True) if FLAGS.tiny: yolo = YoloV3Tiny(classes=FLAGS.num_classes) else: yolo = YoloV3(classes=FLAGS.num_classes) yolo.load_weights(FLAGS.weights) logging.info('weights loaded') class_names = [c.strip() for c in open(FLAGS.classes).readlines()] logging.info('classes loaded') try: vid = cv2.VideoCapture(int(FLAGS.video)) except: vid = cv2.VideoCapture(FLAGS.video) out = None if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) list_file = open('detection.txt', 'w') frame_index = -1 fps = 0.0 count = 0 while True: _, img = vid.read() if img is None: logging.warning("Empty Frame") time.sleep(0.1) count += 1 if count < 3: continue else: break img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_in = tf.expand_dims(img_in, 0) img_in = transform_images(img_in, FLAGS.size) t1 = time.time() boxes, scores, classes, nums = yolo.predict(img_in) classes = classes[0] names = [] for i in range(len(classes)): names.append(class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes(img, boxes[0]) features = encoder(img, converted_boxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( converted_boxes, scores[0], names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima suppresion boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) objects = 0 for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() if (FLAGS.class_1 == 'all'): objects += 1 color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( img, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(img, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) elif (FLAGS.class_1 != 'all'): if (class_name == FLAGS.class_1): objects += 1 color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( img, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) 
cv2.putText(img, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) # print("Objetos filtrados:{}".format(objects)) # print N_objects on screen cv2.putText(img, "# Objetos: {}".format(objects), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2) ### raw YOLO detections are drawn below; comment out to hide them for det in detections: bbox = det.to_tlbr() cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) # print fps on screen #fps = ( fps + (1./(time.time()-t1)) ) / 2 #cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), # cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) cv2.imshow('output', img) if FLAGS.output: out.write(img) frame_index = frame_index + 1 list_file.write(str(frame_index) + ' ') if len(converted_boxes) != 0: for i in range(0, len(converted_boxes)): list_file.write( str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' ' + str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ') list_file.write('\n') # press q to quit if cv2.waitKey(1) == ord('q'): break vid.release() if FLAGS.output: out.release() list_file.close() cv2.destroyAllWindows()
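The drawing loop above interleaves counting and rendering. A minimal sketch of just the counting rule it applies ('all' disables the class filter, matching FLAGS.class_1):

def count_tracks(tracks, wanted='all'):
    n = 0
    for track in tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue                     # skip tentative or stale tracks
        if wanted == 'all' or track.get_class() == wanted:
            n += 1
    return n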
# # img_in = cv2.imread(img_filename, cv2.COLOR_BGR2RGB) # img_in = cv2.cvtColor(img_in, cv2.COLOR_BGR2RGB) img_in = tf.expand_dims(img_in, 0) img_in = transform_images(img_in, 416) t1 = time.time() boxes, scores, classes, nums = yolo.predict(img_in) classes = classes[0] names = [] for i in range(len(classes)): names.append(class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes( img, boxes[0] ) #transformation from relative x1/xsize, y1/ysize... to x,y,w,h features = encoder(img, converted_boxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( converted_boxes, scores[0], names, features) ] boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices]
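convert_boxes() is project-specific and not shown here, so as an assumption, a minimal sketch of the transformation its comment describes: YOLO emits relative (x1, y1, x2, y2) corners in [0, 1], while Deep SORT's Detection wants absolute (x, y, w, h) pixels, with the zero padding rows of the fixed-size output skipped.

import numpy as np

def convert_boxes_sketch(img, boxes):
    h, w = img.shape[:2]
    out = []
    for x1, y1, x2, y2 in np.asarray(boxes):
        if x2 == 0 and y2 == 0:          # padding row in the fixed-size output
            continue
        out.append([int(x1 * w), int(y1 * h),
                    int((x2 - x1) * w), int((y2 - y1) * h)])
    return out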
def run(self, img): if img is None: logging.warning("Empty Frame") time.sleep(0.1) return img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_in = tf.expand_dims(img_in, 0) img_in = transform_images(img_in, FLAGS.size) t1 = time.time() boxes, scores, classes, nums = self.yolo.predict(img_in) classes = classes[0] names = [] for i in range(len(classes)): names.append(self.class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes(img, boxes[0]) features = self.encoder(img, converted_boxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( converted_boxes, scores[0], names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima suppression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, self.nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker self.tracker.predict() self.tracker.update(detections) for track in self.tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue # skip unconfirmed or stale tracks instead of aborting the whole frame bbox = track.to_tlbr() class_name = track.get_class() color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] self.last_tracked.append( (track.track_id, track.get_class(), int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))) cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(img, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN #for det in detections: # bbox = det.to_tlbr() # cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2) # print fps on screen self.fps = (self.fps + (1. / (time.time() - t1))) / 2 cv2.putText(img, "FPS: {:.2f}".format(self.fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
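Every variant in this file smooths FPS with fps = (fps + 1/dt) / 2, a fixed 0.5 blend that still jitters visibly. A minimal alternative sketch using an exponential moving average (the alpha value is an assumption):

class FpsMeter:
    def __init__(self, alpha=0.1):
        self.alpha, self.fps = alpha, 0.0

    def update(self, dt):
        inst = 1.0 / max(dt, 1e-6)       # instantaneous frames per second
        self.fps = (1 - self.alpha) * self.fps + self.alpha * inst
        return self.fps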
def main(): class_names = [ c.strip() for c in open('./data/labels/coco.names').readlines() ] yolo = YoloV3(classes=len(class_names)) yolo.load_weights('./weights/yolov3.tf') imageHub = imagezmq.ImageHub() max_cosine_distance = 0.5 nn_budget = None nms_max_overlap = 0.8 model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget) tracker = Tracker(metric) #vid = cv2.VideoCapture('./data/video/traffic1.mkv') #vid = cv2.VideoCapture("video.webm") #vid = VideoCaptureAsync("video.webm") #vid = vid.start() codec = cv2.VideoWriter_fourcc(*'XVID') #vid_fps =int(vid.get(cv2.CAP_PROP_FPS)) #vid_width,vid_height = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) #out = cv2.VideoWriter('./data/video/results.avi', codec, vid_fps, (vid_width, vid_height)) out = cv2.VideoWriter('./data/video/results.avi', codec, 20, (480, 480)) from collections import deque pts = [deque(maxlen=30) for _ in range(1000)] counter = [] while True: #_, img = vid.read() (rpiName, img) = imageHub.recv_image() imageHub.send_reply(b'OK') if img is None: print('Completed') break img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_in = tf.expand_dims(img_in, 0) img_in = transform_images(img_in, 416) t1 = time.time() boxes, scores, classes, nums = yolo.predict(img_in) classes = classes[0] names = [] for i in range(len(classes)): names.append(class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes(img, boxes[0]) features = encoder(img, converted_boxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( converted_boxes, scores[0], names, features) ] boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] tracker.predict() tracker.update(detections) cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] #current_count = int(0) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(img, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) center = (int( ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2)) pts[track.track_id].append(center) for j in range(1, len(pts[track.track_id])): if pts[track.track_id][j - 1] is None or pts[ track.track_id][j] is None: continue thickness = int(np.sqrt(64 / float(j + 1)) * 2) cv2.line(img, (pts[track.track_id][j - 1]), (pts[track.track_id][j]), color, thickness) height, width, _ = img.shape #cv2.line(img, (0, int(3*height/6+height/20)), (width, int(3*height/6+height/20)), (0, 255, 0), thickness=2) #cv2.line(img, (0, int(3*height/6-height/20)), (width, int(3*height/6-height/20)), (0, 255, 0), thickness=2) center_y = int(((bbox[1]) + (bbox[3])) / 2) if center_y <= int(3 * height / 6 + height / 20) and center_y >= 
int(3 * height / 6 - height / 20): if class_name == 'car' or class_name == 'truck' or class_name == 'person': counter.append(int(track.track_id)) #current_count += 1 total_count = len(set(counter)) #cv2.putText(img, "Current Vehicle Count: " + str(current_count), (0, 80), 0, 1, (0, 0, 255), 2) cv2.putText(img, "Total Vehicle Count: " + str(total_count), (0, 130), 0, 1, (0, 0, 255), 2) fps = 1. / (time.time() - t1) cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), 0, 1, (0, 0, 255), 2) #cv2.resizeWindow('output', 1024, 768) cv2.imshow('output', img) out.write(img) if cv2.waitKey(1) == ord('q'): break #vid.release() out.release() cv2.destroyAllWindows()
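The counter above registers a track once its center enters a horizontal band around the counting line, and deduplicates IDs with a set so a vehicle lingering in the band is not counted twice. A minimal sketch of that rule (band geometry copied from the code above):

counted = set()

def update_count(track_id, center_y, height):
    band = (int(3 * height / 6 - height / 20),
            int(3 * height / 6 + height / 20))
    if band[0] <= center_y <= band[1]:
        counted.add(track_id)            # a set deduplicates repeat hits
    return len(counted)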
boxes, scores, classes, nums = yolo.predict(img_in) # pass the image and predict # The yolo prediction returns fixed-size, zero-padded numpy arrays for the boxes, scores, classes and nums; the number of boxes per image is capped # boxes, 3D shape (1, 100, 4) # scores, 2D shape (1, 100) # classes, 2D shape (1, 100) # nums, 1D shape (1,) classes = classes[0] names = [] for i in range(len(classes)): names.append(class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes(img, boxes[0]) features = encoder(img, converted_boxes) # generate the feature vectors detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)] cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0,1,20)] # run non-maxima suppression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap,scores) # which boxes are kept detections = [detections[i] for i in indices] # ready for deep_sort tracker.predict()
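For reference, a minimal greedy IoU-based NMS sketch showing what a non_max_suppression step does (a generic illustration, not necessarily the exact preprocessing.py variant): keep the highest-scoring box, drop boxes whose overlap with it exceeds max_overlap, repeat. Note that with nms_max_overlap = 1.0, as in several scripts above, nothing is ever suppressed.

import numpy as np

def nms(boxes_tlwh, scores, max_overlap=0.8):
    if len(boxes_tlwh) == 0:
        return []
    b = np.asarray(boxes_tlwh, dtype=float)
    x1, y1 = b[:, 0], b[:, 1]
    x2, y2 = b[:, 0] + b[:, 2], b[:, 1] + b[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = np.argsort(np.asarray(scores))[::-1]   # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= max_overlap]      # drop heavy overlaps
    return keep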
def main(self, _argv): if FLAGS.tiny: yolo = YoloV3Tiny(classes=FLAGS.num_classes) else: yolo = YoloV3(classes=FLAGS.num_classes) yolo.load_weights(FLAGS.weights) logging.info('weights loaded') class_names = [c.strip() for c in open(FLAGS.classes).readlines()] logging.info('classes loaded') try: vid = cv2.VideoCapture(int(FLAGS.video)) except: vid = cv2.VideoCapture(FLAGS.video) out = None width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) self.y_axis = height + 1 if FLAGS.output: # by default VideoCapture returns float instead of int fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) list_file = open('detection.txt', 'w') frame_index = -1 fps = 0.0 count = 0 cv2.namedWindow('HawkEye') cv2.setMouseCallback('HawkEye', self.mouse_callback) while True: _, img = vid.read() if img is None: logging.warning("Empty Frame") time.sleep(0.1) count += 1 if count < 3: continue else: break if self.y_axis < height: cv2.line(img, (0, self.y_axis), (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), self.y_axis), (255, 0, 0), 3) img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_in = tf.expand_dims(img_in, 0) img_in = transform_images(img_in, FLAGS.size) t1 = time.time() boxes, scores, classes, nums = yolo.predict(img_in) classes = classes[0] names = [] for i in range(len(classes)): names.append(class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes(img, boxes[0]) features = self.encoder(img, converted_boxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( converted_boxes, scores[0], names, features) ] # initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima suppresion boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression( boxs, classes, self.nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker self.tracker.predict() self.tracker.update(detections) for track in self.tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] present_x, present_y, w, h = track.to_xywh() present_size = int(w * h) if self.y_axis <= present_y: if track.size < present_size and track.y_axis < self.y_axis: label = 'coming' else: label = 'warning' else: label = '' cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.putText(img, label, (int(bbox[0]), int(bbox[1] - 10)), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 255), 2) # print fps on screen fps = (fps + (1. 
/ (time.time() - t1))) / 2 cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) cv2.imshow('HawkEye', img) if FLAGS.output: out.write(img) frame_index = frame_index + 1 list_file.write(str(frame_index) + ' ') if len(converted_boxes) != 0: for i in range(0, len(converted_boxes)): list_file.write( str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' ' + str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ') list_file.write('\n') # press q to quit if cv2.waitKey(1) == ord('q'): break vid.release() if FLAGS.output: out.release() list_file.close() cv2.destroyAllWindows()
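main() above registers self.mouse_callback on the 'HawkEye' window, but the handler itself is not shown; as an assumption, a minimal sketch of what it presumably does, following the attribute names used above: a left click moves the warning line to the clicked height.

import cv2

# method of the class above (sketch)
def mouse_callback(self, event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDOWN:
        self.y_axis = y                  # redraw the line at the clicked height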