def Apply(self, batched_data_dict, batch_size, istub, tracker, my_lock):
    """Run one tracker step on a batch and serialize the reported tracks.

    Parameters
    ----------
    batched_data_dict : dict
        Must provide the keys "ds_boxes", "scores", "features" and
        "raw_image". Only element 0 of the first three entries is read.
    batch_size : int
        Expected length of the first value stored in ``batched_data_dict``.
    istub
        Not used by this method.
    tracker
        Object exposing ``predict()``, ``update(detections)`` and ``tracks``.
    my_lock
        Lock with ``acquire()``/``release()`` guarding the tracker.

    Returns
    -------
    dict or None
        ``None`` (after printing an error line) when the batch size check
        fails or the dict is empty. Otherwise a dict with
        "deepsort_output" (a one-element list holding
        ``left|top|width|height|track_id`` records joined by ``-``) and
        "raw_image" passed through unchanged.
    """
    # next(iter(...)) works on both Python 2 and 3; dict.keys()[0] does not.
    first_key = next(iter(batched_data_dict), None)
    if first_key is None or batch_size != len(batched_data_dict[first_key]):
        print("[Error] Apply() batch size not matched...")
        return None

    detection_list = [
        Detection(box,
                  batched_data_dict["scores"][0][i],
                  batched_data_dict["features"][0][i])
        for i, box in enumerate(batched_data_dict["ds_boxes"][0])
    ]

    records = []
    my_lock.acquire()
    try:
        # The lock must be released even if the tracker raises, otherwise
        # every later caller would block forever.
        tracker.predict()
        tracker.update(detection_list)
        for tk in tracker.tracks:
            if not tk.is_confirmed() or tk.time_since_update > 1:
                continue
            left, top, width, height = map(int, tk.to_tlwh())
            records.append(
                "%s|%s|%s|%s|%s" % (left, top, width, height, tk.track_id))
    finally:
        my_lock.release()

    batched_result_dict = dict()
    batched_result_dict["deepsort_output"] = ["-".join(records)]
    batched_result_dict["raw_image"] = batched_data_dict["raw_image"]
    return batched_result_dict
def create_detections(detection_mat, frame_idx, min_height=0):
    """Create detections for given frame index from the raw detection matrix.

    Parameters
    ----------
    detection_mat : ndarray
        Matrix of detections. The first 10 columns of the detection matrix are
        in the standard MOTChallenge detection format. In the remaining columns
        store the feature vector associated with each detection.
    frame_idx : int
        The frame index.
    min_height : Optional[int]
        A minimum detection bounding box height. Detections that are smaller
        than this value are disregarded.

    Returns
    -------
    List[tracker.Detection]
        Returns detection responses at given frame index.

    """
    # The ``np.int`` alias was removed in NumPy 1.24; the builtin ``int``
    # selects the same dtype on every NumPy version.
    frame_indices = detection_mat[:, 0].astype(int)
    mask = frame_indices == frame_idx

    detection_list = []
    for row in detection_mat[mask]:
        # Columns 2..5 hold the box, column 6 the confidence, 10+ the feature.
        bbox, confidence, feature = row[2:6], row[6], row[10:]
        if bbox[3] < min_height:
            continue
        detection_list.append(Detection(bbox, confidence, feature))
    return detection_list
def track(self, frame, dknetBoxs):
    """Feed one frame of detector output to the tracker.

    Element 2 of every entry in ``dknetBoxs`` is used as the bounding box.
    Appearance features come from ``self.encoder``; every detection gets a
    confidence of 1.0.

    Returns the ``label`` of each reported track whose id had not yet been
    recorded in ``self.trackedIDs``.
    """
    boxs = [list(entry[2]) for entry in dknetBoxs]
    labels = list(dknetBoxs)
    features = self.encoder(frame, boxs)

    detections = []
    for bbox, feature in zip(boxs, features):
        detections.append(Detection(bbox, 1.0, feature))

    # Run non-maxima suppression.
    tlwh_boxes = np.array([det.tlwh for det in detections])
    confidences = np.array([det.confidence for det in detections])
    keep = preprocessing.non_max_suppression(
        tlwh_boxes, self.nms_max_overlap, confidences)
    detections = [detections[k] for k in keep]
    print("Detections: ", detections)

    # Call the tracker.
    # NOTE(review): `labels` is not filtered by the NMS indices, so it may no
    # longer line up with `detections` -- confirm what update() expects.
    self.tracker.predict()
    self.tracker.update(detections, labels)

    retVal = []
    for trk in self.tracker.tracks:
        if trk.is_confirmed() and not trk.time_since_update > 1:
            trk.to_tlbr()  # result is not used
            if trk.track_id not in self.trackedIDs:
                # First time this id is reported: remember it, emit its label.
                self.trackedIDs[trk.track_id] = 1
                retVal.append(trk.label)
    return retVal
def update_ids(self, image, people):
    """Assign a track id to every entry of ``people``.

    Each entry is ``((x, y, z, t), score)``; the box is converted to
    ``[x, y, z - x, t - y]`` before being encoded and handed to the tracker.

    Returns a list parallel to ``people`` holding the track id whose ``info``
    equals the entry's index, or -1 when no track carries that index.
    """
    box_rows, score_rows, index_rows = [], [], []
    for index, (corners, score) in enumerate(people):
        x, y, z, t = corners
        box_rows.append(np.array([x, y, (z - x), (t - y)]).astype(np.float64))
        score_rows.append(score)
        index_rows.append(index)

    bboxes = np.array(box_rows)
    scores = np.array(score_rows)
    identifiers = np.array(index_rows)

    # The encoder receives a copy of the boxes.
    features = self.encoder(image, bboxes.copy())
    detections = []
    for bbox, score, feature, index in zip(bboxes, scores, features, identifiers):
        detections.append(Detection(bbox, score, feature, index))

    self.tracker.predict()
    self.tracker.update(detections)

    ids = [-1 for _ in people]
    for trk in self.tracker.tracks:
        if trk.info in identifiers:
            ids[trk.info] = trk.track_id
    return ids
def track_objects(self, image, people):
    """Run the tracker on ``people`` and write track ids back into it.

    Each entry is ``(x, y, z, t, score, person_id)``. For every track whose
    ``info`` matches an entry index, element 5 of that entry is overwritten
    with the track id (the entry must therefore be mutable).

    Returns the same ``people`` object.
    """
    box_rows, score_rows, index_rows = [], [], []
    for index, person in enumerate(people):
        x, y, z, t, score, _ = person
        box_rows.append(np.array([x, y, (z - x), (t - y)]).astype(np.float64))
        score_rows.append(score)
        index_rows.append(index)

    bboxes = np.array(box_rows)
    scores = np.array(score_rows)
    identifiers = np.array(index_rows)

    features = self.encoder(image, bboxes.copy())
    detections = [
        Detection(bbox, score, feature, index)
        for bbox, score, feature, index
        in zip(bboxes, scores, features, identifiers)
    ]

    self.tracker.predict()
    self.tracker.update(detections)

    # NOTE(review): this list is filled but never returned or stored.
    new_detections = []
    for trk in self.tracker.tracks:
        if trk.info not in identifiers:
            continue
        print(trk.info)
        people[trk.info][5] = trk.track_id
        corners = trk.to_tlbr()
        new_detections.append(list(corners) + [trk.confidence, trk.track_id])
    return people
def video_frame_processing(vis, frame_idx, index, image):
    """Process one frame of sequence ``index``: detect, track, log, draw.

    Relies on module-level state: ``seq_info_dic``, ``tracker_dic``,
    ``min_confidence``, ``nms_max_overlap``, ``global_id``, ``global_track``,
    ``world_viewer_threshold``, ``tracking_file``, ``display`` and ``file``.

    Parameters
    ----------
    vis
        Visualizer; only used when ``display`` is truthy.
    frame_idx : int
        Frame number to select from the detection matrix (column 0).
    index
        Key selecting the sequence/tracker in the module-level dicts.
    image
        Frame image; a copy is appended to ``vis`` when displaying.
    """
    print("Processing frame %05d" % frame_idx)

    detection_mat = seq_info_dic[index]["detections"]
    # The ``np.int`` alias was removed in NumPy 1.24; use the builtin int.
    frame_indices = detection_mat[:, 0].astype(int)
    mask = frame_indices == frame_idx

    detection_list = []
    for row in detection_mat[mask]:
        # Columns 1..4 hold the box, column 5 the confidence, 6+ the feature.
        bbox, confidence, feature = row[1:5], row[5], row[6:]
        detection_list.append(Detection(bbox, confidence, feature, index))

    detections = [d for d in detection_list if d.confidence >= min_confidence]

    # Run non-maxima suppression on the surviving detections.
    boxes = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    indices = preprocessing.non_max_suppression(
        boxes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    tracker_dic[index].predict()
    matching_results = tracker_dic[index].update(
        detections, tracker_dic, global_id, global_track,
        world_viewer_threshold)

    # Each result is indexed as (value written last, detection index).
    for res in matching_results:
        det = detections[res[1]]
        tracking_file.write('{} {} {} {} {} {} {}\n'.format(
            frame_idx, det.tlwh[0], det.tlwh[1], det.tlwh[2], det.tlwh[3],
            det.confidence, res[0]
        ))

    if display:
        if index == 0:
            # The image is only reset for index 0; later ones are appended.
            vis.reset_image()
        vis.append_image(image.copy())
        vis.draw_trackers(tracker_dic[index].tracks, index, frame_idx, file)
def create_det_from_model(model, img, conf_thresh, nms_thresh,
                          min_detection_height, use_cuda):
    """Run the detector on ``img`` and wrap class-0 boxes as detections.

    The image is resized to ``model.width`` x ``model.height`` and converted
    BGR -> RGB before ``do_detect`` is called. Boxes are read as
    (center x, center y, width, height) relative to the image size, with
    the confidence at index 5 and the class id at index 6. Every detection
    carries the same one-element dummy feature.

    Returns a list of ``Detection`` objects with (x, y, w, h) pixel boxes.
    """
    img_h, img_w = img.shape[:2]
    resized = cv2.resize(img, (model.width, model.height))
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    boxes = do_detect(model, rgb, conf_thresh, nms_thresh, use_cuda)

    dummy_feat = np.ones((1,), dtype=np.float32)
    detection_list = []
    for box in boxes:
        if box[6] != 0:
            # Only class id 0 is kept.
            continue
        x = int(round((box[0] - box[2] / 2.0) * img_w))
        y = int(round((box[1] - box[3] / 2.0) * img_h))
        w = int(box[2] * img_w)
        h = int(box[3] * img_h)
        if h >= min_detection_height:
            detection_list.append(Detection((x, y, w, h), box[5], dummy_feat))
    return detection_list
def track(self, descriptions):
    """Update the tracker from a list of detection descriptions.

    Each description supplies ``bounding_box`` (minX, minY, width, height)
    and ``probability``. Features are encoded from ``self.frame``.

    Returns
    -------
    tuple
        ``(self.tracker.tracks, None)`` when ``descriptions`` is an empty
        list (only ``predict()`` runs), otherwise ``(self.tracker, dets)``
        with the detections that survived non-maxima suppression.
        NOTE(review): the first element is a track list in one branch and
        the tracker object in the other -- confirm callers expect this.
    """
    if descriptions == []:
        self.tracker.predict()
        print("No detections")
        return self.tracker.tracks, None

    box_rows = []
    for description in descriptions:
        box = description.bounding_box
        box_rows.append((box.minX, box.minY, box.width, box.height))
    detections = np.array(box_rows)
    out_scores = [description.probability for description in descriptions]

    features = self.encoder(self.frame, detections)
    dets = []
    for bbox, score, feature in zip(detections, out_scores, features):
        dets.append(Detection(bbox, score, feature))

    # Non-maxima suppression with a fixed 0.5 overlap threshold.
    outboxes = np.array([det.tlwh for det in dets])
    outscores = np.array([det.confidence for det in dets])
    keep = prep.non_max_suppression(outboxes, 0.5, outscores)
    dets = [dets[k] for k in keep]

    self.tracker.predict()
    self.tracker.update(dets)
    return self.tracker, dets
def create_detections(detection_mat, frame_idx, min_height=0):
    """Create the detections for the given frame.

    Parameters
    ----------
    detection_mat : ndarray
        Detection matrix. The first 10 columns are in MOTChallenge format;
        the remaining columns store the vector associated with each
        detection.
    frame_idx : int
        The frame index.
    min_height : Optional[int]
        Minimum bounding box height; smaller detections are ignored.

    Returns
    -------
    List[tracker.Detection]
        The detections found at the given frame index.
    """
    # The ``np.int`` alias was removed in NumPy 1.24; the builtin ``int``
    # yields the same dtype and works on every NumPy version.
    frame_indices = detection_mat[:, 0].astype(int)
    mask = frame_indices == frame_idx

    detection_list = []
    # Parse each detection row of the requested frame.
    for row in detection_mat[mask]:
        bbox, confidence, feature = row[2:6], row[6], row[10:]
        if bbox[3] < min_height:
            continue
        detection_list.append(Detection(bbox, confidence, feature))
    return detection_list
def predict_obtracker(self, frame, dets):
    """Wrap raw detections and advance the object tracker's prediction.

    Parameters
    ----------
    frame
        Not used by this method.
    dets : iterable
        Each item provides the box in its first four elements and the
        confidence at index 4.

    Side effects
    ------------
    Stores the wrapped detections (feature set to ``None``) on
    ``self.detections`` and calls ``self.obtracker.predict()``.
    """
    # The previous version also built a separate list of boxes on every call
    # that was never read; it has been dropped.
    self.detections = [Detection(det[:4], det[4], None) for det in dets]
    self.obtracker.predict()
def cvt_to_detection_objects(frame, bboxes, detection_scores, encoder,
                             transforms, gaussian_mask):
    """Encode box crops and return NMS-filtered ``Detection`` objects.

    Crops come from ``pre_process``; when the module flag
    ``USE_GAUSSIAN_MASK`` is set they are multiplied by ``gaussian_mask``
    before ``encoder.forward_once``. Each feature vector is L2-normalized.
    Overlapping detections are removed with ``NMS_MAX_OVERLAP``.
    """
    crops = pre_process(frame, bboxes, transforms)
    if USE_GAUSSIAN_MASK:
        crops = gaussian_mask * crops

    features = encoder.forward_once(crops).detach().cpu().numpy()
    if len(features.shape) == 1:
        # A single 1-D vector is promoted to a one-row batch.
        features = np.expand_dims(features, 0)

    candidates = [
        # Normalize every feature vector to unit L2 norm.
        Detection(bbox, score, feature / np.linalg.norm(feature, ord=2))
        for bbox, score, feature in zip(bboxes, detection_scores, features)
    ]

    tlwh_boxes = np.array([det.tlwh for det in candidates])
    confidences = np.array([det.confidence for det in candidates])
    keep = dsutil_prep.non_max_suppression(
        tlwh_boxes, NMS_MAX_OVERLAP, confidences)
    return [candidates[k] for k in keep]
def create_gts(gt_mat, frame_idx):
    """Create detections for given frame index from the raw groundtruth
    detection matrix.

    Rows whose column 6 is 0, or whose column 7 is not 1, are skipped. The
    last column is passed to ``Detection`` as its confidence, and every
    detection gets a one-element dummy feature.

    Parameters
    ----------
    gt_mat : ndarray
        Matrix of ground truth detections
    frame_idx : int
        The frame index.

    Returns
    -------
    List[detection.Detection]
        Returns detection responses at given frame index.

    """
    # The ``np.int`` alias was removed in NumPy 1.24; use the builtin int.
    frame_indices = gt_mat[:, 0].astype(int)
    mask = frame_indices == frame_idx

    detection_list = []
    for row in gt_mat[mask]:
        if int(row[6]) == 0 or int(row[7]) != 1:
            continue
        bbox = row[2:6]
        feature = np.ones((1,), dtype=np.float32)
        vis = float(row[-1])
        # Kept from the original: any column-6 value other than 0 or 1
        # aborts here.
        assert int(row[6]) == 1, 'flag should be 1'
        detection_list.append(Detection(bbox, vis, feature))
    return detection_list
def callback_image(data):
    """Image callback: track the current detections and draw the result.

    Uses the module-level ``detections``, ``scores``, ``encoder``,
    ``tracker`` and ``msg``. ``msg.data`` is overwritten with
    ``[x1, y1, x2, y2, track_id]`` for each reported track, so after the
    call it holds the last one.
    """
    # Convert the incoming message to a BGR image.
    cv_rgb = CvBridge().imgmsg_to_cv2(data, "bgr8")

    # Features and detections.
    features = encoder(cv_rgb, detections)
    detections_new = []
    for bbox, score, feature in zip(detections, scores, features):
        detections_new.append(Detection(bbox, score, feature))

    # Run non-maxima suppression.
    boxes = np.array([det.tlwh for det in detections_new])
    scores_new = np.array([det.confidence for det in detections_new])
    keep = prep.non_max_suppression(boxes, 1.0, scores_new)
    detections_new = [detections_new[k] for k in keep]

    # Call the tracker.
    tracker.predict()
    tracker.update(detections_new)

    # Draw the detection boxes.
    det_color = (100, 255, 50)
    for det in detections_new:
        corners = det.to_tlbr()
        top_left = (int(corners[0]), int(corners[1]))
        bottom_right = (int(corners[2]), int(corners[3]))
        cv2.rectangle(cv_rgb, top_left, bottom_right, det_color, 1)
        cv2.putText(cv_rgb, "person", top_left, cv2.FONT_HERSHEY_SIMPLEX,
                    1.0, det_color, lineType=cv2.LINE_AA)

    # Draw the tracker boxes.
    track_color = (255, 255, 255)
    for trk in tracker.tracks:
        if trk.is_confirmed() and not trk.time_since_update > 1:
            corners = trk.to_tlbr()
            x1, y1 = int(corners[0]), int(corners[1])
            x2, y2 = int(corners[2]), int(corners[3])
            msg.data = [x1, y1, x2, y2, trk.track_id]
            cv2.rectangle(cv_rgb, (x1, y1), (x2, y2), track_color, 1)
            cv2.putText(cv_rgb, str(trk.track_id), (x2, y1), 0,
                        5e-3 * 200, track_color, 1)

    cv2.imshow("YOLO+SORT", cv_rgb)
    cv2.waitKey(3)
def create_obj_infos(cur_frame, final_boxes, final_probs, final_labels,
                     box_feats, targetid2class, tracking_objs,
                     min_confidence, min_detection_height, scale):
    """Turn detector outputs into ``Detection`` objects for tracking.

    Boxes are divided by ``scale`` and converted from corner form to
    (x, y, width, height). Entries are dropped when their class name is not
    in ``tracking_objs``, their confidence (rounded to 7 decimals) is below
    ``min_confidence``, or their height is below ``min_detection_height``.
    The feature is ``box_feats[j]`` averaged over axis 1 twice and then
    normalized.

    Returns a list of ``Detection`` objects.
    """
    tracking_boxes = final_boxes / scale

    obj_infos = []
    triples = zip(tracking_boxes, final_probs, final_labels)
    for j, (box, prob, label) in enumerate(triples):
        cat_name = targetid2class[label]
        score = float(round(prob, 7))
        if cat_name not in tracking_objs or score < min_confidence:
            continue

        # Corner form -> width/height, in place on the scaled copy.
        box[2] -= box[0]
        box[3] -= box[1]

        pooled = np.mean(box_feats[j], axis=1)
        avg_feat = np.mean(pooled, axis=1)
        norm_feat = avg_feat / np.linalg.norm(avg_feat)

        record = [cur_frame, box[0], box[1], box[2], box[3], score]
        obj_infos.append(record + norm_feat.tolist())

    # Record layout: frame, x, y, w, h, confidence, feature...
    return [
        Detection(record[1:5], record[5], record[6:])
        for record in obj_infos
        if not record[4] < min_detection_height
    ]
def detect(self, image):
    '''
    Obtain bounding boxes with the detection model only; the output does
    not contain any target id.

    :param image: image array; its last axis is reversed (BGR to RGB)
        before detection
    :return: tuple ``(bboxes, ret_clss, detections)`` where ``bboxes`` is
        ``[[min x, min y, max x, max y], ...]`` for the detections kept by
        non-maxima suppression, ``ret_clss`` is the class list exactly as
        returned by the detector, and ``detections`` are the kept
        ``Detection`` objects.
        NOTE(review): ``ret_clss`` is not filtered by NMS, so it may be
        longer than ``bboxes`` -- confirm callers account for this.
    '''
    rgb_image = Image.fromarray(image[..., ::-1])  # bgr to rgb
    boxs, ret_clss = self.yolo.detect_image(rgb_image)

    # Encode an appearance feature for every box of this frame.
    features = self.encoder(image, boxs)

    # Wrap each box with its feature and class; the score is fixed at 1.0.
    candidates = []
    for bbox, feature, ret_cls in zip(boxs, features, ret_clss):
        candidates.append(Detection(bbox, 1.0, feature, ret_cls))

    tlwh_boxes = np.array([det.tlwh for det in candidates])
    confidences = np.array([det.confidence for det in candidates])
    keep = preprocessing.non_max_suppression(
        tlwh_boxes, self.nms_max_overlap, confidences)
    detections = [candidates[k] for k in keep]

    bboxes = [det.to_tlbr() for det in detections]
    return bboxes, ret_clss, detections
def step(self, frame):
    """Process one frame: get boxes, filter them, update the tracker.

    Boxes come from ``frame.detections`` columns 2..6 when the detection
    config's ``default`` flag is set, otherwise from ``self.get_bboxes``.
    They are thresholded on column 4, passed through ``nms``, encoded with
    ``self.get_features`` and converted to (x, y, w, h) for the tracker.
    The tracker results are stored in ``self.results[frame.frame_idx]``.
    """
    img = cv2.imread(frame.filename)
    det_cfg = self.cfg.tracktor.detection

    if det_cfg.default:
        bbox_result = frame.detections[:, 2:7]
        # (x, y, w, h) -> (x1, y1, x2, y2).
        # NOTE(review): if frame.detections is a NumPy array this slice is a
        # view, so the in-place add also changes frame.detections -- confirm
        # that is intended.
        bbox_result[:, 2:4] += bbox_result[:, :2]
    else:
        bbox_result, _ = self.get_bboxes(img)

    confident = bbox_result[:, 4] > det_cfg.min_conf
    bbox_result = bbox_result[confident]
    _, keep_inds = nms(bbox_result, det_cfg.nms_iou_thr)
    bbox_result = bbox_result[keep_inds]

    # Features are taken while the boxes are still in corner form.
    features = self.get_features(img, bbox_result)
    bbox_result[:, 2:4] -= bbox_result[:, :2]

    detections = [
        Detection(bbox[0:4], bbox[4], feat)
        for bbox, feat in zip(bbox_result, features)
    ]

    self.tracker.predict()
    self.tracker.update(detections)

    frame_results = self.tracker.get_results()
    if not frame_results:
        # Falsy results are replaced by an empty (0, 5) array.
        frame_results = np.zeros((0, 5))
    self.results[frame.frame_idx] = np.array(frame_results)
def Apply(self):
    """Update the tracker with the stored detections and serialize tracks.

    Reads ``self.ds_boxes``, ``self.scores`` and ``self.features`` (indexed
    in parallel), updates ``self.tracker`` while holding ``self.my_lock``,
    and stores on ``self.output`` one ``left|top|width|height|track_id``
    record per reported track, joined by ``-``.
    """
    # enumerate() replaces the Python-2-only xrange() index loop while still
    # indexing scores/features positionally.
    detection_list = [
        Detection(box, self.scores[i], self.features[i])
        for i, box in enumerate(self.ds_boxes)
    ]

    self.my_lock.acquire()
    try:
        # Release the lock even when the tracker raises; otherwise every
        # later caller would block forever.
        self.tracker.predict()
        self.tracker.update(detection_list)
    finally:
        self.my_lock.release()

    records = []
    for tk in self.tracker.tracks:
        if not tk.is_confirmed() or tk.time_since_update > 1:
            continue
        left, top, width, height = map(int, tk.to_tlwh())
        records.append(
            "%s|%s|%s|%s|%s" % (left, top, width, height, tk.track_id))

    # "--" appears when a record starts with a negative `left`, directly after
    # the "-" separator. The collapse is kept for output compatibility.
    # NOTE(review): it also drops that minus sign.
    self.output = "-".join(records).replace("--", "-")
def create_detections(detection_mat, min_height=0):
    """Wrap every row of ``detection_mat`` as a ``Detection``.

    Each row supplies the box in elements 2..5, the confidence in element 6
    and the feature from element 10 onward. Rows whose box height (fourth
    box element) is below ``min_height`` are skipped.
    """
    kept = []
    for row in detection_mat:
        bbox = row[2:6]
        confidence = row[6]
        feature = row[10:]
        if not bbox[3] < min_height:
            kept.append(Detection(bbox, confidence, feature))
    return kept
def create_detections(objects):
    """Convert detector objects into ``Detection`` instances.

    Each object supplies ``x_offset``, ``y_offset``, ``width``, ``height``
    and ``score``. Every detection is created with an empty feature list.
    """
    return [
        Detection(
            np.array([obj.x_offset, obj.y_offset, obj.width, obj.height]),
            obj.score,
            [],
        )
        for obj in objects
    ]
def create_detections(self, detections_out):
    """Wrap detector output rows as ``Detection`` objects.

    Each row supplies the box in elements 2..5, the confidence in element 6
    and the feature from element 10 onward. Rows whose box height is below
    ``self.min_detection_height`` are dropped.
    """
    kept = []
    for row in detections_out:
        bbox = row[2:6]
        confidence = row[6]
        feature = row[10:]
        if not bbox[3] < self.min_detection_height:
            kept.append(Detection(bbox, confidence, feature))
    return kept
def main(filename, metrics=False):
    """Track objects through a video and collect per-frame track centers.

    Parameters
    ----------
    filename
        Passed to ``cv2.VideoCapture``.
    metrics : bool
        When true, the accumulated detection + tracking time is returned
        as a third value.

    Returns
    -------
    tuple
        ``(movement, ids)`` or ``(movement, ids, total_time)``.
        ``movement[ts][track_id]`` is the integer box center of a reported
        track at frame counter ``ts`` (starting at 1); ``ids[track_id]``
        lists the box widths seen for that track.
    """
    movement = {}
    ids = {}

    max_cosine_distance = 0.5
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance)
    tracker = Tracker(metric)

    video_capture = cv2.VideoCapture(filename)
    ts = 0
    total_time = 0
    try:
        while True:
            ts += 1
            ret, frame = video_capture.read()
            if not ret:
                break
            movement[ts] = {}

            # Time the detector call.
            ti = time.time()
            boxs, _ = yolo.detect_image(frame)
            total_time += time.time() - ti

            # Feature encoding is not included in total_time.
            features = encoder(frame, boxs)
            detections = [Detection(bbox, 1.0, feature)
                          for bbox, feature in zip(boxs, features)]

            # Time the tracker update and the bookkeeping below.
            ti = time.time()
            tracker.predict()
            tracker.update(detections)
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                tx, ty, w, h = track.to_tlwh()
                center = (int(tx + w / 2), int(ty + h / 2))
                _id = track.track_id
                movement[ts][_id] = center
                ids.setdefault(_id, []).append(w)
            total_time += time.time() - ti
    finally:
        # Release the capture even if detection or tracking raises.
        video_capture.release()

    if metrics:
        return movement, ids, total_time
    return movement, ids
def create_detections(self, frame, raw_dets, embeds):
    """Pair raw detection mappings with their embeddings.

    One ``Detection`` is built per entry of ``embeds``; the matching
    ``raw_dets`` entry supplies the box through keys 'l', 't', 'w', 'h' and
    the score through key 'confidence'. ``frame`` is not used.
    """
    detection_list = []
    for position, embed in enumerate(embeds):
        raw = raw_dets[position]
        bbox = [raw['l'], raw['t'], raw['w'], raw['h']]
        detection_list.append(Detection(bbox, raw['confidence'], embed))
    return detection_list
def main(yolo):
    """Grab one camera frame, then save one image and one pickle per box.

    For every feature returned by the encoder, the matching box is drawn on
    the (shared) frame, the frame is previewed and written to
    ``pic<count>.jpg``, and the feature vector is pickled to
    ``pickledump<count>.pickle`` and read back for printing.
    """
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    video_capture = cv2.VideoCapture(0)
    try:
        ret, frame = video_capture.read()  # frame shape 640*480*3

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        features = encoder(frame, boxs)
        print(features)

        for count, feature in enumerate(features):
            # Treat each box/feature pair as its own one-element batch.
            box_arr = [boxs[count]]
            feat_arr = [feature]
            detections = [
                Detection(bbox, 1.0, feat)
                for bbox, feat in zip(box_arr, feat_arr)
            ]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Boxes accumulate on the same frame across iterations.
            for det in detections:
                bbox = det.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

            # Output frame.
            cv2.startWindowThread()
            cv2.namedWindow("preview")
            cv2.imshow("preview", frame)
            cv2.waitKey(2000)
            cv2.imwrite("pic" + str(count) + ".jpg", frame)

            # Write the feature vector to its pickle file; the context
            # managers close both handles (the read handle used to leak).
            pickle_path = "pickledump" + str(count) + ".pickle"
            with open(pickle_path, "wb") as pickle_out:
                cPickle.dump(feature, pickle_out)

            # Read the pickled feature vector back and print it.
            with open(pickle_path, "rb") as pickle_in:
                loaded = cPickle.load(pickle_in)
            print(loaded)
    finally:
        # Always free the camera and GUI windows, even on failure.
        video_capture.release()
        cv2.destroyAllWindows()
def create_detections(self, frame, raw_dets, embeds):
    """Pair each raw detection with the embedding at the same position.

    One ``Detection`` is built per entry of ``raw_dets`` from its 'bbox'
    and 'score' keys plus ``embeds[i]``. ``frame`` is not used.
    """
    return [
        Detection(raw['bbox'], raw['score'], embeds[position])
        for position, raw in enumerate(raw_dets)
    ]
def Apply(self):
    '''
    Build detections from the stored objects and update the tracker.

    Reads ``self.input['meta']['obj']``; every object supplies 'box',
    'conf' and 'feature'. Then calls ``predict()`` and ``update()`` on
    ``self.tracker``.
    '''
    detection_list = [
        Detection(obj['box'], obj['conf'], obj['feature'])
        for obj in self.input['meta']['obj']
    ]
    self.tracker.predict()
    self.tracker.update(detection_list)
def create_detections(self, frame, raw_dets, embeds):
    """Pair raw detections with embeddings, keeping the class name.

    One ``Detection`` is built per entry of ``embeds``; the matching
    ``raw_dets`` entry is read as [bbox, conf_score, class]. ``frame`` is
    not used.
    """
    detection_list = []
    for position, embed in enumerate(embeds):
        raw = raw_dets[position]  # raw = [bbox, conf_score, class]
        detection_list.append(
            Detection(raw[0], raw[1], embed, class_name=raw[2]))
    return detection_list
def create_detections_poly(self, dets, embeds, bounding_rects):
    """Build detections from polygon results and their bounding rectangles.

    Parameters
    ----------
    dets : sequence
        Three parallel sequences: raw polygons, class names and scores.
        It is no longer modified by this call.
    embeds : sequence
        One embedding per detection.
    bounding_rects : sequence
        One ``(x, y, w, h)`` rectangle per detection; negative ``x``/``y``
        are clamped to 0.

    Returns
    -------
    list
        One ``Detection`` per zipped entry, with the class name and the raw
        polygon attached through ``class_name`` and ``others``.
    """
    # Build a fresh list of columns instead of extending the caller's `dets`
    # in place: the old in-place extend made a second call with the same list
    # see five (then seven) columns.
    columns = list(dets) + [embeds, bounding_rects]

    detection_list = []
    for raw_polygon, cl, score, embed, bounding_rect in zip(*columns):
        x, y, w, h = bounding_rect
        bbox = [max(0, x), max(0, y), w, h]
        detection_list.append(
            Detection(bbox, score, embed, class_name=cl, others=raw_polygon))
    return detection_list
def main(mask_rcnn):
    """Track people through the video named by ``sys.argv[1]``.

    Every frame is segmented with ``mask_rcnn.detect_people``, the masks
    are turned into detections, the tracker is updated, and the annotated
    frame is appended to ``output/<name>`` (the name comes from
    ``get_filename``).
    """
    filename = sys.argv[1]

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    reader = imageio.get_reader(filename, "ffmpeg")
    try:
        fps = reader.get_meta_data()['fps']
        N = len(reader) - 1

        writer = imageio.get_writer(
            "output/" + get_filename(filename), fps=fps)
        try:
            for i, frame in tqdm(enumerate(reader), desc="Frames ", total=N):
                masks = mask_rcnn.detect_people(frame)
                masks = image_utils.classify_masks_with_hash(masks)

                boxs = masks.get_xywh()
                features = encoder(frame, boxs)

                # Each detection carries its mask's score and kmeans label.
                detections = [
                    Detection(mask.xywh, mask.score, feature,
                              mask.kmeans_label)
                    for mask, feature in zip(masks, features)
                ]

                # Run non-maxima suppression.
                # TODO: with maskrcnn, this may not be required
                boxes = np.array([d.tlwh for d in detections])
                scores = np.array([d.confidence for d in detections])
                indices = preprocessing.non_max_suppression(
                    boxes, nms_max_overlap, scores)
                detections = [detections[i] for i in indices]

                # Call the tracker
                tracker.predict()
                tracker.update(detections)

                image_utils.draw_player_with_tracks(frame, tracker.tracks)
                writer.append_data(frame)
        finally:
            writer.close()
    finally:
        # The reader was previously never closed.
        reader.close()
def create_detections(detection_mat, frame_idx, min_height=0):
    """Create the detections stored for one frame of a detection matrix.

    Parameters
    ----------
    detection_mat : ndarray
        One detection per row. Column 0 is the frame index, columns 2..5
        the bounding box, column 6 the confidence and columns 10 onward the
        feature vector.
    frame_idx : int
        Frame index to select.
    min_height : Optional[int]
        Rows whose fourth box element is below this value are skipped.

    Returns
    -------
    list
        ``Detection`` objects for the selected frame.
    """
    # The ``np.int`` alias was removed in NumPy 1.24; the builtin ``int``
    # is the portable spelling of the same dtype.
    frame_indices = detection_mat[:, 0].astype(int)
    mask = frame_indices == frame_idx

    detection_list = []
    for row in detection_mat[mask]:
        bbox, confidence, feature = row[2:6], row[6], row[10:]
        if bbox[3] < min_height:
            continue
        detection_list.append(Detection(bbox, confidence, feature))
    return detection_list
def create_detections(self, bboxes, feature):
    """Convert corner-form boxes into ``Detection`` objects.

    ``bboxes`` is copied into an array; each row is read as
    (x1, y1, x2, y2, confidence) and converted in place on the copy to
    (x1, y1, width, height). A box is dropped only when its width is below
    ``self.conf.min_width`` AND its height is below
    ``self.conf.min_height``. The ``feature`` argument is not used; every
    detection gets an empty feature list.
    """
    detections = []
    for box in np.array(bboxes):
        if box is None or len(box) == 0:
            continue
        # Bottom-right corner -> width/height.
        box[2:4] -= box[:2]
        too_small = (box[2] < self.conf.min_width
                     and box[3] < self.conf.min_height)
        if not too_small:
            detections.append(
                Detection(tlwh=box[:4], confidence=box[4], feature=[]))
    return detections