def week2_adaptive_hsv(video: Video, debug=False) -> Iterator[Frame]: model_mean, model_std = get_background_model(video, int(2141 * 0.25), total_frames=int(2141 * 0.25), pixel_value=PixelValue.HSV) ground_truth = read_detections( '../datasets/AICity_data/train/S03/c010/gt/gt.txt') frame_id = int(2141 * 0.25) roi = cv2.cvtColor( cv2.imread('../datasets/AICity_data/train/S03/c010/roi.jpg'), cv2.COLOR_BGR2GRAY) for im, mask in gaussian_model_adaptive(video, int(2141 * 0.25), model_mean, model_std, total_frames=int(2141 * 0.10), pixel_value=PixelValue.HSV, alpha=1.75, rho=0.01): mask = mask & roi if debug: cv2.imshow('f', mask) cv2.waitKey() mask = opening(mask, 7) if debug: cv2.imshow('f', mask) cv2.waitKey() mask = closing(mask, 35) if debug: cv2.imshow('f', mask) cv2.waitKey() mask, detections = find_boxes(mask) frame = Frame(frame_id) frame.detections = detections frame.ground_truth = ground_truth[frame_id] if debug: mask2 = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR) for detection in detections: cv2.rectangle( mask2, (int(detection.top_left[1]), int(detection.top_left[0])), (int(detection.get_bottom_right()[1]), int(detection.get_bottom_right()[0])), (0, 255, 0), 5) for gt in ground_truth[frame_id]: cv2.rectangle(mask2, (int(gt.top_left[1]), int(gt.top_left[0])), (int(gt.get_bottom_right()[1]), int(gt.get_bottom_right()[0])), (255, 0, 0), 5) cv2.imshow('f', mask2) cv2.waitKey() yield im, mask, frame frame_id += 1
def compute_map_gt_det(det_file, gt_file): det_list = read_detections(det_file) gt_list = read_detections(gt_file) frames = [] for i in range(0, len(det_list)): frame = Frame(i) frame.detections = det_list[i] frame.ground_truth = gt_list[i] frames.append(frame) mAP = mean_average_precision(frames, ignore_classes=True) return mAP
def main(): start_frame = 1440 end_frame = 1789 gt = read_annotations('../annotations', start_frame, end_frame) alg = 'mask_rcnn' detections = read_detections('../datasets/AICity_data/train/S03/c010/det/det_{0}.txt'.format(alg)) kalman = KalmanTracking() for i in range(start_frame, end_frame): f = Frame(i) f.detections = detections[i] f.ground_truth = gt[i - start_frame] kalman(f) print(seq(f.detections).map(lambda d: d.id).to_list())
def main(): im_1440 = cv2.imread( "../datasets/AICity_data_S03_c010_1440/frame_1440.jpg") top_left = [995, 410] width = 1241 - 995 height = 605 - 410 ground_truth = [Detection('', 'car', top_left, width, height)] """ DETECTIONS FROM ALTERED GROUND TRUTH """ frame = Frame(0, ground_truth) frame.detections = alter_detections(ground_truth) plot_frame(im_1440, frame) iou = frame.get_detection_iou() iou_mean = frame.get_detection_iou_mean() print("IOU: ", iou, "IOU mean", iou_mean)
def get_frames(self) -> Iterable[Frame]: video = cv2.VideoCapture(os.path.join(self.video_path, "vdo.avi")) full_detections = read_detections( os.path.join(self.video_path, "det/det_yolo3.txt")) if os.path.exists(os.path.join(self.video_path, "gt/gt.txt")): full_ground_truth = read_detections( os.path.join(self.video_path, "gt/gt.txt")) else: full_ground_truth = [] count = 0 while video.isOpened(): ret, frame = video.read() if ret: det = full_detections[count] if count < len( full_detections) else [] gt = full_ground_truth[count] if count < len( full_ground_truth) else [] yield Frame(count, det, gt, frame) count += 1 else: video.release()
def week2_soa(video: Video, debug=False) -> Iterator[Frame]: th = 150 frame_id = 0 fgbg = cv.createBackgroundSubtractorMOG2() ground_truth = read_detections( '../datasets/AICity_data/train/S03/c010/gt/gt.txt') roi = cv.cvtColor( cv.imread('../datasets/AICity_data/train/S03/c010/roi.jpg'), cv.COLOR_BGR2GRAY) for im in tqdm(video.get_frames(), total=2141, file=sys.stdout, desc='Training model...'): mask = fgbg.apply(im) mask[mask < th] = 0 mask.astype(np.uint8) * 255 mask = mask & roi mask = opening(mask, 5) # cv.imshow('f', mask) # cv.waitKey() mask = closing(mask, 25) # cv.imshow('f', mask) # cv.waitKey() mask, detections = find_boxes(mask) frame = Frame(frame_id) frame.detections = detections frame.ground_truth = ground_truth[frame_id] frame_id += 1 yield im, mask, frame
def createFrame(self,timeNow): if self.frame : print('Frame already created') else : self.frame = Frame.Frame(timeNow,self)
sio = socketio.Server() app = Flask(__name__) model = None prev_image_array = None # Let to access the command line arguments globally args = None # Integration buffer for the PI speed controller ibuf=0 # CNN input data shape. It is set during initialization input_size = [] # Create a single Frame object that we will use for image preprocessing frame = Frame() @sio.on('telemetry') def telemetry(sid, data): global ibuf if data: # The current steering angle of the car steering_angle = data["steering_angle"] # The current throttle of the car throttle = data["throttle"] # The current speed of the car speed = data["speed"] # The current image from the center camera of the car imgString = data["image"] image_src = Image.open(BytesIO(base64.b64decode(imgString)))
def off_the_shelf_yolo(tracking, debug=False, *args, **kwargs): video = Video("../datasets/AICity_data/train/S03/c010/frames") detection_transform = DetectionTransform() classes = utils.load_classes('../config/coco.names') gt = read_annotations( '../datasets/AICity_data/train/S03/c010/m6-full_annotation.xml') model = Darknet('../config/yolov3.cfg') model.load_weights('../weights/fine_tuned_yolo_freeze.weights') if torch.cuda.is_available(): model = model.cuda() frames = [] last_im = None model.eval() with torch.no_grad(): for i, im in tqdm(enumerate(video.get_frames(start=len(video) // 4)), total=len(video), file=sys.stdout, desc='Yolo'): im_tensor = detection_transform(im) im_tensor = im_tensor.view((-1, ) + im_tensor.size()) if torch.cuda.is_available(): im_tensor = im_tensor.cuda() detections = model.forward(im_tensor) detections = utils.non_max_suppression(detections, 80, conf_thres=.6, nms_thres=0.3) frame = Frame(i + (len(video) // 4)) frame.ground_truth = gt[frame.id] for d in detections[0]: if int(d[6]) in VALID_LABELS: bbox = d.cpu().numpy() det = Detection(-1, classes[int(d[6])], (bbox[0], bbox[1]), width=bbox[2] - bbox[0], height=bbox[3] - bbox[1], confidence=d[5]) detection_transform.unshrink_detection(det) frame.detections.append(det) if tracking is not None: last_frame = None if len(frames) == 0 else frames[-1] tracking(frame=frame, im=im, last_frame=last_frame, last_im=last_im, frames=frames, debug=False) frames.append(frame) last_im = im if debug: plt.figure() for det in frame.detections: rect = patches.Rectangle(det.top_left, det.width, det.height, linewidth=2, edgecolor='blue', facecolor='none') plt.gca().add_patch(rect) if tracking is None: text = '{}'.format(det.label) else: text = '{} ~ {}'.format(det.label, det.id) plt.text(det.top_left[0], det.top_left[1], s=text, color='white', verticalalignment='top', bbox={ 'color': 'blue', 'pad': 0 }) plt.imshow(im) plt.axis('off') # plt.savefig('../video/video_yolo_fine_tune_good/frame_{:04d}'.format(i)) plt.show() plt.close() # iou_over_time(frames) mAP = mean_average_precision(frames) print("YOLO mAP:", mAP)
def off_the_shelf_ssd(tracking, debug=False, **kwargs): if cuda.is_available(): torch.set_default_tensor_type('torch.cuda.FloatTensor') gt = read_annotations( '../datasets/AICity_data/train/S03/c010/m6-full_annotation.xml') video = Video("../datasets/AICity_data/train/S03/c010/frames") trans = transforms.Compose( [transforms.Resize((300, 300)), transforms.ToTensor()]) labels = ( # always index 0 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor') model = build_ssd('test', 300, 21) # initialize SSD model.load_weights('../weights/ssd300_mAP_77.43_v2.pth') if torch.cuda.is_available(): model = model.cuda() frames = [] model.eval() with torch.no_grad(): for i, im in enumerate(video.get_frames()): im_tensor = trans(im) im_tensor = im_tensor.view((-1, ) + im_tensor.size()) if torch.cuda.is_available(): im_tensor = im_tensor.cuda() output = model.forward(im_tensor) detections = output.data w = im.width h = im.height frame = Frame(i) frame.ground_truth = gt[frame.id] # skip j = 0, because it's the background class for j in (2, 6, 7, 14): dets = detections[0, j, :] mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t() dets = torch.masked_select(dets, mask).view(-1, 5) if dets.size(0) == 0: continue boxes = dets[:, 1:] scores = dets[:, 0].cpu().numpy() cls_dets = np.hstack((boxes.cpu().numpy(), scores[:, np.newaxis])).astype(np.float32, copy=False) for cls_det in cls_dets: x1 = int(w * cls_det[0]) y1 = int(h * cls_det[1]) det = Detection(-1, labels[j - 1], (x1, y1), width=w * (cls_det[2] - cls_det[0]), height=h * (cls_det[3] - cls_det[1]), confidence=cls_det[4]) frame.detections.append(det) # kalman(frame) if tracking is not None: tracking(frame, frames, debug=debug) frames.append(frame) if debug: plt.figure() for det in frame.detections: rect = patches.Rectangle(det.top_left, det.width, det.height, linewidth=2, edgecolor='blue', facecolor='none') plt.gca().add_patch(rect) plt.text(det.top_left[0], det.top_left[1], s='{} ~ {}'.format(det.label, det.id), color='white', verticalalignment='top', bbox={ 'color': 'blue', 'pad': 0 }) plt.imshow(im) plt.axis('off') # plt.savefig('../video/video_ssd_KalmanID/frame_{:04d}'.format(i)) plt.show() plt.close() #iou_over_time(frames) mAP = mean_average_precision(frames) print("SSD mAP:", mAP)
def query(self, picture: Picture) -> (List[Picture], Frame): res = self.compare_histograms.query(picture) return res, Frame()
def get_frame_with_lines(im: np.ndarray) -> Frame: scale = min(MAX_SIDE / im.shape[0], MAX_SIDE / im.shape[1]) resized = cv2.resize(im, (0, 0), fx=scale, fy=scale) if SHOW_OUTPUT: plt.imshow(cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)) plt.show() gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY) if SHOW_OUTPUT: plt.imshow(gray, 'gray') plt.show() gray = cv2.GaussianBlur(gray, ksize=(5, 5), sigmaX=0) # uses the above two partial derivatives sobelx = cv2.Sobel(gray, cv2.CV_16S, 1, 0, ksize=3) sobely = cv2.Sobel(gray, cv2.CV_16S, 0, 1, ksize=3) abs_gradientx = cv2.convertScaleAbs(sobelx) abs_gradienty = cv2.convertScaleAbs(sobely) # combine the two in equal proportions gray = cv2.addWeighted(abs_gradientx, 0.5, abs_gradienty, 0.5, 0) gray = cv2.GaussianBlur(gray, ksize=(5, 5), sigmaX=0) gray = cv2.Canny(gray, threshold1=0, threshold2=50, apertureSize=3) if SHOW_OUTPUT: plt.imshow(gray, 'gray') plt.show() lines = cv2.HoughLinesP(gray, rho=1, theta=np.pi / 180, threshold=80, minLineLength=100, maxLineGap=10) imres = None if SHOW_OUTPUT: im2 = resized.copy() for line in lines: cv2.line(im2, (line[0][0], line[0][1]), (line[0][2], line[0][3]), (0, 0, 255), 3) plt.imshow(cv2.cvtColor(im2, cv2.COLOR_BGR2RGB)) plt.show() imres = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) intersections = get_intersections(lines) if SHOW_OUTPUT: im2 = resized.copy() for p in intersections: cv2.circle(im2, (int(p[0]), int(p[1])), 3, (0, 0, 255), -1) plt.imshow(cv2.cvtColor(im2, cv2.COLOR_BGR2RGB)) plt.show() intersections = simplify_intersections(intersections) if SHOW_OUTPUT: im2 = resized.copy() for p in intersections: cv2.circle(im2, (int(p[0]), int(p[1])), 3, (0, 0, 255), -1) plt.imshow(cv2.cvtColor(im2, cv2.COLOR_BGR2RGB)) plt.show() points = [(0., 0.), (0., 0.), (0., 0.), (0., 0.)] angle = 0 if len(intersections) > 4: points, angle = magic(intersections, im.shape[0] * scale, im.shape[1] * scale) if SHOW_OUTPUT: for p in points: cv2.circle(imres, (int(p[0]), int(p[1])), 3, (255, 0, 0), thickness=-1) if SHOW_OUTPUT: plt.imshow(imres) plt.show() # Undo scale points = (seq(points) .map(lambda point: (int(point[0] / scale), int(point[1] / scale))) .to_list()) return Frame(points, angle)