class MaskRCNN(object):
    def __init__(self, confidence_threshold=0.7):
        cfg.merge_from_file('e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml')
        # NOTE: a bare `cfg.MODEL.DEVICE` is a no-op; assign a device here
        # (e.g. cfg.MODEL.DEVICE = 'cuda') if the default is not wanted.
        cfg.freeze()
        self.model_wrapper = COCODemo(
            cfg,
            confidence_threshold=confidence_threshold,
        )

    def get_chips_and_masks(self, img, label_index=COCO_PERSON_INDEX):
        '''
        Params
        ------
        img : ndarray-like, RGB
        label_index : int, index of the wanted label

        Returns
        -------
        list of (chip, mask) tuples
            - chip is an ndarray: bounding-box crop of the image
            - mask is an ndarray of the same shape as chip, whose pixel
              values are 0 or 1, indicating whether each pixel belongs to
              that class
        '''
        preds = self.model_wrapper.compute_prediction(img)
        top_preds = self.model_wrapper.select_top_predictions(preds)

        labels = top_preds.get_field('labels')
        person_bool_mask = (labels == label_index).numpy().astype(bool)
        masks = top_preds.get_field('mask').numpy()[person_bool_mask]
        bboxes = top_preds.bbox.to(torch.int64).numpy()[person_bool_mask]

        results = []
        for mask, box in zip(masks, bboxes):
            thresh = mask[0, :, :, None]
            l, t, r, b = box
            if b - t <= 0 or r - l <= 0:
                continue
            content = img[t:(b + 1), l:(r + 1), :]
            minimask = thresh[t:(b + 1), l:(r + 1), :]
            results.append((content, minimask))
        return results
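# A minimal usage sketch for the wrapper above (the image path is hypothetical;
# per the docstring the wrapper expects RGB, so the BGR output of cv2.imread is
# reversed):
import cv2

detector = MaskRCNN(confidence_threshold=0.7)
img = cv2.imread('people.jpg')[:, :, ::-1]  # hypothetical path, BGR -> RGB
for chip, mask in detector.get_chips_and_masks(img):
    person_only = chip * mask  # the H x W x 1 mask broadcasts over 3 channels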
def detect_person(cfg, image):
    coco_demo = COCODemo(
        cfg,
        min_image_size=800,
        confidence_threshold=0.7,
    )
    predictions = coco_demo.compute_prediction(image)
    top_predictions = coco_demo.select_top_predictions(predictions)
    labels = top_predictions.get_field("labels").tolist()
    labels = [coco_demo.CATEGORIES[i] for i in labels]
    return 1 if 'person' in labels else 0
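# detect_person above rebuilds COCODemo (and reloads the weights) on every
# call; a sketch of the cheaper pattern, constructing the model once and
# reusing it:
coco_demo = COCODemo(cfg, min_image_size=800, confidence_threshold=0.7)

def detect_person_fast(image):
    preds = coco_demo.select_top_predictions(coco_demo.compute_prediction(image))
    labels = [coco_demo.CATEGORIES[i] for i in preds.get_field("labels").tolist()]
    return int('person' in labels)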
def single_predict():
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    config_file = "configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml"

    # update the config options with the config file
    cfg.merge_from_file(config_file)
    # manually override some options
    #cfg.merge_from_list(["MODEL.DEVICE", "cpu"])

    coco_demo = COCODemo(
        cfg,
        min_image_size=800,
        confidence_threshold=0.5,
    )

    import cv2
    import json
    import numpy as np
    import torch
    from process_image import show_image, draw_bb, show_images

    im = cv2.imread('/home/jianfw/data/sample_images/TaylorSwift.jpg',
                    cv2.IMREAD_COLOR)
    predictions = coco_demo.compute_prediction(im)
    predictions = coco_demo.select_top_predictions(predictions)

    scores = predictions.get_field("scores").tolist()
    labels = predictions.get_field("labels").tolist()
    labels = [coco_demo.CATEGORIES[i] for i in labels]
    boxes = predictions.bbox

    rects = []
    for box, score, label in zip(boxes, scores, labels):
        box = box.to(torch.int64)
        top_left, bottom_right = box[:2].tolist(), box[2:].tolist()
        r = [top_left[0], top_left[1], bottom_right[0], bottom_right[1]]
        rects.append({'class': label, 'conf': score, 'rect': r})

    im_mask = np.copy(im)
    draw_bb(im_mask,
            [r['rect'] for r in rects],
            [r['class'] for r in rects],
            [r['conf'] for r in rects])
    show_images([im, im_mask], 1, 2)
n2 = 2 * n // 3
#start, end = 0, n1
#start, end = n1, n2
#start, end = n2, n
#start, end = [(0, n1), (n1, n2), (n2, n)][idx_no]
start, end = 0, n
print("working on %i - %i" % (start, end))
all_images = all_images[start:end]

for image_name in tqdm(all_images):
    image_full_path = images_path + image_name
    cur_img = cv2.imread(image_full_path)
    H, W, C = cur_img.shape

    predictions = coco_demo.compute_prediction(cur_img)
    scores = predictions.get_field("scores").tolist()
    labels = predictions.get_field("labels").tolist()
    #labels = [self.CATEGORIES[i] for i in labels]
    boxes = predictions.bbox.tolist()

    cur_results = {'image_name': image_name, 'H': H, 'W': W, 'detections': []}
    for ii in range(len(boxes)):
        #box_coords = boxes[0,ii]  # hard code batch dimension to 1
        #score = scores[0,ii]
        #class_no = int(classes[0,ii])
        left, top, right, bottom = boxes[ii]
        box_coords = [top, left, bottom, right]
        score = scores[ii]
        class_no = int(labels[ii])
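# The commented-out (start, end) pairs above split the image list into thirds
# for manual parallelism across processes; a general k-way sharding helper in
# the same spirit (a sketch, not part of the original script):
import math

def shard_bounds(n, num_shards, shard_idx):
    per = n / num_shards
    start = math.floor(shard_idx * per)
    end = n if shard_idx + 1 == num_shards else math.ceil((shard_idx + 1) * per)
    return start, end

# e.g. shard_bounds(10, 3, 0) == (0, 4); floor/ceil guarantee no element is
# skipped, though adjacent shards may overlap by one item.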
Y1List = []
Y2List = []
Y3List = []
Y4List = []
TypeList = []
empty_img_name = []

for i, img_name in enumerate(tqdm(img_names)):
    path = os.path.join(imgs_dir, img_name)
    image = load(path)
    # compute predictions
    predictions = coco_demo.compute_prediction(image)
    try:
        scores = predictions.get_field("scores").numpy()
        bbox = predictions.bbox[np.argmax(scores)].numpy()
        labelList = predictions.get_field("labels").numpy()
        label = labelList[np.argmax(scores)]

        filenameList.append(img_name)
        # four corners of the box, clockwise from the top-left
        X1List.append(round(bbox[0]))
        Y1List.append(round(bbox[1]))
        X2List.append(round(bbox[2]))
        Y2List.append(round(bbox[1]))
        X3List.append(round(bbox[2]))
        Y3List.append(round(bbox[3]))
        X4List.append(round(bbox[0]))
        Y4List.append(round(bbox[3]))
        TypeList.append(label)
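# The eight coordinate lists above trace the four corners of the axis-aligned
# box clockwise from the top-left; an equivalent standalone helper (a sketch):
def xyxy_to_quad(bbox):
    x1, y1, x2, y2 = [round(float(v)) for v in bbox]
    return [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]  # TL, TR, BR, BL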
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.6,
        help="Minimum score for the prediction to be shown",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=256,
        help="Smallest size of the image to feed to the model. "
        "Model was trained with 800, which gives the best results",
    )
    parser.add_argument(
        "--show-mask-heatmaps",
        dest="show_mask_heatmaps",
        help="Show a heatmap probability for the top masks-per-dim masks",
        action="store_true",
    )
    parser.add_argument(
        "--masks-per-dim",
        type=int,
        default=2,
        help="Number of heatmaps per dimension to show",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    parser.add_argument("--svo-filename",
                        help="Optional SVO input filepath",
                        default=None)
    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_threshold=args.confidence_threshold,
        show_mask_heatmaps=args.show_mask_heatmaps,
        masks_per_dim=args.masks_per_dim,
        min_image_size=args.min_image_size,
    )

    init_cap_params = sl.InitParameters()
    if args.svo_filename:
        print("Loading SVO file " + args.svo_filename)
        init_cap_params.set_from_svo_file(args.svo_filename)
        init_cap_params.svo_real_time_mode = True
    init_cap_params.camera_resolution = sl.RESOLUTION.HD720
    init_cap_params.depth_mode = sl.DEPTH_MODE.ULTRA
    init_cap_params.coordinate_units = sl.UNIT.METER
    init_cap_params.depth_stabilization = True
    init_cap_params.camera_image_flip = sl.FLIP_MODE.AUTO
    init_cap_params.coordinate_system = sl.COORDINATE_SYSTEM.RIGHT_HANDED_Y_UP

    cap = sl.Camera()
    if not cap.is_opened():
        print("Opening ZED Camera...")
    status = cap.open(init_cap_params)
    if status != sl.ERROR_CODE.SUCCESS:
        print(repr(status))
        exit()

    display = True
    runtime = sl.RuntimeParameters()
    left = sl.Mat()
    ptcloud = sl.Mat()
    depth_img = sl.Mat()
    depth = sl.Mat()
    res = sl.Resolution(1280, 720)

    # First create a Transform object for the TrackingParameters object
    py_transform = sl.Transform()
    tracking_parameters = sl.PositionalTrackingParameters(init_pos=py_transform)
    tracking_parameters.set_as_static = True
    err = cap.enable_positional_tracking(tracking_parameters)
    if err != sl.ERROR_CODE.SUCCESS:
        exit(1)

    running = True
    keep_people_only = True

    if coco_demo.cfg.MODEL.MASK_ON:
        print("Mask enabled!")
    if coco_demo.cfg.MODEL.KEYPOINT_ON:
        print("Keypoints enabled!")

    while running:
        start_time = time.time()
        err_code = cap.grab(runtime)
        if err_code != sl.ERROR_CODE.SUCCESS:
            break

        cap.retrieve_image(left, sl.VIEW.LEFT, resolution=res)
        cap.retrieve_image(depth_img, sl.VIEW.DEPTH, resolution=res)
        cap.retrieve_measure(depth, sl.MEASURE.DEPTH, resolution=res)
        cap.retrieve_measure(ptcloud, sl.MEASURE.XYZ, resolution=res)

        ptcloud_np = np.array(ptcloud.get_data())
        img = cv2.cvtColor(left.get_data(), cv2.COLOR_RGBA2RGB)
        prediction = coco_demo.select_top_predictions(
            coco_demo.compute_prediction(img))

        # Keep people only
        if keep_people_only:
            labels_tmp = prediction.get_field("labels")
            people_coco_label = 1
            keep = torch.nonzero(labels_tmp == people_coco_label).squeeze(1)
            prediction = prediction[keep]

        composite = img.copy()
        humans_3d = None
        masks_3d = None
        if coco_demo.show_mask_heatmaps:
            composite = coco_demo.create_mask_montage(composite, prediction)
        composite = coco_demo.overlay_boxes(composite, prediction)
        if coco_demo.cfg.MODEL.MASK_ON:
            masks_3d = get_masks3d(prediction, depth)
            composite = coco_demo.overlay_mask(composite, prediction)
        if coco_demo.cfg.MODEL.KEYPOINT_ON:
            # Extract 3D skeleton from the ZED depth
            humans_3d = get_humans3d(prediction, ptcloud_np)
            composite = coco_demo.overlay_keypoints(composite, prediction)
        overlay_distances(prediction, get_boxes3d(prediction, ptcloud_np),
                          composite, humans_3d, masks_3d)
        composite = coco_demo.overlay_class_names(composite, prediction)

        print(" Time: {:.2f} s".format(time.time() - start_time))

        if display:
            cv2.imshow("COCO detections", composite)
            cv2.imshow("ZED Depth", depth_img.get_data())
            key = cv2.waitKey(10)
            if key == 27:
                break  # esc to quit
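# A hypothetical invocation of the ZED demo above (script name and SVO path
# are placeholders):
#   python zed_object_detection.py --min-image-size 256 --svo-filename recording.svo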
coco_demo = COCODemo(
    cfg,
    min_image_size=800,
    confidence_threshold=0.6)

fourcc = cv2.VideoWriter_fourcc(*'XVID')
cap = cv2.VideoCapture('tmp/S2_Cars_day_cut.mp4')
out = cv2.VideoWriter('tmp/test_S2_Cars_day.avi', fourcc, 20.0, size)

index = 0
while cap.isOpened():
    ret, frame_bgr = cap.read()
    if not ret:  # check before resizing: frame_bgr is None on a failed read
        break
    frame_bgr = cv2.resize(frame_bgr, size)
    index += 1
    with log.Tick():
        predictions = coco_demo.compute_prediction(frame_bgr)
        top_predictions = coco_demo.select_top_predictions(predictions)
        result = frame_bgr.copy()
        result = coco_demo.overlay_mask(result, top_predictions)
        result = coco_demo.overlay_boxes(result, top_predictions)
        result = coco_demo.overlay_class_names(result, top_predictions)
    cv2.imshow('result', result)
    out.write(result)
    if 32 == cv2.waitKey(1):
        break
out.release()
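# The snippet above assumes a (width, height) tuple `size` defined earlier; a
# sketch of deriving it from the input video itself:
import cv2

cap_probe = cv2.VideoCapture('tmp/S2_Cars_day_cut.mp4')
size = (int(cap_probe.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap_probe.get(cv2.CAP_PROP_FRAME_HEIGHT)))
cap_probe.release()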
def main():
    parser = argparse.ArgumentParser()
    # total_no_sets and current_set are used for splitting the data and running
    # multiple processes manually on different GPUs or machines:
    # - if total_no_sets is 1, current_set can only be 0 and the script runs
    #   normally (default mode)
    # - if total_no_sets is 2, say, run this script once with current_set=0 and
    #   once with current_set=1; the data is split into two parts that can be
    #   processed separately
    parser.add_argument('-t', '--total_no_sets', type=int, required=False, default=1)
    parser.add_argument('-c', '--current_set', type=int, required=False, default=0)
    parser.add_argument('-g', '--gpu', type=str, required=False, default="0")
    args = parser.parse_args()

    gpu = args.gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    total_no_sets = args.total_no_sets
    current_set = args.current_set
    print('SET no %i (0 based) of %i SETS' % (current_set, total_no_sets))

    with open(SEGMENT_ANN_FILE) as fp:
        annotations = json.load(fp)
    # keys look like: -5KQ66BBWC4.0902
    segment_keys = sorted(annotations.keys())  # dict views have no .sort() in Python 3

    movie_timestamp_mapping = {}
    for segment_key in segment_keys:
        movie_name, timestamp = segment_key.split('.')
        movie_timestamp_mapping.setdefault(movie_name, []).append(segment_key)

    movies = sorted(movie_timestamp_mapping.keys())
    no_segments = len(movies)
    vid_per_set = no_segments / total_no_sets
    start_idx = np.floor(current_set * vid_per_set).astype(int)
    end_idx = no_segments if (current_set + 1 == total_no_sets) \
        else np.ceil((current_set + 1) * vid_per_set).astype(int)
    print('Working on movies: [%i - %i)' % (start_idx, end_idx))
    cur_movies = movies[start_idx:end_idx]

    #config_file = "../configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml"
    config_file = "e2e_faster_rcnn_X_101_32x8d_FPN_1x_ava.yaml"
    # update the config options with the config file
    cfg.merge_from_file(config_file)
    #### if you are changing the weights, change the name of the output folder!!!!
#cfg.merge_from_list(["MODEL.WEIGHT", "e2e_faster_rcnn_X_101_32x8d_FPN_1x.pth"]) ### original COCO weights from facebook cfg.merge_from_list(["MODEL.WEIGHT", "faster_rcnn_ava_model_0255000.pth" ]) ### finetuned on AVA actors coco_demo = COCODemo( cfg, min_image_size=800, confidence_threshold=DETECTION_TH, ) for mm, movie in enumerate(tqdm(cur_movies)): print('\n\n Working on %s, %i/%i \n\n' % (movie, mm, len(cur_movies))) for segment_key in movie_timestamp_mapping[movie]: midframe = read_keyframe(segment_key) pred_boxes = coco_demo.compute_prediction(midframe) H, W, C = midframe.shape boxes = pred_boxes.bbox / torch.tensor([W, H, W, H], dtype=torch.float) box_list = boxes.tolist() scores = pred_boxes.get_field("scores") classes = pred_boxes.get_field("labels") num_boxes = len(box_list) # clean up organize segment_detections = [] for bb in range(num_boxes): left, top, right, bottom = [ get_3_decimal_float(coord) for coord in box_list[bb] ] # xyxy : left top right bottom cur_box = [top, left, bottom, right] cur_score = get_3_decimal_float(scores[bb]) cur_class_no = int(classes[bb]) cur_class_str = coco_demo.CATEGORIES[cur_class_no] cur_detection = { 'box': cur_box, 'score': cur_score, 'class_str': cur_class_str, 'class_no': cur_class_no } segment_detections.append(cur_detection) movie_name, timestamp = segment_key.split('.') #cur_detections = object_detections results_dict = { 'movie_name': movie_name, 'timestamp': timestamp, 'detections': segment_detections, 'height': H, 'width': W, } save_results_json(results_dict) #print('Timestamp done : %s' %timestamp) tqdm.write('Timestamp done : %s' % timestamp) #print('\n\nMovie done %s\n\n' % movie) tqdm.write('\n\nMovie done %s\n\n' % movie)
def main():
    parser = argparse.ArgumentParser()
    # total_no_sets and current_set are used for splitting the data and running
    # multiple processes manually on different GPUs or machines; with
    # total_no_sets=1 (default) the script processes everything, with
    # total_no_sets=2 run once with current_set=0 and once with current_set=1
    parser.add_argument('-t', '--total_no_sets', type=int, required=False, default=1)
    parser.add_argument('-c', '--current_set', type=int, required=False, default=0)
    parser.add_argument('-g', '--gpu', type=str, required=False, default='0')
    args = parser.parse_args()

    gpu = args.gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    total_no_sets = args.total_no_sets
    current_set = args.current_set
    print('SET no %i (0 based) of %i SETS' % (current_set, total_no_sets))

    all_files = os.listdir(IMAGES_FOLDER)
    all_files.sort()
    no_segments = len(all_files)
    vid_per_set = no_segments / total_no_sets
    start_idx = np.floor(current_set * vid_per_set).astype(int)
    end_idx = no_segments if (current_set + 1 == total_no_sets) \
        else np.ceil((current_set + 1) * vid_per_set).astype(int)
    print('Working on images: [%i - %i)' % (start_idx, end_idx))
    cur_files = all_files[start_idx:end_idx]

    config_file = "../configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml"
    # update the config options with the config file
    cfg.merge_from_file(config_file)
    cfg.merge_from_list(
        ["MODEL.WEIGHT", "e2e_faster_rcnn_X_101_32x8d_FPN_1x.pth"])
    #cfg.merge_from_list(["MODEL.WEIGHT", "faster_rcnn_ava_model_0255000.pth"])

    coco_demo = COCODemo(
        cfg,
        min_image_size=800,
        confidence_threshold=DETECTION_TH,
    )

    for mm, img_file in enumerate(tqdm(cur_files)):
        print('\n\n Working on %s, %i/%i \n\n' % (img_file, mm, len(cur_files)))
        img_path = os.path.join(IMAGES_FOLDER, img_file)
        img = cv2.imread(img_path)

        pred_boxes = coco_demo.compute_prediction(img)

        H, W, C = img.shape
        boxes = pred_boxes.bbox / torch.tensor([W, H, W, H], dtype=torch.float)
        box_list = boxes.tolist()
        scores = pred_boxes.get_field("scores")
        classes = pred_boxes.get_field("labels")
        num_boxes = len(box_list)

        # clean up and organize the detections
        segment_detections = []
        for bb in range(num_boxes):
            # xyxy: left, top, right, bottom
            left, top, right, bottom = [
                get_3_decimal_float(coord) for coord in box_list[bb]
            ]
            cur_box = [top, left, bottom, right]
            cur_score = get_3_decimal_float(scores[bb])
            cur_class_no = int(classes[bb])
            cur_class_str = coco_demo.CATEGORIES[cur_class_no]
            cur_detection = {
                'box': cur_box,
                'score': cur_score,
                'class_str': cur_class_str,
                'class_no': cur_class_no
            }
            segment_detections.append(cur_detection)

        results_dict = {
            'segment_name': img_file,
            'detections': segment_detections,
            'height': H,
            'width': W,
        }
        save_results_json(results_dict)
        tqdm.write('\n\nImage done %s\n\n' % img_file)
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--predictions-out",
        default="./test.json",
        metavar="FILE",
        help="path to file to output labels",
    )
    parser.add_argument(
        "--test-image-dir",
        default="/n/pana/scratch/ravi/bdd/bdd100k/images/100k/val/",
        metavar="FILE",
        help="path to test image directory",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.5,
        help="Minimum score for the prediction to be shown",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=800,
        help="Smallest size of the image to feed to the model. "
        "Model was trained with 800, which gives the best results",
    )
    parser.add_argument(
        "--show-mask-heatmaps",
        dest="show_mask_heatmaps",
        help="Show a heatmap probability for the top masks-per-dim masks",
        action="store_true",
    )
    parser.add_argument(
        "--masks-per-dim",
        type=int,
        default=2,
        help="Number of heatmaps per dimension to show",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_threshold=args.confidence_threshold,
        show_mask_heatmaps=args.show_mask_heatmaps,
        masks_per_dim=args.masks_per_dim,
        min_image_size=args.min_image_size,
    )

    image_paths = glob.glob(os.path.join(args.test_image_dir, '*.jpg'))
    pred_list = []
    # map COCO category names to their BDD100K equivalents
    coco_cat_to_bdd_cat = {
        "person": "person",
        "car": "car",
        "traffic light": "traffic light",
        "stop sign": "traffic sign",
        "bus": "bus",
        "truck": "truck",
        "bicycle": "bike",
        "motorcycle": "motor",
        "train": "train"
    }

    for i in tqdm(image_paths):
        img = cv2.imread(i)
        image_id = i.split('/')[-1].split('.')[0]
        start = time.time()
        predictions = coco_demo.compute_prediction(img)
        end = time.time()

        scores = predictions.get_field('scores')
        #high_conf_idx = scores > args.confidence_threshold
        #predictions = predictions[high_conf_idx]
        #scores = predictions.get_field('scores')
        boxes = predictions.bbox
        labels = predictions.get_field('labels')
        labels = [coco_demo.CATEGORIES[l] for l in labels]

        for b in range(len(labels)):
            if labels[b] in coco_cat_to_bdd_cat:
                label = coco_cat_to_bdd_cat[labels[b]]
                obj_dict = {
                    'name': image_id,
                    'bbox': [
                        float(boxes[b][0]),
                        float(boxes[b][1]),
                        float(boxes[b][2]),
                        float(boxes[b][3])
                    ],
                    'category': label,
                    'score': float(scores[b])
                }
                pred_list.append(obj_dict)

    with open(args.predictions_out, 'w') as fp:
        json.dump(pred_list, fp)
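# Example invocation of the BDD100K prediction script above (the script name
# is a placeholder):
#   python bdd_predict.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml \
#       --predictions-out ./preds.json --test-image-dir /path/to/bdd100k/images/100k/val/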