def train_task(model_name, model_file):
    path = os.path.join(SAVE_PATH, 'train_task', model_name)
    os.makedirs(path, exist_ok=True)

    # Load data
    print('Loading Data.')
    dataloader = KITTI_Dataloader()

    def kitti_train():
        return dataloader.get_dicts(train_flag=True)

    def kitti_test():
        return dataloader.get_dicts(train_flag=False)

    DatasetCatalog.register("KITTI_train", kitti_train)
    MetadataCatalog.get("KITTI_train").set(thing_classes=list(CATEGORIES.keys()))
    DatasetCatalog.register("KITTI_test", kitti_test)
    MetadataCatalog.get("KITTI_test").set(thing_classes=list(CATEGORIES.keys()))

    # Load model and configure training hyperparameters
    print('Loading Model.')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('KITTI_train',)
    cfg.DATASETS.TEST = ('KITTI_test',)
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = NUM_IMGS // cfg.SOLVER.IMS_PER_BATCH + 1
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 9

    # Train
    print('Training.......')
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()
    print('Training Done.')

    # Evaluate
    print('Evaluating......')
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, 'model_final.pth')
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    predictor = DefaultPredictor(cfg)
    dataset_dicts = kitti_test()
    for i, d in enumerate(random.sample(dataset_dicts, 5)):
        im = cv2.imread(d['file_name'])
        outputs = predictor(im)
        v = Visualizer(im[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(os.path.join(path, 'Evaluation_' + model_name + '_trained_' + str(i) + '.png'),
                    v.get_image()[:, :, ::-1])

    print('COCO EVALUATOR....')
    evaluator = COCOEvaluator('KITTI_test', cfg, False, output_dir="./output/")
    trainer.test(cfg, trainer.model, evaluators=[evaluator])

    # Load training and test examples
    inference_dataloader = Inference_Dataloader(MIT_DATA_DIR)
    inference_dataset = inference_dataloader.load_data()

    # Qualitative results: visualize some predictions on the MIT_split dataset
    for i, img_path in enumerate([p for p in inference_dataset['test'] if 'inside_city' in p][:20]):
        img = cv2.imread(img_path)
        outputs = predictor(img)
        v = Visualizer(img[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(os.path.join(path, 'Inference_' + model_name + '_trained_' + str(i) + '.png'),
                    v.get_image()[:, :, ::-1])

    """
    val_loader = build_detection_test_loader(cfg, 'KITTI_test')
    inference_on_dataset(trainer.model, val_loader, evaluator)
    """
    print('DONE!!')
if __name__ == "__main__": args = parser.parse_args() output_dir = args.output_dir d = args.path_to_pkl dataset_name = "mtsd" DatasetCatalog.register(dataset_name, lambda d=d: load_obj(d)) MetadataCatalog.get(dataset_name).set(thing_classes=CATEGORIES) Meta_data = MetadataCatalog.get(dataset_name) #print(Meta_data) dataset_dicts = load_obj(d) cfg = get_cfg() cfg.merge_from_file( model_zoo.get_config_file( "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml")) cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.9 # set threshold for this model cfg.MODEL.WEIGHTS = args.path_to_model cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(CATEGORIES) predictor = DefaultPredictor(cfg) cfg.OUTPUT_DIR = output_dir #path_to_argo_dir = os.path.join('argoverse-tacking', 'train1') path_to_argo_dir = 'ring_front_center' validate_on_argoverse_single_seq(cfg, Meta_data, path_to_argo_dir) #validate_on_argoverse_multiple_seqs(cfg, Meta_data, path_to_argo_dir)
# Visualizing datasets
# train_dicts = get_train_dicts()
# for d in random.sample(train_dicts, 30):
#     print(d)
#     img = cv2.imread(d["file_name"])
#     visualizer = Visualizer(img[:,:,::-1], metadata=openimages_train_metadata, scale=0.5)
#     vis = visualizer.draw_dataset_dict(d)
#     cv2.imshow("image", vis.get_image()[:,:,::-1])
#     cv2.waitKey()

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file('COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml'))
cfg.DATASETS.TRAIN = ("openimages_train",)
cfg.DATASETS.TEST = ()
cfg.MODEL.WEIGHTS = 'output/model_0054999_wo_solver_states.pth'
cfg.DATALOADER.NUM_WORKERS = 2
# merge-conflict alternative (remote branch):
# cfg.MODEL.WEIGHTS = 'projects/CenterMask2/configs/model_0199999.pth'
# cfg.DATALOADER.NUM_WORKERS = 0
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0007
cfg.SOLVER.GAMMA = 0.2
cfg.SOLVER.STEPS = (40000,)
cfg.SOLVER.MAX_ITER = 100000
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 8
if len(sys.argv) < 2:
    print("usage: python detectron.py [input_image]")
    sys.exit(0)

im = cv2.imread(sys.argv[1])
if im is None:
    print("file open fail")
    sys.exit(0)

cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running
# a model in detectron2's core library
# config paths start in the configs/ directory
fileName = "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"
model = model_zoo.get_config_file(fileName)
cfg.merge_from_file(model)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
cfg.merge_from_list(['MODEL.DEVICE', 'cpu'])
# Find a model from detectron2's model zoo. You can either use the
# https://dl.fbaipublicfiles.... url, or use the detectron2:// shorthand.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(fileName)
predictor = DefaultPredictor(cfg)
outputs = predictor(im)

# We can use `Visualizer` to draw the predictions on the image.
v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.0)
v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
# %%
# Create our Detectron2 model
# ----------------------------
#
# Next, we create a detectron2 config and a detectron2 `DefaultPredictor` to
# run predictions on the new images.
#
# - We use a pre-trained Faster R-CNN with a ResNet-50 backbone
# - We use an MS COCO pre-trained model from detectron2

cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running
# a model in detectron2's core library
### cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
### cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)

# %%
# We use this little helper method to overlay the model predictions on a
# given image.

def predict_and_overlay(model, filename):
    # helper method to run the model on an image and overlay the predictions
    im = cv2.imread(filename)
    out = model(im)
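    # A plausible continuation (assumed, not in the original fragment): overlay
    # the predictions with detectron2's Visualizer and save the result.
    # v = Visualizer(im[:, :, ::-1], MetadataCatalog.get("coco_2017_train"), scale=1.0)
    # v = v.draw_instance_predictions(out["instances"].to("cpu"))
    # cv2.imwrite(filename + "_pred.png", v.get_image()[:, :, ::-1])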
    if channels == 3:
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
    else:
        im = cv2.cvtColor(im, cv2.COLOR_RGBA2BGR)
else:
    im = cv2.imread(opt.file, cv2.IMREAD_COLOR)

height, width, channels = im.shape
print('image W:%d H:%d' % (width, height))

network_model = 'COCO-InstanceSegmentation/' + opt.model + '.yaml'

cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running
# a model in detectron2's core library
cfg.merge_from_file(model_zoo.get_config_file(network_model))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(network_model)
predictor = DefaultPredictor(cfg)

fps_time = time.perf_counter()
outputs = predictor(im)
fps = 1.0 / (time.perf_counter() - fps_time)

print('===== pred_boxes =====')
print(outputs["instances"].pred_boxes)
print('===== scores =====')
print(outputs["instances"].scores)
print('===== pred_classes =====')
from detectron2.utils.visualizer import Visualizer
from detectron2.checkpoint import DetectionCheckpointer
import cv2
import random
import torch

from kitti_mots_dataset import get_kiti_mots_dicts, register_kitti_mots_dataset

# Task: predict from a pretrained model (uses COCO classes)
if __name__ == '__main__':
    # cfg_file = "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"
    cfg_file = "COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml"

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(cfg_file))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(cfg_file)

    model = build_model(cfg)
    DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)

    register_kitti_mots_dataset("datasets/KITTI-MOTS/training/image_02",
                                "datasets/KITTI-MOTS/instances_txt",
                                ("kitti_mots_train", "kitti_mots_test"),
                                image_extension="png")
    cfg.DATASETS.TRAIN = ("kitti_mots_train", )
    cfg.DATASETS.TEST = ("kitti_mots_test", )

    evaluator = COCOEvaluator("kitti_mots_test", cfg,
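    # Note (assumed usage, not part of the original fragment): unlike
    # DefaultPredictor, a model built with build_model() does no preprocessing
    # and expects a list of input dicts. A minimal inference call would be:
    #
    # model.eval()
    # with torch.no_grad():
    #     img = cv2.imread("some_image.png")
    #     image = torch.as_tensor(img.astype("float32").transpose(2, 0, 1))
    #     outputs = model([{"image": image, "height": img.shape[0], "width": img.shape[1]}])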
DatasetCatalog.register(
    "pedestrain_train",
    lambda: get_pedestrain_dict(train_people_imagelist, train_people_jsonlist))
MetadataCatalog.get("pedestrain_train").set(thing_classes=["person"])
DatasetCatalog.register(
    "pedestrain_test",
    lambda: get_pedestrain_dict(test_people_imagelist, test_people_jsonlist))
MetadataCatalog.get("pedestrain_test").set(thing_classes=["person"])
pedestrain_metadata = MetadataCatalog.get("pedestrain_train")

# Training
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("Base-RCNN-FPN.yaml"))
cfg.DATASETS.TRAIN = ("pedestrain_train", )
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 3
# cfg.MODEL.WEIGHTS = "detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl"  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = num_of_iter  # 300 iterations seems good enough for this toy dataset; you may need to train longer for a practical dataset
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512  # faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
cfg.OUTPUT_DIR = out_path
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=True)
trainer.train()
models = ["Cityscapes/mask_rcnn_R_50_FPN.yaml"] for to_evaluate in models: print('* ' * 30, to_evaluate, '* ' * 30) # Train lr = 0.0025 EXPERIMENT_NAME = f"{to_evaluate[:-5]}_trained" OUTPUT_DIR = f"/home/group00/working/week4/model_evaluation/{EXPERIMENT_NAME}" print('Loading pre-trained models...') cfg = get_cfg() #Select model # cfg.merge_from_file(model_zoo.get_config_file(f"COCO-InstanceSegmentation/{to_evaluate}")) cfg.merge_from_file(model_zoo.get_config_file(f"{to_evaluate}")) cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.5 # set threshold for this model # cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(f"COCO-InstanceSegmentation/{to_evaluate}") cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(f"{to_evaluate}") #configure parameters cfg.INPUT.MASK_FORMAT = 'bitmask' cfg.OUTPUT_DIR = OUTPUT_DIR os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) cfg.DATASETS.TRAIN = ("train_kitti-mots", ) cfg.DATASETS.TEST = ("val_kitti-mots", ) cfg.DATALOADER.NUM_WORKERS = 1 cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 cfg.SOLVER.IMS_PER_BATCH = 2 cfg.SOLVER.BASE_LR = lr cfg.SOLVER.MAX_ITER = 1500
def task_b_MOTS_and_KITTI_training(model_name, model_file):
    # model_name = model_name + '_inference'
    print('Running task B for model', model_name)

    SAVE_PATH = os.path.join('./results_week_5_task_c', model_name)
    os.makedirs(SAVE_PATH, exist_ok=True)

    # Load model and configuration
    print('Loading Model')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('MOTS_KITTI_train', )
    cfg.DATASETS.TEST = ('KITTIMOTS_val', )
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupCosineLR"
    # Hyperparameters tried and discarded:
    # cfg.SOLVER.LR_POLICY = 'steps_with_decay'
    # cfg.SOLVER.STEPS = [0, 1000, 2000]
    # cfg.SOLVER.GAMMA = 0.1
    # cfg.DATASETS.TRAIN.USE_FLIPPED = True  # this one doesn't work
    # cfg.MODEL.RPN.IOU_THRESHOLDS = [0.1, 0.9]  # defaults: 0.3 and 0.7
    # cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]  # default: [[32, 64, 128, 256, 512]]
    # cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]
    # End of hyperparameter experiments
    cfg.SOLVER.MAX_ITER = 1000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
    cfg.TEST.SCORE_THRESH = 0.5
    print(cfg)

    # Training
    print('Training')
    trainer = DefaultTrainer(cfg)
    val_loss = ValidationLoss(cfg)
    trainer.register_hooks([val_loss])
    # Swap the last two hooks so the validation-loss hook runs before the writer
    trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1]
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Evaluation
    print('Evaluating')
    evaluator = COCOEvaluator('KITTIMOTS_val', cfg, False, output_dir=SAVE_PATH)
    trainer.model.load_state_dict(val_loss.weights)
    trainer.test(cfg, trainer.model, evaluators=[evaluator])
    print('Plotting losses')
    plot_validation_loss(cfg, cfg.SOLVER.MAX_ITER, model_name, SAVE_PATH)

    # Qualitative results: visualize some results
    print('Getting qualitative results')
    predictor = DefaultPredictor(cfg)
    predictor.model.load_state_dict(trainer.model.state_dict())
    inputs = kitti_val()
    # inputs = inputs[:20] + inputs[-20:]
    inputs = inputs[220:233] + inputs[1995:2100]
    for i, input in enumerate(inputs):
        file_name = input['file_name']
        print('Prediction on image ' + file_name)
        img = cv2.imread(file_name)
        outputs = predictor(img)
        v = Visualizer(img[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(
            os.path.join(SAVE_PATH, 'Inference_' + model_name + '_inf_' + str(i) + '.png'),
            v.get_image()[:, :, ::-1])
def task_a_KITTI_training(model_name, model_file):
    # model_name = model_name + '_inference'
    print('Running task A for model', model_name)

    SAVE_PATH = os.path.join('./results_week_5_task_a', model_name)
    os.makedirs(SAVE_PATH, exist_ok=True)

    # Load model and configuration
    print('Loading Model')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('KITTIMOTS_train', )
    cfg.DATASETS.TEST = ('MOTS_train', )
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 1000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
    cfg.TEST.SCORE_THRESH = 0.5

    # Training
    print('Training')
    trainer = DefaultTrainer(cfg)
    val_loss = ValidationLoss(cfg)
    trainer.register_hooks([val_loss])
    # Swap the last two hooks so the validation-loss hook runs before the writer
    trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1]
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Evaluation
    print('Evaluating')
    evaluator = COCOEvaluator('MOTS_train', cfg, False, output_dir=SAVE_PATH)
    trainer.model.load_state_dict(val_loss.weights)
    trainer.test(cfg, trainer.model, evaluators=[evaluator])
    print('Plotting losses')
    plot_validation_loss(cfg, cfg.SOLVER.MAX_ITER, model_name, SAVE_PATH)

    # Qualitative results: visualize some results
    print('Getting qualitative results')
    predictor = DefaultPredictor(cfg)
    predictor.model.load_state_dict(trainer.model.state_dict())
    inputs = mots_train()
    inputs = inputs[:20] + inputs[-20:]
    for i, input in enumerate(inputs):
        file_name = input['file_name']
        print('Prediction on image ' + file_name)
        img = cv2.imread(file_name)
        outputs = predictor(img)
        v = Visualizer(img[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(
            os.path.join(SAVE_PATH, 'Inference_' + model_name + '_inf_' + str(i) + '.png'),
            v.get_image()[:, :, ::-1])
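# ValidationLoss is assumed to be a custom hook defined elsewhere in this repo.
# A minimal sketch of such a hook (evaluating the loss dict on a validation
# batch after each step and keeping a copy of the best weights, which is what
# the `val_loss.weights` access above relies on) might look like:
#
# from detectron2.engine import HookBase
# from detectron2.data import build_detection_train_loader
# import copy, torch
#
# class ValidationLoss(HookBase):
#     def __init__(self, cfg):
#         super().__init__()
#         self.cfg = cfg.clone()
#         self.cfg.DATASETS.TRAIN = cfg.DATASETS.TEST  # draw batches from the val set
#         self._loader = iter(build_detection_train_loader(self.cfg))
#         self.best_loss = float('inf')
#         self.weights = None
#
#     def after_step(self):
#         data = next(self._loader)
#         with torch.no_grad():
#             # the model is in training mode here, so it returns a loss dict
#             loss_dict = self.trainer.model(data)
#             total = sum(loss_dict.values()).item()
#             if total < self.best_loss:
#                 self.best_loss = total
#                 self.weights = copy.deepcopy(self.trainer.model.state_dict())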
import os, json, cv2, random
# from google.colab.patches import cv2_imshow
from cv2 import imshow
cv2_imshow = imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running
# a model in detectron2's core library
# cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_C4_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
# cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/faster_rcnn_R_101_C4_3x.yaml")
predictor = DefaultPredictor(cfg)

# v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
# out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
# im2 = out.get_image()[:, :, ::-1]
# b, g, r = cv2.split(im2)
# image_rgb2 = cv2.merge([r, g, b])
# plt.figure()
# plt.imshow(image_rgb2)
    json_file=json_path,
    image_root=img_path,
)
MetadataCatalog.get("marker").thing_classes = ['bolt-roi']
box_metadata = MetadataCatalog.get("marker")

model_config_path = "COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml"
# model_config_path = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

###############################################
cfg = get_cfg()
cfg.merge_from_file(get_config_file(model_config_path))
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = get_checkpoint_url(model_config_path)  # Let training initialize from model zoo
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # one class (markedbolt=1)
cfg.INPUT.MIN_SIZE_TEST = 1024
cfg.INPUT.MAX_SIZE_TEST = 1024
# cfg.INPUT.MIN_SIZE_TEST = 0     # size of the smallest side of the image during testing; set to zero to disable resizing in testing
# cfg.INPUT.MAX_SIZE_TEST = 1333  # maximum size of the side of the image during testing (default: 1333)
# cfg.INPUT.MIN_SIZE_TEST = 800
def test(path_to_input, path_to_output, network):
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/" + network + ".yaml"))
    cfg.OUTPUT_DIR = "./code_workspace/output"
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_0009999.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.2
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4
    cfg.MODEL.MASK_ON = False
    cfg.TEST.EVAL_PERIOD = 5000
    # cfg.INPUT.MIN_SIZE_TEST = 0
    # cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.33, 0.5, 1.0, 2, 3]]
    # cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[16, 32], [48, 64], [96, 128], [192, 256], [512, 640]]
    predictor = DefaultPredictor(cfg)

    video_cap = cv2.VideoCapture(path_to_input)
    video_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
    ret, i_frame = video_cap.read()
    video_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_out = cv2.VideoWriter(
        path_to_output, fourcc, 30.0,
        (int(i_frame.shape[1]), int(i_frame.shape[0])), True)
    total_frames = video_cap.get(cv2.CAP_PROP_FRAME_COUNT)

    i = 0
    # timing
    t0 = time.time()
    # Avoid saving in between iterations; instead collect everything in lists
    # and write the video at the end.
    out_list = []
    frames = []
    predict_time = 0
    t2 = time.time()
    while True:
        t3 = time.time()
        ret, frame = video_cap.read()
        if not ret:
            break
        # Produce some console output to show progress
        progress = "\r %progress: " + str(int((i / total_frames) * 100)) + \
                   " fps: " + str(int(i / max(t3 - t0, 1e-6)))
        i += 1
        sys.stdout.write(progress)
        sys.stdout.flush()
        t4 = time.time()
        outputs = predictor(frame)
        t5 = time.time()
        predict_time += t5 - t4
        out_list.append(outputs["instances"].to("cpu"))
        frames.append(frame)
    t22 = time.time()
    inference_time = t22 - t2

    print()
    print("Inference complete, creating video")
    t10 = time.time()
    for output, frame in zip(out_list, frames):
        v = Visualizer(frame,
                       MetadataCatalog.get("traffic"),
                       scale=1,
                       instance_mode=ColorMode.SEGMENTATION)
        # output.remove("scores")
        v = v.draw_instance_predictions(output)
        video_out.write(v.get_image())
    t11 = time.time()
    print("Time to create video: ", t11 - t10)

    # timing
    t1 = time.time()
    print("average fps: ", total_frames / inference_time)
    print("total time: ", t1 - t0)
    print("%total predict: ", predict_time / (t1 - t0))
    print("Video produced on path: ", path_to_output)
    video_out.release()
    video_cap.release()
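# Assumed invocation (paths and network name are illustrative placeholders):
# test("./videos/input.mp4", "./videos/output.mp4", "faster_rcnn_R_50_FPN_3x")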
def _config_training(args: argparse.Namespace) -> CfgNode:
    r"""Create a configuration node from the script arguments.

    In this application we consider the object detection use case only. We
    finetune object detection networks trained on the COCO dataset to a custom
    use case.

    Parameters
    ----------
    args : argparse.Namespace
        training script arguments, see :py:meth:`_parse_args()`

    Returns
    -------
    CfgNode
        configuration that is used by Detectron2 to train a model

    Raises
    ------
    RuntimeError
        if the combination of `model_type`, `backbone`, `lr_schedule` is not
        valid. Please refer to the Detectron2 model zoo for valid options.
    """
    cfg = get_cfg()
    pretrained_model = (
        f"COCO-Detection/{args.model_type}_{args.backbone}_{args.lr_schedule}x.yaml"
    )
    LOGGER.info(f"Looking for the pretrained model {pretrained_model}...")
    try:
        cfg.merge_from_file(model_zoo.get_config_file(pretrained_model))
    except RuntimeError as err:
        LOGGER.error(f"{err}: check model backbone and lr schedule combination")
        raise
    cfg.DATASETS.TRAIN = (f"{args.dataset_name}_training",)
    cfg.DATASETS.TEST = (f"{args.dataset_name}_validation",)
    cfg.DATALOADER.NUM_WORKERS = args.num_workers
    # Let training initialize from model zoo
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(pretrained_model)
    LOGGER.info(f"{pretrained_model} correctly loaded")
    cfg.SOLVER.CHECKPOINT_PERIOD = 20000
    cfg.SOLVER.BASE_LR = args.lr
    cfg.SOLVER.MAX_ITER = args.num_iter
    cfg.SOLVER.IMS_PER_BATCH = args.batch_size
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = args.num_rpn
    if args.model_type == "faster_rcnn":
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(args.classes)
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.pred_thr
        cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = args.nms_thr
        cfg.MODEL.RPN.BBOX_REG_LOSS_TYPE = args.reg_loss_type
        cfg.MODEL.RPN.BBOX_REG_LOSS_WEIGHT = args.bbox_reg_loss_weight
        cfg.MODEL.RPN.POSITIVE_FRACTION = args.bbox_rpn_pos_fraction
        cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION = args.bbox_head_pos_fraction
    elif args.model_type == "retinanet":
        cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.pred_thr
        cfg.MODEL.RETINANET.NMS_THRESH_TEST = args.nms_thr
        cfg.MODEL.RETINANET.NUM_CLASSES = len(args.classes)
        cfg.MODEL.RETINANET.BBOX_REG_LOSS_TYPE = args.reg_loss_type
        cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA = args.focal_loss_gamma
        cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA = args.focal_loss_alpha
    else:
        assert False, f"Add implementation for model {args.model_type}"
    cfg.MODEL.DEVICE = "cuda" if args.num_gpus else "cpu"
    cfg.TEST.DETECTIONS_PER_IMAGE = args.det_per_img
    cfg.OUTPUT_DIR = args.model_dir
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    return cfg
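# Assumed usage (``args`` comes from the repo's _parse_args(), not shown here):
# args = _parse_args()
# cfg = _config_training(args)
# trainer = DefaultTrainer(cfg)  # detectron2.engine.DefaultTrainer
# trainer.resume_or_load(resume=False)
# trainer.train()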
train_annos = join(data_dir, 'grini_nc_merged_no_masks_train.json')
val_annos = join(data_dir, 'grini_nc_merged_no_masks_val.json')
test_annos = join(data_dir, 'grini_nc_merged_no_masks_test.json')

# Register dataset configs
register_coco_instances('grini_nc_merged_bbox_only_train', {}, train_annos, train_imgs)
register_coco_instances('grini_nc_merged_bbox_only_val', {}, val_annos, val_imgs)
register_coco_instances('grini_nc_merged_bbox_only_test', {}, test_annos, test_imgs)

cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file(
        'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml'))
register_datasets()
cfg.DATASETS.TRAIN = ('grini_nc_merged_bbox_only_train', )
cfg.DATASETS.TEST = ('grini_nc_merged_bbox_only_val', )
cfg.MODEL.WEIGHTS = get_checkpoint_url(
    'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml')
cfg.MODEL.DEVICE = "cpu"  # cpu or cuda
cfg.MODEL.MASK_ON = False  # the annotations contain boxes only

# todo: find out how to rescale images and annotations first...

# Fixed parameters
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.001
cfg.SOLVER.WARMUP_ITERS = 1000
def main():
    """ Mask RCNN Object Detection with Detectron2 """
    rospy.init_node("mask_rcnn", anonymous=True)
    bridge = CvBridge()
    start_time = time.time()
    image_counter = 0

    register_coco_instances(
        "train_set", {},
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train/annotations.json",
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train")
    register_coco_instances(
        "test_set", {},
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test/annotations.json",
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test")

    train_metadata = MetadataCatalog.get("train_set")
    print(train_metadata)
    dataset_dicts_train = DatasetCatalog.get("train_set")
    test_metadata = MetadataCatalog.get("test_set")
    print(test_metadata)
    dataset_dicts_test = DatasetCatalog.get("test_set")

    cfg = get_cfg()
    cfg.merge_from_file(
        model_zoo.get_config_file(
            "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ("train_set",)
    cfg.DATASETS.TEST = ()  # no metrics implemented for this dataset
    cfg.DATALOADER.NUM_WORKERS = 4
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # initialize from model zoo
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.01
    cfg.SOLVER.MAX_ITER = 1000  # 300 iterations seems good enough, but you can certainly train longer
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for this toy dataset
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5  # 5 classes (Plate, Carrot, Celery, Pretzel, Gripper)

    # Temporary solution. If I train again I think I can use the dynamically set path again.
    cfg.MODEL.WEIGHTS = os.path.join(
        cfg.OUTPUT_DIR,
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/output/model_final.pth")
    # cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.4  # set the testing threshold for this model
    cfg.DATASETS.TEST = ("test_set",)
    predictor = DefaultPredictor(cfg)

    class_names = MetadataCatalog.get("train_set").thing_classes

    # Set up custom cv2 visualization parameters
    # Classes: [name, id]
    #   [Plate,   0]
    #   [Carrot,  1]
    #   [Celery,  2]
    #   [Pretzel, 3]
    #   [Gripper, 4]
    # Colors = [blue, green, red]
    color_plate = [0, 255, 0]      # green
    color_carrot = [255, 200, 0]   # blue
    color_celery = [0, 0, 255]     # red
    color_pretzel = [0, 220, 255]  # yellow
    color_gripper = [204, 0, 150]  # purple
    colors = list([color_plate, color_carrot, color_celery, color_pretzel, color_gripper])
    alpha = .4

    run = maskRCNN()
    while not rospy.is_shutdown():
        # Get images
        img = run.get_img()
        if img is not None:
            outputs = predictor(img)
            predictions = outputs["instances"].to("cpu")

            # Get results
            unsorted = run.getResult(predictions, class_names)

            # Sort detections by x and y
            sorted = run.sort_detections(unsorted)

            result = Result()
            for i in range(len(sorted)):
                result.class_ids.append(sorted[i][0])
                result.class_names.append(sorted[i][1])
                result.scores.append(sorted[i][2])
                result.boxes.append(sorted[i][3])
                result.masks.append(sorted[i][4])

            # Visualize using detectron2's built-in visualizer
            # v = Visualizer(im[:, :, ::-1],
            #                metadata=train_metadata,
            #                scale=1.0
            #                # instance_mode=ColorMode.IMAGE_BW  # remove the colors of unsegmented pixels
            #                )
            # v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
            # im = v.get_image()[:, :, ::-1]
            # im_msg = bridge.cv2_to_imgmsg(im, encoding="bgr8")

            # Visualize using custom cv2 code
            if result is not None:
                result_cls = result.class_names
                result_clsId = result.class_ids
                result_scores = result.scores
                result_masks = result.masks

                # Create copies of the original image
                im = img.copy()
                output = img.copy()

                # Initialize lists
                masks = []
                masks_indices = []
                for i in range(len(result_clsId)):
                    # Obtain current object mask as a numpy array (black and white mask of single object)
                    current_mask = bridge.imgmsg_to_cv2(result_masks[i])

                    # Find current mask indices
                    mask_indices = np.where(current_mask == 255)

                    # Add to mask indices list
                    if len(masks_indices) > len(result_clsId):
                        masks_indices = []
                    else:
                        masks_indices.append(mask_indices)

                    # Add to mask list
                    if len(masks) > len(result_clsId):
                        masks = []
                    else:
                        masks.append(current_mask)

                if len(masks) > 0:
                    # Create composite mask
                    composite_mask = sum(masks)

                    # Clip composite mask between 0 and 255
                    composite_mask = composite_mask.clip(0, 255)

                for i in range(len(result_clsId)):
                    # Select correct object color
                    color = colors[result_clsId[i]]

                    # Change the color of the current mask object
                    im[masks_indices[i][0], masks_indices[i][1], :] = color

                # Apply alpha scaling to image to adjust opacity
                cv2.addWeighted(im, alpha, output, 1 - alpha, 0, output)

                for i in range(len(result_clsId)):
                    # Draw bounding boxes
                    start_point = (result.boxes[i].x_offset, result.boxes[i].y_offset)
                    end_point = (result.boxes[i].x_offset + result.boxes[i].width,
                                 result.boxes[i].y_offset + result.boxes[i].height)
                    start_point2 = (result.boxes[i].x_offset + 2, result.boxes[i].y_offset + 2)
                    end_point2 = (result.boxes[i].x_offset + result.boxes[i].width - 2,
                                  result.boxes[i].y_offset + 12)
                    color = colors[result_clsId[i]]
                    box_thickness = 1

                    name = result_cls[i]
                    score = result_scores[i]
                    conf = round(score.item() * 100, 1)
                    string = str(name) + ":" + str(conf) + "%"
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    org = (result.boxes[i].x_offset + 2, result.boxes[i].y_offset + 10)
                    fontScale = .3
                    text_thickness = 1
                    output = cv2.rectangle(output, start_point, end_point, color, box_thickness)
                    output = cv2.rectangle(output, start_point2, end_point2, color, -1)  # text box
                    output = cv2.putText(output, string, org, font, fontScale,
                                         [0, 0, 0], text_thickness, cv2.LINE_AA, False)

                im_rgb = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
                im_msg = bridge.cv2_to_imgmsg(im_rgb, encoding="rgb8")

                ##### The entire goal of the code below is to get N random points
                ##### on the mask in 3D and publish them on the cloud samples topic for GPD
                item_ids = result_clsId
                idx = [i for i, e in enumerate(item_ids) if e > 0 and e < 4]
                numFoodItems = len(idx)

                mask = bridge.imgmsg_to_cv2(result_masks[idx[0]])
                coord = cv2.findNonZero(mask)  # coordinates of the mask that are on the food item

                # Pick 3 random points on the object mask
                sample_list = list()
                for ii in range(3):
                    point = Point()
                    x = random.choice(coord[:, 0, 1])  # x and y reversed for some reason
                    y = random.choice(coord[:, 0, 0])  # x and y reversed for some reason
                    depth = (run.depth_array[y, x]) / 1000

                    # Deproject pixels and depth to 3D coordinates (camera frame)
                    X, Y, Z = run.convert_depth_to_phys_coord_using_realsense(
                        y, x, depth, run.cam_info)
                    # print("(x,y,z) to convert: (" + str(y) + ", " + str(x) + ", " + str(depth) + ")")
                    # print("(X,Y,Z) converted: (" + str(X) + ", " + str(Y) + ", " + str(Z) + ")")
                    point.x = X
                    point.y = Y
                    point.z = Z
                    sample_list.append(point)
                # print(sample_list)

                cam_source = Int64()
                cam_source.data = 0

                cloud_source = CloudSources()
                cloud_source.cloud = run.pointCloud
                cloud_source.camera_source = [cam_source]

                view_point = Point()
                view_point.x = 0.640
                view_point.y = 0.828
                view_point.z = 0.505
                # view_point.x = 0; view_point.y = 0; view_point.z = 0
                cloud_source.view_points = [view_point]

                cloud_samples = CloudSamples()
                cloud_samples.cloud_sources = cloud_source
                cloud_samples.samples = sample_list

                # Print publish info
                # print(type(cloud_source.cloud))
                # print(cloud_source.camera_source)
                # print(cloud_source.view_points)
                # print("")
                # print(type(cloud_samples.cloud_sources))
                # print(cloud_samples.samples)
                # print("-------------------------\n")

                # Display image counter
                # image_counter = image_counter + 1
                # if (image_counter % 11) == 10:
                #     rospy.loginfo("Images detected per second=%.2f",
                #                   float(image_counter) / (time.time() - start_time))

                run.publish(im_msg, result, cloud_samples)

    return 0
def task12_B():
    lr = 0.0025
    batch_size = 256
    n_iter = 300
    EXPERIMENT_NAME = 'K1_' + str(lr) + 'lr_' + str(batch_size) + 'bsize_' + str(n_iter) + 'iter'

    def get_aicity_dataset(frame_idx_list):
        path = '/home/group09/code/week6/datasets/AICity_data/train/S03/c010/ai_challenge_s03_c010-full_annotation.xml'
        video_path = '/home/group09/code/week6/datasets/AICity_data/train/S03/c010/vdo.avi'

        reader = ReadData(path)
        gt, num_iter = reader.getGTfromXML()
        sortedFrames, sortedBBOX, numBBOX = reader.bboxInFrame(gt, 0, 2141)
        gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)

        dataset_dicts = []
        directory = '/home/group09/code/week6/datasets/AICity_data/AICity_frames'
        for frame_idx in tqdm(frame_idx_list):
            filename = str(frame_idx).zfill(4) + '.png'
            record = {}
            im_path = os.path.join(directory, filename)
            im = cv2.imread(im_path)
            print(filename)
            height, width = im.shape[:2]

            record["file_name"] = im_path
            record["image_id"] = str(frame_idx).zfill(4)
            record["height"] = height
            record["width"] = width

            classes = ['Car']
            objs = []
            for [x1, y1, x2, y2] in gtInfo[frame_idx]['bbox']:  # for every bbox in a frame's gt
                class_id = 0
                obj = {
                    "type": 'Car',
                    "bbox": [x1, y1, x2, y2],
                    "bbox_mode": BoxMode.XYXY_ABS,
                    "category_id": 0
                }
                objs.append(obj)
            record["annotations"] = objs
            dataset_dicts.append(record)
        return dataset_dicts

    # K-fold splits
    k_train = 0  # take the 0th k-fold for training
    k_step = int(np.floor(2141 * 0.25))
    frame_idx = [i for i in range(2141)]
    ini_frame_train = k_step * k_train

    # train dataset
    train_frame_idx = frame_idx[ini_frame_train:(ini_frame_train + k_step)]
    print("==== TRAIN SPLIT")
    print("")
    for d in ['train']:
        DatasetCatalog.register('train_retina', lambda d=d: get_aicity_dataset(train_frame_idx))
        MetadataCatalog.get('train_retina').set(thing_classes=['Car'])

    # val dataset
    val_frame_idx = [x for x in frame_idx if x not in train_frame_idx]
    print("==== VALIDATION SPLIT")
    print("")
    for d in ['val']:
        DatasetCatalog.register('val_retina', lambda d=d: get_aicity_dataset(val_frame_idx))
        MetadataCatalog.get('val_retina').set(thing_classes=['Car'])

    train_metadata = MetadataCatalog.get("train_retina")
    dataset_dicts = get_aicity_dataset(train_frame_idx)

    OUTPUT_DIR = '/home/group09/code/week6/models_retina/' + EXPERIMENT_NAME
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    cfg = get_cfg()
    cfg.OUTPUT_DIR = OUTPUT_DIR
    cfg.merge_from_file(
        model_zoo.get_config_file("COCO-Detection/retinanet_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ("train_retina", )
    cfg.DATASETS.TEST = ("val_retina", )
    # cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-Detection/retinanet_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = lr  # pick a good LR
    cfg.SOLVER.MAX_ITER = n_iter  # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
    cfg.SOLVER.STEPS = []  # do not decay learning rate
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = batch_size  # faster, and good enough for this toy dataset (default: 512)
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (car)
    cfg.TEST.EVAL_PERIOD = 100

    class MyTrainer(DefaultTrainer):
        @classmethod
        def build_evaluator(cls, cfg, dataset_name, output_folder=None):
            if output_folder is None:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
            return COCOEvaluator(dataset_name, cfg, True, output_folder)

        def build_hooks(self):
            hooks = super().build_hooks()
            hooks.insert(
                -1,
                LossEvalHook(
                    cfg.TEST.EVAL_PERIOD, self.model,
                    build_detection_test_loader(self.cfg,
                                                self.cfg.DATASETS.TEST[0],
                                                DatasetMapper(self.cfg, True))))
            return hooks

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = MyTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()
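# LossEvalHook is assumed to be defined elsewhere in this repo. A minimal
# sketch of such a hook (running the model in training mode over the given
# validation loader every eval_period iterations and logging the mean loss)
# could look like this:
#
# from detectron2.engine import HookBase
# import torch
#
# class LossEvalHook(HookBase):
#     def __init__(self, eval_period, model, data_loader):
#         self._period = eval_period
#         self._model = model
#         self._data_loader = data_loader
#
#     def _do_loss_eval(self):
#         losses = []
#         with torch.no_grad():
#             for inputs in self._data_loader:
#                 # in training mode the model returns a dict of losses
#                 loss_dict = self._model(inputs)
#                 losses.append(sum(loss_dict.values()).item())
#         mean_loss = sum(losses) / max(len(losses), 1)
#         self.trainer.storage.put_scalar('validation_loss', mean_loss)
#
#     def after_step(self):
#         next_iter = self.trainer.iter + 1
#         if self._period > 0 and next_iter % self._period == 0:
#             self._do_loss_eval()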
    sys.exit(0)

im = cv2.imread(sys.argv[1])
if im is None:
    print("file open fail")
    sys.exit(0)

# cv2.imshow('image', im)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running
# a model in detectron2's core library
cfg.merge_from_file(
    model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
cfg.merge_from_list(['MODEL.DEVICE', 'cpu'])
# Find a model from detectron2's model zoo. You can either use the
# https://dl.fbaipublicfiles.... url, or use the detectron2:// shorthand
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
predictor = DefaultPredictor(cfg)
outputs = predictor(im)

size = outputs["instances"].scores.shape[0]
poslist = []
boxes = outputs["instances"].pred_boxes.tensor
pred = outputs['instances'].pred_classes
masks = outputs["instances"].pred_masks
for i in range(size):
            mask_encode = dets_frame_dict['pred_masks'][objid]
            det_str = "%d %s %.4f %.2f %.2f %.2f %.2f %d %d %s\n" % \
                (frameid,
                 CITYSCAPES_THINGS[dets_frame_dict['pred_classes'][objid]],
                 dets_frame_dict['scores'][objid],
                 dets_frame_dict['pred_boxes'][objid][0],
                 dets_frame_dict['pred_boxes'][objid][1],
                 dets_frame_dict['pred_boxes'][objid][2],
                 dets_frame_dict['pred_boxes'][objid][3],
                 mask_encode['size'][0],
                 mask_encode['size'][1],
                 mask_encode['counts'].decode('UTF-8'))
            f.write(det_str)


if __name__ == '__main__':
    cfg = get_cfg()
    # add project-specific config (e.g., TensorMask) here if you're not running
    # a model in detectron2's core library
    cfg.merge_from_file(
        model_zoo.get_config_file("Cityscapes/mask_rcnn_R_50_FPN.yaml"))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
    # Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
    # cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("Cityscapes/mask_rcnn_R_50_FPN.yaml")
    cfg.MODEL.WEIGHTS = 'model_final_af9cf5.pkl'
    predictor = DefaultPredictor(cfg)

    # seq_names = sorted(os.listdir(cruw_data_root))
    seq_names = ['12', '13']
    for seq in seq_names:
        output_dict = {
            'IMAGES_0': [],
            'IMAGES_1': [],
        }
        seq_path = os.path.join(cruw_data_root, seq)
help="Root directory to store the outputs.") parser.add_argument("--dataset_root", type=str, required=True, help="Root directory of the dataset") args = parser.parse_args() # ------------------------ MODEL SELECTION AND CONFIGURATION ---------------------------------- # MODEL_PATH = "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml" MODEL_PATH = args.model MODEL = MODEL_PATH.split('/')[1].split('.')[0] cfg = get_cfg() cfg.merge_from_file(model_zoo.get_config_file(MODEL_PATH)) # Does not affect output bbox coordinates cfg.INPUT.MAX_SIZE_TRAIN = 1333 # Setting to lower than 0.7 because using very low objectness cfg.MODEL.RPN.NMS_THRESH = 0.5 # Objectness threshold cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01 # set threshold for this model cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5 # Setting to lower than 0.7 because using very low objectness # Setting to 500 for safety cfg.TEST.DETECTIONS_PER_IMAGE = 150 # Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
def demo(cfg):
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    model.eval()
    misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TEST.CHECKPOINT_FILE_PATH
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        ckpt = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        ckpt = cfg.TRAIN.CHECKPOINT_FILE_PATH
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    cu.load_checkpoint(
        ckpt,
        model,
        cfg.NUM_GPUS > 1,
        None,
        inflation=False,
        convert_from_caffe2="caffe2" in [cfg.TEST.CHECKPOINT_TYPE, cfg.TRAIN.CHECKPOINT_TYPE],
    )

    if cfg.DETECTION.ENABLE:
        # Load object detector from detectron2
        dtron2_cfg_file = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_CFG
        dtron2_cfg = get_cfg()
        dtron2_cfg.merge_from_file(model_zoo.get_config_file(dtron2_cfg_file))
        dtron2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
        dtron2_cfg.MODEL.WEIGHTS = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_WEIGHTS
        object_predictor = DefaultPredictor(dtron2_cfg)
        # Load the labels of the AVA dataset
        with open(cfg.DEMO.LABEL_FILE_PATH) as f:
            labels = f.read().split('\n')[:-1]
        palette = np.random.randint(64, 128, (len(labels), 3)).tolist()
        boxes = []
    else:
        # Load the labels of the Kinetics-400 dataset
        labels_df = pd.read_csv(cfg.DEMO.LABEL_FILE_PATH)
        labels = labels_df['name'].values

    frame_provider = VideoReader(cfg)

    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
    frames = []
    pred_labels = []
    s = 0.
    videoOut = cv2.VideoWriter(cfg.DEMO.OUTPUT_FILE,
                               cv2.VideoWriter_fourcc(*'MP4V'), 30.0,
                               (cfg.DEMO.DISPLAY_WIDTH, cfg.DEMO.DISPLAY_HEIGHT))
    for able_to_read, frame in frame_provider:
        if not able_to_read:
            # when we reach the end frame, clear the buffer and continue to the next one.
            frames = []
            continue

        if len(frames) != seq_len:
            frame_processed = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_processed = scale(cfg.DATA.TEST_CROP_SIZE, frame_processed)
            frames.append(frame_processed)
            if cfg.DETECTION.ENABLE and len(frames) == seq_len // 2 - 1:
                mid_frame = frame

        if len(frames) == seq_len:
            start = time()
            if cfg.DETECTION.ENABLE:
                outputs = object_predictor(mid_frame)
                fields = outputs["instances"]._fields
                pred_classes = fields["pred_classes"]
                selection_mask = pred_classes == 0
                # acquire person boxes
                pred_classes = pred_classes[selection_mask]
                pred_boxes = fields["pred_boxes"].tensor[selection_mask]
                scores = fields["scores"][selection_mask]
                boxes = cv2_transform.scale_boxes(cfg.DATA.TEST_CROP_SIZE,
                                                  pred_boxes,
                                                  frame_provider.display_height,
                                                  frame_provider.display_width)
                boxes = torch.cat(
                    [torch.full((boxes.shape[0], 1), float(0)).cuda(), boxes],
                    axis=1)

            inputs = torch.as_tensor(frames).float()
            inputs = inputs / 255.0
            # Perform color normalization.
            inputs = inputs - torch.tensor(cfg.DATA.MEAN)
            inputs = inputs / torch.tensor(cfg.DATA.STD)
            # T H W C -> C T H W.
            inputs = inputs.permute(3, 0, 1, 2)
            # 1 C T H W.
            inputs = inputs.unsqueeze(0)

            # Sample frames for the fast pathway.
            index = torch.linspace(0, inputs.shape[2] - 1, cfg.DATA.NUM_FRAMES).long()
            fast_pathway = torch.index_select(inputs, 2, index)
            # logger.info('fast_pathway.shape={}'.format(fast_pathway.shape))

            # Sample frames for the slow pathway.
            index = torch.linspace(0, fast_pathway.shape[2] - 1,
                                   fast_pathway.shape[2] // cfg.SLOWFAST.ALPHA).long()
            slow_pathway = torch.index_select(fast_pathway, 2, index)
            # logger.info('slow_pathway.shape={}'.format(slow_pathway.shape))
            inputs = [slow_pathway, fast_pathway]

            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)

            # Perform the forward pass.
            if cfg.DETECTION.ENABLE:
                # When there is nothing in the scene,
                # use a dummy variable to disable all computations below.
                if not len(boxes):
                    preds = torch.tensor([])
                else:
                    preds = model(inputs, boxes)
            else:
                preds = model(inputs)

            # Gather all the predictions across all the devices to perform ensemble.
            if cfg.NUM_GPUS > 1:
                preds = du.all_gather(preds)[0]

            if cfg.DETECTION.ENABLE:
                # This post-processing was intentionally assigned to the CPU since my
                # laptop GPU (RTX 2080) runs out of memory here; if your GPU is more
                # powerful, I'd recommend changing this section to let CUDA do the processing.
                preds = preds.cpu().detach().numpy()
                pred_masks = preds > .1
                label_ids = [np.nonzero(pred_mask)[0] for pred_mask in pred_masks]
                pred_labels = [
                    [labels[label_id] for label_id in perbox_label_ids]
                    for perbox_label_ids in label_ids
                ]
                # I'm unsure how detectron2 rescales boxes to the original image size,
                # so I use the input boxes of SlowFast and rescale them back instead;
                # it's safer, and even if the boxes were not rescaled by
                # cv2_transform.rescale_boxes, it still works.
                boxes = boxes.cpu().detach().numpy()
                ratio = np.min(
                    [frame_provider.display_height, frame_provider.display_width]
                ) / cfg.DATA.TEST_CROP_SIZE
                boxes = boxes[:, 1:] * ratio
            else:
                ## Option 1: single-label inference selected from the highest-probability entry.
                # label_id = preds.argmax(-1).cpu()
                # pred_label = labels[label_id]
                # Option 2: multi-label inference selected from probability entries > threshold.
                label_ids = torch.nonzero(preds.squeeze() > .1).reshape(-1).cpu().detach().numpy()
                pred_labels = labels[label_ids]
                logger.info(pred_labels)
                if not list(pred_labels):
                    pred_labels = ['Unknown']

            # # option 1: remove the oldest frame in the buffer to make room for the new one.
            # frames.pop(0)
            # option 2: empty the buffer
            frames = []
            s = time() - start

        if cfg.DETECTION.ENABLE and pred_labels and boxes.any():
            for box, box_labels in zip(boxes.astype(int), pred_labels):
                cv2.rectangle(frame, tuple(box[:2]), tuple(box[2:]), (0, 255, 0), thickness=2)
                label_origin = box[:2]
                for label in box_labels:
                    label_origin[-1] -= 5
                    (label_width, label_height), _ = cv2.getTextSize(
                        label, cv2.FONT_HERSHEY_SIMPLEX, .5, 2)
                    cv2.rectangle(
                        frame,
                        (label_origin[0], label_origin[1] + 5),
                        (label_origin[0] + label_width, label_origin[1] - label_height - 5),
                        palette[labels.index(label)], -1)
                    cv2.putText(frame, label, tuple(label_origin),
                                cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255), 1)
                    label_origin[-1] -= label_height + 5

        if not cfg.DETECTION.ENABLE:
            # Display predicted labels on the frame.
            y_offset = 50
            cv2.putText(frame, 'Action:', (10, y_offset),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=.65, color=(0, 235, 0), thickness=2)
            for pred_label in pred_labels:
                y_offset += 30
                cv2.putText(frame, '{}'.format(pred_label), (20, y_offset),
                            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                            fontScale=.65, color=(0, 235, 0), thickness=2)

        # Display prediction speed
        cv2.putText(frame, 'Speed: {:.2f}s'.format(s), (10, 25),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=.65, color=(0, 235, 0), thickness=2)

        # Display the frame
        cv2.imshow('SlowFast', frame)
        videoOut.write(frame)

        # hit Esc to quit the demo.
        key = cv2.waitKey(1)
        if key == 27:
            break

    frame_provider.clean()
    videoOut.release()
        dataset_dicts.append(record)
    return dataset_dicts


DatasetCatalog.register("aicity_train", partial(get_datasect_dicts, 1080, 1620))
MetadataCatalog.get("aicity_train").set(thing_classes=["car"])
aicity_metadata = MetadataCatalog.get("aicity_train")
dataset_dicts = get_datasect_dicts(0, 540)

from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/retinanet_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("aicity_train", )
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/retinanet_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.001  # pick a good LR
cfg.SOLVER.WARMUP_ITERS = 300
cfg.SOLVER.MAX_ITER = 600
cfg.SOLVER.STEPS = (350, 500)  # decay learning rate
cfg.SOLVER.GAMMA = 0.1
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for this toy dataset (default: 512)
def __init__(self):
    self.visualize = False
    self.verbose = False
    self.save_imgs = True
    self.plot_loss = True
    # st()

    # these are all map names
    a = np.arange(1, 30)
    b = np.arange(201, 231)
    c = np.arange(301, 331)
    d = np.arange(401, 431)
    abcd = np.hstack((a, b, c, d))
    mapnames = []
    for i in list(abcd):
        mapname = 'FloorPlan' + str(i)
        mapnames.append(mapname)

    train_len = int(0.9 * len(mapnames))
    np.random.seed(1)
    random.shuffle(mapnames)
    self.mapnames_train = mapnames[:train_len]
    self.mapnames_val = mapnames[train_len:]
    # self.num_episodes = len(self.mapnames)

    self.ignore_classes = []
    # classes to save
    self.include_classes = [
        'ShowerDoor', 'Cabinet', 'CounterTop', 'Sink', 'Towel', 'HandTowel',
        'TowelHolder', 'SoapBar', 'ToiletPaper', 'ToiletPaperHanger',
        'HandTowelHolder', 'SoapBottle', 'GarbageCan', 'Candle', 'ScrubBrush',
        'Plunger', 'SinkBasin', 'Cloth', 'SprayBottle', 'Toilet', 'Faucet',
        'ShowerHead', 'Box', 'Bed', 'Book', 'DeskLamp', 'BasketBall', 'Pen',
        'Pillow', 'Pencil', 'CellPhone', 'KeyChain', 'Painting', 'CreditCard',
        'AlarmClock', 'CD', 'Laptop', 'Drawer', 'SideTable', 'Chair', 'Blinds',
        'Desk', 'Curtains', 'Dresser', 'Watch', 'Television', 'WateringCan',
        'Newspaper', 'FloorLamp', 'RemoteControl', 'HousePlant', 'Statue',
        'Ottoman', 'ArmChair', 'Sofa', 'DogBed', 'BaseballBat', 'TennisRacket',
        'VacuumCleaner', 'Mug', 'ShelvingUnit', 'Shelf', 'StoveBurner', 'Apple',
        'Lettuce', 'Bottle', 'Egg', 'Microwave', 'CoffeeMachine', 'Fork',
        'Fridge', 'WineBottle', 'Spatula', 'Bread', 'Tomato', 'Pan', 'Cup',
        'Pot', 'SaltShaker', 'Potato', 'PepperShaker', 'ButterKnife',
        'StoveKnob', 'Toaster', 'DishSponge', 'Spoon', 'Plate', 'Knife',
        'DiningTable', 'Bowl', 'LaundryHamper', 'Vase', 'Stool', 'CoffeeTable',
        'Poster', 'Bathtub', 'TissueBox', 'Footstool', 'BathtubBasin',
        'ShowerCurtain', 'TVStand', 'Boots', 'RoomDecor', 'PaperTowelRoll',
        'Ladle', 'Kettle', 'Safe', 'GarbageBag', 'TeddyBear', 'TableTopDecor',
        'Dumbbell', 'Desktop', 'AluminumFoil', 'Window'
    ]

    self.action_space = {0: "MoveLeft", 1: "MoveRight", 2: "MoveAhead", 3: "MoveBack"}
    self.num_actions = len(self.action_space)

    cfg_det = get_cfg()
    cfg_det.merge_from_file(
        model_zoo.get_config_file(
            "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg_det.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2  # set threshold for this model
    cfg_det.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    cfg_det.MODEL.DEVICE = 'cpu'
    self.cfg_det = cfg_det
    self.maskrcnn = DefaultPredictor(cfg_det)

    self.conf_thresh_detect = 0.7  # for initially detecting a low-confidence object
    self.conf_thresh_init = 0.8    # threshold after turning the head toward the object
    self.conf_thresh_end = 0.9     # if we reach this, stop collecting observations

    self.BATCH_SIZE = 12
    self.percentile = 70
    self.max_iters = 100000
    self.max_frames = 10
    self.val_interval = 15
    self.save_interval = 50

    # debug overrides (these replace the values set just above)
    self.BATCH_SIZE = 1
    self.percentile = 70
    self.max_iters = 100000
    self.max_frames = 1
    self.val_interval = 1
    self.save_interval = 1

    self.small_classes = []
    self.rot_interval = 5.0
    self.radius_max = 3.5  # 3  # 1.75
    self.radius_min = 1.0  # 1.25
    self.num_flat_views = 3
    self.num_any_views = 7
    self.num_views = 25
    self.center_from_mask = False  # get object centroid from maskrcnn (True) or gt (False)
    self.obj_per_scene = 5

    # self.origin_quaternion = np.quaternion(1, 0, 0, 0)
    # self.origin_rot_vector = quaternion.as_rotation_vector(self.origin_quaternion)

    # self.homepath = f'/home/nel/gsarch/aithor/data/test2'
    self.homepath = '/home/sirdome/katefgroup/gsarch/ithor/data/test'
    if not os.path.exists(self.homepath):
        os.mkdir(self.homepath)
    else:
        val = input("Delete homepath? [y/n]: ")
        if val == 'y':
            import shutil
            shutil.rmtree(self.homepath)
            os.mkdir(self.homepath)
        else:
            print("ENDING")
            assert (False)

    self.log_freq = 1
    self.log_dir = self.homepath + '/..' + '/log_cem' + '/aa'
    if not os.path.exists(self.log_dir):
        os.mkdir(self.log_dir)
    MAX_QUEUE = 10  # flushes when this many events are waiting
    self.writer = SummaryWriter(self.log_dir, max_queue=MAX_QUEUE, flush_secs=60)

    self.W = 256
    self.H = 256

    # self.fov = 90
    # hfov = float(self.fov) * np.pi / 180.
    # self.pix_T_camX = np.array([
    #     [(self.W/2.)*1 / np.tan(hfov / 2.), 0., 0., 0.],
    #     [0., (self.H/2.)*1 / np.tan(hfov / 2.), 0., 0.],
    #     [0., 0., 1, 0],
    #     [0., 0., 0, 1]])
    # self.pix_T_camX[0,2] = self.W/2.
    # self.pix_T_camX[1,2] = self.H/2.

    self.fov = 90
    self.camera_matrix = self.get_camera_matrix(self.W, self.H, self.fov)
    self.K = self.get_habitat_pix_T_camX(self.fov)

    self.init_network()
    self.run_episodes()
register_coco_instances("dataset_train0", {}, "train_patch_0.json", "/content/thre_whole_patches") register_coco_instances("dataset_val0", {}, "val_patch_0.json", "/content/thre_whole_patches") register_coco_instances("dataset_train1", {}, "train_patch_1.json", "/content/thre_whole_patches") register_coco_instances("dataset_val1", {}, "val_patch_1.json", "/content/thre_whole_patches") register_coco_instances("dataset_train2", {}, "train_patch_2.json", "/content/thre_whole_patches") register_coco_instances("dataset_val2", {}, "val_patch_2.json", "/content/thre_whole_patches") print("done") """### first""" cfg = get_cfg() # cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml")) cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_101_FPN_3x.yaml")) cfg.DATASETS.TRAIN = ("dataset_val0",) cfg.DATASETS.TEST = ("dataset_train0",) cfg.DATALOADER.NUM_WORKERS = 1 # cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml") # Let training initialize from model zoo cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/retinanet_R_101_FPN_3x.yaml") cfg.SOLVER.IMS_PER_BATCH = 4 cfg.SOLVER.BASE_LR = 0.00025 cfg.MODEL.RETINANET.NUM_CLASSES = 2 cfg.SOLVER.MAX_ITER = 1000 #adjust up if val mAP is still rising, adjust down if overfit cfg.SOLVER.GAMMA = 0.05 cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64 cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
image_root="./data/20210204_Digi_generated/images/") DatasetCatalog.register("val", lambda: load_coco_json("./data/20210204_Digi_val.json", "./data/20210204_Digi_generated/valid_images/", "val")) MetadataCatalog.get("val").set(thing_classes=["trash"], json_file="./data/20210204_Digi_val.json", image_root="./data/20210204_Digi_generated/valid_images/") # # training from detectron2.engine import DefaultTrainer cfg = get_cfg() # cfg.merge_from_file(model_zoo.get_config_file("./detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")) cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")) cfg.DATASETS.TRAIN = ("train",) cfg.DATASETS.TEST = ("val",) cfg.DATALOADER.NUM_WORKERS = 2 cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml") # Let training initialize from model zoo cfg.SOLVER.IMS_PER_BATCH = 4 cfg.SOLVER.BASE_LR = 0.005 # pick a good LR cfg.SOLVER.MAX_ITER = 6000 # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset cfg.SOLVER.STEPS = (2000,4000) cfg.SOLVER.MOMENTUM = 0.9 cfg.SOLVER.WEIGHT_DECAY = 0.0001 cfg.TEST.EVAL_PERIOD = 500 cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128 # faster, and good enough for this toy dataset (default: 512) cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1 # only has one class (ballon). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets) # NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrect uses num_classes+1 here.
MetadataCatalog.get("faces_" + d).set(thing_classes=classes) statement_metadata = MetadataCatalog.get("faces_train") class CocoTrainer(DefaultTrainer): @classmethod def build_evaluator(cls, cfg, dataset_name, output_folder=None): if output_folder is None: os.makedirs("coco_eval", exist_ok=True) output_folder = "coco_eval" return COCOEvaluator(dataset_name, cfg, False, output_folder) cfg = get_cfg() cfg.merge_from_file( model_zoo.get_config_file( "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml" ) ) cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url( "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml" ) cfg.DATASETS.TRAIN = ("faces_train",) cfg.DATASETS.TEST = ("faces_val",) cfg.DATALOADER.NUM_WORKERS = 4 cfg.SOLVER.IMS_PER_BATCH = 2 cfg.SOLVER.BASE_LR = 0.001 cfg.SOLVER.WARMUP_ITERS = 1000 cfg.SOLVER.MAX_ITER = 1500 cfg.SOLVER.STEPS = (1000, 1500) cfg.SOLVER.GAMMA = 0.05 cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64
import argparse
import json
import os

import cv2
import numpy as np
import pandas as pd
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import ColorMode, Visualizer

# get_person_dict and PredictorWithTTA are project-local helpers
# (a sketch of plausible implementations follows main() below).


def main():
    parser = argparse.ArgumentParser("person falldown trainer")
    parser.add_argument("--datapath", type=str, default="./data")
    parser.add_argument("--res_dir", type=str, default="./res-2")
    parser.add_argument("--cfg_dir", type=str, default="COCO-Detection/")
    parser.add_argument("--cfg", type=str, default="faster_rcnn_R_50_FPN_3x.yaml")
    parser.add_argument("--model_url", type=str, default="./output")
    parser.add_argument("--tta", action="store_true", default=False)
    parser.add_argument("--min_size", type=int, default=800)
    parser.add_argument("--save_bbox", action="store_true", default=False)
    parser.add_argument("--bbox_dir", type=str, default="./bbox_out/")
    parser.add_argument("--model2_url", type=str, default=None)
    parser.add_argument("--cfg2_dir", type=str, default="COCO-Detection/")
    parser.add_argument("--cfg2", type=str, default="faster_rcnn_X_101_32x8d_FPN_3x.yaml")
    args = parser.parse_args()

    if args.save_bbox:
        os.makedirs(args.bbox_dir, exist_ok=True)

    # --cfg2 and --cfg2_dir have non-None defaults, so in practice
    # --model2_url alone decides whether a second model is used.
    has_model2 = (args.cfg2 is not None and args.cfg2_dir is not None
                  and args.model2_url is not None)

    DatasetCatalog.register("pfallcnt_" + args.datapath,
                            lambda d=args.datapath: get_person_dict(d))
    MetadataCatalog.get("pfallcnt_" + args.datapath).set(
        thing_classes=["0", "1"],
        thing_colors=[(0, 255, 0), (255, 0, 0)])

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(args.cfg_dir + args.cfg))
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.MODEL.MASK_ON = False
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2  # only two classes: 0 = standing, 1 = fallen
    cfg.MODEL.WEIGHTS = args.model_url
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.895  # testing threshold for this model
    cfg.DATASETS.TEST = ("pfallcnt_" + args.datapath,)  # trailing comma: must be a tuple, not a string
    cfg.INPUT.MIN_SIZE_TEST = args.min_size
    cfg.TEST.AUG.ENABLED = args.tta

    if args.tta:
        # cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.993
        predictor = PredictorWithTTA(cfg)
    else:
        predictor = DefaultPredictor(cfg)

    data_dicts = get_person_dict(args.datapath)
    person_metadata = MetadataCatalog.get("pfallcnt_" + args.datapath)
    os.makedirs(os.path.join(args.res_dir, "vis"), exist_ok=True)

    name_list = []
    fallcnts = []
    for d in data_dicts:
        im = cv2.imread(d["file_name"])
        height, width = im.shape[:2]
        outputs = predictor(im)
        field_dict = outputs["instances"].to("cpu").get_fields()
        pred_cls = field_dict["pred_classes"].numpy()
        name_list.append(d["image_id"])
        # Class 1 marks a fallen person, so summing class ids counts falls.
        fallcnts.append(pred_cls.sum())
        print(name_list[-1], fallcnts[-1])

        if args.save_bbox:
            # Write a labelme-style annotation file for each image.
            anno_dict = {
                "version": "4.2.7",
                "flags": {},  # labelme uses "flags"; original had "flag"
                "shapes": [],
                "imagePath": "../A/" + d["image_id"],
                "imageData": None,
                "imageHeight": height,
                "imageWidth": width
            }
            bboxes = field_dict["pred_boxes"].tensor.numpy().astype(np.float64)
            for i in range(bboxes.shape[0]):
                box_dict = {
                    "label": str(int(pred_cls[i])),
                    "group_id": None,  # fixed typo: original had "gound_id"
                    "shape_type": "rectangle",
                    "flags": {}
                }
                box_dict["points"] = [[bboxes[i][0], bboxes[i][1]],
                                      [bboxes[i][2], bboxes[i][3]]]
                anno_dict["shapes"].append(box_dict)
            # Replaces the 3-character image extension (e.g. "jpg") with "json".
            with open(os.path.join(args.bbox_dir, d["image_id"][:-3] + "json"), "w") as f:
                json.dump(anno_dict, f, indent=1)

        v = Visualizer(im[:, :, ::-1],
                       metadata=person_metadata,
                       scale=1.0,
                       instance_mode=ColorMode.SEGMENTATION)
        v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
        save_file = os.path.join(args.res_dir, "vis", d["image_id"])
        cv2.imwrite(save_file, v.get_image()[:, :, ::-1])

    del predictor

    if has_model2:
        print("has model2, building model2 config")
        cfg2 = get_cfg()
        cfg2.merge_from_file(model_zoo.get_config_file(args.cfg2_dir + args.cfg2))
        cfg2.DATALOADER.NUM_WORKERS = 2
        cfg2.SOLVER.IMS_PER_BATCH = 2
        cfg2.MODEL.MASK_ON = False
        cfg2.MODEL.ROI_HEADS.NUM_CLASSES = 2  # only two classes: 0 = standing, 1 = fallen
        cfg2.MODEL.WEIGHTS = args.model2_url
        cfg2.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.92  # testing threshold for this model
        cfg2.DATASETS.TEST = ("pfallcnt_" + args.datapath,)
        cfg2.INPUT.MIN_SIZE_TEST = args.min_size
        cfg2.TEST.AUG.ENABLED = args.tta
        if args.tta:
            predictor2 = PredictorWithTTA(cfg2)
        else:
            predictor2 = DefaultPredictor(cfg2)

        # Second pass: keep model 2's count when it finds more falls (> 5).
        idx = -1
        modifies = []
        for d in data_dicts:
            idx += 1
            im = cv2.imread(d["file_name"])
            outputs2 = predictor2(im)
            field_dict2 = outputs2["instances"].to("cpu").get_fields()
            pred_cls2 = field_dict2["pred_classes"].numpy()
            fallcnt2 = pred_cls2.sum()
            print("model 2: ", d["file_name"], fallcnt2)
            if (fallcnt2 > 5 and fallcnts[idx] < fallcnt2
                    and name_list[idx] == d["image_id"]):
                fallcnts[idx] = fallcnt2
                modifies.append(name_list[idx])
            elif name_list[idx] != d["image_id"]:
                print("file name is not the same")

    csv_dict = {"file": name_list, "fall_count": fallcnts}
    csv_df = pd.DataFrame(csv_dict)
    csv_df.to_csv(os.path.join(args.res_dir, "fallcnt_submit.csv"),
                  sep=",", index=False)
    print(len(name_list), len(fallcnts))
    if has_model2:
        print(modifies)
        print("modify %d results" % len(modifies))
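# get_person_dict and PredictorWithTTA are referenced above but defined
# elsewhere in the project. Below is a minimal sketch of plausible
# implementations, assuming images sit directly under --datapath and that
# TTA can be delegated to detectron2's GeneralizedRCNNWithTTA wrapper;
# both are assumptions for illustration, not the project's actual code.
import glob

from detectron2.modeling import GeneralizedRCNNWithTTA


def get_person_dict(datapath):
    # Only "file_name" and "image_id" are consumed by main() above.
    return [{"file_name": p, "image_id": os.path.basename(p)}
            for p in sorted(glob.glob(os.path.join(datapath, "*.jpg")))]


class PredictorWithTTA(DefaultPredictor):
    # A DefaultPredictor whose underlying model is wrapped with detectron2's
    # built-in test-time augmentation (multi-scale + horizontal flip).
    def __init__(self, cfg):
        super().__init__(cfg)
        self.model = GeneralizedRCNNWithTTA(cfg, self.model)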
import torch
import torchvision

import detectron2
from detectron2.utils.logger import setup_logger
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from PIL import Image
from PIL import ImageFile

ImageFile.LOAD_TRUNCATED_IMAGES = True

# Setup Detectron2
setup_logger()
cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
if not torch.cuda.is_available():
    cfg.MODEL.DEVICE = "cpu"
predictor = DefaultPredictor(cfg)

# VGG-16
vgg16 = torchvision.models.vgg16(pretrained=True)
vgg16.eval()
normalize = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
# The original snippet is truncated after Compose([; the standard ImageNet
# preprocessing pipeline is assumed here to keep the code runnable.
preprocess = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    normalize,
])
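# A short usage sketch of the two models configured above; "example.jpg" is a
# hypothetical path, not part of the original snippet.
import cv2

img = cv2.imread("example.jpg")

# Detectron2's DefaultPredictor expects a BGR numpy image.
outputs = predictor(img)
print(outputs["instances"].pred_classes)

# VGG-16 expects a normalized RGB tensor batch.
rgb = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
batch = preprocess(rgb).unsqueeze(0)
with torch.no_grad():
    logits = vgg16(batch)
print(logits.argmax(dim=1))  # predicted ImageNet class index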
import json
import os

from sklearn.model_selection import train_test_split
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import (DatasetCatalog, MetadataCatalog,
                             build_detection_test_loader)
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset


def retrain_detector(settings):
    """
    settings: properties to be used in the retraining process

    Splits the COCO-formatted data located in annotation_path, then trains
    and evaluates a Detectron2 model. The resulting model is saved in the
    model_path/ folder.

    Returns an object mapping different AP (average precision) metrics to
    the model's scores.
    """
    if len(settings) == 0:
        settings["trainSplit"] = 0.7
        settings["learningRate"] = 0.005
        settings["maxIters"] = 100

    base_path = "annotation_data/"
    coco_path = os.path.join(base_path, "coco")
    output_path = os.path.join(base_path, "output")
    annotation_path = os.path.join(coco_path, "coco_results.json")
    train_path = os.path.join(coco_path, "train.json")
    test_path = os.path.join(coco_path, "test.json")

    # 1) Split coco json file into train and test using cocosplit code
    # Adapted from https://github.com/akarazniewicz/cocosplit/blob/master/cocosplit.py
    with open(annotation_path, "rt", encoding="UTF-8") as annotations_file:
        # Extract info from json
        coco = json.load(annotations_file)
        info = coco["info"]
        licenses = coco["licenses"]
        images = coco["images"]
        annotations = coco["annotations"]
        categories = coco["categories"]

        # Remove images without annotations
        images_with_annotations = set(
            map(lambda a: int(a["image_id"]), annotations))
        images = list(
            filter(lambda i: i["id"] in images_with_annotations, images))

        # Split images and annotations
        x_images, y_images = train_test_split(
            images, train_size=settings["trainSplit"])
        x_ids = list(map(lambda i: int(i["id"]), x_images))
        x_annots = list(
            filter(lambda a: int(a["image_id"]) in x_ids, annotations))
        y_ids = list(map(lambda i: int(i["id"]), y_images))
        y_annots = list(
            filter(lambda a: int(a["image_id"]) in y_ids, annotations))

        # Save to file
        def save_coco(file, info, licenses, images, annotations, categories):
            with open(file, "wt", encoding="UTF-8") as coco_file:
                json.dump(
                    {
                        "info": info,
                        "licenses": licenses,
                        "images": images,
                        "annotations": annotations,
                        "categories": categories
                    },
                    coco_file,
                    indent=2,
                    sort_keys=True)

        save_coco(train_path, info, licenses, x_images, x_annots, categories)
        save_coco(test_path, info, licenses, y_images, y_annots, categories)

    # 2) Use train/test files to retrain detector
    dataset_name = "annotation_coco"
    image_dir = base_path + "rgb/"
    train_data = dataset_name + "_train"
    test_data = dataset_name + "_test"

    DatasetCatalog.clear()
    MetadataCatalog.clear()
    register_coco_instances(train_data, {}, train_path, image_dir)
    register_coco_instances(test_data, {}, test_path, image_dir)
    MetadataCatalog.get(train_data)

    coco_yaml = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(coco_yaml))
    cfg.DATASETS.TRAIN = (train_data, )
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(categories)
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        coco_yaml)  # Let training initialize from model zoo
    cfg.OUTPUT_DIR = output_path
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = settings["learningRate"]  # Make sure LR is good
    cfg.SOLVER.MAX_ITER = settings["maxIters"]  # 300 is good for small datasets

    # Train
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Move model to most recent model folder
    model_dir = os.path.join(base_path, "model")
    model_names = os.listdir(model_dir)

    # Get highest x for model/vx
    model_dirs = list(
        filter(lambda n: os.path.isdir(os.path.join(model_dir, n)),
               model_names))
    model_nums = list(map(lambda x: int(x.split("v")[1]), model_dirs))
    last_model_num = max(model_nums)

    # Move the trained weights into the most recent version folder
    model_path = os.path.join(model_dir, "v" + str(last_model_num))
    new_model_path = os.path.join(model_path, "model_999.pth")
    old_model_path = os.path.join(output_path, "model_final.pth")
    os.replace(old_model_path, new_model_path)

    # Evaluate
    evaluator = COCOEvaluator(test_data, ("bbox", "segm"), False,
                              output_dir="../../annotation_data/output/")
    val_loader = build_detection_test_loader(cfg, test_data)
    inference = inference_on_dataset(trainer.model, val_loader, evaluator)
    # inference keys: bbox, segm
    # bbox and segm keys: AP, AP50, AP75, APs, APm, APl, AP-category1, ...
    inference_json = json.loads(json.dumps(inference).replace("NaN", "null"))
    return inference_json
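# A minimal invocation sketch for retrain_detector(), assuming the
# annotation_data/ layout described in the docstring already exists on disk:
if __name__ == "__main__":
    metrics = retrain_detector({
        "trainSplit": 0.7,
        "learningRate": 0.005,
        "maxIters": 300,  # per the comment above, 300 suits small datasets
    })
    # e.g. metrics["bbox"]["AP"], metrics["segm"]["AP50"]
    print(json.dumps(metrics, indent=2))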