Example #1
    def run(self):
        # Core function of your process
        input = self.getInput(0)
        # Get parameters:
        param = self.getParam()

        if len(input.data["images"]) > 0:
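            # Derive the epoch count from the iteration budget:
            # one epoch = len(images) / batchSize iterations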
            param.cfg["epochs"] = int(param.cfg["maxIter"] * param.cfg["batchSize"] / len(input.data["images"]))

            # complete class names if input dataset has no background class
            if not input.has_bckgnd_class:
                tmp_dict = {0: "background"}
                for k, name in input.data["metadata"]["category_names"].items():
                    tmp_dict[k + 1] = name
                input.data["metadata"]["category_names"] = tmp_dict
                input.has_bckgnd_class = True

            param.cfg["classes"] = len(input.data["metadata"]["category_names"])

            # Call beginTaskRun for initialization
            self.beginTaskRun()

            if param.cfg["expertModeCfg"] == "":
                # Get default config
                cfg = get_cfg()

                # Add specific deeplab config
                add_deeplab_config(cfg)
                cfg.merge_from_file(os.path.join(
                    os.path.dirname(os.path.realpath(__file__)),
                    "model", "configs",
                    "deeplab_v3_plus_R_103_os16_mg124_poly_90k_bs16.yaml"))

                # Generic dataset names that will be used
                cfg.DATASETS.TRAIN = ("datasetTrain",)
                cfg.DATASETS.TEST = ("datasetTest",)
                cfg.SOLVER.MAX_ITER = param.cfg["maxIter"]
                cfg.SOLVER.WARMUP_FACTOR = 0.001
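                # Warm up the learning rate over the first 20% of the iteration budget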
                cfg.SOLVER.WARMUP_ITERS = param.cfg["maxIter"] // 5
                cfg.SOLVER.POLY_LR_FACTOR = 0.9
                cfg.SOLVER.POLY_LR_CONSTANT_FACTOR = 0.0
                cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES = param.cfg["classes"]
                cfg.SOLVER.BASE_LR = param.cfg["learningRate"]
                cfg.MODEL.SEM_SEG_HEAD.ASPP_CHANNELS = 256
                cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE = 4
                cfg.SOLVER.IMS_PER_BATCH = param.cfg["batchSize"]
                cfg.DATALOADER.NUM_WORKERS = 0
                cfg.INPUT_SIZE = (param.cfg["inputWidth"], param.cfg["inputHeight"])
                cfg.TEST.EVAL_PERIOD = param.cfg["evalPeriod"]
                cfg.SPLIT_TRAIN_TEST = param.cfg["splitTrainTest"]
                cfg.SPLIT_TRAIN_TEST_SEED = -1
                cfg.MODEL.BACKBONE.FREEZE_AT = 5
                cfg.CLASS_NAMES = list(input.data["metadata"]["category_names"].values())

                if param.cfg["earlyStopping"]:
                    cfg.PATIENCE = param.cfg["patience"]
                else:
                    cfg.PATIENCE = -1

                if param.cfg["outputFolder"] == "":
                    cfg.OUTPUT_DIR = os.path.dirname(os.path.realpath(__file__)) + "/output"
                elif os.path.isdir(param.cfg["outputFolder"]):
                    cfg.OUTPUT_DIR = param.cfg["outputFolder"]
                else:
                    print("Incorrect output folder path")
            else:
                # Expert mode: load the full training config from a user-supplied YAML
                with open(param.cfg["expertModeCfg"], 'r') as file:
                    cfg = CfgNode.load_cfg(file.read())

            if cfg is not None:
                deeplabutils.register_train_test(input.data["images"], input.data["metadata"],
                                                 train_ratio=cfg.SPLIT_TRAIN_TEST / 100,
                                                 seed=cfg.SPLIT_TRAIN_TEST_SEED)

                os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

                # Save each run in its own timestamped subfolder
                str_datetime = datetime.now().strftime("%d-%m-%YT%Hh%Mm%Ss")
                model_folder = os.path.join(cfg.OUTPUT_DIR, str_datetime)
                os.makedirs(model_folder, exist_ok=True)
                cfg.OUTPUT_DIR = model_folder

                self.trainer = deeplabutils.MyTrainer(cfg, self)
                self.trainer.resume_or_load(resume=False)
                print("Starting training job...")
                launch(self.trainer.train, num_gpus_per_machine=1)
                print("Training job finished.")
                self.trainer = None
                gc.collect()
                torch.cuda.empty_cache()
                with open(cfg.OUTPUT_DIR + "/Detectron2_DeepLabV3Plus_Train_Config.yaml", 'w') as file:
                    file.write(cfg.dump())
            else:
                print("Error : can't load config file "+param.cfg["expertModeCfg"])

        # Call endTaskRun to finalize process
        self.endTaskRun()
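
For reference, the expert-mode branch expects a YAML file produced by CfgNode.dump(). A minimal sketch of that round trip, assuming detectron2 is installed; the expert_cfg.yaml path is hypothetical:

    from detectron2.config import get_cfg, CfgNode

    # Build a default config, dump it to YAML, then reload it exactly the
    # way the expert-mode branch above does.
    cfg = get_cfg()
    cfg.SOLVER.MAX_ITER = 1000

    with open("expert_cfg.yaml", "w") as f:
        f.write(cfg.dump())

    with open("expert_cfg.yaml", "r") as f:
        cfg_reloaded = CfgNode.load_cfg(f.read())

    assert cfg_reloaded.SOLVER.MAX_ITER == 1000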
Example #2
    def run(self):
        # Core function of your process
        # Call beginTaskRun for initialization
        self.beginTaskRun()
        # Seed the RNG so mask/box/label colors are identical on every run
        random.seed(10)
        # Get input:
        task_input = self.getInput(0)
        srcImage = task_input.getImage()

        # Get outputs:
        mask_output = self.getOutput(0)
        graph_output = self.getOutput(2)

        # Get parameters:
        param = self.getParam()

        # The required config and model files are in the output folder generated by the train plugin
        if (self.cfg is None or param.update) and param.configFile != "":
            with open(param.configFile, 'r') as file:
                cfg_data = file.read()
                self.cfg = CfgNode.load_cfg(cfg_data)
                self.classes = self.cfg.CLASS_NAMES

        if self.model is None or param.update:
            if param.dataset == "Cityscapes":
                url = "https://dl.fbaipublicfiles.com/detectron2/DeepLab/Cityscapes-" \
                      "SemanticSegmentation/deeplab_v3_plus_R_103_os16_mg124_poly_90k_bs16/" \
                      "28054032/model_final_a8a355.pkl"
                self.cfg = get_cfg()
                cfg_file = os.path.join(
                    os.path.dirname(__file__),
                    os.path.join(
                        "configs",
                        "deeplab_v3_plus_R_103_os16_mg124_poly_90k_bs16.yaml"))
                add_deeplab_config(self.cfg)
                self.cfg.merge_from_file(cfg_file)
                self.cfg.MODEL.WEIGHTS = url

                self.classes = [
                    'road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
                    'traffic light', 'traffic sign', 'vegetation', 'terrain',
                    'sky', 'person', 'rider', 'car', 'truck', 'bus', 'train',
                    'motorcycle', 'bicycle'
                ]

            elif self.cfg is not None:
                self.cfg.MODEL.WEIGHTS = param.modelFile

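            # No GPU available: run on CPU and swap the GPU-only SyncBN
            # norm layers for plain BatchNorm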
            if not torch.cuda.is_available():
                self.cfg.MODEL.DEVICE = "cpu"
                self.cfg.MODEL.RESNETS.NORM = "BN"
                self.cfg.MODEL.SEM_SEG_HEAD.NORM = "BN"

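            # build_model creates the architecture only; DetectionCheckpointer
            # then loads the weights and can resolve local paths as well as URLs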
            self.model = build_model(self.cfg)
            DetectionCheckpointer(self.model).load(self.cfg.MODEL.WEIGHTS)
            self.model.eval()

        if self.model is not None and srcImage is not None:
            # Convert numpy image (HWC) to the Detectron2 input format:
            # a dict holding a CHW tensor plus the original height and width
            model_input = {}
            h, w, c = np.shape(srcImage)
            model_input["image"] = torch.tensor(srcImage).permute(2, 0, 1)

            if param.dataset == "Cityscapes":
                model_input["image"] = Resize([512, 1024])(model_input["image"])

            model_input["height"] = h
            model_input["width"] = w

            # Inference with pretrained model
            with torch.no_grad():
                pred = self.model([model_input])
                pred = pred[0]["sem_seg"].cpu().numpy()

            # Convert logits to labelled image
            dstImage = (np.argmax(pred, axis=0)).astype(dtype=np.uint8)
            # Set mask image of output (numpy array). No +1 offset is applied:
            # label 0 would normally mean background, but this model has no
            # background class, so 0 is a valid class here.
            mask_output.setImage(dstImage)

            # Create random color map
            if self.colors is None or param.update:
                n = len(self.classes)
                self.colors = []
                for i in range(n):
                    self.colors.append([
                        random.randint(0, 255),
                        random.randint(0, 255),
                        random.randint(0, 255), 255
                    ])

                # Apply color map on labelled image
                self.setOutputColorMap(1, 0, self.colors)
            self.forwardInputImage(0, 1)

            graph_output.setImage(self.draw_legend())
            param.update = False

        # Step progress bar:
        self.emitStepProgress()

        # Call endTaskRun to finalize process
        self.endTaskRun()
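
The middle of run() shows the generic recipe for calling a Detectron2 model directly instead of through DefaultPredictor. A standalone sketch of that conversion, assuming a BGR uint8 numpy image and an eval-mode semantic segmentation model built with build_model as above:

    import torch

    def to_detectron2_input(img_bgr):
        # HWC uint8 numpy image -> CHW tensor, plus the original size so
        # Detectron2 rescales the prediction back to the input resolution
        h, w, _ = img_bgr.shape
        return {"image": torch.as_tensor(img_bgr).permute(2, 0, 1),
                "height": h,
                "width": w}

    # With an eval-mode model from build_model(cfg):
    # with torch.no_grad():
    #     outputs = model([to_detectron2_input(img)])
    # labels = outputs[0]["sem_seg"].argmax(dim=0).to(torch.uint8).cpu().numpy()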
Example #3
def main(config):

    root = expanduser(config["base"]["root"])
    imgs_root = expanduser(config["base"]["imgs_root"])
    jsons_dir = join(root, "jsons")
    model_dir = join(root, "outputs")

    scale = float(config["test_model"]["scale"])
    do_show = config["test_model"]["do_show"]

    register_data(jsons_dir, imgs_root)

    # DatasetCatalog.get must be called so that `metadata` gains its
    # .thing_classes attribute
    datasets = DatasetCatalog.get("test_data")
    metadata = MetadataCatalog.get("test_data")
    
    # Read the cfg back in:
    with open(join(model_dir, "cfg.txt"), "r") as f:
        cfg = f.read()
    # Turn into CfgNode obj:
    cfg = CfgNode.load_cfg(cfg) 

    # Use the weights from the model trained on our custom dataset:
    cfg.MODEL.WEIGHTS = join(model_dir, "model_final.pth") # TODO: have option to use snapshot instead
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01 # make small so I can make PR curve for broad range of scores
    # cfg.DATASETS.TEST = ("val_data", ) # should already be saved from train_model.py

    print("Generating predictor ...")
    predictor = DefaultPredictor(cfg)

    # For saving images with predicted labels:
    output_imgs_dir = join(model_dir, "test_pred_imgs")
    makedirs(output_imgs_dir, exist_ok=True)

    # For saving detection predictions as csv:
    output_csv = join(model_dir, "all_test_preds.csv")
    csv_file_handle = open(output_csv, "w", newline="")
    atexit.register(csv_file_handle.close) 
    col_names = ["img", "x1", "y1", "x2", "y2", "score", "thing","dummy_id"]
    csv_writer = csv.DictWriter(csv_file_handle, fieldnames=col_names)
    csv_writer.writeheader()

    # Select 5 random images to visualize, 
    # but save the prediction results for all imgs:
    rando_idxs = np.random.choice(range(len(datasets)), 5, replace=False).tolist()
    for i, d in enumerate(datasets):

        print(f"Predicting on image {i+1} of {len(datasets)} ...", end="\r")

        img_id = d["image_id"]
        img = cv2.imread(d["file_name"])
        detected = predictor(img)
        
        # Visualize:
        visualizer = Visualizer(img[:, :, ::-1],
                                metadata=metadata,
                                scale=scale,
                                instance_mode=ColorMode.IMAGE)
        visualizer = visualizer.draw_instance_predictions(detected["instances"].to("cpu"))
        
        # Render the visualization once; save only the 5 randomly drawn images:
        pred_img = visualizer.get_image()[:, :, ::-1]
        if i in rando_idxs:
            cv2.imwrite(join(output_imgs_dir, "predicted_" + basename(d["file_name"])), pred_img)

        if do_show:

            cv2.imshow(f"prediction on image {img_id}", pred_img)
            print(f"Press any key to go to the next image ({i+1}/{len(datasets)}) ...")

            key = cv2.waitKey(0) & 0xFF
            if key == ord("q"):
                print("Quitting ...")
                break

        cv2.destroyAllWindows()

        # Stream the predicted box coords and scores to a csv:
        preds = detected['instances'].to('cpu')
        boxes = preds.pred_boxes
        thing_ids = preds.pred_classes.tolist()
        scores = preds.scores
        num_boxes = len(scores)

        for box_idx in range(num_boxes):
            coords = boxes[box_idx].tensor.numpy()
            score = float(scores[box_idx].numpy())
            thing_id = thing_ids[box_idx]  # is int
            thing_class = metadata.thing_classes[thing_id]

            csv_writer.writerow({col_names[0]: basename(d["file_name"]),
                                 col_names[1]: int(coords[0][0]),  # x1
                                 col_names[2]: int(coords[0][1]),  # y1
                                 col_names[3]: int(coords[0][2]),  # x2
                                 col_names[4]: int(coords[0][3]),  # y2
                                 col_names[5]: score,  # score
                                 col_names[6]: thing_class,  # thing
                                 col_names[7]: box_idx})  # dummy id

    print(f"Finished predicting on all {len(datasets)} images from the test data fraction.")
    print(f"Results are stored in {output_csv}")
    print(f"5 sample test images are stored in {output_imgs_dir}\n"
           "Note that the 5 sample test images show all detections with a score greater than 0.01. "
           "This low score cutoff is for test purposes and is intentional. "
           "You should expect to see many false positive labels.\n")

    # Clear GPU memory
    torch.cuda.empty_cache()
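
Because the score threshold is deliberately set to 0.01, the CSV is meant to be re-filtered downstream. A minimal sketch of re-thresholding the saved predictions; the 0.5 cutoff is an arbitrary assumption that would normally come from the PR curve mentioned above:

    import csv

    cutoff = 0.5  # assumption: choose this from a PR curve
    with open("all_test_preds.csv", newline="") as f:
        kept = [row for row in csv.DictReader(f) if float(row["score"]) >= cutoff]

    print(f"{len(kept)} detections at score >= {cutoff}")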
Example #4
def main(config):

    root = expanduser(config["base"]["root"])
    imgs_root = expanduser(config["base"]["imgs_root"])
    jsons_dir = join(root, "jsons")
    model_dir = join(root, "outputs")

    model_pth = expanduser(config["analyze_vids"]["model_pth"])
    vid_ending = '*' + config["analyze_vids"]["vid_ending"]
    expected_obj_nums = config["analyze_vids"]["expected_obj_nums"]
    score_cutoff = float(config["analyze_vids"]["score_cutoff"])
    vids_root = expanduser(config["analyze_vids"]["vids_root"])
    framerate = int(config["analyze_vids"]["framerate"])

    if not model_pth.endswith(".pth"):
        raise ValueError(f"{basename(model_pth)} must be a '.pth' file.")
    if not vid_ending.endswith(".mp4"):
        raise ValueError(f"{vid_ending} must end in '.mp4'")
    # if model_pth not in model_dir:
    #     raise IOError(f"The selected model, {basename(model_pth)}, is not in "
    #                   f"{basename(model_dir)}. Please pick a model that resides")
    if not 0 < score_cutoff < 1:
        raise ValueError(
            f"The testing threshold, {score_cutoff}, must be between 0 and 1.")

    vids = [str(path.absolute()) for path in Path(vids_root).rglob(vid_ending)]
    if len(vids) == 0:
        print(f"No .mp4 videos were found in {vids_root} ...")

    register_data(jsons_dir, imgs_root)

    # DatasetCatalog.get must be called so that `metadata` gains its
    # .thing_classes attribute. Any registered dataset works for these
    # two calls; the datasets themselves are otherwise unused here.
    datasets = DatasetCatalog.get("training_data")
    metadata = MetadataCatalog.get("training_data")

    # Read the cfg back in:
    with open(join(model_dir, "cfg.txt"), "r") as f:
        cfg = f.read()
    # Turn into CfgNode obj:
    cfg = CfgNode.load_cfg(cfg)

    # Use the weights from our chosen model:
    cfg.MODEL.WEIGHTS = model_pth
    # # Pick a confidence cutoff # TODO: based on PR curve
    # # TODO: don't have this as a user parameter
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_cutoff

    print("Generating predictor ...")
    predictor = DefaultPredictor(cfg)

    bgr_palette = [(165, 194, 102), (98, 141, 252), (203, 160, 141),
                   (195, 138, 231), (84, 216, 166), (47, 217, 255),
                   (148, 196, 229), (179, 179, 179)]

    for vid in vids:

        print(f"Analyzing {vid} ...")

        cap = cv2.VideoCapture(vid)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
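        # Frame count comes from container metadata and may be approximate
        # for some codecs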
        pbar = trange(frame_count)
        output_vid = f"{splitext(vid)[0]}_detected.mp4"

        output_csv = f"{splitext(vid)[0]}_detected.csv"
        csv_file_handle = open(output_csv, "w", newline="")
        atexit.register(csv_file_handle.close)
        col_names = [
            "frame", "x1", "y1", "x2", "y2", "score", "thing", "dummy_id"
        ]
        csv_writer = csv.DictWriter(csv_file_handle, fieldnames=col_names)
        csv_writer.writeheader()

        # Define the codec and create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(filename=output_vid,
                              apiPreference=0,
                              fourcc=fourcc,
                              fps=int(framerate),
                              frameSize=(int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                                         int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))),
                              params=None)

        # Run the Detectron2 model on each frame in vid:
        for f, _ in enumerate(pbar):

            ret, frame = cap.read()

            if ret:

                detected = predictor(frame)

                # # Visualize:
                # visualizer = Visualizer(frame[:, :, ::-1],
                #                         metadata=metadata,
                #                         scale=1.0,
                #                         instance_mode=ColorMode)
                # visualizer = visualizer.draw_instance_predictions(detected["instances"].to("cpu"))
                # detected_img = visualizer.get_image()[:, :, ::-1]

                labelled_frame = frame

                # Save the predicted box coords and scores to a dictionary:
                preds = detected['instances'].to('cpu')
                boxes = preds.pred_boxes.tensor.numpy()
                thing_ids = preds.pred_classes.tolist()
                scores = preds.scores.numpy()

                # Group detection indices by thing_id:
                idxs_of_each_thing = {}
                for i, thing_id in enumerate(thing_ids):
                    idxs_of_each_thing.setdefault(thing_id, []).append(i)

                # Split up the data according to thing_id:
                for i, (thing_id,
                        idxs) in enumerate(idxs_of_each_thing.items()):

                    thing_class = metadata.thing_classes[thing_id]

                    if thing_class in expected_obj_nums:

                        expected_obj_num = expected_obj_nums[thing_class]
                        # Count only the boxes of this class, not all detections,
                        # so the indexing below cannot run past the end:
                        num_boxes = len(idxs)

                        assert expected_obj_num <= num_boxes, (
                            f"You expected {expected_obj_num} {thing_class} in "
                            "every frame of the video, according to `expected_obj_nums`, "
                            f"but only {num_boxes} were found in frame {f}.")

                        thing_scores = scores[idxs]
                        thing_boxes = boxes[idxs]

                        # Grab the top n detections by score; `preds` is
                        # sorted by descending score by default:
                        for j in range(expected_obj_num):

                            coords = thing_boxes[j]
                            x1 = int(coords[0])
                            y1 = int(coords[1])
                            x2 = int(coords[2])
                            y2 = int(coords[3])
                            score = float(thing_scores[j])

                            labelled_frame = cv2.rectangle(
                                labelled_frame, (x1, y1), (x2, y2),
                                bgr_palette[thing_id], 2)
                            labelled_frame = cv2.putText(
                                labelled_frame, thing_class,
                                (x2 - 10, y2 - 40), cv2.FONT_HERSHEY_SIMPLEX,
                                1, bgr_palette[thing_id], 2)

                            # TODO: Write some sort of simple filtering thing that skips
                            # big jumps ... maybe just store the last bbox coords and compare.
                            # Define big jumps as a proportion of the frame width or average of
                            # last few frame deltas
                            csv_writer.writerow({
                                col_names[0]: int(f),  # frame
                                col_names[1]: x1,  # x1
                                col_names[2]: y1,  # y1
                                col_names[3]: x2,  # x2
                                col_names[4]: y2,  # y2
                                col_names[5]: score,  # score
                                col_names[6]: thing_class,  # thing
                                col_names[7]: i
                            })  # dummy id

                # Save frame to vid:
                out.write(labelled_frame)

                pbar.set_description(f"Detecting in frame {f+1}/{frame_count}")

        # Finalize the output video and release the capture
        cap.release()
        out.release()

    # Clear GPU memory
    torch.cuda.empty_cache()
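
The TODO above sketches a filter for implausibly large jumps between frames. A minimal version of that idea, assuming the jump is measured as box-centre displacement and the 0.2 width fraction is an arbitrary cutoff:

    import numpy as np

    def is_big_jump(prev_box, box, frame_width, max_frac=0.2):
        # Flag a detection whose centre moved more than a fraction of the
        # frame width since the previous frame's box for the same object.
        if prev_box is None:
            return False
        prev_cx = (prev_box[0] + prev_box[2]) / 2
        prev_cy = (prev_box[1] + prev_box[3]) / 2
        cx = (box[0] + box[2]) / 2
        cy = (box[1] + box[3]) / 2
        return np.hypot(cx - prev_cx, cy - prev_cy) > max_frac * frame_width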