Example #1
    def __init__(self,
                 config,
                 num_train_optimization_steps=3100,
                 include_actions=True):
        # Load the ViLBERT config
        print("Loading ViLBERT model configuration")
        self.vilbert_config = BertConfig.from_json_file(config.BERT_CONFIG)
        self.pre_trained_model = config.BERT_PRE_TRAINED_MODEL
        self.bert_gpu = config.BERT_GPU
        self.detectron2_gpu = config.DETECTRON2_GPU
        self.bert_gpu_device = torch.device(self.bert_gpu)
        self.detectron2_gpu_device = torch.device(self.detectron2_gpu)

        print("Loading ViLBERT model on gpu {}".format(self.bert_gpu))
        self.model = VILBertForVLTasks.from_pretrained(
            self.pre_trained_model,
            self.vilbert_config,
            num_labels=len(self.model_actions) - 2,  # number of predicted actions (6)
        )
        new_voc_size = self.vilbert_config.vocab_size + 8
        self.model.resize_token_embeddings(new_voc_size)
        self.model.to(self.bert_gpu_device)
        print("ViLBERT loaded on GPU {}".format(self.bert_gpu))

        print("Loading Detectron2 predictor on GPU {}".format(
            self.detectron2_gpu))
        detectron2_cfg = self.create_detectron2_cfg(config)
        self.detector = DefaultPredictor(detectron2_cfg)
        # no explicit eval() needed: DefaultPredictor already puts the model in eval mode
        print("Detectron2 loaded")
        self._max_region_num = 36
        self._max_seq_length = 128
        #if include_actions:
        #self._max_seq_length = 128 + 10
        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased",
                                                       do_lower_case=True,
                                                       do_basic_tokenize=True)
        self.criterion = nn.BCEWithLogitsLoss(reduction='mean')
        self.loss = 0
        self.learning_rate = 3e-6
        self.vision_scratch = False
        self.max_steps = 30
        self.grad_accumulation = 1  # gradient accumulation steps
        self.action_history = []
        self.loss_weight = {
            "a": 0.1,
            "b": 0.1,
            "c": 0.8,
            "a_loss": [],
            "b_loss": [],
            "c_loss": [],
        }
        self.save_example = {
            "path_id": "",
            "images": [],
            "boxes": [],
            "box_probs": [],
            "text": [],
            "actions": [],
            "box_one_hots": [],
            "box_labels": []
        }
        optimizer_grouped_parameters = []
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]

        # Standard BERT recipe: no weight decay for bias and LayerNorm parameters.
        for key, value in dict(self.model.named_parameters()).items():
            if value.requires_grad:
                if any(nd in key for nd in no_decay):
                    optimizer_grouped_parameters += [{
                        "params": [value],
                        "lr": self.learning_rate,
                        "weight_decay": 0.0
                    }]
                else:
                    optimizer_grouped_parameters += [{
                        "params": [value],
                        "lr": self.learning_rate,
                        "weight_decay": 0.01
                    }]

        print(len(list(self.model.named_parameters())),
              len(optimizer_grouped_parameters))

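        # Note: warmup, t_total and schedule are BertAdam-style arguments
        # (pytorch_pretrained_bert); torch.optim.Adam does not accept them.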
        self.optimizer = Adam(optimizer_grouped_parameters,
                              lr=self.learning_rate,
                              warmup=0.1,
                              t_total=num_train_optimization_steps,
                              schedule='warmup_constant')

        self.lr_scheduler = ReduceLROnPlateau(self.optimizer,
                                              mode='max',
                                              factor=0.2,
                                              patience=10,
                                              cooldown=4,
                                              threshold=0.001)
Example #2
    def run(self):
        self.beginTaskRun()

        # seed the RNG so mask, box and label colors stay the same on every run
        random.seed(30)

        # Get input :
        img_input = self.getInput(0)
        srcImage = img_input.getImage()

        # Get output :
        output_image = self.getOutput(0)
        output_graph = self.getOutput(1)
        output_graph.setNewLayer("TridentNet")

        # Get parameters :
        param = self.getParam()

        # predictor
        if not self.loaded:
            print("Loading model")
            if not param.cuda:
                self.cfg.MODEL.DEVICE = "cpu"
                self.deviceFrom = "cpu"
            else:
                self.deviceFrom = "gpu"
            self.loaded = True
            self.predictor = DefaultPredictor(self.cfg)
        # reload the model if CUDA is now checked but the model was loaded on CPU
        elif self.deviceFrom == "cpu" and param.cuda:
            print("Loading model")
            self.cfg = get_cfg()
            add_tridentnet_config(self.cfg)
            self.cfg.merge_from_file(self.folder + "/TridentNet_git/configs/" +
                                     self.MODEL_NAME_CONFIG + ".yaml")
            self.cfg.MODEL.WEIGHTS = self.folder + "/models/" + self.MODEL_NAME + ".pkl"
            self.deviceFrom = "gpu"
            self.predictor = DefaultPredictor(self.cfg)
        # reload the model if CUDA is now unchecked but the model was loaded on GPU
        elif self.deviceFrom == "gpu" and not param.cuda:
            print("Loading model")
            self.cfg = get_cfg()
            self.cfg.MODEL.DEVICE = "cpu"
            add_tridentnet_config(self.cfg)
            self.cfg.merge_from_file(self.folder + "/TridentNet_git/configs/" +
                                     self.MODEL_NAME_CONFIG + ".yaml")
            self.cfg.MODEL.WEIGHTS = self.folder + "/models/" + self.MODEL_NAME + ".pkl"
            self.deviceFrom = "cpu"
            self.predictor = DefaultPredictor(self.cfg)

        outputs = self.predictor(srcImage)

        # get outputs instances
        output_image.setImage(srcImage)
        boxes = outputs["instances"].pred_boxes
        scores = outputs["instances"].scores
        classes = outputs["instances"].pred_classes

        # to numpy
        if param.cuda:
            boxes_np = boxes.tensor.cpu().numpy()
            scores_np = scores.cpu().numpy()
            classes_np = classes.cpu().numpy()
        else:
            boxes_np = boxes.tensor.numpy()
            scores_np = scores.numpy()
            classes_np = classes.numpy()

        self.emitStepProgress()

        # keep only the results with probability > threshold
        scores_np_thresh = [s for s in scores_np if float(s) > param.proba]
        self.emitStepProgress()

        if len(scores_np_thresh) > 0:
            # text label with score
            labels = None
            class_names = MetadataCatalog.get(
                self.cfg.DATASETS.TRAIN[0]).get("thing_classes")
            if classes is not None and class_names is not None and len(
                    class_names) > 1:
                labels = [class_names[i] for i in classes]
            if scores_np_thresh is not None:
                if labels is None:
                    labels = [
                        "{:.0f}%".format(s * 100) for s in scores_np_thresh
                    ]
                else:
                    labels = [
                        "{} {:.0f}%".format(l, s * 100)
                        for l, s in zip(labels, scores_np_thresh)
                    ]

            # Show Boxes + labels
            for i in range(len(scores_np_thresh)):
                color = [
                    random.randint(0, 255),
                    random.randint(0, 255),
                    random.randint(0, 255), 255
                ]
                prop_text = core.GraphicsTextProperty()
                prop_text.color = color
                prop_text.font_size = 7
                output_graph.addText(labels[i], float(boxes_np[i][0]),
                                     float(boxes_np[i][1]), prop_text)
                prop_rect = core.GraphicsRectProperty()
                prop_rect.pen_color = color
                prop_rect.category = labels[i]
                output_graph.addRectangle(
                    float(boxes_np[i][0]), float(boxes_np[i][1]),
                    float(boxes_np[i][2] - boxes_np[i][0]),
                    float(boxes_np[i][3] - boxes_np[i][1]), prop_rect)

        # Step progress bar:
        self.emitStepProgress()

        # Call endTaskRun to finalize process
        self.endTaskRun()
Example #3
# set up detectron
path_weight = cfg.SERVICE.DETECT_WEIGHT
path_config = cfg.SERVICE.DETECT_CONFIG
confidence_threshold = cfg.SERVICE.THRESHOLD
num_of_class = cfg.SERVICE.NUMBER_CLASS

detectron = config_detectron()
detectron.MODEL.DEVICE = cfg.SERVICE.DEVICE
detectron.merge_from_file(path_config)
detectron.MODEL.WEIGHTS = path_weight

detectron.MODEL.ROI_HEADS.SCORE_THRESH_TEST = confidence_threshold
detectron.MODEL.ROI_HEADS.NUM_CLASSES = num_of_class

PREDICTOR = DefaultPredictor(detectron)

# create labels
CLASSES = load_class_names(cfg.SERVICE.CLASSES)

image = cv2.imread('images/test.jpg')

height, width, channels = image.shape
center_image = (width//2, height//2)
print("shape image: ", (width, height))
list_boxes, list_scores, list_classes = predict(
    image, PREDICTOR, CLASSES)
print('list_boxes', list_boxes)
print('list_classes', list_classes)

# draw
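
# --- Hedged sketch (not from the original source): one way to finish the
# "draw" step, assuming predict() returns pixel-space [x1, y1, x2, y2] boxes
# with parallel score and class-name lists.
for box, score, cls_name in zip(list_boxes, list_scores, list_classes):
    x1, y1, x2, y2 = [int(v) for v in box]
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(image, "{} {:.0f}%".format(cls_name, float(score) * 100),
                (x1, max(y1 - 5, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                (0, 255, 0), 1)
cv2.imwrite('images/test_pred.jpg', image)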
Example #4
UPLOAD_FOLDER = './'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.secret_key = "secret key"


start = time.time()
# obtain detectron2's default config
cfg = get_cfg()
# load the pre-trained model from the Detectron2 model zoo
cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"))
# set confidence threshold for this model
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
# load model weights
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml")
# create the predictor for pose estimation using the config
pose_detector = DefaultPredictor(cfg)
model_load_done = time.time()
print("Detectron model loaded in ", model_load_done - start)

# Load pretrained LSTM model from checkpoint file
lstm_classifier = ActionClassificationLSTM.load_from_checkpoint("models/saved_model.ckpt")
lstm_classifier.eval()


class DataObject():
    pass


def checkFileType(f: str):
    return f.split('.')[-1] in ['mp4']
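

# --- Hedged sketch (not from the original source): one plausible way to wire
# the pose detector and the LSTM classifier together on a single frame. The
# classify_frame name, the COCO (17, 3) keypoint layout and the flattened
# (1, 1, 34) input shape are assumptions, not taken from the original code.
import torch


def classify_frame(frame):
    outputs = pose_detector(frame)["instances"].to("cpu")
    if len(outputs) == 0:
        return None
    keypoints = outputs.pred_keypoints[0]  # (17, 3): x, y, score per keypoint
    features = keypoints[:, :2].flatten().unsqueeze(0).unsqueeze(0)  # (1, 1, 34)
    with torch.no_grad():
        logits = lstm_classifier(features)
    return int(torch.argmax(logits))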
Example #5
    def run(self):
        self.beginTaskRun()

        # seed the RNG so mask, box and label colors stay the same on every run
        random.seed(30)

        # Get input :
        img_input = self.getInput(0)
        src_img = img_input.getImage()

        # Get output :
        mask_output = self.getOutput(0)
        output_graph = self.getOutput(2)
        output_graph.setImageIndex(1)
        output_graph.setNewLayer("MaskRCNN")

        # Get parameters :
        param = self.getParam()

        # predictor
        if not self.predictor or param.update_model:
            if param.dataset == "COCO":
                self.model_link = "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"
            else:
                self.model_link = "Cityscapes/mask_rcnn_R_50_FPN.yaml"

            self.cfg = get_cfg()
            self.cfg.MODEL.DEVICE = param.device
            self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = self.threshold
            # load config from file(.yaml)
            self.cfg.merge_from_file(model_zoo.get_config_file(
                self.model_link))
            # download the model (.pkl)
            self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
                self.model_link)
            self.predictor = DefaultPredictor(self.cfg)
            param.update_model = False

        outputs = self.predictor(src_img)

        # get outputs instances
        boxes = outputs["instances"].pred_boxes
        scores = outputs["instances"].scores
        classes = outputs["instances"].pred_classes
        masks = outputs["instances"].pred_masks

        # to numpy
        boxes_np = boxes.tensor.cpu().numpy()
        scores_np = scores.cpu().numpy()
        # classes_np = classes.cpu().numpy()

        self.emitStepProgress()

        # keep only the results with probability > threshold
        scores_np_thresh = [s for s in scores_np if float(s) > param.proba]

        if len(scores_np_thresh) > 0:
            # create random color for masks + boxes + labels
            colors = [[0, 0, 0]]
            for i in range(len(scores_np_thresh)):
                colors.append([
                    random.randint(0, 255),
                    random.randint(0, 255),
                    random.randint(0, 255), 255
                ])

            # text labels with scores
            labels = None
            class_names = MetadataCatalog.get(
                self.cfg.DATASETS.TRAIN[0]).get("thing_classes")

            if classes is not None and class_names is not None and len(
                    class_names) > 1:
                labels = [class_names[i] for i in classes]

            if scores_np_thresh is not None:
                if labels is None:
                    labels = [
                        "{:.0f}%".format(s * 100) for s in scores_np_thresh
                    ]
                else:
                    labels = [
                        "{} {:.0f}%".format(l, s * 100)
                        for l, s in zip(labels, scores_np_thresh)
                    ]

            # Show boxes + labels
            for i in range(len(scores_np_thresh)):
                prop_text = core.GraphicsTextProperty()
                # use colors[i + 1]: colors[0] is reserved for the mask background
                prop_text.color = colors[i + 1]
                prop_text.font_size = 7
                prop_rect = core.GraphicsRectProperty()
                prop_rect.pen_color = colors[i + 1]
                prop_rect.category = labels[i]
                output_graph.addRectangle(
                    float(boxes_np[i][0]), float(boxes_np[i][1]),
                    float(boxes_np[i][2] - boxes_np[i][0]),
                    float(boxes_np[i][3] - boxes_np[i][1]), prop_rect)
                output_graph.addText(labels[i], float(boxes_np[i][0]),
                                     float(boxes_np[i][1]), prop_text)

            self.emitStepProgress()

            # label mask
            nb_objects = len(masks[:len(scores_np_thresh)])
            if nb_objects > 0:
                masks = masks[:nb_objects, :, :, None]
                mask_or = masks[0] * nb_objects
                for j in range(1, nb_objects):
                    mask_or = torch.max(mask_or, masks[j] * (nb_objects - j))
                mask_numpy = mask_or.byte().cpu().numpy()
                mask_output.setImage(mask_numpy)

                # apply the mask to the original image; reverse the color list
                # (mask labels were assigned in descending order) so mask colors
                # match the box colors
                colors = [[0, 0, 0]] + colors[1:][::-1]

                self.setOutputColorMap(1, 0, colors)
        else:
            self.emitStepProgress()

        self.forwardInputImage(0, 1)

        # Step progress bar:
        self.emitStepProgress()

        # Call endTaskRun to finalize process
        self.endTaskRun()
Example #6
def main(args):

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(args.cfg))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(args.cfg)
    predictor = DefaultPredictor(cfg)
    
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]

    for video_name in im_list:
        out_name = os.path.join(args.output_dir, os.path.basename(video_name))
        print('Processing {}'.format(video_name))

        boxes = []
        segments = []
        keypoints = []

        for frame_i, im in enumerate(read_video(video_name)):
            t = time.time()
            outputs = predictor(im)['instances'].to('cpu')
            
            print('Frame {} processed in {:.3f}s'.format(frame_i, time.time() - t))

            has_bbox = False
            if outputs.has('pred_boxes'):
                bbox_tensor = outputs.pred_boxes.tensor.numpy()
                if len(bbox_tensor) > 0:
                    has_bbox = True
                    scores = outputs.scores.numpy()[:, None]
                    bbox_tensor = np.concatenate((bbox_tensor, scores), axis=1)
            if has_bbox:
                kps = outputs.pred_keypoints.numpy()
                kps_xy = kps[:, :, :2]
                kps_prob = kps[:, :, 2:3]
                kps_logit = np.zeros_like(kps_prob) # Dummy
                kps = np.concatenate((kps_xy, kps_logit, kps_prob), axis=2)
                kps = kps.transpose(0, 2, 1)
            else:
                kps = []
                bbox_tensor = []
                
            # Mimic Detectron1 format
            cls_boxes = [[], bbox_tensor]
            cls_keyps = [[], kps]
            
            boxes.append(cls_boxes)
            segments.append(None)
            keypoints.append(cls_keyps)

        
        # Video resolution (from the last frame read; assumes the video had at least one frame)
        metadata = {
            'w': im.shape[1],
            'h': im.shape[0],
        }
        
        np.savez_compressed(out_name, boxes=boxes, segments=segments, keypoints=keypoints, metadata=metadata)
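

# --- Hedged sketch (helper assumed by the example above, not shown in the
# source): a minimal read_video generator built on OpenCV.
import cv2


def read_video(path):
    cap = cv2.VideoCapture(path)
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        yield frame
    cap.release()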
Example #7
    def run(self):
        self.beginTaskRun()

        # seed the RNG so mask, box and label colors stay the same on every run
        random.seed(30)

        # Get input :
        img_input = self.getInput(0)
        srcImage = img_input.getImage()

        # Get output :
        output_graph = self.getOutput(2)
        output_graph.setImageIndex(1)
        output_graph.setNewLayer("PanopticSegmentation")

        # Get parameters :
        param = self.getParam()

        # predictor
        if not self.loaded:
            print("Loading model")
            if not param.cuda:
                self.cfg.MODEL.DEVICE = "cpu"
                self.deviceFrom = "cpu"
            else:
                self.deviceFrom = "gpu"
            self.predictor = DefaultPredictor(self.cfg)
            self.loaded = True
        # reload the model if CUDA is now checked but the model was loaded on CPU
        elif self.deviceFrom == "cpu" and param.cuda:
            print("Loading model")
            self.cfg = get_cfg()
            self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = self.threshold
            self.cfg.merge_from_file(model_zoo.get_config_file(
                self.LINK_MODEL))  # load config from file(.yaml)
            self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
                self.LINK_MODEL)  # download the model (.pkl)
            self.predictor = DefaultPredictor(self.cfg)
            self.deviceFrom = "gpu"
        # reload the model if CUDA is now unchecked but the model was loaded on GPU
        elif self.deviceFrom == "gpu" and not param.cuda:
            print("Loading model")
            self.cfg = get_cfg()
            self.cfg.MODEL.DEVICE = "cpu"
            self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = self.threshold
            self.cfg.merge_from_file(model_zoo.get_config_file(
                self.LINK_MODEL))  # load config from file(.yaml)
            self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
                self.LINK_MODEL)  # download the model (.pkl)
            self.predictor = DefaultPredictor(self.cfg)
            self.deviceFrom = "cpu"

        outputs = self.predictor(srcImage)["panoptic_seg"]

        # get outputs of model
        mask = outputs[0]
        infos = outputs[1]

        # set mask output
        mask_output = self.getOutput(0)
        if param.cuda:
            mask_output.setImage(mask.cpu().numpy())
        else:
            mask_output.setImage(mask.numpy())

        self.emitStepProgress()

        # output visualisation
        nb_objects = len(infos)

        # create random color for masks + boxes + labels
        colors = [[0, 0, 0]]
        for i in range(nb_objects):
            colors.append([
                random.randint(0, 255),
                random.randint(0, 255),
                random.randint(0, 255), 255
            ])

        # get class info
        scores = list()
        classesThings = list()
        classesStuffs = list()
        labelsStuffs = list()

        for info in infos:
            if info["isthing"]:
                scores.append(info['score'])
                classesThings.append(info['category_id'])
            else:
                classesStuffs.append(info['category_id'])

        # text label with score - get class names for things and stuff from metadata
        labelsThings = None
        class_names = MetadataCatalog.get(
            self.cfg.DATASETS.TRAIN[0]).get("thing_classes")
        if classesThings is not None and class_names is not None and len(
                class_names) > 1:
            labelsThings = [class_names[i] for i in classesThings]
        if scores is not None:
            if labelsThings is None:
                labelsThings = ["{:.0f}%".format(s * 100) for s in scores]
            else:
                labelsThings = [
                    "{} {:.0f}%".format(l, s * 100)
                    for l, s in zip(labelsThings, scores)
                ]
        class_names_stuff = MetadataCatalog.get(
            self.cfg.DATASETS.TRAIN[0]).get("stuff_classes")
        labelsStuffs = [class_names_stuff[x] for x in classesStuffs]
        labels = labelsThings + labelsStuffs
        seg_ids = torch.unique(mask).tolist()

        self.emitStepProgress()

        # create masks - use for text_pos
        masks = list()
        for sid in seg_ids:
            if param.cuda:
                mymask = (mask == sid).cpu().numpy().astype(bool)
            else:
                mymask = (mask == sid).numpy().astype(bool)
            masks.append(mymask)

        # text pos = median of mask - median is less sensitive to outliers.
        # unrecognized area: the area labeled 0 has no class assigned
        if len(masks) > len(labels):
            for i in range(nb_objects):
                properties_text = core.GraphicsTextProperty()
                properties_text.color = colors[i + 1]
                properties_text.font_size = 7
                text_pos = np.median(masks[i + 1].nonzero(), axis=1)[::-1]
                output_graph.addText(labels[i], text_pos[0], text_pos[1],
                                     properties_text)
        else:
            for i in range(nb_objects):
                properties_text = core.GraphicsTextProperty()
                properties_text.color = colors[i + 1]
                properties_text.font_size = 7
                text_pos = np.median(masks[i].nonzero(), axis=1)[::-1]
                output_graph.addText(labels[i], text_pos[0], text_pos[1],
                                     properties_text)

        # output mask apply to our original image
        self.setOutputColorMap(1, 0, colors)
        self.forwardInputImage(0, 1)

        # Step progress bar:
        self.emitStepProgress()

        # Call endTaskRun to finalize process
        self.endTaskRun()
Example #8
def detect(video_path):
    save_visual_detections = False

    results_dir = 'results/task1_1/retina'

    coco_car_id = 2

    model = 'retinanet_R_50_FPN_3x'
    model_path = 'COCO-Detection/' + model + '.yaml'
    print(model_path)

    # Run a pre-trained detectron2 model
    cfg = get_cfg()

    cfg.merge_from_file(model_zoo.get_config_file(model_path))
    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.5
    cfg.MODEL.RETINANET.NMS_THRESH_TEST = 0.4
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_path)
    cfg.OUTPUT_DIR = results_dir
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

    predictor = DefaultPredictor(cfg)

    det_path = os.path.join(cfg.OUTPUT_DIR, 'detections.txt')
    if os.path.exists(det_path):
        os.remove(det_path)

    vidcap = cv2.VideoCapture(video_path)
    num_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
    # num_frames = 3

    times = []
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    for frame_id in tqdm(range(num_frames)):
        _, frame = vidcap.read()

        start.record()
        outputs = predictor(frame)
        end.record()

        torch.cuda.synchronize()
        times.append(start.elapsed_time(end))

        pred_boxes = outputs["instances"].pred_boxes.to("cpu")
        scores = outputs["instances"].scores.to("cpu")
        pred_classes = outputs["instances"].pred_classes.to("cpu")

        for idx, pred in enumerate(pred_classes):
            if pred.item() == coco_car_id:
                box = pred_boxes[idx].tensor.numpy()[0]

                # Format: <frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <conf>, <x>, <y>, <z>
                det = '{},-1,{},{},{},{},{},-1,-1,-1\n'.format(
                    frame_id + 1, box[0], box[1], box[2] - box[0],
                    box[3] - box[1], scores[idx].item())

                with open(det_path, 'a') as f:
                    f.write(det)

        if save_visual_detections:
            output_path = os.path.join(cfg.OUTPUT_DIR,
                                       'det_frame_' + str(frame_id) + '.png')
            v = Visualizer(frame[:, :, ::-1],
                           MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                           scale=1)
            out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
            cv2.imwrite(output_path, out.get_image()[:, :, ::-1])

    print('Inference time (s/img): ', np.mean(times) / 1000)

    return det_path
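

# --- Hedged sketch (not from the original source): reading back the MOT-format
# detections.txt written above (frame, id, left, top, width, height, conf, x, y, z).
def load_detections(det_path):
    detections = []
    with open(det_path) as f:
        for line in f:
            values = line.strip().split(',')
            frame_id = int(values[0])
            left, top, width, height, conf = (float(v) for v in values[2:7])
            detections.append((frame_id, left, top, width, height, conf))
    return detections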
Example #9
    def cfg(self, cfg: CfgNode):
        self.config = cfg
        self.predictor = DefaultPredictor(cfg)
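
# --- Hedged sketch (context assumed, not shown in the source): the fragment
# above reads like the setter half of a property that rebuilds the predictor
# whenever the config changes, e.g.:
from detectron2.config import CfgNode
from detectron2.engine import DefaultPredictor


class Detector:
    def __init__(self, cfg: CfgNode):
        self.cfg = cfg  # goes through the setter below

    @property
    def cfg(self) -> CfgNode:
        return self.config

    @cfg.setter
    def cfg(self, cfg: CfgNode):
        self.config = cfg
        self.predictor = DefaultPredictor(cfg)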
Example #10
def train(cfgs):
    # dataset
    vg = VisualGenome(cfgs.ann_file, cfgs.vocab_path + 'relations_vocab.txt', cfgs.vocab_path + 'objects_vocab.txt')
    train_dataset = BoxesDataset(vg, cfgs.split_path, cfgs.img_path, split='train')
    train_loader = data.DataLoader(dataset=train_dataset, batch_size=cfgs.batch_size, num_workers=1, shuffle=True, collate_fn=collate_fn)

    val_dataset = BoxesDataset(vg, cfgs.split_path, cfgs.img_path, split='val')
    val_loader = data.DataLoader(dataset=val_dataset, batch_size=cfgs.batch_size, shuffle=False, collate_fn=collate_fn)

    # Model
    model = Classifier(0.5)
    if wandb is not None:
        wandb.watch(model)
    if cfgs.resume:
        checkpoint = torch.load(cfgs.checkpoint + 'checkpoint_final.pkl')
        model.load_state_dict(checkpoint['model_state_dict'])
        epoch = checkpoint['epoch']
        learning_rate = checkpoint['learning_rate']
        train_loss_epoch = checkpoint['train_loss_epoch']
        train_acc_epoch = checkpoint['train_acc_epoch']
        test_acc_epoch = checkpoint['test_acc_epoch']
    else:
        epoch = 0
        learning_rate = cfgs.learning_rate
        train_loss_epoch = []
        train_acc_epoch = []
        test_acc_epoch = []

    if cfgs.mGPUs:
        model = nn.DataParallel(model)

    if torch.cuda.is_available():
        model.cuda()

    # optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(list(model.parameters()), lr=learning_rate)
    # scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

    best_acc = -1.0

    cfg = get_cfg()
    cfg.merge_from_file("../../configs/VG-Detection/faster_rcnn_R_101_C4_attr_caffemaxpool.yaml")
    cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 300
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.6
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2
    # VG Weight
    cfg.MODEL.WEIGHTS = "http://nlp.cs.unc.edu/models/faster_rcnn_from_caffe_attr_original.pkl"
    predictor = DefaultPredictor(cfg)
    csv.field_size_limit(sys.maxsize)

    # for epoch in range(cfgs.max_epochs):
    while epoch < cfgs.max_epochs:
        model.train()
        train_loss = 0.0
        train_acc = 0.0
        count = 0
        accuracy_count = 0
        # start_time = time.time()
        # end_time = time.time()
        progress_bar = tqdm(train_loader, desc='|Train Epoch {}'.format(epoch), leave=False)
        for i, batch in enumerate(progress_bar):
            # end_time = time.time()
            # print('Done (t={:0.2f}s)'.format(end_time - start_time))
            count += 1
            img_id, obj_boxes, sub_boxes, union_boxes, labels = batch
            # print(img_id)

            # obj_boxes, sub_boxes, union_boxes, labels = obj_boxes.cuda(), sub_boxes.cuda(), union_boxes.cuda(), labels.cuda()
            labels = labels.cuda()

            with torch.no_grad():
                obj_feature, sub_feature, union_feature = extract_feature(img_id, predictor, obj_boxes, sub_boxes,
                                                                          union_boxes, cfgs)

            outputs = model(obj_feature, sub_feature, union_feature)

            # outputs_reshape = torch.reshape(outputs, (outputs.size(0) * outputs.size(1), outputs.size(2)))
            labels_reshape = torch.reshape(labels, (labels.size(0) * labels.size(1),))
            labels_nopad = labels_reshape[labels_reshape[:] >= 0]
            # labels_reshape = torch.reshape(labels_nopad, (labels_nopad.size(0) * labels_nopad.size(1),))

            optimizer.zero_grad()
            loss = criterion(outputs, labels_nopad.long())
            loss.backward()
            optimizer.step()

            # print statistics
            train_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            accuracy = torch.sum(predicted == labels_nopad).item()
            train_acc += accuracy

            info_log = {
                'train_loss': '{:.3f}'.format(loss.item()),
                'train_accuracy': '{:.3f}'.format(accuracy / labels_nopad.size(0))
            }

            progress_bar.set_postfix(info_log, refresh=True)
            if wandb is not None:
                wandb.log(info_log)

            # start_time = time.time()
            accuracy_count += labels_nopad.size(0)
            # if count > 10:
            #     break

        loss_aveg = float(train_loss) / count
        acc_aveg = float(train_acc) / accuracy_count
        print('Train Epoch: {}, train_loss: {}, train_accuracy: {}.'.format(epoch, loss_aveg, acc_aveg))
        train_loss_epoch.append(loss_aveg)
        train_acc_epoch.append(acc_aveg)
        if wandb is not None:
            wandb.log({
                'train_loss_epoch': loss_aveg,
                'train_acc_epoch': acc_aveg
            })
        # scheduler.step()
        # calculate the test accuracy
        model.eval()
        if (epoch + 1) % 5 == 0:
            with torch.no_grad():
                test_total = 0
                test_correct = 0
                process_bar_test = tqdm(val_loader, desc='|Test Epoch {}'.format(epoch), leave=False)
                for i, batch in enumerate(process_bar_test):

                    img_id, obj_boxes, sub_boxes, union_boxes, labels = batch
                    # print(img_id)

                    # obj_boxes, sub_boxes, union_boxes, labels = obj_boxes.cuda(), sub_boxes.cuda(), union_boxes.cuda(), labels.cuda()
                    labels = labels.cuda()
                    with torch.no_grad():
                        obj_feature, sub_feature, union_feature = extract_feature(img_id, predictor, obj_boxes,
                                                                                  sub_boxes,
                                                                                  union_boxes, cfgs)

                    outputs = model(obj_feature, sub_feature, union_feature)

                    labels_reshape = torch.reshape(labels, (labels.size(0) * labels.size(1),))
                    labels_nopad = labels_reshape[labels_reshape[:] >= 0]
                    _, predicted = torch.max(outputs, 1)
                    test_total += labels_nopad.size(0)
                    correct = torch.sum(predicted == labels_nopad).item()
                    test_correct += correct
                    process_bar_test.set_postfix({'test_accuracy': '{:.3f}'.format(correct / labels_nopad.size(0))},
                                                 refresh=True)
                    # if count > 10:
                    #     break

                test_acc_aveg = float(test_correct) / test_total
                if wandb is not None:
                    wandb.log({
                        'test_acc_epoch': test_acc_aveg
                    })
                if acc_aveg > best_acc:
                    best_acc = acc_aveg
                    state_dict = (model.module.state_dict()
                                  if cfgs.mGPUs else model.state_dict())
                    torch.save({
                        'epoch': epoch,
                        'model_state_dict': state_dict,
                        'learning_rate': cfgs.learning_rate,
                        'loss': loss_aveg,
                        'accuracy': acc_aveg,
                        'test_accuracy': test_acc_aveg
                    }, cfgs.checkpoint + 'checkpoint_best.pkl')
                print('Epoch: {}, Accuracy of the model on testset: {}'.format(epoch, test_acc_aveg))
                test_acc_epoch.append(test_acc_aveg)

        epoch += 1

    if epoch == cfgs.max_epochs:
        state_dict = (model.module.state_dict()
                      if cfgs.mGPUs else model.state_dict())
        torch.save({
            'epoch': epoch,
            'model_state_dict': state_dict,
            'learning_rate': cfgs.learning_rate,
            'train_loss_epoch': train_loss_epoch,
            'train_acc_epoch': train_acc_epoch,
            'test_acc_epoch': test_acc_epoch
        }, cfgs.checkpoint + 'checkpoint_final.pkl')
Example #11
    def __init__(self, detection_thresh=0.3, use_default_weights=True):
        self.detection_thresh = detection_thresh
        # Configure predictor w/ COCO model
        self.cfg = self.get_model_cfg(use_default_weights=use_default_weights)
        self.predictor = DefaultPredictor(self.cfg)
        print('created segmenter')
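
# --- Hedged sketch (helper assumed by the example above, not shown in the
# source): a plausible get_model_cfg method body; the exact model file is an
# assumption.
from detectron2 import model_zoo
from detectron2.config import get_cfg


def get_model_cfg(self, use_default_weights=True):
    model_file = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = self.detection_thresh
    if use_default_weights:
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    return cfg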
Example #12
    def run_model(self, save_crop_output: bool, save_anno_output: bool, cpu_mode: bool):
        # set to CPU mode if system does not have NVIDIA GPU
        output_list = []

        self.register_dataset()

        cfg = get_cfg()
        cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"))
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.9
        cfg.MODEL.WEIGHTS = "wb_model.pth"
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
        if cpu_mode:
            cfg.MODEL.DEVICE = 'cpu'
        predictor = DefaultPredictor(cfg)
        error_count: int = 0

        # print("Model imported: {:.3f}".format(time.time() - t0))

        # image_file = "input/RC663_0040.76-0047.60_m_DRY.jpg"

        for image_file in os.listdir(self.InputDir):
            image_path = self.InputDir + '/' + image_file
            img: np.ndarray = cv2.imread(image_path)

            # print("Processing file: {:.3f}".format(time.time() - t0))

            if isinstance(img, np.ndarray):  # only process if image file
                print("Processing image: " + image_file)

                """
                # resize image to save on computation time
                dW = 100  # desired width

                imgRe: np.ndarray
                # if img.ndim == 2:  # black and white
                #    (tH, tW) = img.shape
                # else:  # colour
                (tH, tW, tmp) = img.shape
                if tW > dW:
                    imgRe = imutils.resize(img, width=dW)  # resize the image
                    wRatio = tW / dW
                else:
                    imgRe = img.copy()
                    wRatio = 1
                """

                output: Instances = predictor(img)["instances"]  # predict

                obj: dict = output.get_fields()

                scores: np.ndarray = obj['scores'].cpu().numpy()
                # index of the highest-scoring detection
                indmaxscore: int = int(np.argmax(scores)) if len(scores) > 0 else 0

                if len(scores) > 0:
                    box: np.ndarray = obj['pred_boxes'].tensor.cpu().numpy()[indmaxscore]
                    # box = box * wRatio
                else:
                    box = np.ones(1) * (-1)

                # outputlist.append(output)
                # outputDict[image_file] = box

                anno_out_filename = ""
                if save_anno_output:
                    # draw output and save to png
                    v = Visualizer(img[:, :, ::-1], MetadataCatalog.get("wb_test"), scale=1.0)
                    result: VisImage = v.draw_instance_predictions(output.to("cpu"))
                    result_image: np.ndarray = result.get_image()[:, :, ::-1]

                    # file name without extension
                    anno_out_filename: str = self.OutputWBAnnoDir + '/' + os.path.splitext(image_file)[0]
                    anno_out_filename += "_WB_Anno.png"
                    cv2.imwrite(anno_out_filename, result_image)

                    # code for displaying image:
                    # imgout = cv2.imread(out_file_name)
                    # cv2.imshow('Output Image', imgout)

                    # cv2.waitKey(0)
                if len(scores) > 0:
                    if save_crop_output:
                        # crop and save the image
                        # https://www.pyimagesearch.com/2014/01/20/basic-image-manipulations-in-python-and-opencv-resizing-scaling-rotating-and-cropping/
                        crop_img = img[box[1].astype(int):box[3].astype(int), box[0].astype(int):box[2].astype(int)]
                        # file name without extension
                        out_file_name: str = self.OutputWBDir + '/' + os.path.splitext(image_file)[0]
                        out_file_name += "_WB_Cropped.png"
                        cv2.imwrite(out_file_name, crop_img)
                        # add to the outputDictionary
                        # outputDict[image_file] = (out_file_name, anno_out_filename)
                        output_list.append((image_file, image_path, out_file_name, anno_out_filename))
                else:
                    print("WARNING: WHITE BOARD NOT FOUND IN IMAGE FILE: " + image_file + ". SKIPPING IMAGE.")
                    error_count += 1

        return output_list, error_count
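
# --- Hedged sketch (helper assumed by the example above, not shown in the
# source): register_dataset likely just provides the "wb_test" metadata used
# by the Visualizer; the class names here are placeholders.
from detectron2.data import MetadataCatalog


def register_dataset(self):
    MetadataCatalog.get("wb_test").set(thing_classes=["whiteboard", "other"])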
Example #13
def make_predictions():
    # Some basic setup:
    # Setup detectron2 logger
    import detectron2
    from detectron2.utils.logger import setup_logger
    setup_logger()

    # import some common libraries
    import numpy as np
    import glob
    import os
    import cv2
    import random
    from google.colab.patches import cv2_imshow

    # import some common detectron2 utilities
    from detectron2 import model_zoo
    from detectron2.engine import DefaultPredictor
    from detectron2.config import get_cfg
    from detectron2.utils.visualizer import Visualizer
    from detectron2.data import MetadataCatalog
    from detectron2.structures import BoxMode

    class_names = ["specularity", "saturation", "artifact", "blur", "contrast", 
                   "bubbles", "instrument", "blood"]
    
    model_pth=""
    if current_model == "retinanet":    
        model_path = "/home/ws2080/Desktop/codes/detectron/model_retinanet/output_32"
        model_pth = "model_0059999.pth"
    elif current_model == "faster":    
        model_path = "/home/ws2080/Desktop/codes/detectron/model_faster_rcnn_R_50_FPN_3x/output_9"
        model_pth = "model_0139999.pth"
    elif current_model == "cascade":    
        model_path = "/home/ws2080/Desktop/codes/detectron/model_cascade_mask_rcnn_R_50_FPN_3x/output_24"
        model_pth = "model_0059999.pth"
        
    cfg = get_cfg()

    cfg.merge_from_file(model_path+"/config.yaml")
    cfg.MODEL.WEIGHTS = os.path.join(model_path, model_pth)
    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.1   

    cfg.DATASETS.TEST = ("ead_validation_1",)

    predictor = DefaultPredictor(cfg)
    
    # make predictions
    for image_folder in glob.glob(temp_augmented_image_dir+"*/"):
        prediction_output_dir = temp_prediction_dir + image_folder.split("/")[-2] + "/"
        # predictions are made for the images inside image_folder and written into prediction_output_dir
        for im_path in glob.glob(image_folder + "*.jpg"):
            im_name = im_path.split("/")[-1].split(".")[0]
            saved_image_path = prediction_output_dir + im_name + ".txt"
            # this part is up to you: just predict and save the result to saved_image_path
            
            im = cv2.imread(im_path)
            outputs = predictor(im)   

            total_detection = len(outputs["instances"])

            temp_detection_list = []
            detections = outputs["instances"]

            for i in range(total_detection):
                temp_detection = "{} {} {} {} {} {}".format(
                    class_names[int(detections.pred_classes[i])],
                    float(detections.scores[i]),
                    float(detections.pred_boxes.tensor[i, 0]),
                    float(detections.pred_boxes.tensor[i, 1]),
                    float(detections.pred_boxes.tensor[i, 2]),
                    float(detections.pred_boxes.tensor[i, 3]))
                temp_detection_list.append(temp_detection)

            with open(saved_image_path, 'w') as f:
                for item in temp_detection_list:
                    f.write("%s\n" % item)
Example #14
def KITTIMOTS_training_and_evaluation_task(model_name, model_file):
    path = os.path.join(SAVE_PATH, 'train_task', model_name)
    if not os.path.exists(path):
        os.makedirs(path)
    # Load Data
    print('Loading Data.')
    dataloader = KITTIMOTS_Dataloader()

    def kittimots_train():
        return dataloader.get_dicts(train_flag=True)

    def kittimots_test():
        return dataloader.get_dicts(train_flag=False)

    DatasetCatalog.register("KITTIMOTS_train", kittimots_train)
    MetadataCatalog.get("KITTIMOTS_train").set(
        thing_classes=list(KITTI_CATEGORIES.keys()))
    DatasetCatalog.register("KITTIMOTS_test", kittimots_test)
    MetadataCatalog.get("KITTIMOTS_test").set(
        thing_classes=list(KITTI_CATEGORIES.keys()))

    NUM_IMGS = len(kittimots_train())
    print(NUM_IMGS)

    # PARAMETERS
    print('Loading Model.')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('KITTIMOTS_train', )
    cfg.DATASETS.TEST = ('KITTIMOTS_test', )
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = NUM_IMGS // cfg.SOLVER.IMS_PER_BATCH + 1
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2

    # Training
    print('Training....')
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    val_loss = ValidationLoss(cfg)
    trainer.register_hooks([val_loss])
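    # swap the last two hooks so the validation-loss hook runs before the
    # PeriodicWriter and its metrics get written out each iteration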
    trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1]
    trainer.resume_or_load(resume=False)
    trainer.train()

    # EVALUATION
    print('Evaluating....')
    evaluator = COCOEvaluator("KITTIMOTS_test",
                              cfg,
                              False,
                              output_dir="./output/")
    trainer.model.load_state_dict(val_loss.weights)
    trainer.test(cfg, trainer.model, evaluators=[evaluator])
    plot_validation_loss(cfg)

    # Qualitative results
    print('Inference on trained model')
    predictor = DefaultPredictor(cfg)
    predictor.model.load_state_dict(trainer.model.state_dict())
    dataloader = Inference_Dataloader()
    dataset = dataloader.load_data()
    print('Getting Qualitative Results...')
    for i, img_path in enumerate(dataset['test'][:20]):
        img = cv2.imread(img_path)
        outputs = predictor(img)
        v = Visualizer(img[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(
            os.path.join(
                path,
                'Inference_' + model_name + '_trained_' + str(i) + '.png'),
            v.get_image()[:, :, ::-1])
Example #15
def demo(cfg):
    """
    Run inference on an input video or stream from webcam.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)
    # Build the video model and print model statistics.
    model = build.build_model(cfg)
    model.eval()
    misc.log_model_info(model, cfg)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TEST.CHECKPOINT_FILE_PATH
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        ckpt = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        ckpt = cfg.TRAIN.CHECKPOINT_FILE_PATH
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    cu.load_checkpoint(
        ckpt,
        model,
        cfg.NUM_GPUS > 1,
        None,
        inflation=False,
        convert_from_caffe2="caffe2"
        in [cfg.TEST.CHECKPOINT_TYPE, cfg.TRAIN.CHECKPOINT_TYPE],
    )

    if cfg.DETECTION.ENABLE:
        # Load object detector from detectron2.
        dtron2_cfg_file = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_CFG
        dtron2_cfg = get_cfg()
        dtron2_cfg.merge_from_file(model_zoo.get_config_file(dtron2_cfg_file))
        dtron2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.9
        dtron2_cfg.MODEL.WEIGHTS = (
            cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_WEIGHTS)
        logger.info("Initialize detectron2 model.")
        object_predictor = DefaultPredictor(dtron2_cfg)
        # Load the labels of AVA dataset
        with open(cfg.DEMO.LABEL_FILE_PATH) as f:
            labels = f.read().split("\n")[:-1]
        palette = np.random.randint(64, 128, (len(labels), 3)).tolist()
        boxes = []
        logger.info("Finish loading detectron2")
    else:
        # Load the labels of the Kinetics-400 dataset.
        labels_df = pd.read_csv(cfg.DEMO.LABEL_FILE_PATH)
        labels = labels_df["name"].values

    frame_provider = VideoReader(cfg)

    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
    frames = []
    pred_labels = []
    s = 0.0
    for able_to_read, frame in tqdm.tqdm(frame_provider):
        if not able_to_read:
            # end of the video: clear the frame buffer and stop.
            frames = []
            break

        if len(frames) != seq_len:
            frame_processed = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_processed = scale(cfg.DATA.TEST_CROP_SIZE, frame_processed)
            frames.append(frame_processed)
            if cfg.DETECTION.ENABLE and len(frames) == seq_len // 2 - 1:
                mid_frame = frame

        if len(frames) == seq_len:
            start = time()
            if cfg.DETECTION.ENABLE:
                outputs = object_predictor(mid_frame)
                fields = outputs["instances"]._fields
                pred_classes = fields["pred_classes"]
                selection_mask = pred_classes == 0
                # acquire person boxes.
                pred_classes = pred_classes[selection_mask]
                pred_boxes = fields["pred_boxes"].tensor[selection_mask]
                boxes = cv2_transform.scale_boxes(
                    cfg.DATA.TEST_CROP_SIZE,
                    pred_boxes,
                    frame_provider.display_height,
                    frame_provider.display_width,
                )
                boxes = torch.cat(
                    [torch.full((boxes.shape[0], 1), float(0)).cuda(), boxes],
                    axis=1,
                )
            inputs = torch.from_numpy(np.array(frames)).float() / 255.0
            inputs = tensor_normalize(inputs, cfg.DATA.MEAN, cfg.DATA.STD)

            # T H W C -> C T H W.
            inputs = inputs.permute(3, 0, 1, 2)

            # 1 C T H W.
            inputs = inputs.unsqueeze(0)
            if cfg.MODEL.ARCH in cfg.MODEL.SINGLE_PATHWAY_ARCH:
                # Sample frames for the fast pathway.
                index = torch.linspace(0, inputs.shape[2] - 1,
                                       cfg.DATA.NUM_FRAMES).long()
                inputs = [torch.index_select(inputs, 2, index)]
            elif cfg.MODEL.ARCH in cfg.MODEL.MULTI_PATHWAY_ARCH:
                # Sample frames for the fast pathway.
                index = torch.linspace(0, inputs.shape[2] - 1,
                                       cfg.DATA.NUM_FRAMES).long()
                fast_pathway = torch.index_select(inputs, 2, index)

                # Sample frames for the slow pathway.
                index = torch.linspace(
                    0,
                    fast_pathway.shape[2] - 1,
                    fast_pathway.shape[2] // cfg.SLOWFAST.ALPHA,
                ).long()
                slow_pathway = torch.index_select(fast_pathway, 2, index)
                inputs = [slow_pathway, fast_pathway]
            else:
                raise NotImplementedError("Model arch {} is not in {}".format(
                    cfg.MODEL.ARCH,
                    cfg.MODEL.SINGLE_PATHWAY_ARCH +
                    cfg.MODEL.MULTI_PATHWAY_ARCH,
                ))

            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)

            # Perform the forward pass.
            if cfg.DETECTION.ENABLE:
                # When there is nothing in the scene,
                #   use a dummy variable to disable all computations below.
                if not len(boxes):
                    preds = torch.tensor([])
                else:
                    preds = model(inputs, boxes)
            else:
                preds = model(inputs)

            # Gather all the predictions across all the devices to perform ensemble.
            if cfg.NUM_GPUS > 1:
                preds = du.all_gather(preds)[0]

            if cfg.DETECTION.ENABLE:
                # This post-processing is intentionally done on the CPU since my
                #   laptop GPU (RTX 2080) runs out of memory; if your GPU is more
                #   powerful, consider changing this section to run on CUDA.
                preds = preds.cpu().detach().numpy()
                pred_masks = preds > 0.1
                label_ids = [
                    np.nonzero(pred_mask)[0] for pred_mask in pred_masks
                ]
                pred_labels = [[
                    labels[label_id] for label_id in perbox_label_ids
                ] for perbox_label_ids in label_ids]
                # I'm unsure how detectron2 rescales boxes to the original image
                #   size, so I rescale SlowFast's input boxes back instead; it is
                #   safer and works even if the boxes were not rescaled by
                #   cv2_transform.rescale_boxes.
                boxes = boxes.cpu().detach().numpy()
                ratio = (np.min([
                    frame_provider.display_height,
                    frame_provider.display_width,
                ]) / cfg.DATA.TEST_CROP_SIZE)
                boxes = boxes[:, 1:] * ratio
            else:
                ## Option 1: single label inference selected from the highest probability entry.
                # label_id = preds.argmax(-1).cpu()
                # pred_label = labels[label_id]
                # Option 2: multi-label inference selected from probability entries > threshold.
                label_ids = (torch.nonzero(
                    preds.squeeze() > 0.1).reshape(-1).cpu().detach().numpy())
                pred_labels = labels[label_ids]
                logger.info(pred_labels)
                if not list(pred_labels):
                    pred_labels = ["Unknown"]

            # # option 1: remove the oldest frame in the buffer to make place for the new one.
            # frames.pop(0)
            # option 2: empty the buffer
            frames = []
            s = time() - start

        if cfg.DETECTION.ENABLE and pred_labels and boxes.any():
            for box, box_labels in zip(boxes.astype(int), pred_labels):
                cv2.rectangle(
                    frame,
                    tuple(box[:2]),
                    tuple(box[2:]),
                    (0, 255, 0),
                    thickness=2,
                )
                label_origin = box[:2]
                for label in box_labels:
                    label_origin[-1] -= 5
                    (label_width, label_height), _ = cv2.getTextSize(
                        label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
                    cv2.rectangle(
                        frame,
                        (label_origin[0], label_origin[1] + 5),
                        (
                            label_origin[0] + label_width,
                            label_origin[1] - label_height - 5,
                        ),
                        palette[labels.index(label)],
                        -1,
                    )
                    cv2.putText(
                        frame,
                        label,
                        tuple(label_origin),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        (255, 255, 255),
                        1,
                    )
                    label_origin[-1] -= label_height + 5
        if not cfg.DETECTION.ENABLE:
            # Display predicted labels to frame.
            y_offset = 50
            cv2.putText(
                frame,
                "Action:",
                (10, y_offset),
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=0.65,
                color=(0, 235, 0),
                thickness=2,
            )
            for pred_label in pred_labels:
                y_offset += 30
                cv2.putText(
                    frame,
                    "{}".format(pred_label),
                    (20, y_offset),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.65,
                    color=(0, 235, 0),
                    thickness=2,
                )

        # Display prediction speed.
        cv2.putText(
            frame,
            "Speed: {:.2f}s".format(s),
            (10, 25),
            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
            fontScale=0.65,
            color=(0, 235, 0),
            thickness=2,
        )
        frame_provider.display(frame)
        # hit Esc to quit the demo.
        key = cv2.waitKey(1)
        if key == 27:
            break

    frame_provider.clean()
Example #16
def main(config):

    root = expanduser(config["base"]["root"])
    imgs_root = expanduser(config["base"]["imgs_root"])
    jsons_dir = join(root, "jsons")
    model_dir = join(root, "outputs")

    scale = float(config["test_model"]["scale"])
    do_show = config["test_model"]["do_show"]

    register_data(jsons_dir, imgs_root)

    # Need this datasets line, in order for metadata to have .thing_classes attribute
    datasets = DatasetCatalog.get("test_data") 
    metadata = MetadataCatalog.get("test_data")
    
    # Read the cfg back in:
    with open(join(model_dir, "cfg.txt"), "r") as f:
        cfg = f.read()
    # Turn into CfgNode obj:
    cfg = CfgNode.load_cfg(cfg) 

    # Use the weights from the model trained on our custom dataset:
    cfg.MODEL.WEIGHTS = join(model_dir, "model_final.pth") # TODO: have option to use snapshot instead
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01 # make small so I can make PR curve for broad range of scores
    # cfg.DATASETS.TEST = ("val_data", ) # should already be saved from train_model.py

    print("Generating predictor ...")
    predictor = DefaultPredictor(cfg)

    # For saving images with predicted labels:
    output_imgs_dir = join(model_dir, "test_pred_imgs")
    makedirs(output_imgs_dir, exist_ok=True)

    # For saving detection predictions as csv:
    output_csv = join(model_dir, "all_test_preds.csv")
    csv_file_handle = open(output_csv, "w", newline="")
    atexit.register(csv_file_handle.close) 
    col_names = ["img", "x1", "y1", "x2", "y2", "score", "thing", "dummy_id"]
    csv_writer = csv.DictWriter(csv_file_handle, fieldnames=col_names)
    csv_writer.writeheader()

    # Select 5 random images to visualize, 
    # but save the prediction results for all imgs:
    rando_idxs = np.random.choice(range(len(datasets)), 5, replace=False).tolist()
    for i,d in enumerate(datasets):

        print(f"Predicting on image {i+1} of {len(datasets)} ...", end="\r")

        id = d["image_id"]
        img = cv2.imread(d["file_name"])
        detected = predictor(img)
        
        # Visualize:
        visualizer = Visualizer(img[:, :, ::-1], 
                                metadata=metadata, 
                                scale=scale, 
                                instance_mode=ColorMode)
        visualizer = visualizer.draw_instance_predictions(detected["instances"].to("cpu"))        
        
        # Compute pred_img for every image so both saving and showing work:
        pred_img = visualizer.get_image()[:, :, ::-1]

        # Save the 5 randomly drawn images:
        if i in rando_idxs:
            cv2.imwrite(join(output_imgs_dir, ("predicted_" + basename(d["file_name"]))), pred_img)

        if do_show:

            cv2.imshow(f"prediction on image {id}", pred_img)
            print(f"Press any key to go to the next image ({i+1}/{len(datasets)}) ...")

            key = cv2.waitKey(0) & 0xFF
            if key == ord("q"):
                print("Quitting ...")
                break

        cv2.destroyAllWindows()

        # Stream the predicted box coords and scores to a csv:
        preds = detected['instances'].to('cpu')
        boxes = preds.pred_boxes
        thing_ids = preds.pred_classes.tolist()
        scores = preds.scores
        num_boxes = len(scores)

        # use j here so we don't clobber the outer enumerate counter i
        for j in range(num_boxes):
            coords = boxes[j].tensor.numpy()
            score = float(scores[j].numpy())
            thing_id = thing_ids[j]  # is int
            thing_class = metadata.thing_classes[thing_id]

            csv_writer.writerow({col_names[0]: basename(d["file_name"]),
                                 col_names[1]: int(coords[0][0]),  # x1
                                 col_names[2]: int(coords[0][1]),  # y1
                                 col_names[3]: int(coords[0][2]),  # x2
                                 col_names[4]: int(coords[0][3]),  # y2
                                 col_names[5]: score,  # score
                                 col_names[6]: thing_class,  # thing
                                 col_names[7]: j})  # dummy id

    print(f"Finished predicting on all {len(datasets)} images from the test data fraction.")
    print(f"Results are stored in {output_csv}")
    print(f"5 sample test images are stored in {output_imgs_dir}\n"
           "Note that the 5 sample test images show all detections with a score greater than 0.01. "
           "This low score cutoff is for test purposes and is intentional. "
           "You should expect to see many false positive labels.\n")

    # Clear GPU memory
    torch.cuda.empty_cache()
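
The script streams every detection at the deliberately low 0.01 score cutoff, so a typical next step is to filter the CSV during analysis; a hedged sketch with pandas (column names follow the writer above, the 0.5 cutoff is arbitrary):

import pandas as pd

preds = pd.read_csv("all_test_preds.csv")
confident = preds[preds["score"] > 0.5]  # drop the low-score tail
print(confident.groupby("thing").size())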
Example #17
import os
import sys

import cv2
import numpy as np
import tqdm
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

config_file = "./configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"
model_file = "./models/COCO-InstanceSegmentation/X-101-32x8d.pkl"

data_path = sys.argv[1]
feat_path = sys.argv[2]
conf_th = float(sys.argv[3])

cfg = get_cfg()
cfg.merge_from_file(config_file)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = conf_th
cfg.MODEL.WEIGHTS = model_file

predictor = DefaultPredictor(cfg)
vocab = predictor.metadata.thing_classes + ["__background__"]

with open(f"{feat_path}/vocab.txt", 'w') as f:
    for obj in vocab:
        f.write(obj + '\n')

save_dir = os.path.join(feat_path, f"feat_th{conf_th}")
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

img_fnames = os.listdir(data_path)
for fname in tqdm.tqdm(img_fnames):
    im = cv2.imread(os.path.join(data_path, fname))
    if im is None:
        print(f"load image failed, skipping {fname} ...")
        continue
Example #18
    def run(self):
        self.beginTaskRun()

        # seed the RNG so masks + boxes + labels keep the same colors on every run
        random.seed(30)

        # Get input:
        input = self.getInput(0)
        srcImage = input.getImage()

        # Get output:
        mask_output = self.getOutput(0)
        output_graph = self.getOutput(2)
        output_graph.setImageIndex(1)
        output_graph.setNewLayer("PointRend")

        # Get parameters:
        param = self.getParam()

        # predictor
        if not self.loaded:
            print("Loading the model")
            if not param.cuda:
                self.cfg.MODEL.DEVICE = "cpu"
                self.deviceFrom = "cpu"
            else:
                self.deviceFrom = "gpu"
            self.loaded = True
            self.predictor = DefaultPredictor(self.cfg)
        # reload the model if CUDA is now requested but it was loaded on CPU
        elif self.deviceFrom == "cpu" and param.cuda:
            print("Loading the model")
            self.cfg = get_cfg()
            add_pointrend_config(self.cfg)
            self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = self.threshold
            self.cfg.merge_from_file(self.folder + self.path_to_config)
            self.cfg.MODEL.WEIGHTS = self.folder + self.path_to_model
            self.deviceFrom = "gpu"
            self.predictor = DefaultPredictor(self.cfg)
        # reload the model if CUDA is now unchecked but it was loaded on GPU
        elif self.deviceFrom == "gpu" and not param.cuda:
            print("Loading the model")
            self.cfg = get_cfg()
            add_pointrend_config(self.cfg)
            self.cfg.MODEL.DEVICE = "cpu"
            self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = self.threshold
            self.cfg.merge_from_file(self.folder + self.path_to_config)
            self.cfg.MODEL.WEIGHTS = self.folder + self.path_to_model
            self.deviceFrom = "cpu"
            self.predictor = DefaultPredictor(self.cfg)

        outputs = self.predictor(srcImage)

        # get outputs instances
        boxes = outputs["instances"].pred_boxes
        scores = outputs["instances"].scores
        classes = outputs["instances"].pred_classes
        masks = outputs["instances"].pred_masks

        # to numpy
        if param.cuda:
            boxes_np = boxes.tensor.cpu().numpy()
            scores_np = scores.cpu().numpy()
            classes_np = classes.cpu().numpy()
        else:
            boxes_np = boxes.tensor.numpy()
            scores_np = scores.numpy()
            classes_np = classes.numpy()

        self.emitStepProgress()

        # keep only the results with proba > threshold
        scores_np_tresh = [s for s in scores_np if float(s) > param.proba]

        if len(scores_np_tresh) > 0:
            # create random color for masks + boxes + labels
            colors = [[0, 0, 0]]
            for i in range(len(scores_np_tresh)):
                colors.append([
                    random.randint(0, 255),
                    random.randint(0, 255),
                    random.randint(0, 255), 255
                ])

            # text labels with scores
            labels = None
            class_names = MetadataCatalog.get(
                self.cfg.DATASETS.TRAIN[0]).get("thing_classes")
            if classes is not None and class_names is not None and len(
                    class_names) > 1:
                labels = [class_names[i] for i in classes]
            if scores_np_tresh is not None:
                if labels is None:
                    labels = [
                        "{:.0f}%".format(s * 100) for s in scores_np_tresh
                    ]
                else:
                    labels = [
                        "{} {:.0f}%".format(l, s * 100)
                        for l, s in zip(labels, scores_np_tresh)
                    ]

            # Show boxes + labels
            for i in range(len(scores_np_tresh)):
                prop_text = core.GraphicsTextProperty()
                # use colors[i + 1]: colors[0] is reserved for the mask background
                prop_text.color = colors[i + 1]
                prop_text.font_size = 7
                prop_rect = core.GraphicsRectProperty()
                prop_rect.pen_color = colors[i + 1]
                prop_rect.category = labels[i]
                output_graph.addRectangle(
                    float(boxes_np[i][0]), float(boxes_np[i][1]),
                    float(boxes_np[i][2] - boxes_np[i][0]),
                    float(boxes_np[i][3] - boxes_np[i][1]), prop_rect)
                output_graph.addText(labels[i], float(boxes_np[i][0]),
                                     float(boxes_np[i][1]), prop_text)

            self.emitStepProgress()

            # label mask
            nb_objects = len(masks[:len(scores_np_tresh)])
            if nb_objects > 0:
                masks = masks[:nb_objects, :, :, None]
                # build a label image: object j is painted with value nb_objects - j
                mask_or = masks[0] * nb_objects
                for j in range(1, nb_objects):
                    mask_or = torch.max(mask_or, masks[j] * (nb_objects - j))
                mask_numpy = mask_or.byte().cpu().numpy()
                mask_output.setImage(mask_numpy)

                # output mask applied to our original image;
                # reverse the colors so they match the box colors
                colors = [[0, 0, 0]] + colors[1:][::-1]
                self.setOutputColorMap(1, 0, colors)
        else:
            self.emitStepProgress()

        self.forwardInputImage(0, 1)

        # Step progress bar:
        self.emitStepProgress()

        # Call endTaskRun to finalize process
        self.endTaskRun()
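
The three branches above rebuild the predictor with nearly identical code; a hedged refactor into a single helper (the method name is hypothetical, the cfg fields follow the plugin above):

    def ensure_predictor(self, use_cuda):
        # Rebuild the predictor only when the requested device changed.
        wanted = "gpu" if use_cuda else "cpu"
        if self.loaded and self.deviceFrom == wanted:
            return
        self.cfg = get_cfg()
        add_pointrend_config(self.cfg)
        if not use_cuda:
            self.cfg.MODEL.DEVICE = "cpu"
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = self.threshold
        self.cfg.merge_from_file(self.folder + self.path_to_config)
        self.cfg.MODEL.WEIGHTS = self.folder + self.path_to_model
        self.deviceFrom = wanted
        self.loaded = True
        self.predictor = DefaultPredictor(self.cfg)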
Example #19
    metavar='D',
    help="folder where data is located. train_images/ and val_images/ need to be found in the folder"
)

args = parser.parse_args()

# Define a Mask-R-CNN model in Detectron2
cfg = get_cfg()
cfg.merge_from_file(
    "detectron2_repo/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"
)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # Detection Threshold
cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.4  # Non Maximum Suppression threshold (detectron2's key is NMS_THRESH_TEST)
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x/139653917/model_final_2d9806.pkl"
model = DefaultPredictor(cfg)


def detect_birds(model,
                 input_folder,
                 output_folder_crop,
                 generate_masks=False,
                 output_folder_mask="mask_dataset"):
    kernel = np.ones((25, 25), 'uint8')
    for data_folder in os.listdir(input_folder):  # Iterate over train, val and test
        non_cropped = 0
        non_cropped_names = []
        num_imgs = 0
        directory = os.path.join(input_folder, data_folder)
        print("\nDetecting birds on:", data_folder)
Example #20
def build_predictor_vis(cfg):
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_0099999.pth")
    # cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set the testing threshold for this model
    cfg.DATASETS.TEST = (_name, )
    predictor = DefaultPredictor(cfg)
    # print(predictor.model.roi)
    # exit()

    dataset_dicts = DatasetCatalog.get(_name)
    metadata = MetadataCatalog.get(_name)
    # print(dataset_dicts)
    for k in range(min(500, len(dataset_dicts))):  # cap at the dataset size
        d = dataset_dicts[k]
        im = cv2.imread(d["file_name"])

        outputs = predictor(im)
        # exit()
        v_p = Visualizer(
            im[:, :, ::-1],
            metadata=metadata,
            scale=2,
            instance_mode=ColorMode.IMAGE_BW,  # remove the colors of unsegmented pixels
        )
        v_gt = Visualizer(
            im[:, :, ::-1],
            metadata=metadata,
            scale=2,
            instance_mode=ColorMode.IMAGE_BW,
        )
        v_p_nobbx = Visualizer(
            im[:, :, ::-1],
            metadata=metadata,
            scale=2,
            instance_mode=ColorMode.IMAGE_BW,
        )

        v_p = v_p.draw_instance_predictions(outputs["instances"].to("cpu"))
        v_gt = v_gt.draw_dataset_dict(d)
        # zero out the boxes so the third view shows masks without bounding boxes
        outputs["instances"].pred_boxes.tensor[:] = 0
        v_p_nobbx = v_p_nobbx.draw_instance_predictions(
            outputs["instances"].to("cpu"))

        # plt.figure(figsize=[30, 10])
        # plt.subplot(131), plt.imshow(v_p_nobbx.get_image()[:, :, ::-1])
        # plt.subplot(132), plt.imshow(v_p.get_image()[:, :, ::-1])
        # plt.subplot(133), plt.imshow(v_gt.get_image()[:, :, ::-1])
        cv2.imwrite(f"demo/predict_{d['image_id']}_im.png", im)
        # cv2.imwrite(f"demo/predict_{d['image_id']}_ps.png", v_p_nobbx.get_image())
        cv2.imwrite(f"demo/predict_{d['image_id']}_pd.png", v_p.get_image())
        cv2.imwrite(f"demo/predict_{d['image_id']}_gt.png", v_gt.get_image())
        # plt.imshow(v_p_nobbx.get_image()[:, :, ::-1])
        # plt.savefig(f"demo/predict_{d['image_id']}_ps.png", dpi=100), plt.close()
        # plt.imshow(v_p.get_image()[:, :, ::-1])
        # plt.savefig(f"demo/predict_{d['image_id']}_pb.png", dpi=100), plt.close()
        # plt.imshow(v_gt.get_image()[:, :, ::-1])
        # plt.savefig(f"demo/predict_{d['image_id']}_gt.png", dpi=100), plt.close()
        print(f"Saving demo/predict_{d['image_id']}.png")
Example #21
def main(args):    
    
    # define inputs/outputs (several of these paths are hard-coded)
    dataset_dir = args.test_folder
    csv_origin = 'rle.csv'
    csv_submit = args.submit_csv
    ship_proba = args.ship_proba
    csv_probs = args.ship_proba_csv
    
    print("Start creating predictions, dataset from:", dataset_dir)
    test_dataset = create_test_datatset(dataset_dir)
    
    # Only mask images that include a ship.
    if csv_probs is not None:
        print("Combining classifier result:", csv_probs)
        df_probs = pd.read_csv(csv_probs)
        df_probs[['image_id', 'jpg']] = df_probs['file_name'].str.split('.', expand=True)
        df_probs = df_probs[df_probs['ship_proba'] > ship_proba]
        test_dataset = [item for item in test_dataset if item['image_id'] in df_probs['image_id'].values]
    
    print(test_dataset[0])

    # load model; config changes: predicting 768x768 masks, NMS 0
    
    DatasetCatalog.register("submit_test", create_test_datatset)
    od_dataset = MetadataCatalog.get("submit_test")
    
    
    # https://detectron2.readthedocs.io/modules/config.html
    # https://medium.com/@hirotoschwert/digging-into-detectron-2-part-5-6e220d762f9
    cfg = get_cfg()

    cfg.merge_from_file(model_zoo.get_config_file(args.config_file))
    cfg.MODEL.WEIGHTS = os.path.join(args.model_path, "model_final.pth")
    cfg.DATASETS.TEST = ("submit_test1", )
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = args.batch_size_per_image
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = args.num_classes
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.score_thres  
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = args.nms_thres
    if args.anchor_sizes == 'small':
        cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[16, 32, 64, 128, 256, 512]]
    
    predictor = DefaultPredictor(cfg)

    outputs = []
    img_ids = []
    img_rle = []
    img_scores = []
    img_areas = []
    
    def masks_to_rle_csv(img_id, masks, scores):

        index = np.argsort(-scores)
        bg = np.zeros((768, 768), dtype=np.uint8)

        for i in index:

            mask = masks[i, :, :]
            if mask is None:
                continue
            # keep only the pixels not already claimed by a higher-scoring mask
            mask_xor = (mask ^ bg) & mask
            area = mask_xor.sum()
            if area == 0:
                continue
            bg += mask_xor  # ensures the encoded masks have no overlaps

            img_ids.append(img_id)
            img_rle.append(rle_encode(mask_xor))
            img_scores.append(scores[i])
            img_areas.append(area)
    
    for i in range(len(test_dataset)):
        
        img_id = test_dataset[i]['file_name'].split('/')[-1]

        inputs = cv2.imread(test_dataset[i]['file_name'])
        output = predictor(inputs) 

        outputs.append({ 'ImageId':img_id,'Output':output})

        if i % 1000 == 0:
            print(i, len(test_dataset))

    for i in range(len(outputs)):
        img_id, boxes, segms, img_score = extract_result(outputs[i])

        if segms is not None and len(segms) > 0:
            masks = np.array(segms)
            masks_to_rle_csv(img_id, masks, img_score)
        if i % 1000 == 0:
            print(i, len(outputs))

    df = pd.DataFrame({'ImageId':img_ids, 'EncodedPixels':img_rle, 'confidence':img_scores, 'area':img_areas})
    df = df[['ImageId', 'EncodedPixels', 'confidence', 'area']]   # change the column index
    df.to_csv(csv_origin, index=False, sep=str(','))

    df_submit = df

    print("Detectron2:  %d instances,  %d images"  %(df_submit.shape[0], len(get_im_list(df_submit))))

    #df_submit = df_submit[ (df_submit['area']>30) & (df_submit['confidence']>=0.80) ]        
        
    def generate_final_csv(df_with_ship, dataset_dir = dataset_dir):
        print("Detectron2:  %d instances,  %d images"  %(df_with_ship.shape[0], len(get_im_list(df_with_ship))))
        im_no_ship = get_im_no_ship(df_with_ship, dataset_dir)
        df_empty = pd.DataFrame({'ImageId':im_no_ship, 'EncodedPixels':get_empty_list(len(im_no_ship))})
        df_submit = pd.concat([df_with_ship, df_empty], sort=False)
        df_submit.drop(['area','confidence'], axis=1, inplace=True)
        df_submit.to_csv(csv_submit, index=False,sep=str(','))   # str(',') is needed
        print('Done!')
    
    generate_final_csv(df_submit)
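
The example calls rle_encode without defining it; below is the run-length encoder commonly used for this Kaggle ship-segmentation format (a sketch, not necessarily the author's version):

def rle_encode(mask):
    # mask: 2-D binary array; returns the competition's RLE string
    pixels = mask.T.flatten()  # column-major order, as the format requires
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)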
Example #22
def main():
    """ Mask RCNN Object Detection with Detectron2 """
    rospy.init_node("mask_rcnn", anonymous=True)
    bridge = CvBridge()
    start_time = time.time()
    image_counter = 0
    
    register_coco_instances("train_set", {}, "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train/annotations.json", "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train")
    register_coco_instances("test_set", {}, "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test/annotations.json", "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test")
    
    train_metadata = MetadataCatalog.get("train_set")
    print(train_metadata)
    dataset_dicts_train = DatasetCatalog.get("train_set")

    test_metadata = MetadataCatalog.get("test_set")
    print(test_metadata)
    dataset_dicts_test = DatasetCatalog.get("test_set")

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ("train_set")
    cfg.DATASETS.TEST = ()  # no metrics implemented for this dataset
    cfg.DATALOADER.NUM_WORKERS = 4
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # initialize from model zoo
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.01
    cfg.SOLVER.MAX_ITER = 1000 # 300 iterations seems good enough, but you can certainly train longer
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for this toy dataset
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5  # 5 classes (Plate, Carrot, Celery, Pretzel, Gripper)

    # Temporary Solution. If I train again I think I can use the dynamically set path again
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/output/model_final.pth")
    # cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.9   # set the testing threshold for this model
    cfg.DATASETS.TEST = ("test_set")
    predictor = DefaultPredictor(cfg)

    class_names = MetadataCatalog.get("train_set").thing_classes

    # Set up custom cv2 visualization parameters
    # Classes: [name, id]
    #               -
    #          [Plate,   0]
    #          [Carrot,  1]
    #          [Celery,  2]
    #          [Pretzel, 3]
    #          [Gripper, 4]

    # Colors = [blue, green, red]
    color_plate = [0, 255, 0]       # green
    color_carrot = [255, 200, 0]    # blue
    color_celery = [0, 0, 255]      # red
    color_pretzel = [0, 220, 255]   # yellow
    color_gripper = [204, 0, 150]   # purple
    colors = list([color_plate, color_carrot, color_celery, color_pretzel, color_gripper])

    alpha = .4

    run = maskRCNN()
    while not rospy.is_shutdown():
        # Get images
        img = run.get_img()

        if img is not None:
            outputs = predictor(img)
            predictions = outputs["instances"].to("cpu")

            # Get results
            result = run.getResult(predictions, class_names)

            # Visualize using custom cv2 code
            if result is not None:
                result_cls = result.class_names
                result_clsId = result.class_ids
                result_scores = result.scores
                result_masks = result.masks

                # Create copies of the original image
                im = img.copy()
                output = img.copy()

                # Initialize lists
                masks = []
                masks_indices = []
                for i in range(len(result_clsId)):
                    # Obtain current object mask as a numpy array (black and white mask of single object)
                    current_mask = bridge.imgmsg_to_cv2(result_masks[i])

                    # Find current mask indices
                    mask_indices = np.where(current_mask==255)

                    # Add to mask indices list
                    if len(masks_indices) > len(result_clsId):
                        masks_indices = []
                    else:
                        masks_indices.append(mask_indices)

                    # Add to mask list
                    if len(masks) > len(result_clsId):
                        masks = []
                    else:
                        masks.append(current_mask)

                if len(masks) > 0:
                    # Create composite mask
                    composite_mask = sum(masks)

                    # Clip composite mask between 0 and 255   
                    composite_mask = composite_mask.clip(0, 255)

                # # Apply mask to image
                # masked_img = cv2.bitwise_and(im, im, mask=current_mask)

                # Find indices of object in mask
                # composite_mask_indices = np.where(composite_mask==255)

                for i in range(len(result_clsId)):
                    # Select correct object color
                    color = colors[result_clsId[i]]

                    # Change the color of the current mask object
                    im[masks_indices[i][0], masks_indices[i][1], :] = color

                # Apply alpha scaling to image to adjust opacity
                cv2.addWeighted(im, alpha, output, 1 - alpha, 0, output)

                for i in range(len(result_clsId)):
                    # Draw Bounding boxes
                    start_point = (result.boxes[i].x_offset, result.boxes[i].y_offset)
                    end_point = (result.boxes[i].x_offset + result.boxes[i].width, result.boxes[i].y_offset + result.boxes[i].height)
                    start_point2 = (result.boxes[i].x_offset + 2, result.boxes[i].y_offset + 2)
                    end_point2 = (result.boxes[i].x_offset + result.boxes[i].width - 2, result.boxes[i].y_offset + 12)
                    color = colors[result_clsId[i]]
                    box_thickness =  1

                    name = result_cls[i]
                    score = result_scores[i]
                    conf = round(score.item() * 100, 1)
                    string = str(name) + ":" + str(conf) + "%"
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    org = (result.boxes[i].x_offset + 2, result.boxes[i].y_offset + 10)
                    fontScale = .3
                    text_thickness = 1
                    output = cv2.rectangle(output, start_point, end_point, color, box_thickness)
                    output = cv2.rectangle(output, start_point2, end_point2, color, -1)     # Text box
                    output = cv2.putText(output, string, org, font, fontScale, [0, 0, 0], text_thickness, cv2.LINE_AA, False)

                im_rgb = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
                im_msg = bridge.cv2_to_imgmsg(im_rgb, encoding="rgb8")


            # Display Image Counter
            image_counter = image_counter + 1
            # if (image_counter % 11) == 10:
            #     rospy.loginfo("Images detected per second=%.2f", float(image_counter) / (time.time() - start_time))

            # publish only when a result (and thus im_msg) was produced
            if result is not None:
                run.publish(im_msg, result)
        

    return 0
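
The commented-out counter above hints at throughput logging; a hedged sketch of a small helper for it (the class name is hypothetical):

import time
import rospy

class FpsLogger:
    # mirrors the commented-out images-per-second logging above
    def __init__(self, every=10):
        self.t0 = time.time()
        self.count = 0
        self.every = every

    def tick(self):
        self.count += 1
        if self.count % self.every == 0:
            rospy.loginfo("Images detected per second=%.2f",
                          self.count / (time.time() - self.t0))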
Example #23
def experiment_2(exp_name, model_file):

    print('Running Task B experiment', exp_name)
    SAVE_PATH = os.path.join('./results_week_6_task_b', exp_name)
    os.makedirs(SAVE_PATH, exist_ok=True)

    # Loading data
    print('Loading data')
    virtualoader = VirtualKitti()
    kittiloader = KittiMots()

    def vkitti_train():
        return virtualoader.get_dicts()

    def rkitti_val():
        return kittiloader.get_dicts(flag='val')

    def rkitti_test():
        return kittiloader.get_dicts(flag='test')

    DatasetCatalog.register('Virtual_train', vkitti_train)
    MetadataCatalog.get('Virtual_train').set(
        thing_classes=list(KITTI_CATEGORIES.keys()))
    DatasetCatalog.register('KITTI_val', rkitti_val)
    MetadataCatalog.get('KITTI_val').set(
        thing_classes=list(KITTI_CATEGORIES.keys()))
    DatasetCatalog.register('KITTI_test', rkitti_test)
    MetadataCatalog.get('KITTI_test').set(
        thing_classes=list(KITTI_CATEGORIES.keys()))

    # Load model and configuration
    print('Loading Model')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('Virtual_train', )
    cfg.DATASETS.TEST = ('KITTI_val', )
    cfg.DATALOADER.NUM_WORKERS = 4
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 500
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
    cfg.TEST.SCORE_THRESH = 0.5

    # Training
    print('Training')
    trainer = DefaultTrainer(cfg)
    val_loss = ValidationLoss(cfg)
    trainer.register_hooks([val_loss])
    # swap the last two hooks so the validation-loss hook runs before the writer
    trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1]
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Evaluation
    print('Evaluating')
    cfg.DATASETS.TEST = ('KITTI_test', )
    evaluator = COCOEvaluator('KITTI_test', cfg, False, output_dir=SAVE_PATH)
    trainer.model.load_state_dict(val_loss.weights)
    trainer.test(cfg, trainer.model, evaluators=[evaluator])
    print('Plotting losses')
    plot_validation_loss(cfg, cfg.SOLVER.MAX_ITER, exp_name, SAVE_PATH,
                         'validation_loss.png')

    # Qualitative results: visualize some results
    print('Getting qualitative results')
    predictor = DefaultPredictor(cfg)
    predictor.model.load_state_dict(trainer.model.state_dict())
    inputs = rkitti_test()
    inputs = [inputs[i] for i in TEST_INFERENCE_VALUES]
    for i, sample in enumerate(inputs):
        file_name = sample['file_name']
        print('Prediction on image ' + file_name)
        img = cv2.imread(file_name)
        outputs = predictor(img)
        v = Visualizer(img[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(
            os.path.join(SAVE_PATH,
                         'Inference_' + exp_name + '_inf_' + str(i) + '.png'),
            v.get_image()[:, :, ::-1])
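
experiment_2 relies on a project-specific ValidationLoss hook; a minimal hedged sketch of what such a hook typically looks like (this is an assumption, not the project's actual implementation):

import copy
import torch
from detectron2.data import build_detection_train_loader
from detectron2.engine import HookBase

class ValidationLossSketch(HookBase):
    # Hypothetical stand-in: evaluates loss on the test split each step and
    # remembers the weights with the lowest loss seen so far (as .weights).
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg.clone()
        self.cfg.DATASETS.TRAIN = cfg.DATASETS.TEST
        self._loader = iter(build_detection_train_loader(self.cfg))
        self.best_loss = float("inf")
        self.weights = None

    def after_step(self):
        data = next(self._loader)
        with torch.no_grad():
            loss_dict = self.trainer.model(data)
            total = sum(loss_dict.values())
            if total.item() < self.best_loss:
                self.best_loss = total.item()
                self.weights = copy.deepcopy(self.trainer.model.state_dict())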
Example #24
    def __init__(self):

        self.visualize = True
        self.verbose = False
        # st()
        self.mapnames = os.listdir('/home/nel/gsarch/Replica-Dataset/out/')
        # self.mapnames = os.listdir('/hdd/replica/Replica-Dataset/out/')
        # self.num_episodes = len(self.mapnames)
        self.num_episodes = 1  # temporary
        #self.ignore_classes = ['book','base-cabinet','beam','blanket','blinds','cloth','clothing','coaster','comforter','curtain','ceiling','countertop','floor','handrail','mat','paper-towel','picture','pillar','pipe','scarf','shower-stall','switch','tissue-paper','towel','vent','wall','wall-plug','window','rug','logo','set-of-clothing']
        self.include_classes = [
            'chair', 'bed', 'toilet', 'sofa', 'indoor-plant', 'refrigerator',
            'tv-screen', 'table'
        ]
        self.small_classes = ['indoor-plant', 'toilet']
        self.rot_interval = 5.0
        self.radius_max = 3
        self.radius_min = 1
        self.num_flat_views = 3
        self.num_any_views = 7
        self.num_views = 25

        self.num_objects_per_episode = 2
        # Initialize maskRCNN
        cfg_det = get_cfg()
        cfg_det.merge_from_file(
            model_zoo.get_config_file(
                "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
        cfg_det.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
        cfg_det.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
            "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
        cfg_det.MODEL.DEVICE = 'cpu'
        self.cfg_det = cfg_det
        self.maskrcnn = DefaultPredictor(cfg_det)

        # Filter only the five categories we care about
        '''
        class mapping between replica and maskRCNN
        class-name      replica ID      maskRCNN ID
        chair           20              56
        bed             7               59
        dining table    80              60
        toilet          84              61
        couch           76              57
        potted plant    44              58
        # bottle          14              39
        # clock           22              74
        refrigerator    67              72
        tv(tv-screen)   87              62
        # vase            91              75
        '''
        self.maskrcnn_to_catname = {
            56: "chair",
            59: "bed",
            61: "toilet",
            57: "couch",
            58: "indoor-plant",
            72: "refrigerator",
            62: "tv",
            60: "dining-table"
        }
        self.replica_to_maskrcnn = {
            20: 56,
            7: 59,
            84: 61,
            76: 57,
            44: 58,
            67: 72,
            87: 62,
            80: 60
        }

        # self.env = habitat.Env(config=config, dataset=None)
        # st()
        # self.test_navigable_points()
        self.run_episodes()
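
A hedged sketch of how the maskrcnn_to_catname mapping above is typically applied to the predictor output (variable names are assumptions):

        outputs = self.maskrcnn(rgb_image)
        inst = outputs["instances"].to("cpu")
        keep = torch.tensor([c in self.maskrcnn_to_catname
                             for c in inst.pred_classes.tolist()],
                            dtype=torch.bool)
        inst = inst[keep]  # keep only the eight categories of interest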
Example #25
    def set_predictor(self):
        self.predictor = DefaultPredictor(self.cfg)
Example #26
#%% imports (added so the notebook cells below run standalone)
import time
import cv2
import detectron2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.data import MetadataCatalog
from detectron2.utils.visualizer import Visualizer
from IPython.display import display
from PIL import Image

print(f'detectron : {detectron2.__version__}')
#%% object detection
img = cv2.imread("../res/input.jpg")
print(img.shape)
# %%
# creating detectron2 config https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md
cfg_object_detection = get_cfg()
cfg_object_detection.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
cfg_object_detection.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg_object_detection.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")

# object detection predictor
object_detection_predictor = DefaultPredictor(cfg_object_detection)

start_tick = time.time()
outputs = object_detection_predictor(img)
# using `Visualizer` to draw the predictions on the image.
v = Visualizer(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
               MetadataCatalog.get(cfg_object_detection.DATASETS.TRAIN[0]),
               scale=1.2)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))

print(f'delay { time.time() - start_tick }')
display(Image.fromarray(out.get_image()))

#%% instance segmentation
# create config for instance segmentation
cfg_instance_seg = get_cfg()
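
The cell is truncated here; a hedged sketch of how it presumably continues, mirroring the detection setup above:

cfg_instance_seg.merge_from_file(
    model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg_instance_seg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg_instance_seg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
instance_seg_predictor = DefaultPredictor(cfg_instance_seg)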
Example #27
def build_predictor(cfg, threshold_score):
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold_score
    return DefaultPredictor(cfg), cfg
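
A usage sketch (added) for the factory above; the config choice and threshold are arbitrary:

from detectron2 import model_zoo
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
predictor, cfg = build_predictor(cfg, threshold_score=0.5)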
Example #28
def get_detectnet_model(detectron_cfg):
    return DefaultPredictor(detectron_cfg)
Example #29
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
cfg.merge_from_file(
    model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)
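# Added sketch: one hypothetical inference call so the commented-out
# visualization below has `im` and `outputs` defined (image path assumed).
import cv2
im = cv2.imread("input.jpg")  # hypothetical input image
outputs = predictor(im)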

#v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
#out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
#im2 = out.get_image()[:, :, ::-1]
#b,g,r = cv2.split(im2)
#image_rgb2 = cv2.merge([r,g,b])
#plt.figure()
#plt.imshow(image_rgb2)
#plt.show()

############## above is part (a): model setup, with no example .jpg loaded yet

import os

# download, decompress the data
Example #30
def main(args):
    # retrieve configuration file and update the weights
    cfg = get_cfg()
    cfg.merge_from_file(args.cfg)
    # update the model so that it uses the final output weights.
    cfg.MODEL.WEIGHTS = str(Path(cfg.OUTPUT_DIR) / Path("model_final.pth"))

    predictor = DefaultPredictor(cfg)

    # load image.

    # get data from validation data
    # need to get data from the signs dataset, not the hotspots dataset.
    dset = DatasetCatalog.get(args.dataset)

    all_hotspots = []
    all_gt_aligned = []
    all_scores = []

    for example in tqdm(dset):

        img = cv2.imread(example["file_name"])
        # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
        outputs = predictor(img)

        # gets individual hotspot images, save to npz array

        hotspots = extract_boxes(
            img[:, :, ::-1], outputs["instances"].to("cpu").pred_boxes
        )
        all_hotspots.extend(hotspots)

        # get scores
        scores = outputs["instances"].to("cpu").scores

        all_scores.extend(scores.numpy())

        # get groundtruth classes

        # these parameters can be customized.
        matcher = Matcher([0.4, 0.5], [0, -1, 1], allow_low_quality_matches=False)

        # convert the groundtruth annotations into a detectron Boxes object
        gt_boxes = Boxes(
            torch.tensor(
                np.vstack([annotation["bbox"] for annotation in example["annotations"]])
            )
        )

        gt_classes = np.array(
            [annotation["category_id"] for annotation in example["annotations"]]
        )

        pred_boxes = outputs["instances"].to("cpu").pred_boxes

        match_quality_matrix = pairwise_iou(gt_boxes, pred_boxes)
        matched_idxs, matched_labels = matcher(match_quality_matrix)

        # compute ground-truth classes for every box
        aligned_classes = gt_classes[matched_idxs]

        # handle edge case where only one aligned box shows up
        if not isinstance(aligned_classes, np.ndarray):
            aligned_classes = np.array([aligned_classes])  # np.array, not np.ndarray (which takes a shape)

        # handle background classes:
        aligned_classes[matched_labels == 0] = -1
        aligned_classes[matched_labels == -1] = -1

        all_gt_aligned.extend(aligned_classes)

    np.savez(
        Path(args.outpath).with_suffix(".npz"),
        hotspots=np.array(all_hotspots, dtype=object),
        scores=all_scores,
        gt_classes=all_gt_aligned,
    )
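
A hedged sketch (added) of loading the saved arrays back for analysis; the file name is hypothetical (whatever was passed as the outpath argument):

import numpy as np

data = np.load("hotspots.npz", allow_pickle=True)  # hypothetical output path
scores = np.asarray(data["scores"])
gt = np.asarray(data["gt_classes"])
# unmatched / background detections were stored as -1 above
matched = (gt[scores > 0.5] != -1).mean()
print(f"fraction of confident detections matching a ground-truth box: {matched:.2f}")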