Example #1
    def __init__(self, model_path, with_cuda, yolact_config, fast_nms,
                 threshold, display_cv, top_k):
        self.top_k = top_k
        self.threshold = threshold
        self.display_cv = display_cv
        print("loading Yolact ...")

        with torch.no_grad():
            set_cfg(yolact_config)
            print("Configuration: ", yolact_config)

            if with_cuda:
                cudnn.benchmark = True
                cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
            else:
                torch.set_default_tensor_type('torch.FloatTensor')

            print("use cuda: ", with_cuda)

            self.net = Yolact()
            self.net.load_weights(model_path)
            print("Model: ", model_path)
            self.net.eval()

            if with_cuda:
                self.net = self.net.cuda()

            self.net.detect.use_fast_nms = fast_nms
            print("use fast nms: ", fast_nms)
        print("Yolact loaded")
Example #2
def main():
    parse_args()

    rospy.init_node('yolact_ros', anonymous=True)
    if args.config is not None:
        set_cfg(args.config)

    if args.config is None:
        model_path = SavePath.from_str(args.trained_model)
        # TODO: Bad practice? Probably want to do a name lookup instead.
        args.config = model_path.model_name + '_config'
        print('Config not specified. Parsed %s from the file name.\n' %
              args.config)
        set_cfg(args.config)

    if args.detect:
        cfg.eval_mask_branch = False

    if args.dataset is not None:
        set_dataset(args.dataset)

    with torch.no_grad():
        if not os.path.exists('results'):
            os.makedirs('results')

        if args.cuda:
            cudnn.benchmark = True
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')

        if args.resume and not args.display:
            with open(args.ap_data_file, 'rb') as f:
                ap_data = pickle.load(f)
            calc_map(ap_data)
            exit()

        print('Loading model...', end='')
        net = Yolact()
        net.load_weights(args.trained_model)
        net.eval()
        print(' Done.')

        if args.cuda:
            net = net.cuda()

        net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False

        detect_ = DetectImg(net)

    try:
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down")
    cv2.destroyAllWindows()
Example #3
    def __init__(self, cuda=True, detect=False):
        self.trained_model = 'yolact/weights/yolact_im400_53_7000.pth'
        self.config = 'yolact_base_config'

        if self.config is not None:
            yolact_module.set_cfg(self.config)

        if self.trained_model == 'interrupt':
            trained_model = yolact_module.SavePath.get_interrupt('weights/')
        elif self.trained_model == 'latest':
            trained_model = yolact_module.SavePath.get_latest(
                'weights/', cfg.name)

        if self.config is None:
            model_path = yolact_module.SavePath.from_str(trained_model)
            # TODO: Bad practice? Probably want to do a name lookup instead.
            config = model_path.model_name + '_config'
            print('Config not specified. Parsed %s from the file name.\n' %
                  config)
            yolact_module.set_cfg(config)

        if detect:
            cfg.eval_mask_branch = False

        with torch.no_grad():

            if cuda:
                cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
            else:
                torch.set_default_tensor_type('torch.FloatTensor')

            self.net = Yolact()
            self.net.load_weights(self.trained_model)
            self.net.eval()

            if cuda:
                self.net = self.net.cuda()

            self.net.detect.use_fast_nms = True
            self.net.detect.use_cross_class_nms = False
            cfg.mask_proto_debug = False
Example #4
    def predict(self, image_array: np.ndarray):
        """
        :image_path : image numpy array
        Format of returned boxes is [x1,y1,x2,y2], individual centers are tuples
        :return entire mask, individual masks, boxes, centers
        """
        with torch.no_grad():
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            frame = torch.from_numpy(image_array).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
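            # Note: the network below is rebuilt and its weights reloaded on every
            # call to predict(); caching it on the instance would avoid the reload.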
            net = Yolact()
            net.detect.use_fast_nms = True
            net.detect.use_cross_class_nms = True
            net.load_weights(self.weights)
            net.eval()
            preds = net(batch)
            mask_entire, boxes = prep_display(preds,
                                              frame,
                                              None,
                                              None,
                                              undo_transform=False)
            if len(boxes) < 1:
                return mask_entire, None, None, None
            mask_dict = {}
            centers_dict = {}
            boxes_dict = {}
            for index in range(len(boxes)):
                current_box = boxes[index]
                mask_dict[index] = mask_entire[current_box[1]:current_box[3],
                                               current_box[0]:current_box[2]]
                center = Segment.find_center(mask_dict[index])
                if not center:
                    adjusted_center = None
                else:
                    adjusted_center = Segment.adjust_centers(
                        center, current_box)
                centers_dict[index] = adjusted_center
                boxes_dict[index] = current_box

            return mask_entire, mask_dict, centers_dict, boxes_dict
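
A minimal usage sketch for the predict method above. The enclosing class name and its constructor are not shown in the snippet, so Segment and the weight path below are assumptions (suggested by the Segment.* helpers and self.weights).

import cv2

seg = Segment('yolact/weights/yolact_base_54_800000.pth')  # assumed constructor and weight path
image = cv2.imread('frame.png')                             # hypothetical H x W x 3 test image
mask_entire, masks, centers, boxes = seg.predict(image)
if boxes is not None:
    for idx, box in boxes.items():
        print(idx, box, centers[idx])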
Example #5
        if args.resume and not args.display:
            with open(args.ap_data_file, 'rb') as f:
                ap_data = pickle.load(f)
            calc_map(ap_data)
            exit()

        if args.image is None and args.video is None and args.images is None:
            dataset = COCODetection(cfg.dataset.valid_images,
                                    cfg.dataset.valid_info,
                                    transform=BaseTransform(),
                                    has_gt=cfg.dataset.has_gt)
            prep_coco_cats()
        else:
            dataset = None

        print('Loading model...', end='')
        net = Yolact()
        net.load_weights(args.trained_model)
        net.eval()
        print(' Done.')

        if args.cuda:
            net = net.cuda()

        net.detect.use_fast_nms = args.fast_nms
        cfg.mask_proto_debug = args.mask_proto_debug

        detect_ = detect()
        detect_.evalvideo(net, args.video)
Example #6

# print("config")
# print(opt.config)

estimator = PoseNet(num_points=num_points, num_obj=num_obj)
estimator.cuda()
estimator.load_state_dict(torch.load(opt.model))
estimator.eval()

refiner = PoseRefineNet(num_points=num_points, num_obj=num_obj)
refiner.cuda()
refiner.load_state_dict(torch.load(opt.refine_model))
refiner.eval()

yolact = Yolact()
yolact.load_weights(opt.trained_model)
yolact.eval()
yolact.cuda()

torch.set_default_tensor_type('torch.cuda.FloatTensor')
yolact.detect.use_fast_nms = opt.fast_nms
yolact.detect.use_cross_class_nms = opt.cross_class_nms

# evalimage(net, args.image)

import matplotlib.pyplot as plt


def prep_display(dets_out,
                 img,
Example #7
class Yolact_ROS(object):
    def __init__(self, model_path, with_cuda, yolact_config, fast_nms,
                 threshold, display_cv, top_k):
        self.top_k = top_k
        self.threshold = threshold
        self.display_cv = display_cv
        print("loading Yolact ...")

        with torch.no_grad():
            set_cfg(yolact_config)
            print("Configuration: ", yolact_config)

            if with_cuda:
                cudnn.benchmark = True
                cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
            else:
                torch.set_default_tensor_type('torch.FloatTensor')

            print("use cuda: ", with_cuda)

            self.net = Yolact()
            self.net.load_weights(model_path)
            print("Model: ", model_path)
            self.net.eval()

            if with_cuda:
                self.net = self.net.cuda()

            self.net.detect.use_fast_nms = fast_nms
            print("use fast nms: ", fast_nms)
        print("Yolact loaded")

    def prediction(self, img):
        self.net.detect.use_cross_class_nms = True
        cfg.mask_proto_debug = False

        with torch.no_grad():
            frame = torch.Tensor(img).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            time_start = time.perf_counter()
            preds = self.net(batch)
            h, w, _ = img.shape
            t = postprocess(preds,
                            w,
                            h,
                            visualize_lincomb=False,
                            crop_masks=True,
                            score_threshold=self.threshold)
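            # postprocess returns (classes, scores, boxes, masks); slice each to the
            # top_k highest-scoring detections below.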
            torch.cuda.synchronize()
            masks = t[3][:self.top_k]
            classes, scores, bboxes = [
                x[:self.top_k].cpu().numpy() for x in t[:3]
            ]
            time_elapsed = time.perf_counter() - time_start
            num_dets_to_consider = min(self.top_k, classes.shape[0])

            for i in range(num_dets_to_consider):
                if scores[i] < self.threshold:
                    num_dets_to_consider = i
                    break

            if num_dets_to_consider >= 1:
                masks = masks[:num_dets_to_consider, :, :, None]

            masks_msg = masks.cpu().detach().numpy()
            masks_msg = masks_msg.astype(np.uint8)
            scores_msg = np.zeros(num_dets_to_consider)
            class_label_msg = np.empty(num_dets_to_consider, dtype="S20")
            bboxes_msg = np.zeros([num_dets_to_consider, 4], dtype=int)
            for i in reversed(range(num_dets_to_consider)):
                scores_msg[i] = scores[i]
                class_label_msg[i] = cfg.dataset.class_names[classes[i]]
                bboxes_msg[i] = bboxes[i]
                print(class_label_msg[i].decode(), "%.2f" % (scores_msg[i]))

            os.system('cls' if os.name == 'nt' else 'clear')
            print("%.2f" % (1 / time_elapsed), "hz")

            if self.display_cv:
                self.display(frame, masks, classes, scores, bboxes,
                             num_dets_to_consider)

            return masks_msg, class_label_msg, scores_msg, bboxes_msg

    def display(self,
                img,
                masks,
                pred_classes,
                scores,
                bboxes,
                num_dets_to_consider,
                mask_alpha=0.75):
        img_gpu = img / 255.0
        if num_dets_to_consider == 0:
            return (img_gpu * 255).byte().cpu().numpy()

        use_class_color = True
        colors = torch.cat([
            self.get_color(
                i, pred_classes, use_class_color,
                on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for i in range(num_dets_to_consider)
        ],
                           dim=0)
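        # Alpha-blend the colored instance masks onto the image: masks_color holds each
        # instance's tint, and inv_alph_masks (1 - alpha * mask) dims whatever lies under a mask.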
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
        inv_alph_masks = masks * (-mask_alpha) + 1
        masks_color_summand = masks_color[0]

        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        for i in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = bboxes[i, :]
            color = self.get_color(i, pred_classes, use_class_color)
            score = scores[i]
            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)
            _class = cfg.dataset.class_names[pred_classes[i]]
            text_str = '%s: %.2f' % (_class, score)
            font_face = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.6
            font_thickness = 1
            text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale,
                                             font_thickness)[0]
            text_pt = (x1, y1 - 3)
            text_color = [255, 255, 255]
            cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4),
                          color, -1)
            cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                        text_color, font_thickness, cv2.LINE_AA)

        cv2.imshow("yolact", img_numpy)
        cv2.waitKey(1)

    def get_color(self, i, pred_classes, class_color, on_gpu=None):
        color_cache = defaultdict(lambda: {})
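        # Note: this cache is rebuilt on every call, so cached colors never persist
        # between calls; a class- or module-level cache would actually be reused.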
        color_idx = (pred_classes[i] * 5 if class_color else i *
                     5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]

            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color

            return color
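
A minimal usage sketch for the Yolact_ROS class above; the weight path, config name, thresholds, and input image below are placeholders rather than values taken from the snippet.

import cv2

bgr_image = cv2.imread('frame.png')  # hypothetical input image (H x W x 3 uint8)
node = Yolact_ROS(model_path='weights/yolact_base_54_800000.pth',  # hypothetical path
                  with_cuda=True, yolact_config='yolact_base_config',
                  fast_nms=True, threshold=0.3, display_cv=False, top_k=15)
masks, labels, scores, bboxes = node.prediction(bgr_image)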
Example #8
class Real_time_yolact():
    def __init__(self, cuda=True, detect=False):
        self.trained_model = 'yolact/weights/yolact_im400_53_7000.pth'
        self.config = 'yolact_base_config'

        if self.config is not None:
            yolact_module.set_cfg(self.config)

        if self.trained_model == 'interrupt':
            trained_model = yolact_module.SavePath.get_interrupt('weights/')
        elif self.trained_model == 'latest':
            trained_model = yolact_module.SavePath.get_latest(
                'weights/', cfg.name)

        if self.config is None:
            model_path = yolact_module.SavePath.from_str(trained_model)
            # TODO: Bad practice? Probably want to do a name lookup instead.
            config = model_path.model_name + '_config'
            print('Config not specified. Parsed %s from the file name.\n' %
                  config)
            yolact_module.set_cfg(config)

        if detect:
            cfg.eval_mask_branch = False

        with torch.no_grad():

            if cuda:
                cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
            else:
                torch.set_default_tensor_type('torch.FloatTensor')

            self.net = Yolact()
            self.net.load_weights(self.trained_model)
            self.net.eval()

            if cuda:
                self.net = self.net.cuda()

            self.net.detect.use_fast_nms = True
            self.net.detect.use_cross_class_nms = False
            cfg.mask_proto_debug = False

    def segmentation(self, img):

        with torch.no_grad():
            h, w, _ = img.shape
            frame = torch.from_numpy(img).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            preds = self.net(batch)
            classes, scores, boxes, masks = yolact_module.prep_display(
                5,
                preds,
                frame,
                0.5,
                h,
                w,
                undo_transform=True,
                class_color=False,
                mask_alpha=0.45,
                fps_str='')

            if not len(masks):
                return np.zeros((img.shape[0], img.shape[1]))
            mask = masks[0]
            mask = mask.cpu().numpy()

            h, w = mask.shape
            filled_mask = np.zeros([h, w])

            contours = yolact_module.cv_contours(np.uint8(mask))
            C = len(contours)
            contours = sorted(contours, key=lambda x: cv2.contourArea(x))
            cv2.drawContours(filled_mask, contours, C - 1, 255,
                             thickness=-1)  # fill the biggest (last-sorted) contour

            return filled_mask

    def process(self, image_1, image_2):
        # Get segmentation masks as numpy arrays
        mask_2 = self.segmentation(img=image_2)
        mask_2 = np.uint8(mask_2)

        return mask_2
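
A minimal usage sketch for Real_time_yolact; the input image is a placeholder, and the weight path hard-coded in __init__ must exist for construction to succeed.

import cv2

segmenter = Real_time_yolact(cuda=True)
frame = cv2.imread('frame.png')                # hypothetical input image
mask = segmenter.segmentation(frame)           # 255 inside the largest detected instance, 0 elsewhere
cv2.imwrite('mask.png', mask.astype('uint8'))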
Example #9
def train():
    if cfg.dataset is None:
        print("Missing dataset in config!")
        exit(-1)

    save_folder = Path(args.save_folder)
    save_folder.mkdir(exist_ok=True, parents=True)

    epoch_status_file_path = Path(args.epoch_status_file)

    dataset = COCODetection(
        image_path=cfg.dataset.train_images,
        info_file=cfg.dataset.train_info,
        transform=SSDAugmentation(cfg, MEANS),
        label_map=cfg.dataset.get_valid_label_map(),
    )

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(
            image_path=cfg.dataset.valid_images,
            info_file=cfg.dataset.valid_info,
            transform=BaseTransform(cfg, MEANS),
            label_map=cfg.dataset.get_valid_label_map(),
        )

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact(cfg)
    net = yolact_net
    net.train()

    if args.log:
        log = Log(
            cfg.name,
            args.log_folder,
            dict(args._get_kwargs()),
            overwrite=(args.resume is None),
            log_gpu_stats=args.log_gpu,
        )

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == "interrupt":
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == "latest":
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print("Resuming training, loading {}...".format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print("Initializing weights...")
        yolact_net.init_weights(backbone_path=cfg.backbone.path)

    optimizer = optim.SGD(
        net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.decay
    )
    criterion = MultiBoxLoss(
        num_classes=cfg.num_classes,
        pos_threshold=cfg.positive_iou_threshold,
        neg_threshold=cfg.negative_iou_threshold,
        negpos_ratio=cfg.ohem_negpos_ratio,
        cfg=cfg,
    )

    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(",")]
        if sum(args.batch_alloc) != args.batch_size:
            print(
                "Error: Batch allocation (%s) does not sum to batch size (%s)."
                % (args.batch_alloc, args.batch_size)
            )
            exit(-1)

    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    # Initialize everything
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn:
        yolact_net.freeze_bn(True)

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = math.ceil(len(dataset) / args.batch_size)
    print(f"\n\t ==> Number of iterations per epoch: {epoch_size}")
    num_epochs = min(math.ceil(cfg.max_iter / epoch_size), cfg.max_num_epochs)
    print(f"\t ==> Number of epochs: {num_epochs}\n")

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(
        dataset,
        args.batch_size,
        num_workers=args.num_workers,
        shuffle=True,
        collate_fn=detection_collate,
        pin_memory=True,
    )

    save_path = lambda epoch, iteration: SavePath(cfg.name, epoch, iteration).get_path(
        root=args.save_folder
    )
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print("Begin training!")
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            with epoch_status_file_path.open(
                "w", encoding="utf-8"
            ) as epoch_status_file:
                json.dump({"cur_epoch": epoch}, epoch_status_file)

            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop at the end of this epoch (matters when resuming from start_iter)
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from cfg.lr_warmup_init
                # up to args.lr over the first cfg.lr_warmup_until iterations
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(
                        optimizer,
                        (args.lr - cfg.lr_warmup_init)
                        * (iteration / cfg.lr_warmup_until)
                        + cfg.lr_warmup_init,
                    )

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while (
                    step_index < len(cfg.lr_steps)
                    and iteration >= cfg.lr_steps[step_index]
                ):
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma ** step_index))

                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                # Forward Pass + Compute loss at the same time (see CustomDataParallel and NetLoss)
                losses = net(datum)

                losses = {
                    k: (v).mean() for k, v in losses.items()
                }  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])

                # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                # all_loss = sum([v.mean() for v in losses.values()])

                # Backprop
                loss.backward()  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(
                            seconds=(cfg.max_iter - iteration) * time_avg.get_avg()
                        )
                    ).split(".")[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum(
                        [
                            [k, loss_avgs[k].get_avg()]
                            for k in loss_types
                            if k in losses
                        ],
                        [],
                    )
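                    # loss_labels interleaves [name, value] pairs to fill the repeated
                    # ' %s: %.3f |' fields in the format string below.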

                    print(
                        (
                            "[%3d] %7d ||"
                            + (" %s: %.3f |" * len(losses))
                            + " T: %.3f || ETA: %s || timer: %.3f"
                        )
                        % tuple(
                            [epoch, iteration] + loss_labels + [total, eta_str, elapsed]
                        ),
                        flush=True,
                    )

                if args.log:
                    precision = 5
                    loss_info = {k: round(losses[k].item(), precision) for k in losses}
                    loss_info["T"] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = iteration % 10 == 0  # nvidia-smi is sloooow

                    log.log(
                        "train",
                        loss=loss_info,
                        epoch=epoch,
                        iter=iteration,
                        lr=round(cur_lr, 10),
                        elapsed=elapsed,
                    )

                    log.log_gpu_stats = args.log_gpu

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print("Saving state, iter:", iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if (
                            args.keep_latest_interval <= 0
                            or iteration % args.keep_latest_interval
                            != args.save_interval
                        ):
                            print("Deleting old save...")
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(
                        epoch,
                        iteration,
                        yolact_net,
                        val_dataset,
                        log if args.log else None,
                    )

        # Compute validation mAP after training is finished
        compute_validation_map(
            epoch, iteration, yolact_net, val_dataset, log if args.log else None
        )
    except KeyboardInterrupt:
        if args.interrupt:
            print("Stopping early. Saving network...")

            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            # Wait for all torch processes to finish their task
            time.sleep(1)
            yolact_net.save_weights(save_path(epoch, repr(iteration) + "_interrupt"))
        exit()

    print("Saving weights...")
    yolact_net.save_weights(save_path(epoch, repr(iteration) + "_end"))