def __init__(self, id):
        """Open the threaded webcam stream and load the YOLACT++ base network.

        Args:
            id: Capture source, forwarded to ``WebcamVideoStream`` as ``src``.
        """
        # self.cap = cv2.VideoCapture(id)
        self.cap = WebcamVideoStream(src=id).start()

        # Frame size (640 x 360 is the commented-out alternative).
        self.width, self.height = 1280, 720

        # Detection filtering options.
        self.crop = True
        self.score_threshold = 0.15
        self.top_k = 30

        # Rendering switches.
        self.display_lincomb = False
        self.display_masks = True
        self.display_fps = False
        self.display_text = True
        self.display_bboxes = True
        self.display_scores = False

        # NMS flavour; copied onto the network's detect layer below.
        self.fast_nms = True
        self.cross_class_nms = True

        # The global config must be selected before the network is built.
        self.config = 'yolact_plus_base_config'
        print('Config specified. Parsed %s from the file name.\n' %
              self.config)
        set_cfg(self.config)

        print('Loading model...', end='')
        self.trained_model = 'weights/yolact_plus_base_54_800000.pth'
        net = Yolact()
        net.load_weights(self.trained_model)
        net.detect.use_fast_nms = self.fast_nms
        net.detect.use_cross_class_nms = self.cross_class_nms
        net.eval()
        # `device` is a module-level name defined outside this snippet.
        self.model = net.to(device, non_blocking=True)
        print(' Done.')

        self.model_path = SavePath.from_str(self.trained_model)
Пример #2
0
 def __init__(self,
              trained_model: str,
              save_json=True,
              output_dir=None,
              output_name="detection",
              output_num=5):
     """
     Initialise YOLACT.

     Args:
         trained_model: path of the weights file handed to ``load_weights``.
         save_json: whether the computed results should be saved to a JSON file.
         output_dir: when ``save_json`` is true, where the JSON file is saved.
         output_name: name of the saved JSON file.
         output_num: presumably the number of classes to output (the original
             author was unsure as well).
     """
     # Step 0: plain bookkeeping attributes.
     self.save_json = save_json
     self.output_num = output_num

     # Step 1: a Detections collector is only created when JSON output was
     # requested AND a target directory was given; otherwise it stays None.
     wants_json = self.save_json and output_dir is not None
     self.detections = Detections(output_dir, output_name) if wants_json else None

     # Step 2: build the YOLACT network.
     with torch.no_grad():
         set_cfg("yolact_base_config")
         torch.cuda.set_device(1)
         cudnn.benchmark = cudnn.fastest = True
         torch.set_default_tensor_type('torch.cuda.FloatTensor')
         network = Yolact()
         # TODO: the weights used here still need to be changed.
         # network.load_weights('./weights/yolact_base_54_800000.pth')
         network.load_weights(trained_model)
         network.eval()
         self.net = network.cuda()
     print("load model complete")
Пример #3
0
class YOLACT_MODEL():
    """YOLACT (ResNet-50 config) wrapper that masks an image with its first detection."""

    def __init__(self, opts):
        """Select the config, load the checkpoint and move the network to the GPU.

        Args:
            opts: Mapping providing ``'checkpoint'`` (weights path passed to
                ``load_weights``) and ``'threshold'`` (score threshold used by
                ``detect``).
        """
        #concat the two files to one file 
        # if not os.path.isfile('weights/yolact_resnet50_54_800000.pth'):    
        #     script = "cat weights/a* > weights/yolact_resnet50_54_800000.pth"
        #     call(script, shell=True)

        set_cfg('yolact_resnet50_config')
        cudnn.benchmark = True
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        self.net = Yolact()
        self.net.load_weights(opts['checkpoint'])
        print("done.")

        self.net.eval()
        self.net = self.net.cuda()

        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        self.color_cache = defaultdict(lambda: {})
        self.threshold = opts['threshold']

    def detect(self, img):
        """Run the network on one image and return the masked result.

        Args:
            img: Anything ``np.array`` can turn into an image array; ``display``
                unpacks its shape as ``(h, w, channels)``.

        Returns:
            The uint8 numpy image produced by ``display``.
        """
        numpy_image = np.array(img)
        print('starting inference...')
        # Inference only: skip autograd bookkeeping so no graph is kept alive.
        with torch.no_grad():
            frame = torch.from_numpy(numpy_image).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            preds = self.net(batch)
            print("done.")
            output_image = self.display(preds, frame, None, None,
                                         undo_transform=False, score_threshold=self.threshold)
        return output_image

    def display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, top_k = 100, score_threshold = 0.3):
        """Multiply the image by the mask of the first post-processed detection.

        Args:
            dets_out: Raw network output, forwarded to ``postprocess``.
            img: Image tensor; its shape is unpacked as ``(h, w, channels)``.
            h, w: Ignored; both are overwritten from ``img.shape``.
            undo_transform, class_color, mask_alpha: Accepted for interface
                compatibility; not used by this implementation.
            top_k: Upper bound on the number of masks kept.
            score_threshold: Score threshold forwarded to ``postprocess``.

        Returns:
            A uint8 numpy array. When the mask branch is disabled or nothing
            was detected, the image is returned unmasked instead of raising.
        """
        img_gpu = img / 255.0
        h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out, w, h, visualize_lincomb = False,
                                            crop_masks        = True,
                                            score_threshold   = score_threshold)
            torch.cuda.synchronize()

        # Stays None when the mask branch is off, so the code below never
        # touches an unbound name.
        masks = None
        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:top_k]

        if masks is not None and masks.shape[0] > 0:
            mask = masks[0]
            # A [h, w] mask needs a trailing channel axis to broadcast against
            # the [h, w, channels] image.
            if mask.dim() == img_gpu.dim() - 1:
                mask = mask.unsqueeze(-1)
            img_gpu = img_gpu * mask

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        return img_numpy
Пример #4
0
 def __init__(self, model_pth, output_num=5):
     """Load a YOLACT base-config network from ``model_pth`` onto CUDA device 0.

     Args:
         model_pth: Path of the weights file handed to ``load_weights``.
         output_num: Stored unchanged on the instance as ``output_num``.
     """
     self.output_num = output_num
     with torch.no_grad():
         set_cfg("yolact_base_config")
         torch.cuda.set_device(0)
         cudnn.benchmark = cudnn.fastest = True
         torch.set_default_tensor_type('torch.cuda.FloatTensor')
         network = Yolact()
         network.load_weights(model_pth)
         network.eval()
         self.net = network.cuda()
     print("load model complete")
Пример #5
0
    def __init__(
            self,
            weight_path='C:/Users/user/yolact_notes/weights/yolact_darknet53_249_2000.pth',
            save_path='C:/Users/user/yolact_notes/pear_output'):
        """Load the 'pear_config' YOLACT network onto the GPU for inference.

        Args:
            weight_path: Path of the weights file handed to ``load_weights``.
            save_path: Stored on the instance as ``save_path``; not otherwise
                used in this method.
        """
        set_cfg('pear_config')
        self.save_path = save_path
        self.weight_path = weight_path
        self.net = Yolact()
        self.net.load_weights(self.weight_path)
        self.net.eval()
        self.net = self.net.cuda()
        print('model loaded...')

        # The other loaders in this file configure the detect layer through
        # `use_cross_class_nms`; setting only `cross_class_nms` looks like a
        # typo that leaves the option off. The legacy name is still assigned so
        # anything reading it keeps working.
        self.net.detect.use_cross_class_nms = True
        self.net.detect.cross_class_nms = True
        self.net.detect.use_fast_nms = True
    def __init__(self):
        """Configure YOLACT-edge (MobileNetV2), load its weights and convert to TensorRT.

        ``parse_args(self)`` is expected to leave an ``args`` object on the
        instance; the hard-coded overrides below are then written onto it.
        """
        parse_args(self)
        opts = self.args

        # The config name has to be applied before the network is built.
        opts.config = 'yolact_edge_mobilenetv2_config'
        set_cfg(opts.config)

        overrides = (
            ('trained_model', '/home/ht/catkin_ws/src/instance_segmentation/scripts/weights/yolact_edge_mobilenetv2_124_10000.pth'),
            ('top_k', 10),
            ('score_threshold', 0.3),
            ('trt_batch_size', 3),
            ('disable_tensorrt', False),
            ('use_fp16_tensorrt', False),
            ('use_tensorrt_safe_mode', True),
            ('cuda', True),
            ('fast_nms', True),
            ('display_masks', True),
            ('display_bboxes', True),
            ('display_text', True),
            ('display_scores', True),
            ('display_linecomb', False),
            ('fast_eval', False),
            ('deterministic', False),
            ('no_crop', False),
            ('crop', True),
            ('calib_images', '/home/ht/catkin_ws/src/instance_segmentation/scripts/data/coco/calib_images'),
        )
        for option, value in overrides:
            setattr(opts, option, value)

        setup_logger(logging_level=logging.INFO)
        self.logger = logging.getLogger('yolact.eval')

        self.color_cache = defaultdict(lambda: {})

        with torch.no_grad():
            cudnn.benchmark = cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')

            self.logger.info('Loading model...')
            self.net = Yolact(training=False)
            if self.args.trained_model is None:
                self.logger.warning('No weights loaded!')
            else:
                self.net.load_weights(self.args.trained_model, args=self.args)
            self.net.eval()
            self.logger.info('Model loaded.')

            convert_to_tensorrt(self.net,
                                cfg,
                                self.args,
                                transform=BaseTransform())
Пример #7
0
    def __init__(self,
                 model_path="./weights/yolact_im700_54_800000.pth",
                 use_cuda=False):
        """Load the YOLACT weights and optionally move the network to the GPU.

        Args:
            model_path: Path of the weights file handed to ``load_weights``.
            use_cuda: When true, the network is moved to CUDA after loading.
        """
        print('Loading model...', end='')
        self.use_cuda = use_cuda
        self.trained_model = model_path

        network = Yolact()
        network.load_weights(self.trained_model)
        network.eval()
        self.net = network.cuda() if self.use_cuda else network

        # Fast NMS on, cross-class NMS off, prototype debugging off.
        detect_layer = self.net.detect
        detect_layer.use_fast_nms = True
        detect_layer.use_cross_class_nms = False
        cfg.mask_proto_debug = False

        print(' Done.')
Пример #8
0
 def setWeights(self, filename: str) -> None:
     """Build a fresh network from ``filename`` and emit ``sigInitialized``.

     Args:
         filename: Path of the weights file; must not be the empty string.

     Raises:
         YolactException: If ``filename`` is the empty string.
     """
     if filename == '':
         raise YolactException('Empty filename for network weights')
     self.weights_file = filename

     started_ns = time.perf_counter_ns()
     with torch.no_grad():
         # Default tensor type follows the CUDA flag; cudnn.fastest is only
         # touched on the CUDA path.
         if self.cuda:
             cudnn.fastest = True
         tensor_type = ('torch.cuda.FloatTensor'
                        if self.cuda else 'torch.FloatTensor')
         torch.set_default_tensor_type(tensor_type)

         self.net = Yolact()
         self.net.load_weights(self.weights_file, self.cuda)
         self.net.eval()
         if self.cuda:
             self.net = self.net.cuda()
     finished_ns = time.perf_counter_ns()

     logging.debug('Time to load weights %f s', 1e-9 * (finished_ns - started_ns))
     self.sigInitialized.emit()
Пример #9
0
    def __init__(self, opts):
        """Load the YOLACT ResNet-50 network described by ``opts`` onto the GPU.

        Args:
            opts: Mapping providing ``'checkpoint'`` (weights path) and
                ``'threshold'`` (stored on the instance as ``threshold``).
        """
        #concat the two files to one file 
        # if not os.path.isfile('weights/yolact_resnet50_54_800000.pth'):    
        #     script = "cat weights/a* > weights/yolact_resnet50_54_800000.pth"
        #     call(script, shell=True)

        # Global setup: config first, then cuDNN and default tensor type.
        set_cfg('yolact_resnet50_config')
        cudnn.benchmark = cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

        checkpoint = opts['checkpoint']
        self.net = Yolact()
        self.net.load_weights(checkpoint)
        print("done.")

        self.net.eval()
        self.net = self.net.cuda()

        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False

        self.color_cache = defaultdict(lambda: {})
        self.threshold = opts['threshold']
Пример #10
0
def load_model(model_file):
  """Return a YOLACT++ ResNet-50 network in eval mode with weights from ``model_file``.

  Also switches the process-wide default tensor type to CUDA floats.
  """
  torch.set_default_tensor_type('torch.cuda.FloatTensor')
  set_cfg('yolact_plus_resnet50_config')

  network = Yolact()
  network.load_weights(model_file)
  network.eval()
  return network
Пример #11
0
def init_model(transform):
    """Parse the command line, configure YOLACT and return the loaded network.

    Args:
        transform: Unused. The original code rebound this local name to a new
            ``DataParallel(FastBaseTransform())`` just before returning, which
            could never reach the caller; that dead store was removed. The
            parameter is kept so existing call sites still work.

    Returns:
        Tuple ``(net, args)``: the network wrapped in ``CustomDataParallel``
        and the parsed (and possibly updated) argument namespace.
    """
    args = parse_args()

    if args.config is not None:
        print(args.config)
        set_cfg(args.config)
        cfg.mask_proto_debug = False

    # 'interrupt' and 'latest' are symbolic names resolved to real paths.
    if args.trained_model == 'interrupt':
        args.trained_model = SavePath.get_interrupt('weights/')
    elif args.trained_model == 'latest':
        args.trained_model = SavePath.get_latest('weights/', cfg.name)

    if args.config is None:
        model_path = SavePath.from_str(args.trained_model)
        # TODO: Bad practice? Probably want to do a name lookup instead.
        args.config = model_path.model_name + '_config'
        print('Config not specified. Parsed %s from the file name.\n' %
              args.config)
        set_cfg(args.config)

    if args.detect:
        cfg.eval_mask_branch = False

    if args.dataset is not None:
        set_dataset(args.dataset)

    with torch.no_grad():
        if args.cuda:
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')

        print('Loading model...', end='')
        net = Yolact()
        net.load_weights(args.trained_model)
        net.eval()
        print(' Done.')
        # NOTE(review): the network is moved to CUDA even when args.cuda is
        # false; left unchanged because callers may rely on it.
        net = net.cuda()

        net = CustomDataParallel(net).cuda()

    return net, args
Пример #12
0
    def __init__(
            self,
            weights='./crow_vision_yolact/data/yolact/weights/weights_yolact_kuka_17/crow_base_35_457142.pth',
            config=None,
            batchsize=1,
            top_k=25,
            score_threshold=0.1,
            display_text=True,
            display_bboxes=True,
            display_masks=True,
            display_scores=True):
        """Create a YOLACT network on the current CUDA device for inference.

        Args:
            weights: Path of the weights file handed to ``load_weights``.
            config: ``None`` to keep the current config, a value accepted by
                ``set_cfg``, or something containing ``'.obj'`` which is
                opened as a file and deserialised with ``dill`` first.
            batchsize: Stored on the instance as ``batchsize``.
            top_k: Stored on the instance and forwarded to ``parse_args``.
            score_threshold: Stored on the instance and forwarded to
                ``parse_args``.
            display_text, display_bboxes, display_masks, display_scores:
                Forwarded to ``parse_args`` as ``--name=value`` flags.
        """
        self.score_threshold = score_threshold
        self.top_k = top_k
        self.batchsize = batchsize

        # initialize a yolact net for inference
        ## YOLACT setup
        # setup config
        if config is not None:
            if '.obj' in config:
                # NOTE(review): dill.load executes arbitrary code from the
                # file; only use with trusted config objects.
                with open(config, 'rb') as config_file:
                    config = dill.load(config_file)
            set_cfg(config)
        self.class_names_tuple = get_class_names_tuple()

        cli_options = (
            ('top_k', top_k),
            ('score_threshold', score_threshold),
            ('display_text', display_text),
            ('display_bboxes', display_bboxes),
            ('display_masks', display_masks),
            ('display_scores', display_scores),
        )
        parse_args(['--' + name + '=' + str(value)
                    for name, value in cli_options])

        # CUDA setup for yolact
        torch.backends.cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

        #YOLACT net itself
        with torch.no_grad():
            network = Yolact().cuda(torch.cuda.current_device())
            network.load_weights(weights)
            network.eval()
            network.detect.use_fast_nms = True
            network.detect.use_cross_class_nms = False

        self.net = network
        print("YOLACT network available as self.net")

        #for debug,benchmark
        self.duration = 0.0
Пример #13
0
    def __init__(self, problem):
        """Import the YOLACT helpers lazily and build a network plus loss in train mode.

        The YOLACT checkout at ``'../yolact/'`` is temporarily inserted into
        ``sys.path`` (unless entry 1 already mentions 'yolact'). The original
        path is now restored in a ``finally`` block, so a failing import or
        weight initialisation no longer leaves ``sys.path`` modified.

        Args:
            problem: Not used by this method.
        """
        super().__init__()

        from utils.augmentations import FastBaseTransform
        self.FastBaseTransform = FastBaseTransform

        import cv2
        self.cv2 = cv2

        import matplotlib.pyplot as plt
        self.plt = plt

        from layers.output_utils import postprocess, undo_image_transformation
        self.postprocess = postprocess
        self.undo_image_transformation = undo_image_transformation

        from utils import timer
        self.timer = timer

        import sys
        syspathsave = None
        # The length guard avoids an IndexError when sys.path has fewer than
        # two entries.
        if len(sys.path) < 2 or 'yolact' not in sys.path[1]:
            syspathsave = list(sys.path)
            sys.path.insert(1, '../yolact/')

        try:
            from yolact import Yolact
            from train import MultiBoxLoss
            import data as D
            self.D = D

            from collections import defaultdict
            self.color_cache = defaultdict(lambda: {})

            net = Yolact()
            net.train()
            net.init_weights(backbone_path='../yolact/weights/' +
                             D.cfg.backbone.path)

            criterion = MultiBoxLoss(num_classes=D.cfg.num_classes,
                                     pos_threshold=D.cfg.positive_iou_threshold,
                                     neg_threshold=D.cfg.negative_iou_threshold,
                                     negpos_ratio=D.cfg.ohem_negpos_ratio)

            self.net = net
            self.criterion = criterion
        finally:
            # Undo the temporary path change on success and on failure alike.
            if syspathsave is not None:
                sys.path = syspathsave
Пример #14
0
def convert_to_onnx_with_hydra(cfg: DictConfig):
    """Export the YOLACT checkpoint named in ``cfg.onnx`` to an ONNX file.

    Args:
        cfg: Hydra configuration; every option read here lives under
            ``cfg.onnx``.
    """
    # create folder for onnx
    createFolderOnnx(cfg)
    # set cfg
    set_cfg(cfg.onnx.yolact_cfg)

    network = Yolact()
    network.load_weights(cfg.onnx.model_ckpt_path)
    network.eval()
    network = network.cpu()

    # Random tensor with the configured shape; passed to the exporter as the
    # example input.
    input_shape = (cfg.onnx.model_batch_size, cfg.onnx.model_channel_input,
                   cfg.onnx.model_height_input, cfg.onnx.model_width_input)
    example_input = torch.rand(input_shape)

    torch.onnx.export(network,
                      example_input,
                      cfg.onnx.model_onnx_path,
                      verbose=cfg.onnx.verbose,
                      opset_version=cfg.onnx.opset_version)
Пример #15
0
def prepare_model(args):
    """Create a YOLACT network in train mode, resumed or freshly initialised.

    Args:
        args: Namespace providing ``resume``, ``start_iter`` and
            ``save_folder``. ``start_iter`` is overwritten with the
            checkpoint's iteration when resuming and it was ``-1``.

    Returns:
        The ``Yolact`` network.
    """
    yolact_net = Yolact()
    yolact_net.train()

    # Fresh start: initialise from the backbone weights if that file exists.
    if args.resume is None:
        init_path = args.save_folder + cfg.backbone.path
        print('Initializing weights...', init_path)
        if not os.path.isfile(init_path):
            print("no init weight, use empty")
        else:
            yolact_net.init_weights(backbone_path=init_path)
        return yolact_net

    # Resume from a checkpoint.
    print('Resuming training, loading {}...'.format(args.resume))
    yolact_net.load_weights(args.resume)
    if args.start_iter == -1:
        args.start_iter = SavePath.from_str(args.resume).iteration
    return yolact_net
Пример #16
0
def main(args):
  """Start the 'yolact_ros' node: load the network, attach the image converter, spin.

  Args:
      args: Not used by this function.
  """
  rospy.init_node('yolact_ros')
  package_root = rospkg.RosPack().get_path('yolact_ros')

  # The config name is derived from the weights file name.
  weights_file = package_root + "/scripts/yolact/weights/yolact_base_54_800000.pth"
  parsed_weights = SavePath.from_str(weights_file)
  set_cfg(parsed_weights.model_name + '_config')

  with torch.no_grad():
      results_dir = package_root + "/scripts/yolact/results"
      if not os.path.exists(results_dir):
          os.makedirs(results_dir)

      cudnn.benchmark = cudnn.fastest = True
      torch.set_default_tensor_type('torch.cuda.FloatTensor')

      print('Loading model...', end='')
      net = Yolact()
      net.load_weights(weights_file)
      net.eval()
      print(' Done.')

      net = net.cuda()
      net.detect.use_fast_nms = True
      cfg.mask_proto_debug = False

  # Bound to a local name so the converter object stays referenced while the
  # node spins.
  converter = image_converter(net)

  try:
    rospy.spin()
  except KeyboardInterrupt:
    print("Shutting down")
  cv2.destroyAllWindows()
Пример #17
0
def load_weights(filename, cuda):
    """Load YOLACT network weights into the module-level ``ynet``.

    Args:
        filename: Path of the weights file; must not be the empty string.
        cuda: Selects the default tensor type (CUDA or CPU floats) and whether
            ``cudnn.fastest`` is enabled.

    Raises:
        ValueError: If ``filename`` is the empty string.
    """
    global ynet
    if filename == '':
        raise ValueError('Empty filename for network weights')

    print('#### CUDA ENABLED', cuda)
    print(f'Loading weights from {filename}')

    started_ns = time.perf_counter_ns()
    with torch.no_grad():
        if cuda:
            cudnn.fastest = True
        default_type = 'torch.cuda.FloatTensor' if cuda else 'torch.FloatTensor'
        torch.set_default_tensor_type(default_type)
        # torch.set_default_tensor_type('torch.FloatTensor')
        ynet = Yolact()
        ynet.load_weights(filename, False)
        ynet.eval()
    elapsed_s = 1e-9 * (time.perf_counter_ns() - started_ns)

    logging.debug(f'Time to load weights: {elapsed_s}')
Пример #18
0
def train():
    """Run the YOLACT training loop, driven by the module-level ``args`` and ``cfg``.

    Builds the training dataset (plus a validation dataset when
    ``args.validation_epoch > 0``), the network, an SGD optimizer and the
    MultiBox loss, then iterates until ``cfg.max_iter`` is reached. Weights
    are saved every ``args.save_interval`` iterations and once more at the
    end. On ``KeyboardInterrupt`` an '_interrupt' checkpoint is written and
    the process exits.

    Fix over the original: when a delayed config setting fires, the loss
    averages are reset through ``loss_avgs.values()``. Iterating the dict
    itself yields the string keys and raised ``AttributeError`` on ``.reset()``.
    """
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net = nn.DataParallel(net).cuda()
        criterion = nn.DataParallel(criterion).cuda()

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    loss_types = ['B', 'C', 'M', 'P', 'D', 'E', 'S']  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed.
                        # Iterate the MovingAverage objects, not the dict keys.
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # Load training data
                # Note, for training on multiple gpus this will use the custom replicate and gather I wrote up there
                images, targets, masks, num_crowds = prepare_data(datum)

                # Forward Pass
                out = net(images)

                # Compute Loss
                optimizer.zero_grad()

                wrapper = ScatterWrapper(targets, masks, num_crowds)
                losses = criterion(out, wrapper, wrapper.make_mask())

                losses = {k: v.mean()
                          for k, v in losses.items()
                          }  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])

                # Backprop
                loss.backward(
                )  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) +
                           ' T: %.3f || ETA: %s || timer: %.3f') %
                          tuple([epoch, iteration] + loss_labels +
                                [total, eta_str, elapsed]),
                          flush=True)

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    # `latest` is only bound when keep_latest is set; the
                    # check below short-circuits on the same flag.
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(yolact_net, val_dataset)
    except KeyboardInterrupt:
        print('Stopping early. Saving network...')

        # Delete previous copy of the interrupted network so we don't spam the weights folder
        SavePath.remove_interrupt(args.save_folder)

        yolact_net.save_weights(
            save_path(epoch,
                      repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
Пример #19
0
class MattingService:
    """Image matting on top of YOLACT: the top-scoring mask becomes the alpha channel."""

    def __init__(self,
                 model_path="./weights/yolact_im700_54_800000.pth",
                 use_cuda=False):
        """Load the YOLACT weights and optionally move the network to the GPU.

        Args:
            model_path: Path of the weights file handed to ``load_weights``.
            use_cuda: When true, the network and input frames are moved to CUDA.
        """
        print('Loading model...', end='')
        self.use_cuda = use_cuda
        self.trained_model = model_path
        self.net = Yolact()
        self.net.load_weights(self.trained_model)
        self.net.eval()

        if self.use_cuda:
            self.net = self.net.cuda()

        self.net.detect.use_fast_nms = True
        self.net.detect.use_cross_class_nms = False
        cfg.mask_proto_debug = False

        print(' Done.')

    def process(self, image, top_k=1, score_threshold=0.6):
        """Matte one image file and write the RGBA result to disk.

        Args:
            image: Either ``"input_path:output_path"`` or a bare input path.
                A bare path is written to ``results/<stem>.png``; the
                ``results/`` directory is created if missing, because
                ``cv2.imwrite`` does not create directories.
            top_k: Maximum number of detections considered.
            score_threshold: Minimum detection score.

        Returns:
            The output path (``input:output`` form) or the generated file name
            (bare-path form); ``None`` when ``image`` is ``None``.
        """
        # TODO Currently we do not support Fast Mask Re-scoring in evalimage, evalimages, and evalvideo
        with torch.no_grad():
            if image is not None:
                if ':' in image:
                    inp, _image_name = image.split(':')
                    self._infer_image(self.net, inp, _image_name, top_k,
                                      score_threshold)
                else:
                    _image_name = image.split('/')[-1].split('.')[0] + '.png'
                    os.makedirs('results/', exist_ok=True)
                    out = os.path.join('results/', _image_name)
                    self._infer_image(self.net, image, out, top_k,
                                      score_threshold)
                return _image_name

    def _infer_image(self, net: Yolact, path, save_path, top_k,
                     score_threshold):
        """Run ``net`` on the image at ``path`` and save or show the matted result.

        Args:
            net: Network used for the forward pass.
            path: Input image path, read with ``cv2.imread``.
            save_path: Output file; when ``None`` the result is shown with
                matplotlib instead of being written.
            top_k: Maximum number of detections considered.
            score_threshold: Minimum detection score.

        Raises:
            FileNotFoundError: If ``cv2.imread`` could not read ``path``
                (it returns ``None`` instead of raising).
        """
        raw = cv2.imread(path)
        if raw is None:
            raise FileNotFoundError('Could not read image: %s' % path)

        frame = torch.from_numpy(raw)
        if self.use_cuda:
            frame = frame.cuda()
        frame = frame.float()

        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = net(batch)

        img_numpy = self.post_process(preds,
                                      frame,
                                      None,
                                      None,
                                      top_k,
                                      score_threshold,
                                      undo_transform=False)

        if save_path is None:
            # Swap the first and third colour channels for matplotlib; alpha
            # stays last.
            img_numpy = img_numpy[:, :, (2, 1, 0, 3)]
            plt.subplot()
            plt.imshow(img_numpy)
            plt.title(path)
            plt.show()
        else:
            cv2.imwrite(save_path, img_numpy)

    @staticmethod
    def post_process(dets_out,
                     img,
                     h,
                     w,
                     top_k=1,
                     score_threshold=0.6,
                     undo_transform=True):
        """Turn raw detections into a 4-channel image whose alpha is the best mask.

        Note: If undo_transform=False then h and w are allowed to be None;
        they are taken from ``img.shape``.

        Args:
            dets_out: Raw network output, forwarded to ``postprocess``.
            img: Image tensor.
            h, w: Target size, only used when ``undo_transform`` is true.
            top_k: Maximum number of detections considered.
            score_threshold: Minimum detection score.
            undo_transform: Whether ``img`` must first be passed through
                ``undo_image_transformation``.

        Returns:
            A uint8 numpy array with four channels. If nothing passes the
            threshold, or the mask branch is disabled in ``cfg``, the alpha
            channel is whatever ``cv2.cvtColor`` produced.
        """
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            # Temporarily force bbox rescoring, then restore the old setting.
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=False,
                            crop_masks=False,
                            score_threshold=score_threshold)
            cfg.rescore_bbox = save

        # Stays None when the mask branch is disabled, so the code below never
        # reads an unbound name.
        masks = None
        with timer.env('Copy'):
            idx = t[1].argsort(0, descending=True)[:top_k]

            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][idx]
            classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        # Scores are sorted in descending order, so stop at the first one
        # below the threshold.
        num_dets_to_consider = min(top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < score_threshold:
                num_dets_to_consider = j
                break

        final_res = (img_gpu * 255).byte().cpu().numpy()
        final_res = cv2.cvtColor(final_res, cv2.COLOR_RGB2RGBA)

        if num_dets_to_consider == 0 or masks is None:
            return final_res

        # After this, masks is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        _mask = (masks * 255).byte().cpu().numpy()[0]

        # Then assign the mask to the last channel of the image
        final_res[:, :, 3] = _mask.squeeze()

        return final_res
Пример #20
0
def detect():
    """Run YOLACT over a folder of images and measure the first pear found.

    Paths, config name and thresholds are hard-coded below. For each
    ``.jpg``/``.png`` image in ``img_path`` the network is run, the
    detections are post-processed, the mask of the first detection with
    class id 0 is turned into an outline, and the outline's roundness is
    appended to that image's result list. Images that cannot be read or
    that contain no class-0 detection are skipped with a message.
    """
    img_path = '/home/user/dataset/pear/train/JPEGImages'
    save_path = '/home/user/pear_output'
    weight_path = '/home/user/caoliwei/yolact/weights/20200901/yolact_darknet53_1176_20000.pth'

    set_cfg('pear_config')

    with torch.no_grad():
        torch.cuda.set_device(0)

        ######
        # If the input image size is constant, this make things faster (hence why we can use it in a video setting).
        # cudnn.benchmark = True
        # cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        ######

        net = Yolact()
        net.load_weights(weight_path)
        net.eval()
        net = net.cuda()
        print('model loaded...')

        # NOTE(review): other YOLACT callers set `detect.use_cross_class_nms`;
        # confirm that `cross_class_nms` is the attribute the detector reads.
        net.detect.cross_class_nms = True
        net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False

        # Creates missing parent directories too and is a no-op if it exists.
        os.makedirs(save_path, exist_ok=True)

        img_names = [
            name for name in os.listdir(img_path)
            if name.endswith(('.jpg', '.png'))
        ]
        for img_name in img_names:
            img = cv2.imread(os.path.join(img_path, img_name))
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                print('clw: could not read image %s, skipping' % img_name)
                continue

            img = torch.from_numpy(img).cuda().float()
            img = FastBaseTransform()(img.unsqueeze(0))
            start = time.time()
            preds = net(img)
            print('clw: image_name: %s, inference time use %.3fs' %
                  (img_name,
                   time.time() - start))  # inference time use 0.023s, 550x550

            h, w = img.shape[2:]
            # classes, scores, boxes, masks (sorted by score, per the original
            # author's note). Wrapped in list() so that appending the
            # roundness below works whether postprocess returns a tuple or a
            # list.
            result = list(
                postprocess(preds, w, h, crop_masks=True,
                            score_threshold=0.3))

            # Walk the class ids in order and take the mask of the first
            # detection whose class id is 0 (the pear).
            pear_mask = None
            for i, cls_id in enumerate(result[0]):
                if cls_id == 0:
                    pear_mask = result[3][i].cpu().numpy()
                    break

            if pear_mask is None:
                # Without this guard the code would either raise NameError or
                # silently reuse the mask from a previous image.
                print('clw: no pear detected in %s, skipping' % img_name)
                continue

            # Extract the outline from the pear mask and score its roundness.
            pear_outline = get_outline_from_mask(pear_mask, w, h)
            roundness = compute_roundness(pear_outline)

            result.append(roundness)
Пример #21
0
                                 undo_transform=False)
        cv2.imshow("YOLACT", img_numpy)
        if cv2.waitKey(33) == 27:
            break
    cv2.destroyAllWindows()
    camera.release()
    return


if __name__ == '__main__':
    rospy.init_node('test')
    sub_img = Get_image()
    print('Loading model...', end='')
    with torch.no_grad():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        net = Yolact()
        net.load_weights(
            '/home/chien/ros_yolact/src/yolact/src/weights/yolact_base_1333_8000.pth'
        )
        net.eval()
        net = net.cuda()
        print(' Done.')
    while not rospy.is_shutdown():
        cv2.imshow("YOLACT1", sub_img.cv_image)
        image = torch.from_numpy(sub_img.cv_image).cuda().float()
        batch = FastBaseTransform()(image.unsqueeze(0))
        preds = net(batch)
        img_numpy = prep_display(preds,
                                 image,
                                 None,
                                 None,
Пример #22
0
        if isinstance(child, tf.keras.Model):
            parse_module(child, weights)
        elif isinstance(child, tf.keras.layers.Conv2D):
            layer_weights = weights.pop()
            print(child, layer_weights[0].shape)
            child.set_weights(layer_weights)
        elif isinstance(child, tf.keras.layers.BatchNormalization):
            print(child, layer_weights[0].shape)
            layer_weights = weights.pop()
            child.set_weights(layer_weights)
        else:
            continue
    return True


# Build a fresh Yolact model and fill each of its sub-modules from the
# matching weight list via parse_module.
# NOTE(review): the *_weights lists are not defined in the visible part of
# this snippet; presumably they were produced earlier by the conversion
# step — confirm. parse_module takes entries off the list with pop(), so the
# order of these calls matters.
model = Yolact()

# Backbone: the pre-convolution followed by the backbone's conv layers.
darknet53_modules = [model.backbone._preconv] + model.backbone.conv_layers
for module in darknet53_modules:
    parse_module(module, darknet53_weights)

# Prototype network.
proto_net = model.proto_net
parse_module(proto_net, proto_net_weights)

# Feature pyramid network.
fpn = model.fpn
parse_module(fpn, fpn_weights)

# Only the first prediction layer is loaded here.
pred = model.prediction_layers[0]
parse_module(pred, pred_weights)

# Semantic segmentation convolution.
segmantic_seg_conv = model.semantic_seg_conv
parse_module(segmantic_seg_conv, segmantic_seg_conv_weights)
Пример #23
0
def train():
    """Train YOLACT using the module-level ``args`` and ``cfg`` settings.

    Builds the training (and optionally validation) dataset, the network,
    the SGD optimizer and the MultiBoxLoss criterion, optionally resumes
    from a checkpoint, and then runs the iteration loop: delayed config
    changes, learning-rate warm-up and step decay, forward/backward pass,
    console and file logging, periodic checkpointing and per-epoch
    validation. Pressing Ctrl+C saves an ``*_interrupt`` checkpoint when
    ``args.interrupt`` is set and then exits the process. On normal
    completion the final weights are saved.
    """
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    # The validation dataset only exists when validation is enabled.
    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    if args.log:
        log = Log(cfg.name,
                  args.log_folder,
                  dict(args._get_kwargs()),
                  overwrite=(args.resume is None),
                  log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio)

    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print(
                'Error: Batch allocation (%s) does not sum to batch size (%s).'
                % (args.batch_alloc, args.batch_size))
            exit(-1)

    # From here on `net` is the loss-wrapped, data-parallel module while
    # `yolact_net` keeps pointing at the bare network for saving/loading.
    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    # Initialize everything
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn: yolact_net.freeze_bn(True)

    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed.
                        # loss_avgs is a dict, so iterate its values: iterating
                        # the dict itself yields the string keys.
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                # Forward Pass + Compute loss at the same time (see CustomDataParallel and NetLoss)
                losses = net(datum)

                losses = {k: (v).mean()
                          for k, v in losses.items()
                          }  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])

                # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                # all_loss = sum([v.mean() for v in losses.values()])

                # Backprop
                loss.backward(
                )  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) +
                           ' T: %.3f || ETA: %s || timer: %.3f') %
                          tuple([epoch, iteration] + loss_labels +
                                [total, eta_str, elapsed]),
                          flush=True)

                if args.log:
                    precision = 5
                    loss_info = {
                        k: round(losses[k].item(), precision)
                        for k in losses
                    }
                    loss_info['T'] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0
                                             )  # nvidia-smi is sloooow

                    # NOTE(review): cur_lr is not assigned in this function;
                    # presumably a module-level value maintained by set_lr —
                    # confirm.
                    log.log('train',
                            loss=loss_info,
                            epoch=epoch,
                            iter=iteration,
                            lr=round(cur_lr, 10),
                            elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    # Look up the previous checkpoint before writing the new one
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(epoch, iteration, yolact_net,
                                           val_dataset,
                                           log if args.log else None)

        # Compute validation mAP after training is finished. val_dataset is
        # only created when validation is enabled, so skip this otherwise
        # instead of crashing before the final weights are saved.
        if args.validation_epoch > 0:
            compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                                   log if args.log else None)
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')

            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            yolact_net.save_weights(
                save_path(epoch,
                          repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
Пример #24
0
def train():
    """Train YOLACT using the module-level ``args`` and ``cfg`` settings.

    Builds the training (and optionally validation) dataset, the network,
    the SGD optimizer and the MultiBoxLoss criterion, optionally resumes
    from a checkpoint, and then runs the iteration loop: delayed config
    changes, learning-rate warm-up and step decay, forward/backward pass,
    console and file logging, periodic checkpointing and per-epoch
    validation. Pressing Ctrl+C saves an ``*_interrupt`` checkpoint when
    ``args.interrupt`` is set and then exits the process. On normal
    completion the final weights are saved.
    """
    #1: Create the folder that training results are saved into.
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    #2: Build the training dataset with COCODetection.
    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    #   When validation is enabled, build the validation dataset as well.
    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    #3: Create the Yolact network and put it in train mode.
    #   Note: `net` and `yolact_net` refer to the same object here (an
    #   alias, not a copy). `net` is later rebound to the loss-wrapped,
    #   data-parallel module, so `yolact_net` is kept to reach the bare
    #   network for saving and loading.
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    #######################################################################
    ####### RESUME handling ###############################################
    #4: args.log enables logging during training; args.resume restarts
    #   training from a saved checkpoint after it was stopped part-way.
    if args.log:
        log = Log(cfg.name,
                  args.log_folder,
                  dict(args._get_kwargs()),
                  overwrite=(args.resume is None),
                  log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)
    ####### END ###########################################################
    #######################################################################

    #5: Set up the optimizer and the loss function.
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio)

    #6: Parse the optional per-GPU batch allocation. If it does not sum to
    #   the total batch size something is wrong, so abort.
    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print(
                'Error: Batch allocation (%s) does not sum to batch size (%s).'
                % (args.batch_alloc, args.batch_size))
            exit(-1)

    #7: Combine the network and the loss into a single module. Calling
    #   `net` now runs the forward pass and computes the loss in one call
    #   (see CustomDataParallel and NetLoss). `yolact_net` still refers to
    #   the bare Yolact network inside it.
    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    #8: Freeze the batch-norm layers, push an all-zero tensor through the
    #   network once to initialize everything, then unfreeze batch norm
    #   again. Freezing first keeps the dummy pass from disturbing the
    #   batch-norm statistics that were loaded with the weights.
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    # The zero tensor must actually be passed through the network; building
    # it without calling yolact_net does nothing.
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn: yolact_net.freeze_bn(True)

    #9: Starting iteration (negative start_iter is clamped to 0), plus the
    #   epoch size and the number of epochs needed to reach cfg.max_iter.
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    #10:Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    #   step_index is the index used for learning-rate decay.
    #   data_loader yields the training batches.
    step_index = 0

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    #11:save_path builds the checkpoint path for a given epoch/iteration.
    #   time_avg and loss_avgs are MovingAverage objects used to report
    #   smoothed timing and loss values during training.
    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    #12: Main training loop (sections A to F).
    print('Begin training!')
    print()

    # A
    #   try-except so that Ctrl+C (KeyboardInterrupt) can stop training
    #   early while still saving progress. To continue from that point,
    #   run training again with the resume argument.
    try:
        # Repeat for the num_epochs computed in step 9.
        for epoch in range(num_epochs):
            # B
            #   When resuming, skip whole epochs that lie before the start
            #   iteration. Inside an epoch, stop early once the epoch's
            #   iteration budget or cfg.max_iter is reached.
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed.
                        # loss_avgs is a dict, so iterate its values: iterating
                        # the dict itself yields the string keys.
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # C
                #   Learning-rate adjustment.

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                #   Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # D
                #   Loss computation.

                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                #   Forward pass and loss computation for this iteration in
                #   one call (see CustomDataParallel and NetLoss).
                losses = net(datum)

                losses = {k: (v).mean()
                          for k, v in losses.items()
                          }  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])

                # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                # all_loss = sum([v.mean() for v in losses.values()])

                # E
                #   Backward pass; the optimizer step is applied only when
                #   the loss is finite.

                # Backprop
                loss.backward()

                # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # F
                #   Report elapsed time and running loss values so progress
                #   can be followed during training.

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) +
                           ' T: %.3f || ETA: %s || timer: %.3f') %
                          tuple([epoch, iteration] + loss_labels +
                                [total, eta_str, elapsed]),
                          flush=True)

                #   Write the log record for this iteration.
                if args.log:
                    precision = 5
                    loss_info = {
                        k: round(losses[k].item(), precision)
                        for k in losses
                    }
                    loss_info['T'] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0
                                             )  # nvidia-smi is sloooow

                    # NOTE(review): cur_lr is not assigned in this function;
                    # presumably a module-level value maintained by set_lr —
                    # confirm.
                    log.log('train',
                            loss=loss_info,
                            epoch=epoch,
                            iter=iteration,
                            lr=round(cur_lr, 10),
                            elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu
                # ~F

                # One pass through the loop body is one iteration.
                iteration += 1

                #   Save a checkpoint every args.save_interval iterations.
                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    # Look up the previous checkpoint before writing the new one
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # When validation is enabled, compute the validation mAP at the
            # end of every args.validation_epoch-th epoch (except epoch 0).
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(epoch, iteration, yolact_net,
                                           val_dataset,
                                           log if args.log else None)

        # Compute validation mAP after training is finished. val_dataset is
        # only created when validation is enabled, so skip this otherwise
        # instead of crashing before the final weights are saved.
        if args.validation_epoch > 0:
            compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                                   log if args.log else None)

    #13: When training is stopped with Ctrl+C, save the weights into the
    #   save folder so training can be resumed later, then exit.
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')

            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            yolact_net.save_weights(
                save_path(epoch,
                          repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
from config import PASCAL_CLASSES, COLORS, get_params, ROOT_DIR
from data.coco_dataset import ObjectDetectionDataset
from utils import learning_rate_schedule
from utils.utils import postprocess, denormalize_image
from yolact import Yolact

# Todo Add your custom dataset
tf.random.set_seed(1234)
NAME_OF_DATASET = "pascal"
CLASS_NAMES = PASCAL_CLASSES

# -----------------------------------------------------------------------------------------------
# Model and data loaders, all driven by the per-dataset parameter bundle.
(train_iter, input_size, num_cls, lrs_schedule_params, loss_params,
 parser_params, model_params) = get_params(NAME_OF_DATASET)
model = Yolact(**model_params)

# TFRecords are looked up under <ROOT_DIR>/data/<dataset name>.
tfrecord_dir = os.path.join(ROOT_DIR, "data", NAME_OF_DATASET)
dateset = ObjectDetectionDataset(
    dataset_name=NAME_OF_DATASET,
    tfrecord_dir=tfrecord_dir,
    anchor_instance=model.anchor_instance,
    **parser_params,
)

# Train split first, then validation split, both with batch size 1.
train_dataset, valid_dataset = (
    dateset.get_dataloader(subset=split, batch_size=1)
    for split in ('train', 'val')
)
# -----------------------------------------------------------------------------------------------
# Optimizer driven by the YOLACT learning-rate schedule, and the checkpoint
# directory used for restoring.
lr_schedule = learning_rate_schedule.Yolact_LearningRateSchedule(
    **lrs_schedule_params
)
optimizer = tf.keras.optimizers.SGD(momentum=0.9, learning_rate=lr_schedule)

ckpt_dir = os.path.join(ROOT_DIR, "checkpoints")
Пример #26
0
#   Copyright (C) 2019 * Ltd. All rights reserved.
#
#   Editor      : VIM
#   File name   : convert_weight.py
#   Author      : YunYang1994
#   Created date: 2019-07-27 18:07:20
#   Description :
#
#================================================================

import torch
import numpy as np
from yolact import Yolact

# Build the PyTorch YOLACT model with autograd disabled, switch it to eval
# mode, load the trained DarkNet-53 checkpoint, and keep an iterator over
# its direct child modules.
with torch.no_grad():
    model = Yolact()
    model.eval()
    model.load_weights("./yolact_darknet53_54_800000.pth")
    # children() yields only the immediate sub-modules of the model.
    modules = model.children()


def parse_layer(layer, weights):
    """Append the parameters of ``layer`` to the ``weights`` list.

    Only ``torch.nn.Conv2d`` and ``torch.nn.BatchNorm2d`` layers are accepted;
    anything else trips the assert. For a convolution the kernel is converted
    to a NumPy array and its axes are permuted with ``[2, 3, 1, 0]``; when the
    layer has no bias, ``[weight]`` is appended to ``weights``.

    NOTE(review): the rest of this function (convolutions with a bias and
    batch-norm layers) is not visible in this excerpt.

    Args:
        layer: The ``Conv2d`` or ``BatchNorm2d`` module to read.
        weights: List that is extended in place.
    """
    assert isinstance(layer, torch.nn.Conv2d) or isinstance(
        layer, torch.nn.BatchNorm2d)
    print("=> Parsing ", layer)
    if isinstance(layer, torch.nn.Conv2d):
        # .numpy() assumes the parameters live on the CPU - TODO confirm.
        weight, bias = layer.weight.detach().numpy(), layer.bias
        # PyTorch stores Conv2d kernels as (out_channels, in_channels, k_h, k_w);
        # the permutation below moves the spatial axes to the front.
        weight = np.transpose(
            weight, [2, 3, 1, 0])  # k_h, k_w, in_channels, out_channels
        if bias is None:
            weights.append([weight])
Пример #27
0
    #     print('Average: %5.2f fps, %5.2f ms' % (1 / frame_times.get_avg(), 1000 * avg_seconds))


if __name__ == '__main__':

    # Validation dataset and its labels (COCO val2017).
    valid_dataset = COCODetection(image_path='./data/coco/images/val2017/',
                                  info_file='./data/coco/annotations/instances_val2017.json',
                                  transform=BaseTransform(),
                                  has_gt=True
                                  )
    prep_coco_cats()

    # Model: load the trained weights, switch to eval mode and move the
    # network to the requested device.
    print('Loading model...', end='')
    model = Yolact()
    model.load_weights(args.trained_model)
    model.eval()
    model = model.cuda() if args.cuda else model.cpu()
    print(' Done.')

    # Main entry point: everything below runs without autograd.
    with torch.no_grad():
        # exist_ok=True replaces the former exists()/makedirs() pair, which
        # could race with another process creating the same directory.
        os.makedirs('results', exist_ok=True)

        # Make newly created tensors default to the selected device type.
        if args.cuda:
            torch.backends.cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')
Пример #28
0
def train(rank, args):
    """Run one distributed training worker.

    The worker sets up logging, joins the NCCL process group, builds the
    dataset selected by ``cfg.dataset.name`` (YouTube VIS, FlyingChairs or,
    by default, COCO), creates the ``Yolact`` network and the matching loss,
    and then iterates over an infinite data sampler until ``cfg.max_iter``
    iterations have been performed. Along the way it adjusts the learning
    rate, logs losses every 10 iterations, writes flow visualisations every
    100 iterations (rank 0 only), saves checkpoints every
    ``args.save_interval`` iterations (rank 0 only) and optionally runs
    validation at epoch boundaries.

    Args:
        rank: Index of this worker. It is used as the distributed rank, as
            the CUDA device index and to restrict saving/visualisation to
            worker 0.
        args: Parsed command line options. The attributes read here include
            ``num_gpus``, ``save_folder``, ``log_folder``, ``dist_url``,
            ``cuda``, ``resume``, ``start_iter``, ``lr``, ``momentum``,
            ``decay``, ``gamma``, ``batch_size``, ``num_workers``,
            ``random_seed``, ``validation_epoch``, ``save_interval``,
            ``keep_latest``, ``keep_latest_interval`` and
            ``interrupt_no_save``. ``args.resume`` and ``args.start_iter``
            may be overwritten.

    Returns:
        None. On ``KeyboardInterrupt`` the function returns early, after
        worker 0 has saved an ``*_interrupt`` checkpoint (unless
        ``args.interrupt_no_save`` is set).
    """
    if args.num_gpus > 1:
        multi_gpu_rescale(args)
    if rank == 0:
        # exist_ok avoids the exists()/mkdir() race and makedirs also creates
        # missing parent directories.
        os.makedirs(args.save_folder, exist_ok=True)

    # set up logger
    setup_logger(output=os.path.join(args.log_folder, cfg.name),
                 distributed_rank=rank)
    logger = logging.getLogger("yolact.train")

    w = SummaryHelper(distributed_rank=rank,
                      log_dir=os.path.join(args.log_folder, cfg.name))
    w.add_text("argv", " ".join(sys.argv))
    logger.info("Args: {}".format(" ".join(sys.argv)))
    # Record the git revision of the code that produced this run.
    import git
    with git.Repo(search_parent_directories=True) as repo:
        w.add_text("git_hash", repo.head.object.hexsha)
        logger.info("git hash: {}".format(repo.head.object.hexsha))

    try:
        logger.info("Initializing torch.distributed backend...")
        dist.init_process_group(backend='nccl',
                                init_method=args.dist_url,
                                world_size=args.num_gpus,
                                rank=rank)
    except Exception:
        # Log the rendezvous URL to ease debugging, then propagate unchanged.
        logger.error("Process group URL: {}".format(args.dist_url))
        raise

    dist.barrier()

    if torch.cuda.device_count() > 1:
        logger.info('Multiple GPUs detected! Turning off JIT.')

    # ------------------------------------------------------------------
    # Dataset selection
    # ------------------------------------------------------------------
    collate_fn = detection_collate
    if cfg.dataset.name == 'YouTube VIS':
        dataset = YoutubeVIS(image_path=cfg.dataset.train_images,
                             info_file=cfg.dataset.train_info,
                             configs=cfg.dataset,
                             transform=SSDAugmentationVideo(MEANS))

        # Optional image dataset that is trained jointly with the video data.
        if cfg.dataset.joint == 'coco':
            joint_dataset = COCODetection(
                image_path=cfg.joint_dataset.train_images,
                info_file=cfg.joint_dataset.train_info,
                transform=SSDAugmentation(MEANS))
            joint_collate_fn = detection_collate

        if args.validation_epoch > 0:
            setup_eval()
            val_dataset = YoutubeVIS(image_path=cfg.dataset.valid_images,
                                     info_file=cfg.dataset.valid_info,
                                     configs=cfg.dataset,
                                     transform=BaseTransformVideo(MEANS))
        collate_fn = collate_fn_youtube_vis

    elif cfg.dataset.name == 'FlyingChairs':
        dataset = FlyingChairs(image_path=cfg.dataset.trainval_images,
                               info_file=cfg.dataset.trainval_info)

        collate_fn = collate_fn_flying_chairs

    else:
        dataset = COCODetection(image_path=cfg.dataset.train_images,
                                info_file=cfg.dataset.train_info,
                                transform=SSDAugmentation(MEANS))

        if args.validation_epoch > 0:
            setup_eval()
            val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                        info_file=cfg.dataset.valid_info,
                                        transform=BaseTransform(MEANS))

    # Set cuda device early to avoid duplicate model in master GPU
    if args.cuda:
        torch.cuda.set_device(rank)

    # ------------------------------------------------------------------
    # Network, weights and loss
    # ------------------------------------------------------------------
    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs.

    # use timer for experiments
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        logger.info('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume, args=args)

        # -1 means "take the iteration number from the checkpoint name".
        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        logger.info('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)

    if cfg.flow.train_flow:
        criterion = OpticalFlowLoss()

    else:
        criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                                 pos_threshold=cfg.positive_iou_threshold,
                                 neg_threshold=cfg.negative_iou_threshold,
                                 negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net.cuda(rank)
        criterion.cuda(rank)
        net = nn.parallel.DistributedDataParallel(net,
                                                  device_ids=[rank],
                                                  output_device=rank,
                                                  broadcast_buffers=False,
                                                  find_unused_parameters=True)
        # net       = nn.DataParallel(net).cuda()
        # criterion = nn.DataParallel(criterion).cuda()

    # Only parameters that require gradients are handed to the optimizer.
    optimizer = optim.SGD(filter(lambda x: x.requires_grad, net.parameters()),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)

    # ------------------------------------------------------------------
    # Bookkeeping and data loading
    # ------------------------------------------------------------------
    iteration = max(args.start_iter, 0)
    w.set_step(iteration)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size // args.num_gpus
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    from data.sampler_utils import InfiniteSampler, build_batch_data_sampler

    infinite_sampler = InfiniteSampler(dataset,
                                       seed=args.random_seed,
                                       num_replicas=args.num_gpus,
                                       rank=rank,
                                       shuffle=True)
    train_sampler = build_batch_data_sampler(infinite_sampler,
                                             images_per_batch=args.batch_size)

    data_loader = data.DataLoader(
        dataset,
        num_workers=args.num_workers,
        collate_fn=collate_fn,
        multiprocessing_context="fork" if args.num_workers > 1 else None,
        batch_sampler=train_sampler)
    data_loader_iter = iter(data_loader)

    if cfg.dataset.joint:
        joint_infinite_sampler = InfiniteSampler(joint_dataset,
                                                 seed=args.random_seed,
                                                 num_replicas=args.num_gpus,
                                                 rank=rank,
                                                 shuffle=True)
        joint_train_sampler = build_batch_data_sampler(
            joint_infinite_sampler, images_per_batch=args.batch_size)
        joint_data_loader = data.DataLoader(
            joint_dataset,
            num_workers=args.num_workers,
            collate_fn=joint_collate_fn,
            multiprocessing_context="fork" if args.num_workers > 1 else None,
            batch_sampler=joint_train_sampler)
        joint_data_loader_iter = iter(joint_data_loader)

    dist.barrier()

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()
    data_time_avg = MovingAverage(10)

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    def backward_and_log(prefix,
                         net_outs,
                         targets,
                         masks,
                         num_crowds,
                         extra_loss=None):
        """Compute the detection loss, take an optimizer step and log it.

        Args:
            prefix: Scalar tag prefix used for the summary writer.
            net_outs: Network output dict; ``net_outs["pred_outs"]`` is fed
                to the criterion.
            targets, masks, num_crowds: Ground truth from ``prepare_data``.
            extra_loss: Optional dict of additional loss terms that are
                merged into the criterion's losses.

        Returns:
            Dict mapping loss name to its (mean-reduced) tensor.
        """
        optimizer.zero_grad()

        out = net_outs["pred_outs"]
        wrapper = ScatterWrapper(targets, masks, num_crowds)
        losses = criterion(out, wrapper, wrapper.make_mask())

        losses = {k: v.mean()
                  for k, v in losses.items()}  # Mean here because Dataparallel

        if extra_loss is not None:
            assert isinstance(extra_loss, dict)
            losses.update(extra_loss)

        loss = sum([losses[k] for k in losses])

        # Backprop
        loss.backward()  # Do this to free up vram even if loss is not finite
        if torch.isfinite(loss).item():
            optimizer.step()

        # Add the loss to the moving average for bookkeeping
        for k in losses:
            loss_avgs[k].add(losses[k].item())
            w.add_scalar('{prefix}/{key}'.format(prefix=prefix, key=k),
                         losses[k].item())

        return losses

    logger.info('Begin training!')
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            while True:
                data_start_time = time.perf_counter()
                datum = next(data_loader_iter)
                dist.barrier()
                data_end_time = time.perf_counter()
                data_time = data_end_time - data_start_time
                if iteration != args.start_iter:
                    data_time_avg.add(data_time)
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed.
                        # Iterate the MovingAverage objects, not the dict keys.
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until and cfg.lr_warmup_init < args.lr:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                elif cfg.lr_schedule == 'cosine':
                    set_lr(
                        optimizer,
                        args.lr *
                        ((math.cos(math.pi * iteration / cfg.max_iter) + 1.) *
                         .5))

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while cfg.lr_schedule == 'step' and step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # `lr` is a module-level value; presumably maintained by set_lr - TODO confirm.
                global lr
                w.add_scalar('meta/lr', lr)

                if cfg.dataset.name == "FlyingChairs":
                    # Optical-flow training: the network receives the image pair via `extras`.
                    imgs_1, imgs_2, flows = prepare_flow_data(datum)
                    net_outs = net(None, extras=(imgs_1, imgs_2))
                    # Compute Loss
                    optimizer.zero_grad()

                    losses = criterion(net_outs, flows)

                    losses = {k: v.mean()
                              for k, v in losses.items()
                              }  # Mean here because Dataparallel
                    loss = sum([losses[k] for k in losses])

                    # Backprop
                    loss.backward(
                    )  # Do this to free up vram even if loss is not finite
                    if torch.isfinite(loss).item():
                        optimizer.step()

                    # Add the loss to the moving average for bookkeeping
                    for k in losses:
                        loss_avgs[k].add(losses[k].item())
                        w.add_scalar('loss/%s' % k, losses[k].item())

                elif cfg.dataset.joint or not cfg.dataset.is_video:
                    if cfg.dataset.joint:
                        joint_datum = next(joint_data_loader_iter)
                        dist.barrier()
                        # Load training data
                        # Note, for training on multiple gpus this will use the custom replicate and gather I wrote up there
                        images, targets, masks, num_crowds = prepare_data(
                            joint_datum)
                    else:
                        images, targets, masks, num_crowds = prepare_data(
                            datum)
                    extras = {
                        "backbone": "full",
                        "interrupt": False,
                        "moving_statistics": {
                            "aligned_feats": []
                        }
                    }
                    net_outs = net(images, extras=extras)
                    # Loss, backprop and logging are shared with the video path;
                    # scalars are written under the 'joint/' prefix.
                    losses = backward_and_log("joint", net_outs, targets,
                                              masks, num_crowds)

                # Forward Pass
                if cfg.dataset.is_video:
                    # reference frames: every frame except datum[0], visited last-to-first
                    references = []
                    moving_statistics = {"aligned_feats": [], "conf_hist": []}
                    for frame in datum[:0:-1]:
                        images, annots = frame

                        extras = {
                            "backbone": "full",
                            "interrupt": True,
                            "keep_statistics": True,
                            "moving_statistics": moving_statistics
                        }

                        # Reference frames are only used as context: no gradients.
                        with torch.no_grad():
                            net_outs = net(images, extras=extras)

                        moving_statistics["feats"] = net_outs["feats"]
                        moving_statistics["lateral"] = net_outs["lateral"]

                        # Drop everything except the phase outputs before storing.
                        keys_to_save = ("outs_phase_1", "outs_phase_2")
                        for key in set(net_outs.keys()) - set(keys_to_save):
                            del net_outs[key]
                        references.append(net_outs)

                    # key frame with annotation, but not compute full backbone
                    frame = datum[0]
                    images, annots = frame
                    frame = (
                        images,
                        annots,
                    )
                    images, targets, masks, num_crowds = prepare_data(frame)

                    extras = {
                        "backbone": "full",
                        "interrupt": not cfg.flow.base_backward,
                        "moving_statistics": moving_statistics
                    }
                    gt_net_outs = net(images, extras=extras)
                    if cfg.flow.base_backward:
                        losses = backward_and_log("compute", gt_net_outs,
                                                  targets, masks, num_crowds)

                    keys_to_save = ("outs_phase_1", "outs_phase_2")
                    for key in set(gt_net_outs.keys()) - set(keys_to_save):
                        del gt_net_outs[key]

                    # now do the warp
                    if references:
                        extras = {
                            "backbone": "partial",
                            "moving_statistics": moving_statistics
                        }

                        net_outs = net(images, extras=extras)
                        extra_loss = yolact_net.extra_loss(
                            net_outs, gt_net_outs)

                        losses = backward_and_log("warp",
                                                  net_outs,
                                                  targets,
                                                  masks,
                                                  num_crowds,
                                                  extra_loss=extra_loss)

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time
                w.add_scalar('meta/data_time', data_time)
                w.add_scalar('meta/iter_time', elapsed)

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]
                    if torch.cuda.is_available():
                        max_mem_mb = torch.cuda.max_memory_allocated(
                        ) / 1024.0 / 1024.0
                        # torch.cuda.reset_max_memory_allocated()
                        memory_str = "max_mem: {:.0f}M".format(max_mem_mb)
                    else:
                        # Formatting None with '{:.0f}' would raise TypeError.
                        memory_str = "max_mem: n/a"

                    log_template = (
                        "eta: {eta}  epoch: {epoch}  iter: {iter}  "
                        "{losses}  {loss_total}  "
                        "time: {time}  data_time: {data_time}  lr: {lr}  {memory}"
                    )
                    logger.info(
                        log_template.format(
                            eta=eta_str,
                            epoch=epoch,
                            iter=iteration,
                            losses="  ".join([
                                "{}: {:.3f}".format(k, loss_avgs[k].get_avg())
                                for k in losses
                            ]),
                            loss_total="T: {:.3f}".format(
                                sum([loss_avgs[k].get_avg() for k in losses])),
                            data_time="{:.3f}".format(data_time_avg.get_avg()),
                            time="{:.3f}".format(elapsed),
                            lr="{:.6f}".format(lr),
                            memory=memory_str))

                # Periodic flow visualisation, written by worker 0 only.
                if rank == 0 and iteration % 100 == 0:

                    if cfg.flow.train_flow:
                        import flowiz as fz
                        from layers.warp_utils import deform_op
                        tgt_size = (64, 64)
                        flow_size = flows.size()[2:]
                        vis_data = []
                        for pred_flow in net_outs:
                            vis_data.append(pred_flow)

                        deform_gt = deform_op(imgs_2, flows)
                        flows_pred = [
                            F.interpolate(x,
                                          size=flow_size,
                                          mode='bilinear',
                                          align_corners=False)
                            for x in net_outs
                        ]
                        deform_preds = [
                            deform_op(imgs_2, x) for x in flows_pred
                        ]

                        vis_data.append(
                            F.interpolate(flows, size=tgt_size, mode='area'))

                        # Keep only the first sample of every flow and shrink it.
                        vis_data = [
                            F.interpolate(flow[:1], size=tgt_size)
                            for flow in vis_data
                        ]
                        vis_data = [
                            fz.convert_from_flow(
                                flow[0].data.cpu().numpy().transpose(
                                    1, 2, 0)).transpose(
                                        2, 0, 1).astype('float32') / 255
                            for flow in vis_data
                        ]

                        def convert_image(image):
                            """Undo the input normalisation of the first image of a batch.

                            The image is resized to ``tgt_size``, its channel
                            axis is reversed before and after de-normalising
                            with ``STD``/``MEANS``, and the result is scaled
                            by 1/255 and clipped to [-1, 1].
                            """
                            image = F.interpolate(image,
                                                  size=tgt_size,
                                                  mode='area')
                            image = image[0]
                            image = image.data.cpu().numpy()
                            image = image[::-1]
                            image = image.transpose(1, 2, 0)
                            image = image * np.array(STD) + np.array(MEANS)
                            image = image.transpose(2, 0, 1)
                            image = image / 255
                            image = np.clip(image, -1, 1)
                            image = image[::-1]
                            return image

                        vis_data.append(convert_image(imgs_1))
                        vis_data.append(convert_image(imgs_2))
                        vis_data.append(convert_image(deform_gt))
                        vis_data.extend(
                            [convert_image(x) for x in deform_preds])

                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)

                    elif cfg.flow.warp_mode == "flow":
                        import flowiz as fz
                        tgt_size = (64, 64)
                        vis_data = []
                        for pred_flow, _, _ in net_outs["preds_flow"]:
                            vis_data.append(pred_flow)

                        vis_data = [
                            F.interpolate(flow[:1], size=tgt_size)
                            for flow in vis_data
                        ]
                        vis_data = [
                            fz.convert_from_flow(
                                flow[0].data.cpu().numpy().transpose(
                                    1, 2, 0)).transpose(
                                        2, 0, 1).astype('float32') / 255
                            for flow in vis_data
                        ]
                        input_image = F.interpolate(images,
                                                    size=tgt_size,
                                                    mode='area')
                        input_image = input_image[0]
                        input_image = input_image.data.cpu().numpy()
                        input_image = input_image.transpose(1, 2, 0)
                        input_image = input_image * np.array(
                            STD[::-1]) + np.array(MEANS[::-1])
                        input_image = input_image.transpose(2, 0, 1)
                        input_image = input_image / 255
                        input_image = np.clip(input_image, -1, 1)
                        vis_data.append(input_image)

                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)

                iteration += 1
                w.set_step(iteration)

                if rank == 0 and iteration % args.save_interval == 0 and iteration != args.start_iter:
                    # Remember the previous checkpoint before writing the new one.
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    logger.info('Saving state, iter: {}'.format(iteration))
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        # NOTE(review): the remainder is compared against
                        # save_interval, kept exactly as before - confirm intent.
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            logger.info('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    if rank == 0:
                        compute_validation_map(yolact_net, val_dataset)
                    # All workers wait until worker 0 has finished validating.
                    dist.barrier()

    except KeyboardInterrupt:
        if args.interrupt_no_save:
            logger.info('No save on interrupt, just exiting...')
        elif rank == 0:
            print('Stopping early. Saving network...')
            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            yolact_net.save_weights(
                save_path(epoch,
                          repr(iteration) + '_interrupt'))
        return

    # Final checkpoint after the last iteration.
    if rank == 0:
        yolact_net.save_weights(save_path(epoch, iteration))
Пример #29
0
    # Make newly created tensors default to the selected device type.
    if args.cuda:
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    # Shortcut: when resuming without display, load previously saved AP data,
    # compute the mAP from it and quit without loading the network.
    if args.resume and not args.display:
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # args.ap_data_file at files produced by a trusted run.
        with open(args.ap_data_file, 'rb') as f:
            ap_data = pickle.load(f)
        calc_map(ap_data)
        exit()

    dataset = None

    # Build the network, load the trained weights and switch to eval mode.
    print('Loading model...', end='')
    net = Yolact()
    net.load_weights(args.trained_model)
    net.eval()
    print(' Done.')

    if args.cuda:
        net = net.cuda()

    # Forward the command line switches to the detection layer and the config.
    net.detect.use_fast_nms = args.fast_nms
    net.detect.use_cross_class_nms = args.cross_class_nms
    cfg.mask_proto_debug = args.mask_proto_debug



scan = Scan(rgb_paths=rgb_paths, depth_paths=depth_paths, pose_paths=pose_paths,
            cam_intr=cam_intr, mesh_plot=mesh_plot, scannet_data=scannet_data, mask_net=net,
Пример #30
0
class YOLACT_MODEL():
    """YOLACT network wrapper for single-image inference and rendering.

    The constructor selects the ``yolact_resnet50_config`` configuration,
    loads the checkpoint onto the GPU and stores the rendering options.
    :meth:`detect` runs one image through the network and :meth:`display`
    draws the resulting masks and/or boxes onto it.
    """

    def __init__(self, opts):
        """Create the network and read the rendering options.

        Args:
            opts: Mapping providing ``'checkpoint'`` (weights file handed to
                ``Yolact.load_weights``), ``'threshold'`` (score threshold
                used by :meth:`detect`) and ``'mode'`` (``"mask_only"``
                disables boxes, ``"box_only"`` disables masks, any other
                value draws both).
        """
        # Note from the original author: the checkpoint may be shipped split
        # into parts that have to be joined first, e.g.
        #   cat weights/a* > weights/yolact_resnet50_54_800000.pth

        set_cfg('yolact_resnet50_config')
        cudnn.benchmark = True
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        self.net = Yolact()
        self.net.load_weights(opts['checkpoint'])
        print("done.")

        self.net.eval()
        self.net = self.net.cuda()

        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        # Colour tensors cached per device index, then per colour index.
        self.color_cache = defaultdict(dict)
        self.threshold = opts['threshold']
        self.mode = opts['mode']

    def detect(self, img):
        """Run the network on one image and return the annotated result.

        Args:
            img: Image convertible with ``np.array``. It is moved to the GPU
                as a float tensor; :meth:`display` unpacks its shape into
                three dimensions and divides it by 255 (presumably an
                H x W x 3 image with 0-255 values -- confirm against caller).

        Returns:
            The ``(image, boxes, scores)`` tuple produced by :meth:`display`
            with ``self.threshold`` as score threshold.
        """
        numpy_image = np.array(img)
        print('starting inference...')
        # Pure inference: do not let autograd record the forward pass.
        with torch.no_grad():
            frame = torch.from_numpy(numpy_image).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            preds = self.net(batch)
        print("done.")
        return self.display(preds,
                            frame,
                            None,
                            None,
                            undo_transform=False,
                            score_threshold=self.threshold)

    def display(self,
                dets_out,
                img,
                h,
                w,
                undo_transform=True,
                class_color=False,
                mask_alpha=0.45,
                top_k=100,
                score_threshold=0.3):
        """Draw the detections in ``dets_out`` onto ``img``.

        Args:
            dets_out: Raw network output, forwarded to ``postprocess``.
            img: Image tensor; it is divided by 255 for blending and its
                shape supplies the output height and width.
            h: Ignored, kept for interface compatibility (the height is
                always taken from ``img``).
            w: Ignored, kept for interface compatibility (the width is
                always taken from ``img``).
            undo_transform: When false, the channel order of every colour is
                reversed before drawing.
            class_color: Pick the colour from the class id instead of the
                detection index.
            mask_alpha: Opacity of the mask overlay.
            top_k: Maximum number of detections to keep.
            score_threshold: Score threshold forwarded to ``postprocess``.

        Returns:
            A tuple ``(image, boxes, scores)``. ``image`` is a uint8 numpy
            array; ``boxes`` and ``scores`` are the numpy arrays of the kept
            detections. When there is nothing to draw the unmodified image
            is returned together with empty ``boxes`` / ``scores`` slices,
            so the result can always be unpacked the same way.
        """
        img_gpu = img / 255.0
        # The size always comes from the image itself; the h / w arguments
        # are overwritten here.
        h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=False,
                            crop_masks=True,
                            score_threshold=score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:top_k]
            classes, scores, boxes = [
                x[:top_k].detach().cpu().numpy() for x in t[:3]
            ]

        # Stop at the first negative score.
        num_dets_to_consider = min(top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < 0:
                num_dets_to_consider = j
                break

        if num_dets_to_consider == 0:
            # No detections found: hand back the original image, using the
            # same tuple layout as the regular return below.
            untouched = (img_gpu * 255).byte().detach().cpu().numpy()
            return (untouched, boxes[:0], scores[:0])

        # Quick and dirty helper for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            color_idx = (classes[j] * 5 if class_color else j *
                         5) % len(COLORS)

            if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
                return self.color_cache[on_gpu][color_idx]

            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                self.color_cache[on_gpu][color_idx] = color
            return color

        show_mask = self.mode != "box_only"
        show_box = self.mode != "mask_only"

        print("mode :", self.mode)
        print("show_mask :", show_mask)
        print("show_box :", show_box)

        # First, draw the masks on the GPU where we can do it really fast
        if show_mask and cfg.eval_mask_branch:
            # After this, mask is of size [num_dets, h, w, 1]
            masks = masks[:num_dets_to_consider, :, :, None]

            # One colour per detection, shaped so it broadcasts over the mask
            device_index = img_gpu.device.index
            colors = torch.cat([
                get_color(j, on_gpu=device_index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # This is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # Closed form of the sequential blend (per the original author):
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().detach().cpu().numpy()

        if show_box:
            font_face = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.6
            font_thickness = 1
            text_color = [255, 255, 255]

            for j in reversed(range(num_dets_to_consider)):
                # Plain Python ints, so OpenCV gets native integer coordinates.
                x1, y1, x2, y2 = (int(v) for v in boxes[j, :])
                color = get_color(j)
                score = scores[j]

                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score)

                text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                 font_scale,
                                                 font_thickness)[0]

                text_pt = (x1, y1 - 3)

                # Filled label background above the box, then the caption.
                cv2.rectangle(img_numpy, (x1, y1),
                              (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face,
                            font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

        return (img_numpy, boxes, scores)