Example #1
def load_model(model_file):
  torch.set_default_tensor_type('torch.cuda.FloatTensor')
  set_cfg('yolact_plus_resnet50_config')
  net = Yolact()
  net.load_weights(model_file)
  net.eval()
  return net
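A minimal usage sketch for this loader. The weight path and test image are placeholders, and FastBaseTransform/postprocess are the YOLACT repo's own pre/post-processing helpers; the call pattern mirrors the repo's evalimage:

import cv2
import torch
from utils.augmentations import FastBaseTransform
from layers.output_utils import postprocess

net = load_model('weights/yolact_plus_resnet50_54_800000.pth')  # hypothetical path
with torch.no_grad():
    frame = torch.from_numpy(cv2.imread('test.jpg')).cuda().float()
    preds = net(FastBaseTransform()(frame.unsqueeze(0)))
    # classes, scores, boxes, masks scaled to the original image size
    classes, scores, boxes, masks = postprocess(
        preds, frame.shape[1], frame.shape[0], score_threshold=0.15)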
Example #2
class YOLACT_MODEL():

    def __init__(self, opts):
        # Concatenate the split weight files into one checkpoint file
        # if not os.path.isfile('weights/yolact_resnet50_54_800000.pth'):    
        #     script = "cat weights/a* > weights/yolact_resnet50_54_800000.pth"
        #     call(script, shell=True)

        set_cfg('yolact_resnet50_config')
        cudnn.benchmark = True
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        self.net = Yolact()
        self.net.load_weights(opts['checkpoint'])
        print("done.")

        self.net.eval()                        
        self.net = self.net.cuda()

        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        self.color_cache = defaultdict(lambda: {})
        self.threshold = opts['threshold']
        
    # Run detection on an image and return the visualized output.
    def detect(self, img):
        numpy_image = np.array(img)
        print('starting inference...')
        frame = torch.from_numpy(numpy_image).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = self.net(batch)
        print("done.")
        output_image = self.display(preds, frame, None, None,
                                     undo_transform=False, score_threshold=self.threshold)
        return output_image

    def display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, top_k=100, score_threshold=0.3):
        img_gpu = img / 255.0
        h, w, _ = img.shape
        
        with timer.env('Postprocess'):
            t = postprocess(dets_out, w, h, visualize_lincomb = False,
                                            crop_masks        = True,
                                            score_threshold   = score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            masks = None
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:top_k]

        # Apply the first mask (guard against a disabled mask branch or no detections)
        if masks is not None and masks.shape[0] > 0:
            img_gpu = img_gpu * masks[0]
            
        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()
               
        return img_numpy        
Example #3
    def __init__(
            self,
            weights='./crow_vision_yolact/data/yolact/weights/weights_yolact_kuka_17/crow_base_35_457142.pth',
            config=None,
            batchsize=1,
            top_k=25,
            score_threshold=0.1,
            display_text=True,
            display_bboxes=True,
            display_masks=True,
            display_scores=True):
        self.score_threshold = score_threshold
        self.top_k = top_k
        self.batchsize = batchsize

        # initialize a yolact net for inference
        ## YOLACT setup
        # setup config
        if config is not None:
            if '.obj' in config:
                with open(config, 'rb') as f:
                    config = dill.load(f)
            set_cfg(config)
        self.class_names_tuple = get_class_names_tuple()

        parse_args([
            '--top_k=' + str(top_k),
            '--score_threshold=' + str(score_threshold),
            '--display_text=' + str(display_text),
            '--display_bboxes=' + str(display_bboxes),
            '--display_masks=' + str(display_masks),
            '--display_scores=' + str(display_scores),
        ])

        # CUDA setup for yolact
        torch.backends.cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

        #YOLACT net itself
        with torch.no_grad():
            net = Yolact().cuda(torch.cuda.current_device())
            net.load_weights(weights)
            net.eval()
            net.detect.use_fast_nms = True
            net.detect.use_cross_class_nms = False

        self.net = net
        print("YOLACT network available as self.net")

        # for debugging / benchmarking
        self.duration = 0.0
Example #4
def init_model(transform):
    args = parse_args()

    if args.config is not None:
        print(args.config)
        set_cfg(args.config)
        cfg.mask_proto_debug = False

    if args.trained_model == 'interrupt':
        args.trained_model = SavePath.get_interrupt('weights/')
    elif args.trained_model == 'latest':
        args.trained_model = SavePath.get_latest('weights/', cfg.name)

    if args.config is None:
        model_path = SavePath.from_str(args.trained_model)
        # TODO: Bad practice? Probably want to do a name lookup instead.
        args.config = model_path.model_name + '_config'
        print('Config not specified. Parsed %s from the file name.\n' %
              args.config)
        set_cfg(args.config)

    if args.detect:
        cfg.eval_mask_branch = False

    if args.dataset is not None:
        set_dataset(args.dataset)

    with torch.no_grad():
        if args.cuda:
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')

        print('Loading model...', end='')
        net = Yolact()
        net.load_weights(args.trained_model)
        net.eval()
        print(' Done.')
        if args.cuda:
            net = net.cuda()
            net = CustomDataParallel(net).cuda()
            # Note: this rebinds the local `transform`; only `net` and `args` are returned
            transform = torch.nn.DataParallel(FastBaseTransform()).cuda()

    return net, args
Example #5
def prepare_model(args):
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        init_path = args.save_folder + cfg.backbone.path
        print('Initializing weights...', init_path)
        if os.path.isfile(init_path):
            yolact_net.init_weights(backbone_path=init_path)
        else:
            print('No backbone weights found; starting from default initialization')
    return yolact_net
Example #6
def load_weights(filename, cuda):
    """Load YOLACT network weights"""
    global ynet
    if filename == '':
        raise ValueError('Empty filename for network weights')
    print('#### CUDA ENABLED', cuda)
    print(f'Loading weights from {filename}')
    tic = time.perf_counter_ns()
    with torch.no_grad():
        if cuda:
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')
        ynet = Yolact()
        ynet.load_weights(filename, False)
        ynet.eval()
    toc = time.perf_counter_ns()
    logging.debug(f'Time to load weights: {1e-9 * (toc - tic):.3f} s')
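A hedged usage line for this loader (the weight file is a placeholder; ynet is the module-level global the function populates):

load_weights('weights/yolact_base_54_800000.pth', cuda=torch.cuda.is_available())
# the global `ynet` now holds the eval-mode network, ready for no-grad inference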
Example #7
def convert_to_onnx_with_hydra(cfg: DictConfig):

    # create folder for onnx
    createFolderOnnx(cfg)
    # set cfg
    set_cfg(cfg.onnx.yolact_cfg)

    model = Yolact()
    model.load_weights(cfg.onnx.model_ckpt_path)
    model.eval()

    model = model.cpu()

    dummy_input = torch.rand(
        (cfg.onnx.model_batch_size, cfg.onnx.model_channel_input,
         cfg.onnx.model_height_input, cfg.onnx.model_width_input))

    torch.onnx.export(model,
                      dummy_input,
                      cfg.onnx.model_onnx_path,
                      verbose=cfg.onnx.verbose,
                      opset_version=cfg.onnx.opset_version)
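Once exported, the graph can be sanity-checked with onnxruntime. A sketch assuming the export above succeeded and the model uses YOLACT's default 550x550 input (some YOLACT ops may need opset or model tweaks to trace cleanly):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('yolact.onnx', providers=['CPUExecutionProvider'])  # hypothetical path
dummy = np.random.rand(1, 3, 550, 550).astype(np.float32)
outs = sess.run(None, {sess.get_inputs()[0].name: dummy})
print([o.shape for o in outs])  # raw head outputs; decode with the repo's postprocess helpers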
Example #8
def main(args):

  rospy.init_node('yolact_ros')
  rospack = rospkg.RosPack()
  yolact_path = rospack.get_path('yolact_ros')
  
  model_path_str = yolact_path + "/scripts/yolact/weights/yolact_base_54_800000.pth"
  model_path = SavePath.from_str(model_path_str)
  set_cfg(model_path.model_name + '_config')

  with torch.no_grad():
      results_path_str = yolact_path + "/scripts/yolact/results"
      if not os.path.exists(results_path_str):
          os.makedirs(results_path_str)

      cudnn.benchmark = True
      cudnn.fastest = True
      torch.set_default_tensor_type('torch.cuda.FloatTensor')   

      print('Loading model...', end='')
      net = Yolact()
      net.load_weights(model_path_str)
      net.eval()
      print(' Done.')

      net = net.cuda()
      net.detect.use_fast_nms = True
      cfg.mask_proto_debug = False

  ic = image_converter(net)
  

  try:
    rospy.spin()
  except KeyboardInterrupt:
    print("Shutting down")
  cv2.destroyAllWindows()
Example #9
#================================================================
#   Editor      : VIM
#   File name   : convert_weight.py
#   Author      : YunYang1994
#   Created date: 2019-07-27 18:07:20
#   Description :
#
#================================================================

import torch
import numpy as np
from yolact import Yolact

with torch.no_grad():
    model = Yolact()
    model.eval()
    model.load_weights("./yolact_darknet53_54_800000.pth")
    modules = model.children()


def parse_layer(layer, weights):
    assert isinstance(layer, torch.nn.Conv2d) or isinstance(
        layer, torch.nn.BatchNorm2d)
    print("=> Parsing ", layer)
    if isinstance(layer, torch.nn.Conv2d):
        weight, bias = layer.weight.detach().numpy(), layer.bias
        weight = np.transpose(
            weight, [2, 3, 1, 0])  # k_h, k_w, in_channels, out_channels
        if bias is None:
            weights.append([weight])
        else:
            bias = layer.bias.detach().numpy()
            # Assumption: mirror the bias-free case above and store [weight, bias]
            weights.append([weight, bias])
Example #10

if __name__ == '__main__':

    # Dataset and labels
    valid_dataset = COCODetection(image_path='./data/coco/images/val2017/',
                                  info_file='./data/coco/annotations/instances_val2017.json',
                                  transform=BaseTransform(),
                                  has_gt=True
                                  )
    prep_coco_cats()

    # Model
    print('Loading model...', end='')
    model = Yolact()
    model.load_weights(args.trained_model)
    model.eval()
    model = model.cuda() if args.cuda else model.cpu()
    print(' Done.')

    # Main entry point
    with torch.no_grad():
        if not os.path.exists('results'):
            os.makedirs('results')

        if args.cuda:
            torch.backends.cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')
Example #11
def train(rank, args):
    if args.num_gpus > 1:
        multi_gpu_rescale(args)
    if rank == 0:
        if not os.path.exists(args.save_folder):
            os.mkdir(args.save_folder)

    # set up logger
    setup_logger(output=os.path.join(args.log_folder, cfg.name),
                 distributed_rank=rank)
    logger = logging.getLogger("yolact.train")

    w = SummaryHelper(distributed_rank=rank,
                      log_dir=os.path.join(args.log_folder, cfg.name))
    w.add_text("argv", " ".join(sys.argv))
    logger.info("Args: {}".format(" ".join(sys.argv)))
    import git
    with git.Repo(search_parent_directories=True) as repo:
        w.add_text("git_hash", repo.head.object.hexsha)
        logger.info("git hash: {}".format(repo.head.object.hexsha))

    try:
        logger.info("Initializing torch.distributed backend...")
        dist.init_process_group(backend='nccl',
                                init_method=args.dist_url,
                                world_size=args.num_gpus,
                                rank=rank)
    except Exception as e:
        logger.error("Process group URL: {}".format(args.dist_url))
        raise e

    dist.barrier()

    if torch.cuda.device_count() > 1:
        logger.info('Multiple GPUs detected! Turning off JIT.')

    collate_fn = detection_collate
    if cfg.dataset.name == 'YouTube VIS':
        dataset = YoutubeVIS(image_path=cfg.dataset.train_images,
                             info_file=cfg.dataset.train_info,
                             configs=cfg.dataset,
                             transform=SSDAugmentationVideo(MEANS))

        if cfg.dataset.joint == 'coco':
            joint_dataset = COCODetection(
                image_path=cfg.joint_dataset.train_images,
                info_file=cfg.joint_dataset.train_info,
                transform=SSDAugmentation(MEANS))
            joint_collate_fn = detection_collate

        if args.validation_epoch > 0:
            setup_eval()
            val_dataset = YoutubeVIS(image_path=cfg.dataset.valid_images,
                                     info_file=cfg.dataset.valid_info,
                                     configs=cfg.dataset,
                                     transform=BaseTransformVideo(MEANS))
        collate_fn = collate_fn_youtube_vis

    elif cfg.dataset.name == 'FlyingChairs':
        dataset = FlyingChairs(image_path=cfg.dataset.trainval_images,
                               info_file=cfg.dataset.trainval_info)

        collate_fn = collate_fn_flying_chairs

    else:
        dataset = COCODetection(image_path=cfg.dataset.train_images,
                                info_file=cfg.dataset.train_info,
                                transform=SSDAugmentation(MEANS))

        if args.validation_epoch > 0:
            setup_eval()
            val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                        info_file=cfg.dataset.valid_info,
                                        transform=BaseTransform(MEANS))

    # Set cuda device early to avoid duplicate model in master GPU
    if args.cuda:
        torch.cuda.set_device(rank)

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs.

    # (re-enable the timer when running timing experiments)
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        logger.info('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume, args=args)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        logger.info('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)

    if cfg.flow.train_flow:
        criterion = OpticalFlowLoss()

    else:
        criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                                 pos_threshold=cfg.positive_iou_threshold,
                                 neg_threshold=cfg.negative_iou_threshold,
                                 negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net.cuda(rank)
        criterion.cuda(rank)
        net = nn.parallel.DistributedDataParallel(net,
                                                  device_ids=[rank],
                                                  output_device=rank,
                                                  broadcast_buffers=False,
                                                  find_unused_parameters=True)
        # net       = nn.DataParallel(net).cuda()
        # criterion = nn.DataParallel(criterion).cuda()

    optimizer = optim.SGD(filter(lambda x: x.requires_grad, net.parameters()),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    w.set_step(iteration)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size // args.num_gpus
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    from data.sampler_utils import InfiniteSampler, build_batch_data_sampler

    infinite_sampler = InfiniteSampler(dataset,
                                       seed=args.random_seed,
                                       num_replicas=args.num_gpus,
                                       rank=rank,
                                       shuffle=True)
    train_sampler = build_batch_data_sampler(infinite_sampler,
                                             images_per_batch=args.batch_size)

    data_loader = data.DataLoader(
        dataset,
        num_workers=args.num_workers,
        collate_fn=collate_fn,
        multiprocessing_context="fork" if args.num_workers > 1 else None,
        batch_sampler=train_sampler)
    data_loader_iter = iter(data_loader)

    if cfg.dataset.joint:
        joint_infinite_sampler = InfiniteSampler(joint_dataset,
                                                 seed=args.random_seed,
                                                 num_replicas=args.num_gpus,
                                                 rank=rank,
                                                 shuffle=True)
        joint_train_sampler = build_batch_data_sampler(
            joint_infinite_sampler, images_per_batch=args.batch_size)
        joint_data_loader = data.DataLoader(
            joint_dataset,
            num_workers=args.num_workers,
            collate_fn=joint_collate_fn,
            multiprocessing_context="fork" if args.num_workers > 1 else None,
            batch_sampler=joint_train_sampler)
        joint_data_loader_iter = iter(joint_data_loader)

    dist.barrier()

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()
    data_time_avg = MovingAverage(10)

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    def backward_and_log(prefix,
                         net_outs,
                         targets,
                         masks,
                         num_crowds,
                         extra_loss=None):
        optimizer.zero_grad()

        out = net_outs["pred_outs"]
        wrapper = ScatterWrapper(targets, masks, num_crowds)
        losses = criterion(out, wrapper, wrapper.make_mask())

        losses = {k: v.mean()
                  for k, v in losses.items()}  # Mean here because Dataparallel

        if extra_loss is not None:
            assert type(extra_loss) == dict
            losses.update(extra_loss)

        loss = sum([losses[k] for k in losses])

        # Backprop
        loss.backward()  # Do this to free up vram even if loss is not finite
        if torch.isfinite(loss).item():
            optimizer.step()

        # Add the loss to the moving average for bookkeeping
        for k in losses:
            loss_avgs[k].add(losses[k].item())
            w.add_scalar('{prefix}/{key}'.format(prefix=prefix, key=k),
                         losses[k].item())

        return losses

    logger.info('Begin training!')
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            while True:
                data_start_time = time.perf_counter()
                datum = next(data_loader_iter)
                dist.barrier()
                data_end_time = time.perf_counter()
                data_time = data_end_time - data_start_time
                if iteration != args.start_iter:
                    data_time_avg.add(data_time)
                # Stop once we've hit the end of this epoch (relevant when resuming from start_iter)
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs:
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until and cfg.lr_warmup_init < args.lr:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                elif cfg.lr_schedule == 'cosine':
                    set_lr(
                        optimizer,
                        args.lr *
                        ((math.cos(math.pi * iteration / cfg.max_iter) + 1.) *
                         .5))

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while cfg.lr_schedule == 'step' and step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                global lr
                w.add_scalar('meta/lr', lr)

                if cfg.dataset.name == "FlyingChairs":
                    imgs_1, imgs_2, flows = prepare_flow_data(datum)
                    net_outs = net(None, extras=(imgs_1, imgs_2))
                    # Compute Loss
                    optimizer.zero_grad()

                    losses = criterion(net_outs, flows)

                    losses = {k: v.mean()
                              for k, v in losses.items()
                              }  # Mean here because Dataparallel
                    loss = sum([losses[k] for k in losses])

                    # Backprop; run it even if the loss is not finite to free up VRAM
                    loss.backward()
                    if torch.isfinite(loss).item():
                        optimizer.step()

                    # Add the loss to the moving average for bookkeeping
                    for k in losses:
                        loss_avgs[k].add(losses[k].item())
                        w.add_scalar('loss/%s' % k, losses[k].item())

                elif cfg.dataset.joint or not cfg.dataset.is_video:
                    if cfg.dataset.joint:
                        joint_datum = next(joint_data_loader_iter)
                        dist.barrier()
                        # Load training data
                        # Note, for training on multiple gpus this will use the custom replicate and gather I wrote up there
                        images, targets, masks, num_crowds = prepare_data(
                            joint_datum)
                    else:
                        images, targets, masks, num_crowds = prepare_data(
                            datum)
                    extras = {
                        "backbone": "full",
                        "interrupt": False,
                        "moving_statistics": {
                            "aligned_feats": []
                        }
                    }
                    net_outs = net(images, extras=extras)
                    out = net_outs["pred_outs"]
                    # Compute Loss
                    optimizer.zero_grad()

                    wrapper = ScatterWrapper(targets, masks, num_crowds)
                    losses = criterion(out, wrapper, wrapper.make_mask())

                    losses = {k: v.mean()
                              for k, v in losses.items()
                              }  # Mean here because Dataparallel
                    loss = sum([losses[k] for k in losses])

                    # Backprop; run it even if the loss is not finite to free up VRAM
                    loss.backward()
                    if torch.isfinite(loss).item():
                        optimizer.step()

                    # Add the loss to the moving average for bookkeeping
                    for k in losses:
                        loss_avgs[k].add(losses[k].item())
                        w.add_scalar('joint/%s' % k, losses[k].item())

                # Forward Pass
                if cfg.dataset.is_video:
                    # reference frames
                    references = []
                    moving_statistics = {"aligned_feats": [], "conf_hist": []}
                    for idx, frame in enumerate(datum[:0:-1]):
                        images, annots = frame

                        extras = {
                            "backbone": "full",
                            "interrupt": True,
                            "keep_statistics": True,
                            "moving_statistics": moving_statistics
                        }

                        with torch.no_grad():
                            net_outs = net(images, extras=extras)

                        moving_statistics["feats"] = net_outs["feats"]
                        moving_statistics["lateral"] = net_outs["lateral"]

                        keys_to_save = ("outs_phase_1", "outs_phase_2")
                        for key in set(net_outs.keys()) - set(keys_to_save):
                            del net_outs[key]
                        references.append(net_outs)

                    # Key frame with annotations; the full backbone may be skipped below
                    frame = datum[0]
                    images, annots = frame
                    images, targets, masks, num_crowds = prepare_data(frame)

                    extras = {
                        "backbone": "full",
                        "interrupt": not cfg.flow.base_backward,
                        "moving_statistics": moving_statistics
                    }
                    gt_net_outs = net(images, extras=extras)
                    if cfg.flow.base_backward:
                        losses = backward_and_log("compute", gt_net_outs,
                                                  targets, masks, num_crowds)

                    keys_to_save = ("outs_phase_1", "outs_phase_2")
                    for key in set(gt_net_outs.keys()) - set(keys_to_save):
                        del gt_net_outs[key]

                    # now do the warp
                    if len(references) > 0:
                        reference_frame = references[0]
                        extras = {
                            "backbone": "partial",
                            "moving_statistics": moving_statistics
                        }

                        net_outs = net(images, extras=extras)
                        extra_loss = yolact_net.extra_loss(
                            net_outs, gt_net_outs)

                        losses = backward_and_log("warp",
                                                  net_outs,
                                                  targets,
                                                  masks,
                                                  num_crowds,
                                                  extra_loss=extra_loss)

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time
                w.add_scalar('meta/data_time', data_time)
                w.add_scalar('meta/iter_time', elapsed)

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]
                    if torch.cuda.is_available():
                        max_mem_mb = torch.cuda.max_memory_allocated(
                        ) / 1024.0 / 1024.0
                        # torch.cuda.reset_max_memory_allocated()
                    else:
                        max_mem_mb = None

                    memory_str = ("max_mem: {:.0f}M".format(max_mem_mb)
                                  if max_mem_mb is not None else "")
                    logger.info(
                        "eta: {eta}  epoch: {epoch}  iter: {iter}  "
                        "{losses}  {loss_total}  "
                        "time: {time}  data_time: {data_time}  lr: {lr}  {memory}".format(
                            eta=eta_str,
                            epoch=epoch,
                            iter=iteration,
                            losses="  ".join("{}: {:.3f}".format(
                                k, loss_avgs[k].get_avg()) for k in losses),
                            loss_total="T: {:.3f}".format(
                                sum(loss_avgs[k].get_avg() for k in losses)),
                            time="{:.3f}".format(elapsed),
                            data_time="{:.3f}".format(data_time_avg.get_avg()),
                            lr="{:.6f}".format(lr),
                            memory=memory_str))

                if rank == 0 and iteration % 100 == 0:

                    if cfg.flow.train_flow:
                        import flowiz as fz
                        from layers.warp_utils import deform_op
                        tgt_size = (64, 64)
                        flow_size = flows.size()[2:]
                        vis_data = []
                        for pred_flow in net_outs:
                            vis_data.append(pred_flow)

                        deform_gt = deform_op(imgs_2, flows)
                        flows_pred = [
                            F.interpolate(x,
                                          size=flow_size,
                                          mode='bilinear',
                                          align_corners=False)
                            for x in net_outs
                        ]
                        deform_preds = [
                            deform_op(imgs_2, x) for x in flows_pred
                        ]

                        vis_data.append(
                            F.interpolate(flows, size=tgt_size, mode='area'))

                        vis_data = [
                            F.interpolate(flow[:1], size=tgt_size)
                            for flow in vis_data
                        ]
                        vis_data = [
                            fz.convert_from_flow(
                                flow[0].data.cpu().numpy().transpose(
                                    1, 2, 0)).transpose(
                                        2, 0, 1).astype('float32') / 255
                            for flow in vis_data
                        ]

                        def convert_image(image):
                            image = F.interpolate(image,
                                                  size=tgt_size,
                                                  mode='area')
                            image = image[0]
                            image = image.data.cpu().numpy()
                            image = image[::-1]
                            image = image.transpose(1, 2, 0)
                            image = image * np.array(STD) + np.array(MEANS)
                            image = image.transpose(2, 0, 1)
                            image = image / 255
                            image = np.clip(image, -1, 1)
                            image = image[::-1]
                            return image

                        vis_data.append(convert_image(imgs_1))
                        vis_data.append(convert_image(imgs_2))
                        vis_data.append(convert_image(deform_gt))
                        vis_data.extend(
                            [convert_image(x) for x in deform_preds])

                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)

                    elif cfg.flow.warp_mode == "flow":
                        import flowiz as fz
                        tgt_size = (64, 64)
                        vis_data = []
                        for pred_flow, _, _ in net_outs["preds_flow"]:
                            vis_data.append(pred_flow)

                        vis_data = [
                            F.interpolate(flow[:1], size=tgt_size)
                            for flow in vis_data
                        ]
                        vis_data = [
                            fz.convert_from_flow(
                                flow[0].data.cpu().numpy().transpose(
                                    1, 2, 0)).transpose(
                                        2, 0, 1).astype('float32') / 255
                            for flow in vis_data
                        ]
                        input_image = F.interpolate(images,
                                                    size=tgt_size,
                                                    mode='area')
                        input_image = input_image[0]
                        input_image = input_image.data.cpu().numpy()
                        input_image = input_image.transpose(1, 2, 0)
                        input_image = input_image * np.array(
                            STD[::-1]) + np.array(MEANS[::-1])
                        input_image = input_image.transpose(2, 0, 1)
                        input_image = input_image / 255
                        input_image = np.clip(input_image, -1, 1)
                        vis_data.append(input_image)

                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)

                iteration += 1
                w.set_step(iteration)

                if rank == 0 and iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    logger.info('Saving state, iter: {}'.format(iteration))
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            logger.info('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    if rank == 0:
                        compute_validation_map(yolact_net, val_dataset)
                    dist.barrier()

    except KeyboardInterrupt:
        if args.interrupt_no_save:
            logger.info('No save on interrupt, just exiting...')
        elif rank == 0:
            print('Stopping early. Saving network...')
            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            yolact_net.save_weights(
                save_path(epoch,
                          repr(iteration) + '_interrupt'))
        return

    if rank == 0:
        yolact_net.save_weights(save_path(epoch, iteration))
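The loop above calls a set_lr helper that is not part of the snippet; a minimal version consistent with its usage here (it must also update the global lr that the logging code reads) would be:

def set_lr(optimizer, new_lr):
    global lr
    # Rewrite the learning rate on every parameter group
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr
    lr = new_lr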
Example #12
class YOLACT_MODEL():
    def __init__(self, opts):
        # Concatenate the split weight files into one checkpoint file
        # if not os.path.isfile('weights/yolact_resnet50_54_800000.pth'):
        #     script = "cat weights/a* > weights/yolact_resnet50_54_800000.pth"
        #     call(script, shell=True)

        set_cfg('yolact_resnet50_config')
        cudnn.benchmark = True
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        self.net = Yolact()
        self.net.load_weights(opts['checkpoint'])
        print("done.")

        self.net.eval()
        self.net = self.net.cuda()

        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        self.color_cache = defaultdict(lambda: {})
        self.threshold = opts['threshold']
        self.mode = opts['mode']

    # Run detection on an image and return the visualized output.
    def detect(self, img):
        numpy_image = np.array(img)
        print('starting inference...')
        frame = torch.from_numpy(numpy_image).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = self.net(batch)
        print("done.")
        return self.display(preds,
                            frame,
                            None,
                            None,
                            undo_transform=False,
                            score_threshold=self.threshold)

    def display(self,
                dets_out,
                img,
                h,
                w,
                undo_transform=True,
                class_color=False,
                mask_alpha=0.45,
                top_k=100,
                score_threshold=0.3):
        img_gpu = img / 255.0
        h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=False,
                            crop_masks=True,
                            score_threshold=score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:top_k]
            classes, scores, boxes = [
                x[:top_k].detach().cpu().numpy() for x in t[:3]
            ]

        num_dets_to_consider = min(top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < 0:
                num_dets_to_consider = j
                break

        if num_dets_to_consider == 0:
            # No detections found so just output the original image
            return (img_gpu * 255).byte().detach().cpu().numpy()

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            color_idx = (classes[j] * 5 if class_color else j *
                         5) % len(COLORS)

            if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
                return self.color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BGR, depending on the source
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    self.color_cache[on_gpu][color_idx] = color
                return color

        show_mask = True
        show_box = True

        if self.mode == "mask_only":
            show_box = False

        if self.mode == "box_only":
            show_mask = False

        print("mode :", self.mode)
        print("show_mask :", show_mask)
        print("show_box :", show_box)

        # First, draw the masks on the GPU where we can do it really fast
        # Beware: very fast but possibly unintelligible mask-drawing code ahead
        # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
        if show_mask and cfg.eval_mask_branch:
            # After this, mask is of size [num_dets, h, w, 1]
            masks = masks[:num_dets_to_consider, :, :, None]

            # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # This is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # I did the math for this on pen and paper. This whole block should be equivalent to:
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        if show_box:
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score)

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                 font_scale,
                                                 font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1),
                              (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face,
                            font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

        return (img_numpy, boxes, scores)
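A hypothetical construction of this wrapper, with opts keys matching the ones read in __init__ (the checkpoint path is a placeholder; 'mode' may be 'mask_only', 'box_only', or anything else to draw both):

model = YOLACT_MODEL({'checkpoint': 'weights/yolact_resnet50_54_800000.pth',
                      'threshold': 0.3,
                      'mode': 'mask_only'})
img_out, boxes, scores = model.detect(image)  # image: a PIL image or numpy array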
Example #13
def interpret():
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net = nn.DataParallel(net).cuda()
        criterion = nn.DataParallel(criterion).cuda()
        # net = net.cuda()
        # criterion = criterion.cuda()

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    print("Dataset Size:")
    print(len(dataset))
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    num_epochs = 1  # only sweep the dataset once for this interpretation pass

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin interpret!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue
            count = 0
            for datum in data_loader:
                del datum
                count += 1

                if count % 10000 == 0:
                    print(count)
    except KeyboardInterrupt:
        print('Stopping early. Saving network...')

    print('Number of dataset items iterated:', count)
Example #14
def train():
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net = nn.DataParallel(net).cuda()
        criterion = nn.DataParallel(criterion).cuda()

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    loss_types = ['B', 'C', 'M', 'P', 'D', 'E', 'S']  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop once we've hit the end of this epoch (relevant when resuming from start_iter)
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs:
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # Load training data
                # Note, for training on multiple gpus this will use the custom replicate and gather I wrote up there
                images, targets, masks, num_crowds = prepare_data(datum)

                # Forward Pass
                out = net(images)

                # Compute Loss
                optimizer.zero_grad()

                wrapper = ScatterWrapper(targets, masks, num_crowds)
                losses = criterion(out, wrapper, wrapper.make_mask())

                losses = {k: v.mean()
                          for k, v in losses.items()
                          }  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])

                # Backprop; run it even if the loss is not finite to free up VRAM
                loss.backward()
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) +
                           ' T: %.3f || ETA: %s || timer: %.3f') %
                          tuple([epoch, iteration] + loss_labels +
                                [total, eta_str, elapsed]),
                          flush=True)

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(yolact_net, val_dataset)
    except KeyboardInterrupt:
        print('Stopping early. Saving network...')

        # Delete previous copy of the interrupted network so we don't spam the weights folder
        SavePath.remove_interrupt(args.save_folder)

        yolact_net.save_weights(
            save_path(epoch,
                      repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
Example #15
def train():
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    if args.log:
        log = Log(cfg.name,
                  args.log_folder,
                  dict(args._get_kwargs()),
                  overwrite=(args.resume is None),
                  log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio)

    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print(
                'Error: Batch allocation (%s) does not sum to batch size (%s).'
                % (args.batch_alloc, args.batch_size))
            exit(-1)

    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    # Initialize everything
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn: yolact_net.freeze_bn(True)

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop once we reach the end of this epoch (relevant when resuming from start_iter)
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                # Forward Pass + Compute loss at the same time (see CustomDataParallel and NetLoss)
                losses = net(datum)

                losses = {k: v.mean() for k, v in losses.items()}  # Mean here because DataParallel
                loss = sum([losses[k] for k in losses])

                # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                # all_loss = sum([v.mean() for v in losses.values()])

                # Backprop
                loss.backward()  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) +
                           ' T: %.3f || ETA: %s || timer: %.3f') %
                          tuple([epoch, iteration] + loss_labels +
                                [total, eta_str, elapsed]),
                          flush=True)

                if args.log:
                    precision = 5
                    loss_info = {
                        k: round(losses[k].item(), precision)
                        for k in losses
                    }
                    loss_info['T'] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0)  # nvidia-smi is sloooow

                    log.log('train',
                            loss=loss_info,
                            epoch=epoch,
                            iter=iteration,
                            lr=round(cur_lr, 10),
                            elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(epoch, iteration, yolact_net,
                                           val_dataset,
                                           log if args.log else None)

        # Compute validation mAP after training is finished
        compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                               log if args.log else None)
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')

            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            yolact_net.save_weights(
                save_path(epoch,
                          repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
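
The warmup and step-decay logic in the loop above amounts to a single pure function of the iteration count. This sketch assumes the same behaviour as the code (linear warmup from cfg.lr_warmup_init, then one gamma decay per milestone passed in cfg.lr_steps); set_lr is reconstructed from how the script calls it, and the milestone numbers in the usage lines are yolact_base-style defaults, not taken from this snippet:

def lr_at(iteration, base_lr, warmup_init, warmup_until, lr_steps, gamma):
    # Learning rate the training loop above would set at this iteration
    if warmup_until > 0 and iteration <= warmup_until:
        # Linear interpolation from warmup_init up to base_lr
        return (base_lr - warmup_init) * (iteration / warmup_until) + warmup_init
    # One decay step per milestone already passed
    step_index = sum(1 for s in lr_steps if iteration >= s)
    return base_lr * (gamma ** step_index)

def set_lr(optimizer, new_lr):
    # Write the rate into every param group
    for group in optimizer.param_groups:
        group['lr'] = new_lr

steps = (280000, 600000, 700000, 750000)
print(lr_at(250, 1e-3, 1e-4, 500, steps, 0.1))     # ~5.5e-4 (mid-warmup)
print(lr_at(300000, 1e-3, 1e-4, 500, steps, 0.1))  # ~1e-4 (after first decay)
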
Example #16
def train():
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))
    
    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()
    print('\n--- Generator created! ---')

    # NOTE
    # I manually set the original image size and seg size to 138;
    # this might change in the future (for example, to 550)
    if cfg.pred_seg:
        dis_size = 138
        dis_net  = Discriminator_Wgan(i_size = dis_size, s_size = dis_size)
        # Change the initialization inside the dis_net class inside 
        # set the dis net's initial parameter values
        # dis_net.apply(gan_init)
        dis_net.train()
        print('--- Discriminator created! ---\n')

    if args.log:
        log = Log(cfg.name, args.log_folder, dict(args._get_kwargs()),
            overwrite=(args.resume is None), log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check    
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)

    # optimizer_gen = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
    #                       weight_decay=args.decay)
    # if cfg.pred_seg:
    #     optimizer_dis = optim.SGD(dis_net.parameters(), lr=cfg.dis_lr, momentum=args.momentum,
    #                         weight_decay=args.decay)
    #     schedule_dis  = ReduceLROnPlateau(optimizer_dis, mode = 'min', patience=6, min_lr=1E-6)

    # NOTE: Using the Ranger Optimizer for the generator
    optimizer_gen     = Ranger(net.parameters(), lr = args.lr, weight_decay=args.decay)
    # optimizer_gen = optim.RMSprop(net.parameters(), lr = args.lr)

    # FIXME: might need to modify the lr in the optimizer carefully;
    # check this
    # def make_D_optimizer(cfg, model):
    # params = []
    # for key, value in model.named_parameters():
    #     if not value.requires_grad:
    #         continue
    #     lr = cfg.SOLVER.BASE_LR/5.0
    #     weight_decay = cfg.SOLVER.WEIGHT_DECAY
    #     if "bias" in key:
    #         lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR/5.0
    #         weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS
    #     params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}]

    # optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM)
    # return optimizer

    if cfg.pred_seg:
        optimizer_dis = optim.SGD(dis_net.parameters(), lr=cfg.dis_lr)
        # optimizer_dis = optim.RMSprop(dis_net.parameters(), lr = cfg.dis_lr)
        schedule_dis  = ReduceLROnPlateau(optimizer_dis, mode = 'min', patience=6, min_lr=1E-6)

    criterion     = MultiBoxLoss(num_classes=cfg.num_classes,
                                pos_threshold=cfg.positive_iou_threshold,
                                neg_threshold=cfg.negative_iou_threshold,
                                negpos_ratio=cfg.ohem_negpos_ratio, pred_seg=cfg.pred_seg)

    # criterion_dis = nn.BCELoss()
    # Following the advice from WGAN
    criterion_dis = DiscriminatorLoss_Maskrcnn()
    criterion_gen = GeneratorLoss_Maskrcnn()


    if args.batch_alloc is not None:
        # e.g. args.batch_alloc: 24,24
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print('Error: Batch allocation (%s) does not sum to batch size (%s).' % (args.batch_alloc, args.batch_size))
            exit(-1)

    net = CustomDataParallel(NetLoss(net, criterion, pred_seg=cfg.pred_seg))

    if args.cuda:
        net     = net.cuda()
        # NOTE
        if cfg.pred_seg:
            dis_net = nn.DataParallel(dis_net)
            dis_net = dis_net.cuda()
    
    # Initialize everything
    if not cfg.freeze_bn: yolact_net.freeze_bn() # Freeze bn so we don't kill our means
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())

    if not cfg.freeze_bn: yolact_net.freeze_bn(True)

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)
    
    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    # NOTE: val_dataset only exists when validation is enabled
    if args.validation_epoch > 0:
        val_loader = data.DataLoader(val_dataset, args.batch_size,
                                     num_workers=args.num_workers*2,
                                     shuffle=True, collate_fn=detection_collate,
                                     pin_memory=True)
    
    
    save_path = lambda epoch, iteration: SavePath(cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types # Forms the print order
                      # NOTE: the global statement lets this function modify the module-level variable
    loss_avgs  = { k: MovingAverage(100) for k in loss_types }

    # NOTE
    # Enable AMP
    amp_enable = cfg.amp
    scaler = torch.cuda.amp.GradScaler(enabled=amp_enable)
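    # GradScaler multiplies the loss before backward() so small fp16 gradients
    # don't underflow to zero; scaler.step() unscales them before the optimizer
    # update (skipping the step if they contain infs/NaNs), and scaler.update()
    # adapts the scale factor each iteration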

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter

            if (epoch+1)*epoch_size < iteration:
                continue
            
            for datum in data_loader:
                # Stop once we reach the end of this epoch (relevant when resuming from start_iter)
                if iteration == (epoch+1)*epoch_size:
                    break
      
                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs.values():
                            avg.reset()
                
                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [x for x in cfg.delayed_settings if x[0] > iteration]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer_gen, (args.lr - cfg.lr_warmup_init) * (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(cfg.lr_steps) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer_gen, args.lr * (args.gamma ** step_index))
                
                
                # NOTE
                if cfg.pred_seg:
                    # ====== GAN Train ======
                    # train the gen and dis in different iteration
                    # it_alter_period = iteration % (cfg.gen_iter + cfg.dis_iter)
                    # FIXME:
                    # present_time = time.time()
                    for _ in range(cfg.dis_iter):
                        # freeze_pretrain(yolact_net, freeze=False)
                        # freeze_pretrain(net, freeze=False)
                        # freeze_pretrain(dis_net, freeze=False)
                        # if it_alter_period == 0:
                        #     print('--- Generator     freeze   ---')
                        #     print('--- Discriminator training ---')

                        if cfg.amp:
                            with torch.cuda.amp.autocast():
                                # ----- Discriminator part -----
                                # seg_list holds the predicted masks and can be
                                # regarded as the images generated by YOLACT;
                                # pred_list holds the predicted labels
                                # seg_list  dim: list of (138,138,instances)
                                # pred_list dim: list of (instances)
                                losses, seg_list, pred_list = net(datum)
                                seg_clas, mask_clas, b, seg_size = seg_mask_clas(seg_list, pred_list, datum)
                                
                                # input image size is [b, 3, 550, 550]
                                # downsample to       [b, 3, seg_h, seg_w]
                                image_list = [img.to(cuda0) for img in datum[0]]
                                image    = interpolate(torch.stack(image_list), size = seg_size, 
                                                            mode='bilinear',align_corners=False)

                                # During discriminator training we do not want
                                # gradients flowing back into the generator, so
                                # we detach seg_clas (mask_clas comes from the
                                # data and carries no grad anyway)
                        
                                output_pred = dis_net(img = image.detach(), seg = seg_clas.detach())
                                output_grou = dis_net(img = image.detach(), seg = mask_clas.detach())

                                # p = elem_mul_p.squeeze().permute(1,2,0).cpu().detach().numpy()
                                # g = elem_mul_g.squeeze().permute(1,2,0).cpu().detach().numpy()
                                # image = image.squeeze().permute(1,2,0).cpu().detach().numpy()
                                # from PIL import Image
                                # seg_PIL = Image.fromarray(p, 'RGB')
                                # mask_PIL = Image.fromarray(g, 'RGB')
                                # seg_PIL.save('mul_seg.png')
                                # mask_PIL.save('mul_mask.png')
                                # raise RuntimeError

                                # from matplotlib import pyplot as plt
                                # fig, (ax1, ax2) = plt.subplots(1,2)
                                # ax1.imshow(mask_show)
                                # ax2.imshow(seg_show)
                                # plt.show(block=False)
                                # plt.pause(2)
                                # plt.close()  

                                # if iteration % (cfg.gen_iter + cfg.dis_iter) == 0:
                                #     print(f'Probability of fake is fake: {output_pred.mean().item():.2f}')
                                #     print(f'Probability of real is real: {output_grou.mean().item():.2f}')

                                # 0 for Fake/Generated
                                # 1 for True/Ground Truth
                                # fake_label = torch.zeros(b)
                                # real_label = torch.ones(b)

                                # Advice of practical implementation 
                                # from https://arxiv.org/abs/1611.08408
                                # loss_pred = -criterion_dis(output_pred,target=real_label)
                                # loss_pred = criterion_dis(output_pred,target=fake_label)
                                # loss_grou = criterion_dis(output_grou,target=real_label)
                                # loss_dis  = loss_pred + loss_grou

                                # Wasserstein Distance (Earth-Mover)
                                loss_dis = criterion_dis(input=output_grou,target=output_pred)
                            
                            # Backprop the discriminator
                            # Scales loss. Calls backward() on scaled loss to create scaled gradients.
                            scaler.scale(loss_dis).backward()
                            scaler.step(optimizer_dis)
                            scaler.update()
                            optimizer_dis.zero_grad()

                            # clip the updated parameters
                            for par in dis_net.parameters():
                                par.data.clamp_(-cfg.clip_value, cfg.clip_value)


                            # ----- Generator part -----
                            # freeze_pretrain(yolact_net, freeze=False)
                            # freeze_pretrain(net, freeze=False)
                            # freeze_pretrain(dis_net, freeze=False)
                            # if it_alter_period == (cfg.dis_iter+1):
                            #     print('--- Generator     training ---')
                            #     print('--- Discriminator freeze   ---')

                            # FIXME:
                            # print(f'dis time pass: {time.time()-present_time:.2f}')
                            # FIXME:
                            # present_time = time.time()

                            with torch.cuda.amp.autocast():
                                losses, seg_list, pred_list = net(datum)
                                seg_clas, mask_clas, b, seg_size = seg_mask_clas(seg_list, pred_list, datum)
                                image_list = [img.to(cuda0) for img in datum[0]]
                                image      = interpolate(torch.stack(image_list), size = seg_size, 
                                                            mode='bilinear',align_corners=False)
                                # Perform forward pass of all-fake batch through D
                                # NOTE: seg_clas must NOT be detached here, so
                                # that the gradient can flow back to the generator
                                # output = dis_net(img = image, seg = seg_clas)
                                # Since log(1 - D(G(x))) does not provide sufficient
                                # gradients, we use log(D(G(x))) instead, which can
                                # be achieved by using real_label as the target.
                                # This step is crucial for the discriminator's
                                # information to reach the generator.
                                # Calculate G's loss based on this output
                                # real_label = torch.ones(b)
                                # loss_gen   = criterion_dis(output,target=real_label)
                            
                                # GAN MaskRCNN
                                output_pred = dis_net(img = image, seg = seg_clas)
                                output_grou = dis_net(img = image, seg = mask_clas)

                                # Advice from WGAN
                                # loss_gen = -torch.mean(output)
                                loss_gen = criterion_gen(input=output_grou,target=output_pred)

                                # since the discriminator is effectively frozen
                                # here, the gradients only update the YOLACT generator
                                losses = { k: (v).mean() for k,v in losses.items() } # Mean here because Dataparallel
                                loss = sum([losses[k] for k in losses])
                                loss += loss_gen
                            
                            # Generator backprop
                            scaler.scale(loss).backward()
                            scaler.step(optimizer_gen)
                            scaler.update()
                            optimizer_gen.zero_grad()
                            

                            # FIXME:
                            # print(f'gen time pass: {time.time()-present_time:.2f}')
                            # print('GAN part over')

                        else:
                            losses, seg_list, pred_list = net(datum)
                            seg_clas, mask_clas, b, seg_size = seg_mask_clas(seg_list, pred_list, datum)

                            image_list = [img.to(cuda0) for img in datum[0]]
                            image    = interpolate(torch.stack(image_list), size = seg_size, 
                                                        mode='bilinear',align_corners=False)

                            output_pred = dis_net(img = image.detach(), seg = seg_clas.detach())
                            output_grou = dis_net(img = image.detach(), seg = mask_clas.detach())
                            loss_dis = criterion_dis(input=output_grou,target=output_pred)

                            loss_dis.backward()
                            optimizer_dis.step()
                            optimizer_dis.zero_grad()
                            for par in dis_net.parameters():
                                par.data.clamp_(-cfg.clip_value, cfg.clip_value)
                        
                            # ----- Generator part -----
                            # FIXME:
                            # print(f'dis time pass: {time.time()-present_time:.2f}')
                            # FIXME:
                            # present_time = time.time()

                            losses, seg_list, pred_list = net(datum)
                            seg_clas, mask_clas, b, seg_size = seg_mask_clas(seg_list, pred_list, datum)
                            image_list = [img.to(cuda0) for img in datum[0]]
                            image      = interpolate(torch.stack(image_list), size = seg_size, 
                                                        mode='bilinear',align_corners=False)
                                                        
                            # GAN MaskRCNN
                            output_pred = dis_net(img = image, seg = seg_clas)
                            output_grou = dis_net(img = image, seg = mask_clas)

                            loss_gen = criterion_gen(input=output_grou,target=output_pred)

                            # since the discriminator is effectively frozen here,
                            # the gradients only update the YOLACT generator
                            losses = { k: v.mean() for k, v in losses.items() } # Mean here because DataParallel
                            loss = sum([losses[k] for k in losses])
                            loss += loss_gen
                            # Do this to free up vram even if loss is not finite
                            loss.backward()
                            if torch.isfinite(loss).item():
                                # since optimizer_gen holds only the YOLACT
                                # parameters, only the generator is updated
                                optimizer_gen.step()
                            # zero the gradients only after the step; zeroing
                            # between backward() and step() would discard them
                            optimizer_gen.zero_grad()

                            # FIXME:
                            # print(f'gen time pass: {time.time()-present_time:.2f}')
                            # print('GAN part over')
                else:
                    # ====== Normal YOLACT Train ======
                    # Zero the grad to get ready to compute gradients
                    optimizer_gen.zero_grad()
                    # Forward Pass + Compute loss at the same time (see CustomDataParallel and NetLoss)
                    losses = net(datum)
                    losses = { k: (v).mean() for k,v in losses.items() } # Mean here because Dataparallel
                    loss = sum([losses[k] for k in losses])
                    # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                    # all_loss = sum([v.mean() for v in losses.values()])

                    # Backprop
                    loss.backward() # Do this to free up vram even if loss is not finite
                    if torch.isfinite(loss).item():
                        optimizer_gen.step()                    
                
                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time  = time.time()
                elapsed   = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(datetime.timedelta(seconds=(cfg.max_iter-iteration) * time_avg.get_avg())).split('.')[0]
                    
                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()] for k in loss_types if k in losses], [])
                    if cfg.pred_seg:
                        print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) + ' T: %.3f || ETA: %s || timer: %.3f')
                                % tuple([epoch, iteration] + loss_labels + [total, eta_str, elapsed]), flush=True)
                        # print(f'Generator loss: {loss_gen:.2f} | Discriminator loss: {loss_dis:.2f}')
                    # Loss Key:
                    #  - B: Box Localization Loss
                    #  - C: Class Confidence Loss
                    #  - M: Mask Loss
                    #  - P: Prototype Loss
                    #  - D: Coefficient Diversity Loss
                    #  - E: Class Existence Loss
                    #  - S: Semantic Segmentation Loss
                    #  - T: Total loss

                if args.log:
                    precision = 5
                    loss_info = {k: round(losses[k].item(), precision) for k in losses}
                    loss_info['T'] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0) # nvidia-smi is sloooow
                        
                    log.log('train', loss=loss_info, epoch=epoch, iter=iteration,
                        lr=round(cur_lr, 10), elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu
                
                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)
            
            # This is done per epoch
            if args.validation_epoch > 0:
                # NOTE: Validation loss
                # if cfg.pred_seg:
                #     net.eval()
                #     dis_net.eval()
                #     cfg.gan_eval = True
                #     with torch.no_grad():
                #         for datum in tqdm(val_loader, desc='GAN Validation'):
                #             losses, seg_list, pred_list = net(datum)
                #             losses, seg_list, pred_list = net(datum)
                #             # TODO: warp below as a function
                #             seg_list = [v.permute(2,1,0).contiguous() for v in seg_list]
                #             b = len(seg_list) # batch size
                #             _, seg_h, seg_w = seg_list[0].size()

                #             seg_clas    = torch.zeros(b, cfg.num_classes-1, seg_h, seg_w)
                #             mask_clas   = torch.zeros(b, cfg.num_classes-1, seg_h, seg_w)
                #             target_list = [target for target in datum[1][0]]
                #             mask_list   = [interpolate(mask.unsqueeze(0), size = (seg_h,seg_w),mode='bilinear', \
                #                             align_corners=False).squeeze() for mask in datum[1][1]]

                #             for idx in range(b):
                #                 for i, (pred, i_target) in enumerate(zip(pred_list[idx], target_list[idx])):
                #                     seg_clas[idx, pred, ...]                 += seg_list[idx][i,...]
                #                     mask_clas[idx, i_target[-1].long(), ...] += mask_list[idx][i,...]
                               
                #             seg_clas = torch.clamp(seg_clas, 0, 1)
                #             image    = interpolate(torch.stack(datum[0]), size = (seg_h,seg_w), 
                #                                         mode='bilinear',align_corners=False)
                #             real_label  = torch.ones(b)
                #             output_pred = dis_net(img = image, seg = seg_clas)
                #             output_grou = dis_net(img = image, seg = mask_clas)
                #             loss_pred   = -criterion_dis(output_pred,target=real_label)
                #             loss_grou   =  criterion_dis(output_grou,target=real_label)
                #             loss_dis    = loss_pred + loss_grou
                #         losses = { k: (v).mean() for k,v in losses.items() }
                #         loss = sum([losses[k] for k in losses])
                #         val_loss = loss - cfg.lambda_dis*loss_dis
                #         schedule_dis.step(loss_dis)
                #         lr = [group['lr'] for group in optimizer_dis.param_groups]
                #         print(f'Discriminator lr: {lr[0]}')
                #     net.train()
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    cfg.gan_eval = False
                    if cfg.pred_seg:
                        dis_net.eval()  # dis_net only exists in GAN mode
                    compute_validation_map(epoch, iteration, yolact_net, val_dataset, log if args.log else None)
        
        # Compute validation mAP after training is finished
        compute_validation_map(epoch, iteration, yolact_net, val_dataset, log if args.log else None)
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')
            
            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)
            
            yolact_net.save_weights(save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
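
Stripped of the YOLACT plumbing, the adversarial update in the example above is a fairly standard WGAN loop: the critic is trained on real and generated masks with its weights clamped to a small interval, and the generator then trains against the critic's output. The sketch below uses toy gen/dis modules (hypothetical stand-ins for YOLACT and Discriminator_Wgan) and omits the AMP scaler, but keeps the same detach semantics and the backward/step/zero_grad ordering:

import torch
import torch.nn as nn

gen = nn.Linear(8, 8)   # stand-in generator
dis = nn.Linear(8, 1)   # stand-in critic
opt_gen = torch.optim.SGD(gen.parameters(), lr=1e-3)
opt_dis = torch.optim.SGD(dis.parameters(), lr=1e-3)
clip_value = 0.01

real = torch.randn(4, 8)

# --- Critic step: detach the generator output so no grad reaches gen ---
fake = gen(torch.randn(4, 8))
# Wasserstein critic loss: minimize E[D(fake)] - E[D(real)]
loss_dis = dis(fake.detach()).mean() - dis(real).mean()
loss_dis.backward()
opt_dis.step()
opt_dis.zero_grad()
# Weight clipping keeps the critic (roughly) Lipschitz, as in the loop above
for p in dis.parameters():
    p.data.clamp_(-clip_value, clip_value)

# --- Generator step: do NOT detach, so the critic's gradient flows back ---
fake = gen(torch.randn(4, 8))
loss_gen = -dis(fake).mean()
loss_gen.backward()
opt_gen.step()
opt_gen.zero_grad()
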
Example #17
        if cv2.waitKey(33) == 27:
            break
    cv2.destroyAllWindows()
    camera.release()
    return


if __name__ == '__main__':
    rospy.init_node('test')
    sub_img = Get_image()
    print('Loading model...', end='')
    with torch.no_grad():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        net = Yolact()
        net.load_weights(
            '/home/chien/ros_yolact/src/yolact/src/weights/yolact_base_1333_8000.pth'
        )
        net.eval()
        net = net.cuda()
        print(' Done.')
    while not rospy.is_shutdown():
        cv2.imshow("YOLACT1", sub_img.cv_image)
        image = torch.from_numpy(sub_img.cv_image).cuda().float()
        batch = FastBaseTransform()(image.unsqueeze(0))
        preds = net(batch)
        img_numpy = prep_display(preds,
                                 image,
                                 None,
                                 None,
                                 undo_transform=False)
        cv2.imshow("YOLACT", img_numpy)
Example #18
        else:
            torch.set_default_tensor_type('torch.FloatTensor')

        if args.resume and not args.display:
            with open(args.ap_data_file, 'rb') as f:
                ap_data = pickle.load(f)
            calc_map(ap_data)
            exit()

        if args.image is None and args.video is None and args.images is None:
            dataset = COCODetection(cfg.dataset.valid_images,
                                    cfg.dataset.valid_info,
                                    transform=BaseTransform(),
                                    has_gt=cfg.dataset.has_gt)
            prep_coco_cats()
        else:
            dataset = None

        print('Loading model...', end='')
        net = Yolact()
        map_location = None if args.cuda else 'cpu'
        net.load_weights(args.trained_model, map_location=map_location)
        net.eval()
        print(' Done.')

        if args.cuda:
            net = net.cuda()

        evaluate(net, dataset)
        print("time_taken", time.time() - s_t)
Example #19
class YolactInterface(object):
    def __init__(self, model_pth, output_num=5):
        self.output_num = output_num
        with torch.no_grad():
            set_cfg("yolact_base_config")
            torch.cuda.set_device(0)
            cudnn.benchmark = True
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = Yolact()
            self.net.load_weights(model_pth)
            self.net.eval()
            self.net = self.net.cuda()
        print("load model complete")

    def run_once(self, src):
        self.net.detect.cross_class_nms = True
        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        with torch.no_grad():
            frame = torch.Tensor(src).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            # time.clock() was removed in Python 3.8; perf_counter is the replacement
            time_start = time.perf_counter()
            preds = self.net(batch)
            time_elapsed = time.perf_counter() - time_start
            h, w, _ = src.shape
            t = postprocess(
                preds,
                w,
                h,
                visualize_lincomb=False,
                crop_masks=True,
                score_threshold=0.)  # TODO: give a suitable threshold
            torch.cuda.synchronize()
            classes, scores, bboxes, masks = [
                x[:self.output_num].cpu().numpy() for x in t
            ]  # TODO: Only 5 objects for test
            print(time_elapsed)
        instances = self.build_up_result(masks.shape[0], classes, bboxes,
                                         masks, scores)
        return {"instances": instances}

    def build_up_result(self, num, classes, bboxes, masks, scores):
        instances = []
        for i in range(num):
            bbox = [
                bboxes[i, 0], bboxes[i, 1], bboxes[i, 2] - bboxes[i, 0],
                bboxes[i, 3] - bboxes[i, 1]
            ]
            # Round to the nearest 10th to avoid huge file sizes, as COCO suggests
            bbox = [round(float(x) * 10) / 10 for x in bbox]
            # encode segmentation with RLE
            rle = pycocotools.mask.encode(
                np.asfortranarray(masks[i, :, :].astype(
                    np.uint8)))  # rle binary encoding
            rle['counts'] = rle['counts'].decode(
                'ascii')  # json.dump doesn't like bytes strings
            # create one instance json
            instances.append({
                'category_id': int(classes[i]),  # TODO: origin: get_coco_cat(int(category_id))
                'bbox': {
                    "b": bbox
                },
                "segmentation": rle,
                'score': float(scores[i])
            })

        return instances
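
The instance dictionaries above store masks in COCO's compressed RLE format. A small standalone round-trip sketch with pycocotools shows what encode expects (a Fortran-ordered uint8 array) and why 'counts' has to be decoded to ASCII before json.dump:

import json
import numpy as np
import pycocotools.mask as mask_util

mask = np.zeros((138, 138), dtype=np.uint8)
mask[30:60, 40:90] = 1

# encode requires a Fortran-ordered uint8 array
rle = mask_util.encode(np.asfortranarray(mask))
rle['counts'] = rle['counts'].decode('ascii')  # bytes -> str for JSON
print(json.dumps(rle)[:60], '...')

# Round trip: re-encode counts as bytes before decoding the mask
rle['counts'] = rle['counts'].encode('ascii')
restored = mask_util.decode(rle)
assert (restored == mask).all()
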
Example #20
class DOTMask():

    def __init__(self, nn, input_device):
        """
        Initialisation function
        """
    
        print('Loading model...')
        self.nn = nn
        if self.nn == 'yolact':
            print("Selected NN: Yolact")
            # Yolact imports
            sys.path.append('../nn/yolact/')
            from yolact import Yolact
            from data import cfg, set_cfg, set_dataset
            import torch
            import torch.backends.cudnn as cudnn 

            set_cfg("yolact_resnet50_config")
            #set_cfg("yolact_resnet50_config")
            cfg.eval_mask_branch = True
            cfg.mask_proto_debug = False
            cfg.rescore_bbox = True
            self.net = Yolact()
            self.net.load_weights("../weights/yolact_resnet50_54_800000.pth")
            #self.net.load_weights("../weights/yolact_resnet50_54_800000.pth")
            self.net.eval()
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = self.net.cuda()

        elif self.nn == 'yolact++':
            print("Selected NN: Yolact++")
            # Yolact imports
            sys.path.append('../nn/yolact/')
            from yolact import Yolact
            from data import cfg, set_cfg, set_dataset
            import torch
            import torch.backends.cudnn as cudnn 

            set_cfg("yolact_plus_resnet50_config")
            #set_cfg("yolact_resnet50_config")
            cfg.eval_mask_branch = True
            cfg.mask_proto_debug = False
            cfg.rescore_bbox = True
            self.net = Yolact()
            self.net.load_weights("../weights/yolact_plus_resnet50_54_800000.pth")
            #self.net.load_weights("../weights/yolact_resnet50_54_800000.pth")
            self.net.eval()
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = self.net.cuda()

        elif self.nn == 'yolact_edge':
            print("Selected NN: Yolact_edge")
            # Yolact_edge imports
            sys.path.append('../nn/yolact_edge')
            from yolact import Yolact
            from data import cfg, set_cfg, set_dataset
            import torch
            import torch.backends.cudnn as cudnn

            set_cfg("yolact_edge_resnet50_config")
            cfg.eval_mask_branch = True
            cfg.mask_proto_debug = False
            cfg.rescore_bbox = True
            self.net = Yolact()
            self.net.load_weights("../weights/yolact_edge_resnet50_54_800000.pth")
            self.net.eval()
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = self.net.cuda()

        elif self.nn == 'mrcnn':
            print("Selected NN: Mask-RCNN")
             # Keras
            import keras
            from keras.models import Model
            from keras import backend as K
            K.common.set_image_dim_ordering('tf')

            # Mask-RCNN
            sys.path.append('../nn/Mask_RCNN/')
            from mrcnn import config
            from mrcnn import utils 
            from mrcnn import model as modellib
            from inference_config import InferenceConfig

            self.config = InferenceConfig()
            self.model = modellib.MaskRCNN(
                mode="inference", 
                model_dir="../weights/",#"../nn/Mask_RCNN/mrcnn/", 
                config=self.config)

            # Load weights trained on MS-COCO
            self.model.load_weights("../weights/mask_rcnn_coco.h5", by_name=True)
        
        else:
            print("no nn defined")

        self.bridge = CvBridge()

        self._max_inactive_frames = 10 # Maximum nb of frames before destruction
        self.next_object_id = 0 # ID for next object
        self.objects_dict = {} # Detected objects dictionary
        self.var_init = 0
        self.cam_pos_qat = np.array([[0.,0.,0.],[0.,0.,0.,1.]])
        self.cam_pos = np.array([[0.,0.,0.],[0.,0.,0.]])
        
        self.dilatation = 1
        self.score_threshold = 0.1
        self.max_number_observation = 5
        self.human_threshold = 0.01
        self.object_threshold = 0.3
        self.iou_threshold = 0.9
        self.selected_classes = [0, 56, 67]
        self.masked_id = []

        #if input_device == 'xtion':
        #    self.human_threshold = 0.1
        #    self.iou_threshold = 0.3

        self.depth_image_pub = rospy.Publisher(
            "/camera/depth_registered/masked_image_raw", 
            Image,queue_size=1)

        self.dynamic_depth_image_pub = rospy.Publisher(
            "/camera/depth_registered/dynamic_masked_image_raw", 
            Image,queue_size=1)

        self.frame = []
        self.depth_frame = []
        self.msg_header = std_msgs.msg.Header()
        self.depth_msg_header = std_msgs.msg.Header()

        # Class names COCO dataset
        self.class_names = [
            'person', 'bicycle', 'car', 'motorcycle',
            'airplane', 'bus', 'train', 'truck', 'boat',
            'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 
            'bird', 'cat', 'dog', 'horse', 'sheep', 
            'cow', 'elephant', 'bear', 'zebra', 'giraffe', 
            'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 
            'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 
            'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
            'bottle', 'wine glass', 'cup', 'fork', 'knife', 
            'spoon', 'bowl', 'banana', 'apple', 'sandwich', 
            'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
            'donut', 'cake', 'chair', 'couch', 'potted plant', 
            'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 
            'oven', 'toaster', 'sink', 'refrigerator', 'book',
            'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 
            'toothbrush']
        
    def get_active(self, val):
        for key in self.objects_dict:
            if self.objects_dict[key]["maskID"] == val:
                return self.objects_dict[key]["activeObject"]
        return "Key not exist"

    def class_selection(self, masks_in, class_ids):
        """
        Function for mask class selection (selected classes: 0 person, 39 bottle, 56 chair)
        """
        if len(masks_in.shape) > 1:
            masks=copy.deepcopy(masks_in)
            x = np.zeros([class_ids.shape[0], masks.shape[1], masks.shape[2]])
            for l in range(masks.shape[0]):
                if (class_ids[l] == 0 or class_ids[l] == 39 or 
                    class_ids[l] == 56):
                    x[l, :, :] = masks[l, :, :]
                else:
                    x[l, :, :] = 0
            return x
        else:
            x = np.zeros([1, 480, 640])
            return x

    def static_masks_selection(self, masks_in, class_ids):
        """
        Function for static Mask class selection
        """
        if len(masks_in.shape) > 1:
            masks=copy.deepcopy(masks_in)
            x = np.zeros([masks.shape[0], masks.shape[1], masks.shape[2]])
            for i in self.objects_dict:
                if not np.in1d(i, self.masked_id):
                    if self.objects_dict[i]["activeObject"] == 1 and self.objects_dict[i]["maskID"] < masks.shape[0] and (class_ids[self.objects_dict[i]["maskID"]] == 0 or class_ids[self.objects_dict[i]["maskID"]] == 39 or 
                        class_ids[self.objects_dict[i]["maskID"]] == 56):
                        x[self.objects_dict[i]["maskID"], :, :] = masks[self.objects_dict[i]["maskID"], :, :]
                        
                    elif self.objects_dict[i]["activeObject"] == 0 and self.objects_dict[i]["maskID"] < masks.shape[0]:
                        x[self.objects_dict[i]["maskID"], :, :] = 0
                    else:
                        pass
                    self.masked_id.append(i)
            return x
        else:
            x = np.zeros([1, 480, 640])
            return x

    def read_objects_pose(self):

        for i in self.objects_dict:
            
            if self.objects_dict[i]["classID"]==0:
                object_type = "Person"
            elif self.objects_dict[i]["classID"]==39:
                object_type = "Bottle"
            elif self.objects_dict[i]["classID"]==56:
                object_type = "Chair"
            else:
                object_type = "Nan"

            try:
                (self.objects_dict[i]["worldPose"],rot) = listener.lookupTransform('/map',object_type+'_'+str(i), rospy.Time(0))
            except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
                continue
                        
    def handle_objects_pose(self):
        for i in self.objects_dict:
            if self.objects_dict[i]["classID"]==0 or self.objects_dict[i]["classID"]==39 or self.objects_dict[i]["classID"]==56:
                if self.objects_dict[i]["classID"]==0:
                    object_type = "Person"
                elif self.objects_dict[i]["classID"]==39:
                    object_type = "Bottle"
                elif self.objects_dict[i]["classID"]==56:
                    object_type = "Chair"
                else:
                    object_type = "Nan"
                
                br = tf.TransformBroadcaster()
                e_pose = self.objects_dict[i]["estimatedPose"]
                br.sendTransform((e_pose[0], e_pose[1], e_pose[2]), 
                                tf.transformations.quaternion_from_euler(0,0,0),
                                rospy.Time.now(),
                                object_type+'_'+str(i),
                                '/map')

    def iou_centered_centroid(self, rois_old, rois_new, mask_old, mask_new):
        # intersection-over-union computed on centroid-centered masks
        img_v = mask_old.shape[0]
        img_h = mask_old.shape[1]

        pad_x_old = int((img_v-(rois_old[3]-rois_old[1]))/2)
        pad_y_old = int((img_h-(rois_old[2]-rois_old[0]))/2)
        pad_x_new = int((img_v-(rois_new[3]-rois_new[1]))/2)
        pad_y_new = int((img_h-(rois_new[2]-rois_new[0]))/2)

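        # Crop each mask to its bounding box, then pad both crops so the boxes
        # sit at the image centre: the IoU below then compares mask shape
        # independent of where the object moved between frames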
        cropped_mask_old = mask_old[rois_old[1]:rois_old[3], rois_old[0]:rois_old[2]]
        cropped_mask_new = mask_new[rois_new[1]:rois_new[3], rois_new[0]:rois_new[2]]

        centered_mask_old = add_padding(cropped_mask_old, pad_y_old, pad_x_old, pad_y_old, pad_x_old)
        centered_mask_new = add_padding(cropped_mask_new, pad_y_new, pad_x_new, pad_y_new, pad_x_new)

        centered_mask_old_croped = centered_mask_old[1:478, 1:638]
        centered_mask_new_croped = centered_mask_new[1:478, 1:638]

        intersection = np.logical_and(centered_mask_old_croped, centered_mask_new_croped)
        union = np.logical_or(centered_mask_old_croped, centered_mask_new_croped)
        iou = np.sum(intersection) / np.sum(union)
        return iou

    def apply_depth_image_masking(self, image_in, masks):
        """Apply the given mask to the image.
        """
        
        image = copy.deepcopy(image_in)
        image_static = copy.deepcopy(image_in)
        for i in range(masks.shape[0]):
            is_active = self.get_active(i)
            mask = masks[i, :, :]
            mask = ndimage.binary_dilation(mask, iterations=self.dilatation)
            if is_active == 1:
                image[:, :] = np.where(mask == 1,
                                    0,
                                    image[:, :])
                image_static[:, :] = np.where(mask == 1,
                                    0,
                                    image[:, :])
            else:
                image[:, :] = np.where(mask == 1,
                                    0,
                                    image[:, :])

            
        return image_static, image

    def mask_dilatation(self, masks):

        timebefore = time.time()
        mask=copy.deepcopy(masks)
        for i in range(mask.shape[0]):
            mask[i] = ndimage.binary_dilation(mask[i], iterations=self.dilatation)

        print("Numpy dilation time : ", - (timebefore - time.time()))
        return mask

    def mask_dilatation_cv(self, masks):

        timebefore = time.time()
        mask=copy.deepcopy(masks)
        kernel = np.ones((3,3))
        for i in range(mask.shape[0]):
            mask[i] = cv2.dilate(mask[i],kernel, iterations=self.dilatation)
        

        print("cv2 dilation time : ", - (timebefore - time.time()))
        return mask

    def get_masking_depth(self, image, mask):
        """Apply the given mask to the image.
        """
        x = np.zeros([image.shape[0], image.shape[1]])
        y = np.zeros(mask.shape[0])

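        # For each instance mask, y[i] becomes the mean of the depth values
        # inside the mask (NaNs are zeroed first, zeros excluded from the average)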
        for i in range(mask.shape[0]):
            x[:, :] = np.where(mask[i,:,:] != 1,
                                0,
                                image[:, :])

            x[:, :] = np.where( np.isnan(x[:,:]),
                                0,
                                x[:, :])

            nonzero = np.count_nonzero(x)
            y[i] = 0 if nonzero == 0 else x.sum() / nonzero
        
        return y

    def add_object(self, centroid, dimensions, mask_id, class_id, mask_old, rois_old):
        dt = 0.25

        try:
            (transc, rotc) = listener.lookupTransform('/map', self.tf_camera, rospy.Time(0))
        except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
            transc = np.array([0.,0.,0.])
            rotc = np.array([0.,0.,0.,1.])

        euler = tf.transformations.euler_from_quaternion(rotc)
        rot = tf.transformations.euler_matrix(euler[0],euler[1],euler[2])

        h_mat = rot
        h_mat[0:3,3:] = np.array([transc]).T
        b = h_mat.dot(np.array([[centroid[0],centroid[1],centroid[2],1]]).T)[0:3,:]
        
        y = np.array([b[0,0], b[1,0], b[2,0]])

        x = [y[0], y[1], y[2], 0, 0, 0]

        P = np.eye(len(x))

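        # Constant-velocity model: state x = [px, py, pz, vx, vy, vz].
        # F advances position by velocity * dt at each predict step, H projects
        # the state onto the (scaled) position measurement, and Q / R are the
        # process and measurement noise covariances fed to the EKF below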
        F = np.array([[ 1,  0,  0, dt,  0,  0],
                      [ 0,  1,  0,  0, dt,  0],
                      [ 0,  0,  1,  0,  0, dt],
                      [ 0,  0,  0,  1,  0,  0],
                      [ 0,  0,  0,  0,  1,  0],
                      [ 0,  0,  0,  0,  0,  1]])

        H = np.array([[ 0.001,  0,  0,  0,  0,  0],
                      [ 0,  0.001,  0,  0,  0,  0],
                      [ 0,  0,  0.001,  0,  0,  0]])

        if class_id == 1:
            ax = 0.68
            ay = 0.68
            az = 0.68
        else:
            ax = 1
            ay = 1
            az = 1

        # Process noise with matching per-axis acceleration terms
        Q = np.array([[((dt**4)/4)*(ax**2),  0.0,  0.0,  ((dt**4)/4)*(ax**3),  0.0,  0.0],
                      [0.0,  ((dt**4)/4)*(ay**2),  0.0,  0.0, ((dt**4)/4)*(ay**3),   0.0],
                      [0.0,  0.0,  ((dt**4)/4)*(az**2),  0.0,   0.0, ((dt**4)/4)*(az**3)],
                      [((dt**4)/4)*(ax**3),  0.0,  0.0,  (dt**2)*(ax**2),  0.0,  0.0],
                      [0.0,  ((dt**4)/4)*(ay**3),  0.0,  0.0,  (dt**2)*(ay**2),  0.0],
                      [0.0,  0.0,  ((dt**4)/4)*(az**3),  0.0,  0.0, (dt**2)*(az**2)]])

        R = np.array([[ 0.8,  0,  0],
                      [ 0,  0.8,  0],
                      [ 0,  0,  1.2]])

        self.objects_dict.update({self.next_object_id : {
            "kalmanFilter" : extendedKalmanFilter(x, P, F, H, Q, R),
            "centroid" : centroid,
            "dimension" : dimensions,
            "classID" : class_id,
            "roisOld" : rois_old,
            "maskID" : mask_id,
            "maskOld" : mask_old,
            "worldPose" : [0,0,0],
            "estimatedVelocity" : [0,0,0],
            "estimatedPose" : [0,0,0],
            "inactiveNbFrame" : 0,
            "activeObject" : 0}})
        
        self.next_object_id = self.next_object_id+1
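
    # For reference: the extendedKalmanFilter constructed above is assumed to
    # run the standard linear Kalman recursion on this constant-velocity model
    # (the filter class itself is not part of this snippet):
    #   predict: x_ = F x,                 P_ = F P F^T + Q
    #   update:  K  = P_ H^T (H P_ H^T + R)^-1
    #            x  = x_ + K (z - H x_),   P = (I - K H) P_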
        
    def delete_object(self, object_id):
        del self.objects_dict[object_id]

    def mask_to_centroid(self, rois, mask_depth):
        current_centroids = {}
        current_dimensions = {}
        for i in range(len(rois)):    
            # 3D centroids from depth frame
            
            if args.input == 'tum':
                fx = 525.0  # focal length x
                fy = 525.0  # focal length y
                cx = 319.5  # optical center x
                cy = 239.5  # optical center y
            elif args.input == 'xtion':    
                # Asus xtion sensor 
                fx = 525
                fy = 525
                cx = 319.5
                cy = 239.5
            elif args.input == 'zed':
                # Zed sensor left img vga
                fx = 350.113
                fy = 350.113
                cx = 336.811
                cy = 190.357
            else:
                print("No valid input")
            
            # Translation from depth pixel to local point
            if mask_depth[i] == -1:
                z = 0
            else :
                z = mask_depth[i]
            
            y = (((rois[i,3]+rois[i,1])/2) - cy) * z / fy
            x = (((rois[i,2]+rois[i,0])/2) - cx) * z / fx

            # Translation from point to world coord
            current_centroids.update({i:[x, y, z]})
            current_dimensions.update({i:[rois[i,3]-rois[i,1], rois[i,2]-rois[i,0]]})
        return current_centroids, current_dimensions
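
    # Hedged helper sketch (not in the original code): the pinhole-camera
    # back-projection applied to the bbox centre above, for one pixel (u, v)
    # with depth z and intrinsics fx, fy, cx, cy.
    @staticmethod
    def pixel_to_camera_point(u, v, z, fx, fy, cx, cy):
        x = (u - cx) * z / fx
        y = (v - cy) * z / fy
        return x, y, z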
        
    def live_analysis(self):
        """
        Function for live stream video masking
        """
        
        bar = [
                " Waiting for frame [=     ]              ",
                " Waiting for frame [ =    ]              ",
                " Waiting for frame [  =   ]              ",
                " Waiting for frame [   =  ]              ",
                " Waiting for frame [    = ]              ",
                " Waiting for frame [     =]              ",
                " Waiting for frame [    = ]              ",
                " Waiting for frame [   =  ]              ",
                " Waiting for frame [  =   ]              ",
                " Waiting for frame [ =    ]              ",
            ]
        idx = 0
        while not rospy.is_shutdown():
            start_time = time.time()
            self.masked_id = []
            current_frame = self.frame
            current_depth_frame = self.depth_frame

            if len(current_frame) == 0 or len(current_depth_frame) == 0:

                print(bar[idx % len(bar)], end="\r")
                idx = idx + 1
                time.sleep(0.1)
            
            else:
                
                nn_start_time = time.time()
                
                if self.nn == 'yolact' or self.nn == 'yolact++' or self.nn == 'yolact_edge':
                    frame = torch.from_numpy(current_frame).cuda().float()
                    batch = FastBaseTransform()(frame.unsqueeze(0))
                    if self.nn == 'yolact_edge':
                        extras = {"backbone": "full", "interrupt":False, "keep_statistics":False, "moving_statistics":None}
                        preds = self.net(batch.cuda(), extras=extras)
                        preds = preds["pred_outs"]
                    else:
                        preds = self.net(batch.cuda())
                        
                    nn_pred_time = time.time()
                    h, w, _ = frame.shape
                    b = {}
                    r = {}
                    b['class_ids'], b['scores'], b['rois'], b['masks'] = postprocess(preds, w, h, score_threshold=self.score_threshold)

                    r['class_ids'] = copy.deepcopy(b['class_ids'].cpu().data.numpy())
                    r['scores'] = copy.deepcopy(b['scores'].cpu().data.numpy())
                    r['rois'] = copy.deepcopy(b['rois'].cpu().data.numpy())
                    r['masks'] = copy.deepcopy(b['masks'].cpu().data.numpy())    
               
                elif self.nn == 'mrcnn':
                    results = self.model.detect([current_frame],verbose=1)
                    r = results[0]
                    r['masks'] = np.swapaxes(r['masks'],0,2)
                    r['masks'] = np.swapaxes(r['masks'],1,2)

                    for i in range(r['rois'].shape[0]):
                        buff = r['rois'][i]
                        r['rois'][i] = [buff[1],buff[0],buff[3],buff[2]]
                    r['class_ids'] = r['class_ids'] - 1
                
                ''' Deprecated, did not enhance speed
                j=0
                for i in range(len(r['class_ids'])):
                    if not np.in1d(r['class_ids'][j], self.selected_classes):
                        r['class_ids'] = np.delete(r['class_ids'], j)
                        r['scores']= np.delete(r['scores'], j)
                        r['rois']= np.delete(r['rois'], j,axis=0)
                        r['masks']= np.delete(r['masks'], j, axis=0)
                    else:
                        j=j+1
                '''
                self.number_observation = min(self.max_number_observation, r['class_ids'].shape[0])
                for j in range(self.number_observation):
                    if r['scores'][j] < self.score_threshold:
                        self.number_observation = j
                        break

                r['class_ids'] = r['class_ids'][:self.number_observation]
                r['scores'] = r['scores'][:self.number_observation]
                r['rois'] = r['rois'][:self.number_observation]
                r['masks'] = r['masks'][:self.number_observation]

                nn_time = time.time()

                mask_depth = self.get_masking_depth(current_depth_frame, r['masks'])
                
                # Read object tf pose
                self.read_objects_pose()
                
                # Read camera tf pose
                try:
                    (transc, rotc) = listener.lookupTransform(self.tf_camera,'/map', rospy.Time(0))
                except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
                    transc = np.array([0.,0.,0.])
                    rotc = np.array([0.,0.,0.,1.])

                euler = tf.transformations.euler_from_quaternion(rotc)
                rot = tf.transformations.euler_matrix(euler[0],euler[1],euler[2])
        
                h_mat = rot
                h_mat[0:3,3:] = np.array([transc]).T

                objects_to_delete = []

                # Main filter update and prediction step
                if len(r['rois']) == 0:
                    for i in self.objects_dict:
                        self.objects_dict[i]["inactiveNbFrame"] = self.objects_dict[i]["inactiveNbFrame"] + 1

                        if self.objects_dict[i]["inactiveNbFrame"] > self._max_inactive_frames:                            
                            objects_to_delete.append(i)
                    
                    for i in objects_to_delete:
                        self.delete_object(i)
                        
                else:
                    current_centroids, current_dimensions = self.mask_to_centroid(r['rois'],mask_depth)

                    if not self.objects_dict:
                        if len(current_centroids) != 0:
                            for i in range(len(current_centroids)):
                                self.add_object(current_centroids[i], current_dimensions[i], i, r['class_ids'][i], r['masks'][i], r['rois'][i])

                            for i in self.objects_dict:
                                self.objects_dict[i]["kalmanFilter"].prediction()
                                self.objects_dict[i]["kalmanFilter"].update(self.objects_dict[i]["centroid"], h_mat)
                                self.objects_dict[i]["estimatedPose"] = self.objects_dict[i]["kalmanFilter"].x[0:3]
                                self.objects_dict[i]["estimatedVelocity"] = self.objects_dict[i]["kalmanFilter"].x[3:6]
                    else:
                        objects_pose = np.zeros((len(self.objects_dict),3))
                        objects_ids = np.zeros((len(self.objects_dict)))
                        index = 0
                        for i in self.objects_dict:
                            objects_pose[index,] = self.objects_dict[i]["centroid"]
                            objects_ids[index] = i
                            index = index + 1

                        centroids_pose = np.zeros((len(current_centroids),3))
                        for i in range(len(current_centroids)):
                            centroids_pose[i,] = current_centroids[i]
                        
                        # Greedy nearest-neighbour association: sort every
                        # object/centroid pair by euclidean distance and match
                        # each object and each centroid at most once.
                        eucledian_dist_pairwise = np.array(cdist(objects_pose, centroids_pose)).flatten()
                        index_sorted = np.argsort(eucledian_dist_pairwise)

                        used_objects = []
                        used_centroids = []

                        for index in range(len(eucledian_dist_pairwise)):
                            # Row (object) and column (centroid) indices of the
                            # flattened [n_objects x n_centroids] matrix.
                            object_id = int(index_sorted[index] / len(centroids_pose))
                            centroid_id = index_sorted[index] % len(centroids_pose)

                            if not np.in1d(object_id, used_objects) and not np.in1d(centroid_id, used_centroids):# and (eucledian_dist_pairwise[index]<0.5):
                                if self.objects_dict[objects_ids[object_id]]["classID"] == r['class_ids'][centroid_id]:
                                    timebefore = time.time()
                                    used_objects.append(object_id)
                                    used_centroids.append(centroid_id)

                                    self.objects_dict[objects_ids[object_id]]["kalmanFilter"].prediction()
                                    self.objects_dict[objects_ids[object_id]]["kalmanFilter"].update(current_centroids[centroid_id], h_mat)
                                    self.objects_dict[objects_ids[object_id]]["estimatedPose"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x[0:3]
                                    self.objects_dict[objects_ids[object_id]]["estimatedVelocity"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x[3:6]

                                    if self.objects_dict[objects_ids[object_id]]["classID"] == 0:
                                        max_threshold = self.human_threshold
                                    else:
                                        max_threshold = self.object_threshold
                                    
                                    if abs(self.objects_dict[objects_ids[object_id]]["estimatedVelocity"][0])>max_threshold or abs(self.objects_dict[objects_ids[object_id]]["estimatedVelocity"][1])>max_threshold or abs(self.objects_dict[objects_ids[object_id]]["estimatedVelocity"][2])>max_threshold:
                                        self.objects_dict[objects_ids[object_id]]["activeObject"] = 1
                                    else:
                                        self.objects_dict[objects_ids[object_id]]["activeObject"] = 0

                                    if self.objects_dict[objects_ids[object_id]]["classID"] == 0 and self.objects_dict[objects_ids[object_id]]["activeObject"] == 0:
                                        # A human flagged as static by the velocity test may
                                        # still be moving in place, so also check the mask
                                        # overlap before keeping it inactive.
                                        iou = self.iou_centered_centroid(self.objects_dict[objects_ids[object_id]]["roisOld"], r['rois'][centroid_id], self.objects_dict[objects_ids[object_id]]["maskOld"], r['masks'][centroid_id])
                                        if iou < self.iou_threshold:
                                            self.objects_dict[objects_ids[object_id]]["activeObject"] = 1
                                    
                                    self.objects_dict[objects_ids[object_id]]["centroid"] = centroids_pose[centroid_id]
                                    self.objects_dict[objects_ids[object_id]]["dimensions"] = current_dimensions[centroid_id]
                                    self.objects_dict[objects_ids[object_id]]["inactiveNbFrame"] = 0
                                    self.objects_dict[objects_ids[object_id]]["maskID"] = centroid_id
                                    self.objects_dict[objects_ids[object_id]]["maskOld"] = r['masks'][centroid_id]
                                    self.objects_dict[objects_ids[object_id]]["roisOld"] = r['rois'][centroid_id]
                        
                        if len(centroids_pose) < len(objects_pose):
                            # Objects left unmatched this frame are counted as
                            # inactive; their pose is still propagated by the
                            # filter's prediction step. Each object is handled
                            # once, using the same row index as in the
                            # matching loop above.
                            for index in range(len(eucledian_dist_pairwise)):
                                object_id = int(index_sorted[index] / len(centroids_pose))
                                if not np.in1d(object_id, used_objects):
                                    used_objects.append(object_id)
                                    self.objects_dict[objects_ids[object_id]]["inactiveNbFrame"] += 1
                                    self.objects_dict[objects_ids[object_id]]["activeObject"] = 0
                                    if self.objects_dict[objects_ids[object_id]]["inactiveNbFrame"] >= self._max_inactive_frames:
                                        self.delete_object(objects_ids[object_id])
                                    else:
                                        self.objects_dict[objects_ids[object_id]]["kalmanFilter"].prediction()
                                        self.objects_dict[objects_ids[object_id]]["estimatedPose"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x_[0:3]
                                        self.objects_dict[objects_ids[object_id]]["estimatedVelocity"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x_[3:6]

                        elif len(centroids_pose) > len(objects_pose):
                            buff_id = self.next_object_id
                            for index in range(len(eucledian_dist_pairwise)):
                                centroid_id = index_sorted[index] % len(centroids_pose)
                                if not np.in1d(centroid_id, used_centroids):
                                    self.add_object(current_centroids[centroid_id], current_dimensions[centroid_id], centroid_id, r['class_ids'][centroid_id], r['masks'][centroid_id], r['rois'][centroid_id])
                                    self.objects_dict[buff_id]["kalmanFilter"].prediction()
                                    self.objects_dict[buff_id]["kalmanFilter"].update(current_centroids[centroid_id], h_mat)
                                    self.objects_dict[buff_id]["estimatedPose"] = self.objects_dict[buff_id]["kalmanFilter"].x[0:3]
                                    self.objects_dict[buff_id]["estimatedVelocity"] = self.objects_dict[buff_id]["kalmanFilter"].x[3:6]
                                    buff_id = buff_id + 1
                               
                kalman_time = time.time()
                # Write objects filter pose to tf
                self.handle_objects_pose()

                result_dynamic_depth_image, result_depth_image = self.apply_depth_image_masking(current_depth_frame, r['masks'])
                
                DDITS = self.bridge.cv2_to_imgmsg(result_dynamic_depth_image, '32FC1')
                DDITS.header = self.depth_msg_header
                self.dynamic_depth_image_pub.publish(DDITS)

                DITS = self.bridge.cv2_to_imgmsg(result_depth_image, '32FC1')
                DITS.header = self.depth_msg_header
                self.depth_image_pub.publish(DITS)
                
                print_time = time.time()

                #print(" NN pred time: ", format(nn_pred_time - nn_start_time, '.3f'),", NN post time: ", format(nn_time - nn_pred_time, '.3f'),", NN time: ", format(nn_time - start_time, '.3f'), ", Kalman time: ", format(kalman_time - nn_time, '.3f'),
                #", Print time: ", format(print_time - kalman_time, '.3f'), ", Total time: ", format(time.time() - start_time, '.3f'),
                #", FPS :", format(1/(time.time() - start_time), '.2f'), end="\r")

    def image_callback(self, msg):

        self.msg_header = msg.header
        self.frame = self.bridge.imgmsg_to_cv2(msg, "bgr8")

    def depth_image_callback(self, msg):

        self.depth_msg_header = msg.header
        # 32FC1 for Asus Xtion
        # 8UC1 for Kinect
        self.depth_frame = self.bridge.imgmsg_to_cv2(msg, "32FC1")
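
# A self-contained, hedged sketch (not part of the node above) of the linear
# constant-velocity Kalman recursion that the tracker's extendedKalmanFilter
# is assumed to implement; the h_mat camera-pose argument of the original
# update() is simplified away here.
import numpy as np

class ConstantVelocityKF:
    def __init__(self, x, P, F, H, Q, R):
        self.x = np.asarray(x, dtype=float)
        self.P, self.F, self.H, self.Q, self.R = P, F, H, Q, R

    def prediction(self):
        # Propagate state and covariance one time step forward.
        self.x = self.F @ self.x
        self.P = self.F @ self.P @ self.F.T + self.Q
        return self.x

    def update(self, z):
        # Fuse a new 3D position measurement z into the state.
        innovation = np.asarray(z, dtype=float) - self.H @ self.x
        S = self.H @ self.P @ self.H.T + self.R
        K = self.P @ self.H.T @ np.linalg.inv(S)
        self.x = self.x + K @ innovation
        self.P = (np.eye(len(self.x)) - K @ self.H) @ self.P
        return self.x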
Пример #21
0
from layers.output_utils import postprocess
import pycocotools

from data import cfg, set_cfg, set_dataset
from yolact import Yolact
from utils.augmentations import FastBaseTransform

import numpy as np
import torch
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from collections import defaultdict
import matplotlib.pyplot as plt
import cv2

set_cfg('yolact_resnet50_config')
cudnn.benchmark = True
cudnn.fastest = True
torch.set_default_tensor_type('torch.cuda.FloatTensor')
net = Yolact()
net.load_weights('weights/yolact_resnet50_54_800000.pth')
net.eval()
net = net.cuda()

net.detect.use_fast_nms = True
cfg.mask_proto_debug = False

path = "cat.jpg"
frame = torch.from_numpy(cv2.imread(path)).cuda().float()
batch = FastBaseTransform()(frame.unsqueeze(0))
print(batch.shape)
preds = net(batch)
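
# Hedged continuation (not in the original snippet): turning the raw
# predictions into numpy detections with the postprocess import above;
# the 0.15 threshold is illustrative only.
h, w, _ = frame.shape
classes, scores, boxes, masks = postprocess(preds, w, h, score_threshold=0.15)
print(classes.shape, scores.shape, boxes.shape, masks.shape)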
Пример #22
0
def evaluate(image, train_mode=False):

    mask_proto_debug=False

    net.detect.use_fast_nms = True
    cfg.mask_proto_debug = mask_proto_debug

    output_image = evalimage(image)

    return output_image

config = None
detect = False
dataset=None
cuda = True


model_path = SavePath.from_str("yolact_darknet53_54_800000.pth")
config = model_path.model_name + '_config'
print('Config not specified. Parsed %s from the file name.\n' % config)
set_cfg(config)
cfg.eval_mask_branch = True
cudnn.benchmark = True
cudnn.fastest = True
torch.set_default_tensor_type('torch.cuda.FloatTensor')
print('Loading model...')
net = Yolact()
net.load_weights("/home/venkat/Documents/projects/Perception-ros-tuggerbot/src/perception/yolact_depth_perception/scripts/yolact_darknet53_54_800000.pth")
net.eval()
print(' Done.')
net = net.cuda()
Пример #23
0
def train():
    #1: Create the folder where the training results will be saved
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    #2: Prepare the train dataset through the API provided by MSCOCO.
    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    #   If the train-validation scheme is used, also prepare the eval dataset.
    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    #3: Create an object of the implemented Yolact() class and set it to train mode.
    # Note: net and yolact_net share the same object in memory.
    #       However, net is later redefined as a combined object of yolact and
    #       MultiBoxLoss for training, so yolact_net is kept as a separate
    #       handle to access the Yolact network on its own.
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    #######################################################################
    ####### RESUME handling ###############################################
    #4: args.log and args.resume implement logging during training and
    # restarting from the point of interruption when training is unavoidably
    # stopped midway, so look into them in detail only when needed.
    if args.log:
        log = Log(cfg.name,
                  args.log_folder,
                  dict(args._get_kwargs()),
                  overwrite=(args.resume is None),
                  log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)
    #######END#############################################################
    #######################################################################

    #5: Set up the optimizer and loss function for yolact.
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio)

    #6: When using multiple GPUs, split the batch size across the GPUs.
    #   If the total batch size does not add up, something is wrong, so exit.

    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print(
                'Error: Batch allocation (%s) does not sum to batch size (%s).'
                % (args.batch_alloc, args.batch_size))
            exit(-1)

    #7: Combine the net configured so far with the loss function into a more
    #   integrated net. Calling net now detects bboxes, filters them once
    #   through fast NMS, and computes the loss against the ground truth;
    #   with multiple GPUs this work is split across the devices automatically.
    #   yolact_net points only to the Yolact() contained in net.
    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    #8: Freeze all batch-normalization layers of yolact_net, pass a tensor of
    #   zeros through the model to initialize the parameters, and then switch
    #   the batch-normalization layers back to train mode. This is done
    #   because the mean/variance values the author preloaded into batch
    #   normalization should not be reinitialized.
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn: yolact_net.freeze_bn(True)

    #9: Loss counters.
    #   Create variables to hold the bbox-localization loss and the class-
    #   confidence loss, and compute the size of one epoch and how many epochs
    #   to run from the batch size and the dataset size.
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)  # cw: to disallow a negative start value... GOOD
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    #10: Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    #   step_index is the index used for learning rate decay.
    #   data_loader is the class that prepares and hands over the dataset in
    #   order during training; the object is created and stored here.
    step_index = 0

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    #11: Define a lambda that saves intermediate checkpoints to save_path when
    #   a given epoch and iteration are reached; time_avg and loss_avgs are
    #   MovingAverage objects declared to report the intermediate training
    #   loss as a moving average.
    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    #12: The part where the main training starts (#A ~ #F)
    print('Begin training!')
    print()

    # A
    #   A try-except makes it possible to stop training with
    #   Ctrl+C (KeyboardInterrupt) while saving the progress so far.
    #   To restart from the interruption point, run train.py with --resume.
    try:
        # Repeat for the num_epochs computed in #9.
        for epoch in range(num_epochs):
            # B
            #   If started with --resume, continue until the restart iteration
            #   is reached. Data is loaded from data_loader and the loss is
            #   computed; if the target iteration is reached mid-epoch, break
            #   to end the epoch.
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # If we have trained as much as targeted, stop.
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # If the targeted iteration count exceeds max_iter, finish training at max_iter.
                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Handle the case where a config value should change at a specific iteration.
                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs:
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # C
                #   [learning rate adjustment]

                # Early in training (relative to lr_warmup_until), adjust the learning rate to ramp training up a bit.
                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                #   Perform learning rate decay each time a given iteration is reached.
                #   Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # D
                #   Compute the loss.

                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                #   Run forward propagation and compute the loss of one iteration from the result.
                #   For the details, see resnet101 in Backbone.py, yolact in yolact.py, and the MultiBoxLoss class in MultiBoxLoss.py.
                #   (see CustomDataParallel and NetLoss)
                losses = net(datum)

                losses = {k: (v).mean()
                          for k, v in losses.items()
                          }  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])

                # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                # all_loss = sum([v.mean() for v in losses.values()])

                # E
                #   Run backward propagation and, if the loss is finite,
                #   apply the gradients to the parameters via optimizer.step().

                # Backprop
                loss.backward()

                # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # F
                #   Print the elapsed time and the intermediate loss values so
                #   training progress can be monitored.

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) +
                           ' T: %.3f || ETA: %s || timer: %.3f') %
                          tuple([epoch, iteration] + loss_labels +
                                [total, eta_str, elapsed]),
                          flush=True)

                #   Write the log to a file
                if args.log:
                    precision = 5
                    loss_info = {
                        k: round(losses[k].item(), precision)
                        for k in losses
                    }
                    loss_info['T'] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0
                                             )  # nvidia-smi is sloooow

                    log.log('train',
                            loss=loss_info,
                            epoch=epoch,
                            iter=iteration,
                            lr=round(cur_lr, 10),
                            elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu
                # ~F

                # One pass of the loop: increment the iteration by 1.
                iteration += 1

                #   Save the training state at the configured interval.
                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # When running with train-validation, if the finished epoch falls on
            # the validation period, run one validation pass and measure mAP.
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(epoch, iteration, yolact_net,
                                           val_dataset,
                                           log if args.log else None)

        # Compute validation mAP after training is finished
        compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                               log if args.log else None)

    #13: If training was interrupted with Ctrl+C, save the weights to
    #   save_folder and exit so training can be restarted later.
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')

            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            yolact_net.save_weights(
                save_path(epoch,
                          repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
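
# Hedged restatement (not part of the original): the linear warm-up schedule
# applied in train() above, written out as a standalone formula.
def warmup_lr(i, base_lr, warmup_init, warmup_until):
    # lr ramps linearly from warmup_init at i = 0 to base_lr at i = warmup_until.
    return warmup_init + (base_lr - warmup_init) * min(i, warmup_until) / warmup_until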
Пример #24
0
def compute_validation_loss(data_loader, val_loader, criterion):
    global loss_types

    # loss counters
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    weight_paths = os.listdir(args.resume)

    # Initialize everything
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn: yolact_net.freeze_bn(True)

    epoch_size = len(data_loader)
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    with torch.no_grad():

        # Don't switch to eval mode because we want to get losses
        next_iterations = args.start_iter

        for epoch in range(num_epochs):
            new_epoch = next_iterations // epoch_size
            if epoch != new_epoch:
                continue

            for idx, datum in enumerate(tqdm(range(len(data_loader)))):
                iterations = epoch * epoch_size + idx
                if iterations % 1500 == 0:
                    stop = True
                    for path in weight_paths:
                        iter_id = path.split('_')[-1][:-4]
                        epoch_id = int(path.split('_')[-2])
                        if int(iter_id) == iterations:
                            stop = False
                            break
                    if stop:
                        print("Stop at iter {}".format(iterations))
                        return None

                    weight_name = path  #"yolact_taco_{}_{}.pth".format(epoch_id,iterations)
                    weight_path = os.path.join(args.resume, weight_name)
                    print('Loading {}...'.format(weight_name))
                    yolact_net.load_weights(weight_path)

                else:
                    continue

            datum = None
            losses = {}
            total_train = len(data_loader)
            for idx, datum in enumerate(tqdm(data_loader)):
                try:
                    _losses = net(datum)
                    _losses = {k: (v).mean() for k, v in _losses.items()}
                    for k, v in _losses.items():
                        if k in losses:
                            losses[k] += v
                        else:
                            losses[k] = v
                except IndexError as e:
                    total_train -= 1
                    continue
            for k in losses.keys():
                losses[k] /= total_train

            total_train_loss = sum(losses.values())
            print('Train loss: {}'.format(total_train_loss.item()))

            datum = None
            _losses = None
            losses = {}
            total_val = len(val_loader)
            for idx, datum in enumerate(tqdm(val_loader)):
                try:
                    _losses = net(datum)
                    _losses = {k: (v).mean() for k, v in _losses.items()}
                    for k, v in _losses.items():
                        if k in losses:
                            losses[k] += v
                        else:
                            losses[k] = v
                except IndexError as e:
                    total_val -= 1
                    continue
            for k in losses.keys():
                losses[k] /= total_val

            total_val_loss = sum(losses.values())
            print('Val loss: {}'.format(total_val_loss.item()))

            next_iterations += 1500
            with open(args.log_loss, 'a+') as f:
                f.write('{}_{}_{}\r'.format(iterations,
                                            total_train_loss.item(),
                                            total_val_loss.item()))
Пример #25
0
class YolactEdgeEngine:
    def __init__(self):
        parse_args(self)
        self.args.config = 'yolact_edge_mobilenetv2_config'
        set_cfg(self.args.config)
        self.args.trained_model = '/home/ht/catkin_ws/src/instance_segmentation/scripts/weights/yolact_edge_mobilenetv2_124_10000.pth'
        self.args.top_k = 10
        self.args.score_threshold = 0.3
        self.args.trt_batch_size = 3
        self.args.disable_tensorrt = False
        self.args.use_fp16_tensorrt = False
        self.args.use_tensorrt_safe_mode = True
        self.args.cuda = True
        self.args.fast_nms = True
        self.args.display_masks = True
        self.args.display_bboxes = True
        self.args.display_text = True
        self.args.display_scores = True
        self.args.display_linecomb = False
        self.args.fast_eval = False
        self.args.deterministic = False
        self.args.no_crop = False
        self.args.crop = True
        self.args.calib_images = '/home/ht/catkin_ws/src/instance_segmentation/scripts/data/coco/calib_images'

        setup_logger(logging_level=logging.INFO)
        self.logger = logging.getLogger('yolact.eval')

        self.color_cache = defaultdict(lambda: {})

        with torch.no_grad():
            cudnn.benchmark = True
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')

            self.logger.info('Loading model...')
            self.net = Yolact(training=False)
            if self.args.trained_model is not None:
                self.net.load_weights(self.args.trained_model, args=self.args)
            else:
                self.logger.warning('No weights loaded!')
            self.net.eval()
            self.logger.info('Model loaded.')
            convert_to_tensorrt(self.net,
                                cfg,
                                self.args,
                                transform=BaseTransform())

    def evaluate(self, train_mode=False, train_cfg=None):
        with torch.no_grad():
            self.net = self.net.cuda()
            self.net.detect.use_fast_nms = self.args.fast_nms
            cfg.mask_proto_debug = self.args.mask_proto_debug
            inp, out = self.args.images.split(':')
            self.evalimages(inp, out)

    def evalimages(self, input_folder: str, output_folder: str):
        if not os.path.exists(output_folder):
            os.mkdir(output_folder)

        print()
        for p in Path(input_folder).glob('*'):
            path = str(p)
            name = os.path.basename(path)
            name = '.'.join(name.split('.')[:-1]) + '.jpg'
            out_path = os.path.join(output_folder, name)

            img = cv2.imread(path)
            img_out = self.evalimage(img, out_path)
            #print(path + ' -> ' + out_path)
        print('Done.')

    def detect(self, img_in, return_imgs=False):
        with torch.no_grad():
            self.net = self.net.cuda()
            self.net.detect.use_fast_nms = self.args.fast_nms
            cfg.mask_proto_debug = self.args.mask_proto_debug
            #return self.evalimage(img_in[0])
            return self.evalbatch(img_in, return_imgs)

    def evalbatch(self, imgs, return_imgs=False):
        frame = torch.from_numpy(np.array(imgs)).cuda().float()
        batch = FastBaseTransform()(frame)

        if cfg.flow.warp_mode != 'none':
            assert False, 'Evaluating the image with a video-based model.'

        extras = {
            "backbone": "full",
            "interrupt": False,
            "keep_statistics": False,
            "moving_statistics": None
        }

        #start_time = time.time()
        preds = self.net(batch, extras=extras)["pred_outs"]
        #end_time = time.time()
        #print('%.3f s' % (end_time-start_time))

        imgs_out = []
        allres = []
        for i, img in enumerate(imgs):
            if return_imgs:
                img_out, res = self.prep_display(preds,
                                                 frame[i],
                                                 None,
                                                 None,
                                                 undo_transform=False,
                                                 batch_idx=i,
                                                 create_mask=True,
                                                 return_imgs=return_imgs)
                imgs_out.append(img_out)
                allres.append(res)
            else:
                res = self.prep_display(preds,
                                        frame[i],
                                        None,
                                        None,
                                        undo_transform=False,
                                        batch_idx=i,
                                        create_mask=True,
                                        return_imgs=return_imgs)
                allres.append(res)
        if return_imgs:
            return imgs_out, allres
        else:
            return allres

    def evalimage(self, img, save_path=None):
        frame = torch.from_numpy(img).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))

        if cfg.flow.warp_mode != 'none':
            assert False, 'Evaluating the image with a video-based model.'

        extras = {
            "backbone": "full",
            "interrupt": False,
            "keep_statistics": False,
            "moving_statistics": None
        }

        preds = self.net(batch, extras=extras)["pred_outs"]

        return self.prep_display(preds,
                                 frame,
                                 None,
                                 None,
                                 undo_transform=False,
                                 create_mask=True)
        #if save_path:
        #    cv2.imwrite(save_path, img_numpy)
        #return img_numpy, mask

    def prep_display(self,
                     dets_out,
                     img,
                     h,
                     w,
                     undo_transform=True,
                     class_color=False,
                     mask_alpha=0.45,
                     batch_idx=0,
                     create_mask=False,
                     return_imgs=False):
        mask_img = None  # only populated below when create_mask is True
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape
            #print(h, " ", w)

        with timer.env('Postprocess'):
            t = postprocess(dets_out,
                            w,
                            h,
                            batch_idx,
                            visualize_lincomb=self.args.display_linecomb,
                            crop_masks=self.args.crop,
                            score_threshold=self.args.score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                masks = t[3][:self.args.top_k]
            classes, scores, boxes = [
                x[:self.args.top_k].cpu().numpy() for x in t[:3]
            ]

        num_dets_to_consider = min(self.args.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < self.args.score_threshold:
                num_dets_to_consider = j
                break

        idx_fil = []
        for i in range(num_dets_to_consider):
            if cfg.dataset.class_names[
                    classes[i]] == 'car' or cfg.dataset.class_names[
                        classes[i]] == 'truck':
                idx_fil.append(i)
        num_dets_to_consider = len(idx_fil)

        if num_dets_to_consider == 0:
            # no detection found so just output original image
            if not create_mask:
                return (img_gpu * 255).byte().cpu().numpy()
            elif return_imgs:
                return (img_gpu * 255).byte().cpu().numpy(), ImageResult(
                    None, None, None, np.zeros((h, w, 1), dtype='uint8'), 0)
            else:
                return ImageResult(None, None, None,
                                   np.zeros((h, w, 1), dtype='uint8'), 0)

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            color_idx = (classes[j] * 5 if class_color else j *
                         5) % len(COLORS)

            if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
                return self.color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BGR, depending
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    self.color_cache[on_gpu][color_idx] = color
                return color

        if self.args.display_masks and cfg.eval_mask_branch:
            # after this, masks is of size [num_dets, h, w, 1]
            #masks = masks[:num_dets_to_consider, :, :, None]
            #classes = classes[:num_dets_to_consider]
            #scores = scores[:num_dets_to_consider]
            #boxes = boxes[:num_dets_to_consider, :]

            masks = masks[idx_fil, :, :, None]
            classes = classes[idx_fil]
            scores = scores[idx_fil]
            boxes = boxes[idx_fil, :]

            if create_mask:
                mask_img = np.zeros((h, w, 1), dtype='uint8')
                for j in range(num_dets_to_consider):
                    mask_img += 10 * (j + 1) * masks[j].cpu().numpy().astype(
                        np.uint8)
                if not return_imgs:
                    return ImageResult(classes, scores, boxes, mask_img,
                                       num_dets_to_consider)

            # prepare the rgb image for each mask given their color (of size [num_dets, w, h, l])
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # this is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # I did the math for this on pen and paper. This whole block should be equivalent to:
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

        # then draw the stuff that needs to be done on the CPU
        # note: make sure this is a uint8 tensor or OpenCV will not anti-alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        if self.args.display_text or self.args.display_bboxes:
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                if self.args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if self.args.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    text_str = '%s: %.2f' % (
                        _class, score) if self.args.display_scores else _class
                    text_pt = (x1, y1 - 3)
                    text_color = [255, 255, 255]

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)
        return img_numpy, ImageResult(classes, scores, boxes, mask_img,
                                      num_dets_to_consider)
Пример #26
0
class YolactWorker(qc.QObject):
    # emits the bounding boxes of detected objects (see process());
    # bboxes are in (top-left, w, h) format.
    # The event is passed for synchronizing display of the image in the
    # videowidget with the bounding boxes.
    sigProcessed = qc.pyqtSignal(np.ndarray, int)
    sigInitialized = qc.pyqtSignal()
    sigError = qc.pyqtSignal(YolactException)

    def __init__(self):
        super(YolactWorker, self).__init__()
        self.mutex = qc.QMutex()
        self._image = None
        self._pos = 0
        self.top_k = 10
        self.cuda = torch.cuda.is_available()
        self.net = None
        self.score_threshold = 0.15
        self.overlap_thresh = 1.0
        self.config = yconfig.cfg
        self.weights_file = ''
        self.config_file = ''
        self.video_file = None

    def setWaitCond(self, waitCond: threading.Event) -> None:
        _ = qc.QMutexLocker(self.mutex)
        self._waitCond = waitCond

    @qc.pyqtSlot(bool)
    def enableCuda(self, on):
        settings.setValue('yolact/cuda', on)
        self.cuda = on

    @qc.pyqtSlot(int)
    def setTopK(self, value):
        _ = qc.QMutexLocker(self.mutex)
        self.top_k = value

    @qc.pyqtSlot(int)
    def setBatchSize(self, value):
        _ = qc.QMutexLocker(self.mutex)
        self.batch_size = int(value)

    @qc.pyqtSlot(float)
    def setScoreThresh(self, value):
        _ = qc.QMutexLocker(self.mutex)
        self.score_threshold = value

    @qc.pyqtSlot(float)
    def setOverlapThresh(self, value):
        """Merge objects if their bboxes overlap more than this."""
        _ = qc.QMutexLocker(self.mutex)
        self.overlap_thresh = value

    @qc.pyqtSlot(str)
    def setConfig(self, filename):
        if filename == '':
            return
        self.config_file = filename
        with open(filename, 'r') as cfg_file:
            config = yaml.safe_load(cfg_file)
            for key, value in config.items():
                logging.debug('%r \n%r %r', key, type(value), value)
                self.config.__setattr__(key, value)
            if 'mask_proto_debug' not in config:
                self.config.mask_proto_debug = False
        logging.debug(yaml.dump(self.config))

    @qc.pyqtSlot(str)
    def setWeights(self, filename: str) -> None:
        if filename == '':
            raise YolactException('Empty filename for network weights')
        self.weights_file = filename
        tic = time.perf_counter_ns()
        with torch.no_grad():
            if self.cuda:
                cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
            else:
                torch.set_default_tensor_type('torch.FloatTensor')
            self.net = Yolact()
            self.net.load_weights(self.weights_file, self.cuda)
            self.net.eval()
            if self.cuda:
                self.net = self.net.cuda()
        toc = time.perf_counter_ns()
        logging.debug('Time to load weights %f s', 1e-9 * (toc - tic))
        self.sigInitialized.emit()

    @qc.pyqtSlot(np.ndarray, int)
    def process(self, image: np.ndarray, pos: int):
        """:returns (classes, scores, boxes)

        where `boxes` is an array of bounding boxes of detected objects in
        (xleft, ytop, width, height) format.

        `classes` is the class ids of the corresponding objects.

        `scores` are the computed class scores corresponding to the detected objects.
        Roughly high score indicates strong belief that the object belongs to
        the identified class.
        """
        _ts = time.perf_counter()
        logging.debug(f'Received frame {pos}')
        if self.net is None:
            self.sigError.emit(YolactException('Network not initialized'))
            return
        # Partly follows yolact eval.py
        tic = time.perf_counter_ns()
        _ = qc.QMutexLocker(self.mutex)
        with torch.no_grad():
            if self.cuda:
                image = torch.from_numpy(image).cuda().float()
            else:
                image = torch.from_numpy(image).float()
            batch = FastBaseTransform()(image.unsqueeze(0))
            preds = self.net(batch)
            image_gpu = image / 255.0
            h, w, _ = image.shape
            save = self.config.rescore_bbox
            self.config.rescore_bbox = True
            classes, scores, boxes, masks = oututils.postprocess(
                preds,
                w,
                h,
                visualize_lincomb=False,
                crop_masks=True,
                score_threshold=self.score_threshold)
            idx = scores.argsort(0, descending=True)[:self.top_k]
            # if self.config.eval_mask_branch:
            #     masks = masks[idx]
            classes, scores, boxes = [
                x[idx].cpu().numpy() for x in (classes, scores, boxes)
            ]
            # This is probably not required, `postprocess` uses
            # `score_thresh` already
            num_dets_to_consider = min(self.top_k, classes.shape[0])
            for j in range(num_dets_to_consider):
                if scores[j] < self.score_threshold:
                    num_dets_to_consider = j
                    break
            # logging.debug('Bounding boxes: %r', boxes)
            # Convert from top-left bottom-right format to
            # top-left, width, height format
            if len(boxes) == 0:
                self.sigProcessed.emit(boxes, pos)
                return
            boxes[:, 2:] = boxes[:, 2:] - boxes[:, :2]
            boxes = np.asanyarray(boxes, dtype=np.int_)
            if self.overlap_thresh < 1:
                dist_matrix = pairwise_distance(new_bboxes=boxes,
                                                bboxes=boxes,
                                                boxtype=OutlineStyle.bbox,
                                                metric=DistanceMetric.ios)
                bad_idx = [jj for ii in range(dist_matrix.shape[0] - 1) \
                             for jj in range(ii+1, dist_matrix.shape[1]) \
                              if dist_matrix[ii, jj] < 1 - self.overlap_thresh]
                good_idx = list(set(range(boxes.shape[0])) - set(bad_idx))
                boxes = boxes[good_idx].copy()

            toc = time.perf_counter_ns()
            logging.debug('Time to process single _image: %f s',
                          1e-9 * (toc - tic))
            self.sigProcessed.emit(boxes, pos)
            logging.debug(f'Emitted bboxes for frame {pos}: {boxes}')
        _dt = time.perf_counter() - _ts
        logging.debug(
            f'{__name__}.{self.__class__.__name__}.process: Runtime: {_dt}s')
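
# Hedged sketch (not from the original code): DistanceMetric.ios used in
# process() above is assumed to mean intersection-over-smaller-box; for two
# (x, y, w, h) boxes it could look like this.
def ios(b1, b2):
    x1, y1 = max(b1[0], b2[0]), max(b1[1], b2[1])
    x2 = min(b1[0] + b1[2], b2[0] + b2[2])
    y2 = min(b1[1] + b1[3], b2[1] + b2[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    smaller = min(b1[2] * b1[3], b2[2] * b2[3])
    return inter / smaller if smaller > 0 else 0.0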
Пример #27
0
class MattingService:
    def __init__(self,
                 model_path="./weights/yolact_im700_54_800000.pth",
                 use_cuda=False):
        print('Loading model...', end='')
        self.use_cuda = use_cuda
        self.trained_model = model_path
        self.net = Yolact()
        self.net.load_weights(self.trained_model)
        self.net.eval()

        if self.use_cuda:
            self.net = self.net.cuda()

        self.net.detect.use_fast_nms = True
        self.net.detect.use_cross_class_nms = False
        cfg.mask_proto_debug = False

        print(' Done.')

    def process(self, image, top_k=1, score_threshold=0.6):
        # TODO: Fast Mask Re-scoring is currently not supported in evalimage, evalimages, and evalvideo
        with torch.no_grad():
            if image is not None:
                if ':' in image:
                    inp, _image_name = image.split(':')
                    self._infer_image(self.net, inp, _image_name, top_k,
                                      score_threshold)
                else:
                    _image_name = image.split('/')[-1].split('.')[0] + '.png'
                    out = os.path.join('results/', _image_name)
                    self._infer_image(self.net, image, out, top_k,
                                      score_threshold)
                return _image_name

    def _infer_image(self, net: Yolact, path, save_path, top_k,
                     score_threshold):
        if self.use_cuda:
            frame = torch.from_numpy(cv2.imread(path)).cuda().float()
        else:
            frame = torch.from_numpy(cv2.imread(path)).float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = net(batch)

        img_numpy = self.post_process(preds,
                                      frame,
                                      None,
                                      None,
                                      top_k,
                                      score_threshold,
                                      undo_transform=False)

        if save_path is None:
            # Reorder channels from BGRA to RGBA for matplotlib display
            img_numpy = img_numpy[:, :, (2, 1, 0, 3)]
            plt.subplot()
            plt.imshow(img_numpy)
            plt.title(path)
            plt.show()
        else:
            cv2.imwrite(save_path, img_numpy)

    @staticmethod
    def post_process(dets_out,
                     img,
                     h,
                     w,
                     top_k=1,
                     score_threshold=0.6,
                     undo_transform=True):
        """
        Note: if undo_transform=False, then h and w are allowed to be None.
        """
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=False,
                            crop_masks=False,
                            score_threshold=score_threshold)
            cfg.rescore_bbox = save

        with timer.env('Copy'):
            idx = t[1].argsort(0, descending=True)[:top_k]

            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][idx]
            classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        num_dets_to_consider = min(top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < score_threshold:
                num_dets_to_consider = j
                break

        # Convert the normalized image back to a uint8 numpy array and append
        # an alpha channel for the matte; `masks` below has shape
        # [num_dets, h, w, 1] after the reshape
        final_res = (img_gpu * 255).byte().cpu().numpy()
        final_res = cv2.cvtColor(final_res, cv2.COLOR_RGB2RGBA)

        if num_dets_to_consider == 0:
            return final_res

        masks = masks[:num_dets_to_consider, :, :, None]

        _mask = (masks * 255).byte().cpu().numpy()[0]

        # Then assign the mask to the last channel of the image
        final_res[:, :, 3] = _mask.squeeze()

        return final_res
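`post_process` above returns an RGBA image whose alpha channel holds the top detection's mask, which is what makes the result usable as a matte. A minimal follow-up sketch, not part of the original service, that composites such a matte over a new background (`composite` is a hypothetical helper):

import cv2
import numpy as np

def composite(rgba, background):
    # Alpha-blend the matted foreground over a background resized to match
    alpha = rgba[:, :, 3:4].astype(np.float32) / 255.0
    fg = rgba[:, :, :3].astype(np.float32)
    bg = cv2.resize(background, (rgba.shape[1], rgba.shape[0])).astype(np.float32)
    return (alpha * fg + (1.0 - alpha) * bg).astype(np.uint8)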
Example #28
def create_model(weights):
    yolact = Yolact()
    yolact.load_weights(weights)
    return yolact
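`create_model` leaves eval mode and device placement to the caller. A minimal usage sketch, with placeholder paths and assuming the same imports (`torch`, `cv2`, `FastBaseTransform`) as the surrounding examples:

net = create_model('./weights/yolact_base_54_800000.pth')  # placeholder path
net.eval()
with torch.no_grad():
    frame = torch.from_numpy(cv2.imread('input.jpg')).float()
    batch = FastBaseTransform()(frame.unsqueeze(0))
    preds = net(batch)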
Example #29
    if args.cuda:
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    if args.resume and not args.display:
        with open(args.ap_data_file, 'rb') as f:
            ap_data = pickle.load(f)
        calc_map(ap_data)
        exit()

    dataset = None

    print('Loading model...', end='')
    net = Yolact()
    net.load_weights(args.trained_model)
    net.eval()
    print(' Done.')

    if args.cuda:
        net = net.cuda()

    net.detect.use_fast_nms = args.fast_nms
    net.detect.use_cross_class_nms = args.cross_class_nms
    cfg.mask_proto_debug = args.mask_proto_debug



scan = Scan(rgb_paths=rgb_paths, depth_paths=depth_paths, pose_paths=pose_paths,
            cam_intr=cam_intr, mesh_plot=mesh_plot, scannet_data=scannet_data, mask_net=net,
            args=args, root_path=root_path, use_gpu=use_gpu)
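The fragment above reads several attributes from an `args` namespace in the style of YOLACT's eval.py. A minimal sketch of the corresponding argparse flags (names mirror eval.py conventions; the defaults here are assumptions):

import argparse

parser = argparse.ArgumentParser(description='YOLACT evaluation (sketch)')
parser.add_argument('--trained_model', type=str,
                    default='weights/yolact_base_54_800000.pth')
parser.add_argument('--ap_data_file', type=str, default='results/ap_data.pkl')
parser.add_argument('--cuda', action='store_true')
parser.add_argument('--resume', action='store_true')
parser.add_argument('--display', action='store_true')
parser.add_argument('--fast_nms', action='store_true')
parser.add_argument('--cross_class_nms', action='store_true')
parser.add_argument('--mask_proto_debug', action='store_true')
args = parser.parse_args()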
Example #30
def detect():
    img_path = '/home/user/dataset/pear/train/JPEGImages'
    save_path = '/home/user/pear_output'
    weight_path = '/home/user/caoliwei/yolact/weights/20200901/yolact_darknet53_1176_20000.pth'

    set_cfg('pear_config')

    with torch.no_grad():
        torch.cuda.set_device(0)

        ######
        # If the input image size is constant, this makes things faster (hence why we can use it in a video setting).
        # cudnn.benchmark = True
        # cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        ######

        net = Yolact()
        net.load_weights(weight_path)
        net.eval()
        net = net.cuda()
        print('model loaded...')

        net.detect.cross_class_nms = True
        net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False

        if not os.path.exists(save_path):
            os.mkdir(save_path)

        img_names = [
            name for name in os.listdir(img_path)
            if name.endswith('.jpg') or name.endswith('.png')
        ]
        #for img_name in tqdm(img_names):
        for img_name in img_names:
            img = cv2.imread(os.path.join(img_path, img_name))
            img = torch.from_numpy(img).cuda().float()
            img = FastBaseTransform()(img.unsqueeze(0))
            start = time.time()
            preds = net(img)
            print('clw: image_name: %s, inference time: %.3fs' %
                  (img_name,
                   time.time() - start))  # ~0.023 s per 550x550 image

            # start = time.time()
            h, w = img.shape[2:]
            result = postprocess(
                preds, w, h, crop_masks=True,
                score_threshold=0.3)  # classes, scores, boxes, masks, sorted by score
            # top_k = 10
            # classes, scores, boxes, masks = [x[:top_k].cpu().numpy() for x in result]  # clw note TODO: is it necessary to keep only the top_k?
            # print('clw: postprocess time: %.3fs' % (time.time() - start))  # 0.001s

            ### Walk through result[0] in order and find the first entry whose
            ### class id is 0 (the pear); its index gives the matching mask
            # start = time.time()
            bFindPear = False
            for i, cls_id in enumerate(result[0]):
                if cls_id == 0 and not bFindPear:
                    pear_mask = result[3][i].cpu().numpy()
                    bFindPear = True
            if not bFindPear:
                # No pear detected in this image; skip to avoid a NameError below
                continue

            # Extract the outline from the pear's mask
            pear_outline = get_outline_from_mask(pear_mask, w, h)
            # print('pear_mask.sum:', pear_mask.sum())     # 124250.0
            # print('pear_outline.sum:', pear_outline.sum())  # 34335.0
            # print('clw: outline extract time use %.3fs' % (time.time() - start))  # 0.001s
            roundness = compute_roundness(pear_outline)
            ###

            result.append(roundness)