Example #1
def main(arch, model_path, output_path, input_shape=(224, 224), batch_size=1):
    repvgg_build_func = get_RepVGG_func_by_name(arch)
    model = repvgg_build_func(deploy=True)
    model.load_state_dict(torch.load(model_path))
    # torch.autograd.Variable is a deprecated no-op wrapper; a plain tensor suffices
    dummy_input = torch.randn(batch_size, 3, input_shape[0], input_shape[1])

    torch.onnx.export(model,
                      dummy_input,
                      output_path,
                      verbose=True,
                      keep_initializers_as_inputs=True,
                      opset_version=12,
                      input_names=['input'],
                      output_names=['output'])
    # onnx_model = onnx.load(output_path)  # load onnx model
    # model_simp, check = simplify(onnx_model)
    # assert check, "Simplified ONNX model could not be validated"
    # onnx.save(model_simp, output_path)
    # print('finished exporting onnx ')

    # reload the exported model and mark the batch dimension of every graph
    # input and output as dynamic ('?') so the model accepts any batch size
    model_d = onnx.load_model(output_path)
    d = model_d.graph.input[0].type.tensor_type.shape.dim
    # d[0].dim_value = input_shape[0]
    d[0].dim_param = '?'
    for output in model_d.graph.output:
        d = output.type.tensor_type.shape.dim
        # d[0].dim_value = input_shape[0]
        d[0].dim_param = '?'
    onnx.save_model(model_d, output_path)
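Since the export above fixes input_names=['input'] and output_names=['output'], a quick sanity check is to run the rewritten graph through onnxruntime with two different batch sizes and confirm the leading dimension follows the input. A minimal sketch, assuming onnxruntime is installed; the file name repvgg.onnx is a placeholder:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('repvgg.onnx')  # placeholder path
for bs in (1, 4):
    x = np.random.randn(bs, 3, 224, 224).astype(np.float32)
    out = sess.run(['output'], {'input': x})[0]
    print(bs, out.shape)  # the leading dimension should equal bs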
Example #2
def test():
    args = parser.parse_args()

    repvgg_build_func = get_RepVGG_func_by_name(args.arch)

    model = repvgg_build_func(deploy=(args.mode == 'deploy'))

    if not torch.cuda.is_available():
        print('using CPU, this will be slow')
        use_gpu = False
    else:
        model = model.cuda()
        use_gpu = True

    # define loss function (criterion); move it to the GPU only when one is available
    criterion = nn.CrossEntropyLoss().cuda() if use_gpu else nn.CrossEntropyLoss()

    if os.path.isfile(args.weights):
        print("=> loading checkpoint '{}'".format(args.weights))
        checkpoint = torch.load(args.weights)
        if 'state_dict' in checkpoint:
            checkpoint = checkpoint['state_dict']
        # strip the 'module.' prefix added by (Distributed)DataParallel
        ckpt = {k.replace('module.', ''): v for k, v in checkpoint.items()}
        model.load_state_dict(ckpt)
    else:
        print("=> no checkpoint found at '{}'".format(args.weights))

    cudnn.benchmark = True

    # Data loading code
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    validate(val_loader, model, criterion, use_gpu)
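validate is defined elsewhere in the repo; a minimal top-1 accuracy loop in its spirit (the signature matches the call above, but the body is a sketch, not the repo's actual function):

import torch

def validate(val_loader, model, criterion, use_gpu):
    model.eval()
    correct, total, loss_sum = 0, 0, 0.0
    with torch.no_grad():
        for images, target in val_loader:
            if use_gpu:
                images, target = images.cuda(), target.cuda()
            output = model(images)
            loss_sum += criterion(output, target).item() * target.size(0)
            correct += (output.argmax(dim=1) == target).sum().item()
            total += target.size(0)
    print('top-1 acc: {:.4f}, loss: {:.4f}'.format(correct / total, loss_sum / total))
    return correct / total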
Example #3
def main(arch, model_path, output_path, input_shape=(224, 224), batch_size=1):
    repvgg_build_func = get_RepVGG_func_by_name(arch)
    model = repvgg_build_func(deploy=True)
    model.load_state_dict(torch.load(model_path))
    # torch.autograd.Variable is a deprecated no-op wrapper; a plain tensor suffices
    dummy_input = torch.randn(batch_size, 3, input_shape[0], input_shape[1])
    torch.onnx.export(model,
                      dummy_input,
                      output_path,
                      verbose=True,
                      keep_initializers_as_inputs=True,
                      opset_version=12)
    onnx_model = onnx.load(output_path)  # load onnx model
    # 'simplify' comes from the onnx-simplifier (onnxsim) package
    model_simp, check = simplify(onnx_model)
    assert check, "Simplified ONNX model could not be validated"
    onnx.save(model_simp, output_path)
    print('finished exporting onnx')
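After simplification it is worth confirming that the ONNX graph still matches the PyTorch model numerically. A hedged sketch, assuming onnxruntime is installed (check_export is a hypothetical helper, not part of the repo):

import numpy as np
import onnxruntime as ort
import torch

def check_export(model, onnx_path, input_shape=(224, 224)):
    # compare the PyTorch model and the exported graph on one random batch
    model.eval()
    x = torch.randn(1, 3, input_shape[0], input_shape[1])
    with torch.no_grad():
        ref = model(x).numpy()
    sess = ort.InferenceSession(onnx_path)
    name = sess.get_inputs()[0].name
    out = sess.run(None, {name: x.numpy()})[0]
    print('max abs diff:', np.abs(ref - out).max())  # expect ~1e-5 or smaller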
Example #4
def convert():
    args = parser.parse_args()

    repvgg_build_func = get_RepVGG_func_by_name(args.arch)

    train_model = repvgg_build_func(deploy=False)

    if os.path.isfile(args.load):
        print("=> loading checkpoint '{}'".format(args.load))
        checkpoint = torch.load(args.load)
        if 'state_dict' in checkpoint:
            train_model.load_state_dict(checkpoint['state_dict'])
        else:
            train_model.load_state_dict(checkpoint)
    else:
        print("=> no checkpoint found at '{}'".format(args.load))

    repvgg_model_convert(train_model, build_func=repvgg_build_func, save_path=args.save)
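convert() reads args.arch, args.load and args.save from a module-level parser that is not shown. A plausible definition (the attribute names match the code above; everything else is an assumption):

import argparse

parser = argparse.ArgumentParser(description='RepVGG train-to-deploy conversion')
parser.add_argument('arch', help='model name, e.g. RepVGG-A0')
parser.add_argument('load', help='path to the training-time checkpoint')
parser.add_argument('save', help='where to write the converted deploy-time weights')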
Example #5
def convert():
    args = parser.parse_args()

    repvgg_build_func = get_RepVGG_func_by_name(args.arch)

    train_model = repvgg_build_func(deploy=False)

    if os.path.isfile(args.load):
        print("=> loading checkpoint '{}'".format(args.load))
        checkpoint = torch.load(args.load)
        if 'state_dict' in checkpoint:
            checkpoint = checkpoint['state_dict']
        # strip the 'module.' prefix added by (Distributed)DataParallel
        ckpt = {k.replace('module.', ''): v for k, v in checkpoint.items()}
        train_model.load_state_dict(ckpt)
    else:
        print("=> no checkpoint found at '{}'".format(args.load))

    repvgg_model_convert(train_model, save_path=args.save)
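Under the hood, repvgg_model_convert re-parameterizes each block: it folds every BatchNorm into its preceding conv and merges the 3x3, 1x1 and identity branches into a single 3x3 kernel. The BN-folding step alone, as a minimal sketch of the math (not the repo's actual implementation): for y = gamma * (conv(x) - mean) / sqrt(var + eps) + beta, the fused kernel is W * gamma / std and the fused bias is beta - mean * gamma / std.

def fuse_conv_bn(conv_weight, bn):
    # fold an nn.BatchNorm2d into the conv kernel that feeds it;
    # a sketch of the algebra, not the repo's repvgg_model_convert
    std = (bn.running_var + bn.eps).sqrt()
    t = (bn.weight / std).reshape(-1, 1, 1, 1)
    fused_weight = conv_weight * t
    fused_bias = bn.bias - bn.running_mean * bn.weight / std
    return fused_weight, fused_bias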
Example #6
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    repvgg_build_func = get_RepVGG_func_by_name(args.arch)

    model = repvgg_build_func(deploy=False)

    if not torch.cuda.is_available():
        print('using CPU, this will be slow')
    elif args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = sgd_optimizer(model, args.lr, args.momentum, args.weight_decay)

    lr_scheduler = CosineAnnealingLR(
        optimizer=optimizer,
        T_max=args.epochs * IMAGENET_TRAINSET_SIZE // args.batch_size //
        ngpus_per_node)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_dataset = datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        # adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args,
              lr_scheduler)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)
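main_worker follows the PyTorch ImageNet reference script, so it is normally launched once per GPU via torch.multiprocessing.spawn. A minimal launcher sketch (the surrounding argument plumbing is assumed to match that script):

import torch
import torch.multiprocessing as mp

def main():
    args = parser.parse_args()
    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # one process per GPU; world_size counts processes across all nodes
        args.world_size = ngpus_per_node * args.world_size
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        main_worker(args.gpu, ngpus_per_node, args)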
Example #7
    def __init__(self,
                 backbone_name,
                 backbone_file,
                 deploy,
                 bins=(1, 2, 3, 6),
                 dropout=0.1,
                 classes=2,
                 zoom_factor=8,
                 use_ppm=True,
                 criterion=nn.CrossEntropyLoss(ignore_index=255),
                 BatchNorm=nn.BatchNorm2d,
                 pretrained=True):
        super(PSPNet, self).__init__()
        assert 2048 % len(bins) == 0
        assert classes > 1
        assert zoom_factor in [1, 2, 4, 8]
        self.zoom_factor = zoom_factor
        self.use_ppm = use_ppm
        self.criterion = criterion

        repvgg_fn = get_RepVGG_func_by_name(backbone_name)
        backbone = repvgg_fn(deploy)
        if pretrained:
            checkpoint = torch.load(backbone_file)
            if 'state_dict' in checkpoint:
                checkpoint = checkpoint['state_dict']
            # strip the 'module.' prefix added by (Distributed)DataParallel
            ckpt = {
                k.replace('module.', ''): v
                for k, v in checkpoint.items()
            }
            backbone.load_state_dict(ckpt)

        self.layer0, self.layer1, self.layer2, self.layer3, self.layer4 = (
            backbone.stage0, backbone.stage1, backbone.stage2,
            backbone.stage3, backbone.stage4)

        #   The last two stages should have stride=1 for semantic segmentation
        #   Note that the stride of 1x1 should be the same as the 3x3
        #   Use dilation following the implementation of PSPNet
        secondlast_channel = 0
        for n, m in self.layer3.named_modules():
            if ('rbr_dense' in n or 'rbr_reparam' in n) and isinstance(
                    m, nn.Conv2d):
                m.dilation, m.padding, m.stride = (2, 2), (2, 2), (1, 1)
                print('change dilation, padding, stride of ', n)
                secondlast_channel = m.out_channels
            elif 'rbr_1x1' in n and isinstance(m, nn.Conv2d):
                m.stride = (1, 1)
                print('change stride of ', n)
        last_channel = 0
        for n, m in self.layer4.named_modules():
            if ('rbr_dense' in n or 'rbr_reparam' in n) and isinstance(
                    m, nn.Conv2d):
                m.dilation, m.padding, m.stride = (4, 4), (4, 4), (1, 1)
                print('change dilation, padding, stride of ', n)
                last_channel = m.out_channels
            elif 'rbr_1x1' in n and isinstance(m, nn.Conv2d):
                m.stride = (1, 1)
                print('change stride of ', n)

        fea_dim = last_channel
        aux_in = secondlast_channel

        if use_ppm:
            self.ppm = PPM(fea_dim, int(fea_dim / len(bins)), bins, BatchNorm)
            fea_dim *= 2

        self.cls = nn.Sequential(
            nn.Conv2d(fea_dim, 512, kernel_size=3, padding=1, bias=False),
            BatchNorm(512), nn.ReLU(inplace=True), nn.Dropout2d(p=dropout),
            nn.Conv2d(512, classes, kernel_size=1))
        if self.training:  # nn.Module defaults to training mode, so this runs at construction time
            self.aux = nn.Sequential(
                nn.Conv2d(aux_in, 256, kernel_size=3, padding=1, bias=False),
                BatchNorm(256), nn.ReLU(inplace=True), nn.Dropout2d(p=dropout),
                nn.Conv2d(256, classes, kernel_size=1))
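A hedged construction example (the backbone name, checkpoint path and class count are placeholders):

model = PSPNet(backbone_name='RepVGG-A0',
               backbone_file='RepVGG-A0-train.pth',  # placeholder checkpoint path
               deploy=False,
               classes=21)  # e.g. 20 PASCAL VOC classes + background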