Example #1
def step_scheduler(optimizer, current_epoch, lr_schedule, net_name):
    """
    Function to perform step learning rate decay
    Args:
        optimizer: optimizer whose learning rate is updated in place
        current_epoch: current epoch number (0-based)
        lr_schedule: iterable of (epoch, lr) pairs; at the start of
            1-based epoch `epoch`, the learning rate is set to `lr`
        net_name: network name used in the log message
    """
    previous_lr = optimizer.param_groups[0]['lr']
    for (e, v) in lr_schedule:
        if current_epoch == e - 1:  # current_epoch is 0-based, the schedule is 1-based
            logger.warn("epoch {}: {} lr changed from: {} to {}".format(
                current_epoch, net_name, previous_lr, v))
            for param_group in optimizer.param_groups:
                param_group['lr'] = v

    return optimizer
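
A minimal usage sketch for step_scheduler (the model, schedule values and
the pytorchgo logger import are illustrative assumptions, not part of the
original excerpt):

import torch
from pytorchgo.utils import logger  # assumed: the excerpt uses a module-level logger

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
# drop the lr to 0.01 at epoch 30 and to 0.001 at epoch 60 (1-based epochs)
lr_schedule = [(30, 0.01), (60, 0.001)]
for epoch in range(90):  # epoch is 0-based, matching current_epoch above
    optimizer = step_scheduler(optimizer, epoch, lr_schedule, net_name="demo_net")
    # ... run one epoch of training ...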
Example #2
def step_scheduler(optimizer, current_epoch, lr_schedule, net_name):
    """
    Function to perform step learning rate decay
    Args:
        optimizer: optimizer whose learning rate is updated in place
        current_epoch: current epoch number (0-based)
        lr_schedule: iterable of (epoch, lr) pairs; at the start of
            1-based epoch `epoch`, the learning rate is set to `lr`
        net_name: network name used in the log message
    """
    warnings.warn(
        "please use step_scheduler in pytorchgo.utils.learning_rate",
        DeprecationWarning,
    )
    previous_lr = optimizer.param_groups[0]["lr"]
    for (e, v) in lr_schedule:
        if current_epoch == e - 1:  # current_epoch is 0-based, the schedule is 1-based
            logger.warn("epoch {}: {} lr changed from: {} to {}".format(
                current_epoch, net_name, previous_lr, v))
            for param_group in optimizer.param_groups:
                param_group["lr"] = v

    return optimizer
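
Note that DeprecationWarning is hidden by default outside of __main__ code,
so callers may need to opt in to actually see the warning above:

import warnings
warnings.simplefilter("always", DeprecationWarning)  # surface deprecation warnings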
Example #3
def train(args):

    logger.auto_set_dir()
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'

    # Setup Augmentations
    data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()])

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path,
                           is_transform=True,
                           img_size=(args.img_rows, args.img_cols),
                           epoch_scale=4,
                           augmentations=data_aug,
                           img_norm=args.img_norm)
    v_loader = data_loader(data_path,
                           is_transform=True,
                           split='val',
                           img_size=(args.img_rows, args.img_cols),
                           img_norm=args.img_norm)

    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader,
                                  batch_size=args.batch_size,
                                  num_workers=8,
                                  shuffle=True)
    valloader = data.DataLoader(v_loader,
                                batch_size=args.batch_size,
                                num_workers=8)

    # Setup Metrics
    running_metrics = runningScore(n_classes)

    # Setup Model
    from model_zoo.deeplabv1 import VGG16_LargeFoV
    model = VGG16_LargeFoV(class_num=n_classes,
                           image_size=[args.img_cols, args.img_rows],
                           pretrained=True)

    #model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    model.cuda()

    # Check if the model ships its own optimizer; otherwise fall back to SGD
    if hasattr(model, 'optimizer'):
        logger.warn("model has a customized optimizer, using it!")
        optimizer = model.optimizer
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.l_rate,
                                    momentum=0.99,
                                    weight_decay=5e-4)

    optimizer_summary(optimizer)
    if args.resume is not None:
        if os.path.isfile(args.resume):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            logger.info("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            logger.info("No checkpoint found at '{}'".format(args.resume))

    best_iou = -100.0
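    # NOTE: Variable(...), volatile=True and loss.data[0] below are
    # PyTorch <= 0.3 idioms; on PyTorch >= 0.4 use plain tensors,
    # torch.no_grad() and loss.item() instead.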
    for epoch in tqdm(range(args.n_epoch), total=args.n_epoch):
        model.train()
        for i, (images, labels) in tqdm(enumerate(trainloader),
                                        total=len(trainloader),
                                        desc="training epoch {}/{}".format(
                                            epoch, args.n_epoch)):
            cur_iter = i + epoch * len(trainloader)
            cur_lr = adjust_learning_rate(optimizer,
                                          args.l_rate,
                                          cur_iter,
                                          args.n_epoch * len(trainloader),
                                          power=0.9)
            #if i > 10:break

            images = Variable(images.cuda())
            labels = Variable(labels.cuda())

            optimizer.zero_grad()
            outputs = model(images)
            #print(np.unique(outputs.data[0].cpu().numpy()))
            loss = CrossEntropyLoss2d_Seg(input=outputs,
                                          target=labels,
                                          class_num=n_classes)

            loss.backward()
            optimizer.step()

            if (i + 1) % 100 == 0:
                logger.info("Epoch [%d/%d] Loss: %.4f, lr: %.7f" %
                            (epoch + 1, args.n_epoch, loss.data[0], cur_lr))

        model.eval()
        for i_val, (images_val, labels_val) in tqdm(enumerate(valloader),
                                                    total=len(valloader),
                                                    desc="validation"):
            images_val = Variable(images_val.cuda(), volatile=True)
            labels_val = Variable(labels_val.cuda(), volatile=True)

            outputs = model(images_val)
            pred = outputs.data.max(1)[1].cpu().numpy()
            gt = labels_val.data.cpu().numpy()
            running_metrics.update(gt, pred)

        score, class_iou = running_metrics.get_scores()
        for k, v in score.items():
            logger.info("{}: {}".format(k, v))
        running_metrics.reset()

        if score['Mean IoU : \t'] >= best_iou:
            best_iou = score['Mean IoU : \t']
            state = {
                'epoch': epoch + 1,
                'mIoU': best_iou,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(state,
                       os.path.join(logger.get_logger_dir(), "best_model.pkl"))
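
adjust_learning_rate is not shown in this excerpt; given power=0.9 it is
presumably the standard "poly" decay used by DeepLab. A minimal sketch of
that schedule, matching the call above (an assumption, not the verbatim
helper):

def adjust_learning_rate(optimizer, base_lr, cur_iter, max_iter, power=0.9):
    # poly schedule: lr = base_lr * (1 - cur_iter / max_iter) ** power
    lr = base_lr * (1.0 - float(cur_iter) / max_iter) ** power
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr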
Example #4
    def load_items(self):
        import json
        with open(self.json_path, 'r') as f:
            logger.warn("loading data from json file {}....".format(
                self.json_path))
            self.data_json = json.load(f)
Example #5
                    help='Gamma update for SGD')
parser.add_argument('--visdom',
                    default=False,
                    type=str2bool,
                    help='Use visdom for loss visualization')
parser.add_argument('--gpu', default=0, type=int, help='gpu')
args = parser.parse_args()

os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

if torch.cuda.is_available():
    if args.cuda:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    if not args.cuda:
        logger.warn(
            "WARNING: It looks like you have a CUDA device, but aren't " +
            "using CUDA.\nRun with --cuda for optimal training speed.")
        torch.set_default_tensor_type('torch.FloatTensor')
else:
    torch.set_default_tensor_type('torch.FloatTensor')


def train():
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            logger.info("WARNING: Using default COCO dataset_root because " +
                        "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
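
str2bool is referenced by the --visdom flag above but is not defined in this
excerpt; a typical minimal implementation (an assumption), which would have
to appear before the parser is built:

def str2bool(v):
    # treat common truthy strings from the command line as True
    return str(v).lower() in ("yes", "y", "true", "t", "1")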
Example #6
def set_gpu(gpu):
    if not isinstance(gpu, str):
        gpu = str(gpu)
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    logger.warn("pytorchgo set gpu: {}".format(gpu))
def train(args):

    logger.auto_set_dir()
    from pytorchgo.utils.pytorch_utils import set_gpu
    set_gpu(args.gpu)

    # Setup Dataloader
    from pytorchgo.augmentation.segmentation import SubtractMeans, PIL2NP, RGB2BGR, PIL_Scale, Value255to0, ToLabel
    from torchvision.transforms import Compose, Normalize, ToTensor
    img_transform = Compose([  # note: the order of these transforms matters!
        PIL_Scale(train_img_shape, Image.BILINEAR),
        PIL2NP(),
        RGB2BGR(),
        SubtractMeans(),
        ToTensor(),
    ])
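    # order matters: resize on the PIL image first, then convert to numpy,
    # swap RGB->BGR and subtract per-channel means (the pretrained backbone
    # presumably expects Caffe-style BGR input), and make a tensor last.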

    label_transform = Compose([
        PIL_Scale(train_img_shape, Image.NEAREST),
        PIL2NP(),
        Value255to0(),
        ToLabel()
    ])

    val_img_transform = Compose([
        PIL_Scale(train_img_shape, Image.BILINEAR),
        PIL2NP(),
        RGB2BGR(),
        SubtractMeans(),
        ToTensor(),
    ])
    val_label_transform = Compose([
        PIL_Scale(train_img_shape, Image.NEAREST),
        PIL2NP(),
        ToLabel(),
        # note the training/validation size handling difference here; this is very tricky.
    ])

    from pytorchgo.dataloader.pascal_voc_loader import pascalVOCLoader as common_voc_loader
    train_loader = common_voc_loader(split="train_aug",
                                     epoch_scale=1,
                                     img_transform=img_transform,
                                     label_transform=label_transform)
    validation_loader = common_voc_loader(split='val',
                                          img_transform=val_img_transform,
                                          label_transform=val_label_transform)

    n_classes = train_loader.n_classes
    trainloader = data.DataLoader(train_loader,
                                  batch_size=args.batch_size,
                                  num_workers=8,
                                  shuffle=True)

    valloader = data.DataLoader(validation_loader,
                                batch_size=args.batch_size,
                                num_workers=8)

    # Setup Metrics
    running_metrics = runningScore(n_classes)

    # Setup Model
    from pytorchgo.model.deeplabv1 import VGG16_LargeFoV
    from pytorchgo.model.deeplab_resnet import Res_Deeplab

    model = Res_Deeplab(NoLabels=n_classes, pretrained=True, output_all=False)

    from pytorchgo.utils.pytorch_utils import model_summary, optimizer_summary
    model_summary(model)

    def get_validation_miou(model):
        model.eval()
        for i_val, (images_val, labels_val) in tqdm(enumerate(valloader),
                                                    total=len(valloader),
                                                    desc="validation"):
            if i_val > 5 and is_debug == 1: break
            if i_val > 200 and is_debug == 2: break

            #img_large = torch.Tensor(np.zeros((1, 3, 513, 513)))
            #img_large[:, :, :images_val.shape[2], :images_val.shape[3]] = images_val

            output = model(Variable(images_val, volatile=True).cuda())
            pred = output.data.max(1)[1].cpu().numpy()
            #pred = output[:, :images_val.shape[2], :images_val.shape[3]]

            gt = labels_val.numpy()

            running_metrics.update(gt, pred)

        score, class_iou = running_metrics.get_scores()
        for k, v in score.items():
            logger.info("{}: {}".format(k, v))
        running_metrics.reset()
        return score['Mean IoU : \t']

    model.cuda()

    # Check if the model ships its own optimizer; otherwise fall back to SGD
    if hasattr(model, 'optimizer'):
        logger.warn("model has a customized optimizer, using it!")
        optimizer = model.optimizer
    else:
        optimizer = torch.optim.SGD(model.optimizer_params(args.l_rate),
                                    lr=args.l_rate,
                                    momentum=0.99,
                                    weight_decay=5e-4)

    optimizer_summary(optimizer)
    if args.resume is not None:
        if os.path.isfile(args.resume):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            logger.info("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            logger.info("No checkpoint found at '{}'".format(args.resume))

    best_iou = 0
    logger.info('start training!')
    for epoch in tqdm(range(args.n_epoch), total=args.n_epoch):
        model.train()
        for i, (images, labels) in tqdm(enumerate(trainloader),
                                        total=len(trainloader),
                                        desc="training epoch {}/{}".format(
                                            epoch, args.n_epoch)):
            if i > 10 and is_debug == 1: break

            if i > 200 and is_debug == 2: break

            cur_iter = i + epoch * len(trainloader)
            cur_lr = adjust_learning_rate(optimizer,
                                          args.l_rate,
                                          cur_iter,
                                          args.n_epoch * len(trainloader),
                                          power=0.9)

            images = Variable(images.cuda())
            labels = Variable(labels.cuda())

            optimizer.zero_grad()
            outputs = model(images)  # use fusion score
            loss = CrossEntropyLoss2d_Seg(input=outputs,
                                          target=labels,
                                          class_num=n_classes)

            #for i in range(len(outputs) - 1):
            #for i in range(1):
            #    loss = loss + CrossEntropyLoss2d_Seg(input=outputs[i], target=labels, class_num=n_classes)

            loss.backward()
            optimizer.step()

            if (i + 1) % 100 == 0:
                logger.info(
                    "Epoch [%d/%d] Loss: %.4f, lr: %.7f, best mIoU: %.7f" %
                    (epoch + 1, args.n_epoch, loss.data[0], cur_lr, best_iou))

        cur_miou = get_validation_miou(model)
        if cur_miou >= best_iou:
            best_iou = cur_miou
            state = {
                'epoch': epoch + 1,
                'mIoU': best_iou,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(state,
                       os.path.join(logger.get_logger_dir(), "best_model.pth"))
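
# (separate fragment: tail of a checkpoint-resume block for a
# generator/two-classifier (model_g, model_f1, model_f2) setup)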
        model_f2.load_state_dict(checkpoint['f2_state_dict'])
    optimizer_g.load_state_dict(checkpoint['optimizer_g'])
    optimizer_f.load_state_dict(checkpoint['optimizer_f'])
    logger.info("=> loaded checkpoint '{}'".format(args.resume))

else:
    model_g, model_f1, model_f2 = get_models(net_name=args.net, res=args.res, input_ch=args.input_ch,
                                             n_class=args.n_class,
                                             method=args.method, uses_one_classifier=args.uses_one_classifier,
                                             is_data_parallel=args.is_data_parallel)
    optimizer_g = get_optimizer(model_g.parameters(), lr=args.lr, momentum=args.momentum, opt=args.opt,
                                weight_decay=args.weight_decay)
    optimizer_f = get_optimizer(list(model_f1.parameters()) + list(model_f2.parameters()), opt=args.opt,
                                lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
if args.uses_one_classifier:
    logger.warn("f1 and f2 are the same!")
    model_f2 = model_f1

mode = "%s-%s2%s-%s_%sch" % (args.src_dataset, args.src_split, args.tgt_dataset, args.tgt_split, args.input_ch)
if args.net in ["fcn", "psp"]:
    model_name = "%s-%s-%s-res%s" % (args.method, args.savename, args.net, args.res)
else:
    model_name = "%s-%s-%s" % (args.method, args.savename, args.net)

outdir = os.path.join(logger.get_logger_dir(), mode)

# Create Model Dir
pth_dir = os.path.join(outdir, "pth")
mkdir_if_not_exist(pth_dir)

# Create Model Dir and Set TF-Logger
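
mkdir_if_not_exist is a small project helper; a minimal sketch of what it
presumably does:

import os

def mkdir_if_not_exist(path):
    # create the directory (and any parents) only if it is missing
    if not os.path.exists(path):
        os.makedirs(path)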