Exemplo n.º 1
0
def main():
    """Train a U-Net on the Waldo dataset and write sample outputs.

    Parses the command-line arguments, optionally configures tensorboard
    logging, loads the core and U-Net configurations, builds the training
    and validation loaders from ``<train_dir>/train`` and ``<train_dir>/val``,
    trains with SGD while checkpointing after every epoch, and finally calls
    ``sample`` to write example outputs under ``<dir>/imgs``.

    Side effects: rebinds the module-level ``args`` and ``best_loss``.
    Exits via ``sys.exit`` when a configuration file path is given but does
    not exist.
    """
    global args, best_loss
    args = parser.parse_args()

    if args.tensorboard:
        from tensorboard_logger import configure
        print("Using tensorboard")
        configure("%s" % (args.dir))

    # loading core configuration
    # An empty path means "keep the defaults"; it must not fall through to
    # the existence check, which would always fail for '' and abort the run.
    c_config = CoreConfig()
    if args.core_config == '':
        print('No core config file given, using default core configuration')
    elif not os.path.exists(args.core_config):
        sys.exit('Cannot find the config file: {}'.format(args.core_config))
    else:
        c_config.read(args.core_config)
        print('Using core configuration from {}'.format(args.core_config))

    # loading Unet configuration
    u_config = UnetConfig()
    if args.unet_config == '':
        print('No unet config file given, using default unet configuration')
    elif not os.path.exists(args.unet_config):
        sys.exit('Cannot find the unet configuration file: {}'.format(
            args.unet_config))
    else:
        # need train_image_size for validation
        u_config.read(args.unet_config, args.train_image_size)
        print('Using unet configuration from {}'.format(args.unet_config))

    offset_list = c_config.offsets
    print("offsets are: {}".format(offset_list))

    # model configurations from core config
    num_classes = c_config.num_classes
    num_colors = c_config.num_colors
    num_offsets = len(c_config.offsets)
    # model configurations from unet config
    start_filters = u_config.start_filters
    up_mode = u_config.up_mode
    merge_mode = u_config.merge_mode
    depth = u_config.depth

    train_data = args.train_dir + '/train'
    val_data = args.train_dir + '/val'

    trainset = WaldoDataset(train_data, c_config, args.train_image_size)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              num_workers=4,
                                              batch_size=args.batch_size,
                                              shuffle=True)

    valset = WaldoDataset(val_data, c_config, args.train_image_size)
    valloader = torch.utils.data.DataLoader(valset,
                                            num_workers=4,
                                            batch_size=args.batch_size)

    NUM_TRAIN = len(trainset)
    NUM_VAL = len(valset)
    NUM_ALL = NUM_TRAIN + NUM_VAL
    print('Total samples: {0} \n'
          'Using {1} samples for training, '
          '{2} samples for validation'.format(NUM_ALL, NUM_TRAIN, NUM_VAL))

    # create model
    model = UNet(num_classes,
                 num_offsets,
                 in_channels=num_colors,
                 depth=depth,
                 start_filts=start_filters,
                 up_mode=up_mode,
                 merge_mode=merge_mode).cuda()

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum(p.data.nelement() for p in model.parameters())))

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # Checkpoints written by this function historically stored the
            # best loss under 'best_prec1'; accept either key so that
            # resuming does not raise KeyError.
            best_loss = checkpoint.get(
                'best_loss', checkpoint.get('best_prec1', best_loss))
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # define optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                nesterov=args.nesterov,
                                weight_decay=args.weight_decay)

    # Train
    for epoch in range(args.start_epoch, args.epochs):
        Train(trainloader, model, optimizer, epoch)
        val_loss = Validate(valloader, model, epoch)
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                # 'best_loss' is the key read back on resume; 'best_prec1'
                # is kept so existing consumers of the old key still work.
                'best_loss': best_loss,
                'best_prec1': best_loss,
            }, is_best)
    print('Best validation loss: ', best_loss)

    # visualize some example outputs
    outdir = '{}/imgs'.format(args.dir)
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    sample(model, valloader, outdir, c_config)
def main():
    """Train a U-Net that predicts class masks and offset-boundary maps.

    Parses the command-line arguments, builds the cached train/val/test
    tensors with ``DataProcess`` when any of them is missing, trains with SGD
    while checkpointing after every epoch, and finally saves one training
    batch together with the model's predictions as PNG files under ``imgs/``.

    Side effects: rebinds the module-level ``args`` and ``best_loss``.
    """
    global args, best_loss
    args = parser.parse_args()

    if args.tensorboard:
        print("Using tensorboard")
        configure("exp/%s" % (args.name))

    # Rebuild the cached splits unless all three already exist.  The
    # validation file has to be checked too, since it is loaded below.
    if not (os.path.exists(args.train_data) and os.path.exists(args.val_data)
            and os.path.exists(args.test_data)):
        train, val, test = DataProcess(args.train_path, args.test_path, 0.9,
                                       args.img_channels)
        torch.save(train, args.train_data)
        torch.save(val, args.val_data)
        torch.save(test, args.test_data)

    s_trans = tsf.Compose([
        tsf.ToPILImage(),
        tsf.Resize((args.img_height, args.img_width)),
        tsf.ToTensor(),
    ])

    offset_list = [(1, 1), (0, -2)]

    # split the training set into training set and validation set
    trainset = Dataset(args.train_data, s_trans, offset_list, args.num_classes,
                       args.img_height, args.img_width)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              num_workers=1,
                                              batch_size=args.batch_size)

    valset = Dataset(args.val_data, s_trans, offset_list, args.num_classes,
                     args.img_height, args.img_width)
    valloader = torch.utils.data.DataLoader(valset,
                                            num_workers=1,
                                            batch_size=args.batch_size)

    NUM_TRAIN = len(trainset)
    NUM_VAL = len(valset)
    NUM_ALL = NUM_TRAIN + NUM_VAL
    print(
        'Total samples: {0} \n'
        'Using {1} samples for training, '
        '{2} samples for validation'.format(NUM_ALL, NUM_TRAIN, NUM_VAL))

    # create model
    model = UNet(args.num_classes,
                 len(offset_list),
                 in_channels=3,
                 depth=args.depth).cuda()

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum(p.data.nelement() for p in model.parameters())))

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # Checkpoints written by this function historically stored the
            # best loss under 'best_prec1'; accept either key so that
            # resuming does not raise KeyError.
            best_loss = checkpoint.get(
                'best_loss', checkpoint.get('best_prec1', best_loss))
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # define optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                nesterov=args.nesterov,
                                weight_decay=args.weight_decay)

    # Train
    for epoch in range(args.start_epoch, args.epochs):
        Train(trainloader, model, optimizer, epoch)
        val_loss = Validate(valloader, model, epoch)
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                # 'best_loss' is the key read back on resume; 'best_prec1'
                # is kept so existing consumers of the old key still work.
                'best_loss': best_loss,
                'best_prec1': best_loss,
            }, is_best)
    print('Best validation loss: ', best_loss)

    # Visualize some predicted masks on training data to get a better intuition
    # about the performance. Comment it if not necessary.
    # save_image does not create missing directories, so make sure the
    # output folder exists first.
    if not os.path.exists('imgs'):
        os.makedirs('imgs')
    datailer = iter(trainloader)
    # next(it) works on both Python 2 and 3; it.next() is Python 2 only.
    img, classification, bound = next(datailer)
    torchvision.utils.save_image(img, 'imgs/raw.png')
    for i in range(len(offset_list)):
        torchvision.utils.save_image(bound[:, i:i + 1, :, :],
                                     'imgs/bound_{}.png'.format(i))
    for i in range(args.num_classes):
        torchvision.utils.save_image(classification[:, i:i + 1, :, :],
                                     'imgs/class_{}.png'.format(i))
    img = torch.autograd.Variable(img).cuda()
    predictions = model(img)
    predictions = predictions.data
    # The first num_classes channels are sliced off as class predictions and
    # the remaining channels as boundary predictions.
    class_pred = predictions[:, :args.num_classes, :, :]
    bound_pred = predictions[:, args.num_classes:, :, :]
    for i in range(len(offset_list)):
        torchvision.utils.save_image(bound_pred[:, i:i + 1, :, :],
                                     'imgs/bound_pred{}.png'.format(i))
    for i in range(args.num_classes):
        torchvision.utils.save_image(class_pred[:, i:i + 1, :, :],
                                     'imgs/class_pred{}.png'.format(i))
Exemplo n.º 3
0
def main():
    """Train a single-output U-Net for mask prediction and evaluate it.

    Parses the command-line arguments, builds the cached train/val/test
    tensors with ``DataProcess`` when any of them is missing, trains with
    Adam while checkpointing after every epoch, saves one training batch
    with its mask and thresholded prediction as PNG files, then reloads
    ``exp/<name>/model_best.pth.tar`` and runs ``Test`` on the test loader.

    Side effects: rebinds the module-level ``args`` and ``best_loss``.
    """
    global args, best_loss
    args = parser.parse_args()

    if args.tensorboard:
        print("Using tensorboard")
        configure("exp/%s" % (args.name))

    # Rebuild the cached splits unless all three already exist.  The
    # validation file has to be checked too, since it is loaded below.
    if not (os.path.exists(args.train_data) and os.path.exists(args.val_data)
            and os.path.exists(args.test_data)):
        train, val, test = DataProcess(args.train_path, args.test_path, 0.9,
                                       args.img_channels)
        t.save(train, args.train_data)
        t.save(val, args.val_data)
        t.save(test, args.test_data)

    # Transform for source images.
    s_trans = tsf.Compose([
        tsf.ToPILImage(),
        tsf.Resize((args.img_height, args.img_width)),
        tsf.ToTensor(),
    ])

    # Transform for target masks: same resize, but with nearest-neighbour
    # interpolation.
    t_trans = tsf.Compose([
        tsf.ToPILImage(),
        tsf.Resize((args.img_height, args.img_width),
                   interpolation=PIL.Image.NEAREST),
        tsf.ToTensor(),
    ])

    # split the training set into training set and validation set
    trainset = TrainDataset(args.train_data, s_trans, t_trans)
    trainloader = t.utils.data.DataLoader(trainset,
                                          num_workers=1,
                                          batch_size=args.batch_size,
                                          shuffle=True)

    valset = TrainDataset(args.val_data, s_trans, t_trans)
    valloader = t.utils.data.DataLoader(valset,
                                        num_workers=1,
                                        batch_size=args.batch_size)

    NUM_TRAIN = len(trainset)
    NUM_VAL = len(valset)
    NUM_ALL = NUM_TRAIN + NUM_VAL
    print(
        'Total samples: {0} \n'
        'Using {1} samples for training, '
        '{2} samples for validation'.format(NUM_ALL, NUM_TRAIN, NUM_VAL))

    testset = TestDataset(args.test_data, s_trans)
    testloader = t.utils.data.DataLoader(testset, num_workers=1, batch_size=1)

    # create model
    model = UNet(1, in_channels=3, depth=args.depth).cuda()

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum(p.data.nelement() for p in model.parameters())))

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = t.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # Restore into the global best_loss (the original assigned to an
            # unused local).  Checkpoints written by this function
            # historically used the 'best_prec1' key, so accept either.
            best_loss = checkpoint.get(
                'best_loss', checkpoint.get('best_prec1', best_loss))
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # define optimizer
    optimizer = t.optim.Adam(model.parameters(), lr=1e-3)

    # Train
    for epoch in range(args.start_epoch, args.epochs):
        Train(trainloader, model, optimizer, epoch)
        val_loss = Validate(valloader, model, epoch)
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                # 'best_loss' is the key read back on resume; 'best_prec1'
                # is kept so existing consumers of the old key still work.
                'best_loss': best_loss,
                'best_prec1': best_loss,
            }, is_best)
    print('Best validation loss: ', best_loss)

    # Visualize some predicted masks on training data to get a better intuition
    # about the performance. Comment it if not necessary.
    datailer = iter(trainloader)
    # next(it) works on both Python 2 and 3; it.next() is Python 2 only.
    img, mask = next(datailer)
    torchvision.utils.save_image(img, 'raw.png')
    torchvision.utils.save_image(mask, 'mask.png')
    img = t.autograd.Variable(img).cuda()
    img_pred = model(img)
    img_pred = img_pred.data
    # The comparison yields a byte/bool tensor; cast to float so that
    # save_image's scaling arithmetic is well defined on every torch version.
    torchvision.utils.save_image((img_pred > 0.5).float(), 'predicted.png')

    # Load the best model and evaluate on test set
    checkpoint = t.load('exp/%s/' % (args.name) + 'model_best.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])
    Test(testloader, model)
Exemplo n.º 4
0
    # NOTE(review): this is the tail of a training entry point whose header
    # and setup (model, optimizer, loaders, args) are not visible here.
    init_util.print_network(model)
    # model = nn.DataParallel(model, device_ids=[0])   # multi-GPU

    # Per-epoch logger writing under ./output/<args.save>.
    log = logger.Logger('./output/{}'.format(args.save))

    best = [0, np.inf]  # [epoch, validation loss] of the best model so far
    trigger = 0  # early-stop counter
    for epoch in range(1, args.epochs + 1):
        common.adjust_learning_rate(optimizer, epoch, args)
        train_log = train(model, train_loader)
        val_log = val(model, val_loader)
        log.update(epoch, train_log, val_log)

        # Save checkpoint.
        state = {
            'net': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'epoch': epoch
        }
        # The latest checkpoint is overwritten every epoch.
        torch.save(
            state,
            os.path.join('./output/{}'.format(args.save), 'latest_model.pth'))
        # NOTE(review): trigger is incremented every epoch; no reset or
        # early-stop check is visible in this excerpt -- confirm against the
        # full source.
        trigger += 1
        # Keep a separate copy whenever the validation loss improves.
        if val_log['Val Loss'] < best[1]:
            print('Saving best model')
            torch.save(
                state,
                os.path.join('./output/{}'.format(args.save),
                             'best_model.pth'))
            best[0] = epoch
            best[1] = val_log['Val Loss']
Exemplo n.º 5
0
# Training script: builds the steel-defect dataset and loader, then runs
# `epoch` passes over it, saving the model weights after every pass.
# `model`, `optimizer`, `device` and `epoch` are expected to be defined
# earlier in the file.
transform = transforms.Compose([
    utils.transforms.RandomMirror(),
    utils.transforms.ToTensor(),
    utils.transforms.Downsize(2),
])

dataset = utils.datasets.SteelDefectDataset(
    csv_file='train.csv',
    root_dir='data/severstal-steel-defect-detection',
    transform=transform)
train_loader = DataLoader(dataset, batch_size=1, shuffle=True)

criterion = utils.loss.SegmentMSELoss()

# torch.save does not create missing directories.
weights_dir = 'weights'
if not os.path.isdir(weights_dir):
    os.makedirs(weights_dir)

for e in range(1, epoch + 1):
    print('Epoch {}:'.format(e))
    total_loss = 0.0
    num_batches = 0
    # Count batches from 1 so that num_batches is the number of batches
    # processed so far, which is the correct divisor for the running mean
    # (the zero-based index under-counts by one and is 0 for one batch).
    for num_batches, data in tqdm(enumerate(train_loader, start=1),
                                  total=len(train_loader),
                                  leave=False):
        optimizer.zero_grad()
        imgs, cs, targets = data['img'], data['c'], data['target']
        imgs = imgs.to(device)
        targets = targets.to(device)
        out = model(imgs)
        # cs is handed to the criterion as loaded, without a device move.
        loss = criterion(out, cs, targets)
        loss.backward()
        # .item() accumulates a plain Python float instead of a tensor.
        total_loss += loss.item()
        optimizer.step()
        # One-off progress report early in the epoch.
        if num_batches == 500:
            print(total_loss / num_batches)
    # Guard against an empty loader instead of dividing by zero.
    mean_loss = total_loss / num_batches if num_batches else float('nan')
    print('Loss: {:.3f}'.format(mean_loss))
    torch.save(model.state_dict(),
               os.path.join(weights_dir, 'Unet_e{}.pth'.format(e)))