Example #1
def get(args):
    """ Entry point. Call this function to get all Charades dataloaders """
    normalize = arraytransforms.Normalize(mean=[0.502], std=[1.0])
    train_file = args.train_file
    val_file = args.val_file
    train_dataset = Charadesflow(
        args.data, 'train', train_file, args.cache,
        transform=transforms.Compose([
            arraytransforms.RandomResizedCrop(224),
            arraytransforms.ToTensor(),
            normalize,
            transforms.Lambda(lambda x: torch.cat(x)),
        ]))
    val_transforms = transforms.Compose([
            arraytransforms.Resize(256),
            arraytransforms.CenterCrop(224),
            arraytransforms.ToTensor(),
            normalize,
            transforms.Lambda(lambda x: torch.cat(x)),
        ])
    val_dataset = Charadesflow(
        args.data, 'val', val_file, args.cache, transform=val_transforms)
    valvideo_dataset = Charadesflow(
        args.data, 'val_video', val_file, args.cache, transform=val_transforms)
    return train_dataset, val_dataset, valvideo_dataset
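A minimal usage sketch for the function above; the batch size, worker count, and `args` fields here are illustrative assumptions, not part of the original:
train_dataset, val_dataset, valvideo_dataset = get(args)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=64, shuffle=True, num_workers=4)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=64, shuffle=False, num_workers=4)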
Example #2
def get_emotion(current_face, gray):
    """
    inputs:
    current_face: (xmin, ymin, w, h)
    gray: grayscale frame
    
    outputs:
    emotion: from -1 to 1, 1 being most positive, -1 being most negative
    """
    cut_size = 44
    transform_test = transforms.Compose([
        transforms.TenCrop(cut_size),
        transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
    ])   
    
    # crop face from grayscale frame
    xmin = current_face[0]
    xmax = current_face[0] + current_face[2]
    ymin = current_face[1]
    ymax = current_face[1] + current_face[3]
    face = gray[ymin:ymax,xmin:xmax]
    
    # resize and transform
    face = (resize(face, (48,48), mode='symmetric')*255).astype('uint8')
    img = face[:, :, np.newaxis]
    img = np.concatenate((img, img, img), axis=2)
    img = Image.fromarray(img)
    inputs = transform_test(img)

    class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
    
    # set device, load model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = vgg.VGG('VGG19')
    checkpoint = torch.load('PrivateTest_model.t7')
    net.load_state_dict(checkpoint['net'])
    net.to(device)
    net.eval()

    ncrops, c, h, w = np.shape(inputs)

    inputs = inputs.view(-1, c, h, w)
    inputs = inputs.to(device)
    with torch.no_grad():  # run the forward pass without tracking gradients
        outputs = net(inputs)
    outputs_avg = outputs.view(ncrops, -1).mean(0)  # avg over crops
    # valence weights for [Angry, Disgust, Fear, Happy, Sad, Surprise]; Neutral is dropped via score[:6]
    weights = np.array([-0.4, -0.1, -0.1, 0.8, -0.4, 0.2])
    score = F.softmax(outputs_avg, dim=0)
    emotion_score = np.sum(score.cpu().numpy()[:6] ** 0.5 * weights)
    

    return emotion_score
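A hedged usage sketch for get_emotion; the OpenCV capture and the face box below are illustrative assumptions (the original does not show how the box is obtained). Note that the function reloads the VGG19 checkpoint on every call, so in a video loop you would normally load the network once and reuse it:
import cv2

frame = cv2.imread('frame.jpg')                 # hypothetical input frame
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # grayscale, as get_emotion expects
face_box = (100, 80, 120, 120)                  # (xmin, ymin, w, h) from some detector
print('valence score:', get_emotion(face_box, gray))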
Example #3
    def img_10crop(self,img):
        """
        数据增强
        将图片在左上角,左下角,右上角,右下角,中心进行切割和并做镜像操作,这样的操作使得数据库扩大了10倍
        """
        cut_size = [44, 44]  # 44
        transform_test = transforms.Compose([
            transforms.TenCrop(cut_size),
            transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
        ])
        inputs = transform_test(img)

        return inputs
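A small standalone sketch of what this ten-crop transform produces, useful for checking shapes (the dummy image is an assumption):
from PIL import Image
import torch
from torchvision import transforms

img = Image.new('L', (48, 48))             # dummy 48x48 grayscale image
crops = transforms.TenCrop([44, 44])(img)  # 4 corners + center, each mirrored
batch = torch.stack([transforms.ToTensor()(c) for c in crops])
print(batch.shape)                         # torch.Size([10, 1, 44, 44])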
Example #4
def getEmotionScore(imageDirectory, resizeImg):
    cut_size = 44

    transform_test = transforms.Compose([
        transforms.TenCrop(cut_size),
        transforms.Lambda(lambda crops: torch.stack(
            [transforms.ToTensor()(crop) for crop in crops])),
    ])

    raw_img = io.imread(imageDirectory)

    if resizeImg:
        raw_img = cropND(raw_img, (400, 400))

    io.imsave("screenshot.jpg", raw_img)

    gray = rgb2gray(raw_img)
    gray = (resize(gray, (48, 48), mode='symmetric') * 255).astype(np.uint8)  # resize returns floats in [0, 1]

    img = gray[:, :, np.newaxis]

    img = np.concatenate((img, img, img), axis=2)
    img = Image.fromarray(img)
    inputs = transform_test(img)

    net = VGG('VGG19')
    checkpoint = torch.load(
        os.path.join('FER2013_VGG19', 'PrivateTest_model.t7'))
    net.load_state_dict(checkpoint['net'])
    net.cuda()
    net.eval()

    ncrops, c, h, w = np.shape(inputs)

    inputs = inputs.view(-1, c, h, w)
    inputs = inputs.cuda()
    with torch.no_grad():  # replaces the obsolete Variable(..., volatile=True)
        outputs = net(inputs)

    outputs_avg = outputs.view(ncrops, -1).mean(0)  # avg over crops

    score = F.softmax(outputs_avg, dim=0)
    _, predicted = torch.max(outputs_avg.data, 0)

    class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
    scoreDict = {}

    for i in range(len(class_names)):
        scoreDict[class_names[i]] = float(score.data.cpu().numpy()[i])

    scoreDict["expression"] = str(class_names[int(predicted.cpu().numpy())])
    return scoreDict
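A hypothetical call; the image path is an assumption:
scores = getEmotionScore('face.jpg', resizeImg=False)
print(scores['expression'], scores.get('Happy'))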
Example #5
def img2mood(raw_img):

    cut_size = 44

    transform_test = transforms.Compose([
        transforms.TenCrop(cut_size),
        transforms.Lambda(lambda crops: torch.stack(
            [transforms.ToTensor()(crop) for crop in crops])),
    ])
    #raw_img = io.imread('images/1.jpg')
    gray = rgb2gray(raw_img)
    gray = (resize(gray, (48, 48), mode='symmetric') * 255).astype(np.uint8)  # resize returns floats in [0, 1]

    img = gray[:, :, np.newaxis]

    img = np.concatenate((img, img, img), axis=2)
    img = Image.fromarray(img)
    inputs = transform_test(img)

    class_names = [
        'Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'
    ]

    net = VGG('VGG19')
    checkpoint = torch.load(os.path.join('FER2013_VGG19',
                                         'PrivateTest_model.t7'),
                            map_location='cpu')
    net.load_state_dict(checkpoint['net'])
    #net.cuda()
    net.eval()

    ncrops, c, h, w = np.shape(inputs)

    inputs = inputs.view(-1, c, h, w)
    #inputs = inputs.cuda()
    with torch.no_grad():  # replaces the obsolete Variable(..., volatile=True)
        outputs = net(inputs)

    outputs_avg = outputs.view(ncrops, -1).mean(0)  # avg over crops

    score = F.softmax(outputs_avg, dim=0)
    _, predicted = torch.max(outputs_avg.data, 0)

    emojis_img = io.imread('images/emojis/%s.png' %
                           str(class_names[int(predicted.cpu().numpy())]))

    print("The Expression is %s" %
          str(class_names[int(predicted.cpu().numpy())]))
    return int(predicted.cpu().numpy()), emojis_img
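A minimal usage sketch, assuming an input image path; img2mood returns the predicted class index plus the matching emoji image:
raw = io.imread('images/1.jpg')        # path is an assumption
label_idx, emoji_img = img2mood(raw)
io.imsave('mood_emoji.png', emoji_img)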
Example #6
def predict_emotion(file_name):
    raw_img = io.imread(file_name)
    cut_size = 44

    transform_test = transforms.Compose([
        transforms.TenCrop(cut_size),
        transforms.Lambda(lambda crops: torch.stack(
            [transforms.ToTensor()(crop) for crop in crops])),
    ])

    gray = rgb2gray(raw_img)
    gray = (resize(gray, (48, 48), mode='symmetric') * 255).astype(np.uint8)  # resize returns floats in [0, 1]

    img = gray[:, :, np.newaxis]

    img = np.concatenate((img, img, img), axis=2)  # (48, 48, 3)
    img = Image.fromarray(img)  # convert to a PIL image
    inputs = transform_test(img)  # apply the ten-crop transform

    class_names = [
        'Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'
    ]

    net = VGG('VGG19')  # load the network architecture
    checkpoint = torch.load(os.path.join(
        'F:/Study/First_Grade/Winter_Vacation/Real-time-face-recognition-master/Facial-Expression-Recognition.Pytorch-master/FER2013_VGG19',
        'PrivateTest_model.t7'),
                            map_location='cpu')
    net.load_state_dict(checkpoint['net'])
    # net.cuda()
    net.eval()

    ncrops, c, h, w = np.shape(inputs)

    inputs = inputs.view(-1, c, h, w)
    # inputs = inputs.cuda()
    with torch.no_grad():  # replaces the obsolete Variable(..., volatile=True)
        outputs = net(inputs)

    outputs_avg = outputs.view(ncrops, -1).mean(0)  # avg over crops
    # averaging the scores over the augmented crops gives a stronger test-time result
    score = F.softmax(outputs_avg, dim=0)
    # print(type(score)) #tensor
    _, predicted = torch.max(score.data, 0)

    return score, predicted
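A hypothetical usage sketch; the class list repeats the one defined inside the function so the index can be mapped back to a label:
score, predicted = predict_emotion('face.jpg')  # path is an assumption
class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
print(class_names[int(predicted)], float(score[int(predicted)]))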
Example #7
path = os.path.join(opt.dataset + '_' + opt.model, str(opt.fold))

# Data
print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomCrop(cut_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# Data augmentation:
# random crops enlarge the dataset and help reduce overfitting
transform_test = transforms.Compose([
    transforms.TenCrop(cut_size),
    transforms.Lambda(lambda crops: torch.stack(
        [transforms.ToTensor()(crop) for crop in crops])),
])

trainset = CK(split='Training', fold=opt.fold, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=opt.bs,
                                          shuffle=True,
                                          num_workers=0)
testset = CK(split='Testing', fold=opt.fold, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=5,
                                         shuffle=False,
                                         num_workers=0)

# Model
if opt.model == 'VGG19':
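The snippet above is truncated at the model selection. One detail worth spelling out: with TenCrop each test image becomes 10 crops, so a test batch of 5 images reaches the network as a 5-dimensional tensor that must be flattened before the forward pass and re-folded afterwards. A minimal sketch of that test-time pattern, assuming a generic `net`:
# inputs from testloader have shape (bs, ncrops, c, h, w)
bs, ncrops, c, h, w = inputs.size()
outputs = net(inputs.view(-1, c, h, w))             # (bs * ncrops, num_classes)
outputs_avg = outputs.view(bs, ncrops, -1).mean(1)  # average predictions over the 10 crops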
Example #8
def main():

    global best_prec1
    best_prec1 = 0

    global val_acc
    val_acc = []

    global class_num

    class_num = 10 if args.dataset == 'cifar10' else 100

    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

    if args.augment:
        if args.autoaugment:
            print('Autoaugment')
            transform_train = transforms.Compose([
                transforms.ToTensor(),
                transforms.Lambda(lambda x: F.pad(x.unsqueeze(0), (4, 4, 4, 4),
                                                  mode='reflect').squeeze()),
                transforms.ToPILImage(),
                transforms.RandomCrop(32),
                transforms.RandomHorizontalFlip(),
                CIFAR10Policy(),
                transforms.ToTensor(),
                Cutout(n_holes=args.n_holes, length=args.length),
                normalize,
            ])

        elif args.cutout:
            print('Cutout')
            transform_train = transforms.Compose([
                transforms.ToTensor(),
                transforms.Lambda(lambda x: F.pad(x.unsqueeze(0), (4, 4, 4, 4),
                                                  mode='reflect').squeeze()),
                transforms.ToPILImage(),
                transforms.RandomCrop(32),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                Cutout(n_holes=args.n_holes, length=args.length),
                normalize,
            ])

        else:
            print('Standard Augmentation!')
            transform_train = transforms.Compose([
                transforms.ToTensor(),
                transforms.Lambda(lambda x: F.pad(x.unsqueeze(0), (4, 4, 4, 4),
                                                  mode='reflect').squeeze()),
                transforms.ToPILImage(),
                transforms.RandomCrop(32),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])

    transform_test = transforms.Compose([transforms.ToTensor(), normalize])

    kwargs = {'num_workers': 1, 'pin_memory': True}
    assert (args.dataset == 'cifar10' or args.dataset == 'cifar100')
    train_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()]('../data',
                                                train=True,
                                                download=True,
                                                transform=transform_train),
        batch_size=training_configurations[args.model]['batch_size'],
        shuffle=True,
        **kwargs)
    val_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()]('../data',
                                                train=False,
                                                transform=transform_test),
        batch_size=training_configurations[args.model]['batch_size'],
        shuffle=True,
        **kwargs)

    # create model
    if args.model == 'resnet':
        model = eval('networks.resnet.resnet' + str(args.layers) +
                     '_cifar')(dropout_rate=args.droprate)
    elif args.model == 'se_resnet':
        model = eval('networks.se_resnet.resnet' + str(args.layers) +
                     '_cifar')(dropout_rate=args.droprate)
    elif args.model == 'wideresnet':
        model = networks.wideresnet.WideResNet(args.layers,
                                               10 if args.dataset == 'cifar10' else 100,
                                               args.widen_factor,
                                               dropRate=args.droprate)
    elif args.model == 'se_wideresnet':
        model = networks.se_wideresnet.WideResNet(
            args.layers,
            10 if args.dataset == 'cifar10' else 100,
            args.widen_factor,
            dropRate=args.droprate)

    elif args.model == 'densenet_bc':
        model = networks.densenet_bc.DenseNet(
            growth_rate=args.growth_rate,
            block_config=(int((args.layers - 4) / 6), ) * 3,
            compression=args.compression_rate,
            num_init_features=24,
            bn_size=args.bn_size,
            drop_rate=args.droprate,
            small_inputs=True,
            efficient=False)
    elif args.model == 'shake_pyramidnet':
        model = networks.shake_pyramidnet.PyramidNet(dataset=args.dataset,
                                                     depth=args.layers,
                                                     alpha=args.alpha,
                                                     num_classes=class_num,
                                                     bottleneck=True)

    elif args.model == 'resnext':
        if args.cardinality == 8:
            model = networks.resnext.resnext29_8_64(class_num)
        if args.cardinality == 16:
            model = networks.resnext.resnext29_16_64(class_num)

    elif args.model == 'shake_shake':
        if args.widen_factor == 112:
            model = networks.shake_shake.shake_resnet26_2x112d(class_num)
        if args.widen_factor == 32:
            model = networks.shake_shake.shake_resnet26_2x32d(class_num)
        if args.widen_factor == 96:
            model = networks.shake_shake.shake_resnet26_2x96d(class_num)  # was 2x32d, an apparent copy-paste slip

    elif args.model == 'shake_shake_x':

        model = networks.shake_shake.shake_resnext29_2x4x64d(class_num)

    if not os.path.isdir(check_point):
        mkdir_p(check_point)

    fc = Full_layer(int(model.feature_num), class_num)

    print('Number of final features: {}'.format(int(model.feature_num)))

    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()]) +
        sum([p.data.nelement() for p in fc.parameters()])))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    isda_criterion = ISDALoss(int(model.feature_num), class_num).cuda()
    ce_criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(
        [{
            'params': model.parameters()
        }, {
            'params': fc.parameters()
        }],
        lr=training_configurations[args.model]['initial_learning_rate'],
        momentum=training_configurations[args.model]['momentum'],
        nesterov=training_configurations[args.model]['nesterov'],
        weight_decay=training_configurations[args.model]['weight_decay'])

    model = torch.nn.DataParallel(model).cuda()
    fc = nn.DataParallel(fc).cuda()

    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        fc.load_state_dict(checkpoint['fc'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        isda_criterion = checkpoint['isda_criterion']
        val_acc = checkpoint['val_acc']
        best_prec1 = checkpoint['best_acc']
        np.savetxt(accuracy_file, np.array(val_acc))
    else:
        start_epoch = 0

    for epoch in range(start_epoch,
                       training_configurations[args.model]['epochs']):

        adjust_learning_rate(optimizer, epoch + 1)

        # train for one epoch
        train(train_loader, model, fc, isda_criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, fc, ce_criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'fc': fc.state_dict(),
                'best_acc': best_prec1,
                'optimizer': optimizer.state_dict(),
                'isda_criterion': isda_criterion,
                'val_acc': val_acc,
            },
            is_best,
            checkpoint=check_point)
        print('Best accuracy: ', best_prec1)
        np.savetxt(accuracy_file, np.array(val_acc))

    print('Best accuracy: ', best_prec1)
    print('Average accuracy', sum(val_acc[-10:]) / 10)
    # val_acc.append(sum(val_acc[len(val_acc) - 10:]) / 10)
    # np.savetxt(val_acc, np.array(val_acc))
    np.savetxt(accuracy_file, np.array(val_acc))
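Full_layer is referenced above but not shown in this excerpt; judging from Full_layer(int(model.feature_num), class_num) and the fact that it is optimized alongside the backbone, it is presumably the final linear classifier. A minimal sketch under that assumption:
import torch.nn as nn

class Full_layer(nn.Module):
    """Assumed implementation: a single fully connected classification head."""
    def __init__(self, feature_num, class_num):
        super(Full_layer, self).__init__()
        self.feature_num = feature_num
        self.class_num = class_num
        self.fc = nn.Linear(feature_num, class_num)

    def forward(self, x):
        return self.fc(x)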
Example #9
def computeResult(data):
  try:
    cut_size = 44

    transform_test = transforms.Compose([
        transforms.TenCrop(cut_size),
        transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
    ])

    #Uses image data in array to compute the image
    raw_img = np.array(data, dtype=np.uint8)
   
    gray = rgb2gray(raw_img)
    gray = (resize(gray, (48,48), mode='symmetric') * 255).astype(np.uint8)  # resize returns floats in [0, 1]

    img = gray[:, :, np.newaxis]

    img = np.concatenate((img, img, img), axis=2)
    img = Image.fromarray(img)

    inputs = transform_test(img)

    class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

    net = VGG('VGG19')
    checkpoint = torch.load(os.path.join('FER2013_VGG19', 'PrivateTest_model.t7'))
    net.load_state_dict(checkpoint['net'])
    net.cuda()
    net.eval()

    ncrops, c, h, w = np.shape(inputs)

    inputs = inputs.view(-1, c, h, w)
    inputs = inputs.cuda()
    with torch.no_grad():  # replaces the obsolete Variable(..., volatile=True)
        outputs = net(inputs)

    outputs_avg = outputs.view(ncrops, -1).mean(0)  # avg over crops

    score = F.softmax(outputs_avg, dim=0)
    _, predicted = torch.max(outputs_avg.data, 0)

    plt.rcParams['figure.figsize'] = (13.5,5.5)
    axes=plt.subplot(1, 3, 1)
    plt.imshow(raw_img)
    plt.xlabel('Input Image', fontsize=16)
    axes.set_xticks([])
    axes.set_yticks([])
    plt.tight_layout()


    plt.subplots_adjust(left=0.05, bottom=0.2, right=0.95, top=0.9, hspace=0.02, wspace=0.3)

    plt.subplot(1, 3, 2)
    ind = 0.1+0.6*np.arange(len(class_names))    # the x locations for the groups
    width = 0.4       # the width of the bars: can also be len(x) sequence
    color_list = ['red','orangered','darkorange','limegreen','darkgreen','royalblue','navy']
    for i in range(len(class_names)):
        plt.bar(ind[i], score.data.cpu().numpy()[i], width, color=color_list[i])
    plt.title("Classification results ",fontsize=20)
    plt.xlabel(" Expression Category ",fontsize=16)
    plt.ylabel(" Classification Score ",fontsize=16)
    plt.xticks(ind, class_names, rotation=45, fontsize=14)

    axes=plt.subplot(1, 3, 3)
    emojis_img = io.imread('./images/emojis/%s.png' % str(class_names[int(predicted.cpu().numpy())]))
    plt.imshow(emojis_img)
    plt.xlabel('Emoji Expression', fontsize=16)
    axes.set_xticks([])
    axes.set_yticks([])
    plt.tight_layout()
    # show emojis

    #plt.show()
    plt.savefig(os.path.join('./images/results', 'results.jpg'))
    plt.close()

    print("Result:" + "%s" %str(class_names[int(predicted.cpu().numpy())]))
  except Exception as e:
    print('Cannot process image:', e)
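A hypothetical invocation; computeResult expects raw RGB pixel data and writes its figure to ./images/results/results.jpg:
from skimage import io

data = io.imread('face.jpg')  # any HxWx3 uint8 image; path is an assumption
computeResult(data)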
Example #10
    def show_anim(robot, cnt):
        # nonlocal model

        # read the image sequence for classification
        img_path = config.img_path
        img_names = [
            os.path.join(img_path, 'face_round{}_cnt{}.jpg'.format(cnt, i))
            for i in range(10)
        ]
        face_seq = [io.imread(img) for img in img_names]

        anim_prob = np.zeros(len(config.anim_names))
        for face in face_seq:
            ## use the model loaded to predict probabilities
            ## input one figure and get a one-dim array of size 7
            '''
            complete here !!!
            '''

            cut_size = 44

            transform_test = transforms.Compose([
                transforms.TenCrop(cut_size),
                transforms.Lambda(lambda crops: torch.stack(
                    [transforms.ToTensor()(crop) for crop in crops])),
            ])

            raw_img = face
            gray = rgb2gray(raw_img)
            gray = (resize(gray, (48, 48), mode='symmetric') * 255).astype(np.uint8)  # resize returns floats in [0, 1]

            img = gray[:, :, np.newaxis]

            img = np.concatenate((img, img, img), axis=2)
            img = Image.fromarray(img)
            inputs = transform_test(img)

            ncrops, c, h, w = np.shape(inputs)

            inputs = inputs.view(-1, c, h, w)
            #inputs = inputs.cuda()
            with torch.no_grad():  # replaces the obsolete Variable(..., volatile=True)
                outputs = net(inputs)

            outputs_avg = outputs.view(ncrops, -1).mean(0)  # avg over crops

            score = F.softmax(outputs_avg, dim=0)
            _, predicted = torch.max(outputs_avg.data, 0)

            prob = score.data.cpu().numpy()

            anim_prob += prob
            # e.g.
            # anim_prob += model(face)
        anim_prob = anim_prob / sum(anim_prob)

        anim_name = np.random.choice(config.anim_names, 1, p=anim_prob)[0]

        print('Playing Animation: ', anim_name)
        print('The Expression is: ',
              class_names[anim_prob.tolist().index(max(anim_prob))])
        robot.anim.play_animation(anim_name)
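The summed per-frame scores are renormalized into a probability distribution before sampling; a tiny standalone sketch of that last step (the animation names are placeholders for config.anim_names):
import numpy as np

anim_names = ['anim_happy', 'anim_sad', 'anim_surprise']  # placeholder names
anim_prob = np.array([2.1, 0.3, 0.6])                     # summed per-frame scores
anim_prob = anim_prob / anim_prob.sum()                   # p must sum to 1
print(np.random.choice(anim_names, 1, p=anim_prob)[0])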
Example #11
def main():
    global args, best_prec1
    args = parser.parse_args()
    # torch.cuda.set_device(args.gpu)
    if args.tensorboard:
        print("Using TensorBoard")
        configure("exp/%s" % (args.name))

    # Data loading code
    if args.augment:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda x: F.pad(x.unsqueeze(0), (4, 4, 4, 4),
                                              mode='replicate').squeeze()),
            transforms.ToPILImage(),
            transforms.RandomCrop(32),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
        ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
    ])

    kwargs = {'num_workers': 1, 'pin_memory': True}
    assert (args.dataset == 'cifar10' or args.dataset == 'cifar100')
    train_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()]('../data',
                                                train=True,
                                                download=True,
                                                transform=transform_train),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)
    val_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()]('../data',
                                                train=False,
                                                transform=transform_test),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)

    # create model
    model = WideResNetMulti(args.layers,
                            10 if args.dataset == 'cifar10' else 100,
                            args.num_rotate_classes,
                            args.widen_factor,
                            dropRate=args.droprate)

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # for training on multiple GPUs.
    # Use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                nesterov=args.nesterov,
                                weight_decay=args.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch + 1)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)
    print('Best accuracy: ', best_prec1)
Example #12
def get_loader(img_root, img_size, batch_size, mode='train', num_thread=1):
    shuffle = False

    mean_bgr = torch.Tensor(3, 256, 256)
    mean_bgr[0, :, :] = 104.008  # B
    mean_bgr[1, :, :] = 116.669  # G
    mean_bgr[2, :, :] = 122.675  # R

    depth_mean_bgr = torch.Tensor(1, 256, 256)
    depth_mean_bgr[0, :, :] = 115.8695

    if mode == 'train':
        transform = trans.Compose([
            # trans.ToTensor  image -> [0,255]
            trans.ToTensor_BGR(),
            trans.Lambda(lambda x: x - mean_bgr)
        ])

        depth_transform = trans.Compose([
            # trans.ToTensor  image -> [0,255]
            trans.ToTensor(),
            trans.Lambda(lambda x: x - depth_mean_bgr)
        ])

        t_transform = trans.Compose([
            # transform.ToTensor  label -> [0,1]
            transforms.ToTensor(),
        ])
        label_32_transform = trans.Compose([
            trans.Scale((32, 32), interpolation=Image.NEAREST),
            transforms.ToTensor(),
        ])
        label_64_transform = trans.Compose([
            trans.Scale((64, 64), interpolation=Image.NEAREST),
            transforms.ToTensor(),
        ])
        label_128_transform = trans.Compose([
            trans.Scale((128, 128), interpolation=Image.NEAREST),
            transforms.ToTensor(),
        ])
        shuffle = True
    else:
        transform = trans.Compose([
            trans.Scale((img_size, img_size)),
            trans.ToTensor_BGR(),
            trans.Lambda(lambda x: x - mean_bgr)
        ])

        depth_transform = trans.Compose([
            trans.Scale((img_size, img_size)),
            trans.ToTensor(),
            trans.Lambda(lambda x: x - depth_mean_bgr)
        ])

        t_transform = trans.Compose([
            trans.Scale((img_size, img_size), interpolation=Image.NEAREST),
            transforms.ToTensor(),
        ])
    if mode == 'train':
        dataset = ImageData(img_root, transform, depth_transform, t_transform, label_32_transform, label_64_transform, label_128_transform, mode)
    else:
        dataset = ImageData(img_root, transform, depth_transform, t_transform, label_32_transform=None, label_64_transform=None, label_128_transform=None, mode=mode)

    data_loader = data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_thread)
    return data_loader
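A hedged usage sketch; the root path, sizes, and batch layout are assumptions (the exact items per batch depend on how ImageData yields them):
train_loader = get_loader('./data/train', img_size=256, batch_size=8,
                          mode='train', num_thread=4)
batch = next(iter(train_loader))  # exact tuple layout depends on ImageData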
Example #13
def get_loader(img_root,
               label_root,
               img_size,
               batch_size,
               mode='train',
               num_thread=1):
    shuffle = False
    mean = torch.Tensor(3, 256, 256)
    mean[0, :, :] = 125.5325
    mean[1, :, :] = 118.1743
    mean[2, :, :] = 101.3507
    # mean = torch.Tensor([123.68, 116.779, 103.939]).view(3, 1, 1) / 255
    if mode == 'train':
        transform = trans.Compose([
            trans.Scale((img_size, img_size)),
            # trans.ToTensor  image -> [0,255]
            trans.ToTensor(),
            trans.Lambda(lambda x: x - mean)
        ])
        t_transform = trans.Compose([
            trans.Scale((img_size, img_size)),
            # transform.ToTensor  label -> [0,1]
            transforms.ToTensor(),
            # transforms.Lambda(lambda x: torch.round(x))  # TODO: this may be unnecessary
        ])
        label_32_transform = trans.Compose([
            trans.Scale((32, 32)),
            transforms.ToTensor(),
        ])
        label_64_transform = trans.Compose([
            trans.Scale((64, 64)),
            transforms.ToTensor(),
        ])
        label_128_transform = trans.Compose([
            trans.Scale((128, 128)),
            transforms.ToTensor(),
        ])
        shuffle = True
    else:
        # define transform to images
        transform = trans.Compose([
            trans.Scale((img_size, img_size)),
            trans.ToTensor(),
            trans.Lambda(lambda x: x - mean)
        ])

        # define transform to ground truth
        t_transform = trans.Compose([
            trans.Scale((img_size, img_size)),
            transforms.ToTensor(),
            #transforms.Lambda(lambda x: torch.round(x))  # TODO: this may be unnecessary
        ])
    if mode == 'train':
        dataset = ImageData(img_root, label_root, transform, t_transform,
                            label_32_transform, label_64_transform,
                            label_128_transform)
        # print(dataset.image_path)
        data_loader = data.DataLoader(dataset=dataset,
                                      batch_size=batch_size,
                                      shuffle=shuffle,
                                      num_workers=num_thread)
        return data_loader
    else:
        dataset = ImageData(img_root,
                            label_root,
                            transform,
                            t_transform,
                            label_32_transform=None,
                            label_64_transform=None,
                            label_128_transform=None)
        # print(dataset.image_path)
        data_loader = data.DataLoader(dataset=dataset,
                                      batch_size=batch_size,
                                      shuffle=shuffle,
                                      num_workers=num_thread)
        return data_loader
Example #14
# laplacia_weights = []
# laplacia_weights = [1e3 / n ** 2 for n in [512]]
# laplacia_weights = [3 / n for n in [1, 2, 3]]

# vgg definition that conveniently lets you grab the outputs from any layer

# %%

# gram matrix and loss

# %%

prep = transforms.Compose([
    transforms.Resize(args.img_size),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])]),  # turn to BGR
    transforms.Normalize(
        mean=[0.40760392, 0.45795686, 0.48501961],  # subtract imagenet mean
        std=[1, 1, 1]),
    transforms.Lambda(lambda x: x.mul_(255)),
])
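prep converts RGB to BGR and subtracts the ImageNet mean in the 0-255 range, matching VGG weights converted from Caffe. For viewing results you would undo these steps; the postp below is an illustrative inverse, not shown in the original:
postp = transforms.Compose([
    transforms.Lambda(lambda x: x.mul_(1. / 255)),
    transforms.Normalize(
        mean=[-0.40760392, -0.45795686, -0.48501961],  # add the mean back
        std=[1, 1, 1]),
    transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])]),  # BGR -> RGB
    transforms.Lambda(lambda x: x.clamp(0, 1)),  # guard against overshoot
    transforms.ToPILImage(),
])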

prep_hr = transforms.Compose([
    transforms.Resize(args.img_size_hr),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])]),  # turn to BGR
    transforms.Normalize(
        mean=[0.40760392, 0.45795686, 0.48501961],
        # subtract imagenet mean
        std=[1, 1, 1]),
    transforms.Lambda(lambda x: x.mul_(255)),