Example #1
    def forward(self, input):
        # ResNet streams: split the input into i-frame, motion-vector, and residual channels
        input_i = input[:, :, 0:3, :, :]  # [64, 4, 3, 224, 224]
        input_m = input[:, :, 3:5, :, :]  # motion vectors
        input_r = input[:, :, 5:8, :, :]  # residual
        # print(input_i.shape)
        # print(input_m.shape)
        # print(input_r.shape)
        input_i = input_i.view((-1, ) + input_i.size()[-3:])
        input_m = input_m.view((-1, ) + input_m.size()[-3:])
        input_r = input_r.view((-1, ) + input_r.size()[-3:])
        # a=input_i.view((-1, ) + input_i.size()[-4:])
        # print(a.shape)

        input_m = self.data_bn_m(input_m)
        input_r = self.data_bn_r(input_r)

        base_out_i = self.base_model_i(input_i)
        base_out_m = self.base_model_m(input_m)
        base_out_r = self.base_model_r(input_r)
        # print(base_out_i.shape)
        # print(base_out_m.shape)
        # print(base_out_r.shape)

        base_out = torch.cat((base_out_i, base_out_m, base_out_r), 1)  # [batch_size * num_gop, 3072, 1, 1]
        print(base_out.shape)

        # lstm
        args = parser.parse_args()  # note: re-parses the CLI arguments on every forward pass
        input_l = base_out.view((args.batch_size, -1) + base_out.size()[1:])  # [batch_size, num_gop, 3072, 1, 1]
        print(input_l.shape)
        input_l = torch.squeeze(input_l, 3)  # [batch_size, num_gop, 3072, 1]
        print(input_l.shape)
        input_l = torch.squeeze(input_l, 3)  # [batch_size, num_gop, 3072]
        print(input_l.shape)

        _, (input_l, _) = self.lstm(input_l)  # keep h_n: [num_layers, batch_size, hidden_size]
        lstm_out = input_l[-1, :, :]          # hidden state of the last LSTM layer
        print(lstm_out.shape)
        lstm_out = lstm_out.view(-1, lstm_out.size()[-1])
        print(lstm_out.shape)

        lstm_out = self.linear_1(lstm_out)
        print(lstm_out.shape)


        return lstm_out
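
The tensor bookkeeping above can be checked in isolation. Below is a minimal, hypothetical sketch of the same reshape / squeeze / LSTM flow on dummy tensors; the batch size, number of GOPs, per-stream feature width, and LSTM configuration are assumptions for illustration, not values taken from the original model.

import torch
import torch.nn as nn

# Assumed sizes: 4 videos per batch, 4 GOPs per video, 1024-dim features per
# stream (3072 after concatenating the three streams), LSTM hidden size 512.
batch_size, num_gop, feat_dim, hidden = 4, 4, 3 * 1024, 512

base_out = torch.randn(batch_size * num_gop, feat_dim, 1, 1)        # shape after torch.cat

input_l = base_out.view((batch_size, -1) + base_out.size()[1:])     # [4, 4, 3072, 1, 1]
input_l = torch.squeeze(input_l, 3)                                 # [4, 4, 3072, 1]
input_l = torch.squeeze(input_l, 3)                                 # [4, 4, 3072]

lstm = nn.LSTM(feat_dim, hidden, num_layers=2, batch_first=True)    # batch_first assumed
_, (h_n, _) = lstm(input_l)                                         # h_n: [num_layers, batch, hidden]
lstm_out = h_n[-1, :, :]                                            # [4, 512]
print(lstm_out.shape)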
Example #2
def main():
    global args
    global best_prec1
    args = parser.parse_args()

    print('Training arguments:')
    for k, v in vars(args).items():
        print('\t{}: {}'.format(k, v))

    if args.data_name == 'ucf101':
        num_class = 101
    elif args.data_name == 'hmdb51':
        num_class = 51
    else:
        raise ValueError('Unknown dataset ' + args.data_name)

    model = Model(num_class,
                  args.num_segments,
                  args.representation,
                  base_model=args.arch)
    print(model)

    train_loader = torch.utils.data.DataLoader(CoviarDataSet(
        args.data_root,
        args.data_name,
        video_list=args.train_list,
        num_segments=args.num_segments,
        representation=args.representation,
        transform=model.get_augmentation(),
        is_train=True,
        accumulate=(not args.no_accumulation),
    ),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(CoviarDataSet(
        args.data_root,
        args.data_name,
        video_list=args.test_list,
        num_segments=args.num_segments,
        representation=args.representation,
        transform=torchvision.transforms.Compose([
            GroupScale(int(model.scale_size)),
            GroupCenterCrop(model.crop_size),
        ]),
        is_train=False,
        accumulate=(not args.no_accumulation),
    ),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    cudnn.benchmark = True

    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        decay_mult = 0.0 if 'bias' in key else 1.0

        if ('module.base_model.conv1' in key
                or 'module.base_model.bn1' in key
                or 'data_bn' in key) and args.representation in ['mv', 'residual']:
            lr_mult = 0.1
        elif '.fc.' in key:
            lr_mult = 1.0
        else:
            lr_mult = 0.01

        params += [{
            'params': value,
            'lr': args.lr,
            'lr_mult': lr_mult,
            'decay_mult': decay_mult
        }]

    optimizer = torch.optim.Adam(params,
                                 weight_decay=args.weight_decay,
                                 eps=0.001)
    criterion = torch.nn.CrossEntropyLoss().cuda()

    for epoch in range(args.epochs):
        cur_lr = adjust_learning_rate(optimizer, epoch, args.lr_steps,
                                      args.lr_decay)

        train(train_loader, model, criterion, optimizer, epoch, cur_lr)

        if epoch % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion)

            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            if is_best or epoch % SAVE_FREQ == 0:
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    },
                    is_best,
                    filename='checkpoint.pth.tar')
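
The lr_mult / decay_mult entries attached to each parameter group above only take effect if adjust_learning_rate reads them back when it updates the optimizer. That helper is not part of this snippet; a plausible sketch in the style of the CoViAR training script (the exact step-decay rule and the reliance on the module-level args are assumptions) is:

def adjust_learning_rate(optimizer, epoch, lr_steps, lr_decay):
    # Step-decay the base lr, then scale every group by its own multipliers.
    decay = lr_decay ** sum(epoch >= step for step in lr_steps)
    lr = args.lr * decay  # relies on the global `args`, as in main() above
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr * param_group.get('lr_mult', 1.0)
        param_group['weight_decay'] = args.weight_decay * param_group.get('decay_mult', 1.0)
    return lr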
Example #3
File: train.py  Project: VideoForage/DSVC
import copy

import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as LS
from torch.autograd import Variable

import network
from dataset import get_loader, default_loader
from evaluate import run_eval
from train_options import parser
from util import get_models, init_lstm, set_train, set_eval
from util import prepare_inputs, forward_ctx

args = parser.parse_args()
print(args)

############### Data ###############
train_loader = get_loader(is_train=True,
                          root=args.train,
                          mv_dir=args.train_mv,
                          args=args)


def get_eval_loaders():
    # We can extend this dict to evaluate on multiple datasets.
    eval_loaders = {
        'TVL':
        get_loader(is_train=False,
                   root=args.eval,
Example #4
def main():
    print(torch.cuda.device_count())
    global args
    global devices
    global WRITER
    args = parser.parse_args()
    global description
    description = '%s_bt_%d_seg_%d_%s' % (args.arch, args.batch_size,
                                          args.num_segments, "iframe+mv*0.5")
    log_name = './log/%s' % description
    WRITER = SummaryWriter(log_name)
    print('Training arguments:')
    for k, v in vars(args).items():
        print('\t{}: {}'.format(k, v))

    model = Model(2,
                  args.num_segments,
                  args.representation,
                  base_model=args.arch)

    # optionally resume training from a previous checkpoint
    if CONTINUE_FROM_LAST:
        checkpoint = torch.load(LAST_SAVE_PATH)
        # print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1']))
        print("model epoch {} lowest loss {}".format(checkpoint['epoch'],
                                                     checkpoint['loss_min']))
        base_dict = {
            '.'.join(k.split('.')[1:]): v
            for k, v in list(checkpoint['state_dict'].items())
        }
        loss_min = checkpoint['loss_min']
        model.load_state_dict(base_dict)
        start_epochs = checkpoint['epoch']
    else:
        loss_min = 10000
        start_epochs = 0

    # print(model)
    # WRITER.add_graph(model, (torch.randn(10,5, 2, 224, 224),))

    devices = [torch.device("cuda:%d" % device) for device in args.gpus]
    global DEVICES
    DEVICES = devices

    train_loader = torch.utils.data.DataLoader(CoviarDataSet(
        args.data_root,
        video_list=args.train_list,
        num_segments=args.num_segments,
        representation=args.representation,
        is_train=True,
        accumulate=(not args.no_accumulation),
    ),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=False)

    val_loader = torch.utils.data.DataLoader(CoviarDataSet(
        args.data_root,
        video_list=args.test_list,
        num_segments=args.num_segments,
        representation=args.representation,
        is_train=False,
        accumulate=(not args.no_accumulation),
    ),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=False)

    model = torch.nn.DataParallel(model, device_ids=args.gpus)
    model = model.to(devices[0])
    cudnn.benchmark = True

    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        decay_mult = 0.0 if 'bias' in key else 1.0
        if ('module.base_model.conv1' in key
                or 'module.base_model.bn1' in key
                or 'data_bn' in key) and args.representation in ['mv', 'residual']:
            lr_mult = 0.1
        elif '.fc.' in key:
            lr_mult = 1.0
        else:
            lr_mult = 0.01

        params += [{
            'params': value,
            'lr': args.lr,
            'lr_mult': lr_mult,
            'decay_mult': decay_mult
        }]

    if FINETUNE:
        optimizer = torch.optim.SGD(params, lr=1e-5, momentum=0.9)
    else:
        optimizer = torch.optim.Adam(params,
                                     weight_decay=args.weight_decay,
                                     eps=0.001)

    criterions = []
    siamese_loss = ContrastiveLoss(margin=2.0).to(devices[0])
    classifiy_loss = nn.CrossEntropyLoss().to(devices[0])
    # classifiy_loss = LabelSmoothingLoss(2,0.1,-1)
    criterions.append(siamese_loss)

    criterions.append(classifiy_loss)

    # use ReduceLROnPlateau to adjust the lr
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=0.2,
                                  patience=20 // args.eval_freq,
                                  verbose=True)

    for epoch in range(start_epochs, args.epochs):
        # log the current learning rate
        WRITER.add_scalar('Lr/epoch', get_lr(optimizer), epoch)
        loss_train_s, loss_train_c = train(train_loader, model, criterions,
                                           optimizer, epoch)
        loss_train = WEI_S * loss_train_s + WEI_C * loss_train_c
        if epoch % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss_val_s, loss_val_c, acc = validate(val_loader, model,
                                                   criterions, epoch)
            loss_val = WEI_S * loss_val_s + WEI_C * loss_val_c
            scheduler.step(loss_val_c)
            is_best = (loss_val_c < loss_min)
            loss_min = min(loss_val_c, loss_min)
            # visualization
            WRITER.add_scalar('Accuracy/epoch', acc, epoch)
            WRITER.add_scalars('Siamese Loss/epoch', {
                'Train': loss_train_s,
                'Val': loss_val_s
            }, epoch)
            WRITER.add_scalars('Classification Loss/epoch', {
                'Train': loss_train_c,
                'Val': loss_val_c
            }, epoch)
            WRITER.add_scalars('Combine Loss/epoch', {
                'Train': loss_train,
                'Val': loss_val
            }, epoch)
            if is_best or epoch % SAVE_FREQ == 0:
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'loss_min': loss_min,
                    },
                    is_best,
                    filename='checkpoint.pth.tar')
    WRITER.close()
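
ContrastiveLoss(margin=2.0) is instantiated above but not defined in the snippet. A common margin-based formulation that matches this call signature is sketched below; whether the project uses exactly this variant, or this label convention (1 = similar pair, 0 = dissimilar pair), is an assumption.

import torch.nn as nn
import torch.nn.functional as F

class ContrastiveLoss(nn.Module):
    # Margin-based contrastive loss over pairs of embeddings (sketch).
    def __init__(self, margin=2.0):
        super().__init__()
        self.margin = margin

    def forward(self, emb1, emb2, label):
        dist = F.pairwise_distance(emb1, emb2)                       # Euclidean distance per pair
        loss_sim = label * dist.pow(2)                               # pull similar pairs together
        loss_dis = (1 - label) * F.relu(self.margin - dist).pow(2)   # push dissimilar pairs apart
        return (loss_sim + loss_dis).mean()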
Example #5
def main():
    global args
    global best_prec1
    args = parser.parse_args()

    print('Training arguments:')
    for k, v in vars(args).items():
        print('\t{}: {}'.format(k, v))

    if args.data_name == 'ucf101':
        num_class = 101
    elif args.data_name == 'hmdb51':
        num_class = 51
    elif args.data_name == 'mine':
        num_class = 2
    else:
        raise ValueError('Unknown dataset ' + args.data_name)

    model = Model(num_class,
                  args.num_segments,
                  args.representation,
                  base_model=args.arch)
    print(model)

    if 'resnet3D' in args.arch:
        train_crop_min_ratio = 0.75
        train_crop_min_scale = 0.25
        mean = [0.4345, 0.4051, 0.3775]
        std = [0.2768, 0.2713, 0.2737]
        value_scale = 1

        train_transform = Compose([
            RandomResizedCrop(
                model.crop_size, (train_crop_min_scale, 1.0),
                (train_crop_min_ratio, 1.0 / train_crop_min_ratio)),
            RandomHorizontalFlip(),
            ToTensor(),
            ScaleValue(value_scale),
            Normalize(mean, std)
        ])
        test_transform = Compose([
            Resize(model.crop_size),
            CenterCrop(model.crop_size),
            ToTensor(),  # range [0, 255] -> [0.0,1.0]
            ScaleValue(1),
            Normalize(mean, std)
        ])

    train_loader = torch.utils.data.DataLoader(
        CoviarDataSet(
            args.data_root,
            args.data_name,
            video_list=args.train_list,
            num_segments=args.num_segments,
            representation=args.representation,
            transform=model.get_augmentation(),  # train_transform,
            is_train=True,
            accumulate=(not args.no_accumulation),
            model_name=args.arch),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        worker_init_fn=worker_init_fn)

    val_loader = torch.utils.data.DataLoader(
        CoviarDataSet(
            args.data_root,
            args.data_name,
            video_list=args.test_list,
            num_segments=args.num_segments,
            representation=args.representation,
            transform=torchvision.transforms.Compose([
                GroupScale(int(model.scale_size)),
                GroupCenterCrop(model.crop_size)
            ]),  # test_transform,
            is_train=True,
            accumulate=(not args.no_accumulation),
            model_name=args.arch),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        worker_init_fn=worker_init_fn)

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    cudnn.benchmark = True

    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        decay_mult = 0.0 if 'bias' in key else 1.0

        if ('module.base_model.conv1' in key
                or 'module.base_model.bn1' in key
                or 'data_bn' in key) and args.representation in ['mv', 'residual']:
            lr_mult = 0.1
        elif '.fc.' in key:
            lr_mult = 1.0
        else:
            lr_mult = 0.01

        params += [{
            'params': value,
            'lr': args.lr,
            'lr_mult': lr_mult,
            'decay_mult': decay_mult
        }]

    #optimizer = torch.optim.SGD(params, weight_decay=0.001, momentum=0.9, nesterov=False)
    #scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10)
    optimizer = torch.optim.Adam(params,
                                 weight_decay=args.weight_decay,
                                 eps=0.001)
    criterion = torch.nn.CrossEntropyLoss().cuda()

    for epoch in range(args.epochs):
        cur_lr = adjust_learning_rate(optimizer, epoch, args.lr_steps,
                                      args.lr_decay)
        #cur_lr = get_lr(optimizer)

        train(train_loader, model, criterion, optimizer, epoch, cur_lr)
        #prec1, prev_val_loss = validate(val_loader, model, criterion)
        #scheduler.step(prev_val_loss)

        if epoch % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1, _ = validate(val_loader, model, criterion)

            # record the training history
            np.savez("train_history/train_history.npz",
                     loss=np.array(train_loss),
                     top1=np.array(train_prec),
                     lr=np.array(train_lr))
            np.savez("train_history/valid_history.npz",
                     loss=np.array(valid_loss),
                     top1=np.array(valid_prec))

            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            if is_best or epoch % SAVE_FREQ == 0:
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    },
                    is_best,
                    filename='checkpoint.pth.tar')
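
worker_init_fn is passed to both loaders but is not defined in this snippet. A typical implementation re-seeds numpy and random inside each DataLoader worker so that augmentations are reproducible without being identical across workers; the version below is a sketch of that common pattern, not the project's actual function.

import random
import numpy as np
import torch

def worker_init_fn(worker_id):
    # torch.initial_seed() already differs per worker; fold it into the other RNGs.
    seed = torch.initial_seed() % 2**32
    np.random.seed(seed)
    random.seed(seed)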
Example #6
def main():
    global args
    global best_prec1
    args = parser.parse_args()

    print('Training arguments:')
    for k, v in vars(args).items():
        print('\t{}: {}'.format(k, v))

    if args.data_name == 'ucf101':
        num_class = 101
    elif args.data_name == 'hmdb51':
        num_class = 51
    else:
        raise ValueError('Unknown dataset ' + args.data_name)

    # num_class: total number of classes
    # num_segments: number of TSN segments, default=3
    # representation: iframe, mv, residual
    # base_model: base architecture

    model = Model(num_class,
                  args.num_segments,
                  args.representation,
                  base_model=args.arch,
                  mv_stack_size=args.mv_stack_size)
    print(model)

    # dataset (Dataset) – dataset from which to load the data.
    # batch_size – how many samples per batch to load (default: 1).
    # shuffle – set to True to have the data reshuffled at every epoch.
    # num_workers – how many subprocesses to use for data loading. 0 means that the data will be loaded in the main process. (default: 0)
    # pin_memory – If True, the data loader will copy tensors into CUDA pinned memory before returning them.

    train_loader = torch.utils.data.DataLoader(
        CoviarDataSet(
            args.data_root,
            args.data_name,
            video_list=args.train_list,
            num_segments=args.num_segments,
            representation=args.representation,
            transform=model.get_augmentation(),
            # get_augmentation() =
            # GroupMultiScaleCrop + GroupRandomHorizontalFlip
            # GroupMultiScaleCrop contains stack mv

            # seems np.stack in resize_mv() called in GroupMultiScaleCrop
            # has the same effects as Stack() in TSN

            # -----------------------
            # TSN:
            # transform=torchvision.transforms.Compose([
            #     train_augmentation,                       # train_augmentation = model.get_augmentation(), same
            #     Stack(roll=args.arch == 'BNInception'),   # this line seems important
            #     ToTorchFormatTensor(div=args.arch != 'BNInception'),
            #     normalize, # used for RGBDiff
            # ])),
            # ----------------------
            is_train=True,
            accumulate=(not args.no_accumulation),
            mv_stack_size=args.mv_stack_size),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        CoviarDataSet(
            args.data_root,
            args.data_name,
            video_list=args.test_list,
            num_segments=args.num_segments,
            representation=args.representation,
            transform=torchvision.transforms.Compose([  # this Compose also performs the stacking
                GroupScale(int(model.scale_size)),
                GroupCenterCrop(model.crop_size),
                # both use model.crop_size (instead of TSN's net.input_size in test_model.py)
            ]),

            # seems np.stack in resize_mv() called in GroupCenterCrop
            # has the same effects as Stack() in TSN

            # -----------------------
            # TSN:
            # transform=torchvision.transforms.Compose([
            #     GroupScale(int(scale_size)),
            #     GroupCenterCrop(crop_size),
            #     Stack(roll=args.arch == 'BNInception'),       # this line seems important
            #     ToTorchFormatTensor(div=args.arch != 'BNInception'),
            #     normalize,
            # ])),
            # -----------------------
            is_train=False,
            accumulate=(not args.no_accumulation),
            mv_stack_size=args.mv_stack_size),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # parallel gpu setting
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    cudnn.benchmark = True

    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        decay_mult = 0.0 if 'bias' in key else 1.0

        if ('module.base_model.conv1' in key
                or 'module.base_model.bn1' in key
                or 'data_bn' in key) and args.representation in ['mv', 'residual']:
            lr_mult = 0.1
        elif '.fc.' in key:
            lr_mult = 1.0
        else:
            lr_mult = 0.01

        params += [{
            'params': value,
            'lr': args.lr,
            'lr_mult': lr_mult,
            'decay_mult': decay_mult
        }]

    optimizer = torch.optim.Adam(params,
                                 weight_decay=args.weight_decay,
                                 eps=0.001)
    criterion = torch.nn.CrossEntropyLoss().cuda()

    for epoch in range(args.epochs):
        cur_lr = adjust_learning_rate(optimizer, epoch, args.lr_steps,
                                      args.lr_decay)

        train(train_loader, model, criterion, optimizer, epoch, cur_lr)

        if epoch % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion)

            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            if is_best or epoch % SAVE_FREQ == 0:
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    },
                    is_best,
                    filename='checkpoint.pth.tar')
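
The comments above argue that the np.stack done inside the CoViAR transforms plays the role of TSN's Stack(). As a quick, self-contained illustration of the difference between stacking a group of motion-vector frames along a new axis and concatenating them along the channel axis (all shapes below are made up for the demo):

import numpy as np

# Three hypothetical motion-vector frames, each H x W x 2 (dx, dy).
frames = [np.random.randn(224, 224, 2) for _ in range(3)]

stacked = np.stack(frames)                         # (3, 224, 224, 2): new leading axis
channel_stacked = np.concatenate(frames, axis=2)   # (224, 224, 6): TSN-style channel stacking

print(stacked.shape, channel_stacked.shape)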
Example #7
def main():
    global args
    global best_prec1
    args = parser.parse_args()

    print('Training arguments:')
    for k, v in vars(args).items():
        print('\t{}: {}'.format(k, v))

    if args.data_name == 'ucf101':
        num_class = 101
    elif args.data_name == 'hmdb51':
        num_class = 51
    else:
        raise ValueError('Unknown dataset ' + args.data_name)

    model = Model(num_class, args.num_segments, args.representation,
                  base_model=args.arch)
    print(model)

    train_loader = torch.utils.data.DataLoader(
        CoviarDataSet(
            args.data_root,
            args.data_name,
            video_list=args.train_list,
            num_segments=args.num_segments,
            representation=args.representation,
            transform=model.get_augmentation(),
            is_train=True,
            accumulate=(not args.no_accumulation),
            ),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        CoviarDataSet(
            args.data_root,
            args.data_name,
            video_list=args.test_list,
            num_segments=args.num_segments,
            representation=args.representation,
            transform=torchvision.transforms.Compose([
                GroupScale(int(model.scale_size)),
                GroupCenterCrop(model.crop_size),
                ]),
            is_train=False,
            accumulate=(not args.no_accumulation),
            ),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    cudnn.benchmark = True

    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        decay_mult = 0.0 if 'bias' in key else 1.0

        if ('module.base_model.conv1' in key
                or 'module.base_model.bn1' in key
                or 'data_bn' in key) and args.representation in ['mv', 'residual']:
            lr_mult = 0.1
        elif '.fc.' in key:
            lr_mult = 1.0
        else:
            lr_mult = 0.01

        params += [{'params': value, 'lr': args.lr, 'lr_mult': lr_mult, 'decay_mult': decay_mult}]

    optimizer = torch.optim.Adam(
        params,
        weight_decay=args.weight_decay,
        eps=0.001)
    criterion = torch.nn.CrossEntropyLoss().cuda()

    for epoch in range(args.epochs):
        cur_lr = adjust_learning_rate(optimizer, epoch, args.lr_steps, args.lr_decay)

        train(train_loader, model, criterion, optimizer, epoch, cur_lr)

        if epoch % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion)

            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            if is_best or epoch % SAVE_FREQ == 0:
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    },
                    is_best,
                    filename='checkpoint.pth.tar')
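
save_checkpoint appears in every one of these training loops but is never shown. The conventional PyTorch pattern, which these scripts presumably follow (an assumption), writes the state dict to disk and copies it aside when it is the best so far:

import shutil
import torch

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    # Persist the latest checkpoint; keep a separate copy of the best one.
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')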
Example #8
def main():
    # loading input arguments for training
    global args
    global best_prec1
    global start_epoch
    start_epoch = 0
    args = parser.parse_args()

    print('Training arguments:')
    for k, v in vars(args).items():
        print('\t{}: {}'.format(k, v))

    if args.data_name == 'ucf101':
        num_class = 101
    elif args.data_name == 'hmdb51':
        num_class = 51
    elif args.data_name == 'kinetics400':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.data_name)

    # define the model architecture
    model = Model(num_class, args.num_segments, args.representation,
                  base_model=args.arch,
                  new_length=args.new_length,
                  use_databn=args.use_databn,
                  gen_flow_or_delta=args.gen_flow_or_delta,
                  gen_flow_ds_factor=args.gen_flow_ds_factor,
                  arch_estimator=args.arch_estimator,
                  arch_d=args.arch_d,
                  att=args.att)
    print(model)

    # load the pre-trained model
    if args.weights is not None:
        checkpoint = torch.load(args.weights, map_location=lambda storage, loc: storage)
        print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1']))
        base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())}
        model.load_state_dict(base_dict, strict=False)

    # define the data loader for reading training data
    train_loader = torch.utils.data.DataLoader(
        CoviarDataSet(
            args.data_root,
            args.flow_root,
            args.data_name,
            video_list=args.train_list,
            num_segments=args.num_segments,
            representation=args.representation,
            new_length=args.new_length,
            flow_ds_factor=args.flow_ds_factor,
            upsample_interp=args.upsample_interp,
            transform=model.get_augmentation(),
            is_train=True,
            accumulate=(not args.no_accumulation),
            gop=args.gop,
            flow_folder=args.data_flow,
            mv_minmaxnorm=args.mv_minmaxnorm,
            ),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # define the data loader for reading val data
    val_loader = torch.utils.data.DataLoader(
        CoviarDataSet(
            args.data_root,
            args.flow_root,
            args.data_name,
            video_list=args.test_list,
            num_segments=args.num_segments,
            representation=args.representation,
            new_length=args.new_length,
            flow_ds_factor=args.flow_ds_factor,
            upsample_interp=args.upsample_interp,
            transform=torchvision.transforms.Compose([
                GroupScale(int(model.scale_size)),
                GroupCenterCrop(model.crop_size),
                ]),
            is_train=False,
            accumulate=(not args.no_accumulation),
            gop=args.gop,
            flow_folder=args.data_flow,
            mv_minmaxnorm=args.mv_minmaxnorm,
            ),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda(args.gpus[0])
    cudnn.benchmark = True

    # define optimizer and specify the corresponding parameters
    params_dict = dict(model.named_parameters())
    params_cls = []
    params_gf = []
    params_d = []
    for key, value in params_dict.items():
        if 'base_model' in key:
            decay_mult = 0.0 if 'bias' in key else 1.0
            lr_mult = args.lr_cls_mult  # classification branch: just fine-tune (alternative: lr_mult = 1.0 when '.fc.' in key)
            params_cls += [{'params': value, 'lr': args.lr, 'lr_mult': lr_mult, 'decay_mult': decay_mult}]
        if 'gen_flow_model' in key:
            decay_mult = 0.0 if 'bias' in key else 1.0
            lr_mult = args.lr_mse_mult
            params_gf += [{'params': value, 'lr': args.lr, 'lr_mult': lr_mult, 'decay_mult': decay_mult}]
        if 'discriminator' in key:
            decay_mult = 0.0 if 'bias' in key else 1.0
            lr_mult = args.lr_d_mult
            params_d += [{'params': value, 'lr': args.lr, 'lr_mult': lr_mult, 'decay_mult': decay_mult}]

    optimizer_cls = torch.optim.Adam(
        params_cls,
        weight_decay=args.weight_decay,
        eps=0.001)

    optimizer_gf = torch.optim.Adam(
        params_gf,
        weight_decay=args.weight_decay,
        eps=0.001)

    optimizer_d = torch.optim.Adam(
        params_d,
        weight_decay=args.weight_decay,
        eps=0.001)

    # resume training from previous checkpoint
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage)
            start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            if 'optimizer_cls' in checkpoint.keys():
                optimizer_cls.load_state_dict(checkpoint['optimizer_cls'])
                optimizer_gf.load_state_dict(checkpoint['optimizer_gf'])
                optimizer_d.load_state_dict(checkpoint['optimizer_d'])
                def load_opt_update_cuda(optimizer, cuda_id):
                    for state in optimizer.state.values():
                        for k, v in state.items():
                            if torch.is_tensor(v):
                                state[k] = v.cuda(cuda_id)
                load_opt_update_cuda(optimizer_cls, args.gpus[0])
                load_opt_update_cuda(optimizer_gf, args.gpus[0])
                load_opt_update_cuda(optimizer_d, args.gpus[0])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))

    # define several loss functions
    criterion = torch.nn.CrossEntropyLoss().cuda(args.gpus[0])
    if args.loss_mse == 'MSELoss':
        criterion_mse = torch.nn.MSELoss().cuda(args.gpus[0])
    elif args.loss_mse == 'SmoothL1Loss':
        criterion_mse = torch.nn.SmoothL1Loss().cuda(args.gpus[0])
    elif args.loss_mse == 'L1':
        criterion_mse = torch.nn.L1Loss().cuda(args.gpus[0])

    # finally done with setup and start to train model
    for epoch in range(start_epoch, args.epochs):
        # determine the learning rate for the current epoch
        cur_lr_cls = adjust_learning_rate(optimizer_cls, epoch, args.lr_steps, args.lr_decay) #, freeze=True, epoch_thre=args.epoch_thre)
        cur_lr_gf = adjust_learning_rate(optimizer_gf, epoch, args.lr_steps, args.lr_decay)
        cur_lr_d = adjust_learning_rate(optimizer_d, epoch, args.lr_steps, args.lr_decay)

        # perform training
        train(train_loader, model, criterion, criterion_mse, optimizer_cls,
            optimizer_gf, optimizer_d, epoch, cur_lr_cls, cur_lr_gf, cur_lr_d, args.lr_cls, args.lr_adv_g, args.lr_adv_d, args.lr_mse, args.att)

        # perform validation if needed
        if epoch % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, criterion_mse, args.lr_cls, args.lr_adv_g, args.lr_adv_d, args.lr_mse, args.att)
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            if is_best or epoch % SAVE_FREQ == 0:
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                        'optimizer_cls': optimizer_cls.state_dict(),
                        'optimizer_gf': optimizer_gf.state_dict(),
                        'optimizer_d': optimizer_d.state_dict(),
                    },
                    is_best,
                    filename='checkpoint.pth.tar')
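
validate returns prec1, which implies a top-k accuracy helper elsewhere in the project. Scripts like these usually reuse the helper from the PyTorch ImageNet example; the sketch below follows that pattern, and attributing it to this particular repository is an assumption.

import torch

def accuracy(output, target, topk=(1,)):
    # Compute precision@k for the specified values of k.
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res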
Example #9
def main():
    print(torch.cuda.device_count())
    global args
    global devices
    global WRITER
    args = parser.parse_args()
    global description
    description = 'bt_%d_seg_%d_%s' % (args.batch_size * ACCUMU_STEPS,
                                       args.num_segments, "finetune_from_vcdb")
    log_name = r'/home/sjhu/projects/compressed_video_compare/imqfusion/log/%s' % description
    WRITER = SummaryWriter(log_name)
    print('Training arguments:')
    for k, v in vars(args).items():
        print('\t{}: {}'.format(k, v))

    model = Model(2,
                  args.num_segments,
                  args.representation,
                  base_model=args.arch)

    # optionally resume training from a previous checkpoint
    if CONTINUE_FROM_LAST:
        checkpoint = torch.load(LAST_SAVE_PATH)
        # print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1']))
        print("model epoch {} lowest loss {}".format(checkpoint['epoch'],
                                                     checkpoint['loss_min']))
        base_dict = {
            '.'.join(k.split('.')[1:]): v
            for k, v in list(checkpoint['state_dict'].items())
        }
        loss_min = checkpoint['loss_min']
        model.load_state_dict(base_dict)
        start_epochs = checkpoint['epoch']
    else:
        loss_min = 10000
        start_epochs = 0

    devices = [torch.device("cuda:%d" % device) for device in args.gpus]
    global DEVICES
    DEVICES = devices

    # handle the imbalance between positive and negative samples
    train_dataset = CoviarDataSet(
        args.data_root,
        video_list=args.train_list,
        num_segments=args.num_segments,
        is_train=True,
    )
    target = train_dataset._labels_list
    class_sample_count = torch.tensor([(target == t).sum()
                                       for t in np.unique(target)])
    weight = 1. / class_sample_count.float()
    samples_weights = weight[target]
    train_sampler = WeightedRandomSampler(samples_weights, len(train_dataset),
                                          True)
    train_loader = torch.utils.data.DataLoader(CoviarDataSet(
        args.data_root,
        video_list=args.train_list,
        num_segments=args.num_segments,
        is_train=True,
    ),
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(CoviarDataSet(
        args.data_root,
        video_list=args.test_list,
        num_segments=args.num_segments,
        is_train=False,
    ),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    model = torch.nn.DataParallel(model, device_ids=args.gpus)
    model = model.to(devices[0])
    cudnn.benchmark = True

    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        decay_mult = 0.0 if 'bias' in key else 1.0
        if 'module.fc' in key:
            params += [{
                'params': [value],
                'lr': args.lr * 10,
                'decay_mult': decay_mult
            }]
        elif 'module.fusion' in key:
            params += [{
                'params': [value],
                'lr': args.lr * 10,
                'decay_mult': decay_mult
            }]
        elif 'module.mvnet' in key:
            params += [{
                'params': [value],
                'lr': args.lr * 10,
                'decay_mult': decay_mult
            }]
        else:
            params += [{
                'params': [value],
                'lr': args.lr * 1,
                'decay_mult': decay_mult
            }]

    # loss_weights = torch.FloatTensor([1.01,1])
    optimizer = torch.optim.SGD(params, lr=args.lr, momentum=0.9)
    criterions = []
    siamese_loss = ContrastiveLoss(margin=2.0).to(devices[0])
    classifiy_loss = nn.CrossEntropyLoss().to(devices[0])
    # classifiy_loss = LabelSmoothingLoss(2,0.1,-1)
    criterions.append(siamese_loss)
    criterions.append(classifiy_loss)

    # ReduceLROnPlateau (commented out below) was also tried for adjusting the lr
    # scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=20 // args.eval_freq, verbose=True)
    scheduler = WarmStartCosineAnnealingLR(optimizer,
                                           T_max=args.epochs,
                                           T_warm=10)
    for epoch in range(start_epochs, args.epochs):
        # log the current learning rate
        WRITER.add_scalar('Lr/epoch', get_lr(optimizer), epoch)
        loss_train_s, loss_train_c = train(train_loader, model, criterions,
                                           optimizer, epoch)
        loss_train = WEI_S * loss_train_s + WEI_C * loss_train_c
        scheduler.step(epoch)
        if epoch % EVAL_FREQ == 0 or epoch == args.epochs - 1:
            loss_val_s, loss_val_c, acc, report = validate(
                val_loader, model, criterions, epoch)
            loss_val = WEI_S * loss_val_s + WEI_C * loss_val_c
            is_best = (loss_val_c < loss_min)
            loss_min = min(loss_val_c, loss_min)
            # visualization
            WRITER.add_text(tag='Classification Report',
                            text_string=report,
                            global_step=epoch)
            WRITER.add_scalar('Accuracy/epoch', acc, epoch)
            WRITER.add_scalars('Siamese Loss/epoch', {
                'Train': loss_train_s,
                'Val': loss_val_s
            }, epoch)
            WRITER.add_scalars('Classification Loss/epoch', {
                'Train': loss_train_c,
                'Val': loss_val_c
            }, epoch)
            WRITER.add_scalars('Combine Loss/epoch', {
                'Train': loss_train,
                'Val': loss_val
            }, epoch)
            if is_best or epoch % SAVE_FREQ == 0:
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'loss_min': loss_min,
                    },
                    is_best,
                    filename='checkpoint.pth.tar')
    WRITER.close()
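
The sampler setup above inverts the per-class counts so that the rarer class is drawn more often. A small self-contained check of that weighting logic (the labels below are made up for the demo):

import numpy as np
import torch
from torch.utils.data import WeightedRandomSampler

target = np.array([0, 0, 0, 0, 0, 0, 1, 1])   # imbalanced toy labels: 6 negatives, 2 positives

class_sample_count = torch.tensor([(target == t).sum() for t in np.unique(target)])
weight = 1. / class_sample_count.float()                  # tensor([0.1667, 0.5000])
samples_weights = weight[torch.as_tensor(target)]         # one weight per sample

sampler = WeightedRandomSampler(samples_weights, num_samples=len(target), replacement=True)
print(samples_weights)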
Example #10
def main():
    global args

    best_prec = 0
    args = parser.parse_args()

    # Data Transform and data loading
    traindir = os.path.join(args.data, 'train_data')
    valdir = os.path.join(args.data, 'valid_data')

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])  # ImageNet statistics

    transform = (transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(), normalize
    ]),
                 transforms.Compose([
                     transforms.Resize(224),
                     transforms.CenterCrop(224),
                     transforms.ToTensor()
                 ]))

    train_dataset = dataset.loadedDataset(traindir, transform)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(dataset.loadedDataset(
        valdir, transform),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if os.path.exists(args.model):
        # load existing model
        model_info = torch.load(args.model)
        print("==> loading existing model '{}' ".format(model_info['arch']))
        original_model = models.__dict__[model_info['arch']](pretrained=False)
        model = LSTMModel(original_model, model_info['arch'],
                          model_info['num_classes'], model_info['lstm_layers'],
                          model_info['hidden_size'], model_info['fc_size'])
        print(model)
        model.cuda()
        model.load_state_dict(model_info['state_dict'])
        best_prec = model_info['best_prec']
        cur_epoch = model_info['epoch']
    else:
        # load and create model
        print("==> creating model '{}' ".format(args.arch))

        original_model = models.__dict__[args.arch](pretrained=True)
        model = LSTMModel(original_model, args.arch,
                          len(train_dataset.classes), args.lstm_layers,
                          args.hidden_size, args.fc_size)
        print(model)
        model.cuda()
        cur_epoch = 0

    # loss criterion and optimizer
    criterion = nn.CrossEntropyLoss(reduction='none')
    criterion = criterion.cuda()

    if args.optim == 'sgd':
        optimizer = torch.optim.SGD([{
            'params': model.fc_pre.parameters()
        }, {
            'params': model.rnn.parameters()
        }, {
            'params': model.fc.parameters()
        }],
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    elif args.optim == 'rmsprop':
        optimizer = torch.optim.RMSprop([{
            'params': model.fc_pre.parameters()
        }, {
            'params': model.rnn.parameters()
        }, {
            'params': model.fc.parameters()
        }],
                                        lr=args.lr,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)

    elif args.optim == 'adam':
        optimizer = torch.optim.Adam([{
            'params': model.fc_pre.parameters()
        }, {
            'params': model.rnn.parameters()
        }, {
            'params': model.fc.parameters()
        }],
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)

    # Training on epochs
    for epoch in range(cur_epoch, args.epochs):

        optimizer = adjust_learning_rate(optimizer, epoch)

        print(
            "---------------------------------------------------Training---------------------------------------------------"
        )

        # train on one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        print(
            "--------------------------------------------------Validation--------------------------------------------------"
        )

        # evaluate on validation set
        prec1, prec5 = validate(val_loader, model, criterion)

        print("------Validation Result------")
        print("   Top1 accuracy: {prec: .2f} %".format(prec=prec1.item()))
        print("   Top5 accuracy: {prec: .2f} %".format(prec=prec5.item()))
        print("-----------------------------")

        # remember best top1 accuracy and save checkpoint
        is_best = prec1 > best_prec
        best_prec = max(prec1, best_prec)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'num_classes': len(train_dataset.classes),
                'lstm_layers': args.lstm_layers,
                'hidden_size': args.hidden_size,
                'fc_size': args.fc_size,
                'state_dict': model.state_dict(),
                'best_prec': best_prec,
                'optimizer': optimizer.state_dict(),
            }, is_best)
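
In this example adjust_learning_rate returns the optimizer rather than a learning rate, which suggests it rewrites the group lrs in place once per epoch. The function itself is not shown; a minimal step-decay sketch compatible with that call signature (the base lr, decay factor, and step interval are assumptions) would be:

def adjust_learning_rate(optimizer, epoch, base_lr=1e-3, decay_factor=0.1, step=30):
    # Hypothetical schedule: divide the learning rate by 10 every `step` epochs.
    lr = base_lr * (decay_factor ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return optimizer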