def create_dataloader(args):
    """Build the training and validation DataLoaders described by *args*.

    Side effects: *args* is mutated in place — dataset paths are joined
    onto ``args.root_path``, and ``args.scales``, ``args.mean`` and
    ``args.std`` are (re)computed before the transforms are assembled.

    Returns:
        Tuple ``(train_loader, val_loader)`` of ``torch.utils.data.DataLoader``.
    """
    if args.root_path != '':
        args.video_path = os.path.join(args.root_path, args.video_path)
        args.annotation_path = os.path.join(args.root_path,
                                            args.annotation_path)
        args.result_path = os.path.join(args.root_path, args.result_path)
        if args.resume_path:
            args.resume_path = os.path.join(args.root_path, args.resume_path)
        if args.pretrain_path:
            # Pretrained weights are resolved on their own, deliberately
            # NOT joined under root_path.
            args.pretrain_path = os.path.abspath(args.pretrain_path)

    # Crop scales form a geometric series: initial_scale * scale_step ** i.
    scales = [args.initial_scale]
    for _ in range(args.n_scales - 1):
        scales.append(scales[-1] * args.scale_step)
    args.scales = scales

    args.mean = get_mean(args.norm_value, dataset=args.mean_dataset)
    args.std = get_std(args.norm_value)

    # Choose normalisation: none, mean-only, or mean+std.
    if args.no_mean_norm and not args.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif args.std_norm:
        norm_method = Normalize(args.mean, args.std)
    else:
        norm_method = Normalize(args.mean, [1, 1, 1])

    assert args.train_crop in ['random', 'corner', 'center']
    if args.train_crop == 'random':
        crop_method = MultiScaleRandomCrop(args.scales, args.sample_size)
    elif args.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(args.scales,
                                           args.sample_size,
                                           crop_positions=['c'])
    else:  # 'corner'
        crop_method = MultiScaleCornerCrop(args.scales, args.sample_size)

    # Training pipeline: stochastic crop + flip, then tensorise and normalise.
    train_spatial = Compose([
        crop_method,
        RandomHorizontalFlip(),
        ToTensor(args.norm_value),
        norm_method,
    ])
    training_data = get_training_set(args, train_spatial,
                                     TemporalRandomCrop(args.sample_duration),
                                     ClassLabel())
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.n_threads,
                                               pin_memory=True)

    # Validation pipeline: deterministic resize + fixed-position corner crop.
    val_spatial = Compose([
        Scale(int(args.sample_size / args.scale_in_test)),
        CornerCrop(args.sample_size, args.crop_position_in_test),
        ToTensor(args.norm_value),
        norm_method,
    ])
    validation_data = get_validation_set(
        args, val_spatial, TemporalCenterCrop(args.sample_duration),
        ClassLabel())
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.n_threads,
                                             pin_memory=True)

    return train_loader, val_loader
# --- Exemplo n.º 2 (scraped-snippet separator; following fragment is truncated) ---
    opt.mean = get_mean(opt.norm_value)
    print(opt)
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)

    torch.manual_seed(opt.manual_seed)

    model = generate_model(opt)
    print(model)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()

    if not opt.no_train:
        spatial_transform = Compose([
            MultiScaleCornerCrop(opt.scales, opt.sample_size),
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value),
            Normalize(opt.mean, [1, 1, 1])
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        if opt.dataset == 'kinetics':
            training_data = Kinetics(opt.video_path,
                                     opt.annotation_path,
                                     'training',
                                     spatial_transform=spatial_transform,
                                     temporal_transform=temporal_transform,
                                     target_transform=target_transform)
        else:
            training_data = ActivityNet(opt.video_path,
# --- Exemplo n.º 3 (scraped-snippet separator; following fragment is truncated) ---
    if not opt.no_cuda:
        criterion = criterion.cuda()

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.sample_size,
                                               crop_positions=['c'])
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(training_data,
                                                   batch_size=opt.batch_size,
# --- Exemplo n.º 4 (scraped-snippet separator) ---
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict,
             out_dir, seqLen, trainBatchSize, valBatchSize, numEpochs, lr1,
             decay_factor, decay_step, memSize, regression, rloss, debug,
             verbose, CAM):
    """Train the self-supervised attention model on GTEA-61 and validate each epoch.

    Stage 1 trains only the LSTM cell and classifier on top of a frozen
    backbone; stage 2 loads *stage1_dict* and additionally fine-tunes the
    last ResNet block, its fc layer and the motion-segmentation (ms) module.
    The auxiliary motion task uses a regression loss selected by *rloss*
    when *regression* is truthy, otherwise 2-way classification.
    Logs and the best checkpoint go to ./<out_dir>/<dataset>/rgb/stage<stage>/.

    NOTE(review): *val_data_dir* and *CAM* are currently unused — validation
    draws from *train_data_dir* with the "S2" subject split; confirm intended.
    """
    # GTEA 61 has 61 action classes.
    num_classes = 61

    # Train/Validation split by subject ID.
    train_splits = ["S1", "S3", "S4"]
    val_splits = ["S2"]

    # Debug mode runs single-threaded on CPU for easier stepping.
    if debug:
        n_workers = 0
        device = 'cpu'
    else:
        n_workers = 4
        device = 'cuda'

    model_folder = os.path.join(
        './', out_dir, dataset, 'rgb',
        'stage' + str(stage))  # Dir for saving models and log files
    # Refuse to clobber a previous run.
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Plain-text log files (closed at the end of the function).
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader transforms (ImageNet channel statistics).
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        # ToTensor(),
        # normalize
    ])
    # RGB frames get tensorised + normalised; motion-task maps are resized
    # to 7x7 and tensorised only.
    transform_rgb = Compose([ToTensor(), normalize])
    transform_MS = Compose([Resize((7, 7)), ToTensor()])

    vid_seq_train = makeDataset(train_data_dir,
                                splits=train_splits,
                                spatial_transform=spatial_transform,
                                transform_rgb=transform_rgb,
                                transform_MS=transform_MS,
                                seqLen=seqLen,
                                fmt='.png',
                                regression=regression)

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=n_workers,
                                               pin_memory=True)

    # NOTE(review): validation reads from train_data_dir with the val
    # splits ("S2"); val_data_dir is never used — verify this is intended.
    vid_seq_val = makeDataset(train_data_dir,
                              splits=val_splits,
                              spatial_transform=Compose(
                                  [Scale(256), CenterCrop(224)]),
                              transform_rgb=transform_rgb,
                              transform_MS=transform_MS,
                              seqLen=seqLen,
                              fmt='.png',
                              regression=regression,
                              verbose=False)

    val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                             batch_size=valBatchSize,
                                             shuffle=False,
                                             num_workers=n_workers,
                                             pin_memory=True)
    valInstances = vid_seq_val.__len__()
    '''
    if val_data_dir is not None:

        vid_seq_val = makeDataset(val_data_dir,
                                  spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                                  seqLen=seqLen, fmt='.jpg')

        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                                 shuffle=False, num_workers=2, pin_memory=True)
        valInstances = vid_seq_val.__len__()
    '''
    trainInstances = vid_seq_train.__len__()

    # Collect only the parameters that should receive gradients.
    train_params = []
    if stage == 1:
        # Stage 1: whole model frozen here; lstm_cell/classifier are
        # unfrozen further below.
        if regression:
            model = SelfSupervisedAttentionModel(num_classes=num_classes,
                                                 mem_size=memSize,
                                                 n_channels=1)
        else:
            model = SelfSupervisedAttentionModel(num_classes=num_classes,
                                                 mem_size=memSize)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:
        # Stage 2: warm-start from the stage-1 checkpoint, then selectively
        # unfreeze the last ResNet block, fc, and the ms module.
        if regression:
            model = SelfSupervisedAttentionModel(num_classes=num_classes,
                                                 mem_size=memSize,
                                                 n_channels=1)
        else:
            model = SelfSupervisedAttentionModel(num_classes=num_classes,
                                                 mem_size=memSize)

        model.load_state_dict(torch.load(stage1_dict), strict=False)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]

        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)

        # Add params from ms_module
        for params in model.ms_module.parameters():
            params.requires_grad = True
            train_params += [params]

    # The recurrent cell and classifier are trained in both stages.
    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.lstm_cell.train(True)

    model.classifier.train(True)
    model.ms_module.train(True)
    model.to(device)

    # wandb.init(project="first_person_action_recognition")

    # Action-classification loss; the ms-task loss is chosen below.
    loss_fn = nn.CrossEntropyLoss()
    if regression:
        # NOTE(review): an unrecognised rloss leaves loss_ms_fn unbound and
        # fails later at first use — consider validating rloss up front.
        if rloss == 'MSE':
            # Mean Squared Error loss
            loss_ms_fn = nn.MSELoss()  # it should work
        elif rloss == 'L1':
            # L1 loss
            loss_ms_fn = nn.L1Loss()
        elif rloss == 'SmoothL1':
            # Huber Loss or Smooth L1 Loss
            loss_ms_fn = nn.SmoothL1Loss()
        elif rloss == 'KLdiv':
            # Kullback-Leiber Loss
            loss_ms_fn = nn.KLDivLoss()
    else:
        # classification
        loss_ms_fn = nn.CrossEntropyLoss()  # TODO: check paper Planamente

    optimizer_fn = torch.optim.Adam(train_params,
                                    lr=lr1,
                                    weight_decay=4e-5,
                                    eps=1e-4)

    # Step-wise LR decay at the epochs listed in decay_step.
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0  # best validation accuracy seen so far (despite the name)

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0

        # Re-enable train mode only on the sub-modules being optimised.
        #model.train(True)
        model.lstm_cell.train(True)
        model.classifier.train(True)
        if stage == 2:
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)

            model.ms_module.train(True)

        for i, (inputsRGB, inputsMS, targets) in enumerate(train_loader):
            # Inputs:
            #   - inputsRGB : the rgb frame input
            # Labels :
            #   - inputsMS  : the motion task label
            #   - targets   : output
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            # Permute to time-first layout (seq, batch, C, H, W).
            inputVariable = inputsRGB.permute(1, 0, 2, 3, 4).to(device)
            labelVariable = targets.to(device)
            msVariable = inputsMS.to(device)
            trainSamples += inputsRGB.size(0)
            output_label, _, output_ms = model(inputVariable, device)
            loss_c = loss_fn(output_label, labelVariable)
            if regression:
                # Flatten the motion maps; assumes a 7x7 spatial grid per
                # frame (matches transform_MS above) — TODO confirm.
                msVariable = torch.reshape(
                    msVariable, (seqLen * 7 * 7, msVariable.size(0)))
                output_ms = torch.sigmoid(output_ms)
                output_ms = torch.reshape(output_ms,
                                          (seqLen * 7 * 7, output_ms.size(0)))
            else:
                # classification task: CrossEntropy expects class dim second.
                msVariable = torch.reshape(
                    msVariable, (seqLen * 7 * 7, msVariable.size(0))).long()
                output_ms = torch.reshape(
                    output_ms, (seqLen * 7 * 7, 2, output_ms.size(0)))  # (N, classes, batch)

            # Total loss = action classification + motion-task loss.
            loss_ms = loss_ms_fn(output_ms, msVariable)
            loss = loss_c + loss_ms
            if verbose:
                print(loss_c)
                print(loss_ms)
                print(loss)
                print()
            # loss = loss_fn(output_label, labelVariable) + loss_ms_fn(output_ms, inputsMS)  # TODO (maybe): swap dims 0 and 1 of inputsMS
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(device)).sum()
            epoch_loss += loss.data.item()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.data.item() / trainSamples) * 100

        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))

        # VALIDATION PHASE
        #if val_data_dir is not None:
        if (epoch + 1) % 1 == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            for j, (inputsRGB, inputsMS, targets) in enumerate(val_loader):
                val_iter += 1
                val_samples += inputsRGB.size(0)
                inputVariable = inputsRGB.permute(1, 0, 2, 3, 4).to(
                    device)  # the permutation is only for computation (time-first layout)
                labelVariable = targets.to(device)
                msVariable = inputsMS.to(device)
                output_label, _, output_ms = model(inputVariable, device)
                loss_c = loss_fn(output_label, labelVariable)
                if regression:
                    msVariable = torch.reshape(
                        msVariable, (seqLen * 7 * 7, msVariable.size(0)))
                    output_ms = torch.sigmoid(output_ms)
                    output_ms = torch.reshape(
                        output_ms, (seqLen * 7 * 7, output_ms.size(0)))
                else:
                    # classification task
                    msVariable = torch.reshape(
                        msVariable,
                        (seqLen * 7 * 7, msVariable.size(0))).long()
                    output_ms = torch.reshape(
                        output_ms, (seqLen * 7 * 7, 2, output_ms.size(0)))
                loss_ms = loss_ms_fn(output_ms, msVariable)
                val_loss = loss_c + loss_ms
                # val_loss = loss_fn(output_label, labelVariable) # TODO: add ms Loss
                val_loss_epoch += val_loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += (predicted == targets.to(device)).sum()
            val_accuracy = (numCorr.data.item() / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Valid: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))

            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            # Checkpoint only when validation accuracy improves.
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
            '''else:
                if (epoch+1) % 10 == 0:
                    save_path_model = (model_folder + '/model_rgb_state_dict_epoch' + str(epoch+1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)
                '''
        optim_scheduler.step()

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
# --- Exemplo n.º 5 (scraped-snippet separator) ---
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen, trainBatchSize,
             valBatchSize, numEpochs, lr1, decay_factor, decay_step, memSize, regressor):
    """Train the attention model with an auxiliary motion-segmentation (MS) task.

    Stage 1 trains only lstm_cell + classifier on a frozen backbone; stage 2
    loads *stage1_dict* and additionally fine-tunes resNet.layer4, resNet.fc
    and the MS head (model.conv / model.clas).  The MS branch is trained as
    regression (MSELoss) when ``regressor == 1`` and as 2-way classification
    when ``regressor == 0``.  Checkpoints, text logs and TensorBoard scalars
    go to ./<out_dir>/<dataset>/MS/<stage>/.

    Fixes vs. the original:
      * validation used ``Variable(..., volatile=True)`` and
        ``targets.cuda(async=True)`` — 'async' is a reserved word since
        Python 3.7 (SyntaxError) and 'volatile' was removed from PyTorch;
        validation now runs under ``torch.no_grad()``.
      * the LR scheduler is stepped once per epoch instead of once after the
        whole training loop, so the *decay_step* milestones actually apply.
    """
    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    model_folder = os.path.join('./', out_dir, dataset, 'MS', str(stage))  # Dir for saving models and log files
    # Refuse to clobber a previous run.
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files (TensorBoard + plain text; *_ms files track the MS task).
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')
    train_log_loss_ms = open((model_folder + '/train_log_loss_ms.txt'), 'w')
    val_log_loss_ms = open((model_folder + '/val_log_loss_ms.txt'), 'w')
    train_log_acc_ms = open((model_folder + '/train_log_acc_ms.txt'), 'w')
    val_log_acc_ms = open((model_folder + '/val_log_acc_ms.txt'), 'w')

    # Data loader.  NOTE(review): `normalize` is not used by either
    # transform below — presumably normalisation happens inside the
    # dataset; kept for reference.
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224)])

    vid_seq_train = makeDataset(train_data_dir,
                                spatial_transform=spatial_transform, seqLen=seqLen, fmt='.png', phase='train', regressor=regressor)

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                                               shuffle=True, num_workers=4, pin_memory=True)
    if val_data_dir is not None:

        vid_seq_val = makeDataset(val_data_dir,
                                  spatial_transform=Compose([Scale(256), CenterCrop(224)]),
                                  seqLen=seqLen, fmt='.png', phase='test', regressor=regressor)

        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                                 shuffle=False, num_workers=2, pin_memory=True)
        valInstances = vid_seq_val.__len__()

    trainInstances = vid_seq_train.__len__()

    # Collect only the parameters that should receive gradients.
    train_params = []
    if stage == 1:
        # Stage 1: freeze everything; lstm_cell/classifier are unfrozen below.
        model = attentionModel_ml(num_classes=num_classes, mem_size=memSize, regressor=regressor)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:
        # Stage 2: warm-start from stage 1, then selectively unfreeze the
        # last ResNet block, fc, and the MS head.
        model = attentionModel_ml(num_classes=num_classes, mem_size=memSize, regressor=regressor)
        model.load_state_dict(torch.load(stage1_dict), strict=False)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.conv.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.clas.parameters():
            params.requires_grad = True
            train_params += [params]
        model.conv.train(True)
        model.clas.train(True)
        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)

    # The recurrent cell and classifier are trained in both stages.
    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.lstm_cell.train(True)

    model.classifier.train(True)
    model.cuda()

    # Action classification, MS classification and MS regression losses.
    loss_fn = nn.CrossEntropyLoss()
    loss_fms = nn.NLLLoss()
    loss_reg = nn.MSELoss()
    optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5, eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step,
                                                           gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0  # best validation accuracy seen so far (despite the name)

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        numCorrTrain_ms = 0
        trainSamples = 0
        iterPerEpoch = 0
        epoch_loss_ms = 0
        # Re-enable train mode only on the sub-modules being optimised.
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        if stage == 2:
            model.conv.train(True)
            model.clas.train(True)
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)
        for i, (inputs, binary_map, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            # Permute to time-first layout (seq, batch, C, H, W).
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)
            output_label, output_ms = model(inputVariable)

            loss = loss_fn(output_label, labelVariable)

            # retain_graph so the MS loss can backprop through the shared trunk.
            if stage == 2:
                loss.backward(retain_graph=True)
            else:
                loss.backward()
            if regressor == 0:
                # Classification target: flatten to per-pixel long labels.
                binary_map = Variable(binary_map.permute(1, 0, 2, 3, 4).type(torch.LongTensor).cuda())
                output_ms = output_ms.view(-1, 2)
            elif regressor == 1:
                binary_map = Variable(binary_map.permute(1, 0, 2, 3, 4).cuda())
                output_ms = output_ms.view(-1)
            binary_map = binary_map.contiguous().view(-1)

            # The MS loss only contributes in stage 2.
            if stage == 2:
                if regressor == 1:
                    loss_ms = loss_reg(output_ms, binary_map)
                    loss_ms.backward()

                    epoch_loss_ms += loss_ms.item()
                elif regressor == 0:
                    loss_ms = loss_fn(output_ms, binary_map)
                    loss_ms.backward()
                    _, predicted = torch.max(output_ms.data, 1)
                    numCorrTrain_ms += torch.sum(predicted == binary_map.data).data.item()
                    epoch_loss_ms += loss_ms.item()

            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += torch.sum(predicted == labelVariable.data).data.item()
            epoch_loss += loss.item()

        avg_loss = epoch_loss / iterPerEpoch
        if stage == 2:
            trainAccuracy = (numCorrTrain_ms / trainSamples) * 100
            avg_loss_ms = epoch_loss_ms / iterPerEpoch
            #avg_loss = avg_loss + avg_loss_ms
            train_log_loss_ms.write('Train Loss MS after {} epochs = {}\n'.format(epoch + 1, avg_loss_ms))
            if regressor == 0:
                train_log_acc_ms.write('Train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy))

        # Overwrite with the action-classification accuracy for reporting.
        trainAccuracy = (numCorrTrain / trainSamples) * 100

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Train Loss after {} epochs = {}\n'.format(epoch + 1, avg_loss))

        train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy))
        if val_data_dir is not None:
            # NOTE: (epoch+1) % 1 is always 0, so validation runs every
            # epoch and the `else` checkpoint branch below is dead code;
            # kept for parity with the original.
            if (epoch + 1) % 1 == 0:
                model.train(False)
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0
                numCorr_ms = 0
                epoch_loss_ms_val = 0

                # FIX: no_grad replaces the removed Variable(volatile=True)
                # and the invalid targets.cuda(async=True).
                with torch.no_grad():
                    for j, (inputs, binary_map, targets) in enumerate(val_loader):
                        val_iter += 1
                        val_samples += inputs.size(0)
                        inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
                        labelVariable = targets.cuda(non_blocking=True)
                        output_label, output_ms = model(inputVariable)
                        val_loss = loss_fn(output_label, labelVariable)
                        val_loss_epoch += val_loss.item()
                        if regressor == 0:
                            binary_map = binary_map.permute(1, 0, 2, 3, 4).type(torch.LongTensor).cuda()
                            output_ms = output_ms.view(-1, 2)
                        elif regressor == 1:
                            binary_map = binary_map.permute(1, 0, 2, 3, 4).cuda()
                            output_ms = output_ms.view(-1)
                        binary_map = binary_map.contiguous().view(-1)
                        if stage == 2:
                            if regressor == 1:
                                loss_ms = loss_reg(output_ms, binary_map)

                                epoch_loss_ms_val += loss_ms.item()
                            elif regressor == 0:
                                loss_ms = loss_fn(output_ms, binary_map)
                                _, predicted = torch.max(output_ms.data, 1)
                                numCorr_ms += torch.sum(predicted == binary_map.data).data.item()
                                epoch_loss_ms_val += loss_ms.item()

                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += torch.sum(predicted == labelVariable.data).data.item()

                avg_val_loss = val_loss_epoch / val_iter
                if stage == 2:
                    avg_loss_ms = epoch_loss_ms_val / val_iter
                    val_accuracy = (numCorr_ms / val_samples) * 100
                    #avg_loss = avg_loss + avg_loss_ms
                    val_log_loss_ms.write('Val Loss MS after {} epochs = {}\n'.format(epoch + 1, avg_loss_ms))
                    if regressor == 0:
                        val_log_acc_ms.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
                val_accuracy = (numCorr / val_samples) * 100
                print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)

                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
                val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
                # Checkpoint only when validation accuracy improves.
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder + '/model_ms_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
            else:
                if (epoch + 1) % 10 == 0:
                    save_path_model = (model_folder + '/model_ms_state_dict_epoch' + str(epoch + 1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)

        # FIX: step the scheduler once per epoch (it originally ran a single
        # time after the whole loop, so the LR decay never took effect).
        optim_scheduler.step()

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    train_log_loss_ms.close()
    val_log_loss_ms.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
# --- Exemplo n.º 6 (scraped-snippet separator) ---
from makeDatasetsNames import *
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale,
                                Normalize, MultiScaleCornerCrop,
                                RandomHorizontalFlip)
import matplotlib.pyplot as plt

import importlib
importlib.reload(grad_cam)

# ImageNet channel statistics used to normalise RGB frames.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

normalize = Normalize(mean=mean, std=std)
# Evaluation-time transform: shorter side scaled to 256, a single 224 corner
# crop at scale factor 1, then tensor conversion and normalisation.
spatial_transform = Compose(
    [Scale(256),
     MultiScaleCornerCrop([1], 224),
     ToTensor(), normalize])


def frame_example(image):
    flowModel = "../experiments/gtea61/flow/model_flow_state_dict.pth"
    rgbModel = "modelsFolder/experiments/gtea61/rgb/stage2/model_rgb_state_dict.pth"
    stackSize = 5
    memSize = 512
    num_classes = 61
    seqLen = 7
    model_state_dict = "modelsFolder/selfSupervisedExperiments/gtea61/twoStream/model_twoStream_state_dict.pth"
    trainDatasetDir = "../GTEA61/flow_x_processed/train"
    model = twoStreamAttentionModel(flowModel=flowModel,
                                    frameModel=rgbModel,
                                    stackSize=stackSize,
Exemplo n.º 7
0
def main_run(dataset, stage, root_dir, out_dir, stage1_dict, seqLen,
             trainBatchSize, numEpochs, lr1, decay_factor, decay_step, memSize,
             outPool_size, split, evalInterval, regression, rloss, debug):
    """Train the attention model for one training stage and log progress.

    Stage 1 trains only the LSTA cell and the classifier on top of a frozen
    backbone.  Stages 2 and 3 resume from the previous stage's checkpoint
    (looked up inside ``stage1_dict``) and additionally fine-tune the last
    ResNet block, the fc layer and — in stage 2 — the ``ms_module`` trained
    on the auxiliary motion task (its loss is ``loss_ms_fn``; ``regression``
    selects a regression loss named by ``rloss`` instead of classification).

    Checkpoints, text logs and TensorBoard scalars are written to a fresh
    directory under ``out_dir``; the run aborts if that directory exists.
    """
    # Debug mode runs everything single-process on CPU for easier stepping.
    if debug:
        n_workers = 0
        n_workers_test = 0
        device = 'cpu'
    else:
        n_workers = 4
        n_workers_test = 2
        device = 'cuda'
    # Train/Validation/Test split (GTEA-61 subject splits)
    train_splits = ["S1", "S3", "S4"]
    val_splits = ["S2"]

    test_split = split

    # ImageNet channel statistics for input normalisation.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    # NOTE(review): the self-assignments below are no-ops kept verbatim.
    stage = stage
    #test_split = split
    seqLen = seqLen
    memSize = memSize
    c_cam_classes = outPool_size
    dataset = dataset
    best_acc = 0

    # NOTE(review): both branches are identical no-op reassignments —
    # presumably a leftover hook for per-stage hyper-parameters.
    if stage == 1:
        trainBatchSize = trainBatchSize
        testBatchSize = trainBatchSize
        lr1 = lr1
        decay_factor = decay_factor
        decay_step = decay_step
        numEpochs = numEpochs
    elif stage == 2 or stage == 3:
        trainBatchSize = trainBatchSize
        testBatchSize = trainBatchSize
        lr1 = lr1
        decay_factor = decay_factor
        decay_step = decay_step
        numEpochs = numEpochs

    num_classes = 61

    dataset_dir = root_dir

    # Output directory ./<out_dir>/stage<stage>; abort if it already exists
    # so a previous run is never overwritten.
    #model_folder = os.path.join('.', out_dir, dataset, str(test_split))
    model_folder = os.path.join('./', out_dir, 'stage' + str(stage))
    if not os.path.exists(model_folder):
        os.makedirs(model_folder)
    else:
        print('Directory {} exists!'.format(model_folder))
        sys.exit()

    # Record the hyper-parameters of this run next to its outputs.
    note_fl = open(model_folder + '/note.txt', 'w')
    note_fl.write('Number of Epochs = {}\n'
                  'lr = {}\n'
                  'Train Batch Size = {}\n'
                  'Sequence Length = {}\n'
                  'Decay steps = {}\n'
                  'Decay factor = {}\n'
                  'Memory size = {}\n'
                  'Memory cam classes = {}\n'.format(numEpochs, lr1,
                                                     trainBatchSize, seqLen,
                                                     decay_step, decay_factor,
                                                     memSize, c_cam_classes))

    note_fl.close()

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    train_log_loss_batch = open((model_folder + '/train_log_loss_batch.txt'),
                                'w')
    test_log_loss = open((model_folder + '/test_log_loss.txt'), 'w')
    test_log_acc = open((model_folder + '/test_log_acc.txt'), 'w')

    # Dataloaders
    # NOTE(review): this transform (and the normalize above) is immediately
    # redefined below and never used — dead code kept verbatim.
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    print('Preparing dataset...')
    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    # Geometric augmentation shared by RGB frames and motion-task targets;
    # tensor conversion / normalisation are applied separately below.
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        # ToTensor(),
        # normalize
    ])
    transform_rgb = Compose([ToTensor(), normalize])
    # Motion-task targets are downsampled to the 7x7 conv-feature grid.
    transform_MS = Compose([Resize((7, 7)), ToTensor()])

    vid_seq_train = makeDataset(dataset_dir,
                                splits=train_splits,
                                spatial_transform=spatial_transform,
                                transform_rgb=transform_rgb,
                                transform_MS=transform_MS,
                                seqLen=seqLen,
                                fmt='.png',
                                regression=regression)

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=n_workers,
                                               pin_memory=True)

    # Validation set: deterministic centre crop, no flip.
    vid_seq_test = makeDataset(dataset_dir,
                               splits=val_splits,
                               spatial_transform=Compose(
                                   [Scale(256), CenterCrop(224)]),
                               transform_rgb=transform_rgb,
                               transform_MS=transform_MS,
                               seqLen=seqLen,
                               fmt='.png',
                               regression=regression,
                               verbose=False)

    # NOTE(review): uses n_workers rather than n_workers_test — confirm
    # whether that is intended.
    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=n_workers,
                                              pin_memory=True)

    print('here')
    print('Number of train samples = {}'.format(vid_seq_train.__len__()))

    print('Number of test samples = {}'.format(vid_seq_test.__len__()))

    train_params = []
    if stage == 1:
        # Stage 1: fresh model; freeze everything now — the LSTA cell and
        # classifier are unfrozen further below.
        if regression:
            model = attentionModel(num_classes=num_classes,
                                   mem_size=memSize,
                                   n_channels=1)
        else:
            model = attentionModel(num_classes=num_classes, mem_size=memSize)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    elif stage == 2 or stage == 3:
        # Stages 2/3: model with the class-activation memory enabled.
        if regression:
            model = attentionModel(num_classes=num_classes,
                                   mem_size=memSize,
                                   n_channels=1,
                                   c_cam_classes=c_cam_classes)
        else:
            model = attentionModel(num_classes=num_classes,
                                   mem_size=memSize,
                                   c_cam_classes=c_cam_classes)

        #model = attentionModel(num_classes=num_classes, mem_size=memSize, c_cam_classes=c_cam_classes)
        # Resume from the last checkpoint of the previous stage.
        if stage == 2:
            checkpoint_path = os.path.join(
                stage1_dict, 'last_checkpoint_stage' + str(1) + '.pth.tar')
        elif stage == 3:
            checkpoint_path = os.path.join(
                stage1_dict, 'last_checkpoint_stage' + str(2) + '.pth.tar')
        if os.path.exists(checkpoint_path):
            print('Loading weights from checkpoint file {}'.format(
                checkpoint_path))
        else:
            print('Checkpoint file {} does not exist'.format(checkpoint_path))
            sys.exit()
        last_checkpoint = torch.load(checkpoint_path)
        # strict=False: stage-2/3 model may have modules absent from the
        # earlier checkpoint (e.g. ms_module).
        model.load_state_dict(last_checkpoint['model_state_dict'],
                              strict=False)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False

        # Fine-tune the last ResNet block (layer4 convs) and the fc layer.
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]

        # Add params from ms_module
        # (auxiliary motion-task head — only trained in stage 2).
        if stage == 2:
            for params in model.ms_module.parameters():
                params.requires_grad = True
                train_params += [params]

    # The LSTA cell and classifier are trained in every stage.
    for params in model.lsta_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.classifier.train(True)

    model.ms_module.train(True)
    model.to(device)

    # Action-classification loss.
    loss_fn = nn.CrossEntropyLoss()

    # Motion-task loss: regression variant selected by `rloss`, otherwise
    # per-cell binary classification.
    if regression:
        if rloss == 'MSE':
            # Mean Squared Error loss
            loss_ms_fn = nn.MSELoss()  # it should work
        elif rloss == 'L1':
            # L1 loss
            loss_ms_fn = nn.L1Loss()
        elif rloss == 'SmoothL1':
            # Huber Loss or Smooth L1 Loss
            loss_ms_fn = nn.SmoothL1Loss()
        elif rloss == 'KLdiv':
            # Kullback-Leiber Loss
            loss_ms_fn = nn.KLDivLoss()
    else:
        # classification
        loss_ms_fn = nn.CrossEntropyLoss()  # TODO: check paper Planamente

    optimizer_fn = torch.optim.Adam(train_params,
                                    lr=lr1,
                                    weight_decay=5e-4,
                                    eps=1e-4)

    # Step the LR down by decay_factor at each epoch in decay_step.
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0

    for epoch in range(numEpochs):
        #optim_scheduler.step()
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        # model.classifier.train(True)
        # Re-enable train mode on the trained sub-modules each epoch (the
        # evaluation block below switches the whole model to eval).
        model.lsta_cell.train(True)
        model.classifier.train(True)
        # NOTE(review): stage 3 also fine-tunes these layers but they are not
        # switched to train() here — confirm this is intended.
        if stage == 2:
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)
            model.ms_module.train(True)

        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, inputsMS, targets) in enumerate(train_loader):
            # Inputs:
            #   - inputsRGB : the rgb frame input
            # Labels :
            #   - inputsMS  : the motion task label
            #   - targets   : output

            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            # Batch-first -> sequence-first layout expected by the model.
            inputVariable = inputs.permute(1, 0, 2, 3, 4).to(device)
            labelVariable = targets.to(device)
            msVariable = inputsMS.to(device)
            trainSamples += inputs.size(0)
            output_label, _, output_ms = model(inputVariable, device)
            loss_c = loss_fn(output_label, labelVariable)
            if stage == 2:
                if regression:
                    # Flatten targets to (seqLen*7*7, batch) and squash the
                    # predictions to [0, 1] before the regression loss.
                    msVariable = torch.reshape(
                        msVariable, (seqLen * 7 * 7, msVariable.size(0)))
                    output_ms = torch.sigmoid(output_ms)
                    output_ms = torch.reshape(
                        output_ms, (seqLen * 7 * 7, output_ms.size(0)))
                else:
                    # classification task
                    # CrossEntropyLoss layout: logits (N, C=2, batch),
                    # integer targets (N, batch).
                    msVariable = torch.reshape(
                        msVariable,
                        (seqLen * 7 * 7, msVariable.size(0))).long()
                    output_ms = torch.reshape(
                        output_ms, (seqLen * 7 * 7, 2, output_ms.size(0)))
                loss_ms = loss_ms_fn(output_ms, msVariable)
                loss = loss_c + loss_ms
            else:
                loss = loss_c
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            # NOTE(review): .sum() returns a tensor, so trainAccuracy below
            # is a 0-dim tensor rather than a Python number.
            numCorrTrain += (predicted == targets.to(device)).sum()
            #print('Training loss after {} iterations = {} '.format(train_iter, loss.data.item()))
            #train_log_loss_batch.write('Training loss after {} iterations = {}\n'.format(train_iter, loss.data.item()))
            #writer.add_scalar('train/iter_loss', loss.data.item(), train_iter)
            epoch_loss += loss.data.item()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain / trainSamples) * 100
        print('Average training loss after {} epoch = {} '.format(
            epoch + 1, avg_loss))
        print('Training accuracy after {} epoch = {}% '.format(
            epoch + 1, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        # Always keep a resumable "last" checkpoint for this stage.
        save_path_model = os.path.join(
            model_folder, 'last_checkpoint_stage' + str(stage) + '.pth.tar')
        save_file = {
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer_fn.state_dict(),
            'best_acc': best_acc,
        }
        torch.save(save_file, save_path_model)

        # Periodic evaluation on the validation split.
        if (epoch + 1) % evalInterval == 0:
            #print('Testing...')
            model.train(False)
            test_loss_epoch = 0
            test_iter = 0
            test_samples = 0
            numCorr = 0
            for j, (inputs, inputsMS, targets) in enumerate(test_loader):
                #print('testing inst = {}'.format(j))
                test_iter += 1
                test_samples += inputs.size(0)
                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(device)
                labelVariable = targets.to(device)
                msVariable = inputsMS.to(device)

                output_label, _, output_ms = model(inputVariable, device)
                test_loss_c = loss_fn(output_label, labelVariable)
                # Same loss layout as in the training loop above.
                if stage == 2:
                    if regression:
                        msVariable = torch.reshape(
                            msVariable, (seqLen * 7 * 7, msVariable.size(0)))
                        output_ms = torch.sigmoid(output_ms)
                        output_ms = torch.reshape(
                            output_ms, (seqLen * 7 * 7, output_ms.size(0)))
                    else:
                        # classification task
                        msVariable = torch.reshape(
                            msVariable,
                            (seqLen * 7 * 7, msVariable.size(0))).long()
                        output_ms = torch.reshape(
                            output_ms, (seqLen * 7 * 7, 2, output_ms.size(0)))
                    test_loss_ms = loss_ms_fn(output_ms, msVariable)
                    test_loss = test_loss_c + test_loss_ms
                else:
                    test_loss = test_loss_c
                test_loss_epoch += test_loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += (predicted == targets.to(device)).sum()
            test_accuracy = (numCorr / test_samples) * 100
            avg_test_loss = test_loss_epoch / test_iter
            print('Test Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_test_loss))
            print('Test Accuracy after {} epochs = {}%'.format(
                epoch + 1, test_accuracy))
            writer.add_scalar('test/epoch_loss', avg_test_loss, epoch + 1)
            writer.add_scalar('test/accuracy', test_accuracy, epoch + 1)
            test_log_loss.write('Test Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_test_loss))
            test_log_acc.write('Test Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, test_accuracy))

            # Track the best validation accuracy and snapshot that model.
            if test_accuracy > best_acc:
                best_acc = test_accuracy
                save_path_model = os.path.join(
                    model_folder,
                    'best_checkpoint_stage' + str(stage) + '.pth.tar')
                save_file = {
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer_fn.state_dict(),
                    'best_acc': best_acc,
                }
                torch.save(save_file, save_path_model)
        # Advance the LR schedule once per epoch.
        optim_scheduler.step()
    train_log_loss.close()
    train_log_acc.close()
    test_log_acc.close()
    train_log_loss_batch.close()
    test_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
Exemplo n.º 8
0
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    assert opt.train_crop in ['random', 'corner', 'center']
    if opt.train_crop == 'random':
        ucf_crop = MultiScaleRandomCrop(opt.scales, opt.sample_size)
    elif opt.train_crop == 'corner':
        ucf_crop = MultiScaleCornerCrop(opt.scales, opt.sample_size)
    elif opt.train_crop == 'center':
        ucf_crop = MultiScaleCornerCrop(opt.scales, opt.sample_size, crop_positions=['c'])

    spatial_transform = []
    temporal_transform = []
    target_transform = []

    ucf_transform = Compose([
        RandomHorizontalFlip(),
        ucf_crop,
        ToTensor(opt.norm_value), norm_method,
    ])

    kinetics_transform = transforms.Compose([
        transforms.Resize(128),
def main_run(stage, model, supervision, train_data_dir, val_data_dir,
             stage1_dict, out_dir, seqLen, trainBatchSize, valBatchSize,
             numEpochs, lr1, lr_suphead, lr_resnet, alpha, decay_factor,
             decay_step, lossSupervision, memSize):
    """Train a recurrent attention model with an optional supervision head.

    ``model`` arrives as a string naming the architecture and is replaced by
    the instantiated network.  Stage 0 trains the full ResNet backbone,
    stage 1 freezes everything except the recurrent cell / classifier /
    supervision head, and stage 2 resumes from ``stage1_dict`` and also
    fine-tunes the last ResNet block and fc layer.  Three learning rates are
    used: ``lr_resnet`` (backbone), ``lr_suphead`` (supervision head) and
    ``lr1`` (recurrent cell + classifier).  When ``supervision`` is truthy an
    auxiliary loss on attention maps, weighted by ``alpha``, is added;
    ``lossSupervision`` selects its form (classification or regression).
    Logs and the best model go to ``./<out_dir>/rgb/stage<stage>``.
    """

    num_classes = 61

    # Instantiate the requested architecture; the string argument `model`
    # is replaced by the network object from here on.
    if model == 'ConvLSTMAttention':
        model = ConvLSTMAttention(num_classes=num_classes,
                                  mem_size=memSize,
                                  supervision=supervision,
                                  loss_supervision=lossSupervision)
    elif model == 'ConvLSTM':
        model = ConvLSTM(num_classes=num_classes,
                         mem_size=memSize,
                         supervision=supervision,
                         loss_supervision=lossSupervision)
    elif model == 'SupervisedLSTMMod':
        model = SupervisedLSTMMod(num_classes=num_classes,
                                  mem_size=memSize,
                                  supervision=supervision,
                                  loss_supervision=lossSupervision)
    elif model == 'MyNetIDT':
        model = MyNetIDT(num_classes=num_classes,
                         mem_size=memSize,
                         supervision=supervision,
                         loss_supervision=lossSupervision)
    else:
        print('Model not found')
        sys.exit()

    model_folder = os.path.join(
        './', out_dir, 'rgb',
        'stage' + str(stage))  # Dir for saving models and log files
    # Create the dir
    # Abort rather than overwrite a previous run.
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader
    # ImageNet channel statistics for input normalisation.
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    # Training transform for RGB frames (augmented).
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])
    # Matching transform for supervision maps, downsampled to the 7x7
    # conv-feature grid (Cp/TT are presumably Compose/ToTensor aliases —
    # confirm against the file's imports).
    spatial_transform_map = Cp([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        Resize((7, 7)),
        TT()
    ])
    # Validation-time map transform: no geometric augmentation.
    spatial_transform_map_2 = Cp([Resize((7, 7)), TT()])
    vid_seq_train = makeDataset_supervision(
        train_data_dir,
        train=True,
        spatial_transform=spatial_transform,
        spatial_transform_map=spatial_transform_map,
        seqLen=seqLen,
        fmt='.png')

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=8,
                                               pin_memory=True)
    # Validation is optional: only built when a directory is supplied.
    if val_data_dir is not None:

        vid_seq_val = makeDataset_supervision(
            val_data_dir,
            train=False,
            spatial_transform_map=spatial_transform_map_2,
            spatial_transform=Compose(
                [Scale(256),
                 CenterCrop(224),
                 ToTensor(), normalize]),
            seqLen=seqLen,
            fmt='.png')

        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=8,
                                                 pin_memory=True)
        valInstances = vid_seq_val.__len__()
    trainInstances = vid_seq_train.__len__()
    # Parameter groups for the three learning rates:
    #   train_params  -> backbone (lr_resnet)
    #   train_params3 -> supervision head (lr_suphead)
    #   train_params2 -> recurrent cell + classifier (lr1)
    train_params = []
    train_params3 = []
    train_params2 = []
    if stage == 0:
        # Stage 0: train the whole backbone, optionally warm-started.
        for params in model.resNet.parameters():
            params.requires_grad = True
            train_params += [params]
        if stage1_dict is not None:
            model.load_state_dict(torch.load(stage1_dict))
    elif stage == 1:
        # Stage 1: freeze everything; the local `supervision` flag is
        # cleared, which disables the auxiliary loss in the loop below.
        supervision = False
        model.eval()
        for params in model.parameters():
            params.requires_grad = False
    else:
        # Stage 2: resume from the stage-1 weights and fine-tune the last
        # ResNet block and fc layer.
        model.load_state_dict(torch.load(stage1_dict))
        model.train()
        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]

        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)
        model.sup_head.train()

    # The recurrent cell, classifier and supervision head are always
    # trainable, regardless of stage.
    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params2 += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params2 += [params]
    for params in model.sup_head.parameters():
        params.requires_grad = True
        train_params3 += [params]

    model.lstm_cell.train()
    model.classifier.train()
    model.cuda()
    # Auxiliary supervision loss on the maps; main loss is cross-entropy
    # over action classes.
    if lossSupervision == "classification":
        loss_sup = nn.CrossEntropyLoss()
    elif lossSupervision == "regression":
        loss_sup = nn.L1Loss()
    loss_fn = nn.CrossEntropyLoss()
    # One Adam optimizer with per-group learning rates (empty groups are
    # legal, e.g. train_params in stage 1).
    optimizer_fn = torch.optim.Adam([{
        "params": train_params,
        "lr": lr_resnet
    }, {
        "params": train_params3,
        "lr": lr_suphead
    }, {
        "params": train_params2,
        "lr": lr1
    }],
                                    lr=lr1,
                                    weight_decay=4e-5,
                                    eps=1e-4)
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        epoch_loss_ = 0
        loss_ = 0
        # Re-enable train mode on trained sub-modules (validation below
        # switches the whole model to eval).
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        if stage == 0:
            model.train()
        if stage == 2:
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.sup_head.train()
            model.resNet.fc.train(True)
        for i, (inputs, targets, maps) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            # Reshape supervision maps to match the head's output layout:
            # classification binarises them (ceil) to integer class targets,
            # regression keeps float maps; both fold time into the batch dim.
            if lossSupervision == "classification":
                maps = torch.ceil(maps)
                maps = maps.type(torch.LongTensor)
                maps = maps.permute(1, 0, 2, 3, 4).squeeze(2).cuda()
                maps = maps.reshape(maps.shape[0] * maps.shape[1],
                                    maps.shape[2], maps.shape[3])
            else:
                maps = maps.permute(1, 0, 2, 3, 4).cuda()
                maps = maps.reshape(maps.shape[0] * maps.shape[1],
                                    maps.shape[2], maps.shape[3],
                                    maps.shape[4])
            # Batch-first -> sequence-first layout expected by the model.
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)
            output_label, _, output_super = model(inputVariable)
            if supervision == True:
                # Auxiliary loss, scaled by alpha; retain_graph so the main
                # loss can backpropagate through the shared graph afterwards.
                loss_ = alpha * loss_sup(output_super, maps)
                loss_.backward(retain_graph=True)
                epoch_loss_ += loss_.data.item()
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            epoch_loss += loss.data.item()
        # Advance the LR schedule once per epoch.
        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain / float(trainSamples)) * 100

        avg_loss_ = epoch_loss_ / float(iterPerEpoch)
        print(
            'Train: Epoch = {} | Loss = {} | Accuracy = {} | supervision_loss {}'
            .format(epoch + 1, avg_loss, trainAccuracy, avg_loss_))
        train_log_loss.write('Train Loss after {} epochs = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(
            epoch + 1, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        if val_data_dir is not None:
            # Validate every epoch; keep the best model by accuracy.
            if (epoch + 1) % 1 == 0:
                model.eval()
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0
                for j, (inputs, targets, _) in enumerate(val_loader):
                    val_iter += 1
                    val_samples += inputs.size(0)
                    with torch.no_grad():
                        inputVariable = Variable(
                            inputs.permute(1, 0, 2, 3, 4).cuda())
                        labelVariable = Variable(
                            targets.cuda(non_blocking=True))
                        output_label, _, _ = model(inputVariable)
                        val_loss = loss_fn(output_label, labelVariable)
                        val_loss_epoch += val_loss.data.item()
                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += (predicted == targets.cuda()).sum()
                val_accuracy = (numCorr / float(val_samples)) * 100
                avg_val_loss = val_loss_epoch / val_iter
                print('val: Epoch = {} | Loss = {} | Accuracy = {} '.format(
                    epoch + 1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                    epoch + 1, avg_val_loss))
                val_log_acc.write(
                    'Val Accuracy after {} epochs = {}%\n'.format(
                        epoch + 1, val_accuracy))
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder +
                                       '/model_rgb_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
            # NOTE(review): unreachable — the (epoch + 1) % 1 == 0 condition
            # above is always true, so this periodic snapshot never runs.
            else:
                if (epoch + 1) % 10 == 0:
                    save_path_model = (model_folder +
                                       '/model_rgb_state_dict_epoch' +
                                       str(epoch + 1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
Exemplo n.º 10
0
def _unfreeze(module, train_params):
    """Enable gradients on *module*'s parameters, collect them into
    *train_params* (mutated in place), and switch the module to
    training mode."""
    for params in module.parameters():
        params.requires_grad = True
        train_params += [params]
    module.train(True)


def _fine_tune_modules(resnet):
    """Return the backbone sub-modules fine-tuned in stage 2: every conv
    of the last residual stage plus the final fc layer.

    NOTE(review): indexing layer4[2] assumes a three-block layer4 --
    confirm against the backbone used by bigConvLSTM.
    """
    return [
        resnet.layer4[0].conv1, resnet.layer4[0].conv2,
        resnet.layer4[1].conv1, resnet.layer4[1].conv2,
        resnet.layer4[2].conv1, resnet.layer4[2].conv2,
        resnet.fc,
    ]


def main_run(stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen,
             trainBatchSize, valBatchSize, numEpochs, lr1, decay_factor,
             decay_step, memSize, color, rgbm, fcm):
    """Train the two-stream BigConvLSTM model.

    Stage 1 trains only the ConvLSTM cell and the classifier on top of a
    frozen backbone; stage 2 loads the stage-1 checkpoint (*stage1_dict*)
    and additionally fine-tunes the last residual stage and fc layer of
    both backbones.  TensorBoard logs, text logs, the best model (by
    validation accuracy) and timing info are written under a fresh
    directory derived from *out_dir*, *color*, *seqLen* and *stage*.
    """
    #dataset = 'gtea61'
    begin_time = datetime.datetime.now()
    num_classes = 61

    if color not in ['HSV_opticalFlow', 'flow_surfaceNormals', 'warpedHSV']:
        print(color, ' is not valid')
        # sys.exit instead of the site-provided exit(): safe outside a REPL
        sys.exit(-1)

    model_folder = os.path.join(
        './', out_dir, 'BigConvLSTM', color, str(seqLen),
        'stage' + str(stage))  # Dir for saving models and log files
    # Refuse to overwrite an existing experiment directory
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader -- ImageNet normalization statistics
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vid_seq_train = makeDataset(train_data_dir,
                                seqLen=seqLen,
                                fmt='.png',
                                users=['S1', 'S3', 'S4'],
                                spatial_transform=spatial_transform,
                                colorization=color)

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)

    if val_data_dir is not None:
        # Validation uses deterministic center crops (no augmentation)
        vid_seq_val = makeDataset(val_data_dir,
                                  seqLen=seqLen,
                                  fmt='.png',
                                  users=['S2'],
                                  train=False,
                                  spatial_transform=Compose([
                                      Scale(256),
                                      CenterCrop(224),
                                      ToTensor(), normalize
                                  ]),
                                  colorization=color)

        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)

    train_params = []
    model = bigConvLSTM(num_classes=num_classes,
                        mem_size=memSize,
                        rgbm=rgbm,
                        fcm=fcm)
    if stage != 1:  # stage == 2: resume from the stage-1 checkpoint
        model.load_state_dict(torch.load(stage1_dict))
    model.train(False)

    # Freeze everything, then selectively unfreeze what this stage trains.
    for params in model.parameters():
        params.requires_grad = False

    if stage != 1:
        # Fine-tune the tail of both backbones; ordering (RGB convs, RGB
        # fc, Col convs, Col fc) matches the optimizer's original
        # parameter order.
        for module in (_fine_tune_modules(model.resNetRGB) +
                       _fine_tune_modules(model.resNetCol)):
            _unfreeze(module, train_params)

    # The ConvLSTM cell and classifier are trained in both stages.
    _unfreeze(model.lstm_cell, train_params)
    _unfreeze(model.classifier, train_params)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    print(device)

    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.Adam(train_params,
                                    lr=lr1,
                                    weight_decay=4e-5,
                                    eps=1e-4)
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    best_accuracy = 0  # best validation accuracy seen so far

    dataload_time = datetime.datetime.now()

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0

        # Validation flips the model to eval mode; restore training mode
        # on every module that is actually being optimized.
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)

        if stage == 2:
            for module in (_fine_tune_modules(model.resNetRGB) +
                           _fine_tune_modules(model.resNetCol)):
                module.train(True)

        for inputsRGB, inputsCol, targets in train_loader:
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()

            # (batch, seq, C, H, W) -> (seq, batch, C, H, W): the
            # recurrent model consumes time-major input
            inputVariableRGB = Variable(
                inputsRGB.permute(1, 0, 2, 3, 4).to(device))
            inputVariableCol = Variable(
                inputsCol.permute(1, 0, 2, 3, 4).to(device))
            labelVariable = Variable(targets.to(device))
            trainSamples += inputsRGB.size(0)

            output_label, _ = model(inputVariableRGB, inputVariableCol, device)

            loss = loss_fn(output_label, labelVariable)
            loss.backward()

            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(device)).sum()
            epoch_loss += loss.item()

        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if val_data_dir is not None:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0

            with torch.no_grad():
                for inputsRGB, inputsCol, targets in val_loader:
                    val_iter += 1
                    val_samples += inputsRGB.size(0)

                    inputVariableRGB = Variable(
                        inputsRGB.permute(1, 0, 2, 3, 4).to(device))
                    inputVariableCol = Variable(
                        inputsCol.permute(1, 0, 2, 3, 4).to(device))
                    labelVariable = Variable(targets.to(device))

                    output_label, _ = model(inputVariableRGB, inputVariableCol,
                                            device)

                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()

                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.to(device)).sum()

            val_accuracy = torch.true_divide(numCorr, val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter

            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))

            # Checkpoint only when the validation accuracy improves
            if val_accuracy > best_accuracy:
                save_path_model = (model_folder + '/model_' + color +
                                   '_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                best_accuracy = val_accuracy
                print("saved new best model")

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
    end_time = datetime.datetime.now()
    print('total time elapsed: ', end_time - begin_time)
    print('dataload time: ', dataload_time - begin_time)
    print('training time: ', end_time - dataload_time)
    timers = open((model_folder + '/timings.txt'), 'w')
    timers.write(
        f"total time elapsed: {end_time-begin_time} \ndataload time: {dataload_time-begin_time} \ntraining time: {end_time-dataload_time}"
    )
    timers.close()
Exemplo n.º 11
0
            parameters,
            lr=opt.learning_rate,
            betas=(0.9, 0.999),
            weight_decay=opt.weight_decay)

    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    ## prepare train
    if not opt.no_train:
        temporal_transform = TemporalSegmentRandomCrop(opt.segment_number, opt.sample_duration)

        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            sceobj_crop_method = MultiScaleRandomCrop(opt.scales, opt.sceobj_frame_size)
        elif opt.train_crop == 'corner':
            sceobj_crop_method = MultiScaleCornerCrop(opt.scales, opt.sceobj_frame_size)
        elif opt.train_crop == 'center':
            sceobj_crop_method = MultiScaleCornerCrop(opt.scales, opt.sceobj_frame_size, crop_positions=['c'])
        sceobj_spatial_transform = Compose([
            sceobj_crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value),
            normalize
        ])
        #sceobj_spatial_transform = transforms.Compose([
        #    transforms.Resize(256),
        #    transforms.CenterCrop(opt.sceobj_frame_size),
        #    transforms.RandomHorizontalFlip(),
        #    transforms.ToTensor(),
        #    transforms.Normalize(mean=[0.485, 0.456, 0.406],
        #                         std=[0.229, 0.224, 0.225])
def modelTrain(modelUsed, pretrained, trainDataset, trainLabels,
               validationDataset, validationLabels, numEpochs, evalInterval,
               evalMode, outDir, numWorkers, lr, stepSize, decayRate,
               trainBatchSize, seqLen, plotting):
    """Train a ViolenceModel on video sequences, evaluating every
    *evalInterval* epochs.

    Returns ``(bestmodel, validationAccuracy)``: ``bestmodel`` is the
    model at the evaluation with the highest accuracy above the 50%
    checkpoint floor (``None`` if never beaten), and
    ``validationAccuracy`` is the accuracy of the last evaluation run
    (0 if no evaluation ever ran).
    """
    # ImageNet statistics used by the pretrained backbones
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vidSeqTrain = makeDataset(trainDataset,
                              trainLabels,
                              spatial_transform=spatial_transform,
                              seqLen=seqLen)
    # torch iterator to give data in batches of specified size
    trainLoader = torch.utils.data.DataLoader(vidSeqTrain,
                                              batch_size=trainBatchSize,
                                              shuffle=True,
                                              num_workers=numWorkers,
                                              pin_memory=True,
                                              drop_last=True)

    # Evaluation-time spatial transform depends on the crop strategy
    if evalMode == 'centerCrop':
        test_spatial_transform = Compose(
            [Scale(256), CenterCrop(224),
             ToTensor(), normalize])
    elif evalMode == 'tenCrops':
        test_spatial_transform = Compose(
            [Scale(256), TenCrops(size=224, mean=mean, std=std)])
    elif evalMode == 'fiveCrops':
        test_spatial_transform = Compose(
            [Scale(256), FiveCrops(size=224, mean=mean, std=std)])
    elif evalMode == 'horFlip':
        test_spatial_transform = Compose([
            Scale(256),
            CenterCrop(224),
            FlippedImagesTest(mean=mean, std=std)
        ])

    vidSeqValid = makeDataset(validationDataset,
                              validationLabels,
                              seqLen=seqLen,
                              spatial_transform=test_spatial_transform)

    validationLoader = torch.utils.data.DataLoader(vidSeqValid,
                                                   batch_size=1,
                                                   shuffle=False,
                                                   num_workers=int(numWorkers /
                                                                   2),
                                                   pin_memory=True)

    numTrainInstances = vidSeqTrain.__len__()
    numValidationInstances = vidSeqValid.__len__()

    print('Number of training samples = {}'.format(numTrainInstances))
    print('Number of validation samples = {}'.format(numValidationInstances))

    modelFolder = './experiments_' + outDir + '_' + modelUsed + '_' + str(
        pretrained)  # Dir for saving models and log files
    # Create the dir if it does not exist yet
    if not os.path.exists(modelFolder):
        os.makedirs(modelFolder)
    # Log files -- append mode so reruns extend the history
    writer = SummaryWriter(modelFolder)
    trainLogLoss = open((modelFolder + '/trainLogLoss.txt'), 'a')
    trainLogAcc = open((modelFolder + '/trainLogAcc.txt'), 'a')
    validationLogLoss = open((modelFolder + '/validLogLoss.txt'), 'a')
    validationLogAcc = open((modelFolder + '/validLogAcc.txt'), 'a')

    model = ViolenceModel(modelUsed, pretrained)

    # Optimize only the parameters the model definition left trainable
    trainParams = []
    for params in model.parameters():
        if params.requires_grad:
            trainParams += [params]
    model.train(True)
    if torch.cuda.is_available():
        model.cuda()

    lossFn = nn.CrossEntropyLoss()
    optimizerFn = torch.optim.RMSprop(trainParams, lr=lr)
    optimizerFn.zero_grad()
    optimScheduler = torch.optim.lr_scheduler.StepLR(optimizerFn, stepSize,
                                                     decayRate)

    minAccuracy = 50  # floor below which no model is kept as "best"
    train_loss = []
    val_loss = []
    train_acc = []
    val_acc = []
    bestmodel = None
    # BUG FIX: previously unbound when numEpochs < evalInterval, making
    # the final return raise NameError.
    validationAccuracy = 0

    for epoch in range(numEpochs):
        # NOTE(review): stepping the scheduler at the top of the epoch
        # preserves the original LR schedule; recent PyTorch recommends
        # stepping after the optimizer updates.
        optimScheduler.step()
        epochLoss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.train(True)
        print('Epoch = {}'.format(epoch + 1))
        writer.add_scalar('lr', optimizerFn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, targets) in enumerate(trainLoader):
            iterPerEpoch += 1
            optimizerFn.zero_grad()
            # (batch, seq, C, H, W) -> (seq, batch, C, H, W)
            if (torch.cuda.is_available()):
                inputVariable1 = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
                labelVariable = Variable(targets.cuda())
            else:
                inputVariable1 = Variable(inputs.permute(1, 0, 2, 3, 4))
                labelVariable = Variable(targets)
            outputLabel = model(inputVariable1)
            loss = lossFn(outputLabel, labelVariable)
            loss.backward()
            optimizerFn.step()
            outputProb = torch.nn.Softmax(dim=1)(outputLabel)
            _, predicted = torch.max(outputProb.data, 1)
            if (torch.cuda.is_available()):
                numCorrTrain += (predicted == targets.cuda()).sum()
            else:
                numCorrTrain += (predicted == targets).sum()
            epochLoss += loss.item()
        avgLoss = epochLoss / iterPerEpoch
        trainAccuracy = (float(numCorrTrain) * 100) / float(numTrainInstances)
        train_loss.append(avgLoss)
        train_acc.append(trainAccuracy)
        print('Training: Loss = {} | Accuracy = {}% '.format(
            avgLoss, trainAccuracy))
        writer.add_scalar('train/epochLoss', avgLoss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        trainLogLoss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avgLoss))
        trainLogAcc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if (epoch + 1) % evalInterval == 0:
            model.train(False)
            print('Evaluating...')
            validationLossEpoch = 0
            validationIter = 0
            numCorrTest = 0
            for j, (inputs, targets) in enumerate(validationLoader):
                validationIter += 1
                if (torch.cuda.is_available()):
                    inputVariable1 = Variable(inputs.permute(1, 0, 2, 3,
                                                             4).cuda(),
                                              requires_grad=False)
                    # BUG FIX: 'async' became a reserved keyword in
                    # Python 3.7; the tensor kwarg is 'non_blocking'.
                    labelVariable = Variable(targets.cuda(non_blocking=True),
                                             requires_grad=False)
                else:
                    inputVariable1 = Variable(inputs.permute(1, 0, 2, 3, 4),
                                              requires_grad=False)
                    labelVariable = Variable(targets, requires_grad=False)
                outputLabel = model(inputVariable1)
                validationLoss = lossFn(outputLabel, labelVariable)
                validationLossEpoch += validationLoss.item()
                outputProb = torch.nn.Softmax(dim=1)(outputLabel)
                _, predicted = torch.max(outputProb.data, 1)
                if (torch.cuda.is_available()):
                    numCorrTest += (predicted == targets[0].cuda()).sum()
                else:
                    numCorrTest += (predicted == targets[0]).sum()
            validationAccuracy = (float(numCorrTest) *
                                  100) / float(numValidationInstances)
            avgValidationLoss = validationLossEpoch / validationIter
            val_loss.append(avgValidationLoss)
            val_acc.append(validationAccuracy)
            print('Testing: Loss = {} | Accuracy = {}% '.format(
                avgValidationLoss, validationAccuracy))
            writer.add_scalar('test/epochloss', avgValidationLoss, epoch + 1)
            writer.add_scalar('test/accuracy', validationAccuracy, epoch + 1)
            validationLogLoss.write('valid Loss after {} epochs = {}\n'.format(
                epoch + 1, avgValidationLoss))
            validationLogAcc.write(
                'valid Accuracy after {} epochs = {}%\n'.format(
                    epoch + 1, validationAccuracy))
            if validationAccuracy > minAccuracy:
                bestmodel = model
                minAccuracy = validationAccuracy
    '''plotting the accuracy and loss curves'''
    if plotting:
        xc = range(1, numEpochs + 1)
        # x positions of the evaluation epochs
        xv = []
        for i in xc:
            if (i % evalInterval == 0):
                xv.append(i)
        plt.figure(1, figsize=(7, 5))
        plt.plot(xc, train_loss)
        plt.plot(xv, val_loss)
        plt.xlabel('num of Epochs')
        plt.ylabel('loss')
        plt.title('train_loss vs val_loss')
        plt.grid(True)
        plt.legend(['train', 'val'])
        plt.style.use(['classic'])
        plt.savefig(modelFolder + "/lossCurve.png")

        plt.figure(2, figsize=(7, 5))
        plt.plot(xc, train_acc)
        plt.plot(xv, val_acc)
        plt.xlabel('num of Epochs')
        plt.ylabel('accuracy')
        plt.title('train_acc vs val_acc')
        plt.grid(True)
        plt.legend(['train', 'val'], loc=4)
        plt.style.use(['classic'])
        plt.savefig(modelFolder + "/accuracyCurve.png")
    trainLogAcc.close()
    validationLogAcc.close()
    trainLogLoss.close()
    validationLogLoss.close()
    writer.export_scalars_to_json(modelFolder + "/all_scalars.json")
    writer.close()
    return bestmodel, validationAccuracy
Exemplo n.º 13
0
def main():
    """Build, train and validate an encoder/classifier video model.

    The ``--model_name`` option is "<encoder>-<classifier>"; its halves
    select the encoder and classifier networks.  Training and validation
    loaders mirror each other except for the spatial/temporal transforms.
    """
    opt = parse_opts()

    ecd_name, cls_name = opt.model_name.split('-')
    ecd_model = get_encoder_net(ecd_name)
    cls_model = get_end_net(cls_name)

    cfg.encoder_model = ecd_name
    cfg.classification_model = cls_name

    if opt.debug:
        cfg.debug = opt.debug
    else:
        # 'TEST' means "name the run after the model"
        if opt.tensorboard == 'TEST':
            cfg.tensorboard = opt.model_name
        else:
            cfg.tensorboard = opt.tensorboard
            cfg.flag = opt.flag
    model = cls_model(cfg,
                      encoder=CNNencoder(
                          cfg,
                          ecd_model(pretrained=True, path=opt.encoder_model)))
    cfg.video_path = os.path.join(cfg.root_path, cfg.video_path)
    cfg.annotation_path = os.path.join(cfg.root_path, cfg.annotation_path)

    cfg.list_all_member()

    torch.manual_seed(cfg.manual_seed)
    print('##########################################')
    print('####### model 仅支持单GPU')
    print('##########################################')
    model = model.cuda()
    print(model)
    criterion = nn.CrossEntropyLoss()
    if cfg.cuda:
        criterion = criterion.cuda()

    norm_method = Normalize([0, 0, 0], [1, 1, 1])

    print('##########################################')
    print('####### train')
    print('##########################################')
    assert cfg.train_crop in ['random', 'corner', 'center']
    if cfg.train_crop == 'random':
        # BUG FIX: previously assigned a bare tuple instead of
        # constructing the transform, which crashed inside Compose.
        crop_method = MultiScaleRandomCrop(cfg.scales, cfg.sample_size)
    elif cfg.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(cfg.scales, cfg.sample_size)
    elif cfg.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(cfg.scales,
                                           cfg.sample_size,
                                           crop_positions=['c'])
    spatial_transform = Compose([
        crop_method,
        RandomHorizontalFlip(),
        ToTensor(cfg.norm_value), norm_method
    ])
    temporal_transform = TemporalRandomCrop(cfg.sample_duration)
    target_transform = ClassLabel()
    training_data = get_training_set(cfg, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=cfg.batch_size,
                                               shuffle=True,
                                               num_workers=cfg.n_threads,
                                               drop_last=False,
                                               pin_memory=True)
    optimizer = model.get_optimizer(lr1=cfg.lr, lr2=cfg.lr2)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               'min',
                                               patience=cfg.lr_patience)
    print('##########################################')
    print('####### val')
    print('##########################################')
    # Validation: deterministic center crop, padded clips
    spatial_transform = Compose([
        Scale(cfg.sample_size),
        CenterCrop(cfg.sample_size),
        ToTensor(cfg.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(cfg.sample_duration)
    target_transform = ClassLabel()
    validation_data = get_validation_set(cfg, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=cfg.batch_size,
                                             shuffle=False,
                                             num_workers=cfg.n_threads,
                                             drop_last=False,
                                             pin_memory=True)
    print('##########################################')
    print('####### run')
    print('##########################################')
    if cfg.debug:
        logger = None
    else:
        path = get_log_dir(cfg.logdir, name=cfg.tensorboard, flag=cfg.flag)
        logger = Logger(logdir=path)
        cfg.save_config(path)

    for i in range(cfg.begin_epoch, cfg.n_epochs + 1):
        train_epoch(i, train_loader, model, criterion, optimizer, cfg, logger)
        validation_loss = val_epoch(i, val_loader, model, criterion, cfg,
                                    logger)

        # Plateau scheduler keys off the validation loss
        scheduler.step(validation_loss)
Exemplo n.º 14
0
    def __init__(self):
        """Prepare the model, spatial transforms, Grad-CAM explainers and
        per-run accumulators for action-recognition visualization."""
        # Single (model, explanation method, display method) combination
        self.model_methods = [['resnext', 'gradcam', 'camshow']]

        # Action class names, in label order
        self.classes = [
            "brush_hair", "cartwheel", "catch", "chew", "clap", "climb",
            "climb_stairs", "dive", "draw_sword", "dribble", "drink", "eat",
            "fall_floor", "fencing", "flic_flac", "golf", "handstand", "hit",
            "hug", "jump", "kick", "kick_ball", "kiss", "laugh", "pick",
            "pour", "pullup", "punch", "push", "pushup", "ride_bike",
            "ride_horse", "run", "shake_hands", "shoot_ball", "shoot_bow",
            "shoot_gun", "sit", "situp", "smile", "smoke", "somersault",
            "stand", "swing_baseball", "sword", "sword_exercise", "talk",
            "throw", "turn", "walk", "wave"
        ]

        crop_scales = [1.0]

        # Three variants of the 112x112 crop pipeline: dataset-normalized
        # tensor, raw crop only, and tensor without normalization.
        self.spatial_transform = Compose([
            MultiScaleCornerCrop(crop_scales, 112),
            ToTensor(1.0),
            Normalize(get_mean(1.0, dataset='activitynet'), get_std(1.0))
        ])
        self.spatial_transform2 = Compose(
            [MultiScaleCornerCrop(crop_scales, 112)])
        self.spatial_transform3 = Compose([
            MultiScaleCornerCrop(crop_scales, 112),
            ToTensor(1),
            Normalize([0, 0, 0], [1, 1, 1])
        ])

        self.model = utils.load_model(self.model_methods[0][0])
        self.model.cuda()
        self.bb_frames = []

        # One Grad-CAM explainer per probed layer; attribute names follow
        # the explainer / explainer2 .. explainer6 scheme.
        method_name = 'gradcam'
        probe_layers = ("conv1", "layer1", "layer2", "layer3", "layer4",
                        "avgpool")
        for idx, layer in enumerate(probe_layers, start=1):
            attr = 'explainer' if idx == 1 else 'explainer{}'.format(idx)
            setattr(self, attr, get_explainer(self.model, method_name, layer))

        # Directory holding the frames to analyse (trailing slash kept)
        self.path = "images/frames4" + "/"

        # Per-run accumulators
        self.seq = []
        self.kls = []
        self.scr = []
        self.totalhit = self.totalhit2 = self.totalhit3 = 0
        self.totalhit4 = self.totalhit5 = self.totalhit6 = 0
        self.totalhit7 = self.totalframes = 0
Exemplo n.º 15
0
def main_run(stage, train_data_dir, val_data_dir, stage1Dict, stage1Dict_rgb,
             stage1Dict_fc, out_dir, seqLen, trainBatchSize, valBatchSize,
             numEpochs, lr1, decay_factor, decay_step, memSize):
    """Train the trainable heads of a two-stream (RGB + flow-colorized)
    ConvLSTM model on GTEA-61, validating each epoch and checkpointing the
    best model by validation accuracy.

    Args:
        stage: stage id, used only to name the output directory.
        train_data_dir: root of the training data.
        val_data_dir: root of the validation data; may be None to skip
            validation entirely.
        stage1Dict: unused here; kept for interface compatibility.
        stage1Dict_rgb: stage-1 weights for the RGB (frame) stream.
        stage1Dict_fc: stage-1 weights for the flow-colorized stream.
        out_dir: root directory under which models and logs are written.
        seqLen, trainBatchSize, valBatchSize, numEpochs: training knobs.
        lr1, decay_factor, decay_step: SGD learning rate and StepLR schedule.
        memSize: ConvLSTM memory size.
    """
    num_classes = 61  # GTEA-61

    # Dir for saving models and log files; refuse to overwrite an old run.
    model_folder = os.path.join('./', out_dir, 'attConvLSTMDoubleResnet',
                                str(seqLen), 'stage' + str(stage))
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loaders (ImageNet mean/std normalization)
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vid_seq_train = makeDataset(train_data_dir,
                                seqLen=seqLen,
                                fmt='.png',
                                users=['S1', 'S3', 'S4'],
                                spatial_transform=spatial_transform)
    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)

    if val_data_dir is not None:
        vid_seq_val = makeDataset(val_data_dir,
                                  seqLen=seqLen,
                                  fmt='.png',
                                  users=['S2'],
                                  train=False,
                                  spatial_transform=Compose([
                                      Scale(256),
                                      CenterCrop(224),
                                      ToTensor(), normalize
                                  ]))
        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)

    # Build the two-stream model with everything frozen, then selectively
    # re-enable the parts we fine-tune.
    model = twoStreamFlowCol(num_classes=num_classes,
                             memSize=memSize,
                             frameModel=stage1Dict_rgb,
                             flowModel=stage1Dict_fc)
    model.train(False)
    for params in model.parameters():
        params.requires_grad = False

    train_params = []

    def _unfreeze(module):
        # Mark all of `module`'s parameters trainable and collect each once.
        for p in module.parameters():
            p.requires_grad = True
            train_params.append(p)

    # Fine-tuned parts: the fusion classifier (registered exactly once -- the
    # original code added it twice, handing duplicate parameters to SGD),
    # plus, per stream, the ConvLSTM cell, every layer4 conv, and resNet.fc.
    _unfreeze(model.classifier)
    for stream in (model.frameModel, model.flowModel):
        _unfreeze(stream.lstm_cell)
        for res_block in stream.resNet.layer4:
            _unfreeze(res_block.conv1)
            _unfreeze(res_block.conv2)
        _unfreeze(stream.resNet.fc)

    model.cuda()

    min_accuracy = 0

    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.SGD(train_params,
                                   lr=lr1,
                                   momentum=0.9,
                                   weight_decay=5e-4)

    optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn,
                                                      step_size=decay_step,
                                                      gamma=decay_factor)
    train_iter = 0
    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0

        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)

        for inputs, inputsSN, targets in train_loader:
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()

            # Loaders yield (batch, seq, C, H, W); the model expects
            # sequence-first, hence the permute.
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            inputSNVariable = Variable(inputsSN.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)

            output_label, _ = model(inputVariable, inputSNVariable)

            loss = loss_fn(output_label, labelVariable)
            loss.backward()

            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            epoch_loss += loss.item()

        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if val_data_dir is not None:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0

            with torch.no_grad():
                for inputs, inputsSN, targets in val_loader:
                    val_iter += 1
                    val_samples += inputs.size(0)

                    inputVariable = Variable(
                        inputs.permute(1, 0, 2, 3, 4).cuda())
                    inputSNVariable = Variable(
                        inputsSN.permute(1, 0, 2, 3, 4).cuda())
                    # `targets.cuda(async=True)` is a SyntaxError on
                    # Python >= 3.7 (`async` is a keyword); plain .cuda()
                    # is the equivalent supported call.
                    labelVariable = Variable(targets.cuda())

                    output_label, _ = model(inputVariable, inputSNVariable)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()

                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.cuda()).sum()

            val_accuracy = torch.true_divide(numCorr, val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter

            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))

            # Checkpoint whenever validation accuracy improves.
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
Exemplo n.º 16
0
     get_lastest_model(opt)
 print(opt)
 with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
     json.dump(vars(opt), opt_file)
 
 torch.manual_seed(opt.manual_seed)
 
 model, parameters = generate_model(opt)
 print(model)
 criterion = nn.MSELoss()
 if not opt.no_cuda:
     criterion = criterion.cuda()
 norm_method = Normalize(opt.mean, [1, 1, 1])
 
 if not opt.no_train:
     crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
     spatial_transform = Compose([
         crop_method,
         RandomHorizontalFlip(),
         ToTensor(opt.norm_value), norm_method
     ])
     temporal_transform = None
     target_transform = ClassLabel()
                     
     train_logger = Logger(
         os.path.join(opt.result_path, 'train.log'),
         ['epoch', 'loss', 'label_s acc', 'label_mid acc','label_end acc','lr'])
     train_batch_logger = Logger(
         os.path.join(opt.result_path, 'train_batch.log'),
         ['epoch', 'batch', 'iter', 'loss', 'label_s acc','label_mid acc','label_end acc', 'lr'])
     
Exemplo n.º 17
0
def main_run(dataset, trainDir, valDir, outDir, stackSize, trainBatchSize,
             valBatchSize, numEpochs, lr1, decay_factor, decay_step,
             uniform_sampling, debug):
    """Train a stacked-optical-flow ResNet-34 on GTEA-61, validating every
    epoch and checkpointing the best model by validation accuracy.

    Args:
        dataset: dataset name, used only in the output directory path.
        trainDir: dataset root; both train and validation splits load from it.
        valDir: NOTE(review): unused -- validation reads trainDir with the
            'S2' split; confirm this parameter is intentionally ignored.
        outDir: root directory for models and logs.
        stackSize: flow frames per stack (the net gets 2*stackSize channels).
        trainBatchSize, valBatchSize, numEpochs: training knobs.
        lr1, decay_factor, decay_step: SGD lr and MultiStepLR milestones.
        uniform_sampling: forwarded to makeDataset.
        debug: when True, run on CPU with 0 dataloader workers.
    """
    # GTEA 61
    num_classes = 61

    # Train/Validation split by subject
    train_splits = ["S1", "S3", "S4"]
    val_splits = ["S2"]

    if debug:
        n_workers = 0
        device = 'cpu'
    else:
        n_workers = 4
        device = 'cuda'

    min_accuracy = 0

    model_folder = os.path.join('./', outDir, dataset,
                                'flow')  # Dir for saving models and log files
    # Refuse to overwrite a previous run.
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loaders (ImageNet mean/std normalization)
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])

    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vid_seq_train = makeDataset(trainDir,
                                train_splits,
                                spatial_transform=spatial_transform,
                                sequence=False,
                                stackSize=stackSize,
                                fmt='.png',
                                uniform_sampling=uniform_sampling)

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               sampler=None,
                                               num_workers=n_workers,
                                               pin_memory=True)

    vid_seq_val = makeDataset(trainDir,
                              val_splits,
                              spatial_transform=Compose([
                                  Scale(256),
                                  CenterCrop(224),
                                  ToTensor(), normalize
                              ]),
                              sequence=False,
                              stackSize=stackSize,
                              fmt='.png',
                              phase='Test',
                              uniform_sampling=uniform_sampling)

    val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                             batch_size=valBatchSize,
                                             shuffle=False,
                                             num_workers=n_workers,
                                             pin_memory=True)

    print('Number of samples in the dataset: training = {} | validation = {}'.
          format(len(vid_seq_train), len(vid_seq_val)))

    # All parameters are trained (flow stream is trained from scratch on top
    # of the pretrained=True backbone weights).
    model = flow_resnet34(True,
                          channels=2 * stackSize,
                          num_classes=num_classes)
    model.train(True)
    train_params = list(model.parameters())

    model.to(device)

    loss_fn = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.SGD(train_params,
                                   lr=lr1,
                                   momentum=0.9,
                                   weight_decay=5e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.train(True)
        for i, (inputs, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = inputs.to(device)
            labelVariable = targets.to(device)
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(device)).sum()
            epoch_loss += loss.data.item()

        optim_scheduler.step()

        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.data.item() / trainSamples) * 100
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        # Validate every epoch. The original guarded this with
        # `(epoch + 1) % 1 == 0`, which is always true, so its `else` branch
        # (a periodic checkpoint every 10 epochs) was unreachable dead code
        # and has been removed.
        model.train(False)
        val_loss_epoch = 0
        val_iter = 0
        val_samples = 0
        numCorr = 0
        with torch.no_grad():  # no autograd bookkeeping during evaluation
            for j, (inputs, targets) in enumerate(val_loader):
                val_iter += 1
                val_samples += inputs.size(0)
                inputVariable = inputs.to(device)
                labelVariable = targets.to(device)
                output_label, _ = model(inputVariable)
                val_loss = loss_fn(output_label, labelVariable)
                val_loss_epoch += val_loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += (predicted == targets.to(device)).sum()
        val_accuracy = (numCorr.data.item() / val_samples) * 100
        avg_val_loss = val_loss_epoch / val_iter
        print('Validation: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_val_loss, val_accuracy))
        val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
            epoch + 1, avg_val_loss))
        val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
            epoch + 1, val_accuracy))
        # Checkpoint whenever validation accuracy improves.
        if val_accuracy > min_accuracy:
            save_path_model = (model_folder + '/model_flow_state_dict.pth')
            torch.save(model.state_dict(), save_path_model)
            min_accuracy = val_accuracy

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
Exemplo n.º 18
0
Arquivo: main.py Projeto: qzhb/CKMN
            momentum=opt.momentum,
            dampening=dampening,
            weight_decay=opt.weight_decay,
            nesterov=opt.nesterov)

    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    ## prepare train
    if not opt.no_train:
        temporal_transform = TemporalSegmentRandomCrop(opt.segment_number, opt.sample_duration)

        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            spatial_crop_method = MultiScaleRandomCrop(opt.scales, opt.frame_size)
        elif opt.train_crop == 'corner':
            spatial_crop_method = MultiScaleCornerCrop(opt.scales, opt.frame_size)
        elif opt.train_crop == 'center':
            spatial_crop_method = MultiScaleCornerCrop(opt.scales, opt.frame_size, crop_positions=['c'])
        spatial_transform = Compose([
            spatial_crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value),
            normalize
        ])
        training_data = get_training_set(opt, spatial_transform, temporal_transform)

        train_loader = DataLoaderX(
            training_data,
            batch_size=opt.batch_size,
            shuffle=True,
            num_workers=opt.n_threads,
Exemplo n.º 19
0
def main_run(version, stage, train_data_dir, stage1_dict, out_dir, seqLen, trainBatchSize,
             valBatchSize, numEpochs, lr1, decay_factor, decay_step, mem_size):
    """Two-stage training of the spatial-attention ConvLSTM model on GTEA-61.

    Stage 1 trains only the ConvLSTM cell and the final classifier on a fresh
    model; stage 2 loads the stage-1 weights and additionally fine-tunes the
    resNet layer4 convolutions and resNet.fc.

    Args:
        version: run name, used in the output directory path.
        stage: 1 or 2 (see above).
        train_data_dir: dataset root; train and val splits both load from it.
        stage1_dict: path to stage-1 weights (used only when stage != 1).
        out_dir: root directory for models and logs.
        seqLen, trainBatchSize, valBatchSize, numEpochs: training knobs.
        lr1, decay_factor, decay_step: Adam lr and MultiStepLR milestones.
        mem_size: ConvLSTM memory size.

    NOTE(review): relies on module-level DEVICE and VAL_FREQUENCY globals.
    """
    num_classes = 61  # GTEA-61

    model_folder = os.path.join("./", out_dir, version)

    # Refuse to overwrite a previous run.
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Train val partitioning by subject
    train_usr = ["S1", "S3", "S4"]
    val_usr = ["S2"]

    # Data loaders (ImageNet mean/std normalization)
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    spatial_transform = Compose(
        [Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
         ToTensor(), normalize])

    vid_seq_train = makeDataset(train_data_dir, train_usr,
                                spatial_transform=spatial_transform, seqLen=seqLen)

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                                               shuffle=True, num_workers=4, pin_memory=True)

    vid_seq_val = makeDataset(train_data_dir, val_usr,
                              spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                              seqLen=seqLen)

    val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                             shuffle=False, num_workers=2, pin_memory=True)

    train_params = []

    # stage 1: train only the lstm and classifier on a fresh model
    if stage == 1:
        model = attentionModel(num_classes=num_classes, mem_size=mem_size)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False

    # stage 2: also fine-tune layer4 convs and resNet.fc on stage-1 weights
    else:
        model = attentionModel(num_classes=num_classes, mem_size=mem_size)
        model.load_state_dict(torch.load(stage1_dict))  # stage-1 pretrained weights
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False

        # Unfreeze every conv in the three layer4 residual blocks plus fc.
        for res_block in model.resNet.layer4:
            for conv in (res_block.conv1, res_block.conv2):
                for params in conv.parameters():
                    params.requires_grad = True
                    train_params += [params]
                conv.train(True)
        for params in model.resNet.fc.parameters():  # fully connected layer
            params.requires_grad = True
            train_params += [params]
        model.resNet.fc.train(True)

    for params in model.lstm_cell.parameters():  # for both stages we train the lstm
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():  # for both stages we train the last classifier (after the lstm and avg pooling)
        params.requires_grad = True
        train_params += [params]

    model.lstm_cell.train(True)
    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5, eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step,
                                                           gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        # Validation flips everything to eval; restore train mode on the
        # trainable sub-modules at the top of each epoch.
        model.lstm_cell.train(True)
        model.classifier.train(True)
        if stage == 2:
            for res_block in model.resNet.layer4:
                res_block.conv1.train(True)
                res_block.conv2.train(True)
            model.resNet.fc.train(True)
        for i, (inputs, inputsF, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            # Loaders yield (batch, seq, C, H, W); the model wants seq-first.
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).to(DEVICE))
            labelVariable = Variable(targets.to(DEVICE))
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(DEVICE)).sum()  # evaluating number of correct classifications
            epoch_loss += loss.data.item()
        # Step the LR schedule AFTER this epoch's optimizer updates
        # (PyTorch >= 1.1 convention; the original called it at the start of
        # the epoch, which shifts the decay milestones and raises a warning).
        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        # NOTE(review): accuracies here are fractions in [0, 1], although the
        # val log line below formats them with a '%' sign.
        trainAccuracy = (numCorrTrain.data.item() / trainSamples)

        train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch + 1, avg_loss))  # log file
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch + 1, trainAccuracy))  # log file
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_loss, trainAccuracy))

        if (epoch + 1) % VAL_FREQUENCY == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            with torch.no_grad():  # no autograd bookkeeping during evaluation
                for j, (inputs, inputsF, targets) in enumerate(val_loader):
                    val_iter += 1
                    val_samples += inputs.size(0)
                    inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).to(DEVICE))
                    labelVariable = Variable(targets.to(DEVICE))
                    output_label, _ = model(inputVariable)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.data.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.to(DEVICE)).sum()  # evaluating number of correct classifications
            val_accuracy = (numCorr.data.item() / val_samples)
            avg_val_loss = val_loss_epoch / val_iter
            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))  # log file
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))  # log file
            if val_accuracy > min_accuracy:
                save_path_model = (
                        model_folder + '/model_rgb_state_dict.pth')  # every epoch, check if the val accuracy is improved, if so, save that model
                torch.save(model.state_dict(),
                           save_path_model)  # in that way, even if the model overfit, you will get always the best model
                min_accuracy = val_accuracy  # in this way you don't have to care too much about the number of epochs

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
Exemplo n.º 20
0
def main_run(version, flowModel, rgbModel, stackSize, seqLen, memSize, trainDatasetDir, outDir,
             trainBatchSize, valBatchSize, lr1, numEpochs, decay_step, decay_factor):
    
    num_classes = 61     # gtea61 dataset
    model_folder = os.path.join("./", outDir, version)

    # Create the dir
    print(f"Checking directory {model_folder}")
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    print(f"Creating directory{model_folder}")
    os.makedirs(model_folder)

    # Log files
    print(f"Creating log files")
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # ImageNet mean and std
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    # Train val partitioning
    train_usr = ["S1", "S3", "S4"]
    val_usr = ["S2"]


    normalize = Normalize(mean=mean, std=std)

    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                 ToTensor(), normalize])
    # train dataset
    print(f"Defining train dataset")
    vid_seq_train = makeDataset(trainDatasetDir, train_usr, spatial_transform,
                               stackSize=stackSize, seqLen=seqLen)

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                            shuffle=True, num_workers=4, pin_memory=True)
    

    # val dataset
    print(f"Defining validation dataset")
    vid_seq_val = makeDataset(trainDatasetDir, val_usr,
                                   spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                                   stackSize=stackSize, phase="val", seqLen=seqLen)
    
    val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                shuffle=False, num_workers=2, pin_memory=True)
    
    valSamples = vid_seq_val.__len__()


    # model
    print("Building model")
    model = twoStreamAttentionModel(flowModel=flowModel, frameModel=rgbModel, stackSize=stackSize, memSize=memSize,         # see twoStreamModel.py
                                    num_classes=num_classes)
    
    print("Setting trainable parameters")
    for params in model.parameters():           # initially freeze all layers
        params.requires_grad = False

    model.train(False)
    train_params = []

    for params in model.classifier.parameters():    # unfreeze classifier layer (the layer that joins the two models outputs)
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.lstm_cell.parameters():  # unfreeze lstm layer of the frame model
        train_params += [params]
        params.requires_grad = True

    for params in model.frameModel.resNet.layer4[0].conv1.parameters():     #unfreeze layer 4
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[0].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[2].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.layer4[2].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.fc.parameters():              # unfreeze last fully connected layer of frame model 
        params.requires_grad = True                                     # (I still don't know why, because in the joining of the two models, this layer is skipped)
        train_params += [params]                                        

    base_params = []
    for params in model.flowModel.layer4.parameters():              # unfreeze layer 4 of flow model
        base_params += [params]
        params.requires_grad = True

    print("Moving model to GPU")
    model.to(DEVICE)

    trainSamples = vid_seq_train.__len__()
    min_accuracy = 0

    print("Defining loss function, optimizer and scheduler")
    loss_fn = nn.CrossEntropyLoss()     # loss function
    optimizer_fn = torch.optim.SGD([    # optimizer
        {'params': train_params},
        {'params': base_params, 'lr': 1e-4},  # 1e-4
    ], lr=lr1, momentum=0.9, weight_decay=5e-4)

    #scheduler
    optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn, step_size=decay_step, gamma=decay_factor)
    train_iter = 0

    print("Training begun")
    # TRAIN PROCEDURE
    for epoch in range(numEpochs):
        optim_scheduler.step()
        epoch_loss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.classifier.train(True)
        model.flowModel.layer4.train(True)


        start = time.time()
        for j, (inputFrame, inputMMaps, inputFlow, targets) in enumerate(train_loader):
            
            print(f"step {j} / {int(np.floor(trainSamples/trainBatchSize))}")
            
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()                                                # put gradients to zero
            inputVariableFlow = Variable(inputFlow.to(DEVICE))
            inputVariableFrame = Variable(inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE))
            labelVariable = Variable(targets.to(DEVICE))
            #print("predict")
            output_label = model(inputVariableFlow, inputVariableFrame)         # predict
            loss = loss_fn(F.log_softmax(output_label, dim=1), labelVariable)   # compute loss
            #print("backprop")
            loss.backward()                                                     
            optimizer_fn.step()
            #print("accuracy")
            _, predicted = torch.max(output_label.data, 1)                  
            numCorrTrain += (predicted == targets.to(DEVICE)).sum()             # counting number of correct predictions
            epoch_loss += loss.data.item()  

        
        avg_loss = epoch_loss / iterPerEpoch                                    # computing average per epoch loss
        trainAccuracy = (numCorrTrain.item() / trainSamples) * 100
        print('Average training loss after {} epoch = {} '.format(epoch + 1, avg_loss))
        print('Training accuracy after {} epoch = {}% '.format(epoch + 1, trainAccuracy))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch + 1, avg_loss))             # log file
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch + 1, trainAccuracy))     # log file
        print(f"Elapsed : {time.time()-start}")

        # VALIDATION
        if (epoch + 1) % 5 == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            numCorr = 0
            for j, (inputFrame, inputMMaps, inputFlow, targets) in enumerate(val_loader):
                if j % 1 == 0:
                    print(f"step {j} / {int(np.floor(vid_seq_val.__len__()/valBatchSize))}")

                val_iter += 1
                inputVariableFlow = Variable(inputFlow.to(DEVICE))
                inputVariableFrame = Variable(inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE))
                labelVariable = Variable(targets.to(DEVICE))
                output_label = model(inputVariableFlow, inputVariableFrame)
                loss = loss_fn(F.log_softmax(output_label, dim=1), labelVariable)
                val_loss_epoch += loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += (predicted == labelVariable.data).sum()
            val_accuracy = (numCorr.item() / valSamples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val Loss after {} epochs, loss = {}'.format(epoch + 1, avg_val_loss))
            print('Val Accuracy after {} epochs = {}%'.format(epoch + 1, val_accuracy))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))       # log file
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))   # log file
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_twoStream_state_dict.pth')                    # every epoch, check if the val accuracy is improved, if so, save that model
                torch.save(model.state_dict(), save_path_model)                                         # in that way, even if the model overfit, you will get always the best model
                min_accuracy = val_accuracy                                                             # in this way you don't have to care too much about the number of epochs

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
Exemplo n.º 21
0
def main_run(dataset, flowModel, rgbModel, stackSize, seqLen, memSize,
             trainDatasetDir, valDatasetDir, outDir, trainBatchSize,
             valBatchSize, lr1, numEpochs, decay_step, decay_factor,
             uniformSampling):
    """Train the two-stream attention model on the GTEA-61 S1/S3/S4 splits.

    Freezes both pre-trained streams except for the joint classifier, the
    frame model's LSTM cell, ResNet layer4 convs and fc, and the flow
    model's layer4 (the latter trained with its own lower learning rate).
    Validates on split S2 every epoch and keeps the best-accuracy weights.

    NOTE(review): relies on module-level names defined elsewhere in this
    file (DEVICE, makeDataset, twoStreamAttentionModel, Normalize, Compose,
    Scale, CenterCrop, MultiScaleCornerCrop, RandomHorizontalFlip, ToTensor).
    """
    # GTEA 61
    num_classes = 61

    # Train/Validation/Test split
    train_splits = ["S1", "S3", "S4"]
    val_splits = ["S2"]
    directory = trainDatasetDir

    model_folder = os.path.join(
        './', outDir, dataset,
        'twoStream')  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Standard ImageNet normalization statistics.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    # Training-time augmentation; validation uses a deterministic
    # center crop (see vid_seq_val below).
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vid_seq_train = makeDataset(directory,
                                train_splits,
                                spatial_transform=spatial_transform,
                                sequence=False,
                                numSeg=1,
                                stackSize=stackSize,
                                fmt='.png',
                                seqLen=seqLen,
                                uniform_sampling=uniformSampling)

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)

    vid_seq_val = makeDataset(directory,
                              val_splits,
                              spatial_transform=Compose([
                                  Scale(256),
                                  CenterCrop(224),
                                  ToTensor(), normalize
                              ]),
                              sequence=False,
                              numSeg=1,
                              stackSize=stackSize,
                              fmt='.png',
                              phase='Test',
                              seqLen=seqLen,
                              uniform_sampling=uniformSampling)

    val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                             batch_size=valBatchSize,
                                             shuffle=False,
                                             num_workers=2,
                                             pin_memory=True)
    valSamples = vid_seq_val.__len__()

    model = twoStreamAttentionModel(flowModel=flowModel,
                                    frameModel=rgbModel,
                                    stackSize=stackSize,
                                    memSize=memSize,
                                    num_classes=num_classes)

    # Freeze everything first; selected sub-modules are unfrozen below.
    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    train_params = []

    # Joint classifier that fuses the two streams' outputs.
    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    # LSTM cell of the frame (RGB) stream.
    for params in model.frameModel.lstm_cell.parameters():
        train_params += [params]
        params.requires_grad = True

    # Unfreeze the conv layers of ResNet layer4 of the frame stream.
    for params in model.frameModel.resNet.layer4[0].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[0].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[2].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.layer4[2].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.fc.parameters():
        params.requires_grad = True
        train_params += [params]

    # The flow stream's layer4 is trained with its own (lower) learning
    # rate via a separate optimizer parameter group below.
    base_params = []
    for params in model.flowModel.layer4.parameters():
        base_params += [params]
        params.requires_grad = True

    model.cuda()

    trainSamples = vid_seq_train.__len__()
    min_accuracy = 0

    loss_fn = nn.CrossEntropyLoss()
    # Two parameter groups: train_params at lr1, flow layer4 at fixed 1e-4.
    optimizer_fn = torch.optim.SGD([
        {
            'params': train_params
        },
        {
            'params': base_params,
            'lr': 1e-4
        },
    ],
                                   lr=lr1,
                                   momentum=0.9,
                                   weight_decay=5e-4)

    optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn,
                                                      step_size=decay_step,
                                                      gamma=decay_factor)
    train_iter = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        # Only the trainable sub-modules are switched to training mode.
        model.classifier.train(True)
        model.flowModel.layer4.train(True)
        for j, (inputFlow, inputFrame, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariableFlow = inputFlow.to(DEVICE)
            # Frame model expects (seqLen, batch, C, H, W).
            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE)
            labelVariable = targets.to(DEVICE)
            output_label = model(inputVariableFlow, inputVariableFrame)
            # NOTE(review): CrossEntropyLoss applies log-softmax internally;
            # the explicit log_softmax here is redundant but harmless since
            # log_softmax is idempotent.
            loss = loss_fn(torch.log_softmax(output_label, dim=1),
                           labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            # NOTE(review): uses targets.cuda() while the rest of the loop
            # uses .to(DEVICE) — equivalent only if DEVICE is a CUDA device.
            numCorrTrain += (predicted == targets.cuda()).sum()
            epoch_loss += loss.data.item()

        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.item() / trainSamples) * 100
        print('Average training loss after {} epoch = {} '.format(
            epoch + 1, avg_loss))
        print('Training accuracy after {} epoch = {}% '.format(
            epoch + 1, trainAccuracy))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        # Validation Phase
        #if valDatasetDir is not None:
        if (epoch + 1) % 1 == 0:  # always true: validate every epoch
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            numCorr = 0
            # NOTE(review): no torch.no_grad() here, so autograd state is
            # tracked during validation — wasteful but not incorrect.
            for j, (inputFlow, inputFrame, targets) in enumerate(val_loader):
                val_iter += 1
                inputVariableFlow = inputFlow.to(DEVICE)
                inputVariableFrame = inputFrame.permute(1, 0, 2, 3,
                                                        4).to(DEVICE)
                labelVariable = targets.to(DEVICE)
                output_label = model(inputVariableFlow, inputVariableFrame)
                loss = loss_fn(torch.log_softmax(output_label, dim=1),
                               labelVariable)
                val_loss_epoch += loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += (predicted == labelVariable.data).sum()
            val_accuracy = (numCorr.item() / valSamples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_val_loss))
            print('Val Accuracy after {} epochs = {}%'.format(
                epoch + 1, val_accuracy))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            # Keep only the weights with the best validation accuracy so far.
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder +
                                   '/model_twoStream_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
        #else:
        #    if (epoch + 1) % 10 == 0:
        #        save_path_model = (model_folder + '/model_twoStream_state_dict_epoch' + str(epoch + 1) + '.pth')
        #        torch.save(model.state_dict(), save_path_model)

        optim_scheduler.step()

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
Exemplo n.º 22
0
def main_run(dataset, stage, root_dir, out_dir, seqLen, trainBatchSize,
             numEpochs, lr1, decay_factor, decay_step, memSize, outPool_size,
             split, evalInterval):
    """Train the LSTA attention model, evaluating every `evalInterval` epochs.

    Stage 1 trains the LSTA cell and classifier of a fresh model; stage 2
    resumes from the stage-1 checkpoint and additionally fine-tunes the
    layer4 conv layers and the fc layer of the ResNet backbone.

    Args:
        dataset: 'gtea_61', 'gtea_71' or 'egtea_gaze+' (selects num_classes).
        stage: 1 or 2 (see above).
        root_dir: directory containing the dataset folder.
        out_dir: root output directory for checkpoints and logs.
        seqLen: number of frames sampled per clip.
        trainBatchSize: training batch size (the test loader reuses it).
        numEpochs: number of training epochs.
        lr1: initial Adam learning rate.
        decay_factor: LR decay factor (gamma) for MultiStepLR.
        decay_step: epoch milestones for MultiStepLR.
        memSize: LSTA memory size.
        outPool_size: number of CAM classes for the attention pooling.
        split: test-split id, used to name the output folder.
        evalInterval: run the test pass every this many epochs.
    """
    # Standard ImageNet normalization statistics.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    test_split = split
    c_cam_classes = outPool_size
    best_acc = 0
    # Both stages use the training batch size for testing as well.
    # (The original code re-assigned every hyper-parameter to itself inside
    # identical `if stage == 1 / elif stage == 2` branches; those no-op
    # assignments were removed.)
    testBatchSize = trainBatchSize

    if dataset == 'gtea_61':
        num_classes = 61
    elif dataset == 'gtea_71':
        num_classes = 71
    elif dataset == 'egtea_gaze+':
        num_classes = 106
    else:
        print('Wrong dataset')
        sys.exit()
    dataset_dir = os.path.join(root_dir, dataset)

    model_folder = os.path.join('.', out_dir, dataset, str(test_split))

    if not os.path.exists(model_folder):
        os.makedirs(model_folder)

    # Record the run's hyper-parameters next to the checkpoints.
    note_fl = open(model_folder + '/note.txt', 'w')
    note_fl.write('Number of Epochs = {}\n'
                  'lr = {}\n'
                  'Train Batch Size = {}\n'
                  'Sequence Length = {}\n'
                  'Decay steps = {}\n'
                  'Decay factor = {}\n'
                  'Memory size = {}\n'
                  'Memory cam classes = {}\n'.format(numEpochs, lr1,
                                                     trainBatchSize, seqLen,
                                                     decay_step, decay_factor,
                                                     memSize, c_cam_classes))
    note_fl.close()

    # Log files (TensorBoard + plain text).
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    train_log_loss_batch = open((model_folder + '/train_log_loss_batch.txt'),
                                'w')
    test_log_loss = open((model_folder + '/test_log_loss.txt'), 'w')
    test_log_acc = open((model_folder + '/test_log_acc.txt'), 'w')

    # Training-time augmentation; the test set uses a deterministic
    # center crop instead (see vid_seq_test below).
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    print('Preparing dataset...')

    if dataset == 'egtea_gaze+':
        trainDatasetF, testDatasetF, trainLabels, testLabels, trainNumFrames, testNumFrames = gen_split_egtea_gazePlus(
            dataset_dir, test_split)
    else:
        trainDatasetF, testDatasetF, trainLabels, testLabels, trainNumFrames, testNumFrames, _ = gen_split(
            dataset_dir, test_split)

    vid_seq_train = makeDataset(trainDatasetF,
                                trainLabels,
                                trainNumFrames,
                                spatial_transform=spatial_transform,
                                fmt='.jpg',
                                seqLen=seqLen)

    print('Number of train samples = {}'.format(len(vid_seq_train)))

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               num_workers=4,
                                               pin_memory=True)

    vid_seq_test = makeDataset(testDatasetF,
                               testLabels,
                               testNumFrames,
                               spatial_transform=Compose([
                                   Scale(256),
                                   CenterCrop(224),
                                   ToTensor(), normalize
                               ]),
                               fmt='.jpg',
                               seqLen=seqLen)

    print('Number of test samples = {}'.format(len(vid_seq_test)))

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    train_params = []
    if stage == 1:
        # Fresh model; freeze everything (the LSTA cell and classifier are
        # unfrozen below for both stages).
        model = attentionModel(num_classes=num_classes,
                               mem_size=memSize,
                               c_cam_classes=c_cam_classes)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    elif stage == 2:
        # Resume from the stage-1 checkpoint, then unfreeze layer4 + fc.
        model = attentionModel(num_classes=num_classes,
                               mem_size=memSize,
                               c_cam_classes=c_cam_classes)
        checkpoint_path = os.path.join(
            model_folder, 'last_checkpoint_stage' + str(1) + '.pth.tar')
        if os.path.exists(checkpoint_path):
            print('Loading weights from checkpoint file {}'.format(
                checkpoint_path))
        else:
            print('Checkpoint file {} does not exist'.format(checkpoint_path))
            sys.exit()
        last_checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(last_checkpoint['model_state_dict'])
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False

        # Fine-tune the layer4 convs and the final fc of the backbone.
        for layer in (model.resNet.layer4[0].conv1,
                      model.resNet.layer4[0].conv2,
                      model.resNet.layer4[1].conv1,
                      model.resNet.layer4[1].conv2,
                      model.resNet.layer4[2].conv1,
                      model.resNet.layer4[2].conv2,
                      model.resNet.fc):
            for params in layer.parameters():
                params.requires_grad = True
                train_params += [params]

    # Trained in both stages: the LSTA cell and the classifier head.
    for params in model.lsta_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.Adam(train_params,
                                    lr=lr1,
                                    weight_decay=5e-4,
                                    eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            # Model expects (seqLen, batch, C, H, W).
            inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
            labelVariable = targets.cuda()
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == labelVariable).sum()
            # BUG FIX: `loss.data[0]` raises on PyTorch >= 0.5; use .item().
            batch_loss = loss.item()
            if train_iter % 10 == 0:
                print('Training loss after {} iterations = {} '.format(
                    train_iter, batch_loss))
                train_log_loss_batch.write(
                    'Training loss after {} iterations = {}\n'.format(
                        train_iter, batch_loss))
                writer.add_scalar('train/iter_loss', batch_loss, train_iter)
            epoch_loss += batch_loss
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.item() / trainSamples) * 100
        print('Average training loss after {} epoch = {} '.format(
            epoch + 1, avg_loss))
        print('Training accuracy after {} epoch = {}% '.format(
            epoch + 1, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))
        # BUG FIX: step the scheduler after the epoch's optimizer updates
        # (required ordering since PyTorch 1.1; it previously ran at the top
        # of the loop, skipping the initial learning rate).
        optim_scheduler.step()

        # Always keep the latest checkpoint so stage 2 / restarts can resume.
        save_path_model = os.path.join(
            model_folder, 'last_checkpoint_stage' + str(stage) + '.pth.tar')
        save_file = {
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer_fn.state_dict(),
            'best_acc': best_acc,
        }
        torch.save(save_file, save_path_model)

        if (epoch + 1) % evalInterval == 0:
            print('Testing...')
            model.train(False)
            test_loss_epoch = 0
            test_iter = 0
            test_samples = 0
            numCorr = 0
            # torch.no_grad() replaces the removed pre-0.4
            # Variable(..., volatile=True) inference mode.
            with torch.no_grad():
                for j, (inputs, targets) in enumerate(test_loader):
                    print('testing inst = {}'.format(j))
                    test_iter += 1
                    test_samples += inputs.size(0)
                    inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
                    # BUG FIX: `targets.cuda(async=True)` is a SyntaxError on
                    # Python 3.7+ (`async` became a keyword); the parameter
                    # is now called `non_blocking`.
                    labelVariable = targets.cuda(non_blocking=True)
                    output_label, _ = model(inputVariable)
                    test_loss = loss_fn(output_label, labelVariable)
                    test_loss_epoch += test_loss.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == labelVariable).sum()
            test_accuracy = (numCorr.item() / test_samples) * 100
            avg_test_loss = test_loss_epoch / test_iter
            print('Test Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_test_loss))
            print('Test Accuracy after {} epochs = {}%'.format(
                epoch + 1, test_accuracy))
            writer.add_scalar('test/epoch_loss', avg_test_loss, epoch + 1)
            writer.add_scalar('test/accuracy', test_accuracy, epoch + 1)
            test_log_loss.write('Test Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_test_loss))
            test_log_acc.write('Test Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, test_accuracy))

            # Keep a separate checkpoint for the best test accuracy so far.
            if test_accuracy > best_acc:
                best_acc = test_accuracy
                save_path_model = os.path.join(
                    model_folder,
                    'best_checkpoint_stage' + str(stage) + '.pth.tar')
                save_file = {
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer_fn.state_dict(),
                    'best_acc': best_acc,
                }
                torch.save(save_file, save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    test_log_acc.close()
    train_log_loss_batch.close()
    test_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
def main_run(dataset, trainDir, valDir, outDir, stackSize, trainBatchSize, valBatchSize, numEpochs, lr1,
             decay_factor, decay_step):
    """Train the optical-flow stream (flow_resnet34) end to end.

    Args:
        dataset: 'gtea61' | 'gtea71' | 'gtea_gaze' | 'egtea' (sets num_classes).
        trainDir: training dataset directory.
        valDir: validation dataset directory, or None to skip validation.
        outDir: root output directory for models and logs.
        stackSize: number of stacked flow frames (model input has 2*stackSize channels).
        trainBatchSize / valBatchSize: loader batch sizes.
        numEpochs: number of training epochs.
        lr1: initial SGD learning rate.
        decay_factor: LR decay factor (gamma) for MultiStepLR.
        decay_step: epoch milestones for MultiStepLR.
    """
    import shutil  # local import: used only for the stale-run cleanup below

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    min_accuracy = 0

    model_folder = os.path.join('./', outDir, dataset, 'flow')  # Dir for saving models and log files
    # Create the dir, replacing a previous run with the same name.
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        # BUG FIX: this line used to be the notebook shell escape
        # `!rm -rf ./experiments`, which is a SyntaxError in plain Python
        # (and wiped the entire experiments tree). Remove only this run's
        # folder so os.makedirs below succeeds.
        shutil.rmtree(model_folder)
    os.makedirs(model_folder)

    # Log files (TensorBoard + plain text).
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader (ImageNet normalization statistics).
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                 ToTensor(), normalize])

    vid_seq_train = makeDataset(trainDir, spatial_transform=spatial_transform, sequence=False,
                                stackSize=stackSize, fmt='.png')

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                            shuffle=True, sampler=None, num_workers=4, pin_memory=True)
    valInstances = 0
    if valDir is not None:
        # Deterministic center-crop pipeline for validation.
        vid_seq_val = makeDataset(valDir, spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                                  sequence=False, stackSize=stackSize, fmt='.png', phase='Test')

        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                shuffle=False, num_workers=2, pin_memory=True)
        valInstances = len(vid_seq_val)

    trainInstances = len(vid_seq_train)
    print('Number of samples in the dataset: training = {} | validation = {}'.format(trainInstances, valInstances))

    # Two channels (x/y flow) per stacked frame.
    model = flow_resnet34(True, channels=2 * stackSize, num_classes=num_classes)
    model.train(True)
    train_params = list(model.parameters())

    model.cuda()

    loss_fn = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.SGD(train_params, lr=lr1, momentum=0.9, weight_decay=5e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = inputs.cuda()
            labelVariable = targets.cuda()
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == labelVariable).sum()
            # BUG FIX: `loss.data[0]` raises on PyTorch >= 0.5; use .item().
            epoch_loss += loss.item()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.item() / trainSamples) * 100
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch + 1, trainAccuracy))

        if valDir is not None:
            # Validate every epoch; keep the weights with the best accuracy.
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            # torch.no_grad() replaces the pre-0.4 Variable(volatile=True).
            with torch.no_grad():
                for j, (inputs, targets) in enumerate(val_loader):
                    val_iter += 1
                    val_samples += inputs.size(0)
                    inputVariable = inputs.cuda()
                    # BUG FIX: `targets.cuda(async=True)` is a SyntaxError on
                    # Python 3.7+; the parameter is now `non_blocking`.
                    labelVariable = targets.cuda(non_blocking=True)
                    output_label, _ = model(inputVariable)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == labelVariable).sum()
            val_accuracy = (numCorr.item() / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Validation: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_flow_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
        elif (epoch + 1) % 10 == 0:
            # BUG FIX: this periodic save was originally the `else` branch of
            # an always-true `(epoch + 1) % 1 == 0` check, so without a
            # validation set the model was never saved at all.
            save_path_model = (model_folder + '/model_flow_state_dict_epoch' + str(epoch + 1) + '.pth')
            torch.save(model.state_dict(), save_path_model)

        # BUG FIX: step the scheduler after the epoch's optimizer updates
        # (required ordering since PyTorch 1.1; it previously ran at the top
        # of the loop, skipping the initial learning rate).
        optim_scheduler.step()

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
Exemplo n.º 24
0
def main_run(dataset, flowModel, rgbModel, stage, seqLen, memSize, trainDatasetDir, valDatasetDir, outDir,
             trainBatchSize, valBatchSize, lr1, numEpochs, decay_step, decay_factor):
    """Train the two-stream (optical flow + RGB frame) attention model.

    Stage 1 builds the model around a pre-trained RGB model (``rgbModel``)
    and trains only the ConvLSTM cell and the classifier; stage 2 loads
    stage-1 weights from ``flowModel`` and additionally fine-tunes the last
    ResNet block (layer4 convs) and ``fc_action`` of the flow backbone.

    Metrics go to TensorBoard and plain-text logs under
    ``./<outDir>/<dataset>/NewtwoStream/<stage>``.  When a validation set is
    given, the best model by validation accuracy is saved; otherwise a
    snapshot is written every 10 epochs.

    Calls ``sys.exit()`` when the dataset name is unknown or the output
    directory already exists.
    """


    # Map dataset name to its number of action classes.
    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    # NOTE(review): 'NewtwoStream' looks like a typo for 'NewTwoStream';
    # kept as-is because existing checkpoints/logs already use this path.
    model_folder = os.path.join('./', outDir, dataset, 'NewtwoStream',str(stage))  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')


    # ImageNet normalization statistics.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    # Training augmentation: resize, random horizontal flip,
    # multi-scale corner crop to 224x224.
    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                 ToTensor(), normalize])

    vid_seq_train = makeDataset(trainDatasetDir,spatial_transform=spatial_transform,
                               sequence=False, numSeg=1, fmt='.png', seqLen=seqLen, frame_div=True)

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                            shuffle=True, num_workers=4, pin_memory=True)

    

    if valDatasetDir is not None:

        # Validation: deterministic center crop, test phase.
        vid_seq_val = makeDataset(valDatasetDir,
                                   spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                                   sequence=False, numSeg=1, fmt='.png', phase='Test',
                                   seqLen=seqLen, frame_div=True)

        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                shuffle=False, num_workers=2, pin_memory=True)
        valSamples = vid_seq_val.__len__()

    train_params = []
    if stage == 1:

        # Stage 1: freeze everything; only lstm_cell and classifier are
        # re-enabled for training further down.
        model = attentionModel_flow(num_classes=num_classes,frameModel=rgbModel, mem_size=memSize)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:

        # Stage 2: load stage-1 weights, freeze everything, then unfreeze
        # the last ResNet block and the action head for fine-tuning.
        model = attentionModel_flow(num_classes=num_classes, mem_size=memSize)
        model.load_state_dict(torch.load(flowModel))
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.flowResNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.flowResNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.flowResNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.flowResNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.flowResNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.flowResNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.flowResNet.fc_action.parameters():
            params.requires_grad = True
            train_params += [params]

        # Put the unfrozen sub-modules into training mode (the rest of the
        # model stays in eval mode from model.train(False) above).
        model.flowResNet.layer4[0].conv1.train(True)
        model.flowResNet.layer4[0].conv2.train(True)
        model.flowResNet.layer4[1].conv1.train(True)
        model.flowResNet.layer4[1].conv2.train(True)
        model.flowResNet.layer4[2].conv1.train(True)
        model.flowResNet.layer4[2].conv2.train(True)
        model.flowResNet.fc_action.train(True)

    # ConvLSTM cell and classifier are trained in both stages.
    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]


    model.lstm_cell.train(True)

    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5, eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step,
                                                           gamma=decay_factor)

    trainSamples = vid_seq_train.__len__()
    min_accuracy = 0  # best validation accuracy seen so far

    train_iter = 0

    for epoch in range(numEpochs):
        
        # Re-enable training mode each epoch (validation below switches the
        # whole model to eval mode).
        model.lstm_cell.train(True)
        model.classifier.train(True)
        if stage == 2:
            model.flowResNet.layer4[0].conv1.train(True)
            model.flowResNet.layer4[0].conv2.train(True)
            model.flowResNet.layer4[1].conv1.train(True)
            model.flowResNet.layer4[1].conv2.train(True)
            model.flowResNet.layer4[2].conv1.train(True)
            model.flowResNet.layer4[2].conv2.train(True)
            model.flowResNet.fc_action.train(True)

        
        epoch_loss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        
        for j, (inputFlow, inputFrame, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            # Reorder (batch, seq, C, H, W) -> (seq, batch, C, H, W) for the
            # sequence model.
            inputVariableFlow = inputFlow.permute(1, 0, 2, 3, 4).cuda()
            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).cuda()
            labelVariable = targets.cuda()
            
            output_label,_ = model(inputVariableFlow, inputVariableFrame)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
        
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += torch.sum(predicted == labelVariable.data).data.item()
            epoch_loss += loss.item()
        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain / trainSamples) * 100
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch+1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch + 1, trainAccuracy))
        if valDatasetDir is not None:
            # Validate every epoch; save the model whenever accuracy improves.
            if (epoch + 1) % 1 == 0:
                model.train(False)
                val_loss_epoch = 0
                val_iter = 0
                numCorr = 0
                for j, (inputFlow, inputFrame, targets) in enumerate(val_loader):
                    val_iter += 1
                    inputVariableFlow = inputFlow.permute(1, 0, 2, 3, 4).cuda()
                    inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).cuda()
                    labelVariable = targets.cuda()

                    output_label,_ = model(inputVariableFlow, inputVariableFrame)
                    loss = loss_fn(output_label, labelVariable)
            
                    val_loss_epoch += loss.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += torch.sum(predicted == labelVariable.data).data.item()
                val_accuracy = (numCorr / valSamples) * 100
                avg_val_loss = val_loss_epoch / val_iter
                print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
                val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder + '/model_twoStream_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
        else:
            # No validation set: checkpoint every 10 epochs instead.
            if (epoch + 1) % 10 == 0:
                save_path_model = (model_folder + '/model_twoStream_state_dict_epoch' + str(epoch + 1) + '.pth')
                torch.save(model.state_dict(), save_path_model)
        
    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
Exemplo n.º 25
0
def main_run(dataset, stage, trainDatasetDir, valDatasetDir, stage1_dict,
             stackSize, out_dir, seqLen, trainBatchSize, valBatchSize,
             numEpochs, lr1, decay_factor, decay_step, memSize, alphaX,
             alphaY):
    """Train the attention ConvLSTM model with a self-supervised optical-flow
    regression task.

    In addition to the classification loss, the model predicts per-frame
    x/y optical flow; MSE regression losses on those predictions are scaled
    by ``alphaX``/``alphaY`` and added to the total loss.

    Stage 1 trains only the ConvLSTM cell and classifier; stage 2 loads
    stage-1 weights from ``stage1_dict`` and also fine-tunes the last ResNet
    block (layer4 convs) and ``fc``.

    Metrics go to TensorBoard and text logs under
    ``./<out_dir>/attConvLSTM/<seqLen>/stage<stage>``; the model with the
    best validation accuracy is saved there.

    Calls ``sys.exit()`` when the dataset name is unknown or the output
    directory already exists.
    """
    # Map dataset name to its number of action classes.
    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    model_folder = os.path.join(
        './', out_dir, 'attConvLSTM', str(seqLen),
        'stage' + str(stage))  # Dir for saving models and log files
    # Create the dir; refuse to overwrite an existing run.
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader: ImageNet normalization; spatial_transform2 downsamples the
    # flow targets to the 7x7 prediction grid.
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224)
    ])
    spatial_transform2 = Compose([Scale((7, 7)), ToTensor()])

    vid_seq_train = makeDataset(trainDatasetDir,
                                spatial_transform2,
                                spatial_transform=spatial_transform,
                                sequence=False,
                                numSeg=1,
                                stackSize=stackSize,
                                fmt='.png',
                                seqLen=seqLen)

    trainInstances = vid_seq_train.__len__()

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)

    if valDatasetDir is not None:
        # Validation: deterministic center crop, test phase.
        vid_seq_val = makeDataset(valDatasetDir,
                                  spatial_transform2,
                                  spatial_transform=Compose(
                                      [Scale(256), CenterCrop(224)]),
                                  sequence=False,
                                  numSeg=1,
                                  stackSize=stackSize,
                                  fmt='.png',
                                  phase='Test',
                                  seqLen=seqLen)
        valInstances = vid_seq_val.__len__()

        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)

    train_params = []
    if stage == 1:
        # Stage 1: freeze everything; only lstm_cell and classifier are
        # re-enabled for training further down.
        model = attentionModel(num_classes=num_classes, mem_size=memSize)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:  # stage == 2
        # Load stage-1 weights, freeze everything, then unfreeze the last
        # ResNet block and the fc head for fine-tuning.
        model = attentionModel(num_classes=num_classes, mem_size=memSize)
        model.load_state_dict(torch.load(stage1_dict), strict=False)
        model.train(False)

        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]

        # Put the unfrozen sub-modules into training mode.
        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)

    # ConvLSTM cell and classifier are trained in both stages.
    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.lstm_cell.train(True)

    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()
    loss_fn_regression = nn.MSELoss()  # Loss function for the regression model

    optimizer_fn = torch.optim.Adam(train_params,
                                    lr=lr1,
                                    weight_decay=4e-5,
                                    eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0  # best validation accuracy seen so far
    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        x_loss = 0
        y_loss = 0
        trainSamples = 0
        iterPerEpoch = 0

        # Re-enable training mode each epoch (validation below switches the
        # whole model to eval mode).
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)

        if stage == 2:
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)

        for flowX, flowY, inputs, targets in train_loader:
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()

            flowX = flowX.cuda()
            flowY = flowY.cuda()

            # Reorder (batch, seq, C, H, W) -> (seq, batch, C, H, W).
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)

            output_label, _, flowXprediction, flowYprediction = model(
                inputVariable)

            # Flatten predictions and targets so the MSE regression sees
            # matching 1-D tensors.
            flowXprediction = flowXprediction.view(-1)
            flowX = torch.reshape(flowX, (-1, )).float()

            flowYprediction = flowYprediction.view(-1)
            flowY = torch.reshape(flowY, (-1, )).float()

            # Weight the self-supervised losses by alphaX/alphaY and add the
            # classification loss.
            lossX = alphaX * loss_fn_regression(flowXprediction, flowX)
            lossY = alphaY * loss_fn_regression(flowYprediction, flowY)
            loss = loss_fn(output_label, labelVariable)

            total_loss = loss + lossX + lossY
            total_loss.backward()

            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            x_loss += lossX.item()
            y_loss += lossY.item()
            epoch_loss += loss.item()

        optim_scheduler.step()
        avg_x_loss = x_loss / iterPerEpoch
        avg_y_loss = y_loss / iterPerEpoch
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        print('X loss after {} epoch = {}% '.format(epoch + 1, avg_x_loss))
        print('Y loss after {} epoch = {}% '.format(epoch + 1, avg_y_loss))

        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        writer.add_scalar('x_train_loss', avg_x_loss, epoch + 1)
        writer.add_scalar('y_train_loss', avg_y_loss, epoch + 1)

        train_log_loss.write('Training X loss after {} epoch= {}'.format(
            epoch + 1, avg_x_loss))
        train_log_loss.write('Training Y loss after {} epoch= {}'.format(
            epoch + 1, avg_y_loss))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if valDatasetDir is not None:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_x_loss = 0
            val_y_loss = 0
            val_samples = 0
            numCorr = 0

            with torch.no_grad():
                for flowX, flowY, inputs, targets in val_loader:
                    val_iter += 1
                    val_samples += inputs.size(0)

                    flowX = flowX.cuda()
                    flowY = flowY.cuda()

                    inputVariable = Variable(
                        inputs.permute(1, 0, 2, 3, 4).cuda())
                    # FIX: `targets.cuda(async=True)` is a SyntaxError on
                    # Python >= 3.7 (`async` is a reserved keyword); the
                    # replacement keyword is `non_blocking`.
                    labelVariable = Variable(targets.cuda(non_blocking=True))

                    output_label, _, flowXprediction, flowYprediction = model(
                        inputVariable)

                    # Flatten predictions and targets, mirroring the
                    # training loop above.
                    flowXprediction = flowXprediction.view(-1)
                    flowX = torch.reshape(flowX, (-1, )).float()

                    # FIX: originally reused flowXprediction/flowX here
                    # (copy-paste bug), so the validation Y loss duplicated
                    # the X loss instead of measuring Y regression.
                    flowYprediction = flowYprediction.view(-1)
                    flowY = torch.reshape(flowY, (-1, )).float()

                    lossX = alphaX * loss_fn_regression(flowXprediction, flowX)
                    lossY = alphaY * loss_fn_regression(flowYprediction, flowY)

                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()
                    val_x_loss += lossX.item()
                    val_y_loss += lossY.item()

                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.cuda()).sum()

            avg_x_val_loss = val_x_loss / val_iter
            avg_y_val_loss = val_y_loss / val_iter
            val_accuracy = torch.true_divide(numCorr, val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter

            print('Val X Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_x_val_loss))
            print('Val Y Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_y_val_loss))
            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))

            writer.add_scalar('val x/epoch_loss', avg_x_val_loss, epoch + 1)
            writer.add_scalar('val y/epoch_loss', avg_y_val_loss, epoch + 1)
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val X Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_x_val_loss))
            val_log_loss.write('Val Y Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_y_val_loss))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))

            # Save whenever validation accuracy improves.
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
Exemplo n.º 26
0
def get_train_utils(opt, model_parameters):
    """Assemble training components for the Paddle backend.

    Builds the spatial/temporal transform pipelines, the training data
    reader, file loggers, an LR scheduler and a Momentum optimizer, all
    driven by fields of ``opt``.

    Returns a tuple ``(train_loader, train_logger, train_batch_logger,
    optimizer, scheduler)``.
    """
    assert opt.train_crop in ['random', 'corner', 'center']
    # --- spatial transform pipeline -------------------------------------
    transforms = []
    if opt.train_crop == 'random':
        transforms.append(
            RandomResizedCrop(
                opt.sample_size,
                (opt.train_crop_min_scale, 1.0),
                (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio)))
    elif opt.train_crop == 'corner':
        # Five crop scales: powers of 2**(-1/4) starting from 1.0.
        step = 1 / (2**(1 / 4))
        scales = [1.0]
        for _ in range(4):
            scales.append(scales[-1] * step)
        transforms.append(MultiScaleCornerCrop(opt.sample_size, scales))
    else:  # 'center' (guaranteed by the assert above)
        transforms.extend(
            [Resize(opt.sample_size), CenterCrop(opt.sample_size)])
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        transforms.append(RandomHorizontalFlip())
    transforms.append(ToArray())
    if opt.colorjitter:
        transforms.append(ColorJitter())
    if opt.input_type == 'flow':
        # Optical flow has only two meaningful channels.
        transforms.append(PickFirstChannels(n=2))
    transforms.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(transforms)

    # --- temporal transform pipeline ------------------------------------
    assert opt.train_t_crop in ['random', 'center']
    t_transforms = []
    if opt.sample_t_stride > 1:
        t_transforms.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        t_transforms.append(TemporalRandomCrop(opt.sample_duration))
    else:  # 'center'
        t_transforms.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(t_transforms)

    # --- data, loggers, optimizer ---------------------------------------
    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    train_loader = paddle.batch(train_data.reader, batch_size=opt.batch_size)

    train_logger = Logger(opt.result_path / 'train.log',
                          ['epoch', 'loss', 'acc', 'lr'])
    train_batch_logger = Logger(
        opt.result_path / 'train_batch.log',
        ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

    assert opt.lr_scheduler in ['plateau', 'multistep']
    # Plateau scheduling needs validation metrics to react to.
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = ReduceLROnPlateau(learning_rate=opt.learning_rate,
                                      mode='min',
                                      patience=opt.plateau_patience)
    else:
        scheduler = MultiStepDecay(learning_rate=opt.learning_rate,
                                   milestones=opt.multistep_milestones)

    optimizer = fluid.optimizer.MomentumOptimizer(
        learning_rate=scheduler,
        momentum=opt.momentum,
        parameter_list=model_parameters,
        use_nesterov=opt.nesterov,
        regularization=fluid.regularizer.L2Decay(
            regularization_coeff=opt.weight_decay))

    return (train_loader, train_logger, train_batch_logger, optimizer,
            scheduler)
Exemplo n.º 27
0
def get_train_utils(opt, model_parameters):
    """Assemble training components for the PyTorch backend.

    Builds the spatial/temporal transform pipelines, a (optionally
    distributed) DataLoader, master-node-only loggers, an SGD optimizer and
    an LR scheduler, all driven by fields of ``opt``.

    Returns a tuple ``(train_loader, train_sampler, train_logger,
    train_batch_logger, optimizer, scheduler)``.
    """
    assert opt.train_crop in ['random', 'corner', 'center']
    # --- spatial transform pipeline -------------------------------------
    transforms = []
    if opt.train_crop == 'random':
        transforms.append(
            RandomResizedCrop(
                opt.sample_size,
                (opt.train_crop_min_scale, 1.0),
                (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio)))
    elif opt.train_crop == 'corner':
        # Five crop scales: powers of 2**(-1/4) starting from 1.0.
        step = 1 / (2**(1 / 4))
        scales = [1.0]
        for _ in range(4):
            scales.append(scales[-1] * step)
        transforms.append(MultiScaleCornerCrop(opt.sample_size, scales))
    else:  # 'center' (guaranteed by the assert above)
        transforms.extend(
            [Resize(opt.sample_size), CenterCrop(opt.sample_size)])
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        transforms.append(RandomHorizontalFlip())
    if opt.colorjitter:
        transforms.append(ColorJitter())
    transforms.append(ToTensor())
    if opt.input_type == 'flow':
        # Optical flow has only two meaningful channels.
        transforms.append(PickFirstChannels(n=2))
    transforms.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(transforms)

    # --- temporal transform pipeline ------------------------------------
    assert opt.train_t_crop in ['random', 'center']
    t_transforms = []
    if opt.sample_t_stride > 1:
        t_transforms.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        t_transforms.append(TemporalRandomCrop(opt.sample_duration))
    else:  # 'center'
        t_transforms.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(t_transforms)

    # --- data loader (distributed-aware) --------------------------------
    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    if opt.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_data)
    else:
        train_sampler = None
    # A sampler and shuffle=True are mutually exclusive.
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=opt.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=opt.n_threads,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               worker_init_fn=worker_init_fn)

    # Only the master node writes log files.
    train_logger = None
    train_batch_logger = None
    if opt.is_master_node:
        train_logger = Logger(opt.result_path / 'train.log',
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            opt.result_path / 'train_batch.log',
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

    # Nesterov momentum requires zero dampening.
    dampening = 0 if opt.nesterov else opt.dampening
    optimizer = SGD(model_parameters,
                    lr=opt.learning_rate,
                    momentum=opt.momentum,
                    dampening=dampening,
                    weight_decay=opt.weight_decay,
                    nesterov=opt.nesterov)

    assert opt.lr_scheduler in ['plateau', 'multistep']
    # Plateau scheduling needs validation metrics to react to.
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min', patience=opt.plateau_patience)
    else:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             opt.multistep_milestones)

    return (train_loader, train_sampler, train_logger, train_batch_logger,
            optimizer, scheduler)
Exemplo n.º 28
0
#     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
# ])

# transform_test = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
# ])
# Build train/test datasets and loaders from command-line arguments.
data_path = args.datapath
seqLen=args.seqLen
testBatchSize=1  # test loader evaluates one sequence at a time

trainX, trainY, testX, testY = make_split(data_path)
# ImageNet normalization statistics.
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]
normalize = Normalize(mean=mean, std=std)
# Training augmentation: resize, random flip, multi-scale corner crop.
spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                             ToTensor(), normalize])

vidSeqTrain = makeDataset(trainX, trainY, spatial_transform=spatial_transform,
                                seqLen=seqLen)

trainLoader = torch.utils.data.DataLoader(vidSeqTrain, batch_size=args.trainBatchSize,
                            shuffle=True, num_workers=0)

# Test-time transform: deterministic center crop plus flipped copies
# (FlippedImagesTest presumably averages predictions over flips — verify).
test_spatial_transform = Compose([Scale(256), CenterCrop(224), FlippedImagesTest(mean=mean, std=std)])

vidSeqTest = makeDataset(testX, testY, seqLen=seqLen,
    spatial_transform=test_spatial_transform)

testLoader = torch.utils.data.DataLoader(vidSeqTest, batch_size=testBatchSize,
                        shuffle=False, num_workers=1)
Exemplo n.º 29
0
def main_run(numEpochs, lr, stepSize, decayRate, trainBatchSize, seqLen,
             memSize, evalInterval, evalMode, numWorkers, outDir,
             fightsDir_train, noFightsDir_train, fightsDir_test,
             noFightsDir_test):
    """Train a ViolenceModel on fight / no-fight clips and periodically
    evaluate it, checkpointing the best model by test accuracy.

    Args:
        numEpochs: total number of training epochs.
        lr: RMSprop learning rate.
        stepSize: StepLR period (epochs between lr decays).
        decayRate: StepLR multiplicative decay factor.
        trainBatchSize: training batch size.
        seqLen: frames sampled per clip.
        memSize: ConvLSTM memory size forwarded to ViolenceModel.
        evalInterval: run evaluation every this many epochs.
        evalMode: 'centerCrop', 'tenCrops', 'fiveCrops', or 'horFlip'.
        numWorkers: dataloader workers for training (test uses half).
        outDir: suffix for the './experiments_<outDir>' output directory.
        fightsDir_train, noFightsDir_train, fightsDir_test,
        noFightsDir_test: dataset directories handed to make_split.

    Returns:
        True when training completes.

    Raises:
        ValueError: if evalMode is not one of the supported modes.
    """
    trainDataset, trainLabels, trainNumFrames = make_split(
        fightsDir_train, noFightsDir_train)
    testDataset, testLabels, testNumFrames = make_split(
        fightsDir_test, noFightsDir_test)

    # ImageNet normalization statistics.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vidSeqTrain = VideoDataset(trainDataset,
                               trainLabels,
                               trainNumFrames,
                               spatial_transform=spatial_transform,
                               seqLen=seqLen)

    trainLoader = torch.utils.data.DataLoader(vidSeqTrain,
                                              batch_size=trainBatchSize,
                                              shuffle=True,
                                              num_workers=numWorkers,
                                              pin_memory=True,
                                              drop_last=True)

    # Test-time augmentation. Multi-crop modes emit several crops per clip,
    # so the loader batch size stays 1 and crop predictions are averaged.
    if evalMode == 'centerCrop':
        test_spatial_transform = Compose(
            [Scale(256), CenterCrop(224),
             ToTensor(), normalize])
    elif evalMode == 'tenCrops':
        test_spatial_transform = Compose(
            [Scale(256), TenCrops(size=224, mean=mean, std=std)])
    elif evalMode == 'fiveCrops':
        test_spatial_transform = Compose(
            [Scale(256), FiveCrops(size=224, mean=mean, std=std)])
    elif evalMode == 'horFlip':
        test_spatial_transform = Compose([
            Scale(256),
            CenterCrop(224),
            FlippedImagesTest(mean=mean, std=std)
        ])
    else:
        # Fail fast instead of a NameError at dataset construction below.
        raise ValueError('Unsupported evalMode: {}'.format(evalMode))
    testBatchSize = 1

    vidSeqTest = VideoDataset(testDataset,
                              testLabels,
                              testNumFrames,
                              seqLen=seqLen,
                              spatial_transform=test_spatial_transform)

    testLoader = torch.utils.data.DataLoader(vidSeqTest,
                                             batch_size=testBatchSize,
                                             shuffle=False,
                                             num_workers=int(numWorkers / 2),
                                             pin_memory=True)

    numTrainInstances = len(vidSeqTrain)
    numTestInstances = len(vidSeqTest)

    print('Number of training samples = {}'.format(numTrainInstances))
    print('Number of testing samples = {}'.format(numTestInstances))

    modelFolder = './experiments_' + outDir  # Dir for saving models and log files
    # Refuse to clobber a previous run's outputs.
    if os.path.exists(modelFolder):
        print(modelFolder + ' exists!!!')
        sys.exit()
    else:
        os.makedirs(modelFolder)
    # Log files
    writer = SummaryWriter(modelFolder)
    trainLogLoss = open((modelFolder + '/trainLogLoss.txt'), 'w')
    trainLogAcc = open((modelFolder + '/trainLogAcc.txt'), 'w')
    testLogLoss = open((modelFolder + '/testLogLoss.txt'), 'w')
    testLogAcc = open((modelFolder + '/testLogAcc.txt'), 'w')

    model = ViolenceModel(mem_size=memSize)

    # All parameters are trainable.
    trainParams = []
    for params in model.parameters():
        params.requires_grad = True
        trainParams += [params]
    model.train(True)
    model.cuda()

    lossFn = nn.CrossEntropyLoss()
    optimizerFn = torch.optim.RMSprop(trainParams, lr=lr)
    optimScheduler = torch.optim.lr_scheduler.StepLR(optimizerFn, stepSize,
                                                     decayRate)

    # Only checkpoint once test accuracy exceeds this floor (percent).
    minAccuracy = 50

    for epoch in range(numEpochs):
        epochLoss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.train(True)
        print('Epoch = {}'.format(epoch + 1))
        writer.add_scalar('lr', optimizerFn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, targets) in enumerate(trainLoader):
            iterPerEpoch += 1
            optimizerFn.zero_grad()
            # (batch, seq, C, H, W) -> (seq, batch, C, H, W) for the ConvLSTM.
            inputVariable1 = inputs.permute(1, 0, 2, 3, 4).cuda()
            labelVariable = targets.cuda()
            outputLabel = model(inputVariable1)
            loss = lossFn(outputLabel, labelVariable)
            loss.backward()
            optimizerFn.step()
            _, predicted = torch.max(outputLabel.data, 1)
            numCorrTrain += (predicted == labelVariable).sum().item()
            # .item() replaces the removed loss.data[0] indexing.
            epochLoss += loss.item()
        # Step the scheduler AFTER the epoch's optimizer steps (PyTorch >=1.1
        # ordering; stepping first skips the initial lr value).
        optimScheduler.step()
        avgLoss = epochLoss / iterPerEpoch
        trainAccuracy = (numCorrTrain / numTrainInstances) * 100
        print('Training: Loss = {} | Accuracy = {}% '.format(
            avgLoss, trainAccuracy))
        writer.add_scalar('train/epochLoss', avgLoss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        trainLogLoss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avgLoss))
        trainLogAcc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if (epoch + 1) % evalInterval == 0:
            model.train(False)
            print('Evaluating...')
            testLossEpoch = 0
            testIter = 0
            numCorrTest = 0
            # torch.no_grad() replaces the removed volatile=True Variables.
            with torch.no_grad():
                for j, (inputs, targets) in enumerate(testLoader):
                    testIter += 1
                    if evalMode == 'centerCrop':
                        inputVariable1 = inputs.permute(1, 0, 2, 3, 4).cuda()
                    else:
                        # Multi-crop modes: inputs[0] is the stack of crops
                        # for the single clip in the batch.
                        inputVariable1 = inputs[0].cuda()
                    # non_blocking= replaces async=, a keyword since Py3.7.
                    labelVariable = targets.cuda(non_blocking=True)
                    outputLabel = model(inputVariable1)
                    # Average logits over crops/flips before the loss.
                    outputLabel_mean = torch.mean(outputLabel, 0, True)
                    testLoss = lossFn(outputLabel_mean, labelVariable)
                    testLossEpoch += testLoss.item()
                    _, predicted = torch.max(outputLabel_mean.data, 1)
                    numCorrTest += (predicted.cpu() == targets).sum().item()
            testAccuracy = (numCorrTest / numTestInstances) * 100
            avgTestLoss = testLossEpoch / testIter
            print('Testing: Loss = {} | Accuracy = {}% '.format(
                avgTestLoss, testAccuracy))
            writer.add_scalar('test/epochloss', avgTestLoss, epoch + 1)
            writer.add_scalar('test/accuracy', testAccuracy, epoch + 1)
            testLogLoss.write('Test Loss after {} epochs = {}\n'.format(
                epoch + 1, avgTestLoss))
            testLogAcc.write('Test Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, testAccuracy))
            if testAccuracy > minAccuracy:
                # NOTE: saves the whole model object (original behavior),
                # not just the state_dict.
                savePathClassifier = (modelFolder + '/bestModel.pth')
                torch.save(model, savePathClassifier)
                minAccuracy = testAccuracy
    trainLogAcc.close()
    testLogAcc.close()
    trainLogLoss.close()
    testLogLoss.close()
    writer.export_scalars_to_json(modelFolder + "/all_scalars.json")
    writer.close()
    return True
Exemplo n.º 30
0
def main_run(stage, model, supervision, train_data_dir, val_data_dir, stage1dict, out_dir, seq_len, train_batch_size,
             val_batch_size, num_epochs, lr1, lr_suphead, lr_resnet, alpha, decay_factor, decay_step, 
             mem_size):
    """Two-stage trainer for an attention/ConvLSTM action model on RGB frames
    with an optional flow-supervision auxiliary loss.

    Stage 1 freezes the whole backbone and trains only the LSTM cell,
    classifier, and 'dinam' head; stage 2 loads the stage-1 weights and
    additionally fine-tunes selected layer4 convs and the ResNet fc layer.

    Args:
        stage: 1 or 2 (see above).
        model: 'MyNet' or 'ConvLSTMDynamic' (model class name as a string).
        supervision: if truthy, add alpha-weighted MSE loss between the
            model's auxiliary output and the target maps `m`
            (forced off in stage 1).
        train_data_dir / val_data_dir: dataset roots (val may be None).
        stage1dict: path to the stage-1 state dict (used only in stage 2).
        out_dir: output root; logs/checkpoints go to ./<out_dir>/rgb.
        seq_len: frames per clip.
        train_batch_size / val_batch_size: loader batch sizes.
        num_epochs: training epochs.
        lr1 / lr_suphead / lr_resnet: per-param-group learning rates.
        alpha: weight of the supervision loss.
        decay_factor / decay_step: MultiStepLR gamma and milestones.
        mem_size: ConvLSTM memory size.
    """
    num_classes = 61
    if model == 'MyNet':
        model = MyNet(num_classes=num_classes, mem_size=mem_size)
    elif model == 'ConvLSTMDynamic':
        model = ConvLSTMDynamic(num_classes=num_classes, mem_size=mem_size)
    else:
        print('Model not found')
        sys.exit()
    
    model_folder = os.path.join('./', out_dir, 'rgb')  # Dir for saving models and log files
    # Refuse to overwrite a previous run's outputs.
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader (ImageNet normalization; unnormalize is used below to dump
    # supervision images back to viewable PNGs).
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    unnormalize = UnNormalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                 ToTensor(), normalize])
    vid_seq_train = MakeDataset_flowsupervision(train_data_dir, train=True,
                                spatial_transform=spatial_transform, 
                                seq_len=seq_len, fmt='.png')

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=train_batch_size,
                                               shuffle=True, num_workers=2, pin_memory=True)
    if val_data_dir is not None:
        vid_seq_val = MakeDataset_flowsupervision(val_data_dir, train=False,
                                  spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                                  seq_len=seq_len, fmt='.png')

        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=val_batch_size,
                                                 shuffle=False, num_workers=2, pin_memory=True)

    # Three param groups with separate learning rates:
    #   train_params  -> fine-tuned ResNet layers (stage 2 only, lr_resnet)
    #   train_params3 -> supervision head 'dinam' (lr_suphead)
    #   train_params2 -> LSTM cell + classifier (lr1)
    train_params = []
    train_params3 = []
    train_params2 = []
    if stage == 1:
        # Stage 1: backbone fully frozen; supervision disabled regardless of
        # the caller's flag.
        supervision = False
        model.eval()
        for params in model.parameters():
            params.requires_grad = False
    else:
        # Stage 2: start from stage-1 weights, freeze everything, then
        # selectively unfreeze layer4 convs and the fc layer below.
        model.load_state_dict(torch.load(stage1dict),strict=False)
        model.train()
        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]

        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)
        model.dinam.train()
        
    # Always trained in both stages: LSTM cell, classifier, and 'dinam'.
    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params2 += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params2 += [params]
    for params in model.dinam.parameters():
        params.requires_grad = True
        train_params3 += [params]

    model.train()
    model.cuda()
    loss_sup = nn.MSELoss()      # auxiliary flow-supervision loss
    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.Adam([{"params": train_params, "lr": lr_resnet}, 
                                     {"params": train_params3, "lr": lr_suphead},
                                     {"params": train_params2, "lr": lr1}], 
                                    lr=lr1, weight_decay=4e-5, eps=1e-4)
    
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step,
                                                           gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0   # best validation accuracy so far (checkpoint gate)

    for epoch in range(num_epochs):
        epoch_loss = 0
        num_corr_train = 0
        train_samples = 0
        iter_per_epoch = 0
        epoch_loss_ = 0    # accumulated supervision loss (0 if disabled)
        # Re-enter train mode on the trainable submodules (eval() is called
        # on the whole model after each validation pass).
        model.lstm_cell.train()
        model.classifier.train()
        if stage == 2:
            model.resNet.layer4[0].conv1.train()
            model.resNet.layer4[0].conv2.train()
            model.resNet.layer4[1].conv1.train()
            model.resNet.layer4[1].conv2.train()
            model.resNet.layer4[2].conv1.train()
            model.resNet.layer4[2].conv2.train()
            model.dinam.train()
            model.resNet.fc.train()
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1)

        for i, (inputs, targets, m) in enumerate(train_loader):
            train_iter += 1
            iter_per_epoch += 1
            optimizer_fn.zero_grad()
            # (batch, seq, C, H, W) -> (seq, batch, C, H, W); m are the
            # supervision target maps with the same layout.
            images = inputs.permute(1, 0, 2, 3, 4).cuda()
            labels = targets.cuda()
            m = m.permute(1, 0, 2, 3, 4).cuda()
            train_samples += inputs.size(0)
            output_label, _, output_super = model(images)
            if supervision:
                loss_ = loss_sup(output_super, m.cuda())
                epoch_loss_ += loss_.data.item()
            loss = loss_fn(output_label, labels)
            epoch_loss += loss.data.item()
            if supervision:
                # Total loss = classification + alpha * supervision.
                loss = loss + loss_*alpha
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            num_corr_train += (predicted == targets.cuda()).sum()
            
        optim_scheduler.step()
        avg_loss = epoch_loss/iter_per_epoch
        train_acc = (num_corr_train / float(train_samples)) * 100
        # Dump the last batch's supervision inputs/outputs every other epoch
        # for visual inspection.
        output_super = output_super.cpu()
        m = m.cpu()
        if (epoch % 2 == 0) and supervision:
            d = f"./Results/epoch{epoch}"
            os.makedirs(d)
            for i in range(m.shape[0]):
                save_image(unnormalize(m[i, 0]), d+f'/inp{i}.png')
                save_image(unnormalize(output_super[i, 0].detach()), d+f'/out{i}.png')
        avg_loss_ = epoch_loss_/float(iter_per_epoch)
        print('Train: Epoch = {} | Loss = {} | Accuracy = {} | supervision_loss {}'.format(epoch+1, avg_loss,
                                                                                           train_acc, avg_loss_))
        train_log_loss.write('Train Loss after {} epochs = {}\n'.format(epoch + 1, avg_loss))
        train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(epoch + 1, train_acc))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch+1)
        writer.add_scalar('train/accuracy', train_acc, epoch+1)

        if val_data_dir is not None:
            # Validate every epoch; save whenever accuracy improves.
            if (epoch+1) % 1 == 0:
                model.eval()
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                num_corr = 0
                for j, (inputs, targets, _) in enumerate(val_loader):
                    val_iter += 1
                    val_samples += inputs.size(0)
                    with torch.no_grad():
                        images = inputs.permute(1, 0, 2, 3, 4).cuda()
                        labels = targets.cuda(non_blocking=True)
                        output_label, _, _ = model(images)
                        val_loss = loss_fn(output_label, labels)
                        val_loss_epoch += val_loss.data.item()
                        _, predicted = torch.max(output_label.data, 1)
                        num_corr += (predicted == targets.cuda()).sum()
                val_accuracy = (num_corr / float(val_samples)) * 100
                avg_val_loss = val_loss_epoch / val_iter
                print('val: Epoch = {} | Loss = {} | Accuracy = {} '.format(epoch+1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
                val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
        else:
            # No validation set: checkpoint unconditionally every 10 epochs.
            if (epoch+1) % 10 == 0:
                save_path_model = (model_folder + '/model_rgb_state_dict_epoch' + str(epoch+1) + '.pth')
                torch.save(model.state_dict(), save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()