Example #1
def main():
    opt = parse_opts()

    ecd_name, cls_name = opt.model_name.split('-')
    ecd_model = get_encoder_net(ecd_name)
    cls_model = get_end_net(cls_name)

    cfg.encoder_model = ecd_name
    cfg.classification_model = cls_name

    if opt.debug:
        cfg.debug = opt.debug
    else:
        if opt.tensorboard == 'TEST':
            cfg.tensorboard = opt.model_name
        else:
            cfg.tensorboard = opt.tensorboard
            cfg.flag = opt.flag
    model = cls_model(cfg,
                      encoder=CNNencoder(
                          cfg,
                          ecd_model(pretrained=True, path=opt.encoder_model)))
    cfg.video_path = os.path.join(cfg.root_path, cfg.video_path)
    cfg.annotation_path = os.path.join(cfg.root_path, cfg.annotation_path)

    cfg.list_all_member()

    torch.manual_seed(cfg.manual_seed)
    print('##########################################')
    print('####### model only supports a single GPU')
    print('##########################################')
    model = model.cuda()
    print(model)
    criterion = nn.CrossEntropyLoss()
    if cfg.cuda:
        criterion = criterion.cuda()

    norm_method = Normalize([0, 0, 0], [1, 1, 1])

    print('##########################################')
    print('####### train')
    print('##########################################')
    assert cfg.train_crop in ['random', 'corner', 'center']
    if cfg.train_crop == 'random':
        crop_method = MultiScaleRandomCrop(cfg.scales, cfg.sample_size)
    elif cfg.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(cfg.scales, cfg.sample_size)
    elif cfg.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(cfg.scales,
                                           cfg.sample_size,
                                           crop_positions=['c'])
    spatial_transform = Compose([
        crop_method,
        RandomHorizontalFlip(),
        ToTensor(cfg.norm_value), norm_method
    ])
    temporal_transform = TemporalRandomCrop(cfg.sample_duration)
    target_transform = ClassLabel()
    training_data = get_training_set(cfg, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=cfg.batch_size,
                                               shuffle=True,
                                               num_workers=cfg.n_threads,
                                               drop_last=False,
                                               pin_memory=True)
    optimizer = model.get_optimizer(lr1=cfg.lr, lr2=cfg.lr2)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               'min',
                                               patience=cfg.lr_patience)
    print('##########################################')
    print('####### val')
    print('##########################################')
    spatial_transform = Compose([
        Scale(cfg.sample_size),
        CenterCrop(cfg.sample_size),
        ToTensor(cfg.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(cfg.sample_duration)
    target_transform = ClassLabel()
    validation_data = get_validation_set(cfg, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=cfg.batch_size,
                                             shuffle=False,
                                             num_workers=cfg.n_threads,
                                             drop_last=False,
                                             pin_memory=True)
    print('##########################################')
    print('####### run')
    print('##########################################')
    if cfg.debug:
        logger = None
    else:
        path = get_log_dir(cfg.logdir, name=cfg.tensorboard, flag=cfg.flag)
        logger = Logger(logdir=path)
        cfg.save_config(path)

    for i in range(cfg.begin_epoch, cfg.n_epochs + 1):
        train_epoch(i, train_loader, model, criterion, optimizer, cfg, logger)
        validation_loss = val_epoch(i, val_loader, model, criterion, cfg,
                                    logger)

        scheduler.step(validation_loss)
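Note that the loop above steps ReduceLROnPlateau on the validation loss, so the learning rate only decays once validation stops improving. A minimal self-contained sketch of that pattern (the linear model and constant loss are stand-ins for cls_model and val_epoch):

import torch.nn as nn
from torch.optim import SGD, lr_scheduler

model = nn.Linear(10, 2)                        # stand-in for the video model
optimizer = SGD(model.parameters(), lr=0.1)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2)

for epoch in range(10):
    # train_epoch(...) would run here
    val_loss = 1.0                              # stand-in for val_epoch(...)
    scheduler.step(val_loss)                    # decays lr when val_loss plateaus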
Example #2
File: test_rgb.py  Project: i-amgeek/LSTA
def main_run(dataset, root_dir, checkpoint_path, seqLen, testBatchSize, memSize, outPool_size, split):


    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    test_split = split
    c_cam_classes = outPool_size

    if dataset == 'gtea_61':
        num_classes = 61
    elif dataset == 'gtea_71':
        num_classes = 71
    elif dataset == 'egtea_gaze+':
        num_classes = 106
    else:
        print('Wrong dataset')
        sys.exit()
    dataset_dir = os.path.join(root_dir, dataset)
    print('Preparing dataset...')

    if dataset == 'egtea_gaze+':
        trainDatasetF, testDatasetF, trainLabels, testLabels, trainNumFrames, testNumFrames = gen_split_egtea_gazePlus(dataset_dir,
                                                                                                               test_split)
    else:
        trainDatasetF, testDatasetF, trainLabels, testLabels, trainNumFrames, testNumFrames, _ = gen_split(dataset_dir,
                                                                                                  test_split)


    vid_seq_test = makeDataset(testDatasetF, testLabels, testNumFrames,
                               spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                               fmt='.jpg', seqLen=seqLen)

    print('Number of test samples = {}'.format(len(vid_seq_test)))

    sample = vid_seq_test[0]
    print("Dataset shape: ", len(sample), sample[0].shape, end='\n\n\n')
    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=testBatchSize,
                            shuffle=False, num_workers=0, pin_memory=True)


    model = attentionModel(num_classes=num_classes, mem_size=memSize, c_cam_classes=c_cam_classes)
    if os.path.exists(checkpoint_path):
        print('Loading weights from checkpoint file {}'.format(checkpoint_path))
    else:
        print('Checkpoint file {} does not exist'.format(checkpoint_path))
        sys.exit()
    last_checkpoint = torch.load(checkpoint_path) #, map_location=torch.device('cpu'))
    model.load_state_dict(last_checkpoint['model_state_dict'])
    model.cuda()
    model.eval()

    print('Testing...')
    test_iter = 0
    test_samples = 0
    numCorr = 0
    for j, (inputs, targets) in tqdm(enumerate(test_loader)):
        test_iter += 1
        test_samples += inputs.size(0)
        with torch.no_grad():
            print(inputs.shape, targets.shape)
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            output_label, _ = model(inputVariable)
            del inputVariable
            _, predicted = torch.max(output_label.data, 1)
            numCorr += (predicted == targets.cuda()).sum()
    test_accuracy = (numCorr.cpu().item() / test_samples) * 100
    print('Test Accuracy after = {}%'.format(test_accuracy))
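The permute(1, 0, 2, 3, 4) above converts the loader's batch-first clip tensor to the sequence-first layout the recurrent attention model consumes. A quick shape check of that conversion (the sizes are illustrative):

import torch

batch, seq_len = 4, 25
inputs = torch.randn(batch, seq_len, 3, 224, 224)   # DataLoader output: (B, T, C, H, W)
seq_first = inputs.permute(1, 0, 2, 3, 4)           # -> (T, B, C, H, W)
print(seq_first.shape)                              # torch.Size([25, 4, 3, 224, 224])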
Example #3
        norm_method = Normalize(opt.mean, opt.std)

    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.sample_size,
                                               crop_positions=['c'])
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        print("Getting the training set")
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(training_data,
                                                   batch_size=opt.batch_size,
                                                   shuffle=True,
                                                   num_workers=opt.n_threads,
                                                   pin_memory=True)
        train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            os.path.join(opt.result_path, 'train_batch.log'),
Example #4
def get_val_utils(opt):
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    spatial_transform = [
        Resize(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor()
    ]
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        TemporalEvenCrop(opt.sample_duration, opt.n_val_samples))
    temporal_transform = TemporalCompose(temporal_transform)

    val_data_checkpoint_path = opt.result_path / Path('val_data_' +
                                                      opt.dataset + '.data')
    val_collate_checkpoint_path = opt.result_path / Path('val_coll_' +
                                                         opt.dataset + '.data')
    if os.path.exists(val_data_checkpoint_path) and os.path.exists(
            val_collate_checkpoint_path) and opt.save_load_data_checkpoint:
        with open(val_data_checkpoint_path, 'rb') as filehandle:
            val_data = pickle.load(filehandle)
        with open(val_collate_checkpoint_path, 'rb') as filehandle:
            collate_fn = pickle.load(filehandle)
    else:
        val_data, collate_fn = get_validation_data(
            opt.video_path, opt.annotation_path, opt.dataset, opt.input_type,
            opt.file_type, spatial_transform, temporal_transform)
        if opt.save_load_data_checkpoint:
            with open(val_data_checkpoint_path, 'wb') as filehandle:
                pickle.dump(val_data, filehandle)
            with open(val_collate_checkpoint_path, 'wb') as filehandle:
                pickle.dump(collate_fn, filehandle)

    if opt.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_data, shuffle=False)
    else:
        val_sampler = None
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=(opt.batch_size //
                                                         opt.n_val_samples),
                                             shuffle=False,
                                             num_workers=opt.n_threads,
                                             pin_memory=True,
                                             sampler=val_sampler,
                                             worker_init_fn=worker_init_fn,
                                             collate_fn=collate_fn)

    if opt.is_master_node:
        val_logger = Logger(opt.result_path / 'val.log',
                            ['epoch', 'loss', 'acc'])
    else:
        val_logger = None

    return val_loader, val_logger
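get_val_utils caches both the dataset object and its collate function with pickle, so repeated runs skip the slow annotation parsing. The same load-or-build idiom as a generic sketch (load_or_build, build_fn, and cache_path are illustrative names, not part of the original code):

import os
import pickle

def load_or_build(cache_path, build_fn, use_cache=True):
    """Return the pickled object at cache_path, or build and cache it."""
    if use_cache and os.path.exists(cache_path):
        with open(cache_path, 'rb') as fh:
            return pickle.load(fh)
    obj = build_fn()
    if use_cache:
        with open(cache_path, 'wb') as fh:
            pickle.dump(obj, fh)
    return obj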
Example #5
#         else:
#             optimizer = optim.Adam(
#                 parameters,
#                 lr=opt.learning_rate)
#     elif opt.optimizer == 'rmsprop':
#         optimizer = optim.RMSprop(
#             parameters,
#             lr=opt.learning_rate)
#     scheduler = lr_scheduler.ReduceLROnPlateau(
#         optimizer, 'min', patience=opt.lr_patience)
if not opt.no_val:
    if opt.dataset in ['gtea', 'kth2']:
        spatial_transform = Compose([
            Scale(opt.sample_size),
            CenterCrop(opt.sample_size),
            ToTensor(opt.norm_value),
            norm_method,
        ])
    else:
        spatial_transform = Compose([
            Scale(opt.sample_size),
            CenterCrop(opt.sample_size),
            RGB2Gray(),
            ToTensor(opt.norm_value),
            norm_method,
        ])
    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = ClassLabel()
    if opt.compress == 'mask':
        spatio_temporal_transform = Coded(opt.mask_path)
    elif opt.compress == 'avg':
Example #6
def main_run(dataset, flowModel, rgbModel, stackSize, seqLen, memSize, trainDatasetDir, valDatasetDir, outDir,
             trainBatchSize, valBatchSize, lr1, numEpochs, decay_step, decay_factor):


    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    model_folder = os.path.join('./', outDir, dataset, 'twoStream')  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')


    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                 ToTensor(), normalize])

    vid_seq_train = makeDataset(trainDatasetDir,spatial_transform=spatial_transform,
                               sequence=False, numSeg=1, stackSize=stackSize, fmt='.png', seqLen=seqLen)

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                            shuffle=True, num_workers=4, pin_memory=True)

    if valDatasetDir is not None:

        vid_seq_val = makeDataset(valDatasetDir,
                                   spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                                   sequence=False, numSeg=1, stackSize=stackSize, fmt='.png', phase='Test',
                                   seqLen=seqLen)

        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                shuffle=False, num_workers=2, pin_memory=True)
        valSamples = vid_seq_val.__len__()

    model = twoStreamAttentionModel(flowModel=flowModel, frameModel=rgbModel, stackSize=stackSize, memSize=memSize,
                                    num_classes=num_classes)

    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    train_params = []

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.lstm_cell.parameters():
        train_params += [params]
        params.requires_grad = True

    for params in model.frameModel.resNet.layer4[0].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[0].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[2].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.layer4[2].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.fc.parameters():
        params.requires_grad = True
        train_params += [params]

    base_params = []
    for params in model.flowModel.layer4.parameters():
        base_params += [params]
        params.requires_grad = True

    model.cuda()

    trainSamples = vid_seq_train.__len__()
    min_accuracy = 0

    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.SGD([
        {'params': train_params},
        {'params': base_params, 'lr': 1e-4},
    ], lr=lr1, momentum=0.9, weight_decay=5e-4)

    optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn, step_size=decay_step, gamma=decay_factor)
    train_iter = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.classifier.train(True)
        model.flowModel.layer4.train(True)
        for j, (inputFlow, inputFrame, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariableFlow = Variable(inputFlow.cuda())
            inputVariableFrame = Variable(inputFrame.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            output_label = model(inputVariableFlow, inputVariableFrame)
            loss = loss_fn(output_label, labelVariable)  # CrossEntropyLoss expects raw logits
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            epoch_loss += loss.item()
        optim_scheduler.step()  # step after the epoch's optimizer updates (PyTorch >= 1.1 ordering)
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100
        print('Average training loss after {} epoch = {} '.format(epoch + 1, avg_loss))
        print('Training accuracy after {} epoch = {}% '.format(epoch + 1, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch + 1, trainAccuracy))
        if valDatasetDir is not None:
            if (epoch + 1) % 1 == 0:
                model.train(False)
                val_loss_epoch = 0
                val_iter = 0
                numCorr = 0
                with torch.no_grad():
                    for j, (inputFlow, inputFrame, targets) in enumerate(val_loader):
                        val_iter += 1
                        inputVariableFlow = Variable(inputFlow.cuda())
                        inputVariableFrame = Variable(inputFrame.permute(1, 0, 2, 3, 4).cuda())
                        labelVariable = Variable(targets.cuda())
                        output_label = model(inputVariableFlow, inputVariableFrame)
                        loss = loss_fn(output_label, labelVariable)  # raw logits, as above
                        val_loss_epoch += loss.item()
                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += (predicted == labelVariable.data).sum()
                val_accuracy = torch.true_divide(numCorr, valSamples) * 100
                avg_val_loss = val_loss_epoch / val_iter
                print('Val Loss after {} epochs, loss = {}'.format(epoch + 1, avg_val_loss))
                print('Val Accuracy after {} epochs = {}%'.format(epoch + 1, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
                val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder + '/model_twoStream_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
        else:
            if (epoch + 1) % 10 == 0:
                save_path_model = (model_folder + '/model_twoStream_state_dict_epoch' + str(epoch + 1) + '.pth')
                torch.save(model.state_dict(), save_path_model)
    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
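The SGD call above relies on per-group learning rates: the freshly attached classifier/LSTM parameters train at lr1 while the pretrained flow-stream layer4 runs at a smaller 1e-4. The same idiom in isolation (the two-layer model is only a placeholder):

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(8, 8), nn.Linear(8, 2))
head_params = list(model[1].parameters())      # new layers: full learning rate
backbone_params = list(model[0].parameters())  # pretrained layers: slower rate

optimizer = torch.optim.SGD(
    [{'params': head_params},                  # uses the default lr below
     {'params': backbone_params, 'lr': 1e-4}],
    lr=1e-2, momentum=0.9, weight_decay=5e-4)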
Example #7
def main_run(dataset, model_state_dict, dataset_dir, stackSize, seqLen,
             memSize):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    testBatchSize = 1
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224),
         ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               sequence=False,
                               numSeg=1,
                               stackSize=stackSize,
                               fmt='.png',
                               phase='Test',
                               seqLen=seqLen)

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    model = twoStreamAttentionModel(stackSize=5,
                                    memSize=512,
                                    num_classes=num_classes)
    model.load_state_dict(torch.load(model_state_dict))

    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    model.cuda()

    test_samples = len(vid_seq_test)
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorrTwoStream = 0

    predicted_labels = []
    true_labels = []
    with torch.no_grad():
        for j, (inputFlow, inputFrame, targets) in enumerate(test_loader):
            inputVariableFrame = Variable(
                inputFrame.permute(1, 0, 2, 3, 4).cuda())
            inputVariableFlow = Variable(inputFlow.cuda())
            output_label = model(inputVariableFlow, inputVariableFrame)
            _, predictedTwoStream = torch.max(output_label.data, 1)
            numCorrTwoStream += (predictedTwoStream == targets.cuda()).sum()
            predicted_labels.append(predictedTwoStream.item())
            true_labels.append(targets.item())

    test_accuracyTwoStream = torch.true_divide(numCorrTwoStream,
                                               test_samples) * 100

    #Debugging output
    '''
    print(f'The class of predicted is {type(predicted_labels)}')
    print(f'The class of the true labels is {type(true_labels)}')

    print(f'Predicted {np.array(predicted_labels).shape}')
    print(f'True {np.array(true_labels).shape}')

    print(predicted_labels)
    print(true_labels)'''
    #End of debugging output

    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    print('Accuracy {:.02f}%'.format(test_accuracyTwoStream))

    ticks = np.linspace(0, 60, num=61)  # one tick per class; assumes the 61-class split
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-twoStreamJoint.jpg', bbox_inches='tight')
    plt.show()
Example #8
                  last_fc=True)

# In[10]:

model.load_state_dict(out_state_dict)

# In[11]:

model.eval()

# In[169]:

spatial_transform = Compose([
    Scale(sample_size),
    CenterCrop(sample_size),
    ToTensor(),
    Normalize(mean, [1, 1, 1])
])
temporal_transform = LoopPadding(sample_duration)

# In[ ]:

test_video = os.path.join('test_videos', args['video'])

# In[170]:

subprocess.call('mkdir tmp', shell=True)
subprocess.call('ffmpeg -i {} tmp/image_%05d.jpg'.format(test_video),
                shell=True)

# In[173]:
Example #9
def get_ucf_data(opt):

    mean = get_mean(opt.norm_value, dataset='kinetics')
    std = get_std(opt.norm_value)
    norm_method = Normalize(mean, [1, 1, 1])

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CornerCrop(opt.sample_size, 'c'),
        ToTensor(opt.norm_value), norm_method
    ])

    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = ClassLabel()  # VideoID()

    # get training data
    training_data = UCF101(opt.video_path,
                           opt.annotation_path,
                           'training',
                           0,
                           spatial_transform=spatial_transform,
                           temporal_transform=temporal_transform,
                           target_transform=target_transform,
                           sample_duration=16)

    # wrap training data
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=opt.batch_size,
                                               shuffle=False,
                                               num_workers=opt.n_threads,
                                               pin_memory=False)  # True

    # get validation data
    val_data = UCF101(opt.video_path,
                      opt.annotation_path,
                      'validation',
                      0,
                      spatial_transform=spatial_transform,
                      temporal_transform=temporal_transform,
                      target_transform=target_transform,
                      sample_duration=16)

    # wrap validation data
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.n_threads,
                                             pin_memory=False)

    target_transform = VideoID()
    # get test data
    test_data = UCF101(opt.video_path,
                       opt.annotation_path,
                       'testing',
                       0,
                       spatial_transform=spatial_transform,
                       temporal_transform=temporal_transform,
                       target_transform=target_transform,
                       sample_duration=16)

    # wrap test data
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=False)

    return train_loader, val_loader, test_loader, test_data
Example #10
def train_main_multi_batch(model, input_root_dir, opt):
    ####

    epoch_logger = logging.getLogger('epoch')  # distinct names, or both getLogger calls
    batch_logger = logging.getLogger('batch')  # return the same logger with doubled handlers

    elogHandler = logging.StreamHandler()
    eformatter = jsonlogger.JsonFormatter()
    elogHandler.setFormatter(eformatter)
    epoch_logger.addHandler(elogHandler)

    blogHandler = logging.StreamHandler()
    bformatter = jsonlogger.JsonFormatter()
    blogHandler.setFormatter(bformatter)
    batch_logger.addHandler(blogHandler)

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)

    # criterion = nn.CrossEntropyLoss()
    criterion = nn.MSELoss()

    if not opt.no_cuda:
        criterion = criterion.cuda()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    epoch = 1

    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()

    end_time = time.time()

    ii = 0

    previous_label = "FAKE"
    pre_previous_label = "FAKE"

    for files_dir in os.listdir(input_root_dir):
        sub_path = os.path.join(input_root_dir, files_dir)
        print("Files dir: " + files_dir)
        print("Sub path:" + sub_path)

        data_file_path = os.path.join(sub_path, 'metadata.json')
        with open(data_file_path, 'r') as data_file:
            labels = json.load(data_file)

        opt.batch_size = 36
        total_batch_size = len(os.listdir(sub_path))
        i = 0
        input_files = os.listdir(sub_path)
        for inp_num in range(1, len(input_files), 2):
            print("Lala: " + str(inp_num))
            # print(input_files)
            input_file1 = input_files[inp_num]
            input_file2 = input_files[inp_num - 1]
            if input_file1.endswith(".mp4") and input_file2.endswith(".mp4"):

                video_path1 = os.path.join(sub_path, input_file1)
                video_path2 = os.path.join(sub_path, input_file2)

                label1 = labels[input_file1]
                label2 = labels[input_file2]

                if label1['label'] != previous_label or label1[
                        'label'] != pre_previous_label:

                    previous_label = label1['label']

                    subprocess.call('mkdir tmp', shell=True)
                    subprocess.call(
                        'ffmpeg -hide_banner -loglevel panic -i {}  -vframes 288 tmp/image_%05d.jpg'
                        .format(video_path1),
                        shell=True)
                    subprocess.call(
                        'ffmpeg -hide_banner -loglevel panic -i {}  -vframes 288 -start_number 289 tmp/image_%05d.jpg'
                        .format(video_path2),
                        shell=True)

                    video_dir = '{}tmp/'.format(
                        '/data/codebases/video_classification/')

                    data = Video(video_dir,
                                 spatial_transform=spatial_transform,
                                 temporal_transform=temporal_transform,
                                 sample_duration=opt.sample_duration)

                    data_loader = torch.utils.data.DataLoader(
                        data,
                        batch_size=opt.batch_size,
                        shuffle=False,
                        num_workers=opt.n_threads,
                        pin_memory=True)

                    for k, (inputs, targets) in enumerate(data_loader):
                        data_time.update(time.time() - end_time)

                        print("Label: " + label1['label'] + ", " +
                              label2['label'])

                        # # FOR CROSS ENTROPY LOSS
                        # targets = torch.zeros([18, 1], dtype=torch.long)
                        # for j in range(0,18):
                        #     if(label['label'] == 'FAKE'):
                        #         targets[j][0] = 0
                        #         # targets[j][1] = 1
                        #     else:
                        #         targets[j][0] = 1
                        #         # targets[j][1] = 0

                        # FOR MSE LOSS
                        targets = torch.zeros([opt.batch_size, opt.n_classes],
                                              dtype=torch.float)
                        for j in range(0, int(opt.batch_size / 2)):
                            if (label1['label'] == 'FAKE'):
                                targets[j][0] = 0.0
                                targets[j][1] = 1.0
                            else:
                                targets[j][0] = 1.0
                                targets[j][1] = 0.0

                        for j in range(int(opt.batch_size / 2),
                                       opt.batch_size):
                            if (label2['label'] == 'FAKE'):
                                targets[j][0] = 0.0
                                targets[j][1] = 1.0
                            else:
                                targets[j][0] = 1.0
                                targets[j][1] = 0.0

                        if not opt.no_cuda:
                            targets = targets.cuda(non_blocking=True)
                        inputs = Variable(inputs)
                        targets = Variable(targets)
                        outputs = model(inputs)

                        print(outputs.t())
                        print(targets.t())

                        # FOR CROSS ENTROPY LOSS
                        # loss = criterion(outputs, torch.max(targets, 1)[1])
                        # FOR MSE LOSS
                        loss = criterion(outputs, targets)

                        print(loss)

                        # FOR CROSS ENTROPY LOSS
                        # acc = calculate_accuracy(outputs, targets)
                        # FOR MSE LOSS
                        acc = calculate_accuracy_mse(outputs, targets)

                        print(acc)

                        losses.update(loss.item(), inputs.size(0))
                        accuracies.update(acc, inputs.size(0))

                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                        batch_time.update(time.time() - end_time)
                        end_time = time.time()

                        batch_logger.log(
                            1, {
                                'epoch': epoch,
                                'batch': i + 1,
                                'iter': (epoch - 1) * opt.batch_size + (i + 1),
                                'loss': losses.val,
                                'acc': accuracies.val,
                                'lr': optimizer.param_groups[0]['lr']
                            })

                        print(
                            'Epoch: [{0}][{1}/{2}]\t'
                            'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                            'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                            'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                            'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                                epoch,
                                i + 1,
                                opt.batch_size,
                                batch_time=batch_time,
                                data_time=data_time,
                                loss=losses,
                                acc=accuracies))
                        ii += 1
                    subprocess.call('rm -rf tmp', shell=True)
                i += 1

            if ii % 100 == 0:
                save_loc = '/data/codebases/video_classification/model{}.pth'.format(
                    ii)
                torch.save(model.state_dict(), save_loc)
        epoch_logger.log(
            1, {
                'epoch': epoch,
                'loss': losses.avg,
                'acc': accuracies.avg,
                'lr': optimizer.param_groups[0]['lr']
            })
        print('XXX Epoch: [{0}]\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(epoch,
                                                         i + 1,
                                                         opt.batch_size,
                                                         batch_time=batch_time,
                                                         data_time=data_time,
                                                         loss=losses,
                                                         acc=accuracies))
    exit(1)
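AverageMeter above is the usual running-average helper from the PyTorch ImageNet example scripts; a minimal sketch matching the .update()/.val/.avg interface used in this loop:

class AverageMeter:
    """Tracks the latest value and the running average."""

    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count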
Example #11

if __name__ == '__main__':
    opt = parse_opts()
    opt.mean = get_mean(1)
    opt.arch = '{}-{}'.format(opt.model_name, opt.model_depth)
    opt.sample_duration = 16
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    print('#####', opt.scales)
    print(opt.mean)
    spatial_transform = Compose([
        MultiScaleCornerCrop(opt.scales, opt.sample_size),
        RandomHorizontalFlip(),
        ToTensor(1),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = TemporalRandomCrop(opt.sample_duration)
    train_data = Video(opt.train_list,
                       spatial_transform=spatial_transform,
                       temporal_transform=temporal_transform,
                       sample_duration=opt.sample_duration,
                       n_samples_for_each_video=1)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.n_threads,
                                               pin_memory=True)

    val_spatial_transform = Compose([
def main_run(stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen,
             trainBatchSize, valBatchSize, numEpochs, lr1, decay_factor,
             decay_step, memSize, color, rgbm, fcm):
    #dataset = 'gtea61'
    begin_time = datetime.datetime.now()
    num_classes = 61

    if color not in ['HSV_opticalFlow', 'flow_surfaceNormals', 'warpedHSV']:
        print(color, ' is not valid')
        exit(-1)

    model_folder = os.path.join(
        './', out_dir, 'BigConvLSTM', color, str(seqLen),
        'stage' + str(stage))  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vid_seq_train = makeDataset(train_data_dir,
                                seqLen=seqLen,
                                fmt='.png',
                                users=['S1', 'S3', 'S4'],
                                spatial_transform=spatial_transform,
                                colorization=color)
    #trainInstances = vid_seq_train.__len__()

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)

    if val_data_dir is not None:
        vid_seq_val = makeDataset(val_data_dir,
                                  seqLen=seqLen,
                                  fmt='.png',
                                  users=['S2'],
                                  train=False,
                                  spatial_transform=Compose([
                                      Scale(256),
                                      CenterCrop(224),
                                      ToTensor(), normalize
                                  ]),
                                  colorization=color)
        #valInstances = vid_seq_val.__len__()

        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)

    train_params = []
    if stage == 1:
        model = bigConvLSTM(num_classes=num_classes,
                            mem_size=memSize,
                            rgbm=rgbm,
                            fcm=fcm)
        model.train(False)

        for params in model.parameters():
            params.requires_grad = False
    else:  # stage == 2
        model = bigConvLSTM(num_classes=num_classes,
                            mem_size=memSize,
                            rgbm=rgbm,
                            fcm=fcm)

        model.load_state_dict(torch.load(stage1_dict))
        model.train(False)

        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNetRGB.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNetRGB.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNetRGB.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNetRGB.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNetRGB.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNetRGB.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNetRGB.fc.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNetCol.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNetCol.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNetCol.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNetCol.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNetCol.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNetCol.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNetCol.fc.parameters():
            params.requires_grad = True
            train_params += [params]

        model.resNetRGB.layer4[0].conv1.train(True)
        model.resNetRGB.layer4[0].conv2.train(True)
        model.resNetRGB.layer4[1].conv1.train(True)
        model.resNetRGB.layer4[1].conv2.train(True)
        model.resNetRGB.layer4[2].conv1.train(True)
        model.resNetRGB.layer4[2].conv2.train(True)
        model.resNetRGB.fc.train(True)

        model.resNetCol.layer4[0].conv1.train(True)
        model.resNetCol.layer4[0].conv2.train(True)
        model.resNetCol.layer4[1].conv1.train(True)
        model.resNetCol.layer4[1].conv2.train(True)
        model.resNetCol.layer4[2].conv1.train(True)
        model.resNetCol.layer4[2].conv2.train(True)
        model.resNetCol.fc.train(True)

    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.lstm_cell.train(True)

    model.classifier.train(True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    print(device)

    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.Adam(train_params,
                                    lr=lr1,
                                    weight_decay=4e-5,
                                    eps=1e-4)
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0

    dataload_time = datetime.datetime.now()

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0

        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)

        if stage == 2:
            model.resNetRGB.layer4[0].conv1.train(True)
            model.resNetRGB.layer4[0].conv2.train(True)
            model.resNetRGB.layer4[1].conv1.train(True)
            model.resNetRGB.layer4[1].conv2.train(True)
            model.resNetRGB.layer4[2].conv1.train(True)
            model.resNetRGB.layer4[2].conv2.train(True)
            model.resNetRGB.fc.train(True)

            model.resNetCol.layer4[0].conv1.train(True)
            model.resNetCol.layer4[0].conv2.train(True)
            model.resNetCol.layer4[1].conv1.train(True)
            model.resNetCol.layer4[1].conv2.train(True)
            model.resNetCol.layer4[2].conv1.train(True)
            model.resNetCol.layer4[2].conv2.train(True)
            model.resNetCol.fc.train(True)

        #for i, (inputs, targets) in enumerate(train_loader):
        for inputsRGB, inputsCol, targets in train_loader:
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()

            inputVariableRGB = Variable(
                inputsRGB.permute(1, 0, 2, 3, 4).to(device))
            inputVariableCol = Variable(
                inputsCol.permute(1, 0, 2, 3, 4).to(device))
            labelVariable = Variable(targets.to(device))
            trainSamples += inputsRGB.size(0)

            output_label, _ = model(inputVariableRGB, inputVariableCol, device)

            loss = loss_fn(output_label, labelVariable)
            loss.backward()

            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(device)).sum()
            epoch_loss += loss.item()

        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if val_data_dir is not None:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0

            with torch.no_grad():
                #for j, (inputs, targets) in enumerate(val_loader):
                for inputsRGB, inputsCol, targets in val_loader:
                    val_iter += 1
                    val_samples += inputsRGB.size(0)

                    inputVariableRGB = Variable(
                        inputsRGB.permute(1, 0, 2, 3, 4).to(device))
                    inputVariableCol = Variable(
                        inputsCol.permute(1, 0, 2, 3, 4).to(device))
                    labelVariable = Variable(targets.to(device))
                    #labelVariable = Variable(targets.cuda())

                    output_label, _ = model(inputVariableRGB, inputVariableCol,
                                            device)

                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()

                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.to(device)).sum()

            val_accuracy = torch.true_divide(numCorr, val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter

            print('Val: Epoch = {} | Loss = {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))

            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_' + color +
                                   '_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
                print("saved new best model")

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
    end_time = datetime.datetime.now()
    print('total time elapsed: ', end_time - begin_time)
    print('dataload time: ', dataload_time - begin_time)
    print('training time: ', end_time - dataload_time)
    timers = open((model_folder + '/timings.txt'), 'w')
    timers.write(
        f"total time elapsed: {end_time-begin_time} \ndataload time: {dataload_time-begin_time} \ntraining time: {end_time-dataload_time}"
    )
    timers.close()
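The long per-layer requires_grad/train(True) runs in stage 2 above can be collapsed into a single helper; a sketch of that refactor (set_trainable is an illustrative name, and the commented usage mirrors the conv layers unfrozen above):

def set_trainable(modules, train_params):
    """Unfreeze every parameter of each module and collect it for the optimizer."""
    for m in modules:
        for p in m.parameters():
            p.requires_grad = True
            train_params.append(p)
        m.train(True)

# Stage-2 usage sketch:
# convs = [blk.conv1 for blk in model.resNetRGB.layer4] + \
#         [blk.conv2 for blk in model.resNetRGB.layer4]
# set_trainable(convs + [model.resNetRGB.fc], train_params)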
def main_run(numEpochs, lr, stepSize, decayRate, trainBatchSize, seqLen,
             evalInterval, evalMode, numWorkers, outDir, modelUsed, pretrained,
             train_test_split, directory, crossValidation, folds):

    compDataset, classCount, class_names = make_split(directory)

    if crossValidation:
        data, label = compDataset
        kFoldCrossValid(folds, data, label, numEpochs, evalMode, numWorkers,
                        lr, stepSize, decayRate, trainBatchSize, seqLen)

    else:
        (trainDataset,
         trainLabels), (validationDataset,
                        validationLabels), (testDataset,
                                            testLabels) = sampleFromClass(
                                                compDataset, classCount,
                                                train_test_split)
        model, accuracy = modelTrain(modelUsed, pretrained, trainDataset,
                                     trainLabels, validationDataset,
                                     validationLabels, numEpochs, evalInterval,
                                     evalMode, outDir, numWorkers, lr,
                                     stepSize, decayRate, trainBatchSize,
                                     seqLen, True)
        '''for printing confusion matrix'''
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        normalize = Normalize(mean=mean, std=std)
        if evalMode == 'centerCrop':
            test_spatial_transform = Compose(
                [Scale(256),
                 CenterCrop(224),
                 ToTensor(), normalize])
        elif evalMode == 'tenCrops':
            test_spatial_transform = Compose(
                [Scale(256),
                 TenCrops(size=224, mean=mean, std=std)])
        elif evalMode == 'fiveCrops':
            test_spatial_transform = Compose(
                [Scale(256),
                 FiveCrops(size=224, mean=mean, std=std)])
        elif evalMode == 'horFlip':
            test_spatial_transform = Compose([
                Scale(256),
                CenterCrop(224),
                FlippedImagesTest(mean=mean, std=std)
            ])

        vidSeqTest = makeDataset(testDataset,
                                 testLabels,
                                 seqLen=seqLen,
                                 spatial_transform=test_spatial_transform)

        testLoader = torch.utils.data.DataLoader(vidSeqTest,
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=int(numWorkers /
                                                                 2),
                                                 pin_memory=True)

        numTestInstances = vidSeqTest.__len__()

        print('Number of test samples = {}'.format(numTestInstances))

        modelFolder = './experiments_' + outDir + '_' + modelUsed + '_' + str(
            pretrained)  # Dir for saving models and log files

        savePathClassifier = (modelFolder + '/bestModel.pth')
        torch.save(model.state_dict(), savePathClassifier)
        '''running test samples and printing confusion matrix'''
        model.train(False)
        print('Testing...')
        LossEpoch = 0
        testIter = 0
        pred = None
        targ = None
        numCorrTest = 0
        for j, (inputs, targets) in enumerate(testLoader):
            testIter += 1
            #if evalMode == 'centerCrop':
            if (torch.cuda.is_available()):
                inputVariable1 = Variable(inputs.permute(1, 0, 2, 3, 4).cuda(),
                                          requires_grad=False)
                labelVariable = Variable(targets.cuda(non_blocking=True),
                                         requires_grad=False)
            else:
                inputVariable1 = Variable(inputs.permute(1, 0, 2, 3, 4),
                                          requires_grad=False)
                labelVariable = Variable(targets, requires_grad=False)
            # else:
            #     if(torch.cuda.is_available()):
            #         inputVariable1 = Variable(inputs[0].permute(1, 0, 2, 3, 4).cuda(), requires_grad=False)
            #         labelVariable = Variable(targets.cuda(async=True), requires_grad=False)
            #     else:
            #         inputVariable1 = Variable(inputs[0].permute(1, 0, 2, 3, 4), requires_grad=False)
            #         labelVariable = Variable(targets, requires_grad=False)
            outputLabel = model(inputVariable1)
            outputProb = torch.nn.Softmax(dim=1)(outputLabel)
            _, predicted = torch.max(outputProb.data, 1)
            if pred is None:
                pred = predicted.cpu().numpy()
                targ = targets[0].cpu().numpy()
            else:
                pred = np.append(pred, predicted.cpu().numpy())
                targ = np.append(targ, targets[0].cpu().numpy())
            # if(torch.cuda.is_available()):
            #     numCorrTest += (predicted == targets[0].cuda()).sum()
            # else:
            #     numCorrTest += (predicted == targets[0]).sum()

        # Compute confusion matrix
        cnf_matrix = confusion_matrix(targ, pred)
        np.set_printoptions(precision=2)
        # Plot non-normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix,
                              classes=class_names,
                              title='Confusion matrix, without normalization')
        plt.savefig(modelFolder + "/no_norm_confusion_matrix.png")
        # Plot normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix,
                              classes=class_names,
                              normalize=True,
                              title='Normalized confusion matrix')

        plt.savefig(modelFolder + "/confusion_matrix.png")
        return True
def modelTrain(modelUsed, pretrained, trainDataset, trainLabels,
               validationDataset, validationLabels, numEpochs, evalInterval,
               evalMode, outDir, numWorkers, lr, stepSize, decayRate,
               trainBatchSize, seqLen, plotting):
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vidSeqTrain = makeDataset(trainDataset,
                              trainLabels,
                              spatial_transform=spatial_transform,
                              seqLen=seqLen)
    # torch iterator to give data in batches of specified size
    trainLoader = torch.utils.data.DataLoader(vidSeqTrain,
                                              batch_size=trainBatchSize,
                                              shuffle=True,
                                              num_workers=numWorkers,
                                              pin_memory=True,
                                              drop_last=True)

    if evalMode == 'centerCrop':
        test_spatial_transform = Compose(
            [Scale(256), CenterCrop(224),
             ToTensor(), normalize])
    elif evalMode == 'tenCrops':
        test_spatial_transform = Compose(
            [Scale(256), TenCrops(size=224, mean=mean, std=std)])
    elif evalMode == 'fiveCrops':
        test_spatial_transform = Compose(
            [Scale(256), FiveCrops(size=224, mean=mean, std=std)])
    elif evalMode == 'horFlip':
        test_spatial_transform = Compose([
            Scale(256),
            CenterCrop(224),
            FlippedImagesTest(mean=mean, std=std)
        ])

    vidSeqValid = makeDataset(validationDataset,
                              validationLabels,
                              seqLen=seqLen,
                              spatial_transform=test_spatial_transform)

    validationLoader = torch.utils.data.DataLoader(vidSeqValid,
                                                   batch_size=1,
                                                   shuffle=False,
                                                   num_workers=int(numWorkers /
                                                                   2),
                                                   pin_memory=True)

    numTrainInstances = vidSeqTrain.__len__()
    numValidationInstances = vidSeqValid.__len__()

    print('Number of training samples = {}'.format(numTrainInstances))
    print('Number of validation samples = {}'.format(numValidationInstances))

    modelFolder = './experiments_' + outDir + '_' + modelUsed + '_' + str(
        pretrained)  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(modelFolder):
        pass
    else:
        os.makedirs(modelFolder)
    # Log files
    writer = SummaryWriter(modelFolder)
    trainLogLoss = open((modelFolder + '/trainLogLoss.txt'), 'a')
    trainLogAcc = open((modelFolder + '/trainLogAcc.txt'), 'a')
    validationLogLoss = open((modelFolder + '/validLogLoss.txt'), 'a')
    validationLogAcc = open((modelFolder + '/validLogAcc.txt'), 'a')

    model = ViolenceModel(modelUsed, pretrained)

    trainParams = []
    for params in model.parameters():
        if params.requires_grad:
            trainParams += [params]
    model.train(True)
    if (torch.cuda.is_available()):
        model.cuda()

    lossFn = nn.CrossEntropyLoss()
    optimizerFn = torch.optim.RMSprop(trainParams, lr=lr)
    optimizerFn.zero_grad()
    optimScheduler = torch.optim.lr_scheduler.StepLR(optimizerFn, stepSize,
                                                     decayRate)

    minAccuracy = 50
    train_loss = []
    val_loss = []
    train_acc = []
    val_acc = []
    bestmodel = None
    validationAccuracy = 0  # returned below; initialised in case no eval pass runs

    for epoch in range(numEpochs):
        # PyTorch >= 1.1 expects scheduler.step() after the optimizer steps;
        # it is kept at the top of the epoch here to preserve the original schedule
        optimScheduler.step()
        epochLoss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.train(True)
        print('Epoch = {}'.format(epoch + 1))
        writer.add_scalar('lr', optimizerFn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, targets) in enumerate(trainLoader):
            iterPerEpoch += 1
            optimizerFn.zero_grad()
            if (torch.cuda.is_available()):
                inputVariable1 = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
                labelVariable = Variable(targets.cuda())
            else:
                inputVariable1 = Variable(inputs.permute(1, 0, 2, 3, 4))
                labelVariable = Variable(targets)
            outputLabel = model(inputVariable1)
            loss = lossFn(outputLabel, labelVariable)
            loss.backward()
            optimizerFn.step()
            outputProb = torch.nn.Softmax(dim=1)(outputLabel)
            _, predicted = torch.max(outputProb.data, 1)
            if (torch.cuda.is_available()):
                numCorrTrain += (predicted == targets.cuda()).sum()
            else:
                numCorrTrain += (predicted == targets).sum()
            epochLoss += loss.item()
        avgLoss = epochLoss / iterPerEpoch
        trainAccuracy = (float(numCorrTrain) * 100) / float(numTrainInstances)
        train_loss.append(avgLoss)
        train_acc.append(trainAccuracy)
        print('Training: Loss = {} | Accuracy = {}% '.format(
            avgLoss, trainAccuracy))
        writer.add_scalar('train/epochLoss', avgLoss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        trainLogLoss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avgLoss))
        trainLogAcc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if (epoch + 1) % evalInterval == 0:
            model.train(False)
            print('Evaluating...')
            validationLossEpoch = 0
            validationIter = 0
            numCorrTest = 0
            for j, (inputs, targets) in enumerate(validationLoader):
                validationIter += 1
                #if evalMode == 'centerCrop':
                if (torch.cuda.is_available()):
                    inputVariable1 = Variable(inputs.permute(1, 0, 2, 3,
                                                             4).cuda(),
                                              requires_grad=False)
                    labelVariable = Variable(targets.cuda(non_blocking=True),
                                             requires_grad=False)
                else:
                    inputVariable1 = Variable(inputs.permute(1, 0, 2, 3, 4),
                                              requires_grad=False)
                    labelVariable = Variable(targets, requires_grad=False)
                # else:
                #     if(torch.cuda.is_available()):
                #         inputVariable1 = Variable(inputs[0].permute(1, 0, 2, 3, 4).cuda(), requires_grad=False)
                #         labelVariable = Variable(targets.cuda(non_blocking=True), requires_grad=False)
                #     else:
                #         inputVariable1 = Variable(inputs[0].permute(1, 0, 2, 3, 4), requires_grad=False)
                #         labelVariable = Variable(targets, requires_grad=False)
                outputLabel = model(inputVariable1)
                validationLoss = lossFn(outputLabel, labelVariable)
                validationLossEpoch += validationLoss.item()
                outputProb = torch.nn.Softmax(dim=1)(outputLabel)
                _, predicted = torch.max(outputProb.data, 1)
                if (torch.cuda.is_available()):
                    numCorrTest += (predicted == targets[0].cuda()).sum()
                else:
                    numCorrTest += (predicted == targets[0]).sum()
            validationAccuracy = (float(numCorrTest) *
                                  100) / float(numValidationInstances)
            avgValidationLoss = validationLossEpoch / validationIter
            val_loss.append(avgValidationLoss)
            val_acc.append(validationAccuracy)
            print('Testing: Loss = {} | Accuracy = {}% '.format(
                avgValidationLoss, validationAccuracy))
            writer.add_scalar('test/epochloss', avgValidationLoss, epoch + 1)
            writer.add_scalar('test/accuracy', validationAccuracy, epoch + 1)
            validationLogLoss.write('valid Loss after {} epochs = {}\n'.format(
                epoch + 1, avgValidationLoss))
            validationLogAcc.write(
                'valid Accuracy after {} epochs = {}%\n'.format(
                    epoch + 1, validationAccuracy))
            if validationAccuracy > minAccuracy:
                # snapshot the weights; a bare `bestmodel = model` would keep
                # tracking later epochs (assumes `import copy` among the imports)
                bestmodel = copy.deepcopy(model)
                minAccuracy = validationAccuracy
    '''plotting the accuracy and loss curves'''
    if plotting:
        xc = range(1, numEpochs + 1)
        xv = []
        for i in xc:
            if (i % evalInterval == 0):
                xv.append(i)
        plt.figure(1, figsize=(7, 5))
        plt.plot(xc, train_loss)
        plt.plot(xv, val_loss)
        plt.xlabel('num of Epochs')
        plt.ylabel('loss')
        plt.title('train_loss vs val_loss')
        plt.grid(True)
        plt.legend(['train', 'val'])
        #print plt.style.available # use bmh, classic,ggplot for big pictures
        plt.style.use(['classic'])
        plt.savefig(modelFolder + "/lossCurve.png")

        plt.figure(2, figsize=(7, 5))
        plt.plot(xc, train_acc)
        plt.plot(xv, val_acc)
        plt.xlabel('num of Epochs')
        plt.ylabel('accuracy')
        plt.title('train_acc vs val_acc')
        plt.grid(True)
        plt.legend(['train', 'val'], loc=4)
        #print plt.style.available # use bmh, classic,ggplot for big pictures
        plt.style.use(['classic'])
        plt.savefig(modelFolder + "/accuracyCurve.png")
        #plt.show()
    trainLogAcc.close()
    validationLogAcc.close()
    trainLogLoss.close()
    validationLogLoss.close()
    writer.export_scalars_to_json(modelFolder + "/all_scalars.json")
    writer.close()
    return bestmodel, validationAccuracy
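
A minimal driver for `modelTrain` might look as follows; every literal value and the dataset/label lists are illustrative assumptions, not values from the original script.

# Hypothetical invocation of modelTrain (all values below are assumed examples).
bestModel, bestValAcc = modelTrain(modelUsed='resnet50',      # backbone name accepted by ViolenceModel (assumed)
                                   pretrained=True,
                                   trainDataset=trainVideos,  # video/label lists prepared elsewhere
                                   trainLabels=trainLabels,
                                   validationDataset=valVideos,
                                   validationLabels=valLabels,
                                   numEpochs=50,
                                   evalInterval=5,
                                   evalMode='centerCrop',     # or 'tenCrops' / 'fiveCrops' / 'horFlip'
                                   outDir='violence',
                                   numWorkers=4,
                                   lr=1e-3,
                                   stepSize=25,
                                   decayRate=0.5,
                                   trainBatchSize=16,
                                   seqLen=20,
                                   plotting=True)
print('Best validation accuracy = {}%'.format(bestValAcc))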
Example #15
0
                        type=int,
                        default=250,
                        help="何epochごとに学習率を減らすか")
    parser.add_argument('--manual_seed',
                        default=1,
                        type=int,
                        help='Manually set random seed')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = opt()  # read the command-line arguments
    args.arch = "ResNet-{}".format(args.model_depth)  # record the architecture to run
    spatial_transform = Compose([
        ToTensor(),  # convert each frame (PIL Image) loaded per iteration to a Tensor
    ])
    temporal_transform = TemporalRandomCrop4flow()  # temporal preprocessing; none in this case
    target_transform = ClassLabel()  # ground truth to learn; labels for 2-class classification
    #accuracies=AverageMeter()  # per-step accuracy and its running average

    model = test_generate_model(args)  # load the model (and pretrained weights, if available)

    test_data = get_training_set(args, spatial_transform, temporal_transform,
                                 target_transform)  # build the dataset fed to the data loader
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=20)

    pred = []
    Y = []
    for i, (x, y) in enumerate(test_loader):
        x = x.cuda()  # x is already a tensor; torch.tensor(x) would just copy it
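
Example #15 is cut off at this point. Under the assumption that `model` returns per-class logits and the labels are integer class indices, a loop of this shape would typically finish the evaluation (a sketch, not the original code):

# Assumed completion sketch: accumulate predictions and report accuracy.
pred = []
Y = []
model.eval()
with torch.no_grad():
    for x, y in test_loader:
        x = x.cuda()
        logits = model(x)                                 # assumed: raw class logits
        pred.extend(logits.argmax(dim=1).cpu().tolist())  # predicted class per clip
        Y.extend(y.tolist())
accuracy = sum(p == t for p, t in zip(pred, Y)) / len(Y)
print('accuracy = {:.4f}'.format(accuracy))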
Example #16
0
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen, trainBatchSize,
             valBatchSize, numEpochs, lr1, decay_factor, decay_step, memSize, attention):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    model_folder = os.path.join('./', out_dir, dataset, 'rgb', 'stage'+str(stage))  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')


    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                 ToTensor(), normalize])

    vid_seq_train = makeDataset(train_data_dir,
                                spatial_transform=spatial_transform, seqLen=seqLen, fmt='.png',phase='train')

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                            shuffle=True, num_workers=4, pin_memory=True)
    if val_data_dir is not None:

        vid_seq_val = makeDataset(val_data_dir,
                                   spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                                   seqLen=seqLen, fmt='.png',phase='test')

        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                shuffle=False, num_workers=2, pin_memory=True)
        valInstances = vid_seq_val.__len__()


    trainInstances = vid_seq_train.__len__()

    train_params = []
    if stage == 1:

        model = attentionModel(num_classes=num_classes, mem_size=memSize, attention=attention)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:

        model = attentionModel(num_classes=num_classes, mem_size=memSize, attention=attention)
        model.load_state_dict(torch.load(stage1_dict))
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]

        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)

    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]


    model.lstm_cell.train(True)

    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5, eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step,
                                                           gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0

    for epoch in range(numEpochs):
        optim_scheduler.step()
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1)
        if stage == 2:
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)
        for i, (inputs, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += torch.sum(predicted == labelVariable.data).data.item()
            epoch_loss += loss.item()
        avg_loss = epoch_loss/iterPerEpoch
        trainAccuracy = (numCorrTrain / trainSamples) * 100

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch+1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch+1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch+1)
        train_log_loss.write('Train Loss after {} epochs = {}\n'.format(epoch + 1, avg_loss))
        train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy))
        if val_data_dir is not None:
            if (epoch+1) % 1 == 0:  # validates every epoch, so the `else` branch below is never reached
                model.train(False)
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0
                # volatile=True and async=True were removed from PyTorch;
                # torch.no_grad() and non_blocking=True are the replacements
                with torch.no_grad():
                    for j, (inputs, targets) in enumerate(val_loader):
                        val_iter += 1
                        val_samples += inputs.size(0)
                        inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
                        labelVariable = Variable(targets.cuda(non_blocking=True))
                        output_label, _ = model(inputVariable)
                        val_loss = loss_fn(output_label, labelVariable)
                        val_loss_epoch += val_loss.item()
                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += torch.sum(predicted == labelVariable.data).data.item()
                val_accuracy = (numCorr / val_samples) * 100
                avg_val_loss = val_loss_epoch / val_iter
                print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
                val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
            else:
                if (epoch+1) % 10 == 0:
                    save_path_model = (model_folder + '/model_rgb_state_dict_epoch' + str(epoch+1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
Example #17
0
def main_run(dataset, stage, root_dir, out_dir, seqLen, trainBatchSize,
             numEpochs, lr1, decay_factor, decay_step, memSize, outPool_size,
             split, evalInterval):

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    test_split = split
    c_cam_classes = outPool_size
    testBatchSize = trainBatchSize  # evaluate with the same batch size used for training
    best_acc = 0

    if dataset == 'gtea_61':
        num_classes = 61
    elif dataset == 'gtea_71':
        num_classes = 71
    elif dataset == 'egtea_gaze+':
        num_classes = 106
    else:
        print('Wrong dataset')
        sys.exit()
    dataset_dir = os.path.join(root_dir, dataset)

    model_folder = os.path.join('.', out_dir, dataset, str(test_split))

    if not os.path.exists(model_folder):
        os.makedirs(model_folder)

    note_fl = open(model_folder + '/note.txt', 'w')
    note_fl.write('Number of Epochs = {}\n'
                  'lr = {}\n'
                  'Train Batch Size = {}\n'
                  'Sequence Length = {}\n'
                  'Decay steps = {}\n'
                  'Decay factor = {}\n'
                  'Memory size = {}\n'
                  'Memory cam classes = {}\n'.format(numEpochs, lr1,
                                                     trainBatchSize, seqLen,
                                                     decay_step, decay_factor,
                                                     memSize, c_cam_classes))

    note_fl.close()

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    train_log_loss_batch = open((model_folder + '/train_log_loss_batch.txt'),
                                'w')
    test_log_loss = open((model_folder + '/test_log_loss.txt'), 'w')
    test_log_acc = open((model_folder + '/test_log_acc.txt'), 'w')

    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    print('Preparing dataset...')

    if dataset == 'egtea_gaze+':
        trainDatasetF, testDatasetF, trainLabels, testLabels, trainNumFrames, testNumFrames = gen_split_egtea_gazePlus(
            dataset_dir, test_split)
    else:
        trainDatasetF, testDatasetF, trainLabels, testLabels, trainNumFrames, testNumFrames, _ = gen_split(
            dataset_dir, test_split)

    vid_seq_train = makeDataset(trainDatasetF,
                                trainLabels,
                                trainNumFrames,
                                spatial_transform=spatial_transform,
                                fmt='.jpg',
                                seqLen=seqLen)

    print('Number of train samples = {}'.format(vid_seq_train.__len__()))

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               num_workers=4,
                                               pin_memory=True)

    vid_seq_test = makeDataset(testDatasetF,
                               testLabels,
                               testNumFrames,
                               spatial_transform=Compose([
                                   Scale(256),
                                   CenterCrop(224),
                                   ToTensor(), normalize
                               ]),
                               fmt='.jpg',
                               seqLen=seqLen)

    print('Number of test samples = {}'.format(vid_seq_test.__len__()))

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    train_params = []
    if stage == 1:
        model = attentionModel(num_classes=num_classes,
                               mem_size=memSize,
                               c_cam_classes=c_cam_classes)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    elif stage == 2:
        model = attentionModel(num_classes=num_classes,
                               mem_size=memSize,
                               c_cam_classes=c_cam_classes)
        checkpoint_path = os.path.join(
            model_folder, 'last_checkpoint_stage' + str(1) + '.pth.tar')
        if os.path.exists(checkpoint_path):
            print('Loading weights from checkpoint file {}'.format(
                checkpoint_path))
        else:
            print('Checkpoint file {} does not exist'.format(checkpoint_path))
            sys.exit()
        last_checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(last_checkpoint['model_state_dict'])
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False

        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]

    for params in model.lsta_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.Adam(train_params,
                                    lr=lr1,
                                    weight_decay=5e-4,
                                    eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0

    for epoch in range(numEpochs):
        optim_scheduler.step()
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            # .data[0] indexing of 0-dim tensors was removed in PyTorch 0.5; use .item()
            numCorrTrain += (predicted == targets.cuda()).sum().item()
            if train_iter % 10 == 0:
                print('Training loss after {} iterations = {} '.format(
                    train_iter, loss.item()))
                train_log_loss_batch.write(
                    'Training loss after {} iterations = {}\n'.format(
                        train_iter, loss.item()))
                writer.add_scalar('train/iter_loss', loss.item(), train_iter)
            epoch_loss += loss.item()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain / trainSamples) * 100
        print('Average training loss after {} epoch = {} '.format(
            epoch + 1, avg_loss))
        print('Training accuracy after {} epoch = {}% '.format(
            epoch + 1, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        save_path_model = os.path.join(
            model_folder, 'last_checkpoint_stage' + str(stage) + '.pth.tar')
        save_file = {
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer_fn.state_dict(),
            'best_acc': best_acc,
        }
        torch.save(save_file, save_path_model)

        if (epoch + 1) % evalInterval == 0:
            print('Testing...')
            model.train(False)
            test_loss_epoch = 0
            test_iter = 0
            test_samples = 0
            numCorr = 0
            # volatile=True, async=True and .data[0] are all pre-0.4 idioms;
            # torch.no_grad(), non_blocking=True and .item() replace them
            with torch.no_grad():
                for j, (inputs, targets) in enumerate(test_loader):
                    print('testing inst = {}'.format(j))
                    test_iter += 1
                    test_samples += inputs.size(0)
                    inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
                    labelVariable = Variable(targets.cuda(non_blocking=True))
                    output_label, _ = model(inputVariable)
                    test_loss = loss_fn(output_label, labelVariable)
                    test_loss_epoch += test_loss.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.cuda()).sum().item()
            test_accuracy = (numCorr / test_samples) * 100
            avg_test_loss = test_loss_epoch / test_iter
            print('Test Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_test_loss))
            print('Test Accuracy after {} epochs = {}%'.format(
                epoch + 1, test_accuracy))
            writer.add_scalar('test/epoch_loss', avg_test_loss, epoch + 1)
            writer.add_scalar('test/accuracy', test_accuracy, epoch + 1)
            test_log_loss.write('Test Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_test_loss))
            test_log_acc.write('Test Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, test_accuracy))

            if test_accuracy > best_acc:
                best_acc = test_accuracy
                save_path_model = os.path.join(
                    model_folder,
                    'best_checkpoint_stage' + str(stage) + '.pth.tar')
                save_file = {
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer_fn.state_dict(),
                    'best_acc': best_acc,
                }
                torch.save(save_file, save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    test_log_acc.close()
    train_log_loss_batch.close()
    test_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
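
Stage 2 of this function loads `last_checkpoint_stage1.pth.tar` from the same `model_folder`, so the two stages are meant to be run back to back. A hypothetical driver (every literal is an assumed example value):

# Assumed two-stage driver for the main_run above; values are illustrative only.
common = dict(dataset='gtea_61', root_dir='./data', out_dir='lsta_experiments',
              seqLen=25, trainBatchSize=32, memSize=512, outPool_size=100,
              split=2, evalInterval=5)
main_run(stage=1, numEpochs=200, lr1=1e-3, decay_factor=0.1,
         decay_step=[25, 75, 150], **common)   # train LSTA cell + classifier only
main_run(stage=2, numEpochs=100, lr1=1e-4, decay_factor=0.1,
         decay_step=[25, 75], **common)        # also fine-tune layer4 convs and fc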
Example #18
0
    def score(self):

        normalize = get_normalize_method(self.opt.mean, self.opt.std, self.opt.no_mean_norm,
                                         self.opt.no_std_norm)
        spatial_transform = [
            Resize(self.opt.sample_size),
            CenterCrop(self.opt.sample_size),
            ToTensor()
        ]

        spatial_transform.extend([ScaleValue(self.opt.value_scale), normalize])
        spatial_transform = Compose(spatial_transform)

        temporal_transform = []
        if self.opt.sample_t_stride > 1:
            temporal_transform.append(TemporalSubsampling(self.opt.sample_t_stride))
        temporal_transform.append(
            TemporalEvenCrop(self.opt.sample_duration, self.opt.n_val_samples))
        temporal_transform = TemporalCompose(temporal_transform)


        frame_count = get_n_frames(self.opt.video_jpgs_dir_path)

        frame_indices = list(range(0, frame_count))

        frame_indices = temporal_transform(frame_indices)

        spatial_transform.randomize_parameters()

        image_name_formatter = lambda x: f'image_{x:05d}.jpg'

        loader = VideoLoader(image_name_formatter)

        print('frame_indices', frame_indices)

        #clips = []
        video_outputs = []
        model = generate_model(self.opt)
        model = load_pretrained_model(model, self.opt.pretrain_path, self.opt.model,
                                      self.opt.n_finetune_classes)

        i = 0
        for frame_indice in frame_indices:
            print("%d indice: %s" % (i, str(frame_indice)))
            i += 1

            clip = loader(self.opt.video_jpgs_dir_path, frame_indice)

            clip = [spatial_transform(img) for img in clip]
            clip = torch.stack(clip, 0).permute(1, 0, 2, 3)

            #parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)
            #print('clips:', clips)
            #for clip in clips:
            with torch.no_grad():
                print(clip.shape)
                output = model(torch.unsqueeze(clip, 0))
                output = F.softmax(output, dim=1).cpu()

                #print(output)
                video_outputs.append(output[0])

            del clip

        video_outputs = torch.stack(video_outputs)
        average_scores = torch.mean(video_outputs, dim=0)

        #inference_loader, inference_class_names = main.get_inference_utils(self.opt)
        with self.opt.annotation_path.open('r') as f:
            data = json.load(f)

        class_to_idx = get_class_labels(data)
        idx_to_class = {}
        for name, label in class_to_idx.items():
            idx_to_class[label] = name
        print(idx_to_class)

        inference_result = inference.get_video_results(
            average_scores, idx_to_class, self.opt.output_topk)

        print(inference_result)
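
The fusion step above reduces to averaging per-clip softmax distributions; in isolation the idea is just:

import torch
import torch.nn.functional as F

# Toy illustration of the clip-score fusion used by score() (random values).
clip_logits = torch.randn(5, 10)                      # 5 temporal clips, 10 classes
video_scores = F.softmax(clip_logits, dim=1).mean(0)  # average the distributions
top3 = torch.topk(video_scores, k=3)
print(top3.indices.tolist(), top3.values.tolist())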
Example #19
0
def main(args):

    import os
    import numpy as np
    import sys
    import json
    import torch
    from torch import nn
    from torch import optim
    from torch.optim import lr_scheduler

    from opts import parse_opts
    from mean import get_mean, get_std
    from spatial_transforms import (
        Compose, Normalize, Scale, CenterCrop, CornerCrop, MultiScaleCornerCrop,
        MultiScaleRandomCrop, RandomHorizontalFlip, ToTensor)
    from temporal_transforms import LoopPadding, TemporalRandomCrop
    from target_transforms import ClassLabel, VideoID
    from target_transforms import Compose as TargetCompose
    from dataset import get_training_set, get_validation_set, get_test_set
    from utils import Logger
    from train import train_epoch
    from validation import val_epoch
    import test
    import collections
    from sklearn.svm import LinearSVC
    from sklearn.svm import SVC
    from joblib import dump, load
    from sklearn import preprocessing
    from scipy import stats
    from sklearn.metrics import accuracy_score


    local_path = os.getcwd()

    if args.video_directory_path in ["", " ", "./video", "./video/"]:
        video_path = local_path + '/video/'
    else:
        video_path = args.video_directory_path
        
    video_path_jpg = local_path + '/video_jpg/'


    if not os.path.exists(video_path_jpg):
        os.makedirs(video_path_jpg)

    extracted_feature_path = local_path + '/extracted_features'
    if not os.path.exists(extracted_feature_path):
        os.makedirs(extracted_feature_path)

    final_results_path = local_path + '/final_test_results'
    if not os.path.exists(final_results_path):
        os.makedirs(final_results_path)

    os.system('python utils/video_jpg.py' + ' ' + video_path + ' ' + video_path_jpg)
    os.system('python utils/n_frames.py' + ' ' + video_path_jpg)


    if args.pretrain_directory_path in ["", " ", '', './pretrain', './pretrain/']:
        pretrain_directory_path = local_path + '/pretrain'
    else:
        pretrain_directory_path = args.pretrain_directory_path


    import easydict
    opt = easydict.EasyDict({
        "n_classes": 2, 
        "sample_size": 112,
        "sample_duration": 16,
        "batch_size": 16,
        "n_threads": 4,
        "norm_value": 1,
        "resnet_shortcut": 'B',
        "resnext_cardinality": 32,
    })
    opt.root_path =  local_path
    opt.video_path = video_path_jpg




    # expose a single GPU to the process; adjust to however many devices you have
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'



    from datasets.no_label_binary import NoLabelBinary

    mean = get_mean(opt.norm_value, dataset='kinetics')
    std = get_std(opt.norm_value)
    norm_method = Normalize(mean, [1,1,1])


    spatial_transform = Compose([
        Scale(opt.sample_size),
        CornerCrop(opt.sample_size, 'c'),
        ToTensor(opt.norm_value), norm_method
    ])

    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = VideoID() # ClassLabel()



    # get test data
    test_data = NoLabelBinary(
        opt.video_path,
        None,
        'testing',
        0,
        spatial_transform=spatial_transform,
        temporal_transform=temporal_transform,
        target_transform=target_transform,
        sample_duration=opt.sample_duration)


    # wrap test data
    test_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=False)


    # ### Extract Features

    # ##### 3D ResNeXt-101


    from models import resnext

    # construct model architecture
    model_rxt101 = resnext.resnet101(
                    num_classes=opt.n_classes,
                    shortcut_type=opt.resnet_shortcut,
                    cardinality=opt.resnext_cardinality,
                    sample_size=opt.sample_size,
                    sample_duration=opt.sample_duration)

    model_rxt101 = model_rxt101.cuda()
    # wrap the current model again in nn.DataParallel / or we can just remove the .module keys.
    model_rxt101 = nn.DataParallel(model_rxt101, device_ids=None)


    ### Load pretrained weight
    # customize the pretrained model path
    pretrain = torch.load(pretrain_directory_path + '/resnext-101-kinetics.pth')
    pretrain_dict = pretrain['state_dict']

    # do not load the last layer since we want to fine-tune it
    pretrain_dict.pop('module.fc.weight')
    pretrain_dict.pop('module.fc.bias')
    model_dict = model_rxt101.state_dict()
    model_dict.update(pretrain_dict) 
    model_rxt101.load_state_dict(model_dict)




    # register layer index to extract the features by forwarding all the video clips
    activation = {}
    def get_activation(name):
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    model_rxt101.module.avgpool.register_forward_hook(get_activation('avgpool'))
    model_rxt101.eval()


    # forward all the videos to extract features
    avgpool_test = []
    targets_test = []
    with torch.no_grad():
        print("Extract test set features:")
        for i, (inputs, target) in enumerate(test_loader):
            if i % 30 == 0:
                print(i)
            output = model_rxt101(inputs)
            avgpool_test.append(activation['avgpool'].view(len(target), -1).cpu())
            targets_test.append(target)



    avgpool_test_np = np.concatenate([i.numpy() for i in avgpool_test], axis=0)
    np.save(opt.root_path + '/extracted_features/resnext101_avgpool_test.npy', avgpool_test_np)

    # np.concatenate accepts the list of per-batch targets directly
    # (wrapping it in np.array first breaks on a ragged final batch)
    targets_test_np = np.concatenate(targets_test, axis=0)
    np.save(opt.root_path + '/extracted_features/class_names_test.npy', targets_test_np)


    # ##### 3D ResNet-50


    from models import resnet

    # construct model architecture
    model_rt50 = resnet.resnet50(
                    num_classes=opt.n_classes,
                    shortcut_type=opt.resnet_shortcut,
                    sample_size=opt.sample_size,
                    sample_duration=opt.sample_duration)

    model_rt50 = model_rt50.cuda()
    # wrap the current model again in nn.DataParallel / or we can just remove the .module keys.
    model_rt50 = nn.DataParallel(model_rt50, device_ids=None)


    ### Load pretrained weight
    # customize the pretrained model path
    pretrain = torch.load(pretrain_directory_path + '/resnet-50-kinetics.pth')
    pretrain_dict = pretrain['state_dict']

    # do not load the last layer since we want to fine-tune it
    pretrain_dict.pop('module.fc.weight')
    pretrain_dict.pop('module.fc.bias')
    model_dict = model_rt50.state_dict()
    model_dict.update(pretrain_dict) 
    model_rt50.load_state_dict(model_dict)




    # register layer index to extract the features by forwarding all the video clips
    activation = {}
    def get_activation(name):
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    model_rt50.module.avgpool.register_forward_hook(get_activation('avgpool'))
    model_rt50.eval()


    # forward all the videos to extract features
    avgpool_test = []
    with torch.no_grad():
        print("Extract test set features:")
        for i, (inputs, target) in enumerate(test_loader):
            if i % 30 == 0:
                print(i)
            output = model_rt50(inputs)
            avgpool_test.append(activation['avgpool'].view(len(target), -1).cpu())
            
        # save the features
        avgpool_test_np = np.concatenate([i.numpy() for i in avgpool_test], axis=0)
        np.save(opt.root_path + '/extracted_features/resnet50_avgpool_test.npy', avgpool_test_np)    


    # ### Load & fuse the features


    x_test_1 = np.load(opt.root_path + '/extracted_features/resnext101_avgpool_test.npy')
    x_test_2 = np.load(opt.root_path + '/extracted_features/resnet50_avgpool_test.npy')
    x_test = np.concatenate([x_test_1, x_test_2], axis=1)

    y_test = np.load(opt.root_path + '/extracted_features/class_names_test.npy')


    # ### Load Classification head and predict

    if args.model == 'hw4':
        # hw4 best model
        clf = load('./hw6_results/logistic2_ucf.joblib') 
        y_pred_test_raw = clf.predict(x_test_2)
        y_pred_test_prob_raw = clf.predict_proba(x_test_2)

    elif args.model == 'hw5':
        # hw5 best model
        clf = load('./hw6_results/logistic_ucf.joblib') 
        y_pred_test_raw = clf.predict(x_test)
        y_pred_test_prob_raw = clf.predict_proba(x_test)

    elif args.model == 'hw6':
        # hw6 best model
        clf = load('./hw6_results/logistic1_ucf.joblib') 
        y_pred_test_raw = clf.predict(x_test_1)
        y_pred_test_prob_raw = clf.predict_proba(x_test_1)

    elif args.model == 'hw8':
        # hw8 best model
        clf = load('./hw8_results/logistic_ucf.joblib') 
        y_pred_test_raw = clf.predict(x_test)
        y_pred_test_prob_raw = clf.predict_proba(x_test)

    elif args.model == 'final':
        # Final best model
        clf = load('./hw8_results/logistic1_ucf.joblib') 
        y_pred_test_raw = clf.predict(x_test_1)
        y_pred_test_prob_raw = clf.predict_proba(x_test_1)



    split_idx = []
    for idx, y_name in enumerate(y_test):
        if idx == 0 or y_name != y_test[idx-1]:
            split_idx.append(idx)
    split_idx.append(len(y_test))
            
    y_pred_test, y_pred_test_prob, y_pred_test_final = {}, {}, {}
    for i, split in enumerate(split_idx):
        if i < len(split_idx) - 1:
            y_pred_test[y_test[split]] = y_pred_test_raw[split:split_idx[i+1]]
            y_pred_test_prob[y_test[split]] = y_pred_test_prob_raw[split:split_idx[i+1]]
            y_pred_test_final[y_test[split]] = np.argmax(np.mean(y_pred_test_prob_raw[split:split_idx[i+1]], axis=0))   


    # ### Get the length (in seconds) of each video clip


    tvns = list(y_pred_test_final.keys())
    mp4_path = video_path
    clip_duration_dict = {}

    from moviepy.editor import VideoFileClip
    i = 0
    for tvn in tvns:
        i += 1
        if i % 100 == 0:
            print(i)
        clip = VideoFileClip(os.path.join(mp4_path, tvn + ".mp4"))
        clip_duration_dict[tvn] = [clip.duration]


    # ### Generate Figures
    import matplotlib.pyplot as plt
    for tvn in clip_duration_dict:
        interval = clip_duration_dict[tvn][0]/list(y_test).count(tvn)
        x = np.arange(0, clip_duration_dict[tvn][0], interval) + interval
        y_idx = np.argmax(y_pred_test_prob[tvn], 1)
        y = y_pred_test_prob[tvn][:, 1]
        x = x[:len(y)]
        plt.plot(x, y)
        plt.ylim([-0.1, 1.1])
        plt.xlabel('time/sec')
        plt.ylabel('pred score for ground truth label')
        plt.title("Ground Truth Label:  " + tvn  + "\n Model Avg. Predict Score:  " + str(np.mean(y))) # str(real_prediction_dict[tvn]['score'])
        plt.savefig(opt.root_path + "/final_test_results/" + tvn + '_' + args.model + "_UIN-625007598", bbox_inches='tight')
        plt.close()


    # ### Generate Json
    timeTrueLabel = {}
    for tvn in clip_duration_dict:
        if tvn in y_pred_test_prob:
            interval = clip_duration_dict[tvn][0]/list(y_test).count(tvn)
            x = np.arange(0, clip_duration_dict[tvn][0], interval) + interval
            y_idx = np.argmax(y_pred_test_prob[tvn], 1)
            y = y_pred_test_prob[tvn][:, 1]
            x = x[:len(y)]  
            timeTrueLabel[tvn] = [[str(time), str(y[idx])] for idx, time in enumerate(x)]



    with open(opt.root_path + '/final_test_results/timeLabel_' + args.model + '_UIN-625007598.json', 'w') as fp:
        json.dump(timeTrueLabel, fp)
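
Example #19 uses the same feature-extraction idiom twice: register a forward hook on `avgpool`, run a forward pass, and flatten the captured activation. The pattern in isolation, on a plain 2D torchvision ResNet (an assumed stand-in for the 3D networks above):

import torch
import torchvision

# Minimal forward-hook feature extractor (2D resnet18 as an assumed stand-in).
model = torchvision.models.resnet18().eval()
activation = {}

def get_activation(name):
    def hook(module, inputs, output):
        activation[name] = output.detach()  # stash the layer output on each forward
    return hook

model.avgpool.register_forward_hook(get_activation('avgpool'))

with torch.no_grad():
    _ = model(torch.randn(4, 3, 224, 224))    # forward pass fills `activation`
features = activation['avgpool'].view(4, -1)  # -> (batch, 512) pooled features
print(features.shape)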
Example #20
0
import os  # used by listDirectory and gen_split below
import torch
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import random
import glob
import sys

from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale,
                                Normalize, MultiScaleCornerCrop,
                                RandomHorizontalFlip)

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
normalize = Normalize(mean=mean, std=std)
spatial_transform2 = Compose([Scale((7, 7)), ToTensor()])


def listDirectory(path):
    if os.path.isdir(path):
        return os.listdir(path)

    return []


def gen_split(root_dir, stackSize):
    DatasetF = []
    Labels = []
    NumFrames = []
    #The root directory should be processed frames/train or test
    for dir_user in sorted(os.listdir(root_dir)):
Example #21
0
def main_run(stage, train_data_dir, val_data_dir, stage1Dict, stage1Dict_rgb,
             stage1Dict_fc, out_dir, seqLen, trainBatchSize, valBatchSize,
             numEpochs, lr1, decay_factor, decay_step, memSize):
    #dataset = 'gtea61'
    num_classes = 61

    model_folder = os.path.join(
        './', out_dir, 'attConvLSTMDoubleResnet', str(seqLen),
        'stage' + str(stage))  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vid_seq_train = makeDataset(train_data_dir,
                                seqLen=seqLen,
                                fmt='.png',
                                users=['S1', 'S3', 'S4'],
                                spatial_transform=spatial_transform)
    trainInstances = vid_seq_train.__len__()

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)

    if val_data_dir is not None:
        vid_seq_val = makeDataset(val_data_dir,
                                  seqLen=seqLen,
                                  fmt='.png',
                                  users=['S2'],
                                  train=False,
                                  spatial_transform=Compose([
                                      Scale(256),
                                      CenterCrop(224),
                                      ToTensor(), normalize
                                  ]))
        valInstances = vid_seq_val.__len__()

        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)

    train_params = []

    model = twoStreamFlowCol(num_classes=num_classes,
                             memSize=memSize,
                             frameModel=stage1Dict_rgb,
                             flowModel=stage1Dict_fc)
    model.train(False)
    for params in model.parameters():
        params.requires_grad = False

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.lstm_cell.parameters():
        train_params += [params]
        params.requires_grad = True

    for params in model.frameModel.resNet.layer4[0].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[0].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[2].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.layer4[2].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.fc.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.flowModel.lstm_cell.parameters():
        train_params += [params]
        params.requires_grad = True

    for params in model.flowModel.resNet.layer4[0].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.flowModel.resNet.layer4[0].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.flowModel.resNet.layer4[1].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.flowModel.resNet.layer4[1].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.flowModel.resNet.layer4[2].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.flowModel.resNet.layer4[2].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.flowModel.resNet.fc.parameters():
        params.requires_grad = True
        train_params += [params]

    model.cuda()

    trainSamples = vid_seq_train.__len__()
    min_accuracy = 0

    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.SGD(train_params,
                                   lr=lr1,
                                   momentum=0.9,
                                   weight_decay=5e-4)

    optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn,
                                                      step_size=decay_step,
                                                      gamma=decay_factor)
    train_iter = 0
    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0

        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)

        #for i, (inputs, targets) in enumerate(train_loader):
        for inputs, inputsSN, targets in train_loader:
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()

            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            inputSNVariable = Variable(inputsSN.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)

            output_label, _ = model(inputVariable, inputSNVariable)

            loss = loss_fn(output_label, labelVariable)
            loss.backward()

            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            epoch_loss += loss.item()

        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if val_data_dir is not None:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0

            with torch.no_grad():
                #for j, (inputs, targets) in enumerate(val_loader):
                for inputs, inputsSN, targets in val_loader:
                    val_iter += 1
                    val_samples += inputs.size(0)

                    inputVariable = Variable(
                        inputs.permute(1, 0, 2, 3, 4).cuda())
                    inputSNVariable = Variable(
                        inputsSN.permute(1, 0, 2, 3, 4).cuda())
                    labelVariable = Variable(targets.cuda(non_blocking=True))

                    output_label, _ = model(inputVariable, inputSNVariable)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()

                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.cuda()).sum()

            val_accuracy = torch.true_divide(numCorr, val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter

            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))

            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()


def main_run(stage, model, supervision, train_data_dir, val_data_dir,
             stage1_dict, out_dir, seqLen, trainBatchSize, valBatchSize,
             numEpochs, lr1, lr_suphead, lr_resnet, alpha, decay_factor,
             decay_step, lossSupervision, memSize):

    num_classes = 61

    if model == 'ConvLSTMAttention':
        model = ConvLSTMAttention(num_classes=num_classes,
                                  mem_size=memSize,
                                  supervision=supervision,
                                  loss_supervision=lossSupervision)
    elif model == 'ConvLSTM':
        model = ConvLSTM(num_classes=num_classes,
                         mem_size=memSize,
                         supervision=supervision,
                         loss_supervision=lossSupervision)
    elif model == 'SupervisedLSTMMod':
        model = SupervisedLSTMMod(num_classes=num_classes,
                                  mem_size=memSize,
                                  supervision=supervision,
                                  loss_supervision=lossSupervision)
    elif model == 'MyNetIDT':
        model = MyNetIDT(num_classes=num_classes,
                         mem_size=memSize,
                         supervision=supervision,
                         loss_supervision=lossSupervision)
    else:
        print('Model not found')
        sys.exit()

    model_folder = os.path.join(
        './', out_dir, 'rgb',
        'stage' + str(stage))  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])
    spatial_transform_map = Cp([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        Resize((7, 7)),
        TT()
    ])
    spatial_transform_map_2 = Cp([Resize((7, 7)), TT()])
    vid_seq_train = makeDataset_supervision(
        train_data_dir,
        train=True,
        spatial_transform=spatial_transform,
        spatial_transform_map=spatial_transform_map,
        seqLen=seqLen,
        fmt='.png')

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=8,
                                               pin_memory=True)
    if val_data_dir is not None:

        vid_seq_val = makeDataset_supervision(
            val_data_dir,
            train=False,
            spatial_transform_map=spatial_transform_map_2,
            spatial_transform=Compose(
                [Scale(256),
                 CenterCrop(224),
                 ToTensor(), normalize]),
            seqLen=seqLen,
            fmt='.png')

        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=8,
                                                 pin_memory=True)
        valInstances = len(vid_seq_val)
    trainInstances = len(vid_seq_train)
    train_params = []
    train_params3 = []
    train_params2 = []
    if stage == 0:
        for params in model.resNet.parameters():
            params.requires_grad = True
            train_params += [params]
        if stage1_dict is not None:
            model.load_state_dict(torch.load(stage1_dict))
    elif stage == 1:
        supervision = False
        model.eval()
        for params in model.parameters():
            params.requires_grad = False
    else:
        model.load_state_dict(torch.load(stage1_dict))
        model.train()
        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]

        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)
        model.sup_head.train()

    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params2 += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params2 += [params]
    for params in model.sup_head.parameters():
        params.requires_grad = True
        train_params3 += [params]

    model.lstm_cell.train()
    model.classifier.train()
    model.cuda()
    if lossSupervision == "classification":
        loss_sup = nn.CrossEntropyLoss()
    elif lossSupervision == "regression":
        loss_sup = nn.L1Loss()
    loss_fn = nn.CrossEntropyLoss()
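    # Three parameter groups let the backbone, supervision head, and
    # LSTM/classifier train at different learning rates (lr_resnet, lr_suphead, lr1).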
    optimizer_fn = torch.optim.Adam([{
        "params": train_params,
        "lr": lr_resnet
    }, {
        "params": train_params3,
        "lr": lr_suphead
    }, {
        "params": train_params2,
        "lr": lr1
    }],
                                    lr=lr1,
                                    weight_decay=4e-5,
                                    eps=1e-4)
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)
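    # MultiStepLR multiplies the learning rate by decay_factor at each
    # milestone epoch listed in decay_step.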

    train_iter = 0
    min_accuracy = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        epoch_loss_ = 0
        loss_ = 0
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        if stage == 0:
            model.train()
        if stage == 2:
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.sup_head.train()
            model.resNet.fc.train(True)
        for i, (inputs, targets, maps) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
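            # Supervision targets: fold the (seq, batch) dimensions into one so the
            # maps line up with the per-frame outputs of the supervision head
            # (assumed incoming layout: (batch, seq, 1, H, W)).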
            if lossSupervision == "classification":
                maps = torch.ceil(maps)
                maps = maps.type(torch.LongTensor)
                maps = maps.permute(1, 0, 2, 3, 4).squeeze(2).cuda()
                maps = maps.reshape(maps.shape[0] * maps.shape[1],
                                    maps.shape[2], maps.shape[3])
            else:
                maps = maps.permute(1, 0, 2, 3, 4).cuda()
                maps = maps.reshape(maps.shape[0] * maps.shape[1],
                                    maps.shape[2], maps.shape[3],
                                    maps.shape[4])
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)
            output_label, _, output_super = model(inputVariable)
            if supervision:
                loss_ = alpha * loss_sup(output_super, maps)
                loss_.backward(retain_graph=True)
                epoch_loss_ += loss_.data.item()
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            epoch_loss += loss.data.item()
        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.item() / trainSamples) * 100

        avg_loss_ = epoch_loss_ / float(iterPerEpoch)
        print(
            'Train: Epoch = {} | Loss = {} | Accuracy = {} | supervision_loss {}'
            .format(epoch + 1, avg_loss, trainAccuracy, avg_loss_))
        train_log_loss.write('Train Loss after {} epochs = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(
            epoch + 1, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        if val_data_dir is not None:
            if (epoch + 1) % 1 == 0:  # validate every epoch
                model.eval()
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0
                for j, (inputs, targets, _) in enumerate(val_loader):
                    val_iter += 1
                    val_samples += inputs.size(0)
                    with torch.no_grad():
                        inputVariable = Variable(
                            inputs.permute(1, 0, 2, 3, 4).cuda())
                        labelVariable = Variable(
                            targets.cuda(non_blocking=True))
                        output_label, _, _ = model(inputVariable)
                        val_loss = loss_fn(output_label, labelVariable)
                        val_loss_epoch += val_loss.data.item()
                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += (predicted == targets.cuda()).sum()
                val_accuracy = (numCorr.item() / val_samples) * 100
                avg_val_loss = val_loss_epoch / val_iter
                print('val: Epoch = {} | Loss = {} | Accuracy = {} '.format(
                    epoch + 1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                    epoch + 1, avg_val_loss))
                val_log_acc.write(
                    'Val Accuracy after {} epochs = {}%\n'.format(
                        epoch + 1, val_accuracy))
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder +
                                       '/model_rgb_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
            # Periodic epoch-numbered snapshot in addition to the best-model checkpoint
            if (epoch + 1) % 10 == 0:
                save_path_model = (model_folder +
                                   '/model_rgb_state_dict_epoch' +
                                   str(epoch + 1) + '.pth')
                torch.save(model.state_dict(), save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
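
For reference, a hypothetical invocation of the `main_run` above; every path and hyperparameter below is a placeholder, not a value from the original source:

main_run(stage=2,
         model='ConvLSTMAttention',
         supervision=True,
         train_data_dir='./GTEA61/train',    # placeholder path
         val_data_dir='./GTEA61/val',        # placeholder path
         stage1_dict='./experiments/rgb/stage1/model_rgb_state_dict.pth',  # placeholder
         out_dir='experiments',
         seqLen=7, trainBatchSize=32, valBatchSize=32,
         numEpochs=150, lr1=1e-3, lr_suphead=1e-3, lr_resnet=1e-4,
         alpha=1, decay_factor=0.1, decay_step=[25, 75],
         lossSupervision='classification', memSize=512)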
Example #23
def get_train_utils(opt, model_parameters):
    assert opt.train_crop in ['random', 'corner', 'center']
    spatial_transform = []
    if opt.train_crop == 'random':
        spatial_transform.append(
            RandomResizedCrop(
                opt.sample_size, (opt.train_crop_min_scale, 1.0),
                (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio)))
    elif opt.train_crop == 'corner':
        scales = [1.0]
        scale_step = 1 / (2**(1 / 4))
        for _ in range(1, 5):
            scales.append(scales[-1] * scale_step)
        spatial_transform.append(MultiScaleCornerCrop(opt.sample_size, scales))
    elif opt.train_crop == 'center':
        spatial_transform.append(Resize(opt.sample_size))
        spatial_transform.append(CenterCrop(opt.sample_size))
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        spatial_transform.append(RandomHorizontalFlip())
    if opt.colorjitter:
        spatial_transform.append(ColorJitter())
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.append(ScaleValue(opt.value_scale))
    spatial_transform.append(normalize)
    spatial_transform = Compose(spatial_transform)

    assert opt.train_t_crop in ['random', 'center']
    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        temporal_transform.append(TemporalRandomCrop(opt.sample_duration))
    elif opt.train_t_crop == 'center':
        temporal_transform.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(temporal_transform)

    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    if opt.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_data)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=opt.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=opt.n_threads,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               worker_init_fn=worker_init_fn)

    if opt.is_master_node:
        train_logger = Logger(opt.result_path / 'train.log',
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            opt.result_path / 'train_batch.log',
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
    else:
        train_logger = None
        train_batch_logger = None

    if opt.nesterov:
        dampening = 0
    else:
        dampening = opt.dampening
    optimizer = SGD(model_parameters,
                    lr=opt.learning_rate,
                    momentum=opt.momentum,
                    dampening=dampening,
                    weight_decay=opt.weight_decay,
                    nesterov=opt.nesterov)

    assert opt.lr_scheduler in ['plateau', 'multistep']
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min', patience=opt.plateau_patience)
    else:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             opt.multistep_milestones)

    return (train_loader, train_sampler, train_logger, train_batch_logger,
            optimizer, scheduler)
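
The 'corner' branch above builds five crop scales spaced by a factor of 2^(-1/4). A standalone check of the values it produces, assuming nothing beyond the Python standard library:

scale_step = 1 / (2 ** (1 / 4))
scales = [1.0]
for _ in range(1, 5):
    scales.append(scales[-1] * scale_step)
print([round(s, 4) for s in scales])  # [1.0, 0.8409, 0.7071, 0.5946, 0.5]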
Example #24
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            spatial_crop_method = MultiScaleRandomCrop(opt.scales,
                                                       opt.frame_size)
        elif opt.train_crop == 'corner':
            spatial_crop_method = MultiScaleCornerCrop(opt.scales,
                                                       opt.frame_size)
        elif opt.train_crop == 'center':
            spatial_crop_method = MultiScaleCornerCrop(opt.scales,
                                                       opt.frame_size,
                                                       crop_positions=['c'])
        spatial_transform = Compose([
            spatial_crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value), normalize
        ])
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform)

        train_loader = DataLoaderX(training_data,
                                   batch_size=opt.batch_size,
                                   shuffle=True,
                                   num_workers=opt.n_threads,
                                   pin_memory=True,
                                   drop_last=True)
        train_logger = Logger(os.path.join(opt.save_path, 'train.log'),
                              ['epoch', 'loss', 'final_mAP', 'lr'])

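        # (temporal_transform is consumed above when building the training set;
        # in the full source it is presumably defined earlier, and this call
        # redefines it for the validation pipeline.)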
        temporal_transform = TemporalSegmentCenterCrop(opt.segment_number,
                                                       opt.sample_duration)
Example #25
def main_run(dataset, flowModel_state_dict, RGBModel_state_dict, dataset_dir,
             stackSize, seqLen, memSize, numSeg):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    flow_wt = 0.5
    testBatchSize = 1
    sequence = True
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224),
         ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               sequence=sequence,
                               numSeg=numSeg,
                               stackSize=stackSize,
                               fmt='.jpg',
                               phase='Test',
                               seqLen=seqLen)

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    modelFlow = flow_resnet34(False,
                              channels=2 * stackSize,
                              num_classes=num_classes)
    modelFlow.load_state_dict(torch.load(flowModel_state_dict))
    modelRGB = attentionModel(num_classes=num_classes, mem_size=memSize)
    modelRGB.load_state_dict(torch.load(RGBModel_state_dict))

    for params in modelFlow.parameters():
        params.requires_grad = False

    for params in modelRGB.parameters():
        params.requires_grad = False

    modelFlow.train(False)
    modelRGB.train(False)
    modelFlow.cuda()
    modelRGB.cuda()
    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorrTwoStream = 0

    true_labels = []
    predicted_labels = []

    with torch.no_grad():
        for j, (inputFlow, inputFrame, targets) in enumerate(test_loader):
            inputVariableFlow = inputFlow[0].cuda()
            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).cuda()
            output_labelFlow, _ = modelFlow(inputVariableFlow)
            output_labelFrame, _ = modelRGB(inputVariableFrame)
            output_label_meanFlow = torch.mean(output_labelFlow.data, 0, True)
            output_label_meanTwoStream = (flow_wt * output_label_meanFlow) + (
                (1 - flow_wt) * output_labelFrame.data)
            _, predictedTwoStream = torch.max(output_label_meanTwoStream, 1)
            predictedTwoStream = predictedTwoStream.cpu()
            numCorrTwoStream += (predictedTwoStream == targets[0]).sum()
            true_labels.append(targets)
            predicted_labels.append(predictedTwoStream)
    test_accuracyTwoStream = (numCorrTwoStream.item() / test_samples) * 100
    print('Test Accuracy = {}'.format(test_accuracyTwoStream))

    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    ticks = np.linspace(0, 60, num=61)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-twoStream.jpg', bbox_inches='tight')
    plt.show()
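
The fusion step above is a convex combination of the two streams' class scores followed by an argmax. A minimal standalone illustration, with random tensors standing in for the model outputs:

import torch

flow_wt = 0.5
scores_flow = torch.randn(1, 61)   # stand-in for the averaged flow-stream scores
scores_rgb = torch.randn(1, 61)    # stand-in for the RGB-stream scores
fused = flow_wt * scores_flow + (1 - flow_wt) * scores_rgb
_, predicted = torch.max(fused, 1)
print(predicted.item())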
Example #26
def main_run(model_state_dict, dataset_dir, seqLen, memSize, out_dir):
    model_folder = os.path.join('./', out_dir, 'attConvLSTM', str(seqLen))
    num_classes = 61  # gtea61

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224),
         ToTensor(), normalize])
    
    vid_seq_test = makeDataset(dataset_dir, seqLen=seqLen, fmt='.png', train=False,
                               spatial_transform=spatial_transform, users=['S2'])
    
    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1,
                            shuffle=False, num_workers=2, pin_memory=True)
    
    model = attentionModel(num_classes=num_classes, mem_size=memSize)
    model.load_state_dict(torch.load(model_state_dict))
    
    for params in model.parameters():
        params.requires_grad = False
    
    model.train(False)
    model.cuda()
    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorr = 0
    true_labels = []
    predicted_labels = []
    
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            output_label, _ = model(inputVariable)
            
            _, predicted = torch.max(output_label.data, 1)
            numCorr += (predicted == targets.cuda()).sum()
            true_labels.append(targets)
            predicted_labels.append(predicted.cpu())
    
    test_accuracy = torch.true_divide(numCorr, test_samples) * 100
    test_accuracy_str = 'Test Accuracy = {}%'.format(test_accuracy)
    print(test_accuracy_str)
    with open(model_folder + "/test_log_acc.txt", "w") as fil:
        fil.write(test_accuracy_str)
    
    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]
    
    ticks = np.linspace(0, 60, num=61)
    plt.figure(1, figsize=(12, 12), dpi=100.0)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    xy = np.arange(start=0, stop=61)
    plt.plot(xy, xy)  # diagonal reference line
    plt.savefig(model_folder + '/cnf_matrix_normalized.png', bbox_inches='tight')
    plt.show()
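
Dividing each row of the confusion matrix by its sum, as above, converts raw counts into per-class recall. A small self-contained check:

import numpy as np

cnf = np.array([[8., 2.],
                [1., 9.]])
cnf_normalized = cnf / cnf.sum(axis=1)[:, np.newaxis]
print(cnf_normalized)  # each row sums to 1: [[0.8 0.2], [0.1 0.9]]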
Example #27
def main_run(dataset, trainDir, valDir, outDir, stackSize, trainBatchSize,
             valBatchSize, numEpochs, lr1, decay_factor, decay_step,
             uniform_sampling, debug):
    # GTEA 61
    num_classes = 61

    # Train/Validation/Test split
    train_splits = ["S1", "S3", "S4"]
    val_splits = ["S2"]

    if debug:
        n_workers = 0
        device = 'cpu'
    else:
        n_workers = 4
        device = 'cuda'

    min_accuracy = 0

    model_folder = os.path.join('./', outDir, dataset,
                                'flow')  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])

    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vid_seq_train = makeDataset(trainDir,
                                train_splits,
                                spatial_transform=spatial_transform,
                                sequence=False,
                                stackSize=stackSize,
                                fmt='.png',
                                uniform_sampling=uniform_sampling)

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               sampler=None,
                                               num_workers=n_workers,
                                               pin_memory=True)

    vid_seq_val = makeDataset(trainDir,
                              val_splits,
                              spatial_transform=Compose([
                                  Scale(256),
                                  CenterCrop(224),
                                  ToTensor(), normalize
                              ]),
                              sequence=False,
                              stackSize=stackSize,
                              fmt='.png',
                              phase='Test',
                              uniform_sampling=uniform_sampling)

    val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                             batch_size=valBatchSize,
                                             shuffle=False,
                                             num_workers=n_workers,
                                             pin_memory=True)
    valInstances = len(vid_seq_val)

    trainInstances = len(vid_seq_train)
    print('Number of samples in the dataset: training = {} | validation = {}'.
          format(trainInstances, valInstances))

    model = flow_resnet34(True,
                          channels=2 * stackSize,
                          num_classes=num_classes)
    model.train(True)
    train_params = list(model.parameters())

    model.to(device)

    loss_fn = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.SGD(train_params,
                                   lr=lr1,
                                   momentum=0.9,
                                   weight_decay=5e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.train(True)
        for i, (inputs, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = inputs.to(device)
            labelVariable = targets.to(device)
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(device)).sum()
            epoch_loss += loss.data.item()

        optim_scheduler.step()

        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.data.item() / trainSamples) * 100
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if (epoch + 1) % 1 == 0:  # validate every epoch
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            with torch.no_grad():
                for j, (inputs, targets) in enumerate(val_loader):
                    val_iter += 1
                    val_samples += inputs.size(0)
                    inputVariable = inputs.to(device)
                    labelVariable = targets.to(device)
                    output_label, _ = model(inputVariable)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.data.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.to(device)).sum()
            val_accuracy = (numCorr.data.item() / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Validation: Epoch = {} | Loss = {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_flow_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
        # Periodic epoch-numbered snapshot in addition to the best-model checkpoint
        if (epoch + 1) % 10 == 0:
            save_path_model = (model_folder +
                               '/model_flow_state_dict_epoch' +
                               str(epoch + 1) + '.pth')
            torch.save(model.state_dict(), save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
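
The checkpointing above keeps only the best-so-far model by validation accuracy. A compact standalone version of the pattern, with a dummy model and dummy accuracy values:

import torch
import torch.nn as nn

model = nn.Linear(4, 2)   # stand-in model
best_accuracy = 0.0
for val_accuracy in [40.0, 55.0, 52.0, 61.0]:   # dummy per-epoch scores
    if val_accuracy > best_accuracy:
        torch.save(model.state_dict(), 'best_model_state_dict.pth')
        best_accuracy = val_accuracy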
Example #28
def main_run(version, flowModel, rgbModel, stackSize, seqLen, memSize, trainDatasetDir, outDir,
             trainBatchSize, valBatchSize, lr1, numEpochs, decay_step, decay_factor):
    
    num_classes = 61     # gtea61 dataset
    model_folder = os.path.join("./", outDir, version)

    # Create the dir
    print(f"Checking directory {model_folder}")
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    print(f"Creating directory{model_folder}")
    os.makedirs(model_folder)

    # Log files
    print(f"Creating log files")
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # ImageNet mean and std
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    # Train val partitioning
    train_usr = ["S1", "S3", "S4"]
    val_usr = ["S2"]


    normalize = Normalize(mean=mean, std=std)

    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                 ToTensor(), normalize])
    # train dataset
    print(f"Defining train dataset")
    vid_seq_train = makeDataset(trainDatasetDir, train_usr, spatial_transform,
                               stackSize=stackSize, seqLen=seqLen)

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                            shuffle=True, num_workers=4, pin_memory=True)
    

    # val dataset
    print(f"Defining validation dataset")
    vid_seq_val = makeDataset(trainDatasetDir, val_usr,
                                   spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                                   stackSize=stackSize, phase="val", seqLen=seqLen)
    
    val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                shuffle=False, num_workers=2, pin_memory=True)
    
    valSamples = vid_seq_val.__len__()


    # model
    print("Building model")
    model = twoStreamAttentionModel(flowModel=flowModel, frameModel=rgbModel, stackSize=stackSize, memSize=memSize,         # see twoStreamModel.py
                                    num_classes=num_classes)
    
    print("Setting trainable parameters")
    for params in model.parameters():           # initially freeze all layers
        params.requires_grad = False

    model.train(False)
    train_params = []

    for params in model.classifier.parameters():    # unfreeze classifier layer (the layer that joins the two models outputs)
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.lstm_cell.parameters():  # unfreeze lstm layer of the frame model
        train_params += [params]
        params.requires_grad = True

    for params in model.frameModel.resNet.layer4[0].conv1.parameters():     #unfreeze layer 4
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[0].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[2].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.layer4[2].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.fc.parameters():              # unfreeze the last fully connected layer of the frame model
        params.requires_grad = True                                     # (the two-stream head bypasses this layer, so unfreezing it
        train_params += [params]                                        # may be unnecessary, but it is kept trainable here)

    base_params = []
    for params in model.flowModel.layer4.parameters():              # unfreeze layer 4 of flow model
        base_params += [params]
        params.requires_grad = True

    print("Moving model to GPU")
    model.to(DEVICE)

    trainSamples = vid_seq_train.__len__()
    min_accuracy = 0

    print("Defining loss function, optimizer and scheduler")
    loss_fn = nn.CrossEntropyLoss()     # loss function
    optimizer_fn = torch.optim.SGD([    # optimizer
        {'params': train_params},
        {'params': base_params, 'lr': 1e-4},  # 1e-4
    ], lr=lr1, momentum=0.9, weight_decay=5e-4)

    #scheduler
    optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn, step_size=decay_step, gamma=decay_factor)
    train_iter = 0

    print("Training begun")
    # TRAIN PROCEDURE
    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.classifier.train(True)
        model.flowModel.layer4.train(True)


        start = time.time()
        for j, (inputFrame, inputMMaps, inputFlow, targets) in enumerate(train_loader):
            
            print(f"step {j} / {int(np.floor(trainSamples/trainBatchSize))}")
            
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()                                                # put gradients to zero
            inputVariableFlow = Variable(inputFlow.to(DEVICE))
            inputVariableFrame = Variable(inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE))
            labelVariable = Variable(targets.to(DEVICE))
            #print("predict")
            output_label = model(inputVariableFlow, inputVariableFrame)         # predict
            loss = loss_fn(F.log_softmax(output_label, dim=1), labelVariable)   # compute loss
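            # The explicit log_softmax is redundant but harmless: CrossEntropyLoss
            # applies log-softmax internally, and log_softmax is idempotent, so
            # the loss value is unchanged.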
            #print("backprop")
            loss.backward()                                                     
            optimizer_fn.step()
            #print("accuracy")
            _, predicted = torch.max(output_label.data, 1)                  
            numCorrTrain += (predicted == targets.to(DEVICE)).sum()             # counting number of correct predictions
            epoch_loss += loss.data.item()  

        # Step the LR scheduler once per epoch, after the optimizer updates
        optim_scheduler.step()

        avg_loss = epoch_loss / iterPerEpoch                                    # computing average per epoch loss
        trainAccuracy = (numCorrTrain.item() / trainSamples) * 100
        print('Average training loss after {} epoch = {} '.format(epoch + 1, avg_loss))
        print('Training accuracy after {} epoch = {}% '.format(epoch + 1, trainAccuracy))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch + 1, avg_loss))             # log file
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch + 1, trainAccuracy))     # log file
        print(f"Elapsed : {time.time()-start}")

        # VALIDATION
        if (epoch + 1) % 5 == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            numCorr = 0
            with torch.no_grad():
                for j, (inputFrame, inputMMaps, inputFlow, targets) in enumerate(val_loader):
                    print(f"step {j} / {int(np.floor(valSamples / valBatchSize))}")

                    val_iter += 1
                    inputVariableFlow = Variable(inputFlow.to(DEVICE))
                    inputVariableFrame = Variable(inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE))
                    labelVariable = Variable(targets.to(DEVICE))
                    output_label = model(inputVariableFlow, inputVariableFrame)
                    loss = loss_fn(F.log_softmax(output_label, dim=1), labelVariable)
                    val_loss_epoch += loss.data.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == labelVariable.data).sum()
            val_accuracy = (numCorr.item() / valSamples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val Loss after {} epochs, loss = {}'.format(epoch + 1, avg_val_loss))
            print('Val Accuracy after {} epochs = {}%'.format(epoch + 1, val_accuracy))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))       # log file
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))   # log file
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_twoStream_state_dict.pth')                    # whenever validation accuracy improves, save the model:
                torch.save(model.state_dict(), save_path_model)                                         # even if training later overfits, the best checkpoint is kept,
                min_accuracy = val_accuracy                                                             # so the exact number of epochs matters less

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
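
The SGD optimizer above uses two parameter groups so the flow model's layer4 can train at a smaller learning rate than the freshly initialized layers. A minimal standalone version of that pattern, with stand-in modules:

import torch
import torch.nn as nn

classifier = nn.Linear(512, 61)                        # stand-in for the new head
backbone_stage = nn.Conv2d(256, 512, 3, padding=1)     # stand-in for a pretrained stage

optimizer = torch.optim.SGD(
    [
        {'params': classifier.parameters()},                 # takes the default lr below
        {'params': backbone_stage.parameters(), 'lr': 1e-4}, # smaller lr for pretrained weights
    ],
    lr=1e-2, momentum=0.9, weight_decay=5e-4)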
Example #29
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale,
                                Normalize, MultiScaleCornerCrop,
                                RandomHorizontalFlip)
import matplotlib.pyplot as plt

import importlib
import grad_cam  # must be imported before it can be reloaded
importlib.reload(grad_cam)

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

normalize = Normalize(mean=mean, std=std)
spatial_transform = Compose(
    [Scale(256),
     MultiScaleCornerCrop([1], 224),
     ToTensor(), normalize])


def frame_example(image):
    flowModel = "../experiments/gtea61/flow/model_flow_state_dict.pth"
    rgbModel = "modelsFolder/experiments/gtea61/rgb/stage2/model_rgb_state_dict.pth"
    stackSize = 5
    memSize = 512
    num_classes = 61
    seqLen = 7
    model_state_dict = "modelsFolder/selfSupervisedExperiments/gtea61/twoStream/model_twoStream_state_dict.pth"
    trainDatasetDir = "../GTEA61/flow_x_processed/train"
    model = twoStreamAttentionModel(flowModel=flowModel,
                                    frameModel=rgbModel,
                                    stackSize=stackSize,
                                    memSize=memSize,
Example #30
    def __init__(self):

        self.model_methods = [['resnext', 'gradcam', 'camshow']]

        self.classes = [
            "brush_hair", "cartwheel", "catch", "chew", "clap", "climb",
            "climb_stairs", "dive", "draw_sword", "dribble", "drink", "eat",
            "fall_floor", "fencing", "flic_flac", "golf", "handstand", "hit",
            "hug", "jump", "kick", "kick_ball", "kiss", "laugh", "pick",
            "pour", "pullup", "punch", "push", "pushup", "ride_bike",
            "ride_horse", "run", "shake_hands", "shoot_ball", "shoot_bow",
            "shoot_gun", "sit", "situp", "smile", "smoke", "somersault",
            "stand", "swing_baseball", "sword", "sword_exercise", "talk",
            "throw", "turn", "walk", "wave"
        ]

        scales = [1.0]

        self.spatial_transform = Compose([
            MultiScaleCornerCrop(scales, 112),
            ToTensor(1.0),
            Normalize(get_mean(1.0, dataset='activitynet'), get_std(1.0))
        ])

        self.spatial_transform2 = Compose([MultiScaleCornerCrop(scales, 112)])

        self.spatial_transform3 = Compose([
            MultiScaleCornerCrop(scales, 112),
            ToTensor(1),
            Normalize([0, 0, 0], [1, 1, 1])
        ])

        self.model = utils.load_model(self.model_methods[0][0])
        self.model.cuda()
        self.bb_frames = []
        method_name = 'gradcam'
        self.explainer = get_explainer(self.model, method_name, "conv1")
        self.explainer2 = get_explainer(self.model, method_name, "layer1")
        self.explainer3 = get_explainer(self.model, method_name, "layer2")
        self.explainer4 = get_explainer(self.model, method_name, "layer3")
        self.explainer5 = get_explainer(self.model, method_name, "layer4")
        self.explainer6 = get_explainer(self.model, method_name, "avgpool")
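        # One Grad-CAM explainer per network stage, so saliency can be compared
        # across depths (conv1 through avgpool).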
        path = "images/frames4"
        #print path
        self.path = path + "/"
        #dirc = os.listdir(path)
        #self.files = [ fname for fname in dirc if fname.startswith('img')]
        #self.files2 = [ fname for fname in dirc if fname.startswith('flow_x')]
        self.seq = []
        self.kls = []
        self.scr = []
        self.totalhit = 0
        self.totalhit2 = 0
        self.totalhit3 = 0
        self.totalhit4 = 0
        self.totalhit5 = 0
        self.totalhit6 = 0
        self.totalhit7 = 0
        self.totalframes = 0