예제 #1
0
    def __init__(self, opt, train=True):
        """Build a video dataset for one of the supported benchmarks.

        Args:
            opt: parsed options; reads opt.dataset, opt.irregular and
                opt.input_norm here.
            train: True selects the 'train' split (with augmentation),
                False the 'test' split.

        Raises:
            ValueError: if opt.dataset has no configured data root.
        """
        super(VideoDataset, self).__init__(opt, train=train)

        # Dataroot & per-dataset base transform
        if opt.dataset == 'mgif':
            data_root = './dataset/moving-gif'
            vtrans = [vtransforms.Scale(size=128)]
        elif opt.dataset == 'kth':
            data_root = './dataset/kth_action/'
            vtrans = [
                vtransforms.CenterCrop(size=120),
                vtransforms.Scale(size=128)
            ]
        elif opt.dataset == 'penn':
            data_root = './dataset/penn_action/'
            vtrans = [vtransforms.Scale(size=128)]
        else:
            # Fail fast: previously an unknown dataset fell through and
            # crashed below with a NameError on `data_root`/`vtrans`.
            raise ValueError('Unsupported dataset: %s' % opt.dataset)

        # Augmentation is applied only at train time.
        if self.train:
            vtrans += [vtransforms.RandomHorizontalFlip()]
            vtrans += [vtransforms.RandomRotation()]

        vtrans += [vtransforms.ToTensor(scale=True)]
        vtrans += [vtransforms.Normalize(0.5, 0.5)] if opt.input_norm else []
        self.vtrans = T.Compose(vtrans)

        if self.train:
            self.image_path = os.path.join(data_root, 'train')
        else:
            self.image_path = os.path.join(data_root, 'test')

        # Minimum clip length a sample must have to be kept.
        threshold = self.window_size if opt.irregular else self.sample_size
        if opt.dataset in ['kth', 'sintel', 'ucf101', 'penn']:
            self.image_list = os.listdir(self.image_path)
        elif opt.dataset in ['mgif', 'stickman']:
            self.image_list = remove_files_under_sample_size(
                image_path=self.image_path, threshold=threshold)
        self.image_list = sorted(self.image_list)
예제 #2
0
def main():
    """Train and evaluate an action-recognition network.

    Configuration comes from the module-level ``parser``/``args`` globals.
    Builds the model, loss and SGD optimizer, constructs the train/val
    loaders from the dataset split files, then either runs a single
    validation pass (``args.evaluate``) or the full train/validate loop,
    saving a checkpoint every ``args.save_freq`` epochs.
    """
    global args, best_prec1
    args = parser.parse_args()

    # create model
    print("Building model ... ")
    model = build_model()
    print("Model %s is loaded. " % (args.modality + "_" + args.arch))

    # args.resume doubles as the checkpoint output directory.
    if not os.path.exists(args.resume):
        os.makedirs(args.resume)
    print("Saving everything to directory %s." % (args.resume))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    cudnn.benchmark = True

    # Data transforming: ImageNet channel statistics, tiled once per frame
    # of the stacked clip.
    clip_mean = [0.485, 0.456, 0.406] * args.new_length
    clip_std = [0.229, 0.224, 0.225] * args.new_length
    normalize = video_transforms.Normalize(mean=clip_mean, std=clip_std)

    if args.modality == "rgb":
        scale_ratios = [1.0, 0.875, 0.75, 0.66]
    elif args.modality == "flow":
        scale_ratios = [1.0, 0.875, 0.75]
    else:
        # Fail fast: the original code only printed here and then crashed
        # later with a NameError on `scale_ratios`.
        raise ValueError("No such modality. Only rgb and flow supported.")

    train_transform = video_transforms.Compose([
        video_transforms.Scale((256)),
        video_transforms.MultiScaleCrop((224, 224), scale_ratios),
        video_transforms.RandomHorizontalFlip(),
        video_transforms.ToTensor(),
        normalize,
    ])

    val_transform = video_transforms.Compose([
        video_transforms.Scale((256)),
        video_transforms.CenterCrop((224)),
        video_transforms.ToTensor(),
        normalize,
    ])

    # data loading
    train_setting_file = "train_%s_split%d.txt" % (args.modality, args.split)
    train_split_file = os.path.join(args.settings, args.dataset,
                                    train_setting_file)
    val_setting_file = "val_%s_split%d.txt" % (args.modality, args.split)
    val_split_file = os.path.join(args.settings, args.dataset,
                                  val_setting_file)
    if not os.path.exists(train_split_file) or not os.path.exists(
            val_split_file):
        # Fail fast: previously this only printed and then crashed inside
        # the dataset constructor.
        raise FileNotFoundError(
            "No split file exists in %s directory. Preprocess the dataset first"
            % (args.settings))

    train_dataset = datasets.__dict__[args.dataset](
        args.data,
        train_split_file,
        "train",
        args.new_length,
        video_transform=train_transform)
    val_dataset = datasets.__dict__[args.dataset](
        args.data,
        val_split_file,
        "val",
        args.new_length,
        video_transform=val_transform)

    print('{} samples found, {} train samples and {} test samples.'.format(
        len(val_dataset) + len(train_dataset), len(train_dataset),
        len(val_dataset)))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    # NOTE(review): shuffling the validation loader does not change the
    # metrics, only the evaluation order -- confirm this is intended.
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        if (epoch + 1) % args.save_freq == 0:
            checkpoint_name = "%03d_%s" % (epoch + 1, "checkpoint.pth.tar")
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                }, is_best, checkpoint_name, args.resume)
예제 #3
0
def main():
    """Run the pretrained trajectory predictor over the validation split
    and dump the raw network outputs to an .npy file.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Build the flow-based predictor and restore pretrained weights
    # (non-strict load: missing/unexpected keys are tolerated).
    net = nn.DataParallel(DynamicTrajectoryPredictor(9).to(device).float())
    net.load_state_dict(torch.load('./model.weights'), False)

    load_path = './data_inference/'
    img_root = '../../../flow_result/'

    # Training settings (kept for reference; only some are used here)
    epochs = 15
    batch_size = 1
    learning_rate = 1e-5
    num_workers = 8
    weight_decay = 1e-2
    NUM_FLOW_FRAMES = 9
    training_proportion = 100

    # Validation-time preprocessing
    transform_val = video_transforms.Compose([
        video_transforms.Scale((224)),
        video_transforms.ToTensor(),
    ])

    valset = LocationDatasetBDD(filename='myvideo_val_yolo_0.pkl',
                                root_dir=load_path,
                                transform=transform_val,
                                img_root=img_root,
                                NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
    val_loader = torch.utils.data.DataLoader(valset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers)

    net.eval()
    collected = []
    tic = time.time()
    with torch.no_grad():
        for step, sample in enumerate(val_loader):
            # Progress report every 100 batches.
            if step % 100 == 0:
                toc = time.time()
                print(' Batch ', step, ' of ', len(val_loader),
                      ' Cost time: ', toc - tic)
                tic = toc

            flow_stack = sample['flow_stack'].to(device).float()
            collected.append(net(flow_stack).detach().cpu().numpy())
            # Debug short-circuit: stop after the very first batch.
            if step == 0:
                break

    ans = np.array(collected).reshape(-1, 120)
    print(ans)
    print(ans.shape)

    np.save('./data_inference/val_prediction.npy', ans)
예제 #4
0
def main(args):
    """Cross-validated training of the trajectory predictor on JAAD.

    For each of five folds: load the train/val/test pkl splits, train for
    up to ``epochs`` epochs keeping the model with the best validation
    MSE@15, evaluate it on the test split, then save the weights,
    predictions, targets and a row of metrics.

    Args:
        args: namespace with ``model_load_path`` (optional warm start) and
            ``model_save_path`` (output directory prefix).
    """
    ############################################################################
    # Path to optical flow images
    img_root = './data/human-annotated/'
    # Path to training and testing files
    load_path = './data/'
    # CPU or GPU?
    device = torch.device("cuda")

    # Training settings
    epochs = 30
    batch_size = 64
    learning_rate = 1e-5
    num_workers = 8
    pretrained = False
    weight_decay = 1e-2
    NUM_FLOW_FRAMES = 9

    model_load_path = args.model_load_path
    model_save_path = args.model_save_path

    # Transformers for training and validation
    transform_train = video_transforms.Compose([
        video_transforms.MultiScaleCrop((224, 224), [1.0]),
        video_transforms.ToTensor(),
    ])
    transform_val = video_transforms.Compose([
        video_transforms.Scale((224)),
        video_transforms.ToTensor(),
    ])
    ############################################################################

    print('################### Training settings ###################')
    print('epochs:', epochs, '   batch_size:', batch_size, '   learning_rate:',
          learning_rate, '   num_workers:', num_workers, '   model_load_path:',
          model_load_path, '   NUM_FLOW_FRAMES:', NUM_FLOW_FRAMES)

    results = pd.DataFrame()

    for fold in [1, 2, 3, 4, 5]:
        # Pretrained warm starts use a lower LR and fewer epochs.
        if pretrained:
            learning_rate = 1e-6
            epochs = 30
        else:
            learning_rate = 1e-5
            epochs = 40

        print('Training on fold ' + str(fold))

        try:
            testset = LocationDatasetJAAD(filename='jaad_cv_test.pkl',
                                          root_dir=load_path,
                                          transform=transform_val,
                                          img_root=img_root,
                                          NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
            test_loader = torch.utils.data.DataLoader(testset,
                                                      batch_size=batch_size,
                                                      shuffle=False,
                                                      num_workers=num_workers)
            trainset = LocationDatasetJAAD(filename='jaad_cv_train_' +
                                           str(fold) + '.pkl',
                                           root_dir=load_path,
                                           transform=transform_train,
                                           img_root=img_root,
                                           NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
            train_loader = torch.utils.data.DataLoader(trainset,
                                                       batch_size=batch_size,
                                                       shuffle=True,
                                                       num_workers=num_workers)
            valset = LocationDatasetJAAD(filename='jaad_cv_val_' + str(fold) +
                                         '.pkl',
                                         root_dir=load_path,
                                         transform=transform_val,
                                         img_root=img_root,
                                         NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
            val_loader = torch.utils.data.DataLoader(valset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)
        except Exception:
            # Narrowed from a bare `except:` so Ctrl-C / SystemExit still
            # propagate.
            sys.exit(
                'ERROR: Could not load pkl data file. Check the jaad .pkl files are in the correct path.'
            )

        model = DynamicTrajectoryPredictor(NUM_FLOW_FRAMES).to(device)
        model = model.float()

        model = nn.DataParallel(model)

        if model_load_path is not None:
            print('loading model from', model_load_path)
            model.load_state_dict(torch.load(model_load_path))

        optimizer = optim.Adam(model.parameters(),
                               lr=learning_rate,
                               weight_decay=weight_decay)
        loss_function = torch.nn.MSELoss()
        best_FDE = np.inf
        best_MSE = np.inf
        best_model = copy.deepcopy(model)

        # Begin training
        for epoch in range(1, epochs + 1):
            # Drop the learning rate to 1e-6 after 30 epochs. The new
            # optimizer is built only once (at epoch 31) so its moment
            # estimates are not reset every epoch. BUG FIX: the original
            # passed the misspelled keyword ``weight_weight_decay=decay``,
            # which raised at epoch 31.
            if epoch == 31:
                optimizer = optim.Adam(model.parameters(),
                                       lr=1e-6,
                                       weight_decay=weight_decay)

            train(model, device, train_loader, optimizer, epoch, loss_function)
            MSE_5, FDE_5, MSE_10, FDE_10, MSE_15, FDE_15, _, _ = test(
                model, device, val_loader, loss_function)
            # Model selection on validation MSE@15.
            if MSE_15 < best_MSE:
                best_MSE = MSE_15
                best_model = copy.deepcopy(model)
                best_FDE = FDE_15
            print(epoch)
            print('Best MSE:', round(best_MSE, 0))

        test_mse_5, test_fde_5, test_mse_10, test_fde_10, test_mse_15, test_fde_15, all_outputs, all_targets = test(
            best_model, device, test_loader, loss_function)
        print('Test mse @ 15:', round(test_mse_15, 0))

        # Save the model
        torch.save(
            best_model.state_dict(), model_save_path + 'rn18_flow_css_' +
            str(NUM_FLOW_FRAMES) + 'stack_fold_' + str(fold) + '_pretrained-' +
            str(pretrained) + '_disp.weights')

        # Save the predictions and the targets
        np.save(
            './predictions_rn18_flow_css_' + str(NUM_FLOW_FRAMES) +
            'stack_jaad_fold_' + str(fold) + 'pretrained-' + str(pretrained) +
            '_disp.npy', all_outputs)
        np.save(
            './targets_rn18_flow_css_' + str(NUM_FLOW_FRAMES) +
            'stack_jaad_fold_' + str(fold) + 'pretrained-' + str(pretrained) +
            '_disp.npy', all_targets)

        # Save the results
        result = {
            'NUM_FLOW_FRAMES': NUM_FLOW_FRAMES,
            'fold': fold,
            'val_mse': best_MSE,
            'val_fde': best_FDE,
            'test_mse_5': test_mse_5,
            'test_fde_5': test_fde_5,
            'test_mse_10': test_mse_10,
            'test_fde_10': test_fde_10,
            'test_mse_15': test_mse_15,
            'test_fde_15': test_fde_15,
            'pretrained': pretrained
        }
        # `DataFrame.append` was removed in pandas 2.0; concat instead.
        results = pd.concat([results, pd.DataFrame([result])],
                            ignore_index=True)
        results.to_csv('./results_rn18_jaad.csv', index=False)
예제 #5
0
def main(args):
    """Build the JAAD fold-1 train/val/test loaders and report the flow
    stack size of the first test batch.

    Args:
        args: namespace with ``model_load_path`` and ``model_save_path``
            (printed in the settings banner; loaders are built from fixed
            paths below).
    """
    ############################################################################
    # Path to optical flow images
    img_root = './data/human-annotated/'
    # Path to training and testing files
    load_path = './data/'
    # CPU or GPU?
    device = torch.device("cuda")

    # Training settings
    epochs = 30
    batch_size = 64
    learning_rate = 1e-5
    num_workers = 8
    pretrained = False
    weight_decay = 1e-2
    NUM_FLOW_FRAMES = 9

    model_load_path = args.model_load_path
    model_save_path = args.model_save_path

    # Transformers for training and validation
    transform_train = video_transforms.Compose([
        video_transforms.MultiScaleCrop((224, 224), [1.0]),
        video_transforms.ToTensor(),
    ])
    transform_val = video_transforms.Compose([
        video_transforms.Scale((224)),
        video_transforms.ToTensor(),
    ])
    ############################################################################

    print('################### Training settings ###################')
    print('epochs:', epochs, '   batch_size:', batch_size, '   learning_rate:',
          learning_rate, '   num_workers:', num_workers, '   model_load_path:',
          model_load_path, '   NUM_FLOW_FRAMES:', NUM_FLOW_FRAMES)

    results = pd.DataFrame()
    testset = LocationDatasetJAAD(filename='jaad_cv_test.pkl',
                                  root_dir=load_path,
                                  transform=transform_val,
                                  img_root=img_root,
                                  NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
    test_loader = torch.utils.data.DataLoader(testset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=num_workers)
    trainset = LocationDatasetJAAD(filename='jaad_cv_train_' + str(1) + '.pkl',
                                   root_dir=load_path,
                                   transform=transform_train,
                                   img_root=img_root,
                                   NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)

    valset = LocationDatasetJAAD(filename='jaad_cv_val_' + str(1) + '.pkl',
                                 root_dir=load_path,
                                 transform=transform_val,
                                 img_root=img_root,
                                 NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
    val_loader = torch.utils.data.DataLoader(valset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers)
    # BUG FIX: a DataLoader is not subscriptable, so the original
    # ``test_loader['flow_stack']`` raised TypeError. Pull the first batch
    # and report its flow-stack tensor size instead.
    first_batch = next(iter(test_loader))
    print('test_loader flow_stack size = ', first_batch['flow_stack'].size())
예제 #6
0
def main():
    '''
        Load the pretrained trajectory predictor, run it over every
        fold/split of the dataset, and save the collected 2048-d features
        to .npy files.
    '''
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = DynamicTrajectoryPredictor(9).to(device)
    model = model.float()
    model = nn.DataParallel(model)
    # summary(model, input_size=(18, 224, 224))
    # Non-strict weight load (second positional arg is `strict`):
    # missing/unexpected keys are tolerated.
    model.load_state_dict(
        torch.load(
            './data/yolomyvideo_rn50_flow_css_9stack_training_proportion_100_shuffled_disp.weights'
        ), False)
    model.eval()

    load_path = './data/'
    img_root = '../../flow_result/'

    # Training settings (only batch_size, num_workers and NUM_FLOW_FRAMES
    # are used in this inference-only script)
    epochs = 15
    batch_size = 1
    learning_rate = 1e-5
    num_workers = 8
    weight_decay = 1e-2
    NUM_FLOW_FRAMES = 9
    training_proportion = 100

    # Transformers
    transform_val = video_transforms.Compose([
        video_transforms.Scale((224)),
        video_transforms.ToTensor(),
    ])

    # Iterate every split of every fold and extract features for each.
    for fold_type in ['train', 'val', 'test']:
        for fold_num in range(1, 4):
            # NOTE(review): `result` is not defined in this function --
            # presumably a module-level list populated by a forward hook
            # registered on the model elsewhere, since the forward output
            # below is otherwise discarded. TODO confirm.
            result.clear()
            valset = LocationDatasetBDD(filename=fold_type + str(fold_num) +
                                        '_myvideo_location_features_yolo.pkl',
                                        root_dir=load_path,
                                        transform=transform_val,
                                        img_root=img_root,
                                        NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
            val_loader = torch.utils.data.DataLoader(valset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)

            # Inference only: freeze all parameters.
            for param in model.parameters():
                param.requires_grad = False

            start_time = time.time()
            for batch_idx, data in enumerate(val_loader):
                # Progress report every 100 batches.
                if batch_idx % 100 == 0:
                    end_time = time.time()
                    print(fold_type + ':', fold_num, ' Batch ', batch_idx,
                          ' of ', len(val_loader), ' Cost time: ',
                          end_time - start_time)
                    start_time = end_time
                #    break

                # if batch_idx == 20:
                #     break

                flow = data['flow_stack'].to(device)
                flow = flow.float()
                # Return value unused; features are presumably captured
                # into `result` via a hook during this forward pass.
                output = model(flow)

                # print('Processing: ', batch_idx)

            # One 2048-d feature vector per sample.
            ans = np.array(result).reshape(-1, 2048)
            print(ans.shape)

            with open('record_extract.txt', 'w') as f:
                f.write(fold_type + ' ' + str(fold_num) + ' ' + str(ans.shape))

            np.save(
                './data/sted_feature/fold_' + str(fold_num) + '_' + fold_type +
                '_dtp_features.npy', ans)
예제 #7
0
def main(args):
    """Train the trajectory predictor on BDD10k detections.

    Loads train/val/test pkl splits produced by the chosen detector,
    trains for ``epochs`` epochs keeping the model with the best validation
    MSE@15, evaluates on the test split, then saves weights, predictions,
    targets and a metrics CSV.

    Args:
        args: namespace with ``detector`` ('yolo' or anything else, which
            selects the faster-rcnn flow directory).
    """
    ############################################################################
    # Path to optical flow images
    if args.detector == 'yolo':
        img_root = './data/yolov3/'
    else:
        img_root = './data/faster-rcnn/'
    # Path to training and testing files
    load_path = './data/'
    # CPU or GPU?
    device = torch.device("cuda")

    # Model saving and loading
    model_save_path = './data/'
    model_load_path = './data/'

    # Training settings
    epochs = 15
    batch_size = 64
    learning_rate = 1e-5
    num_workers = 8
    weight_decay = 1e-2
    NUM_FLOW_FRAMES = 9
    training_proportion = 100  #  How much of the dataset to use? 100 = 100percent

    # Transformers for training and validation
    transform_train = video_transforms.Compose([
        video_transforms.MultiScaleCrop((224, 224), [1.0]),
        video_transforms.ToTensor(),
    ])

    transform_val = video_transforms.Compose([
        video_transforms.Scale((224)),
        video_transforms.ToTensor(),
    ])

    ############################################################################

    print('################### Training settings ###################')
    print('epochs:', epochs, '   batch_size:', batch_size, '   learning_rate:',
          learning_rate, '   num_workers:', num_workers, '   NUM_FLOW_FRAMES:',
          NUM_FLOW_FRAMES)

    results = pd.DataFrame()

    print('Training model')
    print(args.detector + '_bdd10k_val.pkl')

    try:
        # The val pkl doubles as the test split here.
        testset = LocationDatasetBDD(filename='bdd10k_val_' + args.detector +
                                     '.pkl',
                                     root_dir=load_path,
                                     transform=transform_val,
                                     img_root=img_root,
                                     NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
        test_loader = torch.utils.data.DataLoader(testset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=num_workers)

        trainset = LocationDatasetBDD(filename='bdd10k_train_' +
                                      args.detector + '.pkl',
                                      root_dir=load_path,
                                      transform=transform_train,
                                      img_root=img_root,
                                      NUM_FLOW_FRAMES=NUM_FLOW_FRAMES,
                                      proportion=training_proportion)
        train_loader = torch.utils.data.DataLoader(trainset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=num_workers)
        valset = LocationDatasetBDD(filename='bdd10k_val_' + args.detector +
                                    '.pkl',
                                    root_dir=load_path,
                                    transform=transform_val,
                                    img_root=img_root,
                                    NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
        val_loader = torch.utils.data.DataLoader(valset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=num_workers)
    except Exception:
        # Narrowed from a bare `except:` so Ctrl-C / SystemExit still
        # propagate.
        sys.exit(
            'ERROR: Could not load pkl data file. Check the bdd .pkl files are in the correct path.'
        )

    model = DynamicTrajectoryPredictor(NUM_FLOW_FRAMES).to(device)
    model = model.float()

    model = nn.DataParallel(model)

    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           weight_decay=weight_decay)
    loss_function = torch.nn.MSELoss()
    best_FDE = np.inf
    best_MSE = np.inf
    best_model = copy.deepcopy(model)

    # Begin training: keep (and checkpoint) the model with the best
    # validation MSE@15.
    for epoch in range(1, epochs + 1):
        train(model, device, train_loader, optimizer, epoch, loss_function)
        MSE_5, FDE_5, MSE_10, FDE_10, MSE_15, FDE_15, _, _ = test(
            model, device, val_loader, loss_function)
        if MSE_15 < best_MSE:
            best_MSE = MSE_15
            best_model = copy.deepcopy(model)
            best_FDE = FDE_15
            torch.save(
                best_model.state_dict(),
                model_save_path + args.detector + '_rn18_bdd10k_flow_css_' +
                str(NUM_FLOW_FRAMES) + 'stack_training_proportion_' +
                str(training_proportion) + '_shuffled_disp.weights')
        print(epoch)
        print('Best MSE:', round(best_MSE, 0))

    test_mse_5, test_fde_5, test_mse_10, test_fde_10, test_mse_15, test_fde_15, all_outputs, all_targets = test(
        best_model, device, test_loader, loss_function)
    print('Test mse @ 15:', round(test_mse_15, 0))

    # Save the model
    torch.save(
        best_model.state_dict(),
        model_save_path + args.detector + 'bdd10k_rn18_flow_css_' +
        str(NUM_FLOW_FRAMES) + 'stack_training_proportion_' +
        str(training_proportion) + '_shuffled_disp.weights')

    # Save the predictions and the targets
    np.save(
        './' + args.detector + '_predictions_rn18_flow_css_' +
        str(NUM_FLOW_FRAMES) + 'stack_bdd10k_training_proportion_' +
        str(training_proportion) + '_shuffled_disp.npy', all_outputs)
    np.save(
        './' + args.detector + '_targets_rn18_flow_css_' +
        str(NUM_FLOW_FRAMES) + 'stack_bdd10k__shuffled_disp.npy', all_targets)

    # Save the results
    result = {
        'NUM_FLOW_FRAMES': NUM_FLOW_FRAMES,
        'training_proportion': training_proportion,
        'val_mse': best_MSE,
        'val_fde': best_FDE,
        'test_mse_5': test_mse_5,
        'test_fde_5': test_fde_5,
        'test_mse_10': test_mse_10,
        'test_fde_10': test_fde_10,
        'test_mse_15': test_mse_15,
        'test_fde_15': test_fde_15
    }
    # `DataFrame.append` was removed in pandas 2.0; concat instead.
    results = pd.concat([results, pd.DataFrame([result])], ignore_index=True)
    results.to_csv('./' + args.detector + '_results_rn18_bdd10k.csv',
                   index=False)