Example #1
def set_model(args, n_data):
    # set the model
    if args.model == 'c3d':
        model = C3D(with_classifier=False)
    elif args.model == 'r3d':
        model = R3DNet(layer_sizes=(1, 1, 1, 1), with_classifier=False)
    elif args.model == 'r21d':
        model = R2Plus1DNet(layer_sizes=(1, 1, 1, 1), with_classifier=False)
    else:
        # without this, an unrecognized args.model leaves `model` undefined
        raise ValueError('unsupported model: {}'.format(args.model))

    if args.intra_neg:
        contrast = NCEAverage(args.feat_dim, n_data, args.nce_k, args.nce_t,
                              args.nce_m, args.softmax)
    else:
        contrast = NCEAverage_ori(args.feat_dim, n_data, args.nce_k,
                                  args.nce_t, args.nce_m, args.softmax)

    criterion_1 = NCESoftmaxLoss() if args.softmax else NCECriterion(n_data)
    criterion_2 = NCESoftmaxLoss() if args.softmax else NCECriterion(n_data)

    # GPU mode
    model = model.cuda()
    contrast = contrast.cuda()
    criterion_1 = criterion_1.cuda()
    criterion_2 = criterion_2.cuda()
    cudnn.benchmark = True

    return model, contrast, criterion_1, criterion_2
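For orientation: NCEAverage and NCESoftmaxLoss are defined elsewhere in the repo and are not shown here. A self-contained stand-in for the softmax-based NCE criterion, assuming the common layout where each row of scores holds the positive pair at index 0 followed by K noise scores:

import torch
import torch.nn.functional as F

# Stand-in for NCESoftmaxLoss: with the positive's score in column 0,
# the loss reduces to cross-entropy against an all-zeros target.
def nce_softmax_loss(scores):
    target = torch.zeros(scores.size(0), dtype=torch.long, device=scores.device)
    return F.cross_entropy(scores, target)

scores = torch.randn(8, 1 + 16384)  # batch of 8, K = 16384 negatives (cf. args.nce_k)
print(nce_softmax_loss(scores))     # roughly log(1 + K) for random scores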
Example #2
        torch.manual_seed(args.seed)
        if args.gpu:
            torch.cuda.manual_seed_all(args.seed)

    ########### model ##############
    if args.dataset == 'ucf101':
        class_num = 101
    elif args.dataset == 'hmdb51':
        class_num = 51
    elif args.dataset == 'K400':
        class_num = 400

    if args.model == 'c3d':
        model = C3D(with_classifier=True, num_classes=class_num).cuda()
    elif args.model == 'r3d':
        model = R3DNet(layer_sizes=(1, 1, 1, 1), with_classifier=True,
                       num_classes=class_num).cuda()
    elif args.model == 'r21d':
        model = R2Plus1DNet(layer_sizes=(1, 1, 1, 1), with_classifier=True,
                            num_classes=class_num).cuda()
    elif args.model == 's3d':
        model = S3D(num_classes=class_num, space_to_depth=False,
                    with_classifier=True).cuda()

    if args.ckpt:
        pretrained_weights = torch.load(args.ckpt)['model']
        # strip the 'module.base_network.' prefix left by the pretraining wrapper
        model.load_state_dict({k.replace('module.base_network.', ''): v
                               for k, v in pretrained_weights.items()}, strict=False)

    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model, device_ids=[0]).cuda()

    if args.desp:
        exp_name = 'K400_TCG_split1_finetuned_loss_{}_cl{}_{}_{}'.format(
            args.model, args.cl, args.desp, time.strftime('%m%d%H%M'))
    else:
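The k.replace('module.base_network.', '') remapping above exists because checkpoints saved from a DataParallel-wrapped pretraining model prefix every parameter key. A toy illustration of the same remapping (nn.Linear stands in for the real backbone; only the key names matter):

import torch.nn as nn

backbone = nn.Linear(4, 2)  # stand-in for C3D / R3DNet / R2Plus1DNet

# simulate a checkpoint saved from a wrapped model: every key gains a prefix
wrapped = {'module.base_network.' + k: v for k, v in backbone.state_dict().items()}

# strip the prefix so the keys line up with the bare backbone again
restored = {k.replace('module.base_network.', ''): v for k, v in wrapped.items()}
backbone.load_state_dict(restored, strict=True)  # strict=True proves every key matched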
Example #3
    #os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
    # Force PyTorch to create its context on the specified devices
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5,6,7"
    device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

    if args.seed:
        random.seed(args.seed)
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)

    ########### model ##############
    if args.model == 'c3d':
        base = C3D(with_classifier=False).to(device)
    elif args.model == 'r3d':
        base = R3DNet(layer_sizes=(3, 4, 6, 3),
                      with_classifier=False).to(device)
    elif args.model == 'r21d':
        base = R2Plus1DNet(layer_sizes=(1, 1, 1, 1),
                           with_classifier=False).to(device)
    elif args.model == 'i3d':
        base = InceptionI3d(final_endpoint='Logits',
                            with_classifier=False).to(device)
    elif args.model == 'r3d50':
        base = resnet50(sample_size=112,
                        sample_duration=16,
                        with_classifier=False,
                        return_conv=False).to(device)
    elif args.model == 's3d':
        base = S3D(gating=True, with_classifier=False,
                   return_conv=False).to(device)
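The seeding block above recurs in several of these snippets; it can be collected into one helper. A minimal sketch (seed_everything is a name introduced here, not taken from the source):

import random
import numpy as np
import torch

def seed_everything(seed):
    """Seed Python, NumPy and PyTorch (CPU and all GPUs) in one call."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # safe without a GPU: seeding is deferred

seed_everything(632)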
Example #4
def extract_feature(args):
    """Extract and save features for train split, several clips per video."""
    torch.backends.cudnn.benchmark = True
    # Force PyTorch to create its context on the specific device
    #os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

    ########### model ##############
    if args.model == 'c3d':
        model = C3D(with_classifier=False, return_conv=True).to(device)
    elif args.model == 'r3d':
        model = R3DNet(layer_sizes=(1, 1, 1, 1),
                       with_classifier=False,
                       return_conv=True).to(device)
    elif args.model == 'r21d':
        model = R2Plus1DNet(layer_sizes=(1, 1, 1, 1),
                            with_classifier=False,
                            return_conv=True).to(device)
    elif args.model == 's3d':
        model = S3D(space_to_depth=False,
                    with_classifier=False,
                    return_conv=True).to(device)

    if args.ckpt:
        pretrained_weights = torch.load(args.ckpt)['model']
        model.load_state_dict(
            {
                k.replace('module.base_network.', ''): v
                for k, v in pretrained_weights.items()
            },
            strict=False)

    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()

    model.eval()
    torch.set_grad_enabled(False)
    ### Extract for train split ###
    train_transforms = transforms.Compose([
        transforms.Resize((128, 171)),
        transforms.CenterCrop(112),
        transforms.ToTensor()
    ])
    if args.dataset == 'ucf101':
        train_dataset = UCF101ClipRetrievalDataset('data/ucf101', 16, 10, True,
                                                   train_transforms)
    elif args.dataset == 'hmdb51':
        train_dataset = HMDB51ClipRetrievalDataset('data/hmdb51', 16, 10, True,
                                                   train_transforms)
    elif args.dataset == 'K400':
        train_dataset = K400ClipRetrievalDataset('data/K400', 16, 10, True,
                                                 train_transforms)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.bs,
                                  shuffle=False,
                                  num_workers=args.workers,
                                  pin_memory=True,
                                  drop_last=True)

    features = []
    classes = []
    for data in tqdm(train_dataloader):
        sampled_clips, idxs = data
        # flatten the 10 clips per video into one batch dimension
        clips = sampled_clips.reshape((-1, 3, 16, 112, 112))
        inputs = clips.to(device)
        # forward
        outputs = model(inputs)
        features.append(outputs.cpu().numpy().tolist())
        classes.append(idxs.cpu().numpy().tolist())

    features = np.array(features).reshape(-1, 10, outputs.shape[1])
    classes = np.array(classes).reshape(-1, 10)
    np.save(os.path.join(args.feature_dir, 'train_feature.npy'), features)
    np.save(os.path.join(args.feature_dir, 'train_class.npy'), classes)

    ### Extract for test split ###
    test_transforms = transforms.Compose([
        transforms.Resize((128, 171)),
        transforms.CenterCrop(112),
        transforms.ToTensor()
    ])
    if args.dataset == 'ucf101':
        test_dataset = UCF101ClipRetrievalDataset('data/ucf101', 16, 10, False,
                                                  test_transforms)
    elif args.dataset == 'hmdb51':
        test_dataset = HMDB51ClipRetrievalDataset('data/hmdb51', 16, 10, False,
                                                  test_transforms)
    elif args.dataset == 'K400':
        test_dataset = K400ClipRetrievalDataset('data/K400', 16, 10, False,
                                                test_transforms)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.bs,
                                 shuffle=False,
                                 num_workers=args.workers,
                                 pin_memory=True,
                                 drop_last=True)

    features = []
    classes = []
    for data in tqdm(test_dataloader):
        sampled_clips, idxs = data
        clips = sampled_clips.reshape((-1, 3, 16, 112, 112))
        inputs = clips.to(device)
        # forward
        outputs = model(inputs)
        features.append(outputs.cpu().numpy().tolist())
        classes.append(idxs.cpu().numpy().tolist())

    features = np.array(features).reshape(-1, 10, outputs.shape[1])
    classes = np.array(classes).reshape(-1, 10)
    np.save(os.path.join(args.feature_dir, 'test_feature.npy'), features)
    np.save(os.path.join(args.feature_dir, 'test_class.npy'), classes)
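Note on the accumulation pattern: outputs.cpu().numpy().tolist() converts each batch to nested Python lists, which np.array then has to re-parse. Appending arrays and concatenating once is simpler and faster; a sketch with stand-in data (shapes follow the loop above: bs videos x 10 clips per batch):

import numpy as np

feature_batches = []  # inside the loop: feature_batches.append(outputs.cpu().numpy())
feature_batches = [np.random.randn(80, 512), np.random.randn(80, 512)]  # two fake batches
features = np.concatenate(feature_batches).reshape(-1, 10, 512)
print(features.shape)  # (16, 10, 512): 16 videos, 10 clips each, 512-d features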
Example #5
def extract_feature(args):
    """Extract and save features for the train and test splits, several clips per video."""
    torch.backends.cudnn.benchmark = True
    device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

    ########### model ##############
    if args.model == 'c3d':
        model = C3D(with_classifier=False, return_conv=True).to(device)
    elif args.model == 'r3d':
        model = R3DNet(layer_sizes=(1, 1, 1, 1),
                       with_classifier=False,
                       return_conv=True).to(device)
    elif args.model == 'r21d':
        model = R2Plus1DNet(layer_sizes=(1, 1, 1, 1),
                            with_classifier=False,
                            return_conv=True).to(device)

    #model = nn.DataParallel(model)
    if args.ckpt:
        pretrained_weights = torch.load(args.ckpt)['model']
        model.load_state_dict(pretrained_weights, strict=True)
    model.eval()
    torch.set_grad_enabled(False)

    ### Extract for train split ###
    train_transforms = transforms.Compose([
        transforms.Resize((128, 171)),
        transforms.CenterCrop(112),
        transforms.ToTensor()
    ])
    if args.dataset == 'ucf101':
        train_dataset = UCF101ClipRetrievalDataset('data/ucf101', 16, 10, True,
                                                   train_transforms)
    elif args.dataset == 'hmdb51':
        train_dataset = HMDB51ClipRetrievalDataset('data/hmdb51', 16, 10, True,
                                                   train_transforms)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.bs,
                                  shuffle=False,
                                  num_workers=args.workers,
                                  pin_memory=True,
                                  drop_last=True)

    features = []
    classes = []
    for data in tqdm(train_dataloader):
        sampled_clips, u_clips, v_clips, idxs = data
        if args.modality == 'u':
            input_clips = u_clips
        elif args.modality == 'v':
            input_clips = v_clips
        else:  # rgb and res
            input_clips = sampled_clips
        clips = input_clips.reshape((-1, 3, 16, 112, 112))
        inputs = clips.to(device)

        if args.modality == 'res':
            outputs = model(diff(inputs))
        else:
            outputs = model(inputs)

        if args.merge:
            rgb_clips = sampled_clips.reshape((-1, 3, 16, 112, 112)).to(device)
            outputs_rgb = model(rgb_clips)
            outputs = torch.cat((outputs_rgb, outputs), 1)

        features.append(outputs.cpu().numpy().tolist())
        classes.append(idxs.cpu().numpy().tolist())

    features = np.array(features).reshape(-1, 10, outputs.shape[1])
    classes = np.array(classes).reshape(-1, 10)
    np.save(os.path.join(args.feature_dir, 'train_feature.npy'), features)
    np.save(os.path.join(args.feature_dir, 'train_class.npy'), classes)

    ### Extract for test split ###
    test_transforms = transforms.Compose([
        transforms.Resize((128, 171)),
        transforms.CenterCrop(112),
        transforms.ToTensor()
    ])
    if args.dataset == 'ucf101':
        test_dataset = UCF101ClipRetrievalDataset('data/ucf101', 16, 10, False,
                                                  test_transforms)
    elif args.dataset == 'hmdb51':
        test_dataset = HMDB51ClipRetrievalDataset('data/hmdb51', 16, 10, False,
                                                  test_transforms)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.bs,
                                 shuffle=False,
                                 num_workers=args.workers,
                                 pin_memory=True,
                                 drop_last=True)

    features = []
    classes = []
    for data in tqdm(test_dataloader):
        sampled_clips, u_clips, v_clips, idxs = data
        if args.modality == 'u':
            input_clips = u_clips
        elif args.modality == 'v':
            input_clips = v_clips
        else:  # rgb and res
            input_clips = sampled_clips
        clips = input_clips.reshape((-1, 3, 16, 112, 112))
        inputs = clips.to(device)
        # forward
        if args.modality == 'res':
            outputs = model(diff(inputs))
        else:
            outputs = model(inputs)

        if args.merge:
            rgb_clips = sampled_clips.reshape((-1, 3, 16, 112, 112)).to(device)
            outputs_rgb = model(rgb_clips)
            outputs = torch.cat((outputs_rgb, outputs), 1)

        features.append(outputs.cpu().numpy().tolist())
        classes.append(idxs.cpu().numpy().tolist())

    features = np.array(features).reshape(-1, 10, outputs.shape[1])
    classes = np.array(classes).reshape(-1, 10)
    np.save(os.path.join(args.feature_dir, 'test_feature.npy'), features)
    np.save(os.path.join(args.feature_dir, 'test_class.npy'), classes)
    print('Saved features to', args.feature_dir)
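The 'res' branch above calls diff(inputs), which is not defined in any of these snippets. A hypothetical implementation, assuming it computes frame residuals on clips shaped (N, C, T, H, W):

import torch

def diff(clips):
    """Hypothetical frame-residual transform: x[t+1] - x[t] along time.
    The last frame is repeated so the output keeps the input's length."""
    shifted = torch.cat((clips[:, :, 1:], clips[:, :, -1:]), dim=2)
    return shifted - clips

clips = torch.randn(2, 3, 16, 112, 112)
print(diff(clips).shape)  # torch.Size([2, 3, 16, 112, 112])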
Example #6
    # Force PyTorch to create its context on the specific device
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

    if args.seed:
        random.seed(args.seed)
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        if args.gpu:
            torch.cuda.manual_seed_all(args.seed)

    ########### model ##############
    if args.model == 'c3d':
        base = C3D(with_classifier=False)
    elif args.model == 'r3d':
        base = R3DNet(layer_sizes=(1, 1, 1, 1), with_classifier=False)
    elif args.model == 'r21d':
        base = R2Plus1DNet(layer_sizes=(1, 1, 1, 1), with_classifier=False)
    vcopn = VCOPN(base_network=base, feature_size=512,
                  tuple_len=args.tl).to(device)

    if args.mode == 'train':  ########### Train #############
        if args.ckpt:  # resume training
            vcopn.load_state_dict(torch.load(args.ckpt))
            log_dir = os.path.dirname(args.ckpt)
        else:
            if args.desp:
                exp_name = '{}_cl{}_it{}_tl{}_{}_{}'.format(
                    args.model, args.cl, args.it, args.tl, args.desp,
                    time.strftime('%m%d%H%M'))
            else:
Example #7
            torch.cuda.manual_seed_all(args.seed)

    ########### model ##############
    if args.dataset == 'ucf101':
        class_num = 101
    elif args.dataset == 'hmdb51':
        class_num = 51
    elif args.dataset == 'activity':
        class_num = 200

    if args.model == 'c3d':
        model = C3D(with_classifier=True, num_classes=class_num).cuda()
    elif args.model == 'r3d':
        model = R3DNet(layer_sizes=(3, 4, 6, 3),
                       with_classifier=True,
                       num_classes=class_num).cuda()
    elif args.model == 'r21d':
        model = R2Plus1DNet(layer_sizes=(1, 1, 1, 1),
                            with_classifier=True,
                            num_classes=class_num).cuda()
    elif args.model == 's3d':
        model = S3D(num_classes=class_num,
                    space_to_depth=False,
                    with_classifier=True).cuda()
    elif args.model == 'r3d_50':
        model = generate_model(model_depth=50,
                               with_classifier=False,
                               return_conv=False).cuda()

    if torch.cuda.device_count() > 1:
Example #8
def extract_feature(args):
    """Extract and save features for the train and test splits, several clips per video."""
    torch.backends.cudnn.benchmark = True
    # Force PyTorch to create its context on the specific device
    #os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

    ########### model ##############
    if args.model == 'r3d':
        model = R3DNet(layer_sizes=(1, 1, 1, 1),
                       with_classifier=False,
                       return_conv=True).to(device)
    elif args.model == 'r18':
        model = R18(with_classifier=False).to(device)

    if args.ckpt:
        if args.model == 'r3d':
            pretrained_weights = load_pretrained_weights(args.ckpt)
            # strict=False tolerates extra/missing keys; set strict=True to
            # verify that every checkpoint weight was actually loaded
            model.load_state_dict(pretrained_weights, strict=False)
        else:  # for r18
            pretrained_weights = torch.load(args.ckpt)
            model.load_state_dict(pretrained_weights, strict=False)
    model.eval()
    torch.set_grad_enabled(False)
    ### Extract for train split ###
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_transforms = transforms.Compose([
        transforms.Resize((128, 171)),
        transforms.CenterCrop(112),
        transforms.ToTensor(), normalize
    ])
    if args.dataset == 'ucf101':
        train_dataset = UCF101ClipRetrievalDataset('data', 16, 10, True,
                                                   train_transforms)
    elif args.dataset == 'hmdb51':
        train_dataset = HMDB51ClipRetrievalDataset('data', 16, 10, True,
                                                   train_transforms)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.bs,
                                  shuffle=False,
                                  num_workers=args.workers,
                                  pin_memory=True,
                                  drop_last=True)

    features = []
    classes = []
    for data in tqdm(train_dataloader):
        sampled_clips, idxs = data
        clips = sampled_clips.reshape((-1, 3, 16, 112, 112))
        inputs = clips.to(device)

        if args.modality == 'res':
            outputs = model(diff(inputs))
        else:
            outputs = model(inputs)
        features.append(outputs.cpu().numpy().tolist())
        classes.append(idxs.cpu().numpy().tolist())

    features = np.array(features).reshape(-1, 10, outputs.shape[1])
    classes = np.array(classes).reshape(-1, 10)
    np.save(os.path.join(args.feature_dir, 'train_feature.npy'), features)
    np.save(os.path.join(args.feature_dir, 'train_class.npy'), classes)

    ### Extract for test split ###
    test_transforms = transforms.Compose([
        transforms.Resize((128, 171)),
        transforms.CenterCrop(112),
        transforms.ToTensor(), normalize
    ])
    if args.dataset == 'ucf101':
        test_dataset = UCF101ClipRetrievalDataset('data', 16, 10, False,
                                                  test_transforms)
    elif args.dataset == 'hmdb51':
        test_dataset = HMDB51ClipRetrievalDataset('data', 16, 10, False,
                                                  test_transforms)

    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.bs,
                                 shuffle=False,
                                 num_workers=args.workers,
                                 pin_memory=True,
                                 drop_last=True)

    features = []
    classes = []
    for data in tqdm(test_dataloader):
        sampled_clips, idxs = data
        clips = sampled_clips.reshape((-1, 3, 16, 112, 112))
        inputs = clips.to(device)
        # forward
        if args.modality == 'res':
            outputs = model(diff(inputs))
        else:
            outputs = model(inputs)
        features.append(outputs.cpu().numpy().tolist())
        classes.append(idxs.cpu().numpy().tolist())

    features = np.array(features).reshape(-1, 10, outputs.shape[1])
    classes = np.array(classes).reshape(-1, 10)
    np.save(os.path.join(args.feature_dir, 'test_feature.npy'), features)
    np.save(os.path.join(args.feature_dir, 'test_class.npy'), classes)
    print('Saved features to', args.feature_dir)
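All of these scripts end by saving (num_videos, 10, feat_dim) feature arrays plus matching class arrays, which are typically consumed by a nearest-neighbour retrieval evaluation. A self-contained sketch of such an evaluation (top-k recall under cosine similarity; the function and its defaults are illustrative, not the repo's actual script):

import numpy as np

def topk_retrieval(train_feat, train_cls, test_feat, test_cls, ks=(1, 5, 10, 20, 50)):
    """A test clip scores a hit at k if any of its k nearest training
    clips (by cosine similarity) shares its class label."""
    X = train_feat.reshape(-1, train_feat.shape[-1])  # (N*10, feat_dim)
    y = train_cls.reshape(-1)
    Q = test_feat.reshape(-1, test_feat.shape[-1])
    q = test_cls.reshape(-1)
    X = X / np.linalg.norm(X, axis=1, keepdims=True)
    Q = Q / np.linalg.norm(Q, axis=1, keepdims=True)
    order = np.argsort(-(Q @ X.T), axis=1)  # neighbours sorted by similarity
    for k in ks:
        hits = (y[order[:, :k]] == q[:, None]).any(axis=1).mean()
        print('top-{}: {:.1%}'.format(k, hits))

# e.g. with the arrays saved above:
# topk_retrieval(np.load('train_feature.npy'), np.load('train_class.npy'),
#                np.load('test_feature.npy'), np.load('test_class.npy'))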