Example #1
def run(video,
        mode='rgb',
        weights='weights/unproc_bs4_456225.pt',
        num_classes=1042):
    class_map = make_label_map()
    if isinstance(video, str):
        data = prepare_data_mp4(video)
    else:
        data = prepare_data(video)

    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
        i3d.load_state_dict(torch.load('weights/flow_imagenet.pt'))
    else:
        i3d = InceptionI3d(400, in_channels=3)
        i3d.load_state_dict(torch.load('weights/rgb_imagenet.pt'))
    i3d.replace_logits(num_classes)
    i3d.load_state_dict(torch.load(weights))
    i3d.cuda()
    i3d = nn.DataParallel(i3d)
    i3d.eval()
    preds = []
    inputs, labels, video_id = data

    per_frame_logits = i3d(inputs)

    predictions = torch.max(per_frame_logits, dim=2)[0]
    scores = predictions.cpu().detach().numpy()[0]
    out_labels = np.argsort(scores)
    out_probs = np.sort(scores)
    print(class_map[out_labels[-1]])
    return class_map[out_labels[-1]]
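A minimal invocation sketch (hypothetical video path; `run` is the function above, which prints and returns the top-1 class name):

if __name__ == '__main__':
    # classify one local video file with the RGB stream and the default weights
    top1 = run('videos/example.mp4', mode='rgb')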
Example #2
    def __init__(self, num_classes):
        super(TAL_Net, self).__init__()
        self.num_classes = num_classes
        self.I3D_1 = InceptionI3d(3, in_channels=3)
        self.I3D_2 = InceptionI3d(3, in_channels=3)
        
#         for param in self.I3D.parameters():
#             param.requires_grad = False
            
        self.dropout = nn.Dropout(p=0.5)   
        self.predictor = nn.Sequential(
            Unit3D(in_channels=2*(384 + 384 + 128 + 128), 
                   output_channels=256,
                   kernel_shape=[1, 1, 1],
                   name='layer1'),
            nn.Dropout(p=0.5),
            Unit3D(in_channels=256, 
                   output_channels=self.num_classes + 2,
                   kernel_shape=[1, 1, 1],
                   activation_fn=None,
                   use_batch_norm=False,
                   use_bias=True,
                   name='layer2')
        )
#         self.predictor = Unit3D(in_channels=384 + 384 + 128 + 128, output_channels=self.num_classes+2,
#                                 kernel_shape=[1, 1, 1],
#                                 activation_fn=None,
#                                 use_batch_norm=False,
#                                 use_bias=True)
               
        self.predictor.apply(weight_init)
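A quick shape check for the predictor head (a hypothetical smoke test; it assumes the forward pass concatenates the two backbones' final 384 + 384 + 128 + 128 = 1024-channel feature maps along channels, which is what in_channels=2*(384 + 384 + 128 + 128) suggests):

feats = torch.randn(2, 2 * (384 + 384 + 128 + 128), 8, 1, 1)  # B x C x T x 1 x 1
head = TAL_Net(num_classes=3).predictor
print(head(feats).shape)  # expected: (2, 3 + 2, 8, 1, 1), i.e. num_classes + 2 channels per time step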
Example #3
def run(max_steps=64e3, mode='rgb', root='', split='', batch_size=1, load_model='', save_dir=''):
    # setup dataset
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    dataset = Dataset(split, 'testing', root, mode, test_transforms, num=-1, save_dir=save_dir)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=1, pin_memory=True)

    # val_dataset = Dataset(split, 'testing', root, mode, test_transforms, num=-1, save_dir=save_dir)
    # val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)

    dataloaders = {'train': dataloader}#, 'val': val_dataloader}
    datasets = {'train': dataset}#, 'val': val_dataset}

    
    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
    else:
        i3d = InceptionI3d(400, in_channels=3)
    i3d.replace_logits(400)
    i3d.load_state_dict(torch.load(load_model))
    i3d.cuda()

    for phase in ['train']:
        i3d.train(False)  # Set model to evaluate mode
                
        tot_loss = 0.0
        tot_loc_loss = 0.0
        tot_cls_loss = 0.0
                    
        # Iterate over data.
        for data in dataloaders[phase]:
            # get the inputs
            inputs, labels, name = data
            if os.path.exists(os.path.join(save_dir, name[0]+'.npy')):
                continue
            for i, input in enumerate(inputs, 1):
                b, c, t, h, w = input.shape
                if t > 1600:
                    # long videos: extract features over overlapping 1600-frame chunks
                    features = []
                    for start in range(1, t - 56, 1600):
                        end = min(t - 1, start + 1600 + 56)
                        start = max(1, start - 48)
                        with torch.no_grad():  # replaces the removed Variable(..., volatile=True)
                            ip = torch.from_numpy(input.numpy()[:, :, start:end]).cuda()
                            features.append(i3d.extract_features(ip).squeeze(0).permute(1, 2, 3, 0).data.cpu().numpy())
                    np.save(os.path.join(save_dir, name[0]), np.concatenate(features, axis=0))
                else:
                    with torch.no_grad():
                        input = input.cuda()
                        features = i3d.extract_features(input)
                    new_path = os.path.join(save_dir, name[0], mode)
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)
                    np.save(os.path.join(new_path, str(i)), features.squeeze(0).permute(1, 2, 3, 0).data.cpu().numpy())
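The 1600/56/48 constants above produce overlapping windows so chunk boundaries keep temporal context; the arithmetic in isolation (a standalone sketch):

t = 5000  # example frame count
for start in range(1, t - 56, 1600):
    end = min(t - 1, start + 1600 + 56)
    start = max(1, start - 48)
    print((start, end))  # (1, 1657) (1553, 3257) (3153, 4857) (4753, 4999)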
Example #4
def model_builder():
    # setup the model
    if args.model == 'i3d':
        if args.mode == 'flow':
            model = InceptionI3d(num_classes=7, in_channels=2)
            model.load_state_dict(
                {
                    k: v
                    for k, v in torch.load('models/flow_imagenet.pt').items()
                    if k.find('logits') < 0
                },
                strict=False)
        else:
            model = InceptionI3d(num_classes=7,
                                 in_channels=3,
                                 dropout_keep_prob=0.5)
            model.load_state_dict(
                {
                    k: v
                    for k, v in torch.load('models/rgb_imagenet.pt').items()
                    if k.find('logits') < 0
                },
                strict=False)
    elif args.model == 'r2plus1d':
        model = R2Plus1DClassifier(num_classes=7)
    elif args.model == 'w3d':
        model = W3D(num_classes=7)
        # model.load_state_dict(torch.load('pev_i3d_best.pt'))

    if args.resume is not None:
        # Use a local scope to avoid dangling references
        def resume():
            if os.path.isfile(args.resume):
                print("=> loading checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(
                    args.resume, map_location=lambda storage, loc: storage)
                model.load_state_dict(checkpoint)
                print("=> loaded checkpoint '{}' ".format(args.resume))
            else:
                print("=> no checkpoint found at '{}'".format(args.resume))

        resume()

    model = model.cuda()

    if args.distributed:
        lr = args.lr * args.batch_size * args.world_size / 64.
    else:
        lr = args.lr * args.batch_size / 56.

    # lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [30, 60])
    if args.distributed:
        model = DDP(model)
    else:
        model = nn.DataParallel(model)

    return model
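The dict comprehensions above load the pretrained backbone while skipping the `logits` head, whose class count differs; the same idea in isolation (a sketch assuming the usual pytorch-i3d key names):

state = torch.load('models/rgb_imagenet.pt')
backbone_only = {k: v for k, v in state.items() if k.find('logits') < 0}
model.load_state_dict(backbone_only, strict=False)  # strict=False tolerates the missing head keys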
Example #5
def run(max_steps=64e3,
        mode='rgb',
        root='/gpfs/home/lhe/xuexinwei/xxw/super-events-cvpr18-master/data/charades/Charades_v1_rgb',
        split='/gpfs/home/lhe/xuexinwei/xxw/super-events-cvpr18-master/data/charades/charades.json',
        batch_size=1,
        load_model='/gpfs/home/lhe/xuexinwei/xxw/super-events-cvpr18-master/pytorch-i3d/models/rgb_charades.pt',
        save_dir='/gpfs/home/lhe/xuexinwei/xxw/super-events-cvpr18-master/data/charades/charades_features'):
    # setup dataset
    #root = '/ssd2/charades/Charades_v1_rgb', split = 'charades/charades.json', batch_size = 1, load_model = '', save_dir = ''
    # root = '/gpfs/home/lhe/xxw/xxw/super-events-cvpr18-master/data/charades/Charades_v1_rgb'
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])
    # print ( mode,root,split,batch_size)
    dataset = Dataset(split, 'training', root, mode, test_transforms, save_dir=save_dir) #num=-1,
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)

    val_dataset = Dataset(split, 'testing', root, mode, test_transforms,  save_dir=save_dir)#num=-1,
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)

    dataloaders = {'train': dataloader, 'val': val_dataloader}
    datasets = {'train': dataset, 'val': val_dataset}


    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
    else:
        i3d = InceptionI3d(400, in_channels=3)
    i3d.replace_logits(157)
    i3d.load_state_dict(torch.load(load_model))
    i3d.cuda()

    for phase in ['train', 'val']:
        i3d.train(False)  # Set model to evaluate mode

        tot_loss = 0.0
        tot_loc_loss = 0.0
        tot_cls_loss = 0.0

        # Iterate over data.
        for data in dataloaders[phase]:
            # get the inputs
            inputs, labels, name = data
            if os.path.exists(os.path.join(save_dir, name[0]+'.npy')):
                continue

            b, c, t, h, w = inputs.shape
            if t > 1600:
                features = []
                for start in range(1, t - 56, 1600):
                    end = min(t - 1, start + 1600 + 56)
                    start = max(1, start - 48)
                    with torch.no_grad():  # replaces the removed Variable(..., volatile=True)
                        ip = torch.from_numpy(inputs.numpy()[:, :, start:end]).cuda()
                        features.append(i3d.extract_features(ip).squeeze(0).permute(1, 2, 3, 0).data.cpu().numpy())
                np.save(os.path.join(save_dir, name[0]), np.concatenate(features, axis=0))
            else:
                with torch.no_grad():
                    inputs = inputs.cuda()
                    features = i3d.extract_features(inputs)
                np.save(os.path.join(save_dir, name[0]), features.squeeze(0).permute(1, 2, 3, 0).data.cpu().numpy())
Example #6
def run(init_lr=0.1,
        max_steps=64e3,
        mode='rgb',
        root='/ssd/Charades_v1_rgb',
        train_split='charades/charades.json',
        batch_size=3 * 15,
        save_model='',
        weights=None,
        num_classes=0):
    # setup dataset
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    data = make_eval_json()
    class_map = make_label_map()

    val_dataset = Dataset(train_split, 'test', root, mode, data, num_classes,
                          test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=False)
    dataloaders = {'test': val_dataloader}
    datasets = {'test': val_dataset}

    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
        i3d.load_state_dict(torch.load('weights/flow_imagenet.pt'))
    else:
        i3d = InceptionI3d(400, in_channels=3)
        i3d.load_state_dict(torch.load('weights/rgb_imagenet.pt'))
    i3d.replace_logits(num_classes)
    i3d.load_state_dict(
        torch.load(weights)
    )  # nslt_2000_000700.pt nslt_1000_010800 nslt_300_005100.pt(best_results)  nslt_300_005500.pt(results_reported) nslt_2000_011400
    #i3d.cuda()
    #i3d = nn.DataParallel(i3d)
    i3d.eval()
    preds = []
    for data in dataloaders["test"]:
        inputs, labels, video_id = data  # inputs: b, c, t, h, w

        per_frame_logits = i3d(inputs)

        predictions = torch.max(per_frame_logits, dim=2)[0]
        scores = predictions.cpu().detach().numpy()[0]
        out_labels = np.argsort(scores)
        out_probs = np.sort(scores)
        print(class_map[out_labels[-1]])
        preds.append(class_map[out_labels[-1]])
    return preds
Example #7
def run(mode='rgb', batch_size=4, load_model=''):
    device = torch.device('cuda')
    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2, spatial_size=112)
    else:
        i3d = InceptionI3d(400, in_channels=3, spatial_size=112)
    sd = torch.load(load_model)
    i3d.load_state_dict(sd)
    
    i3d.to(device)

    data = torch.rand((4,3,32,112,112)).to(device)
    print(i3d(data))
def run_on_tensor(weights, ip_tensor, num_classes):
    i3d = InceptionI3d(400, in_channels=3)
    # i3d.load_state_dict(torch.load('models/rgb_imagenet.pt'))

    i3d.replace_logits(num_classes)
    i3d.load_state_dict(torch.load(weights))  # nslt_2000_000700.pt nslt_1000_010800 nslt_300_005100.pt(best_results)  nslt_300_005500.pt(results_reported) nslt_2000_011400
    i3d.cuda()
    i3d = nn.DataParallel(i3d)
    i3d.eval()

    t = ip_tensor.shape[2]
    ip_tensor = ip_tensor.cuda()
    per_frame_logits = i3d(ip_tensor)

    predictions = F.interpolate(per_frame_logits, t, mode='linear')

    predictions = predictions.transpose(2, 1)
    out_labels = np.argsort(predictions.cpu().detach().numpy()[0])

    arr = predictions.cpu().detach().numpy()[0,:,0].T

    plt.plot(range(len(arr)), F.softmax(torch.from_numpy(arr), dim=0).numpy())
    plt.show()

    return out_labels
Example #9
def run(max_steps=64e3,load_model='',root='/l/vision/v7/wang617/taiwan', batch_size=1, save_dir=''):
    # setup dataset
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])
    dataset = Dataset(root,test_transforms, save_dir=save_dir)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=8)    
    i3d = InceptionI3d(400, in_channels=3)
    #i3d.replace_logits(157)
    i3d.load_state_dict(torch.load(load_model))
    i3d.cuda()
    i3d.train(False)  # Set model to evaluate mode
    count = 0
    start = time.time()
    with open('/l/vision/v7/wang617/taiwan_data/i3d_feature_list.txt', 'a') as f:
        for data in dataloader:
            # get the inputs
            inputs, label, name = data
            label = str(label.numpy()[0])
            b, c, t, h, w = inputs.shape
            with torch.no_grad():  # replaces the removed Variable(..., volatile=True)
                inputs = inputs.cuda()
                features = i3d.extract_features(inputs)
            np.save(os.path.join(save_dir, name[0]), features.squeeze().data.cpu().numpy())
            f.writelines([name[0], ',', label, '\n'])
            count = count + 1
            if count % 100 == 0:
                current = time.time()
                print('Count {:2} | running time: {:.2f} sec'.format(count, current - start))
Example #10
def run(dataloaders, num_classes=42):
    i3d = InceptionI3d(400, in_channels=3)
    i3d.load_state_dict(torch.load('models/rgb_imagenet.pt'))
    i3d.replace_logits(num_classes)
    i3d.load_state_dict(torch.load(LOAD_MODEL_LOC))
    i3d.cuda()

    i3d.train(False)
    count = 0
    for phase in ['train', 'val']:
        i3d.train(False)  # Set model to evaluate mode
        # Iterate over data.
        for data in tqdm(dataloaders[phase]):
            # get the inputs
            inputs, labels, feature_path, nf = data
            count += 1
            if os.path.exists(feature_path[0] + '/i3d_040' + '.npy'):
                continue

            os.makedirs(feature_path[0], exist_ok=True)
            b, c, t, h, w = inputs.shape
            print_log(f'shape:{b},{c},{t},{h},{w}')
            print_log('path:' + feature_path[0])
            # b,c,t,h,w = inputs.shape
            print_log(f'count:{count}')
            print_log(f'num_frames:{nf}')
            time_a = time.time()
            if nf > 1000:
                features = []
                for start in range(0, nf, 1000):
                    end = start + 1000
                    if end > nf - 1:
                        end = nf - 1
                    with torch.no_grad():
                        ip = Variable(
                            torch.from_numpy(inputs.numpy()[:, :,
                                                            start:end]).cuda())
                        features.append(
                            i3d.extract_features(ip).squeeze(0).permute(
                                1, 2, 3, 0).data.cpu().numpy())
                np.save(os.path.join(feature_path[0], 'i3d_040'),
                        np.concatenate(features, axis=0))
            else:
                # wrap them in Variable
                with torch.no_grad():
                    inputs = Variable(inputs.cuda())
                    features = i3d.extract_features(inputs)
                    np.save(
                        os.path.join(feature_path[0], 'i3d_040'),
                        features.squeeze(0).permute(1, 2, 3,
                                                    0).data.cpu().numpy())
            time_b = time.time()
            print_log(f'time consumed:{time_b-time_a}s')
Example #11
def load_temporal_model(model_name, model_depth):
    verbose = False
    #model_name = 'i3d' # resnext resnet i3d

    if model_name == 'i3d':
        model_path = '/home/vsharma/Documents/Audio_Visual_Text/models/i3d/rgb_imagenet.pt'
        model = InceptionI3d(400, in_channels=3)
        model.load_state_dict(torch.load(model_path))
        arch = model_name
        model.train(False)  # Set model to evaluate mode

    elif (model_name == 'resnet') or (model_name == 'resnext'):
        #model_depth = 50 # 101 50
        arch = '{}-{}'.format(model_name, model_depth)
        model_path = '/home/vsharma/Documents/Audio_Visual_Text/models/resnet3d'
        model_path = '{}/{}-kinetics.pth'.format(model_path, arch)

        if arch == 'resnet-50':
            model = resnet.resnet50(num_classes=400,
                                    shortcut_type='B',
                                    sample_size=112,
                                    sample_duration=16,
                                    last_fc=True)
        elif arch == 'resnext-101':
            model = resnext.resnet101(num_classes=400,
                                      shortcut_type='B',
                                      cardinality=32,
                                      sample_size=112,
                                      sample_duration=16,
                                      last_fc=True)

        model_data = torch.load(model_path)
        assert arch == model_data['arch']

        #model.load_state_dict(model_data['state_dict'])
        state_dict = model_data['state_dict']
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:]  # remove `module.`
            new_state_dict[name] = v
        model.load_state_dict(new_state_dict)

        # Removing the last 2 layers: fc and softmax
        model = nn.Sequential(*list(model.children())[:-2])
        model.eval()

    if verbose:
        print(model)

    return model
def load_model(input_channels, learning_rate, scheduler_list, checkpoint=None):
    i3d = InceptionI3d(400, in_channels=input_channels)
    #i3d = i3d.to(self.device)
    optimizer = optim.SGD(i3d.parameters(),
                          lr=learning_rate,
                          momentum=0.9,
                          weight_decay=0.0000001)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, scheduler_list)
    if (checkpoint is not None):
        data = torch.load(checkpoint)
        i3d.load_state_dict(data['model_state'])
        optimizer.load_state_dict(data['optimizer_state'])
        scheduler.load_state_dict(data['scheduler_state'])
    return i3d, optimizer, scheduler
def load_model(learning_rate, scheduler_list, checkpoint=None):
    sm = InceptionI3d(400, in_channels=3)
    sm.replace_logits(1)
    fusedNet = FusionNet(sm)
    #i3d = i3d.to(self.device)
    optimizer = optim.SGD(fusedNet.parameters(),
                          lr=learning_rate,
                          momentum=0.9,
                          weight_decay=0.0000001)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, scheduler_list)
    if (checkpoint is not None):
        data = torch.load(checkpoint)
        fusedNet.load_state_dict(data['model_state'])
        optimizer.load_state_dict(data['optimizer_state'])
        scheduler.load_state_dict(data['scheduler_state'])
    return fusedNet, optimizer, scheduler
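Both loaders expect a checkpoint dict keyed 'model_state', 'optimizer_state' and 'scheduler_state'; the matching save side would look like this (a sketch mirroring the keys read above; the path is hypothetical):

torch.save({'model_state': fusedNet.state_dict(),
            'optimizer_state': optimizer.state_dict(),
            'scheduler_state': scheduler.state_dict()},
           'checkpoint.pt')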
    def __init__(self, num_outputs=120):
        super(ProxyNetwork, self).__init__()
        i3d = InceptionI3d(400, in_channels=3)
        i3d.load_state_dict(torch.load('models/rgb_imagenet.pt'))
        self.i3d = i3d
        self.siam_logits = Unit3D(in_channels=112 + 288 + 64 + 64,
                                  output_channels=128,
                                  kernel_shape=[1, 1, 1],
                                  padding=0,
                                  activation_fn=None,
                                  use_batch_norm=False,
                                  use_bias=True,
                                  name='siam_logits')
        self.siam_avg_pool = nn.AvgPool3d(kernel_size=[2, 14, 14],
                                          stride=(1, 1, 1))
        self.fc1 = nn.Linear(128, 512)
        self.fc2 = nn.Linear(512, num_outputs)
Example #15
def run(init_lr=0.1, max_steps=64e3, mode='rgb', root='/ssd/Charades_v1_rgb', train_split='charades/charades.json', batch_size=8*5, save_model=''):
    # setup dataset
    train_transforms = transforms.Compose([videotransforms.RandomCrop(224),
                                           videotransforms.RandomHorizontalFlip(),
    ])
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    dataset = Dataset(train_split, 'training', root, mode, train_transforms)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=36, pin_memory=True)

    val_dataset = Dataset(train_split, 'testing', root, mode, test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=36, pin_memory=True)    

    dataloaders = {'train': dataloader, 'val': val_dataloader}
    datasets = {'train': dataset, 'val': val_dataset}

    
    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
        i3d.load_state_dict(torch.load('models/flow_imagenet.pt'))
    else:
        i3d = InceptionI3d(400, in_channels=3)
        i3d.load_state_dict(torch.load('models/rgb_imagenet.pt'))
    i3d.replace_logits(157)
    #i3d.load_state_dict(torch.load('/ssd/models/000920.pt'))
    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    lr = init_lr
    optimizer = optim.SGD(i3d.parameters(), lr=lr, momentum=0.9, weight_decay=0.0000001)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000])


    num_steps_per_update = 4 # accum gradient
    steps = 0
    # train it
    while steps < max_steps:#for epoch in range(num_epochs):
        print('Step {}/{}'.format(steps, max_steps))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                i3d.train(True)
            else:
                i3d.train(False)  # Set model to evaluate mode
                
            tot_loss = 0.0
            tot_loc_loss = 0.0
            tot_cls_loss = 0.0
            num_iter = 0
            optimizer.zero_grad()
            
            # Iterate over data.
            for data in dataloaders[phase]:
                num_iter += 1
                # get the inputs
                inputs, labels = data

                # wrap them in Variable
                inputs = Variable(inputs.cuda())
                t = inputs.size(2)
                labels = Variable(labels.cuda())

                per_frame_logits = i3d(inputs)
                # upsample to input size
                per_frame_logits = F.interpolate(per_frame_logits, t, mode='linear')

                # compute localization loss
                loc_loss = F.binary_cross_entropy_with_logits(per_frame_logits, labels)
                tot_loc_loss += loc_loss.item()

                # compute classification loss (with max-pooling along time B x C x T)
                cls_loss = F.binary_cross_entropy_with_logits(torch.max(per_frame_logits, dim=2)[0], torch.max(labels, dim=2)[0])
                tot_cls_loss += cls_loss.item()

                loss = (0.5*loc_loss + 0.5*cls_loss)/num_steps_per_update
                tot_loss += loss.item()
                loss.backward()

                if num_iter == num_steps_per_update and phase == 'train':
                    steps += 1
                    num_iter = 0
                    optimizer.step()
                    optimizer.zero_grad()
                    lr_sched.step()
                    if steps % 10 == 0:
                        print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_loc_loss/(10*num_steps_per_update), tot_cls_loss/(10*num_steps_per_update), tot_loss/10))
                        # save model
                        torch.save(i3d.module.state_dict(), save_model+str(steps).zfill(6)+'.pt')
                        tot_loss = tot_loc_loss = tot_cls_loss = 0.
            if phase == 'val':
                print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_loc_loss/num_iter, tot_cls_loss/num_iter, (tot_loss*num_steps_per_update)/num_iter))
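The inner loop above accumulates gradients over num_steps_per_update mini-batches before each optimizer step; the pattern distilled (a generic sketch, with model/criterion/loader standing in for the I3D specifics):

accum_steps = 4
optimizer.zero_grad()
for i, (x, y) in enumerate(loader, 1):
    loss = criterion(model(x), y) / accum_steps  # scale so the virtual batch averages correctly
    loss.backward()                              # gradients accumulate across iterations
    if i % accum_steps == 0:
        optimizer.step()
        optimizer.zero_grad()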
Example #16
def drive_run(init_lr=0.1,
              max_steps=64e3,
              train_loss=[],
              val_loss=[],
              mode='gray',
              root="data/drive_and_act_dataset/simmons_kinect_ir_train/",
              batch_size=1,
              save_model=''):
    
    root_path = "data/drive_and_act_dataset/simmons_kinect_ir_train/"
    train_list = "sample_train_list.txt"
    test_list= "sample_test_list.txt"

    train_transforms = None
    test_transforms = None

    # create a dataset from our DriveAndAct dataset: 
    # !ls
    train_dataset = DriveAndAct(root_path + train_list, mode='gray', root=root_path, transforms=None)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True)

    val_dataset = DriveAndAct(root_path + test_list, mode='gray', root=root_path, transforms=None)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True)

    dataloaders = {'train': train_dataloader, 'val': val_dataloader}
    datasets = {'train': train_dataset, 'val': val_dataset}
    
    num_classes = 39

    # setup the model
    if mode == 'gray':
        # Load in a pretrained dataset (RGB if in_channels=3, rgb_imagenet)
        # (Use flow when using our optical flow dataset)
        # i3d = InceptionI3d(400, in_channels=3)
        # i3d.load_state_dict(torch.load('models/rgb_imagenet.pt'))
        i3d = InceptionI3d(num_classes, in_channels=3)
        i3d.load_state_dict(torch.load('002370.pt'))

    i3d.replace_logits(num_classes)
    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    lr = init_lr
    optimizer = optim.SGD(i3d.parameters(), lr=lr, momentum=0.9, weight_decay=0.0000001)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000])


    num_steps_per_update = 4 # accumulating gradients ("virtual" batch size)
    steps = 0

    # train it
    while steps < max_steps: #for epoch in range(num_epochs):
        print('Step {}/{}'.format(steps, max_steps))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                i3d.train(True)
            else:
                i3d.train(False)  # Set model to evaluate mode
                
            tot_loss = 0.0
            tot_loc_loss = 0.0
            tot_cls_loss = 0.0
            num_iter = 0
            optimizer.zero_grad()
            
            # Iterate over data.
            for data in dataloaders[phase]:
                num_iter += 1
                # get the inputs
                inputs, labels = data

                # wrap them in Variable
                inputs = Variable(inputs.cuda())
                t = inputs.size(2)
                labels = Variable(labels.cuda())

                per_frame_logits = i3d(inputs)
                # upsample to input size
                per_frame_logits = F.interpolate(per_frame_logits, t, mode='linear')

                # compute localization loss
                loc_loss = F.binary_cross_entropy_with_logits(per_frame_logits, labels)
                tot_loc_loss += loc_loss.item()

                # compute classification loss (with max-pooling along time B x C x T)
                cls_loss = F.binary_cross_entropy_with_logits(torch.max(per_frame_logits, dim=2)[0], torch.max(labels, dim=2)[0])
                tot_cls_loss += cls_loss.item()

                loss = (0.5*loc_loss + 0.5*cls_loss)/num_steps_per_update
                tot_loss += loss.item()
                loss.backward()

                if num_iter == num_steps_per_update and phase == 'train':
                    steps += 1
                    num_iter = 0

                    # weights don't update until optimizer.step() called
                    optimizer.step()

                    optimizer.zero_grad()
                    lr_sched.step()

                    # make sure model is being saved in case we get kicked off datahub
                    if steps % 10 == 0:
                        print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_loc_loss/(10*num_steps_per_update), tot_cls_loss/(10*num_steps_per_update), tot_loss/10))
                        # save model
                        train_loss.append(tot_loss/10)
                        torch.save(i3d.module.state_dict(), save_model+str(steps).zfill(6)+'.pt')
                        tot_loss = tot_loc_loss = tot_cls_loss = 0.
            if phase == 'val':
                print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_loc_loss/num_iter, tot_cls_loss/num_iter, (tot_loss*num_steps_per_update)/num_iter))
                val_loss.append((tot_loss*num_steps_per_update)/num_iter)
Example #17
def test(mode='rgb', batch_size=1, accuracy_per_frame = True):
    correct = 0
    total = 0
    
    # setting up dataloaders
    test_transforms = None
    val_dataset = DriveAndAct(test_list_path, mode=mode, root=npy_path, transforms=test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True)

    dataloaders = {'val': val_dataloader}
    datasets = {'val': val_dataset}

    drive_num_classes = 39
    imagenet_num_classes = 400

    # set up model
    i3d = InceptionI3d(drive_num_classes, in_channels=CHANNELS)
    # Load in a pretrained dataset (RGB if in_channels=3, rgb_imagenet)
    # (Use flow when using our optical flow dataset)
    i3d.load_state_dict(torch.load(model_path))

    i3d.replace_logits(drive_num_classes)
    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    y_pred = []
    y_true = []
    
    with torch.no_grad():  # Set model to evaluate mode

      counter = 0
      # Iterate over data.
      for data in tqdm(dataloaders['val']):

        if counter == 300: 
          break
        counter += 1

        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs = Variable(inputs.cuda())
        t = inputs.size(2)
        labels = Variable(labels.cuda())

        per_frame_logits = i3d(inputs)
        # upsample to input size
        per_frame_logits = F.interpolate(per_frame_logits, t, mode='linear')
        
        # compute accuracy
        pred_values, pred_indices = per_frame_logits.max(1)
        actual_values, actual_indices = labels.max(1)
        pred_indices = pred_indices.squeeze()
        actual_indices = actual_indices.squeeze()
        # print statements to see what the predictions are
        #print(pred_indices[:], actual_indices[:])
        
        # calculates the accuracy for each frame in the npy
        if accuracy_per_frame:
          #print(" pred: ", pred_indices, " act: ", actual_indices)
          matches = torch.eq(pred_indices, actual_indices)
          #print("matches: ", matches)

          y_pred.extend(pred_indices.cpu().numpy())
          y_true.extend(actual_indices.cpu().numpy()) 

          correct += int(matches.sum())
          total += int(matches.shape[0])

        else:
            label = actual_indices[0]

            # get prediction counts across the all the frames in the segment
            pred_values, pred_counts = torch.unique(pred_indices, return_counts=True)
            # prediction is the action that was inferred the most times across the frames
            _, idx = pred_counts.max(0)
            pred = pred_values[idx]
            
            #print(type(pred_counts), pred_values, pred_counts)
            #print(pred, label)
            if pred == label:
                correct += 1
            total += 1
          
    return correct, total, y_pred, y_true
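The per-segment branch is a majority vote over per-frame predictions via torch.unique(return_counts=True); distilled on toy data (a sketch):

import torch

pred_indices = torch.tensor([3, 3, 5, 3, 5])
values, counts = torch.unique(pred_indices, return_counts=True)
_, idx = counts.max(0)
print(values[idx])  # tensor(3): the class predicted most often across frames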
Example #18
def main(model_name, 
         mode,
         root,
         val_split,
         ckpt,
         batch_per_gpu):
    num_gpus = MPI.COMM_WORLD.Get_size()
    distributed = False
    if num_gpus > 1:
        distributed = True

    local_rank = MPI.COMM_WORLD.Get_rank() % torch.cuda.device_count()

    if distributed:
        torch.cuda.set_device(local_rank)
        host = os.environ["MASTER_ADDR"] if "MASTER_ADDR" in os.environ else "127.0.0.1"
        torch.distributed.init_process_group(
            backend="nccl",
            init_method='tcp://{}:12345'.format(host),
            rank=MPI.COMM_WORLD.Get_rank(),
            world_size=MPI.COMM_WORLD.Get_size()
        )

        synchronize()

    val_dataloader = make_dataloader(root,
                                        val_split, 
                                        mode,
                                        model_name,
                                        seq_len=16, #64, 
                                        overlap=8, #32,
                                        phase='val', 
                                        max_iters=None, 
                                        batch_per_gpu=batch_per_gpu,
                                        num_workers=16, 
                                        shuffle=False, 
                                        distributed=distributed,
                                        with_normal=False)

    if model_name == 'i3d':
        if mode == 'flow':
            model = InceptionI3d(val_dataloader.dataset.num_classes, in_channels=2, dropout_keep_prob=0.5)
        else:
            model = InceptionI3d(val_dataloader.dataset.num_classes, in_channels=3, dropout_keep_prob=0.5)
        model.replace_logits(val_dataloader.dataset.num_classes)
    elif model_name == 'r3d_18':
        model = r3d_18(pretrained=False, num_classes=val_dataloader.dataset.num_classes)
    elif model_name == 'mc3_18':
        model = mc3_18(pretrained=False, num_classes=val_dataloader.dataset.num_classes)
    elif model_name == 'r2plus1d_18':
        model = r2plus1d_18(pretrained=False, num_classes=val_dataloader.dataset.num_classes)
    elif model_name == 'c3d':
        model = C3D(pretrained=False, num_classes=val_dataloader.dataset.num_classes)
    else:
        raise NameError('unknown model name:{}'.format(model_name))

    # pdb.set_trace()
    # NOTE: as written this loop is a no-op; presumably a leftover from freezing
    # the backbone (e.g. setting param.requires_grad = False)
    for param in model.parameters():
        pass
    
    device = torch.device('cuda')
    model.to(device)
    if distributed:
        model = apex.parallel.convert_syncbn_model(model)
        model = DDP(model.cuda(), delay_allreduce=True)
Example #19
flowDir = rootDir + "flows/"
# ava_training_set = "ava_dataset_files/ava_train_v2.1.csv"
ava_training_set = "ava_dataset_files/ava_train_truppr_v2class.csv"
ava_validation_set = "ava_dataset_files/ava_valid_truppr_v2class.csv"

train_data = ava_dataset(ava_training_set, videoDir, flowDir, jsonDir)
valid_data = ava_dataset(ava_validation_set, videoDir, flowDir, jsonDir)

numClasses = 2

########## Activity Recognition - EHPI Stream
# ehpi_stream = EHPIClassifier(numClasses)
# ehpi_stream.cuda(0)

########## Activity Recognition - RGB Stream
i3d_RGB = InceptionI3d(157, in_channels=3) # 400 when only loaded with imagenet weights
i3d_RGB.load_state_dict(torch.load('models/rgb_charades.pt'))
i3d_RGB.replace_logits(numClasses)
i3d_RGB.cuda(0)
i3d_RGB = nn.DataParallel(i3d_RGB)

########## Activity Recognition - Optical Flow Stream
i3d_OF = InceptionI3d(157, in_channels=2) # 400 when only loaded with imagenet weights
i3d_OF.load_state_dict(torch.load('models/flow_charades.pt'))
i3d_OF.replace_logits(numClasses)
i3d_OF.cuda(0)
i3d_OF = nn.DataParallel(i3d_OF)

# ehpi_stream.train(True)
i3d_RGB.train(True)
i3d_OF.train(True)
Example #20
    with open(SAVE_DIR + 'info.txt', 'w+') as f:
        f.write('LR = {}\nBATCH_SIZE = {}\nEPOCHS = {}\n'.format(LR, BATCH_SIZE, EPOCHS))

    # Transforms
    train_transforms = transforms.Compose([videotransforms.RandomCrop(224),
                                            videotransforms.RandomHorizontalFlip(),
                                            ])
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    # Datasets and Dataloaders
    train_dataset = Dataset(train_split, 'training', root, mode, train_transforms)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=36, pin_memory=True)

    val_dataset = Dataset(train_split, 'testing', root, mode, test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=36, pin_memory=True)    

    dataloaders = {'train': train_dataloader, 'val': val_dataloader}

    # Load pre-trained I3D model
    i3d = InceptionI3d(400, in_channels=3) # pre-trained model has 400 output classes
    i3d.load_state_dict(torch.load('/vision/u/rhsieh91/pytorch-i3d/models/rgb_imagenet.pt'))
    i3d.replace_logits(NUM_CLASSES) # replace final layer to work with new dataset

    # Set up optimizer and learning rate schedule
    optimizer = optim.Adam(i3d.parameters(), lr=LR) 
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [10, 20], gamma=0.1) # decay learning rate by gamma at epoch 10 and 20

    # Start training
    train(i3d, optimizer, dataloaders, num_classes=NUM_CLASSES, epochs=EPOCHS, 
          save_dir=SAVE_DIR, use_gpu=USE_GPU, lr_sched=lr_sched)
def run(init_lr=0.01, max_steps=200, mode='rgb', root='/media/pritesh/Entertainment/Visual-Tactile_Dataset/dataset/',\
        train_split='train.txt', test_split='test.txt', batch_size=1, save_model=''):
    print(train_split, test_split)
    writer = tensorboardX.SummaryWriter()
    # setup dataset
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    dataset = Dataset(train_split, root, mode, test_transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=0,
                                             pin_memory=True)

    val_dataset = Dataset(test_split, root, mode, test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=True,
                                                 num_workers=0,
                                                 pin_memory=True)

    dataloaders = {'train': dataloader, 'val': val_dataloader}
    datasets = {'train': dataset, 'val': val_dataset}

    # setup the model
    sm = InceptionI3d(400, in_channels=3)
    sm.replace_logits(1)
    #add your network here
    fusedNet = FusionNet(sm)
    if torch.cuda.is_available():
        fusedNet.cuda()
    fusedNet = nn.DataParallel(fusedNet)

    lr = init_lr
    optimizer = optim.SGD(fusedNet.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=0.0000001)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [50, 100, 150, 200])
    if torch.cuda.is_available():
        data = torch.load(save_model)
    else:
        data = torch.load(save_model,
                          map_location=lambda storage, loc: storage)
    fusedNet.load_state_dict(data['model_state'])
    optimizer.load_state_dict(data['optimizer_state'])
    lr_sched.load_state_dict(data['scheduler_state'])

    steps = 0
    with open('inference_V.txt', 'w') as file:
        file.write("train and validation loss file\n")
    # train it
    # Each epoch has a training and validation phase

    fusedNet.train(False)  # Set model to evaluate mode
    for phase in ['train', 'val']:
        print('phase : {}'.format(phase))

        tot_cls_loss = 0.0
        num_iter = 0
        count = 0
        #         optimizer.zero_grad()

        with open('inference_V.txt', 'a') as file:
            file.write("---------------\n")
        # Iterate over data.
        for data in dataloaders[phase]:
            num_iter += 1
            # get the inputs
            f_vid, l_vid, tactile, pos, labels = data

            if torch.cuda.is_available():
                rgb_inputs = Variable(f_vid.cuda())
                t = rgb_inputs.size(2)
                labels = Variable(labels.cuda())
            else:
                rgb_inputs = Variable(f_vid)
                t = rgb_inputs.size(2)
                labels = Variable(labels)

            out = fusedNet(rgb_inputs.float())
            #print('prediction output = ', per_frame_logits.shape)
            #print('labels = ',labels.shape)
            # compute classification loss (with max-pooling along time B x C x T)
            out = out.squeeze(1)
            cls_loss = F.binary_cross_entropy_with_logits(
                out.double(), labels.double())
            tot_cls_loss += cls_loss.item()
            #             cls_loss.backward()
            print('{} Loss: {:.4f} and lr: {}'.format(phase,
                                                      tot_cls_loss / num_iter,
                                                      init_lr))
            with open('inference_V.txt', 'a') as file:
                file.write("%f\n" % (tot_cls_loss / num_iter))


#             optimizer.step()
#             optimizer.zero_grad()
            # both phases log to the same scalar family
            writer.add_scalar('inference_error/' + phase,
                              (tot_cls_loss / num_iter), num_iter)
def run(mode='rgb', load_model='', sample_mode='oversample', frequency=16,
    input_dir='', output_dir='', batch_size=4, usezip=False):

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    chunk_size = 16

    assert(mode in ['rgb', 'flow'])
    assert(sample_mode in ['oversample', 'center_crop', 'resize'])
    
    # setup the model
    if mode == 'flow':
        load_model = os.path.join(load_model, 'flow_imagenet.pt')
        i3d = InceptionI3d(400, in_channels=2) #400 classes representing Kinetics dataset
    else:
        load_model = os.path.join(load_model, 'rgb_imagenet.pt')
        i3d = InceptionI3d(400, in_channels=3)
    
    #i3d.replace_logits(157)
    i3d.load_state_dict(torch.load(load_model))
    i3d.cuda()

    i3d.train(False)  # Set model to evaluate mode

    def forward_batch(b_data):
        with torch.no_grad():
            
            b_data = b_data.transpose([0, 4, 1, 2, 3])
            b_data = torch.from_numpy(b_data)   # b,c,t,h,w  # ?x3x16x224x224 (for RGB)
            
            b_data = Variable(b_data.cuda()).float()
            b_features = i3d.extract_features(b_data)
            
            b_features = b_features.data.cpu().numpy()[:,:,0,0,0]
            return b_features

    video_names_list = []
    for class_name in os.listdir(input_dir):
        os.makedirs(os.path.join(output_dir, class_name).replace('\\', '/'), exist_ok=True)
        for vid_name in os.listdir(os.path.join(input_dir, class_name).replace('\\', '/')):
            video_names_list.append(os.path.join(class_name, vid_name).replace('\\', '/'))


    for idx, video_name in enumerate(video_names_list):
        v_name = video_name.split('/')[1] # Only retrieve name of every .mp4 video
        save_file = '{}-{}.npz'.format(v_name, mode)
        os.makedirs(os.path.join(output_dir, video_names_list[idx]).replace('\\', '/'), exist_ok=True)
        if save_file in os.listdir(os.path.join(output_dir, video_names_list[idx])):
            continue
        
        frames_dir = os.path.join(input_dir, video_name)


        if mode == 'rgb':
            if usezip:
                rgb_zipdata = zipfile.ZipFile(os.path.join(frames_dir, 'rgb.zip'), 'r')
                rgb_files = [i for i in rgb_zipdata.namelist() if i.startswith('rgb')]
            else:
                frames_dir = os.path.join(frames_dir, mode)
                rgb_files = [i for i in os.listdir(frames_dir) if i.startswith('rgb')]

            rgb_files.sort()
            frame_cnt = len(rgb_files)

        else:
            if usezip:
                flow_x_zipdata = zipfile.ZipFile(os.path.join(frames_dir, 'flow_x.zip'), 'r')
                flow_x_files = [i for i in flow_x_zipdata.namelist() if i.startswith('flow_x')]

                flow_y_zipdata = zipfile.ZipFile(os.path.join(frames_dir, 'flow_y.zip'), 'r')
                flow_y_files = [i for i in flow_y_zipdata.namelist() if i.startswith('flow_y')]
            else:
                flowx_dir = os.path.join(frames_dir, 'flow_x')
                flow_x_files = [i for i in os.listdir(flowx_dir) if i.startswith('flow_x')]
                flowy_dir = os.path.join(frames_dir, 'flow_y')
                flow_y_files = [i for i in os.listdir(flowy_dir) if i.startswith('flow_y')]

            flow_x_files.sort()
            flow_y_files.sort()
            assert(len(flow_y_files) == len(flow_x_files))
            frame_cnt = len(flow_y_files)



        # clipped_length = (frame_cnt // chunk_size) * chunk_size   # Cut frames

        # Cut frames
        assert(frame_cnt > chunk_size)
        clipped_length = frame_cnt - chunk_size
        clipped_length = (clipped_length // frequency) * frequency  # The start of last chunk

        frame_indices = [] # Frames to chunks
        for i in range(clipped_length // frequency + 1):
            frame_indices.append(
                [j for j in range(i * frequency, i * frequency + chunk_size)])

        frame_indices = np.array(frame_indices)

        #frame_indices = np.reshape(frame_indices, (-1, 16)) # Frames to chunks
        chunk_num = frame_indices.shape[0]

        batch_num = int(np.ceil(chunk_num / batch_size))    # Chunks to batches
        frame_indices = np.array_split(frame_indices, batch_num, axis=0)

        if sample_mode == 'oversample':
            full_features = [[] for i in range(10)]
        else:
            full_features = [[]]

        for batch_id in range(batch_num):
            
            require_resize = sample_mode == 'resize'

            if mode == 'rgb':
                if usezip:
                    batch_data = load_ziprgb_batch(rgb_zipdata, rgb_files, 
                        frame_indices[batch_id], require_resize)
                else:                
                    batch_data = load_rgb_batch(frames_dir, rgb_files, 
                        frame_indices[batch_id], require_resize)
            else:
                if usezip:
                    batch_data = load_zipflow_batch(
                        flow_x_zipdata, flow_y_zipdata,
                        flow_x_files, flow_y_files, 
                        frame_indices[batch_id], require_resize)
                else:
                    batch_data = load_flow_batch(frames_dir, 
                        flow_x_files, flow_y_files, 
                        frame_indices[batch_id], require_resize)

            if sample_mode == 'oversample':
                batch_data_ten_crop = oversample_data(batch_data)

                for i in range(10):
                    #pdb.set_trace()
                    assert(batch_data_ten_crop[i].shape[-2]==224)
                    assert(batch_data_ten_crop[i].shape[-3]==224)
                    full_features[i].append(forward_batch(batch_data_ten_crop[i]))

            else:
                if sample_mode == 'center_crop':
                    batch_data = batch_data[:,:,16:240,58:282,:] # Center Crop  (4, 16, 224, 224, 2)
                
                assert(batch_data.shape[-2]==224)
                assert(batch_data.shape[-3]==224)
                full_features[0].append(forward_batch(batch_data))



        full_features = [np.concatenate(i, axis=0) for i in full_features]
        full_features = [np.expand_dims(i, axis=0) for i in full_features]
        full_features = np.concatenate(full_features, axis=0)

        np.savez(os.path.join(os.path.join(output_dir, video_names_list[idx]), save_file), 
            feature=full_features,
            frame_cnt=frame_cnt,
            video_name=v_name)

        print('{} Extracted features {}: {} / {}, {}'.format(
            v_name, mode, frame_cnt, clipped_length, full_features.shape))
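The windowing above slides a chunk_size-frame window forward every `frequency` frames over the clipped video; the index construction in isolation (a sketch with the same constants):

frame_cnt, chunk_size, frequency = 100, 16, 16
clipped_length = ((frame_cnt - chunk_size) // frequency) * frequency  # start of the last chunk
starts = range(0, clipped_length + 1, frequency)
print([(s, s + chunk_size) for s in starts])  # [(0, 16), (16, 32), (32, 48), (48, 64), (64, 80), (80, 96)]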
Example #23
def run(init_lr=0.01,
        root='',
        split_file='data/annotations/charades.json',
        batch_size=8,
        save_dir='',
        stride=4,
        num_span_frames=32,
        num_epochs=200):
    writer = SummaryWriter()  # tensorboard logging

    # setup dataset
    train_transforms = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor()])
    test_transforms = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor()])

    print('Getting train dataset...')
    train_dataset = Dataset(split_file,
                            'training',
                            root,
                            train_transforms,
                            stride,
                            num_span_frames,
                            is_sife=False)
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=0,
                                                   pin_memory=True)
    print('Getting validation dataset...')
    val_dataset = Dataset(split_file,
                          'testing',
                          root,
                          test_transforms,
                          stride,
                          num_span_frames,
                          is_sife=False)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=0,
                                                 pin_memory=True)

    dataloaders = {'train': train_dataloader, 'val': val_dataloader}

    print('Loading model...')
    # setup the model

    i3d = InceptionI3d(400, in_channels=3)
    if args.checkpoint_path:
        i3d.replace_logits(157)
        state_dict = torch.load(args.checkpoint_path)['model_state_dict']
        checkpoint = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:]  # remove 'module'
            checkpoint[name] = v
        i3d.load_state_dict(checkpoint)
    else:
        i3d.load_state_dict(torch.load('models/rgb_imagenet.pt'))
        i3d.replace_logits(157)

    i3d.cuda()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    if torch.cuda.device_count() > 1:
        print('Using {} GPUs'.format(torch.cuda.device_count()))
        i3d = nn.DataParallel(i3d)
    i3d.to(device)
    print('Loaded model.')

    optimizer = optim.Adam(i3d.parameters(), lr=init_lr)
    #lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [30], gamma=0.1)

    steps = 0 if not args.checkpoint_path else torch.load(
        args.checkpoint_path)['steps']
    start_epoch = 0 if not args.checkpoint_path else torch.load(
        args.checkpoint_path)['epoch']

    # TRAIN
    for epoch in range(start_epoch, num_epochs):
        print('-' * 50)
        print('EPOCH {}/{}'.format(epoch, num_epochs))
        print('-' * 50)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                i3d.train(True)
                print('-' * 10, 'TRAINING', '-' * 10)
            else:
                i3d.train(False)  # Set model to evaluate mode
                print('-' * 10, 'VALIDATION', '-' * 10)

            # Iterate over data.
            all_preds = []
            all_labels = []
            print('Entering data loading...')
            for i, data in enumerate(dataloaders[phase]):
                # get the inputs
                inputs, labels, vid = data

                t = inputs.shape[2]
                inputs = inputs.cuda()
                labels = labels.cuda()

                if phase == 'train':
                    per_frame_logits = i3d(inputs)
                else:
                    with torch.no_grad():
                        per_frame_logits = i3d(inputs)

                # upsample to input size
                per_frame_logits = F.interpolate(
                    per_frame_logits, t, mode='linear')  # B x Classes x T

                max_frame_logits = torch.max(per_frame_logits,
                                             dim=2)[0]  # B x Classes
                labels = torch.max(labels, dim=2)[0]  # B x Classes

                if phase == 'train':
                    loss = F.binary_cross_entropy_with_logits(
                        max_frame_logits, labels)
                    writer.add_scalar('loss/train', loss, steps)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    if steps % 10 == 0:
                        print('Step {} {} loss: {:.4f}'.format(
                            steps, phase, loss))
                    steps += 1

                # metrics for validation
                pred = (torch.sigmoid(max_frame_logits) >=
                        0.5).float()  # predicted labels for this batch (B x C)
                if i == 0:
                    all_preds = np.array(pred.tolist())
                    all_labels = np.array(labels.tolist())
                else:
                    all_preds = np.append(all_preds, pred.tolist(), axis=0)
                    all_labels = np.append(all_labels, labels.tolist(), axis=0)

            # Eval
            all_APs = [
                metrics.average_precision_score(y_true=all_labels[:, j],
                                                y_score=all_preds[:, j])
                for j in range(157)
            ]
            mAP = np.nanmean(all_APs)
            if phase == 'train':
                writer.add_scalar('mAP/train', mAP, epoch)
                print('-' * 50)
                print('{} mAP: {:.4f}'.format(phase, mAP))
                print('-' * 50)
                save_checkpoint(i3d, optimizer, loss, save_dir, epoch,
                                steps)  # save checkpoint after epoch!
            else:
                writer.add_scalar('mAP/val', mAP, epoch)
                print('{} mAP: {:.4f}'.format(phase, mAP))

        #lr_sched.step() # step after epoch

    writer.close()
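Note that `all_preds` above holds thresholded 0/1 predictions, while sklearn's average_precision_score ranks continuous scores; a toy sketch of the per-class AP / nanmean computation (continuous scores, as here, give a more informative ranking):

import numpy as np
from sklearn import metrics

y_true = np.array([[1, 0], [0, 1], [1, 0]])
y_score = np.array([[0.9, 0.4], [0.1, 0.3], [0.8, 0.2]])
aps = [metrics.average_precision_score(y_true[:, j], y_score[:, j]) for j in range(y_true.shape[1])]
print(np.nanmean(aps))  # 0.75 here; nanmean skips classes whose AP is undefined (no positives)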
Example #24
for i, x in enumerate(X):
    if int(x[1]) >= 10:
        new_X.append(x)
        new_Y.append(Y[i])

new_X = np.array(new_X)
new_Y = np.array(new_Y)

train_data = VideoDataset(new_X, new_Y)
trainloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)


load_model = "../input/rgb_charades.pt"
save_dir = "../input/i3d"

i3d = InceptionI3d(400, in_channels=3)
i3d.replace_logits(157)
i3d.load_state_dict(torch.load(load_model))
i3d.cuda()

i3d.train(False)  # Set model to evaluate mode
        
tot_loss = 0.0
tot_loc_loss = 0.0
tot_cls_loss = 0.0
            
# Iterate over data.
for data in trainloader:
    # get the inputs
    inputs, labels, name = data
    if os.path.exists(os.path.join(save_dir, name[0]+'.npy')):
        continue
Example #25
def run(max_steps=64e3,
        mode='flow',
        root='./frames',
        split='gt.json',
        batch_size=1,
        load_model='',
        save_dir=''):
    # setup dataset
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    dataset = Dataset(split,
                      'training',
                      root,
                      mode,
                      test_transforms,
                      num=-1,
                      save_dir=save_dir)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=8,
                                             pin_memory=True)

    val_dataset = Dataset(split,
                          'test',
                          root,
                          mode,
                          test_transforms,
                          num=-1,
                          save_dir=save_dir)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=True,
                                                 num_workers=8,
                                                 pin_memory=True)

    dataloaders = {'train': dataloader, 'val': val_dataloader}
    datasets = {'train': dataset, 'val': val_dataset}

    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(20, in_channels=2)
    else:
        i3d = InceptionI3d(20, in_channels=3)
    i3d.replace_logits(157)
    i3d.load_state_dict(torch.load(load_model))
    i3d.cuda()

    for phase in ['train', 'val']:
        i3d.train(False)  # Set model to evaluate mode

        tot_loss = 0.0
        tot_loc_loss = 0.0
        tot_cls_loss = 0.0

        # Iterate over data.
        for data in dataloaders[phase]:
            # get the inputs
            inputs, labels, name = data
            # if os.path.exists(os.path.join(save_dir, name[0] + '.npy')):
            #     continue

            b, c, t, h, w = inputs.shape
            if t > 16:
                features = []
                # slide a 16-frame window over the clip; incomplete tail windows are dropped
                for start in range(0, t, 16):
                    end = min(t - 1, start + 16)
                    if end < start + 16:
                        break
                    # start = max(1, start - 48)
                    with torch.no_grad():  # Variable(..., volatile=True) is deprecated
                        ip = inputs[:, :, start:end].cuda()
                        feature = i3d.extract_features(ip)
                    features.append(feature.squeeze().cpu().numpy())
                np.save(os.path.join(save_dir, name[0]), np.asarray(features))
            else:
                with torch.no_grad():
                    features = i3d.extract_features(inputs.cuda())
                np.save(
                    os.path.join(save_dir, name[0]),
                    features.squeeze(0).permute(1, 2, 3, 0).cpu().numpy())
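
Note that the two branches persist features in different layouts: the long-clip branch saves a stack of per-window feature vectors, while the short-clip branch saves the permuted feature map of the whole clip. A quick sanity check when reading the files back (the path is illustrative only):

import numpy as np

feats = np.load('features/video_0001.npy')  # hypothetical save_dir/name
print(feats.shape, feats.dtype)  # layout depends on which branch produced the file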
Exemplo n.º 26
0
def run(mode='rgb',
        load_model='',
        sample_mode='oversample',
        frequency=16,
        input_dir='',
        output_dir='',
        batch_size=40,
        usezip=False):

    chunk_size = 16

    assert (mode in ['rgb', 'flow'])
    assert (sample_mode in ['oversample', 'center_crop', 'resize'])

    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
    else:
        i3d = InceptionI3d(400, in_channels=3)

    #i3d.replace_logits(157)
    i3d.load_state_dict(torch.load(load_model))
    i3d.cuda()

    i3d.train(False)  # Set model to evaluate mode

    def forward_batch(b_data):
        b_data = b_data.transpose([0, 4, 1, 2, 3])
        b_data = torch.from_numpy(b_data)  # b,c,t,h,w  # 40x3x16x224x224
        with torch.no_grad():  # Variable(..., volatile=True) is deprecated
            b_features = i3d.extract_features(b_data.cuda().float())
        b_features = b_features.cpu().numpy()[:, :, 0, 0, 0]
        return b_features

    video_names = [i for i in os.listdir(input_dir) if i[0] == 'v']

    for video_name in video_names:

        save_file = '{}-{}.npz'.format(video_name, mode)
        if save_file in os.listdir(output_dir):
            continue

        frames_dir = os.path.join(input_dir, video_name)

        if mode == 'rgb':
            if usezip:
                rgb_zipdata = zipfile.ZipFile(
                    os.path.join(frames_dir, 'img.zip'), 'r')
                rgb_files = [
                    i for i in rgb_zipdata.namelist() if i.startswith('img')
                ]
            else:
                rgb_files = [
                    i for i in os.listdir(frames_dir) if i.startswith('img')
                ]

            rgb_files.sort()
            frame_cnt = len(rgb_files)

        else:
            if usezip:
                flow_x_zipdata = zipfile.ZipFile(
                    os.path.join(frames_dir, 'flow_x.zip'), 'r')
                flow_x_files = [
                    i for i in flow_x_zipdata.namelist() if i.startswith('x_')
                ]

                flow_y_zipdata = zipfile.ZipFile(
                    os.path.join(frames_dir, 'flow_y.zip'), 'r')
                flow_y_files = [
                    i for i in flow_y_zipdata.namelist() if i.startswith('y_')
                ]
            else:
                flow_x_files = [
                    i for i in os.listdir(frames_dir) if i.startswith('flow_x')
                ]
                flow_y_files = [
                    i for i in os.listdir(frames_dir) if i.startswith('flow_y')
                ]

            flow_x_files.sort()
            flow_y_files.sort()
            assert (len(flow_y_files) == len(flow_x_files))
            frame_cnt = len(flow_y_files)

        # clipped_length = (frame_cnt // chunk_size) * chunk_size   # Cut frames

        # Cut frames
        assert (frame_cnt > chunk_size)
        clipped_length = frame_cnt - chunk_size
        clipped_length = (clipped_length //
                          frequency) * frequency  # The start of last chunk

        frame_indices = []  # Frames to chunks
        for i in range(clipped_length // frequency + 1):
            frame_indices.append(
                [j for j in range(i * frequency, i * frequency + chunk_size)])

        frame_indices = np.array(frame_indices)

        #frame_indices = np.reshape(frame_indices, (-1, 16)) # Frames to chunks
        chunk_num = frame_indices.shape[0]

        batch_num = int(np.ceil(chunk_num / batch_size))  # Chunks to batches
        frame_indices = np.array_split(frame_indices, batch_num, axis=0)

        if sample_mode == 'oversample':
            full_features = [[] for i in range(10)]
        else:
            full_features = [[]]

        for batch_id in range(batch_num):

            require_resize = sample_mode == 'resize'

            if mode == 'rgb':
                if usezip:
                    batch_data = load_ziprgb_batch(rgb_zipdata, rgb_files,
                                                   frame_indices[batch_id],
                                                   require_resize)
                else:
                    batch_data = load_rgb_batch(frames_dir, rgb_files,
                                                frame_indices[batch_id],
                                                require_resize)
            else:
                if usezip:
                    batch_data = load_zipflow_batch(flow_x_zipdata,
                                                    flow_y_zipdata,
                                                    flow_x_files, flow_y_files,
                                                    frame_indices[batch_id],
                                                    require_resize)
                else:
                    batch_data = load_flow_batch(frames_dir, flow_x_files,
                                                 flow_y_files,
                                                 frame_indices[batch_id],
                                                 require_resize)

            if sample_mode == 'oversample':
                batch_data_ten_crop = oversample_data(batch_data)

                for i in range(10):
                    assert (batch_data_ten_crop[i].shape[-2] == 224)
                    assert (batch_data_ten_crop[i].shape[-3] == 224)
                    full_features[i].append(
                        forward_batch(batch_data_ten_crop[i]))

            else:
                if sample_mode == 'center_crop':
                    # center crop -> (39, 16, 224, 224, 2)
                    batch_data = batch_data[:, :, 16:240, 58:282, :]

                assert (batch_data.shape[-2] == 224)
                assert (batch_data.shape[-3] == 224)
                full_features[0].append(forward_batch(batch_data))

        full_features = [np.concatenate(i, axis=0) for i in full_features]
        full_features = [np.expand_dims(i, axis=0) for i in full_features]
        full_features = np.concatenate(full_features, axis=0)

        np.savez(os.path.join(output_dir, save_file),
                 feature=full_features,
                 frame_cnt=frame_cnt,
                 video_name=video_name)

        print('{} done: {} / {}, {}'.format(video_name, frame_cnt,
                                            clipped_length,
                                            full_features.shape))
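
The chunk indexing above starts a chunk_size-frame window every frequency frames and drops any tail that cannot fill a whole window. A tiny standalone check of the arithmetic, with illustrative numbers:

import numpy as np

chunk_size, frequency, frame_cnt = 16, 16, 70  # made-up values
clipped_length = frame_cnt - chunk_size                      # 54
clipped_length = (clipped_length // frequency) * frequency   # 48: start of last chunk

frame_indices = np.array([
    list(range(i * frequency, i * frequency + chunk_size))
    for i in range(clipped_length // frequency + 1)
])
print(frame_indices.shape)  # (4, 16): windows start at frames 0, 16, 32, 48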
Exemplo n.º 27
0
def run(configs,
        mode='rgb',
        root='/ssd/Charades_v1_rgb',
        train_split='charades/charades.json',
        save_model='',
        num_classes=None,
        weights=None):
    print(configs)

    # setup dataset
    train_transforms = transforms.Compose([
        videotransforms.RandomCrop(224),
        videotransforms.RandomHorizontalFlip(),
    ])
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    dataset = Dataset(train_split,
                      'train',
                      root,
                      mode,
                      num_classes=num_classes,
                      transforms=train_transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=configs.batch_size,
                                             shuffle=True,
                                             num_workers=4,
                                             pin_memory=True)

    val_dataset = Dataset(train_split,
                          'test',
                          root,
                          mode,
                          num_classes=num_classes,
                          transforms=test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=configs.batch_size,
                                                 shuffle=True,
                                                 num_workers=4,
                                                 pin_memory=False)

    dataloaders = {'train': dataloader, 'test': val_dataloader}
    datasets = {'train': dataset, 'test': val_dataset}

    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
        i3d.load_state_dict(torch.load('weights/flow_imagenet.pt'))
    else:
        i3d = InceptionI3d(400, in_channels=3)
        i3d.load_state_dict(torch.load('weights/rgb_imagenet.pt'))

    num_classes = dataset.num_classes
    i3d.replace_logits(num_classes)

    if weights:
        print('loading weights {}'.format(weights))
        i3d.load_state_dict(torch.load(weights))

    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    lr = configs.init_lr
    weight_decay = configs.adam_weight_decay
    optimizer = optim.Adam(i3d.parameters(), lr=lr, weight_decay=weight_decay)

    num_steps_per_update = configs.update_per_step  # accum gradient
    steps = 0
    epoch = 0

    best_val_score = 0
    # train it
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           'min',
                                                           patience=5,
                                                           factor=0.3)
    while steps < configs.max_steps and epoch < 400:  # for epoch in range(num_epochs):
        print('Step {}/{}'.format(steps, configs.max_steps))
        print('-' * 10)

        epoch += 1
        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            collected_vids = []

            if phase == 'train':
                i3d.train(True)
            else:
                i3d.train(False)  # Set model to evaluate mode

            tot_loss = 0.0
            tot_loc_loss = 0.0
            tot_cls_loss = 0.0
            num_iter = 0
            optimizer.zero_grad()

            confusion_matrix = np.zeros((num_classes, num_classes),
                                        dtype=np.int64)  # np.int was removed in NumPy 1.24
            # Iterate over data.
            for data in dataloaders[phase]:
                num_iter += 1
                # get the inputs
                if data == -1:  # bracewell does not compile opencv with ffmpeg, strange errors occur resulting in no video loaded
                    continue

                # inputs, labels, vid, src = data
                inputs, labels, vid = data

                # wrap them in Variable
                inputs = inputs.cuda()
                t = inputs.size(2)
                labels = labels.cuda()

                per_frame_logits = i3d(inputs, pretrained=False)
                # upsample to input size (F.upsample is deprecated in favor of F.interpolate)
                per_frame_logits = F.interpolate(per_frame_logits,
                                                 t,
                                                 mode='linear')

                # compute localization loss
                loc_loss = F.binary_cross_entropy_with_logits(
                    per_frame_logits, labels)
                tot_loc_loss += loc_loss.data.item()

                predictions = torch.max(per_frame_logits, dim=2)[0]
                gt = torch.max(labels, dim=2)[0]

                # compute classification loss (with max-pooling along time B x C x T),
                # reusing the maxima computed above
                cls_loss = F.binary_cross_entropy_with_logits(predictions, gt)
                tot_cls_loss += cls_loss.data.item()

                for i in range(per_frame_logits.shape[0]):
                    confusion_matrix[torch.argmax(gt[i]).item(),
                                     torch.argmax(predictions[i]).item()] += 1

                loss = (0.5 * loc_loss + 0.5 * cls_loss) / num_steps_per_update
                tot_loss += loss.data.item()
                if num_iter == num_steps_per_update // 2:
                    print(epoch, steps, loss.data.item())
                loss.backward()

                if num_iter == num_steps_per_update and phase == 'train':
                    steps += 1
                    num_iter = 0
                    optimizer.step()
                    optimizer.zero_grad()
                    # lr_sched.step()
                    if steps % 10 == 0:
                        acc = float(np.trace(confusion_matrix)) / np.sum(
                            confusion_matrix)
                        print(
                            'Epoch {} {} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f} Accu :{:.4f}'
                            .format(epoch, phase,
                                    tot_loc_loss / (10 * num_steps_per_update),
                                    tot_cls_loss / (10 * num_steps_per_update),
                                    tot_loss / 10, acc))
                        tot_loss = tot_loc_loss = tot_cls_loss = 0.
            if phase == 'test':
                val_score = float(
                    np.trace(confusion_matrix)) / np.sum(confusion_matrix)
                if val_score > best_val_score or epoch % 2 == 0:
                    best_val_score = max(best_val_score, val_score)  # keep the true best score
                    model_name = save_model + "nslt_" + str(
                        num_classes) + "_" + str(steps).zfill(
                            6) + '_%.3f.pt' % val_score

                    torch.save(i3d.module.state_dict(), model_name)
                    print(model_name)

                print(
                    'VALIDATION: {} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f} Accu :{:.4f}'
                    .format(phase, tot_loc_loss / num_iter,
                            tot_cls_loss / num_iter,
                            (tot_loss * num_steps_per_update) / num_iter,
                            val_score))

                scheduler.step(tot_loss * num_steps_per_update / num_iter)
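
num_steps_per_update above implements gradient accumulation: the loss is scaled down and backpropagated for every batch, but the optimizer only steps after the configured number of micro-batches. A minimal, self-contained sketch of the same pattern (dummy model and data, not the author's):

import torch
import torch.nn as nn

model = nn.Linear(8, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss_fn = nn.CrossEntropyLoss()
batches = [(torch.randn(4, 8), torch.randint(0, 2, (4,))) for _ in range(8)]

accum_steps = 4  # plays the role of num_steps_per_update
optimizer.zero_grad()
for num_iter, (x, y) in enumerate(batches, start=1):
    loss = loss_fn(model(x), y) / accum_steps  # scale so the summed grads average out
    loss.backward()                            # grads accumulate across micro-batches
    if num_iter % accum_steps == 0:
        optimizer.step()                       # one update per accum_steps batches
        optimizer.zero_grad()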
def run(init_lr=0.001,
        max_steps=20,
        mode='rgb',
        root='/proxy/',
        train_split='./scott.txt',
        test_split="./scottt.txt",
        batch_size=8 * 5,
        save_model='nope'):

    # This table contains the distance between two possible ordering sequences
    # It is therefore a 120*120 table
    distance_dict = np.load("distance_dict.npy")
    distance_dict = torch.from_numpy(distance_dict).float().cuda()
    root = "./proxy/"
    dataset = Dataset(
        train_split,
        root,
        mode,
    )
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=8,
                                             pin_memory=True)

    val_dataset = Dataset(test_split, root, mode)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=8,
                                                 pin_memory=True)

    dataloaders = {'train': dataloader, 'val': val_dataloader}
    datasets = {'train': dataset, 'val': val_dataset}

    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
        #Imagenet Pretraining
        i3d.load_state_dict(torch.load('models/flow_imagenet.pt'))
    else:
        #You can modify the number of outputs in the file Siamese_I3D.py

        i3d = ProxyNetwork()

    i3d.cuda()

    i3d = nn.DataParallel(i3d)
    lr = init_lr
    optimizer = optim.SGD(i3d.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=0.0000001)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000])

    num_steps_per_update = 1  # accum gradient
    steps = 0
    # train it
    while steps < max_steps:  #for epoch in range(num_epochs):
        print('Step {}/{}'.format(steps, max_steps))
        t1 = time.time()
        processed_elements = 0

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                i3d.train(True)
            else:
                i3d.train(False)  # Set model to evaluate mode

            tot_loss = 0.0
            tot_loc_loss = 0.0
            tot_cls_loss = 0.0
            optimizer.zero_grad()

            # Iterate over data.
            for data in dataloaders[phase]:
                processed_elements += 40  # hard-coded batch size (8 * 5)
                # get the inputs
                inputs, labels = data
                # wrap them in Variable
                inputs = Variable(inputs.cuda())
                t = inputs.size(2)
                labels = Variable(labels.cuda())

                #Custom loss implementation
                # Depending on the "real" labels

                per_frame_logits = i3d(inputs)
                for i in range(labels.shape[0]):
                    #print(i)
                    per_frame_logits[i] *= distance_dict[labels[i][0][0]]

                # upsample to input size
                #per_frame_logits = F.upsample(per_frame_logits, t, mode='linear')
                per_frame_logits = per_frame_logits.squeeze()
                labels = labels.squeeze()
                labels = labels.type(torch.LongTensor)
                labels = labels.cuda()
                # compute localization loss
                loc_loss = F.cross_entropy(per_frame_logits, labels)
                tot_loc_loss += loc_loss.item()
                #Class loss

                loss = loc_loss / num_steps_per_update
                tot_loss += loss.item()
                loss.backward()
                # 10800 is the number of elements in the training set
                len_training_set = 10800
                print("processed elements  : " + str(processed_elements) +
                      " / " + str(len_training_set))
                print(time.time() - t1)

            if phase == 'train':
                steps += 1
                optimizer.step()
                optimizer.zero_grad()
                lr_sched.step()
                if steps % 1 == 0:
                    print(
                        '{} Train Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'
                        .format(phase,
                                tot_loc_loss / (10 * num_steps_per_update),
                                tot_cls_loss / (10 * num_steps_per_update),
                                tot_loss / 10))
                    # save model (saves the full module; saving state_dict() is generally preferred)
                    torch.save(i3d, "customloss" + str(steps) + '.pt')
                    tot_loss = tot_loc_loss = tot_cls_loss = 0.
            if phase == 'val':
                print(
                    '{}  Val Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'
                    .format(phase, tot_loc_loss, tot_cls_loss,
                            (tot_loss * num_steps_per_update)))
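
distance_dict.npy is loaded from disk and never defined in this snippet. Assuming the 120 entries index the 5! = 120 orderings of five clips, the table could plausibly hold pairwise Kendall-tau distances between permutations; a hypothetical construction (the ordering-to-index convention is an assumption):

import itertools
import numpy as np

# Hypothetical 120x120 ordering-distance table (120 = 5!).
perms = list(itertools.permutations(range(5)))

def kendall_tau_distance(p, q):
    # Count element pairs that the two permutations order differently.
    n = len(p)
    return sum(
        (p.index(a) < p.index(b)) != (q.index(a) < q.index(b))
        for a in range(n) for b in range(a + 1, n)
    )

table = np.array([[kendall_tau_distance(p, q) for q in perms] for p in perms],
                 dtype=np.float32)
np.save('distance_dict.npy', table)
print(table.shape)  # (120, 120)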
def run(init_lr=0.01, max_steps=200, mode='rgb', root='/media/pritesh/Entertainment/Visual-Tactile_Dataset/dataset/',\
        train_split='train.txt', test_split='test.txt', batch_size=5, save_model=''):
    writer = tensorboardX.SummaryWriter()
    # setup dataset
    train_transforms = transforms.Compose([
        videotransforms.RandomCrop(224),
        videotransforms.RandomHorizontalFlip(),
    ])
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    dataset = Dataset(train_split, root, mode, train_transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=3,
                                             pin_memory=True)

    val_dataset = Dataset(test_split, root, mode, test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=True,
                                                 num_workers=3,
                                                 pin_memory=True)

    dataloaders = {'train': dataloader, 'val': val_dataloader}
    datasets = {'train': dataset, 'val': val_dataset}

    # setup the model
    sm = InceptionI3d(400, in_channels=3)
    sm.load_state_dict(torch.load('models/rgb_imagenet.pt'))
    #tm = InceptionI3d(400, in_channels=2)
    #tm.load_state_dict(torch.load('models/flow_imagenet.pt'))
    sm.replace_logits(1)
    sm = freeze_network_layer(sm)
    #add your network here
    fusedNet = FusionNet(sm)
    if torch.cuda.is_available():
        fusedNet.cuda()
        fusedNet = nn.DataParallel(fusedNet)

    lr = init_lr
    optimizer = optim.SGD(fusedNet.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=0.0000001)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [50, 100, 150, 200])

    steps = 0
    with open('i3d_video.txt', 'w') as file:
        file.write("train and validation loss file\n")
    # train it
    while steps < max_steps:  #for epoch in range(num_epochs):
        print('Step {}/{}'.format(steps, max_steps))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            print('phase : {}'.format(phase))
            if phase == 'train':
                fusedNet.train(True)
            else:
                fusedNet.train(False)  # Set model to evaluate mode

            tot_loss = 0.0
            tot_loc_loss = 0.0
            tot_cls_loss = 0.0
            num_iter = 0
            count = 0
            optimizer.zero_grad()
            # Iterate over data.
            for data in dataloaders[phase]:
                num_iter += 1
                # get the inputs
                f_vid, l_vid, tactile, pos, labels = data

                if torch.cuda.is_available():
                    inputs = Variable(f_vid.cuda())
                    t = inputs.size(2)
                    labels = Variable(labels.cuda())
                else:
                    inputs = Variable(f_vid)
                    t = inputs.size(2)
                    labels = Variable(labels)

                per_frame_logits = fusedNet(inputs.float())
                #print('prediction output = ', per_frame_logits.shape)
                #print('labels = ',labels.shape)
                # compute classification loss (with max-pooling along time B x C x T)
                per_frame_logits = per_frame_logits.squeeze(1)
                cls_loss = F.binary_cross_entropy_with_logits(
                    per_frame_logits.double(), labels.double())
                tot_cls_loss += cls_loss.item()
                if phase == 'train':  # only backpropagate and step on training data
                    cls_loss.backward()
                    optimizer.step()
                    optimizer.zero_grad()
                print('{} Loss: {:.4f} and lr: {}'.format(
                    phase, tot_cls_loss / num_iter, init_lr))
                with open('i3d_video.txt', 'a') as file:
                    file.write("%f\n" % (tot_cls_loss / num_iter))
                writer.add_scalar('error/' + phase,
                                  (tot_cls_loss / num_iter), num_iter)
                if phase == 'train' and steps % 50 == 0:
                    torch.save(
                        fusedNet.module.state_dict(),
                        save_model + phase + str(steps).zfill(6) + '.pt')
                    save_checkpoint(fusedNet, optimizer, lr_sched, steps)
            #save error at every epoch
            writer.add_scalar('errorAtEpoch/' + phase,
                              (tot_cls_loss / num_iter), steps)
            tot_cls_loss = 0.
        #if(steps%50 == 0):
        #    torch.save(fusedNet.module.state_dict(), save_model+phase+str(steps).zfill(6)+'.pt')
        #    save_checkpoint(fusedNet, optimizer, lr_sched, steps)
        steps += 1
        lr_sched.step()
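
freeze_network_layer is applied to the pretrained stream near the top of this example but is not shown. A plausible minimal version simply disables gradients for every backbone parameter (which layers the author actually froze is not recoverable from the snippet):

def freeze_network_layer(model):
    # Hypothetical helper: freeze the pretrained backbone so only the
    # fusion head stacked on top of it receives gradient updates.
    for param in model.parameters():
        param.requires_grad = False
    return model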
Exemplo n.º 30
0
                                 transforms=test_transforms,
                                 stride=8,
                                 fm_us=64)

    Video_loader = torch.utils.data.DataLoader(val_video_data,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=data_workers,
                                               pin_memory=True)

    print('Found {} train samples'.format(len(val_video_data)))

    for i in args.__dict__.keys():
        print(i, ':\t', args.__dict__[i])

    I3D = InceptionI3d(400, in_channels=3, dropout_keep_prob=0)
    I3D.replace_logits(157)
    # I3D = nn.DataParallel(I3D).cuda()
    I3D.load_state_dict(torch.load('../model/rgb_charades.pt'))
    I3D = nn.DataParallel(I3D).cuda()
    # I3D.load_state_dict(torch.load(args.model_pth))

    VD_pred = vid_map(Video_loader, I3D, epoch, print_freq=10)

    # output = []
    # gt = []

    Map = winsmooth(VD_pred)
    mAP, _, ap = map.charades_map(np.vstack(Map[0]), np.vstack(Map[1]))
    print('The final mAP is:', mAP)
    submission_file(