Example #1
0
def run(init_lr=0.01,
        root='',
        split_file='data/annotations/charades.json',
        batch_size=8,
        save_dir='',
        stride=4,
        num_span_frames=32,
        num_epochs=200):
    """Train and validate an I3D model on Charades, logging per-epoch mAP.

    Args:
        init_lr: initial learning rate for the Adam optimizer.
        root: root directory containing the video frames.
        split_file: path to the Charades annotation json.
        batch_size: mini-batch size for both dataloaders.
        save_dir: directory where epoch checkpoints are written.
        stride: frame sampling stride passed to the Dataset.
        num_span_frames: number of frames sampled per clip.
        num_epochs: epoch index (exclusive) to train up to.
    """
    writer = SummaryWriter()  # tensorboard logging

    # setup dataset: both splits use the same resize + to-tensor transform
    train_transforms = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor()])
    test_transforms = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor()])

    print('Getting train dataset...')
    train_dataset = Dataset(split_file,
                            'training',
                            root,
                            train_transforms,
                            stride,
                            num_span_frames,
                            is_sife=False)
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=0,
                                                   pin_memory=True)
    print('Getting validation dataset...')
    val_dataset = Dataset(split_file,
                          'testing',
                          root,
                          test_transforms,
                          stride,
                          num_span_frames,
                          is_sife=False)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=0,
                                                 pin_memory=True)

    dataloaders = {'train': train_dataloader, 'val': val_dataloader}

    print('Loading model...')
    # setup the model.
    # BUGFIX: the checkpoint file was previously torch.load()ed three separate
    # times (weights, steps, epoch); read it from disk exactly once.
    ckpt = torch.load(args.checkpoint_path) if args.checkpoint_path else None

    i3d = InceptionI3d(400, in_channels=3)
    if ckpt is not None:
        i3d.replace_logits(157)
        checkpoint = OrderedDict()
        for k, v in ckpt['model_state_dict'].items():
            checkpoint[k[7:]] = v  # strip the 'module.' prefix DataParallel adds
        i3d.load_state_dict(checkpoint)
    else:
        # start from the ImageNet-pretrained backbone, then swap the head
        i3d.load_state_dict(torch.load('models/rgb_imagenet.pt'))
        i3d.replace_logits(157)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    if torch.cuda.device_count() > 1:
        print('Using {} GPUs'.format(torch.cuda.device_count()))
        i3d = nn.DataParallel(i3d)
    i3d.to(device)  # one .to(device) replaces the redundant .cuda() + .to()
    print('Loaded model.')

    optimizer = optim.Adam(i3d.parameters(), lr=init_lr)
    #lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [30], gamma=0.1)

    # resume the step/epoch counters from the checkpoint when present
    steps = ckpt['steps'] if ckpt is not None else 0
    start_epoch = ckpt['epoch'] if ckpt is not None else 0

    # TRAIN
    for epoch in range(start_epoch, num_epochs):
        print('-' * 50)
        print('EPOCH {}/{}'.format(epoch, num_epochs))
        print('-' * 50)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                i3d.train(True)
                print('-' * 10, 'TRAINING', '-' * 10)
            else:
                i3d.train(False)  # Set model to evaluate mode
                print('-' * 10, 'VALIDATION', '-' * 10)

            # Iterate over data.
            all_preds = []
            all_labels = []
            print('Entering data loading...')
            for i, data in enumerate(dataloaders[phase]):
                # get the inputs
                inputs, labels, vid = data

                t = inputs.shape[2]  # temporal length of the clip
                inputs = inputs.to(device)
                labels = labels.to(device)

                if phase == 'train':
                    per_frame_logits = i3d(inputs)
                else:
                    with torch.no_grad():  # no graph needed for validation
                        per_frame_logits = i3d(inputs)

                # upsample the logits back to the input's temporal resolution
                per_frame_logits = F.interpolate(
                    per_frame_logits, t, mode='linear')  # B x Classes x T

                # max-pool over time for clip-level scores and targets
                max_frame_logits = torch.max(per_frame_logits,
                                             dim=2)[0]  # B x Classes
                labels = torch.max(labels, dim=2)[0]  # B x Classes

                if phase == 'train':
                    loss = F.binary_cross_entropy_with_logits(
                        max_frame_logits, labels)
                    writer.add_scalar('loss/train', loss, steps)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    if steps % 10 == 0:
                        print('Step {} {} loss: {:.4f}'.format(
                            steps, phase, loss))
                    steps += 1

                # metrics for validation
                pred = (torch.sigmoid(max_frame_logits) >=
                        0.5).float()  # predicted labels for this batch (B x C)
                if i == 0:
                    all_preds = np.array(pred.tolist())
                    all_labels = np.array(labels.tolist())
                else:
                    all_preds = np.append(all_preds, pred.tolist(), axis=0)
                    all_labels = np.append(all_labels, labels.tolist(), axis=0)

            # Eval: per-class average precision over the whole epoch.
            # NOTE(review): y_score is the *thresholded* prediction rather than
            # the sigmoid probability, which underestimates AP — confirm intent.
            all_APs = [
                metrics.average_precision_score(y_true=all_labels[:, j],
                                                y_score=all_preds[:, j])
                for j in range(157)
            ]
            mAP = np.nanmean(all_APs)
            if phase == 'train':
                writer.add_scalar('mAP/train', mAP, epoch)
                print('-' * 50)
                print('{} mAP: {:.4f}'.format(phase, mAP))
                print('-' * 50)
                save_checkpoint(i3d, optimizer, loss, save_dir, epoch,
                                steps)  # save checkpoint after epoch!
            else:
                writer.add_scalar('mAP/val', mAP, epoch)
                print('{} mAP: {:.4f}'.format(phase, mAP))

        #lr_sched.step() # step after epoch

    writer.close()
def run(init_lr=0.001,
        max_steps=20,
        mode='rgb',
        root='/proxy/',
        train_split='./scott.txt',
        test_split="./scottt.txt",
        batch_size=8 * 5,
        save_model='nope'):
    """Train the proxy ordering network with a distance-weighted CE loss.

    Args:
        init_lr: initial SGD learning rate.
        max_steps: number of optimizer steps (one per training epoch here).
        mode: 'rgb' selects ProxyNetwork, 'flow' selects a flow I3D.
        root: root directory of the data (overwritten below).
        train_split: split description file for the training Dataset.
        test_split: split description file for the validation Dataset.
        batch_size: mini-batch size for both loaders.
        save_model: unused; kept for interface compatibility.
    """

    # This table contains the distance between two possible ordering sequences
    # It is therefore a 120*120 table
    distance_dict = np.load("distance_dict.npy")
    distance_dict = torch.from_numpy(distance_dict).float().cuda()
    root = "./proxy/"  # NOTE(review): shadows the `root` argument — confirm
    dataset = Dataset(
        train_split,
        root,
        mode,
    )
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=8,
                                             pin_memory=True)

    val_dataset = Dataset(test_split, root, mode)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=8,
                                                 pin_memory=True)

    dataloaders = {'train': dataloader, 'val': val_dataloader}
    datasets = {'train': dataset, 'val': val_dataset}

    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
        # Imagenet Pretraining
        i3d.load_state_dict(torch.load('models/flow_imagenet.pt'))
    else:
        # You can modify the number of outputs in the file Siamese_I3D.py
        i3d = ProxyNetwork()

    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    optimizer = optim.SGD(i3d.parameters(),
                          lr=init_lr,
                          momentum=0.9,
                          weight_decay=0.0000001)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000])

    num_steps_per_update = 1  # accum gradient
    steps = 0
    # train it (one optimizer step per pass over the training loader)
    while steps < max_steps:
        print('Step {}/{}'.format(steps, max_steps))
        t1 = time.time()
        processed_elements = 0

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                i3d.train(True)
            else:
                i3d.train(False)  # Set model to evaluate mode

            tot_loss = 0.0
            tot_loc_loss = 0.0
            tot_cls_loss = 0.0  # kept only for the report format; never accumulated
            optimizer.zero_grad()

            # Iterate over data.
            for data in dataloaders[phase]:
                processed_elements += 40
                # get the inputs (Variable wrappers are obsolete — plain tensors)
                inputs, labels = data
                inputs = inputs.cuda()
                labels = labels.cuda()

                # BUGFIX: the validation phase used to build an autograd graph
                # and call loss.backward(); run it under no_grad and only
                # backpropagate during training.
                grad_ctx = (torch.enable_grad()
                            if phase == 'train' else torch.no_grad())
                with grad_ctx:
                    per_frame_logits = i3d(inputs)
                    # Weight each prediction by the distance between the true
                    # ordering class and every other ordering class.
                    for i in range(labels.shape[0]):
                        per_frame_logits[i] *= distance_dict[labels[i][0][0]]

                    per_frame_logits = per_frame_logits.squeeze()
                    targets = labels.squeeze().type(torch.LongTensor).cuda()
                    # localization loss: distance-weighted cross-entropy
                    loc_loss = F.cross_entropy(per_frame_logits, targets)

                tot_loc_loss += loc_loss.item()

                loss = loc_loss / num_steps_per_update
                tot_loss += loss.item()
                if phase == 'train':
                    loss.backward()

                # 10800 is the number of elements in the training set
                len_training_set = 10800
                print("processed elements  : " + str(processed_elements) +
                      " / " + str(len_training_set))
                print(time.time() - t1)

            if phase == 'train':
                steps += 1
                optimizer.step()
                optimizer.zero_grad()
                lr_sched.step()
                # report and checkpoint after every optimizer step
                print(
                    '{} Train Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'
                    .format(phase,
                            tot_loc_loss / (10 * num_steps_per_update),
                            tot_cls_loss / (10 * num_steps_per_update),
                            tot_loss / 10))
                # save the whole module, matching the original behaviour
                torch.save(i3d, "customloss" + str(steps) + '.pt')
                tot_loss = tot_loc_loss = tot_cls_loss = 0.
            if phase == 'val':
                print(
                    '{}  Val Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'
                    .format(phase, tot_loc_loss, tot_cls_loss,
                            (tot_loss * num_steps_per_update)))
Example #3
0
def run(init_lr=0.1, max_step=64e3, mode='rgb', root='/ssd/Charades_v1_rgb', train_split='charades/charades.json', batch_size=8*5, save_model=''):
    """Fine-tune an ImageNet-pretrained I3D on Charades (RGB or flow).

    Args:
        init_lr: initial SGD learning rate.
        max_step: number of optimizer steps to train for.
        mode: 'rgb' or 'flow' input modality.
        root: root directory of the extracted frames.
        train_split: path to the Charades annotation json.
        batch_size: mini-batch size for both loaders.
        save_model: filename prefix for periodic checkpoints.
    """
    # setup dataset
    # BUGFIX: 'RandomHorisontalFlip' was a typo — videotransforms exposes
    # RandomHorizontalFlip (as used by the other training scripts).
    train_transforms = transforms.Compose([videotransforms.RandomCrop(224),
                                           videotransforms.RandomHorizontalFlip(),
    ])
    test_transforms = transforms.Compose([videotransforms.RandomCrop(224)])

    dataset = Dataset(train_split, 'training', root, mode, train_transforms)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=36, pin_memory=True)

    val_dataset = Dataset(train_split, 'testing', root, mode, test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=36, pin_memory=True)

    dataloaders = {'train': dataloader, 'val': val_dataloader}
    datasets = {'train': dataset, 'val': val_dataset}

    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
        i3d.load_state_dict(torch.load('models/flow_imagenet.pt'))
    else:
        i3d = InceptionI3d(400, in_channels=3)
        i3d.load_state_dict(torch.load('models/rgb_imagenet.pt'))
    i3d.replace_logits(157)
    #i3d.load_state_dict(torch.load('/ssd/models/000920.pt'))
    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    lr = init_lr
    optimizer = optim.SGD(i3d.parameters(), lr=lr, momentum=0.9, weight_decay=0.0000001)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000])

    num_steps_per_update = 4  # accum gradient
    steps = 0
    # train it
    # BUGFIX: the loop previously referenced an undefined name 'max_steps';
    # the parameter is spelled 'max_step'.
    while steps < max_step:
        print('Step {}/{}'.format(steps, max_step))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                i3d.train(True)
            else:
                i3d.train(False)  # Set model to evaluate mode

            tot_loss = 0.0
            tot_loc_loss = 0.0
            tot_cls_loss = 0.0
            num_iter = 0
            optimizer.zero_grad()

            # Iterate over data.
            for data in dataloaders[phase]:
                num_iter += 1
                # get the inputs (Variable wrappers are obsolete — plain tensors)
                inputs, labels = data
                inputs = inputs.cuda()
                t = inputs.size(2)
                labels = labels.cuda()

                per_frame_logits = i3d(inputs)
                # upsample to input size (F.upsample is deprecated)
                per_frame_logits = F.interpolate(per_frame_logits, t, mode='linear')

                # compute localization loss
                loc_loss = F.binary_cross_entropy_with_logits(per_frame_logits, labels)
                tot_loc_loss += loc_loss.item()  # BUGFIX: .data[0] raises on 0-dim tensors

                # compute classification loss (with max-pooling along time B x C x T)
                cls_loss = F.binary_cross_entropy_with_logits(torch.max(per_frame_logits, dim=2)[0], torch.max(labels, dim=2)[0])
                tot_cls_loss += cls_loss.item()

                loss = (0.5*loc_loss + 0.5*cls_loss)/num_steps_per_update
                tot_loss += loss.item()
                loss.backward()

                if num_iter == num_steps_per_update and phase == 'train':
                    steps += 1
                    num_iter = 0
                    optimizer.step()
                    optimizer.zero_grad()
                    lr_sched.step()
                    if steps % 10 == 0:
                        print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_loc_loss/(10*num_steps_per_update), tot_cls_loss/(10*num_steps_per_update), tot_loss/10))
                        # save model
                        torch.save(i3d.module.state_dict(), save_model+str(steps).zfill(6)+'.pt')
                        tot_loss = tot_loc_loss = tot_cls_loss = 0.
            if phase == 'val':
                print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_loc_loss/num_iter, tot_cls_loss/num_iter, (tot_loss*num_steps_per_update)/num_iter))
Example #4
0
def run(init_lr=0.1,
        max_steps=64e3,
        mode='rgb',
        root='../../SSBD/ssbd_clip_segment/data/',
        train_split='../../SSBD/Annotations/annotations_charades.json',
        batch_size=1,
        save_model=''):
    """Fine-tune a pretrained XDC video encoder on the SSBD clips.

    Trains with BCE-with-logits on 3-class one-hot labels, tracks argmax
    accuracy, and saves the weights whenever validation accuracy improves.

    Args:
        init_lr: initial SGD learning rate.
        max_steps: number of optimizer steps to train for.
        mode: input modality flag (only 'rgb' is exercised here).
        root: root directory of the clip segments.
        train_split: path to the annotation json.
        batch_size: mini-batch size for both loaders.
        save_model: path prefix of the best-model checkpoint.
    """
    # setup dataset
    train_transforms = transforms.Compose([
        videotransforms.RandomCrop(224),
        videotransforms.RandomHorizontalFlip(),
    ])
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    dataset = Dataset(train_split, 'training', root, mode, train_transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=4,
                                             pin_memory=True)

    val_dataset = Dataset(train_split, 'testing', root, mode, test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=True,
                                                 num_workers=4,
                                                 pin_memory=True)

    dataloaders = {'train': dataloader, 'val': val_dataloader}
    datasets = {'train': dataset, 'val': val_dataset}

    # setup the model: XDC video encoder pretrained on IG65M + Kinetics,
    # re-headed for 3 classes
    xdc = torch.hub.load('HumamAlwassel/XDC',
                         'xdc_video_encoder',
                         pretraining='r2plus1d_18_xdc_ig65m_kinetics',
                         num_classes=3)
    # the DataParallel wrapper is moved to GPU in one step
    # (the previous extra xdc.cuda() call was redundant)
    xdc = nn.DataParallel(xdc).cuda()

    # freeze the backbone: only the classifier head and the last residual
    # stage ('4.1') stay trainable
    for name, param in xdc.named_parameters():
        if 'fc' not in name and '4.1' not in name:
            param.requires_grad = False

    lr = init_lr
    optimizer = optim.SGD(xdc.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=0.0000001)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000])

    num_steps_per_update = 4  # accum gradient
    steps = 0
    best_val = 0
    # train it
    while steps < max_steps:
        print('Step {}/{}'.format(steps, max_steps))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                xdc.train(True)
            else:
                xdc.train(False)  # Set model to evaluate mode

            tot_loss = 0.0
            num_iter = 0
            total = 0  # correctly classified clips since the last report
            n = 0  # clips seen since the last report
            optimizer.zero_grad()

            # Iterate over data.
            for data in dataloaders[phase]:
                num_iter += 1
                # get the inputs (Variable wrappers are obsolete — plain tensors)
                inputs, labels = data
                inputs = inputs.cuda()
                t = inputs.size(2)
                labels = labels.cuda()

                per_frame_logits = xdc(inputs)

                # argmax accuracy against the one-hot labels
                correct = per_frame_logits.argmax(1).eq(labels.argmax(1))
                total += correct.float().sum().item()
                n += batch_size

                loss = F.binary_cross_entropy_with_logits(
                    per_frame_logits, labels) / num_steps_per_update
                tot_loss += loss.data.item()
                loss.backward()

                if num_iter == num_steps_per_update and phase == 'train':
                    steps += 1
                    num_iter = 0
                    optimizer.step()
                    optimizer.zero_grad()
                    lr_sched.step()
                    if steps % 10 == 0:
                        print('{} Tot Loss: {:.4f} Accuracy: {:.4f}'.format(
                            phase, tot_loss / 10, total / n))
                        tot_loss = 0
                        total = 0
                        n = 0
            if phase == 'val':
                # guard against an empty validation loader (division by zero)
                if num_iter and n:
                    print('{} Tot Loss: {:.4f} Accuracy: {:.4f}'.format(
                        phase, (tot_loss * num_steps_per_update) / num_iter,
                        total / n))
                    if (total / n > best_val):
                        best_val = total / n
                        torch.save(xdc.module.state_dict(),
                                   save_model + '.pt')
Example #5
0
def run(mode='rgb',
        root='/home/dataset/Charades_v1_rgb',
        train_split='./Charades/charades.json',
        batch_size=8):
    """Evaluate a Charades-pretrained I3D and dump per-video scores to a file.

    Args:
        mode: 'rgb' or 'flow' input modality.
        root: root directory of the extracted frames.
        train_split: path to the Charades annotation json.
        batch_size: mini-batch size for the test loader.
    """
    # create a txt file to save results.
    # BUGFIX: os.mknod is Linux-only and may require privileges; open with
    # mode 'x' (create, fail if it already exists) for the same semantics.
    import time
    cur_time = time.strftime('%Y-%m-%d_%H%M%S', time.localtime(time.time()))
    res_file = cur_time + '_charades_scores.txt'
    open(res_file, 'x').close()

    # setup dataset
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    val_dataset = Dataset(train_split, 'testing', root, mode, test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=True,
                                                 num_workers=4,
                                                 pin_memory=True)
    print("Loading model......")
    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
        i3d.load_state_dict(torch.load('models/flow_imagenet.pt'))
    else:
        i3d = InceptionI3d(157, in_channels=3)
        i3d.load_state_dict(torch.load('models/rgb_charades.pt'))
    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    num_iter = 1

    # evaluation only: no training phase
    i3d.eval()

    tot_loss = 0.0
    tot_loc_loss = 0.0
    tot_cls_loss = 0.0

    print("Start testing......")
    print('-' * 20)
    # Iterate over data under no_grad: this is pure evaluation, no graph needed
    with torch.no_grad():
        for data in val_dataloader:
            # get the inputs
            vid, inputs, labels = data

            inputs = inputs.cuda()
            t = inputs.size(2)
            labels = labels.cuda()

            per_frame_logits = i3d(inputs)
            # upsample to input size
            per_frame_logits = F.interpolate(per_frame_logits,
                                             t,
                                             mode='linear',
                                             align_corners=True)

            # compute localization loss
            loc_loss = F.binary_cross_entropy_with_logits(
                per_frame_logits, labels)
            tot_loc_loss += loc_loss.item()

            # compute classification loss (max-pooling along time B x C x T)
            per_video_logits = torch.max(per_frame_logits, dim=2)[0]
            wirte2txt(res_file, vid, per_video_logits)
            cls_loss = F.binary_cross_entropy_with_logits(
                torch.max(per_frame_logits, dim=2)[0],
                torch.max(labels, dim=2)[0])
            tot_cls_loss += cls_loss.item()

            loss = (0.5 * loc_loss + 0.5 * cls_loss)
            tot_loss += loss.item()

            if num_iter % 10 == 0:
                print(
                    'Test {}: Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.
                    format(num_iter, tot_loc_loss / 10, tot_cls_loss / 10,
                           tot_loss / 10))
                tot_loss = tot_loc_loss = tot_cls_loss = 0.
            num_iter += 1

    # BUGFIX: the final report divided by (num_iter % 10), which is off by one
    # (num_iter starts at 1) and raises ZeroDivisionError whenever the leftover
    # batch count happened to make num_iter a multiple of 10.
    leftover = (num_iter - 1) % 10  # batches accumulated since the last reset
    if leftover:
        print('Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(
            tot_loc_loss / leftover, tot_cls_loss / leftover,
            tot_loss / leftover))
Example #6
0
def train(init_lr, max_steps, mode, root_folder, train_split, batch_size, load_model, save_model):
    """Fine-tune an ImageNet-pretrained I3D on Charades, checkpointing every 10 steps.

    Args:
        init_lr: initial SGD learning rate.
        max_steps: number of optimizer steps to train for.
        mode: 'rgb' or 'flow'; selects the pretrained weights and input channels.
        root_folder: root directory of the extracted frames.
        train_split: path to the Charades annotation json.
        batch_size: mini-batch size for both loaders.
        load_model: optional checkpoint to resume from ('' to skip).
        save_model: filename prefix for periodic checkpoints.
    """
    train_transforms = transforms.Compose([videotransforms.RandomCrop(224), videotransforms.RandomHorizontalFlip()])
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    dataset = Dataset(train_split, 'training', root_folder, mode, train_transforms)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=36, pin_memory=True)
    val_dataset = Dataset(train_split, 'testing', root_folder, mode, test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=36, pin_memory=True)
    dataloaders = {'train': dataloader, 'val': val_dataloader}

    i3d = InceptionI3d(400, in_channels=2 if mode == 'flow' else 3)  # setup the model
    i3d.load_state_dict(torch.load('models/{}_imagenet.pt'.format(mode)))
    i3d.replace_logits(157)
    if load_model:
        i3d.load_state_dict(torch.load(load_model))
    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    lr = init_lr
    optimizer = optim.SGD(i3d.parameters(), lr=lr, momentum=0.9, weight_decay=0.0000001)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000])

    steps = 0
    num_steps_per_update = 4  # accum gradient
    while steps < max_steps:  # train it

        print('Step {:6d} / {}'.format(steps, max_steps))
        print('-' * 10)

        for phase in ['train', 'val']:  # each epoch has a training and validation phase

            i3d.train(phase == 'train')  # eval only during validation phase
            num_iter, tot_loss, tot_loc_loss, tot_cls_loss = 0, 0.0, 0.0, 0.0
            optimizer.zero_grad()
            for data in dataloaders[phase]:  # iterate over data

                num_iter += 1
                inputs, labels = data  # get the inputs
                # Variable wrappers are obsolete in modern PyTorch — plain tensors
                inputs = inputs.cuda()
                labels = labels.cuda()
                t = inputs.size(2)

                per_frame_logits = i3d(inputs)  # upsample to input size
                # BUGFIX: F.upsample is deprecated; F.interpolate is the drop-in replacement
                per_frame_logits = F.interpolate(per_frame_logits, t, mode='linear')

                loc_loss = F.binary_cross_entropy_with_logits(per_frame_logits, labels)  # compute localization loss
                # BUGFIX: loss.data[0] raises on modern PyTorch (0-dim tensor); use .item()
                tot_loc_loss += loc_loss.item()

                cls_loss = F.binary_cross_entropy_with_logits(torch.max(per_frame_logits, dim=2)[0], torch.max(labels, dim=2)[0])
                tot_cls_loss += cls_loss.item()  # compute classification loss (with max-pooling along time B x C x T)

                loss = (0.5 * loc_loss + 0.5 * cls_loss) / num_steps_per_update
                tot_loss += loss.item()
                loss.backward()

                if num_iter == num_steps_per_update and phase == 'train':

                    steps += 1
                    num_iter = 0
                    optimizer.step()
                    optimizer.zero_grad()
                    lr_sched.step()
                    if steps % 10 == 0:

                        print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(
                            phase, tot_loc_loss / (10 * num_steps_per_update),
                            tot_cls_loss / (10 * num_steps_per_update), tot_loss / 10))
                        torch.save(i3d.module.state_dict(), save_model + str(steps).zfill(6)+'.pt')  # save model
                        tot_loss = tot_loc_loss = tot_cls_loss = 0.

            if phase == 'val':
                print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(
                    phase, tot_loc_loss / num_iter, tot_cls_loss / num_iter,
                    (tot_loss * num_steps_per_update) / num_iter))
Example #7
0
def run(init_lr=0.1,
        max_steps=64e3,
        mode='rgb',
        root='/storage/truppr/CHARADES/Charades_v1_rgb',
        train_split='charades/charades.json',
        batch_size=16,
        save_model=''):
    """Fine-tune I3D on Charades (157 classes) with gradient accumulation.

    Alternates a training and a validation phase until ``max_steps``
    optimizer steps have been taken, accumulating gradients over
    ``num_steps_per_update`` batches per step and checkpointing every
    10 steps.

    Args:
        init_lr: initial SGD learning rate.
        max_steps: number of optimizer steps to run before stopping.
        mode: 'rgb', 'flow', or 'both' (see NOTE below).
        root: directory holding the extracted Charades frames.
        train_split: path to the Charades annotation JSON.
        batch_size: samples per dataloader batch.
        save_model: filename prefix for periodic checkpoints.

    NOTE(review): the 'both' branch is unfinished -- its forward pass reads
    an undefined name ``flow_inputs`` (NameError at runtime), and the update
    step further down always uses ``optimizer``/``lr_sched``, which are only
    created in single-stream mode.
    """
    # setup dataset
    train_transforms = transforms.Compose([
        videotransforms.RandomCrop(224),
        videotransforms.RandomHorizontalFlip(),
    ])
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    # print(root)
    print("creating training set...")
    dataset = Dataset(train_split, 'training', root, mode, train_transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=18,
                                             pin_memory=True)

    print("creating validation set...")
    val_dataset = Dataset(train_split, 'testing', root, mode, test_transforms)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=True,
                                                 num_workers=18,
                                                 pin_memory=True)

    dataloaders = {'train': dataloader, 'val': val_dataloader}
    datasets = {'train': dataset, 'val': val_dataset}

    # setup the model
    print("setting up the model...")
    if mode == 'flow' or mode == 'rgb':
        # Single-stream model: start from Kinetics-400 pretrained weights,
        # then swap the logits layer for Charades' 157 classes.
        if mode == 'flow':
            i3d = InceptionI3d(400, in_channels=2)
            i3d.load_state_dict(torch.load('models/flow_imagenet.pt'))
        elif mode == 'rgb':
            i3d = InceptionI3d(400, in_channels=3)
            i3d.load_state_dict(torch.load('models/rgb_imagenet.pt'))
        i3d.replace_logits(157)  # number of classes... originally 157
        i3d.cuda(0)
        i3d = nn.DataParallel(i3d)

    elif mode == 'both':
        # Two-stream variant: separate RGB and flow networks.
        i3d_rgb = InceptionI3d(400, in_channels=3)
        i3d_rgb.load_state_dict(torch.load('models/rgb_imagenet.pt'))

        i3d_flow = InceptionI3d(400, in_channels=2)
        i3d_flow.load_state_dict(torch.load('models/flow_imagenet.pt'))

        i3d_rgb.replace_logits(157)  # number of classes... originally 157
        i3d_flow.replace_logits(157)

        i3d_rgb.cuda(0)
        i3d_flow.cuda(0)

        i3d_rgb = nn.DataParallel(i3d_rgb)
        i3d_flow = nn.DataParallel(i3d_flow)

    lr = init_lr

    if mode == 'both':
        optimizer_rgb = optim.SGD(i3d_rgb.parameters(),
                                  lr=lr,
                                  momentum=0.9,
                                  weight_decay=0.0000001)
        optimizer_flow = optim.SGD(i3d_flow.parameters(),
                                   lr=lr,
                                   momentum=0.9,
                                   weight_decay=0.0000001)
        # Decay the learning rate at 300 and 1000 optimizer steps.
        lr_sched_rgb = optim.lr_scheduler.MultiStepLR(optimizer_rgb,
                                                      [300, 1000])
        lr_sched_flow = optim.lr_scheduler.MultiStepLR(optimizer_flow,
                                                       [300, 1000])
    else:
        optimizer = optim.SGD(i3d.parameters(),
                              lr=lr,
                              momentum=0.9,
                              weight_decay=0.0000001)
        lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000])

    num_steps_per_update = 4  # accum gradient
    steps = 0
    # train it
    while steps < max_steps:  #for epoch in range(num_epochs):
        # print 'Step {}/{}'.format(steps, max_steps)
        # print '-' * 10
        print('Step ' + str(steps) + '/' + str(max_steps))
        print('-' * 25)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                print("training model...")
                if mode == 'both':
                    i3d_rgb.train(True)
                    i3d_flow.train(True)
                    optimizer_rgb.zero_grad()
                    optimizer_flow.zero_grad()
                else:
                    i3d.train(True)
                    optimizer.zero_grad()
            else:
                print("validating model...")
                if mode == 'both':
                    i3d_rgb.train(False)
                    i3d_flow.train(False)
                    optimizer_rgb.zero_grad()
                    optimizer_flow.zero_grad()
                else:
                    i3d.train(False)  # Set model to evaluate mode
                    optimizer.zero_grad()

            # Running loss totals for this phase; reset every 10 steps below.
            tot_loss = 0.0
            tot_loc_loss = 0.0
            tot_cls_loss = 0.0
            num_iter = 0
            # optimizer.zero_grad()
            print("zeroed...")
            # print(len(dataloaders["train"]))
            # print(dataloaders["train"])
            # Iterate over data.
            for data in dataloaders[phase]:
                # print("starting iter...")

                num_iter += 1
                # get the inputs
                inputs, labels = data

                print("data size: ", inputs.shape, " label: ", labels)

                # wrap them in Variable
                inputs = Variable(inputs.cuda())
                t = inputs.size(2)  # temporal length of the clip
                labels = Variable(labels.cuda())

                # Debug: dump the full (unabridged) label tensor.
                torch.set_printoptions(profile="full")
                print("labels:\n", labels)
                print("labels:\n", labels.shape)
                print("Inputs: \n", inputs.shape)
                torch.set_printoptions(profile="default")

                if mode == 'both':
                    # NOTE(review): `flow_inputs` is never defined anywhere in
                    # this function -- this branch raises NameError. The
                    # two-stream logits are also never upsampled to length t
                    # before the per-frame loss below.
                    per_frame_logits = i3d_rgb(inputs)
                    per_flows_logits = i3d_flow(flow_inputs)
                else:
                    per_frame_logits = i3d(inputs)

                    # upsample to input size
                    # NOTE: F.upsample is deprecated; F.interpolate accepts
                    # the same arguments here.
                    per_frame_logits = F.upsample(per_frame_logits,
                                                  t,
                                                  mode='linear')

                # compute localization loss
                loc_loss = F.binary_cross_entropy_with_logits(
                    per_frame_logits, labels)
                tot_loc_loss += loc_loss.item()

                # compute classification loss (with max-pooling along time B x C x T)
                cls_loss = F.binary_cross_entropy_with_logits(
                    torch.max(per_frame_logits, dim=2)[0],
                    torch.max(labels, dim=2)[0])
                tot_cls_loss += cls_loss.item()

                # Scale by the accumulation window so the summed gradients
                # match one effective large-batch update.
                loss = (0.5 * loc_loss + 0.5 * cls_loss) / num_steps_per_update
                tot_loss += loss.item()
                # NOTE(review): backward() also runs in the 'val' phase; the
                # gradients are discarded by zero_grad() at the next phase
                # start, but this still costs time and memory.
                loss.backward()

                if num_iter == num_steps_per_update and phase == 'train':
                    steps += 1
                    num_iter = 0
                    # NOTE(review): in mode == 'both' these names do not exist
                    # (only optimizer_rgb/optimizer_flow were created above).
                    optimizer.step()
                    optimizer.zero_grad()
                    lr_sched.step()
                    if steps % 10 == 0:
                        # print '{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_loc_loss/(10*num_steps_per_update), tot_cls_loss/(10*num_steps_per_update), tot_loss/10)
                        print(
                            str(phase) + ' Loc Loss: ' +
                            str(tot_loc_loss / (10 * num_steps_per_update)) +
                            ' Cls Loss: ' + str(tot_cls_loss /
                                                (10 * num_steps_per_update)) +
                            ' Tot Loss: ' + str(tot_loss / 10))
                        # save model
                        torch.save(
                            i3d.module.state_dict(),
                            save_model + str(steps).zfill(6) + '-' +
                            str(tot_loss / 10) + '.pt')
                        tot_loss = tot_loc_loss = tot_cls_loss = 0.
                #else:
                #    print(str(phase) + ' Loc Loss: ' + str(tot_loc_loss/(10*num_steps_per_update)) + ' Cls Loss: ' + str(tot_cls_loss/(10*num_steps_per_update)) + ' Tot Loss: ' + str(tot_loss/10))

            if phase == 'val':
                # NOTE: str(...).zfill(4) pads to 4 characters; the loss
                # strings are normally longer, so this is effectively a no-op.
                print(
                    str(phase) + ' Loc Loss: ' +
                    str(tot_loc_loss / num_iter).zfill(4) + ' Cls Loss: ' +
                    str(tot_cls_loss / num_iter).zfill(4) + ' Tot Loss: ' +
                    str((tot_loss * num_steps_per_update) / num_iter).zfill(4))
                # print '{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_loc_loss/num_iter, tot_cls_loss/num_iter, (tot_loss*num_steps_per_update)/num_iter)
            print("whoops...")  # debug marker: end of a phase
示例#8
0
def run(mode='rgb', root='', split_file='data/annotations/charades.json', batch_size=8, stride=4, num_span_frames=125):
    """Evaluate an I3D model on the Charades 'testing' split and print mAP.

    Args:
        mode: 'flow' for a 2-channel optical-flow model, anything else for RGB.
        root: root directory of the video frames (forwarded to Dataset).
        split_file: path to the Charades annotation JSON.
        batch_size: batch size for the validation dataloader.
        stride: frame sampling stride (also keys the on-disk dataset cache).
        num_span_frames: frames spanned per clip (also keys the cache).

    Prints running per-class APs every 10 batches and a final mAP over the
    whole split. Returns nothing.
    """
    # setup dataset
    test_transforms = transforms.Compose([transforms.Resize((224, 224)),
                                          transforms.ToTensor()])

    print('Getting validation dataset...')
    # Cache the constructed Dataset on disk, keyed by the sampling params.
    # NOTE: pickle.load can execute arbitrary code -- acceptable only because
    # this cache file is produced locally by this same script.
    val_path = './data/val_dataset_{}_{}.pickle'.format(stride, num_span_frames)
    if os.path.exists(val_path):
        # `with` closes the handle (the original leaked an open file object).
        with open(val_path, 'rb') as pickle_in:
            val_dataset = pickle.load(pickle_in)
    else:
        val_dataset = Dataset(split_file, 'testing', root, mode, test_transforms, stride, num_span_frames, is_sife=False)
        with open(val_path, 'wb') as pickle_out:
            pickle.dump(val_dataset, pickle_out)
    print('Got val dataset.')
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

    print('Loading model...')
    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
        i3d.load_state_dict(torch.load('models/flow_imagenet.pt'))
        # BUGFIX: the flow path previously kept the 400-way Kinetics logits
        # while the metric below indexes 157 Charades classes.
        i3d.replace_logits(157)
    else:
        i3d = InceptionI3d(400, in_channels=3)
        if args.checkpoint_path:
            i3d.replace_logits(157)
            state_dict = torch.load(args.checkpoint_path)['model_state_dict']
            # Checkpoint was saved from nn.DataParallel; strip 'module.'.
            checkpoint = OrderedDict()
            for k, v in state_dict.items():
                checkpoint[k[7:]] = v  # remove 'module.' prefix
            i3d.load_state_dict(checkpoint)
        else:
            i3d.load_state_dict(torch.load('models/rgb_imagenet.pt'))
            i3d.replace_logits(157)
    # Pick the device before moving the model so CPU-only hosts also work
    # (the original called .cuda() unconditionally first).
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    if torch.cuda.device_count() > 1:
        print('Using {} GPUs'.format(torch.cuda.device_count()))
        i3d = nn.DataParallel(i3d)
    i3d.to(device)
    # BUGFIX: evaluation must run in eval mode (BatchNorm uses running stats).
    i3d.eval()
    print('Loaded model.')

    all_scores = []  # per-batch sigmoid probabilities, each (B, 157)
    all_labels = []  # per-batch binary labels, each (B, 157)
    print('Entering data loading...')
    for i, data in enumerate(val_dataloader):
        # get the inputs
        inputs, labels, vid = data

        t = inputs.shape[2]  # temporal length of the clip
        inputs = inputs.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            per_frame_logits = i3d(inputs)

        # upsample to input size: B x Classes x T
        per_frame_logits = F.interpolate(per_frame_logits, t, mode='linear')

        max_frame_logits = torch.max(per_frame_logits, dim=2)[0]  # B x Classes
        labels = torch.max(labels, dim=2)[0]  # B x Classes

        # BUGFIX: average precision needs continuous scores, not thresholded
        # 0/1 predictions -- accumulate sigmoid probabilities instead.
        scores = torch.sigmoid(max_frame_logits)
        all_scores.append(scores.cpu().numpy())
        all_labels.append(labels.cpu().numpy())

        if i % 10 == 0:
            # Running metrics over everything accumulated so far.
            mAP, all_APs = _charades_map(np.vstack(all_labels), np.vstack(all_scores))
            print('Step {}'.format(i))
            print('all_APs:')
            print(all_APs)
            print('mAP = {}'.format(mAP))

    # Eval
    mAP, _ = _charades_map(np.vstack(all_labels), np.vstack(all_scores))
    print('-' * 50)
    print('Final mAP: {:.4f}'.format(mAP))
    print('-' * 50)


def _charades_map(y_true, y_score):
    """Per-class average precision over the 157 Charades classes.

    Args:
        y_true: (N, 157) binary ground-truth matrix.
        y_score: (N, 157) continuous prediction scores (probabilities).

    Returns:
        (mAP, all_APs) -- classes with no positive samples yield NaN APs and
        are ignored by the nanmean.
    """
    all_APs = [metrics.average_precision_score(y_true=y_true[:, j],
                                               y_score=y_score[:, j])
               for j in range(157)]
    return np.nanmean(all_APs), all_APs