Example 1
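Training entry point for skeleton-based action recognition on the IKEA ASM dataset: it builds a class-balanced train loader and a test loader over 2D-pose clips, instantiates an HCN or ST-GCN classifier, and runs a gradient-accumulating training loop with interleaved test batches, TensorBoard logging, and periodic checkpointing.

The excerpts on this page omit their module-level imports. A minimal sketch of what this example appears to assume (HCN, st_gcn, utils, and the Dataset wrapper are project-local modules, and the import paths here are guesses):

import os

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable  # legacy API; plain tensors suffice on modern PyTorch
from torch.utils.tensorboard import SummaryWriter  # or: from tensorboardX import SummaryWriter

import HCN                   # project-local: HCN skeleton classifier
import st_gcn                # project-local: ST-GCN model
import utils                 # project-local helpers (balanced sampler, accuracy_v2, ...)
from dataset import Dataset  # project-local IKEA ASM dataset wrapper (module name assumed)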
def run(init_lr=0.0001,
        max_steps=5e3,
        frames_per_clip=64,
        dataset_path='/media/sitzikbs/6TB/ANU_ikea_dataset/',
        train_filename='train_cross_env.txt',
        testset_filename='test_cross_env.txt',
        db_filename='../ikea_dataset_frame_labeler/ikea_annotation_db',
        logdir='',
        frame_skip=1,
        batch_size=8,
        camera='dev3',
        refine=False,
        refine_epoch=0,
        load_mode='img',
        pose_path='predictions/pose2d/openpose',
        arch='HCN',
        steps_per_update=1):

    os.makedirs(logdir, exist_ok=True)

    # setup dataset
    train_transforms = None
    test_transforms = None
    train_dataset = Dataset(dataset_path,
                            db_filename=db_filename,
                            train_filename=train_filename,
                            transform=train_transforms,
                            set='train',
                            camera=camera,
                            frame_skip=frame_skip,
                            frames_per_clip=frames_per_clip,
                            mode=load_mode,
                            pose_path=pose_path,
                            arch=arch)
    print("Number of clips in the dataset:{}".format(len(train_dataset)))
    weights = utils.make_weights_for_balanced_classes(
        train_dataset.clip_set, train_dataset.clip_label_count)
    sampler = torch.utils.data.sampler.WeightedRandomSampler(
        weights, len(weights))

    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   sampler=sampler,
                                                   num_workers=6,
                                                   pin_memory=False)

    test_dataset = Dataset(dataset_path,
                           db_filename=db_filename,
                           train_filename=train_filename,
                           test_filename=testset_filename,
                           transform=test_transforms,
                           set='test',
                           camera=camera,
                           frame_skip=frame_skip,
                           frames_per_clip=frames_per_clip,
                           mode=load_mode,
                           pose_path=pose_path,
                           arch=arch)

    test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  num_workers=6,
                                                  pin_memory=False)

    # setup the model
    num_classes = train_dataset.num_classes
    if arch == 'HCN':
        model = HCN.HCN(in_channel=2,
                        num_joint=19,
                        num_person=1,
                        out_channel=64,
                        window_size=frames_per_clip,
                        num_class=num_classes)
    elif arch == 'ST_GCN':
        graph_args = {'layout': 'openpose', 'strategy': 'spatial'}  # alternative layout: 'ntu-rgb+d'
        model = st_gcn.Model(in_channels=2,
                             num_class=num_classes,
                             graph_args=graph_args,
                             edge_importance_weighting=True,
                             dropout=0.5)
    else:
        raise ValueError(
            "Unsupported architecture: please select HCN | ST_GCN")

    if refine:
        if refine_epoch == 0:
            raise ValueError(
                "You set the refine epoch to 0. No need to refine, just retrain."
            )
        refine_model_filename = os.path.join(
            logdir,
            str(refine_epoch).zfill(6) + '.pt')
        checkpoint = torch.load(refine_model_filename)
        model.load_state_dict(checkpoint["model_state_dict"])

    model.cuda()
    # model = nn.DataParallel(model)

    lr = init_lr
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1E-6)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer,
                                              [1000, 2000, 3000, 4000])
    criterion = nn.CrossEntropyLoss()  # standard cross-entropy loss for classification

    if refine:
        lr_sched.load_state_dict(checkpoint["lr_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

    train_writer = SummaryWriter(os.path.join(logdir, 'train'))
    test_writer = SummaryWriter(os.path.join(logdir, 'test'))

    num_steps_per_update = steps_per_update  # accum gradient - try to have number of examples per update match original code 8*5*4
    # eval_steps  = 5
    steps = 0
    # train it
    n_examples = 0
    train_num_batch = len(train_dataloader)
    test_num_batch = len(test_dataloader)
    refine_flag = True
    best_acc = 0

    while steps < max_steps:  #for epoch in range(num_epochs):
        print('Step {}/{}'.format(steps, max_steps))
        print('-' * 10)
        if steps <= refine_epoch and refine and refine_flag:
            # fast-forward the LR scheduler over the epochs already covered by the loaded checkpoint
            lr_sched.step()
            steps += 1
            n_examples += len(train_dataset.clip_set)
            continue
        else:
            refine_flag = False
        # Each epoch has a training and validation phase

        test_batchind = -1
        test_fraction_done = 0.0
        test_enum = enumerate(test_dataloader, 0)
        tot_loss = 0.0
        num_iter = 0
        optimizer.zero_grad()

        # Iterate over data.
        avg_acc = []
        for train_batchind, data in enumerate(train_dataloader):

            num_iter += 1
            # get the inputs
            inputs, labels, vid_idx, frame_pad = data

            # wrap them in Variable
            inputs = Variable(inputs.cuda(), requires_grad=True)
            labels = Variable(labels.cuda())
            labels = torch.argmax(labels, dim=1)

            logits = model(inputs)
            t = inputs.size(2)
            per_frame_logits = torch.nn.functional.interpolate(
                logits.unsqueeze(-1), t, mode='linear', align_corners=True)
            probs = torch.nn.functional.softmax(per_frame_logits, dim=1)

            loss = criterion(per_frame_logits, labels)

            tot_loss += loss.item()
            loss.backward()

            acc = utils.accuracy_v2(torch.argmax(per_frame_logits, dim=1),
                                    labels)

            avg_acc.append(acc.item())
            train_fraction_done = (train_batchind + 1) / train_num_batch
            print('[{}] train Acc: {}, Loss: {:.4f} [{} / {}]'.format(
                steps, acc.item(), loss.item(), train_batchind,
                len(train_dataloader)))
            if (num_iter == num_steps_per_update
                    or train_batchind == len(train_dataloader) - 1):
                n_steps = num_steps_per_update
                if train_batchind == len(train_dataloader) - 1:
                    n_steps = num_iter
                n_examples += batch_size * n_steps
                print('updating the model...')
                print('train Total Loss: {:.4f}'.format(tot_loss / n_steps))
                optimizer.step()
                optimizer.zero_grad()
                train_writer.add_scalar('loss', tot_loss / n_steps, n_examples)
                train_writer.add_scalar('Accuracy', np.mean(avg_acc),
                                        n_examples)
                train_writer.add_scalar('lr', optimizer.param_groups[0]['lr'],
                                        n_examples)
                num_iter = 0
                tot_loss = 0.

            if test_fraction_done <= train_fraction_done and test_batchind + 1 < test_num_batch:
                model.train(False)  # Set model to evaluate mode
                test_batchind, data = next(test_enum)
                inputs, labels, vid_idx, frame_pad = data

                # wrap them in Variable
                inputs = Variable(inputs.cuda(), requires_grad=True)
                labels = Variable(labels.cuda())
                labels = torch.argmax(labels, dim=1)

                with torch.no_grad():
                    logits = model(inputs)
                    t = inputs.size(2)
                    per_frame_logits = torch.nn.functional.interpolate(
                        logits.unsqueeze(-1),
                        t,
                        mode='linear',
                        align_corners=True)
                    probs = torch.nn.functional.softmax(per_frame_logits,
                                                        dim=1)

                    loss = criterion(per_frame_logits, labels)

                    acc = utils.accuracy_v2(
                        torch.argmax(per_frame_logits, dim=1), labels)

                print('[{}] test Acc: {}, Loss: {:.4f} [{} / {}]'.format(
                    steps, acc.item(), loss.item(), test_batchind,
                    len(test_dataloader)))
                test_writer.add_scalar('loss', loss.item(), n_examples)
                test_writer.add_scalar('Accuracy', acc.item(), n_examples)
                test_fraction_done = (test_batchind + 1) / test_num_batch
                model.train(True)
        if steps % 100 == 0:
            # save model
            torch.save(
                {
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                    "lr_state_dict": lr_sched.state_dict()
                }, os.path.join(logdir, str(steps).zfill(6) + '.pt'))

        # remember the best accuracy so far and save a checkpoint when it improves
        is_best = acc.item() > best_acc
        best_acc = max(acc.item(), best_acc)
        if is_best:
            torch.save(
                {
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                    "lr_state_dict": lr_sched.state_dict()
                }, os.path.join(logdir, 'best_classifier.pth'))

        steps += 1
        lr_sched.step()
    train_writer.close()
    test_writer.close()
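A minimal invocation sketch for the entry point above (the paths and log directory are placeholders):

if __name__ == '__main__':
    run(init_lr=0.0001,
        max_steps=5e3,
        dataset_path='/path/to/ANU_ikea_dataset/',
        db_filename='/path/to/ikea_annotation_db',
        logdir='./log/hcn/',
        arch='HCN',
        batch_size=8)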
Example 2
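Inference counterpart to Example 1: it loads a trained HCN/ST-GCN checkpoint, runs it over the test split, accumulates per-frame predictions for each video, and exports a pred.npy dump plus an action_segments.json for segment-level evaluation.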
def run(dataset_path, db_filename, model_path, output_path, frames_per_clip=16,
        testset_filename='test_cross_env.txt', trainset_filename='train_cross_env.txt', frame_skip=1,
        batch_size=8, device='dev3', arch='HCN', pose_path='predictions/pose2d/openpose'):

    pred_output_filename = os.path.join(output_path, 'pred.npy')
    json_output_filename = os.path.join(output_path, 'action_segments.json')

    # setup dataset
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    test_dataset = Dataset(dataset_path, db_filename=db_filename, test_filename=testset_filename,
                           train_filename=trainset_filename, transform=test_transforms, set='test', camera=device,
                           frame_skip=frame_skip, frames_per_clip=frames_per_clip, mode='img', pose_path=pose_path,
                           arch=arch)

    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=6,
                                                  pin_memory=True)

    # setup the model
    num_classes = test_dataset.num_classes

    if arch == 'HCN':
        model = HCN.HCN(in_channel=2, num_joint=19, num_person=1, out_channel=64, window_size=frames_per_clip,
                        num_class=num_classes)
    elif arch == 'ST_GCN':
        graph_args = {'layout': 'openpose', 'strategy': 'spatial'}  # alternative layout: 'ntu-rgb+d'
        model = st_gcn.Model(in_channels=2, num_class=num_classes, graph_args=graph_args,
                             edge_importance_weighting=True, dropout=0.5)
    else:
        raise ValueError("Unsupported architecture: please select HCN | ST_GCN")

    checkpoints = torch.load(model_path)
    model.load_state_dict(checkpoints["model_state_dict"]) # load trained model
    model.cuda()
    # model = nn.DataParallel(model)

    n_examples = 0

    # Iterate over data.
    avg_acc = []
    pred_labels_per_video = [[] for _ in range(len(test_dataset.video_list))]
    logits_per_video = [[] for _ in range(len(test_dataset.video_list))]

    for test_batchind, data in enumerate(test_dataloader):
        model.train(False)
        # get the inputs
        inputs, labels, vid_idx, frame_pad = data

        # wrap them in Variable
        inputs = Variable(inputs.cuda(), requires_grad=True)
        labels = Variable(labels.cuda())

        t = inputs.size(2)
        logits = model(inputs)
        logits = torch.nn.functional.interpolate(logits.unsqueeze(-1), t, mode='linear', align_corners=True)
        # logits = F.interpolate(logits, t, mode='linear', align_corners=True)  # b x classes x frames

        acc = i3d_utils.accuracy_v2(torch.argmax(logits, dim=1), torch.argmax(labels, dim=1))

        avg_acc.append(acc.item())
        n_examples += batch_size
        print('batch Acc: {}, [{} / {}]'.format(acc.item(), test_batchind, len(test_dataloader)))
        logits = logits.permute(0, 2, 1)  # [ batch, classes, frames] -> [ batch, frames, classes]
        logits = logits.reshape(inputs.shape[0] * frames_per_clip, -1)
        pred_labels = torch.argmax(logits, 1).detach().cpu().numpy().tolist()
        logits = torch.nn.functional.softmax(logits, dim=1).detach().cpu().numpy().tolist()

        pred_labels_per_video, logits_per_video = \
            utils.accume_per_video_predictions(vid_idx, frame_pad, pred_labels_per_video, logits_per_video,
                                               pred_labels, logits, frames_per_clip)

    pred_labels_per_video = [np.array(pred_video_labels) for pred_video_labels in pred_labels_per_video]
    logits_per_video = [np.array(pred_video_logits) for pred_video_logits in logits_per_video]

    np.save(pred_output_filename, {'pred_labels': pred_labels_per_video,
                                   'logits': logits_per_video})
    utils.convert_frame_logits_to_segment_json(logits_per_video, json_output_filename, test_dataset.video_list,
                                               test_dataset.action_list)
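np.save pickles the results dict into a 0-d object array, so reading it back needs allow_pickle=True and .item() (a small usage sketch; the path is a placeholder):

import numpy as np

preds = np.load('output/pred.npy', allow_pickle=True).item()  # recover the saved dict
pred_labels_per_video = preds['pred_labels']  # one label array per video
logits_per_video = preds['logits']            # one (frames x classes) softmax array per video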
Example 3
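The same evaluation flow for an I3D backbone: mode selects a two-channel flow or three-channel RGB model whose head matches the pretrained checkpoint (400 or 157 classes) before the logits are replaced with the dataset's class count, and progress is reported through a tqdm bar.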
def run(
    dataset_path,
    annotation_path,
    model_path,
    output_path,
    frames_per_clip,
    frame_skip,
    mode,
    batch_size,
):
    pred_output_filename = os.path.join(output_path, 'predictions.npy')
    json_output_filename = os.path.join(output_path, 'action_segments.json')

    # setup dataset
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    test_dataset = Dataset(
        dataset_path=dataset_path,
        annotation_path=annotation_path,
        transform=test_transforms,
        index_filename="test_dataset_index.txt",
        frame_skip=frame_skip,
        frames_per_clip=frames_per_clip,
    )

    test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=6,
                                                  pin_memory=True)

    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
    else:
        i3d = InceptionI3d(157, in_channels=3)
    num_classes = len(test_dataset.action_name_list)
    i3d.replace_logits(num_classes)
    checkpoints = torch.load(model_path)
    i3d.load_state_dict(checkpoints["model_state_dict"])  # load trained model
    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    n_examples = 0

    # Iterate over data.
    avg_acc = []
    pred_labels_per_video = [[] for _ in range(len(test_dataset.video_list))]
    logits_per_video = [[] for _ in range(len(test_dataset.video_list))]
    # last_vid_idx = 0
    bar = tqdm(test_dataloader)
    for test_batchind, data in enumerate(bar):
        i3d.train(False)
        # get the inputs
        inputs, labels, vid_idx, frame_pad = data

        # wrap them in Variable
        inputs = Variable(inputs.cuda(), requires_grad=True)
        labels = Variable(labels.cuda())

        t = inputs.size(2)
        logits = i3d(inputs)
        logits = F.interpolate(logits, t, mode='linear',
                               align_corners=True)  # b x classes x frames

        acc = i3d_utils.accuracy_v2(torch.argmax(logits, dim=1),
                                    torch.argmax(labels, dim=1))
        avg_acc.append(acc.item())
        n_examples += batch_size

        bar.set_postfix({"Batch Acc": acc.item()})
        # print('batch Acc: {}, [{} / {}]'.format(acc.item(), test_batchind + 1, len(test_dataloader)))
        logits = logits.permute(0, 2, 1)
        logits = logits.reshape(inputs.shape[0] * frames_per_clip, -1)
        pred_labels = torch.argmax(logits, 1).detach().cpu().numpy()
        logits = torch.nn.functional.softmax(
            logits, dim=1).detach().cpu().numpy().tolist()

        pred_labels_per_video, logits_per_video = \
            utils.accume_per_video_predictions(vid_idx, frame_pad, pred_labels_per_video, logits_per_video, pred_labels,
                                               logits, frames_per_clip)

    pred_labels_per_video = [
        np.array(pred_video_labels)
        for pred_video_labels in pred_labels_per_video
    ]
    logits_per_video = [
        np.array(pred_video_logits) for pred_video_logits in logits_per_video
    ]

    np.save(pred_output_filename, {
        'pred_labels': pred_labels_per_video,
        'logits': logits_per_video
    })
    utils.convert_frame_logits_to_segment_json(
        logits_per_video, json_output_filename,
        [video[0] for video in test_dataset.video_list],
        test_dataset.action_name_list)
Example 4
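I3D training entry point: it fine-tunes a pretrained I3D with only the logits and Mixed_5c blocks unfrozen, using an even mix of per-frame localization loss and max-pooled classification loss (both binary cross-entropy with logits) and gradient accumulation over 20 batches.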
def run(
        dataset_path,
        annotation_path,
        init_lr,
        frames_per_clip,
        mode,
        logdir,
        frame_skip,
        batch_size,
        refine,
        refine_epoch,
        pretrained_model,
        max_steps,
):
    os.makedirs(logdir, exist_ok=True)

    # setup dataset
    train_transforms = transforms.Compose(
        [
            videotransforms.RandomCrop(224),
            videotransforms.RandomHorizontalFlip(),
        ]
    )
    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])

    train_dataset = Dataset(
        dataset_path=dataset_path,
        annotation_path=annotation_path,
        transform=train_transforms,
        index_filename="train_dataset_index.txt",
        frame_skip=frame_skip,
        frames_per_clip=frames_per_clip,
    )

    print("Number of clips in the train dataset:{}".format(len(train_dataset)))

    test_dataset = Dataset(
        dataset_path=dataset_path,
        annotation_path=annotation_path,
        transform=test_transforms,
        index_filename="test_dataset_index.txt",
        frame_skip=frame_skip,
        frames_per_clip=frames_per_clip,
    )

    print("Number of clips in the test dataset:{}".format(len(test_dataset)))

    weights = utils.make_weights_for_balanced_classes(train_dataset.clip_list, train_dataset.clip_label_count)
    sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights))

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        sampler=sampler,
        num_workers=3,
        pin_memory=True
    )

    test_dataloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=3,
        pin_memory=True
    )

    # setup the model
    if mode == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
        i3d.load_state_dict(torch.load('models/flow_' + pretrained_model + '.pt'))
    else:
        i3d = InceptionI3d(157, in_channels=3)
        i3d.load_state_dict(torch.load('models/rgb_' + pretrained_model + '.pt'))

    num_classes = len(train_dataset.action_name_list)
    i3d.replace_logits(num_classes)

    for name, param in i3d.named_parameters():  # freeze all of i3d except the logits and Mixed_5c layers
        if 'logits' in name:
            param.requires_grad = True
        elif 'Mixed_5c' in name:
            param.requires_grad = True
        else:
            param.requires_grad = False

    if refine:
        if refine_epoch == 0:
            raise ValueError("You set the refine epoch to 0. No need to refine, just retrain.")
        refine_model_filename = os.path.join(logdir, str(refine_epoch).zfill(6) + '.pt')
        checkpoint = torch.load(refine_model_filename)
        i3d.load_state_dict(checkpoint["model_state_dict"])

    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    lr = init_lr

    optimizer = optim.Adam(i3d.parameters(), lr=lr, weight_decay=1E-6)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [15, 30, 45, 60])

    if refine:
        lr_sched.load_state_dict(checkpoint["lr_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

    train_writer = SummaryWriter(os.path.join(logdir, 'train'))
    test_writer = SummaryWriter(os.path.join(logdir, 'test'))

    num_steps_per_update = 4 * 5  # accum gradient - try to have number of examples per update match original code 8*5*4
    # eval_steps  = 5
    steps = 0
    # train it
    n_examples = 0
    train_num_batch = len(train_dataloader)
    test_num_batch = len(test_dataloader)
    refine_flag = True

    while steps <= max_steps:  # for epoch in range(num_epochs):
        print('Step {}/{}'.format(steps, max_steps))
        print('-' * 10)
        if steps <= refine_epoch and refine and refine_flag:
            lr_sched.step()
            steps += 1
            n_examples += len(train_dataset.clip_list)
            continue
        else:
            refine_flag = False
        # Each epoch has a training and validation phase

        test_batchind = -1
        test_fraction_done = 0.0
        test_enum = enumerate(test_dataloader)
        tot_loss = 0.0
        tot_loc_loss = 0.0
        tot_cls_loss = 0.0
        num_iter = 0
        optimizer.zero_grad()

        # Iterate over data.
        avg_acc = []
        for train_batchind, data in enumerate(train_dataloader):

            num_iter += 1
            # get the inputs
            inputs, labels, vid_idx, frame_pad = data

            # wrap them in Variable
            inputs = Variable(inputs.cuda(), requires_grad=True)
            labels = Variable(labels.cuda())

            t = inputs.size(2)
            per_frame_logits = i3d(inputs)
            per_frame_logits = F.interpolate(per_frame_logits, t, mode='linear', align_corners=True)

            # compute localization loss
            loc_loss = F.binary_cross_entropy_with_logits(per_frame_logits, labels)
            tot_loc_loss += loc_loss.item()

            # compute classification loss (with max-pooling along time B x C x T)
            cls_loss = F.binary_cross_entropy_with_logits(torch.max(per_frame_logits, dim=2)[0],
                                                          torch.max(labels, dim=2)[0])
            tot_cls_loss += cls_loss.item()

            loss = (0.5 * loc_loss + 0.5 * cls_loss) / num_steps_per_update

            tot_loss += loss.item()
            loss.backward()

            acc = utils.accuracy_v2(torch.argmax(per_frame_logits, dim=1), torch.argmax(labels, dim=1))
            # acc = utils.accuracy(per_frame_logits, labels)
            avg_acc.append(acc.item())
            train_fraction_done = (train_batchind + 1) / train_num_batch
            print('[{}] train Acc: {}, Loss: {:.4f} [{} / {}]'.format(steps, acc.item(), loss.item(), train_batchind,
                                                                      len(train_dataloader)))
            if num_iter == num_steps_per_update or train_batchind == len(train_dataloader) - 1:
                n_steps = num_steps_per_update
                if train_batchind == len(train_dataloader) - 1:
                    n_steps = num_iter
                n_examples += batch_size * n_steps
                print('updating the model...')
                print('train Total Loss: {:.4f}'.format(tot_loss / n_steps))
                optimizer.step()
                optimizer.zero_grad()
                train_writer.add_scalar('loss', tot_loss / n_steps, n_examples)
                train_writer.add_scalar('cls loss', tot_cls_loss / n_steps, n_examples)
                train_writer.add_scalar('loc loss', tot_loc_loss / n_steps, n_examples)
                train_writer.add_scalar('Accuracy', np.mean(avg_acc), n_examples)
                train_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], n_examples)
                num_iter = 0
                tot_loss = 0.

            if test_fraction_done <= train_fraction_done and test_batchind + 1 < test_num_batch:
                i3d.train(False)  # Set model to evaluate mode
                test_batchind, data = next(test_enum)
                inputs, labels, vid_idx, frame_pad = data

                # wrap them in Variable
                inputs = Variable(inputs.cuda(), requires_grad=True)
                labels = Variable(labels.cuda())

                with torch.no_grad():
                    per_frame_logits = i3d(inputs)
                    t = inputs.size(2)  # temporal length of this test batch
                    per_frame_logits = F.interpolate(per_frame_logits, t, mode='linear', align_corners=True)

                    # compute localization loss
                    loc_loss = F.binary_cross_entropy_with_logits(per_frame_logits, labels)

                    # compute classification loss (with max-pooling along time B x C x T)
                    cls_loss = F.binary_cross_entropy_with_logits(torch.max(per_frame_logits, dim=2)[0],
                                                                  torch.max(labels, dim=2)[0])

                    loss = (0.5 * loc_loss + 0.5 * cls_loss) / num_steps_per_update
                    acc = utils.accuracy_v2(torch.argmax(per_frame_logits, dim=1), torch.argmax(labels, dim=1))

                print('[{}] test Acc: {}, Loss: {:.4f} [{} / {}]'.format(steps, acc.item(), loss.item(), test_batchind,
                                                                         len(test_dataloader)))
                test_writer.add_scalar('loss', loss.item(), n_examples)
                test_writer.add_scalar('cls loss', cls_loss.item(), n_examples)
                test_writer.add_scalar('loc loss', loc_loss.item(), n_examples)
                test_writer.add_scalar('Accuracy', acc.item(), n_examples)
                test_fraction_done = (test_batchind + 1) / test_num_batch
                i3d.train(True)
        if steps % 2 == 0:
            # save model
            torch.save({"model_state_dict": i3d.module.state_dict(),
                        "optimizer_state_dict": optimizer.state_dict(),
                        "lr_state_dict": lr_sched.state_dict()},
                       os.path.join(logdir, str(steps).zfill(6) + '.pt'))
        steps += 1
        lr_sched.step()
    train_writer.close()
    test_writer.close()
Example 5
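Evaluation flow once more, this time for the C3D/P3D baselines; the center-crop size follows the backbone (112 for C3D, 160 for P3D).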
def run(dataset_path, db_filename, model_path, output_path, frames_per_clip=16, mode='rgb',
        testset_filename='test_cross_env.txt', trainset_filename='train_cross_env.txt', frame_skip=1,
        batch_size=8, device='dev3', model_name='p3d'):

    pred_output_filename = os.path.join(output_path, 'pred.npy')
    json_output_filename = os.path.join(output_path, 'action_segments.json')

    # setup dataset
    img_size = 112 if model_name == 'c3d' else 160
    test_transforms = transforms.Compose([videotransforms.CenterCrop(img_size)])

    test_dataset = Dataset(dataset_path, db_filename=db_filename, test_filename=testset_filename,
                           train_filename=trainset_filename, transform=test_transforms, set='test', camera=device,
                           frame_skip=frame_skip, frames_per_clip=frames_per_clip, resize=None, mode='img',
                           input_type=mode)

    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=6,
                                                  pin_memory=True)

    # setup the model
    num_classes = test_dataset.num_classes
    if model_name == 'c3d':
        model = c3d.C3D()
        model.load_state_dict(torch.load('c3d.pickle'))
        model.replace_logits(num_classes)
    elif model_name == 'p3d':
        model = p3d.P3D199(pretrained=False, modality='RGB', num_classes=num_classes)
    else:
        raise ValueError("unsupported model")

    checkpoints = torch.load(model_path)
    model.load_state_dict(checkpoints["model_state_dict"]) # load trained model
    model.cuda()
    model = nn.DataParallel(model)

    n_examples = 0

    # Iterate over data.
    avg_acc = []
    pred_labels_per_video = [[] for _ in range(len(test_dataset.video_list))]
    logits_per_video = [[] for _ in range(len(test_dataset.video_list))]

    for test_batchind, data in enumerate(test_dataloader):
        model.train(False)
        # get the inputs
        inputs, labels, vid_idx, frame_pad = data

        # wrap them in Variable
        inputs = Variable(inputs.cuda(), requires_grad=True)
        labels = Variable(labels.cuda())

        t = inputs.size(2)
        logits = model(inputs)
        logits = torch.nn.functional.interpolate(logits.unsqueeze(-1), t, mode='linear', align_corners=True)

        acc = i3d_utils.accuracy_v2(torch.argmax(logits, dim=1), torch.argmax(labels, dim=1))
        avg_acc.append(acc.item())
        n_examples += batch_size
        print('batch Acc: {}, [{} / {}]'.format(acc.item(), test_batchind, len(test_dataloader)))
        logits = logits.permute(0, 2, 1)
        logits = logits.reshape(inputs.shape[0] * frames_per_clip, -1)
        pred_labels = torch.argmax(logits, 1).detach().cpu().numpy()
        logits = torch.nn.functional.softmax(logits, dim=1).detach().cpu().numpy().tolist()

        pred_labels_per_video, logits_per_video = \
            utils.accume_per_video_predictions(vid_idx, frame_pad, pred_labels_per_video, logits_per_video, pred_labels,
                                               logits, frames_per_clip)

    pred_labels_per_video = [np.array(pred_video_labels) for pred_video_labels in pred_labels_per_video]
    logits_per_video = [np.array(pred_video_logits) for pred_video_logits in logits_per_video]
    np.save(pred_output_filename, {'pred_labels': pred_labels_per_video, 'logits': logits_per_video})
    utils.convert_frame_logits_to_segment_json(logits_per_video, json_output_filename, test_dataset.video_list,
                                               test_dataset.action_list)
Example 6
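Training entry point for the C3D/P3D baselines: it mirrors Example 1's loop (balanced sampler, gradient accumulation, interleaved test batches) but consumes cropped video frames and starts from pretrained weights (c3d.pickle for C3D, the packaged pretrained P3D199 otherwise).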
def run(init_lr=0.0001, max_steps=64e3, frames_per_clip=16, dataset_path='/media/sitzikbs/6TB/ANU_ikea_dataset/',
        train_filename='train_cross_env.txt', testset_filename='test_cross_env.txt',
        db_filename='../ikea_dataset_frame_labeler/ikea_annotation_db', logdir='',
        frame_skip=1, batch_size=8, camera='dev3', refine=False, refine_epoch=0, load_mode='vid', input_type='rgb',
        model_name='c3d'):


    os.makedirs(logdir, exist_ok=True)

    # setup dataset
    img_size = 112 if model_name == 'c3d' else 160
    train_transforms = transforms.Compose([videotransforms.RandomCrop(img_size),
                                           videotransforms.RandomHorizontalFlip()])
    test_transforms = transforms.Compose([videotransforms.CenterCrop(img_size)])

    train_dataset = Dataset(dataset_path, db_filename=db_filename, train_filename=train_filename,
                            transform=train_transforms, set='train', camera=camera, frame_skip=frame_skip,
                            frames_per_clip=frames_per_clip, resize=None, mode=load_mode, input_type=input_type)
    print("Number of clips in the dataset:{}".format(len(train_dataset)))
    weights = utils.make_weights_for_balanced_classes(train_dataset.clip_set, train_dataset.clip_label_count)
    sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights))

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=sampler,
                                                   num_workers=6, pin_memory=False)

    test_dataset = Dataset(dataset_path, db_filename=db_filename, train_filename=train_filename,
                           test_filename=testset_filename, transform=test_transforms, set='test', camera=camera,
                           frame_skip=frame_skip, frames_per_clip=frames_per_clip, resize=None, mode=load_mode,
                           input_type=input_type)

    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=6,
                                                  pin_memory=False)

    # setup the model
    num_classes = train_dataset.num_classes
    if model_name == 'c3d':
        model = c3d.C3D()
        model.load_state_dict(torch.load('c3d.pickle'))
        model.replace_logits(num_classes)
    elif model_name == 'p3d':
        model = p3d.P3D199(pretrained=True, modality='RGB', num_classes=num_classes)
    else:
        raise ValueError("unsupported model")

    if refine:
        if refine_epoch == 0:
            raise ValueError("You set the refine epoch to 0. No need to refine, just retrain.")
        refine_model_filename = os.path.join(logdir, str(refine_epoch).zfill(6)+'.pt')
        checkpoint = torch.load(refine_model_filename)
        model.load_state_dict(checkpoint["model_state_dict"])

    model.cuda()
    model = nn.DataParallel(model)

    lr = init_lr
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1E-6)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [10, 20, 30, 40])
    criterion = nn.CrossEntropyLoss()  # standard crossentropy loss for classification

    if refine:
        lr_sched.load_state_dict(checkpoint["lr_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

    train_writer = SummaryWriter(os.path.join(logdir, 'train'))
    test_writer = SummaryWriter(os.path.join(logdir, 'test'))

    num_steps_per_update = 4  # accum gradient - try to have number of examples per update match original code 8*5*4
    # eval_steps  = 5
    steps = 0
    # train it
    n_examples = 0
    train_num_batch = len(train_dataloader)
    test_num_batch = len(test_dataloader)
    refine_flag = True

    while steps < max_steps:  # for epoch in range(num_epochs):
        print('Step {}/{}'.format(steps, max_steps))
        print('-' * 10)
        if steps <= refine_epoch and refine and refine_flag:
            lr_sched.step()
            steps += 1
            n_examples += len(train_dataset.clip_set)
            continue
        else:
            refine_flag = False
        # Each epoch has a training and validation phase

        test_batchind = -1
        test_fraction_done = 0.0
        test_enum = enumerate(test_dataloader, 0)
        tot_loss = 0.0
        num_iter = 0
        optimizer.zero_grad()

        # Iterate over data.
        avg_acc = []
        for train_batchind, data in enumerate(train_dataloader):

            num_iter += 1
            # get the inputs
            inputs, labels, vid_idx, frame_pad = data

            # wrap them in Variable
            inputs = Variable(inputs.cuda(), requires_grad=True)
            labels = Variable(labels.cuda())
            labels = torch.argmax(labels, dim=1)

            logits = model(inputs)
            t = inputs.size(2)
            per_frame_logits = torch.nn.functional.interpolate(logits.unsqueeze(-1), t, mode='linear', align_corners=True)
            probs = torch.nn.functional.softmax(per_frame_logits, dim=1)
            preds = torch.max(probs, 1)[1]

            loss = criterion(per_frame_logits, labels)
            tot_loss += loss.item()
            loss.backward()

            acc = utils.accuracy_v2(torch.argmax(per_frame_logits, dim=1), labels)

            avg_acc.append(acc.item())
            train_fraction_done = (train_batchind + 1) / train_num_batch
            print('[{}] train Acc: {}, Loss: {:.4f} [{} / {}]'.format(steps, acc.item(), loss.item(), train_batchind, len(train_dataloader)))
            if num_iter == num_steps_per_update or train_batchind == len(train_dataloader) - 1:
                n_steps = num_steps_per_update
                if train_batchind == len(train_dataloader)-1:
                    n_steps = num_iter
                n_examples += batch_size*n_steps
                print('updating the model...')
                print('train Total Loss: {:.4f}'.format(tot_loss / n_steps))
                optimizer.step()
                optimizer.zero_grad()
                train_writer.add_scalar('loss', tot_loss / n_steps, n_examples)
                train_writer.add_scalar('Accuracy', np.mean(avg_acc), n_examples)
                train_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], n_examples)
                num_iter = 0
                tot_loss = 0.

            if test_fraction_done <= train_fraction_done and test_batchind + 1 < test_num_batch:
                model.train(False)  # Set model to evaluate mode
                test_batchind, data = next(test_enum)
                inputs, labels, vid_idx, frame_pad = data

                # wrap them in Variable
                inputs = Variable(inputs.cuda(), requires_grad=True)
                labels = Variable(labels.cuda())
                labels = torch.argmax(labels, dim=1)

                with torch.no_grad():
                    logits = model(inputs)
                    t = inputs.size(2)
                    per_frame_logits = torch.nn.functional.interpolate(logits.unsqueeze(-1), t, mode='linear',
                                                                       align_corners=True)
                    probs = torch.nn.functional.softmax(per_frame_logits, dim=1)
                    preds = torch.max(probs, 1)[1]

                    loss = criterion(per_frame_logits, labels)
                    acc = utils.accuracy_v2(torch.argmax(per_frame_logits, dim=1), labels)


                print('[{}] test Acc: {}, Loss: {:.4f} [{} / {}]'.format(steps, acc.item(), loss.item(), test_batchind,
                                                                     len(test_dataloader)))
                test_writer.add_scalar('loss', loss.item(), n_examples)
                test_writer.add_scalar('Accuracy', acc.item(), n_examples)
                test_fraction_done = (test_batchind + 1) / test_num_batch
                model.train(True)
        if steps % 2 == 0:
            # save model
            torch.save({"model_state_dict": model.module.state_dict(),
                        "optimizer_state_dict": optimizer.state_dict(),
                        "lr_state_dict": lr_sched.state_dict()},
                       os.path.join(logdir, str(steps).zfill(6) + '.pt'))

        steps += 1
        lr_sched.step()
    train_writer.close()
    test_writer.close()