Example No. 1
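These excerpts omit their imports. For this first snippet, the standard-library and PyTorch imports below follow directly from usage; the repo-internal names (MFNET_3D_MO, parse_args, print_and_save, the loaders, samplers, and video transforms) live in project modules whose paths are unknown here, so they are left as hypothetical placeholders in comments.

import os
import json

import torch
import torch.backends.cudnn as cudnn

# repo-internal imports; the module paths below are hypothetical placeholders
# from models.mfnet_3d_mo import MFNET_3D_MO
# from utils.argparse_utils import parse_args
# from utils.file_utils import print_and_save, make_log_file_name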
def main():
    args = parse_args('mfnet', val=True)

    output_dir = os.path.dirname(args.ckpt_path)
    log_file = make_log_file_name(output_dir, args)
    print_and_save(args, log_file)
    cudnn.benchmark = True

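    # MFNET_3D_MO is the multi-output MFNet-3D (one classifier head per task)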
    mfnet_3d = MFNET_3D_MO
    num_classes = [args.action_classes, args.verb_classes, args.noun_classes]
    validate = validate_mfnet_mo_json

    kwargs = {}
    num_coords = 0
    if args.use_hands:
        num_coords += 2
    kwargs['num_coords'] = num_coords

    model_ft = mfnet_3d(num_classes, **kwargs)
    model_ft = torch.nn.DataParallel(model_ft).cuda()
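    # remap tensors saved on cuda:1 so the checkpoint also loads on a single-GPU machine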
    checkpoint = torch.load(args.ckpt_path, map_location={'cuda:1': 'cuda:0'})
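    # compatibility shim: copy the old single-head classifier weights into
    # slot 0 of the newer multi-head classifier list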
    if args.old_mfnet_eval:
        checkpoint['state_dict']['module.classifier_list.classifier_list.0.weight'] = checkpoint['state_dict']['module.classifier.weight']
        checkpoint['state_dict']['module.classifier_list.classifier_list.0.bias'] = checkpoint['state_dict']['module.classifier.bias']
    model_ft.load_state_dict(checkpoint['state_dict'], strict=False)
    print_and_save("Model loaded on gpu {} devices".format(args.gpus), log_file)

    valid_classes = [cls for cls in num_classes if cls > 0]
    num_valid_classes = len(valid_classes)

    crop_type = CenterCrop((224, 224))
    val_sampler = MiddleSampling(num=args.clip_length, window=64)
    val_transforms = transforms.Compose([Resize((256, 256), False), crop_type,
                                         ToTensorVid(), Normalize(mean=mean_3d, std=std_3d)])
    val_loader = VideoAndPointDatasetLoader(val_sampler, args.val_list, point_list_prefix=args.bpv_prefix,
                                            num_classes=num_classes, img_tmpl='frame_{:010d}.jpg',
                                            norm_val=[456., 256., 456., 256.], batch_transform=val_transforms,
                                            use_hands=False, validation=True)
    val_iter = torch.utils.data.DataLoader(val_loader,
                                           batch_size=args.batch_size,
                                           shuffle=False,
                                           num_workers=args.num_workers,
                                           pin_memory=True)

    outputs = validate(model_ft, val_iter, num_valid_classes,
                       os.path.basename(args.val_list), action_file=args.epic_actions_path)

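    # EPIC-Kitchens test splits: s1 = seen kitchens, s2 = unseen kitchens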
    eval_mode = 'seen' if 's1' in args.val_list else 'unseen' if 's2' in args.val_list else 'unknown'
    json_file = "{}.json".format(os.path.join(output_dir, eval_mode))
    with open(json_file, 'w') as jf:
        json.dump(outputs, jf)
def test_cnn(model, criterion, test_iterator, cur_epoch, dataset, log_file,
             gpus):
    losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
    with torch.no_grad():
        model.eval()
        print_and_save(
            'Evaluating after epoch: {} on {} set'.format(cur_epoch, dataset),
            log_file)
        for batch_idx, (inputs, targets) in enumerate(test_iterator):
            inputs = inputs.cuda(gpus[0])
            targets = targets.cuda(gpus[0])

            output = model(inputs)
            loss = criterion(output, targets)

            t1, t5 = accuracy(output.detach(), targets.detach(), topk=(1, 5))
            top1.update(t1.item(), output.size(0))
            top5.update(t5.item(), output.size(0))
            losses.update(loss.item(), output.size(0))

            print_and_save(
                '[Epoch:{}, Batch {}/{}][Top1 {:.3f}[avg:{:.3f}], Top5 {:.3f}[avg:{:.3f}]]'
                .format(cur_epoch, batch_idx, len(test_iterator), top1.val,
                        top1.avg, top5.val, top5.avg), log_file)

        print_and_save(
            '{} Results: Loss {:.3f}, Top1 {:.3f}, Top5 {:.3f}'.format(
                dataset, losses.avg, top1.avg, top5.avg), log_file)
    return top1.avg
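print_and_save, AverageMeter, and accuracy are utilities from this repository. A minimal sketch consistent with how they are called in these examples (an assumption, not necessarily the repo's exact code):

def print_and_save(text, log_file):
    # print to stdout and, when a log file is given, append the same line
    print(text)
    if log_file is not None:
        with open(log_file, 'a') as f:
            f.write('{}\n'.format(text))

class AverageMeter(object):
    # tracks the latest value plus a running sum, count, and average
    def __init__(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def accuracy(output, target, topk=(1,)):
    # top-k accuracy in percent for logits `output` against labels `target`
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    return [correct[:k].reshape(-1).float().sum(0).mul_(100.0 / batch_size)
            for k in topk]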
def test_cnn_do(model, criterion, test_iterator, cur_epoch, dataset, log_file,
                gpus):
    losses, losses_a, losses_b, top1_a, top5_a, top1_b, top5_b = \
        (AverageMeter() for _ in range(7))
    with torch.no_grad():
        model.eval()
        print_and_save(
            'Evaluating after epoch: {} on {} set'.format(cur_epoch, dataset),
            log_file)
        for batch_idx, (inputs, targets) in enumerate(test_iterator):
            inputs = torch.tensor(inputs).cuda(gpus[0])

            targets_a = torch.tensor(targets[0]).cuda(gpus[0])
            targets_b = torch.tensor(targets[1]).cuda(gpus[0])
            output_a, output_b = model(inputs)
            loss_a = criterion(output_a, targets_a)
            loss_b = criterion(output_b, targets_b)
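            # weighted multi-task loss: the verb head (a) counts 3x the noun head (b)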
            loss = 0.75 * loss_a + 0.25 * loss_b

            t1_a, t5_a = accuracy(output_a.detach().cpu(),
                                  targets_a.detach().cpu(),
                                  topk=(1, 5))
            t1_b, t5_b = accuracy(output_b.detach().cpu(),
                                  targets_b.detach().cpu(),
                                  topk=(1, 5))
            top1_a.update(t1_a.item(), output_a.size(0))
            top5_a.update(t5_a.item(), output_a.size(0))
            top1_b.update(t1_b.item(), output_b.size(0))
            top5_b.update(t5_b.item(), output_b.size(0))
            losses_a.update(loss_a.item(), output_a.size(0))
            losses_b.update(loss_b.item(), output_b.size(0))
            losses.update(loss.item(), output_a.size(0))

            to_print = '[Epoch:{}, Batch {}/{}]' \
                       '[Top1_a {:.3f}[avg:{:.3f}], Top5_a {:.3f}[avg:{:.3f}], ' \
                       'Top1_b {:.3f}[avg:{:.3f}], Top5_b {:.3f}[avg:{:.3f}]]'.format(
                           cur_epoch, batch_idx, len(test_iterator),
                           top1_a.val, top1_a.avg, top5_a.val, top5_a.avg,
                           top1_b.val, top1_b.avg, top5_b.val, top5_b.avg)
            print_and_save(to_print, log_file)

        print_and_save(
            '{} Results: Loss {:.3f}, Top1_a {:.3f}, Top5_a {:.3f}, Top1_b {:.3f}, Top5_b {:.3f}'
            .format(dataset, losses.avg, top1_a.avg, top5_a.avg, top1_b.avg,
                    top5_b.avg), log_file)
    return top1_a.avg, top1_b.avg
Example No. 4
def validate_lstm(model, criterion, test_iterator, cur_epoch, dataset,
                  log_file, args):
    losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
    outputs = []

    print_and_save(
        'Evaluating after epoch: {} on {} set'.format(cur_epoch, dataset),
        log_file)
    with torch.no_grad():
        model.eval()
        for batch_idx, (inputs, seq_lengths, targets,
                        video_names) in enumerate(test_iterator):
            inputs = torch.tensor(inputs).cuda()
            targets = torch.tensor(targets).cuda()

            inputs = inputs.transpose(1, 0)
            output = model(inputs, seq_lengths)
            loss = criterion(output, targets)

            batch_preds = []
            for j in range(output.size(0)):
                res = np.argmax(output[j].detach().cpu().numpy())
                label = targets[j].cpu().numpy()
                outputs.append([res, label])
                batch_preds.append("{}, P-L:{}-{}".format(
                    video_names[j], res, label))

            t1, t5 = accuracy(output.detach().cpu(),
                              targets.detach().cpu(),
                              topk=(1, 5))
            top1.update(t1.item(), output.size(0))
            top5.update(t5.item(), output.size(0))
            losses.update(loss.item(), output.size(0))

            print_and_save(
                '[Batch {}/{}][Top1 {:.3f}[avg:{:.3f}], Top5 {:.3f}[avg:{:.3f}]]\n\t{}'
                .format(batch_idx, len(test_iterator), top1.val, top1.avg,
                        top5.val, top5.avg, batch_preds), log_file)
        print_and_save(
            '{} Results: Loss {:.3f}, Top1 {:.3f}, Top5 {:.3f}'.format(
                dataset, losses.avg, top1.avg, top5.avg), log_file)
    return top1.avg, outputs
Example No. 5
log_file = None
no_norm_input = False
ckpt_path = r"outputs\lstm_polar_128_0.0_1000_8_32_2_seq32_coords_polar_clr_tri_vsel125\lstm_polar_128_0.0_1000_8_32_2_seq32_coords_polar_clr_tri_vsel125_best.pth"
val_list = r"splits\hand_tracks\hand_locs_val_1.txt"
lstm_input, lstm_hidden, lstm_layers, verb_classes, lstm_seq_size = 8, 32, 2, 125, 32

lstm_model = LSTM_Hands_Polar
kwargs = {'dropout': 0, 'bidir': True}
model_ft = lstm_model(lstm_input, lstm_hidden, lstm_layers, verb_classes,
                      **kwargs)
model_ft = torch.nn.DataParallel(model_ft).cuda()
checkpoint = torch.load(ckpt_path)
model_ft.load_state_dict(checkpoint['state_dict'])
model_ft.eval()
print_and_save("Model loaded to gpu", log_file)
criterion = torch.nn.CrossEntropyLoss().cuda()

#%%
norm_val = [1., 1., 1., 1.] if no_norm_input else [456., 256., 456., 256.]
norm_val = np.array(norm_val)
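# hand (x, y) coordinates are normalized by the 456x256 frame size (two points, one per hand)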
#dataset_loader = PointPolarDatasetLoader(val_list, max_seq_length=lstm_seq_size,
#                                         norm_val=norm_val, validation=True)

#track_path = r"D:\Code\epic-kitchens-processing\output\yolo_allhands_tracked_videos\clean\P01\P01_04.pkl"
#track_path = r"D:\Datasets\egocentric\EPIC_KITCHENS\clean_hand_detection_tracks\P30\P30_05\179200_5_140.pkl"
track_path = r"D:\Datasets\egocentric\EPIC_KITCHENS\clean_hand_detection_tracks\P30\P30_05\81347_4_11.pkl"
hand_tracks = load_pickle(track_path)
left_track = np.array(hand_tracks['left'], dtype=np.float32)
right_track = np.array(hand_tracks['right'], dtype=np.float32)
def main():
    args, model_name = parse_args('lstm', val=False)

    output_dir, log_file = init_folders(args.base_output_dir, model_name,
                                        args.resume, args.logging)
    print_and_save(args, log_file)
    print_and_save("Model name: {}".format(model_name), log_file)
    cudnn.benchmark = True

    lstm_model = LSTM_per_hand if args.lstm_dual else LSTM_Hands_attn if args.lstm_attn else LSTM_Hands
    kwargs = {
        'dropout': args.dropout,
        'bidir': args.lstm_bidir,
        'noun_classes': args.noun_classes,
        'double_output': args.double_output
    }
    model_ft = lstm_model(args.lstm_input, args.lstm_hidden, args.lstm_layers,
                          args.verb_classes, **kwargs)
    #    model_ft = LSTM_Hands_encdec(456, 64, 32, args.lstm_layers, verb_classes, 0)
    model_ft = torch.nn.DataParallel(model_ft).cuda()
    print_and_save("Model loaded to gpu", log_file)
    if args.resume:
        model_ft, ckpt_path = resume_checkpoint(model_ft, output_dir,
                                                model_name, args.resume_from)
        print_and_save("Resuming training from: {}".format(ckpt_path),
                       log_file)

    if args.only_left and args.only_right:
        sys.exit("At most one of *only_left* and *only_right* can be True at a time.")
    norm_val = [1., 1., 1., 1.] if args.no_norm_input else [456., 256., 456., 256.]
    if args.lstm_feature == "coords" or args.lstm_feature == "coords_dual":
        if args.lstm_clamped and (not args.lstm_dual
                                  or args.lstm_seq_size == 0):
            sys.exit(
                "Clamped tracks require dual lstms and a fixed lstm sequence size."
            )
        train_loader = PointDatasetLoader(args.train_list,
                                          max_seq_length=args.lstm_seq_size,
                                          num_classes=args.verb_classes,
                                          norm_val=norm_val,
                                          dual=args.lstm_dual,
                                          clamp=args.lstm_clamped,
                                          only_left=args.only_left,
                                          only_right=args.only_right)
        test_loader = PointDatasetLoader(args.test_list,
                                         max_seq_length=args.lstm_seq_size,
                                         num_classes=args.verb_classes,
                                         norm_val=norm_val,
                                         dual=args.lstm_dual,
                                         clamp=args.lstm_clamped,
                                         only_left=args.only_left,
                                         only_right=args.only_right)
    elif args.lstm_feature == "vec_sum" or args.lstm_feature == "vec_sum_dual":
        train_loader = PointVectorSummedDatasetLoader(
            args.train_list,
            max_seq_length=args.lstm_seq_size,
            num_classes=args.verb_classes,
            dual=args.lstm_dual)
        test_loader = PointVectorSummedDatasetLoader(
            args.test_list,
            max_seq_length=args.lstm_seq_size,
            num_classes=args.verb_classes,
            dual=args.lstm_dual)
    elif args.lstm_feature == "coords_bpv":
        #        if args.num_workers > 0:
        #            from utils.dataset_loader import make_data_arr, parse_samples_list
        #            data_arr_train = make_data_arr(parse_samples_list(args.train_list), args.bpv_prefix)
        #            data_arr_test = make_data_arr(parse_samples_list(args.test_list), args.bpv_prefix)
        #            import multiprocessing
        #            manager_train = multiprocessing.Manager()
        #            data_train = manager_train.list(data_arr_train)
        #            manager_test = multiprocessing.Manager()
        #            data_test = manager_test.list(data_arr_test)
        #
        #            train_loader = PointBpvDatasetLoader(args.train_list, args.lstm_seq_size,
        #                                                 args.double_output, norm_val=norm_val,
        #                                                 bpv_prefix=args.bpv_prefix, data_arr=data_train)
        #            test_loader = PointBpvDatasetLoader(args.test_list, args.lstm_seq_size,
        #                                                args.double_output, norm_val=norm_val,
        #                                                bpv_prefix=args.bpv_prefix, data_arr=data_test)
        #        else:
        train_loader = PointBpvDatasetLoader(args.train_list,
                                             args.lstm_seq_size,
                                             args.double_output,
                                             norm_val=norm_val,
                                             bpv_prefix=args.bpv_prefix,
                                             num_workers=args.num_workers)
        test_loader = PointBpvDatasetLoader(args.test_list,
                                            args.lstm_seq_size,
                                            args.double_output,
                                            norm_val=norm_val,
                                            bpv_prefix=args.bpv_prefix,
                                            num_workers=args.num_workers)
    elif args.lstm_feature == "coords_objects":
        train_loader = PointObjDatasetLoader(args.train_list,
                                             args.lstm_seq_size,
                                             args.double_output,
                                             norm_val=norm_val,
                                             bpv_prefix=args.bpv_prefix)
        test_loader = PointObjDatasetLoader(args.test_list,
                                            args.lstm_seq_size,
                                            args.double_output,
                                            norm_val=norm_val,
                                            bpv_prefix=args.bpv_prefix)

    else:
        sys.exit("Unsupported lstm feature")


#    train_loader = PointImageDatasetLoader(train_list, norm_val=norm_val)
#    test_loader = PointImageDatasetLoader(test_list, norm_val=norm_val)

    train_iterator = torch.utils.data.DataLoader(train_loader,
                                                 batch_size=args.batch_size,
                                                 shuffle=True,
                                                 num_workers=args.num_workers,
                                                 pin_memory=True,
                                                 collate_fn=lstm_collate)
    test_iterator = torch.utils.data.DataLoader(test_loader,
                                                batch_size=args.batch_size,
                                                shuffle=False,
                                                num_workers=args.num_workers,
                                                pin_memory=True,
                                                collate_fn=lstm_collate)

    params_to_update = model_ft.parameters()
    print_and_save("Params to learn:", log_file)
    for name, param in model_ft.named_parameters():
        if param.requires_grad:
            print_and_save("\t{}".format(name), log_file)

    optimizer = torch.optim.SGD(params_to_update,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.decay)
    ce_loss = torch.nn.CrossEntropyLoss().cuda()

    lr_scheduler = load_lr_scheduler(args.lr_type, args.lr_steps, optimizer,
                                     len(train_iterator))

    if args.lstm_attn:
        train_fun, test_fun = train_attn_lstm, test_attn_lstm
    elif args.double_output:
        train_fun, test_fun = train_lstm_do, test_lstm_do
    else:
        train_fun, test_fun = train_lstm, test_lstm
    if not args.double_output:
        new_top1, top1 = 0.0, 0.0
    else:
        new_top1, top1 = (0.0, 0.0), (0.0, 0.0)
    for epoch in range(args.max_epochs):
        train_fun(model_ft, optimizer, ce_loss, train_iterator, epoch,
                  log_file, lr_scheduler)
        if (epoch + 1) % args.eval_freq == 0:
            if args.eval_on_train:
                test_fun(model_ft, ce_loss, train_iterator, epoch, "Train",
                         log_file)
            new_top1 = test_fun(model_ft, ce_loss, test_iterator, epoch,
                                "Test", log_file)
            top1 = save_checkpoints(model_ft, optimizer, top1, new_top1,
                                    args.save_all_weights, output_dir,
                                    model_name, epoch, log_file)
Example No. 7
def validate_lstm_do(model, criterion, test_iterator, cur_epoch, dataset,
                     log_file, args):
    losses, losses_a, losses_b, top1_a, top5_a, top1_b, top5_b = \
        (AverageMeter() for _ in range(7))
    outputs_a, outputs_b = [], []

    print_and_save(
        'Evaluating after epoch: {} on {} set'.format(cur_epoch, dataset),
        log_file)
    with torch.no_grad():
        model.eval()
        for batch_idx, (inputs, seq_lengths, targets,
                        video_names) in enumerate(test_iterator):
            inputs = torch.tensor(inputs).cuda()
            inputs = inputs.transpose(1, 0)

            output_a, output_b = model(inputs, seq_lengths)

            targets_a = torch.tensor(targets[:, 0]).cuda()
            targets_b = torch.tensor(targets[:, 1]).cuda()
            loss_a = criterion(output_a, targets_a)
            loss_b = criterion(output_b, targets_b)
            loss = 0.75 * loss_a + 0.25 * loss_b

            batch_preds = []
            for j in range(output_a.size(0)):
                res_a = np.argmax(output_a[j].detach().cpu().numpy())
                res_b = np.argmax(output_b[j].detach().cpu().numpy())
                label_a = targets_a[j].cpu().numpy()
                label_b = targets_b[j].cpu().numpy()
                outputs_a.append([res_a, label_a])
                outputs_b.append([res_b, label_b])
                batch_preds.append("{}, a P-L:{}-{}, b P-L:{}-{}".format(
                    video_names[j], res_a, label_a, res_b, label_b))

            t1_a, t5_a = accuracy(output_a.detach().cpu(),
                                  targets_a.detach().cpu(),
                                  topk=(1, 5))
            t1_b, t5_b = accuracy(output_b.detach().cpu(),
                                  targets_b.detach().cpu(),
                                  topk=(1, 5))
            top1_a.update(t1_a.item(), output_a.size(0))
            top5_a.update(t5_a.item(), output_a.size(0))
            top1_b.update(t1_b.item(), output_b.size(0))
            top5_b.update(t5_b.item(), output_b.size(0))
            losses_a.update(loss_a.item(), output_a.size(0))
            losses_b.update(loss_b.item(), output_b.size(0))
            losses.update(loss.item(), output_a.size(0))

            to_print = '[Batch {}/{}]' \
                '[Top1_a {:.3f}[avg:{:.3f}], Top5_a {:.3f}[avg:{:.3f}], ' \
                'Top1_b {:.3f}[avg:{:.3f}], Top5_b {:.3f}[avg:{:.3f}]]\n\t{}'.format(
                    batch_idx, len(test_iterator),
                    top1_a.val, top1_a.avg, top5_a.val, top5_a.avg,
                    top1_b.val, top1_b.avg, top5_b.val, top5_b.avg,
                    batch_preds)
            print_and_save(to_print, log_file)

        print_and_save(
            '{} Results: Loss {:.3f}, Top1_a {:.3f}, Top5_a {:.3f}, Top1_b {:.3f}, Top5_b {:.3f}'
            .format(dataset, losses.avg, top1_a.avg, top5_a.avg, top1_b.avg,
                    top5_b.avg), log_file)
    return (top1_a.avg, top1_b.avg), (outputs_a, outputs_b)
Example No. 8
def main():
    args = parse_args('lstm', val=True)

    output_dir = os.path.dirname(args.ckpt_path)
    log_file = os.path.join(
        output_dir,
        "results-accuracy-validation.txt") if args.logging else None
    if args.double_output and args.logging:
        if 'verb' in args.ckpt_path:
            log_file = os.path.join(output_dir,
                                    "results-accuracy-validation-verb.txt")
        if 'noun' in args.ckpt_path:
            log_file = os.path.join(output_dir,
                                    "results-accuracy-validation-noun.txt")

    print_and_save(args, log_file)
    cudnn.benchmark = True

    lstm_model = LSTM_per_hand if args.lstm_dual else LSTM_Hands_attn if args.lstm_attn else LSTM_Hands
    kwargs = {
        'dropout': 0,
        'bidir': args.lstm_bidir,
        'noun_classes': args.noun_classes,
        'double_output': args.double_output
    }
    model_ft = lstm_model(args.lstm_input, args.lstm_hidden, args.lstm_layers,
                          args.verb_classes, **kwargs)
    model_ft = torch.nn.DataParallel(model_ft).cuda()
    checkpoint = torch.load(args.ckpt_path)
    model_ft.load_state_dict(checkpoint['state_dict'])
    print_and_save("Model loaded to gpu", log_file)

    if args.only_left and args.only_right:
        sys.exit("At most one of *only_left* and *only_right* can be True at a time.")
    norm_val = [1., 1., 1., 1.] if args.no_norm_input else [456., 256., 456., 256.]
    if args.lstm_feature == "coords" or args.lstm_feature == "coords_dual":
        if args.lstm_clamped and (not args.lstm_dual
                                  or args.lstm_seq_size == 0):
            sys.exit(
                "Clamped tracks require dual lstms and a fixed lstm sequence size."
            )
        dataset_loader = PointDatasetLoader(args.val_list,
                                            max_seq_length=args.lstm_seq_size,
                                            num_classes=args.verb_classes,
                                            norm_val=norm_val,
                                            dual=args.lstm_dual,
                                            clamp=args.lstm_clamped,
                                            only_left=args.only_left,
                                            only_right=args.only_right,
                                            validation=True)
    elif args.lstm_feature == "vec_sum" or args.lstm_feature == "vec_sum_dual":
        dataset_loader = PointVectorSummedDatasetLoader(
            args.val_list,
            max_seq_length=args.lstm_seq_size,
            num_classes=args.verb_classes,
            dual=args.lstm_dual,
            validation=True)
    elif args.lstm_feature == "coords_bpv":
        dataset_loader = PointBpvDatasetLoader(args.val_list,
                                               args.lstm_seq_size,
                                               args.double_output,
                                               norm_val=norm_val,
                                               bpv_prefix=args.bpv_prefix,
                                               validation=True)
    elif args.lstm_feature == "coords_objects":
        dataset_loader = PointObjDatasetLoader(args.val_list,
                                               args.lstm_seq_size,
                                               args.double_output,
                                               norm_val=norm_val,
                                               bpv_prefix=args.bpv_prefix,
                                               validation=True)
    else:
        sys.exit("Unsupported lstm feature")

    collate_fn = lstm_collate
    #    collate_fn = torch.utils.data.dataloader.default_collate
    dataset_iterator = torch.utils.data.DataLoader(
        dataset_loader,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        collate_fn=collate_fn,
        pin_memory=True)

    ce_loss = torch.nn.CrossEntropyLoss().cuda()

    validate = validate_lstm_attn if args.lstm_attn else validate_lstm_do if args.double_output else validate_lstm
    top1, outputs = validate(model_ft, ce_loss, dataset_iterator,
                             checkpoint['epoch'],
                             os.path.basename(args.val_list), log_file, args)

    if not isinstance(top1, tuple):
        video_preds = [x[0] for x in outputs]
        video_labels = [x[1] for x in outputs]
        mean_cls_acc, top1_acc = eval_final_print(video_preds, video_labels,
                                                  "Verbs",
                                                  args.annotations_path,
                                                  args.val_list,
                                                  args.verb_classes, log_file)
    else:
        video_preds_a, video_preds_b = ([x[0] for x in outputs[0]],
                                        [x[0] for x in outputs[1]])
        video_labels_a, video_labels_b = ([x[1] for x in outputs[0]],
                                          [x[1] for x in outputs[1]])
        mean_cls_acc_a, top1_acc_a = eval_final_print(
            video_preds_a, video_labels_a, "Verbs", args.annotations_path,
            args.val_list, args.verb_classes, log_file)
        mean_cls_acc_b, top1_acc_b = eval_final_print(
            video_preds_b, video_labels_b, "Nouns", args.annotations_path,
            args.val_list, args.noun_classes, log_file)
def train_cnn(model,
              optimizer,
              criterion,
              train_iterator,
              mixup_alpha,
              cur_epoch,
              log_file,
              gpus,
              lr_scheduler=None):
    batch_time, losses, top1, top5 = (AverageMeter() for _ in range(4))
    model.train()

    # epoch-wise schedulers step once per epoch; CyclicLR steps per batch below
    if lr_scheduler is not None and not isinstance(lr_scheduler, CyclicLR):
        lr_scheduler.step()

    print_and_save('*********', log_file)
    print_and_save('Beginning of epoch: {}'.format(cur_epoch), log_file)
    t0 = time.time()
    for batch_idx, (inputs, targets) in enumerate(train_iterator):
        if isinstance(lr_scheduler, CyclicLR):
            lr_scheduler.step()

        inputs = inputs.cuda(gpus[0])
        targets = targets.cuda(gpus[0])

        # TODO: Fix mixup and cuda integration, especially for mfnet
        if mixup_alpha != 1:
            inputs, targets_a, targets_b, lam = mixup_data(
                inputs, targets, mixup_alpha)

        output = model(inputs)

        if mixup_alpha != 1:
            loss = mixup_criterion(criterion, output, targets_a, targets_b,
                                   lam)
        else:
            loss = criterion(output, targets)

        optimizer.zero_grad()
        loss.backward()

        #        if clip_gradient is not None:
        #            total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), clip_gradient)
        #            if total_norm > clip_gradient:
        #                to_print = "clipping gradient: {} with coef {}".format(total_norm, clip_gradient / total_norm)
        #                print_and_save(to_print, log_file)

        optimizer.step()

        t1, t5 = accuracy(output.detach(), targets.detach(), topk=(1, 5))
        top1.update(t1.item(), output.size(0))
        top5.update(t5.item(), output.size(0))
        losses.update(loss.item(), output.size(0))
        batch_time.update(time.time() - t0)
        t0 = time.time()
        print_and_save(
            '[Epoch:{}, Batch {}/{} in {:.3f} s][Loss {:.4f}[avg:{:.4f}], Top1 {:.3f}[avg:{:.3f}], Top5 {:.3f}[avg:{:.3f}]], LR {:.6f}'
            .format(cur_epoch, batch_idx, len(train_iterator), batch_time.val,
                    losses.val, losses.avg, top1.val, top1.avg, top5.val,
                    top5.avg,
                    lr_scheduler.get_lr()[0]), log_file)
    print_and_save("Epoch train time: {}".format(batch_time.sum), log_file)
Example No. 10
def eval_final_print(video_preds, video_labels, cls_type, annotations_path,
                     val_list, max_classes, log_file):
    cf, recall, precision, cls_acc, mean_cls_acc, top1_acc = analyze_preds_labels(
        video_preds,
        video_labels,
        all_class_indices=list(range(int(max_classes))))
    print_and_save(cls_type, log_file)
    print_and_save(cf, log_file)

    if annotations_path:
        brd_splits = '_brd' in val_list
        valid_verb_indices, verb_ids_sorted, valid_noun_indices, noun_ids_sorted = get_classes(
            annotations_path, val_list, brd_splits, 100)
        if cls_type == 'Verbs':
            valid_indices, ids_sorted = valid_verb_indices, verb_ids_sorted
            # manually set 125 verb classes to avoid loading the verb names file
            all_indices = list(range(125))
        else:
            valid_indices, ids_sorted = valid_noun_indices, noun_ids_sorted
            all_indices = list(range(352))

        ave_pre, ave_rec, _ = avg_rec_prec_trimmed(video_preds, video_labels,
                                                   valid_indices, all_indices)
        print_and_save("{} > 100 instances at training:".format(cls_type),
                       log_file)
        print_and_save("Classes are {}".format(valid_indices), log_file)
        print_and_save(
            "average precision {0:.2f}%, average recall {1:.2f}%".format(
                ave_pre, ave_rec), log_file)
        print_and_save("Most common {} in training".format(cls_type), log_file)
        print_and_save(
            "15 {} rec {}".format(cls_type, recall[ids_sorted[:15]]), log_file)
        print_and_save(
            "15 {} pre {}".format(cls_type, precision[ids_sorted[:15]]),
            log_file)

    print_and_save("Cls Rec {}".format(recall), log_file)
    print_and_save("Cls Pre {}".format(precision), log_file)
    print_and_save("Cls Acc {}".format(cls_acc), log_file)
    print_and_save("Mean Cls Acc {:.02f}%".format(mean_cls_acc), log_file)
    print_and_save("Dataset Acc {}".format(top1_acc), log_file)
    return mean_cls_acc, top1_acc
Example No. 11
def main():
    args = parse_args('mfnet', val=True)

    output_dir = os.path.dirname(args.ckpt_path)
    log_file = make_log_file_name(output_dir, args)
    print_and_save(args, log_file)
    cudnn.benchmark = True

    if not args.double_output:
        mfnet_3d = MFNET_3D
        num_classes = args.verb_classes
        validate = validate_resnet
        overall_top1, overall_mean_cls_acc = 0.0, 0.0
    else:
        mfnet_3d = MFNET_3D_DO
        num_classes = (args.verb_classes, args.noun_classes)
        validate = validate_resnet_do
        overall_top1, overall_mean_cls_acc = (0.0, 0.0), (0.0, 0.0)

    model_ft = mfnet_3d(num_classes)
    model_ft = torch.nn.DataParallel(model_ft).cuda()
    checkpoint = torch.load(args.ckpt_path, map_location={'cuda:1': 'cuda:0'})
    model_ft.load_state_dict(checkpoint['state_dict'])
    print_and_save("Model loaded on gpu {} devices".format(args.gpus),
                   log_file)

    ce_loss = torch.nn.CrossEntropyLoss().cuda()

    for i in range(args.mfnet_eval):
        crop_type = (CenterCrop((224, 224)) if args.eval_crop == 'center'
                     else RandomCrop((224, 224)))
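        # each of the args.mfnet_eval runs samples different clips (seed=i);
        # the per-run accuracies are accumulated and averaged after the loop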
        if args.eval_sampler == 'middle':
            val_sampler = MiddleSampling(num=args.clip_length)
        else:
            val_sampler = RandomSampling(num=args.clip_length,
                                         interval=args.frame_interval,
                                         speed=[1.0, 1.0],
                                         seed=i)

        val_transforms = transforms.Compose([
            Resize((256, 256), False), crop_type,
            ToTensorVid(),
            Normalize(mean=mean_3d, std=std_3d)
        ])

        val_loader = VideoDatasetLoader(val_sampler,
                                        args.val_list,
                                        num_classes=num_classes,
                                        batch_transform=val_transforms,
                                        img_tmpl='frame_{:010d}.jpg',
                                        validation=True)
        val_iter = torch.utils.data.DataLoader(val_loader,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=args.num_workers,
                                               pin_memory=True)

        top1, outputs = validate(model_ft, ce_loss, val_iter,
                                 checkpoint['epoch'],
                                 os.path.basename(args.val_list), log_file)

        if not isinstance(top1, tuple):
            video_preds = [x[0] for x in outputs]
            video_labels = [x[1] for x in outputs]
            mean_cls_acc, top1_acc = eval_final_print(video_preds,
                                                      video_labels, "Verbs",
                                                      args.annotations_path,
                                                      args.val_list,
                                                      num_classes, log_file)
            overall_mean_cls_acc += mean_cls_acc
            overall_top1 += top1_acc
        else:
            video_preds_a, video_preds_b = ([x[0] for x in outputs[0]],
                                            [x[0] for x in outputs[1]])
            video_labels_a, video_labels_b = ([x[1] for x in outputs[0]],
                                              [x[1] for x in outputs[1]])
            mean_cls_acc_a, top1_acc_a = eval_final_print(
                video_preds_a, video_labels_a, "Verbs", args.annotations_path,
                args.val_list, num_classes[0], log_file)
            mean_cls_acc_b, top1_acc_b = eval_final_print(
                video_preds_b, video_labels_b, "Nouns", args.annotations_path,
                args.val_list, num_classes[1], log_file)
            overall_mean_cls_acc = (overall_mean_cls_acc[0] + mean_cls_acc_a,
                                    overall_mean_cls_acc[1] + mean_cls_acc_b)
            overall_top1 = (overall_top1[0] + top1_acc_a,
                            overall_top1[1] + top1_acc_b)

    print_and_save("", log_file)
    if not isinstance(top1, tuple):
        print_and_save(
            "Mean Cls Acc {}".format(overall_mean_cls_acc / args.mfnet_eval),
            log_file)
        print_and_save(
            "Dataset Acc ({} times) {}".format(args.mfnet_eval,
                                               overall_top1 / args.mfnet_eval),
            log_file)
    else:
        print_and_save(
            "Mean Cls Acc a {}, b {}".format(
                overall_mean_cls_acc[0] / args.mfnet_eval,
                overall_mean_cls_acc[1] / args.mfnet_eval), log_file)
        print_and_save(
            "Dataset Acc ({} times) a {}, b {}".format(
                args.mfnet_eval, overall_top1[0] / args.mfnet_eval,
                overall_top1[1] / args.mfnet_eval), log_file)
Example No. 12
start_decay = 0.001
batch_size = 128
max_epochs = 100
lr_range = [
    0.001, 0.002, 0.005, 0.008, 0.01, 0.02, 0.04, 0.07, 0.1, 0.5, 1., 2., 3.
]
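# LR range test: sweep the learning rate across these values during training
# to find a usable range for a cyclical schedule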

log_file = os.path.join(base_output_dir, model_name,
                        model_name + "_{}.txt".format("range"))
model_ft = LSTM_Hands(lstm_input, lstm_hidden, lstm_layers, verb_classes,
                      dropout)
model_ft = torch.nn.DataParallel(model_ft).cuda()
cudnn.benchmark = True

params_to_update = model_ft.parameters()
print_and_save("Params to learn:", log_file)
for name, param in model_ft.named_parameters():
    if param.requires_grad:
        print_and_save("\t{}".format(name), log_file)

optimizer = torch.optim.SGD(params_to_update,
                            lr=start_lr,
                            momentum=start_mom,
                            weight_decay=start_decay)
lr_scheduler = LRRangeTest(optimizer, lr_range, max_epochs)

ce_loss = torch.nn.CrossEntropyLoss().cuda()

train_loader = PointDatasetLoader(train_list,
                                  max_seq_length=16,
                                  norm_val=norm_val)
Example No. 13
def train_cnn_do(model,
                 optimizer,
                 criterion,
                 train_iterator,
                 mixup_alpha,
                 cur_epoch,
                 log_file,
                 gpus,
                 lr_scheduler=None):
    batch_time, losses_a, losses_b, losses, top1_a, top5_a, top1_b, top5_b = \
        (AverageMeter() for _ in range(8))
    model.train()

    # epoch-wise schedulers step once per epoch; CyclicLR steps per batch below
    if lr_scheduler is not None and not isinstance(lr_scheduler, CyclicLR):
        lr_scheduler.step()

    print_and_save('*********', log_file)
    print_and_save('Beginning of epoch: {}'.format(cur_epoch), log_file)
    t0 = time.time()
    for batch_idx, (inputs, targets) in enumerate(train_iterator):
        if isinstance(lr_scheduler, CyclicLR):
            lr_scheduler.step()

        # gradients w.r.t. the inputs are not needed, so skip requires_grad
        inputs = torch.as_tensor(inputs).cuda(gpus[0])

        targets_a = torch.tensor(targets[0]).cuda(gpus[0])
        targets_b = torch.tensor(targets[1]).cuda(gpus[0])

        output_a, output_b = model(inputs)

        loss_a = criterion(output_a, targets_a)
        loss_b = criterion(output_b, targets_b)
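        # weighted multi-task loss: the verb head (a) counts 3x the noun head (b)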
        loss = 0.75 * loss_a + 0.25 * loss_b

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        t1_a, t5_a = accuracy(output_a.detach().cpu(),
                              targets_a.detach().cpu(),
                              topk=(1, 5))
        t1_b, t5_b = accuracy(output_b.detach().cpu(),
                              targets_b.detach().cpu(),
                              topk=(1, 5))
        top1_a.update(t1_a.item(), output_a.size(0))
        top5_a.update(t5_a.item(), output_a.size(0))
        top1_b.update(t1_b.item(), output_b.size(0))
        top5_b.update(t5_b.item(), output_b.size(0))
        losses_a.update(loss_a.item(), output_a.size(0))
        losses_b.update(loss_b.item(), output_b.size(0))
        losses.update(loss.item(), output_a.size(0))
        batch_time.update(time.time() - t0)
        t0 = time.time()
        to_print = '[Epoch:{}, Batch {}/{} in {:.3f} s]' \
                   '[Loss {:.4f}[avg:{:.4f}], loss_a {:.4f}[avg:{:.4f}], loss_b {:.4f}[avg:{:.4f}], ' \
                   'Top1_a {:.3f}[avg:{:.3f}], Top5_a {:.3f}[avg:{:.3f}], ' \
                   'Top1_b {:.3f}[avg:{:.3f}], Top5_b {:.3f}[avg:{:.3f}]], ' \
                   'LR {:.6f}'.format(
                       cur_epoch, batch_idx, len(train_iterator), batch_time.val,
                       losses.val, losses.avg, losses_a.val, losses_a.avg,
                       losses_b.val, losses_b.avg,
                       top1_a.val, top1_a.avg, top5_a.val, top5_a.avg,
                       top1_b.val, top1_b.avg, top5_b.val, top5_b.avg,
                       lr_scheduler.get_lr()[0])
        print_and_save(to_print, log_file)
Example No. 14
def main():
    args, model_name = parse_args('mfnet', val=False)

    output_dir, log_file = init_folders(args.base_output_dir, model_name,
                                        args.resume, args.logging)
    print_and_save(args, log_file)
    print_and_save("Model name: {}".format(model_name), log_file)
    cudnn.benchmark = True

    mfnet_3d = MFNET_3D if not args.double_output else MFNET_3D_DO
    num_classes = args.verb_classes if not args.double_output else (
        args.verb_classes, args.noun_classes)
    model_ft = mfnet_3d(num_classes, dropout=args.dropout)
    if args.pretrained:
        checkpoint = torch.load(args.pretrained_model_path)
        # strip the 'module.' prefix that DataParallel adds to parameter names
        base_dict = {'.'.join(k.split('.')[1:]): v
                     for k, v in checkpoint['state_dict'].items()}
        # drop the classifier weights; they are re-initialized for the new class count
        base_dict = {k: v for k, v in base_dict.items() if 'classifier' not in k}
        model_ft.load_state_dict(base_dict, strict=False)
    model_ft.cuda(device=args.gpus[0])
    model_ft = torch.nn.DataParallel(model_ft,
                                     device_ids=args.gpus,
                                     output_device=args.gpus[0])
    print_and_save("Model loaded on gpu {} devices".format(args.gpus),
                   log_file)

    # load dataset and train and validation iterators
    train_sampler = prepare_sampler("train", args.clip_length,
                                    args.frame_interval)
    train_transforms = transforms.Compose([
        RandomScale(make_square=True,
                    aspect_ratio=[0.8, 1. / 0.8],
                    slen=[224, 288]),
        RandomCrop((224, 224)),
        RandomHorizontalFlip(),
        RandomHLS(vars=[15, 35, 25]),
        ToTensorVid(),
        Normalize(mean=mean_3d, std=std_3d)
    ])
    train_loader = VideoDatasetLoader(train_sampler,
                                      args.train_list,
                                      num_classes=num_classes,
                                      batch_transform=train_transforms,
                                      img_tmpl='frame_{:010d}.jpg')
    train_iterator = torch.utils.data.DataLoader(train_loader,
                                                 batch_size=args.batch_size,
                                                 shuffle=True,
                                                 num_workers=args.num_workers,
                                                 pin_memory=True)

    test_sampler = prepare_sampler("val", args.clip_length,
                                   args.frame_interval)
    test_transforms = transforms.Compose([
        Resize((256, 256), False),
        CenterCrop((224, 224)),
        ToTensorVid(),
        Normalize(mean=mean_3d, std=std_3d)
    ])
    test_loader = VideoDatasetLoader(test_sampler,
                                     args.test_list,
                                     num_classes=num_classes,
                                     batch_transform=test_transforms,
                                     img_tmpl='frame_{:010d}.jpg')
    test_iterator = torch.utils.data.DataLoader(test_loader,
                                                batch_size=args.batch_size,
                                                shuffle=False,
                                                num_workers=args.num_workers,
                                                pin_memory=True)

    # config optimizer
    param_base_layers = []
    param_new_layers = []
    name_base_layers = []
    for name, param in model_ft.named_parameters():
        if args.pretrained:
            if 'classifier' in name:
                param_new_layers.append(param)
            else:
                param_base_layers.append(param)
                name_base_layers.append(name)
        else:
            param_new_layers.append(param)

    optimizer = torch.optim.SGD([{
        'params': param_base_layers,
        'lr_mult': args.lr_mult_base
    }, {
        'params': param_new_layers,
        'lr_mult': args.lr_mult_new
    }],
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.decay,
                                nesterov=True)

    # `checkpoint` only exists when args.pretrained is set, so guard on it too
    if args.resume and args.pretrained and 'optimizer' in checkpoint:
        optimizer.load_state_dict(checkpoint['optimizer'])

    ce_loss = torch.nn.CrossEntropyLoss().cuda(device=args.gpus[0])
    lr_scheduler = load_lr_scheduler(args.lr_type, args.lr_steps, optimizer,
                                     len(train_iterator))

    if not args.double_output:
        new_top1, top1 = 0.0, 0.0
    else:
        new_top1, top1 = (0.0, 0.0), (0.0, 0.0)
    train = train_cnn if not args.double_output else train_cnn_do
    test = test_cnn if not args.double_output else test_cnn_do
    for epoch in range(args.max_epochs):
        train(model_ft, optimizer, ce_loss, train_iterator, args.mixup_a,
              epoch, log_file, args.gpus, lr_scheduler)
        if (epoch + 1) % args.eval_freq == 0:
            if args.eval_on_train:
                test(model_ft, ce_loss, train_iterator, epoch, "Train",
                     log_file, args.gpus)
            new_top1 = test(model_ft, ce_loss, test_iterator, epoch, "Test",
                            log_file, args.gpus)
            top1 = save_checkpoints(model_ft, optimizer, top1, new_top1,
                                    args.save_all_weights, output_dir,
                                    model_name, epoch, log_file)
def main():
    args, model_name = parse_args('mfnet', val=False)

    output_dir, log_file = init_folders(args.base_output_dir, model_name, args.resume, args.logging)
    print_and_save(args, log_file)
    print_and_save("Model name: {}".format(model_name), log_file)
    cudnn.benchmark = True

    mfnet_3d = MFNET_3D_MO  # mfnet 3d multi output
    kwargs = {}
    num_coords = 0
    objectives_text = "Objectives: "
    num_classes = [args.action_classes, args.verb_classes, args.noun_classes]
    num_objectives = 0
    if args.action_classes > 0: # plan to use in EPIC
        objectives_text += " actions {}, ".format(args.action_classes)
        num_objectives += 1
    if args.verb_classes > 0:
        objectives_text += " verbs {}, ".format(args.verb_classes)
        num_objectives += 1
    if args.noun_classes > 0:
        objectives_text += " nouns {}, ".format(args.noun_classes)
        num_objectives += 1
    # if args.use_gaze: # unused in EPIC
    #     objectives_text += " gaze, "
    #     num_coords += 1
    #     num_objectives += 1
    if args.use_hands:
        objectives_text += " hands, "
        num_coords += 2
        num_objectives += 1
    kwargs["num_coords"] = num_coords
    print_and_save("Training for {} objective(s)".format(num_objectives), log_file)
    print_and_save(objectives_text, log_file)
    # for now, limit the tasks to a max of 3 and don't take extra nouns into account
    model_ft = mfnet_3d(num_classes, dropout=args.dropout, **kwargs)
    if args.pretrained:
        checkpoint = torch.load(args.pretrained_model_path)
        # strip the 'module.' prefix that DataParallel adds to parameter names
        base_dict = {'.'.join(k.split('.')[1:]): v for k, v in checkpoint['state_dict'].items()}
        # drop the classifier weights; they are re-initialized for the new class count
        base_dict = {k: v for k, v in base_dict.items() if 'classifier' not in k}
        model_ft.load_state_dict(base_dict, strict=False)
    model_ft.cuda(device=args.gpus[0])
    model_ft = torch.nn.DataParallel(model_ft, device_ids=args.gpus, output_device=args.gpus[0])
    print_and_save("Model loaded on gpu {} devices".format(args.gpus), log_file)
    if args.resume: #Note: When resuming the 1task+hand models from before 18-June-19 I should be using MFNET_3D from mfnet_3d_hands.py
        model_ft, ckpt_path = resume_checkpoint(model_ft, output_dir, model_name, args.resume_from)
        print_and_save("Resuming training from: {}".format(ckpt_path), log_file)

    # load train-val sampler
    train_sampler = prepare_sampler("train", args.clip_length, args.frame_interval)
    train_transforms = transforms.Compose([
        RandomScale(make_square=True, aspect_ratio=[0.8, 1./0.8], slen=[224, 288]),
        RandomCrop((224, 224)), RandomHorizontalFlip(), RandomHLS(vars=[15, 35, 25]),
        ToTensorVid(), Normalize(mean=mean_3d, std=std_3d)])
    train_loader = VideoAndPointDatasetLoader(train_sampler, args.train_list, point_list_prefix=args.bpv_prefix,
                                              num_classes=num_classes, img_tmpl='frame_{:010d}.jpg',
                                              norm_val=[456., 256., 456., 256.], batch_transform=train_transforms,
                                              use_hands=args.use_hands)
    train_iterator = torch.utils.data.DataLoader(train_loader, batch_size=args.batch_size,
                                                 shuffle=True, num_workers=args.num_workers,
                                                 pin_memory=True)

    test_sampler = prepare_sampler("val", args.clip_length, args.frame_interval)
    test_transforms = transforms.Compose([Resize((256, 256), False), CenterCrop((224, 224)),
                                          ToTensorVid(), Normalize(mean=mean_3d, std=std_3d)])

    # make train-val dataset loaders
    test_loader = VideoAndPointDatasetLoader(test_sampler, args.test_list, point_list_prefix=args.bpv_prefix,
                                             num_classes=num_classes, img_tmpl='frame_{:010d}.jpg',
                                             norm_val=[456., 256., 456., 256.], batch_transform=test_transforms,
                                             use_hands=args.use_hands)

    test_iterator = torch.utils.data.DataLoader(test_loader, batch_size=args.batch_size,
                                                shuffle=False, num_workers=args.num_workers,
                                                pin_memory=True)

    # config optimizer
    param_base_layers = []
    param_new_layers = []
    name_base_layers = []
    for name, param in model_ft.named_parameters():
        if args.pretrained:
            if 'classifier' in name:
                param_new_layers.append(param)
            else:
                param_base_layers.append(param)
                name_base_layers.append(name)
        else:
            param_new_layers.append(param)

    optimizer = torch.optim.SGD([{'params': param_base_layers, 'lr_mult': args.lr_mult_base},
                                 {'params': param_new_layers, 'lr_mult': args.lr_mult_new}],
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.decay,
                                nesterov=True)

    # if args.resume and 'optimizer' in checkpoint:
    #     optimizer.load_state_dict(checkpoint['optimizer'])

    ce_loss = torch.nn.CrossEntropyLoss().cuda(device=args.gpus[0])
    # mse_loss = torch.nn.MSELoss().cuda(device=args.gpus[0])
    lr_scheduler = load_lr_scheduler(args.lr_type, args.lr_steps, optimizer, len(train_iterator))

    train = train_mfnet_mo
    test = test_mfnet_mo
    num_valid_classes = len([cls for cls in num_classes if cls > 0])
    new_top1, top1 = [0.0] * num_valid_classes, [0.0] * num_valid_classes
    for epoch in range(args.max_epochs):
        train(model_ft, optimizer, ce_loss, train_iterator, num_valid_classes, False, args.use_hands, epoch,
              log_file, args.gpus, lr_scheduler)
        if (epoch + 1) % args.eval_freq == 0:
            if args.eval_on_train:
                test(model_ft, ce_loss, train_iterator, num_valid_classes, False, args.use_hands, epoch,
                     "Train", log_file, args.gpus)
            new_top1 = test(model_ft, ce_loss, test_iterator, num_valid_classes, False, args.use_hands, epoch,
                            "Test", log_file, args.gpus)
            top1 = save_mt_checkpoints(model_ft, optimizer, top1, new_top1, args.save_all_weights, output_dir,
                                       model_name, epoch, log_file)
def main():
    args, model_name = parse_args('lstm_diffs', val=False)
    # init dirs, names
    output_dir, log_file = init_folders(args.base_output_dir, model_name,
                                        args.resume, args.logging)
    print_and_save(args, log_file)
    print_and_save("Model name: {}".format(model_name), log_file)
    cudnn.benchmark = True
    # init model
    lstm_model = LSTM_Hands
    kwargs = {'dropout': args.dropout, 'bidir': args.lstm_bidir}
    model_ft = lstm_model(args.lstm_input, args.lstm_hidden, args.lstm_layers,
                          args.verb_classes, **kwargs)
    model_ft = torch.nn.DataParallel(model_ft).cuda()
    print_and_save("Model loaded to gpu", log_file)
    if args.resume:
        model_ft, ckpt_path = resume_checkpoint(model_ft, output_dir,
                                                model_name, args.resume_from)
        print_and_save("Resuming training from: {}".format(ckpt_path),
                       log_file)

    norm_val = [1., 1., 1., 1.] if args.no_norm_input else [456., 256., 456., 256.]

    #    train_loader = PointPolarDatasetLoader(args.train_list, max_seq_length=args.lstm_seq_size,
    #                                           norm_val=norm_val)
    #    test_loader = PointPolarDatasetLoader(args.test_list, max_seq_length=args.lstm_seq_size,
    #                                          norm_val=norm_val)
    #    train_loader = AnglesDatasetLoader(args.train_list, max_seq_length=args.lstm_seq_size)
    #    test_loader = AnglesDatasetLoader(args.test_list, max_seq_length=args.lstm_seq_size)

    train_loader = PointDiffDatasetLoader(args.train_list,
                                          max_seq_length=args.lstm_seq_size,
                                          norm_val=norm_val)
    test_loader = PointDiffDatasetLoader(args.test_list,
                                         max_seq_length=args.lstm_seq_size,
                                         norm_val=norm_val)

    train_iterator = torch.utils.data.DataLoader(train_loader,
                                                 batch_size=args.batch_size,
                                                 shuffle=True,
                                                 num_workers=args.num_workers,
                                                 collate_fn=lstm_collate,
                                                 pin_memory=True)
    test_iterator = torch.utils.data.DataLoader(test_loader,
                                                batch_size=args.batch_size,
                                                shuffle=False,
                                                num_workers=args.num_workers,
                                                collate_fn=lstm_collate,
                                                pin_memory=True)

    params_to_update = model_ft.parameters()
    print_and_save("Params to learn:", log_file)
    for name, param in model_ft.named_parameters():
        if param.requires_grad:
            print_and_save("\t{}".format(name), log_file)

    optimizer = torch.optim.SGD(params_to_update,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.decay)
    ce_loss = torch.nn.CrossEntropyLoss().cuda()

    lr_scheduler = load_lr_scheduler(args.lr_type, args.lr_steps, optimizer,
                                     len(train_iterator))

    train_fun, test_fun = (train_lstm, test_lstm)
    new_top1, top1 = 0.0, 0.0
    for epoch in range(args.max_epochs):
        train_fun(model_ft, optimizer, ce_loss, train_iterator, epoch,
                  log_file, lr_scheduler)
        if (epoch + 1) % args.eval_freq == 0:
            if args.eval_on_train:
                test_fun(model_ft, ce_loss, train_iterator, epoch, "Train",
                         log_file)
            new_top1 = test_fun(model_ft, ce_loss, test_iterator, epoch,
                                "Test", log_file)
            top1 = save_checkpoints(model_ft, optimizer, top1, new_top1,
                                    args.save_all_weights, output_dir,
                                    model_name, epoch, log_file)
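Both DataLoaders above rely on a custom lstm_collate because the hand-point sequences vary in length. Its implementation lives elsewhere in the repo; a minimal sketch of what such a collate function typically does, assuming each dataset item is a (points, label, name) tuple (that layout is an assumption), could look like this:

import torch
from torch.nn.utils.rnn import pad_sequence

def lstm_collate_sketch(batch):
    # sort longest-first, as pack_padded_sequence expects by default
    batch = sorted(batch, key=lambda item: item[0].shape[0], reverse=True)
    sequences = [torch.as_tensor(item[0], dtype=torch.float32) for item in batch]
    seq_lengths = torch.tensor([seq.shape[0] for seq in sequences])
    labels = torch.tensor([item[1] for item in batch])
    names = [item[2] for item in batch]
    # pad to the longest sequence in the batch: (batch, max_len, feat_dim)
    padded = pad_sequence(sequences, batch_first=True)
    return padded, seq_lengths, labels, names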
Example no. 17
def eval_final_print_mt(video_preds,
                        video_labels,
                        task_id,
                        current_classes,
                        log_file,
                        annotations_path=None,
                        val_list=None,
                        task_type='None',
                        action_file=None):
    cf, recall, precision, cls_acc, mean_cls_acc, top1_acc = analyze_preds_labels(
        video_preds,
        video_labels,
        all_class_indices=list(range(int(current_classes))))
    print_and_save("Task {}".format(task_id), log_file)
    print_and_save(cf, log_file)

    if annotations_path:
        brd_splits = '_brd' in val_list
        valid_verb_indices, verb_ids_sorted, valid_noun_indices, noun_ids_sorted = get_classes(
            annotations_path, val_list, brd_splits, 100)
        if task_type == 'EpicActions':
            valid_indices = get_action_classes(annotations_path, val_list,
                                               brd_splits, 100, action_file)
            all_indices = list(range(2521))
        elif task_type == 'EpicVerbs':  # error prone if nouns are ever trained on their own
            valid_indices, ids_sorted = valid_verb_indices, verb_ids_sorted
            # manually set the verb class count to avoid loading the verb names file that loads 125...
            all_indices = list(range(125))
        elif task_type == 'EpicNouns':
            valid_indices, ids_sorted = valid_noun_indices, noun_ids_sorted
            all_indices = list(range(352))
        ave_pre, ave_rec, _ = avg_rec_prec_trimmed(video_preds, video_labels,
                                                   valid_indices, all_indices)
        print_and_save("{} > 100 instances at training:".format(task_type),
                       log_file)
        print_and_save("Classes are {}".format(valid_indices), log_file)
        print_and_save(
            "average precision {0:02f}%, average recall {1:02f}%".format(
                ave_pre, ave_rec), log_file)
        if task_type != 'EpicActions':
            print_and_save("Most common {} in training".format(task_type),
                           log_file)
            print_and_save(
                "15 {} rec {}".format(task_type, recall[ids_sorted[:15]]),
                log_file)
            print_and_save(
                "15 {} pre {}".format(task_type, precision[ids_sorted[:15]]),
                log_file)

    print_and_save("Cls Rec {}".format(recall), log_file)
    print_and_save("Cls Pre {}".format(precision), log_file)
    print_and_save("Cls Acc {}".format(cls_acc), log_file)
    print_and_save("Mean Cls Acc {:.02f}%".format(mean_cls_acc), log_file)
    print_and_save("Dataset Acc {}".format(top1_acc), log_file)
    return mean_cls_acc, top1_acc
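analyze_preds_labels is defined elsewhere in the repo. Its two headline numbers, mean per-class accuracy and overall dataset accuracy, can be reproduced from a confusion matrix; a self-contained sketch (not the repo's code) follows:

import numpy as np

def headline_metrics(video_preds, video_labels, num_classes):
    # confusion matrix: rows are true classes, columns are predictions
    cf = np.zeros((num_classes, num_classes), dtype=np.int64)
    for pred, label in zip(video_preds, video_labels):
        cf[int(label), int(pred)] += 1
    cls_counts = cf.sum(axis=1)
    cls_hits = np.diag(cf)
    # per-class accuracy; guard empty classes, then average only over
    # classes that actually appear in the validation set
    cls_acc = cls_hits / np.maximum(cls_counts, 1)
    mean_cls_acc = 100.0 * cls_acc[cls_counts > 0].mean()
    top1_acc = 100.0 * cls_hits.sum() / cf.sum()
    return cf, cls_acc, mean_cls_acc, top1_acc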
Example no. 18
def validate_lstm_attn(model, criterion, test_iterator, cur_epoch, dataset,
                       log_file, args):
    losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
    predictions = []
    # for attention
    all_predictions = torch.zeros((0, args.lstm_seq_size, 1))
    all_attentions = torch.zeros((0, args.lstm_seq_size, args.lstm_seq_size))
    all_targets = torch.zeros((0, 1))
    all_video_names = []
    num_changing_in_seq = 0
    for_the_better, for_the_worse = [], []

    print_and_save(
        'Evaluating after epoch: {} on {} set'.format(cur_epoch, dataset),
        log_file)
    with torch.no_grad():
        model.eval()
        for batch_idx, (inputs, seq_lengths, targets,
                        video_names) in enumerate(test_iterator):
            inputs = torch.as_tensor(inputs).cuda()
            targets = torch.as_tensor(targets).cuda()

            inputs = inputs.transpose(1, 0)
            outputs, attn_weights = model(inputs, seq_lengths)

            loss = 0
            for output in outputs:
                loss += criterion(output, targets)
            loss /= len(outputs)

            outputs = torch.stack(outputs)
            # here we have the prediction for every step of the sequence, for the whole batch
            outputs = torch.argmax(outputs, dim=2).detach().cpu()
            all_predictions = torch.cat(
                (all_predictions, torch.transpose(outputs, 0, 1).float()),
                dim=0)
            outputs = outputs.numpy()
            maj_vote = [
                np.bincount(outputs[:, kk]).argmax()
                for kk in range(len(outputs[0]))
            ]  # the argmax over the bincount performs majority voting
            for i in range(len(maj_vote)):  # iterate over the batch
                # count sequences whose prediction changes at some step
                if (outputs[:, i] != outputs[-1, i]).any():
                    num_changing_in_seq += 1
                    # compare the majority vote to the prediction of the last step in the sequence
                    if maj_vote[i] != outputs[-1, i]:
                        tar = targets[i].cpu().numpy()
                        if maj_vote[i] == tar:
                            for_the_better.append(video_names[i])
                        elif outputs[-1, i] == tar:
                            for_the_worse.append(video_names[i])
            outputs = maj_vote
            attn_weights = torch.transpose(torch.stack(attn_weights), 0,
                                           1).detach().cpu()
            all_attentions = torch.cat((all_attentions, attn_weights), dim=0)
            all_targets = torch.cat(
                (all_targets, targets.detach().cpu().float()), dim=0)
            all_video_names = all_video_names + video_names

            batch_preds = []
            for j in range(len(outputs)):
                res = outputs[j]
                label = targets[j].cpu().numpy()
                predictions.append([res, label])
                batch_preds.append("{}, P-L:{}-{}".format(
                    video_names[j], res, label))

            # `output` is the leaked loop variable holding the last step's
            # logits; `outputs` now holds the majority votes and is only used
            # for the confusion matrix
            t1, t5 = accuracy(output.detach().cpu(),
                              targets.detach().cpu(),
                              topk=(1, 5))
            top1.update(t1.item(), output.size(0))
            top5.update(t5.item(), output.size(0))
            losses.update(loss.item(), output.size(0))

            print_and_save(
                '[Batch {}/{}][Top1 {:.3f}[avg:{:.3f}], Top5 {:.3f}[avg:{:.3f}]]\n\t{}'
                .format(batch_idx, len(test_iterator), top1.val, top1.avg,
                        top5.val, top5.avg, batch_preds), log_file)

        print_and_save(
            "Num samples with differences {}".format(num_changing_in_seq),
            log_file)
        print_and_save(
            "{} changed for the better\n{}".format(len(for_the_better),
                                                   for_the_better), log_file)
        print_and_save(
            "{} changed for the worse\n{}".format(len(for_the_worse),
                                                  for_the_worse), log_file)

        print_and_save(
            '{} Results: Loss {:.3f}, Top1 {:.3f}, Top5 {:.3f}'.format(
                dataset, losses.avg, top1.avg, top5.avg), log_file)
        if args.save_attentions:
            all_predictions = all_predictions.numpy().astype(int)  # np.int was removed in NumPy 1.24
            all_targets = all_targets.numpy().astype(int)
            output_dir = os.path.join(os.path.dirname(log_file), "figures")
            os.makedirs(output_dir, exist_ok=True)
            for i in range(len(all_targets)):
                name_parts = all_video_names[i].split("\\")[-2:]
                output_file = os.path.join(
                    output_dir,
                    "{}_{}.png".format(name_parts[0],
                                       name_parts[1].split('.')[0]))
                showAttention(args.lstm_seq_size, all_predictions[i],
                              all_targets[i], all_attentions[i], output_file)

    return top1.avg, predictions
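The majority-voting logic above is easy to verify in isolation. A toy demonstration of the same np.bincount trick on a (time_steps, batch) array of per-step predictions:

import numpy as np

step_preds = np.array([[2, 0, 5],
                       [2, 1, 5],
                       [3, 1, 5],
                       [2, 1, 5]])  # (time_steps, batch)

# majority vote per sample across time steps
maj_vote = [np.bincount(step_preds[:, k]).argmax()
            for k in range(step_preds.shape[1])]
last_step = step_preds[-1]
# samples whose prediction changed at some step within the sequence
changed = [(step_preds[:, k] != last_step[k]).any()
           for k in range(step_preds.shape[1])]
print(maj_vote)            # [2, 1, 5]
print(last_step.tolist())  # [2, 1, 5]
print(changed)             # [True, True, False]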
def main():
    args = parse_args('mfnet', val=True)

    output_dir = os.path.dirname(args.ckpt_path)
    log_file = make_log_file_name(output_dir, args)
    print_and_save(args, log_file)
    cudnn.benchmark = True

    mfnet_3d = MFNET_3D_MO
    num_classes = [args.action_classes, args.verb_classes, args.noun_classes]
    validate = validate_mfnet_mo

    kwargs = {}
    num_coords = 0
    if args.use_gaze:
        num_coords += 1
    if args.use_hands:
        num_coords += 2
    kwargs['num_coords'] = num_coords

    model_ft = mfnet_3d(num_classes, **kwargs)
    model_ft = torch.nn.DataParallel(model_ft).cuda()
    checkpoint = torch.load(args.ckpt_path, map_location={'cuda:1': 'cuda:0'})
    model_ft.load_state_dict(checkpoint['state_dict'])
    print_and_save("Model loaded on gpu {} devices".format(args.gpus), log_file)

    ce_loss = torch.nn.CrossEntropyLoss().cuda()

    num_valid_classes = len([cls for cls in num_classes if cls > 0])
    valid_classes = [cls for cls in num_classes if cls > 0]
    overall_top1 = [0]*num_valid_classes
    overall_mean_cls_acc = [0]*num_valid_classes
    for i in range(args.mfnet_eval):
        crop_type = CenterCrop((224, 224)) if args.eval_crop == 'center' else RandomCrop((224, 224))
        if args.eval_sampler == 'middle':
            val_sampler = MiddleSampling(num=args.clip_length)
        else:
            val_sampler = RandomSampling(num=args.clip_length,
                                         interval=args.frame_interval,
                                         speed=[1.0, 1.0], seed=i)

        val_transforms = transforms.Compose([Resize((256, 256), False), crop_type,
                                             ToTensorVid(), Normalize(mean=mean_3d, std=std_3d)])

        # val_loader = FromVideoDatasetLoaderGulp(val_sampler, args.val_list, 'GTEA', num_classes, GTEA_CLASSES,
        #                                         use_gaze=args.use_gaze, gaze_list_prefix=args.gaze_list_prefix,
        #                                         use_hands=args.use_hands, hand_list_prefix=args.hand_list_prefix,
        #                                         batch_transform=val_transforms, extra_nouns=False, validation=True)
        val_loader = VideoFromImagesDatasetLoader(val_sampler, args.val_list, 'GTEA', num_classes, GTEA_CLASSES,
                                                  use_gaze=args.use_gaze, gaze_list_prefix=args.gaze_list_prefix,
                                                  use_hands=args.use_hands, hand_list_prefix=args.hand_list_prefix,
                                                  batch_transform=val_transforms, extra_nouns=False, validation=True)
        val_iter = torch.utils.data.DataLoader(val_loader,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=args.num_workers,
                                               pin_memory=True)

        # evaluate dataset
        top1, outputs = validate(model_ft, ce_loss, val_iter, num_valid_classes, args.use_gaze, args.use_hands,
                                 checkpoint['epoch'], args.val_list.split("\\")[-1], log_file)

        # calculate statistics
        for ind in range(num_valid_classes):
            video_preds = [x[0] for x in outputs[ind]]
            video_labels = [x[1] for x in outputs[ind]]
            mean_cls_acc, top1_acc = eval_final_print_mt(video_preds, video_labels, ind, valid_classes[ind], log_file)
            overall_mean_cls_acc[ind] += mean_cls_acc
            overall_top1[ind] += top1_acc

    print_and_save("", log_file)
    text_mean_cls_acc = "Mean Cls Acc ({} times)".format(args.mfnet_eval)
    text_dataset_acc = "Dataset Acc ({} times)".format(args.mfnet_eval)
    for ind in range(num_valid_classes):
        text_mean_cls_acc += ", T{}::{} ".format(ind, (overall_mean_cls_acc[ind] / args.mfnet_eval))
        text_dataset_acc += ", T{}::{} ".format(ind, (overall_top1[ind] / args.mfnet_eval))
    print_and_save(text_mean_cls_acc, log_file)
    print_and_save(text_dataset_acc, log_file)
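The loop above averages the metrics over args.mfnet_eval stochastic runs: RandomSampling draws a different clip each run (seeded with the run index for reproducibility) while MiddleSampling is deterministic. The samplers are defined in the data-loading module; a rough sketch of the frame-index selection they plausibly perform (the signatures and details here are assumptions, e.g. MiddleSampling's window argument is ignored):

import random

def middle_sampling_indices(num, num_frames):
    # `num` consecutive frames centered in the video (assumed behavior)
    start = max((num_frames - num) // 2, 0)
    return [min(start + i, num_frames - 1) for i in range(num)]

def random_sampling_indices(num, interval, num_frames, seed):
    # `num` frames with a fixed stride from a random, seeded start (assumed behavior)
    rng = random.Random(seed)
    span = (num - 1) * interval + 1
    start = rng.randint(0, max(num_frames - span, 0))
    return [min(start + i * interval, num_frames - 1) for i in range(num)]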
def main():
    args = parse_args('resnet', val=True)

    output_dir = os.path.dirname(args.ckpt_path)
    log_file = os.path.join(
        output_dir,
        "results-accuracy-validation.txt") if args.logging else None
    print_and_save(args, log_file)
    cudnn.benchmark = True

    model_ft = resnet_loader(args.verb_classes, 0, False, False,
                             args.resnet_version,
                             1 if args.channels == 'G' else 3, args.no_resize)
    model_ft = torch.nn.DataParallel(model_ft).cuda()
    checkpoint = torch.load(args.ckpt_path)
    # the lines below are needed if the network was trained with DataParallel
    # but the model is now created without it
    #    base_dict = {'.'.join(k.split('.')[1:]): v for k, v in list(checkpoint['state_dict'].items())}
    #    model_ft.load_state_dict(base_dict)
    model_ft.load_state_dict(checkpoint['state_dict'])
    print_and_save("Model loaded to gpu", log_file)

    mean = meanRGB if args.channels == 'RGB' else meanG
    std = stdRGB if args.channels == 'RGB' else stdG
    normalize = transforms.Normalize(mean=mean, std=std)
    if args.no_resize:
        resize = WidthCrop()
    else:
        if args.pad:
            resize = ResizePadFirst(
                224, False, interpolation_methods[args.inter]
            )  # binarize is intentionally set to False here
        else:
            resize = Resize((224, 224), False,
                            interpolation_methods[args.inter])
    test_transforms = transforms.Compose(
        [resize,
         To01Range(args.bin_img),
         transforms.ToTensor(), normalize])
    dataset_loader = ImageDatasetLoader(args.val_list,
                                        num_classes=args.verb_classes,
                                        batch_transform=test_transforms,
                                        channels=args.channels,
                                        validation=True)
    collate_fn = torch.utils.data.dataloader.default_collate
    dataset_iterator = torch.utils.data.DataLoader(
        dataset_loader,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        collate_fn=collate_fn,
        pin_memory=True)

    ce_loss = torch.nn.CrossEntropyLoss().cuda()

    validate = validate_resnet
    top1, outputs = validate(model_ft, ce_loss, dataset_iterator,
                             checkpoint['epoch'],
                             args.val_list.split("\\")[-1], log_file)

    #video_pred = [np.argmax(x[0].detach().cpu().numpy()) for x in outputs]
    #video_labels = [x[1].cpu().numpy() for x in outputs]
    video_preds = [x[0] for x in outputs]
    video_labels = [x[1] for x in outputs]

    cf, recall, precision, cls_acc, mean_cls_acc, top1_acc = analyze_preds_labels(
        video_preds, video_labels)

    print_and_save(cf, log_file)
    print_and_save("Cls Rec {}".format(recall), log_file)
    print_and_save("Cls Pre {}".format(precision), log_file)
    print_and_save("Cls Acc {}".format(cls_acc), log_file)
    print_and_save("Mean Cls Acc {:.02f}%".format(mean_cls_acc), log_file)
    print_and_save("Dataset Acc {}".format(top1_acc), log_file)