Example #1
# Imports assumed by this snippet (module paths follow the usual layout
# of this repository; adjust if your tree differs):
import os
import json

import torch

from opts import parse_opts_online
from model import generate_model
from mean import get_mean, get_std

def load_models(opt):
    opt.resume_path = opt.resume_path_det
    opt.pretrain_path = opt.pretrain_path_det
    opt.sample_duration = opt.sample_duration_det
    opt.model = opt.model_det
    opt.model_depth = opt.model_depth_det
    opt.modality = opt.modality_det
    opt.resnet_shortcut = opt.resnet_shortcut_det
    opt.n_classes = opt.n_classes_det
    opt.n_finetune_classes = opt.n_finetune_classes_det

    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)

    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value)
    opt.std = get_std(opt.norm_value)

    print(opt)
    with open(os.path.join(opt.result_path, 'opts_det.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)

    torch.manual_seed(opt.manual_seed)

    detector, parameters = generate_model(opt)

    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']

        detector.load_state_dict(checkpoint['state_dict'])

    print('Model 1 \n', detector)
    pytorch_total_params = sum(p.numel() for p in detector.parameters()
                               if p.requires_grad)
    print("Total number of trainable parameters: ", pytorch_total_params)

    # Re-parse the args so the detector-specific overrides above do not
    # leak into the classifier options below
    opt = parse_opts_online()

    opt.resume_path = opt.resume_path_clf
    opt.pretrain_path = opt.pretrain_path_clf
    opt.sample_duration = opt.sample_duration_clf
    opt.model = opt.model_clf
    opt.model_depth = opt.model_depth_clf
    opt.modality = opt.modality_clf
    opt.resnet_shortcut = opt.resnet_shortcut_clf
    opt.n_classes = opt.n_classes_clf
    opt.n_finetune_classes = opt.n_finetune_classes_clf
    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)

    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    if opt.model == 'ssar':
        opt.arch = opt.model
    else:
        opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value)
    opt.std = get_std(opt.norm_value)

    print(opt)
    with open(os.path.join(opt.result_path, 'opts_clf.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)

    torch.manual_seed(opt.manual_seed)
    classifier, parameters = generate_model(opt)

    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']

        classifier.load_state_dict(checkpoint['state_dict'])

    print('Model 2 \n', classifier)
    pytorch_total_params = sum(p.numel() for p in classifier.parameters()
                               if p.requires_grad)
    print("Total number of trainable parameters: ", pytorch_total_params)

    return detector, classifier
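
A minimal usage sketch for this example, assuming parse_opts_online comes from the project's opts module and that the *_det / *_clf option pairs are supplied on the command line:

opt = parse_opts_online()
detector, classifier = load_models(opt)  # builds and restores both networks
detector.eval()
classifier.eval()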
Example #2
from temporal_transforms import *
from target_transforms import ClassLabel
from dataset import get_online_data
from utils import AverageMeter, LevenshteinDistance, Queue
from opts import parse_opts_online  # assumed source of parse_opts_online (used below)

import pdb
import time
import numpy as np
import datetime


def weighting_func(x):
    return (1 / (1 + np.exp(-0.2 * (x - 9))))
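
# Shape of this sigmoid weighting (values follow from the constants above):
# 0.5 at x = 9, ~0.17 at x = 1, ~0.90 at x = 20, so the first frames of a
# newly active gesture contribute the least weight to the running mean.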


opt = parse_opts_online()



def load_models(opt):
    opt.resume_path = opt.resume_path_clf
    opt.pretrain_path = opt.pretrain_path_clf
    opt.sample_duration = opt.sample_duration_clf
    opt.model = opt.model_clf
    opt.model_depth = opt.model_depth_clf
    opt.width_mult = opt.width_mult_clf
    opt.modality = opt.modality_clf
    opt.resnet_shortcut = opt.resnet_shortcut_clf
    opt.n_classes = opt.n_classes_clf
    opt.n_finetune_classes = opt.n_finetune_classes_clf
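
Both examples smooth per-frame softmax outputs through Queue (imported from utils above), reading back median, ma, or ewma estimates in the evaluation loop of the next example. A minimal sketch of the interface that loop relies on, assuming a fixed-size window of probability vectors (the real utils.Queue may differ in detail):

import numpy as np

class Queue:
    # Fixed-size sliding window of per-frame class-probability vectors.
    def __init__(self, max_size, n_classes):
        self.max_size = max_size
        self.buffer = np.zeros((max_size, n_classes))

    def enqueue(self, probs):
        # Drop the oldest vector, append the newest.
        self.buffer = np.roll(self.buffer, -1, axis=0)
        self.buffer[-1] = probs

    @property
    def median(self):
        return np.median(self.buffer, axis=0)

    @property
    def ma(self):
        # Plain moving average over the window.
        return self.buffer.mean(axis=0)

    @property
    def ewma(self):
        # Exponentially weighted average; the newest frame weighs the most.
        weights = np.exp(np.linspace(-1.0, 0.0, self.max_size))
        return (weights / weights.sum()) @ self.buffer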
Example #3
# Imports assumed by this snippet (module paths follow the usual layout
# of this repository; load_models and weighting_func are the functions
# defined in Examples #1 and #2 above):
import os
import csv
import glob

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision import transforms

from opts import parse_opts_online
from spatial_transforms import Compose, Normalize, Scale, CenterCrop, ToTensor
from target_transforms import ClassLabel
from dataset import get_online_data
from utils import AverageMeter, Queue

def main(clf_threshold_pre):
    print(f'Early-detection threshold: {clf_threshold_pre}')

    opt = parse_opts_online()

    detector, classifier = load_models(opt)

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    if opt.model_clf == 'ssar':
        opt.sample_size_clf = (126, 224)
        opt.mean_clf = (0.485, 0.456, 0.406)
        opt.std_clf = (0.229, 0.224, 0.225)

        spatial_transform_clf = transforms.Compose([
            transforms.Resize(opt.sample_size_clf),
            transforms.ToTensor(),
            transforms.Normalize(opt.mean_clf, opt.std_clf)
        ])

    spatial_transform = Compose(
        [Scale(112),
         CenterCrop(112),
         ToTensor(opt.norm_value), norm_method])

    target_transform = ClassLabel()

    ## Get list of videos to test
    if opt.dataset == 'egogesture':
        subject_list = [
            'Subject{:02d}'.format(i)
            for i in [2, 9, 11, 14, 18, 19, 28, 31, 41, 47]
        ]
        test_paths = []
        for subject in subject_list:
            for x in glob.glob(
                    os.path.join(opt.video_path, subject, '*/*/rgb*/')):
                test_paths.append(x)
    elif opt.dataset == 'nv':
        df = pd.read_csv(os.path.join(
            opt.video_path, 'nvgesture_test_correct_cvpr2016_v2.lst'),
                         delimiter=' ',
                         header=None)
        test_paths = []
        for x in df[0].values:
            test_paths.append(
                os.path.join(opt.video_path, x.replace('path:', ''),
                             'sk_color_all').replace(os.sep, '/'))

    # Figures setup
    # fig, ax = plt.subplots(nrows=6, ncols=1)

    # x_data, y_datas = [], []
    # lines = []
    # for j in range(6):
    #     if j != 0:
    #         ax[j].set_xlim(0, 400)
    #         ax[j].set_ylim(0, 1)
    #     y_datas.append([])
    #     lines.append([])
    #     for _ in range(opt.n_classes_clf):
    #         y_data = []
    #         y_datas[j].append(y_data)
    #         line, = ax[j].plot(x_data, y_data)
    #         lines[j].append(line)

    print('Start Evaluation')
    detector.eval()
    classifier.eval()

    levenshtein_accuracies = AverageMeter()
    frames_early_meter = AverageMeter()
    videoidx = 0
    for path in test_paths[4:]:  # note: skips the first four test videos
        path = os.path.normpath(path)
        if opt.dataset == 'egogesture':
            opt.whole_path = path.rsplit(os.sep, 4)[-4:]
            opt.whole_path = os.sep.join(opt.whole_path)
        elif opt.dataset == 'nv':
            opt.whole_path = path.split(
                os.sep,
                3)  # TODO: fix bad dependency on fixed depth file locations
            opt.whole_path = opt.whole_path[-1]

        videoidx += 1
        active_index = 0
        passive_count = 999  # start passive: any value >= opt.det_counter
        active = False
        prev_active = False
        finished_prediction = None
        pre_predict = False

        cum_sum = np.zeros(opt.n_classes_clf, )
        cum_sum_unweighted = np.zeros(opt.n_classes_clf, )
        clf_selected_queue = np.zeros(opt.n_classes_clf, )
        det_selected_queue = np.zeros(opt.n_classes_det, )
        myqueue_det = Queue(opt.det_queue_size, n_classes=opt.n_classes_det)
        myqueue_clf = Queue(opt.clf_queue_size, n_classes=opt.n_classes_clf)

        print('[{}/{}]============'.format(videoidx, len(test_paths)))
        print(path)
        opt.sample_duration = max(opt.sample_duration_clf,
                                  opt.sample_duration_det)

        if opt.model_clf == 'ssar':
            test_data = get_online_data(
                opt, [spatial_transform, spatial_transform_clf],
                None,
                target_transform,
                modality='RGB')
        else:
            test_data = get_online_data(opt, spatial_transform, None,
                                        target_transform)

        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)

        results = []
        prev_best1 = opt.n_classes_clf  # sentinel: no class predicted yet

        if opt.model_clf == 'ssar':
            # Init recurrent state zero
            lstm_hidden = [None, None, None, None]

        for i, (inputs, targets) in enumerate(test_loader):
            if opt.model_clf == 'ssar':
                inputs, inputs_clf = inputs
            if not opt.no_cuda:
                targets = targets.cuda(non_blocking=True)
            ground_truth_array = np.zeros(opt.n_classes_clf + 1, )
            with torch.no_grad():
                inputs = Variable(inputs)
                targets = Variable(targets)
                if opt.modality_det == 'RGB':
                    inputs_det = inputs[:, :3, -opt.sample_duration_det:, :, :]
                elif opt.modality_det == 'Depth':
                    inputs_det = inputs[:, -1,
                                        -opt.sample_duration_det:, :, :].unsqueeze(1)
                elif opt.modality_det == 'RGB-D':
                    inputs_det = inputs[:, :, -opt.sample_duration_det:, :, :]

                # print(inputs_det[0, :, -1, 0:4, 0:4])
                outputs_det = detector(inputs_det)
                outputs_det = F.softmax(outputs_det, dim=1)
                outputs_det = outputs_det.cpu().numpy()[0].reshape(-1, )

                # enqueue the probabilities to the detector queue
                myqueue_det.enqueue(outputs_det.tolist())

                if opt.det_strategy == 'raw':
                    det_selected_queue = outputs_det
                elif opt.det_strategy == 'median':
                    det_selected_queue = myqueue_det.median
                elif opt.det_strategy == 'ma':
                    det_selected_queue = myqueue_det.ma
                elif opt.det_strategy == 'ewma':
                    det_selected_queue = myqueue_det.ewma

                prediction_det = np.argmax(det_selected_queue)
                prob_det = det_selected_queue[1]

                # The detector state is checked here: the detector acts as
                # a switch (gate) for the classifier.
                if prediction_det == 1:
                    if opt.model_clf == 'ssar':
                        inputs_clf = Variable(inputs_clf)
                        if not opt.no_cuda:
                            inputs_clf = inputs_clf.cuda()
                        if opt.modality_clf == 'RGB':
                            inputs_clf = inputs_clf[:, :3, -1, :, :]
                        elif opt.modality_clf == 'Depth':
                            inputs_clf = inputs_clf[:, -1,
                                                    -1, :, :].unsqueeze(1)
                        elif opt.modality_clf == 'RGB-D':
                            inputs_clf = inputs_clf[:, :, -1, :, :]

                        outputs_clf, lstm_hidden = classifier(
                            inputs_clf, lstm_hidden, get_lstm_state=True)
                    else:
                        if opt.modality_clf == 'RGB':
                            inputs_clf = inputs[:, :3, :, :, :]
                        elif opt.modality_clf == 'Depth':
                            inputs_clf = inputs[:, -1, :, :, :].unsqueeze(1)
                        elif opt.modality_clf == 'RGB-D':
                            inputs_clf = inputs[:, :, :, :, :]

                        outputs_clf = classifier(inputs_clf)

                    outputs_clf = F.softmax(outputs_clf, dim=1)
                    outputs_clf = outputs_clf.cpu().numpy()[0].reshape(-1, )

                    # Push the probabilities to queue
                    myqueue_clf.enqueue(outputs_clf.tolist())
                    passive_count = 0

                    if opt.clf_strategy == 'raw':
                        clf_selected_queue = outputs_clf
                    elif opt.clf_strategy == 'median':
                        clf_selected_queue = myqueue_clf.median
                    elif opt.clf_strategy == 'ma':
                        clf_selected_queue = myqueue_clf.ma
                    elif opt.clf_strategy == 'ewma':
                        clf_selected_queue = myqueue_clf.ewma

                else:
                    if opt.model_clf == 'ssar':
                        # Reset recurrent state
                        lstm_hidden = [None, None, None, None]

                    outputs_clf = np.zeros(opt.n_classes_clf, )
                    # Push the probabilities to queue
                    myqueue_clf.enqueue(outputs_clf.tolist())
                    passive_count += 1

            # Debounce: stay active until the detector has been passive for
            # det_counter consecutive frames.
            active = passive_count < opt.det_counter

            # NOTE: two accumulators are maintained below; comment out the
            # one you do not need. Only the weighted cum_sum drives the
            # decisions that follow.
            if active:
                active_index += 1
                # Running mean update:
                #   cum_sum_n = ((n - 1) * cum_sum_{n-1} + w(n) * p_n) / n
                cum_sum = ((cum_sum * (active_index - 1)) +
                           (weighting_func(active_index) * clf_selected_queue)
                           ) / active_index  # weighted approach
                cum_sum_unweighted = ((cum_sum_unweighted *
                                       (active_index - 1)) +
                                      (1.0 * clf_selected_queue)
                                      ) / active_index  # unweighted approach

                best2, best1 = cum_sum.argsort()[-2:]
                if float(cum_sum[best1] - cum_sum[best2]) > clf_threshold_pre:
                    finished_prediction = True
                    pre_predict = True

            else:
                active_index = 0

            # Visualize
            # x_data.append(i)
            # y_datas[1][0].append(prob_det)
            # lines[1][0].set_xdata(x_data)
            # lines[1][0].set_ydata(y_datas[1][0])
            # for j in range(opt.n_classes_clf):
            #     y_datas[2][j].append(cum_sum[j])
            #     y_datas[3][j].append(cum_sum_unweighted[j])
            #     y_datas[4][j].append(clf_selected_queue[j] if active else 0)
            #     for k in range(2, 5):
            #         lines[k][j].set_xdata(x_data)
            #         lines[k][j].set_ydata(y_datas[k][j])
            # for k in range(1, 6):
            #     ax[k].set_xlim(i - 400, i)
            # mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, -1)
            # img = inputs_det[0, :, -1].permute(1, 2, 0).cpu().numpy() + mean
            # img = img.astype(int)
            # if i == 0:
            #     im_plt = ax[0].imshow(img)
            # else:
            #     im_plt.set_data(img)
            if i % 10 == 0:
                plt.draw()
                plt.pause(0.001)

            if not active and prev_active:
                finished_prediction = True
            elif active and not prev_active:
                finished_prediction = False

            if finished_prediction:
                detection_frame = (i * opt.stride_len) + opt.sample_duration_clf
                best2, best1 = cum_sum.argsort()[-2:]
                if cum_sum[best1] > opt.clf_threshold_final:
                    if pre_predict:
                        if best1 != prev_best1:
                            if cum_sum[best1] > opt.clf_threshold_final:
                                results.append((detection_frame, best1))
                                print(
                                    'Early Detected - class : {} with prob : {} at frame {}'
                                    .format(best1, cum_sum[best1],
                                            detection_frame))
                    else:
                        # raw_best = clf_selected_queue.argsort()[-1]
                        # results.append((detection_frame,raw_best))
                        # print( 'Late Detected - class : {} with prob : {} at frame {}'.format(raw_best, clf_selected_queue[raw_best], detection_frame))
                        if cum_sum[best1] > opt.clf_threshold_final:
                            if best1 == prev_best1:
                                # Note: cum_sum holds running means of values
                                # below 1, so this > 5 guard never passes and
                                # repeated classes are not re-emitted here.
                                if cum_sum[best1] > 5:
                                    results.append((detection_frame, best1))
                                    print(
                                        'Late Detected - class : {} with prob : {} at frame {}'
                                        .format(best1, cum_sum[best1],
                                                detection_frame))
                            else:
                                results.append((detection_frame, best1))

                                print(
                                    'Late Detected - class : {} with prob : {} at frame {}'
                                    .format(best1, cum_sum[best1],
                                            detection_frame))

                    prev_best1 = best1
                    finished_prediction = False

                # prev_best1 = best1
                # finished_prediction = False

                cum_sum = np.zeros(opt.n_classes_clf, )
                cum_sum_unweighted = np.zeros(opt.n_classes_clf, )

            if not active and prev_active:
                pre_predict = False

            prev_active = active

        if opt.dataset == 'egogesture':
            opt.video_path = os.path.normpath(opt.video_path)
            opt.whole_path = os.path.normpath(opt.whole_path)
            target_csv_path = os.path.join(
                opt.video_path, 'labels-final-revised1',
                opt.whole_path.rsplit(os.sep, 2)[0],
                'Group' + opt.whole_path.rsplit('.', 1)[0][-1] +
                '.csv').replace('Subject', 'subject')
            true_classes = []
            end_frames = []
            with open(target_csv_path) as csvfile:
                readCSV = csv.reader(csvfile, delimiter=',')
                for row in readCSV:
                    true_classes.append(int(row[0]) - 1)
                    end_frames.append(int(row[2]))
        elif opt.dataset == 'nv':
            true_classes = []
            with open('./annotation_nvGesture/vallistall.txt') as csvfile:
                readCSV = csv.reader(csvfile, delimiter=' ')
                for row in readCSV:
                    if row[0] == opt.whole_path:
                        if row[1] != '26':
                            true_classes.append(int(row[1]) - 1)

        predicted = np.array(results)[:, 1]
        detection_frames = np.array(results)[:, 0]

        true_classes = np.array(true_classes)
        # LevenshteinDistancePlusAvgFramesEarly is assumed to be defined
        # alongside utils.LevenshteinDistance (it is not shown in these
        # snippets): the edit distance plus the average number of frames
        # by which detections precede the labeled end frames.
        levenshtein_distance, avg_frames_early = LevenshteinDistancePlusAvgFramesEarly(
            true_classes, predicted, end_frames, detection_frames)
        levenshtein_accuracy = 1 - (levenshtein_distance / len(true_classes))
        # Clamp: the accuracy goes negative when the edit distance exceeds
        # the number of true classes (the distance itself is never < 0).
        if levenshtein_accuracy < 0:
            levenshtein_accuracies.update(0, len(true_classes))
        else:
            levenshtein_accuracies.update(levenshtein_accuracy,
                                          len(true_classes))
        frames_early_meter.update(avg_frames_early)

        print('Predicted classes:\t', predicted)
        print('True classes:\t\t', true_classes)
        print('Levenshtein Accuracy = {} ({})'.format(
            levenshtein_accuracies.val, levenshtein_accuracies.avg))
        print(
            f'Average frames early = {frames_early_meter.val} ({frames_early_meter.avg})'
        )

    print('Average Levenshtein Accuracy= {}'.format(
        levenshtein_accuracies.avg))

    print('-----Evaluation is finished------')

    # early_x_data / early_y_data / early_plot are module-level plotting
    # state (see the driver sketch after this function).
    early_x_data.append(clf_threshold_pre)
    early_y_data.append(frames_early_meter.avg)
    early_plot.set_xdata(early_x_data)
    early_plot.set_ydata(early_y_data)
    plt.annotate(f'{levenshtein_accuracies.avg * 100:.2f}',
                 (clf_threshold_pre, frames_early_meter.avg),
                 textcoords='offset pixels',
                 xytext=(10, 10))
    plt.gca().relim()
    plt.gca().autoscale_view()
    plt.pause(0.0001)
    plt.draw()
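
The tail of main extends module-level plotting state (early_x_data, early_y_data, early_plot), which implies a sweep over clf_threshold_pre at module scope. A sketch of such a driver; the state names come from the code above, while the threshold grid and axis labels are illustrative assumptions:

import numpy as np
import matplotlib.pyplot as plt

# Module-level plotting state consumed inside main().
early_x_data, early_y_data = [], []
early_plot, = plt.plot(early_x_data, early_y_data, marker='o')
plt.xlabel('clf_threshold_pre')
plt.ylabel('average frames early')

if __name__ == '__main__':
    # Trade-off curve: each threshold yields one (frames-early, accuracy) point.
    for threshold in np.arange(0.1, 1.0, 0.1):
        main(threshold)
    plt.show()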