Example #1
def extract_feature(opt, video_dir, C3D_model):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    load_image_fn = None
    data = Video(opt, video_dir, load_image_fn,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data, batch_size=opt.batch_size,
                                              shuffle=False, num_workers=opt.n_threads, pin_memory=True)

    c3d_features = []
    for i, clip in enumerate(data_loader):

        print(clip.mean())

        ## c3d feats
        clip = clip.to(opt.device)
        with torch.no_grad():
            c3d_outputs = C3D_model(clip)

        # collect the clip-level features
        c3d_features.append(c3d_outputs.cpu().data) # torch.Size([8, 512, 14, 14])

    c3d_features = torch.cat(c3d_features, 0)  # c3d feature of one video


    return c3d_features.cpu().numpy()
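A minimal driver sketch for extract_feature above; the option fields mirror what the function reads, while the concrete values, the frames path, and the model loading are illustrative assumptions rather than the project's canonical settings:

from argparse import Namespace
import torch

opt = Namespace(mode='feature', sample_size=112, sample_duration=16,
                mean=[114.77, 107.74, 99.48],  # example Kinetics means (norm_value=1)
                batch_size=8, n_threads=4,
                device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
# C3D_model = ...  # load the project's pretrained 3D CNN here
# feats = extract_feature(opt, '/path/to/frames_dir', C3D_model)  # -> numpy array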
Example #2
def get_loaders(opt):
    """ Make dataloaders for train and validation sets
	"""
    # train loader
    norm_method = Normalize(mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225])
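    # NOTE: Resize(256) below overrides the preceding Scale, so training clips
    # end up 224x224 after CenterCrop regardless of opt.sample_size.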
    spatial_transform = Compose([
        Scale((opt.sample_size, opt.sample_size)),
        Resize(256),
        CenterCrop(224),
        ToTensor(), norm_method
    ])
    temporal_transform = TemporalRandomCrop(25)
    target_transform = ClassLabel()
    training_data = get_training_set(opt, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=True)

    # validation loader
    target_transform = ClassLabel()
    temporal_transform = LoopPadding(25)
    validation_data = get_validation_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.num_workers,
                                             pin_memory=True)
    return train_loader, val_loader
Example #3
def get_dataloader(opt):

    mean = [110.63666788 / 255, 103.16065604 / 255, 96.29023126 / 255]
    std = [1, 1, 1]

    norm_method = Normalize(mean, std)

    spatial_transform = Compose(
        [Scale(112),
         CornerCrop(112, 'c'),
         ToTensor(255), norm_method])

    temporal_transform = LoopPadding(16)
    target_transform = ClassLabel()

    test_data = SurgicalDataset(os.path.abspath(opt.frames_path),
                                os.path.abspath(
                                    opt.video_phase_annotation_path),
                                opt.class_names,
                                spatial_transform=spatial_transform,
                                temporal_transform=temporal_transform,
                                target_transform=target_transform,
                                sample_duration=16)

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=4,
                                              pin_memory=True)

    return test_loader
Example #4
def classify_video(video_dir, video_name, class_names, model, opt):
    # print("video_dir: {}, video_name: {}".format(video_dir,video_name));
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir, spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data, batch_size=opt.batch_size,
                                              shuffle=False, num_workers=opt.n_threads, pin_memory=True)

    video_outputs = []
    # video_segments = []
    for i, (inputs, segments) in enumerate(data_loader):
        with torch.no_grad():  # volatile=True is deprecated; disable autograd for inference
            outputs = model(inputs)

        video_outputs.append(outputs.cpu().data)
        # video_segments.append(segments)

    if len(video_outputs) != 0:
        video_outputs = torch.cat(video_outputs)
        return video_outputs.numpy()
    else:
        return None
Example #5
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode in ['score', 'feature']
    print('video_name:', video_name)
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    video_outputs = []
    video_segments = []
    print('Running on video', video_dir)

    #print ('Data loader size', len(data_loader))
    for i, (inputs, segments) in enumerate(data_loader):
        print(i, inputs.size(), segments.shape)
        with torch.no_grad():  # replaces the deprecated volatile=True flag
            outputs = model(inputs)

        video_outputs.append(outputs.cpu().data)
        video_segments.append(segments)

    #print('Video outputs and segments', video_outputs)
    results = {'video': video_name, 'clips': []}
    if len(video_outputs) > 0:
        print('Video outputs and segments: ', video_outputs[0].shape)

        video_outputs = torch.cat(video_outputs)
        video_segments = torch.cat(video_segments)

        _, max_indices = video_outputs.max(dim=1)
        print('Video outputs', video_outputs.size())
        for i in range(video_outputs.size(0)):
            clip_results = {
                'segment': video_segments[i].tolist(),
            }

            if opt.mode == 'score':
                clip_results['label'] = class_names[max_indices[i]]
                clip_results['scores'] = video_outputs[i].tolist()
            elif opt.mode == 'feature':
                clip_results['features'] = video_outputs[i].tolist()

            results['clips'].append(clip_results)

    return results
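A follow-up sketch: collapsing the per-clip scores returned above into a single video-level label by averaging, assuming opt.mode == 'score' so each clip dict carries a 'scores' list:

import numpy as np

results = classify_video(video_dir, video_name, class_names, model, opt)
scores = np.array([clip['scores'] for clip in results['clips']])
video_label = class_names[int(scores.mean(axis=0).argmax())]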
Example #6
def model_process(count, model):
    opt = parse_opts()

    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    #opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)
    #print(opt)
    #print(opt.result_path)
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)

    torch.manual_seed(opt.manual_seed)

    #print(model)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    print('testing is run')

    if opt.test:
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = VideoID()

        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)

        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)

        tester.test(count, test_loader, model, opt, test_data.class_names)
Example #7
def classify_video(video_dir,
                   video_name,
                   class_names,
                   model,
                   opt,
                   annotation_digit=5):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    print('reading file from: ', video_dir, 'file name: ', video_name)

    video_outputs = []
    video_segments = []
    for i, (inputs, segments) in enumerate(data_loader):
        with torch.no_grad():  # replaces the deprecated volatile=True flag
            outputs = model(inputs)

        video_outputs.append(outputs.cpu().data)
        video_segments.append(segments)

    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    results = {'video': video_name, 'clips': []}

    _, max_indices = video_outputs.max(dim=1)
    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }

        if opt.mode == 'score':
            clip_results['label'] = class_names[max_indices[i]]
            clip_results['scores'] = video_outputs[i].tolist()
        elif opt.mode == 'feature':
            clip_results['features'] = video_outputs[i].tolist()
            clip_results['ground_truth_annotation'] = annotation_digit

        results['clips'].append(clip_results)

    return results
Example #8
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir, spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration,
                 stride=opt.stride)
    data_loader = torch.utils.data.DataLoader(data, batch_size=opt.batch_size,
                                              shuffle=False, num_workers=opt.n_threads, pin_memory=True)

    video_outputs = []
    video_segments = []
    for i, (inputs, segments) in enumerate(data_loader):
        with torch.no_grad():  # replaces the deprecated volatile=True flag
            outputs = model(inputs)

        video_outputs.append(outputs.cpu().data)
        video_segments.append(segments)

    if len(video_outputs) == 0:
        with open("error.list", 'a') as fout:
            fout.write("{}\n".format(video_name))
        return {}

    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)

    results = {
        'video': video_name,
        'clips': []
    }

    _, max_indices = video_outputs.max(dim=1)
    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }

        if opt.mode == 'score':
            clip_results['label'] = class_names[max_indices[i]]
            clip_results['scores'] = video_outputs[i].tolist()
        elif opt.mode == 'feature':
            clip_results['features'] = video_outputs[i].tolist()

        results['clips'].append(clip_results)

    return results
Example #9
def extract_feature(opt, video_dir, C3D_model, load_image_fn, C2D_model,
                    c2d_shape, duration):
    assert opt.mode in ['score', 'feature']
    C, H, W = c2d_shape

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)

    opt.num_segments = max(int(duration / opt.clip_len), 1)
    data = Video(opt,
                 video_dir,
                 load_image_fn,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=0,
                                              pin_memory=True)

    c3d_features = []
    c2d_features = []
    for i, (clip, frames_npy_data) in enumerate(data_loader):

        ## c3d feats
        clip = clip.to(opt.device)
        with torch.no_grad():
            c3d_outputs = C3D_model(clip)

        frames = frames_npy_data.to(opt.device)
        with torch.no_grad():
            c2d_outputs = C2D_model(frames).squeeze()
            if len(c2d_outputs.shape) == 1:
                c2d_outputs = c2d_outputs.unsqueeze(0)

        # collect the clip- and frame-level features
        c3d_features.append(c3d_outputs.cpu().data)
        c2d_features.append(c2d_outputs.cpu().data)

    try:
        c3d_features = torch.cat(c3d_features)  # c3d features of one video
        c2d_features = torch.cat(c2d_features)  # c2d features of one video
    except RuntimeError:  # torch.cat raises if no clips were decoded
        return None, None

    return c3d_features.cpu().numpy(), c2d_features.cpu().numpy()
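Because extract_feature above returns (None, None) when no clips could be decoded, callers should guard before saving. A usage sketch; opt, video_dir, the two models, load_image_fn, duration, out_dir, and vid are assumed to be prepared by the caller:

import os
import numpy as np

c3d_feats, c2d_feats = extract_feature(opt, video_dir, C3D_model, load_image_fn,
                                       C2D_model, c2d_shape=(3, 224, 224),
                                       duration=duration)
if c3d_feats is not None:  # skip videos whose frames failed to load
    np.save(os.path.join(out_dir, vid + '_c3d.npy'), c3d_feats)
    np.save(os.path.join(out_dir, vid + '_c2d.npy'), c2d_feats)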
Example #10
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    video_outputs = []
    video_segments = []

    with torch.no_grad():
        for i, (inputs, segments) in enumerate(data_loader):
            outputs = model(inputs)  # no Variable wrapper needed inside torch.no_grad()

            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)

        video_outputs = torch.cat(video_outputs)
        video_segments = torch.cat(video_segments)
        results = {'video': video_name, 'clips': []}

        feature_dir = 'features/' + video_name.split('.')[0]
        os.makedirs(feature_dir, exist_ok=True)  # avoid failure if the directory exists

        mypath = feature_dir + '/'

        _, max_indices = video_outputs.max(dim=1)
        for i in range(video_outputs.size(0)):

            with open(mypath + str(i) + '.txt', 'w+') as f:

                f.write(' '.join(map(str, video_outputs[i].tolist())))

        return results
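The function above writes one whitespace-separated feature file per clip; a small sketch for reading them back in clip order (the path layout mirrors the writer, the video name is illustrative):

import glob
import os
import numpy as np

files = sorted(glob.glob('features/myvideo/*.txt'),
               key=lambda p: int(os.path.splitext(os.path.basename(p))[0]))
feats = np.stack([np.loadtxt(f) for f in files])  # (num_clips, feature_dim)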
Example #11
def classify_video(video_dir, video_name, model, opt):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    video_outputs = []
    video_segments = []

    for i, (inputs, segments) in enumerate(data_loader):
        with torch.no_grad():  # replaces the deprecated volatile=True flag
            outputs = model(inputs)
        video_outputs.append(outputs.cpu().data)
        video_segments.append(segments)

    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    # results = {
    #     'video': video_name,
    #     'clips': []
    # }
    clips = []
    _, max_indices = video_outputs.max(dim=1)
    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }

        clip_results['features'] = video_outputs[i].tolist()
        clips.append(clip_results)

    return video_name, clips
Example #12
def get_loaders(opt):
	""" Make dataloaders for train and validation sets
	"""
	# train loader
	opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
	if opt.no_mean_norm and not opt.std_norm:
		norm_method = Normalize([0, 0, 0], [1, 1, 1])
	elif not opt.std_norm:
		norm_method = Normalize(opt.mean, [1, 1, 1])
	else:
		norm_method = Normalize(opt.mean, opt.std)
	spatial_transform = Compose([
		# crop_method,
		Scale((opt.sample_size, opt.sample_size)),
		# RandomHorizontalFlip(),
		ToTensor(opt.norm_value), norm_method
	])
	temporal_transform = TemporalRandomCrop(16)
	target_transform = ClassLabel()
	training_data = get_training_set(opt, spatial_transform,
									 temporal_transform, target_transform)
	train_loader = torch.utils.data.DataLoader(
		training_data,
		batch_size=opt.batch_size,
		shuffle=True,
		num_workers=opt.num_workers,
		pin_memory=True)

	# validation loader
	spatial_transform = Compose([
		Scale((opt.sample_size, opt.sample_size)),
		# CenterCrop(opt.sample_size),
		ToTensor(opt.norm_value), norm_method
	])
	target_transform = ClassLabel()
	temporal_transform = LoopPadding(16)
	validation_data = get_validation_set(
		opt, spatial_transform, temporal_transform, target_transform)
	val_loader = torch.utils.data.DataLoader(
		validation_data,
		batch_size=opt.batch_size,
		shuffle=False,
		num_workers=opt.num_workers,
		pin_memory=True)
	return train_loader, val_loader
Example #13
def classify_video(video_dir, video_name, model, opt):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=False)

    video_outputs = []
    video_segments = []

    with torch.no_grad():
        for i, (inputs, segments) in enumerate(data_loader):

            outputs = model(inputs)  # no Variable wrapper needed inside torch.no_grad()

            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)

    if video_outputs:
        video_outputs = torch.cat(video_outputs)
        video_segments = torch.cat(video_segments)

    results = dict()
    results['video'] = video_name
    results['features'] = video_outputs
    results['clips'] = video_segments

    return results
Example #14
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode == 'feature'

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    video_outputs = []
    video_segments = []
    with torch.no_grad():

        for i, (inputs, segments) in enumerate(data_loader):
            outputs = model(inputs)  # no Variable wrapper needed inside torch.no_grad()

            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)

    video_outputs = torch.cat(video_outputs)
    # video_segments = torch.cat(video_segments)
    results = []

    for i in range(video_outputs.size(0)):
        clip_results = np.expand_dims(video_outputs[i].numpy(), axis=0)

        results.append(clip_results)
    results = np.concatenate(results, axis=0)
    return results
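Note: video_outputs is already a (num_clips, feature_dim) tensor, so the expand_dims/concatenate loop above is equivalent to a direct conversion:

# results = video_outputs.numpy()  # same (num_clips, feature_dim) array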
Example #15
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    video_outputs = []
    video_segments = []
    for i, (inputs, segments) in enumerate(data_loader):
        with torch.no_grad():  # replaces the deprecated volatile=True flag
            outputs = model(inputs)
            outputs = F.softmax(outputs, dim=1)
        video_outputs.append(outputs.cpu().data)
        video_segments.append(segments)

    video_outputs = torch.cat(video_outputs)

    video_segments = torch.cat(video_segments)

    results = {'video': video_name, 'clips': []}

    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }
        label = get_video_results(video_outputs[i], class_names, 5)
        clip_results['label'] = label
        results['clips'].append(clip_results)

#     _, max_indices = video_outputs.max(dim=1)
#     for i in range(video_outputs.size(0)):
#         clip_results = {
#             'segment': video_segments[i].tolist(),
#         }

#         if opt.mode == 'score':
#             clip_results['label'] = class_names[max_indices[i]]
#             clip_results['scores'] = video_outputs[i, max_indices[i]].item()
#         elif opt.mode == 'feature':
#             clip_results['features'] = video_outputs[i].tolist()

#         results['clips'].append(clip_results)

#     average_scores = torch.mean(video_outputs, dim=0)
#     video_results, predicted_labels = get_video_results(average_scores, class_names, 1)

#     video_results = get_video_results(average_scores, class_names, 5)
#     results = {
#         'video': video_name,
#         'result': video_results,
# #         'predicted_labels': predicted_labels
#     }
    return results
Example #16
def objective(trial):
    opt = parse_opts()

    if trial:
        opt.weight_decay = trial.suggest_uniform('weight_decay', 0.01, 0.1)
        opt.learning_rate = trial.suggest_uniform('learning_rate', 1e-5, 1e-4)

    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)
    print(opt)
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)

    torch.manual_seed(opt.manual_seed)

    model, parameters = generate_model(opt)
    print(model)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    # norm_method = Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.sample_size,
                                               crop_positions=['c'])
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(
            training_data,
            batch_size=opt.batch_size,
            # sampler option is mutually exclusive with shuffle
            shuffle=False,
            sampler=ImbalancedDatasetSampler(training_data),
            num_workers=opt.n_threads,
            pin_memory=True)
        train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            os.path.join(opt.result_path, 'train_batch.log'),
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

        optimizer = optim.Adam(parameters,
                               lr=opt.learning_rate,
                               weight_decay=opt.weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               verbose=True,
                                                               factor=0.1**0.5)
    if not opt.no_val:
        spatial_transform = Compose([
            Scale(opt.sample_size),
            CenterCrop(opt.sample_size),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = ClassLabel()
        validation_data = get_validation_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
        val_loader = torch.utils.data.DataLoader(
            validation_data,
            batch_size=opt.batch_size,
            shuffle=False,
            sampler=ImbalancedDatasetSampler(validation_data),
            num_workers=opt.n_threads,
            pin_memory=True)
        val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                            ['epoch', 'loss', 'acc'])

    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']

        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        if not opt.no_train:
            optimizer.load_state_dict(checkpoint['optimizer'])

    print('run')
    writer = SummaryWriter(
        comment=
        f"_wd{opt.weight_decay}_lr{opt.learning_rate}_ft_begin{opt.ft_begin_index}_pretrain{not opt.pretrain_path == ''}"
    )
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            epoch, losses_avg, accuracies_avg = train_epoch(
                i, train_loader, model, criterion, optimizer, opt,
                train_logger, train_batch_logger)
            writer.add_scalar('loss/train', losses_avg, epoch)
            writer.add_scalar('acc/train', accuracies_avg, epoch)

        if not opt.no_val:
            epoch, val_losses_avg, val_accuracies_avg = val_epoch(
                i, val_loader, model, criterion, opt, val_logger)
            writer.add_scalar('loss/val', val_losses_avg, epoch)
            writer.add_scalar('acc/val', val_accuracies_avg, epoch)

        if not opt.no_train and not opt.no_val:
            scheduler.step(val_losses_avg)
        print('=' * 100)

    if opt.test:
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = VideoID()

        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)
        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)
        test.test(test_loader, model, opt, test_data.class_names)

    writer.close()
    return val_losses_avg
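A hypothetical driver for the Optuna objective above; the study direction matches the returned validation loss, and the trial count is illustrative:

import optuna

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)
print(study.best_params)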
Example #17
def main(args):

    import os
    import numpy as np
    import sys
    import json
    import torch
    from torch import nn
    from torch import optim
    from torch.optim import lr_scheduler

    from opts import parse_opts
    from mean import get_mean, get_std
    from spatial_transforms import (
        Compose, Normalize, Scale, CenterCrop, CornerCrop, MultiScaleCornerCrop,
        MultiScaleRandomCrop, RandomHorizontalFlip, ToTensor)
    from temporal_transforms import LoopPadding, TemporalRandomCrop
    from target_transforms import ClassLabel, VideoID
    from target_transforms import Compose as TargetCompose
    from dataset import get_training_set, get_validation_set, get_test_set
    from utils import Logger
    from train import train_epoch
    from validation import val_epoch
    import test
    import collections
    from sklearn.svm import LinearSVC
    from sklearn.svm import SVC
    from joblib import dump, load
    from sklearn import preprocessing
    from scipy import stats
    from sklearn.metrics import accuracy_score


    local_path = os.getcwd()

    if args.video_directory_path in ["", " ", "./video", "./video/"]:
        video_path = local_path + '/video/'
    else:
        video_path = args.video_directory_path
        
    video_path_jpg = local_path + '/video_jpg/'


    if not os.path.exists(video_path_jpg):
        os.makedirs(video_path_jpg)

    extracted_feature_path = local_path + '/extracted_features'
    if not os.path.exists(extracted_feature_path):
        os.makedirs(extracted_feature_path)

    final_results_path = local_path + '/final_test_results'
    if not os.path.exists(final_results_path):
        os.makedirs(final_results_path)

    os.system('python utils/video_jpg.py' + ' ' + video_path + ' ' + video_path_jpg)
    os.system('python utils/n_frames.py' + ' ' + video_path_jpg)


    if args.pretrain_directory_path in ["", " ", "./pretrain", "./pretrain/"]:
        pretrain_directory_path = local_path + '/pretrain'
    else:
        pretrain_directory_path = args.pretrain_directory_path


    import easydict
    opt = easydict.EasyDict({
        "n_classes": 2, 
        "sample_size": 112,
        "sample_duration": 16,
        "batch_size": 16,
        "n_threads": 4,
        "norm_value": 1,
        "resnet_shortcut": 'B',
        "resnext_cardinality": 32,
    })
    opt.root_path =  local_path
    opt.video_path = video_path_jpg




    # select which GPU devices are visible; customize this depending on how many are available
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'



    from datasets.no_label_binary import NoLabelBinary

    mean = get_mean(opt.norm_value, dataset='kinetics')
    std = get_std(opt.norm_value)
    norm_method = Normalize(mean, [1,1,1])


    spatial_transform = Compose([
        Scale(opt.sample_size),
        CornerCrop(opt.sample_size, 'c'),
        ToTensor(opt.norm_value), norm_method
    ])

    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = VideoID() # ClassLabel()



    # get test data
    test_data = NoLabelBinary(
        opt.video_path,
        None,
        'testing',
        0,
        spatial_transform=spatial_transform,
        temporal_transform=temporal_transform,
        target_transform=target_transform,
        sample_duration=opt.sample_duration)


    # wrap test data
    test_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=False)


    # ### Extract Features

    # ##### 3D ResNeXt-101


    from models import resnext

    # construct model architecture
    model_rxt101 = resnext.resnet101(
                    num_classes=opt.n_classes,
                    shortcut_type=opt.resnet_shortcut,
                    cardinality=opt.resnext_cardinality,
                    sample_size=opt.sample_size,
                    sample_duration=opt.sample_duration)

    model_rxt101 = model_rxt101.cuda()
    # wrap the model in nn.DataParallel so its parameter names match the
    # 'module.'-prefixed checkpoint keys (alternatively, strip that prefix from the checkpoint)
    model_rxt101 = nn.DataParallel(model_rxt101, device_ids=None)


    ### Load pretrained weight
    # customize the pretrained model path
    pretrain = torch.load(pretrain_directory_path + '/resnext-101-kinetics.pth')
    pretrain_dict = pretrain['state_dict']

    # do not load the last layer since we want to fine-tune it
    pretrain_dict.pop('module.fc.weight')
    pretrain_dict.pop('module.fc.bias')
    model_dict = model_rxt101.state_dict()
    model_dict.update(pretrain_dict) 
    model_rxt101.load_state_dict(model_dict)




    # register layer index to extract the features by forwarding all the video clips
    activation = {}
    def get_activation(name):
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    model_rxt101.module.avgpool.register_forward_hook(get_activation('avgpool'))
    model_rxt101.eval()


    # forward all the videos to extract features
    avgpool_test = []
    targets_test = []
    with torch.no_grad():
        print("Extract test set features:")
        for i, (inputs, target) in enumerate(test_loader):
            if i % 30 == 0:
                print(i)
            output = model_rxt101(inputs)
            avgpool_test.append(activation['avgpool'].view(len(target), -1).cpu())
            targets_test.append(target)



    avgpool_test_np = np.concatenate([i.numpy() for i in avgpool_test], axis=0)
    np.save(opt.root_path + '/extracted_features/resnext101_avgpool_test.npy', avgpool_test_np)

    targets_test_np = np.concatenate(targets_test, axis=0)
    np.save(opt.root_path + '/extracted_features/class_names_test.npy', targets_test_np)


    # ##### 3D ResNet-50


    from models import resnet

    # construct model architecture
    model_rt50 = resnet.resnet50(
                    num_classes=opt.n_classes,
                    shortcut_type=opt.resnet_shortcut,
                    sample_size=opt.sample_size,
                    sample_duration=opt.sample_duration)

    model_rt50 = model_rt50.cuda()
    # wrap the model in nn.DataParallel so its parameter names match the
    # 'module.'-prefixed checkpoint keys (alternatively, strip that prefix from the checkpoint)
    model_rt50 = nn.DataParallel(model_rt50, device_ids=None)


    ### Load pretrained weight
    # customize the pretrained model path
    pretrain = torch.load(pretrain_directory_path + '/resnet-50-kinetics.pth')
    pretrain_dict = pretrain['state_dict']

    # do not load the last layer since we want to fine-tune it
    pretrain_dict.pop('module.fc.weight')
    pretrain_dict.pop('module.fc.bias')
    model_dict = model_rt50.state_dict()
    model_dict.update(pretrain_dict) 
    model_rt50.load_state_dict(model_dict)




    # register layer index to extract the features by forwarding all the video clips
    activation = {}
    def get_activation(name):
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    model_rt50.module.avgpool.register_forward_hook(get_activation('avgpool'))
    model_rt50.eval()


    # forward all the videos to extract features
    avgpool_test = []
    with torch.no_grad():
        print("Extract test set features:")
        for i, (inputs, target) in enumerate(test_loader):
            if i % 30 == 0:
                print(i)
            output = model_rt50(inputs)
            avgpool_test.append(activation['avgpool'].view(len(target), -1).cpu())
            
        # save the features
        avgpool_test_np = np.concatenate([i.numpy() for i in avgpool_test], axis=0)
        np.save(opt.root_path + '/extracted_features/resnet50_avgpool_test.npy', avgpool_test_np)    


    # ### Load & fuse the features


    x_test_1 = np.load(opt.root_path + '/extracted_features/resnext101_avgpool_test.npy')
    x_test_2 = np.load(opt.root_path + '/extracted_features/resnet50_avgpool_test.npy')
    x_test = np.concatenate([x_test_1, x_test_2], axis=1)

    y_test = np.load(opt.root_path + '/extracted_features/class_names_test.npy')


    # ### Load Classification head and predict

    if args.model == 'hw4':
        # hw4 best model
        clf = load('./hw6_results/logistic2_ucf.joblib') 
        y_pred_test_raw = clf.predict(x_test_2)
        y_pred_test_prob_raw = clf.predict_proba(x_test_2)

    elif args.model == 'hw5':
        # hw5 best model
        clf = load('./hw6_results/logistic_ucf.joblib') 
        y_pred_test_raw = clf.predict(x_test)
        y_pred_test_prob_raw = clf.predict_proba(x_test)

    elif args.model == 'hw6':
        # hw6 best model
        clf = load('./hw6_results/logistic1_ucf.joblib') 
        y_pred_test_raw = clf.predict(x_test_1)
        y_pred_test_prob_raw = clf.predict_proba(x_test_1)

    elif args.model == 'hw8':
        # hw8 best model
        clf = load('./hw8_results/logistic_ucf.joblib') 
        y_pred_test_raw = clf.predict(x_test)
        y_pred_test_prob_raw = clf.predict_proba(x_test)

    elif args.model == 'final':
        # Final best model
        clf = load('./hw8_results/logistic1_ucf.joblib') 
        y_pred_test_raw = clf.predict(x_test_1)
        y_pred_test_prob_raw = clf.predict_proba(x_test_1)



    split_idx = []
    for idx, y_name in enumerate(y_test):
        if idx == 0 or y_name != y_test[idx-1]:
            split_idx.append(idx)
    split_idx.append(len(y_test))
            
    y_pred_test, y_pred_test_prob, y_pred_test_final = {}, {}, {}
    for i, split in enumerate(split_idx):
        if i < len(split_idx) - 1:
            y_pred_test[y_test[split]] = y_pred_test_raw[split:split_idx[i+1]]
            y_pred_test_prob[y_test[split]] = y_pred_test_prob_raw[split:split_idx[i+1]]
            y_pred_test_final[y_test[split]] = np.argmax(np.mean(y_pred_test_prob_raw[split:split_idx[i+1]], axis=0))   


    # ### Get the length (in seconds) of each video clip


    tvns = list(y_pred_test_final.keys())
    mp4_path = video_path
    clip_duration_dict = {}

    from moviepy.editor import VideoFileClip
    i = 0
    for tvn in tvns:
        i += 1
        if i % 100 == 0:
            print(i)
        clip = VideoFileClip(os.path.join(mp4_path, tvn + ".mp4"))
        clip_duration_dict[tvn] = [clip.duration]


    # ### Generate Figures
    import matplotlib.pyplot as plt
    for tvn in clip_duration_dict:
        interval = clip_duration_dict[tvn][0]/list(y_test).count(tvn)
        x = np.arange(0, clip_duration_dict[tvn][0], interval) + interval
        y_idx = np.argmax(y_pred_test_prob[tvn], 1)
        y = y_pred_test_prob[tvn][:, 1]
        x = x[:len(y)]
        plt.plot(x, y)
        plt.ylim([-0.1, 1.1])
        plt.xlabel ('time/sec')
        plt.ylabel ('pred score for ground truth label')
        plt.title("Ground Truth Label:  " + tvn  + "\n Model Avg. Predict Score:  " + str(np.mean(y))) # str(real_prediction_dict[tvn]['score'])
        plt.savefig(opt.root_path + "/final_test_results/" + tvn + '_' + args.model + "_UIN-625007598", bbox_inches='tight')
        plt.close()


    # ### Generate Json
    timeTrueLabel = {}
    for tvn in clip_duration_dict:
        if tvn in y_pred_test_prob:
            interval = clip_duration_dict[tvn][0]/list(y_test).count(tvn)
            x = np.arange(0, clip_duration_dict[tvn][0], interval) + interval
            y_idx = np.argmax(y_pred_test_prob[tvn], 1)
            y = y_pred_test_prob[tvn][:, 1]
            x = x[:len(y)]  
            timeTrueLabel[tvn] = [[str(time), str(y[idx])] for idx, time in enumerate(x)]



    with open(opt.root_path + '/final_test_results/timeLabel_' + args.model + '_UIN-625007598.json', 'w') as fp:
        json.dump(timeTrueLabel, fp)
Example #18
def main():

    resnet_in = generate_model(opt)
    resnet_in.module.fc = Identity()
    model = ReNet34(resnet_in, encode_length=encode_length)

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(opt.scales,
                                               opt.sample_size,
                                               crop_positions=['c'])

        ## train loader
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = TemporalRandomCrop(opt.sample_duration)
        target_transform = ClassLabel()
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(training_data,
                                                   batch_size=opt.batch_size,
                                                   shuffle=True,
                                                   num_workers=opt.n_threads,
                                                   pin_memory=True)

        ## test loader
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)

        target_transform = ClassLabel()
        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)
        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)

        ## Database loader
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = ClassLabel()
        validation_data = get_validation_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
        database_loader = torch.utils.data.DataLoader(
            validation_data,
            batch_size=opt.batch_size,
            shuffle=False,
            num_workers=opt.n_threads,
            pin_memory=True)

        if opt.nesterov:
            dampening = 0
        else:
            dampening = opt.dampening

        optimizer = optim.SGD(model.parameters(),
                              lr=opt.learning_rate,
                              momentum=opt.momentum,
                              dampening=dampening,
                              weight_decay=opt.weight_decay,
                              nesterov=opt.nesterov)
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   'min',
                                                   patience=opt.lr_patience)

    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']

        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        if not opt.no_train:
            optimizer.load_state_dict(checkpoint['optimizer'])
            for state in optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

    print('run')
    for epoch in range(opt.begin_epoch, opt.n_epochs + 1):
        model.cuda().train()
        for i, (images, labels) in enumerate(train_loader):

            images = Variable(images.cuda())
            labels = Variable(labels.cuda().long())

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            x, _, b = model(images)

            target_b = F.cosine_similarity(b[:int(labels.size(0) / 2)],
                                           b[int(labels.size(0) / 2):])
            target_x = F.cosine_similarity(x[:int(labels.size(0) / 2)],
                                           x[int(labels.size(0) / 2):])
            loss = F.mse_loss(target_b, target_x)
            loss.backward()
            optimizer.step()

        # ReduceLROnPlateau expects a metric, so step once per epoch on the last batch loss
        scheduler.step(loss.item())

        # Test the Model
        if (epoch + 1) % 10 == 0:
            model.eval()
            retrievalB, retrievalL, queryB, queryL = compress(
                database_loader, test_loader, model)
            result_map = calculate_top_map(qB=queryB,
                                           rB=retrievalB,
                                           queryL=queryL,
                                           retrievalL=retrievalL,
                                           topk=100)
            print('--------mAP@100: {}--------'.format(result_map))
Example #19
def extract_features(video_dir,
                     video_name,
                     class_names,
                     model,
                     opt,
                     annotation_digit=5):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    # print('reading file from: ', video_dir, 'file name: ', video_name)

    video_outputs = []
    video_segments = []
    model.eval()
    for i, (inputs, segments) in enumerate(data_loader):
        inputs = inputs.cuda()
        with torch.no_grad():  # model.eval() alone does not disable autograd
            outputs = model(inputs)
        video_outputs.append(outputs.cpu().data)
        video_segments.append(segments)

    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    results = {'video': video_name, 'clips': []}

    _, max_indices = video_outputs.max(dim=1)
    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }

        if opt.mode == 'score':
            clip_results['label'] = class_names[max_indices[i]]
            clip_results['scores'] = video_outputs[i].tolist()
        elif opt.mode == 'feature':
            clip_results['features'] = video_outputs[i].tolist()
            clip_results['ground_truth_annotation'] = annotation_digit

        results['clips'].append(clip_results)

    total_feature_vectors = len(results["clips"])
    np_data = np.array([], dtype=np.float64).reshape(0, 2048)
    for features_in_one_video in range(total_feature_vectors):
        one_feature_vector = results["clips"][features_in_one_video]["features"]
        np_data = np.vstack([np_data, np.asarray(one_feature_vector)])

    return np_data
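Since opt.mode == 'feature' fills each clip dict with a 2048-d 'features' list, the vstack loop above can also be collapsed into one call:

# np_data = np.asarray([clip['features'] for clip in results['clips']], dtype=np.float64)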
Example #20
            RandomHorizontalFlip(),
            ColorJitter(brightness=0.1),
            ToTensor(1),
            Normalize(args.mean, args.std)
        ]),
        'val':
        Compose([
            Scale(args.img_size),
            CenterCrop(args.img_size),
            ToTensor(1),
            Normalize(args.mean, args.std)
        ])
    }

    temporal_transform = {
        'train': Compose([LoopPadding(args.clip_len)]),
        'val': LoopPadding(args.clip_len)
    }

    dataset = {
        'train':
        HandHygiene(os.path.join(VIDEO_DIR, 'train'),
                    temporal_transform=temporal_transform['train'],
                    openpose_transform=openpose_transform['train'],
                    spatial_transform=spatial_transform['train'],
                    arguments=args),
        'val':
        HandHygiene(os.path.join(VIDEO_DIR, 'val'),
                    temporal_transform=temporal_transform['val'],
                    openpose_transform=openpose_transform['val'],
                    spatial_transform=spatial_transform['val'],
Example #21
                weight_decay=opt.weight_decay,
                nesterov=True)
            scheduler = lr_scheduler.MultiStepLR(optimizer,
                                                 [15, 25, 40, 45, 50, 55, 60],
                                                 gamma=0.1)

    if not opt.no_val:

        ##--------------------------------------------------------------------------------------------
        if opt.model == 'I3D':
            spatial_transform = Compose([
                Scale((256, 256)),
                CenterCrop(224),
                ToTensor(opt.norm_value), norm_method
            ])
            temporal_transform = LoopPadding(0)
            target_transform = ClassLabel()
            validation_data = get_validation_set(opt, spatial_transform,
                                                 temporal_transform,
                                                 target_transform)
            val_loader = torch.utils.data.DataLoader(validation_data,
                                                     batch_size=1,
                                                     shuffle=False,
                                                     num_workers=opt.n_threads,
                                                     pin_memory=True)
        elif opt.model == 'resnet_50':
            spatial_transform = Compose([
                Scale(256),
                CenterCrop(256),
                ToTensor(opt.norm_value), norm_method
            ])
Example #22
def main():
    opt = parse_opts()

    ecd_name, cls_name = opt.model_name.split('-')
    ecd_model = get_encoder_net(ecd_name)
    cls_model = get_end_net(cls_name)

    cfg.encoder_model = ecd_name
    cfg.classification_model = cls_name

    if opt.debug:
        cfg.debug = opt.debug
    else:
        if opt.tensorboard == 'TEST':
            cfg.tensorboard = opt.model_name
        else:
            cfg.tensorboard = opt.tensorboard
            cfg.flag = opt.flag
    model = cls_model(cfg,
                      encoder=CNNencoder(
                          cfg,
                          ecd_model(pretrained=True, path=opt.encoder_model)))
    cfg.video_path = os.path.join(cfg.root_path, cfg.video_path)
    cfg.annotation_path = os.path.join(cfg.root_path, cfg.annotation_path)

    cfg.list_all_member()

    torch.manual_seed(cfg.manual_seed)
    print('##########################################')
    print('####### model only supports a single GPU')
    print('##########################################')
    model = model.cuda()
    print(model)
    criterion = nn.CrossEntropyLoss()
    if cfg.cuda:
        criterion = criterion.cuda()

    norm_method = Normalize([0, 0, 0], [1, 1, 1])

    print('##########################################')
    print('####### train')
    print('##########################################')
    assert cfg.train_crop in ['random', 'corner', 'center']
    if cfg.train_crop == 'random':
        crop_method = MultiScaleRandomCrop(cfg.scales, cfg.sample_size)
    elif cfg.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(cfg.scales, cfg.sample_size)
    elif cfg.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(cfg.scales,
                                           cfg.sample_size,
                                           crop_positions=['c'])
    spatial_transform = Compose([
        crop_method,
        RandomHorizontalFlip(),
        ToTensor(cfg.norm_value), norm_method
    ])
    temporal_transform = TemporalRandomCrop(cfg.sample_duration)
    target_transform = ClassLabel()
    training_data = get_training_set(cfg, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=cfg.batch_size,
                                               shuffle=True,
                                               num_workers=cfg.n_threads,
                                               drop_last=False,
                                               pin_memory=True)
    optimizer = model.get_optimizer(lr1=cfg.lr, lr2=cfg.lr2)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               'min',
                                               patience=cfg.lr_patience)
    print('##########################################')
    print('####### val')
    print('##########################################')
    spatial_transform = Compose([
        Scale(cfg.sample_size),
        CenterCrop(cfg.sample_size),
        ToTensor(cfg.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(cfg.sample_duration)
    target_transform = ClassLabel()
    validation_data = get_validation_set(cfg, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=cfg.batch_size,
                                             shuffle=False,
                                             num_workers=cfg.n_threads,
                                             drop_last=False,
                                             pin_memory=True)
    print('##########################################')
    print('####### run')
    print('##########################################')
    if cfg.debug:
        logger = None
    else:
        path = get_log_dir(cfg.logdir, name=cfg.tensorboard, flag=cfg.flag)
        logger = Logger(logdir=path)
        cfg.save_config(path)

    for i in range(cfg.begin_epoch, cfg.n_epochs + 1):
        train_epoch(i, train_loader, model, criterion, optimizer, cfg, logger)
        validation_loss = val_epoch(i, val_loader, model, criterion, cfg,
                                    logger)

        scheduler.step(validation_loss)
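Nearly every pipeline above pads short clips with LoopPadding before batching. Its implementation is not shown in these snippets; a minimal sketch of the usual behavior, repeating frame indices until the clip reaches the requested length (a size of 0 therefore leaves clips untouched):

class LoopPadding(object):
    # Repeat the clip's frame indices cyclically until `size` entries
    # (e.g. sample_duration) are available, so batched clips have equal length.
    def __init__(self, size):
        self.size = size

    def __call__(self, frame_indices):
        out = list(frame_indices)
        while len(out) < self.size:
            out.append(out[len(out) % len(frame_indices)])
        return out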
示例#23
0
def train_main_multi_batch(model, input_root_dir, opt):
    """Train on paired .mp4 clips per directory, reading labels from metadata.json."""

    # Use two distinct logger names: sharing getLogger('info') would attach
    # both handlers to one logger and duplicate every record.
    epoch_logger = logging.getLogger('epoch')
    batch_logger = logging.getLogger('batch')
    epoch_logger.setLevel(logging.INFO)
    batch_logger.setLevel(logging.INFO)

    elogHandler = logging.StreamHandler()
    eformatter = jsonlogger.JsonFormatter()
    elogHandler.setFormatter(eformatter)
    epoch_logger.addHandler(elogHandler)

    blogHandler = logging.StreamHandler()
    bformatter = jsonlogger.JsonFormatter()
    blogHandler.setFormatter(bformatter)
    batch_logger.addHandler(blogHandler)

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)

    # criterion = nn.CrossEntropyLoss()
    criterion = nn.MSELoss()

    if not opt.no_cuda:
        criterion = criterion.cuda()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    epoch = 1

    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()

    end_time = time.time()

    ii = 0

    previous_label = "FAKE"
    pre_previous_label = "FAKE"

    for files_dir in os.listdir(input_root_dir):
        sub_path = os.path.join(input_root_dir, files_dir)
        print("Files dir: " + files_dir)
        print("Sub path:" + sub_path)

        data_file_path = os.path.join(sub_path, 'metadata.json')
        with open(data_file_path, 'r') as data_file:
            labels = json.load(data_file)

        opt.batch_size = 36
        total_batch_size = len(os.listdir(sub_path))
        i = 0
        input_files = os.listdir(sub_path)
        for inp_num in range(1, len(input_files), 2):
            print("Lala: " + str(inp_num))
            # print(input_files)
            input_file1 = input_files[inp_num]
            input_file2 = input_files[inp_num - 1]
            if input_file1.endswith(".mp4") and input_file2.endswith(".mp4"):

                video_path1 = os.path.join(sub_path, input_file1)
                video_path2 = os.path.join(sub_path, input_file2)

                label1 = labels[input_file1]
                label2 = labels[input_file2]

                # Process a pair only when its label differs from one of the two
                # most recently processed labels, skipping long runs of one class.
                if label1['label'] != previous_label or label1[
                        'label'] != pre_previous_label:

                    pre_previous_label = previous_label
                    previous_label = label1['label']

                    subprocess.call('mkdir tmp', shell=True)
                    subprocess.call(
                        'ffmpeg -hide_banner -loglevel panic -i {}  -vframes 288 tmp/image_%05d.jpg'
                        .format(video_path1),
                        shell=True)
                    subprocess.call(
                        'ffmpeg -hide_banner -loglevel panic -i {}  -vframes 288 -start_number 289 tmp/image_%05d.jpg'
                        .format(video_path2),
                        shell=True)

                    video_dir = '{}tmp/'.format(
                        '/data/codebases/video_classification/')

                    data = Video(video_dir,
                                 spatial_transform=spatial_transform,
                                 temporal_transform=temporal_transform,
                                 sample_duration=opt.sample_duration)

                    data_loader = torch.utils.data.DataLoader(
                        data,
                        batch_size=opt.batch_size,
                        shuffle=False,
                        num_workers=opt.n_threads,
                        pin_memory=True)

                    for k, (inputs, targets) in enumerate(data_loader):
                        data_time.update(time.time() - end_time)

                        print("Label: " + label1['label'] + ", " +
                              label2['label'])

                        # # FOR CROSS ENTROPY LOSS
                        # targets = torch.zeros([18, 1], dtype=torch.long)
                        # for j in range(0,18):
                        #     if(label['label'] == 'FAKE'):
                        #         targets[j][0] = 0
                        #         # targets[j][1] = 1
                        #     else:
                        #         targets[j][0] = 1
                        #         # targets[j][1] = 0

                        # FOR MSE LOSS
                        targets = torch.zeros([opt.batch_size, opt.n_classes],
                                              dtype=torch.float)
                        for j in range(0, int(opt.batch_size / 2)):
                            if (label1['label'] == 'FAKE'):
                                targets[j][0] = 0.0
                                targets[j][1] = 1.0
                            else:
                                targets[j][0] = 1.0
                                targets[j][1] = 0.0

                        for j in range(int(opt.batch_size / 2),
                                       opt.batch_size):
                            if (label2['label'] == 'FAKE'):
                                targets[j][0] = 0.0
                                targets[j][1] = 1.0
                            else:
                                targets[j][0] = 1.0
                                targets[j][1] = 0.0

                        if not opt.no_cuda:
                            targets = targets.cuda(non_blocking=True)
                            inputs = inputs.cuda(non_blocking=True)
                        outputs = model(inputs)

                        print(outputs.t())
                        print(targets.t())

                        # FOR CROSS ENTROPY LOSS
                        # loss = criterion(outputs, torch.max(targets, 1)[1])
                        # FOR MSE LOSS
                        loss = criterion(outputs, targets)

                        print(loss)

                        # FOR CROSS ENTROPY LOSS
                        # acc = calculate_accuracy(outputs, targets)
                        # FOR MSE LOSS
                        acc = calculate_accuracy_mse(outputs, targets)

                        print(acc)

                        losses.update(loss.item(), inputs.size(0))
                        accuracies.update(acc, inputs.size(0))

                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                        batch_time.update(time.time() - end_time)
                        end_time = time.time()

                        batch_logger.info({
                            'epoch': epoch,
                            'batch': i + 1,
                            'iter': (epoch - 1) * opt.batch_size + (i + 1),
                            'loss': losses.val,
                            'acc': accuracies.val,
                            'lr': optimizer.param_groups[0]['lr']
                        })

                        print(
                            'Epoch: [{0}][{1}/{2}]\t'
                            'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                            'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                            'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                            'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                                epoch,
                                k + 1,
                                len(data_loader),
                                batch_time=batch_time,
                                data_time=data_time,
                                loss=losses,
                                acc=accuracies))
                        ii += 1
                    subprocess.call('rm -rf tmp', shell=True)
                i += 1

            if ii % 100 == 0:
                save_loc = '/data/codebases/video_classification/model{}.pth'.format(
                    ii)
                torch.save(model.state_dict(), save_loc)
        epoch_logger.info({
            'epoch': epoch,
            'loss': losses.avg,
            'acc': accuracies.avg,
            'lr': optimizer.param_groups[0]['lr']
        })
        print('Epoch: [{0}]\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(epoch,
                                                         loss=losses,
                                                         acc=accuracies))
    exit(1)
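The calculate_accuracy_mse helper referenced above is not shown. A plausible sketch, assuming it treats the two-element output vector as class scores and compares argmax positions against the one-hot style MSE targets:

import torch

def calculate_accuracy_mse(outputs, targets):
    # Hypothetical helper: with MSE training the targets are one-hot style
    # float vectors, so accuracy is the fraction of rows whose largest
    # output lands on the same index as the largest target entry.
    with torch.no_grad():
        pred = outputs.argmax(dim=1)
        truth = targets.argmax(dim=1)
        return (pred == truth).float().mean().item()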
示例#24
0
    train_batch_logger = Logger(
        os.path.join(cfg.custom_logdir, 'train_batch.log'),
        ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

    optimizer = model.get_optimizer(lr1=cfg.lr, lr2=cfg.lr2)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               'min',
                                               patience=cfg.lr_patience)
    print('##########################################')
    print('####### val')
    print('##########################################')
    spatial_transform = Compose([
        Scale(cfg.sample_size),
        CenterCrop(cfg.sample_size),
        ToTensor(cfg.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(cfg.sample_duration)
    target_transform = ClassLabel()
    validation_data = get_validation_set(cfg, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=cfg.batch_size,
                                             shuffle=False,
                                             num_workers=cfg.n_threads,
                                             drop_last=False,
                                             pin_memory=True)
    val_logger = Logger(os.path.join(cfg.custom_logdir, 'val.log'),
                        ['epoch', 'loss', 'acc'])

    print('##########################################')
    print('####### run')
    print('##########################################')
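The Logger used here for train_batch.log and val.log takes a path plus a column header and is later driven with one dict per row, so it is a small CSV writer rather than Python's logging module. A sketch of the implementation these examples appear to assume:

import csv

class Logger(object):
    # Writes one tab-separated header row on creation, then one row per
    # call to log(), pulling values out of the dict in header order.
    def __init__(self, path, header):
        self.log_file = open(path, 'w')
        self.writer = csv.writer(self.log_file, delimiter='\t')
        self.writer.writerow(header)
        self.header = header

    def log(self, values):
        self.writer.writerow([values[col] for col in self.header])
        self.log_file.flush()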
示例#25
0
def get_ucf_data(opt):

    mean = get_mean(opt.norm_value, dataset='kinetics')
    std = get_std(opt.norm_value)
    norm_method = Normalize(mean, [1, 1, 1])

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CornerCrop(opt.sample_size, 'c'),
        ToTensor(opt.norm_value), norm_method
    ])

    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = ClassLabel()  # VideoID()

    # get training data
    training_data = UCF101(opt.video_path,
                           opt.annotation_path,
                           'training',
                           0,
                           spatial_transform=spatial_transform,
                           temporal_transform=temporal_transform,
                           target_transform=target_transform,
                           sample_duration=16)

    # wrap training data
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=opt.batch_size,
                                               shuffle=False,
                                               num_workers=opt.n_threads,
                                               pin_memory=False)  # True

    # get validation data
    val_data = UCF101(opt.video_path,
                      opt.annotation_path,
                      'validation',
                      0,
                      spatial_transform=spatial_transform,
                      temporal_transform=temporal_transform,
                      target_transform=target_transform,
                      sample_duration=16)

    # wrap validation data
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.n_threads,
                                             pin_memory=False)

    target_transform = VideoID()
    # get test data
    test_data = UCF101(opt.video_path,
                       opt.annotation_path,
                       'testing',
                       0,
                       spatial_transform=spatial_transform,
                       temporal_transform=temporal_transform,
                       target_transform=target_transform,
                       sample_duration=16)

    # wrap test data
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=False)

    return train_loader, val_loader, test_loader, test_data
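A hypothetical usage sketch for get_ucf_data; opt comes from this repository's parse_opts, and the shape comment assumes the (channels, frames, height, width) clip layout these loaders normally produce:

train_loader, val_loader, test_loader, test_data = get_ucf_data(opt)
for clips, labels in train_loader:
    # e.g. torch.Size([batch_size, 3, 16, sample_size, sample_size])
    print(clips.shape, labels[:4])
    break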
示例#26
0
        optimizer = optim.SGD(parameters,
                              lr=opt.learning_rate,
                              momentum=opt.momentum,
                              dampening=dampening,
                              weight_decay=opt.weight_decay,
                              nesterov=opt.nesterov)
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   'min',
                                                   patience=opt.lr_patience)
    if not opt.no_val:
        spatial_transform = Compose([
            Scale(opt.sample_size),
            CenterCrop(opt.sample_size),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = ClassLabel()
        validation_data = get_validation_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
        val_loader = torch.utils.data.DataLoader(validation_data,
                                                 batch_size=opt.batch_size,
                                                 shuffle=False,
                                                 num_workers=opt.n_threads,
                                                 pin_memory=True)
        val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                            ['epoch', 'loss', 'acc'])

    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
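        # Hedged continuation sketch: assumes the checkpoint layout saved by
        # 示例#27 below, i.e. the keys 'epoch', 'arch', 'state_dict' and
        # 'optimizer'.
        assert opt.arch == checkpoint['arch']
        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])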
示例#27
0
def main():
    opt = parse_opts()
    # Path configurations
    opt.annotation_path = os.path.join(opt.annotation_directory,
                                       opt.annotation_path)
    save_result_dir_name = \
        os.path.join(opt.result_path,
                     get_prefix() + '_{}{}_{}_epochs'.format(opt.model, opt.model_depth, opt.n_epochs))
    if not os.path.exists(save_result_dir_name):
        os.mkdir(save_result_dir_name)
    opt.result_path = os.path.join(opt.result_path, save_result_dir_name)

    # For data generator
    opt.scales = [opt.initial_scale]
    for epoch in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)

    # Model
    model, parameters = generate_model(opt)
    # print(model)

    # Loss function
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()

    # Normalizing
    if not opt.no_mean_norm:
        opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
        opt.std = get_std(opt.norm_value, dataset=opt.std_dataset)
        norm_method = Normalize(opt.mean, opt.std)
    else:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])

    print(opt)
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)

    # **************************** TRAINING CONFIGURATIONS ************************************
    assert opt.train_crop in ['corner', 'center']
    if opt.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
    elif opt.train_crop == 'center':
        crop_method = MultiScaleCornerCrop(opt.scales,
                                           opt.sample_size,
                                           crop_positions=['c'])

    # Spatial transform
    spatial_transform = Compose([
        crop_method,
        #RandomHorizontalFlip(),
        ToTensor(opt.norm_value),
        norm_method
    ])
    # Temporal transform
    temporal_transform = TemporalRandomCrop(opt.sample_duration)
    # Target transform
    target_transform = ClassLabel()

    train_loader_list = []
    if not opt.no_cross_validation:
        annotation_list = os.listdir(opt.annotation_directory)
        for annotation in annotation_list:
            opt.annotation_path = os.path.join(opt.annotation_directory,
                                               annotation)
            training_data = get_training_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
            train_loader = torch.utils.data.DataLoader(
                training_data,
                batch_size=opt.batch_size,
                shuffle=True,
                num_workers=opt.n_threads,
                pin_memory=True)
            train_loader_list.append(train_loader)
    else:
        training_data = get_training_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(training_data,
                                                   batch_size=opt.batch_size,
                                                   shuffle=True,
                                                   num_workers=opt.n_threads,
                                                   pin_memory=True)
        train_loader_list.append(train_loader)

    train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                          ['epoch', 'loss', 'acc', 'lr'])
    train_batch_logger = Logger(
        os.path.join(opt.result_path, 'train_batch.log'),
        ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

    optimizer = optim.SGD(parameters,
                          lr=opt.learning_rate,
                          momentum=opt.momentum,
                          dampening=opt.dampening,
                          weight_decay=opt.weight_decay)

    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               'min',
                                               patience=opt.lr_patience)

    # ***************************** VALIDATION CONFIGURATIONS *********************************
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = ClassLabel()

    val_loader_list = []
    if not opt.no_cross_validation:
        annotation_list = os.listdir(opt.annotation_directory)
        for annotation in annotation_list:
            opt.annotation_path = os.path.join(opt.annotation_directory,
                                               annotation)
            validation_data = get_validation_set(opt, spatial_transform,
                                                 temporal_transform,
                                                 target_transform)
            val_loader = torch.utils.data.DataLoader(validation_data,
                                                     batch_size=opt.batch_size,
                                                     shuffle=False,
                                                     num_workers=opt.n_threads,
                                                     pin_memory=True)
            val_loader_list.append(val_loader)
    else:
        validation_data = get_validation_set(opt, spatial_transform,
                                             temporal_transform,
                                             target_transform)
        val_loader = torch.utils.data.DataLoader(validation_data,
                                                 batch_size=opt.batch_size,
                                                 shuffle=False,
                                                 num_workers=opt.n_threads,
                                                 pin_memory=True)
        val_loader_list.append(val_loader)

    val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                        ['epoch', 'loss', 'acc'])

    # **************************************** TRAINING ****************************************
    epoch_avg_time = AverageMeter()
    train_loss_list = []
    train_acc_list = []
    valid_acc_list = []
    best_accuracy = 0
    current_train_data = 0
    current_valid_data = 0
    opt.frequence_cross_validation = round(opt.n_epochs /
                                           opt.n_cross_validation_sets + 0.5)

    for epoch in range(opt.begin_epoch, opt.n_epochs + 1):
        epoch_start_time = time.time()
        print('Epoch #' + str(epoch))

        # optimizer = regulate_learning_rate(optimizer, epoch, opt.frequence_regulate_lr)

        train_loader = train_loader_list[current_train_data]
        if not opt.no_cross_validation and epoch % opt.frequence_cross_validation == 0:
            print('\t##### Cross-validation: switch training data #####')
            current_train_data = (current_train_data +
                                  1) % len(train_loader_list)
            train_loader = train_loader_list[current_train_data]
        train_loss, train_acc = train_epoch(epoch, train_loader, model,
                                            criterion, optimizer, opt,
                                            train_logger, train_batch_logger)

        val_loader = val_loader_list[current_valid_data]
        if not opt.no_cross_validation and epoch % opt.frequence_cross_validation == 0:
            print('\t##### Cross-validation: switch validation data #####')
            current_valid_data = (current_valid_data +
                                  1) % len(val_loader_list)
            val_loader = val_loader_list[current_valid_data]
        validation_acc = val_epoch(epoch, val_loader, model, criterion, opt,
                                   val_logger)

        train_loss_list.append(train_loss)
        train_acc_list.append(train_acc)
        valid_acc_list.append(validation_acc)

        # Save model with best accuracy
        if validation_acc > best_accuracy:
            best_accuracy = validation_acc
            save_file_path = os.path.join(opt.result_path, 'best_model.pth')
            states = {
                'epoch': epoch + 1,
                'arch': opt.arch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(states, save_file_path)

        epoch_end_time = time.time() - epoch_start_time
        epoch_avg_time.update(epoch_end_time)
        print('\tTime left: ' +
              str(round(epoch_avg_time.avg *
                        (opt.n_epochs - epoch) / 60, 1)) + ' minutes')

    # ******************************* SAVING RESULTS OF TRAINING ******************************
    save_pictures(np.linspace(1, opt.n_epochs, opt.n_epochs),
                  train_loss_list, 'red', 'Loss',
                  os.path.join(opt.result_path, 'train_loss.png'))
    save_pictures(np.linspace(1, opt.n_epochs, opt.n_epochs), train_acc_list,
                  'blue', 'Accuracy',
                  os.path.join(opt.result_path, 'train_accuracy.png'))
    save_pictures(np.linspace(1, opt.n_epochs, opt.n_epochs), valid_acc_list,
                  'blue', 'Accuracy',
                  os.path.join(opt.result_path, 'validation_accuracy.png'))
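save_pictures itself is not defined in the snippet. A minimal matplotlib sketch matching the call signature used above (x values, series, line color, y-axis label, output path):

import matplotlib.pyplot as plt

def save_pictures(x, y, color, ylabel, save_path):
    # Plot one metric curve over epochs and write it to disk.
    plt.figure()
    plt.plot(x, y, color=color)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.grid(True)
    plt.savefig(save_path)
    plt.close()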