import torch
import torch.nn as nn

# NOTE: TSN is assumed to be importable from this repo's model definition,
# e.g. `from models import TSN`.


class ExtractFeature(object):
    def __init__(self):
        saved = torch.load(
            '/mnt/workspace/model/activitynet_clip_kinetics600_dpn107_rgb_model/activitynet_clip_600_dpn107_rgb_model_best_074.pth.tar'
        )
        self.model = TSN(201, 3, 'RGB', 'dpn107', 1)
        self.train_augmentation = self.model.get_augmentation()
        self.input_mean = self.model.input_mean
        self.input_std = self.model.input_std
        self.softmax = nn.Softmax(dim=-1).cuda()
        self.model = nn.DataParallel(self.model)
        self.model.load_state_dict(saved['state_dict'])

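        # Expose the backbone and the final FC layer separately so that
        # intermediate features can be returned alongside the class scores.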
        self.base_model = nn.DataParallel(self.model.module.base_model).cuda()
        self.new_fc = nn.DataParallel(self.model.module.new_fc).cuda()

        self.model.eval()
        self.base_model.eval()
        self.new_fc.eval()

    def loadFeature(self, x):

        midfeature = self.base_model(x)
        classfeature = self.softmax(self.new_fc(midfeature))
        return midfeature, classfeature
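
A minimal usage sketch (illustrative names and shapes; assumes frames are
already preprocessed and normalized, and that the checkpoint path above
exists):

    extractor = ExtractFeature()
    clips = torch.randn(8, 3, 224, 224).cuda()  # hypothetical input batch
    with torch.no_grad():
        midfeature, classfeature = extractor.loadFeature(clips)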
Example #2
def main():
    args = parser.parse_args()
    if args.dataset == 'ucf101':
        args.num_class = 101
    elif args.dataset == 'hmdb51':
        args.num_class = 51
    elif args.dataset == 'kinetics':
        args.num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    place = fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        model = TSN(args.num_class,
                    args.num_segments,
                    args.modality,
                    args.arch,
                    dropout=0)
        args.short_size = model.scale_size
        args.target_size = model.crop_size
        args.input_mean = model.input_mean
        args.input_std = model.input_std * 3

        state_dict = fluid.dygraph.load_dygraph(args.load_path)[0]
        model.set_dict(state_dict)

        test_reader = KineticsReader('test', args,
                                     args.test_list).create_reader()
        log = open(args.log_path, 'w')

        model.eval()
        avg_acc = AverageMeter()

        for batch_id, data in enumerate(test_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([[x[1]] for x in data]).astype('int64')

            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)
            label.stop_gradient = True

            out, acc = model(img, label)

            avg_acc.update(acc.numpy()[0], label.shape[0])
            if (batch_id + 1) % args.print_freq == 0:
                output = 'Test batch_id:{} | acc {} | avg acc:{}'.format(
                    batch_id + 1,
                    acc.numpy()[0], avg_acc.avg)
                print(output)
                log.write(output + '\n')
                log.flush()
        output = 'Test Avg acc:{}'.format(avg_acc.avg)
        print(output)
        log.write(output + '\n')
        log.flush()
        log.close()
Example #3
def eval(args):
    # parse config
    config = parse_config(args.config)
    val_config = merge_configs(config, 'valid', vars(args))
    # print_configs(val_config, "Valid")
    with fluid.dygraph.guard():
        val_model = TSN(args.batch_size, 32)

        label_dic = np.load('work/UCF-101_jpg/label_dir.npy',
                            allow_pickle=True).item()
        label_dic = {v: k for k, v in label_dic.items()}
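        # label_dic now maps class index -> class name for readable output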

        # get infer reader
        val_reader = UCFReader('valid', val_config).create_reader()

        # if no weight files specified, exit()
        if args.weights:
            weights = args.weights
        else:
            print("model path must be specified")
            exit()

        para_state_dict, _ = fluid.load_dygraph(weights)
        val_model.load_dict(para_state_dict)
        val_model.eval()

        acc_list = []
        for batch_id, data in enumerate(val_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([[x[1]] for x in data]).astype('int64')

            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)
            label = fluid.layers.reshape(label, [args.batch_size, 1])
            label.stop_gradient = True

            out, acc = val_model(img, label)
            print('batch_id=', batch_id, 'acc=', acc.numpy())
            acc_list.append(acc.numpy())

        print("验证集准确率为:{}".format(np.mean(acc_list)))
Example #4
def infer(args):
    # parse config
    config = parse_config(args.config)
    infer_config = merge_configs(config, 'infer', vars(args))
    # print_configs(infer_config, "Infer")
    with fluid.dygraph.guard():
        infer_model = TSN(args.batch_size, 32)

        label_dic = np.load('work/UCF-101_jpg/label_dir.npy', allow_pickle=True).item()
        label_dic = {v: k for k, v in label_dic.items()}

        # get infer reader
        infer_reader = UCFReader('infer', infer_config).create_reader()

        # if no weight files specified, exit()
        if args.weights:
            weights = args.weights
        else:
            print("model path must be specified")
            exit()
            
        para_state_dict, _ = fluid.load_dygraph(weights)
        # print('para_state_dict:', para_state_dict)
        infer_model.load_dict(para_state_dict)
        infer_model.eval()
        
        for batch_id, data in enumerate(infer_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([[x[1]] for x in data])
            
            img = fluid.dygraph.to_variable(dy_x_data)
            
            out = infer_model(img)
            label_id = fluid.layers.argmax(out, axis=1).numpy()[0]

            print("实际标签{}, 预测结果{}".format(y_data, label_dic[label_id]))
Example #5
    if os.path.isfile(args.resume):
        print(("=> loading checkpoint '{}'".format(args.resume)))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        net.load_state_dict(checkpoint['state_dict'])
        print(("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch'])))
    else:
        print(("=> no checkpoint found at '{}'".format(args.resume)))

# ToDo: why
# if len(devices) > 1:  # cause bug
#     device = torch.device('cuda:{}'.format(devices[0]))
#     net = net.to(device)
net.eval()

data_gen = enumerate(data_loader)

total_num = len(data_loader.dataset)
output = []


def eval_video(video_data):
    i, data, label = video_data
    num_crop = args.test_crops

    if args.modality == 'RGB':
        length = 3
    elif args.modality == 'Flow':
        length = 10
Example #6

    # NOTE: the head of this DataLoader call was lost in extraction; it is
    # reconstructed here from the equivalent loaders in the later examples,
    # so the leading argument names are assumptions.
    data_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.test_list,
        num_segments=args.test_segments,
        new_length=1 if args.modality == "RGB" else 5,
        modality=args.modality,
        image_tmpl=prefix,
        test_mode=True,
        transform=torchvision.transforms.Compose([
            cropping,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            GroupNormalize(originalNet.input_mean, originalNet.input_std),
        ])),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=args.workers * 2,
                                              pin_memory=False)

    if args.gpus is not None:
        devices = [args.gpus[i] for i in range(args.workers)]
    else:
        devices = list(range(args.workers))

    #net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
    originalNet = torch.nn.DataParallel(originalNet.cuda())
    originalNet.eval()

    data_gen = enumerate(data_loader)

    total_num = len(data_loader.dataset)
    output = []

    def eval_video(video_data):
        i, data, label = video_data
        num_crop = args.test_crops

        if "RGBFlow" == 'RGB':
            length = 3
        elif "RGBFlow" == 'Flow':
            length = 10
Example #7
def main():

    global args, best_prec1
    num_class = 4
    rgb_read_format = "{:d}.jpg"

    model = TSN(num_class,
                args.num_segments,
                args.pretrained_parts,
                'RGB',
                base_model='ECO',
                consensus_type='identity',
                dropout=0.3,
                partial_bn=True)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    # Optimizers also support specifying per-parameter options.
    # To do this, pass in an iterable of dicts instead of a single iterable
    # of parameters. Each dict defines a separate parameter group and must
    # contain a 'params' key holding the parameters that belong to it.
    # Other keys should match the keyword arguments accepted by the
    # optimizer and are used as optimization options for that group.
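    # For example (illustrative only; this script builds its groups through
    # get_optim_policies() below, and these parameter names are hypothetical):
    #   torch.optim.SGD([{'params': first_conv_params},
    #                    {'params': fc_params, 'lr': 1e-3}],
    #                   lr=1e-2, momentum=0.9)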
    policies = model.get_optim_policies()

    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()

    model_dict = model.state_dict()

    print("pretrained_parts: ", args.pretrained_parts)

    model_dir = args.model_path
    new_state_dict = torch.load(model_dir)['state_dict']

    un_init_dict_keys = [
        k for k in model_dict.keys() if k not in new_state_dict
    ]
    print("un_init_dict_keys: ", un_init_dict_keys)
    print("\n------------------------------------")

    for k in un_init_dict_keys:
        new_state_dict[k] = torch.DoubleTensor(model_dict[k].size()).zero_()
        if 'weight' in k:
            if 'bn' in k:
                print("{} init as: 1".format(k))
                constant_(new_state_dict[k], 1)
            else:
                print("{} init as: xavier".format(k))
                xavier_uniform_(new_state_dict[k])
        elif 'bias' in k:
            print("{} init as: 0".format(k))
            constant_(new_state_dict[k], 0)

    print("------------------------------------")

    model.load_state_dict(new_state_dict)

    cudnn.benchmark = True

    # Data loading code
    normalize = GroupNormalize(input_mean, input_std)
    data_length = 1

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality='RGB',
        image_tmpl=rgb_read_format,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=True),
            ToTorchFormatTensor(div=False),
            normalize,
        ])),
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             pin_memory=True)

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    model.eval()
    for i, (input, target) in enumerate(val_loader):
        target = target.cuda()
        input_var = input
        target_var = target
        output = model(input_var)
        _, pred = output.data.topk(1, 1, True, True)
        print(pred, target)
    print('done')
Exemplo n.º 8
0
def get_pred(video_path, caption_path, opt):
    # options
    parser = argparse.ArgumentParser(
        description="TRN testing on the full validation set")
    # parser.add_argument('dataset', type=str, choices=['something','jester','moments','charades'])
    # parser.add_argument('modality', type=str, choices=['RGB', 'Flow', 'RGBDiff'])

    parser.add_argument('--dataset', type=str, default='somethingv2')
    parser.add_argument('--modality', type=str, default='RGB')

    parser.add_argument(
        '--weights',
        type=str,
        default=
        'model/TRN_somethingv2_RGB_BNInception_TRNmultiscale_segment8_best.pth.tar'
    )
    parser.add_argument('--arch', type=str, default="BNInception")
    parser.add_argument('--save_scores', type=str, default=None)
    parser.add_argument('--test_segments', type=int, default=8)
    parser.add_argument('--max_num', type=int, default=-1)
    parser.add_argument('--test_crops', type=int, default=10)
    parser.add_argument('--input_size', type=int, default=224)
    parser.add_argument('--crop_fusion_type',
                        type=str,
                        default='TRNmultiscale',
                        choices=['avg', 'TRN', 'TRNmultiscale'])
    parser.add_argument('-j',
                        '--workers',
                        default=4,
                        type=int,
                        metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument('--gpus', nargs='+', type=int, default=None)
    parser.add_argument('--img_feature_dim', type=int, default=256)
    parser.add_argument(
        '--num_set_segments',
        type=int,
        default=1,
        help='TODO: select multiply set of n-frames from a video')
    parser.add_argument('--softmax', type=int, default=0)

    args = parser.parse_args()

    def accuracy(output, target, topk=(1, )):
        """Returns the top-k probabilities and predicted class indices."""
        maxk = max(topk)
        prob, pred = output.topk(maxk, 1, True, True)
        prob = prob.t().data.numpy().squeeze()
        pred = pred.t().data.numpy().squeeze()
        return prob, pred

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality, opt)
    num_class = len(categories)

    net = TSN(num_class,
              args.test_segments
              if args.crop_fusion_type in ['TRN', 'TRNmultiscale'] else 1,
              args.modality,
              base_model=args.arch,
              consensus_type=args.crop_fusion_type,
              img_feature_dim=args.img_feature_dim,
              opt=opt)

    try:
        checkpoint = torch.load(args.weights)
    except OSError:
        # fall back to a weights path relative to the project root
        args.weights = os.path.join(opt.project_root, 'scripts/Eval/',
                                    args.weights)
        checkpoint = torch.load(args.weights)

    print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                                  checkpoint['best_prec1']))

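    # Checkpoints saved from a DataParallel model prefix every key with
    # 'module.'; strip that prefix so the weights load into the bare model.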
    base_dict = {
        '.'.join(k.split('.')[1:]): v
        for k, v in list(checkpoint['state_dict'].items())
    }
    net.load_state_dict(base_dict)

    if args.test_crops == 1:
        cropping = torchvision.transforms.Compose([
            GroupScale(net.scale_size),
            GroupCenterCrop(net.input_size),
        ])
    elif args.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)])
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported while we got {}".format(
                args.test_crops))

    data_loader = torch.utils.data.DataLoader(TSNDataSet(
        video_path,
        caption_path,
        num_segments=args.test_segments,
        new_length=1 if args.modality == "RGB" else 5,
        modality=args.modality,
        image_tmpl=prefix,
        test_mode=True,
        transform=torchvision.transforms.Compose([
            cropping,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            GroupNormalize(net.input_mean, net.input_std),
        ])),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=args.workers * 2,
                                              pin_memory=True)

    if args.gpus is not None:
        devices = [args.gpus[i] for i in range(args.workers)]
    else:
        devices = list(range(args.workers))

    #net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
    net = torch.nn.DataParallel(net.cuda())
    net.eval()

    data_gen = enumerate(data_loader)

    output = []

    def eval_video(video_data):
        i, data, label = video_data
        num_crop = args.test_crops

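        # Channels fed per segment: RGB = 1 frame x 3 channels; Flow =
        # 5 (u, v) frame pairs = 10 channels; RGBDiff = 6 frames x 3
        # channels = 18.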
        if args.modality == 'RGB':
            length = 3
        elif args.modality == 'Flow':
            length = 10
        elif args.modality == 'RGBDiff':
            length = 18
        else:
            raise ValueError("Unknown modality " + args.modality)

        input_var = torch.autograd.Variable(data.view(-1, length, data.size(2),
                                                      data.size(3)),
                                            volatile=True)
        rst = net(input_var)
        if args.softmax == 1:
            # take the softmax to normalize the output to probability
            rst = F.softmax(rst)

        rst = rst.data.cpu().numpy().copy()

        if args.crop_fusion_type in ['TRN', 'TRNmultiscale']:
            rst = rst.reshape(-1, 1, num_class)
        else:
            rst = rst.reshape((num_crop, args.test_segments,
                               num_class)).mean(axis=0).reshape(
                                   (args.test_segments, 1, num_class))

        return i, rst, label[0]

    max_num = args.max_num if args.max_num > 0 else len(data_loader.dataset)

    prob_all, pred_all = [], []
    for i, (data, label) in data_gen:
        if i >= max_num:
            break
        rst = eval_video((i, data, label))
        output.append(rst[1:])
        prob, pred = accuracy(torch.from_numpy(np.mean(rst[1], axis=0)),
                              label,
                              topk=(1, 174))
        prob_all.append(prob)
        pred_all.append(pred)
    return prob_all, pred_all
Example #9
def main(conf, test_set, test_part=-1):
    gulp_path = os.path.join(conf.gulp_test_dir, conf.modality.lower(), 'test',
                             test_set)
    gulp_path = os.path.realpath(gulp_path)
    gulp_path = Path(gulp_path)

    classes_map = pickle.load(open(conf.classes_map, "rb"))
    conf.num_classes = count_num_classes(classes_map)

    net = TSN(conf.num_classes,
              1,
              conf.modality,
              base_model=conf.arch,
              consensus_type=conf.crop_fusion_type,
              dropout=conf.dropout)

    checkpoint = torch.load(conf.weights)
    print("Model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                                  checkpoint['best_prec1']))

    base_dict = {
        '.'.join(k.split('.')[1:]): v
        for k, v in list(checkpoint['state_dict'].items())
    }
    net.load_state_dict(base_dict)

    if conf.test_crops == 1:
        cropping = torchvision.transforms.Compose([
            GroupScale(net.scale_size),
            GroupCenterCrop(net.input_size),
        ])
    elif conf.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)])
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported while we got {}".format(
                conf.test_crops))

    class_type = 'verb+noun' if conf.class_type == 'action' else conf.class_type
    if conf.modality == 'Flow':
        dataset = EpicVideoFlowDataset(gulp_path=gulp_path,
                                       class_type=class_type)
    else:
        dataset = EpicVideoDataset(gulp_path=gulp_path, class_type=class_type)

    data_loader = torch.utils.data.DataLoader(EpicTSNTestDataset(
        dataset,
        classes_map,
        num_segments=conf.test_segments,
        new_length=1 if conf.modality == "RGB" else 5,
        modality=conf.modality,
        transform=torchvision.transforms.Compose([
            cropping,
            Stack(roll=conf.arch == 'BNInception'),
            ToTorchFormatTensor(div=conf.arch != 'BNInception'),
            GroupNormalize(net.input_mean, net.input_std),
        ]),
        part=test_part),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=conf.workers * 2,
                                              pin_memory=True)

    net = torch.nn.DataParallel(net, device_ids=conf.gpus).cuda()
    net.eval()

    total_num = len(data_loader.dataset)
    output = []

    proc_start_time = time.time()
    for i, (keys, input_) in enumerate(data_loader):
        rst = eval_video(conf, (i, keys, input_), net)
        output.append(rst[1:])
        cnt_time = time.time() - proc_start_time
        print('video {} done, total {}/{}, average {} sec/video'.format(
            i, i + 1, total_num,
            float(cnt_time) / (i + 1)))

    video_index = [x[0] for x in output]
    scores = [x[1] for x in output]

    save_scores = './{}/tsn_{}_{}_testset_{}_{}_lr_{}_model_{:03d}.npz'.format(
        conf.checkpoint, conf.class_type, conf.modality.lower(), test_set,
        conf.arch, conf.lr, checkpoint['epoch'])
    if test_part > 0:
        save_scores = save_scores.replace('.npz',
                                          '_part-{}.npz'.format(test_part))
    np.savez(save_scores, segment_indices=video_index, scores=scores)
Example #10
def eval_one_model(num_class, modality, weights, devices, args):

    # init model
    net = TSN(num_class,
              1,
              modality,
              base_model=args.arch,
              consensus_type=args.crop_fusion_type,
              dropout=args.dropout,
              mdl=args.mdl,
              pretrained=False)

    # load checkpoint
    checkpoint = torch.load(weights)
    print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                                  checkpoint['best_prec1']))

    base_dict = checkpoint['state_dict']
    # base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())}
    net.load_state_dict(base_dict)

    # transformer
    if args.test_crops == 1:
        cropping = torchvision.transforms.Compose([
            GroupScale(net.scale_size),
            GroupCenterCrop(net.input_size),
        ])
    elif args.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)])
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported while we got {}".format(
                args.test_crops))

    # prepare dataset
    if args.dataset == 'ucf101':
        naming_pattern = "frame{:06d}.jpg" if modality in [
            "RGB", "RGBDiff", 'tvl1'
        ] else args.flow_prefix + "{}_{:06d}.jpg"
    else:
        naming_pattern = "image_{:05d}.jpg" if modality in [
            "RGB", "RGBDiff"
        ] else args.flow_prefix + "{}_{:05d}.jpg"

    data_loader = torch.utils.data.DataLoader(TSNDataSet(
        os.path.join(args.data_root_path,
                     ('jpegs_256' if modality == 'RGB' else 'tvl1_flow')),
        args.test_list,
        num_segments=args.test_segments,
        new_length=4 if modality == "RGB" else 6,
        modality=modality,
        image_tmpl=naming_pattern,
        test_mode=True,
        dataset=args.dataset,
        transform=torchvision.transforms.Compose([
            cropping,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            GroupNormalize(net.input_mean, net.input_std),
        ])),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=args.workers * 2,
                                              pin_memory=True)

    data_gen = iter(data_loader)

    total_num = len(data_loader.dataset)
    output = []  # [class probability, label code]

    # Inferencing

    net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
    net.eval()

    max_num = len(data_loader.dataset)

    for i in tqdm(range(max_num)):
        data, label = next(data_gen)
        output.append(
            eval_video(net, (i, data, label), num_class, modality, args))

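    # Average each video's crop/segment scores, then take the argmax as the
    # video-level prediction.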
    video_pred = [np.argmax(np.mean(x[1], axis=0)) for x in output]
    video_labels = [x[2] for x in output]

    # summarize results
    cf = confusion_matrix(video_labels, video_pred).astype(float)

    cls_cnt = cf.sum(axis=1)
    cls_hit = np.diag(cf)

    cls_acc = cls_hit / cls_cnt
    print('Accuracy of {}, {:.02f}%'.format(modality, np.mean(cls_acc) * 100))

    del net
    del data_loader

    class_acc_map = class_acc_mapping(cls_acc, args.dataset)

    return output, video_labels, class_acc_map
Example #11
# NOTE: the head of this DataLoader call was lost in extraction; it is
# reconstructed here from the equivalent loaders in the earlier examples,
# so the leading argument names are assumptions.
data_loader = torch.utils.data.DataLoader(
    TSNDataSet(args.root_path,
               args.test_list,
               num_segments=args.test_segments,
               new_length=1 if args.modality == "RGB" else 5,
               modality=args.modality,
               image_tmpl=prefix,
               test_mode=True,
               transform=torchvision.transforms.Compose([
                   cropping,
                   Stack(roll=args.arch == 'BNInception'),
                   ToTorchFormatTensor(div=args.arch != 'BNInception'),
                   GroupNormalize(net.input_mean, net.input_std),
               ])),
    batch_size=1, shuffle=False,
    num_workers=args.workers * 2, pin_memory=True)

if args.gpus is not None:
    devices = [args.gpus[i] for i in range(args.workers)]
else:
    devices = list(range(args.workers))


net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
net.eval()

data_gen = enumerate(data_loader)

total_num = len(data_loader.dataset)
output = []


def eval_video(video_data):
    i, data, label = video_data
    num_crop = args.test_crops

    if args.modality == 'RGB':
        length = 3
    elif args.modality == 'Flow':
        length = 10