Пример #1
0
        # train sizes: train, smalltrain, minitrain
        # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50']
    elif args.dataset == "imagenet_vid+imagenet_det":
        args.imdb_name = "imagenet_vid_train+imagenet_det_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30']

    args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    #torch.backends.cudnn.benchmark = True
    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    cfg.TRAIN.USE_FLIPPED = False
    cfg.USE_GPU_NMS = args.cuda
    if args.use_det:
        print("Using VID and DET datasets.")
Пример #2
0
def loop():
    """Evaluate a run of consecutive FPN checkpoints on the validation imdb.

    The function parses the command line, configures ``cfg`` for the chosen
    dataset, builds the network once and then repeats up to 200 times:

    1. load ``fpn_<checksession>_<checkepoch>_<checkpoint>.pth`` from
       ``<load_dir>/<net>/<dataset>``,
    2. run detection on every image of ``args.imdbval_name``,
    3. pickle the detections, evaluate them and append the per-class APs to
       ``result.txt``,
    4. increment ``args.checkepoch``.

    Any exception raised while loading a checkpoint (for instance because the
    next epoch's file does not exist) propagates and ends the loop.

    Raises:
        Exception: if the checkpoint input directory does not exist.
    """
    args = parse_args()

    print('Called with args:')
    print(args)

    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_test"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "vg":
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']

    args.cfg_file = "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    # Initialize the network here.
    if args.net == 'vgg16':
        fpn = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fpn = resnet(imdb.classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fpn = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fpn = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic)
    else:
        # Unknown network: drop into the debugger (kept from the original).
        print("network is not defined")
        pdb.set_trace()
    fpn.create_architecture()
    print('load model successfully!')

    # Placeholder tensors; they are resized and filled for every image below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    if args.cuda:
        cfg.CUDA = True
        fpn.cuda()

    start = time.time()
    max_per_image = 100
    vis = True  # args.vis
    # Score threshold for keeping a detection (the original used 0.0 both
    # with and without visualization).
    thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    # all_boxes[class][image] -> array with one row per detection; the last
    # column is the score (see the torch.cat below).
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)

    # Loop-invariant paths and constants, hoisted out of the checkpoint loop.
    det_file = os.path.join(output_dir, 'detections.pkl')
    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception('There is no input directory for loading network from ' + input_dir)
    # Zero-row, five-column placeholder for classes without detections.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

    for _ in range(200):
        dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size,
                                 imdb.num_classes, training=False, normalize=False)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                                                 shuffle=False, num_workers=0,
                                                 pin_memory=True)

        data_iter = iter(dataloader)

        load_name = os.path.join(input_dir,
                                 'fpn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))

        print("load checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        fpn.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']

        fpn.eval()
        for i in range(num_images):
            # Builtin next() works on every iterator; the `.next()` method
            # only exists on Python 2 style iterators.
            data = next(data_iter)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])

            det_tic = time.time()
            rois, cls_prob, bbox_pred, \
            _, _, _, _, _ = fpn(im_data, im_info, gt_boxes, num_boxes)

            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Undo the normalization by the precomputed mean/stdev.
                    stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS)
                    means = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    # Only move to the GPU when the run actually uses CUDA;
                    # an unconditional .cuda() fails on CPU-only runs.
                    if args.cuda:
                        stds = stds.cuda()
                        means = means.cuda()
                    box_deltas = box_deltas.view(-1, 4) * stds + means
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = boxes

            # data[1][0][2] is the third im_info entry of the first sample;
            # presumably the resize scale of the image -- TODO confirm.
            im_scale = data[1][0][2]
            if args.cuda:
                im_scale = im_scale.cuda()
            pred_boxes /= im_scale

            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()
            det_toc = time.time()
            detect_time = det_toc - det_tic
            misc_tic = time.time()
            if vis:
                im = cv2.imread(imdb.image_path_at(i))
                im2show = np.copy(im)
            # Class index 0 is skipped (presumably background -- TODO confirm).
            for j in range(1, imdb.num_classes):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                    cls_dets = cls_dets[order]
                    # `~` on a Python bool is a bitwise NOT and yields -1/-2,
                    # which are both truthy; the logical negation is `not`.
                    # NOTE(review): the third nms() argument is presumably
                    # "force CPU" -- confirm against the nms wrapper.
                    keep = nms(cls_dets, cfg.TEST.NMS, not args.cuda)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    if vis:
                        im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3)
                    all_boxes[j][i] = cls_dets.cpu().numpy()
                else:
                    all_boxes[j][i] = empty_array

            # Limit to max_per_image detections *over all classes*
            if max_per_image > 0:
                image_scores = np.hstack([all_boxes[j][i][:, -1]
                                          for j in range(1, imdb.num_classes)])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]

            misc_toc = time.time()
            nms_time = misc_toc - misc_tic

            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                             .format(i + 1, num_images, detect_time, nms_time))
            sys.stdout.flush()

            if vis:
                cv2.imwrite('images/result%d_%d.png' % (args.checkepoch, i), im2show)

            # Drop per-image tensors before the next iteration.
            del data
            del pred_boxes
            del scores
            torch.cuda.empty_cache()

        with open(det_file, 'wb') as f:
            cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

        print('Evaluating detections')
        aps, clss = imdb.evaluate_detections(all_boxes, output_dir)
        with open("result.txt", 'a+') as f:
            # One line per checkpoint: ['cls:ap','cls:ap',...] ls:<session>_<epoch>
            lp = ",".join("'" + str(b) + ":" + str(aps[cc]) + "'"
                          for cc, b in enumerate(clss))
            sp = "[" + lp + "] ls:" + str(args.checksession) + "_" + str(args.checkepoch)
            f.write(sp + "\n")
        end = time.time()
        # `start` is taken once before the loop, so this is cumulative time.
        print("test time: %0.4fs" % (end - start))

        # Move on to the next epoch's checkpoint.
        args.checkepoch = args.checkepoch + 1

        del data_iter
        del dataset
        del dataloader

        torch.cuda.empty_cache()
        gc.collect()
Пример #3
0
if __name__ == '__main__':

    args = parse_args()

    print('Called with args:')
    print(args)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)

    set_cfgs = [
            'ANCHOR_SCALES', '[2, 4 , 8, 16, 32]', 'ANCHOR_RATIOS', '[1.0/2 , 3.0/4 , 1 , 4.0/3 , 2 ]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    cfg_from_list(set_cfgs)

    cfg.USE_GPU_NMS = args.cuda



    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
# Per-dataset overrides handed to cfg_from_list() by main().
_DATASET_SET_CFGS = {
    "virat": [
        'ANCHOR_SCALES', '[1, 2, 3]', 'ANCHOR_RATIOS', '[0.5,1,2]',
        'MAX_NUM_GT_BOXES', '15'
    ],
    "ucfsport": [
        'ANCHOR_SCALES', '[4,8,16,24,28]', 'ANCHOR_RATIOS', '[0.5,1,2]',
        'MAX_NUM_GT_BOXES', '2'
    ],
    "urfall": [
        'ANCHOR_SCALES', '[4,8,16,24,28]', 'ANCHOR_RATIOS', '[0.5,1,2]',
        'MAX_NUM_GT_BOXES', '1'
    ],
    "imfd": [
        'ANCHOR_SCALES', '[4,8,16,24,28]', 'ANCHOR_RATIOS', '[0.5,1,2]',
        'MAX_NUM_GT_BOXES', '1'
    ],
    "jhmdb": [
        'ANCHOR_SCALES', '[4, 8,16,24,28]', 'ANCHOR_RATIOS', '[0.5,1,2]',
        'MAX_NUM_GT_BOXES', '1'
    ],
    "ucf24": [
        'ANCHOR_SCALES', '[4,8,16,24,28]', 'ANCHOR_RATIOS', '[0.5,1,2]',
        'MAX_NUM_GT_BOXES', '1'
    ],
}

# Dataset name -> (cfg section, attribute of that section with the class count).
_DATASET_CFG_SECTIONS = {
    'virat': ('VIRAT', 'NUM_CLASS'),
    'ucfsport': ('UCFSPORT', 'NUM_CLASSES'),
    'urfall': ('URFD', 'NUM_CLASSES'),
    'imfd': ('IMFD', 'NUM_CLASSES'),
    'jhmdb': ('JHMDB', 'NUM_CLASSES'),
    'ucf24': ('UCF24', 'NUM_CLASSES'),
}

# Ordered (old, new) substring renames that map keys of the fine-tuning
# checkpoint onto this model's parameter names. They are applied one after the
# other, so the order is significant (later entries clean up the output of
# earlier ones).
_PRETRAINED_KEY_RENAMES = (
    ('module.base_model.conv1', 'RCNN_base1.0'),
    ('module.base_model.bn1', 'RCNN_base1.1'),
    ('module.base_model.layer1.0', 'RCNN_base1.4.0'),
    ('module.base_model.layer1.1', 'RCNN_base1.4.1'),
    ('module.base_model.layer1.2', 'RCNN_base1.4.2'),
    ('module.base_model.layer2.0', 'RCNN_base1.5.0'),
    ('module.base_model.layer2.1', 'RCNN_base1.5.1'),
    ('module.base_model.layer2.2', 'RCNN_base1.5.2'),
    ('module.base_model.layer2.3', 'RCNN_base1.5.3'),
    ('module.base_model.layer3.0', 'RCNN_base1.6.0'),
    ('module.base_model.layer3.1', 'RCNN_base1.6.1'),
    ('module.base_model.layer3.2', 'RCNN_base1.6.2'),
    ('module.base_model.layer3.3', 'RCNN_base1.6.3'),
    ('module.base_model.layer3.4', 'RCNN_base1.6.4'),
    ('module.base_model.layer3.5', 'RCNN_base1.6.5'),
    ('module.base_model.layer4.0.', 'RCNN_top.0.0.'),
    ('module.base_model.layer4.1', 'RCNN_top.0.1'),
    ('module.base_model.layer4.2', 'RCNN_top.0.2'),
    ('module.base_model.layer4.0.conv1.net', 'RCNN_top.0.0.conv1'),
    ('module.base_model.layer4.1.conv1.net', 'RCNN_top.0.1.conv1'),
    ('module.base_model.layer4.2.conv1.net', 'RCNN_top.0.2.conv1'),
    ('RCNN_top.0.0.conv1.net', 'RCNN_top.0.0.conv1'),
    ('RCNN_top.0.1.conv1.net', 'RCNN_top.0.1.conv1'),
    ('RCNN_top.0.2.conv1.net', 'RCNN_top.0.2.conv1'),
)


def _rename_pretrained_key(key):
    """Return ``key`` after applying every rename of ``_PRETRAINED_KEY_RENAMES`` in order."""
    for old, new in _PRETRAINED_KEY_RENAMES:
        key = key.replace(old, new)
    return key


def main():
    """Train or evaluate the S-RAD detector selected on the command line.

    Steps: parse the arguments, load ``cfgs/<net>.yml`` plus the per-dataset
    overrides into ``cfg``, build the train/val (and, for ``virat``, test)
    loaders, build the network and its optimizer, optionally fine-tune from
    ``args.tune_from`` and/or resume from a saved checkpoint, then either run
    one evaluation pass (``args.evaluate``) or the epoch loop.

    Raises:
        ValueError: if ``args.dataset`` or ``args.optimizer`` is not one of
            the supported values.
        SystemExit: after the evaluation pass when ``args.evaluate`` is set.
    """
    args = parse_args()
    print('Called with args:')
    print(args)

    # Unknown datasets leave args.set_cfgs as parse_args() produced it.
    if args.dataset in _DATASET_SET_CFGS:
        args.set_cfgs = list(_DATASET_SET_CFGS[args.dataset])

    args.cfg_file = "cfgs/{}.yml".format(args.net)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    np.random.seed(cfg.RNG_SEED)

    #torch.backends.cudnn.benchmark = True
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    cfg.USE_GPU_NMS = args.cuda

    # Class count and model output directory come from the dataset's own cfg
    # section. Fail fast on an unknown dataset: the original only printed a
    # message and later crashed with a NameError on num_class/output_dir.
    if args.dataset not in _DATASET_CFG_SECTIONS:
        raise ValueError("dataset is not defined: {!r}".format(args.dataset))
    section_name, num_class_attr = _DATASET_CFG_SECTIONS[args.dataset]
    dataset_cfg = getattr(cfg, section_name)
    num_class = getattr(dataset_cfg, num_class_attr)
    output_dir = dataset_cfg.output_model_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    #log initialisation
    args.store_name = '_'.join([
        'S-RAD', args.dataset, args.net,
        'segment%d' % args.num_segments, 'e{}'.format(args.max_epochs),
        'session%d' % args.session
    ])
    check_rootfolders(args.store_name, args.dataset)

    #logging
    log_training, logger = log_info(cfg,
                                    args.store_name,
                                    args.dataset,
                                    args=args)

    #dataloader
    loader_kwargs = dict(dataset=args.dataset,
                         num_segments=args.num_segments,
                         interval=args.interval,
                         batch_size=args.batch_size,
                         input_sampling=True,
                         split_num=args.splits,
                         pathway=args.pathway)
    train_loader = construct_loader(cfg, split='train', **loader_kwargs)
    val_loader = construct_loader(cfg, split='val', **loader_kwargs)
    if args.dataset == 'virat':
        # Only virat gets a separate test loader.
        test_loader = construct_loader(cfg, split='test', **loader_kwargs)

    if args.cuda:
        cfg.CUDA = True

    # Initialize the network here.
    if args.net == 'vgg16':
        S_RAD = vgg16(num_class,
                      pretrained=True,
                      class_agnostic=args.class_agnostic,
                      loss_type=args.loss_type)
    elif args.net == 'res50':
        S_RAD = resnet(num_class,
                       num_layers=50,
                       base_model='resnet50',
                       n_segments=args.num_segments,
                       n_div=args.shift_div,
                       place=args.shift_place,
                       pretrain=args.pretrain,
                       shift=args.shift,
                       class_agnostic=args.class_agnostic,
                       loss_type=args.loss_type,
                       pathway=args.pathway)
    else:
        # Unknown network: drop into the debugger (kept from the original).
        print("network is not defined")
        pdb.set_trace()

    #create the architecture
    S_RAD.create_architecture()

    # One optimizer param group per trainable tensor; biases get their own
    # learning-rate multiplier and (optionally) no weight decay.
    lr = args.lr
    params = []
    for key, value in dict(S_RAD.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{
                    'params': [value],
                    'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1),
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY if cfg.TRAIN.BIAS_DECAY else 0
                }]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]

    if args.cuda:
        S_RAD.cuda()

    #define optimizer
    if args.optimizer == "adam":
        # NOTE(review): the param groups above were already built with the
        # unscaled lr, so this only changes the `lr` value passed to train().
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
    else:
        # Previously `optimizer` stayed undefined and failed later as NameError.
        raise ValueError("optimizer is not defined: {!r}".format(args.optimizer))

    ## adding temporal shift pretrained kinetics weights
    if args.tune_from:
        print("=> fine-tuning from '{}'".format(args.tune_from))
        sd = torch.load(args.tune_from)
        sd = sd['state_dict']
        model_dict = S_RAD.state_dict()
        # Rename every checkpoint key the model does not know.
        replace_dict = [(_rename_pretrained_key(k), k)
                        for k in sd if k not in model_dict]
        for k_new, k in replace_dict:
            sd[k_new] = sd.pop(k)
        set_diff = set(sd) ^ set(model_dict)
        print('#### Notice: keys that failed to load: {}'.format(set_diff))
        if args.dataset not in args.tune_from:  # new dataset
            print('=> New dataset, do not load fc weights')
            sd = {k: v for k, v in sd.items() if 'fc' not in k}

        model_dict.update(sd)
        S_RAD.load_state_dict(model_dict)

    if args.resume:
        load_name = os.path.join(
            output_dir,
            'S-RAD_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                        args.checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']

        sd = checkpoint['model']
        model_dict = S_RAD.state_dict()
        # Checkpoint keys unknown to the model get 'RCNN_base' -> 'RCNN_base1'.
        replace_dict = [(k.replace('RCNN_base', 'RCNN_base1'), k)
                        for k in sd if k not in model_dict]
        for k_new, k in replace_dict:
            sd[k_new] = sd.pop(k)
        set_diff = set(sd) ^ set(model_dict)
        print('#### Notice: keys that failed to load: {}'.format(set_diff))
        model_dict.update(sd)
        S_RAD.load_state_dict(model_dict)
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))

    mGPUs = bool(args.mGPUs)
    if mGPUs:
        S_RAD = nn.DataParallel(S_RAD)

    session = args.session

    if args.evaluate:
        validate_voc(val_loader, S_RAD, args.start_epoch, num_class,
                     args.num_segments, session, args.batch_size,
                     cfg, log_training, args.dataset, args.pathway,
                     args.eval_metrics)
        # The original had a bare `sys.exit` (never called), so evaluation
        # silently continued into the training loop below.
        sys.exit()

    for epoch in range(args.start_epoch, args.max_epochs + 1):

        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma

        if args.dataset == 'virat':
            # NOTE(review): `data` is not assigned anywhere in this function;
            # unless it is a module-level name this raises NameError -- confirm.
            train(train_loader, S_RAD, lr, optimizer, epoch, num_class,
                  args.batch_size, session, mGPUs, logger, output_dir, data,
                  cfg, args.acc_step, log_training)

            # evaluate on validation set
            validate_virat(val_loader, S_RAD, epoch, num_class,
                           args.num_segments, session, args.batch_size, data,
                           cfg, log_training, args.dataset)

            if epoch % 10 == 0:
                validate_virat(test_loader, S_RAD, epoch, num_class,
                               args.num_segments, session, args.batch_size,
                               data, cfg, log_training, args.dataset)

        else:
            # ucfsport, urfall, imfd, jhmdb and ucf24 share one train/validate
            # recipe; jhmdb is validated on even epochs only.
            train(train_loader, S_RAD, lr, optimizer, epoch, num_class,
                  args.batch_size, session, mGPUs, logger, output_dir, cfg,
                  args.acc_step, log_training)
            if args.dataset != 'jhmdb' or epoch % 2 == 0:
                validate_voc(val_loader, S_RAD, epoch, num_class,
                             args.num_segments, session, args.batch_size,
                             cfg, log_training, args.dataset, args.pathway,
                             args.eval_metrics)
Пример #5
0
            cv2.rectangle(im, bbox[0:2], bbox[2:4], (249, 102, 183), 1)
            txt = '%s: %.3f' % (class_name, score)
            if cv_chinese:
                img = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(img)
                font = ImageFont.truetype("fonts/STFANGSO.TTF", 26, encoding="utf-8")
                draw.text((bbox[0] + 5, bbox[1] + 20), txt, (249, 102, 183), font=font)
                im = np.array(img)[:, :, ::-1]
            else:
                cv2.putText(im, txt, (bbox[0] + 5, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX,
                            0.5, (249, 102, 183), thickness=1)
    return im


if __name__ == '__main__':
    # Anchor configuration first, then turn off flipped training images.
    cfg_from_list(['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'])
    cfg.TRAIN.USE_FLIPPED = False

    # Build the detector for the backbone named by `net` (pretrained=False
    # for every supported backbone).
    if net == 'vgg16':
        fasterRCNN = vgg16(all_classes, pretrained=False,
                           class_agnostic=class_agnostic)
    elif net in ('res101', 'res50', 'res152'):
        # The digits following "res" are the ResNet depth (101 / 50 / 152).
        fasterRCNN = resnet(all_classes, int(net[len('res'):]),
                            pretrained=False, class_agnostic=class_agnostic)
    else:
        print("network is not defined")
Пример #6
0
def main(args):
    """Run Grad-CAM and Grad-CAM++ on one image with a trained Faster R-CNN.

    The function selects the imdb names and anchor settings for
    ``args.dataset``, loads the config file plus overrides, builds the
    validation roidb, restores the checkpoint identified by
    ``args.checksession`` / ``args.checkepoch`` / ``args.checkpoint``,
    computes the CAM masks for the image at ``args.input`` and passes the
    resulting crop and heatmaps to ``save_image``.

    Args:
        args: Parsed command-line namespace. Attributes read here: ``cuda``,
            ``dataset``, ``net``, ``large_scale``, ``load_dir``,
            ``checksession``, ``checkepoch``, ``checkpoint``,
            ``class_agnostic``, ``input`` and ``set_cfgs``.

    Raises:
        Exception: If the checkpoint directory does not exist.
        ValueError: If ``args.net`` is not one of the supported backbones.
    """
    print(cfg)
    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    np.random.seed(cfg.RNG_SEED)
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "vg":
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']

    # NOTE(review): the large-scale config is looked up relative to
    # "./../../" while the default one is relative to the working directory;
    # confirm that this asymmetry is intended.
    args.cfg_file = "./../../cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception('There is no input directory for loading network from ' + input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))

    # Initialize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes, 50, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes, 152, pretrained=False, class_agnostic=args.class_agnostic)
    else:
        # Fail with a clear message instead of dropping into the debugger and
        # then hitting a NameError on the unbound ``fasterRCNN``.
        raise ValueError("network is not defined: {}".format(args.net))

    fasterRCNN.create_architecture()

    # Restore the trained weights (and the pooling mode, when it was saved).
    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')

    # Load the image; its original size is forwarded to the CAM wrappers.
    path = os.path.expanduser(args.input)
    original_image = asarray(Image.open(path))
    height, width = original_image.shape[:2]
    image = roibatchLoader(roidb, ratio_list, ratio_index, 1,
                           imdb.num_classes, training=False, normalize=False)
    images = torch.utils.data.DataLoader(image, batch_size=1,
                                         shuffle=False, num_workers=0,
                                         pin_memory=True)

    # NOTE(review): ``images_iter`` is not consumed below and ``inputs`` still
    # carries the dataset object itself; presumably a batch drawn from the
    # iterator is what the CAM wrappers expect - confirm against GradCAM.
    images_iter = iter(images)
    inputs = {"image": image, "height": height, "width": width}

    # Grad-CAM
    layer_name = get_last_conv_name(fasterRCNN)
    grad_cam = GradCAM(fasterRCNN, layer_name)
    mask, box, class_id = grad_cam(inputs)  # cam mask
    grad_cam.remove_handlers()

    # Crop the predicted box out of the channel-reversed image and overlay
    # the Grad-CAM mask on that crop.
    image_dict = {}
    img = original_image[..., ::-1]
    x1, y1, x2, y2 = box
    image_dict['predict_box'] = img[y1:y2, x1:x2]
    image_cam, image_dict['heatmap'] = gen_cam(img[y1:y2, x1:x2], mask)

    # Grad-CAM++ on the same network and layer as Grad-CAM above.
    grad_cam_plus_plus = GradCamPlusPlus(fasterRCNN, layer_name)
    mask_plus_plus = grad_cam_plus_plus(inputs)  # cam mask
    _, image_dict['heatmap++'] = gen_cam(img[y1:y2, x1:x2], mask_plus_plus)
    grad_cam_plus_plus.remove_handlers()

    # Get the name of the predicted class.
    # NOTE(review): everywhere else in this function ``cfg`` is accessed via
    # RNG_SEED / TRAIN / POOLING_MODE; confirm that it also provides
    # DATASETS.TEST, otherwise this lookup needs another source of class names.
    meta = MetadataCatalog.get(
        cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
    )
    label = meta.thing_classes[class_id]

    print("label:{}".format(label))
    # # GuidedBackPropagation
    # gbp = GuidedBackPropagation(model)
    # inputs['image'].grad.zero_()  # make gradient zero
    # grad = gbp(inputs)
    # print("grad.shape:{}".format(grad.shape))
    # gb = gen_gb(grad)
    # gb = gb[y1:y2, x1:x2]
    # image_dict['gb'] = gb
    # Guided Grad-CAM
    # cam_gb = gb * mask[..., np.newaxis]
    # image_dict['cam_gb'] = norm_image(cam_gb)

    save_image(image_dict, os.path.basename(path))
Пример #7
0
def load_detector(dataset):
    """Build a VGG16 Faster R-CNN for ``dataset`` and load its checkpoint.

    Args:
        dataset: Forwarded to ``parse_args``. The resulting ``args.dataset``
            must be ``"vg"``, ``"vrd"`` or ``"pascal_voc"``.

    Returns:
        The network with the checkpoint weights loaded, switched to eval
        mode, and moved to the GPU when ``args.cuda`` is set.

    Raises:
        ValueError: If ``args.dataset`` is not one of the supported datasets.
        Exception: If the checkpoint directory does not exist.
    """
    args = parse_args(dataset)
    print('Called with args:')
    print(args)

    np.random.seed(cfg.RNG_SEED)
    if args.dataset == "vg":
        args.imdb_name = "vg_2007_trainval"
        args.imdbval_name = "vg_2007_trainval"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
        from open_relation.dataset.vg.label_hier.obj_hier import objnet
        classes = np.asarray(objnet.get_raw_labels())
    elif args.dataset == "vrd":
        args.imdb_name = "vrd_2007_trainval"
        args.imdbval_name = "vrd_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
        from open_relation.dataset.vrd.label_hier.obj_hier import objnet
        classes = np.asarray(objnet.get_raw_labels())
    elif args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
        classes = np.asarray([
            '__background__', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
            'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
            'tvmonitor'
        ])
    else:
        # Without this branch ``classes`` stays unbound and the failure only
        # surfaces later as an UnboundLocalError when the network is built.
        raise ValueError(
            'Unsupported dataset for load_detector: {!r}'.format(args.dataset))

    cfg_name = "cfgs/{}_ls.yml" if args.large_scale else "cfgs/{}.yml"
    args.cfg_file = os.path.join(project_root, cfg_name.format(args.net))
    cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # Initialize the network here; only the VGG16 backbone is built.
    fasterRCNN = vgg16(classes,
                       pretrained=False,
                       class_agnostic=args.class_agnostic)
    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')

    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()

    fasterRCNN.eval()
    return fasterRCNN