def __init__(self, base_batch, class_agnostic, base_net=50,
             cfg_file=__detector_prefix_path__ + '/model/yml/res50.yml',
             load_name='', gpu_id=0):
    np.random.seed()
    self.batch_size = base_batch
    self.class_agnostic = class_agnostic
    cfg_from_file(cfg_file)
    self.pascal_classes = np.asarray(['__background__',  # always index 0
                                      'front_foot', 'back_foot'])
    self.fasterRCNN = resnet(self.pascal_classes, base_net,
                             pretrained=False, class_agnostic=False)
    self.fasterRCNN.create_architecture()
    self.fasterRCNN.cuda()
    self.fasterRCNN.eval()

    # load checkpoint
    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    self.fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
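# Hypothetical usage sketch of the wrapper above. The class name `FootDetector`
# and any method called after construction are assumptions for illustration
# only; the constructor arguments are the ones defined in __init__ above.
#
#   detector = FootDetector(base_batch=1, class_agnostic=False,
#                           load_name='models/res50_foot.pth', gpu_id=0)
#   # detector.fasterRCNN is now an eval-mode ResNet-50 Faster R-CNN on the GPU,
#   # configured for the '__background__'/'front_foot'/'back_foot' classes.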
def loop():
    args = parse_args()

    print('Called with args:')
    print(args)

    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_test"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "vg":
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']

    args.cfg_file = "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    # initialize the network here.
    if args.net == 'vgg16':
        fpn = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fpn = resnet(imdb.classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fpn = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fpn = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fpn.create_architecture()
    print('load model successfully!')

    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True
        fpn.cuda()

    start = time.time()
    max_per_image = 100
    vis = True  # args.vis
    thresh = 0.0  # same score threshold with or without visualization

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)

    for h in range(200):
        dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size,
                                 imdb.num_classes, training=False, normalize=False)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                                                 shuffle=False, num_workers=0,
                                                 pin_memory=True)
        data_iter = iter(dataloader)

        _t = {'im_detect': time.time(), 'misc': time.time()}
        det_file = os.path.join(output_dir, 'detections.pkl')

        input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
        if not os.path.exists(input_dir):
            raise Exception('There is no input directory for loading network from ' + input_dir)
        load_name = os.path.join(input_dir,
                                 'fpn_{}_{}_{}.pth'.format(args.checksession,
                                                           args.checkepoch,
                                                           args.checkpoint))
        print("load checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        fpn.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']

        fpn.eval()
        empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

        for i in range(num_images):
            data = next(data_iter)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])

            det_tic = time.time()
            rois, cls_prob, bbox_pred, \
                _, _, _, _, _ = fpn(im_data, im_info, gt_boxes, num_boxes)

            scores = cls_prob.data        # 1*300*10
            boxes = rois.data[:, :, 1:5]  # 1*300*4

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data  # 1*300*40
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = boxes

            # undo the image scaling applied by the data loader
            pred_boxes /= data[1][0][2].cuda()

            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()
            det_toc = time.time()
            detect_time = det_toc - det_tic
            misc_tic = time.time()

            if vis:
                im = cv2.imread(imdb.image_path_at(i))
                im2show = np.copy(im)

            for j in range(1, imdb.num_classes):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                    # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                    cls_dets = cls_dets[order]
                    # force CPU NMS when not running with CUDA
                    keep = nms(cls_dets, cfg.TEST.NMS, not args.cuda)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    if vis:
                        im2show = vis_detections(im2show, imdb.classes[j],
                                                 cls_dets.cpu().numpy(), 0.3)
                    all_boxes[j][i] = cls_dets.cpu().numpy()
                else:
                    all_boxes[j][i] = empty_array

            # Limit to max_per_image detections *over all classes*
            if max_per_image > 0:
                image_scores = np.hstack([all_boxes[j][i][:, -1]
                                          for j in range(1, imdb.num_classes)])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]

            misc_toc = time.time()
            nms_time = misc_toc - misc_tic

            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r'
                             .format(i + 1, num_images, detect_time, nms_time))
            sys.stdout.flush()

            if vis:
                cv2.imwrite('images/result%d_%d.png' % (args.checkepoch, i), im2show)
                # pdb.set_trace()
                # cv2.imshow('test', im2show)
                # cv2.waitKey(0)

            del data
            del pred_boxes
            del scores
            torch.cuda.empty_cache()

        with open(det_file, 'wb') as f:
            cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

        print('Evaluating detections')
        aps, clss = imdb.evaluate_detections(all_boxes, output_dir)
        # print(aps)
        with open("result.txt", 'a+') as f:
            # print(args.checkepoch)
            lp = ""
            cc = 0
            for b in clss:
                if cc != len(clss) - 1:
                    lp = lp + "'" + str(b) + ":" + str(aps[cc]) + "',"
                else:
                    lp = lp + "'" + str(b) + ":" + str(aps[cc]) + "'"
                cc = cc + 1
            sp = "[" + lp + "] ls:" + str(args.checksession) + "_" + str(args.checkepoch)
            # print(sp)
            f.write(sp + "\n")

        end = time.time()
        print("test time: %0.4fs" % (end - start))

        args.checkepoch = args.checkepoch + 1
        del data_iter
        del dataset
        del dataloader
        torch.cuda.empty_cache()
        # torch.empty_cache()
        gc.collect()
os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda

DATASET = 'fabric_binary'
P_TYPE = 'P-%d'
P_NUM = 15

print('Called with args:')
print(args)

test_metrics_list = []

# ===========================TRAIN============================
# args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)
args.cfg_file = "cfgs/fabric.yml"

if args.cfg_file is not None:
    cfg_from_file(args.cfg_file)

# -- Note: Use validation set and disable the flipped to enable faster loading.
if args.cuda:
    cfg.CUDA = True

# xxx: different from training
cfg.TRAIN.USE_FLIPPED = False
cfg.USE_GPU_NMS = True if args.cuda else False

print('Using config:')
pprint.pprint(cfg)
# np.random.seed(cfg.RNG_SEED)

# train set
imdb_name = DATASET + "_train_supervised"
imdb, roidb, ratio_list, ratio_index = combined_roidb(imdb_name)
train_size = len(roidb)
def main():
    args = parse_args()

    print('Called with args:')
    print(args)

    best_meanap = 0
    meanap = 0

    if args.dataset == "virat":
        args.set_cfgs = ['ANCHOR_SCALES', '[1, 2, 3]',
                         'ANCHOR_RATIOS', '[0.5,1,2]',
                         'MAX_NUM_GT_BOXES', '15']
    if args.dataset == "ucfsport":
        args.set_cfgs = ['ANCHOR_SCALES', '[4,8,16,24,28]',
                         'ANCHOR_RATIOS', '[0.5,1,2]',
                         'MAX_NUM_GT_BOXES', '2']
    if args.dataset == "urfall":
        args.set_cfgs = ['ANCHOR_SCALES', '[4,8,16,24,28]',
                         'ANCHOR_RATIOS', '[0.5,1,2]',
                         'MAX_NUM_GT_BOXES', '1']
    if args.dataset == "imfd":
        args.set_cfgs = ['ANCHOR_SCALES', '[4,8,16,24,28]',
                         'ANCHOR_RATIOS', '[0.5,1,2]',
                         'MAX_NUM_GT_BOXES', '1']
    if args.dataset == "jhmdb":
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8,16,24,28]',
                         'ANCHOR_RATIOS', '[0.5,1,2]',
                         'MAX_NUM_GT_BOXES', '1']
    if args.dataset == "ucf24":
        args.set_cfgs = ['ANCHOR_SCALES', '[4,8,16,24,28]',
                         'ANCHOR_RATIOS', '[0.5,1,2]',
                         'MAX_NUM_GT_BOXES', '1']

    args.cfg_file = "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    np.random.seed(cfg.RNG_SEED)
    # torch.backends.cudnn.benchmark = True
    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    cfg.USE_GPU_NMS = args.cuda

    if args.dataset == 'virat':
        num_class = cfg.VIRAT.NUM_CLASS
        output_dir = cfg.VIRAT.output_model_dir + "/" + args.net + "/" + args.dataset
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
    elif args.dataset == 'ucfsport':
        num_class = cfg.UCFSPORT.NUM_CLASSES
        output_dir = cfg.UCFSPORT.output_model_dir + "/" + args.net + "/" + args.dataset
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
    elif args.dataset == 'urfall':
        num_class = cfg.URFD.NUM_CLASSES
        output_dir = cfg.URFD.output_model_dir + "/" + args.net + "/" + args.dataset
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
    elif args.dataset == 'imfd':
        num_class = cfg.IMFD.NUM_CLASSES
        output_dir = cfg.IMFD.output_model_dir + "/" + args.net + "/" + args.dataset
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
    elif args.dataset == 'jhmdb':
        num_class = cfg.JHMDB.NUM_CLASSES
        output_dir = cfg.JHMDB.output_model_dir + "/" + args.net + "/" + args.dataset
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
    elif args.dataset == 'ucf24':
        num_class = cfg.UCF24.NUM_CLASSES
        output_dir = cfg.UCF24.output_model_dir + "/" + args.net + "/" + args.dataset
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
    else:
        print("dataset is not defined")

    # log initialisation
    args.store_name = '_'.join([
        'S-RAD', args.dataset, args.net,
        'segment%d' % args.num_segments,
        'e{}'.format(args.max_epochs),
        'session%d' % args.session
    ])
    check_rootfolders(args.store_name, args.dataset)

    # logging
    log_training, logger = log_info(cfg, args.store_name, args.dataset, args=args)

    # dataloaders
    train_loader = construct_loader(cfg,
                                    dataset=args.dataset,
                                    num_segments=args.num_segments,
                                    interval=args.interval,
                                    batch_size=args.batch_size,
                                    split='train',
                                    input_sampling=True,
                                    split_num=args.splits,
                                    pathway=args.pathway)

    val_loader = construct_loader(cfg,
                                  dataset=args.dataset,
                                  num_segments=args.num_segments,
                                  interval=args.interval,
                                  batch_size=args.batch_size,
                                  split='val',
                                  input_sampling=True,
                                  split_num=args.splits,
                                  pathway=args.pathway)

    if args.dataset == 'virat':
        test_loader = construct_loader(cfg,
                                       dataset=args.dataset,
                                       num_segments=args.num_segments,
                                       interval=args.interval,
                                       batch_size=args.batch_size,
                                       split='test',
                                       input_sampling=True,
                                       split_num=args.splits,
                                       pathway=args.pathway)

    # prevent something not % n_GPU
    if args.cuda:
        cfg.CUDA = True

    # initialize the network here.
    if args.net == 'vgg16':
        S_RAD = vgg16(num_class, pretrained=True,
                      class_agnostic=args.class_agnostic,
                      loss_type=args.loss_type)
    elif args.net == 'res50':
        S_RAD = resnet(num_class, num_layers=50, base_model='resnet50',
                       n_segments=args.num_segments, n_div=args.shift_div,
                       place=args.shift_place, pretrain=args.pretrain,
                       shift=args.shift, class_agnostic=args.class_agnostic,
                       loss_type=args.loss_type, pathway=args.pathway)
    else:
        print("network is not defined")
        pdb.set_trace()

    # create the architecture
    S_RAD.create_architecture()

    # set the parameters
    lr = args.lr
    params = []
    for key, value in dict(S_RAD.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value],
                            'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1),
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{'params': [value],
                            'lr': lr,
                            'weight_decay': cfg.TRAIN.WEIGHT_DECAY}]

    if args.cuda:
        S_RAD.cuda()

    # define optimizer
    if args.optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)

    # adding UCF sport weights to the first branch base1
    # if args.pathway == "two_pathway":
    #     if args.tune_from:

    # adding temporal-shift pretrained Kinetics weights
    # if args.pathway == "naive":
    if args.tune_from:
        print(("=> fine-tuning from '{}'".format(args.tune_from)))
        sd = torch.load(args.tune_from)
        sd = sd['state_dict']
        model_dict = S_RAD.state_dict()
        replace_dict = []
        for k, v in sd.items():
            if k not in model_dict:
                replace_dict.append(
                    (k.replace('module.base_model.conv1', 'RCNN_base1.0')
                      .replace('module.base_model.bn1', 'RCNN_base1.1')
                      .replace('module.base_model.layer1.0', 'RCNN_base1.4.0')
                      .replace('module.base_model.layer1.1', 'RCNN_base1.4.1')
                      .replace('module.base_model.layer1.2', 'RCNN_base1.4.2')
                      .replace('module.base_model.layer2.0', 'RCNN_base1.5.0')
                      .replace('module.base_model.layer2.1', 'RCNN_base1.5.1')
                      .replace('module.base_model.layer2.2', 'RCNN_base1.5.2')
                      .replace('module.base_model.layer2.3', 'RCNN_base1.5.3')
                      .replace('module.base_model.layer3.0', 'RCNN_base1.6.0')
                      .replace('module.base_model.layer3.1', 'RCNN_base1.6.1')
                      .replace('module.base_model.layer3.2', 'RCNN_base1.6.2')
                      .replace('module.base_model.layer3.3', 'RCNN_base1.6.3')
                      .replace('module.base_model.layer3.4', 'RCNN_base1.6.4')
                      .replace('module.base_model.layer3.5', 'RCNN_base1.6.5')
                      .replace('module.base_model.layer4.0.', 'RCNN_top.0.0.')
                      .replace('module.base_model.layer4.1', 'RCNN_top.0.1')
                      .replace('module.base_model.layer4.2', 'RCNN_top.0.2')
                      .replace('module.base_model.layer4.0.conv1.net', 'RCNN_top.0.0.conv1')
                      .replace('module.base_model.layer4.1.conv1.net', 'RCNN_top.0.1.conv1')
                      .replace('module.base_model.layer4.2.conv1.net', 'RCNN_top.0.2.conv1')
                      .replace('RCNN_top.0.0.conv1.net', 'RCNN_top.0.0.conv1')
                      .replace('RCNN_top.0.1.conv1.net', 'RCNN_top.0.1.conv1')
                      .replace('RCNN_top.0.2.conv1.net', 'RCNN_top.0.2.conv1'),
                     k))
        for k_new, k in replace_dict:
            sd[k_new] = sd.pop(k)

        keys1 = set(list(sd.keys()))
        keys2 = set(list(model_dict.keys()))
        set_diff = (keys1 - keys2) | (keys2 - keys1)
        print('#### Notice: keys that failed to load: {}'.format(set_diff))
        if args.dataset not in args.tune_from:  # new dataset
            print('=> New dataset, do not load fc weights')
            sd = {k: v for k, v in sd.items() if 'fc' not in k}
        model_dict.update(sd)
        S_RAD.load_state_dict(model_dict)

    if args.resume:
        load_name = os.path.join(
            output_dir,
            'S-RAD_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                        args.checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']
        sd = checkpoint['model']
        model_dict = S_RAD.state_dict()
        replace_dict = []
        for k, v in sd.items():
            if k not in model_dict:
                replace_dict.append((k.replace('RCNN_base', 'RCNN_base1'), k))
        for k_new, k in replace_dict:
            sd[k_new] = sd.pop(k)
        keys1 = set(list(sd.keys()))
        keys2 = set(list(model_dict.keys()))
        set_diff = (keys1 - keys2) | (keys2 - keys1)
        print('#### Notice: keys that failed to load: {}'.format(set_diff))
        model_dict.update(sd)
        S_RAD.load_state_dict(model_dict)
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))

    if args.mGPUs:
        mGPUs = True
        S_RAD = nn.DataParallel(S_RAD)
    else:
        mGPUs = False

    session = args.session

    if args.evaluate:
        validate_voc(val_loader, S_RAD, args.start_epoch, num_class,
                     args.num_segments, session, args.batch_size,
                     cfg, log_training, args.dataset, args.pathway,
                     args.eval_metrics)
        sys.exit()

    for epoch in range(args.start_epoch, args.max_epochs + 1):
        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma

        if args.dataset == 'virat':
            # dataloader
            train(train_loader, S_RAD, lr, optimizer, epoch, num_class,
                  args.batch_size, session, mGPUs, logger, output_dir,
                  data, cfg, args.acc_step, log_training)
            # evaluate on validation set
            validate_virat(val_loader, S_RAD, epoch, num_class,
                           args.num_segments, session, args.batch_size,
                           data, cfg, log_training, args.dataset)
            if epoch % 10 == 0:
                validate_virat(test_loader, S_RAD, epoch, num_class,
                               args.num_segments, session, args.batch_size,
                               data, cfg, log_training, args.dataset)
        elif args.dataset == 'ucfsport':
            train(train_loader, S_RAD, lr, optimizer, epoch, num_class,
                  args.batch_size, session, mGPUs, logger, output_dir,
                  cfg, args.acc_step, log_training)
            validate_voc(val_loader, S_RAD, epoch, num_class,
                         args.num_segments, session, args.batch_size,
                         cfg, log_training, args.dataset, args.pathway,
                         args.eval_metrics)
        elif args.dataset == 'urfall':
            train(train_loader, S_RAD, lr, optimizer, epoch, num_class,
                  args.batch_size, session, mGPUs, logger, output_dir,
                  cfg, args.acc_step, log_training)
            validate_voc(val_loader, S_RAD, epoch, num_class,
                         args.num_segments, session, args.batch_size,
                         cfg, log_training, args.dataset, args.pathway,
                         args.eval_metrics)
        elif args.dataset == 'imfd':
            train(train_loader, S_RAD, lr, optimizer, epoch, num_class,
                  args.batch_size, session, mGPUs, logger, output_dir,
                  cfg, args.acc_step, log_training)
            validate_voc(val_loader, S_RAD, epoch, num_class,
                         args.num_segments, session, args.batch_size,
                         cfg, log_training, args.dataset, args.pathway,
                         args.eval_metrics)
        elif args.dataset == 'jhmdb':
            train(train_loader, S_RAD, lr, optimizer, epoch, num_class,
                  args.batch_size, session, mGPUs, logger, output_dir,
                  cfg, args.acc_step, log_training)
            if epoch % 2 == 0:
                validate_voc(val_loader, S_RAD, epoch, num_class,
                             args.num_segments, session, args.batch_size,
                             cfg, log_training, args.dataset, args.pathway,
                             args.eval_metrics)
        elif args.dataset == 'ucf24':
            train(train_loader, S_RAD, lr, optimizer, epoch, num_class,
                  args.batch_size, session, mGPUs, logger, output_dir,
                  cfg, args.acc_step, log_training)
            validate_voc(val_loader, S_RAD, epoch, num_class,
                         args.num_segments, session, args.batch_size,
                         cfg, log_training, args.dataset, args.pathway,
                         args.eval_metrics)
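# The epoch loop above decays the learning rate through adjust_learning_rate(),
# which is imported from elsewhere in the repository. A minimal sketch of what
# that helper is assumed to do, mirroring the common faster-rcnn.pytorch
# utility: scale every parameter group's learning rate by the decay factor.
#
#   def adjust_learning_rate(optimizer, decay=0.1):
#       for param_group in optimizer.param_groups:
#           param_group['lr'] = decay * param_group['lr']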
def main(args):
    # setup_logger(name="fvcore")
    # logger = setup_logger()
    # logger.info("Arguments: " + str(args))
    # cfg = setup_cfg(args)
    print(cfg)

    # build_model
    # model = build_model(cfg)
    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    np.random.seed(cfg.RNG_SEED)

    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "vg":
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']

    args.cfg_file = "./../../cfgs/{}_ls.yml".format(args.net) if args.large_scale \
        else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception('There is no input directory for loading network from ' + input_dir)
    load_name = os.path.join(input_dir,
                             'faster_rcnn_{}_{}_{}.pth'.format(args.checksession,
                                                               args.checkepoch,
                                                               args.checkpoint))

    # initialize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes, 50, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes, 152, pretrained=False, class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    # load weight
    # checkpointer = DetectionCheckpointer(model)
    # checkpointer.load(cfg.MODEL.WEIGHTS)
    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')

    # load image
    path = os.path.expanduser(args.input)
    # original_image = read_image(path, format="BGR")
    # height, width = original_image.shape[:2]
    # transform_gen = T.ResizeShortestEdge(
    #     [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST
    # )
    # image = transform_gen.get_transform(original_image).apply_image(original_image)
    # image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)).requires_grad_(True)
    original_image = asarray(Image.open(path))
    height, width = original_image.shape[:2]

    image = roibatchLoader(roidb, ratio_list, ratio_index, 1,
                           imdb.num_classes, training=False, normalize=False)
    images = torch.utils.data.DataLoader(image, batch_size=1, shuffle=False,
                                         num_workers=0, pin_memory=True)
    images_iter = iter(images)

    inputs = {"image": image, "height": height, "width": width}

    # Grad-CAM
    layer_name = get_last_conv_name(fasterRCNN)
    grad_cam = GradCAM(fasterRCNN, layer_name)
    mask, box, class_id = grad_cam(inputs)  # cam mask
    grad_cam.remove_handlers()

    image_dict = {}
    img = original_image[..., ::-1]
    x1, y1, x2, y2 = box
    image_dict['predict_box'] = img[y1:y2, x1:x2]
    image_cam, image_dict['heatmap'] = gen_cam(img[y1:y2, x1:x2], mask)

    # Grad-CAM++
    grad_cam_plus_plus = GradCamPlusPlus(fasterRCNN, layer_name)
    mask_plus_plus = grad_cam_plus_plus(inputs)  # cam mask
    _, image_dict['heatmap++'] = gen_cam(img[y1:y2, x1:x2], mask_plus_plus)
    grad_cam_plus_plus.remove_handlers()

    # get name of classes
    meta = MetadataCatalog.get(
        cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
    )
    label = meta.thing_classes[class_id]
    print("label:{}".format(label))

    # # GuidedBackPropagation
    # gbp = GuidedBackPropagation(model)
    # inputs['image'].grad.zero_()  # make gradient zero
    # grad = gbp(inputs)
    # print("grad.shape:{}".format(grad.shape))
    # gb = gen_gb(grad)
    # gb = gb[y1:y2, x1:x2]
    # image_dict['gb'] = gb

    # Guided Grad-CAM
    # cam_gb = gb * mask[..., np.newaxis]
    # image_dict['cam_gb'] = norm_image(cam_gb)

    save_image(image_dict, os.path.basename(path))
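# The Grad-CAM hook above targets the layer returned by get_last_conv_name(),
# which is defined elsewhere in this repository. A minimal sketch under the
# usual assumption for Grad-CAM utilities: walk the model's named modules and
# keep the name of the last nn.Conv2d encountered.
#
#   def get_last_conv_name(net):
#       layer_name = None
#       for name, m in net.named_modules():
#           if isinstance(m, torch.nn.Conv2d):
#               layer_name = name
#       return layer_name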
                        interpolation=cv2.INTER_LINEAR)
        im_scale_factors.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, np.array(im_scale_factors)


if __name__ == '__main__':

    args = parse_args()

    print('Called with args:')
    print(args)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)  # merge the network parameters from the yml file into cfg
    # if args.set_cfgs is not None:
    #     cfg_from_list(args.set_cfgs)

    cfg.USE_GPU_NMS = args.cuda

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    # if not os.path.exists(input_dir):
    #     raise Exception('There is no input directory for loading network from ' + input_dir)
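# Hypothetical usage sketch (not in the original script) of the blob helper
# whose tail appears above: _get_image_blob returns the stacked image blob and
# the per-image scale factors, which demo-style scripts in this codebase turn
# into an im_info array of [height, width, scale]. The helper name below and
# the im_info layout are assumptions for illustration.
#
#   def _blob_and_info(im):
#       blob, im_scales = _get_image_blob(im)
#       im_info = np.array([[blob.shape[1], blob.shape[2], im_scales[0]]],
#                          dtype=np.float32)
#       return blob, im_info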
if __name__ == '__main__':
    imdb_name = "train_data_25fps_flipped.pkl"
    num_classes = 21
    set_cfgs = ['ANCHOR_SCALES', '[2,4,5,6,8,9,10,12,14,16]',
                'NUM_CLASSES', num_classes]

    cfg_file = "cfgs/{}_{}.yml".format(net, dataset)
    cfg.CUDA = True
    cfg.USE_GPU_NMS = True

    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if set_cfgs is not None:
        cfg_from_list(set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    # for reproducibility
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    if cfg.CUDA:
        torch.cuda.manual_seed_all(cfg.RNG_SEED)

    cudnn.benchmark = True

    # train set
    roidb_path = roidb_dir + "/" + dataset + "/" + imdb_name
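# Minimal sketch (an assumption, not part of the original script): the pickled
# roidb referenced by roidb_path above would normally be read back with pickle
# before building the training dataset; the helper name is hypothetical.
#
#   import pickle
#
#   def load_roidb(roidb_path):
#       with open(roidb_path, 'rb') as f:
#           return pickle.load(f)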
def load_detector(dataset):
    args = parse_args(dataset)

    print('Called with args:')
    print(args)

    np.random.seed(cfg.RNG_SEED)

    if args.dataset == "vg":
        args.imdb_name = "vg_2007_trainval"
        args.imdbval_name = "vg_2007_trainval"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]',
                         'ANCHOR_RATIOS', '[0.5,1,2]']
        from open_relation.dataset.vg.label_hier.obj_hier import objnet
        classes = np.asarray(objnet.get_raw_labels())
    elif args.dataset == "vrd":
        args.imdb_name = "vrd_2007_trainval"
        args.imdbval_name = "vrd_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]',
                         'ANCHOR_RATIOS', '[0.5,1,2]']
        from open_relation.dataset.vrd.label_hier.obj_hier import objnet
        classes = np.asarray(objnet.get_raw_labels())
    elif args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]',
                         'ANCHOR_RATIOS', '[0.5,1,2]']
        classes = np.asarray(['__background__',
                              'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
                              'bus', 'car', 'cat', 'chair', 'cow',
                              'diningtable', 'dog', 'horse', 'motorbike', 'person',
                              'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'])

    args.cfg_file = os.path.join(
        project_root,
        "cfgs/{}_ls.yml".format(args.net) if args.large_scale
        else "cfgs/{}.yml".format(args.net))

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception('There is no input directory for loading network from ' + input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # initialize the network here.
    fasterRCNN = vgg16(classes, pretrained=False, class_agnostic=args.class_agnostic)
    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')

    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()

    fasterRCNN.eval()
    return fasterRCNN
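# Hypothetical helper (not in the original file) showing how the detector
# returned by load_detector() is typically driven in this codebase: the same
# four-tensor forward call used in the test loops above. Preparing im_data,
# im_info, gt_boxes, and num_boxes (blob creation, scaling, device placement)
# is assumed to happen elsewhere; the helper name is an illustration only.
def run_detector(fasterRCNN, im_data, im_info, gt_boxes, num_boxes):
    """Run one forward pass and return class scores, RoI boxes, and deltas."""
    with torch.no_grad():
        rois, cls_prob, bbox_pred, \
            _, _, _, _, _ = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
    scores = cls_prob.data            # per-RoI class probabilities
    boxes = rois.data[:, :, 1:5]      # RoI coordinates (x1, y1, x2, y2)
    return scores, boxes, bbox_pred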