def read_cfgs():
    """Parse command-line arguments and resolve per-dataset configuration.

    Maps ``args.dataset`` to its imdb train/val split names and a
    ``MAX_NUM_GT_*`` cap, picks the matching cfg YAML, loads it into the
    global ``cfg``, and back-fills unset training hyper-parameters from
    ``cfg.TRAIN.COMMON``.

    Returns:
        argparse.Namespace: fully-populated args from ``parse_args()``.
    """
    args = parse_args()
    print('Called with args:')
    print(args)
    # --- Dataset name -> imdb split names + per-image GT cap -------------
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['MAX_NUM_GT_BOXES', '20']
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['MAX_NUM_GT_BOXES', '20']
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = ['MAX_NUM_GT_BOXES', '50']
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = ['MAX_NUM_GT_BOXES', '30']
    elif args.dataset == "vg":
        # train sizes: train, smalltrain, minitrain
        # train scale: ['150-50-20', '150-50-50', '500-150-80',
        #               '750-250-150', '1750-700-450', '1600-400-20']
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = ['MAX_NUM_GT_BOXES', '50']
    elif args.dataset == 'vmrdcompv1':
        args.imdb_name = "vmrd_compv1_trainval"
        args.imdbval_name = "vmrd_compv1_test"
        args.set_cfgs = ['MAX_NUM_GT_BOXES', '20']
    elif args.dataset == "vg_vmrd":
        args.imdb_name = "vmrd_compv1_trainval+vg_150-50-50_minitrain"
        args.imdbval_name = "vmrd_compv1_test"
        args.set_cfgs = ['MAX_NUM_GT_BOXES', '50']
    elif args.dataset == 'bdds':
        args.imdb_name = "bdds_trainval"
        args.imdbval_name = "bdds_test"
        args.set_cfgs = ['MAX_NUM_GT_BOXES', '20']
    elif args.dataset[:7] == 'cornell':
        # Expected form: 'cornell_<A>_<B>_<C>' (e.g. modality/split/fold)
        # -- TODO confirm the exact meaning of the three tokens.
        cornell = args.dataset.split('_')
        args.imdb_name = 'cornell_{}_{}_trainval_{}'.format(cornell[1], cornell[2], cornell[3])
        args.imdbval_name = 'cornell_{}_{}_test_{}'.format(cornell[1], cornell[2], cornell[3])
        args.set_cfgs = ['MAX_NUM_GT_BOXES', '50']
    elif args.dataset[:8] == 'jacquard':
        # Expected form: 'jacquard_<A>_<B>'; grasp dataset, so the cap is
        # on grasps rather than boxes.
        jacquard = args.dataset.split('_')
        args.imdb_name = 'jacquard_{}_trainval_{}'.format(jacquard[1], jacquard[2])
        args.imdbval_name = 'jacquard_{}_test_{}'.format(jacquard[1], jacquard[2])
        args.set_cfgs = ['MAX_NUM_GT_GRASPS', '1000']
    # NOTE(review): an unmatched dataset name falls through silently here;
    # args.imdb_name/set_cfgs would then keep their parser defaults.
    # --- Choose the cfg YAML (cornell/jacquard omit the dataset prefix) --
    if args.dataset[:7] == 'cornell':
        args.cfg_file = "cfgs/cornell_{}_{}_ls.yml".format(args.frame, args.net) if args.large_scale \
            else "cfgs/cornell_{}_{}.yml".format(args.frame, args.net)
    elif args.dataset[:8] == 'jacquard':
        args.cfg_file = "cfgs/jacquard_{}_{}_ls.yml".format(args.frame, args.net) if args.large_scale \
            else "cfgs/jacquard_{}_{}.yml".format(args.frame, args.net)
    else:
        args.cfg_file = "cfgs/{}_{}_{}_ls.yml".format(args.dataset, args.frame, args.net) if args.large_scale \
            else "cfgs/{}_{}_{}.yml".format(args.dataset, args.frame, args.net)
    print("Using cfg file: " + args.cfg_file)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    # Back-fill any unset (falsy) CLI hyper-parameters from the loaded cfg.
    # NOTE(review): a deliberate 0 on the CLI is also treated as "unset".
    if not args.disp_interval:
        args.disp_interval = cfg.TRAIN.COMMON.DISPLAY
    if not args.batch_size:
        args.batch_size = cfg.TRAIN.COMMON.IMS_PER_BATCH
    if not args.lr_decay_step:
        args.lr_decay_step = cfg.TRAIN.COMMON.LR_DECAY_STEPSIZE[0]
    if not args.lr:
        args.lr = cfg.TRAIN.COMMON.LEARNING_RATE
    if not args.lr_decay_gamma:
        args.lr_decay_gamma = cfg.TRAIN.COMMON.GAMMA
    if not args.max_epochs:
        args.max_epochs = cfg.TRAIN.COMMON.MAX_EPOCH
    print('Using config:')
    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    cfg.TRAIN.COMMON.USE_FLIPPED = True
    cfg.USE_GPU_NMS = args.cuda
    pprint.pprint(cfg)
    if args.cuda:
        cfg.CUDA = True
    return args
def main(scene_img_path, query_img_path):
    """Run one-shot query-based detection on a single scene/query image pair.

    Loads a res50 one-shot Faster R-CNN checkpoint, preprocesses the scene
    and query images, runs one forward pass, applies bbox regression + NMS,
    shows the detections with matplotlib and writes the visualization to
    ``./test_img/``.

    Args:
        scene_img_path: path of the image to search in.
        query_img_path: path of the exemplar (query) image.

    Relies on module-level ``args`` and the global ``cfg``.
    """
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    np.random.seed(cfg.RNG_SEED)
    if args.dataset == "coco":
        args.imdb_name = "coco_2017_train"
        args.imdbval_name = "coco_2017_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    # args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)
    # Non-zero `group` selects a group-specific cfg variant.
    args.cfg_file = "cfgs/{}_{}.yml".format(
        args.net, args.group) if args.group != 0 else "cfgs/{}.yml".format(
            args.net)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    print('Using config:')
    pprint.pprint(cfg)
    cfg.TRAIN.USE_FLIPPED = False
    # Build the "unseen" evaluation imdb/roidb (only used here for class
    # metadata and output-dir naming; the actual input comes from disk).
    imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb(
        args.imdbval_name, False, seen=args.seen)
    imdb_vu.competition_mode(on=True)
    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' + input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))
    # initilize the network here. Only res50 is supported; anything else
    # drops into the debugger.
    if args.net == 'res50':
        fasterRCNN = resnet(imdb_vu.classes, 50, pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()
    fasterRCNN.create_architecture()
    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    # initilize the tensor holder here (placeholders; replaced below).
    im_data = torch.FloatTensor(1)
    query = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    catgory = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        query = query.cuda()
        im_info = im_info.cuda()
        catgory = catgory.cuda()
        gt_boxes = gt_boxes.cuda()
    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()
    max_per_image = 100   # cap on detections kept per image
    thresh = 0.05         # minimum score to keep a detection pre-NMS
    # output_dir_vs = get_output_dir(imdb_vs, 'faster_rcnn_seen')
    output_dir_vu = get_output_dir(imdb_vu, 'faster_rcnn_unseen')
    # NOTE(review): 1024 looks like the model's feature width — confirm.
    all_weight = np.zeros((len(ratio_index_vu[0]), 1024))
    all_times = np.zeros((imdb_vu.num_classes))
    dataset_vu = roibatchLoader(roidb_vu, ratio_list_vu, ratio_index_vu,
                                query_vu, 1, imdb_vu.num_classes,
                                training=False, seen=args.seen)
    fasterRCNN.eval()
    avg = 0
    dataset_vu.query_position = avg
    num_images_vu = len(imdb_vu.image_index)
    # all_boxes[class][image] -> Nx5 array of detections
    all_boxes = [[[] for _ in xrange(num_images_vu)]
                 for _ in xrange(imdb_vu.num_classes)]
    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir_vu,
                            'detections_%d_%d.pkl' % (args.seen, avg))
    print(det_file)
    i = 0
    index = 0
    data = [0, 0, 0, 0, 0]  # mimics one dataloader batch: [im, query, info, boxes, cat]
    # version = 'custom'  # coco is completed
    # if version == 'coco':
    #     im = imread('.../coco/images/val2017/000000397133.jpg')
    #     query_im = imread('.../coco/images/val2017/000000007816.jpg')
    #     query_im = crop(query_im, [505.54, 53.01, 543.08, 164.09], size=128)
    # else:
    im = imread(scene_img_path)
    im = cv2.resize(im, dsize=(640, 480), interpolation=cv2.INTER_LINEAR)
    query_im = imread(query_img_path)
    query_im = cv2.resize(query_im, dsize=(640, 480),
                          interpolation=cv2.INTER_LINEAR)
    _im = np.copy(im)            # raw copies kept for visualization
    _query_im = np.copy(query_im)
    # make im_data: HWC -> NCHW tensor
    im, im_scale = prep_im_for_blob(im, target_size=600)
    im = torch.tensor(im)
    im = torch.unsqueeze(im, 0)
    im = im.transpose(1, 3)
    im_data = im.transpose(2, 3)
    # make query data (same layout, 128px target)
    query_im, query_im_scale = prep_im_for_blob(query_im, target_size=128)
    query_im = torch.tensor(query_im)
    query_im = torch.unsqueeze(query_im, 0)
    query_im = query_im.transpose(1, 3)
    query = query_im.transpose(2, 3)
    im_data = data[0] = im_data.cuda()
    query = data[1] = query.cuda()
    # NOTE(review): hard-coded [h, w, scale] — presumably matches the
    # 600px resize of a 640x480 input; verify against prep_im_for_blob.
    im_info = data[2] = torch.tensor([[600, 899, 1.4052]])
    gt_boxes = data[3] = torch.rand(1, 4, 5)  # don't care (eval mode)
    catgory = data[4] = torch.tensor([1])
    det_tic = time.time()
    rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, _, RCNN_loss_bbox, \
        rois_label, weight = fasterRCNN(im_data, query, im_info, gt_boxes, catgory)
    # all_weight[data[4],:] = all_weight[data[4],:] + weight.view(-1).detach().cpu().numpy()
    all_weight[i, :] = weight.view(-1).detach().cpu().numpy()
    all_times[data[4]] = all_times[data[4]] + 1
    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]  # drop the batch-index column
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(1, -1, 4)
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    # Undo the input scaling so boxes are in original-image coordinates.
    # NOTE(review): pred_boxes is only defined when cfg.TEST.BBOX_REG is
    # true — the no-regression fallback was apparently dropped.
    pred_boxes /= data[2][0][2].item()
    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    det_toc = time.time()
    detect_time = det_toc - det_tic
    misc_tic = time.time()
    im2show = np.copy(_im)
    inds = torch.nonzero(scores > thresh).view(-1)
    # if there is det
    if inds.numel() > 0:
        cls_scores = scores[inds]
        _, order = torch.sort(cls_scores, 0, True)
        cls_boxes = pred_boxes[inds, :]
        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
        cls_dets = cls_dets[order]
        keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
        cls_dets = cls_dets[keep.view(-1).long()]
        all_boxes[data[4]][index] = cls_dets.cpu().numpy()
        im2show = vis_detections(im2show, 'shot',
                                 cls_dets.cpu().numpy(), 0.8)
        _im2show = np.concatenate((im2show, _query_im), axis=1)
        plt.imshow(_im2show)
        plt.show()
    # Limit to max_per_image detections *over all classes*
    if max_per_image > 0:
        try:
            image_scores = all_boxes[data[4]][index][:, -1]
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                keep = np.where(
                    all_boxes[data[4]][index][:, -1] >= image_thresh)[0]
                all_boxes[data[4]][index] = all_boxes[data[4]][index][keep, :]
        # Best-effort: an empty detection list raises (list, not array);
        # deliberately swallowed.
        except:
            pass
    misc_toc = time.time()
    # Undo ImageNet normalization on the query for display, then append it
    # to the right of the scene visualization and save to disk.
    o_query = data[1][0].permute(1, 2, 0).contiguous().cpu().numpy()
    o_query *= [0.229, 0.224, 0.225]
    o_query += [0.485, 0.456, 0.406]
    o_query *= 255
    o_query = o_query[:, :, ::-1]
    (h, w, c) = im2show.shape
    o_query = cv2.resize(o_query, (h, h), interpolation=cv2.INTER_LINEAR)
    o_query = cv2.cvtColor(o_query, cv2.COLOR_BGR2RGB)
    im2show = np.concatenate((im2show, o_query), axis=1)
    im2show = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)
    cv2.imwrite('./test_img/%d.png' % (i), im2show)
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target,
                   val_datasets, device, net, optimizer, num_workers, lr,
                   batch_size, start_epoch, max_epochs, lr_decay_gamma,
                   lr_decay_step, resume, load_name, pretrained, eta, gamma,
                   ef, class_agnostic, lc, gc, LA_ATT, MID_ATT, debug, _run):
    """Sacred experiment entry point: train an HTCN domain-adaptive
    Faster R-CNN on one source and one target dataset.

    Builds source/target dataloaders, initializes the model and optimizer
    (optionally resuming from ``load_name``), trains for
    ``start_epoch..max_epochs`` with step LR decay, and saves a checkpoint
    named after the run hyper-parameters.

    Returns:
        int: always 0 (sacred experiment result).
    """
    args = Args(dataset=dataset_source, dataset_t=dataset_target,
                cfg_file=cfg_file, net=net)
    args = set_dataset_args(args)
    args_val = Args(dataset=dataset_source, dataset_t=val_datasets,
                    imdb_name_target=[], cfg_file=cfg_file, net=net)
    args_val = set_dataset_args(args_val, test=True)
    # These backbones expect BGR input ordering.
    is_bgr = False
    if net in ['res101', 'res50', 'res152', 'vgg16']:
        is_bgr = True
    logger = LoggerForSacred(None, ex, True)
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    np.random.seed(cfg.RNG_SEED)
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)
    # Suffix the output dir with the sacred run id so runs never collide.
    output_dir = output_dir + "_{}".format(_run._id)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    dataloader_s, dataloader_t, imdb, imdb_t = init_dataloaders_1s_1t(
        args, batch_size, num_workers, is_bgr)
    val_dataloader_ts, val_imdb_ts = init_val_dataloaders_mt(
        args_val, 1, num_workers, is_bgr)
    session = 1
    fasterRCNN = init_htcn_model(LA_ATT, MID_ATT, class_agnostic, device, gc,
                                 imdb, lc, load_name, net,
                                 pretrained=pretrained)
    # May overwrite lr/session/start_epoch when resuming from a checkpoint.
    lr, optimizer, session, start_epoch = init_optimizer(
        lr, fasterRCNN, optimizer, resume, load_name, session, start_epoch,
        is_all_params=True)
    if torch.cuda.device_count() > 1:
        fasterRCNN = nn.DataParallel(fasterRCNN)
    # Fixed-length epochs regardless of dataset size.
    iters_per_epoch = int(10000 / batch_size)
    if ef:
        FL = EFocalLoss(class_num=2, gamma=gamma)
    else:
        FL = FocalLoss(class_num=2, gamma=gamma)
    total_step = 0
    for epoch in range(start_epoch, max_epochs + 1):
        # setting to train mode
        fasterRCNN.train()
        if epoch - 1 in lr_decay_step:
            adjust_learning_rate(optimizer, lr_decay_gamma)
            lr *= lr_decay_gamma
        total_step = frcnn_utils.train_htcn_one_epoch(
            args, FL, total_step, dataloader_s, dataloader_t, iters_per_epoch,
            fasterRCNN, optimizer, device, eta, logger)
        # NOTE(review): checkpoint save reconstructed as per-epoch (inside
        # the loop), matching upstream HTCN training scripts — confirm.
        save_name = os.path.join(
            output_dir,
            'target_{}_eta_{}_local_{}_global_{}_gamma_{}_session_{}_epoch_{}_total_step_{}.pth'
            .format(args.dataset_t, args.eta, lc, gc, gamma, session, epoch,
                    total_step))
        save_checkpoint(
            {
                'session': session,
                'epoch': epoch + 1,
                'model': fasterRCNN.module.state_dict()
                if torch.cuda.device_count() > 1 else fasterRCNN.state_dict(),
                'optimizer': optimizer.state_dict(),
                'pooling_mode': cfg.POOLING_MODE,
                'class_agnostic': class_agnostic,
            }, save_name)
    return 0
def prep_model(input_dir):
    """Load a COCO-class res101 Faster R-CNN checkpoint and return the model.

    Parses CLI args, applies the cfg file plus fixed anchor settings, then
    builds the network for the 81 COCO categories and restores the weights
    found at ``input_dir``.

    Args:
        input_dir: path to the ``.pth`` checkpoint file to load.

    Returns:
        The constructed, weight-loaded Faster R-CNN model.

    Raises:
        Exception: if ``input_dir`` does not exist.
    """
    args = parse_args()
    print('Called with args:')
    print(args)
    # Fixed anchor configuration, independent of what the cfg file says.
    args.set_cfgs = [
        'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
    ]
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    cfg.USE_GPU_NMS = 1
    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    ckpt_path = os.path.join(input_dir)
    # '__background__' is always index 0, followed by the 80 COCO classes.
    class_names = np.asarray([
        '__background__', "person", "bicycle", "car", "motorbike",
        "aeroplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
        "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
        "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase",
        "frisbee", "skis", "snowboard", "sports ball", "kite",
        "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife",
        "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
        "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa",
        "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop",
        "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
        "toaster", "sink", "refrigerator", "book", "clock", "vase",
        "scissors", "teddy bear", "hair drier", "toothbrush"
    ])
    # Build the res101 backbone network and restore the checkpoint weights.
    model = resnet(class_names, 101, pretrained=False,
                   class_agnostic=args.class_agnostic)
    model.create_architecture()
    print("load checkpoint %s" % (ckpt_path))
    ckpt = torch.load(ckpt_path)
    model.load_state_dict(ckpt['model'])
    # fasterRCNN.load_state_dict(checkpoint)
    if 'pooling_mode' in ckpt:
        cfg.POOLING_MODE = ckpt['pooling_mode']
    print('load model successfully!')
    return model
def __init__(self, baseFolder='models',
             filename='faster_rcnn_1_10_9999_mosaicCL3to5_CBAM_Gblur_class23_wOrgCW.pth',
             threshold=0.9, att_type='CBAM'):  # att_type=None
    """Construct the 23-class household-object detector.

    Builds an argparse parser, loads the cfg YAML matching the chosen
    backbone, constructs the (optionally FPN / CBAM-attention) network,
    restores the checkpoint from ``baseFolder/filename`` and puts the
    model in eval mode.

    Args:
        baseFolder: directory containing the checkpoint and ``cfgs/``.
        filename: checkpoint file name; if it contains 'FPN' the FPN
            variant of the backbone is used.
        threshold: detection score threshold stored on ``self.thresh``.
        att_type: attention module type passed to the resnet backbone.
    """
    super(DetectorAIR23, self).__init__()
    # Grab the shared cfg object without a top-level import.
    self.cfg = __import__('model').utils.config.cfg

    def parse_args():
        """Build the argument parser for the detector's fixed CLI options."""
        parser = argparse.ArgumentParser(description='Train a Fast R-CNN network')
        parser.add_argument('--cfg', dest='cfg_file', help='optional config file',
                            default='cfgs/vgg16.yml', type=str)
        parser.add_argument('--net', dest='net', help='vgg16, res50, res101, res152',
                            default='res101', type=str)
        parser.add_argument('--set', dest='set_cfgs', help='set config keys',
                            default=None, nargs=argparse.REMAINDER)
        parser.add_argument('--cuda', dest='cuda', help='whether use CUDA',
                            action='store_true')
        parser.add_argument('--mGPUs', dest='mGPUs', help='whether use multiple GPUs',
                            action='store_true')
        parser.add_argument('--cag', dest='class_agnostic',
                            help='whether perform class_agnostic bbox regression',
                            action='store_true')
        parser.add_argument('--parallel_type', dest='parallel_type',
                            help='which part of model to parallel, 0: all, 1: model before roi pooling',
                            default=0, type=int)
        parser.add_argument('--ls', dest='large_scale',
                            help='whether use large imag scale',
                            action='store_true')
        parser.add_argument('--use_FPN', dest='use_FPN', action='store_true')
        return parser

    # Fixed "command line" for this detector; FPN flag inferred from filename.
    cmd_args = [
        '--net', 'res101',
        '--ls',
        '--cuda',
    ]
    if 'FPN' in filename:
        cmd_args.append('--use_FPN')
    load_name = os.path.join(baseFolder, filename)  # w/o bottle class
    self.thresh = threshold
    parser = parse_args()
    self.args = parser.parse_args(cmd_args)
    self.args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]',
                          'ANCHOR_RATIOS', '[0.5,1,2]']
    self.args.cfg_file = "{}/cfgs/{}_ls.yml".format(baseFolder, self.args.net) if self.args.large_scale else "{}/cfgs/{}.yml".format(baseFolder, self.args.net)
    print('Called with args:')
    print(self.args)
    if self.args.cfg_file is not None:
        # check cfg file and copy
        cfg_from_file(self.args.cfg_file)
    if self.args.set_cfgs is not None:
        cfg_from_list(self.args.set_cfgs)
    self.cfg.USE_GPU_NMS = self.args.cuda
    print('Using config:')
    pprint.pprint(self.cfg)
    np.random.seed(self.cfg.RNG_SEED)
    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    #
    # input_dir = self.args.load_dir + "/" + self.args.net + "/" + self.args.dataset
    # if not os.path.exists(input_dir):
    #     raise Exception('There is no input directory for loading network from ' + input_dir)
    # load_name = os.path.join(input_dir,
    #     'faster_rcnn_{}_{}_{}.pth'.format(self.args.checksession, self.args.checkepoch, self.args.checkpoint))
    self.classes = np.asarray([
        '__background__',  # always index 0
        'cane_stick', 'mobile_phone', 'pack', 'cup', 'glasses', 'hat', 'key',
        'medicine_case', 'medicine_packet', 'newspaper', 'remote', 'sock',
        'towel', 'wallet', 'pen', 'sink', 'table', 'bed', 'sofa_bed',
        'refrigerator', 'television', 'toilet', 'mechanical_fan',
    ])
    # self.display_classes = self.classes
    # Korean display names keyed by class name (user-facing labels).
    self.display_classes = {
        'cup': '컵',
        'pen': '펜',
        'hat': '모자',
        'mobile_phone': '핸드폰',
        'sock': '양말',
        'glasses': '안경',
        'towel': '수건',
        'newspaper': '신문',
        'remote': '리모컨',
        'key': '열쇠',
        'wallet': '지갑',
        'pack': '담배갑',
        'medicine_case': '약통',
        'medicine_packet': '약봉지',
        'sink': '싱크대',
        'table': '테이블',
        'bed': '침대',
        'sofa_bed': '소파',
        'refrigerator': '냉장고',
        'television': '티비',
        'toilet': '화장실',
        'mechanical_fan': '선풍기',
    }
    # initilize the network here. Backbone choice depends on --net and
    # --use_FPN; unknown nets drop into the debugger.
    if self.args.net == 'vgg16':
        from model.faster_rcnn.vgg16 import vgg16
        self.fasterRCNN = vgg16(self.classes, pretrained=False,
                                class_agnostic=self.args.class_agnostic)
    elif 'res' in self.args.net:
        # from model.faster_rcnn.resnet import resnet
        if self.args.use_FPN:
            from model.fpn.resnet_AIRvar_CBAM import resnet
        else:
            from model.faster_rcnn.resnet_AIRvar_CBAM import resnet
        if self.args.net == 'res101':
            self.fasterRCNN = resnet(self.classes, 101, pretrained=False,
                                     class_agnostic=self.args.class_agnostic,
                                     att_type=att_type)
        elif self.args.net == 'res50':
            self.fasterRCNN = resnet(self.classes, 50, pretrained=False,
                                     class_agnostic=self.args.class_agnostic,
                                     att_type=att_type)
        elif self.args.net == 'res152':
            self.fasterRCNN = resnet(self.classes, 152, pretrained=False,
                                     class_agnostic=self.args.class_agnostic,
                                     att_type=att_type)
    else:
        print("network is not defined")
        pdb.set_trace()
    self.fasterRCNN.create_architecture()
    print("load checkpoint %s" % (load_name))
    if self.args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        # CPU-only: remap storages off the GPU they were saved from.
        checkpoint = torch.load(load_name,
                                map_location=(lambda storage, loc: storage))
    self.fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        self.cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    # initilize the tensor holder here.
    self.im_data = torch.FloatTensor(1)
    self.im_info = torch.FloatTensor(1)
    self.num_boxes = torch.LongTensor(1)
    self.gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if self.args.cuda > 0:
        self.im_data = self.im_data.cuda()
        self.im_info = self.im_info.cuda()
        self.num_boxes = self.num_boxes.cuda()
        self.gt_boxes = self.gt_boxes.cuda()
    # make variable (legacy Variable API; no_grad disables autograd here)
    with torch.no_grad():
        self.im_data = Variable(self.im_data)
        self.im_info = Variable(self.im_info)
        self.num_boxes = Variable(self.num_boxes)
        self.gt_boxes = Variable(self.gt_boxes)
    if self.args.cuda > 0:
        self.cfg.CUDA = True
    if self.args.cuda > 0:
        self.fasterRCNN.cuda()
    self.fasterRCNN.eval()
    self.max_per_image = 100
elif args.imdb_name == "val2014": imdb_name = args.dataset + "_2014_val" elif args.imdb_name == "test2014": imdb_name = args.dataset + "_2014_test" elif args.imdb_name == "test2015": imdb_name = args.dataset + "_2015_test" cfg.TRAIN.OBJECT_CLASSES = imdb_vg._classes cfg.TRAIN.ATTRIBUTE_CLASSES = imdb_vg._attributes cfg.TRAIN.RELATION_CLASSES = imdb_vg._relations cfg_file = "cfgs/{}.yml".format(args.net) if cfg_file is not None: cfg_from_file(cfg_file) if set_cfgs is not None: cfg_from_list(set_cfgs) print('Using config:') pprint.pprint(cfg) np.random.seed(cfg.RNG_SEED) # train set # -- Note: Use validation set and disable the flipped to enable faster loading. cfg.TRAIN.USE_FLIPPED = False imdb, roidb, ratio_list, ratio_index = combined_roidb(imdb_name) train_size = len(roidb) imdb.competition_mode(on=True) print('{:d} roidb entries'.format(len(roidb)))
raise Exception("dataset is not defined") if args.more_scale: args.set_cfgs = [ 'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]' ] else: args.set_cfgs = [ 'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]' ] args.cfg_file = "cfgs/res50.yml" if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) # prepare roidb cfg.TRAIN.USE_FLIPPED = False imdb, roidb, ratio_list, ratio_index = combined_roidb( args.imdbval_name, False) cwd = os.getcwd() if args.not_pure: support_dir = os.path.join(cwd, 'data/supports', args.dataset + '_random') else: support_dir = os.path.join(cwd, 'data/supports', args.dataset) # load dir input_dir = os.path.join(args.load_dir, "train/checkpoints") if not os.path.exists(input_dir):
def get_ready(query_img_path):
    """Prepare the one-shot detector and a preprocessed query tensor.

    Loads the res50 one-shot Faster R-CNN checkpoint, builds the "unseen"
    evaluation bookkeeping, reads and normalizes the query image into an
    NCHW CUDA tensor, and returns everything needed to run detections.

    Args:
        query_img_path: path of the exemplar (query) image.

    Returns:
        tuple: (fasterRCNN, all_boxes, query, _query_im) where ``query`` is
        the CUDA query tensor and ``_query_im`` the raw resized image copy.

    Relies on module-level ``args`` and the global ``cfg``.
    """
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    np.random.seed(cfg.RNG_SEED)
    if args.dataset == "coco":
        args.imdb_name = "coco_2017_train"
        args.imdbval_name = "coco_2017_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    # args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)
    # Non-zero `group` selects a group-specific cfg variant.
    args.cfg_file = "cfgs/{}_{}.yml".format(
        args.net, args.group) if args.group != 0 else "cfgs/{}.yml".format(
            args.net)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    print('Using config:')
    pprint.pprint(cfg)
    cfg.TRAIN.USE_FLIPPED = False
    # imdb_vs, roidb_vs, ratio_list_vs, ratio_index_vs, query_vs = combined_roidb('coco_2014_valminusminival', False)
    imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb(
        args.imdbval_name, False, seen=args.seen)
    # imdb_vs.competition_mode(on=True)
    imdb_vu.competition_mode(on=True)
    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' + input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))
    # initilize the network here. Only res50 is supported; anything else
    # drops into the debugger.
    if args.net == 'res50':
        fasterRCNN = resnet(imdb_vu.classes, 50, pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()
    fasterRCNN.create_architecture()
    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    # initilize the tensor holder here (placeholders; query replaced below,
    # the others are unused in this function).
    im_data = torch.FloatTensor(1)
    query = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    catgory = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()
    output_dir_vu = get_output_dir(imdb_vu, 'faster_rcnn_unseen')
    dataset_vu = roibatchLoader(roidb_vu, ratio_list_vu, ratio_index_vu,
                                query_vu, 1, imdb_vu.num_classes,
                                training=False, seen=args.seen)
    fasterRCNN.eval()
    avg = 0
    dataset_vu.query_position = avg
    num_images_vu = len(imdb_vu.image_index)
    # all_boxes[class][image] -> detections accumulator
    all_boxes = [[[] for _ in xrange(num_images_vu)]
                 for _ in xrange(imdb_vu.num_classes)]
    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir_vu,
                            'detections_%d_%d.pkl' % (args.seen, avg))
    print(det_file)
    # make query data: resize, normalize, HWC -> NCHW, move to GPU.
    # NOTE(review): .cuda() is called unconditionally here even though the
    # model move above is gated on args.cuda — confirm CPU path is unused.
    query_im = imread(query_img_path)
    query_im = cv2.resize(query_im, dsize=(640, 480),
                          interpolation=cv2.INTER_LINEAR)
    _query_im = np.copy(query_im)
    query_im, query_im_scale = prep_im_for_blob(query_im, target_size=128)
    query_im = torch.tensor(query_im)
    query_im = torch.unsqueeze(query_im, 0)
    query_im = query_im.transpose(1, 3)
    query = query_im.transpose(2, 3)
    query = query.cuda()
    return fasterRCNN, all_boxes, query, _query_im
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target,
                   val_datasets, device, net, optimizer, num_workers,
                   teacher_pth, student_pth, lr, batch_size, start_epoch,
                   max_epochs, lr_decay_gamma, lr_decay_step, resume,
                   load_name, imitation_loss_weight, eta, gamma, ef,
                   class_agnostic, lc, gc, LA_ATT, MID_ATT, debug, _run):
    """Sacred experiment: cross-evaluate teacher/student RPNs.

    Loads a res101 teacher and two res50 student copies from checkpoints,
    then on each validation dataset logs mAP for (1) the student using the
    teacher's RPN, (2) the unmodified teacher, and (3) the teacher using
    the (second, unmodified) student's RPN.

    Note: despite the many training-style parameters in the signature, only
    evaluation is performed here; lr/epoch/loss arguments are unused.
    """
    args_val = Args(dataset=dataset_source, dataset_t=val_datasets,
                    imdb_name_target=[], cfg_file=cfg_file, net=net)
    args_val = set_dataset_args(args_val, test=True)
    logger = LoggerForSacred(None, ex, False)
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args_val.set_cfgs is not None:
        cfg_from_list(args_val.set_cfgs)
    np.random.seed(cfg.RNG_SEED)
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)
    val_dataloader_ts, val_imdb_ts = init_frcnn_utils.init_val_dataloaders_mt(
        args_val, 1, num_workers)
    session = 1
    # Teacher: res101 HTCN backbone from teacher_pth.
    teacher = init_frcnn_utils.init_model_only(
        device, "res101", htcn_resnet, val_imdb_ts[0], teacher_pth,
        class_agnostic=class_agnostic, lc=lc, gc=gc,
        la_attention=LA_ATT, mid_attention=MID_ATT)
    # Two identical res50 students from the same checkpoint: the first gets
    # the teacher's RPN grafted in; the second keeps its own RPN so it can
    # later be grafted into the teacher.
    fasterRCNN = init_frcnn_utils.init_model_only(
        device, "res50", htcn_resnet, val_imdb_ts[0], student_pth,
        class_agnostic=class_agnostic, lc=lc, gc=gc,
        la_attention=LA_ATT, mid_attention=MID_ATT)
    fasterRCNN_2 = init_frcnn_utils.init_model_only(
        device, "res50", htcn_resnet, val_imdb_ts[0], student_pth,
        class_agnostic=class_agnostic, lc=lc, gc=gc,
        la_attention=LA_ATT, mid_attention=MID_ATT)
    fasterRCNN.RCNN_rpn = teacher.RCNN_rpn
    if torch.cuda.device_count() > 1:
        fasterRCNN = nn.DataParallel(fasterRCNN)
    total_step = 0
    best_ap = 0.
    if isinstance(val_datasets, list):
        avg_ap = 0
        for i, val_dataloader_t in enumerate(val_dataloader_ts):
            # NOTE: `map` shadows the builtin of the same name (local only).
            map = frcnn_utils.eval_one_dataloader(output_dir, val_dataloader_t,
                                                  fasterRCNN, device,
                                                  val_imdb_ts[i])
            logger.log_scalar(
                "student with teacher rpn map on {}".format(val_datasets[i]),
                map, 0)
            map = frcnn_utils.eval_one_dataloader(output_dir, val_dataloader_t,
                                                  teacher, device,
                                                  val_imdb_ts[i])
            logger.log_scalar(
                "teacher original map on {}".format(val_datasets[i]), map, 0)
            # Swap the student's RPN into the teacher (teacher's original
            # RPN is not restored afterwards).
            teacher.RCNN_rpn = fasterRCNN_2.RCNN_rpn
            map = frcnn_utils.eval_one_dataloader(output_dir, val_dataloader_t,
                                                  teacher, device,
                                                  val_imdb_ts[i])
            logger.log_scalar(
                "teacher with stu rpn map on {}".format(val_datasets[i]),
                map, 0)
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, device, net,
                   optimizer, num_workers, lr, batch_size, start_epoch,
                   max_epochs, lr_decay_gamma, lr_decay_step, resume,
                   load_name, pretrained, eta, gamma, ef, class_agnostic, lc,
                   gc, LA_ATT, MID_ATT, debug, _run):
    """Sacred experiment: learn a pixel-level DTM mask for a target domain.

    Loads a frozen HTCN Faster R-CNN from ``load_name``, trains a small
    3->256->3 conv "mask" network (``dtm``) against it on the source
    dataloader via ``dtm_util.get_mask_for_target``, and saves the trained
    mask module to ``output_dir``.

    Fix over the previous revision: the ``"\\d+"`` regex literals are now
    raw strings (non-raw ``\\d`` is an invalid escape sequence that Python
    warns on and will eventually reject), and the digit extraction from
    ``load_name`` is computed once instead of three times.

    Returns:
        int: always 0 (sacred experiment result).
    """
    args = Args(dataset=dataset_source, dataset_t=[], imdb_name_target=[],
                cfg_file=cfg_file, net=net)
    args = set_dataset_args(args)
    # These backbones expect BGR input ordering.
    is_bgr = False
    if net in ['res101', 'res50', 'res152', 'vgg16']:
        is_bgr = True
    logger = LoggerForSacred(None, ex)
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    np.random.seed(cfg.RNG_SEED)
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)
    # All runs of digits in the checkpoint name; raises IndexError if the
    # name contains none (same behavior as before).
    checkpoint_ids = re.findall(r"\d+", load_name)
    load_id = checkpoint_ids[0]
    output_dir = output_dir + "_{}".format(load_id)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    dataloader_s, _, imdb, _ = init_dataloaders_1s_mt(args, batch_size,
                                                      num_workers, is_bgr)
    session = 1
    fasterRCNN = init_htcn_model(LA_ATT, MID_ATT, class_agnostic, device, gc,
                                 imdb, lc, load_name, net, strict=False,
                                 pretrained=pretrained)
    # Small conv mask network trained on top of the frozen detector.
    dtm = nn.Sequential(nn.Conv2d(3, 256, 1, stride=1, padding=0, bias=False),
                        nn.ReLU(), nn.Conv2d(256, 3, 1))
    dtm.to(device)
    optimizer = torch.optim.SGD(dtm.parameters(), lr=lr, momentum=0.9)
    if torch.cuda.device_count() > 1:
        fasterRCNN = nn.DataParallel(fasterRCNN)
    # Fixed-length epochs regardless of dataset size.
    iters_per_epoch = int(10000 / batch_size)
    if ef:
        FL = EFocalLoss(class_num=2, gamma=gamma)
    else:
        FL = FocalLoss(class_num=2, gamma=gamma)
    dtm_util.get_mask_for_target(args, FL, 0, dataloader_s, iters_per_epoch,
                                 fasterRCNN, dtm, optimizer, device, logger)
    # Last digit-run of the checkpoint name (0 when there are none) — kept
    # for backward-compatible output file naming.
    find_id = checkpoint_ids[-1] if checkpoint_ids else 0
    torch.save(
        dtm,
        os.path.join(output_dir,
                     'dtm_target_cnn_{}_{}.p'.format(load_id, find_id)))
    return 0
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target,
                   val_datasets, model_type, device, net, optimizer,
                   num_workers, model_pth, class_agnostic, lc, gc, LA_ATT,
                   MID_ATT, debug, _run):
    """Sacred experiment: evaluate one detector on multiple val datasets.

    Picks the backbone constructor from ``net``/``model_type``
    (htcn / normal / saitp variants of resnet or vgg16), loads the weights
    at ``model_pth``, evaluates mAP (overall and per class) on every
    validation dataset, logs the scores, and returns the average mAP.
    """
    args_val = Args(dataset=dataset_source, dataset_t=val_datasets,
                    imdb_name_target=[], cfg_file=cfg_file, net=net)
    args_val = set_dataset_args(args_val, test=True)
    logger = LoggerForSacred(None, ex, True)
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args_val.set_cfgs is not None:
        cfg_from_list(args_val.set_cfgs)
    np.random.seed(cfg.RNG_SEED)
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)
    val_dataloader_ts, val_imdb_ts = init_frcnn_utils.init_val_dataloaders_mt(
        args_val, 1, num_workers)
    session = 1
    # Select the backbone constructor; defaults to the HTCN resnet.
    backbone_fn = htcn_resnet
    if 'res' in net:
        if model_type == 'normal':
            backbone_fn = n_resnet
        elif model_type == 'saitp':
            backbone_fn = s_resnet
    else:
        if model_type == 'normal':
            backbone_fn = n_vgg16
        elif model_type == 'htcn':
            backbone_fn = htcn_vgg16
        elif model_type == 'saitp':
            # NOTE(review): None backbone will fail inside init_model_only —
            # presumably the saitp-vgg combination is unsupported; confirm.
            backbone_fn = None
    model = init_frcnn_utils.init_model_only(
        device, net, backbone_fn, val_imdb_ts[0], model_pth,
        class_agnostic=class_agnostic, lc=lc, gc=gc,
        la_attention=LA_ATT, mid_attention=MID_ATT)
    total_step = 0
    best_ap = 0.
    avg_ap = 0.
    avg_ap_per_class = {}
    if isinstance(val_datasets, list):
        for i, val_dataloader_t in enumerate(val_dataloader_ts):
            # NOTE: `map` shadows the builtin of the same name (local only).
            map, ap_per_class = frcnn_utils.eval_one_dataloader(
                output_dir, val_dataloader_t, model, device, val_imdb_ts[i],
                return_ap_class=True)
            logger.log_scalar(" map on {}".format(val_datasets[i]), map, 0)
            # Accumulate per-class AP across datasets for averaging below.
            for cls, ap in ap_per_class.items():
                if cls in avg_ap_per_class:
                    avg_ap_per_class[cls] += ap
                else:
                    avg_ap_per_class[cls] = ap
            avg_ap += map
        avg_ap /= len(val_dataloader_ts)
        for cls, ap in avg_ap_per_class.items():
            ap /= len(val_dataloader_ts)
            logger.log_scalar(" map of class {}".format(cls), ap, 0)
    logger.log_scalar("avp map", avg_ap, 0)
    # NOTE(review): .item() presumes avg_ap ended up a tensor/numpy scalar,
    # i.e. that val_datasets was a non-empty list — plain float 0. would
    # raise AttributeError here; confirm callers always pass a list.
    return avg_ap.item()
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target, val_datasets, device, net,
                   optimizer, num_workers, lr, batch_size, start_epoch, max_epochs, lr_decay_gamma,
                   lr_decay_step, mask_load_p, resume, load_name, pretrained, model_type, eta, gamma,
                   ef, class_agnostic, lc, gc, LA_ATT, MID_ATT, debug, _run):
    """Profile MACs/params of the detector and of a small mask CNN with thop,
    then drain one epoch's worth of source batches (no training is performed —
    the loop only moves batches to the device, presumably for timing).

    FIX vs. original: the bare ``except:`` around ``next(data_iter_s)`` is
    narrowed to ``except StopIteration`` so that real dataloader errors are
    no longer silently swallowed.
    """
    args = Args(dataset=dataset_source, dataset_t=dataset_target, cfg_file=cfg_file, net=net)
    args = set_dataset_args(args)
    # These backbones expect Caffe-style BGR input ordering.
    is_bgr = False
    if net in ['res101', 'res50', 'res152', 'vgg16']:
        is_bgr = True

    logger = LoggerForSacred(None, ex, False)

    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    np.random.seed(cfg.RNG_SEED)
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    # Pick the backbone constructor from (net, model_type).
    backbone_fn = htcn_resnet
    if 'res' in net:
        if model_type == 'normal':
            backbone_fn = n_resnet
        elif model_type == 'saitp':
            backbone_fn = s_resnet
    else:
        if model_type == 'normal':
            backbone_fn = n_vgg16
        elif model_type == 'htcn':
            backbone_fn = htcn_vgg16
        elif model_type == 'saitp':
            backbone_fn = None

    dataloader_s, dataloader_t, imdb, imdb_t = init_dataloaders_1s_1t(
        args, batch_size, num_workers, is_bgr, False)

    model = init_frcnn_utils.init_model_only(device, net, backbone_fn, imdb_t, '',
                                             class_agnostic=class_agnostic, lc=lc, gc=gc,
                                             la_attention=LA_ATT, mid_attention=MID_ATT)
    model.eval()

    # Dummy detector inputs for profiling.
    im_data = torch.randn(1, 3, 600, 1200).to(device)
    im_info = torch.FloatTensor([[600, 900, 2]]).to(device)
    gt_boxes = torch.zeros((1, 1, 5)).to(device)
    num_boxes = torch.zeros([1]).to(device)

    macs, params = profile(model, inputs=(im_data, im_info, gt_boxes, num_boxes))
    macs, params = clever_format([macs, params], "%.3f")
    print("Model CFLOPS: {}".format(macs))
    print("Model Cparams: {}".format(params))

    # Same 1x1-conv mask architecture as the dtm trainer, profiled for comparison.
    random_mask = nn.Sequential(
        nn.Conv2d(3, 256, 1, stride=1, padding=0, bias=False),
        nn.ReLU(),
        nn.Conv2d(256, 3, 1)).to(device)
    macs, params = profile(random_mask, inputs=(im_data, ))
    macs, params = clever_format([macs, params], "%.3f")
    print("Mask CFLOPS: {}".format(macs))
    print("Mask Cparams: {}".format(params))

    iters_per_epoch = int(1000 / batch_size)
    data_iter_s = iter(dataloader_s)
    for step in range(1, iters_per_epoch + 1):
        try:
            data_s = next(data_iter_s)
        except StopIteration:
            # Source loader exhausted — restart it and fetch again.
            data_iter_s = iter(dataloader_s)
            data_s = next(data_iter_s)
        im_data = data_s[0].to(device)
        im_info = data_s[1].to(device)
        gt_boxes = data_s[2].to(device)
        num_boxes = data_s[3].to(device)
        pass
def frcnn(train):
    """Run a pretrained Faster R-CNN (COCO class list) over every image in
    ``args.image_dir`` and dump the best per-class score for each image into
    ``annotation_dict.json``.

    The ``train`` flag only gates the inference loop: when truthy, nothing
    is done (the ``else`` branch is a bare ``pass``).
    """
    args = parse_args()
    print('Called with args:')
    print(args)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    from model.utils.config import cfg
    cfg.USE_GPU_NMS = args.cuda
    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)
    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' + input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))
    # NOTE(review): '___background__' has THREE leading underscores; the
    # conventional label is '__background__'. Index 0 is never scored below,
    # so this is likely a harmless typo — confirm before relying on the name.
    pascal_classes = np.asarray([
        '___background__', u'person', u'bicycle', u'car', u'motorcycle', u'airplane', u'bus',
        u'train', u'truck', u'boat', u'traffic light', u'fire hydrant', u'stop sign',
        u'parking meter', u'bench', u'bird', u'cat', u'dog', u'horse', u'sheep', u'cow',
        u'elephant', u'bear', u'zebra', u'giraffe', u'backpack', u'umbrella', u'handbag', u'tie',
        u'suitcase', u'frisbee', u'skis', u'snowboard', u'sports ball', u'kite', u'baseball bat',
        u'baseball glove', u'skateboard', u'surfboard', u'tennis racket', u'bottle',
        u'wine glass', u'cup', u'fork', u'knife', u'spoon', u'bowl', u'banana', u'apple',
        u'sandwich', u'orange', u'broccoli', u'carrot', u'hot dog', u'pizza', u'donut', u'cake',
        u'chair', u'couch', u'potted plant', u'bed', u'dining table', u'toilet', u'tv', u'laptop',
        u'mouse', u'remote', u'keyboard', u'cell phone', u'microwave', u'oven', u'toaster',
        u'sink', u'refrigerator', u'book', u'clock', u'vase', u'scissors', u'teddy bear',
        u'hair drier', u'toothbrush'
    ])
    # initilize the network here.
    #args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
    # imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
    if args.net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes, pretrained=True, class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(pascal_classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(pascal_classes, 50, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(pascal_classes, 152, pretrained=False, class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()
    fasterRCNN.create_architecture()
    print("load checkpoint %s" % (load_name))
    # args.cuda is numeric-like here (compared with > 0), not a plain bool.
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(load_name, map_location=(lambda storage, loc: storage))
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    # pdb.set_trace()
    print("load checkpoint %s" % (load_name))
    # initilize the tensor holder here.
    # Legacy (pre-0.4) PyTorch pattern: placeholder tensors resized per image.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    # make variable
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)
    if args.cuda > 0:
        cfg.CUDA = True
    if args.cuda > 0:
        fasterRCNN.cuda()
    fasterRCNN.eval()
    thresh = 0.5  # detection score threshold
    webcam_num = args.webcam_num
    imglist = os.listdir(args.image_dir)
    num_images = len(imglist)
    print('Loaded Photo: {} images.'.format(num_images))
    import json, re
    from tqdm import tqdm
    d = {}  # image-id (digits of filename) -> per-class score vector
    pbar = tqdm(imglist)
    if not train:
        for i in pbar:
            im_file = os.path.join(args.image_dir, i)
            # im = cv2.imread(im_file)
            im_name = i
            im_in = np.array(imread(im_file))
            # Grayscale images are replicated to 3 channels.
            if len(im_in.shape) == 2:
                im_in = im_in[:, :, np.newaxis]
                im_in = np.concatenate((im_in, im_in, im_in), axis=2)
            # rgb -> bgr
            im = im_in[:, :, ::-1]
            blobs, im_scales = _get_image_blob(im)
            assert len(im_scales) == 1, "Only single-image batch implemented"
            im_blob = blobs
            im_info_np = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)
            im_data_pt = torch.from_numpy(im_blob)
            im_data_pt = im_data_pt.permute(0, 3, 1, 2)  # NHWC -> NCHW
            im_info_pt = torch.from_numpy(im_info_np)
            im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.data.resize_(1, 1, 5).zero_()
            num_boxes.data.resize_(1).zero_()
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]
            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        if args.cuda > 0:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        else:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        if args.cuda > 0:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        else:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                        box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))
                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                #Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))
            pred_boxes /= im_scales[0]  # back to original image coordinates
            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()
            # Label vocabulary used to index the output score vector.
            lis = json.load(
                open(
                    '/home/nesa320/huangshicheng/gitforwork/gsnn/graph/labels.json',
                    'r'))
            sm_lis = np.zeros(len(lis))
            # NOTE(review): `xrange` is a Python 2 builtin; under Python 3 this
            # raises NameError unless defined elsewhere — confirm target runtime.
            for j in xrange(1, len(pascal_classes)):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                    #cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    # Highest surviving score for this class.
                    score = cls_dets[0][-1]
                    try:
                        sm_lis[lis.index(pascal_classes[j])] = score.numpy()
                    except:
                        # Class not present in labels.json — skipped silently.
                        pass
            # Key the result on the digits of the filename.
            d[re.sub("\D", "", im_name)] = sm_lis.tolist()
            # Re-dumped every iteration so partial results survive interruption.
            json.dump(d, open('annotation_dict' + '.json', 'w'), indent=2)
    else:
        pass
def load(self, path, use_cuda):
    """Load the food/tableware detector (FPN-ResNet101 with CBAM) and the
    food classifier from checkpoint files under ``path``.

    Side effects: populates ``self.fasterRCNN``, ``self.food_classifier``,
    the placeholder input tensors, and several option attributes on self.
    """
    # define options
    path_model_detector = os.path.join(
        path, 'fpn101_1_10_9999.pth')  # model for detector (food, tableware, drink)
    dataset = 'CloudStatus_val'
    imdb_name2 = 'CloudTableThings_val'
    total_imdb_name = 'CloudStatusTableThings_val'
    load_name = path_model_detector
    self.use_share_regress = True
    self.use_progress = True
    net = 'resnet101'
    self.cuda = use_cuda
    self.class_agnostic = False
    self.att_type = 'None'
    self.vis = True  # generate debug images
    # Load food classifier
    # possible dbname='FoodX251', 'Food101', 'Kfood'
    # possible eval_crop_type='CenterCrop', 'TenCrop'
    self.food_classifier = FoodClassifier(net='senet154',
                                          dbname='Kfood',
                                          eval_crop_type='CenterCrop',
                                          ck_file_folder=path,
                                          use_cuda=use_cuda,
                                          pretrained=False)
    cfg_file = os.path.join(path, '{}_ls.yml'.format(net))
    set_cfgs = [
        'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
        'MAX_NUM_GT_BOXES', '30'
    ]
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if set_cfgs is not None:
        cfg_from_list(set_cfgs)
    # NOTE(review): this assigns a *local* name, not cfg.USE_GPU_NMS — it has
    # no effect on the config. Probably intended `cfg.USE_GPU_NMS = self.cuda`;
    # confirm before changing.
    USE_GPU_NMS = self.cuda
    print('Using config:')
    pprint.pprint(cfg)
    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    input_dir = load_name
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' + input_dir)
    self.list_box_color = [(0, 0, 0), (0, 0, 200), (0, 200, 0), (200, 200, 0),
                           (200, 0, 200), (0, 200, 200), (200, 0, 200)]
    # Class lists for the two heads and their union.
    self.classes0 = self.get_class_list(dataset)
    self.classes1 = self.get_class_list(imdb_name2)
    self.classes_total = self.get_class_list(total_imdb_name)
    from model.fpn.resnet_multi_CBAM import resnet
    self.fasterRCNN = resnet(self.classes0, self.classes1,
                             use_pretrained=False,
                             num_layers=101,
                             class_agnostic=self.class_agnostic,
                             use_share_regress=self.use_share_regress,
                             use_progress=self.use_progress,
                             att_type=self.att_type)
    self.fasterRCNN.create_architecture()
    print("loading checkpoint %s..." % (load_name))
    # self.cuda is numeric-like here (compared with > 0), not a plain bool.
    if self.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(
            load_name, map_location=(lambda storage, loc: storage))
    self.fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('succeeded')
    # initilize the tensor holder here.
    # Legacy placeholder tensors, resized per input at inference time.
    self.im_data = torch.FloatTensor(1)
    self.im_info = torch.FloatTensor(1)
    self.num_boxes = torch.LongTensor(1)
    self.gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if self.cuda > 0:
        self.im_data = self.im_data.cuda()
        self.im_info = self.im_info.cuda()
        self.num_boxes = self.num_boxes.cuda()
        self.gt_boxes = self.gt_boxes.cuda()
    # make variable
    with torch.no_grad():
        self.im_data = Variable(self.im_data)
        self.im_info = Variable(self.im_info)
        self.num_boxes = Variable(self.num_boxes)
        self.gt_boxes = Variable(self.gt_boxes)
    if self.cuda > 0:
        cfg.CUDA = True
    if self.cuda > 0:
        self.fasterRCNN.cuda()
    self.fasterRCNN.eval()
    print('- models loaded from {}'.format(path))
# Script entry point: loads the target-domain test set and (below, truncated)
# builds the HTCN detector for evaluation.
if __name__ == '__main__':
    args = parse_args()
    print('Called with args:')
    print(args)
    args = set_dataset_args(args, test=True)
    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    np.random.seed(cfg.RNG_SEED)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs_target is not None:
        cfg_from_list(args.set_cfgs_target)
    print('Using config:')
    pprint.pprint(cfg)
    # Evaluation: no flipped augmentation.
    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name_target, False)
    imdb.competition_mode(on=True)
    print('{:d} roidb entries'.format(len(roidb)))
    # initilize the network here.
    from model.faster_rcnn.vgg16_HTCN import vgg16
    from model.faster_rcnn.resnet_HTCN import resnet
    # NOTE(review): this chunk is truncated — the body of the branch below
    # continues outside the visible source.
    if args.net == 'vgg16':
def train(dataset="kaggle_pna", train_ds="train", arch="couplenet", net="res152", start_epoch=1,
          max_epochs=20, disp_interval=100, save_dir="save", num_workers=4, cuda=True,
          large_scale=False, mGPUs=True, batch_size=4, class_agnostic=False, anchor_scales=4,
          optimizer="sgd", lr_decay_step=10, lr_decay_gamma=.1, session=1, resume=False,
          checksession=1, checkepoch=1, checkpoint=0, use_tfboard=False, flip_prob=0.0, scale=0.0,
          scale_prob=0.0, translate=0.0, translate_prob=0.0, angle=0.0, dist="cont",
          rotate_prob=0.0, shear_factor=0.0, shear_prob=0.0, rpn_loss_cls_wt=1, rpn_loss_box_wt=1,
          RCNN_loss_cls_wt=1, RCNN_loss_bbox_wt=1, **kwargs):
    """Train a detector (CoupleNet / R-FCN / Faster R-CNN) on the Kaggle
    pneumonia dataset with on-the-fly augmentation and weighted multi-task
    loss; checkpoints are saved per epoch under ``cfg.MODEL_DIR``.

    Extra ``kwargs`` may carry TRAIN / RESNET / MOBILENET sub-dicts that are
    merged into the global cfg; any remaining keys are set on cfg directly.
    """
    print("Train Arguments: {}".format(locals()))
    # Import network definition
    if arch == 'rcnn':
        from model.faster_rcnn.resnet import resnet
    elif arch == 'rfcn':
        from model.rfcn.resnet_atrous import resnet
    elif arch == 'couplenet':
        from model.couplenet.resnet_atrous import resnet
    from roi_data_layer.pnaRoiBatchLoader import roibatchLoader
    from roi_data_layer.pna_roidb import combined_roidb
    print('Called with kwargs:')
    print(kwargs)
    # Set up logger
    if use_tfboard:
        from model.utils.logger import Logger
        # Set the logger
        logger = Logger('./logs')
    # Anchor settings: ANCHOR_SCALES: [8, 16, 32] or [4, 8, 16, 32]
    if anchor_scales == 3:
        scales = [8, 16, 32]
    elif anchor_scales == 4:
        scales = [4, 8, 16, 32]
    # Dataset related settings: MAX_NUM_GT_BOXES: 20, 30, 50
    if train_ds == "train":
        imdb_name = "pna_2018_train"
    elif train_ds == "trainval":
        imdb_name = "pna_2018_trainval"
    set_cfgs = [
        'ANCHOR_SCALES', str(scales), 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30'
    ]
    import model
    # Repo root is three levels above the `model` package file.
    model_repo_path = os.path.dirname(
        os.path.dirname(os.path.dirname(model.__file__)))
    cfg_file = "cfgs/{}_ls.yml".format(net) if large_scale else "cfgs/{}.yml".format(net)
    if cfg_file is not None:
        cfg_from_file(os.path.join(model_repo_path, cfg_file))
    if set_cfgs is not None:
        cfg_from_list(set_cfgs)
    # Merge optional config-section overrides passed via **kwargs.
    train_kwargs = kwargs.pop("TRAIN", None)
    resnet_kwargs = kwargs.pop("RESNET", None)
    mobilenet_kwargs = kwargs.pop("MOBILENET", None)
    if train_kwargs is not None:
        for key, value in train_kwargs.items():
            cfg["TRAIN"][key] = value
    if resnet_kwargs is not None:
        for key, value in resnet_kwargs.items():
            cfg["RESNET"][key] = value
    if mobilenet_kwargs is not None:
        for key, value in mobilenet_kwargs.items():
            cfg["MOBILENET"][key] = value
    if kwargs is not None:
        for key, value in kwargs.items():
            cfg[key] = value
    print('Using config:')
    cfg.MODEL_DIR = os.path.abspath(cfg.MODEL_DIR)
    cfg.TRAIN_DATA_CLEAN_PATH = os.path.abspath(cfg.TRAIN_DATA_CLEAN_PATH)
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)
    print("LEARNING RATE: {}".format(cfg.TRAIN.LEARNING_RATE))
    # Warning to use cuda if available
    if torch.cuda.is_available() and not cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    # Train set
    # Note: Use validation set and disable the flipped to enable faster loading.
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = cuda
    imdb, roidb, ratio_list, ratio_index = combined_roidb(imdb_name)
    train_size = len(roidb)
    print('{:d} roidb entries'.format(len(roidb)))
    # output_dir = os.path.join(save_dir, arch, net, dataset)
    output_dir = cfg.MODEL_DIR
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    sampler_batch = sampler(train_size, batch_size)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, batch_size,
                             imdb.num_classes, training=True)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             sampler=sampler_batch,
                                             num_workers=num_workers)
    # Initilize the tensor holder
    # Legacy (pre-0.4 PyTorch) placeholder tensors resized per batch.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # Copy tensors in CUDA memory
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    # Make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    if cuda:
        cfg.CUDA = True
    # Initilize the network:
    if net == 'vgg16':
        # model = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic)
        print("Pretrained model is not downloaded and network is not used")
    elif net == 'res18':
        model = resnet(imdb.classes, 18, pretrained=False,
                       class_agnostic=class_agnostic)  # TODO: Check dim error
    elif net == 'res34':
        model = resnet(imdb.classes, 34, pretrained=False,
                       class_agnostic=class_agnostic)  # TODO: Check dim error
    elif net == 'res50':
        model = resnet(imdb.classes, 50, pretrained=False,
                       class_agnostic=class_agnostic)  # TODO: Check dim error
    elif net == 'res101':
        model = resnet(imdb.classes, 101, pretrained=True,
                       class_agnostic=class_agnostic)
    elif net == 'res152':
        model = resnet(imdb.classes, 152, pretrained=True,
                       class_agnostic=class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()
    # Create network architecture
    model.create_architecture()
    # Update model parameters
    lr = cfg.TRAIN.LEARNING_RATE
    # tr_momentum = cfg.TRAIN.MOMENTUM
    # tr_momentum = args.momentum
    # Per-parameter options: biases may get doubled lr and no weight decay.
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]
    # Optimizer
    if optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)
    elif optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
    # Resume training
    if resume:
        load_name = os.path.join(
            output_dir,
            '{}_{}_{}_{}.pth'.format(arch, checksession, checkepoch, checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        session = checkpoint['session'] + 1
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))
    # Train on Multiple GPUS
    if mGPUs:
        model = nn.DataParallel(model)
    # Copy network to CUDA memroy
    if cuda:
        model.cuda()
    # Training loop
    iters_per_epoch = int(train_size / batch_size)
    sys.stdout.flush()
    for epoch in range(start_epoch, max_epochs + 1):
        # remove batch re-sizing for augmentation or adjust?
        dataset.resize_batch()
        # Set model to train mode
        model.train()
        loss_temp = 0
        start = time.time()
        # Update learning rate as per decay step
        if epoch % (lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, lr_decay_gamma)
            lr *= lr_decay_gamma
        # Get batch data and train
        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            sys.stdout.flush()
            data = next(data_iter)
            # Apply augmentations
            aug_img_tensors, aug_bbox_tensors = apply_augmentations(
                data[0],
                data[2],
                flip_prob=flip_prob,
                scale=scale,
                scale_prob=scale_prob,
                translate=translate,
                translate_prob=translate_prob,
                angle=angle,
                dist=dist,
                rotate_prob=rotate_prob,
                shear_factor=shear_factor,
                shear_prob=shear_prob)
            # im_data.data.resize_(data[0].size()).copy_(data[0])
            im_data.data.resize_(aug_img_tensors.size()).copy_(aug_img_tensors)
            im_info.data.resize_(data[1].size()).copy_(data[1])
            # gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            gt_boxes.data.resize_(
                aug_bbox_tensors.size()).copy_(aug_bbox_tensors)
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            # Compute multi-task loss
            model.zero_grad()
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = model(im_data, im_info, gt_boxes, num_boxes)
            # Weighted sum of the four detector losses.
            loss = rpn_loss_cls_wt * rpn_loss_cls.mean() + rpn_loss_box_wt * rpn_loss_box.mean() + \
                   RCNN_loss_cls_wt * RCNN_loss_cls.mean() + RCNN_loss_bbox_wt * RCNN_loss_bbox.mean()
            # NOTE(review): `loss.data[0]` is the pre-0.4 PyTorch scalar API;
            # on modern PyTorch this needs `loss.item()` — confirm pinned version.
            loss_temp += loss.data[0]
            # Backward pass to compute gradients and update weights
            optimizer.zero_grad()
            loss.backward()
            if net == "vgg16":
                clip_gradient(model, 10.)
            optimizer.step()
            # Display training stats on terminal
            if step % disp_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= disp_interval
                if mGPUs:
                    batch_loss = loss.data[0]
                    loss_rpn_cls = rpn_loss_cls.mean().data[0]
                    loss_rpn_box = rpn_loss_box.mean().data[0]
                    loss_rcnn_cls = RCNN_loss_cls.mean().data[0]
                    loss_rcnn_box = RCNN_loss_bbox.mean().data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    batch_loss = loss.data[0]
                    loss_rpn_cls = rpn_loss_cls.data[0]
                    loss_rpn_box = rpn_loss_box.data[0]
                    loss_rcnn_cls = RCNN_loss_cls.data[0]
                    loss_rcnn_box = RCNN_loss_bbox.data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                print("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                      % (session, epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" % (fg_cnt, bg_cnt, end - start))
                print("\t\t\t batch_loss: %.4f, rpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (batch_loss, loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    for tag, value in info.items():
                        logger.scalar_summary(tag, value, step)
                loss_temp = 0
                start = time.time()
        # Save model at checkpoints
        if mGPUs:
            save_name = os.path.join(
                output_dir, '{}_{}_{}_{}.pth'.format(arch, session, epoch, step))
            save_checkpoint(
                {
                    'session': session,
                    'epoch': epoch + 1,
                    'model': model.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': class_agnostic,
                }, save_name)
        else:
            save_name = os.path.join(
                output_dir, '{}_{}_{}_{}.pth'.format(arch, session, epoch, step))
            save_checkpoint(
                {
                    'session': session,
                    'epoch': epoch + 1,
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': class_agnostic,
                }, save_name)
        print('save model: {}'.format(save_name))
        end = time.time()
        # NOTE(review): `i` is not defined anywhere in this function — this
        # call raises NameError when reached. Probably meant `session` (or a
        # literal); confirm the intended glob pattern before fixing.
        delete_older_checkpoints(
            os.path.join(cfg.MODEL_DIR, "couplenet_{}_*.pth".format(i)))
        print("Run Time: ", end - start)
def run(args):
    """Evaluate a trained Faster R-CNN checkpoint on the chosen dataset's
    validation split using soft-NMS, write ``detections.pkl``, and run the
    dataset's official evaluation.
    """
    lr = cfg.TRAIN.LEARNING_RATE
    momentum = cfg.TRAIN.MOMENTUM
    weight_decay = cfg.TRAIN.WEIGHT_DECAY
    # Py2/Py3 shim: `xrange` becomes a local alias for `range` on Python 3
    # (UnboundLocalError is a subclass of NameError, so the except catches it).
    try:
        xrange  # Python 2
    except NameError:
        xrange = range  # Python 3
    #args = parse_args()
    print('Called with args:')
    print(args)
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    np.random.seed(cfg.RNG_SEED)
    # Map dataset name to imdb names and anchor settings.
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "vg":
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    print('Using config:')
    pprint.pprint(cfg)
    # Evaluation: no flipped augmentation.
    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False)
    imdb.competition_mode(on=True)
    print('{:d} roidb entries'.format(len(roidb)))
    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' + input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))
    # initilize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes, 50, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes, 152, pretrained=False, class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()
    fasterRCNN.create_architecture()
    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    # initilize the tensor holder here.
    # Legacy placeholder tensors, resized per image below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    if args.cuda:
        cfg.CUDA = True
    if args.cuda:
        fasterRCNN.cuda()
    start = time.time()
    max_per_image = 100
    vis = args.vis
    # Lower score threshold when visualizing; keep everything otherwise.
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0
    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    # all_boxes[cls][image] = N x 5 array [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    #pdb.set_trace()
    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
                             imdb.num_classes, training=False, normalize = False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)
    data_iter = iter(dataloader)
    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')
    fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])
        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        # Undo the test-time image scaling (data[1] row: [h, w, scale]).
        pred_boxes /= data[1][0][2].item()
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                # cls_dets = cls_dets[order]
                # keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                # cls_dets = cls_dets[keep.view(-1).long()]
                cls_dets = cls_dets[order]
                # keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                # Soft-NMS returns the surviving detections directly.
                keep = softnms_cpu_torch(cls_dets)
                # cls_dets = cls_dets[keep.view(-1).long()]
                cls_dets = keep
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array
        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        misc_toc = time.time()
        nms_time = misc_toc - misc_tic
        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \
                         .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()
        if vis:
            cv2.imwrite('result.png', im2show)
            pdb.set_trace()
            #cv2.imshow('test', im2show)
            #cv2.waitKey(0)
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
    end = time.time()
    print("test time: %0.4fs" % (end - start))
def load_model(args):
    """Build the Faster R-CNN network and load a trained checkpoint.

    Sets the module-level globals ``pascal_classes`` (the class-name array for
    this diagram-detection vocabulary) and ``fasterRCNN`` (the loaded network),
    so later inference code can use them without re-loading.

    Args:
        args: parsed CLI namespace; reads cfg_file, set_cfgs, cuda, load_dir,
            net, dataset, checksession, checkepoch, checkpoint, class_agnostic.

    Raises:
        Exception: if the checkpoint directory does not exist.
        ValueError: if ``args.net`` is not one of vgg16/res101/res50/res152.
    """
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.USE_GPU_NMS = args.cuda
    np.random.seed(cfg.RNG_SEED)

    # Checkpoints are laid out as <load_dir>/<net>/<dataset>/faster_rcnn_S_E_P.pth
    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' + input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    global pascal_classes
    pascal_classes = np.asarray([
        '__background__',  # always index 0
        'element in the Array-list', 'root node in the Binary-tree',
        'internal node in the Binary-tree', 'leaf node in the Binary-tree',
        'vertex in the Graph', 'process in the Deadlock',
        'resource in the Deadlock', 'head element in the Queue',
        'element in the Queue', 'tail element in the Queue',
        'head node in the Queue', 'pointer in the Queue',
        'node in the Non-binary-tree', 'node in the Network_topology',
        'head element in the Linked_List', 'element in the Linked_List',
        'tail element in the Linked_List', 'insert element in the Linked_List',
        'head node in the Linked_List', 'arrow', 'edge',
        'top element in the Stack', 'bottom element in the Stack',
        'push element in the Stack', 'pop element in the Stack',
        'internal element in the Stack', 'empty stack in the Stack',
        'terminal in the Flowchart', 'process in the Flowchart',
        'decision in the Flowchart', 'flowline in the Flowchart',
        'document in the Flowchart', 'input in the Flowchart',
        'output in the Flowchart', 'annotation in the Flowchart',
        'database in the Flowchart', 'manual operation in the Flowchart',
        'predefined process in the Flowchart',
        'on-page connector in the Flowchart'
    ])

    # initialize the network here.
    global fasterRCNN
    if args.net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes, pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(pascal_classes, 101, pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(pascal_classes, 50, pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(pascal_classes, 152, pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        # Previously this fell through to pdb.set_trace(), leaving fasterRCNN
        # undefined and crashing with a NameError below; fail fast instead.
        raise ValueError("network is not defined: {}".format(args.net))

    fasterRCNN.create_architecture()

    # map_location keeps CPU-only hosts working: remap GPU tensors onto CPU.
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(load_name,
                                map_location=(lambda storage, loc: storage))
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint:
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
def __call__(self, *args, **kwargs):
    """Run Faster R-CNN detection on every image in ``images/``.

    Loads a hard-coded VGG16 checkpoint, detects the 20 PASCAL VOC classes
    in each image of ``image_dir``, and writes an annotated copy next to the
    original as ``<name>_det.jpg``.

    Note: ``webcam_num`` is hard-coded to -1, so the webcam display branch
    below is effectively dead code kept for parity with the original demo.
    """
    net = 'vgg16'
    checksession = 1
    checkepoch = 6
    checkpoint = 10021
    load_dir = './mydetector/model'
    set_cfgs = None
    dataset = 'imagenet'
    image_dir = 'images'
    webcam_num = -1
    cfg_file = './mydetector/cfgs/vgg16.yml'
    cfg.CUDA = True

    cfg_from_file(cfg_file)
    if set_cfgs is not None:
        cfg_from_list(set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(1)

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    # Load the pre-trained model.
    input_dir = load_dir + "/" + net + "/" + dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' + input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(checksession, checkepoch, checkpoint))

    pascal_classes = np.asarray([
        '__background__', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
        'tvmonitor'
    ])

    # initialize the network here.
    if net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes, pretrained=False,
                           class_agnostic=False)
    elif net == 'res101':
        fasterRCNN = resnet(pascal_classes, 101, pretrained=False,
                            class_agnostic=False)
    elif net == 'res50':
        fasterRCNN = resnet(pascal_classes, 50, pretrained=False,
                            class_agnostic=False)
    elif net == 'res152':
        fasterRCNN = resnet(pascal_classes, 152, pretrained=False,
                            class_agnostic=False)
    else:
        # Fail fast instead of dropping into the debugger with fasterRCNN unset.
        raise ValueError("network is not defined: {}".format(net))

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint:
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')

    # initialize the tensor holders here; they are resized per image below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    im_data = im_data.cuda()
    im_info = im_info.cuda()
    num_boxes = num_boxes.cuda()
    gt_boxes = gt_boxes.cuda()

    # make variable (inference runs under torch.no_grad() below, so the
    # deprecated volatile=True flag is unnecessary)
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    fasterRCNN.cuda()
    fasterRCNN.eval()

    start = time.time()
    max_per_image = 100
    thresh = 0.05  # minimum class score to consider a detection
    vis = True

    imglist = os.listdir(image_dir)
    num_images = len(imglist)
    print('Loaded Photo: {} images.'.format(num_images))

    # NOTE: the original loop condition was `num_images >= 0`, which — with the
    # decrement-then-index pattern below — processed imglist[-1] a second time.
    # `> 0` visits each image exactly once (webcam_num is always -1 here).
    while num_images > 0:
        total_tic = time.time()
        if webcam_num == -1:
            num_images -= 1
            im_file = os.path.join(image_dir, imglist[num_images])
            # im = cv2.imread(im_file)
            im_in = np.array(imread(im_file))

        # promote grayscale images to 3 channels
        if len(im_in.shape) == 2:
            im_in = im_in[:, :, np.newaxis]
            im_in = np.concatenate((im_in, im_in, im_in), axis=2)
        # rgb -> bgr (network was trained on OpenCV-style BGR input)
        im = im_in[:, :, ::-1]

        blobs, im_scales = _get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)  # NHWC -> NCHW
        im_info_pt = torch.from_numpy(im_info_np)

        with torch.no_grad():
            im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.resize_(1, 1, 5).zero_()
            num_boxes.resize_(1).zero_()

        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                    cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                    + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # undo the test-time image scaling so boxes are in original pixels
        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        if vis:
            im2show = np.copy(im)
        for j in xrange(1, len(pascal_classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, pascal_classes[j],
                                             cls_dets.cpu().numpy(), 0.5)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        if webcam_num == -1:
            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                             .format(num_images + 1, len(imglist), detect_time,
                                     nms_time))
            sys.stdout.flush()

        if vis and webcam_num == -1:
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)
            result_path = os.path.join(image_dir,
                                       imglist[num_images][:-4] + "_det.jpg")
            cv2.imwrite(result_path, im2show)
        else:
            # dead branch while webcam_num == -1; kept from the original demo
            im2showRGB = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)
            cv2.imshow("frame", im2showRGB)
            total_toc = time.time()
            total_time = total_toc - total_tic
            frame_rate = 1 / total_time
            print('Frame rate:', frame_rate)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
def load_model(args):
    """Load a Faster R-CNN model plus its class vocabulary for inference.

    Args:
        args: parsed CLI namespace; reads dataset, cfg_file, set_cfgs, cuda,
            classes_dir, load_dir, net, class_agnostic.

    Returns:
        (classes, fasterRCNN): the class-name list (index 0 is background)
        and the network with the checkpoint weights loaded.

    Raises:
        Exception: if ``args.load_dir`` does not exist.
        ValueError: if ``args.net`` is not one of vgg16/res101/res50/res152.
    """
    # set cfg according to the dataset used to train the pre-trained model
    if args.dataset == "pascal_voc":
        args.set_cfgs = [
            "ANCHOR_SCALES", "[8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"
        ]
    elif args.dataset == "pascal_voc_0712":
        args.set_cfgs = [
            "ANCHOR_SCALES", "[8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"
        ]
    elif args.dataset == "coco":
        args.set_cfgs = [
            "ANCHOR_SCALES", "[4, 8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]",
        ]
    elif args.dataset == "imagenet":
        args.set_cfgs = [
            "ANCHOR_SCALES", "[8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"
        ]
    elif args.dataset == "vg":
        args.set_cfgs = [
            "ANCHOR_SCALES", "[4, 8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]",
        ]

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.USE_GPU_NMS = args.cuda
    print("Using config:")
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # Load classes: one object name per line, "name,alias,..." -> first field.
    classes = ["__background__"]
    with open(os.path.join(args.classes_dir, "objects_vocab.txt")) as f:
        for line in f.readlines():  # renamed from `object` (shadowed builtin)
            classes.append(line.split(",")[0].lower().strip())

    if not os.path.exists(args.load_dir):
        raise Exception(
            "There is no input directory for loading network from " +
            args.load_dir)
    load_name = os.path.join(
        args.load_dir, "faster_rcnn_{}_{}.pth".format(args.net, args.dataset))

    # initialize the network used to train the pre-trained model
    if args.net == "vgg16":
        fasterRCNN = vgg16(classes, pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net == "res101":
        fasterRCNN = resnet(classes, 101, pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == "res50":
        fasterRCNN = resnet(classes, 50, pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == "res152":
        fasterRCNN = resnet(classes, 152, pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        # Previously dropped into pdb with fasterRCNN undefined; fail fast.
        raise ValueError("network is not defined: {}".format(args.net))

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    # map_location remaps GPU-saved tensors onto CPU for CPU-only hosts.
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(load_name,
                                map_location=(lambda storage, loc: storage))
    fasterRCNN.load_state_dict(checkpoint["model"])
    if "pooling_mode" in checkpoint:
        cfg.POOLING_MODE = checkpoint["pooling_mode"]
    print("load model successfully!")
    print("load model %s" % (load_name))

    return classes, fasterRCNN
args.imdb_name = "imagenet_train" args.imdbval_name = "imagenet_val" args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30'] elif args.dataset == "vg": # train sizes: train, smalltrain, minitrain # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20'] args.imdb_name = "vg_150-50-50_minitrain" args.imdbval_name = "vg_150-50-50_minival" args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50'] args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net) if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) print('Using config:') pprint.pprint(cfg) np.random.seed(cfg.RNG_SEED) #torch.backends.cudnn.benchmark = True if torch.cuda.is_available() and not args.cuda: print("WARNING: You have a CUDA device, so you should probably run with --cuda") # train set # -- Note: Use validation set and disable the flipped to enable faster loading. cfg.TRAIN.USE_FLIPPED = True cfg.USE_GPU_NMS = args.cuda imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name) train_size = len(roidb)
def bld_train(args, ann_path=None, step=0):
    """Pseudo-label the target set with an 'expectation' model, merge it with
    the source set, and train Faster R-CNN on the combined data.

    Workflow:
      1. Configure the dataset/cfg from ``args``.
      2. Build source and target roidbs (target has image-level labels only).
      3. Run a pre-trained "expectation" network (``fasterRCNN_val``) over the
         target set and write its confident detections into the target roidb
         as pseudo ground-truth boxes.
      4. Merge both roidbs, drop images with no boxes, and run the standard
         training loop, checkpointing once per epoch.

    Args:
        args: parsed CLI namespace (dataset, net, lr, batch_size, cuda, ...).
        ann_path: base directory holding 'source' and 'target' annotations.
        step: active-learning step index, used in log/checkpoint paths.
            NOTE(review): shadowed by the inner training-loop variable
            ``step`` below — the parameter value is unavailable after that.
    """
    # print('Train from annotaion {}'.format(ann_path))
    # print('Called with args:')
    # print(args)
    if args.use_tfboard:
        from model.utils.logger import Logger
        # Set the logger
        # NOTE(review): a component starting with '/' makes os.path.join
        # discard everything before it, so this resolves to
        # "/activestep<step>" — confirm this is intended.
        logger = Logger(
            os.path.join('./.logs', args.active_method,
                         "/activestep" + str(step)))

    # Map the dataset name to imdb names and dataset-specific cfg overrides.
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '30'
        ]
    elif args.dataset == "vg":
        # train sizes: train, smalltrain, minitrain
        # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    elif args.dataset == "voc_coco":
        args.imdb_name = "voc_coco_2007_train+voc_coco_2007_val"
        args.imdbval_name = "voc_coco_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    else:
        raise NotImplementedError

    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # print('Using config:')
    # pprint.pprint(cfg)
    # np.random.seed(cfg.RNG_SEED)

    # torch.backends.cudnn.benchmark = True
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # train set = source set + target set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = args.cuda

    # source train set, fully labeled
    #ann_path_source = os.path.join(ann_path, 'voc_coco_2007_train_f.json')
    #ann_path_target = os.path.join(ann_path, 'voc_coco_2007_train_l.json')
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdb_name, ann_path=os.path.join(ann_path, 'source'))
    imdb_tg, roidb_tg, ratio_list_tg, ratio_index_tg = combined_roidb(
        args.imdb_name, ann_path=os.path.join(ann_path, 'target'))

    print('{:d} roidb entries for source set'.format(len(roidb)))
    print('{:d} roidb entries for target set'.format(len(roidb_tg)))

    output_dir = args.save_dir + "/" + args.net + "/" + args.dataset + "/" + args.active_method + "/activestep" + str(
        step)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    sampler_batch_tg = None  # do not sample target set
    bs_tg = 4  # fixed mini-batch size for the pseudo-labeling pass

    dataset_tg = roibatchLoader(roidb_tg, ratio_list_tg, ratio_index_tg, bs_tg, \
                                imdb_tg.num_classes, training=True)
    assert imdb.num_classes == imdb_tg.num_classes

    dataloader_tg = torch.utils.data.DataLoader(dataset_tg,
                                                batch_size=bs_tg,
                                                sampler=sampler_batch_tg,
                                                num_workers=args.num_workers,
                                                worker_init_fn=_rand_fn())

    # initilize the tensor holder here (resized/filled per batch below).
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    image_label = torch.FloatTensor(1)
    confidence = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
        image_label = image_label.cuda()
        confidence = confidence.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    image_label = Variable(image_label)
    confidence = Variable(confidence)

    if args.cuda:
        cfg.CUDA = True

    # initialize the network here (the model being trained).
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes,
                           pretrained=True,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        raise NotImplementedError

    # initialize the expectation network (frozen pseudo-labeler).
    if args.net == 'vgg16':
        fasterRCNN_val = vgg16(imdb.classes,
                               pretrained=True,
                               class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN_val = resnet(imdb.classes,
                                101,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN_val = resnet(imdb.classes,
                                50,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN_val = resnet(imdb.classes,
                                152,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        raise NotImplementedError

    fasterRCNN.create_architecture()
    fasterRCNN_val.create_architecture()

    # lr = cfg.TRAIN.LEARNING_RATE
    lr = args.lr
    # tr_momentum = cfg.TRAIN.MOMENTUM
    # tr_momentum = args.momentum

    # Per-parameter options: biases may get doubled lr and no weight decay,
    # following the original Caffe Faster R-CNN solver settings.
    params = []
    for key, value in dict(fasterRCNN.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]

    if args.optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
    else:
        raise NotImplementedError

    if args.resume:
        load_name = os.path.join(
            output_dir,
            'faster_rcnn_{}_{}_{}.pth'.format(args.checksession,
                                              args.checkepoch,
                                              args.checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']
        fasterRCNN.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))

    # expectation model: load the fixed pseudo-labeling weights.
    print("load checkpoint for expectation model: %s" % args.model_path)
    checkpoint = torch.load(args.model_path)
    fasterRCNN_val.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    fasterRCNN_val = fasterRCNN_val  # no-op kept from original
    fasterRCNN_val.eval()

    if args.mGPUs:
        fasterRCNN = nn.DataParallel(fasterRCNN)
        #fasterRCNN_val = nn.DataParallel(fasterRCNN_val)
    if args.cuda:
        fasterRCNN.cuda()
        fasterRCNN_val.cuda()

    # Evaluation
    # data_iter = iter(dataloader_tg)
    # for target_k in range( int(train_size_tg / args.batch_size)):
    fname = "noisy_annotations.pkl"
    if not os.path.isfile(fname):
        # Pseudo-labeling pass over the whole target set.
        for batch_k, data in enumerate(dataloader_tg):
            # data = (im_data, im_info, gt_boxes, num_boxes, image_label)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            image_label.data.resize_(data[4].size()).copy_(data[4])
            b_size = len(im_data)
            # expactation pass (forward through the frozen model)
            rois, cls_prob, bbox_pred, \
            _, _, _, _, _ = fasterRCNN_val(im_data, im_info, gt_boxes, num_boxes)
            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]
            if cfg.TRAIN.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(b_size, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        # print('DEBUG: Size of box_deltas is {}'.format(box_deltas.size()) )
                        box_deltas = box_deltas.view(b_size, -1,
                                                     4 * len(imdb.classes))
                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            # TODO: data distalliation
            # Choose the confident samples
            for b_idx in range(b_size):
                # fill one confidence
                # confidence.data[b_idx, :] = 1 - (gt_boxes.data[b_idx, :, 4] == 0)
                # resize prediction back to original image scale
                pred_boxes[b_idx] /= data[1][b_idx][2]
                for j in xrange(1, imdb.num_classes):
                    if image_label.data[b_idx, j] != 1:
                        continue  # next if no image label

                    # filtering box outside of the image (degenerate boxes
                    # have zero width or zero height after clipping)
                    not_keep = (pred_boxes[b_idx][:, j * 4] == pred_boxes[b_idx][:, j * 4 + 2]) | \
                               (pred_boxes[b_idx][:, j * 4 + 1] == pred_boxes[b_idx][:, j * 4 + 3])
                    keep = torch.nonzero(not_keep == 0).view(-1)

                    # decease the number of pgts: halve the score threshold
                    # until at least one detection survives
                    thresh = 0.5
                    while torch.nonzero(
                            scores[b_idx, :, j][keep] > thresh).view(
                                -1).numel() <= 0:
                        thresh = thresh * 0.5
                    inds = torch.nonzero(
                        scores[b_idx, :, j][keep] > thresh).view(-1)
                    # if there is no det, error
                    if inds.numel() <= 0:
                        print('Warning!!!!!!! It should not appear!!')
                        continue

                    # find missing ID (first empty slot in gt_boxes, i.e.
                    # class column still zero)
                    missing_list = np.where(gt_boxes.data[b_idx, :, 4] == 0)[0]
                    if (len(missing_list) == 0): continue
                    missing_id = missing_list[0]
                    cls_scores = scores[b_idx, :, j][keep][inds]
                    cls_boxes = pred_boxes[b_idx][keep][inds][:,
                                                              j * 4:(j + 1) *
                                                              4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    keep = nms(cls_dets, 0.2)  # Magic number ????
                    keep = keep.view(-1).tolist()
                    sys.stdout.write(
                        'from {} predictions choose-> min({},4) as pseudo label   \r'
                        .format(len(cls_scores), len(keep)))
                    sys.stdout.flush()
                    _, order = torch.sort(cls_scores[keep], 0, True)
                    if len(keep) == 0: continue

                    # Write up to 4 highest-scoring surviving detections into
                    # the empty gt_boxes slots as pseudo ground truth.
                    max_keep = 4
                    for pgt_k in range(max_keep):
                        if len(order) <= pgt_k: break
                        # NOTE(review): 20 looks like the MAX_NUM_GT_BOXES
                        # slot limit — confirm against the cfg.
                        if missing_id + pgt_k >= 20: break
                        gt_boxes.data[b_idx, missing_id +
                                      pgt_k, :4] = cls_boxes[keep][order[
                                          len(order) - 1 - pgt_k]]
                        gt_boxes.data[b_idx, missing_id + pgt_k, 4] = j  # class
                        #confidence[b_idx, missing_id + pgt_k] = cls_scores[keep][order[len(order) - 1 - pgt_k]]
                        num_boxes[b_idx] = num_boxes[b_idx] + 1

                # Push the pseudo boxes back into the target roidb entry.
                sample = roidb_tg[dataset_tg.ratio_index[batch_k * bs_tg +
                                                         b_idx]]
                pgt_boxes = np.array([
                    gt_boxes[b_idx, x, :4].cpu().data.numpy()
                    for x in range(int(num_boxes[b_idx]))
                ])
                pgt_classes = np.array([
                    gt_boxes[b_idx, x, 4].cpu().data[0]
                    for x in range(int(num_boxes[b_idx]))
                ])
                sample["boxes"] = pgt_boxes
                sample["gt_classes"] = pgt_classes
                # DEBUG
                assert np.array_equal(sample["label"],image_label[b_idx].cpu().data.numpy()), \
                    "Image labels are not equal! {} vs {}".format(sample["label"],image_label[b_idx].cpu().data.numpy())

        #with open(fname, 'w') as f:
        #    pickle.dump(roidb_tg, f)
    else:
        pass
        # with open(fname) as f:  # Python 3: open(..., 'rb')
        #     roidb_tg = pickle.load(f)

    print("-- Optimization Stage --")
    # Optimization
    print("######################################################l")
    roidb.extend(roidb_tg)  # merge two datasets

    # Drop images that ended up with no boxes at all.
    # NOTE(review): the if True/else branches are identical — dead code kept.
    print('before filtering, there are %d images...' % (len(roidb)))
    i = 0
    while i < len(roidb):
        if True:
            if len(roidb[i]['boxes']) == 0:
                del roidb[i]
                i -= 1
        else:
            if len(roidb[i]['boxes']) == 0:
                del roidb[i]
                i -= 1
        i += 1
    print('after filtering, there are %d images...' % (len(roidb)))

    # Re-rank aspect ratios because the merged roidb changed.
    from roi_data_layer.roidb import rank_roidb_ratio
    ratio_list, ratio_index = rank_roidb_ratio(roidb)
    train_size = len(roidb)

    sampler_batch = sampler(train_size, args.batch_size)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                             imdb.num_classes, training=True)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             sampler=sampler_batch,
                                             num_workers=args.num_workers,
                                             worker_init_fn=_rand_fn())
    iters_per_epoch = int(train_size / args.batch_size)
    print("Training set size is {}".format(train_size))

    # Standard Faster R-CNN training loop over the merged data.
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        fasterRCNN.train()
        loss_temp = 0
        start = time.time()
        epoch_start = start

        # adjust learning rate
        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma

        # one step (NOTE: `step` here shadows the function parameter)
        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            data = next(data_iter)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            image_label.data.resize_(data[4].size()).copy_(data[4])
            #gt_boxes.data = \
            #    torch.cat((gt_boxes.data, torch.zeros(gt_boxes.size(0), gt_boxes.size(1), 1).cuda()), dim=2)
            conf_data = torch.zeros(gt_boxes.size(0),
                                    gt_boxes.size(1)).cuda()
            confidence.data.resize_(conf_data.size()).copy_(conf_data)

            fasterRCNN.zero_grad()
            # rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, confidence)
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
            # rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, confidence)

            loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
                + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
            # NOTE(review): `.data[0]` is pre-0.4 PyTorch; newer versions
            # need `.item()`.
            loss_temp += loss.data[0]

            # backward
            optimizer.zero_grad()
            loss.backward()
            if args.net == "vgg16":
                clip_gradient(fasterRCNN, 10.)
            optimizer.step()

            # Periodic console / tensorboard logging.
            if step % args.disp_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= args.disp_interval

                if args.mGPUs:
                    loss_rpn_cls = rpn_loss_cls.mean().data[0]
                    loss_rpn_box = rpn_loss_box.mean().data[0]
                    loss_rcnn_cls = RCNN_loss_cls.mean().data[0]
                    loss_rcnn_box = RCNN_loss_bbox.mean().data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    loss_rpn_cls = rpn_loss_cls.data[0]
                    loss_rpn_box = rpn_loss_box.data[0]
                    loss_rcnn_cls = RCNN_loss_cls.data[0]
                    loss_rcnn_box = RCNN_loss_bbox.data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt

                print("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                      % (args.session, epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                      (fg_cnt, bg_cnt, end - start))
                print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if args.use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    for tag, value in info.items():
                        logger.scalar_summary(tag, value, step)
                    images = []
                    for k in range(args.batch_size):
                        image = draw_bounding_boxes(
                            im_data[k].data.cpu().numpy(),
                            gt_boxes[k].data.cpu().numpy(),
                            im_info[k].data.cpu().numpy(),
                            num_boxes[k].data.cpu().numpy())
                        images.append(image)
                    logger.image_summary("Train epoch %2d, iter %4d/%4d" % (epoch, step, iters_per_epoch), \
                                         images, step)

                loss_temp = 0
                start = time.time()
            if False:
                break

        # Checkpoint once per epoch (unwrap DataParallel when used).
        if args.mGPUs:
            save_name = os.path.join(
                output_dir,
                'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))
            save_checkpoint(
                {
                    'session': args.session,
                    'epoch': epoch + 1,
                    'model': fasterRCNN.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': args.class_agnostic,
                }, save_name)
        else:
            save_name = os.path.join(
                output_dir,
                'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))
            save_checkpoint(
                {
                    'session': args.session,
                    'epoch': epoch + 1,
                    'model': fasterRCNN.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': args.class_agnostic,
                }, save_name)
        print('save model: {}'.format(save_name))

        epoch_end = time.time()
        print('Epoch time cost: {}'.format(epoch_end - epoch_start))

    print('finished!')