def get_model(cls):
    """Build the inference model and return the pieces prediction needs.

    Returns a tuple ``(net, priors, _preprocess, detector)``: the network in
    eval mode, the prior boxes, the input transform, and the Detect
    post-processor.
    """
    weights_path = '/opt/ml/model/m2det512_vgg.pth'

    # Anchor / prior-box configuration.
    anchor_cfg = anchors(cfg)
    print_info('The Anchor info: \n{}'.format(anchor_cfg))
    box_generator = PriorBox(anchor_cfg)

    # Construct the network and load the trained weights.
    net = build_net('test', size=cfg.model.input_size, config=cfg.model.m2det_config)
    init_net(net, cfg, weights_path)
    print_info('===> Finished constructing and loading model', ['yellow', 'bold'])
    net.eval()

    with torch.no_grad():
        priors = box_generator.forward()
        if not cfg.test_cfg.cuda:
            net = net.cpu()
        else:
            net = net.cuda()
            priors = priors.cuda()
            cudnn.benchmark = True

    _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    detector = Detect(cfg.model.m2det_config.num_classes, cfg.loss.bkg_label, anchor_cfg)
    return net, priors, _preprocess, detector
def __init__(self, phase, num_classes):
    """BlazeFace constructor.

    Args:
        phase: 'train' or 'test'; in test mode a softmax + Detect
            post-processing pair is attached.
        num_classes: number of output classes.
    """
    super(BlazeFace, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    # Stem: 3x3 stride-2 conv halves the input resolution.
    self.conv_1 = nn.Conv2d(3, 24, kernel_size=3, stride=2, padding=1, bias=True)
    self.bn_1 = nn.BatchNorm2d(24)
    self.relu = nn.ReLU(inplace=True)
    # Blaze blocks; the stride=2 ones downsample. Blocks taking a third
    # 96-channel argument are presumably the "double" BlazeBlock variant —
    # confirm against BlazeBlock's signature.
    self.blaze_1 = BlazeBlock(24, 24)
    self.blaze_2 = BlazeBlock(24, 24)
    self.blaze_3 = BlazeBlock(24, 48, stride=2)
    self.blaze_4 = BlazeBlock(48, 48)
    self.blaze_5 = BlazeBlock(48, 48)
    self.blaze_6 = BlazeBlock(48, 24, 96, stride=2)
    self.blaze_7 = BlazeBlock(96, 24, 96)
    self.blaze_8 = BlazeBlock(96, 24, 96)
    self.blaze_9 = BlazeBlock(96, 24, 96, stride=2)
    self.blaze_10 = BlazeBlock(96, 24, 96)
    self.blaze_11 = BlazeBlock(96, 24, 96)
    self.apply(initialize)
    # Location/confidence heads built over the blaze_9 / blaze_10 feature maps.
    self.head = mbox([self.blaze_9, self.blaze_10], [2, 6], 2)
    self.loc = nn.ModuleList(self.head[0])
    self.conf = nn.ModuleList(self.head[1])
    self.cfg = (wider_face)
    # print(self.cfg)
    self.priorbox = PriorBox(self.cfg)
    # NOTE(review): `volatile=True` is a pre-0.4 PyTorch idiom (no-op or
    # error on modern releases) — confirm the targeted PyTorch version.
    self.priors = Variable(self.priorbox.forward(), volatile=True)
    if phase == 'test':
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
def main():
    """Evaluate a trained SSD checkpoint on the configured validation split."""
    global args
    args = arg_parse()
    cfg_from_file(args.cfg_file)
    bgr_means = cfg.TRAIN.BGR_MEAN
    dataset_name = cfg.DATASETS.DATA_TYPE
    batch_size = cfg.TEST.BATCH_SIZE
    num_workers = args.num_workers
    # COCO evaluation conventionally keeps more detections per image than VOC.
    if cfg.DATASETS.DATA_TYPE == 'VOC':
        trainvalDataset = VOCDetection
        top_k = 200
    else:
        trainvalDataset = COCODetection
        top_k = 300
    dataroot = cfg.DATASETS.DATAROOT
    if cfg.MODEL.SIZE == '300':
        size_cfg = cfg.SMALL
    else:
        size_cfg = cfg.BIG
    valSet = cfg.DATASETS.VAL_TYPE
    num_classes = cfg.MODEL.NUM_CLASSES
    save_folder = args.save_folder
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    cfg.TRAIN.TRAIN_ON = False
    net = SSD(cfg)

    # Load the checkpoint, stripping a possible DataParallel 'module.' prefix.
    checkpoint = torch.load(args.weights)
    state_dict = checkpoint['model']
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:] if k.startswith('module.') else k
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)

    detector = Detect(cfg)
    ValTransform = BaseTransform(size_cfg.IMG_WH, bgr_means, (2, 0, 1))
    val_dataset = trainvalDataset(dataroot, valSet, ValTransform, "val")
    val_loader = data.DataLoader(val_dataset, batch_size, shuffle=False,
                                 num_workers=num_workers,
                                 collate_fn=detection_collate)
    # BUG FIX: top_k was unconditionally reset to 300 here, silently
    # discarding the dataset-specific value chosen above.
    thresh = cfg.TEST.CONFIDENCE_THRESH
    eval_net(val_dataset, val_loader, net, detector, cfg, ValTransform,
             top_k, thresh=thresh, batch_size=batch_size)
def main():
    """Run single-image detection over every file in a folder and save annotated copies."""
    global args
    args = arg_parse()
    bgr_means = (104, 117, 123)
    dataset_name = args.dataset
    size = args.size
    top_k = args.top_k
    thresh = args.confidence_threshold
    # "refine_*" model versions use the ARM branch of RefineDet.
    use_refine = args.version.split("_")[0] == "refine"

    # Dataset-specific configuration.
    if dataset_name[0] == "V":
        cfg = cfg_dict["VOC"][args.version][str(size)]
        trainvalDataset = VOCDetection
        dataroot = VOCroot
        targetTransform = AnnotationTransform()
        valSet = datasets_dict["VOC2007"]
        classes = VOC_CLASSES
    else:
        cfg = cfg_dict["COCO"][args.version][str(size)]
        trainvalDataset = COCODetection
        dataroot = COCOroot
        targetTransform = None
        valSet = datasets_dict["COCOval"]
        classes = COCO_CLASSES
    num_classes = cfg['num_classes']

    save_folder = args.save_folder
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)

    if args.cuda and torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    net = model_builder(args.version, cfg, "test", int(size), num_classes,
                        args.channel_size)
    # Load weights, dropping any DataParallel 'module.' prefix.
    raw_weights = torch.load(args.weights)
    from collections import OrderedDict
    cleaned = OrderedDict()
    for key, value in raw_weights.items():
        cleaned[key[7:] if key.startswith('module.') else key] = value
    net.load_state_dict(cleaned)

    detector = Detect(num_classes, 0, cfg, use_arm=use_refine)
    ValTransform = BaseTransform(cfg["img_wh"], bgr_means, (2, 0, 1))

    for item in os.listdir(args.images):
        img_path = os.path.join(args.images, item)
        img = cv2.imread(img_path)
        dets = im_detect(img, net, detector, cfg, ValTransform, thresh)
        # The annotated image is saved from `img` (original behavior kept);
        # presumably draw_rects draws in place — confirm against its source.
        draw_rects(img, dets, classes)
        cv2.imwrite(os.path.join(save_folder, "output_" + item), img)
def test_model(trained_model):
    """Build the net, load `trained_model` weights, and evaluate on the chosen dataset."""
    # Input resolution and class count follow the CLI flags.
    img_dim = 512 if args.size == '512' else 300
    num_classes = 81 if args.dataset == 'COCO' else 21
    net = build_net('test', img_dim, num_classes)

    # Load the checkpoint, stripping any DataParallel 'module.' prefix.
    raw_state = torch.load(trained_model)
    from collections import OrderedDict
    cleaned_state = OrderedDict()
    for key, value in raw_state.items():
        cleaned_state[key[7:] if key.startswith('module.') else key] = value
    net.load_state_dict(cleaned_state)
    net.eval()
    print('Finished loading model!')

    # Pick the evaluation dataset.
    if args.dataset == 'VOC':
        testset = VOCDetection(VOCroot, [('2007', 'test')], None, AnnotationTransform())
    elif args.dataset == 'VOC2012':
        testset = VOCDetection(VOCroot, [('2012', 'test')], None, AnnotationTransform())
    elif args.dataset == 'COCO':
        testset = COCODetection(COCOroot, [('2014', 'minival')], None)
    else:
        print('Only VOC and COCO dataset are supported now!')

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    else:
        net = net.cpu()

    # Evaluation: keep at most top_k detections per image.
    top_k = 200
    detector = Detect(num_classes, 0, cfg)
    save_folder = os.path.join(args.save_folder, args.dataset)
    rgb_means = (103.94, 116.78, 123.68) if args.version == 'RFB_mobile' else (104, 117, 123)
    test_net(save_folder, net, detector, args.cuda, testset,
             BaseTransform(net.size, rgb_means, (2, 0, 1)),
             top_k, thresh=0.01)
def main():
    """Detect objects in a sample image from the input folder and save the result."""
    global args
    args = arg_parse()
    ssh_run_param(args)
    cfg_from_file(args.cfg_file)
    bgr_means = cfg.TRAIN.BGR_MEAN
    dataset_name = cfg.DATASETS.DATA_TYPE
    batch_size = cfg.TEST.BATCH_SIZE
    num_workers = args.num_workers

    # Dataset-specific label set and per-image detection cap.
    if cfg.DATASETS.DATA_TYPE == 'VOC':
        trainvalDataset = VOCDetection
        classes = VOC_CLASSES
        top_k = 200
    else:
        trainvalDataset = COCODetection
        classes = COCO_CLASSES
        top_k = 300
    valSet = cfg.DATASETS.VAL_TYPE
    num_classes = cfg.MODEL.NUM_CLASSES

    save_folder = args.save_folder
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    cfg.TRAIN.TRAIN_ON = False
    net = SSD(cfg)

    # Load checkpoint weights, dropping any DataParallel 'module.' prefix.
    checkpoint = torch.load(args.weights)
    from collections import OrderedDict
    cleaned = OrderedDict()
    for key, value in checkpoint['model'].items():
        cleaned[key[7:] if key.startswith('module.') else key] = value
    net.load_state_dict(cleaned)

    detector = Detect(cfg)
    ValTransform = BaseTransform(cfg.TEST.INPUT_WH, bgr_means, (2, 0, 1))
    thresh = cfg.TEST.CONFIDENCE_THRESH

    # Only the third file of the folder is processed ([2:3]) — original
    # behavior preserved.
    for item in os.listdir(args.images)[2:3]:
        img_path = os.path.join(args.images, item)
        print(img_path)
        img = cv2.imread(img_path)
        dets = im_detect(img, net, detector, ValTransform, thresh)
        # Result saved from `img`; presumably draw_rects draws in place —
        # original behavior kept.
        draw_rects(img, dets, classes)
        cv2.imwrite(os.path.join(save_folder, "output_" + item), img)
def __init__(self, args):
    """VGG backbone with a single-scale (stride-16) detection head.

    NOTE(review): `Variable(..., volatile=True)` and `nn.Softmax()` without
    an explicit `dim` are pre-0.4 PyTorch idioms — confirm the targeted
    PyTorch version before modernizing.
    """
    super(VggStride16, self).__init__()
    self.phase = args.phase
    self.num_classes = args.num_classes
    # Priors are generated once from the fixed config at construction time.
    self.priors = Variable(PriorBox(vggstride16_config).forward(), volatile=True)
    self.crop_size = args.crop_size
    self.vgg = nn.ModuleList(vgg(base[self.crop_size], 3,))
    # Anchors per location. NOTE(review): the `* 2` sits INSIDE len(), i.e.
    # it duplicates the aspect-ratio list before counting — verify this is
    # intended rather than `len(...) * 2`.
    self.n_anchor = len(vggstride16_config['scales']) * (len(vggstride16_config['aspect_ratios'][0] * 2) + 1)
    # One conv each for box regression (4 coords) and classification.
    self.loc_layers = nn.Conv2d(self.vgg[-2].out_channels, self.n_anchor * 4, kernel_size=3, padding=1)
    self.cls_layers = nn.Conv2d(self.vgg[-2].out_channels, self.n_anchor * self.num_classes, kernel_size=3, padding=1)
    self.softmax = nn.Softmax()
    if self.phase == 'test':
        self.detect = Detect(self.num_classes, 0, 200, 0.01, 0.45)  # conf 0.01
def __init__(self, img_size=300, thresh=0.56):
    """Build the detector wrapper: network, Detect head, transform and priors.

    Args:
        img_size: network input size; must be 300 or 512 (selects the
            VOC_300 / VOC_512 config).
        thresh: confidence threshold stored for later use.
    """
    assert img_size == 300 or img_size == 512, 'net input image size must be 300 or 512'
    self.labels_name = LABELS_SET
    self.labels_numb = len(LABELS_SET)
    self.img_size = img_size
    self.cfg = VOC_300 if img_size == 300 else VOC_512
    self.thresh = thresh
    self.gpu_is_available = torch.cuda.is_available()
    self.gpu_numb = torch.cuda.device_count()
    self.net = build_net('test', self.img_size, self.labels_numb)
    self.detect = Detect(self.labels_numb, 0, self.cfg)
    self.transform = BaseTransform(self.img_size)
    # load net weights
    # NOTE(review): `trained_model` is not a parameter — it must be a
    # module-level global; confirm it is defined before construction.
    state_dict = torch.load(trained_model, map_location='cpu')
    # Strip a possible DataParallel 'module.' prefix from checkpoint keys.
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        head = k[:7]
        if head == 'module.':
            name = k[7:]  # remove `module.`
        else:
            name = k
        new_state_dict[name] = v
    self.net.load_state_dict(new_state_dict)
    self.net.eval()
    print('Finished loading model!')
    # Wrap in DataParallel when more than one GPU is present.
    if self.gpu_numb > 1:
        self.net = torch.nn.DataParallel(self.net, device_ids=list(
            range(self.gpu_numb)))
    # set net gpu or cpu model
    if self.gpu_is_available:
        self.net.cuda()
        cudnn.benchmark = True
    # define box generator
    priorbox = PriorBox(self.cfg)
    with torch.no_grad():
        self.priors = priorbox.forward()
        if self.gpu_is_available:
            self.priors = self.priors.cuda()
def __init__(self):
    """Build the detector: network, priors, preprocessing transform and Detect head."""
    self.anchor_config = anchors(cfg.model)
    self.priorbox = PriorBox(self.anchor_config)
    self.net = build_net('test', cfg.model.input_size, cfg.model)
    init_net(self.net, cfg, args.trained_model)
    self.net.eval()
    self.num_classes = cfg.model.num_classes
    with torch.no_grad():
        self.priors = self.priorbox.forward()
        # self.net = self.net.cuda()
        # self.priors = self.priors.cuda()
    cudnn.benchmark = True
    self._preprocess = BaseTransform(cfg.model.input_size,
                                     cfg.model.rgb_means, (2, 0, 1))
    # BUG FIX: `num_classes` was referenced as a bare (unbound) name here,
    # raising NameError at construction — use the attribute assigned above.
    self.detector = Detect(self.num_classes, cfg.loss.bkg_label,
                           self.anchor_config)
def __init__(self, phase, size, base, extras, head, num_classes):
    """Assemble the SSD network from a prebuilt VGG base, extra layers and heads.

    In 'test' phase a softmax + Detect post-processing pair is attached.
    """
    super(SSD, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    # 21 classes means Pascal VOC; anything else is treated as COCO.
    self.cfg = voc if num_classes == 21 else coco
    self.size = size

    # Backbone and extra feature layers.
    self.vgg = nn.ModuleList(base)
    # Learned scaling of the L2-normalized conv4_3 features.
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)

    # Prediction heads.
    self.loc = nn.ModuleList(head[0])
    self.conf = nn.ModuleList(head[1])
    self.relu = nn.ReLU(inplace=True)

    if phase == 'test':
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
def _init_model(self):
    """Build the RFB-VGG detection net, load weights, and prepare priors.

    Sets self.img_dim, self.priors, self.net and self.detector.
    """
    # BUG FIX: `cuda` was only bound inside `if torch.cuda.is_available()`,
    # so on CPU-only machines the later `if cuda:` raised NameError and
    # `self.priors` was never assigned.
    cuda = torch.cuda.is_available()
    # Select the config from the checkpoint filename.
    if '300' in self.model_path:
        cfg = COCO_300
        self.img_dim = 300
        print('Model input size is 300')
    else:
        cfg = COCO_512
        self.img_dim = 512
        print('Model input size is 512')
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        # Keep priors on the GPU when available, on the CPU otherwise.
        self.priors = priors.cuda() if cuda else priors
    self.net = build_rfb_vgg_net('test', self.img_dim, self.num_classes)
    # Load the checkpoint, stripping a possible DataParallel 'module.' prefix.
    state_dict = torch.load(self.model_path)['state_dict']
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:] if k.startswith('module.') else k
        new_state_dict[name] = v
    self.net.load_state_dict(new_state_dict)
    self.net.eval()
    if cuda:
        self.net = self.net.cuda()
        cudnn.benchmark = True
    else:
        self.net = self.net.cpu()
    print('Finished loading model!')
    # print(net)
    self.detector = Detect(self.num_classes, 0, cfg)
def get_voc_reader(args):
    """Create the VOC training DataLoader plus a (testset, detector) pair."""
    img_dim = args.size
    rgb_means = (104, 117, 123)
    rgb_std = (1, 1, 1)
    # RFB_mobile uses a milder augmentation probability.
    p = 0.2 if args.version == 'RFB_mobile' else 0.6
    train_sets = [('2007', 'trainval'), ('2012', 'trainval')]
    cfg = VOC_512 if args.size == '512' else VOC_300

    testset = VOCDetection(VOCroot, [('2007', 'test')], None,
                           AnnotationTransform())
    train_dataset = VOCDetection(VOCroot, train_sets,
                                 preproc(img_dim, rgb_means, rgb_std, p),
                                 AnnotationTransform())
    trainloader = torch.utils.data.DataLoader(train_dataset, args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_workers,
                                              collate_fn=detection_collate)
    # Number of classes comes from the comma-separated CLI class list.
    num_classes = len(args.classes.split(","))
    detector = Detect(num_classes, 0, cfg)
    return (trainloader, (testset, detector))
from model.refinedet_vgg import build_net net = build_net('test', size=ssd_dim, num_classes=num_classes, use_refine=args.refine, c7_channel=args.c7_channel) else: net = None print('loading model!', args.model_dir, args.iteration) net.load_state_dict(torch.load(trained_model)) print(net) net.eval() print('Finished loading model!', args.model_dir, args.iteration, 'tub=' + str(args.tub), 'tub_thresh=' + str(args.tub_thresh), 'tub_score=' + str(args.tub_generate_score)) detector = Detect(num_classes, 0, args.top_k, args.confidence_threshold, args.nms_threshold) priorbox = PriorBox(cfg) # priorbox=PriorBox(multi_cfg['2.2']) with torch.no_grad(): priors = priorbox.forward().to(device) # load data net = net.to(device) # evaluation test_net(args.save_folder, net, dataset, BaseTransform(net.size, dataset_mean), args.top_k, detector, priors) else: out_dir = get_output_dir( pkl_dir, args.iteration + '_' + args.dataset_name + '_' + args.set_file_name) print('Without detection', out_dir)
plt.show() if __name__ == "__main__": Image = os.listdir('image/') for img_name in Image: img = cv2.imread("image/"+img_name) model = 'fssd_voc_79_74.pth' net = build_net(300, 21) state_dict = torch.load(model) from collections import OrderedDict new_state_dict = OrderedDict() for k, v in state_dict.items(): head = k[:7] if head == 'module.': name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v net.load_state_dict(new_state_dict) net.eval() net = net.cuda() cudnn.benchmark = True print("Finished loading model") transform = BaseTransform(300, (104, 117, 123), (2, 0, 1)) detector = Detect(21, 0, VOC_300) priorbox = PriorBox(VOC_300) with torch.no_grad(): priors = priorbox.forward() priors = priors.cuda() test_net(net, img, img_name, detector, transform, priors,top_k=200, thresh=0.4)
print_info('===> Finished constructing and loading model', ['yellow', 'bold']) net.eval() num_classes = cfg.model.num_classes with torch.no_grad(): priors = priorbox.forward() if cfg.test_cfg.cuda: net = net.cuda() priors = priors.cuda() cudnn.benchmark = True else: net = net.cpu() _preprocess = BaseTransform( cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1)) detector = Detect(num_classes, cfg.loss.bkg_label, anchor_config) base = int(np.ceil(pow(num_classes, 1. / 3))) cats = [_.strip().split(',')[-1] for _ in open('data/coco_labels.txt', 'r').readlines()] label_config = {'VOC': VOC_CLASSES, 'COCO': tuple(['__background__'] + cats)} labels = label_config[args.dataset] def draw_detection(im, bboxes, scores, cls_inds, fps, thr=0.2): imgcv = np.copy(im) h, w, _ = imgcv.shape for i, box in enumerate(bboxes): if scores[i] < thr: continue cls_indx = int(cls_inds[i])
if head == 'module.': name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v net.load_state_dict(new_state_dict) net.eval() if args.cuda: net = net.cuda() # cudnn.benchmark = True else: net = net.cpu() print('Finished loading model!') # print(net) start = time.time() detector = Detect(numclass, 0, cfg) out = net(x) # forward pass boxes, scores = detector.forward(out, priors) boxes = boxes[0] scores = scores[0] boxes *= scale boxes = boxes.cpu().numpy() scores = scores.cpu().numpy() # scale each detection back up to the image bboxes = [] for j in range(1, numclass): inds = np.where(scores[:, j] > 0.1)[0] #conf > 0.6 if inds is None: continue c_bboxes = boxes[inds]
def train():
    """Training loop: per-epoch checkpointing, periodic validation, and SGD updates."""
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')
    # Dataset selection; VOC optionally uses mixup augmentation when alpha > 0.
    if args.dataset == 'VOC':
        if args.alpha - 0.0 > 1e-5:
            dataset = VOCDetection(VOCroot, train_sets,
                                   preproc_mixup(img_dim, rgb_means, p),
                                   AnnotationTransform(),
                                   random_erasing=args.random_erasing,
                                   mixup_alpha=args.alpha)
        else:
            dataset = VOCDetection(VOCroot, train_sets,
                                   preproc(img_dim, rgb_means, p),
                                   AnnotationTransform(),
                                   random_erasing=args.random_erasing)
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets,
                                preproc(img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return
    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size
    # LR-decay milestones (in iterations) per dataset.
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (100 * epoch_size, 135 * epoch_size, 170 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0
    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
        # Recover the LR step index for the resumed iteration.
        for sv in stepvalues:
            if start_iter > sv:
                step_index += 1
                continue
            else:
                break
    else:
        start_iter = 0
    lr = args.lr
    avg_loss_list = []
    flag = True
    for iteration in range(start_iter, max_iter):
        # Epoch boundary: refresh the iterator, log average loss, save/validate.
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(dataset, batch_size, shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            avg_loss = (loc_loss + conf_loss) / epoch_size
            avg_loss_list.append(avg_loss)
            print("avg_loss_list:")
            if len(avg_loss_list) <= 5:
                print(avg_loss_list)
            else:
                print(avg_loss_list[-5:])
            loc_loss = 0
            conf_loss = 0
            # Save frequency: every 10 epochs up to 150, every 5 until 200,
            # then every epoch.
            if (epoch <= 150 and epoch % 10 == 0) or (
                    150 < epoch < 200 and epoch % 5 == 0) or (epoch > 200):
                torch.save(
                    net.state_dict(),
                    args.save_folder + args.version + '_' + args.dataset +
                    '_epoches_' + repr(epoch) + '.pth')
                if (epoch != args.resume_epoch):
                    #if(epoch):
                    # Reload the just-saved weights into a fresh net for validation.
                    ValNet = build_net(img_dim, num_classes,
                                       args.norm, args.vgg_bn)
                    val_state_dict = torch.load(
                        args.save_folder + args.version + '_' + args.dataset +
                        '_epoches_' + repr(epoch) + '.pth')
                    from collections import OrderedDict
                    new_state_dict = OrderedDict()
                    # Strip a possible DataParallel 'module.' prefix.
                    for k, v in val_state_dict.items():
                        head = k[:7]
                        if head == 'module.':
                            name = k[7:]
                        else:
                            name = k
                        new_state_dict[name] = v
                    ValNet.load_state_dict(new_state_dict)
                    ValNet.eval()
                    print('Finished loading ' + args.version + '_' +
                          args.dataset + '_epoches_' + repr(epoch) +
                          '.pth model!')
                    if args.dataset == 'VOC':
                        testset = VOCDetection(VOCroot, [('2007', 'test')],
                                               None, AnnotationTransform())
                    elif args.dataset == 'COCO':
                        testset = COCODetection(COCOroot,
                                                [('2014', 'minival')], None)
                    if args.cuda:
                        ValNet = ValNet.cuda()
                        cudnn.benchmark = True
                    else:
                        ValNet = ValNet.cpu()
                    top_k = 200
                    detector = Detect(num_classes, 0, cfg, GIOU=args.giou)
                    save_val_folder = os.path.join(args.save_val_folder,
                                                   args.dataset)
                    val_transform = BaseTransform(ValNet.size, rgb_means,
                                                  (2, 0, 1))
                    val_net(priors, save_val_folder, testset, num_classes,
                            ValNet, detector, val_transform, top_k, 0.01,
                            args.cuda, args.vgg_bn)
            epoch += 1
        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)
        images, targets = next(batch_iterator)
        # no mixup
        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]
        # fh = net.base[22].register_forward_hook(get_features_hook)
        # bh = net.base[22].register_backward_hook(get_grads_hook)
        out = net(images, vgg_bn=args.vgg_bn)
        optimizer.zero_grad()
        loss_l, loss_c, = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        # fh.remove()
        # bh.remove()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        load_t1 = time.time()
        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Totel iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f S: %.4f||' %
                  (loss_l.item(), loss_c.item(),
                   loss_l.item() + loss_c.item()) +
                  'Batch time: %.4f ||' % (load_t1 - load_t0) +
                  'LR: %.7f' % (lr))
    torch.save(net.state_dict(),
               args.save_folder + 'Final_' + args.version + '_' +
               args.dataset + '.pth')
def train():
    """Training loop with periodic checkpointing, testing and visdom logging."""
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0
    if args.resume_net:
        epoch = 0 + args.resume_epoch
    epoch_size = len(train_dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size
    # LR-decay milestones (in iterations) per dataset.
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', train_dataset.name)
    '''
    n_flops, n_convops, n_params = measure_model(net, int(args.size), int(args.size))
    print('==> FLOPs: {:.4f}M, Conv_FLOPs: {:.4f}M, Params: {:.4f}M'.
          format(n_flops / 1e6, n_convops / 1e6, n_params / 1e6))
    '''
    print(' Total params: %.2fM' %
          (sum(p.numel() for p in net.parameters()) / 1000000.0))
    step_index = 0
    if args.visdom:
        # initialize visdom loss plot
        lot = viz.line(X=torch.zeros((1, )).cpu(),
                       Y=torch.zeros((1, 3)).cpu(),
                       opts=dict(xlabel='Iteration', ylabel='Loss',
                                 title='Current SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
        epoch_lot = viz.line(X=torch.zeros((1, )).cpu(),
                             Y=torch.zeros((1, 3)).cpu(),
                             opts=dict(
                                 xlabel='Epoch', ylabel='Loss',
                                 title='Epoch SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0
    lr = args.lr
    log_file = open(log_file_path, 'w')
    for iteration in range(start_iter, max_iter):
        # Epoch boundary: new iterator, reset counters, save/test if due.
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(train_dataset, batch_size, shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            loc_loss = 0
            conf_loss = 0
            if epoch % args.save_frequency == 0 and epoch > 0:
                torch.save(
                    net.state_dict(),
                    os.path.join(
                        save_folder, args.version + '_' + args.dataset +
                        '_epoches_' + repr(epoch) + '.pth'))
            if epoch % args.test_frequency == 0 and epoch > 0:
                net.eval()
                top_k = 200
                detector = Detect(num_classes, 0, cfg)
                # VOC reports per-class APs + mAP to the log file.
                if args.dataset == 'VOC':
                    APs, mAP = test_net(test_save_dir, net, detector,
                                        args.cuda, testset,
                                        BaseTransform(net.module.size,
                                                      rgb_means, rgb_std,
                                                      (2, 0, 1)),
                                        top_k, thresh=0.01)
                    APs = [str(num) for num in APs]
                    mAP = str(mAP)
                    log_file.write(str(iteration) + ' APs:\n' + '\n'.join(APs))
                    log_file.write('mAP:\n' + mAP + '\n')
                else:
                    test_net(test_save_dir, net, detector, args.cuda, testset,
                             BaseTransform(net.module.size, rgb_means,
                                           rgb_std, (2, 0, 1)),
                             top_k, thresh=0.01)
                net.train()
            epoch += 1
        load_t0 = time.time()
        # Walk back through past save points to recover the LR step index.
        for iter_tmp in range(iteration, 0,
                              -epoch_size * args.save_frequency):
            if iter_tmp in stepvalues:
                step_index = stepvalues.index(iter_tmp) + 1
                if args.visdom:
                    viz.line(X=torch.ones((1, 3)).cpu() * epoch,
                             Y=torch.Tensor([
                                 loc_loss, conf_loss,
                                 loc_loss + conf_loss
                             ]).unsqueeze(0).cpu() / epoch_size,
                             win=epoch_lot, update='append')
                break
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)
        # load train data
        images, targets = next(batch_iterator)
        if args.cuda:
            images = Variable(images.cuda())
            targets = [
                Variable(anno.cuda(), volatile=True) for anno in targets
            ]
        else:
            images = Variable(images)
            targets = [Variable(anno, volatile=True) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]
        load_t1 = time.time()
        if iteration % 10 == 0:
            print(args.version + 'Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Totel iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' % (loss_l.data[0], loss_c.data[0]) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
            log_file.write('Epoch:' + repr(epoch) + ' || epochiter: ' +
                           repr(iteration % epoch_size) + '/' +
                           repr(epoch_size) + '|| Totel iter ' +
                           repr(iteration) + ' || L: %.4f C: %.4f||' %
                           (loss_l.data[0], loss_c.data[0]) +
                           'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                           'LR: %.8f' % (lr) + '\n')
            if args.visdom and args.send_images_to_visdom:
                random_batch_index = np.random.randint(images.size(0))
                viz.image(images.data[random_batch_index].cpu().numpy())
        if args.visdom:
            viz.line(X=torch.ones((1, 3)).cpu() * iteration,
                     Y=torch.Tensor([
                         loss_l.data[0], loss_c.data[0],
                         loss_l.data[0] + loss_c.data[0]
                     ]).unsqueeze(0).cpu(),
                     win=lot, update='append')
            if iteration % epoch_size == 0:
                viz.line(X=torch.zeros((1, 3)).cpu(),
                         Y=torch.Tensor(
                             [loc_loss, conf_loss,
                              loc_loss + conf_loss]).unsqueeze(0).cpu(),
                         win=epoch_lot, update=True)
    log_file.close()
    torch.save(
        net.state_dict(),
        os.path.join(save_folder,
                     'Final_' + args.version + '_' + args.dataset + '.pth'))
def main():
    """Training entry point: build net, optimizer and losses, then run epochs."""
    global args
    args = arg_parse()
    cfg_from_file(args.cfg_file)
    save_folder = args.save_folder
    batch_size = cfg.TRAIN.BATCH_SIZE
    bgr_means = cfg.TRAIN.BGR_MEAN
    p = 0.6  # augmentation probability passed to preproc below
    gamma = cfg.SOLVER.GAMMA
    momentum = cfg.SOLVER.MOMENTUM
    weight_decay = cfg.SOLVER.WEIGHT_DECAY
    size = cfg.MODEL.SIZE
    thresh = cfg.TEST.CONFIDENCE_THRESH
    # NOTE(review): both branches set top_k = 1000 — the split is redundant
    # unless dataset-specific values were intended.
    if cfg.DATASETS.DATA_TYPE == 'VOC':
        trainvalDataset = VOCDetection
        top_k = 1000
    else:
        trainvalDataset = COCODetection
        top_k = 1000
    dataset_name = cfg.DATASETS.DATA_TYPE
    dataroot = cfg.DATASETS.DATAROOT
    trainSet = cfg.DATASETS.TRAIN_TYPE
    valSet = cfg.DATASETS.VAL_TYPE
    num_classes = cfg.MODEL.NUM_CLASSES
    start_epoch = args.resume_epoch
    epoch_step = cfg.SOLVER.EPOCH_STEPS
    end_epoch = cfg.SOLVER.END_EPOCH
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    net = SSD(cfg)
    print(net)
    if cfg.MODEL.SIZE == '300':
        size_cfg = cfg.SMALL
    else:
        size_cfg = cfg.BIG
    optimizer = optim.SGD(
        net.parameters(),
        lr=cfg.SOLVER.BASE_LR,
        momentum=momentum,
        weight_decay=weight_decay)
    if args.resume_net != None:
        # Resume: restore model weights (stripping a possible DataParallel
        # 'module.' prefix) and optimizer state from the checkpoint.
        checkpoint = torch.load(args.resume_net)
        state_dict = checkpoint['model']
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('Loading resume network...')
    if args.ngpu > 1:
        net = torch.nn.DataParallel(net)
    net.cuda()
    cudnn.benchmark = True
    # RefineDet uses a two-stage (ARM + ODM) loss; plain SSD uses one.
    criterion = list()
    if cfg.MODEL.REFINE:
        detector = Detect(cfg)
        arm_criterion = RefineMultiBoxLoss(cfg, 2)
        odm_criterion = RefineMultiBoxLoss(cfg, cfg.MODEL.NUM_CLASSES)
        criterion.append(arm_criterion)
        criterion.append(odm_criterion)
    else:
        detector = Detect(cfg)
        ssd_criterion = MultiBoxLoss(cfg)
        criterion.append(ssd_criterion)
    TrainTransform = preproc(size_cfg.IMG_WH, bgr_means, p)
    ValTransform = BaseTransform(size_cfg.IMG_WH, bgr_means, (2, 0, 1))
    val_dataset = trainvalDataset(dataroot, valSet, ValTransform, dataset_name)
    val_loader = data.DataLoader(
        val_dataset,
        batch_size,
        shuffle=False,
        num_workers=args.num_workers,
        collate_fn=detection_collate)
    for epoch in range(start_epoch + 1, end_epoch + 1):
        train_dataset = trainvalDataset(dataroot, trainSet, TrainTransform,
                                        dataset_name)
        epoch_size = len(train_dataset)
        train_loader = data.DataLoader(
            train_dataset,
            batch_size,
            shuffle=True,
            num_workers=args.num_workers,
            collate_fn=detection_collate)
        train(train_loader, net, criterion, optimizer, epoch, epoch_step,
              gamma, end_epoch, cfg)
        # Checkpoint every 5 epochs, and every 2 epochs from epoch 60 on.
        if (epoch % 5 == 0) or (epoch % 2 == 0 and epoch >= 60):
            save_checkpoint(net, epoch, size, optimizer)
        # Validate every other epoch after warm-up.
        if (epoch >= 2 and epoch % 2 == 0):
            eval_net(
                val_dataset,
                val_loader,
                net,
                detector,
                cfg,
                ValTransform,
                top_k,
                thresh=thresh,
                batch_size=batch_size)
    save_checkpoint(net, end_epoch, size, optimizer)
def Model_Params(self, model_dir="output", use_gpu=True):
    '''
    User Function - Set Model Params

    Args:
        model_dir (str): Select the right model name as per training
        use_gpu (bool): If True use GPU else run on CPU

    Returns:
        None
    '''
    # Rewrite the training config into a test config, flipping the cuda
    # flag off when the user asks for CPU inference.
    with open(model_dir + "/config_final.py", 'r') as f:
        config_text = f.read()
    if not use_gpu:
        config_text = config_text.replace("cuda=True", "cuda=False")
    with open(model_dir + "/config_test.py", 'w') as f:
        f.write(config_text)

    print("Loading model for inference")
    self.system_dict["cfg"] = Config.fromfile(model_dir + "/config_test.py")
    anchor_config = anchors(self.system_dict["cfg"].model)
    self.system_dict["priorbox"] = PriorBox(anchor_config)
    self.system_dict["net"] = build_net('test',
                                        self.system_dict["cfg"].model.input_size,
                                        self.system_dict["cfg"].model)
    init_net(self.system_dict["net"], self.system_dict["cfg"],
             model_dir + "/VOC/Final_Pelee_VOC_size304.pth")
    print_info('===> Finished constructing and loading model', ['yellow', 'bold'])
    self.system_dict["net"].eval()

    with torch.no_grad():
        self.system_dict["priors"] = self.system_dict["priorbox"].forward()
        if self.system_dict["cfg"].test_cfg.cuda:
            self.system_dict["net"] = self.system_dict["net"].cuda()
            self.system_dict["priors"] = self.system_dict["priors"].cuda()
            cudnn.benchmark = True
        else:
            self.system_dict["net"] = self.system_dict["net"].cpu()

    self.system_dict["_preprocess"] = BaseTransform(
        self.system_dict["cfg"].model.input_size,
        self.system_dict["cfg"].model.rgb_means, (2, 0, 1))
    self.system_dict["num_classes"] = self.system_dict["cfg"].model.num_classes
    self.system_dict["detector"] = Detect(self.system_dict["num_classes"],
                                          self.system_dict["cfg"].loss.bkg_label,
                                          anchor_config)
    print("Done....")

    print("Loading other params")
    base = int(np.ceil(pow(self.system_dict["num_classes"], 1. / 3)))
    self.system_dict["colors"] = [self._to_color(x, base)
                                  for x in range(self.system_dict["num_classes"])]

    # Build the label list: background + one entry per line of the class file.
    cats = ['__background__']
    with open(self.system_dict["class_list"]) as f:
        class_lines = f.readlines()
    for raw in class_lines:
        # BUG FIX: the original tested `lines != ""` (a list compared to a
        # string — always True) and chopped the last character of each entry
        # even when the final line had no trailing newline. Strip the newline
        # explicitly and skip genuinely empty lines instead.
        name = raw.rstrip('\n')
        if name != "":
            cats.append(name)
    self.system_dict["labels"] = cats
    print("Done....")
new_state_dict[name] = v net.load_state_dict(new_state_dict) # 将读取的模型参数,灌入net中 net.eval() # 现在模型参数就有啦,可以进入评估模式了 print('Finished loading model!') print(net) # load data,加载测试数据 if args.dataset == 'VOC': testset = VOCDetection( VOCroot, [('2007', 'test')], None, AnnotationTransform()) elif args.dataset == 'COCO': testset = COCODetection( COCOroot, [('2014', 'minival')], None) #COCOroot, [('2015', 'test-dev')], None) else: print('Only VOC and COCO dataset are supported now!') if args.cuda: net = net.cuda() cudnn.benchmark = True else: net = net.cpu() # evaluation #top_k = (300, 200)[args.dataset == 'COCO'] top_k = 200 # 每张图像上最多检出top_k个bbox detector = Detect(num_classes,0,cfg) # 调用detection.py里的Detect类,完成forward操作的detector save_folder = os.path.join(args.save_folder,args.dataset) rgb_means = ((104, 117, 123),(103.94,116.78,123.68))[args.version == 'RFB_mobile'] test_net(save_folder, net, detector, args.cuda, testset, BaseTransform(net.size, rgb_means, (2, 0, 1)), # resize + 减均值 + 通道调换 top_k, thresh=0.01) # thresh=0.01,为什么这么小?可以结合mAP介绍的笔记
def __init__(self, phase, num_class, block, num_blocks, size, connections, strides):
    """Build the MB_FCN detector: a ResNet backbone whose intermediate feature
    maps are pooled/deconvolved to common strides and fused into detection heads.

    Args:
        phase: 'train' or 'test' (test additionally builds softmax + Detect).
        num_class: number of object classes.
        block, num_blocks: ResNet block type and per-stage block counts.
        size: square input resolution.
        connections: per output stride, which backbone stages (1-based) to fuse.
        strides: output strides of the fused feature maps.
    """
    super(MB_FCN, self).__init__()
    self.phase = phase
    self.in_planes = 64
    self.num_class = num_class
    self.size = size
    self.priorboxs = PriorBoxLayer(size, size, stride=strides)
    self.priors = None  # filled lazily elsewhere
    self.connections = connections
    self.strides = strides
    # Resnet network
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    # layer4 keeps stride=1 so the deepest stage's receptive field/stride
    # stays comparable to the shallower stages (only layer4 is modified).
    self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
    self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
    self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
    self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=1)
    # Channel counts of conv1/layer1..layer4 outputs (bottleneck expansion 4).
    out_channels = [64, 64 * 4, 128 * 4, 256 * 4, 512 * 4]
    # Pooling layers used for downsampling.
    # NOTE(review): these are plain Python lists, not nn.ModuleList, so the
    # pools/deconvs below are NOT registered as submodules (no parameters move
    # with .cuda()/.state_dict()) — verify this is intentional. "downspamle"
    # is a pre-existing typo kept for attribute compatibility.
    self.downspamle_maxpools = list()  # type: list
    self.downspamle_features = list()  # type: list
    # Transposed-conv layers used for upsampling.
    self.deconvs = list()  # type:list
    self.upsample_features = list()  # type: list
    # (padding, output_padding) for transposed convs at 2x, 4x, 8x upsampling.
    upsample_padding = [(1, 1), (0, 1), (0, 5)]
    self.out_channels = []
    for stride, connection in zip(strides, connections):
        pools = list()
        deconv = list()
        down_features = list()
        up_features = list()
        channels = 0
        for c in connection:
            channels += out_channels[c - 1]
            current_stride = pow(2, c)
            if c == 5:
                # layer4 was built with stride=1, so stage 5's effective
                # stride is half the nominal 2**5.
                current_stride //= 2
            if pow(2, c) < stride:
                # Stage is shallower than the target stride: pool down.
                pool = nn.MaxPool2d(kernel_size=3, stride=stride // pow(2, c), padding=1)  # type: nn.MaxPool2d
                if c == 5:
                    # NOTE(review): nesting reconstructed from flattened
                    # source — confirm the c==5 override sits inside this branch.
                    pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # type: nn.MaxPool2d
                pools.append(pool)
                down_features.append(c)
            elif current_stride > stride:
                # Stage is deeper than the target stride: upsample by deconv.
                upsample_time = current_stride // stride
                padding_index = int(math.log(upsample_time, 2) - 1)
                convT = nn.ConvTranspose2d(out_channels[c - 1], out_channels[c - 1], kernel_size=3, \
                                           stride=upsample_time, padding=upsample_padding[padding_index][0], \
                                           output_padding=upsample_padding[padding_index][1])
                deconv.append(convT)
                up_features.append(c)
        self.downspamle_maxpools.append(pools)
        self.downspamle_features.append(down_features)
        self.deconvs.append(deconv)
        self.upsample_features.append(up_features)
        self.out_channels.append(channels)
    # Single-conv heads extracting box location and face-classification info.
    loc = []
    conf = []
    for channels in self.out_channels:
        loc.append(
            nn.Conv2d(channels, 4, kernel_size=3, stride=1, padding=1))
        # NOTE(review): conf head also outputs 4 channels, same as loc —
        # confirm this matches the decoder's expected layout.
        conf.append(
            nn.Conv2d(channels, 4, kernel_size=3, stride=1, padding=1))
    self.loc = nn.ModuleList(loc)
    self.conf = nn.ModuleList(conf)
    if phase == 'test':
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_class, 0, 750, 0.05, 0.3)
# Script fragment: build the detector, load its checkpoint, and open the
# test video. (Truncated at the end — the per-frame loop continues beyond
# this chunk.)
use_cuda = torch.cuda.is_available()
detect_model = build_net('test', ops.img_dim, ops.num_classes)  # initialize detector
#---------------------------------------------
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# chkpt = torch.load(ops.detect_model, map_location=device)
# Load to CPU first regardless of where the checkpoint was saved.
chkpt = torch.load(ops.detect_model, map_location=lambda storage, loc: storage)
detect_model.load_state_dict(chkpt)
detect_model.eval()  # switch to forward-inference mode
acc_model(ops, detect_model)
detect_model = detect_model.to(device)
detector = Detect(ops.num_classes, 0, cfg)  # num_classes, bkg_label, cfg
priorbox = PriorBox(cfg, debug_=False)
# Priors are static; no autograd needed.
with torch.no_grad():
    priors = priorbox.forward()
    if use_cuda:
        priors = priors.cuda()
video_capture = cv2.VideoCapture(ops.test_path)
ret, img_raw = video_capture.read()
if ret:
    # scale maps normalized box coords back to pixel coords (w, h, w, h).
    # scale = torch.Tensor([img_raw.shape[1], img_raw.shape[0],img_raw.shape[1], img_raw.shape[0]])
    scale = [
        img_raw.shape[1], img_raw.shape[0], img_raw.shape[1], img_raw.shape[0]
    ]
# if use_cuda:
def train(cfg):
    """Train an M2Det model on the Helmet dataset per the given config file.

    Args:
        cfg: path to an mmcv-style config file; it is parsed in place.

    Side effects: saves checkpoints to `checkpoint_path` every 2000 iters,
    on KeyboardInterrupt, and at the end of each epoch. Relies on
    module-level `device`, `checkpoint_path`, and `start_epoch`.
    """
    cfg = Config.fromfile(cfg)
    net = build_net('train',
                    size=cfg.model.input_size,  # Only 320, 512, 704 and 800 are supported
                    config=cfg.model.m2det_config)
    init_net(net, cfg, False)
    net.to(device)
    # Resume from an existing checkpoint for start_epoch, if present.
    if os.path.exists(checkpoint_path.format(start_epoch)):
        checkpoints = torch.load(checkpoint_path.format(start_epoch))
        net.load_state_dict(checkpoints)
        logging.info('checkpoint loaded.')
    optimizer = optim.SGD(net.parameters(),
                          lr=cfg.train_cfg.lr[0],
                          momentum=cfg.optimizer.momentum,
                          weight_decay=cfg.optimizer.weight_decay)
    criterion = MultiBoxLoss(cfg.model.m2det_config.num_classes,
                             overlap_thresh=cfg.loss.overlap_thresh,
                             prior_for_matching=cfg.loss.prior_for_matching,
                             bkg_label=cfg.loss.bkg_label,
                             neg_mining=cfg.loss.neg_mining,
                             neg_pos=cfg.loss.neg_pos,
                             neg_overlap=cfg.loss.neg_overlap,
                             encode_target=cfg.loss.encode_target)
    priorbox = PriorBox(anchors(cfg))
    # Priors are fixed for a given config; compute once without grads.
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    net.train()
    anchor_config = anchors(cfg)
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')
    dataset = get_dataloader(cfg, 'Helmet', 'train_sets')
    train_ds = DataLoader(dataset, cfg.train_cfg.per_batch_size,
                          shuffle=True, num_workers=0,
                          collate_fn=detection_collate)
    logging.info('dataset loaded, start to train...')
    for epoch in range(start_epoch, cfg.model.epochs):
        for i, data in enumerate(train_ds):
            try:
                lr = adjust_learning_rate_helmet(optimizer, epoch, cfg)
                images, targets = data
                images = images.to(device)
                targets = [anno.to(device) for anno in targets]
                out = net(images)
                optimizer.zero_grad()
                loss_l, loss_c = criterion(out, priors, targets)
                loss = loss_l + loss_c
                loss.backward()
                optimizer.step()
                if i % 30 == 0:
                    logging.info('Epoch: {}, iter: {}, loc_loss: {}, conf_loss: {}, loss: {}, lr: {}'.format(
                        epoch, i, loss_l.item(), loss_c.item(), loss.item(), lr
                    ))
                if i % 2000 == 0:
                    # Periodic mid-epoch checkpoint (the snapshot preview
                    # below was disabled).
                    # two_imgs = images[0:2, :]
                    # out = net(two_imgs)
                    # snap_middle_result(two_imgs[0], out[0], priors, detector, cfg, epoch)
                    torch.save(net.state_dict(), checkpoint_path.format(epoch))
                    logging.info('model saved.')
            except KeyboardInterrupt:
                # Save progress before exiting on Ctrl-C.
                torch.save(net.state_dict(), checkpoint_path.format(epoch))
                logging.info('model saved.')
                exit(0)
    torch.save(net.state_dict(), checkpoint_path.format(epoch))
def demo(v_f):
    """Run M2Det detection on a video file, draw results, and write result.mp4.

    Args:
        v_f: path to the input video file.

    Relies on module-level `config_f`, `checkpoint_path`, `device`, `classes`.
    """
    cfg = Config.fromfile(config_f)
    anchor_config = anchors(cfg)
    priorbox = PriorBox(anchor_config)
    net = build_net('test',
                    size=cfg.model.input_size,
                    config=cfg.model.m2det_config)
    init_net(net, cfg, checkpoint_path)
    net.eval().to(device)
    # Priors are fixed; compute once without grads.
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    # resize + mean subtraction + HWC->CHW channel swap
    _preprocess = BaseTransform(
        cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')
    cap = cv2.VideoCapture(v_f)
    logging.info('detect on: {}'.format(v_f))
    # cap.get(3)/cap.get(4) are frame width/height.
    logging.info('video width: {}, height: {}'.format(int(cap.get(3)), int(cap.get(4))))
    out_video = cv2.VideoWriter("result.mp4", cv2.VideoWriter_fourcc(*'MJPG'),
                                24, (int(cap.get(3)), int(cap.get(4))))
    while True:
        ret, image = cap.read()
        if not ret:
            # End of stream: release resources and stop.
            out_video.release()
            cv2.destroyAllWindows()
            cap.release()
            break
        w, h = image.shape[1], image.shape[0]
        img = _preprocess(image).unsqueeze(0).to(device)
        # scale maps normalized boxes back to pixel coordinates.
        scale = torch.Tensor([w, h, w, h])
        out = net(img)
        boxes, scores = detector.forward(out, priors)
        boxes = (boxes[0]*scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        # Per class (skipping background 0): threshold, NMS, keep top-k.
        for j in range(1, cfg.model.m2det_config.num_classes):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
                np.float32, copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            # min_thresh, device_id=0 if cfg.test_cfg.cuda else None)
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist()+[j] for _ in c_dets])
        if len(allboxes) > 0:
            allboxes = np.array(allboxes)
            # [boxes, scores, label_id] -> [id, score, boxes]  0, 1, 2, 3, 4, 5
            allboxes = allboxes[:, [5, 4, 0, 1, 2, 3]]
            logging.info('allboxes shape: {}'.format(allboxes.shape))
            res = visualize_det_cv2(image, allboxes, classes=classes, thresh=0.2)
            # res = visualize_det_cv2_fancy(image, allboxes, classes=classes, thresh=0.2, r=4, d=6)
            cv2.imshow('rr', res)
            out_video.write(res)
            cv2.waitKey(1)
def main():
    """Run RefineDet inference over an image list, optionally displaying
    and/or saving detections (COCO json or plain-text results).

    Relies on many module-level settings: deform, multihead, bn, refine,
    ssd_dim, num_classes, trained_model, device, img_set, img_root, dataset,
    backbone, save_dir, display, labelmap, class_name, det_list, results_file.
    """
    mean = (104, 117, 123)
    print('loading model!')
    # Pick the deformable or plain VGG RefineDet variant.
    if deform:
        from model.dualrefinedet_vggbn import build_net
        net = build_net('test', size=ssd_dim, num_classes=num_classes,
                        c7_channel=1024, def_groups=deform, multihead=multihead, bn=bn)
    else:
        from model.refinedet_vgg import build_net
        net = build_net('test', size=ssd_dim, num_classes=num_classes,
                        use_refine=refine, c7_channel=1024, bn=bn)
    net.load_state_dict(torch.load(trained_model))
    net.eval()
    print('Finished loading model!', trained_model)
    net = net.to(device)
    detector = Detect(num_classes, 0, top_k, confidence_threshold, nms_threshold)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    for i, line in enumerate(open(img_set, 'r')):
        # if i==10:
        #     break
        # Derive (image_name, image_id) per dataset's list-file format.
        if 'COCO' in dataset:
            image_name = line[:-1]
            image_id = int(image_name.split('_')[-1])
        elif 'VOC' in dataset:
            image_name = line[:-1]
            image_id = -1
        else:
            image_name, image_id = line.split(' ')
            image_id = image_id[:-1]
        print(i, image_name, image_id)
        image_path = os.path.join(img_root, image_name + '.jpg')
        image = cv2.imread(image_path, 1)
        h, w, _ = image.shape
        image_draw = cv2.resize(image.copy(), (640, 480))  # display-size copy
        im_trans = base_transform(image, ssd_dim, mean)
        ######################## Detection ########################
        with torch.no_grad():
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(0, 3, 1, 2).to(device)
            if 'RefineDet' in backbone and refine:
                # Refinement branch: anchor refinement module output feeds Detect.
                arm_loc, _, loc, conf = net(x)
            else:
                loc, conf = net(x)
                arm_loc = None
            detections = detector.forward(loc, conf, priors, arm_loc_data=arm_loc)
        ############################################################
        out = list()
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            if dets.sum() == 0:
                continue
            # Keep only rows with positive score.
            mask = dets[:, 0].gt(0.).expand(dets.size(-1), dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
            boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
            # Scale normalized boxes back to source-image pixels.
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            boxes_np = boxes.cpu().numpy()
            for b, s in zip(boxes_np, scores):
                if save_dir:
                    out.append(
                        [int(b[0]), int(b[1]), int(b[2]), int(b[3]), j - 1, s])
                    if 'COCO' in dataset:
                        # COCO bbox format is [x, y, width, height].
                        det_list.append({
                            'image_id': image_id,
                            'category_id': labelmap[j],
                            'bbox': [
                                float('{:.1f}'.format(b[0])),
                                float('{:.1f}'.format(b[1])),
                                float('{:.1f}'.format(b[2] - b[0] + 1)),
                                float('{:.1f}'.format(b[3] - b[1] + 1))
                            ],
                            'score': float('{:.2f}'.format(s))
                        })
                    else:
                        results_file.write(
                            str(image_id) + ' ' + str(j) + ' ' + str(s) + ' ' +
                            str(np.around(b[0], 2)) + ' ' + str(np.around(b[1], 2)) + ' ' +
                            str(np.around(b[2], 2)) + ' ' + str(np.around(b[3], 2)) + '\n')
                if display:
                    # Draw on the 640x480 copy, so rescale coords accordingly.
                    cv2.rectangle(image_draw,
                                  (int(b[0] / w * 640), int(b[1] / h * 480)),
                                  (int(b[2] / w * 640), int(b[3] / h * 480)),
                                  (0, 255, 0), thickness=1)
                    cls = class_name[j] if 'COCO' in dataset else str(labelmap[j - 1])
                    put_str = cls + ':' + str(np.around(s, decimals=2))
                    cv2.putText(
                        image_draw, put_str,
                        (int(b[0] / w * 640), int(b[1] / h * 480) - 10),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5,
                        color=(0, 255, 0), thickness=1)
        if display:
            cv2.imshow('frame', image_draw)
            ch = cv2.waitKey(0)
            if ch == 115:  # 's' key: save this frame's results
                if save_dir:
                    print('save: ', line)
                    torch.save(
                        out, os.path.join(save_dir, '%s.pkl' % str(line[:-1])))
                    cv2.imwrite(
                        os.path.join(save_dir, '%s.jpg' % str(line[:-1])), image)
                    cv2.imwrite(
                        os.path.join(save_dir, '%s_box.jpg' % str(line[:-1])), image_draw)
    cv2.destroyAllWindows()
    if save_dir:
        if dataset == 'COCO':
            json.dump(det_list, results_file)
        results_file.close()
# NOTE(review): the four print_info lines below appear to be the timing-report
# tail of an enclosing test function (e.g. test_net) whose body lies outside
# this chunk; num_images, tot_detect_time, tot_nms_time come from that scope.
print_info('Detect time per image: {:.3f}s'.format(tot_detect_time / (num_images-1)))
print_info('Nms time per image: {:.3f}s'.format(tot_nms_time / (num_images - 1)))
print_info('Total time per image: {:.3f}s'.format((tot_detect_time + tot_nms_time) / (num_images - 1)))
print_info('FPS: {:.3f} fps'.format((num_images - 1) / (tot_detect_time + tot_nms_time)))

if __name__ == '__main__':
    # Script entry point: build and evaluate an M2Det model on the chosen set.
    net = build_net('test',
                    size = cfg.model.input_size,
                    config = cfg.model.m2det_config)
    init_net(net, cfg, args.trained_model)
    print_info('===> Finished constructing and loading model',['yellow','bold'])
    net.eval()
    # --test selects the test split, otherwise the eval split.
    _set = 'eval_sets' if not args.test else 'test_sets'
    testset = get_dataloader(cfg, args.dataset, _set)
    if cfg.test_cfg.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    else:
        net = net.cpu()
    detector = Detect(cfg.model.m2det_config.num_classes, cfg.loss.bkg_label, anchor_config)
    save_folder = os.path.join(cfg.test_cfg.save_folder, args.dataset)
    # resize + mean subtraction + HWC->CHW channel swap
    _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    test_net(save_folder, net, detector, cfg.test_cfg.cuda, testset,
             transform = _preprocess,
             max_per_image = cfg.test_cfg.topk,
             thresh = cfg.test_cfg.score_threshold)
# Script fragment: device setup, optimizer/criterion construction, prior
# generation, and dataset loading for RefineDet training.
if args.cuda:
    net.cuda()
    cudnn.benchmark = True

optimizer = optim.SGD(net.parameters(), lr=args.lr,
                      momentum=args.momentum, weight_decay=args.weight_decay)
#optimizer = optim.RMSprop(net.parameters(), lr=args.lr,alpha = 0.9, eps=1e-08,
#                      momentum=args.momentum, weight_decay=args.weight_decay)

# ARM loss is binary (object vs background); ODM loss covers all classes and
# ignores anchors the ARM already scored as background (object_score 0.01).
arm_criterion = RefineMultiBoxLoss(2, 0.5, True, 0, True, 3, 0.5, False)
odm_criterion = RefineMultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False, 0.01)
priorbox = PriorBox(cfg)
detector = Detect(num_classes, 0, cfg, object_score=0.01)
# Fix: Variable(..., volatile=True) is deprecated since PyTorch 0.4 — the flag
# is ignored (with a warning) and no longer disables autograd. Use the
# torch.no_grad() context, as the rest of this codebase already does.
with torch.no_grad():
    priors = priorbox.forward()

#dataset
print('Loading Dataset...')
if args.dataset == 'VOC':
    testset = VOCDetection(VOCroot, [('2007', 'test')], None, AnnotationTransform())
    train_dataset = VOCDetection(VOCroot, train_sets,
                                 preproc(img_dim, rgb_means, p), AnnotationTransform())
elif args.dataset == 'COCO':
    testset = COCODetection(COCOroot, [('2014', 'minival')], None)
    train_dataset = COCODetection(COCOroot, train_sets, preproc(img_dim, rgb_means, p))
else:
    print('Only VOC and COCO are supported now!')
if head == 'module.': name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v net.load_state_dict(new_state_dict) net.eval() print('Finished loading model!') if args.cuda: net = net.cuda() cudnn.benchmark = True else: net = net.cpu() detector = Detect(num_classes,0,cfg) transform = BaseTransform(img_dim, rgb_means, (2, 0, 1)) object_detector = ObjectDetector(net, detector, transform) img_list = os.listdir(args.img_dir) for i, img in enumerate(img_list): img_name = img img = os.path.join(args.img_dir, img) image = cv2.imread(img) detect_bboxes, tim = object_detector.predict(image) for class_id,class_collection in enumerate(detect_bboxes): if len(class_collection)>0: for i in range(class_collection.shape[0]): if class_collection[i,-1]>0.6:
def main_worker(gpu, ngpus_per_node, args):
    """Per-process training worker (one per GPU under multiprocessing
    distributed training): builds the SSD net, wraps it for DDP, constructs
    optimizer/criteria/datasets, then runs the train/eval/checkpoint loop.

    Args:
        gpu: GPU index assigned to this worker (may be None).
        ngpus_per_node: GPU count on this node, used for rank and batch split.
        args: parsed CLI namespace (distributed flags, paths, batch size, ...).
    """
    global best_map
    ## deal with args
    args.gpu = gpu
    cfg_from_file(args.cfg_file)
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    # distributed cfgs
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
        torch.cuda.set_device(args.gpu)
    net = SSD(cfg)
    # print(net)
    if args.resume_net != None:
        # Resume: strip the `module.` prefix DataParallel adds to keys.
        checkpoint = torch.load(args.resume_net)
        state_dict = checkpoint['model']
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)
        print('Loading resume network...')
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            # print(args.gpu)
            torch.cuda.set_device(args.gpu)
            net.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            net = torch.nn.parallel.DistributedDataParallel(
                net, device_ids=[args.gpu])
        else:
            net.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            net = torch.nn.parallel.DistributedDataParallel(net)
    elif args.gpu is not None:
        # torch.cuda.set_device(args.gpu)
        net = net.cuda(args.gpu)
    # args = arg_parse()
    batch_size = args.batch_size
    print("batch_size = ", batch_size)
    bgr_means = cfg.TRAIN.BGR_MEAN
    p = 0.6  # augmentation probability passed to preproc
    gamma = cfg.SOLVER.GAMMA
    momentum = cfg.SOLVER.MOMENTUM
    weight_decay = cfg.SOLVER.WEIGHT_DECAY
    size = cfg.MODEL.SIZE
    # size =300
    thresh = cfg.TEST.CONFIDENCE_THRESH
    if cfg.DATASETS.DATA_TYPE == 'VOC':
        trainvalDataset = VOCDetection
        top_k = 1000
    else:
        trainvalDataset = COCODetection
        top_k = 1000
    dataset_name = cfg.DATASETS.DATA_TYPE
    dataroot = cfg.DATASETS.DATAROOT
    trainSet = cfg.DATASETS.TRAIN_TYPE
    valSet = cfg.DATASETS.VAL_TYPE
    num_classes = cfg.MODEL.NUM_CLASSES
    start_epoch = args.resume_epoch
    epoch_step = cfg.SOLVER.EPOCH_STEPS
    end_epoch = cfg.SOLVER.END_EPOCH
    args.num_workers = args.workers
    # optimizer
    optimizer = optim.SGD(net.parameters(),
                          lr=cfg.SOLVER.BASE_LR,
                          momentum=momentum,
                          weight_decay=weight_decay)
    # Pick the size-specific sub-config (300-input vs larger).
    if cfg.MODEL.SIZE == '300':
        size_cfg = cfg.SMALL
    else:
        size_cfg = cfg.BIG
    # if args.resume_net != None:
    #     checkpoint = torch.load(args.resume_net)
    #     optimizer.load_state_dict(checkpoint['optimizer'])
    cudnn.benchmark = True
    # deal with criterion
    criterion = list()
    if cfg.MODEL.REFINE:
        # RefineDet-style: separate ARM (binary) and ODM (all-class) losses.
        detector = Detect(cfg)
        arm_criterion = RefineMultiBoxLoss(cfg, 2)
        odm_criterion = RefineMultiBoxLoss(cfg, cfg.MODEL.NUM_CLASSES)
        arm_criterion.cuda(args.gpu)
        odm_criterion.cuda(args.gpu)
        criterion.append(arm_criterion)
        criterion.append(odm_criterion)
    else:
        # Plain SSD: single multibox loss.
        detector = Detect(cfg)
        ssd_criterion = MultiBoxLoss(cfg)
        criterion.append(ssd_criterion)
    # deal with dataset
    TrainTransform = preproc(size_cfg.IMG_WH, bgr_means, p)
    ValTransform = BaseTransform(size_cfg.IMG_WH, bgr_means, (2, 0, 1))
    val_dataset = trainvalDataset(dataroot, valSet, ValTransform, dataset_name)
    val_loader = data.DataLoader(val_dataset,
                                 batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers * ngpus_per_node,
                                 collate_fn=detection_collate)
    # deal with training dataset
    train_dataset = trainvalDataset(dataroot, trainSet, TrainTransform, dataset_name)
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None
    # shuffle must be off when a DistributedSampler provides the ordering.
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.num_workers,
                                               collate_fn=detection_collate,
                                               pin_memory=True,
                                               sampler=train_sampler)
    ## set net in training phase
    net.train()
    for epoch in range(start_epoch + 1, end_epoch + 1):
        if args.distributed:
            # Reshuffle the sampler's partition each epoch.
            train_sampler.set_epoch(epoch)
        # train_loader = data.DataLoader(train_dataset, batch_size, shuffle=True, num_workers=args.num_workers,
        #                                collate_fn=detection_collate)
        # Training
        train(train_loader, net, criterion, optimizer, epoch, epoch_step,
              gamma, end_epoch, cfg, args)
        if (epoch >= 0 and epoch % 10 == 0):
            #print("here",args.rank % ngpus_per_node)
            ## validation the model
            eval_net(val_dataset,
                     val_loader,
                     net,
                     detector,
                     cfg,
                     ValTransform,
                     args,
                     top_k,
                     thresh=thresh,
                     batch_size=cfg.TEST.BATCH_SIZE)
        # Only rank 0 of each node writes checkpoints in distributed mode.
        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            if (epoch % 10 == 0) or (epoch % 5 == 0 and epoch >= 60):
                save_name = os.path.join(
                    args.save_folder,
                    cfg.MODEL.TYPE + "_epoch_{}_rank_{}_{}".format(
                        str(epoch), str(args.rank), str(size)) + '.pth')
                save_checkpoint(net, epoch, size, optimizer, batch_size,
                                save_name)