def get_model(cls):
    """Build the M2Det test network and the helpers needed to run inference.

    Every call constructs a fresh network and loads the weights stored at
    ``/opt/ml/model/m2det512_vgg.pth``; nothing is cached here. ``cls`` is
    not used by the body. The configuration is read from the module-level
    ``cfg``.

    Returns:
        A 4-tuple ``(net, priors, preprocess, detector)``: the network in
        eval mode, the prior boxes produced by ``PriorBox.forward()``, the
        ``BaseTransform`` used to prepare input images, and the ``Detect``
        post-processor. ``net`` and ``priors`` are moved to CUDA when
        ``cfg.test_cfg.cuda`` is truthy.
    """
    weights_path = '/opt/ml/model/m2det512_vgg.pth'
    model_cfg = cfg.model

    anchor_cfg = anchors(cfg)
    print_info('The Anchor info: \n{}'.format(anchor_cfg))
    prior_generator = PriorBox(anchor_cfg)

    net = build_net(
        'test',
        size=model_cfg.input_size,
        config=model_cfg.m2det_config,
    )
    init_net(net, cfg, weights_path)
    print_info('===> Finished constructing and loading model',
               ['yellow', 'bold'])
    net.eval()

    # The priors are constants, so they are generated without autograd.
    with torch.no_grad():
        priors = prior_generator.forward()

    if not cfg.test_cfg.cuda:
        net = net.cpu()
    else:
        net = net.cuda()
        priors = priors.cuda()
        cudnn.benchmark = True

    transform = BaseTransform(
        model_cfg.input_size, model_cfg.rgb_means, (2, 0, 1))
    post_processor = Detect(
        model_cfg.m2det_config.num_classes, cfg.loss.bkg_label, anchor_cfg)
    return net, priors, transform, post_processor
def get_model(device, cfg):
    """Build an M2Det network in 'train' mode, initialise it, and move it.

    Args:
        device: Target passed to ``net.to(...)``.
        cfg: Configuration object; ``cfg.model.input_size`` and
            ``cfg.model.m2det_config`` select the architecture.

    Returns:
        The network returned by ``build_net`` after ``init_net(net, cfg)``
        and ``net.to(device)`` have been applied.
    """
    model_cfg = cfg.model
    # Only 320, 512, 704 and 800 are supported as input sizes.
    input_size = model_cfg.input_size
    net = build_net('train', size=input_size, config=model_cfg.m2det_config)
    init_net(net, cfg)
    # The return value of .to() is discarded, as in the original
    # (presumably an in-place nn.Module move - TODO confirm).
    net.to(device)
    return net
tot_detect_time += detect_time if i > 0 else 0 tot_nms_time += nms_time if i > 0 else 0 with open(det_file, 'wb') as f: pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) print_info('===> Evaluating detections',['yellow','bold']) testset.evaluate_detections(all_boxes, save_folder) print_info('Detect time per image: {:.3f}s'.format(tot_detect_time / (num_images-1))) print_info('Nms time per image: {:.3f}s'.format(tot_nms_time / (num_images - 1))) print_info('Total time per image: {:.3f}s'.format((tot_detect_time + tot_nms_time) / (num_images - 1))) print_info('FPS: {:.3f} fps'.format((num_images - 1) / (tot_detect_time + tot_nms_time))) if __name__ == '__main__': net = build_net('test', size = cfg.model.input_size, config = cfg.model.m2det_config) init_net(net, cfg, args.trained_model) print_info('===> Finished constructing and loading model',['yellow','bold']) net.eval() _set = 'eval_sets' if not args.test else 'test_sets' testset = get_dataloader(cfg, args.dataset, _set) if cfg.test_cfg.cuda: net = net.cuda() cudnn.benchmark = True else: net = net.cpu() detector = Detect(cfg.model.m2det_config.num_classes, cfg.loss.bkg_label, anchor_config) save_folder = os.path.join(cfg.test_cfg.save_folder, args.dataset) _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1)) test_net(save_folder,
type=bool, default=False, help='Use tensorborad to show the Loss Graph') args = parser.parse_args() print_info( '----------------------------------------------------------------------\n' '| M2Det Training Program |\n' '----------------------------------------------------------------------', ['yellow', 'bold']) logger = set_logger(args.tensorboard) global cfg cfg = Config.fromfile(args.config) net = build_net( 'train', size=cfg.model.input_size, # Only 320, 512, 704 and 800 are supported config=cfg.model.m2det_config) init_net(net, cfg, args.resume_net ) # init the network with pretrained weights or resumed weights if args.ngpu > 1: net = torch.nn.DataParallel(net) if cfg.train_cfg.cuda: net.cuda() cudnn.benchmark = True optimizer = set_optimizer(net, cfg) criterion = set_criterion(cfg) priorbox = PriorBox(anchors(cfg)) with torch.no_grad():
def demo(v_f):
    """Run M2Det detection on a video, displaying and recording each frame.

    The configuration file (``config_f``), weights (``checkpoint_path``),
    target ``device`` and class names (``classes``) are read from module
    scope. Annotated frames are shown in a window named ``'rr'`` and
    written to ``result.mp4`` in the current working directory.

    Args:
        v_f: Video source handed to ``cv2.VideoCapture``.

    Notes:
        * Inference runs under ``torch.no_grad()`` so no autograd state is
          accumulated per frame.
        * A frame with no detection above the score threshold is shown and
          written unannotated, so the output keeps every input frame.
        * The capture, the writer and the windows are released even when
          processing raises.
    """
    cfg = Config.fromfile(config_f)
    anchor_config = anchors(cfg)
    priorbox = PriorBox(anchor_config)
    net = build_net('test',
                    size=cfg.model.input_size,
                    config=cfg.model.m2det_config)
    init_net(net, cfg, checkpoint_path)
    net.eval().to(device)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    _preprocess = BaseTransform(
        cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')

    cap = cv2.VideoCapture(v_f)
    logging.info('detect on: {}'.format(v_f))
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    logging.info('video width: {}, height: {}'.format(frame_width,
                                                      frame_height))
    out_video = cv2.VideoWriter("result.mp4",
                                cv2.VideoWriter_fourcc(*'MJPG'),
                                24,
                                (frame_width, frame_height))

    num_classes = cfg.model.m2det_config.num_classes
    try:
        while True:
            ret, image = cap.read()
            if not ret:
                break

            w, h = image.shape[1], image.shape[0]
            img = _preprocess(image).unsqueeze(0).to(device)
            scale = torch.Tensor([w, h, w, h])
            with torch.no_grad():
                out = net(img)
                boxes, scores = detector.forward(out, priors)
            # `scale` lives on the CPU, so bring the boxes over before
            # multiplying; this works whichever device they came back on.
            boxes = (boxes[0].cpu() * scale).numpy()
            scores = scores[0].cpu().numpy()

            allboxes = []
            # Class 0 is skipped, as in the original loop.
            for j in range(1, num_classes):
                inds = np.where(
                    scores[:, j] > cfg.test_cfg.score_threshold)[0]
                if len(inds) == 0:
                    continue
                c_bboxes = boxes[inds]
                c_scores = scores[inds, j]
                c_dets = np.hstack(
                    (c_bboxes, c_scores[:, np.newaxis])).astype(
                        np.float32, copy=False)
                soft_nms = cfg.test_cfg.soft_nms
                keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
                keep = keep[:cfg.test_cfg.keep_per_class]
                c_dets = c_dets[keep, :]
                allboxes.extend(det.tolist() + [j] for det in c_dets)

            if allboxes:
                allboxes = np.array(allboxes)
                # [boxes, score, label_id] -> [label_id, score, boxes]
                allboxes = allboxes[:, [5, 4, 0, 1, 2, 3]]
                logging.info('allboxes shape: {}'.format(allboxes.shape))
                res = visualize_det_cv2(image, allboxes,
                                        classes=classes, thresh=0.2)
            else:
                # Nothing detected: pass the raw frame through instead of
                # reusing a stale (or undefined) result.
                res = image

            cv2.imshow('rr', res)
            out_video.write(res)
            cv2.waitKey(1)
    finally:
        out_video.release()
        cv2.destroyAllWindows()
        cap.release()
def train(cfg):
    """Train M2Det on the COCO ``train_sets`` split with periodic checkpoints.

    ``device``, ``checkpoint_path`` (a format string taking the epoch) and
    ``start_epoch`` are read from module scope. If a checkpoint for
    ``start_epoch`` exists it is loaded before training starts.

    Checkpoints are written to ``checkpoint_path.format(epoch)``:
      * every 2000 iterations (including iteration 0),
      * at the end of every epoch,
      * when training is interrupted with Ctrl-C, after which the process
        exits with status 0.

    Args:
        cfg: Configuration source accepted by ``Config.fromfile``.
    """
    cfg = Config.fromfile(cfg)
    net = build_net(
        'train',
        size=cfg.model.input_size,  # Only 320, 512, 704 and 800 are supported
        config=cfg.model.m2det_config)
    init_net(net, cfg, False)
    net.to(device)

    resume_path = checkpoint_path.format(start_epoch)
    if os.path.exists(resume_path):
        # map_location lets a checkpoint saved on one device be resumed on
        # another (for example GPU-trained weights on a CPU-only machine).
        checkpoints = torch.load(resume_path, map_location=device)
        net.load_state_dict(checkpoints)
        logging.info('checkpoint loaded.')

    optimizer = optim.SGD(net.parameters(),
                          lr=cfg.train_cfg.lr[0],
                          momentum=cfg.optimizer.momentum,
                          weight_decay=cfg.optimizer.weight_decay)
    criterion = MultiBoxLoss(cfg.model.m2det_config.num_classes,
                             overlap_thresh=cfg.loss.overlap_thresh,
                             prior_for_matching=cfg.loss.prior_for_matching,
                             bkg_label=cfg.loss.bkg_label,
                             neg_mining=cfg.loss.neg_mining,
                             neg_pos=cfg.loss.neg_pos,
                             neg_overlap=cfg.loss.neg_overlap,
                             encode_target=cfg.loss.encode_target)

    # One anchor configuration is shared by the prior generator and the
    # detector instead of being computed twice.
    anchor_config = anchors(cfg)
    priorbox = PriorBox(anchor_config)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    net.train()

    # The detector is not used by the training step itself; it is kept for
    # the optional intermediate-result snapshot, which is currently disabled.
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')

    dataset = get_dataloader(cfg, 'COCO', 'train_sets')
    train_ds = DataLoader(dataset,
                          cfg.train_cfg.per_batch_size,
                          shuffle=True,
                          num_workers=0,
                          collate_fn=detection_collate)
    logging.info('dataset loaded, start to train...')

    # Pre-bind so the interrupt handler has a valid epoch even if Ctrl-C
    # arrives before the first iteration.
    epoch = start_epoch
    try:
        for epoch in range(start_epoch, cfg.model.epochs):
            for i, data in enumerate(train_ds):
                lr = adjust_learning_rate_v2(optimizer, epoch, cfg)
                images, targets = data
                images = images.to(device)
                targets = [anno.to(device) for anno in targets]

                out = net(images)
                optimizer.zero_grad()
                loss_l, loss_c = criterion(out, priors, targets)
                loss = loss_l + loss_c
                loss.backward()
                optimizer.step()

                if i % 30 == 0:
                    logging.info(
                        'Epoch: {}, iter: {}, loc_loss: {}, conf_loss: {}, loss: {}, lr: {}'
                        .format(epoch, i, loss_l.item(), loss_c.item(),
                                loss.item(), lr))
                if i % 2000 == 0:
                    torch.save(net.state_dict(),
                               checkpoint_path.format(epoch))
                    logging.info('model saved.')
            # End-of-epoch checkpoint.
            torch.save(net.state_dict(), checkpoint_path.format(epoch))
    except KeyboardInterrupt:
        # The handler wraps the whole loop so an interrupt during data
        # loading (not just during the optimisation step) still saves.
        torch.save(net.state_dict(), checkpoint_path.format(epoch))
        logging.info('model saved.')
        raise SystemExit(0) from None
def detect_parking_spaces(dir,
                          threshold=0.2,
                          save=False,
                          show=False,
                          cam=-1,
                          gpu=False,
                          config='training/m2det/configs/m2det512_vgg.py',
                          weights='training/m2det/weights/m2det512_vgg.pth'):
    """Run M2Det over images (or a camera) and pickle the drawn locations.

    For every processed frame the second value returned by
    ``draw_detection`` is stored in a dict keyed by the frame's name; the
    dict is pickled to ``<dir>/labels/split.txt`` when the loop ends.

    Args:
        dir: Base directory. Images are read from ``<dir>/images/*.jpg``;
            annotated frames go to ``<dir>/detection_images`` and the
            pickle to ``<dir>/labels``. (The name shadows the builtin but
            is kept for backward compatibility.)
        threshold: Forwarded to ``draw_detection``.
        save: When true, write each annotated frame as a ``.jpg``.
        show: When true, display each annotated frame; in camera mode
            pressing ``q`` stops the loop.
        cam: Camera index for ``cv2.VideoCapture``; a negative value means
            "read the image directory instead".
        gpu: When false, CUDA is disabled regardless of the config file.
        config: Path of the M2Det config file.
        weights: Path of the trained weights.

    Notes:
        * Camera frames have no file name, so they are keyed as
          ``camera<cam>_<frame index>.jpg``.
        * Frames with no detections are passed to ``draw_detection`` with
          empty arrays instead of raising IndexError.
        * Images that OpenCV cannot read are skipped with a message.
    """
    print('Detect Parking Spaces Programe')
    cfg = Config.fromfile(config)
    anchor_config = anchors(cfg)
    priorbox = PriorBox(anchor_config)
    net = build_net('test',
                    size=cfg.model.input_size,
                    config=cfg.model.m2det_config)
    init_net(net, cfg, weights)
    net.eval()
    if not gpu:
        cfg.test_cfg.cuda = False
    with torch.no_grad():
        priors = priorbox.forward()
        if cfg.test_cfg.cuda:
            net = net.cuda()
            priors = priors.cuda()
            cudnn.benchmark = True
        else:
            net = net.cpu()
    print_info('===> Finished constructing and loading model')

    _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means,
                                (2, 0, 1))
    num_classes = cfg.model.m2det_config.num_classes
    detector = Detect(num_classes, cfg.loss.bkg_label, anchor_config)

    base = int(np.ceil(pow(num_classes, 1. / 3)))
    colors = [_to_color(x, base) for x in range(num_classes)]
    with open('training/m2det/data/coco_labels.txt', 'r') as label_file:
        cats = [line.strip().split(',')[-1] for line in label_file]
    labels = tuple(['__background__'] + cats)

    use_camera = cam >= 0
    capture = None
    im_iter = iter(())
    if use_camera:
        capture = cv2.VideoCapture(cam)
    else:
        # The image directory is only needed (and listed) in file mode.
        im_path = dir + '/images'
        im_names = sorted(fname for fname in os.listdir(im_path)
                          if os.path.splitext(fname)[-1] == '.jpg')
        im_iter = iter([os.path.join(im_path, fname) for fname in im_names])

    save_dir = dir + '/detection_images'
    os.makedirs(save_dir, exist_ok=True)

    locs_list = {}
    frame_index = 0
    try:
        while True:
            if use_camera:
                ret, image = capture.read()
                if not ret:
                    break
                # Synthesise a key for frames that do not come from a file.
                fname = 'camera{}_{:06d}.jpg'.format(cam, frame_index)
                frame_index += 1
            else:
                fname = next(im_iter, None)
                if fname is None:
                    break
                image = cv2.imread(fname, cv2.IMREAD_COLOR)
                if image is None:
                    print_info('Skipping unreadable image: {}'.format(fname))
                    continue

            loop_start = time.time()
            w, h = image.shape[1], image.shape[0]
            img = _preprocess(image).unsqueeze(0)
            if cfg.test_cfg.cuda:
                img = img.cuda()
            scale = torch.Tensor([w, h, w, h])
            # Pure inference: no autograd bookkeeping needed.
            with torch.no_grad():
                out = net(img)
                boxes, scores = detector.forward(out, priors)
            # `scale` is a CPU tensor, so move the boxes before multiplying.
            boxes = (boxes[0].cpu() * scale).numpy()
            scores = scores[0].cpu().numpy()

            allboxes = []
            # Class 0 is skipped, as in the original loop.
            for j in range(1, num_classes):
                inds = np.where(
                    scores[:, j] > cfg.test_cfg.score_threshold)[0]
                if len(inds) == 0:
                    continue
                c_bboxes = boxes[inds]
                c_scores = scores[inds, j]
                c_dets = np.hstack(
                    (c_bboxes, c_scores[:, np.newaxis])).astype(
                        np.float32, copy=False)
                soft_nms = cfg.test_cfg.soft_nms
                keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
                keep = keep[:cfg.test_cfg.keep_per_class]
                c_dets = c_dets[keep, :]
                allboxes.extend(det.tolist() + [j] for det in c_dets)
            loop_time = time.time() - loop_start

            # reshape(-1, 6) keeps the array 2-D when nothing was detected,
            # so the column slices below stay valid.
            allboxes = np.array(allboxes, dtype=np.float64).reshape(-1, 6)
            boxes = allboxes[:, :4]
            scores = allboxes[:, 4]
            cls_inds = allboxes[:, 5]

            fps = 1.0 / loop_time if use_camera and loop_time > 0 else -1
            im2show, loc = draw_detection(image,
                                          boxes,
                                          scores,
                                          cls_inds,
                                          fps,
                                          threshold,
                                          colors=colors,
                                          labels=labels)
            locs_list[fname] = loc

            if im2show.shape[0] > 1100:
                new_width = int(
                    1000. * float(im2show.shape[1]) / im2show.shape[0])
                im2show = cv2.resize(im2show, (new_width, 1000))

            if save:
                # basename/splitext instead of split('.') so that a dot in
                # the directory path (e.g. './data') cannot empty the name.
                name = os.path.splitext(os.path.basename(fname))[0]
                cv2.imwrite(f"{save_dir}/{name}.jpg", im2show)

            if show:
                cv2.imshow('test', im2show)
                if not use_camera:
                    cv2.waitKey(1000)
                elif cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        if capture is not None:
            cv2.destroyAllWindows()
            capture.release()

    labels_dir = dir + '/labels'
    os.makedirs(labels_dir, exist_ok=True)
    with open(labels_dir + '/split.txt', 'wb') as f:
        pickle.dump(locs_list, f)