def get_model(cls):
    """Build the M2Det network, load the pretrained weights, and return
    everything needed for inference: (net, priors, preprocess, detector)."""
    weights_path = '/opt/ml/model/m2det512_vgg.pth'
    anchor_cfg = anchors(cfg)
    print_info('The Anchor info: \n{}'.format(anchor_cfg))
    net = build_net('test', size=cfg.model.input_size, config=cfg.model.m2det_config)
    init_net(net, cfg, weights_path)
    print_info('===> Finished constructing and loading model', ['yellow', 'bold'])
    net.eval()
    box_generator = PriorBox(anchor_cfg)
    # Priors are fixed; generate them once outside autograd.
    with torch.no_grad():
        priors = box_generator.forward()
        if not cfg.test_cfg.cuda:
            net = net.cpu()
        else:
            net = net.cuda()
            priors = priors.cuda()
            cudnn.benchmark = True
    preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    detector = Detect(cfg.model.m2det_config.num_classes, cfg.loss.bkg_label, anchor_cfg)
    return net, priors, preprocess, detector
class EfficientDet(nn.Module):
    """EfficientNet backbone + BiFPN neck + SSD-style class/box heads.

    Returns (loc, conf, priors) from forward(), matching the usual SSD
    multibox output layout.
    """

    def __init__(self, num_class = 21, levels = 3, num_channels = 128, model_name = 'efficientnet-b0'):
        # num_class: number of output classes (21 selects the VOC config below).
        # levels: how many times the BiFPN is applied to the pyramid.
        # num_channels: common channel width the pyramid is projected to.
        super(EfficientDet, self).__init__()
        self.num_class = num_class
        self.levels = levels
        self.num_channels = num_channels
        self.efficientnet = EfficientNet.from_pretrained(model_name)
        print('efficientnet: ', self.efficientnet)
        self.bifpn = BiFPN(num_channels = self.num_channels)
        # Pick the anchor config by class count: True indexes voc.
        self.cfg = (coco, voc)[num_class == 21]
        self.priorbox = PriorBox(self.cfg)
        # NOTE(review): Variable(volatile=True) is the pre-0.4 autograd API;
        # modern code would use torch.no_grad() — confirm torch version.
        self.priors = Variable(self.priorbox.forward(), volatile=True)

    def forward(self, inputs):
        P1, P2, P3, P4, P5, P6, P7 = self.efficientnet(inputs)
        # NOTE(review): these 1x1 projection convs are constructed fresh on
        # every forward call, so they are randomly initialized each time and
        # never registered as submodules (they cannot be trained or saved).
        P3 = self.bifpn.Conv(in_channels=P3.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P3)
        P4 = self.bifpn.Conv(in_channels=P4.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P4)
        P5 = self.bifpn.Conv(in_channels=P5.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P5)
        P6 = self.bifpn.Conv(in_channels=P6.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P6)
        P7 = self.bifpn.Conv(in_channels=P7.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P7)
        # Refine the pyramid `levels` times through the BiFPN.
        for _ in range(self.levels):
            P3, P4, P5, P6, P7 = self.bifpn([P3, P4, P5, P6, P7])
        P = [P3, P4, P5, P6, P7]
        features_class = [self.class_net(p, self.num_class) for p in P]
        # NOTE(review): concatenating along axis=0 then viewing with the
        # batch size as the leading dim assumes a compatible element count;
        # `dim=` is the conventional torch.cat keyword.
        features_class = torch.cat(features_class, axis=0)
        features_bbox = [self.regression_net(p) for p in P]
        features_bbox = torch.cat(features_bbox, axis=0)
        output = (
            features_bbox.view(inputs.size(0), -1, 4),       # box regression
            features_class.view(inputs.size(0), -1, self.num_class),  # class scores
            self.priors
        )
        return output

    @staticmethod
    def class_net(features, num_class, num_anchor=5):
        """Per-level classification head: conv stack -> (N, num_class) sigmoid scores.

        NOTE(review): the nn.Sequential here is built (with random weights)
        on every call and is not a registered submodule — it never trains.
        """
        features = nn.Sequential(
            nn.Conv2d(in_channels=features.size(1), out_channels=features.size(2), kernel_size = 3, stride=1),
            nn.Conv2d(in_channels=features.size(2), out_channels=num_anchor*num_class, kernel_size = 3, stride=1)
        )(features)
        features = features.view(-1, num_class)
        features = nn.Sigmoid()(features)
        return features

    @staticmethod
    def regression_net(features, num_anchor=5):
        """Per-level box head: conv stack -> (N, 4) sigmoid box offsets.

        NOTE(review): same caveat as class_net — layers are created inside
        the call and are untrained.
        """
        features = nn.Sequential(
            nn.Conv2d(in_channels=features.size(1), out_channels=features.size(2), kernel_size = 3, stride=1),
            nn.Conv2d(in_channels=features.size(2), out_channels=num_anchor*4, kernel_size = 3, stride=1)
        )(features)
        features = features.view(-1, 4)
        features = nn.Sigmoid()(features)
        return features
class Pelee_Det(object):
    """Pelee detector wrapper: builds the network and priors once in
    __init__ and exposes a single-image detect() method."""

    def __init__(self):
        self.anchor_config = anchors(cfg.model)
        self.priorbox = PriorBox(self.anchor_config)
        self.net = build_net('test', cfg.model.input_size, cfg.model)
        init_net(self.net, cfg, args.trained_model)
        self.net.eval()
        self.num_classes = cfg.model.num_classes
        with torch.no_grad():
            self.priors = self.priorbox.forward()
            self.net = self.net.cuda()
            self.priors = self.priors.cuda()
            cudnn.benchmark = True
        self._preprocess = BaseTransform(cfg.model.input_size,
                                         cfg.model.rgb_means, (2, 0, 1))
        # BUG FIX: the original passed the undefined bare name `num_classes`
        # here, which raises NameError at construction time.
        self.detector = Detect(self.num_classes, cfg.loss.bkg_label,
                               self.anchor_config)

    def detect(self, image):
        """Run detection on one image (H, W, C array).

        Returns:
            (infos, im2show) from draw_detection: detection info plus the
            annotated image.
        """
        loop_start = time.time()
        w, h = image.shape[1], image.shape[0]
        img = self._preprocess(image).unsqueeze(0)
        if cfg.test_cfg.cuda:
            img = img.cuda()
        scale = torch.Tensor([w, h, w, h])  # map boxes back to pixel coords
        out = self.net(img)
        boxes, scores = self.detector.forward(out, self.priors)
        boxes = (boxes[0] * scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        # Class 0 is background; run per-class thresholding + NMS.
        for j in range(1, len(ch_labels)):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist() + [j] for _ in c_dets])
        loop_time = time.time() - loop_start
        # BUG FIX: with zero detections np.array([]) is 1-D and the column
        # slices below raised IndexError; use an empty (0, 6) array instead.
        if allboxes:
            allboxes = np.array(allboxes)
        else:
            allboxes = np.zeros((0, 6), dtype=np.float32)
        boxes = allboxes[:, :4]
        scores = allboxes[:, 4]
        cls_inds = allboxes[:, 5]
        infos, im2show = draw_detection(image, boxes, scores, cls_inds, -1,
                                        args.thresh)
        return infos, im2show
def __init__(self, img_size=300, thresh=0.56):
    """Build an SSD-style detector for a 300 or 512 input size, load the
    checkpoint from the module-level ``trained_model`` path, and prepare
    priors and the preprocessing transform for inference."""
    assert img_size in (300, 512), 'net input image size must be 300 or 512'
    self.labels_name = LABELS_SET
    self.labels_numb = len(LABELS_SET)
    self.img_size = img_size
    self.thresh = thresh
    self.cfg = VOC_300 if img_size == 300 else VOC_512
    self.gpu_is_available = torch.cuda.is_available()
    self.gpu_numb = torch.cuda.device_count()
    self.net = build_net('test', self.img_size, self.labels_numb)
    self.detect = Detect(self.labels_numb, 0, self.cfg)
    self.transform = BaseTransform(self.img_size)
    # Load the checkpoint, stripping any DataParallel 'module.' prefix.
    raw_weights = torch.load(trained_model, map_location='cpu')
    cleaned = OrderedDict(
        (key[7:] if key[:7] == 'module.' else key, value)
        for key, value in raw_weights.items())
    self.net.load_state_dict(cleaned)
    self.net.eval()
    print('Finished loading model!')
    if self.gpu_numb > 1:
        self.net = torch.nn.DataParallel(
            self.net, device_ids=list(range(self.gpu_numb)))
    if self.gpu_is_available:
        self.net.cuda()
        cudnn.benchmark = True
    # Anchor boxes are fixed; generate them once outside autograd.
    priorbox = PriorBox(self.cfg)
    with torch.no_grad():
        self.priors = priorbox.forward()
        if self.gpu_is_available:
            self.priors = self.priors.cuda()
def im_detect(net, im_org, target_size, transform, cuda, means):
    """Run one forward pass at ``target_size`` and return (boxes, scores)
    rescaled to the original image size.

    Uses the module-level ``detector`` and ``VOC_512``/``cfg`` configs.
    """
    im = cv2.resize(np.array(im_org), (target_size, target_size),
                    interpolation=cv2.INTER_LINEAR).astype(np.float32)
    im -= means
    im = im.transpose((2, 0, 1))  # HWC -> CHW
    scale = torch.Tensor(
        [im_org.shape[1], im_org.shape[0], im_org.shape[1], im_org.shape[0]])
    x = Variable((torch.from_numpy(im)).unsqueeze(0), volatile=True)
    if cuda:
        x = x.cuda()
        scale = scale.cuda()
    out = net(x)
    # Rebuild priors for this input size from the net's actual feature maps.
    # NOTE(review): the edits below mutate the global ``cfg`` while PriorBox
    # receives ``cfg_temp`` (VOC_512) — confirm both names refer to the same
    # dict, otherwise the feature-map edits have no effect on the priors.
    cfg_temp = VOC_512
    cfg['min_dim'] = target_size
    size = math.ceil(target_size / 4)
    multi = target_size / 300
    for i in range(0, len(cfg['feature_maps'])):
        size = net.sizes[i]
        cfg['feature_maps'][i] = size
    priorbox_temp = PriorBox(cfg_temp)
    priors_temp = priorbox_temp.forward()
    # BUG FIX: priors were moved to the GPU unconditionally, which crashed
    # CPU-only runs; honour the ``cuda`` flag like the input tensor does.
    if cuda:
        priors_temp = priors_temp.cuda()
    priors_temp = Variable(priors_temp, volatile=True)
    boxes, scores = detector.forward(out, priors_temp)
    boxes = boxes[0]
    scores = scores[0]
    boxes *= scale  # back to original pixel coordinates
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy()
    return (boxes, scores)
def _init_model(self):
    """Build the RFB-VGG network, load checkpoint weights and create the
    detector.  Sets self.img_dim, self.priors, self.net and self.detector.

    The input size (300 vs 512) is inferred from self.model_path.
    """
    # BUG FIX: ``cuda`` was only assigned inside the availability check,
    # raising NameError later on CPU-only machines.
    cuda = torch.cuda.is_available()
    if '300' in self.model_path:
        cfg = COCO_300
        self.img_dim = 300
        print('Model input size is 300')
    else:
        cfg = COCO_512
        self.img_dim = 512
        print('Model input size is 512')
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        # BUG FIX: self.priors was never assigned on the CPU path.
        self.priors = priors.cuda() if cuda else priors
    self.net = build_rfb_vgg_net('test', self.img_dim, self.num_classes)
    state_dict = torch.load(self.model_path)['state_dict']
    # Strip the 'module.' prefix left by DataParallel checkpoints.
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:] if k[:7] == 'module.' else k
        new_state_dict[name] = v
    self.net.load_state_dict(new_state_dict)
    self.net.eval()
    if cuda:
        self.net = self.net.cuda()
        cudnn.benchmark = True
    else:
        self.net = self.net.cpu()
    print('Finished loading model!')
    self.detector = Detect(self.num_classes, 0, cfg)
def Train(self, epochs=200, log_iters=True, output_weights_dir="weights", saved_epoch_interval=10):
    """Train an RFB-Net variant on the configured COCO-style dataset.

    Args:
        epochs: total epochs to train (stored as params.max_epoch).
        log_iters: stored in params; flag for iteration logging.
        output_weights_dir: directory where .pth snapshots are written.
        saved_epoch_interval: print a progress line every N iterations.

    Side effects: creates the save folder, builds the network into
    self.system_dict["local"]["net"], and writes intermediate and final
    checkpoints to disk.
    """
    self.system_dict["params"]["max_epoch"] = epochs
    self.system_dict["params"]["log_iters"] = log_iters
    self.system_dict["params"]["save_folder"] = output_weights_dir
    if not os.path.exists(self.system_dict["params"]["save_folder"]):
        os.mkdir(self.system_dict["params"]["save_folder"])
    # Select the anchor config by input size; RFB_mobile overrides it below.
    if (self.system_dict["params"]["size"] == 300):
        cfg = COCO_300
    else:
        cfg = COCO_512
    if self.system_dict["params"]["version"] == 'RFB_vgg':
        from models.RFB_Net_vgg import build_net
    elif self.system_dict["params"]["version"] == 'RFB_E_vgg':
        from models.RFB_Net_E_vgg import build_net
    elif self.system_dict["params"]["version"] == 'RFB_mobile':
        from models.RFB_Net_mobile import build_net
        cfg = COCO_mobile_300
    else:
        # NOTE(review): an unknown version only prints; build_net is then
        # unbound and the build below will raise — confirm intended.
        print('Unkown version!')
    img_dim = (300, 512)[self.system_dict["params"]["size"] == 512]
    rgb_means = ((104, 117, 123), (
        103.94, 116.78, 123.68))[self.system_dict["params"]["version"] == 'RFB_mobile']
    p = (0.6, 0.2)[self.system_dict["params"]["version"] == 'RFB_mobile']
    # Count classes from the dataset's classes.txt (plus background).
    f = open(
        self.system_dict["dataset"]["train"]["root_dir"] + "/" +
        self.system_dict["dataset"]["train"]["coco_dir"] +
        "/annotations/classes.txt", 'r')
    lines = f.readlines()
    # NOTE(review): readlines() keeps trailing '\n', so lines[-1] == ""
    # is almost never true — verify the intended trailing-line handling.
    if (lines[-1] == ""):
        num_classes = len(lines) - 1
    else:
        num_classes = len(lines) + 1
    batch_size = self.system_dict["params"]["batch_size"]
    weight_decay = self.system_dict["params"]["weight_decay"]
    gamma = self.system_dict["params"]["gamma"]
    momentum = self.system_dict["params"]["momentum"]
    self.system_dict["local"]["net"] = build_net('train', img_dim, num_classes)
    if self.system_dict["params"]["resume_net"] == None:
        # Fresh run: load the backbone weights, then re-init the new layers.
        base_weights = torch.load(self.system_dict["params"]["basenet"])
        print('Loading base network...')
        self.system_dict["local"]["net"].base.load_state_dict(base_weights)

        def xavier(param):
            # NOTE(review): defined but unused below; init.xavier_uniform is
            # also the deprecated (underscore-less) API.
            init.xavier_uniform(param)

        def weights_init(m):
            # Kaiming init for convs, constant init for batch-norm params.
            for key in m.state_dict():
                if key.split('.')[-1] == 'weight':
                    if 'conv' in key:
                        init.kaiming_normal_(m.state_dict()[key], mode='fan_out')
                    if 'bn' in key:
                        m.state_dict()[key][...] = 1
                elif key.split('.')[-1] == 'bias':
                    m.state_dict()[key][...] = 0
        print('Initializing weights...')
        # initialize newly added layers' weights with kaiming_normal method
        self.system_dict["local"]["net"].extras.apply(weights_init)
        self.system_dict["local"]["net"].loc.apply(weights_init)
        self.system_dict["local"]["net"].conf.apply(weights_init)
        self.system_dict["local"]["net"].Norm.apply(weights_init)
        if self.system_dict["params"]["version"] == 'RFB_E_vgg':
            self.system_dict["local"]["net"].reduce.apply(weights_init)
            self.system_dict["local"]["net"].up_reduce.apply(weights_init)
    else:
        # load resume network (strip any DataParallel 'module.' prefix)
        print('Loading resume network...')
        state_dict = torch.load(self.system_dict["params"]["resume_net"])
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        self.system_dict["local"]["net"].load_state_dict(new_state_dict)
    if self.system_dict["params"]["ngpu"] > 1:
        self.system_dict["local"]["net"] = torch.nn.DataParallel(
            self.system_dict["local"]["net"],
            device_ids=list(range(self.system_dict["params"]["ngpu"])))
    if self.system_dict["params"]["cuda"]:
        self.system_dict["local"]["net"].cuda()
        cudnn.benchmark = True
    optimizer = optim.SGD(
        self.system_dict["local"]["net"].parameters(),
        lr=self.system_dict["params"]["lr"],
        momentum=self.system_dict["params"]["momentum"],
        weight_decay=self.system_dict["params"]["weight_decay"])
    # Multibox loss: 0.5 overlap threshold, 3:1 negative mining ratio.
    criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        if self.system_dict["params"]["cuda"]:
            priors = priors.cuda()
    self.system_dict["local"]["net"].train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + self.system_dict["params"]["resume_epoch"]
    print('Loading Dataset...')
    # Stale COCO annotation cache would otherwise be reused; remove it.
    if (os.path.isdir("coco_cache")):
        os.system("rm -r coco_cache")
    dataset = COCODetection(
        self.system_dict["dataset"]["train"]["root_dir"],
        self.system_dict["dataset"]["train"]["coco_dir"],
        self.system_dict["dataset"]["train"]["set_dir"],
        preproc(img_dim, rgb_means, p))
    epoch_size = len(dataset) // self.system_dict["params"]["batch_size"]
    max_iter = self.system_dict["params"]["max_epoch"] * epoch_size
    # LR decay milestones, expressed in iterations.
    stepvalues = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    print('Training', self.system_dict["params"]["version"], 'on', dataset.name)
    step_index = 0
    if self.system_dict["params"]["resume_epoch"] > 0:
        start_iter = self.system_dict["params"]["resume_epoch"] * epoch_size
    else:
        start_iter = 0
    lr = self.system_dict["params"]["lr"]
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # New epoch: fresh batch iterator, reset loss accumulators,
            # and save an intermediate checkpoint.
            batch_iterator = iter(
                data.DataLoader(
                    dataset,
                    batch_size,
                    shuffle=True,
                    num_workers=self.system_dict["params"]["num_workers"],
                    collate_fn=detection_collate))
            loc_loss = 0
            conf_loss = 0
            torch.save(
                self.system_dict["local"]["net"].state_dict(),
                self.system_dict["params"]["save_folder"] + "/" +
                self.system_dict["params"]["version"] + '_' +
                self.system_dict["params"]["dataset"] + '_epoches_' +
                'intermediate' + '.pth')
            epoch += 1
        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = self.adjust_learning_rate(optimizer,
                                       self.system_dict["params"]["gamma"],
                                       epoch, step_index, iteration,
                                       epoch_size)
        # load train data
        images, targets = next(batch_iterator)
        if self.system_dict["params"]["cuda"]:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]
        # forward
        t0 = time.time()
        out = self.system_dict["local"]["net"](images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        load_t1 = time.time()
        if iteration % saved_epoch_interval == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Current iter ' + repr(iteration) + '|| Total iter ' +
                  repr(max_iter) + ' || L: %.4f C: %.4f||' %
                  (loss_l.item(), loss_c.item()) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
    torch.save(
        self.system_dict["local"]["net"].state_dict(),
        self.system_dict["params"]["save_folder"] + "/" + 'Final_' +
        self.system_dict["params"]["version"] + '_' +
        self.system_dict["params"]["dataset"] + '.pth')
def train():
    """Training loop for the SSD-style net using module-level globals
    (net, optimizer, criterion, args, img_dim, rgb_means, p, train_sets).

    Writes periodic snapshots to weights/ and a final checkpoint to
    args.save_folder.
    """
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')
    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets,
                               preproc(img_dim, rgb_means, p),
                               AnnotationTransform())
    elif args.dataset == 'COCO':
        # dataset = COCODetection(COCOroot, train_sets, preproc(
        #     img_dim, rgb_means, p))
        print('COCO not supported now!')
        return
    elif args.dataset == 'CUSTOM':
        dataset = CustomDetection(CUSTOMroot, train_sets,
                                  preproc(img_dim, rgb_means, p),
                                  CustomAnnotationTransform())
        # 512-sized twin dataset used when the 512 branch is selected below.
        dataset_512 = CustomDetection(CUSTOMroot, train_sets,
                                      preproc(512, rgb_means, p),
                                      CustomAnnotationTransform())
    else:
        print('Only VOC and COCO are supported now!')
        return
    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size
    # LR decay milestones in iterations, per dataset.
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size
                      )  # (80000,100000,120000)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0
    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0
    lr = args.lr
    image_size = 0
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            # NOTE(review): ('300', '512')[1] always selects '512', so the
            # 512 branch is always taken; on the VOC path dataset_512 is
            # never defined and this will raise NameError — confirm intent.
            image_size = ('300', '512')[1]  # [random.randint(0,1)]
            batch_iterator = iter(
                data.DataLoader((dataset, dataset_512)[image_size == '512'],
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            # Rebuild priors for the selected input size.
            priorbox = PriorBox((VOC_300_2, VOC_512_3)[image_size == '512'])
            priors = Variable(priorbox.forward(), volatile=True)
            loc_loss = 0
            conf_loss = 0
            # if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > 200):
            #     torch.save(net.state_dict(), args.save_folder + args.version + '_' + args.dataset + '_epoches_' +
            #                repr(epoch) + '.pth')
            epoch += 1
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)
        # load train data
        images, targets = next(batch_iterator)
        # print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))
        if args.cuda:
            images = Variable(images.cuda())
            targets = [
                Variable(anno.cuda(), volatile=True) for anno in targets
            ]
        else:
            images = Variable(images)
            targets = [Variable(anno, volatile=True) for anno in targets]
        # forward
        load_t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        load_t1 = time.time()
        # NOTE(review): .data[0] is the pre-0.4 tensor API (now .item()).
        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]
        if iteration % 100 == 0:
            print('Epoch:' + repr(epoch) + ' || image-size:' +
                  repr(image_size) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Totel iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' %
                  (loss_l.data[0], loss_c.data[0]) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
        # Snapshot every 10k iters early on, every 1k after 110k iters.
        if iteration <= 110000 and (iteration == 0 or iteration % 10000 == 0):
            print('Saving state, iter:', iteration)
            torch.save(net.state_dict(),
                       'weights/ssd300_2_VOC_' + repr(iteration) + '.pth')
        elif (iteration > 110000) and iteration % 1000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(net.state_dict(),
                       'weights/ssd300_2_VOC_' + repr(iteration) + '.pth')
    torch.save(
        net.state_dict(),
        args.save_folder + 'Final_' + args.version + '_' + args.dataset + '.pth')
def train(cfg):
    """Train M2Det on the Helmet dataset.

    Args:
        cfg: path to an mmcv-style config file; it is parsed with
             Config.fromfile and shadows the parameter.

    Saves a checkpoint per epoch (and every 2000 iterations / on Ctrl-C)
    to ``checkpoint_path``.
    """
    cfg = Config.fromfile(cfg)
    net = build_net('train',
                    size=cfg.model.input_size,  # Only 320, 512, 704 and 800 are supported
                    config=cfg.model.m2det_config)
    init_net(net, cfg, False)
    net.to(device)
    # Resume from the start_epoch checkpoint if one exists on disk.
    if os.path.exists(checkpoint_path.format(start_epoch)):
        checkpoints = torch.load(checkpoint_path.format(start_epoch))
        net.load_state_dict(checkpoints)
        logging.info('checkpoint loaded.')
    optimizer = optim.SGD(net.parameters(),
                          lr=cfg.train_cfg.lr[0],
                          momentum=cfg.optimizer.momentum,
                          weight_decay=cfg.optimizer.weight_decay)
    criterion = MultiBoxLoss(cfg.model.m2det_config.num_classes,
                             overlap_thresh=cfg.loss.overlap_thresh,
                             prior_for_matching=cfg.loss.prior_for_matching,
                             bkg_label=cfg.loss.bkg_label,
                             neg_mining=cfg.loss.neg_mining,
                             neg_pos=cfg.loss.neg_pos,
                             neg_overlap=cfg.loss.neg_overlap,
                             encode_target=cfg.loss.encode_target)
    # Priors are fixed for a given config; compute once outside autograd.
    priorbox = PriorBox(anchors(cfg))
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    net.train()
    anchor_config = anchors(cfg)
    detector = Detect(cfg.model.m2det_config.num_classes, cfg.loss.bkg_label,
                      anchor_config)
    logging.info('detector initiated.')
    dataset = get_dataloader(cfg, 'Helmet', 'train_sets')
    train_ds = DataLoader(dataset,
                          cfg.train_cfg.per_batch_size,
                          shuffle=True,
                          num_workers=0,
                          collate_fn=detection_collate)
    logging.info('dataset loaded, start to train...')
    for epoch in range(start_epoch, cfg.model.epochs):
        for i, data in enumerate(train_ds):
            # try/except so Ctrl-C still saves the current weights.
            try:
                lr = adjust_learning_rate_helmet(optimizer, epoch, cfg)
                images, targets = data
                images = images.to(device)
                targets = [anno.to(device) for anno in targets]
                out = net(images)
                optimizer.zero_grad()
                loss_l, loss_c = criterion(out, priors, targets)
                loss = loss_l + loss_c
                loss.backward()
                optimizer.step()
                if i % 30 == 0:
                    logging.info(
                        'Epoch: {}, iter: {}, loc_loss: {}, conf_loss: {}, loss: {}, lr: {}'.format(
                            epoch, i, loss_l.item(), loss_c.item(),
                            loss.item(), lr))
                if i % 2000 == 0:
                    # two_imgs = images[0:2, :]
                    # out = net(two_imgs)
                    # snap_middle_result(two_imgs[0], out[0], priors, detector, cfg, epoch)
                    torch.save(net.state_dict(), checkpoint_path.format(epoch))
                    logging.info('model saved.')
            except KeyboardInterrupt:
                torch.save(net.state_dict(), checkpoint_path.format(epoch))
                logging.info('model saved.')
                exit(0)
        # End-of-epoch checkpoint.
        torch.save(net.state_dict(), checkpoint_path.format(epoch))
class Solver(object):
    """
    A wrapper class for the training process: builds the model, data
    loaders, optimizer, LR scheduler and loss from the module-level ``cfg``,
    and drives train/eval/checkpoint cycles.
    """

    def __init__(self):
        self.cfg = cfg
        # Load data
        print('===> Loading data')
        self.train_loader = load_data(
            cfg.dataset, 'train') if 'train' in cfg.phase else None
        self.eval_loader = load_data(cfg.dataset,
                                     'eval') if 'eval' in cfg.phase else None
        self.test_loader = load_data(cfg.dataset,
                                     'test') if 'test' in cfg.phase else None
        # self.visualize_loader = load_data(cfg.DATASET, 'visualize') if 'visualize' in cfg.PHASE else None
        # Build model
        print('===> Building model')
        self.base_trans = BaseTransform(cfg.image_size[0],
                                        cfg.network.rgb_means,
                                        cfg.network.rgb_std, (2, 0, 1))
        self.priors = PriorBox(cfg.anchor)
        # The model module is selected dynamically by name from the config.
        # NOTE(review): eval() on a config string executes arbitrary code —
        # acceptable only for trusted configs.
        self.model = eval(cfg.model + '.build_net')(cfg.image_size[0],
                                                    cfg.dataset.num_classes)
        # self.priors is replaced by the generated prior tensor here.
        with torch.no_grad():
            self.priors = self.priors.forward()
        self.detector = Detect2(cfg.post_process)
        # Utilize GPUs for computation
        self.use_gpu = torch.cuda.is_available()
        if cfg.train.train_scope == '':
            trainable_param = self.model.parameters()
        else:
            trainable_param = self.trainable_param(cfg.train.train_scope)
        self.output_dir = os.path.join(cfg.output_dir, cfg.name, cfg.date)
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)
        self.log_dir = os.path.join(self.output_dir, 'logs')
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)
        self.checkpoint = cfg.train.checkpoint
        previous = self.find_previous()
        # NOTE(review): the next line unconditionally discards the
        # find_previous() result, so auto-resume never happens — confirm
        # whether this override is intentional.
        previous = False
        if previous:
            self.start_epoch = previous[0][-1]
            self.resume_checkpoint(previous[1][-1])
        else:
            self.start_epoch = self.initialize()
        if self.use_gpu:
            print('Utilize GPUs for computation')
            print('Number of GPU available', torch.cuda.device_count())
            self.model.cuda()
            self.priors.cuda()
            cudnn.benchmark = True
            if cfg.ngpu > 1:
                self.model = torch.nn.DataParallel(self.model,
                                                   device_ids=list(
                                                       range(cfg.ngpu)))
        # Print the model architecture and parameters
        # print('Model architectures:\n{}\n'.format(self.model))
        # print('Parameters and size:')
        # for name, param in self.model.named_parameters():
        #     print('{}: {}'.format(name, list(param.size())))
        # print trainable scope
        print('Trainable scope: {}'.format(cfg.train.train_scope))
        self.optimizer = self.configure_optimizer(trainable_param,
                                                  cfg.train.optimizer)
        self.exp_lr_scheduler = self.configure_lr_scheduler(
            self.optimizer, cfg.train.lr_scheduler)
        self.max_epochs = cfg.train.lr_scheduler.max_epochs
        # metric
        if cfg.network.multi_box_loss_type == 'origin':
            self.criterion = MultiBoxLoss2(cfg.matcher, self.priors,
                                           self.use_gpu)
        else:
            print('ERROR: ' + cfg.multi_box_loss_type + ' is not supported')
            sys.exit()
        # Set the logger
        self.writer = SummaryWriter(log_dir=self.log_dir)
        self.checkpoint_prefix = cfg.name + '_' + cfg.dataset.dataset

    def save_checkpoints(self, epochs, iters=None):
        """Save the model state dict and append an entry to
        checkpoint_list.txt (used by find_previous for auto-resume)."""
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)
        if iters:
            filename = self.checkpoint_prefix + '_epoch_{:d}_iter_{:d}'.format(
                epochs, iters) + '.pth'
        else:
            filename = self.checkpoint_prefix + '_epoch_{:d}'.format(
                epochs) + '.pth'
        filename = os.path.join(self.output_dir, filename)
        torch.save(self.model.state_dict(), filename)
        with open(os.path.join(self.output_dir, 'checkpoint_list.txt'),
                  'a') as f:
            # NOTE(review): the literal '(unknown)' is written where the
            # checkpoint path should appear (the `filename` kwarg is unused),
            # so find_previous() cannot recover the real path — verify.
            f.write('epoch {epoch:d}: (unknown)\n'.format(epoch=epochs,
                                                          filename=filename))
        print('Wrote snapshot to: {:s}'.format(filename))
        # TODO: write relative cfg under the same page

    def resume_checkpoint(self, resume_checkpoint):
        """Load weights from a checkpoint file, stripping any DataParallel
        'module.' prefix and honouring cfg.train.resume_scope filtering."""
        if resume_checkpoint == '' or not os.path.isfile(resume_checkpoint):
            print(("=> no checkpoint found at '{}'".format(resume_checkpoint)))
            return False
        print(("=> loading checkpoint '{:s}'".format(resume_checkpoint)))
        checkpoint = torch.load(resume_checkpoint)
        # print("=> Weigths in the checkpoints:")
        # print([k for k, v in list(checkpoint.items())])
        # remove the module in the parrallel model
        if 'module.' in list(checkpoint.items())[0][0]:
            pretrained_dict = {
                '.'.join(k.split('.')[1:]): v
                for k, v in list(checkpoint.items())
            }
            checkpoint = pretrained_dict
        resume_scope = self.cfg.train.resume_scope
        # extract the weights based on the resume scope
        if resume_scope != '':
            pretrained_dict = {}
            for k, v in list(checkpoint.items()):
                for resume_key in resume_scope.split(','):
                    if resume_key in k:
                        pretrained_dict[k] = v
                        break
            checkpoint = pretrained_dict
        # Keep only keys the current model actually has.
        pretrained_dict = {
            k: v
            for k, v in checkpoint.items() if k in self.model.state_dict()
        }
        # print("=> Resume weigths:")
        # print([k for k, v in list(pretrained_dict.items())])
        checkpoint = self.model.state_dict()
        unresume_dict = set(checkpoint) - set(pretrained_dict)
        if len(unresume_dict) != 0:
            print("=> UNResume weigths:")
            print(unresume_dict)
        checkpoint.update(pretrained_dict)
        return self.model.load_state_dict(checkpoint)

    def find_previous(self):
        """Parse checkpoint_list.txt and return (epochs, checkpoint_paths),
        or False if no checkpoint list exists yet."""
        if not os.path.exists(
                os.path.join(self.output_dir, 'checkpoint_list.txt')):
            return False
        with open(os.path.join(self.output_dir, 'checkpoint_list.txt'),
                  'r') as f:
            lineList = f.readlines()
        epoches, resume_checkpoints = [list() for _ in range(2)]
        for line in lineList:
            epoch = int(line[line.find('epoch ') + len('epoch '):line.find(':')])
            checkpoint = line[line.find(':') + 2:-1]
            epoches.append(epoch)
            resume_checkpoints.append(checkpoint)
        return epoches, resume_checkpoints

    def weights_init(self, m):
        """Kaiming init for conv weights, constant init for bn/bias params."""
        for key in m.state_dict():
            if key.split('.')[-1] == 'weight':
                if 'conv' in key:
                    # NOTE(review): init.kaiming_normal is the deprecated
                    # (underscore-less) API name.
                    init.kaiming_normal(m.state_dict()[key], mode='fan_out')
                if 'bn' in key:
                    m.state_dict()[key][...] = 1
            elif key.split('.')[-1] == 'bias':
                m.state_dict()[key][...] = 0

    def initialize(self):
        """Load initial weights (checkpoint if configured, else basenet) and
        return the epoch to start from."""
        # TODO: ADD INIT ways
        # raise ValueError("Fan in and fan out can not be computed for tensor with less than 2 dimensions")
        # for module in self.cfg.TRAIN.TRAINABLE_SCOPE.split(','):
        #     if hasattr(self.model, module):
        #         getattr(self.model, module).apply(self.weights_init)
        if self.checkpoint:
            print('Loading initial model weights from {:s}'.format(
                self.checkpoint))
            self.resume_checkpoint(self.checkpoint)
            return cfg.train.resume_epoch
        else:
            self.model.init_model(cfg.network.basenet)
            return 0

    def trainable_param(self, trainable_scope):
        """Freeze everything, then re-enable grad only for the submodules
        named in the comma-separated trainable_scope; return their params."""
        for param in self.model.parameters():
            param.requires_grad = False
        trainable_param = []
        for module in trainable_scope.split(','):
            if hasattr(self.model, module):
                # print(getattr(self.model, module))
                for param in getattr(self.model, module).parameters():
                    param.requires_grad = True
                trainable_param.extend(
                    getattr(self.model, module).parameters())
        return trainable_param

    def train_model(self):
        """Main epoch loop: step the LR scheduler, train, optionally eval,
        and periodically save checkpoints."""
        # export graph for the model, onnx always not works
        # self.export_graph()
        # warm_up epoch
        for epoch in iter(range(self.start_epoch + 1, self.max_epochs + 1)):
            # learning rate
            sys.stdout.write('\rEpoch {epoch:d}/{max_epochs:d}:\n'.format(
                epoch=epoch, max_epochs=self.max_epochs))
            self.exp_lr_scheduler.step(epoch - cfg.train.lr_scheduler.warmup)
            if 'train' in cfg.phase:
                self.train_epoch(self.model, self.train_loader, self.optimizer,
                                 self.criterion, self.writer, epoch,
                                 self.use_gpu)
            if 'eval' in cfg.phase and epoch % cfg.test_frequency == 0:
                self.eval_epoch(self.model, self.eval_loader, self.detector,
                                self.criterion, self.writer, epoch,
                                self.use_gpu)
            # if 'test' in cfg.PHASE:
            #     self.test_epoch(self.model, self.test_loader, self.detector, self.output_dir, self.use_gpu)
            # if 'visualize' in cfg.PHASE:
            #     self.visualize_epoch(self.model, self.visualize_loader, self.priorbox, self.writer, epoch, self.use_gpu)
            if epoch % cfg.train.save_frequency == 0:
                self.save_checkpoints(epoch)

    def train_epoch(self, model, data_loader, optimizer, criterion,
                    writer, epoch, use_gpu):
        """Run one training epoch; logs per-iteration progress to stdout and
        per-epoch averages to TensorBoard."""
        model.train()
        epoch_size = len(data_loader)
        batch_iterator = iter(data_loader)
        loc_loss = 0
        conf_loss = 0
        _t = Timer()
        for iteration in iter(range((epoch_size))):
            # Data prep needs no autograd tracking.
            with torch.no_grad():
                images, targets = next(batch_iterator)
                if use_gpu:
                    images = images.cuda()
                    targets = [anno.cuda() for anno in targets]
            _t.tic()
            # forward
            out = model(images)
            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            # some bugs in coco train2017. maybe the annonation bug.
            if loss_l.item() == float("Inf"):
                continue
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            time = _t.toc()
            loc_loss += loss_l.item()
            conf_loss += loss_c.item()
            # log per iter
            log = '\r==>Train: || {iters:d}/{epoch_size:d} in {time:.3f}s [{prograss}] || loc_loss: {loc_loss:.4f} cls_loss: {cls_loss:.4f}\r'.format(
                prograss='#' * int(round(10 * iteration / epoch_size)) +
                '-' * int(round(10 * (1 - iteration / epoch_size))),
                iters=iteration,
                epoch_size=epoch_size,
                time=time,
                loc_loss=loss_l.item(),
                cls_loss=loss_c.item())
            sys.stdout.write(log)
            sys.stdout.flush()
        # log per epoch
        sys.stdout.write('\r')
        sys.stdout.flush()
        lr = optimizer.param_groups[0]['lr']
        log = '\r==>Train: || Total_time: {time:.3f}s || loc_loss: {loc_loss:.4f} conf_loss: {conf_loss:.4f} || lr: {lr:.6f}\n'.format(
            lr=lr,
            time=_t.total_time,
            loc_loss=loc_loss / epoch_size,
            conf_loss=conf_loss / epoch_size)
        sys.stdout.write(log)
        sys.stdout.flush()
        # log for tensorboard
        writer.add_scalar('Train/loc_loss', loc_loss / epoch_size, epoch)
        writer.add_scalar('Train/conf_loss', conf_loss / epoch_size, epoch)
        writer.add_scalar('Train/lr', lr, epoch)

    def eval_epoch(self, model, data_loader, detector, output_dir, use_gpu):
        """Run detection over the eval set, apply per-class NMS and a global
        score cap, pickle all boxes, and evaluate.

        NOTE(review): this method references several names that are not
        defined in this class or signature (testset, transform, cuda, net,
        save_folder, args, max_per_image) — it will raise NameError as
        written; confirm against the original project before relying on it.
        """
        model.eval()
        dataset = data_loader.dataset
        num_images = len(testset)
        num_classes = cfg.dataset.num_classes
        # all_boxes[class][image] = N x 5 array (x1, y1, x2, y2, score)
        all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(num_classes)]
        _t = {'im_detect': Timer(), 'misc': Timer()}
        det_file = os.path.join(self.output_dir, 'detections.pkl')
        # Re-test mode: just re-evaluate previously pickled detections.
        if cfg.test.retest:
            f = open(det_file, 'rb')
            all_boxes = pickle.load(f)
            print('Evaluating detections')
            testset.evaluate_detections(all_boxes, save_folder)
            return
        for i in range(num_images):
            img = testset.pull_image(i)
            with torch.no_grad():
                x = transform(img).unsqueeze(0)
                if cuda:
                    x = x.to(torch.device("cuda"))
            _t['im_detect'].tic()
            out = net(x=x, test=True)  # forward pass
            boxes, scores = detector.forward(out, self.priors)
            detect_time = _t['im_detect'].toc()
            boxes = boxes[0]
            scores = scores[0]
            boxes = boxes.cpu().numpy()
            scores = scores.cpu().numpy()
            # scale each detection back up to the image
            scale = torch.Tensor(
                [img.shape[1], img.shape[0], img.shape[1],
                 img.shape[0]]).cpu().numpy()
            boxes *= scale
            _t['misc'].tic()
            for j in range(1, num_classes):
                inds = np.where(
                    scores[:, j] > cfg.post_process.score_threshold)[0]
                if len(inds) == 0:
                    all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                    continue
                c_bboxes = boxes[inds]
                c_scores = scores[inds, j]
                c_dets = np.hstack(
                    (c_bboxes,
                     c_scores[:, np.newaxis])).astype(np.float32, copy=False)
                keep = nms(c_dets, cfg.post_process.nms, force_cpu=False)
                c_dets = c_dets[keep, :]
                all_boxes[j][i] = c_dets
            # Keep only the top max_per_image detections across classes.
            if cfg.post_process.max_per_image > 0:
                image_scores = np.hstack(
                    [all_boxes[j][i][:, -1] for j in range(1, num_classes)])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, num_classes):
                        keep = np.where(
                            all_boxes[j][i][:, -1] >= image_thresh)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]
            nms_time = _t['misc'].toc()
            if i % 20 == 0:
                print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                    i + 1, num_images, detect_time, nms_time))
                _t['im_detect'].clear()
                _t['misc'].clear()
        with open(det_file, 'wb') as f:
            pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
        print('Evaluating detections')
        if args.dataset == 'VOC':
            APs, mAP = testset.evaluate_detections(all_boxes, save_folder)
        else:
            testset.evaluate_detections(all_boxes, save_folder)

    def configure_optimizer(self, trainable_param, cfg):
        """Create the optimizer named by cfg.optimizer (sgd/rmsprop/adam).

        NOTE(review): the unknown-name branch only constructs an
        AssertionError without raising it, then hits UnboundLocalError on
        return — confirm intended behavior.
        """
        if cfg.optimizer == 'sgd':
            optimizer = optim.SGD(trainable_param,
                                  lr=cfg.lr,
                                  momentum=cfg.momentum,
                                  weight_decay=cfg.weight_decay)
        elif cfg.optimizer == 'rmsprop':
            optimizer = optim.RMSprop(trainable_param,
                                      lr=cfg.lr,
                                      momentum=cfg.momentum,
                                      alpha=cfg.alpha,
                                      eps=cfg.eps,
                                      weight_decay=cfg.weight_decay)
        elif cfg.optimizer == 'adam':
            optimizer = optim.Adam(trainable_param,
                                   lr=cfg.lr,
                                   betas=(cfg.beta1, cfg.beta2),
                                   eps=cfg.eps,
                                   weight_decay=cfg.weight_decay)
        else:
            AssertionError('optimizer can not be recognized.')
        return optimizer

    def configure_lr_scheduler(self, optimizer, cfg):
        """Create the LR scheduler named by cfg.lr_decay_type
        (multi-step/exponential/cos).

        NOTE(review): same unraised-AssertionError pattern as
        configure_optimizer in the fallback branch.
        """
        if cfg.lr_decay_type == 'multi-step':
            scheduler = lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=cfg.steps,
                                                 gamma=cfg.gamma)
        elif cfg.lr_decay_type == 'exponential':
            scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=cfg.gamma)
        elif cfg.lr_decay_type == 'cos':
            scheduler = lr_scheduler.CosineAnnealingLR(optimizer,
                                                       T_max=cfg.max_epochs)
        else:
            AssertionError('scheduler can not be recognized.')
        return scheduler

    # TODO: export graph
    def export_graph(self):
        pass
def main(): means = (104, 117, 123) # only support voc now args.save_root += args.dataset + '/' args.data_root += args.dataset + '/' for eval_gap in [int(g) for g in args.eval_gaps.split(',')]: args.eval_gap = eval_gap args.print_step = 10 args.fusion_type = args.fusion_type.upper() args.fusion = args.fusion_type in ['SUM','CAT','MEAN'] ## Define the experiment Name will used for save directory and ENV for visdom if not args.fusion: args.exp_name = 'AMTNet-{}-s{:d}-{}-sl{:02d}sg{:02d}-bs{:02d}-lr{:05d}'.format(args.dataset, args.train_split, args.input_type_base.upper(), args.seq_len, args.seq_gap, args.batch_size, int(args.lr * 100000)) else: args.exp_name = 'AMTNet-{}-s{:d}-{}-{}-{}-sl{:02d}sg{:02d}-bs{:02d}-lr{:05d}'.format(args.dataset, args.train_split, args.fusion_type, args.input_type_base, args.input_type_extra, args.seq_len, args.seq_gap, args.batch_size,int(args.lr * 100000)) print(args.exp_name, ' eg::=> ', eval_gap) args.cfg = v2 args.num_classes = len(CLASSES[args.dataset]) + 1 # 7 +1 background # Get proior or anchor boxes with torch.no_grad(): priorbox = PriorBox(v2, args.seq_len) priors = priorbox.forward() priors = priors.cuda() num_feat_multiplier = {'CAT': 2, 'SUM': 1, 'MEAN': 1, 'NONE': 1} # fusion type can one of the above keys args.fmd = [512, 1024, 512, 256, 256, 256] args.kd = 3 args.fusion_num_muliplier = num_feat_multiplier[args.fusion_type] dataset = ActionDetection(args, 'test', BaseTransform(args.ssd_dim, means), NormliseBoxes(), full_test=False) ## DEFINE THE NETWORK net = AMTNet(args) if args.ngpu>1: print('\nLets do dataparallel\n\n') net = torch.nn.DataParallel(net) # Load dataset for iteration in [int(it) for it in args.eval_iters.split(',')]: fname = args.save_root + 'cache/' + args.exp_name + "/testing-{:d}-eg{:d}.log".format(iteration, eval_gap) log_file = open(fname, "w", 1) log_file.write(args.exp_name + '\n') print(fname) trained_model_path = args.save_root + 'cache/' + args.exp_name + '/AMTNet_' + repr(iteration) + '.pth' 
log_file.write(trained_model_path+'\n') # trained_model_path = '/mnt/sun-alpha/ss-workspace/CVPR2018_WORK/ssd.pytorch_exp/UCF24/guru_ssd_pipeline_weights/ssd300_ucf24_90000.pth' net.load_state_dict(torch.load(trained_model_path)) print('Finished loading model %d !' % iteration) net.eval() net = net.cuda() # evaluation torch.cuda.synchronize() tt0 = time.perf_counter() log_file.write('Testing net \n') mAP, ap_all, ap_strs = test_net(net, priors, args, dataset, iteration) for ap_str in ap_strs: print(ap_str) log_file.write(ap_str + '\n') ptr_str = '\nMEANAP:::=>' + str(mAP) + '\n' print(ptr_str) log_file.write(ptr_str) torch.cuda.synchronize() print('Complete set time {:0.2f}'.format(time.perf_counter() - tt0)) log_file.close()
def train(args):
    """Train an SSD-family detector selected by args.version on VOC-style data.

    Picks the backbone module and config from args.version, the loss from
    args.loss, then runs an iteration-based training loop with MultiStepLR-style
    manual LR adjustment, TensorBoard logging, and periodic checkpointing into
    args.save_folder.
    """
    cfg = (VOC_300, VOC_512)[args.size == '512']
    # backbone/config dispatch; some versions override cfg with their own input size
    if args.version == 'SSD_VGG_Mobile_Little':
        from models.SSD_VGG_Mobile_Little import build_net
        cfg = VEHICLE_240
    elif args.version == 'SSD_VGG_Optim_FPN_RFB':
        from models.SSD_VGG_Optim_FPN_RFB import build_net
    elif args.version == 'SSD_ResNet_FPN':
        from models.SSD_ResNet_FPN import build_net
    elif args.version == 'SSD_HRNet':
        from models.SSD_HRNet import build_net
    elif args.version == 'EfficientDet':
        from models.EfficientDet import build_net
    elif args.version == 'SSD_DetNet':
        from models.SSD_DetNet import build_net
        cfg = DetNet_300
    elif args.version == 'SSD_M2Det':
        from models.SSD_M2Det import build_net
        cfg = M2Det_320
    elif args.version == 'SSD_Pelee':
        from models.SSD_Pelee import build_net
    else:
        # unknown versions silently fall back to the default backbone
        args.version = 'SSD_VGG_RFB'
        from models.SSD_VGG_RFB import build_net
    # loss dispatch; NOTE(review): an unrecognised args.loss leaves `criterion`
    # unbound and fails later with NameError — confirm upstream arg validation
    if args.loss == "OHEM":
        criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
    elif args.loss == "GIOU":
        criterion = GIOUMultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
    elif args.loss == "DIOU":
        criterion = GIOUMultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False,
                                     loss_name='Diou')
    elif args.loss == "CIOU":
        criterion = GIOUMultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False,
                                     loss_name='Ciou')
    elif args.loss == "FocalLoss":
        criterion = FocalLossMultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5,
                                          False, args.anchor)
    if 'withneg' in DATASET:
        train_sets = [
            (DATASET.replace('_withneg', ''), 'trainval_withneg'),
        ]
    else:
        train_sets = [
            (DATASET.replace('_withneg', ''), 'trainval'),
        ]
    if args.resume_epoch == 0:
        args.save_folder = os.path.join(
            args.save_folder, DATASET, args.version,
            args.loss + '_' + args.anchor + '_' + args.fpn_type + '_bz' + str(args.bz))
        if not os.path.exists(args.save_folder):
            os.makedirs(args.save_folder)
    else:
        # resuming: save next checkpoints beside the resumed one
        args.save_folder = Path(args.resume_net).parent
    # some build_net variants do not accept fpn_type; fall back to the short signature
    # NOTE(review): bare except also hides real construction errors
    try:
        net = build_net('train', cfg['min_dim'], num_classes, args.fpn_type)
    except:
        net = build_net('train', cfg['min_dim'], num_classes)
    print(args.save_folder)
    # best-effort FLOPs/params report; ignored if ptflops is unavailable
    try:
        flops, params = get_model_complexity_info(
            net, (cfg['min_dim'], cfg['min_dim']), print_per_layer_stat=False)
        print('FLOPs:', flops, 'Params:', params)
    except:
        pass
    init_net(net, args.resume_net
             )  # init the network with pretrained weights or resumed weights
    if args.ngpu > 1:
        net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))
    if args.cuda:
        net.cuda()
        cudnn.benchmark = True
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=momentum,
                          weight_decay=weight_decay)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        if args.cuda:
            priors = priors.cuda()
    dataset = VOCDetection(VOCroot, train_sets,
                           preproc(cfg['min_dim'], rgb_means, p),
                           AnnotationTransform())
    len_dataset = len(dataset)
    epoch_size = len_dataset // args.bz  # iterations per epoch
    max_iter = args.max_epoch * epoch_size
    print(train_sets, 'len_dataset:', len_dataset, 'max_iter:', max_iter)
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues = stepvalues_VOC
    step_index = 0
    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0
    # when resuming, recover which LR step we are in
    if start_iter > stepvalues[0] and start_iter < stepvalues[1]:
        step_index = 1
    elif start_iter > stepvalues[1] and start_iter < stepvalues[2]:
        step_index = 2
    elif start_iter > stepvalues[2]:
        step_index = 3
    net.train()
    writer = SummaryWriter(args.save_folder)
    loc_loss = 0
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # rebuild the DataLoader each epoch to reshuffle
            batch_iterator = iter(
                data.DataLoader(dataset, args.bz, shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate, pin_memory=True))
            loc_loss = 0
            conf_loss = 0
            # NOTE(review): the second clause is redundant — `epoch % 5 == 0 and
            # epoch > 200` implies the first; effectively saves every 5 epochs
            if (epoch % 5 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > 200):
                torch.save(net.state_dict(),
                           os.path.join(args.save_folder, str(epoch) + '.pth'))
            epoch += 1
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(args.lr, optimizer, gamma, epoch, step_index,
                                  iteration, epoch_size)
        images, targets = next(batch_iterator)
        # print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))
        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]
        # forward / backward / update
        out = net(images)
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + '||EpochIter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '||Totel iter ' + repr(iteration) +
                  '||L: %.4f C: %.4f' % (loss_l.item(), loss_c.item()) +
                  '||LR: %.8f' % (lr))
            writer.add_scalar('Train/total_loss', (loss_l.item() + loss_c.item()),
                              iteration)
            writer.add_scalar('Train/loc_loss', loss_l.item(), iteration)
            writer.add_scalar('Train/conf_loss', loss_c.item(), iteration)
            writer.add_scalar('Train/lr', lr, iteration)
    torch.save(net.state_dict(),
               os.path.join(args.save_folder, str(args.max_epoch) + '.pth'))
net = torch.nn.DataParallel(net, device_ids=args.gpu_id) if args.cuda: net.cuda() cudnn.benchmark = True optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) #optimizer = optim.RMSprop(net.parameters(), lr=args.lr,alpha = 0.9, eps=1e-08, # momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False) priorbox = PriorBox(cfg) priors = Variable(priorbox.forward(), volatile=True) #dataset print('Loading Dataset...') if args.dataset == 'VOC': testset = VOCDetection(VOCroot, [('2007', 'test')], None, AnnotationTransform()) train_dataset = VOCDetection(VOCroot, train_sets, preproc(img_dim, rgb_means, p, rgb_std), AnnotationTransform()) elif args.dataset == 'COCO': testset = COCODetection(COCOroot, [('2014', 'minival')], None) train_dataset = COCODetection(COCOroot, train_sets, preproc(img_dim, rgb_means, p, rgb_std)) else: print('Only VOC and COCO are supported now!') exit()
net.load_state_dict(new_state_dict) if args.ngpu > 1: net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu))) if args.cuda: net.cuda() cudnn.benchmark = True detector = Detect(num_classes, 0, cfg) optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False) priorbox = PriorBox(cfg) priors = Variable(priorbox.forward()) # dataset print('Loading Dataset...') if args.dataset == 'VOC': testset = VOCDetection( VOCroot, [('2007', 'test')], None, AnnotationTransform()) train_dataset = VOCDetection(VOCroot, train_sets, preproc( img_dim, rgb_means, rgb_std, p), AnnotationTransform()) elif args.dataset == 'COCO': testset = COCODetection( COCOroot, [('2017', 'val')], None) #testset = COCODetection(COCOroot, [('2017', 'test-dev')], None) train_dataset = COCODetection(COCOroot, train_sets, preproc( img_dim, rgb_means, rgb_std, p)) else: print('Only VOC and COCO are supported now!')
def demo(v_f):
    """Run M2Det detection on a video file and write annotated frames to result.mp4.

    v_f: path of the input video. Builds the net from the module-level config,
    then per frame: preprocess, forward, per-class threshold + NMS, draw boxes.
    """
    cfg = Config.fromfile(config_f)
    anchor_config = anchors(cfg)
    priorbox = PriorBox(anchor_config)
    net = build_net('test',
                    size=cfg.model.input_size,
                    config=cfg.model.m2det_config)
    init_net(net, cfg, checkpoint_path)
    net.eval().to(device)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    # (2, 0, 1) permutes HWC -> CHW during preprocessing
    _preprocess = BaseTransform(
        cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')
    cap = cv2.VideoCapture(v_f)
    logging.info('detect on: {}'.format(v_f))
    # cap.get(3)/cap.get(4) are frame width/height
    logging.info('video width: {}, height: {}'.format(int(cap.get(3)),
                                                      int(cap.get(4))))
    out_video = cv2.VideoWriter("result.mp4",
                                cv2.VideoWriter_fourcc(*'MJPG'), 24,
                                (int(cap.get(3)), int(cap.get(4))))
    while True:
        ret, image = cap.read()
        if not ret:
            # end of stream: release everything and stop
            out_video.release()
            cv2.destroyAllWindows()
            cap.release()
            break
        w, h = image.shape[1], image.shape[0]
        img = _preprocess(image).unsqueeze(0).to(device)
        scale = torch.Tensor([w, h, w, h])  # map normalized boxes back to pixels
        out = net(img)
        boxes, scores = detector.forward(out, priors)
        boxes = (boxes[0]*scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        for j in range(1, cfg.model.m2det_config.num_classes):  # skip background
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
                np.float32, copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            # min_thresh, device_id=0 if cfg.test_cfg.cuda else None)
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist()+[j] for _ in c_dets])
        if len(allboxes) > 0:
            allboxes = np.array(allboxes)
            # [boxes, scores, label_id] -> [id, score, boxes] 0, 1, 2, 3, 4, 5
            allboxes = allboxes[:, [5, 4, 0, 1, 2, 3]]
            logging.info('allboxes shape: {}'.format(allboxes.shape))
            res = visualize_det_cv2(image, allboxes, classes=classes,
                                    thresh=0.2)
            # res = visualize_det_cv2_fancy(image, allboxes, classes=classes, thresh=0.2, r=4, d=6)
            cv2.imshow('rr', res)
            out_video.write(res)
            cv2.waitKey(1)
c7_channel=args.c7_channel) else: net = None print('loading model!', args.model_dir, args.iteration) net.load_state_dict(torch.load(trained_model)) print(net) net.eval() print('Finished loading model!', args.model_dir, args.iteration, 'tub=' + str(args.tub), 'tub_thresh=' + str(args.tub_thresh), 'tub_score=' + str(args.tub_generate_score)) detector = Detect(num_classes, 0, args.top_k, args.confidence_threshold, args.nms_threshold) priorbox = PriorBox(cfg) # priorbox=PriorBox(multi_cfg['2.2']) with torch.no_grad(): priors = priorbox.forward().to(device) # load data net = net.to(device) # evaluation test_net(args.save_folder, net, dataset, BaseTransform(net.size, dataset_mean), args.top_k, detector, priors) else: out_dir = get_output_dir( pkl_dir, args.iteration + '_' + args.dataset_name + '_' + args.set_file_name) print('Without detection', out_dir) do_python_eval(out_dir) print('Finished!', args.model_dir, args.iteration, 'tub=' + str(args.tub), 'tub_thresh=' + str(args.tub_thresh), 'tub_score=' + str(args.tub_generate_score))
def main():
    """Run (Dual)RefineDet detection over an image list and dump results.

    Reads image names from `img_set`, runs the detector per image, then writes
    per-image pickles/jpegs (optional), a COCO-style JSON or a plain text
    results file, and optionally displays boxes interactively.
    """
    mean = (104, 117, 123)
    print('loading model!')
    if deform:
        from model.dualrefinedet_vggbn import build_net
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        c7_channel=1024,
                        def_groups=deform,
                        multihead=multihead,
                        bn=bn)
    else:
        from model.refinedet_vgg import build_net
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        use_refine=refine,
                        c7_channel=1024,
                        bn=bn)
    net.load_state_dict(torch.load(trained_model))
    net.eval()
    print('Finished loading model!', trained_model)
    net = net.to(device)
    detector = Detect(num_classes, 0, top_k, confidence_threshold, nms_threshold)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    for i, line in enumerate(open(img_set, 'r')):
        # if i==10:
        #     break
        # parse the image identifier; formats differ per dataset
        if 'COCO' in dataset:
            image_name = line[:-1]
            image_id = int(image_name.split('_')[-1])
        elif 'VOC' in dataset:
            image_name = line[:-1]
            image_id = -1
        else:
            image_name, image_id = line.split(' ')
            image_id = image_id[:-1]  # strip trailing newline
        print(i, image_name, image_id)
        image_path = os.path.join(img_root, image_name + '.jpg')
        image = cv2.imread(image_path, 1)
        h, w, _ = image.shape
        image_draw = cv2.resize(image.copy(), (640, 480))
        im_trans = base_transform(image, ssd_dim, mean)
        ######################## Detection ########################
        with torch.no_grad():
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(0, 3, 1, 2).to(device)
            if 'RefineDet' in backbone and refine:
                arm_loc, _, loc, conf = net(x)
            else:
                loc, conf = net(x)
                arm_loc = None
            detections = detector.forward(loc, conf, priors, arm_loc_data=arm_loc)
        ############################################################
        out = list()
        for j in range(1, detections.size(1)):  # skip background class 0
            dets = detections[0, j, :]
            if dets.sum() == 0:
                continue
            # keep rows with positive score
            mask = dets[:, 0].gt(0.).expand(dets.size(-1), dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
            # 6-column rows carry an extra trailing field; boxes are cols 1..4
            boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            boxes_np = boxes.cpu().numpy()
            for b, s in zip(boxes_np, scores):
                if save_dir:
                    out.append(
                        [int(b[0]), int(b[1]), int(b[2]), int(b[3]), j - 1, s])
                    if 'COCO' in dataset:
                        # COCO wants xywh boxes
                        det_list.append({
                            'image_id': image_id,
                            'category_id': labelmap[j],
                            'bbox': [
                                float('{:.1f}'.format(b[0])),
                                float('{:.1f}'.format(b[1])),
                                float('{:.1f}'.format(b[2] - b[0] + 1)),
                                float('{:.1f}'.format(b[3] - b[1] + 1))
                            ],
                            'score': float('{:.2f}'.format(s))
                        })
                    else:
                        results_file.write(
                            str(image_id) + ' ' + str(j) + ' ' + str(s) + ' ' +
                            str(np.around(b[0], 2)) + ' ' + str(np.around(b[1], 2))
                            + ' ' + str(np.around(b[2], 2)) + ' ' +
                            str(np.around(b[3], 2)) + '\n')
                if display:
                    # boxes are drawn on the 640x480 preview, hence the rescale
                    cv2.rectangle(image_draw,
                                  (int(b[0] / w * 640), int(b[1] / h * 480)),
                                  (int(b[2] / w * 640), int(b[3] / h * 480)),
                                  (0, 255, 0),
                                  thickness=1)
                    cls = class_name[j] if 'COCO' in dataset else str(
                        labelmap[j - 1])
                    put_str = cls + ':' + str(np.around(s, decimals=2))
                    cv2.putText(
                        image_draw,
                        put_str,
                        (int(b[0] / w * 640), int(b[1] / h * 480) - 10),
                        cv2.FONT_HERSHEY_DUPLEX,
                        0.5,
                        color=(0, 255, 0),
                        thickness=1)
        if display:
            cv2.imshow('frame', image_draw)
            ch = cv2.waitKey(0)
            if ch == 115:  # 's' key: save current image's outputs
                if save_dir:
                    print('save: ', line)
                    torch.save(
                        out,
                        os.path.join(save_dir, '%s.pkl' % str(line[:-1])))
                    cv2.imwrite(
                        os.path.join(save_dir, '%s.jpg' % str(line[:-1])),
                        image)
                    cv2.imwrite(
                        os.path.join(save_dir, '%s_box.jpg' % str(line[:-1])),
                        image_draw)
    cv2.destroyAllWindows()
    if save_dir:
        if dataset == 'COCO':
            json.dump(det_list, results_file)
        results_file.close()
def train():
    """Train RefineDet: ARM + ODM branches with their own RefineMultiBoxLoss.

    Builds the refine network, optionally resumes, then runs an iteration-based
    loop with optional per-epoch evaluation and visdom plotting. Checkpoints
    every 5000 iterations and once more at the end.
    """
    # network set-up
    ssd_net = build_refine('train', cfg['min_dim'], cfg['num_classes'],
                           use_refine=True, use_tcb=True)
    net = ssd_net
    if args.cuda:
        net = torch.nn.DataParallel(
            ssd_net)  # state_dict will have .module. prefix
        cudnn.benchmark = True
    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        print('Using preloaded base network...')  # Preloaded.
        print('Initializing other weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.trans_layers.apply(weights_init)
        ssd_net.latent_layrs.apply(weights_init)
        ssd_net.up_layers.apply(weights_init)
        ssd_net.arm_loc.apply(weights_init)
        ssd_net.arm_conf.apply(weights_init)
        ssd_net.odm_loc.apply(weights_init)
        ssd_net.odm_conf.apply(weights_init)
    if args.cuda:
        net = net.cuda()
    # otimizer and loss set-up
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    # ARM is class-agnostic (2 classes: object vs background)
    arm_criterion = RefineMultiBoxLoss(2, 0.5, True, 0, True, 3, 0.5, False, 0,
                                       args.cuda)
    odm_criterion = RefineMultiBoxLoss(
        cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False, 0.01,
        args.cuda)  # 0.01 -> 0.99 negative confidence threshold
    # different from normal ssd, where the PriorBox is stored inside SSD object
    priorbox = PriorBox(cfg)
    # NOTE(review): volatile=True was removed in PyTorch 0.4; this code targets
    # pre-0.4 PyTorch (see .data[0] uses below)
    priors = Variable(priorbox.forward(), volatile=True)
    # detector used in test_net for testing
    detector = RefineDetect(cfg['num_classes'], 0, cfg, object_score=0.01)
    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')
    epoch_size = len(dataset) // args.batch_size
    print('Training refineDet on:', dataset.name)
    print('Using the specified args:')
    print(args)
    if args.visdom:
        import visdom
        viz = visdom.Visdom()
        # initialize visdom loss plot
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)
    # adjust learning rate based on epoch
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    step_index = 0
    # training data loader
    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    # batch_iterator = None
    # running means for the four branch losses, reset every 10 iterations
    mean_odm_loss_c = 0
    mean_odm_loss_l = 0
    mean_arm_loss_c = 0
    mean_arm_loss_l = 0
    # max_iter = cfg['max_epoch'] * epoch_size
    for iteration in range(args.start_iter, cfg['max_epoch'] * epoch_size + 10):
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(
                data_loader)  # the dataloader cannot re-initilize
            images, targets = next(batch_iterator)
        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            # update visdom loss plot
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
        if iteration != 0 and (iteration % epoch_size == 0):
            # adjust_learning_rate(optimizer, args.gamma, epoch)
            # evaluation
            if args.evaluate == True:
                # load net
                net.eval()
                APs, mAP = test_net(args.eval_folder, net, detector, priors,
                                    args.cuda, val_dataset,
                                    BaseTransform(net.module.size,
                                                  cfg['testset_mean']),
                                    args.max_per_image,
                                    thresh=args.confidence_threshold
                                    )  # 320 originally for cfg['min_dim']
                net.train()
            epoch += 1
        # update learning rate
        if iteration in stepvalues:
            step_index = stepvalues.index(iteration) + 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)
        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(ann.cuda(), volatile=True) for ann in targets]
        else:
            images = Variable(images)
            targets = [Variable(ann, volatile=True) for ann in targets]
        # forward
        t0 = time.time()
        out = net(images)
        arm_loc, arm_conf, odm_loc, odm_conf = out
        # backprop
        optimizer.zero_grad()
        #arm branch loss
        #priors = priors.type(type(images.data)) #convert to same datatype
        arm_loss_l, arm_loss_c = arm_criterion((arm_loc, arm_conf), priors,
                                               targets)
        #odm branch loss
        odm_loss_l, odm_loss_c = odm_criterion(
            (odm_loc, odm_conf), priors, targets, (arm_loc, arm_conf), False)
        # NOTE(review): `.data[0]` is the pre-0.4 loss-scalar idiom (now .item())
        mean_arm_loss_c += arm_loss_c.data[0]
        mean_arm_loss_l += arm_loss_l.data[0]
        mean_odm_loss_c += odm_loss_c.data[0]
        mean_odm_loss_l += odm_loss_l.data[0]
        loss = arm_loss_l + arm_loss_c + odm_loss_l + odm_loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Total iter ' + repr(iteration) +
                  ' || AL: %.4f AC: %.4f OL: %.4f OC: %.4f||' %
                  (mean_arm_loss_l / 10, mean_arm_loss_c / 10,
                   mean_odm_loss_l / 10, mean_odm_loss_c / 10) +
                  'Timer: %.4f sec. ||' % (t1 - t0) +
                  'Loss: %.4f ||' % (loss.data[0]) + 'LR: %.8f' % (lr))
            mean_odm_loss_c = 0
            mean_odm_loss_l = 0
            mean_arm_loss_c = 0
            mean_arm_loss_l = 0
        # if args.visdom:
        #     update_vis_plot(iteration, loss_l.data[0], loss_c.data[0],
        #                     iter_plot, epoch_plot, 'append')
        if iteration != 0 and iteration % 5000 == 0:
            print('Saving state, iter:', iteration)
            # save the unwrapped module so keys have no `module.` prefix
            torch.save(ssd_net.state_dict(),
                       'weights/ssd300_refineDet_' + repr(iteration) + '.pth')
    torch.save(ssd_net.state_dict(),
               args.save_folder + '' + args.dataset + '.pth')
def get_prior(): cfg = (VOC_300, VOC_512)[args.size == '512'] priorbox = PriorBox(cfg) priors = Variable(priorbox.forward(), volatile=True) return priors
class EfficientDet(nn.Module): def __init__(self, num_class=21, levels=3, num_channels=128, model_name='efficientnet-b0'): super(EfficientDet, self).__init__() self.num_class = num_class self.levels = levels self.num_channels = num_channels self.efficientnet = EfficientNet.from_pretrained(model_name) self.cfg = (coco, voc)[num_class == 21] self.priorbox = PriorBox(self.cfg) self.priors = Variable(self.priorbox.forward(), volatile=True) self.num_anchor = 9 self.class_module = list() self.regress_module = list() for _ in range(3, 8): self.class_module.append( nn.Sequential( nn.Conv2d(in_channels=self.num_channels, out_channels=64, kernel_size=2, stride=1), nn.Conv2d(in_channels=64, out_channels=self.num_anchor * num_class, kernel_size=2, stride=1))) self.regress_module.append( nn.Sequential( nn.Conv2d(in_channels=self.num_channels, out_channels=64, kernel_size=2, stride=1), nn.Conv2d(in_channels=64, out_channels=self.num_anchor * 4, kernel_size=2, stride=1))) self.BIFPN = BIFPN(in_channels=[40, 80, 112, 192, 320], out_channels=self.num_channels, num_outs=5) self.sigmoid = nn.Sigmoid() def forward(self, inputs): P1, P2, P3, P4, P5, P6, P7 = self.efficientnet(inputs) P3, P4, P5, P6, P7 = self.BIFPN([P3, P4, P5, P6, P7]) feature_classes = [] feature_bboxes = [] for i, p in enumerate([P3, P4, P5, P6, P7]): feature_class = self.class_module[i](p) feature_class = feature_class.view(-1, self.num_class) feature_class = self.sigmoid(feature_class) feature_classes.append(feature_class) feature_bbox = self.regress_module[i](p) feature_bbox = feature_bbox.view(-1, 4) feature_bbox = self.sigmoid(feature_bbox) feature_bboxes.append(feature_bbox) feature_classes = torch.cat(feature_classes, axis=0) feature_bboxes = torch.cat(feature_bboxes, axis=0) output = (feature_bboxes.view(inputs.size(0), -1, 4), feature_classes.view(inputs.size(0), -1, self.num_class), self.priors) return output
class SSD(nn.Module):
    """Single Shot Multibox Architecture
    The network is composed of a base VGG network followed by the
    added multibox conv layers.  Each multibox layer branches into
        1) conv2d for class conf scores
        2) conv2d for localization predictions
        3) associated priorbox layer to produce default bounding
           boxes specific to the layer's feature map size.
    See: https://arxiv.org/pdf/1512.02325.pdf for more details.

    Args:
        phase: (string) Can be "test" or "train"
        size: input image size
        base: VGG16 layers for input, size of either 300 or 500
        extras: extra layers that feed to multibox loc and conf layers
        head: "multibox head" consists of loc and conf conv layers
    """

    def __init__(self, phase, size, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = (coco, voc)[num_classes == 21]  # voc config iff 21 classes
        self.priorbox = PriorBox(self.cfg)
        # Anchors are constants; `Variable(..., volatile=True)` was removed in
        # PyTorch 0.4 — torch.no_grad() is the modern equivalent.
        with torch.no_grad():
            self.priors = self.priorbox.forward()
        self.size = size

        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        """Applies network layers and ops on input image(s) x.

        Args:
            x: input image or batch of images. Shape: [batch,3,300,300].

        Return:
            Depending on phase:
            test:
                Variable(tensor) of output class label predictions,
                confidence score, and corresponding location predictions for
                each object detected. Shape: [batch,topk,7]

            train:
                list of concat outputs from:
                    1: confidence layers, Shape: [batch*num_priors,num_classes]
                    2: localization layers, Shape: [batch,num_priors*4]
                    3: priorbox layers, Shape: [2,num_priors*4]
        """
        sources = list()
        loc = list()
        conf = list()

        # apply vgg up to conv4_3 relu
        for k in range(23):
            x = self.vgg[k](x)
        s = self.L2Norm(x)
        sources.append(s)

        # apply vgg up to fc7
        for k in range(23, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # apply extra layers and cache source layer outputs
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:  # every second extra layer is a detection source
                sources.append(x)

        # apply multibox head to source layers; permute to NHWC before flatten
        for (x, l, c) in zip(sources, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
        if self.phase == "test":
            output = self.detect(
                loc.view(loc.size(0), -1, 4),  # loc preds
                self.softmax(conf.view(conf.size(0), -1,
                                       self.num_classes)),  # conf preds
                self.priors.type(type(x.data))  # default boxes
            )
        else:
            output = (loc.view(loc.size(0), -1, 4),
                      conf.view(conf.size(0), -1, self.num_classes),
                      self.priors)
        return output

    def load_weights(self, base_file):
        """Load a serialized state_dict (.pth or .pkl) from base_file."""
        _, ext = os.path.splitext(base_file)
        # BUG FIX: the original condition was `ext == '.pkl' or '.pth'`, which
        # is always truthy ('.pth' is a non-empty string), so the error branch
        # could never run and any extension was accepted.
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict...')
            self.load_state_dict(
                torch.load(base_file,
                           map_location=lambda storage, loc: storage))
            print('Finished!')
        else:
            print('Sorry only .pth and .pkl files supported.')
if args.ngpu > 1: net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu))) if args.cuda: net.cuda() cudnn.benchmark = True optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) #optimizer = optim.RMSprop(net.parameters(), lr=args.lr,alpha = 0.9, eps=1e-08, # momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False) priorbox = PriorBox(cfg) priors = Variable(priorbox.forward(), volatile=True) def train(): net.train() # loss counters loc_loss = 0 # epoch conf_loss = 0 epoch = 0 + args.resume_epoch print('Loading Dataset...') if args.dataset == 'VOC': dataset = VOCDetection(VOCroot, train_sets, preproc( img_dim, rgb_means, p), AnnotationTransform()) elif args.dataset == 'COCO': dataset = COCODetection(COCOroot, train_sets, preproc(
def train():
    """Train an RFB-Net variant (selected by args.version) on VOC or COCO.

    Builds the network, loads base or resumed weights, then runs an
    iteration-based loop with stepped LR decay, periodic checkpoints, and a
    final 'Final_<version>_<dataset>.pth' save.
    """
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)
    if args.dataset == 'VOC':
        train_sets = [('2007', 'trainval'), ('2012', 'trainval')]
        cfg = (VOC_300, VOC_512)[args.size == '512']
    else:
        train_sets = [('2014', 'train'), ('2014', 'valminusminival')]
        cfg = (COCO_300, COCO_512)[args.size == '512']
    # model-version dispatch
    if args.version == 'RFB_vgg':
        from models.RFB_Net_vgg import build_net
    elif args.version == 'RFB_E_vgg':
        from models.RFB_Net_E_vgg import build_net
    elif args.version == 'RFB_d2':
        from models.RFB_Net_vgg_d2 import build_net
    elif args.version == 'RFB_d3':
        from models.RFB_Net_vgg_d3 import build_net
    elif args.version == 'RFB_d4':
        from models.RFB_Net_vgg_d4 import build_net
    elif args.version == 'RFB_d4_fpn':
        from models.RFB_Net_vgg_d4_fpn import build_net
    elif args.version == 'RFB_mobile':
        from models.RFB_Net_mobile import build_net
        cfg = COCO_mobile_300
    else:
        # NOTE(review): prints (with a typo) but does not return, so the
        # later build_net call fails with NameError on unknown versions
        print('Unkown version!')
    logging.info('build model version: {}'.format(args.version))
    img_dim = (300, 512)[args.size == '512']
    rgb_means = ((104, 117, 123),
                 (103.94, 116.78, 123.68))[args.version == 'RFB_mobile']
    p = (0.6, 0.2)[args.version == 'RFB_mobile']
    # 738:6 classes ; 2392:7 ; 8718:6
    num_classes = (21, 81)[args.dataset == 'COCO']
    logging.info('dataset number of classes: {}'.format(num_classes))
    batch_size = args.batch_size
    weight_decay = 0.0005
    gamma = 0.1
    momentum = 0.9
    net = build_net('train', img_dim, num_classes)
    # print(net)
    if args.resume_net == None:
        # fresh start: load ImageNet base weights, init the new layers
        base_weights = torch.load(args.basenet)
        from collections import OrderedDict
        print('Loading base network...')
        net.base.load_state_dict(base_weights)

        def xavier(param):
            # NOTE(review): defined but unused; weights_init uses kaiming below
            init.xavier_uniform(param)

        def weights_init(m):
            # kaiming-init conv weights; BN weights -> 1, all biases -> 0
            for key in m.state_dict():
                if key.split('.')[-1] == 'weight':
                    if 'conv' in key:
                        init.kaiming_normal_(m.state_dict()[key], mode='fan_out')
                    if 'bn' in key:
                        m.state_dict()[key][...] = 1
                elif key.split('.')[-1] == 'bias':
                    m.state_dict()[key][...] = 0
        print('Initializing weights...')
        # initialize newly added layers' weights with kaiming_normal method
        net.extras.apply(weights_init)
        net.loc.apply(weights_init)
        net.conf.apply(weights_init)
        net.Norm.apply(weights_init)
        if args.version == 'RFB_E_vgg':
            net.reduce.apply(weights_init)
            net.up_reduce.apply(weights_init)
    else:
        # load resume network
        print('Loading resume network...')
        state_dict = torch.load(args.resume_net)
        # create new OrderedDict that does not contain `module.`
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)
    if args.ngpu > 1:
        net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))
    if args.cuda:
        net.cuda()
        cudnn.benchmark = True
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    # optimizer = optim.RMSprop(net.parameters(), lr=args.lr,alpha = 0.9, eps=1e-08,
    #                       momentum=args.momentum, weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        if args.cuda:
            priors = priors.cuda()
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    logging.info('Loading Dataset: {}'.format(args.dataset))
    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets,
                               preproc(img_dim, rgb_means, p),
                               AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets,
                                preproc(img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return
    epoch_size = len(dataset) // args.batch_size  # iterations per epoch
    max_iter = args.max_epoch * epoch_size
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0
    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0
    lr = args.lr
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator (rebuilt each epoch to reshuffle)
            batch_iterator = iter(
                data.DataLoader(dataset, batch_size, shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            loc_loss = 0
            conf_loss = 0
            # checkpoint every 10 epochs, and every 5 epochs after epoch 200
            if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > 200):
                torch.save(
                    net.state_dict(),
                    args.save_folder + args.version + '_' + args.dataset +
                    '_epoches_' + repr(epoch) + '.pth')
            epoch += 1
        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)
        # load train data
        images, targets = next(batch_iterator)
        #print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))
        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        load_t1 = time.time()
        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Totel iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' % (loss_l.item(), loss_c.item()) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
    torch.save(
        net.state_dict(),
        os.path.join(args.save_folder,
                     'Final_' + args.version + '_' + args.dataset + '.pth'))
def test_net(save_folder, net, dataset, transform, top_k, detector, priors):
    """Test a Fast R-CNN network on an image database.

    Runs multi-scale inference (each scale also evaluated on the horizontal
    flip), filters each scale's boxes to the size range that scale is trusted
    for, merges everything per class with bbox voting, then pickles and
    evaluates the detections.

    Relies on module-level globals: labelmap, args, multi_scale, multi_cfg,
    ssd_dim, device, dataset_mean, pkl_dir, base_transform, bbox_vote,
    get_output_dir, evaluate_detections, Timer.
    NOTE(review): save_folder, transform, top_k and the incoming `priors`
    argument are never used — priors are rebuilt per scale below.
    """
    num_images = len(dataset)
    # all detections are collected into:score
    # all_boxes[cls][image] = N x 5 array of detections in
    # (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(len(labelmap) + 1)]
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    all_time = 0.
    output_dir = get_output_dir(
        pkl_dir,
        args.iteration + '_' + args.dataset_name + '_' + args.set_file_name)
    det_file = os.path.join(output_dir, 'detections.pkl')
    output_dir = get_output_dir(output_dir, 'multi_test')

    ######################### Multiscale PriorBox #####################
    # One prior-box tensor per input resolution, built once up front and
    # keyed by the resolution string (e.g. '320', '512').
    priorboxes = {}
    for v1 in multi_scale[str(ssd_dim)]:
        if not multi_cfg[str(v1)]:
            # Early exit: no prior-box config registered for this scale.
            return ("not included this multi_scale")
        priorbox = PriorBox(multi_cfg[str(v1)])
        img_size = multi_cfg[str(v1)]['min_dim']
        with torch.no_grad():
            priorboxes[str(img_size)] = priorbox.forward().to(device)

    ########################## Detection ##############################
    for i in range(num_images):
        _t['im_detect'].tic()
        image = dataset.pull_image(i)
        h, w, _ = image.shape
        # Detections from every (scale, flip) pass, keyed
        # '<ssd_dim>_<scale>_<0|1>' where the trailing flag marks the flip.
        detections_multi = {}
        for v in multi_scale[str(ssd_dim)]:
            priors = priorboxes[str(v)]
            ssd_dim_temp = int(v)
            # loop == 0: original image; loop == 1: horizontal flip.
            for loop in range(2):
                if (loop == 0):
                    im_trans = base_transform(image, ssd_dim_temp,
                                              dataset_mean)
                    im_trans = im_trans[:, :, (2, 1, 0)]  # BGR -> RGB
                else:
                    im_f = image.copy()
                    im_f = cv2.flip(im_f, 1)
                    im_trans = base_transform(im_f, ssd_dim_temp,
                                              dataset_mean)
                    im_trans = im_trans[:, :, (2, 1, 0)]  # BGR -> RGB
                with torch.no_grad():
                    # HWC numpy -> NCHW tensor on the inference device.
                    x = torch.from_numpy(im_trans).unsqueeze(0).permute(
                        0, 3, 1, 2).to(device)
                    if 'RefineDet' in args.backbone and args.refine:
                        # RefineDet: ARM locations refine the decoded boxes.
                        arm_loc, _, loc, conf = net(x)
                        detections = detector.forward(loc, conf, priors,
                                                      arm_loc_data=arm_loc)
                        detections_multi[str(ssd_dim) + '_' + str(v) + '_' +
                                         str(loop)] = detections.clone()
                    else:
                        loc, conf = net(x)
                        arm_loc = None
                        detections = detector.forward(loc, conf, priors,
                                                      arm_loc_data=arm_loc)
                        detections_multi[str(ssd_dim) + '_' + str(v) + '_' +
                                         str(loop)] = detections.clone()
        detect_time = _t['im_detect'].toc(average=False)
        # Skip the first images when timing so warm-up doesn't skew FPS.
        if i > 10:
            all_time += detect_time

        ###################################################################
        # Merge per-scale results class by class (class 0 = background).
        for j in range(1, detections.size(1)):
            cls_dets = np.array([])
            for k, d in detections_multi.items():
                dets = d[0, j, :]
                if dets.sum() == 0:
                    continue
                # Keep rows whose score (column 0) is positive.
                mask = dets[:, 0].gt(0.).expand(dets.size(-1),
                                                dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
                # Rows are either (score, x1, y1, x2, y2) or
                # (score, x1, y1, x2, y2, extra) — strip score (and extra).
                boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
                if (k[-1] == '1'):
                    # Undo the horizontal flip: mirror x coords and swap
                    # x1/x2 so x1 <= x2 again.
                    boxes[:, 0] = 1 - boxes[:, 0]
                    boxes[:, 2] = 1 - boxes[:, 2]
                    temp_swap = boxes[:, 0].clone()
                    boxes[:, 0] = boxes[:, 2]
                    boxes[:, 2] = temp_swap
                # Scale normalized coords back to the original image size.
                boxes[:, 0] *= w
                boxes[:, 2] *= w
                boxes[:, 1] *= h
                boxes[:, 3] *= h
                # Per-scale size gating: shrunken inputs are only trusted for
                # large boxes (max side above a floor), enlarged inputs only
                # for small boxes (min side below a cap). Thresholds are in
                # original-image pixels.
                if k in ['320_192_0', '320_192_1', '512_320_0', '512_320_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.maximum(boxes_np[:, 2] - boxes_np[:, 0] + 1,
                                   boxes_np[:, 3] - boxes_np[:, 1] + 1)
                        > 32)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_320_0', '320_320_1', '512_512_0',
                           '512_512_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.maximum(boxes_np[:, 2] - boxes_np[:, 0] + 1,
                                   boxes_np[:, 3] - boxes_np[:, 1] + 1)
                        > 0)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_384_0', '320_384_1', '512_640_0',
                           '512_640_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] + 1,
                                   boxes_np[:, 3] - boxes_np[:, 1] + 1)
                        < 160)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_448_0', '320_448_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] + 1,
                                   boxes_np[:, 3] - boxes_np[:, 1] + 1)
                        < 128)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_512_0', '320_512_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] + 1,
                                   boxes_np[:, 3] - boxes_np[:, 1] + 1)
                        < 96)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_576_0', '320_576_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] + 1,
                                   boxes_np[:, 3] - boxes_np[:, 1] + 1)
                        < 64)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in [
                        '320_706_0', '320_706_1', '512_1216_0', '512_1216_1'
                ]:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] + 1,
                                   boxes_np[:, 3] - boxes_np[:, 1] + 1)
                        < 32)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                # NOTE(review): if k matches none of the branches above,
                # index_temp here is stale from a previous key (or unbound on
                # the very first iteration) — confirm every key is covered.
                if (index_temp.size == 0):
                    continue
                scores = dets[index_temp, 0].cpu().numpy()
                cls_dets_temp = np.hstack((boxes.cpu().numpy(),
                                           scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                if (cls_dets.size == 0):
                    cls_dets = cls_dets_temp.copy()
                else:
                    cls_dets = np.concatenate((cls_dets, cls_dets_temp),
                                              axis=0)
            # Fuse overlapping boxes from the different scales by weighted
            # voting instead of plain NMS.
            if (cls_dets.size != 0):
                cls_dets = bbox_vote(cls_dets)
            if (len(cls_dets) != 0):
                all_boxes[j][i] = cls_dets
        print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images,
                                                    detect_time))
    # NOTE(review): all_time covers images with i > 10 (num_images - 11 of
    # them) but the divisor is num_images - 10 — looks off by one.
    FPS = (num_images - 10) / all_time
    print('FPS:', FPS)
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print('Evaluating detections')
    evaluate_detections(all_boxes, output_dir, dataset, FPS=FPS)
def test(img_path, model_path='weights/RFB_vgg_COCO_30.3.pth'):
    """Run single-image RFBNet detection and display the annotated result.

    Loads the COCO-trained RFBNet checkpoint at `model_path` (the mobile
    variant when the path contains 'mobile'), runs one forward pass on the
    image at `img_path`, keeps detections with confidence > 0.6 after NMS,
    draws them on the image and shows it in an OpenCV window (blocks until a
    key is pressed).

    Args:
        img_path: path to the input image (read with cv2.imread).
        model_path: path to the trained checkpoint; also selects the
            architecture and prior-box config.

    Relies on module-level globals: COCO_300, COCO_mobile_300, PriorBox,
    Detect, BaseTransform, build_rfb_vgg_net, build_rfb_mobilenet, nms.
    """
    trained_model = model_path
    # BUG FIX: `cuda` was only assigned inside `if torch.cuda.is_available()`,
    # so every later `if cuda:` raised NameError on CPU-only machines.
    cuda = torch.cuda.is_available()
    if 'mobile' in model_path:
        cfg = COCO_mobile_300
    else:
        cfg = COCO_300
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        if cuda:
            priors = priors.cuda()
    numclass = 81  # 80 COCO classes + background
    img = cv2.imread(img_path)
    # Scale factors to map normalized box coords back to pixel coords.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    if 'mobile' in model_path:
        net = build_rfb_mobilenet('test', 300, numclass)  # initialize detector
    else:
        net = build_rfb_vgg_net('test', 300, numclass)  # initialize detector
    transform = BaseTransform(net.size, (123, 117, 104), (2, 0, 1))
    with torch.no_grad():
        x = transform(img).unsqueeze(0)
        if cuda:
            x = x.cuda()
            scale = scale.cuda()
    state_dict = torch.load(trained_model)['state_dict']
    # Create new OrderedDict that does not contain the `module.` prefix
    # left over from DataParallel training.
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        if k[:7] == 'module.':
            name = k[7:]  # remove `module.`
        else:
            name = k
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)
    net.eval()
    if cuda:
        net = net.cuda()
        cudnn.benchmark = True
    else:
        net = net.cpu()
    print('Finished loading model!')
    detector = Detect(numclass, 0, cfg)
    tic = time.time()
    out = net(x)  # forward pass
    boxes, scores = detector.forward(out, priors)
    print('Finished in {}'.format(time.time() - tic))
    boxes = boxes[0]
    scores = scores[0]
    boxes *= scale  # back to pixel coordinates
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy()
    # Per class (skipping background), threshold, NMS, then draw.
    for j in range(1, numclass):
        inds = np.where(scores[:, j] > 0.6)[0]  # conf > 0.6
        # BUG FIX: np.where(...)[0] is never None; test for emptiness instead.
        if inds.size == 0:
            continue
        c_bboxes = boxes[inds]
        c_scores = scores[inds, j]
        c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
            np.float32, copy=False)
        keep = nms(c_dets, 0.6)
        c_dets = c_dets[keep, :]
        c_bboxes = c_dets[:, :4]
        if c_bboxes.shape[0] != 0:
            print('{}: {}'.format(j, c_bboxes))
            for box in c_bboxes:
                # OpenCV drawing APIs require integer pixel coordinates.
                x1, y1, x2, y2 = (int(box[0]), int(box[1]), int(box[2]),
                                  int(box[3]))
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 1, 0)
                cv2.putText(img, '{}'.format(j), (x1, y1),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2,
                            cv2.LINE_AA)
    cv2.imshow('rr', img)
    cv2.waitKey(0)
def handler(context):
    """ABEJA Platform training entry point for an SSD detector.

    Pulls the 'trainval' and 'test' datasets referenced by `context`, builds
    an SSD network initialized from downloaded reduced-VGG16 weights, trains
    for `max_iter` iterations with SGD + MultiBox loss (stepped learning
    rate), logs every 100 iterations, evaluates every 10000, and saves the
    final weights to ABEJA_TRAINING_RESULT_DIR/model.pth.

    Relies on module-level globals: min_dim, MEANS, PARAMS, num_classes, lr,
    batch_size, max_iter, lr_steps, device, writer, statistics,
    adjust_learning_rate, download, tools, ABEJA_TRAINING_RESULT_DIR, and an
    `eval` evaluation helper.
    """
    dataset_alias = context.datasets
    trainval_dataset_id = dataset_alias['trainval']
    test_dataset_id = dataset_alias['test']
    # Materialize both datasets from the remote API before wrapping them.
    trainval_dataset = list(load_dataset_from_api(trainval_dataset_id))
    test_dataset = list(load_dataset_from_api(test_dataset_id))

    trainval = DetectionDatasetFromAPI(trainval_dataset,
                                       transform=SSDAugmentation(
                                           min_dim, MEANS))
    test = DetectionDatasetFromAPI(test_dataset,
                                   transform=SSDAugmentation(min_dim, MEANS))
    # NOTE(review): test data gets the same augmenting transform as training
    # data — confirm that is intentional for evaluation.
    train_dataset = trainval
    test_dataset = test

    # Priors are constants shared by the network and the loss.
    priorbox = PriorBox(min_dim, PARAMS)
    with torch.no_grad():
        priors = priorbox.forward().to(device)

    ssd_net = build_ssd('train', priors, min_dim, num_classes)
    ssd_net = ssd_net.to(device)

    # Seed the VGG backbone with the standard reduced-fc weights.
    url = 'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth'
    weight_file = os.path.join(ABEJA_TRAINING_RESULT_DIR,
                               'vgg16_reducedfc.pth')
    download(url, weight_file)
    vgg_weights = torch.load(weight_file)
    print('Loading base network...')
    ssd_net.vgg.load_state_dict(vgg_weights)

    optimizer = optim.SGD(ssd_net.parameters(), lr=lr, momentum=0.9,
                          weight_decay=5e-4)
    criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False,
                             PARAMS['variance'], device)

    # loss counters
    step_index = 0
    trainloader = data.DataLoader(train_dataset, batch_size, num_workers=0,
                                  shuffle=True,
                                  collate_fn=tools.detection_collate,
                                  pin_memory=True)
    testloader = data.DataLoader(test_dataset, batch_size, num_workers=0,
                                 shuffle=False,
                                 collate_fn=tools.detection_collate,
                                 pin_memory=True)

    # create batch iterator; `iteration` counts optimizer steps across epochs.
    iteration = 1
    while iteration <= max_iter:
        ssd_net.train()
        for images, targets in trainloader:
            if iteration > max_iter:
                break
            if iteration in lr_steps:
                step_index += 1
                adjust_learning_rate(optimizer, 0.1, step_index)

            # load train data
            images = images.to(device)
            targets = [ann.to(device) for ann in targets]

            # forward
            out = ssd_net(images)

            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()

            if iteration % 100 == 0:
                print('[Train] iter {}, loss: {:.4f}'.format(
                    iteration, loss.item()))
                statistics(iteration, loss.item(), None, None, None)
                writer.add_scalar('main/loss', loss.item(), iteration)
                writer.add_scalar('main/loc_loss', loss_l.item(), iteration)
                writer.add_scalar('main/conf_loss', loss_c.item(), iteration)

            if iteration % 10000 == 0:
                # NOTE(review): `eval` here shadows the builtin — presumably a
                # module-level evaluation helper; confirm and consider renaming.
                eval(testloader, ssd_net, criterion, iteration)
                ssd_net.train()  # restore train mode after evaluation
            iteration += 1

    torch.save(ssd_net.state_dict(),
               os.path.join(ABEJA_TRAINING_RESULT_DIR, 'model.pth'))
class Trainer(object):
    """Checkpointed SSD training driver.

    Wraps a detection model with an SGD optimizer, MultiBox loss and
    prior boxes, supports loading pretrained weights and resuming from
    checkpoints, and saves checkpoints periodically (and on Ctrl-C).

    kwargs must contain 'resume_from' and 'checkpoint_dir'; it may contain
    'pretrained_path'.
    NOTE(review): several methods use a module-level global `device` rather
    than `self.device` computed in __init__ — confirm they agree.
    """

    def __init__(self, model, cfg, train_loader, val_loader, save_epochs,
                 **kwargs):
        self.kwargs = kwargs
        self.cfg = cfg
        # Save a checkpoint every `save_epochs` epochs.
        self.save_epochs = save_epochs
        self.device = torch.device(
            "cuda") if torch.cuda.is_available() else torch.device("cpu")
        self.model = model.to(self.device)
        self.train_loader = train_loader
        assert isinstance(
            self.train_loader,
            DataLoader), 'train_loader must be DataLoader instance.'
        # NOTE(review): `dataset.classes` is passed to MultiBoxLoss as a
        # class count — confirm it is an int and not a list of names.
        self.num_classes = self.train_loader.dataset.classes
        self.val_loader = val_loader
        self.start_epoch = 0
        self.loss = None
        self.optimizer = None
        # Builds self.optimizer, self.criterion, self.priorbox, self.priors.
        self._create_optimization()
        self.resume_from = self.kwargs['resume_from']
        self.checkpoint_dir = self.kwargs['checkpoint_dir']
        self.load_pretrained_model()
        self.load_checkpoint(self.kwargs['resume_from'])

    def train(self, epochs=1000):
        """Run the training loop from start_epoch to `epochs`.

        Per-batch loss errors are logged and skipped; KeyboardInterrupt
        saves a checkpoint before returning.
        """
        print('Start to train...')
        try:
            for e in range(self.start_epoch, epochs):
                i = 0
                for data, target in self.train_loader:
                    i += 1
                    # NOTE(review): global `device`, not self.device.
                    images = Variable(data.to(device))
                    targets = [Variable(anno.to(device)) for anno in target]
                    out = self.model(images)
                    try:
                        self.optimizer.zero_grad()
                        loss_l, loss_c = self.criterion(
                            out, self.priors, targets)
                        loss = loss_l + loss_c
                        loss.backward()
                        self.optimizer.step()
                        if i % 10 == 0:
                            print(
                                'Epoch: {}, iter: {}, loc_loss: {}, cls_loss: {}'
                                .format(e, i, loss_l, loss_c))
                    except Exception as _e:
                        # Best-effort: a bad batch should not kill the run.
                        print('Got loss error in train: {}'.format(_e))
                        print('continue....')
                        continue
                if e % self.save_epochs == 0:
                    print('Saving checkpoints at epoch: {}'.format(e))
                    self.save_checkpoint(
                        {
                            'epoch': e + 1,
                            'state_dict': self.model.state_dict(),
                            'optimizer': self.optimizer.state_dict(),
                        }, is_best=False)
        except KeyboardInterrupt:
            # NOTE(review): `e` is unbound if interrupted before the first
            # epoch starts — this would raise NameError here.
            print('Interrupted, saving checkpoints at epoch: {}'.format(e))
            self.save_checkpoint(
                {
                    'epoch': e + 1,
                    'state_dict': self.model.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                }, is_best=False)

    def save_checkpoint(self, state, is_best):
        """Write `state` to checkpoint_dir/resume_from; copy to a
        'final_best_' file when is_best."""
        torch.save(
            state,
            os.path.join(self.kwargs['checkpoint_dir'], self.resume_from))
        if is_best:
            shutil.copyfile(
                os.path.join(self.kwargs['checkpoint_dir'], self.resume_from),
                os.path.join(self.kwargs['checkpoint_dir'],
                             'final_best_{}'.format(self.resume_from)))

    def _create_optimization(self):
        """Create optimizer, MultiBox criterion and prior boxes."""
        # NOTE(review): lr/momentum/weight_decay are hard-coded and momentum
        # is 0 — confirm these are the intended hyperparameters.
        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=4e-3,
                                   weight_decay=0,
                                   momentum=0)
        # NOTE(review): global `device` again, not self.device.
        self.criterion = MultiBoxLoss(self.num_classes, 0.5, True, 0, True, 3,
                                      0.5, False).to(device)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = self.priorbox.forward()
            if torch.cuda.is_available():
                self.priors = self.priors.cuda()

    def load_pretrained_model(self):
        """Load full-model weights from kwargs['pretrained_path'] if given."""
        if 'pretrained_path' in self.kwargs.keys():
            print('Loading pretrained weights...')
            pretrained_dict = torch.load(self.kwargs['pretrained_path'])
            self.model.load_state_dict(pretrained_dict)
            print('Pretrained model load successful.')
        else:
            print('No pretrained path provide, skip this step.')

    def load_checkpoint(self, filename):
        """Resume model/optimizer state from checkpoint_dir/filename.

        Creates checkpoint_dir if missing (in which case nothing is loaded);
        silently skips when the checkpoint file does not exist.
        """
        if not os.path.exists(self.kwargs['checkpoint_dir']):
            os.makedirs(self.kwargs['checkpoint_dir'])
        else:
            filename = os.path.join(self.kwargs['checkpoint_dir'], filename)
            if os.path.exists(filename) and os.path.isfile(filename):
                print('Loading checkpoint {}'.format(filename))
                checkpoint = torch.load(filename)
                self.start_epoch = checkpoint['epoch']
                self.model.load_state_dict(checkpoint['state_dict'])
                self.optimizer.load_state_dict(checkpoint['optimizer'])
                print(
                    'checkpoint loaded successful from {} at epoch {}'.format(
                        filename, self.start_epoch))
            else:
                print('No checkpoint exists from {}, skip load checkpoint...'.
                      format(filename))
parser = argparse.ArgumentParser() parser.add_argument("--prune_folder", default = "prunes/") parser.add_argument("--trained_model", default = "prunes/refineDet_trained.pth") parser.add_argument('--dataset_root', default=VOC_ROOT) parser.add_argument("--cut_ratio", default=0.2, type=float) parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda to train model') #for test_net: 200 in SSD paper, 200 for COCO, 300 for VOC parser.add_argument('--max_per_image', default=200, type=int, help='Top number of detections kept per image, further restrict the number of predictions to parse') args = parser.parse_args() cfg = voc320 # different from normal ssd, where the PriorBox is stored inside SSD object priorbox = PriorBox(cfg) priors = Variable(priorbox.forward().cuda(), volatile=True) # set the priors to cuda detector = RefineDetect(cfg['num_classes'], 0, cfg, object_score=0.01) def test_net(save_folder, net, detector, priors, cuda, testset, transform, max_per_image=200, thresh=0.05): # max_per_image is same as top_k if not os.path.exists(save_folder): os.mkdir(save_folder) num_images = len(testset) num_classes = len(labelmap) # +1 for background # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
# if not os.path.exists(args.save_folder): # os.mkdir(args.save_folder) VOC_dataset_map = {'300':VOC_300, '320':VOC_320, '512':VOC_512} if args.dataset == 'VOC': cfg = VOC_dataset_map[args.size] else: cfg = (COCO_320, COCO_512)[args.size == '512'] if args.version == 'ATiny_pelee': from models.ATiny_pelee import build_net else: print('Unkown version!') priorbox = PriorBox(cfg) priors = Variable(priorbox.forward(), volatile=True) #Make Sure not to backward priors = priors.cpu() def py_cpu_nms(dets, thresh): """Pure Python NMS baseline.""" x1 = dets[:, 0] y1 = dets[:, 1] x2 = dets[:, 2] y2 = dets[:, 3] scores = dets[:, 4] areas = (x2 - x1 + 1) * (y2 - y1 + 1) order = scores.argsort()[::-1] #descending order keep = [] while order.size > 0:
type=str, help='Trained state_dict file path to open') parser.add_argument('--cuda', default=True, type=bool, help='Use cuda to train model') parser.add_argument('--cpu', default=False, type=bool, help='Use cpu nms') args = parser.parse_args() cfg = VOC_Config img_dim = 300 num_classes = 2 rgb_means = (104, 117, 123) priorbox = PriorBox(cfg) with torch.no_grad(): priors = priorbox.forward() if args.cuda: priors = priors.cuda() class ObjectDetector: def __init__(self, net, detection, transform, num_classes=2, thresh=0.1, cuda=True): self.net = net self.detection = detection self.transform = transform self.num_classes = num_classes self.thresh = thresh self.cuda = cuda def predict(self, img): _t = {'im_detect': Timer(), 'misc': Timer()}
class BlazeFace(nn.Module):
    """Constructs a BlazeFace model.

    Based on the original paper:
    https://sites.google.com/view/perception-cv4arvr/blazeface

    Args:
        phase: 'train' or 'test'. In 'test' mode the raw head outputs are
            decoded into detections by a Detect module; in 'train' mode the
            raw (loc, conf, priors) tuple is returned for the loss.
        num_classes: number of output classes (including background).
    """

    def __init__(self, phase, num_classes):
        super(BlazeFace, self).__init__()
        self.phase = phase
        self.num_classes = num_classes

        # Stem: 3x3 stride-2 conv halves the input resolution.
        self.conv_1 = nn.Conv2d(3, 24, kernel_size=3, stride=2, padding=1,
                                bias=True)
        self.bn_1 = nn.BatchNorm2d(24)
        self.relu = nn.ReLU(inplace=True)

        # Single BlazeBlocks; blaze_3 downsamples.
        self.blaze_1 = BlazeBlock(24, 24)
        self.blaze_2 = BlazeBlock(24, 24)
        self.blaze_3 = BlazeBlock(24, 48, stride=2)
        self.blaze_4 = BlazeBlock(48, 48)
        self.blaze_5 = BlazeBlock(48, 48)
        # Double BlazeBlocks; blaze_6 and blaze_9 downsample.
        self.blaze_6 = BlazeBlock(48, 24, 96, stride=2)
        self.blaze_7 = BlazeBlock(96, 24, 96)
        self.blaze_8 = BlazeBlock(96, 24, 96)
        self.blaze_9 = BlazeBlock(96, 24, 96, stride=2)
        self.blaze_10 = BlazeBlock(96, 24, 96)
        self.blaze_11 = BlazeBlock(96, 24, 96)

        self.apply(initialize)

        # Multibox heads over the two tapped feature maps.
        # TODO: the anchor counts [2, 6] are inherited from SSD and may not
        # match the paper's prior-box layout — verify.
        self.head = mbox([self.blaze_9, self.blaze_10], [2, 6], 2)
        self.loc = nn.ModuleList(self.head[0])
        self.conf = nn.ModuleList(self.head[1])

        self.cfg = wider_face
        self.priorbox = PriorBox(self.cfg)
        # Priors are constants: build them once without autograd tracking
        # (replaces the deprecated Variable(..., volatile=True) idiom).
        with torch.no_grad():
            self.priors = self.priorbox.forward()

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        """Run detection on a batch of images.

        Args:
            x: input image batch; assumes NCHW with 3 channels — TODO confirm
                expected spatial size against self.cfg.

        Returns:
            In 'test' phase, the decoded detections from self.detect;
            otherwise the raw (loc, conf, priors) tuple for the loss.
        """
        h = self.conv_1(x)
        h = self.bn_1(h)
        h = self.relu(h)
        h = self.blaze_1(h)
        h = self.blaze_2(h)
        h = self.blaze_3(h)
        h = self.blaze_4(h)
        h = self.blaze_5(h)
        h = self.blaze_6(h)
        h = self.blaze_7(h)
        h1 = self.blaze_8(h)   # first detection feature map
        h = self.blaze_9(h1)
        h = self.blaze_10(h)
        h2 = self.blaze_11(h)  # second detection feature map

        # TODO: outputs from each detection layer should be cached, not just
        # the two tapped above; once fixed, training should work end to end.
        loc = list()
        conf = list()
        for (x, l, c) in zip([h1, h2], self.loc, self.conf):
            # Permute to NHWC so each anchor's offsets / class scores are
            # contiguous before flattening.
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)

        if self.phase == "test":
            # NOTE(review): self.softmax is constructed in __init__ but never
            # applied to conf here — confirm whether Detect expects raw
            # logits or probabilities.
            output = self.detect(
                loc.view(loc.size(0), -1, 4),                    # loc preds
                conf.view(conf.size(0), -1, self.num_classes),   # conf preds
                self.priors                                      # default boxes
            )
        else:
            output = (
                loc.view(loc.size(0), -1, 4),
                conf.view(conf.size(0), -1, self.num_classes),
                self.priors
            )
        return output