def __init__(self, backbone=None, num_classes=21): super(SSD300, self).__init__() if backbone is None: raise Exception("backbone is None") if not hasattr(backbone, "out_channels"): raise Exception("the backbone not has attribute: out_channel") self.feature_extractor = backbone # 把传入的backbone定义给ssd的feature_extractor self.num_classes = num_classes # out_channels = [1024, 512, 512, 256, 256, 256] for resnet50,添加一系列卷积层做为特征提取层 self._build_additional_features(self.feature_extractor.out_channels) # 第一个特征层层和最后两个特征层默认每个点生成四个大小的框, 其他层每个点默认生成6个 self.num_defaults = [4, 6, 6, 6, 4, 4] location_extractors = [] # 定位预测器,使用3*3卷积来预测定位 confidence_extractors = [] # 置信度预测器, 同样使用3*3卷积来预测 # out_channels = [1024, 512, 512, 256, 256, 256] for resnet50 for nd, oc in zip(self.num_defaults, self.feature_extractor.out_channels): # nd is number_default_boxes, oc is output_channel location_extractors.append( nn.Conv2d(oc, nd * 4, kernel_size=3, padding=1)) confidence_extractors.append( nn.Conv2d(oc, nd * self.num_classes, kernel_size=3, padding=1)) self.loc = nn.ModuleList(location_extractors) self.conf = nn.ModuleList(confidence_extractors) self._init_weights() # default_box.dboxes (8732*4) default_box = dboxes300_coco() # 通过该函数生成默认的8732个default box self.compute_loss = Loss(default_box) self.encoder = Encoder(default_box) self.postprocess = PostProcess(default_box)
def __init__(self, backbone=None, num_classes=21, pretrain_path=None): super(SSD640, self).__init__() if backbone is None: raise Exception("backbone is None") if not hasattr(backbone, "out_channels"): raise Exception("the backbone not has attribute: out_channel") self.feature_extractor = backbone if pretrain_path is not None: self.feature_extractor.load_state_dict(torch.load(pretrain_path)) self.num_classes = num_classes # self._build_additional_features([2048, 1024, 512, 256]) self.num_defaults = [1, 2] location_extractors = [] confidence_extractors = [] for nd, oc in zip(self.num_defaults, self.feature_extractor.out_channels): # nd is number_default_boxes, oc is output_channel location_extractors.append( nn.Conv2d(oc, nd * 4, kernel_size=3, padding=1)) confidence_extractors.append( nn.Conv2d(oc, nd * self.num_classes, kernel_size=3, padding=1)) self.loc = nn.ModuleList(location_extractors) self.conf = nn.ModuleList(confidence_extractors) self._init_weights() self.default_box = dboxes300_coco() self.compute_loss = Loss(self.default_box) self.encoder = Encoder(self.default_box) self.postprocess = PostProcess(self.default_box)
def __init__(self, backbone=None, num_classes=21): super(SSD300, self).__init__() if backbone is None: raise Exception("backbone is None") if not hasattr(backbone, "out_channels"): raise Exception("the backbone not has attribute: out_channel") self.feature_extractor = backbone self.num_classes = num_classes # out_channels = [1024, 512, 512, 256, 256, 256] for resnet50 self._build_additional_features(self.feature_extractor.out_channels) self.num_defaults = [4, 6, 6, 6, 4, 4] location_extractors = [] confidence_extractors = [] # out_channels = [1024, 512, 512, 256, 256, 256] for resnet50 for nd, oc in zip(self.num_defaults, self.feature_extractor.out_channels): # nd is number_default_boxes, oc is output_channel location_extractors.append( nn.Conv2d(oc, nd * 4, kernel_size=3, padding=1)) confidence_extractors.append( nn.Conv2d(oc, nd * self.num_classes, kernel_size=3, padding=1)) self.loc = nn.ModuleList(location_extractors) self.conf = nn.ModuleList(confidence_extractors) self._init_weights() default_box = dboxes300_coco() self.compute_loss = Loss(default_box) self.encoder = Encoder(default_box) self.postprocess = PostProcess(default_box)
def decode_results(predictions): dboxes = dboxes300_coco() encoder = Encoder(dboxes) ploc, plabel = [val.float() for val in predictions] results = encoder.decode_batch(ploc, plabel, criteria=0.5, max_output=20) return [[pred.detach().cpu().numpy() for pred in detections] for detections in results]
def test(opt): model = SSD(backbone=ResNet()) checkpoint = torch.load(opt.pretrained_model) model.load_state_dict(checkpoint["model_state_dict"]) if torch.cuda.is_available(): model.cuda() model.eval() dboxes = generate_dboxes() test_set = CocoDataset(opt.data_path, 2017, "val", SSDTransformer(dboxes, (300, 300), val=True)) encoder = Encoder(dboxes) if os.path.isdir(opt.output): shutil.rmtree(opt.output) os.makedirs(opt.output) for img, img_id, img_size, _, _ in test_set: if img is None: continue if torch.cuda.is_available(): img = img.cuda() with torch.no_grad(): ploc, plabel = model(img.unsqueeze(dim=0)) result = encoder.decode_batch(ploc, plabel, opt.nms_threshold, 20)[0] loc, label, prob = [r.cpu().numpy() for r in result] best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1) loc = loc[best] label = label[best] prob = prob[best] if len(loc) > 0: path = test_set.coco.loadImgs(img_id)[0]["file_name"] output_img = cv2.imread( os.path.join(opt.data_path, "val2017", path)) height, width, _ = output_img.shape loc[:, 0::2] *= width loc[:, 1::2] *= height loc = loc.astype(np.int32) for box, lb, pr in zip(loc, label, prob): category = test_set.label_info[lb] color = colors[lb] xmin, ymin, xmax, ymax = box cv2.rectangle(output_img, (xmin, ymin), (xmax, ymax), color, 2) text_size = cv2.getTextSize(category + " : %.2f" % pr, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0] cv2.rectangle( output_img, (xmin, ymin), (xmin + text_size[0] + 3, ymin + text_size[1] + 4), color, -1) cv2.putText(output_img, category + " : %.2f" % pr, (xmin, ymin + text_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1) cv2.imwrite( "{}/{}_prediction.jpg".format(opt.output, path[:-4]), output_img)
def test(opt): model = SSD(backbone=ResNet()) checkpoint = torch.load(opt.pretrained_model) model.load_state_dict(checkpoint["model_state_dict"]) if torch.cuda.is_available(): model.cuda() model.eval() dboxes = generate_dboxes() transformer = SSDTransformer(dboxes, (300, 300), val=True) img = Image.open(opt.input).convert("RGB") img, _, _, _ = transformer(img, None, torch.zeros(1,4), torch.zeros(1)) encoder = Encoder(dboxes) if torch.cuda.is_available(): img = img.cuda() with torch.no_grad(): ploc, plabel = model(img.unsqueeze(dim=0)) result = encoder.decode_batch(ploc, plabel, opt.nms_threshold, 20)[0] loc, label, prob = [r.cpu().numpy() for r in result] best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1) loc = loc[best] label = label[best] prob = prob[best] output_img = cv2.imread(opt.input) if len(loc) > 0: height, width, _ = output_img.shape loc[:, 0::2] *= width loc[:, 1::2] *= height loc = loc.astype(np.int32) for box, lb, pr in zip(loc, label, prob): category = coco_classes[lb] color = colors[lb] xmin, ymin, xmax, ymax = box cv2.rectangle(output_img, (xmin, ymin), (xmax, ymax), color, 2) text_size = cv2.getTextSize(category + " : %.2f" % pr, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0] cv2.rectangle(output_img, (xmin, ymin), (xmin + text_size[0] + 3, ymin + text_size[1] + 4), color, -1) cv2.putText( output_img, category + " : %.2f" % pr, (xmin, ymin + text_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1) if opt.output is None: output = "{}_prediction.jpg".format(opt.input[:-4]) else: output = opt.output cv2.imwrite(output, output_img)
def __init__(self, backbone=None, num_classes=21): super(RetinaNet640, self).__init__() if backbone is None: raise Exception("backbone is None") if not hasattr(backbone, "out_channels"): raise Exception("the backbone not has attribute: out_channel") self.feature_extractor = backbone self.num_classes = num_classes # out_channels = [1024, 512, 512, 256, 256, 256] for resnet50 self.predictor = Predictor(num_features=5, in_channels=256, num_layers_before_predictor=4, num_classes=num_classes, num_boxes=6) default_box = dboxes640_coco() self.compute_loss = Loss(default_box) self.encoder = Encoder(default_box) self.postprocess = PostProcess(default_box)
def __init__(self, dboxes, eval=False): self.eval = eval self.size = (300, 300) # only support 300x300 ssd self.dboxes = dboxes self.encoder = Encoder(self.dboxes) self.crop = SSDCropping() self.hflip = RandomHorizontalFlip() self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) self.img_trans = transforms.Compose([ transforms.Resize(self.size), transforms.ColorJitter(brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05), transforms.ToTensor(), self.normalize ]) self.trans_eval = transforms.Compose([ transforms.Resize(self.size), transforms.ToTensor(), self.normalize ])
def train(train_loop_func, logger, args): if args.amp: amp_handle = amp.init(enabled=args.fp16) # Check that GPUs are actually available use_cuda = not args.no_cuda # Setup multi-GPU if necessary args.distributed = False if 'WORLD_SIZE' in os.environ: args.distributed = int(os.environ['WORLD_SIZE']) > 1 if args.distributed: torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend='nccl', init_method='env://') args.N_gpu = torch.distributed.get_world_size() else: args.N_gpu = 1 if args.seed is None: args.seed = np.random.randint(1e4) if args.distributed: args.seed = (args.seed + torch.distributed.get_rank()) % 2**32 print("Using seed = {}".format(args.seed)) torch.manual_seed(args.seed) np.random.seed(seed=args.seed) # Setup data, defaults dboxes = dboxes300_coco() encoder = Encoder(dboxes) cocoGt = get_coco_ground_truth(args) train_loader = get_train_loader(args, args.seed - 2**31) val_dataset = get_val_dataset(args) val_dataloader = get_val_dataloader(val_dataset, args) ssd300 = SSD300(backbone=args.backbone) args.learning_rate = args.learning_rate * args.N_gpu * (args.batch_size / 32) start_epoch = 0 iteration = 0 loss_func = Loss(dboxes) if use_cuda: ssd300.cuda() loss_func.cuda() if args.fp16 and not args.amp: ssd300 = network_to_half(ssd300) if args.distributed: ssd300 = DDP(ssd300) optimizer = torch.optim.SGD(tencent_trick(ssd300), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) scheduler = MultiStepLR(optimizer=optimizer, milestones=args.multistep, gamma=0.1) if args.fp16: if args.amp: optimizer = amp_handle.wrap_optimizer(optimizer) else: optimizer = FP16_Optimizer(optimizer, static_loss_scale=128.) if args.checkpoint is not None: if os.path.isfile(args.checkpoint): load_checkpoint(ssd300, args.checkpoint) checkpoint = torch.load(args.checkpoint, map_location=lambda storage, loc: storage. cuda(torch.cuda.current_device())) start_epoch = checkpoint['epoch'] iteration = checkpoint['iteration'] scheduler.load_state_dict(checkpoint['scheduler']) ssd300.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) else: print('Provided checkpoint is not path to a file') return inv_map = {v: k for k, v in val_dataset.label_map.items()} total_time = 0 if args.mode == 'evaluation': acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args) if args.local_rank == 0: print('Model precision {} mAP'.format(acc)) return mean, std = generate_mean_std(args) for epoch in range(start_epoch, args.epochs): start_epoch_time = time.time() scheduler.step() iteration = train_loop_func(ssd300, loss_func, epoch, optimizer, train_loader, val_dataloader, encoder, iteration, logger, args, mean, std) end_epoch_time = time.time() - start_epoch_time total_time += end_epoch_time if args.local_rank == 0: logger.update_epoch_time(epoch, end_epoch_time) if epoch in args.evaluation: acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args) if args.local_rank == 0: logger.update_epoch(epoch, acc) if args.save and args.local_rank == 0: print("saving model...") obj = { 'epoch': epoch + 1, 'iteration': iteration, 'optimizer': optimizer.state_dict(), 'scheduler': scheduler.state_dict(), 'label_map': val_dataset.label_info } if args.distributed: obj['model'] = ssd300.module.state_dict() else: obj['model'] = ssd300.state_dict() torch.save(obj, './models/epoch_{}.pt'.format(epoch)) train_loader.reset() print('total training time: {}'.format(total_time))
def train(train_loop_func, logger, args): # Check that GPUs are actually available use_cuda = not args.no_cuda train_samples = 118287 # Setup multi-GPU if necessary args.distributed = False if 'WORLD_SIZE' in os.environ: args.distributed = int(os.environ['WORLD_SIZE']) > 1 if args.distributed: torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend='smddp', init_method='env://') args.N_gpu = torch.distributed.get_world_size() else: args.N_gpu = 1 if args.seed is None: args.seed = np.random.randint(1e4) if args.distributed: args.seed = (args.seed + torch.distributed.get_rank()) % 2**32 print("Using seed = {}".format(args.seed)) torch.manual_seed(args.seed) torch.cuda.manual_seed(args.seed) np.random.seed(seed=args.seed) # Setup data, defaults dboxes = dboxes300_coco() encoder = Encoder(dboxes) cocoGt = get_coco_ground_truth(args) train_loader = get_train_loader(args, args.seed - 2**31) val_dataset = get_val_dataset(args) val_dataloader = get_val_dataloader(val_dataset, args) ssd300 = SSD300(backbone=ResNet(args.backbone, args.backbone_path)) args.learning_rate = args.learning_rate * args.N_gpu * (args.batch_size / 32) start_epoch = 0 iteration = 0 loss_func = Loss(dboxes) if use_cuda: ssd300.cuda() loss_func.cuda() optimizer = torch.optim.SGD(tencent_trick(ssd300), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) scheduler = MultiStepLR(optimizer=optimizer, milestones=args.multistep, gamma=0.1) if args.amp: ssd300, optimizer = amp.initialize(ssd300, optimizer, opt_level='O2') if args.distributed: ssd300 = DDP(ssd300) if args.checkpoint is not None: if os.path.isfile(args.checkpoint): load_checkpoint(ssd300.module if args.distributed else ssd300, args.checkpoint) checkpoint = torch.load(args.checkpoint, map_location=lambda storage, loc: storage.cuda(torch.cuda.current_device())) start_epoch = checkpoint['epoch'] iteration = checkpoint['iteration'] scheduler.load_state_dict(checkpoint['scheduler']) optimizer.load_state_dict(checkpoint['optimizer']) else: print('Provided checkpoint is not path to a file') return inv_map = {v: k for k, v in val_dataset.label_map.items()} total_time = 0 if args.mode == 'evaluation': acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args) if args.local_rank == 0: print('Model precision {} mAP'.format(acc)) return mean, std = generate_mean_std(args) for epoch in range(start_epoch, args.epochs): start_epoch_time = time.time() scheduler.step() iteration = train_loop_func(ssd300, loss_func, epoch, optimizer, train_loader, val_dataloader, encoder, iteration, logger, args, mean, std) end_epoch_time = time.time() - start_epoch_time total_time += end_epoch_time if torch.distributed.get_rank() == 0: throughput = train_samples / end_epoch_time logger.update_epoch_time(epoch, end_epoch_time) logger.update_throughput_speed(epoch, throughput) if epoch in args.evaluation: acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args) if args.save and args.local_rank == 0: print("saving model...") obj = {'epoch': epoch + 1, 'iteration': iteration, 'optimizer': optimizer.state_dict(), 'scheduler': scheduler.state_dict(), 'label_map': val_dataset.label_info} if args.distributed: obj['model'] = ssd300.module.state_dict() else: obj['model'] = ssd300.state_dict() save_path = os.path.join(args.save, f'epoch_{epoch}.pt') torch.save(obj, save_path) logger.log('model path', save_path) train_loader.reset() if torch.distributed.get_rank() == 0: DLLogger.log((), { 'Total training time': '%.2f' % total_time + ' secs' }) logger.log_summary()
def main(opt): if torch.cuda.is_available(): torch.distributed.init_process_group(backend='nccl', init_method='env://') num_gpus = torch.distributed.get_world_size() torch.cuda.manual_seed(123) else: torch.manual_seed(123) num_gpus = 1 train_params = { "batch_size": opt.batch_size * num_gpus, "shuffle": True, "drop_last": False, "num_workers": opt.num_workers, "collate_fn": collate_fn } test_params = { "batch_size": opt.batch_size * num_gpus, "shuffle": False, "drop_last": False, "num_workers": opt.num_workers, "collate_fn": collate_fn } if opt.model == "ssd": dboxes = generate_dboxes(model="ssd") model = SSD(backbone=ResNet(), num_classes=len(coco_classes)) else: dboxes = generate_dboxes(model="ssdlite") model = SSDLite(backbone=MobileNetV2(), num_classes=len(coco_classes)) train_set = CocoDataset(opt.data_path, 2017, "train", SSDTransformer(dboxes, (300, 300), val=False)) train_loader = DataLoader(train_set, **train_params) test_set = CocoDataset(opt.data_path, 2017, "val", SSDTransformer(dboxes, (300, 300), val=True)) test_loader = DataLoader(test_set, **test_params) encoder = Encoder(dboxes) opt.lr = opt.lr * num_gpus * (opt.batch_size / 32) criterion = Loss(dboxes) optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr, momentum=opt.momentum, weight_decay=opt.weight_decay, nesterov=True) scheduler = MultiStepLR(optimizer=optimizer, milestones=opt.multistep, gamma=0.1) if torch.cuda.is_available(): model.cuda() criterion.cuda() if opt.amp: from apex import amp from apex.parallel import DistributedDataParallel as DDP model, optimizer = amp.initialize(model, optimizer, opt_level='O1') else: from torch.nn.parallel import DistributedDataParallel as DDP # It is recommended to use DistributedDataParallel, instead of DataParallel # to do multi-GPU training, even if there is only a single node. model = DDP(model) if os.path.isdir(opt.log_path): shutil.rmtree(opt.log_path) os.makedirs(opt.log_path) if not os.path.isdir(opt.save_folder): os.makedirs(opt.save_folder) checkpoint_path = os.path.join(opt.save_folder, "SSD.pth") writer = SummaryWriter(opt.log_path) if os.path.isfile(checkpoint_path): checkpoint = torch.load(checkpoint_path) first_epoch = checkpoint["epoch"] + 1 model.module.load_state_dict(checkpoint["model_state_dict"]) scheduler.load_state_dict(checkpoint["scheduler"]) optimizer.load_state_dict(checkpoint["optimizer"]) else: first_epoch = 0 for epoch in range(first_epoch, opt.epochs): train(model, train_loader, epoch, writer, criterion, optimizer, scheduler, opt.amp) evaluate(model, test_loader, epoch, writer, encoder, opt.nms_threshold) checkpoint = { "epoch": epoch, "model_state_dict": model.module.state_dict(), "optimizer": optimizer.state_dict(), "scheduler": scheduler.state_dict() } torch.save(checkpoint, checkpoint_path)
def train(args): if args.amp: amp_handle = amp.init(enabled=args.fp16) args.distributed = False if 'WORLD_SIZE' in os.environ: args.distributed = int(os.environ['WORLD_SIZE']) > 1 if args.distributed: torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend='nccl', init_method='env://') args.N_gpu = torch.distributed.get_world_size() else: args.N_gpu = 1 dboxes = dboxes300_coco() encoder = Encoder(dboxes) cocoGt = get_coco_ground_truth(args) ssd300 = model(args) args.learning_rate = args.learning_rate * args.N_gpu * (args.batch_size / 32) iteration = 0 loss_func = Loss(dboxes) loss_func.cuda() optimizer = torch.optim.SGD( tencent_trick(ssd300), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) scheduler = MultiStepLR( optimizer=optimizer, milestones=args.multistep, gamma=0.1) if args.fp16: if args.amp: optimizer = amp_handle.wrap_optimizer(optimizer) else: optimizer = FP16_Optimizer(optimizer, static_loss_scale=128.) val_dataloader, inv_map = get_val_dataloader(args) train_loader = get_train_loader(args, dboxes) acc = 0 logger = Logger(args.batch_size, args.local_rank) for epoch in range(0, args.epochs): logger.start_epoch() scheduler.step() iteration = train_loop( ssd300, loss_func, epoch, optimizer, train_loader, iteration, logger, args) logger.end_epoch() if epoch in args.evaluation: acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args) if args.local_rank == 0: print('Epoch {:2d}, Accuracy: {:4f} mAP'.format(epoch, acc)) if args.data_pipeline == 'dali': train_loader.reset() return acc, logger.average_speed()
def __init__(self): self.default_box = dboxes300_coco() self.encoder = Encoder(self.default_box)
def main(opt): if torch.cuda.is_available(): torch.cuda.manual_seed(123) else: torch.manual_seed(123) train_params = { "batch_size": opt.batch_size, "shuffle": True, "drop_last": False, "num_workers": opt.num_workers, "collate_fn": collate_fn } eval_params = { "batch_size": opt.batch_size, "shuffle": True, "drop_last": False, "num_workers": opt.num_workers, "collate_fn": collate_fn } dboxes = generate_dboxes() model = SSD() train_set = OIDataset(SimpleTransformer(dboxes), train=True) train_loader = DataLoader(train_set, **train_params) val_set = OIDataset(SimpleTransformer(dboxes, eval=True), validation=True) val_loader = DataLoader(val_set, **eval_params) encoder = Encoder(dboxes) opt.lr = opt.lr * (opt.batch_size / 32) criterion = Loss(dboxes) optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr, momentum=opt.momentum, weight_decay=opt.weight_decay, nesterov=True) scheduler = MultiStepLR(optimizer=optimizer, milestones=opt.multistep, gamma=0.1) if torch.cuda.is_available(): model.cuda() criterion.cuda() model = torch.nn.DataParallel(model) if os.path.isdir(opt.log_path): shutil.rmtree(opt.log_path) os.makedirs(opt.log_path) if not os.path.isdir(opt.save_folder): os.makedirs(opt.save_folder) checkpoint_path = os.path.join(opt.save_folder, "SSD.pth") writer = SummaryWriter(opt.log_path) if os.path.isfile(checkpoint_path): checkpoint = torch.load(checkpoint_path) first_epoch = checkpoint["epoch"] + 1 model.module.load_state_dict(checkpoint["model_state_dict"]) scheduler.load_state_dict(checkpoint["scheduler"]) optimizer.load_state_dict(checkpoint["optimizer"]) else: first_epoch = 0 for epoch in range(first_epoch, opt.epochs): train(model, train_loader, epoch, writer, criterion, optimizer, scheduler) evaluate(model, val_loader, encoder, opt.nms_threshold) checkpoint = { "epoch": epoch, "model_state_dict": model.module.state_dict(), "optimizer": optimizer.state_dict(), "scheduler": scheduler.state_dict() } torch.save(checkpoint, checkpoint_path)
def train(train_loop_func, logger, args): # Check that GPUs are actually available use_cuda = not args.no_cuda # Setup multi-GPU if necessary args.distributed = False if 'WORLD_SIZE' in os.environ: args.distributed = int(os.environ['WORLD_SIZE']) > 1 if args.distributed: torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend='nccl', init_method='env://') args.N_gpu = torch.distributed.get_world_size() else: args.N_gpu = 1 if args.seed is None: args.seed = np.random.randint(1e4) if args.distributed: args.seed = (args.seed + torch.distributed.get_rank()) % 2**32 print("Using seed = {}".format(args.seed)) torch.manual_seed(args.seed) np.random.seed(seed=args.seed) # Setup data, defaults dboxes = dboxes300_coco() encoder = Encoder(dboxes) cocoGt = get_coco_ground_truth(args) train_loader = get_train_loader(args, args.seed - 2**31) val_dataset = get_val_dataset(args) val_dataloader = get_val_dataloader(val_dataset, args) ssd300 = SSD300(backbone=ResNet(args.backbone, args.backbone_path)) # args.learning_rate = args.learning_rate * args.N_gpu * (args.batch_size / 32) print(f"Actual starting LR: {args.learning_rate}") start_epoch = 0 iteration = 0 loss_func = Loss(dboxes) if use_cuda: ssd300.cuda() loss_func.cuda() # optimizer = torch.optim.SGD(tencent_trick(ssd300), lr=args.learning_rate, # momentum=args.momentum, weight_decay=args.weight_decay, nesterov=True) optimizer = torch.optim.AdamW(tencent_trick(ssd300), lr=args.learning_rate, betas=(0.8, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=True) # scheduler = MultiStepLR(optimizer=optimizer, milestones=args.multistep, gamma=0.1) # scheduler = CosineAnnealingWarmRestarts(optimizer=optimizer, T_0=20, T_mult=1, eta_min=1e-6) scheduler = CosineAnnealingLR(optimizer=optimizer, T_max=args.epochs, eta_min=1e-6) # scheduler = OneCycleLR(optimizer, max_lr=0.003, epochs=41, steps_per_epoch=173) # scheduler = CyclicLR(optimizer, base_lr=args.learning_rate, max_lr=2*args.learning_rate, # step_size_up=173*3, step_size_down=173*10) if args.amp: ssd300, optimizer = amp.initialize(ssd300, optimizer, opt_level='O2') if args.distributed: ssd300 = DDP(ssd300) if args.checkpoint is not None: if os.path.isfile(args.checkpoint): load_checkpoint(ssd300.module if args.distributed else ssd300, args.checkpoint) checkpoint = torch.load(args.checkpoint, map_location=lambda storage, loc: storage. cuda(torch.cuda.current_device())) start_epoch = checkpoint['epoch'] iteration = checkpoint['iteration'] scheduler.load_state_dict(checkpoint['scheduler']) optimizer.load_state_dict(checkpoint['optimizer']) else: print('Provided checkpoint is not path to a file') return inv_map = {v: k for k, v in val_dataset.label_map.items()} total_time = 0 if args.mode == 'evaluation': acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args) if args.local_rank == 0: print('Model precision {} mAP'.format(acc)) return mean, std = generate_mean_std(args) for epoch in range(start_epoch, args.epochs): start_epoch_time = time.time() # scheduler.step() iteration = train_loop_func(ssd300, loss_func, epoch, optimizer, scheduler, train_loader, val_dataloader, encoder, iteration, logger, args, mean, std) end_epoch_time = time.time() - start_epoch_time total_time += end_epoch_time # https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate scheduler.step() if args.local_rank == 0: logger.update_epoch_time(epoch, end_epoch_time) if epoch in args.evaluation: acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args) if args.local_rank == 0: logger.update_epoch(epoch, acc) if args.save and args.local_rank == 0: print("saving model...") obj = { 'epoch': epoch + 1, 'iteration': iteration, 'optimizer': optimizer.state_dict(), 'scheduler': scheduler.state_dict(), 'label_map': val_dataset.label_info } if args.distributed: obj['model'] = ssd300.module.state_dict() else: obj['model'] = ssd300.state_dict() torch.save(obj, './models/epoch_{}.pt'.format(epoch)) train_loader.reset() print('total training time: {}'.format(total_time))
def evaluate_test_dataset(): test_params = { "batch_size": 4, "shuffle": True, "drop_last": False, "num_workers": 4, "collate_fn": collate_fn } test_set = OIDataset(transformer, test=True) test_loader = DataLoader(test_set, **test_params) evaluate(model, test_loader, encoder, 0.45) if __name__ == "__main__": model = SSD() checkpoint = torch.load(model_path) model.load_state_dict(checkpoint["model_state_dict"]) if torch.cuda.is_available(): model.cuda() model.eval() dboxes = generate_dboxes() transformer = SimpleTransformer(dboxes, eval=True) encoder = Encoder(dboxes) # evaluate_test_dataset() for image in tqdm(os.listdir(input_folder)): draw_prediction_one(input_folder + image)
def train(args): args.distributed = False if 'WORLD_SIZE' in os.environ: args.distributed = int(os.environ['WORLD_SIZE']) > 1 if args.distributed: torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend='nccl', init_method='env://') args.N_gpu = torch.distributed.get_world_size() else: args.N_gpu = 1 dboxes = dboxes300_coco() encoder = Encoder(dboxes) cocoGt = get_coco_ground_truth(args) val_dataset = get_val_dataset(args) val_dataloader = get_val_dataloader(val_dataset, args) ssd300 = SSD300(len(cocoGt.cats) + 1) args.learning_rate = args.learning_rate * \ args.N_gpu * (args.batch_size / 32) iteration = 0 loss_func = Loss(dboxes) ssd300.cuda() loss_func.cuda() if args.fp16: ssd300 = network_to_half(ssd300) if args.distributed: ssd300 = DDP(ssd300) optimizer = torch.optim.SGD(tencent_trick(ssd300), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) scheduler = MultiStepLR(optimizer=optimizer, milestones=args.multistep, gamma=0.1) if args.fp16: optimizer = FP16_Optimizer(optimizer, static_loss_scale=128.) inv_map = {v: k for k, v in val_dataset.label_map.items()} avg_loss = 0.0 acc = 0 batch_perf = AverageMeter() end = time.time() train_start = end args.train_annotate = os.path.join(args.data, "annotations/instances_train2017.json") args.train_coco_root = os.path.join(args.data, "train2017") local_seed = set_seeds(args) if args.data_pipeline == 'no_dali': train_trans = SSDTransformer(dboxes, args, (300, 300), val=False) train_dataset = get_train_dataset(args, train_trans) train_loader = get_train_loader(train_dataset, args, args.num_workers) elif args.data_pipeline == 'dali': train_loader = get_train_dali_loader(args, dboxes, local_seed) for epoch in range(args.epochs): start_epoch_time = time.time() scheduler.step() epoch_loop(train_loader, args, ssd300, time.time(), loss_func, optimizer, iteration, avg_loss, batch_perf, epoch) torch.cuda.synchronize() if epoch in args.evaluation: acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args) try: train_loader.reset() except AttributeError: pass if args.local_rank == 0: print( "Training end: Average speed: {:3f} img/sec, Total time: {:3f} sec, Final accuracy: {:3f} mAP" .format(args.N_gpu * args.batch_size / batch_perf.avg, time.time() - train_start, acc))
def test(opt): model = SSD(backbone=ResNet()) checkpoint = torch.load(opt.pretrained_model) model.load_state_dict(checkpoint["model_state_dict"]) if torch.cuda.is_available(): model.cuda() model.eval() dboxes = generate_dboxes() transformer = SSDTransformer(dboxes, (300, 300), val=True) cap = cv2.VideoCapture(opt.input) if opt.output is None: output = "{}_prediction.mp4".format(opt.input[:-4]) else: output = opt.output height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) out = cv2.VideoWriter(output, cv2.VideoWriter_fourcc(*"MJPG"), int(cap.get(cv2.CAP_PROP_FPS)), (width, height)) encoder = Encoder(dboxes) while cap.isOpened(): flag, frame = cap.read() output_frame = np.copy(frame) if flag: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) else: break frame = Image.fromarray(frame) frame, _, _, _ = transformer(frame, None, torch.zeros(1, 4), torch.zeros(1)) if torch.cuda.is_available(): frame = frame.cuda() with torch.no_grad(): ploc, plabel = model(frame.unsqueeze(dim=0)) result = encoder.decode_batch(ploc, plabel, opt.nms_threshold, 20)[0] loc, label, prob = [r.cpu().numpy() for r in result] best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1) loc = loc[best] label = label[best] prob = prob[best] if len(loc) > 0: loc[:, 0::2] *= width loc[:, 1::2] *= height loc = loc.astype(np.int32) for box, lb, pr in zip(loc, label, prob): category = coco_classes[lb] color = colors[lb] xmin, ymin, xmax, ymax = box cv2.rectangle(output_frame, (xmin, ymin), (xmax, ymax), color, 2) text_size = cv2.getTextSize(category + " : %.2f" % pr, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0] cv2.rectangle( output_frame, (xmin, ymin), (xmin + text_size[0] + 3, ymin + text_size[1] + 4), color, -1) cv2.putText(output_frame, category + " : %.2f" % pr, (xmin, ymin + text_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1) out.write(output_frame) cap.release() out.release()