def train():
    """Train EfficientDet on Pascal VOC with focal loss.

    Reads configuration from the module-level ``args`` namespace
    (``dataset_root``, ``batch_size``, ``lr``, ``num_epoch``) and writes one
    checkpoint per epoch to ``./weights/checkpoint_<epoch>.pth``.
    """
    dataset = VOCDetection(root=args.dataset_root,
                           transform=SSDAugmentation(512, MEANS))
    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=0,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=False)
    # 21 = 20 VOC object classes + background.
    model = EfficientDet(num_classes=21)
    model = model.cuda()
    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    criterion = FocalLoss()
    model.train()
    iteration = 0
    for epoch in range(args.num_epoch):
        print('Start epoch: {} ...'.format(epoch))
        total_loss = []  # per-epoch history, used for the running mean below
        for idx, sample in enumerate(data_loader):
            images = sample['img'].cuda()
            classification, regression, anchors = model(images)
            classification_loss, regression_loss = criterion(
                classification, regression, anchors, sample['annot'])
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            loss = classification_loss + regression_loss
            # Skip batches that produced a zero loss (e.g. no usable
            # annotations); comparing via .item() avoids the fragile
            # bool(tensor == 0) idiom of the original.
            if loss.item() == 0:
                continue
            optimizer.zero_grad()
            loss.backward()
            # Clip gradients to stabilise early training.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
            optimizer.step()
            total_loss.append(loss.item())
            if iteration % 100 == 0:
                # Fixed typo in the log string: 'totol_loss' -> 'total_loss'.
                print(
                    'Epoch/Iteration: {}/{}, classification: {}, regression: {}, total_loss: {}'
                    .format(epoch, iteration, classification_loss.item(),
                            regression_loss.item(), np.mean(total_loss)))
            iteration += 1
        torch.save(model.state_dict(),
                   './weights/checkpoint_{}.pth'.format(epoch))
def train():
    """Train EfficientDet with an SSD-style MultiBox loss on COCO or VOC.

    Dataset choice, paths and hyper-parameters come from the module-level
    ``args`` namespace. Saves an intermediate checkpoint every 5000
    iterations and a final checkpoint named after the dataset.
    """
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            # VOC_ROOT is the argparse default, so the user did not pass
            # --dataset_root explicitly; fall back to COCO_ROOT if it exists.
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(
                                    cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(
                                   cfg['min_dim'], MEANS))
    net = EfficientDet(num_class=cfg['num_classes'])
    if args.cuda:
        net = net.cuda()
    # if args.cuda:
    #     net = torch.nn.DataParallel(net)
    #     cudnn.benchmark = True
    optimizer = optim.AdamW(net.parameters(), lr=args.lr)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)
    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    net.train()
    iteration = 0  # global step counter across all epochs
    for epoch in range(args.num_epoch):
        print('\n Start epoch: {} ...'.format(epoch))
        for idx, (images, targets) in enumerate(data_loader):
            if args.cuda:
                images = Variable(images.cuda())
                # NOTE(review): `volatile` is ignored on torch>=0.4;
                # kept for compatibility with the file's existing style.
                targets = [
                    Variable(ann.cuda(), volatile=True) for ann in targets
                ]
            else:
                images = Variable(images)
                targets = [Variable(ann, volatile=True) for ann in targets]
            # forward
            t0 = time.time()
            out = net(images)
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()
            if iteration % 10 == 0:
                print('timer: %.4f sec.' % (t1 - t0))
                # .item() extracts a plain float for %-formatting.
                print('iter ' + repr(iteration) + ' || Loss: %.4f ||'
                      % (loss.item()), end=' ')
            if iteration != 0 and iteration % 5000 == 0:
                print('Saving state, iteration:', iteration)
                # Bug fix: name the checkpoint by the global iteration, not
                # the per-epoch batch index `idx`, which resets every epoch
                # and caused checkpoints to overwrite each other.
                torch.save(net.state_dict(),
                           'weights/Effi' + repr(iteration) + '.pth')
            iteration += 1
    torch.save(net.state_dict(),
               args.save_folder + '' + args.dataset + '.pth')
def train():
    """SSD-style training loop (adapted from ssd.pytorch) with optional visdom
    plotting, step-wise learning-rate decay, and periodic checkpointing.

    Reads configuration from the module-level ``args`` namespace; saves an
    intermediate checkpoint every 5000 iterations and a final one named after
    the dataset.
    """
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            # VOC_ROOT is the argparse default, so --dataset_root was not
            # given explicitly; fall back to COCO_ROOT if it exists on disk.
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(cfg['min_dim'],
                                                          MEANS))
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(cfg['min_dim'],
                                                         MEANS))
    if args.visdom:
        # Imported lazily so visdom is only required when plotting is on.
        import visdom
        viz = visdom.Visdom()
    ssd_net = EfficientDet(num_class=cfg['num_classes'])
    net = ssd_net
    # if args.cuda:
    #     net = torch.nn.DataParallel(ssd_net)
    #     cudnn.benchmark = True
    # if args.resume:
    #     print('Resuming training, loading {}...'.format(args.resume))
    #     ssd_net.load_weights(args.resume)
    # else:
    #     vgg_weights = torch.load(args.save_folder + args.basenet)
    #     print('Loading base network...')
    #     ssd_net.vgg.load_state_dict(vgg_weights)
    if args.cuda:
        net = net.cuda()
    # if not args.resume:
    #     print('Initializing weights...')
    #     # initialize newly added layers' weights with xavier method
    #     ssd_net.extras.apply(weights_init)
    #     ssd_net.loc.apply(weights_init)
    #     ssd_net.conf.apply(weights_init)
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)
    net.train()
    # loss counters (accumulated per epoch, reset by the visdom branch below)
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')
    # Number of iterations that make up one pass over the dataset.
    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)
    step_index = 0  # index into cfg['lr_steps'] for the LR schedule
    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title,
                                    vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)
    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    iteration = 0  # global step counter across all epochs
    # NOTE(review): the outer loop ranges over epoch_size (iterations per
    # epoch), not a number-of-epochs setting, and `epoch` is also incremented
    # inside the visdom branch below — looks like a bug inherited from the
    # ssd.pytorch iteration-based loop; confirm intended epoch count.
    for epoch in range(epoch_size):
        print('\n Start epoch: {} ...'.format(epoch))
        total_loss = []  # per-epoch loss history for the running mean
        for idx, (images, targets) in enumerate(data_loader):
            if args.visdom and iteration != 0 and \
                    (iteration % epoch_size == 0):
                update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                                'append', epoch_size)
                # reset epoch loss counters
                loc_loss = 0
                conf_loss = 0
                epoch += 1
            # Step-wise LR decay at the iteration milestones from the config.
            if iteration in cfg['lr_steps']:
                step_index += 1
                adjust_learning_rate(optimizer, args.gamma, step_index)
            if args.cuda:
                images = Variable(images.cuda())
                # NOTE(review): `volatile` is ignored on torch>=0.4.
                targets = [Variable(ann.cuda(), volatile=True)
                           for ann in targets]
            else:
                images = Variable(images)
                targets = [Variable(ann, volatile=True) for ann in targets]
            # forward
            t0 = time.time()
            out = net(images)
            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()
            loc_loss += loss_l.data
            conf_loss += loss_c.data
            total_loss.append(loss.item())
            if iteration % 10 == 0:
                print('timer: %.4f sec.' % (t1 - t0))
                print('iter ' + repr(iteration) + ' || Loss: %.4f ||'
                      % (loss.item()),
                      ' || Mean_Loss : %.4f ||' % (np.mean(total_loss)),
                      end=' ')
                if args.visdom:
                    # NOTE(review): indexing a 0-dim tensor with .data[0]
                    # raises on torch>=0.5; .item() is the modern form —
                    # left unchanged here.
                    update_vis_plot(iteration, loss_l.data[0],
                                    loss_c.data[0], iter_plot, epoch_plot,
                                    'append')
            if iteration != 0 and iteration % 5000 == 0:
                print('Saving state, iter:', iteration)
                torch.save(ssd_net.state_dict(),
                           'weights/ssd300_COCO_' + repr(iteration) + '.pth')
            iteration += 1
    torch.save(ssd_net.state_dict(),
               args.save_folder + '' + args.dataset + '.pth')
def main_worker(gpu, ngpus_per_node, args):
    """Per-process training entry point for (optionally distributed) training.

    Builds the dataset/loaders for the selected dataset, constructs an
    EfficientDet model (optionally resuming from a checkpoint), wraps it for
    DDP / DataParallel, then runs the epoch loop, saving a checkpoint per
    epoch.

    Args:
        gpu: GPU index assigned to this worker (or None for CPU/all-GPU).
        ngpus_per_node: number of GPUs on this node.
        args: parsed command-line namespace; mutated in place (gpu, rank,
            batch_size, workers, num_class, network, start_epoch).
    """
    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            # args.rank = int(os.environ["RANK"])
            args.rank = 1  # hard-coded fallback kept from the original
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes.
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # Training dataset
    train_dataset = []
    if args.dataset == 'VOC':
        train_dataset = VOCDetection(root=args.dataset_root,
                                     transform=transforms.Compose([
                                         Normalizer(),
                                         Augmenter(),
                                         Resizer()
                                     ]))
        valid_dataset = VOCDetection(root=args.dataset_root,
                                     image_sets=[('2007', 'test')],
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]))
        args.num_class = train_dataset.num_classes()
    elif args.dataset == 'COCO':
        train_dataset = CocoDataset(root_dir=args.dataset_root,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        valid_dataset = CocoDataset(root_dir=args.dataset_root,
                                    set_name='val2017',
                                    transform=transforms.Compose(
                                        [Normalizer(), Resizer()]))
        args.num_class = train_dataset.num_classes()
    elif args.dataset == 'MyDataset':
        train_dataset = MyDataset(root_dir=args.dataset_root,
                                  set_name='train',
                                  mode='train',
                                  transform=transforms.Compose(
                                      [Normalizer(),
                                       Augmenter(),
                                       Resizer()]))
        valid_dataset = MyDataset(root_dir=args.dataset_root,
                                  set_name='valid',
                                  mode='train',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
        args.num_class = train_dataset.num_classes()

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.workers,
                              shuffle=True,
                              collate_fn=collater,
                              pin_memory=True)
    # NOTE(review): valid_loader is built but never used below (test() takes
    # the dataset directly); kept to preserve behavior.
    valid_loader = DataLoader(valid_dataset,
                              batch_size=1,
                              num_workers=args.workers,
                              shuffle=False,
                              collate_fn=collater,
                              pin_memory=True)

    checkpoint = []
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to the specified single GPU.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
        # Restore the architecture settings saved alongside the weights.
        params = checkpoint['parser']
        args.num_class = params.num_class
        args.network = params.network
        # args.start_epoch = checkpoint['epoch'] + 1
        args.start_epoch = 0
        del params

    model = EfficientDet(num_classes=args.num_class,
                         network=args.network,
                         W_bifpn=EFFICIENTDET[args.network]['W_bifpn'],
                         D_bifpn=EFFICIENTDET[args.network]['D_bifpn'],
                         D_class=EFFICIENTDET[args.network]['D_class'])
    if args.resume is not None:
        pretrained_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        # Bug fix: state-dict keys are strings — the original passed the
        # bare name `bbox_head.retina_cls.weight`, a NameError. Drop the
        # classification head so a checkpoint with a different class count
        # can still be loaded; pop(..., None) tolerates absent keys.
        pretrained_dict.pop('bbox_head.retina_cls.weight', None)
        pretrained_dict.pop('bbox_head.retina_cls.bias', None)
        # Merge the pretrained weights over the freshly initialized ones and
        # actually load them (bug fix: the original never called
        # load_state_dict, so resuming silently did nothing).
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        del checkpoint

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel
        # constructor should always set the single device scope, otherwise
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have.
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu], find_unused_parameters=True)
            print('Run with DistributedDataParallel with device_ids....')
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set.
            model = torch.nn.parallel.DistributedDataParallel(model)
            print('Run with DistributedDataParallel without device_ids....')
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        model = model.cuda()
        print('Run with DataParallel ....')
        model = torch.nn.DataParallel(model).cuda()

    # Define optimizer and LR scheduler (loss criterion lives in train()).
    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    cudnn.benchmark = True

    # Ensure the checkpoint directory exists before the first save.
    save_dir = os.path.join(args.save_folder, args.dataset, args.network)
    os.makedirs(save_dir, exist_ok=True)

    for epoch in range(args.start_epoch, args.num_epoch):
        train(train_loader, model, scheduler, optimizer, epoch, args)
        # Evaluate every 5 epochs.
        if (epoch + 1) % 5 == 0:
            test(valid_dataset, model, epoch, args)
        state = {
            'epoch': epoch,
            'parser': args,
            'state_dict': get_state_dict(model)
        }
        torch.save(
            state,
            os.path.join(save_dir, "checkpoint_{}.pth".format(epoch)))