def trainval():
    """Train the multispectral (color + LWIR) SSD on KAIST and validate every epoch.

    Relies on module-level state: ``net``/``ssd_net`` (model), ``optimizer``,
    ``criterion``, ``args``, ``logger``, ``viz``, ``jobs_dir``, ``exp_time``,
    ``exp_name``, ``ssd_dim``, ``means``, ``batch_size``, ``train_sets``,
    ``val_sets``.  Saves a snapshot (with and without the mAP in the filename)
    under ``<jobs_dir>/snapshots`` each epoch.
    """
    net.train()

    # Running loss counters (accumulated over the whole run).
    loc_loss = 0
    conf_loss = 0
    epoch = 0

    print('Loading Dataset...')
    # Validation loader: batch size 1, original order, no augmentation.
    dataset_test = KAISTDetection(args.db_root, val_sets,
                                  BaseTransform(ssd_dim, means), AnnotationTransform())
    loader_test = torch.utils.data.DataLoader(dataset=dataset_test, batch_size=1,
                                              num_workers=4, shuffle=False)
    # Training loader: SSD-style augmentation, shuffled minibatches.
    dataset_train = KAISTDetection(args.db_root, train_sets,
                                   SSDAugmentation(ssd_dim, means), AnnotationTransform())
    loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size,
                                               num_workers=args.num_workers, shuffle=True,
                                               collate_fn=detection_collate, pin_memory=True)

    print('Training SSD on {}'.format(dataset_train.name))

    if args.visdom:
        # Initialize the visdom loss plot (loc / conf / total on a log y-axis).
        lot = viz.line(X=torch.zeros((1, )).cpu(),
                       Y=torch.zeros((1, 3)).cpu(),
                       env=exp_time + exp_name,
                       opts=dict(xlabel='Iteration', ylabel='Loss', ytype='log',
                                 title='Current SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss'],
                                 width=800, height=500, size=30))

    max_epoch = args.epochs
    logger.info('Max epoch: {}'.format(max_epoch))

    # Decay LR by 10x at 50% and 75% of training.
    milestones = [int(max_epoch * 0.5), int(max_epoch * 0.75)]
    optim_scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1)
    logger.info('Milestones for LR schedulring: {}'.format(milestones))

    best_mAP = 0.0
    iter_per_epoch = len(loader_train)

    for epoch in range(max_epoch):
        logger.info('\n')
        # Old-PyTorch convention: step the scheduler at the start of the epoch.
        optim_scheduler.step()

        for _iter, (color, lwir, targets, _, _, index) in enumerate(loader_train):
            if args.cuda:
                color = Variable(color.cuda())
                lwir = Variable(lwir.cuda())
                targets = [Variable(anno.cuda(), volatile=True) for anno in targets]
            else:
                color = Variable(color)
                lwir = Variable(lwir)
                targets = [Variable(anno, volatile=True) for anno in targets]

            # forward
            t0 = time.time()
            out = net(color, lwir)

            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            if loss_l.data.cpu().numpy() > 100:
                # Debug breakpoint on exploding localization loss.
                ipdb.set_trace()
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()

            loc_loss += loss_l.data[0]
            conf_loss += loss_c.data[0]

            iteration = epoch * len(loader_train) + _iter
            if iteration % 10 == 0:
                logger.info('[Epoch {:3d}] [Iter {:5d}/{:d}] loss: {:3.4f} = {:3.4f} (loc) + {:3.4f} (cls)\t[time: {:.3f}sec] [# of GT in minibatch: {:2d}]'
                            .format(epoch, iteration, iter_per_epoch,
                                    loss.data[0], loss_l.data[0], loss_c.data[0], t1 - t0,
                                    np.sum([np.sum(box.data.cpu().numpy()[:, -1] == 0) for box in targets])))

            if args.visdom and args.images_on_visdom:
                # FIX: the original referenced an undefined name `images` here
                # (this loader yields `color`/`lwir`), which raised NameError.
                random_batch_index = np.random.randint(color.size(0))
                viz.image(color.data[random_batch_index].cpu().numpy())

            if args.visdom and iteration % 10 == 0:
                viz.line(X=torch.ones((1, 3)).cpu() * iteration,
                         Y=torch.Tensor([loss_l.data[0], loss_c.data[0],
                                         loss_l.data[0] + loss_c.data[0]]).unsqueeze(0).cpu(),
                         env=exp_time + exp_name,
                         win=lot,
                         update='append')

        # Validate every epoch after the first (the `% 1` keeps the original
        # always-true cadence; earlier schedules are preserved in git history).
        if epoch > 0 and epoch % 1 == 0:
            ssd_net.set_phase('test')
            mAP = validation(net, loader_test, dataset_test,
                             'SSD300_{:s}_epoch_{:04d}'.format(exp_name, epoch))
            ssd_net.set_phase('train')

            filename = os.path.join(jobs_dir, 'snapshots',
                                    'ssd300_epoch_{:03d}_mAP_{:.4f}.pth'.format(epoch, mAP))
            print('Saving state, {:s}'.format(filename))
            torch.save(ssd_net.state_dict(), filename)

        # Unconditional per-epoch snapshot (no mAP in the name).
        filename = os.path.join(jobs_dir, 'snapshots',
                                'ssd300_epoch_{:03d}.pth'.format(epoch))
        print('Saving state, {:s}'.format(filename))
        torch.save(ssd_net.state_dict(), filename)
def train():
    """Train SSD300 on the VID dataset (iteration-based loop, old-PyTorch API).

    Uses module-level ``args``, ``vid`` config, helper functions
    (``build_ssd``, ``weights_init``, ``create_vis_plot``, ``update_vis_plot``,
    ``adjust_learning_rate``, ``detection_collate``) and saves checkpoints
    under ``args.save_folder``.
    """
    if args.dataset == 'VID':
        cfg = vid
        dataset = VIDetection(root=VID_ROOT, phase='train',
                              transform=SSDAugmentation(vid['min_dim'], MEANS))

    if args.visdom:
        import visdom
        viz = visdom.Visdom()

    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net
    print(net)

    if args.cuda:
        # Wrap the raw model for multi-GPU execution; `ssd_net` keeps pointing
        # at the unwrapped module so state_dicts save/load without the
        # DataParallel prefix.
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        print(args.save_folder + args.basenet)
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data; re-create the iterator when the epoch is exhausted
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)

        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(ann.cuda(), volatile=True) for ann in targets]
        else:
            images = Variable(images)
            targets = [Variable(ann, volatile=True) for ann in targets]

        # forward
        t0 = time.time()
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()

        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]

        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data[0]))

        if args.visdom:
            update_vis_plot(iteration, loss_l.data[0], loss_c.data[0],
                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 5000 == 0:
            print('Saving state, iter:', iteration)
            # FIX: the periodic checkpoint path was hard-coded to
            # 'weights/ssd300_COCO_...' regardless of the dataset and ignored
            # args.save_folder, inconsistent with the final save below.
            torch.save(ssd_net.state_dict(),
                       args.save_folder + 'ssd300_' + args.dataset + '_' +
                       repr(iteration) + '.pth')

    # Final weights for the full run.
    torch.save(ssd_net.state_dict(),
               args.save_folder + args.dataset + '.pth')
def train():
    """Train CenterNet on VOC or COCO (epoch-based loop with warmup and
    cosine/step LR schedules, optional mosaic and multi-scale training).

    All configuration comes from ``parse_args()`` and the module-level
    ``train_cfg``; checkpoints go to ``<save_folder>/<dataset>/<version>``.
    """
    args = parse_args()
    path_to_save = os.path.join(args.save_folder, args.dataset, args.version)
    os.makedirs(path_to_save, exist_ok=True)

    # cuda
    if args.cuda:
        print('use cuda')
        cudnn.benchmark = True
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # mosaic augmentation
    if args.mosaic:
        print('use Mosaic Augmentation ...')

    # multi-scale: train at a larger base size so random rescaling has headroom
    if args.multi_scale:
        print('use the multi-scale trick ...')
        train_size = [640, 640]
        val_size = [512, 512]
    else:
        train_size = [512, 512]
        val_size = [512, 512]

    cfg = train_cfg

    # dataset and evaluator
    print("Setting Arguments.. : ", args)
    print("----------------------------------------------------------")
    print('Loading the dataset...')

    if args.dataset == 'voc':
        data_dir = VOC_ROOT
        num_classes = 20
        dataset = VOCDetection(root=data_dir,
                               img_size=train_size[0],
                               transform=SSDAugmentation(train_size),
                               mosaic=args.mosaic
                               )
        evaluator = VOCAPIEvaluator(data_root=data_dir,
                                    img_size=val_size,
                                    device=device,
                                    transform=BaseTransform(val_size),
                                    labelmap=VOC_CLASSES
                                    )
    elif args.dataset == 'coco':
        data_dir = coco_root
        num_classes = 80
        dataset = COCODataset(
                    data_dir=data_dir,
                    img_size=train_size[0],
                    transform=SSDAugmentation(train_size),
                    debug=args.debug,
                    mosaic=args.mosaic
                    )
        evaluator = COCOAPIEvaluator(
                        data_dir=data_dir,
                        img_size=val_size,
                        device=device,
                        transform=BaseTransform(val_size)
                        )
    else:
        print('unknow dataset !! Only support voc and coco !!')
        exit(0)

    print('Training model on:', dataset.name)
    print('The dataset size:', len(dataset))
    print("----------------------------------------------------------")

    # dataloader
    dataloader = torch.utils.data.DataLoader(
                    dataset,
                    batch_size=args.batch_size,
                    shuffle=True,
                    collate_fn=detection_collate,
                    num_workers=args.num_workers,
                    pin_memory=True
                    )

    # build model
    if args.version == 'centernet':
        from models.centernet import CenterNet
        net = CenterNet(device, input_size=train_size, num_classes=num_classes,
                        trainable=True)
        print('Let us train centernet on the %s dataset ......' % (args.dataset))
    else:
        print('Unknown version !!!')
        exit()

    model = net
    model.to(device).train()

    # use tfboard
    if args.tfboard:
        print('use tensorboard')
        from torch.utils.tensorboard import SummaryWriter
        c_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        # NOTE(review): log path is hard-coded under 'log/coco/' even for VOC
        # runs — presumably an oversight; confirm before relying on it.
        log_path = os.path.join('log/coco/', args.version, c_time)
        os.makedirs(log_path, exist_ok=True)
        writer = SummaryWriter(log_path)

    # keep training
    if args.resume is not None:
        print('keep training model: %s' % (args.resume))
        model.load_state_dict(torch.load(args.resume, map_location=device))

    # optimizer setup
    base_lr = args.lr
    tmp_lr = base_lr
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay
                          )

    max_epoch = cfg['max_epoch']
    epoch_size = len(dataset) // args.batch_size

    # start training loop
    t0 = time.time()
    for epoch in range(args.start_epoch, max_epoch):

        # use cos lr (between epoch 20 and max_epoch-20; constant floor after)
        if args.cos and epoch > 20 and epoch <= max_epoch - 20:
            # use cos lr
            tmp_lr = 0.00001 + 0.5*(base_lr-0.00001)*(1+math.cos(math.pi*(epoch-20)*1./ (max_epoch-20)))
            set_lr(optimizer, tmp_lr)
        elif args.cos and epoch > max_epoch - 20:
            tmp_lr = 0.00001
            set_lr(optimizer, tmp_lr)
        # use step lr
        else:
            if epoch in cfg['lr_epoch']:
                tmp_lr = tmp_lr * 0.1
                set_lr(optimizer, tmp_lr)

        for iter_i, (images, targets) in enumerate(dataloader):
            # WarmUp strategy for learning rate: quartic ramp over the first
            # wp_epoch epochs, then snap back to base_lr.
            if not args.no_warm_up:
                if epoch < args.wp_epoch:
                    tmp_lr = base_lr * pow((iter_i+epoch*epoch_size)*1. / (args.wp_epoch*epoch_size), 4)
                    # tmp_lr = 1e-6 + (base_lr-1e-6) * (iter_i+epoch*epoch_size) / (epoch_size * (args.wp_epoch))
                    set_lr(optimizer, tmp_lr)
                elif epoch == args.wp_epoch and iter_i == 0:
                    tmp_lr = base_lr
                    set_lr(optimizer, tmp_lr)

            # to device
            images = images.to(device)

            # multi-scale trick: pick a new size (320..608, multiple of 32)
            # every 10 iterations
            if iter_i % 10 == 0 and iter_i > 0 and args.multi_scale:
                # randomly choose a new size
                size = random.randint(10, 19) * 32
                train_size = [size, size]
                model.set_grid(train_size)
            if args.multi_scale:
                # interpolate
                images = torch.nn.functional.interpolate(images, size=train_size, mode='bilinear', align_corners=False)

            # make train label
            targets = [label.tolist() for label in targets]
            # NOTE(review): uses args.num_classes here although num_classes is
            # derived locally from the dataset above — confirm the argparser
            # actually defines num_classes and that the two agree.
            targets = tools.gt_creator(train_size, net.stride, args.num_classes, targets)
            targets = torch.tensor(targets).float().to(device)

            # forward and loss
            cls_loss, txty_loss, twth_loss, total_loss = model(images, target=targets)

            # backprop
            total_loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            if iter_i % 10 == 0:
                if args.tfboard:
                    # viz loss
                    writer.add_scalar('class loss', cls_loss.item(), iter_i + epoch * epoch_size)
                    writer.add_scalar('txty loss', txty_loss.item(), iter_i + epoch * epoch_size)
                    writer.add_scalar('twth loss', twth_loss.item(), iter_i + epoch * epoch_size)
                    writer.add_scalar('total loss', total_loss.item(), iter_i + epoch * epoch_size)

                t1 = time.time()
                print('[Epoch %d/%d][Iter %d/%d][lr %.6f]'
                      '[Loss: cls %.2f || txty %.2f || twth %.2f ||total %.2f || size %d || time: %.2f]'
                      % (epoch+1, max_epoch, iter_i, epoch_size, tmp_lr,
                         cls_loss.item(), txty_loss.item(), twth_loss.item(),
                         total_loss.item(), train_size[0], t1-t0),
                      flush=True)
                t0 = time.time()

        # evaluation
        # NOTE(review): this also fires at epoch 0, i.e. after only one epoch
        # of training — presumably intentional as a smoke test; confirm.
        if (epoch) % args.eval_epoch == 0:
            model.trainable = False
            model.set_grid(val_size)
            model.eval()

            # evaluate
            evaluator.evaluate(model)

            # convert to training mode.
            model.trainable = True
            model.set_grid(train_size)
            model.train()

        # save model every 10 epochs
        if (epoch + 1) % 10 == 0:
            print('Saving state, epoch:', epoch + 1)
            torch.save(model.state_dict(),
                       os.path.join(path_to_save,
                                    args.version + '_' + repr(epoch + 1) + '.pth')
                       )
def train():
    """Train an SSD-VGG16 detector on VOC with MultiStepLR and gradient
    clipping.

    Uses module-level configuration: ``voc``, ``DATASET_ROOT``, ``train_batch``,
    ``base_lr``, ``momentum``, ``weight_decay``, ``stepsize``, ``gamma``,
    ``max_iter``, ``display``, ``save_interval``, ``save_prefix``, ``device``,
    ``USE_CUDA`` and the ``log`` logger.
    """
    cfg = voc
    dataset = VOCDetection(root=DATASET_ROOT,
                           transform=SSDAugmentation(cfg['min_dim'], MEANS))
    data_loader = data.DataLoader(dataset, train_batch,
                                  num_workers=1,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  worker_init_fn=_worker_init_fn_(),
                                  pin_memory=True)

    # net
    ssd_net = SSDVGG16("train", cfg["min_dim"], cfg["num_classes"])
    net = ssd_net

    # priorbox: anchors are fixed, so compute them once without grad tracking
    net_priorbox = PriorBox(cfg)
    with torch.no_grad():
        priorboxes = net_priorbox.forward()

    # criterion
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, device)

    if USE_CUDA:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True
        net.to(device)
        priorboxes = priorboxes.to(device)
        criterion.to(device)

    optimizer = optim.SGD(net.parameters(), lr=base_lr, momentum=momentum,
                          weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, stepsize, gamma)

    num_epoch = max_iter // train_batch + 1
    k = 0  # global iteration counter
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    loss = 0

    for i in range(0, num_epoch):
        net.train()
        # one epoch
        for batch_idx, (images, targets) in enumerate(data_loader):
            images = images.to(device)
            # FIX: the comprehension variable was named `i`, shadowing the
            # epoch index `i` of the outer loop — the end-of-epoch log then
            # printed a target tensor instead of the epoch number.
            targets = [ann.to(device) for ann in targets]

            # forward
            t0 = time.time()
            out = net(images)

            # back
            optimizer.zero_grad()
            loc, conf = out
            loss_l, loss_c = criterion((loc, conf, priorboxes), targets)
            loss = loss_l + loss_c
            loss.backward()

            # clip grad
            torch.nn.utils.clip_grad_norm_(net.parameters(), 20.0)

            # optimize
            optimizer.step()
            scheduler.step()
            t1 = time.time()

            loc_loss += loss_l.item()
            conf_loss += loss_c.item()

            if k % display == 0:
                log.info("iter: {}, lr: {}, loss is: {:.4f}, loss_loc is: {:.4f}, loss_conf is: {:.4f}, time per iter: {:.4f} s".format(
                    k, optimizer.param_groups[0]['lr'], loss.item(),
                    loss_l.item(), loss_c.item(), t1-t0))

            if k % save_interval == 0:
                path = save_prefix + "_iter_{}.pkl".format(k)
                # Move to CPU for a device-agnostic checkpoint, then back.
                torch.save(net.to('cpu').state_dict(), path)
                net.to(device)
                log.info("save model: {}".format(path))

            k += 1

        log.info('epoch: {}, lr: {}, loss is: {}'.format(
            i, optimizer.param_groups[0]['lr'], loss.item()))

    log.info("optimize done...")
    path = save_prefix + "_final.pkl"
    torch.save(net.to('cpu').state_dict(), path)
    log.info("save model: {} ...".format(path))
def train(rank, args):
    """Distributed (one process per GPU) training loop for YOLACT-style models
    on COCO / YouTube-VIS / FlyingChairs, including the video key-frame +
    feature-warping path and an optional joint COCO dataset.

    ``rank`` is this process's GPU / distributed rank; rank 0 owns checkpoint
    saving, validation and image logging.  Relies on module-level ``cfg``,
    ``MEANS``/``STD``, ``loss_types``, ``lr``, ``set_lr``, ``prepare_data``,
    ``prepare_flow_data`` and the various dataset/loss classes.
    """
    if args.num_gpus > 1:
        multi_gpu_rescale(args)
    if rank == 0:
        if not os.path.exists(args.save_folder):
            os.mkdir(args.save_folder)

    # set up logger
    setup_logger(output=os.path.join(args.log_folder, cfg.name),
                 distributed_rank=rank)
    logger = logging.getLogger("yolact.train")

    w = SummaryHelper(distributed_rank=rank,
                      log_dir=os.path.join(args.log_folder, cfg.name))
    w.add_text("argv", " ".join(sys.argv))
    logger.info("Args: {}".format(" ".join(sys.argv)))

    # Record the exact code revision for reproducibility.
    import git
    with git.Repo(search_parent_directories=True) as repo:
        w.add_text("git_hash", repo.head.object.hexsha)
        logger.info("git hash: {}".format(repo.head.object.hexsha))

    try:
        logger.info("Initializing torch.distributed backend...")
        dist.init_process_group(
            backend='nccl',
            init_method=args.dist_url,
            world_size=args.num_gpus,
            rank=rank
        )
    except Exception as e:
        logger.error("Process group URL: {}".format(args.dist_url))
        raise e

    dist.barrier()

    if torch.cuda.device_count() > 1:
        logger.info('Multiple GPUs detected! Turning off JIT.')

    # Pick the dataset, its collate function, and (optionally) a validation set.
    collate_fn = detection_collate
    if cfg.dataset.name == 'YouTube VIS':
        dataset = YoutubeVIS(image_path=cfg.dataset.train_images,
                             info_file=cfg.dataset.train_info,
                             configs=cfg.dataset,
                             transform=SSDAugmentationVideo(MEANS))
        if cfg.dataset.joint == 'coco':
            joint_dataset = COCODetection(image_path=cfg.joint_dataset.train_images,
                                          info_file=cfg.joint_dataset.train_info,
                                          transform=SSDAugmentation(MEANS))
            joint_collate_fn = detection_collate
        if args.validation_epoch > 0:
            setup_eval()
            val_dataset = YoutubeVIS(image_path=cfg.dataset.valid_images,
                                     info_file=cfg.dataset.valid_info,
                                     configs=cfg.dataset,
                                     transform=BaseTransformVideo(MEANS))
        collate_fn = collate_fn_youtube_vis
    elif cfg.dataset.name == 'FlyingChairs':
        dataset = FlyingChairs(image_path=cfg.dataset.trainval_images,
                               info_file=cfg.dataset.trainval_info)
        collate_fn = collate_fn_flying_chairs
    else:
        dataset = COCODetection(image_path=cfg.dataset.train_images,
                                info_file=cfg.dataset.train_info,
                                transform=SSDAugmentation(MEANS))
        if args.validation_epoch > 0:
            setup_eval()
            val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                        info_file=cfg.dataset.valid_info,
                                        transform=BaseTransform(MEANS))

    # Set cuda device early to avoid duplicate model in master GPU
    if args.cuda:
        torch.cuda.set_device(rank)

    # Parallel wraps the underlying module, but when saving and loading we
    # don't want that — keep `yolact_net` as the unwrapped module.
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs.
    # use timer for experiments
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        logger.info('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume, args=args)
        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        logger.info('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)

    # Flow pre-training uses a different criterion than detection training.
    if cfg.flow.train_flow:
        criterion = OpticalFlowLoss()
    else:
        criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                                 pos_threshold=cfg.positive_iou_threshold,
                                 neg_threshold=cfg.negative_iou_threshold,
                                 negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net.cuda(rank)
        criterion.cuda(rank)
        net = nn.parallel.DistributedDataParallel(net, device_ids=[rank],
                                                  output_device=rank,
                                                  broadcast_buffers=False,
                                                  find_unused_parameters=True)
        # net = nn.DataParallel(net).cuda()
        # criterion = nn.DataParallel(criterion).cuda()

    # Only optimize parameters that require grad (frozen parts are excluded).
    optimizer = optim.SGD(filter(lambda x: x.requires_grad, net.parameters()),
                          lr=args.lr, momentum=args.momentum,
                          weight_decay=args.decay)

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    w.set_step(iteration)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size // args.num_gpus
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    from data.sampler_utils import InfiniteSampler, build_batch_data_sampler

    infinite_sampler = InfiniteSampler(dataset, seed=args.random_seed,
                                       num_replicas=args.num_gpus,
                                       rank=rank, shuffle=True)
    train_sampler = build_batch_data_sampler(infinite_sampler,
                                             images_per_batch=args.batch_size)

    data_loader = data.DataLoader(dataset,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_fn,
                                  multiprocessing_context="fork" if args.num_workers > 1 else None,
                                  batch_sampler=train_sampler)
    data_loader_iter = iter(data_loader)

    if cfg.dataset.joint:
        joint_infinite_sampler = InfiniteSampler(joint_dataset,
                                                 seed=args.random_seed,
                                                 num_replicas=args.num_gpus,
                                                 rank=rank, shuffle=True)
        joint_train_sampler = build_batch_data_sampler(joint_infinite_sampler,
                                                       images_per_batch=args.batch_size)
        joint_data_loader = data.DataLoader(joint_dataset,
                                            num_workers=args.num_workers,
                                            collate_fn=joint_collate_fn,
                                            multiprocessing_context="fork" if args.num_workers > 1 else None,
                                            batch_sampler=joint_train_sampler)
        joint_data_loader_iter = iter(joint_data_loader)

    dist.barrier()

    include_mask = cfg.include_mask

    save_path = lambda epoch, iteration: SavePath(cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()
    data_time_avg = MovingAverage(10)

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    def backward_and_log(prefix, net_outs, targets, masks, num_crowds,
                         extra_loss=None):
        # Shared backward pass for the video path: compute losses, backprop,
        # step only on finite loss, and log per-loss moving averages.
        # NOTE(review): `global include_mask` makes this closure read the
        # module-level `include_mask`, NOT the local assigned above — verify
        # that a module-level binding exists / that this is intended.
        global include_mask
        optimizer.zero_grad()
        out = net_outs["pred_outs"]
        wrapper = ScatterWrapper(targets, masks, num_crowds)
        losses = criterion(out, wrapper, wrapper.make_mask(), include_mask)

        losses = {k: v.mean() for k, v in losses.items()}  # Mean here because Dataparallel
        if extra_loss is not None:
            assert type(extra_loss) == dict
            losses.update(extra_loss)

        loss = sum([losses[k] for k in losses])

        # Backprop
        loss.backward()  # Do this to free up vram even if loss is not finite
        if torch.isfinite(loss).item():
            optimizer.step()

        # Add the loss to the moving average for bookkeeping
        for k in losses:
            loss_avgs[k].add(losses[k].item())
            w.add_scalar('{prefix}/{key}'.format(prefix=prefix, key=k),
                         losses[k].item())

        return losses

    logger.info('Begin training!')
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter: skip whole epochs that precede it.
            if (epoch+1)*epoch_size < iteration:
                continue

            while True:
                data_start_time = time.perf_counter()
                datum = next(data_loader_iter)
                dist.barrier()
                data_end_time = time.perf_counter()
                data_time = data_end_time - data_start_time
                if iteration != args.start_iter:
                    data_time_avg.add(data_time)

                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch+1)*epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        # NOTE(review): this iterates the dict's KEYS (strings),
                        # so `avg.reset()` would raise AttributeError if a
                        # delayed setting ever fires — looks like it should be
                        # `loss_avgs[avg].reset()`; confirm before relying on
                        # delayed_settings.
                        for avg in loss_avgs:
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [x for x in cfg.delayed_settings if x[0] > iteration]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until and cfg.lr_warmup_init < args.lr:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) * (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init)
                elif cfg.lr_schedule == 'cosine':
                    set_lr(optimizer, args.lr * ((math.cos(math.pi * iteration / cfg.max_iter) + 1.) * .5))

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while cfg.lr_schedule == 'step' and step_index < len(cfg.lr_steps) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma ** step_index))

                # NOTE(review): `lr` is a module-level global, presumably
                # updated by set_lr — confirm.
                global lr
                w.add_scalar('meta/lr', lr)

                if cfg.dataset.name == "FlyingChairs":
                    # Optical-flow pre-training: two frames in, flow out.
                    imgs_1, imgs_2, flows = prepare_flow_data(datum)
                    net_outs = net(None, extras=(imgs_1, imgs_2))
                    # Compute Loss
                    optimizer.zero_grad()
                    losses = criterion(net_outs, flows)

                    losses = {k: v.mean() for k, v in losses.items()}  # Mean here because Dataparallel
                    loss = sum([losses[k] for k in losses])

                    # Backprop
                    loss.backward()  # Do this to free up vram even if loss is not finite
                    if torch.isfinite(loss).item():
                        optimizer.step()

                    # Add the loss to the moving average for bookkeeping
                    for k in losses:
                        loss_avgs[k].add(losses[k].item())
                        w.add_scalar('loss/%s' % k, losses[k].item())

                elif cfg.dataset.joint or not cfg.dataset.is_video:
                    # Still-image detection step (plain COCO, or the joint
                    # COCO batch interleaved with video training).
                    if cfg.dataset.joint:
                        joint_datum = next(joint_data_loader_iter)
                        dist.barrier()
                        # Load training data
                        # Note, for training on multiple gpus this will use the custom replicate and gather I wrote up there
                        images, targets, masks, num_crowds = prepare_data(joint_datum)
                    else:
                        images, targets, masks, num_crowds = prepare_data(datum)

                    extras = {"backbone": "full", "interrupt": False,
                              "moving_statistics": {"aligned_feats": []}}
                    net_outs = net(images, extras=extras)
                    out = net_outs["pred_outs"]

                    # Compute Loss
                    optimizer.zero_grad()
                    wrapper = ScatterWrapper(targets, masks, num_crowds)
                    losses = criterion(out, wrapper, wrapper.make_mask())

                    losses = {k: v.mean() for k, v in losses.items()}  # Mean here because Dataparallel
                    loss = sum([losses[k] for k in losses])

                    # Backprop
                    loss.backward()  # Do this to free up vram even if loss is not finite
                    if torch.isfinite(loss).item():
                        optimizer.step()

                    # Add the loss to the moving average for bookkeeping
                    for k in losses:
                        loss_avgs[k].add(losses[k].item())
                        w.add_scalar('joint/%s' % k, losses[k].item())

                # Forward Pass (video path: run reference frames without grad,
                # then train on the key frame and on the warped features).
                if cfg.dataset.is_video:
                    # reference frames
                    references = []
                    moving_statistics = {"aligned_feats": [], "conf_hist": []}
                    for idx, frame in enumerate(datum[:0:-1]):
                        images, annots = frame
                        extras = {"backbone": "full", "interrupt": True,
                                  "keep_statistics": True,
                                  "moving_statistics": moving_statistics}

                        with torch.no_grad():
                            net_outs = net(images, extras=extras)

                        moving_statistics["feats"] = net_outs["feats"]
                        moving_statistics["lateral"] = net_outs["lateral"]

                        keys_to_save = ("outs_phase_1", "outs_phase_2")
                        for key in set(net_outs.keys()) - set(keys_to_save):
                            del net_outs[key]
                        references.append(net_outs)

                    # key frame with annotation, but not compute full backbone
                    frame = datum[0]
                    images, annots = frame
                    frame = (images, annots,)
                    images, targets, masks, num_crowds = prepare_data(frame)

                    extras = {"backbone": "full",
                              "interrupt": not cfg.flow.base_backward,
                              "moving_statistics": moving_statistics}
                    gt_net_outs = net(images, extras=extras)
                    if cfg.flow.base_backward:
                        losses = backward_and_log("compute", gt_net_outs,
                                                  targets, masks, num_crowds)

                    keys_to_save = ("outs_phase_1", "outs_phase_2")
                    for key in set(gt_net_outs.keys()) - set(keys_to_save):
                        del gt_net_outs[key]

                    # now do the warp
                    if len(references) > 0:
                        reference_frame = references[0]
                        extras = {"backbone": "partial",
                                  "moving_statistics": moving_statistics}
                        net_outs = net(images, extras=extras)
                        extra_loss = yolact_net.extra_loss(net_outs, gt_net_outs)
                        losses = backward_and_log("warp", net_outs, targets,
                                                  masks, num_crowds,
                                                  extra_loss=extra_loss)

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time
                w.add_scalar('meta/data_time', data_time)
                w.add_scalar('meta/iter_time', elapsed)

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(datetime.timedelta(seconds=(cfg.max_iter-iteration) * time_avg.get_avg())).split('.')[0]
                    if torch.cuda.is_available():
                        max_mem_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0
                        # torch.cuda.reset_max_memory_allocated()
                    else:
                        max_mem_mb = None

                    logger.info("""\
eta: {eta} epoch: {epoch} iter: {iter} \
{losses} {loss_total} \
time: {time} data_time: {data_time} lr: {lr} {memory}\
""".format(
                        eta=eta_str, epoch=epoch, iter=iteration,
                        losses=" ".join(
                            ["{}: {:.3f}".format(k, loss_avgs[k].get_avg()) for k in losses]
                        ),
                        loss_total="T: {:.3f}".format(sum([loss_avgs[k].get_avg() for k in losses])),
                        data_time="{:.3f}".format(data_time_avg.get_avg()),
                        time="{:.3f}".format(elapsed),
                        lr="{:.6f}".format(lr),
                        memory="max_mem: {:.0f}M".format(max_mem_mb)
                    ))

                # Rank 0 periodically logs flow visualizations to the summary.
                if rank == 0 and iteration % 100 == 0:
                    if cfg.flow.train_flow:
                        import flowiz as fz
                        from layers.warp_utils import deform_op
                        tgt_size = (64, 64)
                        flow_size = flows.size()[2:]
                        vis_data = []
                        for pred_flow in net_outs:
                            vis_data.append(pred_flow)

                        deform_gt = deform_op(imgs_2, flows)
                        flows_pred = [F.interpolate(x, size=flow_size, mode='bilinear', align_corners=False) for x in net_outs]
                        deform_preds = [deform_op(imgs_2, x) for x in flows_pred]

                        vis_data.append(F.interpolate(flows, size=tgt_size, mode='area'))
                        vis_data = [F.interpolate(flow[:1], size=tgt_size) for flow in vis_data]
                        vis_data = [fz.convert_from_flow(flow[0].data.cpu().numpy().transpose(1, 2, 0))
                                    .transpose(2, 0, 1).astype('float32') / 255
                                    for flow in vis_data]

                        def convert_image(image):
                            # Resize, de-normalize (MEANS/STD), and channel-flip
                            # a network-input image for visualization.
                            image = F.interpolate(image, size=tgt_size, mode='area')
                            image = image[0]
                            image = image.data.cpu().numpy()
                            image = image[::-1]
                            image = image.transpose(1, 2, 0)
                            image = image * np.array(STD) + np.array(MEANS)
                            image = image.transpose(2, 0, 1)
                            image = image / 255
                            image = np.clip(image, -1, 1)
                            image = image[::-1]
                            return image

                        vis_data.append(convert_image(imgs_1))
                        vis_data.append(convert_image(imgs_2))
                        vis_data.append(convert_image(deform_gt))
                        vis_data.extend([convert_image(x) for x in deform_preds])

                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)

                    elif cfg.flow.warp_mode == "flow":
                        import flowiz as fz
                        tgt_size = (64, 64)
                        vis_data = []
                        for pred_flow, _, _ in net_outs["preds_flow"]:
                            vis_data.append(pred_flow)

                        vis_data = [F.interpolate(flow[:1], size=tgt_size) for flow in vis_data]
                        vis_data = [fz.convert_from_flow(flow[0].data.cpu().numpy().transpose(1, 2, 0))
                                    .transpose(2, 0, 1).astype('float32') / 255
                                    for flow in vis_data]

                        input_image = F.interpolate(images, size=tgt_size, mode='area')
                        input_image = input_image[0]
                        input_image = input_image.data.cpu().numpy()
                        input_image = input_image.transpose(1, 2, 0)
                        input_image = input_image * np.array(STD[::-1]) + np.array(MEANS[::-1])
                        input_image = input_image.transpose(2, 0, 1)
                        input_image = input_image / 255
                        input_image = np.clip(input_image, -1, 1)
                        vis_data.append(input_image)

                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)

                iteration += 1
                w.set_step(iteration)

                if rank == 0 and iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    logger.info('Saving state, iter: {}'.format(iteration))
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            logger.info('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    if rank == 0:
                        compute_validation_map(yolact_net, val_dataset)
                    dist.barrier()

    except KeyboardInterrupt:
        if args.interrupt_no_save:
            logger.info('No save on interrupt, just exiting...')
        elif rank == 0:
            print('Stopping early. Saving network...')
            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)
            yolact_net.save_weights(save_path(epoch, repr(iteration) + '_interrupt'))
        return

    if rank == 0:
        yolact_net.save_weights(save_path(epoch, iteration))
def train():
    """Main YOLACT training loop (multi-GPU via CustomDataParallel).

    Reads all configuration from the module-level ``args`` and ``cfg``
    globals; saves weight snapshots under ``args.save_folder`` and runs
    periodic validation. Ctrl+C saves an ``*_interrupt`` snapshot unless
    ``args.interrupt`` is false.
    """
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we
    # don't want that, so keep a handle on the raw network.
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    if args.log:
        log = Log(cfg.name, args.log_folder, dict(args._get_kwargs()),
                  overwrite=(args.resume is None), log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it
    # just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check.
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)
        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio)

    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print('Error: Batch allocation (%s) does not sum to batch size (%s).'
                  % (args.batch_alloc, args.batch_size))
            exit(-1)

    net = CustomDataParallel(NetLoss(net, criterion),
                             device_ids=[0, 1, 2, 3, 4, 5, 6, 7])
    if args.cuda:
        net = net.cuda()

    # Initialize everything with one dummy forward pass.
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn:
        yolact_net.freeze_bn(True)

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True, drop_last=True)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break
                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])
                        # Reset the loss averages because things might have changed.
                        # FIX: iterate .values() — iterating the dict yields its
                        # string keys, and str has no .reset().
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so we
                # don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some
                # smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer,
                           (args.lr - cfg.lr_warmup_init)
                           * (iteration / cfg.lr_warmup_until)
                           + cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we
                # resume from past that iteration
                while step_index < len(cfg.lr_steps) \
                        and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma ** step_index))

                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                # Forward Pass + Compute loss at the same time
                # (see CustomDataParallel and NetLoss)
                losses = net(datum)

                losses = {k: (v).mean() for k, v in losses.items()}  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])
                # no_inf_mean removes some components from the loss, so make sure
                # to backward through all of it
                # all_loss = sum([v.mean() for v in losses.values()])

                # Backprop
                loss.backward()  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(datetime.timedelta(
                        seconds=(cfg.max_iter - iteration) * time_avg.get_avg())
                    ).split('.')[0]
                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])
                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses))
                           + ' T: %.3f || ETA: %s || timer: %.3f')
                          % tuple([epoch, iteration] + loss_labels
                                  + [total, eta_str, elapsed]), flush=True)

                if args.log:
                    precision = 5
                    loss_info = {k: round(losses[k].item(), precision)
                                 for k in losses}
                    loss_info['T'] = round(loss.item(), precision)
                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0)  # nvidia-smi is sloooow
                    # NOTE(review): cur_lr is presumably a module-level global
                    # maintained by set_lr() — confirm it exists in this file.
                    log.log('train', loss=loss_info, epoch=epoch, iter=iteration,
                            lr=round(cur_lr, 10), elapsed=elapsed)
                    log.log_gpu_stats = args.log_gpu

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 \
                                or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(epoch, iteration, yolact_net,
                                           val_dataset, log if args.log else None)

        # Compute validation mAP after training is finished
        compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                               log if args.log else None)
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')
            # Delete previous copy of the interrupted network so we don't spam
            # the weights folder
            SavePath.remove_interrupt(args.save_folder)
            yolact_net.save_weights(save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
def train():
    """Train an SSD detector on one of several datasets, optionally with
    custom (pre-computed) prior boxes loaded from ``args.custom_priors``.

    Everything is configured via the module-level ``args``; saves periodic
    checkpoints to ``weights/cache/`` and a final model to ``args.save_folder``.
    """
    # Select dataset + config. Each branch binds `cfg` and `dataset`.
    if args.dataset == 'COCO18':
        # cfg = coco18
        cfg = vococo
        rt = args.dataset_root or COCO_ROOT
        dataset = COCODetection(root=rt,
                                transform=SSDAugmentation(cfg['min_dim'], MEANS),
                                target_transform=COCOAnnotationTransform('COCO18'))
    elif args.dataset == 'COCO':
        cfg = coco
        # cfg = vococo
        rt = args.dataset_root or COCO_ROOT
        dataset = COCODetection(root=rt, image_sets=(('2017', 'train'),),
                                transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        cfg = voc
        # cfg = coco_on_voc
        rt = args.dataset_root or VOC_ROOT
        dataset = VOCDetection(root=rt,
                               transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset == 'BCCD':
        cfg = bccd
        rt = args.dataset_root or BCCD_ROOT
        dataset = BCCDDetection(root=rt,
                                transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset == 'SHWD':
        cfg = shwd
        rt = args.dataset_root or SHWD_ROOT
        dataset = SHWDDetection(root=rt,
                                transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset == 'helmet':
        cfg = helmet
        rt = args.dataset_root or HELMET_ROOT
        dataset = HelmetDetection(root=rt,
                                  transform=SSDAugmentation(cfg['min_dim'], MEANS))
    else:
        raise RuntimeError()

    if args.custom_priors is not None:
        # Load externally optimized priors and build the net around them.
        apt = IOAdapterSSD(cfg, 'test')
        apt.load(*torch.load(args.custom_priors))
        custom_priors = apt.fit_output(apt.msks[0])
        print('num_boxes = %d ' % custom_priors.size()[0])
        custom_mbox = None
        # params = torch.load(args.custom_priors)
        # # bbox = gen_priors(params, args.prior_types, cfg)
        # gen = AdaptivePriorBox(cfg, phase='test')
        # custom_priors = gen.forward(params)
        # custom_mbox = [p.size(0) for p in params]
        if args.cuda:
            custom_priors = custom_priors.cuda()
        ssd_net = build_ssd('train', cfg, custom_mbox, custom_priors)
    else:
        # priors = torch.load('anchors/voc_baseline.pth')
        # if args.cuda:
        #     priors = priors.cuda()
        ssd_net = build_ssd('train', cfg)
    net = ssd_net

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net).cuda()
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False,
                             best_prior_weight=args.k, use_gpu=args.cuda)

    net.train()
    # loss counters (running sums; never reset — only informational)
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    # When resuming past LR milestones, replay the decay steps so the
    # optimizer starts at the correct learning rate.
    for step in cfg['lr_steps']:
        if args.start_iter > step:
            print('over %d steps, adjust lr' % step)
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)
        else:
            break

    for iteration in range(args.start_iter, cfg['max_iter']):
        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data; restart the iterator when the epoch is exhausted
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)
        if args.cuda:
            images = images.cuda()
            targets = [ann.cuda() for ann in targets]
        # targets = targets.cuda()
        # else:
        # # targets = [ann for ann in targets]

        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()),
                  end=' ')

        if iteration != 0 and iteration % 2000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(),
                       ('weights/cache/%s_%s_' % (args.save_name, args.dataset))
                       + repr(iteration) + '.pth')

    name = '%s_%s' % (args.save_name, args.dataset)
    torch.save(ssd_net.state_dict(), args.save_folder + name + '.pth')
def interpret():
    """Make one pass over the training DataLoader to verify every sample can
    be loaded and collated, printing progress every 10000 batches.

    The model/optimizer setup mirrors train() so the check runs in the same
    environment, but no forward/backward pass is performed.
    """
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we
    # don't want that, so keep a handle on the raw network.
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)
        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net = nn.DataParallel(net).cuda()
        criterion = nn.DataParallel(criterion).cuda()

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    print("Dataset Size:")
    print(len(dataset))
    num_epochs = math.ceil(cfg.max_iter / epoch_size)
    num_epochs = 1  # a single pass is enough for a load check

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin interpret!')
    print()

    # FIX: define count before the try block — a KeyboardInterrupt raised
    # before the loop body runs would otherwise make print(count) in the
    # handler raise UnboundLocalError.
    count = 0
    # try-except so you can use ctrl+c to stop the scan early
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue
            count = 0
            for datum in data_loader:
                # Only loading matters; free the batch immediately.
                del datum
                count += 1
                if count % 10000 == 0:
                    print(count)
    except KeyboardInterrupt:
        print('Stopping early. Saving network...')
        print("Loaded Dataset Numbers")
        print(count)
def train():
    """Train SSD on VOC or SIXray, with optional visdom loss plotting.

    Configuration comes from the module-level ``args``; checkpoints are saved
    every 2000 iterations plus a final model in ``args.save_folder``.
    """
    if args.dataset == 'VOC':
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset == 'SIXray':
        cfg = ray
        # NOTE(review): SIXray uses BaseTransform (no augmentation) for
        # training while VOC uses SSDAugmentation — confirm this is intended.
        dataset = SIXrayDetection(root=args.dataset_root,
                                  transform=BaseTransform(cfg['min_dim'], MEANS))

    if args.visdom:
        import visdom
        viz = visdom.Visdom()

    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset)
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    if epoch_size > cfg['max_iter']:
        epoch_size = cfg['max_iter']
    print('Size of dataset:', epoch_size)
    for iteration in range(args.start_iter, epoch_size):
        # NOTE(review): with range() stopping at epoch_size this branch can
        # never fire (iteration < epoch_size always); kept for parity with
        # the original loop shape.
        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data; restart the iterator when exhausted
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)

        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(ann.cuda(), volatile=True) for ann in targets]
        else:
            images = Variable(images)
            targets = [Variable(ann, volatile=True) for ann in targets]

        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        # loc_loss += loss_l.data[0] --> .item() (PyTorch >= 0.4)
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        loss_num = loss.item()

        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.3f ||' % loss_num,
                  end=' ')

        if args.visdom:
            # FIX: .data[0] raises IndexError on 0-dim tensors in
            # PyTorch >= 0.5; the rest of this function already migrated
            # to .item(), these two calls were missed.
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 2000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(),
                       ('weights/ssd300_Ray_' + repr(iteration)
                        + '_%.3f.pth' % loss_num))

    torch.save(ssd_net.state_dict(),
               args.save_folder + '' + args.dataset + '.pth')
def train():
    """Train SSD on the SIXRAY dataset with the Adam optimizer.

    Saves a checkpoint after every epoch; visdom plotting is optional.
    Configuration comes from the module-level ``args``.
    """
    if args.dataset == 'SIXRAY':
        if args.dataset_root == SIXray_ROOT:
            if not os.path.exists(SIXray_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default SIXRAY dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = SIXray_ROOT
        cfg = sixray
        # list=[]
        # print("cfg", cfg)
        dataset = SIXrayDetection(root=args.dataset_root,
                                  transform=SSDAugmentation(cfg['min_dim'], MEANS))
        # list.append(dataset)
        # print("dataset:", list)
    else:
        print("ERRO: Only Using default SIXRAY dataset_root.")
        return

    if args.visdom:
        import visdom
        global viz
        viz = visdom.Visdom()

    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net

    if args.cuda:
        # device_ids = [0,1]
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        net = net.cuda()
        # net = net.to(device)

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    # optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
    #                       weight_decay=args.weight_decay)
    optimizer = optim.Adam(net.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 1
    print('Loading the dataset...')
    epoch_size = len(dataset) // args.batch_size
    print('epoch_size:', epoch_size)
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True, drop_last=True)
    # data_loader = data.DataLoader(dataset, args.batch_size,
    #                               num_workers=args.num_workers,
    #                               shuffle=True, drop_last=True)
    print(len(data_loader))
    print('load data over ~')

    # FIX: renamed from `iter`, which shadowed the builtin iter().
    iteration = 0
    for epoch in range(1, 100):
        for _, data_ in enumerate(data_loader):
            iteration += 1
            images, targets = data_
            print(targets)  # debug leftover: dumps ground truth each batch
            if args.cuda:
                images = Variable(images.cuda())
                targets = [Variable(ann.cuda(), volatile=True) for ann in targets]
                # images = Variable(images.to(device))
                # targets = [Variable(ann.to(device), volatile=True) for ann in targets]
            else:
                images = Variable(images)
                targets = [Variable(ann, volatile=True) for ann in targets]

            # forward
            t0 = time.time()
            out = net(images)
            # print('------output',(out[0].size(),out[1].size()))
            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()
            # FIX: accumulate Python floats via .item() instead of keeping
            # (possibly GPU) tensors alive with `+= loss_l.data`.
            loc_loss += loss_l.item()
            conf_loss += loss_c.item()

            if iteration % 1 == 0:
                print('timer: %.4f sec.' % (t1 - t0))
                print('iter ' + repr(iteration) + ' || Loss: %.4f ||'
                      % (loss.item()), end=' ')
                if args.visdom:
                    update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                                    iter_plot, epoch_plot, 'append')

        if epoch % 1 == 0:
            print('Saving state, iter:', epoch)
            torch.save(ssd_net.state_dict(),
                       'weights/ssd300_SIXRAY_' + repr(epoch) + '.pth')

        if args.visdom and epoch != 0:
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
def train():
    """Train an SSD-512 (ResNeXt backbone) detector on a TSV-listed dataset.

    The learning rate is recomputed every iteration by
    adjust_learning_rate() so warm-up ("burnin") works; one checkpoint is
    saved per epoch under ``args.save_folder``.
    """
    cfg = ssd512
    dataset = DetectionDataset(os.path.join(args.dataset_root, 'train.tsv'),
                               transform=SSDAugmentation(cfg['min_dim'], MEANS))

    ssd_net = build_ssd('train', cfg, args.use_pred_module)
    net = ssd_net
    print(net)

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        resnext_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        # strict=False: backbone checkpoint won't contain the SSD heads.
        ssd_net.resnext.load_state_dict(resnext_weights, strict=False)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, cfg, args.cuda, loss_type=args.loss_type)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    print('Loading the dataset...', len(dataset))
    epoch_size = len(dataset) // args.batch_size
    print('Using the specified args:')
    print(args)

    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers, shuffle=True,
                                  collate_fn=detection_collate, pin_memory=True,
                                  drop_last=True)

    total_samples = len(data_loader.sampler)
    batch_size = data_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)
    # print(total_samples, batch_size, steps_per_epoch)  # 16551 32 518

    for epoch in range(args.start_epoch, cfg['max_epoch']):
        if epoch in cfg['lr_steps']:
            step_index += 1
        for iteration, (images, targets) in enumerate(data_loader):
            # to make burnin working, we adjust lr every iteration
            lr = adjust_learning_rate(optimizer, args.gamma, step_index, epoch,
                                      iteration, steps_per_epoch)
            if args.cuda:
                images = images.cuda()
                targets = [ann.cuda() for ann in targets]
            else:
                images = Variable(images)
                targets = [Variable(ann, volatile=True) for ann in targets]

            t0 = time.time()
            out = net(images)
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()
            if iteration % 10 == 0:
                # FIX: format the scalar via .item() — passing a Tensor
                # (loss.data) to the '{:6.4f}' spec raises TypeError on the
                # PyTorch versions this code targets.
                print('iter {0:3d}/{1} || Loss: {2:6.4f} || lr: {3:.6f}|| {4:.4f} sec'
                      .format(iteration, len(data_loader), loss.item(), lr,
                              (t1 - t0)))

        print('Saving state, epoch:', epoch)
        torch.save(ssd_net.state_dict(),
                   args.save_folder + args.weight_prefix + repr(epoch) + '.pth')
def train():
    """Train SSD on the SIXray (xray) dataset with exponential LR decay.

    The LR is decayed via ExponentialLR.step() each time the iteration count
    crosses one of cfg['lr_steps']; checkpoints are saved every 5000
    iterations plus a final model in ``args.save_folder``.
    """
    cfg = xray
    dataset = SIXrayDetection(root=args.dataset_root, image_set='train.txt',
                              transform=SSDAugmentation(cfg['min_dim'], MEANS))
    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    scheduler = ExponentialLR(optimizer, gamma=args.gamma)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    net.train()
    # loss counters (running sums; never reset — informational only)
    loc_loss = 0
    conf_loss = 0
    print('Loading the dataset...')
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    # NOTE(review): step_index is seeded with the resume iteration, not 0,
    # but it is only incremented below and never read — looks like leftover
    # from a milestone-based LR schedule; confirm before relying on it.
    step_index = args.start_iter

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers, shuffle=True,
                                  collate_fn=detection_collate, pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        # load train data; restart the iterator when the epoch is exhausted
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)
        if args.cuda:
            images = images.cuda()
            targets = [ann.cuda() for ann in targets]
        else:
            images = images
            targets = [ann for ann in targets]

        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()

        # Decay the LR once per configured milestone iteration.
        if iteration in cfg['lr_steps']:
            step_index += 1
            scheduler.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()),
                  end=' ')

        if iteration != 0 and iteration % 5000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(),
                       'weights/ssd300_SIXray_' + repr(iteration) + '.pth')

    torch.save(ssd_net.state_dict(),
               args.save_folder + '' + dataset.name + '.pth')
def train():
    """Domain-adaptive SSD training: supervised detection on a labeled source
    dataset plus adversarial/consistency losses against an unlabeled target
    dataset (clipart / watercolor / comic).

    Loss terms: detection (loc + conf), local & global domain losses,
    global-feature alignment (gf), global class regularization (gcr), and —
    after epoch 20 — prototype alignment (gpa) with class-balance reweighting
    based on per-epoch class statistics (old_state / new_state).
    """
    # --- source dataset ---
    if args.dataset == 'VOC':
        cfg = voc
        dataset = VOCDetection(
            transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset == 'cs':
        cfg = cs
        dataset = CSDetection(transform=SSDAugmentation(cfg['min_dim'], MEANS))
    # --- target (unlabeled) dataset ---
    if args.dataset_target == 'clipart':
        dataset_t = CLPDetection(
            transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset_target == 'water':
        dataset_t = WATERDetection(
            transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset_target == 'comic':
        dataset_t = COMICDetection(
            transform=SSDAugmentation(cfg['min_dim'], MEANS))

    ssd_net = build_ssd('train', cfg, cfg['min_dim'], cfg['num_classes'],
                        args.pa_list)
    net = ssd_net
    FL = FocalLoss(class_num=2, gamma=args.gamma_fl)

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load(args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    net.train()
    step_per_epoch = len(dataset) // args.batch_size
    print('The number of dataset %s is %d' % (args.dataset, len(dataset)))
    print('The number of target dataset %s is %d' % (args.dataset_target,
                                                     len(dataset_t)))
    print('Using the specified args:')
    print(args)
    print('Loading the dataset...')

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    data_loader_t = data.DataLoader(dataset_t, args.batch_size,
                                    num_workers=args.num_workers,
                                    shuffle=True, collate_fn=detection_collate,
                                    pin_memory=True)

    img_per_epoch = step_per_epoch * args.batch_size
    # old_state[c]: how often class c was the target-batch argmax last epoch.
    old_state = torch.zeros(cfg['num_classes'] - 1)
    for epoch in range(args.start_epoch, args.end_epoch + 1):
        epoch_time = time.time()
        if epoch in args.lr_epoch:
            adjust_learning_rate(optimizer, args.gamma)
        # Per-display-window running sums (reset every disp_interval iters).
        all_loss = 0
        reg_loss = 0
        cls_loss = 0
        gpa_loss = 0
        grc_loss = 0
        gf_loss = 0
        start_time = time.time()
        batch_iterator = iter(data_loader)
        batch_iterator_t = iter(data_loader_t)
        new_state = torch.zeros(cfg['num_classes'] - 1)
        for iteration in range(1, step_per_epoch + 1):
            # Linear LR warm-up over the first 160 iterations of epoch 1.
            if epoch == 1 and iteration <= 160:
                warm_up_lr(optimizer, iteration, 160)
            # load train data (source and target draw independently)
            try:
                images, targets = next(batch_iterator)
            except StopIteration:
                batch_iterator = iter(data_loader)
                images, targets = next(batch_iterator)
            try:
                images_t, targets_t = next(batch_iterator_t)
            except StopIteration:
                batch_iterator_t = iter(data_loader_t)
                images_t, targets_t = next(batch_iterator_t)

            cls_onehot = gt_classes2cls_onehot(targets, cfg['num_classes'] - 1)  # shape = [bs, num_classes]
            cls_onehot = Variable(torch.from_numpy(cls_onehot).cuda())
            images = Variable(images.cuda())
            targets = [Variable(ann.cuda()) for ann in targets]
            images_t = Variable(images_t.cuda())

            optimizer.zero_grad()
            #### forward
            # source domain
            out, domain_g, domain_l, fea_lists, gcr_pre, global_feat, _ = net(
                images)
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            # target domain (no detection loss; labels unused)
            _, domain_g_t, domain_l_t, fea_lists_t, gcr_pre_t, global_feat_t, loss_kl = net(
                images_t, target=True)
            loss_kl *= args.kl_weight

            #### calculate new state and get w
            # new state: count the predicted dominant class per target image
            ind_max_cls = torch.argmax(gcr_pre_t.detach(), 1)
            for i in ind_max_cls:
                new_state[i] += 1
            # w1: confidence-based weight
            w1 = dcbr_w1_weight(gcr_pre_t.sigmoid().detach())
            # w2: rarity weight from last epoch's class statistics
            w2 = torch.exp(1 - old_state[ind_max_cls] / img_per_epoch)
            # Reweighting only kicks in after epoch 20; uniform before that.
            weight = (w1 + w2) * 0.5 if epoch > 20 else torch.ones(
                w1.size(0))  # [bs]

            ################## domain adaptation loss ##################
            ###### source
            ## local
            dloss_l = 0.5 * torch.mean(domain_l**2)
            ## global
            # focal loss
            # domain_s = Variable(torch.zeros(domain_g.size(0)).long().cuda())
            # dloss_g = 0.5 * FL(domain_g, domain_s)
            # weighted ce loss
            dloss_g = 0.5 * weight_ce_loss(domain_g, 0,
                                           torch.ones(domain_g.size(0))) * 0.1
            ###### target
            ## local
            dloss_l_t = 0.5 * torch.mean((1 - domain_l_t)**2)
            ## global
            # focal loss
            # domain_s_t = Variable(torch.ones(domain_g_t.size(0)).long().cuda())
            # dloss_g_t = 0.5 * FL(domain_g_t, domain_s_t)
            # weighted ce loss
            dloss_g_t = 0.5 * weight_ce_loss(domain_g_t, 1,
                                             weight) * args.dcbr_weight
            ###### gf : global feat loss
            # NOTE(review): the 38 scale factor is unexplained here —
            # presumably tuned empirically; confirm against the paper/config.
            loss_gf = 38 * torch.pow(global_feat - global_feat_t, 2.0).mean()
            loss += loss_gf
            ###### gcr loss
            loss_gcr = nn.BCEWithLogitsLoss()(gcr_pre,
                                              cls_onehot) * args.gcr_weight
            loss += loss_gcr
            ###### pa (prototype alignment), enabled only after epoch 20
            if epoch > 20:
                loss_gpa = get_pa_losses(fea_lists, fea_lists_t)
                loss += loss_gpa
            # loss += loss_kl
            ################## domain adaptation loss ##################
            ### backward
            loss += (dloss_g + dloss_g_t + dloss_l + dloss_l_t)
            loss.backward()
            optimizer.step()

            # accumulate running sums for the display window
            all_loss += loss.item()
            reg_loss += loss_l.item()
            cls_loss += loss_c.item()
            grc_loss += loss_gcr.item()
            gf_loss += loss_gf.item()
            if epoch > 20 and loss_gpa:
                gpa_loss += loss_gpa.item()

            if iteration % args.disp_interval == 0:
                ## display: convert window sums to means
                all_loss /= args.disp_interval
                reg_loss /= args.disp_interval
                cls_loss /= args.disp_interval
                gpa_loss /= args.disp_interval
                grc_loss /= args.disp_interval
                gf_loss /= args.disp_interval
                det_loss = reg_loss + cls_loss
                end_time = time.time()
                get_lr = optimizer.param_groups[0]['lr']
                print(
                    '[epoch %2d][iter %4d/%4d]|| Loss: %.4f || lr: %.2e || Time: %.2f sec'
                    % (epoch, iteration, step_per_epoch, all_loss, get_lr,
                       end_time - start_time))
                print(
                    '\t det_loss: %.4f || reg_loss: %.4f || cls_loss: %.4f || la_ga_loss: %.4f || gpa_loss: %.4f || gcr_loss: %.4f || gf_loss: %.6f'
                    % (det_loss, reg_loss, cls_loss,
                       all_loss - det_loss - gpa_loss - grc_loss - gf_loss,
                       gpa_loss, grc_loss, gf_loss))
                ## log
                info = {
                    'all_loss': all_loss,
                    'det_loss': det_loss,
                    'reg_loss': reg_loss,
                    'cls_loss': cls_loss,
                    'la_ga_loss': all_loss - det_loss - gpa_loss - grc_loss - gf_loss,
                    'gpa_loss': gpa_loss,
                    'gcr_loss': grc_loss,
                    'gf_loss': gf_loss,
                }
                logger.add_scalars(args.name, info,
                                   iteration + (epoch - 1) * step_per_epoch)
                ## reset the display window
                all_loss = 0
                reg_loss = 0
                cls_loss = 0
                gpa_loss = 0
                grc_loss = 0
                gf_loss = 0
                start_time = time.time()

        # This epoch's class statistics drive next epoch's w2 weighting.
        old_state = new_state
        print('This epoch cost %.4f sec' % (time.time() - epoch_time))
        if (epoch % 10 == 0) or (epoch in [54, 58, 62, 66, 70]):
            save_pth = os.path.join(args.save_folder, str(epoch) + '.pth')
            print('Saving state', save_pth)
            torch.save(ssd_net.state_dict(), save_pth)
def train():
    """Run the SSD training loop on VOC with optional class-'hint' inputs.

    Relies on module-level state: ``net``, ``ssd_net``, ``criterion``,
    ``optimizer``, ``args``, ``viz``, ``logger``, ``stepvalues``, ``max_iter``,
    ``hint_constants``, ``ssd_dim``, ``means``, ``train_sets``.
    Side effects: optimizer steps, visdom plots, tensorboard-style logging,
    periodic checkpoints under ``args.save_folder``.
    NOTE(review): written against pre-0.4 PyTorch (``Variable``,
    ``volatile=True``, ``loss.data[0]``) — confirm the target torch version.
    """
    #import cProfile, pstats
    #from io import StringIO
    #pr = cProfile.Profile()
    #pr.enable()
    net.train()
    # loss counters (accumulated per epoch, reset at each LR step below)
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0
    print('Loading Dataset...')
    dataset = VOCDetection(args.voc_root, train_sets,
                           SSDAugmentation(ssd_dim, means),
                           AnnotationTransform())
    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on', dataset.name)
    step_index = 0
    if args.visdom:
        # initialize visdom loss plot (per-iteration and per-epoch windows)
        lot = viz.line(X=torch.zeros((1, )).cpu(),
                       Y=torch.zeros((1, 3)).cpu(),
                       opts=dict(xlabel='Iteration',
                                 ylabel='Loss',
                                 title='Current SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
        epoch_lot = viz.line(X=torch.zeros((1, )).cpu(),
                             Y=torch.zeros((1, 3)).cpu(),
                             opts=dict(xlabel='Epoch',
                                       ylabel='Loss',
                                       title='Epoch SSD Training Loss',
                                       legend=['Loc Loss', 'Conf Loss', 'Loss']))
    batch_iterator = None
    data_loader = data.DataLoader(dataset, batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    for iteration in range(args.start_iter, max_iter):
        #t00 = time.time()
        doTimingStuff = False and iteration % 10 == 0  # NOTE(review): always False; unused
        if (not batch_iterator) or (iteration % epoch_size == 0):
            # create batch iterator (restarted every epoch)
            batch_iterator = iter(data_loader)
        if iteration in stepvalues:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)
            if args.visdom:
                viz.line(
                    X=torch.ones((1, 3)).cpu() * epoch,
                    Y=torch.Tensor([loc_loss, conf_loss,
                                    loc_loss + conf_loss
                                    ]).unsqueeze(0).cpu() / epoch_size,
                    win=epoch_lot, update='append')
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1
        # load train data
        #images, targets, feats = next(batch_iterator)
        images, targets = next(batch_iterator)
        if args.cuda:
            images = images.cuda()
            images = Variable(images)
            if args.top_down_source != '':
                # NOTE(review): `feats` is never assigned on this path — the
                # 3-tuple unpack above is commented out, so this raises
                # NameError when --top_down_source is set. Confirm intent.
                feats = feats.cuda()
                feats = Variable(feats.cuda())
            # NOTE(review): wrapped as Variable first, then re-bound to raw
            # .cuda() tensors on the next line — the Variable wrap is moot here.
            targets = [Variable(anno, volatile=True) for anno in targets]
            targets = [anno.cuda() for anno in targets]
            #targets = [Variable(anno.cuda(), volatile=True) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno, volatile=True) for anno in targets]
        # forward
        hints = None
        if args.use_hint:
            #hints = torch.zeros(len(targets), 20); # was zeros! also try 1/20
            if args.top_down_source != '':
                #hints = hint_constants[1] * torch.ones (len (targets), 2048)
                #for irow in len(hints):
                hints = feats
            else:
                # one hint row per image, filled with the "off" constant
                hints = hint_constants[1] * torch.ones(
                    len(targets), 20)  # was zeros! also try 1/20
                #print('target type:',type(targets))
                #print('targets:',targets)
                for i_target, t in enumerate(
                        targets
                ):  # if we're using a hint, we can't have more than one type of object per image.
                    target_class = t[:, -1]
                    assert len(
                        set(target_class.data)
                    ) == 1, 'cannot accept a heterogeneous hint (hint contains more than one class'
                    #print('target_class:',target_class)
                    # mark the image's single class with the "on" constant
                    hints[i_target, int(target_class.data[0])] = hint_constants[0]
                    #hints[i_target,target_class[0].data] = 1
            hints = Variable(hints)
            if args.cuda:
                hints = hints.cuda()
        t0 = time.time()
        # vgg, extra,loc,conf
        out = net(images, hints, args.hint_vgg, args.hint_extra,
                  args.hint_loc, args.hint_conf)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]
        if iteration % 10 == 0:
            print('Timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' %
                  (loss.data[0]), end=' ')
            if args.visdom and args.send_images_to_visdom:
                random_batch_index = np.random.randint(images.size(0))
                viz.image(images.data[random_batch_index].cpu().numpy())
        if args.visdom:
            viz.line(X=torch.ones((1, 3)).cpu() * iteration,
                     Y=torch.Tensor([
                         loss_l.data[0], loss_c.data[0],
                         loss_l.data[0] + loss_c.data[0]
                     ]).unsqueeze(0).cpu(),
                     win=lot, update='append')
            # hacky fencepost solution for 0th epoch plot
            if iteration == 0:
                viz.line(X=torch.zeros((1, 3)).cpu(),
                         Y=torch.Tensor([loc_loss, conf_loss,
                                         loc_loss + conf_loss
                                         ]).unsqueeze(0).cpu(),
                         win=epoch_lot, update=True)
        if iteration % 10 == 0:
            logger.log_value('loc loss', loss_l.data[0], iteration)
            logger.log_value('conf loss', loss_c.data[0], iteration)
            # log gradients.
            if False:  # disabled: per-parameter gradient logging
                for iprm, prm in enumerate(net.parameters()):
                    if prm.requires_grad:
                        #print(iprm)
                        logger.log_value('prm {}'.format(iprm),
                                         prm.grad.abs().mean().data[0],
                                         iteration)
        if (iteration + 1) % args.checkpoint_freq == 0 or iteration == 0:
            print('Saving state, iter:', iteration)
            torch.save(
                ssd_net.state_dict(),
                os.path.join(args.save_folder,
                             'ssd300_0712_' + repr(iteration + 1) + '.pth'))
    # final snapshot after the last iteration
    torch.save(ssd_net.state_dict(),
               args.save_folder + '' + args.version + '.pth')
def main():
    """Entry point for end-to-end UCF24 SSD training.

    Parses CLI args, seeds RNGs, builds the SSD net and optimizer, loads
    pretrained / resumed weights, then alternates ``train(...)`` epochs with
    periodic ``validate(...)`` passes, checkpointing via ``save_checkpoint``.
    Relies on module-level names: ``str2bool``, ``v2``, ``CLASSES``,
    ``build_ssd``, ``print_log``, ``MultiBoxLoss``, ``UCF24Detection``,
    ``SSDAugmentation``, ``BaseTransform``, ``AnnotationTransform``,
    ``detection_collate``, ``train``, ``validate``, ``save_checkpoint``.

    Fix: the final validation-time log line wrote ``ptr_str`` (the mAP
    string) instead of ``prt_str`` (the timing string).
    """
    global my_dict, keys, k_len, arr, xxx, args, log_file, best_prec1
    relative_path = '/data4/lilin/my_code'
    parser = argparse.ArgumentParser(description='Single Shot MultiBox Detector Training')
    parser.add_argument('--version', default='v2', help='conv11_2(v2) or pool6(v1) as last layer')
    parser.add_argument('--basenet', default='vgg16_reducedfc.pth', help='pretrained base model')
    parser.add_argument('--dataset', default='ucf24', help='pretrained base model')
    parser.add_argument('--ssd_dim', default=300, type=int, help='Input Size for SSD')  # only support 300 now
    parser.add_argument('--modality', default='rgb', type=str, help='INput tyep default rgb options are [rgb,brox,fastOF]')
    parser.add_argument('--jaccard_threshold', default=0.5, type=float, help='Min Jaccard index for matching')
    parser.add_argument('--batch_size', default=32, type=int, help='Batch size for training')
    parser.add_argument('--num_workers', default=0, type=int, help='Number of workers used in dataloading')
    parser.add_argument('--max_iter', default=120000, type=int, help='Number of training iterations')
    parser.add_argument('--man_seed', default=123, type=int, help='manualseed for reproduction')
    parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda to train model')
    parser.add_argument('--ngpu', default=1, type=str2bool, help='Use cuda to train model')
    parser.add_argument('--base_lr', default=0.0005, type=float, help='initial learning rate')
    parser.add_argument('--lr', default=0.0005, type=float, help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, help='momentum')
    parser.add_argument('--weight_decay', default=5e-4, type=float, help='Weight decay for SGD')
    parser.add_argument('--gamma', default=0.2, type=float, help='Gamma update for SGD')
    parser.add_argument('--log_iters', default=True, type=bool, help='Print the loss at each iteration')
    parser.add_argument('--visdom', default=False, type=str2bool, help='Use visdom to for loss visualization')
    parser.add_argument('--data_root', default=relative_path + '/realtime/', help='Location of VOC root directory')
    parser.add_argument('--save_root', default=relative_path + '/realtime/saveucf24/', help='Location to save checkpoint models')
    parser.add_argument('--iou_thresh', default=0.5, type=float, help='Evaluation threshold')
    parser.add_argument('--conf_thresh', default=0.01, type=float, help='Confidence threshold for evaluation')
    parser.add_argument('--nms_thresh', default=0.45, type=float, help='NMS threshold')
    parser.add_argument('--topk', default=50, type=int, help='topk for evaluation')
    parser.add_argument('--clip_gradient', default=40, type=float, help='gradients clip')
    parser.add_argument('--resume', default=None, type=str, help='Resume from checkpoint')
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    parser.add_argument('--epochs', default=35, type=int, metavar='N', help='number of total epochs to run')
    parser.add_argument('--eval_freq', default=2, type=int, metavar='N', help='evaluation frequency (default: 5)')
    parser.add_argument('--snapshot_pref', type=str, default="ucf101_vgg16_ssd300_end2end")
    parser.add_argument('--lr_milestones', default=[-2, -5], type=float, help='initial learning rate')
    parser.add_argument('--arch', type=str, default="VGG16")
    # NOTE(review): type=str means a CLI value is a string, so the
    # `args.Finetune_SSD is True` branch below can never fire from the CLI;
    # presumably this should be str2bool — confirm before changing.
    parser.add_argument('--Finetune_SSD', default=False, type=str)
    parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', help='evaluate model on validation set')
    parser.add_argument('--step', type=int, default=[18, 27], nargs='+',
                        help='the epoch where optimizer reduce the learning rate')
    parser.add_argument('--log_lr', default=False, type=str2bool, help='Use cuda to train model')
    parser.add_argument('--print-log', type=str2bool, default=True, help='print logging or not')
    parser.add_argument('--end2end', type=str2bool, default=False, help='print logging or not')
    ## Parse arguments
    args = parser.parse_args()
    print(__file__)
    file_name = (__file__).split('/')[-1]
    file_name = file_name.split('.')[0]
    print_log(args, file_name)
    ## set random seeds
    np.random.seed(args.man_seed)
    torch.manual_seed(args.man_seed)
    if args.cuda:
        torch.cuda.manual_seed_all(args.man_seed)
    if args.cuda and torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')
    args.cfg = v2
    args.train_sets = 'train'
    args.means = (104, 117, 123)
    num_classes = len(CLASSES) + 1  # +1 for background
    args.num_classes = num_classes
    # args.step = [int(val) for val in args.step.split(',')]
    args.loss_reset_step = 30
    args.eval_step = 10000
    args.print_step = 10
    args.data_root += args.dataset + '/'
    ## Define the experiment Name will used to same directory
    args.snapshot_pref = ('ucf101_CONV-SSD-{}-{}-bs-{}-{}-lr-{:05d}').format(
        args.dataset, args.modality, args.batch_size, args.basenet[:-14],
        int(args.lr * 100000))  # + '_' + file_name + '_' + day
    print_log(args, args.snapshot_pref)
    if not os.path.isdir(args.save_root):
        os.makedirs(args.save_root)
    net = build_ssd(300, args.num_classes)
    if args.Finetune_SSD is True:
        print_log(args, "load snapshot")
        pretrained_weights = "/home2/lin_li/zjg_code/realtime/ucf24/rgb-ssd300_ucf24_120000.pth"
        pretrained_dict = torch.load(pretrained_weights)
        model_dict = net.state_dict()
        # 1. filter out unnecessary keys
        pretrained_dict_2 = {k: v for k, v in pretrained_dict.items() if k in model_dict}
        # 2. overwrite entries in the existing state dict
        # (a block of commented-out vgg.NN layer-index remappings lived here)
        model_dict.update(pretrained_dict_2)
        # 3. load the new state dict
    elif args.resume is not None:
        if os.path.isfile(args.resume):
            print_log(args, ("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            if args.end2end is False:
                args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            net.load_state_dict(checkpoint['state_dict'])
            print_log(args, ("=> loaded checkpoint '{}' (epoch {})"
                             .format(args.evaluate, checkpoint['epoch'])))
        else:
            print_log(args, ("=> no checkpoint found at '{}'".format(args.resume)))
    elif args.modality == 'fastOF':
        print_log(args, 'Download pretrained brox flow trained model weights and place them at:::=> '
                  + args.data_root + 'ucf24/train_data/brox_wieghts.pth')
        pretrained_weights = args.data_root + 'train_data/brox_wieghts.pth'
        print_log(args, 'Loading base network...')
        net.load_state_dict(torch.load(pretrained_weights))
    else:
        vgg_weights = torch.load(args.data_root + 'train_data/' + args.basenet)
        print_log(args, 'Loading base network...')
        net.vgg.load_state_dict(vgg_weights)
    if args.cuda:
        net = net.cuda()

    def xavier(param):
        # xavier-uniform init for a single parameter tensor
        init.xavier_uniform(param)

    def weights_init(m):
        # init conv weights with xavier and zero the biases
        if isinstance(m, nn.Conv2d):
            xavier(m.weight.data)
            m.bias.data.zero_()

    print_log(args, 'Initializing weights for extra layers and HEADs...')
    # initialize newly added layers' weights with xavier method
    if args.Finetune_SSD is False and args.resume is None:
        print_log(args, "init layers")
        net.extras.apply(weights_init)
        net.loc.apply(weights_init)
        net.conf.apply(weights_init)
    parameter_dict = dict(net.named_parameters())  # Get parmeter of network in dictionary format wtih name being key
    params = []
    # Set different learning rate to bias layers and set their weight_decay to 0
    for name, param in parameter_dict.items():
        if args.end2end is False and name.find('vgg') > -1 and int(name.split('.')[1]) < 23:  # :and name.find('cell') <= -1
            param.requires_grad = False
            print_log(args, name + 'layer parameters will be fixed')
        else:
            if name.find('bias') > -1:
                print_log(args, name + 'layer parameters will be trained @ {}'.format(args.lr * 2))
                params += [{'params': [param], 'lr': args.lr * 2, 'weight_decay': 0}]
            else:
                print_log(args, name + 'layer parameters will be trained @ {}'.format(args.lr))
                params += [{'params': [param], 'lr': args.lr, 'weight_decay': args.weight_decay}]
    optimizer = optim.SGD(params, lr=args.base_lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(args.num_classes, 0.5, True, 0, True, 3, 0.5, False, args.cuda)
    scheduler = None
    # scheduler = MultiStepLR(optimizer, milestones=args.step, gamma=args.gamma)
    print_log(args, 'Loading Dataset...')
    train_dataset = UCF24Detection(args.data_root, args.train_sets,
                                   SSDAugmentation(args.ssd_dim, args.means),
                                   AnnotationTransform(), input_type=args.modality)
    val_dataset = UCF24Detection(args.data_root, 'test',
                                 BaseTransform(args.ssd_dim, args.means),
                                 AnnotationTransform(), input_type=args.modality,
                                 full_test=False)
    train_data_loader = data.DataLoader(train_dataset, args.batch_size,
                                        num_workers=args.num_workers,
                                        shuffle=False, collate_fn=detection_collate,
                                        pin_memory=True)
    val_data_loader = data.DataLoader(val_dataset, args.batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False, collate_fn=detection_collate,
                                      pin_memory=True)
    print_log(args, "train epoch_size: " + str(len(train_data_loader)))
    print_log(args, 'Training SSD on' + train_dataset.name)
    # snapshot dataset bookkeeping into module-level globals used elsewhere
    my_dict = copy.deepcopy(train_data_loader.dataset.train_vid_frame)
    keys = list(my_dict.keys())
    k_len = len(keys)
    arr = np.arange(k_len)
    xxx = copy.deepcopy(train_data_loader.dataset.ids)
    # log_file = open(args.save_root + args.snapshot_pref + "_training_" + day + ".log", "w", 1)
    # log_file.write()
    print_log(args, args.snapshot_pref)
    for arg in vars(args):
        print(arg, getattr(args, arg))
        print_log(args, str(arg) + ': ' + str(getattr(args, arg)))
    print_log(args, str(net))
    torch.cuda.synchronize()
    for epoch in range(args.start_epoch, args.epochs):
        train(train_data_loader, net, criterion, optimizer, epoch, scheduler)
        print_log(args, 'Saving state, epoch:' + str(epoch))
        # NOTE(review): best_prec1 is a global that is only assigned when
        # resuming — confirm it is initialized at module level.
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': net.state_dict(),
            'best_prec1': best_prec1,
        }, epoch=epoch)
        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            torch.cuda.synchronize()
            tvs = time.perf_counter()
            mAP, ap_all, ap_strs = validate(args, net, val_data_loader,
                                            val_dataset, epoch,
                                            iou_thresh=args.iou_thresh)
            # remember best prec@1 and save checkpoint
            is_best = mAP > best_prec1
            best_prec1 = max(mAP, best_prec1)
            print_log(args, 'Saving state, epoch:' + str(epoch))
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': net.state_dict(),
                'best_prec1': best_prec1,
            }, is_best, epoch)
            for ap_str in ap_strs:
                # print(ap_str)
                print_log(args, ap_str)
            ptr_str = '\nMEANAP:::=>' + str(mAP)
            # print(ptr_str)
            # log_file.write()
            print_log(args, ptr_str)
            torch.cuda.synchronize()
            t0 = time.perf_counter()
            prt_str = '\nValidation TIME::: {:0.3f}\n\n'.format(t0 - tvs)
            # print(prt_str)
            # BUG FIX: previously logged ptr_str (mAP) again; log the timing string.
            print_log(args, prt_str)
def train():
    """Train a two-branch (car + carplate) SSD and run periodic evaluation.

    Uses module-level state: ``args``, ``car``, ``MEANS``, ``build_ssd``,
    ``weights_init``, ``MultiBoxLoss``, ``create_vis_plot``/``update_vis_plot``,
    ``adjust_learning_rate``, ``detection_collate``, ``eval_results``, ``log``.
    Side effects: optimizer steps, visdom plots, checkpoints and evaluation
    runs under ``weights/<save_folder>``.
    """
    if args.dataset == 'CAR_CARPLATE_TWO_BRANCH':
        cfg = car
        if args.input_size == 512:
            cfg = change_cfg_for_ssd512(cfg)
        dataset = CAR_CARPLATEDetection(root=args.dataset_root,
                                        transform=SSDAugmentation(cfg['min_dim'], MEANS),
                                        dataset_name='trainval')
        from data import CAR_CARPLATE_CLASSES as labelmap
        eval_dataset = CAR_CARPLATEDetection(
            root=args.dataset_root,
            transform=BaseTransform(args.input_size, MEANS),
            target_transform=CAR_CARPLATEAnnotationTransform(keep_difficult=True),
            dataset_name='test')
    if args.visdom:
        import visdom
        global viz
        viz = visdom.Visdom()
    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net
    # summary
    summary(net, input_size=(3, int(cfg['min_dim']), int(cfg['min_dim'])))
    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True
    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load('weights/' + args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)
    if args.cuda:
        net = net.cuda()
    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.car_loc.apply(weights_init)
        ssd_net.car_conf.apply(weights_init)
        ssd_net.carplate_loc.apply(weights_init)
        ssd_net.carplate_conf.apply(weights_init)
    # optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
    #                       weight_decay=args.weight_decay)
    optimizer = optim.Adam(net.parameters(), lr=args.lr, betas=(0.9, 0.999),
                           weight_decay=args.weight_decay)
    # one MultiBox criterion per detection branch
    car_criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3,
                                 0.5, False, args.cuda)
    carplate_criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True,
                                      3, 0.5, False, args.cuda)
    net.train()
    # loss counters
    car_loc_loss = 0
    car_conf_loss = 0
    carplate_loc_loss = 0
    carplate_conf_loss = 0
    epoch = 0
    print('Loading the dataset...')
    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)
    step_index = 0
    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = [
            'Car Loc Loss', 'Car Conf Loss', 'Carplate Loc Loss',
            'Carplate Conf Loss', 'Total Loss'
        ]
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)
    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    lr = args.lr
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            epoch += 1
            update_vis_plot(epoch, car_loc_loss, car_conf_loss,
                            carplate_loc_loss, carplate_conf_loss, epoch_plot,
                            None, 'append', epoch_size)
            # reset epoch loss counters
            car_loc_loss = 0
            car_conf_loss = 0
            carplate_loc_loss = 0
            carplate_conf_loss = 0
        if iteration in cfg['lr_steps']:
            step_index += 1
            lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                      iteration, epoch_size)
        # load train data (restart the iterator when the epoch is exhausted)
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)
        if args.cuda:
            images = Variable(images.cuda())
            with torch.no_grad():
                targets = [Variable(ann.cuda()) for ann in targets]
        else:
            images = Variable(images)
            with torch.no_grad():
                targets = [Variable(ann) for ann in targets]
        # forward
        t0 = time.time()
        car_loc_data, car_conf_data, car_priors, carplate_loc_data, \
            carplate_conf_data, carplate_priors = net(images)
        # Drop predictions/GT for images without cars or without carplates;
        # otherwise the loss computation has a bug.
        car_targets = []
        carplate_targets = []
        car_index = torch.zeros(len(targets)).type(torch.uint8)
        carplate_index = torch.zeros(len(targets)).type(torch.uint8)
        for ind, t in enumerate(targets):
            if (t[:, 4] == 0).sum() > 0:  # label 0 == car
                car_targets.append(t[t[:, 4] == 0])
                car_index[ind] = 1
            if (t[:, 4] == 1).sum() > 0:
                # The carplate label is 1 in the annotations, but it must be
                # remapped to 0 for the loss computation.
                carplate_gt = t[t[:, 4] == 1]
                carplate_gt[:, 4] = 0
                carplate_targets.append(carplate_gt)
                carplate_index[ind] = 1
        car_index = car_index.bool()
        carplate_index = carplate_index.bool()
        # keep only batch rows that actually contain each object type
        car_loc_data = car_loc_data[car_index]
        car_conf_data = car_conf_data[car_index]
        carplate_loc_data = carplate_loc_data[carplate_index]
        carplate_conf_data = carplate_conf_data[carplate_index]
        # backprop
        optimizer.zero_grad()
        car_loss_l, car_loss_c = car_criterion(
            (car_loc_data, car_conf_data, car_priors), car_targets)
        car_loss = car_loss_l + car_loss_c
        carplate_loss_l, carplate_loss_c = carplate_criterion(
            (carplate_loc_data, carplate_conf_data, carplate_priors),
            carplate_targets)
        carplate_loss = carplate_loss_l + carplate_loss_c
        loss = car_loss + carplate_loss
        loss.backward()
        optimizer.step()
        t1 = time.time()
        car_loc_loss += car_loss_l.item()
        car_conf_loss += car_loss_c.item()
        carplate_loc_loss += carplate_loss_l.item()
        carplate_conf_loss += carplate_loss_c.item()
        if iteration % 100 == 0:
            log.l.info('''
                Timer: {:.5f} sec.\t LR: {}.\t Iter: {}.\t Car_Loss_l: {:.5f}.\t Car_Loss_c: {:.5f}.\t LP_Loss_l: {:.5f}.\t LP_Loss_c: {:.5f}.\t Loss: {:.5f}.
                '''.format((t1 - t0), lr, iteration, car_loss_l.item(),
                           car_loss_c.item(), carplate_loss_l.item(),
                           carplate_loss_c.item(),
                           car_loss_l.item() + car_loss_c.item() +
                           carplate_loss_l.item() + carplate_loss_c.item()))
            if args.visdom:
                update_vis_plot(iteration, car_loss_l.item(),
                                car_loss_c.item(), carplate_loss_l.item(),
                                carplate_loss_c.item(), iter_plot, epoch_plot,
                                'append')
        if iteration != 0 and iteration % 5000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(
                ssd_net.state_dict(),
                'weights/' + args.save_folder + 'ssd' + str(args.input_size) +
                '_' + repr(iteration) + '.pth')
            # load net for evaluation
            eval_net = build_ssd('test', args.input_size, cfg['num_classes'])  # initialize SSD
            eval_net.load_state_dict(
                torch.load('weights/' + args.save_folder + 'ssd' +
                           str(args.input_size) + '_' + repr(iteration) + '.pth'))
            eval_net.eval()
            print('Finished loading model!')
            if args.cuda:
                eval_net = eval_net.cuda()
                cudnn.benchmark = True
            # evaluation begin
            eval_results.test_net(args.eval_save_folder, args.obj_type,
                                  args.dataset_root, 'test', labelmap,
                                  eval_net, args.cuda, eval_dataset,
                                  BaseTransform(eval_net.size, MEANS),
                                  args.top_k, args.input_size,
                                  thresh=args.confidence_threshold)
    # final snapshot + evaluation after the last iteration
    torch.save(
        ssd_net.state_dict(),
        'weights/' + args.save_folder + '' + args.dataset +
        str(args.input_size) + '.pth')
    # load net for evaluation for the final model
    eval_net = build_ssd('test', args.input_size, cfg['num_classes'])  # initialize SSD
    eval_net.load_state_dict(
        torch.load('weights/' + args.save_folder + '' + args.dataset +
                   str(args.input_size) + '.pth'))
    eval_net.eval()
    print('Finished loading model!')
    if args.cuda:
        eval_net = eval_net.cuda()
        cudnn.benchmark = True
    # evaluation begin
    eval_results.test_net(args.eval_save_folder, args.obj_type,
                          args.dataset_root, 'test', labelmap, eval_net,
                          args.cuda, eval_dataset,
                          BaseTransform(eval_net.size, MEANS), args.top_k,
                          args.input_size, thresh=args.confidence_threshold)
# Script-level setup: pick the default tensor type based on CUDA availability
# and the --cuda flag, then build the VOC2007 trainval dataset.
args.save_folder = "weights/"
if torch.cuda.is_available():
    if args.cuda:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    if not args.cuda:
        print("WARNING: It looks like you have a CUDA device, but aren't " +
              "using CUDA.\nRun with --cuda for optimal training speed.")
        torch.set_default_tensor_type('torch.FloatTensor')
else:
    torch.set_default_tensor_type('torch.FloatTensor')
cfg = voc
dataset = VOCDetection(root=args.dataset_root,
                       image_sets=[('2007', 'trainval')],
                       transform=SSDAugmentation(cfg['min_dim'], MEANS))


def get_net():
    """Build and return a fresh SSD network in 'train' mode using the
    module-level ``cfg``."""
    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    return ssd_net


def show_img(data, gt_boxes):
    """Prepare an image array for visualization.

    NOTE(review): ``gt_boxes`` is currently unused and ``img_`` is computed
    but not displayed/returned in the visible code — presumably the drawing
    code follows elsewhere; confirm.
    """
    img = data
    # print(gt_boxes)
    # img = (img + 122 / 255)
    # undo the rough mean/scale normalization into [0, 1] for display
    img_ = (img / 122 + 1.0) / 2
    img_ = np.ascontiguousarray(img_)
    # print(img_)
    # print(img_.dtype)
def train():
    """Training loop for SSD on the SimDetection dataset.

    Uses module-level state: ``net``, ``ssd_net``, ``criterion``,
    ``optimizer``, ``args``, ``logger``, ``means``, ``start_iter``,
    ``stepvalues``, ``validate_per``, ``adjust_learning_rate``,
    ``detection_collate``. Checkpoints are written into the logger directory.
    NOTE(review): pre-0.4 PyTorch idioms (``Variable``, ``volatile=True``,
    ``loss.data[0]``) — confirm the target torch version.
    """
    net.train()
    logger.info('Loading Dataset...')
    dataset = SimDetection(transform=SSDAugmentation(args.dim, means))
    epoch_size = len(dataset) // args.batch_size
    logger.info('Training SSD on {}'.format(dataset.name))
    logger.info("epoch size: {}".format(epoch_size))
    step_index = 0
    batch_iterator = None
    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    lr = args.lr
    epoch = 0
    # NOTE(review): the tqdm description is formatted once, so the epoch shown
    # in the progress bar never updates past its initial value.
    for iteration in tqdm(range(start_iter, args.iterations),
                          desc="epoch {}/{} training".format(
                              epoch, args.iterations // epoch_size)):
        if (not batch_iterator) or (iteration % epoch_size == 0):
            # create batch iterator (restarted each epoch)
            batch_iterator = iter(data_loader)
            epoch += 1
        if iteration in stepvalues:
            step_index += 1
            old_lr = lr
            lr = adjust_learning_rate(optimizer, args.gamma, step_index)
            logger.info("iter {}, change lr from {:.8f} to {:.8f}".format(
                iteration, old_lr, lr))
        images, targets = next(batch_iterator)
        if args.cuda:
            images = Variable(images.cuda())
            targets = [
                Variable(anno.cuda(), volatile=True) for anno in targets
            ]
        else:
            images = Variable(images)
            targets = [Variable(anno, volatile=True) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        if iteration % 10 == 0:
            logger.info('''
                Timer: {:.5f} sec.\t LR: {:.7f}.\t Iter: {}.\t Loss_l: {:.5f}.\t Loss_c: {:.5f}. Loss: {:.5f}
                '''.format((t1 - t0), lr, iteration, loss_l.data[0],
                           loss_c.data[0], loss.data[0]))
        if iteration % validate_per == 0 and iteration > 0:
            logger.info('Saving state, iter={}'.format(iteration))
            # NOTE(review): the same state dict is saved twice under two
            # nearly identical names ('ssd-N.pth' and 'ssd_N.pth') — confirm
            # whether both are needed.
            torch.save(
                ssd_net.state_dict(),
                os.path.join(logger.get_logger_dir(),
                             'ssd-{}.pth'.format(repr(iteration))))
            torch.save(
                ssd_net.state_dict(),
                os.path.join(logger.get_logger_dir(),
                             'ssd_{}.pth'.format(iteration)))
    logger.info("Congratulations..")
tensorized version of img, squeezed ''' return torch.Tensor(self.pull_image(index)).unsqueeze_(0) if __name__ == "__main__": train_cls = ['cow'] clsmap = dict(zip(train_cls, range(len(train_cls)))) # root = 'D:/NBU_thesis\Reference\Pascal_Voc\VOC2007test' root = 'D:/NBU_thesis\Reference\Pascal_Voc\VOCdevkit' from utils.augmentations import SSDAugmentation dataset = VOCTest( root, train_cls, set='trainval', transform=SSDAugmentation(300, (127, 127, 127)), target_transform=VOCAnnoTransform_subcls(class_to_ind=clsmap)) from data import detection_collate data_loader = data.DataLoader(dataset, 8, num_workers=2, shuffle=True, collate_fn=detection_collate, pin_memory=True) # create batch iterator batch_iterator = iter(data_loader) for i in range(500): images, targets = next(batch_iterator) if len(targets[0]) == 0:
def main():
    """Entry point for multi-GPU recurrent-SSD training on UCF24.

    Parses CLI args, seeds RNGs, builds the refine-SSD under DataParallel,
    loads weights, splits train/test videos across GPUs, then alternates
    ``train(...)`` epochs with periodic ``validate(...)`` passes.
    Relies on module-level names: ``relative_path``, ``str2bool``, ``v2``,
    ``CLASSES``, ``build_refine_ssd``, ``weights_init``,
    ``RecurrentMultiBoxLoss``, ``MultiStepLR``, ``readsplitfile``,
    ``UCF24Detection``, ``detection_collate``, ``train``, ``validate``,
    ``save_checkpoint``.

    Fix: the final validation-time log wrote ``ptr_str`` (the mAP string)
    instead of ``prt_str`` (the timing string).
    """
    global my_dict, keys, k_len, arr, xxx, args, log_file, best_prec1
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training')
    parser.add_argument('--version', default='v2',
                        help='conv11_2(v2) or pool6(v1) as last layer')
    parser.add_argument('--basenet', default='vgg16_reducedfc.pth',
                        help='pretrained base model')
    parser.add_argument('--dataset', default='ucf24',
                        help='pretrained base model')
    parser.add_argument('--ssd_dim', default=300, type=int,
                        help='Input Size for SSD')  # only support 300 now
    parser.add_argument('--modality', default='rgb', type=str,
                        help='INput tyep default rgb options are [rgb,brox,fastOF]')
    parser.add_argument('--jaccard_threshold', default=0.5, type=float,
                        help='Min Jaccard index for matching')
    parser.add_argument('--batch_size', default=40, type=int,
                        help='Batch size for training')
    parser.add_argument('--num_workers', default=0, type=int,
                        help='Number of workers used in dataloading')
    parser.add_argument('--max_iter', default=120000, type=int,
                        help='Number of training iterations')
    parser.add_argument('--man_seed', default=123, type=int,
                        help='manualseed for reproduction')
    parser.add_argument('--cuda', default=True, type=str2bool,
                        help='Use cuda to train model')
    parser.add_argument('--ngpu', default=1, type=str2bool,
                        help='Use cuda to train model')
    parser.add_argument('--lr', '--learning-rate', default=0.0005, type=float,
                        help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, help='momentum')
    parser.add_argument('--stepvalues', default='70000,90000', type=str,
                        help='iter number when learning rate to be dropped')
    parser.add_argument('--weight_decay', default=5e-4, type=float,
                        help='Weight decay for SGD')
    parser.add_argument('--gamma', default=0.2, type=float,
                        help='Gamma update for SGD')
    parser.add_argument('--log_iters', default=True, type=bool,
                        help='Print the loss at each iteration')
    parser.add_argument('--visdom', default=False, type=str2bool,
                        help='Use visdom to for loss visualization')
    parser.add_argument('--data_root', default=relative_path + 'realtime/',
                        help='Location of VOC root directory')
    parser.add_argument('--save_root',
                        default=relative_path + 'realtime/saveucf24/',
                        help='Location to save checkpoint models')
    parser.add_argument('--iou_thresh', default=0.5, type=float,
                        help='Evaluation threshold')
    parser.add_argument('--conf_thresh', default=0.01, type=float,
                        help='Confidence threshold for evaluation')
    parser.add_argument('--nms_thresh', default=0.45, type=float,
                        help='NMS threshold')
    parser.add_argument('--topk', default=50, type=int,
                        help='topk for evaluation')
    parser.add_argument('--clip_gradient', default=40, type=float,
                        help='gradients clip')
    parser.add_argument('--resume', default=None, type=str,
                        help='Resume from checkpoint')
    parser.add_argument('--start_epoch', default=0, type=int,
                        help='start epoch')
    parser.add_argument('--epochs', default=35, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('--eval_freq', default=2, type=int, metavar='N',
                        help='evaluation frequency (default: 5)')
    parser.add_argument('--snapshot_pref', type=str,
                        default="ucf101_vgg16_ssd300_")
    parser.add_argument('--lr_milestones', default=[-2, -5], type=float,
                        help='initial learning rate')
    parser.add_argument('--arch', type=str, default="VGG16")
    # NOTE(review): type=str means any CLI value is a string, so the
    # `args.Finetune_SSD is True` branch below can never fire from the CLI;
    # presumably this should be str2bool — confirm before changing.
    parser.add_argument('--Finetune_SSD', default=False, type=str)
    parser.add_argument('-e', '--evaluate', dest='evaluate',
                        action='store_true',
                        help='evaluate model on validation set')
    parser.add_argument('--gpus', nargs='+', type=int, default=[0, 1, 2, 3])
    print(__file__)
    file_name = (__file__).split('/')[-1]
    file_name = file_name.split('.')[0]
    print(file_name)
    ## Parse arguments
    args = parser.parse_args()
    ## set random seeds
    np.random.seed(args.man_seed)
    torch.manual_seed(args.man_seed)
    if args.cuda:
        torch.cuda.manual_seed_all(args.man_seed)
    if args.cuda and torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')
    args.cfg = v2
    args.train_sets = 'train'
    args.means = (104, 117, 123)
    num_classes = len(CLASSES) + 1  # +1 for background
    args.num_classes = num_classes
    args.stepvalues = [int(val) for val in args.stepvalues.split(',')]
    args.loss_reset_step = 30
    args.eval_step = 10000
    args.print_step = 10
    args.data_root += args.dataset + '/'
    ## Define the experiment Name will used to same directory
    day = (time.strftime('%m-%d', time.localtime(time.time())))
    args.snapshot_pref = ('ucf101_CONV-SSD-{}-{}-bs-{}-{}-lr-{:05d}').format(
        args.dataset, args.modality, args.batch_size, args.basenet[:-14],
        int(args.lr * 100000)) + '_' + file_name + '_' + day
    print(args.snapshot_pref)
    if not os.path.isdir(args.save_root):
        os.makedirs(args.save_root)
    net = build_refine_ssd(300, args.num_classes)
    net = torch.nn.DataParallel(net, device_ids=args.gpus)
    if args.Finetune_SSD is True:
        print("load snapshot")
        pretrained_weights = "/data4/lilin/my_code/realtime/ucf24/rgb-ssd300_ucf24_120000.pth"
        pretrained_dict = torch.load(pretrained_weights)
        model_dict = net.state_dict()
        # 1. filter out unnecessary keys
        pretrained_dict_2 = {
            k: v for k, v in pretrained_dict.items() if k in model_dict
        }
        # 2. overwrite entries in the existing state dict
        model_dict.update(pretrained_dict_2)
        # 3. load the new state dict
    elif args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            net.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    elif args.modality == 'fastOF':
        print(
            'Download pretrained brox flow trained model weights and place them at:::=> ',
            args.data_root + 'ucf24/train_data/brox_wieghts.pth')
        pretrained_weights = args.data_root + 'train_data/brox_wieghts.pth'
        print('Loading base network...')
        net.load_state_dict(torch.load(pretrained_weights))
    else:
        vgg_weights = torch.load(args.data_root + 'train_data/' + args.basenet)
        print('Loading base network...')
        net.module.vgg.load_state_dict(vgg_weights)
    if args.cuda:
        net = net.cuda()
    # initialize newly added layers' weights with xavier method
    if args.Finetune_SSD is False and args.resume is None:
        print('Initializing weights for extra layers and HEADs...')
        net.module.clstm_1.apply(weights_init)
        net.module.clstm_2.apply(weights_init)
        net.module.extras_r.apply(weights_init)
        net.module.loc_r.apply(weights_init)
        net.module.conf_r.apply(weights_init)
        net.module.extras.apply(weights_init)
        net.module.loc.apply(weights_init)
        net.module.conf.apply(weights_init)
    parameter_dict = dict(net.named_parameters(
    ))  # Get parmeter of network in dictionary format wtih name being key
    params = []
    # Set different learning rate to bias layers and set their weight_decay to 0
    for name, param in parameter_dict.items():
        if name.find('vgg') > -1 and int(
                name.split('.')[2]) < 23:  # :and name.find('cell') <= -1
            param.requires_grad = False
            print(name, 'layer parameters will be fixed')
        else:
            if name.find('bias') > -1:
                print(name, 'layer parameters will be trained @ {}'.format(
                    args.lr * 2))
                params += [{
                    'params': [param],
                    'lr': args.lr * 2,
                    'weight_decay': 0
                }]
            else:
                print(name,
                      'layer parameters will be trained @ {}'.format(args.lr))
                params += [{
                    'params': [param],
                    'lr': args.lr,
                    'weight_decay': args.weight_decay
                }]
    optimizer = optim.SGD(params, lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = RecurrentMultiBoxLoss(args.num_classes, 0.5, True, 0, True, 3,
                                      0.5, False, args.cuda)
    scheduler = None
    # scheduler = LogLR(optimizer, lr_milestones=args.lr_milestones, total_epoch=args.epochs)
    scheduler = MultiStepLR(optimizer, milestones=args.stepvalues,
                            gamma=args.gamma)
    print('Loading Dataset...')
    num_gpu = len(args.gpus)
    rootpath = args.data_root
    imgtype = args.modality
    imagesDir = rootpath + imgtype + '/'
    split = 1
    splitfile = rootpath + 'splitfiles/trainlist{:02d}.txt'.format(split)
    trainvideos = readsplitfile(splitfile)
    splitfile = rootpath + 'splitfiles/testlist{:02d}.txt'.format(split)
    testvideos = readsplitfile(splitfile)
    ####### val dataset does not need shuffle #######
    # one val loader per GPU, each holding an equal slice of the test videos
    val_data_loader = []
    len_test = len(testvideos)
    random.shuffle(testvideos)
    for i in range(num_gpu):
        testvideos_temp = testvideos[int(i * len_test / num_gpu):
                                     int((i + 1) * len_test / num_gpu)]
        val_dataset = UCF24Detection(args.data_root, 'test',
                                     BaseTransform(args.ssd_dim, args.means),
                                     AnnotationTransform(),
                                     input_type=args.modality,
                                     full_test=False,
                                     videos=testvideos_temp,
                                     istrain=False)
        val_data_loader.append(
            data.DataLoader(val_dataset, args.batch_size,
                            num_workers=args.num_workers, shuffle=False,
                            collate_fn=detection_collate, pin_memory=True,
                            drop_last=True))
    log_file = open(
        args.save_root + args.snapshot_pref + "_training_" + day + ".log",
        "w", 1)
    log_file.write(args.snapshot_pref + '\n')
    for arg in vars(args):
        print(arg, getattr(args, arg))
        log_file.write(str(arg) + ': ' + str(getattr(args, arg)) + '\n')
    log_file.write(str(net))
    torch.cuda.synchronize()
    len_train = len(trainvideos)
    for epoch in range(args.start_epoch, args.epochs):
        ####### shuffle train dataset #######
        # re-split the (reshuffled) train videos across GPUs every epoch
        random.shuffle(trainvideos)
        train_data_loader = []
        for i in range(num_gpu):
            trainvideos_temp = trainvideos[int(i * len_train / num_gpu):
                                           int((i + 1) * len_train / num_gpu)]
            train_dataset = UCF24Detection(args.data_root, 'train',
                                           SSDAugmentation(args.ssd_dim,
                                                           args.means),
                                           AnnotationTransform(),
                                           input_type=args.modality,
                                           videos=trainvideos_temp,
                                           istrain=True)
            train_data_loader.append(
                data.DataLoader(train_dataset, args.batch_size,
                                num_workers=args.num_workers, shuffle=False,
                                collate_fn=detection_collate, pin_memory=True,
                                drop_last=True))
        print("Train epoch_size: ", len(train_data_loader))
        print('Train SSD on', train_dataset.name)
        ########## train ###########
        train(train_data_loader, net, criterion, optimizer, scheduler, epoch,
              num_gpu)
        print('Saving state, epoch:', epoch)
        # NOTE(review): best_prec1 is a global only assigned when resuming —
        # confirm it is initialized at module level.
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': net.state_dict(),
                'best_prec1': best_prec1,
            }, epoch=epoch)
        #### log lr ###
        # scheduler.step()
        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1 or epoch == 0:
            # torch.cuda.synchronize()
            tvs = time.perf_counter()
            mAP, ap_all, ap_strs = validate(args, net, val_data_loader,
                                            val_dataset, epoch,
                                            iou_thresh=args.iou_thresh,
                                            num_gpu=num_gpu)
            # remember best prec@1 and save checkpoint
            is_best = mAP > best_prec1
            best_prec1 = max(mAP, best_prec1)
            print('Saving state, epoch:', epoch)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': net.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best, epoch)
            for ap_str in ap_strs:
                print(ap_str)
                log_file.write(ap_str + '\n')
            ptr_str = '\nMEANAP:::=>' + str(mAP) + '\n'
            print(ptr_str)
            log_file.write(ptr_str)
            torch.cuda.synchronize()
            t0 = time.perf_counter()
            prt_str = '\nValidation TIME::: {:0.3f}\n\n'.format(t0 - tvs)
            print(prt_str)
            # BUG FIX: previously wrote ptr_str (mAP) again; write the timing string.
            log_file.write(prt_str)
    log_file.close()
if not args.resume: print('Initializing weights...') # initialize newly added layers' weights with xavier method ssd_net.extras.apply(weights_init) ssd_net.loc.apply(weights_init) ssd_net.conf.apply(weights_init) optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False, cfg[str(ssd_dim)]['variance'], args.cuda) dataset = VOCDetection(args.voc_root, train_sets, SSDAugmentation(ssd_dim, means), AnnotationTransform()) data_loader = data.DataLoader(dataset, batch_size, num_workers=args.num_workers, shuffle=True, collate_fn=detection_collate, pin_memory=True) def train_one_iters(iteration, lr): loc_loss = 0 conf_loss = 0 t0 = time.time() for i, (images, targets) in enumerate(data_loader): if args.cuda: images = Variable(images.cuda())
def train():
    """Iteration-driven SSD training loop on VOC (classic ssd.pytorch style).

    Reads module-level globals (net, args, optimizer, criterion, ssd_net,
    viz, max_iter, stepvalues, ...). Plots losses to visdom when enabled and
    snapshots weights every 5000 iterations plus once at the end.
    Uses pre-0.4 PyTorch idioms (Variable, volatile=True, loss.data[0]).
    """
    net.train()
    # loss counters (accumulated per epoch)
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0
    print('Loading Dataset...')

    dataset = VOCDetection(args.voc_root, train_sets,
                           SSDAugmentation(ssd_dim, means),
                           AnnotationTransform())

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on', dataset.name)
    step_index = 0
    if args.visdom:
        # initialize visdom loss plot (per-iteration and per-epoch windows)
        lot = viz.line(X=torch.zeros((1, )).cpu(),
                       Y=torch.zeros((1, 3)).cpu(),
                       opts=dict(xlabel='Iteration',
                                 ylabel='Loss',
                                 title='Current SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
        epoch_lot = viz.line(X=torch.zeros((1, )).cpu(),
                             Y=torch.zeros((1, 3)).cpu(),
                             opts=dict(
                                 xlabel='Epoch',
                                 ylabel='Loss',
                                 title='Epoch SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
    batch_iterator = None
    data_loader = data.DataLoader(dataset,
                                  batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    for iteration in range(args.start_iter, max_iter):
        if (not batch_iterator) or (iteration % epoch_size == 0):
            # create batch iterator (restart the loader each epoch)
            batch_iterator = iter(data_loader)
        if iteration in stepvalues:
            # LR decay milestone: step the schedule and flush the epoch plot.
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)
            if args.visdom:
                viz.line(
                    X=torch.ones((1, 3)).cpu() * epoch,
                    Y=torch.Tensor([loc_loss, conf_loss, loc_loss + conf_loss
                                    ]).unsqueeze(0).cpu() / epoch_size,
                    win=epoch_lot,
                    update='append')
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1
        # load train data
        images, targets = next(batch_iterator)
        if args.cuda:
            images = Variable(images.cuda())
            targets = [
                Variable(anno.cuda(), volatile=True) for anno in targets
            ]
        else:
            images = Variable(images)
            targets = [Variable(anno, volatile=True) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]
        if iteration % 10 == 0:
            print('Timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' %
                  (loss.data[0]),
                  end=' ')
            if args.visdom and args.send_images_to_visdom:
                random_batch_index = np.random.randint(images.size(0))
                viz.image(images.data[random_batch_index].cpu().numpy())
        if args.visdom:
            viz.line(X=torch.ones((1, 3)).cpu() * iteration,
                     Y=torch.Tensor([
                         loss_l.data[0], loss_c.data[0],
                         loss_l.data[0] + loss_c.data[0]
                     ]).unsqueeze(0).cpu(),
                     win=lot,
                     update='append')
            # hacky fencepost solution for 0th epoch plot
            if iteration == 0:
                viz.line(X=torch.zeros((1, 3)).cpu(),
                         Y=torch.Tensor(
                             [loc_loss, conf_loss,
                              loc_loss + conf_loss]).unsqueeze(0).cpu(),
                         win=epoch_lot,
                         update=True)
        if iteration % 5000 == 0:
            # Snapshot the unwrapped module's weights, not the DataParallel.
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(),
                       'weights/ssd300_0712_' + repr(iteration) + '.pth')
    torch.save(ssd_net.state_dict(),
               args.save_folder + '' + args.version + '.pth')
def train(args, net, optimizer, criterion, scheduler):
    """Train SSD on UCF24 for args.max_iter iterations with periodic eval.

    Args:
        args: namespace of training options (data_root, batch sizes, steps,
            visdom settings, save_root, ...).
        net: the SSD network (already on the right device).
        optimizer: SGD-style optimizer for net's parameters.
        criterion: multibox loss returning (loss_l, loss_c).
        scheduler: per-iteration LR scheduler (stepped after optimizer.step()).

    Side effects: writes a line-buffered training log under args.save_root,
    saves weight snapshots at each eval step, and optionally plots to visdom.
    Uses pre-0.4 PyTorch idioms (Variable, volatile=True, loss.data[0]).
    """
    log_file = open(args.save_root + "training.log", "w", 1)
    log_file.write(args.exp_name + '\n')
    for arg in vars(args):
        print(arg, getattr(args, arg))
        log_file.write(str(arg) + ': ' + str(getattr(args, arg)) + '\n')
    log_file.write(str(net))
    net.train()

    # loss counters (running averages for console/log output)
    batch_time = AverageMeter()
    losses = AverageMeter()
    loc_losses = AverageMeter()
    cls_losses = AverageMeter()
    print('Loading Dataset...')
    train_dataset = UCF24Detection(args.data_root,
                                   args.train_sets,
                                   SSDAugmentation(args.ssd_dim, args.means),
                                   AnnotationTransform(),
                                   input_type=args.input_type)
    val_dataset = UCF24Detection(args.data_root,
                                 'test',
                                 BaseTransform(args.ssd_dim, args.means),
                                 AnnotationTransform(),
                                 input_type=args.input_type,
                                 full_test=False)
    epoch_size = len(train_dataset) // args.batch_size
    print('Training SSD on', train_dataset.name)

    if args.visdom:
        import visdom
        viz = visdom.Visdom()
        viz.port = args.vis_port
        viz.env = args.exp_name
        # initialize visdom loss plot (current and smoothed values)
        lot = viz.line(X=torch.zeros((1, )).cpu(),
                       Y=torch.zeros((1, 6)).cpu(),
                       opts=dict(xlabel='Iteration',
                                 ylabel='Loss',
                                 title='Current SSD Training Loss',
                                 legend=[
                                     'REG', 'CLS', 'AVG', 'S-REG', ' S-CLS',
                                     ' S-AVG'
                                 ]))
        # initialize visdom meanAP and class APs plot
        legends = ['meanAP']
        for cls in CLASSES:
            legends.append(cls)
        val_lot = viz.line(X=torch.zeros((1, )).cpu(),
                           Y=torch.zeros((1, args.num_classes)).cpu(),
                           opts=dict(xlabel='Iteration',
                                     ylabel='Mean AP',
                                     title='Current SSD Validation mean AP',
                                     legend=legends))

    batch_iterator = None
    train_data_loader = data.DataLoader(train_dataset,
                                        args.batch_size,
                                        num_workers=args.num_workers,
                                        shuffle=True,
                                        collate_fn=detection_collate,
                                        pin_memory=True)
    val_data_loader = data.DataLoader(val_dataset,
                                      args.batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False,
                                      collate_fn=detection_collate,
                                      pin_memory=True)
    itr_count = 0
    torch.cuda.synchronize()
    t0 = time.perf_counter()
    iteration = 0
    while iteration <= args.max_iter:
        for i, (images, targets,
                img_indexs) in enumerate(train_data_loader):
            if iteration > args.max_iter:
                break
            iteration += 1
            if args.cuda:
                images = Variable(images.cuda())
                targets = [
                    Variable(anno.cuda(), volatile=True) for anno in targets
                ]
            else:
                images = Variable(images)
                targets = [Variable(anno, volatile=True) for anno in targets]
            # forward
            out = net(images)
            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            scheduler.step()
            loc_loss = loss_l.data[0]
            conf_loss = loss_c.data[0]
            # print('Loss data type ',type(loc_loss))
            loc_losses.update(loc_loss)
            cls_losses.update(conf_loss)
            losses.update((loc_loss + conf_loss) / 2.0)

            if iteration % args.print_step == 0 and iteration > 0:
                if args.visdom:
                    losses_list = [
                        loc_losses.val, cls_losses.val, losses.val,
                        loc_losses.avg, cls_losses.avg, losses.avg
                    ]
                    viz.line(X=torch.ones((1, 6)).cpu() * iteration,
                             Y=torch.from_numpy(
                                 np.asarray(losses_list)).unsqueeze(0).cpu(),
                             win=lot,
                             update='append')

                torch.cuda.synchronize()
                t1 = time.perf_counter()
                batch_time.update(t1 - t0)

                print_line = 'Itration {:06d}/{:06d} loc-loss {:.3f}({:.3f}) cls-loss {:.3f}({:.3f}) ' \
                             'average-loss {:.3f}({:.3f}) Timer {:0.3f}({:0.3f})'.format(
                                 iteration, args.max_iter, loc_losses.val, loc_losses.avg,
                                 cls_losses.val, cls_losses.avg, losses.val, losses.avg,
                                 batch_time.val, batch_time.avg)

                torch.cuda.synchronize()
                t0 = time.perf_counter()
                log_file.write(print_line + '\n')
                print(print_line)

                # if args.visdom and args.send_images_to_visdom:
                #     random_batch_index = np.random.randint(images.size(0))
                #     viz.image(images.data[random_batch_index].cpu().numpy())

                itr_count += 1
                # Periodically reset accumulators so the averages track the
                # recent training window rather than the whole run.
                if itr_count % args.loss_reset_step == 0 and itr_count > 0:
                    loc_losses.reset()
                    cls_losses.reset()
                    losses.reset()
                    batch_time.reset()
                    print('Reset accumulators of ', args.exp_name, ' at',
                          itr_count * args.print_step)
                    itr_count = 0

            if (iteration % args.eval_step == 0
                    or iteration == 5000) and iteration > 0:
                torch.cuda.synchronize()
                tvs = time.perf_counter()
                print('Saving state, iter:', iteration)
                torch.save(
                    net.state_dict(),
                    args.save_root + 'ssd300_ucf24_' + repr(iteration) +
                    '.pth')

                net.eval()  # switch net to evaluation mode
                mAP, ap_all, ap_strs = validate(args,
                                                net,
                                                val_data_loader,
                                                val_dataset,
                                                iteration,
                                                iou_thresh=args.iou_thresh)

                for ap_str in ap_strs:
                    print(ap_str)
                    log_file.write(ap_str + '\n')
                ptr_str = '\nMEANAP:::=>' + str(mAP) + '\n'
                print(ptr_str)
                log_file.write(ptr_str)

                if args.visdom:
                    aps = [mAP]
                    for ap in ap_all:
                        aps.append(ap)
                    viz.line(
                        X=torch.ones((1, args.num_classes)).cpu() * iteration,
                        Y=torch.from_numpy(
                            np.asarray(aps)).unsqueeze(0).cpu(),
                        win=val_lot,
                        update='append')

                net.train()  # Switch net back to training mode
                torch.cuda.synchronize()
                t0 = time.perf_counter()
                prt_str = '\nValidation TIME::: {:0.3f}\n\n'.format(t0 - tvs)
                print(prt_str)
                # FIX: previously wrote ptr_str (the mAP string) again, so
                # the validation timing line never reached the log.
                log_file.write(prt_str)

    log_file.close()
def get_predicted_label(filename, net): with open(filename + '.binvox', 'rb') as f: model = utils.binvox_rw.read_as_3d_array(f).data transform = SSDAugmentation(cfg['min_dim'], MEANS, phase='test') images = [] for rot in range(6): img, _ = create_img(model, rot) img, _, _ = transform(img, 0, 0) images.append(img) images = torch.tensor(images).permute(0, 3, 1, 2).float() images = Variable(images.cuda()) #images = images.cuda() out = net(images, 'test') out.cuda() cur_boxes = np.zeros((0, 8)) for i in range(6): for j in range(out.shape[1]): label = out[i, j, 1].detach().cpu() if label == 0: continue score = out[i, j, 0].detach().cpu() x1 = tensor_to_float(out[i, j, 2]) y1 = tensor_to_float(out[i, j, 3]) x2 = tensor_to_float(out[i, j, 4]) y2 = tensor_to_float(out[i, j, 5]) z1 = 0.0 z2 = tensor_to_float(out[i, j, 6]) if x1 >= x2 or y1 >= y2 or z2 <= 0: continue a = z1 b = y1 c = x1 d = z2 e = y2 f = x2 if i == 1: a = 1 - z2 b = 1 - y2 c = x1 d = 1 - z1 e = 1 - y1 f = x2 elif i == 2: a = y1 b = 1 - z2 c = x1 d = y2 e = 1 - z1 f = x2 elif i == 3: a = 1 - y2 b = z1 c = x1 d = 1 - y1 e = z2 f = x2 elif i == 4: a = 1 - x2 b = y1 c = z1 d = 1 - x1 e = y2 f = z2 elif i == 5: a = x1 b = y1 c = 1 - z2 d = x2 e = y2 f = 1 - z1 cur_boxes = np.append( cur_boxes, np.array([a, b, c, d, e, f, label - 1, score]).reshape(1, 8), axis=0) keepidx = soft_nms_pytorch(cur_boxes[:, :7], cur_boxes[:, -1]) cur_boxes = cur_boxes[keepidx, :] cur_boxes[:, 0:6] = 10000 * cur_boxes[:, 0:6] return cur_boxes
def train(opt): train_dataset = VOCDetection(transform=SSDAugmentation( opt.DATASETS.MIN_DIM, opt.DATASETS.MEANS), opt=opt) test_dataset = VOCDetection(['test'], BaseTransform(300, opt.DATASETS.MEANS), VOCAnnotationTransform(), opt=opt) ssd_net = build_ssd('train', opt.DATASETS.MIN_DIM, opt.DATASETS.NUM_CLS) net = ssd_net # logger logger = make_logger("project", opt.OUTPUT_DIR, 'log') if len(opt.DEVICE_ID) > 1: net = torch.nn.DataParallel(ssd_net) cudnn.benchmark = True if opt.MODEL.RESUM: print('Resuming training, loading {}...'.format(opt.MODEL.RESUM)) ssd_net.load_weights(opt.MODEL.RESUM) else: vgg_weights = torch.load( os.path.join(opt.MODEL.BACKBONE_WEIGHTS, opt.MODEL.BACKBONE)) print('Loading base network...') ssd_net.vgg.load_state_dict(vgg_weights) if opt.DEVICE: net = net.cuda() if not opt.MODEL.RESUM: print('Initializing backbone_weights...') # initialize newly added layers' backbone_weights with xavier method ssd_net.extras.apply(weights_init) ssd_net.loc.apply(weights_init) ssd_net.conf.apply(weights_init) optimizer = optim.SGD(net.parameters(), lr=opt.SOLVER.BASE_LR, momentum=opt.SOLVER.MOMENTUM, weight_decay=opt.SOLVER.WEIGHT_DECAY) criterion = MultiBoxLoss(opt.DATASETS.NUM_CLS, 0.5, True, 0, True, 3, 0.5, False, opt.DEVICE) epoch_size = len(train_dataset) // opt.DATALOADER.BATCH_SIZE train_loader = data.DataLoader(train_dataset, batch_size=opt.DATALOADER.BATCH_SIZE, num_workers=opt.DATALOADER.NUM_WORKERS, shuffle=True, collate_fn=detection_collate, pin_memory=True) # device = torch.device("cuda") trainer = Trainer(net, optimizer, criterion, logger, device=None, scheduler=None) trainer.run( opt=opt, train_loader=train_loader, # dataloader test_dataset=test_dataset, # dataset epoch_size=epoch_size)
def __init__(self):
    # Python 2 code (print statements): constructs the SSD trainer for the
    # 'subt' dataset inside a ROS package and immediately runs the whole
    # training loop in the constructor.
    r = rospkg.RosPack()
    path = r.get_path('ssd_training')
    # Hard-coded hyperparameters (no CLI parsing in this ROS node).
    self.root = subt_ROOT
    self.basenet = 'vgg16_reducedfc.pth'
    self.batch_size = 16
    self.start_iter = 0
    self.num_workers = 10
    self.cuda = True
    self.lr = 1e-4
    self.momentum = 0.9
    self.weight_decay = 5e-4
    self.gamma = 0.1
    self.save_folder = "src/weights/"
    self.resume = None
    if torch.cuda.is_available():
        if self.cuda:
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        if not self.cuda:
            print "WARNING: It looks like you have a CUDA device, but aren't " + \
                "using CUDA.\nRun with --cuda for optimal training speed."
            torch.set_default_tensor_type('torch.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')
    self.cfg = subt
    self.dataset = subtDetection(root=self.root,
                                 transform=SSDAugmentation(
                                     self.cfg['min_dim'], MEANS))
    self.ssd_net = build_ssd('train', self.cfg['min_dim'],
                             self.cfg['num_classes'])
    if self.cuda:
        self.net = torch.nn.DataParallel(self.ssd_net)
        cudnn.benchmark = True
    # if self.resume:
    #     print 'Resuming training, loading {}...'.format(self.resume)
    #     self.ssd_net.load_weights(self.resume)
    # else:
    #     vgg_weights = torch.load(os.path.join(path, self.save_folder, self.basenet))
    #     print 'Loading base network...'
    #     self.ssd_net.vgg.load_state_dict(vgg_weights)
    if not self.resume:
        print 'Initializing weights...'
        # initialize newly added layers' weights with xavier method
        self.ssd_net.extras.apply(self.weights_init)
        self.ssd_net.loc.apply(self.weights_init)
        self.ssd_net.conf.apply(self.weights_init)
    self.optimizer = optim.SGD(self.net.parameters(),
                               lr=self.lr,
                               momentum=self.momentum,
                               weight_decay=self.weight_decay)
    # NOTE(review): the first positional argument here is batch_size, while
    # the other MultiBoxLoss call sites in this codebase pass num_classes
    # first — confirm against this MultiBoxLoss's signature.
    self.criterion = MultiBoxLoss(self.batch_size, self.cfg['num_classes'],
                                  0.5, True, 0, True, 3, 0.5, False,
                                  self.cuda)
    self.net.train()
    # epoch-level loss counters
    self.loc_loss = 0
    self.conf_loss = 0
    self.epoch = 0
    print 'Loading the dataset...'
    self.epoch_size = len(self.dataset) // self.batch_size
    self.step_index = 0
    self.data_loader = data.DataLoader(self.dataset,
                                       self.batch_size,
                                       num_workers=self.num_workers,
                                       shuffle=True,
                                       collate_fn=detection_collate,
                                       pin_memory=True)
    self.batch_iterator = iter(self.data_loader)
    # External kill switch: something elsewhere can set self.switch to stop
    # training early.
    self.switch = False
    for iteration in range(self.start_iter, self.cfg['max_iter']):
        if self.switch:
            break
        if iteration % self.epoch_size == 0:
            # reset epoch loss counters and restart the loader each epoch
            self.loc_loss = 0
            self.conf_loss = 0
            self.batch_iterator = iter(self.data_loader)
            self.epoch += 1
        if iteration in self.cfg['lr_steps']:
            self.step_index += 1
            self.adjust_learning_rate(self.optimizer, self.gamma,
                                      self.step_index)
        # load train data
        images, targets = next(self.batch_iterator)
        if self.cuda:
            images = Variable(images.cuda())
            targets = [
                Variable(ann.cuda(), volatile=True) for ann in targets
            ]
        else:
            images = Variable(images)
            targets = [Variable(ann, volatile=True) for ann in targets]
        # forward
        t0 = time.time()
        out = self.net(images)
        # backprop
        self.optimizer.zero_grad()
        loss_l, loss_c = self.criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        self.optimizer.step()
        t1 = time.time()
        self.loc_loss += loss_l.data
        self.conf_loss += loss_c.data
        if iteration % 10 == 0:
            print 'timer: %.4f sec.' % (t1 - t0)
            print 'iter ' + repr(iteration), loss.item()
        if iteration != 0 and iteration % 1000 == 0:
            # snapshot the unwrapped module's weights every 1000 iterations
            print 'Saving state, iter:', iteration
            torch.save(
                self.ssd_net.state_dict(),
                os.path.join(path, "src/weights",
                             "ssd300_subt_" + repr(iteration) + '.pth'))
def train():
    """YOLACT training loop on COCO.

    Reads module-level args/cfg, builds the dataset, model, optimizer and
    multibox loss, then iterates over epochs/batches applying LR warmup and
    step decay, periodic snapshotting, optional per-epoch validation, and
    Ctrl+C interrupt saving.
    """
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we
    # don't want that, so keep a handle on the bare network.
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=3)

    if args.cuda:
        # cudnn.benchmark = True
        # net = nn.DataParallel(net).cuda()
        # criterion = nn.DataParallel(criterion).cuda()
        net = nn.DataParallel(net)
        criterion = nn.DataParallel(criterion)

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on?
    # lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)

    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        # FIX: iterate the MovingAverage objects, not the dict
                        # keys — iterating the dict yields the string keys and
                        # `str` has no reset(), so this crashed whenever a
                        # delayed setting fired.
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so
                # we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from
                # some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer,
                           (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also
                # if we resume from past that iteration
                while step_index < len(
                        cfg.lr_steps) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # Load training data
                # Note, for training on multiple gpus this will use the custom
                # replicate and gather I wrote up there
                images, targets, masks, num_crowds = prepare_data(datum)

                # Forward Pass
                out = net(images)

                # Compute Loss
                optimizer.zero_grad()

                wrapper = ScatterWrapper(targets, masks, num_crowds)
                losses = criterion(out, wrapper, wrapper.make_mask())

                losses = {k: v.mean()
                          for k, v in losses.items()}  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])

                # Backprop
                loss.backward()  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]
                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])
                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) +
                           ' T: %.3f || ETA: %s || timer: %.3f') %
                          tuple([epoch, iteration] + loss_labels +
                                [total, eta_str, elapsed]),
                          flush=True)

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(yolact_net, val_dataset)
    except KeyboardInterrupt:
        print('Stopping early. Saving network...')

        # Delete previous copy of the interrupted network so we don't spam
        # the weights folder
        SavePath.remove_interrupt(args.save_folder)

        yolact_net.save_weights(
            save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
def train(): if not os.path.exists(args.save_folder): os.mkdir(args.save_folder) dataset = COCODetection(image_path=cfg.dataset.train_images, info_file=cfg.dataset.train_info, transform=SSDAugmentation(MEANS)) if args.validation_epoch > 0: setup_eval() val_dataset = COCODetection(image_path=cfg.dataset.valid_images, info_file=cfg.dataset.valid_info, transform=BaseTransform(MEANS)) # Parallel wraps the underlying module, but when saving and loading we don't want that yolact_net = Yolact() net = yolact_net net.train() print('\n--- Generator created! ---') # NOTE # I maunally set the original image size and seg size as 138 # might change in the future, for example 550 if cfg.pred_seg: dis_size = 138 dis_net = Discriminator_Wgan(i_size = dis_size, s_size = dis_size) # Change the initialization inside the dis_net class inside # set the dis net's initial parameter values # dis_net.apply(gan_init) dis_net.train() print('--- Discriminator created! ---\n') if args.log: log = Log(cfg.name, args.log_folder, dict(args._get_kwargs()), overwrite=(args.resume is None), log_gpu_stats=args.log_gpu) # I don't use the timer during training (I use a different timing method). # Apparently there's a race condition with multiple GPUs, so disable it just to be safe. 
timer.disable_all() # Both of these can set args.resume to None, so do them before the check if args.resume == 'interrupt': args.resume = SavePath.get_interrupt(args.save_folder) elif args.resume == 'latest': args.resume = SavePath.get_latest(args.save_folder, cfg.name) if args.resume is not None: print('Resuming training, loading {}...'.format(args.resume)) yolact_net.load_weights(args.resume) if args.start_iter == -1: args.start_iter = SavePath.from_str(args.resume).iteration else: print('Initializing weights...') yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path) # optimizer_gen = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, # weight_decay=args.decay) # if cfg.pred_seg: # optimizer_dis = optim.SGD(dis_net.parameters(), lr=cfg.dis_lr, momentum=args.momentum, # weight_decay=args.decay) # schedule_dis = ReduceLROnPlateau(optimizer_dis, mode = 'min', patience=6, min_lr=1E-6) # NOTE: Using the Ranger Optimizer for the generator optimizer_gen = Ranger(net.parameters(), lr = args.lr, weight_decay=args.decay) # optimizer_gen = optim.RMSprop(net.parameters(), lr = args.lr) # FIXME: Might need to modify the lr in the optimizer carefually # check this # def make_D_optimizer(cfg, model): # params = [] # for key, value in model.named_parameters(): # if not value.requires_grad: # continue # lr = cfg.SOLVER.BASE_LR/5.0 # weight_decay = cfg.SOLVER.WEIGHT_DECAY # if "bias" in key: # lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR/5.0 # weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS # params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] # optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM) # return optimizer if cfg.pred_seg: optimizer_dis = optim.SGD(dis_net.parameters(), lr=cfg.dis_lr) # optimizer_dis = optim.RMSprop(dis_net.parameters(), lr = cfg.dis_lr) schedule_dis = ReduceLROnPlateau(optimizer_dis, mode = 'min', patience=6, min_lr=1E-6) criterion = MultiBoxLoss(num_classes=cfg.num_classes, 
pos_threshold=cfg.positive_iou_threshold, neg_threshold=cfg.negative_iou_threshold, negpos_ratio=cfg.ohem_negpos_ratio, pred_seg=cfg.pred_seg) # criterion_dis = nn.BCELoss() # Take the advice from WGAN criterion_dis = DiscriminatorLoss_Maskrcnn() criterion_gen = GeneratorLoss_Maskrcnn() if args.batch_alloc is not None: # e.g. args.batch_alloc: 24,24 args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')] if sum(args.batch_alloc) != args.batch_size: print('Error: Batch allocation (%s) does not sum to batch size (%s).' % (args.batch_alloc, args.batch_size)) exit(-1) net = CustomDataParallel(NetLoss(net, criterion, pred_seg=cfg.pred_seg)) if args.cuda: net = net.cuda() # NOTE if cfg.pred_seg: dis_net = nn.DataParallel(dis_net) dis_net = dis_net.cuda() # Initialize everything if not cfg.freeze_bn: yolact_net.freeze_bn() # Freeze bn so we don't kill our means yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda()) if not cfg.freeze_bn: yolact_net.freeze_bn(True) # loss counters loc_loss = 0 conf_loss = 0 iteration = max(args.start_iter, 0) last_time = time.time() epoch_size = len(dataset) // args.batch_size num_epochs = math.ceil(cfg.max_iter / epoch_size) # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index step_index = 0 data_loader = data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers, shuffle=True, collate_fn=detection_collate, pin_memory=True) # NOTE val_loader = data.DataLoader(val_dataset, args.batch_size, num_workers=args.num_workers*2, shuffle=True, collate_fn=detection_collate, pin_memory=True) save_path = lambda epoch, iteration: SavePath(cfg.name, epoch, iteration).get_path(root=args.save_folder) time_avg = MovingAverage() global loss_types # Forms the print order # TODO: global command can modify global variable inside of the function. 
loss_avgs = { k: MovingAverage(100) for k in loss_types } # NOTE # Enable AMP amp_enable = cfg.amp scaler = torch.cuda.amp.GradScaler(enabled=amp_enable) print('Begin training!') print() # try-except so you can use ctrl+c to save early and stop training try: for epoch in range(num_epochs): # Resume from start_iter if (epoch+1)*epoch_size < iteration: continue for datum in data_loader: # Stop if we've reached an epoch if we're resuming from start_iter if iteration == (epoch+1)*epoch_size: break # Stop at the configured number of iterations even if mid-epoch if iteration == cfg.max_iter: break # Change a config setting if we've reached the specified iteration changed = False for change in cfg.delayed_settings: if iteration >= change[0]: changed = True cfg.replace(change[1]) # Reset the loss averages because things might have changed for avg in loss_avgs: avg.reset() # If a config setting was changed, remove it from the list so we don't keep checking if changed: cfg.delayed_settings = [x for x in cfg.delayed_settings if x[0] > iteration] # Warm up by linearly interpolating the learning rate from some smaller value if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until: set_lr(optimizer_gen, (args.lr - cfg.lr_warmup_init) * (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init) # Adjust the learning rate at the given iterations, but also if we resume from past that iteration while step_index < len(cfg.lr_steps) and iteration >= cfg.lr_steps[step_index]: step_index += 1 set_lr(optimizer_gen, args.lr * (args.gamma ** step_index)) # NOTE if cfg.pred_seg: # ====== GAN Train ====== # train the gen and dis in different iteration # it_alter_period = iteration % (cfg.gen_iter + cfg.dis_iter) # FIXME: # present_time = time.time() for _ in range(cfg.dis_iter): # freeze_pretrain(yolact_net, freeze=False) # freeze_pretrain(net, freeze=False) # freeze_pretrain(dis_net, freeze=False) # if it_alter_period == 0: # print('--- Generator freeze ---') # print('--- Discriminator 
training ---') if cfg.amp: with torch.cuda.amp.autocast(): # ----- Discriminator part ----- # seg_list is the prediction mask # can be regarded as generated images from YOLACT # pred_list is the prediction label # seg_list dim: list of (138,138,instances) # pred_list dim: list of (instances) losses, seg_list, pred_list = net(datum) seg_clas, mask_clas, b, seg_size = seg_mask_clas(seg_list, pred_list, datum) # input image size is [b, 3, 550, 550] # downsample to [b, 3, seg_h, seg_w] image_list = [img.to(cuda0) for img in datum[0]] image = interpolate(torch.stack(image_list), size = seg_size, mode='bilinear',align_corners=False) # Because in the discriminator training, we do not # want the gradient flow back to the generator part # we detach seg_clas (mask_clas come the data, does not have grad) output_pred = dis_net(img = image.detach(), seg = seg_clas.detach()) output_grou = dis_net(img = image.detach(), seg = mask_clas.detach()) # p = elem_mul_p.squeeze().permute(1,2,0).cpu().detach().numpy() # g = elem_mul_g.squeeze().permute(1,2,0).cpu().detach().numpy() # image = image.squeeze().permute(1,2,0).cpu().detach().numpy() # from PIL import Image # seg_PIL = Image.fromarray(p, 'RGB') # mask_PIL = Image.fromarray(g, 'RGB') # seg_PIL.save('mul_seg.png') # mask_PIL.save('mul_mask.png') # raise RuntimeError # from matplotlib import pyplot as plt # fig, (ax1, ax2) = plt.subplots(1,2) # ax1.imshow(mask_show) # ax2.imshow(seg_show) # plt.show(block=False) # plt.pause(2) # plt.close() # if iteration % (cfg.gen_iter + cfg.dis_iter) == 0: # print(f'Probability of fake is fake: {output_pred.mean().item():.2f}') # print(f'Probability of real is real: {output_grou.mean().item():.2f}') # 0 for Fake/Generated # 1 for True/Ground Truth # fake_label = torch.zeros(b) # real_label = torch.ones(b) # Advice of practical implementation # from https://arxiv.org/abs/1611.08408 # loss_pred = -criterion_dis(output_pred,target=real_label) # loss_pred = 
criterion_dis(output_pred,target=fake_label) # loss_grou = criterion_dis(output_grou,target=real_label) # loss_dis = loss_pred + loss_grou # Wasserstein Distance (Earth-Mover) loss_dis = criterion_dis(input=output_grou,target=output_pred) # Backprop the discriminator # Scales loss. Calls backward() on scaled loss to create scaled gradients. scaler.scale(loss_dis).backward() scaler.step(optimizer_dis) scaler.update() optimizer_dis.zero_grad() # clip the updated parameters _ = [par.data.clamp_(-cfg.clip_value, cfg.clip_value) for par in dis_net.parameters()] # ----- Generator part ----- # freeze_pretrain(yolact_net, freeze=False) # freeze_pretrain(net, freeze=False) # freeze_pretrain(dis_net, freeze=False) # if it_alter_period == (cfg.dis_iter+1): # print('--- Generator training ---') # print('--- Discriminator freeze ---') # FIXME: # print(f'dis time pass: {time.time()-present_time:.2f}') # FIXME: # present_time = time.time() with torch.cuda.amp.autocast(): losses, seg_list, pred_list = net(datum) seg_clas, mask_clas, b, seg_size = seg_mask_clas(seg_list, pred_list, datum) image_list = [img.to(cuda0) for img in datum[0]] image = interpolate(torch.stack(image_list), size = seg_size, mode='bilinear',align_corners=False) # Perform forward pass of all-fake batch through D # NOTE this seg_clas CANNOT detach, in order to flow the # gradient back to the generator # output = dis_net(img = image, seg = seg_clas) # Since the log(1-D(G(x))) not provide sufficient gradients # We want log(D(G(x)) instead, this can be achieve by # use the real_label as target. # This step is crucial for the information of discriminator # to go into the generator. 
# Calculate G's loss based on this output # real_label = torch.ones(b) # loss_gen = criterion_dis(output,target=real_label) # GAN MaskRCNN output_pred = dis_net(img = image, seg = seg_clas) output_grou = dis_net(img = image, seg = mask_clas) # Advice from WGAN # loss_gen = -torch.mean(output) loss_gen = criterion_gen(input=output_grou,target=output_pred) # since the dis is already freeze, the gradients will only # record the YOLACT losses = { k: (v).mean() for k,v in losses.items() } # Mean here because Dataparallel loss = sum([losses[k] for k in losses]) loss += loss_gen # Generator backprop scaler.scale(loss).backward() scaler.step(optimizer_gen) scaler.update() optimizer_gen.zero_grad() # FIXME: # print(f'gen time pass: {time.time()-present_time:.2f}') # print('GAN part over') else: losses, seg_list, pred_list = net(datum) seg_clas, mask_clas, b, seg_size = seg_mask_clas(seg_list, pred_list, datum) image_list = [img.to(cuda0) for img in datum[0]] image = interpolate(torch.stack(image_list), size = seg_size, mode='bilinear',align_corners=False) output_pred = dis_net(img = image.detach(), seg = seg_clas.detach()) output_grou = dis_net(img = image.detach(), seg = mask_clas.detach()) loss_dis = criterion_dis(input=output_grou,target=output_pred) loss_dis.backward() optimizer_dis.step() optimizer_dis.zero_grad() _ = [par.data.clamp_(-cfg.clip_value, cfg.clip_value) for par in dis_net.parameters()] # ----- Generator part ----- # FIXME: # print(f'dis time pass: {time.time()-present_time:.2f}') # FIXME: # present_time = time.time() losses, seg_list, pred_list = net(datum) seg_clas, mask_clas, b, seg_size = seg_mask_clas(seg_list, pred_list, datum) image_list = [img.to(cuda0) for img in datum[0]] image = interpolate(torch.stack(image_list), size = seg_size, mode='bilinear',align_corners=False) # GAN MaskRCNN output_pred = dis_net(img = image, seg = seg_clas) output_grou = dis_net(img = image, seg = mask_clas) loss_gen = criterion_gen(input=output_grou,target=output_pred) 
# since the dis is already freeze, the gradients will only # record the YOLACT losses = { k: (v).mean() for k,v in losses.items() } # Mean here because Dataparallel loss = sum([losses[k] for k in losses]) loss += loss_gen loss.backward() # Do this to free up vram even if loss is not finite optimizer_gen.zero_grad() if torch.isfinite(loss).item(): # since the optimizer_gen is for YOLACT only # only the gen will be updated optimizer_gen.step() # FIXME: # print(f'gen time pass: {time.time()-present_time:.2f}') # print('GAN part over') else: # ====== Normal YOLACT Train ====== # Zero the grad to get ready to compute gradients optimizer_gen.zero_grad() # Forward Pass + Compute loss at the same time (see CustomDataParallel and NetLoss) losses = net(datum) losses = { k: (v).mean() for k,v in losses.items() } # Mean here because Dataparallel loss = sum([losses[k] for k in losses]) # no_inf_mean removes some components from the loss, so make sure to backward through all of it # all_loss = sum([v.mean() for v in losses.values()]) # Backprop loss.backward() # Do this to free up vram even if loss is not finite if torch.isfinite(loss).item(): optimizer_gen.step() # Add the loss to the moving average for bookkeeping _ = [loss_avgs[k].add(losses[k].item()) for k in losses] # for k in losses: # loss_avgs[k].add(losses[k].item()) cur_time = time.time() elapsed = cur_time - last_time last_time = cur_time # Exclude graph setup from the timing information if iteration != args.start_iter: time_avg.add(elapsed) if iteration % 10 == 0: eta_str = str(datetime.timedelta(seconds=(cfg.max_iter-iteration) * time_avg.get_avg())).split('.')[0] total = sum([loss_avgs[k].get_avg() for k in losses]) loss_labels = sum([[k, loss_avgs[k].get_avg()] for k in loss_types if k in losses], []) if cfg.pred_seg: print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) + ' T: %.3f || ETA: %s || timer: %.3f') % tuple([epoch, iteration] + loss_labels + [total, eta_str, elapsed]), flush=True) # print(f'Generator 
loss: {loss_gen:.2f} | Discriminator loss: {loss_dis:.2f}') # Loss Key: # - B: Box Localization Loss # - C: Class Confidence Loss # - M: Mask Loss # - P: Prototype Loss # - D: Coefficient Diversity Loss # - E: Class Existence Loss # - S: Semantic Segmentation Loss # - T: Total loss if args.log: precision = 5 loss_info = {k: round(losses[k].item(), precision) for k in losses} loss_info['T'] = round(loss.item(), precision) if args.log_gpu: log.log_gpu_stats = (iteration % 10 == 0) # nvidia-smi is sloooow log.log('train', loss=loss_info, epoch=epoch, iter=iteration, lr=round(cur_lr, 10), elapsed=elapsed) log.log_gpu_stats = args.log_gpu iteration += 1 if iteration % args.save_interval == 0 and iteration != args.start_iter: if args.keep_latest: latest = SavePath.get_latest(args.save_folder, cfg.name) print('Saving state, iter:', iteration) yolact_net.save_weights(save_path(epoch, iteration)) if args.keep_latest and latest is not None: if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval: print('Deleting old save...') os.remove(latest) # This is done per epoch if args.validation_epoch > 0: # NOTE: Validation loss # if cfg.pred_seg: # net.eval() # dis_net.eval() # cfg.gan_eval = True # with torch.no_grad(): # for datum in tqdm(val_loader, desc='GAN Validation'): # losses, seg_list, pred_list = net(datum) # losses, seg_list, pred_list = net(datum) # # TODO: warp below as a function # seg_list = [v.permute(2,1,0).contiguous() for v in seg_list] # b = len(seg_list) # batch size # _, seg_h, seg_w = seg_list[0].size() # seg_clas = torch.zeros(b, cfg.num_classes-1, seg_h, seg_w) # mask_clas = torch.zeros(b, cfg.num_classes-1, seg_h, seg_w) # target_list = [target for target in datum[1][0]] # mask_list = [interpolate(mask.unsqueeze(0), size = (seg_h,seg_w),mode='bilinear', \ # align_corners=False).squeeze() for mask in datum[1][1]] # for idx in range(b): # for i, (pred, i_target) in enumerate(zip(pred_list[idx], target_list[idx])): # 
seg_clas[idx, pred, ...] += seg_list[idx][i,...] # mask_clas[idx, i_target[-1].long(), ...] += mask_list[idx][i,...] # seg_clas = torch.clamp(seg_clas, 0, 1) # image = interpolate(torch.stack(datum[0]), size = (seg_h,seg_w), # mode='bilinear',align_corners=False) # real_label = torch.ones(b) # output_pred = dis_net(img = image, seg = seg_clas) # output_grou = dis_net(img = image, seg = mask_clas) # loss_pred = -criterion_dis(output_pred,target=real_label) # loss_grou = criterion_dis(output_grou,target=real_label) # loss_dis = loss_pred + loss_grou # losses = { k: (v).mean() for k,v in losses.items() } # loss = sum([losses[k] for k in losses]) # val_loss = loss - cfg.lambda_dis*loss_dis # schedule_dis.step(loss_dis) # lr = [group['lr'] for group in optimizer_dis.param_groups] # print(f'Discriminator lr: {lr[0]}') # net.train() if epoch % args.validation_epoch == 0 and epoch > 0: cfg.gan_eval = False dis_net.eval() compute_validation_map(epoch, iteration, yolact_net, val_dataset, log if args.log else None) # Compute validation mAP after training is finished compute_validation_map(epoch, iteration, yolact_net, val_dataset, log if args.log else None) except KeyboardInterrupt: if args.interrupt: print('Stopping early. Saving network...') # Delete previous copy of the interrupted network so we don't spam the weights folder SavePath.remove_interrupt(args.save_folder) yolact_net.save_weights(save_path(epoch, repr(iteration) + '_interrupt')) exit() yolact_net.save_weights(save_path(epoch, iteration))
def train():
    """Train an SSD detector on COCO / VOC / BDD.

    Reads all configuration from the module-level ``args`` namespace:
    selects the dataset and config, derives a checkpoint filename prefix
    from the (tiny / box_size_change / minmax / lr) flags, optionally
    fine-tunes a VOC-pretrained backbone for BDD by replacing the
    classification heads, then runs the iteration-based training loop,
    saving a checkpoint every 500 iterations and a final one at the end.
    """
    # --- Dataset / config selection -------------------------------------
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            # dataset_root was left at its VOC default; fall back to COCO_ROOT.
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(cfg['min_dim'],
                                                          MEANS))
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(cfg['min_dim'],
                                                         MEANS))
    else:
        # Any other value falls through to Berkeley DeepDrive (BDD100K).
        print('Berkeley DeepDrive')
        cfg = bdd
        dataset = BDDDetection(root='/home/coin/datasets/BDD100K',
                               type_sets='train',
                               transform=SSDAugmentation(cfg['min_dim'],
                                                         MEANS))

    # --- Checkpoint filename prefix -------------------------------------
    # Encodes model variant, batch size, learning rate, and the min/max
    # box-size choice into the saved-weights filename.
    if args.tiny:
        if args.box_size_change:
            if args.lr == 1e-3:
                print('learning rate 1e-3')
                if args.minmax:
                    save_model_name = 'weights/tiny_ssd300_' + 'batch' + str(
                        args.batch_size) + '_lr3_max_'
                else:
                    save_model_name = 'weights/tiny_ssd300_' + 'batch' + str(
                        args.batch_size) + '_lr3_min_'
            else:
                print('learning rate 1e-4')
                if args.minmax:
                    save_model_name = 'weights/tiny_ssd300_' + 'batch' + str(
                        args.batch_size) + '_lr4_max_'
                else:
                    save_model_name = 'weights/tiny_ssd300_' + 'batch' + str(
                        args.batch_size) + '_lr4_min_'
        else:
            if args.lr == 1e-3:
                print('learning rate 1e-3')
                save_model_name = 'weights/tiny_param_ssd300_' + 'batch' + str(
                    args.batch_size) + '_lr3_'
            else:
                print('learning rate 1e-4')
                save_model_name = 'weights/tiny_param_ssd300_' + 'batch' + str(
                    args.batch_size) + '_lr4_'
    else:
        if args.box_size_change:
            # NOTE(review): the non-tiny branch still writes a 'tiny_size_'
            # prefix — looks like a copy/paste artifact; confirm intent.
            if args.lr == 1e-3:
                print('learning rate 1e-3')
                if args.minmax:
                    save_model_name = 'weights/tiny_size_ssd300_' + 'batch' + str(
                        args.batch_size) + '_lr3_max_'
                else:
                    save_model_name = 'weights/tiny_size_ssd300_' + 'batch' + str(
                        args.batch_size) + '_lr3_min_'
            else:
                print('learning rate 1e-4')
                if args.minmax:
                    save_model_name = 'weights/tiny_size_ssd300_' + 'batch' + str(
                        args.batch_size) + '_lr4_max_'
                else:
                    save_model_name = 'weights/tiny_size_ssd300_' + 'batch' + str(
                        args.batch_size) + '_lr4_min_'
        else:
            if args.lr == 1e-3:
                print('learning rate 1e-3')
                save_model_name = 'weights/ssd300_' + 'batch' + str(
                    args.batch_size) + '_lr3_'
            else:
                print('learning rate 1e-4')
                save_model_name = 'weights/ssd300_' + 'batch' + str(
                    args.batch_size) + '_lr4_'

    # --- Network construction (optionally fine-tuning from VOC) ---------
    if args.fine_tune:
        # Load the model trained on VOC.
        cfg = voc
        ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'],
                            tiny=args.tiny,
                            box_size_change=args.box_size_change,
                            minmax=args.minmax)
        trained_model = os.path.join('weights', args.fine_tune)
        ssd_net.load_state_dict(torch.load(trained_model))
        # Build a fresh model configured for BDD.
        cfg = bdd
        new_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'],
                            tiny=args.tiny,
                            box_size_change=args.box_size_change,
                            minmax=args.minmax)
        # For the original SSD, only the base network was frozen:
        # if args.tiny:
        #     for k, v in enumerate(ssd_net.vgg):
        #         if k == 21 or k == 33:
        #             for param in v.parameters():
        #                 param.requires_grad = True
        #         else:
        #             for param in v.parameters():
        #                 param.requires_grad = False
        # else:
        #     for k, v in enumerate(ssd_net.vgg):
        #         if k == 13 or k == 23:
        #             for param in v.parameters():
        #                 param.requires_grad = True
        #         else:
        #             for param in v.parameters():
        #                 param.requires_grad = False
        # Transplant the pretrained VGG backbone into the new network.
        new_net.vgg = ssd_net.vgg
        vgg_parameters = count_parameters(ssd_net.vgg)
        print('vgg parameters : ', vgg_parameters)
        # Freeze the extras except the parts related to feature maps:
        # for k, extra in enumerate(ssd_net.extras):
        #     if k % 2 == 0:
        #         for param in extra.parameters():
        #             param.requires_grad = False
        new_net.extras = ssd_net.extras
        new_net.loc = ssd_net.loc
        net = new_net
        save_model_name += 'fine'
        # Replace the confidence heads so the output channel count matches
        # the BDD class count times the anchors-per-location for each
        # feature map (3/5 with box_size_change, else 4/6).
        if args.box_size_change:
            net.conf[0] = nn.Conv2d(512, cfg['num_classes'] * 3,
                                    kernel_size=(3, 3), stride=(1, 1),
                                    padding=(1, 1))
            net.conf[1] = nn.Conv2d(1024, cfg['num_classes'] * 5,
                                    kernel_size=(3, 3), stride=(1, 1),
                                    padding=(1, 1))
            net.conf[2] = nn.Conv2d(512, cfg['num_classes'] * 5,
                                    kernel_size=(3, 3), stride=(1, 1),
                                    padding=(1, 1))
            net.conf[3] = nn.Conv2d(256, cfg['num_classes'] * 5,
                                    kernel_size=(3, 3), stride=(1, 1),
                                    padding=(1, 1))
            net.conf[4] = nn.Conv2d(256, cfg['num_classes'] * 3,
                                    kernel_size=(3, 3), stride=(1, 1),
                                    padding=(1, 1))
            net.conf[5] = nn.Conv2d(256, cfg['num_classes'] * 3,
                                    kernel_size=(3, 3), stride=(1, 1),
                                    padding=(1, 1))
        else:
            net.conf[0] = nn.Conv2d(512, cfg['num_classes'] * 4,
                                    kernel_size=(3, 3), stride=(1, 1),
                                    padding=(1, 1))
            net.conf[1] = nn.Conv2d(1024, cfg['num_classes'] * 6,
                                    kernel_size=(3, 3), stride=(1, 1),
                                    padding=(1, 1))
            net.conf[2] = nn.Conv2d(512, cfg['num_classes'] * 6,
                                    kernel_size=(3, 3), stride=(1, 1),
                                    padding=(1, 1))
            net.conf[3] = nn.Conv2d(256, cfg['num_classes'] * 6,
                                    kernel_size=(3, 3), stride=(1, 1),
                                    padding=(1, 1))
            net.conf[4] = nn.Conv2d(256, cfg['num_classes'] * 4,
                                    kernel_size=(3, 3), stride=(1, 1),
                                    padding=(1, 1))
            net.conf[5] = nn.Conv2d(256, cfg['num_classes'] * 4,
                                    kernel_size=(3, 3), stride=(1, 1),
                                    padding=(1, 1))
        print('fine tuning!')
    else:
        ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'],
                            tiny=args.tiny,
                            box_size_change=args.box_size_change,
                            minmax=args.minmax)
        net = ssd_net
        print('just training!')

    # Resume path kept for reference (currently disabled):
    # if args.resume:
    #     ssd_net.load_state_dict(torch.load('/home/dohun/Code/PythonCode/Paper_Models/SSD/ssd.pytorch/weights/tiny_ssd300_batch32_lr3_max_10000_test.pth'))
    #     print('load succeeded!')
    #     print('Resuming training, loading {}...'.format(args.resume))
    #     # ssd_net.load_weights(args.resume)
    if not args.resume:
        # Initialize newly added layers' weights with the xavier method.
        if args.fine_tune:
            # Only the freshly replaced confidence heads need init here.
            net.conf.apply(weights_init)
            print('Initializing weights... when fine tuning')
        else:
            net.extras.apply(weights_init)
            net.loc.apply(weights_init)
            net.conf.apply(weights_init)
            print('Initializing weights...')
    if args.cuda:
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True
    if args.cuda:
        net = net.cuda()

    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)
    net.train()
    # loss counters (accumulated per epoch, reset at each epoch boundary)
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    epoch_iteration = 0
    print('Loading the dataset...')
    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print('epoch_size', epoch_size)
    step_index = 0
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 500, gamma=0.1)
    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers, shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    # default max_iter is 120000 when batch size is 32, epoch_size 517
    # 80000(1e-3), 10000(1e-4), 120000(1e-5)
    # default max_iter is 60000 when batch size is 64, epoch_size 258
    # 40000(1e-3), 50000(1e-4), 60000(1e-5)
    # create batch iterator
    # epoch = 0
    # if args.batch_size == 32:
    #     if args.fine_tune:
    #         print('fine')
    #         max_iter = cfg['f_max_iter']
    #         lr_steps = [int(step) for step in cfg['f_lr_steps']]
    #         print('max_iter', max_iter)
    #         print('lr_steps', lr_steps)
    #     else:
    #         print('!fine')
    #         max_iter = cfg['max_iter']
    #         lr_steps = [int(step) for step in cfg['lr_steps']]
    #         print('max_iter', max_iter)
    #         print('lr_steps', lr_steps)
    # elif args.batch_size == 64:
    #     # batch_size = 64, run started 2020.01.01 15:21
    #     max_iter = cfg['max_iter']
    #     lr_steps = [int(step) for step in cfg['lr_steps']]
    #     print('max_iter', max_iter)
    #     print('lr_steps', lr_steps)
    lr_steps = [int(step) for step in cfg['lr_steps']]
    # NOTE(review): always uses the fine-tune iteration budget, even when
    # not fine-tuning — confirm this is intended.
    max_iter = cfg['f_max_iter']
    # max_iter, lr_steps = return_iter(args.batch_size, args.fine_tune, cfg)
    batch_iterator = iter(data_loader)

    # --- Training loop ---------------------------------------------------
    for iteration in range(args.start_iter, max_iter):
        # Epoch boundary: report and reset the per-epoch loss averages.
        if iteration != 0 and ((iteration + 1) % epoch_size == 0):
            print('epoch {} || Localize Loss : {} Confidece Loss : {}'.format(
                epoch + 1, loc_loss / epoch_iteration,
                conf_loss / epoch_iteration))
            loc_loss = 0
            conf_loss = 0
            epoch_iteration = 0
            epoch += 1
        # Step the LR down at the configured milestones.
        if iteration in lr_steps:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)
        # load train data; restart the iterator when the epoch is exhausted
        try:
            images, targets = next(batch_iterator)
            # print(images.dtype, images.size())
            # print(targets[0].dtype, targets[0].size(), targets[0])
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)
        epoch_iteration += 1
        if args.cuda:
            images = Variable(images.cuda())
            # NOTE(review): Variable/volatile are deprecated since
            # PyTorch 0.4 — torch.no_grad() is the modern equivalent.
            targets = [Variable(ann.cuda(), volatile=True)
                       for ann in targets]
        else:
            images = Variable(images)
            targets = [Variable(ann, volatile=True) for ann in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        # scheduler.step()
        if iteration % 100 == 0:
            # current_lr = scheduler.get_lr()
            print('timer: %.4f sec.' % (t1 - t0))
            print('iteration : {} || Localize Loss : {} Confidence Loss : {}'.
                  format(iteration, loss_l.item(), loss_c.item()))
            # print('current learning rate : {}'.format(current_lr))
            for param_group in optimizer.param_groups:
                print('current learning rate : {}'.format(param_group['lr']))
        # Periodic checkpoint. Note: saves ssd_net (the underlying module),
        # not the DataParallel wrapper.
        if iteration != 0 and iteration % 500 == 0:
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(),
                       save_model_name + repr(iteration) + '_.pth')
    torch.save(ssd_net.state_dict(), save_model_name + '_final.pth')
if __name__ == '__main__':
    # Build the detector in test mode; reserve one extra class slot
    # for the background label.
    n_categories = len(labelmap) + 1
    detector = build_ssd('test', 300, n_categories)
    detector.load_state_dict(torch.load(args.trained_model))
    detector.eval()
    print('Finished loading model!')

    # Evaluation data: the SSDD test split.
    # (VOC loader kept for reference:
    #  dataset = VOCDetection(args.voc_root, [('2007', set_type)],
    #                         BaseTransform(300, dataset_mean),
    #                         VOCAnnotationTransform()))
    eval_split = SSDDDetection(root=SSDD_ROOT, split='test',
                               transform=SSDAugmentation(300, MEANS))

    if args.cuda:
        detector = detector.cuda()
        cudnn.benchmark = True

    # Run the evaluation pass over the test split.
    test_net(args.save_folder, detector, args.cuda, eval_split,
             BaseTransform(detector.size, dataset_mean), args.top_k, 300,
             thresh=args.confidence_threshold)