def train():
    """Run the Jittor training loop for the YOLACT network.

    All configuration is read from the module-level ``args`` and ``cfg``
    objects.  The function builds the training dataset (and the validation
    dataset when ``args.validation_epoch > 0``), creates the network,
    restores or initialises its weights, and then iterates over the data,
    adjusting the learning rate, printing progress every 10 iterations,
    optionally logging, and writing checkpoints into ``args.save_folder``
    every ``args.save_interval`` iterations.

    On ``KeyboardInterrupt`` the process exits; if ``args.interrupt`` is set
    an ``*_interrupt`` checkpoint is written first.  When the loop finishes
    normally a final checkpoint is saved.
    """
    if args.cuda:
        jt.flags.use_cuda = 1

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(args.save_folder, exist_ok=True)

    # BaseTransform is used here instead of SSDAugmentation.
    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=BaseTransform(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = EvalCOCODetection(image_path=cfg.dataset.valid_images,
                                        info_file=cfg.dataset.valid_info,
                                        transform=BaseTransform(MEANS))

    # Keep a handle on the bare network: `net` is wrapped below, but weights
    # are always saved from / loaded into the unwrapped module.
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    if args.log:
        log = Log(cfg.name, args.log_folder, dict(args._get_kwargs()),
                  overwrite=(args.resume is None),
                  log_gpu_stats=args.log_gpu)

    # The timer is not used during training (a different timing method is
    # used below), so disable it to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check.
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}..'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights..')
        yolact_net.init_weights(
            backbone_path=args.save_folder + cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio)

    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print(
                'Error: Batch allocation (%s) does not sum to batch size (%s).'
                % (args.batch_alloc, args.batch_size))
            exit(-1)

    net = NetLoss(net, criterion)

    # Initialize everything with one dry forward pass.  Batch norm is frozen
    # for that pass so the dummy input does not disturb the running means.
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()
    yolact_net(jt.zeros((1, 3, cfg.max_size, cfg.max_size)))
    if not cfg.freeze_bn:
        yolact_net.freeze_bn(True)

    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on?
    # lr' = lr * gamma ^ step_index
    step_index = 0

    # The dataset object itself is iterated as the loader.
    dataset.set_attrs(batch_size=args.batch_size,
                      num_workers=args.num_workers,
                      shuffle=False)
    dataset.collate_batch = detection_collate
    data_loader = dataset

    def save_path(epoch, iteration):
        """Return the checkpoint path for the given epoch and iteration."""
        return SavePath(cfg.name, epoch,
                        iteration).get_path(root=args.save_folder)

    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        # NOTE(review): `i` caps the run at roughly 100 iterations (see the
        # two `break`s below).  This looks like a debugging/benchmarking
        # limit -- confirm whether it should remain.
        i = 0
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from
                # start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if
                # mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified
                # iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have
                        # changed.  Iterate the values: iterating the dict
                        # itself yields the string keys, which have no
                        # reset() method.
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so
                # we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from
                # some smaller value
                if (cfg.lr_warmup_until > 0
                        and iteration <= cfg.lr_warmup_until):
                    set_lr(optimizer,
                           (args.lr - cfg.lr_warmup_init)
                           * (iteration / cfg.lr_warmup_until)
                           + cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also
                # if we resume from past that iteration
                while (step_index < len(cfg.lr_steps)
                       and iteration >= cfg.lr_steps[step_index]):
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # Forward pass + loss computation in one call (see NetLoss).
                splits = prepare_data(datum)
                losses = net(*splits)
                losses = {k: v.mean() for k, v in losses.items()}
                loss = sum([losses[k] for k in losses])

                # Backprop.  No explicit zero_grad() call is made here; the
                # loss is handed straight to optimizer.step().
                loss.sync()
                optimizer.step(loss)
                jt.sync(optimizer.param_groups[0]['params'])

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(
                            seconds=(cfg.max_iter - iteration)
                            * time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses],
                                      [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses))
                           + ' T: %.3f || ETA: %s || timer: %.3f')
                          % tuple([epoch, iteration] + loss_labels
                                  + [total, eta_str, elapsed]),
                          flush=True)

                if args.log:
                    precision = 5
                    # NOTE(review): the log deliberately records 0 for every
                    # loss instead of the real values -- confirm whether the
                    # real losses should be logged again.
                    loss_info = {
                        k: round(float(0), precision) for k in losses
                    }
                    loss_info['T'] = round(float(0), precision)

                    if args.log_gpu:
                        # nvidia-smi is slow, so only sample every 10 iters
                        log.log_gpu_stats = (iteration % 10 == 0)

                    # `cur_lr` is not assigned anywhere in this function, so
                    # it is read as a module-level name -- presumably
                    # maintained by set_lr(); TODO confirm.
                    log.log('train', loss=loss_info, epoch=epoch,
                            iter=iteration, lr=round(cur_lr, 10),
                            elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu

                iteration += 1

                if (iteration % args.save_interval == 0
                        and iteration != args.start_iter):
                    # Remember the previous checkpoint before writing the
                    # new one so it can be deleted afterwards.
                    latest = None
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if (args.keep_latest_interval <= 0
                                or iteration % args.keep_latest_interval
                                != args.save_interval):
                            print('Deleting old save..')
                            os.remove(latest)

                i += 1
                if i > 100:
                    break

            if i > 100:
                break

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(epoch, iteration, yolact_net,
                                           val_dataset,
                                           log if args.log else None)
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network..')

            # Delete previous copy of the interrupted network so we don't
            # spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            yolact_net.save_weights(
                save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
make_row([iou_type] + [ '%.2f' % x if x < 100 else '%.1f' % x for x in all_maps[iou_type].values() ])) print(make_sep(len(all_maps['box']) + 1)) print() if __name__ == '__main__': parse_args() if args.config is not None: set_cfg(args.config) if args.trained_model == 'interrupt': args.trained_model = SavePath.get_interrupt('weights/') elif args.trained_model == 'latest': args.trained_model = SavePath.get_latest('weights/', cfg.name) if args.config is None: model_path = SavePath.from_str(args.trained_model) # TODO: Bad practice? Probably want to do a name lookup instead. args.config = model_path.model_name + '_config' print('Config not specified. Parsed %s from the file name.\n' % args.config) set_cfg(args.config) if args.detect: cfg.eval_mask_branch = False if args.dataset is not None:
def train():
    """Run the PyTorch training loop for the STMask network.

    All configuration is read from the module-level ``args`` and ``cfg``
    objects.  The function builds the training dataset, creates the network,
    restores or initialises its weights, wraps it together with the loss in
    ``CustomDataParallel(NetLoss(...))`` and then iterates over the data
    loader, adjusting the learning rate, printing progress every 10
    iterations and optionally logging.  From epoch 7 onwards a checkpoint is
    written (and, when ``args.validation_epoch > 0``, validation is run)
    whenever the iteration is a multiple of ``args.save_interval``.

    On ``KeyboardInterrupt`` an ``*_interrupt`` checkpoint is written and the
    process exits.  When the loop finishes normally a final checkpoint is
    saved.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(args.save_folder, exist_ok=True)

    train_dataset = get_dataset(cfg.train_dataset)

    # Keep a handle on the bare network: `net` is wrapped below, but weights
    # are always saved from / loaded into the unwrapped module.
    STMask_net = STMask()
    net = STMask_net
    net.train()

    if args.log:
        log = Log(cfg.name, args.save_folder, dict(args._get_kwargs()),
                  overwrite=(args.resume is None),
                  log_gpu_stats=args.log_gpu)

    # The timer is not used during training (a different timing method is
    # used below), so disable it.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check.
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        STMask_net.load_weights(path=args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights based COCO ...')
        STMask_net.init_weights(backbone_path='weights/' + cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio)

    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print(
                'Error: Batch allocation (%s) does not sum to batch size (%s).'
                % (args.batch_alloc, args.batch_size))
            exit(-1)

    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    # Initialize everything with one dry forward pass on dummy tensors.
    # Batch norm is frozen first so the dummy input does not disturb the
    # running means.
    # NOTE(review): unlike the freeze before the pass, there is no matching
    # call to re-enable batch norm afterwards -- confirm this is intended.
    if not cfg.freeze_bn:
        STMask_net.freeze_bn()
    if args.cuda:
        STMask_net(
            torch.ones(2, 2, 3, 384, 640).cuda(),
            [torch.zeros(1, 4).cuda()])
    else:
        STMask_net(torch.ones(2, 2, 3, 384, 640), [torch.zeros(1, 4)])

    data_loader = data.DataLoader(train_dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(train_dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on?
    # lr' = lr * gamma ^ step_index
    step_index = 0

    # The optimizer starts at args.lr.  Track it here so the logger always
    # has a value, even when neither the warm-up nor the step schedule has
    # fired yet (e.g. resuming between warm-up and the first LR step).
    cur_lr = args.lr

    def save_path(epoch, iteration):
        """Return the checkpoint path for the given epoch and iteration."""
        return SavePath(cfg.name, epoch,
                        iteration).get_path(root=args.save_folder)

    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for data_batch in data_loader:
                # Stop if we've reached an epoch if we're resuming from
                # start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if
                # mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified
                # iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have
                        # changed.  Iterate the values: iterating the dict
                        # itself yields the string keys, which have no
                        # reset() method.
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so
                # we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from
                # some smaller value
                if (cfg.lr_warmup_until > 0
                        and iteration <= cfg.lr_warmup_until):
                    cur_lr = ((args.lr - cfg.lr_warmup_init)
                              * (iteration / cfg.lr_warmup_until)
                              + cfg.lr_warmup_init)
                    set_lr(optimizer, cur_lr)

                # Adjust the learning rate at the given iterations, but also
                # if we resume from past that iteration
                while (step_index < len(cfg.lr_steps)
                       and iteration >= cfg.lr_steps[step_index]):
                    step_index += 1
                    cur_lr = args.lr * (args.gamma**step_index)
                    set_lr(optimizer, cur_lr)

                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                # Forward pass + loss computation in one call (see
                # CustomDataParallel and NetLoss).
                losses = net(data_batch)

                # Mean here because of the data-parallel wrapper
                losses = {k: v.mean() for k, v in losses.items()}
                # All sub-losses are summed with equal weight
                loss = sum([losses[k] for k in losses])

                # Backprop.  backward() is always called to free up vram,
                # but the weights are only updated for a finite loss.
                loss.backward()
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(
                            seconds=(cfg.max_iter - iteration)
                            * time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses],
                                      [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses))
                           + ' Total: %.3f || ETA: %s || timer: %.3f')
                          % tuple([epoch, iteration] + loss_labels
                                  + [total, eta_str, elapsed]),
                          flush=True)

                if args.log:
                    precision = 5
                    loss_info = {
                        k: round(losses[k].item(), precision)
                        for k in losses
                    }
                    loss_info['Total'] = round(loss.item(), precision)

                    if args.log_gpu:
                        # nvidia-smi is slow, so only sample every 10 iters
                        log.log_gpu_stats = (iteration % 10 == 0)

                    log.log('train', loss=loss_info, epoch=epoch,
                            iter=iteration, lr=round(cur_lr, 10),
                            elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu

                # Checkpoints (and validation below) only start at epoch 7.
                if iteration % args.save_interval == 0 and epoch >= 7:
                    # Remember the previous checkpoint before writing the
                    # new one so it can be deleted afterwards.
                    latest = None
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    print('Saving state, iter:', iteration)
                    STMask_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if (args.keep_latest_interval <= 0
                                or iteration % args.keep_latest_interval
                                != args.save_interval):
                            print('Deleting old save...')
                            os.remove(latest)

                # Validation runs on the same schedule as checkpointing and
                # writes its metrics next to the checkpoint (.pth -> .txt).
                if args.validation_epoch > 0:
                    if iteration % args.save_interval == 0 and epoch >= 7:
                        setup_eval()
                        save_path_valid_metrics = save_path(
                            epoch, iteration).replace('.pth', '.txt')
                        compute_validation_map(
                            STMask_net,
                            valid_data=True,
                            output_metrics_file=save_path_valid_metrics)

                iteration += 1

    except KeyboardInterrupt:
        print('Stopping early. Saving network...')

        # Delete previous copy of the interrupted network so we don't spam
        # the weights folder
        SavePath.remove_interrupt(args.save_folder)

        STMask_net.save_weights(
            save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    STMask_net.save_weights(save_path(epoch, iteration))
def train(rank, args):
    """Run the distributed training loop for one worker process.

    Args:
        rank: Index of this worker.  It is used as the distributed rank and
            as the CUDA device index; rank 0 additionally creates the save
            folder, writes visualisations, saves checkpoints and runs
            validation.
        args: Parsed command-line namespace (learning rate, batch size,
            folders, resume options, ...).

    The function sets up logging and the ``nccl`` process group, builds the
    dataset selected by ``cfg.dataset.name``, creates the network and
    criterion, and then trains on batches drawn from an infinite sampler
    until ``cfg.max_iter`` iterations have run.  Depending on the
    configuration each iteration trains the optical-flow network, the
    (joint) image detector and/or the video warping path.

    On ``KeyboardInterrupt`` rank 0 writes an ``*_interrupt`` checkpoint
    unless ``args.interrupt_no_save`` is set, and the function returns.
    When the loop finishes normally rank 0 saves a final checkpoint.
    """
    # `lr` is only read here (for logging); it is a module-level name,
    # presumably kept up to date by set_lr() -- TODO confirm.
    global lr
    global loss_types  # Forms the print order

    if args.num_gpus > 1:
        multi_gpu_rescale(args)
    if rank == 0:
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(args.save_folder, exist_ok=True)

    # set up logger
    setup_logger(output=os.path.join(args.log_folder, cfg.name),
                 distributed_rank=rank)
    logger = logging.getLogger("yolact.train")

    w = SummaryHelper(distributed_rank=rank,
                      log_dir=os.path.join(args.log_folder, cfg.name))
    w.add_text("argv", " ".join(sys.argv))
    logger.info("Args: {}".format(" ".join(sys.argv)))

    # Record the commit the run was started from.
    import git
    with git.Repo(search_parent_directories=True) as repo:
        w.add_text("git_hash", repo.head.object.hexsha)
        logger.info("git hash: {}".format(repo.head.object.hexsha))

    try:
        logger.info("Initializing torch.distributed backend...")
        dist.init_process_group(backend='nccl',
                                init_method=args.dist_url,
                                world_size=args.num_gpus,
                                rank=rank)
    except Exception:
        logger.error("Process group URL: {}".format(args.dist_url))
        # Bare raise keeps the original traceback intact.
        raise

    dist.barrier()

    if torch.cuda.device_count() > 1:
        logger.info('Multiple GPUs detected! Turning off JIT.')

    collate_fn = detection_collate
    if cfg.dataset.name == 'YouTube VIS':
        dataset = YoutubeVIS(image_path=cfg.dataset.train_images,
                             info_file=cfg.dataset.train_info,
                             configs=cfg.dataset,
                             transform=SSDAugmentationVideo(MEANS))

        if cfg.dataset.joint == 'coco':
            joint_dataset = COCODetection(
                image_path=cfg.joint_dataset.train_images,
                info_file=cfg.joint_dataset.train_info,
                transform=SSDAugmentation(MEANS))
            joint_collate_fn = detection_collate

        if args.validation_epoch > 0:
            setup_eval()
            val_dataset = YoutubeVIS(image_path=cfg.dataset.valid_images,
                                     info_file=cfg.dataset.valid_info,
                                     configs=cfg.dataset,
                                     transform=BaseTransformVideo(MEANS))
        collate_fn = collate_fn_youtube_vis

    elif cfg.dataset.name == 'FlyingChairs':
        dataset = FlyingChairs(image_path=cfg.dataset.trainval_images,
                               info_file=cfg.dataset.trainval_info)
        collate_fn = collate_fn_flying_chairs

    else:
        dataset = COCODetection(image_path=cfg.dataset.train_images,
                                info_file=cfg.dataset.train_info,
                                transform=SSDAugmentation(MEANS))

        if args.validation_epoch > 0:
            setup_eval()
            val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                        info_file=cfg.dataset.valid_info,
                                        transform=BaseTransform(MEANS))

    # Set cuda device early to avoid duplicate model in master GPU
    if args.cuda:
        torch.cuda.set_device(rank)

    # Keep a handle on the bare network: `net` is wrapped below, but weights
    # are always saved from / loaded into the unwrapped module.
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # The timer is not used during training (a different timing method is
    # used below), so disable it.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check.
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        logger.info('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume, args=args)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        logger.info('Initializing weights...')
        yolact_net.init_weights(
            backbone_path=args.save_folder + cfg.backbone.path)

    if cfg.flow.train_flow:
        criterion = OpticalFlowLoss()
    else:
        criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                                 pos_threshold=cfg.positive_iou_threshold,
                                 neg_threshold=cfg.negative_iou_threshold,
                                 negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net.cuda(rank)
        criterion.cuda(rank)
        net = nn.parallel.DistributedDataParallel(
            net,
            device_ids=[rank],
            output_device=rank,
            broadcast_buffers=False,
            find_unused_parameters=True)

    # Only parameters that require gradients are handed to the optimizer.
    optimizer = optim.SGD(filter(lambda x: x.requires_grad,
                                 net.parameters()),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)

    iteration = max(args.start_iter, 0)
    w.set_step(iteration)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size // args.num_gpus
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on?
    # lr' = lr * gamma ^ step_index
    step_index = 0

    from data.sampler_utils import InfiniteSampler, build_batch_data_sampler

    # The sampler never runs dry, so batches are pulled with next() and the
    # epoch boundary is derived from the iteration counter instead.
    infinite_sampler = InfiniteSampler(dataset,
                                       seed=args.random_seed,
                                       num_replicas=args.num_gpus,
                                       rank=rank,
                                       shuffle=True)
    train_sampler = build_batch_data_sampler(
        infinite_sampler, images_per_batch=args.batch_size)

    data_loader = data.DataLoader(
        dataset,
        num_workers=args.num_workers,
        collate_fn=collate_fn,
        multiprocessing_context="fork" if args.num_workers > 1 else None,
        batch_sampler=train_sampler)
    data_loader_iter = iter(data_loader)

    if cfg.dataset.joint:
        joint_infinite_sampler = InfiniteSampler(joint_dataset,
                                                 seed=args.random_seed,
                                                 num_replicas=args.num_gpus,
                                                 rank=rank,
                                                 shuffle=True)
        joint_train_sampler = build_batch_data_sampler(
            joint_infinite_sampler, images_per_batch=args.batch_size)
        joint_data_loader = data.DataLoader(
            joint_dataset,
            num_workers=args.num_workers,
            collate_fn=joint_collate_fn,
            multiprocessing_context="fork" if args.num_workers > 1 else None,
            batch_sampler=joint_train_sampler)
        joint_data_loader_iter = iter(joint_data_loader)

    dist.barrier()

    def save_path(epoch, iteration):
        """Return the checkpoint path for the given epoch and iteration."""
        return SavePath(cfg.name, epoch,
                        iteration).get_path(root=args.save_folder)

    time_avg = MovingAverage()
    data_time_avg = MovingAverage(10)

    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    def backward_and_log(prefix, net_outs, targets, masks, num_crowds,
                         extra_loss=None):
        """Compute the detection loss, take one optimizer step and log it.

        ``extra_loss``, when given, is a dict of additional loss terms that
        are merged into the criterion's losses before summing.  Scalars are
        written to the summary under ``<prefix>/<loss name>``.  Returns the
        dict of (mean-reduced) losses.
        """
        optimizer.zero_grad()

        out = net_outs["pred_outs"]
        wrapper = ScatterWrapper(targets, masks, num_crowds)
        losses = criterion(out, wrapper, wrapper.make_mask())

        # Mean here because of the data-parallel wrapper
        losses = {k: v.mean() for k, v in losses.items()}

        if extra_loss is not None:
            assert isinstance(extra_loss, dict)
            losses.update(extra_loss)

        loss = sum([losses[k] for k in losses])

        # Backprop.  backward() is always called to free up vram, but the
        # weights are only updated for a finite loss.
        loss.backward()
        if torch.isfinite(loss).item():
            optimizer.step()

        # Add the loss to the moving average for bookkeeping
        for k in losses:
            loss_avgs[k].add(losses[k].item())
            w.add_scalar('{prefix}/{key}'.format(prefix=prefix, key=k),
                         losses[k].item())

        return losses

    logger.info('Begin training!')
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            while True:
                data_start_time = time.perf_counter()
                datum = next(data_loader_iter)
                dist.barrier()
                data_end_time = time.perf_counter()
                data_time = data_end_time - data_start_time
                if iteration != args.start_iter:
                    data_time_avg.add(data_time)

                # Stop if we've reached an epoch if we're resuming from
                # start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if
                # mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified
                # iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have
                        # changed.  Iterate the values: iterating the dict
                        # itself yields the string keys, which have no
                        # reset() method.
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so
                # we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from
                # some smaller value; otherwise follow the cosine schedule
                # when it is selected.
                if (cfg.lr_warmup_until > 0
                        and iteration <= cfg.lr_warmup_until
                        and cfg.lr_warmup_init < args.lr):
                    set_lr(optimizer,
                           (args.lr - cfg.lr_warmup_init)
                           * (iteration / cfg.lr_warmup_until)
                           + cfg.lr_warmup_init)
                elif cfg.lr_schedule == 'cosine':
                    set_lr(
                        optimizer,
                        args.lr
                        * ((math.cos(math.pi * iteration / cfg.max_iter)
                            + 1.) * .5))

                # Adjust the learning rate at the given iterations, but also
                # if we resume from past that iteration
                while (cfg.lr_schedule == 'step'
                       and step_index < len(cfg.lr_steps)
                       and iteration >= cfg.lr_steps[step_index]):
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                w.add_scalar('meta/lr', lr)

                if cfg.dataset.name == "FlyingChairs":
                    # Optical-flow training on image pairs.
                    imgs_1, imgs_2, flows = prepare_flow_data(datum)
                    net_outs = net(None, extras=(imgs_1, imgs_2))

                    # Compute Loss
                    optimizer.zero_grad()

                    losses = criterion(net_outs, flows)

                    # Mean here because of the data-parallel wrapper
                    losses = {k: v.mean() for k, v in losses.items()}
                    loss = sum([losses[k] for k in losses])

                    # Backprop.  backward() is always called to free up
                    # vram, but the weights are only updated for a finite
                    # loss.
                    loss.backward()
                    if torch.isfinite(loss).item():
                        optimizer.step()

                    # Add the loss to the moving average for bookkeeping
                    for k in losses:
                        loss_avgs[k].add(losses[k].item())
                        w.add_scalar('loss/%s' % k, losses[k].item())

                elif cfg.dataset.joint or not cfg.dataset.is_video:
                    # Still-image detection step, fed either from the joint
                    # dataset or from the main dataset.
                    if cfg.dataset.joint:
                        joint_datum = next(joint_data_loader_iter)
                        dist.barrier()
                        images, targets, masks, num_crowds = prepare_data(
                            joint_datum)
                    else:
                        images, targets, masks, num_crowds = prepare_data(
                            datum)

                    extras = {
                        "backbone": "full",
                        "interrupt": False,
                        "moving_statistics": {
                            "aligned_feats": []
                        }
                    }
                    net_outs = net(images, extras=extras)
                    out = net_outs["pred_outs"]

                    # Compute Loss
                    optimizer.zero_grad()

                    wrapper = ScatterWrapper(targets, masks, num_crowds)
                    losses = criterion(out, wrapper, wrapper.make_mask())

                    # Mean here because of the data-parallel wrapper
                    losses = {k: v.mean() for k, v in losses.items()}
                    loss = sum([losses[k] for k in losses])

                    # Backprop.  backward() is always called to free up
                    # vram, but the weights are only updated for a finite
                    # loss.
                    loss.backward()
                    if torch.isfinite(loss).item():
                        optimizer.step()

                    # Add the loss to the moving average for bookkeeping
                    for k in losses:
                        loss_avgs[k].add(losses[k].item())
                        w.add_scalar('joint/%s' % k, losses[k].item())

                # Forward Pass
                if cfg.dataset.is_video:
                    # Reference frames: every frame except datum[0], walked
                    # from last to first, without tracking gradients.
                    references = []
                    moving_statistics = {"aligned_feats": [], "conf_hist": []}
                    for frame in datum[:0:-1]:
                        images, annots = frame

                        extras = {
                            "backbone": "full",
                            "interrupt": True,
                            "keep_statistics": True,
                            "moving_statistics": moving_statistics
                        }

                        with torch.no_grad():
                            net_outs = net(images, extras=extras)

                        moving_statistics["feats"] = net_outs["feats"]
                        moving_statistics["lateral"] = net_outs["lateral"]

                        # Drop everything but the two phase outputs.
                        keys_to_save = ("outs_phase_1", "outs_phase_2")
                        for key in set(net_outs.keys()) - set(keys_to_save):
                            del net_outs[key]
                        references.append(net_outs)

                    # key frame with annotation, but not compute full backbone
                    frame = datum[0]
                    images, annots = frame
                    frame = (
                        images,
                        annots,
                    )
                    images, targets, masks, num_crowds = prepare_data(frame)

                    extras = {
                        "backbone": "full",
                        "interrupt": not cfg.flow.base_backward,
                        "moving_statistics": moving_statistics
                    }
                    gt_net_outs = net(images, extras=extras)
                    if cfg.flow.base_backward:
                        losses = backward_and_log("compute", gt_net_outs,
                                                  targets, masks, num_crowds)

                    keys_to_save = ("outs_phase_1", "outs_phase_2")
                    for key in set(gt_net_outs.keys()) - set(keys_to_save):
                        del gt_net_outs[key]

                    # now do the warp
                    if references:
                        extras = {
                            "backbone": "partial",
                            "moving_statistics": moving_statistics
                        }

                        net_outs = net(images, extras=extras)
                        extra_loss = yolact_net.extra_loss(
                            net_outs, gt_net_outs)

                        losses = backward_and_log("warp",
                                                  net_outs,
                                                  targets,
                                                  masks,
                                                  num_crowds,
                                                  extra_loss=extra_loss)

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                w.add_scalar('meta/data_time', data_time)
                w.add_scalar('meta/iter_time', elapsed)

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(
                            seconds=(cfg.max_iter - iteration)
                            * time_avg.get_avg())).split('.')[0]

                    # Formatting None with a float spec raises TypeError, so
                    # build the memory text only when a number is available.
                    if torch.cuda.is_available():
                        max_mem_mb = (torch.cuda.max_memory_allocated()
                                      / 1024.0 / 1024.0)
                        memory_str = "max_mem: {:.0f}M".format(max_mem_mb)
                    else:
                        memory_str = "max_mem: n/a"

                    # NOTE(review): `losses` is only bound by the branches
                    # above; on the video path with base_backward off and no
                    # reference frames it may be stale or unset -- confirm.
                    logger.info(
                        ("eta: {eta} epoch: {epoch} iter: {iter} "
                         "{losses} {loss_total} "
                         "time: {time} data_time: {data_time} lr: {lr} "
                         "{memory}").format(
                             eta=eta_str,
                             epoch=epoch,
                             iter=iteration,
                             losses=" ".join([
                                 "{}: {:.3f}".format(
                                     k, loss_avgs[k].get_avg())
                                 for k in losses
                             ]),
                             loss_total="T: {:.3f}".format(
                                 sum([loss_avgs[k].get_avg()
                                      for k in losses])),
                             data_time="{:.3f}".format(
                                 data_time_avg.get_avg()),
                             time="{:.3f}".format(elapsed),
                             lr="{:.6f}".format(lr),
                             memory=memory_str))

                # Rank 0 writes flow visualisations every 100 iterations.
                if rank == 0 and iteration % 100 == 0:
                    if cfg.flow.train_flow:
                        import flowiz as fz
                        from layers.warp_utils import deform_op

                        tgt_size = (64, 64)
                        flow_size = flows.size()[2:]
                        vis_data = []
                        for pred_flow in net_outs:
                            vis_data.append(pred_flow)

                        deform_gt = deform_op(imgs_2, flows)
                        flows_pred = [
                            F.interpolate(x,
                                          size=flow_size,
                                          mode='bilinear',
                                          align_corners=False)
                            for x in net_outs
                        ]
                        deform_preds = [
                            deform_op(imgs_2, x) for x in flows_pred
                        ]

                        vis_data.append(
                            F.interpolate(flows, size=tgt_size, mode='area'))

                        vis_data = [
                            F.interpolate(flow[:1], size=tgt_size)
                            for flow in vis_data
                        ]
                        vis_data = [
                            fz.convert_from_flow(
                                flow[0].data.cpu().numpy().transpose(
                                    1, 2, 0)).transpose(
                                        2, 0, 1).astype('float32') / 255
                            for flow in vis_data
                        ]

                        def convert_image(image):
                            """Undo normalisation on the first image of a
                            batch and return it as a channel-first array."""
                            image = F.interpolate(image,
                                                  size=tgt_size,
                                                  mode='area')
                            image = image[0]
                            image = image.data.cpu().numpy()
                            image = image[::-1]
                            image = image.transpose(1, 2, 0)
                            image = image * np.array(STD) + np.array(MEANS)
                            image = image.transpose(2, 0, 1)
                            image = image / 255
                            image = np.clip(image, -1, 1)
                            image = image[::-1]
                            return image

                        vis_data.append(convert_image(imgs_1))
                        vis_data.append(convert_image(imgs_2))
                        vis_data.append(convert_image(deform_gt))
                        vis_data.extend(
                            [convert_image(x) for x in deform_preds])

                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)

                    elif cfg.flow.warp_mode == "flow":
                        import flowiz as fz

                        tgt_size = (64, 64)
                        vis_data = []
                        for pred_flow, _, _ in net_outs["preds_flow"]:
                            vis_data.append(pred_flow)

                        vis_data = [
                            F.interpolate(flow[:1], size=tgt_size)
                            for flow in vis_data
                        ]
                        vis_data = [
                            fz.convert_from_flow(
                                flow[0].data.cpu().numpy().transpose(
                                    1, 2, 0)).transpose(
                                        2, 0, 1).astype('float32') / 255
                            for flow in vis_data
                        ]

                        input_image = F.interpolate(images,
                                                    size=tgt_size,
                                                    mode='area')
                        input_image = input_image[0]
                        input_image = input_image.data.cpu().numpy()
                        input_image = input_image.transpose(1, 2, 0)
                        input_image = input_image * np.array(
                            STD[::-1]) + np.array(MEANS[::-1])
                        input_image = input_image.transpose(2, 0, 1)
                        input_image = input_image / 255
                        input_image = np.clip(input_image, -1, 1)
                        vis_data.append(input_image)

                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)

                iteration += 1
                w.set_step(iteration)

                if (rank == 0 and iteration % args.save_interval == 0
                        and iteration != args.start_iter):
                    # Remember the previous checkpoint before writing the
                    # new one so it can be deleted afterwards.
                    latest = None
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    logger.info('Saving state, iter: {}'.format(iteration))
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if (args.keep_latest_interval <= 0
                                or iteration % args.keep_latest_interval
                                != args.save_interval):
                            logger.info('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch.  Only rank 0 evaluates; the barrier
            # keeps the other workers waiting until it is done.
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    if rank == 0:
                        compute_validation_map(yolact_net, val_dataset)
                    dist.barrier()

    except KeyboardInterrupt:
        if args.interrupt_no_save:
            logger.info('No save on interrupt, just exiting...')
        elif rank == 0:
            print('Stopping early. Saving network...')
            # Delete previous copy of the interrupted network so we don't
            # spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            yolact_net.save_weights(
                save_path(epoch, repr(iteration) + '_interrupt'))
        return

    if rank == 0:
        yolact_net.save_weights(save_path(epoch, iteration))
def train():
    """Run the YOLACT training loop configured by module-level ``args``/``cfg``.

    Builds the COCO training dataset (plus a validation dataset when
    ``args.validation_epoch > 0``), creates or resumes a ``Yolact`` network and
    optimises it with SGD against ``MultiBoxLoss`` until ``cfg.max_iter``
    iterations have been run.

    Weights are written every ``args.save_interval`` iterations, once more when
    training finishes, and under an ``_interrupt`` name when the loop is
    stopped with Ctrl+C (after which the process exits through ``exit()``).
    Returns ``None``.
    """
    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(args.save_folder, exist_ok=True)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we
    # don't want that, so keep a handle on the bare network.
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # Both of these can set args.resume to None, so do them before the check.
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net = nn.DataParallel(net).cuda()
        criterion = nn.DataParallel(criterion).cuda()

    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)

    def save_path(epoch, iteration):
        """Return the checkpoint path for the given epoch/iteration."""
        return SavePath(cfg.name, epoch, iteration).get_path(root=args.save_folder)

    time_avg = MovingAverage()

    # loss_types (module level) forms the print order.
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()

    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have
                        # changed.  Iterate the values: iterating the dict
                        # itself yields the string keys, which have no reset().
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so
                # we don't keep checking
                if changed:
                    cfg.delayed_settings = [x for x in cfg.delayed_settings
                                            if x[0] > iteration]

                # Warm up by linearly interpolating the learning rate from
                # some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer,
                           (args.lr - cfg.lr_warmup_init)
                           * (iteration / cfg.lr_warmup_until)
                           + cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also
                # if we resume from past that iteration
                while (step_index < len(cfg.lr_steps)
                       and iteration >= cfg.lr_steps[step_index]):
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma ** step_index))

                # Load training data
                # Note, for training on multiple gpus this will use the custom
                # replicate and gather defined elsewhere in this file
                images, targets, masks, num_crowds = prepare_data(datum)

                # Forward Pass
                out = net(images)

                # Compute Loss
                optimizer.zero_grad()

                wrapper = ScatterWrapper(targets, masks, num_crowds)
                losses = criterion(out, wrapper, wrapper.make_mask())

                # Mean here because Dataparallel
                losses = {k: v.mean() for k, v in losses.items()}
                loss = sum(losses[k] for k in losses)

                # Backprop
                loss.backward()  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(datetime.timedelta(
                        seconds=(cfg.max_iter - iteration) * time_avg.get_avg()
                    )).split('.')[0]

                    total = sum(loss_avgs[k].get_avg() for k in losses)
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses))
                           + ' T: %.3f || ETA: %s || timer: %.3f')
                          % tuple([epoch, iteration] + loss_labels
                                  + [total, eta_str, elapsed]),
                          flush=True)

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    # Look up the previous checkpoint *before* writing the new one.
                    latest = None
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if (args.keep_latest_interval <= 0
                                or iteration % args.keep_latest_interval != args.save_interval):
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(yolact_net, val_dataset)
    except KeyboardInterrupt:
        print('Stopping early. Saving network...')

        # Delete previous copy of the interrupted network so we don't spam
        # the weights folder
        SavePath.remove_interrupt(args.save_folder)

        yolact_net.save_weights(save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
def _gan_forward(net, datum):
    """Run the generator on one batch and build the discriminator inputs.

    Returns ``(losses, image, seg_clas, mask_clas)`` where ``losses`` is the
    loss dict produced by ``net``, ``image`` is the batch image stack resized
    to the segmentation size reported by ``seg_mask_clas``, and ``seg_clas`` /
    ``mask_clas`` are the predicted and ground-truth maps it returns.
    """
    # Original author's note: seg_list is the list of predicted masks and
    # pred_list the list of predicted labels, one entry per image.
    losses, seg_list, pred_list = net(datum)
    seg_clas, mask_clas, _, seg_size = seg_mask_clas(seg_list, pred_list, datum)

    # Downsample the input images to the segmentation resolution.
    image_list = [img.to(cuda0) for img in datum[0]]
    image = interpolate(torch.stack(image_list), size=seg_size,
                        mode='bilinear', align_corners=False)
    return losses, image, seg_clas, mask_clas


def train():
    """Train YOLACT, optionally adversarially against a WGAN discriminator.

    Driven by the module-level ``args`` and ``cfg``.  When ``cfg.pred_seg`` is
    set, each batch runs ``cfg.dis_iter`` rounds of a discriminator update
    (with weight clipping to ``cfg.clip_value``) followed by a generator
    update whose loss is the YOLACT loss plus the generator GAN loss;
    ``cfg.amp`` selects the autocast/``GradScaler`` path.  Otherwise a plain
    YOLACT step is taken.

    Weights are saved every ``args.save_interval`` iterations, at the end of
    training, and (if ``args.interrupt``) on Ctrl+C, after which the process
    exits through ``exit()``.  Returns ``None``.
    """
    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(args.save_folder, exist_ok=True)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we
    # don't want that, so keep a handle on the bare network.
    yolact_net = Yolact()
    net = yolact_net
    net.train()
    print('\n--- Generator created! ---')

    # NOTE: the image size and seg size are manually set to 138;
    # this might change in the future, for example to 550.
    dis_net = None
    if cfg.pred_seg:
        dis_size = 138
        dis_net = Discriminator_Wgan(i_size=dis_size, s_size=dis_size)
        # Parameter initialisation is handled inside the discriminator class.
        dis_net.train()
        print('--- Discriminator created! ---\n')

    if args.log:
        log = Log(cfg.name, args.log_folder, dict(args._get_kwargs()),
                  overwrite=(args.resume is None), log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it
    # just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check.
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)

    # NOTE: Using the Ranger Optimizer for the generator
    optimizer_gen = Ranger(net.parameters(), lr=args.lr, weight_decay=args.decay)

    if cfg.pred_seg:
        optimizer_dis = optim.SGD(dis_net.parameters(), lr=cfg.dis_lr)
        # Not stepped anywhere in the current loop.
        schedule_dis = ReduceLROnPlateau(optimizer_dis, mode='min', patience=6,
                                         min_lr=1E-6)

    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio,
                             pred_seg=cfg.pred_seg)

    # Take the advice from WGAN
    criterion_dis = DiscriminatorLoss_Maskrcnn()
    criterion_gen = GeneratorLoss_Maskrcnn()

    if args.batch_alloc is not None:
        # e.g. args.batch_alloc: 24,24
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print('Error: Batch allocation (%s) does not sum to batch size (%s).'
                  % (args.batch_alloc, args.batch_size))
            exit(-1)

    net = CustomDataParallel(NetLoss(net, criterion, pred_seg=cfg.pred_seg))
    if args.cuda:
        net = net.cuda()
        # NOTE(review): nesting under args.cuda is inferred — confirm.
        if cfg.pred_seg:
            dis_net = nn.DataParallel(dis_net)
            dis_net = dis_net.cuda()

    # Initialize everything
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn:
        yolact_net.freeze_bn(True)

    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)

    # val_dataset only exists when validation is enabled, so guard the loader.
    # It is not consumed by the current loop.
    if args.validation_epoch > 0:
        val_loader = data.DataLoader(val_dataset, args.batch_size,
                                     num_workers=args.num_workers * 2,
                                     shuffle=True, collate_fn=detection_collate,
                                     pin_memory=True)

    def save_path(epoch, iteration):
        """Return the checkpoint path for the given epoch/iteration."""
        return SavePath(cfg.name, epoch, iteration).get_path(root=args.save_folder)

    time_avg = MovingAverage()

    # loss_types (module level) forms the print order.
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    # Enable AMP
    amp_enable = cfg.amp
    scaler = torch.cuda.amp.GradScaler(enabled=amp_enable)

    print('Begin training!')
    print()

    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have
                        # changed.  Iterate the values: iterating the dict
                        # itself yields the string keys, which have no reset().
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so
                # we don't keep checking
                if changed:
                    cfg.delayed_settings = [x for x in cfg.delayed_settings
                                            if x[0] > iteration]

                # Warm up by linearly interpolating the learning rate from
                # some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer_gen,
                           (args.lr - cfg.lr_warmup_init)
                           * (iteration / cfg.lr_warmup_until)
                           + cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also
                # if we resume from past that iteration
                while (step_index < len(cfg.lr_steps)
                       and iteration >= cfg.lr_steps[step_index]):
                    step_index += 1
                    set_lr(optimizer_gen, args.lr * (args.gamma ** step_index))

                if cfg.pred_seg:
                    # ====== GAN Train ======
                    for _ in range(cfg.dis_iter):
                        if cfg.amp:
                            # ----- Discriminator part -----
                            with torch.cuda.amp.autocast():
                                _, image, seg_clas, mask_clas = _gan_forward(net, datum)

                                # In the discriminator update we do not want
                                # gradients flowing back into the generator,
                                # so every input is detached.
                                output_pred = dis_net(img=image.detach(),
                                                      seg=seg_clas.detach())
                                output_grou = dis_net(img=image.detach(),
                                                      seg=mask_clas.detach())

                                # Wasserstein Distance (Earth-Mover)
                                loss_dis = criterion_dis(input=output_grou,
                                                         target=output_pred)

                            # Clear first: the generator backward below also
                            # deposits gradients on the discriminator weights,
                            # which must not leak into this update.
                            optimizer_dis.zero_grad()
                            # Scales loss. Calls backward() on scaled loss to
                            # create scaled gradients.
                            scaler.scale(loss_dis).backward()
                            scaler.step(optimizer_dis)
                            scaler.update()

                            # clip the updated parameters
                            for par in dis_net.parameters():
                                par.data.clamp_(-cfg.clip_value, cfg.clip_value)

                            # ----- Generator part -----
                            with torch.cuda.amp.autocast():
                                losses, image, seg_clas, mask_clas = _gan_forward(net, datum)

                                # NOTE this seg_clas CANNOT detach, in order to
                                # flow the gradient back to the generator
                                output_pred = dis_net(img=image, seg=seg_clas)
                                output_grou = dis_net(img=image, seg=mask_clas)
                                loss_gen = criterion_gen(input=output_grou,
                                                         target=output_pred)

                                # Mean here because Dataparallel
                                losses = {k: v.mean() for k, v in losses.items()}
                                loss = sum(losses[k] for k in losses)
                                loss += loss_gen

                            # Generator backprop; only optimizer_gen steps, so
                            # only the generator weights are updated here.
                            optimizer_gen.zero_grad()
                            scaler.scale(loss).backward()
                            scaler.step(optimizer_gen)
                            scaler.update()
                        else:
                            # ----- Discriminator part -----
                            _, image, seg_clas, mask_clas = _gan_forward(net, datum)
                            output_pred = dis_net(img=image.detach(),
                                                  seg=seg_clas.detach())
                            output_grou = dis_net(img=image.detach(),
                                                  seg=mask_clas.detach())
                            loss_dis = criterion_dis(input=output_grou,
                                                     target=output_pred)

                            # See the AMP branch: drop gradients left behind by
                            # the previous generator backward.
                            optimizer_dis.zero_grad()
                            loss_dis.backward()
                            optimizer_dis.step()

                            for par in dis_net.parameters():
                                par.data.clamp_(-cfg.clip_value, cfg.clip_value)

                            # ----- Generator part -----
                            losses, image, seg_clas, mask_clas = _gan_forward(net, datum)

                            # GAN MaskRCNN
                            output_pred = dis_net(img=image, seg=seg_clas)
                            output_grou = dis_net(img=image, seg=mask_clas)
                            loss_gen = criterion_gen(input=output_grou,
                                                     target=output_pred)

                            # Mean here because Dataparallel
                            losses = {k: v.mean() for k, v in losses.items()}
                            loss = sum(losses[k] for k in losses)
                            loss += loss_gen

                            # Gradients must be cleared BEFORE backward;
                            # clearing them between backward() and step()
                            # would make the step a no-op for the loss.
                            optimizer_gen.zero_grad()
                            loss.backward()  # Do this to free up vram even if loss is not finite
                            if torch.isfinite(loss).item():
                                # optimizer_gen holds only the YOLACT
                                # parameters, so only the generator is updated
                                optimizer_gen.step()
                else:
                    # ====== Normal YOLACT Train ======
                    # Zero the grad to get ready to compute gradients
                    optimizer_gen.zero_grad()

                    # Forward Pass + Compute loss at the same time
                    # (see CustomDataParallel and NetLoss)
                    losses = net(datum)

                    # Mean here because Dataparallel
                    losses = {k: v.mean() for k, v in losses.items()}
                    loss = sum(losses[k] for k in losses)

                    # Backprop
                    loss.backward()  # Do this to free up vram even if loss is not finite
                    if torch.isfinite(loss).item():
                        optimizer_gen.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(datetime.timedelta(
                        seconds=(cfg.max_iter - iteration) * time_avg.get_avg()
                    )).split('.')[0]

                    total = sum(loss_avgs[k].get_avg() for k in losses)
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    # The progress line is only printed in GAN mode, as in
                    # the original.
                    if cfg.pred_seg:
                        print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses))
                               + ' T: %.3f || ETA: %s || timer: %.3f')
                              % tuple([epoch, iteration] + loss_labels
                                      + [total, eta_str, elapsed]),
                              flush=True)

                # Loss Key:
                #  - B: Box Localization Loss
                #  - C: Class Confidence Loss
                #  - M: Mask Loss
                #  - P: Prototype Loss
                #  - D: Coefficient Diversity Loss
                #  - E: Class Existence Loss
                #  - S: Semantic Segmentation Loss
                #  - T: Total loss

                if args.log:
                    precision = 5
                    loss_info = {k: round(losses[k].item(), precision) for k in losses}
                    loss_info['T'] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0)  # nvidia-smi is sloooow

                    # cur_lr is a module-level name not assigned in this function.
                    log.log('train', loss=loss_info, epoch=epoch, iter=iteration,
                            lr=round(cur_lr, 10), elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    # Look up the previous checkpoint *before* writing the new one.
                    latest = None
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if (args.keep_latest_interval <= 0
                                or iteration % args.keep_latest_interval != args.save_interval):
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    cfg.gan_eval = False
                    # dis_net only exists in GAN mode.
                    # NOTE(review): it is never switched back to train() after
                    # validation — confirm this is intended.
                    if dis_net is not None:
                        dis_net.eval()
                    compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                                           log if args.log else None)

        # Compute validation mAP after training is finished.  Guarded because
        # val_dataset is only built when validation is enabled; without the
        # guard a NameError here would skip the final weight save below.
        if args.validation_epoch > 0:
            compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                                   log if args.log else None)
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')

            # Delete previous copy of the interrupted network so we don't
            # spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            yolact_net.save_weights(save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
def train():
    """Train YOLACT with Weights & Biases logging and best-mAP checkpointing.

    Driven by the module-level ``args`` and ``cfg``.  Each batch runs one
    forward/backward pass through ``CustomDataParallel(NetLoss(...))`` with
    SGD.  Every 20 iterations a progress line is printed and, when
    ``args.log`` is set, the losses, learning rate and epoch are sent to
    ``wandb``.  When ``args.validation_epoch > 0``, validation runs after
    every epoch and the weights are saved whenever the mask mAP improves.

    Weights are also saved every ``args.save_interval`` iterations, at the end
    of training, and (if ``args.interrupt``) on Ctrl+C, after which the
    process exits through ``sys.exit()``.  Returns ``None``.
    """
    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(args.save_folder, exist_ok=True)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    validate = args.validation_epoch > 0
    val_dataset = None
    if validate:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))
        val_data_loader = data.DataLoader(val_dataset, args.batch_size,
                                          num_workers=args.num_workers,
                                          shuffle=True,
                                          collate_fn=detection_collate,
                                          pin_memory=True)

    # Parallel wraps the underlying module, but when saving and loading we
    # don't want that, so keep a handle on the bare network.
    yolact_net = Yolact()
    wandb.watch(yolact_net)
    net = yolact_net
    net.train()

    if args.log:
        log = Log(cfg.name, args.log_folder, dict(args._get_kwargs()),
                  overwrite=(args.resume is None), log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it
    # just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check.
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print(f'Initializing weights from {args.save_folder + cfg.backbone.path}')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio)

    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print('Error: Batch allocation (%s) does not sum to batch size (%s).'
                  % (args.batch_alloc, args.batch_size))
            exit(-1)

    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    # Initialize everything
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn:
        yolact_net.freeze_bn(True)

    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)

    def save_path(epoch, iteration):
        """Return the checkpoint path for the given epoch/iteration."""
        return SavePath(cfg.name, epoch, iteration).get_path(root=args.save_folder)

    def save_interrupt_and_exit():
        """Save an ``_interrupt`` checkpoint if requested, then exit."""
        if args.interrupt:
            print('Stopping early. Saving network...')

            # Delete previous copy of the interrupted network so we don't
            # spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            yolact_net.save_weights(save_path(epoch, repr(iteration) + '_interrupt'))
        run.save()
        sys.exit()

    time_avg = MovingAverage()

    # loss_types (module level) forms the print order.
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    wandb.config.update({
        'max_iter': cfg.max_iter,
        'epoch_size': epoch_size,
        'num_epochs': num_epochs,
        'batch_size': args.batch_size,
        'init_lr': cfg.lr,
        'init_momentum': cfg.momentum,
        'init_decay': cfg.decay,
        'init_lr_steps': cfg.lr_steps,
        'pred_aspect_ratios': [[[1, 1 / 2, 2]]] * 5,
        'pred_scales': [[24], [48], [96], [192], [384]],
    })

    best_mask_mAP = 0

    # Iteration number handed to the validation helpers.  Initialised here so
    # it is bound even if an epoch breaks out before completing a batch.
    # NOTE(review): the per-batch update below is placed at loop level; the
    # collapsed source does not show whether it sat inside the logging branch.
    last_iter_entry = iteration

    # val_dataset only exists when validation is enabled.
    num_val_images = len(val_dataset) if val_dataset is not None else 0

    config_rule = '%-----------------------------------------------------------------------------%'
    epoch_rule = '%----------------------------------------------------------------------------------------------%'

    print('Configs')
    print(config_rule)
    print(f'Begin training! for {cfg.max_iter} iterations and {num_epochs} epochs at max')
    print(f'training on {len(dataset)} images | validating on {num_val_images} images')
    print('preserve_aspect_ratio', cfg.preserve_aspect_ratio)
    print('LR_steps', cfg.lr_steps)
    print('Batch Size', args.batch_size)
    print(config_rule)

    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # cur_lr is a module-level name not assigned in this function.
            print(epoch_rule)
            print(f' EPOCH [{epoch}|{num_epochs}] LR [{cur_lr}] Best Mask mAP [{best_mask_mAP} %]')
            print(epoch_rule)

            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have
                        # changed.  Iterate the values: iterating the dict
                        # itself yields the string keys, which have no reset().
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so
                # we don't keep checking
                if changed:
                    cfg.delayed_settings = [x for x in cfg.delayed_settings
                                            if x[0] > iteration]

                # Warm up by linearly interpolating the learning rate from
                # some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer,
                           (args.lr - cfg.lr_warmup_init)
                           * (iteration / cfg.lr_warmup_until)
                           + cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also
                # if we resume from past that iteration
                while (step_index < len(cfg.lr_steps)
                       and iteration >= cfg.lr_steps[step_index]):
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma ** step_index))
                    print(f'Adjusting Learning rate to {cur_lr}')

                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                # Forward Pass + Compute loss at the same time
                # (see CustomDataParallel and NetLoss)
                losses = net(datum)

                # Mean here because Dataparallel
                losses = {k: v.mean() for k, v in losses.items()}
                loss = sum(losses[k] for k in losses)

                # Backprop
                loss.backward()  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 20 == 0:
                    eta_str = str(datetime.timedelta(
                        seconds=(cfg.max_iter - iteration) * time_avg.get_avg()
                    )).split('.')[0]

                    total = sum(loss_avgs[k].get_avg() for k in losses)
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    print(('[Ep:%3d] Iter:%7d ||' + (' %s: %.3f |' * len(losses))
                           + ' T: %.3f || ETA: %s || timer: %.3f')
                          % tuple([epoch, iteration] + loss_labels
                                  + [total, eta_str, elapsed]),
                          flush=True)

                if args.log and iteration % 20 == 0:
                    precision = 5
                    loss_info = {k: round(losses[k].item(), precision) for k in losses}
                    loss_info['T'] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0)  # nvidia-smi is sloooow

                    log_stuff = log.log('train', loss=loss_info, epoch=epoch,
                                        iter=iteration, lr=round(cur_lr, 10),
                                        elapsed=elapsed)

                    # One call for the whole step instead of one per metric.
                    wandb.log({
                        '[TRAIN] BBox Loss': log_stuff['loss']['B'],
                        '[TRAIN] Mask Loss': log_stuff['loss']['M'],
                        '[TRAIN] Class Conf. Loss': log_stuff['loss']['C'],
                        '[TRAIN] Sem. Segmentation Loss': log_stuff['loss']['S'],
                        '[TRAIN] Overall Training Loss': log_stuff['loss']['T'],
                        'Learning Rate': log_stuff['lr'],
                        'Epoch': log_stuff['epoch'],
                    }, step=log_stuff['iter'])

                    log.log_gpu_stats = args.log_gpu

                last_iter_entry = iteration
                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    # Look up the previous checkpoint *before* writing the new one.
                    latest = None
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if (args.keep_latest_interval <= 0
                                or iteration % args.keep_latest_interval != args.save_interval):
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done after each epoch.  A Ctrl+C during validation is
            # handled by the enclosing except clause, which performs the same
            # save-and-exit sequence the former nested handler did.
            if validate:
                cur_mask_mAP = compute_validation_map(
                    epoch=epoch, iteration=last_iter_entry,
                    yolact_net=yolact_net, dataset=val_dataset,
                    log=log if args.log else None)

                if cur_mask_mAP > best_mask_mAP:
                    best_mask_mAP = cur_mask_mAP
                    wandb.config.update({'best_mask_mAP': best_mask_mAP},
                                        allow_val_change=True)
                    print(f'Found new best Mask mAP with {best_mask_mAP} %, Saving weights ...\n')
                    SavePath.remove_prev_best(args.save_folder)
                    yolact_net.save_weights(
                        save_path(epoch,
                                  repr(last_iter_entry) + f'_mAP{best_mask_mAP}'))

                # NOTE(review): assumed to run every epoch, not only on a new
                # best — confirm against the original indentation.
                compute_validation_loss(net=net, data_loader=val_data_loader,
                                        epoch=epoch, iteration=last_iter_entry,
                                        log=log if args.log else None)

        # Compute validation mAP after training is finished.  Guarded because
        # val_dataset is only built when validation is enabled; without the
        # guard a NameError here would skip the final weight save below.
        if validate:
            compute_validation_map(epoch=epoch, iteration=last_iter_entry,
                                   yolact_net=yolact_net, dataset=val_dataset,
                                   log=log if args.log else None)
    except KeyboardInterrupt:
        save_interrupt_and_exit()

    yolact_net.save_weights(save_path(epoch, iteration))
    run.save()
def interpret():
    """Run one pass over the training data loader and print how many batches it yields.

    The function performs the same model / optimizer / criterion setup as a
    training run (including optional weight resume), but the loop body only
    discards each batch and counts it. Nothing is trained and no weights are
    written here.

    Reads the module-level ``args`` and ``cfg`` objects. Prints the dataset
    size, a progress line every 10000 batches, and the final batch count.
    """
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        # Not consumed below; constructed only when validation is enabled.
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)

    # The optimizer and criterion are built as in training but never stepped
    # or evaluated by the counting loop below.
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net = nn.DataParallel(net).cuda()
        criterion = nn.DataParallel(criterion).cuda()

    iteration = max(args.start_iter, 0)
    epoch_size = len(dataset) // args.batch_size
    print("Dataset Size:")
    print(len(dataset))

    # A single pass is enough to count the batches. The epoch count is not
    # derived from cfg.max_iter / epoch_size, which would divide by zero when
    # the dataset holds fewer samples than one batch.
    num_epochs = 1

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    # Not used by the counting loop; retained because these are project
    # constructors whose side effects are not visible from here.
    time_avg = MovingAverage()
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin interpret!')
    print()

    # Bind the counter before the try block so the summary below is always
    # printable, even if the epoch is skipped by the resume check or the run
    # is interrupted before the first batch arrives.
    count = 0

    # try-except so you can use ctrl+c to stop early
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            count = 0
            for datum in data_loader:
                # Drop the batch immediately; only the number of batches matters.
                del datum
                count += 1
                if count % 10000 == 0:
                    print(count)
    except KeyboardInterrupt:
        # NOTE(review): the message mentions saving, but nothing is saved on
        # this path; the text is kept unchanged.
        print('Stopping early. Saving network...')

    print("Loaded Dataset Numbers")
    print(count)
def train(args, cfg, option, DataSet):
    """Train a DVIS network and periodically checkpoint its weights.

    Args:
        args: Parsed command-line namespace. ``save_folder`` and ``log_folder``
            are rewritten in place when ``exp_name`` is set, and ``resume``,
            ``start_iter`` and ``batch_alloc`` may also be rewritten.
        cfg: Project configuration object (dataset paths, ``max_iter``,
            learning-rate schedule, ``delayed_settings``, ...).
        option: Mapping read for ``'model_1stLayer_en'``,
            ``'model_lastLayer_en'`` and the ``'*_lr_alpha'`` multipliers; it
            is also handed to ``LossEvaluate``.
        DataSet: Callable that builds the training dataset.

    Side effects:
        Creates the save/log folders, writes TensorBoard events to
        ``args.log_folder``, prints progress every 10 iterations, and saves
        weights every ``args.save_interval`` iterations as well as at the end.
        On Ctrl+C the process exits, saving an ``_interrupt`` checkpoint first
        when ``args.interrupt`` is set.
    """
    if args.exp_name is not None:
        args.save_folder = os.path.join(args.save_folder, args.exp_name)
        args.log_folder = os.path.join(args.log_folder, args.exp_name)

    if not os.path.exists(args.save_folder):
        os.makedirs(args.save_folder, exist_ok=True)
    if not os.path.exists(args.log_folder):
        os.makedirs(args.log_folder, exist_ok=True)

    dataset = DataSet(image_path=cfg.dataset.train_images,
                      mask_out_ch=cfg.gt_inst_ch,
                      info_file=cfg.dataset.train_info,
                      option=cfg.dataset,
                      transform=SSDAugmentation(cfg, MEANS),
                      running_mode='train')

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    dvis_net = DVIS(cfg)
    net = dvis_net
    net.train()

    if args.log:
        # The Log object is not referenced again in this function; the binding
        # is kept so the object stays alive for the duration of the run.
        log = Log(cfg.name,
                  args.log_folder,
                  dict(args._get_kwargs()),
                  overwrite=(args.resume is None),
                  log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        dvis_net.load_weights(args.resume,
                              load_firstLayer=option['model_1stLayer_en'],
                              load_lastLayer=option['model_lastLayer_en'])

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        # NOTE(review): plain string concatenation; once exp_name has been
        # joined onto save_folder there is no trailing separator here.
        # Confirm where the backbone file is expected to live.
        dvis_net.init_weights(backbone_path=args.save_folder +
                              cfg.backbone.path)

    # One parameter group per sub-network so each gets its own scaled
    # learning rate; the optimizer is built before ``net`` is wrapped.
    optimizer = optim.SGD([{
        'params': net.backbone.parameters(),
        'lr': args.lr * option['bkb_lr_alpha']
    }, {
        'params': net.fpn.parameters(),
        'lr': args.lr * option['fpn_lr_alpha']
    }, {
        'params': net.proto_net.parameters(),
        'lr': args.lr * option['proto_net_lr_alpha']
    }],
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = LossEvaluate(option, class_weights=cfg.dataset.sem_weights)

    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print(
                'Error: Batch allocation (%s) does not sum to batch size (%s).'
                % (args.batch_alloc, args.batch_size))
            exit(-1)

    net = NetLoss(net, criterion)
    net = CustomDataParallel(net)
    if args.cuda:
        net = net.cuda()

    # Initialize everything
    if not cfg.freeze_bn:
        dvis_net.freeze_bn()  # Freeze bn so we don't kill our means

    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=False,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    writer = SummaryWriter(log_dir=args.log_folder)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    loss_keys = [
        'binary', 'pi', 'l1', 'regul', 'iou', 'classify', 'eval_prec',
        'eval_rec', 'eval_acc'
    ]
    vis_keys = ['preds', 'gts', 'rgb', 'wghts', 'grad']
    loss_avgs = {k: MovingAverage(100) for k in loss_keys}

    print('Begin training!')
    # try-except so you can use ctrl+c to save early and stop training
    try:
        # Running sums of the moving averages between TensorBoard writes.
        log_loss = dict()
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # NOTE(review): the first 99 iterations are counted but not
                # trained on; confirm this skip is intentional.
                if iteration < 99:
                    iteration += 1
                    continue

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed.
                        # Iterate the MovingAverage objects, not the dict's string keys.
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                # Forward Pass + Compute loss at the same time (see CustomDataParallel and NetLoss0)
                ret = net(datum)

                # Mean here because Dataparallel and do Backprop
                losses = {k: ret[k].mean() for k in loss_keys if k in ret}
                det_loss_keys = [k for k in loss_keys if k in losses]
                all_loss = sum([losses[k] for k in det_loss_keys])
                for k in det_loss_keys:
                    loss_avgs[k].add(losses[k].item())

                # backward and optimize
                if args.show_gradients == True:
                    # Keep the gradient of the prediction tensor so it can be visualised.
                    ret['preds_0'].retain_grad()
                    all_loss.backward(retain_graph=True)
                    ret['grad'] = ret['preds_0'].grad[:, 0, :, :]
                else:
                    all_loss.backward(
                    )  # Do this to free up vram even if loss is not finite
                if torch.isfinite(all_loss).item():
                    optimizer.step()

                ret['preds'] = torch.nn.ReLU()(ret['preds'])
                vis_imgs = {k: ret[k] for k in vis_keys if k in ret}

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]

                    # The printed total leaves out the 'eval_*' entries.
                    total = sum([
                        loss_avgs[k].get_avg() for k in det_loss_keys
                        if 'eval' not in k
                    ])
                    loss_labels = sum(
                        [[k, loss_avgs[k].get_avg()]
                         for k in loss_keys if k in det_loss_keys], [])

                    print(('[%3d] %7d ||' +
                           (' %s: %.3f |' * len(det_loss_keys)) +
                           ' T: %.3f || ETA: %s || timer: %.3f') %
                          tuple([epoch, iteration] + loss_labels +
                                [total, eta_str, elapsed]),
                          flush=True)

                if args.log:
                    # Clamp to 1 so batch sizes above 50 do not make the
                    # modulo below divide by zero.
                    log_step = max(1, 50 // args.batch_size)
                    for k in det_loss_keys:
                        if k not in log_loss:
                            log_loss[k] = loss_avgs[k].get_avg()
                        else:
                            log_loss[k] += loss_avgs[k].get_avg()

                    if iteration % log_step == log_step - 1:
                        for k in det_loss_keys:
                            writer.add_scalar(k + '_loss',
                                              log_loss[k] / float(log_step),
                                              iteration / log_step)
                            log_loss[k] = 0

                    log_fig_step = 100
                    if iteration % log_fig_step == log_fig_step - 1:
                        if 'davis' in args.dataset:
                            # Keep only the first three channels of the image
                            # tensor for plotting.
                            vis_imgs['rgb'] = vis_imgs['rgb'][:, :3, :, :]
                        fig = plot_tfboard_figure(
                            cfg, vis_imgs, show_grad=args.show_gradients)
                        writer.add_figure('prediction _ grad',
                                          fig,
                                          global_step=iteration /
                                          log_fig_step)

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        # Look up the previous checkpoint before writing the new one.
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    print('Saving state, iter:', iteration)
                    dvis_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

                # Drop references to this batch's tensors before the next batch.
                del ret, vis_imgs, losses
                # end of batch run
            # end of epoch

    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')

            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            writer.close()
            dvis_net.save_weights(
                save_path(epoch,
                          repr(iteration) + '_interrupt'))
        exit()

    writer.close()
    dvis_net.save_weights(save_path(epoch, iteration))