def main(): """ Main Function """ # Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer assert_and_infer_cfg(args) prep_experiment(args, parser) writer = None _, _, _, extra_val_loaders, _ = datasets.setup_loaders(args) criterion, criterion_val = loss.get_loss(args) criterion_aux = loss.get_loss_aux(args) net = network.get_net(args, criterion, criterion_aux) optim, scheduler = optimizer.get_optimizer(args, net) net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net) net = network.warp_network_in_dataparallel(net, args.local_rank) epoch = 0 i = 0 if args.snapshot: epoch, mean_iu = optimizer.load_weights(net, optim, scheduler, args.snapshot, args.restore_optimizer) print("#### iteration", i) torch.cuda.empty_cache() # Main Loop # for epoch in range(args.start_epoch, args.max_epoch): for dataset, val_loader in extra_val_loaders.items(): print("Extra validating... This won't save pth file") validate(val_loader, dataset, net, criterion_val, optim, scheduler, epoch, writer, i, save_pth=False)
def get_net(): """ Get Network for evaluation """ logging.info('Load model file: %s', args.snapshot) net = network.get_net(args, criterion=None) net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net) net = network.warp_network_in_dataparallel(net, args.local_rank) net, _, _, _, _ = restore_snapshot(net, optimizer=None, scheduler=None, snapshot=args.snapshot, restore_optimizer_bool=False) net.eval() return net
def main(): """ Main Function """ # Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer assert_and_infer_cfg(args) writer = prep_experiment(args, parser) train_loader, val_loader, train_obj = datasets.setup_loaders(args) criterion, criterion_val = loss.get_loss(args) net = network.get_net(args, criterion) optim, scheduler = optimizer.get_optimizer(args, net) if args.fp16: net, optim = amp.initialize(net, optim, opt_level="O1") net = network.warp_network_in_dataparallel(net, args.apex) if args.snapshot: optimizer.load_weights(net, optim, args.snapshot, args.restore_optimizer) torch.cuda.empty_cache() # Main Loop for epoch in range(args.start_epoch, args.max_epoch): # Update EPOCH CTR cfg.immutable(False) cfg.EPOCH = epoch cfg.immutable(True) #snapshot="/srv/beegfs02/scratch/language_vision/data/Sound_Event_Prediction/audio/semanticPred/logs/ckpt/default/Omni-network.deepv3_audioBG_Spec_diffmask_Comp_noBG_Paralleltask_depth_noSeman.DeepWV3Plus/models_depth/SOP_epoch_"+str(14)+".pth" #optimizer.load_weights(net, optim, # snapshot, args.restore_optimizer) scheduler.step() train(train_loader, net, optim, epoch, writer) if args.apex: train_loader.sampler.set_epoch(epoch + 1) validate(val_loader, net, criterion_val, optim, epoch, writer) if args.class_uniform_pct: if epoch >= args.max_cu_epoch: train_obj.build_epoch(cut=True) if args.apex: train_loader.sampler.set_num_samples() else: train_obj.build_epoch()
def main(): """ Main Function """ # Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer assert_and_infer_cfg(args) writer = prep_experiment(args, parser) train_loader, val_loader, train_obj = datasets.setup_loaders(args) criterion, criterion_val = loss.get_loss(args) net = network.get_net(args, criterion) optim, scheduler = optimizer.get_optimizer(args, net) if args.fp16: net, optim = amp.initialize(net, optim, opt_level="O1") net = network.warp_network_in_dataparallel(net, args.apex) if args.snapshot: optimizer.load_weights(net, optim, args.snapshot, args.restore_optimizer) torch.cuda.empty_cache() # Main Loop for epoch in range(args.start_epoch, args.max_epoch): # Update EPOCH CTR cfg.immutable(False) cfg.EPOCH = epoch cfg.immutable(True) scheduler.step() train(train_loader, net, optim, epoch, writer) if args.apex: train_loader.sampler.set_epoch(epoch + 1) validate(val_loader, net, criterion_val, optim, epoch, writer) if args.class_uniform_pct: if epoch >= args.max_cu_epoch: train_obj.build_epoch(cut=True) if args.apex: train_loader.sampler.set_num_samples() else: train_obj.build_epoch()
def main(): """ Main Function """ # Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer assert_and_infer_cfg(args) writer = prep_experiment(args, parser) train_loader, val_loaders, train_obj, extra_val_loaders, covstat_val_loaders = datasets.setup_loaders( args) criterion, criterion_val = loss.get_loss(args) criterion_aux = loss.get_loss_aux(args) net = network.get_net(args, criterion, criterion_aux) optim, scheduler = optimizer.get_optimizer(args, net) net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net) net = network.warp_network_in_dataparallel(net, args.local_rank) epoch = 0 i = 0 if args.snapshot: epoch, mean_iu = optimizer.load_weights(net, optim, scheduler, args.snapshot, args.restore_optimizer) if args.restore_optimizer is True: iter_per_epoch = len(train_loader) i = iter_per_epoch * epoch else: epoch = 0 print("#### iteration", i) torch.cuda.empty_cache() # Main Loop # for epoch in range(args.start_epoch, args.max_epoch): while i < args.max_iter: # Update EPOCH CTR cfg.immutable(False) cfg.ITER = i cfg.immutable(True) i = train(train_loader, net, optim, epoch, writer, scheduler, args.max_iter) train_loader.sampler.set_epoch(epoch + 1) if (args.dynamic and args.use_isw and epoch % (args.cov_stat_epoch + 1) == args.cov_stat_epoch) \ or (args.dynamic is False and args.use_isw and epoch == args.cov_stat_epoch): net.module.reset_mask_matrix() for trial in range(args.trials): for dataset, val_loader in covstat_val_loaders.items( ): # For get the statistics of covariance validate_for_cov_stat(val_loader, dataset, net, criterion_val, optim, scheduler, epoch, writer, i, save_pth=False) net.module.set_mask_matrix() if args.local_rank == 0: print("Saving pth file...") evaluate_eval(args, net, optim, scheduler, None, None, [], writer, epoch, "None", None, i, save_pth=True) if args.class_uniform_pct: if epoch >= args.max_cu_epoch: train_obj.build_epoch(cut=True) train_loader.sampler.set_num_samples() else: train_obj.build_epoch() epoch += 1 # Validation after epochs if len(val_loaders) == 1: # Run validation only one time - To save models for dataset, val_loader in val_loaders.items(): validate(val_loader, dataset, net, criterion_val, optim, scheduler, epoch, writer, i) else: if args.local_rank == 0: print("Saving pth file...") evaluate_eval(args, net, optim, scheduler, None, None, [], writer, epoch, "None", None, i, save_pth=True) for dataset, val_loader in extra_val_loaders.items(): print("Extra validating... This won't save pth file") validate(val_loader, dataset, net, criterion_val, optim, scheduler, epoch, writer, i, save_pth=False)