def main():
    """Obtain a network (cached or freshly trained) and demo one prediction.

    A fully serialized model under ``MODEL_OUT`` takes precedence over a
    saved state dict. When neither file exists, a new ``Net`` is trained and
    then written out in both formats. Afterwards one random image is run
    through the network, the prediction is printed next to the expected
    value, and the image is displayed.
    """
    state_dump = os.path.join(MODEL_OUT, "state.pth")
    model_dump = os.path.join(MODEL_OUT, "model.pth")

    if os.path.exists(model_dump):
        # The whole pickled module is available: use it as-is.
        net = torch.load(model_dump)
    else:
        net = Net()
        if os.path.exists(state_dump):
            # Only the weights were saved: load them into a fresh instance.
            net.load_state_dict(torch.load(state_dump))
        else:
            # Nothing cached yet: train from scratch and persist both formats.
            sgd = torch.optim.SGD(net.parameters(), lr=LEARNING_RATE)
            mse = nn.MSELoss()
            batches = dataloader.get_train_loader()
            train_model(net, batches, sgd, mse)

            print("Saving model state dict into {}".format(state_dump))
            torch.save(net.state_dict(), state_dump)
            print("Saving model into {}".format(model_dump))
            torch.save(net, model_dump)

    net.eval()

    sample, expected = dataloader.get_random_image()
    predicted = test_inference(net, sample)
    print("Predicted {}, expected {}".format(predicted, expected))

    # The first element of the sample is reshaped to 28x28 for display.
    picture = sample[0].reshape(28, 28)
    plt.imshow(picture, cmap="gray")
    plt.show()
def main(args):
    """Build a ``StyleTransfer`` model and print the result of one training batch.

    ``args`` is accepted but not read by this function.
    """
    loader, style_image = get_train_loader(4)

    # NOTE(review): presumably the content-layer and style-layer name lists;
    # confirm against the StyleTransfer constructor.
    first_layer_names = ['relu2_2']
    second_layer_names = ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3']
    model = StyleTransfer(style_image,
                          first_layer_names,
                          second_layer_names,
                          alpha=1e3)

    # Only the first batch is used; its second element is unpacked but ignored.
    batch, _ = next(iter(loader))
    result = model.train_batch(batch)
    print(result)
def run():
    """Train a model on one cross-validation fold.

    All settings come from the module-level ``args``. The fold equal to
    ``args.fold`` is used for validation and every other fold for training.
    """
    seed_everything(args.seed)

    # Load the annotations and assign each image to a fold.
    frame = pd.read_csv(os.path.join(args.data_dir, "train.csv"))
    frame = process_data(frame, args.subset)
    folds = create_folds(frame, args.n_folds)

    train_mask = folds["fold"] != args.fold
    valid_mask = folds["fold"] == args.fold
    train_image_ids = folds[train_mask].index.values
    valid_image_ids = folds[valid_mask].index.values

    # Settings shared by both loaders.
    loader_kwargs = {"batch_size": args.bs, "num_workers": args.num_workers}
    train_loader = get_train_loader(
        args.data_dir,
        frame,
        train_image_ids,
        transforms=get_train_augs(args),
        do_cutmix=args.cutmix,
        **loader_kwargs,
    )
    valid_loader = get_valid_loader(
        args.data_dir,
        frame,
        valid_image_ids,
        transforms=get_valid_augs(args),
        **loader_kwargs,
    )

    model = get_model(args.model_variant,
                      model_dir=args.model_dir,
                      checkpoint_path=args.load_path)
    model = model.cuda()

    # The one-cycle schedule reads steps_per_epoch from args, so it must be
    # set before the scheduler is requested.
    if args.scheduler == "one_cycle":
        args.steps_per_epoch = len(train_image_ids) // args.bs
    scheduler_class, scheduler_params = get_scheduler(args)

    learner = Learner(model, scheduler_class, scheduler_params, hparams=args)
    learner.fit(train_loader, valid_loader)
torch.manual_seed(config.seed) if torch.cuda.is_available(): torch.cuda.manual_seed(config.seed) parser = argparse.ArgumentParser() with Engine(custom_parser=parser) as engine: args = parser.parse_args() cudnn.benchmark = True if engine.distributed: torch.cuda.set_device(engine.local_rank) # data loader train_loader, train_sampler = get_train_loader(engine, ADE) # config network and criterion criterion = nn.CrossEntropyLoss(reduction='mean', ignore_index=-1) if engine.distributed: logger.info('Use the Multi-Process-SyncBatchNorm') BatchNorm2d = SyncBatchNorm # else: # BatchNorm2d = BatchNorm2d model = CPNet(config.num_classes, criterion=criterion, pretrained_model=config.pretrained_model, norm_layer=BatchNorm2d) init_weight(model.business_layer, nn.init.kaiming_normal_,
import os

from models import CVAE
from utils import trainEpochs, plot_results
from dataloader import get_train_loader

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ----------Hyper Parameters---------- #
hidden_size = 256
cond_embedding_size = 8
latent_size = 32
n_epochs = 900
batch_size = 16
learning_rate = 0.001

# Make sure the output directory exists before any training starts, so a
# missing folder cannot discard a finished run when its history is pickled.
os.makedirs('./histories', exist_ok=True)

train_loader = get_train_loader(batch_size=batch_size)

# Train one fresh CVAE per KL-annealing schedule, then plot and pickle the
# history returned by each run.
for KL_annealing_method in ['cyclical', 'monotonic']:
    cvae = CVAE(hidden_size, latent_size, cond_embedding_size).to(device)
    history = trainEpochs(train_loader=train_loader,
                          cvae=cvae,
                          n_epochs=n_epochs,
                          learning_rate=learning_rate,
                          KL_annealing_method=KL_annealing_method)
    plot_results(history, KL_annealing_method)

    filename = f'./histories/{KL_annealing_method}.pkl'
    with open(filename, "wb") as fp:
        pickle.dump(history, fp)
torch.manual_seed(config.seed) if torch.cuda.is_available(): torch.cuda.manual_seed(config.seed) parser = argparse.ArgumentParser() with Engine(custom_parser=parser) as engine: args = parser.parse_args() cudnn.benchmark = True if engine.distributed: torch.cuda.set_device(engine.local_rank) # data loader train_loader, train_sampler = get_train_loader(engine, DAVIS) # config network and criterion criterion = nn.BCEWithLogitsLoss(reduction='none') #criterion = BinaryDiceLoss() #criterion = sigmoid_focal_loss_star_jit if engine.distributed: logger.info('Use the Multi-Process-SyncBatchNorm') cfg = get_cfg() cfg.merge_from_file('./fpn_config/semantic_R_50_FPN_1x.yaml') model = AnchorDiffNet(cfg, embedding=cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM, batch_mode='sync')
parser = argparse.ArgumentParser() with Engine(custom_parser=parser) as engine: args = parser.parse_args() cudnn.benchmark = True seed = config.seed if engine.distributed: seed = engine.local_rank torch.manual_seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed(seed) # data loader train_loader, train_sampler = get_train_loader(engine, Cityscapes) # config network and criterion criterion = nn.CrossEntropyLoss(reduction='mean', ignore_index=255) ohem_criterion = ProbOhemCrossEntropy2d( ignore_label=255, thresh=0.7, min_kept=int(config.batch_size // len(engine.devices) * config.image_height * config.image_width // (16 * config.gt_down_sampling**2)), use_weight=False) if engine.distributed: BatchNorm2d = SyncBatchNorm model = BiSeNet(config.num_classes,
torch.manual_seed(config.seed) if torch.cuda.is_available(): torch.cuda.manual_seed(config.seed) parser = argparse.ArgumentParser() with Engine(custom_parser=parser) as engine: args = parser.parse_args() cudnn.benchmark = True if engine.distributed: torch.cuda.set_device(engine.local_rank) # data loader train_loader, train_sampler = get_train_loader(engine, PascalContext) # config network and criterion criterion = nn.CrossEntropyLoss(reduction='mean', ignore_index=-1) if engine.distributed: logger.info('Use the Multi-Process-SyncBatchNorm') BatchNorm2d = SyncBatchNorm model = CPNet(config.num_classes, criterion=criterion, pretrained_model=config.pretrained_model, norm_layer=BatchNorm2d) init_weight(model.business_layer, nn.init.kaiming_normal_, BatchNorm2d,
def _load_matching_weights(model, weights_path):
    """Copy into ``model`` every tensor saved at ``weights_path`` whose key the model has."""
    partial = torch.load(weights_path)
    state = model.state_dict()
    state.update({k: v for k, v in partial.items() if k in state})
    model.load_state_dict(state)


def _log_mIoUs(logger, tag, label, mIoUs, step):
    """Report one mIoU per architecture to TensorBoard and the text log.

    Architecture index 0 is reported as the teacher, any other index as the
    student. ``tag`` is used in the scalar name and ``label`` in the message.
    """
    for idx, arch_idx in enumerate(config.arch_idx):
        role = "teacher" if arch_idx == 0 else "student"
        logger.add_scalar("mIoU/%s_%s" % (tag, role), mIoUs[idx], step)
        logging.info("%s's %s_mIoU %.3f", role, label, mIoUs[idx])


def _save_weights(models):
    """Save every model to ``weights<arch_idx>.pt`` inside ``config.save``."""
    for idx, arch_idx in enumerate(config.arch_idx):
        save(models[idx], os.path.join(config.save, "weights%d.pt" % arch_idx))


def main():
    """Build the searched network(s) and train, evaluate or test them on Cityscapes.

    For every entry of ``config.arch_idx`` the saved architecture parameters
    are loaded from ``config.load_path`` and a ``Network`` is built from them.
    The output branch with the better accuracy/latency objective is selected,
    and an evaluator and a tester are created for the model. With
    ``config.is_eval`` set, the models are validated once and the process
    exits. Otherwise they are trained for ``config.nepochs`` epochs with
    periodic validation (or testing when ``config.is_test`` is set), and
    weights are saved after each validation or test pass.

    Raises:
        RuntimeError: if training is requested but no architecture in
            ``config.arch_idx`` qualified for an optimizer.
    """
    create_exp_dir(config.save,
                   scripts_to_save=glob.glob('*.py') + glob.glob('*.sh'))
    logger = SummaryWriter(config.save)

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(config.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)
    logging.info("args = %s", str(config))

    # preparation ################
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    seed = config.seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    # config network and criterion ################
    min_kept = int(config.batch_size * config.image_height *
                   config.image_width // (16 * config.gt_down_sampling**2))
    ohem_criterion = ProbOhemCrossEntropy2d(ignore_label=255,
                                            thresh=0.7,
                                            min_kept=min_kept,
                                            use_weight=False)
    distill_criterion = nn.KLDivLoss()

    # data loader ###########################
    # Test mode reads config.train_eval_source as the training list; every
    # other entry is identical in both modes.
    if config.is_test:
        train_source = config.train_eval_source
    else:
        train_source = config.train_source
    data_setting = {
        'img_root': config.img_root_folder,
        'gt_root': config.gt_root_folder,
        'train_source': train_source,
        'eval_source': config.eval_source,
        'test_source': config.test_source,
        'down_sampling': config.down_sampling
    }
    train_loader = get_train_loader(config, Cityscapes, test=config.is_test)

    # Model #######################################
    models = []
    evaluators = []
    testers = []
    lasts = []
    base_lr = config.lr
    optimizer = None
    for idx, arch_idx in enumerate(config.arch_idx):
        if config.load_epoch == "last":
            arch_file = "arch_%d.pt" % arch_idx
        else:
            arch_file = "arch_%d_%d.pt" % (arch_idx, int(config.load_epoch))
        state = torch.load(os.path.join(config.load_path, arch_file))

        alphas = [
            state["alpha_%d_%d" % (arch_idx, i)].detach() for i in range(3)
        ]
        # There is no beta for position 0; None fills that slot.
        betas = [None] + [
            state["beta_%d_%d" % (arch_idx, i)].detach() for i in (1, 2)
        ]
        ratios = [
            state["ratio_%d_%d" % (arch_idx, i)].detach() for i in range(3)
        ]
        model = Network(alphas,
                        betas,
                        ratios,
                        num_classes=config.num_classes,
                        layers=config.layers,
                        Fch=config.Fch,
                        width_mult_list=config.width_mult_list,
                        stem_head_width=config.stem_head_width[idx],
                        ignore_skip=arch_idx == 0)

        # Keep branch 2 plus whichever of branch 0 / branch 1 scored the
        # better accuracy-latency objective during search.
        obj02 = objective_acc_lat(state["mIoU02"], state["latency02"])
        obj12 = objective_acc_lat(state["mIoU12"], state["latency12"])
        last = [2, 0] if obj02 > obj12 else [2, 1]
        lasts.append(last)
        model.build_structure(last)
        logging.info("net: " + str(model))

        for b in last:
            plot_kwargs = {"F_base": config.Fch}
            if len(config.width_mult_list) > 1:
                plot_kwargs["width"] = getattr(model, "widths%d" % b)
                plot_kwargs["head_width"] = config.stem_head_width[idx][1]
            plot_op(getattr(model, "ops%d" % b),
                    getattr(model, "path%d" % b),
                    **plot_kwargs).savefig(os.path.join(
                        config.save, "ops_%d_%d.png" % (arch_idx, b)),
                                           bbox_inches="tight")
        plot_path_width(model.lasts, model.paths, model.widths).savefig(
            os.path.join(config.save, "path_width%d.png" % arch_idx))
        plot_path_width([2, 1, 0], [model.path2, model.path1, model.path0],
                        [model.widths2, model.widths1,
                         model.widths0]).savefig(
                             os.path.join(config.save,
                                          "path_width_all%d.png" % arch_idx))

        flops, params = profile(model,
                                inputs=(torch.randn(1, 3, 1024, 2048), ))
        logging.info("params = %fMB, FLOPs = %fGB", params / 1e6, flops / 1e9)
        logging.info("ops:" + str(model.ops))
        logging.info("path:" + str(model.paths))
        logging.info("last:" + str(model.lasts))

        model = model.cuda()
        init_weight(model,
                    nn.init.kaiming_normal_,
                    torch.nn.BatchNorm2d,
                    config.bn_eps,
                    config.bn_momentum,
                    mode='fan_in',
                    nonlinearity='relu')

        weights_file = "weights%d.pt" % arch_idx
        if arch_idx == 0 and len(config.arch_idx) > 1:
            # A teacher used alongside a student starts from teacher_path.
            _load_matching_weights(
                model, os.path.join(config.teacher_path, weights_file))
        elif config.is_eval:
            _load_matching_weights(
                model, os.path.join(config.eval_path, weights_file))

        # The evaluator and the tester differ only in class and dataset split.
        seg_args = (config.num_classes, config.image_mean, config.image_std,
                    model, config.eval_scale_array, config.eval_flip, 0)
        seg_kwargs = dict(out_idx=0,
                          config=config,
                          verbose=False,
                          save_path=None,
                          show_image=False)
        evaluators.append(
            SegEvaluator(Cityscapes(data_setting, 'val', None), *seg_args,
                         **seg_kwargs))
        testers.append(
            SegTester(Cityscapes(data_setting, 'test', None), *seg_args,
                      **seg_kwargs))

        # Optimizer ###################################
        if arch_idx == 1 or len(config.arch_idx) == 1:
            # optimize teacher solo OR student (w. distill from teacher)
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=base_lr,
                                        momentum=config.momentum,
                                        weight_decay=config.weight_decay)
        models.append(model)

    # Cityscapes ###########################################
    if config.is_eval:
        logging.info(config.load_path)
        logging.info(config.eval_path)
        logging.info(config.save)
        # validation
        print("[validation...]")
        with torch.no_grad():
            valid_mIoUs = infer(models, evaluators, logger)
        _log_mIoUs(logger, "val", "valid", valid_mIoUs, 0)
        # sys.exit is used because the bare exit() helper is only installed
        # by the site module.
        sys.exit(0)

    if optimizer is None:
        # Fail here with a clear message instead of a NameError mid-loop.
        raise RuntimeError(
            "no optimizer was created: config.arch_idx must contain 1 or "
            "consist of a single architecture")

    tbar = tqdm(range(config.nepochs), ncols=80)
    for epoch in tbar:
        logging.info(config.load_path)
        logging.info(config.save)
        logging.info("lr: " + str(optimizer.param_groups[0]['lr']))

        # training
        tbar.set_description("[Epoch %d/%d][train...]" %
                             (epoch + 1, config.nepochs))
        train_mIoUs = train(train_loader, models, ohem_criterion,
                            distill_criterion, optimizer, logger, epoch)
        torch.cuda.empty_cache()
        _log_mIoUs(logger, "train", "train", train_mIoUs, epoch)
        adjust_learning_rate(base_lr, 0.992, optimizer, epoch + 1,
                             config.nepochs)

        # validation: after the first epoch and then every 10th epoch
        if not config.is_test and ((epoch + 1) % 10 == 0 or epoch == 0):
            tbar.set_description("[Epoch %d/%d][validation...]" %
                                 (epoch + 1, config.nepochs))
            with torch.no_grad():
                valid_mIoUs = infer(models, evaluators, logger)
            _log_mIoUs(logger, "val", "valid", valid_mIoUs, epoch)
            _save_weights(models)

        # test: every 10th epoch from epoch 250 onwards
        if config.is_test and (epoch + 1) >= 250 and (epoch + 1) % 10 == 0:
            tbar.set_description("[Epoch %d/%d][test...]" %
                                 (epoch + 1, config.nepochs))
            with torch.no_grad():
                test(epoch, models, testers, logger)
            _save_weights(models)
input = torch.mul(input, mask) target = torch.mul(target, mask) loss = F.mse_loss(input, target, reduction=self.reduction) return loss parser = argparse.ArgumentParser() with Engine(custom_parser=parser) as engine: args = parser.parse_args() seed = config.seed torch.manual_seed(seed) train_loader, train_sampler = get_train_loader(engine, NYUDataset) criterion = Mseloss() BatchNorm2d = nn.BatchNorm2d model = Displacement_Field() init_weight(model.displacement_net, nn.init.xavier_normal_, BatchNorm2d, config.bn_eps, config.bn_momentum) base_lr = config.lr total_iteration = config.nepochs * config.niters_per_epoch lr_policy = PolyLR(base_lr, config.lr_power, total_iteration) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model.to(device)
def _save_arch_states(model, valid_mIoUss, FPSs, epoch):
    """Save each branch's architecture parameters with that branch's own results.

    For every entry of ``model._arch_names`` a per-epoch snapshot
    ``arch_<idx>_<epoch>.pt`` and a rolling ``arch_<idx>.pt`` are written
    under ``config.save``.

    Args:
        model: network whose architecture parameters are read by name.
        valid_mIoUss: per-branch validation mIoU sequences for this epoch.
        FPSs: per-branch ``[fps0, fps1]`` pairs for this epoch.
        epoch: epoch index used in the snapshot file name.
    """
    # contains arch_param names: {"alphas": alphas, "betas": betas, "gammas": gammas, "ratios": ratios}
    for idx, arch_name in enumerate(model._arch_names):
        state = {}
        for group in ('alphas', 'betas', 'ratios'):
            for name in arch_name[group]:
                state[name] = getattr(model, name)

        # Index by branch: reusing the loop-leftover values would stamp every
        # file with the results of the branch that was validated last.
        branch_mIoUs = valid_mIoUss[idx]
        fps0, fps1 = FPSs[idx]
        state["mIoU02"] = branch_mIoUs[3]
        state["mIoU12"] = branch_mIoUs[4]
        state["latency02"] = 1000. / fps0
        state["latency12"] = 1000. / fps1

        torch.save(
            state,
            os.path.join(config.save, "arch_%d_%d.pt" % (idx, epoch)))
        torch.save(state, os.path.join(config.save, "arch_%d.pt" % (idx)))


def main(pretrain=True):
    """Pretrain the supernet or run the architecture search on EGTEA.

    Args:
        pretrain: ``True`` trains the supernet weights only, with no
            architecture updates. A string is taken as a directory holding a
            ``weights.pt`` to start from; architecture updates are then
            enabled and per-branch architecture states are saved every
            epoch. ``False`` enables architecture updates on freshly
            initialised weights.

    Raises:
        TypeError: if ``pretrain`` is neither a bool nor a str.
    """
    if not isinstance(pretrain, (bool, str)):
        raise TypeError("pretrain must be a bool or a str, got %s" %
                        type(pretrain).__name__)
    from_checkpoint = isinstance(pretrain, str)
    # The architecture is only updated when this is not a plain pretraining run.
    update_arch = pretrain is not True

    config.save = 'search-{}-{}'.format(config.save,
                                        time.strftime("%Y%m%d-%H%M%S"))
    create_exp_dir(config.save,
                   scripts_to_save=glob.glob('*.py') + glob.glob('*.sh'))
    logger = SummaryWriter(config.save)

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(config.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)
    logging.info("args = %s", str(config))

    # preparation ################
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    seed = config.seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    # config network and criterion ################
    min_kept = int(config.batch_size * config.image_height *
                   config.image_width // (16 * config.gt_down_sampling**2))
    ohem_criterion = ProbOhemCrossEntropy2d(ignore_label=255,
                                            thresh=0.7,
                                            min_kept=min_kept,
                                            use_weight=False)

    # Model #######################################
    model = Network(config.num_classes,
                    config.layers,
                    ohem_criterion,
                    Fch=config.Fch,
                    width_mult_list=config.width_mult_list,
                    prun_modes=config.prun_modes,
                    stem_head_width=config.stem_head_width)
    flops, params = profile(model,
                            inputs=(torch.randn(1, 3, 1024, 2048), ),
                            verbose=False)
    logging.info("params = %fMB, FLOPs = %fGB", params / 1e6, flops / 1e9)
    model = model.cuda()

    if from_checkpoint:
        # Load only tensors whose name and shape both match the current model.
        partial = torch.load(pretrain + "/weights.pt", map_location='cuda:0')
        state = model.state_dict()
        pretrained_dict = {
            k: v
            for k, v in partial.items()
            if k in state and state[k].size() == v.size()
        }
        state.update(pretrained_dict)
        model.load_state_dict(state)
    else:
        init_weight(model,
                    nn.init.kaiming_normal_,
                    nn.BatchNorm2d,
                    config.bn_eps,
                    config.bn_momentum,
                    mode='fan_in',
                    nonlinearity='relu')

    architect = Architect(model, config)

    # Optimizer ###################################
    # Only these sub-modules are handed to the weight optimizer.
    base_lr = config.lr
    parameters = []
    for module in (model.stem, model.cells, model.refine32, model.refine16,
                   model.head0, model.head1, model.head2, model.head02,
                   model.head12):
        parameters += list(module.parameters())
    optimizer = torch.optim.SGD(parameters,
                                lr=base_lr,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay)

    # lr policy ##############################
    lr_policy = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.978)

    # data loader ###########################
    data_setting = {
        'img_root': config.img_root_folder,
        'gt_root': config.gt_root_folder,
        'train_source': config.train_source,
        'eval_source': config.eval_source,
        'down_sampling': config.down_sampling
    }
    train_loader_model = get_train_loader(config,
                                          EGTEA,
                                          portion=config.train_portion)
    # NOTE(review): portion - 1 is presumably how get_train_loader selects the
    # complementary split for architecture updates; confirm in its source.
    train_loader_arch = get_train_loader(config,
                                         EGTEA,
                                         portion=config.train_portion - 1)

    evaluator = SegEvaluator(EGTEA(data_setting, 'val', None),
                             config.num_classes,
                             config.image_mean,
                             config.image_std,
                             model,
                             config.eval_scale_array,
                             config.eval_flip,
                             0,
                             config=config,
                             verbose=False,
                             save_path=None,
                             show_image=False)

    if update_arch:
        for idx, weight in enumerate(config.latency_weight):
            logger.add_scalar("arch/latency_weight%d" % idx, weight, 0)
            logging.info("arch_latency_weight%d = %s", idx, weight)

    tbar = tqdm(range(config.nepochs), ncols=80)
    valid_mIoU_history = []
    FPSs_history = []
    latency_supernet_history = []
    latency_weight_history = []
    valid_names = ["8s", "16s", "32s", "8s_32s", "16s_32s"]
    arch_names = {0: "teacher", 1: "student"}

    for epoch in tbar:
        logging.info(pretrain)
        logging.info(config.save)
        logging.info("lr: " + str(optimizer.param_groups[0]['lr']))
        logging.info("update arch: " + str(update_arch))

        # training
        tbar.set_description("[Epoch %d/%d][train...]" %
                             (epoch + 1, config.nepochs))
        train(pretrain,
              train_loader_model,
              train_loader_arch,
              model,
              architect,
              ohem_criterion,
              optimizer,
              lr_policy,
              logger,
              epoch,
              update_arch=update_arch)
        torch.cuda.empty_cache()
        lr_policy.step()

        # validation
        tbar.set_description("[Epoch %d/%d][validation...]" %
                             (epoch + 1, config.nepochs))
        with torch.no_grad():
            if pretrain is True:
                # "min" is always validated; "max" and "random" only when
                # there is more than one width multiplier.
                prun_modes = ["min"]
                if len(model._width_mult_list) > 1:
                    prun_modes += ["max", "random"]
                for mode in prun_modes:
                    model.prun_mode = mode
                    valid_mIoUs = infer(epoch,
                                        model,
                                        evaluator,
                                        logger,
                                        FPS=False)
                    for name, mIoU in zip(valid_names, valid_mIoUs):
                        logger.add_scalar('mIoU/val_%s_%s' % (mode, name),
                                          mIoU, epoch)
                        logging.info("Epoch %d: valid_mIoU_%s_%s %.3f", epoch,
                                     mode, name, mIoU)
            else:
                valid_mIoUss = []
                FPSs = []
                model.prun_mode = None
                for idx in range(len(model._arch_names)):
                    # arch_idx
                    model.arch_idx = idx
                    valid_mIoUs, fps0, fps1 = infer(epoch, model, evaluator,
                                                    logger)
                    valid_mIoUss.append(valid_mIoUs)
                    FPSs.append([fps0, fps1])
                    for name, mIoU in zip(valid_names, valid_mIoUs):
                        # preds
                        logger.add_scalar(
                            'mIoU/val_%s_%s' % (arch_names[idx], name), mIoU,
                            epoch)
                        logging.info("Epoch %d: valid_mIoU_%s_%s %.3f", epoch,
                                     arch_names[idx], name, mIoU)
                    if config.latency_weight[idx] > 0:
                        # Objective of each fused output: its mIoU against
                        # its latency in milliseconds.
                        fused = (("8s_32s", valid_mIoUs[3], fps0),
                                 ("16s_32s", valid_mIoUs[4], fps1))
                        for name, mIoU, fps in fused:
                            objective = objective_acc_lat(mIoU, 1000. / fps)
                            logger.add_scalar(
                                'Objective/val_%s_%s' %
                                (arch_names[idx], name), objective, epoch)
                            logging.info("Epoch %d: Objective_%s_%s %.3f",
                                         epoch, arch_names[idx], name,
                                         objective)
                valid_mIoU_history.append(valid_mIoUss)
                FPSs_history.append(FPSs)
                if update_arch:
                    latency_supernet_history.append(
                        architect.latency_supernet)
                latency_weight_history.append(architect.latency_weight)

        save(model, os.path.join(config.save, 'weights.pt'))
        if from_checkpoint:
            _save_arch_states(model, valid_mIoUss, FPSs, epoch)

        if update_arch:
            for idx in range(len(config.latency_weight)):
                if config.latency_weight[idx] > 0:
                    # Halve the latency weight once either output reaches the
                    # FPS ceiling; double it when either is at or below the
                    # FPS floor.
                    if any(fps >= config.FPS_max[idx] for fps in FPSs[idx]):
                        architect.latency_weight[idx] /= 2
                    elif any(fps <= config.FPS_min[idx] for fps in FPSs[idx]):
                        architect.latency_weight[idx] *= 2
                    logger.add_scalar(
                        "arch/latency_weight_%s" % arch_names[idx],
                        architect.latency_weight[idx], epoch + 1)
                    logging.info("arch_latency_weight_%s = %s",
                                 arch_names[idx],
                                 architect.latency_weight[idx])
port = str(int(float(time.time())) % 20) os.environ['MASTER_PORT'] = str(10097 + int(port)) with Engine(custom_parser=parser) as engine: args = parser.parse_args() cudnn.benchmark = True seed = config.seed if engine.distributed: seed = engine.local_rank torch.manual_seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed(seed) # data loader train_loader, train_sampler = get_train_loader(engine, NYUv2) if engine.distributed and (engine.local_rank == 0): tb_dir = config.tb_dir + '/{}'.format(time.strftime("%b%d_%d-%H-%M", time.localtime())) generate_tb_dir = config.tb_dir + '/tb' logger = SummaryWriter(log_dir=tb_dir) engine.link_tb(tb_dir, generate_tb_dir) # config network and criterion criterion = nn.CrossEntropyLoss(reduction='mean', ignore_index=255) if engine.distributed: BatchNorm2d = SyncBatchNorm model = Network(class_num=config.num_classes, feature=128, bn_momentum=config.bn_momentum,
torch.manual_seed(config.seed) if torch.cuda.is_available(): torch.cuda.manual_seed(config.seed) parser = argparse.ArgumentParser() with Engine(custom_parser=parser) as engine: args = parser.parse_args() cudnn.benchmark = True if engine.distributed: torch.cuda.set_device(engine.local_rank) # data loader train_loader, train_sampler = get_train_loader(engine, Scut) # config network and criterion criterion = nn.CrossEntropyLoss(reduction='mean', ignore_index=255) # 默认使用交叉熵损失 edge_criterion = EdgeLoss(ignore_label=255) ''' min_kept = int(config.batch_size // len( engine.devices) * config.image_height * config.image_width // 16) criterion = ProbOhemCrossEntropy2d(ignore_label=255, thresh=0.7, min_kept=min_kept, use_weight=False) ''' if engine.distributed: logger.info('Use the Multi-Process-SyncBatchNorm')
cudnn.benchmark = True torch.manual_seed(123) if torch.cuda.is_available(): torch.cuda.manual_seed(123) device = torch.device('cuda') if CFG.cuda else torch.device('cpu') for path in [CFG.result_dir, CFG.model_dir]: if not os.path.isdir(path): os.mkdir(path) print('Setup ImageLoader') loader = get_train_loader(CFG.color_path, CFG.batch_size, resize=True, size=(512, 512)) print('Define Network') netG = define_U(device, in_channel=1, out_channel=3).to(device) netD = define_D(device, 3).to(device) print('Define Optimizer') optG = optim.Adam(netG.parameters(), lr=CFG.lr, betas=CFG.betas) optD = optim.Adam(netD.parameters(), lr=CFG.lr, betas=CFG.betas) if CFG.is_load_model: try: saved = torch.load(os.path.join(CFG.model_dir, CFG.model_name)) start_epoch = saved['epoch'] netG.load_state_dict(saved['netG'])
def process(args, train_id, test_loader):
    """Train a ``GazeNet`` for one training split, checkpointing after every epoch.

    The run configuration is logged and written to ``<outdir>/config.json``.
    If ``<outdir>/model_state.pth`` already exists, its weights are loaded
    before training. The model is tested once before the first epoch and
    after every epoch, and a checkpoint holding the arguments, model weights,
    optimizer state, epoch and angle error is rewritten each epoch.

    Args:
        args: parsed options; reads ``seed``, ``outdir``, ``dataset``,
            ``batch_size``, ``num_workers``, ``base_lr``, ``momentum``,
            ``weight_decay``, ``nesterov``, ``lr_decay`` and ``epochs``.
        train_id: split identifier forwarded to ``get_train_loader``.
        test_loader: loader handed to ``test`` for every evaluation.
    """
    logger.info(json.dumps(vars(args), indent=2))

    # TensorBoard is disabled: train/test receive None for writer and config.
    writer = None
    config = None

    # set random seed
    seed = args.seed
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # create output directory
    outdir = args.outdir
    os.makedirs(outdir, exist_ok=True)

    outpath = os.path.join(outdir, 'config.json')
    with open(outpath, 'w') as fout:
        json.dump(vars(args), fout, indent=2)

    # data loaders
    train_loader = get_train_loader(args.dataset, train_id, args.batch_size,
                                    args.num_workers, True)
    print('data collected')

    # model
    model_path = os.path.join(outdir, 'model_state.pth')
    model = GazeNet()
    model = nn.DataParallel(model)
    model.cuda()
    if os.path.exists(model_path):
        checkpoint = torch.load(model_path)
        # Checkpoints written at the end of each epoch keep the weights under
        # 'state_dict'; a file holding a bare state dict is still accepted.
        if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
            checkpoint = checkpoint['state_dict']
        model.load_state_dict(checkpoint)
        model.eval()

    # reduction='mean' replaces the deprecated size_average=True
    criterion = nn.MSELoss(reduction='mean')

    # optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.base_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[20, 30],
                                                     gamma=args.lr_decay)

    # run test before start training
    test(0, model, criterion, test_loader, config, writer)

    for epoch in range(1, args.epochs + 1):
        train(epoch, model, optimizer, criterion, train_loader, config,
              writer)
        # Step the schedule after the epoch's optimizer updates; stepping it
        # first skips the initial learning-rate value on PyTorch >= 1.1.
        scheduler.step()
        angle_error = test(epoch, model, criterion, test_loader, config,
                           writer)

        state = OrderedDict([
            ('args', vars(args)),
            ('state_dict', model.state_dict()),
            ('optimizer', optimizer.state_dict()),
            ('epoch', epoch),
            ('angle_error', angle_error),
        ])
        torch.save(state, model_path)