def main(config):
    """Wire up data, model, loss, optimizer and scheduler, then run training.

    ``config`` is a nested dict-like object (parsed experiment config) that
    also exposes ``get_logger``; the usual ``data_loader`` / ``arch`` /
    ``loss`` / ``optimizer`` / ``lr_scheduler`` sections drive construction.
    """
    logger = config.get_logger("train")

    # Data: Cityscapes train/val loaders built from the config section.
    dl_args = config["data_loader"]["args"]
    data_loader = CityscapesDataLoader(
        dl_args["data_dir"],
        dl_args["train_extra"],
        dl_args["batch_size"],
        dl_args["num_workers"],
    )

    # Model: DeepLabv3+ sized by the configured class count; log a summary.
    num_classes = config["arch"]["args"]["num_classes"]
    model = DeepLabv3Plus(num_classes=num_classes)
    logger.info(
        summary(
            model,
            (3, 1024, 2048),
            col_names=("kernel_size", "output_size", "num_params"),
            depth=5,
            verbose=0,
        )
    )

    # Move to the available device(s); wrap for multi-GPU when possible.
    device, device_ids = prepare_device(config["n_gpu"])
    model = model.to(device)
    if len(device_ids) > 1:
        model = nn.DataParallel(model, device_ids=device_ids)

    # Loss and metrics share the same ignore_index (void/unlabeled class).
    ignore_index = config["loss"]["args"]["ignore_index"]
    criterion = nn.CrossEntropyLoss(ignore_index=ignore_index)
    metrics = SegmentationMetrics(num_classes, ignore_index)

    # SGD with momentum plus polynomial LR decay, both config-driven.
    opt_args = config["optimizer"]["args"]
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=opt_args["lr"],
        momentum=opt_args["momentum"],
        weight_decay=opt_args["weight_decay"],
    )
    sched_args = config["lr_scheduler"]["args"]
    lr_scheduler = PolynomialLRDecay(
        optimizer,
        max_decay_steps=sched_args["max_decay_steps"],
        end_learning_rate=sched_args["end_learning_rate"],
        power=sched_args["power"],
    )

    # Hand everything to the Trainer and run.
    trainer = Trainer(
        config=config,
        model=model,
        criterion=criterion,
        metrics=metrics,
        optimizer=optimizer,
        device=device,
        train_loader=data_loader.train_loader,
        val_loader=data_loader.val_loader,
        lr_scheduler=lr_scheduler,
    )
    trainer.train()
def train(fold: int, verbose: int = 100) -> None:
    """Train one k-fold split of the dirty-MNIST multi-label model.

    Regenerates the k-fold assignment CSV, trains ``MnistModel`` on every
    fold except ``fold`` and validates on ``fold``.  Prints running training
    metrics every ``verbose`` batches and per-epoch validation metrics, and
    saves checkpoints for the late epochs.
    """
    # Build (or refresh) the k-fold split, then materialise this fold's
    # train/valid CSVs so MnistDataset can read them back from disk.
    split_dataset('./data/dirty_mnist_2nd_answer.csv')
    df = pd.read_csv('./data/split_kfold.csv')
    df_train = df[df['kfold'] != fold].reset_index(drop=True)
    df_valid = df[df['kfold'] == fold].reset_index(drop=True)
    df_train.drop(['kfold'], axis=1).to_csv(f'./data/train-kfold-{fold}.csv', index=False)
    df_valid.drop(['kfold'], axis=1).to_csv(f'./data/valid-kfold-{fold}.csv', index=False)
    # Augmentations (a_train) only on the training split; the validation
    # split uses test-time transforms and a fixed batch size of 8.
    trainset = MnistDataset('./data/train', f'./data/train-kfold-{fold}.csv', transforms_train, a_train)
    train_loader = DataLoader(trainset, batch_size=config.batch_size, shuffle=True)
    validset = MnistDataset('./data/train', f'./data/valid-kfold-{fold}.csv', transforms_test, None)
    valid_loader = DataLoader(validset, batch_size=8, shuffle=False)
    num_epochs = config.epochs
    device = 'cuda'
    model = MnistModel().to(device)
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    # Polynomial decay scheduled over the total number of optimiser steps
    # (batches per epoch * epochs); stepped once per batch below.
    decay_steps = (len(trainset) // config.batch_size) * config.epochs
    scheduler = PolynomialLRDecay(optimizer, max_decay_steps=decay_steps, end_learning_rate=1e-6, power=0.9)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0)
    # optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001, weight_decay=0.9, momentum=0.9)
    # BCELoss: assumes the model emits per-class probabilities (multi-label,
    # sigmoid outputs) -- TODO confirm against MnistModel's head.
    criterion = torch.nn.BCELoss()
    for epoch in range(num_epochs):
        model.train()
        for i, (images, targets) in enumerate(train_loader):
            optimizer.zero_grad()
            images = images.to(device)
            targets = targets.to(device)
            outputs = model(images)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            scheduler.step()  # per-batch LR decay
            if (i + 1) % verbose == 0:
                # Threshold at 0.5 for a quick multi-label accuracy estimate.
                outputs = outputs > 0.5
                acc = (outputs == targets).float().mean()
                print(
                    f'Fold {fold} | Epoch {epoch} | Train_L: {loss.item():.7f} | Train_A: {acc.item():.7f}'
                )
        # Validation pass: accumulate batch-mean loss/accuracy, then report
        # the average over the number of validation batches (i + 1).
        model.eval()
        valid_acc = 0.0
        valid_loss = 0.0
        with torch.no_grad():
            for i, (images, targets) in enumerate(valid_loader):
                images = images.to(device)
                targets = targets.to(device)
                outputs = model(images)
                loss = criterion(outputs, targets)
                valid_loss += loss.item()
                outputs = outputs > 0.5
                valid_acc += (outputs == targets).float().mean()
        print(
            f'Fold {fold} | Epoch {epoch} | valid_L: {valid_loss / (i + 1):.7f} | valid_A: {valid_acc / (i + 1):.7f}\n'
        )
        # Checkpoint only the late epochs; NOTE(review): the window excludes
        # the final epoch (epoch == num_epochs - 1) -- confirm that is intended.
        if epoch > num_epochs - 10 and epoch < num_epochs - 1:
            torch.save(model.state_dict(), f'./data/efficientnet7-f{fold}-{epoch}.pth')
shuffle=True) valid_dataset = MnistDataset_v2(imgs=imgs[valid_idx], labels=labels[valid_idx], transform=valid_transform) valid_loader = DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=False) # optimizer # polynomial optimizer를 사용합니다. # optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) decay_steps = (len(train_dataset) // batch_size) * epochs scheduler_poly_lr_decay = PolynomialLRDecay(optimizer, max_decay_steps=decay_steps, end_learning_rate=1e-6, power=0.9) criterion = torch.nn.BCELoss() epoch_accuracy = [] valid_accuracy = [] valid_losses = [] valid_best_accuracy = 0 # for epoch in range(epochs): # model.train() # batch_accuracy_list = [] # batch_loss_list = [] # start=time.time() # for n, (X, y) in enumerate((train_loader)): # X = torch.tensor(X, device=device, dtype=torch.float32)
# print("The ckp has been loaded sucessfully ") #net = torch.load("./model/MSAANet_2020-03-31_87.pth") # load the pretrained model #criterion = FocalLoss2d().to(device) criterion = torch.nn.BCELoss().to(device) #criterion = torch.nn.CrossEntropyLoss().to(device) train_loader, val_loader = get_dataset_loaders(5, batch_size) #opt = torch.optim.SGD(net.parameters(), lr=learning_rate) opt = Ranger(net.parameters(),lr=learning_rate) today=str(datetime.date.today()) logger = get_log(model_name + today +'_log.txt') #scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=5,eta_min=4e-08) #scheduler = LR_Scheduler(args.lr_scheduler, args.lr, # args.n_epoch, len(train_loader), logger=logger, # lr_step=args.lr_step) # scheduler = PolynomialLRDecay(opt, max_decay_steps=100, end_learning_rate=0.0001, power=2.0) for epoch in range(num_epochs): logger.info("Epoch: {}/{}".format(epoch + 1, num_epochs)) scheduler.step() #scheduler(opt,i,.step() train_hist = train(train_loader, num_classes, device, net, opt, criterion) logger.info( ('loss={}'.format(train_hist["loss"]), 'precision={}'.format(train_hist["precision"]), 'recall={}'.format(train_hist["recall"]), 'f_score={}'.format(train_hist["f_score"]), 'oa={}'.format(train_hist["oa"])))
# Model Load model = Network_Efficientnet(b=b).to(device) model = nn.DataParallel(model, device_ids=[0, 1, 2]) # Optimizer & Scheduler # optimizer = torch.optim.Adam(model.parameters(), lr =1e-3) # Q = math.floor(len(train_dataset)/batch_size+1)*epochs/7 # lrs = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = Q) # optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3) optimizer = AdamP(model.parameters(), lr=1e-3, betas=(0.9, 0.999), weight_decay=1e-2) decay_steps = (len(train_dataset) // batch_size + 1) * (epochs - 2) plr = PolynomialLRDecay(optimizer, max_decay_steps=decay_steps, end_learning_rate=1e-6, power=0.9) # Loss criterion = nn.BCELoss() # Training best = 0 save = 0 for epoch in range(epochs): model.train() start = time.time() train_accuracy = 0 train_loss = 0 valid_accuracy = 0 valid_loss = 0
def main(args):
    """End-to-end training for Chinese handwriting classification (801 classes).

    Builds one dataset per class directory, splits each into train/valid with
    a fixed seed, trains the model selected by ``args.method`` with focal
    loss + AdamW + polynomial LR decay (resuming from the latest checkpoint
    when present), checkpoints every epoch, and optionally fits an XGBoost
    second stage on features extracted by the trained network.
    """
    # file path
    image_path = './train_image'
    path = './data'
    label_path = 'training data dic.txt'

    # Hyper Parameters
    PrograssiveModelDict = None  # progressive-training bounds, efficientnetV2 only
    if args.method == "efficientnet" or args.method == "efficientnetV2":
        METHOD = f"{args.method}-{args.method_level}"
        if args.method == "efficientnetV2":
            PrograssiveModelDict = PrograssiveBounds[args.method][
                args.method_level]
    elif args.method == 'regnet':
        METHOD = args.method
    else:
        METHOD = args.method + args.method_level

    # Environment
    if args.use_gpu and torch.cuda.is_available():
        device = torch.device('cuda')
        torch.backends.cudnn.benchmark = True  # autotune convs (fixed input sizes)
    else:
        device = torch.device('cpu')
        print('Warning! Using CPU.')

    Epoch = args.epochs
    BATCH_SIZE = args.batchsize
    lr = args.learning_rate
    split_rate = args.split_rate  # fraction of each directory used for training
    resize = args.resize
    resize_size = args.resize_size
    num_classes = 801
    valid_batch_size = args.validbatchsize
    CHECKPOINT_FOLDER = args.checkpoint_root + METHOD + '/'
    # Resume after the last saved epoch if any checkpoint exists, else 0.
    # NOTE(review): getFinalEpoch is invoked twice; the second call repeats
    # whatever scan it performs.
    START_EPOCH = getFinalEpoch(
        args=args, CHECKPOINT_FOLDER=CHECKPOINT_FOLDER) + 1 if getFinalEpoch(
            args=args, CHECKPOINT_FOLDER=CHECKPOINT_FOLDER) is not None else 0
    is_useweight = True

    print("init data folder")
    Path(CHECKPOINT_FOLDER).mkdir(exist_ok=True, parents=True)
    label_dic = load_label_dic(label_path)
    word_dic = load_word_dic(label_path)
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    clean_image_path = './color_dataset/'
    synthesis_path = './synthesis/'
    # clean_transform = transforms.Compose([
    #     transforms.Grayscale(num_output_channels=1),
    #     transforms.Resize((resize_size, resize_size)),
    #     transforms.ToTensor(),
    # ])

    # One dataset per class directory; each split train/valid with a fixed
    # generator seed so the split is reproducible across runs.
    train_dataset = []
    valid_dataset = []
    for idx, dir_ in enumerate(os.listdir(clean_image_path)):
        # if args.pretrain_cleandataset:
        dataset = ChineseHandWriteDataset(root=clean_image_path + dir_,
                                          label_dic=label_dic,
                                          transform=transform,
                                          resize=resize,
                                          resize_size=resize_size)
        # dataset = CleanDataset(root=synthesis_path + dir_, label_dic=label_dic, transform=transform, resize=resize,
        #                        resize_size=resize_size, randaug=args.method=="efficientnetV2")
        train_set_size = int(len(dataset) * split_rate)
        valid_set_size = len(dataset) - train_set_size
        train_set, valid_set = data.random_split(
            dataset, [train_set_size, valid_set_size],
            torch.Generator().manual_seed(args.seed))
        train_dataset.append(train_set)
        valid_dataset.append(valid_set)
    train_dataset = data.ConcatDataset(train_dataset)
    valid_dataset = data.ConcatDataset(valid_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=args.num_workers)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=valid_batch_size,
                                  pin_memory=False,
                                  num_workers=args.num_workers)
    print(f"model is {METHOD}")
    model = switchModel(in_features=train_dataset[0][0].shape[0],
                        num_classes=num_classes,
                        args=args,
                        METHOD=METHOD)
    if args.load_model:
        modelPath = getModelPath(CHECKPOINT_FOLDER=CHECKPOINT_FOLDER,
                                 args=args)
        if modelPath != "":
            model.load_state_dict(torch.load(modelPath))
    model.to(device)

    # get each class weight
    weights = None
    if is_useweight:
        weights = getWeights(root=clean_image_path, split_rate=split_rate)
    # Label smoothing
    # loss = SmoothCrossEntropyLoss(weight=weights).to(device)
    # Focal Loss
    loss = FocalLoss(weight=weights).to(device)
    optimizer = optim.AdamW(model.parameters(), lr=lr)
    # Epoch-indexed polynomial LR decay (quadratic over the first 100 epochs).
    scheduler_poly_lr_decay = PolynomialLRDecay(
        optimizer,
        max_decay_steps=100,
        end_learning_rate=args.ending_learning_rate,
        power=2.0)

    print("------------------ training start -----------------")
    result_param = {
        'training_loss': [],
        'training_accuracy': [],
        'validation_loss': [],
        'validation_accuracy': []
    }
    for epoch in range(START_EPOCH, Epoch):
        batchI = 0
        scheduler_poly_lr_decay.step(epoch)
        progressive = None
        if PrograssiveModelDict is not None:
            # efficientnetV2 progressive training: image size, RandAugment
            # magnitude, mixup ratio and dropout all ramp with the epoch.
            randaugment = RandAugment()
            progressive = prograssiveNow(epoch, Epoch, PrograssiveModelDict)
            randaugment.m = progressive["randarg"]
        since = time.time()
        running_training_loss = 0
        running_training_correct = 0
        running_valid_loss = 0
        running_valid_correct = 0
        # NOTE(review): `dataset` here is only the *last* per-directory
        # dataset built in the loop above -- confirm train()/eval() is meant
        # to toggle all of them.
        dataset.train()
        model.train()
        train_bar = tqdm(train_dataloader)
        for imgst, label, folder, filename in train_bar:
            label = label.to(device)
            if progressive is not None:
                # Re-augment the raw batch at the progressive image size.
                imgst, label = mixup(imgst, label, progressive["mix"])
                toPIL = transforms.ToPILImage()
                transform = transforms.Compose([
                    transforms.Resize((int(progressive["imgsize"]),
                                       int(progressive["imgsize"]))),
                    transforms.ToTensor(),
                ])
                imgs = torch.zeros(
                    (imgst.size()[0], 3, int(progressive["imgsize"]),
                     int(progressive["imgsize"]))
                )  #int(progressive["imgsize"]),int(progressive["imgsize"]))
                for i in range(imgst.size()[0]):
                    imgs[i] = transform(randaugment(toPIL(imgst[i])))
            # NOTE(review): when `progressive` is None, `imgs` is never
            # assigned from `imgst` before this point and
            # progressive["drop"] below would subscript None -- this path
            # looks reachable only for efficientnetV2; confirm.
            imgs = imgs.to(device)
            # torchvision.utils.save_image(imgs,f"preprocessImgs/{epoch}-{batchI}.jpg")
            setDropout(model, progressive["drop"])
            optimizer.zero_grad()
            out = model(imgs)
            loss_val = loss(out, label)
            _, pred_class = torch.max(out.data, 1)
            running_training_correct += torch.sum(pred_class == label)
            running_training_loss += loss_val
            loss_val.backward()
            optimizer.step()
            train_bar.set_description(
                desc='[%d/%d] | Train Loss:%.4f' %
                (epoch + 1, Epoch, loss_val.item() / len(imgs)))
        # Validation pass (no gradients, dropout disabled).
        with torch.no_grad():
            dataset.eval()
            model.eval()
            if progressive is not None:
                setDropout(model, 0)
            val_bar = tqdm(valid_dataloader)
            for imgs, label, folder, filename in val_bar:
                imgs = imgs.to(device)
                label = label.to(device)
                out = model(imgs)
                loss_val = loss(out, label)
                val_bar.set_description(
                    desc='[%d/%d] | Validation Loss:%.4f' %
                    (epoch + 1, Epoch, loss_val.item() / len(imgs)))
                _, pred_class = torch.max(out.data, 1)
                running_valid_correct += torch.sum(pred_class == label)
                running_valid_loss += loss_val
        # Per-epoch bookkeeping: averaged loss (scaled by batch size) and
        # accuracy over the whole train/valid sets.
        result_param['training_loss'].append(running_training_loss.item() /
                                             len(train_dataset) * BATCH_SIZE)
        result_param['training_accuracy'].append(
            running_training_correct.item() / len(train_dataset))
        result_param['validation_loss'].append(
            running_valid_loss.item() / len(valid_dataset) * valid_batch_size)
        result_param['validation_accuracy'].append(
            running_valid_correct.item() / len(valid_dataset))
        print(
            "Epoch:{} Train Loss:{:.4f}, Train Accuracy:{:.4f}, Validation Loss:{:.4f}, Validation Accuracy:{:.4f}, Learning Rate:{:.4f}"
            .format(epoch + 1, result_param['training_loss'][-1],
                    result_param['training_accuracy'][-1],
                    result_param['validation_loss'][-1],
                    result_param['validation_accuracy'][-1],
                    optimizer.param_groups[0]['lr']))
        now_time = time.time() - since
        print("Training time is:{:.0f}m {:.0f}s".format(
            now_time // 60, now_time % 60))
        # Checkpoint the weights and the running metric history every epoch.
        torch.save(
            model.state_dict(),
            str('./checkpoints/' + METHOD + '/' + "EPOCH_" + str(epoch) +
                ".pkl"))
        out_file = open(
            str('./checkpoints/' + METHOD + '/' + 'result_param.json'), "w+")
        json.dump(result_param, out_file, indent=4)

    # Optional second stage: fit XGBoost on features from the trained model.
    if args.xgboost:
        print("---------------Two stage - XGboost---------------------")
        with torch.no_grad():
            x_valid, y_valid = [], []
            val_bar = tqdm(valid_dataloader)
            # NOTE(review): these loops unpack 2 items per batch while the
            # training loop above unpacks 4 -- confirm the loader contract.
            for imgs, label in val_bar:
                imgs = imgs.to(device)
                label = label.to(device)
                # to numpy
                imgs = CustomPredict(model, imgs).cpu().detach().numpy()
                label = label.cpu().detach().numpy()
                if not len(x_valid):
                    x_valid, y_valid = imgs, label
                else:
                    x_valid, y_valid = np.concatenate(
                        (x_valid, imgs)), np.concatenate((y_valid, label))
            xgb_train, xgb_label = [], []
            train_bar = tqdm(train_dataloader)
            for imgs, label in train_bar:
                imgs = imgs.to(device)
                label = label.to(device)
                # to numpy
                imgs = CustomPredict(model, imgs).cpu().detach().numpy()
                label = label.cpu().detach().numpy()
                if not len(xgb_train):
                    xgb_train, xgb_label = imgs, label
                else:
                    xgb_train, xgb_label = np.concatenate(
                        (xgb_train, imgs)), np.concatenate((xgb_label, label))
        dval = xgboost.DMatrix(x_valid, y_valid)
        dtrain = xgboost.DMatrix(xgb_train, xgb_label)
        params = {
            'max_depth': 5,  # the maximum depth of each tree
            'eta': lr,  # the training step for each iteration
            'objective': 'multi:softmax',  # multiclass classification using the softmax objective
            'num_class': 801,  # the number of classes that exist in this dataset
            'updater': 'grow_gpu_hist',
            'tree_method': 'gpu_hist',
        }
        xgbmodel = xgboost.Booster()
        # xgbmodel.load_model('xgboost.model')
        xgbmodel = xgboost.train(params,
                                 dtrain,
                                 num_boost_round=100,
                                 evals=[(dval, 'val'), (dtrain, 'train')])
        print(sum(xgbmodel.predict(dval) == y_valid) / len(y_valid))
        xgbmodel.save_model('xgboost.model')
# NOTE(review): the lines below continue an optimizer-selection `if/elif`
# chain whose opening branches are above this view; the first two lines are
# the tail of an `optim.*(params_dict, ...)` call.
                            args.lr,
                            weight_decay=args.weight_decay)
elif (args.optimizer.lower() == "radam"):
    optimizer = optim.RAdam(params_dict,
                            args.lr,
                            weight_decay=args.weight_decay)
elif (args.optimizer.lower() == "ranger"):
    optimizer = optim.Ranger(params_dict,
                             args.lr,
                             weight_decay=args.weight_decay)
else:
    raise ValueError("Optimizer type: ", args.optimizer,
                     " is not supported or known")
# Polynomial LR decay from the base LR down to 1e-4 over all training epochs.
scheduler_poly_lr_decay = PolynomialLRDecay(optimizer,
                                            max_decay_steps=args.epochs,
                                            end_learning_rate=0.0001,
                                            power=0.9)


def save_checkpoint(state, is_best, epoch, filepath):
    """Persist a training checkpoint under ``filepath``.

    ``state`` is whatever the caller serialises (typically a dict holding
    model/optimizer state).  When ``epoch == 'init'`` the snapshot goes to
    ``init.pth.tar``; otherwise the rolling ``ckpt.pth.tar`` is overwritten
    and, when ``is_best`` is true, also copied to ``model_best.pth.tar``.
    """
    if epoch == 'init':
        filepath = os.path.join(filepath, 'init.pth.tar')
        torch.save(state, filepath)
    else:
        # filename = os.path.join(filepath, 'ckpt'+str(epoch)+'.pth.tar')
        # torch.save(state, filename)
        filename = os.path.join(filepath, 'ckpt.pth.tar')
        torch.save(state, filename)
        if is_best:
            shutil.copyfile(filename,
                            os.path.join(filepath, 'model_best.pth.tar'))
import torch
from torch_poly_lr_decay import PolynomialLRDecay

if __name__ == '__main__':
    # Smoke test: watch the learning rate decay quadratically from 0.01
    # toward 1e-4 over 19 scheduler steps.
    dummy = torch.zeros(10)
    sgd = torch.optim.SGD([dummy], lr=0.01)
    decay = PolynomialLRDecay(sgd,
                              max_decay_steps=19,
                              end_learning_rate=0.0001,
                              power=2.0)
    epoch = 1
    while epoch < 20:
        decay.step(epoch)
        print(epoch, sgd.param_groups[0]['lr'])
        epoch += 1