Example #1
def main(config):
    logger = config.get_logger("train")

    data_loader = CityscapesDataLoader(
        config["data_loader"]["args"]["data_dir"],
        config["data_loader"]["args"]["train_extra"],
        config["data_loader"]["args"]["batch_size"],
        config["data_loader"]["args"]["num_workers"],
    )

    num_classes = config["arch"]["args"]["num_classes"]
    model = DeepLabv3Plus(num_classes=num_classes)
    logger.info(
        summary(
            model,
            (3, 1024, 2048),
            col_names=("kernel_size", "output_size", "num_params"),
            depth=5,
            verbose=0,
        )
    )

    device, device_ids = prepare_device(config["n_gpu"])
    model = model.to(device)
    if len(device_ids) > 1:
        model = nn.DataParallel(model, device_ids=device_ids)

    ignore_index = config["loss"]["args"]["ignore_index"]
    criterion = nn.CrossEntropyLoss(ignore_index=ignore_index)
    metrics = SegmentationMetrics(num_classes, ignore_index)

    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=config["optimizer"]["args"]["lr"],
        momentum=config["optimizer"]["args"]["momentum"],
        weight_decay=config["optimizer"]["args"]["weight_decay"],
    )
    lr_scheduler = PolynomialLRDecay(
        optimizer,
        max_decay_steps=config["lr_scheduler"]["args"]["max_decay_steps"],
        end_learning_rate=config["lr_scheduler"]["args"]["end_learning_rate"],
        power=config["lr_scheduler"]["args"]["power"],
    )

    trainer = Trainer(
        config=config,
        model=model,
        criterion=criterion,
        metrics=metrics,
        optimizer=optimizer,
        device=device,
        train_loader=data_loader.train_loader,
        val_loader=data_loader.val_loader,
        lr_scheduler=lr_scheduler,
    )
    trainer.train()
Example #2
def train(fold: int, verbose: int = 100) -> None:
    split_dataset('./data/dirty_mnist_2nd_answer.csv')
    df = pd.read_csv('./data/split_kfold.csv')
    df_train = df[df['kfold'] != fold].reset_index(drop=True)
    df_valid = df[df['kfold'] == fold].reset_index(drop=True)

    df_train.drop(['kfold'], axis=1).to_csv(f'./data/train-kfold-{fold}.csv',
                                            index=False)
    df_valid.drop(['kfold'], axis=1).to_csv(f'./data/valid-kfold-{fold}.csv',
                                            index=False)

    trainset = MnistDataset('./data/train', f'./data/train-kfold-{fold}.csv',
                            transforms_train, a_train)
    train_loader = DataLoader(trainset,
                              batch_size=config.batch_size,
                              shuffle=True)

    validset = MnistDataset('./data/train', f'./data/valid-kfold-{fold}.csv',
                            transforms_test, None)
    valid_loader = DataLoader(validset, batch_size=8, shuffle=False)

    num_epochs = config.epochs
    device = 'cuda'

    model = MnistModel().to(device)

    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    decay_steps = (len(trainset) // config.batch_size) * config.epochs
    scheduler = PolynomialLRDecay(optimizer,
                                  max_decay_steps=decay_steps,
                                  end_learning_rate=1e-6,
                                  power=0.9)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0)
    # optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001, weight_decay=0.9, momentum=0.9)
    criterion = torch.nn.BCELoss()

    for epoch in range(num_epochs):
        model.train()
        for i, (images, targets) in enumerate(train_loader):
            optimizer.zero_grad()

            images = images.to(device)
            targets = targets.to(device)

            outputs = model(images)
            loss = criterion(outputs, targets)

            loss.backward()
            optimizer.step()
            scheduler.step()

            if (i + 1) % verbose == 0:
                outputs = outputs > 0.5
                acc = (outputs == targets).float().mean()
                print(
                    f'Fold {fold} | Epoch {epoch} | Train_L: {loss.item():.7f} | Train_A: {acc.item():.7f}'
                )

        model.eval()
        valid_acc = 0.0
        valid_loss = 0.0
        with torch.no_grad():
            for i, (images, targets) in enumerate(valid_loader):
                images = images.to(device)
                targets = targets.to(device)

                outputs = model(images)
                loss = criterion(outputs, targets)
                valid_loss += loss.item()
                outputs = outputs > 0.5
                valid_acc += (outputs == targets).float().mean()
            print(
                f'Fold {fold} | Epoch {epoch} | valid_L: {valid_loss / (i + 1):.7f} | valid_A: {valid_acc / (i + 1):.7f}\n'
            )

        if epoch > num_epochs - 10 and epoch < num_epochs - 1:
            torch.save(model.state_dict(),
                       f'./data/efficientnet7-f{fold}-{epoch}.pth')
Example #3
                              shuffle=True)

    valid_dataset = MnistDataset_v2(imgs=imgs[valid_idx],
                                    labels=labels[valid_idx],
                                    transform=valid_transform)
    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=batch_size,
                              shuffle=False)

    # optimizer
    # We use a polynomial LR decay scheduler.
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    decay_steps = (len(train_dataset) // batch_size) * epochs
    scheduler_poly_lr_decay = PolynomialLRDecay(optimizer,
                                                max_decay_steps=decay_steps,
                                                end_learning_rate=1e-6,
                                                power=0.9)
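    # Note (editorial assumption, not part of the original snippet): decay_steps
    # equals batches-per-epoch times epochs, so if the scheduler is stepped once
    # per batch (as in Example #2 above), the learning rate reaches
    # end_learning_rate=1e-6 exactly at the end of training.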

    criterion = torch.nn.BCELoss()

    epoch_accuracy = []
    valid_accuracy = []
    valid_losses = []
    valid_best_accuracy = 0
    # for epoch in range(epochs):
    #     model.train()
    #     batch_accuracy_list = []
    #     batch_loss_list = []
    #     start=time.time()
    #     for n, (X, y) in enumerate((train_loader)):
    #         X = torch.tensor(X, device=device, dtype=torch.float32)
    #    print("The ckp has been loaded sucessfully ")
    #net = torch.load("./model/MSAANet_2020-03-31_87.pth") # load the pretrained model
    #criterion = FocalLoss2d().to(device)
    criterion = torch.nn.BCELoss().to(device)
    #criterion = torch.nn.CrossEntropyLoss().to(device)
    train_loader, val_loader = get_dataset_loaders(5, batch_size)
    #opt = torch.optim.SGD(net.parameters(), lr=learning_rate)
    opt = Ranger(net.parameters(), lr=learning_rate)
    today = str(datetime.date.today())
    logger = get_log(model_name + today + '_log.txt')
    #scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=5,eta_min=4e-08)
    #scheduler = LR_Scheduler(args.lr_scheduler, args.lr,
    #                                        args.n_epoch, len(train_loader), logger=logger,
    #                                        lr_step=args.lr_step)
    #
    scheduler = PolynomialLRDecay(opt, max_decay_steps=100, end_learning_rate=0.0001, power=2.0)



    for epoch in range(num_epochs):
        logger.info("Epoch: {}/{}".format(epoch + 1, num_epochs))
        scheduler.step()
        #scheduler(opt,i,.step()
        train_hist = train(train_loader, num_classes, device, net, opt, criterion)
    logger.info(('loss={}'.format(train_hist["loss"]),
                 'precision={}'.format(train_hist["precision"]),
                 'recall={}'.format(train_hist["recall"]),
                 'f_score={}'.format(train_hist["f_score"]),
                 'oa={}'.format(train_hist["oa"])))

 
Example #5
    # Model Load
    model = Network_Efficientnet(b=b).to(device)
    model = nn.DataParallel(model, device_ids=[0, 1, 2])

    # Optimizer & Scheduler
    # optimizer = torch.optim.Adam(model.parameters(), lr =1e-3)
    # Q = math.floor(len(train_dataset)/batch_size+1)*epochs/7
    # lrs = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = Q)
    # optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)
    optimizer = AdamP(model.parameters(),
                      lr=1e-3,
                      betas=(0.9, 0.999),
                      weight_decay=1e-2)
    decay_steps = (len(train_dataset) // batch_size + 1) * (epochs - 2)
    plr = PolynomialLRDecay(optimizer,
                            max_decay_steps=decay_steps,
                            end_learning_rate=1e-6,
                            power=0.9)

    # Loss
    criterion = nn.BCELoss()

    # Training
    best = 0
    save = 0
    for epoch in range(epochs):
        model.train()
        start = time.time()
        train_accuracy = 0
        train_loss = 0
        valid_accuracy = 0
        valid_loss = 0
Example #6
def main(args):

    # file path
    image_path = './train_image'
    path = './data'
    label_path = 'training data dic.txt'

    # Hyper Parameters
    PrograssiveModelDict = None
    if args.method == "efficientnet" or args.method == "efficientnetV2":
        METHOD = f"{args.method}-{args.method_level}"
        if args.method == "efficientnetV2":
            PrograssiveModelDict = PrograssiveBounds[args.method][
                args.method_level]
    elif args.method == 'regnet':
        METHOD = args.method
    else:
        METHOD = args.method + args.method_level

    # Environment
    if args.use_gpu and torch.cuda.is_available():
        device = torch.device('cuda')
        torch.backends.cudnn.benchmark = True
    else:
        device = torch.device('cpu')
        print('Warning! Using CPU.')

    Epoch = args.epochs
    BATCH_SIZE = args.batchsize
    lr = args.learning_rate
    split_rate = args.split_rate
    resize = args.resize
    resize_size = args.resize_size
    num_classes = 801
    valid_batch_size = args.validbatchsize
    CHECKPOINT_FOLDER = args.checkpoint_root + METHOD + '/'
    final_epoch = getFinalEpoch(args=args, CHECKPOINT_FOLDER=CHECKPOINT_FOLDER)
    START_EPOCH = final_epoch + 1 if final_epoch is not None else 0

    is_useweight = True
    print("init data folder")

    Path(CHECKPOINT_FOLDER).mkdir(exist_ok=True, parents=True)

    label_dic = load_label_dic(label_path)
    word_dic = load_word_dic(label_path)
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])

    clean_image_path = './color_dataset/'
    synthesis_path = './synthesis/'
    # clean_transform = transforms.Compose([
    #     transforms.Grayscale(num_output_channels=1),
    #     transforms.Resize((resize_size, resize_size)),
    #     transforms.ToTensor(),
    # ])

    train_dataset = []
    valid_dataset = []
    for idx, dir_ in enumerate(os.listdir(clean_image_path)):
        # if args.pretrain_cleandataset:
        dataset = ChineseHandWriteDataset(root=clean_image_path + dir_,
                                          label_dic=label_dic,
                                          transform=transform,
                                          resize=resize,
                                          resize_size=resize_size)
        # dataset = CleanDataset(root=synthesis_path + dir_, label_dic=label_dic, transform=transform, resize=resize,
        #                             resize_size=resize_size, randaug=args.method=="efficientnetV2")
        train_set_size = int(len(dataset) * split_rate)
        valid_set_size = len(dataset) - train_set_size
        train_set, valid_set = data.random_split(
            dataset, [train_set_size, valid_set_size],
            torch.Generator().manual_seed(args.seed))
        train_dataset.append(train_set)
        valid_dataset.append(valid_set)

    train_dataset = data.ConcatDataset(train_dataset)
    valid_dataset = data.ConcatDataset(valid_dataset)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=args.num_workers)

    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=valid_batch_size,
                                  pin_memory=False,
                                  num_workers=args.num_workers)

    print(f"model is {METHOD}")
    model = switchModel(in_features=train_dataset[0][0].shape[0],
                        num_classes=num_classes,
                        args=args,
                        METHOD=METHOD)
    if args.load_model:
        modelPath = getModelPath(CHECKPOINT_FOLDER=CHECKPOINT_FOLDER,
                                 args=args)
        if modelPath != "":
            model.load_state_dict(torch.load(modelPath))

    model.to(device)

    # get each class weight
    weights = None
    if is_useweight:
        weights = getWeights(root=clean_image_path, split_rate=split_rate)

    # Label smoothing
    # loss = SmoothCrossEntropyLoss(weight=weights).to(device)

    # Focal Loss
    loss = FocalLoss(weight=weights).to(device)

    optimizer = optim.AdamW(model.parameters(), lr=lr)
    scheduler_poly_lr_decay = PolynomialLRDecay(
        optimizer,
        max_decay_steps=100,
        end_learning_rate=args.ending_learning_rate,
        power=2.0)
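    # Note (editorial assumption, not part of the original snippet): unlike the
    # per-batch schedules above, this scheduler is stepped once per epoch via
    # scheduler_poly_lr_decay.step(epoch) below, so with max_decay_steps=100 the
    # quadratic decay toward args.ending_learning_rate completes at epoch 100.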
    print("------------------ training start -----------------")

    result_param = {
        'training_loss': [],
        'training_accuracy': [],
        'validation_loss': [],
        'validation_accuracy': []
    }

    for epoch in range(START_EPOCH, Epoch):
        batchI = 0
        scheduler_poly_lr_decay.step(epoch)
        progressive = None
        if PrograssiveModelDict is not None:
            randaugment = RandAugment()

            progressive = prograssiveNow(epoch, Epoch, PrograssiveModelDict)
            randaugment.m = progressive["randarg"]

        since = time.time()
        running_training_loss = 0
        running_training_correct = 0
        running_valid_loss = 0
        running_valid_correct = 0
        dataset.train()
        model.train()

        train_bar = tqdm(train_dataloader)

        for imgst, label, folder, filename in train_bar:
            label = label.to(device)
            if progressive is not None:
                imgst, label = mixup(imgst, label, progressive["mix"])
                toPIL = transforms.ToPILImage()

                transform = transforms.Compose([
                    transforms.Resize((int(progressive["imgsize"]),
                                       int(progressive["imgsize"]))),
                    transforms.ToTensor(),
                ])
                imgs = torch.zeros(
                    (imgst.size()[0], 3, int(progressive["imgsize"]),
                     int(progressive["imgsize"]))
                )  #int(progressive["imgsize"]),int(progressive["imgsize"])))
                for i in range(imgst.size()[0]):
                    imgs[i] = transform(randaugment(toPIL(imgst[i])))
                imgs = imgs.to(device)
                # torchvision.utils.save_image(imgs,f"preprocessImgs/{epoch}-{batchI}.jpg")
                setDropout(model, progressive["drop"])
            else:
                # without progressive resizing, feed the batch as loaded
                imgs = imgst.to(device)

            optimizer.zero_grad()
            out = model(imgs)
            loss_val = loss(out, label)
            _, pred_class = torch.max(out.data, 1)
            running_training_correct += torch.sum(pred_class == label)
            running_training_loss += loss_val
            loss_val.backward()
            optimizer.step()
            train_bar.set_description(
                desc='[%d/%d] | Train Loss:%.4f' %
                (epoch + 1, Epoch, loss_val.item() / len(imgs)))
        with torch.no_grad():
            dataset.eval()
            model.eval()
            if progressive is not None:
                setDropout(model, 0)
            val_bar = tqdm(valid_dataloader)
            for imgs, label, folder, filename in val_bar:
                imgs = imgs.to(device)
                label = label.to(device)
                out = model(imgs)
                loss_val = loss(out, label)
                val_bar.set_description(
                    desc='[%d/%d] | Validation Loss:%.4f' %
                    (epoch + 1, Epoch, loss_val.item() / len(imgs)))
                _, pred_class = torch.max(out.data, 1)
                running_valid_correct += torch.sum(pred_class == label)
                running_valid_loss += loss_val

        result_param['training_loss'].append(running_training_loss.item() /
                                             len(train_dataset) * BATCH_SIZE)
        result_param['training_accuracy'].append(
            running_training_correct.item() / len(train_dataset))
        result_param['validation_loss'].append(
            running_valid_loss.item() / len(valid_dataset) * valid_batch_size)
        result_param['validation_accuracy'].append(
            running_valid_correct.item() / len(valid_dataset))

        print(
            "Epoch:{} Train Loss:{:.4f},  Train Accuracy:{:.4f},  Validation Loss:{:.4f},  Validation Accuracy:{:.4f}, Learning Rate:{:.4f}"
            .format(epoch + 1, result_param['training_loss'][-1],
                    result_param['training_accuracy'][-1],
                    result_param['validation_loss'][-1],
                    result_param['validation_accuracy'][-1],
                    optimizer.param_groups[0]['lr']))

        now_time = time.time() - since
        print("Training time is:{:.0f}m {:.0f}s".format(
            now_time // 60, now_time % 60))

        torch.save(
            model.state_dict(),
            str('./checkpoints/' + METHOD + '/' + "EPOCH_" + str(epoch) +
                ".pkl"))
        with open('./checkpoints/' + METHOD + '/result_param.json', "w+") as out_file:
            json.dump(result_param, out_file, indent=4)

    if args.xgboost:
        print("---------------Two stage - XGboost---------------------")
        with torch.no_grad():

            x_valid, y_valid = [], []
            val_bar = tqdm(valid_dataloader)
            for imgs, label, *_ in val_bar:  # loader yields (imgs, label, folder, filename)
                imgs = imgs.to(device)
                label = label.to(device)
                # to numpy
                imgs = CustomPredict(model, imgs).cpu().detach().numpy()
                label = label.cpu().detach().numpy()
                if not len(x_valid):
                    x_valid, y_valid = imgs, label
                else:
                    x_valid, y_valid = np.concatenate(
                        (x_valid, imgs)), np.concatenate((y_valid, label))

            xgb_train, xgb_label = [], []
            train_bar = tqdm(train_dataloader)
            for imgs, label, *_ in train_bar:  # loader yields (imgs, label, folder, filename)
                imgs = imgs.to(device)
                label = label.to(device)
                # to numpy
                imgs = CustomPredict(model, imgs).cpu().detach().numpy()
                label = label.cpu().detach().numpy()

                if not len(xgb_train):
                    xgb_train, xgb_label = imgs, label
                else:
                    xgb_train, xgb_label = np.concatenate(
                        (xgb_train, imgs)), np.concatenate((xgb_label, label))

            dval = xgboost.DMatrix(x_valid, y_valid)
            dtrain = xgboost.DMatrix(xgb_train, xgb_label)

            params = {
                'max_depth': 5,  # the maximum depth of each tree
                'eta': lr,  # the training step for each iteration
                'objective': 'multi:softmax',  # multiclass classification using the softmax objective
                'num_class': 801,  # the number of classes that exist in this dataset
                'updater': 'grow_gpu_hist',
                'tree_method': 'gpu_hist',
            }

            xgbmodel = xgboost.Booster()
            # xgbmodel.load_model('xgboost.model')
            xgbmodel = xgboost.train(params,
                                     dtrain,
                                     num_boost_round=100,
                                     evals=[(dval, 'val'), (dtrain, 'train')])

            print(sum(xgbmodel.predict(dval) == y_valid) / len(y_valid))
            xgbmodel.save_model('xgboost.model')
Example #7
                                    args.lr,
                                    weight_decay=args.weight_decay)
elif (args.optimizer.lower() == "radam"):
    optimizer = optim.RAdam(params_dict,
                            args.lr,
                            weight_decay=args.weight_decay)
elif (args.optimizer.lower() == "ranger"):
    optimizer = optim.Ranger(params_dict,
                             args.lr,
                             weight_decay=args.weight_decay)
else:
    raise ValueError("Optimizer type: ", args.optimizer,
                     " is not supported or known")

scheduler_poly_lr_decay = PolynomialLRDecay(optimizer,
                                            max_decay_steps=args.epochs,
                                            end_learning_rate=0.0001,
                                            power=0.9)


def save_checkpoint(state, is_best, epoch, filepath):
    if epoch == 'init':
        filepath = os.path.join(filepath, 'init.pth.tar')
        torch.save(state, filepath)
    else:
        # filename = os.path.join(filepath, 'ckpt'+str(epoch)+'.pth.tar')
        # torch.save(state, filename)
        filename = os.path.join(filepath, 'ckpt.pth.tar')
        torch.save(state, filename)
        if is_best:
            shutil.copyfile(filename,
                            os.path.join(filepath, 'model_best.pth.tar'))
import torch

from torch_poly_lr_decay import PolynomialLRDecay

if __name__ == '__main__':
    v = torch.zeros(10)
    optim = torch.optim.SGD([v], lr=0.01)
    scheduler = PolynomialLRDecay(optim,
                                  max_decay_steps=19,
                                  end_learning_rate=0.0001,
                                  power=2.0)

    for epoch in range(1, 20):
        scheduler.step(epoch)

        print(epoch, optim.param_groups[0]['lr'])
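
The standalone script above prints the learning rate after each scheduler.step(epoch) call. As a quick sanity check, the printed values should follow the usual polynomial decay rule, lr = (base_lr - end_lr) * (1 - step / max_decay_steps) ** power + end_lr, clamped at end_lr once step reaches max_decay_steps. That rule is an assumption inferred from the scheduler's name and arguments rather than something stated in the snippets, so the minimal sketch below only reproduces the expected schedule by hand for comparison against the demo's output.

# Minimal sketch (assumes the standard polynomial decay rule; compare the output
# with the values printed by the demo script above).
base_lr = 0.01              # SGD lr used in the demo
end_lr = 0.0001             # end_learning_rate used in the demo
max_decay_steps = 19
power = 2.0

for step in range(1, 20):
    frac = min(step, max_decay_steps) / max_decay_steps
    expected_lr = (base_lr - end_lr) * (1 - frac) ** power + end_lr
    print(step, f'{expected_lr:.6f}')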