Example #1
def get_lr_scheduler(optimizer, config):
    name = config.type
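    # Copy the config to a plain dict of kwargs, dropping bookkeeping keys.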
    args = copy_and_delete(config.toDict(), 'type')
    args = copy_and_delete(args, 'scheme')

    # First, try the standard schedulers in torch.optim.lr_scheduler.
    lr_scheduler = get_if_implemented(torch.optim.lr_scheduler, name)

    if lr_scheduler is None:
        # Fall back to a custom scheduler class defined under ./schedulers.
        try:
            lr_scheduler = get_module('./schedulers', name)
        except Exception:
            pass

    if lr_scheduler is None:
        # Last resort: wrap a functional schedule (iteration -> LR factor) in LambdaLR.
        fcn = get_function('schedulers.functional', name)
        assert fcn is not None, "No functional implementation of {} was found".format(name)
        fcn_wrapper = fcn(**args)
        lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer, [fcn_wrapper])
    else:
        lr_scheduler = lr_scheduler(optimizer, **args)

    return lr_scheduler
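
# Note: copy_and_delete, get_if_implemented, get_module and get_function are
# project-specific helpers that are not shown here. A minimal sketch of what
# the first two likely do (an assumption, not the project's actual code):
def copy_and_delete(d, key):
    """Return a shallow copy of dict ``d`` with ``key`` removed (if present)."""
    out = dict(d)
    out.pop(key, None)
    return out

def get_if_implemented(module, name):
    """Return ``module.<name>`` if the attribute exists, else None."""
    return getattr(module, name, None)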
Example #2
def train(config):
    ## set up preprocessing
    prep_dict = {}
    prep_config = config["prep"]
    prep_dict["source"] = prep.image_train(**config["prep"]['params'])
    prep_dict["target"] = prep.image_train(**config["prep"]['params'])
    if prep_config["test_10crop"]:
        prep_dict["test"] = prep.image_test_10crop(**config["prep"]['params'])
    else:
        prep_dict["test"] = prep.image_test(**config["prep"]['params'])

    ## prepare data
    dsets = {}
    dset_loaders = {}
    data_config = config["data"]
    train_bs = data_config["source"]["batch_size"]
    test_bs = data_config["test"]["batch_size"]
    dsets["source"] = ImageList(open(data_config["source"]["list_path"]).readlines(), \
                                transform=prep_dict["source"])
    dset_loaders["source"] = DataLoader(dsets["source"], batch_size=train_bs, \
                                        shuffle=True, num_workers=4, drop_last=True)
    dsets["target"] = ImageList(open(data_config["target"]["list_path"]).readlines(), \
                                transform=prep_dict["target"])
    dset_loaders["target"] = DataLoader(dsets["target"], batch_size=train_bs, \
                                        shuffle=True, num_workers=4, drop_last=True)

    if prep_config["test_10crop"]:
        for i in range(10):
            dsets["test"] = [ImageList(open(data_config["test"]["list_path"]).readlines(), \
                                transform=prep_dict["test"][i]) for i in range(10)]
            dset_loaders["test"] = [DataLoader(dset, batch_size=test_bs, \
                                shuffle=False, num_workers=4) for dset in dsets['test']]
    else:
        dsets["test"] = ImageList(open(data_config["test"]["list_path"]).readlines(), \
                                transform=prep_dict["test"])
        dset_loaders["test"] = DataLoader(dsets["test"], batch_size=test_bs, \
                                shuffle=False, num_workers=4)

    class_num = config["network"]["params"]["class_num"]

    ## set base network
    net_config = config["network"]
    base_network = net_config["name"](**net_config["params"])
    base_network = base_network.cuda()  # load the base network onto the GPU
    parameter_list = base_network.get_parameters()
    ## set optimizer
    optimizer_config = config["optimizer"]
    optimizer = optimizer_config["type"](parameter_list, \
                                         **(optimizer_config["optim_params"]))

    crit = LabelSmoothingLoss(smoothing=0.1, classes=class_num)  # label smoothing for the source classifier

    param_lr = []
    for param_group in optimizer.param_groups:
        param_lr.append(param_group["lr"])
    schedule_param = optimizer_config["lr_param"]
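    # Look up the LR schedule function; it is called once per iteration with the step index.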
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]

    gpus = config['gpu'].split(',')
    if len(gpus) > 1:
        base_network = nn.DataParallel(base_network,
                                       device_ids=[int(i) for i in gpus])

    ## train
    len_train_source = len(dset_loaders["source"])
    len_train_target = len(dset_loaders["target"])
    best_acc = 0.0
    start_time = time.time()
    for i in range(config["num_iterations"]):
        if i % config["test_interval"] == config["test_interval"] - 1:
            # 在这里进行测试的工作
            base_network.train(False)
            temp_acc = image_classification_test(dset_loaders,
                                                 base_network,
                                                 test_10crop=False)
            temp_model = nn.Sequential(base_network)
            if temp_acc > best_acc:
                best_acc = temp_acc
                best_model = temp_model
            log_str = "iter: {:05d}, precision: {:.5f}".format(i, temp_acc)
            config["out_file"].write(log_str + "\n")
            config["out_file"].flush()
            print(log_str)
            end_time = time.time()
            print('iter {} cost time {:.4f} sec.'.format(
                i, end_time - start_time))  # report elapsed wall-clock time
            start_time = time.time()

        if i % config["snapshot_interval"] == 0:
            torch.save(nn.Sequential(base_network), osp.join(config["output_path"], \
                                                             "iter_{:05d}_model.pth.tar".format(i)))
        ## train one iter
        base_network.train(True)  # switch back to training mode
        optimizer = lr_scheduler(optimizer, i, **schedule_param)
        optimizer.zero_grad()

        # Restart the source/target iterators once a full pass is complete.
        if i % len_train_source == 0:
            iter_source = iter(dset_loaders["source"])
        if i % len_train_target == 0:
            iter_target = iter(dset_loaders["target"])

        inputs_source, labels_source = next(iter_source)
        inputs_target, labels_target = next(iter_target)
        inputs_source = inputs_source.cuda()
        inputs_target = inputs_target.cuda()
        labels_source = labels_source.cuda()
        features_source, outputs_source = base_network(inputs_source)
        features_target, outputs_target = base_network(inputs_target)
        # Entropy regularization on target-domain predictions
        t_logit = outputs_target
        t_prob = F.softmax(t_logit, dim=1)
        t_entropy_loss = get_entropy_loss(t_prob)  # mean entropy of target predictions
        entropy_loss = config['ENT_w'] * t_entropy_loss

        # classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)  # plain source classification loss
        classifier_loss = crit(outputs_source, labels_source)  # label-smoothed source classification loss

        total_loss = classifier_loss + entropy_loss
        if i % config["test_interval"] == config["test_interval"] - 1:
            print('total loss: {:.4f}, classifier loss: {:.4f}'.format(
                total_loss.item(), classifier_loss.item()))

        total_loss.backward()
        optimizer.step()
        if (i + 1) % 1000 == 0:
            model_path = os.path.join(
                os.path.dirname(config['save_model_name']), 'temp_model.pth')
            save_model(base_network, model_path)

    model_path = config['save_model_name']
    best_model_path = os.path.join(os.path.dirname(model_path),
                                   'best_model.pth')
    save_model(base_network, model_path)
    save_model(best_model, best_model_path)

    torch.save(best_model, osp.join(config["output_path"],
                                    "best_model.pth.tar"))
    return best_acc
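
# Note: get_entropy_loss and LabelSmoothingLoss are defined elsewhere in this
# project. Minimal sketches consistent with how they are called above (an
# assumption, not the project's actual code):
import torch
import torch.nn as nn

def get_entropy_loss(prob, eps=1e-8):
    """Mean Shannon entropy of a batch of (N, C) softmax outputs."""
    return -(prob * torch.log(prob + eps)).sum(dim=1).mean()

class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a uniformly smoothed target distribution."""
    def __init__(self, smoothing=0.1, classes=31, dim=-1):
        super().__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.classes = classes
        self.dim = dim

    def forward(self, pred, target):
        pred = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            # Uniform mass on the non-target classes, `confidence` on the target.
            true_dist = torch.full_like(pred, self.smoothing / (self.classes - 1))
            true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
        return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))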
Example #3
def train(config):
    ## set up preprocessing
    prep_dict = {}
    prep_config = config["prep"]
    prep_dict["source"] = prep.image_train(**config["prep"]['params'])
    prep_dict["target"] = prep.image_train(**config["prep"]['params'])
    if prep_config["test_10crop"]:
        prep_dict["test"] = prep.image_test_10crop(**config["prep"]['params'])
    else:
        prep_dict["test"] = prep.image_test(**config["prep"]['params'])

    ## prepare data
    dsets = {}
    dset_loaders = {}
    data_config = config["data"]
    train_bs = data_config["source"]["batch_size"]
    test_bs = data_config["test"]["batch_size"]
    dsets["source"] = ImageList(open(data_config["source"]["list_path"]).readlines(), \
                                transform=prep_dict["source"])
    dset_loaders["source"] = DataLoader(dsets["source"], batch_size=train_bs, \
                                        shuffle=True, num_workers=4, drop_last=True)
    dsets["target"] = ImageList(open(data_config["target"]["list_path"]).readlines(), \
                                transform=prep_dict["target"])
    dset_loaders["target"] = DataLoader(dsets["target"], batch_size=train_bs, \
                                        shuffle=True, num_workers=4, drop_last=True)

    if prep_config["test_10crop"]:
        for i in range(10):
            dsets["test"] = [ImageList(open(data_config["test"]["list_path"]).readlines(), \
                                       transform=prep_dict["test"][i]) for i in range(10)]
            dset_loaders["test"] = [DataLoader(dset, batch_size=test_bs, \
                                               shuffle=False, num_workers=4) for dset in dsets['test']]
    else:
        dsets["test"] = ImageList(open(data_config["test"]["list_path"]).readlines(), \
                                  transform=prep_dict["test"])
        dset_loaders["test"] = DataLoader(dsets["test"], batch_size=test_bs, \
                                          shuffle=False, num_workers=4)

    class_num = config["network"]["params"]["class_num"]
    crit = LabelSmoothingLoss(smoothing=0.05, classes=class_num)  # label smoothing for the source classifier

    ## set base network
    net_config = config["network"]
    base_network = net_config["name"](**net_config["params"])
    base_network = base_network.cuda()  # load the base network onto the GPU

    ## add additional network for some methods
    if config["loss"]["random"]:
        random_layer = network.RandomLayer([base_network.output_num(), class_num], config["loss"]["random_dim"])
        ad_net = network.AdversarialNetwork(config["loss"]["random_dim"], 1024)
    else:
        random_layer = None
        ad_net = network.AdversarialNetwork(base_network.output_num() * class_num, 1024)  # 对抗网络结构
    if config["loss"]["random"]:
        random_layer.cuda()
    ad_net = ad_net.cuda()
    parameter_list = base_network.get_parameters() + ad_net.get_parameters()

    ## set optimizer
    optimizer_config = config["optimizer"]
    optimizer = optimizer_config["type"](parameter_list, \
                                         **(optimizer_config["optim_params"]))

    # Center loss on source-domain features, with its own SGD optimizer
    criterion_centor = CenterLoss(num_classes=class_num, feat_dim=256, use_gpu=True)
    optimizer_centerloss = torch.optim.SGD(criterion_centor.parameters(), lr=config['lr'])

    param_lr = []
    for param_group in optimizer.param_groups:
        param_lr.append(param_group["lr"])
    schedule_param = optimizer_config["lr_param"]
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]

    gpus = config['gpu'].split(',')
    if len(gpus) > 1:
        ad_net = nn.DataParallel(ad_net, device_ids=[int(i) for i in gpus])
        base_network = nn.DataParallel(base_network, device_ids=[int(i) for i in gpus])

    ## train
    len_train_source = len(dset_loaders["source"])
    len_train_target = len(dset_loaders["target"])
    transfer_loss_value = classifier_loss_value = total_loss_value = 0.0
    best_acc = 0.0
    start_time = time.time()
    for i in range(config["num_iterations"]):

        if i % config["test_interval"] == config["test_interval"] - 1:
            # 在这里进行测试的工作
            base_network.train(False)
            temp_acc = image_classification_test(dset_loaders, \
                                                 base_network, test_10crop=prep_config["test_10crop"])
            temp_model = nn.Sequential(base_network)
            if temp_acc > best_acc:
                best_acc = temp_acc
                best_model = temp_model
            log_str = "iter: {:05d}, precision: {:.5f}".format(i, temp_acc)
            config["out_file"].write(log_str + "\n")
            config["out_file"].flush()
            print(log_str)
            end_time = time.time()
            print('iter {} cost time {:.4f} sec.'.format(i, end_time - start_time))  # report elapsed wall-clock time
            start_time = time.time()

        if i % config["snapshot_interval"] == 0:
            torch.save(nn.Sequential(base_network), osp.join(config["output_path"], \
                                                             "iter_{:05d}_model.pth.tar".format(i)))

        loss_params = config["loss"]

        ## train one iter
        base_network.train(True)  # switch back to training mode
        ad_net.train(True)

        optimizer = lr_scheduler(optimizer, i, **schedule_param)
        # optimizer_centerloss=lr_scheduler(optimizer_centerloss, i, **schedule_param)

        optimizer.zero_grad()
        optimizer_centerloss.zero_grad()

        # Restart the source/target iterators once a full pass is complete.
        if i % len_train_source == 0:
            iter_source = iter(dset_loaders["source"])
        if i % len_train_target == 0:
            iter_target = iter(dset_loaders["target"])
        inputs_source, labels_source = next(iter_source)
        inputs_target, labels_target = next(iter_target)
        inputs_source = inputs_source.cuda()
        inputs_target = inputs_target.cuda()
        labels_source = labels_source.cuda()
        features_source, outputs_source = base_network(inputs_source)
        features_target, outputs_target = base_network(inputs_target)
        features = torch.cat((features_source, features_target), dim=0)
        outputs = torch.cat((outputs_source, outputs_target), dim=0)
        softmax_out = nn.Softmax(dim=1)(outputs)
        if config['method'] == 'CDAN+E':
            entropy = loss.Entropy(softmax_out)
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, entropy, network.calc_coeff(i), random_layer)
        elif config['method'] == 'CDAN':
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, None, None, random_layer)
        elif config['method'] == 'DANN':
            transfer_loss = loss.DANN(features, ad_net)
        else:
            raise ValueError('Method cannot be recognized.')
        # classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)  # plain source classification loss
        classifier_loss = crit(outputs_source, labels_source)  # label-smoothed source classification loss

        # Entropy regularization on target-domain predictions
        # (computed here, but note it is not added to total_loss below)
        t_prob = torch.softmax(outputs_target, dim=1)
        t_entropy_loss = get_entropy_loss(t_prob)  # mean entropy of target predictions
        entropy_loss = 0.05 * t_entropy_loss

        # Center loss pulls source features toward their class centers
        loss_centor = criterion_centor(features_source, labels_source)

        total_loss = loss_params["trade_off"] * transfer_loss + classifier_loss + config['centor_w'] * loss_centor
        if i % config["test_interval"] == config["test_interval"] - 1:
            print('total loss: {:.4f}, transfer loss: {:.4f}, classifier loss: {:.4f}, center loss: {:.4f}'.format(
                total_loss.item(), transfer_loss.item(), classifier_loss.item(), config['centor_w'] * loss_centor.item()))
        total_loss.backward()
        optimizer.step()
        # Rescale the center gradients so the loss weight centor_w does not
        # affect how fast the centers themselves move.
        for param in criterion_centor.parameters():
            param.grad.data *= (1. / config['centor_w'])
        optimizer_centerloss.step()

    torch.save(best_model, osp.join(config["output_path"], "best_model.pth.tar"))
    return best_acc
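
# Note: CenterLoss comes from elsewhere (its constructor matches the popular
# pytorch-center-loss implementation). A minimal sketch of the idea (an
# assumption, not necessarily the exact code used here):
import torch
import torch.nn as nn

class CenterLoss(nn.Module):
    """Mean squared distance between features and their learnable class centers.

    The centers are parameters updated by their own optimizer
    (optimizer_centerloss above).
    """
    def __init__(self, num_classes, feat_dim, use_gpu=True):
        super().__init__()
        centers = torch.randn(num_classes, feat_dim)
        self.centers = nn.Parameter(centers.cuda() if use_gpu else centers)

    def forward(self, features, labels):
        # Squared distance of each feature to the center of its own class.
        diff = features - self.centers[labels]
        return (diff * diff).sum(dim=1).mean() / 2.0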
Example #4
def main():
    config = vars(parse_args())
    now = datetime.datetime.now()

    if config["name"] is None:
        if config["deep_supervision"]:
            config["name"] = "%s_%s_wDS_%s" % (
                config["dataset"],
                config["arch"],
                now.strftime("%Y%m%d_%H%M%S"),
            )
        else:
            config["name"] = "%s_%s_woDS_%s" % (
                config["dataset"],
                config["arch"],
                now.strftime("%Y%m%d_%H%M%S"),
            )
    output_path = os.path.join(cfg.UNET_RESULTS_DIR, config["name"])
    try:
        os.makedirs(output_path, exist_ok=True)
    except Exception as e:
        print(e)

    models_path = os.path.join(output_path, "models")
    os.mkdir(models_path)

    with open(os.path.join(models_path, "config.yml"), "w") as f:
        yaml.dump(config, f)

    print("-" * 20)
    for key in config:
        print("%s: %s" % (key, config[key]))
    print("-" * 20)

    # directory for TensorBoard logs
    log_dir = os.path.join(output_path, "log")
    os.mkdir(log_dir)
    writer = SummaryWriter(log_dir=log_dir)

    # define loss function(criterion)
    if config["loss"] == "BCEWithLogitsLoss":
        criterion = nn.BCEWithLogitsLoss().cuda()
    else:
        criterion = losses.__dict__[config["loss"]]().cuda()

    cudnn.benchmark = True

    # create model
    print("=> creating model %s" % config["arch"])
    model = archs.__dict__[config["arch"]](config["num_classes"],
                                           config["input_channels"],
                                           config["deep_supervision"])

    model = model.cuda()

    # To visualize the model in TensorBoard, log its graph:
    # image = torch.randn(1, 3, 224, 224)
    # writer.add_graph(model, image)

    params = filter(lambda p: p.requires_grad, model.parameters())
    if config["optimizer"] == "Adam":
        optimizer = optim.Adam(params,
                               lr=config["lr"],
                               weight_decay=config["weight_decay"])
    elif config["optimizer"] == "SGD":
        optimizer = optim.SGD(
            params,
            lr=config["lr"],
            momentum=config["momentum"],
            nesterov=config["nesterov"],
            weight_decay=config["weight_decay"],
        )
    else:
        raise NotImplementedError

    # scheduler
    if config["scheduler"] == "CosineAnnealingLR":
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer=optimizer,
                                                   T_max=config["epochs"],
                                                   eta_min=config["min_lr"])
    elif config["scheduler"] == "ReduceLROnPlateau":
        scheduler = lr_scheduler(
            optimizer=optimizer,
            factor=config["factor"],
            patience=config["patience"],
            verbose=1,
            min_lr=config["min_lr"],
        )
    elif config["scheduler"] == "MultiStepLR":
        scheduler = lr_scheduler.MultiStepLR(
            optimizer=optimizer,
            milestones=[int(e) for e in config["milestones"].split(",")],
            gamma=config["gamma"],
        )
    elif config["scheduler"] == "ConstantLR":
        scheduler = None
    else:
        raise NotImplementedError

    # Data loading code
    if config["dataset"] == "dsb2018_96":
        input_dir = cfg.DSB2018_96_DIR
        img_ids = glob(
            os.path.join(input_dir, "images", "*" + config["img_ext"]))
        img_ids = [os.path.splitext(os.path.basename(p))[0] for p in img_ids]

        train_img_ids, val_img_ids = train_test_split(img_ids,
                                                      test_size=0.2,
                                                      random_state=41)

        train_transform = Compose([
            transforms.RandomRotate90(),
            transforms.Flip(),
            OneOf(
                [
                    transforms.HueSaturationValue(),
                    transforms.RandomBrightness(),
                    transforms.RandomContrast(),
                ],
                p=1,
            ),
            transforms.Resize(config["input_h"], config["input_w"]),
            transforms.Normalize(),
        ])

        val_transform = Compose([
            transforms.Resize(config["input_h"], config["input_w"]),
            transforms.Normalize(),
        ])

        train_dataset = Dataset(
            img_ids=train_img_ids,
            img_dir=os.path.join(input_dir, "images"),
            mask_dir=os.path.join(input_dir, "masks"),
            img_ext=config["img_ext"],
            mask_ext=config["mask_ext"],
            num_classes=config["num_classes"],
            transform=train_transform,
        )

        val_dataset = Dataset(
            img_ids=val_img_ids,
            img_dir=os.path.join(input_dir, "images"),
            mask_dir=os.path.join(input_dir, "masks"),
            img_ext=config["img_ext"],
            mask_ext=config["mask_ext"],
            num_classes=config["num_classes"],
            transform=val_transform,
        )
    else:
        raise NotImplementedError

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config["batch_size"],
        shuffle=True,
        num_workers=config["num_workers"],
        drop_last=True,
    )
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=config["batch_size"],
        shuffle=False,
        num_workers=config["num_workers"],
        drop_last=False,
    )

    log = OrderedDict([
        ("epoch", []),
        ("lr", []),
        ("loss", []),
        ("iou", []),
        ("val_loss", []),
        ("val_iou", []),
    ])

    best_iou = 0
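    # 'trigger' counts epochs since the last improvement (used for early stopping).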
    trigger = 0
    for epoch in range(config["epochs"] + 1):
        print("Epoch [%d/%d]" % (epoch, config["epochs"]))

        # train for one epoch
        train_log = train(config, train_loader, model, criterion, optimizer)
        # evaluate on validation set
        val_log = validate(config, val_loader, model, criterion)

        if config["scheduler"] == "CosineAnnealingLR":
            scheduler.step()
        elif config["scheduler"] == "ReduceLROnPlateau":
            scheduler.step(val_log["loss"])

        print("loss %.4f - iou %.4f - val_loss %.4f - val_iou %.4f" %
              (train_log["loss"], train_log["iou"], val_log["loss"],
               val_log["iou"]))

        log["epoch"].append(epoch)
        log["lr"].append(config["lr"])
        log["loss"].append(train_log["loss"])
        log["iou"].append(train_log["iou"])
        log["val_loss"].append(val_log["loss"])
        log["val_iou"].append(val_log["iou"])

        # scalars for TensorBoard
        writer.add_scalar("training loss", train_log["loss"], epoch)
        writer.add_scalar("validation loss", val_log["loss"], epoch)

        pd.DataFrame(log).to_csv("%s/log.csv" % (log_dir), index=False)

        if epoch == 0:
            best_loss = val_log["loss"]
        trigger += 1

        # Best Model Save
        # if val_log['iou'] > best_iou:
        if (val_log["iou"] > best_iou) and (val_log["loss"] <= best_loss):
            torch.save(model.state_dict(), "%s/model.pth" % (models_path))
            best_iou = val_log["iou"]
            best_loss = val_log["loss"]
            print("=> saved best model")
            trigger = 0

        # early stopping
        if (config["early_stopping"] >= 0 and
                trigger >= config["early_stopping"]) or val_log["loss"] < 1e-4:
            print("=> early stopping")
            break

        torch.cuda.empty_cache()

    # close the SummaryWriter once it is no longer needed
    writer.close()
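
# Note: train() and validate() are defined elsewhere and report an 'iou'
# metric. For a sigmoid-output segmentation model trained with
# BCEWithLogitsLoss as above, that metric is presumably something like the
# sketch below (an assumption; the project's own metric may differ):
import torch

def iou_score(output, target, eps=1e-6):
    """IoU between thresholded sigmoid logits and a binary mask."""
    pred = (torch.sigmoid(output) > 0.5).float()
    inter = (pred * target).sum()
    union = pred.sum() + target.sum() - inter
    return ((inter + eps) / (union + eps)).item()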