def train(**kwargs):

    # first free all GPU memory
    t.cuda.empty_cache()
    """ Get options """

    opt = Config()
    print_options(opt)

    # overwrite options from commandline
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    device = t.device('cuda') if opt.gpu else t.device('cpu')

    # TODO: visualization
    """ Dataset """

    dataset = create_dataset(opt)
    dataset_size = len(dataset)
    iter_per_epoch = int(dataset_size / opt.batch_size)
    print(f'loaded {dataset_size} images for training')
    """ Create Network Instances """

    model_names = ['netG_x', 'netG_y', 'netD_x', 'netD_y']

    netG_x = ResnetGenerator(opt)
    netG_y = ResnetGenerator(opt)
    # print(netG_x)

    netD_x = NLayerDiscriminator(opt)
    netD_y = NLayerDiscriminator(opt)
    # print(netD_x)

    if opt.gpu:
        netG_x.to(device)
        summary(netG_x, input_size=(3, opt.crop_size, opt.crop_size))
        netG_y.to(device)

        netD_x.to(device)
        summary(netD_x, input_size=(3, opt.crop_size, opt.crop_size))
        netD_y.to(device)
    """ Define optimizer and Loss """
    optimizer_g = t.optim.Adam(itertools.chain(netG_x.parameters(),
                                               netG_y.parameters()),
                               lr=opt.g_lr,
                               betas=(opt.beta1, 0.999))
    optimizer_d = t.optim.Adam(itertools.chain(netD_x.parameters(),
                                               netD_y.parameters()),
                               lr=opt.d_lr,
                               betas=(opt.beta1, 0.999))
    optimizers = [optimizer_g, optimizer_d]
    """
    Forward cycle loss:  lambda_A * ||G_B(G_A(A)) - A|| (Eqn. (2) in the paper)
    Backward cycle loss: lambda_B * ||G_A(G_B(B)) - B|| (Eqn. (2) in the paper)
    Identity loss (optional):
    lambda_identity * (||G_A(B) - B|| * lambda_B + ||G_B(A) - A|| * lambda_A)
    (Sec 5.2 "Photo generation from paintings" in the paper)
    """

    lambda_X = 10.0  # weight for cycle loss (A -> B -> A^)
    lambda_Y = 10.0  # weight for cycle loss (B -> A -> B^)
    lambda_identity = 0.5

    # define the GAN loss
    # with gan_mode='lsgan' it wraps an MSELoss(); the loss itself is only
    # computed later, inside the training iterations
    # criterionGAN = nn.MSELoss().to(device)
    criterionGAN = GANLoss(gan_mode='lsgan')

    # cycle loss
    criterionCycle = nn.L1Loss()

    # identity loss
    criterionIdt = nn.L1Loss()

    # loss meters
    loss_X_meter = MovingAverageValueMeter(opt.plot_every)
    loss_Y_meter = MovingAverageValueMeter(opt.plot_every)
    score_Dx_real_y = MovingAverageValueMeter(opt.plot_every)
    score_Dx_fake_y = MovingAverageValueMeter(opt.plot_every)

    losses = {}
    scores = {}
    """ use identity mapping. Setting lambda_identity other than 0 has an effect of scaling the weight of the identity mapping loss. For example, if the weight of the identity loss should be 10 times smaller than the weight of the reconstruction loss, please set lambda_identity = 0.1 """

    for epoch in range(opt.max_epochs):
        epoch_start_time = time.time()
        """ calculate losses, gradients, and update network weights;
        called in every iteration
        """

        for i, data in enumerate(dataset):

            real_x = data['A'].to(device)
            real_y = data['B'].to(device)

            ######################
            # X -> Y' -> X^ cycle
            ######################

            optimizer_g.zero_grad()  # set g_x and g_y gradients to zero

            fake_y = netG_x(real_x)  # X -> Y'
            prediction = netD_x(fake_y)  # netD_x provides feedback to netG_x
            loss_G_X = criterionGAN(prediction, True)

            # cycle consistency
            x_hat = netG_y(fake_y)  # Y' -> X^
            # forward cycle loss: || G_y(G_x(real_x)) - real_x ||
            loss_cycle_X = criterionCycle(x_hat, real_x) * lambda_X

            # identity loss
            if lambda_identity > 0:
                # netG_x should be an identity map when real_y is fed: ||netG_x(real_y) - real_y||
                idt_x = netG_x(real_y)
                loss_idt_x = criterionIdt(idt_x,
                                          real_y) * lambda_Y * lambda_identity
            else:
                loss_idt_x = 0.

            loss_X = loss_G_X + loss_cycle_X + loss_idt_x
            loss_X.backward()
            optimizer_g.step()

            loss_X_meter.add(loss_X.item())

            ######################
            # Y -> X' -> Y^ cycle
            ######################

            optimizer_g.zero_grad()  # set g_x and g_y gradients to zero

            fake_x = netG_y(real_y)  # Y -> X'
            prediction = netD_y(fake_x)
            loss_G_Y = criterionGAN(prediction, True)
            # print(f'loss_G_Y = {round(float(loss_G_Y), 3)}')

            y_hat = netG_x(fake_x)  # Y -> X' -> Y^
            # backward cycle loss: || G_x(G_y(real_y)) - real_y ||
            loss_cycle_Y = criterionCycle(y_hat, real_y) * lambda_Y

            # identity loss
            if lambda_identity > 0:
                # netG_y should be an identity map when real_x is fed: ||netG_y(real_x) - real_x||
                idt_y = netG_y(real_x)
                loss_idt_y = criterionIdt(idt_y,
                                          real_x) * lambda_X * lambda_identity
            else:
                loss_idt_y = 0.

            loss_Y = loss_G_Y + loss_cycle_Y + loss_idt_y
            loss_Y.backward()
            optimizer_g.step()

            loss_Y_meter.add(loss_Y.item())

            ######################
            # netD_x
            ######################

            optimizer_d.zero_grad()

            # loss_real
            pred_real = netD_x(real_y)
            loss_D_x_real = criterionGAN(pred_real, True)
            score_Dx_real_y.add(float(pred_real.data.mean()))

            # loss_fake
            pred_fake = netD_x(fake_y.detach())  # detach: D update must not backprop into netG_x
            loss_D_x_fake = criterionGAN(pred_fake, False)
            score_Dx_fake_y.add(float(pred_fake.data.mean()))

            # loss and backward
            loss_D_x = (loss_D_x_real + loss_D_x_fake) * 0.5

            loss_D_x.backward()
            optimizer_d.step()

            ######################
            # netD_y
            ######################

            optimizer_d.zero_grad()

            # loss_real
            pred_real = netD_y(real_x)
            loss_D_y_real = criterionGAN(pred_real, True)

            # loss_fake
            pred_fake = netD_y(fake_x.detach())  # detach: D update must not backprop into netG_y
            loss_D_y_fake = criterionGAN(pred_fake, False)

            # loss and backward
            loss_D_y = (loss_D_y_real + loss_D_y_fake) * 0.5

            loss_D_y.backward()
            optimizer_d.step()

            # save snapshot
            if i % opt.plot_every == 0:
                filename = opt.name + '_snap_%03d_%05d.png' % (
                    epoch,
                    i,
                )
                test_path = os.path.join(opt.checkpoint_path, filename)
                tv.utils.save_image(fake_y, test_path, normalize=True)
                print(f'{filename} saved.')

                losses['loss_X'] = loss_X_meter.value()[0]
                losses['loss_Y'] = loss_Y_meter.value()[0]
                scores['score_Dx_real_y'] = score_Dx_real_y.value()[0]
                scores['score_Dx_fake_y'] = score_Dx_fake_y.value()[0]
                print(losses)
                print(scores)

            # print(f'iteration {i} finished')

        # save model
        if epoch % opt.save_every == 0 or epoch == opt.max_epochs - 1:
            save_filename = f'{opt.name}_netG_{epoch}.pth'
            save_filepath = os.path.join(opt.model_path, save_filename)
            t.save(netG_x.state_dict(), save_filepath)
            print(f'model saved as {save_filename}')

        # epoch end logs
        epoch_time = int(time.time() - epoch_start_time)

        print_options(opt,
                      epoch_log=True,
                      epoch=epoch,
                      time=epoch_time,
                      losses=losses,
                      scores=scores)
        print()
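Note: GANLoss is not defined in this excerpt. The comment above says that with
gan_mode='lsgan' it reduces to an MSELoss; below is a minimal sketch under that
assumption, modeled on the common CycleGAN-style implementation, so the exact
class in this repo may differ:

# Minimal GANLoss sketch (an assumption, not this repo's actual class): the
# scalar real/fake target is broadcast to the discriminator prediction's shape.
import torch
import torch.nn as nn

class GANLoss(nn.Module):
    def __init__(self, gan_mode='lsgan'):
        super().__init__()
        self.register_buffer('real_label', torch.tensor(1.0))
        self.register_buffer('fake_label', torch.tensor(0.0))
        if gan_mode == 'lsgan':
            self.loss = nn.MSELoss()            # least-squares GAN objective
        elif gan_mode == 'vanilla':
            self.loss = nn.BCEWithLogitsLoss()
        else:
            raise NotImplementedError(gan_mode)

    def forward(self, prediction, target_is_real):
        target = self.real_label if target_is_real else self.fake_label
        return self.loss(prediction, target.expand_as(prediction))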
Example #2
def main(config, cuda):
    device = torch.device("cuda" if cuda and torch.cuda.is_available() else "cpu")

    if cuda:
        current_device = torch.cuda.current_device()
        print("Running on", torch.cuda.get_device_name(current_device))
    else:
        print("Running on CPU")

    # Configuration
    CONFIG = Dict(yaml.safe_load(open(config)))

    # Dataset
    dataset = CocoStuff10k(
        root=CONFIG.ROOT,
        split="train",
        image_size=513,
        crop_size=CONFIG.IMAGE.SIZE.TRAIN,
        scale=True,
        flip=True,
    )

    # DataLoader
    loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=CONFIG.BATCH_SIZE,
        num_workers=CONFIG.NUM_WORKERS,
        shuffle=True,
    )
    loader_iter = iter(loader)

    # Model
    model = DeepLabV2_ResNet101_MSC(n_classes=CONFIG.N_CLASSES)
    state_dict = torch.load(CONFIG.INIT_MODEL)
    model.load_state_dict(state_dict, strict=False)  # Skip "aspp" layer
    model = nn.DataParallel(model)
    model.to(device)

    # Optimizer
    optimizer = {
        "sgd": torch.optim.SGD(
            # cf lr_mult and decay_mult in train.prototxt
            params=[
                {
                    "params": get_lr_params(model.module, key="1x"),
                    "lr": CONFIG.LR,
                    "weight_decay": CONFIG.WEIGHT_DECAY,
                },
                {
                    "params": get_lr_params(model.module, key="10x"),
                    "lr": 10 * CONFIG.LR,
                    "weight_decay": CONFIG.WEIGHT_DECAY,
                },
                {
                    "params": get_lr_params(model.module, key="20x"),
                    "lr": 20 * CONFIG.LR,
                    "weight_decay": 0.0,
                },
            ],
            momentum=CONFIG.MOMENTUM,
        )
    }.get(CONFIG.OPTIMIZER)

    # Loss definition
    criterion = CrossEntropyLoss2d(ignore_index=CONFIG.IGNORE_LABEL)
    criterion.to(device)

    # TensorBoard Logger
    writer = SummaryWriter(CONFIG.LOG_DIR)
    loss_meter = MovingAverageValueMeter(20)

    model.train()
    model.module.scale.freeze_bn()

    for iteration in tqdm(
        range(1, CONFIG.ITER_MAX + 1),
        total=CONFIG.ITER_MAX,
        leave=False,
        dynamic_ncols=True,
    ):

        # Set a learning rate
        poly_lr_scheduler(
            optimizer=optimizer,
            init_lr=CONFIG.LR,
            iter=iteration - 1,
            lr_decay_iter=CONFIG.LR_DECAY,
            max_iter=CONFIG.ITER_MAX,
            power=CONFIG.POLY_POWER,
        )

        # Clear gradients (ready to accumulate)
        optimizer.zero_grad()

        iter_loss = 0
        for i in range(1, CONFIG.ITER_SIZE + 1):
            try:
                data, target = next(loader_iter)
            except StopIteration:  # loader exhausted: restart it
                loader_iter = iter(loader)
                data, target = next(loader_iter)

            # Image
            data = data.to(device)

            # Propagate forward
            outputs = model(data)

            # Loss
            loss = 0
            for output in outputs:
                # Resize target for {100%, 75%, 50%, Max} outputs
                target_ = resize_target(target, output.size(2))
                target_ = target_.to(device)
                # Compute crossentropy loss
                loss += criterion(output, target_)

            # Backpropagate (just compute gradients wrt the loss)
            loss /= float(CONFIG.ITER_SIZE)
            loss.backward()

            iter_loss += float(loss)

        loss_meter.add(iter_loss)

        # Update weights with accumulated gradients
        optimizer.step()

        # TensorBoard
        if iteration % CONFIG.ITER_TF == 0:
            writer.add_scalar("train_loss", loss_meter.value()[0], iteration)
            for i, o in enumerate(optimizer.param_groups):
                writer.add_scalar("train_lr_group{}".format(i), o["lr"], iteration)
            # for name, param in model.named_parameters():
            #     name = name.replace('.', '/')
            #     writer.add_histogram(name, param, iteration, bins="auto")
            #     if param.requires_grad:
            #         writer.add_histogram(name + '/grad', param.grad, iteration, bins="auto")

        # Save a model
        if iteration % CONFIG.ITER_SNAP == 0:
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.SAVE_DIR, "checkpoint_{}.pth".format(iteration)),
            )

        # Save a model
        if iteration % 100 == 0:
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.SAVE_DIR, "checkpoint_current.pth"),
            )

    torch.save(
        model.module.state_dict(), osp.join(CONFIG.SAVE_DIR, "checkpoint_final.pth")
    )
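Note: poly_lr_scheduler is not shown in this excerpt. The standard DeepLab
schedule it presumably implements is lr = base_lr * (1 - iter / max_iter) ** power;
the sketch below is a hypothetical implementation under that assumption, keeping
the 10x/20x learning-rate multipliers of the parameter groups intact:

# Hypothetical poly_lr_scheduler sketch (assumed behavior, not this repo's code).
def poly_lr_scheduler(optimizer, init_lr, iter, lr_decay_iter, max_iter, power):
    if iter % lr_decay_iter != 0 or iter > max_iter:
        return
    factor = (1.0 - iter / max_iter) ** power
    for group in optimizer.param_groups:
        if 'base_lr' not in group:            # cache each group's starting lr once
            group['base_lr'] = group['lr']    # preserves the 10x / 20x multipliers
        group['lr'] = group['base_lr'] * factor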
Example #3
def train(config, cuda):
    # Auto-tune cuDNN
    torch.backends.cudnn.benchmark = True

    # Configuration
    device = get_device(cuda)
    CONFIG = Dict(yaml.safe_load(open(config)))

    # Dataset 10k or 164k
    dataset = get_dataset(CONFIG.DATASET.NAME)(
        root=CONFIG.DATASET.ROOT,
        split=CONFIG.DATASET.SPLIT.TRAIN,
        base_size=CONFIG.IMAGE.SIZE.TRAIN.BASE,
        crop_size=CONFIG.IMAGE.SIZE.TRAIN.CROP,
        mean=(CONFIG.IMAGE.MEAN.B, CONFIG.IMAGE.MEAN.G, CONFIG.IMAGE.MEAN.R),
        warp=CONFIG.DATASET.WARP_IMAGE,
        scale=CONFIG.DATASET.SCALES,
        flip=True,
    )

    # DataLoader
    loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=CONFIG.SOLVER.BATCH_SIZE.TRAIN,
        num_workers=CONFIG.DATALOADER.NUM_WORKERS,
        shuffle=True,
    )
    loader_iter = iter(loader)

    # Model
    model = setup_model(CONFIG.MODEL.INIT_MODEL,
                        CONFIG.DATASET.N_CLASSES,
                        train=True)
    model.to(device)

    # Optimizer
    optimizer = torch.optim.SGD(
        # cf lr_mult and decay_mult in train.prototxt
        params=[
            {
                "params": get_params(model.module, key="1x"),
                "lr": CONFIG.SOLVER.LR,
                "weight_decay": CONFIG.SOLVER.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="10x"),
                "lr": 10 * CONFIG.SOLVER.LR,
                "weight_decay": CONFIG.SOLVER.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="20x"),
                "lr": 20 * CONFIG.SOLVER.LR,
                "weight_decay": 0.0,
            },
        ],
        momentum=CONFIG.SOLVER.MOMENTUM,
    )

    # Learning rate scheduler
    scheduler = PolynomialLR(
        optimizer=optimizer,
        step_size=CONFIG.SOLVER.LR_DECAY,
        iter_max=CONFIG.SOLVER.ITER_MAX,
        power=CONFIG.SOLVER.POLY_POWER,
    )

    # Loss definition
    criterion = nn.CrossEntropyLoss(ignore_index=CONFIG.DATASET.IGNORE_LABEL)
    criterion.to(device)

    # TensorBoard logger
    writer = SummaryWriter(CONFIG.SOLVER.LOG_DIR)
    average_loss = MovingAverageValueMeter(CONFIG.SOLVER.AVERAGE_LOSS)

    # Freeze the batch norm pre-trained on COCO
    model.train()
    model.module.base.freeze_bn()

    for iteration in tqdm(
            range(1, CONFIG.SOLVER.ITER_MAX + 1),
            total=CONFIG.SOLVER.ITER_MAX,
            leave=False,
            dynamic_ncols=True,
    ):

        # Clear gradients (ready to accumulate)
        optimizer.zero_grad()

        loss = 0
        for _ in range(CONFIG.SOLVER.ITER_SIZE):
            try:
                images, labels = next(loader_iter)
            except StopIteration:  # loader exhausted: restart it
                loader_iter = iter(loader)
                images, labels = next(loader_iter)

            images = images.to(device)
            labels = labels.to(device)

            # Propagate forward
            logits = model(images)

            # Loss
            iter_loss = 0
            for logit in logits:
                # Resize labels for {100%, 75%, 50%, Max} logits
                _, _, H, W = logit.shape
                labels_ = resize_labels(labels, shape=(H, W))
                iter_loss += criterion(logit, labels_)

            # Backpropagate (just compute gradients wrt the loss)
            iter_loss /= CONFIG.SOLVER.ITER_SIZE
            iter_loss.backward()

            loss += float(iter_loss)

        average_loss.add(loss)

        # Update weights with accumulated gradients
        optimizer.step()

        # Update learning rate
        scheduler.step(epoch=iteration)

        # TensorBoard
        if iteration % CONFIG.SOLVER.ITER_TB == 0:
            writer.add_scalar("loss/train", average_loss.value()[0], iteration)
            for i, o in enumerate(optimizer.param_groups):
                writer.add_scalar("lr/group{}".format(i), o["lr"], iteration)
            if False:  # This produces a large log file
                for name, param in model.named_parameters():
                    name = name.replace(".", "/")
                    # Weight/gradient distribution
                    writer.add_histogram(name, param, iteration, bins="auto")
                    if param.requires_grad:
                        writer.add_histogram(name + "/grad",
                                             param.grad,
                                             iteration,
                                             bins="auto")

        # Save a model
        if iteration % CONFIG.SOLVER.ITER_SAVE == 0:
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.MODEL.SAVE_DIR,
                         "checkpoint_{}.pth".format(iteration)),
            )

        # To verify progress separately
        torch.save(
            model.module.state_dict(),
            osp.join(CONFIG.MODEL.SAVE_DIR, "checkpoint_current.pth"),
        )

    torch.save(
        model.module.state_dict(),
        osp.join(CONFIG.MODEL.SAVE_DIR, "checkpoint_final.pth"),
    )
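Note: the ITER_SIZE inner loop above is plain gradient accumulation: each
micro-batch loss is divided by ITER_SIZE before backward(), so the summed
gradients equal those of one large batch (for mean-reduced losses over
equal-sized micro-batches). A self-contained toy check of that equivalence:

# Toy demonstration of the gradient-accumulation identity used above.
import torch

w1 = torch.zeros(3, requires_grad=True)
w2 = torch.zeros(3, requires_grad=True)
data = [torch.ones(3), 2 * torch.ones(3)]          # two micro-batches
ITER_SIZE = len(data)

for x in data:                                     # accumulate micro-batch grads
    (((w1 - x) ** 2).mean() / ITER_SIZE).backward()

((w2 - torch.stack(data)) ** 2).mean().backward()  # one big batch
assert torch.allclose(w1.grad, w2.grad)            # identical gradients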
Example #4
    def train(self):
        torch.cuda.empty_cache()

        ######################
        # Save / Load model
        ######################

        if self.opt.continue_train:
            try:
                self.continue_from_latest_checkpoint()
            except CyganException as e:
                self.logger.error(e)
                self.opt.continue_train = False
                self.reset_save()

        else:
            self.reset_save()

        self.add_file_logger()

        ######################
        # Dataset
        ######################

        if self.opt.model == 'base':
            dataset = SteelyDataset(self.opt.genreA,
                                    self.opt.genreB,
                                    self.opt.phase,
                                    use_mix=False)
        else:
            dataset = SteelyDataset(self.opt.genreA,
                                    self.opt.genreB,
                                    self.opt.phase,
                                    use_mix=True)

        dataset_size = len(dataset)
        iter_num = int(dataset_size / self.opt.batch_size)

        self.logger.info(
            f'Dataset loaded, genreA: {self.opt.genreA}, genreB: {self.opt.genreB}, total size: {dataset_size}.'
        )

        ######################
        # Initiate
        ######################

        lambda_A = 10.0  # weight for cycle loss (A -> B -> A^)
        lambda_B = 10.0  # weight for cycle loss (B -> A -> B^)

        lambda_identity = 0.5

        criterionGAN = GANLoss(gan_mode='lsgan')

        criterionCycle = nn.L1Loss()

        criterionIdt = nn.L1Loss()

        GLoss_meter = MovingAverageValueMeter(self.opt.plot_every)
        DLoss_meter = MovingAverageValueMeter(self.opt.plot_every)
        CycleLoss_meter = MovingAverageValueMeter(self.opt.plot_every)

        # loss meters
        losses = {}
        scores = {}

        losses_dict = {'loss_G': [], 'loss_D': [], 'loss_C': [], 'epoch': []}

        ######################
        # Start Training
        ######################

        for epoch in range(self.opt.start_epoch, self.opt.max_epoch):
            loader = DataLoader(dataset,
                                batch_size=self.opt.batch_size,
                                shuffle=True,
                                num_workers=self.opt.num_threads,
                                drop_last=True)
            epoch_start_time = time.time()

            for i, data in enumerate(loader):

                real_A = torch.unsqueeze(data[:, 0, :, :],
                                         1).to(self.device, dtype=torch.float)
                real_B = torch.unsqueeze(data[:, 1, :, :],
                                         1).to(self.device, dtype=torch.float)

                gaussian_noise = torch.abs(
                    torch.normal(mean=torch.zeros(self.opt.data_shape),
                                 std=self.opt.gaussian_std)).to(
                                     self.device, dtype=torch.float)

                if self.opt.model == 'base':

                    ######################
                    # Generator
                    ######################

                    fake_B = self.generator_A2B(real_A)  # X -> Y'
                    fake_A = self.generator_B2A(real_B)  # Y -> X'

                    fake_B_copy = fake_B.detach().clone()  # detached copies for the image pool
                    fake_A_copy = fake_A.detach().clone()

                    DB_fake = self.discriminator_B(
                        fake_B + gaussian_noise
                    )  # discriminator_B provides feedback to generator_A2B
                    DA_fake = self.discriminator_A(fake_A + gaussian_noise)

                    loss_G_A2B = criterionGAN(DB_fake, True)
                    loss_G_B2A = criterionGAN(DA_fake, True)

                    # cycle consistency
                    cycle_A = self.generator_B2A(fake_B)  # Y' -> X^
                    cycle_B = self.generator_A2B(fake_A)  # X' -> Y^

                    loss_cycle_A2B = criterionCycle(cycle_A, real_A) * lambda_A
                    loss_cycle_B2A = criterionCycle(cycle_B, real_B) * lambda_B

                    # identity loss
                    if lambda_identity > 0:
                        # generator_A2B should be an identity map when fed real_B: ||G(real_B) - real_B||
                        idt_A = self.generator_A2B(real_B)
                        idt_B = self.generator_B2A(real_A)
                        loss_idt_A = criterionIdt(
                            idt_A, real_B) * lambda_A * lambda_identity
                        loss_idt_B = criterionIdt(
                            idt_B, real_A) * lambda_A * lambda_identity

                    else:
                        loss_idt_A = 0.
                        loss_idt_B = 0.

                    loss_idt = loss_idt_A + loss_idt_B

                    self.GA2B_optimizer.zero_grad()  # clear generator_A2B gradients
                    loss_A2B = loss_G_A2B + loss_cycle_A2B + loss_idt_A
                    loss_A2B.backward(retain_graph=True)
                    self.GA2B_optimizer.step()

                    self.GB2A_optimizer.zero_grad()  # clear generator_B2A gradients
                    loss_B2A = loss_G_B2A + loss_cycle_B2A + loss_idt_B
                    loss_B2A.backward(retain_graph=True)
                    self.GB2A_optimizer.step()

                    cycle_loss = loss_cycle_A2B + loss_cycle_B2A
                    CycleLoss_meter.add(cycle_loss.item())

                    loss_G = loss_G_A2B + loss_G_B2A + loss_idt
                    GLoss_meter.add(loss_G.item())

                    ######################
                    # Sample
                    ######################
                    fake_A_sample, fake_B_sample = (None, None)
                    if self.opt.use_image_pool:
                        [fake_A_sample,
                         fake_B_sample] = self.pool([fake_A_copy, fake_B_copy])

                    ######################
                    # Discriminator
                    ######################

                    # loss_real
                    DA_real = self.discriminator_A(real_A + gaussian_noise)
                    DB_real = self.discriminator_B(real_B + gaussian_noise)

                    loss_DA_real = criterionGAN(DA_real, True)
                    loss_DB_real = criterionGAN(DB_real, True)

                    # loss fake
                    if self.opt.use_image_pool:
                        DA_fake_sample = self.discriminator_A(fake_A_sample +
                                                              gaussian_noise)
                        DB_fake_sample = self.discriminator_B(fake_B_sample +
                                                              gaussian_noise)

                        loss_DA_fake = criterionGAN(DA_fake_sample, False)
                        loss_DB_fake = criterionGAN(DB_fake_sample, False)

                    else:
                        loss_DA_fake = criterionGAN(DA_fake, False)
                        loss_DB_fake = criterionGAN(DB_fake, False)

                    # loss and backward
                    self.DA_optimizer.zero_grad()
                    loss_DA = (loss_DA_real + loss_DA_fake) * 0.5
                    loss_DA.backward()
                    self.DA_optimizer.step()

                    self.DB_optimizer.zero_grad()
                    loss_DB = (loss_DB_real + loss_DB_fake) * 0.5
                    loss_DB.backward()
                    self.DB_optimizer.step()

                    loss_D = loss_DA + loss_DB
                    DLoss_meter.add(loss_D.item())

                else:
                    real_mixed = torch.unsqueeze(data[:, 2, :, :],
                                                 1).to(self.device,
                                                       dtype=torch.float)

                    ######################
                    # Generator
                    ######################

                    fake_B = self.generator_A2B(real_A)  # X -> Y'
                    fake_A = self.generator_B2A(real_B)  # Y -> X'

                    fake_B_copy = fake_B.detach().clone()
                    fake_A_copy = fake_A.detach().clone()

                    DB_fake = self.discriminator_B(
                        fake_B + gaussian_noise
                    )  # discriminator_B provides feedback to generator_A2B
                    DA_fake = self.discriminator_A(fake_A + gaussian_noise)

                    loss_G_A2B = criterionGAN(DB_fake, True)
                    loss_G_B2A = criterionGAN(DA_fake, True)

                    # cycle consistency
                    cycle_A = self.generator_B2A(fake_B)  # Y' -> X^
                    cycle_B = self.generator_A2B(fake_A)  # X' -> Y^

                    loss_cycle_A2B = criterionCycle(cycle_A, real_A) * lambda_A
                    loss_cycle_B2A = criterionCycle(cycle_B, real_B) * lambda_B

                    # identity loss
                    if lambda_identity > 0:
                        # generator_A2B should be an identity map when fed real_B: ||G(real_B) - real_B||
                        idt_A = self.generator_A2B(real_B)
                        idt_B = self.generator_B2A(real_A)
                        loss_idt_A = criterionIdt(
                            idt_A, real_B) * lambda_A * lambda_identity
                        loss_idt_B = criterionIdt(
                            idt_B, real_A) * lambda_A * lambda_identity

                    else:
                        loss_idt_A = 0.
                        loss_idt_B = 0.

                    loss_idt = loss_idt_A + loss_idt_B

                    self.GA2B_optimizer.zero_grad()  # clear generator_A2B gradients
                    loss_A2B = loss_G_A2B + loss_cycle_A2B + loss_idt_A
                    loss_A2B.backward(retain_graph=True)
                    self.GA2B_optimizer.step()

                    self.GB2A_optimizer.zero_grad()  # clear generator_B2A gradients
                    loss_B2A = loss_G_B2A + loss_cycle_B2A + loss_idt_B
                    loss_B2A.backward(retain_graph=True)
                    self.GB2A_optimizer.step()

                    cycle_loss = loss_cycle_A2B + loss_cycle_B2A
                    CycleLoss_meter.add(cycle_loss.item())

                    loss_G = loss_G_A2B + loss_G_B2A + loss_idt
                    GLoss_meter.add(loss_G.item())

                    ######################
                    # Sample
                    ######################
                    fake_A_sample, fake_B_sample = (None, None)
                    if self.opt.use_image_pool:
                        [fake_A_sample,
                         fake_B_sample] = self.pool([fake_A_copy, fake_B_copy])

                    ######################
                    # Discriminator
                    ######################

                    # loss_real
                    DA_real = self.discriminator_A(real_A + gaussian_noise)
                    DB_real = self.discriminator_B(real_B + gaussian_noise)

                    DA_real_all = self.discriminator_A_all(real_mixed +
                                                           gaussian_noise)
                    DB_real_all = self.discriminator_B_all(real_mixed +
                                                           gaussian_noise)

                    loss_DA_real = criterionGAN(DA_real, True)
                    loss_DB_real = criterionGAN(DB_real, True)

                    loss_DA_all_real = criterionGAN(DA_real_all, True)
                    loss_DB_all_real = criterionGAN(DB_real_all, True)

                    # loss fake
                    if self.opt.use_image_pool:
                        DA_fake_sample = self.discriminator_A(fake_A_sample +
                                                              gaussian_noise)
                        DB_fake_sample = self.discriminator_B(fake_B_sample +
                                                              gaussian_noise)

                        DA_fake_sample_all = self.discriminator_A_all(
                            fake_A_sample + gaussian_noise)
                        DB_fake_sample_all = self.discriminator_B_all(
                            fake_B_sample + gaussian_noise)

                        loss_DA_all_fake = criterionGAN(
                            DA_fake_sample_all, False)
                        loss_DB_all_fake = criterionGAN(
                            DB_fake_sample_all, False)

                        loss_DA_fake = criterionGAN(DA_fake_sample, False)
                        loss_DB_fake = criterionGAN(DB_fake_sample, False)

                    else:
                        DA_fake_all = self.discriminator_A_all(fake_A_copy +
                                                               gaussian_noise)
                        DB_fake_all = self.discriminator_B_all(fake_B_copy +
                                                               gaussian_noise)

                        loss_DA_all_fake = criterionGAN(DA_fake_all, False)
                        loss_DB_all_fake = criterionGAN(DB_fake_all, False)

                        loss_DA_fake = criterionGAN(DA_fake, False)
                        loss_DB_fake = criterionGAN(DB_fake, False)

                    # loss and backward
                    self.DA_optimizer.zero_grad()
                    loss_DA = (loss_DA_real + loss_DA_fake) * 0.5
                    loss_DA.backward()
                    self.DA_optimizer.step()

                    self.DB_optimizer.zero_grad()
                    loss_DB = (loss_DB_real + loss_DB_fake) * 0.5
                    loss_DB.backward()
                    self.DB_optimizer.step()

                    self.DA_all_optimizer.zero_grad()
                    loss_DA_all = (loss_DA_all_real + loss_DA_all_fake) * 0.5
                    loss_DA_all.backward()
                    self.DA_all_optimizer.step()

                    self.DB_all_optimizer.zero_grad()
                    loss_DB_all = (loss_DB_all_real + loss_DB_all_fake) * 0.5
                    loss_DB_all.backward()
                    self.DB_all_optimizer.step()

                    loss_D = loss_DA + loss_DB + loss_DB_all + loss_DA_all
                    DLoss_meter.add(loss_D.item())

                ######################
                # Snapshot
                ######################

                if i % self.opt.plot_every == 0:
                    file_name = self.opt.name + '_snap_%03d_%05d.png' % (
                        epoch,
                        i,
                    )
                    # test_path = os.path.join(self.opt.checkpoint_path, file_name)
                    # tv.utils.save_image(fake_B, test_path, normalize=True)
                    # self.logger.info(f'Snapshot {file_name} saved.')

                    losses['loss_C'] = float(CycleLoss_meter.value()[0])
                    losses['loss_G'] = float(GLoss_meter.value()[0])
                    losses['loss_D'] = float(DLoss_meter.value()[0])

                    self.logger.info(str(losses))
                    self.logger.info('Epoch {} progress: {:.2%}\n'.format(
                        epoch, i / iter_num))

            # save model
            if epoch % self.opt.save_every == 0 or epoch == self.opt.max_epoch - 1:
                self.save_model(epoch)

            ######################
            # lr_scheduler
            ######################

            self.GA2B_scheduler.step(epoch)
            self.GB2A_scheduler.step(epoch)
            self.DA_scheduler.step(epoch)
            self.DB_scheduler.step(epoch)

            if self.opt.model != 'base':
                self.DA_all_scheduler.step(epoch)
                self.DB_all_scheduler.step(epoch)

            epoch_time = int(time.time() - epoch_start_time)

            ######################
            # Logging
            ######################

            self.logger.info(
                f'Epoch {epoch} finished, cost time {epoch_time}\n')
            self.logger.info(str(losses) + '\n\n')

            ######################
            # Loss_Dict
            ######################

            losses_dict['loss_C'].append(losses['loss_C'])
            losses_dict['loss_G'].append(losses['loss_G'])
            losses_dict['loss_D'].append(losses['loss_D'])
            losses_dict['epoch'].append(epoch)

            with open(self.opt.loss_save_path, 'w') as f:
                json.dump(losses_dict, f)
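Note: self.pool is not defined in this excerpt. CycleGAN-style trainers usually
feed the discriminators from a history buffer of past generated samples; the
class below is a plausible sketch of such a pool (its name, buffer size, and
callable interface are assumptions inferred from the self.pool([...]) call above):

# Hypothetical image-pool sketch: returns either the incoming fake or a random
# previously stored one, refreshing the buffer as it goes.
import random

class ImagePool:
    def __init__(self, pool_size=50):
        self.pool_size = pool_size
        self.images = []

    def __call__(self, images):
        out = []
        for image in images:
            image = image.detach()
            if len(self.images) < self.pool_size:
                self.images.append(image)       # still filling the buffer
                out.append(image)
            elif random.random() > 0.5:         # return a stored fake, keep the new one
                idx = random.randrange(self.pool_size)
                out.append(self.images[idx])
                self.images[idx] = image
            else:                               # return the current fake unchanged
                out.append(image)
        return out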
Example #5
        img, bbox, label = trainval_dataset[i]  # retrieve one sample from the dataset
        img = img / 255

        loss = model.loss(img, bbox, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_value = loss.cpu().data.numpy()
        avg_loss.add(loss_value)
        ma20_loss.add(float(loss_value))
        print(
            '[epoch:{}]  [batch:{}/{}]  [sample_loss:{:.4f}]  [avg_loss:{:.4f}]  [ma20_loss:{:.4f}]'
            .format(epoch, i, len(trainval_dataset), loss_value,
                    avg_loss.value()[0],
                    ma20_loss.value()[0]))

model.eval()
for i in range(len(test_dataset)):
    img, _, _ = test_dataset[i]
    imgx = img / 255
    bbox_out, class_out, prob_out = model.predict(imgx, prob_threshold=0.95)

    vis_bbox(img,
             bbox_out,
             class_out,
             prob_out,
             label_names=voc_bbox_label_names)
    fig = plt.gcf()
    fig.set_size_inches(11, 5)  # size the figure before displaying it
    plt.show()
Example #6
File: plot.py  Project: lotharschulz/DL
valid_f = open(text_file)
valid_d = valid_f.readlines()
valid_f.close()

train_iter = []
train_loss = []
i = 0
ma_loss = MovingAverageValueMeter(windowsize=500)
for s in train_d:
    i = i + 1
    t = s.strip().split(' ')
    t_iter = int(t[0])
    ma_loss.add(float(t[1]))
    if i % 500 == 0:
        train_iter.append(t_iter)
        train_loss.append(ma_loss.value()[0])

valid_iter = []
valid_loss = []
for s in valid_d:
    t = s.strip().split(' ')
    t_iter = int(t[0])
    t_loss = float(t[1])
    valid_iter.append(t_iter)
    valid_loss.append(t_loss)

#==========
#plt.semilogx(x, b, marker='^', linewidth=0.5, color='k')
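Note: MovingAverageValueMeter, used throughout these examples, comes from
torchnet.meter: add(value) pushes a sample into a fixed-size window and
value() returns the (moving average, standard deviation) pair, which is why
.value()[0] appears everywhere. A rough behavioral sketch, not the actual
torchnet implementation:

# Behavioral sketch of a moving-average meter with a fixed window.
from collections import deque
import math

class MovingAverageSketch:
    def __init__(self, windowsize):
        self.window = deque(maxlen=windowsize)

    def add(self, value):
        self.window.append(float(value))

    def value(self):
        n = len(self.window)
        if n == 0:
            return 0.0, 0.0
        mean = sum(self.window) / n
        var = sum((v - mean) ** 2 for v in self.window) / max(n - 1, 1)
        return mean, math.sqrt(var)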
Example #7
def main(config, cuda):
    # Configuration
    with open(config) as f:
        CONFIG = yaml.safe_load(f)

    cuda = cuda and torch.cuda.is_available()

    # Dataset
    dataset = get_dataset(CONFIG['DATASET'])(
        root=CONFIG['ROOT'],
        split='train',
        image_size=(CONFIG['IMAGE']['SIZE']['TRAIN'],
                    CONFIG['IMAGE']['SIZE']['TRAIN']),
        scale=True,
        flip=True,
        # preload=True
    )

    # DataLoader
    loader = torch.utils.data.DataLoader(dataset=dataset,
                                         batch_size=CONFIG['BATCH_SIZE'],
                                         num_workers=CONFIG['NUM_WORKERS'],
                                         shuffle=True)
    loader_iter = iter(loader)

    # Model
    model = DeepLabV2_ResNet101_MSC(n_classes=CONFIG['N_CLASSES'])
    state_dict = torch.load(CONFIG['INIT_MODEL'])
    model.load_state_dict(state_dict, strict=False)  # Skip "aspp" layer
    if cuda:
        model.cuda()

    # Optimizer
    optimizer = {
        'sgd':
        torch.optim.SGD(
            params=[
                {
                    'params': get_1x_lr_params(model),
                    'lr': float(CONFIG['LR'])
                },
                {
                    'params': get_10x_lr_params(model),
                    'lr': 10 * float(CONFIG['LR'])
                }  # NOQA
            ],
            lr=float(CONFIG['LR']),
            momentum=float(CONFIG['MOMENTUM']),
            weight_decay=float(CONFIG['WEIGHT_DECAY'])),
    }.get(CONFIG['OPTIMIZER'])

    # Loss definition
    criterion = CrossEntropyLoss2d(ignore_index=CONFIG['IGNORE_LABEL'])
    if cuda:
        criterion.cuda()

    # TensorBoard Logger
    writer = SummaryWriter(CONFIG['LOG_DIR'])
    loss_meter = MovingAverageValueMeter(20)

    model.train()
    for iteration in tqdm(range(1, CONFIG['ITER_MAX'] + 1),
                          total=CONFIG['ITER_MAX'],
                          leave=False,
                          dynamic_ncols=True):

        # Polynomial lr decay
        poly_lr_scheduler(optimizer=optimizer,
                          init_lr=float(CONFIG['LR']),
                          iter=iteration - 1,
                          lr_decay_iter=CONFIG['LR_DECAY'],
                          max_iter=CONFIG['ITER_MAX'],
                          power=CONFIG['POLY_POWER'])

        optimizer.zero_grad()

        iter_loss = 0
        for i in range(1, CONFIG['ITER_SIZE'] + 1):
            data, target = next(loader_iter)

            # Image
            data = data.cuda() if cuda else data
            data = Variable(data)

            # Forward propagation
            outputs = model(data)

            # Label
            target = resize_target(target, outputs[0].size(2))
            target = target.cuda() if cuda else target
            target = Variable(target)

            # Aggregate losses for [100%, 75%, 50%, Max]
            loss = 0
            for output in outputs:
                loss += criterion(output, target)

            loss /= CONFIG['ITER_SIZE']
            iter_loss += float(loss)
            loss.backward()

            # Reload dataloader
            if ((iteration - 1) * CONFIG['ITER_SIZE'] + i) % len(loader) == 0:
                loader_iter = iter(loader)

        loss_meter.add(iter_loss)

        # Back propagation
        optimizer.step()

        # TensorBoard
        if iteration % CONFIG['ITER_TF'] == 0:
            writer.add_scalar('train_loss', loss_meter.value()[0], iteration)

        # Save a model
        if iteration % CONFIG['ITER_SNAP'] == 0:
            torch.save(
                model.state_dict(),
                osp.join(CONFIG['SAVE_DIR'],
                         'checkpoint_{}.pth.tar'.format(iteration)))  # NOQA
            writer.add_text('log', 'Saved a model', iteration)

    torch.save(model.state_dict(),
               osp.join(CONFIG['SAVE_DIR'], 'checkpoint_final.pth.tar'))
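Note: CrossEntropyLoss2d (examples #2, #7, and #8) is not defined in this
excerpt. In early PyTorch it was a common wrapper for spatial targets; a
plausible sketch of such a wrapper:

# Plausible CrossEntropyLoss2d sketch: log-softmax over the class dimension,
# then NLL loss on (N, C, H, W) logits against (N, H, W) integer targets.
import torch.nn as nn
import torch.nn.functional as F

class CrossEntropyLoss2d(nn.Module):
    def __init__(self, ignore_index=255):
        super().__init__()
        self.nll_loss = nn.NLLLoss(ignore_index=ignore_index)

    def forward(self, inputs, targets):
        return self.nll_loss(F.log_softmax(inputs, dim=1), targets)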
Example #8
def main(config, cuda, gpu):
    # Configuration
    CONFIG = Dict(yaml.safe_load(open(config)))

    # CUDA check
    cuda = cuda and torch.cuda.is_available()

    if cuda:
        gpu_ids = [int(string) for string in gpu.split(',')]
        current_device = torch.cuda.current_device()
        print('Running on', torch.cuda.get_device_name(current_device),
              gpu_ids)

    # Dataset
    dataset = CocoStuff10k(
        root=CONFIG.ROOT,
        split='train',
        image_size=513,
        crop_size=CONFIG.IMAGE.SIZE.TRAIN,
        scale=True,
        flip=True,
    )

    # DataLoader
    loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=CONFIG.BATCH_SIZE,
        num_workers=CONFIG.NUM_WORKERS,
        shuffle=True,
    )
    loader_iter = iter(loader)

    # Model
    model = DeepLabV2_ResNet101_MSC(n_classes=CONFIG.N_CLASSES)
    state_dict = torch.load(CONFIG.INIT_MODEL)
    model.load_state_dict(state_dict, strict=False)  # Skip "aspp" layer
    model = nn.DataParallel(model, device_ids=gpu_ids)
    if cuda:
        model.cuda()

    # Optimizer
    optimizer = {
        'sgd':
        torch.optim.SGD(
            # cf lr_mult and decay_mult in train.prototxt
            params=[{
                'params': get_lr_params(model.module, key='1x'),
                'lr': CONFIG.LR,
                'weight_decay': CONFIG.WEIGHT_DECAY
            }, {
                'params': get_lr_params(model.module, key='10x'),
                'lr': 10 * CONFIG.LR,
                'weight_decay': CONFIG.WEIGHT_DECAY
            }, {
                'params': get_lr_params(model.module, key='20x'),
                'lr': 20 * CONFIG.LR,
                'weight_decay': 0.0
            }],
            momentum=CONFIG.MOMENTUM,
        ),
    }.get(CONFIG.OPTIMIZER)

    # Loss definition
    criterion = CrossEntropyLoss2d(ignore_index=CONFIG.IGNORE_LABEL)
    if cuda:
        criterion.cuda()

    # TensorBoard Logger
    writer = SummaryWriter(CONFIG.LOG_DIR)
    loss_meter = MovingAverageValueMeter(20)

    model.train()
    model.module.scale.freeze_bn()

    for iteration in tqdm(
            range(1, CONFIG.ITER_MAX + 1),
            total=CONFIG.ITER_MAX,
            leave=False,
            dynamic_ncols=True,
    ):

        # Set a learning rate
        poly_lr_scheduler(
            optimizer=optimizer,
            init_lr=CONFIG.LR,
            iter=iteration - 1,
            lr_decay_iter=CONFIG.LR_DECAY,
            max_iter=CONFIG.ITER_MAX,
            power=CONFIG.POLY_POWER,
        )

        # Clear gradients (ready to accumulate)
        optimizer.zero_grad()

        iter_loss = 0
        for i in range(1, CONFIG.ITER_SIZE + 1):
            data, target = next(loader_iter)

            # Image
            data = data.cuda() if cuda else data
            data = Variable(data)

            # Propagate forward
            outputs = model(data)

            # Loss
            loss = 0
            for output in outputs:
                # Resize target for {100%, 75%, 50%, Max} outputs
                target_ = resize_target(target, output.size(2))
                target_ = target_.cuda() if cuda else target_
                target_ = Variable(target_)
                # Compute crossentropy loss
                loss += criterion(output, target_)

            # Backpropagate (just compute gradients wrt the loss)
            loss /= float(CONFIG.ITER_SIZE)
            loss.backward()

            iter_loss += float(loss)

            # Reload dataloader
            if ((iteration - 1) * CONFIG.ITER_SIZE + i) % len(loader) == 0:
                loader_iter = iter(loader)

        loss_meter.add(iter_loss)

        # Update weights with accumulated gradients
        optimizer.step()

        # TensorBoard
        if iteration % CONFIG.ITER_TF == 0:
            writer.add_scalar('train_loss', loss_meter.value()[0], iteration)
            for i, o in enumerate(optimizer.param_groups):
                writer.add_scalar('train_lr_group{}'.format(i), o['lr'],
                                  iteration)
            if iteration % 1000 == 0:  # log heavy histograms only every 1000 iterations
                for name, param in model.named_parameters():
                    name = name.replace('.', '/')
                    writer.add_histogram(name, param, iteration, bins="auto")
                    if param.requires_grad:
                        writer.add_histogram(name + '/grad',
                                             param.grad,
                                             iteration,
                                             bins="auto")

        # Save a model
        if iteration % CONFIG.ITER_SNAP == 0:
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.SAVE_DIR,
                         'checkpoint_{}.pth'.format(iteration)),
            )

        # Save a model
        if iteration % 100 == 0:
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.SAVE_DIR, 'checkpoint_current.pth'),
            )

    torch.save(
        model.module.state_dict(),
        osp.join(CONFIG.SAVE_DIR, 'checkpoint_final.pth'),
    )
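Note: examples #2 and #3 restart an exhausted DataLoader iterator inside an
except block, while this example predicts exhaustion by counting batches. A
small generator expresses the same recycling idea more directly (the helper
name here is ours, for illustration):

# Yield batches forever, transparently restarting the DataLoader each epoch.
def infinite_batches(loader):
    while True:
        for batch in loader:
            yield batch

# usage sketch:
#   batch_iter = infinite_batches(loader)
#   data, target = next(batch_iter)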
Example #9
    def train(self):
        torch.cuda.empty_cache()

        ######################
        # Save / Load model
        ######################

        if self.opt.continue_train:
            try:
                self.continue_from_latest_checkpoint()
            except Exception as e:
                self.logger.error(e)
                return

        else:
            self.reset_save()

        self.logger.add_file_logger(self.opt.log_path)

        ######################
        # Dataset
        ######################

        dataset = ClassifierDataset(self.opt.genreA, self.opt.genreB, 'train')

        test_dataset = ClassifierDataset(self.opt.genreA, self.opt.genreB,
                                         'test')

        dataset_size = len(dataset)
        iter_num = int(dataset_size / self.opt.batch_size)

        plot_every = iter_num // 10

        self.logger.info(
            f'Dataset loaded, genreA: {self.opt.genreA}, genreB: {self.opt.genreB}, total size: {dataset_size}.'
        )

        ######################
        # Initiate
        ######################

        softmax_criterion = nn.BCELoss()

        Loss_meter = MovingAverageValueMeter(self.opt.plot_every)

        losses = {}

        ######################
        # Start Training
        ######################

        test_data = torch.from_numpy(test_dataset.get_data()).to(
            self.device, dtype=torch.float)

        gaussian_noise = torch.normal(mean=torch.zeros(test_data.shape),
                                      std=self.opt.gaussian_std).to(
                                          self.device, dtype=torch.float)
        # test_data += gaussian_noise

        real_test_label = torch.from_numpy(test_dataset.get_labels()).view(
            -1, 2).to(self.device, dtype=torch.float)

        for epoch in range(self.opt.start_epoch, self.opt.max_epoch):
            loader = DataLoader(dataset,
                                batch_size=self.opt.batch_size,
                                shuffle=True,
                                num_workers=self.opt.num_threads,
                                drop_last=True)
            epoch_start_time = time.time()

            for i, batch in enumerate(loader):
                data = batch[0].to(self.device, dtype=torch.float)

                real_label = batch[1].view(self.opt.batch_size,
                                           2).to(self.device,
                                                 dtype=torch.float)

                self.classifier_optimizer.zero_grad()

                estimate_train = self.classifier(data)

                loss = softmax_criterion(estimate_train, real_label)

                loss.backward()

                self.classifier_optimizer.step()

                Loss_meter.add(loss.item())

                # test
                if i % plot_every == 0:
                    with torch.no_grad():
                        estimate_test = self.classifier(test_data)
                    estimate_test = nn.functional.softmax(estimate_test, dim=1)
                    test_prediction = torch.argmax(estimate_test, 1).eq(
                        torch.argmax(real_test_label, 1))
                    test_accuracy = torch.mean(
                        test_prediction.type(torch.float32)).cpu()

                    self.logger.info(
                        'Epoch {} progress {:.2%}: Loss: {}, Accuracy: {}\n'.
                        format(epoch, i / iter_num,
                               Loss_meter.value()[0], test_accuracy))

            if epoch % self.opt.save_every == 0 or epoch == self.opt.max_epoch - 1:
                self.save_model(epoch)

            self.classifier_scheduler.step(epoch)

            epoch_time = int(time.time() - epoch_start_time)
            self.logger.info(
                f'Epoch {epoch} finished, cost time {epoch_time}\n')
Example #10
def train():
    """Create the model and start the training."""
    # === 1.Configuration
    print(CONFIG_PATH)
    # === select which GPUs to use
    # === here we assume 8 GPUs, with indices 0,1,2,...,7
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, CONFIG.EXP.GPU_IDX))

    device = get_device(torch.cuda.is_available())
    cudnn.benchmark = True
    comment_init = ""
    writer = SummaryWriter(comment=comment_init)  # Setup loss logger
    # === MovingAverageValueMeter(self, windowsize)
    # === - add(value): record a value
    # === - reset()
    # === - value(): returns the moving average and its standard deviation
    average_loss = MovingAverageValueMeter(CONFIG.SOLVER.AVERAGE_LOSS)
    if not os.path.exists(CONFIG.MODEL.SAVE_PATH):
        os.makedirs(CONFIG.MODEL.SAVE_PATH)
    # Path to save models
    checkpoint_dir = os.path.join(
        CONFIG.EXP.OUTPUT_DIR,  # ./data
        "models",
        CONFIG.MODEL.NAME.lower(),  # DeepLabV2_ResNet101_MSC
        CONFIG.DATASET.SPLIT.TRAIN,  # train_aug
    )
    # === checkpoint_dir: ./data/DeepLabV2_ResNet101_MSC/train_aug
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    print("Checkpoint dst:", checkpoint_dir)

    # === 2.Dataloader ===
    trainloader = data.DataLoader(
        VOCDataSet(
            CONFIG.DATASET.DIRECTORY,
            CONFIG.DATASET.LIST_PATH,
            max_iters=CONFIG.SOLVER.ITER_MAX * CONFIG.SOLVER.BATCH_SIZE.TRAIN,
            crop_size=(CONFIG.IMAGE.SIZE.TRAIN, CONFIG.IMAGE.SIZE.TRAIN),
            scale=CONFIG.DATASET.RANDOM.SCALE,
            mirror=CONFIG.DATASET.RANDOM.MIRROR,
            mean=IMG_MEAN,
            label_path=CONFIG.DATASET.SEG_LABEL),  # for training 
        batch_size=CONFIG.SOLVER.BATCH_SIZE.TRAIN,
        shuffle=True,
        num_workers=CONFIG.DATALOADER.NUM_WORKERS,
        pin_memory=True)

    # iter(dataloader) returns an iterator; batches can then be fetched with next()
    # loader_iter = iter(trainloader)

    # === 3.Create network & weights ===
    print("Model:", CONFIG.MODEL.NAME)

    # model = DeepLabV2_ResNet101_MSC(n_classes=CONFIG.DATASET.N_CLASSES)
    model = DeepLabV2_DRN105_MSC(n_classes=CONFIG.DATASET.N_CLASSES)
    state_dict = torch.load(CONFIG.MODEL.INIT_MODEL)
    # model.base.load_state_dict(state_dict, strict=False)  # to skip ASPP
    print("    Init:", CONFIG.MODEL.INIT_MODEL)
    # === show the weights that are skipped during init
    for m in model.base.state_dict().keys():
        if m not in state_dict.keys():
            print("    Skip init:", m)

    # === DeepLabv2 = Res101+ASPP
    # === model.base = DeepLabv2
    # === model = MSC(DeepLabv2)
    # model.base.load_state_dict(state_dict,
    #                            strict=False)  # strict=False to skip ASPP
    model = nn.DataParallel(model)  # multi-GPU
    model.to(device)  # move to GPU if available
    # === 4.Loss definition
    criterion = nn.CrossEntropyLoss(ignore_index=CONFIG.DATASET.IGNORE_LABEL)
    criterion.to(device)  # move to GPU if available

    # === 5.optimizer ===
    optimizer = torch.optim.SGD(
        # cf lr_mult and decay_mult in train.prototxt
        params=[
            {
                "params": get_params(model.module, key="1x"),
                "lr": CONFIG.SOLVER.LR,
                "weight_decay": CONFIG.SOLVER.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="10x"),
                "lr": 10 * CONFIG.SOLVER.LR,
                "weight_decay": CONFIG.SOLVER.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="20x"),
                "lr": 20 * CONFIG.SOLVER.LR,
                "weight_decay": 0.0,
            },
        ],
        momentum=CONFIG.SOLVER.MOMENTUM,
    )
    # Learning rate scheduler
    scheduler = PolynomialLR(
        optimizer=optimizer,
        step_size=CONFIG.SOLVER.LR_DECAY,
        iter_max=CONFIG.SOLVER.ITER_MAX,
        power=CONFIG.SOLVER.POLY_POWER,
    )

    time_start = time.time()  # set start time
    # === training iteration ===
    for i_iter, batch in enumerate(trainloader, start=1):
        torch.set_grad_enabled(True)
        model.train()
        model.module.base.freeze_bn()
        optimizer.zero_grad()
        images, labels, _, _ = batch

        logits = model(images.to(device))
        # <<<<<<<<<<<<<<<<<<<<
        # === Loss
        # === logits = [logits] + logits_pyramid + [logits_max]
        iter_loss = 0
        loss = 0
        for logit in logits:
            # Resize labels for {100%, 75%, 50%, Max} logits
            _, _, H, W = logit.shape
            labels_ = resize_labels(labels, size=(H, W))
            iter_loss += criterion(logit, labels_.to(device))
        # iter_loss /= CONFIG.SOLVER.ITER_SIZE
        iter_loss /= 4
        iter_loss.backward()
        loss += float(iter_loss)

        average_loss.add(loss)
        # Update weights with accumulated gradients
        optimizer.step()

        # Update learning rate
        scheduler.step(epoch=i_iter)

        # TensorBoard
        writer.add_scalar("loss", average_loss.value()[0], global_step=i_iter)
        print(
            'iter/max_iter = [{}/{}]  completed, loss = {:4.3} time:{}'.format(
                i_iter, CONFIG.SOLVER.ITER_MAX,
                average_loss.value()[0], show_timing(time_start, time.time())))
        # print('iter = ', i_iter, 'of', args.num_steps, '',
        #       loss.data.cpu().numpy())

        # === save final model
        if i_iter >= CONFIG.SOLVER.ITER_MAX:
            print('save final model as...{}'.format(
                osp.join(CONFIG.MODEL.SAVE_PATH,
                         'VOC12_' + str(CONFIG.SOLVER.ITER_MAX) + '.pth')))
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.MODEL.SAVE_PATH,
                         'VOC12_' + str(CONFIG.SOLVER.ITER_MAX) + '.pth'))
            break
        if i_iter % CONFIG.EXP.EVALUATE_ITER == 0:
            print("Evaluation....")
            evaluate_gpu(model, writer, i_iter)

        # === Save the model every 250 iterations =====================
        # Because DataParallel prefixes every layer name with 'module.',
        # we save model.module.state_dict() here.
        # =============================================================
        if i_iter % CONFIG.MODEL.SAVE_EVERY_ITER == 0:
            print('saving model ...')
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.MODEL.SAVE_PATH,
                         'VOC12_{}.pth'.format(i_iter)))
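Note: resize_labels (also used in example #3, there with a shape= keyword) must
downsample integer label maps with nearest-neighbor sampling only, since any
interpolation would blend class ids. A plausible sketch for the size=(H, W)
variant, assuming class ids fit in uint8 (the 255 ignore label does):

# Plausible resize_labels sketch: nearest-neighbor resize of (N, H, W) label maps.
import numpy as np
import torch
from PIL import Image

def resize_labels(labels, size):
    resized = []
    for label in labels:
        im = Image.fromarray(label.cpu().numpy().astype(np.uint8))
        im = im.resize((size[1], size[0]), resample=Image.NEAREST)  # PIL wants (W, H)
        resized.append(np.asarray(im))
    return torch.from_numpy(np.stack(resized)).long()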
Example #11
def train(**kwargs):
    opt._parse(kwargs)

    image_folder_path = 'DataSets/images/'
    csv_file_path = 'DataSets/labels.csv'

    dataset = DataSets(csv_file_path, image_folder_path)
    data_size = len(dataset)
    indices = list(range(data_size))
    split = int(np.floor(data_size * 0.2))
    np.random.seed(42)
    np.random.shuffle(indices)
    train_indices, val_indices = indices[split:], indices[:split]
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    valid_sampler = torch.utils.data.SubsetRandomSampler(val_indices)

    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=1,
                                               sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             sampler=valid_sampler)
    print('data loaded')

    avg_loss = AverageValueMeter()
    ma20_loss = MovingAverageValueMeter(windowsize=20)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    start_epoch = 0
    best_map = -100
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    optimizer = optim.SGD(trainer.faster_rcnn.parameters(),
                          lr=opt.lr,
                          momentum=0.9)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    if opt.load_path:
        print('load pretrained model from %s' % opt.load_path)
        checkpoint = torch.load(opt.load_path)
        start_epoch = checkpoint['epoch']
        best_map = checkpoint['best_map']
        trainer.faster_rcnn.load_state_dict(checkpoint['model_state'])
        optimizer.load_state_dict(checkpoint['optimizer_state'])
        print("> Loaded checkpoint '{}' (epoch {})".format(
            args.resume, start_epoch))

    #trainer.vis.text(dataset.db.label_names, win='labels')

    # set up TensorBoard for visualization
    writer = SummaryWriter('runs/' + opt.log_root)

    for epoch in range(start_epoch, opt.epoch):
        trainer.train(mode=True)  # must be set to training mode
        for ii, (img, _, _, bbox_, label_, scale,
                 _) in enumerate(train_loader):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            optimizer.zero_grad()
            loss = trainer.forward(img, bbox, label, scale)
            loss.total_loss.backward()
            optimizer.step()
            #print(loss)
            #print(loss.total_loss)
            loss_value = loss.total_loss.cpu().data.numpy()
            avg_loss.add(float(loss_value))
            ma20_loss.add(float(loss_value))
            print(
                '[epoch:{}/{}]  [batch:{}/{}]  [sample_loss:{:.4f}] [avg_loss:{:.4f}]  [ma20_loss:{:.4f}]'
                .format(epoch, opt.epoch, ii + 1, len(train_loader),
                        loss.total_loss.data,
                        avg_loss.value()[0],
                        ma20_loss.value()[0]))

            if (ii + 1) % opt.plot_every == 0:
                niter = epoch * len(train_loader) + ii
                writer.add_scalar('Train/Loss', ma20_loss.value()[0], niter)

        eval_result = eval(val_loader, faster_rcnn, test_num=opt.test_num)
        print(eval_result['map'])

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            state = {
                "epoch": epoch + 1,
                "best_map": best_map,
                "model_state": trainer.faster_rcnn.state_dict(),
                "optimizer_state": optimizer.state_dict()
            }
            torch.save(state, opt.model_para)
        scheduler.step()
    state = {
        "epoch": epoch + 1,
        "best_map": best_map,
        "model_state": trainer.faster_rcnn.state_dict(),
        "optimizer_state": optimizer.state_dict()
    }
    torch.save(state, 'last_epoch.pkl')
    writer.close()
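The 80/20 split above shuffles one index list and hands the halves to two SubsetRandomSampler instances, so a single dataset object backs both loaders. A self-contained sketch of the same pattern (the toy TensorDataset stands in for the DataSets class used above):

import numpy as np
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler, TensorDataset

dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))
indices = list(range(len(dataset)))
split = int(np.floor(len(dataset) * 0.2))
np.random.seed(42)           # fixed seed -> reproducible split
np.random.shuffle(indices)
train_idx, val_idx = indices[split:], indices[:split]

train_loader = DataLoader(dataset, batch_size=4,
                          sampler=SubsetRandomSampler(train_idx))
val_loader = DataLoader(dataset, batch_size=4,
                        sampler=SubsetRandomSampler(val_idx))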
Example #12
File: train.py  Project: yashkhem1/SPNet
def main(config, cuda, excludeval, embedding, continue_from, nolog, inputmix,
         imagedataset, experimentid, nshot, ishot):
    frame = inspect.currentframe()
    args, _, _, values = inspect.getargvalues(frame)
    #print(values)

    #in case you want to save to the location of script you're running
    datadir = os.path.join(
        '/home/SharedData/omkar/zscoseg/yash_manas/data/datasets',
        imagedataset)
    if not nolog:
        #name the savedir, might add logs/ before the datetime for clarity
        if experimentid is None:
            savedir = time.strftime('%Y%m%d%H%M%S')
        else:
            savedir = experimentid
        #the full savepath is then:
        savepath = os.path.join('logs', imagedataset, savedir)
        #create the folder if it does not exist yet, ignoring "already exists" errors:
        try:
            os.makedirs(savepath)
            print("Log dir:", savepath)
        except OSError:
            pass
        if continue_from is None:
            #now join the path in save_screenshot:
            shutil.copytree('./libs/', savepath + '/libs')
            shutil.copy2(osp.abspath(inspect.stack()[0][1]), savepath)
            shutil.copy2(config, savepath)
            args_dict = {}
            for a in args:
                args_dict[a] = values[a]
            with open(savepath + '/args.json', 'w') as fp:
                json.dump(args_dict, fp)

    cuda = cuda and torch.cuda.is_available()
    device = torch.device("cuda" if cuda else "cpu")

    if cuda:
        current_device = torch.cuda.current_device()
        print("Running on", torch.cuda.get_device_name(current_device))
    else:
        print("Running on CPU")

    # Configuration
    CONFIG = Dict(yaml.load(open(config), Loader=yaml.FullLoader))
    visibility_mask = {}
    if excludeval:
        seen_classes = np.load(datadir + '/split/seen_cls.npy')
    else:
        seen_classes = np.asarray(np.concatenate([
            np.load(datadir + '/split/seen_cls.npy'),
            np.load(datadir + '/split/val_cls.npy')
        ]),
                                  dtype=int)

    novel_classes = np.load(datadir + '/split/novel_cls.npy')
    seen_novel_classes = np.concatenate([seen_classes, novel_classes])

    seen_map = np.array([-1] * 256)
    for i, n in enumerate(list(seen_classes)):
        seen_map[n] = i

    visibility_mask[0] = seen_map.copy()
    for i, n in enumerate(list(novel_classes)):
        visibility_mask[i + 1] = seen_map.copy()
        visibility_mask[i + 1][n] = seen_classes.shape[0] + i
    if excludeval:
        train = np.load(datadir + '/split/train_list.npy')[:-CONFIG.VAL_SIZE]
    else:
        train = np.load(datadir + '/split/train_list.npy')

    novelset = []
    seenset = []

    if inputmix == 'novel' or inputmix == 'both':
        inverse_dict = pickle.load(
            open(datadir + '/split/inverse_dict_train.pkl', 'rb'))
        for icls, key in enumerate(novel_classes):
            if (inverse_dict[key].size > 0):
                for v in inverse_dict[key][ishot * 20:ishot * 20 + nshot]:
                    novelset.append((v, icls))
                    #print((v, icls))

    if inputmix == 'both':
        seenset = []
        inverse_dict = pickle.load(
            open(datadir + '/split/inverse_dict_train.pkl', 'rb'))
        for icls, key in enumerate(seen_classes):
            if (inverse_dict[key].size > 0):
                for v in inverse_dict[key][ishot * 20:ishot * 20 + nshot]:
                    seenset.append(v)

    if inputmix == 'seen':
        seenset = range(train.shape[0])

    sampler = RandomImageSampler(seenset, novelset)

    if inputmix == 'novel':
        visible_classes = seen_novel_classes
        if nshot is not None:
            nshot = str(nshot) + 'n'
    elif inputmix == 'seen':
        visible_classes = seen_classes
        if nshot is not None:
            nshot = str(nshot) + 's'
    elif inputmix == 'both':
        visible_classes = seen_novel_classes
        if nshot is not None:
            nshot = str(nshot) + 'b'

    print("Visible classes:", visible_classes.size, " \nClasses are: ",
          visible_classes, "\nTrain Images:", train.shape[0])

    #a Dataset 10k or 164k
    dataset = get_dataset(CONFIG.DATASET)(train=train,
                                          test=None,
                                          root=CONFIG.ROOT,
                                          split=CONFIG.SPLIT.TRAIN,
                                          base_size=513,
                                          crop_size=CONFIG.IMAGE.SIZE.TRAIN,
                                          mean=(CONFIG.IMAGE.MEAN.B,
                                                CONFIG.IMAGE.MEAN.G,
                                                CONFIG.IMAGE.MEAN.R),
                                          warp=CONFIG.WARP_IMAGE,
                                          scale=(0.5, 1.5),
                                          flip=True,
                                          visibility_mask=visibility_mask)

    # DataLoader
    loader = torch.utils.data.DataLoader(dataset=dataset,
                                         batch_size=CONFIG.BATCH_SIZE.TRAIN,
                                         num_workers=CONFIG.NUM_WORKERS,
                                         sampler=sampler)

    if embedding == 'word2vec':
        class_emb = pickle.load(
            open(datadir + '/word_vectors/word2vec.pkl', "rb"))
    elif embedding == 'fasttext':
        class_emb = pickle.load(
            open(datadir + '/word_vectors/fasttext.pkl', "rb"))
    elif embedding == 'fastnvec':
        class_emb = np.concatenate([
            pickle.load(open(datadir + '/word_vectors/fasttext.pkl', "rb")),
            pickle.load(open(datadir + '/word_vectors/word2vec.pkl', "rb"))
        ],
                                   axis=1)
    else:
        print("invalid emb ", embedding)
        sys.exit()

    print((class_emb.shape))
    class_emb = F.normalize(torch.tensor(class_emb), p=2, dim=1).cuda()

    loader_iter = iter(loader)
    DeepLab = DeepLabV2_ResNet101_MSC
    #import ipdb; ipdb.set_trace()
    state_dict = torch.load(CONFIG.INIT_MODEL)

    # Model load
    model = DeepLab(class_emb.shape[1], class_emb[visible_classes])
    if continue_from is not None and continue_from > 0:
        print("Loading checkpoint: {}".format(continue_from))
        #import ipdb; ipdb.set_trace()
        model = nn.DataParallel(model)
        state_file = osp.join(savepath,
                              "checkpoint_{}.pth".format(continue_from))
        if osp.isfile(state_file + '.tar'):
            state_dict = torch.load(state_file + '.tar')
            model.load_state_dict(state_dict['state_dict'], strict=True)
        elif osp.isfile(state_file):
            state_dict = torch.load(state_file)
            model.load_state_dict(state_dict, strict=True)
        else:
            print("Checkpoint {} not found".format(continue_from))
            sys.exit()

    else:
        model.load_state_dict(
            state_dict, strict=False
        )  # set strict=True to verify the checkpoint loads correctly if performance is low
        model = nn.DataParallel(model)
    model.to(device)
    # Optimizer

    optimizer = {
        "sgd":
        torch.optim.SGD(
            # cf lr_mult and decay_mult in train.prototxt
            params=[{
                "params": get_params(model.module, key="1x"),
                "lr": CONFIG.LR,
                "weight_decay": CONFIG.WEIGHT_DECAY,
            }, {
                "params": get_params(model.module, key="10x"),
                "lr": 10 * CONFIG.LR,
                "weight_decay": CONFIG.WEIGHT_DECAY,
            }, {
                "params": get_params(model.module, key="20x"),
                "lr": 20 * CONFIG.LR,
                "weight_decay": 0.0,
            }],
            momentum=CONFIG.MOMENTUM,
        ),
        "adam":
        torch.optim.Adam(
            # cf lr_mult and decay_mult in train.prototxt
            params=[{
                "params": get_params(model.module, key="1x"),
                "lr": CONFIG.LR,
                "weight_decay": CONFIG.WEIGHT_DECAY,
            }, {
                "params": get_params(model.module, key="10x"),
                "lr": 10 * CONFIG.LR,
                "weight_decay": CONFIG.WEIGHT_DECAY,
            }, {
                "params": get_params(model.module, key="20x"),
                "lr": 20 * CONFIG.LR,
                "weight_decay": 0.0,
            }])
        # Add any other optimizer
    }.get(CONFIG.OPTIMIZER)

    if 'optimizer' in state_dict:
        optimizer.load_state_dict(state_dict['optimizer'])
    print("Learning rate:", CONFIG.LR)
    # Loss definition
    criterion = nn.CrossEntropyLoss(ignore_index=-1)
    criterion.to(device)

    if not nolog:
        # TensorBoard Logger
        if continue_from is not None:
            writer = SummaryWriter(
                savepath +
                '/runs/fs_{}_{}_{}'.format(continue_from, nshot, ishot))
        else:
            writer = SummaryWriter(savepath + '/runs')
        loss_meter = MovingAverageValueMeter(20)

    model.train()
    model.module.scale.freeze_bn()

    pbar = tqdm(
        range(1, CONFIG.ITER_MAX + 1),
        total=CONFIG.ITER_MAX,
        leave=False,
        dynamic_ncols=True,
    )
    for iteration in pbar:

        # Set a learning rate
        poly_lr_scheduler(
            optimizer=optimizer,
            init_lr=CONFIG.LR,
            iter=iteration - 1,
            lr_decay_iter=CONFIG.LR_DECAY,
            max_iter=CONFIG.ITER_MAX,
            power=CONFIG.POLY_POWER,
        )

        # Clear gradients (ready to accumulate)
        optimizer.zero_grad()

        iter_loss = 0
        for i in range(1, CONFIG.ITER_SIZE + 1):
            try:
                data, target = next(loader_iter)
            except StopIteration:
                loader_iter = iter(loader)
                data, target = next(loader_iter)

            # Image
            data = data.to(device)

            # Propagate forward
            outputs = model(data)
            # Loss
            loss = 0
            for output in outputs:
                # Resize target for {100%, 75%, 50%, Max} outputs
                target_ = resize_target(target, output.size(2))
                target_ = torch.tensor(target_).to(device)
                loss += criterion.forward(output, target_)

            # Backpropagate (just compute gradients wrt the loss)
            #print(loss)
            loss /= float(CONFIG.ITER_SIZE)
            loss.backward()

            iter_loss += float(loss)
            del data, target, outputs

        #print(iter_loss)
        pbar.set_postfix(loss="%.3f" % iter_loss)

        # Update weights with accumulated gradients
        optimizer.step()
        if not nolog:
            loss_meter.add(iter_loss)
            # TensorBoard
            if iteration % CONFIG.ITER_TB == 0:
                writer.add_scalar("train_loss",
                                  loss_meter.value()[0], iteration)
                for i, o in enumerate(optimizer.param_groups):
                    writer.add_scalar("train_lr_group{}".format(i), o["lr"],
                                      iteration)
                if False:  # This produces a large log file
                    for name, param in model.named_parameters():
                        name = name.replace(".", "/")
                        writer.add_histogram(name,
                                             param,
                                             iteration,
                                             bins="auto")
                        if param.requires_grad:
                            writer.add_histogram(name + "/grad",
                                                 param.grad,
                                                 iteration,
                                                 bins="auto")

            # Save a model
            if continue_from is not None:
                if iteration in CONFIG.ITER_SAVE:
                    torch.save(
                        {
                            'iteration': iteration,
                            'state_dict': model.state_dict(),
                        },
                        osp.join(
                            savepath, "checkpoint_{}_{}_{}_{}.pth.tar".format(
                                continue_from, nshot, ishot, iteration)),
                    )

                # Save a model (short term) [unnecessary for fewshot]
                if False and iteration % 100 == 0:
                    torch.save(
                        {
                            'iteration': iteration,
                            'state_dict': model.state_dict(),
                        },
                        osp.join(
                            savepath,
                            "checkpoint_{}_{}_{}_current.pth.tar".format(
                                continue_from, nshot, ishot)),
                    )
                    print(
                        osp.join(
                            savepath,
                            "checkpoint_{}_{}_{}_current.pth.tar".format(
                                continue_from, nshot, ishot)))
            else:
                if iteration % CONFIG.ITER_SAVE == 0:
                    torch.save(
                        {
                            'iteration': iteration,
                            'state_dict': model.state_dict(),
                            'optimizer': optimizer.state_dict(),
                        },
                        osp.join(savepath,
                                 "checkpoint_{}.pth.tar".format(iteration)),
                    )

                # Save a model (short term)
                if iteration % 100 == 0:
                    torch.save(
                        {
                            'iteration': iteration,
                            'state_dict': model.state_dict(),
                            'optimizer': optimizer.state_dict(),
                        },
                        osp.join(savepath, "checkpoint_current.pth.tar"),
                    )

        torch.cuda.empty_cache()

    if not nolog:
        if continue_from is not None:
            torch.save(
                {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                },
                osp.join(
                    savepath, "checkpoint_{}_{}_{}_{}.pth.tar".format(
                        continue_from, nshot, ishot, CONFIG.ITER_MAX)))
        else:
            torch.save(
                {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                osp.join(savepath,
                         "checkpoint_{}.pth.tar".format(CONFIG.ITER_MAX)))
Example #13
    def train(self):
        torch.cuda.empty_cache()

        ######################
        # Save / Load model
        ######################

        if self.opt.continue_train:
            try:
                self.continue_from_latest_checkpoint()
            except Exception as e:
                self.logger.error(e)
                self.opt.continue_train = False
                self.reset_save()
        else:
            self.reset_save()

        dataset = UnitRiffDataset(self.opt.dataset_name, self.opt.instr_type)
        dataset_size = len(dataset)

        self.logger.info(
            f'Dataset {self.opt.dataset_name} loaded, size {dataset_size}')

        ######################
        # Initiate
        ######################

        criterionGAN = nn.BCEWithLogitsLoss()

        GLoss_meter = MovingAverageValueMeter(self.opt.plot_every)
        DLoss_meter = MovingAverageValueMeter(self.opt.plot_every)

        losses = {}

        ######################
        # Start Training
        ######################

        for epoch in range(self.opt.start_epoch, self.opt.max_epoch):
            loader = DataLoader(dataset,
                                batch_size=self.opt.batch_size,
                                shuffle=True,
                                num_workers=self.opt.num_threads,
                                drop_last=False)
            epoch_start_time = time.time()

            for i, data in enumerate(loader):

                batch_size = data.size(0)
                # print(batch_size)

                real_label = torch.ones(size=[batch_size, 1],
                                        device=self.device)
                fake_label = torch.zeros(size=[batch_size, 1],
                                         device=self.device)

                seed = np.array([
                    generate_random_seed(1,
                                         self.opt.instr_type,
                                         pattern=self.opt.chord_type)
                    for _ in range(batch_size)
                ])
                # print(seed.shape)
                noise = torch.randn(batch_size,
                                    self.opt.seed_size,
                                    device=self.device)
                seed = torch.from_numpy(seed).to(device=self.device,
                                                 dtype=torch.float)

                fake_data = self.generator(noise, seed, batch_size)
                D_fake = self.discriminator(fake_data, batch_size)

                real_data = torch.unsqueeze(data, 1).to(device=self.device,
                                                        dtype=torch.float)
                D_real = self.discriminator(real_data, batch_size)
                # print(D_fake.shape)

                ######################
                # Generator
                ######################

                self.G_optimizer.zero_grad()
                loss_G = criterionGAN(D_fake, real_label)
                loss_G.backward(retain_graph=True)

                self.G_optimizer.step()

                # second generator update on the same batch; retain_graph=True
                # keeps the graph alive for this repeated backward pass and for
                # the discriminator update below
                self.G_optimizer.zero_grad()
                loss_G = criterionGAN(D_fake, real_label)
                loss_G.backward(retain_graph=True)

                self.G_optimizer.step()

                GLoss_meter.add(loss_G.item())

                ######################
                # Discriminator
                ######################

                self.D_optimizer.zero_grad()

                loss_D_real = criterionGAN(D_real, real_label)
                loss_D_fake = criterionGAN(D_fake, fake_label)

                loss_D = 0.5 * loss_D_real + 0.5 * loss_D_fake
                loss_D.backward()

                self.D_optimizer.step()
                DLoss_meter.add(loss_D.item())

            if epoch % self.opt.save_every == 0 or epoch == self.opt.max_epoch - 1:
                self.save_model(epoch)

            losses['loss_G'] = float(GLoss_meter.value()[0])
            losses['loss_D'] = float(DLoss_meter.value()[0])

            self.G_scheduler.step(epoch)
            self.D_scheduler.step(epoch)

            epoch_time = int(time.time() - epoch_start_time)

            self.logger.info(
                f'Epoch {epoch} finished, cost time {epoch_time}\n')
            self.logger.info(str(losses) + '\n\n')
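The loop above is a standard BCEWithLogitsLoss GAN update: the generator is trained against all-ones labels, the discriminator against ones for real data and zeros for fakes. A minimal sketch of one such step with toy linear G and D (unlike the code above, the fake batch is detached for the D update rather than retaining the graph):

import torch
import torch.nn as nn

G = nn.Linear(8, 16)   # illustrative generator
D = nn.Linear(16, 1)   # illustrative discriminator (raw logits)
opt_G = torch.optim.Adam(G.parameters(), lr=2e-4)
opt_D = torch.optim.Adam(D.parameters(), lr=2e-4)
bce = nn.BCEWithLogitsLoss()

real = torch.randn(4, 16)
noise = torch.randn(4, 8)
ones, zeros = torch.ones(4, 1), torch.zeros(4, 1)

# Generator: push D(G(z)) toward the "real" label.
opt_G.zero_grad()
fake = G(noise)
bce(D(fake), ones).backward()
opt_G.step()

# Discriminator: real -> 1, fake -> 0.
opt_D.zero_grad()
loss_D = 0.5 * bce(D(real), ones) + 0.5 * bce(D(fake.detach()), zeros)
loss_D.backward()
opt_D.step()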
Example #14
def train(config_path, cuda):
    """
    Training DeepLab by v2 protocol
    """

    # Configuration
    CONFIG = Dict(yaml.load(open(config_path), Loader=yaml.FullLoader))
    device = get_device(cuda)
    torch.backends.cudnn.benchmark = True

    # Dataset
    dataset = get_dataset(CONFIG.DATASET.NAME)(
        root=CONFIG.DATASET.ROOT,
        split=CONFIG.DATASET.SPLIT.TRAIN,
        ignore_label=CONFIG.DATASET.IGNORE_LABEL,
        mean_bgr=(CONFIG.IMAGE.MEAN.B, CONFIG.IMAGE.MEAN.G, CONFIG.IMAGE.MEAN.R),
        augment=True,
        base_size=CONFIG.IMAGE.SIZE.BASE,
        crop_size=CONFIG.IMAGE.SIZE.TRAIN,
        scales=CONFIG.DATASET.SCALES,
        flip=True,
    )
    print(dataset)

    # DataLoader
    loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=CONFIG.SOLVER.BATCH_SIZE.TRAIN,
        num_workers=CONFIG.DATALOADER.NUM_WORKERS,
        shuffle=True,
    )
    loader_iter = iter(loader)

    # Model check
    print("Model:", CONFIG.MODEL.NAME)
    assert (
        CONFIG.MODEL.NAME == "DeepLabV2_ResNet101_MSC"
    ), 'Currently supports only "DeepLabV2_ResNet101_MSC"'

    # Model setup
    model = eval(CONFIG.MODEL.NAME)(n_classes=CONFIG.DATASET.N_CLASSES)
    state_dict = torch.load(CONFIG.MODEL.INIT_MODEL)
    print("    Init:", CONFIG.MODEL.INIT_MODEL)
    for m in model.base.state_dict().keys():
        if m not in state_dict.keys():
            print("    Skip init:", m)
    model.base.load_state_dict(state_dict, strict=False)  # to skip ASPP
    model = nn.DataParallel(model)
    model.to(device)

    # Loss definition
    criterion = nn.CrossEntropyLoss(ignore_index=CONFIG.DATASET.IGNORE_LABEL)
    criterion.to(device)

    # Optimizer
    optimizer = torch.optim.SGD(
        # cf lr_mult and decay_mult in train.prototxt
        params=[
            {
                "params": get_params(model.module, key="1x"),
                "lr": CONFIG.SOLVER.LR,
                "weight_decay": CONFIG.SOLVER.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="10x"),
                "lr": 10 * CONFIG.SOLVER.LR,
                "weight_decay": CONFIG.SOLVER.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="20x"),
                "lr": 20 * CONFIG.SOLVER.LR,
                "weight_decay": 0.0,
            },
        ],
        momentum=CONFIG.SOLVER.MOMENTUM,
    )

    # Learning rate scheduler
    scheduler = PolynomialLR(
        optimizer=optimizer,
        step_size=CONFIG.SOLVER.LR_DECAY,
        iter_max=CONFIG.SOLVER.ITER_MAX,
        power=CONFIG.SOLVER.POLY_POWER,
    )

    # Setup loss logger
    writer = SummaryWriter(os.path.join(CONFIG.EXP.OUTPUT_DIR, "logs", CONFIG.EXP.ID))
    average_loss = MovingAverageValueMeter(CONFIG.SOLVER.AVERAGE_LOSS)

    # Path to save models
    checkpoint_dir = os.path.join(
        CONFIG.EXP.OUTPUT_DIR,
        "models",
        CONFIG.EXP.ID,
        CONFIG.MODEL.NAME.lower(),
        CONFIG.DATASET.SPLIT.TRAIN,
    )
    makedirs(checkpoint_dir)
    print("Checkpoint dst:", checkpoint_dir)

    # Freeze the batch norm pre-trained on COCO
    model.train()
    model.module.base.freeze_bn()

    for iteration in tqdm(
        range(1, CONFIG.SOLVER.ITER_MAX + 1),
        total=CONFIG.SOLVER.ITER_MAX,
        dynamic_ncols=True,
    ):

        # Clear gradients (ready to accumulate)
        optimizer.zero_grad()

        loss = 0
        for _ in range(CONFIG.SOLVER.ITER_SIZE):
            try:
                _, images, labels = next(loader_iter)
            except StopIteration:
                loader_iter = iter(loader)
                _, images, labels = next(loader_iter)

            # Propagate forward
            logits = model(images.to(device))

            # Loss
            iter_loss = 0
            for logit in logits:
                # Resize labels for {100%, 75%, 50%, Max} logits
                _, _, H, W = logit.shape
                labels_ = resize_labels(labels, size=(H, W))
                iter_loss += criterion(logit, labels_.to(device))

            # Propagate backward (just compute gradients wrt the loss)
            iter_loss /= CONFIG.SOLVER.ITER_SIZE
            iter_loss.backward()

            loss += float(iter_loss)

        #print(loss)
        average_loss.add(loss)

        # Update weights with accumulated gradients
        optimizer.step()

        # Update learning rate
        scheduler.step(epoch=iteration)

        # TensorBoard
        if iteration % CONFIG.SOLVER.ITER_TB == 0:
            writer.add_scalar("loss/train", average_loss.value()[0], iteration)
            for i, o in enumerate(optimizer.param_groups):
                writer.add_scalar("lr/group_{}".format(i), o["lr"], iteration)
            for i in range(torch.cuda.device_count()):
                writer.add_scalar(
                    "gpu/device_{}/memory_cached".format(i),
                    torch.cuda.memory_cached(i) / 1024 ** 3,
                    iteration,
                )

            if False:
                for name, param in model.module.base.named_parameters():
                    name = name.replace(".", "/")
                    # Weight/gradient distribution
                    writer.add_histogram(name, param, iteration, bins="auto")
                    if param.requires_grad:
                        writer.add_histogram(
                            name + "/grad", param.grad, iteration, bins="auto"
                        )

        # Save a model
        if iteration % CONFIG.SOLVER.ITER_SAVE == 0:
            torch.save(
                model.module.state_dict(),
                os.path.join(checkpoint_dir, "checkpoint_{}.pth".format(iteration)),
            )

    torch.save(
        model.module.state_dict(), os.path.join(checkpoint_dir, "checkpoint_final.pth")
    )
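resize_labels above must downscale integer label maps to each logit's resolution; nearest-neighbour sampling is the safe choice, since bilinear interpolation would blend class ids into meaningless values. A sketch of such a helper under that assumption (the project's actual implementation may differ):

import torch
import torch.nn.functional as F

def resize_labels(labels, size):
    """Nearest-neighbour resize of an integer label map to size=(H, W)."""
    labels = labels.unsqueeze(1).float()                # (N, H, W) -> (N, 1, H, W)
    labels = F.interpolate(labels, size=size, mode="nearest")
    return labels.squeeze(1).long()                     # back to (N, H, W) int64

labels = torch.randint(0, 21, (2, 321, 321))
print(resize_labels(labels, (41, 41)).shape)            # torch.Size([2, 41, 41])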
Example #15
def main(config, cuda):
    cuda = cuda and torch.cuda.is_available()
    device = torch.device("cuda" if cuda else "cpu")

    if cuda:
        current_device = torch.cuda.current_device()
        print("Running on", torch.cuda.get_device_name(current_device))
    else:
        print("Running on CPU")

    # Configuration
    CONFIG = Dict(yaml.load(open(config), Loader=yaml.FullLoader))

    dataset = get_dataset(CONFIG.DATASET)(
        data_path=CONFIG.ROOT,
        crop_size=256,
        scale=(0.6, 0.8, 1., 1.2, 1.4),
        rotation=15,
        flip=True,
        mean=(CONFIG.IMAGE.MEAN.B, CONFIG.IMAGE.MEAN.G, CONFIG.IMAGE.MEAN.R),
    )
    """
    # Dataset 10k or 164k
    dataset = get_dataset(CONFIG.DATASET)(
        root=CONFIG.ROOT,
        split=CONFIG.SPLIT.TRAIN,
        base_size=513,
        crop_size=CONFIG.IMAGE.SIZE.TRAIN,
        mean=(CONFIG.IMAGE.MEAN.B, CONFIG.IMAGE.MEAN.G, CONFIG.IMAGE.MEAN.R),
        warp=CONFIG.WARP_IMAGE,
        scale=(0.5, 0.75, 1.0, 1.25, 1.5),
        flip=True,
    )
    """

    # DataLoader
    loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=CONFIG.BATCH_SIZE.TRAIN,
        num_workers=CONFIG.NUM_WORKERS,
        shuffle=True,
    )
    loader_iter = iter(loader)

    # Model
    model = DeepLabV3Plus_ResNet101_MSC(n_classes=CONFIG.N_CLASSES)
    state_dict = torch.load(CONFIG.INIT_MODEL)
    model.load_state_dict(state_dict, strict=False)  # Skip "aspp" layer
    model = nn.DataParallel(model)
    model.to(device)

    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name)

    # Optimizer
    optimizer = torch.optim.Adam(
        params=get_params(model.module),
        lr=CONFIG.LR,
        weight_decay=CONFIG.WEIGHT_DECAY,
    )
    """
    # Optimizer
    optimizer = torch.optim.SGD(
        # cf lr_mult and decay_mult in train.prototxt
        params=[
            {
                "params": get_params(model.module, key="1x"),
                "lr": CONFIG.LR,
                "weight_decay": CONFIG.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="10x"),
                "lr": 10 * CONFIG.LR,
                "weight_decay": CONFIG.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="20x"),
                "lr": 20 * CONFIG.LR,
                "weight_decay": 0.0,
            },
        ],
        momentum=CONFIG.MOMENTUM,
    )
    """
    # Loss definition
    criterion = CrossEntropyLoss2d(ignore_index=CONFIG.IGNORE_LABEL)
    criterion.to(device)
    max_pooling_loss = MaxPoolingLoss(ratio=0.3, p=1.7, reduce=True)

    # TensorBoard Logger
    writer = SummaryWriter(CONFIG.LOG_DIR)
    loss_meter = MovingAverageValueMeter(20)

    model.train()
    model.module.scale.freeze_bn()

    for iteration in tqdm(
            range(1, CONFIG.ITER_MAX + 1),
            total=CONFIG.ITER_MAX,
            leave=False,
            dynamic_ncols=True,
    ):
        """
        # Set a learning rate
        poly_lr_scheduler(
            optimizer=optimizer,
            init_lr=CONFIG.LR,
            iter=iteration - 1,
            lr_decay_iter=CONFIG.LR_DECAY,
            max_iter=CONFIG.ITER_MAX,
            power=CONFIG.POLY_POWER,
        )
        """

        # Clear gradients (ready to accumulate)
        optimizer.zero_grad()

        iter_loss = 0
        for i in range(1, CONFIG.ITER_SIZE + 1):
            try:
                images, labels = next(loader_iter)
            except StopIteration:
                loader_iter = iter(loader)
                images, labels = next(loader_iter)

            images = images.to(device)
            labels = labels.to(device).unsqueeze(1).float()

            # Propagate forward
            logits = model(images)

            # Loss
            loss = 0
            for logit in logits:
                # Resize labels for {100%, 75%, 50%, Max} logits
                labels_ = F.interpolate(labels,
                                        logit.shape[2:],
                                        mode="nearest")
                labels_ = labels_.squeeze(1).long()
                # Compute NLL and MPL
                nll_loss = criterion(logit, labels_)
                # loss += nll_loss
                loss += max_pooling_loss(nll_loss)

            # Backpropagate (just compute gradients wrt the loss)
            loss /= float(CONFIG.ITER_SIZE)
            loss.backward()

            iter_loss += float(loss)

        loss_meter.add(iter_loss)

        # Update weights with accumulated gradients
        optimizer.step()

        if iteration % CONFIG.ITER_TB == 0:
            writer.add_scalar("train_loss", loss_meter.value()[0], iteration)
            for i, o in enumerate(optimizer.param_groups):
                writer.add_scalar("train_lr_group{}".format(i), o["lr"],
                                  iteration)

            gt_viz, images_viz, predicts_viz = make_vizs(
                images, labels_, logits,
                (CONFIG.IMAGE.MEAN.B, CONFIG.IMAGE.MEAN.G,
                 CONFIG.IMAGE.MEAN.R))
            writer.add_image("gt/images", torch.from_numpy(images_viz[0]),
                             iteration)
            writer.add_image("gt/labels", torch.from_numpy(gt_viz[0]),
                             iteration)
            for i, predict_viz in enumerate(predicts_viz):
                writer.add_image("predict/" + str(i),
                                 torch.from_numpy(predict_viz[0]), iteration)

            if False:  # This produces a large log file
                for name, param in model.named_parameters():
                    name = name.replace(".", "/")
                    writer.add_histogram(name, param, iteration, bins="auto")
                    if param.requires_grad:
                        writer.add_histogram(name + "/grad",
                                             param.grad,
                                             iteration,
                                             bins="auto")

        # Save a model
        if iteration % CONFIG.ITER_SAVE == 0:
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.SAVE_DIR,
                         "checkpoint_{}.pth".format(iteration)),
            )

        # Save a model (short term)
        if iteration % 100 == 0:
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.SAVE_DIR, "checkpoint_current.pth"),
            )

    torch.save(model.module.state_dict(),
               osp.join(CONFIG.SAVE_DIR, "checkpoint_final.pth"))
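The try/except around next(loader_iter) in these loops restarts the exhausted DataLoader, turning an epoch-based loader into an endless stream for iteration-based training. The same idea as a small generator (name is illustrative):

def infinite_batches(loader):
    """Yield batches forever, re-entering the loader at each epoch boundary."""
    while True:
        for batch in loader:
            yield batch

# usage:
# batches = infinite_batches(loader)
# images, labels = next(batches)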
Example #16
class DataManager:
    def __init__(self, imagedataset, datadir, inputmix, embedding, device):
        self.imagedataset = imagedataset
        self.datadir = datadir
        self.inputmix = inputmix

        self.embedding = embedding
        self.device = device

    def generateSavepath(self, experimentid):
        # name the savedir, might add logs/ before the datetime for clarity
        if experimentid is None:
            savedir = time.strftime('%Y%m%d%H%M%S')
        else:
            savedir = experimentid

        self.savepath = os.path.join('logs', self.imagedataset, savedir)
        return self.savepath

    # getter method
    def get_savepath(self):
        return self.savepath

    def generateTB(self, period):
        self.writer = SummaryWriter(self.savepath + '/runs')
        self.loss_meter = MovingAverageValueMeter(20)
        self.tb = period

    def get_writer(self):
        return self.writer

    def createDirectory(self, values, config, args):
        try:
            os.makedirs(self.savepath)
            # print("Log dir:", savepath)
        except OSError:
            pass

        # now join the path in save_screenshot:
        if os.path.exists(self.savepath + '/libs'):
            shutil.rmtree(self.savepath + '/libs')
        shutil.copytree('./libs/', self.savepath + '/libs')
        shutil.copy2(osp.abspath(inspect.stack()[0][1]), self.savepath)
        shutil.copy2(config, self.savepath)
        args_dict = {}
        for a in args:
            args_dict[a] = values[a]
        with open(self.savepath + '/args.json', 'w') as fp:
            json.dump(args_dict, fp)

    def loadClasses(self, bkg):
        self.seen_classes = np.load(
            self.datadir + '/split/seen_cls.npy')  #only the seen classes

        if bkg:
            self.seen_classes = np.asarray(np.concatenate(
                [np.array([0]), self.seen_classes]),
                                           dtype=int)  #seen classes + bkg

        self.novel_classes = np.load(self.datadir + '/split/novel_cls.npy')
        self.all_labels = np.genfromtxt(self.datadir + '/labels_2.txt',
                                        delimiter='\t',
                                        usecols=1,
                                        dtype='str')

        self.seen_classes = np.asarray(np.concatenate(
            [self.seen_classes,
             np.load(self.datadir + '/split/val_cls.npy')]),
                                       dtype=int)
        self.seen_novel_classes = np.concatenate(
            [self.seen_classes, self.novel_classes])
        self.to_ignore_classes = self.novel_classes

        if self.inputmix == 'seen':
            self.visible_classes = self.seen_classes
        else:
            self.visible_classes = self.seen_novel_classes

        print("Seen classes: ")
        print(self.seen_classes)
        print("all labels: ")
        print(self.all_labels)

        return self.seen_classes, self.novel_classes, self.seen_novel_classes, self.to_ignore_classes, self.visible_classes, self.all_labels

    def get_Classes(self):
        return self.seen_classes, self.novel_classes, self.seen_novel_classes, self.to_ignore_classes, self.visible_classes, self.all_labels, self.visibility_mask

    def loadData(self):

        self.train = np.load(self.datadir + '/split/train_list.npy')

        self.novelset = []
        self.seenset = []

        if self.inputmix == 'seen':
            self.seenset = range(self.train.shape[0])
        else:
            print("inputmix is not seen")
            sys.exit()

        return self.train, self.seenset, self.novelset

    def get_data(self):
        return self.train, self.seenset, self.novelset

    def loadDatasets(self, CONFIG, bs):
        # Sampler
        sampler = MyDistributedSampler(
            self.seenset,
            self.novelset,
            num_replicas=torch.distributed.get_world_size(),
            rank=torch.distributed.get_rank())

        self.dataset = get_dataset(CONFIG.DATASET)(
            train=self.train,
            test=None,
            root=CONFIG.ROOT,
            transform=None,
            split=CONFIG.SPLIT.TRAIN,
            base_size=513,
            crop_size=CONFIG.IMAGE.SIZE.TRAIN,
            mean=(CONFIG.IMAGE.MEAN.B, CONFIG.IMAGE.MEAN.G,
                  CONFIG.IMAGE.MEAN.R),
            warp=CONFIG.WARP_IMAGE,
            scale=(0.5, 1.5),
            flip=True,
            visibility_mask=self.visibility_mask,
        )
        random.seed(42)
        # DataLoader
        self.loader = torch.utils.data.DataLoader(
            dataset=self.dataset,
            batch_size=bs,
            num_workers=CONFIG.NUM_WORKERS,
            # num_workers = 1,
            sampler=sampler,
            pin_memory=True)
        return self.dataset, self.loader

    def get_datasets(self):

        return self.dataset, self.loader

    def loadClassEmbs(self):
        # Word embeddings
        if self.embedding == 'word2vec':
            self.class_emb = pickle.load(
                open(self.datadir + '/word_vectors/word2vec.pkl', "rb"))
        elif self.embedding == 'fasttext':
            self.class_emb = pickle.load(
                open(self.datadir + '/word_vectors/fasttext.pkl', "rb"))
        elif self.embedding == 'fastnvec':
            self.class_emb = np.concatenate([
                pickle.load(
                    open(self.datadir + '/word_vectors/fasttext.pkl', "rb")),
                pickle.load(
                    open(self.datadir + '/word_vectors/word2vec.pkl', "rb"))
            ],
                                            axis=1)
        else:
            print("invalid emb ", self.embedding)
            sys.exit()
        self.class_emb = F.normalize(torch.tensor(self.class_emb), p=2,
                                     dim=1).to(self.device)
        self.seen_class_emb = self.class_emb[self.seen_classes]
        self.to_ignore_class_emb = self.class_emb[self.to_ignore_classes]

        return self.class_emb, self.to_ignore_class_emb, self.seen_class_emb

    def get_clsEmbs(self):
        return self.class_emb, self.to_ignore_class_emb, self.seen_class_emb

    def loadClsMaps(self, bkg):

        self.seen_map = np.array([-1] * 256)
        for i, n in enumerate(list(self.seen_classes)):
            self.seen_map[n] = i

        self.all_map = np.array([-1] * 256)
        for i, n in enumerate(list(self.seen_classes)):
            self.all_map[n] = i
        for i, n in enumerate(self.to_ignore_classes, len(self.seen_classes)):
            self.all_map[n] = i

        self.inverse_map = np.array([-1] * 256)
        for i, n in enumerate(self.all_map):
            self.inverse_map[n] = i

        if bkg:
            for i, n in enumerate(self.to_ignore_classes):
                self.seen_map[n] = 0

        # used to tell which predictions belong to unseen classes when computing the percentage
        self.cls_map_seen = np.array([0] * 256)
        for i, n in enumerate(self.to_ignore_classes):
            self.cls_map_seen[n] = 1

        self.cls_map = None
        self.cls_map = np.array([255] * 256)
        for i, n in enumerate(self.seen_classes):
            self.cls_map[n] = i

        # VISIBILITY MASK
        self.visibility_mask = {}
        self.visibility_mask[0] = self.seen_map.copy()

        print(self.visibility_mask[0])
        return self.seen_map, self.cls_map_seen, self.cls_map

    def getClsMaps(self):
        return self.seen_map, self.cls_map_seen, self.cls_map, self.inverse_map

    def savePerIteration(self, iter_loss, optimizer, model, iteration, save):

        self.loss_meter.add(iter_loss)
        # TensorBoard
        if iteration % self.tb == 0:
            self.writer.add_scalar("train_loss",
                                   self.loss_meter.value()[0], iteration)
            for i, o in enumerate(optimizer.param_groups):
                self.writer.add_scalar("train_lr_group{}".format(i), o["lr"],
                                       iteration)

        # Save a model (short term)
        if iteration > 0 and iteration % save == 0:
            print(
                "\nIteration: {}\nSaving (short-term) model (iteration, state_dict, optimizer)...\n"
                .format(iteration))
            with open(self.savepath + '/iteration.json', 'w') as fp:
                json.dump({'iteration': iteration}, fp)
            name = "checkpoint_current.pth.tar"
            if "voc" in self.savepath or iteration % 5000 == 0:
                name = "checkpoint_{}.pth.tar".format(iteration)
            torch.save(
                {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }, osp.join(self.savepath, name))

    def saveFinal(self, optimizer, model):

        torch.save(
            {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, osp.join(self.savepath, "checkpoint_final.pth.tar"))
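Several snippets above save model.module.state_dict() because nn.DataParallel prefixes every parameter name with 'module.'. When loading such a checkpoint into a bare (non-parallel) model, the prefix has to be stripped; a minimal sketch (helper name is illustrative):

import torch

def strip_module_prefix(state_dict):
    """Remove the 'module.' prefix that nn.DataParallel adds to parameter names."""
    return {
        (k[len("module."):] if k.startswith("module.") else k): v
        for k, v in state_dict.items()
    }

# usage:
# state = torch.load("checkpoint_final.pth.tar", map_location="cpu")
# model.load_state_dict(strip_module_prefix(state["state_dict"]))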
Example #17
            iter_loss /= CONFIG.SOLVER.ITER_SIZE
            iter_loss.backward()

            loss += float(iter_loss)

        average_loss.add(loss)

        # Update weights with accumulated gradients
        optimizer.step()

        # Update learning rate
        scheduler.step(epoch=iteration)

        # TensorBoard
        if iteration % CONFIG.SOLVER.ITER_TB == 0:
            writer.add_scalar("loss/train", average_loss.value()[0], iteration)
            for i, o in enumerate(optimizer.param_groups):
                writer.add_scalar("lr/group_{}".format(i), o["lr"], iteration)
            for i in range(torch.cuda.device_count()):
                writer.add_scalar(
                    "gpu/device_{}/memory_cached".format(i),
                    torch.cuda.memory_cached(i) / 1024**3,
                    iteration,
                )

        # Save a model
        if iteration % CONFIG.SOLVER.ITER_SAVE == 0:
            torch.save(
                model.module.state_dict(),
                os.path.join(checkpoint_dir,
                             "checkpoint_{}.pth".format(iteration)),