Example #1
# Assumed imports for this snippet; Generator, ContentLoss, get_dataloader,
# get_no_aug_transform, and unnormalize are helpers from the surrounding repo.
import torch
from torch.optim import AdamW
from torchvision import utils as vutils
from tqdm import tqdm

def train():
    torch.manual_seed(1337)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Config
    batch_size = 16
    image_size = 256
    learning_rate = 1e-3
    beta1, beta2 = (.5, .99)
    weight_decay = 1e-3
    epochs = 10

    # Dataloaders
    real_dataloader = get_dataloader(
        "./datasets/real_images/flickr_nuneaton/", size=image_size, bs=batch_size, trfs=get_no_aug_transform())

    # Lists to keep track of progress
    G_losses = []
    iters = 0

    tracked_images = next(iter(real_dataloader)).to(device)
    vutils.save_image(unnormalize(tracked_images),
                      "images/org.png", padding=2, normalize=True)

    # Models
    netG = Generator().to(device)

    scaler = torch.cuda.amp.GradScaler()

    optimizerG = AdamW(netG.parameters(), lr=learning_rate,
                       betas=(beta1, beta2), weight_decay=weight_decay)

    # Loss functions
    content_loss = ContentLoss().to(device)

    print("Starting Training Loop...")
    # For each epoch.
    for epoch in range(epochs):
        # For each batch in the dataloader.
        for i, real_data in enumerate(tqdm(real_dataloader, desc=f"Training epoch {epoch}")):

            ############################
            # (1) Pre-train G
            ###########################

            # Reset Generator gradient.
            netG.zero_grad()

            # Format batch.
            real_data = real_data.to(device)

            with torch.cuda.amp.autocast():
                # Generate image
                generated_data = netG(real_data)

                # Calculate the content (reconstruction) loss against the input images.
                errG = content_loss(generated_data, real_data)

            # Calculate gradients for G
            scaler.scale(errG).backward()

            # Update G
            scaler.step(optimizerG)

            scaler.update()

            # ---------------------------------------------------------------------------------------- #

            # Save Losses for plotting later
            G_losses.append(errG.item())

            # Check how the generator is doing by saving G's output on tracked_images
            if iters % 200 == 0:
                with torch.no_grad():
                    fake = netG(tracked_images).detach().cpu()
                vutils.save_image(unnormalize(
                    fake), f"images/{epoch}_{i}.png", padding=2)
                torch.save(netG.state_dict(), f"checkpoints/pretrained_netG_e{epoch}_i{iters}_l{errG.item()}.pth")

            iters += 1

    torch.save(netG.state_dict(), f"checkpoints/pretrained_netG_e{epoch}_i{iters}_l{errG.item()}.pth")
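
Example #1 pre-trains the generator with only ContentLoss, whose definition is not shown here. A minimal sketch, assuming the usual CartoonGAN-style formulation (L1 distance between VGG19 feature maps; the repo's actual version may normalize its inputs differently):

import torch.nn as nn
from torchvision.models import vgg19

class ContentLoss(nn.Module):
    # Assumed sketch: L1 distance between VGG19 feature maps of the
    # generated image and the source photo (CartoonGAN's content loss).
    def __init__(self):
        super().__init__()
        self.vgg = vgg19(pretrained=True).features[:26].eval()  # up to conv4_4
        for p in self.vgg.parameters():
            p.requires_grad = False
        self.l1 = nn.L1Loss()

    def forward(self, generated, real):
        return self.l1(self.vgg(generated), self.vgg(real))
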
Example #2
# Assumed imports, in addition to those in Example #1; Discriminator,
# AdversialLoss, and get_pair_transforms are helpers from the surrounding repo.
from datetime import datetime

import numpy as np
import torch.nn as nn

def train():
    torch.manual_seed(1337)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Config
    batch_size = 32
    image_size = 256
    learning_rate = 1e-4
    beta1, beta2 = (.5, .99)
    weight_decay = 1e-4
    epochs = 1000

    # Models
    netD = Discriminator().to(device)
    netG = Generator().to(device)
    # Load the generator pretrained in Example #1.
    netG.load_state_dict(torch.load("./checkpoints/pretrained_netG.pth"))

    optimizerD = AdamW(netD.parameters(), lr=learning_rate, betas=(beta1, beta2), weight_decay=weight_decay)
    optimizerG = AdamW(netG.parameters(), lr=learning_rate, betas=(beta1, beta2), weight_decay=weight_decay)

    scaler = torch.cuda.amp.GradScaler()

    # Labels
    cartoon_labels = torch.ones(batch_size, 1, image_size // 4, image_size // 4).to(device)
    fake_labels    = torch.zeros(batch_size, 1, image_size // 4, image_size // 4).to(device)

    # Loss functions
    content_loss = ContentLoss().to(device)
    adv_loss     = AdversialLoss(cartoon_labels, fake_labels).to(device)
    BCE_loss     = nn.BCEWithLogitsLoss().to(device)

    # Dataloaders
    real_dataloader    = get_dataloader("./datasets/real_images/flickr30k_images/",           size = image_size, bs = batch_size)
    cartoon_dataloader = get_dataloader("./datasets/cartoon_images_smoothed/Studio Ghibli",   size = image_size, bs = batch_size, trfs=get_pair_transforms(image_size))

    # --------------------------------------------------------------------------------------------- #
    # Training Loop

    # Lists to keep track of progress
    img_list = []
    G_losses = []
    D_losses = []
    iters = 0

    tracked_images = next(iter(real_dataloader)).to(device)

    print("Starting Training Loop...")
    # For each epoch.
    for epoch in range(epochs):
        print("training epoch ", epoch)
        # For each batch in the dataloader.
        for i, (cartoon_edge_data, real_data) in enumerate(zip(cartoon_dataloader, real_dataloader)):

            ############################
            # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
            ###########################
            
            # Reset Discriminator gradient.
            netD.zero_grad()
            for param in netD.parameters():
                param.requires_grad = True

            # Format batch.
            cartoon_data   = cartoon_edge_data[:, :, :, :image_size].to(device)
            edge_data      = cartoon_edge_data[:, :, :, image_size:].to(device)
            real_data      = real_data.to(device)

            with torch.cuda.amp.autocast():
                # Generate image
                generated_data = netG(real_data)

                # Forward pass all batches through D.
                cartoon_pred   = netD(cartoon_data)
                edge_pred      = netD(edge_data)
                generated_pred = netD(generated_data.detach())

                # Calculate discriminator loss on all batches.
                errD = adv_loss(cartoon_pred, generated_pred, edge_pred)
            
            # Calculate gradients for D in backward pass
            scaler.scale(errD).backward()
            D_x = cartoon_pred.mean().item()  # Mean logit on real cartoons; its sigmoid should approach 1.

            # Update D
            scaler.step(optimizerD)


            ############################
            # (2) Update G network: maximize log(D(G(z)))
            ###########################
            
            # Reset Generator gradient.
            netG.zero_grad()
            for param in netD.parameters():
                param.requires_grad = False

            with torch.cuda.amp.autocast():
                # Since we just updated D, perform another forward pass of all-fake batch through D
                generated_pred = netD(generated_data)

                # Calculate G's loss based on this output
                errG = BCE_loss(generated_pred, cartoon_labels) + content_loss(generated_data, real_data)

            # Calculate gradients for G
            scaler.scale(errG).backward()

            D_G_z2 = generated_pred.mean().item()  # Mean logit on generated images; its sigmoid should approach 1.
            
            # Update G
            scaler.step(optimizerG)

            scaler.update()
            
            # ---------------------------------------------------------------------------------------- #

            # Save Losses for plotting later
            G_losses.append(errG.item())
            D_losses.append(errD.item())

            # Check how the generator is doing by saving G's output on tracked_images
            if iters % 200 == 0:
                with torch.no_grad():
                    fake = netG(tracked_images)
                vutils.save_image(unnormalize(fake), f"images/{epoch}_{i}.png", padding=2)
                with open("images/log.txt", "a+") as f:
                    f.write(f"{datetime.now().isoformat(' ', 'seconds')}\tD: {np.mean(D_losses)}\tG: {np.mean(G_losses)}\n")
                D_losses = []
                G_losses = []

            if iters % 1000 == 0:
                torch.save(netG.state_dict(), f"checkpoints/netG_e{epoch}_i{iters}_l{errG.item()}.pth")
                torch.save(netD.state_dict(), f"checkpoints/netD_e{epoch}_i{iters}_l{errG.item()}.pth")

            iters += 1
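
Example #2 depends on an AdversialLoss module (keeping the repo's spelling) that consumes the three discriminator outputs. A plausible sketch, assuming the CartoonGAN formulation in which real cartoons are labelled 1 while generated images and edge-smoothed cartoons are labelled 0:

import torch.nn as nn

class AdversialLoss(nn.Module):
    # Assumed sketch; the constructor signature matches the call above.
    def __init__(self, cartoon_labels, fake_labels):
        super().__init__()
        self.cartoon_labels = cartoon_labels
        self.fake_labels = fake_labels
        self.bce = nn.BCEWithLogitsLoss()

    def forward(self, cartoon_pred, generated_pred, edge_pred):
        # The discriminator should accept real cartoons and reject both
        # generated images and edge-smoothed cartoons.
        return (self.bce(cartoon_pred, self.cartoon_labels)
                + self.bce(generated_pred, self.fake_labels)
                + self.bce(edge_pred, self.fake_labels))
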
Example #3

    # First model; the opening of this snippet is missing from the source.
    # The lines up to grid_sample are reconstructed from predict() in
    # Example #5, and use_first_model is an assumed name mirroring
    # use_second_model below.
    if use_first_model:
        grid, _ = GMM_model(flag, outline)
        morphed_flag = F.grid_sample(flag, grid, padding_mode="border",
                                     align_corners=False)
        input_m2 = torch.cat([morphed_flag * interest_mask, outline], dim=1)
    else:
        input_m2 = torch.cat([flag * interest_mask, outline], dim=1)

    # Second model
    if use_second_model:
        output = M2_model(input_m2)
        output = output * interest_mask
    else:
        output = input_m2[:, :-1]

    # Add to data structure
    for cnam, outp, outl, mask, fnam in zip(country_name, output, outline,
                                            ball_mask, file_name):
        image = TF.to_pil_image(unnormalize(outp))
        flag = Image.open(input_folder / "flags" /
                          f"{cnam}.png").convert("RGB")

        # Quantize
        if use_quantization:
            image = quantize_pil_image(image, flag, quantize_thresh)

        # Add outline
        image = Image.composite(Image.new("RGB", img_size, (0, 0, 0)), image,
                                TF.to_pil_image(outl))
        # Add transparent background
        image = Image.composite(Image.new("RGBA", img_size, (0, 0, 0, 0)),
                                image.convert("RGBA"),
                                TF.to_pil_image(1 - (mask + outl)))
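
Examples #3 and #5 call quantize_pil_image to snap the network output back to the flag's colors. The helper is not shown; the following is a hypothetical sketch of nearest-color quantization with a distance threshold, assuming flags with mostly flat colors:

import numpy as np
from PIL import Image

def quantize_pil_image(image, flag, thresh):
    # Hypothetical sketch: snap each pixel of `image` to the nearest color
    # occurring in `flag` whenever the mean squared channel distance falls
    # below `thresh`; pixels with no close match are left untouched.
    img = np.asarray(image.convert("RGB"), dtype=np.float32) / 255.0
    palette = np.unique(
        np.asarray(flag.convert("RGB"), dtype=np.float32).reshape(-1, 3),
        axis=0) / 255.0
    dists = ((img[:, :, None, :] - palette[None, None]) ** 2).mean(-1)
    nearest = dists.argmin(-1)
    close = dists.min(-1) < thresh
    img[close] = palette[nearest[close]]
    return Image.fromarray((img * 255).astype(np.uint8))
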
Example #4
    # criterion
    criterion = nn.L1Loss().to(device)

    # optimizer
    optimizer = torch.optim.AdamW(model.parameters(),
                                  lr=lr,
                                  betas=betas,
                                  weight_decay=wd)

    run_id = '_'.join(
        ["GMM_P2", note,
         datetime.today().strftime('%Y-%m-%d-%H.%M.%S')])
    logger = SummaryWriter(f"./training/logs/{run_id}")

    logger.add_images("test/4_outline", test_outline)
    logger.add_images("test/3_flags", unnormalize(test_flag))
    logger.add_images("test/2_target", unnormalize(test_ball_flag))

    for e in count():
        running_loss = []
        for i, inputs in enumerate(tqdm(dataloader, desc=f"Epoch {e}")):
            global_step = e * len(dataloader) + i

            flag = inputs["flag"].to(device)
            outline = inputs["outline"].to(device)
            target = inputs["ball_flag"].to(device)
            target_mask = inputs["ball_flag_mask"].to(device)
            target = target * target_mask

            grid, theta = model(flag, outline)

            # ... (lines missing from the source: the training step ends here
            # and a periodic evaluation block begins; GMM_morph, critereon, G,
            # interest_mask, and first_run are defined in the omitted code)
                    input = torch.cat([GMM_morph, outline], dim=1)

                    output = G(input)
                    output = output * interest_mask

                    loss_G = critereon(GMM_morph * interest_mask, output,
                                       target)

                    running_loss["loss_G"].append(loss_G.item())

                for key, value in running_loss.items():
                    logger.add_scalar(f"test/{key}", np.mean(value),
                                      global_step)
                running_loss = {k: [] for k in running_loss}

                logger.add_images("test/0_output", unnormalize(output),
                                  global_step)
                logger.add_images("test/2_output_outline",
                                  unnormalize(output) * (1 - outline),
                                  global_step)
                # Log these only once
                if first_run:
                    logger.add_images("test/1_target", unnormalize(target),
                                      global_step)
                    logger.add_images("test/3_target_outline",
                                      unnormalize(target) * (1 - outline),
                                      global_step)
                    logger.add_images("test/4_input_morphed",
                                      unnormalize(GMM_morph), global_step)
                    logger.add_images("test/5_input_outline", outline,
                                      global_step)
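
All of these snippets pass tensors through unnormalize before saving or logging them. Its definition is not shown; since predict below normalizes inputs with mean 0.5 and std 0.5 per channel, a minimal sketch assuming the helper simply inverts that mapping:

def unnormalize(t):
    # Assumed inverse of TF.normalize(t, [0.5] * 3, [0.5] * 3):
    # maps values from [-1, 1] back to [0, 1].
    return t * 0.5 + 0.5
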
Example #5

# Assumed imports; GMM, resize_and_pad_ball, get_black_mask, fit_resize,
# quantize_pil_image, and unnormalize are helpers from the surrounding repo.
from copy import copy

import segmentation_models_pytorch as smp
import torch
import torch.nn.functional as F
import torchvision.transforms.functional as TF
from PIL import Image, ImageDraw


def predict(
        outlines,
        flags,
        img_size=256,
        use_M1=True,
        use_M2=True,
        use_quantization=True,
        quantize_threshold=0.01,
        use_cuda=True,
):
    outlines = copy(outlines)
    flags = copy(flags)
    
    # Convert to uniform types
    if isinstance(img_size, int):
        img_size = (img_size, img_size)

    if not isinstance(outlines, list):
        outlines = [outlines]

    if not isinstance(flags, list):
        flags = [flags]

    # Set device
    device = torch.device("cuda" if torch.cuda.is_available()
                        and use_cuda else "cpu")

    # Set up models
    GMM_model = GMM(*img_size, use_cuda=use_cuda)
    GMM_model.load_state_dict(torch.load(r"../main_weights/GMM.pth"))
    M2_model = smp.Unet(encoder_name="resnet34", encoder_weights="imagenet", decoder_use_batchnorm=True,
                        decoder_attention_type="scse", in_channels=4, classes=3, activation=torch.nn.Tanh)
    M2_model.load_state_dict(torch.load(r"../main_weights/BSM.pth")["G"])
    GMM_model = GMM_model.to(device).eval()
    M2_model = M2_model.to(device).eval()

    masks = []
    uncrop_coords = []

    # Pre-process data
    for i, outline in enumerate(outlines):
        outline = outline.convert("RGBA")

        # For uncropping later
        coords = (img_size[0]//2-outline.width//2, img_size[1]//2-outline.height//2, outline.width, outline.height)
        uncrop_coords.append(coords)

        mask = outline
        ImageDraw.floodfill(mask, (0,0), (0,0,0,0), border=None, thresh=0)
        mask = resize_and_pad_ball(mask, img_size)
        masks.append(TF.to_tensor(mask.split()[-1]))

        outline = resize_and_pad_ball(outline, img_size)
        outline = get_black_mask(outline).astype(int)

        outlines[i] = TF.to_tensor(outline)

    color_flags = []

    # Pre-process data
    for i, flag in enumerate(flags):
        flag = flag.convert("RGBA")
        color_flags.append(flag.convert("RGB"))
        flag = fit_resize(flag, (img_size[0] - 15, img_size[1] - 15))
        flag = TF.center_crop(flag, img_size[::-1])
        flag = Image.composite(flag, Image.new("RGB", img_size, (255, 255, 255)), flag)
        flags[i] = TF.to_tensor(flag)

    flags = torch.stack(flags).float().to(device)
    flags = TF.normalize(flags, [0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    outlines = torch.stack(outlines).float().to(device)
    masks = torch.stack(masks).float().to(device)

    interest_mask = masks - outlines

    # First model
    if use_M1:
        grid, _ = GMM_model(flags, outlines)
        morphed_flag = F.grid_sample(flags, grid, padding_mode="border", align_corners=False)
        input_m2 = torch.cat([morphed_flag * interest_mask, outlines], dim=1)
    else:
        input_m2 = torch.cat([flags * interest_mask, outlines], dim=1)
    
    # Second model
    if use_M2:
        output = M2_model(input_m2)
        output = output * interest_mask
    else:
        output = input_m2[:, :-1]

    finished_images = []

    # Add to data structure
    for outp, outl, mask, color_flag, uc in zip(output, outlines, masks, color_flags, uncrop_coords):
        image = TF.to_pil_image(unnormalize(outp))

        # Quantize
        if use_quantization:
            image = quantize_pil_image(
                image, 
                color_flag,
                quantize_threshold
            )

        # Add outline
        image = Image.composite(
            Image.new("RGB", img_size, (0, 0, 0)), 
            image, 
            TF.to_pil_image(outl)
        )
        # Add transparent background
        image = Image.composite(
            Image.new("RGBA", img_size, (0, 0, 0, 0)),
            image.convert("RGBA"),
            TF.to_pil_image(1 - (mask + outl))
        )
        # Uncrop
        background = Image.new("RGBA", uc[-2:], (0, 0, 0, 0))
        background.paste(image, (-uc[0], -uc[1]))
        image = background

        finished_images.append(image)

    return finished_images
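
A minimal usage sketch for predict; outline.png and flag.png are hypothetical input files, and the weight files referenced at the top of the function must exist relative to the working directory:

from PIL import Image

outline = Image.open("outline.png")  # hypothetical ball-outline image
flag = Image.open("flag.png")        # hypothetical flag image

images = predict(outline, flag, img_size=256, use_cuda=False)
images[0].save("ball_flag.png")      # RGBA output, uncropped to the outline's size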