Example #1
    def _load_network(path: str) -> SVBRDFAutoencoder:
        '''
        Loads an SVBRDFAutoencoder from the given YAML configuration.

        Args:
            path: Path to the SVBRDFAutoencoder YAML configuration.
        
        Returns:
            SVBRDFAutoencoder instance as specified by the configuration.
        '''
        with open(path, 'r') as file:
            config = yaml.safe_load(file)
        for key in ('Dimensions', 'Parameters', 'Encoders', 'Decoder'):
            assert key in config, f'Scope "root" in configuration file is missing key "{key}".'
        for key in ('Path', 'Load'):
            assert key in config[
                'Parameters'], f'Scope "Parameters" in configuration file is missing key "{key}".'
        path = config['Parameters']['Path']
        load = config['Parameters']['Load']
        dims = config['Dimensions']
        encoders = {}
        for key in ('Local', 'Global', 'Periodic'):
            encoders[key] = Configuration._load_subnetwork(
                config=config['Encoders'][key])
        decoder = Configuration._load_subnetwork(config=config['Decoder'])
        device = utils.get_device_name()
        autoencoder = SVBRDFAutoencoder(dims=dims,
                                        path=path,
                                        encoders=encoders,
                                        decoder=decoder).to(device)
        if load:
            autoencoder.load()
        return autoencoder
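    # For illustration only: a configuration shape consistent with the assertions above. The key
    # names ('Dimensions', 'Parameters'/'Path'/'Load', 'Encoders'/'Local'/'Global'/'Periodic',
    # 'Decoder') come from the checks in _load_network; the values shown are hypothetical
    # placeholders, not the real schema.
    #
    #     Dimensions: {...}            # forwarded verbatim to SVBRDFAutoencoder as `dims`
    #     Parameters:
    #       Path: results/autoencoder.pth
    #       Load: false
    #     Encoders:
    #       Local: {...}               # each entry is passed to Configuration._load_subnetwork
    #       Global: {...}
    #       Periodic: {...}
    #     Decoder: {...}               # also passed to Configuration._load_subnetwork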
Example #2
def _relight_flow(config: Configuration) -> None:
    '''
    The "relight" flow renders a picture of a texture using a Light, Viewer, and Camera from a (trained) SVBRDF autoencoder.

    Args:
        config: Configuration specifying the parameters of the flow.
    '''
    with torch.no_grad():
        autoencoder, svbrdf, lights, viewer, camera, input_path, output_path = config.load_relight_flow(
        )
        autoencoder.eval()
        # It is assumed that the dimensions of the input image will be accepted by the network.
        input_image = image.load(path=input_path, encoding='sRGB')
        num_texture_rows = input_image.size(0)
        num_texture_cols = input_image.size(1)
        input_distance = utils.create_radial_distance_field(
            num_rows=num_texture_rows, num_cols=num_texture_cols)
        # By convention, PyTorch expects Tensors to be in [B, D, R, C] format.
        input_batch = torch.cat([input_image, input_distance],
                                dim=2).unsqueeze(0).permute(0, 3, 1, 2)
        normals, svbrdf.parameters = SVBRDFAutoencoder.interpret(
            autoencoder.forward(input_batch))
        _shade_render_save(normals=normals,
                           svbrdf=svbrdf,
                           lights=lights,
                           viewer=viewer,
                           camera=camera,
                           path=output_path)
def compute_diversity_losses(autoencoder: SVBRDFAutoencoder,
                             network_svbrdf: SVBRDF, dataset: Dataset,
                             material: int, samples: int,
                             vgg19: VGG19) -> Dict[str, Tensor]:
    '''
    Computes the diversity loss of the given SVBRDF autoencoder with respect to the material from the specified Dataset.
    This loss is meant to train the global and periodic encoders, as well as the decoder of the SVBRDF autoencoder.

    Args:
        autoencoder: SVBRDFAutoencoder to be used for the loss calculation.
        network_svbrdf: SVBRDF intended for the output of the SVBRDF autoencoder.
        dataset: Dataset containing the material to be reconstructed.
        material: Material in the Dataset to be reconstructed.
        samples: Number of random latent fields to create.
        vgg19: (Shared) VGG-19 instance to use to compute the content and style losses.

    Returns:
        Dictionary containing the diversity content and style losses.
    '''
    # Each latent field must be derived from the same sample in case the texture is not a Markov Random Field.
    dataset_batch, (dataset_normals, dataset_svbrdf) = dataset.sample(material)
    latents = autoencoder.encode(dataset_batch).repeat(samples, 1, 1, 1)
    # Replacing the local latent field with a random one should, ideally, cause the reconstructed normal map and SVBRDF
    # parameters to portray a different sample from the same texture.
    channels = autoencoder.dimensions['Latent']['Local'][2]
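    # torch.rand draws values from [0, 1); the scaling below maps the random local field onto [-1, 1].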
    latents[:, :channels, :, :] = torch.rand_like(
        latents[:, :channels, :, :]) * 2 - 1
    network_normals, network_svbrdf.parameters = SVBRDFAutoencoder.interpret(
        autoencoder.decode(latents))
    # Surface radiance can be interpreted as a projection-corrected rendering of a texture.
    have_radiance, want_radiance = compute_radiance(
        network_normals=network_normals,
        network_svbrdf=network_svbrdf,
        dataset_normals=dataset_normals,
        dataset_svbrdf=dataset_svbrdf)

    # The content loss term encourages the network to reconstruct different spatial features of the input image.
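    # Rolling the batch pairs each sample with its neighbour, so negating the loss rewards spatial differences between samples.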
    content_loss = -compute_content_loss(have_radiance=have_radiance,
                                         want_radiance=have_radiance.roll(
                                             1, 0),
                                         vgg19=vgg19)
    # The style loss term encourages the network to reconstruct spatially-independent features of an input image.
    style_loss = compute_style_loss(
        have_radiance=have_radiance,
        want_radiance=want_radiance.expand_as(have_radiance),
        vgg19=vgg19)
    return {'Content': content_loss, 'Style': style_loss}
Example #4
def _feedback_flow(config: Configuration) -> None:
    '''
    The "feedback" flow iteratively infers the SVBRDF parameters of a texture, renders it, and feeds the output of the
    rendering back into the network.  The purpose of this flow is to test the robustness of an SVBRDF autoencoder.

    Args:
        config: Configuration specifying the parameters of the flow.
    '''
    with torch.no_grad():
        autoencoder, svbrdf, camera, (feedback_lights, feedback_viewer), (
            rendering_lights, rendering_viewer
        ), input_path, output_path, loops = config.load_feedback_flow()
        autoencoder.eval()

        # It is assumed that the dimensions of the input image will be accepted by the network.
        input_image = image.load(path=input_path, encoding='sRGB')
        num_texture_rows = input_image.size(0)
        num_texture_cols = input_image.size(1)
        input_distance = utils.create_radial_distance_field(
            num_rows=num_texture_rows, num_cols=num_texture_cols)

        # By convention, PyTorch expects Tensors to be in [B, D, R, C] format.
        input_batch = torch.cat([input_image, input_distance],
                                dim=2).unsqueeze(0).permute(0, 3, 1, 2)

        normals, svbrdf.parameters = SVBRDFAutoencoder.interpret(
            autoencoder.forward(input_batch))
        surface = utils.create_grid(num_rows=num_texture_rows,
                                    num_cols=num_texture_cols)

        for i in tqdm.tqdm(range(loops), desc='Feedback Looping'):
            # The slightly-awkward ordering of statements before and inside the loop ensures that |loops| can be set to zero.
            input_image = shader.shade(surface=surface,
                                       normals=normals,
                                       lights=feedback_lights,
                                       viewer=feedback_viewer,
                                       svbrdf=svbrdf)[0]
            input_batch = torch.cat([input_image, input_distance],
                                    dim=2).unsqueeze(0).permute(0, 3, 1, 2)
            normals, svbrdf.parameters = SVBRDFAutoencoder.interpret(
                autoencoder.forward(input_batch))
        _shade_render_save(normals=normals,
                           svbrdf=svbrdf,
                           lights=rendering_lights,
                           viewer=rendering_viewer,
                           camera=camera,
                           path=output_path)
def compute_reconstruction_losses(autoencoder: SVBRDFAutoencoder,
                                  network_svbrdf: SVBRDF, dataset: Dataset,
                                  material: int, samples: int,
                                  vgg19: VGG19) -> Dict[str, Tensor]:
    '''
    Computes the reconstruction loss of the given SVBRDF autoencoder with respect to the material from the specified Dataset.
    This loss is meant to train all of the encoders as well as the decoder of the SVBRDF autoencoder.

    Args:
        autoencoder: SVBRDFAutoencoder to be used for the loss calculation.
        network_svbrdf: SVBRDF intended for the output of the SVBRDF autoencoder.
        dataset: Dataset containing the material to be reconstructed.
        material: Material in the Dataset to be reconstructed.
        samples: Number of material samples to take.
        vgg19: (Shared) VGG-19 instance to use to compute the content and style losses.

    Returns:
        Dictionary containing the reconstruction content, style, and texel losses.
    '''
    # Taking multiple samples per step accelerates training in comparison to single-sample SGD.
    dataset_batch, (dataset_normals,
                    dataset_svbrdf) = dataset.sample(material,
                                                     quantity=samples)
    network_normals, network_svbrdf.parameters = SVBRDFAutoencoder.interpret(
        autoencoder.forward(dataset_batch))
    # Surface radiance can be interpreted as a projection-corrected rendering of a texture.
    have_radiance, want_radiance = compute_radiance(
        network_normals=network_normals,
        network_svbrdf=network_svbrdf,
        dataset_normals=dataset_normals,
        dataset_svbrdf=dataset_svbrdf)

    # The content loss term encourages the network to reconstruct spatial features of an input image.
    content_loss = compute_content_loss(have_radiance=have_radiance,
                                        want_radiance=want_radiance,
                                        vgg19=vgg19)
    # The style loss term encourages the network to reconstruct spatially-independent features of an input image.
    style_loss = compute_style_loss(have_radiance=have_radiance,
                                    want_radiance=want_radiance,
                                    vgg19=vgg19)
    # The texel loss term is a stricter content loss term.
    texel_loss = compute_texel_loss(have_radiance=have_radiance,
                                    want_radiance=want_radiance)
    return {'Content': content_loss, 'Style': style_loss, 'Texel': texel_loss}
Example #6
def _blend_flow(config: Configuration) -> None:
    '''
    The "blend" flow blends two textures using a (trained) SVBRDF autoencoder and renders the result.

    Args:
        config: Configuration specifying the parameters of the flow.
    '''
    with torch.no_grad():
        autoencoder, svbrdf, lights, viewer, camera, alphas, input_paths, output_path = config.load_blend_flow(
        )
        for key in ('Local', 'Global', 'Periodic'):
            assert key in alphas, f'Alphas dictionary is missing key "{key}".'
            assert 0 <= alphas[
                key] <= 1, f'Alpha value for key "{key}" falls outside the closed interval [0, 1].'
        autoencoder.eval()

        # It is assumed that the dimensions of the input images will be accepted by the network.
        input_images = torch.stack([
            image.load(path=input_path, encoding='sRGB')
            for input_path in input_paths
        ],
                                   dim=0)

        # The radial distance field should be the same for both input images.
        num_texture_rows = autoencoder.dimensions['Texture']['Input'][0]
        num_texture_cols = autoencoder.dimensions['Texture']['Input'][1]
        input_distance = utils.create_radial_distance_field(
            num_rows=num_texture_rows, num_cols=num_texture_cols)
        # By convention, PyTorch expects Tensors to be in [B, D, R, C] format.
        input_batch = torch.cat(
            [input_images, input_distance.expand(2, -1, -1, -1)],
            dim=3).permute(0, 3, 1, 2)

        # The blended latent tensor must have a batch dimension to proceed through the SVBRDF decoder.
        texture_latents = autoencoder.encode(input_batch)
        blended_latents = torch.zeros_like(texture_latents[:1])
        start_channel = 0
        for key in ('Local', 'Global', 'Periodic'):
            # Crucially, the latent components must be traversed in the order in which they are concatenated along the channel dimension.
            step_channel = autoencoder.dimensions['Latent'][key][2]
            stop_channel = start_channel + step_channel
            channels = slice(start_channel, stop_channel)
            # An alpha value of 0 represents the first texture while an alpha value of 1 represents the second texture.
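            # For instance (hypothetically), alphas of {'Local': 0.0, 'Global': 1.0, 'Periodic': 1.0} would pair the spatial layout of the first texture with the overall appearance of the second.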
            blended_latents[0, channels, :, :] = texture_latents[0, channels, :, :] * (1 - alphas[key]) + \
                                                 texture_latents[1, channels, :, :] * alphas[key]
            start_channel = stop_channel
        normals, svbrdf.parameters = SVBRDFAutoencoder.interpret(
            autoencoder.decode(blended_latents))
        _shade_render_save(normals=normals,
                           svbrdf=svbrdf,
                           lights=lights,
                           viewer=viewer,
                           camera=camera,
                           path=output_path)
def publish_image_results(dashboard: SummaryWriter, mode: str, steps: int,
                          autoencoder: SVBRDFAutoencoder,
                          network_svbrdf: SVBRDF, dataset: Dataset,
                          materials: Iterable[int]) -> None:
    '''
    Publishes a series of images to the given TensorBoard depicting reconstructions (diverse and otherwise) of the
    specified materials.

    Args:
        dashboard: TensorBoard to host the published data.
        mode: Mode associated with the published data (i.e., "Training" or "Testing").
        steps: Step count associated with the published data.
        autoencoder: SVBRDFAutoencoder to be used to reconstruct the Dataset images.
        network_svbrdf: SVBRDF intended for the output of the SVBRDF autoencoder.
        dataset: Dataset consisting of inputs to the SVBRDF autoencoder and ground-truth Tensors.
        materials: Indices of the materials in the Dataset to be reconstructed.
    '''
    for material in materials:
        texture = dataset.textures[material]
        # One sample should be enough to hint at the reconstruction performance of the SVBRDF autoencoder.
        dataset_batch, (dataset_normals,
                        dataset_svbrdf) = dataset.sample(material)
        network_normals, network_svbrdf.parameters = SVBRDFAutoencoder.interpret(
            autoencoder.forward(dataset_batch))
        have_radiance, want_radiance = compute_radiance(
            network_normals=network_normals,
            network_svbrdf=network_svbrdf,
            dataset_normals=dataset_normals,
            dataset_svbrdf=dataset_svbrdf)
        # The sRGB colour space applies a desirable gamma correction.
        input_image = image.convert_RGB_to_sRGB(dataset_batch[0, :3].permute(
            1, 2, 0))
        have_image = image.convert_RGB_to_sRGB(have_radiance[0])
        want_image = image.convert_RGB_to_sRGB(want_radiance[0])
        reconstruction_images = [input_image, want_image, have_image]
        # By convention, the shader module in this repository outputs radiance in [B, R, C, 3] order.
        dashboard.add_images(tag=f'{mode} / {texture}',
                             global_step=steps,
                             dataformats='NHWC',
                             img_tensor=torch.stack(reconstruction_images,
                                                    dim=0))
Example #8
def _warp_flow(config: Configuration) -> None:
    '''
    The "warp" flow renders a plane from a source texture by sampling a local latent field uniformly at random.

    Args:
        config: Configuration specifying the parameters of the flow.
    '''
    with torch.no_grad():
        autoencoder, svbrdf, lights, viewer, camera, output_size, input_path, output_path = config.load_warp_flow(
        )
        autoencoder.eval()

        # It is assumed that the dimensions of the input image will be accepted by the SVBRDF autoencoder network.
        input_images = image.load(path=input_path,
                                  encoding='sRGB').unsqueeze(0)
        num_texture_rows = autoencoder.dimensions['Texture']['Input'][0]
        num_texture_cols = autoencoder.dimensions['Texture']['Input'][1]
        input_distance = utils.create_radial_distance_field(
            num_rows=num_texture_rows, num_cols=num_texture_cols).unsqueeze(0)
        input_batch = torch.cat([input_images, input_distance],
                                dim=3).permute(0, 3, 1, 2)

        # The expansion ratios represent the multiplicative scaling in size from the latent field to the output texture.
        row_expansion_ratio = autoencoder.dimensions['Texture']['Output'][
            0] // autoencoder.dimensions['Latent']['Local'][0]
        col_expansion_ratio = autoencoder.dimensions['Texture']['Output'][
            1] // autoencoder.dimensions['Latent']['Local'][1]

        # The value at each position in the local field is sampled uniformly at random to simulate structural noise.
        num_warped_rows = output_size[0] // row_expansion_ratio
        num_warped_cols = output_size[1] // col_expansion_ratio
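        # For example (hypothetical sizes), a 256x256 autoencoder output with a 16x16 local latent gives expansion ratios of 16, so a 1024x1024 warped texture needs a 64x64 local field.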
        local_field = torch.rand(
            (1, autoencoder.dimensions['Latent']['Local'][2], num_warped_rows,
             num_warped_cols))
        # The global field is the same everywhere to preserve the look and feel of the input texture.
        global_field = autoencoder.encoders['Global'].forward(
            input_batch).expand(1, num_warped_rows, num_warped_cols,
                                -1).permute(0, 3, 1, 2)
        # The periodic field is derived directly from the global field.
        periodic_field = autoencoder.derive_periodic_field(global_field)

        # The fully-convolutional nature of the SVBRDF decoder trivializes the creation of textures with arbitrary sizes.
        latents = torch.cat([local_field, global_field, periodic_field], dim=1)
        normals, svbrdf.parameters = SVBRDFAutoencoder.interpret(
            autoencoder.decode(latents))
        _shade_render_save(normals=normals,
                           svbrdf=svbrdf,
                           lights=lights,
                           viewer=viewer,
                           camera=camera,
                           path=output_path)
Example #9
def _tile_flow(config: Configuration) -> None:
    '''
    The "tile" flow attempts to synthesize a tileable output texture from a given input texture.

    Args:
        config: Configuration specifying the parameters of the flow.
    '''
    with torch.no_grad():
        autoencoder, svbrdf, lights, viewer, camera, overlap, input_path, output_path = config.load_tile_flow(
        )
        autoencoder.eval()

        # It is assumed that the dimensions of the input image will be accepted by the network.
        input_image = image.load(path=input_path, encoding='sRGB')
        input_distance = utils.create_radial_distance_field(
            num_rows=autoencoder.dimensions['Texture']['Input'][0],
            num_cols=autoencoder.dimensions['Texture']['Input'][1])
        input_batch = torch.cat([input_image, input_distance],
                                dim=2).unsqueeze(0).permute(0, 3, 1, 2)

        # As long as the receptive field of an output pixel is less than the size of the latent field, a tileable
        # output texture can be obtained by decoding a tiling of the latent field (interpolated for good measure).
        latent_tiles_row = autoencoder.encode(input_batch).expand(
            3, -1, -1, -1).permute(0, 2, 3, 1)
        latent_field_row = utils.interpolate(latent_tiles_row,
                                             overlap=overlap).expand(
                                                 3, -1, -1, -1)
        latent_field = utils.interpolate(
            latent_field_row.transpose(1, 2),
            overlap=overlap).transpose(0, 1).unsqueeze(0).permute(0, 3, 1, 2)

        # The center crop of the output image will be tileable as long as the latent field was smoothly convolved.
        output = autoencoder.decode(latent_field)
        output_row_padding = output.size(
            2) // 2 - autoencoder.dimensions['Texture']['Output'][0] // 2
        output_col_padding = output.size(
            3) // 2 - autoencoder.dimensions['Texture']['Output'][1] // 2
        cropped_output = output[:, :, output_row_padding:-output_row_padding,
                                output_col_padding:-output_col_padding]

        # The fully-convolutional nature of the SVBRDF decoder trivializes the creation of textures with arbitrary sizes.
        normals, svbrdf.parameters = SVBRDFAutoencoder.interpret(
            cropped_output)
        _shade_render_save(normals=normals,
                           svbrdf=svbrdf,
                           lights=lights,
                           viewer=viewer,
                           camera=camera,
                           path=output_path)
def optimize(
        autoencoder: SVBRDFAutoencoder,
        svbrdf: SVBRDF,
        datasets: Dict[str, Dataset],
        optimizer: torch.optim.Optimizer,  # type: ignore
        epochs: int,
        cycles: int,
        samples: int,
        frequencies: Dict[str, int],
        loss_weights: Weights,
        early_stopping: Dict,
        experiment: str) -> None:
    '''
    Optimizes the given SVBRDF autoencoder using the provided SVBRDF, Datasets, Optimizer, and hyperparameters.

    Args:
        autoencoder: SVBRDFAutoencoder to be optimized.
        svbrdf: SVBRDF intended for the output of the SVBRDF autoencoder.
        datasets: Mapping between Dataset names (e.g., "Training") and, well, Datasets.
        optimizer: Optimizer that updates the parameters in the SVBRDF autoencoder.
        epochs: Number of training epochs.
        cycles: Number of training steps to execute during each epoch.
        samples: Size of a training batch.
        frequencies: Mapping between event names (e.g., "Parameter Checkpoint") and the number of training steps between
                     executions of these events.
        loss_weights: Mapping between loss types (e.g., "Reconstruction") and dictionaries that associate loss components
                      (e.g., "Style") with their corresponding weights.
        early_stopping: Mapping containing the early stopping "Epsilon" and "Patience" values.
        experiment: Name of the current experiment.
    '''
    # The structure of the given dictionaries is checked here rather than in the Configuration to avoid bloating distant
    # code and keep the relevant implementation in one place.
    for key in ('Training', 'Testing'):
        assert key in datasets, f'Dataset dictionary is missing key {key}.'
    for key in ('Reconstruction', ):
        assert key in loss_weights, f'Loss weights dictionary is missing key {key}.'
    # for key in ('Content', 'Style'):
    #     assert key in loss_weights['Diversity'], f'Loss weights dictionary is missing key {key} under scope "Diversity".'
    for key in ('Content', 'Style', 'Texel'):
        assert key in loss_weights[
            'Reconstruction'], f'Loss weights dictionary is missing key {key} under scope "Reconstruction".'
    for key in ('Tests Publication', 'Image Publication',
                'Parameter Checkpoint', 'Early Stopping'):
        assert key in frequencies, f'Frequencies dictionary is missing key {key}.'
    for key in ('Epsilon', 'Patience'):
        assert key in early_stopping, f'Early stopping dictionary is missing key {key}.'

    def zero_backward_step(loss: Tensor) -> None:
        '''Updates the parameters of the SVBRDF autoencoder using the given loss Tensor, taking care to clear gradients beforehand.'''
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Sharing the same VGG-19 network instance across all invocations is absolutely critical to performance.
    # Furthermore, the VGG-19 network must be initialized after the primary computation device has been set.
    vgg19 = VGG19().to(utils.get_device_name())
    vgg19.eval()

    # A SummaryWriter can be used to publish data to TensorBoard.
    run_name = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    dashboard = SummaryWriter(f'results/runs/{run_name} - {experiment}')

    # Tracking the number of training steps is helpful for TensorBoard tracking and implementing "every-X-step" behaviour.
    steps = 0

    # The best testing loss record tracks the minimum testing loss achieved so far.
    best_testing_loss = torch.tensor(float('inf'))
    # The early stopping counter tracks the number of early stopping checks that have transpired since the best testing loss was updated.
    early_stopping_counter = 0

    # Each epoch introduces a new material to the training loop.
    for epoch in range(epochs):
        for cycle in tqdm.tqdm(range(cycles),
                               desc=f'Epoch {epoch} Progress',
                               total=cycles):
            # Materials from the Dataset are selected in a round-robin fashion following the training technique described
            # in the Diversified Texture Synthesis with Feed-forward Networks paper.
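            # For example, with three training materials, epoch 0 uses material 0 only, epoch 1 alternates 0 and 1, and from epoch 2 onward the cycle index sweeps 0, 1, 2, 0, 1, 2, ...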
            material = cycle % min(epoch + 1, len(datasets['Training']))

            # The reconstruction loss encourages the network to accurately infer the SVBRDF parameters of a given texture.
            reconstruction_losses = compute_reconstruction_losses(
                autoencoder=autoencoder,
                network_svbrdf=svbrdf,
                samples=samples,
                dataset=datasets['Training'],
                material=material,
                vgg19=vgg19)
            reconstruction_losses['Total'] = loss_weights['Reconstruction']['Content'] * reconstruction_losses['Content'] + \
                                             loss_weights['Reconstruction']['Style'] * reconstruction_losses['Style'] + \
                                             loss_weights['Reconstruction']['Texel'] * reconstruction_losses['Texel']
            zero_backward_step(loss=reconstruction_losses['Total'])

            # The diversity loss encourages the network to encode the style of a texture in the global latent vector.
            # diversity_losses = compute_diversity_losses(autoencoder=autoencoder, network_svbrdf=svbrdf, samples=samples,
            #                                             dataset=datasets['Training'], material=material, vgg19=vgg19)
            # diversity_losses['Total'] = loss_weights['Diversity']['Content'] * diversity_losses['Content'] + \
            #                             loss_weights['Diversity']['Style'] * diversity_losses['Style']
            # zero_backward_step(loss=diversity_losses['Total'])

            # The training losses are published to the dashboard after each training iteration.
            texture = datasets['Training'].textures[material]
            losses = {'Reconstruction': reconstruction_losses}
            publish_scalar_results(dashboard=dashboard,
                                   mode='Training',
                                   steps=steps,
                                   texture=texture,
                                   losses=losses)

            # Adding one to the number of steps avoids triggering an event on the first training iteration.
            progress = steps + 1
            if progress % frequencies['Parameter Checkpoint'] == 0:
                autoencoder.save()
            if progress % frequencies['Image Publication'] == 0:
                materials = range(min(epoch + 1, len(datasets['Training'])))
                publish_image_results(dashboard=dashboard,
                                      mode='Training',
                                      steps=steps,
                                      autoencoder=autoencoder,
                                      network_svbrdf=svbrdf,
                                      dataset=datasets['Training'],
                                      materials=materials)
            if progress % frequencies['Tests Publication'] == 0:
                publish_testing_results(dashboard=dashboard,
                                        steps=steps,
                                        autoencoder=autoencoder,
                                        network_svbrdf=svbrdf,
                                        dataset=datasets['Testing'],
                                        samples=samples,
                                        loss_weights=loss_weights,
                                        vgg19=vgg19)
            if progress % frequencies['Early Stopping'] == 0 and epoch >= len(
                    datasets['Training']):
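                # Early stopping is only evaluated after every training material has been introduced into the round-robin rotation.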
                # The testing loss is the mean loss of each material in the testing dataset.
                testing_loss = compute_testing_loss(
                    autoencoder=autoencoder,
                    network_svbrdf=svbrdf,
                    dataset=datasets['Testing'],
                    samples=samples,
                    loss_weights=loss_weights,
                    vgg19=vgg19)
                # The epsilon factor avoids delaying the early stopping due to noise.
                if testing_loss < best_testing_loss - early_stopping['Epsilon']:
                    best_testing_loss = testing_loss
                    early_stopping_counter = 0
                else:
                    early_stopping_counter += 1
                    if early_stopping_counter >= early_stopping['Patience']:
                        logging.info(
                            'Early stopping triggered at epoch %d cycle %d',
                            epoch, cycle)
                        # The parameter weights may no longer be "optimal" but they are probably close enough.
                        autoencoder.save()
                        dashboard.close()
                        return
            steps += 1
    dashboard.close()
Example #11
def _shuffle_flow(config: Configuration) -> None:
    '''
    The "shuffle" flow expands the SVBRDF parameters of an image to fill an arbitrary plane by shuffling latent tiles.

    Args:
        config: Configuration specifying the parameters of the flow.
    '''
    with torch.no_grad():
        autoencoder, svbrdf, lights, viewer, camera, tile_size, output_size, input_path, output_path = config.load_shuffle_flow(
        )
        autoencoder.eval()

        # Continuing to index sizes with 0 and 1 is simultaneously confusing and a potential debugging nightmare.
        num_tile_rows, num_tile_cols = tile_size
        num_output_rows, num_output_cols = output_size

        # Similarly, it is worthwhile to give names to the otherwise-generic SVBRDF autoencoder dimensions.
        num_latent_rows = autoencoder.dimensions['Latent']['Local'][0]
        num_latent_cols = autoencoder.dimensions['Latent']['Local'][1]
        num_texture_rows = autoencoder.dimensions['Texture']['Input'][0]
        num_texture_cols = autoencoder.dimensions['Texture']['Input'][1]
        row_expansion_ratio = autoencoder.dimensions['Texture']['Output'][
            0] // num_latent_rows
        col_expansion_ratio = autoencoder.dimensions['Texture']['Output'][
            1] // num_latent_cols

        # These sanity checks may seem obvious but you never know...
        assert num_tile_rows <= num_latent_rows, 'Tile height cannot exceed the height of the latent field.'
        assert num_tile_cols <= num_latent_cols, 'Tile width cannot exceed the width of the latent field.'
        assert num_output_rows % (
            row_expansion_ratio * num_tile_rows
        ) == 0, 'Latent height inferred from the output height must be a multiple of the tile height.'
        assert num_output_cols % (
            col_expansion_ratio * num_tile_cols
        ) == 0, 'Latent width inferred from the output width must be a multiple of the tile width.'

        # It is assumed that the dimensions of the input images will be accepted by the network.
        input_images = image.load(path=input_path,
                                  encoding='sRGB').unsqueeze(0)
        input_distance = utils.create_radial_distance_field(
            num_rows=num_texture_rows, num_cols=num_texture_cols).unsqueeze(0)
        input_batch = torch.cat([input_images, input_distance],
                                dim=3).permute(0, 3, 1, 2)
        input_latent = autoencoder.encode(input_batch)

        # As mentioned in the assertions, the size of the shuffled latent field can be inferred from the desired output texture size.
        num_shuffled_rows = num_output_rows // row_expansion_ratio
        num_shuffled_cols = num_output_cols // col_expansion_ratio
        shuffled_latent = torch.zeros(
            (1, input_latent.size(1), num_shuffled_rows, num_shuffled_cols),
            device=utils.get_device_name())

        # The shuffled latent is populated with random tiles from the input image latent.
        for row in range(0, shuffled_latent.size(2), num_tile_rows):
            for col in range(0, shuffled_latent.size(3), num_tile_cols):
                original_row_crop, original_col_crop = utils.sample_embedded_rectangle(
                    num_outer_rows=input_latent.size(2),
                    num_inner_rows=num_tile_rows,
                    num_outer_cols=input_latent.size(3),
                    num_inner_cols=num_tile_cols)
                shuffled_row_crop, shuffled_col_crop = slice(
                    row, row + num_tile_rows), slice(col, col + num_tile_cols)
                shuffled_latent[:, :, shuffled_row_crop,
                                shuffled_col_crop] = input_latent[:, :,
                                                                  original_row_crop,
                                                                  original_col_crop]

        # The periodic latent component needs to be aligned with its relative position in the field.
        channels = {
            key: autoencoder.dimensions['Latent'][key][2]
            for key in ('Local', 'Global', 'Periodic')
        }
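        # The channel layout of the latent tensor is [Local | Global | Periodic], which the slices below rely on.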
        global_field = shuffled_latent[:, channels['Local']:channels['Local'] +
                                       channels['Global'], :, :]
        shuffled_latent[:, -channels[
            'Periodic']:, :, :] = autoencoder.derive_periodic_field(
                global_field)

        # The fully-convolutional nature of the SVBRDF decoder trivializes the creation of textures with arbitrary sizes.
        normals, svbrdf.parameters = SVBRDFAutoencoder.interpret(
            autoencoder.decode(shuffled_latent))
        _shade_render_save(normals=normals,
                           svbrdf=svbrdf,
                           lights=lights,
                           viewer=viewer,
                           camera=camera,
                           path=output_path)
Example #12
def _album_flow(config: Configuration) -> None:
    '''
    The "album" flow generates an image by blending the latent fields of a random sample of input images.

    Args:
        config: Configuration specifying the parameters of the flow.
    '''
    with torch.no_grad():
        autoencoder, svbrdf, lights, viewer, camera, output_size, overlap, input_paths, output_path = config.load_album_flow(
        )
        autoencoder.eval()

        # Interpreting the indexing of dimensions exactly once saves more minutes of debugging than keystrokes.
        num_output_rows = output_size[0]
        num_output_cols = output_size[1]
        num_texture_input_rows = autoencoder.dimensions['Texture']['Input'][0]
        num_texture_input_cols = autoencoder.dimensions['Texture']['Input'][1]
        num_texture_output_rows = autoencoder.dimensions['Texture']['Output'][
            0]
        num_texture_output_cols = autoencoder.dimensions['Texture']['Output'][
            1]

        # The number of rows and columns that constitute the latent grid (to be decoded into the output image) can be
        # inferred from the relative size of the output image and the SVBRDF autoencoder input.
        num_grid_rows = num_output_rows // num_texture_output_rows
        num_grid_cols = num_output_cols // num_texture_output_cols
        assert (num_output_rows % num_texture_output_rows == 0) and (num_output_cols % num_texture_output_cols == 0), \
               'SVBRDF autoencoder output size must divide output image size.'
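        # For example (hypothetical sizes), a 1024x1536 output with 256x256 autoencoder output tiles yields a 4x6 latent grid.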

        # The images to be included in the latent grid are chosen uniformly at random with replacement from the specified input images.
        input_images = torch.stack([
            image.load(path=input_path, encoding='sRGB')
            for input_path in input_paths
        ],
                                   dim=0)
        album_images = input_images[torch.randint(low=0,
                                                  high=len(input_images),
                                                  size=(num_grid_rows *
                                                        num_grid_cols, ))]

        # Before feeding the images through the SVBRDF autoencoder, they must be augmented with a radial distance field.
        input_distance = utils.create_radial_distance_field(
            num_rows=num_texture_input_rows, num_cols=num_texture_input_cols)
        input_batch = torch.cat([
            album_images,
            input_distance.expand(album_images.size(0), -1, -1, -1)
        ],
                                dim=3).permute(0, 3, 1, 2)

        # The latent field is assembled by splitting the batch of latent tiles according to their latent grid rows and
        # then interpolating the latent field between each tile in a bilinear fashion.
        texture_latents = torch.stack(autoencoder.encode(input_batch).permute(
            0, 2, 3, 1).split(num_grid_cols, dim=0),
                                      dim=0)
        blended_latents = utils.interpolate(
            torch.stack([
                utils.interpolate(texture_latent_row,
                                  overlap=overlap).transpose(0, 1)
                for texture_latent_row in texture_latents
            ],
                        dim=0),
            overlap=overlap).transpose(0, 1).unsqueeze(0).permute(0, 3, 1, 2)

        # The previous blending procedure leaves the periodic latent component out of alignment with the field indices.
        channels = {
            key: autoencoder.dimensions['Latent'][key][2]
            for key in ('Local', 'Global', 'Periodic')
        }
        global_field = blended_latents[:, channels['Local']:channels['Local'] +
                                       channels['Global'], :, :]
        blended_latents[:, -channels[
            'Periodic']:, :, :] = autoencoder.derive_periodic_field(
                global_field)

        # The fully-convolutional nature of the SVBRDF decoder trivializes the creation of textures with arbitrary sizes.
        normals, svbrdf.parameters = SVBRDFAutoencoder.interpret(
            autoencoder.decode(blended_latents))
        _shade_render_save(normals=normals,
                           svbrdf=svbrdf,
                           lights=lights,
                           viewer=viewer,
                           camera=camera,
                           path=output_path)
Example #13
def _mosaic_flow(config: Configuration) -> None:
    '''
    The "mosaic" flow reconstructs an image of arbitrary scale by partitioning the given image into smaller images,
    encoding the smaller images as latent fields, and then blending the resulting latent fields in a bilinear fashion.

    Args:
        config: Configuration specifying the parameters of the flow.
    '''
    with torch.no_grad():
        autoencoder, svbrdf, lights, viewer, camera, overlap, input_path, output_path = config.load_mosaic_flow(
        )
        autoencoder.eval()

        input_image = image.load(path=input_path, encoding='sRGB')

        # If the input size of the SVBRDF autoencoder does not evenly divide the input image, no valid partitioning exists.
        num_image_rows = input_image.size(0)
        num_image_cols = input_image.size(1)
        num_texture_rows = autoencoder.dimensions['Texture']['Input'][0]
        num_texture_cols = autoencoder.dimensions['Texture']['Input'][1]
        assert (num_image_rows % num_texture_rows == 0) and (
            num_image_cols % num_texture_cols == 0
        ), 'SVBRDF autoencoder input size must divide input image size.'

        # The input batch is constructed by splitting the image and distance fragments by row and then by column, and
        # then concatenating the result in such a way as to form a single column which can be stacked to form a batch.
        input_distance = utils.create_radial_distance_field(
            num_rows=num_image_rows, num_cols=num_image_cols)
        input_tensor = torch.cat([input_image, input_distance], dim=2)
        input_batch = torch.cat([
            torch.stack([
                input_batch_tile
                for input_batch_tile in input_batch_row.split(num_texture_cols,
                                                              dim=1)
            ],
                        dim=0)
            for input_batch_row in input_tensor.split(num_texture_rows, dim=0)
        ],
                                dim=0).permute(0, 3, 1, 2)
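        # The resulting batch stores the tiles in row-major order: tile (i, j) of the grid ends up at batch index i * num_grid_cols + j.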

        # The latent dimensions defining the size of a sample latent field can be visualized as follows:
        #   +------------+------------+------------+------------+        +----+----+----+----+
        #   |            |            |            |            |        |  1 |  2 |  3 |  4 |
        #   |      1     |      2     |      3     |      4     |        +----+----+----+----+
        #   |            |            |            |            |        |  5 |  6 |  7 |  8 |
        #   +------------+------------+------------+------------+        +----+----+----+----+
        #   |            |            |            |            |        |  9 | 10 | 11 | 12 |
        #   |      5     |      6     |      7     |      8     |        +----+----+----+----+
        #   |            |            |            |            |         <----- Grid ------>
        #   +------------+------------+------------+------------+
        #   |            |            |            |            |
        #   |      9     |     10     |     11     |     12     |
        #   |            |            |            |            |
        #   +------------+------------+------------+------------+
        #    <-- Tile -->
        #    <----------------- Latent Field ------------------>
        num_grid_cols = num_image_cols // num_texture_cols

        # The latent field is assembled by splitting the batch of latent tiles according to their latent grid rows and
        # then interpolating the latent field between each tile in a bilinear fashion.
        texture_latents = torch.stack(autoencoder.encode(input_batch).permute(
            0, 2, 3, 1).split(num_grid_cols, dim=0),
                                      dim=0)
        blended_latents = utils.interpolate(
            torch.stack([
                utils.interpolate(texture_latent_row,
                                  overlap=overlap).transpose(0, 1)
                for texture_latent_row in texture_latents
            ],
                        dim=0),
            overlap=overlap).transpose(0, 1).unsqueeze(0).permute(0, 3, 1, 2)

        # The previous blending procedure leaves the periodic latent component out of alignment with the field indices.
        channels = {
            key: autoencoder.dimensions['Latent'][key][2]
            for key in ('Local', 'Global', 'Periodic')
        }
        global_field = blended_latents[:, channels['Local']:channels['Local'] +
                                       channels['Global'], :, :]
        blended_latents[:, -channels[
            'Periodic']:, :, :] = autoencoder.derive_periodic_field(
                global_field)

        # The fully-convolutional nature of the SVBRDF decoder trivializes the creation of textures with arbitrary sizes.
        normals, svbrdf.parameters = SVBRDFAutoencoder.interpret(
            autoencoder.decode(blended_latents))
        _shade_render_save(normals=normals,
                           svbrdf=svbrdf,
                           lights=lights,
                           viewer=viewer,
                           camera=camera,
                           path=output_path)
Example #14
def _morph_flow(config: Configuration) -> None:
    '''
    The "morph" flow morphs one texture into another over a series of discrete tiles.

    Args:
        config: Configuration specifying the parameters of the flow.
    '''
    with torch.no_grad():
        autoencoder, svbrdf, lights, viewer, camera, between, input_paths, output_path = config.load_morph_flow(
        )
        autoencoder.eval()

        # The total number of tiles includes the two textures on either end as well as the tiles between the textures.
        tiles = 2 + between
        device = utils.get_device_name()

        # It is assumed that the dimensions of the input images will be accepted by the network.
        input_images = torch.stack([
            image.load(path=input_path, encoding='sRGB')
            for input_path in input_paths
        ],
                                   dim=0)
        # The radial distance field should be the same for both input images.
        num_texture_rows = autoencoder.dimensions['Texture']['Input'][0]
        num_texture_cols = autoencoder.dimensions['Texture']['Input'][1]
        input_distance = utils.create_radial_distance_field(
            num_rows=num_texture_rows, num_cols=num_texture_cols)
        # By convention, PyTorch expects Tensors to be in [B, D, R, C] format.
        input_batch = torch.cat(
            [input_images, input_distance.expand(2, -1, -1, -1)],
            dim=3).permute(0, 3, 1, 2)

        # The width and height of the SVBRDF autoencoder latent are shared between all latent components.
        num_latent_rows = autoencoder.dimensions['Latent']['Local'][0]
        num_latent_cols = autoencoder.dimensions['Latent']['Local'][1]

        # The local field latent is blended such that each texel within a tile has the same alpha component.
        #     +------------+------------+------------+------------+------------+
        #     |  α = 0.00  |  α = 0.25  |  α = 0.50  |  α = 0.75  |  α = 1.00  |
        #     +------------+------------+------------+------------+------------+
        local_encoder_output = autoencoder.encoders['Local'].forward(
            input_batch)
        local_field_output = local_encoder_output.repeat(1, 1, 1,
                                                         tiles).permute(
                                                             0, 2, 3, 1)
        local_field_alphas = torch.linspace(
            0, 1, tiles,
            device=device).repeat_interleave(num_latent_cols).expand(
                num_latent_rows, -1).unsqueeze(-1)
        local_field = torch.lerp(local_field_output[0], local_field_output[1],
                                 local_field_alphas).permute(2, 0, 1)

        # The global field latent is blended continuously between the left and right textures.
        #     +------------+------------+------------+------------+------------+
        #     |  α = 0.00  |  α = 0.00 ... ... 0.50 ... ... 1.00  |  α = 1.00  |
        #     +------------+------------+------------+------------+------------+
        global_encoder_output = autoencoder.encoders['Global'].forward(
            input_batch)
        global_field_output = global_encoder_output.expand(
            num_latent_rows, num_latent_cols * tiles, -1,
            -1).permute(2, 0, 1, 3)
        global_field_alphas = torch.cat([
            torch.zeros(num_latent_cols, device=device),
            torch.linspace(0, 1, num_latent_cols * between, device=device),
            torch.ones(num_latent_cols, device=device)
        ]).expand(num_latent_rows, -1).unsqueeze(-1)
        global_field = torch.lerp(global_field_output[0],
                                  global_field_output[1],
                                  global_field_alphas).permute(2, 0, 1)

        # Fortunately, the periodic field latent does not demand any special treatment.
        periodic_field = autoencoder.derive_periodic_field(
            global_field.unsqueeze(0)).squeeze(0)

        # The fully-convolutional nature of the SVBRDF decoder trivializes the creation of textures with arbitrary sizes.
        latents = torch.cat([local_field, global_field, periodic_field],
                            dim=0).unsqueeze(0)
        normals, svbrdf.parameters = SVBRDFAutoencoder.interpret(
            autoencoder.decode(latents))
        _shade_render_save(normals=normals,
                           svbrdf=svbrdf,
                           lights=lights,
                           viewer=viewer,
                           camera=camera,
                           path=output_path)
Example #15
def _merge_flow(config: Configuration) -> None:
    '''
    The "merge" flow melds two overlapping textures by smoothly blending their latent fields.

    Args:
        config: Configuration specifying the parameters of the flow.
    '''
    with torch.no_grad():
        autoencoder, svbrdf, lights, viewer, camera, overlap, input_paths, output_path = config.load_merge_flow(
        )
        autoencoder.eval()

        # It is assumed that the dimensions of the input images will be accepted by the network.
        input_images = torch.stack([
            image.load(path=input_path, encoding='sRGB')
            for input_path in input_paths
        ],
                                   dim=0)
        # The radial distance field should be the same for both input images.
        num_texture_rows = autoencoder.dimensions['Texture']['Input'][0]
        num_texture_cols = autoencoder.dimensions['Texture']['Input'][1]
        input_distance = utils.create_radial_distance_field(
            num_rows=num_texture_rows, num_cols=num_texture_cols)
        # By convention, PyTorch expects Tensors to be in [B, D, R, C] format.
        input_batch = torch.cat(
            [input_images, input_distance.expand(2, -1, -1, -1)],
            dim=3).permute(0, 3, 1, 2)

        # The width and height of the SVBRDF autoencoder latent are shared between all latent components.
        num_latent_rows = autoencoder.dimensions['Latent']['Local'][0]
        num_latent_cols = autoencoder.dimensions['Latent']['Local'][1]

        # The latent field corresponding to each texture must be padded in the region where it has no influence.
        device = utils.get_device_name()
        channels = {
            key: autoencoder.dimensions['Latent'][key][2]
            for key in ('Local', 'Global', 'Periodic')
        }
        padding = torch.zeros(
            (num_latent_rows, num_latent_cols - overlap, sum(
                channels.values())),
            device=device)
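        # Each widened latent therefore spans 2 * num_latent_cols - overlap columns, matching the width of the alpha ramp below.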

        # The latent field is blended smoothly across the overlapping region as follows:
        #     +------------+---------------------+------------+
        #     |  α = 0.00  |  α = 0.00 ... 1.00  |  α = 1.00  |
        #     +------------+---------------------+------------+
        #                   <----- Overlap ----->
        texture_latents = autoencoder.encode(input_batch).permute(0, 2, 3, 1)
        widened_latents = torch.stack([
            torch.cat([texture_latents[0], padding], dim=1),
            torch.cat([padding, texture_latents[1]], dim=1)
        ],
                                      dim=0)
        alphas = torch.cat([
            torch.zeros(num_latent_cols - overlap, device=device),
            torch.linspace(0, 1, overlap, device=device),
            torch.ones(num_latent_cols - overlap, device=device)
        ]).expand(num_latent_rows, -1).unsqueeze(-1)
        blended_latents = torch.lerp(widened_latents[0], widened_latents[1],
                                     alphas).permute(2, 0, 1)

        # The periodic component should be replaced to be consistent with the blended global field.
        global_field = blended_latents[channels['Local']:channels['Local'] +
                                       channels['Global'], :, :]
        blended_latents[
            -channels['Periodic']:, :, :] = autoencoder.derive_periodic_field(
                global_field.unsqueeze(0)).squeeze(0)

        # The fully-convolutional nature of the SVBRDF decoder trivializes the creation of textures with arbitrary sizes.
        normals, svbrdf.parameters = SVBRDFAutoencoder.interpret(
            autoencoder.decode(blended_latents.unsqueeze(0)))
        _shade_render_save(normals=normals,
                           svbrdf=svbrdf,
                           lights=lights,
                           viewer=viewer,
                           camera=camera,
                           path=output_path)