Example #1
def deep_dream_video(config):
    video_path = os.path.join(config['inputs_path'], config['input'])
    tmp_input_dir = os.path.join(config['out_videos_path'], 'tmp_input')
    tmp_output_dir = os.path.join(config['out_videos_path'], 'tmp_out')
    config['dump_dir'] = tmp_output_dir
    os.makedirs(tmp_input_dir, exist_ok=True)
    os.makedirs(tmp_output_dir, exist_ok=True)

    metadata = video_utils.dump_frames(video_path, tmp_input_dir)

    last_img = None
    for frame_id, frame_name in enumerate(sorted(os.listdir(tmp_input_dir))):
        print(f'Processing frame {frame_id}')
        frame_path = os.path.join(tmp_input_dir, frame_name)
        frame = utils.load_image(frame_path, target_shape=config['img_width'])
        if config['blend'] is not None and last_img is not None:
            # blend: 1.0 - use only the current frame, 0.0 - use only the last dreamed frame; values in between blend the two and stabilize the video
            frame = utils.linear_blend(last_img, frame, config['blend'])

        dreamed_frame = deep_dream_static_image(config, frame)
        last_img = dreamed_frame
        utils.save_and_maybe_display_image(
            config,
            dreamed_frame,
            should_display=config['should_display'],
            name_modifier=frame_id)

    video_utils.create_video_from_intermediate_results(config, metadata)

    shutil.rmtree(tmp_input_dir)  # remove tmp files
    print(f'Deleted tmp frame dump directory {tmp_input_dir}.')
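The blending step relies on utils.linear_blend. A minimal sketch of what such a helper could look like, assuming both frames are channel-last float numpy arrays in the [0, 1] range (the name and signature follow the call above; the body is an assumption, not the repo's code):

import numpy as np

def linear_blend(img1, img2, alpha=0.5):
    # alpha = 1.0 keeps only img2 (the current frame),
    # alpha = 0.0 keeps only img1 (the last dreamed frame),
    # anything in between mixes the two and stabilizes the video
    return img1 + alpha * (img2 - img1)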
Example #2
def deep_dream_video_ouroboros(config):
    img_path = os.path.join(config['inputs_path'], config['input'])
    # load a numpy, [0, 1] range, channel-last, RGB image; None will cause it to start from a uniform noise [0, 1] image
    frame = None if config['use_noise'] else utils.load_image(img_path, target_shape=config['img_width'])

    for frame_id in range(config['video_length']):
        print(f'Dream iteration {frame_id+1}.')
        frame = deep_dream_static_image(config, frame)
        utils.save_and_maybe_display_image(config, frame, should_display=config['should_display'], name_modifier=frame_id)
        frame = utils.transform_frame(config, frame)  # transform frame e.g. central zoom, spiral, etc.

    video_utils.create_video_from_intermediate_results(config)
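The transform_frame call is what turns this feedback loop into a zooming animation. A rough sketch of a central-zoom variant, assuming a channel-last float image in the [0, 1] range (the spiral/rotation variants are omitted and the exact geometry used by the repo may differ):

import cv2
import numpy as np

def central_zoom(frame, zoom_factor=1.04):
    # enlarge the frame slightly and crop the center back to the original size,
    # so that feeding the result back to DeepDream produces a zoom-in effect
    h, w = frame.shape[:2]
    zoomed = cv2.resize(frame, None, fx=zoom_factor, fy=zoom_factor, interpolation=cv2.INTER_CUBIC)
    zh, zw = zoomed.shape[:2]
    y0, x0 = (zh - h) // 2, (zw - w) // 2
    return np.clip(zoomed[y0:y0 + h, x0:x0 + w], 0.0, 1.0)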
Example #3
def deep_dream_video_ouroboros(config):
    """
    Feeds the dreamed output image back to the input and repeats.

    Name etymology for nerds: https://en.wikipedia.org/wiki/Ouroboros

    """
    ts = time.time()
    assert any([config['input_name'].lower().endswith(img_ext) for img_ext in SUPPORTED_IMAGE_FORMATS]), \
        f'Expected an image, but got {config["input_name"]}. Supported image formats {SUPPORTED_IMAGE_FORMATS}.'

    utils.print_ouroboros_video_header(config)  # print some ouroboros-related metadata to the console

    img_path = utils.parse_input_file(config['input'])
    # load numpy, [0, 1] range, channel-last, RGB image
    # use_noise (and the resulting None value) will cause the frame to be initialized with uniform noise in the [0, 1] range
    frame = None if config['use_noise'] else utils.load_image(img_path, target_shape=config['img_width'])

    for frame_id in range(config['ouroboros_length']):
        print(f'Ouroboros iteration {frame_id+1}.')
        # Step 1: apply DeepDream and feed the last iteration's output to the input
        frame = deep_dream_static_image(config, frame)
        dump_path = utils.save_and_maybe_display_image(config, frame, name_modifier=frame_id)
        print(f'Saved ouroboros frame to: {os.path.relpath(dump_path)}\n')

        # Step 2: transform frame e.g. central zoom, spiral, etc.
        # Note: this part amplifies the psychedelic-like appearance
        frame = utils.transform_frame(config, frame)

    video_utils.create_video_from_intermediate_results(config)
    print(f'time elapsed = {time.time()-ts} seconds.')
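When use_noise is set, the first frame has to be created rather than loaded. A plausible initialization, assuming the channel-last, [0, 1] range RGB convention described in the comment above (the exact shape handling in the repo may differ):

import numpy as np

def get_noise_image(height, width):
    # uniform noise in [0, 1], channel-last RGB, matching what load_image would return
    return np.random.uniform(low=0.0, high=1.0, size=(height, width, 3)).astype(np.float32)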
Example #4
def stylize_static_image(inference_config):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    content_img_path = os.path.join(inference_config['content_images_path'], inference_config['content_img_name'])
    content_image = utils.prepare_img(content_img_path, inference_config['img_width'], device)

    # load the weights and set the model to evaluation mode
    stylization_model = TransformerNet().to(device)
    training_state = torch.load(os.path.join(inference_config["model_binaries_path"], inference_config["model_name"]))
    utils.print_model_metadata(training_state)
    state_dict = training_state["state_dict"]
    stylization_model.load_state_dict(state_dict, strict=True)
    stylization_model.eval()

    with torch.no_grad():
        stylized_img = stylization_model(content_image).to('cpu').numpy()[0]
        utils.save_and_maybe_display_image(inference_config, stylized_img, should_display=True)
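save_and_maybe_display_image hides the post-processing of the raw network output, which at this point is a CHW float array. A sketch of that conversion step, assuming the model already outputs values on roughly a [0, 255] scale (the actual repo may denormalize differently, e.g. by adding back per-channel means):

import numpy as np

def to_uint8_image(stylized_img):
    # CHW float -> HWC uint8
    img = np.moveaxis(stylized_img, 0, 2)
    return np.clip(img, 0, 255).astype(np.uint8)

# cv2 expects BGR channel ordering, hence the flip before writing:
# cv2.imwrite('stylized.jpg', to_uint8_image(stylized_img)[:, :, ::-1])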
Example #5
def deep_dream_video(config):
    video_path = utils.parse_input_file(config['input'])
    tmp_input_dir = os.path.join(OUT_VIDEOS_PATH, 'tmp_input')
    tmp_output_dir = os.path.join(OUT_VIDEOS_PATH, 'tmp_out')
    config['dump_dir'] = tmp_output_dir
    os.makedirs(tmp_input_dir, exist_ok=True)
    os.makedirs(tmp_output_dir, exist_ok=True)

    metadata = video_utils.extract_frames(video_path, tmp_input_dir)
    config['fps'] = metadata['fps']
    utils.print_deep_dream_video_header(config)

    last_img = None
    for frame_id, frame_name in enumerate(sorted(os.listdir(tmp_input_dir))):
        # Step 1: load the video frame
        print(f'Processing frame {frame_id}')
        frame_path = os.path.join(tmp_input_dir, frame_name)
        frame = utils.load_image(frame_path, target_shape=config['img_width'])

        # Step 2: potentially blend it with the last frame
        if config['blend'] is not None and last_img is not None:
            # blend: 1.0 - use the current frame, 0.0 - use the last frame, everything in between will blend the two
            frame = utils.linear_blend(last_img, frame, config['blend'])

        # Step 3: Send the blended frame to some good old DeepDreaming
        dreamed_frame = deep_dream_static_image(config, frame)

        # Step 4: save the frame and keep the reference
        last_img = dreamed_frame
        dump_path = utils.save_and_maybe_display_image(config, dreamed_frame, name_modifier=frame_id)
        print(f'Saved DeepDream frame to: {os.path.relpath(dump_path)}\n')

    video_utils.create_video_from_intermediate_results(config)

    shutil.rmtree(tmp_input_dir)  # remove tmp files
    print(f'Deleted tmp frame dump directory {tmp_input_dir}.')
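create_video_from_intermediate_results then stitches the dumped frames back into a video. A bare-bones version of that step, assuming ffmpeg is on the PATH and the dumped frames follow a zero-padded naming scheme like 000000.jpg (the real helper and its frame naming may differ):

import os
import subprocess

def create_video_from_frames(dump_dir, fps, out_path='deepdream_out.mp4'):
    frame_pattern = os.path.join(dump_dir, '%06d.jpg')  # 000000.jpg, 000001.jpg, ...
    subprocess.run([
        'ffmpeg', '-framerate', str(fps), '-i', frame_pattern,
        '-c:v', 'libx264', '-pix_fmt', 'yuv420p', out_path
    ], check=True)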
Example #6
def generate_new_images(model_name,
                        cgan_digit=None,
                        generation_mode=True,
                        slerp=True,
                        a=None,
                        b=None,
                        should_display=True):
    """ Generate imagery using pre-trained generator (using vanilla_generator_000000.pth by default)

    Args:
        model_name (str): model name you want to use (default lookup location is BINARIES_PATH).
        cgan_digit (int): if specified generate that exact digit.
        generation_mode (enum):  generate a single image from a random vector, interpolate between the 2 chosen latent
         vectors, or perform arithmetic over latent vectors (note: not every mode is supported for every model type)
        slerp (bool): if True use spherical interpolation otherwise use linear interpolation.
        a, b (numpy arrays): latent vectors; if set to None you'll be prompted to choose images you like,
         and their corresponding latent vectors will be used instead.
        should_display (bool): Display the generated images before saving them.

    """

    model_path = os.path.join(BINARIES_PATH, model_name)
    assert os.path.exists(
        model_path
    ), f'Could not find the model {model_path}. You first need to train your generator.'

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Prepare the correct (vanilla, cGAN, DCGAN, ...) model, load the weights and put the model into evaluation mode
    model_state = torch.load(model_path)
    gan_type = model_state["gan_type"]
    print(f'Found {gan_type} GAN!')
    _, generator = utils.get_gan(device, gan_type)
    generator.load_state_dict(model_state["state_dict"], strict=True)
    generator.eval()

    # Generate a single image, save it and potentially display it
    if generation_mode == GenerationMode.SINGLE_IMAGE:
        generated_imgs_path = os.path.join(DATA_DIR_PATH, 'generated_imagery')
        os.makedirs(generated_imgs_path, exist_ok=True)

        generated_img, _ = generate_from_random_latent_vector(
            generator, cgan_digit if gan_type == GANType.CGAN.name else None)
        utils.save_and_maybe_display_image(generated_imgs_path,
                                           generated_img,
                                           should_display=should_display)

    # Pick 2 images you like between which you'd like to interpolate (by typing 'y' into console)
    elif generation_mode == GenerationMode.INTERPOLATION:
        assert gan_type == GANType.VANILLA.name or gan_type == GANType.DCGAN.name, f'Got {gan_type} but only VANILLA/DCGAN are supported for the interpolation mode.'

        interpolation_name = "spherical" if slerp else "linear"
        interpolation_fn = spherical_interpolation if slerp else linear_interpolation

        grid_interpolated_imgs_path = os.path.join(
            DATA_DIR_PATH, 'interpolated_imagery')  # combined results dir
        decomposed_interpolated_imgs_path = os.path.join(
            grid_interpolated_imgs_path,
            f'tmp_{gan_type}_{interpolation_name}_dump'
        )  # dump separate results
        if os.path.exists(decomposed_interpolated_imgs_path):
            shutil.rmtree(decomposed_interpolated_imgs_path)
        os.makedirs(grid_interpolated_imgs_path, exist_ok=True)
        os.makedirs(decomposed_interpolated_imgs_path, exist_ok=True)

        latent_vector_a, latent_vector_b = [None, None]

        # If a and b were not specified, loop until the user picks the 2 images they like.
        found_good_vectors_flag = False
        if a is None or b is None:
            while not found_good_vectors_flag:
                generated_img, latent_vector = generate_from_random_latent_vector(
                    generator)
                plt.imshow(generated_img)
                plt.title('Do you like this image?')
                plt.show()
                user_input = input(
                    "Do you like this generated image? [y for yes]:")
                if user_input == 'y':
                    if latent_vector_a is None:
                        latent_vector_a = latent_vector
                        print('Saved the first latent vector.')
                    elif latent_vector_b is None:
                        latent_vector_b = latent_vector
                        print('Saved the second latent vector.')
                        found_good_vectors_flag = True
                else:
                    print("Well, let's generate a new one!")
                    continue
        else:
            print(
                'Skipping latent vectors selection section and using cached ones.'
            )
            latent_vector_a, latent_vector_b = [a, b]

        # Cache latent vectors
        if a is None or b is None:
            np.save(os.path.join(grid_interpolated_imgs_path, 'a.npy'),
                    latent_vector_a)
            np.save(os.path.join(grid_interpolated_imgs_path, 'b.npy'),
                    latent_vector_b)

        print(f"Let's do some {interpolation_name} interpolation!")
        interpolation_resolution = 47  # number of images between the vectors a and b
        num_interpolated_imgs = interpolation_resolution + 2  # + 2 so that we include a and b

        generated_imgs = []
        for i in range(num_interpolated_imgs):
            t = i / (num_interpolated_imgs - 1)  # goes from 0. to 1.
            current_latent_vector = interpolation_fn(t, latent_vector_a,
                                                     latent_vector_b)
            generated_img = generate_from_specified_numpy_latent_vector(
                generator, current_latent_vector)

            print(f'Generated image [{i+1}/{num_interpolated_imgs}].')
            utils.save_and_maybe_display_image(
                decomposed_interpolated_imgs_path,
                generated_img,
                should_display=should_display)

            # Move from channel last to channel first (HWC->CHW) because PyTorch's save_image function expects BCHW format
            generated_imgs.append(
                torch.tensor(np.moveaxis(generated_img, 2, 0)))

        interpolated_block_img = torch.stack(generated_imgs)
        interpolated_block_img = nn.Upsample(
            scale_factor=2.5, mode='nearest')(interpolated_block_img)
        save_image(
            interpolated_block_img,
            os.path.join(
                grid_interpolated_imgs_path,
                utils.get_available_file_name(grid_interpolated_imgs_path)),
            nrow=int(np.sqrt(num_interpolated_imgs)))

    elif generation_mode == GenerationMode.VECTOR_ARITHMETIC:
        assert gan_type == GANType.DCGAN.name, f'Got {gan_type} but only DCGAN is supported for arithmetic mode.'

        # Generate num_options face images and create a grid image from them
        num_options = 100
        generated_imgs = []
        latent_vectors = []
        padding = 2
        for i in range(num_options):
            generated_img, latent_vector = generate_from_random_latent_vector(
                generator)
            generated_imgs.append(
                torch.tensor(np.moveaxis(generated_img, 2,
                                         0)))  # make_grid expects CHW format
            latent_vectors.append(latent_vector)
        stacked_tensor_imgs = torch.stack(generated_imgs)
        final_tensor_img = make_grid(stacked_tensor_imgs,
                                     nrow=int(np.sqrt(num_options)),
                                     padding=padding)
        display_img = np.moveaxis(final_tensor_img.numpy(), 0, 2)

        # For storing latent vectors
        num_of_vectors_per_category = 3
        happy_woman_latent_vectors = []
        neutral_woman_latent_vectors = []
        neutral_man_latent_vectors = []

        # Make it easy - by clicking on the plot you pick the image.
        def onclick(event):
            if event.dblclick:
                pass
            else:  # single click
                if event.button == 1:  # left click
                    x_coord = event.xdata
                    y_coord = event.ydata
                    column = int(x_coord / (64 + padding))
                    row = int(y_coord / (64 + padding))

                    # Store latent vector corresponding to the image that the user clicked on.
                    if len(happy_woman_latent_vectors
                           ) < num_of_vectors_per_category:
                        happy_woman_latent_vectors.append(
                            latent_vectors[10 * row + column])
                        print(
                            f'Picked image row={row}, column={column} as {len(happy_woman_latent_vectors)}. happy woman.'
                        )
                    elif len(neutral_woman_latent_vectors
                             ) < num_of_vectors_per_category:
                        neutral_woman_latent_vectors.append(
                            latent_vectors[10 * row + column])
                        print(
                            f'Picked image row={row}, column={column} as {len(neutral_woman_latent_vectors)}. neutral woman.'
                        )
                    elif len(neutral_man_latent_vectors
                             ) < num_of_vectors_per_category:
                        neutral_man_latent_vectors.append(
                            latent_vectors[10 * row + column])
                        print(
                            f'Picked image row={row}, column={column} as {len(neutral_man_latent_vectors)}. neutral man.'
                        )
                    else:
                        plt.close()

        plt.figure(figsize=(10, 10))
        plt.imshow(display_img)
        # This is just an example; you could also pick 3 neutral women with sunglasses, etc.
        plt.title(
            'Click on 3 happy women, 3 neutral women and \n 3 neutral men images (order matters!)'
        )
        cid = plt.gcf().canvas.mpl_connect('button_press_event', onclick)
        plt.show()
        plt.gcf().canvas.mpl_disconnect(cid)
        print('Done choosing images.')

        # Calculate the average latent vector for every category (happy woman, neutral woman, neutral man)
        happy_woman_avg_latent_vector = np.mean(
            np.array(happy_woman_latent_vectors), axis=0)
        neutral_woman_avg_latent_vector = np.mean(
            np.array(neutral_woman_latent_vectors), axis=0)
        neutral_man_avg_latent_vector = np.mean(
            np.array(neutral_man_latent_vectors), axis=0)

        # By subtracting neutral woman from the happy woman we capture the "vector of smiling". Adding that vector
        # to a neutral man we get a happy man's latent vector! Our latent space has amazingly beautiful structure!
        happy_man_latent_vector = neutral_man_avg_latent_vector + (
            happy_woman_avg_latent_vector - neutral_woman_avg_latent_vector)

        # Generate images from these latent vectors
        happy_women_imgs = np.hstack([
            generate_from_specified_numpy_latent_vector(generator, v)
            for v in happy_woman_latent_vectors
        ])
        neutral_women_imgs = np.hstack([
            generate_from_specified_numpy_latent_vector(generator, v)
            for v in neutral_woman_latent_vectors
        ])
        neutral_men_imgs = np.hstack([
            generate_from_specified_numpy_latent_vector(generator, v)
            for v in neutral_man_latent_vectors
        ])

        happy_woman_avg_img = generate_from_specified_numpy_latent_vector(
            generator, happy_woman_avg_latent_vector)
        neutral_woman_avg_img = generate_from_specified_numpy_latent_vector(
            generator, neutral_woman_avg_latent_vector)
        neutral_man_avg_img = generate_from_specified_numpy_latent_vector(
            generator, neutral_man_avg_latent_vector)

        happy_man_img = generate_from_specified_numpy_latent_vector(
            generator, happy_man_latent_vector)

        display_vector_arithmetic_results([
            happy_women_imgs, happy_woman_avg_img, neutral_women_imgs,
            neutral_woman_avg_img, neutral_men_imgs, neutral_man_avg_img,
            happy_man_img
        ])
    else:
        raise Exception(f'Generation mode {generation_mode} is not yet supported.')
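The interpolation mode switches between spherical_interpolation and linear_interpolation. A compact sketch of both, assuming the latent vectors are 1-D numpy arrays (the repo's exact implementations may handle edge cases differently):

import numpy as np

def linear_interpolation(t, a, b):
    return (1 - t) * a + t * b

def spherical_interpolation(t, a, b):
    # slerp: move along the great circle between a and b instead of the straight line,
    # which tends to stay in higher-density regions of a Gaussian latent space
    omega = np.arccos(np.clip(np.dot(a / np.linalg.norm(a), b / np.linalg.norm(b)), -1.0, 1.0))
    sin_omega = np.sin(omega)
    if sin_omega < 1e-6:  # (almost) collinear vectors -> fall back to lerp
        return linear_interpolation(t, a, b)
    return (np.sin((1 - t) * omega) / sin_omega) * a + (np.sin(t * omega) / sin_omega) * b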
Example #7
    # You usually won't need to change these as often
    parser.add_argument("--should_display", action='store_true', help="Display intermediate dreaming results (default False)")
    parser.add_argument("--spatial_shift_size", type=int, help='Number of pixels to randomly shift image before grad ascent', default=32)
    parser.add_argument("--smoothing_coefficient", type=float, help='Directly controls standard deviation for gradient smoothing', default=0.5)
    parser.add_argument("--use_noise", action='store_true', help="Use noise as a starting point instead of input image (default False)")
    args = parser.parse_args()

    # Wrapping configuration into a dictionary
    config = dict()
    for arg in vars(args):
        config[arg] = getattr(args, arg)
    config['dump_dir'] = OUT_VIDEOS_PATH if config['create_ouroboros'] else OUT_IMAGES_PATH
    config['dump_dir'] = os.path.join(config['dump_dir'], f'{config["model_name"]}_{config["pretrained_weights"]}')
    config['input_name'] = os.path.basename(config['input'])

    # Create Ouroboros video (feeding the neural network's output back to its input)
    if config['create_ouroboros']:
        deep_dream_video_ouroboros(config)

    # Create a blended DeepDream video
    elif any([config['input_name'].lower().endswith(video_ext) for video_ext in SUPPORTED_VIDEO_FORMATS]):  # only support mp4 atm
        deep_dream_video(config)

    else:  # Create a static DeepDream image
        print('Dreaming started!')
        img = deep_dream_static_image(config, img=None)  # img=None -> will be loaded inside of deep_dream_static_image
        dump_path = utils.save_and_maybe_display_image(config, img)
        print(f'Saved DeepDream static image to: {os.path.relpath(dump_path)}\n')
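The --spatial_shift_size flag refers to the jitter trick: before each gradient-ascent step the image tensor is rolled by a random offset (and rolled back afterwards) so the dreamed patterns don't lock onto a fixed pixel grid. A minimal sketch of that shift, assuming a BCHW tensor (the helper names here are illustrative, not necessarily the repo's):

import numpy as np
import torch

def random_circular_spatial_shift(tensor, shift_size):
    # roll the image by a random (h, w) offset and remember the offset so it can be undone
    h_shift, w_shift = np.random.randint(-shift_size, shift_size + 1, size=2)
    shifted = torch.roll(tensor, shifts=(int(h_shift), int(w_shift)), dims=(2, 3))
    return shifted, (int(h_shift), int(w_shift))

def undo_spatial_shift(tensor, shifts):
    h_shift, w_shift = shifts
    return torch.roll(tensor, shifts=(-h_shift, -w_shift), dims=(2, 3))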

Example #8
        default=False)
    args = parser.parse_args()

    # Wrapping configuration into a dictionary - keeping things clean
    config = dict()
    for arg in vars(args):
        config[arg] = getattr(args, arg)
    config['inputs_path'] = inputs_path
    config['out_images_path'] = out_images_path
    config['out_videos_path'] = out_videos_path
    config['dump_dir'] = config['out_videos_path'] if config[
        'is_video'] else config['out_images_path']
    config['dump_dir'] = os.path.join(
        config['dump_dir'],
        f'{config["model"].name}_{config["pretrained_weights"].name}')

    # DeepDream algorithm in 3 flavours: static image, video and ouroboros (feeding the net's output to its input)
    if any([
            config['input'].endswith(video_ext)
            for video_ext in SUPPORTED_VIDEO_FORMATS
    ]):  # only support mp4 atm
        deep_dream_video(config)
    elif config['is_video']:
        deep_dream_video_ouroboros(config)
    else:
        img = deep_dream_static_image(
            config, img=None
        )  # img=None -> will be loaded inside of deep_dream_static_image
        utils.save_and_maybe_display_image(
            config, img, should_display=config['should_display'])
Example #9
def stylize_static_image(inference_config):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Prepare the model - load the weights and put the model into evaluation mode
    stylization_model = TransformerNet().to(device)
    training_state = torch.load(
        os.path.join(inference_config["model_binaries_path"],
                     inference_config["model_name"]))
    state_dict = training_state["state_dict"]
    stylization_model.load_state_dict(state_dict, strict=True)
    stylization_model.eval()

    if inference_config['verbose']:
        utils.print_model_metadata(training_state)

    with torch.no_grad():
        if os.path.isdir(
                inference_config['content_input']
        ):  # do a batch stylization (every image in the directory)
            img_dataset = utils.SimpleDataset(
                inference_config['content_input'],
                inference_config['img_width'])
            img_loader = DataLoader(img_dataset,
                                    batch_size=inference_config['batch_size'])

            try:
                processed_imgs_cnt = 0
                for batch_id, img_batch in enumerate(img_loader):
                    processed_imgs_cnt += len(img_batch)
                    if inference_config['verbose']:
                        print(
                            f'Processing batch {batch_id + 1} ({processed_imgs_cnt}/{len(img_dataset)} processed images).'
                        )

                    img_batch = img_batch.to(device)
                    stylized_imgs = stylization_model(img_batch).to(
                        'cpu').numpy()
                    for stylized_img in stylized_imgs:
                        utils.save_and_maybe_display_image(
                            inference_config,
                            stylized_img,
                            should_display=False)
            except Exception as e:
                print(e)
                print(
                    f'Consider making the batch_size (current = {inference_config["batch_size"]} images) or img_width (current = {inference_config["img_width"]} px) smaller'
                )
                exit(1)

        else:  # do stylization for a single image
            content_img_path = os.path.join(
                inference_config['content_images_path'],
                inference_config['content_input'])
            content_image = utils.prepare_img(content_img_path,
                                              inference_config['img_width'],
                                              device)
            stylized_img = stylization_model(content_image).to(
                'cpu').numpy()[0]
            utils.save_and_maybe_display_image(
                inference_config,
                stylized_img,
                should_display=not inference_config['should_not_display'])
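The batch branch leans on utils.SimpleDataset to turn a directory of images into model-ready tensors. A plausible minimal version, assuming standard torchvision preprocessing (the repo's actual dataset may normalize or resize differently):

import os
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

class SimpleDataset(Dataset):
    def __init__(self, img_dir, img_width):
        self.img_paths = sorted(
            os.path.join(img_dir, name) for name in os.listdir(img_dir)
            if name.lower().endswith(('.jpg', '.jpeg', '.png')))
        self.transform = transforms.Compose([
            transforms.Resize(img_width),      # resize the shorter edge to img_width
            transforms.CenterCrop(img_width),  # make every image the same square size so batching works
            transforms.ToTensor(),             # HWC uint8 [0, 255] -> CHW float [0, 1]
        ])

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        return self.transform(Image.open(self.img_paths[idx]).convert('RGB'))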