def run_style_transfer(input_img: T.Tensor,
                       num_steps=1000,
                       style_weight=10000.,
                       content_weight=1.,
                       tv_weight=0):
    # NB: `content_img` and `style_img` are module-level globals in the source repo.
    content_img_resized = F.resize(content_img, 1024)

    # HWC image -> NCHW batch, resized, then wrapped as a trainable parameter.
    input_img = input_img.detach()[None].permute(0, 3, 1, 2).contiguous()
    input_img = F.resize(input_img, 1024)
    input_img = T.nn.Parameter(input_img, requires_grad=True)

    vgg_loss = losses.StyleTransferLosses(vgg_weight_file, content_img_resized,
                                          style_img, px_content_layers,
                                          px_style_layers)
    vgg_loss.to(device).eval()
    optimizer = optim.Adam([input_img], lr=1e-3)
    logger.info('Optimizing pixel-wise canvas...')
    for _ in mon.iter_batch(range(num_steps)):
        optimizer.zero_grad()
        # Clamp to the valid pixel range; avoid shadowing the builtin `input`.
        clamped = T.clamp(input_img, 0., 1.)
        content_score, style_score = vgg_loss(clamped)

        style_score *= style_weight
        content_score *= content_weight
        tv_score = 0. if not tv_weight else tv_weight * losses.tv_loss(
            input_img)
        loss = style_score + content_score + tv_score
        loss.backward(inputs=[input_img])
        optimizer.step()

        # Plot the losses for monitoring.
        mon.plot('pixel style loss', style_score)
        mon.plot('pixel content loss', content_score)
        if tv_weight:
            mon.plot('pixel tv loss', tv_score)

        if mon.iter % mon.print_freq == 0:
            mon.imwrite('pixel stylized', clamped)

    return T.clamp(input_img, 0., 1.)
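
A minimal call sketch for `run_style_transfer` above, assuming (as the snippet implies) that `T` is `torch` and that `content_img`, `style_img`, and `device` are module-level globals in the source repo; the random HWC canvas below is an illustrative starting point, not the repo's documented entry point.

# Hedged usage sketch -- the names here are assumptions, not part of the snippet.
init = T.rand(512, 512, 3, device=device)   # HWC canvas; the function permutes to NCHW
stylized = run_style_transfer(init, num_steps=500,
                              style_weight=1e4, tv_weight=1e-4)
# `stylized` is a clamped 1x3xHxW tensor, ready for saving or display.
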
Example #2
def main(unused_argv=None):
    """Train a multi-style transfer network.

    Options are read from tf.app.flags (FLAGS) rather than an argparse
    namespace, so the single positional argument is unused.
    """
    # Unpack command-line arguments.
    train_dir = FLAGS.train_dir
    style_dataset = FLAGS.style_dataset
    model_name = FLAGS.model_name
    preprocess_size = [FLAGS.image_size, FLAGS.image_size]
    batch_size = FLAGS.batch_size
    n_epochs = FLAGS.n_epochs
    learn_rate = FLAGS.learning_rate
    content_weights = FLAGS.content_weights
    style_weights = FLAGS.style_weights
    num_pipe_buffer = FLAGS.num_pipe_buffer
    num_styles = FLAGS.num_styles
    train_steps = FLAGS.train_steps
    upsample_method = FLAGS.upsample_method

    # Setup input pipeline (delegate it to CPU to let GPU handle neural net)
    files = tf.train.match_filenames_once(train_dir + '/train-*')
    style_files = tf.train.match_filenames_once(style_dataset)
    print("style %s" % style_files)

    with tf.variable_scope('input_pipe'), tf.device('/cpu:0'):
        _, style_labels, style_grams = datapipe.style_batcher(
            style_files, batch_size, preprocess_size, n_epochs,
            num_pipe_buffer)
        batch_op = datapipe.batcher(files, batch_size, preprocess_size,
                                    n_epochs, num_pipe_buffer)
    """ Set up weight of style and content image """
    content_weights = ast.literal_eval(content_weights)
    style_weights = ast.literal_eval(style_weights)

    target_grams = []
    for name in style_weights:
        target_grams.append(style_grams[name])

    # Alter the names to include a namescope that we'll use + output suffix.
    loss_style_layers = []
    loss_style_weights = []
    loss_content_layers = []
    loss_content_weights = []
    for key, val in style_weights.items():
        loss_style_layers.append(key + ':0')
        loss_style_weights.append(val)
    for key, val in content_weights.items():
        loss_content_layers.append(key + ':0')
        loss_content_weights.append(val)

    # Load in image transformation network into default graph.
    shape = [batch_size] + preprocess_size + [3]
    with tf.variable_scope('styleNet'):
        X = tf.placeholder(tf.float32, shape=shape, name='input')
        Y = transform(X, style_labels, num_styles, upsample_method)
        print(Y)

    # Connect vgg directly to the image transformation network.
    with tf.variable_scope('vgg'):
        vggnet = vgg16.vgg16(Y)

    # Get the gram matrices' tensors for the style loss features.
    input_img_grams = losses.get_grams(loss_style_layers)

    # Get the tensors for content loss features.
    content_layers = losses.get_layers(loss_content_layers)

    # Create loss function
    content_targets = tuple(
        tf.placeholder(tf.float32,
                       shape=layer.get_shape(),
                       name='content_input_{}'.format(i))
        for i, layer in enumerate(content_layers))
    cont_loss = losses.content_loss(content_layers, content_targets,
                                    loss_content_weights)
    style_loss = losses.style_loss(input_img_grams, target_grams,
                                   loss_style_weights)
    tv_loss = losses.tv_loss(Y)
    loss = cont_loss + style_loss + tv_loss

    # We do not want to train VGG, so we must grab the subset.
    train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope='styleNet')

    # Setup step + optimizer
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learn_rate).minimize(
        loss, global_step, train_vars)

    if not os.path.exists('./models'):  # Dir we'll save final models to
        os.makedirs('./models')
    final_saver = tf.train.Saver(train_vars)

    # We must include local variables because of batch pipeline.
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # Begin training.
    print('Starting training...')
    with tf.Session() as sess:
        # Initialization
        sess.run(init_op)
        vggnet.load_weights(vgg16.checkpoint_file(), sess)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            while not coord.should_stop():
                current_step = sess.run(global_step)
                batch = sess.run(batch_op)

                # Collect content targets
                content_data = sess.run(content_layers, feed_dict={Y: batch})
                feed_dict = {X: batch, content_targets: content_data}

                _, loss_out = sess.run([optimizer, loss], feed_dict=feed_dict)
                if current_step % 10 == 0:
                    print(current_step, loss_out)

                # Stop once we've reached the requested number of training steps.
                if current_step == train_steps:
                    print('Done training.')
                    break
        except tf.errors.OutOfRangeError:
            print('Done training.')
        finally:
            # Save the model (the image transformation network) for later usage
            # in predict.py
            final_saver.save(sess,
                             'models/' + model_name + '_final.ckpt',
                             write_meta_graph=False)
            coord.request_stop()

        coord.join(threads)
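
Neither TensorFlow example shows the `get_grams` helper it relies on. Below is a hedged sketch of one common implementation (Gram matrices over NHWC activations, fetched by tensor name); the repo's real helper may normalize differently.

def get_grams_sketch(layer_names):
    """Hypothetical stand-in for losses.get_grams / utils.get_grams."""
    grams = []
    graph = tf.get_default_graph()
    for name in layer_names:
        layer = graph.get_tensor_by_name(name)
        _, h, w, c = layer.get_shape().as_list()
        feats = tf.reshape(layer, [-1, h * w, c])
        # Channel-by-channel feature correlations, normalized by layer size.
        gram = tf.matmul(feats, feats, transpose_a=True) / float(h * w * c)
        grams.append(gram)
    return grams
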
def main(args):
    """main
    :param args:
        argparse.Namespace object from argparse.parse_args().
    """
    # Unpack command-line arguments.
    train_dir = args.train_dir
    style_img_path = args.style_img_path
    model_name = args.model_name
    preprocess_size = args.preprocess_size
    batch_size = args.batch_size
    n_epochs = args.n_epochs
    run_name = args.run_name
    learn_rate = args.learn_rate
    loss_content_layers = args.loss_content_layers
    loss_style_layers = args.loss_style_layers
    content_weights = args.content_weights
    style_weights = args.style_weights
    num_steps_ckpt = args.num_steps_ckpt
    num_pipe_buffer = args.num_pipe_buffer
    num_steps_break = args.num_steps_break
    beta_val = args.beta
    style_target_resize = args.style_target_resize
    upsample_method = args.upsample_method

    # Load in style image that will define the model.
    style_img = utils.imread(style_img_path)
    style_img = utils.imresize(style_img, style_target_resize)
    style_img = style_img[np.newaxis, :].astype(np.float32)

    # Alter the names to include a namescope that we'll use + output suffix.
    loss_style_layers = ['vgg/' + i + ':0' for i in loss_style_layers]
    loss_content_layers = ['vgg/' + i + ':0' for i in loss_content_layers]

    # Get target Gram matrices from the style image.
    with tf.variable_scope('vgg'):
        X_vgg = tf.placeholder(tf.float32, shape=style_img.shape, name='input')
        vggnet = vgg16.vgg16(X_vgg)
    with tf.Session() as sess:
        vggnet.load_weights('libs/vgg16_weights.npz', sess)
        print('Precomputing target style layers.')
        target_grams = sess.run(utils.get_grams(loss_style_layers),
                                feed_dict={X_vgg: style_img})

    # Clean up so we can re-create vgg connected to our image network.
    print('Resetting default graph.')
    tf.reset_default_graph()

    # Load in image transformation network into default graph.
    shape = [batch_size] + preprocess_size + [3]
    with tf.variable_scope('img_t_net'):
        X = tf.placeholder(tf.float32, shape=shape, name='input')
        Y = create_net(X, upsample_method)

    # Connect vgg directly to the image transformation network.
    with tf.variable_scope('vgg'):
        vggnet = vgg16.vgg16(Y)

    # Get the gram matrices' tensors for the style loss features.
    input_img_grams = utils.get_grams(loss_style_layers)

    # Get the tensors for content loss features.
    content_layers = utils.get_layers(loss_content_layers)

    # Create loss function
    content_targets = tuple(
        tf.placeholder(tf.float32,
                       shape=layer.get_shape(),
                       name='content_input_{}'.format(i))
        for i, layer in enumerate(content_layers))
    cont_loss = losses.content_loss(content_layers, content_targets,
                                    content_weights)
    style_loss = losses.style_loss(input_img_grams, target_grams,
                                   style_weights)
    tv_loss = losses.tv_loss(Y)
    beta = tf.placeholder(tf.float32, shape=[], name='tv_scale')
    loss = cont_loss + style_loss + beta * tv_loss
    with tf.name_scope('summaries'):
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('style_loss', style_loss)
        tf.summary.scalar('content_loss', cont_loss)
        tf.summary.scalar('tv_loss', beta * tv_loss)

    # Setup input pipeline (delegate it to CPU to let GPU handle neural net)
    files = tf.train.match_filenames_once(train_dir + '/train-*')
    with tf.variable_scope('input_pipe'), tf.device('/cpu:0'):
        batch_op = datapipe.batcher(files, batch_size, preprocess_size,
                                    n_epochs, num_pipe_buffer)

    # We do not want to train VGG, so we must grab the subset.
    train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope='img_t_net')

    # Setup step + optimizer
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learn_rate).minimize(
        loss, global_step, train_vars)

    # Set up a subdirectory for this run's TensorBoard logs.
    if not os.path.exists('./summaries/train/'):
        os.makedirs('./summaries/train/')
    if run_name is None:
        current_dirs = [
            name for name in os.listdir('./summaries/train/')
            if os.path.isdir('./summaries/train/' + name)
        ]
        name = model_name + '0'
        count = 0
        while name in current_dirs:
            count += 1
            name = model_name + '{}'.format(count)
        run_name = name

    # Savers and summary writers
    if not os.path.exists('./training'):  # Dir that we'll later save .ckpts to
        os.makedirs('./training')
    if not os.path.exists('./models'):  # Dir we'll save final models to
        os.makedirs('./models')
    saver = tf.train.Saver()
    final_saver = tf.train.Saver(train_vars)
    merged = tf.summary.merge_all()
    full_log_path = './summaries/train/' + run_name
    train_writer = tf.summary.FileWriter(full_log_path)

    # We must include local variables because of batch pipeline.
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # Begin training.
    print('Starting training...')
    with tf.Session() as sess:
        # Initialization
        sess.run(init_op)
        vggnet.load_weights('libs/vgg16_weights.npz', sess)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            while not coord.should_stop():
                current_step = sess.run(global_step)
                batch = sess.run(batch_op)

                # Collect content targets
                content_data = sess.run(content_layers, feed_dict={Y: batch})

                feed_dict = {
                    X: batch,
                    content_targets: content_data,
                    beta: beta_val
                }
                if current_step % num_steps_ckpt == 0:
                    # Save a checkpoint
                    save_path = 'training/' + model_name + '.ckpt'
                    saver.save(sess, save_path, global_step=global_step)
                    summary, _, loss_out = sess.run([merged, optimizer, loss],
                                                    feed_dict=feed_dict)
                    train_writer.add_summary(summary, current_step)
                    print(current_step, loss_out)

                elif current_step % 10 == 0:
                    # Collect some diagnostic data for Tensorboard.
                    summary, _, loss_out = sess.run([merged, optimizer, loss],
                                                    feed_dict=feed_dict)
                    train_writer.add_summary(summary, current_step)

                    # Do some standard output.
                    print(current_step, loss_out)
                else:
                    _, loss_out = sess.run([optimizer, loss],
                                           feed_dict=feed_dict)

                # Stop once we've reached the requested number of training steps.
                if current_step == num_steps_break:
                    print('Done training.')
                    break
        except tf.errors.OutOfRangeError:
            print('Done training.')
        finally:
            # Save the model (the image transformation network) for later usage
            # in predict.py
            final_saver.save(sess, 'models/' + model_name + '_final.ckpt')

            coord.request_stop()

        coord.join(threads)
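
`losses.content_loss` and `losses.style_loss` are likewise external. A plausible sketch, assuming the usual weighted mean-squared-error formulation (over raw activations for content, over Gram matrices for style); the actual weighting and reduction conventions may differ.

def content_loss_sketch(layers, targets, weights):
    """Hypothetical stand-in for losses.content_loss."""
    total = tf.constant(0.0)
    for layer, target, w in zip(layers, targets, weights):
        total += w * tf.reduce_mean(tf.squared_difference(layer, target))
    return total

def style_loss_sketch(input_grams, target_grams, weights):
    """Hypothetical stand-in for losses.style_loss."""
    total = tf.constant(0.0)
    for gram, target, w in zip(input_grams, target_grams, weights):
        total += w * tf.reduce_mean(tf.squared_difference(gram, target))
    return total
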
def main(args):
    # Unpack command-line arguments.
    style_img_path = args.style_img_path
    cont_img_path = args.cont_img_path
    learn_rate = args.learn_rate
    loss_content_layers = args.loss_content_layers
    loss_style_layers = args.loss_style_layers
    content_weights = args.content_weights
    style_weights = args.style_weights
    num_steps_break = args.num_steps_break
    beta = args.beta
    style_target_resize = args.style_target_resize
    cont_target_resize = args.cont_target_resize
    output_img_path = args.output_img_path

    # Load in style image that will define the model.
    style_img = utils.imread(style_img_path)
    style_img = utils.imresize(style_img, style_target_resize)
    style_img = style_img[np.newaxis, :].astype(np.float32)

    # Alter the names to include a namescope that we'll use + output suffix.
    loss_style_layers = ['vgg/' + i + ':0' for i in loss_style_layers]
    loss_content_layers = ['vgg/' + i + ':0' for i in loss_content_layers]

    # Get target Gram matrices from the style image.
    with tf.variable_scope('vgg'):
        X_vgg = tf.placeholder(tf.float32, shape=style_img.shape, name='input')
        vggnet = vgg16.vgg16(X_vgg)
    with tf.Session() as sess:
        vggnet.load_weights('libs/vgg16_weights.npz', sess)
        print('Precomputing target style layers.')
        target_grams = sess.run(utils.get_grams(loss_style_layers),
                                feed_dict={'vgg/input:0': style_img})

    # Clean up so we can re-create vgg at the size of the input content image
    # for training.
    print('Resetting default graph.')
    tf.reset_default_graph()

    # Read in + resize the content image.
    cont_img = utils.imread(cont_img_path)
    cont_img = utils.imresize(cont_img, cont_target_resize)
    cont_img = cont_img[np.newaxis, :].astype(np.float32)

    # Set up a white-noise image variable to optimize, then build VGG on top of it.
    shape = cont_img.shape
    with tf.variable_scope('to_train'):
        white_noise = np.random.rand(shape[0], shape[1], shape[2],
                                     shape[3]) * 255.0
        white_noise = tf.constant(white_noise.astype(np.float32))
        X = tf.get_variable('input', dtype=tf.float32, initializer=white_noise)
    with tf.variable_scope('vgg'):
        vggnet = vgg16.vgg16(X)

    # Get the gram matrices' tensors for the style loss features.
    input_img_grams = utils.get_grams(loss_style_layers)

    # Get the tensors for content loss features.
    content_layers = utils.get_layers(loss_content_layers)

    # Get the target content features
    with tf.Session() as sess:
        vggnet.load_weights('libs/vgg16_weights.npz', sess)
        print('Precomputing target content layers.')
        content_targets = sess.run(content_layers,
                                   feed_dict={'to_train/input:0': cont_img})

    # Create loss function
    cont_loss = losses.content_loss(content_layers, content_targets,
                                    content_weights)
    style_loss = losses.style_loss(input_img_grams, target_grams,
                                   style_weights)
    tv_loss = losses.tv_loss(X)
    loss = cont_loss + style_loss + beta * tv_loss

    # We do not want to train VGG, so we must grab the subset.
    train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope='to_train')

    # Setup step + optimizer
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learn_rate) \
                  .minimize(loss, global_step, train_vars)

    # Initializer
    init_op = tf.global_variables_initializer()

    # Begin training
    with tf.Session() as sess:
        sess.run(init_op)
        vggnet.load_weights('libs/vgg16_weights.npz', sess)

        current_step = 0
        while current_step < num_steps_break:
            current_step = sess.run(global_step)

            # Take an optimization step; print progress every 10 steps.
            _, loss_out = sess.run([optimizer, loss])
            if current_step % 10 == 0:
                print(current_step, loss_out)

        # Upon finishing, get the X tensor (our image).
        img_out = sess.run(X)

    # Save it.
    img_out = np.squeeze(img_out)
    utils.imwrite(output_img_path, img_out)
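
`tv_loss` appears in every example here but is never defined. A standard total-variation sketch for an NHWC tensor; the real helper may sum instead of averaging, or weight the two terms differently.

def tv_loss_sketch(x):
    """Hypothetical stand-in for losses.tv_loss: penalize neighboring-pixel jumps."""
    dh = x[:, 1:, :, :] - x[:, :-1, :, :]   # vertical neighbor differences
    dw = x[:, :, 1:, :] - x[:, :, :-1, :]   # horizontal neighbor differences
    return tf.reduce_mean(tf.square(dh)) + tf.reduce_mean(tf.square(dw))
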
def solve(Config):
    gc.enable()
    # Precompute the target style features from the style image.
    style_features = losses.get_style_feature(Config)
    # Prepare the model directory.
    model_dir = Config.model_dir
    if not osp.exists(model_dir):
        os.mkdir(model_dir)

    # Construct the graph and model.
    # Input pipeline.
    images = Dataset(Config).imagedata_pipelines()

    # The image transformation (training) network.
    generated = model.inference_trainnet(images)
    # Preprocess the generated images, then concatenate them with the content
    # images so a single VGG forward pass computes features for both.
    preprocess_generated = preprocess(generated, Config)
    layer_infos = Vgg(Config.feature_path).build(
        tf.concat([preprocess_generated, images], 0))
    # Losses.
    content_loss = losses.content_loss(layer_infos, Config.content_layers)
    style_loss = losses.style_loss(layer_infos, Config.style_layers,
                                   style_features)
    tv_loss = losses.tv_loss(generated)
    loss = (Config.style_weight * style_loss +
            Config.content_weight * content_loss +
            Config.tv_weight * tv_loss)
    # train op
    global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = tf.train.AdamOptimizer(Config.lr).minimize(
        loss, global_step=global_step)

    # add summary
    with tf.name_scope('losses'):
        tf.summary.scalar('content_loss', content_loss)
        tf.summary.scalar('style_loss', style_loss)
        tf.summary.scalar('tv_loss', tv_loss)
    with tf.name_scope('weighted_losses'):
        tf.summary.scalar('weighted_content_loss',
                          content_loss * Config.content_weight)
        tf.summary.scalar('weighted_style_loss',
                          style_loss * Config.style_weight)
        tf.summary.scalar('weighted_tv_loss', tv_loss * Config.tv_weight)
    tf.summary.scalar('total_loss', loss)
    tf.summary.image('generated', generated)
    tf.summary.image('original', images)
    summary = tf.summary.merge_all()
    summary_path = osp.join(model_dir, 'summary')
    if not osp.exists(summary_path):
        os.mkdir(summary_path)
    writer = tf.summary.FileWriter(summary_path)

    # Saver / checkpoint-restore path.
    saver = tf.train.Saver(tf.global_variables())
    restore = tf.train.latest_checkpoint(model_dir)

    # begin training work
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # restore the variables
        sess.run([
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])

        # Optionally resume from the latest checkpoint.
        if Config.finetune:
            if restore:
                print('restoring model from {}'.format(restore))
                saver.restore(sess, restore)
            else:
                print('no checkpoint found, training from scratch')

        # Start the input pipeline queue runners.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        print('begin training')
        start_time = time.time()
        local_time = time.time()
        for step in range(Config.max_iter + 1):
            _, loss_value = sess.run([train_op, loss])
            if step % Config.display == 0 or step == Config.max_iter:
                print('{} [iterations], train loss {}, time consumed {}s'.format(
                    step, loss_value, time.time() - local_time))
                local_time = time.time()
            assert not np.isnan(loss_value), 'loss is NaN'
            if step != 0 and (step % Config.snapshot == 0
                              or step == Config.max_iter):
                # Write summaries (generated images included) and save a snapshot.
                print('adding summary and saving snapshot...')
                saver.save(sess,
                           osp.join(model_dir, 'model.ckpt'),
                           global_step=step)
                summary_str = sess.run(summary)
                writer.add_summary(summary_str, global_step=step)
                writer.flush()
        coord.request_stop()
        coord.join(threads)

        print('done, total time {}s'.format(time.time() - start_time))
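
`solve` reads many attributes off its `Config` argument. Below is a hedged sketch of such an object, with attribute names taken from the function body and purely illustrative values; `Dataset` and `losses.get_style_feature` read further attributes not visible in this snippet.

class Config:
    # Values are illustrative guesses, not the repo's defaults.
    model_dir = 'models/candy'
    feature_path = 'vgg16.npz'          # VGG weights consumed by the Vgg helper
    content_layers = ['conv3_3']
    style_layers = ['conv1_2', 'conv2_2', 'conv3_3', 'conv4_3']
    style_weight = 100.0
    content_weight = 1.0
    tv_weight = 1e-5
    lr = 1e-3
    finetune = False
    max_iter = 40000
    display = 100
    snapshot = 5000

solve(Config)   # the function reads attributes, so the class itself suffices
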
Example #6
def _create_tv_loss(self):
    # Class-method snippet: register a total-variation loss on the first image.
    self.tv_loss = tv_loss(self.x[0])