import os
import random
import time
from itertools import product

import numpy as np
import tensorflow as tf

# Helper functions referenced below (extract_image_names_recursive, load_image,
# prepare_image, build_example, coral, get_filename, load_mask, center_crop_np,
# save_image, _build_graph) are assumed to be defined elsewhere in this module.


def prepare_dataset(input_dir, output_dir, size, images_per_file, file_prefix):
    assert os.path.exists(input_dir), 'Input directory does not exist'
    assert os.path.isdir(input_dir), '%s is not a directory' % input_dir
    assert os.path.exists(output_dir), 'Output directory does not exist'
    assert os.path.isdir(output_dir), '%s is not a directory' % output_dir

    filenames = extract_image_names_recursive(input_dir)
    random.shuffle(filenames)
    print('%s images found in %s' % (len(filenames), input_dir))

    start = time.time()
    errors = 0
    rate = 0
    update_stat_every = 100
    writer = None
    for i, filename in enumerate(filenames):
        if i % images_per_file == 0:
            # roll over to a new shard file
            if writer:
                writer.close()
            output_file = '%s-%04i.tfrecords' % (file_prefix, i // images_per_file)
            output_path = os.path.join(output_dir, output_file)
            writer = tf.python_io.TFRecordWriter(output_path)
        try:
            image = load_image(filename, size)
            image = prepare_image(image, normalize=False)
            example = build_example(image)
            writer.write(example.SerializeToString())
        except (OSError, OverflowError, ValueError):
            errors += 1
        print(i, '\t', '%0.4f image/sec, %s errors' % (rate, errors), end='\r')
        if i % update_stat_every == 0:
            rate = i / (time.time() - start)
    # close the last open shard
    if writer:
        writer.close()
    print('%s images processed at %0.4f image/sec. %s errors occurred.'
          % (len(filenames), rate, errors))
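
# Usage sketch for prepare_dataset (not part of the original module): a minimal
# example assuming the helpers above are importable. The paths below are
# hypothetical placeholders. Output shards are named '<prefix>-<shard>.tfrecords',
# e.g. 'train-0000.tfrecords', with images_per_file images per shard.
def _demo_prepare_dataset():
    prepare_dataset(
        input_dir='/data/images',      # hypothetical: any directory tree of images
        output_dir='/data/tfrecords',  # hypothetical: must already exist (asserted above)
        size=512,                      # target size passed to load_image
        images_per_file=10000,         # shard size before rolling to a new file
        file_prefix='train')
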
def style_transfer(content=None,
                   content_dir=None,
                   content_size=512,
                   style=None,
                   style_dir=None,
                   style_size=512,
                   crop=None,
                   preserve_color=None,
                   alpha=1.0,
                   style_interp_weights=None,
                   mask=None,
                   output_dir='/output',
                   save_ext='jpg',
                   gpu=0,
                   vgg_weights='/floyd_models/vgg19_weights_normalized.h5',
                   decoder_weights='/floyd_models/decoder_weights.h5',
                   tf_checkpoint_dir=None):
    assert bool(content) != bool(content_dir), 'Either content or content_dir should be given'
    assert bool(style) != bool(style_dir), 'Either style or style_dir should be given'

    if not os.path.exists(output_dir):
        print('Creating output dir at', output_dir)
        os.mkdir(output_dir)

    # Assume that it is either an h5 file or a name of a TensorFlow checkpoint
    # NOTE: For now, artificially switching off pretrained h5 weights
    decoder_in_h5 = False  # decoder_weights.endswith('.h5')

    if content:
        content_batch = [content]
    else:
        assert mask is None, 'For spatial control use the --content option'
        content_batch = extract_image_names_recursive(content_dir)

    if style:
        style = style.split(',')
        if mask:
            assert len(style) == 2, 'For spatial control provide two style images'
            style_batch = [style]
        elif len(style) > 1:
            # Style blending
            if not style_interp_weights:
                # by default, all styles get equal weights
                style_interp_weights = np.array([1.0 / len(style)] * len(style))
            else:
                # normalize weights so that their sum equals one
                style_interp_weights = [float(w) for w in style_interp_weights.split(',')]
                style_interp_weights = np.array(style_interp_weights)
                style_interp_weights /= np.sum(style_interp_weights)
            assert len(style) == len(style_interp_weights), \
                '--style and --style_interp_weights must have the same number of elements'
            style_batch = [style]
        else:
            style_batch = style
    else:
        assert mask is None, 'For spatial control use the --style option'
        style_batch = extract_image_names_recursive(style_dir)

    print('Number of content images:', len(content_batch))
    print('Number of style images:', len(style_batch))

    if gpu >= 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
        data_format = 'channels_first'
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
        data_format = 'channels_last'

    image, content, style, target, encoder, decoder, otherLayers = _build_graph(
        vgg_weights, decoder_weights if decoder_in_h5 else None, alpha,
        data_format=data_format)

    with tf.Session() as sess:
        if decoder_in_h5:
            sess.run(tf.global_variables_initializer())
        elif tf_checkpoint_dir is not None:
            # Some checkpoint was provided
            saver = tf.train.Saver()
            saver.restore(sess, os.path.join(tf_checkpoint_dir, 'adain-final'))

        for content_path, style_path in product(content_batch, style_batch):
            content_name = get_filename(content_path)
            content_image = load_image(content_path, content_size, crop=True)

            if isinstance(style_path, list):
                # Style blending / spatial control
                style_paths = style_path
                style_name = '_'.join(map(get_filename, style_paths))

                # Gather all style images in one numpy array in order to get
                # their activations in one pass
                style_images = None
                for i, style_path in enumerate(style_paths):
                    style_image = load_image(style_path, style_size, crop)
                    if preserve_color:
                        style_image = coral(style_image, content_image)
                    style_image = prepare_image(style_image)
                    if style_images is None:
                        shape = tuple([len(style_paths)]) + style_image.shape
                        style_images = np.empty(shape)
                    assert style_images.shape[1:] == style_image.shape, \
                        'Style images must have the same shape'
                    style_images[i] = style_image

                style_features = sess.run(encoder, feed_dict={
                    image: style_images
                })

                content_image = prepare_image(content_image)
                content_feature = sess.run(encoder, feed_dict={
                    image: content_image[np.newaxis, :]
                })

                if mask:
                    # For spatial control, extract foreground and background
                    # parts of the content using the corresponding masks,
                    # run them individually through AdaIN, then combine
                    if data_format == 'channels_first':
                        _, c, h, w = content_feature.shape
                        content_view_shape = (c, -1)
                        mask_shape = lambda mask: (c, len(mask), 1)
                        mask_slice = lambda mask: (slice(None), mask)
                    else:
                        _, h, w, c = content_feature.shape
                        content_view_shape = (-1, c)
                        mask_shape = lambda mask: (1, len(mask), c)
                        mask_slice = lambda mask: (mask, slice(None))

                    mask = load_mask(mask, h, w).reshape(-1)
                    fg_mask = np.flatnonzero(mask == 1)
                    bg_mask = np.flatnonzero(mask == 0)

                    content_feat_view = content_feature.reshape(content_view_shape)
                    content_feat_fg = content_feat_view[mask_slice(fg_mask)].reshape(mask_shape(fg_mask))
                    content_feat_bg = content_feat_view[mask_slice(bg_mask)].reshape(mask_shape(bg_mask))

                    style_feature_fg = style_features[0]
                    style_feature_bg = style_features[1]

                    target_feature_fg = sess.run(target, feed_dict={
                        content: content_feat_fg[np.newaxis, :],
                        style: style_feature_fg[np.newaxis, :]
                    })
                    target_feature_fg = np.squeeze(target_feature_fg)
                    target_feature_bg = sess.run(target, feed_dict={
                        content: content_feat_bg[np.newaxis, :],
                        style: style_feature_bg[np.newaxis, :]
                    })
                    target_feature_bg = np.squeeze(target_feature_bg)

                    target_feature = np.zeros_like(content_feat_view)
                    target_feature[mask_slice(fg_mask)] = target_feature_fg
                    target_feature[mask_slice(bg_mask)] = target_feature_bg
                    target_feature = target_feature.reshape(content_feature.shape)
                else:
                    # For style blending, get activations for each style, then
                    # take a weighted sum
                    target_feature = np.zeros(content_feature.shape)
                    for style_feature, weight in zip(style_features, style_interp_weights):
                        target_feature += sess.run(target, feed_dict={
                            content: content_feature,
                            style: style_feature[np.newaxis, :]
                        }) * weight
            else:
                # NOTE: This is the part we care about, if only 1 style image is provided
                style_name = get_filename(style_path)
                style_image = load_image(style_path, style_size, crop=True)  # this only gives a square crop
                style_image = center_crop_np(style_image)  # actually crop the center out
                if preserve_color:
                    style_image = coral(style_image, content_image)
                style_image = prepare_image(style_image, True, data_format)
                content_image = prepare_image(content_image, True, data_format)

                # Extract other layers
                conv3_1_layer, conv4_1_layer = otherLayers
                style_feature, conv3_1_out_style, conv4_1_out_style = sess.run(
                    [encoder, conv3_1_layer, conv4_1_layer], feed_dict={
                        image: style_image[np.newaxis, :]
                    })
                content_feature = sess.run(encoder, feed_dict={
                    image: content_image[np.newaxis, :]
                })
                target_feature = sess.run(target, feed_dict={
                    content: content_feature,
                    style: style_feature
                })

                output = sess.run(decoder, feed_dict={
                    content: content_feature,
                    target: target_feature,
                    style: style_feature
                })

                # Grab the relevant layer outputs to see what's being minimized
                conv3_1_out_output, conv4_1_out_output = sess.run(
                    [conv3_1_layer, conv4_1_layer], feed_dict={
                        image: output
                    })

                filename = '%s_stylized_%s.%s' % (content_name, style_name, save_ext)
                filename = os.path.join(output_dir, filename)
                save_image(filename, output[0], data_format=data_format)
                print('Output image saved at', filename)

                # TODO: Change these layers
                layersToViz = [conv3_1_out_style, conv4_1_out_style,
                               conv3_1_out_output, conv4_1_out_output]
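
# Usage sketch for style_transfer (not part of the original module): hedged
# examples with hypothetical image paths. Weight files come from the defaults
# above; because decoder_in_h5 is switched off, a TensorFlow checkpoint dir
# containing 'adain-final' must be supplied so the decoder gets initialized.
def _demo_style_transfer():
    # Single content/style pair; this exercises the single-style path marked
    # with NOTE above and writes something like
    # 'golden_gate_stylized_starry_night.jpg' under output_dir.
    style_transfer(
        content='/images/golden_gate.jpg',  # hypothetical content image
        style='/images/starry_night.jpg',   # hypothetical style image
        alpha=1.0,                          # 1.0 = full stylization strength
        output_dir='/output',
        tf_checkpoint_dir='/floyd_models')  # hypothetical dir with 'adain-final'

    # Batch variant: every content image under a directory against one style,
    # on CPU (gpu=-1 hides CUDA devices and selects channels_last).
    style_transfer(
        content_dir='/images/contents',     # hypothetical directory
        style='/images/starry_night.jpg',
        gpu=-1,
        output_dir='/output',
        tf_checkpoint_dir='/floyd_models')
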