def parse_args(argv=None):
    '''Parses command line arguments for the image analogy command.

    Args:
        argv: Optional list of argument strings. When None (the default)
            argparse falls back to ``sys.argv[1:]``, which is what the
            original zero-argument call did.

    Returns:
        The populated ``argparse.Namespace``, or None when the VGG weights
        file named by ``--vgg-weights`` does not exist.

    Raises:
        AssertionError: if ``--a-scale-mode`` is not one of "ratio",
            "none" or "match".
    '''
    parser = argparse.ArgumentParser(
        description='Neural image analogies with Keras.')
    parser.add_argument('a_image_path', metavar='ref', type=str,
                        help='Path to the reference image mask (A)')
    parser.add_argument('ap_image_path', metavar='base', type=str,
                        help='Path to the source image (A\')')
    parser.add_argument('b_image_path', metavar='ref', type=str,
                        help='Path to the new mask for generation (B)')
    parser.add_argument('result_prefix', metavar='res_prefix', type=str,
                        help='Prefix for the saved results (B\')')
    # size-related
    parser.add_argument('--width', dest='out_width', type=int, default=0,
                        help='Set output width')
    parser.add_argument('--height', dest='out_height', type=int, default=0,
                        help='Set output height')
    parser.add_argument('--scales', dest='num_scales', type=int, default=3,
                        help='Run at N different scales')
    parser.add_argument('--min-scale', dest='min_scale', type=float,
                        default=0.25, help='Smallest scale to iterate')
    parser.add_argument('--a-scale-mode', dest='a_scale_mode', type=str,
                        default='none',
                        help='Method of scaling A and A\' relative to B')
    parser.add_argument('--a-scale', dest='a_scale', type=float, default=1.0,
                        help='Additional scale factor for A and A\'')
    parser.add_argument(
        '--output-full', dest='output_full_size', action='store_true',
        help='Output all intermediate images at full size regardless of current scale.')
    # optimizer
    parser.add_argument('--iters', dest='num_iterations_per_scale', type=int,
                        default=5, help='Number of iterations per scale')
    parser.add_argument('--model', dest='match_model', type=str,
                        default='patchmatch',
                        help='Matching algorithm (patchmatch or brute)')
    parser.add_argument(
        '--mrf-nnf-steps', dest='mrf_nnf_steps', type=int, default=5,
        help='Number of patchmatch updates per iteration for local coherence loss.')
    # NOTE: the dest is spelled "mnf" (not "mrf"); it is kept as-is because
    # it is the attribute name exposed on the returned namespace.
    parser.add_argument(
        '--randomize-mrf-nnf', dest='randomize_mnf_nnf', action='store_true',
        help='Randomize the local coherence similarity matrix at the start of a new scale instead of scaling it up.')
    parser.add_argument(
        '--analogy-nnf-steps', dest='analogy_nnf_steps', type=int, default=15,
        help='Number of patchmatch updates for the analogy loss (done once per scale).')
    # loss
    parser.add_argument('--tv-w', dest='tv_weight', type=float, default=1.0,
                        help='Weight for TV loss.')
    parser.add_argument('--analogy-w', dest='analogy_weight', type=float,
                        default=1.0, help='Weight for analogy loss.')
    parser.add_argument(
        '--analogy-layers', dest='analogy_layers', action=CommaSplitAction,
        default=['conv3_1', 'conv4_1'],
        help='Comma-separated list of layer names to be used for the analogy loss')
    parser.add_argument(
        '--use-full-analogy', dest='use_full_analogy', action="store_true",
        help='Use the full set of analogy patches (slower/more memory but maybe more accurate)')
    parser.add_argument('--mrf-w', dest='mrf_weight', type=float, default=0.5,
                        help='Weight for MRF loss between A\' and B\'')
    parser.add_argument(
        '--mrf-layers', dest='mrf_layers', action=CommaSplitAction,
        default=['conv3_1', 'conv4_1'],
        help='Comma-separated list of layer names to be used for the MRF loss')
    parser.add_argument('--b-content-w', dest='b_bp_content_weight',
                        type=float, default=0.0,
                        help='Weight for content loss between B and B\'')
    parser.add_argument(
        '--content-layers', dest='b_content_layers', action=CommaSplitAction,
        default=['conv3_1', 'conv4_1'],
        help='Comma-separated list of layer names to be used for the content loss')
    parser.add_argument(
        '--nstyle-w', dest='neural_style_weight', type=float, default=0.0,
        help='Weight for neural style loss between A\' and B\'')
    parser.add_argument(
        '--nstyle-layers', dest='neural_style_layers',
        action=CommaSplitAction,
        default=['conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'],
        help='Comma-separated list of layer names to be used for the neural style')
    parser.add_argument('--patch-size', dest='patch_size', type=int,
                        default=1, help='Patch size used for matching.')
    parser.add_argument(
        '--patch-stride', dest='patch_stride', type=int, default=1,
        help='Patch stride used for matching. Currently required to be 1.')
    # VGG
    parser.add_argument('--vgg-weights', dest='vgg_weights', type=str,
                        default=os.environ.get(VGG_ENV_VAR, 'vgg16_weights.h5'),
                        help='Path to VGG16 weights.')
    parser.add_argument('--pool-mode', dest='pool_mode', type=str,
                        default='max',
                        help='Pooling mode for VGG ("avg" or "max")')
    # jitter
    parser.add_argument('--jitter', dest='jitter', type=float, default=0,
                        help='Magnitude of random shift at scale x1')
    parser.add_argument('--color-jitter', dest='color_jitter', type=float,
                        default=0,
                        help='Magnitude of random jitter to each pixel')
    parser.add_argument(
        '--contrast', dest='contrast_percent', type=float, default=0.02,
        help='Drop the bottom x percentile and scale by the top (100 - x)th percentile')

    # argv=None makes argparse read sys.argv[1:], as before.
    args = parser.parse_args(argv)

    # Raised explicitly rather than via `assert` so the validation is not
    # stripped under `python -O`; the exception type is unchanged.
    if args.a_scale_mode not in ('ratio', 'none', 'match'):
        raise AssertionError(
            'a-scale-mode must be set to one of "ratio", "none", or "match"')

    # hack for CPU users :(
    if K._BACKEND == 'theano':
        from keras.backend import theano_backend
        if args.a_scale_mode != 'match':
            if not theano_backend._on_gpu():
                print('Theano CPU mode detected. Forcing a-scale-mode to "match"')
                # prevent conv2d errors when using CPU
                args.a_scale_mode = 'match'
                args.a_scale = 1.0
            elif theano_backend.dnn and not theano_backend.dnn.dnn_available():
                print('Theano CUDA without cuDNN detected. Forcing a-scale-mode to "match"')
                # prevent conv2d errors when using CUDA without cuDNN
                args.a_scale_mode = 'match'
                args.a_scale = 1.0

    # make sure weights are in place
    if not os.path.exists(args.vgg_weights):
        print('Model weights not found (see "--vgg-weights" parameter).')
        return None
    return args
'Use the full set of analogy patches (slower/more memory but maybe more accurate)' ) args = parser.parse_args() a_image_path = args.a_image_path ap_image_path = args.ap_image_path b_image_path = args.b_image_path result_prefix = args.result_prefix weights_path = args.vgg_weights a_scale_mode = args.a_scale_mode assert a_scale_mode in ( 'ratio', 'none', 'match'), 'a-scale-mode must be set to one of "ratio", "none", or "match"' # hack for CPU users :( from keras.backend import theano_backend if not theano_backend._on_gpu(): a_scale_mode = 'match' # prevent conv2d errors when using CPU args.a_scale = 1 print('CPU mode detected. Forcing a-scale-mode to "match"') # these are the weights of the different loss components total_variation_weight = args.tv_weight analogy_weight = args.analogy_weight b_bp_content_weight = args.b_bp_content_weight mrf_weight = args.mrf_weight patch_size = args.patch_size patch_stride = 1 analogy_layers = args.analogy_layers.split(',') mrf_layers = args.mrf_layers.split(',') b_content_layers = args.content_layers.split(',')
def deconv2d_fast(x, kernel, strides=(1, 1), border_mode='valid',
                  dim_ordering='th', image_shape=None, filter_shape=None):
    '''2D cross-correlation (kernel is never flipped); uses cuDNN if available.

    Args:
        x: input tensor, laid out according to `dim_ordering`.
        kernel: filter tensor, laid out according to `dim_ordering`.
        strides: subsampling passed to the convolution op; must be (1, 1)
            when `border_mode` is "same".
        border_mode: string, "same" or "valid".
        dim_ordering: "th" or "tf". "tf" inputs are shuffled to "th" layout
            for the op and the result is shuffled back.
        image_shape: optional static input shape (same ordering as `x`);
            only forwarded on the non-cuDNN path.
        filter_shape: optional static kernel shape (same ordering as
            `kernel`); only forwarded on the non-cuDNN path.

    Returns:
        The result tensor, in the layout given by `dim_ordering`.

    Raises:
        Exception: on an unknown `dim_ordering`, or (non-cuDNN path only)
            on a `border_mode` other than "same"/"valid".
        AssertionError: if `border_mode` is "same" and strides != (1, 1).
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    if dim_ordering == 'tf':
        # TF uses the last dimension as channel dimension,
        # instead of the 2nd one.
        # TH input shape: (samples, input_depth, rows, cols)
        # TF input shape: (samples, rows, cols, input_depth)
        # TH kernel shape: (depth, input_depth, rows, cols)
        # TF kernel shape: (rows, cols, input_depth, depth)
        x = x.dimshuffle((0, 3, 1, 2))
        kernel = kernel.dimshuffle((3, 2, 0, 1))
        if image_shape:
            image_shape = (image_shape[0], image_shape[3],
                           image_shape[1], image_shape[2])
        if filter_shape:
            filter_shape = (filter_shape[3], filter_shape[2],
                            filter_shape[0], filter_shape[1])

    if _on_gpu() and dnn.dnn_available():
        if border_mode == 'same':
            assert strides == (1, 1)
            # conv_mode='cross' keeps this branch consistent with the
            # 'valid' cuDNN branch and the filter_flip=False CPU path;
            # without it cuDNN would flip the kernel here only.
            conv_out = dnn.dnn_conv(img=x,
                                    kerns=kernel,
                                    conv_mode='cross',
                                    border_mode='full')
        else:
            conv_out = dnn.dnn_conv(img=x,
                                    conv_mode='cross',
                                    kerns=kernel,
                                    border_mode=border_mode,
                                    subsample=strides)
    else:
        if border_mode == 'same':
            th_border_mode = 'full'
            assert strides == (1, 1)
        elif border_mode == 'valid':
            th_border_mode = 'valid'
        else:
            raise Exception('Border mode not supported: ' + str(border_mode))
        conv_out = T.nnet.conv2d(x, kernel,
                                 border_mode=th_border_mode,
                                 subsample=strides,
                                 filter_flip=False,  # important: don't flip kernel
                                 input_shape=image_shape,
                                 filter_shape=filter_shape)

    if border_mode == 'same':
        # "same" is computed as a 'full' convolution cropped back to the
        # spatial size of the input.
        shift_x = (kernel.shape[2] - 1) // 2
        shift_y = (kernel.shape[3] - 1) // 2
        conv_out = conv_out[:, :,
                            shift_x:x.shape[2] + shift_x,
                            shift_y:x.shape[3] + shift_y]

    if dim_ordering == 'tf':
        conv_out = conv_out.dimshuffle((0, 2, 3, 1))
    return conv_out
default=3, help='Patch size used for matching.') parser.add_argument('--use-full-analogy', dest='use_full_analogy', action="store_true", help='Use the full set of analogy patches (slower/more memory but maybe more accurate)') args = parser.parse_args() a_image_path = args.a_image_path ap_image_path = args.ap_image_path b_image_path = args.b_image_path result_prefix = args.result_prefix weights_path = args.vgg_weights a_scale_mode = args.a_scale_mode assert a_scale_mode in ('ratio', 'none', 'match'), 'a-scale-mode must be set to one of "ratio", "none", or "match"' # hack for CPU users :( from keras.backend import theano_backend if not theano_backend._on_gpu(): a_scale_mode = 'match' # prevent conv2d errors when using CPU args.a_scale = 1 print('CPU mode detected. Forcing a-scale-mode to "match"') # these are the weights of the different loss components total_variation_weight = args.tv_weight analogy_weight = args.analogy_weight b_bp_content_weight = args.b_bp_content_weight mrf_weight = args.mrf_weight patch_size = args.patch_size patch_stride = 1 analogy_layers = args.analogy_layers.split(',') mrf_layers = args.mrf_layers.split(',') b_content_layers = args.content_layers.split(',')
def deconv2d(x, kernel, n_kern=32, strides=(1, 1), border_mode='valid',
             dim_ordering='th', image_shape=None, filter_shape=None):
    '''Experimental per-channel "deconvolution"; runs on cuDNN if available.

    border_mode: string, "same" or "valid".
    dim_ordering: "th" or "tf"; "tf" inputs are shuffled to "th" layout and
        the result is shuffled back before returning.

    cuDNN path: only border_mode "same" is implemented (anything else raises
    NotImplementedError). Each kernel along the leading axis is paired by
    theano.scan with the matching channel slice of `x`, correlated without
    kernel flipping, and the per-pair results are summed.

    Non-cuDNN path: a single T.nnet.conv2d call with border_mode='full' and
    filter_flip=False.

    NOTE(review): on the non-cuDNN path `image_shape` and `filter_shape` are
    indexed unconditionally, so leaving them at their None defaults fails
    there. `n_kern` is only referenced from commented-out code, and
    `strides` is only checked by the asserts, never passed to an op.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
    if dim_ordering == 'tf':
        # TF uses the last dimension as channel dimension,
        # instead of the 2nd one.
        # TH input shape: (samples, input_depth, rows, cols)
        # TF input shape: (samples, rows, cols, input_depth)
        # TH kernel shape: (depth, input_depth, rows, cols)
        # TF kernel shape: (rows, cols, input_depth, depth)
        x = x.dimshuffle((0, 3, 1, 2))
        kernel = kernel.dimshuffle((3, 2, 0, 1))
        if image_shape:
            image_shape = (image_shape[0], image_shape[3],
                           image_shape[1], image_shape[2])
        if filter_shape:
            filter_shape = (filter_shape[3], filter_shape[2],
                            filter_shape[0], filter_shape[1])
    if _on_gpu() and dnn.dnn_available():
        if border_mode == 'same':
            assert (strides == (1, 1))

            def conv_me(single_kernel, single_image):
                # scan iterates over leading dimension
                # single_kernel - Dx3x3 => 1xDx3x3
                # single_result - Nx28x28 => Nx1x28x28 [BC01]
                # NOTE(review): the image slice is reshaped using the
                # *kernel's* dimensions, which presumably only works when the
                # element counts happen to match -- confirm intent.
                return dnn.dnn_conv(
                    img=single_image.reshape(
                        (1, single_kernel.shape[0], single_kernel.shape[1],
                         single_kernel.shape[2])),
                    kerns=single_kernel.reshape(
                        (single_kernel.shape[0], 1, single_kernel.shape[1],
                         single_kernel.shape[2])),
                    conv_mode='cross',  # <<<<< IMPORTANT 111, dont flip kern
                    border_mode='full')

            # Pair kernel[i] with channel i of x (channels moved to the
            # leading axis); `updates` is not used afterwards.
            results, updates = theano.scan(
                fn=conv_me,
                outputs_info=None,
                sequences=[kernel, x.dimshuffle((1, 0, 2, 3))])
            # Sum the per-pair results, then re-insert a singleton axis at
            # position 1.
            s = results.sum(axis=0)
            conv_out = T.reshape(s, (s.shape[0], 1, s.shape[1], s.shape[2]))
            # Crop the 'full' output back to the spatial size of x.
            shift_x = (kernel.shape[2] - 1) // 2
            shift_y = (kernel.shape[3] - 1) // 2
            conv_out = conv_out[:, :, shift_x:x.shape[2] + shift_x,
                                shift_y:x.shape[3] + shift_y]
        else:
            raise NotImplementedError()
            # conv_out = dnn.dnn_conv(img=x,
            #                         kerns=kernel,
            #                         border_mode=border_mode,
            #                         subsample=strides)
    else:
        # Validates border_mode. NOTE(review): th_border_mode is assigned
        # here but never read; the conv2d call below hard-codes 'full'.
        if border_mode == 'same':
            th_border_mode = 'full'
            assert (strides == (1, 1))
        elif border_mode == 'valid':
            th_border_mode = 'valid'
        else:
            raise Exception('Border mode not supported: ' + str(border_mode))

        # NOTE(review): this helper is only referenced from the
        # commented-out code further down; it is never called.
        def conv_me_nnet(single_kernel, single_image):
            # scan iterates over leading dimension
            # single_kernel - Dx3x3 => 1xDx3x3
            # single_result - Nx28x28 => Nx1x28x28 [BC01]
            return T.nnet.conv2d(
                # T.reshape(single_image, (single_image.shape[0], 1, single_image.shape[1], single_image.shape[2])),
                single_image,
                # T.reshape(single_kernel, (1, single_kernel.shape[0], single_kernel.shape[1], single_kernel.shape[2])),
                single_kernel,
                input_shape=(image_shape[0], image_shape[2], image_shape[3]),
                filter_shape=(filter_shape[1], filter_shape[2], filter_shape[3]),
                filter_flip=False,  # <<<<< IMPORTANT 111, dont flip kern
                border_mode='full',
            )

        # NOTE(review): 3-element shape hints are passed for what look like
        # 4D operands -- confirm against the Theano conv2d contract.
        conv_out = T.nnet.conv2d(
            # T.reshape(single_image, (single_image.shape[0], 1, single_image.shape[1], single_image.shape[2])),
            x,
            # T.reshape(single_kernel, (1, single_kernel.shape[0], single_kernel.shape[1], single_kernel.shape[2])),
            kernel,
            input_shape=(image_shape[0], image_shape[2], image_shape[3]),
            filter_shape=(filter_shape[1], filter_shape[2], filter_shape[3]),
            filter_flip=False,  # <<<<< IMPORTANT 111, dont flip kern
            border_mode='full',
        )
        # Earlier per-channel attempts, kept for reference:
        # if not True:
        #     # very slow
        #     results, updates = theano.scan(fn=conv_me_nnet,
        #                                    outputs_info=None,
        #                                    sequences=[kernel, x.dimshuffle((1, 0, 2, 3))])
        #
        #     conv_out = results.sum(axis=0)
        # else:
        #     conv_out = None
        #     for i in range(n_kern):
        #         if conv_out is None:
        #             conv_out = conv_me_nnet(kernel[i,:,:,:], x[:,i,:,:])
        #         else:
        #             conv_out += conv_me_nnet(kernel[i,:,:,:], x[:,i,:,:])
        if border_mode == 'same':
            # print(x)
            # Crop the 'full' output back to the spatial size of x.
            shift_x = (kernel.shape[2] - 1) // 2
            shift_y = (kernel.shape[3] - 1) // 2
            conv_out = conv_out[:, :, shift_x:x.shape[2] + shift_x,
                                shift_y:x.shape[3] + shift_y]
            # raise NotImplementedError()
    if dim_ordering == 'tf':
        conv_out = conv_out.dimshuffle((0, 2, 3, 1))
    return conv_out
def deconv2d_fast(x, kernel, strides=(1, 1), border_mode='valid',
                  dim_ordering='th', image_shape=None, filter_shape=None):
    '''2D cross-correlation (kernel is never flipped); uses cuDNN if available.

    Args:
        x: input tensor, laid out according to `dim_ordering`.
        kernel: filter tensor, laid out according to `dim_ordering`.
        strides: subsampling passed to the convolution op; must be (1, 1)
            when `border_mode` is "same".
        border_mode: string, "same" or "valid".
        dim_ordering: "th" or "tf". "tf" inputs are shuffled to "th" layout
            for the op and the result is shuffled back.
        image_shape: optional static input shape (same ordering as `x`);
            only forwarded on the non-cuDNN path.
        filter_shape: optional static kernel shape (same ordering as
            `kernel`); only forwarded on the non-cuDNN path.

    Returns:
        The result tensor, in the layout given by `dim_ordering`.

    Raises:
        Exception: on an unknown `dim_ordering`, or (non-cuDNN path only)
            on a `border_mode` other than "same"/"valid".
        AssertionError: if `border_mode` is "same" and strides != (1, 1).
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))

    if dim_ordering == 'tf':
        # TF uses the last dimension as channel dimension,
        # instead of the 2nd one.
        # TH input shape: (samples, input_depth, rows, cols)
        # TF input shape: (samples, rows, cols, input_depth)
        # TH kernel shape: (depth, input_depth, rows, cols)
        # TF kernel shape: (rows, cols, input_depth, depth)
        x = x.dimshuffle((0, 3, 1, 2))
        kernel = kernel.dimshuffle((3, 2, 0, 1))
        if image_shape:
            image_shape = (image_shape[0], image_shape[3],
                           image_shape[1], image_shape[2])
        if filter_shape:
            filter_shape = (filter_shape[3], filter_shape[2],
                            filter_shape[0], filter_shape[1])

    if _on_gpu() and dnn.dnn_available():
        if border_mode == 'same':
            assert strides == (1, 1)
            # conv_mode='cross' keeps this branch consistent with the
            # 'valid' cuDNN branch and the filter_flip=False CPU path;
            # without it cuDNN would flip the kernel here only.
            conv_out = dnn.dnn_conv(img=x,
                                    kerns=kernel,
                                    conv_mode='cross',
                                    border_mode='full')
        else:
            conv_out = dnn.dnn_conv(img=x,
                                    conv_mode='cross',
                                    kerns=kernel,
                                    border_mode=border_mode,
                                    subsample=strides)
    else:
        if border_mode == 'same':
            th_border_mode = 'full'
            assert strides == (1, 1)
        elif border_mode == 'valid':
            th_border_mode = 'valid'
        else:
            raise Exception('Border mode not supported: ' + str(border_mode))
        conv_out = T.nnet.conv2d(x, kernel,
                                 border_mode=th_border_mode,
                                 subsample=strides,
                                 filter_flip=False,  # important: don't flip kernel
                                 input_shape=image_shape,
                                 filter_shape=filter_shape)

    if border_mode == 'same':
        # "same" is computed as a 'full' convolution cropped back to the
        # spatial size of the input.
        shift_x = (kernel.shape[2] - 1) // 2
        shift_y = (kernel.shape[3] - 1) // 2
        conv_out = conv_out[:, :,
                            shift_x:x.shape[2] + shift_x,
                            shift_y:x.shape[3] + shift_y]

    if dim_ordering == 'tf':
        conv_out = conv_out.dimshuffle((0, 2, 3, 1))
    return conv_out
def deconv2d(x, kernel, n_kern=32, strides=(1, 1), border_mode='valid',
             dim_ordering='th', image_shape=None, filter_shape=None):
    '''Experimental per-channel "deconvolution"; runs on cuDNN if available.

    border_mode: string, "same" or "valid".
    dim_ordering: "th" or "tf"; "tf" inputs are shuffled to "th" layout and
        the result is shuffled back before returning.

    cuDNN path: only border_mode "same" is implemented (anything else raises
    NotImplementedError). Each kernel along the leading axis is paired by
    theano.scan with the matching channel slice of `x`, correlated without
    kernel flipping, and the per-pair results are summed.

    Non-cuDNN path: a single T.nnet.conv2d call with border_mode='full' and
    filter_flip=False.

    NOTE(review): on the non-cuDNN path `image_shape` and `filter_shape` are
    indexed unconditionally, so leaving them at their None defaults fails
    there. `n_kern` is only referenced from commented-out code, and
    `strides` is only checked by the asserts, never passed to an op.
    '''
    if dim_ordering not in {'th', 'tf'}:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
    if dim_ordering == 'tf':
        # TF uses the last dimension as channel dimension,
        # instead of the 2nd one.
        # TH input shape: (samples, input_depth, rows, cols)
        # TF input shape: (samples, rows, cols, input_depth)
        # TH kernel shape: (depth, input_depth, rows, cols)
        # TF kernel shape: (rows, cols, input_depth, depth)
        x = x.dimshuffle((0, 3, 1, 2))
        kernel = kernel.dimshuffle((3, 2, 0, 1))
        if image_shape:
            image_shape = (image_shape[0], image_shape[3],
                           image_shape[1], image_shape[2])
        if filter_shape:
            filter_shape = (filter_shape[3], filter_shape[2],
                            filter_shape[0], filter_shape[1])
    if _on_gpu() and dnn.dnn_available():
        if border_mode == 'same':
            assert (strides == (1, 1))

            def conv_me(single_kernel, single_image):
                # scan iterates over leading dimension
                # single_kernel - Dx3x3 => 1xDx3x3
                # single_result - Nx28x28 => Nx1x28x28 [BC01]
                # NOTE(review): the image slice is reshaped using the
                # *kernel's* dimensions, which presumably only works when the
                # element counts happen to match -- confirm intent.
                return dnn.dnn_conv(
                    img=single_image.reshape(
                        (1, single_kernel.shape[0], single_kernel.shape[1],
                         single_kernel.shape[2])),
                    kerns=single_kernel.reshape(
                        (single_kernel.shape[0], 1, single_kernel.shape[1],
                         single_kernel.shape[2])),
                    conv_mode='cross',  # <<<<< IMPORTANT 111, dont flip kern
                    border_mode='full')

            # Pair kernel[i] with channel i of x (channels moved to the
            # leading axis); `updates` is not used afterwards.
            results, updates = theano.scan(
                fn=conv_me,
                outputs_info=None,
                sequences=[kernel, x.dimshuffle((1, 0, 2, 3))])
            # Sum the per-pair results, then re-insert a singleton axis at
            # position 1.
            s = results.sum(axis=0)
            conv_out = T.reshape(s, (s.shape[0], 1, s.shape[1], s.shape[2]))
            # Crop the 'full' output back to the spatial size of x.
            shift_x = (kernel.shape[2] - 1) // 2
            shift_y = (kernel.shape[3] - 1) // 2
            conv_out = conv_out[:, :, shift_x:x.shape[2] + shift_x,
                                shift_y:x.shape[3] + shift_y]
        else:
            raise NotImplementedError()
            # conv_out = dnn.dnn_conv(img=x,
            #                         kerns=kernel,
            #                         border_mode=border_mode,
            #                         subsample=strides)
    else:
        # Validates border_mode. NOTE(review): th_border_mode is assigned
        # here but never read; the conv2d call below hard-codes 'full'.
        if border_mode == 'same':
            th_border_mode = 'full'
            assert (strides == (1, 1))
        elif border_mode == 'valid':
            th_border_mode = 'valid'
        else:
            raise Exception('Border mode not supported: ' + str(border_mode))

        # NOTE(review): this helper is only referenced from the
        # commented-out code further down; it is never called.
        def conv_me_nnet(single_kernel, single_image):
            # scan iterates over leading dimension
            # single_kernel - Dx3x3 => 1xDx3x3
            # single_result - Nx28x28 => Nx1x28x28 [BC01]
            return T.nnet.conv2d(
                # T.reshape(single_image, (single_image.shape[0], 1, single_image.shape[1], single_image.shape[2])),
                single_image,
                # T.reshape(single_kernel, (1, single_kernel.shape[0], single_kernel.shape[1], single_kernel.shape[2])),
                single_kernel,
                input_shape=(image_shape[0], image_shape[2], image_shape[3]),
                filter_shape=(filter_shape[1], filter_shape[2], filter_shape[3]),
                filter_flip=False,  # <<<<< IMPORTANT 111, dont flip kern
                border_mode='full',
            )

        # NOTE(review): 3-element shape hints are passed for what look like
        # 4D operands -- confirm against the Theano conv2d contract.
        conv_out = T.nnet.conv2d(
            # T.reshape(single_image, (single_image.shape[0], 1, single_image.shape[1], single_image.shape[2])),
            x,
            # T.reshape(single_kernel, (1, single_kernel.shape[0], single_kernel.shape[1], single_kernel.shape[2])),
            kernel,
            input_shape=(image_shape[0], image_shape[2], image_shape[3]),
            filter_shape=(filter_shape[1], filter_shape[2], filter_shape[3]),
            filter_flip=False,  # <<<<< IMPORTANT 111, dont flip kern
            border_mode='full',
        )
        # Earlier per-channel attempts, kept for reference:
        # if not True:
        #     # very slow
        #     results, updates = theano.scan(fn=conv_me_nnet,
        #                                    outputs_info=None,
        #                                    sequences=[kernel, x.dimshuffle((1, 0, 2, 3))])
        #
        #     conv_out = results.sum(axis=0)
        # else:
        #     conv_out = None
        #     for i in range(n_kern):
        #         if conv_out is None:
        #             conv_out = conv_me_nnet(kernel[i,:,:,:], x[:,i,:,:])
        #         else:
        #             conv_out += conv_me_nnet(kernel[i,:,:,:], x[:,i,:,:])
        if border_mode == 'same':
            # print(x)
            # Crop the 'full' output back to the spatial size of x.
            shift_x = (kernel.shape[2] - 1) // 2
            shift_y = (kernel.shape[3] - 1) // 2
            conv_out = conv_out[:, :, shift_x:x.shape[2] + shift_x,
                                shift_y:x.shape[3] + shift_y]
            # raise NotImplementedError()
    if dim_ordering == 'tf':
        conv_out = conv_out.dimshuffle((0, 2, 3, 1))
    return conv_out
def parse_args(argv=None):
    '''Parses command line arguments for the image analogy command.

    Args:
        argv: Optional list of argument strings. When None (the default)
            argparse falls back to ``sys.argv[1:]``, which is what the
            original zero-argument call did.

    Returns:
        The populated ``argparse.Namespace``, or None when the VGG weights
        file named by ``--vgg-weights`` does not exist.

    Raises:
        AssertionError: if ``--a-scale-mode`` is not one of "ratio",
            "none" or "match".
    '''
    parser = argparse.ArgumentParser(
        description='Neural image analogies with Keras.')
    parser.add_argument('a_image_path', metavar='ref', type=str,
                        help='Path to the reference image mask (A)')
    parser.add_argument('ap_image_path', metavar='base', type=str,
                        help='Path to the source image (A\')')
    parser.add_argument('b_image_path', metavar='ref', type=str,
                        help='Path to the new mask for generation (B)')
    parser.add_argument('result_prefix', metavar='res_prefix', type=str,
                        help='Prefix for the saved results (B\')')
    # size-related
    parser.add_argument('--width', dest='out_width', type=int, default=0,
                        help='Set output width')
    parser.add_argument('--height', dest='out_height', type=int, default=0,
                        help='Set output height')
    parser.add_argument('--scales', dest='num_scales', type=int, default=3,
                        help='Run at N different scales')
    parser.add_argument('--min-scale', dest='min_scale', type=float,
                        default=0.25, help='Smallest scale to iterate')
    parser.add_argument('--a-scale-mode', dest='a_scale_mode', type=str,
                        default='ratio',
                        help='Method of scaling A and A\' relative to B')
    parser.add_argument('--a-scale', dest='a_scale', type=float, default=1.0,
                        help='Additional scale factor for A and A\'')
    parser.add_argument(
        '--output-full', dest='output_full_size', action='store_true',
        help='Output all intermediate images at full size regardless of current scale.')
    # optimizer
    parser.add_argument('--iters', dest='num_iterations_per_scale', type=int,
                        default=5, help='Number of iterations per scale')
    parser.add_argument('--model', dest='match_model', type=str,
                        default='patchmatch',
                        help='Matching algorithm (patchmatch or brute)')
    parser.add_argument(
        '--mrf-nnf-steps', dest='mrf_nnf_steps', type=int, default=7,
        help='Number of patchmatch updates per iteration for local coherence loss.')
    # NOTE: the dest is spelled "mnf" (not "mrf"); it is kept as-is because
    # it is the attribute name exposed on the returned namespace.
    parser.add_argument(
        '--no-zoom-mrf-nnf', dest='reset_mnf_steps', action='store_true',
        help='Randomize the local coherence similarity matrix at the start of a new scale instead of scaling up the last one.')
    parser.add_argument(
        '--analogy-nnf-steps', dest='analogy_nnf_steps', type=int, default=15,
        help='Number of patchmatch updates for the analogy loss (done once per scale).')
    # loss
    parser.add_argument('--mrf-w', dest='mrf_weight', type=float, default=0.5,
                        help='Weight for MRF loss between A\' and B\'')
    parser.add_argument('--b-content-w', dest='b_bp_content_weight',
                        type=float, default=0.0,
                        help='Weight for content loss between B and B\'')
    parser.add_argument('--analogy-w', dest='analogy_weight', type=float,
                        default=1.0, help='Weight for analogy loss.')
    parser.add_argument('--tv-w', dest='tv_weight', type=float, default=1.0,
                        help='Weight for TV loss.')
    parser.add_argument(
        '--analogy-layers', dest='analogy_layers', action=CommaSplitAction,
        default=['conv3_1', 'conv4_1'],
        help='Comma-separated list of layer names to be used for the analogy loss')
    parser.add_argument(
        '--mrf-layers', dest='mrf_layers', action=CommaSplitAction,
        default=['conv3_1', 'conv4_1'],
        help='Comma-separated list of layer names to be used for the MRF loss')
    parser.add_argument(
        '--content-layers', dest='b_content_layers', action=CommaSplitAction,
        default=['conv3_1', 'conv4_1'],
        help='Comma-separated list of layer names to be used for the content loss')
    parser.add_argument(
        '--use-full-analogy', dest='use_full_analogy', action="store_true",
        help='Use the full set of analogy patches (slower/more memory but maybe more accurate)')
    parser.add_argument('--patch-size', dest='patch_size', type=int,
                        default=1, help='Patch size used for matching.')
    parser.add_argument(
        '--patch-stride', dest='patch_stride', type=int, default=1,
        help='Patch stride used for matching. Currently required to be 1.')
    # VGG
    parser.add_argument('--vgg-weights', dest='vgg_weights', type=str,
                        default=os.environ.get(VGG_ENV_VAR, 'vgg16_weights.h5'),
                        help='Path to VGG16 weights.')
    parser.add_argument('--pool-mode', dest='pool_mode', type=str,
                        default='max',
                        help='Pooling mode for VGG ("avg" or "max")')
    # jitter
    parser.add_argument('--jitter', dest='jitter', type=float, default=0,
                        help='Magnitude of random shift at scale x1')
    parser.add_argument('--color-jitter', dest='color_jitter', type=float,
                        default=0,
                        help='Magnitude of random jitter to each pixel')
    parser.add_argument(
        '--contrast', dest='contrast_percent', type=float, default=0.02,
        help='Drop the bottom x percentile and scale by the top (100 - x)th percentile')

    # argv=None makes argparse read sys.argv[1:], as before.
    args = parser.parse_args(argv)

    # Raised explicitly rather than via `assert` so the validation is not
    # stripped under `python -O`; the exception type is unchanged.
    if args.a_scale_mode not in ('ratio', 'none', 'match'):
        raise AssertionError(
            'a-scale-mode must be set to one of "ratio", "none", or "match"')

    # hack for CPU users :(
    if K._BACKEND == 'theano':
        from keras.backend import theano_backend
        if args.a_scale_mode != 'match':
            if not theano_backend._on_gpu():
                print('Theano CPU mode detected. Forcing a-scale-mode to "match"')
                # prevent conv2d errors when using CPU
                args.a_scale_mode = 'match'
                args.a_scale = 1.0
            elif theano_backend.dnn and not theano_backend.dnn.dnn_available():
                print('Theano CUDA without cuDNN detected. Forcing a-scale-mode to "match"')
                # prevent conv2d errors when using CUDA without cuDNN
                args.a_scale_mode = 'match'
                args.a_scale = 1.0

    # make sure weights are in place
    if not os.path.exists(args.vgg_weights):
        print('Model weights not found (see "--vgg-weights" parameter).')
        return None
    return args
def parse_args(argv=None):
    """Parses command line arguments for the image analogy command.

    Args:
        argv: Optional list of argument strings. When None (the default)
            argparse falls back to ``sys.argv[1:]``, which is what the
            original zero-argument call did.

    Returns:
        The populated ``argparse.Namespace``, or None when the VGG weights
        file named by ``--vgg-weights`` does not exist.

    Raises:
        AssertionError: if ``--a-scale-mode`` is not one of "ratio",
            "none" or "match".
    """
    parser = argparse.ArgumentParser(description="Neural image analogies with Keras.")
    parser.add_argument("a_image_path", metavar="ref", type=str, help="Path to the reference image mask (A)")
    parser.add_argument("ap_image_path", metavar="base", type=str, help="Path to the source image (A')")
    parser.add_argument("b_image_path", metavar="ref", type=str, help="Path to the new mask for generation (B)")
    parser.add_argument("result_prefix", metavar="res_prefix", type=str, help="Prefix for the saved results (B')")
    # size-related
    parser.add_argument("--width", dest="out_width", type=int, default=0, help="Set output width")
    parser.add_argument("--height", dest="out_height", type=int, default=0, help="Set output height")
    parser.add_argument("--scales", dest="num_scales", type=int, default=3, help="Run at N different scales")
    parser.add_argument("--min-scale", dest="min_scale", type=float, default=0.25, help="Smallest scale to iterate")
    parser.add_argument(
        "--a-scale-mode", dest="a_scale_mode", type=str, default="none", help="Method of scaling A and A' relative to B"
    )
    parser.add_argument(
        "--a-scale", dest="a_scale", type=float, default=1.0, help="Additional scale factor for A and A'"
    )
    parser.add_argument(
        "--output-full",
        dest="output_full_size",
        action="store_true",
        help="Output all intermediate images at full size regardless of current scale.",
    )
    # optimizer
    parser.add_argument(
        "--iters", dest="num_iterations_per_scale", type=int, default=5, help="Number of iterations per scale"
    )
    parser.add_argument(
        "--model", dest="match_model", type=str, default="patchmatch", help="Matching algorithm (patchmatch or brute)"
    )
    parser.add_argument(
        "--mrf-nnf-steps",
        dest="mrf_nnf_steps",
        type=int,
        default=5,
        help="Number of patchmatch updates per iteration for local coherence loss.",
    )
    # NOTE: the dest is spelled "mnf" (not "mrf"); it is kept as-is because
    # it is the attribute name exposed on the returned namespace.
    parser.add_argument(
        "--randomize-mrf-nnf",
        dest="randomize_mnf_nnf",
        action="store_true",
        help="Randomize the local coherence similarity matrix at the start of a new scale instead of scaling it up.",
    )
    parser.add_argument(
        "--analogy-nnf-steps",
        dest="analogy_nnf_steps",
        type=int,
        default=15,
        help="Number of patchmatch updates for the analogy loss (done once per scale).",
    )
    # loss
    parser.add_argument("--tv-w", dest="tv_weight", type=float, default=1.0, help="Weight for TV loss.")
    parser.add_argument("--analogy-w", dest="analogy_weight", type=float, default=1.0, help="Weight for analogy loss.")
    parser.add_argument(
        "--analogy-layers",
        dest="analogy_layers",
        action=CommaSplitAction,
        default=["conv3_1", "conv4_1"],
        help="Comma-separated list of layer names to be used for the analogy loss",
    )
    parser.add_argument(
        "--use-full-analogy",
        dest="use_full_analogy",
        action="store_true",
        help="Use the full set of analogy patches (slower/more memory but maybe more accurate)",
    )
    parser.add_argument(
        "--mrf-w", dest="mrf_weight", type=float, default=0.5, help="Weight for MRF loss between A' and B'"
    )
    parser.add_argument(
        "--mrf-layers",
        dest="mrf_layers",
        action=CommaSplitAction,
        default=["conv3_1", "conv4_1"],
        help="Comma-separated list of layer names to be used for the MRF loss",
    )
    parser.add_argument(
        "--b-content-w",
        dest="b_bp_content_weight",
        type=float,
        default=0.0,
        help="Weight for content loss between B and B'",
    )
    parser.add_argument(
        "--content-layers",
        dest="b_content_layers",
        action=CommaSplitAction,
        default=["conv3_1", "conv4_1"],
        help="Comma-separated list of layer names to be used for the content loss",
    )
    parser.add_argument(
        "--nstyle-w",
        dest="neural_style_weight",
        type=float,
        default=0.0,
        help="Weight for neural style loss between A' and B'",
    )
    parser.add_argument(
        "--nstyle-layers",
        dest="neural_style_layers",
        action=CommaSplitAction,
        default=["conv2_1", "conv3_1", "conv4_1", "conv5_1"],
        help="Comma-separated list of layer names to be used for the neural style",
    )
    parser.add_argument("--patch-size", dest="patch_size", type=int, default=1, help="Patch size used for matching.")
    parser.add_argument(
        "--patch-stride",
        dest="patch_stride",
        type=int,
        default=1,
        help="Patch stride used for matching. Currently required to be 1.",
    )
    # VGG
    parser.add_argument(
        "--vgg-weights",
        dest="vgg_weights",
        type=str,
        default=os.environ.get(VGG_ENV_VAR, "vgg16_weights.h5"),
        help="Path to VGG16 weights.",
    )
    parser.add_argument(
        "--pool-mode", dest="pool_mode", type=str, default="max", help='Pooling mode for VGG ("avg" or "max")'
    )
    # jitter
    parser.add_argument("--jitter", dest="jitter", type=float, default=0, help="Magnitude of random shift at scale x1")
    parser.add_argument(
        "--color-jitter", dest="color_jitter", type=float, default=0, help="Magnitude of random jitter to each pixel"
    )
    parser.add_argument(
        "--contrast",
        dest="contrast_percent",
        type=float,
        default=0.02,
        help="Drop the bottom x percentile and scale by the top (100 - x)th percentile",
    )

    # argv=None makes argparse read sys.argv[1:], as before.
    args = parser.parse_args(argv)

    # Raised explicitly rather than via `assert` so the validation is not
    # stripped under `python -O`; the exception type is unchanged.
    if args.a_scale_mode not in ("ratio", "none", "match"):
        raise AssertionError('a-scale-mode must be set to one of "ratio", "none", or "match"')

    # hack for CPU users :(
    if K._BACKEND == "theano":
        from keras.backend import theano_backend

        if args.a_scale_mode != "match":
            if not theano_backend._on_gpu():
                print('Theano CPU mode detected. Forcing a-scale-mode to "match"')
                # prevent conv2d errors when using CPU
                args.a_scale_mode = "match"
                args.a_scale = 1.0
            elif theano_backend.dnn and not theano_backend.dnn.dnn_available():
                print('Theano CUDA without cuDNN detected. Forcing a-scale-mode to "match"')
                # prevent conv2d errors when using CUDA without cuDNN
                args.a_scale_mode = "match"
                args.a_scale = 1.0

    # make sure weights are in place
    if not os.path.exists(args.vgg_weights):
        print('Model weights not found (see "--vgg-weights" parameter).')
        return None
    return args