def learn_to_move(nprims=200, nbatch=50, width=224, height=224):
    """Build and compile the "learn to move" training function.

    The symbolic graph built here does the following:

    1. Renders ``nbatch`` scenes from the symbolic ``shape_params`` input with
       ``symbolic_render`` and regroups the renders into ``nbatch // 2``
       two-channel images.
    2. Feeds those two-channel images through a convolutional network that
       ends in a single ``tanh`` unit per image pair.
    3. Differentiates the summed network output (divided by ``nbatch``) with
       respect to ``shape_params`` and moves the parameters that produced
       channel 0 one gradient step (step size 1.0).
    4. Renders the moved parameters again and compares the result with the
       untouched channel-1 images.

    The training loss is ``loss1 + loss2`` where ``loss1`` is the mean
    (floored at ``1e-9``) squared pixel difference from step 4 and ``loss2``
    is ``-600 * gauss(param_diff, mu=10.0, sigma=100.0)``, with ``param_diff``
    the sum of squared channel-0 gradients divided by ``nbatch``. Network
    weights are updated with Adam.

    Args:
        nprims: Number of primitives per scene.
        nbatch: Number of scenes per minibatch; must be even.
        width: Render width, also used as the network input width.
        height: Render height, also used as the network input height.

    Returns:
        A ``(netcost, output_layer)`` tuple. ``netcost`` is the compiled
        function taking ``[fragCoords, shape_params]`` and returning
        ``[loss, loss1, loss2, param_diff, summed_op, delta_shape, res2,
        last_layer_params, unchanged_img, changed_img, res_reshape2]``;
        calling it also applies the merged scan and Adam updates.
        ``output_layer`` is the final Lasagne layer of the network.

    Raises:
        AssertionError: If ``nbatch`` is odd, or if an optimizer update
            targets a variable already present in the renderer's updates.
    """
    assert nbatch % 2 == 0, "nbatch must be even: renders are paired into two channels"
    params_per_prim = 3
    # Floor division keeps every shape integral on Python 2 and Python 3
    # alike (``nbatch / 2`` is a float under true division).
    half_batch = nbatch // 2

    # Render the input shapes.
    fragCoords = T.tensor3('fragCoords')
    shape_params = T.tensor3("scenes")
    res, scan_updates = symbolic_render(nprims, shape_params, fragCoords,
                                        width, height)
    # presumably res is (width, height, nbatch) -- TODO confirm against
    # symbolic_render; the batch axis is moved to the front here.
    res_reshape = res.dimshuffle([2, 'x', 0, 1])

    # Split the batch in half and give each image two channels, so that
    # consecutive renders (2i, 2i + 1) become channels 0 and 1 of image i.
    res_reshape_split = T.reshape(res_reshape, (half_batch, 2, width, height))

    net = {}
    net['input'] = InputLayer((half_batch, 2, width, height),
                              input_var=res_reshape_split)
    net['conv1'] = ConvLayer(net['input'], num_filters=96, filter_size=7,
                             stride=2)
    # caffe has alpha = alpha * pool_size
    net['norm1'] = NormLayer(net['conv1'], alpha=0.0001)
    net['pool1'] = PoolLayer(net['norm1'], pool_size=3, stride=3,
                             ignore_border=False)
    net['conv2'] = ConvLayer(net['pool1'], num_filters=256, filter_size=5)
    net['pool2'] = PoolLayer(net['conv2'], pool_size=2, stride=2,
                             ignore_border=False)
    net['conv3'] = ConvLayer(net['pool2'], num_filters=512, filter_size=3,
                             pad=1)
    net['conv4'] = ConvLayer(net['conv3'], num_filters=512, filter_size=3,
                             pad=1)
    net['conv5'] = ConvLayer(net['conv4'], num_filters=512, filter_size=3,
                             pad=1)
    net['pool5'] = PoolLayer(net['conv5'], pool_size=3, stride=3,
                             ignore_border=False)
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
    net['drop6'] = DropoutLayer(net['fc6'], p=0.5)
    net['fc7'] = DenseLayer(net['drop6'], num_units=1,
                            nonlinearity=lasagne.nonlinearities.tanh)
    output_layer = net['fc7']
    output = lasagne.layers.get_output(output_layer)

    # Move the first half of the parameters.
    # The split must mirror the image split above: index 0 on the pair axis
    # selects the parameters that render to channel 0 of each image (not the
    # parameters of the first half of the batch).
    learning_rate = 1.0
    split_shape = (nprims, half_batch, 2, params_per_prim)
    shape_params_split = T.reshape(shape_params, split_shape)
    first_half_params = shape_params_split[:, :, 0, :]

    summed_op = T.sum(output) / nbatch
    delta_shape = T.grad(summed_op, shape_params)
    delta_shape_split = T.reshape(delta_shape, split_shape)
    first_half_delta = delta_shape_split[:, :, 0, :]
    new_first_half = first_half_params - learning_rate * first_half_delta

    # Render the moved half again to produce the new images.
    res2, scan_updates2 = symbolic_render(nprims, new_first_half, fragCoords,
                                          width, height)
    res_reshape2 = res2.dimshuffle([2, 0, 1])

    unchanged_img = res_reshape_split[:, 1, :, :]
    changed_img = res_reshape_split[:, 0, :, :]

    # loss1: mean squared pixel distance between the moved renders and the
    # untouched channel, with each squared difference floored at eps.
    eps = 1e-9
    diff = T.maximum(eps, (unchanged_img - res_reshape2) ** 2)
    loss1 = T.sum(diff) / (half_batch * width * height)

    # loss2 forces change (avoids plateaus) by rewarding a gradient magnitude
    # near the Gaussian's mean.
    param_diff = T.sum(first_half_delta ** 2) / nbatch
    loss2 = -gauss(param_diff, mu=10.0, sigma=100.0) * 600
    loss = loss1 + loss2

    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    network_updates = lasagne.updates.adam(loss, params)

    # Merge updates: optimizer updates must not clash with renderer updates.
    for key, update in network_updates.items():
        assert key not in scan_updates
        scan_updates[key] = update
    # FIXME: collisions between the two render passes are not checked; the
    # second pass silently overrides the first.
    for key, update in scan_updates2.items():
        scan_updates[key] = update

    # Gradient w.r.t. the second-to-last entry of the full parameter list,
    # exposed as a diagnostic output.
    params = lasagne.layers.get_all_params(output_layer)
    last_layer_params = T.grad(loss, params[-2])

    print("Compiling Loss Function")
    netcost = function(
        [fragCoords, shape_params],
        [loss, loss1, loss2, param_diff, summed_op, delta_shape, res2,
         last_layer_params, unchanged_img, changed_img, res_reshape2],
        updates=scan_updates,
        mode=curr_mode)
    return netcost, output_layer
def second_order(nprims=200, nbatch=50, width=224, height=224):
    """Build and compile an image-to-shape-parameters training function.

    A convolutional network maps a single-channel input image to
    ``nprims * 3`` values. These are shifted by -1, reshaped to
    ``(nprims, nbatch, 3)`` and rendered with ``symbolic_render``. The
    training loss is ``loss1 + loss2``:

    * ``loss1`` is the mean (floored at ``1e-9``) squared pixel difference
      between the render and the input image.
    * ``loss2`` is the negative of a Gaussian (mean 0, sigma 4) evaluated at
      the mean squared deviation of the shape parameters from their
      per-primitive mean over the batch axis.

    Network weights are updated with Nesterov momentum (learning rate 0.01,
    momentum 0.9).

    Args:
        nprims: Number of primitives per scene.
        nbatch: Number of images per minibatch.
        width: Image/render width (defaults to the previously hard-coded 224).
        height: Image/render height (defaults to the previously hard-coded
            224).

    Returns:
        A ``(render, netcost, output_layer)`` tuple. ``render`` is the result
        of ``make_render(nprims, width, height)``. ``netcost`` is the compiled
        function taking ``[fragCoords, img]`` and returning ``[loss, grad,
        res_reshape, shape_params, diff, loss1, loss2]`` (``grad`` is the loss
        gradient w.r.t. the first trainable parameter); calling it also
        applies the merged scan and optimizer updates. ``output_layer`` is the
        final Lasagne layer of the network.

    Raises:
        AssertionError: If an optimizer update targets a variable already
            present in the renderer's updates.
    """
    params_per_prim = 3
    nshape_params = nprims * params_per_prim

    img = T.tensor4("input image")
    net = {}
    net['input'] = InputLayer((nbatch, 1, width, height), input_var=img)
    net['conv1'] = ConvLayer(net['input'], num_filters=96, filter_size=7,
                             stride=2)
    # caffe has alpha = alpha * pool_size
    net['norm1'] = NormLayer(net['conv1'], alpha=0.0001)
    net['pool1'] = PoolLayer(net['norm1'], pool_size=3, stride=3,
                             ignore_border=False)
    net['conv2'] = ConvLayer(net['pool1'], num_filters=256, filter_size=5)
    net['pool2'] = PoolLayer(net['conv2'], pool_size=2, stride=2,
                             ignore_border=False)
    net['conv3'] = ConvLayer(net['pool2'], num_filters=512, filter_size=3,
                             pad=1)
    net['conv4'] = ConvLayer(net['conv3'], num_filters=512, filter_size=3,
                             pad=1)
    net['conv5'] = ConvLayer(net['conv4'], num_filters=512, filter_size=3,
                             pad=1)
    net['pool5'] = PoolLayer(net['conv5'], pool_size=3, stride=3,
                             ignore_border=False)
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
    net['drop6'] = DropoutLayer(net['fc6'], p=0.5)
    net['fc7'] = DenseLayer(net['drop6'], num_units=nshape_params)
    output_layer = net['fc7']
    output = lasagne.layers.get_output(output_layer)
    scaled_output = output - 1

    # Render these parameters.
    # NOTE(review): the network output is presumably (nbatch, nshape_params);
    # reshaping it straight to (nprims, nbatch, params_per_prim) without a
    # transpose may interleave batch items -- confirm this is intended.
    shape_params = T.reshape(scaled_output, (nprims, nbatch, params_per_prim))
    fragCoords = T.tensor3('fragCoords')
    print("Symbolic Render")
    res, scan_updates = symbolic_render(nprims, shape_params, fragCoords,
                                        width, height)
    res_reshape = res.dimshuffle([2, 'x', 0, 1])

    # loss1: plain pixel distance, each squared difference floored at eps.
    eps = 1e-9
    diff = T.maximum(eps, (res_reshape - img) ** 2)
    loss1 = T.sum(diff) / (width * height * nbatch)

    # Mean squared deviation of the parameters from their mean across the
    # batch axis.
    mean_shape = T.mean(shape_params, axis=1)
    mean_shape = T.reshape(mean_shape, (nprims, 1, params_per_prim))
    diff2 = T.maximum(eps, (mean_shape - shape_params) ** 2)
    spread = T.sum(diff2) / (nprims * params_per_prim * nbatch)

    # loss2: negative zero-mean Gaussian of that spread.
    sigma = 4
    norm = 1 / (sigma * np.sqrt(2 * np.pi))
    loss2 = -norm * T.exp((-spread ** 2) / (2 * sigma ** 2))
    loss = loss2 + loss1

    # Update expressions for training: SGD with Nesterov momentum.
    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    network_updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9)

    # Merge updates: optimizer updates must not clash with renderer updates.
    for key, update in network_updates.items():
        assert key not in scan_updates
        scan_updates[key] = update

    print("Compiling Loss Function")
    grad = T.grad(loss, params[0])
    netcost = function(
        [fragCoords, img],
        [loss, grad, res_reshape, shape_params, diff, loss1, loss2],
        updates=scan_updates,
        mode=curr_mode)

    # Generate the render function used to make data.
    # NOTE(review): the generated coordinates are not used below; the call is
    # kept in case gen_fragcoords has side effects -- confirm and remove.
    gen_fragcoords(width, height)
    print("Compiling Renderer")
    render = make_render(nprims, width, height)
    return render, netcost, output_layer