def theano_kernel_derivative(imshp,kshp,featshp,stride=1):
    """Compile a Theano function that evaluates ``kernel_grad``.

    The graph first builds an image estimate with a ``'full'``-mode
    ``conv2d`` of the features and the spatially flipped, axis-swapped
    kernel, then forms the residual ``image - image_estimate``.  The
    flipped, axis-swapped residual is convolved with the axis-swapped
    features; the negated result, with its first two axes swapped back,
    is the output.

    Parameters
    ----------
    imshp, kshp, featshp : 4-element sequences
        Static shapes of the image, kernel and feature tensors; they are
        forwarded to ``conv2d`` as shape hints.
    stride : int
        Multiplier applied to the last two feature dimensions to build the
        "logical" shapes passed to ``conv2d``.

    Returns
    -------
    Compiled function with inputs ``[image, features, kernel]`` and output
    ``kernel_grad``.
    """
    float_x = theano.config.floatX
    # Symbolic inputs (created in the order features, kernel, image).
    features = T.tensor4(dtype=float_x)
    kernel = T.tensor4(dtype=float_x)
    image = T.tensor4(dtype=float_x)

    swap_leading = [1, 0, 2, 3]
    strided_rows = featshp[2] * stride
    strided_cols = featshp[3] * stride

    # Swap the first two kernel dimensions and reverse the spatial axes
    # (for correlation).
    kernel_rotated = T.transpose(kernel[:, :, ::-1, ::-1], axes=swap_leading)
    image_estimate = conv2d(
        features,
        kernel_rotated,
        border_mode='full',
        image_shape=featshp,
        filter_shape=(kshp[1], kshp[0], kshp[2], kshp[3]),
        imshp_logical=(featshp[1], strided_rows, strided_cols),
        kshp_logical=kshp[2:],
    )

    residual = image - image_estimate
    residual_rot = T.transpose(residual, swap_leading)[:, :, ::-1, ::-1]
    features_rot = T.transpose(features, swap_leading)

    grad_rot = -conv2d(
        residual_rot,
        features_rot,
        image_shape=(imshp[1], imshp[0], imshp[2], imshp[3]),
        filter_shape=(featshp[1], featshp[0], featshp[2], featshp[3]),
        imshp_logical=(imshp[0], imshp[2], imshp[3]),
        kshp_logical=(strided_rows, strided_cols),
    )

    return function(inputs=[image, features, kernel],
                    outputs=T.transpose(grad_rot, swap_leading))
def test_graph(self):
    """Compare a grouped convolution with per-group convolutions.

    One graph applies ``self.conv(num_groups=3)`` to the whole input; the
    other applies ``self.conv()`` to three channel/filter slices and
    concatenates the results along axis 1.  Both are compiled with
    ``self.mode`` and must agree on random data.
    """
    n_groups = 3
    float_x = theano.config.floatX

    # Random values: input first, then kernels (same draw order as before).
    bottom_val = np.random.rand(3, 6, 5, 5).astype(float_x)
    kern_val = np.random.rand(9, 2, 3, 3).astype(float_x)

    sym_bottom = T.tensor4('bottom')
    sym_kern = T.tensor4('kern')

    # Single grouped-convolution graph.
    grouped = self.conv(num_groups=n_groups)(sym_bottom, sym_kern)
    grouped_fn = theano.function([sym_bottom, sym_kern], grouped,
                                 mode=self.mode)

    # Reference graph: slice, convolve each group, concatenate.
    kern_step = sym_kern.shape[0] // n_groups
    bottom_step = sym_bottom.shape[1] // n_groups
    pieces = []
    for g in range(n_groups):
        op = self.conv()
        bottom_part = sym_bottom[:, g * bottom_step:(g + 1) * bottom_step, :, :]
        kern_part = sym_kern[g * kern_step:(g + 1) * kern_step, :, :, :]
        pieces.append(op(bottom_part, kern_part))
    reference_fn = theano.function([sym_bottom, sym_kern],
                                   T.concatenate(pieces, axis=1),
                                   mode=self.mode)

    # Evaluate both graphs and compare.
    grouped_val = grouped_fn(bottom_val, kern_val)
    reference_val = reference_fn(bottom_val, kern_val)
    utt.assert_allclose(grouped_val, reference_val)
def fix_gpu_transfer():
    """Build, print and time a convolutional L2 cost function.

    Compiles ``0.5 * sum(image_error ** 2)`` where ``image_error`` comes from
    ``helper_T_l2_cost_conv``, dumps the compiled graph with
    ``theano.printing.debugprint`` and prints the mean wall-clock time of
    10 evaluations on random float32 inputs.
    """
    import theano
    from theano import tensor as T
    from time import time as now

    kshp = (10, 2, 10, 10)
    featshp = (3, 10, 11, 11)
    stride = 8
    mask = False
    # num images, channels, szy, szx
    imshp = (featshp[0],
             kshp[1],
             featshp[2] * stride + kshp[2] - 1,
             featshp[3] * stride + kshp[3] - 1)

    x = T.tensor4()
    a = T.tensor4()
    A = T.tensor4()

    image_error = helper_T_l2_cost_conv(x, a, A, imshp, kshp, featshp,
                                        stride=(stride, stride), mask=mask)
    cost = .5 * T.sum(image_error ** 2)
    func = function([x, a, A], cost)

    theano.printing.debugprint(func)

    x_in = np.random.randn(*imshp).astype(np.float32)
    a_in = np.random.randn(*featshp).astype(np.float32)
    A_in = np.random.randn(*kshp).astype(np.float32)

    repeats = 10
    t0 = now()
    for _ in range(repeats):
        func(x_in, a_in, A_in)
    t = now() - t0

    # Single-argument print call: identical output on Python 2, and valid
    # syntax on Python 3 (the former print statement was not).
    print('time / iter = %f' % (t / repeats))
def __init__(self,test_data_x,test_data_y):
    """Store the test data and compile a ``dice_coef`` function.

    ``self.dc`` is a compiled Theano function of two 4-D tensors
    (``test``, ``pred``) that returns ``dice_coef(test, pred)``.
    """
    self.test_data_x, self.test_data_y = test_data_x, test_data_y

    reference = T.tensor4('test')
    prediction = T.tensor4('pred')
    score = dice_coef(reference, prediction)
    self.dc = theano.function([reference, prediction], score)
def __init__(self):
    """Assemble the two-input convolutional model and build it.

    Layer order: two shared-weight convolutions, pooling, tanh; a third
    convolution, pooling, tanh; ``Abs_diff``; a fourth convolution, pooling,
    tanh; flatten; then two fully connected layers with dropout and a
    softmax.  ``self.f`` is compiled right after the flatten layer is
    added, from ``model.f(X, is_train=True)`` as it stands at that point.
    The model is built with ``CostFunc.nll`` and ``RMSprop()`` and stored
    in ``self.model``.
    """
    X1 = T.tensor4()
    X2 = T.tensor4()
    X = [X1, X2]
    Y = [T.ivector()]

    model = Model()

    def l1_conv(filter_shape, **extra):
        # Shared-weight convolution with a fresh L1 penalty on W.
        return Conv(filter_shape=filter_shape,
                    regularizers={'W': l1(0.0001)},
                    w_shared=True,
                    **extra)

    def add_pool_tanh():
        model.add(Pooling(pool_size=(2, 2)))
        model.add(Activation(mode='tanh'))

    # conv1
    model.add(l1_conv((32, 3, 3, 3), n_inputs=2))
    model.add(l1_conv((32, 32, 2, 2), n_inputs=2))
    add_pool_tanh()

    # conv2
    model.add(l1_conv((32, 32, 3, 3), n_inputs=2))
    add_pool_tanh()

    # abs_diff
    model.add(Abs_diff())

    # conv3
    model.add(l1_conv((32, 32, 3, 3)))
    add_pool_tanh()
    model.add(Flatten())

    # Compiled before the fully connected layers are appended.
    self.f = theano.function(X, model.f(X, is_train=True))

    model.add(Fully((2880, 512)))
    model.add(Activation(mode='tanh'))
    model.add(Dropout(0.5))
    model.add(Fully((512, 2)))
    model.add(Activation(mode='softmax'))

    model.build(CostFunc.nll, RMSprop(), X, Y)
    self.model = model
def run_network_on_image():
    """Run the comparison network on generated sequences and plot scores.

    Generates three sequence sets, loads parameters from ``net.npy`` when
    that file exists, evaluates the network on the (ims1, ims1a) and
    (ims1, ims2) pairs, post-processes both outputs with ``np_standardize``,
    ``get_shifted_correlations`` and ``optimize_dp``, prints the min/max of
    each result and shows overlaid histograms.
    """
    import make_seqs

    ims1, ims1a, ims2 = make_seqs.make_seqs(slength=6, num_seqs=1000)

    left_var = T.tensor4('inputs')
    right_var = T.tensor4('inputs_comp')
    network = compare_net.build_cnn_new_conv(left_var, right_var)

    weights_file = 'net.npy'
    if os.path.isfile(weights_file):
        lasagne.layers.set_all_param_values(network, np.load(weights_file))

    deterministic_out = lasagne.layers.get_output(network, deterministic=True)
    test_fn = theano.function([left_var, right_var], [deterministic_out])

    # Each stage processes the "same" pair before the "different" pair.
    raw = [test_fn(ims1, partner) for partner in (ims1a, ims2)]
    standardized = [np_standardize(result[0]) for result in raw]
    correlations = [get_shifted_correlations(s) for s in standardized]
    dps, dpd = [optimize_dp(c) for c in correlations]

    print(np.min(dps),np.max(dps),np.min(dpd),np.max(dpd))

    import pylab as py
    py.figure(1)
    for scores in (dps, dpd):
        py.hist(scores, alpha=.5)
    py.show()
    print('done ')
def test_theano_transposed_convolution(self):
    """Show how ``t_mk_conv_transpose`` is used and check one known case.

    The filter tensor is substituted by ``self.filters`` through ``givens``,
    so the compiled function takes only the input tensor.
    """
    from deconv.tdeconv_utils import t_mk_conv_transpose

    float_x = theano.config.floatX
    in4 = T.tensor4(name='conv_in', dtype=float_x)
    f4 = T.tensor4(name='filters', dtype=float_x)

    transposed = t_mk_conv_transpose(in4, f4)
    f_t_conv = theano.function([in4], transposed,
                               givens=[(f4, self.filters)])

    # One sample, two 3x3 channels: a vertical bar and a horizontal bar.
    vertical_bar = [[0, 1, 0],
                    [0, 1, 0],
                    [0, 1, 0]]
    horizontal_bar = [[0, 0, 0],
                      [1, 1, 1],
                      [0, 0, 0]]
    test_input = np.array([[vertical_bar, horizontal_bar]], dtype=float_x)

    ground_truth = np.array(
        [[[[ 0,  0,  0, 0, 0],
           [-1, -1, -1, 0, 0],
           [ 0,  0,  0, 1, 0],
           [ 0,  0,  0, 1, 0],
           [ 0,  0,  0, 1, 0]]]],
        dtype=float_x
    )

    result = f_t_conv(test_input)
    assert_true(np.all(result == ground_truth))
def create_iter_funcs_valid(l_out, bs=None, N=50, mc_dropout=False):
    """Compile the validation function for the network ending at ``l_out``.

    Parameters
    ----------
    l_out : output layer passed to ``layers.get_output``.
    bs : int or None
        Fixed batch size; required when ``mc_dropout`` is true because the
        number of splits must be known when the graph is built.
    N : int
        Number of stochastic forward passes per sample for MC dropout.
    mc_dropout : bool
        If false, predictions come from one deterministic pass.  If true,
        the input is repeated ``N`` times along axis 0, passed through the
        network non-deterministically, split into ``bs`` groups and
        averaged per group.

    Returns
    -------
    Compiled function ``(X_batch, y_batch) -> [valid_loss, valid_acc]``.

    Raises
    ------
    ValueError
        If ``mc_dropout`` is true and ``bs`` is None.
    """
    X = T.tensor4('X')
    y = T.ivector('y')
    X_batch = T.tensor4('X_batch')
    y_batch = T.ivector('y_batch')

    if not mc_dropout:
        y_hat = layers.get_output(l_out, X, deterministic=True)
    else:
        if bs is None:
            raise ValueError('a fixed batch size is required for mc dropout')
        X_repeat = T.extra_ops.repeat(X, N, axis=0)
        y_sample = layers.get_output(
            l_out, X_repeat, deterministic=False)

        # Floor division keeps the split sizes integral; with true division
        # (Python 3) the sizes become floats, which T.split does not accept.
        sizes = [X_repeat.shape[0] // X.shape[0]] * bs
        y_sample_split = T.as_tensor_variable(
            T.split(y_sample, sizes, bs, axis=0))
        y_hat = T.mean(y_sample_split, axis=1)

    valid_loss = T.mean(
        T.nnet.categorical_crossentropy(y_hat, y))
    valid_acc = T.mean(
        T.eq(y_hat.argmax(axis=1), y))

    # Plain variables are passed as inputs: theano.Param is not available
    # in newer Theano releases, and the wrapper carried no options here.
    valid_iter = theano.function(
        inputs=[X_batch, y_batch],
        outputs=[valid_loss, valid_acc],
        givens={
            X: X_batch,
            y: y_batch,
        },
    )

    return valid_iter
def make_apply_gabor_function(filter_stack_shape,complex_cell=True):
    """Compile a function that convolves stimuli with a filter stack.

    Parameters
    ----------
    filter_stack_shape : shape hint forwarded to ``conv2d`` as ``filter_shape``.
    complex_cell : bool
        If true, the compiled function takes ``(stim, real_filters,
        imag_filter)`` and returns ``sqrt(real_map**2 + imag_map**2)``.
        Otherwise it takes ``(stim, real_filters)`` and returns the real
        feature map only.

    Returns
    -------
    The compiled Theano function.

    Notes
    -----
    Per the original comments, the stimulus is
    T x n_color_channels x stim_size x stim_size, each filter stack is
    D x n_color_channels x stim_size x stim_size and the maps are
    T x D x stim_size x stim_size -- not checked here.
    """
    stimulus = tnsr.tensor4('stim_tnsr')
    real_filters = tnsr.tensor4('real_feature_map_tnsr')
    imag_filters = tnsr.tensor4('imag_feature_map_tnsr')

    def full_conv(filter_stack):
        # 'full' border-mode convolution of the stimulus with one stack.
        return tnsr.nnet.conv2d(stimulus,
                                filter_stack,
                                filter_shape=filter_stack_shape,
                                border_mode='full')

    # Both maps are built even when only the real one is used.
    real_map = full_conv(real_filters)
    imag_map = full_conv(imag_filters)

    if not complex_cell:
        return function(inputs=[stimulus, real_filters], outputs=real_map)

    # Complex Gabors: combine real and imaginary responses into a magnitude.
    magnitude = tnsr.sqrt(tnsr.sqr(real_map) + tnsr.sqr(imag_map))
    return function(inputs=[stimulus, real_filters, imag_filters],
                    outputs=magnitude)
def get_every_layer_functions_only_h():
    """Compile one Theano function per layer of the horizontal stack.

    Returns
    -------
    list
        ``PIXEL_CNN_LAYERS + 2`` compiled functions, in order: the input
        layer (``"Dec.PixInput"``, filter size 7), one function per
        ``"Dec.Pix<i>"`` layer, and the output head (three 1x1 ``Conv2D``
        layers, the first two followed by ``PixCNNGate``, then a sigmoid).
        Each function takes its own fresh 4-D input tensor.
    """
    functions = []

    img = T.tensor4('img')
    X_h = next_stacks_only_h(
        img, N_CHANNELS, "Dec.PixInput",
        filter_size=7, hstack="hstack_a", residual=False
    )
    # print() with one argument behaves the same on Python 2 and 3.
    print("Compiling layer 0 .......")
    functions.append(theano.function([img], X_h))

    # range instead of xrange: identical iteration, also valid on Python 3.
    for i in range(PIXEL_CNN_LAYERS):
        X_h_inp = T.tensor4('X_h_inp')
        X_h = next_stacks_only_h(
            X_h_inp, DIM_PIX, "Dec.Pix" + str(i + 1),
            filter_size=PIXEL_CNN_FILTER_SIZE
        )
        print("Compiling layer {} .......".format(i + 1))
        functions.append(theano.function([X_h_inp], X_h))

    X_h = T.tensor4('X_h')

    output = lib.ops.conv2d.Conv2D(
        'Dec.PixOut1', input_dim=DIM_PIX, output_dim=2*DIM_PIX,
        filter_size=1, inputs=X_h
    )
    output = PixCNNGate(output)

    output = lib.ops.conv2d.Conv2D(
        'Dec.PixOut2', input_dim=DIM_PIX, output_dim=2*DIM_PIX,
        filter_size=1, inputs=output
    )
    output = PixCNNGate(output)

    output = lib.ops.conv2d.Conv2D(
        'Dec.PixOut3', input_dim=DIM_PIX, output_dim=N_CHANNELS,
        filter_size=1, inputs=output, he_init=False
    )
    output = T.nnet.sigmoid(output)

    print("Compiling output function :)")
    functions.append(theano.function([X_h], output))

    return functions
def create_iter_funcs_train(l_out, lr, mntm, wd):
    """Compile the training function for the network ending at ``l_out``.

    The loss is the mean categorical cross-entropy of the non-deterministic
    network output plus ``wd`` times the L2 penalty from
    ``regularize_network_params``.  Parameters are updated with Nesterov
    momentum.

    Parameters
    ----------
    l_out : output layer of the network.
    lr : learning rate passed to ``nesterov_momentum``.
    mntm : momentum passed to ``nesterov_momentum``.
    wd : weight of the L2 regularization term.

    Returns
    -------
    Compiled function ``(X_batch, y_batch) -> [train_loss, train_acc]`` that
    also applies the parameter updates on every call.
    """
    X = T.tensor4('X')
    y = T.ivector('y')
    X_batch = T.tensor4('X_batch')
    y_batch = T.ivector('y_batch')

    y_hat = layers.get_output(l_out, X, deterministic=False)

    # softmax loss
    train_loss = T.mean(
        T.nnet.categorical_crossentropy(y_hat, y))

    # L2 regularization
    train_loss += wd * regularize_network_params(l_out, l2)

    train_acc = T.mean(
        T.eq(y_hat.argmax(axis=1), y))

    all_params = layers.get_all_params(l_out, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        train_loss, all_params, lr, mntm)

    # Plain variables are passed as inputs: theano.Param is not available
    # in newer Theano releases, and the wrapper carried no options here.
    train_iter = theano.function(
        inputs=[X_batch, y_batch],
        outputs=[train_loss, train_acc],
        updates=updates,
        givens={
            X: X_batch,
            y: y_batch,
        },
    )

    return train_iter
def functions(encoder, network, l_rate=1.):
    """Compile train/test/encode/decode functions for an autoencoder.

    Parameters
    ----------
    encoder : layer whose output is the code.
    network : final layer of the whole network.
    l_rate : learning rate for plain SGD.

    Returns
    -------
    tuple
        ``(training_function, test_function, code_function,
        decode_function)``:

        * training: ``(X, Y) -> loss`` and applies the SGD updates,
        * test: ``(X, Y) -> [loss, output]``,
        * code: ``X -> encoder output``,
        * decode: ``Z -> network output`` with ``Z`` substituted for the
          encoder layer's output.

        ``loss`` is the mean squared error between the network output and Y.
    """
    # --- whole network -------------------------------------------------
    net_input = T.tensor4()
    net_target = T.tensor4()

    trainable = nn.layers.get_all_params(layer=network, trainable=True)
    reconstruction = nn.layers.get_output(layer_or_layers=network,
                                          inputs=net_input)
    _all_layers = nn.layers.get_all_layers(network)  # result is not used
    mse = T.mean(nn.objectives.squared_error(reconstruction, net_target))
    sgd_updates = nn.updates.sgd(loss_or_grads=mse,
                                 params=trainable,
                                 learning_rate=l_rate)

    training_function = theano.function(inputs=[net_input, net_target],
                                        outputs=mse,
                                        updates=sgd_updates)
    test_function = theano.function(inputs=[net_input, net_target],
                                    outputs=[mse, reconstruction])

    # --- encoder -------------------------------------------------------
    code = nn.layers.get_output(layer_or_layers=encoder, inputs=net_input)
    code_function = theano.function(inputs=[net_input], outputs=code)

    # --- decoder -------------------------------------------------------
    code_input = T.tensor4()
    decoded = nn.layers.get_output(layer_or_layers=network,
                                   inputs={encoder: code_input})
    decode_function = theano.function(inputs=[code_input], outputs=decoded)

    return training_function, test_function, code_function, decode_function
def burn():
    """Run a cuDNN convolution 10000 times on fixed random data.

    Image, kernel and output buffers are shared variables, so the compiled
    function takes no inputs.  The compiled graph is printed before the
    loop starts.
    """
    sz = 128
    img_shp = [sz, sz, sz, sz]
    kern_shp = [sz // 2, sz, 3, 3]
    out_shp = get_conv_output_shape(img_shp, kern_shp, 'valid', (1, 1))

    def rand(shp):
        return np.random.rand(*shp).astype(theano.config.floatX)

    # The symbolic tensor4 placeholders that used to be created here were
    # overwritten by these shared variables before any use, so they are gone.
    img = theano.shared(rand(img_shp))
    kern = theano.shared(rand(kern_shp))
    out = theano.shared(rand(out_shp))

    # beta 1 is needed to force the reuse of out, otherwise, it is
    # replaced by a GpuAllocEmpty
    o1 = dnn._dnn_conv(img, kern, conv_mode='conv', out=out, beta=1.)

    mode = theano.compile.get_default_mode().including(
        "local_remove_all_assert")
    f = theano.function([], [o1], mode=mode)
    theano.printing.debugprint(f)

    print("Start computation")
    for _ in range(10000):
        # f.fn() is called directly rather than f().
        f.fn()
    print("Computation stopped")
def test_mask_loss_sobel():
    """Check that ``mask_loss_sobel`` gives zero loss for a matching image.

    The test image is 0.5 everywhere, 1 where the mask exceeds
    ``MASK["IGNORE"]`` and 0 where it is below ``MASK["BACKGROUND_RING"]``.
    The four Sobel responses are saved to ``mask_loss_sobel.png``.
    """
    th_mask, th_img = T.tensor4(), T.tensor4()
    ml = mask_loss_sobel(th_mask, th_img)
    outputs = [ml.loss] + list(ml.sobel_mask) + list(ml.sobel_img)
    mask_loss = theano.function([th_mask, th_img], outputs)

    mask_idx = next(masks(1))
    image_ok = 0.5 * np.ones_like(mask_idx)
    image_ok[mask_idx > MASK["IGNORE"]] = 1
    image_ok[mask_idx < MASK["BACKGROUND_RING"]] = 0

    print()
    (loss,
     sobel_mask_x, sobel_mask_y,
     sobel_img_x, sobel_img_y) = mask_loss(mask_idx, image_ok)

    plt.set_cmap('gray')
    # (subplot position, response, draw a colorbar afterwards?)
    panels = [
        (221, sobel_mask_x, False),
        (222, sobel_mask_y, True),
        (223, sobel_img_x, False),
        (224, sobel_img_y, True),
    ]
    for position, response, with_colorbar in panels:
        plt.subplot(position)
        plt.imshow(response[0, 0])
        if with_colorbar:
            plt.colorbar()
    plt.savefig("mask_loss_sobel.png")

    print()
    print("mask_loss: {}".format(mask_loss(mask_idx, image_ok)))
    assert loss == 0
def compile(self):
    """Compile the prediction function and, when training, the fit function.

    ``self.predict`` maps a seed tensor to the deterministic outputs of the
    ``'seed'`` and ``'out'`` layers.  If ``args.train`` is false nothing else
    is built.  Otherwise ``self.fit`` is compiled: it takes
    ``(input_tensor, seed_tensor)``, returns the three generator losses
    followed by the per-sample mean of the discriminator output, and
    applies Adam updates to both generator and discriminator parameters.
    Learning rates and the adversarial weight are shared scalars that start
    at 0.0.
    """
    net = self.network
    float_x = theano.config.floatX

    def zero_scalar():
        # Shared floatX scalar initialised to 0.0.
        return theano.shared(np.array(0.0, dtype=float_x))

    # Helper function for rendering test images during training, or
    # standalone inference mode.
    input_tensor, seed_tensor = T.tensor4(), T.tensor4()
    input_layers = {net['img']: input_tensor, net['seed']: seed_tensor}
    output = lasagne.layers.get_output([net['seed'], net['out']],
                                       input_layers, deterministic=True)
    self.predict = theano.function([seed_tensor], output)

    if not args.train:
        return

    gen_out, percept_out, disc_out = lasagne.layers.get_output(
        [net['out'], net[args.perceptual_layer], net['disc']],
        input_layers,
        deterministic=False)

    # Generator loss function, parameters and updates.
    self.gen_lr = zero_scalar()
    self.adversary_weight = zero_scalar()
    perceptual = self.loss_perceptual(percept_out) * args.perceptual_weight
    smoothness = self.loss_total_variation(gen_out) * args.smoothness_weight
    adversarial = self.loss_adversarial(disc_out) * self.adversary_weight
    gen_losses = [perceptual, smoothness, adversarial]
    gen_params = lasagne.layers.get_all_params(net['out'], trainable=True)
    print(' - {} tensors learned for generator.'.format(len(gen_params)))
    gen_updates = lasagne.updates.adam(sum(gen_losses, 0.0), gen_params,
                                       learning_rate=self.gen_lr)

    # Discriminator loss function, parameters and updates.
    self.disc_lr = zero_scalar()
    disc_losses = [self.loss_discriminator(disc_out)]
    disc_params = []
    for layer_name, layer in net.items():
        if 'disc' in layer_name:
            disc_params.extend(layer.get_params())
    print(' - {} tensors learned for discriminator.'.format(len(disc_params)))
    raw_grads = T.grad(sum(disc_losses, 0.0), disc_params)
    grads = [g.clip(-5.0, +5.0) for g in raw_grads]
    disc_updates = lasagne.updates.adam(grads, disc_params,
                                        learning_rate=self.disc_lr)

    # Combined Theano function for updating both generator and
    # discriminator at the same time.
    updates = collections.OrderedDict(gen_updates.items())
    updates.update(disc_updates.items())
    fit_outputs = gen_losses + [disc_out.mean(axis=(1, 2, 3))]
    self.fit = theano.function([input_tensor, seed_tensor], fit_outputs,
                               updates=updates)
def set_generator_update_function(feature_function, energy_function, generator_function, generator_params, generator_bn_params, generator_optimizer, generator_bn_optimizer):
    """Compile the generator update step.

    The cost is the mean energy of generated samples (the annealing scale
    is fixed at 1.0).  Both optimizers are applied to that cost and their
    update lists are concatenated.

    Returns
    -------
    Compiled function taking ``[input_data, hidden_data, noise_data,
    annealing]`` and returning ``[input_energy, sample_energy]``.
    ``noise_data`` and ``annealing`` are not used in the graph; unused
    inputs are ignored by the compiled function.
    """
    float_x = theano.config.floatX

    # Symbolic inputs: data, latent code, noise, annealing rate.
    input_data = T.tensor4(name='input_data', dtype=float_x)
    hidden_data = T.matrix(name='hidden_data', dtype=float_x)
    noise_data = T.tensor4(name='noise_data', dtype=float_x)
    annealing = T.scalar(name='annealing', dtype=float_x)

    # Annealing is currently a constant factor.
    annealing_scale = 1.0

    # Generated samples (training mode).
    sample_data = generator_function(hidden_data, is_train=True)

    # Features, then energies, for real and generated data.
    input_feature = feature_function(input_data, is_train=True)
    sample_feature = feature_function(sample_data, is_train=True)
    input_energy = energy_function(input_feature, is_train=True)
    sample_energy = energy_function(sample_feature, is_train=True)

    # Generator cost: the negative phase only.
    generator_updates_cost = T.mean(sample_energy * annealing_scale)

    generator_updates = generator_optimizer(generator_params,
                                            generator_updates_cost)
    generator_bn_updates = generator_bn_optimizer(generator_bn_params,
                                                  generator_updates_cost)

    return theano.function(
        inputs=[input_data, hidden_data, noise_data, annealing],
        outputs=[input_energy, sample_energy],
        updates=generator_updates + generator_bn_updates,
        on_unused_input='ignore')
def test_mask_loss_median():
    """Check and time the 'cuda' and 'theano' variants of mask_loss_median.

    For an image that is 1 where the mask exceeds ``MASK["IGNORE"]`` and 0
    elsewhere, both implementations must report a loss of zero.  The mean
    time of 10 calls is printed for each implementation.
    """
    th_mask, th_img = T.tensor4(), T.tensor4()
    sym_inputs = [th_mask, th_img]

    cuda_out = mask_loss_median(th_mask, th_img, impl='cuda')
    cuda_keys = ['loss', 'median_black', 'loss_per_sample',
                 'black_white_loss']
    cuda_mask_loss = theano.function(sym_inputs,
                                     [cuda_out[key] for key in cuda_keys])

    theano_out = mask_loss_median(th_mask, th_img, impl='theano')
    theano_mask_loss = theano.function(sym_inputs, theano_out['loss'])

    mask_idx = next(masks(1))
    image_ok = np.zeros_like(mask_idx)
    image_ok[mask_idx > MASK["IGNORE"]] = 1

    # Print the shapes of every CUDA output except the loss itself.
    outs = cuda_mask_loss(mask_idx, image_ok)
    for extra in outs[1:]:
        print(extra.shape)

    assert (cuda_mask_loss(mask_idx, image_ok)[0] == 0).all()
    assert (theano_mask_loss(mask_idx, image_ok) == 0).all()

    n = 10
    timed = [
        ("cuda implementation: {}", cuda_mask_loss),
        ("theano implementation: {}", theano_mask_loss),
    ]
    for template, loss_fn in timed:
        t = Timer(lambda loss_fn=loss_fn: loss_fn(mask_idx, image_ok))
        print(template.format(t.timeit(number=n) / n))
def test_batch_normalization_train_without_running_averages():
    """Compile and run batch_normalization_train without running averages.

    Verifies that none of the abstract batch-norm Ops remain in the compiled
    graph, then runs the function once on random data.
    """
    utt.seed_rng()

    x = T.tensor4('x')
    scale = T.tensor4('scale')
    bias = T.tensor4('bias')
    dy = T.tensor4('dy')
    data_shape = (5, 10, 30, 25)
    param_shape = (1, 10, 30, 25)

    # Forward pass.
    out, x_mean, x_invstd = bn.batch_normalization_train(
        x, scale, bias, 'per-activation')
    # Backward pass, driven by the supplied output gradient dy.
    grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})

    f = theano.function([x, scale, bias, dy],
                        [out, x_mean, x_invstd] + grads)

    # The abstract Ops must have been replaced by the optimizer.
    abstract_ops = (bn.AbstractBatchNormTrain,
                    bn.AbstractBatchNormInference,
                    bn.AbstractBatchNormTrainGrad)
    leftover = [isinstance(node.op, abstract_ops)
                for node in f.maker.fgraph.toposort()]
    assert not any(leftover)

    # Run once on random values (drawn in the order X, Dy, Scale, Bias).
    float_x = theano.config.floatX
    X = 4 + 3 * numpy.random.randn(*data_shape).astype(float_x)
    Dy = -1 + 2 * numpy.random.randn(*data_shape).astype(float_x)
    Scale = numpy.random.randn(*param_shape).astype(float_x)
    Bias = numpy.random.randn(*param_shape).astype(float_x)
    f(X, Scale, Bias, Dy)
def fit(self, X, y=None):
    """Compile the train/predict functions and run one training call.

    Parameters
    ----------
    X : array passed as the ``input`` 4-D tensor; ``X.shape[1]`` sets the
        second dimension of the weight tensor.
    y : array passed as the ``target`` 4-D tensor.  It is required despite
        the ``None`` default, because ``y.shape[0]`` is read immediately.

    Returns
    -------
    The prediction returned by the compiled train function for
    ``(X, y, decay=1)``.

    Notes
    -----
    ``self.weights['input']`` is reset to zeros.  Each train call updates
    ``self.hidden_matrices['K']`` and ``['A']`` from the input statistics
    and recomputes the weights as ``A / (K + C)`` through ``theano.scan``.
    ``self.conv_fct['train']`` and ``['predict']`` are replaced.
    """
    self.n_features = y.shape[0]
    self.weights['input'] = theano.shared(
        value=np.zeros((self.n_features,
                        X.shape[1],
                        self.spatial[0],
                        self.spatial[1]),
                       dtype=theano.config.floatX),
        name='w', borrow=True)

    input_var = T.tensor4(name='input')
    target = T.tensor4(name='target')
    decay = T.scalar(name='decay')

    # Statistics of the current batch.
    xy = T.nnet.conv2d(input_var.transpose(1, 0, 2, 3),
                       target.transpose(1, 0, 2, 3),
                       border_mode=self.pad,
                       subsample=self.stride)
    xx = T.sum(T.power(input_var, 2), axis=(0, 2, 3))

    # A former `k = ifelse(<cond>, )` statement was dropped here: ifelse
    # needs a condition plus both branches, so the call always raised
    # TypeError, and its result was never used.

    lam = theano.shared(value=self._C, name='constrain', borrow=True)

    prediction = T.nnet.conv2d(input_var, self.weights['input'],
                               border_mode=self.pad,
                               subsample=self.stride)

    accum_a = self.hidden_matrices['A']
    accum_k = self.hidden_matrices['K']

    weights, _ = theano.scan(
        fn=lambda a, k, c: a / (k + c),
        outputs_info=None,
        sequences=[accum_a.transpose(1, 0, 2, 3), accum_k],
        non_sequences=lam)
    new_weights = weights.transpose(1, 0, 2, 3)

    updates = [(accum_k, accum_k.dot(decay) + xx),
               (accum_a, accum_a.dot(decay) + xy),
               (self.weights['input'], new_weights)]

    self.conv_fct['train'] = theano.function([input_var, target, decay],
                                             prediction,
                                             updates=updates)
    self.conv_fct['predict'] = theano.function([input_var], prediction)

    return self.conv_fct['train'](X, y, 1)
def get_dc_input_layers(shape):
    """
    Creates the input layers for the CNN. Works for 2D and 3D input.

    Symbolic variables are 5-D when ``shape`` has more than four entries
    and 4-D otherwise.

    Returns
    -------
    tuple
        ``(input_layer, kspace_input_layer, mask_layer)``: three
        ``InputLayer`` objects named 'input', 'kspace_input' and 'mask',
        all created with the same ``shape``.
    """
    make_var = tensor5 if len(shape) > 4 else T.tensor4

    input_var = make_var('input_var')
    kspace_input_var = make_var('kspace_input_var')
    mask_var = make_var('mask')

    return (
        InputLayer(shape, input_var=input_var, name='input'),
        InputLayer(shape, input_var=kspace_input_var, name='kspace_input'),
        InputLayer(shape, input_var=mask_var, name='mask'),
    )
def train_model():
    """Train the FCN segmenter once per fold for 5 folds.

    For each fold ``i`` a data streamer over ``ch4_256.hdf5`` is created
    with ``folds=(5, i)``, a fresh network is built, and
    ``u.train_with_hdf5`` is run with Adam on the ``du.sorenson_dice``
    loss.  Training batches are transformed with rotation in
    ``(-180, 180)``; test batches are not rotated.  Parameters are saved
    under ``c.params_dir`` in a fold- and version-specific ``.npz`` file.
    """
    batch_size = 16
    num_epochs = c.ch4_train_epochs
    sz = c.fcn_img_size
    version = 2

    # range instead of xrange: identical on Python 2, also valid on Python 3.
    for i in range(5):
        data = u.DataH5PyStreamer(
            os.path.join(c.data_intermediate, 'ch4_256.hdf5'),
            batch_size=batch_size, folds=(5, i))

        input_var = T.tensor4('input')
        label_var = T.tensor4('label')
        net, output, output_det = m.build_fcn_segmenter(
            input_var, (None, 1, sz, sz), version=version)
        params = nn.layers.get_all_params(net['output'], trainable=True)

        lr = theano.shared(nn.utils.floatX(3e-3))
        loss = du.sorenson_dice(output, label_var)
        te_loss = du.sorenson_dice(output_det, label_var)
        updates = nn.updates.adam(loss, params, learning_rate=lr)

        # Only the functions handed to the training loop are compiled; the
        # accuracy and prediction functions that used to be compiled here
        # were never called.
        train_fn = theano.function([input_var, label_var], loss,
                                   updates=updates)
        test_fn = theano.function([input_var, label_var], te_loss)

        u.train_with_hdf5(
            data, num_epochs=num_epochs,
            train_fn=train_fn, test_fn=test_fn,
            max_per_epoch=-1, use_tqdm=False,
            tr_transform=lambda x: du.segmenter_data_transform(
                x, rotate=(-180, 180)),
            te_transform=lambda x: du.segmenter_data_transform(
                x, rotate=None),
            last_layer=net['output'],
            save_params_to=os.path.join(
                c.params_dir,
                'ch4seg_v{}/test_ch4seg_f{}_v{}.npz'.format(
                    version, i, version)))
def compile_dream(self, X_train, shapes, indices, initializer):
    """Compile the "dream" update and accuracy functions.

    Every position of ``X_train`` becomes a float32 shared variable in
    ``self.X_dream``.  Positions listed in ``indices`` are initialised from
    ``initializer(shapes[k])`` (``k`` counts those positions in order).  All
    others keep only their first sample, made 4-D with ``atleast_4d``;
    ``X_train`` is modified in place for them.

    ``self.dream_update`` takes no inputs, returns the mean flattened
    network output and applies the ``dream_optimizer`` updates (computed
    for ``-preds``) to the dreamed inputs, plus the layer updates.
    ``self.dream_accuracy_theano`` maps ``(y_pred, y)`` to the accuracy.
    """
    self.dream_compiled = True
    self.X_dream = []
    dream_inputs = self.X_dream

    next_shape = 0
    for position, sample in enumerate(X_train):
        if position in indices:
            initial = initializer(shapes[next_shape])
            next_shape += 1
        else:
            # Keep the first sample only and store it back into X_train.
            X_train[position] = atleast_4d(sample[[0]])
            initial = X_train[position]
        dream_inputs.append(theano.shared(initial.astype('float32')))

    y_hat_test, layer_updates = self.tree.get_output(
        self.params_shared, self.X_dream[:], True)
    preds = y_hat_test.flatten(self.num_output_dims).mean(axis=None)

    self.dream_optimizer.build([self.X_dream[j] for j in indices])
    raw_updates = self.dream_optimizer.get_updates(
        [self.X_dream[j] for j in indices], -preds)
    # Cast every update expression to float32 before adding layer updates.
    updates = [(pair[0], pair[1].astype('float32'))
               for pair in list(raw_updates)]
    updates += layer_updates

    y_pred = T.tensor4(dtype='float32')
    y = T.tensor4(dtype='float32')
    self.dream_accuracy_theano = theano.function(
        [y_pred, y], self.accuracy.get_accuracy(y_pred, y))

    self.dream_update = theano.function(
        inputs=[],
        outputs=preds,
        updates=updates
    )
def getTheanoConvFunction(patchsize=None, imagesize=None):
    """
    Return a theano function performing valid convolution of a filter on an
    image, normalised by the local norm of the image.

    Parameters
    ----------
    patchsize : int or None
        Side length of the (square) filter and mask, used as a static shape
        hint for ``conv2d``.
    imagesize : int or None
        Side length of the (square) image, used as a static shape hint.

    Returns
    -------
    Compiled function ``f(img, filter, mask)`` returning
    ``conv(img, filter) / sqrt(conv(img**2, mask))`` with 'valid' borders.
    """
    # Define the size of the images and filters to allow Theano to
    # further optimize the convolution op
    image_shape = (None, 1, imagesize, imagesize)
    filter_shape = (None, 1, patchsize, patchsize)

    # Define the input variables to the function
    img = T.tensor4(dtype='floatX')
    filt = T.tensor4(dtype='floatX')
    mask = T.tensor4(dtype='floatX')

    # Convolve the image with the filter.
    convImgWithFilter = T.nnet.conv.conv2d(img, filt, border_mode='valid',
                                           image_shape=image_shape,
                                           filter_shape=filter_shape)

    # Normalise by the norm of each image portion, so that a brighter region
    # does not take the lead over a darker, better-fitting one.  The norm is
    # the square root of the masked sum of squares.
    localEnergy = T.nnet.conv.conv2d(img**2, mask, border_mode='valid',
                                     image_shape=image_shape,
                                     filter_shape=filter_shape)
    localNorm = localEnergy ** 0.5

    # The square root is taken once.  It used to be applied a second time
    # in the division, which divided by the fourth root of the energy.
    normConvImgWithFilter = convImgWithFilter / localNorm

    # Compile and return the theano function
    f = theano.function([img, filt, mask], normConvImgWithFilter)
    return f
def theano_convolution(input_size, dtype, num_kernels, ksize, mode, iternum):
    """Time a Theano ``conv2d`` on one random image.

    Parameters
    ----------
    input_size : sequence
        Shape of the random image; it is indexed as
        ``(height, width, channels)`` when the 4-D input is built.
    dtype : numpy dtype
        ``np.float32`` selects a float32 input tensor; anything else
        selects float64.
    num_kernels : int
        Number of filters.
    ksize : int
        Side length of the square filters.
    mode : border mode passed to ``conv.conv2d``.
    iternum : int
        Number of timed iterations.

    Prints the mean time per iteration; returns ``None``.
    """
    rng = np.random.RandomState(23455)
    n_channels = input_size[-1]

    # instantiate 4D tensor for input
    tensor_dtype = 'float32' if dtype == np.float32 else 'float64'
    input_var = T.tensor4(name='input', dtype=tensor_dtype)

    # initialize shared variable for weights.
    w_shp = (num_kernels, n_channels, ksize, ksize)
    w_bound = np.sqrt(n_channels * ksize * ksize)
    W = theano.shared(
        np.asarray(
            rng.uniform(low=-1.0 / w_bound, high=1.0 / w_bound, size=w_shp),
            dtype=dtype),
        name='W')
    conv_out = conv.conv2d(input_var, W, border_mode=mode)

    # create theano function to compute filtered images
    f = theano.function([input_var], conv_out)

    img = np.random.random_sample(input_size).astype(dtype)
    # put image in 4D tensor of shape (1, channels, height, width)
    img_ = img.swapaxes(0, 2).swapaxes(1, 2).reshape(
        1, n_channels, input_size[0], input_size[1])
    img_ = np.ascontiguousarray(img_)

    # Warm-up call, in case theano wants to initialize something.
    f(img_)

    start = time.time()
    for _ in range(iternum):
        f(img_)
    elapsed = time.time() - start

    # Single-argument print call: same text on Python 2, valid on Python 3.
    print('theano time: %s' % (elapsed / iternum,))
def test_pooling():
    """Check ``perform_pooling`` on a 3x3 input for three configurations.

    Each case gives ``(shift, pool_shape, limits, expected)``; the compiled
    pooling function must reproduce ``expected`` on the input
    ``[[1, 2, 3], [4, 5, 6], [7, 8, 9]]``.
    """
    inpt = prepare_array([[1, 2, 3],
                          [4, 5, 6],
                          [7, 8, 9]])

    cases = [
        # shift,            pool_shape, limits, expected
        ([[0, 1], [0, 1]], [2, 2], [2, 2], [[5, 6], [8, 9]]),
        ([[0], [0, 1]],    [2, 2], [1, 2], [[5, 6]]),
        ([[0, 1], [0, 1]], [1, 2], [3, 2], [[2, 3], [5, 6], [8, 9]]),
    ]

    for shift, pool_shape, limits, expected in cases:
        output = prepare_array(expected)
        inpt_expr = tensor4('input')
        output_expr = perform_pooling(inpt_expr, shift, pool_shape, limits)
        f = theano.function([inpt_expr], output_expr)
        assert np.allclose(f(inpt), output)
def create_iter_funcs_test(l_out, bs, N=50):
    """Compile an MC-dropout prediction function.

    The input batch is repeated ``N`` times along axis 0, passed through
    the network non-deterministically, split into ``bs`` groups and
    averaged within each group.

    Parameters
    ----------
    l_out : output layer passed to ``layers.get_output``.
    bs : int
        Fixed batch size; the number of splits must be known when the graph
        is built.
    N : int
        Number of stochastic forward passes per sample.

    Returns
    -------
    Compiled function ``X_batch -> y_hat``.
    """
    X = T.tensor4('X')
    X_batch = T.tensor4('X_batch')

    X_repeat = T.extra_ops.repeat(X, N, axis=0)
    y_sample = layers.get_output(
        l_out, X_repeat, deterministic=False)

    # the number of splits needs to be pre-defined
    # Floor division keeps the split sizes integral; with true division
    # (Python 3) the sizes become floats, which T.split does not accept.
    sizes = [X_repeat.shape[0] // X.shape[0]] * bs
    y_sample_split = T.as_tensor_variable(
        T.split(y_sample, sizes, bs, axis=0))
    y_hat = T.mean(y_sample_split, axis=1)

    # Plain variables are passed as inputs: theano.Param is not available
    # in newer Theano releases, and the wrapper carried no options here.
    test_iter = theano.function(
        inputs=[X_batch],
        outputs=y_hat,
        givens={
            X: X_batch,
        },
    )

    return test_iter
def __init__(self):
    """Assemble the two-input convolutional model and build it.

    Layer order: two (shared-weight convolution, pooling, tanh) stages on
    both inputs, ``Abs_diff``, a third convolution stage, flatten, then
    fully connected 2250 -> 500 -> 2 layers with tanh and softmax.  The
    model is built with ``CostFunc.nll`` and ``RMSprop()`` and stored in
    ``self.model``.
    """
    X1 = T.tensor4()
    X2 = T.tensor4()
    X = [X1, X2]
    Y = [T.ivector()]

    model = Model()

    def add_conv_stage(filter_shape, **conv_extra):
        # Shared-weight convolution followed by 2x2 pooling and tanh.
        model.add(Conv(filter_shape=filter_shape, w_shared=True,
                       **conv_extra))
        model.add(Pooling(pool_size=(2, 2)))
        model.add(Activation(mode='tanh'))

    # conv1 and conv2 operate on both inputs.
    add_conv_stage((25, 3, 5, 5), n_inputs=2)
    add_conv_stage((25, 25, 3, 3), n_inputs=2)

    # abs_diff
    model.add(Abs_diff())

    # conv3
    add_conv_stage((25, 25, 3, 3))
    model.add(Flatten())

    model.add(Fully((25*18*5, 500)))
    model.add(Activation(mode='tanh'))
    model.add(Fully((500, 2)))
    model.add(Activation(mode='softmax'))

    model.build(CostFunc.nll, RMSprop(), X, Y)
    self.model = model
def test_dtype_upcast(self):
    """
    Checks dtype upcast for CorrMM methods.

    For each op and every float32/float64 combination of its two inputs,
    the compiled output dtype must equal ``theano.scalar.upcast`` of the
    input dtypes.
    """

    def rand(shape, dtype='float64'):
        # Uniform values in [-1, 1) with the requested dtype.
        values = numpy.asarray(numpy.random.rand(*shape), dtype=dtype)
        return values * 2 - 1

    # (op class, shape of first input, shape of second input)
    cases = [
        (corr.CorrMM, [4, 5, 6, 3], [7, 5, 3, 2]),
        (corr.CorrMM_gradWeights, [1, 5, 6, 3], [1, 5, 3, 1]),
        (corr.CorrMM_gradInputs, [1, 5, 6, 3], [7, 1, 3, 1]),
    ]
    dtypes = ['float32', 'float64']
    dtype_pairs = [(first, second) for first in dtypes for second in dtypes]

    for op, a_shape, b_shape in cases:
        for a_dtype, b_dtype in dtype_pairs:
            expected_dtype = theano.scalar.upcast(a_dtype, b_dtype)

            a_tens = T.tensor4(dtype=a_dtype)
            b_tens = T.tensor4(dtype=b_dtype)
            a_val = rand(a_shape, dtype=a_dtype)
            b_val = rand(b_shape, dtype=b_dtype)

            result = op()(a_tens, b_tens)
            f = theano.function([a_tens, b_tens], result, mode=self.mode)
            assert_equals(f(a_val, b_val).dtype, expected_dtype)
def __init__(self, Tt, N, H, W):
    """Build the symbolic graph of the conv + LSTM + FC model.

    Parameters
    ----------
    Tt : int
        Sequence length.
    N : int
        Batch size.
    H, W : int
        Frame height and width.  The pooled features are reshaped to
        ``4*H*W`` values per step, and the LSTM input width is derived from
        the same quantity.

    The constructor defines the inputs ``Q``, ``P``, ``Y`` and ``alpha``,
    the layers, ``self.output``, ``self.loss``, ``self.params``,
    ``self.train_args`` and ``self.predict_args``, then calls the super
    constructor.
    """
    self.batch_size = N
    self.seq_length = Tt

    self.Q = T.tensor4('Q', dtype=config.floatX)  # (T,N,H,W), will reshape to (T,N,1,H,W) for convolution
    self.P = T.tensor3('P', dtype=config.floatX)  # (T,N,D=2)
    self.Y = T.tensor4('Y', dtype=config.floatX)  # (T,N,H,W)
    self.alpha = T.scalar('alpha', dtype=config.floatX)  # learning rate

    self.Q_view = self.Q.reshape((Tt, N, 1, H, W))
    self.CONV1 = TemporalConvReluLayer(input_var=self.Q_view,
                                       layerid='CONV1')
    self.CONV2 = TemporalConvReluLayer(input_var=self.CONV1.output,
                                       n_input_channels=8, n_filters=16,
                                       layerid='CONV2')
    self.POOL = T.signal.pool.pool_2d(self.CONV2.output, (2, 2))

    # Flattened size of the pooled feature maps per time step and sample.
    pooled_dim = 4 * H * W
    PandQ = T.concatenate([self.POOL.reshape((Tt, N, pooled_dim)), self.P],
                          axis=2)

    # LSTM input width = pooled features + the 2 entries of P.  This is
    # computed with integer arithmetic from H and W; the former literal
    # `32*32*16/4+2` has the same value for 32x32 frames but evaluates to a
    # float under Python 3.
    self.LSTM = LSTMLayer(input_var=PandQ, num_units=512, layerid='LSTM',
                          sequence=Tt, in_dim=pooled_dim + 2)
    self.FC = TemporalFC(input_var=self.LSTM.output, num_units=H*W,
                         layerid='FC', in_dim=512)

    Y_pred = T.nnet.sigmoid(self.FC.output.reshape(self.Q.shape))
    self.output = Y_pred

    # Cross-entropy with the positive term weighted by 14.
    self.loss = -(self.Y * T.log(Y_pred)*14
                  + (1-self.Y) * T.log(1-Y_pred)).mean(dtype=config.floatX)

    self.params = {}
    self.params.update(self.CONV1.params)
    self.params.update(self.CONV2.params)
    self.params.update(self.LSTM.params)
    self.params.update(self.FC.params)

    self.train_args = [self.Q, self.P, self.Y, self.alpha]
    self.predict_args = [self.Q, self.P]

    # super constructor creates gradients, _train, and _predict
    super(PongRNNModel, self).__init__()
def setUp(self):
    """Create the symbolic input/filter tensors shared by the tests.

    Both tensors use ``self.dtype`` and are renamed after creation to
    'default_V' and 'default_filters'.  The test is skipped when neither
    SciPy's signal module nor a C++ compiler is available.
    """
    super(TestConv2D, self).setUp()

    image_var = T.tensor4('input', dtype=self.dtype)
    image_var.name = 'default_V'
    self.input = image_var

    filter_var = T.tensor4('filters', dtype=self.dtype)
    filter_var.name = 'default_filters'
    self.filters = filter_var

    scipy_missing = not conv.imported_scipy_signal
    if scipy_missing and theano.config.cxx == "":
        raise SkipTest("conv2d tests need SciPy or a c++ compiler")
def main(num_epochs=20, needsNormalization=True):
    """Train the CNN on the USPS data and report test loss and accuracy.

    Args:
        num_epochs: number of passes over the training set.
        needsNormalization: when true, run ``normalize`` on the train and
            test data before training.

    All progress output uses single-argument ``print(...)`` calls so the
    function parses on Python 3 and prints the same text on Python 2
    (the original mixed ``print`` statements with ``print()`` calls).

    NOTE(review): the source this was recovered from had lost its
    indentation; the loop nesting below (training loss printed once per
    epoch, validation run once after all epochs) follows the original
    comments -- confirm against the upstream file.
    """
    # Load the dataset
    print("Loading data...")
    train_data, train_labels = load_dataset('usps/train.gz')
    test_data, test_labels = load_dataset('usps/test.gz')
    print("train_data has dimensions {}".format(train_data.shape))
    print("train_labels has dimensions {}".format(train_labels.shape))
    print("test_data has dimensions {}".format(test_data.shape))
    print("test_labels has dimensions {}".format(test_labels.shape))

    # normalize the data
    if needsNormalization:
        train_data = normalize(train_data)
        test_data = normalize(test_data)

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create CNN
    print("Building model and compiling functions...")
    network = build_cnn(input_var)

    # Training criterion: mean categorical cross-entropy, optimised with
    # SGD + Nesterov momentum (momentum 0.9, learning rate 0.1).
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.1, momentum=0.9)

    # actual training
    print("Starting training...")

    # Create a loss expression for validation/testing
    # (deterministic=True is passed to get_output for the evaluation graph).
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    # We iterate over epochs:
    for epoch in range(num_epochs):
        print("epoch {}".format(epoch))
        train_err = 0
        train_batches = 0
        # One epoch with a minibatch size of 128; the mean training loss is
        # printed after each epoch.
        for batch in iterate_minibatches(train_data, train_labels, 128,
                                         shuffle=True):
            inputs, targets = batch
            inputs = inputs.astype(np.float32)
            targets = targets.astype(np.int32)
            train_err += train_fn(inputs, targets)
            train_batches += 1
        print(" training loss:\t\t{:.6f}".format(train_err / train_batches))

    # After training, classify the test data and print the test loss and
    # test accuracy.
    print("STARTING VALIDATION")
    val_err = 0
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(test_data, test_labels, 128,
                                     shuffle=False):
        inputs, targets = batch
        inputs = inputs.astype(np.float32)
        targets = targets.astype(np.int32)
        err, acc = val_fn(inputs, targets)
        val_err += err
        val_acc += acc
        val_batches += 1
    print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
    print(" validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))
def build_network_from_ae(classn):
    """Build the auto-encoder graph plus a classification head on top of it.

    The auto-encoder part consists of a convolutional trunk, a feature map
    gated by a ``SoftThresPerc`` mask (merged by ``ChInnerProdMerge``), a
    deconvolutional decoder, and a second "global" branch whose output is
    summed with the decoder output.  The classification head reads the
    merged encoder layer and the global feature layer, concatenates them
    with an auxiliary input of width ``aug_fea_n`` and ends in ``classn``
    sigmoid units.

    Relies on the module-level names ``PS`` and ``aug_fea_n``.

    Args:
        classn: number of output units of the final dense layer.

    Returns:
        Tuple ``(network, new_params, input_var, aug_var, target_var)``
        where ``new_params`` are the trainable parameters of the final
        network that are not trainable parameters of the auto-encoder.
    """

    def conv_same(incoming, num_filters, size):
        # Batch-normalised leaky-ReLU convolution, stride 1, 'same' padding.
        return batch_norm(layers.Conv2DLayer(
            incoming, num_filters, filter_size=(size, size), stride=1,
            pad='same', nonlinearity=leaky_rectify))

    def conv_1x1(incoming, num_filters):
        # Batch-normalised leaky-ReLU 1x1 convolution.
        return batch_norm(layers.Conv2DLayer(
            incoming, num_filters, filter_size=(1, 1),
            nonlinearity=leaky_rectify))

    def avg_pool(incoming, size):
        # Non-overlapping average pooling (stride == pool size).
        return layers.Pool2DLayer(incoming, pool_size=(size, size),
                                  stride=size, mode='average_inc_pad')

    def deconv_stack(incoming, specs):
        # Chain of batch-normalised leaky-ReLU deconvolutions; each spec is
        # (num_filters, filter size, stride, crop).
        out = incoming
        for num_filters, size, stride, crop in specs:
            out = batch_norm(layers.Deconv2DLayer(
                out, num_filters, filter_size=(size, size), stride=stride,
                crop=crop, nonlinearity=leaky_rectify))
        return out

    def to_three_channels(incoming):
        # Final linear 1x1 deconvolution down to 3 channels.
        return layers.Deconv2DLayer(incoming, 3, filter_size=(1, 1),
                                    stride=1, crop='same',
                                    nonlinearity=identity)

    def dense_bn(incoming, num_units):
        # Batch-normalised leaky-ReLU dense layer.
        return batch_norm(layers.DenseLayer(incoming, num_units,
                                            nonlinearity=leaky_rectify))

    # ---- convolutional trunk -------------------------------------------
    input_var = T.tensor4('input_var')
    trunk = layers.InputLayer(shape=(None, 3, PS, PS), input_var=input_var)
    trunk = conv_same(trunk, 100, 5)
    trunk = conv_same(trunk, 120, 5)
    trunk = avg_pool(trunk, 2)
    trunk = conv_same(trunk, 240, 3)
    trunk = conv_same(trunk, 320, 3)
    trunk = avg_pool(trunk, 2)
    trunk = conv_same(trunk, 640, 3)
    prely = conv_same(trunk, 1024, 3)

    # ---- feature map gated by a mask -----------------------------------
    featm = conv_1x1(prely, 640)
    feat_map = batch_norm(layers.Conv2DLayer(
        featm, 100, filter_size=(1, 1), nonlinearity=rectify,
        name="feat_map"))
    maskm = conv_1x1(prely, 100)
    mask_rep = batch_norm(
        layers.Conv2DLayer(maskm, 1, filter_size=(1, 1), nonlinearity=None),
        beta=None, gamma=None)
    mask_map = SoftThresPerc(mask_rep, perc=98.4, alpha=0.1,
                             beta=init.Constant(0.5), tight=100.0,
                             name="mask_map")
    enlyr = ChInnerProdMerge(feat_map, mask_map, name="encoder")

    # ---- decoder -------------------------------------------------------
    decoded = deconv_stack(enlyr, [
        (1024, 3, 1, 'same'),
        (640, 3, 1, 'same'),
        (640, 4, 2, (1, 1)),
        (320, 3, 1, 'same'),
        (320, 3, 1, 'same'),
        (240, 4, 2, (1, 1)),
        (120, 5, 1, 'same'),
        (100, 5, 1, 'same'),
    ])
    decoded = to_three_channels(decoded)

    # ---- global branch -------------------------------------------------
    glblf = conv_1x1(prely, 128)
    glblf = avg_pool(glblf, 5)
    glblf = conv_same(glblf, 64, 3)
    gllyr = batch_norm(
        layers.Conv2DLayer(glblf, 5, filter_size=(1, 1),
                           nonlinearity=rectify),
        name="global_feature")
    glblf = deconv_stack(gllyr, [
        (256, 3, 1, 'same'),
        (128, 3, 1, 'same'),
        (128, 9, 5, (2, 2)),
        (128, 3, 1, 'same'),
        (128, 3, 1, 'same'),
        (64, 4, 2, (1, 1)),
        (64, 3, 1, 'same'),
        (64, 3, 1, 'same'),
        (32, 4, 2, (1, 1)),
        (32, 3, 1, 'same'),
        (32, 3, 1, 'same'),
    ])
    glblf = to_three_channels(glblf)

    # Sum of both branches, flattened per sample.
    summed = layers.ElemwiseSumLayer([decoded, glblf])
    network = ReshapeLayer(summed, ([0], -1))
    # Remember the auto-encoder parameters so the head's parameters can be
    # separated out below.
    old_params = layers.get_all_params(network, trainable=True)

    # ---- classification head (the newly added layers) --------------------
    aug_var = T.matrix('aug_var')
    target_var = T.imatrix('targets')

    local_head = enlyr
    for _ in range(4):
        local_head = conv_1x1(local_head, 320)
    local_head = avg_pool(local_head, 25)
    add_1 = dense_bn(local_head, 100)

    global_head = dense_bn(gllyr, 320)
    global_head = dense_bn(global_head, 320)
    add_4 = dense_bn(global_head, 100)

    aug_layer = layers.InputLayer(shape=(None, aug_fea_n), input_var=aug_var)
    cat_layer = lasagne.layers.ConcatLayer([add_1, add_4, aug_layer], axis=1)
    hidden_layer = layers.DenseLayer(cat_layer, 80,
                                     nonlinearity=leaky_rectify)
    network = layers.DenseLayer(hidden_layer, classn, nonlinearity=sigmoid)

    all_params = layers.get_all_params(network, trainable=True)
    new_params = [param for param in all_params if param not in old_params]
    return network, new_params, input_var, aug_var, target_var
def anomaly(experiment_name, dataset="mnist", bayesian_approximation="dropout", inside_labels=[0, 1], num_epochs=50, batch_size=128, acc_threshold=0.6, weight_decay=1e-5, dropout_p=0.5, fc_layers=[512, 512], plot=True):
    """Train a classifier on a subset of classes and use its uncertainty
    to detect the remaining classes as anomalies.

    After training, per-sample uncertainty measures (predictive mean/std
    and entropy, both stochastic and deterministic) are collected over the
    full test set; a logistic regression is then fitted on each measure to
    separate inside classes from outside classes, and its AUC is recorded.

    Args:
        experiment_name: row label under which results are stored.
        dataset: "mnist" (fully connected MLP) or "cifar" (convnet).
        bayesian_approximation: "dropout" or "variational"; only consulted
            for MNIST (the CIFAR branch always builds the dropout convnet).
        inside_labels: classes used for training; all other classes are
            only used for testing.  Not mutated here.
        num_epochs, batch_size, acc_threshold: forwarded to
            ``training.train`` / ``training.test``.
        weight_decay, dropout_p, fc_layers: forwarded to the model builders.
            ``fc_layers`` is not mutated here.
        plot: when true, show entropy histograms and ROC curves.

    Returns:
        A pandas DataFrame with a single row (``experiment_name``) holding
        the accuracies, the experiment settings and one AUC per measure.

    Raises:
        ValueError: for an unknown ``dataset`` or, on MNIST, an unknown
            ``bayesian_approximation`` (previously these surfaced later as
            a NameError on ``model``).

    NOTE(review): the source this was recovered from had lost its
    indentation; nesting was reconstructed from the statement order.
    """
    if dataset not in ("mnist", "cifar"):
        raise ValueError("unknown dataset: {!r}".format(dataset))
    if dataset == "mnist" and bayesian_approximation not in ("dropout",
                                                             "variational"):
        raise ValueError("unknown bayesian_approximation: {!r}".format(
            bayesian_approximation))

    n_out = len(inside_labels)

    # Prepare Theano variables for inputs and targets
    # Load the dataset
    print("Loading data...")
    if dataset == "mnist":
        input_var = T.matrix('inputs')
        target_var = T.ivector('targets')
        n_in = [28 * 28]
        X_train, y_train, X_test, y_test, X_test_all, y_test_all = \
            datasets.load_MNIST(inside_labels)
        if bayesian_approximation == "dropout":
            model = models.mlp_dropout(input_var, target_var, n_in, n_out,
                                       fc_layers, dropout_p, weight_decay)
        else:  # "variational"
            model = models.mlp_variational(input_var, target_var, n_in,
                                           n_out, fc_layers, batch_size,
                                           len(X_train) / float(batch_size))
    else:  # "cifar"
        input_var = T.tensor4('inputs')
        target_var = T.ivector('targets')
        n_in = [3, 32, 32]
        X_train, y_train, X_test, y_test, X_test_all, y_test_all = \
            datasets.load_CIFAR10(inside_labels)
        model = models.convnet_dropout(input_var, target_var, n_in, n_out,
                                       dropout_p, weight_decay)

    df = pd.DataFrame()

    # Mini-batch training with ADAM
    epochs = training.train(model, X_train, y_train, X_test, y_test,
                            batch_size, num_epochs, acc_threshold)
    # Mini-batch testing
    acc, bayes_acc = training.test(model, X_test, y_test, batch_size)
    # DataFrame.set_value was removed in pandas 1.0; .loc assignment
    # enlarges the frame in place on every pandas version.
    df.loc[experiment_name, "test_acc"] = acc
    df.loc[experiment_name, "bayes_test_acc"] = bayes_acc

    # Uncertainty prediction, grouped by true label (10 classes).
    test_mean_std_bayesian = {x: [] for x in range(10)}
    test_mean_std_deterministic = {x: [] for x in range(10)}
    test_entropy_bayesian = {x: [] for x in range(10)}
    test_entropy_deterministic = {x: [] for x in range(10)}

    for sample, label in zip(X_test_all, y_test_all):
        # Repeat the sample batch_size times so one call yields batch_size
        # predictions for it (built once, used by both calls below).
        repeated = np.tile(sample, batch_size).reshape([-1] + n_in)
        single = sample[np.newaxis, :]

        bayesian_probs = model.probabilities(repeated)
        bayesian_entropy = model.entropy_bayesian(repeated)
        classical_probs = model.probabilities_deterministic(single)[0]
        classical_entropy = model.entropy_deterministic(single)

        predictive_mean = np.mean(bayesian_probs, axis=0)
        predictive_std = np.std(bayesian_probs, axis=0)

        test_mean_std_bayesian[label].append(
            np.concatenate((predictive_mean, predictive_std)))
        test_entropy_bayesian[label].append(bayesian_entropy)
        test_entropy_deterministic[label].append(classical_entropy)
        test_mean_std_deterministic[label].append(classical_probs)

    # Plotting
    if plot:
        for k in sorted(test_mean_std_bayesian.keys()):
            sns.plt.figure()
            #sns.plt.hist(test_pred_mean[k], label = "Prediction mean for " + str(k))
            sns.plt.hist(test_entropy_bayesian[k],
                         label="Bayesian Entropy v1 for " + str(k))
            #sns.plt.hist(test_pred_std[k], label = "Prediction std for " + str(k))
            #sns.plt.hist(test_entropy_deterministic[k], label = "Classical entropy for " + str(k))
            sns.plt.legend()
            sns.plt.show()

    # Anomaly detection using simple threshold
    # (kept for reference; not called below).
    def anomaly_detection_old(anomaly_score_dict, name, df):
        threshold = np.logspace(-30, 1.0, 1000)
        acc = {}
        for t in threshold:
            tp = 0.0
            tn = 0.0
            for l in anomaly_score_dict:
                if l in inside_labels:
                    tp += (np.array(anomaly_score_dict[l]) < t).mean()
                else:
                    tn += (np.array(anomaly_score_dict[l]) >= t).mean()
            tp /= len(inside_labels)
            tn /= 10.0 - len(inside_labels)
            bal_acc = (tp + tn) / 2.0
            f1_score = 2.0 * tp / (2.0 + tp - tn)
            acc[t] = [bal_acc, f1_score, tp, tn]

        print("{}\tscore\tthreshold\tTP\tTN".format(name))
        # Best threshold by balanced accuracy.
        sorted_acc = sorted(acc.items(), key=lambda x: x[1][0], reverse=True)
        df.loc[experiment_name, name + ' bal_acc'] = sorted_acc[0][1][0]
        df.loc[experiment_name, name + ' bal_acc_threshold'] = sorted_acc[0][0]
        print("\tbalanced acc\t{:.3f}\t{:.6f}\t\t{:.3f}\t{:.3f}".format(
            sorted_acc[0][1][0], sorted_acc[0][0], sorted_acc[0][1][2],
            sorted_acc[0][1][3]))
        # Best threshold by f1 score.
        sorted_acc = sorted(acc.items(), key=lambda x: x[1][1], reverse=True)
        df.loc[experiment_name, name + ' f1_score'] = sorted_acc[0][1][1]
        df.loc[experiment_name, name + ' f1_score_threshold'] = sorted_acc[0][0]
        print("\tf1 score\t{:.3f}\t{:.6f}\t\t{:.3f}\t{:.3f}".format(
            sorted_acc[0][1][1], sorted_acc[0][0], sorted_acc[0][1][2],
            sorted_acc[0][1][3]))
        return df

    # Anomaly detection using logistic regression
    def anomaly_detection(anomaly_score_dict, name, df):
        # Label 0 = inside (trained) class, 1 = anomalous class.
        X = []
        y = []
        for l in anomaly_score_dict:
            X += anomaly_score_dict[l]
            if l in inside_labels:
                y += [0] * len(anomaly_score_dict[l])
            else:
                y += [1] * len(anomaly_score_dict[l])
        X = np.array(X)
        y = np.array(y)
        X, y = utils.shuffle(X, y, random_state=0)

        # Floor division: a float slice index raises TypeError on Python 3.
        half = len(X) // 2
        X_train, X_test = X[:half], X[half:]
        y_train, y_test = y[:half], y[half:]

        clf = linear_model.LogisticRegression(C=1.0)
        clf.fit(X_train, y_train)
        # Probability of the anomalous class; reused for the ROC curve.
        anomaly_scores = clf.predict_proba(np.array(X_test))[:, 1]
        auc = metrics.roc_auc_score(np.array(y_test), anomaly_scores)
        print("AUC", auc)
        df.loc[experiment_name, name + ' AUC'] = auc

        if plot:
            # Plot ROC curve
            fpr, tpr, thresholds = metrics.roc_curve(np.array(y_test),
                                                     anomaly_scores,
                                                     pos_label=1)
            sns.plt.figure()
            sns.plt.plot(fpr, tpr, label='ROC curve')
            sns.plt.plot([0, 1], [0, 1], 'k--')
            sns.plt.xlim([0.0, 1.0])
            sns.plt.ylim([0.0, 1.05])
            sns.plt.xlabel('False Positive Rate')
            sns.plt.ylabel('True Positive Rate')
            sns.plt.title('Receiver operating characteristic example')
            sns.plt.legend(loc="lower right")
            sns.plt.show()
        return df

    df.loc[experiment_name, 'dataset'] = dataset
    df.loc[experiment_name, 'bayesian_approx'] = bayesian_approximation
    df.loc[experiment_name, 'inside_labels'] = str(inside_labels)
    df.loc[experiment_name, 'epochs'] = epochs

    df = anomaly_detection(test_entropy_deterministic, "Classical entropy", df)
    df = anomaly_detection(test_mean_std_deterministic,
                           "Classical prediction", df)
    df = anomaly_detection(test_entropy_bayesian, "Bayesian entropy", df)
    df = anomaly_detection(test_mean_std_bayesian, "Bayesian prediction", df)
    return df
def poolfn(pool_size, ignore_border, stride, pad, mode):
    """Compile a Theano function that applies ``pool_2d`` to a 4-D tensor.

    All arguments are forwarded to ``pool_2d``; the returned callable takes
    one array and allows its input to be downcast.
    """
    images = T.tensor4()
    pool_options = dict(ignore_border=ignore_border, st=stride, padding=pad,
                        mode=mode)
    pooled = pool_2d(images, pool_size, **pool_options)
    return theano.function([images], pooled, allow_input_downcast=True)
# Set-up for training: seed the RNG, create the symbolic inputs, build the
# model and allocate one zero-initialised velocity buffer per parameter.
random_seed(args.seed)

# The input tensor type depends on the model: 4-D float32 for the convnet,
# a matrix for the MLP.
if args.model == 'convnet':
    x = T.ftensor4('x')
elif args.model == 'mlp':
    x = T.matrix('x')
else:
    raise AttributeError
y = T.matrix('y')

lr_ele = T.fscalar('lr_ele')
lr_ele_true = np.array(args.lrEle, theano.config.floatX)
mom = args.momEle  # momentum
lr_hyper = T.fscalar('lr_hyper')
grad_valid_weight = T.tensor4('grad_valid_weight')

# NOTE(review): DenseNet is built regardless of args.model; the ConvNet
# alternative is commented out -- confirm this is intended.
model = DenseNet(x=x, y=y, args=args)
#model = ConvNet(x=x, y=y, args=args)

# Shared buffers with each parameter's dtype-cast zero value, the same
# broadcast pattern, and the parameter name suffixed with '_vel'.
velocities = [theano.shared(np.asarray(param.get_value(borrow=True)*0.,
                                       dtype=theano.config.floatX),
                            broadcastable=param.broadcastable,
                            name=param.name+'_vel')
              for param in model.params_theta]
# Same construction for the entries of model.params_lambda.
lambda_velocities = [theano.shared(np.asarray(lamb.get_value(borrow=True)*0.,
                                              dtype=theano.config.floatX),
                                   broadcastable=lamb.broadcastable,
                                   name=lamb.name+'_vel')
                     for lamb in model.params_lambda]
momHyper = args.momHyper
momLlr = args.momLlr

X_elementary, Y_elementary, X_test, Y_test = load_dataset(args)  # normalized
# Use a large validation set (as in CPU experiments) to avoid overfitting the hyperparameters
#test_set.y = np.hstack(test_set.y) test_set.y = test_set.y.reshape(-1) # one hot #test_set.y = np.float32(np.eye(10)[test_set.y]) #print(test_set.X.shape) #print(test_set.y.shape) #print(test_set.X) #print(test_set.y) #exit(0) print('Building MLP...') # Prepare Theano variables for inputs and targets input = T.tensor4('inputs') target = T.vector('targets') mlp = lfc.genLfcInf(input, 10) test_output = lasagne.layers.get_output(mlp, deterministic=True) test_err = T.mean(T.neq(T.argmax(test_output, axis=1), target), dtype=theano.config.floatX) val_fn = theano.function([input, target], test_err) print("Loading the trained parameters and binarizing the weights...") # with np.load('../weights/mnist-w1a1.npz') as f: with np.load('../weights/mnist-w1a2.npz') as f: param_values = [f['arr_%d' % i] for i in range(len(f.files))]
def build_network_from_ae(classn):
    """Rebuild the auto-encoder, load its trained weights and add a
    classification head on top of it.

    The auto-encoder graph (convolutional trunk, mask-gated feature map,
    deconvolutional decoder and a summed "global" branch) is constructed
    first so that the parameter values pickled in ``filename_model_ae`` can
    be loaded into it; the mask layer's ``beta`` is then scaled by 0.8.
    The classification head reads the merged encoder layer and the global
    feature layer, concatenates them with an auxiliary input of width
    ``aug_fea_n`` and ends in ``classn`` sigmoid units.

    Relies on the module-level names ``PS``, ``aug_fea_n`` and
    ``filename_model_ae``.

    Args:
        classn: number of output units of the final dense layer.

    Returns:
        Tuple ``(network, new_params, input_var, aug_var, target_var)``
        where ``new_params`` lists ``W`` and ``b`` of every newly added
        layer, in creation order.
    """

    def conv_same(incoming, num_filters, size):
        # Batch-normalised leaky-ReLU convolution, stride 1, 'same' padding.
        return batch_norm(layers.Conv2DLayer(
            incoming, num_filters, filter_size=(size, size), stride=1,
            pad='same', nonlinearity=leaky_rectify))

    def conv_1x1(incoming, num_filters, normalize=True):
        # Leaky-ReLU 1x1 convolution, optionally batch-normalised.
        conv = layers.Conv2DLayer(incoming, num_filters, filter_size=(1, 1),
                                  nonlinearity=leaky_rectify)
        return batch_norm(conv) if normalize else conv

    def avg_pool(incoming, size):
        # Non-overlapping average pooling (stride == pool size).
        return layers.Pool2DLayer(incoming, pool_size=(size, size),
                                  stride=size, mode='average_inc_pad')

    def deconv_stack(incoming, specs):
        # Chain of batch-normalised leaky-ReLU deconvolutions; each spec is
        # (num_filters, filter size, stride, crop).
        out = incoming
        for num_filters, size, stride, crop in specs:
            out = batch_norm(layers.Deconv2DLayer(
                out, num_filters, filter_size=(size, size), stride=stride,
                crop=crop, nonlinearity=leaky_rectify))
        return out

    def to_three_channels(incoming):
        # Final linear 1x1 deconvolution down to 3 channels.
        return layers.Deconv2DLayer(incoming, 3, filter_size=(1, 1),
                                    stride=1, crop='same',
                                    nonlinearity=identity)

    def dense(incoming, num_units):
        # Plain leaky-ReLU dense layer (no batch normalisation in the head).
        return layers.DenseLayer(incoming, num_units,
                                 nonlinearity=leaky_rectify)

    # ---- convolutional trunk -------------------------------------------
    input_var = T.tensor4('input_var')
    layer = layers.InputLayer(shape=(None, 3, PS, PS), input_var=input_var)
    layer = conv_same(layer, 100, 5)
    layer = conv_same(layer, 120, 5)
    layer = avg_pool(layer, 2)
    layer = conv_same(layer, 240, 3)
    layer = conv_same(layer, 320, 3)
    layer = avg_pool(layer, 2)
    layer = conv_same(layer, 640, 3)
    prely = conv_same(layer, 1024, 3)

    # ---- feature map gated by a mask -----------------------------------
    featm = conv_1x1(prely, 640)
    feat_map = batch_norm(layers.Conv2DLayer(
        featm, 100, filter_size=(1, 1), nonlinearity=rectify,
        name="feat_map"))
    maskm = conv_1x1(prely, 100)
    mask_rep = batch_norm(
        layers.Conv2DLayer(maskm, 1, filter_size=(1, 1), nonlinearity=None),
        beta=None, gamma=None)
    mask_map = SoftThresPerc(mask_rep, perc=0.0, alpha=96.0,
                             beta=init.Constant(0.5), tight=100.0,
                             name="mask_map")
    enlyr = ChInnerProdMerge(feat_map, mask_map, name="encoder")

    # ---- decoder -------------------------------------------------------
    layer = deconv_stack(enlyr, [
        (1024, 3, 1, 'same'),
        (640, 3, 1, 'same'),
        (640, 4, 2, (1, 1)),
        (320, 3, 1, 'same'),
        (320, 3, 1, 'same'),
        (240, 4, 2, (1, 1)),
        (120, 5, 1, 'same'),
        (100, 5, 1, 'same'),
    ])
    layer = to_three_channels(layer)

    # ---- global branch -------------------------------------------------
    glblf = conv_1x1(prely, 128)
    glblf = avg_pool(glblf, 5)
    glblf = conv_same(glblf, 64, 3)
    gllyr = batch_norm(
        layers.Conv2DLayer(glblf, 5, filter_size=(1, 1),
                           nonlinearity=rectify),
        name="global_feature")
    glblf = deconv_stack(gllyr, [
        (256, 3, 1, 'same'),
        (128, 3, 1, 'same'),
        (128, 9, 5, (2, 2)),
        (128, 3, 1, 'same'),
        (128, 3, 1, 'same'),
        (64, 4, 2, (1, 1)),
        (64, 3, 1, 'same'),
        (64, 3, 1, 'same'),
        (32, 4, 2, (1, 1)),
        (32, 3, 1, 'same'),
        (32, 3, 1, 'same'),
    ])
    glblf = to_three_channels(glblf)

    # Sum of both branches, flattened per sample.
    layer = layers.ElemwiseSumLayer([layer, glblf])
    network = ReshapeLayer(layer, ([0], -1))

    # Load the trained auto-encoder weights.  The context manager closes
    # the file (the original left the handle open).
    # NOTE(review): pickle.load can execute arbitrary code; only load model
    # files from a trusted source.
    with open(filename_model_ae, 'rb') as model_file:
        pretrained_values = pickle.load(model_file)
    layers.set_all_param_values(network, pretrained_values)
    # Scale the mask layer's beta to 80% of its loaded value.
    mask_map.beta.set_value(np.float32(0.8 * mask_map.beta.get_value()))

    # ---- classification head (the newly added layers) --------------------
    aug_var = T.matrix('aug_var')
    target_var = T.imatrix('targets')

    add_a = conv_1x1(enlyr, 320, normalize=False)
    add_b = conv_1x1(add_a, 320, normalize=False)
    add_c = conv_1x1(add_b, 320, normalize=False)
    add_d = conv_1x1(add_c, 320, normalize=False)
    add_0 = avg_pool(add_d, 15)
    add_1 = dense(add_0, 100)

    add_2 = dense(gllyr, 320)
    add_3 = dense(add_2, 320)
    add_4 = dense(add_3, 100)

    aug_layer = layers.InputLayer(shape=(None, aug_fea_n), input_var=aug_var)
    cat_layer = lasagne.layers.ConcatLayer([add_1, add_4, aug_layer], axis=1)
    hidden_layer = dense(cat_layer, 80)
    network = layers.DenseLayer(hidden_layer, classn, nonlinearity=sigmoid)

    # W and b of every new layer, in creation order.
    head_layers = (add_a, add_b, add_c, add_d, add_1, add_2, add_3, add_4,
                   hidden_layer, network)
    new_params = [param
                  for head_layer in head_layers
                  for param in (head_layer.W, head_layer.b)]
    return network, new_params, input_var, aug_var, target_var
reshaped_image = cifar_data['data'].reshape( 50000, 3, 32, 32)[np.random.randint(50000, size=1000), :, :, :] transposed_image = reshaped_image.transpose(0, 2, 3, 1) # Setting learning rate l_r = theano.shared(lasagne.utils.floatX(args.initial_lr)) batch_number = int(round(len(reshaped_image) / args.batch_size)) # Noise assignment rng = np.random.RandomState(args.seed) theano_rng = RandomStreams(rng.randint(2**15)) lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15))) noise_dim = (args.batch_size, 100) noise_fg = theano_rng.normal(size=noise_dim) noise_bg = theano_rng.normal(size=noise_dim) x_inp = tensor.tensor4('x_inp', dtype='float32') # Build the network gen = construct_gen(noise_bg, noise_fg, batch_size=args.batch_size) disc, features = build_desc(x_inp) # Output of discriminator with original images. training phase, so non deterministic disc_out = lasagne.layers.get_output(disc, x_inp, deterministic=False) gen_out = lasagne.layers.get_output(gen) disc_over_gen = lasagne.layers.get_output(disc, gen_out) true_features = lasagne.layers.get_output(features, x_inp) fake_features = lasagne.layers.get_output(features, gen_out) # Loss functions. 1) Gen's 2) Disc's for predicting correctly 3) Feature matching loss false_loss = log_sum_exp(disc_over_gen) truth_loss = log_sum_exp(disc_out) disc_loss = -0.5 * tensor.mean(truth_loss) + 0.5 * tensor.mean(
def __init__(self):
    """Build the 'SSD Net' graph on a symbolic 4-D input and compile it.

    The constructor wires up every layer of the network by repeatedly
    writing the options for the next layer into ``net.layer_opts`` and then
    constructing that layer, storing it in ``net.layer`` under its name.

    Attributes set on ``self``:
        X: symbolic 4-D input tensor.
        net: the ``ConvNeuralNet`` object holding all layers and options.
        pred_func: compiled function ``X -> [label, box locations]`` where
            ``label`` is the argmax over axis 2 of the softmax output.
        test_func: compiled function ``X -> [softmax output]``.
    """
    ####################################
    #       Create model               #
    ####################################

    # Symbolic 4-D input of the network
    self.X = T.tensor4('X')

    # Container for the layers (net.layer) and the per-layer options
    # (net.layer_opts) that are filled in below.
    net = ConvNeuralNet()
    net.net_name = 'SSD Net'

    # Symbolic batch size, used later in the reshape targets
    _batch_size = self.X.shape[0]

    # Input
    net.layer['input_4d'] = InputLayer(net, self.X)

    # NOTE(review): 'pool_boder_mode' looks like a typo for
    # 'pool_border_mode' -- confirm which key Pool2DLayer actually reads.
    net.layer_opts['pool_boder_mode'] = 1
    net.layer_opts['conv2D_border_mode'] = 1

    # NOTE(review): options are presumably read by each layer constructor
    # at construction time, so an option that is not reassigned keeps the
    # value given to it for an earlier layer -- confirm against the layer
    # classes before reordering anything here.

    # Stack 1
    net.layer_opts['conv2D_filter_shape'] = (64, 3, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv1_1_W'
    net.layer_opts['conv2D_bName'] = 'conv1_1_b'
    net.layer['conv1_1'] = ConvLayer(net, net.layer['input_4d'].output)
    net.layer['relu1_1'] = ReLULayer(net.layer['conv1_1'].output)

    net.layer_opts['conv2D_filter_shape'] = (64, 64, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv1_2_W'
    net.layer_opts['conv2D_bName'] = 'conv1_2_b'
    net.layer['conv1_2'] = ConvLayer(net, net.layer['relu1_1'].output)
    net.layer['relu1_2'] = ReLULayer(net.layer['conv1_2'].output)

    net.layer_opts['pool_mode'] = 'max'
    net.layer['pool1'] = Pool2DLayer(net, net.layer['relu1_2'].output)

    # Stack 2
    net.layer_opts['conv2D_filter_shape'] = (128, 64, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv2_1_W'
    net.layer_opts['conv2D_bName'] = 'conv2_1_b'
    net.layer['conv2_1'] = ConvLayer(net, net.layer['pool1'].output)
    net.layer['relu2_1'] = ReLULayer(net.layer['conv2_1'].output)

    net.layer_opts['conv2D_filter_shape'] = (128, 128, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv2_2_W'
    net.layer_opts['conv2D_bName'] = 'conv2_2_b'
    net.layer['conv2_2'] = ConvLayer(net, net.layer['relu2_1'].output)
    net.layer['relu2_2'] = ReLULayer(net.layer['conv2_2'].output)

    net.layer['pool2'] = Pool2DLayer(net, net.layer['relu2_2'].output)

    # Stack 3
    net.layer_opts['conv2D_filter_shape'] = (256, 128, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv3_1_W'
    net.layer_opts['conv2D_bName'] = 'conv3_1_b'
    net.layer['conv3_1'] = ConvLayer(net, net.layer['pool2'].output)
    net.layer['relu3_1'] = ReLULayer(net.layer['conv3_1'].output)

    net.layer_opts['conv2D_filter_shape'] = (256, 256, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv3_2_W'
    net.layer_opts['conv2D_bName'] = 'conv3_2_b'
    net.layer['conv3_2'] = ConvLayer(net, net.layer['relu3_1'].output)
    net.layer['relu3_2'] = ReLULayer(net.layer['conv3_2'].output)

    net.layer_opts['conv2D_filter_shape'] = (256, 256, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv3_3_W'
    net.layer_opts['conv2D_bName'] = 'conv3_3_b'
    net.layer['conv3_3'] = ConvLayer(net, net.layer['relu3_2'].output)
    net.layer['relu3_3'] = ReLULayer(net.layer['conv3_3'].output)

    net.layer['pool3'] = Pool2DLayer(net, net.layer['relu3_3'].output)

    # Stack 4
    net.layer_opts['conv2D_filter_shape'] = (512, 256, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv4_1_W'
    net.layer_opts['conv2D_bName'] = 'conv4_1_b'
    net.layer['conv4_1'] = ConvLayer(net, net.layer['pool3'].output)
    net.layer['relu4_1'] = ReLULayer(net.layer['conv4_1'].output)

    net.layer_opts['conv2D_filter_shape'] = (512, 512, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv4_2_W'
    net.layer_opts['conv2D_bName'] = 'conv4_2_b'
    net.layer['conv4_2'] = ConvLayer(net, net.layer['relu4_1'].output)
    net.layer['relu4_2'] = ReLULayer(net.layer['conv4_2'].output)

    net.layer_opts['conv2D_filter_shape'] = (512, 512, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv4_3_W'
    net.layer_opts['conv2D_bName'] = 'conv4_3_b'
    net.layer['conv4_3'] = ConvLayer(net, net.layer['relu4_2'].output)
    net.layer['relu4_3'] = ReLULayer(net.layer['conv4_3'].output)

    net.layer['pool4'] = Pool2DLayer(net, net.layer['relu4_3'].output)

    # relu4_3 feeds two branches: pool4 (above) continues the trunk, while
    # the normalised copy below feeds the first pair of box predictors.
    net.layer_opts['normalize_scale'] = 20
    net.layer_opts['normalize_filter_shape'] = (512, )
    net.layer_opts['normalize_scale_name'] = 'conv4_3_scale'
    net.layer['conv4_3_norm'] = NormalizeLayer(net, net.layer['relu4_3'].output)

    # conv4_3_norm_mbox_conf
    # Each predictor head is conv -> permute to (0, 2, 3, 1) -> flatten to
    # 2 dims, so that the heads can be concatenated further down.
    net.layer_opts['conv2D_filter_shape'] = (84, 512, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv4_3_norm_mbox_conf_W'
    net.layer_opts['conv2D_bName'] = 'conv4_3_norm_mbox_conf_b'
    net.layer['conv4_3_norm_mbox_conf'] = ConvLayer(net, net.layer['conv4_3_norm'].output)

    net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
    net.layer['conv4_3_norm_mbox_conf_perm'] = PermuteLayer(net, net.layer['conv4_3_norm_mbox_conf'].output)

    net.layer_opts['flatten_ndim'] = 2
    net.layer['conv4_3_norm_mbox_conf_flat'] = FlattenLayer(net, net.layer['conv4_3_norm_mbox_conf_perm'].output)

    # conv4_3_norm_mbox_loc
    net.layer_opts['conv2D_filter_shape'] = (16, 512, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv4_3_norm_mbox_loc_W'
    net.layer_opts['conv2D_bName'] = 'conv4_3_norm_mbox_loc_b'
    net.layer['conv4_3_norm_mbox_loc'] = ConvLayer(net, net.layer['conv4_3_norm'].output)

    net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
    net.layer['conv4_3_norm_mbox_loc_perm'] = PermuteLayer(net, net.layer['conv4_3_norm_mbox_loc'].output)

    net.layer_opts['flatten_ndim'] = 2
    net.layer['conv4_3_norm_mbox_loc_flat'] = FlattenLayer(net, net.layer['conv4_3_norm_mbox_loc_perm'].output)

    # Stack 5
    # Stride and border mode are not reassigned for these three
    # convolutions; the values last written above stay in layer_opts.
    net.layer_opts['conv2D_filter_shape'] = (512, 512, 3, 3)
    net.layer_opts['conv2D_WName'] = 'conv5_1_W'
    net.layer_opts['conv2D_bName'] = 'conv5_1_b'
    net.layer['conv5_1'] = ConvLayer(net, net.layer['pool4'].output)
    net.layer['relu5_1'] = ReLULayer(net.layer['conv5_1'].output)

    net.layer_opts['conv2D_filter_shape'] = (512, 512, 3, 3)
    net.layer_opts['conv2D_WName'] = 'conv5_2_W'
    net.layer_opts['conv2D_bName'] = 'conv5_2_b'
    net.layer['conv5_2'] = ConvLayer(net, net.layer['relu5_1'].output)
    net.layer['relu5_2'] = ReLULayer(net.layer['conv5_2'].output)

    net.layer_opts['conv2D_filter_shape'] = (512, 512, 3, 3)
    net.layer_opts['conv2D_WName'] = 'conv5_3_W'
    net.layer_opts['conv2D_bName'] = 'conv5_3_b'
    net.layer['conv5_3'] = ConvLayer(net, net.layer['relu5_2'].output)
    net.layer['relu5_3'] = ReLULayer(net.layer['conv5_3'].output)

    # pool5 is the only pooling layer given explicit size/stride/padding
    net.layer_opts['pool_ignore_border'] = True
    net.layer_opts['pool_filter_size'] = (3, 3)
    net.layer_opts['pool_stride'] = (1, 1)
    net.layer_opts['pool_padding'] = (1, 1)
    net.layer['pool5'] = Pool2DLayer(net, net.layer['relu5_3'].output)

    # fc6 and fc7
    # fc6 is a 3x3 convolution with filter dilation (6, 6) and border (6, 6)
    net.layer_opts['conv2D_filter_shape'] = (1024, 512, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (6, 6)
    net.layer_opts['conv2D_filter_dilation'] = (6, 6)
    net.layer_opts['conv2D_WName'] = 'fc6_W'
    net.layer_opts['conv2D_bName'] = 'fc6_b'
    net.layer['fc6'] = ConvLayer(net, net.layer['pool5'].output)
    net.layer['relu6'] = ReLULayer(net.layer['fc6'].output)
    net.layer_opts['conv2D_filter_dilation'] = (1, 1)  # Set default filter dilation

    net.layer_opts['conv2D_filter_shape'] = (1024, 1024, 1, 1)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = 0
    net.layer_opts['conv2D_WName'] = 'fc7_W'
    net.layer_opts['conv2D_bName'] = 'fc7_b'
    net.layer['fc7'] = ConvLayer(net, net.layer['relu6'].output)
    net.layer['relu7'] = ReLULayer(net.layer['fc7'].output)

    # First sub convolution to get predicted box
    # fc7_mbox_conf
    net.layer_opts['conv2D_filter_shape'] = (126, 1024, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'fc7_mbox_conf_W'
    net.layer_opts['conv2D_bName'] = 'fc7_mbox_conf_b'
    net.layer['fc7_mbox_conf'] = ConvLayer(net, net.layer['relu7'].output)

    net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
    net.layer['fc7_mbox_conf_perm'] = PermuteLayer(net, net.layer['fc7_mbox_conf'].output)

    net.layer_opts['flatten_ndim'] = 2
    net.layer['fc7_mbox_conf_flat'] = FlattenLayer(net, net.layer['fc7_mbox_conf_perm'].output)

    # conv6_1 and conv6_2
    net.layer_opts['conv2D_filter_shape'] = (256, 1024, 1, 1)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = 0
    net.layer_opts['conv2D_WName'] = 'conv6_1_W'
    net.layer_opts['conv2D_bName'] = 'conv6_1_b'
    net.layer['conv6_1'] = ConvLayer(net, net.layer['relu7'].output)
    net.layer['conv6_1_relu'] = ReLULayer(net.layer['conv6_1'].output)

    net.layer_opts['conv2D_filter_shape'] = (512, 256, 3, 3)
    net.layer_opts['conv2D_stride'] = (2, 2)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv6_2_W'
    net.layer_opts['conv2D_bName'] = 'conv6_2_b'
    net.layer['conv6_2'] = ConvLayer(net, net.layer['conv6_1_relu'].output)
    net.layer['conv6_2_relu'] = ReLULayer(net.layer['conv6_2'].output)

    # fc7_mbox_loc
    net.layer_opts['conv2D_filter_shape'] = (24, 1024, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'fc7_mbox_loc_W'
    net.layer_opts['conv2D_bName'] = 'fc7_mbox_loc_b'
    net.layer['fc7_mbox_loc'] = ConvLayer(net, net.layer['relu7'].output)

    net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
    net.layer['fc7_mbox_loc_perm'] = PermuteLayer(net, net.layer['fc7_mbox_loc'].output)

    net.layer_opts['flatten_ndim'] = 2
    net.layer['fc7_mbox_loc_flat'] = FlattenLayer(net, net.layer['fc7_mbox_loc_perm'].output)

    # Second sub convolution to get predicted box
    # conv6_2_mbox_conf
    net.layer_opts['conv2D_filter_shape'] = (126, 512, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv6_2_mbox_conf_W'
    net.layer_opts['conv2D_bName'] = 'conv6_2_mbox_conf_b'
    net.layer['conv6_2_mbox_conf'] = ConvLayer(net, net.layer['conv6_2_relu'].output)

    net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
    net.layer['conv6_2_mbox_conf_perm'] = PermuteLayer(net, net.layer['conv6_2_mbox_conf'].output)

    net.layer_opts['flatten_ndim'] = 2
    net.layer['conv6_2_mbox_conf_flat'] = FlattenLayer(net, net.layer['conv6_2_mbox_conf_perm'].output)

    # conv7_1 and conv7_2
    net.layer_opts['conv2D_filter_shape'] = (128, 512, 1, 1)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = 0
    net.layer_opts['conv2D_WName'] = 'conv7_1_W'
    net.layer_opts['conv2D_bName'] = 'conv7_1_b'
    net.layer['conv7_1'] = ConvLayer(net, net.layer['conv6_2_relu'].output)
    net.layer['conv7_1_relu'] = ReLULayer(net.layer['conv7_1'].output)

    net.layer_opts['conv2D_filter_shape'] = (256, 128, 3, 3)
    net.layer_opts['conv2D_stride'] = (2, 2)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv7_2_W'
    net.layer_opts['conv2D_bName'] = 'conv7_2_b'
    net.layer['conv7_2'] = ConvLayer(net, net.layer['conv7_1_relu'].output)
    net.layer['conv7_2_relu'] = ReLULayer(net.layer['conv7_2'].output)

    # conv6_2_mbox_loc
    net.layer_opts['conv2D_filter_shape'] = (24, 512, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv6_2_mbox_loc_W'
    net.layer_opts['conv2D_bName'] = 'conv6_2_mbox_loc_b'
    net.layer['conv6_2_mbox_loc'] = ConvLayer(net, net.layer['conv6_2_relu'].output)

    net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
    net.layer['conv6_2_mbox_loc_perm'] = PermuteLayer(net, net.layer['conv6_2_mbox_loc'].output)

    net.layer_opts['flatten_ndim'] = 2
    net.layer['conv6_2_mbox_loc_flat'] = FlattenLayer(net, net.layer['conv6_2_mbox_loc_perm'].output)

    # Third sub convolution to get predicted box
    # conv7_2_mbox_conf
    net.layer_opts['conv2D_filter_shape'] = (126, 256, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv7_2_mbox_conf_W'
    net.layer_opts['conv2D_bName'] = 'conv7_2_mbox_conf_b'
    net.layer['conv7_2_mbox_conf'] = ConvLayer(net, net.layer['conv7_2_relu'].output)

    net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
    net.layer['conv7_2_mbox_conf_perm'] = PermuteLayer(net, net.layer['conv7_2_mbox_conf'].output)

    net.layer_opts['flatten_ndim'] = 2
    net.layer['conv7_2_mbox_conf_flat'] = FlattenLayer(net, net.layer['conv7_2_mbox_conf_perm'].output)

    # conv8_1 and conv8_2
    net.layer_opts['conv2D_filter_shape'] = (128, 256, 1, 1)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = 0
    net.layer_opts['conv2D_WName'] = 'conv8_1_W'
    net.layer_opts['conv2D_bName'] = 'conv8_1_b'
    net.layer['conv8_1'] = ConvLayer(net, net.layer['conv7_2_relu'].output)
    net.layer['conv8_1_relu'] = ReLULayer(net.layer['conv8_1'].output)

    net.layer_opts['conv2D_filter_shape'] = (256, 128, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = 0
    net.layer_opts['conv2D_WName'] = 'conv8_2_W'
    net.layer_opts['conv2D_bName'] = 'conv8_2_b'
    net.layer['conv8_2'] = ConvLayer(net, net.layer['conv8_1_relu'].output)
    net.layer['conv8_2_relu'] = ReLULayer(net.layer['conv8_2'].output)

    # conv7_2_mbox_loc
    net.layer_opts['conv2D_filter_shape'] = (24, 256, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv7_2_mbox_loc_W'
    net.layer_opts['conv2D_bName'] = 'conv7_2_mbox_loc_b'
    net.layer['conv7_2_mbox_loc'] = ConvLayer(net, net.layer['conv7_2_relu'].output)

    net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
    net.layer['conv7_2_mbox_loc_perm'] = PermuteLayer(net, net.layer['conv7_2_mbox_loc'].output)

    net.layer_opts['flatten_ndim'] = 2
    net.layer['conv7_2_mbox_loc_flat'] = FlattenLayer(net, net.layer['conv7_2_mbox_loc_perm'].output)

    # Fourth sub convolution to get predicted box
    # conv8_2_mbox_conf
    net.layer_opts['conv2D_filter_shape'] = (84, 256, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv8_2_mbox_conf_W'
    net.layer_opts['conv2D_bName'] = 'conv8_2_mbox_conf_b'
    net.layer['conv8_2_mbox_conf'] = ConvLayer(net, net.layer['conv8_2_relu'].output)

    net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
    net.layer['conv8_2_mbox_conf_perm'] = PermuteLayer(net, net.layer['conv8_2_mbox_conf'].output)

    net.layer_opts['flatten_ndim'] = 2
    net.layer['conv8_2_mbox_conf_flat'] = FlattenLayer(net, net.layer['conv8_2_mbox_conf_perm'].output)

    # conv9_1 and conv9_2
    net.layer_opts['conv2D_filter_shape'] = (128, 256, 1, 1)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = 0
    net.layer_opts['conv2D_WName'] = 'conv9_1_W'
    net.layer_opts['conv2D_bName'] = 'conv9_1_b'
    net.layer['conv9_1'] = ConvLayer(net, net.layer['conv8_2_relu'].output)
    net.layer['conv9_1_relu'] = ReLULayer(net.layer['conv9_1'].output)

    net.layer_opts['conv2D_filter_shape'] = (256, 128, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = 0
    net.layer_opts['conv2D_WName'] = 'conv9_2_W'
    net.layer_opts['conv2D_bName'] = 'conv9_2_b'
    net.layer['conv9_2'] = ConvLayer(net, net.layer['conv9_1_relu'].output)
    net.layer['conv9_2_relu'] = ReLULayer(net.layer['conv9_2'].output)

    # conv8_2_mbox_loc
    net.layer_opts['conv2D_filter_shape'] = (16, 256, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv8_2_mbox_loc_W'
    net.layer_opts['conv2D_bName'] = 'conv8_2_mbox_loc_b'
    net.layer['conv8_2_mbox_loc'] = ConvLayer(net, net.layer['conv8_2_relu'].output)

    net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
    net.layer['conv8_2_mbox_loc_perm'] = PermuteLayer(net, net.layer['conv8_2_mbox_loc'].output)

    net.layer_opts['flatten_ndim'] = 2
    net.layer['conv8_2_mbox_loc_flat'] = FlattenLayer(net, net.layer['conv8_2_mbox_loc_perm'].output)

    # Fifth sub convolution to get predicted box
    # conv9_2_mbox_conf
    net.layer_opts['conv2D_filter_shape'] = (84, 256, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv9_2_mbox_conf_W'
    net.layer_opts['conv2D_bName'] = 'conv9_2_mbox_conf_b'
    net.layer['conv9_2_mbox_conf'] = ConvLayer(net, net.layer['conv9_2_relu'].output)

    net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
    net.layer['conv9_2_mbox_conf_perm'] = PermuteLayer(net, net.layer['conv9_2_mbox_conf'].output)

    net.layer_opts['flatten_ndim'] = 2
    net.layer['conv9_2_mbox_conf_flat'] = FlattenLayer(net, net.layer['conv9_2_mbox_conf_perm'].output)

    # conv9_2_mbox_loc
    net.layer_opts['conv2D_filter_shape'] = (16, 256, 3, 3)
    net.layer_opts['conv2D_stride'] = (1, 1)
    net.layer_opts['conv2D_border_mode'] = (1, 1)
    net.layer_opts['conv2D_WName'] = 'conv9_2_mbox_loc_W'
    net.layer_opts['conv2D_bName'] = 'conv9_2_mbox_loc_b'
    net.layer['conv9_2_mbox_loc'] = ConvLayer(net, net.layer['conv9_2_relu'].output)

    net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
    net.layer['conv9_2_mbox_loc_perm'] = PermuteLayer(net, net.layer['conv9_2_mbox_loc'].output)

    net.layer_opts['flatten_ndim'] = 2
    net.layer['conv9_2_mbox_loc_flat'] = FlattenLayer(net, net.layer['conv9_2_mbox_loc_perm'].output)

    # Concat mbox_conf and mbox_loc
    # The six flattened heads are concatenated in the same source order
    # for confidences and for locations.
    net.layer['mbox_conf'] = ConcatLayer(net, [net.layer['conv4_3_norm_mbox_conf_flat'].output,
                                               net.layer['fc7_mbox_conf_flat'].output,
                                               net.layer['conv6_2_mbox_conf_flat'].output,
                                               net.layer['conv7_2_mbox_conf_flat'].output,
                                               net.layer['conv8_2_mbox_conf_flat'].output,
                                               net.layer['conv9_2_mbox_conf_flat'].output])
    net.layer['mbox_loc'] = ConcatLayer(net, [net.layer['conv4_3_norm_mbox_loc_flat'].output,
                                              net.layer['fc7_mbox_loc_flat'].output,
                                              net.layer['conv6_2_mbox_loc_flat'].output,
                                              net.layer['conv7_2_mbox_loc_flat'].output,
                                              net.layer['conv8_2_mbox_loc_flat'].output,
                                              net.layer['conv9_2_mbox_loc_flat'].output])

    # Confidences become (batch, 8732, 21) and are soft-maxed over axis 2.
    # NOTE(review): presumably 8732 boxes x 21 classes -- confirm.
    net.layer_opts['reshape_new_shape'] = (_batch_size, 8732, 21)
    net.layer['mbox_conf_reshape'] = ReshapeLayer(net, net.layer['mbox_conf'].output)

    net.layer_opts['softmax_axis'] = 2
    net.layer['mbox_conf_softmax'] = SoftmaxLayer(net, net.layer['mbox_conf_reshape'].output)

    # Locations become (batch, 8732, 4)
    net.layer_opts['reshape_new_shape'] = (_batch_size, 8732, 4)
    net.layer['mbox_loc_flatten'] = ReshapeLayer(net, net.layer['mbox_loc'].output)

    self.net = net

    # Predict function
    # label keeps the reduced axis (keepdims) so it stays 3-D
    label = T.argmax(net.layer['mbox_conf_softmax'].output, axis = 2, keepdims = True)
    self.pred_func = theano.function(
        inputs = [self.X],
        outputs = [label, net.layer['mbox_loc_flatten'].output])

    # Test function: returns the raw softmax output instead of the argmax
    self.test_func = theano.function(
        inputs = [self.X],
        outputs = [net.layer['mbox_conf_softmax'].output])
# A fully-connected layer of 256 units with 50% dropout on its inputs: network = lasagne.layers.DenseLayer( lasagne.layers.dropout(network, p=.5), num_units=256, nonlinearity=lasagne.nonlinearities.rectify) # And, finally, the 10-unit output layer with 50% dropout on its inputs: network = lasagne.layers.DenseLayer( lasagne.layers.dropout(network, p=.5), num_units=10, nonlinearity=lasagne.nonlinearities.softmax) return network input_var = T.tensor4('input_var') network=build_cnn(input_var=input_var) # And load them again later on like this: with np.load('model.npz') as f: param_values = [f['arr_%d' % i] for i in range(len(f.files))] lasagne.layers.set_all_param_values(network, param_values) test_prediction = lasagne.layers.get_output(network, deterministic=True) pred=T.argmax(test_prediction, axis=1) val_fn = theano.function([input_var], [pred]) # fourcc=cv2.cv.CV_FOURCC('X','V','I','D') # # fourcc = cv2.cv.CV_FOURCC(*'FMP4') # # o = cv2.VideoWriter('output.avi',fourcc, 60, (28,28),0) # o = VideoWriter("output.avi", frameSize=(28,28)) # o.open()
def _build_expression(self):
    """Build the pass-through expression for this object.

    A symbolic 4-D tensor of dtype ``self.input_dtype`` is created and
    stored as ``self.input_``; ``self.expression_`` is set to that very
    same variable, i.e. no operation is applied to the input.
    """
    symbolic_input = T.tensor4(dtype=self.input_dtype)
    self.input_ = symbolic_input
    # The expression is the input itself (identity).
    self.expression_ = symbolic_input
def _build_expression(self):
    """Build the max-pooling expression for this object.

    A symbolic 4-D tensor of dtype ``self.input_dtype`` is created and
    stored as ``self.input_``.  ``self.expression_`` is the result of
    ``max_pool_2d`` applied to it, with ``self.max_pool_stride`` as the
    second positional argument and border handling set to
    ``ignore_border=True``.
    """
    symbolic_input = T.tensor4(dtype=self.input_dtype)
    self.input_ = symbolic_input
    pooled = max_pool_2d(symbolic_input,
                         self.max_pool_stride,
                         ignore_border=True)
    self.expression_ = pooled
def main():
    """Benchmark loss/gradient evaluation through synk against plain Theano.

    Builds an MLP with a categorical cross-entropy loss, wraps the loss and
    the flattened gradient in ``synk.function`` objects and then prints the
    wall time of ten repetitions for each of three ways of calling them:

    * ``as_theano`` on the two halves of the data set,
    * the synk function fed with slices of its own input shared memory,
    * the synk function fed with the freshly generated arrays.
    """
    total_size = 10000
    half = total_size // 2
    n_repeats = 10

    def _time_and_report(label, step):
        # Call ``step`` n_repeats times and print the elapsed wall time.
        started = timer()
        for _ in range(n_repeats):
            step()
        elapsed = timer() - started
        print(label, elapsed)

    synk.fork()
    # Imported only after the fork, as in the original call order.
    import lasagne

    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    network = build_mlp(input_var)
    # network = build_cnn(input_var)

    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()

    params = lasagne.layers.get_all_params(network, trainable=True)
    grads = theano.grad(loss, wrt=params)
    flat_grad = T.concatenate([T.flatten(g) for g in grads])

    f_loss = synk.function([input_var, target_var], loss,
                           collect_modes=[None], reduce_ops="sum")
    f_grad = synk.function([input_var, target_var], flat_grad,
                           collect_modes=[None])

    synk.distribute()

    x_data, y_data = make_data([1, 28, 28], total_size)

    # One call of each function before the shared-memory inputs are
    # fetched; the returned values are not used.
    f_loss(x_data, y_data)
    f_grad(x_data, y_data)

    x_shmem, y_shmem = f_loss.get_input_shmems()
    x_shared = x_shmem[:total_size]
    y_shared = y_shmem[:total_size]

    x_first, x_second = x_data[:half], x_data[half:]
    y_first, y_second = y_data[:half], y_data[half:]

    def _theano_loss():
        f_loss.as_theano(x_first, y_first)
        f_loss.as_theano(x_second, y_second)

    def _theano_grad():
        f_grad.as_theano(x_first, y_first)
        f_grad.as_theano(x_second, y_second)

    _time_and_report("theano loss_time: ", _theano_loss)
    _time_and_report("theano grad_time: ", _theano_grad)

    _time_and_report("synk shmem loss_time: ",
                     lambda: f_loss(x_shared, y_shared))
    _time_and_report("synk shmem grad_time: ",
                     lambda: f_grad(x_shared, y_shared))

    _time_and_report("synk new input loss_time: ",
                     lambda: f_loss(x_data, y_data))
    _time_and_report("synk new input grad_time: ",
                     lambda: f_grad(x_data, y_data))
def transfer(photo, style, iterations=9, contentCost=0.001, styleCost=0.2e6,
             varCost=0.1e-7, rowACCost=1.e-9, colACCost=1e-9):
    """Optimise a noise image so that it matches ``photo`` in content and
    ``style`` in style, using L-BFGS on a VGG-feature loss.

    :param photo: 4-D array; its last two dimensions are used as (h, w)
    :param style: 4-D array whose last two dimensions must equal those of
        ``photo``
    :param iterations: number of images in the returned list; the initial
        noise image counts as the first one, so ``iterations - 1`` L-BFGS
        rounds are run
    :param contentCost: weight of the content loss (layer ``conv4_2``)
    :param styleCost: weight of each of the five style losses
    :param varCost: weight of the total-variation penalty
    :param rowACCost: weight of the row auto-correlation loss, only used
        when the module-level flag ``ROW_AC_LOSS`` is true
    :param colACCost: weight of the column auto-correlation loss, only used
        when the module-level flag ``COL_AC_LOSS`` is true
    :return: ``(net, xs)`` - the VGG network and the list of float64 images
        recorded before the first and after every optimisation round
    :raises AssertionError: if ``photo`` and ``style`` differ in height or
        width
    """
    # Single-argument print(...) calls behave identically under Python 2
    # (print statement with a parenthesised expression) and Python 3.
    print("Performing image transfer, with %d iterations" % iterations)
    _, _, h, w = photo.shape
    _, _, h2, w2 = style.shape
    print(photo.shape)
    print(style.shape)
    assert h == h2 and w == w2

    net = vggnet.buildVgg(w, h)
    image_shape = (1, 3, h, w)

    # Layers for loss calculation (kept as an ordered list so that names
    # and outputs are always paired in the same order):
    layer_names = ['conv4_2', 'conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
    loss_layers = [net[name] for name in layer_names]

    # Precompute layer activations for photo and artwork
    print('Precompute activations...')
    input_im_theano = T.tensor4()
    outputs = lasagne.layers.get_output(loss_layers, input_im_theano)

    def _precompute(image):
        # Evaluate every loss layer on ``image`` and freeze the results in
        # shared variables, keyed by layer name.
        return {name: theano.shared(output.eval({input_im_theano: image}))
                for name, output in zip(layer_names, outputs)}

    photo_features = _precompute(photo)
    style_features = _precompute(style)

    # Get expressions for layer activations for generated image
    print('Generating feature expressions')
    generated_image = theano.shared(floatX(np.random.uniform(-128, 128, image_shape)))
    gen_features = dict(zip(
        layer_names,
        lasagne.layers.get_output(loss_layers, generated_image)))

    # Define loss function
    lossParts = [
        # content loss
        contentCost * losses.content(photo_features, gen_features, 'conv4_2'),
    ]
    # style loss, one term per style layer
    lossParts.extend(
        styleCost * losses.style(style_features, gen_features, name)
        for name in ('conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'))
    # total variation penalty
    lossParts.append(varCost * losses.totalVariation(generated_image))

    if ROW_AC_LOSS:
        # Autocorrelation along rows, computed on the raw images
        lossParts.append(rowACCost * losses.totalRowAC(style, generated_image, None))
    if COL_AC_LOSS:
        # Autocorrelation along columns, computed on the raw images
        lossParts.append(colACCost * losses.totalColAC(style, generated_image, None))

    totalLoss = sum(lossParts)

    # Theano functions to evaluate loss and gradient
    print('Building gradient...')
    f_loss = theano.function([], totalLoss)
    f_grad = theano.function([], T.grad(totalLoss, generated_image))

    # Initialize with a noise image (a fresh draw, replacing the one the
    # shared variable was created with)
    print('Initializing noisy image...')
    generated_image.set_value(floatX(np.random.uniform(-128, 128, image_shape)))
    xAt = generated_image.get_value().astype('float64')
    xs = [xAt]

    # Helper functions to interface with scipy.optimize: the optimiser
    # works on flat float64 vectors, the graph on a shared float image.
    def eval_loss(x0):
        generated_image.set_value(floatX(x0.reshape(image_shape)))
        return f_loss().astype('float64')

    # Losses should end up in the hundreds, or lower for mfcc
    def eval_grad(x0):
        generated_image.set_value(floatX(x0.reshape(image_shape)))
        return np.array(f_grad()).flatten().astype('float64')

    # Optimize, saving the result periodically
    print('Optimizing image to reduce loss....')
    for i in range(iterations - 1):
        print(i + 1)
        scipy.optimize.fmin_l_bfgs_b(eval_loss, xAt.flatten(), fprime=eval_grad,
                                     maxfun=40, iprint=0)
        xAt = generated_image.get_value().astype('float64')
        xs.append(xAt)
        print(f_loss())

    return net, xs
def __init__(self, input_shape=(None, 3, None, None), n_classes=11, n_filters_first_conv=48, n_pool=4,
             growth_rate=12, n_layers_per_block=5, dropout_p=0.2):
    """
    This code implements the Fully Convolutional DenseNet described in https://arxiv.org/abs/1611.09326
    The network consists of a downsampling path, where dense blocks and transition down are applied,
    followed by an upsampling path where transition up and dense blocks are applied.
    Skip connections are used between the downsampling path and the upsampling path.
    Each layer is a composite function of BN - ReLU - Conv and the last layer is a softmax layer.

    :param input_shape: shape of the input batch, passed to the input layer. Only the number of
        channels (index 1 of the tuple) needs to be given; the other entries may be None
    :param n_classes: number of classes
    :param n_filters_first_conv: number of filters for the first convolution applied
    :param n_pool: number of pooling layers = number of transition down = number of transition up
    :param growth_rate: number of new feature maps created by each layer in a dense block
    :param n_layers_per_block: number of layers per block. Can be an int, or a list/tuple of
        size 2 * n_pool + 1
    :param dropout_p: dropout rate applied after each convolution (0. for not using)
    :raises AssertionError: if a list/tuple of the wrong length is given for n_layers_per_block
    :raises ValueError: if n_layers_per_block is neither an int nor a list/tuple
    """
    n_blocks = 2 * n_pool + 1
    if isinstance(n_layers_per_block, (list, tuple)):
        assert len(n_layers_per_block) == n_blocks, (
            'n_layers_per_block must have 2 * n_pool + 1 = %d entries, got %d'
            % (n_blocks, len(n_layers_per_block)))
    elif isinstance(n_layers_per_block, int):
        # Same depth for every dense block
        n_layers_per_block = [n_layers_per_block] * n_blocks
    else:
        raise ValueError(
            'n_layers_per_block must be an int or a list/tuple, got %r'
            % (type(n_layers_per_block),))

    # Theano variables
    self.input_var = T.tensor4('input_var', dtype='float32')  # input image
    self.output_var = T.tensor4('output_var', dtype='float32')  # output of the network
    self.target_var = T.tensor4('target_var', dtype='float32')  # target

    #####################
    # First Convolution #
    #####################

    inputs = InputLayer(input_shape, self.input_var)

    # We perform a first convolution. All the features maps will be stored in the tensor called stack (the Tiramisu)
    stack = Conv2DLayer(inputs, n_filters_first_conv, filter_size=3, pad='same', W=HeUniform(gain='relu'),
                        nonlinearity=linear, flip_filters=False)
    # The number of feature maps in the stack is stored in the variable n_filters
    n_filters = n_filters_first_conv

    #####################
    # Downsampling path #
    #####################

    skip_connection_list = []

    for i in range(n_pool):
        # Dense Block
        for _ in range(n_layers_per_block[i]):
            # Compute new feature maps
            new_maps = BN_ReLU_Conv(stack, growth_rate, dropout_p=dropout_p)
            # And stack it : the Tiramisu is growing
            stack = ConcatLayer([stack, new_maps])
            n_filters += growth_rate
        # At the end of the dense block, the current stack is stored in the skip_connections list
        skip_connection_list.append(stack)

        # Transition Down
        stack = TransitionDown(stack, n_filters, dropout_p)

    # Reverse so that index i of the upsampling path meets the matching
    # (deepest first) skip connection.
    skip_connection_list = skip_connection_list[::-1]

    #####################
    #     Bottleneck    #
    #####################

    # We store now the output of the next dense block in a list. We will only upsample these new feature maps
    block_to_upsample = []

    # Dense Block
    for _ in range(n_layers_per_block[n_pool]):
        new_maps = BN_ReLU_Conv(stack, growth_rate, dropout_p=dropout_p)
        block_to_upsample.append(new_maps)
        stack = ConcatLayer([stack, new_maps])
        n_filters += growth_rate

    #######################
    #   Upsampling path   #
    #######################

    for i in range(n_pool):
        # Transition Up ( Upsampling + concatenation with the skip connection)
        # Only the maps produced by the previous dense block are upsampled.
        n_filters_keep = growth_rate * n_layers_per_block[n_pool + i]
        stack = TransitionUp(skip_connection_list[i], block_to_upsample, n_filters_keep)

        # Dense Block
        block_to_upsample = []
        for _ in range(n_layers_per_block[n_pool + i + 1]):
            new_maps = BN_ReLU_Conv(stack, growth_rate, dropout_p=dropout_p)
            n_filters += growth_rate
            block_to_upsample.append(new_maps)
            stack = ConcatLayer([stack, new_maps])

    #####################
    #      Softmax      #
    #####################

    self.output_layer = SoftmaxLayer(stack, n_classes)
def main():
    """Train an encoder network together with a memory module on MNIST.

    The encoder output is fed to a ``MemoryModule`` whose loss replaces the
    usual classification loss.  After every epoch the training loss and the
    validation loss/accuracy are printed; once training has finished the
    test loss/accuracy are printed.
    """
    lrate = 1e-3
    batch_size = 32
    key_size = 256
    mem_size = 50 * 50
    k_nbrs = 128
    num_epochs = 100

    input_var = T.tensor4('x')
    target_var = T.ivector('y')

    # Single-argument print(...) calls behave identically under Python 2
    # and Python 3.
    print('Loading data and creating train/test splits... ')
    X_train, y_train, X_val, y_val, X_test, y_test = load_mnist()

    # Build our 'encoding' network
    network = build_network(input_var, image_size=X_train.shape[-1],
                            output_dim=key_size)
    network_embedding = nn.get_output(network, deterministic=False)

    # Initialize the module and compile graphs for training.
    # Note that this is where the difference between traditional neural network
    # classifiers comes in. Rather than computing a logistic regression, we use
    # the output of the memory module and triplet loss.
    MM = MemoryModule(mem_size, key_size, k_nbrs)
    mem_loss, mem_updates = MM.build_loss_and_updates(network_embedding, target_var)
    mem_loss = mem_loss.mean()

    # Use the Adam optimizer for training.
    params = nn.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(mem_loss, params, lrate, beta1=0.9)

    # Whenever we update the network parameters, we'll also update the memory
    # within the memory module
    updates.update(mem_updates)
    train_fn = theano.function([input_var, target_var], mem_loss, updates=updates)

    # For validation, we'll follow a deterministic mapping
    determ_embedding = nn.get_output(network, deterministic=True)
    mem_pred, _ = MM.query(determ_embedding)
    accuracy_expr = T.mean(T.eq(mem_pred, target_var), dtype=theano.config.floatX)
    # NOTE(review): the loss returned here is the training expression built
    # from the non-deterministic embedding; only the accuracy uses the
    # deterministic pass -- confirm this is intended.
    valid_fn = theano.function([input_var, target_var], [mem_loss, accuracy_expr])

    def _evaluate(data, labels, eval_batch_size):
        # Sum loss and accuracy of valid_fn over one unshuffled pass and
        # return (loss_sum, accuracy_sum, number_of_batches).
        loss_sum = 0
        acc_sum = 0
        n_batches = 0
        for inputs, targets in iterate_minibatches(data, labels, eval_batch_size,
                                                   shuffle=False):
            err, acc = valid_fn(inputs, targets)
            loss_sum += err
            acc_sum += acc
            n_batches += 1
        return loss_sum, acc_sum, n_batches

    # Finally, launch the training loop.
    print('Starting training...')
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for inputs, targets in iterate_minibatches(X_train, y_train, batch_size,
                                                   shuffle=True):
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err, val_acc, val_batches = _evaluate(X_val, y_val, batch_size)

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
        print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print(" validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))

    # After training, we compute and print the test error:
    test_err, test_acc, test_batches = _evaluate(X_test, y_test, 500)
    print("Final results:")
    print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print(" test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))
# darken = # brighten = augmentation_schedule = [ 'mirror', 'darken', 'blur', 'equalize_hist', 'brighten', 'noise' ] # new augmentation. color = 'gray' if img_cnls == 1 else 'bgr' num_classes, dataset_ = load_data(dataset, colorspace=color, random_seed=random_seed) #print 'data type =', dataset_[0][0].dtype #print 'shape =', dataset_[0][0].shape.eval() assert len(mlp_layer_sizes) - 1 == len(dropout_rates) mlp_layer_sizes[2] = num_classes # set num_classes based on dataset. if batch_size is None: batch_size = num_classes x = T.tensor4('x') # the data is presented as rasterized images. learning_rate = theano.shared( np.asarray(initial_learning_rate, dtype=theano.config.floatX)) classifier = CNN(rng=np.random.RandomState(seed=random_seed), input=x.reshape((batch_size, img_cnls, 224, 224)), ninput_chnls=img_cnls, nkerns=nkerns, dropout_rates=dropout_rates, mlp_layer_sizes=mlp_layer_sizes, activations=activations, batch_size=batch_size, use_bias=use_bias) res = test_net(classifier=classifier, num_classes=num_classes,
def __init__(self, input_width, input_height, n_actions, discount,
             learn_rate, batch_size, rng):
    """Build the Q-network and compile its training / evaluation functions.

    Stores the hyper-parameters on the instance, builds the network via
    ``self.build_network``, allocates shared buffers for one mini-batch of
    transitions, and compiles ``self._train`` (one SGD step on the summed
    squared TD error, returning ``[loss, q_vals]``) and ``self._q_vals``
    (Q-values for the contents of ``self.states_shared``).
    """
    self.input_width = input_width
    self.input_height = input_height
    self.n_actions = n_actions
    self.discount = discount
    self.lr = learn_rate
    self.batch_size = batch_size
    self.rng = rng

    lasagne.random.set_rng(self.rng)
    self.l_out = self.build_network(batch_size, input_width, input_height,
                                    n_actions)

    # Symbolic inputs of the loss expression.
    states = t.tensor4('states')
    next_states = t.tensor4('next_states')
    rewards = t.col('rewards')
    actions = t.icol('actions')
    terminals = t.icol('terminals')

    # Shared buffers that are substituted for the symbolic inputs.
    float_x = theano.config.floatX
    frame_shape = (batch_size, 1, input_height, input_width)
    column_shape = (batch_size, 1)
    column_pattern = (False, True)

    self.states_shared = theano.shared(np.zeros(frame_shape, dtype=float_x))
    self.next_states_shared = theano.shared(
        np.zeros(frame_shape, dtype=float_x))
    self.rewards_shared = theano.shared(
        np.zeros(column_shape, dtype=float_x),
        broadcastable=column_pattern)
    self.actions_shared = theano.shared(
        np.zeros(column_shape, dtype='int32'),
        broadcastable=column_pattern)
    self.terminals_shared = theano.shared(
        np.zeros(column_shape, dtype='int32'),
        broadcastable=column_pattern)

    q_vals = lasagne.layers.get_output(self.l_out, states)
    # No gradient flows through the next-state Q-values.
    next_q_vals = theano.gradient.disconnected_grad(
        lasagne.layers.get_output(self.l_out, next_states))

    # target = r + (1 - terminal) * discount * max_a' Q(s', a')
    not_terminal = t.ones_like(terminals) - terminals
    best_next_q = t.max(next_q_vals, axis=1, keepdims=True)
    target = rewards + not_terminal * self.discount * best_next_q

    # Q-value of the action actually taken in each row of the batch.
    taken_q = q_vals[t.arange(batch_size), actions.reshape((-1,))]
    diff = target - taken_q.reshape((-1, 1))
    loss = t.sum(0.5 * diff ** 2)

    params = lasagne.layers.helper.get_all_params(self.l_out)
    updates = lasagne.updates.sgd(loss, params, self.lr)

    train_givens = {
        states: self.states_shared,
        next_states: self.next_states_shared,
        rewards: self.rewards_shared,
        actions: self.actions_shared,
        terminals: self.terminals_shared,
    }
    self._train = theano.function([], [loss, q_vals], updates=updates,
                                  givens=train_givens)
    self._q_vals = theano.function([], q_vals,
                                   givens={states: self.states_shared})
def main(model='mlp', num_epochs=500, batch_size=500):
    """Train a Lasagne network on the data returned by ``load_dataset()``.

    Args:
        model: ``'mlp'``, ``'cnn'`` or
            ``'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID'``. Any other value
            prints a message and returns without training.
        num_epochs: number of full passes over the training data.
        batch_size: mini-batch size for the training, validation and test
            passes (default 500, the value that used to be hard-coded).

    Prints the per-epoch training/validation statistics and the final test
    loss and accuracy. Returns ``None``.
    """
    # Load the dataset
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on the `model` argument)
    print("Building model and compiling functions...")
    if model == 'mlp':
        network = build_mlp(input_var)
    elif model.startswith('custom_mlp:'):
        depth, width, drop_in, drop_hid = model.split(':', 1)[1].split(',')
        network = build_custom_mlp(input_var, int(depth), int(width),
                                   float(drop_in), float(drop_hid))
    elif model == 'cnn':
        network = build_cnn(input_var)
    else:
        print("Unrecognized model type %r." % model)
        return

    # Training loss: mean categorical cross-entropy of the stochastic
    # forward pass (dropout enabled).
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    # Update rule: SGD with Nesterov momentum on all trainable parameters.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9)

    # Validation/test expressions use a deterministic forward pass, which
    # disables dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # One compiled function performs a training step and returns the loss;
    # the other returns the validation loss and accuracy.
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    # Finally, launch the training loop.
    print("Starting training...")
    for epoch in range(num_epochs):
        # Full pass over the training data.
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for inputs, targets in iterate_minibatches(X_train, y_train,
                                                   batch_size, shuffle=True):
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # Full pass over the validation data.
        val_err = 0
        val_acc = 0
        val_batches = 0
        for inputs, targets in iterate_minibatches(X_val, y_val,
                                                   batch_size, shuffle=False):
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Report the results for this epoch.
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
        print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print(" validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))

    # After training, compute and print the test error. The accumulators
    # get their own names so the symbolic `test_acc` is not rebound.
    test_err_sum = 0
    test_acc_sum = 0
    test_batches = 0
    for inputs, targets in iterate_minibatches(X_test, y_test,
                                               batch_size, shuffle=False):
        err, acc = val_fn(inputs, targets)
        test_err_sum += err
        test_acc_sum += acc
        test_batches += 1
    print("Final results:")
    print(" test loss:\t\t\t{:.6f}".format(test_err_sum / test_batches))
    print(" test accuracy:\t\t{:.2f} %".format(
        test_acc_sum / test_batches * 100))
def training(runname, rnnType, maxPackets, packetTimeSteps, packetReverse, padOldTimeSteps, wtstd, lr, decay, clippings, dimIn, dim, attentionEnc, attentionContext, numClasses, batch_size, epochs, trainPercent, dataPath, loadPrepedData, channel): # pragma: no cover print locals() print X = T.tensor4('inputs') Y = T.matrix('targets') linewt_init = IsotropicGaussian(wtstd) line_bias = Constant(1.0) rnnwt_init = IsotropicGaussian(wtstd) rnnbias_init = Constant(0.0) classifierWts = IsotropicGaussian(wtstd) learning_rateClass = theano.shared(np.array(lr, dtype=theano.config.floatX)) learning_decay = np.array(decay, dtype=theano.config.floatX) ###DATA PREP print 'loading data' if loadPrepedData: hexSessions = loadFile(dataPath) else: sessioner = sessionizer.HexSessionizer(dataPath) hexSessions = sessioner.read_pcap() hexSessions = removeBadSessionizer(hexSessions) numSessions = len(hexSessions) print str(numSessions) + ' sessions found' hexSessionsKeys = order_keys(hexSessions) hexDict = hexTokenizer() print 'creating dictionary of ip communications' comsDict, uniqIPs = srcIpDict(hexSessions) comsDict = dictUniquerizer(comsDict) print 'initializing network graph' ###ENCODER if rnnType == 'gru': rnn = GatedRecurrent(dim=dim, weights_init = rnnwt_init, biases_init = rnnbias_init, name = 'gru') dimMultiplier = 2 else: rnn = LSTM(dim=dim, weights_init = rnnwt_init, biases_init = rnnbias_init, name = 'lstm') dimMultiplier = 4 fork = Fork(output_names=['linear', 'gates'], name='fork', input_dim=dimIn, output_dims=[dim, dim * dimMultiplier], weights_init = linewt_init, biases_init = line_bias) ###CONTEXT if rnnType == 'gru': rnnContext = GatedRecurrent(dim=dim, weights_init = rnnwt_init, biases_init = rnnbias_init, name = 'gruContext') else: rnnContext = LSTM(dim=dim, weights_init = rnnwt_init, biases_init = rnnbias_init, name = 'lstmContext') forkContext = Fork(output_names=['linearContext', 'gatesContext'], name='forkContext', input_dim=dim, output_dims=[dim, dim * 
dimMultiplier], weights_init = linewt_init, biases_init = line_bias) forkDec = Fork(output_names=['linear', 'gates'], name='forkDec', input_dim=dim, output_dims=[dim, dim*dimMultiplier], weights_init = linewt_init, biases_init = line_bias) #CLASSIFIER bmlp = BatchNormalizedMLP( activations=[Tanh(),Tanh()], dims=[dim, dim, numClasses], weights_init=classifierWts, biases_init=Constant(0.0001) ) #initialize the weights in all the functions fork.initialize() rnn.initialize() forkContext.initialize() rnnContext.initialize() forkDec.initialize() bmlp.initialize() def onestepEnc(X): data1, data2 = fork.apply(X) if rnnType == 'gru': hEnc = rnn.apply(data1, data2) else: hEnc, _ = rnn.apply(data2) return hEnc hEnc, _ = theano.scan(onestepEnc, X) #(mini*numPackets, packetLen, 1, hexdictLen) if attentionEnc: attentionmlpEnc = MLP(activations=[Tanh()], dims = [dim, 1], weights_init=attnWts, biases_init=Constant(1.0)) attentionmlpEnc.initialize() hEncAttn = T.reshape(hEnc, (-1, packetTimeSteps, dim)) def onestepEncAttn(hEncAttn): preEncattn = attentionmlpEnc.apply(hEncAttn) attEncsoft = Softmax() attEncpyx = attEncsoft.apply(preEncattn.flatten()) attEncpred = attEncpyx.flatten() attenc = T.mul(hEncAttn.dimshuffle(1,0), attEncpred).dimshuffle(1,0) return attenc attenc, _ = theano.scan(onestepEncAttn, hEncAttn) hEncReshape = T.reshape(T.sum(attenc, axis = 1), (-1, maxPackets, 1, dim)) else: hEncReshape = T.reshape(hEnc[:,-1], (-1, maxPackets, 1, dim)) #[:,-1] takes the last rep for each packet #(mini, numPackets, 1, dimReduced) #[:,-1] takes the last rep for each packet #(mini, numPackets, 1, dimReduced) def onestepContext(hEncReshape): data3, data4 = forkContext.apply(hEncReshape) if rnnType == 'gru': hContext = rnnContext.apply(data3, data4) else: hContext, _ = rnnContext.apply(data4) return hContext hContext, _ = theano.scan(onestepContext, hEncReshape) if attentionContext: attentionmlpContext = MLP(activations=[Tanh()], dims = [dim, 1], weights_init=attnWts, 
biases_init=Constant(1.0)) attentionmlpContext.initialize() hContextAttn = T.reshape(hContext, (-1,maxPackets,dim)) def onestepContextAttn(hContextAttn): preContextatt = attentionmlpContext.apply(hContextAttn) attContextsoft = Softmax() attContextpyx = attContextsoft.apply(preContextatt.flatten()) attContextpred = attContextpyx.flatten() attcontext = T.mul(hContextAttn.dimshuffle(1,0), attContextpred).dimshuffle(1,0) return attcontext attcontext, _ = theano.scan(onestepContextAttn, hContextAttn) hContextReshape = T.sum(attcontext, axis = 1) else: hContextReshape = T.reshape(hContext[:,-1], (-1,dim)) data5, _ = forkDec.apply(hContextReshape) pyx = bmlp.apply(data5) softmax = Softmax() softoutClass = softmax.apply(pyx) costClass = T.mean(CategoricalCrossEntropy().apply(Y, softoutClass)) #CREATE GRAPH cgClass = ComputationGraph([costClass]) paramsClass = VariableFilter(roles = [PARAMETER])(cgClass.variables) learning = learningfunctions.Learning(costClass,paramsClass,learning_rateClass,l1=0.,l2=0.,maxnorm=0.,c=clippings) updatesClass = learning.Adam() module_logger.info('starting graph compilation') classifierTrain = theano.function([X,Y], [costClass, hEnc, hContext, pyx, softoutClass], updates=updatesClass, allow_input_downcast=True) classifierPredict = theano.function([X], softoutClass, allow_input_downcast=True) module_logger.info('graph compilation finished') print 'finished graph compilation' trainIndex = int(len(hexSessionsKeys)*trainPercent) epochCost = [] gradNorms = [] trainAcc = [] testAcc = [] costCollect = [] trainCollect = [] module_logger.info('beginning training') iteration = 0 #epoch for epoch in xrange(epochs): #iteration/minibatch for start, end in zip(range(0, trainIndex,batch_size), range(batch_size, trainIndex, batch_size)): trainingTargets = [] trainingSessions = [] #create one minibatch with 0.5 normal and 0.5 abby normal traffic for trainKey in range(start, end): sessionForEncoding = list(hexSessions[hexSessions.keys()[trainKey]][0]) adfun = 
adversarialfunctions.Adversary(sessionForEncoding) adversaryList = [sessionForEncoding, adfun.dstIpSwapOut(comsDict, uniqIPs), adfun.portDirSwitcher(), adfun.ipDirSwitcher()] abbyIndex = random.sample(range(len(adversaryList)), 1)[0] targetClasses = [0]*numClasses targetClasses[abbyIndex] = 1 abbyTarget = np.array(targetClasses, dtype=theano.config.floatX) trainingSessions.append(abbyOneHotSes[0]) trainingTargets.append(abbyTarget) sessionsMinibatch = np.asarray(trainingSessions).reshape((-1, packetTimeSteps, 1, dimIn)) targetsMinibatch = np.asarray(trainingTargets) costfun = classifierTrain(sessionsMinibatch, targetsMinibatch) if iteration % (numSessions / (10 * batch_size)) == 0: costCollect.append(costfun[0]) trainCollect.append(np.mean(np.argmax(costfun[-1],axis=1) == np.argmax(targetsMinibatch, axis=1))) module_logger.info(' Iteration: ', iteration) module_logger.info(' Cost: ', np.mean(costCollect)) module_logger.info(' TRAIN accuracy: ', np.mean(trainCollect)) print ' Iteration: ', iteration print ' Cost: ', np.mean(costCollect) print ' TRAIN accuracy: ', np.mean(trainCollect) iteration+=1 #testing accuracy if iteration % (numSessions / (2 * batch_size)) == 0: predtar, acttar, testCollect = predictClass(classifierPredict, hexSessions, comsDict, uniqIPs, hexDict, hexSessionsKeys, numClasses, trainPercent, dimIn, maxPackets, packetTimeSteps, padOldTimeSteps) binaryPrecisionRecall(predtar, acttar, numClasses) module_logger.info(str(testCollect)) #save the models if iteration % (numSessions / (5 * batch_size)) == 0: save_model(classifierPredict) epochCost.append(np.mean(costCollect)) trainAcc.append(np.mean(trainCollect)) module_logger.info('Epoch: ', epoch) module_logger.info('Epoch cost average: ', epochCost[-1]) module_logger.info('Epoch TRAIN accuracy: ', trainAcc[-1]) print 'Epoch: ', epoch print 'Epoch cost average: ', epochCost[-1] print 'Epoch TRAIN accuracy: ', trainAcc[-1] return classifierTrain, classifierPredict
######################
# Model construction #
######################
from theano import tensor
from blocks.bricks import Rectifier, MLP  # , Softmax
# from blocks.bricks.cost import CategoricalCrossEntropy
from blocks.bricks.conv import (ConvolutionalLayer, ConvolutionalSequence,
                                Flattener)
from blocks.initialization import Uniform, Constant

# Symbolic inputs: a 4D image batch and an integer target matrix.
x = tensor.tensor4('images')
y = tensor.lmatrix('targets')

# Convolutional layers: six conv+pool stages described by three parallel
# lists (three 5x5 stages followed by three 4x4 stages, all pooled 2x2).
filter_sizes = [(5, 5)] * 3 + [(4, 4)] * 3
num_filters = [32, 32, 64, 64, 128, 256]
pooling_sizes = [(2, 2)] * 6
activation = Rectifier().apply

conv_layers = []
for filter_size, num_filters_, pooling_size in zip(filter_sizes, num_filters,
                                                   pooling_sizes):
    conv_layers.append(
        ConvolutionalLayer(activation, filter_size, num_filters_,
                           pooling_size))

# Chain the stages; the sequence takes 3-channel 260x260 images.
convnet = ConvolutionalSequence(conv_layers,
                                num_channels=3,
                                image_size=(260, 260),
                                weights_init=Uniform(0, 0.2),
                                biases_init=Constant(0.))
def main(model='cnn', batch_size=500, num_epochs=500):
    """Train the CNN from ``build_cnn`` and plot its learning curves.

    Args:
        model: accepted for interface compatibility; this function always
            builds the network with ``build_cnn`` and does not read it.
        batch_size: mini-batch size for the training, validation and test
            passes.
        num_epochs: number of full passes over the training data.

    The data is read from the names ``train_data``/``train_labels``,
    ``val_data``/``val_labels`` and ``test_data``/``test_labels``, which are
    not defined inside this function. Per-epoch statistics and the final
    test results are printed, then the training and validation loss
    histories are plotted with ``plt``. Returns ``None``.
    """
    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    network = build_cnn(input_var)

    # Training loss: mean categorical cross-entropy of the stochastic
    # forward pass, plus the matching training accuracy.
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    train_acc = T.mean(T.eq(T.argmax(prediction, axis=1), target_var),
                       dtype=theano.config.floatX)

    # Create update expressions for training
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params,
                                                learning_rate=0.001)
    #updates = lasagne.updates.adam(loss, params, learning_rate=0.1)

    # Validation/test expressions use a deterministic forward pass, which
    # disables dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Training step (returns loss and accuracy) and evaluation function.
    train_fn = theano.function([input_var, target_var], [loss, train_acc],
                               updates=updates)
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    training_hist = []
    val_hist = []

    # Whole mini-batches per epoch, for the progress line only. Floor
    # division keeps this an integer even under true division.
    total_batches = train_data.shape[0] // batch_size

    print("Starting training...")
    for epoch in range(num_epochs):
        # Full pass over the training data. The accumulators get their own
        # names so the symbolic `train_acc` above is not rebound.
        print("Training epoch {}...".format(epoch + 1))
        train_err = 0
        train_acc_sum = 0
        train_batches = 0
        start_time = time.time()
        for inputs, targets in iterate_minibatches(train_data, train_labels,
                                                   batch_size, shuffle=True):
            err, acc = train_fn(inputs, targets)
            train_err += err
            train_acc_sum += acc
            train_batches += 1
            if VERBOSE:
                print("Epoch: {} | Mini-batch: {}/{} | Elapsed time: {:.2f}s".
                      format(epoch + 1, train_batches, total_batches,
                             time.time() - start_time))
        training_hist.append(train_err / train_batches)

        # Full pass over the validation data.
        print("Validating epoch...")
        val_err = 0
        val_acc = 0
        val_batches = 0
        for inputs, targets in iterate_minibatches(val_data, val_labels,
                                                   batch_size, shuffle=False):
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1
        val_hist.append(val_err / val_batches)

        # Report the results for this epoch.
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
        print(" training accuracy:\t\t{:.2f} %".format(
            train_acc_sum / train_batches * 100))
        print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print(" validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))

    # After training, compute and print the test error.
    test_err_sum = 0
    test_acc_sum = 0
    test_batches = 0
    for inputs, targets in iterate_minibatches(test_data, test_labels,
                                               batch_size, shuffle=False):
        err, acc = val_fn(inputs, targets)
        test_err_sum += err
        test_acc_sum += acc
        test_batches += 1
    print("Final results:")
    print(" test loss:\t\t\t{:.6f}".format(test_err_sum / test_batches))
    print(" test accuracy:\t\t{:.2f} %".format(
        test_acc_sum / test_batches * 100))

    # Plot learning
    epochs_axis = range(1, num_epochs + 1)
    plt.plot(epochs_axis, training_hist, label="Training")
    plt.plot(epochs_axis, val_hist, label="Validation")
    plt.grid(True)
    plt.title("Training Curve")
    plt.xlim(1, num_epochs + 1)
    plt.xlabel("Epoch #")
    plt.ylabel("Loss")
    plt.legend(loc='best')
    plt.show()
def evaluate_lenet5(datasets, imgh, imgw, nclass, learning_rate=0.01, d=0.0003, n_epochs=500, nkerns=[20, 50], batch_size=500): """ Demonstrates lenet on MNIST dataset :rtype : object :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: path to the dataset used for training /testing (MNIST here) :type nkerns: list of ints :param nk+++++++++++++++++++++++++++++++++erns: number of kernels on each layer """ rng = numpy.random.RandomState(23455) train_set_x, train_set_y = datasets[0] test_set_x, test_set_y = datasets[1] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size n_test_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 # x = T.matrix('x') # the data is presented as rasterized images x = T.tensor4('x') y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # Reshape matrix of rasterized images of shape (batch_size, 28 * 28) # to a 4D tensor, compatible with our LeNetConvPoolLayer # (28, 28) is the size of MNIST images. 
# layer0_input = x.reshape((batch_size, 3, 60, 40)) layer0_input = x.reshape((batch_size, 3, imgh, imgw)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (60-5+1 , 40-5+1) = (56, 36) # maxpooling reduces this further to (56/2, 36/2) = (28, 18) # 4D output tensor is thus of shape (batch_size, nkerns[0], 28, 18) # image_shape=(batch_size, 3, 60, 40), layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 3, imgh, imgw), filter_shape=(nkerns[0], 3, 5, 5), poolsize=(2, 2)) # Construct the second convolutional pooling layer # filtering reduces the image size to (28-5+1, 18-5+1) = (24, 14) # maxpooling reduces this further to (24/2, 14/2) = (12, 7) # 4D output tensor is thus of shape (nkerns[0], nkerns[1], 12, 7) # image_shape=(batch_size, nkerns[0], 28, 18), lh1 = (imgh - 5 + 1) / 2 lw1 = (imgw - 5 + 1) / 2 layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], lh1, lw1), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (batch_size, nkerns[1] * 12 * 7), # or (500, 50 * 12 * 7) = (500, 3360) with the default values. 
lh2 = (lh1 - 5 + 1) / 2 lw2 = (lw1 - 5 + 1) / 2 layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * lh2 * lw2, n_out=500, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=nclass) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model # the following code is modified to suit with the small test set size test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # theano expression to decay the learning rate across epoch current_rate = theano.tensor.fscalar('current_rate') # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [(param_i, param_i - current_rate * grad_i) for param_i, grad_i in zip(params, grads)] train_model = theano.function( [index, current_rate], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) # end-snippet-1 ############### # TRAIN MODEL # ############### print '... 
training' # early-stopping parameters patience = 50 # look at least at this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant test_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_test_loss = numpy.inf learning_rate = numpy.float32(learning_rate) best_iter = 0 start_time = time.clock() epoch = 0 done_looping = False test_error = [] while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 learning_rate = learning_rate / (1 + d * (epoch - 1)) print "learning rate is %f" % learning_rate for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print 'training @ iter = ', iter cost_ij = train_model(minibatch_index, numpy.float32(learning_rate)) if (iter + 1) % test_frequency == 0: # compute zero-one loss on validation set test_losses = [test_model(i) for i in xrange(n_test_batches)] this_test_loss = numpy.mean(test_losses) test_error.append(this_test_loss) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_test_loss * 100.)) # if we got the best test score until now if this_test_loss < best_test_loss: #improve patience if loss improvement is good enough if this_test_loss < best_test_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_test_loss = this_test_loss best_iter = iter if patience <= iter: done_looping = True break end_time = time.clock() print('Optimization complete.') print( 'Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_test_loss * 100., best_iter + 1, best_test_loss * 100.)) print 'The code ran for %.2fm' % ((end_time - 
start_time) / 60.) return params, test_error
def main(L=2, z_dim=2, n_hid=1024, num_epochs=300, binary=True):
    """Load the dataset and set up the symbolic input for the model.

    The visible part of this function does not read ``L``, ``z_dim``,
    ``n_hid``, ``num_epochs`` or ``binary``.
    """
    print("Loading data...")
    X_train, X_val, X_test = load_dataset()
    # NOTE(review): takes width from axis 2 and height from axis 3; if the
    # data is laid out (batch, channels, height, width) these two names are
    # swapped -- confirm against load_dataset().
    width, height = X_train.shape[2], X_train.shape[3]
    # Symbolic 4D tensor standing in for a batch of input images.
    input_var = T.tensor4('inputs')
def __init__(self, dropout=None, opt='adam', pad='same', stride=2, kernel_width=None, dataset='mnist'): if dataset == 'mnist': weight_shapes = [ (32, 1, 3, 3), # -> (None, 16, 14, 14) (32, 32, 3, 3), # -> (None, 16, 7, 7) (32, 32, 3, 3) ] # -> (None, 16, 4, 4) elif dataset == 'cifar10': weight_shapes = [ (32, 3, 5, 5), # -> (None, 16, 16, 16) (32, 32, 5, 5), # -> (None, 16, 8, 8) (32, 32, 5, 5) ] # -> (None, 16, 4, 4) if kernel_width is not None: # OVERRIDE dataset argument!!! weight_shapes = [ (32, 1, kernel_width, kernel_width), # -> (None, 16, 14, 14) (32, 32, kernel_width, kernel_width), # -> (None, 16, 7, 7) (32, 32, kernel_width, kernel_width) ] # -> (None, 16, 4, 4) n_kernels = np.array(weight_shapes)[:, 1].sum() kernel_shape = weight_shapes[0][:1] + weight_shapes[0][2:] # needs to be consistent with weight_shapes args = [32, kernel_width, stride, pad, lasagne.nonlinearities.rectify] # num_filters, filter_size, stride, pad, nonlinearity = args self.__dict__.update(locals()) ################## if dataset == 'mnist': layer = lasagne.layers.InputLayer([None, 1, 28, 28]) elif dataset == 'cifar10': layer = lasagne.layers.InputLayer([None, 3, 32, 32]) for j, ws in enumerate(self.weight_shapes): num_filters = ws[1] layer = lasagne.layers.Conv2DLayer(layer, num_filters, filter_size, stride, pad, nonlinearity) if dropout is not None and j != len(self.weight_shapes) - 1: if dropout == 'spatial': layer = lasagne.layers.spatial_dropout(layer) else: layer = lasagne.layers.dropout(layer) layer = lasagne.layers.Pool2DLayer(layer, pool_size=2) # MLP layers layer = lasagne.layers.DenseLayer(layer, 128) if dropout is not None and j != len(self.weight_shapes) - 1: layer = lasagne.layers.dropout(layer, dropout) layer = lasagne.layers.DenseLayer(layer, 10) layer.nonlinearity = lasagne.nonlinearities.softmax self.input_var = T.tensor4('input_var') self.target_var = T.matrix('target_var') self.learning_rate = T.scalar('leanring_rate') self.dataset_size = T.scalar('dataset_size') # 
useless self.layer = layer self.y = lasagne.layers.get_output(layer, self.input_var) self.y_det = lasagne.layers.get_output(layer, self.input_var, deterministic=True) losses = lasagne.objectives.categorical_crossentropy( self.y, self.target_var) self.loss = losses.mean() + self.dataset_size * 0. self.params = lasagne.layers.get_all_params(self.layer) # reset! DEPRECATED... use add_reset, call_reset instead... params0 = lasagne.layers.get_all_param_values(self.layer) updates = {p: p0 for p, p0 in zip(self.params, params0)} self.reset = theano.function([], None, updates=updates) model.add_reset('init') if opt == 'adam': self.updates = lasagne.updates.adam(self.loss, self.params, self.learning_rate) elif opt == 'momentum': self.updates = lasagne.updates.nesterov_momentum( self.loss, self.params, self.learning_rate) elif opt == 'sgd': self.updates = lasagne.updates.sgd(self.loss, self.params, self.learning_rate) print '\tgetting train_func' self.train_func = theano.function([ self.input_var, self.target_var, self.dataset_size, self.learning_rate ], self.loss, updates=self.updates) print '\tgetting useful_funcs' self.predict_proba = theano.function([self.input_var], self.y) self.predict = theano.function([self.input_var], self.y_det.argmax(1))
# Print one metadata entry.
# NOTE(review): `md` is not bound in the visible code; presumably this is
# the body of a loop over meta_data keys -- confirm.
print md, meta_data[md]

# Experiment hyper-parameters (trailing comments are kept as found).
expt_name = meta_data["expt_name"]
learning_rate = 1e-4
image_size = 64  # 32
attn_win = 6  # 4
glimpses = 4  #8
lstm_states = 512
fg_bias_init = 0.0  # 0.2
dropout = 0.3  # 0.2
meta_data["n_iter"] = n_iter = 1500000
batch_size = 128
# NOTE(review): recorded as 2 although the output layer l_y below has a
# single unit -- confirm what "num_output" is meant to describe.
meta_data["num_output"] = 2

print "... setting up the network"

# Symbolic inputs: a 4D image batch and an integer target matrix.
X = T.tensor4("input")
y = T.imatrix("target")

# Network: input -> dropout -> SimpleARC -> one sigmoid unit.
l_in = InputLayer(shape=(None, 1, image_size, image_size), input_var=X)
l_noise = DropoutLayer(l_in, p=dropout)
l_arc = SimpleARC(l_noise, lstm_states=lstm_states, image_size=image_size,
                  attn_win=attn_win, glimpses=glimpses,
                  fg_bias_init=fg_bias_init)
l_y = DenseLayer(l_arc, 1, nonlinearity=sigmoid)

# Stochastic output for the loss; deterministic outputs for accuracy and
# for the SimpleARC embedding.
prediction = get_output(l_y)
prediction_clean = get_output(l_y, deterministic=True)
embedding = get_output(l_arc, deterministic=True)

loss = T.mean(binary_crossentropy(prediction, y))
accuracy = T.mean(binary_accuracy(prediction_clean, y))
num_units=2, nonlinearity=lasagne.nonlinearities.softmax, b=None) return l_out print("Loading data...") flower = load_image.load_flower() flower_corrupt_train, flower_truth_train, test_corrupt, test_truth = load_image.load_flower_random_mask( ) #load_image.show_image(flower_corrupt_train,3) input_var = T.tensor4('inputs') target_var = T.tensor4('target') adv_input = T.tensor4('adv_input') adv_target = T.ivector('adv_target') print("Building model and...") # Build the Generator net = build_ae(input_var) sample_reconstruct = lasagne.layers.get_output(net) loss = lasagne.objectives.squared_error(sample_reconstruct, target_var).mean() # Get network params, with specifications of manually updated ones params = lasagne.layers.get_all_params(net, trainable=True) #updates = lasagne.updates.sgd(loss,params,learning_rate=0.01) updates = lasagne.updates.adam(loss, params)
def main(
        save_to='params',
        dataset='mm',
        kl_loss='true',  # use kl-div in z-space instead of mse
        diffs='false',
        seq_length=30,
        num_epochs=1,
        lstm_n_hid=1024,
        max_per_epoch=-1):
    """Train an LSTM on sequences of latent codes and write sample GIFs.

    Frames read from an HDF5 file are encoded to latent vectors with a
    pre-trained (convolutional) VAE, an LSTM is trained to predict the next
    latent vector of each sequence, the trained parameters are saved, and a
    number of generated sequences are decoded back to images and written to
    ``sample_<idx>.gif`` in the current directory.

    Args:
        save_to: Directory in which the ``lstm_<value>.npz`` parameter file
            is written (the value is the last entry of the training history).
        dataset: ``'pf'`` or ``'mm'``; selects the pre-trained weights, the
            encoder/decoder stack and the HDF5 file to read.
        kl_loss: String flag. ``'true'`` (case-insensitive) trains with a KL
            divergence between diagonal Gaussians and predicts both the mean
            and the log-sigma of z; anything else trains with MSE on z only.
        diffs: String flag. ``'true'`` (case-insensitive) makes the sequences
            hold differences between consecutive codes; the first time step
            of each sequence is then left at zero.
        seq_length: Number of time steps fed to the LSTM per sequence
            (``seq_length + 1`` frames are encoded so that inputs and targets
            can be shifted by one step).
        num_epochs: Forwarded to ``u.train_with_hdf5``.
        lstm_n_hid: Forwarded as ``nhid`` to the LSTM builder.
        max_per_epoch: Forwarded to ``u.train_with_hdf5``.

    Raises:
        ValueError: If ``dataset`` is neither ``'pf'`` nor ``'mm'``.
    """
    kl_loss = kl_loss.lower() == 'true'
    diffs = diffs.lower() == 'true'

    # set up functions for data pre-processing and model training
    input_var = T.tensor4('inputs')

    # different experimental setup for moving mnist vs pulp fiction datasets
    if dataset == 'pf':
        img_size = 64
        cae_weights = c.pf_cae_params
        cae_specstr = c.pf_cae_specstr
        split_layer = 'conv7'
        inpvar = T.tensor4('input')
        net = m.build_cae(inpvar, specstr=cae_specstr,
                          shape=(img_size, img_size))
        convs_from_img, _ = m.encoder_decoder(cae_weights,
                                              specstr=cae_specstr,
                                              layersplit=split_layer,
                                              shape=(img_size, img_size),
                                              poolinv=True)
        laydict = dict((l.name, l) for l in nn.layers.get_all_layers(net))
        zdec_in_shape = nn.layers.get_output_shape(laydict[split_layer])
        deconv_weights = c.pf_deconv_params
        vae_weights = c.pf_vae_params
        img_from_convs = m.deconvoluter(deconv_weights, specstr=cae_specstr,
                                        shape=zdec_in_shape)
        L = 2
        vae_n_hid = 1500
        binary = False
        z_dim = 256
        l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
            m.build_vae(input_var, L=L, binary=binary, z_dim=z_dim,
                        n_hid=vae_n_hid,
                        shape=(zdec_in_shape[2], zdec_in_shape[3]),
                        channels=zdec_in_shape[1])
        u.load_params(l_x, vae_weights)
        datafile = 'data/pf.hdf5'
        frame_skip = 3  # every 3rd frame in sequence
        z_decode_layer = l_x_mu_list[0]
        pixel_shift = 0.5
        samples_per_image = 4
        tr_batch_size = 16  # must be a multiple of samples_per_image
    elif dataset == 'mm':
        img_size = 64
        cvae_weights = c.mm_cvae_params
        L = 2
        vae_n_hid = 1024
        binary = True
        z_dim = 32
        zdec_in_shape = (None, 1, img_size, img_size)
        l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
            m.build_vcae(input_var, L=L, z_dim=z_dim, n_hid=vae_n_hid,
                         binary=binary,
                         shape=(zdec_in_shape[2], zdec_in_shape[3]),
                         channels=zdec_in_shape[1])
        u.load_params(l_x, cvae_weights)
        datafile = 'data/moving_mnist.hdf5'
        frame_skip = 1
        z_decode_layer = l_x_list[0]
        pixel_shift = 0
        samples_per_image = 1
        tr_batch_size = 128  # must be a multiple of samples_per_image
    else:
        # Fail early with a clear message instead of an UnboundLocalError
        # further down when the per-dataset names are first used.
        raise ValueError(
            "unknown dataset {!r}; expected 'pf' or 'mm'".format(dataset))

    # functions for moving to/from image or conv-space, and z-space
    z_mat = T.matrix('z')
    zenc = theano.function([input_var],
                           nn.layers.get_output(l_z_mu, deterministic=True))
    zdec = theano.function(
        [z_mat],
        nn.layers.get_output(
            z_decode_layer, {l_z_mu: z_mat}, deterministic=True
        ).reshape((-1, zdec_in_shape[1]) + zdec_in_shape[2:]))
    zenc_ls = theano.function([input_var],
                              nn.layers.get_output(l_z_ls, deterministic=True))

    # functions for encoding sequences of z's
    print('compiling functions')
    z_var = T.tensor3('z_in')
    z_ls_var = T.tensor3('z_ls_in')
    tgt_mu_var = T.tensor3('z_tgt')
    tgt_ls_var = T.tensor3('z_ls_tgt')

    learning_rate = theano.shared(nn.utils.floatX(1e-4))

    # separate function definitions if we are using MSE and predicting only z,
    # or KL divergence and predicting both mean and sigma of z
    if kl_loss:

        def kl(p_mu, p_sigma, q_mu, q_sigma):
            # KL(p || q) for diagonal Gaussians, summed over all elements.
            return 0.5 * T.sum(
                T.sqr(p_sigma) / T.sqr(q_sigma) +
                T.sqr(q_mu - p_mu) / T.sqr(q_sigma) - 1 +
                2 * T.log(q_sigma) - 2 * T.log(p_sigma))

        lstm, _ = m.Z_VLSTM(z_var, z_ls_var, z_dim=z_dim, nhid=lstm_n_hid,
                            training=True)
        z_mu_expr, z_ls_expr = nn.layers.get_output(
            [lstm['output_mu'], lstm['output_ls']])
        z_mu_expr_det, z_ls_expr_det = nn.layers.get_output(
            [lstm['output_mu'], lstm['output_ls']], deterministic=True)
        # The "ls" tensors are exponentiated before being used as sigma.
        loss = kl(tgt_mu_var, T.exp(tgt_ls_var), z_mu_expr, T.exp(z_ls_expr))
        te_loss = kl(tgt_mu_var, T.exp(tgt_ls_var), z_mu_expr_det,
                     T.exp(z_ls_expr_det))
        params = nn.layers.get_all_params(lstm['output'], trainable=True)
        updates = nn.updates.adam(loss, params, learning_rate=learning_rate)
        train_fn = theano.function(
            [z_var, z_ls_var, tgt_mu_var, tgt_ls_var], loss, updates=updates)
        test_fn = theano.function(
            [z_var, z_ls_var, tgt_mu_var, tgt_ls_var], te_loss)
    else:
        lstm, _ = m.Z_LSTM(z_var, z_dim=z_dim, nhid=lstm_n_hid, training=True)
        loss = nn.objectives.squared_error(
            nn.layers.get_output(lstm['output']), tgt_mu_var).mean()
        te_loss = nn.objectives.squared_error(
            nn.layers.get_output(lstm['output'], deterministic=True),
            tgt_mu_var).mean()
        params = nn.layers.get_all_params(lstm['output'], trainable=True)
        updates = nn.updates.adam(loss, params, learning_rate=learning_rate)
        train_fn = theano.function([z_var, tgt_mu_var], loss, updates=updates)
        test_fn = theano.function([z_var, tgt_mu_var], te_loss)

    if dataset == 'pf':
        z_from_img = lambda x: zenc(convs_from_img(x))
        z_ls_from_img = lambda x: zenc_ls(convs_from_img(x))
        img_from_z = lambda z: img_from_convs(zdec(z))
    elif dataset == 'mm':
        z_from_img = zenc
        z_ls_from_img = zenc_ls
        img_from_z = zdec

    # training loop
    print('training for {} epochs'.format(num_epochs))
    # Explicit floor division: the result is used as a batch size.
    nbatch = (seq_length + 1) * tr_batch_size * frame_skip // samples_per_image
    data = u.DataH5PyStreamer(datafile, batch_size=nbatch)

    # for taking arrays of uint8 (non square) and converting them to batches
    # of sequences
    def transform_data(ims_batch, center=False):
        # Keep every frame_skip-th frame, starting at a random offset.
        imb = u.raw_to_floatX(
            ims_batch, pixel_shift=pixel_shift,
            center=center)[np.random.randint(frame_skip)::frame_skip]
        zbatch = np.zeros((tr_batch_size, seq_length + 1, z_dim),
                          dtype=theano.config.floatX)
        zsigbatch = np.zeros((tr_batch_size, seq_length + 1, z_dim),
                             dtype=theano.config.floatX)
        # Used as slice bound and reshape size, so it must be an integer.
        chunk = tr_batch_size // samples_per_image
        for i in xrange(samples_per_image):
            rows = slice(i * chunk, (i + 1) * chunk)
            if diffs:
                zf = z_from_img(imb).reshape((chunk, seq_length + 1, -1))
                zbatch[rows, 1:] = zf[:, 1:] - zf[:, :-1]
                if kl_loss:
                    zls = z_ls_from_img(imb).reshape(
                        (chunk, seq_length + 1, -1))
                    zsigbatch[rows, 1:] = zls[:, 1:] - zls[:, :-1]
            else:
                zbatch[rows] = z_from_img(imb).reshape(
                    (chunk, seq_length + 1, -1))
                if kl_loss:
                    zsigbatch[rows] = z_ls_from_img(imb).reshape(
                        (chunk, seq_length + 1, -1))
        # Inputs are steps [0, T-1], targets are the same steps shifted by one.
        if kl_loss:
            return (zbatch[:, :-1, :], zsigbatch[:, :-1, :],
                    zbatch[:, 1:, :], zsigbatch[:, 1:, :])
        return zbatch[:, :-1, :], zbatch[:, 1:, :]

    # we need sequences of images, so we do not shuffle data during training
    hist = u.train_with_hdf5(
        data,
        num_epochs=num_epochs,
        train_fn=train_fn,
        test_fn=test_fn,
        train_shuffle=False,
        max_per_epoch=max_per_epoch,
        tr_transform=lambda x: transform_data(x[0], center=False),
        te_transform=lambda x: transform_data(x[0], center=True))

    hist = np.asarray(hist)
    u.save_params(lstm['output'],
                  os.path.join(save_to, 'lstm_{}.npz'.format(hist[-1, -1])))

    # build functions to sample from LSTM
    # separate cell_init and hid_init from the other learned model parameters
    all_param_values = nn.layers.get_all_param_values(lstm['output'])
    init_indices = [
        i for i, p in enumerate(nn.layers.get_all_params(lstm['output']))
        if 'init' in str(p)
    ]
    init_values = [all_param_values[i] for i in init_indices]
    params_noinit = [
        p for i, p in enumerate(all_param_values) if i not in init_indices
    ]

    # build model without learnable init values, and load non-init parameters
    if kl_loss:
        lstm_sample, state_vars = m.Z_VLSTM(z_var, z_ls_var, z_dim=z_dim,
                                            nhid=lstm_n_hid, training=False)
    else:
        lstm_sample, state_vars = m.Z_LSTM(z_var, z_dim=z_dim,
                                           nhid=lstm_n_hid, training=False)
    nn.layers.set_all_param_values(lstm_sample['output'], params_noinit)

    # extract layers representing the hidden and cell states, and have
    # sample_fn return their outputs
    state_layers_keys = [
        k for k in lstm_sample.keys() if 'hidfinal' in k or 'cellfinal' in k
    ]
    # Sort by name first so the stable numeric sort below breaks ties by name.
    state_layers_keys = sorted(state_layers_keys)
    state_layers_keys = sorted(state_layers_keys,
                               key=lambda x: int(x.split('_')[1]))
    state_layers = [lstm_sample[s] for s in state_layers_keys]

    # NOTE(review): the predicted outputs below come from the *training*
    # network `lstm`, while the state layers come from `lstm_sample`. This
    # looks unintended (the non-init parameters were just loaded into
    # `lstm_sample`), but it is left unchanged because the keys exposed by
    # `lstm_sample` cannot be confirmed from here -- TODO confirm and switch.
    if kl_loss:
        sample_fn = theano.function(
            [z_var, z_ls_var] + state_vars,
            nn.layers.get_output(
                [lstm['output_mu'], lstm['output_ls']] + state_layers,
                deterministic=True))
    else:
        sample_fn = theano.function(
            [z_var] + state_vars,
            nn.layers.get_output([lstm['output']] + state_layers,
                                 deterministic=True))

    from images2gif import writeGif
    from PIL import Image

    # sample approximately 30 different generated video sequences
    te_stream = data.streamer(training=True, shuffled=False)
    # Clamp to 1: with fewer than 30 batches the quotient is 0 and the modulo
    # below would raise ZeroDivisionError.
    interval = max(1, data.ntrain // data.batch_size // 30)
    # Index of the first state output in sample_fn's result list.
    first_state = 2 if kl_loss else 1
    for idx, imb in enumerate(te_stream.get_epoch_iterator()):
        if idx % interval != 0:
            continue
        z_tup = transform_data(imb[0], center=True)
        seg_idx = np.random.randint(z_tup[0].shape[0])
        if kl_loss:
            z_in, z_ls_in = z_tup[0], z_tup[1]
            z_last = z_in[seg_idx:seg_idx + 1]
            z_ls_last = z_ls_in[seg_idx:seg_idx + 1]
            z_vars = [z_last, z_ls_last]
        else:
            z_in = z_tup[0]
            z_last = z_in[seg_idx:seg_idx + 1]
            z_vars = [z_last]
        images = []
        # Repeat each learned initial state once per sequence in the batch.
        state_values = [
            np.dot(np.ones((z_last.shape[0], 1), dtype=theano.config.floatX),
                   s) for s in init_values
        ]
        output_list = sample_fn(*(z_vars + state_values))

        # use whole sequence of predictions for output
        z_pred = output_list[0]
        state_values = output_list[first_state:]
        rec = img_from_z(z_pred.reshape(-1, z_dim))
        for k in xrange(rec.shape[0]):
            images.append(
                Image.fromarray(
                    u.get_picture_array(rec, index=k, shift=pixel_shift)))

        # slice prediction to feed into lstm
        z_pred = z_pred[:, -1:, :]
        if kl_loss:
            z_ls_pred = output_list[1][:, -1:, :]
            z_vars = [z_pred, z_ls_pred]
        else:
            z_vars = [z_pred]

        # predict 30 frames after the end of the priming video
        for i in xrange(30):
            output_list = sample_fn(*(z_vars + state_values))
            z_pred = output_list[0]
            state_values = output_list[first_state:]
            rec = img_from_z(z_pred.reshape(-1, z_dim))
            images.append(
                Image.fromarray(
                    u.get_picture_array(rec, index=0, shift=pixel_shift)))
            if kl_loss:
                z_ls_pred = output_list[1]
                z_vars = [z_pred, z_ls_pred]
            else:
                z_vars = [z_pred]
        writeGif("sample_{}.gif".format(idx), images, duration=0.1, dither=0)
def evaluate_lenet5(learning_rate=0.001, batch_size=1, n_epochs=75):
    """Display gradient-based saliency maps for the first ten test images.

    Rebuilds the network from the pickled parameters in ``params_v5.1.pkl``,
    compiles the gradient of one output unit with respect to the input image,
    and, for each of the first ten images in ``test_batch``, shows the
    channel-wise maximum of the absolute gradient for each of the ten output
    indices side by side.

    Args:
        learning_rate: Unused; kept for interface compatibility.
        batch_size: Batch dimension declared in the layers' ``image_shape``.
            The loop below feeds one image at a time.
        n_epochs: Unused; kept for interface compatibility.
    """
    print("Loading data...")
    rng = np.random.RandomState(23455)

    print("Loading params...")
    # Binary mode and a context manager: pickles should be read as bytes, and
    # the handle is released even if loading fails.
    with open('params_v5.1.pkl', 'rb') as params_file:
        p = cPickle.load(params_file)

    print("Sharing data...")

    print("Building architecture...")
    print("Haha =_=-b!")
    x = T.tensor4('x')
    index = T.iscalar('index')

    print("Haha =_=-b!")
    layer0 = ConvPoolLayer(rng=rng,
                           input=x,
                           image_shape=(batch_size, 3, 32, 32),
                           filter_shape=(32, 3, 5, 5),
                           activation=Relu_nonlinear,
                           poolsize=(2, 2),
                           W=p[-2],
                           b=p[-1])

    print("Haha =_=-b!")
    layer1 = ConvPoolLayer(rng=rng,
                           input=layer0.output,
                           image_shape=(batch_size, 32, 14, 14),
                           filter_shape=(50, 32, 5, 5),
                           activation=Relu_nonlinear,
                           poolsize=(2, 2),
                           W=p[-4],
                           b=p[-3])

    print("Haha =_=-b!")
    layer2 = ConvPoolLayer(rng=rng,
                           input=layer1.output,
                           image_shape=(batch_size, 50, 5, 5),
                           filter_shape=(64, 50, 5, 5),
                           activation=Relu_nonlinear,
                           poolsize=(1, 1),
                           W=p[-6],
                           b=p[-5])

    # Flatten to 2-D for the dense layers; n_in=64 below matches the 64
    # filters of layer2 (presumably a 1x1 map per filter -- TODO confirm
    # against ConvPoolLayer's border mode).
    layer3_input = layer2.output.flatten(2)

    print("Haha =_=-b!")
    layer3 = HiddenLayer(rng=rng,
                         input=layer3_input,
                         n_in=64,
                         n_out=56,
                         activation=T.tanh,
                         W=p[4],
                         b=p[5])

    print("Haha =_=-b!")
    layer4 = HiddenLayer(rng=rng,
                         input=layer3.output,
                         n_in=56,
                         n_out=10,
                         activation=T.tanh,
                         W=p[2],
                         b=p[3])

    print("Haha =_=-b!")
    layer5 = LogisticRegression(input=layer4.output,
                                n_in=10,
                                n_out=10,
                                W=p[0],
                                b=p[1])

    print("Haha =_=-b!")
    prediction = layer5.p_y_given_x_in

    print("Haha =_=-b!, T.grad!!")
    # Gradient of the selected output unit (first sample) w.r.t. the input.
    grad = T.grad(prediction[0, index], x)

    print("Compiling function...")
    ScI = theano.function([x, index], grad)

    with open('test_batch', 'rb') as test_file:
        test_batch = cPickle.load(test_file)

    test_set_x = np.asarray(test_batch['data'], dtype='float32')
    test_set_y = np.asarray(test_batch['labels'], dtype='float32')

    plt.show()
    for i in xrange(10):
        # Canvas of 2 x 10 tiles of 32 x 32; only the bottom row is filled
        # here, the top row stays zero.
        map_out = np.zeros([32 * 2, 32 * 10])
        print(test_set_y[i])
        x_in = test_set_x[i, :].reshape([1, 3, 32, 32])
        for l in xrange(10):
            x_out = ScI(x_in, l)
            # (3, 32, 32) -> (32, 32, 3), then |grad| maximised over channels.
            x_out = np.transpose(x_out.reshape([3, 32, 32]), [1, 2, 0])
            x_out = np.abs(x_out).max(axis=2)
            map_out[32:, l * 32:(l + 1) * 32] = x_out
        print("haha")
        plt.imshow(map_out, cmap="Greys_r")
        plt.show()
def main_training(key, X_train, y_train, X_val, y_val, geneStore,
                  model='cnn', num_epochs=500):
    """Train a regression network and store its validation predictions.

    Builds the network with ``build_cnn``, minimises the mean squared error
    between its output and the targets with Adam, prints the training and
    validation loss after every epoch, and finally writes the predictions
    for ``X_val`` together with the true values and inputs to ``geneStore``.

    Args:
        key: Prefix under which results are stored in ``geneStore``
            (``key + '/prediction'``, ``key + '/true_expres'``,
            ``key + '/X'``).
        X_train: Training inputs; the size of axis 1 is used as the input
            vector length.
        y_train: Training targets.
        X_val: Validation inputs.
        y_val: Validation targets.
        geneStore: Mapping-like store supporting item assignment of
            DataFrames (presumably a pandas HDFStore -- TODO confirm).
        model: Model type; only ``'cnn'`` is supported.
        num_epochs: Number of passes over the training data.

    Raises:
        ValueError: If ``model`` is not a recognised model type.
    """
    print("loading data...")

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('input')
    target_var = T.fmatrix('targets')
    lengthOfInputVector = np.shape(X_train)[1]

    def as_4d(batch):
        # The network input is 4-D: (n_samples, 1, 1, lengthOfInputVector).
        return np.reshape(batch, (batch.shape[0], 1, 1, lengthOfInputVector))

    def mean_loss(total, n_batches):
        # NaN rather than ZeroDivisionError when the iterator yielded nothing
        # (e.g. fewer samples than one minibatch).
        return total / n_batches if n_batches else float('nan')

    # Create neural network model
    print("Building model and compiling functions...")
    if model == 'cnn':
        network = build_cnn(input_var, lengthOfInputVector=lengthOfInputVector)
    else:
        # Previously this only printed and then failed later with a
        # NameError on `network`.
        raise ValueError("Unrecognized model type {}".format(model))

    # Training loss: mean squared error of the (stochastic) forward pass.
    prediction = lasagne.layers.get_output(network)
    loss = T.mean((prediction - target_var)**2)

    # Update expressions for training: Adam over all trainable parameters.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(loss, params)

    # Validation loss: same objective, but with a deterministic forward pass
    # (disables dropout layers).
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = T.mean((test_prediction - target_var)**2)

    # Compile a function performing a training step on a mini-batch and
    # returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and prediction:
    val_fn = theano.function([input_var, target_var],
                             [test_loss, test_prediction])

    # Note: compiled from the non-deterministic `prediction` expression.
    prediction_for_gene_expres = theano.function([input_var], prediction)

    # Finally, launch the training loop.
    print("Starting training...")
    minibatch_size = 100
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for inputs, targets in iterate_minibatches(X_train, y_train,
                                                   minibatch_size,
                                                   shuffle=True):
            train_err += train_fn(as_4d(inputs), targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_batches = 0
        for inputs, targets in iterate_minibatches(X_val, y_val,
                                                   minibatch_size,
                                                   shuffle=False):
            err, _ = val_fn(as_4d(inputs), targets)
            val_err += err
            val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(
            mean_loss(train_err, train_batches)))
        print("  validation loss:\t\t{:.6f}".format(
            mean_loss(val_err, val_batches)))

    # store data in HDFStore
    inputs = as_4d(pd.DataFrame(X_val).values.astype(np.float32))
    predicted_gene_expres = prediction_for_gene_expres(inputs)
    geneStore[key + '/prediction'] = pd.DataFrame(
        np.array(predicted_gene_expres).flatten())
    geneStore[key + '/true_expres'] = pd.DataFrame(np.array(y_val).flatten())
    geneStore[key + '/X'] = pd.DataFrame(X_val)