示例#1
0
def theano_kernel_derivative(imshp,kshp,featshp,stride=1):
    """Compile a Theano function computing the kernel gradient of a conv reconstruction.

    The graph reconstructs an image estimate from ``features`` and a flipped,
    axis-swapped ``kernel`` (full-mode ``conv2d``), subtracts it from ``image``
    and convolves the resulting error with the features.

    Parameters
    ----------
    imshp, kshp, featshp : indexable with four entries
        Static shapes passed to ``conv2d`` as shape hints for the image,
        the kernel and the features.
    stride : int
        Multiplier applied to the last two feature dims to form the
        "logical" shapes passed to ``conv2d``.

    Returns
    -------
    Compiled function taking ``(image, features, kernel)`` and returning the
    negated error/feature convolution with its first two axes swapped back.
    """
    def swap_leading(shape):
        # Exchange the first two entries of a four-entry shape.
        return (shape[1], shape[0], shape[2], shape[3])

    floatx = theano.config.floatX
    features = T.tensor4(dtype=floatx)
    kernel = T.tensor4(dtype=floatx)
    image = T.tensor4(dtype=floatx)

    # Feature-map extent after scaling by the stride.
    strided_rows = featshp[2] * stride
    strided_cols = featshp[3] * stride

    # Swap the first two kernel axes and reverse both spatial axes
    # (the original note: needed for correlation).
    flipped_kernel = T.transpose(kernel[:, :, ::-1, ::-1], axes=[1, 0, 2, 3])
    reconstruction = conv2d(
        features, flipped_kernel, border_mode='full',
        image_shape=featshp, filter_shape=swap_leading(kshp),
        imshp_logical=(featshp[1], strided_rows, strided_cols),
        kshp_logical=kshp[2:])

    residual = image - reconstruction

    # Same axis swap / spatial reversal applied to the error term.
    residual_rot = T.transpose(residual, [1, 0, 2, 3])[:, :, ::-1, ::-1]
    features_rot = T.transpose(features, [1, 0, 2, 3])
    imshp_rot = swap_leading(imshp)
    grad_rot = -conv2d(
        residual_rot, features_rot,
        image_shape=imshp_rot, filter_shape=swap_leading(featshp),
        imshp_logical=imshp_rot[1:],
        kshp_logical=(strided_rows, strided_cols))

    return function(inputs=[image, features, kernel],
                    outputs=T.transpose(grad_rot, [1, 0, 2, 3]))
示例#2
0
    def test_graph(self):
        """Grouped convolution must equal per-group convolutions concatenated on axis 1."""
        n_groups = 3
        floatx = theano.config.floatX
        bottom = np.random.rand(3, 6, 5, 5).astype(floatx)
        kern = np.random.rand(9, 2, 3, 3).astype(floatx)
        bottom_sym = T.tensor4('bottom')
        kern_sym = T.tensor4('kern')
        symbolic_inputs = [bottom_sym, kern_sym]

        # Graph under test: a single op handling all groups at once.
        grouped = self.conv(num_groups=n_groups)(bottom_sym, kern_sym)
        gconv_func = theano.function(symbolic_inputs, grouped, mode=self.mode)

        # Reference graph: slice channels/filters per group, convolve each
        # slice separately, then stitch the results together.
        kern_step = kern_sym.shape[0] // n_groups
        bottom_step = bottom_sym.shape[1] // n_groups
        pieces = []
        for g in range(n_groups):
            bottom_slice = bottom_sym[:, g * bottom_step:(g + 1) * bottom_step, :, :]
            kern_slice = kern_sym[g * kern_step:(g + 1) * kern_step, :, :, :]
            pieces.append(self.conv()(bottom_slice, kern_slice))
        reference = T.concatenate(pieces, axis=1)
        conv_func = theano.function(symbolic_inputs, reference, mode=self.mode)

        # Evaluate both graphs on the same data and compare.
        gconv_output = gconv_func(bottom, kern)
        conv_output = conv_func(bottom, kern)
        utt.assert_allclose(gconv_output, conv_output)
示例#3
0
def fix_gpu_transfer():
    """Compile the convolutional L2 cost, dump its graph and time it.

    Builds ``0.5 * sum(error ** 2)`` from ``helper_T_l2_cost_conv`` with fixed
    kernel/feature shapes, prints the compiled graph with
    ``theano.printing.debugprint`` and reports the mean wall-clock time of
    ten evaluations on random float32 inputs.
    """
    from time import time as now

    import theano
    from theano import tensor as T

    kshp = (10, 2, 10, 10)
    featshp = (3, 10, 11, 11)
    stride = 8
    mask = False
    # num images, channels, szy, szx
    imshp = (featshp[0], kshp[1],
             featshp[2] * stride + kshp[2] - 1,
             featshp[3] * stride + kshp[3] - 1)

    x = T.tensor4()
    a = T.tensor4()
    A = T.tensor4()

    image_error = helper_T_l2_cost_conv(x, a, A, imshp, kshp, featshp,
                                        stride=(stride, stride), mask=mask)
    cost = .5 * T.sum(image_error ** 2)

    func = function([x, a, A], cost)
    theano.printing.debugprint(func)

    x_in = np.random.randn(*imshp).astype(np.float32)
    a_in = np.random.randn(*featshp).astype(np.float32)
    A_in = np.random.randn(*kshp).astype(np.float32)

    repeats = 10
    t0 = now()
    for _ in range(repeats):
        # Result is discarded: only the elapsed time matters here.
        func(x_in, a_in, A_in)
    t = now() - t0
    # Single-argument call form prints identically on Python 2 and Python 3.
    print('time / iter = %f' % (t / repeats))
示例#4
0
 def __init__(self,test_data_x,test_data_y):
     """Keep the test data and compile a Theano function evaluating ``dice_coef``."""
     self.test_data_x = test_data_x
     self.test_data_y = test_data_y

     test_var = T.tensor4('test')
     pred_var = T.tensor4('pred')
     # Compiled callable: (test, pred) -> dice_coef(test, pred)
     self.dc = theano.function([test_var, pred_var],
                               dice_coef(test_var, pred_var))
示例#5
0
    def __init__(self):
        """Assemble the two-input convolutional model and compile its feature function.

        ``self.f`` is compiled from the model as it stands right after the
        ``Flatten`` layer; the fully connected head is appended afterwards and
        the complete model is built with ``CostFunc.nll`` and ``RMSprop``.
        """
        first_input = T.tensor4()
        second_input = T.tensor4()
        inputs = [first_input, second_input]
        targets = [T.ivector()]

        def shared_conv(filter_shape, **extra):
            # Weight-shared convolution with a fresh L1 regulariser on W.
            return Conv(filter_shape=filter_shape,
                        regularizers={'W': l1(0.0001)},
                        w_shared=True, **extra)

        model = Model()

        def add_pool_tanh():
            # 2x2 pooling followed by a tanh activation.
            model.add(Pooling(pool_size=(2, 2)))
            model.add(Activation(mode='tanh'))

        # conv1 (both inputs)
        model.add(shared_conv((32, 3, 3, 3), n_inputs=2))
        model.add(shared_conv((32, 32, 2, 2), n_inputs=2))
        add_pool_tanh()
        # conv2 (both inputs)
        model.add(shared_conv((32, 32, 3, 3), n_inputs=2))
        add_pool_tanh()
        # abs_diff
        model.add(Abs_diff())
        # conv3
        model.add(shared_conv((32, 32, 3, 3)))
        add_pool_tanh()
        model.add(Flatten())

        # Compiled before the dense head is added, so it evaluates the model
        # only up to the Flatten layer.
        self.f = theano.function(inputs, model.f(inputs, is_train=True))

        model.add(Fully((2880, 512)))
        model.add(Activation(mode='tanh'))
        model.add(Dropout(0.5))
        model.add(Fully((512, 2)))
        model.add(Activation(mode='softmax'))
        model.build(CostFunc.nll, RMSprop(), inputs, targets)
        self.model = model
示例#6
0
def run_network_on_image():
    """Score matching and non-matching sequence pairs with the comparison net and plot both histograms.

    Parameters are loaded from ``net.npy`` when that file exists. The network
    output for each pair is passed through ``np_standardize``,
    ``get_shifted_correlations`` and ``optimize_dp``; the min/max of both
    score sets are printed and their histograms overlaid.
    """
    import make_seqs
    ims1, ims1a, ims2 = make_seqs.make_seqs(slength=6, num_seqs=1000)

    input_var1 = T.tensor4('inputs')
    input_var2 = T.tensor4('inputs_comp')

    network = compare_net.build_cnn_new_conv(input_var1, input_var2)
    if os.path.isfile('net.npy'):
        lasagne.layers.set_all_param_values(network, np.load('net.npy'))
    test_fn = theano.function(
        [input_var1, input_var2],
        [lasagne.layers.get_output(network, deterministic=True)])

    # Each stage handles the "same" pair first, then the "different" pair.
    raw = [test_fn(ims1, partner)[0] for partner in (ims1a, ims2)]
    standardized = [np_standardize(r) for r in raw]
    correlations = [get_shifted_correlations(s) for s in standardized]
    dps, dpd = [optimize_dp(c) for c in correlations]

    print(np.min(dps),np.max(dps),np.min(dpd),np.max(dpd))
    import pylab as py
    py.figure(1)
    for scores in (dps, dpd):
        py.hist(scores, alpha=.5)
    py.show()
    print('done ')
示例#7
0
    def test_theano_transposed_convolution(self):
        """Usage example / regression check for ``t_mk_conv_transpose``.

        The filters are substituted from ``self.filters`` via ``givens``; the
        compiled function must reproduce the hard-coded 5x5 result exactly.
        """
        from deconv.tdeconv_utils import t_mk_conv_transpose
        floatx = theano.config.floatX
        conv_in = T.tensor4(name='conv_in', dtype=floatx)
        filters = T.tensor4(name='filters', dtype=floatx)
        transposed_conv = theano.function(
            [conv_in],
            t_mk_conv_transpose(conv_in, filters),
            givens=[(filters, self.filters)]
        )

        # Two 3x3 input maps: a vertical bar and a horizontal bar.
        vertical_bar = [[0, 1, 0],
                        [0, 1, 0],
                        [0, 1, 0]]
        horizontal_bar = [[0, 0, 0],
                          [1, 1, 1],
                          [0, 0, 0]]
        sample = np.array([[vertical_bar, horizontal_bar]], dtype=floatx)

        expected = np.array(
            [[[[ 0,  0,  0,  0,  0],
               [-1, -1, -1,  0,  0],
               [ 0,  0,  0,  1,  0],
               [ 0,  0,  0,  1,  0],
               [ 0,  0,  0,  1,  0]]]],
            dtype=floatx
        )
        result = transposed_conv(sample)
        assert_true(np.all(result == expected))
def create_iter_funcs_valid(l_out, bs=None, N=50, mc_dropout=False):
    """Compile the validation function for the network ending in ``l_out``.

    Parameters
    ----------
    l_out : output layer passed to ``layers.get_output``.
    bs : int or None
        Number of splits used in the MC-dropout branch; required when
        ``mc_dropout`` is true.
    N : int
        Number of times each input is repeated along axis 0 for MC dropout.
    mc_dropout : bool
        If false, predictions come from a single deterministic pass. If
        true, the inputs are repeated ``N`` times, evaluated
        non-deterministically and averaged per split.

    Returns
    -------
    Compiled function ``(X_batch, y_batch) -> [loss, accuracy]``.

    Raises
    ------
    ValueError
        If ``mc_dropout`` is true and ``bs`` is None.
    """
    X = T.tensor4('X')
    y = T.ivector('y')
    X_batch = T.tensor4('X_batch')
    y_batch = T.ivector('y_batch')

    if not mc_dropout:
        y_hat = layers.get_output(l_out, X, deterministic=True)
    else:
        if bs is None:
            raise ValueError('a fixed batch size is required for mc dropout')
        X_repeat = T.extra_ops.repeat(X, N, axis=0)
        y_sample = layers.get_output(
            l_out, X_repeat, deterministic=False)

        # Split sizes must be integers: `//` keeps the ratio an integer
        # expression on Python 3 as well, where `/` is true division.
        sizes = [X_repeat.shape[0] // X.shape[0]] * bs
        y_sample_split = T.as_tensor_variable(
            T.split(y_sample, sizes, bs, axis=0))
        y_hat = T.mean(y_sample_split, axis=1)

    valid_loss = T.mean(
        T.nnet.categorical_crossentropy(y_hat, y))
    valid_acc = T.mean(
        T.eq(y_hat.argmax(axis=1), y))

    # Plain variables replace the deprecated `theano.Param` wrappers; with no
    # extra options the two forms are equivalent.
    valid_iter = theano.function(
        inputs=[X_batch, y_batch],
        outputs=[valid_loss, valid_acc],
        givens={
            X: X_batch,
            y: y_batch,
        },
    )

    return valid_iter
def make_apply_gabor_function(filter_stack_shape,complex_cell=True):
    """Compile a function that applies a stack of filters to a stimulus tensor.

    Parameters
    ----------
    filter_stack_shape : shape hint forwarded to ``conv2d`` as ``filter_shape``.
    complex_cell : bool
        If true, the compiled function takes ``(stim, real_filters,
        imag_filters)`` and returns ``sqrt(real**2 + imag**2)`` of the two
        full-mode convolutions. Otherwise it takes ``(stim, real_filters)``
        and returns the real convolution only.

    Notes
    -----
    The original comments describe the stimulus as
    T x n_color_channels x stim_size x stim_size and the filter stacks as
    D x n_color_channels x stim_size x stim_size -- TODO confirm with callers.
    """
    stim_tnsr = tnsr.tensor4('stim_tnsr')
    real_filter_stack_tnsr = tnsr.tensor4('real_feature_map_tnsr')
    imag_filter_stack_tnsr = tnsr.tensor4('imag_feature_map_tnsr')

    def full_conv(filter_stack):
        # Full-mode convolution of the stimulus with one filter stack.
        return tnsr.nnet.conv2d(stim_tnsr,
                                filter_stack,
                                filter_shape=filter_stack_shape,
                                border_mode='full')

    real_maps = full_conv(real_filter_stack_tnsr)
    imag_maps = full_conv(imag_filter_stack_tnsr)

    if not complex_cell:
        return function(inputs = [stim_tnsr,real_filter_stack_tnsr],
                        outputs = real_maps)

    # Complex gabors: combine real and imaginary responses into a magnitude.
    magnitude = tnsr.sqrt(tnsr.sqr(real_maps) + tnsr.sqr(imag_maps))
    return function(inputs = [stim_tnsr,real_filter_stack_tnsr,imag_filter_stack_tnsr],
                    outputs = magnitude)
示例#10
0
文件: mnist.py 项目: kundan2510/nn
def get_every_layer_functions_only_h():
    """Compile one Theano function per stage of the horizontal-stack pixel network.

    Returns
    -------
    list
        ``[input_stage, layer_1, ..., layer_PIXEL_CNN_LAYERS, output_stage]``.
        Each entry is a compiled function taking a single 4-D tensor. The
        output stage applies three 1x1 ``Conv2D`` layers (the first two
        followed by ``PixCNNGate``) and a sigmoid.
    """
    functions = []
    img = T.tensor4('img')
    X_h = next_stacks_only_h(
                img, N_CHANNELS, "Dec.PixInput",
                filter_size = 7,
                hstack = "hstack_a", residual = False
                )
    # Single-argument print calls behave the same on Python 2 and Python 3.
    print("Compiling layer 0 .......")
    functions.append(theano.function([img], X_h))

    for i in range(PIXEL_CNN_LAYERS):
        # Every layer gets its own fresh symbolic input.
        X_h_inp = T.tensor4('X_h_inp')
        X_h = next_stacks_only_h(X_h_inp, DIM_PIX, "Dec.Pix"+str(i+1), filter_size = PIXEL_CNN_FILTER_SIZE)
        print("Compiling layer {} .......".format(i+1))
        functions.append(theano.function([X_h_inp],  X_h))

    X_h = T.tensor4('X_h')
    output = lib.ops.conv2d.Conv2D('Dec.PixOut1', input_dim=DIM_PIX, output_dim=2*DIM_PIX, filter_size=1, inputs=X_h)
    output = PixCNNGate(output)

    output = lib.ops.conv2d.Conv2D('Dec.PixOut2', input_dim=DIM_PIX, output_dim=2*DIM_PIX, filter_size=1, inputs=output)
    output = PixCNNGate(output)

    output = lib.ops.conv2d.Conv2D('Dec.PixOut3', input_dim=DIM_PIX, output_dim=N_CHANNELS, filter_size=1, inputs=output, he_init=False)
    output = T.nnet.sigmoid(output)
    print("Compiling output function :)")
    functions.append(theano.function([X_h], output))


    return functions
示例#11
0
def create_iter_funcs_train(l_out, lr, mntm, wd):
    """Compile the training-step function for the network ending in ``l_out``.

    Parameters
    ----------
    l_out : output layer passed to ``layers.get_output``.
    lr : learning rate passed to ``nesterov_momentum``.
    mntm : momentum passed to ``nesterov_momentum``.
    wd : weight of the L2 penalty added to the loss.

    Returns
    -------
    Compiled function ``(X_batch, y_batch) -> [loss, accuracy]`` that also
    applies the Nesterov-momentum updates to all trainable parameters.
    """
    X = T.tensor4('X')
    y = T.ivector('y')
    X_batch = T.tensor4('X_batch')
    y_batch = T.ivector('y_batch')

    y_hat = layers.get_output(l_out, X, deterministic=False)

    # softmax loss
    train_loss = T.mean(
        T.nnet.categorical_crossentropy(y_hat, y))

    # L2 regularization
    train_loss += wd * regularize_network_params(l_out, l2)

    train_acc = T.mean(
        T.eq(y_hat.argmax(axis=1), y))

    all_params = layers.get_all_params(l_out, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        train_loss, all_params, lr, mntm)

    # Plain variables replace the deprecated `theano.Param` wrappers; with no
    # extra options the two forms are equivalent.
    train_iter = theano.function(
        inputs=[X_batch, y_batch],
        outputs=[train_loss, train_acc],
        updates=updates,
        givens={
            X: X_batch,
            y: y_batch,
        },
    )

    return train_iter
def functions(encoder, network, l_rate=1.):
    """Compile the train / test / encode / decode functions of an autoencoder.

    Parameters
    ----------
    encoder : layer whose output is used as the code.
    network : output layer of the full network.
    l_rate : learning rate for the SGD updates.

    Returns
    -------
    tuple
        ``(training_function, test_function, code_function, decode_function)``:

        * ``training_function(X, Y)`` returns the mean squared error and
          applies one SGD step to the trainable parameters;
        * ``test_function(X, Y)`` returns ``[loss, output]`` without updates;
        * ``code_function(X)`` returns the encoder output;
        * ``decode_function(Z)`` returns the network output with ``Z``
          substituted for the encoder layer's output.
    """
    # For network
    X = T.tensor4()
    Y = T.tensor4()  # original note: X = Y
    parameters = nn.layers.get_all_params(layer=network, trainable=True)
    output = nn.layers.get_output(layer_or_layers=network, inputs=X)
    loss = T.mean(nn.objectives.squared_error(output, Y))
    updates = nn.updates.sgd(
        loss_or_grads=loss, params=parameters, learning_rate=l_rate)
    training_function = theano.function(
        inputs=[X, Y], outputs=loss, updates=updates)
    test_function = theano.function(
        inputs=[X, Y], outputs=[loss, output])

    # For encoder
    code_output = nn.layers.get_output(layer_or_layers=encoder, inputs=X)
    code_function = theano.function(inputs=[X], outputs=code_output)

    # For decoder: feed Z in place of the encoder layer's output.
    Z = T.tensor4()
    decode_output = nn.layers.get_output(
        layer_or_layers=network, inputs={encoder: Z})
    decode_function = theano.function(inputs=[Z], outputs=decode_output)

    return training_function, test_function, code_function, decode_function
示例#13
0
文件: burn_gpu.py 项目: Theano/Theano
def burn():
    """Run ``dnn._dnn_conv`` on fixed random data 10000 times.

    Image, kernel and output buffers are shared variables filled with random
    values, so the compiled function takes no inputs. The compiled graph is
    printed with ``debugprint`` before the loop starts.
    """
    sz = 128
    img_shp = [sz, sz, sz, sz]
    kern_shp = [sz // 2, sz, 3, 3]
    out_shp = get_conv_output_shape(img_shp, kern_shp, 'valid', (1, 1))

    def rand(shp):
        # Random array of the given shape in Theano's configured float type.
        return np.random.rand(*shp).astype(theano.config.floatX)

    img = theano.shared(rand(img_shp))
    kern = theano.shared(rand(kern_shp))
    out = theano.shared(rand(out_shp))
    # beta 1 is needed to force the reuse of out, otherwise, it is
    # replaced by a GpuAllocEmpty
    o1 = dnn._dnn_conv(img, kern, conv_mode='conv', out=out, beta=1.)
    mode = theano.compile.get_default_mode().including(
        "local_remove_all_assert")
    f = theano.function([], [o1], mode=mode)
    theano.printing.debugprint(f)
    print("Start computation")
    for _ in range(10000):
        # Invoke the underlying compiled thunk directly, as the original did.
        f.fn()
    print("Computation stopped")
def test_mask_loss_sobel():
    """Evaluate ``mask_loss_sobel`` on an image derived from its own mask.

    The four Sobel responses are saved to ``mask_loss_sobel.png`` and the loss
    is asserted to be exactly zero.
    """
    th_mask, th_img = T.tensor4(), T.tensor4()
    ml = mask_loss_sobel(th_mask, th_img)
    outputs = [ml.loss] + list(ml.sobel_mask) + list(ml.sobel_img)
    mask_loss = theano.function([th_mask, th_img], outputs)

    # Image built from the mask: 1 above IGNORE, 0 below BACKGROUND_RING,
    # 0.5 everywhere else.
    mask_idx = next(masks(1))
    image_ok = 0.5 * np.ones_like(mask_idx)
    image_ok[mask_idx > MASK["IGNORE"]] = 1
    image_ok[mask_idx < MASK["BACKGROUND_RING"]] = 0

    print()
    loss, sobel_mask_x, sobel_mask_y, sobel_img_x, sobel_img_y = \
        mask_loss(mask_idx, image_ok)

    plt.set_cmap('gray')
    # (subplot id, response, draw a colorbar afterwards?)
    panels = (
        (221, sobel_mask_x, False),
        (222, sobel_mask_y, True),
        (223, sobel_img_x, False),
        (224, sobel_img_y, True),
    )
    for position, response, with_colorbar in panels:
        plt.subplot(position)
        plt.imshow(response[0, 0])
        if with_colorbar:
            plt.colorbar()
    plt.savefig("mask_loss_sobel.png")

    print()
    print("mask_loss: {}".format(mask_loss(mask_idx, image_ok)))
    assert loss == 0
示例#15
0
    def compile(self):
        """Compile ``self.predict`` and, when training is enabled, ``self.fit``.

        ``self.predict`` maps a seed tensor to the deterministic outputs of the
        'seed' and 'out' layers. When ``args.train`` is set, ``self.fit`` is
        compiled as well: it returns the three generator losses plus the
        per-sample mean of the discriminator output, and applies Adam updates
        to generator and discriminator parameters in one call.
        """
        floatx = theano.config.floatX
        input_tensor, seed_tensor = T.tensor4(), T.tensor4()
        feeds = {self.network['img']: input_tensor, self.network['seed']: seed_tensor}

        # Inference: used for rendering test images and standalone prediction.
        inference_layers = [self.network[name] for name in ['seed', 'out']]
        inference_out = lasagne.layers.get_output(inference_layers, feeds, deterministic=True)
        self.predict = theano.function([seed_tensor], inference_out)

        if not args.train:
            return

        training_layers = [self.network['out'],
                           self.network[args.perceptual_layer],
                           self.network['disc']]
        gen_out, percept_out, disc_out = lasagne.layers.get_output(
            training_layers, feeds, deterministic=False)

        # Generator: weighted losses, parameters and Adam updates.
        self.gen_lr = theano.shared(np.array(0.0, dtype=floatx))
        self.adversary_weight = theano.shared(np.array(0.0, dtype=floatx))
        gen_losses = [
            self.loss_perceptual(percept_out) * args.perceptual_weight,
            self.loss_total_variation(gen_out) * args.smoothness_weight,
            self.loss_adversarial(disc_out) * self.adversary_weight,
        ]
        gen_params = lasagne.layers.get_all_params(self.network['out'], trainable=True)
        print('  - {} tensors learned for generator.'.format(len(gen_params)))
        gen_updates = lasagne.updates.adam(sum(gen_losses, 0.0), gen_params,
                                           learning_rate=self.gen_lr)

        # Discriminator: parameters of every layer whose key contains 'disc',
        # with gradients clipped to [-5, 5] before Adam.
        self.disc_lr = theano.shared(np.array(0.0, dtype=floatx))
        disc_losses = [self.loss_discriminator(disc_out)]
        disc_params = []
        for key, layer in self.network.items():
            if 'disc' in key:
                disc_params.extend(layer.get_params())
        print('  - {} tensors learned for discriminator.'.format(len(disc_params)))
        clipped_grads = [grad.clip(-5.0, +5.0)
                         for grad in T.grad(sum(disc_losses, 0.0), disc_params)]
        disc_updates = lasagne.updates.adam(clipped_grads, disc_params,
                                            learning_rate=self.disc_lr)

        # One Theano function updates generator and discriminator together.
        updates = collections.OrderedDict(
            itertools.chain(gen_updates.items(), disc_updates.items()))
        fit_outputs = gen_losses + [disc_out.mean(axis=(1,2,3))]
        self.fit = theano.function([input_tensor, seed_tensor], fit_outputs, updates=updates)
def set_generator_update_function(feature_function,
                                  energy_function,
                                  generator_function,
                                  generator_params,
                                  generator_bn_params,
                                  generator_optimizer,
                                  generator_bn_optimizer):
    """Compile the generator update step.

    The cost is the mean energy of generated samples. Both optimizers are
    called with that cost and their update lists are concatenated.

    Returns
    -------
    Compiled function ``(input_data, hidden_data, noise_data, annealing) ->
    [input_energy, sample_energy]``. ``noise_data`` and ``annealing`` do not
    enter the graph; the function is compiled with
    ``on_unused_input='ignore'`` so they are still accepted.
    """
    floatx = theano.config.floatX

    # Symbolic inputs: data, latent code, noise, annealing rate.
    input_data = T.tensor4(name='input_data', dtype=floatx)
    hidden_data = T.matrix(name='hidden_data', dtype=floatx)
    noise_data = T.tensor4(name='noise_data', dtype=floatx)
    annealing = T.scalar(name='annealing', dtype=floatx)

    # Annealing is effectively disabled: the scale is a constant.
    annealing_scale = 1.0

    # Generated samples (the noise is not added to them).
    sample_data = generator_function(hidden_data, is_train=True)

    # Features, then energies, for real and generated data.
    input_feature = feature_function(input_data, is_train=True)
    sample_feature = feature_function(sample_data, is_train=True)
    input_energy = energy_function(input_feature, is_train=True)
    sample_energy = energy_function(sample_feature, is_train=True)

    # Generator cost: negative phase only.
    cost = T.mean(sample_energy*annealing_scale)

    param_updates = generator_optimizer(generator_params, cost)
    bn_updates = generator_bn_optimizer(generator_bn_params, cost)

    return theano.function(
        inputs=[input_data, hidden_data, noise_data, annealing],
        outputs=[input_energy, sample_energy],
        updates=param_updates+bn_updates,
        on_unused_input='ignore')
def test_mask_loss_median():
    """Check that both ``mask_loss_median`` implementations give zero loss, then time them."""
    th_mask, th_img = T.tensor4(), T.tensor4()
    symbolic_inputs = [th_mask, th_img]

    cuda_out = mask_loss_median(th_mask, th_img, impl='cuda')
    cuda_keys = ('loss', 'median_black', 'loss_per_sample', 'black_white_loss')
    cuda_mask_loss = theano.function(symbolic_inputs,
                                     [cuda_out[key] for key in cuda_keys])

    theano_out = mask_loss_median(th_mask, th_img, impl='theano')
    theano_mask_loss = theano.function(symbolic_inputs, theano_out['loss'])

    # Image set to 1 wherever the mask is above IGNORE, 0 elsewhere.
    mask_idx = next(masks(1))
    image_ok = np.zeros_like(mask_idx)
    image_ok[mask_idx > MASK["IGNORE"]] = 1

    # Print the shapes of every auxiliary output (everything but the loss).
    outs = cuda_mask_loss(mask_idx, image_ok)
    for aux in outs[1:]:
        print(aux.shape)
    assert (cuda_mask_loss(mask_idx, image_ok)[0] == 0).all()
    assert (theano_mask_loss(mask_idx, image_ok) == 0).all()

    # Average wall-clock time per call over n runs of each implementation.
    n = 10
    timed = (
        ("cuda implementation: {}", cuda_mask_loss),
        ("theano implementation: {}", theano_mask_loss),
    )
    for template, loss_fn in timed:
        t = Timer(lambda: loss_fn(mask_idx, image_ok))
        print(template.format(t.timeit(number=n) / n))
示例#18
0
def test_batch_normalization_train_without_running_averages():
    """Compile and run ``batch_normalization_train`` without running averages.

    Also checks that no abstract batch-norm op is left in the compiled graph.
    """
    utt.seed_rng()
    floatx = theano.config.floatX

    x = T.tensor4('x')
    scale = T.tensor4('scale')
    bias = T.tensor4('bias')
    dy = T.tensor4('dy')
    data_shape = (5, 10, 30, 25)
    param_shape = (1, 10, 30, 25)

    # Forward pass, then the backward pass driven by a supplied output gradient.
    out, x_mean, x_invstd = bn.batch_normalization_train(x, scale, bias, 'per-activation')
    grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
    f = theano.function([x, scale, bias, dy], [out, x_mean, x_invstd] + grads)

    # Every abstract op must have been replaced by a concrete implementation.
    abstract_ops = (bn.AbstractBatchNormTrain,
                    bn.AbstractBatchNormInference,
                    bn.AbstractBatchNormTrainGrad)
    assert not any([isinstance(node.op, abstract_ops)
                    for node in f.maker.fgraph.toposort()])

    def randn(shape):
        # Standard-normal sample of the given shape in Theano's float type.
        return numpy.random.randn(*shape).astype(floatx)

    # Draw order (X, Dy, Scale, Bias) is kept so the seeded RNG stream matches.
    X = 4 + 3 * randn(data_shape)
    Dy = -1 + 2 * randn(data_shape)
    Scale = randn(param_shape)
    Bias = randn(param_shape)
    f(X, Scale, Bias, Dy)
示例#19
0
    def fit(self, X, y=None):
        """Compile the train/predict convolution functions and run one training step.

        Parameters
        ----------
        X : array fed to the compiled train function as ``input``;
            ``X.shape[1]`` sizes the second axis of the weight tensor.
        y : array fed as ``target``. Despite the ``None`` default it is
            required: ``y.shape[0]`` is read unconditionally.

        Returns
        -------
        The prediction returned by ``self.conv_fct['train'](X, y, 1)``.

        Notes
        -----
        Relies on ``self.hidden_matrices['A']`` and ``['K']`` being shared
        variables created elsewhere -- TODO confirm where they are initialised.
        """
        self.n_features = y.shape[0]
        self.weights['input'] = theano.shared(value=np.zeros((
            self.n_features, X.shape[1], self.spatial[0], self.spatial[1]),
            dtype=theano.config.floatX), name='w', borrow=True)
        input_var = T.tensor4(name='input')
        target = T.tensor4(name='target')
        decay = T.scalar(name='decay')
        # Input/target convolution with the first two axes of both swapped.
        xy = T.nnet.conv2d(input_var.transpose(1,0,2,3), target.transpose(1,0,2,3),
                           border_mode=self.pad, subsample=self.stride)
        # Sum of squared inputs over every axis except axis 1.
        xx = T.sum(T.power(input_var, 2), axis=(0,2,3))
        # NOTE(review): the original had `k = ifelse(self.hidden_matrices['input'] is None, )`
        # here. `ifelse` needs a condition plus both branches, so that call
        # always raised TypeError, and `k` was never used. It was presumably
        # meant to lazily initialise the hidden matrices -- TODO confirm.

        lam = theano.shared(value=self._C, name='constrain', borrow=True)
        prediction = T.nnet.conv2d(input_var, self.weights['input'],
                                   border_mode=self.pad,
                                   subsample=self.stride)
        # New weights: A / (K + lam), scanned over paired slices of A
        # (axes swapped) and K.
        weights, _ = theano.scan(
            fn=lambda a, k, c: a/(k+c), outputs_info=None,
            sequences=[self.hidden_matrices['A'].transpose(1,0,2,3),
                       self.hidden_matrices['K']], non_sequences=lam)
        new_weights = weights.transpose(1,0,2,3)
        updates = [(self.hidden_matrices['K'],
                    self.hidden_matrices['K'].dot(decay)+xx),
                   (self.hidden_matrices['A'],
                    self.hidden_matrices['A'].dot(decay) + xy),
                   (self.weights['input'], new_weights)]
        self.conv_fct['train'] = theano.function([input_var, target, decay],
                                                 prediction,
                                                 updates=updates)
        self.conv_fct['predict'] = theano.function([input_var], prediction)
        return self.conv_fct['train'](X, y, 1)
示例#20
0
def get_dc_input_layers(shape):
    """
    Create the three input layers of the CNN for 2D or 3D input.

    Symbolic variables are 5-D (``tensor5``) when ``shape`` has more than
    four entries and 4-D (``T.tensor4``) otherwise.

    Returns
    -------
    tuple
       ``(input_layer, kspace_input_layer, mask_layer)``: three
       ``InputLayer`` objects named 'input', 'kspace_input' and 'mask'.
    """
    make_var = tensor5 if len(shape) > 4 else T.tensor4

    input_var, kspace_input_var, mask_var = (
        make_var('input_var'),
        make_var('kspace_input_var'),
        make_var('mask'),
    )

    return (
        InputLayer(shape, input_var=input_var, name='input'),
        InputLayer(shape, input_var=kspace_input_var, name='kspace_input'),
        InputLayer(shape, input_var=mask_var, name='mask'),
    )
示例#21
0
def train_model():
    """Train the FCN segmenter once per cross-validation fold (5 folds).

    For each fold a new network is built, trained with Adam on the
    ``sorenson_dice`` loss and its parameters saved under
    ``c.params_dir/ch4seg_v<version>/``.
    """
    batch_size = 16
    num_epochs = c.ch4_train_epochs
    sz = c.fcn_img_size
    version=2
    # `range` instead of `xrange` so the loop also runs on Python 3.
    for i in range(5):
        data = u.DataH5PyStreamer(os.path.join(c.data_intermediate, 'ch4_256.hdf5'),
                batch_size=batch_size, folds=(5,i))
        input_var = T.tensor4('input')
        label_var = T.tensor4('label')
        net, output, output_det = m.build_fcn_segmenter(input_var,
                (None, 1, sz, sz), version=version)
        params = nn.layers.get_all_params(net['output'], trainable=True)
        lr = theano.shared(nn.utils.floatX(3e-3))
        loss = du.sorenson_dice(output, label_var)
        te_loss = du.sorenson_dice(output_det, label_var)
        te_acc = nn.objectives.binary_accuracy(output_det, label_var).mean()
        updates = nn.updates.adam(loss, params, learning_rate=lr)
        train_fn = theano.function([input_var, label_var], loss, updates=updates)
        test_fn = theano.function([input_var, label_var], te_loss)
        # NOTE(review): acc_fn, pred_fn and hist are never used below; kept
        # as in the original -- confirm whether they can be dropped.
        acc_fn = theano.function([input_var, label_var], te_acc)
        pred_fn = theano.function([input_var], output_det)
        hist = u.train_with_hdf5(data, num_epochs=num_epochs,
                train_fn = train_fn, test_fn=test_fn,
                max_per_epoch=-1, use_tqdm=False,
                tr_transform=lambda x: du.segmenter_data_transform(x, rotate=(-180, 180)),
                te_transform=lambda x: du.segmenter_data_transform(x, rotate=None),
                last_layer = net['output'],
                save_params_to=os.path.join(c.params_dir, 'ch4seg_v{}/test_ch4seg_f{}_v{}.npz'\
                        .format(version, i, version)))
示例#22
0
    def compile_dream(self, X_train, shapes, indices, initializer):
        """Compile the "dream" functions that optimise selected inputs.

        Inputs whose position is in ``indices`` become shared variables
        initialised by ``initializer(shape)``, taking shapes from ``shapes``
        in order. Every other input is fixed to the first sample of
        ``X_train[i]`` (made at least 4-D), and ``X_train[i]`` is overwritten
        in place with that sample.

        Sets ``self.dream_update`` (no inputs; returns the mean prediction and
        applies the optimizer and layer updates) and
        ``self.dream_accuracy_theano`` (``(y_pred, y) -> accuracy``).
        """
        self.dream_compiled = True
        self.X_dream = []
        shape_cursor = 0
        for i in range(len(X_train)):
            if i in indices:
                start_value = initializer(shapes[shape_cursor]).astype('float32')
                shape_cursor += 1
            else:
                # Keep only the first sample; this mutates the caller's X_train.
                X_train[i] = atleast_4d(X_train[i][[0]])
                start_value = X_train[i].astype('float32')
            self.X_dream.append(theano.shared(start_value))

        y_hat_test, layer_updates = self.tree.get_output(self.params_shared, self.X_dream[:], True)
        preds = y_hat_test.flatten(self.num_output_dims).mean(axis=None)

        def dream_inputs():
            # Fresh list of the shared variables being optimised.
            return [self.X_dream[j] for j in indices]

        self.dream_optimizer.build(dream_inputs())
        # The cost is the negated mean prediction; every update expression is
        # cast back to float32.
        updates = [
            (update[0], update[1].astype('float32'))
            for update in self.dream_optimizer.get_updates(dream_inputs(), -preds)
        ]
        updates += layer_updates

        y_pred = T.tensor4(dtype='float32')
        y = T.tensor4(dtype='float32')
        self.dream_accuracy_theano = theano.function(
            [y_pred, y], self.accuracy.get_accuracy(y_pred, y))
        self.dream_update = theano.function(
            inputs=[],
            outputs=preds,
            updates=updates
        )
示例#23
0
def getTheanoConvFunction(patchsize=None, imagesize=None):
    """
    Return a Theano function performing a normalised valid convolution of a
    filter on an image.

    Parameters
    ----------
    patchsize : height and width used in the filter shape hint.
    imagesize : height and width used in the image shape hint.

    Returns
    -------
    Compiled function ``f(img, filter, mask)`` returning
    ``conv(img, filter) / sqrt(conv(img ** 2, mask))``.
    """

    # Define the size of the images and filters to allow Theano to
    # further optimize the convolution op
    image_shape = (None, 1, imagesize, imagesize)
    filter_shape = (None, 1, patchsize, patchsize)

    # Define the input variables to the function
    img = T.tensor4(dtype='floatX')
    filt = T.tensor4(dtype='floatX')
    mask = T.tensor4(dtype='floatX')

    # Convolve the image with the filter
    convImgWithFilter = T.nnet.conv.conv2d(img, filt, border_mode='valid',
                                           image_shape=image_shape,
                                           filter_shape=filter_shape)

    # Local norm of each image portion under the mask: square root of the
    # masked sum of squares. Dividing by it stops a brighter region from
    # winning over a darker, better-fitting one.
    convImgWithMask = T.nnet.conv.conv2d(img**2, mask, border_mode='valid',
                                         image_shape=image_shape,
                                         filter_shape=filter_shape)
    localNorm = convImgWithMask ** 0.5

    # The original applied `** 0.5` a second time here, dividing by the
    # fourth root of the masked energy instead of the norm.
    normConvImgWithFilter = convImgWithFilter / localNorm

    # Compile and return the theano function
    f = theano.function([img, filt, mask], normConvImgWithFilter)
    return f
示例#24
0
def theano_convolution(input_size, dtype, num_kernels, ksize, mode, iternum):
    """Time a Theano 2-D convolution on a random image.

    Parameters
    ----------
    input_size : indexable of three ints
        Entries 0 and 1 become the last two axes of the 4-D input and the
        last entry its channel axis.
    dtype : numpy dtype
        ``np.float32`` selects a float32 input tensor, anything else float64.
    num_kernels : number of filters.
    ksize : height and width of every filter.
    mode : forwarded to ``conv2d`` as ``border_mode``.
    iternum : number of timed evaluations; must be non-zero.

    Prints the mean time per evaluation; returns nothing.
    """
    rng = np.random.RandomState(23455)
    n_channels = input_size[-1]
    # instantiate 4D tensor for input
    tensor_dtype = 'float32' if dtype == np.float32 else 'float64'
    input_var = T.tensor4(name='input', dtype=tensor_dtype)
    # initialize shared variable for weights.
    w_shp = (num_kernels, n_channels, ksize, ksize)
    w_bound = np.sqrt(n_channels * ksize * ksize)
    W = theano.shared( np.asarray(
                rng.uniform(
                    low=-1.0 / w_bound,
                    high=1.0 / w_bound,
                    size=w_shp),
                dtype=dtype), name ='W')
    conv_out = conv.conv2d(input_var, W, border_mode=mode)
    # create theano function to compute filtered images
    f = theano.function([input_var], conv_out)
    img = np.random.random_sample(input_size).astype(dtype)
    # move the channel axis first and add a leading batch axis of size 1
    img_ = img.swapaxes(0, 2).swapaxes(1, 2).reshape(1, n_channels, input_size[0], input_size[1])
    img_ = np.ascontiguousarray(img_)
    # just in case theano want to initialize something, we will run the function once first.
    f(img_)
    start = time.time()
    for _ in range(iternum):
        f(img_)
    # One pre-formatted string prints the same text on Python 2 and Python 3.
    print('theano time: %s' % ((time.time() - start) / iternum))
示例#25
0
def test_pooling():
    """Check ``perform_pooling`` on a fixed 3x3 input for three settings."""
    inpt = prepare_array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

    # Each case: (shift, pool_shape, limits, expected output rows).
    cases = (
        ([[0, 1], [0, 1]], [2, 2], [2, 2], [[5, 6], [8, 9]]),
        ([[0], [0, 1]], [2, 2], [1, 2], [[5, 6]]),
        ([[0, 1], [0, 1]], [1, 2], [3, 2], [[2, 3], [5, 6], [8, 9]]),
    )
    for shift, pool_shape, limits, expected_rows in cases:
        expected = prepare_array(expected_rows)
        # A fresh symbolic input and compiled function for every case.
        symbolic_in = tensor4('input')
        pooled = perform_pooling(symbolic_in, shift, pool_shape, limits)
        run = theano.function([symbolic_in], pooled)
        assert np.allclose(run(inpt), expected)
示例#26
0
def create_iter_funcs_test(l_out, bs, N=50):
    """Compile a function that averages ``N`` non-deterministic passes.

    The symbolic input is repeated ``N`` times along axis 0 and pushed
    through ``l_out`` with ``deterministic=False``. The output is split
    into ``bs`` pieces along axis 0, and the stacked pieces are averaged
    over axis 1.

    Args:
        l_out: Output layer handed to ``layers.get_output``.
        bs: Number of pieces the sampled output is split into.
        N: Repeat count along axis 0.

    Returns:
        The compiled Theano function taking one batch tensor.
    """
    x_sym = T.tensor4('X')
    x_batch = T.tensor4('X_batch')

    tiled = T.extra_ops.repeat(x_sym, N, axis=0)
    samples = layers.get_output(l_out, tiled, deterministic=False)

    # T.split needs the number of pieces up front: ``bs`` pieces, each
    # sized by the ratio of repeated rows to original rows.
    piece_size = tiled.shape[0] / x_sym.shape[0]
    pieces = T.split(samples, [piece_size] * bs, bs, axis=0)
    y_hat = T.mean(T.as_tensor_variable(pieces), axis=1)

    return theano.function(
        inputs=[theano.Param(x_batch)],
        outputs=y_hat,
        givens={x_sym: x_batch},
    )
示例#27
0
 def __init__(self):
     """Assemble the layer stack, build it, and store it on ``self.model``."""
     inputs = [T.tensor4(), T.tensor4()]
     targets = [T.ivector()]

     net = Model()
     add = net.add

     # First two stages take both inputs (n_inputs=2) with w_shared=True.
     add(Conv(filter_shape=(25, 3, 5, 5), w_shared=True, n_inputs=2))
     add(Pooling(pool_size=(2, 2)))
     add(Activation(mode='tanh'))

     add(Conv(filter_shape=(25, 25, 3, 3), w_shared=True, n_inputs=2))
     add(Pooling(pool_size=(2, 2)))
     add(Activation(mode='tanh'))

     # Abs_diff layer, followed by a conv stage without n_inputs.
     add(Abs_diff())

     add(Conv(filter_shape=(25, 25, 3, 3), w_shared=True))
     add(Pooling(pool_size=(2, 2)))
     add(Activation(mode='tanh'))

     # Flatten, then two fully connected layers ending in a softmax.
     add(Flatten())
     add(Fully((25 * 18 * 5, 500)))
     add(Activation(mode='tanh'))
     add(Fully((500, 2)))
     add(Activation(mode='softmax'))

     net.build(CostFunc.nll, RMSprop(), inputs, targets)
     self.model = net
示例#28
0
    def test_dtype_upcast(self):
        """
        Checks that each CorrMM op outputs the upcast of its input dtypes.
        """
        def rand(shape, dtype='float64'):
            # Samples from numpy.random.rand, cast to dtype, rescaled by *2 - 1.
            return numpy.asarray(numpy.random.rand(*shape), dtype=dtype) * 2 - 1

        # One (op, first input shape, second input shape) triple per op.
        cases = zip(
            [corr.CorrMM, corr.CorrMM_gradWeights, corr.CorrMM_gradInputs],
            [[4, 5, 6, 3], [1, 5, 6, 3], [1, 5, 6, 3]],
            [[7, 5, 3, 2], [1, 5, 3, 1], [7, 1, 3, 1]],
        )
        dtypes = ['float32', 'float64']
        dtype_pairs = [(first, second) for first in dtypes for second in dtypes]

        for op, a_shape, b_shape in cases:
            for a_dtype, b_dtype in dtype_pairs:
                expected_dtype = theano.scalar.upcast(a_dtype, b_dtype)
                a_sym = T.tensor4(dtype=a_dtype)
                b_sym = T.tensor4(dtype=b_dtype)
                a_val = rand(a_shape, dtype=a_dtype)
                b_val = rand(b_shape, dtype=b_dtype)

                fn = theano.function([a_sym, b_sym], op()(a_sym, b_sym),
                                     mode=self.mode)
                assert_equals(fn(a_val, b_val).dtype, expected_dtype)
示例#29
0
    def __init__(self, Tt, N, H, W):
        """Build the symbolic graph: two temporal convs, pooling, LSTM, FC.

        Args:
            Tt: Sequence length (stored as ``seq_length``).
            N: Batch size (stored as ``batch_size``).
            H: Frame height used when reshaping ``Q``.
            W: Frame width used when reshaping ``Q``.
        """
        self.batch_size = N
        self.seq_length = Tt

        self.Q = T.tensor4('Q', dtype=config.floatX)  # (T,N,H,W), will reshape to (T,N,1,H,W) for convolution
        self.P = T.tensor3('P', dtype=config.floatX)  # (T,N,D=2)
        self.Y = T.tensor4('Y', dtype=config.floatX)  # (T,N,H,W)

        self.alpha = T.scalar('alpha', dtype=config.floatX)  # learning rate

        self.Q_view = self.Q.reshape((Tt, N, 1, H, W))

        self.CONV1 = TemporalConvReluLayer(input_var=self.Q_view, layerid='CONV1')
        self.CONV2 = TemporalConvReluLayer(input_var=self.CONV1.output,
                                           n_input_channels=8, n_filters=16,
                                           layerid='CONV2')

        self.POOL = T.signal.pool.pool_2d(self.CONV2.output, (2, 2))

        # Per-step size of the flattened pooled features, as used by the
        # reshape below.
        pooled_dim = 4 * H * W
        PandQ = T.concatenate([self.POOL.reshape((Tt, N, pooled_dim)),
                               self.P],
                              axis=2)

        # The LSTM input width is the pooled features plus the 2 extra
        # columns contributed by P (per the shape comment on P). It is derived
        # from H and W so it always agrees with the reshape above; the former
        # literal 32*32*16/4+2 matched only 32x32 frames and evaluates to a
        # float under true division.
        self.LSTM = LSTMLayer(input_var=PandQ, num_units=512,
                              layerid='LSTM', sequence=Tt,
                              in_dim=pooled_dim + 2)

        #self.LSTM2 = LSTMLayer(input_var=self.LSTM.output,num_units=512,layerid='LSTM2',in_dim=(512))
        #self.LSTM3 = LSTMLayer(input_var=self.LSTM2.output,num_units=512,layerid='LSTM3',in_dim=(512))

        self.FC = TemporalFC(input_var=self.LSTM.output,
                             num_units=H * W,
                             layerid='FC',
                             in_dim=512)

        # Sigmoid of the FC output, reshaped to the shape of Q.
        #Y_pred = T.nnet.softmax(self.FC.output.reshape((Tt*N,H*W))).reshape((Tt,N,H,W))*14.0
        Y_pred = T.nnet.sigmoid(self.FC.output.reshape(self.Q.shape))

        self.output = Y_pred

        # Cross-entropy in which the Y * log(Y_pred) term is weighted by 14.
        #self.loss = T.nnet.binary_crossentropy(Y_pred,self.Y).mean(dtype=config.floatX)
        #self.loss = (T.abs_(Y_pred - self.Y)).mean(dtype=config.floatX)
        #self.loss = ((Y_pred - self.Y) ** 2).mean(dtype=config.floatX)
        self.loss = -(self.Y * T.log(Y_pred) * 14
                      + (1 - self.Y) * T.log(1 - Y_pred)).mean(dtype=config.floatX)
        #self.compute_loss = function([self.Q,self.P,self.Y],outputs=self.loss)

        # Merge the parameter dicts of all layers into one.
        self.params = {}
        self.params.update(self.CONV1.params)
        self.params.update(self.CONV2.params)
        self.params.update(self.LSTM.params)
        self.params.update(self.FC.params)

        self.train_args = [self.Q, self.P, self.Y, self.alpha]
        self.predict_args = [self.Q, self.P]

        # super constructor creates gradients, _train, and _predict
        super(PongRNNModel, self).__init__()
示例#30
0
文件: test_conv.py 项目: intel/theano
 def setUp(self):
     """Create the symbolic input/filter tensors, or skip without a backend."""
     super(TestConv2D, self).setUp()

     images = T.tensor4('input', dtype=self.dtype)
     images.name = 'default_V'
     self.input = images

     kernels = T.tensor4('filters', dtype=self.dtype)
     kernels.name = 'default_filters'
     self.filters = kernels

     # Skip when neither SciPy's signal module nor a C++ compiler is there.
     backend_available = (conv.imported_scipy_signal
                          or theano.config.cxx != "")
     if not backend_available:
         raise SkipTest("conv2d tests need SciPy or a c++ compiler")
def main(num_epochs=20, needsNormalization=True):
    """Train the CNN from ``build_cnn`` on USPS and print test loss/accuracy.

    Args:
        num_epochs: Number of passes over the training set.
        needsNormalization: When true, both splits are passed through
            ``normalize`` before training.
    """
    batch_size = 128  # minibatch size for both training and evaluation

    # Load the dataset
    print("Loading data...")
    train_data, train_labels = load_dataset('usps/train.gz')
    test_data, test_labels = load_dataset('usps/test.gz')

    # %-formatting prints the same text under Python 2 and Python 3.
    print("train_data has dimensions %s" % (train_data.shape,))
    print("train_labels has dimensions %s" % (train_labels.shape,))

    print("test_data has dimensions %s" % (test_data.shape,))
    print("test_labels has dimensions %s" % (test_labels.shape,))

    # normalize the data
    if needsNormalization:
        train_data = normalize(train_data)
        test_data = normalize(test_data)

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create CNN
    print("Building model and compiling functions...")
    network = build_cnn(input_var)

    # Training criterion: mean categorical cross-entropy, optimised by SGD
    # with Nesterov momentum 0.9 and learning rate 0.1.
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.1, momentum=0.9)

    print("Starting training...")
    # Loss/accuracy expressions for evaluation use the deterministic output.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    # We iterate over epochs:
    for epoch in range(num_epochs):
        print("epoch %s" % epoch)
        train_err = 0
        train_batches = 0

        # One shuffled pass over the training data; the mean batch loss is
        # printed after each epoch.
        for batch in iterate_minibatches(train_data, train_labels,
                                         batch_size, shuffle=True):
            inputs, targets = batch
            inputs = inputs.astype(np.float32)
            targets = targets.astype(np.int32)

            train_err += train_fn(inputs, targets)
            train_batches += 1

        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))

    # After training, evaluate on the test data and report mean loss and
    # accuracy over the batches.
    print("STARTING VALIDATION")
    val_err = 0
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(test_data, test_labels,
                                     batch_size, shuffle=False):
        inputs, targets = batch
        inputs = inputs.astype(np.float32)
        targets = targets.astype(np.int32)

        err, acc = val_fn(inputs, targets)
        val_err += err
        val_acc += acc
        val_batches += 1

    print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
    print("  validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))
def build_network_from_ae(classn):
    """Build the encoder/decoder layer graph plus an added classifier head.

    The first part creates a convolutional trunk, a feature-map/mask
    encoder, a deconvolutional decoder and a second "global" branch. The
    two decoded outputs are summed and reshaped, and the trainable
    parameters of that graph are recorded. A classifier head is then
    attached to the encoder and global-feature layers, together with an
    extra matrix input.

    Args:
        classn: Number of units of the final sigmoid dense layer.

    Returns:
        ``(network, new_params, input_var, aug_var, target_var)`` where
        ``network`` is the classifier output layer and ``new_params`` are
        its trainable parameters that were not part of the first graph.
    """

    def conv_bn(incoming, num_filters, size, **conv_kwargs):
        # Batch-normalised square convolution with leaky-rectify.
        return batch_norm(
            layers.Conv2DLayer(incoming,
                               num_filters,
                               filter_size=(size, size),
                               nonlinearity=leaky_rectify,
                               **conv_kwargs))

    def same_conv(incoming, num_filters, size):
        # conv_bn with stride 1 and 'same' padding.
        return conv_bn(incoming, num_filters, size, stride=1, pad='same')

    def deconv_bn(incoming, num_filters, size, stride, crop):
        # Batch-normalised square deconvolution with leaky-rectify.
        return batch_norm(
            layers.Deconv2DLayer(incoming,
                                 num_filters,
                                 filter_size=(size, size),
                                 stride=stride,
                                 crop=crop,
                                 nonlinearity=leaky_rectify))

    def deconv_chain(incoming, spec):
        # Apply deconv_bn once per (num_filters, size, stride, crop) entry.
        current = incoming
        for num_filters, size, stride, crop in spec:
            current = deconv_bn(current, num_filters, size, stride, crop)
        return current

    def project_to_3(incoming):
        # Final 1x1 deconvolution to 3 filters, no batch norm, identity.
        return layers.Deconv2DLayer(incoming,
                                    3,
                                    filter_size=(1, 1),
                                    stride=1,
                                    crop='same',
                                    nonlinearity=identity)

    def avg_pool(incoming, size):
        # Square average pooling whose stride equals the window size.
        return layers.Pool2DLayer(incoming,
                                  pool_size=(size, size),
                                  stride=size,
                                  mode='average_inc_pad')

    def dense_bn(incoming, num_units):
        # Batch-normalised dense layer with leaky-rectify.
        return batch_norm(
            layers.DenseLayer(incoming, num_units, nonlinearity=leaky_rectify))

    input_var = T.tensor4('input_var')

    # Convolutional trunk.
    layer = layers.InputLayer(shape=(None, 3, PS, PS), input_var=input_var)
    layer = same_conv(layer, 100, 5)
    layer = same_conv(layer, 120, 5)
    layer = avg_pool(layer, 2)
    layer = same_conv(layer, 240, 3)
    layer = same_conv(layer, 320, 3)
    layer = avg_pool(layer, 2)
    layer = same_conv(layer, 640, 3)
    prely = same_conv(layer, 1024, 3)

    # Feature map and mask, merged into the "encoder" layer.
    featm = conv_bn(prely, 640, 1)
    feat_map = batch_norm(
        layers.Conv2DLayer(featm,
                           100,
                           filter_size=(1, 1),
                           nonlinearity=rectify,
                           name="feat_map"))
    maskm = conv_bn(prely, 100, 1)
    mask_rep = batch_norm(
        layers.Conv2DLayer(maskm, 1, filter_size=(1, 1), nonlinearity=None),
        beta=None,
        gamma=None)
    mask_map = SoftThresPerc(mask_rep,
                             perc=98.4,
                             alpha=0.1,
                             beta=init.Constant(0.5),
                             tight=100.0,
                             name="mask_map")
    enlyr = ChInnerProdMerge(feat_map, mask_map, name="encoder")

    # Decoder on top of the encoder: (num_filters, size, stride, crop).
    layer = deconv_chain(enlyr, (
        (1024, 3, 1, 'same'),
        (640, 3, 1, 'same'),
        (640, 4, 2, (1, 1)),
        (320, 3, 1, 'same'),
        (320, 3, 1, 'same'),
        (240, 4, 2, (1, 1)),
        (120, 5, 1, 'same'),
        (100, 5, 1, 'same'),
    ))
    layer = project_to_3(layer)

    # Global branch taken from the last trunk layer.
    glblf = conv_bn(prely, 128, 1)
    glblf = avg_pool(glblf, 5)
    glblf = same_conv(glblf, 64, 3)
    gllyr = batch_norm(
        layers.Conv2DLayer(glblf, 5, filter_size=(1, 1), nonlinearity=rectify),
        name="global_feature")

    glblf = deconv_chain(gllyr, (
        (256, 3, 1, 'same'),
        (128, 3, 1, 'same'),
        (128, 9, 5, (2, 2)),
        (128, 3, 1, 'same'),
        (128, 3, 1, 'same'),
        (64, 4, 2, (1, 1)),
        (64, 3, 1, 'same'),
        (64, 3, 1, 'same'),
        (32, 4, 2, (1, 1)),
        (32, 3, 1, 'same'),
        (32, 3, 1, 'same'),
    ))
    glblf = project_to_3(glblf)

    # Sum both decoded outputs and flatten all but the first axis.
    layer = layers.ElemwiseSumLayer([layer, glblf])
    network = ReshapeLayer(layer, ([0], -1))
    old_params = layers.get_all_params(network, trainable=True)

    # Adding more layers: the classifier head.
    aug_var = T.matrix('aug_var')
    target_var = T.imatrix('targets')

    # Four 1x1 convolutions on the encoder, pooled and fed to a dense layer.
    add_d = enlyr
    for _ in range(4):
        add_d = conv_bn(add_d, 320, 1)
    add_0 = avg_pool(add_d, 25)
    add_1 = dense_bn(add_0, 100)

    # Three dense layers on the global feature.
    add_2 = dense_bn(gllyr, 320)
    add_3 = dense_bn(add_2, 320)
    add_4 = dense_bn(add_3, 100)

    aug_layer = layers.InputLayer(shape=(None, aug_fea_n), input_var=aug_var)

    cat_layer = lasagne.layers.ConcatLayer([add_1, add_4, aug_layer], axis=1)

    hidden_layer = layers.DenseLayer(cat_layer, 80, nonlinearity=leaky_rectify)
    network = layers.DenseLayer(hidden_layer, classn, nonlinearity=sigmoid)

    # Keep only the parameters introduced by the classifier head.
    all_params = layers.get_all_params(network, trainable=True)
    new_params = [param for param in all_params if param not in old_params]

    return network, new_params, input_var, aug_var, target_var
示例#33
0
def anomaly(experiment_name,
            dataset="mnist",
            bayesian_approximation="dropout",
            inside_labels=[0, 1],
            num_epochs=50,
            batch_size=128,
            acc_threshold=0.6,
            weight_decay=1e-5,
            dropout_p=0.5,
            fc_layers=[512, 512],
            plot=True):
    """
    Train a classifier on a subset of classes and score anomaly detection.

    After training, uncertainty measures (e.g. entropy) of the model outputs
    are used to separate the trained ("inside") classes from the remaining
    ones, which are only used for testing.

    dataset = "mnist" or "cifar"
    For "mnist" the model comes from models.mlp_dropout or
    models.mlp_variational; for "cifar" it comes from models.convnet_dropout.

    bayesian_approximation = "dropout" or "variational". It is only consulted
    for "mnist"; for "cifar" models.convnet_dropout is always used.

    inside_labels are the subset of trained classes, the other classes are
    only used for testing. The list defaults are never mutated here.

    Returns a pandas DataFrame with one row, indexed by experiment_name,
    holding the test accuracies, the run settings and one AUC per score.

    Raises ValueError for an unsupported dataset, or for an unsupported
    bayesian_approximation when dataset is "mnist".
    """
    # Fail early; otherwise `model` would stay unbound and training would
    # crash later with an unrelated-looking error.
    if dataset not in ("mnist", "cifar"):
        raise ValueError("unsupported dataset: %r" % (dataset,))
    if dataset == "mnist" and bayesian_approximation not in ("dropout",
                                                             "variational"):
        raise ValueError("unsupported bayesian_approximation: %r" %
                         (bayesian_approximation,))

    n_out = len(inside_labels)

    # Load the dataset and prepare Theano variables for inputs and targets
    print("Loading data...")
    if dataset == "mnist":
        input_var = T.matrix('inputs')
        target_var = T.ivector('targets')
        n_in = [28 * 28]
        X_train, y_train, X_test, y_test, X_test_all, y_test_all = datasets.load_MNIST(
            inside_labels)
        if bayesian_approximation == "dropout":
            model = models.mlp_dropout(input_var, target_var, n_in, n_out,
                                       fc_layers, dropout_p, weight_decay)
        else:  # "variational", validated above
            model = models.mlp_variational(input_var, target_var, n_in, n_out,
                                           fc_layers, batch_size,
                                           len(X_train) / float(batch_size))
    else:  # "cifar", validated above
        input_var = T.tensor4('inputs')
        target_var = T.ivector('targets')

        n_in = [3, 32, 32]
        X_train, y_train, X_test, y_test, X_test_all, y_test_all = datasets.load_CIFAR10(
            inside_labels)
        model = models.convnet_dropout(input_var, target_var, n_in, n_out,
                                       dropout_p, weight_decay)

    df = pd.DataFrame()

    # Mini-batch training with ADAM
    epochs = training.train(model, X_train, y_train, X_test, y_test,
                            batch_size, num_epochs, acc_threshold)
    # Mini-batch testing
    acc, bayes_acc = training.test(model, X_test, y_test, batch_size)
    # df.loc[row, col] = value enlarges the frame as needed; it replaces
    # DataFrame.set_value, which no longer exists in current pandas.
    df.loc[experiment_name, "test_acc"] = acc
    df.loc[experiment_name, "bayes_test_acc"] = bayes_acc

    # Uncertainty prediction, grouped by true label (labels 0..9)
    test_mean_std_bayesian = {x: [] for x in range(10)}
    test_mean_std_deterministic = {x: [] for x in range(10)}
    test_entropy_bayesian = {x: [] for x in range(10)}
    test_entropy_deterministic = {x: [] for x in range(10)}

    for i in range(len(X_test_all)):
        sample = X_test_all[i]
        label = y_test_all[i]
        # The same sample repeated batch_size times, built once and used for
        # both Bayesian queries.
        tiled = np.tile(sample, batch_size).reshape([-1] + n_in)
        bayesian_probs = model.probabilities(tiled)
        bayesian_entropy = model.entropy_bayesian(tiled)
        classical_probs = model.probabilities_deterministic(
            sample[np.newaxis, :])[0]
        classical_entropy = model.entropy_deterministic(
            sample[np.newaxis, :])
        predictive_mean = np.mean(bayesian_probs, axis=0)
        predictive_std = np.std(bayesian_probs, axis=0)
        test_mean_std_bayesian[label].append(
            np.concatenate((predictive_mean, predictive_std)))
        test_entropy_bayesian[label].append(bayesian_entropy)
        test_entropy_deterministic[label].append(classical_entropy)
        test_mean_std_deterministic[label].append(classical_probs)

    # Plotting
    # NOTE(review): `sns.plt` appears to be unavailable in recent seaborn
    # releases; confirm the pinned version before running with plot=True.
    if plot:
        for k in sorted(test_mean_std_bayesian.keys()):
            sns.plt.figure()
            #sns.plt.hist(test_pred_mean[k], label = "Prediction mean for " + str(k))
            sns.plt.hist(test_entropy_bayesian[k],
                         label="Bayesian Entropy v1 for " + str(k))
            #sns.plt.hist(test_pred_std[k], label = "Prediction std for " + str(k))
            #sns.plt.hist(test_entropy_deterministic[k], label = "Classical entropy for " + str(k))
            sns.plt.legend()
            sns.plt.show()

    # Anomaly detection using simple threshold (not called below)
    def anomaly_detection_old(anomaly_score_dict, name, df):
        threshold = np.logspace(-30, 1.0, 1000)
        acc = {}
        for t in threshold:
            tp = 0.0
            tn = 0.0
            for l in anomaly_score_dict:
                if l in inside_labels:
                    tp += (np.array(anomaly_score_dict[l]) < t).mean()
                else:
                    tn += (np.array(anomaly_score_dict[l]) >= t).mean()
            tp /= len(inside_labels)
            tn /= 10.0 - len(inside_labels)
            bal_acc = (tp + tn) / 2.0
            f1_score = 2.0 * tp / (2.0 + tp - tn)
            acc[t] = [bal_acc, f1_score, tp, tn]

        print("{}\tscore\tthreshold\tTP\tTN".format(name))
        # Best threshold by balanced accuracy
        sorted_acc = sorted(acc.items(), key=lambda x: x[1][0], reverse=True)
        df.loc[experiment_name, name + ' bal_acc'] = sorted_acc[0][1][0]
        df.loc[experiment_name, name + ' bal_acc_threshold'] = sorted_acc[0][0]

        print("\tbalanced acc\t{:.3f}\t{:.6f}\t\t{:.3f}\t{:.3f}".format(
            sorted_acc[0][1][0], sorted_acc[0][0], sorted_acc[0][1][2],
            sorted_acc[0][1][3]))
        # Best threshold by F1 score
        sorted_acc = sorted(acc.items(), key=lambda x: x[1][1], reverse=True)
        df.loc[experiment_name, name + ' f1_score'] = sorted_acc[0][1][1]
        df.loc[experiment_name, name + ' f1_score_threshold'] = sorted_acc[0][0]

        print("\tf1 score\t{:.3f}\t{:.6f}\t\t{:.3f}\t{:.3f}".format(
            sorted_acc[0][1][1], sorted_acc[0][0], sorted_acc[0][1][2],
            sorted_acc[0][1][3]))
        return df

    # Anomaly detection using logistic regression
    def anomaly_detection(anomaly_score_dict, name, df):
        # Inside labels become class 0, all other labels class 1.
        X = []
        y = []
        for l in anomaly_score_dict:
            X += anomaly_score_dict[l]
            if l in inside_labels:
                y += [0] * len(anomaly_score_dict[l])
            else:
                y += [1] * len(anomaly_score_dict[l])

        X = np.array(X)
        y = np.array(y)
        X, y = utils.shuffle(X, y, random_state=0)
        # Floor division: a slice index must be an integer, and `/` yields a
        # float under true division.
        half = len(X) // 2
        X_train, X_test = X[:half], X[half:]
        y_train, y_test = y[:half], y[half:]

        clf = linear_model.LogisticRegression(C=1.0)
        clf.fit(X_train, y_train)
        auc = metrics.roc_auc_score(np.array(y_test),
                                    clf.predict_proba(np.array(X_test))[:, 1])
        print("AUC", auc)
        df.loc[experiment_name, name + ' AUC'] = auc

        if plot:  # Plot ROC curve
            fpr, tpr, thresholds = metrics.roc_curve(
                np.array(y_test),
                clf.predict_proba(np.array(X_test))[:, 1],
                pos_label=1)
            sns.plt.figure()
            sns.plt.plot(fpr, tpr, label='ROC curve')
            sns.plt.plot([0, 1], [0, 1], 'k--')
            sns.plt.xlim([0.0, 1.0])
            sns.plt.ylim([0.0, 1.05])
            sns.plt.xlabel('False Positive Rate')
            sns.plt.ylabel('True Positive Rate')
            sns.plt.title('Receiver operating characteristic example')
            sns.plt.legend(loc="lower right")
            sns.plt.show()
        return df

    df.loc[experiment_name, 'dataset'] = dataset
    df.loc[experiment_name, 'bayesian_approx'] = bayesian_approximation
    df.loc[experiment_name, 'inside_labels'] = str(inside_labels)
    df.loc[experiment_name, 'epochs'] = epochs
    df = anomaly_detection(test_entropy_deterministic, "Classical entropy", df)
    df = anomaly_detection(test_mean_std_deterministic, "Classical prediction",
                           df)
    df = anomaly_detection(test_entropy_bayesian, "Bayesian entropy", df)
    df = anomaly_detection(test_mean_std_bayesian, "Bayesian prediction", df)

    return df
示例#34
0
def poolfn(pool_size, ignore_border, stride, pad, mode):
    """Compile a Theano function that applies ``pool_2d`` to a 4D tensor.

    ``stride`` and ``pad`` are forwarded as the ``st`` and ``padding``
    keywords; the compiled function allows input downcasting.
    """
    images = T.tensor4()
    pooled = pool_2d(images, pool_size,
                     ignore_border=ignore_border,
                     st=stride,
                     padding=pad,
                     mode=mode)
    return theano.function([images], pooled, allow_input_downcast=True)
random_seed(args.seed)

# Symbolic network input: a float32 4D tensor for the convnet, a matrix for
# the MLP; any other model name is rejected.
if args.model not in ('convnet', 'mlp'):
    raise AttributeError
x = T.ftensor4('x') if args.model == 'convnet' else T.matrix('x')
y = T.matrix('y')
lr_ele = T.fscalar('lr_ele')

lr_ele_true = np.array(args.lrEle, theano.config.floatX)
mom = args.momEle  # momentum
lr_hyper = T.fscalar('lr_hyper')
grad_valid_weight = T.tensor4('grad_valid_weight')

model = DenseNet(x=x, y=y, args=args)
#model = ConvNet(x=x, y=y, args=args)

# One zero-initialised shared "velocity" per parameter, with the same
# broadcast pattern and the parameter's name plus '_vel'.
velocities = [
    theano.shared(
        np.asarray(param.get_value(borrow=True) * 0.,
                   dtype=theano.config.floatX),
        broadcastable=param.broadcastable,
        name=param.name + '_vel')
    for param in model.params_theta
]
lambda_velocities = [
    theano.shared(
        np.asarray(lamb.get_value(borrow=True) * 0.,
                   dtype=theano.config.floatX),
        broadcastable=lamb.broadcastable,
        name=lamb.name + '_vel')
    for lamb in model.params_lambda
]
momHyper = args.momHyper
momLlr = args.momLlr

X_elementary, Y_elementary, X_test, Y_test = load_dataset(args)  # normalized
#Use a large validation set (as in CPU experiments) to avoid overfitting the hyperparameters
示例#36
0
    # Flatten the test labels to a 1-D array (replaces the earlier np.hstack
    # variant kept below for reference).
    #test_set.y = np.hstack(test_set.y)
    test_set.y = test_set.y.reshape(-1)

    # One-hot encoding of the labels is disabled; the error expression below
    # compares argmax predictions against the label vector directly.
    #test_set.y = np.float32(np.eye(10)[test_set.y])

    # Disabled debugging output.
    #print(test_set.X.shape)
    #print(test_set.y.shape)
    #print(test_set.X)
    #print(test_set.y)
    #exit(0)

    print('Building MLP...')

    # Prepare Theano variables for inputs and targets.
    # NOTE(review): `input` shadows the Python builtin of the same name.
    input = T.tensor4('inputs')
    target = T.vector('targets')

    # Build the network via the project helper; presumably the second argument
    # (10) is the number of output classes -- TODO confirm against lfc.genLfcInf.
    mlp = lfc.genLfcInf(input, 10)

    # Deterministic forward pass; the error is the mean rate at which the
    # argmax over axis 1 differs from the target, computed in floatX.
    test_output = lasagne.layers.get_output(mlp, deterministic=True)
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1), target),
                      dtype=theano.config.floatX)

    # Compiled function: (inputs, targets) -> mean classification error.
    val_fn = theano.function([input, target], test_err)

    print("Loading the trained parameters and binarizing the weights...")

    # Read every array stored in the .npz archive, in 'arr_0', 'arr_1', ...
    # order. The w1a1 weight file is an alternative left disabled.
    # with np.load('../weights/mnist-w1a1.npz') as f:
    with np.load('../weights/mnist-w1a2.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
示例#37
0
def _bn_conv_same(incoming, num_filters, filter_size):
    """Stride-1, 'same'-padded leaky-ReLU Conv2DLayer wrapped in batch_norm."""
    return batch_norm(
        layers.Conv2DLayer(incoming,
                           num_filters,
                           filter_size=filter_size,
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))


def _bn_conv_1x1(incoming, num_filters, nonlinearity, **conv_kwargs):
    """1x1 Conv2DLayer (default stride/padding) wrapped in batch_norm."""
    return batch_norm(
        layers.Conv2DLayer(incoming,
                           num_filters,
                           filter_size=(1, 1),
                           nonlinearity=nonlinearity,
                           **conv_kwargs))


def _bn_deconv_stack(incoming, specs):
    """Chain batch-normalized leaky-ReLU Deconv2DLayers, one per spec.

    Each spec is a ``(num_filters, filter_size, stride, crop)`` tuple.
    """
    layer = incoming
    for num_filters, filter_size, stride, crop in specs:
        layer = batch_norm(
            layers.Deconv2DLayer(layer,
                                 num_filters,
                                 filter_size=filter_size,
                                 stride=stride,
                                 crop=crop,
                                 nonlinearity=leaky_rectify))
    return layer


def _avg_pool(incoming, size):
    """Square 'average_inc_pad' Pool2DLayer whose stride equals its window."""
    return layers.Pool2DLayer(incoming,
                              pool_size=(size, size),
                              stride=size,
                              mode='average_inc_pad')


def _linear_deconv_1x1(incoming):
    """Un-normalized 1x1 Deconv2DLayer with 3 output channels, identity output."""
    return layers.Deconv2DLayer(incoming,
                                3,
                                filter_size=(1, 1),
                                stride=1,
                                crop='same',
                                nonlinearity=identity)


def build_network_from_ae(classn):
    """Rebuild the autoencoder graph, load its weights, and add a classifier.

    The function first recreates the full autoencoder (encoder, decoder and
    the "global feature" branch) so that the parameter values pickled in the
    module-level ``filename_model_ae`` can be restored with
    ``set_all_param_values``.  It then attaches new, freshly initialised
    layers on top of the encoder output and the global-feature layer, merges
    them with an auxiliary feature input, and ends in a sigmoid output layer.

    Reads the module-level names ``PS`` (input patch side length),
    ``aug_fea_n`` (width of the auxiliary feature input) and
    ``filename_model_ae`` (path of the pickled autoencoder parameters).

    Args:
        classn: Number of units in the final sigmoid ``DenseLayer``.

    Returns:
        A tuple ``(network, new_params, input_var, aug_var, target_var)``:
        the output layer, the ``W``/``b`` parameters of the newly added
        layers only, and the symbolic image, auxiliary-feature and target
        variables.
    """
    input_var = T.tensor4('input_var')

    # ---- Encoder trunk -----------------------------------------------------
    layer = layers.InputLayer(shape=(None, 3, PS, PS), input_var=input_var)
    layer = _bn_conv_same(layer, 100, (5, 5))
    layer = _bn_conv_same(layer, 120, (5, 5))
    layer = _avg_pool(layer, 2)
    layer = _bn_conv_same(layer, 240, (3, 3))
    layer = _bn_conv_same(layer, 320, (3, 3))
    layer = _avg_pool(layer, 2)
    layer = _bn_conv_same(layer, 640, (3, 3))
    prely = _bn_conv_same(layer, 1024, (3, 3))

    # ---- Feature map and mask, merged into the encoder output ---------------
    featm = _bn_conv_1x1(prely, 640, leaky_rectify)
    feat_map = _bn_conv_1x1(featm, 100, rectify, name="feat_map")
    maskm = _bn_conv_1x1(prely, 100, leaky_rectify)
    # The single-channel mask pre-activation is normalized without the
    # learnable shift/scale (beta and gamma are disabled).
    mask_rep = batch_norm(layers.Conv2DLayer(maskm,
                                             1,
                                             filter_size=(1, 1),
                                             nonlinearity=None),
                          beta=None,
                          gamma=None)
    mask_map = SoftThresPerc(mask_rep,
                             perc=0.0,
                             alpha=96.0,
                             beta=init.Constant(0.5),
                             tight=100.0,
                             name="mask_map")
    enlyr = ChInnerProdMerge(feat_map, mask_map, name="encoder")

    # ---- Decoder (mirrors the trunk; two stride-2 deconvs undo the pools) ---
    layer = _bn_deconv_stack(enlyr, [
        (1024, (3, 3), 1, 'same'),
        (640, (3, 3), 1, 'same'),
        (640, (4, 4), 2, (1, 1)),
        (320, (3, 3), 1, 'same'),
        (320, (3, 3), 1, 'same'),
        (240, (4, 4), 2, (1, 1)),
        (120, (5, 5), 1, 'same'),
        (100, (5, 5), 1, 'same'),
    ])
    layer = _linear_deconv_1x1(layer)

    # ---- Global-feature branch ----------------------------------------------
    glblf = _bn_conv_1x1(prely, 128, leaky_rectify)
    glblf = _avg_pool(glblf, 5)
    glblf = _bn_conv_same(glblf, 64, (3, 3))
    # Here the name is given to batch_norm (not to the convolution itself).
    gllyr = batch_norm(layers.Conv2DLayer(glblf,
                                          5,
                                          filter_size=(1, 1),
                                          nonlinearity=rectify),
                       name="global_feature")

    glblf = _bn_deconv_stack(gllyr, [
        (256, (3, 3), 1, 'same'),
        (128, (3, 3), 1, 'same'),
        (128, (9, 9), 5, (2, 2)),
        (128, (3, 3), 1, 'same'),
        (128, (3, 3), 1, 'same'),
        (64, (4, 4), 2, (1, 1)),
        (64, (3, 3), 1, 'same'),
        (64, (3, 3), 1, 'same'),
        (32, (4, 4), 2, (1, 1)),
        (32, (3, 3), 1, 'same'),
        (32, (3, 3), 1, 'same'),
    ])
    glblf = _linear_deconv_1x1(glblf)

    # ---- Autoencoder output and pretrained weights --------------------------
    layer = layers.ElemwiseSumLayer([layer, glblf])
    ae_output = ReshapeLayer(layer, ([0], -1))

    # Close the model file deterministically instead of leaking the handle.
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(filename_model_ae, 'rb') as model_file:
        pretrained_values = pickle.load(model_file)
    layers.set_all_param_values(ae_output, pretrained_values)

    # Scale the loaded mask threshold parameter down to 80% of its value.
    mask_map.beta.set_value(np.float32(0.8 * mask_map.beta.get_value()))

    # ---- Newly added classification layers ----------------------------------
    aug_var = T.matrix('aug_var')
    target_var = T.imatrix('targets')

    # Four plain (not batch-normalized) 1x1 convolutions on the encoder output.
    add_a = layers.Conv2DLayer(enlyr, 320, filter_size=(1, 1),
                               nonlinearity=leaky_rectify)
    add_b = layers.Conv2DLayer(add_a, 320, filter_size=(1, 1),
                               nonlinearity=leaky_rectify)
    add_c = layers.Conv2DLayer(add_b, 320, filter_size=(1, 1),
                               nonlinearity=leaky_rectify)
    add_d = layers.Conv2DLayer(add_c, 320, filter_size=(1, 1),
                               nonlinearity=leaky_rectify)
    add_0 = _avg_pool(add_d, 15)
    add_1 = layers.DenseLayer(add_0, 100, nonlinearity=leaky_rectify)

    # Dense stack on the global-feature layer.
    add_2 = layers.DenseLayer(gllyr, 320, nonlinearity=leaky_rectify)
    add_3 = layers.DenseLayer(add_2, 320, nonlinearity=leaky_rectify)
    add_4 = layers.DenseLayer(add_3, 100, nonlinearity=leaky_rectify)

    # Auxiliary features enter through their own input layer.
    aug_layer = layers.InputLayer(shape=(None, aug_fea_n), input_var=aug_var)

    cat_layer = lasagne.layers.ConcatLayer([add_1, add_4, aug_layer], axis=1)

    hidden_layer = layers.DenseLayer(cat_layer, 80, nonlinearity=leaky_rectify)
    network = layers.DenseLayer(hidden_layer, classn, nonlinearity=sigmoid)

    # Only the parameters of the layers added above (W then b for each, in
    # creation order); add_0 is a pooling layer and has no parameters.
    added_layers = [add_a, add_b, add_c, add_d, add_1, add_2, add_3, add_4,
                    hidden_layer, network]
    new_params = []
    for added in added_layers:
        new_params.extend([added.W, added.b])

    return network, new_params, input_var, aug_var, target_var
示例#38
0
    # View the raw data as (50000, 3, 32, 32) and draw 1000 images at random
    # indices (np.random.randint samples with replacement).
    reshaped_image = cifar_data['data'].reshape(
        50000, 3, 32, 32)[np.random.randint(50000, size=1000), :, :, :]
    # Same images with the size-3 axis moved last: (N, 32, 32, 3).
    transposed_image = reshaped_image.transpose(0, 2, 3, 1)

    # Setting learning rate (shared variable holding the initial value as floatX)
    l_r = theano.shared(lasagne.utils.floatX(args.initial_lr))
    # NOTE(review): under Python 2 with an integer batch_size this `/` is an
    # integer division, so round() would have no effect -- confirm intent.
    batch_number = int(round(len(reshaped_image) / args.batch_size))

    # Noise assignment: one seeded NumPy RNG derives the seeds for both the
    # Theano random stream and Lasagne's RNG.
    rng = np.random.RandomState(args.seed)
    theano_rng = RandomStreams(rng.randint(2**15))
    lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15)))
    # Two independent standard-normal noise tensors of shape (batch_size, 100).
    noise_dim = (args.batch_size, 100)
    noise_fg = theano_rng.normal(size=noise_dim)
    noise_bg = theano_rng.normal(size=noise_dim)
    x_inp = tensor.tensor4('x_inp', dtype='float32')

    # Build the network (construct_gen / build_desc are project helpers
    # defined elsewhere; build_desc returns two layers, used below as the
    # discriminator output and a feature layer).
    gen = construct_gen(noise_bg, noise_fg, batch_size=args.batch_size)
    disc, features = build_desc(x_inp)

    # Output of discriminator with original images. training phase, so non deterministic
    disc_out = lasagne.layers.get_output(disc, x_inp, deterministic=False)
    # Generator output, then the discriminator and feature layer evaluated on
    # it, and the feature layer evaluated on the real input.
    gen_out = lasagne.layers.get_output(gen)
    disc_over_gen = lasagne.layers.get_output(disc, gen_out)
    true_features = lasagne.layers.get_output(features, x_inp)
    fake_features = lasagne.layers.get_output(features, gen_out)
    # Loss functions. 1) Gen's 2) Disc's for predicting correctly 3) Feature matching loss
    false_loss = log_sum_exp(disc_over_gen)
    truth_loss = log_sum_exp(disc_out)
    disc_loss = -0.5 * tensor.mean(truth_loss) + 0.5 * tensor.mean(
    def __init__(self):
        ####################################
        #       Create model               #
        ####################################

        # Create tensor variables to store input / output data
        self.X = T.tensor4('X')

        # Create shared variable for input
        net = ConvNeuralNet()
        net.net_name = 'SSD Net'

        _batch_size = self.X.shape[0]

        # Input
        net.layer['input_4d'] = InputLayer(net, self.X)

        net.layer_opts['pool_boder_mode']    = 1
        net.layer_opts['conv2D_border_mode'] = 1

        # Stack 1
        net.layer_opts['conv2D_filter_shape'] = (64, 3, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv1_1_W'
        net.layer_opts['conv2D_bName'] = 'conv1_1_b'
        net.layer['conv1_1'] = ConvLayer(net, net.layer['input_4d'].output)
        net.layer['relu1_1'] = ReLULayer(net.layer['conv1_1'].output)

        net.layer_opts['conv2D_filter_shape'] = (64, 64, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv1_2_W'
        net.layer_opts['conv2D_bName'] = 'conv1_2_b'
        net.layer['conv1_2'] = ConvLayer(net, net.layer['relu1_1'].output)
        net.layer['relu1_2'] = ReLULayer(net.layer['conv1_2'].output)

        net.layer_opts['pool_mode'] = 'max'
        net.layer['pool1']   = Pool2DLayer(net, net.layer['relu1_2'].output)

        # Stack 2
        net.layer_opts['conv2D_filter_shape'] = (128, 64, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv2_1_W'
        net.layer_opts['conv2D_bName'] = 'conv2_1_b'
        net.layer['conv2_1'] = ConvLayer(net, net.layer['pool1'].output)
        net.layer['relu2_1'] = ReLULayer(net.layer['conv2_1'].output)

        net.layer_opts['conv2D_filter_shape'] = (128, 128, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv2_2_W'
        net.layer_opts['conv2D_bName'] = 'conv2_2_b'
        net.layer['conv2_2'] = ConvLayer(net, net.layer['relu2_1'].output)
        net.layer['relu2_2'] = ReLULayer(net.layer['conv2_2'].output)

        net.layer['pool2']   = Pool2DLayer(net, net.layer['relu2_2'].output)

        # Stack 3
        net.layer_opts['conv2D_filter_shape'] = (256, 128, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv3_1_W'
        net.layer_opts['conv2D_bName'] = 'conv3_1_b'
        net.layer['conv3_1'] = ConvLayer(net, net.layer['pool2'].output)
        net.layer['relu3_1'] = ReLULayer(net.layer['conv3_1'].output)

        net.layer_opts['conv2D_filter_shape'] = (256, 256, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv3_2_W'
        net.layer_opts['conv2D_bName'] = 'conv3_2_b'
        net.layer['conv3_2'] = ConvLayer(net, net.layer['relu3_1'].output)
        net.layer['relu3_2'] = ReLULayer(net.layer['conv3_2'].output)

        net.layer_opts['conv2D_filter_shape'] = (256, 256, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv3_3_W'
        net.layer_opts['conv2D_bName'] = 'conv3_3_b'
        net.layer['conv3_3'] = ConvLayer(net, net.layer['relu3_2'].output)
        net.layer['relu3_3'] = ReLULayer(net.layer['conv3_3'].output)

        net.layer['pool3']   = Pool2DLayer(net, net.layer['relu3_3'].output)

        # Stack 4
        net.layer_opts['conv2D_filter_shape'] = (512, 256, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv4_1_W'
        net.layer_opts['conv2D_bName'] = 'conv4_1_b'
        net.layer['conv4_1'] = ConvLayer(net, net.layer['pool3'].output)
        net.layer['relu4_1'] = ReLULayer(net.layer['conv4_1'].output)

        net.layer_opts['conv2D_filter_shape'] = (512, 512, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv4_2_W'
        net.layer_opts['conv2D_bName'] = 'conv4_2_b'
        net.layer['conv4_2'] = ConvLayer(net, net.layer['relu4_1'].output)
        net.layer['relu4_2'] = ReLULayer(net.layer['conv4_2'].output)

        net.layer_opts['conv2D_filter_shape'] = (512, 512, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv4_3_W'
        net.layer_opts['conv2D_bName'] = 'conv4_3_b'
        net.layer['conv4_3'] = ConvLayer(net, net.layer['relu4_2'].output)
        net.layer['relu4_3'] = ReLULayer(net.layer['conv4_3'].output)

        net.layer['pool4']   = Pool2DLayer(net, net.layer['relu4_3'].output)
        net.layer_opts['normalize_scale']        = 20
        net.layer_opts['normalize_filter_shape'] = (512, )
        net.layer_opts['normalize_scale_name']   = 'conv4_3_scale'
        net.layer['conv4_3_norm']        = NormalizeLayer(net, net.layer['relu4_3'].output)

        # conv4_3_norm_mbox_conf
        net.layer_opts['conv2D_filter_shape'] = (84, 512, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv4_3_norm_mbox_conf_W'
        net.layer_opts['conv2D_bName'] = 'conv4_3_norm_mbox_conf_b'
        net.layer['conv4_3_norm_mbox_conf'] = ConvLayer(net, net.layer['conv4_3_norm'].output)

        net.layer_opts['permute_dimension']       = (0, 2, 3, 1)
        net.layer['conv4_3_norm_mbox_conf_perm'] = PermuteLayer(net, net.layer['conv4_3_norm_mbox_conf'].output)
        net.layer_opts['flatten_ndim']            = 2
        net.layer['conv4_3_norm_mbox_conf_flat'] = FlattenLayer(net, net.layer['conv4_3_norm_mbox_conf_perm'].output)

        # conv4_3_norm_mbox_loc
        net.layer_opts['conv2D_filter_shape'] = (16, 512, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv4_3_norm_mbox_loc_W'
        net.layer_opts['conv2D_bName'] = 'conv4_3_norm_mbox_loc_b'
        net.layer['conv4_3_norm_mbox_loc'] = ConvLayer(net, net.layer['conv4_3_norm'].output)

        net.layer_opts['permute_dimension']      = (0, 2, 3, 1)
        net.layer['conv4_3_norm_mbox_loc_perm'] = PermuteLayer(net, net.layer['conv4_3_norm_mbox_loc'].output)
        net.layer_opts['flatten_ndim']           = 2
        net.layer['conv4_3_norm_mbox_loc_flat'] = FlattenLayer(net, net.layer['conv4_3_norm_mbox_loc_perm'].output)

        # Stack 5
        net.layer_opts['conv2D_filter_shape'] = (512, 512, 3, 3)
        net.layer_opts['conv2D_WName'] = 'conv5_1_W'
        net.layer_opts['conv2D_bName'] = 'conv5_1_b'
        net.layer['conv5_1'] = ConvLayer(net, net.layer['pool4'].output)
        net.layer['relu5_1'] = ReLULayer(net.layer['conv5_1'].output)

        net.layer_opts['conv2D_filter_shape'] = (512, 512, 3, 3)
        net.layer_opts['conv2D_WName'] = 'conv5_2_W'
        net.layer_opts['conv2D_bName'] = 'conv5_2_b'
        net.layer['conv5_2'] = ConvLayer(net, net.layer['relu5_1'].output)
        net.layer['relu5_2'] = ReLULayer(net.layer['conv5_2'].output)

        net.layer_opts['conv2D_filter_shape'] = (512, 512, 3, 3)
        net.layer_opts['conv2D_WName'] = 'conv5_3_W'
        net.layer_opts['conv2D_bName'] = 'conv5_3_b'
        net.layer['conv5_3'] = ConvLayer(net, net.layer['relu5_2'].output)
        net.layer['relu5_3'] = ReLULayer(net.layer['conv5_3'].output)

        net.layer_opts['pool_ignore_border'] = True
        net.layer_opts['pool_filter_size']   = (3, 3)
        net.layer_opts['pool_stride']        = (1, 1)
        net.layer_opts['pool_padding']       = (1, 1)
        net.layer['pool5']    = Pool2DLayer(net, net.layer['relu5_3'].output)

        # fc6 and fc7
        net.layer_opts['conv2D_filter_shape']    = (1024, 512, 3, 3)
        net.layer_opts['conv2D_stride']          = (1, 1)
        net.layer_opts['conv2D_border_mode']     = (6, 6)
        net.layer_opts['conv2D_filter_dilation'] = (6, 6)
        net.layer_opts['conv2D_WName'] = 'fc6_W'
        net.layer_opts['conv2D_bName'] = 'fc6_b'
        net.layer['fc6']   = ConvLayer(net, net.layer['pool5'].output)
        net.layer['relu6'] = ReLULayer(net.layer['fc6'].output)
        net.layer_opts['conv2D_filter_dilation'] = (1, 1)        # Set default filter dilation

        net.layer_opts['conv2D_filter_shape'] = (1024, 1024, 1, 1)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = 0
        net.layer_opts['conv2D_WName']        = 'fc7_W'
        net.layer_opts['conv2D_bName']        = 'fc7_b'
        net.layer['fc7']   = ConvLayer(net, net.layer['relu6'].output)
        net.layer['relu7'] = ReLULayer(net.layer['fc7'].output)

        # First sub convolution to get predicted box
        # fc7_mbox_conf
        net.layer_opts['conv2D_filter_shape'] = (126, 1024, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName']        = 'fc7_mbox_conf_W'
        net.layer_opts['conv2D_bName']        = 'fc7_mbox_conf_b'
        net.layer['fc7_mbox_conf']  = ConvLayer(net, net.layer['relu7'].output)

        net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
        net.layer['fc7_mbox_conf_perm']    = PermuteLayer(net, net.layer['fc7_mbox_conf'].output)
        net.layer_opts['flatten_ndim']      = 2
        net.layer['fc7_mbox_conf_flat']    = FlattenLayer(net, net.layer['fc7_mbox_conf_perm'].output)

        # conv6_1 and conv6_2
        net.layer_opts['conv2D_filter_shape'] = (256, 1024, 1, 1)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = 0
        net.layer_opts['conv2D_WName']        = 'conv6_1_W'
        net.layer_opts['conv2D_bName']        = 'conv6_1_b'
        net.layer['conv6_1']      = ConvLayer(net, net.layer['relu7'].output)
        net.layer['conv6_1_relu'] = ReLULayer(net.layer['conv6_1'].output)

        net.layer_opts['conv2D_filter_shape'] = (512, 256, 3, 3)
        net.layer_opts['conv2D_stride']       = (2, 2)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName']        = 'conv6_2_W'
        net.layer_opts['conv2D_bName']        = 'conv6_2_b'
        net.layer['conv6_2'] = ConvLayer(net, net.layer['conv6_1_relu'].output)
        net.layer['conv6_2_relu'] = ReLULayer(net.layer['conv6_2'].output)

        # fc7_mbox_loc
        net.layer_opts['conv2D_filter_shape'] = (24, 1024, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName']        = 'fc7_mbox_loc_W'
        net.layer_opts['conv2D_bName']        = 'fc7_mbox_loc_b'
        net.layer['fc7_mbox_loc'] = ConvLayer(net, net.layer['relu7'].output)

        net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
        net.layer['fc7_mbox_loc_perm']     = PermuteLayer(net, net.layer['fc7_mbox_loc'].output)
        net.layer_opts['flatten_ndim']      = 2
        net.layer['fc7_mbox_loc_flat']     = FlattenLayer(net, net.layer['fc7_mbox_loc_perm'].output)

        # Second sub convolution to get predicted box
        # conv6_2_mbox_conf
        net.layer_opts['conv2D_filter_shape'] = (126, 512, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName']        = 'conv6_2_mbox_conf_W'
        net.layer_opts['conv2D_bName']        = 'conv6_2_mbox_conf_b'
        net.layer['conv6_2_mbox_conf'] = ConvLayer(net, net.layer['conv6_2_relu'].output)

        net.layer_opts['permute_dimension']  = (0, 2, 3, 1)
        net.layer['conv6_2_mbox_conf_perm'] = PermuteLayer(net, net.layer['conv6_2_mbox_conf'].output)
        net.layer_opts['flatten_ndim']       = 2
        net.layer['conv6_2_mbox_conf_flat'] = FlattenLayer(net, net.layer['conv6_2_mbox_conf_perm'].output)

        # conv7_1 and conv7_2
        net.layer_opts['conv2D_filter_shape'] = (128, 512, 1, 1)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = 0
        net.layer_opts['conv2D_WName']        = 'conv7_1_W'
        net.layer_opts['conv2D_bName']        = 'conv7_1_b'
        net.layer['conv7_1']      = ConvLayer(net, net.layer['conv6_2_relu'].output)
        net.layer['conv7_1_relu'] = ReLULayer(net.layer['conv7_1'].output)

        net.layer_opts['conv2D_filter_shape'] = (256, 128, 3, 3)
        net.layer_opts['conv2D_stride']       = (2, 2)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName']        = 'conv7_2_W'
        net.layer_opts['conv2D_bName']        = 'conv7_2_b'
        net.layer['conv7_2']      = ConvLayer(net, net.layer['conv7_1_relu'].output)
        net.layer['conv7_2_relu'] = ReLULayer(net.layer['conv7_2'].output)

        # conv6_2_mbox_loc
        net.layer_opts['conv2D_filter_shape'] = (24, 512, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName']        = 'conv6_2_mbox_loc_W'
        net.layer_opts['conv2D_bName']        = 'conv6_2_mbox_loc_b'
        net.layer['conv6_2_mbox_loc'] = ConvLayer(net, net.layer['conv6_2_relu'].output)

        net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
        net.layer['conv6_2_mbox_loc_perm'] = PermuteLayer(net, net.layer['conv6_2_mbox_loc'].output)
        net.layer_opts['flatten_ndim']      = 2
        net.layer['conv6_2_mbox_loc_flat'] = FlattenLayer(net, net.layer['conv6_2_mbox_loc_perm'].output)

        # Third sub convolution to get predicted box
        # conv7_2_mbox_conf
        net.layer_opts['conv2D_filter_shape'] = (126, 256, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName']        = 'conv7_2_mbox_conf_W'
        net.layer_opts['conv2D_bName']        = 'conv7_2_mbox_conf_b'
        net.layer['conv7_2_mbox_conf'] = ConvLayer(net, net.layer['conv7_2_relu'].output)

        net.layer_opts['permute_dimension']  = (0, 2, 3, 1)
        net.layer['conv7_2_mbox_conf_perm'] = PermuteLayer(net, net.layer['conv7_2_mbox_conf'].output)
        net.layer_opts['flatten_ndim']       = 2
        net.layer['conv7_2_mbox_conf_flat'] = FlattenLayer(net, net.layer['conv7_2_mbox_conf_perm'].output)

        # conv8_1 and conv8_2
        net.layer_opts['conv2D_filter_shape'] = (128, 256, 1, 1)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = 0
        net.layer_opts['conv2D_WName']        = 'conv8_1_W'
        net.layer_opts['conv2D_bName']        = 'conv8_1_b'
        net.layer['conv8_1']      = ConvLayer(net, net.layer['conv7_2_relu'].output)
        net.layer['conv8_1_relu'] = ReLULayer(net.layer['conv8_1'].output)

        net.layer_opts['conv2D_filter_shape'] = (256, 128, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = 0
        net.layer_opts['conv2D_WName']        = 'conv8_2_W'
        net.layer_opts['conv2D_bName']        = 'conv8_2_b'
        net.layer['conv8_2'] = ConvLayer(net, net.layer['conv8_1_relu'].output)
        net.layer['conv8_2_relu'] = ReLULayer(net.layer['conv8_2'].output)

        # conv7_2_mbox_loc
        net.layer_opts['conv2D_filter_shape'] = (24, 256, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv7_2_mbox_loc_W'
        net.layer_opts['conv2D_bName'] = 'conv7_2_mbox_loc_b'
        net.layer['conv7_2_mbox_loc'] = ConvLayer(net, net.layer['conv7_2_relu'].output)

        net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
        net.layer['conv7_2_mbox_loc_perm'] = PermuteLayer(net, net.layer['conv7_2_mbox_loc'].output)
        net.layer_opts['flatten_ndim']      = 2
        net.layer['conv7_2_mbox_loc_flat'] = FlattenLayer(net, net.layer['conv7_2_mbox_loc_perm'].output)

        # Fourth sub convolution to get predicted box
        # conv8_2_mbox_conf
        net.layer_opts['conv2D_filter_shape'] = (84, 256, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName']        = 'conv8_2_mbox_conf_W'
        net.layer_opts['conv2D_bName']        = 'conv8_2_mbox_conf_b'
        net.layer['conv8_2_mbox_conf'] = ConvLayer(net, net.layer['conv8_2_relu'].output)

        net.layer_opts['permute_dimension']  = (0, 2, 3, 1)
        net.layer['conv8_2_mbox_conf_perm'] = PermuteLayer(net, net.layer['conv8_2_mbox_conf'].output)
        net.layer_opts['flatten_ndim']       = 2
        net.layer['conv8_2_mbox_conf_flat'] = FlattenLayer(net, net.layer['conv8_2_mbox_conf_perm'].output)

        # conv9_1 and conv9_2
        net.layer_opts['conv2D_filter_shape'] = (128, 256, 1, 1)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = 0
        net.layer_opts['conv2D_WName']        = 'conv9_1_W'
        net.layer_opts['conv2D_bName']        = 'conv9_1_b'
        net.layer['conv9_1']      = ConvLayer(net, net.layer['conv8_2_relu'].output)
        net.layer['conv9_1_relu'] = ReLULayer(net.layer['conv9_1'].output)

        net.layer_opts['conv2D_filter_shape'] = (256, 128, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = 0
        net.layer_opts['conv2D_WName'] = 'conv9_2_W'
        net.layer_opts['conv2D_bName'] = 'conv9_2_b'
        net.layer['conv9_2']      = ConvLayer(net, net.layer['conv9_1_relu'].output)
        net.layer['conv9_2_relu'] = ReLULayer(net.layer['conv9_2'].output)

        # conv8_2_mbox_loc
        net.layer_opts['conv2D_filter_shape'] = (16, 256, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv8_2_mbox_loc_W'
        net.layer_opts['conv2D_bName'] = 'conv8_2_mbox_loc_b'
        net.layer['conv8_2_mbox_loc'] = ConvLayer(net, net.layer['conv8_2_relu'].output)

        net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
        net.layer['conv8_2_mbox_loc_perm'] = PermuteLayer(net, net.layer['conv8_2_mbox_loc'].output)
        net.layer_opts['flatten_ndim']      = 2
        net.layer['conv8_2_mbox_loc_flat'] = FlattenLayer(net, net.layer['conv8_2_mbox_loc_perm'].output)

        # Fifth sub convolution to get predicted box
        # conv9_2_mbox_conf
        net.layer_opts['conv2D_filter_shape'] = (84, 256, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv9_2_mbox_conf_W'
        net.layer_opts['conv2D_bName'] = 'conv9_2_mbox_conf_b'
        net.layer['conv9_2_mbox_conf'] = ConvLayer(net, net.layer['conv9_2_relu'].output)

        net.layer_opts['permute_dimension']  = (0, 2, 3, 1)
        net.layer['conv9_2_mbox_conf_perm'] = PermuteLayer(net, net.layer['conv9_2_mbox_conf'].output)
        net.layer_opts['flatten_ndim']       = 2
        net.layer['conv9_2_mbox_conf_flat'] = FlattenLayer(net, net.layer['conv9_2_mbox_conf_perm'].output)

        # conv9_2_mbox_loc
        net.layer_opts['conv2D_filter_shape'] = (16, 256, 3, 3)
        net.layer_opts['conv2D_stride']       = (1, 1)
        net.layer_opts['conv2D_border_mode']  = (1, 1)
        net.layer_opts['conv2D_WName'] = 'conv9_2_mbox_loc_W'
        net.layer_opts['conv2D_bName'] = 'conv9_2_mbox_loc_b'
        net.layer['conv9_2_mbox_loc'] = ConvLayer(net, net.layer['conv9_2_relu'].output)

        net.layer_opts['permute_dimension'] = (0, 2, 3, 1)
        net.layer['conv9_2_mbox_loc_perm'] = PermuteLayer(net, net.layer['conv9_2_mbox_loc'].output)
        net.layer_opts['flatten_ndim']      = 2
        net.layer['conv9_2_mbox_loc_flat'] = FlattenLayer(net, net.layer['conv9_2_mbox_loc_perm'].output)

        # Concat mbox_conf and mbox_loc
        net.layer['mbox_conf'] = ConcatLayer(net, [net.layer['conv4_3_norm_mbox_conf_flat'].output,
                                                   net.layer['fc7_mbox_conf_flat'].output,
                                                   net.layer['conv6_2_mbox_conf_flat'].output,
                                                   net.layer['conv7_2_mbox_conf_flat'].output,
                                                   net.layer['conv8_2_mbox_conf_flat'].output,
                                                   net.layer['conv9_2_mbox_conf_flat'].output])
        net.layer['mbox_loc']  = ConcatLayer(net, [net.layer['conv4_3_norm_mbox_loc_flat'].output,
                                                   net.layer['fc7_mbox_loc_flat'].output,
                                                   net.layer['conv6_2_mbox_loc_flat'].output,
                                                   net.layer['conv7_2_mbox_loc_flat'].output,
                                                   net.layer['conv8_2_mbox_loc_flat'].output,
                                                   net.layer['conv9_2_mbox_loc_flat'].output])

        net.layer_opts['reshape_new_shape'] = (_batch_size, 8732, 21)
        net.layer['mbox_conf_reshape']     = ReshapeLayer(net, net.layer['mbox_conf'].output)

        net.layer_opts['softmax_axis']  = 2
        net.layer['mbox_conf_softmax'] = SoftmaxLayer(net, net.layer['mbox_conf_reshape'].output)

        net.layer_opts['reshape_new_shape'] = (_batch_size, 8732, 4)
        net.layer['mbox_loc_flatten']      = ReshapeLayer(net, net.layer['mbox_loc'].output)

        self.net = net

        # Predict function
        label = T.argmax(net.layer['mbox_conf_softmax'].output, axis = 2, keepdims = True)
        self.pred_func = theano.function(
                            inputs  = [self.X],
                            outputs = [label,
                                       net.layer['mbox_loc_flatten'].output])

        self.test_func = theano.function(
                            inputs  = [self.X],
                            outputs = [net.layer['mbox_conf_softmax'].output])
示例#40
0
文件: ocr2.py 项目: Xrave/speed
    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    network = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            num_units=256,
            nonlinearity=lasagne.nonlinearities.rectify)

    # And, finally, the 10-unit output layer with 50% dropout on its inputs:
    network = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            num_units=10,
            nonlinearity=lasagne.nonlinearities.softmax)

    return network

# Symbolic 4-D input and the network built on top of it.
input_var = T.tensor4('input_var')
network=build_cnn(input_var=input_var)
# Restore previously saved parameters: 'model.npz' is expected to hold one
# array per parameter under the keys 'arr_0', 'arr_1', ... which are read back
# in index order and pushed into the network.
with np.load('model.npz') as f:
    param_values = [f['arr_%d' % i] for i in range(len(f.files))]
lasagne.layers.set_all_param_values(network, param_values)

# Deterministic forward pass (deterministic=True), reduced to the argmax over
# axis 1; val_fn maps an input batch to a one-element list holding that argmax.
test_prediction = lasagne.layers.get_output(network, deterministic=True)
pred=T.argmax(test_prediction, axis=1)
val_fn = theano.function([input_var], [pred])

# fourcc=cv2.cv.CV_FOURCC('X','V','I','D')
# # fourcc = cv2.cv.CV_FOURCC(*'FMP4')
# # o = cv2.VideoWriter('output.avi',fourcc, 60, (28,28),0)
# o = VideoWriter("output.avi", frameSize=(28,28))
# o.open()
示例#41
0
 def _build_expression(self):
     """Create the symbolic 4-D input and expose it, unchanged, as the expression."""
     symbolic_input = T.tensor4(dtype=self.input_dtype)
     self.input_ = symbolic_input
     self.expression_ = symbolic_input
示例#42
0
 def _build_expression(self):
     """Create the symbolic 4-D input and max-pool it to form the expression.

     ``self.max_pool_stride`` is handed to ``max_pool_2d`` as its second
     positional argument, together with ``ignore_border=True``.
     """
     symbolic_input = T.tensor4(dtype=self.input_dtype)
     self.input_ = symbolic_input
     self.expression_ = max_pool_2d(
         symbolic_input, self.max_pool_stride, ignore_border=True)
示例#43
0
def main():
    """Time loss and gradient evaluation through plain Theano and through synk.

    Builds an MLP with Lasagne, wraps its mean categorical cross-entropy and
    the flattened gradient in ``synk.function`` objects, then prints the
    wall-clock time of ``ITERS`` evaluations for three variants: the
    ``as_theano`` path on two half batches, synk fed from the arrays returned
    by ``get_input_shmems()``, and synk fed with the original input arrays.
    """
    B_SIZE = 10000
    MID = B_SIZE // 2
    ITERS = 10

    synk.fork()
    # Imported only after synk.fork().
    # NOTE(review): the ordering presumably matters for device setup - confirm.
    import lasagne

    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    network = build_mlp(input_var)
    # network = build_cnn(input_var)
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(
        prediction, target_var).mean()
    params = lasagne.layers.get_all_params(network, trainable=True)

    # One flat vector holding every parameter gradient.
    flat_grad = T.concatenate(
        list(map(T.flatten, theano.grad(loss, wrt=params))))

    inputs = [input_var, target_var]
    f_loss = synk.function(inputs, loss,
                           collect_modes=[None], reduce_ops="sum")
    f_grad = synk.function(inputs, flat_grad, collect_modes=[None])

    synk.distribute()

    x_data, y_data = make_data([1, 28, 28], B_SIZE)

    # Untimed first calls; the results are not used.
    f_loss(x_data, y_data)
    f_grad(x_data, y_data)

    x_shmem, y_shmem = f_loss.get_input_shmems()
    x_dat_sh, y_dat_sh = x_shmem[:B_SIZE], y_shmem[:B_SIZE]
    x_data_1, x_data_2 = x_data[:MID], x_data[MID:]
    y_data_1, y_data_2 = y_data[:MID], y_data[MID:]

    def _report(label, call):
        # Time ITERS back-to-back invocations of `call` and print the total.
        started = timer()
        for _ in range(ITERS):
            call()
        print(label, timer() - started)

    def _on_halves(fn):
        # One iteration of the plain-Theano path covers both half batches.
        def call():
            fn.as_theano(x_data_1, y_data_1)
            fn.as_theano(x_data_2, y_data_2)
        return call

    _report("theano loss_time: ", _on_halves(f_loss))
    _report("theano grad_time: ", _on_halves(f_grad))
    _report("synk shmem loss_time: ", lambda: f_loss(x_dat_sh, y_dat_sh))
    _report("synk shmem grad_time: ", lambda: f_grad(x_dat_sh, y_dat_sh))
    _report("synk new input loss_time: ", lambda: f_loss(x_data, y_data))
    _report("synk new input grad_time: ", lambda: f_grad(x_data, y_data))
示例#44
0
def transfer(photo, style, iterations=9,
             contentCost=0.001, styleCost=0.2e6, varCost=0.1e-7, rowACCost=1.e-9, colACCost=1e-9):
    """Optimise a noise image to match *photo* in content and *style* in style.

    The loss is a weighted sum of a content term on 'conv4_2', style terms on
    'conv1_1' ... 'conv5_1', a total-variation penalty and, when the module
    flags ROW_AC_LOSS / COL_AC_LOSS are set, row / column autocorrelation
    terms. It is minimised with L-BFGS (``scipy.optimize.fmin_l_bfgs_b``).

    :param photo: 4-D array; its last two axes are read as (h, w)
    :param style: 4-D array whose last two axes must equal those of *photo*
    :param iterations: the optimiser is run ``iterations - 1`` times; together
        with the initial noise image the returned list has ``iterations``
        entries
    :param contentCost: weight of the content loss
    :param styleCost: weight of each per-layer style loss
    :param varCost: weight of the total-variation penalty
    :param rowACCost: weight of the row autocorrelation loss
    :param colACCost: weight of the column autocorrelation loss
    :return: ``(net, xs)`` - the network returned by ``vggnet.buildVgg`` and the
        list of float64 images (initial noise first, then one per round)
    :raises AssertionError: if *photo* and *style* differ in height or width
    """
    # Single-argument print(...) behaves identically under Python 2 and 3.
    print("Performing image transfer, with %d iterations" % iterations)
    _, _, h, w = photo.shape
    _, _, h2, w2 = style.shape
    print(photo.shape)
    print(style.shape)
    assert h == h2 and w == w2

    net = vggnet.buildVgg(w, h)

    # Layers for loss calculation. An explicit ordered list keeps names and
    # outputs paired without relying on dict iteration order.
    layer_names = ['conv4_2', 'conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
    feature_layers = [net[name] for name in layer_names]

    # Precompute layer activations for photo and artwork
    print('Precompute activations...')
    input_im_theano = T.tensor4()
    outputs = lasagne.layers.get_output(feature_layers, input_im_theano)
    photo_features = {name: theano.shared(output.eval({input_im_theano: photo}))
                      for name, output in zip(layer_names, outputs)}
    style_features = {name: theano.shared(output.eval({input_im_theano: style}))
                      for name, output in zip(layer_names, outputs)}

    # Get expressions for layer activations for generated image
    print('Generating feature expressions')
    generated_image = theano.shared(floatX(np.random.uniform(-128, 128, (1, 3, h, w))))
    gen_features = dict(zip(
        layer_names, lasagne.layers.get_output(feature_layers, generated_image)))

    # Define loss function
    lossParts = [
        # content loss
        contentCost * losses.content(photo_features, gen_features, 'conv4_2'),
        # style loss
        styleCost * losses.style(style_features, gen_features, 'conv1_1'),
        styleCost * losses.style(style_features, gen_features, 'conv2_1'),
        styleCost * losses.style(style_features, gen_features, 'conv3_1'),
        styleCost * losses.style(style_features, gen_features, 'conv4_1'),
        styleCost * losses.style(style_features, gen_features, 'conv5_1'),
        # total variation penalty
        varCost * losses.totalVariation(generated_image),
    ]
    if ROW_AC_LOSS:
        # Row autocorrelation, computed on the raw images (layer key None).
        lossParts.append(
            rowACCost * losses.totalRowAC(style, generated_image, None))
    if COL_AC_LOSS:
        # Column autocorrelation, computed on the raw images (layer key None).
        lossParts.append(
            colACCost * losses.totalColAC(style, generated_image, None))
    totalLoss = sum(lossParts)

    # Theano functions to evaluate loss and gradient
    print('Building gradient...')
    f_loss = theano.function([], totalLoss)
    f_grad = theano.function([], T.grad(totalLoss, generated_image))

    # Initialize with a noise image
    print('Initializing noisy image...')
    generated_image.set_value(floatX(np.random.uniform(-128, 128, (1, 3, h, w))))
    xAt = generated_image.get_value().astype('float64')
    xs = [xAt]

    # Helper functions to interface with scipy.optimize: both push the
    # candidate image into the shared variable before evaluating.
    def eval_loss(x0):
        x0 = floatX(x0.reshape((1, 3, h, w)))
        generated_image.set_value(x0)
        # Losses should end up in the hundreds, or lower for mfcc
        return f_loss().astype('float64')

    def eval_grad(x0):
        x0 = floatX(x0.reshape((1, 3, h, w)))
        generated_image.set_value(x0)
        return np.array(f_grad()).flatten().astype('float64')

    # Optimize, saving the result periodically
    print('Optimizing image to reduce loss....')
    for i in range(iterations - 1):
        print(i+1)
        # The optimiser's return value is ignored; the image is read back from
        # the shared variable, i.e. the last point the helpers evaluated.
        scipy.optimize.fmin_l_bfgs_b(eval_loss, xAt.flatten(), fprime=eval_grad, maxfun=40, iprint=0)
        xAt = generated_image.get_value().astype('float64')
        xs.append(xAt)
        print(f_loss())

    return net, xs
示例#45
0
    def __init__(self,
                 input_shape=(None, 3, None, None),
                 n_classes=11,
                 n_filters_first_conv=48,
                 n_pool=4,
                 growth_rate=12,
                 n_layers_per_block=5,
                 dropout_p=0.2):
        """
        This code implements the Fully Convolutional DenseNet described in https://arxiv.org/abs/1611.09326
        The network consist of a downsampling path, where dense blocks and transition down are applied, followed
        by an upsampling path where transition up and dense blocks are applied.
        Skip connections are used between the downsampling path and the upsampling path
        Each layer is a composite function of BN - ReLU - Conv and the last layer is a softmax layer.

        :param input_shape: shape of the input batch. In the default only the channel entry (index 1) is set;
            the other entries are None
        :param n_classes: number of classes
        :param n_filters_first_conv: number of filters for the first convolution applied
        :param n_pool: number of pooling layers = number of transition down = number of transition up
        :param growth_rate: number of new feature maps created by each layer in a dense block
        :param n_layers_per_block: number of layers per block. Can be an int or a list/tuple of size 2 * n_pool + 1
        :param dropout_p: dropout rate applied after each convolution (0. for not using)
        :raises ValueError: if n_layers_per_block is neither an int nor a list/tuple
        :raises AssertionError: if a list/tuple n_layers_per_block does not have 2 * n_pool + 1 entries
        """

        # Normalise n_layers_per_block to one entry per dense block:
        # n_pool on the way down, one bottleneck, n_pool on the way up.
        n_blocks = 2 * n_pool + 1
        if isinstance(n_layers_per_block, (list, tuple)):
            assert len(n_layers_per_block) == n_blocks, (
                'n_layers_per_block must have 2 * n_pool + 1 = %d entries, got %d'
                % (n_blocks, len(n_layers_per_block)))
        elif isinstance(n_layers_per_block, int):
            n_layers_per_block = [n_layers_per_block] * n_blocks
        else:
            raise ValueError(
                'n_layers_per_block must be an int or a list/tuple, got %r'
                % (n_layers_per_block,))

        # Theano variables
        self.input_var = T.tensor4('input_var', dtype='float32')  # input image
        self.output_var = T.tensor4('output_var',
                                    dtype='float32')  # output of the network
        self.target_var = T.tensor4('target_var', dtype='float32')  # target

        #####################
        # First Convolution #
        #####################

        inputs = InputLayer(input_shape, self.input_var)

        # We perform a first convolution. All the features maps will be stored in the tensor called stack (the Tiramisu)
        stack = Conv2DLayer(inputs,
                            n_filters_first_conv,
                            filter_size=3,
                            pad='same',
                            W=HeUniform(gain='relu'),
                            nonlinearity=linear,
                            flip_filters=False)
        # The number of feature maps in the stack is stored in the variable n_filters
        n_filters = n_filters_first_conv

        #####################
        # Downsampling path #
        #####################

        skip_connection_list = []

        for i in range(n_pool):
            # Dense Block
            for _ in range(n_layers_per_block[i]):
                # Compute new feature maps
                l = BN_ReLU_Conv(stack, growth_rate, dropout_p=dropout_p)
                # And stack it : the Tiramisu is growing
                stack = ConcatLayer([stack, l])
                n_filters += growth_rate
            # At the end of the dense block, the current stack is stored in the skip_connections list
            skip_connection_list.append(stack)

            # Transition Down
            stack = TransitionDown(stack, n_filters, dropout_p)

        # Reverse so that index i matches the i-th step of the upsampling path
        skip_connection_list = skip_connection_list[::-1]

        #####################
        #     Bottleneck    #
        #####################

        # We store now the output of the next dense block in a list. We will only upsample these new feature maps
        block_to_upsample = []

        # Dense Block (n_filters is no longer read from here on, so it is not updated any more)
        for _ in range(n_layers_per_block[n_pool]):
            l = BN_ReLU_Conv(stack, growth_rate, dropout_p=dropout_p)
            block_to_upsample.append(l)
            stack = ConcatLayer([stack, l])

        #######################
        #   Upsampling path   #
        #######################

        for i in range(n_pool):
            # Transition Up ( Upsampling + concatenation with the skip connection)
            n_filters_keep = growth_rate * n_layers_per_block[n_pool + i]
            stack = TransitionUp(skip_connection_list[i], block_to_upsample,
                                 n_filters_keep)

            # Dense Block
            block_to_upsample = []
            for _ in range(n_layers_per_block[n_pool + i + 1]):
                l = BN_ReLU_Conv(stack, growth_rate, dropout_p=dropout_p)
                block_to_upsample.append(l)
                stack = ConcatLayer([stack, l])

        #####################
        #      Softmax      #
        #####################

        self.output_layer = SoftmaxLayer(stack, n_classes)
示例#46
0
def main():
    """Train an embedding network with a memory module on MNIST and report results.

    Builds the encoder with ``build_network``, trains it with Adam on the loss
    returned by ``MemoryModule.build_loss_and_updates`` (the memory updates are
    applied in the same step), prints loss / accuracy on the validation split
    after every epoch and on the test split at the end.
    """
    lrate = 1e-3
    batch_size = 32
    key_size = 256
    mem_size = 50 * 50
    k_nbrs = 128
    num_epochs = 100
    input_var = T.tensor4('x')
    target_var = T.ivector('y')

    # Single-argument print(...) behaves identically under Python 2 and 3.
    print('Loading data and creating train/test splits... ')
    X_train, y_train, X_val, y_val, X_test, y_test = load_mnist()

    # Build our 'encoding' network
    network = build_network(input_var,
                            image_size=X_train.shape[-1],
                            output_dim=key_size)
    network_embedding = nn.get_output(network, deterministic=False)

    # Initialize the module and compile graphs for training.
    # Note that this is where the difference between traditional neural network
    # classifiers comes in. Rather then computing a logistic regression, we use
    # the output of the memory module and triplet loss.
    MM = MemoryModule(mem_size, key_size, k_nbrs)
    mem_loss, mem_updates = MM.build_loss_and_updates(network_embedding,
                                                      target_var)
    mem_loss = mem_loss.mean()

    # Use the Adam optimizer for training.
    params = nn.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(mem_loss, params, lrate, beta1=0.9)

    # Whenever we update the network parameters, we'll also update the memory
    # within the memory module
    updates.update(mem_updates)

    train_fn = theano.function([input_var, target_var],
                               mem_loss,
                               updates=updates)

    # For validation, we'll follow a deterministic mapping
    determ_embedding = nn.get_output(network, deterministic=True)
    mem_pred, _ = MM.query(determ_embedding)

    # NOTE(review): the loss reported by valid_fn is still mem_loss, which was
    # built from the non-deterministic embedding above; only the accuracy uses
    # the deterministic one. Confirm this is intended.
    accuracy_expr = T.mean(T.eq(mem_pred, target_var),
                           dtype=theano.config.floatX)
    valid_fn = theano.function([input_var, target_var],
                               [mem_loss, accuracy_expr])

    # Finally, launch the training loop.
    print('Starting training...')

    # We iterate over epochs:
    for epoch in range(num_epochs):

        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train,
                                         y_train,
                                         batch_size,
                                         shuffle=True):
            inputs, targets = batch

            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val,
                                         y_val,
                                         batch_size,
                                         shuffle=False):
            inputs, targets = batch

            err, acc = valid_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(val_acc /
                                                          val_batches * 100))

    # After training, we compute and print the test error
    # (the test pass uses batches of 500 rather than batch_size):
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
        inputs, targets = batch
        err, acc = valid_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))
示例#47
0
    # darken =
    # brighten =
    augmentation_schedule = [
        'mirror', 'darken', 'blur', 'equalize_hist', 'brighten', 'noise'
    ]  # new augmentation.
    color = 'gray' if img_cnls == 1 else 'bgr'
    num_classes, dataset_ = load_data(dataset,
                                      colorspace=color,
                                      random_seed=random_seed)
    #print 'data type =', dataset_[0][0].dtype
    #print 'shape =', dataset_[0][0].shape.eval()
    assert len(mlp_layer_sizes) - 1 == len(dropout_rates)
    mlp_layer_sizes[2] = num_classes  # set num_classes based on dataset.
    if batch_size is None:
        batch_size = num_classes
    x = T.tensor4('x')  # the data is presented as rasterized images.
    learning_rate = theano.shared(
        np.asarray(initial_learning_rate, dtype=theano.config.floatX))

    classifier = CNN(rng=np.random.RandomState(seed=random_seed),
                     input=x.reshape((batch_size, img_cnls, 224, 224)),
                     ninput_chnls=img_cnls,
                     nkerns=nkerns,
                     dropout_rates=dropout_rates,
                     mlp_layer_sizes=mlp_layer_sizes,
                     activations=activations,
                     batch_size=batch_size,
                     use_bias=use_bias)

    res = test_net(classifier=classifier,
                   num_classes=num_classes,
示例#48
0
    def __init__(
            self,
            input_width,
            input_height,
            n_actions,
            discount,
            learn_rate,
            batch_size,
            rng
    ):
        """Build the Q-network and compile its training and evaluation functions.

        Stores the hyper-parameters, seeds Lasagne with *rng*, builds the
        network via ``self.build_network`` and allocates zero-filled shared
        buffers for one batch of states, next states, rewards, actions and
        terminal flags. Two Theano functions are compiled against those
        buffers: ``self._train`` (one SGD step on the summed squared TD error,
        returning ``[loss, q_vals]``) and ``self._q_vals`` (Q-values for
        ``self.states_shared``).
        """
        self.input_width, self.input_height = input_width, input_height
        self.n_actions = n_actions
        self.discount = discount
        self.lr = learn_rate
        self.batch_size = batch_size
        self.rng = rng

        # Seed Lasagne before any layer is created.
        lasagne.random.set_rng(self.rng)

        self.l_out = self.build_network(
            batch_size, input_width, input_height, n_actions)

        # Symbolic placeholders, substituted by the shared buffers via givens.
        states = t.tensor4('states')
        next_states = t.tensor4('next_states')
        rewards = t.col('rewards')
        actions = t.icol('actions')
        terminals = t.icol('terminals')

        float_x = theano.config.floatX
        frame_shape = (batch_size, 1, input_height, input_width)
        column_shape = (batch_size, 1)
        as_column = dict(broadcastable=(False, True))

        self.states_shared = theano.shared(
            np.zeros(frame_shape, dtype=float_x))
        self.next_states_shared = theano.shared(
            np.zeros(frame_shape, dtype=float_x))
        self.rewards_shared = theano.shared(
            np.zeros(column_shape, dtype=float_x), **as_column)
        self.actions_shared = theano.shared(
            np.zeros(column_shape, dtype='int32'), **as_column)
        self.terminals_shared = theano.shared(
            np.zeros(column_shape, dtype='int32'), **as_column)

        q_vals = lasagne.layers.get_output(self.l_out, states)

        # The bootstrap values are excluded from the gradient.
        next_q_vals = theano.gradient.disconnected_grad(
            lasagne.layers.get_output(self.l_out, next_states))

        # target = r + (1 - terminal) * discount * max_a' Q(s', a')
        not_terminal = t.ones_like(terminals) - terminals
        best_next = t.max(next_q_vals, axis=1, keepdims=True)
        target = rewards + not_terminal * self.discount * best_next

        # Q-value of the action actually taken, as a column.
        taken = actions.reshape((-1,))
        chosen_q = q_vals[t.arange(batch_size), taken].reshape((-1, 1))
        diff = target - chosen_q

        loss = t.sum(0.5 * diff ** 2)

        params = lasagne.layers.helper.get_all_params(self.l_out)
        updates = lasagne.updates.sgd(loss, params, self.lr)

        train_givens = {
            states: self.states_shared,
            next_states: self.next_states_shared,
            rewards: self.rewards_shared,
            actions: self.actions_shared,
            terminals: self.terminals_shared
        }
        self._train = theano.function([], [loss, q_vals], updates=updates,
                                      givens=train_givens)
        self._q_vals = theano.function([], q_vals,
                                       givens={states: self.states_shared})
示例#49
0
def main(model='mlp', num_epochs=500):
    """Train a Lasagne classifier and print per-epoch and final metrics.

    :param model: 'mlp', 'cnn', or 'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID';
        any other value prints a message and returns without training
    :param num_epochs: number of full passes over the training data
    """
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()

    # Symbolic inputs and targets.
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Pick the architecture requested by `model`.
    print("Building model and compiling functions...")
    if model == 'mlp':
        network = build_mlp(input_var)
    elif model.startswith('custom_mlp:'):
        spec = model.split(':', 1)[1]
        depth, width, drop_in, drop_hid = spec.split(',')
        network = build_custom_mlp(input_var, int(depth), int(width),
                                   float(drop_in), float(drop_hid))
    elif model == 'cnn':
        network = build_cnn(input_var)
    else:
        print("Unrecognized model type %r." % model)
        return

    # Training objective: mean cross-entropy of the stochastic forward pass.
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(
        prediction, target_var).mean()

    # SGD with Nesterov momentum over all trainable parameters.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9)

    # Evaluation expressions use a deterministic pass (deterministic=True).
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var).mean()
    accuracy = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # One compiled function per phase: a training step (applies the updates)
    # and an evaluation returning [loss, accuracy].
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    val_fn = theano.function([input_var, target_var], [test_loss, accuracy])

    def _evaluate(X, y):
        # Sum loss and accuracy of val_fn over unshuffled batches of 500.
        err_sum, acc_sum, n_batches = 0, 0, 0
        for inputs, targets in iterate_minibatches(X, y, 500, shuffle=False):
            err, acc = val_fn(inputs, targets)
            err_sum += err
            acc_sum += acc
            n_batches += 1
        return err_sum, acc_sum, n_batches

    print("Starting training...")
    for epoch in range(num_epochs):
        start_time = time.time()

        # Full pass over the training data.
        train_err = 0
        train_batches = 0
        for inputs, targets in iterate_minibatches(X_train, y_train, 500,
                                                   shuffle=True):
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # Full pass over the validation data.
        val_err, val_acc, val_batches = _evaluate(X_val, y_val)

        # Report this epoch.
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(val_acc /
                                                          val_batches * 100))

    # Final evaluation on the test split.
    test_err, test_acc, test_batches = _evaluate(X_test, y_test)
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))
示例#50
0
def training(runname, rnnType, maxPackets, packetTimeSteps, packetReverse, padOldTimeSteps, wtstd, 
             lr, decay, clippings, dimIn, dim, attentionEnc, attentionContext, numClasses, batch_size, epochs, 
             trainPercent, dataPath, loadPrepedData, channel):  # pragma: no cover
    print locals()
    print
    
    X = T.tensor4('inputs')
    Y = T.matrix('targets')
    linewt_init = IsotropicGaussian(wtstd)
    line_bias = Constant(1.0)
    rnnwt_init = IsotropicGaussian(wtstd)
    rnnbias_init = Constant(0.0)
    classifierWts = IsotropicGaussian(wtstd)

    learning_rateClass = theano.shared(np.array(lr, dtype=theano.config.floatX))
    learning_decay = np.array(decay, dtype=theano.config.floatX)
    
    ###DATA PREP
    print 'loading data'
    if loadPrepedData:
        hexSessions = loadFile(dataPath)

    else:
        sessioner = sessionizer.HexSessionizer(dataPath)
        hexSessions = sessioner.read_pcap()
        hexSessions = removeBadSessionizer(hexSessions)

    numSessions = len(hexSessions)
    print str(numSessions) + ' sessions found'
    hexSessionsKeys = order_keys(hexSessions)
    hexDict = hexTokenizer()
    
    print 'creating dictionary of ip communications'
    comsDict, uniqIPs = srcIpDict(hexSessions)
    comsDict = dictUniquerizer(comsDict)
     
    print 'initializing network graph'
    ###ENCODER
    if rnnType == 'gru':
        rnn = GatedRecurrent(dim=dim, weights_init = rnnwt_init, biases_init = rnnbias_init, name = 'gru')
        dimMultiplier = 2
    else:
        rnn = LSTM(dim=dim, weights_init = rnnwt_init, biases_init = rnnbias_init, name = 'lstm')
        dimMultiplier = 4

    fork = Fork(output_names=['linear', 'gates'],
                name='fork', input_dim=dimIn, output_dims=[dim, dim * dimMultiplier], 
                weights_init = linewt_init, biases_init = line_bias)

    ###CONTEXT
    if rnnType == 'gru':
        rnnContext = GatedRecurrent(dim=dim, weights_init = rnnwt_init, 
                                    biases_init = rnnbias_init, name = 'gruContext')
    else:
        rnnContext = LSTM(dim=dim, weights_init = rnnwt_init, biases_init = rnnbias_init, 
                          name = 'lstmContext')

    forkContext = Fork(output_names=['linearContext', 'gatesContext'],
                name='forkContext', input_dim=dim, output_dims=[dim, dim * dimMultiplier], 
                weights_init = linewt_init, biases_init = line_bias)

    forkDec = Fork(output_names=['linear', 'gates'],
                name='forkDec', input_dim=dim, output_dims=[dim, dim*dimMultiplier], 
                weights_init = linewt_init, biases_init = line_bias)

    #CLASSIFIER
    bmlp = BatchNormalizedMLP( activations=[Tanh(),Tanh()], 
               dims=[dim, dim, numClasses],
               weights_init=classifierWts,
               biases_init=Constant(0.0001) )

    #initialize the weights in all the functions
    fork.initialize()
    rnn.initialize()
    forkContext.initialize()
    rnnContext.initialize()
    forkDec.initialize()
    bmlp.initialize()

    def onestepEnc(X):
        data1, data2 = fork.apply(X) 

        if rnnType == 'gru':
            hEnc = rnn.apply(data1, data2) 
        else:
            hEnc, _ = rnn.apply(data2)

        return hEnc

    hEnc, _ = theano.scan(onestepEnc, X) #(mini*numPackets, packetLen, 1, hexdictLen)
        if attentionEnc:
        
        attentionmlpEnc = MLP(activations=[Tanh()], dims = [dim, 1], weights_init=attnWts,
               biases_init=Constant(1.0))
        attentionmlpEnc.initialize()

        hEncAttn = T.reshape(hEnc, (-1, packetTimeSteps, dim))
        def onestepEncAttn(hEncAttn):

            preEncattn = attentionmlpEnc.apply(hEncAttn)
            attEncsoft = Softmax()
            attEncpyx = attEncsoft.apply(preEncattn.flatten())
            attEncpred = attEncpyx.flatten()
            attenc = T.mul(hEncAttn.dimshuffle(1,0), attEncpred).dimshuffle(1,0)

            return attenc

        attenc, _ = theano.scan(onestepEncAttn, hEncAttn)

        hEncReshape = T.reshape(T.sum(attenc, axis = 1), (-1, maxPackets, 1, dim))

    else:
        hEncReshape = T.reshape(hEnc[:,-1], (-1, maxPackets, 1, dim)) #[:,-1] takes the last rep for each packet
                                                                 #(mini, numPackets, 1, dimReduced)  #[:,-1] takes the last rep for each packet
                                                                 #(mini, numPackets, 1, dimReduced)
    def onestepContext(hEncReshape):

        data3, data4 = forkContext.apply(hEncReshape)

        if rnnType == 'gru':
            hContext = rnnContext.apply(data3, data4)
        else:
            hContext, _ = rnnContext.apply(data4)

        return hContext

    hContext, _ = theano.scan(onestepContext, hEncReshape)
    
    if attentionContext:
        attentionmlpContext = MLP(activations=[Tanh()], dims = [dim, 1], weights_init=attnWts,
               biases_init=Constant(1.0))
        attentionmlpContext.initialize()

        hContextAttn = T.reshape(hContext, (-1,maxPackets,dim))
        def onestepContextAttn(hContextAttn):

            preContextatt = attentionmlpContext.apply(hContextAttn)
            attContextsoft = Softmax()
            attContextpyx = attContextsoft.apply(preContextatt.flatten())
            attContextpred = attContextpyx.flatten()
            attcontext = T.mul(hContextAttn.dimshuffle(1,0), attContextpred).dimshuffle(1,0)

            return attcontext

        attcontext, _ = theano.scan(onestepContextAttn, hContextAttn)
        hContextReshape = T.sum(attcontext, axis = 1)

    else:
        hContextReshape = T.reshape(hContext[:,-1], (-1,dim))

    data5, _ = forkDec.apply(hContextReshape)
    pyx = bmlp.apply(data5)
    softmax = Softmax()
    softoutClass = softmax.apply(pyx)
    costClass = T.mean(CategoricalCrossEntropy().apply(Y, softoutClass))

    #CREATE GRAPH
    cgClass = ComputationGraph([costClass])
    paramsClass = VariableFilter(roles = [PARAMETER])(cgClass.variables)
    learning = learningfunctions.Learning(costClass,paramsClass,learning_rateClass,l1=0.,l2=0.,maxnorm=0.,c=clippings)
    updatesClass = learning.Adam() 

    module_logger.info('starting graph compilation')
    classifierTrain = theano.function([X,Y], [costClass, hEnc, hContext, pyx, softoutClass], 
                                      updates=updatesClass, allow_input_downcast=True)
    classifierPredict = theano.function([X], softoutClass, allow_input_downcast=True)
    module_logger.info('graph compilation finished')
    print 'finished graph compilation'

    trainIndex = int(len(hexSessionsKeys)*trainPercent)

    epochCost = []
    gradNorms = []
    trainAcc = []
    testAcc = []

    costCollect = []
    trainCollect = []

    module_logger.info('beginning training')
    iteration = 0
    #epoch
    for epoch in xrange(epochs):

        #iteration/minibatch
        for start, end in zip(range(0, trainIndex,batch_size),
                              range(batch_size, trainIndex, batch_size)):

            trainingTargets = []
            trainingSessions = []

            #create one minibatch with 0.5 normal and 0.5 abby normal traffic
            for trainKey in range(start, end):
                sessionForEncoding = list(hexSessions[hexSessions.keys()[trainKey]][0])
    
                adfun = adversarialfunctions.Adversary(sessionForEncoding)
                adversaryList = [sessionForEncoding, 
                                 adfun.dstIpSwapOut(comsDict, uniqIPs),
                                 adfun.portDirSwitcher(),
                                 adfun.ipDirSwitcher()]
                abbyIndex = random.sample(range(len(adversaryList)), 1)[0]

                targetClasses = [0]*numClasses
                targetClasses[abbyIndex] = 1
                abbyTarget = np.array(targetClasses, dtype=theano.config.floatX)
                trainingSessions.append(abbyOneHotSes[0])
                trainingTargets.append(abbyTarget)

            sessionsMinibatch = np.asarray(trainingSessions).reshape((-1, packetTimeSteps, 1, dimIn))
            targetsMinibatch = np.asarray(trainingTargets)

            costfun = classifierTrain(sessionsMinibatch, targetsMinibatch)

            if iteration % (numSessions / (10 * batch_size)) == 0:
                costCollect.append(costfun[0])
                trainCollect.append(np.mean(np.argmax(costfun[-1],axis=1) == np.argmax(targetsMinibatch, axis=1)))
                module_logger.info('   Iteration: ', iteration)
                module_logger.info('   Cost: ', np.mean(costCollect))
                module_logger.info('   TRAIN accuracy: ', np.mean(trainCollect))
                print '   Iteration: ', iteration
                print '   Cost: ', np.mean(costCollect)
                print '   TRAIN accuracy: ', np.mean(trainCollect)

            iteration+=1

            #testing accuracy
            if iteration % (numSessions / (2 * batch_size)) == 0:
                predtar, acttar, testCollect = predictClass(classifierPredict, hexSessions, comsDict, uniqIPs, hexDict,
                                                            hexSessionsKeys,
                                                            numClasses, trainPercent, dimIn, maxPackets, packetTimeSteps,
                                                            padOldTimeSteps)
                binaryPrecisionRecall(predtar, acttar, numClasses)
                module_logger.info(str(testCollect))

            #save the models
            if iteration % (numSessions / (5 * batch_size)) == 0:
                save_model(classifierPredict)

        epochCost.append(np.mean(costCollect))
        trainAcc.append(np.mean(trainCollect))
        
        module_logger.info('Epoch: ', epoch)
        module_logger.info('Epoch cost average: ', epochCost[-1])
        module_logger.info('Epoch TRAIN accuracy: ', trainAcc[-1])
        print 'Epoch: ', epoch
        print 'Epoch cost average: ', epochCost[-1]
        print 'Epoch TRAIN accuracy: ', trainAcc[-1]

    return classifierTrain, classifierPredict
示例#51
0
######################
# Model construction #
######################

from theano import tensor

from blocks.bricks import Rectifier, MLP  # , Softmax
# from blocks.bricks.cost import CategoricalCrossEntropy
from blocks.bricks.conv import (ConvolutionalLayer, ConvolutionalSequence,
                                Flattener)
from blocks.initialization import Uniform, Constant

# Symbolic inputs: a 4D batch of images and an integer matrix of targets.
x = tensor.tensor4('images')
y = tensor.lmatrix('targets')

# Convolutional layers: three 5x5 stages followed by three 4x4 stages,
# each one followed by 2x2 pooling.

filter_sizes = [(5, 5), (5, 5), (5, 5), (4, 4), (4, 4), (4, 4)]
num_filters = [32, 32, 64, 64, 128, 256]
pooling_sizes = [(2, 2)] * len(num_filters)
activation = Rectifier().apply

# One (filter_size, num_filters, pooling_size) triple per layer.
layer_specs = zip(filter_sizes, num_filters, pooling_sizes)
conv_layers = [ConvolutionalLayer(activation, *spec) for spec in layer_specs]

convnet = ConvolutionalSequence(
    conv_layers,
    num_channels=3,
    image_size=(260, 260),
    weights_init=Uniform(0, 0.2),
    biases_init=Constant(0.),
)
示例#52
0
def main(model='cnn', batch_size=500, num_epochs=500):
    """Train the network from ``build_cnn``, report test metrics and plot.

    The data (``train_data``/``train_labels``, ``val_data``/``val_labels``,
    ``test_data``/``test_labels``) and the ``VERBOSE`` flag are read from
    module scope.

    Args:
        model: Accepted but not read by this function.
        batch_size: Mini-batch size passed to ``iterate_minibatches``.
        num_epochs: Number of training epochs.
    """
    # Symbolic placeholders for one mini-batch of inputs and its labels.
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    network = build_cnn(input_var)

    # Training objective: mean categorical cross-entropy of the default
    # (non-deterministic) forward pass, plus the matching accuracy.
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(
        prediction, target_var).mean()
    train_acc_expr = T.mean(T.eq(T.argmax(prediction, axis=1), target_var),
                            dtype=theano.config.floatX)

    # Nesterov-momentum updates over every trainable parameter.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params,
                                                learning_rate=0.001)

    # Evaluation objective: same loss/accuracy, but computed on a
    # deterministic forward pass (deterministic=True).
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var).mean()
    test_acc_expr = T.mean(
        T.eq(T.argmax(test_prediction, axis=1), target_var),
        dtype=theano.config.floatX)

    # train_fn applies the updates; val_fn only evaluates.
    train_fn = theano.function([input_var, target_var],
                               [loss, train_acc_expr],
                               updates=updates)
    val_fn = theano.function([input_var, target_var],
                             [test_loss, test_acc_expr])

    def _evaluate(data, labels):
        # Sum loss and accuracy of val_fn over unshuffled mini-batches.
        total_err = 0
        total_acc = 0
        n_batches = 0
        for inputs, targets in iterate_minibatches(data, labels, batch_size,
                                                   shuffle=False):
            err, acc = val_fn(inputs, targets)
            total_err += err
            total_acc += acc
            n_batches += 1
        return total_err, total_acc, n_batches

    training_hist = []
    val_hist = []

    print("Starting training...")
    for epoch in range(num_epochs):
        # Full pass over the training data.
        print("Training epoch {}...".format(epoch + 1))
        train_err = 0
        train_acc = 0
        train_batches = 0
        start_time = time.time()
        for inputs, targets in iterate_minibatches(train_data, train_labels,
                                                   batch_size, shuffle=True):
            err, acc = train_fn(inputs, targets)
            train_err += err
            train_acc += acc
            train_batches += 1
            if VERBOSE:
                progress = "Epoch: {} | Mini-batch: {}/{} | Elapsed time: {:.2f}s"
                print(progress.format(epoch + 1, train_batches,
                                      train_data.shape[0] / batch_size,
                                      time.time() - start_time))

        training_hist.append(train_err / train_batches)

        # Full pass over the validation data.
        print("Validating epoch...")
        val_err, val_acc, val_batches = _evaluate(val_data, val_labels)
        val_hist.append(val_err / val_batches)

        # Per-epoch summary.
        elapsed = time.time() - start_time
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   elapsed))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  training accuracy:\t\t{:.2f} %".format(
            train_acc / train_batches * 100))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))

    # Final evaluation on the test split.
    test_err, test_acc, test_batches = _evaluate(test_data, test_labels)
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))

    # Plot the per-epoch training and validation loss curves.
    epoch_axis = range(1, num_epochs + 1)
    plt.plot(epoch_axis, training_hist, label="Training")
    plt.plot(epoch_axis, val_hist, label="Validation")
    plt.grid(True)
    plt.title("Training Curve")
    plt.xlim(1, num_epochs + 1)
    plt.xlabel("Epoch #")
    plt.ylabel("Loss")
    plt.legend(loc='best')
    plt.show()
示例#53
0
def evaluate_lenet5(datasets,
                    imgh,
                    imgw,
                    nclass,
                    learning_rate=0.01,
                    d=0.0003,
                    n_epochs=500,
                    nkerns=[20, 50],
                    batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :rtype : object
    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nk+++++++++++++++++++++++++++++++++erns: number of kernels on each layer
    """
    rng = numpy.random.RandomState(23455)

    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    # x = T.matrix('x')   # the data is presented as rasterized images
    x = T.tensor4('x')
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    # layer0_input = x.reshape((batch_size, 3, 60, 40))
    layer0_input = x.reshape((batch_size, 3, imgh, imgw))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (60-5+1 , 40-5+1) = (56, 36)
    # maxpooling reduces this further to (56/2, 36/2) = (28, 18)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 28, 18)
    #     image_shape=(batch_size, 3, 60, 40),

    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 3, imgh, imgw),
                                filter_shape=(nkerns[0], 3, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (28-5+1, 18-5+1) = (24, 14)
    # maxpooling reduces this further to (24/2, 14/2) = (12, 7)
    # 4D output tensor is thus of shape (nkerns[0], nkerns[1], 12, 7)
    #     image_shape=(batch_size, nkerns[0], 28, 18),

    lh1 = (imgh - 5 + 1) / 2
    lw1 = (imgw - 5 + 1) / 2

    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], lh1, lw1),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 12 * 7),
    # or (500, 50 * 12 * 7) = (500, 3360) with the default values.
    lh2 = (lh1 - 5 + 1) / 2
    lw2 = (lw1 - 5 + 1) / 2

    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * lh2 * lw2,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=nclass)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    # the following code is modified to suit with the small test set size
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # theano expression to decay the learning rate across epoch
    current_rate = theano.tensor.fscalar('current_rate')

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - current_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index, current_rate],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 50  # look at least at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    test_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_test_loss = numpy.inf
    learning_rate = numpy.float32(learning_rate)
    best_iter = 0
    start_time = time.clock()

    epoch = 0
    done_looping = False
    test_error = []

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        learning_rate = learning_rate / (1 + d * (epoch - 1))
        print "learning rate is %f" % learning_rate

        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index,
                                  numpy.float32(learning_rate))

            if (iter + 1) % test_frequency == 0:

                # compute zero-one loss on validation set
                test_losses = [test_model(i) for i in xrange(n_test_batches)]
                this_test_loss = numpy.mean(test_losses)

                test_error.append(this_test_loss)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_test_loss * 100.))

                # if we got the best test score until now
                if this_test_loss < best_test_loss:

                    #improve patience if loss improvement is good enough
                    if this_test_loss < best_test_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_test_loss = this_test_loss
                    best_iter = iter

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print(
        'Best validation score of %f %% obtained at iteration %i, '
        'with test performance %f %%' %
        (best_test_loss * 100., best_iter + 1, best_test_loss * 100.))
    print 'The code ran for %.2fm' % ((end_time - start_time) / 60.)

    return params, test_error
示例#54
0
def main(L=2, z_dim=2, n_hid=1024, num_epochs=300, binary=True):
    """Load the data set and create the symbolic input (snippet is truncated).

    Only the set-up steps are present in this excerpt; none of the
    parameters is read by the visible code.
    """
    print("Loading data...")
    splits = load_dataset()
    X_train, X_val, X_test = splits

    # Axes 2 and 3 of the training array give the image extent.
    train_shape = X_train.shape
    width = train_shape[2]
    height = train_shape[3]

    input_var = T.tensor4('inputs')
示例#55
0
    def __init__(self,
                 dropout=None,
                 opt='adam',
                 pad='same',
                 stride=2,
                 kernel_width=None,
                 dataset='mnist'):
        """Build the conv net with Lasagne and compile its Theano functions.

        The network is an InputLayer, one Conv2DLayer per entry of
        ``weight_shapes`` (optionally followed by dropout), a Pool2DLayer,
        a 128-unit DenseLayer and a 10-unit DenseLayer whose nonlinearity is
        replaced by softmax. Compiled functions stored on the instance:
        ``reset``, ``train_func``, ``predict_proba`` and ``predict``.

        Args:
            dropout: ``None`` disables dropout between conv layers;
                ``'spatial'`` uses ``spatial_dropout``; any other value uses
                ``dropout``.
            opt: ``'adam'``, ``'momentum'`` or ``'sgd'``; selects the
                Lasagne update rule. Any other value leaves
                ``self.updates`` unset.
            pad: Passed to every ``Conv2DLayer``.
            stride: Passed to every ``Conv2DLayer``.
            kernel_width: If not ``None``, replaces the dataset-specific
                ``weight_shapes``; it is also used as ``filter_size``.
            dataset: ``'mnist'`` or ``'cifar10'``; selects the default
                ``weight_shapes`` and the input layer shape.
        """
        # Dataset-specific default weight shapes.
        if dataset == 'mnist':
            weight_shapes = [
                (32, 1, 3, 3),  # -> (None, 16, 14, 14)
                (32, 32, 3, 3),  # -> (None, 16,  7,  7)
                (32, 32, 3, 3)
            ]  # -> (None, 16,  4,  4)
        elif dataset == 'cifar10':
            weight_shapes = [
                (32, 3, 5, 5),  # -> (None, 16, 16, 16)
                (32, 32, 5, 5),  # -> (None, 16,  8,  8)
                (32, 32, 5, 5)
            ]  # -> (None, 16,  4,  4)

        if kernel_width is not None:  # OVERRIDE dataset argument!!!
            weight_shapes = [
                (32, 1, kernel_width, kernel_width),  # -> (None, 16, 14, 14)
                (32, 32, kernel_width, kernel_width),  # -> (None, 16,  7,  7)
                (32, 32, kernel_width, kernel_width)
            ]  # -> (None, 16,  4,  4)

        # Sum of the second entry of every weight shape.
        n_kernels = np.array(weight_shapes)[:, 1].sum()
        # First entry plus the trailing (spatial) entries of the first shape.
        kernel_shape = weight_shapes[0][:1] + weight_shapes[0][2:]

        # needs to be consistent with weight_shapes
        # NOTE(review): filter_size is taken from kernel_width, which is None
        # by default -- confirm Conv2DLayer is never reached with None.
        args = [32, kernel_width, stride, pad,
                lasagne.nonlinearities.rectify]  #
        num_filters, filter_size, stride, pad, nonlinearity = args
        # Copies every local defined so far (arguments, `self`, and the
        # values computed above) onto the instance; later code relies on
        # this, e.g. self.weight_shapes.
        self.__dict__.update(locals())
        ##################

        if dataset == 'mnist':
            layer = lasagne.layers.InputLayer([None, 1, 28, 28])
        elif dataset == 'cifar10':
            layer = lasagne.layers.InputLayer([None, 3, 32, 32])

        # One conv layer per weight shape; dropout after all but the last.
        for j, ws in enumerate(self.weight_shapes):
            # NOTE(review): uses ws[1] (second entry of the shape) as the
            # filter count -- verify this is the intended index.
            num_filters = ws[1]
            layer = lasagne.layers.Conv2DLayer(layer, num_filters, filter_size,
                                               stride, pad, nonlinearity)
            if dropout is not None and j != len(self.weight_shapes) - 1:
                if dropout == 'spatial':
                    layer = lasagne.layers.spatial_dropout(layer)
                else:
                    layer = lasagne.layers.dropout(layer)
        layer = lasagne.layers.Pool2DLayer(layer, pool_size=2)

        # MLP layers
        layer = lasagne.layers.DenseLayer(layer, 128)
        # NOTE(review): `j` still holds the last loop index here, so this
        # condition is false whenever the loop above ran and the dense
        # dropout is never added -- confirm intent.
        if dropout is not None and j != len(self.weight_shapes) - 1:
            layer = lasagne.layers.dropout(layer, dropout)
        layer = lasagne.layers.DenseLayer(layer, 10)

        # Replace the output layer's nonlinearity with softmax.
        layer.nonlinearity = lasagne.nonlinearities.softmax
        self.input_var = T.tensor4('input_var')
        self.target_var = T.matrix('target_var')
        self.learning_rate = T.scalar('leanring_rate')
        self.dataset_size = T.scalar('dataset_size')  # useless

        self.layer = layer
        # Default and deterministic forward passes.
        self.y = lasagne.layers.get_output(layer, self.input_var)
        self.y_det = lasagne.layers.get_output(layer,
                                               self.input_var,
                                               deterministic=True)

        losses = lasagne.objectives.categorical_crossentropy(
            self.y, self.target_var)
        # dataset_size is multiplied by zero: it adds nothing to the loss
        # value but is part of the loss expression, and it is an input of
        # train_func below.
        self.loss = losses.mean() + self.dataset_size * 0.
        self.params = lasagne.layers.get_all_params(self.layer)
        # reset! DEPRECATED... use add_reset, call_reset instead...
        # Snapshot of the current parameter values; self.reset writes them
        # back.
        params0 = lasagne.layers.get_all_param_values(self.layer)
        updates = {p: p0 for p, p0 in zip(self.params, params0)}
        self.reset = theano.function([], None, updates=updates)
        # NOTE(review): `model` is not defined in this method; it must exist
        # at module scope or this raises NameError -- possibly meant `self`.
        model.add_reset('init')

        # Select the update rule; all three share loss, params and rate.
        if opt == 'adam':
            self.updates = lasagne.updates.adam(self.loss, self.params,
                                                self.learning_rate)
        elif opt == 'momentum':
            self.updates = lasagne.updates.nesterov_momentum(
                self.loss, self.params, self.learning_rate)
        elif opt == 'sgd':
            self.updates = lasagne.updates.sgd(self.loss, self.params,
                                               self.learning_rate)

        print '\tgetting train_func'
        # Inputs: (inputs, targets, dataset_size, learning_rate) -> loss.
        self.train_func = theano.function([
            self.input_var, self.target_var, self.dataset_size,
            self.learning_rate
        ],
                                          self.loss,
                                          updates=self.updates)

        print '\tgetting useful_funcs'
        # predict_proba uses the default pass; predict takes the argmax of
        # the deterministic pass.
        self.predict_proba = theano.function([self.input_var], self.y)
        self.predict = theano.function([self.input_var], self.y_det.argmax(1))
示例#56
0
	print md, meta_data[md]

expt_name = meta_data["expt_name"]
learning_rate = 1e-4
image_size = 64 # 32
attn_win = 6 # 4
glimpses = 4 #8
lstm_states = 512
fg_bias_init = 0.0 # 0.2
dropout = 0.3 # 0.2
meta_data["n_iter"] = n_iter = 1500000
batch_size = 128
meta_data["num_output"] = 2

print "... setting up the network"
X = T.tensor4("input")
y = T.imatrix("target")

l_in = InputLayer(shape=(None, 1, image_size, image_size), input_var=X)
l_noise = DropoutLayer(l_in, p=dropout)
l_arc = SimpleARC(l_noise, lstm_states=lstm_states, image_size=image_size, attn_win=attn_win, 
					glimpses=glimpses, fg_bias_init=fg_bias_init)
l_y = DenseLayer(l_arc, 1, nonlinearity=sigmoid)

prediction = get_output(l_y)
prediction_clean = get_output(l_y, deterministic=True)
embedding = get_output(l_arc, deterministic=True)

loss = T.mean(binary_crossentropy(prediction, y))
accuracy = T.mean(binary_accuracy(prediction_clean, y))
        num_units=2,
        nonlinearity=lasagne.nonlinearities.softmax,
        b=None)

    return l_out


print("Loading data...")

# Flower data set, plus the randomly masked train/test split.
flower = load_image.load_flower()
(flower_corrupt_train, flower_truth_train,
 test_corrupt, test_truth) = load_image.load_flower_random_mask()

# Symbolic variables for the generator and for the adversarial inputs.
input_var = T.tensor4('inputs')
target_var = T.tensor4('target')
adv_input = T.tensor4('adv_input')
adv_target = T.ivector('adv_target')

print("Building model and...")

# Generator: auto-encoder trained on the mean squared reconstruction error.
net = build_ae(input_var)
sample_reconstruct = lasagne.layers.get_output(net)
squared_err = lasagne.objectives.squared_error(sample_reconstruct, target_var)
loss = squared_err.mean()

# Adam updates over every trainable parameter of the generator.
params = lasagne.layers.get_all_params(net, trainable=True)
updates = lasagne.updates.adam(loss, params)
示例#58
0
def main(
        save_to='params',
        dataset='mm',
        kl_loss='true',  # use kl-div in z-space instead of mse
        diffs='false',
        seq_length=30,
        num_epochs=1,
        lstm_n_hid=1024,
        max_per_epoch=-1):
    """Train an LSTM on sequences of VAE latent codes and write sample GIFs.

    Frames are encoded into z-space with a pre-trained VAE, an LSTM is trained
    to predict the next latent code of each sequence, the LSTM parameters are
    saved, and finally the LSTM is primed with real sequences and rolled out
    for 30 further steps whose decoded frames are written to
    ``sample_<idx>.gif``.

    Args:
        save_to: directory in which the trained LSTM parameters are saved.
        dataset: ``'pf'`` (pulp fiction) or ``'mm'`` (moving mnist).
        kl_loss: string flag; ``'true'`` (case-insensitive) trains with a KL
            divergence on the predicted mean and log-sigma of z, anything else
            trains with mean squared error on z only.
        diffs: string flag; ``'true'`` (case-insensitive) trains on the
            differences between consecutive latent codes instead of the codes.
        seq_length: number of time steps in each training sequence.
        num_epochs: number of epochs, forwarded to ``u.train_with_hdf5``.
        lstm_n_hid: hidden size forwarded to the LSTM builder as ``nhid``.
        max_per_epoch: forwarded unchanged to ``u.train_with_hdf5``.

    Raises:
        ValueError: if ``dataset`` is neither ``'pf'`` nor ``'mm'``.
    """
    kl_loss = kl_loss.lower() == 'true'
    diffs = diffs.lower() == 'true'
    if dataset not in ('pf', 'mm'):
        # Fail early instead of hitting a NameError further down.
        raise ValueError(
            "unknown dataset {!r}; expected 'pf' or 'mm'".format(dataset))

    # set up functions for data pre-processing and model training
    input_var = T.tensor4('inputs')

    # different experimental setup for moving mnist vs pulp fiction datasets
    if dataset == 'pf':
        img_size = 64
        cae_weights = c.pf_cae_params
        cae_specstr = c.pf_cae_specstr
        split_layer = 'conv7'
        inpvar = T.tensor4('input')
        net = m.build_cae(inpvar,
                          specstr=cae_specstr,
                          shape=(img_size, img_size))
        convs_from_img, _ = m.encoder_decoder(cae_weights,
                                              specstr=cae_specstr,
                                              layersplit=split_layer,
                                              shape=(img_size, img_size),
                                              poolinv=True)
        laydict = dict((l.name, l) for l in nn.layers.get_all_layers(net))
        # the VAE works on the output of the CAE's split layer
        zdec_in_shape = nn.layers.get_output_shape(laydict[split_layer])
        deconv_weights = c.pf_deconv_params
        vae_weights = c.pf_vae_params
        img_from_convs = m.deconvoluter(deconv_weights,
                                        specstr=cae_specstr,
                                        shape=zdec_in_shape)
        L = 2
        vae_n_hid = 1500
        binary = False
        z_dim = 256
        l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
            m.build_vae(input_var, L=L, binary=binary, z_dim=z_dim,
                        n_hid=vae_n_hid,
                        shape=(zdec_in_shape[2], zdec_in_shape[3]),
                        channels=zdec_in_shape[1])
        u.load_params(l_x, vae_weights)
        datafile = 'data/pf.hdf5'
        frame_skip = 3  # every 3rd frame in sequence
        z_decode_layer = l_x_mu_list[0]
        pixel_shift = 0.5
        samples_per_image = 4
        tr_batch_size = 16  # must be a multiple of samples_per_image
    else:  # dataset == 'mm'
        img_size = 64
        cvae_weights = c.mm_cvae_params
        L = 2
        vae_n_hid = 1024
        binary = True
        z_dim = 32
        zdec_in_shape = (None, 1, img_size, img_size)
        l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
            m.build_vcae(input_var, L=L, z_dim=z_dim, n_hid=vae_n_hid,
                         binary=binary,
                         shape=(zdec_in_shape[2], zdec_in_shape[3]),
                         channels=zdec_in_shape[1])
        u.load_params(l_x, cvae_weights)
        datafile = 'data/moving_mnist.hdf5'
        frame_skip = 1
        z_decode_layer = l_x_list[0]
        pixel_shift = 0
        samples_per_image = 1
        tr_batch_size = 128  # must be a multiple of samples_per_image

    # functions for moving to/from image or conv-space, and z-space
    z_mat = T.matrix('z')
    zenc = theano.function([input_var],
                           nn.layers.get_output(l_z_mu, deterministic=True))
    zdec = theano.function(
        [z_mat],
        nn.layers.get_output(
            z_decode_layer, {l_z_mu: z_mat},
            deterministic=True).reshape((-1, zdec_in_shape[1]) +
                                        zdec_in_shape[2:]))
    zenc_ls = theano.function([input_var],
                              nn.layers.get_output(l_z_ls, deterministic=True))

    # functions for encoding sequences of z's
    print('compiling functions')
    z_var = T.tensor3('z_in')
    z_ls_var = T.tensor3('z_ls_in')
    tgt_mu_var = T.tensor3('z_tgt')
    tgt_ls_var = T.tensor3('z_ls_tgt')
    learning_rate = theano.shared(nn.utils.floatX(1e-4))

    # separate function definitions if we are using MSE and predicting only z,
    # or KL divergence and predicting both mean and sigma of z
    if kl_loss:

        def kl(p_mu, p_sigma, q_mu, q_sigma):
            """Summed KL divergence between diagonal Gaussians p and q."""
            return 0.5 * T.sum(
                T.sqr(p_sigma) / T.sqr(q_sigma) + T.sqr(q_mu - p_mu) /
                T.sqr(q_sigma) - 1 + 2 * T.log(q_sigma) - 2 * T.log(p_sigma))

        lstm, _ = m.Z_VLSTM(z_var,
                            z_ls_var,
                            z_dim=z_dim,
                            nhid=lstm_n_hid,
                            training=True)
        z_mu_expr, z_ls_expr = nn.layers.get_output(
            [lstm['output_mu'], lstm['output_ls']])
        z_mu_expr_det, z_ls_expr_det = nn.layers.get_output(
            [lstm['output_mu'], lstm['output_ls']], deterministic=True)
        # the network predicts log-sigma, hence the exp()
        loss = kl(tgt_mu_var, T.exp(tgt_ls_var), z_mu_expr, T.exp(z_ls_expr))
        te_loss = kl(tgt_mu_var, T.exp(tgt_ls_var), z_mu_expr_det,
                     T.exp(z_ls_expr_det))
        params = nn.layers.get_all_params(lstm['output'], trainable=True)
        updates = nn.updates.adam(loss, params, learning_rate=learning_rate)
        train_fn = theano.function([z_var, z_ls_var, tgt_mu_var, tgt_ls_var],
                                   loss,
                                   updates=updates)
        test_fn = theano.function([z_var, z_ls_var, tgt_mu_var, tgt_ls_var],
                                  te_loss)
    else:
        lstm, _ = m.Z_LSTM(z_var, z_dim=z_dim, nhid=lstm_n_hid, training=True)
        loss = nn.objectives.squared_error(
            nn.layers.get_output(lstm['output']), tgt_mu_var).mean()
        te_loss = nn.objectives.squared_error(
            nn.layers.get_output(lstm['output'], deterministic=True),
            tgt_mu_var).mean()
        params = nn.layers.get_all_params(lstm['output'], trainable=True)
        updates = nn.updates.adam(loss, params, learning_rate=learning_rate)
        train_fn = theano.function([z_var, tgt_mu_var], loss, updates=updates)
        test_fn = theano.function([z_var, tgt_mu_var], te_loss)

    if dataset == 'pf':
        # pulp fiction goes image -> conv features -> z and back
        z_from_img = lambda x: zenc(convs_from_img(x))
        z_ls_from_img = lambda x: zenc_ls(convs_from_img(x))
        img_from_z = lambda z: img_from_convs(zdec(z))
    else:
        z_from_img = zenc
        z_ls_from_img = zenc_ls
        img_from_z = zdec

    # training loop
    print('training for {} epochs'.format(num_epochs))
    # explicit floor division: sizes must stay integers on Python 2 and 3
    nbatch = ((seq_length + 1) * tr_batch_size * frame_skip //
              samples_per_image)
    data = u.DataH5PyStreamer(datafile, batch_size=nbatch)
    chunk = tr_batch_size // samples_per_image
    seq_shape = (chunk, seq_length + 1, -1)

    # for taking arrays of uint8 (non square) and converting them to batches
    # of sequences
    def transform_data(ims_batch, center=False):
        """Encode a raw image batch into (input, target) latent sequences.

        Returns ``(z_in, z_ls_in, z_tgt, z_ls_tgt)`` when ``kl_loss`` is set
        and ``(z_in, z_tgt)`` otherwise; targets are the inputs shifted by one
        time step.
        """
        # keep every frame_skip-th frame, starting at a random offset
        imb = u.raw_to_floatX(
            ims_batch, pixel_shift=pixel_shift,
            center=center)[np.random.randint(frame_skip)::frame_skip]
        zbatch = np.zeros((tr_batch_size, seq_length + 1, z_dim),
                          dtype=theano.config.floatX)
        zsigbatch = np.zeros((tr_batch_size, seq_length + 1, z_dim),
                             dtype=theano.config.floatX)
        for i in range(samples_per_image):
            rows = slice(i * chunk, (i + 1) * chunk)
            zf = z_from_img(imb).reshape(seq_shape)
            if kl_loss:
                zls = z_ls_from_img(imb).reshape(seq_shape)
            if diffs:
                # first time step stays zero; the rest are frame-to-frame deltas
                zbatch[rows, 1:] = zf[:, 1:] - zf[:, :-1]
                if kl_loss:
                    zsigbatch[rows, 1:] = zls[:, 1:] - zls[:, :-1]
            else:
                zbatch[rows] = zf
                if kl_loss:
                    zsigbatch[rows] = zls
        if kl_loss:
            return (zbatch[:, :-1, :], zsigbatch[:, :-1, :],
                    zbatch[:, 1:, :], zsigbatch[:, 1:, :])
        return zbatch[:, :-1, :], zbatch[:, 1:, :]

    # we need sequences of images, so we do not shuffle data during training
    hist = u.train_with_hdf5(
        data,
        num_epochs=num_epochs,
        train_fn=train_fn,
        test_fn=test_fn,
        train_shuffle=False,
        max_per_epoch=max_per_epoch,
        tr_transform=lambda x: transform_data(x[0], center=False),
        te_transform=lambda x: transform_data(x[0], center=True))

    hist = np.asarray(hist)
    # the last entry of the training history is embedded in the file name
    u.save_params(lstm['output'],
                  os.path.join(save_to, 'lstm_{}.npz'.format(hist[-1, -1])))

    # build functions to sample from LSTM
    # separate cell_init and hid_init from the other learned model parameters
    all_param_values = nn.layers.get_all_param_values(lstm['output'])
    init_indices = set(
        i for i, p in enumerate(nn.layers.get_all_params(lstm['output']))
        if 'init' in str(p))
    init_values = [
        v for i, v in enumerate(all_param_values) if i in init_indices
    ]
    params_noinit = [
        v for i, v in enumerate(all_param_values) if i not in init_indices
    ]

    # build model without learnable init values, and load non-init parameters
    if kl_loss:
        lstm_sample, state_vars = m.Z_VLSTM(z_var,
                                            z_ls_var,
                                            z_dim=z_dim,
                                            nhid=lstm_n_hid,
                                            training=False)
    else:
        lstm_sample, state_vars = m.Z_LSTM(z_var,
                                           z_dim=z_dim,
                                           nhid=lstm_n_hid,
                                           training=False)
    nn.layers.set_all_param_values(lstm_sample['output'], params_noinit)

    # extract layers representing the hidden and cell states, and have
    # sample_fn return their outputs; order by layer index, then by name
    state_layers_keys = sorted(
        (k for k in lstm_sample if 'hidfinal' in k or 'cellfinal' in k),
        key=lambda x: (int(x.split('_')[1]), x))
    state_layers = [lstm_sample[s] for s in state_layers_keys]

    # Predictions must come from the sampling network: its state is fed in
    # through state_vars, whereas the training network always restarts from
    # its learned initial state and would ignore the fed-back state.
    if kl_loss:
        sample_inputs = [z_var, z_ls_var]
        sample_outputs = [lstm_sample['output_mu'], lstm_sample['output_ls']]
    else:
        sample_inputs = [z_var]
        sample_outputs = [lstm_sample['output']]
    n_pred = len(sample_outputs)  # number of leading prediction outputs
    sample_fn = theano.function(
        sample_inputs + state_vars,
        nn.layers.get_output(sample_outputs + state_layers,
                             deterministic=True))

    from images2gif import writeGif
    from PIL import Image

    # sample approximately 30 different generated video sequences
    te_stream = data.streamer(training=True, shuffled=False)
    # at least 1, otherwise the modulo below divides by zero on small datasets
    interval = max(1, data.ntrain // data.batch_size // 30)
    for idx, imb in enumerate(te_stream.get_epoch_iterator()):
        if idx % interval != 0:
            continue
        z_tup = transform_data(imb[0], center=True)
        # prime the LSTM with one randomly chosen sequence of the batch
        seg_idx = np.random.randint(z_tup[0].shape[0])
        z_vars = [z[seg_idx:seg_idx + 1] for z in z_tup[:n_pred]]
        z_last = z_vars[0]
        # broadcast the learned initial states over the batch dimension
        state_values = [
            np.dot(np.ones((z_last.shape[0], 1), dtype=theano.config.floatX),
                   s) for s in init_values
        ]
        output_list = sample_fn(*(z_vars + state_values))
        state_values = output_list[n_pred:]

        # use whole sequence of predictions for output
        rec = img_from_z(output_list[0].reshape(-1, z_dim))
        images = [
            Image.fromarray(
                u.get_picture_array(rec, index=k, shift=pixel_shift))
            for k in range(rec.shape[0])
        ]

        # slice prediction to feed into lstm
        z_vars = [out[:, -1:, :] for out in output_list[:n_pred]]
        # predict 30 frames after the end of the priming video
        for _ in range(30):
            output_list = sample_fn(*(z_vars + state_values))
            state_values = output_list[n_pred:]
            rec = img_from_z(output_list[0].reshape(-1, z_dim))
            images.append(
                Image.fromarray(
                    u.get_picture_array(rec, index=0, shift=pixel_shift)))
            z_vars = output_list[:n_pred]
        writeGif("sample_{}.gif".format(idx), images, duration=0.1, dither=0)
示例#59
0
def evaluate_lenet5(learning_rate=0.001, batch_size=1, n_epochs=75):
    """Plot input-gradient saliency maps of a pre-trained conv net.

    Loads pickled parameters from ``params_v5.1.pkl``, rebuilds the network
    (three conv/pool layers, two tanh hidden layers, logistic regression),
    and, for each of the first 10 images in ``test_batch``, shows the absolute
    gradient of every class score with respect to the input pixels.

    Args:
        learning_rate: unused; kept for interface compatibility.
        batch_size: batch dimension given to the conv layers' ``image_shape``.
            Images are fed one at a time, so the default of 1 matches the
            data actually passed in.
        n_epochs: unused; kept for interface compatibility.
    """
    print("Loading data...")
    rng = np.random.RandomState(23455)

    print("Loading params...")
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    # Binary mode is required for pickles and matches how test_batch is read.
    with open('params_v5.1.pkl', 'rb') as params_file:
        p = cPickle.load(params_file)

    print("Sharing data...")

    print("Building architecture...")
    print("Haha =_=-b!")
    x = T.tensor4('x')
    index = T.iscalar('index')
    print("Haha =_=-b!")
    # Parameters are stored output-layer first, so the first conv layer takes
    # the last (W, b) pair of the pickled list.
    layer0 = ConvPoolLayer(rng=rng,
                           input=x,
                           image_shape=(batch_size, 3, 32, 32),
                           filter_shape=(32, 3, 5, 5),
                           activation=Relu_nonlinear,
                           poolsize=(2, 2),
                           W=p[-2],
                           b=p[-1])
    print("Haha =_=-b!")
    layer1 = ConvPoolLayer(rng=rng,
                           input=layer0.output,
                           image_shape=(batch_size, 32, 14, 14),
                           filter_shape=(50, 32, 5, 5),
                           activation=Relu_nonlinear,
                           poolsize=(2, 2),
                           W=p[-4],
                           b=p[-3])
    print("Haha =_=-b!")
    layer2 = ConvPoolLayer(rng=rng,
                           input=layer1.output,
                           image_shape=(batch_size, 50, 5, 5),
                           filter_shape=(64, 50, 5, 5),
                           activation=Relu_nonlinear,
                           poolsize=(1, 1),
                           W=p[-6],
                           b=p[-5])

    # flatten to (batch, features); layer3 expects n_in=64 features
    layer3_input = layer2.output.flatten(2)
    print("Haha =_=-b!")
    layer3 = HiddenLayer(rng=rng,
                         input=layer3_input,
                         n_in=64,
                         n_out=56,
                         activation=T.tanh,
                         W=p[4],
                         b=p[5])
    print("Haha =_=-b!")
    layer4 = HiddenLayer(rng=rng,
                         input=layer3.output,
                         n_in=56,
                         n_out=10,
                         activation=T.tanh,
                         W=p[2],
                         b=p[3])
    print("Haha =_=-b!")
    layer5 = LogisticRegression(input=layer4.output,
                                n_in=10,
                                n_out=10,
                                W=p[0],
                                b=p[1])
    print("Haha =_=-b!")
    prediction = layer5.p_y_given_x_in
    print("Haha =_=-b!, T.grad!!")
    # gradient of one class score of the first sample w.r.t. the input image
    grad = T.grad(prediction[0, index], x)

    print("Compiling function...")
    ScI = theano.function([x, index], grad)

    with open('test_batch', 'rb') as test_file:
        test_batch = cPickle.load(test_file)

    test_set_x = np.asarray(test_batch['data'], dtype='float32')
    test_set_y = np.asarray(test_batch['labels'], dtype='float32')

    for i in range(10):
        # canvas of 2 rows x 10 columns of 32x32 tiles; only the bottom row
        # (one saliency map per class) is filled in
        map_out = np.zeros([32 * 2, 32 * 10])
        print(test_set_y[i])
        # the input image is the same for every class, so reshape it once
        x_in = test_set_x[i, :].reshape([1, 3, 32, 32])
        for l in range(10):
            x_out = ScI(x_in, l)
            # channels last, then collapse colour channels by max |gradient|
            x_out = np.transpose(x_out.reshape([3, 32, 32]), [1, 2, 0])
            x_out = np.abs(x_out).max(axis=2)
            map_out[32:, l * 32:(l + 1) * 32] = x_out

        print("haha")
        plt.imshow(map_out, cmap="Greys_r")
        plt.show()
示例#60
0
def main_training(key, X_train, y_train, X_val, y_val, geneStore, model='cnn', num_epochs=500):
    """Train a regression network and store its validation predictions.

    Builds the network with ``build_cnn``, minimises the mean squared error
    between prediction and target with Adam, prints the training and
    validation loss after every epoch, and finally writes predictions, true
    values and inputs for the validation set into ``geneStore`` under
    ``key + '/prediction'``, ``key + '/true_expres'`` and ``key + '/X'``.

    Args:
        key: prefix of the entries written to ``geneStore``.
        X_train: training inputs; the size of axis 1 is taken as the input
            vector length (presumably shaped (n_samples, n_features) --
            TODO confirm).
        y_train: training targets.
        X_val: validation inputs, laid out like ``X_train``.
        y_val: validation targets.
        geneStore: store supporting item assignment with string keys (an
            HDFStore according to the comment near the end of the function).
        model: model type; only ``'cnn'`` is recognised.
        num_epochs: number of passes over the training data.
    """
    # NOTE: nothing is loaded here; the data arrives through the arguments.
    print("loading data...")
    
    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('input')
    target_var = T.fmatrix('targets')
    lengthOfInputVector = np.shape(X_train)[1]
    # Create neural network model (depending on the `model` argument)
    print("Building model and compiling functions...")
    if model == 'cnn':
        network = build_cnn(input_var, lengthOfInputVector= lengthOfInputVector)
    else:
        # NOTE(review): execution continues after this message, so the next
        # use of `network` fails with a NameError -- consider raising here.
        print("Unrecognized model type {}".format(model))
        
    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize: the mean squared error between prediction and target.
    prediction = lasagne.layers.get_output(network)
    loss = T.mean((prediction - target_var)**2)
    # we could add some weight decay as well here, see lasagne.regularization.
    
    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Adam with Lasagne's default settings
    # is used here.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(loss, params)        
    # Create a loss expression for validation/testing.  The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = T.mean((test_prediction - target_var)**2)

    # Compile a function performing a training step on mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    
    # Compile a second function computing the validation loss and the
    # deterministic prediction:
    val_fn = theano.function([input_var, target_var], [test_loss, test_prediction])
    
    # NOTE(review): this uses the non-deterministic `prediction`, so any
    # stochastic layers (e.g. dropout) stay active when the final predictions
    # are stored -- confirm whether `test_prediction` was intended.
    prediction_for_gene_expres = theano.function([input_var],prediction) 
    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    minibatch_size = 100
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, minibatch_size, shuffle=True):
            inputs, targets = batch
            # reshape each flat input vector to (batch, 1, 1, length) to match
            # the 4-D input variable
            inputs = np.reshape(inputs,(inputs.shape[0],1,1,lengthOfInputVector))
            train_err += train_fn(inputs, targets)
            train_batches += 1
            
        # And a full pass over the validation data:
        val_err = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, minibatch_size, shuffle=False):
            inputs, targets = batch
            inputs = np.reshape(inputs,(inputs.shape[0],1,1,lengthOfInputVector))
            # val_prediction is not used after this call
            err, val_prediction = val_fn(inputs, targets)
            val_err += err
            val_batches += 1
            
        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        
    # store data in HDFStore: predict on the whole validation set at once
    inputs = pd.DataFrame(X_val).values.astype(np.float32)    
    inputs = np.reshape(inputs,(inputs.shape[0],1,1,lengthOfInputVector))
    predicted_gene_expres = prediction_for_gene_expres(inputs)
    
    
    # predictions and true values are flattened to a single column each
    geneStore[key+'/prediction'] = pd.DataFrame(np.array(predicted_gene_expres).flatten())
    geneStore[key+'/true_expres'] = pd.DataFrame(np.array(y_val).flatten())
    geneStore[key+'/X'] = pd.DataFrame(X_val)