def test_convolutional_sequence():
    """Check channel propagation and the output of a two-brick sequence.

    A ``ConvolutionalLayer`` followed by a ``ConvolutionalActivation`` is
    wrapped in a ``ConvolutionalSequence``; all weights are constant 1, the
    first brick's biases are constant 5 and the second brick has its bias
    disabled, so the output on an all-ones input is a constant tensor.
    """
    num_channels = 4
    pooling_size = 3
    batch_size = 5
    image_size = (17, 13)

    inputs = tensor.tensor4('x')
    rectify = Rectifier().apply

    first = ConvolutionalLayer(rectify, (3, 3), 5,
                               (pooling_size, pooling_size),
                               weights_init=Constant(1.),
                               biases_init=Constant(5.))
    second = ConvolutionalActivation(rectify, (2, 2), 4,
                                     weights_init=Constant(1.))
    sequence = ConvolutionalSequence([first, second], num_channels,
                                     image_size=image_size)

    # Pushing the allocation config must wire each brick's input channels.
    sequence.push_allocation_config()
    assert first.num_channels == 4
    assert second.num_channels == 5

    # The bias of the second convolution is switched off before applying.
    second.convolution.use_bias = False
    outputs = sequence.apply(inputs)
    sequence.initialize()
    compiled = function([inputs], outputs)

    ones_in = numpy.ones((batch_size, 4, 17, 13),
                         dtype=theano.config.floatX)
    expected = (numpy.ones((batch_size, 4, 4, 3)) *
                (9 * 4 + 5) * 4 * 5)
    assert_allclose(compiled(ones_in), expected)
def test_convolutional_sequence():
    """Check channel propagation and the output of a two-brick sequence.

    A ``ConvolutionalLayer`` followed by a ``ConvolutionalActivation`` is
    wrapped in a ``ConvolutionalSequence``; all weights are constant 1, the
    first brick's biases are constant 5 and the second brick has its bias
    disabled, so the output on an all-ones input is a constant tensor.
    """
    num_channels = 4
    pooling_size = 3
    batch_size = 5
    image_size = (17, 13)

    inputs = tensor.tensor4('x')
    rectify = Rectifier().apply

    first = ConvolutionalLayer(rectify, (3, 3), 5,
                               (pooling_size, pooling_size),
                               weights_init=Constant(1.),
                               biases_init=Constant(5.))
    second = ConvolutionalActivation(rectify, (2, 2), 4,
                                     weights_init=Constant(1.))
    sequence = ConvolutionalSequence([first, second], num_channels,
                                     image_size=image_size)

    # Pushing the allocation config must wire each brick's input channels.
    sequence.push_allocation_config()
    assert first.num_channels == 4
    assert second.num_channels == 5

    # The bias of the second convolution is switched off before applying.
    second.convolution.use_bias = False
    outputs = sequence.apply(inputs)
    sequence.initialize()
    compiled = function([inputs], outputs)

    ones_in = numpy.ones((batch_size, 4, 17, 13),
                         dtype=theano.config.floatX)
    expected = (numpy.ones((batch_size, 4, 4, 3)) *
                (9 * 4 + 5) * 4 * 5)
    assert_allclose(compiled(ones_in), expected)
def test_fully_layer(): batch_size=2 x = T.tensor4(); y = T.ivector() V = 200 layer_conv = Convolutional(filter_size=(5,5),num_filters=V, name="toto", weights_init=IsotropicGaussian(0.01), biases_init=Constant(0.0)) # try with no bias activation = Rectifier() pool = MaxPooling(pooling_size=(2,2)) convnet = ConvolutionalSequence([layer_conv, activation, pool], num_channels=15, image_size=(10,10), name="conv_section") convnet.push_allocation_config() convnet.initialize() output=convnet.apply(x) batch_size=output.shape[0] output_dim=np.prod(convnet.get_dim('output')) result_conv = output.reshape((batch_size, output_dim)) mlp=MLP(activations=[Rectifier().apply], dims=[output_dim, 10], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0.0)) mlp.initialize() output=mlp.apply(result_conv) cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output)) cg = ComputationGraph(cost) W = VariableFilter(roles=[WEIGHT])(cg.variables) B = VariableFilter(roles=[BIAS])(cg.variables) W = W[0]; b = B[0] inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg) outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg) var_input=inputs_fully[0] var_output=outputs_fully[0] [d_W,d_S,d_b] = T.grad(cost, [W, var_output, b]) d_b = d_b.dimshuffle(('x',0)) d_p = T.concatenate([d_W, d_b], axis=0) x_value = 1e3*np.random.ranf((2,15, 10, 10)) f = theano.function([x,y], [var_input, d_S, d_p], allow_input_downcast=True, on_unused_input='ignore') A, B, C= f(x_value, [5, 0]) A = np.concatenate([A, np.ones((2,1))], axis=1) print 'A', A.shape print 'B', B.shape print 'C', C.shape print lin.norm(C - np.dot(np.transpose(A), B), 'fro') return """
def test_border_mode_not_pushed():
    """Check which ``border_mode`` each child has after config is pushed.

    Without a ``border_mode`` on the sequence every child keeps its own
    setting; with ``border_mode='full'`` on the sequence every child ends
    up with ``'full'``.
    """
    bricks = [
        Convolutional(border_mode='full'),
        ConvolutionalActivation(Rectifier().apply),
        ConvolutionalActivation(Rectifier().apply, border_mode='valid'),
        ConvolutionalLayer(Rectifier().apply, border_mode='full'),
    ]

    # No border_mode given to the sequence.
    untouched = ConvolutionalSequence(bricks)
    untouched.push_allocation_config()
    for position, mode in enumerate(['full', 'valid', 'valid', 'full']):
        assert untouched.children[position].border_mode == mode

    # border_mode given to the sequence.
    overridden = ConvolutionalSequence(bricks, border_mode='full')
    overridden.push_allocation_config()
    for position in range(4):
        assert overridden.children[position].border_mode == 'full'
def test_border_mode_not_pushed():
    """Check which ``border_mode`` each child has after config is pushed.

    Without a ``border_mode`` on the sequence every child keeps its own
    setting; with ``border_mode='full'`` on the sequence every child ends
    up with ``'full'``.
    """
    bricks = [
        Convolutional(border_mode='full'),
        ConvolutionalActivation(Rectifier().apply),
        ConvolutionalActivation(Rectifier().apply, border_mode='valid'),
        ConvolutionalActivation(Rectifier().apply, border_mode='full'),
    ]

    # No border_mode given to the sequence.
    untouched = ConvolutionalSequence(bricks)
    untouched.push_allocation_config()
    for position, mode in enumerate(['full', 'valid', 'valid', 'full']):
        assert untouched.children[position].border_mode == mode

    # border_mode given to the sequence.
    overridden = ConvolutionalSequence(bricks, border_mode='full')
    overridden.push_allocation_config()
    for position in range(4):
        assert overridden.children[position].border_mode == 'full'
def create_model_brick():
    """Assemble, configure and initialize the ``ALI`` brick.

    Builds the encoder, decoder and the three discriminator sequences,
    wraps them in an ``ALI`` brick, re-enables biases on selected
    convolutions, initializes the parameters and finally sets the bias of
    the decoder's last convolution from ``get_log_odds`` of a batch drawn
    from ``create_celeba_data_streams``.

    Returns
    -------
    The initialized ``ALI`` brick.
    """
    # --- encoder ---------------------------------------------------------
    encoder_layers = [
        conv_brick(2, 1, 64), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 128), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(5, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(4, 1, 512), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 2 * NLAT),
    ]
    encoder_mapping = ConvolutionalSequence(
        layers=encoder_layers, num_channels=NUM_CHANNELS,
        image_size=IMAGE_SIZE, use_bias=False, name='encoder_mapping')
    encoder = GaussianConditional(encoder_mapping, name='encoder')

    # --- decoder ---------------------------------------------------------
    decoder_layers = [
        conv_transpose_brick(4, 1, 512), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(7, 2, 256), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(5, 2, 256), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(7, 2, 128), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(2, 1, 64), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, NUM_CHANNELS), Logistic(),
    ]
    decoder_mapping = ConvolutionalSequence(
        layers=decoder_layers, num_channels=NLAT, image_size=(1, 1),
        use_bias=False, name='decoder_mapping')
    decoder = DeterministicConditional(decoder_mapping, name='decoder')

    # --- discriminator on x ----------------------------------------------
    x_layers = [
        conv_brick(2, 1, 64), LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 128), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(5, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(4, 1, 512), bn_brick(), LeakyRectifier(leak=LEAK),
    ]
    x_discriminator = ConvolutionalSequence(
        layers=x_layers, num_channels=NUM_CHANNELS, image_size=IMAGE_SIZE,
        use_bias=False, name='x_discriminator')
    # Config is pushed now because get_dim('output') is queried below.
    x_discriminator.push_allocation_config()

    # --- discriminator on z ----------------------------------------------
    z_layers = [
        conv_brick(1, 1, 1024), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 1024), LeakyRectifier(leak=LEAK),
    ]
    z_discriminator = ConvolutionalSequence(
        layers=z_layers, num_channels=NLAT, image_size=(1, 1),
        use_bias=False, name='z_discriminator')
    z_discriminator.push_allocation_config()

    # --- joint discriminator ---------------------------------------------
    joint_layers = [
        conv_brick(1, 1, 2048), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 2048), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 1),
    ]
    joint_channels = (x_discriminator.get_dim('output')[0] +
                      z_discriminator.get_dim('output')[0])
    joint_discriminator = ConvolutionalSequence(
        layers=joint_layers, num_channels=joint_channels,
        image_size=(1, 1), name='joint_discriminator')

    discriminator = XZJointDiscriminator(
        x_discriminator, z_discriminator, joint_discriminator,
        name='discriminator')

    # --- full model ------------------------------------------------------
    ali = ALI(encoder, decoder, discriminator,
              weights_init=GAUSSIAN_INIT, biases_init=ZERO_INIT,
              name='ali')
    ali.push_allocation_config()

    # Selected convolutions get a bias back after the config was pushed.
    encoder_mapping.layers[-1].use_bias = True
    encoder_mapping.layers[-1].tied_biases = False
    decoder_mapping.layers[-2].use_bias = True
    decoder_mapping.layers[-2].tied_biases = False
    x_discriminator.layers[0].use_bias = True
    x_discriminator.layers[0].tied_biases = True
    ali.initialize()

    # Overwrite the decoder's output bias with the log-odds of one batch.
    first_stream = create_celeba_data_streams(500, 500)[0]
    (raw_marginals,) = next(first_stream.get_epoch_iterator())
    decoder_mapping.layers[-2].b.set_value(get_log_odds(raw_marginals))

    return ali
conv_brick(5, 1, 32), ConvMaxout(num_pieces=NUM_PIECES), conv_brick(4, 2, 64), ConvMaxout(num_pieces=NUM_PIECES), conv_brick(4, 1, 128), ConvMaxout(num_pieces=NUM_PIECES), conv_brick(4, 2, 256), ConvMaxout(num_pieces=NUM_PIECES), conv_brick(4, 1, 512), ConvMaxout(num_pieces=NUM_PIECES) ] x_discriminator = ConvolutionalSequence(layers=layers, num_channels=NUM_CHANNELS, image_size=IMAGE_SIZE, name='x_discriminator') x_discriminator.push_allocation_config() layers = [ conv_brick(1, 1, 512), ConvMaxout(num_pieces=NUM_PIECES), conv_brick(1, 1, 512), ConvMaxout(num_pieces=NUM_PIECES) ] z_discriminator = ConvolutionalSequence(layers=layers, num_channels=NLAT, image_size=(1, 1), use_bias=False, name='z_discriminator') z_discriminator.push_allocation_config() layers = [
def create_model_brick():
    """Assemble, configure and initialize the ``ALI`` brick.

    Builds the encoder, decoder and the three maxout discriminator
    sequences, wraps them in an ``ALI`` brick, re-enables biases on the
    last encoder convolution and the last decoder convolution, initializes
    the parameters and finally sets that decoder bias from
    ``get_log_odds`` of a batch drawn from ``create_cifar10_data_streams``.

    Returns
    -------
    The initialized ``ALI`` brick.
    """
    # --- encoder ---------------------------------------------------------
    encoder_layers = [
        conv_brick(5, 1, 32), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(4, 2, 64), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(4, 1, 128), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(4, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(4, 1, 512), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 512), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 2 * NLAT),
    ]
    encoder_mapping = ConvolutionalSequence(
        layers=encoder_layers, num_channels=NUM_CHANNELS,
        image_size=IMAGE_SIZE, use_bias=False, name='encoder_mapping')
    encoder = GaussianConditional(encoder_mapping, name='encoder')

    # --- decoder ---------------------------------------------------------
    decoder_layers = [
        conv_transpose_brick(4, 1, 256), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(4, 2, 128), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(4, 1, 64), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(4, 2, 32), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(5, 1, 32), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(1, 1, 32), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, NUM_CHANNELS), Logistic(),
    ]
    decoder_mapping = ConvolutionalSequence(
        layers=decoder_layers, num_channels=NLAT, image_size=(1, 1),
        use_bias=False, name='decoder_mapping')
    decoder = DeterministicConditional(decoder_mapping, name='decoder')

    # --- discriminator on x ----------------------------------------------
    x_layers = [
        conv_brick(5, 1, 32), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 2, 64), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 1, 128), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 2, 256), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 1, 512), ConvMaxout(num_pieces=NUM_PIECES),
    ]
    x_discriminator = ConvolutionalSequence(
        layers=x_layers, num_channels=NUM_CHANNELS, image_size=IMAGE_SIZE,
        name='x_discriminator')
    # Config is pushed now because get_dim('output') is queried below.
    x_discriminator.push_allocation_config()

    # --- discriminator on z ----------------------------------------------
    z_layers = [
        conv_brick(1, 1, 512), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(1, 1, 512), ConvMaxout(num_pieces=NUM_PIECES),
    ]
    z_discriminator = ConvolutionalSequence(
        layers=z_layers, num_channels=NLAT, image_size=(1, 1),
        use_bias=False, name='z_discriminator')
    z_discriminator.push_allocation_config()

    # --- joint discriminator ---------------------------------------------
    joint_layers = [
        conv_brick(1, 1, 1024), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(1, 1, 1024), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(1, 1, 1),
    ]
    joint_channels = (x_discriminator.get_dim('output')[0] +
                      z_discriminator.get_dim('output')[0])
    joint_discriminator = ConvolutionalSequence(
        layers=joint_layers, num_channels=joint_channels,
        image_size=(1, 1), name='joint_discriminator')

    discriminator = XZJointDiscriminator(
        x_discriminator, z_discriminator, joint_discriminator,
        name='discriminator')

    # --- full model ------------------------------------------------------
    ali = ALI(encoder, decoder, discriminator,
              weights_init=GAUSSIAN_INIT, biases_init=ZERO_INIT,
              name='ali')
    ali.push_allocation_config()

    # Selected convolutions get a bias back after the config was pushed.
    encoder_mapping.layers[-1].use_bias = True
    encoder_mapping.layers[-1].tied_biases = False
    decoder_mapping.layers[-2].use_bias = True
    decoder_mapping.layers[-2].tied_biases = False
    ali.initialize()

    # Overwrite the decoder's output bias with the log-odds of one batch.
    first_stream = create_cifar10_data_streams(500, 500)[0]
    (raw_marginals,) = next(first_stream.get_epoch_iterator())
    decoder_mapping.layers[-2].b.set_value(get_log_odds(raw_marginals))

    return ali
def _activation_apply_from_name(activation_str):
    """Return the ``apply`` method of a new activation brick.

    `activation_str` is matched case-insensitively against 'rectifier',
    'tanh', 'sigmoid'/'logistic' and 'id'/'identity'.

    Raises
    ------
    Exception
        If the name is not one of the recognised activations.
    """
    # TO DO : leaky relu
    key = activation_str.lower()
    if key == 'rectifier':
        return Rectifier().apply
    if key == 'tanh':
        return Tanh().apply
    if key in ('sigmoid', 'logistic'):
        return Logistic().apply
    if key in ('id', 'identity'):
        return Identity().apply
    # The name is interpolated into the message (it used to be passed as a
    # second exception argument, leaving a literal "%s" in the text).
    raise Exception("unknown activation function : %s" % activation_str)


def build_submodel(input_shape, output_dim, L_dim_conv_layers, L_filter_size,
                   L_pool_size, L_activation_conv, L_dim_full_layers,
                   L_activation_full, L_exo_dropout_conv_layers,
                   L_exo_dropout_full_layers, L_endo_dropout_conv_layers,
                   L_endo_dropout_full_layers, L_border_mode=None,
                   L_filter_step=None, L_pool_step=None):
    """Build a conv + fully-connected classifier and its computation graph.

    Parameters
    ----------
    input_shape : sequence of length 3
        ``(num_channels, height, width)`` of one input example.
    output_dim : int
        Size of the final ``Linear`` layer (number of predictions).
    L_dim_conv_layers, L_filter_size, L_pool_size, L_activation_conv : list
        Per-conv-layer number of filters, filter size, pooling size
        (``None`` means no pooling) and activation name. All must have the
        same length.
    L_dim_full_layers, L_activation_full : list
        Per-fully-connected-layer width and activation name.
    L_exo_dropout_conv_layers, L_exo_dropout_full_layers : list
        "Exo" dropout rates in ``[0, 1)``; they shrink the number of
        filters / units of the *preceding* layer. The first conv entry
        must be 0.0, and the full list has one more entry than
        `L_dim_full_layers`.
    L_endo_dropout_conv_layers, L_endo_dropout_full_layers : list
        Dropout rates passed to ``apply_dropout`` on the inputs of the
        layer with the matching index.
    L_border_mode, L_filter_step, L_pool_step : list, optional
        Per-conv-layer border mode (default ``"valid"``), convolution step
        and pooling step.

    Returns
    -------
    tuple
        ``(cg, error_rate, cost, D_params, D_kind)``.

    Raises
    ------
    Exception
        If an activation name is not recognised.
    """
    # TO DO : target size and name of the features
    x = T.tensor4('features')
    y = T.imatrix('targets')

    assert len(input_shape) == 3, "input_shape must be a 3d tensor"
    num_channels = input_shape[0]
    image_size = tuple(input_shape[1:])
    print(image_size)
    print(num_channels)
    prediction = output_dim

    # CONVOLUTION
    output_conv = x
    output_dim = num_channels*np.prod(image_size)
    conv_layers = []
    nb_conv = len(L_dim_conv_layers)

    assert nb_conv == len(L_filter_size)
    if L_filter_step is None:
        L_filter_step = [None] * nb_conv
    assert nb_conv == len(L_pool_size)
    if L_pool_step is None:
        L_pool_step = [None] * nb_conv
    assert nb_conv == len(L_pool_step)
    assert nb_conv == len(L_activation_conv)
    if L_border_mode is None:
        L_border_mode = ["valid"] * nb_conv
    assert nb_conv == len(L_border_mode)
    assert nb_conv == len(L_endo_dropout_conv_layers)
    assert nb_conv == len(L_exo_dropout_conv_layers)

    # Regarding the batch dropout: the dropout is applied on the filters,
    # which is equivalent to the output dimension, so each layer has to
    # look at the dropout rate of the *next* layer. That is why the first
    # value of L_exo_dropout_full_layers is appended below. The first conv
    # value has to be 0.0 in this context.
    assert L_exo_dropout_conv_layers[0] == 0.0, "L_exo_dropout_conv_layers[0] has to be 0.0 in this context. There are ways to make it work, of course, but we don't support this with this scripts."

    # Shift the exo dropout rates by one layer.
    L_exo_dropout_conv_layers = L_exo_dropout_conv_layers[1:] + [L_exo_dropout_full_layers[0]]

    if nb_conv:
        conv_specs = zip(L_dim_conv_layers, L_filter_size, L_filter_step,
                         L_pool_size, L_pool_step, L_activation_conv,
                         L_border_mode, L_exo_dropout_conv_layers)
        for index, (num_filters, filter_size, filter_step, pool_size,
                    pool_step, activation_str, border_mode,
                    dropout) in enumerate(conv_specs):

            # Normalise the sizes and steps to tuples.
            filter_size = tuple(filter_size)
            filter_step = (1, 1) if filter_step is None else tuple(filter_step)
            pool_size = (0, 0) if pool_size is None else tuple(pool_size)

            activation = _activation_apply_from_name(activation_str)

            assert 0.0 <= dropout and dropout < 1.0
            num_filters = num_filters - int(num_filters*dropout)

            print("border_mode : %s" % border_mode)

            # http://blocks.readthedocs.org/en/latest/api/bricks.html#module-blocks.bricks.conv
            # There's a bit of a mix of names because `Convolutional` takes
            # a "step" argument, but `ConvolutionalActivation` takes a
            # "conv_step" argument.
            kwargs = {}
            if filter_step != (1, 1):
                kwargs['conv_step'] = filter_step

            if pool_size[0] == 0 and pool_size[1] == 0:
                layer_conv = ConvolutionalActivation(
                    activation=activation, filter_size=filter_size,
                    num_filters=num_filters, border_mode=border_mode,
                    name="layer_%d" % index, **kwargs)
            else:
                if pool_step is not None:
                    kwargs['pooling_step'] = tuple(pool_step)
                layer_conv = ConvolutionalLayer(
                    activation=activation, filter_size=filter_size,
                    num_filters=num_filters, border_mode=border_mode,
                    pooling_size=pool_size,
                    name="layer_%d" % index, **kwargs)

            conv_layers.append(layer_conv)

        convnet = ConvolutionalSequence(conv_layers,
                                        num_channels=num_channels,
                                        image_size=image_size,
                                        weights_init=Uniform(width=0.1),
                                        biases_init=Constant(0.0),
                                        name="conv_section")
        convnet.push_allocation_config()
        convnet.initialize()
        output_dim = np.prod(convnet.get_dim('output'))
        output_conv = convnet.apply(output_conv)

    output_conv = Flattener().apply(output_conv)

    # FULLY CONNECTED
    output_mlp = output_conv
    full_layers = []
    nb_full = len(L_dim_full_layers)

    assert nb_full == len(L_activation_full)
    assert nb_full + 1 == len(L_endo_dropout_full_layers)
    assert nb_full + 1 == len(L_exo_dropout_full_layers)

    # Same shift as for the conv section: the first value was already
    # consumed by the last conv layer, so it is thrown away here.
    L_exo_dropout_full_layers = L_exo_dropout_full_layers[1:]
    pre_dim = output_dim
    print("When constructing the model, the output_dim of the conv section is %d." % output_dim)

    if nb_full:
        full_specs = zip(L_dim_full_layers, L_activation_full,
                         L_exo_dropout_full_layers)
        # Layer indices continue after the conv layers.
        for index, (dim, activation_str, dropout) in enumerate(full_specs,
                                                                nb_conv):
            activation = _activation_apply_from_name(activation_str)

            assert 0.0 <= dropout and dropout < 1.0
            dim = dim - int(dim*dropout)
            print("When constructing the fully-connected section, we apply dropout %f to add an MLP going from pre_dim %d to dim %d." % (dropout, pre_dim, dim))

            layer_full = MLP(activations=[activation], dims=[pre_dim, dim],
                             weights_init=Uniform(width=0.1),
                             biases_init=Constant(0.0),
                             name="layer_%d" % index)
            layer_full.initialize()
            full_layers.append(layer_full)
            pre_dim = dim

        for layer in full_layers:
            output_mlp = layer.apply(output_mlp)

        output_dim = L_dim_full_layers[-1] - int(L_dim_full_layers[-1]*L_exo_dropout_full_layers[-1])

    # COST FUNCTION
    output_layer = Linear(output_dim, prediction,
                          weights_init=Uniform(width=0.1),
                          biases_init=Constant(0.0),
                          name="layer_"+str(nb_conv + nb_full))
    output_layer.initialize()
    full_layers.append(output_layer)
    y_pred = output_layer.apply(output_mlp)
    y_hat = Softmax().apply(y_pred)

    # SOFTMAX and log likelihood
    y_pred = Softmax().apply(y_pred)
    # Be careful: one version expects the output of a softmax; the other
    # expects just the output of the network.
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_pred)
    cost.name = "cost"

    # Misclassification
    error_rate_brick = MisclassificationRate()
    error_rate = error_rate_brick.apply(y.flatten(), y_hat)
    error_rate.name = "error_rate"

    # put names
    D_params, D_kind = build_params(x, T.matrix(), conv_layers, full_layers)

    # test computation graph
    cg = ComputationGraph(cost)

    # DROPOUT
    L_endo_dropout = L_endo_dropout_conv_layers + L_endo_dropout_full_layers

    cg_dropout = cg
    inputs = VariableFilter(roles=[INPUT])(cg.variables)

    # NOTE(review): every call below starts again from the undropped `cg`,
    # so it looks like only the last applied dropout survives in
    # `cg_dropout` -- confirm whether accumulation was intended.
    for (index, drop_rate) in enumerate(L_endo_dropout):
        for input_ in inputs:
            m = re.match(r"layer_(\d+)_apply.*", input_.name)
            if m and index == int(m.group(1)):
                if drop_rate < 0.0001:
                    print("Skipped applying dropout on %s because the dropout rate was under 0.0001." % input_.name)
                else:
                    cg_dropout = apply_dropout(cg, [input_], drop_rate)
                    print("Applied dropout %f on %s." % (drop_rate, input_.name))
                # Only the first matching input of each layer is considered.
                break

    cg = cg_dropout

    return (cg, error_rate, cost, D_params, D_kind)
def create_model_brick():
    """Assemble, configure and initialize the ``ConditionalALI`` brick.

    Builds an ``EncoderMapping``-based encoder, a ``Decoder`` and the three
    discriminator sequences, wraps them in a ``ConditionalALI`` brick,
    re-enables biases on selected convolutions, initializes the parameters
    and finally sets the bias of the decoder's last convolution from
    ``get_log_odds`` of a batch drawn from ``create_celeba_data_streams``.

    Returns
    -------
    The initialized ``ConditionalALI`` brick.
    """
    # --- encoder ---------------------------------------------------------
    encoder_layers = [
        conv_brick(2, 1, 64), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 128), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(5, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(4, 1, 512), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 2 * NLAT),
    ]
    encoder_mapping = EncoderMapping(
        layers=encoder_layers, num_channels=NUM_CHANNELS, n_emb=NEMB,
        image_size=IMAGE_SIZE, weights_init=GAUSSIAN_INIT,
        biases_init=ZERO_INIT, use_bias=False)
    encoder = GaussianConditional(encoder_mapping, name='encoder')

    # --- decoder ---------------------------------------------------------
    decoder_layers = [
        conv_transpose_brick(4, 1, 512), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(7, 2, 256), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(5, 2, 256), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(7, 2, 128), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(2, 1, 64), bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, NUM_CHANNELS), Logistic(),
    ]
    decoder = Decoder(
        layers=decoder_layers, num_channels=NLAT + NEMB,
        image_size=(1, 1), use_bias=False, name='decoder_mapping')

    # --- discriminator on x ----------------------------------------------
    x_layers = [
        conv_brick(2, 1, 64), LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 128), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(5, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(4, 1, 512), bn_brick(), LeakyRectifier(leak=LEAK),
    ]
    x_discriminator = ConvolutionalSequence(
        layers=x_layers, num_channels=NUM_CHANNELS, image_size=IMAGE_SIZE,
        use_bias=False, name='x_discriminator')
    # Config is pushed now because get_dim('output') is queried below.
    x_discriminator.push_allocation_config()

    # --- discriminator on z ----------------------------------------------
    z_layers = [
        conv_brick(1, 1, 1024), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 1024), LeakyRectifier(leak=LEAK),
    ]
    z_discriminator = ConvolutionalSequence(
        layers=z_layers, num_channels=NLAT, image_size=(1, 1),
        use_bias=False, name='z_discriminator')
    z_discriminator.push_allocation_config()

    # --- joint discriminator ---------------------------------------------
    joint_layers = [
        conv_brick(1, 1, 2048), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 2048), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 1),
    ]
    joint_channels = (x_discriminator.get_dim('output')[0] +
                      z_discriminator.get_dim('output')[0] +
                      NEMB)
    joint_discriminator = ConvolutionalSequence(
        layers=joint_layers, num_channels=joint_channels,
        image_size=(1, 1), name='joint_discriminator')

    discriminator = XZYJointDiscriminator(
        x_discriminator, z_discriminator, joint_discriminator,
        name='discriminator')

    # --- full model ------------------------------------------------------
    ali = ConditionalALI(encoder, decoder, discriminator,
                         n_cond=NCLASSES, n_emb=NEMB,
                         weights_init=GAUSSIAN_INIT, biases_init=ZERO_INIT,
                         name='ali')
    ali.push_allocation_config()

    # Selected convolutions get a bias back after the config was pushed.
    encoder_mapping.layers[-1].use_bias = True
    encoder_mapping.layers[-1].tied_biases = False
    decoder.layers[-2].use_bias = True
    decoder.layers[-2].tied_biases = False
    x_discriminator.layers[0].use_bias = True
    x_discriminator.layers[0].tied_biases = True
    ali.initialize()

    # Overwrite the decoder's output bias with the log-odds of one batch.
    first_stream = create_celeba_data_streams(500, 500)[0]
    (raw_marginals,) = next(first_stream.get_epoch_iterator())
    decoder.layers[-2].b.set_value(get_log_odds(raw_marginals))

    return ali
def test_convolutional_layer():
    """Exploratory check of the gradients around a ``Convolutional`` brick.

    Builds conv -> rectifier -> max-pooling -> MLP with a cross-entropy
    cost, compiles a function returning the convolution input, the
    reshaped gradient w.r.t. the convolution output and the reshaped
    gradient w.r.t. the selected weights, and prints the mean of the
    output gradient.

    The interactive ``pdb.set_trace()`` breakpoints that used to sit in the
    middle of this function were removed so it can run unattended.
    """
    x = T.tensor4()
    y = T.ivector()
    V = 200
    layer_conv = Convolutional(filter_size=(5, 5), num_filters=V,
                               name="toto",
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0))
    # try with no bias
    activation = Rectifier()
    pool = MaxPooling(pooling_size=(2, 2))

    convnet = ConvolutionalSequence([layer_conv, activation, pool],
                                    num_channels=15,
                                    image_size=(10, 10),
                                    name="conv_section")
    convnet.push_allocation_config()
    convnet.initialize()
    output = convnet.apply(x)
    # Symbolic batch size, taken from the conv output.
    batch_size = output.shape[0]
    output_dim = np.prod(convnet.get_dim('output'))
    result_conv = output.reshape((batch_size, output_dim))
    mlp = MLP(activations=[Rectifier().apply], dims=[output_dim, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0.0))
    mlp.initialize()
    output = mlp.apply(result_conv)
    cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output))
    cg = ComputationGraph(cost)

    # The last WEIGHT / BIAS variables found in the graph are the ones used.
    W = VariableFilter(roles=[WEIGHT])(cg.variables)[-1]
    b = VariableFilter(roles=[BIAS])(cg.variables)[-1]

    print(W.shape.eval())
    print(b.shape.eval())

    inputs_conv = VariableFilter(roles=[INPUT], bricks=[Convolutional])(cg)
    outputs_conv = VariableFilter(roles=[OUTPUT], bricks=[Convolutional])(cg)
    var_input = inputs_conv[0]
    var_output = outputs_conv[0]

    [d_W, d_S, d_b] = T.grad(cost, [W, var_output, b])

    w_shape = W.shape.eval()
    d_W = d_W.reshape((w_shape[0], w_shape[1]*w_shape[2]*w_shape[3]))

    # The bias gradient is replaced by zeros here (6*6 is hard-coded).
    d_b = T.zeros((w_shape[0], 6*6))
    d_p = T.concatenate([d_W, d_b], axis=1)
    d_S = d_S.dimshuffle((1, 0, 2, 3)).reshape((w_shape[0], batch_size, 6*6)).reshape((w_shape[0], batch_size*6*6))

    x_value = 1e3*np.random.ranf((2, 15, 10, 10))
    f = theano.function([x, y], [var_input, d_S, d_W],
                        allow_input_downcast=True,
                        on_unused_input='ignore')
    A, B, C = f(x_value, [5, 5])
    print(np.mean(B))

    # NOTE(review): the function stops here; everything below is currently
    # unreachable and is kept only because the original kept it.
    return

    E_A = expansion_op(A, (2, 15, 10, 10), (5, 5))
    print(E_A.shape)
    E_A = E_A.reshape((2*36, C.shape[1]))
    print(E_A.shape)
    tmp = C - np.dot(B, E_A)
    print(lin.norm(tmp, 'fro'))
def _activation_brick_from_name(activation_str):
    """Return a new activation brick for the given (case-insensitive) name.

    Recognised names are 'rectifier', 'tanh', 'sigmoid'/'logistic' and
    'id'/'identity'.

    Raises
    ------
    Exception
        If the name is not one of the recognised activations.
    """
    # TO DO : leaky relu
    key = activation_str.lower()
    if key == 'rectifier':
        return Rectifier()
    if key == 'tanh':
        return Tanh()
    if key in ('sigmoid', 'logistic'):
        return Logistic()
    if key in ('id', 'identity'):
        return Identity()
    # The name is interpolated into the message (it used to be passed as a
    # second exception argument, leaving a literal "%s" in the text).
    raise Exception("unknown activation function : %s" % activation_str)


def build_submodel(image_size, num_channels, L_dim_conv_layers, L_filter_size,
                   L_pool_size, L_activation_conv, L_dim_full_layers,
                   L_activation_full, dropout, prediction,
                   allow_comment=False, sub_dropout=0,
                   L_pool_step=[], L_pool_padding=[]):
    """Build the convolutional section and the MLP of a classifier.

    Parameters
    ----------
    image_size : tuple
        Spatial size of the input images.
    num_channels : int
        Number of input channels.
    L_dim_conv_layers, L_filter_size, L_pool_size, L_activation_conv : list
        Per-conv-layer number of filters, filter size, pooling size
        (``None`` means no pooling) and activation name. All must have the
        same length.
    L_dim_full_layers, L_activation_full : list
        Per-fully-connected-layer width and activation name.
    dropout : float
        Single rate in ``[0, 1)`` used to shrink the number of filters and
        units of every layer.
    prediction : int
        Output dimension of the MLP.
    allow_comment : bool
        When true, progress messages are printed.
    sub_dropout :
        Not used by this function.
    L_pool_step, L_pool_padding : list
        Per-conv-layer pooling step and padding. They are only checked for
        length; ``MaxPooling`` is built from the pooling size alone. The
        default lists are never mutated.

    Returns
    -------
    tuple
        ``(convnet, mlp)``; ``convnet`` is ``None`` when there are no
        convolutional layers.

    Raises
    ------
    Exception
        If an activation name is not recognised.
    """
    # CONVOLUTION
    nb_conv = len(L_dim_conv_layers)
    output_dim = num_channels * np.prod(image_size)
    conv_layers = []

    assert nb_conv == len(L_filter_size)
    assert nb_conv == len(L_pool_size)
    assert nb_conv == len(L_activation_conv)
    if len(L_pool_step) == 0:
        L_pool_step = [(1, 1) for _ in range(nb_conv)]
        L_pool_padding = [(0, 0) for _ in range(nb_conv)]
    assert nb_conv == len(L_pool_step)
    assert nb_conv == len(L_pool_padding)

    # unique value of dropout for now
    assert 0.0 <= dropout and dropout < 1.0

    convnet = None
    mlp = None
    if nb_conv:
        conv_specs = zip(L_dim_conv_layers, L_filter_size, L_pool_size,
                         L_activation_conv)
        for index, (num_filters, filter_size, pool_size,
                    activation_str) in enumerate(conv_specs):

            # Normalise the sizes to tuples.
            filter_size = tuple(filter_size)
            pool_size = (0, 0) if pool_size is None else tuple(pool_size)

            activation = _activation_brick_from_name(activation_str)

            num_filters = num_filters - int(num_filters * dropout)

            layer_conv = Convolutional(filter_size=filter_size,
                                       num_filters=num_filters,
                                       name="layer_%d" % index,
                                       weights_init=IsotropicGaussian(0.01),
                                       biases_init=Constant(0.0))
            conv_layers.append(layer_conv)
            conv_layers.append(activation)

            # A (0, 0) pooling size means "no pooling after this layer".
            if not (pool_size[0] == 0 and pool_size[1] == 0):
                conv_layers.append(MaxPooling(pooling_size=pool_size))

        convnet = ConvolutionalSequence(conv_layers,
                                        num_channels=num_channels,
                                        image_size=image_size,
                                        name="conv_section")
        convnet.push_allocation_config()
        convnet.initialize()
        output_dim = np.prod(convnet.get_dim('output'))

    # MLP
    nb_full = len(L_dim_full_layers)
    assert nb_full == len(L_activation_full)

    pre_dim = output_dim
    if allow_comment:
        print("When constructing the model, the output_dim of the conv section is %d." % output_dim)

    activations = []
    dims = [pre_dim]
    for dim, activation_str in zip(L_dim_full_layers, L_activation_full):
        activations.append(_activation_brick_from_name(activation_str).apply)
        dim = dim - int(dim * dropout)
        if allow_comment:
            print("When constructing the fully-connected section, we apply dropout %f to add an MLP going from pre_dim %d to dim %d." % (dropout, pre_dim, dim))
        dims.append(dim)

    # Now construct the full MLP in one pass.
    activations.append(Identity())
    dims.append(prediction)

    # The MLP is named after the index of the last layer built. This used
    # to rely on a leaked loop variable, which raised NameError when both
    # layer lists were empty; index 0 is used in that case.
    last_index = max(nb_conv + nb_full - 1, 0)
    mlp = MLP(activations=activations, dims=dims,
              weights_init=IsotropicGaussian(0.1),
              biases_init=Constant(0.0),
              name="layer_%d" % last_index)
    mlp.push_allocation_config()
    mlp.initialize()

    return (convnet, mlp)
decoder.initialize() decoder_fun = function([z, y], decoder.apply(z, embeddings)) out = decoder_fun(z_hat, test_labels) # Discriminator layers = [ conv_brick(5, 1, 32), ConvMaxout(num_pieces=NUM_PIECES), conv_brick(4, 2, 64), ConvMaxout(num_pieces=NUM_PIECES), conv_brick(4, 1, 128), ConvMaxout(num_pieces=NUM_PIECES), conv_brick(4, 2, 256), ConvMaxout(num_pieces=NUM_PIECES), conv_brick(4, 1, 512), ConvMaxout(num_pieces=NUM_PIECES)] x_discriminator = ConvolutionalSequence( layers=layers, num_channels=NUM_CHANNELS, image_size=IMAGE_SIZE, name='x_discriminator') x_discriminator.push_allocation_config() layers = [ conv_brick(1, 1, 512), ConvMaxout(num_pieces=NUM_PIECES), conv_brick(1, 1, 512), ConvMaxout(num_pieces=NUM_PIECES)] z_discriminator = ConvolutionalSequence( layers=layers, num_channels=NLAT, image_size=(1, 1), use_bias=False, name='z_discriminator') z_discriminator.push_allocation_config() layers = [ conv_brick(1, 1, 1024), ConvMaxout(num_pieces=NUM_PIECES), conv_brick(1, 1, 1024), ConvMaxout(num_pieces=NUM_PIECES), conv_brick(1, 1, 1)] joint_discriminator = ConvolutionalSequence( layers=layers,