def create_model_brick():
    """Build, configure and initialize the convolutional ALI brick.

    An encoder, a decoder and a three-part discriminator (x branch, z
    branch and joint head) are assembled from ``ConvolutionalSequence``
    bricks and wrapped in an ``ALI`` brick.  Once the allocation
    configuration has been pushed, biases are switched back on for a few
    selected layers, the model is initialized, and the bias of the
    decoder's last convolution is overwritten with
    ``get_log_odds(raw_marginals)``, where ``raw_marginals`` is the single
    source of the first item yielded by the epoch iterator of the first
    stream returned by ``create_celeba_data_streams(500, 500)``.

    Returns
    -------
    ali : ALI
        The initialized brick.
    """

    def activation():
        # Every non-linearity in the hidden stacks shares the same leak.
        return LeakyRectifier(leak=LEAK)

    def normalized_stack(make_conv, specs):
        """Return a flat [conv, batch-norm, activation, ...] list.

        ``make_conv`` is called positionally with each triple in ``specs``.
        """
        stack = []
        for conv_args in specs:
            stack.extend([make_conv(*conv_args), bn_brick(), activation()])
        return stack

    # ------------------------------------------------------------------
    # Encoder: image -> 2 * NLAT output channels.
    # ------------------------------------------------------------------
    encoder_layers = normalized_stack(
        conv_brick,
        [(2, 1, 64), (7, 2, 128), (5, 2, 256), (7, 2, 256), (4, 1, 512)])
    encoder_layers.append(conv_brick(1, 1, 2 * NLAT))
    encoder_mapping = ConvolutionalSequence(
        layers=encoder_layers,
        num_channels=NUM_CHANNELS,
        image_size=IMAGE_SIZE,
        use_bias=False,
        name='encoder_mapping')
    encoder = GaussianConditional(encoder_mapping, name='encoder')

    # ------------------------------------------------------------------
    # Decoder: NLAT channels on a 1x1 map -> NUM_CHANNELS, squashed by a
    # logistic.
    # ------------------------------------------------------------------
    decoder_layers = normalized_stack(
        conv_transpose_brick,
        [(4, 1, 512), (7, 2, 256), (5, 2, 256), (7, 2, 128), (2, 1, 64)])
    decoder_layers.append(conv_brick(1, 1, NUM_CHANNELS))
    decoder_layers.append(Logistic())
    decoder_mapping = ConvolutionalSequence(
        layers=decoder_layers,
        num_channels=NLAT,
        image_size=(1, 1),
        use_bias=False,
        name='decoder_mapping')
    decoder = DeterministicConditional(decoder_mapping, name='decoder')

    # ------------------------------------------------------------------
    # Discriminator, x branch.  The first convolution is NOT followed by
    # batch normalization; the remaining ones are.
    # ------------------------------------------------------------------
    x_layers = [conv_brick(2, 1, 64), activation()]
    x_layers += normalized_stack(
        conv_brick,
        [(7, 2, 128), (5, 2, 256), (7, 2, 256), (4, 1, 512)])
    x_discriminator = ConvolutionalSequence(
        layers=x_layers,
        num_channels=NUM_CHANNELS,
        image_size=IMAGE_SIZE,
        use_bias=False,
        name='x_discriminator')
    # Pushed here, before get_dim('output') is queried below.
    x_discriminator.push_allocation_config()

    # ------------------------------------------------------------------
    # Discriminator, z branch.
    # ------------------------------------------------------------------
    z_layers = [
        conv_brick(1, 1, 1024), activation(),
        conv_brick(1, 1, 1024), activation(),
    ]
    z_discriminator = ConvolutionalSequence(
        layers=z_layers,
        num_channels=NLAT,
        image_size=(1, 1),
        use_bias=False,
        name='z_discriminator')
    z_discriminator.push_allocation_config()

    # ------------------------------------------------------------------
    # Discriminator, joint head.  Its input channel count is the sum of
    # the first output dimension of both branches.
    # ------------------------------------------------------------------
    joint_layers = [
        conv_brick(1, 1, 2048), activation(),
        conv_brick(1, 1, 2048), activation(),
        conv_brick(1, 1, 1),
    ]
    x_out_channels = x_discriminator.get_dim('output')[0]
    z_out_channels = z_discriminator.get_dim('output')[0]
    joint_discriminator = ConvolutionalSequence(
        layers=joint_layers,
        num_channels=(x_out_channels + z_out_channels),
        image_size=(1, 1),
        name='joint_discriminator')

    discriminator = XZJointDiscriminator(
        x_discriminator, z_discriminator, joint_discriminator,
        name='discriminator')

    # ------------------------------------------------------------------
    # Full model.
    # ------------------------------------------------------------------
    ali = ALI(encoder, decoder, discriminator,
              weights_init=GAUSSIAN_INIT, biases_init=ZERO_INIT,
              name='ali')
    ali.push_allocation_config()

    # The sequences were created with use_bias=False; re-enable biases on
    # the encoder's last layer, the decoder's last convolution (the layer
    # just before the Logistic) and the x branch's first convolution.
    encoder_head = encoder_mapping.layers[-1]
    encoder_head.use_bias = True
    encoder_head.tied_biases = False

    decoder_head = decoder_mapping.layers[-2]
    decoder_head.use_bias = True
    decoder_head.tied_biases = False

    x_first_conv = x_discriminator.layers[0]
    x_first_conv.use_bias = True
    x_first_conv.tied_biases = True

    ali.initialize()

    # Replace the decoder output bias with a data-derived value.
    first_stream = create_celeba_data_streams(500, 500)[0]
    first_batch = next(first_stream.get_epoch_iterator())
    (raw_marginals,) = first_batch
    b_value = get_log_odds(raw_marginals)
    decoder_head.b.set_value(b_value)

    return ali
def create_model_brick():
    """Build, configure and initialize the MLP-based ALI brick.

    The encoder and decoder are ``MLP`` bricks whose hidden activations are
    a batch normalization followed by ``GEN_ACTIVATION``.  The x and z
    discriminator branches are ``Identity`` bricks, and the joint
    discriminator is three ``LinearMaxout`` layers followed by a ``Linear``
    layer with a single output.  After the allocation configuration is
    pushed, biases are re-enabled on the last linear transformation of the
    encoder and decoder mappings and the model is initialized.  The
    parameter counts of the discriminator, encoder and decoder are then
    printed.

    Returns
    -------
    ali : ALI
        The initialized brick.
    """

    def hidden_activation(name):
        # Batch normalization followed by the generator non-linearity.
        return Sequence(
            [BatchNormalization(GEN_HIDDEN).apply, GEN_ACTIVATION().apply],
            name=name)

    def maxout_layer(input_dim, name):
        # All maxout layers share output size, piece count and initializers.
        return LinearMaxout(
            input_dim=input_dim,
            output_dim=DISC_HIDDEN,
            num_pieces=MAXOUT_PIECES,
            weights_init=GAUSSIAN_INIT,
            biases_init=ZERO_INIT,
            name=name)

    def parameter_count(brick):
        # Sum of the element counts of every parameter selected from brick.
        sizes = []
        for parameter in Selector(brick).get_parameters().values():
            sizes.append(numpy.prod(parameter.shape.eval()))
        return numpy.sum(sizes)

    # ------------------------------------------------------------------
    # Encoder.
    # ------------------------------------------------------------------
    encoder_activations = [
        hidden_activation('encoder_h1'),
        hidden_activation('encoder_h2'),
        Identity(name='encoder_out'),
    ]
    encoder_mapping = MLP(
        dims=[2 * INPUT_DIM, GEN_HIDDEN, GEN_HIDDEN, NLAT],
        activations=encoder_activations,
        use_bias=False,
        name='encoder_mapping')
    encoder = COVConditional(encoder_mapping, (INPUT_DIM, ), name='encoder')

    # ------------------------------------------------------------------
    # Decoder.
    # ------------------------------------------------------------------
    decoder_activations = [
        hidden_activation(layer_name)
        for layer_name in ('decoder_h1', 'decoder_h2',
                           'decoder_h3', 'decoder_h4')
    ]
    decoder_activations.append(Identity(name='decoder_out'))
    decoder_mapping = MLP(
        dims=[NLAT, GEN_HIDDEN, GEN_HIDDEN, GEN_HIDDEN, GEN_HIDDEN,
              INPUT_DIM],
        activations=decoder_activations,
        use_bias=False,
        name='decoder_mapping')
    decoder = DeterministicConditional(decoder_mapping, name='decoder')

    # ------------------------------------------------------------------
    # Discriminator: identity branches feeding a maxout joint network.
    # ------------------------------------------------------------------
    x_discriminator = Identity(name='x_discriminator')
    z_discriminator = Identity(name='z_discriminator')

    joint_applications = [
        maxout_layer(INPUT_DIM + NLAT, 'discriminator_h1').apply,
        maxout_layer(DISC_HIDDEN, 'discriminator_h2').apply,
        maxout_layer(DISC_HIDDEN, 'discriminator_h3').apply,
        Linear(input_dim=DISC_HIDDEN,
               output_dim=1,
               weights_init=GAUSSIAN_INIT,
               biases_init=ZERO_INIT,
               name='discriminator_out').apply,
    ]
    joint_discriminator = Sequence(application_methods=joint_applications,
                                   name='joint_discriminator')

    discriminator = XZJointDiscriminator(
        x_discriminator, z_discriminator, joint_discriminator,
        name='discriminator')

    # ------------------------------------------------------------------
    # Full model.
    # ------------------------------------------------------------------
    ali = ALI(encoder=encoder,
              decoder=decoder,
              discriminator=discriminator,
              weights_init=GAUSSIAN_INIT,
              biases_init=ZERO_INIT,
              name='ali')
    ali.push_allocation_config()

    # Both MLPs were built with use_bias=False; only their output
    # transformation gets a bias back.
    for mapping in (encoder_mapping, decoder_mapping):
        mapping.linear_transformations[-1].use_bias = True

    ali.initialize()

    # Report the size of each sub-model, in the same order as before.
    reports = (
        ("Number of parameters in discriminator: {}", ali.discriminator),
        ("Number of parameters in encoder: {}", ali.encoder),
        ("Number of parameters in decoder: {}", ali.decoder),
    )
    for template, brick in reports:
        print(template.format(parameter_count(brick)))

    return ali