def create_main_loop(save_path):
    """Assemble the main loop used to train the ALI model.

    Parameters
    ----------
    save_path
        Passed to ``Checkpoint``, which is configured to fire after every
        epoch and once more after training.

    Returns
    -------
    The configured ``MainLoop``; it is constructed here but not run.
    """
    def _monitored(source_model):
        # Auxiliary variables whose name does not contain 'norm', followed
        # by the model outputs.
        kept = [var for var in source_model.auxiliary_variables
                if 'norm' not in var.name]
        return kept + source_model.outputs

    model, bn_model, bn_updates = create_models()
    [ali] = bn_model.top_bricks
    discriminator_loss, generator_loss = bn_model.outputs

    # One Adam step rule instance is handed to both halves of the algorithm.
    step_rule = Adam(learning_rate=LEARNING_RATE, beta1=BETA1)
    discriminator_parameters = ali.discriminator_parameters
    generator_parameters = ali.generator_parameters
    algorithm = ali_algorithm(
        discriminator_loss, discriminator_parameters, step_rule,
        generator_loss, generator_parameters, step_rule)
    algorithm.add_updates(bn_updates)

    (main_loop_stream,
     train_monitor_stream,
     valid_monitor_stream) = create_cifar10_data_streams(
        BATCH_SIZE, MONITORING_BATCH_SIZE)

    bn_monitored_variables = _monitored(bn_model)
    monitored_variables = _monitored(model)

    train_monitor = DataStreamMonitoring(
        bn_monitored_variables, train_monitor_stream,
        prefix="train", updates=bn_updates)
    valid_monitor = DataStreamMonitoring(
        monitored_variables, valid_monitor_stream, prefix="valid")
    checkpoint = Checkpoint(
        save_path, after_epoch=True, after_training=True, use_cpickle=True)

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=NUM_EPOCHS),
        train_monitor,
        valid_monitor,
        checkpoint,
        ProgressBar(),
        Printing(),
    ]
    return MainLoop(
        model=bn_model,
        data_stream=main_loop_stream,
        algorithm=algorithm,
        extensions=extensions)
def create_main_loop(save_path, backup_path):
    """Assemble the ALI training main loop with backup and plotting.

    Parameters
    ----------
    save_path
        Passed to ``Checkpoint`` (after every epoch and after training) and
        as the second argument to ``BackupModel``.
    backup_path
        First argument to ``BackupModel``, ``PlotLoss`` and ``PlotAccuracy``.

    Returns
    -------
    The configured ``MainLoop``; it is constructed here but not run.
    """
    model, bn_model, bn_updates = create_models()
    (ali,) = bn_model.top_bricks
    discriminator_loss, generator_loss = bn_model.outputs

    # The same Adam instance serves both parameter groups.
    step_rule = Adam(learning_rate=LEARNING_RATE, beta1=BETA1)
    algorithm = ali_algorithm(
        discriminator_loss, ali.discriminator_parameters, step_rule,
        generator_loss, ali.generator_parameters, step_rule)
    algorithm.add_updates(bn_updates)

    streams = create_cifar10_data_streams(BATCH_SIZE, MONITORING_BATCH_SIZE)
    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams

    # Monitor every auxiliary variable whose name lacks 'norm', plus outputs.
    bn_monitored_variables = [
        var for var in bn_model.auxiliary_variables
        if 'norm' not in var.name
    ] + bn_model.outputs
    monitored_variables = [
        var for var in model.auxiliary_variables
        if 'norm' not in var.name
    ] + model.outputs

    # Schedule shared by the backup and plotting extensions.
    periodic = dict(every_n_epochs=500, after_training=True)
    plot_title = "ALI CIFAR-10"

    extensions = [Timing(), FinishAfter(after_n_epochs=NUM_EPOCHS)]
    extensions.append(DataStreamMonitoring(
        bn_monitored_variables, train_monitor_stream,
        prefix="train", updates=bn_updates))
    extensions.append(DataStreamMonitoring(
        monitored_variables, valid_monitor_stream, prefix="valid"))
    extensions.append(Checkpoint(
        save_path, after_epoch=True, after_training=True, use_cpickle=True))
    extensions.append(BackupModel(backup_path, save_path, **periodic))
    extensions.append(PlotLoss(backup_path, plot_title, **periodic))
    extensions.append(PlotAccuracy(backup_path, plot_title, **periodic))
    extensions.append(ProgressBar())
    extensions.append(Printing())

    main_loop = MainLoop(
        model=bn_model,
        data_stream=main_loop_stream,
        algorithm=algorithm,
        extensions=extensions)
    return main_loop
def create_main_loop(save_path):
    """Assemble the ALI training main loop on ``ShapesDataset`` streams.

    The CIFAR-10 streams are still created and the first batch of the
    CIFAR-10 training stream is printed, but all three streams are then
    replaced by ``ShapesDataset`` streams before the main loop is built.

    Parameters
    ----------
    save_path
        Passed to ``Checkpoint``, which is configured to fire after every
        epoch and once more after training.

    Returns
    -------
    The configured ``MainLoop``; it is constructed here but not run.
    """
    def _print_first_batch(stream):
        # Debug aid: show the source names and the 'features' shape/dtype of
        # the first batch only.  ``keys`` must be *called*; printing the bare
        # attribute only shows the bound-method repr.
        for batch in stream.get_epoch_iterator(as_dict=True):
            print(batch.keys())
            print(batch['features'].shape, batch['features'].dtype)
            break

    model, bn_model, bn_updates = create_models()
    ali, = bn_model.top_bricks
    discriminator_loss, generator_loss = bn_model.outputs

    # One Adam step rule instance is handed to both halves of the algorithm.
    step_rule = Adam(learning_rate=LEARNING_RATE, beta1=BETA1)
    algorithm = ali_algorithm(
        discriminator_loss, ali.discriminator_parameters, step_rule,
        generator_loss, ali.generator_parameters, step_rule)
    algorithm.add_updates(bn_updates)

    # The CIFAR-10 streams are only used for the first debug print below;
    # every one of them is overwritten afterwards.
    streams = create_cifar10_data_streams(BATCH_SIZE, MONITORING_BATCH_SIZE)
    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams
    _print_first_batch(main_loop_stream)

    main_loop_stream = ShapesDataset(
        num_examples=600, img_size=32, min_diameter=3,
        seed=1234).create_stream(batch_size=BATCH_SIZE, is_train=True)
    _print_first_batch(main_loop_stream)

    # NOTE(review): both monitoring streams use BATCH_SIZE rather than
    # MONITORING_BATCH_SIZE -- confirm this is intended.
    train_monitor_stream = ShapesDataset(
        num_examples=100, img_size=32, min_diameter=3,
        seed=1234).create_stream(batch_size=BATCH_SIZE, is_train=False)
    valid_monitor_stream = ShapesDataset(
        num_examples=100, img_size=32, min_diameter=3,
        seed=5678).create_stream(batch_size=BATCH_SIZE, is_train=False)

    # Monitor every auxiliary variable whose name lacks 'norm', plus outputs.
    bn_monitored_variables = (
        [v for v in bn_model.auxiliary_variables if 'norm' not in v.name] +
        bn_model.outputs)
    monitored_variables = (
        [v for v in model.auxiliary_variables if 'norm' not in v.name] +
        model.outputs)

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=NUM_EPOCHS),
        DataStreamMonitoring(
            bn_monitored_variables, train_monitor_stream,
            prefix="train", updates=bn_updates),
        DataStreamMonitoring(
            monitored_variables, valid_monitor_stream, prefix="valid"),
        Checkpoint(save_path, after_epoch=True, after_training=True,
                   use_cpickle=True),
        ProgressBar(),
        Printing(),
    ]
    main_loop = MainLoop(model=bn_model, data_stream=main_loop_stream,
                         algorithm=algorithm, extensions=extensions)
    return main_loop
def create_model_brick():
    """Build, configure and initialize the ALI brick.

    Constructs the encoder and decoder mappings, the x / z / joint
    discriminators, wraps them in an ``ALI`` brick, enables untied biases on
    the last encoder layer and the second-to-last decoder layer, initializes
    everything, and finally sets that decoder layer's bias to
    ``get_log_odds`` of the first batch from the first CIFAR-10 stream.

    Returns
    -------
    The initialized ``ALI`` brick.
    """
    # Each tuple below is passed positionally to conv_brick /
    # conv_transpose_brick; bricks are created in list order.
    encoder_specs = [(5, 1, 32), (4, 2, 64), (4, 1, 128),
                     (4, 2, 256), (4, 1, 512), (1, 1, 512)]
    decoder_specs = [(4, 1, 256), (4, 2, 128), (4, 1, 64),
                     (4, 2, 32), (5, 1, 32), (1, 1, 32)]
    x_specs = [(5, 1, 32), (4, 2, 64), (4, 1, 128), (4, 2, 256), (4, 1, 512)]
    z_specs = [(1, 1, 512), (1, 1, 512)]
    joint_specs = [(1, 1, 1024), (1, 1, 1024)]

    # Encoder: conv -> batch norm -> leaky rectifier, then a final conv.
    encoder_layers = []
    for spec in encoder_specs:
        encoder_layers.extend(
            [conv_brick(*spec), bn_brick(), LeakyRectifier(leak=LEAK)])
    encoder_layers.append(conv_brick(1, 1, 2 * NLAT))
    encoder_mapping = ConvolutionalSequence(
        layers=encoder_layers, num_channels=NUM_CHANNELS,
        image_size=IMAGE_SIZE, use_bias=False, name='encoder_mapping')
    encoder = GaussianConditional(encoder_mapping, name='encoder')

    # Decoder: transposed conv -> batch norm -> leaky rectifier, then a
    # final conv followed by a logistic.
    decoder_layers = []
    for spec in decoder_specs:
        decoder_layers.extend(
            [conv_transpose_brick(*spec), bn_brick(),
             LeakyRectifier(leak=LEAK)])
    decoder_layers.append(conv_brick(1, 1, NUM_CHANNELS))
    decoder_layers.append(Logistic())
    decoder_mapping = ConvolutionalSequence(
        layers=decoder_layers, num_channels=NLAT, image_size=(1, 1),
        use_bias=False, name='decoder_mapping')
    decoder = DeterministicConditional(decoder_mapping, name='decoder')

    # x discriminator: conv -> maxout pairs.
    x_layers = []
    for spec in x_specs:
        x_layers.extend(
            [conv_brick(*spec), ConvMaxout(num_pieces=NUM_PIECES)])
    x_discriminator = ConvolutionalSequence(
        layers=x_layers, num_channels=NUM_CHANNELS, image_size=IMAGE_SIZE,
        name='x_discriminator')
    x_discriminator.push_allocation_config()

    # z discriminator: conv -> maxout pairs, built without biases.
    z_layers = []
    for spec in z_specs:
        z_layers.extend(
            [conv_brick(*spec), ConvMaxout(num_pieces=NUM_PIECES)])
    z_discriminator = ConvolutionalSequence(
        layers=z_layers, num_channels=NLAT, image_size=(1, 1),
        use_bias=False, name='z_discriminator')
    z_discriminator.push_allocation_config()

    # Joint discriminator: its input channel count is the sum of the first
    # output dimension of the x and z discriminators.
    joint_layers = []
    for spec in joint_specs:
        joint_layers.extend(
            [conv_brick(*spec), ConvMaxout(num_pieces=NUM_PIECES)])
    joint_layers.append(conv_brick(1, 1, 1))
    x_out_channels = x_discriminator.get_dim('output')[0]
    z_out_channels = z_discriminator.get_dim('output')[0]
    joint_discriminator = ConvolutionalSequence(
        layers=joint_layers,
        num_channels=(x_out_channels + z_out_channels),
        image_size=(1, 1), name='joint_discriminator')

    discriminator = XZJointDiscriminator(
        x_discriminator, z_discriminator, joint_discriminator,
        name='discriminator')

    ali = ALI(encoder, decoder, discriminator,
              weights_init=GAUSSIAN_INIT, biases_init=ZERO_INIT, name='ali')
    ali.push_allocation_config()

    # Both mappings were created with use_bias=False; switch biases back on
    # (untied) for the encoder's last layer and the decoder's conv that
    # precedes the logistic.
    encoder_head = encoder_mapping.layers[-1]
    encoder_head.use_bias = True
    encoder_head.tied_biases = False
    decoder_head = decoder_mapping.layers[-2]
    decoder_head.use_bias = True
    decoder_head.tied_biases = False
    ali.initialize()

    # Set the decoder output bias from the first batch of the first stream.
    marginals_stream = create_cifar10_data_streams(500, 500)[0]
    first_batch = next(marginals_stream.get_epoch_iterator())
    (raw_marginals,) = first_batch
    b_value = get_log_odds(raw_marginals)
    decoder_mapping.layers[-2].b.set_value(b_value)

    return ali
def create_model_brick():
    """Construct the ALI brick and return it fully initialized.

    The encoder, decoder and the three discriminator sequences are built,
    combined into an ``ALI`` brick and initialized.  Untied biases are
    enabled on the encoder's last layer and the decoder's second-to-last
    layer, and the latter's bias is set to ``get_log_odds`` of the first
    batch drawn from the first CIFAR-10 stream.

    Returns
    -------
    The initialized ``ALI`` brick.
    """
    def _bn_leaky(conv):
        # A convolution followed by batch norm and a leaky rectifier.
        return [conv, bn_brick(), LeakyRectifier(leak=LEAK)]

    def _maxout(conv):
        # A convolution followed by a convolutional maxout.
        return [conv, ConvMaxout(num_pieces=NUM_PIECES)]

    unit_image = (1, 1)

    encoder_mapping = ConvolutionalSequence(
        layers=(
            _bn_leaky(conv_brick(5, 1, 32)) +
            _bn_leaky(conv_brick(4, 2, 64)) +
            _bn_leaky(conv_brick(4, 1, 128)) +
            _bn_leaky(conv_brick(4, 2, 256)) +
            _bn_leaky(conv_brick(4, 1, 512)) +
            _bn_leaky(conv_brick(1, 1, 512)) +
            [conv_brick(1, 1, 2 * NLAT)]),
        num_channels=NUM_CHANNELS, image_size=IMAGE_SIZE,
        use_bias=False, name='encoder_mapping')
    encoder = GaussianConditional(encoder_mapping, name='encoder')

    decoder_mapping = ConvolutionalSequence(
        layers=(
            _bn_leaky(conv_transpose_brick(4, 1, 256)) +
            _bn_leaky(conv_transpose_brick(4, 2, 128)) +
            _bn_leaky(conv_transpose_brick(4, 1, 64)) +
            _bn_leaky(conv_transpose_brick(4, 2, 32)) +
            _bn_leaky(conv_transpose_brick(5, 1, 32)) +
            _bn_leaky(conv_transpose_brick(1, 1, 32)) +
            [conv_brick(1, 1, NUM_CHANNELS), Logistic()]),
        num_channels=NLAT, image_size=unit_image,
        use_bias=False, name='decoder_mapping')
    decoder = DeterministicConditional(decoder_mapping, name='decoder')

    x_discriminator = ConvolutionalSequence(
        layers=(
            _maxout(conv_brick(5, 1, 32)) +
            _maxout(conv_brick(4, 2, 64)) +
            _maxout(conv_brick(4, 1, 128)) +
            _maxout(conv_brick(4, 2, 256)) +
            _maxout(conv_brick(4, 1, 512))),
        num_channels=NUM_CHANNELS, image_size=IMAGE_SIZE,
        name='x_discriminator')
    x_discriminator.push_allocation_config()

    z_discriminator = ConvolutionalSequence(
        layers=(
            _maxout(conv_brick(1, 1, 512)) +
            _maxout(conv_brick(1, 1, 512))),
        num_channels=NLAT, image_size=unit_image,
        use_bias=False, name='z_discriminator')
    z_discriminator.push_allocation_config()

    joint_layers = (
        _maxout(conv_brick(1, 1, 1024)) +
        _maxout(conv_brick(1, 1, 1024)) +
        [conv_brick(1, 1, 1)])
    # Input channels of the joint discriminator: first output dimension of
    # the x discriminator plus that of the z discriminator.
    joint_discriminator = ConvolutionalSequence(
        layers=joint_layers,
        num_channels=(x_discriminator.get_dim('output')[0] +
                      z_discriminator.get_dim('output')[0]),
        image_size=unit_image, name='joint_discriminator')

    discriminator = XZJointDiscriminator(
        x_discriminator, z_discriminator, joint_discriminator,
        name='discriminator')

    ali = ALI(encoder, decoder, discriminator,
              weights_init=GAUSSIAN_INIT, biases_init=ZERO_INIT, name='ali')
    ali.push_allocation_config()

    # The mappings were built with use_bias=False; re-enable untied biases
    # on the encoder's final layer and on the decoder layer just before the
    # logistic, then initialize.
    for biased_layer in (encoder_mapping.layers[-1],
                         decoder_mapping.layers[-2]):
        biased_layer.use_bias = True
        biased_layer.tied_biases = False
    ali.initialize()

    # Use the first batch of the first stream to set the decoder bias.
    epoch_iterator = create_cifar10_data_streams(
        500, 500)[0].get_epoch_iterator()
    [raw_marginals] = next(epoch_iterator)
    decoder_mapping.layers[-2].b.set_value(get_log_odds(raw_marginals))
    return ali