def setup():
    """Build and return a Kid that trains a maxout network on ZCA-whitened
    CIFAR-10.

    The brain is a three-stage conv/pool/maxout stack followed by a maxout
    fully-connected layer and a 10-way softmax, fed by a `FeedSensor` over
    the ZCA-preprocessed CIFAR-10 binary source.

    Returns:
        The fully set-up `kids.Kid` instance, ready to practice/train.
    """
    # Set up brain
    # #########################################################################
    brain = Brain(name='maxout-zca-cifar10')

    brain.attach(DropoutLayer(keep_prob=0.8, name='dropout1'))
    brain.attach(
        ConvolutionLayer([8, 8],
                         [1, 1, 1, 1],
                         'SAME',
                         init_para={
                             "name": "uniform",
                             "range": 0.005
                         },
                         max_norm=0.9,
                         out_channel_num=192,
                         name='conv1'))
    brain.attach(PoolingLayer([1, 4, 4, 1],
                              [1, 2, 2, 1],
                              'SAME',
                              name='pool1'))
    brain.attach(MaxoutLayer(name='maxout1'))

    brain.attach(DropoutLayer(keep_prob=0.5, name='dropout2'))
    brain.attach(
        ConvolutionLayer([8, 8],
                         [1, 1, 1, 1],
                         'SAME',
                         init_para={
                             "name": "uniform",
                             "range": 0.005
                         },
                         max_norm=1.9365,
                         out_channel_num=384,
                         name='conv2'))
    brain.attach(PoolingLayer([1, 4, 4, 1],
                              [1, 2, 2, 1],
                              'SAME',
                              name='pool2'))
    brain.attach(MaxoutLayer(name='maxout2'))

    brain.attach(DropoutLayer(keep_prob=0.5, name='dropout3'))
    brain.attach(
        ConvolutionLayer([5, 5],
                         [1, 1, 1, 1],
                         'SAME',
                         init_para={
                             "name": "uniform",
                             "range": 0.005
                         },
                         max_norm=1.9365,
                         out_channel_num=384,
                         name='conv3'))
    brain.attach(PoolingLayer([1, 2, 2, 1],
                              [1, 2, 2, 1],
                              'SAME',
                              name='pool3'))
    brain.attach(MaxoutLayer(name='maxout3'))

    # BUGFIX: this layer and the one after 'maxout4' were both also named
    # 'dropout3', duplicating an existing layer name.  Renamed to 'dropout4'
    # and 'dropout5' to keep names unique and sequential.
    brain.attach(DropoutLayer(keep_prob=0.5, name='dropout4'))
    brain.attach(
        InnerProductLayer(init_para={
            "name": "uniform",
            "range": 0.005
        },
                          max_norm=1.9,
                          out_channel_num=2500,
                          name='ip1'))
    brain.attach(MaxoutLayer(group_size=5, name='maxout4'))

    brain.attach(DropoutLayer(keep_prob=0.5, name='dropout5'))
    brain.attach(
        InnerProductLayer(init_para={
            "name": "uniform",
            "range": 0.005
        },
                          max_norm=1.9365,
                          out_channel_num=10,
                          name='softmax_linear'))
    brain.attach(SoftmaxWithLossLayer(class_num=10, name='loss'))

    # Set up a sensor.
    # #########################################################################
    cifar_source = Cifar10FeedSource(
        name="CIFAR10",
        url='http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz',
        work_dir=AKID_DATA_PATH + '/cifar10',
        use_zca=True,
        num_train=50000,
        num_val=10000)

    sensor = FeedSensor(source_in=cifar_source,
                        batch_size=128,
                        name='data')

    # Summon a survivor.
    # #########################################################################
    survivor = kids.Kid(sensor,
                        brain,
                        kongfus.MomentumKongFu(base_lr=0.025,
                                               momentum=0.5,
                                               decay_rate=0.1,
                                               decay_epoch_num=50),
                        max_steps=200000)
    survivor.setup()

    return survivor
def setup(graph, lr):
    """Build and return a Kid training a maxout/collapse-out network on
    per-image-whitened CIFAR-10.

    Args:
        graph: the TF graph to build the model in (forwarded to `kids.Kid`).
        lr: base learning rate; also used to name the log directory.

    Returns:
        The fully set-up `kids.Kid` instance.
    """
    # Set up brain
    # #########################################################################
    brain = GraphBrain(moving_average_decay=0.99, name='maxout-relu-cifar10')

    brain.attach(
        ConvolutionLayer([8, 8],
                         [1, 1, 1, 1],
                         'SAME',
                         init_para={
                             "name": "truncated_normal",
                             "stddev": 0.005
                         },
                         wd={
                             "type": "l2",
                             "scale": 0.0005
                         },
                         out_channel_num=192,
                         name='conv1'))
    brain.attach(PoolingLayer([1, 4, 4, 1],
                              [1, 2, 2, 1],
                              'SAME',
                              name='pool1'))
    brain.attach(CollapseOutLayer(name='maxout1'))
    brain.attach(DropoutLayer(keep_prob=0.8, name='dropout1'))

    brain.attach(
        ConvolutionLayer([8, 8],
                         [1, 1, 1, 1],
                         'SAME',
                         init_para={
                             "name": "truncated_normal",
                             "stddev": 0.005
                         },
                         wd={
                             "type": "l2",
                             "scale": 0.0005
                         },
                         out_channel_num=384,
                         name='conv2'))
    brain.attach(PoolingLayer([1, 4, 4, 1],
                              [1, 2, 2, 1],
                              'SAME',
                              name='pool2'))
    brain.attach(CollapseOutLayer(name='maxout2'))
    brain.attach(DropoutLayer(keep_prob=0.5, name='dropout2'))

    brain.attach(
        ConvolutionLayer([5, 5],
                         [1, 1, 1, 1],
                         'SAME',
                         init_para={
                             "name": "truncated_normal",
                             "stddev": 0.005
                         },
                         wd={
                             "type": "l2",
                             "scale": 0.0005
                         },
                         out_channel_num=384,
                         name='conv3'))
    brain.attach(PoolingLayer([1, 2, 2, 1],
                              [1, 2, 2, 1],
                              'SAME',
                              name='pool3'))
    brain.attach(CollapseOutLayer(name='maxout3'))
    brain.attach(DropoutLayer(keep_prob=0.5, name='dropout3'))

    brain.attach(
        InnerProductLayer(init_para={
            "name": "truncated_normal",
            "stddev": 0.005
        },
                          wd={
                              "type": "l2",
                              "scale": 0.004
                          },
                          out_channel_num=2500,
                          name='ip1'))
    brain.attach(CollapseOutLayer(group_size=5, name='maxout4'))
    # BUGFIX: this layer was also named 'dropout3', duplicating the dropout
    # after 'maxout3'.  Renamed to 'dropout4' to keep names unique.
    brain.attach(DropoutLayer(keep_prob=0.3, name='dropout4'))

    brain.attach(
        InnerProductLayer(init_para={
            "name": "truncated_normal",
            "stddev": 1 / 500.0
        },
                          wd={
                              "type": "l2",
                              "scale": 0.004
                          },
                          out_channel_num=10,
                          name='softmax_linear'))
    brain.attach(SoftmaxWithLossLayer(class_num=10, name='loss'))

    # Set up a sensor.
    # #########################################################################
    cifar_source = Cifar10TFSource(
        name="CIFAR10",
        url='http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz',
        work_dir=AKID_DATA_PATH + '/cifar10',
        num_train=50000,
        num_val=10000)

    sensor = IntegratedSensor(source_in=cifar_source,
                              batch_size=128,
                              name='data')
    # Whiten both the validation and the training streams.
    sensor.attach(WhitenJoker(name="per_image_whitening"), to_val=True)
    sensor.attach(WhitenJoker(name="per_image_whitening"))

    # Summon a kid.
    # #########################################################################
    survivor = kids.Kid(sensor,
                        brain,
                        kongfus.MomentumKongFu(base_lr=lr,
                                               momentum=0.5,
                                               decay_rate=0.1,
                                               decay_epoch_num=50),
                        log_dir="log_{}".format(lr),
                        max_steps=510000,
                        graph=graph)
    survivor.setup()

    return survivor
def setup():
    # NOTE(review): the `{{...}}` spans below are template placeholders
    # (net_paras / opt_paras), so this function is a template to be rendered
    # before execution -- it is not runnable Python as-is.  Code left
    # token-identical; only comments added.

    # Imports are kept function-local so the rendered template is
    # self-contained.
    from akid import AKID_DATA_PATH
    from akid import GraphBrain, MNISTFeedSource, FeedSensor, Kid
    from akid import MomentumKongFu
    from akid.layers import DropoutLayer, SoftmaxWithLossLayer
    from akid.sugar import cnn_block
    from akid import LearningRateScheme

    # A minimal one-conv-block MNIST network: input dropout, one conv block
    # with a templated activation, then a linear block down to 10 classes.
    brain = GraphBrain(name="one-layer-mnist")

    brain.attach(DropoutLayer(keep_prob=0.8, name='dropout0'))

    brain.attach(
        cnn_block(
            ksize=[5, 5],
            init_para={
                "name": "truncated_normal",
                "stddev": 0.1
            },
            wd={
                "type": "l2",
                "scale": 0.0005
            },
            in_channel_num=1,
            out_channel_num=32,
            pool_size=[5, 5],
            pool_stride=[5, 5],
            # Activation choice is filled in by the template engine.
            activation={{net_paras["activation"][0]}},
            keep_prob=0.5,
        ))

    brain.attach(
        cnn_block(
            init_para={
                "name": "truncated_normal",
                "stddev": 0.1
            },
            wd={
                "type": "l2",
                "scale": 0.0005
            },
            # presumably 1152 = flattened conv output size -- TODO confirm
            # against the pooled feature-map shape.
            in_channel_num=1152,
            out_channel_num=10,
            activation=None,
        ))

    # Loss takes the previous layer's output plus the labels from the
    # system input (index 1).
    brain.attach(
        SoftmaxWithLossLayer(class_num=10,
                             inputs=[{
                                 "name": brain.get_last_layer_name()
                             }, {
                                 "name": "system_in",
                                 "idxs": [1]
                             }],
                             name="softmax"))

    # Set up a sensor.
    # #########################################################################
    source = MNISTFeedSource(name="MNIST",
                             url='http://yann.lecun.com/exdb/mnist/',
                             work_dir=AKID_DATA_PATH + '/mnist',
                             num_train=50000,
                             num_val=5000,
                             center=True,
                             scale=True)

    sensor = FeedSensor(name='data',
                        source_in=source,
                        batch_size=64,
                        val_batch_size=100)

    # Learning rate, engine choice, and base_lr are all templated.
    kid = Kid(sensor,
              brain,
              MomentumKongFu(momentum=0.9,
                             lr_scheme={
                                 "name": LearningRateScheme.exp_decay,
                                 "base_lr": {{opt_paras["lr"]}},
                                 "decay_rate": 0.95,
                                 "num_batches_per_epoch":
                                     sensor.num_batches_per_epoch,
                                 "decay_epoch_num": 1
                             }),
              engine={{opt_paras["engine"]}},
              max_steps=1000)
    kid.setup()

    return kid
# NOTE(review): this is a fragment -- the enclosing function's `def` line is
# not visible in this chunk, so `brain` is defined above the visible span.
# Code left token-identical; only comments added.

# Fully-connected maxout head: 500-wide inner product collapsed by groups
# of 5, then a linear 10-way classifier.
brain.attach(
    InnerProductLayer(stddev=0.005,
                      weight_decay=0.004,
                      out_channel_num=500,
                      name='ip1'))
brain.attach(MaxoutLayer(group_size=5, name='maxout4'))
# NOTE(review): 'dropout3' may duplicate a layer name attached earlier in
# the (unseen) head of this function -- verify name uniqueness.
brain.attach(DropoutLayer(keep_prob=0.3, name='dropout3'))
brain.attach(
    InnerProductLayer(stddev=1 / 500.0,
                      weight_decay=0,
                      out_channel_num=10,
                      name='softmax_linear'))
brain.attach(SoftmaxWithLossLayer(class_num=10, name='loss'))

# Set up a sensor.
# #########################################################################
cifar_source = Cifar10TFSource(
    name="CIFAR10",
    url='http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz',
    work_dir=AKID_DATA_PATH + '/cifar10',
    num_train=50000,
    num_val=10000)

sensor = IntegratedSensor(source_in=cifar_source,
                          batch_size=128,
                          name='data')
# Whiten both the validation and the training streams.
sensor.attach(WhitenJoker(name="per_image_whitening"), to_val=True)
sensor.attach(WhitenJoker(name="per_image_whitening"))

# Summon a survivor.
def setup(bn=None, activation_before_pooling=False):
    """Assemble and return a Kid for the `sugar_mnist` network.

    Two conv blocks (32 and 64 channels) followed by two fully-connected
    blocks (512 then 10 units), a softmax loss, an MNIST feed sensor, and a
    momentum trainer with exponential learning-rate decay.

    Args:
        bn: batch-normalization spec forwarded to every `cnn_block`.
        activation_before_pooling: forwarded to the conv blocks only.

    Returns:
        The fully set-up `Kid` instance.
    """
    model = GraphBrain(name="sugar_mnist")

    # The two convolutional stages differ only in channel counts and
    # pooling geometry: (in_channels, out_channels, pool_size, pool_stride).
    for in_num, out_num, p_size, p_stride in [
            (1, 32, [2, 2], [2, 2]),
            (32, 64, [5, 5], [2, 2])]:
        model.attach(
            cnn_block(ksize=[5, 5],
                      initial_bias_value=0.,
                      init_para={"name": "truncated_normal", "stddev": 0.1},
                      wd={"type": "l2", "scale": 5e-4},
                      in_channel_num=in_num,
                      out_channel_num=out_num,
                      pool_size=p_size,
                      pool_stride=p_stride,
                      activation={"type": "relu"},
                      activation_before_pooling=activation_before_pooling,
                      bn=bn))

    # First fully-connected stage: 3136 -> 512 with ReLU and dropout.
    model.attach(
        cnn_block(ksize=None,
                  initial_bias_value=0.1,
                  init_para={"name": "truncated_normal", "stddev": 0.1},
                  wd={"type": "l2", "scale": 5e-4},
                  in_channel_num=3136,
                  out_channel_num=512,
                  activation={"type": "relu"},
                  bn=bn,
                  keep_prob=0.5))

    # Linear classifier head: 512 -> 10, no activation.
    model.attach(
        cnn_block(ksize=None,
                  initial_bias_value=0.1,
                  init_para={"name": "truncated_normal", "stddev": 0.1},
                  wd={"type": "l2", "scale": 5e-4},
                  in_channel_num=512,
                  out_channel_num=10,
                  activation=None,
                  bn=bn))

    # Loss reads the head's output ("ip4") and the labels from system input.
    model.attach(
        SoftmaxWithLossLayer(class_num=10,
                             inputs=[{"name": "ip4", "idxs": [0]},
                                     {"name": "system_in", "idxs": [1]}],
                             name="loss"))

    mnist = MNISTFeedSource(name="MNIST",
                            url='http://yann.lecun.com/exdb/mnist/',
                            work_dir=AKID_DATA_PATH + '/mnist',
                            num_train=50000,
                            num_val=5000,
                            center=True,
                            scale=True)

    feed = FeedSensor(name='data',
                      source_in=mnist,
                      batch_size=64,
                      val_batch_size=100)

    trainee = Kid(feed,
                  model,
                  MomentumKongFu(lr_scheme={
                      "name": LearningRateScheme.exp_decay,
                      "base_lr": 0.01,
                      "decay_rate": 0.95,
                      "num_batches_per_epoch": 468,
                      "decay_epoch_num": 1
                  },
                                 momentum=0.9),
                  max_steps=4000)
    trainee.setup()

    return trainee