def MultiLayerPerceptron(*args, **kwargs):
    '''Build a multi-layer perceptron.

    positional arguments: the number of hidden units of each layer
    keyword arguments (optional):
        activation        : name of a builder activation, 'ReLU' by default
        affine_monitor    : bool, export every affine output to storage
        activation_monitor: bool, export every activation output to storage
        storage           : dictionary receiving the exported values
    '''
    assert all(isinstance(arg, int) for arg in args)
    try:
        activation = getattr(builder, kwargs.pop('activation', 'ReLU'))
    except AttributeError:
        raise Exception('unsupported activation function')
    affine_monitor = kwargs.pop('affine_monitor', False)
    activation_monitor = kwargs.pop('activation_monitor', False)
    if affine_monitor or activation_monitor:
        try:
            storage = kwargs['storage']
        except KeyError:
            raise Exception('storage required to monitor intermediate results')
    network = builder.Sequential()
    for i, arg in enumerate(args[:-1]):
        network.append(builder.Affine(arg))
        if affine_monitor:
            network.append(builder.Export('affine%d' % i, storage))
        # instantiate a fresh activation per layer instead of reusing one object
        network.append(activation())
        if activation_monitor:
            network.append(builder.Export('activation%d' % i, storage))
    network.append(builder.Affine(args[-1]))
    return network
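# A minimal usage sketch of the factory above (assumes the same `builder`
# module is in scope; the exported pre-activations land in `storage` under
# the keys 'affine0', 'affine1', ...):
if __name__ == '__main__':
    storage = {}
    net = MultiLayerPerceptron(1024, 1024, 10,
                               activation='ReLU',
                               affine_monitor=True,
                               storage=storage)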
import sys
sys.path.append('../')

import numpy as np

from solver_primitives import *  # provides builder, initialize, etc.
from utilities.data_utility import load_cifar10

data = load_cifar10(path='../utilities/cifar/', center=True, rescale=True)
X = data[0][:16]

hidden_layers = 4
shapes = (1024,) * hidden_layers + (10,)
activation = builder.ReLU
storage = {}

mlp = builder.Sequential()
for i, shape in enumerate(shapes[:-1]):
    mlp.append(builder.Affine(shape))
    mlp.append(builder.Export('affine%d' % i, storage))
    mlp.append(activation())
mlp.append(builder.Affine(shapes[-1]))
model = builder.Model(mlp, 'softmax', (3072,))

# Uncomment to switch the weights to an unscaled Gaussian initialization:
# for key, value in model.param_configs.items():
#     if 'weight' in key:
#         value['init_rule'] = 'gaussian'
#         value['init_config'] = {'stdvar': 1}

initialize(model)
for key, value in model.params.items():
    if 'weight' in key:
        print(np.std(value))
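# Why the printed std matters: with unit-variance Gaussian weights the
# pre-activation std grows roughly with sqrt(fan_in), while fan-in scaling
# keeps it near 1. A self-contained numpy illustration, independent of
# builder/solver_primitives:
fan_in = 1024
x0 = np.random.randn(16, fan_in)                        # unit-variance inputs
w_plain = np.random.randn(fan_in, fan_in)               # stdvar = 1, as in the
                                                        # disabled block above
w_scaled = np.random.randn(fan_in, fan_in) / np.sqrt(fan_in)
print(np.std(x0.dot(w_plain)))    # ~sqrt(1024) = 32: activations blow up
print(np.std(x0.dot(w_scaled)))   # ~1: variance preserved across the layer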
import sys
sys.path.append('../')

import numpy as np

from custom_layers import *
from solver_primitives import *  # provides builder, initialize, etc.
from utilities.data_utility import load_cifar10

data = load_cifar10(path='../utilities/cifar/', center=True, rescale=True)

hidden_layers = 4
shapes = (1024,) * hidden_layers + (10,)
activation = builder.ReLU
storage = {}

mlp = builder.Sequential()
for i, shape in enumerate(shapes[:-1]):
    mlp.append(builder.Affine(shape))
    mlp.append(builder.Export('affine%d' % i, storage))
    mlp.append(activation())
mlp.append(builder.Affine(shapes[-1]))
# also export the final (pre-softmax) affine output
mlp.append(builder.Export('affine%d' % (len(shapes) - 1), storage))
model = builder.Model(mlp, 'softmax', (3072,))

batch_size = 128
batches = len(data[0]) // batch_size
batch_index = 0

iterations = 10000
interval = 10

settings = {'learning_rate': 0.01}
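# The actual update step lives in solver_primitives and is not reproduced
# here; the loop below is a self-contained numpy stand-in that only shows the
# intended structure (cycle batch_index through the minibatches, report every
# `interval` iterations). The softmax-regression step on toy data is an
# assumption for illustration, not the repo's solver.
if __name__ == '__main__':
    X_toy = np.random.randn(512, 3072)
    Y_toy = np.random.randint(0, 10, 512)
    W = np.zeros((3072, 10))
    for iteration in range(100):
        start = batch_index * batch_size
        x = X_toy[start:start + batch_size]
        y = Y_toy[start:start + batch_size]
        batch_index = (batch_index + 1) % (len(X_toy) // batch_size)
        scores = x.dot(W)
        p = np.exp(scores - scores.max(axis=1, keepdims=True))
        p /= p.sum(axis=1, keepdims=True)
        loss = -np.log(p[np.arange(len(y)), y]).mean()
        p[np.arange(len(y)), y] -= 1                    # p - one_hot(y)
        W -= settings['learning_rate'] * x.T.dot(p) / len(y)
        if iteration % interval == 0:
            print(iteration, loss)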
'''
Disabled variant: insert a ChannelDivision layer (from custom_layers) after
every activation and export all intermediate results.

ACTIVATION = 'ReLU'
DEVICE = 0
DR_INTERVAL = 10

shapes = (1024,) * 4 + (10,)
activation = getattr(builder, ACTIVATION)
set_context(gpu(DEVICE))
storage = {}
chd_list = []

mlp = builder.Sequential()
for i, shape in enumerate(shapes[:-1]):
    mlp.append(builder.Affine(shape))
    mlp.append(builder.Export('affine%d' % i, storage))
    mlp.append(activation())
    mlp.append(builder.Export('activation%d' % i, storage))
    mlp.append(ChannelDivision(np.ones(shape)))
    chd_list.append(mlp[-1])
    mlp.append(builder.Export('chd%d' % i, storage))
mlp.append(builder.Affine(shapes[-1]))
model = builder.Model(mlp, 'softmax', (3072,))

batch_size = 100
batches = len(data[0]) // batch_size
batch_index = 0
iterations = 25000
interval = 10
'''
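# custom_layers is not included in this excerpt, so the semantics of
# ChannelDivision are an assumption. A plausible minimal reading, sketched in
# plain numpy (a hypothetical stand-in, not the actual layer): divide each
# feature channel by a settable per-channel divisor.
import numpy as np

class ChannelDivisionSketch(object):
    # hypothetical stand-in for custom_layers.ChannelDivision
    def __init__(self, divisor):
        self.divisor = np.asarray(divisor, dtype=float)   # shape: (channels,)

    def forward(self, x):
        # x: (batch, channels); column i is divided by divisor[i]
        return x / self.divisor

    def backward(self, grad_output):
        # gradient w.r.t. x for a fixed per-channel division
        return grad_output / self.divisor

layer = ChannelDivisionSketch(np.ones(1024))   # identity until divisors change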