def attended_memory_network(settings):
    network = layers.variable('data')
    for module_settings in settings:
        if module_settings['operator'] == 'attended_memory_module':
            network = _attended_memory_module(network, module_settings['settings'])
        else:
            args = module_settings.get('args', tuple())
            kwargs = dict(module_settings.get('kwargs', {}))
            # Thread the running network in as the first positional argument,
            # or as the keyword X when no positional arguments are given.
            if args:
                args = (network,) + args
            else:
                kwargs['X'] = network
            network = getattr(layers, module_settings['operator'])(*args, **kwargs)
    network = layers.pooling(X=network, mode='average', global_pool=True, kernel_shape=(1, 1), stride=(1, 1), pad=(1, 1))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')
    return network

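# A hypothetical example of the settings structure consumed above (a sketch,
# not taken from the repository): each entry names an mx_layers operator, or
# the special 'attended_memory_module' routed to _attended_memory_module.
# The operator names and keyword arguments below are illustrative assumptions.
_EXAMPLE_AMN_SETTINGS = (
    {'operator': 'convolution', 'kwargs': {'n_filters': 16, 'kernel_shape': (3, 3), 'stride': (1, 1), 'pad': (1, 1)}},
    {'operator': 'ReLU'},
    {'operator': 'attended_memory_module', 'settings': {'n_filters': 16}},
)
# network = attended_memory_network(_EXAMPLE_AMN_SETTINGS)
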
def recurrent_hypernetwork(T, batch_size):
    X = layers.variable('data')
    label = layers.variable('softmax_label')
    loss = 0
    # One weight/bias slot per convolution; the weights are filled in by the
    # hypernetwork after each unrolled step.
    parameters = (
        {'weight': None, 'bias': None},
        {'weight': None, 'bias': None},
        {'weight': None, 'bias': None},
    )
    KERNEL_SHAPES = ((3, 3, 3 * 16),) + ((3, 3, 16 * 16),) * 2
    for time in range(T):
        network = _extract_representations(X, parameters, batch_size)
        prediction = layers.pooling(X=network, mode='average', global_pool=True, kernel_shape=(1, 1), stride=(1, 1), pad=(0, 0))
        prediction = layers.flatten(prediction)
        prediction = layers.fully_connected(X=prediction, n_hidden_units=10)
        loss += layers.softmax_loss(prediction=prediction, label=label)
        # Generate the next step's convolution weights from the current
        # representation.
        for index, weight in enumerate(_generate_parameters(network, KERNEL_SHAPES)):
            parameters[index]['weight'] = weight
    return loss

def build_network(args):
    network = layers.variable('data')
    network = _convolution(X=network, n_filters=16)
    for n_filters in (16, 32):
        network = _module(network, n_filters, args.n_layers)
        network = _transit(network, n_filters * 2)
    # network = _module(network, 64, args.n_layers)
    _, rnn_cache = _traced_module(network, args.rnn, 64, args.n_layers)
    # network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.batch_normalization(rnn_cache['h'], fix_gamma=False, id='BN')
    network = layers.ReLU(network)
    # NOTE: the pooling below reads rnn_cache['h'] directly, so the BN/ReLU
    # result above is not used by the classifier head.
    network = layers.pooling(X=rnn_cache['h'], mode='average', kernel_shape=(8, 8), stride=(1, 1), pad=(0, 0))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network, n_hidden_units=10, id='linear')
    network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')
    return network

def _traced_module(network, n_filters, n_layers):
    group = []
    for index in range(n_layers):
        identity = network
        residual = _normalized_convolution(network, n_filters=n_filters)
        residual = _normalized_convolution(residual, n_filters=n_filters)
        # Detach a copy of the residual branch so it can be monitored without
        # contributing gradients to training.
        trace = layers.terminate_gradient(residual)
        trace = layers.ReLU(trace)
        trace = layers.flatten(trace)
        group.append(trace)
        network = identity + residual
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.ReLU(network)
    network = layers.pooling(X=network, mode='average', kernel_shape=(8, 8), stride=(1, 1), pad=(0, 0))
    network = layers.flatten(network)
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    network = layers.terminate_gradient(network)
    group.append(network)
    return layers.group(group)

def build_network(n_layers):
    network = layers.variable('data')
    network = _convolution(X=network, n_filters=16)
    convolution_settings = {'n_filters': None}
    settings = {
        'convolution_settings': convolution_settings,
        'n_layers': n_layers,
        'weight_sharing': False,
    }
    for n_filters in (16, 32):
        convolution_settings['n_filters'] = n_filters
        network = _rnn_attention_module(network, settings)
        network = _transit(network, n_filters * 2)
    convolution_settings['n_filters'] = 64
    network = _rnn_attention_module(network, settings)
    network = layers.pooling(X=network, mode='average', kernel_shape=(8, 8), stride=(1, 1), pad=(0, 0))
    network = layers.flatten(network)
    network = layers.batch_normalization(network, fix_gamma=False)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')
    return network

def _fully_connected(network, batch_size, n_hidden_units, mode, p):
    # Long path: an ordinary fully connected layer.
    long_path = layers.fully_connected(X=network, n_hidden_units=n_hidden_units)
    long_path = layers.ReLU(long_path)
    # Short path: the per-sample mean of the input, broadcast back to width
    # n_hidden_units. (mode is currently unused.)
    short_path = layers.mean(network, axis=1)
    short_path = layers.reshape(short_path, (0, 1))
    short_path = layers.broadcast(short_path, (0, n_hidden_units))
    short_path = layers.ReLU(short_path)
    # Mix the two paths element-wise: each unit takes the long path with
    # probability p.
    gate = _random_gate(p, (batch_size, n_hidden_units))
    network = gate * long_path + (1 - gate) * short_path
    return network

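# Illustration only (NumPy, outside the mx_layers graph; assumes _random_gate
# draws a Bernoulli(p) mask of the given shape): each unit independently takes
# the long path with probability p and the mean-based short path otherwise.
import numpy as np

def _demo_stochastic_mix(long_path, short_path, p):
    # Hypothetical NumPy analogue of gate * long + (1 - gate) * short above.
    gate = (np.random.uniform(size=long_path.shape) < p).astype(long_path.dtype)
    return gate * long_path + (1 - gate) * short_path
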
def build_rnn(args):
    rnn_cache = {}
    for i in range(args.n_layers):
        X = layers.variable('data%d' % i)
        # Look up the RNN cell builder named by args.rnn among the module-level
        # functions and thread the recurrent cache through the layers.
        rnn_cache = globals()[args.rnn](X, args.n_hidden_units, rnn_cache)
    network = layers.fully_connected(X=rnn_cache['h'], n_hidden_units=10, id='linear')
    loss = layers.linear_regression_loss(network, id='criterion')
    # network = layers.softmax_loss(prediction=network, normalization='batch', id='criterion')
    return network, loss

def dual_activation_network(n_layers):
    shared_weight = layers.variable('shared_weight')
    shared_bias = layers.variable('shared_bias')
    network = layers.variable('data')
    network = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1))
    for i in range(n_layers):
        # Sum a layer-private convolution with one whose parameters are shared
        # across all layers.
        private = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1))
        shared = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1), weight=shared_weight, bias=shared_bias)
        network = private + shared
    network = layers.pooling(X=network, mode='average', global_pool=True, kernel_shape=(1, 1), stride=(1, 1), pad=(1, 1))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network, n_hidden_units=10, name='linear_transition')
    network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')
    return network

def unattentioned_network(times, function=average, n_classes=10):
    # TODO simplify network structure
    network = layers.variable('data')
    cache = []
    for time in range(times):
        network = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1))
        cache.append(network)
    # Aggregate the cached feature maps (by default with average) before
    # downsampling.
    network = layers.batch_normalization(function(cache))
    network = _normalized_convolution(network, (3, 3), 16, (2, 2), (1, 1))
    network = _normalized_convolution(network, (3, 3), 16, (2, 2), (1, 1))
    network = layers.pooling(X=network, mode='average', kernel_shape=(8, 8), stride=(1, 1), pad=(0, 0))
    network = layers.fully_connected(X=network, n_hidden_units=n_classes)
    network = layers.softmax_loss(network, normalization='batch')
    return network

def dropping_out_mlp(settings):
    network = layers.variable('data')
    network = layers.flatten(network)
    layer_settings = settings['layer_settings']
    for index, layer_setting in enumerate(layer_settings):
        n_hidden_units = layer_setting['n_hidden_units']
        p = layer_setting['p']
        network = _fully_connected(network, n_hidden_units, p)
    network = layers.fully_connected(X=network, n_hidden_units=settings['n_classes'])
    network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')
    return network

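# A hypothetical settings dict for dropping_out_mlp (illustrative values, not
# from the repository): one entry per hidden layer, with dropout probability p.
_EXAMPLE_DROPOUT_SETTINGS = {
    'n_classes': 10,
    'layer_settings': (
        {'n_hidden_units': 1024, 'p': 0.5},
        {'n_hidden_units': 1024, 'p': 0.5},
    ),
}
# network = dropping_out_mlp(_EXAMPLE_DROPOUT_SETTINGS)
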
def _attention_network(network):
    network = _normalized_convolution(X=network, n_filters=8, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
    network = layers.pooling(X=network, mode='average', global_pool=True, kernel_shape=(1, 1), stride=(1, 1), pad=(1, 1))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network, n_hidden_units=1)
    return network

def dense_network(settings, n_classes=10):
    network = layers.variable('data')
    network = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1))
    for module_settings in settings:
        network = _dense_module(network, module_settings)
    network = layers.pooling(X=network, mode='average', kernel_shape=(1, 1), stride=(1, 1), pad=(0, 0), global_pool=True)
    network = layers.flatten(network)
    network = layers.fully_connected(X=network, n_hidden_units=n_classes)
    network = layers.softmax_loss(network, normalization='batch')
    return network

def element_wise_stochastic_pooling_mlp(settings):
    network = layers.variable('data')
    network = layers.flatten(network)
    layer_settings = settings['layer_settings']
    for index, layer_setting in enumerate(layer_settings):
        n_hidden_units = layer_setting['n_hidden_units']
        mode = layer_setting['pooling_mode']
        p = layer_setting['p']  # the probability of using the long path
        network = _fully_connected(network, settings['batch_size'], n_hidden_units, mode, p)
    network = layers.fully_connected(X=network, n_hidden_units=settings['n_classes'])
    network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')
    return network

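# A hypothetical settings dict for element_wise_stochastic_pooling_mlp
# (illustrative values, not from the repository); batch_size is needed to
# shape the random gate, and p is the probability of using the long path.
_EXAMPLE_STOCHASTIC_SETTINGS = {
    'batch_size': 64,
    'n_classes': 10,
    'layer_settings': (
        {'n_hidden_units': 1024, 'pooling_mode': 'average', 'p': 0.5},
        {'n_hidden_units': 1024, 'pooling_mode': 'average', 'p': 0.5},
    ),
}
# network = element_wise_stochastic_pooling_mlp(_EXAMPLE_STOCHASTIC_SETTINGS)
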
def naive_network(n_layers, weight_sharing):
    network = layers.variable('data')
    network = _normalized_convolution(X=network, n_filters=8, kernel_shape=(5, 5), stride=(1, 1), pad=(2, 2))
    network = layers.pooling(X=network, mode='maximum', kernel_shape=(2, 2), stride=(2, 2), pad=(0, 0))
    if weight_sharing:
        shared_weight = layers.variable('shared_weight')
        shared_bias = layers.variable('shared_bias')
    for index in range(n_layers):
        if weight_sharing:
            network = _normalized_convolution(X=network, n_filters=8, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1), weight=shared_weight, bias=shared_bias)
        else:
            network = _normalized_convolution(X=network, n_filters=16, kernel_shape=(3, 3), stride=(1, 1), pad=(1, 1))
    network = layers.pooling(X=network, mode='average', global_pool=True, kernel_shape=(1, 1), stride=(1, 1), pad=(1, 1))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network, n_hidden_units=10)
    network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')
    return network

def residual_network(procedures):
    network = layers.variable('data')
    for index, procedure in enumerate(procedures):
        transit, recur = procedure
        network = transit(network, index)
        network = recur(network, index)
    network = layers.pooling(X=network, mode='average', global_pool=True, kernel_shape=(1, 1), stride=(1, 1), pad=(1, 1))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network, n_hidden_units=10, name='linear_transition')
    network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')
    return network

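# A hypothetical example of the procedures argument (a sketch using the
# _transit and _module helpers seen elsewhere in this repository; the filter
# and layer counts are illustrative): each procedure is a (transit, recur)
# pair of callables taking (network, index).
_EXAMPLE_PROCEDURES = (
    (lambda network, index: _transit(network, 16), lambda network, index: _module(network, 16, 3)),
    (lambda network, index: _transit(network, 32), lambda network, index: _module(network, 32, 3)),
)
# network = residual_network(_EXAMPLE_PROCEDURES)
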
def simplifed_identity_network(N):
    network = layers.variable('data')
    for index in range(N):
        identity = network
        residual = _normalized_convolution(network, (3, 3), 16, (1, 1), (1, 1))
        residual = _normalized_convolution(residual, (3, 3), 16, (1, 1), (1, 1))
        network = identity + residual
    network = layers.pooling(X=network, mode='average', global_pool=True, kernel_shape=(1, 1), stride=(1, 1), pad=(1, 1))
    network = layers.flatten(network)
    network = layers.fully_connected(X=network, n_hidden_units=10, name='linear_transition')
    network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')
    return network

for index in range(args.n_residual_layers):
    network = layers.batch_normalization(network, beta=shared_beta, gamma=shared_gamma, fix_gamma=False)
    network = layers.ReLU(network)
    network += _convolution(network)

network = layers.pooling(X=network, mode='average', kernel_shape=(7, 7), stride=(1, 1), pad=(0, 0))
# network = layers.pooling(X=network, mode='average', kernel_shape=(14, 14), stride=(1, 1), pad=(0, 0))
network = layers.flatten(network)
network = layers.fully_connected(X=network, n_hidden_units=10)
network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')

optimizer_settings = {
    'args': {'momentum': 0.9},
    'initial_lr': 0.1,
    'optimizer': 'SGD',
}

solver = MXSolver(
    batch_size=64,
    devices=(args.gpu_index,),

import cPickle as pickle

from data_utilities import load_cifar10, load_cifar10_record
from lr_scheduler import AtEpochScheduler, AtIterationScheduler
from mx_initializer import PReLUInitializer
from mx_solver import MXSolver
from GPU_utility import GPU_availability
import mx_layers as layers

N_HIDDEN_UNITS = 1536
N_LAYERS = 3

network = layers.variable('data')
for index in range(N_LAYERS):
    network = layers.fully_connected(X=network, n_hidden_units=N_HIDDEN_UNITS)
    network = layers.ReLU(network)
network = layers.fully_connected(X=network, n_hidden_units=10)
network = layers.softmax_loss(prediction=network, normalization='batch', id='softmax')

BATCH_SIZE = 64
lr = 0.1
lr_table = {}
lr_scheduler = AtIterationScheduler(lr, lr_table)

optimizer_settings = {
    'args': {
        'momentum': 0.9

# Module-level counter used to give each RNN linearity a unique id.
_n_rnn_linearities = 0

def _rnn_linearity(X, D, weight):
    global _n_rnn_linearities
    id = 'rnn_linearity%d' % _n_rnn_linearities
    _n_rnn_linearities += 1
    return layers.fully_connected(X=X, n_hidden_units=D, weight=weight, no_bias=True, id=id)

def _fully_connected(network, n_hidden_units, p):
    network = layers.fully_connected(X=network, n_hidden_units=n_hidden_units)
    network = layers.ReLU(network)
    network = layers.dropout(network, p)
    return network