}, 'initial_lr': 0.1, 'optimizer': 'SGD' } solver = MXSolver( batch_size=64, devices=(args.gpu_index, ), epochs=30, initializer=PReLUInitializer(), optimizer_settings=optimizer_settings, symbol=network, verbose=True, ) from data_utilities import load_mnist data = load_mnist(path='stretched_canvas_mnist', scale=1, shape=(1, 56, 56))[:2] data += load_mnist(path='stretched_mnist', scale=1, shape=(1, 56, 56))[2:] info = solver.train(data) postfix = '-' + args.postfix if args.postfix else '' identifier = 'residual-network-on-stretched-mnist-%d%s' % ( args.n_residual_layers, postfix) import cPickle as pickle pickle.dump(info, open('info/%s' % identifier, 'wb')) parameters = solver.export_parameters() pickle.dump(parameters, open('parameters/%s' % identifier, 'wb'))
# NOTE(review): this chunk begins mid-expression -- the dict comprehension
# whose tail appears below starts outside this view; it filters out entries
# whose name contains 'data' or 'label'.
    if not ('data' in name or 'label' in name)
}

# Allocate zeroed auxiliary-state arrays matching the symbol's declared
# auxiliary states, then run the initializer over each of them.
# NOTE(review): state_shapes is defined outside this view -- presumably the
# result of an earlier shape-inference call; confirm it aligns with
# state_names one-to-one.
state_names = loss_group.list_auxiliary_states()
states = {
    name: mx.nd.zeros(shape, context)
    for name, shape in zip(state_names, state_shapes)
}
for name, array in states.items():
    initializer(name, array)

# Bind the symbol into an executor with explicit argument/gradient buffers
# (arguments and gradients come from outside this view).
executor = loss_group.bind(context, arguments, gradients, aux_states=states)

from data_utilities import load_mnist
# `original` is loaded but not used within this view -- presumably consumed
# further down the script.
original = load_mnist(path='stretched_canvas_mnist', scale=1, shape=(1, 56, 56))
stretched = load_mnist(path='stretched_mnist', scale=1, shape=(1, 56, 56))

from mxnet.io import NDArrayIter as Iterator
iterator = Iterator(stretched[0], stretched[1], args.batch_size, shuffle=True)
# Pull the (data, label) NDArray pair out of an mxnet DataBatch.
unpack = lambda batch: (batch.data[0], batch.label[0])

n_iterations = 0
# NOTE(review): the loop body appears truncated here -- no parameter update
# or n_iterations increment is visible after backward(); confirm the rest
# exists beyond this chunk. Also, 'labels' must match the symbol's label
# argument name exactly (the filter above matched on 'label') -- verify
# against the network definition.
for batch in iterator:
    data, labels = unpack(batch)
    arguments['data'][:] = data
    arguments['labels'][:] = labels
    executor.forward(is_train=True)
    executor.backward()
optimizer_settings = {
    'args': {
        'momentum': 0.9
    },
    'initial_lr': 0.1,
    'optimizer': 'SGD'
}

solver = MXSolver(
    batch_size=64,
    devices=(configs.gpu_index,),
    epochs=50,
    initializer=PReLUInitializer(),
    optimizer_settings=optimizer_settings,
    symbol=network,
    verbose=True,
)

# Training split from standard MNIST; validation/test splits from the
# shrinked variant.
training_data, training_labels, _, _, _, _ = load_mnist(shape=(1, 28, 28))
_, _, validation_data, validation_labels, test_data, test_labels = load_mnist(
    path='shrinked_mnist', shape=(1, 28, 28))
data = (training_data, training_labels, validation_data, validation_labels,
        test_data, test_labels)

info = solver.train(data)

identifier = 'shrinked-mnist-fixed-attention-network'
# Fix: close the dump files deterministically (the original leaked the
# anonymous open(...) handles).
with open('info/%s' % identifier, 'wb') as f:
    pickle.dump(info, f)
parameters = solver.export_parameters()
with open('parameters/%s' % identifier, 'wb') as f:
    pickle.dump(parameters, f)
BATCH_SIZE = 128

# Step the learning rate down 10x at iteration 10000.
lr = 0.1
lr_table = {10000: 0.01}
lr_scheduler = AtIterationScheduler(lr, lr_table)

optimizer_settings = {
    'args': {'momentum': 0.9},
    'initial_lr': lr,
    'lr_scheduler': lr_scheduler,
    'optimizer': 'SGD',
    'weight_decay': 0.0001,
}

solver = MXSolver(
    batch_size=BATCH_SIZE,
    devices=(0, 1, 2, 3),
    epochs=50,
    initializer=PReLUInitializer(),
    optimizer_settings=optimizer_settings,
    symbol=network,
    verbose=True,
)

data = load_mnist(path='rescaled_mnist', shape=(1, 42, 42))
info = solver.train(data)

# N and sys.argv[2] come from earlier in the script -- presumably the network
# depth and a run tag; confirm against the argument parsing above.
identifier = 'rescaled-mnist-baseline-network-%d-%s' % (N, sys.argv[2])
# Fix: close the dump files deterministically (the original leaked the
# anonymous open(...) handles).
with open('info/%s' % identifier, 'wb') as f:
    pickle.dump(info, f)
parameters = solver.export_parameters()
with open('parameters/%s' % identifier, 'wb') as f:
    pickle.dump(parameters, f)
parser.add_argument('--n_filters', type=int, default=4)
parser.add_argument('--n_layers', type=int, default=3)
parser.add_argument('--n_scales', type=int, default=3)
parser.add_argument('--n_units', type=int, default=16)
args = parser.parse_args()

import mxnet as mx
from mxnet.context import Context
# Fall back to CPU when a negative GPU index is given.
context = mx.cpu() if args.gpu_index < 0 else mx.gpu(args.gpu_index)
Context.default_ctx = context


# Fix (PEP 8 E731): named def instead of a lambda bound to a name; behavior
# is unchanged -- move a DataBatch's data/label arrays onto the active device.
def unpack_batch(batch):
    return (batch.data[0].as_in_context(context),
            batch.label[0].as_in_context(context))


from data_utilities import load_mnist
data = load_mnist(path=args.path, normalize=True, shape=(1, 112, 112))
# data = load_mnist(path=args.path, normalize=True, shape=(1, 56, 56))

from mxnet.io import NDArrayIter
training_data = NDArrayIter(data[0], data[1], batch_size=args.batch_size)
validation_data = NDArrayIter(data[2], data[3], batch_size=args.batch_size)
test_data = NDArrayIter(data[4], data[5], batch_size=args.batch_size)

model = MSPCNN(args.n_layers, args.n_filters, args.n_scales, args.n_units)
updater = Updater(model, update_rule='adam', lr=args.lr)
# updater = Updater(model, update_rule='sgd_momentum', lr=1e-1, momentum=0.9)

import numpy as np
from mxnet.contrib.autograd import compute_gradient
import minpy.nn.utils as utils
parser.add_argument('--n_filters', type=int, default=4)
parser.add_argument('--n_layers', type=int, default=3)
parser.add_argument('--n_scales', type=int, default=3)
parser.add_argument('--n_units', type=int, default=16)
args = parser.parse_args()

import mxnet as mx
from mxnet.context import Context
# Fall back to CPU when a negative GPU index is given.
context = mx.cpu() if args.gpu_index < 0 else mx.gpu(args.gpu_index)
Context.default_ctx = context


# Fix (PEP 8 E731): named def instead of a lambda bound to a name; behavior
# is unchanged -- move a DataBatch's data/label arrays onto the active device.
def unpack_batch(batch):
    return (batch.data[0].as_in_context(context),
            batch.label[0].as_in_context(context))


from data_utilities import load_mnist
data = load_mnist(path=args.path, normalize=True, shape=(1, 56, 56))
# data = load_mnist(path=args.path, normalize=True, shape=(1, 112, 112))

from mxnet.io import NDArrayIter
training_data = NDArrayIter(data[0], data[1], batch_size=args.batch_size)
validation_data = NDArrayIter(data[2], data[3], batch_size=args.batch_size)
test_data = NDArrayIter(data[4], data[5], batch_size=args.batch_size)

model = ReferentialCNN(args.n_layers, args.n_filters, args.n_scales,
                       args.n_units)
updater = Updater(model, update_rule='adam', lr=args.lr)
# updater = Updater(model, update_rule='sgd_momentum', lr=1e-1, momentum=0.9)

import numpy as np
from mxnet.contrib.autograd import compute_gradient
import minpy.nn.utils as utils
optimizer_settings = {
    'args': {
        'momentum': 0.9
    },
    'initial_lr': 0.1,
    'optimizer': 'SGD'
}

solver = MXSolver(
    batch_size=64,
    devices=(configs.gpu_index,),
    epochs=30,
    initializer=PReLUInitializer(),
    optimizer_settings=optimizer_settings,
    symbol=network,
    verbose=True,
)

# Training split from stretched MNIST; validation/test splits from the
# canvas variant. NOTE(review): the identifier below says 'shrinked-mnist'
# while the data loaded here is the stretched sets -- confirm the label is
# intentional.
data = []
data.extend(load_mnist(path='stretched_mnist', scale=1, shape=(1, 56, 56))[:2])
data.extend(
    load_mnist(path='stretched_canvas_mnist', scale=1, shape=(1, 56, 56))[2:])

info = solver.train(data)

identifier = 'shrinked-mnist-plain-network-%d-%s' % (configs.n_plain_layers,
                                                     configs.postfix)
# Fix: close the dump files deterministically (the original leaked the
# anonymous open(...) handles).
with open('info/%s' % identifier, 'wb') as f:
    pickle.dump(info, f)
parameters = solver.export_parameters()
with open('parameters/%s' % identifier, 'wb') as f:
    pickle.dump(parameters, f)
'args': { 'momentum': 0.9 }, 'initial_lr': 0.1, 'optimizer': 'SGD' } solver = MXSolver( batch_size=64, devices=(args.gpu_index, ), epochs=30, initializer=PReLUInitializer(), optimizer_settings=optimizer_settings, symbol=network, verbose=True, ) from data_utilities import load_mnist data = load_mnist(scale=1, shape=(1, 28, 28)) info = solver.train(data) postfix = '-' + args.postfix if args.postfix else '' identifier = 'residual-network-on-standard-mnist-%d%s' % ( args.n_residual_layers, postfix) import cPickle as pickle pickle.dump(info, open('info/%s' % identifier, 'wb')) parameters = solver.export_parameters() pickle.dump(parameters, open('parameters/%s' % identifier, 'wb'))