def main(args):
    """Train a convolutional network on CIFAR-10 with SGD + momentum."""
    net = ConvolutionNet()

    # Load CIFAR-10 and wrap the train/test splits in iterators; only the
    # training split is shuffled. `batch_size` comes from module scope.
    dataset = get_CIFAR10_data(args.data_dir)
    train_iter = NDArrayIter(data=dataset['X_train'], label=dataset['y_train'],
                             batch_size=batch_size, shuffle=True)
    test_iter = NDArrayIter(data=dataset['X_test'], label=dataset['y_test'],
                            batch_size=batch_size, shuffle=False)

    # Solver: gaussian weight init, SGD with momentum.
    trainer = Solver(net,
                     train_iter,
                     test_iter,
                     num_epochs=10,
                     init_rule='gaussian',
                     init_config={'stdvar': 0.001},
                     update_rule='sgd_momentum',
                     optim_config={'learning_rate': 1e-3, 'momentum': 0.9},
                     verbose=True,
                     print_every=20)

    trainer.init()   # initialize model parameters
    trainer.train()  # run the optimization loop
def main():
    """Train an RNN regression model on generated data with Adam."""
    net = RNNNet()

    # Synthesize 10k training and 1k test examples.
    train_x, train_y = data_gen(10000)
    test_x, test_y = data_gen(1000)
    train_iter = NDArrayIter(train_x, train_y, batch_size=100, shuffle=True)
    test_iter = NDArrayIter(test_x, test_y, batch_size=100, shuffle=False)

    trainer = Solver(net,
                     train_iter,
                     test_iter,
                     num_epochs=10,
                     init_rule='xavier',
                     update_rule='adam',
                     task_type='regression',
                     verbose=True,
                     print_every=20)
    trainer.init()
    trainer.train()
def main(_):
    """Train TwoLayerCaffeNet on flattened CIFAR-10 images."""
    model = TwoLayerCaffeNet()
    data = get_CIFAR10_data()
    # Reshape all image data to (num_samples, 3*32*32) matrices.
    data['X_train'] = data['X_train'].reshape(
        [data['X_train'].shape[0], 3 * 32 * 32])
    data['X_val'] = data['X_val'].reshape(
        [data['X_val'].shape[0], 3 * 32 * 32])
    data['X_test'] = data['X_test'].reshape(
        [data['X_test'].shape[0], 3 * 32 * 32])
    # ATTENTION: the batch size should be the same as the input shape declared above.
    train_dataiter = NDArrayIter(data['X_train'], data['y_train'], 100, True)
    # BUG FIX: the test iterator was created with shuffle=True; evaluation
    # data should not be shuffled (every other example here uses False).
    test_dataiter = NDArrayIter(data['X_test'], data['y_test'], 100, False)
    solver = Solver(model,
                    train_dataiter,
                    test_dataiter,
                    num_epochs=10,
                    # BUG FIX: was 128, inconsistent with the iterators'
                    # batch size of 100 (see the ATTENTION note above).
                    batch_size=100,
                    init_rule='xavier',
                    update_rule='sgd_momentum',
                    optim_config={
                        'learning_rate': 1e-4,
                        'momentum': 0.9
                    },
                    verbose=True,
                    print_every=20)
    solver.init()
    solver.train()
def __init__(self, model, **kwargs):
    """Build MySolver with adding-problem data iterators baked in.

    Remaining keyword arguments are forwarded to the base Solver.
    """
    from minpy.nn.io import NDArrayIter
    from examples.utils.data_utils import adding_problem_generator as data_gen

    # Fixed-size splits: 10k training examples, 1k test examples.
    train_x, train_y = data_gen(10000)
    test_x, test_y = data_gen(1000)
    train_iter = NDArrayIter(train_x, train_y, batch_size=100, shuffle=True)
    test_iter = NDArrayIter(test_x, test_y, batch_size=100, shuffle=False)

    super(MySolver, self).__init__(model, train_iter, test_iter, **kwargs)
def main(args):
    """Train a small CNN on CIFAR-10, built with the minpy layer builder."""
    # Define a convolutional neural network (same architecture as above).
    # BUG FIX: the original was missing the comma after builder.Reshape(...),
    # which is a syntax error in the Sequential argument list.
    net = builder.Sequential(
        builder.Convolution((7, 7), 32),
        builder.ReLU(),
        builder.Pooling('max', (2, 2), (2, 2)),
        builder.Reshape((flattened_input_size,)),
        builder.Affine(hidden_size),
        builder.Affine(num_classes),
    )
    # Cast the definition to a model compatible with minpy solver.
    model = builder.Model(net, 'softmax', (3 * 32 * 32,))
    data = get_CIFAR10_data(args.data_dir)
    train_dataiter = NDArrayIter(data['X_train'],
                                 data['y_train'],
                                 batch_size=batch_size,
                                 shuffle=True)
    test_dataiter = NDArrayIter(data['X_test'],
                                data['y_test'],
                                batch_size=batch_size,
                                shuffle=False)
    solver = Solver(model,
                    train_dataiter,
                    test_dataiter,
                    num_epochs=10,
                    init_rule='gaussian',
                    init_config={
                        'stdvar': 0.001
                    },
                    update_rule='sgd_momentum',
                    optim_config={
                        'learning_rate': 1e-3,
                        'momentum': 0.9
                    },
                    verbose=True,
                    print_every=20)
    solver.init()
    solver.train()
def main(args):
    """Train a two-layer perceptron on flattened CIFAR-10 images."""
    # Define a 2-layer perceptron via the layer builder.
    perceptron = builder.Sequential(
        builder.Affine(512),
        builder.ReLU(),
        builder.Affine(10)
    )
    # Cast the definition to a model compatible with minpy solver.
    model = builder.Model(perceptron, 'softmax', (3 * 32 * 32,))

    data = get_CIFAR10_data(args.data_dir)
    # Flatten every image tensor into a (num_samples, 3072) matrix.
    for split in ('X_train', 'X_val', 'X_test'):
        data[split] = data[split].reshape([data[split].shape[0], 3 * 32 * 32])

    train_iter = NDArrayIter(data['X_train'], data['y_train'],
                             batch_size=100, shuffle=True)
    test_iter = NDArrayIter(data['X_test'], data['y_test'],
                            batch_size=100, shuffle=False)

    trainer = Solver(model,
                     train_iter,
                     test_iter,
                     num_epochs=10,
                     init_rule='gaussian',
                     init_config={'stdvar': 0.001},
                     update_rule='sgd_momentum',
                     optim_config={'learning_rate': 1e-5, 'momentum': 0.9},
                     verbose=True,
                     print_every=20)
    trainer.init()
    trainer.train()
def main():
    """Smoke-test TwoLayerNet: 5 epochs on CIFAR-10, assert train accuracy."""
    net = TwoLayerNet()

    dataset = get_CIFAR10_data('cifar-10-batches-py')
    train_iter = NDArrayIter(data=dataset['X_train'], label=dataset['y_train'],
                             batch_size=batch_size, shuffle=True)
    test_iter = NDArrayIter(data=dataset['X_test'], label=dataset['y_test'],
                            batch_size=batch_size, shuffle=False)

    trainer = Solver(net,
                     train_iter,
                     test_iter,
                     num_epochs=5,
                     init_rule='gaussian',
                     init_config={'stdvar': 0.001},
                     update_rule='sgd_momentum',
                     optim_config={'learning_rate': 1e-4, 'momentum': 0.9},
                     verbose=True,
                     print_every=20)
    trainer.init()
    trainer.train()

    # A bug-free MLP should reach around 60% train accuracy; assert a
    # conservative floor so the test isn't flaky.
    train_acc = trainer.check_accuracy(
        train_iter, num_samples=trainer.train_acc_num_samples)
    assert (train_acc >= 0.45)
def main():
    """Smoke-test ConvolutionNet: one epoch on CIFAR-10, assert train accuracy."""
    net = ConvolutionNet()

    dataset = get_CIFAR10_data('cifar-10-batches-py')
    train_iter = NDArrayIter(data=dataset['X_train'], label=dataset['y_train'],
                             batch_size=batch_size, shuffle=True)
    test_iter = NDArrayIter(data=dataset['X_test'], label=dataset['y_test'],
                            batch_size=batch_size, shuffle=False)

    trainer = Solver(net,
                     train_iter,
                     test_iter,
                     num_epochs=1,
                     init_rule='gaussian',
                     init_config={'stdvar': 0.001},
                     update_rule='sgd_momentum',
                     optim_config={'learning_rate': 1e-3, 'momentum': 0.9},
                     verbose=True,
                     print_every=20)
    trainer.init()   # initialize model parameters
    trainer.train()  # run the optimization loop

    # A normal CNN should reach ~50% train accuracy; assert a conservative
    # floor so the test isn't flaky.
    train_acc = trainer.check_accuracy(
        train_iter, num_samples=trainer.train_acc_num_samples)
    assert (train_acc >= 0.40)
def main(args):
    """Flatten CIFAR-10 images and print the first 10 minibatches."""
    data = get_CIFAR10_data(args.data_dir)
    # Reshape all data to (num_samples, 3*32*32) matrices.
    for split in ('X_train', 'X_val', 'X_test'):
        data[split] = data[split].reshape([data[split].shape[0], 3 * 32 * 32])
    # FIX: removed the unused local `train_data = data['X_train']`.
    dataiter = NDArrayIter(data['X_train'], data['y_train'],
                           batch_size=100, shuffle=True)
    # Inspect only the first 10 batches; enumerate replaces the manual counter.
    for count, each_data in enumerate(dataiter, start=1):
        print(each_data)
        if count == 10:
            break
# Shared dict handed to the MLP for monitoring hooks; both monitors are
# disabled below, so it stays as plumbing only.
storage = {}
# Build an MLP with HIDDEN_LAYERS hidden layers of width 1024 plus a
# 10-way output layer.
mlp = MLP(*((1024, ) * HIDDEN_LAYERS + (10, )),
          activation=activation,
          affine_monitor=False,
          activation_monitor=False,
          storage=storage)
# Initialization mode is taken from the command line.
ini_mode = sys.argv[2]
# ini_mode = 'layer-by-layer'
if ini_mode == 'layer-by-layer':
    # Layer-by-layer mode additionally passes training_X to the model
    # builder — presumably for data-dependent initialization; confirm
    # against builder.Model's signature.
    model = builder.Model(mlp, 'softmax', (3072, ), training_X)
else:
    model = builder.Model(mlp, 'softmax', (3072, ))
solver = Solver(model,
                NDArrayIter(training_X, training_Y),
                NDArrayIter(test_X, test_Y),
                init_rule='xavier')
solver.init()
# Snapshot parameter names and values in matching order.
parameter_keys = list(model.params.keys())
parameter_values = list(model.params.values())
def loss_function(*args):
    # Loss evaluated on the test split; gradient_loss below differentiates
    # this with respect to the parameters selected by index.
    predictions = model.forward(test_X, 'train')
    return model.loss(predictions, test_Y)
gl = gradient_loss(loss_function, range(len(parameter_keys)))
# NOTE(review): this chunk opens mid-statement; judging from the parallel
# test_data line below, the truncated statement presumably read
# `train_data = (train_data - train_data.mean(axis=0)) / (...)` — confirm
# against the full source.
train_data.mean(axis=0)) / (train_data.std(axis=0) + eps)
# Standardize test features (eps guards against zero std).
test_data = (test_data - test_data.mean(axis=0)) / (test_data.std(axis=0) + eps)
N, D = train_data.shape
patch_size = 7
# NOTE(review): `/` must yield an int for the reshape below; on Python 3
# this is float division — presumably this script targets Python 2 (or
# needs `//`).
sequence_length = D / patch_size
# View each flat feature vector as a sequence of 7-wide patches.
train_data = train_data.reshape((N, sequence_length, patch_size))
N, _ = test_data.shape
test_data = test_data.reshape((N, sequence_length, patch_size))
from minpy.nn.io import NDArrayIter
batch_size = 64
# Labels truncated to sample_number, matching the (unseen) data slicing —
# TODO confirm against the code that defines sample_number.
train_data_iter = NDArrayIter(train_data, data['train_label'][:sample_number],
                              batch_size, shuffle=True)
test_data_iter = NDArrayIter(test_data, data['test_label'][:sample_number],
                             batch_size, shuffle=False)
# Select the recurrent cell type from the CLI flag.
if args.rnn == 'RNN':
    model = RNNModel(128)
elif args.rnn == 'LSTM':
    model = LSTMModel(128)
updater = Updater(model, update_rule='rmsprop', learning_rate=0.002)
iteration_number = 0
for epoch_number in range(50):
    for iteration, batch in enumerate(train_data_iter):
        iteration_number += 1
        # NOTE(review): the loop body continues beyond this chunk.
# NOTE(review): this chunk begins with a dangling `else:`; the matching `if`
# is outside the visible range. Indentation is reconstructed — batch_size
# onward is presumed common to both branches since train_X/test_X are used
# unconditionally below; confirm against the full source.
else:
    print 'Gen Random data'  # Python 2 print statement
    num_samples = 5000
    train_X = RNG.randint(vocab_size, size=(num_samples, seq_len))
    # Targets are the inputs shifted left one step, zero-padded at the end.
    train_Y = NP.concatenate(
        [train_X[:, 1:], NP.zeros((num_samples, 1))], axis=1)
    # raise an error if we use minpy.numpy
    test_X = RNG.randint(vocab_size, size=(num_samples, seq_len))
    test_Y = NP.concatenate(
        [test_X[:, 1:], NP.zeros((num_samples, 1))], axis=1)
batch_size = 20
from minpy.nn.io import NDArrayIter
train_data_iter = NDArrayIter(train_X, train_Y,
                              batch_size=batch_size, shuffle=True)
test_data_iter = NDArrayIter(test_X, test_Y,
                             batch_size=batch_size, shuffle=False)
model = LMModel(vocab_size=vocab_size, H_DIM=200, EMB_DIM=200)
updater = Updater(model, update_rule='sgd', learning_rate=0.001)
# Wall-clock start for timing; iter_num counts minibatches across epochs.
mt = time.time()
iter_num = 0
for ep in xrange(50):  # xrange: Python 2
    train_data_iter.reset()  # rewind the iterator at the start of each epoch
    for batch in train_data_iter:
        iter_num += 1
        # NOTE(review): the loop body continues beyond this chunk.
# NOTE(review): this chunk opens mid-expression — these are the trailing
# layers of a builder.Sequential(...) call whose start is outside this view.
    builder.Convolution((1, 1), 10),
    DReLU(),
    builder.Pooling('avg', (8, 8)),
    builder.Reshape((10,))
)
data = load_cifar10(path='../utilities/cifar/', reshape=True, center=True,
                    rescale=True)
# Initialization mode is taken from the command line.
ini_mode = sys.argv[2]
# ini_mode = 'normal'
if ini_mode == 'layer-by-layer':
    # Layer-by-layer mode: hand sample data (data[2]) to the model builder,
    # then initialize through a Solver.
    model = builder.Model(network_in_network, 'softmax', (3, 32, 32,), data[2])
    solver = Solver(
        model,
        NDArrayIter(data[0], data[1]),
        NDArrayIter(data[0], data[1]),
    )
    solver.init()
else:
    model = builder.Model(network_in_network, 'softmax', (3, 32, 32,))
    # Manual path: walk every parameter and derive fan-in `n` from its shape
    # (2-D affine weight or 4-D convolution weight).
    for arg, setting in model.param_configs.items():
        print arg  # Python 2 print statement
        shape = setting['shape']
        if 'weight' in arg:
            if len(shape) == 2:
                n = shape[0]
            elif len(shape) == 4:
                n = np.prod(shape[1:])
            else:
                raise Exception()
            # NOTE(review): the loop body continues beyond this chunk.
# NOTE(review): `loss` takes `self` and so appears to be a method of a class
# defined outside this chunk.
def loss(self, predict, y):
    # Flatten predictions to one row per time step and targets to a flat
    # vector so softmax cross-entropy applies per step.
    # NOTE(review): assumes predict is rank-3 (batch, time, vocab) and y is
    # rank-2 (batch, time) — confirm against the caller.
    return softmax_crossentropy(
        predict.reshape((predict.shape[0]*predict.shape[1], predict.shape[2])),
        y.reshape((y.shape[0]*y.shape[1],)))

def get_data(opts, test=False, post_name='.keep50kr'):
    # Thin wrapper over txt_data driven by the parsed CLI options.
    return txt_data(opts.data_name, batch_size = opts.batch_size, test=test,
                    post_name=post_name)

if __name__=='__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_name', type=str, default='data/ptb')
    # parser.add_argument('--train')
    # parser.add_argument('--test')
    # parser.add_argument('--fname', type=str)
    parser.add_argument('--batch_size', type=int, default=64)
    args = parser.parse_args()
    Dataset = get_data(args)
    # Re-chunk the token streams into fixed-length sequences of 35 steps.
    train_word = Dataset.train_word.reshape((-1, 35))
    train_Yword = Dataset.train_Yword.reshape((-1, 35))
    test_word = Dataset.test_word.reshape((-1, 35))
    test_Yword = Dataset.test_Yword.reshape((-1, 35))
    train_dataiter = NDArrayIter(train_word, train_Yword, batch_size=64,
                                 shuffle=True)
    test_dataiter = NDArrayIter(test_word, test_Yword, batch_size=64,
                                shuffle=False)
    # Vocabulary size is w_dim + 1 — presumably reserving one extra id
    # (padding/unknown); verify against LM_RNN.
    model = LM_RNN(batch_size=64, WORD_DIM=Dataset.w_dim+1)
    solver = Solver(model, train_dataiter, test_dataiter, num_epochs=2,
                    init_rule='xavier', update_rule='adam', print_every=20)
    solver.init()
    solver.train()
IN_DIM = 10   # input feature width
H_DIM = 200   # hidden width passed to RegModel
OUT_DIM = 10  # output width passed to RegModel
# Toy multi-target regression data: Y1 is the element-wise square of X and
# Y2 the element-wise square root. num_samples is defined outside this chunk.
train_X = RNG.random((num_samples, IN_DIM))
train_Y1 = train_X**2
train_Y2 = train_X**0.5
test_X = RNG.random((num_samples, IN_DIM))
test_Y1 = test_X**2
test_Y2 = test_X**0.5
batch_size = 64
from minpy.nn.io import NDArrayIter
# One iterator per target; training iterators shuffle, test iterators don't.
train_data_iter1 = NDArrayIter(train_X, train_Y1, batch_size=batch_size,
                               shuffle=True)
train_data_iter2 = NDArrayIter(train_X, train_Y2, batch_size=batch_size,
                               shuffle=True)
test_data_iter1 = NDArrayIter(test_X, test_Y1, batch_size=batch_size,
                              shuffle=False)
test_data_iter2 = NDArrayIter(test_X, test_Y2, batch_size=batch_size,
                              shuffle=False)
model = RegModel(H_DIM=H_DIM, OUT_DIM=OUT_DIM)
eps = 1e-5
# Standardize each feature to zero mean / unit std; eps guards divide-by-zero.
train_data = (train_data - train_data.mean(axis=0)) / (train_data.std(axis=0)
                                                       + eps)
test_data = (test_data - test_data.mean(axis=0)) / (test_data.std(axis=0)
                                                    + eps)
N, D = train_data.shape
patch_size = 7
# NOTE(review): `/` must yield an int for the reshape below; on Python 3
# this is float division — presumably this script targets Python 2 (or
# needs `//`).
sequence_length = D / patch_size
# View each flat feature vector as a sequence of 7-wide patches.
train_data = train_data.reshape((N, sequence_length, patch_size))
N, _ = test_data.shape
test_data = test_data.reshape((N, sequence_length, patch_size))
from minpy.nn.io import NDArrayIter
batch_size = 64
# Labels truncated to sample_number, matching the (unseen) data slicing —
# TODO confirm against the code that defines sample_number.
train_data_iter = NDArrayIter(train_data, data['train_label'][:sample_number],
                              batch_size, shuffle=True)
test_data_iter = NDArrayIter(test_data, data['test_label'][:sample_number],
                             batch_size, shuffle=False)
# Select the recurrent cell type from the CLI flag.
if args.rnn == 'RNN':
    model = RNNModel(128)
elif args.rnn == 'LSTM':
    model = LSTMModel(128)
updater = Updater(model, update_rule='rmsprop', learning_rate=0.002)
iteration_number = 0
for epoch_number in range(50):
    for iteration, batch in enumerate(train_data_iter):
        iteration_number += 1
        data, labels = unpack_batch(batch)
        # One training step: compute gradients and loss, then apply the
        # rmsprop update.
        grad_dict, loss = model.grad_and_loss(data, labels)
        updater(grad_dict)