def test_lrn():
    """Smoke-test ``cross_channel_lrn`` and gradient-check it on a fixed input.

    Raises ``SkipTest`` when the compile info reports that CUDA is disabled.
    The input is a seeded random array of shape (4, 8, 16, 16) stored in a
    shared variable, so the compiled function takes no arguments.
    """
    if not get_compile_info()["CGT_ENABLE_CUDA"]:
        raise SkipTest("Skipping because CUDA disabled")

    nr.seed(0)  # fixed seed keeps the random input reproducible
    x_value = nr.randn(4, 8, 16, 16)
    X = cgt.shared(x_value, name="X", fixed_shape_mask="all")
    lrn_out = cross_channel_lrn(X, localsize=4, alpha=.1, beta=.5)
    evaluate = cgt.function([], lrn_out)

    # Print the output sum three times (presumably to eyeball that repeated
    # evaluations agree), then require a finite result on a fourth evaluation.
    for _ in range(3):
        print(evaluate().sum())
    assert np.isfinite(evaluate().sum())

    # Random weights reduce the output to a scalar loss for the gradient check.
    weights = nr.rand(*cgt.infer_shape(lrn_out))
    loss = (lrn_out * weights).sum()
    gradcheck_model(loss, [X], eps=1e-5)
def test_lrn():
    """Check that ``cross_channel_lrn`` yields finite output and correct gradients.

    The test is skipped (``SkipTest``) unless ``CGT_ENABLE_CUDA`` is set in the
    compile info.
    """
    cuda_enabled = get_compile_info()["CGT_ENABLE_CUDA"]
    if not cuda_enabled:
        raise SkipTest("Skipping because CUDA disabled")

    # Seeded random input held in a shared variable with a fully fixed shape.
    nr.seed(0)
    input_shape = (4, 8, 16, 16)
    Xval = nr.randn(*input_shape)
    X = cgt.shared(Xval, name="X", fixed_shape_mask="all")

    normalized = cross_channel_lrn(X, localsize=4, alpha=.1, beta=.5)
    run = cgt.function([], normalized)

    # Three printed evaluations followed by one checked evaluation.
    n_printed_runs = 3
    for _ in range(n_printed_runs):
        total = run().sum()
        print(total)
    checked_total = run().sum()
    assert np.isfinite(checked_total)

    # Weight the output by a random array so the gradient check sees a scalar.
    random_weights = nr.rand(*cgt.infer_shape(normalized))
    scalar_loss = (normalized * random_weights).sum()
    gradcheck_model(scalar_loss, [X], eps=1e-5)
def main():
    """Train and evaluate an MNIST classifier configured from the command line.

    Flags: ``--epochs``, ``--stepsize``, ``--model`` (``dense`` or ``conv``),
    ``--dropout``, ``--profile``, ``--unittest`` (return after the first
    training batch) and ``--grad_check`` (switch to quad precision, run a
    gradient check on the first parameter, and return without training).
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--dropout", action="store_true")
    parser.add_argument("--stepsize", type=float, default=.001)
    parser.add_argument("--model", choices=["dense", "conv"], default="dense")
    parser.add_argument("--unittest", action="store_true")
    parser.add_argument("--grad_check", action="store_true")
    args = parser.parse_args()
    use_conv = args.model == "conv"

    if args.grad_check:
        cgt.set_precision("quad")

    # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/
    # converted to npz
    mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz")
    images = (mnist["X"] / 255.).astype(cgt.floatX)
    labels = mnist["y"]

    np.random.seed(0)

    if use_conv:
        images = images.reshape(-1, 1, 28, 28)

    # Rows [0, 60000) are used for training and rows [60000, 70000) for testing.
    n_train, n_total = 60000, 70000
    Xtrain, ytrain = images[:n_train], labels[:n_train]
    Xtest, ytest = images[n_train:n_total], labels[n_train:n_total]
    shuffled = np.random.permutation(n_train)
    Xtrain, ytrain = Xtrain[shuffled], ytrain[shuffled]

    # Symbolic inputs; only the batch dimension is left unspecified.
    if use_conv:
        X = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28))
    else:
        X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
    y = cgt.vector("y", dtype='i8')

    # Each model is built twice over the same weights: once with the requested
    # dropout probabilities (for training) and once with dropout disabled
    # (for evaluation).
    if args.model == "dense":
        if args.dropout:
            p_drop_input, p_drop_hidden = 0.2, 0.5
        else:
            p_drop_input, p_drop_hidden = 0, 0
        w_h = init_weights(784, 256)
        w_h2 = init_weights(256, 256)
        w_o = init_weights(256, 10)
        pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden)
        pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.)
        params = [w_h, w_h2, w_o]
    elif args.model == "conv":
        if args.dropout:
            p_drop_conv, p_drop_hidden = 0.2, 0.5
        else:
            p_drop_conv, p_drop_hidden = 0, 0
        w = init_weights(32, 1, 3, 3)
        w2 = init_weights(64, 32, 3, 3)
        w3 = init_weights(128, 64, 3, 3)
        w4 = init_weights(128 * 2 * 2, 625)
        w_o = init_weights(625, 10)
        pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden)
        pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.)
        params = [w, w2, w3, w4, w_o]
    else:
        raise RuntimeError("Unreachable")

    # Training minimises the dropout cost; reporting uses the no-dropout graph.
    cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop))
    updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize)

    y_nodrop = cgt.argmax(pofy_nodrop, axis=1)
    cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop))
    mismatches = cgt.not_equal(y_nodrop, y)
    err_nodrop = cgt.cast(mismatches, cgt.floatX).mean()

    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)
    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost_nodrop])

    batch_size = 128

    from cgt.tests import gradcheck_model
    if args.grad_check:
        # Clone the cost with X and y mapped to the first training example.
        first_example = {X: Xtrain[:1], y: ytrain[:1]}
        cost_nodrop = cgt.core.clone(cost_nodrop, first_example)
        print("doing gradient check...")
        print("------------------------------------")
        gradcheck_model(cost_nodrop, params[0:1])
        print("success!")
        return

    if args.profile:
        cgt.profiler.start()

    header = ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"]
    print(fmt_row(10, header))

    n_eval = len(Xtest)  # training metrics use as many rows as the test set has
    for i_epoch in xrange(args.epochs):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            batch = slice(start, start + batch_size)
            train(Xtrain[batch], ytrain[batch])
            if args.unittest:
                return
        elapsed = time.time() - tstart

        trainerr, trainloss = computeloss(Xtrain[:n_eval], ytrain[:n_eval])
        testerr, testloss = computeloss(Xtest, ytest)
        print(fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]))

    # NOTE(review): profiling is started via cgt.profiler but reported via
    # cgt.execution.profiler -- confirm both names refer to the same profiler.
    if args.profile:
        cgt.execution.profiler.print_stats()
def main():
    """Command-line entry point: fit a dense or convolutional net to MNIST.

    Besides the training options (``--epochs``, ``--stepsize``, ``--model``,
    ``--dropout``) the parser accepts ``--devtype`` (``cpu`` or ``gpu``, used
    as the default device with the "native" backend), ``--profile``,
    ``--unittest`` (stop after one training batch) and ``--grad_check`` (quad
    precision gradient check of the first parameter instead of training).
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--dropout", action="store_true")
    parser.add_argument("--stepsize", type=float, default=.001)
    parser.add_argument("--model", choices=["dense", "conv"], default="dense")
    parser.add_argument("--unittest", action="store_true")
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--devtype", choices=["cpu", "gpu"], default="cpu")
    args = parser.parse_args()

    if args.grad_check:
        cgt.set_precision("quad")

    # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/
    # converted to npz
    mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz")

    scaled_pixels = mnist["X"] / 255.
    Xdata = scaled_pixels.astype(cgt.floatX)
    ydata = mnist["y"]

    np.random.seed(0)

    device = cgt.core.Device(devtype=args.devtype)
    cgt.update_config(default_device=device, backend="native")

    is_conv = args.model == "conv"
    if is_conv:
        Xdata = Xdata.reshape(-1, 1, 28, 28)

    # First 60000 rows form the training set, the next 10000 the test set.
    train_rows = slice(0, 60000)
    test_rows = slice(60000, 70000)
    Xtrain, ytrain = Xdata[train_rows], ydata[train_rows]
    Xtest, ytest = Xdata[test_rows], ydata[test_rows]

    # Shuffle the training examples with the seeded generator.
    order = np.random.permutation(60000)
    Xtrain = Xtrain[order]
    ytrain = ytrain[order]

    if is_conv:
        X = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28))
    else:
        X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
    y = cgt.vector("y", dtype='i8')

    # Both architectures use the same pair of dropout probabilities: one for
    # the first stage (input / conv layers) and one for the hidden layer.
    p_drop_first, p_drop_hidden = (0.2, 0.5) if args.dropout else (0, 0)

    if args.model == "dense":
        w_h = init_weights(784, 256)
        w_h2 = init_weights(256, 256)
        w_o = init_weights(256, 10)
        pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_first, p_drop_hidden)
        pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.)
        params = [w_h, w_h2, w_o]
    elif args.model == "conv":
        w = init_weights(32, 1, 3, 3)
        w2 = init_weights(64, 32, 3, 3)
        w3 = init_weights(128, 64, 3, 3)
        w4 = init_weights(128 * 2 * 2, 625)
        w_o = init_weights(625, 10)
        pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_first, p_drop_hidden)
        pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.)
        params = [w, w2, w3, w4, w_o]
    else:
        raise RuntimeError("Unreachable")

    # The dropout graph drives the RMSProp updates; the no-dropout graph
    # supplies the reported error rate and negative log-likelihood.
    cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop))
    updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize)

    y_nodrop = cgt.argmax(pofy_nodrop, axis=1)
    cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop))
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()

    symbolic_inputs = [X, y]
    train = cgt.function(inputs=symbolic_inputs, outputs=[], updates=updates)
    computeloss = cgt.function(inputs=symbolic_inputs, outputs=[err_nodrop, cost_nodrop])

    batch_size = 128

    from cgt.tests import gradcheck_model
    if args.grad_check:
        # Clone the cost with the symbolic inputs mapped to one training example.
        cost_nodrop = cgt.core.clone(cost_nodrop, {X: Xtrain[:1], y: ytrain[:1]})
        print("doing gradient check...")
        print("------------------------------------")
        gradcheck_model(cost_nodrop, params[0:1])
        print("success!")
        return

    if args.profile:
        cgt.profiler.start()

    print(fmt_row(10, ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"]))

    n_train_rows = Xtrain.shape[0]
    n_test_rows = len(Xtest)
    for i_epoch in xrange(args.epochs):
        tstart = time.time()
        for start in xrange(0, n_train_rows, batch_size):
            end = start + batch_size
            train(Xtrain[start:end], ytrain[start:end])
            if args.unittest:
                return
        elapsed = time.time() - tstart

        # Training metrics are computed on a prefix as long as the test set.
        trainerr, trainloss = computeloss(Xtrain[:n_test_rows], ytrain[:n_test_rows])
        testerr, testloss = computeloss(Xtest, ytest)
        row = [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]
        print(fmt_row(10, row))

    # NOTE(review): started with cgt.profiler, reported with
    # cgt.execution.profiler -- confirm these are the same object.
    if args.profile:
        cgt.execution.profiler.print_stats()
def run_training(self, input, stepsize=0.01, epochs=10, output='None', batch_size=128, grad_check=True, profile=False, step_decrease_rate=0.5, step_decrease_time=1000):
    """Run NN training from a Matlab data file and optionally save test predictions.

    The file named by ``input`` is loaded with ``scipy.io.loadmat`` and must
    contain:
        im_data:    flattened images
        state_data: concatenated one-hot vectors for each state variable
        value_data: read alongside the above and placed between the image and
                    state columns
        label_data: one-hot vector for action (state difference)

    The first 6/7 of the rows are shuffled and used for training; the rest form
    the test set. When ``grad_check`` is true (the default) the method switches
    to quad precision, runs a gradient check on the first parameter and returns
    without training. Otherwise it trains with RMSProp for ``epochs`` epochs,
    multiplying the step size by ``step_decrease_rate`` at every epoch index
    that is a positive multiple of ``step_decrease_time``. If ``output`` is not
    the string 'None', the test inputs and ``self.y_out`` of them are written
    to that file with ``scipy.io.savemat``.
    """
    def compile_train(step):
        # (Re)build the RMSProp updates and the compiled training function.
        self.updates = rmsprop_updates(self.cost_drop, self.params, stepsize=step)
        self.train = cgt.function(inputs=[self.X, self.y], outputs=[], updates=self.updates)

    if grad_check:
        cgt.set_precision("quad")

    matlab_data = sio.loadmat(input)
    im_data = matlab_data["im_data"]
    im_data = (im_data - 1) / 255  # obstacles = 1, free zone = 0
    state_data = matlab_data["state_data"]
    value_data = matlab_data["value_data"]
    label_data = matlab_data["label_data"]

    # Feature columns are laid out as [image | value | state].
    image_and_value = np.concatenate((im_data, value_data), axis=1)
    features = np.concatenate((image_and_value, state_data), axis=1)
    Xdata = features.astype(cgt.floatX)
    ydata = label_data

    training_samples = int(6 / 7.0 * Xdata.shape[0])
    Xtrain, ytrain = Xdata[0:training_samples], ydata[0:training_samples]
    Xtest, ytest = Xdata[training_samples:], ydata[training_samples:]

    shuffled = np.random.permutation(training_samples)
    Xtrain, ytrain = Xtrain[shuffled], ytrain[shuffled]

    compile_train(stepsize)

    from cgt.tests import gradcheck_model
    if grad_check:
        # Clone the no-dropout cost with X and y mapped to one training example.
        first_example = {self.X: Xtrain[:1], self.y: ytrain[:1]}
        cost_nodrop = cgt.core.clone(self.cost_nodrop, first_example)
        print("doing gradient check...")
        print("------------------------------------")
        gradcheck_model(cost_nodrop, self.params[0:1])
        print("success!")
        return

    if profile:
        cgt.profiler.start()

    print(fmt_row(10, ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"]))

    n_eval = len(Xtest)  # training metrics use a prefix as long as the test set
    for i_epoch in xrange(int(epochs)):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            batch = slice(start, start + batch_size)
            self.train(Xtrain[batch], ytrain[batch])
        elapsed = time.time() - tstart

        trainerr, trainloss = self.computeloss(Xtrain[:n_eval], ytrain[:n_eval])
        testerr, testloss = self.computeloss(Xtest, ytest)
        print(fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]))

        # Both conditions are evaluated on every epoch, including epoch 0.
        past_first_epoch = i_epoch > 0
        on_decay_schedule = i_epoch % step_decrease_time == 0
        if past_first_epoch and on_decay_schedule:
            stepsize = step_decrease_rate * stepsize
            compile_train(stepsize)
            # NOTE(review): assumes the step size is printed only when it
            # changes -- confirm against the original indentation.
            print(stepsize)

    # NOTE(review): started with cgt.profiler, reported with
    # cgt.execution.profiler -- confirm these are the same object.
    if profile:
        cgt.execution.profiler.print_stats()

    # save Matlab data
    if output != 'None':
        predictions = self.y_out(Xtest)
        sio.savemat(file_name=output, mdict={'in': Xtest, 'out': predictions})