def to_one_hot(y, nb_class, dtype=None):
    """
    Return a matrix where each row corresponds to the one hot
    encoding of each element in y.

    Parameters
    ----------
    y
        A vector of integer value between 0 and nb_class - 1.
    nb_class : int
        The number of classes in y.
    dtype : data-type
        The dtype of the returned matrix. Default floatX.

    Returns
    -------
    object
        A matrix of shape (y.shape[0], nb_class), where each row ``i``
        is the one hot encoding of the corresponding ``y[i]`` value.
    """
    fill_vals = cgt.ones((y.shape[0],))
    ret = cgt.zeros((y.shape[0], nb_class), dtype)
    d1 = cgt.arange(y.shape[0])
    # Cast labels to a 64-bit integer index type.  The previous 'i1'
    # (int8) cast silently overflowed for nb_class > 127; 'i8' matches
    # the dtype this codebase already uses for label vectors.
    d2 = cgt.cast(y, 'i8')
    ret = cgt.inc_subtensor(ret, [d1, d2], fill_vals)
    return ret
def test_setting_weights(): X = cgt.matrix("X", fixed_shape=(None, 28*28)) model = build_model(X, 0.0) nnbuilder.set_all_weights(model, 'mnist.p') y = cgt.vector("y", dtype='i8') cost = -cgt.mean(categorical.loglik(y, model)) selected_number = cgt.argmax(model, axis=1) err_nodrop = cgt.cast(cgt.not_equal(selected_number, y), cgt.floatX).mean() computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost]) Xdata, ydata = load_data() Xtrain = Xdata[0:60000] ytrain = ydata[0:60000] Xtest = Xdata[60000:70000] ytest = ydata[60000:70000] sortinds = np.random.permutation(60000) Xtrain = Xtrain[sortinds] ytrain = ytrain[sortinds] print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"]) for i_epoch in xrange(3): tstart = time.time() elapsed = time.time() - tstart trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)]) testerr, testloss = computeloss(Xtest, ytest) print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
def mean(x, axis=None, keepdims=False):
    """
    Like numpy.mean

    Parameters
    ----------
    x
        Input tensor.
    axis : int or sequence of ints, optional
        Axis or axes to average over. Defaults to all axes.
    keepdims : bool
        If True, reduced axes are retained with size one.

    Returns
    -------
    The average of ``x`` over the requested axes.
    """
    # Promote integer inputs to floatX so the sum cannot overflow a
    # narrow integer type and the division yields a true mean.  The
    # previous check only handled 'i1' (int8); other integer widths
    # were averaged with integer arithmetic.
    if x.dtype[0] in 'iu':
        x = cgt.cast(x, cgt.floatX)
    axes = _red_axes(axis, x.ndim)
    return sum(x, axis=axes, keepdims=keepdims) / mul_multi([size(x, ax) for ax in axes])
def to_one_hot(y, nb_class, dtype=None):
    """One-hot encode the integer vector *y*.

    Parameters
    ----------
    y
        A vector of integer value between 0 and nb_class - 1.
    nb_class : int
        The number of classes in y.
    dtype : data-type
        The dtype of the returned matrix. Default floatX.

    Returns
    -------
    object
        Matrix of shape (y.shape[0], nb_class) whose row ``i`` is the
        one hot encoding of ``y[i]``.
    """
    n_rows = y.shape[0]
    onehot = cgt.zeros((n_rows, nb_class), dtype)
    row_idx = cgt.arange(n_rows)
    col_idx = cgt.cast(y, 'i1')
    ones = cgt.ones((n_rows,))
    return cgt.inc_subtensor(onehot, [row_idx, col_idx], ones)
def main(): print("Loading data...") X = cgt.matrix("X", fixed_shape=(None, 28*28)) y = cgt.vector("y", dtype='i8') model = build_model(X, 0.0) loss = -cgt.mean(categorical.loglik(y, model)) updates = nn.rmsprop(loss, nn.get_parameters(loss), 0.01) train = cgt.function(inputs=[X, y], outputs=[], updates=updates) y_nodrop = cgt.argmax(model, axis=1) cost_nodrop = -cgt.mean(categorical.loglik(y, model)) err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean() computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost_nodrop]) batch_size=128 Xdata, ydata = load_data() Xtrain = Xdata[0:60000] ytrain = ydata[0:60000] Xtest = Xdata[60000:70000] ytest = ydata[60000:70000] sortinds = np.random.permutation(60000) Xtrain = Xtrain[sortinds] ytrain = ytrain[sortinds] print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"]) for i_epoch in xrange(3): tstart = time.time() for start in xrange(0, Xtrain.shape[0], batch_size): end = start+batch_size train(Xtrain[start:end], ytrain[start:end]) elapsed = time.time() - tstart trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)]) testerr, testloss = computeloss(Xtest, ytest) print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]) nnbuilder.save_weights(model, 'mnist')
def make_funcs(opt, ntm, total_time, loss_timesteps):
    # Build the NTM loss graph unrolled over `total_time` steps and compile
    # functions for the loss and its flattened gradient.
    #
    # Parameters
    # ----------
    # opt : options object; reads batch size `b`, input width `k`, output width `p`
    # ntm : callable mapping [x_t] + state arrays -> [raw_pred] + next state arrays
    # total_time : int, number of unrolled timesteps
    # loss_timesteps : iterable of timesteps at which the loss is accumulated
    #
    # Returns
    # -------
    # (f_loss, f_loss_and_grad, params)
    x_tbk = cgt.tensor3("x", fixed_shape=(total_time, opt.b, opt.k))
    y_tbp = cgt.tensor3("y", fixed_shape=(total_time, opt.b, opt.p))
    loss_timesteps = set(loss_timesteps)  # O(1) membership test in the loop below
    initial_states = make_ntm_initial_states(opt)
    params = ntm.get_parameters() + get_parameters(initial_states)
    # params = ntm.get_parameters()
    lossCE = 0
    loss01 = 0
    state_arrs = initial_states
    for t in xrange(total_time):
        # One NTM step: first output is the raw prediction, the next three
        # are the recurrent state arrays threaded into the next step.
        tmp = ntm([x_tbk[t]] + state_arrs)
        raw_pred = tmp[0]
        state_arrs = tmp[1:4]
        if t in loss_timesteps:
            p_pred = cgt.sigmoid(raw_pred)
            ce = bernoulli_crossentropy( y_tbp[t], p_pred).sum() # cross-entropy of bernoulli distribution
            lossCE = lossCE + ce
            # Count of output bits where the thresholded prediction matches y.
            loss01 = loss01 + cgt.cast(cgt.equal(y_tbp[t], round01(p_pred)), cgt.floatX).sum()
    # Normalize: lossCE becomes bits per output unit (divide by log 2);
    # loss01 becomes the fraction of matching bits.
    lossCE = lossCE / (len(loss_timesteps) * opt.p * opt.b) / np.log(2)
    loss01 = loss01 / (len(loss_timesteps) * opt.p * opt.b)
    gradloss = cgt.grad(lossCE, params)
    flatgrad = flatcat(gradloss)
    f_loss = cgt.function([x_tbk, y_tbp], lossCE)
    f_loss_and_grad = cgt.function([x_tbk, y_tbp], [lossCE, loss01, flatgrad])
    print "number of nodes in computation graph:", core.count_nodes( [lossCE, loss01, flatgrad])
    return f_loss, f_loss_and_grad, params
def main(): X = cgt.matrix(name='data', dtype=cgt.floatX, fixed_shape=(None, 2212)) y = cgt.vector("y", dtype='i8') model = build_nn(X) loss = -cgt.mean(categorical.loglik(y, model)) updates = nn.adagrad(loss, nn.get_parameters(loss), 0.01) y_nodrop = cgt.argmax(model, axis=1) cost_nodrop = -cgt.mean(categorical.loglik(y, model)) err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean() train = cgt.function(inputs=[X, y], outputs=[], updates=updates) computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost_nodrop]) batch_size = 20 Xdata, ydata = load_data() Xtrain = Xdata[0:5200] ytrain = ydata[0:5200] Xtest = Xdata[5200:5573] ytest = ydata[5200:5573] sortinds = np.random.permutation(5200) Xtrain = Xtrain[sortinds] ytrain = ytrain[sortinds] print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"]) for i_epoch in xrange(20): tstart = time.time() for start in xrange(0, Xtrain.shape[0], batch_size): end = start+batch_size train(Xtrain[start:end], ytrain[start:end]) elapsed = time.time() - tstart trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)]) testerr, testloss = computeloss(Xtest, ytest) print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
def make_funcs(opt, ntm, total_time, loss_timesteps): x_tbk = cgt.tensor3("x", fixed_shape=(total_time, opt.b, opt.k)) y_tbp = cgt.tensor3("y", fixed_shape=(total_time, opt.b, opt.p)) loss_timesteps = set(loss_timesteps) initial_states = make_ntm_initial_states(opt) params = ntm.get_parameters() + get_parameters(initial_states) # params = ntm.get_parameters() lossCE = 0 loss01 = 0 state_arrs = initial_states for t in xrange(total_time): tmp = ntm([x_tbk[t]] + state_arrs) raw_pred = tmp[0] state_arrs = tmp[1:4] if t in loss_timesteps: p_pred = cgt.sigmoid(raw_pred) ce = bernoulli_crossentropy(y_tbp[t] , p_pred).sum() # cross-entropy of bernoulli distribution lossCE = lossCE + ce loss01 = loss01 + cgt.cast(cgt.equal(y_tbp[t], round01(p_pred)),cgt.floatX).sum() lossCE = lossCE / (len(loss_timesteps) * opt.p * opt.b) / np.log(2) loss01 = loss01 / (len(loss_timesteps) * opt.p * opt.b) gradloss = cgt.grad(lossCE, params) flatgrad = flatcat(gradloss) f_loss = cgt.function([x_tbk, y_tbp], lossCE) f_loss_and_grad = cgt.function([x_tbk, y_tbp], [lossCE, loss01, flatgrad]) print "number of nodes in computation graph:", core.count_nodes([lossCE, loss01, flatgrad]) return f_loss, f_loss_and_grad, params
def main():
    # Command-line driver: trains a dense MLP or a convnet on MNIST,
    # optionally with dropout, a finite-difference gradient check, or profiling.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--dropout", action="store_true")
    parser.add_argument("--stepsize", type=float, default=.001)
    parser.add_argument("--model", choices=["dense", "conv"], default="dense")
    parser.add_argument("--unittest", action="store_true")
    parser.add_argument("--grad_check", action="store_true")
    args = parser.parse_args()
    # Quad precision keeps the numeric gradient check meaningful.
    if args.grad_check: cgt.set_precision("quad")
    # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/
    # converted to npz
    mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz")
    Xdata = (mnist["X"] / 255.).astype(cgt.floatX)  # scale pixels to [0, 1]
    ydata = mnist["y"]
    np.random.seed(0)
    if args.model == "conv":
        Xdata = Xdata.reshape(-1, 1, 28, 28)  # NCHW layout for the convnet
    # Standard MNIST split: 60k train / 10k test, with the training set shuffled.
    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]
    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]
    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]
    X = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28)) if args.model == "conv" else cgt.matrix(
        "X", fixed_shape=(None, 28 * 28))
    y = cgt.vector("y", dtype='i8')
    if args.model == "dense":
        p_drop_input, p_drop_hidden = (0.2, 0.5) if args.dropout else (0, 0)
        w_h = init_weights(784, 256)
        w_h2 = init_weights(256, 256)
        w_o = init_weights(256, 10)
        # Two graphs over shared weights: dropout for training, none for evaluation.
        pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden)
        pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.)
        params = [w_h, w_h2, w_o]
    elif args.model == "conv":
        p_drop_conv, p_drop_hidden = (0.2, 0.5) if args.dropout else (0, 0)
        w = init_weights(32, 1, 3, 3)
        w2 = init_weights(64, 32, 3, 3)
        w3 = init_weights(128, 64, 3, 3)
        w4 = init_weights(128 * 2 * 2, 625)
        w_o = init_weights(625, 10)
        pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden)
        pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.)
        params = [w, w2, w3, w4, w_o]
    else:
        raise RuntimeError("Unreachable")
    # Train on the dropout graph; evaluate error/NLL on the dropout-free one.
    cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop))
    updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize)
    y_nodrop = cgt.argmax(pofy_nodrop, axis=1)
    cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop))
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()
    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)
    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost_nodrop])
    batch_size = 128
    from cgt.tests import gradcheck_model
    if args.grad_check:
        # Bind a single training example into the graph so the check is cheap;
        # only the first parameter is checked.
        cost_nodrop = cgt.core.clone(cost_nodrop, {
            X: Xtrain[:1],
            y: ytrain[:1]
        })
        print "doing gradient check..."
        print "------------------------------------"
        gradcheck_model(cost_nodrop, params[0:1])
        print "success!"
        return
    if args.profile: cgt.profiler.start()
    print fmt_row(10, [
        "Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"
    ])
    for i_epoch in xrange(args.epochs):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start + batch_size
            train(Xtrain[start:end], ytrain[start:end])
            if args.unittest: return  # one minibatch suffices as a smoke test
        elapsed = time.time() - tstart
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(
            10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
    if args.profile: cgt.execution.profiler.print_stats()
) pool1 = nn.max_pool_2d(conv1, kernelshape=(2,2), stride=(2,2)) conv2 = nn.rectify( nn.SpatialConvolution(32, 32, kernelshape=(3,3), stride=(1,1), pad=(1,1), weight_init=nn.IIDGaussian(std=.1))(pool1) ) pool2 = nn.max_pool_2d(conv2, kernelshape=(2,2), stride=(2,2)) d0, d1, d2, d3 = pool2.shape flat = pool2.reshape([d0, d1*d2*d3]) nfeats = cgt.infer_shape(flat)[1] probs = nn.softmax(nn.Affine(nfeats, 10)(flat)) cost = -categorical.loglik(y, probs).mean() y_preds = cgt.argmax(probs, axis=1) err = cgt.cast(cgt.not_equal(y, y_preds), cgt.floatX).mean() params = nn.get_parameters(cost) updates = nn.sgd(cost, params, 1e-3) # training function f = cgt.function(inputs=[X, y], outputs=[], updates=updates) # compute the cost and error cost_and_err = cgt.function(inputs=[X, y], outputs=[cost, err]) for i in xrange(epochs): t0 = time.time() for start in xrange(0, Xtrain.shape[0], batch_size): end = batch_size + start f(Xtrainimg[start:end], ytrain[start:end]) elapsed = time.time() - t0
def round01(x):
    """Threshold *x* at 0.5, yielding 0.0/1.0 values of dtype floatX."""
    thresholded = x > .5
    return cgt.cast(thresholded, cgt.floatX)
def main():
    # Command-line driver: trains a dense MLP or a convnet on MNIST on the
    # chosen device (cpu/gpu), optionally with dropout, gradient checking,
    # or profiling.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--dropout", action="store_true")
    parser.add_argument("--stepsize", type=float, default=.001)
    parser.add_argument("--model", choices=["dense", "conv"], default="dense")
    parser.add_argument("--unittest", action="store_true")
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--devtype", choices=["cpu", "gpu"], default="cpu")
    args = parser.parse_args()
    # Quad precision keeps the numeric gradient check meaningful.
    if args.grad_check: cgt.set_precision("quad")
    # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/
    # converted to npz
    mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz")
    Xdata = (mnist["X"] / 255.).astype(cgt.floatX)  # scale pixels to [0, 1]
    ydata = mnist["y"]
    np.random.seed(0)
    # Select the compute device and the native (compiled) execution backend.
    cgt.update_config(default_device=cgt.core.Device(devtype=args.devtype), backend="native")
    if args.model == "conv":
        Xdata = Xdata.reshape(-1, 1, 28, 28)  # NCHW layout for the convnet
    # Standard MNIST split: 60k train / 10k test, training set shuffled.
    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]
    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]
    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]
    X = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28)) if args.model == "conv" else cgt.matrix("X", fixed_shape=(None, 28*28))
    y = cgt.vector("y", dtype='i8')
    if args.model == "dense":
        p_drop_input, p_drop_hidden = (0.2, 0.5) if args.dropout else (0, 0)
        w_h = init_weights(784, 256)
        w_h2 = init_weights(256, 256)
        w_o = init_weights(256, 10)
        # Two graphs over shared weights: dropout for training, none for eval.
        pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden)
        pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.)
        params = [w_h, w_h2, w_o]
    elif args.model == "conv":
        p_drop_conv, p_drop_hidden = (0.2, 0.5) if args.dropout else (0, 0)
        w = init_weights(32, 1, 3, 3)
        w2 = init_weights(64, 32, 3, 3)
        w3 = init_weights(128, 64, 3, 3)
        w4 = init_weights(128 * 2 * 2, 625)
        w_o = init_weights(625, 10)
        pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden)
        pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.)
        params = [w, w2, w3, w4, w_o]
    else:
        raise RuntimeError("Unreachable")
    # Train on the dropout graph; evaluate error/NLL on the dropout-free one.
    cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop))
    updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize)
    y_nodrop = cgt.argmax(pofy_nodrop, axis=1)
    cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop))
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()
    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)
    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost_nodrop])
    batch_size = 128
    from cgt.tests import gradcheck_model
    if args.grad_check:
        # Bind a single training example into the graph so the check is cheap;
        # only the first parameter is checked.
        cost_nodrop = cgt.core.clone(cost_nodrop, {X: Xtrain[:1], y: ytrain[:1]})
        print "doing gradient check..."
        print "------------------------------------"
        gradcheck_model(cost_nodrop, params[0:1])
        print "success!"
        return
    if args.profile: cgt.profiler.start()
    print fmt_row(10, ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"])
    for i_epoch in xrange(args.epochs):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start + batch_size
            train(Xtrain[start:end], ytrain[start:end])
            if args.unittest: return  # one minibatch suffices as a smoke test
        elapsed = time.time() - tstart
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
    if args.profile: cgt.execution.profiler.print_stats()
def round01(x):
    """Binarize *x*: entries greater than 0.5 become 1.0, the rest 0.0 (floatX)."""
    return cgt.cast(x > 0.5, cgt.floatX)
def __init__(self, model="dense", im_size=[28, 28], dropout=True,
             devtype="cpu", grad_check=True, reg=0):
    # Build one of three MLP classifiers ("dense1"/"dense2"/"dense3") over a
    # flattened grid input, with optional dropout and L1 regularization, and
    # compile train / loss / prediction functions.
    #
    # NOTE(review): `im_size` is a mutable default argument; this is safe only
    # because the list is never mutated in this method.
    if grad_check: cgt.set_precision("quad")  # high precision for numeric grad checks
    self.model = model
    self.reg = reg  # L1 regularization coefficient
    np.random.seed(0)
    cgt.update_config(default_device=cgt.core.Device(devtype=devtype), backend="native")
    print(model)
    # MLP with 1 hidden layer
    if model == "dense1":
        # Input width: 2*H*W grid features plus H + W extras, flattened.
        # Exact meaning of the extra H + W entries is not visible here —
        # TODO confirm against the caller that builds X.
        self.Xsize = 2 * im_size[0] * im_size[1] + im_size[0] + im_size[1]
        self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize))
        self.y = cgt.vector("y", dtype='i8')
        self.p_drop_input, self.p_drop_hidden = (0.2, 0.5) if dropout else (0, 0)
        self.w_h = init_weights(self.Xsize, 256)
        self.w_o = init_weights(256, 8)
        # Two graphs over shared weights: dropout for training, none for eval.
        self.pofy_drop = dense_model1(self.X, self.w_h, self.w_o,
                                      self.p_drop_input, self.p_drop_hidden)
        self.pofy_nodrop = dense_model1(self.X, self.w_h, self.w_o, 0., 0.)
        self.params = [self.w_h, self.w_o]
        # L1 penalty over all weight matrices.
        self.l1 = cgt.abs(self.w_h).sum() + cgt.abs(self.w_o).sum()
        self.cost_drop = -cgt.mean(
            categorical.loglik(self.y, self.pofy_drop)) + self.reg * self.l1
    # MLP with 2 hidden layers
    elif model == "dense2":
        self.Xsize = 2 * im_size[0] * im_size[1] + im_size[0] + im_size[1]
        self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize))
        self.y = cgt.vector("y", dtype='i8')
        self.p_drop_input, self.p_drop_hidden = (0.2, 0.5) if dropout else (0, 0)
        self.w_h = init_weights(self.Xsize, 256)
        self.w_h2 = init_weights(256, 256)
        self.w_o = init_weights(256, 8)
        self.pofy_drop = dense_model2(self.X, self.w_h, self.w_h2, self.w_o,
                                      self.p_drop_input, self.p_drop_hidden)
        self.pofy_nodrop = dense_model2(self.X, self.w_h, self.w_h2, self.w_o, 0., 0.)
        self.params = [self.w_h, self.w_h2, self.w_o]
        self.l1 = cgt.abs(self.w_h).sum() + cgt.abs(
            self.w_h2).sum() + cgt.abs(self.w_o).sum()
        self.cost_drop = -cgt.mean(
            categorical.loglik(self.y, self.pofy_drop)) + self.reg * self.l1
    # MLP with 3 hidden layers
    elif model == "dense3":
        self.Xsize = 2 * im_size[0] * im_size[1] + im_size[0] + im_size[1]
        self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize))
        self.y = cgt.vector("y", dtype='i8')
        # dense3 takes a list of per-hidden-layer dropout probabilities.
        self.p_drop_input, self.p_drop_hidden = (
            0.0, [0.5, 0.5, 0.5]) if dropout else (0, [0, 0, 0])
        self.w_h = init_weights(self.Xsize, 256)
        self.w_h2 = init_weights(256, 256)
        self.w_h3 = init_weights(256, 256)
        self.w_o = init_weights(256, 8)
        self.pofy_drop = dense_model3(self.X, self.w_h, self.w_h2, self.w_h3,
                                      self.w_o, self.p_drop_input,
                                      self.p_drop_hidden)
        self.pofy_nodrop = dense_model3(self.X, self.w_h, self.w_h2,
                                        self.w_h3, self.w_o, 0., [0., 0., 0.])
        self.params = [self.w_h, self.w_h2, self.w_h3, self.w_o]
        self.l1 = cgt.abs(self.w_h).sum() + cgt.abs(self.w_h2).sum() + cgt.abs(self.w_h3).sum() + \
            cgt.abs(self.w_o).sum()
        self.cost_drop = -cgt.mean(
            categorical.loglik(self.y, self.pofy_drop)) + self.reg * self.l1
    else:
        raise RuntimeError("Unknown Model")
    # Evaluation graph (no dropout): predicted class, NLL, and error rate.
    self.y_nodrop = cgt.argmax(self.pofy_nodrop, axis=1)
    self.cost_nodrop = -cgt.mean(
        categorical.loglik(self.y, self.pofy_nodrop))
    self.err_nodrop = cgt.cast(cgt.not_equal(self.y_nodrop, self.y),
                               cgt.floatX).mean()
    self.computeloss = cgt.function(
        inputs=[self.X, self.y],
        outputs=[self.err_nodrop, self.cost_nodrop])
    self.y_out = cgt.function(inputs=[self.X], outputs=[self.y_nodrop])
    # Training minimizes the dropout-regularized cost with RMSProp.
    self.updates = rmsprop_updates(self.cost_drop, self.params)
    self.train = cgt.function(inputs=[self.X, self.y],
                              outputs=[],
                              updates=self.updates)