Example #1
0
File: api.py  Project: Quantza/cgt
def to_one_hot(y, nb_class, dtype=None):
    """
    Return a matrix where each row corresponds to the one hot
    encoding of each element in y.
    Parameters
    ----------
    y
        A vector of integer value between 0 and nb_class - 1.
    nb_class : int
        The number of classes in y.
    dtype : data-type
        The dtype of the returned matrix. Default floatX.
    Returns
    -------
    object
        A matrix of shape (y.shape[0], nb_class), where each row ``i`` is
        the one hot encoding of the corresponding ``y[i]`` value.
    """
    
    # One fill value (1.0) per row of y; these become the "hot" entries.
    fill_vals = cgt.ones((y.shape[0],))
    # All-zero (n, nb_class) matrix; dtype=None presumably falls back to
    # cgt's floatX default inside cgt.zeros -- TODO confirm.
    ret = cgt.zeros((y.shape[0], nb_class), dtype)
    
    # Coordinates of the hot entries: row index i paired with class y[i].
    d1 = cgt.arange(y.shape[0])
    # NOTE(review): 'i1' is int8, which appears to cap usable class indices
    # at 127 before overflow -- verify this is intended.
    d2 = cgt.cast(y, 'i1')
    
    # Add 1 at position (i, y[i]) for every row i.
    ret = cgt.inc_subtensor(ret, [d1, d2], fill_vals)
    
    return ret
Example #2
0
def test_setting_weights():
    """Load saved weights into the MNIST model and report train/test error.

    Builds the model, restores weights from 'mnist.p', then prints NLL and
    error on the train and test splits for 3 epochs.
    """
    X = cgt.matrix("X", fixed_shape=(None, 28*28))
    model = build_model(X, 0.0)
    # Restore previously trained parameters into the model.
    nnbuilder.set_all_weights(model, 'mnist.p')
    y = cgt.vector("y", dtype='i8')
    # Negative mean log-likelihood of the true labels.
    cost = -cgt.mean(categorical.loglik(y, model))
    selected_number = cgt.argmax(model, axis=1)
    # Misclassification rate.
    err_nodrop = cgt.cast(cgt.not_equal(selected_number, y), cgt.floatX).mean()
    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost])

    Xdata, ydata = load_data()

    # Standard MNIST split: first 60k train, last 10k test.
    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    # Shuffle the training split.
    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(3):
        tstart = time.time()
        # NOTE(review): `elapsed` is taken immediately after `tstart` (so ~0),
        # and no training step runs inside this loop -- each "epoch" just
        # re-evaluates the fixed, loaded weights. Confirm intended.
        elapsed = time.time() - tstart
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
Example #3
0
File: api.py  Project: zclfly/cgt
def mean(x, axis=None, keepdims=False):
    """
    Like numpy.mean
    """    
    # Promote int8 inputs to floatX so the division below is not integer
    # division. NOTE(review): only 'i1' is handled; other integer dtypes
    # presumably pass through unchanged -- confirm intended.
    if x.dtype == 'i1': x = cgt.cast(x, cgt.floatX)
    # Normalize `axis` into a concrete tuple of axes to reduce over.
    axes = _red_axes(axis, x.ndim)
    # Sum over the axes, then divide by the product of the reduced sizes.
    # (`sum` here takes axis/keepdims kwargs, so it must be this module's
    # own sum, not the builtin.)
    return sum(x, axis=axes, keepdims=keepdims) / mul_multi([size(x, ax) for ax in axes])
Example #4
0
File: api.py  Project: xyuan/cgt
def to_one_hot(y, nb_class, dtype=None):
    """One-hot encode the integer vector *y*.

    Parameters
    ----------
    y
        A vector of integer value between 0 and nb_class - 1.
    nb_class : int
        The number of classes in y.
    dtype : data-type
        The dtype of the returned matrix. Default floatX.

    Returns
    -------
    object
        A matrix of shape (y.shape[0], nb_class) whose row ``i`` is the
        one hot encoding of the corresponding ``y[i]`` value.
    """
    n_rows = y.shape[0]

    # Zero matrix that will receive exactly one 1 per row.
    onehot = cgt.zeros((n_rows, nb_class), dtype)

    # Coordinates of the hot entries: (row index, class index per row).
    row_idx = cgt.arange(n_rows)
    col_idx = cgt.cast(y, 'i1')

    # Write a 1 at each (i, y[i]) coordinate and return the result.
    return cgt.inc_subtensor(onehot, [row_idx, col_idx], cgt.ones((n_rows,)))
Example #5
0
def main():
    """Train the MNIST model with RMSProp for 3 epochs and save its weights.

    Loads data, trains in minibatches of 128, prints per-epoch train/test
    NLL and error, then saves the weights under 'mnist'.
    """
    print("Loading data...")
    X = cgt.matrix("X", fixed_shape=(None, 28*28))
    y = cgt.vector("y", dtype='i8')

    model = build_model(X, 0.0)
    # Negative mean log-likelihood: the training objective.
    loss = -cgt.mean(categorical.loglik(y, model))

    updates = nn.rmsprop(loss, nn.get_parameters(loss), 0.01)
    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)

    y_nodrop = cgt.argmax(model, axis=1)

    cost_nodrop = -cgt.mean(categorical.loglik(y, model))
    # Misclassification rate.
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()

    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost_nodrop])


    batch_size=128
    Xdata, ydata = load_data()

    # Standard MNIST split: 60k train / 10k test.
    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    # Shuffle the training split.
    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(3):
        tstart = time.time()
        # One pass over the training set in minibatches.
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start+batch_size
            train(Xtrain[start:end], ytrain[start:end])
        elapsed = time.time() - tstart
        # Train metrics are computed on a subset matching the test-set size.
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])

    nnbuilder.save_weights(model, 'mnist')
def make_funcs(opt, ntm, total_time, loss_timesteps):
    """Build CGT functions computing the NTM loss (and its gradient).

    `opt` carries sizes b/k/p (presumably batch, input dim, output dim --
    inferred from the fixed shapes below; confirm against callers), `ntm`
    is the callable network, `total_time` the unroll length, and
    `loss_timesteps` the steps at which the loss is accumulated.
    Returns (f_loss, f_loss_and_grad, params).
    """
    x_tbk = cgt.tensor3("x", fixed_shape=(total_time, opt.b, opt.k))
    y_tbp = cgt.tensor3("y", fixed_shape=(total_time, opt.b, opt.p))
    # Set for O(1) membership tests in the unroll loop below.
    loss_timesteps = set(loss_timesteps)

    initial_states = make_ntm_initial_states(opt)
    params = ntm.get_parameters() + get_parameters(initial_states)
    # params = ntm.get_parameters()

    lossCE = 0
    loss01 = 0

    # Unroll the NTM over time, threading its 3 state arrays through steps.
    state_arrs = initial_states
    for t in xrange(total_time):
        tmp = ntm([x_tbk[t]] + state_arrs)
        raw_pred = tmp[0]
        state_arrs = tmp[1:4]

        if t in loss_timesteps:
            p_pred = cgt.sigmoid(raw_pred)
            ce = bernoulli_crossentropy(
                y_tbp[t],
                p_pred).sum()  # cross-entropy of bernoulli distribution
            lossCE = lossCE + ce
            # NOTE(review): this counts *matching* bits (cgt.equal), so
            # `loss01` is the fraction correct rather than a 0-1 loss --
            # confirm the name matches intent.
            loss01 = loss01 + cgt.cast(cgt.equal(y_tbp[t], round01(p_pred)),
                                       cgt.floatX).sum()

    # Normalize to per-bit values; dividing by log(2) expresses CE in bits.
    lossCE = lossCE / (len(loss_timesteps) * opt.p * opt.b) / np.log(2)
    loss01 = loss01 / (len(loss_timesteps) * opt.p * opt.b)
    gradloss = cgt.grad(lossCE, params)

    # Concatenate all parameter gradients into a single flat vector.
    flatgrad = flatcat(gradloss)

    f_loss = cgt.function([x_tbk, y_tbp], lossCE)
    f_loss_and_grad = cgt.function([x_tbk, y_tbp], [lossCE, loss01, flatgrad])

    print "number of nodes in computation graph:", core.count_nodes(
        [lossCE, loss01, flatgrad])

    return f_loss, f_loss_and_grad, params
Example #7
0
def main():
    """Train a small classifier with Adagrad on 2212-dim feature vectors.

    Uses a 5200/373 train/test split and prints per-epoch NLL and error
    for 20 epochs.
    """
    X = cgt.matrix(name='data', dtype=cgt.floatX, fixed_shape=(None, 2212))
    y = cgt.vector("y", dtype='i8')
    model = build_nn(X)
    # Negative mean log-likelihood: the training objective.
    loss = -cgt.mean(categorical.loglik(y, model))
    updates = nn.adagrad(loss, nn.get_parameters(loss), 0.01)

    y_nodrop = cgt.argmax(model, axis=1)

    cost_nodrop = -cgt.mean(categorical.loglik(y, model))
    # Misclassification rate.
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()

    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)
    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost_nodrop])

    batch_size = 20
    Xdata, ydata = load_data()

    # First 5200 examples train, remaining 373 test.
    Xtrain = Xdata[0:5200]
    ytrain = ydata[0:5200]

    Xtest = Xdata[5200:5573]
    ytest = ydata[5200:5573]

    # Shuffle the training split.
    sortinds = np.random.permutation(5200)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(20):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start+batch_size
            train(Xtrain[start:end], ytrain[start:end])
        elapsed = time.time() - tstart
        # Train metrics are computed on a subset matching the test-set size.
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
Example #8
0
def make_funcs(opt, ntm, total_time, loss_timesteps):    
    x_tbk = cgt.tensor3("x", fixed_shape=(total_time, opt.b, opt.k))
    y_tbp = cgt.tensor3("y", fixed_shape=(total_time, opt.b, opt.p))
    loss_timesteps = set(loss_timesteps)

    initial_states = make_ntm_initial_states(opt)
    params = ntm.get_parameters() + get_parameters(initial_states)
    # params = ntm.get_parameters()

    lossCE = 0
    loss01 = 0

    state_arrs = initial_states
    for t in xrange(total_time):
        tmp = ntm([x_tbk[t]] + state_arrs)
        raw_pred = tmp[0]
        state_arrs = tmp[1:4]

        if t in loss_timesteps:
            p_pred = cgt.sigmoid(raw_pred)
            ce = bernoulli_crossentropy(y_tbp[t] , p_pred).sum() # cross-entropy of bernoulli distribution
            lossCE = lossCE + ce
            loss01 = loss01 + cgt.cast(cgt.equal(y_tbp[t], round01(p_pred)),cgt.floatX).sum()


    lossCE = lossCE / (len(loss_timesteps) * opt.p * opt.b) / np.log(2)
    loss01 = loss01 / (len(loss_timesteps) * opt.p * opt.b)
    gradloss = cgt.grad(lossCE, params)

    flatgrad = flatcat(gradloss)

    f_loss = cgt.function([x_tbk, y_tbp], lossCE)
    f_loss_and_grad = cgt.function([x_tbk, y_tbp], [lossCE, loss01, flatgrad])

    print "number of nodes in computation graph:", core.count_nodes([lossCE, loss01, flatgrad])

    return f_loss, f_loss_and_grad, params
Example #9
0
def main():
    """Train an MNIST classifier (dense MLP or convnet) with RMSProp.

    Command-line flags select the model, dropout, stepsize and epochs, plus
    optional profiling, gradient checking, or single-batch unit-test mode.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--dropout", action="store_true")
    parser.add_argument("--stepsize", type=float, default=.001)
    parser.add_argument("--model", choices=["dense", "conv"], default="dense")
    parser.add_argument("--unittest", action="store_true")
    parser.add_argument("--grad_check", action="store_true")
    args = parser.parse_args()

    # Quad precision keeps finite-difference gradient checks accurate.
    if args.grad_check: cgt.set_precision("quad")

    # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/
    # converted to npz
    mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz")

    # Scale pixel values to [0, 1].
    Xdata = (mnist["X"] / 255.).astype(cgt.floatX)
    ydata = mnist["y"]

    np.random.seed(0)

    # The convnet expects (N, channel, height, width) input.
    if args.model == "conv":
        Xdata = Xdata.reshape(-1, 1, 28, 28)

    # Standard MNIST split: 60k train / 10k test.
    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    # Shuffle the training split.
    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    X = cgt.tensor4("X",
                    fixed_shape=(None, 1, 28,
                                 28)) if args.model == "conv" else cgt.matrix(
                                     "X", fixed_shape=(None, 28 * 28))
    y = cgt.vector("y", dtype='i8')

    if args.model == "dense":
        p_drop_input, p_drop_hidden = (0.2, 0.5) if args.dropout else (0, 0)
        w_h = init_weights(784, 256)
        w_h2 = init_weights(256, 256)
        w_o = init_weights(256, 10)
        # Two graphs over shared weights: dropout (train) and no-dropout (eval).
        pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden)
        pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.)
        params = [w_h, w_h2, w_o]
    elif args.model == "conv":
        p_drop_conv, p_drop_hidden = (0.2, 0.5) if args.dropout else (0, 0)
        w = init_weights(32, 1, 3, 3)
        w2 = init_weights(64, 32, 3, 3)
        w3 = init_weights(128, 64, 3, 3)
        w4 = init_weights(128 * 2 * 2, 625)
        w_o = init_weights(625, 10)
        pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv,
                                  p_drop_hidden)
        pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.)
        params = [w, w2, w3, w4, w_o]
    else:
        raise RuntimeError("Unreachable")

    # Train on the dropout graph; evaluate on the no-dropout graph.
    cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop))
    updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize)

    y_nodrop = cgt.argmax(pofy_nodrop, axis=1)
    cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop))
    # Misclassification rate.
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()

    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)
    computeloss = cgt.function(inputs=[X, y],
                               outputs=[err_nodrop, cost_nodrop])

    batch_size = 128

    from cgt.tests import gradcheck_model
    if args.grad_check:
        # Bind the symbolic inputs to a single example, then finite-difference
        # check the gradient w.r.t. the first parameter only.
        cost_nodrop = cgt.core.clone(cost_nodrop, {
            X: Xtrain[:1],
            y: ytrain[:1]
        })
        print "doing gradient check..."
        print "------------------------------------"
        gradcheck_model(cost_nodrop, params[0:1])
        print "success!"
        return

    if args.profile: cgt.profiler.start()

    print fmt_row(10, [
        "Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"
    ])
    for i_epoch in xrange(args.epochs):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start + batch_size
            train(Xtrain[start:end], ytrain[start:end])
            # --unittest: bail out after a single minibatch.
            if args.unittest: return
        elapsed = time.time() - tstart
        # Train metrics are computed on a subset matching the test-set size.
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)],
                                          ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(
            10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
    if args.profile: cgt.execution.profiler.print_stats()
Example #10
0
        )
pool1 = nn.max_pool_2d(conv1, kernelshape=(2,2), stride=(2,2))

# Second conv layer: 32 -> 32 channels, 3x3 kernel, stride 1, padding 1,
# followed by ReLU and 2x2 max pooling.
conv2 = nn.rectify(
        nn.SpatialConvolution(32, 32, kernelshape=(3,3), stride=(1,1), pad=(1,1), weight_init=nn.IIDGaussian(std=.1))(pool1)
        )
pool2 = nn.max_pool_2d(conv2, kernelshape=(2,2), stride=(2,2))
d0, d1, d2, d3 = pool2.shape

# Flatten the pooled feature maps into (batch, features) for the classifier.
flat = pool2.reshape([d0, d1*d2*d3])
nfeats = cgt.infer_shape(flat)[1]
probs = nn.softmax(nn.Affine(nfeats, 10)(flat))
# Negative mean log-likelihood: the training objective.
cost = -categorical.loglik(y, probs).mean()

y_preds = cgt.argmax(probs, axis=1)
# Misclassification rate.
err = cgt.cast(cgt.not_equal(y, y_preds), cgt.floatX).mean()

params = nn.get_parameters(cost)
updates = nn.sgd(cost, params, 1e-3) 

# training function
f = cgt.function(inputs=[X, y], outputs=[], updates=updates)
# compute the cost and error
cost_and_err = cgt.function(inputs=[X, y], outputs=[cost, err])

for i in xrange(epochs):
    t0 = time.time()
    for start in xrange(0, Xtrain.shape[0], batch_size):
        end = batch_size + start
        # NOTE(review): feeds `Xtrainimg` while iterating `Xtrain.shape[0]` --
        # presumably the same data reshaped to images; verify lengths match.
        f(Xtrainimg[start:end], ytrain[start:end])
    elapsed = time.time() - t0
Example #11
0
def round01(x):
    """Threshold *x* at 0.5, returning 0./1. values cast to floatX."""
    is_high = x > .5
    return cgt.cast(is_high, cgt.floatX)
Example #12
0
File: demo_mnist.py  Project: EdsterG/cgt
def main():
    """Train an MNIST classifier (dense MLP or convnet) with RMSProp.

    Like the other demo script but adds a --devtype flag selecting CPU/GPU
    execution via cgt's native backend.
    """
    import argparse
    parser=argparse.ArgumentParser()
    parser.add_argument("--epochs",type=int,default=10)
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--dropout",action="store_true")
    parser.add_argument("--stepsize",type=float, default=.001)
    parser.add_argument("--model",choices=["dense","conv"],default="dense")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--grad_check",action="store_true")
    parser.add_argument("--devtype",choices=["cpu","gpu"],default="cpu")
    args = parser.parse_args()

    # Quad precision keeps finite-difference gradient checks accurate.
    if args.grad_check: cgt.set_precision("quad")

    # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/
    # converted to npz
    mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz")

    # Scale pixel values to [0, 1].
    Xdata = (mnist["X"]/255.).astype(cgt.floatX)
    ydata = mnist["y"]

    np.random.seed(0)

    # Select the compute device and the native (compiled) backend.
    cgt.update_config(default_device=cgt.core.Device(devtype=args.devtype), backend="native")

    # The convnet expects (N, channel, height, width) input.
    if args.model=="conv":
        Xdata = Xdata.reshape(-1, 1, 28, 28)

    # Standard MNIST split: 60k train / 10k test.
    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    # Shuffle the training split.
    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    X = cgt.tensor4("X",fixed_shape=(None,1,28,28)) if args.model=="conv" else cgt.matrix("X", fixed_shape=(None,28*28))
    y = cgt.vector("y",dtype='i8')

    if args.model == "dense":
        p_drop_input,p_drop_hidden = (0.2, 0.5) if args.dropout else (0,0)    
        w_h = init_weights(784, 256)
        w_h2 = init_weights(256, 256)
        w_o = init_weights(256, 10)
        # Two graphs over shared weights: dropout (train) and no-dropout (eval).
        pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden)
        pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.)
        params = [w_h, w_h2, w_o]        
    elif args.model == "conv":
        p_drop_conv,p_drop_hidden = (0.2, 0.5) if args.dropout else (0,0)            
        w = init_weights(32, 1, 3, 3)
        w2 = init_weights(64, 32, 3, 3)
        w3 = init_weights(128, 64, 3, 3)
        w4 = init_weights(128 * 2 * 2, 625)
        w_o = init_weights(625, 10)
        pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden)
        pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.)
        params = [w, w2, w3, w4, w_o]
    else:
        raise RuntimeError("Unreachable")

    # Train on the dropout graph; evaluate on the no-dropout graph.
    cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop))
    updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize)

    y_nodrop = cgt.argmax(pofy_nodrop, axis=1)
    cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop))
    # Misclassification rate.
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()

    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)
    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop,cost_nodrop])

    batch_size=128


    from cgt.tests import gradcheck_model
    if args.grad_check:
        # Bind the symbolic inputs to a single example, then finite-difference
        # check the gradient w.r.t. the first parameter only.
        cost_nodrop = cgt.core.clone(cost_nodrop, {X:Xtrain[:1],y:ytrain[:1]})
        print "doing gradient check..."
        print "------------------------------------"
        gradcheck_model(cost_nodrop, params[0:1])
        print "success!"
        return

    if args.profile: cgt.profiler.start()

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start+batch_size
            train(Xtrain[start:end], ytrain[start:end])
            # --unittest: bail out after a single minibatch.
            if args.unittest: return
        elapsed = time.time() - tstart
        # Train metrics are computed on a subset matching the test-set size.
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
    if args.profile: cgt.execution.profiler.print_stats()
Example #13
0
def round01(x):
    """Binarize *x*: 1.0 where x > 0.5, else 0.0, in floatX."""
    threshold = .5
    return cgt.cast(x > threshold, cgt.floatX)
Example #14
0
File: FC.py  Project: zuiwufenghua/VIN
    def __init__(self,
                 model="dense",
                 im_size=[28, 28],
                 dropout=True,
                 devtype="cpu",
                 grad_check=True,
                 reg=0):
        """Build the selected MLP variant and compile train/eval functions.

        model:    one of "dense1", "dense2", "dense3" (number of hidden layers).
                  NOTE(review): the default "dense" matches none of the branches
                  below and raises RuntimeError("Unknown Model") -- confirm.
        im_size:  input image dimensions used to size the input layer.
                  NOTE(review): mutable default list, shared across calls; it is
                  only read here, but verify no caller mutates it.
        dropout:  enable input/hidden dropout in the training graph.
        devtype:  "cpu" or "gpu", passed to cgt's device config.
        grad_check: switch cgt to quad precision for gradient checking.
        reg:      L1 regularization coefficient.
        """
        if grad_check: cgt.set_precision("quad")
        self.model = model
        self.reg = reg
        np.random.seed(0)
        # Select compute device and cgt's native (compiled) backend.
        cgt.update_config(default_device=cgt.core.Device(devtype=devtype),
                          backend="native")
        print(model)
        # MLP with 1 hidden layer
        if model == "dense1":
            # Input size: 2 image planes plus one extra unit per row and column.
            self.Xsize = 2 * im_size[0] * im_size[1] + im_size[0] + im_size[1]
            self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize))
            self.y = cgt.vector("y", dtype='i8')
            self.p_drop_input, self.p_drop_hidden = (0.2,
                                                     0.5) if dropout else (0,
                                                                           0)
            self.w_h = init_weights(self.Xsize, 256)
            self.w_o = init_weights(256, 8)
            # Two graphs over shared weights: dropout (train), no-dropout (eval).
            self.pofy_drop = dense_model1(self.X, self.w_h, self.w_o,
                                          self.p_drop_input,
                                          self.p_drop_hidden)
            self.pofy_nodrop = dense_model1(self.X, self.w_h, self.w_o, 0., 0.)
            self.params = [self.w_h, self.w_o]
            # L1 penalty over all weight matrices.
            self.l1 = cgt.abs(self.w_h).sum() + cgt.abs(self.w_o).sum()
            self.cost_drop = -cgt.mean(
                categorical.loglik(self.y,
                                   self.pofy_drop)) + self.reg * self.l1
        # MLP with 2 hidden layers
        elif model == "dense2":
            self.Xsize = 2 * im_size[0] * im_size[1] + im_size[0] + im_size[1]
            self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize))
            self.y = cgt.vector("y", dtype='i8')
            self.p_drop_input, self.p_drop_hidden = (0.2,
                                                     0.5) if dropout else (0,
                                                                           0)
            self.w_h = init_weights(self.Xsize, 256)
            self.w_h2 = init_weights(256, 256)
            self.w_o = init_weights(256, 8)
            self.pofy_drop = dense_model2(self.X, self.w_h, self.w_h2,
                                          self.w_o, self.p_drop_input,
                                          self.p_drop_hidden)
            self.pofy_nodrop = dense_model2(self.X, self.w_h, self.w_h2,
                                            self.w_o, 0., 0.)
            self.params = [self.w_h, self.w_h2, self.w_o]
            self.l1 = cgt.abs(self.w_h).sum() + cgt.abs(
                self.w_h2).sum() + cgt.abs(self.w_o).sum()
            self.cost_drop = -cgt.mean(
                categorical.loglik(self.y,
                                   self.pofy_drop)) + self.reg * self.l1
        # MLP with 3 hidden layers
        elif model == "dense3":
            self.Xsize = 2 * im_size[0] * im_size[1] + im_size[0] + im_size[1]
            self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize))
            self.y = cgt.vector("y", dtype='i8')
            # Three-layer variant takes a per-layer list of hidden dropout rates.
            self.p_drop_input, self.p_drop_hidden = (
                0.0, [0.5, 0.5, 0.5]) if dropout else (0, [0, 0, 0])
            self.w_h = init_weights(self.Xsize, 256)
            self.w_h2 = init_weights(256, 256)
            self.w_h3 = init_weights(256, 256)
            self.w_o = init_weights(256, 8)
            self.pofy_drop = dense_model3(self.X, self.w_h, self.w_h2,
                                          self.w_h3, self.w_o,
                                          self.p_drop_input,
                                          self.p_drop_hidden)
            self.pofy_nodrop = dense_model3(self.X, self.w_h, self.w_h2,
                                            self.w_h3, self.w_o, 0.,
                                            [0., 0., 0.])
            self.params = [self.w_h, self.w_h2, self.w_h3, self.w_o]
            self.l1 = cgt.abs(self.w_h).sum() + cgt.abs(self.w_h2).sum() + cgt.abs(self.w_h3).sum() + \
                      cgt.abs(self.w_o).sum()
            self.cost_drop = -cgt.mean(
                categorical.loglik(self.y,
                                   self.pofy_drop)) + self.reg * self.l1
        else:
            raise RuntimeError("Unknown Model")

        # Evaluation graph (no dropout): predictions, NLL, and error rate.
        self.y_nodrop = cgt.argmax(self.pofy_nodrop, axis=1)
        self.cost_nodrop = -cgt.mean(
            categorical.loglik(self.y, self.pofy_nodrop))
        self.err_nodrop = cgt.cast(cgt.not_equal(self.y_nodrop, self.y),
                                   cgt.floatX).mean()
        # Compiled functions: loss/error, raw predictions, and a training step.
        self.computeloss = cgt.function(
            inputs=[self.X, self.y],
            outputs=[self.err_nodrop, self.cost_nodrop])
        self.y_out = cgt.function(inputs=[self.X], outputs=[self.y_nodrop])
        self.updates = rmsprop_updates(self.cost_drop, self.params)
        self.train = cgt.function(inputs=[self.X, self.y],
                                  outputs=[],
                                  updates=self.updates)