Example #1
def test_shape_err():
    with CaptureStderr():
        with cgt.scoped_update_config(debug=True, backend="python"):
            x = cgt.vector()
            y = cgt.vector()
            f = cgt.function([x,y],x+y)
            f(np.zeros(3),np.zeros(4))
Example #2
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no",fixed_shape=(None,n_in))
        a_n = cgt.vector("a_n",dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        h0 = (o_no - 128.0)/128.0 
        nhid = 64
        h1 = cgt.tanh(nn.Affine(128,nhid,weight_init=nn.IIDGaussian(std=.1))(h0))
        probs_na = nn.softmax(nn.Affine(nhid,n_actions,weight_init=nn.IIDGaussian(std=0.01))(h1))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n*q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np/probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_n, q_n], 
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj,params)]))
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)
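
The kl line above is the batch-averaged discrete KL divergence between the stored (old) action distribution and the current one. A NumPy restatement for reference, with hypothetical arrays p_old and p of shape (batch, n_actions):

import numpy as np
# p_old, p: hypothetical arrays of old and current action probabilities
kl_value = (p_old * np.log(p_old / p)).sum(axis=1).mean()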
Example #3
def test_shape_err():
    try:
        with CaptureStderr() as s:
            with cgt.scoped_update_config(debug=True):
                x = cgt.vector()
                y = cgt.vector()
                f = cgt.function([x,y],x+y)
                f(np.zeros(3),np.zeros(4))
    except Exception as e:
        assert "f = cgt.function([x,y],x+y)" in s.getvalue()
Example #4
def test_setting_weights():
    X = cgt.matrix("X", fixed_shape=(None, 28*28))
    model = build_model(X, 0.0)
    nnbuilder.set_all_weights(model, 'mnist.p')
    y = cgt.vector("y", dtype='i8')
    cost = -cgt.mean(categorical.loglik(y, model))
    selected_number = cgt.argmax(model, axis=1)
    err_nodrop = cgt.cast(cgt.not_equal(selected_number, y), cgt.floatX).mean()
    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost])

    Xdata, ydata = load_data()

    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(3):
        tstart = time.time()
        elapsed = time.time() - tstart
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
Example #5
File: demo_cifar.py Project: ketranm/cgt
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--epochs",type=int,default=10)
    args = parser.parse_args()

    batchsize = 64
    Xshape = (batchsize, 3, 32, 32)
    X = cgt.tensor4("X", fixed_shape = Xshape)
    y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4')

    conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=1e-4))(X)
    relu1 = nn.rectify(conv1)
    pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2))
    conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool1)
    relu2 = nn.rectify(conv2)
    pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2))
    conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool2)
    pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2))
    relu3 = nn.rectify(pool3)
    d0,d1,d2,d3 = relu3.shape
    flatlayer = relu3.reshape([d0,d1*d2*d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    ip1 = nn.Affine(nfeats, 10)(flatlayer)
    logprobs = nn.logsoftmax(ip1)
    loss = -logprobs[cgt.arange(batchsize), y].mean()

    params = nn.get_parameters(loss)
    updates = rmsprop_updates(loss, params, stepsize=1e-3)
    
    train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)

    if args.profile: cgt.profiler.start()

    data = fetch_dataset("http://rll.berkeley.edu/cgt-data/cifar10.npz")
    Xtrain = data["X_train"]
    ytrain = data["y_train"]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        for start in xrange(0, Xtrain.shape[0], batchsize):
            tstart = time.time()
            end = start+batchsize
            print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart
            if start > batchsize*5: break
        # elapsed = time.time() - tstart
        # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        # testerr, testloss = computeloss(Xtest, ytest)
        # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
        if args.profile: 
            cgt.profiler.print_stats()
            return
        if args.unittest:
            break
Example #6
File: demo_cifar.py Project: zclfly/cgt
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--epochs",type=int,default=10)
    args = parser.parse_args()

    batchsize = 64
    Xshape = (batchsize, 3, 32, 32)
    X = cgt.tensor4("X", fixed_shape = Xshape)
    y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4')

    conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=1e-4))(X)
    relu1 = nn.rectify(conv1)
    pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2))
    conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool1)
    relu2 = nn.rectify(conv2)
    pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2))
    conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool2)
    pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2))
    relu3 = nn.rectify(pool3)
    d0,d1,d2,d3 = relu3.shape
    flatlayer = relu3.reshape([d0,d1*d2*d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    ip1 = nn.Affine(nfeats, 10)(flatlayer)
    logprobs = nn.logsoftmax(ip1)
    loss = -logprobs[cgt.arange(batchsize), y].mean()

    params = nn.get_parameters(loss)
    updates = rmsprop_updates(loss, params, stepsize=1e-3)
    
    train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)

    if args.profile: cgt.profiler.start()

    data = np.load("/Users/joschu/Data/cifar-10-batches-py/cifar10.npz")
    Xtrain = data["X_train"]
    ytrain = data["y_train"]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        for start in xrange(0, Xtrain.shape[0], batchsize):
            tstart = time.time()
            end = start+batchsize
            print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart
            if start > batchsize*5: break
        # elapsed = time.time() - tstart
        # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        # testerr, testloss = computeloss(Xtest, ytest)
        # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
        if args.profile: 
            cgt.profiler.print_stats()
            return
        if args.unittest:
            break
Example #7
def make_updater_fc():
    X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
    y = cgt.vector("y", dtype='i8')
    stepsize = cgt.scalar("stepsize")
    loss = build_fc_return_loss(X, y)
    params = nn.get_parameters(loss)
    gparams = cgt.grad(loss, params)
    updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
    return cgt.function([X, y, stepsize], loss, updates=updates)
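
The updates list above is plain stochastic gradient descent, p <- p - stepsize * dL/dp, applied to each parameter. A minimal usage sketch, assuming build_fc_return_loss and the mini-batch arrays Xbatch, ybatch are defined elsewhere in the test:

updater = make_updater_fc()
for _ in range(10):
    loss = updater(Xbatch, ybatch, 1e-3)  # computes the batch loss, then applies the SGD updates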
Example #8
def test_incsubtensor2():
    W = cgt.shared(np.zeros((5, 3)), name="W")
    i0 = cgt.vector(dtype='i8')
    i1 = cgt.vector(dtype='i8')
    inc = cgt.vector()

    updates2 = {W: cgt.inc_subtensor(W, (i0, i1), inc)}
    f2 = cgt.function([i0, i1, inc], [], updates=updates2)
    f2([0, 1, 2, 2], [0, 1, 2, 2], [1, 2, 3, 4])
    assert np.allclose(
        W.op.get_value(),
        np.array([
            [1., 0., 0.],
            [0., 2., 0.],
            [0., 0., 7.],
            [0., 0., 0.],
            [0., 0., 0.],
        ]))
Example #9
def make_updater_fc():
    X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
    y = cgt.vector("y", dtype="i8")
    stepsize = cgt.scalar("stepsize")
    loss = build_fc_return_loss(X, y)
    params = nn.get_parameters(loss)
    gparams = cgt.grad(loss, params)
    updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
    return cgt.function([X, y, stepsize], loss, updates=updates)
Example #10
    def runTest(self):
        cgt.set_precision('double')
        x = cgt.vector()
        y = cgt.square(x)
        eg = cgt.execution.compilation_pipeline([x],[y+y],[])
        pprint.pprint(eg.to_json())
        import cycgt
        interp = cycgt.cInterpreter(eg)
        print interp(np.array([3,4,5,6],'f8'))
Example #11
def test_incsubtensor2():
    W = cgt.shared(np.zeros((5,3)), name="W")
    i0 = cgt.vector(dtype='i8')
    i1 = cgt.vector(dtype='i8')
    inc = cgt.vector()

    updates2 = {W : cgt.inc_subtensor(W, (i0,i1), inc)}
    f2 = cgt.function([i0,i1,inc],[],updates=updates2)
    f2([0,1,2,2],[0,1,2,2],[1,2,3,4])
    assert np.allclose(W.op.get_value(), 
        np.array(
        [
         [ 1.,  0.,  0.],
         [ 0.,  2.,  0.],
         [ 0.,  0.,  7.],
         [ 0.,  0.,  0.],
         [ 0.,  0.,  0.],
         ]))
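
Note that the repeated index pair (2, 2) accumulates, which is why the expected array holds 3 + 4 = 7 there. The same computation in plain NumPy, for reference:

import numpy as np
W = np.zeros((5, 3))
np.add.at(W, ([0, 1, 2, 2], [0, 1, 2, 2]), [1, 2, 3, 4])  # accumulates at duplicate indices
# W now matches the array asserted above, including W[2, 2] == 7.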
Example #12
File: test_stack.py Project: zxie/cgt
def test_stack():
    x = cgt.scalar()
    y = cgt.scalar()
    z = cgt.scalar()
    s0 = cgt.stack([x, y, z], axis=0)
    assert cgt.numeric_eval(s0, {x: 1, y: 2, z: 3}).shape == (3, )

    x = cgt.vector()
    y = cgt.vector()
    z = cgt.vector()
    v0 = cgt.stack([x, y, z], axis=0)
    assert cgt.numeric_eval(v0, {
        x: np.zeros(2),
        y: np.zeros(2),
        z: np.zeros(2)
    }).shape == (3, 2)
    v1 = cgt.stack([x, y, z], axis=1)
    assert cgt.numeric_eval(v1, {
        x: np.zeros(2),
        y: np.ones(2),
        z: np.zeros(2)
    }).shape == (2, 3)

    x = cgt.matrix()
    y = cgt.matrix()
    z = cgt.matrix()
    m0 = cgt.stack([x, y, z], axis=0)
    assert cgt.numeric_eval(m0, {
        x: np.zeros((2, 4)),
        y: np.zeros((2, 4)),
        z: np.zeros((2, 4))
    }).shape == (3, 2, 4)
    m1 = cgt.stack([x, y, z], axis=1)
    assert cgt.numeric_eval(m1, {
        x: np.zeros((2, 4)),
        y: np.zeros((2, 4)),
        z: np.zeros((2, 4))
    }).shape == (2, 3, 4)
    m2 = cgt.stack([x, y, z], axis=2)
    assert cgt.numeric_eval(m2, {
        x: np.zeros((2, 4)),
        y: np.zeros((2, 4)),
        z: np.zeros((2, 4))
    }).shape == (2, 4, 3)
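
The asserted shapes mirror NumPy's np.stack, which inserts a new axis at the given position; for reference:

import numpy as np
a = np.zeros((2, 4))
print(np.stack([a, a, a], axis=0).shape)  # (3, 2, 4)
print(np.stack([a, a, a], axis=1).shape)  # (2, 3, 4)
print(np.stack([a, a, a], axis=2).shape)  # (2, 4, 3)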
Example #13
    def make_updater_convnet():
        X = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28))  # so shapes can be inferred
        y = cgt.vector("y", dtype="i8")
        stepsize = cgt.scalar("stepsize")
        loss = build_convnet_return_loss(X, y)

        params = nn.get_parameters(loss)
        gparams = cgt.grad(loss, params)
        updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        return cgt.function([X, y, stepsize], loss, updates=updates)
Example #14
def test_multi_output():
    for x in (cgt.scalar('x'), cgt.vector('x'), cgt.matrix('x')):
        for cls in (SinCos, SinCos2):
            y, z = core.unpack(core.Result(cls(), [x]))
            xnum = np.ones((3, ) * x.ndim, cgt.floatX)
            correct = (np.sin(xnum), np.cos(xnum))
            yznum = cgt.numeric_eval([y, z], {x: xnum})
            np.testing.assert_allclose(yznum, correct)
            f = cgt.function([x], [y, z])
            np.testing.assert_allclose(f(xnum), correct)
Example #15
File: _test_tuples.py Project: zxie/cgt
    def runTest(self):
        f1 = cgt.function1([], ())
        assert f1() == ()

        x = cgt.vector()
        xval = np.random.randn(1)
        f2 = cgt.function([x], [(x,x),(x,),()])
        ytrue = [(xval,xval),(xval,),()]
        y = f2(xval)
        assert y==ytrue
Example #16
File: test_linreg.py Project: zclfly/cgt
def test_linreg():
    cgt.reset_config()
    cgt.set_precision('double')
    N = 10
    K = 3

    Xval = np.random.randn(N, K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)

    X_nk = cgt.matrix("X")
    y_n = cgt.vector("y")
    w_k = cgt.vector("w")
    b = cgt.scalar(name="b")

    ypred = cgt.dot(X_nk, w_k) + b

    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])

    g_simple, an, _ = cgt.core.simplify_and_analyze(g)

    print "Loss function:"
    cgt.print_tree([err])
    print "Gradient:"
    cgt.print_tree(g)

    print "Gradient simplified"
    cgt.print_tree(
        g_simple,
        nodefn=lambda node, o: o.write(" " + an["node2hash"][node][:5]))

    print "-------"

    d = {X_nk: Xval, w_k: wval, b: bval, y_n: yval}

    np.testing.assert_allclose(cgt.numeric_eval(err, d),
                               np.linalg.norm(Xval.dot(wval) + bval - yval)**2)
    np.testing.assert_allclose(cgt.numeric_eval(g[0], d),
                               2 * Xval.T.dot(Xval.dot(wval) + bval - yval))
    np.testing.assert_allclose(cgt.numeric_eval(g[1], d),
                               2 * np.sum(Xval.dot(wval) + bval - yval, 0))
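
The closed-form gradients asserted at the end follow directly from err = ||X w + b - y||^2: writing r = X w + b - y for the residual, d(err)/dw = 2 X^T r and d(err)/db = 2 sum_n r_n, which is exactly what the two assert_allclose checks compare against.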
Example #17
def test_multi_output():
    for x in (cgt.scalar('x'), cgt.vector('x'), cgt.matrix('x')):
        for cls in (SinCos, SinCos2):
            y,z = core.unpack(core.Result(cls(), [x]))
            xnum = np.ones((3,)*x.ndim, cgt.floatX)
            correct = (np.sin(xnum),np.cos(xnum))
            yznum = cgt.numeric_eval([y,z], {x:xnum})
            np.testing.assert_allclose(yznum, correct)
            f = cgt.function([x],[y,z])
            np.testing.assert_allclose(f(xnum), correct)
Example #18
    def __init__(self, obs_dim, ctrl_dim):

        cgt.set_precision('double')
        Serializable.__init__(self, obs_dim, ctrl_dim)

        self.obs_dim = obs_dim
        self.ctrl_dim = ctrl_dim

        o_no = cgt.matrix("o_no",fixed_shape=(None,obs_dim))
        a_na = cgt.matrix("a_na",fixed_shape = (None, ctrl_dim))
        adv_n = cgt.vector("adv_n")
        oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2*ctrl_dim))
        self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)), name="std_1a")
        std_1a = cgt.exp(logstd_1a)

        # Here's where we apply the network
        h0 = o_no
        nhid = 32
        h1 = cgt.tanh(nn.Affine(obs_dim,nhid,weight_init=nn.IIDGaussian(std=0.1))(h0))
        h2 = cgt.tanh(nn.Affine(nhid,nhid,weight_init=nn.IIDGaussian(std=0.1))(h1))
        mean_na = nn.Affine(nhid,ctrl_dim,weight_init=nn.IIDGaussian(std=0.01))(h2)

        b = cgt.size(o_no, 0)
        std_na = cgt.repeat(std_1a, b, axis=0)

        oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
        oldstd_na = oldpdist_np[:, self.ctrl_dim:2*self.ctrl_dim]

        logp_n = ((-.5) * cgt.square( (a_na - mean_na) / std_na ).sum(axis=1)) - logstd_1a.sum()
        oldlogp_n = ((-.5) * cgt.square( (a_na - oldmean_na) / oldstd_na ).sum(axis=1)) - cgt.log(oldstd_na).sum(axis=1)

        ratio_n = cgt.exp(logp_n - oldlogp_n)

        surr = (ratio_n*adv_n).mean()

        pdists_np = cgt.concatenate([mean_na, std_na], axis=1)
        # kl = cgt.log(sigafter/)

        params = nn.get_parameters(surr)

        oldvar_na = cgt.square(oldstd_na)
        var_na = cgt.square(std_na)
        kl = (cgt.log(std_na / oldstd_na) + (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) - .5).sum(axis=1).mean()


        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
        self._compute_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_na, adv_n], 
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj,params)]))
        self.f_pdist = cgt.function([o_no], pdists_np)

        self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])

        self.pc = ParamCollection(params)
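
The kl line above is the standard KL divergence between diagonal Gaussians, summed over action dimensions and averaged over the batch; per dimension, with the old distribution as the first argument:

KL(N(mu_old, std_old^2) || N(mu, std^2)) = log(std / std_old) + (std_old^2 + (mu_old - mu)^2) / (2 std^2) - 1/2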
Example #19
    def make_updater_convnet():
        X = cgt.tensor4("X", fixed_shape=(None, 1, 28,
                                          28))  # so shapes can be inferred
        y = cgt.vector("y", dtype='i8')
        stepsize = cgt.scalar("stepsize")
        loss = build_convnet_return_loss(X, y)

        params = nn.get_parameters(loss)
        gparams = cgt.grad(loss, params)
        updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        return cgt.function([X, y, stepsize], loss, updates=updates)
Example #20
    def test_cycgt(self):
        x = cgt.vector('x')
        y = cgt.vector('y')
        z = y / x
        cs = cycgt.CallSequence([x, y], [z], list(cgt.topsorted([z])))

        xshp = (4, )
        yshp = (4, )
        zshp = (4, )

        xval = np.random.randn(*xshp).astype('float32')
        yval = np.random.randn(*yshp).astype('float32')
        zval = np.random.randn(*zshp).astype('float32')

        cs.set_shapes([xshp, yshp, zshp])
        cs.set_inputs([xval, yval])
        cs.execute()
        print xval, yval
        print xval * yval
        np.testing.assert_allclose(yval / xval, cs.get_outputs_numpy()[0])
Example #21
    def test_cycgt(self):
        x = cgt.vector('x')
        y = cgt.vector('y')
        z = y/x
        cs = cycgt.CallSequence([x,y],[z], list(cgt.topsorted([z])))

        xshp = (4,)
        yshp = (4,)
        zshp = (4,)

        xval = np.random.randn(*xshp).astype('float32')
        yval = np.random.randn(*yshp).astype('float32')
        zval = np.random.randn(*zshp).astype('float32')

        cs.set_shapes([xshp,yshp,zshp])
        cs.set_inputs([xval,yval])
        cs.execute()
        print xval, yval
        print xval * yval
        np.testing.assert_allclose(yval/xval , cs.get_outputs_numpy()[0])
Example #22
def CGT_dvLJ(x):
    N = len(x)
    xt = cgt.vector('xt')
    vLJt = 0
    for j in range(1,N):
        for i in range(j):
            rho = ((xt[i*D:i*D+D] - xt[j*D:j*D+D])**2).sum()
            vLJt += rho**(-6.0)-(rho**(-3.0))
    
    dvLJc = cgt.grad(4*vLJt, xt)    
    df = cgt.function([xt],dvLJc)
    return df(np.ravel(x))
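
With rho = ||x_i - x_j||^2, the accumulated term rho^(-6) - rho^(-3) equals (1/r_ij)^12 - (1/r_ij)^6, so 4*vLJt is the Lennard-Jones potential in reduced units (epsilon = sigma = 1):

V_LJ = 4 * sum_{i<j} [ (1/r_ij)^12 - (1/r_ij)^6 ]

CGT_dvLJ then returns its exact gradient with respect to the flattened coordinates, obtained from cgt.grad.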
Example #23
def test_linreg():
    N = 10
    K = 3

    Xval = np.random.randn(N,K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)

    X_nk = cgt.matrix("X")
    y_n = cgt.vector("y")
    w_k = cgt.vector("w")
    b = cgt.scalar(name="b")

    ypred = cgt.dot(X_nk, w_k) + b

    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])

    g_simple,an,_ = cgt.core.simplify_and_analyze(g)


    print "Loss function:"
    cgt.print_tree([err])
    print "Gradient:"
    cgt.print_tree(g)

    print "Gradient simplified"
    cgt.print_tree(g_simple, nodefn=lambda node,o: o.write(" " + an["node2hash"][node][:5]))

    print "-------"

    d = {X_nk : Xval, w_k : wval, b : bval, y_n : yval}

    np.testing.assert_allclose(cgt.numeric_eval(err,d), np.linalg.norm(Xval.dot(wval) + bval - yval)**2,
        atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
    np.testing.assert_allclose(cgt.numeric_eval(g[0],d), 2 * Xval.T.dot(Xval.dot(wval) + bval - yval),
        atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
    np.testing.assert_allclose(cgt.numeric_eval(g[1],d), 2 *  np.sum(Xval.dot(wval) + bval - yval, 0),
        atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
Example #24
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
        a_n = cgt.vector("a_n", dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        h0 = (o_no - 128.0) / 128.0
        nhid = 64
        h1 = cgt.tanh(
            nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))(h0))
        probs_na = nn.softmax(
            nn.Affine(nhid, n_actions,
                      weight_init=nn.IIDGaussian(std=0.01))(h1))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n * q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_n, q_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], probs_na)

        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n],
                                      [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)
Example #25
File: test_stack.py Project: EdsterG/cgt
def test_stack():
    x = cgt.scalar()
    y = cgt.scalar()
    z = cgt.scalar()
    s0 = cgt.stack([x, y, z], axis=0)
    assert cgt.numeric_eval(s0, {x: 1, y: 2, z: 3}).shape == (3,)

    x = cgt.vector()
    y = cgt.vector()
    z = cgt.vector()
    v0 = cgt.stack([x, y, z], axis=0)
    assert cgt.numeric_eval(v0, {x: np.zeros(2), y: np.zeros(2), z: np.zeros(2)}).shape == (3, 2)
    v1 = cgt.stack([x, y, z], axis=1)
    assert cgt.numeric_eval(v1, {x: np.zeros(2), y: np.ones(2), z: np.zeros(2)}).shape == (2, 3)

    x = cgt.matrix()
    y = cgt.matrix()
    z = cgt.matrix()
    m0 = cgt.stack([x, y, z], axis=0)
    assert cgt.numeric_eval(m0, {x: np.zeros((2, 4)), y: np.zeros((2, 4)), z: np.zeros((2, 4))}).shape == (3, 2, 4)
    m1 = cgt.stack([x, y, z], axis=1)
    assert cgt.numeric_eval(m1, {x: np.zeros((2, 4)), y: np.zeros((2, 4)), z: np.zeros((2, 4))}).shape == (2, 3, 4)
    m2 = cgt.stack([x, y, z], axis=2)
    assert cgt.numeric_eval(m2, {x: np.zeros((2, 4)), y: np.zeros((2, 4)), z: np.zeros((2, 4))}).shape == (2, 4, 3)
Example #26
    def make_updater_convnet_parallel():
        X = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28))  # so shapes can be inferred
        y = cgt.vector("y", dtype="i8")
        stepsize = cgt.scalar("stepsize")
        loss = build_convnet_return_loss(X, y)

        m = nn.Module([X, y], [loss])
        split_loss = 0
        for start in xrange(0, batch_size, batch_size // 4):
            sli = slice(start, start + batch_size // 4)
            split_loss += m([X[sli], y[sli]])[0]
        split_loss /= 4
        params = nn.get_parameters(loss)
        gparams = cgt.grad(split_loss, params)
        updates2 = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        return cgt.function([X, y, stepsize], split_loss, updates=updates2)
Example #27
def test_incsubtensor0():
    # First, let's test a fancy slice along the zeroth dimension

    W = cgt.shared(np.zeros((5, 3)), name="W")
    inc = cgt.matrix()  # we'll increment W by this matrix
    incval = np.arange(9).reshape(3, 3)

    inds = cgt.vector(dtype='i8')
    updates = {W: cgt.inc_subtensor(W, inds, inc)}
    f = cgt.function([inds, inc], [], updates=updates)
    f([1, 2, 4], incval)

    assert np.allclose(
        W.op.get_value(),
        np.array([[0., 0., 0.], [0., 1., 2.], [3., 4., 5.], [0., 0., 0.],
                  [6., 7., 8.]]))
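
The same row-wise increment in plain NumPy, for reference:

import numpy as np
W = np.zeros((5, 3))
np.add.at(W, [1, 2, 4], np.arange(9).reshape(3, 3))
# Rows 1, 2, 4 of W now hold the rows of incval, matching the asserted array.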
Example #28
    def make_updater_fc_parallel():
        X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
        y = cgt.vector("y", dtype='i8')
        stepsize = cgt.scalar("stepsize")

        loss = build_fc_return_loss(X, y)
        params = nn.get_parameters(loss)
        m = nn.Module([X, y], [loss])
        split_loss = 0
        for start in xrange(0, batch_size, batch_size // 4):
            sli = slice(start, start + batch_size // 4)
            split_loss += m([X[sli], y[sli]])[0]
        split_loss /= 4
        gparams = cgt.grad(split_loss, params)
        updates2 = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        return cgt.function([X, y, stepsize], split_loss, updates=updates2)
Example #29
    def make_updater_fc_parallel():
        X = cgt.matrix("X", fixed_shape=(None,28*28))
        y = cgt.vector("y",dtype='i8')
        stepsize = cgt.scalar("stepsize")

        loss = build_fc_return_loss(X,y)
        params = nn.get_parameters(loss)        
        m = nn.Module([X,y], [loss])
        split_loss = 0
        for start in xrange(0, batch_size, batch_size//4):
            sli = slice(start, start+batch_size//4)
            split_loss += m([X[sli], y[sli]])[0]
        split_loss /= 4
        gparams = cgt.grad(split_loss, params)
        updates2 = [(p, p-stepsize*gp) for (p, gp) in zip(params, gparams)]
        return cgt.function([X,y, stepsize], split_loss, updates=updates2)
Example #30
def main(num_epochs=NUM_EPOCHS):
    #cgt.set_precision('half')
    print("Building network ...")
    # Recurrent layers expect input of shape
    # (batch size, max sequence length, number of features)
    X = cgt.tensor3(name='X', fixed_shape=(N_BATCH, MAX_LENGTH, 2))
    l_forward = nnbuilder.recurrentLayer(nn_input=X, num_units=N_HIDDEN)
    l_backward = nnbuilder.recurrentLayer(nn_input=X, num_units=N_HIDDEN, backwards=True)
    #l_forward = nnbuilder.LSTMLayer(nn_input=X, num_units=N_HIDDEN, activation=cgt.sigmoid)
    #l_backward = nnbuilder.LSTMLayer(nn_input=X, num_units=N_HIDDEN, activation=cgt.sigmoid, backwards=True)
    #l_forward = nnbuilder.GRULayer(nn_input=X, num_units=N_HIDDEN, activation=nn.rectify)
    #l_backward = nnbuilder.GRULayer(nn_input=X, num_units=N_HIDDEN, activation=nn.rectify, backwards=True)
    l_forward_slice = l_forward[:, MAX_LENGTH-1, :]  # take the last time step of the forward pass
    l_backward_slice = l_backward[:, 0, :]  # and the first time step of the backward pass
    l_sum = cgt.concatenate([l_forward_slice, l_backward_slice], axis=1)
    l_out = nnbuilder.denseLayer(l_sum, num_units=1, activation=cgt.tanh)
    target_values = cgt.vector('target_output')
    predicted_values = l_out[:, 0]  # For this task we only need the last value
    cost = cgt.mean((predicted_values - target_values)**2)
    # Compute SGD updates for training
    print("Computing updates ...")
    updates = nn.rmsprop(cost, nn.get_parameters(l_out), LEARNING_RATE)
    #updates = nn.nesterov_momentum(cost, nn.get_parameters(l_out), 0.05)
    # cgt functions for training and computing cost
    print("Compiling functions ...")
    train = cgt.function([X, target_values], cost, updates=updates)
    compute_cost = cgt.function([X, target_values], cost)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val = gen_data()

    print("Training ...")
    time_start = time.time()
    try:
        for epoch in range(num_epochs):
            for _ in range(EPOCH_SIZE):
                X, y, m = gen_data()
                train(X, y)
            cost_val = compute_cost(X_val, y_val)
            print("Epoch {} validation cost = {}".format(epoch+1, cost_val))
            print ('Epoch took ' + str(time.time() - time_start))
            time_start = time.time()
    except KeyboardInterrupt:
        pass
Example #31
def main():
    print("Loading data...")
    X = cgt.matrix("X", fixed_shape=(None, 28*28))
    y = cgt.vector("y", dtype='i8')

    model = build_model(X, 0.0)
    loss = -cgt.mean(categorical.loglik(y, model))

    updates = nn.rmsprop(loss, nn.get_parameters(loss), 0.01)
    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)

    y_nodrop = cgt.argmax(model, axis=1)

    cost_nodrop = -cgt.mean(categorical.loglik(y, model))
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()

    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost_nodrop])


    batch_size=128
    Xdata, ydata = load_data()

    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(3):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start+batch_size
            train(Xtrain[start:end], ytrain[start:end])
        elapsed = time.time() - tstart
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])

    nnbuilder.save_weights(model, 'mnist')
Example #32
def CGT_vLJ_Optimize(x):
    N = len(x)
    #cgt.set_precision('double')
    xt = cgt.vector('xt')
    vLJt = 0
    for j in range(1,N):
        for i in range(j):
            rho = ((xt[i*D:i*D+D] - xt[j*D:j*D+D])**2).sum()
            vLJt += rho**(-6.0)-(rho**(-3.0))
    
    f = cgt.function([xt],4*vLJt)
    dvLJc = cgt.grad(4*vLJt, xt)    
    df = cgt.function([xt],dvLJc)
    
    CGT_BFGSres = optimize.minimize(f, np.ravel(x), \
                                  method='L-BFGS-B',        \
                                  jac = df,     \
                                  options={'disp': False})
    return np.reshape(CGT_BFGSres.x, (N,D))
Example #33
def test_incsubtensor0():
    # First, let's test a fancy slice along the zeroth dimension

    W = cgt.shared(np.zeros((5,3)), name="W")
    inc = cgt.matrix() # we'll increment W by this matrix
    incval = np.arange(9).reshape(3,3)
    

    inds = cgt.vector(dtype='i8')
    updates = {W : cgt.inc_subtensor(W, inds, inc)}
    f = cgt.function([inds,inc],[],updates=updates)
    f([1,2,4],incval)

    assert np.allclose(W.op.get_value(), 
        np.array(
        [[ 0.,  0.,  0.],
         [ 0.,  1.,  2.],
         [ 3.,  4.,  5.],
         [ 0.,  0.,  0.],
         [ 6.,  7.,  8.]]))
Example #34
def main():
    X = cgt.matrix(name='data', dtype=cgt.floatX, fixed_shape=(None, 2212))
    y = cgt.vector("y", dtype='i8')
    model = build_nn(X)
    loss = -cgt.mean(categorical.loglik(y, model))
    updates = nn.adagrad(loss, nn.get_parameters(loss), 0.01)

    y_nodrop = cgt.argmax(model, axis=1)

    cost_nodrop = -cgt.mean(categorical.loglik(y, model))
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()

    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)
    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost_nodrop])

    batch_size = 20
    Xdata, ydata = load_data()

    Xtrain = Xdata[0:5200]
    ytrain = ydata[0:5200]

    Xtest = Xdata[5200:5573]
    ytest = ydata[5200:5573]

    sortinds = np.random.permutation(5200)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(20):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start+batch_size
            train(Xtrain[start:end], ytrain[start:end])
        elapsed = time.time() - tstart
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
Example #35
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--dropout", action="store_true")
    parser.add_argument("--stepsize", type=float, default=.001)
    parser.add_argument("--model", choices=["dense", "conv"], default="dense")
    parser.add_argument("--unittest", action="store_true")
    parser.add_argument("--grad_check", action="store_true")
    args = parser.parse_args()

    if args.grad_check: cgt.set_precision("quad")

    # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/
    # converted to npz
    mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz")

    Xdata = (mnist["X"] / 255.).astype(cgt.floatX)
    ydata = mnist["y"]

    np.random.seed(0)

    if args.model == "conv":
        Xdata = Xdata.reshape(-1, 1, 28, 28)

    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    X = cgt.tensor4("X",
                    fixed_shape=(None, 1, 28,
                                 28)) if args.model == "conv" else cgt.matrix(
                                     "X", fixed_shape=(None, 28 * 28))
    y = cgt.vector("y", dtype='i8')

    if args.model == "dense":
        p_drop_input, p_drop_hidden = (0.2, 0.5) if args.dropout else (0, 0)
        w_h = init_weights(784, 256)
        w_h2 = init_weights(256, 256)
        w_o = init_weights(256, 10)
        pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden)
        pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.)
        params = [w_h, w_h2, w_o]
    elif args.model == "conv":
        p_drop_conv, p_drop_hidden = (0.2, 0.5) if args.dropout else (0, 0)
        w = init_weights(32, 1, 3, 3)
        w2 = init_weights(64, 32, 3, 3)
        w3 = init_weights(128, 64, 3, 3)
        w4 = init_weights(128 * 2 * 2, 625)
        w_o = init_weights(625, 10)
        pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv,
                                  p_drop_hidden)
        pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.)
        params = [w, w2, w3, w4, w_o]
    else:
        raise RuntimeError("Unreachable")

    cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop))
    updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize)

    y_nodrop = cgt.argmax(pofy_nodrop, axis=1)
    cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop))
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()

    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)
    computeloss = cgt.function(inputs=[X, y],
                               outputs=[err_nodrop, cost_nodrop])

    batch_size = 128

    from cgt.tests import gradcheck_model
    if args.grad_check:
        cost_nodrop = cgt.core.clone(cost_nodrop, {
            X: Xtrain[:1],
            y: ytrain[:1]
        })
        print "doing gradient check..."
        print "------------------------------------"
        gradcheck_model(cost_nodrop, params[0:1])
        print "success!"
        return

    if args.profile: cgt.profiler.start()

    print fmt_row(10, [
        "Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"
    ])
    for i_epoch in xrange(args.epochs):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start + batch_size
            train(Xtrain[start:end], ytrain[start:end])
            if args.unittest: return
        elapsed = time.time() - tstart
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)],
                                          ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(
            10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
    if args.profile: cgt.execution.profiler.print_stats()
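
rmsprop_updates is defined elsewhere in this demo script; for reference, the standard RMSProp rule it presumably implements keeps a running average of squared gradients and scales each step by it:

cache <- rho * cache + (1 - rho) * g^2
theta <- theta - stepsize * g / (sqrt(cache) + eps)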
Example #36
def vector(name, dtype=None, fixed_shape=None):
    return cgt.vector(name, dtype, fixed_shape)
Example #37
    def __init__(self, obs_dim, ctrl_dim):

        cgt.set_precision('double')
        Serializable.__init__(self, obs_dim, ctrl_dim)

        self.obs_dim = obs_dim
        self.ctrl_dim = ctrl_dim

        o_no = cgt.matrix("o_no", fixed_shape=(None, obs_dim))
        a_na = cgt.matrix("a_na", fixed_shape=(None, ctrl_dim))
        adv_n = cgt.vector("adv_n")
        oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2 * ctrl_dim))
        self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)),
                                               name="std_1a")
        std_1a = cgt.exp(logstd_1a)

        # Here's where we apply the network
        h0 = o_no
        nhid = 32
        h1 = cgt.tanh(
            nn.Affine(obs_dim, nhid, weight_init=nn.IIDGaussian(std=0.1))(h0))
        h2 = cgt.tanh(
            nn.Affine(nhid, nhid, weight_init=nn.IIDGaussian(std=0.1))(h1))
        mean_na = nn.Affine(nhid,
                            ctrl_dim,
                            weight_init=nn.IIDGaussian(std=0.01))(h2)

        b = cgt.size(o_no, 0)
        std_na = cgt.repeat(std_1a, b, axis=0)

        oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
        oldstd_na = oldpdist_np[:, self.ctrl_dim:2 * self.ctrl_dim]

        logp_n = ((-.5) * cgt.square(
            (a_na - mean_na) / std_na).sum(axis=1)) - logstd_1a.sum()
        oldlogp_n = ((-.5) * cgt.square(
            (a_na - oldmean_na) / oldstd_na).sum(axis=1)
                     ) - cgt.log(oldstd_na).sum(axis=1)

        ratio_n = cgt.exp(logp_n - oldlogp_n)

        surr = (ratio_n * adv_n).mean()

        pdists_np = cgt.concatenate([mean_na, std_na], axis=1)
        # kl = cgt.log(sigafter/)

        params = nn.get_parameters(surr)

        oldvar_na = cgt.square(oldstd_na)
        var_na = cgt.square(std_na)
        kl = (cgt.log(std_na / oldstd_na) +
              (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) -
              .5).sum(axis=1).mean()

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n],
                                             [surr, kl])
        self._compute_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_na, adv_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], pdists_np)

        self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n],
                                   [surr, kl])

        self.pc = ParamCollection(params)
Example #38
np.random.seed(42)
sortinds = np.random.permutation(Xtrain.shape[0])
Xtrain = Xtrain[sortinds]
ytrain = ytrain[sortinds]

# reshape for convnet
Xtrainimg = Xtrain.reshape(-1, 1, 28, 28)
Xtestimg = Xtest.reshape(-1, 1, 28, 28)

# Model:
# Make it VGG-like.
# VGG nets use 3x3 kernels with padding 1, and all max-pooling is 2x2 with stride 2.
#
# VGG is a large model, so here we'll just do a small part of it.
X = cgt.tensor4('X', fixed_shape=(None, 1, 28, 28))
y = cgt.vector('y', dtype='i8')

conv1 = nn.rectify(
        nn.SpatialConvolution(1, 32, kernelshape=(3,3), stride=(1,1), pad=(1,1), weight_init=nn.IIDGaussian(std=.1))(X)
        )
pool1 = nn.max_pool_2d(conv1, kernelshape=(2,2), stride=(2,2))

conv2 = nn.rectify(
        nn.SpatialConvolution(32, 32, kernelshape=(3,3), stride=(1,1), pad=(1,1), weight_init=nn.IIDGaussian(std=.1))(pool1)
        )
pool2 = nn.max_pool_2d(conv2, kernelshape=(2,2), stride=(2,2))
d0, d1, d2, d3 = pool2.shape

flat = pool2.reshape([d0, d1*d2*d3])
nfeats = cgt.infer_shape(flat)[1]
probs = nn.softmax(nn.Affine(nfeats, 10)(flat))
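
A quick sanity check of the inferred feature count (arithmetic only, not part of the original snippet): the 28x28 input passes through two 2x2, stride-2 poolings, 28 -> 14 -> 7, and ends with 32 channels, so nfeats here should come out to 32 * 7 * 7 = 1568.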
Example #39
File: FC.py Project: zuiwufenghua/VIN
    def __init__(self,
                 model="dense",
                 im_size=[28, 28],
                 dropout=True,
                 devtype="cpu",
                 grad_check=True,
                 reg=0):
        if grad_check: cgt.set_precision("quad")
        self.model = model
        self.reg = reg
        np.random.seed(0)
        cgt.update_config(default_device=cgt.core.Device(devtype=devtype),
                          backend="native")
        print(model)
        # MLP with 1 hidden layer
        if model == "dense1":
            self.Xsize = 2 * im_size[0] * im_size[1] + im_size[0] + im_size[1]
            self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize))
            self.y = cgt.vector("y", dtype='i8')
            self.p_drop_input, self.p_drop_hidden = (0.2,
                                                     0.5) if dropout else (0,
                                                                           0)
            self.w_h = init_weights(self.Xsize, 256)
            self.w_o = init_weights(256, 8)
            self.pofy_drop = dense_model1(self.X, self.w_h, self.w_o,
                                          self.p_drop_input,
                                          self.p_drop_hidden)
            self.pofy_nodrop = dense_model1(self.X, self.w_h, self.w_o, 0., 0.)
            self.params = [self.w_h, self.w_o]
            self.l1 = cgt.abs(self.w_h).sum() + cgt.abs(self.w_o).sum()
            self.cost_drop = -cgt.mean(
                categorical.loglik(self.y,
                                   self.pofy_drop)) + self.reg * self.l1
        # MLP with 2 hidden layers
        elif model == "dense2":
            self.Xsize = 2 * im_size[0] * im_size[1] + im_size[0] + im_size[1]
            self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize))
            self.y = cgt.vector("y", dtype='i8')
            self.p_drop_input, self.p_drop_hidden = (0.2,
                                                     0.5) if dropout else (0,
                                                                           0)
            self.w_h = init_weights(self.Xsize, 256)
            self.w_h2 = init_weights(256, 256)
            self.w_o = init_weights(256, 8)
            self.pofy_drop = dense_model2(self.X, self.w_h, self.w_h2,
                                          self.w_o, self.p_drop_input,
                                          self.p_drop_hidden)
            self.pofy_nodrop = dense_model2(self.X, self.w_h, self.w_h2,
                                            self.w_o, 0., 0.)
            self.params = [self.w_h, self.w_h2, self.w_o]
            self.l1 = cgt.abs(self.w_h).sum() + cgt.abs(
                self.w_h2).sum() + cgt.abs(self.w_o).sum()
            self.cost_drop = -cgt.mean(
                categorical.loglik(self.y,
                                   self.pofy_drop)) + self.reg * self.l1
        # MLP with 3 hidden layers
        elif model == "dense3":
            self.Xsize = 2 * im_size[0] * im_size[1] + im_size[0] + im_size[1]
            self.X = cgt.matrix("X", fixed_shape=(None, self.Xsize))
            self.y = cgt.vector("y", dtype='i8')
            self.p_drop_input, self.p_drop_hidden = (
                0.0, [0.5, 0.5, 0.5]) if dropout else (0, [0, 0, 0])
            self.w_h = init_weights(self.Xsize, 256)
            self.w_h2 = init_weights(256, 256)
            self.w_h3 = init_weights(256, 256)
            self.w_o = init_weights(256, 8)
            self.pofy_drop = dense_model3(self.X, self.w_h, self.w_h2,
                                          self.w_h3, self.w_o,
                                          self.p_drop_input,
                                          self.p_drop_hidden)
            self.pofy_nodrop = dense_model3(self.X, self.w_h, self.w_h2,
                                            self.w_h3, self.w_o, 0.,
                                            [0., 0., 0.])
            self.params = [self.w_h, self.w_h2, self.w_h3, self.w_o]
            self.l1 = cgt.abs(self.w_h).sum() + cgt.abs(self.w_h2).sum() + cgt.abs(self.w_h3).sum() + \
                      cgt.abs(self.w_o).sum()
            self.cost_drop = -cgt.mean(
                categorical.loglik(self.y,
                                   self.pofy_drop)) + self.reg * self.l1
        else:
            raise RuntimeError("Unknown Model")

        self.y_nodrop = cgt.argmax(self.pofy_nodrop, axis=1)
        self.cost_nodrop = -cgt.mean(
            categorical.loglik(self.y, self.pofy_nodrop))
        self.err_nodrop = cgt.cast(cgt.not_equal(self.y_nodrop, self.y),
                                   cgt.floatX).mean()
        self.computeloss = cgt.function(
            inputs=[self.X, self.y],
            outputs=[self.err_nodrop, self.cost_nodrop])
        self.y_out = cgt.function(inputs=[self.X], outputs=[self.y_nodrop])
        self.updates = rmsprop_updates(self.cost_drop, self.params)
        self.train = cgt.function(inputs=[self.X, self.y],
                                  outputs=[],
                                  updates=self.updates)
Example #40
import cgt
# X = cgt.matrix(fixed_shape=(10,3))
y = cgt.vector(fixed_shape=(3,))
w = cgt.vector(fixed_shape=(5,))
# z = X.dot(y)
y+w
# cgt.print_tree(cgt.core.simplify(cgt.shape(z)))
Example #41
def ivector(name):
    return cgt.vector(name, dtype='int32')
Example #42
def vector(name, dtype=None, fixed_shape=None):
    return cgt.vector(name, dtype, fixed_shape)
Example #43
import cgt
# X = cgt.matrix(fixed_shape=(10,3))
y = cgt.vector(fixed_shape=(3, ))
w = cgt.vector(fixed_shape=(5, ))
# z = X.dot(y)
y + w
# cgt.print_tree(cgt.core.simplify(cgt.shape(z)))
Example #44
# scaled_data = scaler.transform(data, targets)

# split data
X_train, X_test, Y_train, Y_test = train_test_split(data, targets, test_size=0.2, random_state=0)

# hyperparams
#
# Be careful when setting alpha! If it's too large,
# the cost will blow up.
alpha = 1e-7
epochs = 100

# Linear regression model
np.random.seed(0)
X = cgt.matrix("X", fixed_shape=(None, nfeats))
Y = cgt.vector("Y")
w = cgt.shared(np.random.randn(nfeats) * 0.01)

# prediction
ypred = cgt.dot(X, w)

# cost
cost = cgt.square(Y - ypred).mean()

# derivative with respect to w
dw = cgt.grad(cost=cost, wrt=w)
updates = [(w, w - dw * alpha)]

# training function
trainf = cgt.function(inputs=[X, Y], outputs=[], updates=updates)
# cost function, no updates
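
A minimal sketch of how trainf might be driven (X_train and Y_train come from the split above; the loop itself is not part of the original snippet):

for _ in range(epochs):
    trainf(X_train, Y_train)
# The learned weights can then be read back with w.op.get_value().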
Example #45
def ivector(name):
    return cgt.vector(name, dtype='int32')
Example #46
File: demo_mnist.py Project: EdsterG/cgt
def main():
    import argparse
    parser=argparse.ArgumentParser()
    parser.add_argument("--epochs",type=int,default=10)
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--dropout",action="store_true")
    parser.add_argument("--stepsize",type=float, default=.001)
    parser.add_argument("--model",choices=["dense","conv"],default="dense")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--grad_check",action="store_true")
    parser.add_argument("--devtype",choices=["cpu","gpu"],default="cpu")
    args = parser.parse_args()

    if args.grad_check: cgt.set_precision("quad")

    # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/
    # converted to npz
    mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz")

    Xdata = (mnist["X"]/255.).astype(cgt.floatX)
    ydata = mnist["y"]

    np.random.seed(0)

    cgt.update_config(default_device=cgt.core.Device(devtype=args.devtype), backend="native")

    if args.model=="conv":
        Xdata = Xdata.reshape(-1, 1, 28, 28)

    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    X = cgt.tensor4("X",fixed_shape=(None,1,28,28)) if args.model=="conv" else cgt.matrix("X", fixed_shape=(None,28*28))
    y = cgt.vector("y",dtype='i8')

    if args.model == "dense":
        p_drop_input,p_drop_hidden = (0.2, 0.5) if args.dropout else (0,0)    
        w_h = init_weights(784, 256)
        w_h2 = init_weights(256, 256)
        w_o = init_weights(256, 10)
        pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden)
        pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.)
        params = [w_h, w_h2, w_o]        
    elif args.model == "conv":
        p_drop_conv,p_drop_hidden = (0.2, 0.5) if args.dropout else (0,0)            
        w = init_weights(32, 1, 3, 3)
        w2 = init_weights(64, 32, 3, 3)
        w3 = init_weights(128, 64, 3, 3)
        w4 = init_weights(128 * 2 * 2, 625)
        w_o = init_weights(625, 10)
        pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden)
        pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.)
        params = [w, w2, w3, w4, w_o]
    else:
        raise RuntimeError("Unreachable")

    cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop))
    updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize)

    y_nodrop = cgt.argmax(pofy_nodrop, axis=1)
    cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop))
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()

    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)
    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop,cost_nodrop])

    batch_size=128


    from cgt.tests import gradcheck_model
    if args.grad_check:
        cost_nodrop = cgt.core.clone(cost_nodrop, {X:Xtrain[:1],y:ytrain[:1]})
        print "doing gradient check..."
        print "------------------------------------"
        gradcheck_model(cost_nodrop, params[0:1])
        print "success!"
        return

    if args.profile: cgt.profiler.start()

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start+batch_size
            train(Xtrain[start:end], ytrain[start:end])
            if args.unittest: return
        elapsed = time.time() - tstart
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
    if args.profile: cgt.execution.profiler.print_stats()