Example #1
def test_cudnn():
    compile_info = get_compile_info()
    if not (compile_info["CGT_ENABLE_CUDNN"] and compile_info["CGT_ENABLE_CUDA"]):
        raise SkipTest("CUDNN not enabled. Skipping this test")

    Xval = nr.randn(2,3,19,18)
    Wval = nr.randn(5,3,3,3)
    bval = nr.randn(1,5,1,1)

    X = cgt.tensor4("X", fixed_shape=Xval.shape)
    W = cgt.tensor4("W", fixed_shape=Wval.shape)
    b = cgt.tensor4("b", fixed_shape=bval.shape)


    Y = cgt.core.Result(cudnn_ops.CudnnConvForward(1,1,1,1),[X, W, b])

    Y2 = nr.randn(*cgt.core.infer_shape(Y))

    fY = cgt.function([X,W,b],Y)
    Yval = fY(Xval,Wval,bval)
    cost = (Y*Y2).sum()
    fcost = cgt.function([X,W,b],cost)
    fgrad = cgt.function([X,W,b],cgt.grad(cost, [X,W,b]))
    angrads = fgrad(Xval,Wval,bval)
    nugrads = numeric_grad_multi(fcost, [Xval, Wval, bval],eps=1e-3)
    for (nugrad,angrad) in zip(nugrads,angrads):
        assert np.allclose(nugrad, angrad, rtol=9e-3, atol=1e-7) 
Example #2
def test_cudnn():
    if not get_compile_info()["CGT_ENABLE_CUDNN"]:
        raise SkipTest("CUDNN not enabled. Skipping this test")

    Xval = nr.randn(2, 3, 19, 18)
    Wval = nr.randn(5, 3, 3, 3)
    bval = nr.randn(1, 5, 1, 1)

    X = cgt.tensor4("X", fixed_shape=Xval.shape)
    W = cgt.tensor4("W", fixed_shape=Wval.shape)
    b = cgt.tensor4("b", fixed_shape=bval.shape)

    Y = cgt.core.Result(cudnn_ops.CudnnConvForward(1, 1, 1, 1), [X, W, b])

    Y2 = nr.randn(*cgt.core.infer_shape(Y))

    fY = cgt.function([X, W, b], Y)
    Yval = fY(Xval, Wval, bval)
    cost = (Y * Y2).sum()
    fcost = cgt.function([X, W, b], cost)
    fgrad = cgt.function([X, W, b], cgt.grad(cost, [X, W, b]))
    angrads = fgrad(Xval, Wval, bval)
    nugrads = numeric_grad_multi(fcost, [Xval, Wval, bval], eps=1e-3)
    for (nugrad, angrad) in zip(nugrads, angrads):
        assert np.allclose(nugrad, angrad)
Example #3
def test_cudnn():
    with cgt.scoped_update_config(precision="double",backend="native"):
        if not get_compile_info()["CGT_ENABLE_CUDNN"]:
            raise SkipTest("CUDNN not enabled. Skipping this test")

        Xval = nr.randn(2,3,19,18)
        Wval = nr.randn(5,3,3,3)
        bval = nr.randn(1,5,1,1)

        X = cgt.tensor4("X", fixed_shape=Xval.shape)
        W = cgt.tensor4("W", fixed_shape=Wval.shape)
        b = cgt.tensor4("b", fixed_shape=bval.shape)


        Y = cgt.core.Result(cudnn_ops.CudnnConvForward(1,1,1,1),[X, W, b])

        Y2 = nr.randn(*cgt.core.infer_shape(Y))

        fY = cgt.function([X,W,b],Y)
        Yval = fY(Xval,Wval,bval)
        cost = (Y*Y2).sum()
        fcost = cgt.function([X,W,b],cost)
        fgrad = cgt.function([X,W,b],cgt.grad(cost, [X,W,b]))
        angrads = fgrad(Xval,Wval,bval)
        nugrads = numeric_grad_multi(fcost, [Xval, Wval, bval],eps=1e-3)
        for (nugrad,angrad) in zip(nugrads,angrads):
            assert np.allclose(nugrad, angrad)
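The three test_cudnn variants above share one pattern: build the graph, compile fcost and fgrad, then compare cgt.grad against finite differences from numeric_grad_multi. That helper is not shown here (the im2col tests below import its single-input cousin numeric_grad from cgt.numeric_diff); a minimal central-difference sketch of what it plausibly does, with the _sketch suffix marking it as hypothetical:

import numpy as np

def numeric_grad_multi_sketch(f, args, eps=1e-4):
    # Central differences: perturb one entry of one input array at a time.
    # Relies on reshape(-1) returning a writable view of each contiguous array.
    grads = []
    for arg in args:
        grad = np.zeros_like(arg)
        flat, gflat = arg.reshape(-1), grad.reshape(-1)
        for j in range(flat.size):
            saved = flat[j]
            flat[j] = saved + eps
            fplus = f(*args)
            flat[j] = saved - eps
            fminus = f(*args)
            flat[j] = saved
            gflat[j] = (fplus - fminus) / (2 * eps)
        grads.append(grad)
    return grads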
Example #4
def build_model():
    net = {}
    net['input'] = cgt.tensor4(fixed_shape=(1, 3, IMAGE_W, IMAGE_W))  # InputLayer((1, 3, IMAGE_W, IMAGE_W))
    net['conv1_1'] = ConvLayer(net['input'], 64, 3)
    net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3)
    net['pool1'] = PoolLayer(net['conv1_2'], 2)
    net['conv2_1'] = ConvLayer(net['pool1'], 128, 3)
    net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3)
    net['pool2'] = PoolLayer(net['conv2_2'], 2)
    net['conv3_1'] = ConvLayer(net['pool2'], 256, 3)
    net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3)
    net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3)
    net['conv3_4'] = ConvLayer(net['conv3_3'], 256, 3)
    net['pool3'] = PoolLayer(net['conv3_4'], 2)
    net['conv4_1'] = ConvLayer(net['pool3'], 512, 3)
    net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3)
    net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3)
    net['conv4_4'] = ConvLayer(net['conv4_3'], 512, 3)
    net['pool4'] = PoolLayer(net['conv4_4'], 2)
    net['conv5_1'] = ConvLayer(net['pool4'], 512, 3)
    net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3)
    net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3)
    net['conv5_4'] = ConvLayer(net['conv5_3'], 512, 3)
    net['pool5'] = PoolLayer(net['conv5_4'], 2)

    return net
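build_model assumes ConvLayer and PoolLayer helpers; the commented-out InputLayer call suggests a port of the Lasagne VGG-19 definition. A plausible reading of those helpers in terms of the cgt.nn calls used elsewhere on this page (same-padded convolution plus ReLU, and square max-pooling). The names, defaults, and std=0.01 init are assumptions, not the original code:

import cgt
from cgt import nn

def ConvLayer(x, num_filters, kernel):
    # hypothetical helper: same-padded convolution followed by ReLU;
    # input channel count recovered from the incoming tensor's fixed shape
    in_channels = cgt.infer_shape(x)[1]
    pad = kernel // 2
    conv = nn.SpatialConvolution(in_channels, num_filters,
                                 kernelshape=(kernel, kernel), pad=(pad, pad),
                                 weight_init=nn.IIDGaussian(std=0.01))(x)
    return nn.rectify(conv)

def PoolLayer(x, size):
    # hypothetical helper: square max-pooling with stride equal to the window
    return nn.max_pool_2d(x, kernelshape=(size, size), stride=(size, size))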
Example #5
File: demo_cifar.py  Project: zclfly/cgt
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--epochs",type=int,default=10)
    args = parser.parse_args()

    batchsize = 64
    Xshape = (batchsize, 3, 32, 32)
    X = cgt.tensor4("X", fixed_shape = Xshape)
    y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4')

    conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=1e-4))(X)
    relu1 = nn.rectify(conv1)
    pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2))
    conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2),
        weight_init=nn.IIDGaussian(std=0.01))(pool1)
    relu2 = nn.rectify(conv2)
    pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2))
    conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool2)
    pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2))
    relu3 = nn.rectify(pool3)
    d0,d1,d2,d3 = relu3.shape
    flatlayer = relu3.reshape([d0,d1*d2*d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    ip1 = nn.Affine(nfeats, 10)(flatlayer)
    logprobs = nn.logsoftmax(ip1)
    loss = -logprobs[cgt.arange(batchsize), y].mean()

    params = nn.get_parameters(loss)
    updates = rmsprop_updates(loss, params, stepsize=1e-3)
    
    train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)

    if args.profile: cgt.profiler.start()

    data = np.load("/Users/joschu/Data/cifar-10-batches-py/cifar10.npz")
    Xtrain = data["X_train"]
    ytrain = data["y_train"]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        for start in xrange(0, Xtrain.shape[0], batchsize):
            tstart = time.time()
            end = start+batchsize
            print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart
            if start > batchsize*5: break
        # elapsed = time.time() - tstart
        # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        # testerr, testloss = computeloss(Xtest, ytest)
        # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
        if args.profile: 
            cgt.profiler.print_stats()
            return
        if args.unittest:
            break
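This demo (and example #6 below) calls an rmsprop_updates helper that is not shown. A sketch of RMSProp in CGT's update-pair convention, where cgt.function applies each (variable, new_value) pair on every call; cgt.shared, cgt.square, cgt.sqrt, and reading a parameter's value via p.op.get_value() are assumptions here:

def rmsprop_updates(cost, params, stepsize=0.001, rho=0.9, epsilon=1e-6):
    grads = cgt.grad(cost, params)
    updates = []
    for p, g in zip(params, grads):
        # running mean of squared gradients, one shared accumulator per parameter
        acc = cgt.shared(p.op.get_value() * 0.)  # assumes p.op.get_value() exists
        acc_new = rho * acc + (1 - rho) * cgt.square(g)
        updates.append((acc, acc_new))
        updates.append((p, p - stepsize * g / cgt.sqrt(acc_new + epsilon)))
    return updates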
Example #6
File: demo_cifar.py  Project: ketranm/cgt
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--epochs",type=int,default=10)
    args = parser.parse_args()

    batchsize = 64
    Xshape = (batchsize, 3, 32, 32)
    X = cgt.tensor4("X", fixed_shape = Xshape)
    y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4')

    conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=1e-4))(X)
    relu1 = nn.rectify(conv1)
    pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2))
    conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool1)
    relu2 = nn.rectify(conv2)
    pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2))
    conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool2)
    pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2))
    relu3 = nn.rectify(pool3)
    d0,d1,d2,d3 = relu3.shape
    flatlayer = relu3.reshape([d0,d1*d2*d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    ip1 = nn.Affine(nfeats, 10)(flatlayer)
    logprobs = nn.logsoftmax(ip1)
    loss = -logprobs[cgt.arange(batchsize), y].mean()

    params = nn.get_parameters(loss)
    updates = rmsprop_updates(loss, params, stepsize=1e-3)
    
    train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)

    if args.profile: cgt.profiler.start()

    data = fetch_dataset("http://rll.berkeley.edu/cgt-data/cifar10.npz")
    Xtrain = data["X_train"]
    ytrain = data["y_train"]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        for start in xrange(0, Xtrain.shape[0], batchsize):
            tstart = time.time()
            end = start+batchsize
            print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart
            if start > batchsize*5: break
        # elapsed = time.time() - tstart
        # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        # testerr, testloss = computeloss(Xtest, ytest)
        # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
        if args.profile: 
            cgt.profiler.print_stats()
            return
        if args.unittest:
            break
Example #7
    def make_updater_convnet():
        X = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28))  # so shapes can be inferred
        y = cgt.vector("y", dtype="i8")
        stepsize = cgt.scalar("stepsize")
        loss = build_convnet_return_loss(X, y)

        params = nn.get_parameters(loss)
        gparams = cgt.grad(loss, params)
        updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        return cgt.function([X, y, stepsize], loss, updates=updates)
Example #8
    def make_updater_convnet():
        X = cgt.tensor4("X", fixed_shape=(None, 1, 28,
                                          28))  # so shapes can be inferred
        y = cgt.vector("y", dtype='i8')
        stepsize = cgt.scalar("stepsize")
        loss = build_convnet_return_loss(X, y)

        params = nn.get_parameters(loss)
        gparams = cgt.grad(loss, params)
        updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        return cgt.function([X, y, stepsize], loss, updates=updates)
Example #9
def build_bilinear_net(input_shapes, **kwargs):
    x_shape, u_shape = input_shapes
    X = cgt.tensor4('X', fixed_shape=(None, ) + x_shape)
    U = cgt.matrix('U', fixed_shape=(None, ) + u_shape)

    X_diff_pred = Bilinear(input_shapes, b=None, name='bilinear')(X, U)
    X_next_pred = X + X_diff_pred
    Y = X.reshape((X.shape[0], cgt.mul_multi(X.shape[1:])))
    Y_diff_pred = X_diff_pred.reshape(
        (X_diff_pred.shape[0], cgt.mul_multi(X_diff_pred.shape[1:])))

    X_diff = cgt.tensor4('X_diff', fixed_shape=(None, ) + x_shape)
    X_next = X + X_diff
    loss = ((X_next - X_next_pred)**2).mean(axis=0).sum() / 2.

    net_name = 'BilinearNet'
    input_vars = OrderedDict([(var.name, var) for var in [X, U, X_diff]])
    pred_vars = OrderedDict([('Y_diff_pred', Y_diff_pred), ('Y', Y),
                             ('X_next_pred', X_next_pred)])
    return net_name, input_vars, pred_vars, loss
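A usage sketch for the constructor above: since input_vars and pred_vars are ordered dicts keyed by variable name, loss and prediction functions compile directly from the returned values. The shapes below are placeholders:

# hypothetical shapes: 3-channel 32x32 frames and 4-dimensional actions
net_name, input_vars, pred_vars, loss = build_bilinear_net(((3, 32, 32), (4,)))
f_loss = cgt.function(input_vars.values(), loss)   # inputs: X, U, X_diff
f_pred = cgt.function([input_vars['X'], input_vars['U']],
                      pred_vars['X_next_pred'])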
Example #10
def test_im2col():
    for settings in [ ((4,4),(0,0),(1,1)), ((3,3),(1,1),(2,2)), ((3,3),(1,1),(3,3)) ]:
        xval = np.arange(2*1*28*28).reshape(2,1,28,28).astype(cgt.floatX)
        x = cgt.tensor4("x", fixed_shape=xval.shape)
        y = im2col(x, *settings)
        h = cgt.constant(np.random.randn(*cgt.infer_shape(y)))
        cost = (y*h).sum()

        fcost = cgt.function([x],cost)
        fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

        from cgt.numeric_diff import numeric_grad
        gnum = numeric_grad(fcost, xval,eps=1e-5)
        gana = fgrad(xval)
        assert np.allclose(gnum, gana)
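The im2col tests check gradients without spelling out the patch layout. For intuition, a plain-NumPy im2col sketch over (N, C, H, W) inputs, taking the same (kernelshape, pad, stride) triple as the settings tuples above; cgt's actual output layout may order these axes differently:

import numpy as np

def im2col_ref(x, kernelshape, pad, stride):
    # x: (N, C, H, W) -> patches: (N, outH, outW, C*kh*kw)
    kh, kw = kernelshape
    ph, pw = pad
    sh, sw = stride
    n, c, h, w = x.shape
    xp = np.pad(x, ((0, 0), (0, 0), (ph, ph), (pw, pw)), mode='constant')
    out_h = (h + 2 * ph - kh) // sh + 1
    out_w = (w + 2 * pw - kw) // sw + 1
    cols = np.empty((n, out_h, out_w, c * kh * kw), dtype=x.dtype)
    for i in range(out_h):
        for j in range(out_w):
            patch = xp[:, :, i * sh:i * sh + kh, j * sw:j * sw + kw]
            cols[:, i, j, :] = patch.reshape(n, -1)
    return cols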
Example #11
    def make_updater_convnet_parallel():
        X = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28))  # so shapes can be inferred
        y = cgt.vector("y", dtype="i8")
        stepsize = cgt.scalar("stepsize")
        loss = build_convnet_return_loss(X, y)

        m = nn.Module([X, y], [loss])
        split_loss = 0
        for start in xrange(0, batch_size, batch_size // 4):
            sli = slice(start, start + batch_size // 4)
            split_loss += m([X[sli], y[sli]])[0]
        split_loss /= 4
        params = nn.get_parameters(loss)
        gparams = cgt.grad(split_loss, params)
        updates2 = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        return cgt.function([X, y, stepsize], split_loss, updates=updates2)
Example #12
def test_pool(**kwargs):
    np.random.seed(0)
    x = cgt.tensor4("x", fixed_shape=(2,3,5,7))
    y = max_pool_2d(x, (4,4),(0,0),(1,1))
    xval = np.random.randn(2,3,5,7)
    hval = np.random.randn(*cgt.infer_shape(y))
    h = cgt.constant(hval)

    cost = (y*h).sum()

    fcost = cgt.function([x], cost)
    fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

    from cgt.numeric_diff import numeric_grad
    gnum = numeric_grad(fcost, xval)
    gana = fgrad(xval)
    assert np.allclose(gnum,gana)
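max_pool_2d is called here with the same positional (kernelshape, pad, stride) convention as im2col. A plain-NumPy forward-pass sketch under that convention, usable as an independent check on fcost above; the name is hypothetical and the layout is assumed:

import numpy as np

def max_pool_2d_ref(x, kernelshape, pad, stride):
    # x: (N, C, H, W) -> pooled: (N, C, outH, outW)
    kh, kw = kernelshape
    ph, pw = pad
    sh, sw = stride
    n, c, h, w = x.shape
    xp = np.pad(x, ((0, 0), (0, 0), (ph, ph), (pw, pw)),
                mode='constant', constant_values=-np.inf)
    out_h = (h + 2 * ph - kh) // sh + 1
    out_w = (w + 2 * pw - kw) // sw + 1
    out = np.empty((n, c, out_h, out_w), dtype=x.dtype)
    for i in range(out_h):
        for j in range(out_w):
            window = xp[:, :, i * sh:i * sh + kh, j * sw:j * sw + kw]
            out[:, :, i, j] = window.max(axis=(2, 3))
    return out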
Example #13
def test_im2col():
    for settings in [((4, 4), (0, 0), (1, 1)), ((3, 3), (1, 1), (2, 2)),
                     ((3, 3), (1, 1), (3, 3))]:
        xval = np.arange(2 * 1 * 28 * 28).reshape(2, 1, 28,
                                                  28).astype(cgt.floatX)
        x = cgt.tensor4("x", fixed_shape=xval.shape)
        y = im2col(x, *settings)
        h = cgt.constant(np.random.randn(*cgt.infer_shape(y)))
        cost = (y * h).sum()

        fcost = cgt.function([x], cost)
        fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

        from cgt.numeric_diff import numeric_grad
        gnum = numeric_grad(fcost, xval, eps=1e-5)
        gana = fgrad(xval)
        assert np.allclose(gnum, gana)
Example #14
    def make_updater_convnet_parallel():
        X = cgt.tensor4("X", fixed_shape=(None, 1, 28,
                                          28))  # so shapes can be inferred
        y = cgt.vector("y", dtype='i8')
        stepsize = cgt.scalar("stepsize")
        loss = build_convnet_return_loss(X, y)

        m = nn.Module([X, y], [loss])
        split_loss = 0
        for start in xrange(0, batch_size, batch_size // 4):
            sli = slice(start, start + batch_size // 4)
            split_loss += m([X[sli], y[sli]])[0]
        split_loss /= 4
        params = nn.get_parameters(loss)
        gparams = cgt.grad(split_loss, params)
        updates2 = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        return cgt.function([X, y, stepsize], split_loss, updates=updates2)
Example #15
def test_cpu_pool(**kwargs):
    np.random.seed(0)
    x = cgt.tensor4("x", fixed_shape=(2, 3, 5, 7))
    y = max_pool_2d(x, (4, 4), (0, 0), (1, 1))
    xval = np.random.randn(2, 3, 5, 7)
    hval = np.random.randn(*cgt.infer_shape(y))
    h = cgt.constant(hval)

    cost = (y * h).sum()

    fcost = cgt.function([x], cost)
    fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

    from cgt.numeric_diff import numeric_grad
    gnum = numeric_grad(fcost, xval)
    gana = fgrad(xval)
    assert np.allclose(gnum, gana)
Example #16
def test_cpu_pool():
    with cgt.scoped_update_config(precision="quad", backend="native"):
        print cgt.get_precision()
        ci = get_compile_info()

        np.random.seed(0)
        x = cgt.tensor4("x", fixed_shape=(2, 3, 5, 7))
        y = max_pool_2d(x, (4, 4), (0, 0), (1, 1))
        xval = np.random.randn(2, 3, 5, 7)
        hval = np.random.randn(*cgt.infer_shape(y))
        h = cgt.constant(hval)

        cost = (y * h).sum()

        fcost = cgt.function([x], cost)
        fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

        from cgt.numeric_diff import numeric_grad
        gnum = numeric_grad(fcost, xval)
        gana = fgrad(xval)

        assert np.allclose(gnum, gana)
Example #17
def test_cpu_pool():
    with cgt.scoped_update_config(precision="quad",backend="native"):
        print cgt.get_precision()
        ci = get_compile_info()

        np.random.seed(0)
        x = cgt.tensor4("x", fixed_shape=(2,3,5,7))
        y = max_pool_2d(x, (4,4),(0,0),(1,1))
        xval = np.random.randn(2,3,5,7)
        hval = np.random.randn(*cgt.infer_shape(y))
        h = cgt.constant(hval)

        cost = (y*h).sum()

        fcost = cgt.function([x], cost)
        fgrad = cgt.function([x], cgt.grad(cost, [x])[0])

        from cgt.numeric_diff import numeric_grad
        gnum = numeric_grad(fcost, xval)
        gana = fgrad(xval)

        assert np.allclose(gnum,gana)
Example #18
def tensor4(name=None, dtype=None, fixed_shape=None):
    return cgt.tensor4(name, dtype, fixed_shape)
Example #19
def style_loss(a, x):
    A = gram_matrix(a)
    G = gram_matrix(x)

    N = a.shape[1]
    M = a.shape[2] * a.shape[3]

    loss = 1./(4 * N**2 * M**2) * ((G - A)**2).sum()
    return loss

def total_variation_loss(x):
    return (((x[:,:,:-1,:-1] - x[:,:,1:,:-1])**2 + (x[:,:,:-1,:-1] - x[:,:,:-1,1:])**2)**1.25).sum()

layers = ['conv4_2', 'conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
layers = {k: net[k] for k in layers}

input_im_theano = cgt.tensor4()
outputs = lasagne.layers.get_output(layers.values(), input_im_theano)
photo_features = {k: theano.shared(output.eval({input_im_theano: photo}))
                  for k, output in zip(layers.keys(), outputs)}
print 'finished photo features'
art_features = {k: theano.shared(output.eval({input_im_theano: art}))
                for k, output in zip(layers.keys(), outputs)}
print 'finished art features'

generated_image = theano.shared(floatX(np.random.uniform(-128, 128, (1, 3, IMAGE_W, IMAGE_W))))

gen_features = lasagne.layers.get_output(layers.values(), generated_image)
print 'finished gen_features 1'
gen_features = {k: v for k, v in zip(layers.keys(), gen_features)}

# Define loss function
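The snippet stops at "# Define loss function". In the Lasagne neural-style recipe this code is ported from, the continuation is a weighted sum of content, style, and total-variation terms; a sketch of that step, where the content_loss helper and all the weights are assumptions borrowed from that recipe:

losses = []
# content term on a mid-level layer
losses.append(0.001 * content_loss(photo_features, gen_features, 'conv4_2'))
# style terms on several layers
for layer in ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']:
    losses.append(0.2e6 * style_loss(art_features[layer], gen_features[layer]))
# total-variation penalty to keep the generated image smooth
losses.append(0.1e-7 * total_variation_loss(generated_image))
total_loss = sum(losses)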
Example #20
# shuffle the data
np.random.seed(42)
sortinds = np.random.permutation(Xtrain.shape[0])
Xtrain = Xtrain[sortinds]
ytrain = ytrain[sortinds]

# reshape for convnet
Xtrainimg = Xtrain.reshape(-1, 1, 28, 28)
Xtestimg = Xtest.reshape(-1, 1, 28, 28)

# Model:
# Make it VGG-like
# VGG nets use 3x3 kernels with padding 1 and 2x2 max-pooling with stride 2.
#
# VGG is a large model, so here we'll just do a small part of it.
X = cgt.tensor4('X', fixed_shape=(None, 1, 28, 28))
y = cgt.vector('y', dtype='i8')

conv1 = nn.rectify(
        nn.SpatialConvolution(1, 32, kernelshape=(3,3), stride=(1,1), pad=(1,1), weight_init=nn.IIDGaussian(std=.1))(X)
        )
pool1 = nn.max_pool_2d(conv1, kernelshape=(2,2), stride=(2,2))

conv2 = nn.rectify(
        nn.SpatialConvolution(32, 32, kernelshape=(3,3), stride=(1,1), pad=(1,1), weight_init=nn.IIDGaussian(std=.1))(pool1)
        )
pool2 = nn.max_pool_2d(conv2, kernelshape=(2,2), stride=(2,2))
d0, d1, d2, d3 = pool2.shape

flat = pool2.reshape([d0, d1*d2*d3])
nfeats = cgt.infer_shape(flat)[1]
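The snippet stops after flattening. Following the same head as the CIFAR demos above (Affine into logsoftmax, mean negative log-likelihood), a sketch of the missing classifier and training step; indexing with cgt.arange over the symbolic batch dimension is assumed to behave as in example #5's fixed-batch version:

ip1 = nn.Affine(nfeats, 10)(flat)
logprobs = nn.logsoftmax(ip1)
# mean negative log-likelihood of the true labels
loss = -logprobs[cgt.arange(X.shape[0]), y].mean()

params = nn.get_parameters(loss)
updates = rmsprop_updates(loss, params, stepsize=1e-3)
train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)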
Example #21
def build_fcn_action_cond_encoder_net(input_shapes, levels=None):
    x_shape, u_shape = input_shapes
    x_c_dim = x_shape[0]
    x1_c_dim = 16
    levels = levels or [3]
    levels = sorted(set(levels))

    X = cgt.tensor4('X', fixed_shape=(None, ) + x_shape)
    U = cgt.matrix('U', fixed_shape=(None, ) + u_shape)

    # encoding
    Xlevels = {}
    for level in range(levels[-1] + 1):
        if level == 0:
            Xlevel = X
        else:
            if level == 1:
                xlevelm1_c_dim = x_c_dim
                xlevel_c_dim = x1_c_dim
            else:
                xlevelm1_c_dim = xlevel_c_dim
                xlevel_c_dim = 2 * xlevel_c_dim
            Xlevel_1 = nn.rectify(
                nn.SpatialConvolution(xlevelm1_c_dim,
                                      xlevel_c_dim,
                                      kernelshape=(3, 3),
                                      pad=(1, 1),
                                      stride=(1, 1),
                                      name='conv%d_1' % level,
                                      weight_init=nn.IIDGaussian(std=0.01))(
                                          Xlevels[level - 1]))
            Xlevel_2 = nn.rectify(
                nn.SpatialConvolution(
                    xlevel_c_dim,
                    xlevel_c_dim,
                    kernelshape=(3, 3),
                    pad=(1, 1),
                    stride=(1, 1),
                    name='conv%d_2' % level,
                    weight_init=nn.IIDGaussian(std=0.01))(Xlevel_1))
            Xlevel = nn.max_pool_2d(Xlevel_2,
                                    kernelshape=(2, 2),
                                    pad=(0, 0),
                                    stride=(2, 2))
        Xlevels[level] = Xlevel

    # bilinear
    Xlevels_next_pred_0 = {}
    Ylevels = OrderedDict()
    Ylevels_diff_pred = OrderedDict()
    for level in levels:
        Xlevel = Xlevels[level]
        Xlevel_diff_pred = Bilinear(input_shapes,
                                    b=None,
                                    axis=2,
                                    name='bilinear%d' % level)(Xlevel, U)
        Xlevels_next_pred_0[level] = Xlevel + Xlevel_diff_pred
        Ylevels[level] = Xlevel.reshape(
            (Xlevel.shape[0], cgt.mul_multi(Xlevel.shape[1:])))
        Ylevels_diff_pred[level] = Xlevel_diff_pred.reshape(
            (Xlevel_diff_pred.shape[0],
             cgt.mul_multi(Xlevel_diff_pred.shape[1:])))

    # decoding
    Xlevels_next_pred = {}
    for level in range(levels[-1] + 1)[::-1]:
        if level == levels[-1]:
            Xlevel_next_pred = Xlevels_next_pred_0[level]
        else:
            if level == 0:
                xlevelm1_c_dim = x_c_dim
            elif level < levels[-1] - 1:
                xlevel_c_dim = xlevelm1_c_dim
                xlevelm1_c_dim = xlevelm1_c_dim // 2
            Xlevel_next_pred_2 = SpatialDeconvolution(
                xlevel_c_dim,
                xlevel_c_dim,
                kernelshape=(2, 2),
                pad=(0, 0),
                stride=(2, 2),
                name='upsample%d' % (level + 1),
                weight_init=nn.IIDGaussian(std=0.01))(Xlevels_next_pred[
                    level +
                    1])  # TODO initialize with bilinear # TODO should rectify?
            Xlevel_next_pred_1 = nn.rectify(
                SpatialDeconvolution(
                    xlevel_c_dim,
                    xlevel_c_dim,
                    kernelshape=(3, 3),
                    pad=(1, 1),
                    stride=(1, 1),
                    name='deconv%d_2' % (level + 1),
                    weight_init=nn.IIDGaussian(std=0.01))(Xlevel_next_pred_2))
            nonlinearity = nn.rectify if level > 0 else cgt.tanh
            Xlevel_next_pred = nonlinearity(
                SpatialDeconvolution(
                    xlevel_c_dim,
                    xlevelm1_c_dim,
                    kernelshape=(3, 3),
                    pad=(1, 1),
                    stride=(1, 1),
                    name='deconv%d_1' % (level + 1),
                    weight_init=nn.IIDGaussian(std=0.01))(Xlevel_next_pred_1))
            if level in Xlevels_next_pred_0:
                coefs = nn.parameter(nn.init_array(nn.Constant(0.5), (2, )),
                                     name='sum%d.coef' % level)
                Xlevel_next_pred = coefs[0] * Xlevel_next_pred + coefs[
                    1] * Xlevels_next_pred_0[level]
            # TODO: tanh should be after sum
        Xlevels_next_pred[level] = Xlevel_next_pred

    X_next_pred = Xlevels_next_pred[0]
    Y = cgt.concatenate(Ylevels.values(), axis=1)
    Y_diff_pred = cgt.concatenate(Ylevels_diff_pred.values(), axis=1)

    X_diff = cgt.tensor4('X_diff', fixed_shape=(None, ) + x_shape)
    X_next = X + X_diff
    loss = ((X_next - X_next_pred)**2).mean(axis=0).sum() / 2.

    net_name = 'FcnActionCondEncoderNet_levels' + ''.join(
        str(level) for level in levels)
    input_vars = OrderedDict([(var.name, var) for var in [X, U, X_diff]])
    pred_vars = OrderedDict([('Y_diff_pred', Y_diff_pred), ('Y', Y),
                             ('X_next_pred', X_next_pred)])
    return net_name, input_vars, pred_vars, loss
Example #22
File: demo_mnist.py  Project: EdsterG/cgt
def main():
    import argparse
    parser=argparse.ArgumentParser()
    parser.add_argument("--epochs",type=int,default=10)
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--dropout",action="store_true")
    parser.add_argument("--stepsize",type=float, default=.001)
    parser.add_argument("--model",choices=["dense","conv"],default="dense")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--grad_check",action="store_true")
    parser.add_argument("--devtype",choices=["cpu","gpu"],default="cpu")
    args = parser.parse_args()

    if args.grad_check: cgt.set_precision("quad")

    # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/
    # converted to npz
    mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz")

    Xdata = (mnist["X"]/255.).astype(cgt.floatX)
    ydata = mnist["y"]

    np.random.seed(0)

    cgt.update_config(default_device=cgt.core.Device(devtype=args.devtype), backend="native")

    if args.model=="conv":
        Xdata = Xdata.reshape(-1, 1, 28, 28)

    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    X = cgt.tensor4("X",fixed_shape=(None,1,28,28)) if args.model=="conv" else cgt.matrix("X", fixed_shape=(None,28*28))
    y = cgt.vector("y",dtype='i8')

    if args.model == "dense":
        p_drop_input,p_drop_hidden = (0.2, 0.5) if args.dropout else (0,0)    
        w_h = init_weights(784, 256)
        w_h2 = init_weights(256, 256)
        w_o = init_weights(256, 10)
        pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden)
        pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.)
        params = [w_h, w_h2, w_o]        
    elif args.model == "conv":
        p_drop_conv,p_drop_hidden = (0.2, 0.5) if args.dropout else (0,0)            
        w = init_weights(32, 1, 3, 3)
        w2 = init_weights(64, 32, 3, 3)
        w3 = init_weights(128, 64, 3, 3)
        w4 = init_weights(128 * 2 * 2, 625)
        w_o = init_weights(625, 10)
        pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden)
        pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.)
        params = [w, w2, w3, w4, w_o]
    else:
        raise RuntimeError("Unreachable")

    cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop))
    updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize)

    y_nodrop = cgt.argmax(pofy_nodrop, axis=1)
    cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop))
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()

    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)
    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop,cost_nodrop])

    batch_size=128


    from cgt.tests import gradcheck_model
    if args.grad_check:
        cost_nodrop = cgt.core.clone(cost_nodrop, {X:Xtrain[:1],y:ytrain[:1]})
        print "doing gradient check..."
        print "------------------------------------"
        gradcheck_model(cost_nodrop, params[0:1])
        print "success!"
        return

    if args.profile: cgt.profiler.start()

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start+batch_size
            train(Xtrain[start:end], ytrain[start:end])
            if args.unittest: return
        elapsed = time.time() - tstart
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
    if args.profile: cgt.execution.profiler.print_stats()
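Both MNIST examples assume init_weights and the dense_model/convnet_model builders, none of which are shown. Hedged sketches of the first two, consistent with the 784-256-256-10 shapes used above; cgt.shared, cgt.dot, nn.dropout, and nn.softmax are assumed APIs here:

def init_weights(*shape):
    # small Gaussian init held in a shared variable so cgt.grad can reach it
    return cgt.shared(np.random.randn(*shape) * 0.01)

def dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden):
    # two rectified hidden layers with dropout, softmax output
    X = nn.dropout(X, p_drop_input)
    h = nn.rectify(cgt.dot(X, w_h))
    h = nn.dropout(h, p_drop_hidden)
    h2 = nn.rectify(cgt.dot(h, w_h2))
    h2 = nn.dropout(h2, p_drop_hidden)
    return nn.softmax(cgt.dot(h2, w_o))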
Example #23
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--dropout", action="store_true")
    parser.add_argument("--stepsize", type=float, default=.001)
    parser.add_argument("--model", choices=["dense", "conv"], default="dense")
    parser.add_argument("--unittest", action="store_true")
    parser.add_argument("--grad_check", action="store_true")
    args = parser.parse_args()

    if args.grad_check: cgt.set_precision("quad")

    # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/
    # converted to npz
    mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz")

    Xdata = (mnist["X"] / 255.).astype(cgt.floatX)
    ydata = mnist["y"]

    np.random.seed(0)

    if args.model == "conv":
        Xdata = Xdata.reshape(-1, 1, 28, 28)

    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]

    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]

    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    X = cgt.tensor4("X",
                    fixed_shape=(None, 1, 28,
                                 28)) if args.model == "conv" else cgt.matrix(
                                     "X", fixed_shape=(None, 28 * 28))
    y = cgt.vector("y", dtype='i8')

    if args.model == "dense":
        p_drop_input, p_drop_hidden = (0.2, 0.5) if args.dropout else (0, 0)
        w_h = init_weights(784, 256)
        w_h2 = init_weights(256, 256)
        w_o = init_weights(256, 10)
        pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden)
        pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.)
        params = [w_h, w_h2, w_o]
    elif args.model == "conv":
        p_drop_conv, p_drop_hidden = (0.2, 0.5) if args.dropout else (0, 0)
        w = init_weights(32, 1, 3, 3)
        w2 = init_weights(64, 32, 3, 3)
        w3 = init_weights(128, 64, 3, 3)
        w4 = init_weights(128 * 2 * 2, 625)
        w_o = init_weights(625, 10)
        pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv,
                                  p_drop_hidden)
        pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.)
        params = [w, w2, w3, w4, w_o]
    else:
        raise RuntimeError("Unreachable")

    cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop))
    updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize)

    y_nodrop = cgt.argmax(pofy_nodrop, axis=1)
    cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop))
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()

    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)
    computeloss = cgt.function(inputs=[X, y],
                               outputs=[err_nodrop, cost_nodrop])

    batch_size = 128

    from cgt.tests import gradcheck_model
    if args.grad_check:
        cost_nodrop = cgt.core.clone(cost_nodrop, {
            X: Xtrain[:1],
            y: ytrain[:1]
        })
        print "doing gradient check..."
        print "------------------------------------"
        gradcheck_model(cost_nodrop, params[0:1])
        print "success!"
        return

    if args.profile: cgt.profiler.start()

    print fmt_row(10, [
        "Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"
    ])
    for i_epoch in xrange(args.epochs):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start + batch_size
            train(Xtrain[start:end], ytrain[start:end])
            if args.unittest: return
        elapsed = time.time() - tstart
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)],
                                          ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print fmt_row(
            10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
    if args.profile: cgt.execution.profiler.print_stats()