def test_cudnn():
    """Compare analytic and numeric gradients of the cuDNN convolution op.

    Skipped unless the compile info reports both CGT_ENABLE_CUDNN and
    CGT_ENABLE_CUDA.
    """
    info = get_compile_info()
    if not info["CGT_ENABLE_CUDNN"] or not info["CGT_ENABLE_CUDA"]:
        raise SkipTest("CUDNN not enabled. Skipping this test")

    # Concrete arrays; the random draws happen in the order X, W, b.
    x_val = nr.randn(2, 3, 19, 18)
    w_val = nr.randn(5, 3, 3, 3)
    b_val = nr.randn(1, 5, 1, 1)

    # Symbolic inputs carrying the same fixed shapes.
    x_sym = cgt.tensor4("X", fixed_shape=x_val.shape)
    w_sym = cgt.tensor4("W", fixed_shape=w_val.shape)
    b_sym = cgt.tensor4("b", fixed_shape=b_val.shape)

    conv_out = cgt.core.Result(cudnn_ops.CudnnConvForward(1, 1, 1, 1),
                               [x_sym, w_sym, b_sym])
    # Random weighting turns the conv output into a scalar cost.
    weighting = nr.randn(*cgt.core.infer_shape(conv_out))

    # Run the forward pass once on its own before differentiating.
    forward = cgt.function([x_sym, w_sym, b_sym], conv_out)
    forward(x_val, w_val, b_val)

    cost = (conv_out * weighting).sum()
    cost_fn = cgt.function([x_sym, w_sym, b_sym], cost)
    grad_fn = cgt.function([x_sym, w_sym, b_sym],
                           cgt.grad(cost, [x_sym, w_sym, b_sym]))

    analytic = grad_fn(x_val, w_val, b_val)
    numeric = numeric_grad_multi(cost_fn, [x_val, w_val, b_val], eps=1e-3)
    for numeric_g, analytic_g in zip(numeric, analytic):
        assert np.allclose(numeric_g, analytic_g, rtol=9e-3, atol=1e-7)
def test_cudnn():
    """Gradient check for the cuDNN convolution forward op.

    Skipped when the compile info does not report CGT_ENABLE_CUDNN.
    Analytic and numeric gradients are compared with np.allclose defaults.
    """
    cudnn_enabled = get_compile_info()["CGT_ENABLE_CUDNN"]
    if not cudnn_enabled:
        raise SkipTest("CUDNN not enabled. Skipping this test")

    # Random test data, drawn in the order X, W, b.
    x_val = nr.randn(2, 3, 19, 18)
    w_val = nr.randn(5, 3, 3, 3)
    b_val = nr.randn(1, 5, 1, 1)

    x_sym = cgt.tensor4("X", fixed_shape=x_val.shape)
    w_sym = cgt.tensor4("W", fixed_shape=w_val.shape)
    b_sym = cgt.tensor4("b", fixed_shape=b_val.shape)

    conv_op = cudnn_ops.CudnnConvForward(1, 1, 1, 1)
    conv_out = cgt.core.Result(conv_op, [x_sym, w_sym, b_sym])
    weighting = nr.randn(*cgt.core.infer_shape(conv_out))

    # Exercise the forward function once before building the cost graph.
    forward = cgt.function([x_sym, w_sym, b_sym], conv_out)
    forward(x_val, w_val, b_val)

    cost = (conv_out * weighting).sum()
    cost_fn = cgt.function([x_sym, w_sym, b_sym], cost)
    symbolic_grads = cgt.grad(cost, [x_sym, w_sym, b_sym])
    grad_fn = cgt.function([x_sym, w_sym, b_sym], symbolic_grads)

    analytic = grad_fn(x_val, w_val, b_val)
    numeric = numeric_grad_multi(cost_fn, [x_val, w_val, b_val], eps=1e-3)
    for numeric_g, analytic_g in zip(numeric, analytic):
        assert np.allclose(numeric_g, analytic_g)
def test_cudnn():
    """Gradient check for the cuDNN convolution op in double precision.

    The whole test runs inside a scoped config with precision="double" and
    backend="native". It is skipped when CGT_ENABLE_CUDNN is not set in the
    compile info.
    """
    with cgt.scoped_update_config(precision="double", backend="native"):
        cudnn_enabled = get_compile_info()["CGT_ENABLE_CUDNN"]
        if not cudnn_enabled:
            raise SkipTest("CUDNN not enabled. Skipping this test")

        # Random test data, drawn in the order X, W, b.
        x_val = nr.randn(2, 3, 19, 18)
        w_val = nr.randn(5, 3, 3, 3)
        b_val = nr.randn(1, 5, 1, 1)

        x_sym = cgt.tensor4("X", fixed_shape=x_val.shape)
        w_sym = cgt.tensor4("W", fixed_shape=w_val.shape)
        b_sym = cgt.tensor4("b", fixed_shape=b_val.shape)

        conv_out = cgt.core.Result(cudnn_ops.CudnnConvForward(1, 1, 1, 1),
                                   [x_sym, w_sym, b_sym])
        weighting = nr.randn(*cgt.core.infer_shape(conv_out))

        # Forward pass is evaluated once before the gradient functions.
        forward = cgt.function([x_sym, w_sym, b_sym], conv_out)
        forward(x_val, w_val, b_val)

        cost = (conv_out * weighting).sum()
        cost_fn = cgt.function([x_sym, w_sym, b_sym], cost)
        grad_fn = cgt.function([x_sym, w_sym, b_sym],
                               cgt.grad(cost, [x_sym, w_sym, b_sym]))

        analytic = grad_fn(x_val, w_val, b_val)
        numeric = numeric_grad_multi(cost_fn, [x_val, w_val, b_val], eps=1e-3)
        for numeric_g, analytic_g in zip(numeric, analytic):
            assert np.allclose(numeric_g, analytic_g)
def build_model():
    """Build the layer dictionary for the convolutional stack.

    Returns a dict with an 'input' entry followed by five stages. Stage k
    holds 'conv{k}_1' .. 'conv{k}_n' (each ConvLayer fed by the previous
    layer) and a closing 'pool{k}'.
    """
    # originally InputLayer((1, 3, IMAGE_W, IMAGE_W))
    current = cgt.tensor4(fixed_shape=(1, 3, IMAGE_W, IMAGE_W))
    net = {'input': current}

    # (number of ConvLayers in the stage, second ConvLayer argument)
    stage_specs = [(2, 64), (2, 128), (4, 256), (4, 512), (4, 512)]
    for stage, (conv_count, width) in enumerate(stage_specs, 1):
        for index in range(1, conv_count + 1):
            current = ConvLayer(current, width, 3)
            net['conv%d_%d' % (stage, index)] = current
        current = PoolLayer(current, 2)
        net['pool%d' % stage] = current
    return net
def main(): parser = argparse.ArgumentParser() parser.add_argument("--profile",action="store_true") parser.add_argument("--unittest",action="store_true") parser.add_argument("--epochs",type=int,default=10) args = parser.parse_args() batchsize = 64 Xshape = (batchsize, 3, 32, 32) X = cgt.tensor4("X", fixed_shape = Xshape) y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4') conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), weight_init=nn.IIDGaussian(std=1e-4))(X) relu1 = nn.rectify(conv1) pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2)) conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), weight_init=nn.IIDGaussian(std=0.01))(relu1) relu2 = nn.rectify(conv2) pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2)) conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), weight_init=nn.IIDGaussian(std=0.01))(pool2) pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2)) relu3 = nn.rectify(pool3) d0,d1,d2,d3 = relu3.shape flatlayer = relu3.reshape([d0,d1*d2*d3]) nfeats = cgt.infer_shape(flatlayer)[1] ip1 = nn.Affine(nfeats, 10)(flatlayer) logprobs = nn.logsoftmax(ip1) loss = -logprobs[cgt.arange(batchsize), y].mean() params = nn.get_parameters(loss) updates = rmsprop_updates(loss, params, stepsize=1e-3) train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates) if args.profile: cgt.profiler.start() data = np.load("/Users/joschu/Data/cifar-10-batches-py/cifar10.npz") Xtrain = data["X_train"] ytrain = data["y_train"] print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"]) for i_epoch in xrange(args.epochs): for start in xrange(0, Xtrain.shape[0], batchsize): tstart = time.time() end = start+batchsize print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart if start > batchsize*5: break # elapsed = time.time() - tstart # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)]) # testerr, testloss = computeloss(Xtest, 
ytest) # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]) if args.profile: cgt.profiler.print_stats() return if args.unittest: break
def main(): parser = argparse.ArgumentParser() parser.add_argument("--profile",action="store_true") parser.add_argument("--unittest",action="store_true") parser.add_argument("--epochs",type=int,default=10) args = parser.parse_args() batchsize = 64 Xshape = (batchsize, 3, 32, 32) X = cgt.tensor4("X", fixed_shape = Xshape) y = cgt.vector("y", fixed_shape = (batchsize,), dtype='i4') conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), weight_init=nn.IIDGaussian(std=1e-4))(X) relu1 = nn.rectify(conv1) pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2)) conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2), weight_init=nn.IIDGaussian(std=0.01))(pool1) relu2 = nn.rectify(conv2) pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2)) conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), weight_init=nn.IIDGaussian(std=0.01))(pool2) pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2)) relu3 = nn.rectify(pool3) d0,d1,d2,d3 = relu3.shape flatlayer = relu3.reshape([d0,d1*d2*d3]) nfeats = cgt.infer_shape(flatlayer)[1] ip1 = nn.Affine(nfeats, 10)(flatlayer) logprobs = nn.logsoftmax(ip1) loss = -logprobs[cgt.arange(batchsize), y].mean() params = nn.get_parameters(loss) updates = rmsprop_updates(loss, params, stepsize=1e-3) train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates) if args.profile: cgt.profiler.start() data = fetch_dataset("http://rll.berkeley.edu/cgt-data/cifar10.npz") Xtrain = data["X_train"] ytrain = data["y_train"] print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"]) for i_epoch in xrange(args.epochs): for start in xrange(0, Xtrain.shape[0], batchsize): tstart = time.time() end = start+batchsize print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart if start > batchsize*5: break # elapsed = time.time() - tstart # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)]) # testerr, testloss = computeloss(Xtest, 
ytest) # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]) if args.profile: cgt.profiler.print_stats() return if args.unittest: break
def make_updater_convnet():
    """Compile a gradient-descent step for the convnet loss.

    Returns a cgt function of (X, y, stepsize) that outputs the loss and
    applies ``p <- p - stepsize * dloss/dp`` to every parameter returned by
    nn.get_parameters(loss).
    """
    images = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28))  # so shapes can be inferred
    labels = cgt.vector("y", dtype="i8")
    step = cgt.scalar("stepsize")

    loss = build_convnet_return_loss(images, labels)
    parameters = nn.get_parameters(loss)
    gradients = cgt.grad(loss, parameters)

    sgd_updates = []
    for param, grad in zip(parameters, gradients):
        sgd_updates.append((param, param - step * grad))
    return cgt.function([images, labels, step], loss, updates=sgd_updates)
def make_updater_convnet():
    """Return a compiled function performing one gradient step on the convnet.

    The function takes (X, y, stepsize), returns the loss, and updates each
    parameter p found by nn.get_parameters(loss) to ``p - stepsize * grad``.
    """
    x_in = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28))  # so shapes can be inferred
    y_in = cgt.vector("y", dtype='i8')
    lr = cgt.scalar("stepsize")

    loss = build_convnet_return_loss(x_in, y_in)
    weights = nn.get_parameters(loss)
    weight_grads = cgt.grad(loss, weights)

    step_updates = []
    for weight, weight_grad in zip(weights, weight_grads):
        new_value = weight - lr * weight_grad
        step_updates.append((weight, new_value))
    return cgt.function([x_in, y_in, lr], loss, updates=step_updates)
def build_bilinear_net(input_shapes, **kwargs):
    """Build the symbolic graph of a single-layer bilinear prediction net.

    Args:
        input_shapes: pair ``(x_shape, u_shape)`` of tuples; each is prefixed
            with ``None`` to form the fixed shapes of inputs X and U.
        **kwargs: accepted and ignored.

    Returns:
        ``(net_name, input_vars, pred_vars, loss)``. ``input_vars`` maps
        variable names to X, U and X_diff. ``pred_vars`` holds 'Y_diff_pred',
        'Y' and 'X_next_pred'. ``loss`` is half the squared error between
        X + X_diff and the prediction, averaged over axis 0 and then summed.
    """
    x_shape, u_shape = input_shapes
    leading = (None, )
    X = cgt.tensor4('X', fixed_shape=leading + x_shape)
    U = cgt.matrix('U', fixed_shape=leading + u_shape)

    bilinear = Bilinear(input_shapes, b=None, name='bilinear')
    X_diff_pred = bilinear(X, U)
    X_next_pred = X + X_diff_pred

    def flatten_trailing(node):
        # Keep axis 0 and collapse the remaining axes into one.
        return node.reshape((node.shape[0], cgt.mul_multi(node.shape[1:])))

    Y = flatten_trailing(X)
    Y_diff_pred = flatten_trailing(X_diff_pred)

    X_diff = cgt.tensor4('X_diff', fixed_shape=leading + x_shape)
    X_next = X + X_diff
    squared_error = (X_next - X_next_pred)**2
    loss = squared_error.mean(axis=0).sum() / 2.

    input_vars = OrderedDict()
    for var in (X, U, X_diff):
        input_vars[var.name] = var

    pred_vars = OrderedDict()
    pred_vars['Y_diff_pred'] = Y_diff_pred
    pred_vars['Y'] = Y
    pred_vars['X_next_pred'] = X_next_pred

    return 'BilinearNet', input_vars, pred_vars, loss
def test_im2col():
    """Check im2col's analytic gradient against a numeric one for three settings."""
    from cgt.numeric_diff import numeric_grad

    cases = [
        ((4, 4), (0, 0), (1, 1)),
        ((3, 3), (1, 1), (2, 2)),
        ((3, 3), (1, 1), (3, 3)),
    ]
    for im2col_args in cases:
        x_val = np.arange(2 * 1 * 28 * 28).reshape(2, 1, 28, 28).astype(cgt.floatX)
        x_sym = cgt.tensor4("x", fixed_shape=x_val.shape)
        columns = im2col(x_sym, *im2col_args)

        # Random constant weighting reduces the output to a scalar cost.
        weights = cgt.constant(np.random.randn(*cgt.infer_shape(columns)))
        cost = (columns * weights).sum()

        cost_fn = cgt.function([x_sym], cost)
        grad_fn = cgt.function([x_sym], cgt.grad(cost, [x_sym])[0])

        numeric = numeric_grad(cost_fn, x_val, eps=1e-5)
        analytic = grad_fn(x_val)
        assert np.allclose(numeric, analytic)
def make_updater_convnet_parallel():
    """Compile a gradient step whose loss is the mean over four batch slices.

    The convnet loss graph is wrapped in an nn.Module and applied to four
    consecutive slices of length ``batch_size // 4`` (``batch_size`` is read
    from the enclosing scope). The returned cgt function takes
    (X, y, stepsize), returns the averaged loss, and applies
    ``p <- p - stepsize * grad`` to each parameter of the original loss.
    """
    images = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28))  # so shapes can be inferred
    labels = cgt.vector("y", dtype="i8")
    step = cgt.scalar("stepsize")

    loss = build_convnet_return_loss(images, labels)
    loss_module = nn.Module([images, labels], [loss])

    piece = batch_size // 4
    split_loss = 0
    for begin in xrange(0, batch_size, piece):
        window = slice(begin, begin + piece)
        split_loss += loss_module([images[window], labels[window]])[0]
    split_loss /= 4

    parameters = nn.get_parameters(loss)
    gradients = cgt.grad(split_loss, parameters)
    sgd_updates = []
    for param, grad in zip(parameters, gradients):
        sgd_updates.append((param, param - step * grad))
    return cgt.function([images, labels, step], split_loss, updates=sgd_updates)
def test_pool(**kwargs):
    """Check max_pool_2d's analytic gradient against a numeric gradient.

    ``**kwargs`` is accepted and ignored.
    """
    from cgt.numeric_diff import numeric_grad

    np.random.seed(0)
    shape = (2, 3, 5, 7)
    x_sym = cgt.tensor4("x", fixed_shape=shape)
    pooled = max_pool_2d(x_sym, (4, 4), (0, 0), (1, 1))

    # Draw the input first, then the weighting, to keep the seeded sequence.
    x_val = np.random.randn(*shape)
    weight_val = np.random.randn(*cgt.infer_shape(pooled))
    cost = (pooled * cgt.constant(weight_val)).sum()

    cost_fn = cgt.function([x_sym], cost)
    grad_fn = cgt.function([x_sym], cgt.grad(cost, [x_sym])[0])

    numeric = numeric_grad(cost_fn, x_val)
    analytic = grad_fn(x_val)
    assert np.allclose(numeric, analytic)
def test_im2col():
    """Gradient-check im2col for three argument settings."""
    from cgt.numeric_diff import numeric_grad

    settings_to_try = (
        ((4, 4), (0, 0), (1, 1)),
        ((3, 3), (1, 1), (2, 2)),
        ((3, 3), (1, 1), (3, 3)),
    )
    for settings in settings_to_try:
        data = np.arange(2 * 1 * 28 * 28)
        x_val = data.reshape(2, 1, 28, 28).astype(cgt.floatX)
        x_sym = cgt.tensor4("x", fixed_shape=x_val.shape)
        unrolled = im2col(x_sym, *settings)

        # A random constant of the output's shape makes the cost a scalar.
        out_shape = cgt.infer_shape(unrolled)
        weights = cgt.constant(np.random.randn(*out_shape))
        cost = (unrolled * weights).sum()

        cost_fn = cgt.function([x_sym], cost)
        grad_expr = cgt.grad(cost, [x_sym])[0]
        grad_fn = cgt.function([x_sym], grad_expr)

        numeric = numeric_grad(cost_fn, x_val, eps=1e-5)
        analytic = grad_fn(x_val)
        assert np.allclose(numeric, analytic)
def make_updater_convnet_parallel():
    """Return a compiled gradient step using a four-way split of the batch.

    The loss graph is wrapped in an nn.Module and re-applied to four slices
    of X and y, each ``batch_size // 4`` long (``batch_size`` comes from the
    enclosing scope). The four results are summed and divided by 4, and that
    value is both the function output and the quantity differentiated.
    """
    x_in = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28))  # so shapes can be inferred
    y_in = cgt.vector("y", dtype='i8')
    lr = cgt.scalar("stepsize")

    loss = build_convnet_return_loss(x_in, y_in)
    module = nn.Module([x_in, y_in], [loss])

    quarter = batch_size // 4
    split_loss = 0
    for offset in xrange(0, batch_size, quarter):
        part = slice(offset, offset + quarter)
        part_outputs = module([x_in[part], y_in[part]])
        split_loss += part_outputs[0]
    split_loss /= 4

    weights = nn.get_parameters(loss)
    weight_grads = cgt.grad(split_loss, weights)
    step_updates = []
    for weight, weight_grad in zip(weights, weight_grads):
        step_updates.append((weight, weight - lr * weight_grad))
    return cgt.function([x_in, y_in, lr], split_loss, updates=step_updates)
def test_cpu_pool(**kwargs):
    """Gradient-check max_pool_2d on a fixed-shape random input.

    ``**kwargs`` is accepted and ignored.
    """
    from cgt.numeric_diff import numeric_grad

    np.random.seed(0)
    x_sym = cgt.tensor4("x", fixed_shape=(2, 3, 5, 7))
    pooled = max_pool_2d(x_sym, (4, 4), (0, 0), (1, 1))

    # Seeded draws: input first, then the weighting constant.
    x_val = np.random.randn(2, 3, 5, 7)
    weights = cgt.constant(np.random.randn(*cgt.infer_shape(pooled)))
    cost = (pooled * weights).sum()

    cost_fn = cgt.function([x_sym], cost)
    grad_expr = cgt.grad(cost, [x_sym])[0]
    grad_fn = cgt.function([x_sym], grad_expr)

    numeric = numeric_grad(cost_fn, x_val)
    analytic = grad_fn(x_val)
    assert np.allclose(numeric, analytic)
def test_cpu_pool(): with cgt.scoped_update_config(precision="quad", backend="native"): print cgt.get_precision() ci = get_compile_info() np.random.seed(0) x = cgt.tensor4("x", fixed_shape=(2, 3, 5, 7)) y = max_pool_2d(x, (4, 4), (0, 0), (1, 1)) xval = np.random.randn(2, 3, 5, 7) hval = np.random.randn(*cgt.infer_shape(y)) h = cgt.constant(hval) cost = (y * h).sum() fcost = cgt.function([x], cost) fgrad = cgt.function([x], cgt.grad(cost, [x])[0]) from cgt.numeric_diff import numeric_grad gnum = numeric_grad(fcost, xval) gana = fgrad(xval) assert np.allclose(gnum, gana)
def test_cpu_pool(): with cgt.scoped_update_config(precision="quad",backend="native"): print cgt.get_precision() ci = get_compile_info() np.random.seed(0) x = cgt.tensor4("x", fixed_shape=(2,3,5,7)) y = max_pool_2d(x, (4,4),(0,0),(1,1)) xval = np.random.randn(2,3,5,7) hval = np.random.randn(*cgt.infer_shape(y)) h = cgt.constant(hval) cost = (y*h).sum() fcost = cgt.function([x], cost) fgrad = cgt.function([x], cgt.grad(cost, [x])[0]) from cgt.numeric_diff import numeric_grad gnum = numeric_grad(fcost, xval) gana = fgrad(xval) assert np.allclose(gnum,gana)
def tensor4(name=None, dtype=None, fixed_shape=None):
    """Forward to cgt.tensor4, passing name, dtype and fixed_shape positionally."""
    forwarded = (name, dtype, fixed_shape)
    return cgt.tensor4(*forwarded)
A = gram_matrix(a) G = gram_matrix(x) N = a.shape[1] M = a.shape[2] * a.shape[3] loss = 1./(4 * N**2 * M**2) * ((G - A)**2).sum() return loss def total_variation_loss(x): return (((x[:,:,:-1,:-1] - x[:,:,1:,:-1])**2 + (x[:,:,:-1,:-1] - x[:,:,:-1,1:])**2)**1.25).sum() layers = ['conv4_2', 'conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'] layers = {k: net[k] for k in layers} input_im_theano = cgt.tensor4() outputs = lasagne.layers.get_output(layers.values(), input_im_theano) photo_features = {k: theano.shared(output.eval({input_im_theano: photo})) for k, output in zip(layers.keys(), outputs)} print 'finished photo features' art_features = {k: theano.shared(output.eval({input_im_theano: art})) for k, output in zip(layers.keys(), outputs)} print 'finished art features' generated_image = theano.shared(floatX(np.random.uniform(-128, 128, (1, 3, IMAGE_W, IMAGE_W)))) gen_features = lasagne.layers.get_output(layers.values(), generated_image) print 'finished gen_features 1' gen_features = {k: v for k, v in zip(layers.keys(), gen_features)} # Define loss function
# shuffle the data np.random.seed(42) sortinds = np.random.permutation(Xtrain.shape[0]) Xtrain = Xtrain[sortinds] ytrain = ytrain[sortinds] # reshape for convnet Xtrainimg = Xtrain.reshape(-1, 1, 28, 28) Xtestimg = Xtest.reshape(-1, 1, 28, 28) # Model: # Make it VGG-like # VGG nets have 3x3 kernels with length 1 padding and max-pooling has all 2s. # # VGG is a large model so here well just do a small part of it. X = cgt.tensor4('X', fixed_shape=(None, 1, 28, 28)) y = cgt.vector('y', dtype='i8') conv1 = nn.rectify( nn.SpatialConvolution(1, 32, kernelshape=(3,3), stride=(1,1), pad=(1,1), weight_init=nn.IIDGaussian(std=.1))(X) ) pool1 = nn.max_pool_2d(conv1, kernelshape=(2,2), stride=(2,2)) conv2 = nn.rectify( nn.SpatialConvolution(32, 32, kernelshape=(3,3), stride=(1,1), pad=(1,1), weight_init=nn.IIDGaussian(std=.1))(pool1) ) pool2 = nn.max_pool_2d(conv2, kernelshape=(2,2), stride=(2,2)) d0, d1, d2, d3 = pool2.shape flat = pool2.reshape([d0, d1*d2*d3]) nfeats = cgt.infer_shape(flat)[1]
def build_fcn_action_cond_encoder_net(input_shapes, levels=None):
    """Build a multi-level conv encoder / bilinear / deconv decoder graph.

    Args:
        input_shapes: pair ``(x_shape, u_shape)`` of tuples. Each is prefixed
            with ``None`` to form the fixed shapes of inputs X and U, and
            ``x_shape[0]`` is used as the channel count of X.
        levels: iterable of level indices at which a bilinear prediction is
            made; duplicates are removed and the rest sorted. Defaults to
            ``[3]`` when falsy.

    Returns:
        ``(net_name, input_vars, pred_vars, loss)``. ``input_vars`` maps names
        to X, U and X_diff. ``pred_vars`` holds 'Y_diff_pred', 'Y' and
        'X_next_pred'. ``loss`` is half the squared error between X + X_diff
        and the level-0 prediction, averaged over axis 0 and then summed.
    """
    x_shape, u_shape = input_shapes
    x_c_dim = x_shape[0]
    x1_c_dim = 16  # output channels of the level-1 convolutions
    levels = levels or [3]
    levels = sorted(set(levels))

    X = cgt.tensor4('X', fixed_shape=(None, ) + x_shape)
    U = cgt.matrix('U', fixed_shape=(None, ) + u_shape)

    # encoding
    # Level 0 is the raw input; each higher level applies two rectified 3x3
    # convolutions followed by 2x2 max-pooling to the level below it.
    Xlevels = {}
    for level in range(levels[-1] + 1):
        if level == 0:
            Xlevel = X
        else:
            # Channel counts: x_c_dim -> x1_c_dim at level 1, then the
            # output count doubles at every further level.
            if level == 1:
                xlevelm1_c_dim = x_c_dim
                xlevel_c_dim = x1_c_dim
            else:
                xlevelm1_c_dim = xlevel_c_dim
                xlevel_c_dim = 2 * xlevel_c_dim
            Xlevel_1 = nn.rectify(
                nn.SpatialConvolution(xlevelm1_c_dim, xlevel_c_dim,
                                      kernelshape=(3, 3), pad=(1, 1), stride=(1, 1),
                                      name='conv%d_1' % level,
                                      weight_init=nn.IIDGaussian(std=0.01))(
                                          Xlevels[level - 1]))
            Xlevel_2 = nn.rectify(
                nn.SpatialConvolution(
                    xlevel_c_dim, xlevel_c_dim,
                    kernelshape=(3, 3), pad=(1, 1), stride=(1, 1),
                    name='conv%d_2' % level,
                    weight_init=nn.IIDGaussian(std=0.01))(Xlevel_1))
            Xlevel = nn.max_pool_2d(Xlevel_2, kernelshape=(2, 2), pad=(0, 0), stride=(2, 2))
        Xlevels[level] = Xlevel

    # bilinear
    # At each requested level, predict a difference from (Xlevel, U) and keep
    # flattened copies of the features and of the predicted difference.
    Xlevels_next_pred_0 = {}
    Ylevels = OrderedDict()
    Ylevels_diff_pred = OrderedDict()
    for level in levels:
        Xlevel = Xlevels[level]
        # NOTE(review): Bilinear is constructed from `input_shapes` (the
        # shapes of X and U) at every level, including levels where Xlevel
        # has gone through convolution and pooling -- confirm this is intended.
        Xlevel_diff_pred = Bilinear(input_shapes, b=None, axis=2, name='bilinear%d' % level)(Xlevel, U)
        Xlevels_next_pred_0[level] = Xlevel + Xlevel_diff_pred
        Ylevels[level] = Xlevel.reshape(
            (Xlevel.shape[0], cgt.mul_multi(Xlevel.shape[1:])))
        Ylevels_diff_pred[level] = Xlevel_diff_pred.reshape(
            (Xlevel_diff_pred.shape[0], cgt.mul_multi(Xlevel_diff_pred.shape[1:])))

    # decoding
    # Walk from the top level down to level 0, building each level's
    # prediction from the prediction one level above.
    Xlevels_next_pred = {}
    for level in range(levels[-1] + 1)[::-1]:
        if level == levels[-1]:
            Xlevel_next_pred = Xlevels_next_pred_0[level]
        else:
            # The first decoding step reuses the channel counts left over
            # from the last encoder iteration; later steps halve them.
            # NOTE(review): at level == 0 only xlevelm1_c_dim is reset;
            # xlevel_c_dim keeps its value from the previous iteration --
            # confirm it matches the channel count of Xlevels_next_pred[1].
            if level == 0:
                xlevelm1_c_dim = x_c_dim
            elif level < levels[-1] - 1:
                xlevel_c_dim = xlevelm1_c_dim
                xlevelm1_c_dim = xlevelm1_c_dim // 2
            Xlevel_next_pred_2 = SpatialDeconvolution(
                xlevel_c_dim, xlevel_c_dim,
                kernelshape=(2, 2), pad=(0, 0), stride=(2, 2),
                name='upsample%d' % (level + 1),
                weight_init=nn.IIDGaussian(std=0.01))(Xlevels_next_pred[
                    level + 1])  # TODO initialize with bilinear # TODO should rectify?
            Xlevel_next_pred_1 = nn.rectify(
                SpatialDeconvolution(
                    xlevel_c_dim, xlevel_c_dim,
                    kernelshape=(3, 3), pad=(1, 1), stride=(1, 1),
                    name='deconv%d_2' % (level + 1),
                    weight_init=nn.IIDGaussian(std=0.01))(Xlevel_next_pred_2))
            # tanh is used for the final (level 0) output, rectifier elsewhere.
            nonlinearity = nn.rectify if level > 0 else cgt.tanh
            Xlevel_next_pred = nonlinearity(
                SpatialDeconvolution(
                    xlevel_c_dim, xlevelm1_c_dim,
                    kernelshape=(3, 3), pad=(1, 1), stride=(1, 1),
                    name='deconv%d_1' % (level + 1),
                    weight_init=nn.IIDGaussian(std=0.01))(Xlevel_next_pred_1))
            # If this level also has a bilinear prediction, blend it with the
            # decoded one using two learned coefficients initialised to 0.5.
            # NOTE(review): the nesting of this block under the else-branch is
            # reconstructed from whitespace-collapsed source -- confirm.
            if level in Xlevels_next_pred_0:
                coefs = nn.parameter(nn.init_array(nn.Constant(0.5), (2, )), name='sum%d.coef' % level)
                Xlevel_next_pred = coefs[0] * Xlevel_next_pred + coefs[
                    1] * Xlevels_next_pred_0[level]
                # TODO: tanh should be after sum
        Xlevels_next_pred[level] = Xlevel_next_pred

    X_next_pred = Xlevels_next_pred[0]
    # Concatenate the per-level flattened features along axis 1.
    Y = cgt.concatenate(Ylevels.values(), axis=1)
    Y_diff_pred = cgt.concatenate(Ylevels_diff_pred.values(), axis=1)

    X_diff = cgt.tensor4('X_diff', fixed_shape=(None, ) + x_shape)
    X_next = X + X_diff
    loss = ((X_next - X_next_pred)**2).mean(axis=0).sum() / 2.

    net_name = 'FcnActionCondEncoderNet_levels' + ''.join(
        str(level) for level in levels)
    input_vars = OrderedDict([(var.name, var) for var in [X, U, X_diff]])
    pred_vars = OrderedDict([('Y_diff_pred', Y_diff_pred), ('Y', Y), ('X_next_pred', X_next_pred)])
    return net_name, input_vars, pred_vars, loss
def main(): import argparse parser=argparse.ArgumentParser() parser.add_argument("--epochs",type=int,default=10) parser.add_argument("--profile",action="store_true") parser.add_argument("--dropout",action="store_true") parser.add_argument("--stepsize",type=float, default=.001) parser.add_argument("--model",choices=["dense","conv"],default="dense") parser.add_argument("--unittest",action="store_true") parser.add_argument("--grad_check",action="store_true") parser.add_argument("--devtype",choices=["cpu","gpu"],default="cpu") args = parser.parse_args() if args.grad_check: cgt.set_precision("quad") # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/ # converted to npz mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz") Xdata = (mnist["X"]/255.).astype(cgt.floatX) ydata = mnist["y"] np.random.seed(0) cgt.update_config(default_device=cgt.core.Device(devtype=args.devtype), backend="native") if args.model=="conv": Xdata = Xdata.reshape(-1, 1, 28, 28) Xtrain = Xdata[0:60000] ytrain = ydata[0:60000] Xtest = Xdata[60000:70000] ytest = ydata[60000:70000] sortinds = np.random.permutation(60000) Xtrain = Xtrain[sortinds] ytrain = ytrain[sortinds] X = cgt.tensor4("X",fixed_shape=(None,1,28,28)) if args.model=="conv" else cgt.matrix("X", fixed_shape=(None,28*28)) y = cgt.vector("y",dtype='i8') if args.model == "dense": p_drop_input,p_drop_hidden = (0.2, 0.5) if args.dropout else (0,0) w_h = init_weights(784, 256) w_h2 = init_weights(256, 256) w_o = init_weights(256, 10) pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden) pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.) 
params = [w_h, w_h2, w_o] elif args.model == "conv": p_drop_conv,p_drop_hidden = (0.2, 0.5) if args.dropout else (0,0) w = init_weights(32, 1, 3, 3) w2 = init_weights(64, 32, 3, 3) w3 = init_weights(128, 64, 3, 3) w4 = init_weights(128 * 2 * 2, 625) w_o = init_weights(625, 10) pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden) pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.) params = [w, w2, w3, w4, w_o] else: raise RuntimeError("Unreachable") cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop)) updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize) y_nodrop = cgt.argmax(pofy_nodrop, axis=1) cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop)) err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean() train = cgt.function(inputs=[X, y], outputs=[], updates=updates) computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop,cost_nodrop]) batch_size=128 from cgt.tests import gradcheck_model if args.grad_check: cost_nodrop = cgt.core.clone(cost_nodrop, {X:Xtrain[:1],y:ytrain[:1]}) print "doing gradient check..." print "------------------------------------" gradcheck_model(cost_nodrop, params[0:1]) print "success!" return if args.profile: cgt.profiler.start() print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"]) for i_epoch in xrange(args.epochs): tstart = time.time() for start in xrange(0, Xtrain.shape[0], batch_size): end = start+batch_size train(Xtrain[start:end], ytrain[start:end]) if args.unittest: return elapsed = time.time() - tstart trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)]) testerr, testloss = computeloss(Xtest, ytest) print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]) if args.profile: cgt.execution.profiler.print_stats()
def main(): import argparse parser = argparse.ArgumentParser() parser.add_argument("--epochs", type=int, default=10) parser.add_argument("--profile", action="store_true") parser.add_argument("--dropout", action="store_true") parser.add_argument("--stepsize", type=float, default=.001) parser.add_argument("--model", choices=["dense", "conv"], default="dense") parser.add_argument("--unittest", action="store_true") parser.add_argument("--grad_check", action="store_true") args = parser.parse_args() if args.grad_check: cgt.set_precision("quad") # from mldata.org http://mldata.org/repository/data/viewslug/mnist-original/ # converted to npz mnist = fetch_dataset("http://rll.berkeley.edu/cgt-data/mnist.npz") Xdata = (mnist["X"] / 255.).astype(cgt.floatX) ydata = mnist["y"] np.random.seed(0) if args.model == "conv": Xdata = Xdata.reshape(-1, 1, 28, 28) Xtrain = Xdata[0:60000] ytrain = ydata[0:60000] Xtest = Xdata[60000:70000] ytest = ydata[60000:70000] sortinds = np.random.permutation(60000) Xtrain = Xtrain[sortinds] ytrain = ytrain[sortinds] X = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28)) if args.model == "conv" else cgt.matrix( "X", fixed_shape=(None, 28 * 28)) y = cgt.vector("y", dtype='i8') if args.model == "dense": p_drop_input, p_drop_hidden = (0.2, 0.5) if args.dropout else (0, 0) w_h = init_weights(784, 256) w_h2 = init_weights(256, 256) w_o = init_weights(256, 10) pofy_drop = dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden) pofy_nodrop = dense_model(X, w_h, w_h2, w_o, 0., 0.) params = [w_h, w_h2, w_o] elif args.model == "conv": p_drop_conv, p_drop_hidden = (0.2, 0.5) if args.dropout else (0, 0) w = init_weights(32, 1, 3, 3) w2 = init_weights(64, 32, 3, 3) w3 = init_weights(128, 64, 3, 3) w4 = init_weights(128 * 2 * 2, 625) w_o = init_weights(625, 10) pofy_drop = convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden) pofy_nodrop = convnet_model(X, w, w2, w3, w4, w_o, 0., 0.) 
params = [w, w2, w3, w4, w_o] else: raise RuntimeError("Unreachable") cost_drop = -cgt.mean(categorical.loglik(y, pofy_drop)) updates = rmsprop_updates(cost_drop, params, stepsize=args.stepsize) y_nodrop = cgt.argmax(pofy_nodrop, axis=1) cost_nodrop = -cgt.mean(categorical.loglik(y, pofy_nodrop)) err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean() train = cgt.function(inputs=[X, y], outputs=[], updates=updates) computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost_nodrop]) batch_size = 128 from cgt.tests import gradcheck_model if args.grad_check: cost_nodrop = cgt.core.clone(cost_nodrop, { X: Xtrain[:1], y: ytrain[:1] }) print "doing gradient check..." print "------------------------------------" gradcheck_model(cost_nodrop, params[0:1]) print "success!" return if args.profile: cgt.profiler.start() print fmt_row(10, [ "Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time" ]) for i_epoch in xrange(args.epochs): tstart = time.time() for start in xrange(0, Xtrain.shape[0], batch_size): end = start + batch_size train(Xtrain[start:end], ytrain[start:end]) if args.unittest: return elapsed = time.time() - tstart trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)]) testerr, testloss = computeloss(Xtest, ytest) print fmt_row( 10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]) if args.profile: cgt.execution.profiler.print_stats()