示例#1
0
    def get_context(self, prev_state_bf):

        state_step_bf = self.states_mlp_bf(prev_state_bf)
        state_step_b1f = cgt.dimshuffle(state_step_bf, [0, 'x', 1])

        # Compute the inner product <phi(s_i), psi(h_u)> where phi and psi are MLPs.
        # The below line computes the pointwise product of phi(s_i) and psi(h_u) and then sums to get the inner product.
        # scalar_energies_vec_bt = cgt.sqrt(cgt.sum(cgt.broadcast('*', state_step_b1f, self.features_post_mlp_btf, 'x1x,xxx'), axis=2))

        # Compute tau=tanh(h_u*W + s_i*V), broadcasting to do all h_u mults at once.
        scalar_energies_vec_btf = cgt.tanh(cgt.broadcast('+', self.features_post_mlp_btf, state_step_b1f, 'xxx,x1x'))

        # The next two lines compute w^T*(tau) with a pointwise product and then a sum.
        scalar_energies_vec_btf = cgt.broadcast('*', self.mixing_vec_w, scalar_energies_vec_btf, '11x,xxx')
        scalar_energies_vec_bt = cgt.sum(scalar_energies_vec_btf, axis=2)

        # Softmax weights the blended features over their time dimesions.
        softmax_weights_bt = nn.softmax(scalar_energies_vec_bt, axis=1)

        # This weight multiplies all features.
        extended_softmax_bt1 = cgt.dimshuffle(softmax_weights_bt, [0, 1, 'x'])
        # Weight the features by it's temporally dependent softmax weight.
        pre_blended = cgt.broadcast('*', extended_softmax_bt1, self.features_post_mlp_btf, 'xx1,xxx')
        # Integrate out time.
        blended_features_bf = cgt.sum(pre_blended, axis=1)

        return blended_features_bf
示例#2
0
文件: sfnn.py 项目: TZ2016/snn
def make_funcs(config, dbg_out={}):
    net_in, net_out = hybrid_network(config['num_inputs'], config['num_outputs'],
                                     config['num_units'], config['num_sto'],
                                     dbg_out=dbg_out)
    if not config['dbg_out_full']: dbg_out = {}
    # def f_sample(_inputs, num_samples=1, flatten=False):
    #     _mean, _var = f_step(_inputs)
    #     _samples = []
    #     for _m, _v in zip(_mean, _var):
    #         _s = np.random.multivariate_normal(_m, np.diag(np.sqrt(_v)), num_samples)
    #         if flatten: _samples.extend(_s)
    #         else: _samples.append(_s)
    #     return np.array(_samples)
    Y_gt = cgt.matrix("Y")
    Y_prec = cgt.tensor3('V', fixed_shape=(None, config['num_inputs'], config['num_inputs']))
    params = nn.get_parameters(net_out)
    size_batch, size_out = net_out.shape
    inputs, outputs = [net_in], [net_out]
    if config['no_bias']:
        print "Excluding bias"
        params = [p for p in params if not p.name.endswith(".b")]
    loss_vec = dist.gaussian.logprob(Y_gt, net_out, Y_prec)
    if config['weight_decay'] > 0.:
        print "Applying penalty on parameter norm"
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = config['weight_decay'] * cgt.sum(params_flat ** 2)
        loss_vec -= loss_param # / size_batch
    loss = cgt.sum(loss_vec) / size_batch

    # TODO_TZ f_step seems not to fail if X has wrong dim
    f_step = cgt.function(inputs, outputs)
    f_surr = get_surrogate_func(inputs + [Y_prec, Y_gt], outputs,
                                [loss_vec], params, _dbg_out=dbg_out)

    return params, f_step, None, None, None, f_surr
示例#3
0
 def logprob(self, x, mu, sigma):
     """ Calculate logprob for each row of x, mu, sigma """
     assert sigma.ndim == mu.ndim == x.ndim == 2
     k = x.shape[1]
     log_det = cgt.sum(cgt.log(sigma), axis=1, keepdims=True)
     prob_z = -.5 * (k * np.log(2. * np.pi) + log_det)
     prob_e = cgt.sum(-.5 * sigma * ((x - mu)**2), axis=1, keepdims=True)
     # output shape: (size_batch, 1)
     return prob_z + prob_e
示例#4
0
def test_flatvec():
    cgt.reset_config
    cgt.set_precision('double')
    cgt.core.update_config(backend="python") # XXX

    N = 10
    K = 3

    Xval = np.random.randn(N,K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)

    X_nk = cgt.shared(Xval, "X")
    y_n = cgt.shared(yval, "y")
    w_k = cgt.shared(wval, "w")
    b = cgt.shared(bval, name="b")

    ypred = cgt.dot(X_nk, w_k) + b

    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])
    g = core.simplify(g)

    pars = [w_k, b]
    flatx = nn.setup_contiguous_storage(pars)
    f = cgt.function([], [err,cgt.flatcat(g)])
示例#5
0
def test_devices():
    N = 10
    K = 3

    compile_info = cgt.compilation.get_compile_info()
    cuda_enabled = compile_info["CGT_ENABLE_CUDA"]
    if not cuda_enabled:
        raise SkipTest("cuda disabled")

    Xval = np.random.randn(N,K).astype(cgt.floatX)
    wval = np.random.randn(K).astype(cgt.floatX)
    bval = np.asarray(np.random.randn()).astype(cgt.floatX)
    yval = np.random.randn(N).astype(cgt.floatX)

    with cgt.scoped_update_config(default_device=cgt.Device(devtype="gpu")):

        X_nk = cgt.shared(Xval, "X", device=cgt.Device(devtype='gpu'))
        y_n = cgt.shared(yval, "y")
        w_k = cgt.shared(wval, "w")
        b = cgt.shared(bval, name="b")

        print "bval",bval

        ypred = cgt.dot(cgt.square(X_nk), w_k) + b

        err = cgt.sum(cgt.sin(ypred - y_n))
        g = cgt.grad(err, [w_k, b])
        outputs = [err]+g
        f = cgt.function([], [err]+g)
        results = f()
        print results
        assert np.allclose(results[0] , np.sin(np.square(Xval).dot(wval)+bval-yval).sum())
示例#6
0
def test_flatvec():
    cgt.reset_config
    cgt.set_precision('double')
    cgt.core.update_config(backend="python")  # XXX

    N = 10
    K = 3

    Xval = np.random.randn(N, K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)

    X_nk = cgt.shared(Xval, "X")
    y_n = cgt.shared(yval, "y")
    w_k = cgt.shared(wval, "w")
    b = cgt.shared(bval, name="b")

    ypred = cgt.dot(X_nk, w_k) + b

    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])
    g = core.simplify(g)

    pars = [w_k, b]
    flatx = nn.setup_contiguous_storage(pars)
    f = cgt.function([], [err, cgt.flatcat(g)])
示例#7
0
文件: test_devices.py 项目: zxie/cgt
def test_devices():
    N = 10
    K = 3

    compile_info = cgt.compilation.get_compile_info()
    cuda_enabled = compile_info["CGT_ENABLE_CUDA"]
    if not cuda_enabled:
        raise SkipTest("cuda disabled")

    Xval = np.random.randn(N, K).astype(cgt.floatX)
    wval = np.random.randn(K).astype(cgt.floatX)
    bval = np.asarray(np.random.randn()).astype(cgt.floatX)
    yval = np.random.randn(N).astype(cgt.floatX)

    with cgt.scoped_update_config(default_device=cgt.Device(devtype="gpu")):

        X_nk = cgt.shared(Xval, "X", device=cgt.Device(devtype='gpu'))
        y_n = cgt.shared(yval, "y")
        w_k = cgt.shared(wval, "w")
        b = cgt.shared(bval, name="b")

        print "bval", bval

        ypred = cgt.dot(cgt.square(X_nk), w_k) + b

        err = cgt.sum(cgt.sin(ypred - y_n))
        g = cgt.grad(err, [w_k, b])
        outputs = [err] + g
        f = cgt.function([], [err] + g)
        results = f()
        print results
        assert np.allclose(
            results[0],
            np.sin(np.square(Xval).dot(wval) + bval - yval).sum())
示例#8
0
 def __init__(self, x, n_in, n_hid, n_out, nlayers=1, y=None, eps=None):
     super(GaussianMLP, self).__init__(x, n_in, n_hid, nlayers=nlayers, prefix="GaussianMLP_hidden")
     self.mu_layer = HiddenLayer(
         input=self.hidden_layers[-1].output,
         n_in=self.hidden_layers[-1].n_out,
         n_out=n_out,
         activation=None,
         prefix="GaussianMLP_mu"
     )
     # log(sigma^2)
     self.logvar_layer = HiddenLayer(
         input=self.hidden_layers[-1].output,
         n_in=self.hidden_layers[-1].n_out,
         n_out=n_out,
         activation=None,
         prefix="GaussianMLP_logvar"
     )
     self.mu = self.mu_layer.output
     self.var = cgt.exp(self.logvar_layer.output)
     self.sigma = cgt.sqrt(self.var)
     self.params = self.params + self.mu_layer.params +\
         self.logvar_layer.params
     # for use as encoder
     if eps is not None:
         assert(y is None)
         self.out = self.mu + self.sigma * eps
     # for use as decoder
     if y:
         assert(eps is None)
         self.out = cgt.sigmoid(self.mu)
         self.cost = -cgt.sum(log_diag_mvn(self.out, self.var)(y))
示例#9
0
    def get_train_objective(self, max_label_length, ground_labels_basis_btc):
        context_i_bf = parameter(init_array(IIDUniform(-0.1, 0.1), (self.batch_size, self.feature_size)), name=None)
        state_i_bf = parameter(init_array(IIDUniform(-0.1, 0.1), (self.batch_size, self.decoder_size)), name=None)
        prev_out_bc = cgt.zeros((self.batch_size, self.true_number_classes), dtype='i8') #+ self.start_token_index
        log_probs = None
        for iter_step in range(0, max_label_length):
            state_i_bf = self.get_decoder_state(context_i_bf, prev_out_bc, state_i_bf)
            context_i_bf = self.get_context(state_i_bf)
            this_character_dist_bc = self.get_character_distribution(state_i_bf, context_i_bf)
            prev_out_bc = ground_labels_basis_btc[:, iter_step, :]
            log_probs_pre = prev_out_bc * this_character_dist_bc
            log_probs_pre = cgt.log(cgt.sum(log_probs_pre, axis=1))
            if log_probs is None:
                log_probs = cgt.sum(log_probs_pre)
            else:
                log_probs += cgt.sum(log_probs_pre)

        log_probs = -log_probs
        return log_probs
示例#10
0
def make_funcs(config, dbg_out=None):
    params, Xs, Ys, C_0, H_0, C_T, H_T, C_1, H_1 = lstm_network(
        config['rnn_steps'], config['num_inputs'], config['num_outputs'],
        config['num_units'], config['num_mems'])

    # basic
    size_batch = Xs[0].shape[0]
    dY = Ys[0].shape[-1]
    Ys_gt = [
        cgt.matrix(fixed_shape=(size_batch, dY), name='Y%d' % t)
        for t in range(len(Ys))
    ]
    Ys_var = [cgt.tensor3(fixed_shape=(size_batch, dY, dY)) for _ in Ys]
    net_inputs, net_outputs = Xs + C_0 + H_0 + Ys_var, Ys + C_T + H_T

    # calculate loss
    loss_vec = []
    for i in range(len(Ys)):
        #     if i == 0: continue
        _l = dist.gaussian.logprob(Ys_gt[i], Ys[i], Ys_var[i])
        loss_vec.append(_l)
    loss_vec = cgt.add_multi(loss_vec)
    if config['weight_decay'] > 0.:
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = config['weight_decay'] * cgt.sum(params_flat**2)
        loss_vec -= loss_param  # / size_batch
    loss = cgt.sum(loss_vec) / config['rnn_steps'] / size_batch
    grad = cgt.grad(loss, params)

    # functions
    def f_init(size_batch):
        c_0, h_0 = [], []
        for _n_m in config['num_mems']:
            if _n_m > 0:
                c_0.append(np.zeros((size_batch, _n_m)))
                h_0.append(np.zeros((size_batch, _n_m)))
        return c_0, h_0

    f_step = cgt.function([Xs[0]] + C_0 + H_0, [Ys[0]] + C_1 + H_1)
    f_loss = cgt.function(net_inputs + Ys_gt, loss)
    f_grad = cgt.function(net_inputs + Ys_gt, grad)
    f_surr = cgt.function(net_inputs + Ys_gt, [loss] + net_outputs + grad)
    return params, f_step, f_loss, f_grad, f_init, f_surr
示例#11
0
文件: sfnn.py 项目: TZ2016/snn
def make_funcs(config, dbg_out={}):
    net_in, net_out = hybrid_network(config['num_inputs'],
                                     config['num_outputs'],
                                     config['num_units'],
                                     config['num_sto'],
                                     dbg_out=dbg_out)
    if not config['dbg_out_full']: dbg_out = {}
    # def f_sample(_inputs, num_samples=1, flatten=False):
    #     _mean, _var = f_step(_inputs)
    #     _samples = []
    #     for _m, _v in zip(_mean, _var):
    #         _s = np.random.multivariate_normal(_m, np.diag(np.sqrt(_v)), num_samples)
    #         if flatten: _samples.extend(_s)
    #         else: _samples.append(_s)
    #     return np.array(_samples)
    Y_gt = cgt.matrix("Y")
    Y_prec = cgt.tensor3('V',
                         fixed_shape=(None, config['num_inputs'],
                                      config['num_inputs']))
    params = nn.get_parameters(net_out)
    size_batch, size_out = net_out.shape
    inputs, outputs = [net_in], [net_out]
    if config['no_bias']:
        print "Excluding bias"
        params = [p for p in params if not p.name.endswith(".b")]
    loss_vec = dist.gaussian.logprob(Y_gt, net_out, Y_prec)
    if config['weight_decay'] > 0.:
        print "Applying penalty on parameter norm"
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = config['weight_decay'] * cgt.sum(params_flat**2)
        loss_vec -= loss_param  # / size_batch
    loss = cgt.sum(loss_vec) / size_batch

    # TODO_TZ f_step seems not to fail if X has wrong dim
    f_step = cgt.function(inputs, outputs)
    f_surr = get_surrogate_func(inputs + [Y_prec, Y_gt],
                                outputs, [loss_vec],
                                params,
                                _dbg_out=dbg_out)

    return params, f_step, None, None, None, f_surr
示例#12
0
文件: rnn.py 项目: TZ2016/snn
def make_funcs(config, dbg_out=None):
    params, Xs, Ys, C_0, H_0, C_T, H_T, C_1, H_1 = lstm_network(
        config['rnn_steps'], config['num_inputs'], config['num_outputs'],
        config['num_units'], config['num_mems']
    )

    # basic
    size_batch = Xs[0].shape[0]
    dY = Ys[0].shape[-1]
    Ys_gt = [cgt.matrix(fixed_shape=(size_batch, dY), name='Y%d'%t)
             for t in range(len(Ys))]
    Ys_var = [cgt.tensor3(fixed_shape=(size_batch, dY, dY)) for _ in Ys]
    net_inputs, net_outputs = Xs + C_0 + H_0 + Ys_var, Ys + C_T + H_T

    # calculate loss
    loss_vec = []
    for i in range(len(Ys)):
        #     if i == 0: continue
        _l = dist.gaussian.logprob(Ys_gt[i], Ys[i], Ys_var[i])
        loss_vec.append(_l)
    loss_vec = cgt.add_multi(loss_vec)
    if config['weight_decay'] > 0.:
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = config['weight_decay'] * cgt.sum(params_flat ** 2)
        loss_vec -= loss_param  # / size_batch
    loss = cgt.sum(loss_vec) / config['rnn_steps'] / size_batch
    grad = cgt.grad(loss, params)

    # functions
    def f_init(size_batch):
        c_0, h_0 = [], []
        for _n_m in config['num_mems']:
            if _n_m > 0:
                c_0.append(np.zeros((size_batch, _n_m)))
                h_0.append(np.zeros((size_batch, _n_m)))
        return c_0, h_0
    f_step = cgt.function([Xs[0]] + C_0 + H_0, [Ys[0]] + C_1 + H_1)
    f_loss = cgt.function(net_inputs + Ys_gt, loss)
    f_grad = cgt.function(net_inputs + Ys_gt, grad)
    f_surr = cgt.function(net_inputs + Ys_gt, [loss] + net_outputs + grad)
    return params, f_step, f_loss, f_grad, f_init, f_surr
示例#13
0
 def __init__(self, x, n_in, n_hid, n_out, nlayers=1, y=None):
     super(BernoulliMLP, self).__init__(x, n_in, n_hid, nlayers=nlayers, prefix="BernoulliMLP_hidden")
     self.out_layer = HiddenLayer(
         input=self.hidden_layers[-1].output,
         n_in=self.hidden_layers[-1].n_out,
         n_out=n_out,
         activation=cgt.sigmoid,
         prefix="BernoulliMLP_y_hat"
     )
     self.params = self.params + self.out_layer.params
     if y is not None:
         self.out = self.out_layer.output
         self.cost = cgt.sum(nn.binary_crossentropy(self.out, y))
示例#14
0
    def __init__(self, xdim, args, dec="bernoulli"):
        self.xdim = xdim
        self.hdim = args.hdim
        self.zdim = args.zdim
        self.lmbda = args.lmbda  # weight decay coefficient * 2
        self.x = cgt.matrix("x", dtype=cgt.floatX)
        self.eps = cgt.matrix("eps", dtype=cgt.floatX)

        self.enc_mlp = GaussianMLP(self.x, self.xdim, self.hdim, self.zdim, nlayers=args.nlayers, eps=self.eps)
        if dec == "bernoulli":
            # log p(x | z) defined as -CE(x, y) = dec_mlp.cost(y)
            self.dec_mlp = BernoulliMLP(self.enc_mlp.out, self.zdim, self.hdim, self.xdim, nlayers=args.nlayers, y=self.x)
        elif dec == "gaussian":
            self.dec_mlp = GaussianMLP(self.enc_mlp.out, self.zdim, self.hdim, self.xdim, nlayers=args.nlayers, y=self.x)
        else:
            raise RuntimeError("unrecognized decoder %" % dec)

        self.cost = (-cgt.sum(kld_unit_mvn(self.enc_mlp.mu, self.enc_mlp.var)) + self.dec_mlp.cost) / args.batch_size
        self.params = self.enc_mlp.params + self.dec_mlp.params
        # L2 regularization
        self.gparams = [cgt.grad(self.cost, [p])[0] + self.lmbda * p for p in self.params]
        self.gaccums = [cgt.shared(np.zeros(p.op.get_value().shape, dtype=cgt.floatX)) for p in self.params]

        # XXX replace w/ adagrad update from nn
        ADAGRAD_EPS = 1e-10  # for stability
        self.updates = [
            (param, param - args.lr * gparam / cgt.sqrt(gaccum + cgt.square(gparam) + ADAGRAD_EPS))
            for param, gparam, gaccum in zip(self.params, self.gparams, self.gaccums)
        ]
        self.updates += [
            (gaccum, gaccum + cgt.square(gparam))
            for gaccum, gparam in zip(self.gaccums, self.gparams)
        ]

        self.train = cgt.function(
            [self.x, self.eps],
            self.cost,
            updates=self.updates
        )
        self.test = cgt.function(
            [self.x, self.eps],
            self.cost,
            updates=None
        )
        # can be used for semi-supervised learning for example
        self.encode = cgt.function(
            [self.x, self.eps],
            self.enc_mlp.out
        )
示例#15
0
文件: demo_sfnn.py 项目: TZ2016/cgt
def make_funcs(net_in, net_out, config, dbg_out=None):
    def f_grad (*x):
        out = f_surr(*x)
        return out['loss'], out['surr_loss'], out['surr_grad']
    Y = cgt.matrix("Y")
    params = nn.get_parameters(net_out)
    if 'no_bias' in config and config['no_bias']:
        print "Excluding bias"
        params = [p for p in params if not p.name.endswith(".b")]
    size_out, size_batch = Y.shape[1], net_in.shape[0]
    f_step = cgt.function([net_in], [net_out])
    # loss_raw of shape (size_batch, 1); loss should be a scalar
    # sum-of-squares loss
    sigma = 0.1
    loss_raw = -cgt.sum((net_out - Y) ** 2, axis=1, keepdims=True) / sigma
    # negative log-likelihood
    # out_sigma = cgt.exp(net_out[:, size_out:]) + 1.e-6  # positive sigma
    # loss_raw = -gaussian_diagonal.logprob(
    #     Y, net_out,
        # out_sigma
        # cgt.fill(.01, [size_batch, size_out])
    # )
    if 'param_penal_wt' in config:
        print "Applying penalty on parameter norm"
        assert config['param_penal_wt'] > 0
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = cgt.fill(cgt.sum(params_flat ** 2), [size_batch, 1])
        loss_param *= config['param_penal_wt']
        loss_raw += loss_param
    loss = cgt.sum(loss_raw) / size_batch
    # end of loss definition
    f_loss = cgt.function([net_in, Y], [net_out, loss])
    f_surr = get_surrogate_func([net_in, Y],
                                [net_out] + dbg_out,
                                [loss_raw], params)
    return params, f_step, f_loss, f_grad, f_surr
示例#16
0
def make_funcs(net_in, net_out, config, dbg_out=None):
    def f_grad(*x):
        out = f_surr(*x)
        return out['loss'], out['surr_loss'], out['surr_grad']

    Y = cgt.matrix("Y")
    params = nn.get_parameters(net_out)
    if 'no_bias' in config and config['no_bias']:
        print "Excluding bias"
        params = [p for p in params if not p.name.endswith(".b")]
    size_out, size_batch = Y.shape[1], net_in.shape[0]
    f_step = cgt.function([net_in], [net_out])
    # loss_raw of shape (size_batch, 1); loss should be a scalar
    # sum-of-squares loss
    sigma = 0.1
    loss_raw = -cgt.sum((net_out - Y)**2, axis=1, keepdims=True) / sigma
    # negative log-likelihood
    # out_sigma = cgt.exp(net_out[:, size_out:]) + 1.e-6  # positive sigma
    # loss_raw = -gaussian_diagonal.logprob(
    #     Y, net_out,
    # out_sigma
    # cgt.fill(.01, [size_batch, size_out])
    # )
    if 'param_penal_wt' in config:
        print "Applying penalty on parameter norm"
        assert config['param_penal_wt'] > 0
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = cgt.fill(cgt.sum(params_flat**2), [size_batch, 1])
        loss_param *= config['param_penal_wt']
        loss_raw += loss_param
    loss = cgt.sum(loss_raw) / size_batch
    # end of loss definition
    f_loss = cgt.function([net_in, Y], [net_out, loss])
    f_surr = get_surrogate_func([net_in, Y], [net_out] + dbg_out, [loss_raw],
                                params)
    return params, f_step, f_loss, f_grad, f_surr
示例#17
0
文件: test_linreg.py 项目: zclfly/cgt
def test_linreg():
    cgt.reset_config()
    cgt.set_precision('double')
    N = 10
    K = 3

    Xval = np.random.randn(N, K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)

    X_nk = cgt.matrix("X")
    y_n = cgt.vector("y")
    w_k = cgt.vector("w")
    b = cgt.scalar(name="b")

    ypred = cgt.dot(X_nk, w_k) + b

    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])

    g_simple, an, _ = cgt.core.simplify_and_analyze(g)

    print "Loss function:"
    cgt.print_tree([err])
    print "Gradient:"
    cgt.print_tree(g)

    print "Gradient simplified"
    cgt.print_tree(
        g_simple,
        nodefn=lambda node, o: o.write(" " + an["node2hash"][node][:5]))

    print "-------"

    d = {X_nk: Xval, w_k: wval, b: bval, y_n: yval}

    np.testing.assert_allclose(cgt.numeric_eval(err, d),
                               np.linalg.norm(Xval.dot(wval) + bval - yval)**2)
    np.testing.assert_allclose(cgt.numeric_eval(g[0], d),
                               2 * Xval.T.dot(Xval.dot(wval) + bval - yval))
    np.testing.assert_allclose(cgt.numeric_eval(g[1], d),
                               2 * np.sum(Xval.dot(wval) + bval - yval, 0))
示例#18
0
    def get_context_backup(self, prev_state_bf):
        state_step_bf = cgt.sigmoid(self.states_mlp_bf(prev_state_bf))

        product_list = []
        for time_step in range(0, 3):
            inner_product = cgt.sum(state_step_bf*self.features_post_mlp_btf[:, time_step, :], axis=1)
            product_list.append(inner_product)
        st = cgt.stack(product_list)
        st = cgt.dimshuffle(st, [1, 0])
        softmax_weights = softmax(st)

        sum = None

        for time_step in range(0, 3):
            softmax_t_step = cgt.dimshuffle(softmax_weights[:, time_step], [0, 'x'])
            if sum is None:
                sum = cgt.broadcast('*', softmax_t_step, self.features_post_mlp_btf[:, time_step, :], 'x1,xx')
            else:
                sum += cgt.broadcast('*', softmax_t_step, self.features_post_mlp_btf[:, time_step, :], 'x1,xx')

        return sum
示例#19
0
def test_linreg():
    N = 10
    K = 3

    Xval = np.random.randn(N,K)
    wval = np.random.randn(K)
    bval = np.random.randn()
    yval = np.random.randn(N)

    X_nk = cgt.matrix("X")
    y_n = cgt.vector("y")
    w_k = cgt.vector("w")
    b = cgt.scalar(name="b")

    ypred = cgt.dot(X_nk, w_k) + b

    err = cgt.sum(cgt.square(ypred - y_n))
    g = cgt.grad(err, [w_k, b])

    g_simple,an,_ = cgt.core.simplify_and_analyze(g)


    print "Loss function:"
    cgt.print_tree([err])
    print "Gradient:"
    cgt.print_tree(g)

    print "Gradient simplified"
    cgt.print_tree(g_simple, nodefn=lambda node,o: o.write(" " + an["node2hash"][node][:5]))

    print "-------"

    d = {X_nk : Xval, w_k : wval, b : bval, y_n : yval}

    np.testing.assert_allclose(cgt.numeric_eval(err,d), np.linalg.norm(Xval.dot(wval) + bval - yval)**2,
        atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
    np.testing.assert_allclose(cgt.numeric_eval(g[0],d), 2 * Xval.T.dot(Xval.dot(wval) + bval - yval),
        atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
    np.testing.assert_allclose(cgt.numeric_eval(g[1],d), 2 *  np.sum(Xval.dot(wval) + bval - yval, 0),
        atol={"single":1e-3,"double":1e-6}[cgt.get_precision()])
示例#20
0
文件: seq_model.py 项目: zxie/cgt
    X_tnk = cgt.tensor3("X")

    cell = gru.GRUCell([dim_x], mem_size)

    Minit_nk = cgt.zeros((X_tnk.shape[0], X_tnk.shape[1]), cgt.floatX)
    M = Minit_nk

    for t in xrange(horizon):
        M = cell(M, X_tnk[t])

    # cgt.print_tree(M)
    print "simplifying..."
    M_simp = cgt.simplify([M])
    print "done"
    # cgt.print_tree(M_simp)
    print "fn before:", cgt.count_nodes(M)
    print "fn after:", cgt.count_nodes(M_simp)

    gs = cgt.grad(cgt.sum(M), cell.params())
    print "grad before", cgt.count_nodes(gs)
    g_simp = cgt.simplify(gs)
    print "grad after", cgt.count_nodes(g_simp)

    # M = cgt.simplify(M)
    elapsed.append(time() - tstart)

import matplotlib.pyplot as plt
plt.plot(horizons, elapsed, 'x-')
plt.show()
示例#21
0
def normalize(var):
    return cgt.broadcast("/", var, cgt.sum(var,axis=2,keepdims=True), "xxx,xx1")
示例#22
0
 def loglik(self, x, p):
     """ Log likelihood of params on dataset x """
     return cgt.sum(self.logprob(x, p))
示例#23
0
文件: api.py 项目: zclfly/cgt
def prod(x, axis=None, keepdims=False):
    """
    Like numpy.prod
    """
    return cgt.exp(cgt.sum(cgt.log(x), axis=axis, keepdims=keepdims))
示例#24
0
 def f(x):
     # expects batches
     k = mu.shape[1]
     logp = (-k / 2.0) * np.log(2 * np.pi) - 0.5 * cgt.sum(cgt.log(var), axis=1) - cgt.sum(0.5 * (1.0 / var) * (x - mu) * (x - mu), axis=1)
     return logp
示例#25
0
def kld_unit_mvn(mu, var):
    # KL divergence from N(0, I)
    return (mu.shape[1] + cgt.sum(cgt.log(var), axis=1) - cgt.sum(cgt.square(mu), axis=1) - cgt.sum(var, axis=1)) / 2.0
示例#26
0
文件: seq_model.py 项目: EdsterG/cgt
    X_tnk = cgt.tensor3("X")

    cell = gru.GRUCell([dim_x], mem_size)

    Minit_nk = cgt.zeros((X_tnk.shape[0], X_tnk.shape[1]),cgt.floatX)
    M = Minit_nk

    for t in xrange(horizon):
        M = cell(M, X_tnk[t])

    # cgt.print_tree(M)
    print "simplifying..."
    M_simp = cgt.simplify([M])
    print "done"
    # cgt.print_tree(M_simp)
    print "fn before:",cgt.count_nodes(M)
    print "fn after:",cgt.count_nodes(M_simp)

    gs = cgt.grad(cgt.sum(M), cell.params())
    print "grad before", cgt.count_nodes(gs)
    g_simp = cgt.simplify(gs)
    print "grad after",cgt.count_nodes(g_simp)

    # M = cgt.simplify(M)
    elapsed.append(time()-tstart)

import matplotlib.pyplot as plt
plt.plot(horizons,elapsed,'x-')
plt.show()

示例#27
0
def sum(x, axis=None):
    return cgt.sum(x, axis=axis)
示例#28
0
def sum(x, axis=None):
    return cgt.sum(x, axis=axis)
示例#29
0
def test_the_test_problem():
    #Works
    batch_size = 32  # How many samples do you want to batch.
    feat_t_steps = 20  # How many 10ms sound clips.
    feat_num_features = 10  # The dimension of the 10ms clips.
    max_label_length = feat_t_steps  # The maximal label length of the transcription. includes start character.
    num_out_classes = 27
    num_out_classes_true = num_out_classes + 2
    num_batches = 756
    num_epochs = 30

    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    ground_labels_basis = cgt.tensor3(fixed_shape=(batch_size, max_label_length, num_out_classes_true))

    last_time = time.time()
    print 'initializing temporal dense layer'
    d1 = nnbuilder.temporalDenseLayer(feats, num_units=128, activation=cgt.sigmoid)
    #d2 = nnbuilder.temporalDenseLayer(d1, num_units=128, activation=cgt.sigmoid)
    d3 = nnbuilder.temporalDenseLayer(d1, num_units=num_out_classes_true, activation=nnbuilder.linear)
    out = nn.three_d_softmax(d3, axis=2)

    log_probs = None
    for iter_step in range(0, max_label_length):
        this_character_dist_bc = out[:, iter_step, :]
        prev_out_bc = ground_labels_basis[:, iter_step, :]
        log_probs_pre = prev_out_bc * this_character_dist_bc
        log_probs_pre = cgt.log(cgt.sum(log_probs_pre, axis=1))
        if log_probs is None:
            log_probs = cgt.sum(log_probs_pre)
        else:
            log_probs += cgt.sum(log_probs_pre)

    log_probs = -log_probs

    print 'that took ' + str(time.time() - last_time) + ' seconds'

    last_time = time.time()
    print 'compiling objective function'
    updates = nn.rmsprop(log_probs, nn.get_parameters(log_probs), learning_rate=0.01)
    pred_train = cgt.function([feats, ground_labels_basis], [], updates=updates)
    pred_fun = cgt.function([feats, ground_labels_basis], [log_probs])
    most_likely_chars = cgt.argmax(out, axis=1)
    actual_predictions = cgt.function([feats, ground_labels_basis], [most_likely_chars])
    print 'that took ' + str(time.time() - last_time) + ' seconds'

    test_data = np.load('test_data.npy')
    test_labels = np.load('test_labels.npy')
    data_mean = np.mean(test_data)
    data_sd = np.mean(test_data)

    print 'now training'
    for one_epoch in range(0, num_epochs):
        trained = 0
        last_time = time.time()
        print 'starting epoch ' + str(one_epoch)
        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean, data_sd,
                                                             test_labels, num_out_classes_true)
            pred_train(batch, labels_basis)

        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean, data_sd,
                                                             test_labels, num_out_classes_true)
            trained += pred_fun(batch, labels_basis)[0]

        trained = trained/batch_iter
        print 'train loss is ' + str(trained)
        print 'that took ' + str(time.time() - last_time) + ' seconds'

        act_pred = actual_predictions(batch, labels_basis)[0]
        print 'an actual prediction is '
        print act_pred
示例#30
0
 def loglik(self, x, mu, sigma):
     return cgt.sum(self.logprob(x, mu, sigma))